diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,32356 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2554355997955073, + "global_step": 525000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.000000000000001e-07, + "loss": 14.3541, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 14.3676, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-06, + "loss": 14.2824, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 14.2217, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 2.5e-06, + "loss": 13.7982, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 13.6274, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 3.5000000000000004e-06, + "loss": 13.2412, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 12.586, + "step": 800 + }, + { + "epoch": 0.0, + "learning_rate": 4.5e-06, + "loss": 12.0465, + "step": 900 + }, + { + "epoch": 0.0, + "learning_rate": 5e-06, + "loss": 11.3204, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 5.500000000000001e-06, + "loss": 10.3171, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 6e-06, + "loss": 9.2676, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 6.5000000000000004e-06, + "loss": 8.2344, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 7.000000000000001e-06, + "loss": 6.8489, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 7.5e-06, + "loss": 5.5019, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 4.6103, + "step": 1600 + }, + { + "epoch": 0.01, + "learning_rate": 8.500000000000002e-06, + "loss": 4.0935, + "step": 1700 + }, + { + "epoch": 0.01, + "learning_rate": 9e-06, + "loss": 3.9416, + "step": 1800 + }, + { + "epoch": 0.01, + "learning_rate": 9.5e-06, + "loss": 3.7521, + "step": 1900 + }, + { + "epoch": 0.01, + "learning_rate": 1e-05, + "loss": 3.6218, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 1.05e-05, + "loss": 3.5332, + "step": 2100 + }, + { + "epoch": 0.01, + "learning_rate": 1.1000000000000001e-05, + "loss": 3.3914, + "step": 2200 + }, + { + "epoch": 0.01, + "learning_rate": 1.1500000000000002e-05, + "loss": 3.3178, + "step": 2300 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 3.2905, + "step": 2400 + }, + { + "epoch": 0.01, + "learning_rate": 1.25e-05, + "loss": 3.1829, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 1.3000000000000001e-05, + "loss": 3.1098, + "step": 2600 + }, + { + "epoch": 0.01, + "learning_rate": 1.3500000000000001e-05, + "loss": 3.0184, + "step": 2700 + }, + { + "epoch": 0.01, + "learning_rate": 1.4000000000000001e-05, + "loss": 2.9732, + "step": 2800 + }, + { + "epoch": 0.01, + "learning_rate": 1.45e-05, + "loss": 2.9343, + "step": 2900 + }, + { + "epoch": 0.01, + "learning_rate": 1.5e-05, + "loss": 2.9035, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 1.55e-05, + "loss": 2.8675, + "step": 3100 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.8479, + "step": 3200 + }, + { + "epoch": 0.01, + "learning_rate": 1.65e-05, + "loss": 2.8325, + "step": 3300 + }, + { + "epoch": 0.01, + "learning_rate": 1.7000000000000003e-05, + "loss": 2.7818, + "step": 3400 + }, + { + "epoch": 0.02, + "learning_rate": 1.75e-05, + "loss": 2.7728, + "step": 3500 + }, + { + "epoch": 0.02, + "learning_rate": 1.8e-05, + "loss": 2.7172, + "step": 3600 + }, + { + "epoch": 0.02, + "learning_rate": 1.85e-05, + "loss": 2.7153, + "step": 3700 + }, + { + "epoch": 0.02, + "learning_rate": 1.9e-05, + "loss": 2.6476, + "step": 3800 + }, + { + "epoch": 0.02, + "learning_rate": 1.9500000000000003e-05, + "loss": 2.6379, + "step": 3900 + }, + { + "epoch": 0.02, + "learning_rate": 2e-05, + "loss": 2.6251, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 2.05e-05, + "loss": 2.622, + "step": 4100 + }, + { + "epoch": 0.02, + "learning_rate": 2.1e-05, + "loss": 2.5939, + "step": 4200 + }, + { + "epoch": 0.02, + "learning_rate": 2.15e-05, + "loss": 2.5749, + "step": 4300 + }, + { + "epoch": 0.02, + "learning_rate": 2.2000000000000003e-05, + "loss": 2.5746, + "step": 4400 + }, + { + "epoch": 0.02, + "learning_rate": 2.25e-05, + "loss": 2.5466, + "step": 4500 + }, + { + "epoch": 0.02, + "learning_rate": 2.3000000000000003e-05, + "loss": 2.5754, + "step": 4600 + }, + { + "epoch": 0.02, + "learning_rate": 2.35e-05, + "loss": 2.5217, + "step": 4700 + }, + { + "epoch": 0.02, + "learning_rate": 2.4e-05, + "loss": 2.5456, + "step": 4800 + }, + { + "epoch": 0.02, + "learning_rate": 2.45e-05, + "loss": 2.4834, + "step": 4900 + }, + { + "epoch": 0.02, + "learning_rate": 2.5e-05, + "loss": 2.49, + "step": 5000 + }, + { + "epoch": 0.02, + "eval_loss": 1.9592857360839844, + "eval_runtime": 18.7264, + "eval_samples_per_second": 534.004, + "eval_steps_per_second": 16.714, + "step": 5000 + }, + { + "epoch": 0.02, + "learning_rate": 2.5500000000000003e-05, + "loss": 2.4657, + "step": 5100 + }, + { + "epoch": 0.02, + "learning_rate": 2.6000000000000002e-05, + "loss": 2.457, + "step": 5200 + }, + { + "epoch": 0.02, + "learning_rate": 2.6500000000000004e-05, + "loss": 2.4793, + "step": 5300 + }, + { + "epoch": 0.02, + "learning_rate": 2.7000000000000002e-05, + "loss": 2.4802, + "step": 5400 + }, + { + "epoch": 0.02, + "learning_rate": 2.7500000000000004e-05, + "loss": 2.4615, + "step": 5500 + }, + { + "epoch": 0.02, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.4107, + "step": 5600 + }, + { + "epoch": 0.02, + "learning_rate": 2.8499999999999998e-05, + "loss": 2.4498, + "step": 5700 + }, + { + "epoch": 0.02, + "learning_rate": 2.9e-05, + "loss": 2.3964, + "step": 5800 + }, + { + "epoch": 0.03, + "learning_rate": 2.95e-05, + "loss": 2.4961, + "step": 5900 + }, + { + "epoch": 0.03, + "learning_rate": 3e-05, + "loss": 2.4611, + "step": 6000 + }, + { + "epoch": 0.03, + "learning_rate": 3.05e-05, + "loss": 2.4297, + "step": 6100 + }, + { + "epoch": 0.03, + "learning_rate": 3.1e-05, + "loss": 2.4205, + "step": 6200 + }, + { + "epoch": 0.03, + "learning_rate": 3.15e-05, + "loss": 2.4465, + "step": 6300 + }, + { + "epoch": 0.03, + "learning_rate": 3.2000000000000005e-05, + "loss": 2.4045, + "step": 6400 + }, + { + "epoch": 0.03, + "learning_rate": 3.2500000000000004e-05, + "loss": 2.4217, + "step": 6500 + }, + { + "epoch": 0.03, + "learning_rate": 3.3e-05, + "loss": 2.3831, + "step": 6600 + }, + { + "epoch": 0.03, + "learning_rate": 3.35e-05, + "loss": 2.3653, + "step": 6700 + }, + { + "epoch": 0.03, + "learning_rate": 3.4000000000000007e-05, + "loss": 2.3325, + "step": 6800 + }, + { + "epoch": 0.03, + "learning_rate": 3.45e-05, + "loss": 2.38, + "step": 6900 + }, + { + "epoch": 0.03, + "learning_rate": 3.5e-05, + "loss": 2.3534, + "step": 7000 + }, + { + "epoch": 0.03, + "learning_rate": 3.55e-05, + "loss": 2.3172, + "step": 7100 + }, + { + "epoch": 0.03, + "learning_rate": 3.6e-05, + "loss": 2.3546, + "step": 7200 + }, + { + "epoch": 0.03, + "learning_rate": 3.65e-05, + "loss": 2.311, + "step": 7300 + }, + { + "epoch": 0.03, + "learning_rate": 3.7e-05, + "loss": 2.2996, + "step": 7400 + }, + { + "epoch": 0.03, + "learning_rate": 3.7500000000000003e-05, + "loss": 2.3427, + "step": 7500 + }, + { + "epoch": 0.03, + "learning_rate": 3.8e-05, + "loss": 2.2847, + "step": 7600 + }, + { + "epoch": 0.03, + "learning_rate": 3.85e-05, + "loss": 2.2906, + "step": 7700 + }, + { + "epoch": 0.03, + "learning_rate": 3.9000000000000006e-05, + "loss": 2.2957, + "step": 7800 + }, + { + "epoch": 0.03, + "learning_rate": 3.9500000000000005e-05, + "loss": 2.2933, + "step": 7900 + }, + { + "epoch": 0.03, + "learning_rate": 4e-05, + "loss": 2.2658, + "step": 8000 + }, + { + "epoch": 0.03, + "learning_rate": 4.05e-05, + "loss": 2.2802, + "step": 8100 + }, + { + "epoch": 0.04, + "learning_rate": 4.1e-05, + "loss": 2.2505, + "step": 8200 + }, + { + "epoch": 0.04, + "learning_rate": 4.15e-05, + "loss": 2.2688, + "step": 8300 + }, + { + "epoch": 0.04, + "learning_rate": 4.2e-05, + "loss": 2.2176, + "step": 8400 + }, + { + "epoch": 0.04, + "learning_rate": 4.25e-05, + "loss": 2.2627, + "step": 8500 + }, + { + "epoch": 0.04, + "learning_rate": 4.3e-05, + "loss": 2.266, + "step": 8600 + }, + { + "epoch": 0.04, + "learning_rate": 4.35e-05, + "loss": 2.2386, + "step": 8700 + }, + { + "epoch": 0.04, + "learning_rate": 4.4000000000000006e-05, + "loss": 2.2339, + "step": 8800 + }, + { + "epoch": 0.04, + "learning_rate": 4.4500000000000004e-05, + "loss": 2.2431, + "step": 8900 + }, + { + "epoch": 0.04, + "learning_rate": 4.5e-05, + "loss": 2.2403, + "step": 9000 + }, + { + "epoch": 0.04, + "learning_rate": 4.55e-05, + "loss": 2.2199, + "step": 9100 + }, + { + "epoch": 0.04, + "learning_rate": 4.600000000000001e-05, + "loss": 2.228, + "step": 9200 + }, + { + "epoch": 0.04, + "learning_rate": 4.6500000000000005e-05, + "loss": 2.2257, + "step": 9300 + }, + { + "epoch": 0.04, + "learning_rate": 4.7e-05, + "loss": 2.2182, + "step": 9400 + }, + { + "epoch": 0.04, + "learning_rate": 4.75e-05, + "loss": 2.2455, + "step": 9500 + }, + { + "epoch": 0.04, + "learning_rate": 4.8e-05, + "loss": 2.2127, + "step": 9600 + }, + { + "epoch": 0.04, + "learning_rate": 4.85e-05, + "loss": 2.1893, + "step": 9700 + }, + { + "epoch": 0.04, + "learning_rate": 4.9e-05, + "loss": 2.1823, + "step": 9800 + }, + { + "epoch": 0.04, + "learning_rate": 4.9500000000000004e-05, + "loss": 2.1782, + "step": 9900 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 2.1625, + "step": 10000 + }, + { + "epoch": 0.04, + "eval_loss": 1.8114508390426636, + "eval_runtime": 18.7835, + "eval_samples_per_second": 532.381, + "eval_steps_per_second": 16.664, + "step": 10000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9994571613446764e-05, + "loss": 2.2058, + "step": 10100 + }, + { + "epoch": 0.04, + "learning_rate": 4.998914322689353e-05, + "loss": 2.2157, + "step": 10200 + }, + { + "epoch": 0.04, + "learning_rate": 4.9983714840340295e-05, + "loss": 2.1651, + "step": 10300 + }, + { + "epoch": 0.04, + "learning_rate": 4.997828645378706e-05, + "loss": 2.2339, + "step": 10400 + }, + { + "epoch": 0.05, + "learning_rate": 4.9972858067233825e-05, + "loss": 2.1859, + "step": 10500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9967429680680594e-05, + "loss": 2.1955, + "step": 10600 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962001294127356e-05, + "loss": 2.1904, + "step": 10700 + }, + { + "epoch": 0.05, + "learning_rate": 4.9956572907574125e-05, + "loss": 2.1527, + "step": 10800 + }, + { + "epoch": 0.05, + "learning_rate": 4.9951144521020886e-05, + "loss": 2.1523, + "step": 10900 + }, + { + "epoch": 0.05, + "learning_rate": 4.994571613446765e-05, + "loss": 2.1323, + "step": 11000 + }, + { + "epoch": 0.05, + "learning_rate": 4.994028774791442e-05, + "loss": 2.142, + "step": 11100 + }, + { + "epoch": 0.05, + "learning_rate": 4.993485936136118e-05, + "loss": 2.1683, + "step": 11200 + }, + { + "epoch": 0.05, + "learning_rate": 4.992943097480795e-05, + "loss": 2.1413, + "step": 11300 + }, + { + "epoch": 0.05, + "learning_rate": 4.992400258825471e-05, + "loss": 2.1629, + "step": 11400 + }, + { + "epoch": 0.05, + "learning_rate": 4.991857420170147e-05, + "loss": 2.1141, + "step": 11500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913145815148247e-05, + "loss": 2.1713, + "step": 11600 + }, + { + "epoch": 0.05, + "learning_rate": 4.990771742859501e-05, + "loss": 2.1313, + "step": 11700 + }, + { + "epoch": 0.05, + "learning_rate": 4.990228904204177e-05, + "loss": 2.1351, + "step": 11800 + }, + { + "epoch": 0.05, + "learning_rate": 4.989686065548854e-05, + "loss": 2.1445, + "step": 11900 + }, + { + "epoch": 0.05, + "learning_rate": 4.98914322689353e-05, + "loss": 2.1467, + "step": 12000 + }, + { + "epoch": 0.05, + "learning_rate": 4.988600388238206e-05, + "loss": 2.1156, + "step": 12100 + }, + { + "epoch": 0.05, + "learning_rate": 4.988057549582883e-05, + "loss": 2.1247, + "step": 12200 + }, + { + "epoch": 0.05, + "learning_rate": 4.987514710927559e-05, + "loss": 2.1161, + "step": 12300 + }, + { + "epoch": 0.05, + "learning_rate": 4.9869718722722355e-05, + "loss": 2.112, + "step": 12400 + }, + { + "epoch": 0.05, + "learning_rate": 4.9864290336169124e-05, + "loss": 2.1165, + "step": 12500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9858861949615886e-05, + "loss": 2.1256, + "step": 12600 + }, + { + "epoch": 0.05, + "learning_rate": 4.9853433563062654e-05, + "loss": 2.0823, + "step": 12700 + }, + { + "epoch": 0.05, + "learning_rate": 4.984800517650942e-05, + "loss": 2.1153, + "step": 12800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9842576789956185e-05, + "loss": 2.109, + "step": 12900 + }, + { + "epoch": 0.06, + "learning_rate": 4.983714840340295e-05, + "loss": 2.1053, + "step": 13000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9831720016849715e-05, + "loss": 2.1166, + "step": 13100 + }, + { + "epoch": 0.06, + "learning_rate": 4.982629163029648e-05, + "loss": 2.0674, + "step": 13200 + }, + { + "epoch": 0.06, + "learning_rate": 4.9820863243743246e-05, + "loss": 2.1002, + "step": 13300 + }, + { + "epoch": 0.06, + "learning_rate": 4.981543485719001e-05, + "loss": 2.095, + "step": 13400 + }, + { + "epoch": 0.06, + "learning_rate": 4.981000647063677e-05, + "loss": 2.0961, + "step": 13500 + }, + { + "epoch": 0.06, + "learning_rate": 4.980457808408354e-05, + "loss": 2.0961, + "step": 13600 + }, + { + "epoch": 0.06, + "learning_rate": 4.979914969753031e-05, + "loss": 2.079, + "step": 13700 + }, + { + "epoch": 0.06, + "learning_rate": 4.979372131097707e-05, + "loss": 2.0919, + "step": 13800 + }, + { + "epoch": 0.06, + "learning_rate": 4.978829292442384e-05, + "loss": 2.1167, + "step": 13900 + }, + { + "epoch": 0.06, + "learning_rate": 4.97828645378706e-05, + "loss": 2.0993, + "step": 14000 + }, + { + "epoch": 0.06, + "learning_rate": 4.977743615131736e-05, + "loss": 2.0618, + "step": 14100 + }, + { + "epoch": 0.06, + "learning_rate": 4.977200776476413e-05, + "loss": 2.0935, + "step": 14200 + }, + { + "epoch": 0.06, + "learning_rate": 4.976657937821089e-05, + "loss": 2.0928, + "step": 14300 + }, + { + "epoch": 0.06, + "learning_rate": 4.9761150991657654e-05, + "loss": 2.1033, + "step": 14400 + }, + { + "epoch": 0.06, + "learning_rate": 4.975572260510442e-05, + "loss": 2.1083, + "step": 14500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9750294218551184e-05, + "loss": 2.0568, + "step": 14600 + }, + { + "epoch": 0.06, + "learning_rate": 4.974486583199795e-05, + "loss": 2.039, + "step": 14700 + }, + { + "epoch": 0.06, + "learning_rate": 4.973943744544472e-05, + "loss": 2.0834, + "step": 14800 + }, + { + "epoch": 0.06, + "learning_rate": 4.973400905889148e-05, + "loss": 2.0732, + "step": 14900 + }, + { + "epoch": 0.06, + "learning_rate": 4.9728580672338245e-05, + "loss": 2.0624, + "step": 15000 + }, + { + "epoch": 0.06, + "eval_loss": 1.7244441509246826, + "eval_runtime": 18.7641, + "eval_samples_per_second": 532.933, + "eval_steps_per_second": 16.681, + "step": 15000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9723152285785014e-05, + "loss": 2.0532, + "step": 15100 + }, + { + "epoch": 0.07, + "learning_rate": 4.9717723899231776e-05, + "loss": 2.0569, + "step": 15200 + }, + { + "epoch": 0.07, + "learning_rate": 4.9712295512678544e-05, + "loss": 2.0622, + "step": 15300 + }, + { + "epoch": 0.07, + "learning_rate": 4.9706867126125306e-05, + "loss": 2.0448, + "step": 15400 + }, + { + "epoch": 0.07, + "learning_rate": 4.970143873957207e-05, + "loss": 2.0594, + "step": 15500 + }, + { + "epoch": 0.07, + "learning_rate": 4.969601035301884e-05, + "loss": 2.0486, + "step": 15600 + }, + { + "epoch": 0.07, + "learning_rate": 4.96905819664656e-05, + "loss": 2.0488, + "step": 15700 + }, + { + "epoch": 0.07, + "learning_rate": 4.968515357991237e-05, + "loss": 2.03, + "step": 15800 + }, + { + "epoch": 0.07, + "learning_rate": 4.9679725193359136e-05, + "loss": 2.0228, + "step": 15900 + }, + { + "epoch": 0.07, + "learning_rate": 4.96742968068059e-05, + "loss": 2.0109, + "step": 16000 + }, + { + "epoch": 0.07, + "learning_rate": 4.966886842025266e-05, + "loss": 2.0503, + "step": 16100 + }, + { + "epoch": 0.07, + "learning_rate": 4.966344003369943e-05, + "loss": 2.0112, + "step": 16200 + }, + { + "epoch": 0.07, + "learning_rate": 4.965801164714619e-05, + "loss": 2.0324, + "step": 16300 + }, + { + "epoch": 0.07, + "learning_rate": 4.965258326059295e-05, + "loss": 2.0737, + "step": 16400 + }, + { + "epoch": 0.07, + "learning_rate": 4.964715487403972e-05, + "loss": 2.0184, + "step": 16500 + }, + { + "epoch": 0.07, + "learning_rate": 4.964172648748648e-05, + "loss": 2.0513, + "step": 16600 + }, + { + "epoch": 0.07, + "learning_rate": 4.963629810093325e-05, + "loss": 2.0329, + "step": 16700 + }, + { + "epoch": 0.07, + "learning_rate": 4.963086971438001e-05, + "loss": 2.0303, + "step": 16800 + }, + { + "epoch": 0.07, + "learning_rate": 4.962544132782678e-05, + "loss": 2.0042, + "step": 16900 + }, + { + "epoch": 0.07, + "learning_rate": 4.9620012941273544e-05, + "loss": 2.0358, + "step": 17000 + }, + { + "epoch": 0.07, + "learning_rate": 4.961458455472031e-05, + "loss": 2.041, + "step": 17100 + }, + { + "epoch": 0.07, + "learning_rate": 4.9609156168167074e-05, + "loss": 2.0349, + "step": 17200 + }, + { + "epoch": 0.07, + "learning_rate": 4.960372778161384e-05, + "loss": 2.0428, + "step": 17300 + }, + { + "epoch": 0.07, + "learning_rate": 4.9598299395060605e-05, + "loss": 2.0209, + "step": 17400 + }, + { + "epoch": 0.08, + "learning_rate": 4.959287100850737e-05, + "loss": 2.0384, + "step": 17500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9587442621954135e-05, + "loss": 2.0594, + "step": 17600 + }, + { + "epoch": 0.08, + "learning_rate": 4.95820142354009e-05, + "loss": 2.0087, + "step": 17700 + }, + { + "epoch": 0.08, + "learning_rate": 4.957658584884766e-05, + "loss": 2.0458, + "step": 17800 + }, + { + "epoch": 0.08, + "learning_rate": 4.9571157462294434e-05, + "loss": 2.0377, + "step": 17900 + }, + { + "epoch": 0.08, + "learning_rate": 4.9565729075741196e-05, + "loss": 2.0111, + "step": 18000 + }, + { + "epoch": 0.08, + "learning_rate": 4.956030068918796e-05, + "loss": 2.0155, + "step": 18100 + }, + { + "epoch": 0.08, + "learning_rate": 4.955487230263473e-05, + "loss": 2.0355, + "step": 18200 + }, + { + "epoch": 0.08, + "learning_rate": 4.954944391608149e-05, + "loss": 2.0127, + "step": 18300 + }, + { + "epoch": 0.08, + "learning_rate": 4.954401552952825e-05, + "loss": 1.9876, + "step": 18400 + }, + { + "epoch": 0.08, + "learning_rate": 4.953858714297502e-05, + "loss": 2.0298, + "step": 18500 + }, + { + "epoch": 0.08, + "learning_rate": 4.953315875642178e-05, + "loss": 2.0306, + "step": 18600 + }, + { + "epoch": 0.08, + "learning_rate": 4.952773036986855e-05, + "loss": 2.0032, + "step": 18700 + }, + { + "epoch": 0.08, + "learning_rate": 4.952230198331531e-05, + "loss": 1.9919, + "step": 18800 + }, + { + "epoch": 0.08, + "learning_rate": 4.9516873596762074e-05, + "loss": 1.9956, + "step": 18900 + }, + { + "epoch": 0.08, + "learning_rate": 4.951144521020884e-05, + "loss": 1.9778, + "step": 19000 + }, + { + "epoch": 0.08, + "learning_rate": 4.950601682365561e-05, + "loss": 1.9788, + "step": 19100 + }, + { + "epoch": 0.08, + "learning_rate": 4.950058843710237e-05, + "loss": 2.003, + "step": 19200 + }, + { + "epoch": 0.08, + "learning_rate": 4.949516005054914e-05, + "loss": 2.0157, + "step": 19300 + }, + { + "epoch": 0.08, + "learning_rate": 4.94897316639959e-05, + "loss": 2.0052, + "step": 19400 + }, + { + "epoch": 0.08, + "learning_rate": 4.9484303277442665e-05, + "loss": 2.0187, + "step": 19500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9478874890889434e-05, + "loss": 1.9949, + "step": 19600 + }, + { + "epoch": 0.08, + "learning_rate": 4.9473446504336196e-05, + "loss": 1.9735, + "step": 19700 + }, + { + "epoch": 0.09, + "learning_rate": 4.946801811778296e-05, + "loss": 1.9628, + "step": 19800 + }, + { + "epoch": 0.09, + "learning_rate": 4.9462589731229726e-05, + "loss": 1.9955, + "step": 19900 + }, + { + "epoch": 0.09, + "learning_rate": 4.9457161344676495e-05, + "loss": 1.9873, + "step": 20000 + }, + { + "epoch": 0.09, + "eval_loss": 1.6870460510253906, + "eval_runtime": 18.8219, + "eval_samples_per_second": 531.295, + "eval_steps_per_second": 16.63, + "step": 20000 + }, + { + "epoch": 0.09, + "learning_rate": 4.945173295812326e-05, + "loss": 1.9887, + "step": 20100 + }, + { + "epoch": 0.09, + "learning_rate": 4.9446304571570025e-05, + "loss": 2.0004, + "step": 20200 + }, + { + "epoch": 0.09, + "learning_rate": 4.944087618501679e-05, + "loss": 1.9967, + "step": 20300 + }, + { + "epoch": 0.09, + "learning_rate": 4.943544779846355e-05, + "loss": 2.0021, + "step": 20400 + }, + { + "epoch": 0.09, + "learning_rate": 4.943001941191032e-05, + "loss": 1.9617, + "step": 20500 + }, + { + "epoch": 0.09, + "learning_rate": 4.942459102535708e-05, + "loss": 1.9755, + "step": 20600 + }, + { + "epoch": 0.09, + "learning_rate": 4.941916263880385e-05, + "loss": 2.0077, + "step": 20700 + }, + { + "epoch": 0.09, + "learning_rate": 4.941373425225061e-05, + "loss": 2.0046, + "step": 20800 + }, + { + "epoch": 0.09, + "learning_rate": 4.940830586569737e-05, + "loss": 1.9379, + "step": 20900 + }, + { + "epoch": 0.09, + "learning_rate": 4.940287747914414e-05, + "loss": 2.0161, + "step": 21000 + }, + { + "epoch": 0.09, + "learning_rate": 4.939744909259091e-05, + "loss": 2.0048, + "step": 21100 + }, + { + "epoch": 0.09, + "learning_rate": 4.939202070603767e-05, + "loss": 1.9474, + "step": 21200 + }, + { + "epoch": 0.09, + "learning_rate": 4.938659231948444e-05, + "loss": 1.9633, + "step": 21300 + }, + { + "epoch": 0.09, + "learning_rate": 4.93811639329312e-05, + "loss": 1.9736, + "step": 21400 + }, + { + "epoch": 0.09, + "learning_rate": 4.9375735546377964e-05, + "loss": 1.9645, + "step": 21500 + }, + { + "epoch": 0.09, + "learning_rate": 4.937030715982473e-05, + "loss": 1.9823, + "step": 21600 + }, + { + "epoch": 0.09, + "learning_rate": 4.9364878773271494e-05, + "loss": 1.9318, + "step": 21700 + }, + { + "epoch": 0.09, + "learning_rate": 4.9359450386718256e-05, + "loss": 1.9283, + "step": 21800 + }, + { + "epoch": 0.09, + "learning_rate": 4.9354022000165025e-05, + "loss": 1.9433, + "step": 21900 + }, + { + "epoch": 0.09, + "learning_rate": 4.9348593613611787e-05, + "loss": 1.9767, + "step": 22000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9343165227058555e-05, + "loss": 1.9709, + "step": 22100 + }, + { + "epoch": 0.1, + "learning_rate": 4.9337736840505324e-05, + "loss": 1.9541, + "step": 22200 + }, + { + "epoch": 0.1, + "learning_rate": 4.9332308453952086e-05, + "loss": 1.9767, + "step": 22300 + }, + { + "epoch": 0.1, + "learning_rate": 4.932688006739885e-05, + "loss": 1.959, + "step": 22400 + }, + { + "epoch": 0.1, + "learning_rate": 4.9321451680845616e-05, + "loss": 1.9543, + "step": 22500 + }, + { + "epoch": 0.1, + "learning_rate": 4.931602329429238e-05, + "loss": 1.9563, + "step": 22600 + }, + { + "epoch": 0.1, + "learning_rate": 4.931059490773915e-05, + "loss": 1.94, + "step": 22700 + }, + { + "epoch": 0.1, + "learning_rate": 4.930516652118591e-05, + "loss": 1.9529, + "step": 22800 + }, + { + "epoch": 0.1, + "learning_rate": 4.929973813463267e-05, + "loss": 1.9763, + "step": 22900 + }, + { + "epoch": 0.1, + "learning_rate": 4.929430974807944e-05, + "loss": 2.0103, + "step": 23000 + }, + { + "epoch": 0.1, + "learning_rate": 4.928888136152621e-05, + "loss": 1.9782, + "step": 23100 + }, + { + "epoch": 0.1, + "learning_rate": 4.928345297497297e-05, + "loss": 1.9247, + "step": 23200 + }, + { + "epoch": 0.1, + "learning_rate": 4.927802458841974e-05, + "loss": 1.9302, + "step": 23300 + }, + { + "epoch": 0.1, + "learning_rate": 4.92725962018665e-05, + "loss": 1.9572, + "step": 23400 + }, + { + "epoch": 0.1, + "learning_rate": 4.926716781531326e-05, + "loss": 1.9389, + "step": 23500 + }, + { + "epoch": 0.1, + "learning_rate": 4.926173942876003e-05, + "loss": 1.9863, + "step": 23600 + }, + { + "epoch": 0.1, + "learning_rate": 4.925631104220679e-05, + "loss": 1.9723, + "step": 23700 + }, + { + "epoch": 0.1, + "learning_rate": 4.9250882655653555e-05, + "loss": 1.9609, + "step": 23800 + }, + { + "epoch": 0.1, + "learning_rate": 4.924545426910032e-05, + "loss": 1.9826, + "step": 23900 + }, + { + "epoch": 0.1, + "learning_rate": 4.9240025882547085e-05, + "loss": 1.9683, + "step": 24000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9234597495993854e-05, + "loss": 1.9441, + "step": 24100 + }, + { + "epoch": 0.1, + "learning_rate": 4.922916910944062e-05, + "loss": 1.888, + "step": 24200 + }, + { + "epoch": 0.1, + "learning_rate": 4.9223740722887384e-05, + "loss": 1.9307, + "step": 24300 + }, + { + "epoch": 0.1, + "learning_rate": 4.9218312336334146e-05, + "loss": 1.9494, + "step": 24400 + }, + { + "epoch": 0.11, + "learning_rate": 4.9212883949780915e-05, + "loss": 1.9249, + "step": 24500 + }, + { + "epoch": 0.11, + "learning_rate": 4.920745556322768e-05, + "loss": 1.936, + "step": 24600 + }, + { + "epoch": 0.11, + "learning_rate": 4.9202027176674445e-05, + "loss": 1.9151, + "step": 24700 + }, + { + "epoch": 0.11, + "learning_rate": 4.919659879012121e-05, + "loss": 1.9327, + "step": 24800 + }, + { + "epoch": 0.11, + "learning_rate": 4.919117040356797e-05, + "loss": 1.9377, + "step": 24900 + }, + { + "epoch": 0.11, + "learning_rate": 4.918574201701474e-05, + "loss": 1.9319, + "step": 25000 + }, + { + "epoch": 0.11, + "eval_loss": 1.659123420715332, + "eval_runtime": 18.8272, + "eval_samples_per_second": 531.147, + "eval_steps_per_second": 16.625, + "step": 25000 + }, + { + "epoch": 0.11, + "learning_rate": 4.91803136304615e-05, + "loss": 1.9475, + "step": 25100 + }, + { + "epoch": 0.11, + "learning_rate": 4.917488524390827e-05, + "loss": 1.9135, + "step": 25200 + }, + { + "epoch": 0.11, + "learning_rate": 4.916945685735504e-05, + "loss": 1.9213, + "step": 25300 + }, + { + "epoch": 0.11, + "learning_rate": 4.91640284708018e-05, + "loss": 1.9438, + "step": 25400 + }, + { + "epoch": 0.11, + "learning_rate": 4.915860008424856e-05, + "loss": 1.9821, + "step": 25500 + }, + { + "epoch": 0.11, + "learning_rate": 4.915317169769533e-05, + "loss": 1.9149, + "step": 25600 + }, + { + "epoch": 0.11, + "learning_rate": 4.914774331114209e-05, + "loss": 1.9356, + "step": 25700 + }, + { + "epoch": 0.11, + "learning_rate": 4.914231492458885e-05, + "loss": 1.9192, + "step": 25800 + }, + { + "epoch": 0.11, + "learning_rate": 4.913688653803562e-05, + "loss": 1.9404, + "step": 25900 + }, + { + "epoch": 0.11, + "learning_rate": 4.9131458151482384e-05, + "loss": 1.9447, + "step": 26000 + }, + { + "epoch": 0.11, + "learning_rate": 4.912602976492915e-05, + "loss": 1.9286, + "step": 26100 + }, + { + "epoch": 0.11, + "learning_rate": 4.9120601378375914e-05, + "loss": 1.9304, + "step": 26200 + }, + { + "epoch": 0.11, + "learning_rate": 4.911517299182268e-05, + "loss": 1.9031, + "step": 26300 + }, + { + "epoch": 0.11, + "learning_rate": 4.9109744605269445e-05, + "loss": 1.9244, + "step": 26400 + }, + { + "epoch": 0.11, + "learning_rate": 4.910431621871621e-05, + "loss": 1.9265, + "step": 26500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9098887832162975e-05, + "loss": 1.9259, + "step": 26600 + }, + { + "epoch": 0.11, + "learning_rate": 4.9093459445609744e-05, + "loss": 1.9633, + "step": 26700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9088031059056506e-05, + "loss": 1.949, + "step": 26800 + }, + { + "epoch": 0.12, + "learning_rate": 4.908260267250327e-05, + "loss": 1.9403, + "step": 26900 + }, + { + "epoch": 0.12, + "learning_rate": 4.9077174285950036e-05, + "loss": 1.9391, + "step": 27000 + }, + { + "epoch": 0.12, + "learning_rate": 4.90717458993968e-05, + "loss": 1.9692, + "step": 27100 + }, + { + "epoch": 0.12, + "learning_rate": 4.906631751284356e-05, + "loss": 1.9442, + "step": 27200 + }, + { + "epoch": 0.12, + "learning_rate": 4.9060889126290335e-05, + "loss": 1.9369, + "step": 27300 + }, + { + "epoch": 0.12, + "learning_rate": 4.90554607397371e-05, + "loss": 1.9239, + "step": 27400 + }, + { + "epoch": 0.12, + "learning_rate": 4.905003235318386e-05, + "loss": 1.9146, + "step": 27500 + }, + { + "epoch": 0.12, + "learning_rate": 4.904460396663063e-05, + "loss": 1.9086, + "step": 27600 + }, + { + "epoch": 0.12, + "learning_rate": 4.903917558007739e-05, + "loss": 1.9168, + "step": 27700 + }, + { + "epoch": 0.12, + "learning_rate": 4.903374719352415e-05, + "loss": 1.9262, + "step": 27800 + }, + { + "epoch": 0.12, + "learning_rate": 4.902831880697092e-05, + "loss": 1.9099, + "step": 27900 + }, + { + "epoch": 0.12, + "learning_rate": 4.902289042041768e-05, + "loss": 1.9157, + "step": 28000 + }, + { + "epoch": 0.12, + "learning_rate": 4.901746203386445e-05, + "loss": 1.9182, + "step": 28100 + }, + { + "epoch": 0.12, + "learning_rate": 4.901203364731121e-05, + "loss": 1.8918, + "step": 28200 + }, + { + "epoch": 0.12, + "learning_rate": 4.9006605260757974e-05, + "loss": 1.9396, + "step": 28300 + }, + { + "epoch": 0.12, + "learning_rate": 4.900117687420474e-05, + "loss": 1.898, + "step": 28400 + }, + { + "epoch": 0.12, + "learning_rate": 4.899574848765151e-05, + "loss": 1.9377, + "step": 28500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8990320101098274e-05, + "loss": 1.9022, + "step": 28600 + }, + { + "epoch": 0.12, + "learning_rate": 4.898489171454504e-05, + "loss": 1.9118, + "step": 28700 + }, + { + "epoch": 0.12, + "learning_rate": 4.8979463327991804e-05, + "loss": 1.9007, + "step": 28800 + }, + { + "epoch": 0.12, + "learning_rate": 4.8974034941438566e-05, + "loss": 1.9042, + "step": 28900 + }, + { + "epoch": 0.12, + "learning_rate": 4.8968606554885335e-05, + "loss": 1.8985, + "step": 29000 + }, + { + "epoch": 0.13, + "learning_rate": 4.8963178168332097e-05, + "loss": 1.935, + "step": 29100 + }, + { + "epoch": 0.13, + "learning_rate": 4.895774978177886e-05, + "loss": 1.8826, + "step": 29200 + }, + { + "epoch": 0.13, + "learning_rate": 4.895232139522563e-05, + "loss": 1.9218, + "step": 29300 + }, + { + "epoch": 0.13, + "learning_rate": 4.8946893008672396e-05, + "loss": 1.8877, + "step": 29400 + }, + { + "epoch": 0.13, + "learning_rate": 4.894146462211916e-05, + "loss": 1.8943, + "step": 29500 + }, + { + "epoch": 0.13, + "learning_rate": 4.8936036235565926e-05, + "loss": 1.9168, + "step": 29600 + }, + { + "epoch": 0.13, + "learning_rate": 4.893060784901269e-05, + "loss": 1.8982, + "step": 29700 + }, + { + "epoch": 0.13, + "learning_rate": 4.892517946245945e-05, + "loss": 1.9107, + "step": 29800 + }, + { + "epoch": 0.13, + "learning_rate": 4.891975107590622e-05, + "loss": 1.9192, + "step": 29900 + }, + { + "epoch": 0.13, + "learning_rate": 4.891432268935298e-05, + "loss": 1.8701, + "step": 30000 + }, + { + "epoch": 0.13, + "eval_loss": 1.6432205438613892, + "eval_runtime": 18.7697, + "eval_samples_per_second": 532.772, + "eval_steps_per_second": 16.676, + "step": 30000 + }, + { + "epoch": 0.13, + "learning_rate": 4.890889430279975e-05, + "loss": 1.8962, + "step": 30100 + }, + { + "epoch": 0.13, + "learning_rate": 4.890346591624651e-05, + "loss": 1.9178, + "step": 30200 + }, + { + "epoch": 0.13, + "learning_rate": 4.889803752969327e-05, + "loss": 1.9007, + "step": 30300 + }, + { + "epoch": 0.13, + "learning_rate": 4.889260914314004e-05, + "loss": 1.9306, + "step": 30400 + }, + { + "epoch": 0.13, + "learning_rate": 4.888718075658681e-05, + "loss": 1.8875, + "step": 30500 + }, + { + "epoch": 0.13, + "learning_rate": 4.888175237003357e-05, + "loss": 1.9071, + "step": 30600 + }, + { + "epoch": 0.13, + "learning_rate": 4.887632398348034e-05, + "loss": 1.9057, + "step": 30700 + }, + { + "epoch": 0.13, + "learning_rate": 4.88708955969271e-05, + "loss": 1.9082, + "step": 30800 + }, + { + "epoch": 0.13, + "learning_rate": 4.8865467210373864e-05, + "loss": 1.9019, + "step": 30900 + }, + { + "epoch": 0.13, + "learning_rate": 4.886003882382063e-05, + "loss": 1.9186, + "step": 31000 + }, + { + "epoch": 0.13, + "learning_rate": 4.8854610437267395e-05, + "loss": 1.9171, + "step": 31100 + }, + { + "epoch": 0.13, + "learning_rate": 4.884918205071416e-05, + "loss": 1.9104, + "step": 31200 + }, + { + "epoch": 0.13, + "learning_rate": 4.8843753664160926e-05, + "loss": 1.8954, + "step": 31300 + }, + { + "epoch": 0.13, + "learning_rate": 4.883832527760769e-05, + "loss": 1.9037, + "step": 31400 + }, + { + "epoch": 0.14, + "learning_rate": 4.8832896891054456e-05, + "loss": 1.8636, + "step": 31500 + }, + { + "epoch": 0.14, + "learning_rate": 4.8827468504501225e-05, + "loss": 1.8734, + "step": 31600 + }, + { + "epoch": 0.14, + "learning_rate": 4.8822040117947987e-05, + "loss": 1.8882, + "step": 31700 + }, + { + "epoch": 0.14, + "learning_rate": 4.881661173139475e-05, + "loss": 1.8936, + "step": 31800 + }, + { + "epoch": 0.14, + "learning_rate": 4.881118334484152e-05, + "loss": 1.8927, + "step": 31900 + }, + { + "epoch": 0.14, + "learning_rate": 4.880575495828828e-05, + "loss": 1.8856, + "step": 32000 + }, + { + "epoch": 0.14, + "learning_rate": 4.880032657173505e-05, + "loss": 1.8895, + "step": 32100 + }, + { + "epoch": 0.14, + "learning_rate": 4.879489818518181e-05, + "loss": 1.855, + "step": 32200 + }, + { + "epoch": 0.14, + "learning_rate": 4.878946979862857e-05, + "loss": 1.9009, + "step": 32300 + }, + { + "epoch": 0.14, + "learning_rate": 4.878404141207534e-05, + "loss": 1.8999, + "step": 32400 + }, + { + "epoch": 0.14, + "learning_rate": 4.87786130255221e-05, + "loss": 1.9017, + "step": 32500 + }, + { + "epoch": 0.14, + "learning_rate": 4.877318463896887e-05, + "loss": 1.9007, + "step": 32600 + }, + { + "epoch": 0.14, + "learning_rate": 4.876775625241564e-05, + "loss": 1.8886, + "step": 32700 + }, + { + "epoch": 0.14, + "learning_rate": 4.87623278658624e-05, + "loss": 1.8951, + "step": 32800 + }, + { + "epoch": 0.14, + "learning_rate": 4.875689947930916e-05, + "loss": 1.9086, + "step": 32900 + }, + { + "epoch": 0.14, + "learning_rate": 4.875147109275593e-05, + "loss": 1.8772, + "step": 33000 + }, + { + "epoch": 0.14, + "learning_rate": 4.8746042706202693e-05, + "loss": 1.849, + "step": 33100 + }, + { + "epoch": 0.14, + "learning_rate": 4.8740614319649455e-05, + "loss": 1.9026, + "step": 33200 + }, + { + "epoch": 0.14, + "learning_rate": 4.8735185933096224e-05, + "loss": 1.8813, + "step": 33300 + }, + { + "epoch": 0.14, + "learning_rate": 4.8729757546542986e-05, + "loss": 1.8758, + "step": 33400 + }, + { + "epoch": 0.14, + "learning_rate": 4.872432915998975e-05, + "loss": 1.8896, + "step": 33500 + }, + { + "epoch": 0.14, + "learning_rate": 4.871890077343652e-05, + "loss": 1.8888, + "step": 33600 + }, + { + "epoch": 0.14, + "learning_rate": 4.8713472386883285e-05, + "loss": 1.8835, + "step": 33700 + }, + { + "epoch": 0.15, + "learning_rate": 4.870804400033005e-05, + "loss": 1.8602, + "step": 33800 + }, + { + "epoch": 0.15, + "learning_rate": 4.8702615613776816e-05, + "loss": 1.9211, + "step": 33900 + }, + { + "epoch": 0.15, + "learning_rate": 4.869718722722358e-05, + "loss": 1.8607, + "step": 34000 + }, + { + "epoch": 0.15, + "learning_rate": 4.8691758840670346e-05, + "loss": 1.9135, + "step": 34100 + }, + { + "epoch": 0.15, + "learning_rate": 4.868633045411711e-05, + "loss": 1.8718, + "step": 34200 + }, + { + "epoch": 0.15, + "learning_rate": 4.868090206756387e-05, + "loss": 1.8732, + "step": 34300 + }, + { + "epoch": 0.15, + "learning_rate": 4.867547368101064e-05, + "loss": 1.8962, + "step": 34400 + }, + { + "epoch": 0.15, + "learning_rate": 4.86700452944574e-05, + "loss": 1.8708, + "step": 34500 + }, + { + "epoch": 0.15, + "learning_rate": 4.866461690790416e-05, + "loss": 1.8837, + "step": 34600 + }, + { + "epoch": 0.15, + "learning_rate": 4.865918852135094e-05, + "loss": 1.8801, + "step": 34700 + }, + { + "epoch": 0.15, + "learning_rate": 4.86537601347977e-05, + "loss": 1.8832, + "step": 34800 + }, + { + "epoch": 0.15, + "learning_rate": 4.864833174824446e-05, + "loss": 1.8599, + "step": 34900 + }, + { + "epoch": 0.15, + "learning_rate": 4.864290336169123e-05, + "loss": 1.878, + "step": 35000 + }, + { + "epoch": 0.15, + "eval_loss": 1.6231168508529663, + "eval_runtime": 18.8376, + "eval_samples_per_second": 530.853, + "eval_steps_per_second": 16.616, + "step": 35000 + }, + { + "epoch": 0.15, + "learning_rate": 4.863747497513799e-05, + "loss": 1.8869, + "step": 35100 + }, + { + "epoch": 0.15, + "learning_rate": 4.8632046588584754e-05, + "loss": 1.8941, + "step": 35200 + }, + { + "epoch": 0.15, + "learning_rate": 4.862661820203152e-05, + "loss": 1.8673, + "step": 35300 + }, + { + "epoch": 0.15, + "learning_rate": 4.8621189815478284e-05, + "loss": 1.8841, + "step": 35400 + }, + { + "epoch": 0.15, + "learning_rate": 4.8615761428925046e-05, + "loss": 1.8753, + "step": 35500 + }, + { + "epoch": 0.15, + "learning_rate": 4.8610333042371815e-05, + "loss": 1.8634, + "step": 35600 + }, + { + "epoch": 0.15, + "learning_rate": 4.8604904655818584e-05, + "loss": 1.8727, + "step": 35700 + }, + { + "epoch": 0.15, + "learning_rate": 4.8599476269265345e-05, + "loss": 1.858, + "step": 35800 + }, + { + "epoch": 0.15, + "learning_rate": 4.8594047882712114e-05, + "loss": 1.9326, + "step": 35900 + }, + { + "epoch": 0.15, + "learning_rate": 4.8588619496158876e-05, + "loss": 1.8632, + "step": 36000 + }, + { + "epoch": 0.16, + "learning_rate": 4.8583191109605645e-05, + "loss": 1.8698, + "step": 36100 + }, + { + "epoch": 0.16, + "learning_rate": 4.8577762723052406e-05, + "loss": 1.8401, + "step": 36200 + }, + { + "epoch": 0.16, + "learning_rate": 4.857233433649917e-05, + "loss": 1.8499, + "step": 36300 + }, + { + "epoch": 0.16, + "learning_rate": 4.856690594994594e-05, + "loss": 1.8542, + "step": 36400 + }, + { + "epoch": 0.16, + "learning_rate": 4.85614775633927e-05, + "loss": 1.8607, + "step": 36500 + }, + { + "epoch": 0.16, + "learning_rate": 4.855604917683946e-05, + "loss": 1.8488, + "step": 36600 + }, + { + "epoch": 0.16, + "learning_rate": 4.855062079028623e-05, + "loss": 1.8528, + "step": 36700 + }, + { + "epoch": 0.16, + "learning_rate": 4.8545192403733e-05, + "loss": 1.8522, + "step": 36800 + }, + { + "epoch": 0.16, + "learning_rate": 4.853976401717976e-05, + "loss": 1.8263, + "step": 36900 + }, + { + "epoch": 0.16, + "learning_rate": 4.853433563062653e-05, + "loss": 1.8421, + "step": 37000 + }, + { + "epoch": 0.16, + "learning_rate": 4.852890724407329e-05, + "loss": 1.8677, + "step": 37100 + }, + { + "epoch": 0.16, + "learning_rate": 4.852347885752005e-05, + "loss": 1.8743, + "step": 37200 + }, + { + "epoch": 0.16, + "learning_rate": 4.851805047096682e-05, + "loss": 1.8847, + "step": 37300 + }, + { + "epoch": 0.16, + "learning_rate": 4.851262208441358e-05, + "loss": 1.8881, + "step": 37400 + }, + { + "epoch": 0.16, + "learning_rate": 4.8507193697860345e-05, + "loss": 1.8622, + "step": 37500 + }, + { + "epoch": 0.16, + "learning_rate": 4.8501765311307113e-05, + "loss": 1.8695, + "step": 37600 + }, + { + "epoch": 0.16, + "learning_rate": 4.8496336924753875e-05, + "loss": 1.8583, + "step": 37700 + }, + { + "epoch": 0.16, + "learning_rate": 4.8490908538200644e-05, + "loss": 1.8674, + "step": 37800 + }, + { + "epoch": 0.16, + "learning_rate": 4.848548015164741e-05, + "loss": 1.8767, + "step": 37900 + }, + { + "epoch": 0.16, + "learning_rate": 4.8480051765094174e-05, + "loss": 1.8579, + "step": 38000 + }, + { + "epoch": 0.16, + "learning_rate": 4.847462337854094e-05, + "loss": 1.8644, + "step": 38100 + }, + { + "epoch": 0.16, + "learning_rate": 4.8469194991987705e-05, + "loss": 1.8259, + "step": 38200 + }, + { + "epoch": 0.16, + "learning_rate": 4.846376660543447e-05, + "loss": 1.8685, + "step": 38300 + }, + { + "epoch": 0.16, + "learning_rate": 4.8458338218881236e-05, + "loss": 1.8588, + "step": 38400 + }, + { + "epoch": 0.17, + "learning_rate": 4.8452909832328e-05, + "loss": 1.8768, + "step": 38500 + }, + { + "epoch": 0.17, + "learning_rate": 4.844748144577476e-05, + "loss": 1.8565, + "step": 38600 + }, + { + "epoch": 0.17, + "learning_rate": 4.844205305922153e-05, + "loss": 1.8883, + "step": 38700 + }, + { + "epoch": 0.17, + "learning_rate": 4.843662467266829e-05, + "loss": 1.8884, + "step": 38800 + }, + { + "epoch": 0.17, + "learning_rate": 4.843119628611506e-05, + "loss": 1.8314, + "step": 38900 + }, + { + "epoch": 0.17, + "learning_rate": 4.842576789956183e-05, + "loss": 1.8166, + "step": 39000 + }, + { + "epoch": 0.17, + "learning_rate": 4.842033951300859e-05, + "loss": 1.8752, + "step": 39100 + }, + { + "epoch": 0.17, + "learning_rate": 4.841491112645535e-05, + "loss": 1.8429, + "step": 39200 + }, + { + "epoch": 0.17, + "learning_rate": 4.840948273990212e-05, + "loss": 1.841, + "step": 39300 + }, + { + "epoch": 0.17, + "learning_rate": 4.840405435334888e-05, + "loss": 1.8476, + "step": 39400 + }, + { + "epoch": 0.17, + "learning_rate": 4.839862596679564e-05, + "loss": 1.8538, + "step": 39500 + }, + { + "epoch": 0.17, + "learning_rate": 4.839319758024241e-05, + "loss": 1.8662, + "step": 39600 + }, + { + "epoch": 0.17, + "learning_rate": 4.8387769193689174e-05, + "loss": 1.8432, + "step": 39700 + }, + { + "epoch": 0.17, + "learning_rate": 4.838234080713594e-05, + "loss": 1.8317, + "step": 39800 + }, + { + "epoch": 0.17, + "learning_rate": 4.837691242058271e-05, + "loss": 1.8308, + "step": 39900 + }, + { + "epoch": 0.17, + "learning_rate": 4.837148403402947e-05, + "loss": 1.8797, + "step": 40000 + }, + { + "epoch": 0.17, + "eval_loss": 1.6136231422424316, + "eval_runtime": 18.8524, + "eval_samples_per_second": 530.437, + "eval_steps_per_second": 16.603, + "step": 40000 + }, + { + "epoch": 0.17, + "learning_rate": 4.836605564747624e-05, + "loss": 1.857, + "step": 40100 + }, + { + "epoch": 0.17, + "learning_rate": 4.8360627260923003e-05, + "loss": 1.8894, + "step": 40200 + }, + { + "epoch": 0.17, + "learning_rate": 4.8355198874369765e-05, + "loss": 1.881, + "step": 40300 + }, + { + "epoch": 0.17, + "learning_rate": 4.8349770487816534e-05, + "loss": 1.8252, + "step": 40400 + }, + { + "epoch": 0.17, + "learning_rate": 4.8344342101263296e-05, + "loss": 1.8565, + "step": 40500 + }, + { + "epoch": 0.17, + "learning_rate": 4.833891371471006e-05, + "loss": 1.8664, + "step": 40600 + }, + { + "epoch": 0.17, + "learning_rate": 4.8333485328156826e-05, + "loss": 1.8833, + "step": 40700 + }, + { + "epoch": 0.18, + "learning_rate": 4.832805694160359e-05, + "loss": 1.8351, + "step": 40800 + }, + { + "epoch": 0.18, + "learning_rate": 4.832262855505036e-05, + "loss": 1.8889, + "step": 40900 + }, + { + "epoch": 0.18, + "learning_rate": 4.8317200168497126e-05, + "loss": 1.8882, + "step": 41000 + }, + { + "epoch": 0.18, + "learning_rate": 4.831177178194389e-05, + "loss": 1.8759, + "step": 41100 + }, + { + "epoch": 0.18, + "learning_rate": 4.830634339539065e-05, + "loss": 1.8267, + "step": 41200 + }, + { + "epoch": 0.18, + "learning_rate": 4.830091500883742e-05, + "loss": 1.8654, + "step": 41300 + }, + { + "epoch": 0.18, + "learning_rate": 4.829548662228418e-05, + "loss": 1.8388, + "step": 41400 + }, + { + "epoch": 0.18, + "learning_rate": 4.829005823573094e-05, + "loss": 1.8393, + "step": 41500 + }, + { + "epoch": 0.18, + "learning_rate": 4.828462984917771e-05, + "loss": 1.8669, + "step": 41600 + }, + { + "epoch": 0.18, + "learning_rate": 4.827920146262447e-05, + "loss": 1.8393, + "step": 41700 + }, + { + "epoch": 0.18, + "learning_rate": 4.827377307607124e-05, + "loss": 1.8635, + "step": 41800 + }, + { + "epoch": 0.18, + "learning_rate": 4.8268344689518e-05, + "loss": 1.8317, + "step": 41900 + }, + { + "epoch": 0.18, + "learning_rate": 4.826291630296477e-05, + "loss": 1.8476, + "step": 42000 + }, + { + "epoch": 0.18, + "learning_rate": 4.825748791641154e-05, + "loss": 1.8106, + "step": 42100 + }, + { + "epoch": 0.18, + "learning_rate": 4.82520595298583e-05, + "loss": 1.8654, + "step": 42200 + }, + { + "epoch": 0.18, + "learning_rate": 4.8246631143305064e-05, + "loss": 1.8358, + "step": 42300 + }, + { + "epoch": 0.18, + "learning_rate": 4.824120275675183e-05, + "loss": 1.8609, + "step": 42400 + }, + { + "epoch": 0.18, + "learning_rate": 4.8235774370198594e-05, + "loss": 1.8331, + "step": 42500 + }, + { + "epoch": 0.18, + "learning_rate": 4.8230345983645356e-05, + "loss": 1.8536, + "step": 42600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8224917597092125e-05, + "loss": 1.8018, + "step": 42700 + }, + { + "epoch": 0.18, + "learning_rate": 4.821948921053889e-05, + "loss": 1.8408, + "step": 42800 + }, + { + "epoch": 0.18, + "learning_rate": 4.821406082398565e-05, + "loss": 1.803, + "step": 42900 + }, + { + "epoch": 0.18, + "learning_rate": 4.8208632437432424e-05, + "loss": 1.8361, + "step": 43000 + }, + { + "epoch": 0.19, + "learning_rate": 4.8203204050879186e-05, + "loss": 1.8156, + "step": 43100 + }, + { + "epoch": 0.19, + "learning_rate": 4.819777566432595e-05, + "loss": 1.8568, + "step": 43200 + }, + { + "epoch": 0.19, + "learning_rate": 4.8192347277772716e-05, + "loss": 1.8422, + "step": 43300 + }, + { + "epoch": 0.19, + "learning_rate": 4.818691889121948e-05, + "loss": 1.8498, + "step": 43400 + }, + { + "epoch": 0.19, + "learning_rate": 4.818149050466624e-05, + "loss": 1.8605, + "step": 43500 + }, + { + "epoch": 0.19, + "learning_rate": 4.817606211811301e-05, + "loss": 1.8621, + "step": 43600 + }, + { + "epoch": 0.19, + "learning_rate": 4.817063373155977e-05, + "loss": 1.8613, + "step": 43700 + }, + { + "epoch": 0.19, + "learning_rate": 4.816520534500654e-05, + "loss": 1.839, + "step": 43800 + }, + { + "epoch": 0.19, + "learning_rate": 4.81597769584533e-05, + "loss": 1.8462, + "step": 43900 + }, + { + "epoch": 0.19, + "learning_rate": 4.815434857190006e-05, + "loss": 1.7864, + "step": 44000 + }, + { + "epoch": 0.19, + "learning_rate": 4.814892018534684e-05, + "loss": 1.8452, + "step": 44100 + }, + { + "epoch": 0.19, + "learning_rate": 4.81434917987936e-05, + "loss": 1.8469, + "step": 44200 + }, + { + "epoch": 0.19, + "learning_rate": 4.813806341224036e-05, + "loss": 1.8697, + "step": 44300 + }, + { + "epoch": 0.19, + "learning_rate": 4.813263502568713e-05, + "loss": 1.8287, + "step": 44400 + }, + { + "epoch": 0.19, + "learning_rate": 4.812720663913389e-05, + "loss": 1.8486, + "step": 44500 + }, + { + "epoch": 0.19, + "learning_rate": 4.8121778252580655e-05, + "loss": 1.8107, + "step": 44600 + }, + { + "epoch": 0.19, + "learning_rate": 4.811634986602742e-05, + "loss": 1.8509, + "step": 44700 + }, + { + "epoch": 0.19, + "learning_rate": 4.8110921479474185e-05, + "loss": 1.8531, + "step": 44800 + }, + { + "epoch": 0.19, + "learning_rate": 4.810549309292095e-05, + "loss": 1.8266, + "step": 44900 + }, + { + "epoch": 0.19, + "learning_rate": 4.8100064706367716e-05, + "loss": 1.8125, + "step": 45000 + }, + { + "epoch": 0.19, + "eval_loss": 1.6042814254760742, + "eval_runtime": 18.8207, + "eval_samples_per_second": 531.33, + "eval_steps_per_second": 16.631, + "step": 45000 + }, + { + "epoch": 0.19, + "learning_rate": 4.8094636319814484e-05, + "loss": 1.8248, + "step": 45100 + }, + { + "epoch": 0.19, + "learning_rate": 4.8089207933261246e-05, + "loss": 1.83, + "step": 45200 + }, + { + "epoch": 0.19, + "learning_rate": 4.8083779546708015e-05, + "loss": 1.8339, + "step": 45300 + }, + { + "epoch": 0.2, + "learning_rate": 4.807835116015478e-05, + "loss": 1.8315, + "step": 45400 + }, + { + "epoch": 0.2, + "learning_rate": 4.807292277360154e-05, + "loss": 1.8314, + "step": 45500 + }, + { + "epoch": 0.2, + "learning_rate": 4.806749438704831e-05, + "loss": 1.8101, + "step": 45600 + }, + { + "epoch": 0.2, + "learning_rate": 4.806206600049507e-05, + "loss": 1.822, + "step": 45700 + }, + { + "epoch": 0.2, + "learning_rate": 4.805663761394184e-05, + "loss": 1.8244, + "step": 45800 + }, + { + "epoch": 0.2, + "learning_rate": 4.80512092273886e-05, + "loss": 1.8312, + "step": 45900 + }, + { + "epoch": 0.2, + "learning_rate": 4.804578084083536e-05, + "loss": 1.8351, + "step": 46000 + }, + { + "epoch": 0.2, + "learning_rate": 4.804035245428213e-05, + "loss": 1.8355, + "step": 46100 + }, + { + "epoch": 0.2, + "learning_rate": 4.80349240677289e-05, + "loss": 1.8204, + "step": 46200 + }, + { + "epoch": 0.2, + "learning_rate": 4.802949568117566e-05, + "loss": 1.8721, + "step": 46300 + }, + { + "epoch": 0.2, + "learning_rate": 4.802406729462243e-05, + "loss": 1.8235, + "step": 46400 + }, + { + "epoch": 0.2, + "learning_rate": 4.801863890806919e-05, + "loss": 1.8558, + "step": 46500 + }, + { + "epoch": 0.2, + "learning_rate": 4.801321052151595e-05, + "loss": 1.8399, + "step": 46600 + }, + { + "epoch": 0.2, + "learning_rate": 4.800778213496272e-05, + "loss": 1.8175, + "step": 46700 + }, + { + "epoch": 0.2, + "learning_rate": 4.8002353748409484e-05, + "loss": 1.825, + "step": 46800 + }, + { + "epoch": 0.2, + "learning_rate": 4.7996925361856246e-05, + "loss": 1.8156, + "step": 46900 + }, + { + "epoch": 0.2, + "learning_rate": 4.7991496975303014e-05, + "loss": 1.8063, + "step": 47000 + }, + { + "epoch": 0.2, + "learning_rate": 4.7986068588749776e-05, + "loss": 1.8077, + "step": 47100 + }, + { + "epoch": 0.2, + "learning_rate": 4.7980640202196545e-05, + "loss": 1.8135, + "step": 47200 + }, + { + "epoch": 0.2, + "learning_rate": 4.7975211815643313e-05, + "loss": 1.8466, + "step": 47300 + }, + { + "epoch": 0.2, + "learning_rate": 4.7969783429090075e-05, + "loss": 1.8319, + "step": 47400 + }, + { + "epoch": 0.2, + "learning_rate": 4.796435504253684e-05, + "loss": 1.8166, + "step": 47500 + }, + { + "epoch": 0.2, + "learning_rate": 4.7958926655983606e-05, + "loss": 1.8097, + "step": 47600 + }, + { + "epoch": 0.2, + "learning_rate": 4.795349826943037e-05, + "loss": 1.8231, + "step": 47700 + }, + { + "epoch": 0.21, + "learning_rate": 4.7948069882877136e-05, + "loss": 1.8371, + "step": 47800 + }, + { + "epoch": 0.21, + "learning_rate": 4.79426414963239e-05, + "loss": 1.8519, + "step": 47900 + }, + { + "epoch": 0.21, + "learning_rate": 4.793721310977066e-05, + "loss": 1.8226, + "step": 48000 + }, + { + "epoch": 0.21, + "learning_rate": 4.793178472321743e-05, + "loss": 1.8081, + "step": 48100 + }, + { + "epoch": 0.21, + "learning_rate": 4.792635633666419e-05, + "loss": 1.7755, + "step": 48200 + }, + { + "epoch": 0.21, + "learning_rate": 4.792092795011096e-05, + "loss": 1.8338, + "step": 48300 + }, + { + "epoch": 0.21, + "learning_rate": 4.791549956355773e-05, + "loss": 1.8139, + "step": 48400 + }, + { + "epoch": 0.21, + "learning_rate": 4.791007117700449e-05, + "loss": 1.8214, + "step": 48500 + }, + { + "epoch": 0.21, + "learning_rate": 4.790464279045125e-05, + "loss": 1.8017, + "step": 48600 + }, + { + "epoch": 0.21, + "learning_rate": 4.789921440389802e-05, + "loss": 1.8379, + "step": 48700 + }, + { + "epoch": 0.21, + "learning_rate": 4.789378601734478e-05, + "loss": 1.8305, + "step": 48800 + }, + { + "epoch": 0.21, + "learning_rate": 4.7888357630791544e-05, + "loss": 1.8496, + "step": 48900 + }, + { + "epoch": 0.21, + "learning_rate": 4.788292924423831e-05, + "loss": 1.8071, + "step": 49000 + }, + { + "epoch": 0.21, + "learning_rate": 4.7877500857685075e-05, + "loss": 1.8217, + "step": 49100 + }, + { + "epoch": 0.21, + "learning_rate": 4.787207247113184e-05, + "loss": 1.7922, + "step": 49200 + }, + { + "epoch": 0.21, + "learning_rate": 4.786664408457861e-05, + "loss": 1.8252, + "step": 49300 + }, + { + "epoch": 0.21, + "learning_rate": 4.7861215698025374e-05, + "loss": 1.8218, + "step": 49400 + }, + { + "epoch": 0.21, + "learning_rate": 4.7855787311472136e-05, + "loss": 1.8188, + "step": 49500 + }, + { + "epoch": 0.21, + "learning_rate": 4.7850358924918904e-05, + "loss": 1.8211, + "step": 49600 + }, + { + "epoch": 0.21, + "learning_rate": 4.7844930538365666e-05, + "loss": 1.8032, + "step": 49700 + }, + { + "epoch": 0.21, + "learning_rate": 4.7839502151812435e-05, + "loss": 1.7968, + "step": 49800 + }, + { + "epoch": 0.21, + "learning_rate": 4.78340737652592e-05, + "loss": 1.8099, + "step": 49900 + }, + { + "epoch": 0.21, + "learning_rate": 4.782864537870596e-05, + "loss": 1.8188, + "step": 50000 + }, + { + "epoch": 0.21, + "eval_loss": 1.5851926803588867, + "eval_runtime": 18.9094, + "eval_samples_per_second": 528.838, + "eval_steps_per_second": 16.553, + "step": 50000 + }, + { + "epoch": 0.22, + "learning_rate": 4.782321699215273e-05, + "loss": 1.8253, + "step": 50100 + }, + { + "epoch": 0.22, + "learning_rate": 4.781778860559949e-05, + "loss": 1.7936, + "step": 50200 + }, + { + "epoch": 0.22, + "learning_rate": 4.781236021904625e-05, + "loss": 1.8353, + "step": 50300 + }, + { + "epoch": 0.22, + "learning_rate": 4.7806931832493026e-05, + "loss": 1.7939, + "step": 50400 + }, + { + "epoch": 0.22, + "learning_rate": 4.780150344593979e-05, + "loss": 1.8055, + "step": 50500 + }, + { + "epoch": 0.22, + "learning_rate": 4.779607505938655e-05, + "loss": 1.8032, + "step": 50600 + }, + { + "epoch": 0.22, + "learning_rate": 4.779064667283332e-05, + "loss": 1.8171, + "step": 50700 + }, + { + "epoch": 0.22, + "learning_rate": 4.778521828628008e-05, + "loss": 1.8282, + "step": 50800 + }, + { + "epoch": 0.22, + "learning_rate": 4.777978989972684e-05, + "loss": 1.8412, + "step": 50900 + }, + { + "epoch": 0.22, + "learning_rate": 4.777436151317361e-05, + "loss": 1.8437, + "step": 51000 + }, + { + "epoch": 0.22, + "learning_rate": 4.776893312662037e-05, + "loss": 1.8513, + "step": 51100 + }, + { + "epoch": 0.22, + "learning_rate": 4.776350474006714e-05, + "loss": 1.7993, + "step": 51200 + }, + { + "epoch": 0.22, + "learning_rate": 4.7758076353513904e-05, + "loss": 1.7976, + "step": 51300 + }, + { + "epoch": 0.22, + "learning_rate": 4.775264796696067e-05, + "loss": 1.8163, + "step": 51400 + }, + { + "epoch": 0.22, + "learning_rate": 4.7747219580407434e-05, + "loss": 1.8247, + "step": 51500 + }, + { + "epoch": 0.22, + "learning_rate": 4.77417911938542e-05, + "loss": 1.7752, + "step": 51600 + }, + { + "epoch": 0.22, + "learning_rate": 4.7736362807300965e-05, + "loss": 1.8202, + "step": 51700 + }, + { + "epoch": 0.22, + "learning_rate": 4.773093442074773e-05, + "loss": 1.8208, + "step": 51800 + }, + { + "epoch": 0.22, + "learning_rate": 4.7725506034194495e-05, + "loss": 1.8177, + "step": 51900 + }, + { + "epoch": 0.22, + "learning_rate": 4.772007764764126e-05, + "loss": 1.8148, + "step": 52000 + }, + { + "epoch": 0.22, + "learning_rate": 4.7714649261088026e-05, + "loss": 1.8204, + "step": 52100 + }, + { + "epoch": 0.22, + "learning_rate": 4.770922087453479e-05, + "loss": 1.8226, + "step": 52200 + }, + { + "epoch": 0.22, + "learning_rate": 4.770379248798155e-05, + "loss": 1.8235, + "step": 52300 + }, + { + "epoch": 0.23, + "learning_rate": 4.769836410142832e-05, + "loss": 1.7702, + "step": 52400 + }, + { + "epoch": 0.23, + "learning_rate": 4.769293571487509e-05, + "loss": 1.8013, + "step": 52500 + }, + { + "epoch": 0.23, + "learning_rate": 4.768750732832185e-05, + "loss": 1.801, + "step": 52600 + }, + { + "epoch": 0.23, + "learning_rate": 4.768207894176862e-05, + "loss": 1.8202, + "step": 52700 + }, + { + "epoch": 0.23, + "learning_rate": 4.767665055521538e-05, + "loss": 1.7901, + "step": 52800 + }, + { + "epoch": 0.23, + "learning_rate": 4.767122216866214e-05, + "loss": 1.8395, + "step": 52900 + }, + { + "epoch": 0.23, + "learning_rate": 4.766579378210891e-05, + "loss": 1.7996, + "step": 53000 + }, + { + "epoch": 0.23, + "learning_rate": 4.766036539555567e-05, + "loss": 1.7787, + "step": 53100 + }, + { + "epoch": 0.23, + "learning_rate": 4.765493700900244e-05, + "loss": 1.7861, + "step": 53200 + }, + { + "epoch": 0.23, + "learning_rate": 4.76495086224492e-05, + "loss": 1.8164, + "step": 53300 + }, + { + "epoch": 0.23, + "learning_rate": 4.7644080235895964e-05, + "loss": 1.8221, + "step": 53400 + }, + { + "epoch": 0.23, + "learning_rate": 4.763865184934273e-05, + "loss": 1.7885, + "step": 53500 + }, + { + "epoch": 0.23, + "learning_rate": 4.76332234627895e-05, + "loss": 1.8154, + "step": 53600 + }, + { + "epoch": 0.23, + "learning_rate": 4.762779507623626e-05, + "loss": 1.7901, + "step": 53700 + }, + { + "epoch": 0.23, + "learning_rate": 4.762236668968303e-05, + "loss": 1.8313, + "step": 53800 + }, + { + "epoch": 0.23, + "learning_rate": 4.7616938303129794e-05, + "loss": 1.801, + "step": 53900 + }, + { + "epoch": 0.23, + "learning_rate": 4.7611509916576556e-05, + "loss": 1.8021, + "step": 54000 + }, + { + "epoch": 0.23, + "learning_rate": 4.7606081530023324e-05, + "loss": 1.8145, + "step": 54100 + }, + { + "epoch": 0.23, + "learning_rate": 4.7600653143470086e-05, + "loss": 1.7976, + "step": 54200 + }, + { + "epoch": 0.23, + "learning_rate": 4.759522475691685e-05, + "loss": 1.7932, + "step": 54300 + }, + { + "epoch": 0.23, + "learning_rate": 4.758979637036362e-05, + "loss": 1.8452, + "step": 54400 + }, + { + "epoch": 0.23, + "learning_rate": 4.758436798381038e-05, + "loss": 1.7923, + "step": 54500 + }, + { + "epoch": 0.23, + "learning_rate": 4.757893959725715e-05, + "loss": 1.7934, + "step": 54600 + }, + { + "epoch": 0.23, + "learning_rate": 4.7573511210703916e-05, + "loss": 1.8072, + "step": 54700 + }, + { + "epoch": 0.24, + "learning_rate": 4.756808282415068e-05, + "loss": 1.8279, + "step": 54800 + }, + { + "epoch": 0.24, + "learning_rate": 4.756265443759744e-05, + "loss": 1.8289, + "step": 54900 + }, + { + "epoch": 0.24, + "learning_rate": 4.755722605104421e-05, + "loss": 1.812, + "step": 55000 + }, + { + "epoch": 0.24, + "eval_loss": 1.5801465511322021, + "eval_runtime": 18.8482, + "eval_samples_per_second": 530.554, + "eval_steps_per_second": 16.606, + "step": 55000 + }, + { + "epoch": 0.24, + "learning_rate": 4.755179766449097e-05, + "loss": 1.7898, + "step": 55100 + }, + { + "epoch": 0.24, + "learning_rate": 4.754636927793774e-05, + "loss": 1.7933, + "step": 55200 + }, + { + "epoch": 0.24, + "learning_rate": 4.75409408913845e-05, + "loss": 1.7846, + "step": 55300 + }, + { + "epoch": 0.24, + "learning_rate": 4.753551250483126e-05, + "loss": 1.7918, + "step": 55400 + }, + { + "epoch": 0.24, + "learning_rate": 4.753008411827803e-05, + "loss": 1.798, + "step": 55500 + }, + { + "epoch": 0.24, + "learning_rate": 4.75246557317248e-05, + "loss": 1.799, + "step": 55600 + }, + { + "epoch": 0.24, + "learning_rate": 4.751922734517156e-05, + "loss": 1.8078, + "step": 55700 + }, + { + "epoch": 0.24, + "learning_rate": 4.751379895861833e-05, + "loss": 1.808, + "step": 55800 + }, + { + "epoch": 0.24, + "learning_rate": 4.750837057206509e-05, + "loss": 1.7562, + "step": 55900 + }, + { + "epoch": 0.24, + "learning_rate": 4.7502942185511854e-05, + "loss": 1.8129, + "step": 56000 + }, + { + "epoch": 0.24, + "learning_rate": 4.749751379895862e-05, + "loss": 1.8013, + "step": 56100 + }, + { + "epoch": 0.24, + "learning_rate": 4.7492085412405385e-05, + "loss": 1.8138, + "step": 56200 + }, + { + "epoch": 0.24, + "learning_rate": 4.7486657025852146e-05, + "loss": 1.8014, + "step": 56300 + }, + { + "epoch": 0.24, + "learning_rate": 4.7481228639298915e-05, + "loss": 1.8381, + "step": 56400 + }, + { + "epoch": 0.24, + "learning_rate": 4.747580025274568e-05, + "loss": 1.7954, + "step": 56500 + }, + { + "epoch": 0.24, + "learning_rate": 4.7470371866192446e-05, + "loss": 1.7694, + "step": 56600 + }, + { + "epoch": 0.24, + "learning_rate": 4.7464943479639214e-05, + "loss": 1.7909, + "step": 56700 + }, + { + "epoch": 0.24, + "learning_rate": 4.7459515093085976e-05, + "loss": 1.8301, + "step": 56800 + }, + { + "epoch": 0.24, + "learning_rate": 4.745408670653274e-05, + "loss": 1.7995, + "step": 56900 + }, + { + "epoch": 0.24, + "learning_rate": 4.744865831997951e-05, + "loss": 1.8072, + "step": 57000 + }, + { + "epoch": 0.25, + "learning_rate": 4.744322993342627e-05, + "loss": 1.784, + "step": 57100 + }, + { + "epoch": 0.25, + "learning_rate": 4.743780154687304e-05, + "loss": 1.8026, + "step": 57200 + }, + { + "epoch": 0.25, + "learning_rate": 4.74323731603198e-05, + "loss": 1.7927, + "step": 57300 + }, + { + "epoch": 0.25, + "learning_rate": 4.742694477376656e-05, + "loss": 1.7959, + "step": 57400 + }, + { + "epoch": 0.25, + "learning_rate": 4.742151638721333e-05, + "loss": 1.8272, + "step": 57500 + }, + { + "epoch": 0.25, + "learning_rate": 4.741608800066009e-05, + "loss": 1.7942, + "step": 57600 + }, + { + "epoch": 0.25, + "learning_rate": 4.741065961410686e-05, + "loss": 1.8021, + "step": 57700 + }, + { + "epoch": 0.25, + "learning_rate": 4.740523122755363e-05, + "loss": 1.8274, + "step": 57800 + }, + { + "epoch": 0.25, + "learning_rate": 4.739980284100039e-05, + "loss": 1.7891, + "step": 57900 + }, + { + "epoch": 0.25, + "learning_rate": 4.739437445444715e-05, + "loss": 1.7906, + "step": 58000 + }, + { + "epoch": 0.25, + "learning_rate": 4.738894606789392e-05, + "loss": 1.7785, + "step": 58100 + }, + { + "epoch": 0.25, + "learning_rate": 4.738351768134068e-05, + "loss": 1.7844, + "step": 58200 + }, + { + "epoch": 0.25, + "learning_rate": 4.7378089294787445e-05, + "loss": 1.7928, + "step": 58300 + }, + { + "epoch": 0.25, + "learning_rate": 4.7372660908234214e-05, + "loss": 1.7681, + "step": 58400 + }, + { + "epoch": 0.25, + "learning_rate": 4.7367232521680975e-05, + "loss": 1.7946, + "step": 58500 + }, + { + "epoch": 0.25, + "learning_rate": 4.7361804135127744e-05, + "loss": 1.7686, + "step": 58600 + }, + { + "epoch": 0.25, + "learning_rate": 4.7356375748574506e-05, + "loss": 1.7791, + "step": 58700 + }, + { + "epoch": 0.25, + "learning_rate": 4.7350947362021275e-05, + "loss": 1.8047, + "step": 58800 + }, + { + "epoch": 0.25, + "learning_rate": 4.7345518975468037e-05, + "loss": 1.7986, + "step": 58900 + }, + { + "epoch": 0.25, + "learning_rate": 4.7340090588914805e-05, + "loss": 1.8107, + "step": 59000 + }, + { + "epoch": 0.25, + "learning_rate": 4.733466220236157e-05, + "loss": 1.7675, + "step": 59100 + }, + { + "epoch": 0.25, + "learning_rate": 4.7329233815808336e-05, + "loss": 1.7916, + "step": 59200 + }, + { + "epoch": 0.25, + "learning_rate": 4.73238054292551e-05, + "loss": 1.7797, + "step": 59300 + }, + { + "epoch": 0.26, + "learning_rate": 4.731837704270186e-05, + "loss": 1.7827, + "step": 59400 + }, + { + "epoch": 0.26, + "learning_rate": 4.731294865614863e-05, + "loss": 1.7766, + "step": 59500 + }, + { + "epoch": 0.26, + "learning_rate": 4.730752026959539e-05, + "loss": 1.7927, + "step": 59600 + }, + { + "epoch": 0.26, + "learning_rate": 4.730209188304215e-05, + "loss": 1.7896, + "step": 59700 + }, + { + "epoch": 0.26, + "learning_rate": 4.729666349648893e-05, + "loss": 1.7775, + "step": 59800 + }, + { + "epoch": 0.26, + "learning_rate": 4.729123510993569e-05, + "loss": 1.7838, + "step": 59900 + }, + { + "epoch": 0.26, + "learning_rate": 4.728580672338245e-05, + "loss": 1.7729, + "step": 60000 + }, + { + "epoch": 0.26, + "eval_loss": 1.5760066509246826, + "eval_runtime": 18.8895, + "eval_samples_per_second": 529.396, + "eval_steps_per_second": 16.57, + "step": 60000 + }, + { + "epoch": 0.26, + "learning_rate": 4.728037833682922e-05, + "loss": 1.822, + "step": 60100 + }, + { + "epoch": 0.26, + "learning_rate": 4.727494995027598e-05, + "loss": 1.7779, + "step": 60200 + }, + { + "epoch": 0.26, + "learning_rate": 4.7269521563722743e-05, + "loss": 1.7766, + "step": 60300 + }, + { + "epoch": 0.26, + "learning_rate": 4.726409317716951e-05, + "loss": 1.7906, + "step": 60400 + }, + { + "epoch": 0.26, + "learning_rate": 4.7258664790616274e-05, + "loss": 1.8065, + "step": 60500 + }, + { + "epoch": 0.26, + "learning_rate": 4.725323640406304e-05, + "loss": 1.7614, + "step": 60600 + }, + { + "epoch": 0.26, + "learning_rate": 4.7247808017509805e-05, + "loss": 1.7844, + "step": 60700 + }, + { + "epoch": 0.26, + "learning_rate": 4.724237963095657e-05, + "loss": 1.7864, + "step": 60800 + }, + { + "epoch": 0.26, + "learning_rate": 4.7236951244403335e-05, + "loss": 1.7761, + "step": 60900 + }, + { + "epoch": 0.26, + "learning_rate": 4.7231522857850104e-05, + "loss": 1.7707, + "step": 61000 + }, + { + "epoch": 0.26, + "learning_rate": 4.7226094471296866e-05, + "loss": 1.8049, + "step": 61100 + }, + { + "epoch": 0.26, + "learning_rate": 4.7220666084743634e-05, + "loss": 1.786, + "step": 61200 + }, + { + "epoch": 0.26, + "learning_rate": 4.7215237698190396e-05, + "loss": 1.8102, + "step": 61300 + }, + { + "epoch": 0.26, + "learning_rate": 4.720980931163716e-05, + "loss": 1.7937, + "step": 61400 + }, + { + "epoch": 0.26, + "learning_rate": 4.7204380925083927e-05, + "loss": 1.7523, + "step": 61500 + }, + { + "epoch": 0.26, + "learning_rate": 4.719895253853069e-05, + "loss": 1.7976, + "step": 61600 + }, + { + "epoch": 0.27, + "learning_rate": 4.719352415197745e-05, + "loss": 1.7951, + "step": 61700 + }, + { + "epoch": 0.27, + "learning_rate": 4.718809576542422e-05, + "loss": 1.767, + "step": 61800 + }, + { + "epoch": 0.27, + "learning_rate": 4.718266737887099e-05, + "loss": 1.7567, + "step": 61900 + }, + { + "epoch": 0.27, + "learning_rate": 4.717723899231775e-05, + "loss": 1.8087, + "step": 62000 + }, + { + "epoch": 0.27, + "learning_rate": 4.717181060576452e-05, + "loss": 1.8043, + "step": 62100 + }, + { + "epoch": 0.27, + "learning_rate": 4.716638221921128e-05, + "loss": 1.7823, + "step": 62200 + }, + { + "epoch": 0.27, + "learning_rate": 4.716095383265804e-05, + "loss": 1.7519, + "step": 62300 + }, + { + "epoch": 0.27, + "learning_rate": 4.715552544610481e-05, + "loss": 1.7821, + "step": 62400 + }, + { + "epoch": 0.27, + "learning_rate": 4.715009705955157e-05, + "loss": 1.7673, + "step": 62500 + }, + { + "epoch": 0.27, + "learning_rate": 4.714466867299834e-05, + "loss": 1.7676, + "step": 62600 + }, + { + "epoch": 0.27, + "learning_rate": 4.71392402864451e-05, + "loss": 1.7674, + "step": 62700 + }, + { + "epoch": 0.27, + "learning_rate": 4.7133811899891865e-05, + "loss": 1.7856, + "step": 62800 + }, + { + "epoch": 0.27, + "learning_rate": 4.7128383513338634e-05, + "loss": 1.791, + "step": 62900 + }, + { + "epoch": 0.27, + "learning_rate": 4.71229551267854e-05, + "loss": 1.7697, + "step": 63000 + }, + { + "epoch": 0.27, + "learning_rate": 4.7117526740232164e-05, + "loss": 1.7897, + "step": 63100 + }, + { + "epoch": 0.27, + "learning_rate": 4.711209835367893e-05, + "loss": 1.7579, + "step": 63200 + }, + { + "epoch": 0.27, + "learning_rate": 4.7106669967125695e-05, + "loss": 1.7752, + "step": 63300 + }, + { + "epoch": 0.27, + "learning_rate": 4.7101241580572456e-05, + "loss": 1.772, + "step": 63400 + }, + { + "epoch": 0.27, + "learning_rate": 4.7095813194019225e-05, + "loss": 1.7648, + "step": 63500 + }, + { + "epoch": 0.27, + "learning_rate": 4.709038480746599e-05, + "loss": 1.7699, + "step": 63600 + }, + { + "epoch": 0.27, + "learning_rate": 4.708495642091275e-05, + "loss": 1.7708, + "step": 63700 + }, + { + "epoch": 0.27, + "learning_rate": 4.707952803435952e-05, + "loss": 1.7878, + "step": 63800 + }, + { + "epoch": 0.27, + "learning_rate": 4.707409964780628e-05, + "loss": 1.805, + "step": 63900 + }, + { + "epoch": 0.27, + "learning_rate": 4.706867126125305e-05, + "loss": 1.774, + "step": 64000 + }, + { + "epoch": 0.28, + "learning_rate": 4.706324287469982e-05, + "loss": 1.7729, + "step": 64100 + }, + { + "epoch": 0.28, + "learning_rate": 4.705781448814658e-05, + "loss": 1.759, + "step": 64200 + }, + { + "epoch": 0.28, + "learning_rate": 4.705238610159334e-05, + "loss": 1.7649, + "step": 64300 + }, + { + "epoch": 0.28, + "learning_rate": 4.704695771504011e-05, + "loss": 1.7887, + "step": 64400 + }, + { + "epoch": 0.28, + "learning_rate": 4.704152932848687e-05, + "loss": 1.8055, + "step": 64500 + }, + { + "epoch": 0.28, + "learning_rate": 4.703610094193364e-05, + "loss": 1.7912, + "step": 64600 + }, + { + "epoch": 0.28, + "learning_rate": 4.70306725553804e-05, + "loss": 1.759, + "step": 64700 + }, + { + "epoch": 0.28, + "learning_rate": 4.702524416882716e-05, + "loss": 1.7841, + "step": 64800 + }, + { + "epoch": 0.28, + "learning_rate": 4.701981578227393e-05, + "loss": 1.7759, + "step": 64900 + }, + { + "epoch": 0.28, + "learning_rate": 4.70143873957207e-05, + "loss": 1.7306, + "step": 65000 + }, + { + "epoch": 0.28, + "eval_loss": 1.5630521774291992, + "eval_runtime": 18.8835, + "eval_samples_per_second": 529.564, + "eval_steps_per_second": 16.575, + "step": 65000 + }, + { + "epoch": 0.28, + "learning_rate": 4.700895900916746e-05, + "loss": 1.7537, + "step": 65100 + }, + { + "epoch": 0.28, + "learning_rate": 4.700353062261423e-05, + "loss": 1.7783, + "step": 65200 + }, + { + "epoch": 0.28, + "learning_rate": 4.699810223606099e-05, + "loss": 1.7818, + "step": 65300 + }, + { + "epoch": 0.28, + "learning_rate": 4.6992673849507755e-05, + "loss": 1.7749, + "step": 65400 + }, + { + "epoch": 0.28, + "learning_rate": 4.6987245462954524e-05, + "loss": 1.7662, + "step": 65500 + }, + { + "epoch": 0.28, + "learning_rate": 4.6981817076401285e-05, + "loss": 1.7677, + "step": 65600 + }, + { + "epoch": 0.28, + "learning_rate": 4.697638868984805e-05, + "loss": 1.7521, + "step": 65700 + }, + { + "epoch": 0.28, + "learning_rate": 4.6970960303294816e-05, + "loss": 1.7751, + "step": 65800 + }, + { + "epoch": 0.28, + "learning_rate": 4.696553191674158e-05, + "loss": 1.7846, + "step": 65900 + }, + { + "epoch": 0.28, + "learning_rate": 4.696010353018834e-05, + "loss": 1.7987, + "step": 66000 + }, + { + "epoch": 0.28, + "learning_rate": 4.6954675143635115e-05, + "loss": 1.7691, + "step": 66100 + }, + { + "epoch": 0.28, + "learning_rate": 4.694924675708188e-05, + "loss": 1.7955, + "step": 66200 + }, + { + "epoch": 0.28, + "learning_rate": 4.694381837052864e-05, + "loss": 1.8008, + "step": 66300 + }, + { + "epoch": 0.29, + "learning_rate": 4.693838998397541e-05, + "loss": 1.77, + "step": 66400 + }, + { + "epoch": 0.29, + "learning_rate": 4.693296159742217e-05, + "loss": 1.7612, + "step": 66500 + }, + { + "epoch": 0.29, + "learning_rate": 4.692753321086894e-05, + "loss": 1.7797, + "step": 66600 + }, + { + "epoch": 0.29, + "learning_rate": 4.69221048243157e-05, + "loss": 1.8038, + "step": 66700 + }, + { + "epoch": 0.29, + "learning_rate": 4.691667643776246e-05, + "loss": 1.7792, + "step": 66800 + }, + { + "epoch": 0.29, + "learning_rate": 4.691124805120923e-05, + "loss": 1.7614, + "step": 66900 + }, + { + "epoch": 0.29, + "learning_rate": 4.690581966465599e-05, + "loss": 1.7548, + "step": 67000 + }, + { + "epoch": 0.29, + "learning_rate": 4.690039127810276e-05, + "loss": 1.773, + "step": 67100 + }, + { + "epoch": 0.29, + "learning_rate": 4.689496289154953e-05, + "loss": 1.7859, + "step": 67200 + }, + { + "epoch": 0.29, + "learning_rate": 4.688953450499629e-05, + "loss": 1.7366, + "step": 67300 + }, + { + "epoch": 0.29, + "learning_rate": 4.6884106118443053e-05, + "loss": 1.7685, + "step": 67400 + }, + { + "epoch": 0.29, + "learning_rate": 4.687867773188982e-05, + "loss": 1.7628, + "step": 67500 + }, + { + "epoch": 0.29, + "learning_rate": 4.6873249345336584e-05, + "loss": 1.7561, + "step": 67600 + }, + { + "epoch": 0.29, + "learning_rate": 4.6867820958783346e-05, + "loss": 1.8026, + "step": 67700 + }, + { + "epoch": 0.29, + "learning_rate": 4.6862392572230114e-05, + "loss": 1.7847, + "step": 67800 + }, + { + "epoch": 0.29, + "learning_rate": 4.6856964185676876e-05, + "loss": 1.7646, + "step": 67900 + }, + { + "epoch": 0.29, + "learning_rate": 4.685153579912364e-05, + "loss": 1.7906, + "step": 68000 + }, + { + "epoch": 0.29, + "learning_rate": 4.684610741257041e-05, + "loss": 1.7476, + "step": 68100 + }, + { + "epoch": 0.29, + "learning_rate": 4.6840679026017176e-05, + "loss": 1.7534, + "step": 68200 + }, + { + "epoch": 0.29, + "learning_rate": 4.683525063946394e-05, + "loss": 1.7639, + "step": 68300 + }, + { + "epoch": 0.29, + "learning_rate": 4.6829822252910706e-05, + "loss": 1.7499, + "step": 68400 + }, + { + "epoch": 0.29, + "learning_rate": 4.682439386635747e-05, + "loss": 1.7743, + "step": 68500 + }, + { + "epoch": 0.29, + "learning_rate": 4.6818965479804237e-05, + "loss": 1.7483, + "step": 68600 + }, + { + "epoch": 0.3, + "learning_rate": 4.6813537093251e-05, + "loss": 1.7807, + "step": 68700 + }, + { + "epoch": 0.3, + "learning_rate": 4.680810870669776e-05, + "loss": 1.7912, + "step": 68800 + }, + { + "epoch": 0.3, + "learning_rate": 4.680268032014453e-05, + "loss": 1.7632, + "step": 68900 + }, + { + "epoch": 0.3, + "learning_rate": 4.679725193359129e-05, + "loss": 1.762, + "step": 69000 + }, + { + "epoch": 0.3, + "learning_rate": 4.679182354703805e-05, + "loss": 1.7498, + "step": 69100 + }, + { + "epoch": 0.3, + "learning_rate": 4.678639516048483e-05, + "loss": 1.7597, + "step": 69200 + }, + { + "epoch": 0.3, + "learning_rate": 4.678096677393159e-05, + "loss": 1.7851, + "step": 69300 + }, + { + "epoch": 0.3, + "learning_rate": 4.677553838737835e-05, + "loss": 1.7746, + "step": 69400 + }, + { + "epoch": 0.3, + "learning_rate": 4.677011000082512e-05, + "loss": 1.7738, + "step": 69500 + }, + { + "epoch": 0.3, + "learning_rate": 4.676468161427188e-05, + "loss": 1.7909, + "step": 69600 + }, + { + "epoch": 0.3, + "learning_rate": 4.6759253227718644e-05, + "loss": 1.768, + "step": 69700 + }, + { + "epoch": 0.3, + "learning_rate": 4.675382484116541e-05, + "loss": 1.7782, + "step": 69800 + }, + { + "epoch": 0.3, + "learning_rate": 4.6748396454612175e-05, + "loss": 1.7663, + "step": 69900 + }, + { + "epoch": 0.3, + "learning_rate": 4.674296806805894e-05, + "loss": 1.7202, + "step": 70000 + }, + { + "epoch": 0.3, + "eval_loss": 1.5640525817871094, + "eval_runtime": 18.9125, + "eval_samples_per_second": 528.752, + "eval_steps_per_second": 16.55, + "step": 70000 + }, + { + "epoch": 0.3, + "learning_rate": 4.6737539681505705e-05, + "loss": 1.7697, + "step": 70100 + }, + { + "epoch": 0.3, + "learning_rate": 4.673211129495247e-05, + "loss": 1.7581, + "step": 70200 + }, + { + "epoch": 0.3, + "learning_rate": 4.6726682908399236e-05, + "loss": 1.7291, + "step": 70300 + }, + { + "epoch": 0.3, + "learning_rate": 4.6721254521846005e-05, + "loss": 1.7576, + "step": 70400 + }, + { + "epoch": 0.3, + "learning_rate": 4.6715826135292766e-05, + "loss": 1.7384, + "step": 70500 + }, + { + "epoch": 0.3, + "learning_rate": 4.671039774873953e-05, + "loss": 1.7676, + "step": 70600 + }, + { + "epoch": 0.3, + "learning_rate": 4.67049693621863e-05, + "loss": 1.7576, + "step": 70700 + }, + { + "epoch": 0.3, + "learning_rate": 4.669954097563306e-05, + "loss": 1.7404, + "step": 70800 + }, + { + "epoch": 0.3, + "learning_rate": 4.669411258907983e-05, + "loss": 1.7635, + "step": 70900 + }, + { + "epoch": 0.31, + "learning_rate": 4.668868420252659e-05, + "loss": 1.7898, + "step": 71000 + }, + { + "epoch": 0.31, + "learning_rate": 4.668325581597335e-05, + "loss": 1.7269, + "step": 71100 + }, + { + "epoch": 0.31, + "learning_rate": 4.667782742942012e-05, + "loss": 1.756, + "step": 71200 + }, + { + "epoch": 0.31, + "learning_rate": 4.667239904286689e-05, + "loss": 1.7796, + "step": 71300 + }, + { + "epoch": 0.31, + "learning_rate": 4.666697065631365e-05, + "loss": 1.7805, + "step": 71400 + }, + { + "epoch": 0.31, + "learning_rate": 4.666154226976042e-05, + "loss": 1.7324, + "step": 71500 + }, + { + "epoch": 0.31, + "learning_rate": 4.665611388320718e-05, + "loss": 1.7412, + "step": 71600 + }, + { + "epoch": 0.31, + "learning_rate": 4.665068549665394e-05, + "loss": 1.7363, + "step": 71700 + }, + { + "epoch": 0.31, + "learning_rate": 4.664525711010071e-05, + "loss": 1.7615, + "step": 71800 + }, + { + "epoch": 0.31, + "learning_rate": 4.663982872354747e-05, + "loss": 1.7356, + "step": 71900 + }, + { + "epoch": 0.31, + "learning_rate": 4.6634400336994235e-05, + "loss": 1.7364, + "step": 72000 + }, + { + "epoch": 0.31, + "learning_rate": 4.6628971950441004e-05, + "loss": 1.7587, + "step": 72100 + }, + { + "epoch": 0.31, + "learning_rate": 4.6623543563887766e-05, + "loss": 1.742, + "step": 72200 + }, + { + "epoch": 0.31, + "learning_rate": 4.6618115177334534e-05, + "loss": 1.7618, + "step": 72300 + }, + { + "epoch": 0.31, + "learning_rate": 4.66126867907813e-05, + "loss": 1.7374, + "step": 72400 + }, + { + "epoch": 0.31, + "learning_rate": 4.6607258404228065e-05, + "loss": 1.7804, + "step": 72500 + }, + { + "epoch": 0.31, + "learning_rate": 4.660183001767483e-05, + "loss": 1.7173, + "step": 72600 + }, + { + "epoch": 0.31, + "learning_rate": 4.6596401631121595e-05, + "loss": 1.7459, + "step": 72700 + }, + { + "epoch": 0.31, + "learning_rate": 4.659097324456836e-05, + "loss": 1.7441, + "step": 72800 + }, + { + "epoch": 0.31, + "learning_rate": 4.6585544858015126e-05, + "loss": 1.7598, + "step": 72900 + }, + { + "epoch": 0.31, + "learning_rate": 4.658011647146189e-05, + "loss": 1.7476, + "step": 73000 + }, + { + "epoch": 0.31, + "learning_rate": 4.657468808490865e-05, + "loss": 1.7596, + "step": 73100 + }, + { + "epoch": 0.31, + "learning_rate": 4.656925969835542e-05, + "loss": 1.7438, + "step": 73200 + }, + { + "epoch": 0.31, + "learning_rate": 4.656383131180218e-05, + "loss": 1.7752, + "step": 73300 + }, + { + "epoch": 0.32, + "learning_rate": 4.655840292524895e-05, + "loss": 1.7214, + "step": 73400 + }, + { + "epoch": 0.32, + "learning_rate": 4.655297453869572e-05, + "loss": 1.7437, + "step": 73500 + }, + { + "epoch": 0.32, + "learning_rate": 4.654754615214248e-05, + "loss": 1.7531, + "step": 73600 + }, + { + "epoch": 0.32, + "learning_rate": 4.654211776558924e-05, + "loss": 1.746, + "step": 73700 + }, + { + "epoch": 0.32, + "learning_rate": 4.653668937903601e-05, + "loss": 1.748, + "step": 73800 + }, + { + "epoch": 0.32, + "learning_rate": 4.653126099248277e-05, + "loss": 1.7357, + "step": 73900 + }, + { + "epoch": 0.32, + "learning_rate": 4.6525832605929534e-05, + "loss": 1.7325, + "step": 74000 + }, + { + "epoch": 0.32, + "learning_rate": 4.65204042193763e-05, + "loss": 1.7155, + "step": 74100 + }, + { + "epoch": 0.32, + "learning_rate": 4.6514975832823064e-05, + "loss": 1.7436, + "step": 74200 + }, + { + "epoch": 0.32, + "learning_rate": 4.650954744626983e-05, + "loss": 1.7245, + "step": 74300 + }, + { + "epoch": 0.32, + "learning_rate": 4.6504119059716595e-05, + "loss": 1.7332, + "step": 74400 + }, + { + "epoch": 0.32, + "learning_rate": 4.6498690673163363e-05, + "loss": 1.7614, + "step": 74500 + }, + { + "epoch": 0.32, + "learning_rate": 4.6493262286610125e-05, + "loss": 1.7565, + "step": 74600 + }, + { + "epoch": 0.32, + "learning_rate": 4.6487833900056894e-05, + "loss": 1.7641, + "step": 74700 + }, + { + "epoch": 0.32, + "learning_rate": 4.6482405513503656e-05, + "loss": 1.7415, + "step": 74800 + }, + { + "epoch": 0.32, + "learning_rate": 4.6476977126950424e-05, + "loss": 1.7563, + "step": 74900 + }, + { + "epoch": 0.32, + "learning_rate": 4.6471548740397186e-05, + "loss": 1.7745, + "step": 75000 + }, + { + "epoch": 0.32, + "eval_loss": 1.5477814674377441, + "eval_runtime": 18.8766, + "eval_samples_per_second": 529.758, + "eval_steps_per_second": 16.581, + "step": 75000 + }, + { + "epoch": 0.32, + "learning_rate": 4.646612035384395e-05, + "loss": 1.7393, + "step": 75100 + }, + { + "epoch": 0.32, + "learning_rate": 4.646069196729072e-05, + "loss": 1.752, + "step": 75200 + }, + { + "epoch": 0.32, + "learning_rate": 4.645526358073748e-05, + "loss": 1.7421, + "step": 75300 + }, + { + "epoch": 0.32, + "learning_rate": 4.644983519418424e-05, + "loss": 1.7443, + "step": 75400 + }, + { + "epoch": 0.32, + "learning_rate": 4.6444406807631016e-05, + "loss": 1.7661, + "step": 75500 + }, + { + "epoch": 0.32, + "learning_rate": 4.643897842107778e-05, + "loss": 1.7527, + "step": 75600 + }, + { + "epoch": 0.33, + "learning_rate": 4.643355003452454e-05, + "loss": 1.7628, + "step": 75700 + }, + { + "epoch": 0.33, + "learning_rate": 4.642812164797131e-05, + "loss": 1.752, + "step": 75800 + }, + { + "epoch": 0.33, + "learning_rate": 4.642269326141807e-05, + "loss": 1.7392, + "step": 75900 + }, + { + "epoch": 0.33, + "learning_rate": 4.641726487486483e-05, + "loss": 1.7515, + "step": 76000 + }, + { + "epoch": 0.33, + "learning_rate": 4.64118364883116e-05, + "loss": 1.7412, + "step": 76100 + }, + { + "epoch": 0.33, + "learning_rate": 4.640640810175836e-05, + "loss": 1.7418, + "step": 76200 + }, + { + "epoch": 0.33, + "learning_rate": 4.640097971520513e-05, + "loss": 1.7248, + "step": 76300 + }, + { + "epoch": 0.33, + "learning_rate": 4.639555132865189e-05, + "loss": 1.7437, + "step": 76400 + }, + { + "epoch": 0.33, + "learning_rate": 4.6390122942098655e-05, + "loss": 1.7433, + "step": 76500 + }, + { + "epoch": 0.33, + "learning_rate": 4.6384694555545424e-05, + "loss": 1.7226, + "step": 76600 + }, + { + "epoch": 0.33, + "learning_rate": 4.637926616899219e-05, + "loss": 1.7266, + "step": 76700 + }, + { + "epoch": 0.33, + "learning_rate": 4.6373837782438954e-05, + "loss": 1.7332, + "step": 76800 + }, + { + "epoch": 0.33, + "learning_rate": 4.636840939588572e-05, + "loss": 1.7427, + "step": 76900 + }, + { + "epoch": 0.33, + "learning_rate": 4.6362981009332485e-05, + "loss": 1.7679, + "step": 77000 + }, + { + "epoch": 0.33, + "learning_rate": 4.635755262277925e-05, + "loss": 1.7527, + "step": 77100 + }, + { + "epoch": 0.33, + "learning_rate": 4.6352124236226015e-05, + "loss": 1.7443, + "step": 77200 + }, + { + "epoch": 0.33, + "learning_rate": 4.634669584967278e-05, + "loss": 1.7738, + "step": 77300 + }, + { + "epoch": 0.33, + "learning_rate": 4.634126746311954e-05, + "loss": 1.735, + "step": 77400 + }, + { + "epoch": 0.33, + "learning_rate": 4.633583907656631e-05, + "loss": 1.7555, + "step": 77500 + }, + { + "epoch": 0.33, + "learning_rate": 4.6330410690013076e-05, + "loss": 1.7362, + "step": 77600 + }, + { + "epoch": 0.33, + "learning_rate": 4.632498230345984e-05, + "loss": 1.7026, + "step": 77700 + }, + { + "epoch": 0.33, + "learning_rate": 4.631955391690661e-05, + "loss": 1.7306, + "step": 77800 + }, + { + "epoch": 0.33, + "learning_rate": 4.631412553035337e-05, + "loss": 1.7717, + "step": 77900 + }, + { + "epoch": 0.34, + "learning_rate": 4.630869714380013e-05, + "loss": 1.7467, + "step": 78000 + }, + { + "epoch": 0.34, + "learning_rate": 4.63032687572469e-05, + "loss": 1.7282, + "step": 78100 + }, + { + "epoch": 0.34, + "learning_rate": 4.629784037069366e-05, + "loss": 1.7238, + "step": 78200 + }, + { + "epoch": 0.34, + "learning_rate": 4.629241198414043e-05, + "loss": 1.7432, + "step": 78300 + }, + { + "epoch": 0.34, + "learning_rate": 4.628698359758719e-05, + "loss": 1.7318, + "step": 78400 + }, + { + "epoch": 0.34, + "learning_rate": 4.6281555211033954e-05, + "loss": 1.7544, + "step": 78500 + }, + { + "epoch": 0.34, + "learning_rate": 4.627612682448072e-05, + "loss": 1.7452, + "step": 78600 + }, + { + "epoch": 0.34, + "learning_rate": 4.627069843792749e-05, + "loss": 1.757, + "step": 78700 + }, + { + "epoch": 0.34, + "learning_rate": 4.626527005137425e-05, + "loss": 1.754, + "step": 78800 + }, + { + "epoch": 0.34, + "learning_rate": 4.625984166482102e-05, + "loss": 1.7073, + "step": 78900 + }, + { + "epoch": 0.34, + "learning_rate": 4.625441327826778e-05, + "loss": 1.7104, + "step": 79000 + }, + { + "epoch": 0.34, + "learning_rate": 4.6248984891714545e-05, + "loss": 1.7339, + "step": 79100 + }, + { + "epoch": 0.34, + "learning_rate": 4.6243556505161314e-05, + "loss": 1.722, + "step": 79200 + }, + { + "epoch": 0.34, + "learning_rate": 4.6238128118608076e-05, + "loss": 1.7456, + "step": 79300 + }, + { + "epoch": 0.34, + "learning_rate": 4.623269973205484e-05, + "loss": 1.7273, + "step": 79400 + }, + { + "epoch": 0.34, + "learning_rate": 4.6227271345501606e-05, + "loss": 1.7432, + "step": 79500 + }, + { + "epoch": 0.34, + "learning_rate": 4.622184295894837e-05, + "loss": 1.76, + "step": 79600 + }, + { + "epoch": 0.34, + "learning_rate": 4.621641457239514e-05, + "loss": 1.7446, + "step": 79700 + }, + { + "epoch": 0.34, + "learning_rate": 4.6210986185841905e-05, + "loss": 1.7589, + "step": 79800 + }, + { + "epoch": 0.34, + "learning_rate": 4.620555779928867e-05, + "loss": 1.7662, + "step": 79900 + }, + { + "epoch": 0.34, + "learning_rate": 4.620012941273543e-05, + "loss": 1.7185, + "step": 80000 + }, + { + "epoch": 0.34, + "eval_loss": 1.5425916910171509, + "eval_runtime": 18.916, + "eval_samples_per_second": 528.654, + "eval_steps_per_second": 16.547, + "step": 80000 + }, + { + "epoch": 0.34, + "learning_rate": 4.61947010261822e-05, + "loss": 1.7206, + "step": 80100 + }, + { + "epoch": 0.34, + "learning_rate": 4.618927263962896e-05, + "loss": 1.7431, + "step": 80200 + }, + { + "epoch": 0.34, + "learning_rate": 4.618384425307573e-05, + "loss": 1.7112, + "step": 80300 + }, + { + "epoch": 0.35, + "learning_rate": 4.617841586652249e-05, + "loss": 1.7242, + "step": 80400 + }, + { + "epoch": 0.35, + "learning_rate": 4.617298747996925e-05, + "loss": 1.7106, + "step": 80500 + }, + { + "epoch": 0.35, + "learning_rate": 4.616755909341602e-05, + "loss": 1.7363, + "step": 80600 + }, + { + "epoch": 0.35, + "learning_rate": 4.616213070686279e-05, + "loss": 1.7411, + "step": 80700 + }, + { + "epoch": 0.35, + "learning_rate": 4.615670232030955e-05, + "loss": 1.7663, + "step": 80800 + }, + { + "epoch": 0.35, + "learning_rate": 4.615127393375632e-05, + "loss": 1.7612, + "step": 80900 + }, + { + "epoch": 0.35, + "learning_rate": 4.614584554720308e-05, + "loss": 1.7457, + "step": 81000 + }, + { + "epoch": 0.35, + "learning_rate": 4.6140417160649844e-05, + "loss": 1.7319, + "step": 81100 + }, + { + "epoch": 0.35, + "learning_rate": 4.613498877409661e-05, + "loss": 1.744, + "step": 81200 + }, + { + "epoch": 0.35, + "learning_rate": 4.6129560387543374e-05, + "loss": 1.7076, + "step": 81300 + }, + { + "epoch": 0.35, + "learning_rate": 4.6124132000990136e-05, + "loss": 1.7594, + "step": 81400 + }, + { + "epoch": 0.35, + "learning_rate": 4.6118703614436905e-05, + "loss": 1.7443, + "step": 81500 + }, + { + "epoch": 0.35, + "learning_rate": 4.6113275227883667e-05, + "loss": 1.7158, + "step": 81600 + }, + { + "epoch": 0.35, + "learning_rate": 4.6107846841330435e-05, + "loss": 1.7315, + "step": 81700 + }, + { + "epoch": 0.35, + "learning_rate": 4.6102418454777204e-05, + "loss": 1.756, + "step": 81800 + }, + { + "epoch": 0.35, + "learning_rate": 4.6096990068223966e-05, + "loss": 1.7466, + "step": 81900 + }, + { + "epoch": 0.35, + "learning_rate": 4.609156168167073e-05, + "loss": 1.7233, + "step": 82000 + }, + { + "epoch": 0.35, + "learning_rate": 4.6086133295117496e-05, + "loss": 1.736, + "step": 82100 + }, + { + "epoch": 0.35, + "learning_rate": 4.608070490856426e-05, + "loss": 1.7329, + "step": 82200 + }, + { + "epoch": 0.35, + "learning_rate": 4.607527652201103e-05, + "loss": 1.7311, + "step": 82300 + }, + { + "epoch": 0.35, + "learning_rate": 4.606984813545779e-05, + "loss": 1.7318, + "step": 82400 + }, + { + "epoch": 0.35, + "learning_rate": 4.606441974890455e-05, + "loss": 1.7485, + "step": 82500 + }, + { + "epoch": 0.35, + "learning_rate": 4.605899136235132e-05, + "loss": 1.6999, + "step": 82600 + }, + { + "epoch": 0.36, + "learning_rate": 4.605356297579808e-05, + "loss": 1.7136, + "step": 82700 + }, + { + "epoch": 0.36, + "learning_rate": 4.604813458924485e-05, + "loss": 1.724, + "step": 82800 + }, + { + "epoch": 0.36, + "learning_rate": 4.604270620269162e-05, + "loss": 1.7513, + "step": 82900 + }, + { + "epoch": 0.36, + "learning_rate": 4.603727781613838e-05, + "loss": 1.7232, + "step": 83000 + }, + { + "epoch": 0.36, + "learning_rate": 4.603184942958514e-05, + "loss": 1.7535, + "step": 83100 + }, + { + "epoch": 0.36, + "learning_rate": 4.602642104303191e-05, + "loss": 1.7338, + "step": 83200 + }, + { + "epoch": 0.36, + "learning_rate": 4.602099265647867e-05, + "loss": 1.7305, + "step": 83300 + }, + { + "epoch": 0.36, + "learning_rate": 4.6015564269925435e-05, + "loss": 1.7295, + "step": 83400 + }, + { + "epoch": 0.36, + "learning_rate": 4.60101358833722e-05, + "loss": 1.7555, + "step": 83500 + }, + { + "epoch": 0.36, + "learning_rate": 4.6004707496818965e-05, + "loss": 1.7299, + "step": 83600 + }, + { + "epoch": 0.36, + "learning_rate": 4.5999279110265734e-05, + "loss": 1.7132, + "step": 83700 + }, + { + "epoch": 0.36, + "learning_rate": 4.5993850723712496e-05, + "loss": 1.7587, + "step": 83800 + }, + { + "epoch": 0.36, + "learning_rate": 4.5988422337159264e-05, + "loss": 1.7486, + "step": 83900 + }, + { + "epoch": 0.36, + "learning_rate": 4.5982993950606026e-05, + "loss": 1.6919, + "step": 84000 + }, + { + "epoch": 0.36, + "learning_rate": 4.5977565564052795e-05, + "loss": 1.7116, + "step": 84100 + }, + { + "epoch": 0.36, + "learning_rate": 4.597213717749956e-05, + "loss": 1.7302, + "step": 84200 + }, + { + "epoch": 0.36, + "learning_rate": 4.5966708790946325e-05, + "loss": 1.7044, + "step": 84300 + }, + { + "epoch": 0.36, + "learning_rate": 4.596128040439309e-05, + "loss": 1.7475, + "step": 84400 + }, + { + "epoch": 0.36, + "learning_rate": 4.595585201783985e-05, + "loss": 1.7357, + "step": 84500 + }, + { + "epoch": 0.36, + "learning_rate": 4.595042363128662e-05, + "loss": 1.7234, + "step": 84600 + }, + { + "epoch": 0.36, + "learning_rate": 4.594499524473338e-05, + "loss": 1.7307, + "step": 84700 + }, + { + "epoch": 0.36, + "learning_rate": 4.593956685818014e-05, + "loss": 1.7138, + "step": 84800 + }, + { + "epoch": 0.36, + "learning_rate": 4.593413847162692e-05, + "loss": 1.7437, + "step": 84900 + }, + { + "epoch": 0.37, + "learning_rate": 4.592871008507368e-05, + "loss": 1.761, + "step": 85000 + }, + { + "epoch": 0.37, + "eval_loss": 1.5349645614624023, + "eval_runtime": 18.9426, + "eval_samples_per_second": 527.911, + "eval_steps_per_second": 16.524, + "step": 85000 + }, + { + "epoch": 0.37, + "learning_rate": 4.592328169852044e-05, + "loss": 1.7331, + "step": 85100 + }, + { + "epoch": 0.37, + "learning_rate": 4.591785331196721e-05, + "loss": 1.7209, + "step": 85200 + }, + { + "epoch": 0.37, + "learning_rate": 4.591242492541397e-05, + "loss": 1.7032, + "step": 85300 + }, + { + "epoch": 0.37, + "learning_rate": 4.590699653886073e-05, + "loss": 1.7575, + "step": 85400 + }, + { + "epoch": 0.37, + "learning_rate": 4.59015681523075e-05, + "loss": 1.7372, + "step": 85500 + }, + { + "epoch": 0.37, + "learning_rate": 4.5896139765754264e-05, + "loss": 1.7554, + "step": 85600 + }, + { + "epoch": 0.37, + "learning_rate": 4.589071137920103e-05, + "loss": 1.7196, + "step": 85700 + }, + { + "epoch": 0.37, + "learning_rate": 4.5885282992647794e-05, + "loss": 1.6947, + "step": 85800 + }, + { + "epoch": 0.37, + "learning_rate": 4.5879854606094556e-05, + "loss": 1.7231, + "step": 85900 + }, + { + "epoch": 0.37, + "learning_rate": 4.5874426219541325e-05, + "loss": 1.7123, + "step": 86000 + }, + { + "epoch": 0.37, + "learning_rate": 4.586899783298809e-05, + "loss": 1.7224, + "step": 86100 + }, + { + "epoch": 0.37, + "learning_rate": 4.5863569446434855e-05, + "loss": 1.7402, + "step": 86200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5858141059881624e-05, + "loss": 1.7189, + "step": 86300 + }, + { + "epoch": 0.37, + "learning_rate": 4.5852712673328386e-05, + "loss": 1.7363, + "step": 86400 + }, + { + "epoch": 0.37, + "learning_rate": 4.584728428677515e-05, + "loss": 1.7376, + "step": 86500 + }, + { + "epoch": 0.37, + "learning_rate": 4.5841855900221916e-05, + "loss": 1.739, + "step": 86600 + }, + { + "epoch": 0.37, + "learning_rate": 4.583642751366868e-05, + "loss": 1.7794, + "step": 86700 + }, + { + "epoch": 0.37, + "learning_rate": 4.583099912711544e-05, + "loss": 1.7424, + "step": 86800 + }, + { + "epoch": 0.37, + "learning_rate": 4.582557074056221e-05, + "loss": 1.7139, + "step": 86900 + }, + { + "epoch": 0.37, + "learning_rate": 4.582014235400898e-05, + "loss": 1.7696, + "step": 87000 + }, + { + "epoch": 0.37, + "learning_rate": 4.581471396745574e-05, + "loss": 1.7081, + "step": 87100 + }, + { + "epoch": 0.37, + "learning_rate": 4.580928558090251e-05, + "loss": 1.7361, + "step": 87200 + }, + { + "epoch": 0.38, + "learning_rate": 4.580385719434927e-05, + "loss": 1.7095, + "step": 87300 + }, + { + "epoch": 0.38, + "learning_rate": 4.579842880779603e-05, + "loss": 1.7303, + "step": 87400 + }, + { + "epoch": 0.38, + "learning_rate": 4.57930004212428e-05, + "loss": 1.6847, + "step": 87500 + }, + { + "epoch": 0.38, + "learning_rate": 4.578757203468956e-05, + "loss": 1.7079, + "step": 87600 + }, + { + "epoch": 0.38, + "learning_rate": 4.578214364813633e-05, + "loss": 1.7344, + "step": 87700 + }, + { + "epoch": 0.38, + "learning_rate": 4.577671526158309e-05, + "loss": 1.731, + "step": 87800 + }, + { + "epoch": 0.38, + "learning_rate": 4.5771286875029854e-05, + "loss": 1.7397, + "step": 87900 + }, + { + "epoch": 0.38, + "learning_rate": 4.576585848847662e-05, + "loss": 1.7308, + "step": 88000 + }, + { + "epoch": 0.38, + "learning_rate": 4.576043010192339e-05, + "loss": 1.7609, + "step": 88100 + }, + { + "epoch": 0.38, + "learning_rate": 4.5755001715370154e-05, + "loss": 1.7123, + "step": 88200 + }, + { + "epoch": 0.38, + "learning_rate": 4.574957332881692e-05, + "loss": 1.7093, + "step": 88300 + }, + { + "epoch": 0.38, + "learning_rate": 4.5744144942263684e-05, + "loss": 1.7262, + "step": 88400 + }, + { + "epoch": 0.38, + "learning_rate": 4.5738716555710446e-05, + "loss": 1.7057, + "step": 88500 + }, + { + "epoch": 0.38, + "learning_rate": 4.5733288169157215e-05, + "loss": 1.7473, + "step": 88600 + }, + { + "epoch": 0.38, + "learning_rate": 4.5727859782603977e-05, + "loss": 1.7331, + "step": 88700 + }, + { + "epoch": 0.38, + "learning_rate": 4.572243139605074e-05, + "loss": 1.7549, + "step": 88800 + }, + { + "epoch": 0.38, + "learning_rate": 4.571700300949751e-05, + "loss": 1.7222, + "step": 88900 + }, + { + "epoch": 0.38, + "learning_rate": 4.571157462294427e-05, + "loss": 1.6896, + "step": 89000 + }, + { + "epoch": 0.38, + "learning_rate": 4.570614623639104e-05, + "loss": 1.716, + "step": 89100 + }, + { + "epoch": 0.38, + "learning_rate": 4.5700717849837806e-05, + "loss": 1.7272, + "step": 89200 + }, + { + "epoch": 0.38, + "learning_rate": 4.569528946328457e-05, + "loss": 1.692, + "step": 89300 + }, + { + "epoch": 0.38, + "learning_rate": 4.568986107673133e-05, + "loss": 1.7422, + "step": 89400 + }, + { + "epoch": 0.38, + "learning_rate": 4.56844326901781e-05, + "loss": 1.7287, + "step": 89500 + }, + { + "epoch": 0.38, + "learning_rate": 4.567900430362486e-05, + "loss": 1.7368, + "step": 89600 + }, + { + "epoch": 0.39, + "learning_rate": 4.567357591707163e-05, + "loss": 1.6947, + "step": 89700 + }, + { + "epoch": 0.39, + "learning_rate": 4.566814753051839e-05, + "loss": 1.7064, + "step": 89800 + }, + { + "epoch": 0.39, + "learning_rate": 4.566271914396515e-05, + "loss": 1.7035, + "step": 89900 + }, + { + "epoch": 0.39, + "learning_rate": 4.565729075741192e-05, + "loss": 1.7442, + "step": 90000 + }, + { + "epoch": 0.39, + "eval_loss": 1.5332447290420532, + "eval_runtime": 18.9917, + "eval_samples_per_second": 526.545, + "eval_steps_per_second": 16.481, + "step": 90000 + }, + { + "epoch": 0.39, + "learning_rate": 4.5651862370858683e-05, + "loss": 1.7316, + "step": 90100 + }, + { + "epoch": 0.39, + "learning_rate": 4.564643398430545e-05, + "loss": 1.6982, + "step": 90200 + }, + { + "epoch": 0.39, + "learning_rate": 4.564100559775222e-05, + "loss": 1.7471, + "step": 90300 + }, + { + "epoch": 0.39, + "learning_rate": 4.563557721119898e-05, + "loss": 1.7449, + "step": 90400 + }, + { + "epoch": 0.39, + "learning_rate": 4.5630148824645745e-05, + "loss": 1.7299, + "step": 90500 + }, + { + "epoch": 0.39, + "learning_rate": 4.562472043809251e-05, + "loss": 1.7059, + "step": 90600 + }, + { + "epoch": 0.39, + "learning_rate": 4.5619292051539275e-05, + "loss": 1.7734, + "step": 90700 + }, + { + "epoch": 0.39, + "learning_rate": 4.561386366498604e-05, + "loss": 1.7181, + "step": 90800 + }, + { + "epoch": 0.39, + "learning_rate": 4.5608435278432806e-05, + "loss": 1.7107, + "step": 90900 + }, + { + "epoch": 0.39, + "learning_rate": 4.560300689187957e-05, + "loss": 1.7182, + "step": 91000 + }, + { + "epoch": 0.39, + "learning_rate": 4.559757850532633e-05, + "loss": 1.6972, + "step": 91100 + }, + { + "epoch": 0.39, + "learning_rate": 4.5592150118773105e-05, + "loss": 1.7136, + "step": 91200 + }, + { + "epoch": 0.39, + "learning_rate": 4.558672173221987e-05, + "loss": 1.71, + "step": 91300 + }, + { + "epoch": 0.39, + "learning_rate": 4.558129334566663e-05, + "loss": 1.7111, + "step": 91400 + }, + { + "epoch": 0.39, + "learning_rate": 4.55758649591134e-05, + "loss": 1.6984, + "step": 91500 + }, + { + "epoch": 0.39, + "learning_rate": 4.557043657256016e-05, + "loss": 1.7014, + "step": 91600 + }, + { + "epoch": 0.39, + "learning_rate": 4.556500818600693e-05, + "loss": 1.7238, + "step": 91700 + }, + { + "epoch": 0.39, + "learning_rate": 4.555957979945369e-05, + "loss": 1.7368, + "step": 91800 + }, + { + "epoch": 0.39, + "learning_rate": 4.555415141290045e-05, + "loss": 1.7427, + "step": 91900 + }, + { + "epoch": 0.4, + "learning_rate": 4.554872302634722e-05, + "loss": 1.7477, + "step": 92000 + }, + { + "epoch": 0.4, + "learning_rate": 4.554329463979398e-05, + "loss": 1.707, + "step": 92100 + }, + { + "epoch": 0.4, + "learning_rate": 4.5537866253240744e-05, + "loss": 1.6978, + "step": 92200 + }, + { + "epoch": 0.4, + "learning_rate": 4.553243786668752e-05, + "loss": 1.7144, + "step": 92300 + }, + { + "epoch": 0.4, + "learning_rate": 4.552700948013428e-05, + "loss": 1.702, + "step": 92400 + }, + { + "epoch": 0.4, + "learning_rate": 4.552158109358104e-05, + "loss": 1.7267, + "step": 92500 + }, + { + "epoch": 0.4, + "learning_rate": 4.551615270702781e-05, + "loss": 1.7618, + "step": 92600 + }, + { + "epoch": 0.4, + "learning_rate": 4.5510724320474574e-05, + "loss": 1.7011, + "step": 92700 + }, + { + "epoch": 0.4, + "learning_rate": 4.5505295933921335e-05, + "loss": 1.6948, + "step": 92800 + }, + { + "epoch": 0.4, + "learning_rate": 4.5499867547368104e-05, + "loss": 1.7423, + "step": 92900 + }, + { + "epoch": 0.4, + "learning_rate": 4.5494439160814866e-05, + "loss": 1.7146, + "step": 93000 + }, + { + "epoch": 0.4, + "learning_rate": 4.548901077426163e-05, + "loss": 1.6756, + "step": 93100 + }, + { + "epoch": 0.4, + "learning_rate": 4.5483582387708396e-05, + "loss": 1.7365, + "step": 93200 + }, + { + "epoch": 0.4, + "learning_rate": 4.5478154001155165e-05, + "loss": 1.7188, + "step": 93300 + }, + { + "epoch": 0.4, + "learning_rate": 4.547272561460193e-05, + "loss": 1.7, + "step": 93400 + }, + { + "epoch": 0.4, + "learning_rate": 4.5467297228048696e-05, + "loss": 1.7037, + "step": 93500 + }, + { + "epoch": 0.4, + "learning_rate": 4.546186884149546e-05, + "loss": 1.7251, + "step": 93600 + }, + { + "epoch": 0.4, + "learning_rate": 4.5456440454942226e-05, + "loss": 1.7172, + "step": 93700 + }, + { + "epoch": 0.4, + "learning_rate": 4.545101206838899e-05, + "loss": 1.707, + "step": 93800 + }, + { + "epoch": 0.4, + "learning_rate": 4.544558368183575e-05, + "loss": 1.7124, + "step": 93900 + }, + { + "epoch": 0.4, + "learning_rate": 4.544015529528252e-05, + "loss": 1.7222, + "step": 94000 + }, + { + "epoch": 0.4, + "learning_rate": 4.543472690872928e-05, + "loss": 1.7044, + "step": 94100 + }, + { + "epoch": 0.4, + "learning_rate": 4.542929852217604e-05, + "loss": 1.7188, + "step": 94200 + }, + { + "epoch": 0.41, + "learning_rate": 4.542387013562281e-05, + "loss": 1.6926, + "step": 94300 + }, + { + "epoch": 0.41, + "learning_rate": 4.541844174906958e-05, + "loss": 1.733, + "step": 94400 + }, + { + "epoch": 0.41, + "learning_rate": 4.541301336251634e-05, + "loss": 1.7195, + "step": 94500 + }, + { + "epoch": 0.41, + "learning_rate": 4.540758497596311e-05, + "loss": 1.7094, + "step": 94600 + }, + { + "epoch": 0.41, + "learning_rate": 4.540215658940987e-05, + "loss": 1.6916, + "step": 94700 + }, + { + "epoch": 0.41, + "learning_rate": 4.5396728202856634e-05, + "loss": 1.7094, + "step": 94800 + }, + { + "epoch": 0.41, + "learning_rate": 4.53912998163034e-05, + "loss": 1.7307, + "step": 94900 + }, + { + "epoch": 0.41, + "learning_rate": 4.5385871429750164e-05, + "loss": 1.7251, + "step": 95000 + }, + { + "epoch": 0.41, + "eval_loss": 1.524511694908142, + "eval_runtime": 19.0193, + "eval_samples_per_second": 525.783, + "eval_steps_per_second": 16.457, + "step": 95000 + }, + { + "epoch": 0.41, + "learning_rate": 4.5380443043196926e-05, + "loss": 1.7106, + "step": 95100 + }, + { + "epoch": 0.41, + "learning_rate": 4.5375014656643695e-05, + "loss": 1.713, + "step": 95200 + }, + { + "epoch": 0.41, + "learning_rate": 4.536958627009046e-05, + "loss": 1.7098, + "step": 95300 + }, + { + "epoch": 0.41, + "learning_rate": 4.5364157883537225e-05, + "loss": 1.6978, + "step": 95400 + }, + { + "epoch": 0.41, + "learning_rate": 4.5358729496983994e-05, + "loss": 1.7265, + "step": 95500 + }, + { + "epoch": 0.41, + "learning_rate": 4.5353301110430756e-05, + "loss": 1.7404, + "step": 95600 + }, + { + "epoch": 0.41, + "learning_rate": 4.5347872723877525e-05, + "loss": 1.7086, + "step": 95700 + }, + { + "epoch": 0.41, + "learning_rate": 4.5342444337324287e-05, + "loss": 1.6896, + "step": 95800 + }, + { + "epoch": 0.41, + "learning_rate": 4.533701595077105e-05, + "loss": 1.7227, + "step": 95900 + }, + { + "epoch": 0.41, + "learning_rate": 4.533158756421782e-05, + "loss": 1.7086, + "step": 96000 + }, + { + "epoch": 0.41, + "learning_rate": 4.532615917766458e-05, + "loss": 1.6899, + "step": 96100 + }, + { + "epoch": 0.41, + "learning_rate": 4.532073079111134e-05, + "loss": 1.7089, + "step": 96200 + }, + { + "epoch": 0.41, + "learning_rate": 4.531530240455811e-05, + "loss": 1.6741, + "step": 96300 + }, + { + "epoch": 0.41, + "learning_rate": 4.530987401800487e-05, + "loss": 1.6886, + "step": 96400 + }, + { + "epoch": 0.41, + "learning_rate": 4.530444563145164e-05, + "loss": 1.7238, + "step": 96500 + }, + { + "epoch": 0.42, + "learning_rate": 4.529901724489841e-05, + "loss": 1.7371, + "step": 96600 + }, + { + "epoch": 0.42, + "learning_rate": 4.529358885834517e-05, + "loss": 1.7138, + "step": 96700 + }, + { + "epoch": 0.42, + "learning_rate": 4.528816047179193e-05, + "loss": 1.7232, + "step": 96800 + }, + { + "epoch": 0.42, + "learning_rate": 4.52827320852387e-05, + "loss": 1.7497, + "step": 96900 + }, + { + "epoch": 0.42, + "learning_rate": 4.527730369868546e-05, + "loss": 1.7202, + "step": 97000 + }, + { + "epoch": 0.42, + "learning_rate": 4.5271875312132225e-05, + "loss": 1.7008, + "step": 97100 + }, + { + "epoch": 0.42, + "learning_rate": 4.5266446925578993e-05, + "loss": 1.6832, + "step": 97200 + }, + { + "epoch": 0.42, + "learning_rate": 4.5261018539025755e-05, + "loss": 1.6893, + "step": 97300 + }, + { + "epoch": 0.42, + "learning_rate": 4.5255590152472524e-05, + "loss": 1.7067, + "step": 97400 + }, + { + "epoch": 0.42, + "learning_rate": 4.525016176591929e-05, + "loss": 1.7119, + "step": 97500 + }, + { + "epoch": 0.42, + "learning_rate": 4.5244733379366054e-05, + "loss": 1.7267, + "step": 97600 + }, + { + "epoch": 0.42, + "learning_rate": 4.523930499281282e-05, + "loss": 1.6911, + "step": 97700 + }, + { + "epoch": 0.42, + "learning_rate": 4.5233876606259585e-05, + "loss": 1.7164, + "step": 97800 + }, + { + "epoch": 0.42, + "learning_rate": 4.522844821970635e-05, + "loss": 1.6748, + "step": 97900 + }, + { + "epoch": 0.42, + "learning_rate": 4.5223019833153116e-05, + "loss": 1.7339, + "step": 98000 + }, + { + "epoch": 0.42, + "learning_rate": 4.521759144659988e-05, + "loss": 1.692, + "step": 98100 + }, + { + "epoch": 0.42, + "learning_rate": 4.521216306004664e-05, + "loss": 1.6869, + "step": 98200 + }, + { + "epoch": 0.42, + "learning_rate": 4.520673467349341e-05, + "loss": 1.7191, + "step": 98300 + }, + { + "epoch": 0.42, + "learning_rate": 4.520130628694017e-05, + "loss": 1.7228, + "step": 98400 + }, + { + "epoch": 0.42, + "learning_rate": 4.519587790038693e-05, + "loss": 1.7141, + "step": 98500 + }, + { + "epoch": 0.42, + "learning_rate": 4.519044951383371e-05, + "loss": 1.7032, + "step": 98600 + }, + { + "epoch": 0.42, + "learning_rate": 4.518502112728047e-05, + "loss": 1.7326, + "step": 98700 + }, + { + "epoch": 0.42, + "learning_rate": 4.517959274072723e-05, + "loss": 1.7107, + "step": 98800 + }, + { + "epoch": 0.42, + "learning_rate": 4.5174164354174e-05, + "loss": 1.6987, + "step": 98900 + }, + { + "epoch": 0.43, + "learning_rate": 4.516873596762076e-05, + "loss": 1.697, + "step": 99000 + }, + { + "epoch": 0.43, + "learning_rate": 4.516330758106752e-05, + "loss": 1.7505, + "step": 99100 + }, + { + "epoch": 0.43, + "learning_rate": 4.515787919451429e-05, + "loss": 1.7094, + "step": 99200 + }, + { + "epoch": 0.43, + "learning_rate": 4.5152450807961054e-05, + "loss": 1.6878, + "step": 99300 + }, + { + "epoch": 0.43, + "learning_rate": 4.514702242140782e-05, + "loss": 1.6845, + "step": 99400 + }, + { + "epoch": 0.43, + "learning_rate": 4.5141594034854584e-05, + "loss": 1.6908, + "step": 99500 + }, + { + "epoch": 0.43, + "learning_rate": 4.513616564830135e-05, + "loss": 1.7112, + "step": 99600 + }, + { + "epoch": 0.43, + "learning_rate": 4.513073726174812e-05, + "loss": 1.7154, + "step": 99700 + }, + { + "epoch": 0.43, + "learning_rate": 4.5125308875194884e-05, + "loss": 1.7267, + "step": 99800 + }, + { + "epoch": 0.43, + "learning_rate": 4.5119880488641645e-05, + "loss": 1.7002, + "step": 99900 + }, + { + "epoch": 0.43, + "learning_rate": 4.5114452102088414e-05, + "loss": 1.7055, + "step": 100000 + }, + { + "epoch": 0.43, + "eval_loss": 1.520733118057251, + "eval_runtime": 19.0032, + "eval_samples_per_second": 526.228, + "eval_steps_per_second": 16.471, + "step": 100000 + }, + { + "epoch": 0.43, + "learning_rate": 4.5109023715535176e-05, + "loss": 1.7008, + "step": 100100 + }, + { + "epoch": 0.43, + "learning_rate": 4.510359532898194e-05, + "loss": 1.728, + "step": 100200 + }, + { + "epoch": 0.43, + "learning_rate": 4.5098166942428706e-05, + "loss": 1.7455, + "step": 100300 + }, + { + "epoch": 0.43, + "learning_rate": 4.509273855587547e-05, + "loss": 1.7056, + "step": 100400 + }, + { + "epoch": 0.43, + "learning_rate": 4.508731016932223e-05, + "loss": 1.722, + "step": 100500 + }, + { + "epoch": 0.43, + "learning_rate": 4.5081881782769e-05, + "loss": 1.7009, + "step": 100600 + }, + { + "epoch": 0.43, + "learning_rate": 4.507645339621577e-05, + "loss": 1.6849, + "step": 100700 + }, + { + "epoch": 0.43, + "learning_rate": 4.507102500966253e-05, + "loss": 1.7089, + "step": 100800 + }, + { + "epoch": 0.43, + "learning_rate": 4.50655966231093e-05, + "loss": 1.7269, + "step": 100900 + }, + { + "epoch": 0.43, + "learning_rate": 4.506016823655606e-05, + "loss": 1.6955, + "step": 101000 + }, + { + "epoch": 0.43, + "learning_rate": 4.505473985000282e-05, + "loss": 1.7307, + "step": 101100 + }, + { + "epoch": 0.43, + "learning_rate": 4.504931146344959e-05, + "loss": 1.6969, + "step": 101200 + }, + { + "epoch": 0.44, + "learning_rate": 4.504388307689635e-05, + "loss": 1.7104, + "step": 101300 + }, + { + "epoch": 0.44, + "learning_rate": 4.503845469034312e-05, + "loss": 1.7175, + "step": 101400 + }, + { + "epoch": 0.44, + "learning_rate": 4.503302630378988e-05, + "loss": 1.7054, + "step": 101500 + }, + { + "epoch": 0.44, + "learning_rate": 4.5027597917236645e-05, + "loss": 1.6907, + "step": 101600 + }, + { + "epoch": 0.44, + "learning_rate": 4.502216953068342e-05, + "loss": 1.7202, + "step": 101700 + }, + { + "epoch": 0.44, + "learning_rate": 4.501674114413018e-05, + "loss": 1.7127, + "step": 101800 + }, + { + "epoch": 0.44, + "learning_rate": 4.5011312757576944e-05, + "loss": 1.7052, + "step": 101900 + }, + { + "epoch": 0.44, + "learning_rate": 4.500588437102371e-05, + "loss": 1.7044, + "step": 102000 + }, + { + "epoch": 0.44, + "learning_rate": 4.5000455984470474e-05, + "loss": 1.7022, + "step": 102100 + }, + { + "epoch": 0.44, + "learning_rate": 4.4995027597917236e-05, + "loss": 1.7027, + "step": 102200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4989599211364005e-05, + "loss": 1.6933, + "step": 102300 + }, + { + "epoch": 0.44, + "learning_rate": 4.498417082481077e-05, + "loss": 1.7638, + "step": 102400 + }, + { + "epoch": 0.44, + "learning_rate": 4.497874243825753e-05, + "loss": 1.7242, + "step": 102500 + }, + { + "epoch": 0.44, + "learning_rate": 4.49733140517043e-05, + "loss": 1.7152, + "step": 102600 + }, + { + "epoch": 0.44, + "learning_rate": 4.4967885665151066e-05, + "loss": 1.7038, + "step": 102700 + }, + { + "epoch": 0.44, + "learning_rate": 4.496245727859783e-05, + "loss": 1.7027, + "step": 102800 + }, + { + "epoch": 0.44, + "learning_rate": 4.4957028892044597e-05, + "loss": 1.7215, + "step": 102900 + }, + { + "epoch": 0.44, + "learning_rate": 4.495160050549136e-05, + "loss": 1.6996, + "step": 103000 + }, + { + "epoch": 0.44, + "learning_rate": 4.494617211893812e-05, + "loss": 1.6896, + "step": 103100 + }, + { + "epoch": 0.44, + "learning_rate": 4.494074373238489e-05, + "loss": 1.6978, + "step": 103200 + }, + { + "epoch": 0.44, + "learning_rate": 4.493531534583165e-05, + "loss": 1.6943, + "step": 103300 + }, + { + "epoch": 0.44, + "learning_rate": 4.492988695927842e-05, + "loss": 1.7214, + "step": 103400 + }, + { + "epoch": 0.44, + "learning_rate": 4.492445857272518e-05, + "loss": 1.717, + "step": 103500 + }, + { + "epoch": 0.45, + "learning_rate": 4.491903018617194e-05, + "loss": 1.6582, + "step": 103600 + }, + { + "epoch": 0.45, + "learning_rate": 4.491360179961871e-05, + "loss": 1.6872, + "step": 103700 + }, + { + "epoch": 0.45, + "learning_rate": 4.490817341306548e-05, + "loss": 1.7238, + "step": 103800 + }, + { + "epoch": 0.45, + "learning_rate": 4.490274502651224e-05, + "loss": 1.6819, + "step": 103900 + }, + { + "epoch": 0.45, + "learning_rate": 4.489731663995901e-05, + "loss": 1.6898, + "step": 104000 + }, + { + "epoch": 0.45, + "learning_rate": 4.489188825340577e-05, + "loss": 1.6868, + "step": 104100 + }, + { + "epoch": 0.45, + "learning_rate": 4.4886459866852535e-05, + "loss": 1.6888, + "step": 104200 + }, + { + "epoch": 0.45, + "learning_rate": 4.4881031480299303e-05, + "loss": 1.7046, + "step": 104300 + }, + { + "epoch": 0.45, + "learning_rate": 4.4875603093746065e-05, + "loss": 1.6846, + "step": 104400 + }, + { + "epoch": 0.45, + "learning_rate": 4.487017470719283e-05, + "loss": 1.6848, + "step": 104500 + }, + { + "epoch": 0.45, + "learning_rate": 4.4864746320639596e-05, + "loss": 1.6822, + "step": 104600 + }, + { + "epoch": 0.45, + "learning_rate": 4.485931793408636e-05, + "loss": 1.6829, + "step": 104700 + }, + { + "epoch": 0.45, + "learning_rate": 4.4853889547533126e-05, + "loss": 1.7006, + "step": 104800 + }, + { + "epoch": 0.45, + "learning_rate": 4.4848461160979895e-05, + "loss": 1.6972, + "step": 104900 + }, + { + "epoch": 0.45, + "learning_rate": 4.484303277442666e-05, + "loss": 1.7391, + "step": 105000 + }, + { + "epoch": 0.45, + "eval_loss": 1.5192019939422607, + "eval_runtime": 18.9573, + "eval_samples_per_second": 527.5, + "eval_steps_per_second": 16.511, + "step": 105000 + }, + { + "epoch": 0.45, + "learning_rate": 4.483760438787342e-05, + "loss": 1.6914, + "step": 105100 + }, + { + "epoch": 0.45, + "learning_rate": 4.483217600132019e-05, + "loss": 1.6868, + "step": 105200 + }, + { + "epoch": 0.45, + "learning_rate": 4.482674761476695e-05, + "loss": 1.679, + "step": 105300 + }, + { + "epoch": 0.45, + "learning_rate": 4.482131922821372e-05, + "loss": 1.6947, + "step": 105400 + }, + { + "epoch": 0.45, + "learning_rate": 4.481589084166048e-05, + "loss": 1.683, + "step": 105500 + }, + { + "epoch": 0.45, + "learning_rate": 4.481046245510724e-05, + "loss": 1.7186, + "step": 105600 + }, + { + "epoch": 0.45, + "learning_rate": 4.480503406855401e-05, + "loss": 1.7041, + "step": 105700 + }, + { + "epoch": 0.45, + "learning_rate": 4.479960568200077e-05, + "loss": 1.6946, + "step": 105800 + }, + { + "epoch": 0.45, + "learning_rate": 4.479417729544754e-05, + "loss": 1.6408, + "step": 105900 + }, + { + "epoch": 0.46, + "learning_rate": 4.478874890889431e-05, + "loss": 1.6982, + "step": 106000 + }, + { + "epoch": 0.46, + "learning_rate": 4.478332052234107e-05, + "loss": 1.6851, + "step": 106100 + }, + { + "epoch": 0.46, + "learning_rate": 4.477789213578783e-05, + "loss": 1.7197, + "step": 106200 + }, + { + "epoch": 0.46, + "learning_rate": 4.47724637492346e-05, + "loss": 1.6963, + "step": 106300 + }, + { + "epoch": 0.46, + "learning_rate": 4.4767035362681364e-05, + "loss": 1.7035, + "step": 106400 + }, + { + "epoch": 0.46, + "learning_rate": 4.4761606976128126e-05, + "loss": 1.684, + "step": 106500 + }, + { + "epoch": 0.46, + "learning_rate": 4.4756178589574894e-05, + "loss": 1.7079, + "step": 106600 + }, + { + "epoch": 0.46, + "learning_rate": 4.4750750203021656e-05, + "loss": 1.6791, + "step": 106700 + }, + { + "epoch": 0.46, + "learning_rate": 4.4745321816468425e-05, + "loss": 1.6869, + "step": 106800 + }, + { + "epoch": 0.46, + "learning_rate": 4.4739893429915193e-05, + "loss": 1.6882, + "step": 106900 + }, + { + "epoch": 0.46, + "learning_rate": 4.4734465043361955e-05, + "loss": 1.6805, + "step": 107000 + }, + { + "epoch": 0.46, + "learning_rate": 4.472903665680872e-05, + "loss": 1.6941, + "step": 107100 + }, + { + "epoch": 0.46, + "learning_rate": 4.4723608270255486e-05, + "loss": 1.7022, + "step": 107200 + }, + { + "epoch": 0.46, + "learning_rate": 4.471817988370225e-05, + "loss": 1.6859, + "step": 107300 + }, + { + "epoch": 0.46, + "learning_rate": 4.4712751497149016e-05, + "loss": 1.6924, + "step": 107400 + }, + { + "epoch": 0.46, + "learning_rate": 4.470732311059578e-05, + "loss": 1.6669, + "step": 107500 + }, + { + "epoch": 0.46, + "learning_rate": 4.470189472404254e-05, + "loss": 1.6708, + "step": 107600 + }, + { + "epoch": 0.46, + "learning_rate": 4.469646633748931e-05, + "loss": 1.7214, + "step": 107700 + }, + { + "epoch": 0.46, + "learning_rate": 4.469103795093607e-05, + "loss": 1.6922, + "step": 107800 + }, + { + "epoch": 0.46, + "learning_rate": 4.468560956438283e-05, + "loss": 1.7017, + "step": 107900 + }, + { + "epoch": 0.46, + "learning_rate": 4.468018117782961e-05, + "loss": 1.6998, + "step": 108000 + }, + { + "epoch": 0.46, + "learning_rate": 4.467475279127637e-05, + "loss": 1.703, + "step": 108100 + }, + { + "epoch": 0.46, + "learning_rate": 4.466932440472313e-05, + "loss": 1.6755, + "step": 108200 + }, + { + "epoch": 0.47, + "learning_rate": 4.46638960181699e-05, + "loss": 1.7187, + "step": 108300 + }, + { + "epoch": 0.47, + "learning_rate": 4.465846763161666e-05, + "loss": 1.6764, + "step": 108400 + }, + { + "epoch": 0.47, + "learning_rate": 4.4653039245063424e-05, + "loss": 1.7016, + "step": 108500 + }, + { + "epoch": 0.47, + "learning_rate": 4.464761085851019e-05, + "loss": 1.7061, + "step": 108600 + }, + { + "epoch": 0.47, + "learning_rate": 4.4642182471956955e-05, + "loss": 1.6835, + "step": 108700 + }, + { + "epoch": 0.47, + "learning_rate": 4.463675408540372e-05, + "loss": 1.6994, + "step": 108800 + }, + { + "epoch": 0.47, + "learning_rate": 4.4631325698850485e-05, + "loss": 1.6901, + "step": 108900 + }, + { + "epoch": 0.47, + "learning_rate": 4.4625897312297254e-05, + "loss": 1.7016, + "step": 109000 + }, + { + "epoch": 0.47, + "learning_rate": 4.4620468925744016e-05, + "loss": 1.6906, + "step": 109100 + }, + { + "epoch": 0.47, + "learning_rate": 4.4615040539190784e-05, + "loss": 1.6993, + "step": 109200 + }, + { + "epoch": 0.47, + "learning_rate": 4.4609612152637546e-05, + "loss": 1.7195, + "step": 109300 + }, + { + "epoch": 0.47, + "learning_rate": 4.4604183766084315e-05, + "loss": 1.6942, + "step": 109400 + }, + { + "epoch": 0.47, + "learning_rate": 4.459875537953108e-05, + "loss": 1.7097, + "step": 109500 + }, + { + "epoch": 0.47, + "learning_rate": 4.459332699297784e-05, + "loss": 1.669, + "step": 109600 + }, + { + "epoch": 0.47, + "learning_rate": 4.458789860642461e-05, + "loss": 1.7243, + "step": 109700 + }, + { + "epoch": 0.47, + "learning_rate": 4.458247021987137e-05, + "loss": 1.6813, + "step": 109800 + }, + { + "epoch": 0.47, + "learning_rate": 4.457704183331813e-05, + "loss": 1.7064, + "step": 109900 + }, + { + "epoch": 0.47, + "learning_rate": 4.45716134467649e-05, + "loss": 1.7051, + "step": 110000 + }, + { + "epoch": 0.47, + "eval_loss": 1.51393461227417, + "eval_runtime": 19.0208, + "eval_samples_per_second": 525.74, + "eval_steps_per_second": 16.456, + "step": 110000 + }, + { + "epoch": 0.47, + "learning_rate": 4.456618506021167e-05, + "loss": 1.6981, + "step": 110100 + }, + { + "epoch": 0.47, + "learning_rate": 4.456075667365843e-05, + "loss": 1.7, + "step": 110200 + }, + { + "epoch": 0.47, + "learning_rate": 4.45553282871052e-05, + "loss": 1.6962, + "step": 110300 + }, + { + "epoch": 0.47, + "learning_rate": 4.454989990055196e-05, + "loss": 1.6814, + "step": 110400 + }, + { + "epoch": 0.47, + "learning_rate": 4.454447151399872e-05, + "loss": 1.6765, + "step": 110500 + }, + { + "epoch": 0.48, + "learning_rate": 4.453904312744549e-05, + "loss": 1.6811, + "step": 110600 + }, + { + "epoch": 0.48, + "learning_rate": 4.453361474089225e-05, + "loss": 1.6918, + "step": 110700 + }, + { + "epoch": 0.48, + "learning_rate": 4.452818635433902e-05, + "loss": 1.7026, + "step": 110800 + }, + { + "epoch": 0.48, + "learning_rate": 4.4522757967785784e-05, + "loss": 1.6863, + "step": 110900 + }, + { + "epoch": 0.48, + "learning_rate": 4.4517329581232546e-05, + "loss": 1.6976, + "step": 111000 + }, + { + "epoch": 0.48, + "learning_rate": 4.4511901194679314e-05, + "loss": 1.715, + "step": 111100 + }, + { + "epoch": 0.48, + "learning_rate": 4.450647280812608e-05, + "loss": 1.689, + "step": 111200 + }, + { + "epoch": 0.48, + "learning_rate": 4.4501044421572845e-05, + "loss": 1.679, + "step": 111300 + }, + { + "epoch": 0.48, + "learning_rate": 4.4495616035019613e-05, + "loss": 1.6749, + "step": 111400 + }, + { + "epoch": 0.48, + "learning_rate": 4.4490187648466375e-05, + "loss": 1.6885, + "step": 111500 + }, + { + "epoch": 0.48, + "learning_rate": 4.448475926191314e-05, + "loss": 1.6888, + "step": 111600 + }, + { + "epoch": 0.48, + "learning_rate": 4.4479330875359906e-05, + "loss": 1.6944, + "step": 111700 + }, + { + "epoch": 0.48, + "learning_rate": 4.447390248880667e-05, + "loss": 1.6997, + "step": 111800 + }, + { + "epoch": 0.48, + "learning_rate": 4.446847410225343e-05, + "loss": 1.685, + "step": 111900 + }, + { + "epoch": 0.48, + "learning_rate": 4.44630457157002e-05, + "loss": 1.6695, + "step": 112000 + }, + { + "epoch": 0.48, + "learning_rate": 4.445761732914696e-05, + "loss": 1.6916, + "step": 112100 + }, + { + "epoch": 0.48, + "learning_rate": 4.445218894259373e-05, + "loss": 1.7029, + "step": 112200 + }, + { + "epoch": 0.48, + "learning_rate": 4.44467605560405e-05, + "loss": 1.7237, + "step": 112300 + }, + { + "epoch": 0.48, + "learning_rate": 4.444133216948726e-05, + "loss": 1.7238, + "step": 112400 + }, + { + "epoch": 0.48, + "learning_rate": 4.443590378293402e-05, + "loss": 1.6702, + "step": 112500 + }, + { + "epoch": 0.48, + "learning_rate": 4.443047539638079e-05, + "loss": 1.6817, + "step": 112600 + }, + { + "epoch": 0.48, + "learning_rate": 4.442504700982755e-05, + "loss": 1.7053, + "step": 112700 + }, + { + "epoch": 0.48, + "learning_rate": 4.441961862327432e-05, + "loss": 1.6739, + "step": 112800 + }, + { + "epoch": 0.49, + "learning_rate": 4.441419023672108e-05, + "loss": 1.682, + "step": 112900 + }, + { + "epoch": 0.49, + "learning_rate": 4.4408761850167844e-05, + "loss": 1.6826, + "step": 113000 + }, + { + "epoch": 0.49, + "learning_rate": 4.440333346361461e-05, + "loss": 1.7159, + "step": 113100 + }, + { + "epoch": 0.49, + "learning_rate": 4.439790507706138e-05, + "loss": 1.6906, + "step": 113200 + }, + { + "epoch": 0.49, + "learning_rate": 4.439247669050814e-05, + "loss": 1.6761, + "step": 113300 + }, + { + "epoch": 0.49, + "learning_rate": 4.438704830395491e-05, + "loss": 1.7117, + "step": 113400 + }, + { + "epoch": 0.49, + "learning_rate": 4.4381619917401674e-05, + "loss": 1.6905, + "step": 113500 + }, + { + "epoch": 0.49, + "learning_rate": 4.4376191530848436e-05, + "loss": 1.6703, + "step": 113600 + }, + { + "epoch": 0.49, + "learning_rate": 4.4370763144295204e-05, + "loss": 1.6829, + "step": 113700 + }, + { + "epoch": 0.49, + "learning_rate": 4.4365334757741966e-05, + "loss": 1.7074, + "step": 113800 + }, + { + "epoch": 0.49, + "learning_rate": 4.435990637118873e-05, + "loss": 1.6749, + "step": 113900 + }, + { + "epoch": 0.49, + "learning_rate": 4.43544779846355e-05, + "loss": 1.6903, + "step": 114000 + }, + { + "epoch": 0.49, + "learning_rate": 4.434904959808226e-05, + "loss": 1.6965, + "step": 114100 + }, + { + "epoch": 0.49, + "learning_rate": 4.434362121152903e-05, + "loss": 1.707, + "step": 114200 + }, + { + "epoch": 0.49, + "learning_rate": 4.4338192824975796e-05, + "loss": 1.7036, + "step": 114300 + }, + { + "epoch": 0.49, + "learning_rate": 4.433276443842256e-05, + "loss": 1.6914, + "step": 114400 + }, + { + "epoch": 0.49, + "learning_rate": 4.432733605186932e-05, + "loss": 1.7129, + "step": 114500 + }, + { + "epoch": 0.49, + "learning_rate": 4.432190766531609e-05, + "loss": 1.6971, + "step": 114600 + }, + { + "epoch": 0.49, + "learning_rate": 4.431647927876285e-05, + "loss": 1.69, + "step": 114700 + }, + { + "epoch": 0.49, + "learning_rate": 4.431105089220962e-05, + "loss": 1.6701, + "step": 114800 + }, + { + "epoch": 0.49, + "learning_rate": 4.430562250565638e-05, + "loss": 1.7008, + "step": 114900 + }, + { + "epoch": 0.49, + "learning_rate": 4.430019411910314e-05, + "loss": 1.7034, + "step": 115000 + }, + { + "epoch": 0.49, + "eval_loss": 1.5167127847671509, + "eval_runtime": 19.0213, + "eval_samples_per_second": 525.726, + "eval_steps_per_second": 16.455, + "step": 115000 + }, + { + "epoch": 0.49, + "learning_rate": 4.429476573254991e-05, + "loss": 1.6797, + "step": 115100 + }, + { + "epoch": 0.49, + "learning_rate": 4.428933734599667e-05, + "loss": 1.6997, + "step": 115200 + }, + { + "epoch": 0.5, + "learning_rate": 4.428390895944344e-05, + "loss": 1.6886, + "step": 115300 + }, + { + "epoch": 0.5, + "learning_rate": 4.427848057289021e-05, + "loss": 1.7028, + "step": 115400 + }, + { + "epoch": 0.5, + "learning_rate": 4.427305218633697e-05, + "loss": 1.675, + "step": 115500 + }, + { + "epoch": 0.5, + "learning_rate": 4.4267623799783734e-05, + "loss": 1.6623, + "step": 115600 + }, + { + "epoch": 0.5, + "learning_rate": 4.42621954132305e-05, + "loss": 1.6872, + "step": 115700 + }, + { + "epoch": 0.5, + "learning_rate": 4.4256767026677265e-05, + "loss": 1.6599, + "step": 115800 + }, + { + "epoch": 0.5, + "learning_rate": 4.4251338640124027e-05, + "loss": 1.6994, + "step": 115900 + }, + { + "epoch": 0.5, + "learning_rate": 4.4245910253570795e-05, + "loss": 1.6918, + "step": 116000 + }, + { + "epoch": 0.5, + "learning_rate": 4.424048186701756e-05, + "loss": 1.6871, + "step": 116100 + }, + { + "epoch": 0.5, + "learning_rate": 4.4235053480464326e-05, + "loss": 1.7014, + "step": 116200 + }, + { + "epoch": 0.5, + "learning_rate": 4.422962509391109e-05, + "loss": 1.6834, + "step": 116300 + }, + { + "epoch": 0.5, + "learning_rate": 4.4224196707357856e-05, + "loss": 1.6943, + "step": 116400 + }, + { + "epoch": 0.5, + "learning_rate": 4.421876832080462e-05, + "loss": 1.6789, + "step": 116500 + }, + { + "epoch": 0.5, + "learning_rate": 4.421333993425139e-05, + "loss": 1.718, + "step": 116600 + }, + { + "epoch": 0.5, + "learning_rate": 4.420791154769815e-05, + "loss": 1.7048, + "step": 116700 + }, + { + "epoch": 0.5, + "learning_rate": 4.420248316114492e-05, + "loss": 1.7218, + "step": 116800 + }, + { + "epoch": 0.5, + "learning_rate": 4.419705477459168e-05, + "loss": 1.6819, + "step": 116900 + }, + { + "epoch": 0.5, + "learning_rate": 4.419162638803844e-05, + "loss": 1.7022, + "step": 117000 + }, + { + "epoch": 0.5, + "learning_rate": 4.418619800148521e-05, + "loss": 1.6735, + "step": 117100 + }, + { + "epoch": 0.5, + "learning_rate": 4.418076961493197e-05, + "loss": 1.7054, + "step": 117200 + }, + { + "epoch": 0.5, + "learning_rate": 4.4175341228378733e-05, + "loss": 1.6838, + "step": 117300 + }, + { + "epoch": 0.5, + "learning_rate": 4.416991284182551e-05, + "loss": 1.7075, + "step": 117400 + }, + { + "epoch": 0.5, + "learning_rate": 4.416448445527227e-05, + "loss": 1.6719, + "step": 117500 + }, + { + "epoch": 0.51, + "learning_rate": 4.415905606871903e-05, + "loss": 1.693, + "step": 117600 + }, + { + "epoch": 0.51, + "learning_rate": 4.41536276821658e-05, + "loss": 1.678, + "step": 117700 + }, + { + "epoch": 0.51, + "learning_rate": 4.414819929561256e-05, + "loss": 1.723, + "step": 117800 + }, + { + "epoch": 0.51, + "learning_rate": 4.4142770909059325e-05, + "loss": 1.7097, + "step": 117900 + }, + { + "epoch": 0.51, + "learning_rate": 4.4137342522506094e-05, + "loss": 1.6379, + "step": 118000 + }, + { + "epoch": 0.51, + "learning_rate": 4.4131914135952856e-05, + "loss": 1.6527, + "step": 118100 + }, + { + "epoch": 0.51, + "learning_rate": 4.4126485749399624e-05, + "loss": 1.6692, + "step": 118200 + }, + { + "epoch": 0.51, + "learning_rate": 4.4121057362846386e-05, + "loss": 1.6807, + "step": 118300 + }, + { + "epoch": 0.51, + "learning_rate": 4.411562897629315e-05, + "loss": 1.6781, + "step": 118400 + }, + { + "epoch": 0.51, + "learning_rate": 4.4110200589739917e-05, + "loss": 1.6679, + "step": 118500 + }, + { + "epoch": 0.51, + "learning_rate": 4.4104772203186685e-05, + "loss": 1.6798, + "step": 118600 + }, + { + "epoch": 0.51, + "learning_rate": 4.409934381663345e-05, + "loss": 1.6614, + "step": 118700 + }, + { + "epoch": 0.51, + "learning_rate": 4.4093915430080216e-05, + "loss": 1.7208, + "step": 118800 + }, + { + "epoch": 0.51, + "learning_rate": 4.408848704352698e-05, + "loss": 1.6992, + "step": 118900 + }, + { + "epoch": 0.51, + "learning_rate": 4.408305865697374e-05, + "loss": 1.7189, + "step": 119000 + }, + { + "epoch": 0.51, + "learning_rate": 4.407763027042051e-05, + "loss": 1.6979, + "step": 119100 + }, + { + "epoch": 0.51, + "learning_rate": 4.407220188386727e-05, + "loss": 1.6903, + "step": 119200 + }, + { + "epoch": 0.51, + "learning_rate": 4.406677349731403e-05, + "loss": 1.679, + "step": 119300 + }, + { + "epoch": 0.51, + "learning_rate": 4.40613451107608e-05, + "loss": 1.6904, + "step": 119400 + }, + { + "epoch": 0.51, + "learning_rate": 4.405591672420757e-05, + "loss": 1.6697, + "step": 119500 + }, + { + "epoch": 0.51, + "learning_rate": 4.405048833765433e-05, + "loss": 1.6896, + "step": 119600 + }, + { + "epoch": 0.51, + "learning_rate": 4.40450599511011e-05, + "loss": 1.6877, + "step": 119700 + }, + { + "epoch": 0.51, + "learning_rate": 4.403963156454786e-05, + "loss": 1.6599, + "step": 119800 + }, + { + "epoch": 0.52, + "learning_rate": 4.4034203177994623e-05, + "loss": 1.7118, + "step": 119900 + }, + { + "epoch": 0.52, + "learning_rate": 4.402877479144139e-05, + "loss": 1.6659, + "step": 120000 + }, + { + "epoch": 0.52, + "eval_loss": 1.5134057998657227, + "eval_runtime": 19.0004, + "eval_samples_per_second": 526.305, + "eval_steps_per_second": 16.473, + "step": 120000 + }, + { + "epoch": 0.52, + "learning_rate": 4.4023346404888154e-05, + "loss": 1.6814, + "step": 120100 + }, + { + "epoch": 0.52, + "learning_rate": 4.401791801833492e-05, + "loss": 1.6708, + "step": 120200 + }, + { + "epoch": 0.52, + "learning_rate": 4.4012489631781685e-05, + "loss": 1.6479, + "step": 120300 + }, + { + "epoch": 0.52, + "learning_rate": 4.4007061245228446e-05, + "loss": 1.6741, + "step": 120400 + }, + { + "epoch": 0.52, + "learning_rate": 4.4001632858675215e-05, + "loss": 1.6662, + "step": 120500 + }, + { + "epoch": 0.52, + "learning_rate": 4.3996204472121984e-05, + "loss": 1.6919, + "step": 120600 + }, + { + "epoch": 0.52, + "learning_rate": 4.3990776085568746e-05, + "loss": 1.6835, + "step": 120700 + }, + { + "epoch": 0.52, + "learning_rate": 4.3985347699015514e-05, + "loss": 1.6708, + "step": 120800 + }, + { + "epoch": 0.52, + "learning_rate": 4.3979919312462276e-05, + "loss": 1.6714, + "step": 120900 + }, + { + "epoch": 0.52, + "learning_rate": 4.397449092590904e-05, + "loss": 1.674, + "step": 121000 + }, + { + "epoch": 0.52, + "learning_rate": 4.396906253935581e-05, + "loss": 1.6925, + "step": 121100 + }, + { + "epoch": 0.52, + "learning_rate": 4.396363415280257e-05, + "loss": 1.654, + "step": 121200 + }, + { + "epoch": 0.52, + "learning_rate": 4.395820576624933e-05, + "loss": 1.6583, + "step": 121300 + }, + { + "epoch": 0.52, + "learning_rate": 4.39527773796961e-05, + "loss": 1.6943, + "step": 121400 + }, + { + "epoch": 0.52, + "learning_rate": 4.394734899314286e-05, + "loss": 1.6652, + "step": 121500 + }, + { + "epoch": 0.52, + "learning_rate": 4.394192060658963e-05, + "loss": 1.6484, + "step": 121600 + }, + { + "epoch": 0.52, + "learning_rate": 4.39364922200364e-05, + "loss": 1.6899, + "step": 121700 + }, + { + "epoch": 0.52, + "learning_rate": 4.393106383348316e-05, + "loss": 1.6845, + "step": 121800 + }, + { + "epoch": 0.52, + "learning_rate": 4.392563544692992e-05, + "loss": 1.6607, + "step": 121900 + }, + { + "epoch": 0.52, + "learning_rate": 4.392020706037669e-05, + "loss": 1.6697, + "step": 122000 + }, + { + "epoch": 0.52, + "learning_rate": 4.391477867382345e-05, + "loss": 1.6999, + "step": 122100 + }, + { + "epoch": 0.52, + "learning_rate": 4.390935028727022e-05, + "loss": 1.6962, + "step": 122200 + }, + { + "epoch": 0.53, + "learning_rate": 4.390392190071698e-05, + "loss": 1.6608, + "step": 122300 + }, + { + "epoch": 0.53, + "learning_rate": 4.3898493514163745e-05, + "loss": 1.7008, + "step": 122400 + }, + { + "epoch": 0.53, + "learning_rate": 4.3893065127610514e-05, + "loss": 1.6752, + "step": 122500 + }, + { + "epoch": 0.53, + "learning_rate": 4.388763674105728e-05, + "loss": 1.6751, + "step": 122600 + }, + { + "epoch": 0.53, + "learning_rate": 4.3882208354504044e-05, + "loss": 1.6752, + "step": 122700 + }, + { + "epoch": 0.53, + "learning_rate": 4.387677996795081e-05, + "loss": 1.6605, + "step": 122800 + }, + { + "epoch": 0.53, + "learning_rate": 4.3871351581397575e-05, + "loss": 1.6843, + "step": 122900 + }, + { + "epoch": 0.53, + "learning_rate": 4.3865923194844336e-05, + "loss": 1.6849, + "step": 123000 + }, + { + "epoch": 0.53, + "learning_rate": 4.3860494808291105e-05, + "loss": 1.6412, + "step": 123100 + }, + { + "epoch": 0.53, + "learning_rate": 4.385506642173787e-05, + "loss": 1.6645, + "step": 123200 + }, + { + "epoch": 0.53, + "learning_rate": 4.384963803518463e-05, + "loss": 1.6803, + "step": 123300 + }, + { + "epoch": 0.53, + "learning_rate": 4.38442096486314e-05, + "loss": 1.6622, + "step": 123400 + }, + { + "epoch": 0.53, + "learning_rate": 4.383878126207816e-05, + "loss": 1.6673, + "step": 123500 + }, + { + "epoch": 0.53, + "learning_rate": 4.383335287552492e-05, + "loss": 1.6334, + "step": 123600 + }, + { + "epoch": 0.53, + "learning_rate": 4.38279244889717e-05, + "loss": 1.6778, + "step": 123700 + }, + { + "epoch": 0.53, + "learning_rate": 4.382249610241846e-05, + "loss": 1.6805, + "step": 123800 + }, + { + "epoch": 0.53, + "learning_rate": 4.381706771586522e-05, + "loss": 1.711, + "step": 123900 + }, + { + "epoch": 0.53, + "learning_rate": 4.381163932931199e-05, + "loss": 1.6893, + "step": 124000 + }, + { + "epoch": 0.53, + "learning_rate": 4.380621094275875e-05, + "loss": 1.6537, + "step": 124100 + }, + { + "epoch": 0.53, + "learning_rate": 4.380078255620552e-05, + "loss": 1.6938, + "step": 124200 + }, + { + "epoch": 0.53, + "learning_rate": 4.379535416965228e-05, + "loss": 1.6666, + "step": 124300 + }, + { + "epoch": 0.53, + "learning_rate": 4.3789925783099043e-05, + "loss": 1.709, + "step": 124400 + }, + { + "epoch": 0.53, + "learning_rate": 4.378449739654581e-05, + "loss": 1.6937, + "step": 124500 + }, + { + "epoch": 0.54, + "learning_rate": 4.3779069009992574e-05, + "loss": 1.7111, + "step": 124600 + }, + { + "epoch": 0.54, + "learning_rate": 4.377364062343934e-05, + "loss": 1.6885, + "step": 124700 + }, + { + "epoch": 0.54, + "learning_rate": 4.376821223688611e-05, + "loss": 1.6691, + "step": 124800 + }, + { + "epoch": 0.54, + "learning_rate": 4.376278385033287e-05, + "loss": 1.6659, + "step": 124900 + }, + { + "epoch": 0.54, + "learning_rate": 4.3757355463779635e-05, + "loss": 1.6814, + "step": 125000 + }, + { + "epoch": 0.54, + "eval_loss": 1.5067857503890991, + "eval_runtime": 18.9814, + "eval_samples_per_second": 526.832, + "eval_steps_per_second": 16.49, + "step": 125000 + }, + { + "epoch": 0.54, + "learning_rate": 4.3751927077226404e-05, + "loss": 1.7118, + "step": 125100 + }, + { + "epoch": 0.54, + "learning_rate": 4.3746498690673166e-05, + "loss": 1.6906, + "step": 125200 + }, + { + "epoch": 0.54, + "learning_rate": 4.374107030411993e-05, + "loss": 1.6594, + "step": 125300 + }, + { + "epoch": 0.54, + "learning_rate": 4.3735641917566696e-05, + "loss": 1.6768, + "step": 125400 + }, + { + "epoch": 0.54, + "learning_rate": 4.373021353101346e-05, + "loss": 1.6496, + "step": 125500 + }, + { + "epoch": 0.54, + "learning_rate": 4.372478514446022e-05, + "loss": 1.6796, + "step": 125600 + }, + { + "epoch": 0.54, + "learning_rate": 4.371935675790699e-05, + "loss": 1.6844, + "step": 125700 + }, + { + "epoch": 0.54, + "learning_rate": 4.371392837135376e-05, + "loss": 1.6712, + "step": 125800 + }, + { + "epoch": 0.54, + "learning_rate": 4.370849998480052e-05, + "loss": 1.6566, + "step": 125900 + }, + { + "epoch": 0.54, + "learning_rate": 4.370307159824729e-05, + "loss": 1.653, + "step": 126000 + }, + { + "epoch": 0.54, + "learning_rate": 4.369764321169405e-05, + "loss": 1.711, + "step": 126100 + }, + { + "epoch": 0.54, + "learning_rate": 4.369221482514082e-05, + "loss": 1.6782, + "step": 126200 + }, + { + "epoch": 0.54, + "learning_rate": 4.368678643858758e-05, + "loss": 1.6618, + "step": 126300 + }, + { + "epoch": 0.54, + "learning_rate": 4.368135805203434e-05, + "loss": 1.6439, + "step": 126400 + }, + { + "epoch": 0.54, + "learning_rate": 4.367592966548111e-05, + "loss": 1.6435, + "step": 126500 + }, + { + "epoch": 0.54, + "learning_rate": 4.367050127892787e-05, + "loss": 1.6772, + "step": 126600 + }, + { + "epoch": 0.54, + "learning_rate": 4.3665072892374634e-05, + "loss": 1.6647, + "step": 126700 + }, + { + "epoch": 0.54, + "learning_rate": 4.365964450582141e-05, + "loss": 1.6984, + "step": 126800 + }, + { + "epoch": 0.55, + "learning_rate": 4.365421611926817e-05, + "loss": 1.652, + "step": 126900 + }, + { + "epoch": 0.55, + "learning_rate": 4.3648787732714933e-05, + "loss": 1.6726, + "step": 127000 + }, + { + "epoch": 0.55, + "learning_rate": 4.36433593461617e-05, + "loss": 1.6786, + "step": 127100 + }, + { + "epoch": 0.55, + "learning_rate": 4.3637930959608464e-05, + "loss": 1.6714, + "step": 127200 + }, + { + "epoch": 0.55, + "learning_rate": 4.3632502573055226e-05, + "loss": 1.6693, + "step": 127300 + }, + { + "epoch": 0.55, + "learning_rate": 4.3627074186501995e-05, + "loss": 1.6929, + "step": 127400 + }, + { + "epoch": 0.55, + "learning_rate": 4.3621645799948756e-05, + "loss": 1.6755, + "step": 127500 + }, + { + "epoch": 0.55, + "learning_rate": 4.361621741339552e-05, + "loss": 1.6561, + "step": 127600 + }, + { + "epoch": 0.55, + "learning_rate": 4.361078902684229e-05, + "loss": 1.6771, + "step": 127700 + }, + { + "epoch": 0.55, + "learning_rate": 4.360536064028905e-05, + "loss": 1.682, + "step": 127800 + }, + { + "epoch": 0.55, + "learning_rate": 4.359993225373582e-05, + "loss": 1.6525, + "step": 127900 + }, + { + "epoch": 0.55, + "learning_rate": 4.3594503867182586e-05, + "loss": 1.6523, + "step": 128000 + }, + { + "epoch": 0.55, + "learning_rate": 4.358907548062935e-05, + "loss": 1.6703, + "step": 128100 + }, + { + "epoch": 0.55, + "learning_rate": 4.358364709407612e-05, + "loss": 1.659, + "step": 128200 + }, + { + "epoch": 0.55, + "learning_rate": 4.357821870752288e-05, + "loss": 1.6595, + "step": 128300 + }, + { + "epoch": 0.55, + "learning_rate": 4.357279032096964e-05, + "loss": 1.655, + "step": 128400 + }, + { + "epoch": 0.55, + "learning_rate": 4.356736193441641e-05, + "loss": 1.674, + "step": 128500 + }, + { + "epoch": 0.55, + "learning_rate": 4.356193354786317e-05, + "loss": 1.6901, + "step": 128600 + }, + { + "epoch": 0.55, + "learning_rate": 4.355650516130993e-05, + "loss": 1.681, + "step": 128700 + }, + { + "epoch": 0.55, + "learning_rate": 4.35510767747567e-05, + "loss": 1.6741, + "step": 128800 + }, + { + "epoch": 0.55, + "learning_rate": 4.354564838820347e-05, + "loss": 1.6859, + "step": 128900 + }, + { + "epoch": 0.55, + "learning_rate": 4.354022000165023e-05, + "loss": 1.6399, + "step": 129000 + }, + { + "epoch": 0.55, + "learning_rate": 4.3534791615097e-05, + "loss": 1.6625, + "step": 129100 + }, + { + "epoch": 0.56, + "learning_rate": 4.352936322854376e-05, + "loss": 1.6884, + "step": 129200 + }, + { + "epoch": 0.56, + "learning_rate": 4.3523934841990524e-05, + "loss": 1.6917, + "step": 129300 + }, + { + "epoch": 0.56, + "learning_rate": 4.351850645543729e-05, + "loss": 1.6867, + "step": 129400 + }, + { + "epoch": 0.56, + "learning_rate": 4.3513078068884055e-05, + "loss": 1.6759, + "step": 129500 + }, + { + "epoch": 0.56, + "learning_rate": 4.350764968233082e-05, + "loss": 1.6718, + "step": 129600 + }, + { + "epoch": 0.56, + "learning_rate": 4.3502221295777585e-05, + "loss": 1.6435, + "step": 129700 + }, + { + "epoch": 0.56, + "learning_rate": 4.349679290922435e-05, + "loss": 1.6757, + "step": 129800 + }, + { + "epoch": 0.56, + "learning_rate": 4.3491364522671116e-05, + "loss": 1.6973, + "step": 129900 + }, + { + "epoch": 0.56, + "learning_rate": 4.3485936136117885e-05, + "loss": 1.6855, + "step": 130000 + }, + { + "epoch": 0.56, + "eval_loss": 1.5044703483581543, + "eval_runtime": 18.9872, + "eval_samples_per_second": 526.672, + "eval_steps_per_second": 16.485, + "step": 130000 + }, + { + "epoch": 0.56, + "learning_rate": 4.3480507749564646e-05, + "loss": 1.6843, + "step": 130100 + }, + { + "epoch": 0.56, + "learning_rate": 4.347507936301141e-05, + "loss": 1.6709, + "step": 130200 + }, + { + "epoch": 0.56, + "learning_rate": 4.346965097645818e-05, + "loss": 1.6656, + "step": 130300 + }, + { + "epoch": 0.56, + "learning_rate": 4.346422258990494e-05, + "loss": 1.6635, + "step": 130400 + }, + { + "epoch": 0.56, + "learning_rate": 4.345879420335171e-05, + "loss": 1.6454, + "step": 130500 + }, + { + "epoch": 0.56, + "learning_rate": 4.345336581679847e-05, + "loss": 1.678, + "step": 130600 + }, + { + "epoch": 0.56, + "learning_rate": 4.344793743024523e-05, + "loss": 1.6965, + "step": 130700 + }, + { + "epoch": 0.56, + "learning_rate": 4.3442509043692e-05, + "loss": 1.6447, + "step": 130800 + }, + { + "epoch": 0.56, + "learning_rate": 4.343708065713876e-05, + "loss": 1.6605, + "step": 130900 + }, + { + "epoch": 0.56, + "learning_rate": 4.343165227058553e-05, + "loss": 1.6782, + "step": 131000 + }, + { + "epoch": 0.56, + "learning_rate": 4.34262238840323e-05, + "loss": 1.6535, + "step": 131100 + }, + { + "epoch": 0.56, + "learning_rate": 4.342079549747906e-05, + "loss": 1.6603, + "step": 131200 + }, + { + "epoch": 0.56, + "learning_rate": 4.341536711092582e-05, + "loss": 1.6508, + "step": 131300 + }, + { + "epoch": 0.56, + "learning_rate": 4.340993872437259e-05, + "loss": 1.6625, + "step": 131400 + }, + { + "epoch": 0.56, + "learning_rate": 4.340451033781935e-05, + "loss": 1.6584, + "step": 131500 + }, + { + "epoch": 0.57, + "learning_rate": 4.3399081951266115e-05, + "loss": 1.6942, + "step": 131600 + }, + { + "epoch": 0.57, + "learning_rate": 4.3393653564712884e-05, + "loss": 1.6679, + "step": 131700 + }, + { + "epoch": 0.57, + "learning_rate": 4.3388225178159646e-05, + "loss": 1.6651, + "step": 131800 + }, + { + "epoch": 0.57, + "learning_rate": 4.3382796791606414e-05, + "loss": 1.6969, + "step": 131900 + }, + { + "epoch": 0.57, + "learning_rate": 4.3377368405053176e-05, + "loss": 1.7259, + "step": 132000 + }, + { + "epoch": 0.57, + "learning_rate": 4.3371940018499945e-05, + "loss": 1.6257, + "step": 132100 + }, + { + "epoch": 0.57, + "learning_rate": 4.336651163194671e-05, + "loss": 1.6517, + "step": 132200 + }, + { + "epoch": 0.57, + "learning_rate": 4.3361083245393475e-05, + "loss": 1.7008, + "step": 132300 + }, + { + "epoch": 0.57, + "learning_rate": 4.335565485884024e-05, + "loss": 1.6519, + "step": 132400 + }, + { + "epoch": 0.57, + "learning_rate": 4.3350226472287006e-05, + "loss": 1.6914, + "step": 132500 + }, + { + "epoch": 0.57, + "learning_rate": 4.334479808573377e-05, + "loss": 1.6574, + "step": 132600 + }, + { + "epoch": 0.57, + "learning_rate": 4.333936969918053e-05, + "loss": 1.6667, + "step": 132700 + }, + { + "epoch": 0.57, + "learning_rate": 4.33339413126273e-05, + "loss": 1.6928, + "step": 132800 + }, + { + "epoch": 0.57, + "learning_rate": 4.332851292607406e-05, + "loss": 1.677, + "step": 132900 + }, + { + "epoch": 0.57, + "learning_rate": 4.332308453952082e-05, + "loss": 1.6511, + "step": 133000 + }, + { + "epoch": 0.57, + "learning_rate": 4.33176561529676e-05, + "loss": 1.6605, + "step": 133100 + }, + { + "epoch": 0.57, + "learning_rate": 4.331222776641436e-05, + "loss": 1.6317, + "step": 133200 + }, + { + "epoch": 0.57, + "learning_rate": 4.330679937986112e-05, + "loss": 1.6925, + "step": 133300 + }, + { + "epoch": 0.57, + "learning_rate": 4.330137099330789e-05, + "loss": 1.6553, + "step": 133400 + }, + { + "epoch": 0.57, + "learning_rate": 4.329594260675465e-05, + "loss": 1.6675, + "step": 133500 + }, + { + "epoch": 0.57, + "learning_rate": 4.3290514220201414e-05, + "loss": 1.6711, + "step": 133600 + }, + { + "epoch": 0.57, + "learning_rate": 4.328508583364818e-05, + "loss": 1.64, + "step": 133700 + }, + { + "epoch": 0.57, + "learning_rate": 4.3279657447094944e-05, + "loss": 1.6866, + "step": 133800 + }, + { + "epoch": 0.58, + "learning_rate": 4.327422906054171e-05, + "loss": 1.6831, + "step": 133900 + }, + { + "epoch": 0.58, + "learning_rate": 4.3268800673988475e-05, + "loss": 1.6413, + "step": 134000 + }, + { + "epoch": 0.58, + "learning_rate": 4.326337228743524e-05, + "loss": 1.6605, + "step": 134100 + }, + { + "epoch": 0.58, + "learning_rate": 4.3257943900882005e-05, + "loss": 1.7032, + "step": 134200 + }, + { + "epoch": 0.58, + "learning_rate": 4.3252515514328774e-05, + "loss": 1.6694, + "step": 134300 + }, + { + "epoch": 0.58, + "learning_rate": 4.3247087127775536e-05, + "loss": 1.6462, + "step": 134400 + }, + { + "epoch": 0.58, + "learning_rate": 4.3241658741222304e-05, + "loss": 1.6618, + "step": 134500 + }, + { + "epoch": 0.58, + "learning_rate": 4.3236230354669066e-05, + "loss": 1.6611, + "step": 134600 + }, + { + "epoch": 0.58, + "learning_rate": 4.323080196811583e-05, + "loss": 1.6743, + "step": 134700 + }, + { + "epoch": 0.58, + "learning_rate": 4.32253735815626e-05, + "loss": 1.7014, + "step": 134800 + }, + { + "epoch": 0.58, + "learning_rate": 4.321994519500936e-05, + "loss": 1.6307, + "step": 134900 + }, + { + "epoch": 0.58, + "learning_rate": 4.321451680845612e-05, + "loss": 1.7163, + "step": 135000 + }, + { + "epoch": 0.58, + "eval_loss": 1.502817153930664, + "eval_runtime": 19.0069, + "eval_samples_per_second": 526.124, + "eval_steps_per_second": 16.468, + "step": 135000 + }, + { + "epoch": 0.58, + "learning_rate": 4.320908842190289e-05, + "loss": 1.6829, + "step": 135100 + }, + { + "epoch": 0.58, + "learning_rate": 4.320366003534966e-05, + "loss": 1.6813, + "step": 135200 + }, + { + "epoch": 0.58, + "learning_rate": 4.319823164879642e-05, + "loss": 1.7002, + "step": 135300 + }, + { + "epoch": 0.58, + "learning_rate": 4.319280326224319e-05, + "loss": 1.69, + "step": 135400 + }, + { + "epoch": 0.58, + "learning_rate": 4.318737487568995e-05, + "loss": 1.6889, + "step": 135500 + }, + { + "epoch": 0.58, + "learning_rate": 4.318194648913671e-05, + "loss": 1.6668, + "step": 135600 + }, + { + "epoch": 0.58, + "learning_rate": 4.317651810258348e-05, + "loss": 1.6814, + "step": 135700 + }, + { + "epoch": 0.58, + "learning_rate": 4.317108971603024e-05, + "loss": 1.6247, + "step": 135800 + }, + { + "epoch": 0.58, + "learning_rate": 4.316566132947701e-05, + "loss": 1.6818, + "step": 135900 + }, + { + "epoch": 0.58, + "learning_rate": 4.316023294292377e-05, + "loss": 1.696, + "step": 136000 + }, + { + "epoch": 0.58, + "learning_rate": 4.3154804556370535e-05, + "loss": 1.6665, + "step": 136100 + }, + { + "epoch": 0.59, + "learning_rate": 4.3149376169817304e-05, + "loss": 1.6671, + "step": 136200 + }, + { + "epoch": 0.59, + "learning_rate": 4.314394778326407e-05, + "loss": 1.6745, + "step": 136300 + }, + { + "epoch": 0.59, + "learning_rate": 4.3138519396710834e-05, + "loss": 1.6701, + "step": 136400 + }, + { + "epoch": 0.59, + "learning_rate": 4.31330910101576e-05, + "loss": 1.6677, + "step": 136500 + }, + { + "epoch": 0.59, + "learning_rate": 4.3127662623604365e-05, + "loss": 1.6759, + "step": 136600 + }, + { + "epoch": 0.59, + "learning_rate": 4.312223423705113e-05, + "loss": 1.6808, + "step": 136700 + }, + { + "epoch": 0.59, + "learning_rate": 4.3116805850497895e-05, + "loss": 1.6809, + "step": 136800 + }, + { + "epoch": 0.59, + "learning_rate": 4.311137746394466e-05, + "loss": 1.6222, + "step": 136900 + }, + { + "epoch": 0.59, + "learning_rate": 4.310594907739142e-05, + "loss": 1.698, + "step": 137000 + }, + { + "epoch": 0.59, + "learning_rate": 4.310052069083819e-05, + "loss": 1.6752, + "step": 137100 + }, + { + "epoch": 0.59, + "learning_rate": 4.309509230428495e-05, + "loss": 1.7055, + "step": 137200 + }, + { + "epoch": 0.59, + "learning_rate": 4.308966391773172e-05, + "loss": 1.6652, + "step": 137300 + }, + { + "epoch": 0.59, + "learning_rate": 4.308423553117849e-05, + "loss": 1.6686, + "step": 137400 + }, + { + "epoch": 0.59, + "learning_rate": 4.307880714462525e-05, + "loss": 1.6582, + "step": 137500 + }, + { + "epoch": 0.59, + "learning_rate": 4.307337875807201e-05, + "loss": 1.6555, + "step": 137600 + }, + { + "epoch": 0.59, + "learning_rate": 4.306795037151878e-05, + "loss": 1.6501, + "step": 137700 + }, + { + "epoch": 0.59, + "learning_rate": 4.306252198496554e-05, + "loss": 1.6653, + "step": 137800 + }, + { + "epoch": 0.59, + "learning_rate": 4.305709359841231e-05, + "loss": 1.6581, + "step": 137900 + }, + { + "epoch": 0.59, + "learning_rate": 4.305166521185907e-05, + "loss": 1.6755, + "step": 138000 + }, + { + "epoch": 0.59, + "learning_rate": 4.3046236825305834e-05, + "loss": 1.6837, + "step": 138100 + }, + { + "epoch": 0.59, + "learning_rate": 4.30408084387526e-05, + "loss": 1.6873, + "step": 138200 + }, + { + "epoch": 0.59, + "learning_rate": 4.3035380052199364e-05, + "loss": 1.6475, + "step": 138300 + }, + { + "epoch": 0.59, + "learning_rate": 4.302995166564613e-05, + "loss": 1.6719, + "step": 138400 + }, + { + "epoch": 0.6, + "learning_rate": 4.30245232790929e-05, + "loss": 1.664, + "step": 138500 + }, + { + "epoch": 0.6, + "learning_rate": 4.301909489253966e-05, + "loss": 1.6426, + "step": 138600 + }, + { + "epoch": 0.6, + "learning_rate": 4.3013666505986425e-05, + "loss": 1.6444, + "step": 138700 + }, + { + "epoch": 0.6, + "learning_rate": 4.3008238119433194e-05, + "loss": 1.6721, + "step": 138800 + }, + { + "epoch": 0.6, + "learning_rate": 4.3002809732879956e-05, + "loss": 1.6697, + "step": 138900 + }, + { + "epoch": 0.6, + "learning_rate": 4.299738134632672e-05, + "loss": 1.6546, + "step": 139000 + }, + { + "epoch": 0.6, + "learning_rate": 4.2991952959773486e-05, + "loss": 1.6909, + "step": 139100 + }, + { + "epoch": 0.6, + "learning_rate": 4.298652457322025e-05, + "loss": 1.6854, + "step": 139200 + }, + { + "epoch": 0.6, + "learning_rate": 4.298109618666702e-05, + "loss": 1.6527, + "step": 139300 + }, + { + "epoch": 0.6, + "learning_rate": 4.2975667800113785e-05, + "loss": 1.6853, + "step": 139400 + }, + { + "epoch": 0.6, + "learning_rate": 4.297023941356055e-05, + "loss": 1.6556, + "step": 139500 + }, + { + "epoch": 0.6, + "learning_rate": 4.296481102700731e-05, + "loss": 1.6656, + "step": 139600 + }, + { + "epoch": 0.6, + "learning_rate": 4.295938264045408e-05, + "loss": 1.6581, + "step": 139700 + }, + { + "epoch": 0.6, + "learning_rate": 4.295395425390084e-05, + "loss": 1.6422, + "step": 139800 + }, + { + "epoch": 0.6, + "learning_rate": 4.294852586734761e-05, + "loss": 1.6744, + "step": 139900 + }, + { + "epoch": 0.6, + "learning_rate": 4.294309748079437e-05, + "loss": 1.6645, + "step": 140000 + }, + { + "epoch": 0.6, + "eval_loss": 1.4998124837875366, + "eval_runtime": 18.9623, + "eval_samples_per_second": 527.362, + "eval_steps_per_second": 16.506, + "step": 140000 + }, + { + "epoch": 0.6, + "learning_rate": 4.293766909424113e-05, + "loss": 1.7061, + "step": 140100 + }, + { + "epoch": 0.6, + "learning_rate": 4.29322407076879e-05, + "loss": 1.6883, + "step": 140200 + }, + { + "epoch": 0.6, + "learning_rate": 4.292681232113466e-05, + "loss": 1.6674, + "step": 140300 + }, + { + "epoch": 0.6, + "learning_rate": 4.2921383934581425e-05, + "loss": 1.6763, + "step": 140400 + }, + { + "epoch": 0.6, + "learning_rate": 4.29159555480282e-05, + "loss": 1.6551, + "step": 140500 + }, + { + "epoch": 0.6, + "learning_rate": 4.291052716147496e-05, + "loss": 1.6787, + "step": 140600 + }, + { + "epoch": 0.6, + "learning_rate": 4.2905098774921724e-05, + "loss": 1.6661, + "step": 140700 + }, + { + "epoch": 0.6, + "learning_rate": 4.289967038836849e-05, + "loss": 1.6769, + "step": 140800 + }, + { + "epoch": 0.61, + "learning_rate": 4.2894242001815254e-05, + "loss": 1.6581, + "step": 140900 + }, + { + "epoch": 0.61, + "learning_rate": 4.2888813615262016e-05, + "loss": 1.6592, + "step": 141000 + }, + { + "epoch": 0.61, + "learning_rate": 4.2883385228708785e-05, + "loss": 1.6744, + "step": 141100 + }, + { + "epoch": 0.61, + "learning_rate": 4.287795684215555e-05, + "loss": 1.6752, + "step": 141200 + }, + { + "epoch": 0.61, + "learning_rate": 4.2872528455602315e-05, + "loss": 1.6546, + "step": 141300 + }, + { + "epoch": 0.61, + "learning_rate": 4.286710006904908e-05, + "loss": 1.6599, + "step": 141400 + }, + { + "epoch": 0.61, + "learning_rate": 4.2861671682495846e-05, + "loss": 1.6713, + "step": 141500 + }, + { + "epoch": 0.61, + "learning_rate": 4.285624329594261e-05, + "loss": 1.6276, + "step": 141600 + }, + { + "epoch": 0.61, + "learning_rate": 4.2850814909389376e-05, + "loss": 1.6647, + "step": 141700 + }, + { + "epoch": 0.61, + "learning_rate": 4.284538652283614e-05, + "loss": 1.6548, + "step": 141800 + }, + { + "epoch": 0.61, + "learning_rate": 4.283995813628291e-05, + "loss": 1.6565, + "step": 141900 + }, + { + "epoch": 0.61, + "learning_rate": 4.283452974972967e-05, + "loss": 1.6679, + "step": 142000 + }, + { + "epoch": 0.61, + "learning_rate": 4.282910136317643e-05, + "loss": 1.6429, + "step": 142100 + }, + { + "epoch": 0.61, + "learning_rate": 4.28236729766232e-05, + "loss": 1.6365, + "step": 142200 + }, + { + "epoch": 0.61, + "learning_rate": 4.281824459006996e-05, + "loss": 1.6318, + "step": 142300 + }, + { + "epoch": 0.61, + "learning_rate": 4.281281620351672e-05, + "loss": 1.6821, + "step": 142400 + }, + { + "epoch": 0.61, + "learning_rate": 4.28073878169635e-05, + "loss": 1.658, + "step": 142500 + }, + { + "epoch": 0.61, + "learning_rate": 4.280195943041026e-05, + "loss": 1.6852, + "step": 142600 + }, + { + "epoch": 0.61, + "learning_rate": 4.279653104385702e-05, + "loss": 1.6617, + "step": 142700 + }, + { + "epoch": 0.61, + "learning_rate": 4.279110265730379e-05, + "loss": 1.699, + "step": 142800 + }, + { + "epoch": 0.61, + "learning_rate": 4.278567427075055e-05, + "loss": 1.6443, + "step": 142900 + }, + { + "epoch": 0.61, + "learning_rate": 4.2780245884197315e-05, + "loss": 1.6357, + "step": 143000 + }, + { + "epoch": 0.61, + "learning_rate": 4.277481749764408e-05, + "loss": 1.6719, + "step": 143100 + }, + { + "epoch": 0.62, + "learning_rate": 4.2769389111090845e-05, + "loss": 1.6529, + "step": 143200 + }, + { + "epoch": 0.62, + "learning_rate": 4.2763960724537614e-05, + "loss": 1.6443, + "step": 143300 + }, + { + "epoch": 0.62, + "learning_rate": 4.2758532337984376e-05, + "loss": 1.6534, + "step": 143400 + }, + { + "epoch": 0.62, + "learning_rate": 4.275310395143114e-05, + "loss": 1.6673, + "step": 143500 + }, + { + "epoch": 0.62, + "learning_rate": 4.2747675564877906e-05, + "loss": 1.6496, + "step": 143600 + }, + { + "epoch": 0.62, + "learning_rate": 4.2742247178324675e-05, + "loss": 1.6808, + "step": 143700 + }, + { + "epoch": 0.62, + "learning_rate": 4.273681879177144e-05, + "loss": 1.6599, + "step": 143800 + }, + { + "epoch": 0.62, + "learning_rate": 4.2731390405218205e-05, + "loss": 1.6626, + "step": 143900 + }, + { + "epoch": 0.62, + "learning_rate": 4.272596201866497e-05, + "loss": 1.6623, + "step": 144000 + }, + { + "epoch": 0.62, + "learning_rate": 4.272053363211173e-05, + "loss": 1.6576, + "step": 144100 + }, + { + "epoch": 0.62, + "learning_rate": 4.27151052455585e-05, + "loss": 1.6479, + "step": 144200 + }, + { + "epoch": 0.62, + "learning_rate": 4.270967685900526e-05, + "loss": 1.6492, + "step": 144300 + }, + { + "epoch": 0.62, + "learning_rate": 4.270424847245202e-05, + "loss": 1.6916, + "step": 144400 + }, + { + "epoch": 0.62, + "learning_rate": 4.269882008589879e-05, + "loss": 1.6518, + "step": 144500 + }, + { + "epoch": 0.62, + "learning_rate": 4.269339169934556e-05, + "loss": 1.6815, + "step": 144600 + }, + { + "epoch": 0.62, + "learning_rate": 4.268796331279232e-05, + "loss": 1.6319, + "step": 144700 + }, + { + "epoch": 0.62, + "learning_rate": 4.268253492623909e-05, + "loss": 1.668, + "step": 144800 + }, + { + "epoch": 0.62, + "learning_rate": 4.267710653968585e-05, + "loss": 1.6594, + "step": 144900 + }, + { + "epoch": 0.62, + "learning_rate": 4.267167815313261e-05, + "loss": 1.6918, + "step": 145000 + }, + { + "epoch": 0.62, + "eval_loss": 1.4964494705200195, + "eval_runtime": 19.0245, + "eval_samples_per_second": 525.638, + "eval_steps_per_second": 16.452, + "step": 145000 + }, + { + "epoch": 0.62, + "learning_rate": 4.266624976657938e-05, + "loss": 1.6603, + "step": 145100 + }, + { + "epoch": 0.62, + "learning_rate": 4.2660821380026144e-05, + "loss": 1.6859, + "step": 145200 + }, + { + "epoch": 0.62, + "learning_rate": 4.265539299347291e-05, + "loss": 1.6378, + "step": 145300 + }, + { + "epoch": 0.62, + "learning_rate": 4.2649964606919674e-05, + "loss": 1.6768, + "step": 145400 + }, + { + "epoch": 0.63, + "learning_rate": 4.2644536220366436e-05, + "loss": 1.6845, + "step": 145500 + }, + { + "epoch": 0.63, + "learning_rate": 4.2639107833813205e-05, + "loss": 1.6757, + "step": 145600 + }, + { + "epoch": 0.63, + "learning_rate": 4.263367944725997e-05, + "loss": 1.6575, + "step": 145700 + }, + { + "epoch": 0.63, + "learning_rate": 4.2628251060706735e-05, + "loss": 1.6798, + "step": 145800 + }, + { + "epoch": 0.63, + "learning_rate": 4.2622822674153504e-05, + "loss": 1.6504, + "step": 145900 + }, + { + "epoch": 0.63, + "learning_rate": 4.2617394287600266e-05, + "loss": 1.6527, + "step": 146000 + }, + { + "epoch": 0.63, + "learning_rate": 4.261196590104703e-05, + "loss": 1.6972, + "step": 146100 + }, + { + "epoch": 0.63, + "learning_rate": 4.2606537514493796e-05, + "loss": 1.6573, + "step": 146200 + }, + { + "epoch": 0.63, + "learning_rate": 4.260110912794056e-05, + "loss": 1.6811, + "step": 146300 + }, + { + "epoch": 0.63, + "learning_rate": 4.259568074138732e-05, + "loss": 1.6494, + "step": 146400 + }, + { + "epoch": 0.63, + "learning_rate": 4.259025235483409e-05, + "loss": 1.6271, + "step": 146500 + }, + { + "epoch": 0.63, + "learning_rate": 4.258482396828085e-05, + "loss": 1.6904, + "step": 146600 + }, + { + "epoch": 0.63, + "learning_rate": 4.257939558172762e-05, + "loss": 1.6779, + "step": 146700 + }, + { + "epoch": 0.63, + "learning_rate": 4.257396719517439e-05, + "loss": 1.6485, + "step": 146800 + }, + { + "epoch": 0.63, + "learning_rate": 4.256853880862115e-05, + "loss": 1.6511, + "step": 146900 + }, + { + "epoch": 0.63, + "learning_rate": 4.256311042206791e-05, + "loss": 1.6497, + "step": 147000 + }, + { + "epoch": 0.63, + "learning_rate": 4.255768203551468e-05, + "loss": 1.6858, + "step": 147100 + }, + { + "epoch": 0.63, + "learning_rate": 4.255225364896144e-05, + "loss": 1.6702, + "step": 147200 + }, + { + "epoch": 0.63, + "learning_rate": 4.254682526240821e-05, + "loss": 1.6352, + "step": 147300 + }, + { + "epoch": 0.63, + "learning_rate": 4.254139687585497e-05, + "loss": 1.6741, + "step": 147400 + }, + { + "epoch": 0.63, + "learning_rate": 4.2535968489301735e-05, + "loss": 1.6842, + "step": 147500 + }, + { + "epoch": 0.63, + "learning_rate": 4.25305401027485e-05, + "loss": 1.6406, + "step": 147600 + }, + { + "epoch": 0.63, + "learning_rate": 4.2525111716195265e-05, + "loss": 1.6622, + "step": 147700 + }, + { + "epoch": 0.63, + "learning_rate": 4.2519683329642034e-05, + "loss": 1.636, + "step": 147800 + }, + { + "epoch": 0.64, + "learning_rate": 4.25142549430888e-05, + "loss": 1.6487, + "step": 147900 + }, + { + "epoch": 0.64, + "learning_rate": 4.2508826556535564e-05, + "loss": 1.6737, + "step": 148000 + }, + { + "epoch": 0.64, + "learning_rate": 4.2503398169982326e-05, + "loss": 1.6372, + "step": 148100 + }, + { + "epoch": 0.64, + "learning_rate": 4.2497969783429095e-05, + "loss": 1.6472, + "step": 148200 + }, + { + "epoch": 0.64, + "learning_rate": 4.2492541396875857e-05, + "loss": 1.6508, + "step": 148300 + }, + { + "epoch": 0.64, + "learning_rate": 4.248711301032262e-05, + "loss": 1.6693, + "step": 148400 + }, + { + "epoch": 0.64, + "learning_rate": 4.248168462376939e-05, + "loss": 1.6808, + "step": 148500 + }, + { + "epoch": 0.64, + "learning_rate": 4.247625623721615e-05, + "loss": 1.6758, + "step": 148600 + }, + { + "epoch": 0.64, + "learning_rate": 4.247082785066291e-05, + "loss": 1.6726, + "step": 148700 + }, + { + "epoch": 0.64, + "learning_rate": 4.2465399464109686e-05, + "loss": 1.6293, + "step": 148800 + }, + { + "epoch": 0.64, + "learning_rate": 4.245997107755645e-05, + "loss": 1.6371, + "step": 148900 + }, + { + "epoch": 0.64, + "learning_rate": 4.245454269100321e-05, + "loss": 1.6726, + "step": 149000 + }, + { + "epoch": 0.64, + "learning_rate": 4.244911430444998e-05, + "loss": 1.6672, + "step": 149100 + }, + { + "epoch": 0.64, + "learning_rate": 4.244368591789674e-05, + "loss": 1.6564, + "step": 149200 + }, + { + "epoch": 0.64, + "learning_rate": 4.243825753134351e-05, + "loss": 1.6373, + "step": 149300 + }, + { + "epoch": 0.64, + "learning_rate": 4.243282914479027e-05, + "loss": 1.6795, + "step": 149400 + }, + { + "epoch": 0.64, + "learning_rate": 4.242740075823703e-05, + "loss": 1.6624, + "step": 149500 + }, + { + "epoch": 0.64, + "learning_rate": 4.24219723716838e-05, + "loss": 1.6491, + "step": 149600 + }, + { + "epoch": 0.64, + "learning_rate": 4.2416543985130564e-05, + "loss": 1.6289, + "step": 149700 + }, + { + "epoch": 0.64, + "learning_rate": 4.2411115598577325e-05, + "loss": 1.6284, + "step": 149800 + }, + { + "epoch": 0.64, + "learning_rate": 4.24056872120241e-05, + "loss": 1.6781, + "step": 149900 + }, + { + "epoch": 0.64, + "learning_rate": 4.240025882547086e-05, + "loss": 1.6385, + "step": 150000 + }, + { + "epoch": 0.64, + "eval_loss": 1.498610019683838, + "eval_runtime": 19.0453, + "eval_samples_per_second": 525.065, + "eval_steps_per_second": 16.435, + "step": 150000 + }, + { + "epoch": 0.64, + "learning_rate": 4.2394830438917625e-05, + "loss": 1.663, + "step": 150100 + }, + { + "epoch": 0.65, + "learning_rate": 4.238940205236439e-05, + "loss": 1.6227, + "step": 150200 + }, + { + "epoch": 0.65, + "learning_rate": 4.2383973665811155e-05, + "loss": 1.6561, + "step": 150300 + }, + { + "epoch": 0.65, + "learning_rate": 4.237854527925792e-05, + "loss": 1.6525, + "step": 150400 + }, + { + "epoch": 0.65, + "learning_rate": 4.2373116892704686e-05, + "loss": 1.6423, + "step": 150500 + }, + { + "epoch": 0.65, + "learning_rate": 4.236768850615145e-05, + "loss": 1.6528, + "step": 150600 + }, + { + "epoch": 0.65, + "learning_rate": 4.236226011959821e-05, + "loss": 1.6514, + "step": 150700 + }, + { + "epoch": 0.65, + "learning_rate": 4.235683173304498e-05, + "loss": 1.6747, + "step": 150800 + }, + { + "epoch": 0.65, + "learning_rate": 4.235140334649175e-05, + "loss": 1.6646, + "step": 150900 + }, + { + "epoch": 0.65, + "learning_rate": 4.234597495993851e-05, + "loss": 1.6572, + "step": 151000 + }, + { + "epoch": 0.65, + "learning_rate": 4.234054657338528e-05, + "loss": 1.6396, + "step": 151100 + }, + { + "epoch": 0.65, + "learning_rate": 4.233511818683204e-05, + "loss": 1.647, + "step": 151200 + }, + { + "epoch": 0.65, + "learning_rate": 4.232968980027881e-05, + "loss": 1.6633, + "step": 151300 + }, + { + "epoch": 0.65, + "learning_rate": 4.232426141372557e-05, + "loss": 1.6479, + "step": 151400 + }, + { + "epoch": 0.65, + "learning_rate": 4.231883302717233e-05, + "loss": 1.6554, + "step": 151500 + }, + { + "epoch": 0.65, + "learning_rate": 4.23134046406191e-05, + "loss": 1.6536, + "step": 151600 + }, + { + "epoch": 0.65, + "learning_rate": 4.230797625406586e-05, + "loss": 1.6546, + "step": 151700 + }, + { + "epoch": 0.65, + "learning_rate": 4.2302547867512624e-05, + "loss": 1.6994, + "step": 151800 + }, + { + "epoch": 0.65, + "learning_rate": 4.229711948095939e-05, + "loss": 1.6134, + "step": 151900 + }, + { + "epoch": 0.65, + "learning_rate": 4.229169109440616e-05, + "loss": 1.6861, + "step": 152000 + }, + { + "epoch": 0.65, + "learning_rate": 4.228626270785292e-05, + "loss": 1.6588, + "step": 152100 + }, + { + "epoch": 0.65, + "learning_rate": 4.228083432129969e-05, + "loss": 1.6333, + "step": 152200 + }, + { + "epoch": 0.65, + "learning_rate": 4.2275405934746454e-05, + "loss": 1.6712, + "step": 152300 + }, + { + "epoch": 0.65, + "learning_rate": 4.2269977548193215e-05, + "loss": 1.6621, + "step": 152400 + }, + { + "epoch": 0.66, + "learning_rate": 4.2264549161639984e-05, + "loss": 1.6447, + "step": 152500 + }, + { + "epoch": 0.66, + "learning_rate": 4.2259120775086746e-05, + "loss": 1.6746, + "step": 152600 + }, + { + "epoch": 0.66, + "learning_rate": 4.225369238853351e-05, + "loss": 1.6465, + "step": 152700 + }, + { + "epoch": 0.66, + "learning_rate": 4.2248264001980277e-05, + "loss": 1.6743, + "step": 152800 + }, + { + "epoch": 0.66, + "learning_rate": 4.224283561542704e-05, + "loss": 1.6732, + "step": 152900 + }, + { + "epoch": 0.66, + "learning_rate": 4.223740722887381e-05, + "loss": 1.6312, + "step": 153000 + }, + { + "epoch": 0.66, + "learning_rate": 4.2231978842320576e-05, + "loss": 1.6696, + "step": 153100 + }, + { + "epoch": 0.66, + "learning_rate": 4.222655045576734e-05, + "loss": 1.6439, + "step": 153200 + }, + { + "epoch": 0.66, + "learning_rate": 4.2221122069214106e-05, + "loss": 1.6474, + "step": 153300 + }, + { + "epoch": 0.66, + "learning_rate": 4.221569368266087e-05, + "loss": 1.6472, + "step": 153400 + }, + { + "epoch": 0.66, + "learning_rate": 4.221026529610763e-05, + "loss": 1.6635, + "step": 153500 + }, + { + "epoch": 0.66, + "learning_rate": 4.22048369095544e-05, + "loss": 1.6512, + "step": 153600 + }, + { + "epoch": 0.66, + "learning_rate": 4.219940852300116e-05, + "loss": 1.6748, + "step": 153700 + }, + { + "epoch": 0.66, + "learning_rate": 4.219398013644792e-05, + "loss": 1.666, + "step": 153800 + }, + { + "epoch": 0.66, + "learning_rate": 4.218855174989469e-05, + "loss": 1.6338, + "step": 153900 + }, + { + "epoch": 0.66, + "learning_rate": 4.218312336334145e-05, + "loss": 1.6643, + "step": 154000 + }, + { + "epoch": 0.66, + "learning_rate": 4.217769497678822e-05, + "loss": 1.6548, + "step": 154100 + }, + { + "epoch": 0.66, + "learning_rate": 4.217226659023499e-05, + "loss": 1.6619, + "step": 154200 + }, + { + "epoch": 0.66, + "learning_rate": 4.216683820368175e-05, + "loss": 1.6543, + "step": 154300 + }, + { + "epoch": 0.66, + "learning_rate": 4.2161409817128514e-05, + "loss": 1.6247, + "step": 154400 + }, + { + "epoch": 0.66, + "learning_rate": 4.215598143057528e-05, + "loss": 1.6504, + "step": 154500 + }, + { + "epoch": 0.66, + "learning_rate": 4.2150553044022044e-05, + "loss": 1.6754, + "step": 154600 + }, + { + "epoch": 0.66, + "learning_rate": 4.2145124657468806e-05, + "loss": 1.649, + "step": 154700 + }, + { + "epoch": 0.67, + "learning_rate": 4.2139696270915575e-05, + "loss": 1.6665, + "step": 154800 + }, + { + "epoch": 0.67, + "learning_rate": 4.213426788436234e-05, + "loss": 1.6553, + "step": 154900 + }, + { + "epoch": 0.67, + "learning_rate": 4.2128839497809106e-05, + "loss": 1.632, + "step": 155000 + }, + { + "epoch": 0.67, + "eval_loss": 1.4961014986038208, + "eval_runtime": 18.9397, + "eval_samples_per_second": 527.991, + "eval_steps_per_second": 16.526, + "step": 155000 + }, + { + "epoch": 0.67, + "learning_rate": 4.2123411111255874e-05, + "loss": 1.6508, + "step": 155100 + }, + { + "epoch": 0.67, + "learning_rate": 4.2117982724702636e-05, + "loss": 1.6373, + "step": 155200 + }, + { + "epoch": 0.67, + "learning_rate": 4.2112554338149405e-05, + "loss": 1.6201, + "step": 155300 + }, + { + "epoch": 0.67, + "learning_rate": 4.2107125951596167e-05, + "loss": 1.6426, + "step": 155400 + }, + { + "epoch": 0.67, + "learning_rate": 4.210169756504293e-05, + "loss": 1.678, + "step": 155500 + }, + { + "epoch": 0.67, + "learning_rate": 4.20962691784897e-05, + "loss": 1.65, + "step": 155600 + }, + { + "epoch": 0.67, + "learning_rate": 4.209084079193646e-05, + "loss": 1.6561, + "step": 155700 + }, + { + "epoch": 0.67, + "learning_rate": 4.208541240538322e-05, + "loss": 1.6604, + "step": 155800 + }, + { + "epoch": 0.67, + "learning_rate": 4.207998401882999e-05, + "loss": 1.6548, + "step": 155900 + }, + { + "epoch": 0.67, + "learning_rate": 4.207455563227675e-05, + "loss": 1.6122, + "step": 156000 + }, + { + "epoch": 0.67, + "learning_rate": 4.206912724572351e-05, + "loss": 1.6578, + "step": 156100 + }, + { + "epoch": 0.67, + "learning_rate": 4.206369885917029e-05, + "loss": 1.6357, + "step": 156200 + }, + { + "epoch": 0.67, + "learning_rate": 4.205827047261705e-05, + "loss": 1.6642, + "step": 156300 + }, + { + "epoch": 0.67, + "learning_rate": 4.205284208606381e-05, + "loss": 1.6514, + "step": 156400 + }, + { + "epoch": 0.67, + "learning_rate": 4.204741369951058e-05, + "loss": 1.6546, + "step": 156500 + }, + { + "epoch": 0.67, + "learning_rate": 4.204198531295734e-05, + "loss": 1.6795, + "step": 156600 + }, + { + "epoch": 0.67, + "learning_rate": 4.2036556926404105e-05, + "loss": 1.6423, + "step": 156700 + }, + { + "epoch": 0.67, + "learning_rate": 4.2031128539850873e-05, + "loss": 1.6395, + "step": 156800 + }, + { + "epoch": 0.67, + "learning_rate": 4.2025700153297635e-05, + "loss": 1.6905, + "step": 156900 + }, + { + "epoch": 0.67, + "learning_rate": 4.2020271766744404e-05, + "loss": 1.6481, + "step": 157000 + }, + { + "epoch": 0.67, + "learning_rate": 4.2014843380191166e-05, + "loss": 1.6364, + "step": 157100 + }, + { + "epoch": 0.68, + "learning_rate": 4.2009414993637935e-05, + "loss": 1.6389, + "step": 157200 + }, + { + "epoch": 0.68, + "learning_rate": 4.20039866070847e-05, + "loss": 1.6198, + "step": 157300 + }, + { + "epoch": 0.68, + "learning_rate": 4.1998558220531465e-05, + "loss": 1.6397, + "step": 157400 + }, + { + "epoch": 0.68, + "learning_rate": 4.199312983397823e-05, + "loss": 1.6326, + "step": 157500 + }, + { + "epoch": 0.68, + "learning_rate": 4.1987701447424996e-05, + "loss": 1.6583, + "step": 157600 + }, + { + "epoch": 0.68, + "learning_rate": 4.198227306087176e-05, + "loss": 1.6271, + "step": 157700 + }, + { + "epoch": 0.68, + "learning_rate": 4.197684467431852e-05, + "loss": 1.6297, + "step": 157800 + }, + { + "epoch": 0.68, + "learning_rate": 4.197141628776529e-05, + "loss": 1.653, + "step": 157900 + }, + { + "epoch": 0.68, + "learning_rate": 4.196598790121205e-05, + "loss": 1.6225, + "step": 158000 + }, + { + "epoch": 0.68, + "learning_rate": 4.196055951465881e-05, + "loss": 1.6523, + "step": 158100 + }, + { + "epoch": 0.68, + "learning_rate": 4.195513112810558e-05, + "loss": 1.6518, + "step": 158200 + }, + { + "epoch": 0.68, + "learning_rate": 4.194970274155235e-05, + "loss": 1.6458, + "step": 158300 + }, + { + "epoch": 0.68, + "learning_rate": 4.194427435499911e-05, + "loss": 1.6243, + "step": 158400 + }, + { + "epoch": 0.68, + "learning_rate": 4.193884596844588e-05, + "loss": 1.644, + "step": 158500 + }, + { + "epoch": 0.68, + "learning_rate": 4.193341758189264e-05, + "loss": 1.6883, + "step": 158600 + }, + { + "epoch": 0.68, + "learning_rate": 4.19279891953394e-05, + "loss": 1.6521, + "step": 158700 + }, + { + "epoch": 0.68, + "learning_rate": 4.192256080878617e-05, + "loss": 1.7046, + "step": 158800 + }, + { + "epoch": 0.68, + "learning_rate": 4.1917132422232934e-05, + "loss": 1.663, + "step": 158900 + }, + { + "epoch": 0.68, + "learning_rate": 4.19117040356797e-05, + "loss": 1.6656, + "step": 159000 + }, + { + "epoch": 0.68, + "learning_rate": 4.1906275649126464e-05, + "loss": 1.6674, + "step": 159100 + }, + { + "epoch": 0.68, + "learning_rate": 4.1900847262573226e-05, + "loss": 1.6277, + "step": 159200 + }, + { + "epoch": 0.68, + "learning_rate": 4.189541887602e-05, + "loss": 1.6436, + "step": 159300 + }, + { + "epoch": 0.68, + "learning_rate": 4.1889990489466764e-05, + "loss": 1.6566, + "step": 159400 + }, + { + "epoch": 0.69, + "learning_rate": 4.1884562102913525e-05, + "loss": 1.6512, + "step": 159500 + }, + { + "epoch": 0.69, + "learning_rate": 4.1879133716360294e-05, + "loss": 1.6525, + "step": 159600 + }, + { + "epoch": 0.69, + "learning_rate": 4.1873705329807056e-05, + "loss": 1.643, + "step": 159700 + }, + { + "epoch": 0.69, + "learning_rate": 4.186827694325382e-05, + "loss": 1.6227, + "step": 159800 + }, + { + "epoch": 0.69, + "learning_rate": 4.1862848556700586e-05, + "loss": 1.6451, + "step": 159900 + }, + { + "epoch": 0.69, + "learning_rate": 4.185742017014735e-05, + "loss": 1.6474, + "step": 160000 + }, + { + "epoch": 0.69, + "eval_loss": 1.4892033338546753, + "eval_runtime": 18.9542, + "eval_samples_per_second": 527.588, + "eval_steps_per_second": 16.513, + "step": 160000 + }, + { + "epoch": 0.69, + "learning_rate": 4.185199178359411e-05, + "loss": 1.653, + "step": 160100 + }, + { + "epoch": 0.69, + "learning_rate": 4.184656339704088e-05, + "loss": 1.6536, + "step": 160200 + }, + { + "epoch": 0.69, + "learning_rate": 4.184113501048764e-05, + "loss": 1.6498, + "step": 160300 + }, + { + "epoch": 0.69, + "learning_rate": 4.183570662393441e-05, + "loss": 1.6401, + "step": 160400 + }, + { + "epoch": 0.69, + "learning_rate": 4.183027823738118e-05, + "loss": 1.6488, + "step": 160500 + }, + { + "epoch": 0.69, + "learning_rate": 4.182484985082794e-05, + "loss": 1.6423, + "step": 160600 + }, + { + "epoch": 0.69, + "learning_rate": 4.18194214642747e-05, + "loss": 1.636, + "step": 160700 + }, + { + "epoch": 0.69, + "learning_rate": 4.181399307772147e-05, + "loss": 1.6501, + "step": 160800 + }, + { + "epoch": 0.69, + "learning_rate": 4.180856469116823e-05, + "loss": 1.6302, + "step": 160900 + }, + { + "epoch": 0.69, + "learning_rate": 4.1803136304615e-05, + "loss": 1.6565, + "step": 161000 + }, + { + "epoch": 0.69, + "learning_rate": 4.179770791806176e-05, + "loss": 1.636, + "step": 161100 + }, + { + "epoch": 0.69, + "learning_rate": 4.1792279531508525e-05, + "loss": 1.666, + "step": 161200 + }, + { + "epoch": 0.69, + "learning_rate": 4.1786851144955293e-05, + "loss": 1.6687, + "step": 161300 + }, + { + "epoch": 0.69, + "learning_rate": 4.178142275840206e-05, + "loss": 1.6333, + "step": 161400 + }, + { + "epoch": 0.69, + "learning_rate": 4.1775994371848824e-05, + "loss": 1.645, + "step": 161500 + }, + { + "epoch": 0.69, + "learning_rate": 4.177056598529559e-05, + "loss": 1.6549, + "step": 161600 + }, + { + "epoch": 0.69, + "learning_rate": 4.1765137598742354e-05, + "loss": 1.656, + "step": 161700 + }, + { + "epoch": 0.7, + "learning_rate": 4.1759709212189116e-05, + "loss": 1.6529, + "step": 161800 + }, + { + "epoch": 0.7, + "learning_rate": 4.1754280825635885e-05, + "loss": 1.6249, + "step": 161900 + }, + { + "epoch": 0.7, + "learning_rate": 4.174885243908265e-05, + "loss": 1.644, + "step": 162000 + }, + { + "epoch": 0.7, + "learning_rate": 4.174342405252941e-05, + "loss": 1.6583, + "step": 162100 + }, + { + "epoch": 0.7, + "learning_rate": 4.173799566597618e-05, + "loss": 1.6601, + "step": 162200 + }, + { + "epoch": 0.7, + "learning_rate": 4.173256727942294e-05, + "loss": 1.6653, + "step": 162300 + }, + { + "epoch": 0.7, + "learning_rate": 4.172713889286971e-05, + "loss": 1.6278, + "step": 162400 + }, + { + "epoch": 0.7, + "learning_rate": 4.1721710506316477e-05, + "loss": 1.7011, + "step": 162500 + }, + { + "epoch": 0.7, + "learning_rate": 4.171628211976324e-05, + "loss": 1.6512, + "step": 162600 + }, + { + "epoch": 0.7, + "learning_rate": 4.171085373321e-05, + "loss": 1.6867, + "step": 162700 + }, + { + "epoch": 0.7, + "learning_rate": 4.170542534665677e-05, + "loss": 1.6322, + "step": 162800 + }, + { + "epoch": 0.7, + "learning_rate": 4.169999696010353e-05, + "loss": 1.6367, + "step": 162900 + }, + { + "epoch": 0.7, + "learning_rate": 4.16945685735503e-05, + "loss": 1.6477, + "step": 163000 + }, + { + "epoch": 0.7, + "learning_rate": 4.168914018699706e-05, + "loss": 1.6592, + "step": 163100 + }, + { + "epoch": 0.7, + "learning_rate": 4.168371180044382e-05, + "loss": 1.6355, + "step": 163200 + }, + { + "epoch": 0.7, + "learning_rate": 4.167828341389059e-05, + "loss": 1.6156, + "step": 163300 + }, + { + "epoch": 0.7, + "learning_rate": 4.1672855027337354e-05, + "loss": 1.6602, + "step": 163400 + }, + { + "epoch": 0.7, + "learning_rate": 4.166742664078412e-05, + "loss": 1.6826, + "step": 163500 + }, + { + "epoch": 0.7, + "learning_rate": 4.166199825423089e-05, + "loss": 1.6549, + "step": 163600 + }, + { + "epoch": 0.7, + "learning_rate": 4.165656986767765e-05, + "loss": 1.6715, + "step": 163700 + }, + { + "epoch": 0.7, + "learning_rate": 4.1651141481124415e-05, + "loss": 1.7004, + "step": 163800 + }, + { + "epoch": 0.7, + "learning_rate": 4.1645713094571183e-05, + "loss": 1.6407, + "step": 163900 + }, + { + "epoch": 0.7, + "learning_rate": 4.1640284708017945e-05, + "loss": 1.6596, + "step": 164000 + }, + { + "epoch": 0.7, + "learning_rate": 4.163485632146471e-05, + "loss": 1.6159, + "step": 164100 + }, + { + "epoch": 0.71, + "learning_rate": 4.1629427934911476e-05, + "loss": 1.6305, + "step": 164200 + }, + { + "epoch": 0.71, + "learning_rate": 4.162399954835824e-05, + "loss": 1.6307, + "step": 164300 + }, + { + "epoch": 0.71, + "learning_rate": 4.1618571161805006e-05, + "loss": 1.6507, + "step": 164400 + }, + { + "epoch": 0.71, + "learning_rate": 4.1613142775251775e-05, + "loss": 1.668, + "step": 164500 + }, + { + "epoch": 0.71, + "learning_rate": 4.160771438869854e-05, + "loss": 1.6264, + "step": 164600 + }, + { + "epoch": 0.71, + "learning_rate": 4.16022860021453e-05, + "loss": 1.6534, + "step": 164700 + }, + { + "epoch": 0.71, + "learning_rate": 4.159685761559207e-05, + "loss": 1.655, + "step": 164800 + }, + { + "epoch": 0.71, + "learning_rate": 4.159142922903883e-05, + "loss": 1.6359, + "step": 164900 + }, + { + "epoch": 0.71, + "learning_rate": 4.15860008424856e-05, + "loss": 1.656, + "step": 165000 + }, + { + "epoch": 0.71, + "eval_loss": 1.4892749786376953, + "eval_runtime": 19.0388, + "eval_samples_per_second": 525.242, + "eval_steps_per_second": 16.44, + "step": 165000 + }, + { + "epoch": 0.71, + "learning_rate": 4.158057245593236e-05, + "loss": 1.6642, + "step": 165100 + }, + { + "epoch": 0.71, + "learning_rate": 4.157514406937912e-05, + "loss": 1.6051, + "step": 165200 + }, + { + "epoch": 0.71, + "learning_rate": 4.156971568282589e-05, + "loss": 1.6373, + "step": 165300 + }, + { + "epoch": 0.71, + "learning_rate": 4.156428729627265e-05, + "loss": 1.6431, + "step": 165400 + }, + { + "epoch": 0.71, + "learning_rate": 4.1558858909719414e-05, + "loss": 1.6662, + "step": 165500 + }, + { + "epoch": 0.71, + "learning_rate": 4.155343052316619e-05, + "loss": 1.6377, + "step": 165600 + }, + { + "epoch": 0.71, + "learning_rate": 4.154800213661295e-05, + "loss": 1.6392, + "step": 165700 + }, + { + "epoch": 0.71, + "learning_rate": 4.154257375005971e-05, + "loss": 1.6564, + "step": 165800 + }, + { + "epoch": 0.71, + "learning_rate": 4.153714536350648e-05, + "loss": 1.6458, + "step": 165900 + }, + { + "epoch": 0.71, + "learning_rate": 4.1531716976953244e-05, + "loss": 1.6254, + "step": 166000 + }, + { + "epoch": 0.71, + "learning_rate": 4.1526288590400006e-05, + "loss": 1.6604, + "step": 166100 + }, + { + "epoch": 0.71, + "learning_rate": 4.1520860203846774e-05, + "loss": 1.6246, + "step": 166200 + }, + { + "epoch": 0.71, + "learning_rate": 4.1515431817293536e-05, + "loss": 1.6278, + "step": 166300 + }, + { + "epoch": 0.71, + "learning_rate": 4.1510003430740305e-05, + "loss": 1.6363, + "step": 166400 + }, + { + "epoch": 0.72, + "learning_rate": 4.150457504418707e-05, + "loss": 1.6379, + "step": 166500 + }, + { + "epoch": 0.72, + "learning_rate": 4.1499146657633835e-05, + "loss": 1.6476, + "step": 166600 + }, + { + "epoch": 0.72, + "learning_rate": 4.14937182710806e-05, + "loss": 1.6306, + "step": 166700 + }, + { + "epoch": 0.72, + "learning_rate": 4.1488289884527366e-05, + "loss": 1.6411, + "step": 166800 + }, + { + "epoch": 0.72, + "learning_rate": 4.148286149797413e-05, + "loss": 1.6529, + "step": 166900 + }, + { + "epoch": 0.72, + "learning_rate": 4.1477433111420896e-05, + "loss": 1.6336, + "step": 167000 + }, + { + "epoch": 0.72, + "learning_rate": 4.147200472486766e-05, + "loss": 1.641, + "step": 167100 + }, + { + "epoch": 0.72, + "learning_rate": 4.146657633831442e-05, + "loss": 1.6318, + "step": 167200 + }, + { + "epoch": 0.72, + "learning_rate": 4.146114795176119e-05, + "loss": 1.6243, + "step": 167300 + }, + { + "epoch": 0.72, + "learning_rate": 4.145571956520795e-05, + "loss": 1.6485, + "step": 167400 + }, + { + "epoch": 0.72, + "learning_rate": 4.145029117865471e-05, + "loss": 1.6424, + "step": 167500 + }, + { + "epoch": 0.72, + "learning_rate": 4.144486279210148e-05, + "loss": 1.6495, + "step": 167600 + }, + { + "epoch": 0.72, + "learning_rate": 4.143943440554825e-05, + "loss": 1.6417, + "step": 167700 + }, + { + "epoch": 0.72, + "learning_rate": 4.143400601899501e-05, + "loss": 1.6229, + "step": 167800 + }, + { + "epoch": 0.72, + "learning_rate": 4.142857763244178e-05, + "loss": 1.6565, + "step": 167900 + }, + { + "epoch": 0.72, + "learning_rate": 4.142314924588854e-05, + "loss": 1.6324, + "step": 168000 + }, + { + "epoch": 0.72, + "learning_rate": 4.1417720859335304e-05, + "loss": 1.6411, + "step": 168100 + }, + { + "epoch": 0.72, + "learning_rate": 4.141229247278207e-05, + "loss": 1.6343, + "step": 168200 + }, + { + "epoch": 0.72, + "learning_rate": 4.1406864086228835e-05, + "loss": 1.6355, + "step": 168300 + }, + { + "epoch": 0.72, + "learning_rate": 4.14014356996756e-05, + "loss": 1.646, + "step": 168400 + }, + { + "epoch": 0.72, + "learning_rate": 4.1396007313122365e-05, + "loss": 1.6457, + "step": 168500 + }, + { + "epoch": 0.72, + "learning_rate": 4.139057892656913e-05, + "loss": 1.6516, + "step": 168600 + }, + { + "epoch": 0.72, + "learning_rate": 4.1385150540015896e-05, + "loss": 1.6479, + "step": 168700 + }, + { + "epoch": 0.73, + "learning_rate": 4.1379722153462664e-05, + "loss": 1.6242, + "step": 168800 + }, + { + "epoch": 0.73, + "learning_rate": 4.1374293766909426e-05, + "loss": 1.6569, + "step": 168900 + }, + { + "epoch": 0.73, + "learning_rate": 4.1368865380356195e-05, + "loss": 1.6481, + "step": 169000 + }, + { + "epoch": 0.73, + "learning_rate": 4.136343699380296e-05, + "loss": 1.6365, + "step": 169100 + }, + { + "epoch": 0.73, + "learning_rate": 4.135800860724972e-05, + "loss": 1.6197, + "step": 169200 + }, + { + "epoch": 0.73, + "learning_rate": 4.135258022069649e-05, + "loss": 1.6592, + "step": 169300 + }, + { + "epoch": 0.73, + "learning_rate": 4.134715183414325e-05, + "loss": 1.6261, + "step": 169400 + }, + { + "epoch": 0.73, + "learning_rate": 4.134172344759001e-05, + "loss": 1.6491, + "step": 169500 + }, + { + "epoch": 0.73, + "learning_rate": 4.133629506103678e-05, + "loss": 1.6227, + "step": 169600 + }, + { + "epoch": 0.73, + "learning_rate": 4.133086667448354e-05, + "loss": 1.6486, + "step": 169700 + }, + { + "epoch": 0.73, + "learning_rate": 4.132543828793031e-05, + "loss": 1.6376, + "step": 169800 + }, + { + "epoch": 0.73, + "learning_rate": 4.132000990137708e-05, + "loss": 1.6393, + "step": 169900 + }, + { + "epoch": 0.73, + "learning_rate": 4.131458151482384e-05, + "loss": 1.6392, + "step": 170000 + }, + { + "epoch": 0.73, + "eval_loss": 1.4881998300552368, + "eval_runtime": 19.0174, + "eval_samples_per_second": 525.834, + "eval_steps_per_second": 16.459, + "step": 170000 + }, + { + "epoch": 0.73, + "learning_rate": 4.13091531282706e-05, + "loss": 1.6026, + "step": 170100 + }, + { + "epoch": 0.73, + "learning_rate": 4.130372474171737e-05, + "loss": 1.6467, + "step": 170200 + }, + { + "epoch": 0.73, + "learning_rate": 4.129829635516413e-05, + "loss": 1.6615, + "step": 170300 + }, + { + "epoch": 0.73, + "learning_rate": 4.12928679686109e-05, + "loss": 1.6277, + "step": 170400 + }, + { + "epoch": 0.73, + "learning_rate": 4.1287439582057664e-05, + "loss": 1.6062, + "step": 170500 + }, + { + "epoch": 0.73, + "learning_rate": 4.1282011195504426e-05, + "loss": 1.6005, + "step": 170600 + }, + { + "epoch": 0.73, + "learning_rate": 4.1276582808951194e-05, + "loss": 1.6392, + "step": 170700 + }, + { + "epoch": 0.73, + "learning_rate": 4.127115442239796e-05, + "loss": 1.635, + "step": 170800 + }, + { + "epoch": 0.73, + "learning_rate": 4.1265726035844725e-05, + "loss": 1.6457, + "step": 170900 + }, + { + "epoch": 0.73, + "learning_rate": 4.1260297649291493e-05, + "loss": 1.6484, + "step": 171000 + }, + { + "epoch": 0.74, + "learning_rate": 4.1254869262738255e-05, + "loss": 1.6292, + "step": 171100 + }, + { + "epoch": 0.74, + "learning_rate": 4.124944087618502e-05, + "loss": 1.6596, + "step": 171200 + }, + { + "epoch": 0.74, + "learning_rate": 4.1244012489631786e-05, + "loss": 1.6474, + "step": 171300 + }, + { + "epoch": 0.74, + "learning_rate": 4.123858410307855e-05, + "loss": 1.6271, + "step": 171400 + }, + { + "epoch": 0.74, + "learning_rate": 4.123315571652531e-05, + "loss": 1.6191, + "step": 171500 + }, + { + "epoch": 0.74, + "learning_rate": 4.122772732997208e-05, + "loss": 1.6352, + "step": 171600 + }, + { + "epoch": 0.74, + "learning_rate": 4.122229894341884e-05, + "loss": 1.6396, + "step": 171700 + }, + { + "epoch": 0.74, + "learning_rate": 4.121687055686561e-05, + "loss": 1.6545, + "step": 171800 + }, + { + "epoch": 0.74, + "learning_rate": 4.121144217031238e-05, + "loss": 1.6158, + "step": 171900 + }, + { + "epoch": 0.74, + "learning_rate": 4.120601378375914e-05, + "loss": 1.6239, + "step": 172000 + }, + { + "epoch": 0.74, + "learning_rate": 4.12005853972059e-05, + "loss": 1.6401, + "step": 172100 + }, + { + "epoch": 0.74, + "learning_rate": 4.119515701065267e-05, + "loss": 1.6423, + "step": 172200 + }, + { + "epoch": 0.74, + "learning_rate": 4.118972862409943e-05, + "loss": 1.633, + "step": 172300 + }, + { + "epoch": 0.74, + "learning_rate": 4.11843002375462e-05, + "loss": 1.6451, + "step": 172400 + }, + { + "epoch": 0.74, + "learning_rate": 4.117887185099296e-05, + "loss": 1.6551, + "step": 172500 + }, + { + "epoch": 0.74, + "learning_rate": 4.1173443464439724e-05, + "loss": 1.6391, + "step": 172600 + }, + { + "epoch": 0.74, + "learning_rate": 4.116801507788649e-05, + "loss": 1.6313, + "step": 172700 + }, + { + "epoch": 0.74, + "learning_rate": 4.1162586691333255e-05, + "loss": 1.6167, + "step": 172800 + }, + { + "epoch": 0.74, + "learning_rate": 4.115715830478002e-05, + "loss": 1.6307, + "step": 172900 + }, + { + "epoch": 0.74, + "learning_rate": 4.115172991822679e-05, + "loss": 1.6271, + "step": 173000 + }, + { + "epoch": 0.74, + "learning_rate": 4.1146301531673554e-05, + "loss": 1.6487, + "step": 173100 + }, + { + "epoch": 0.74, + "learning_rate": 4.1140873145120316e-05, + "loss": 1.6376, + "step": 173200 + }, + { + "epoch": 0.74, + "learning_rate": 4.1135444758567084e-05, + "loss": 1.6223, + "step": 173300 + }, + { + "epoch": 0.74, + "learning_rate": 4.1130016372013846e-05, + "loss": 1.6545, + "step": 173400 + }, + { + "epoch": 0.75, + "learning_rate": 4.112458798546061e-05, + "loss": 1.6432, + "step": 173500 + }, + { + "epoch": 0.75, + "learning_rate": 4.111915959890738e-05, + "loss": 1.6115, + "step": 173600 + }, + { + "epoch": 0.75, + "learning_rate": 4.111373121235414e-05, + "loss": 1.6354, + "step": 173700 + }, + { + "epoch": 0.75, + "learning_rate": 4.110830282580091e-05, + "loss": 1.6091, + "step": 173800 + }, + { + "epoch": 0.75, + "learning_rate": 4.110287443924767e-05, + "loss": 1.6004, + "step": 173900 + }, + { + "epoch": 0.75, + "learning_rate": 4.109744605269444e-05, + "loss": 1.6446, + "step": 174000 + }, + { + "epoch": 0.75, + "learning_rate": 4.10920176661412e-05, + "loss": 1.6052, + "step": 174100 + }, + { + "epoch": 0.75, + "learning_rate": 4.108658927958797e-05, + "loss": 1.6133, + "step": 174200 + }, + { + "epoch": 0.75, + "learning_rate": 4.108116089303473e-05, + "loss": 1.6266, + "step": 174300 + }, + { + "epoch": 0.75, + "learning_rate": 4.10757325064815e-05, + "loss": 1.6683, + "step": 174400 + }, + { + "epoch": 0.75, + "learning_rate": 4.107030411992826e-05, + "loss": 1.6587, + "step": 174500 + }, + { + "epoch": 0.75, + "learning_rate": 4.106487573337502e-05, + "loss": 1.649, + "step": 174600 + }, + { + "epoch": 0.75, + "learning_rate": 4.105944734682179e-05, + "loss": 1.6482, + "step": 174700 + }, + { + "epoch": 0.75, + "learning_rate": 4.105401896026855e-05, + "loss": 1.6345, + "step": 174800 + }, + { + "epoch": 0.75, + "learning_rate": 4.1048590573715315e-05, + "loss": 1.6236, + "step": 174900 + }, + { + "epoch": 0.75, + "learning_rate": 4.104316218716209e-05, + "loss": 1.6541, + "step": 175000 + }, + { + "epoch": 0.75, + "eval_loss": 1.4871829748153687, + "eval_runtime": 19.0226, + "eval_samples_per_second": 525.691, + "eval_steps_per_second": 16.454, + "step": 175000 + }, + { + "epoch": 0.75, + "learning_rate": 4.103773380060885e-05, + "loss": 1.6537, + "step": 175100 + }, + { + "epoch": 0.75, + "learning_rate": 4.1032305414055614e-05, + "loss": 1.6186, + "step": 175200 + }, + { + "epoch": 0.75, + "learning_rate": 4.102687702750238e-05, + "loss": 1.6422, + "step": 175300 + }, + { + "epoch": 0.75, + "learning_rate": 4.1021448640949145e-05, + "loss": 1.6535, + "step": 175400 + }, + { + "epoch": 0.75, + "learning_rate": 4.1016020254395907e-05, + "loss": 1.654, + "step": 175500 + }, + { + "epoch": 0.75, + "learning_rate": 4.1010591867842675e-05, + "loss": 1.6388, + "step": 175600 + }, + { + "epoch": 0.75, + "learning_rate": 4.100516348128944e-05, + "loss": 1.6391, + "step": 175700 + }, + { + "epoch": 0.76, + "learning_rate": 4.0999735094736206e-05, + "loss": 1.6144, + "step": 175800 + }, + { + "epoch": 0.76, + "learning_rate": 4.099430670818297e-05, + "loss": 1.6391, + "step": 175900 + }, + { + "epoch": 0.76, + "learning_rate": 4.098887832162973e-05, + "loss": 1.6552, + "step": 176000 + }, + { + "epoch": 0.76, + "learning_rate": 4.09834499350765e-05, + "loss": 1.6019, + "step": 176100 + }, + { + "epoch": 0.76, + "learning_rate": 4.097802154852327e-05, + "loss": 1.6177, + "step": 176200 + }, + { + "epoch": 0.76, + "learning_rate": 4.097259316197003e-05, + "loss": 1.6369, + "step": 176300 + }, + { + "epoch": 0.76, + "learning_rate": 4.09671647754168e-05, + "loss": 1.6127, + "step": 176400 + }, + { + "epoch": 0.76, + "learning_rate": 4.096173638886356e-05, + "loss": 1.6506, + "step": 176500 + }, + { + "epoch": 0.76, + "learning_rate": 4.095630800231032e-05, + "loss": 1.6262, + "step": 176600 + }, + { + "epoch": 0.76, + "learning_rate": 4.095087961575709e-05, + "loss": 1.6192, + "step": 176700 + }, + { + "epoch": 0.76, + "learning_rate": 4.094545122920385e-05, + "loss": 1.6, + "step": 176800 + }, + { + "epoch": 0.76, + "learning_rate": 4.0940022842650613e-05, + "loss": 1.6306, + "step": 176900 + }, + { + "epoch": 0.76, + "learning_rate": 4.093459445609738e-05, + "loss": 1.639, + "step": 177000 + }, + { + "epoch": 0.76, + "learning_rate": 4.092916606954415e-05, + "loss": 1.64, + "step": 177100 + }, + { + "epoch": 0.76, + "learning_rate": 4.092373768299091e-05, + "loss": 1.6938, + "step": 177200 + }, + { + "epoch": 0.76, + "learning_rate": 4.091830929643768e-05, + "loss": 1.6229, + "step": 177300 + }, + { + "epoch": 0.76, + "learning_rate": 4.091288090988444e-05, + "loss": 1.6599, + "step": 177400 + }, + { + "epoch": 0.76, + "learning_rate": 4.0907452523331205e-05, + "loss": 1.6133, + "step": 177500 + }, + { + "epoch": 0.76, + "learning_rate": 4.0902024136777974e-05, + "loss": 1.6335, + "step": 177600 + }, + { + "epoch": 0.76, + "learning_rate": 4.0896595750224736e-05, + "loss": 1.6426, + "step": 177700 + }, + { + "epoch": 0.76, + "learning_rate": 4.0891167363671504e-05, + "loss": 1.6161, + "step": 177800 + }, + { + "epoch": 0.76, + "learning_rate": 4.0885738977118266e-05, + "loss": 1.6421, + "step": 177900 + }, + { + "epoch": 0.76, + "learning_rate": 4.088031059056503e-05, + "loss": 1.6161, + "step": 178000 + }, + { + "epoch": 0.77, + "learning_rate": 4.08748822040118e-05, + "loss": 1.6326, + "step": 178100 + }, + { + "epoch": 0.77, + "learning_rate": 4.0869453817458565e-05, + "loss": 1.6143, + "step": 178200 + }, + { + "epoch": 0.77, + "learning_rate": 4.086402543090533e-05, + "loss": 1.6332, + "step": 178300 + }, + { + "epoch": 0.77, + "learning_rate": 4.0858597044352096e-05, + "loss": 1.6198, + "step": 178400 + }, + { + "epoch": 0.77, + "learning_rate": 4.085316865779886e-05, + "loss": 1.6098, + "step": 178500 + }, + { + "epoch": 0.77, + "learning_rate": 4.084774027124562e-05, + "loss": 1.6328, + "step": 178600 + }, + { + "epoch": 0.77, + "learning_rate": 4.084231188469239e-05, + "loss": 1.6164, + "step": 178700 + }, + { + "epoch": 0.77, + "learning_rate": 4.083688349813915e-05, + "loss": 1.6256, + "step": 178800 + }, + { + "epoch": 0.77, + "learning_rate": 4.083145511158591e-05, + "loss": 1.6203, + "step": 178900 + }, + { + "epoch": 0.77, + "learning_rate": 4.082602672503268e-05, + "loss": 1.6494, + "step": 179000 + }, + { + "epoch": 0.77, + "learning_rate": 4.082059833847944e-05, + "loss": 1.645, + "step": 179100 + }, + { + "epoch": 0.77, + "learning_rate": 4.081516995192621e-05, + "loss": 1.6414, + "step": 179200 + }, + { + "epoch": 0.77, + "learning_rate": 4.080974156537298e-05, + "loss": 1.6364, + "step": 179300 + }, + { + "epoch": 0.77, + "learning_rate": 4.080431317881974e-05, + "loss": 1.6388, + "step": 179400 + }, + { + "epoch": 0.77, + "learning_rate": 4.0798884792266504e-05, + "loss": 1.6376, + "step": 179500 + }, + { + "epoch": 0.77, + "learning_rate": 4.079345640571327e-05, + "loss": 1.6193, + "step": 179600 + }, + { + "epoch": 0.77, + "learning_rate": 4.0788028019160034e-05, + "loss": 1.6593, + "step": 179700 + }, + { + "epoch": 0.77, + "learning_rate": 4.07825996326068e-05, + "loss": 1.6466, + "step": 179800 + }, + { + "epoch": 0.77, + "learning_rate": 4.0777171246053565e-05, + "loss": 1.6292, + "step": 179900 + }, + { + "epoch": 0.77, + "learning_rate": 4.0771742859500326e-05, + "loss": 1.6344, + "step": 180000 + }, + { + "epoch": 0.77, + "eval_loss": 1.4833685159683228, + "eval_runtime": 19.0127, + "eval_samples_per_second": 525.965, + "eval_steps_per_second": 16.463, + "step": 180000 + }, + { + "epoch": 0.77, + "learning_rate": 4.0766314472947095e-05, + "loss": 1.6491, + "step": 180100 + }, + { + "epoch": 0.77, + "learning_rate": 4.076088608639386e-05, + "loss": 1.6105, + "step": 180200 + }, + { + "epoch": 0.77, + "learning_rate": 4.0755457699840626e-05, + "loss": 1.6402, + "step": 180300 + }, + { + "epoch": 0.78, + "learning_rate": 4.0750029313287394e-05, + "loss": 1.632, + "step": 180400 + }, + { + "epoch": 0.78, + "learning_rate": 4.0744600926734156e-05, + "loss": 1.6187, + "step": 180500 + }, + { + "epoch": 0.78, + "learning_rate": 4.073917254018092e-05, + "loss": 1.6083, + "step": 180600 + }, + { + "epoch": 0.78, + "learning_rate": 4.073374415362769e-05, + "loss": 1.6105, + "step": 180700 + }, + { + "epoch": 0.78, + "learning_rate": 4.072831576707445e-05, + "loss": 1.6104, + "step": 180800 + }, + { + "epoch": 0.78, + "learning_rate": 4.072288738052121e-05, + "loss": 1.6171, + "step": 180900 + }, + { + "epoch": 0.78, + "learning_rate": 4.071745899396798e-05, + "loss": 1.6525, + "step": 181000 + }, + { + "epoch": 0.78, + "learning_rate": 4.071203060741474e-05, + "loss": 1.6322, + "step": 181100 + }, + { + "epoch": 0.78, + "learning_rate": 4.07066022208615e-05, + "loss": 1.6336, + "step": 181200 + }, + { + "epoch": 0.78, + "learning_rate": 4.070117383430828e-05, + "loss": 1.6341, + "step": 181300 + }, + { + "epoch": 0.78, + "learning_rate": 4.069574544775504e-05, + "loss": 1.6351, + "step": 181400 + }, + { + "epoch": 0.78, + "learning_rate": 4.06903170612018e-05, + "loss": 1.6252, + "step": 181500 + }, + { + "epoch": 0.78, + "learning_rate": 4.068488867464857e-05, + "loss": 1.6479, + "step": 181600 + }, + { + "epoch": 0.78, + "learning_rate": 4.067946028809533e-05, + "loss": 1.655, + "step": 181700 + }, + { + "epoch": 0.78, + "learning_rate": 4.06740319015421e-05, + "loss": 1.6359, + "step": 181800 + }, + { + "epoch": 0.78, + "learning_rate": 4.066860351498886e-05, + "loss": 1.6313, + "step": 181900 + }, + { + "epoch": 0.78, + "learning_rate": 4.0663175128435625e-05, + "loss": 1.6219, + "step": 182000 + }, + { + "epoch": 0.78, + "learning_rate": 4.0657746741882394e-05, + "loss": 1.6426, + "step": 182100 + }, + { + "epoch": 0.78, + "learning_rate": 4.0652318355329155e-05, + "loss": 1.6112, + "step": 182200 + }, + { + "epoch": 0.78, + "learning_rate": 4.0646889968775924e-05, + "loss": 1.6419, + "step": 182300 + }, + { + "epoch": 0.78, + "learning_rate": 4.064146158222269e-05, + "loss": 1.6644, + "step": 182400 + }, + { + "epoch": 0.78, + "learning_rate": 4.0636033195669455e-05, + "loss": 1.6382, + "step": 182500 + }, + { + "epoch": 0.78, + "learning_rate": 4.0630604809116217e-05, + "loss": 1.6431, + "step": 182600 + }, + { + "epoch": 0.78, + "learning_rate": 4.0625176422562985e-05, + "loss": 1.6237, + "step": 182700 + }, + { + "epoch": 0.79, + "learning_rate": 4.061974803600975e-05, + "loss": 1.66, + "step": 182800 + }, + { + "epoch": 0.79, + "learning_rate": 4.061431964945651e-05, + "loss": 1.6497, + "step": 182900 + }, + { + "epoch": 0.79, + "learning_rate": 4.060889126290328e-05, + "loss": 1.6327, + "step": 183000 + }, + { + "epoch": 0.79, + "learning_rate": 4.060346287635004e-05, + "loss": 1.6126, + "step": 183100 + }, + { + "epoch": 0.79, + "learning_rate": 4.05980344897968e-05, + "loss": 1.6265, + "step": 183200 + }, + { + "epoch": 0.79, + "learning_rate": 4.059260610324357e-05, + "loss": 1.6381, + "step": 183300 + }, + { + "epoch": 0.79, + "learning_rate": 4.058717771669034e-05, + "loss": 1.6149, + "step": 183400 + }, + { + "epoch": 0.79, + "learning_rate": 4.05817493301371e-05, + "loss": 1.6469, + "step": 183500 + }, + { + "epoch": 0.79, + "learning_rate": 4.057632094358387e-05, + "loss": 1.6346, + "step": 183600 + }, + { + "epoch": 0.79, + "learning_rate": 4.057089255703063e-05, + "loss": 1.6221, + "step": 183700 + }, + { + "epoch": 0.79, + "learning_rate": 4.05654641704774e-05, + "loss": 1.6388, + "step": 183800 + }, + { + "epoch": 0.79, + "learning_rate": 4.056003578392416e-05, + "loss": 1.6235, + "step": 183900 + }, + { + "epoch": 0.79, + "learning_rate": 4.0554607397370923e-05, + "loss": 1.6216, + "step": 184000 + }, + { + "epoch": 0.79, + "learning_rate": 4.054917901081769e-05, + "loss": 1.6219, + "step": 184100 + }, + { + "epoch": 0.79, + "learning_rate": 4.0543750624264454e-05, + "loss": 1.6149, + "step": 184200 + }, + { + "epoch": 0.79, + "learning_rate": 4.0538322237711216e-05, + "loss": 1.6441, + "step": 184300 + }, + { + "epoch": 0.79, + "learning_rate": 4.053289385115799e-05, + "loss": 1.6382, + "step": 184400 + }, + { + "epoch": 0.79, + "learning_rate": 4.052746546460475e-05, + "loss": 1.6103, + "step": 184500 + }, + { + "epoch": 0.79, + "learning_rate": 4.0522037078051515e-05, + "loss": 1.6291, + "step": 184600 + }, + { + "epoch": 0.79, + "learning_rate": 4.0516608691498284e-05, + "loss": 1.6306, + "step": 184700 + }, + { + "epoch": 0.79, + "learning_rate": 4.0511180304945046e-05, + "loss": 1.6277, + "step": 184800 + }, + { + "epoch": 0.79, + "learning_rate": 4.050575191839181e-05, + "loss": 1.6136, + "step": 184900 + }, + { + "epoch": 0.79, + "learning_rate": 4.0500323531838576e-05, + "loss": 1.6362, + "step": 185000 + }, + { + "epoch": 0.79, + "eval_loss": 1.480057954788208, + "eval_runtime": 19.051, + "eval_samples_per_second": 524.905, + "eval_steps_per_second": 16.43, + "step": 185000 + }, + { + "epoch": 0.8, + "learning_rate": 4.049489514528534e-05, + "loss": 1.6264, + "step": 185100 + }, + { + "epoch": 0.8, + "learning_rate": 4.04894667587321e-05, + "loss": 1.6282, + "step": 185200 + }, + { + "epoch": 0.8, + "learning_rate": 4.048403837217887e-05, + "loss": 1.6327, + "step": 185300 + }, + { + "epoch": 0.8, + "learning_rate": 4.047860998562563e-05, + "loss": 1.6176, + "step": 185400 + }, + { + "epoch": 0.8, + "learning_rate": 4.04731815990724e-05, + "loss": 1.6385, + "step": 185500 + }, + { + "epoch": 0.8, + "learning_rate": 4.046775321251917e-05, + "loss": 1.6421, + "step": 185600 + }, + { + "epoch": 0.8, + "learning_rate": 4.046232482596593e-05, + "loss": 1.6373, + "step": 185700 + }, + { + "epoch": 0.8, + "learning_rate": 4.04568964394127e-05, + "loss": 1.6184, + "step": 185800 + }, + { + "epoch": 0.8, + "learning_rate": 4.045146805285946e-05, + "loss": 1.6273, + "step": 185900 + }, + { + "epoch": 0.8, + "learning_rate": 4.044603966630622e-05, + "loss": 1.6186, + "step": 186000 + }, + { + "epoch": 0.8, + "learning_rate": 4.044061127975299e-05, + "loss": 1.6068, + "step": 186100 + }, + { + "epoch": 0.8, + "learning_rate": 4.043518289319975e-05, + "loss": 1.6195, + "step": 186200 + }, + { + "epoch": 0.8, + "learning_rate": 4.0429754506646514e-05, + "loss": 1.6136, + "step": 186300 + }, + { + "epoch": 0.8, + "learning_rate": 4.042432612009328e-05, + "loss": 1.6372, + "step": 186400 + }, + { + "epoch": 0.8, + "learning_rate": 4.041889773354005e-05, + "loss": 1.6169, + "step": 186500 + }, + { + "epoch": 0.8, + "learning_rate": 4.0413469346986814e-05, + "loss": 1.631, + "step": 186600 + }, + { + "epoch": 0.8, + "learning_rate": 4.040804096043358e-05, + "loss": 1.631, + "step": 186700 + }, + { + "epoch": 0.8, + "learning_rate": 4.0402612573880344e-05, + "loss": 1.6475, + "step": 186800 + }, + { + "epoch": 0.8, + "learning_rate": 4.0397184187327106e-05, + "loss": 1.6576, + "step": 186900 + }, + { + "epoch": 0.8, + "learning_rate": 4.0391755800773875e-05, + "loss": 1.6185, + "step": 187000 + }, + { + "epoch": 0.8, + "learning_rate": 4.0386327414220636e-05, + "loss": 1.6293, + "step": 187100 + }, + { + "epoch": 0.8, + "learning_rate": 4.03808990276674e-05, + "loss": 1.6254, + "step": 187200 + }, + { + "epoch": 0.8, + "learning_rate": 4.037547064111417e-05, + "loss": 1.6058, + "step": 187300 + }, + { + "epoch": 0.81, + "learning_rate": 4.037004225456093e-05, + "loss": 1.5944, + "step": 187400 + }, + { + "epoch": 0.81, + "learning_rate": 4.03646138680077e-05, + "loss": 1.6126, + "step": 187500 + }, + { + "epoch": 0.81, + "learning_rate": 4.0359185481454466e-05, + "loss": 1.6438, + "step": 187600 + }, + { + "epoch": 0.81, + "learning_rate": 4.035375709490123e-05, + "loss": 1.6302, + "step": 187700 + }, + { + "epoch": 0.81, + "learning_rate": 4.034832870834799e-05, + "loss": 1.6346, + "step": 187800 + }, + { + "epoch": 0.81, + "learning_rate": 4.034290032179476e-05, + "loss": 1.6221, + "step": 187900 + }, + { + "epoch": 0.81, + "learning_rate": 4.033747193524152e-05, + "loss": 1.6381, + "step": 188000 + }, + { + "epoch": 0.81, + "learning_rate": 4.033204354868829e-05, + "loss": 1.607, + "step": 188100 + }, + { + "epoch": 0.81, + "learning_rate": 4.032661516213505e-05, + "loss": 1.5969, + "step": 188200 + }, + { + "epoch": 0.81, + "learning_rate": 4.032118677558181e-05, + "loss": 1.6342, + "step": 188300 + }, + { + "epoch": 0.81, + "learning_rate": 4.031575838902858e-05, + "loss": 1.6447, + "step": 188400 + }, + { + "epoch": 0.81, + "learning_rate": 4.031033000247534e-05, + "loss": 1.6283, + "step": 188500 + }, + { + "epoch": 0.81, + "learning_rate": 4.030490161592211e-05, + "loss": 1.6406, + "step": 188600 + }, + { + "epoch": 0.81, + "learning_rate": 4.029947322936888e-05, + "loss": 1.6354, + "step": 188700 + }, + { + "epoch": 0.81, + "learning_rate": 4.029404484281564e-05, + "loss": 1.5996, + "step": 188800 + }, + { + "epoch": 0.81, + "learning_rate": 4.0288616456262404e-05, + "loss": 1.6525, + "step": 188900 + }, + { + "epoch": 0.81, + "learning_rate": 4.028318806970917e-05, + "loss": 1.6675, + "step": 189000 + }, + { + "epoch": 0.81, + "learning_rate": 4.0277759683155935e-05, + "loss": 1.6242, + "step": 189100 + }, + { + "epoch": 0.81, + "learning_rate": 4.02723312966027e-05, + "loss": 1.6512, + "step": 189200 + }, + { + "epoch": 0.81, + "learning_rate": 4.0266902910049465e-05, + "loss": 1.6472, + "step": 189300 + }, + { + "epoch": 0.81, + "learning_rate": 4.026147452349623e-05, + "loss": 1.6366, + "step": 189400 + }, + { + "epoch": 0.81, + "learning_rate": 4.0256046136942996e-05, + "loss": 1.6389, + "step": 189500 + }, + { + "epoch": 0.81, + "learning_rate": 4.025061775038976e-05, + "loss": 1.6271, + "step": 189600 + }, + { + "epoch": 0.81, + "learning_rate": 4.0245189363836527e-05, + "loss": 1.6299, + "step": 189700 + }, + { + "epoch": 0.82, + "learning_rate": 4.023976097728329e-05, + "loss": 1.6095, + "step": 189800 + }, + { + "epoch": 0.82, + "learning_rate": 4.023433259073006e-05, + "loss": 1.6473, + "step": 189900 + }, + { + "epoch": 0.82, + "learning_rate": 4.022890420417682e-05, + "loss": 1.6325, + "step": 190000 + }, + { + "epoch": 0.82, + "eval_loss": 1.4781873226165771, + "eval_runtime": 19.0438, + "eval_samples_per_second": 525.106, + "eval_steps_per_second": 16.436, + "step": 190000 + }, + { + "epoch": 0.82, + "learning_rate": 4.022347581762359e-05, + "loss": 1.6275, + "step": 190100 + }, + { + "epoch": 0.82, + "learning_rate": 4.021804743107035e-05, + "loss": 1.6262, + "step": 190200 + }, + { + "epoch": 0.82, + "learning_rate": 4.021261904451711e-05, + "loss": 1.588, + "step": 190300 + }, + { + "epoch": 0.82, + "learning_rate": 4.020719065796388e-05, + "loss": 1.6254, + "step": 190400 + }, + { + "epoch": 0.82, + "learning_rate": 4.020176227141064e-05, + "loss": 1.6347, + "step": 190500 + }, + { + "epoch": 0.82, + "learning_rate": 4.0196333884857404e-05, + "loss": 1.6128, + "step": 190600 + }, + { + "epoch": 0.82, + "learning_rate": 4.019090549830418e-05, + "loss": 1.6265, + "step": 190700 + }, + { + "epoch": 0.82, + "learning_rate": 4.018547711175094e-05, + "loss": 1.6595, + "step": 190800 + }, + { + "epoch": 0.82, + "learning_rate": 4.01800487251977e-05, + "loss": 1.6308, + "step": 190900 + }, + { + "epoch": 0.82, + "learning_rate": 4.017462033864447e-05, + "loss": 1.6539, + "step": 191000 + }, + { + "epoch": 0.82, + "learning_rate": 4.0169191952091233e-05, + "loss": 1.6196, + "step": 191100 + }, + { + "epoch": 0.82, + "learning_rate": 4.0163763565537995e-05, + "loss": 1.6144, + "step": 191200 + }, + { + "epoch": 0.82, + "learning_rate": 4.0158335178984764e-05, + "loss": 1.627, + "step": 191300 + }, + { + "epoch": 0.82, + "learning_rate": 4.0152906792431526e-05, + "loss": 1.6389, + "step": 191400 + }, + { + "epoch": 0.82, + "learning_rate": 4.0147478405878294e-05, + "loss": 1.5831, + "step": 191500 + }, + { + "epoch": 0.82, + "learning_rate": 4.0142050019325056e-05, + "loss": 1.6268, + "step": 191600 + }, + { + "epoch": 0.82, + "learning_rate": 4.013662163277182e-05, + "loss": 1.634, + "step": 191700 + }, + { + "epoch": 0.82, + "learning_rate": 4.013119324621859e-05, + "loss": 1.635, + "step": 191800 + }, + { + "epoch": 0.82, + "learning_rate": 4.0125764859665356e-05, + "loss": 1.6322, + "step": 191900 + }, + { + "epoch": 0.82, + "learning_rate": 4.012033647311212e-05, + "loss": 1.6223, + "step": 192000 + }, + { + "epoch": 0.83, + "learning_rate": 4.0114908086558886e-05, + "loss": 1.6278, + "step": 192100 + }, + { + "epoch": 0.83, + "learning_rate": 4.010947970000565e-05, + "loss": 1.6351, + "step": 192200 + }, + { + "epoch": 0.83, + "learning_rate": 4.010405131345241e-05, + "loss": 1.5964, + "step": 192300 + }, + { + "epoch": 0.83, + "learning_rate": 4.009862292689918e-05, + "loss": 1.6657, + "step": 192400 + }, + { + "epoch": 0.83, + "learning_rate": 4.009319454034594e-05, + "loss": 1.6335, + "step": 192500 + }, + { + "epoch": 0.83, + "learning_rate": 4.00877661537927e-05, + "loss": 1.6092, + "step": 192600 + }, + { + "epoch": 0.83, + "learning_rate": 4.008233776723947e-05, + "loss": 1.6382, + "step": 192700 + }, + { + "epoch": 0.83, + "learning_rate": 4.007690938068624e-05, + "loss": 1.6323, + "step": 192800 + }, + { + "epoch": 0.83, + "learning_rate": 4.0071480994133e-05, + "loss": 1.6095, + "step": 192900 + }, + { + "epoch": 0.83, + "learning_rate": 4.006605260757977e-05, + "loss": 1.6421, + "step": 193000 + }, + { + "epoch": 0.83, + "learning_rate": 4.006062422102653e-05, + "loss": 1.6308, + "step": 193100 + }, + { + "epoch": 0.83, + "learning_rate": 4.0055195834473294e-05, + "loss": 1.6324, + "step": 193200 + }, + { + "epoch": 0.83, + "learning_rate": 4.004976744792006e-05, + "loss": 1.6081, + "step": 193300 + }, + { + "epoch": 0.83, + "learning_rate": 4.0044339061366824e-05, + "loss": 1.6247, + "step": 193400 + }, + { + "epoch": 0.83, + "learning_rate": 4.003891067481359e-05, + "loss": 1.5981, + "step": 193500 + }, + { + "epoch": 0.83, + "learning_rate": 4.0033482288260355e-05, + "loss": 1.6262, + "step": 193600 + }, + { + "epoch": 0.83, + "learning_rate": 4.002805390170712e-05, + "loss": 1.6318, + "step": 193700 + }, + { + "epoch": 0.83, + "learning_rate": 4.0022625515153885e-05, + "loss": 1.6471, + "step": 193800 + }, + { + "epoch": 0.83, + "learning_rate": 4.0017197128600654e-05, + "loss": 1.5975, + "step": 193900 + }, + { + "epoch": 0.83, + "learning_rate": 4.0011768742047416e-05, + "loss": 1.5955, + "step": 194000 + }, + { + "epoch": 0.83, + "learning_rate": 4.0006340355494185e-05, + "loss": 1.6424, + "step": 194100 + }, + { + "epoch": 0.83, + "learning_rate": 4.0000911968940946e-05, + "loss": 1.6297, + "step": 194200 + }, + { + "epoch": 0.83, + "learning_rate": 3.999548358238771e-05, + "loss": 1.5919, + "step": 194300 + }, + { + "epoch": 0.84, + "learning_rate": 3.999005519583448e-05, + "loss": 1.6007, + "step": 194400 + }, + { + "epoch": 0.84, + "learning_rate": 3.998462680928124e-05, + "loss": 1.6243, + "step": 194500 + }, + { + "epoch": 0.84, + "learning_rate": 3.9979198422728e-05, + "loss": 1.632, + "step": 194600 + }, + { + "epoch": 0.84, + "learning_rate": 3.997377003617477e-05, + "loss": 1.6295, + "step": 194700 + }, + { + "epoch": 0.84, + "learning_rate": 3.996834164962153e-05, + "loss": 1.6243, + "step": 194800 + }, + { + "epoch": 0.84, + "learning_rate": 3.99629132630683e-05, + "loss": 1.6342, + "step": 194900 + }, + { + "epoch": 0.84, + "learning_rate": 3.995748487651507e-05, + "loss": 1.6255, + "step": 195000 + }, + { + "epoch": 0.84, + "eval_loss": 1.4795633554458618, + "eval_runtime": 19.0393, + "eval_samples_per_second": 525.231, + "eval_steps_per_second": 16.44, + "step": 195000 + }, + { + "epoch": 0.84, + "learning_rate": 3.995205648996183e-05, + "loss": 1.6733, + "step": 195100 + }, + { + "epoch": 0.84, + "learning_rate": 3.994662810340859e-05, + "loss": 1.5987, + "step": 195200 + }, + { + "epoch": 0.84, + "learning_rate": 3.994119971685536e-05, + "loss": 1.6215, + "step": 195300 + }, + { + "epoch": 0.84, + "learning_rate": 3.993577133030212e-05, + "loss": 1.6399, + "step": 195400 + }, + { + "epoch": 0.84, + "learning_rate": 3.993034294374889e-05, + "loss": 1.6229, + "step": 195500 + }, + { + "epoch": 0.84, + "learning_rate": 3.992491455719565e-05, + "loss": 1.639, + "step": 195600 + }, + { + "epoch": 0.84, + "learning_rate": 3.9919486170642415e-05, + "loss": 1.6256, + "step": 195700 + }, + { + "epoch": 0.84, + "learning_rate": 3.9914057784089184e-05, + "loss": 1.6476, + "step": 195800 + }, + { + "epoch": 0.84, + "learning_rate": 3.9908629397535946e-05, + "loss": 1.5958, + "step": 195900 + }, + { + "epoch": 0.84, + "learning_rate": 3.9903201010982714e-05, + "loss": 1.6397, + "step": 196000 + }, + { + "epoch": 0.84, + "learning_rate": 3.989777262442948e-05, + "loss": 1.6466, + "step": 196100 + }, + { + "epoch": 0.84, + "learning_rate": 3.9892344237876245e-05, + "loss": 1.6343, + "step": 196200 + }, + { + "epoch": 0.84, + "learning_rate": 3.988691585132301e-05, + "loss": 1.5967, + "step": 196300 + }, + { + "epoch": 0.84, + "learning_rate": 3.9881487464769775e-05, + "loss": 1.6249, + "step": 196400 + }, + { + "epoch": 0.84, + "learning_rate": 3.987605907821654e-05, + "loss": 1.6111, + "step": 196500 + }, + { + "epoch": 0.84, + "learning_rate": 3.98706306916633e-05, + "loss": 1.6295, + "step": 196600 + }, + { + "epoch": 0.85, + "learning_rate": 3.986520230511007e-05, + "loss": 1.6362, + "step": 196700 + }, + { + "epoch": 0.85, + "learning_rate": 3.985977391855683e-05, + "loss": 1.6522, + "step": 196800 + }, + { + "epoch": 0.85, + "learning_rate": 3.98543455320036e-05, + "loss": 1.6481, + "step": 196900 + }, + { + "epoch": 0.85, + "learning_rate": 3.984891714545037e-05, + "loss": 1.6194, + "step": 197000 + }, + { + "epoch": 0.85, + "learning_rate": 3.984348875889713e-05, + "loss": 1.6086, + "step": 197100 + }, + { + "epoch": 0.85, + "learning_rate": 3.983806037234389e-05, + "loss": 1.6251, + "step": 197200 + }, + { + "epoch": 0.85, + "learning_rate": 3.983263198579066e-05, + "loss": 1.6097, + "step": 197300 + }, + { + "epoch": 0.85, + "learning_rate": 3.982720359923742e-05, + "loss": 1.6268, + "step": 197400 + }, + { + "epoch": 0.85, + "learning_rate": 3.982177521268419e-05, + "loss": 1.6386, + "step": 197500 + }, + { + "epoch": 0.85, + "learning_rate": 3.981634682613095e-05, + "loss": 1.6207, + "step": 197600 + }, + { + "epoch": 0.85, + "learning_rate": 3.9810918439577714e-05, + "loss": 1.6248, + "step": 197700 + }, + { + "epoch": 0.85, + "learning_rate": 3.980549005302448e-05, + "loss": 1.6337, + "step": 197800 + }, + { + "epoch": 0.85, + "learning_rate": 3.9800061666471244e-05, + "loss": 1.6195, + "step": 197900 + }, + { + "epoch": 0.85, + "learning_rate": 3.9794633279918006e-05, + "loss": 1.6396, + "step": 198000 + }, + { + "epoch": 0.85, + "learning_rate": 3.978920489336478e-05, + "loss": 1.5919, + "step": 198100 + }, + { + "epoch": 0.85, + "learning_rate": 3.978377650681154e-05, + "loss": 1.6144, + "step": 198200 + }, + { + "epoch": 0.85, + "learning_rate": 3.9778348120258305e-05, + "loss": 1.6001, + "step": 198300 + }, + { + "epoch": 0.85, + "learning_rate": 3.9772919733705074e-05, + "loss": 1.6218, + "step": 198400 + }, + { + "epoch": 0.85, + "learning_rate": 3.9767491347151836e-05, + "loss": 1.6068, + "step": 198500 + }, + { + "epoch": 0.85, + "learning_rate": 3.97620629605986e-05, + "loss": 1.6075, + "step": 198600 + }, + { + "epoch": 0.85, + "learning_rate": 3.9756634574045366e-05, + "loss": 1.5883, + "step": 198700 + }, + { + "epoch": 0.85, + "learning_rate": 3.975120618749213e-05, + "loss": 1.5984, + "step": 198800 + }, + { + "epoch": 0.85, + "learning_rate": 3.97457778009389e-05, + "loss": 1.6311, + "step": 198900 + }, + { + "epoch": 0.85, + "learning_rate": 3.974034941438566e-05, + "loss": 1.6614, + "step": 199000 + }, + { + "epoch": 0.86, + "learning_rate": 3.973492102783243e-05, + "loss": 1.6541, + "step": 199100 + }, + { + "epoch": 0.86, + "learning_rate": 3.972949264127919e-05, + "loss": 1.6475, + "step": 199200 + }, + { + "epoch": 0.86, + "learning_rate": 3.972406425472596e-05, + "loss": 1.6138, + "step": 199300 + }, + { + "epoch": 0.86, + "learning_rate": 3.971863586817272e-05, + "loss": 1.6249, + "step": 199400 + }, + { + "epoch": 0.86, + "learning_rate": 3.971320748161949e-05, + "loss": 1.6141, + "step": 199500 + }, + { + "epoch": 0.86, + "learning_rate": 3.970777909506625e-05, + "loss": 1.6344, + "step": 199600 + }, + { + "epoch": 0.86, + "learning_rate": 3.970235070851301e-05, + "loss": 1.613, + "step": 199700 + }, + { + "epoch": 0.86, + "learning_rate": 3.969692232195978e-05, + "loss": 1.616, + "step": 199800 + }, + { + "epoch": 0.86, + "learning_rate": 3.969149393540654e-05, + "loss": 1.6162, + "step": 199900 + }, + { + "epoch": 0.86, + "learning_rate": 3.9686065548853305e-05, + "loss": 1.6455, + "step": 200000 + }, + { + "epoch": 0.86, + "eval_loss": 1.4754244089126587, + "eval_runtime": 18.9946, + "eval_samples_per_second": 526.464, + "eval_steps_per_second": 16.478, + "step": 200000 + }, + { + "epoch": 0.86, + "learning_rate": 3.968063716230007e-05, + "loss": 1.6311, + "step": 200100 + }, + { + "epoch": 0.86, + "learning_rate": 3.967520877574684e-05, + "loss": 1.6139, + "step": 200200 + }, + { + "epoch": 0.86, + "learning_rate": 3.9669780389193604e-05, + "loss": 1.601, + "step": 200300 + }, + { + "epoch": 0.86, + "learning_rate": 3.966435200264037e-05, + "loss": 1.6321, + "step": 200400 + }, + { + "epoch": 0.86, + "learning_rate": 3.9658923616087134e-05, + "loss": 1.6582, + "step": 200500 + }, + { + "epoch": 0.86, + "learning_rate": 3.9653495229533896e-05, + "loss": 1.6293, + "step": 200600 + }, + { + "epoch": 0.86, + "learning_rate": 3.9648066842980665e-05, + "loss": 1.6144, + "step": 200700 + }, + { + "epoch": 0.86, + "learning_rate": 3.964263845642743e-05, + "loss": 1.6154, + "step": 200800 + }, + { + "epoch": 0.86, + "learning_rate": 3.9637210069874195e-05, + "loss": 1.6168, + "step": 200900 + }, + { + "epoch": 0.86, + "learning_rate": 3.963178168332096e-05, + "loss": 1.6595, + "step": 201000 + }, + { + "epoch": 0.86, + "learning_rate": 3.962635329676772e-05, + "loss": 1.6051, + "step": 201100 + }, + { + "epoch": 0.86, + "learning_rate": 3.962092491021449e-05, + "loss": 1.6103, + "step": 201200 + }, + { + "epoch": 0.86, + "learning_rate": 3.9615496523661256e-05, + "loss": 1.6523, + "step": 201300 + }, + { + "epoch": 0.87, + "learning_rate": 3.961006813710802e-05, + "loss": 1.6216, + "step": 201400 + }, + { + "epoch": 0.87, + "learning_rate": 3.960463975055479e-05, + "loss": 1.6684, + "step": 201500 + }, + { + "epoch": 0.87, + "learning_rate": 3.959921136400155e-05, + "loss": 1.6098, + "step": 201600 + }, + { + "epoch": 0.87, + "learning_rate": 3.959378297744831e-05, + "loss": 1.6309, + "step": 201700 + }, + { + "epoch": 0.87, + "learning_rate": 3.958835459089508e-05, + "loss": 1.6329, + "step": 201800 + }, + { + "epoch": 0.87, + "learning_rate": 3.958292620434184e-05, + "loss": 1.6424, + "step": 201900 + }, + { + "epoch": 0.87, + "learning_rate": 3.95774978177886e-05, + "loss": 1.6526, + "step": 202000 + }, + { + "epoch": 0.87, + "learning_rate": 3.957206943123537e-05, + "loss": 1.6335, + "step": 202100 + }, + { + "epoch": 0.87, + "learning_rate": 3.956664104468214e-05, + "loss": 1.6238, + "step": 202200 + }, + { + "epoch": 0.87, + "learning_rate": 3.95612126581289e-05, + "loss": 1.6289, + "step": 202300 + }, + { + "epoch": 0.87, + "learning_rate": 3.955578427157567e-05, + "loss": 1.6357, + "step": 202400 + }, + { + "epoch": 0.87, + "learning_rate": 3.955035588502243e-05, + "loss": 1.6201, + "step": 202500 + }, + { + "epoch": 0.87, + "learning_rate": 3.9544927498469195e-05, + "loss": 1.6048, + "step": 202600 + }, + { + "epoch": 0.87, + "learning_rate": 3.953949911191596e-05, + "loss": 1.6136, + "step": 202700 + }, + { + "epoch": 0.87, + "learning_rate": 3.9534070725362725e-05, + "loss": 1.6227, + "step": 202800 + }, + { + "epoch": 0.87, + "learning_rate": 3.9528642338809494e-05, + "loss": 1.6221, + "step": 202900 + }, + { + "epoch": 0.87, + "learning_rate": 3.9523213952256256e-05, + "loss": 1.6349, + "step": 203000 + }, + { + "epoch": 0.87, + "learning_rate": 3.951778556570302e-05, + "loss": 1.6481, + "step": 203100 + }, + { + "epoch": 0.87, + "learning_rate": 3.9512357179149786e-05, + "loss": 1.614, + "step": 203200 + }, + { + "epoch": 0.87, + "learning_rate": 3.9506928792596555e-05, + "loss": 1.6304, + "step": 203300 + }, + { + "epoch": 0.87, + "learning_rate": 3.950150040604332e-05, + "loss": 1.6152, + "step": 203400 + }, + { + "epoch": 0.87, + "learning_rate": 3.9496072019490085e-05, + "loss": 1.6251, + "step": 203500 + }, + { + "epoch": 0.87, + "learning_rate": 3.949064363293685e-05, + "loss": 1.6263, + "step": 203600 + }, + { + "epoch": 0.88, + "learning_rate": 3.948521524638361e-05, + "loss": 1.6257, + "step": 203700 + }, + { + "epoch": 0.88, + "learning_rate": 3.947978685983038e-05, + "loss": 1.6283, + "step": 203800 + }, + { + "epoch": 0.88, + "learning_rate": 3.947435847327714e-05, + "loss": 1.6304, + "step": 203900 + }, + { + "epoch": 0.88, + "learning_rate": 3.94689300867239e-05, + "loss": 1.6152, + "step": 204000 + }, + { + "epoch": 0.88, + "learning_rate": 3.946350170017067e-05, + "loss": 1.6025, + "step": 204100 + }, + { + "epoch": 0.88, + "learning_rate": 3.945807331361743e-05, + "loss": 1.63, + "step": 204200 + }, + { + "epoch": 0.88, + "learning_rate": 3.94526449270642e-05, + "loss": 1.611, + "step": 204300 + }, + { + "epoch": 0.88, + "learning_rate": 3.944721654051097e-05, + "loss": 1.6312, + "step": 204400 + }, + { + "epoch": 0.88, + "learning_rate": 3.944178815395773e-05, + "loss": 1.6026, + "step": 204500 + }, + { + "epoch": 0.88, + "learning_rate": 3.943635976740449e-05, + "loss": 1.6058, + "step": 204600 + }, + { + "epoch": 0.88, + "learning_rate": 3.943093138085126e-05, + "loss": 1.644, + "step": 204700 + }, + { + "epoch": 0.88, + "learning_rate": 3.9425502994298024e-05, + "loss": 1.6163, + "step": 204800 + }, + { + "epoch": 0.88, + "learning_rate": 3.942007460774479e-05, + "loss": 1.6285, + "step": 204900 + }, + { + "epoch": 0.88, + "learning_rate": 3.9414646221191554e-05, + "loss": 1.653, + "step": 205000 + }, + { + "epoch": 0.88, + "eval_loss": 1.4734227657318115, + "eval_runtime": 19.0238, + "eval_samples_per_second": 525.657, + "eval_steps_per_second": 16.453, + "step": 205000 + }, + { + "epoch": 0.88, + "learning_rate": 3.9409217834638316e-05, + "loss": 1.6065, + "step": 205100 + }, + { + "epoch": 0.88, + "learning_rate": 3.9403789448085085e-05, + "loss": 1.6388, + "step": 205200 + }, + { + "epoch": 0.88, + "learning_rate": 3.9398361061531847e-05, + "loss": 1.6096, + "step": 205300 + }, + { + "epoch": 0.88, + "learning_rate": 3.9392932674978615e-05, + "loss": 1.6194, + "step": 205400 + }, + { + "epoch": 0.88, + "learning_rate": 3.9387504288425384e-05, + "loss": 1.6156, + "step": 205500 + }, + { + "epoch": 0.88, + "learning_rate": 3.9382075901872146e-05, + "loss": 1.6191, + "step": 205600 + }, + { + "epoch": 0.88, + "learning_rate": 3.937664751531891e-05, + "loss": 1.6216, + "step": 205700 + }, + { + "epoch": 0.88, + "learning_rate": 3.9371219128765676e-05, + "loss": 1.6099, + "step": 205800 + }, + { + "epoch": 0.88, + "learning_rate": 3.936579074221244e-05, + "loss": 1.6193, + "step": 205900 + }, + { + "epoch": 0.88, + "learning_rate": 3.93603623556592e-05, + "loss": 1.6379, + "step": 206000 + }, + { + "epoch": 0.89, + "learning_rate": 3.935493396910597e-05, + "loss": 1.5792, + "step": 206100 + }, + { + "epoch": 0.89, + "learning_rate": 3.934950558255273e-05, + "loss": 1.6132, + "step": 206200 + }, + { + "epoch": 0.89, + "learning_rate": 3.934407719599949e-05, + "loss": 1.598, + "step": 206300 + }, + { + "epoch": 0.89, + "learning_rate": 3.933864880944627e-05, + "loss": 1.656, + "step": 206400 + }, + { + "epoch": 0.89, + "learning_rate": 3.933322042289303e-05, + "loss": 1.6255, + "step": 206500 + }, + { + "epoch": 0.89, + "learning_rate": 3.932779203633979e-05, + "loss": 1.6058, + "step": 206600 + }, + { + "epoch": 0.89, + "learning_rate": 3.932236364978656e-05, + "loss": 1.6469, + "step": 206700 + }, + { + "epoch": 0.89, + "learning_rate": 3.931693526323332e-05, + "loss": 1.6283, + "step": 206800 + }, + { + "epoch": 0.89, + "learning_rate": 3.931150687668009e-05, + "loss": 1.6318, + "step": 206900 + }, + { + "epoch": 0.89, + "learning_rate": 3.930607849012685e-05, + "loss": 1.6243, + "step": 207000 + }, + { + "epoch": 0.89, + "learning_rate": 3.9300650103573615e-05, + "loss": 1.6538, + "step": 207100 + }, + { + "epoch": 0.89, + "learning_rate": 3.929522171702038e-05, + "loss": 1.6271, + "step": 207200 + }, + { + "epoch": 0.89, + "learning_rate": 3.9289793330467145e-05, + "loss": 1.6344, + "step": 207300 + }, + { + "epoch": 0.89, + "learning_rate": 3.928436494391391e-05, + "loss": 1.5985, + "step": 207400 + }, + { + "epoch": 0.89, + "learning_rate": 3.927893655736068e-05, + "loss": 1.5742, + "step": 207500 + }, + { + "epoch": 0.89, + "learning_rate": 3.9273508170807444e-05, + "loss": 1.5863, + "step": 207600 + }, + { + "epoch": 0.89, + "learning_rate": 3.9268079784254206e-05, + "loss": 1.6366, + "step": 207700 + }, + { + "epoch": 0.89, + "learning_rate": 3.9262651397700975e-05, + "loss": 1.5802, + "step": 207800 + }, + { + "epoch": 0.89, + "learning_rate": 3.925722301114774e-05, + "loss": 1.6049, + "step": 207900 + }, + { + "epoch": 0.89, + "learning_rate": 3.92517946245945e-05, + "loss": 1.6003, + "step": 208000 + }, + { + "epoch": 0.89, + "learning_rate": 3.924636623804127e-05, + "loss": 1.6243, + "step": 208100 + }, + { + "epoch": 0.89, + "learning_rate": 3.924093785148803e-05, + "loss": 1.617, + "step": 208200 + }, + { + "epoch": 0.89, + "learning_rate": 3.923550946493479e-05, + "loss": 1.6359, + "step": 208300 + }, + { + "epoch": 0.9, + "learning_rate": 3.923008107838156e-05, + "loss": 1.622, + "step": 208400 + }, + { + "epoch": 0.9, + "learning_rate": 3.922465269182833e-05, + "loss": 1.5969, + "step": 208500 + }, + { + "epoch": 0.9, + "learning_rate": 3.921922430527509e-05, + "loss": 1.6187, + "step": 208600 + }, + { + "epoch": 0.9, + "learning_rate": 3.921379591872186e-05, + "loss": 1.6107, + "step": 208700 + }, + { + "epoch": 0.9, + "learning_rate": 3.920836753216862e-05, + "loss": 1.6074, + "step": 208800 + }, + { + "epoch": 0.9, + "learning_rate": 3.920293914561539e-05, + "loss": 1.644, + "step": 208900 + }, + { + "epoch": 0.9, + "learning_rate": 3.919751075906215e-05, + "loss": 1.5991, + "step": 209000 + }, + { + "epoch": 0.9, + "learning_rate": 3.919208237250891e-05, + "loss": 1.5961, + "step": 209100 + }, + { + "epoch": 0.9, + "learning_rate": 3.918665398595568e-05, + "loss": 1.6598, + "step": 209200 + }, + { + "epoch": 0.9, + "learning_rate": 3.9181225599402444e-05, + "loss": 1.5988, + "step": 209300 + }, + { + "epoch": 0.9, + "learning_rate": 3.9175797212849205e-05, + "loss": 1.6194, + "step": 209400 + }, + { + "epoch": 0.9, + "learning_rate": 3.9170368826295974e-05, + "loss": 1.6176, + "step": 209500 + }, + { + "epoch": 0.9, + "learning_rate": 3.916494043974274e-05, + "loss": 1.6265, + "step": 209600 + }, + { + "epoch": 0.9, + "learning_rate": 3.9159512053189505e-05, + "loss": 1.5909, + "step": 209700 + }, + { + "epoch": 0.9, + "learning_rate": 3.915408366663627e-05, + "loss": 1.626, + "step": 209800 + }, + { + "epoch": 0.9, + "learning_rate": 3.9148655280083035e-05, + "loss": 1.5911, + "step": 209900 + }, + { + "epoch": 0.9, + "learning_rate": 3.91432268935298e-05, + "loss": 1.6366, + "step": 210000 + }, + { + "epoch": 0.9, + "eval_loss": 1.4706979990005493, + "eval_runtime": 19.0444, + "eval_samples_per_second": 525.088, + "eval_steps_per_second": 16.435, + "step": 210000 + }, + { + "epoch": 0.9, + "learning_rate": 3.9137798506976566e-05, + "loss": 1.6034, + "step": 210100 + }, + { + "epoch": 0.9, + "learning_rate": 3.913237012042333e-05, + "loss": 1.5996, + "step": 210200 + }, + { + "epoch": 0.9, + "learning_rate": 3.912694173387009e-05, + "loss": 1.6335, + "step": 210300 + }, + { + "epoch": 0.9, + "learning_rate": 3.912151334731686e-05, + "loss": 1.6271, + "step": 210400 + }, + { + "epoch": 0.9, + "learning_rate": 3.911608496076362e-05, + "loss": 1.6404, + "step": 210500 + }, + { + "epoch": 0.9, + "learning_rate": 3.911065657421039e-05, + "loss": 1.5918, + "step": 210600 + }, + { + "epoch": 0.91, + "learning_rate": 3.910522818765716e-05, + "loss": 1.6124, + "step": 210700 + }, + { + "epoch": 0.91, + "learning_rate": 3.909979980110392e-05, + "loss": 1.5966, + "step": 210800 + }, + { + "epoch": 0.91, + "learning_rate": 3.909437141455069e-05, + "loss": 1.6116, + "step": 210900 + }, + { + "epoch": 0.91, + "learning_rate": 3.908894302799745e-05, + "loss": 1.6332, + "step": 211000 + }, + { + "epoch": 0.91, + "learning_rate": 3.908351464144421e-05, + "loss": 1.6542, + "step": 211100 + }, + { + "epoch": 0.91, + "learning_rate": 3.907808625489098e-05, + "loss": 1.6089, + "step": 211200 + }, + { + "epoch": 0.91, + "learning_rate": 3.907265786833774e-05, + "loss": 1.6082, + "step": 211300 + }, + { + "epoch": 0.91, + "learning_rate": 3.9067229481784504e-05, + "loss": 1.5931, + "step": 211400 + }, + { + "epoch": 0.91, + "learning_rate": 3.906180109523127e-05, + "loss": 1.6417, + "step": 211500 + }, + { + "epoch": 0.91, + "learning_rate": 3.9056372708678034e-05, + "loss": 1.6244, + "step": 211600 + }, + { + "epoch": 0.91, + "learning_rate": 3.90509443221248e-05, + "loss": 1.5985, + "step": 211700 + }, + { + "epoch": 0.91, + "learning_rate": 3.904551593557157e-05, + "loss": 1.6141, + "step": 211800 + }, + { + "epoch": 0.91, + "learning_rate": 3.9040087549018334e-05, + "loss": 1.6266, + "step": 211900 + }, + { + "epoch": 0.91, + "learning_rate": 3.9034659162465096e-05, + "loss": 1.5968, + "step": 212000 + }, + { + "epoch": 0.91, + "learning_rate": 3.9029285059777393e-05, + "loss": 1.6204, + "step": 212100 + }, + { + "epoch": 0.91, + "learning_rate": 3.9023856673224155e-05, + "loss": 1.6088, + "step": 212200 + }, + { + "epoch": 0.91, + "learning_rate": 3.9018428286670924e-05, + "loss": 1.6306, + "step": 212300 + }, + { + "epoch": 0.91, + "learning_rate": 3.901299990011769e-05, + "loss": 1.652, + "step": 212400 + }, + { + "epoch": 0.91, + "learning_rate": 3.9007625797429984e-05, + "loss": 1.6179, + "step": 212500 + }, + { + "epoch": 0.91, + "learning_rate": 3.900219741087675e-05, + "loss": 1.6265, + "step": 212600 + }, + { + "epoch": 0.91, + "learning_rate": 3.8996769024323514e-05, + "loss": 1.6164, + "step": 212700 + }, + { + "epoch": 0.91, + "learning_rate": 3.8991340637770276e-05, + "loss": 1.6125, + "step": 212800 + }, + { + "epoch": 0.91, + "learning_rate": 3.8985912251217045e-05, + "loss": 1.6091, + "step": 212900 + }, + { + "epoch": 0.92, + "learning_rate": 3.8980483864663813e-05, + "loss": 1.5873, + "step": 213000 + }, + { + "epoch": 0.92, + "learning_rate": 3.8975055478110575e-05, + "loss": 1.5956, + "step": 213100 + }, + { + "epoch": 0.92, + "learning_rate": 3.8969627091557344e-05, + "loss": 1.6119, + "step": 213200 + }, + { + "epoch": 0.92, + "learning_rate": 3.8964198705004106e-05, + "loss": 1.6209, + "step": 213300 + }, + { + "epoch": 0.92, + "learning_rate": 3.8958770318450874e-05, + "loss": 1.6383, + "step": 213400 + }, + { + "epoch": 0.92, + "learning_rate": 3.8953341931897636e-05, + "loss": 1.6231, + "step": 213500 + }, + { + "epoch": 0.92, + "learning_rate": 3.89479135453444e-05, + "loss": 1.6086, + "step": 213600 + }, + { + "epoch": 0.92, + "learning_rate": 3.894248515879117e-05, + "loss": 1.6127, + "step": 213700 + }, + { + "epoch": 0.92, + "learning_rate": 3.893705677223793e-05, + "loss": 1.6427, + "step": 213800 + }, + { + "epoch": 0.92, + "learning_rate": 3.893162838568469e-05, + "loss": 1.619, + "step": 213900 + }, + { + "epoch": 0.92, + "learning_rate": 3.892619999913146e-05, + "loss": 1.6003, + "step": 214000 + }, + { + "epoch": 0.92, + "learning_rate": 3.892077161257823e-05, + "loss": 1.6161, + "step": 214100 + }, + { + "epoch": 0.92, + "learning_rate": 3.891534322602499e-05, + "loss": 1.6027, + "step": 214200 + }, + { + "epoch": 0.92, + "learning_rate": 3.890991483947176e-05, + "loss": 1.6419, + "step": 214300 + }, + { + "epoch": 0.92, + "learning_rate": 3.890448645291852e-05, + "loss": 1.6429, + "step": 214400 + }, + { + "epoch": 0.92, + "learning_rate": 3.889905806636528e-05, + "loss": 1.6234, + "step": 214500 + }, + { + "epoch": 0.92, + "learning_rate": 3.889362967981205e-05, + "loss": 1.617, + "step": 214600 + }, + { + "epoch": 0.92, + "learning_rate": 3.888820129325881e-05, + "loss": 1.6263, + "step": 214700 + }, + { + "epoch": 0.92, + "learning_rate": 3.8882772906705575e-05, + "loss": 1.6383, + "step": 214800 + }, + { + "epoch": 0.92, + "learning_rate": 3.887734452015234e-05, + "loss": 1.6223, + "step": 214900 + }, + { + "epoch": 0.92, + "learning_rate": 3.8871916133599105e-05, + "loss": 1.5949, + "step": 215000 + }, + { + "epoch": 0.92, + "eval_loss": 1.4699814319610596, + "eval_runtime": 19.0028, + "eval_samples_per_second": 526.238, + "eval_steps_per_second": 16.471, + "step": 215000 + }, + { + "epoch": 0.92, + "learning_rate": 3.8866487747045874e-05, + "loss": 1.616, + "step": 215100 + }, + { + "epoch": 0.92, + "learning_rate": 3.886105936049264e-05, + "loss": 1.6109, + "step": 215200 + }, + { + "epoch": 0.92, + "learning_rate": 3.8855630973939404e-05, + "loss": 1.6242, + "step": 215300 + }, + { + "epoch": 0.93, + "learning_rate": 3.885020258738617e-05, + "loss": 1.5882, + "step": 215400 + }, + { + "epoch": 0.93, + "learning_rate": 3.8844774200832935e-05, + "loss": 1.6162, + "step": 215500 + }, + { + "epoch": 0.93, + "learning_rate": 3.88393458142797e-05, + "loss": 1.6346, + "step": 215600 + }, + { + "epoch": 0.93, + "learning_rate": 3.8833917427726465e-05, + "loss": 1.6047, + "step": 215700 + }, + { + "epoch": 0.93, + "learning_rate": 3.882848904117323e-05, + "loss": 1.6118, + "step": 215800 + }, + { + "epoch": 0.93, + "learning_rate": 3.882306065461999e-05, + "loss": 1.5989, + "step": 215900 + }, + { + "epoch": 0.93, + "learning_rate": 3.881763226806676e-05, + "loss": 1.6242, + "step": 216000 + }, + { + "epoch": 0.93, + "learning_rate": 3.8812258165379056e-05, + "loss": 1.6333, + "step": 216100 + }, + { + "epoch": 0.93, + "learning_rate": 3.880682977882582e-05, + "loss": 1.5985, + "step": 216200 + }, + { + "epoch": 0.93, + "learning_rate": 3.8801401392272586e-05, + "loss": 1.5896, + "step": 216300 + }, + { + "epoch": 0.93, + "learning_rate": 3.879597300571935e-05, + "loss": 1.5957, + "step": 216400 + }, + { + "epoch": 0.93, + "learning_rate": 3.879054461916612e-05, + "loss": 1.5785, + "step": 216500 + }, + { + "epoch": 0.93, + "learning_rate": 3.878511623261288e-05, + "loss": 1.6345, + "step": 216600 + }, + { + "epoch": 0.93, + "learning_rate": 3.877968784605964e-05, + "loss": 1.5993, + "step": 216700 + }, + { + "epoch": 0.93, + "learning_rate": 3.877425945950641e-05, + "loss": 1.6124, + "step": 216800 + }, + { + "epoch": 0.93, + "learning_rate": 3.876883107295318e-05, + "loss": 1.6128, + "step": 216900 + }, + { + "epoch": 0.93, + "learning_rate": 3.876340268639994e-05, + "loss": 1.5962, + "step": 217000 + }, + { + "epoch": 0.93, + "learning_rate": 3.875797429984671e-05, + "loss": 1.5918, + "step": 217100 + }, + { + "epoch": 0.93, + "learning_rate": 3.875254591329347e-05, + "loss": 1.5932, + "step": 217200 + }, + { + "epoch": 0.93, + "learning_rate": 3.874711752674023e-05, + "loss": 1.5982, + "step": 217300 + }, + { + "epoch": 0.93, + "learning_rate": 3.8741689140187e-05, + "loss": 1.6113, + "step": 217400 + }, + { + "epoch": 0.93, + "learning_rate": 3.873626075363376e-05, + "loss": 1.6173, + "step": 217500 + }, + { + "epoch": 0.93, + "learning_rate": 3.8730832367080524e-05, + "loss": 1.6258, + "step": 217600 + }, + { + "epoch": 0.94, + "learning_rate": 3.872540398052729e-05, + "loss": 1.6248, + "step": 217700 + }, + { + "epoch": 0.94, + "learning_rate": 3.8719975593974055e-05, + "loss": 1.608, + "step": 217800 + }, + { + "epoch": 0.94, + "learning_rate": 3.8714547207420824e-05, + "loss": 1.6077, + "step": 217900 + }, + { + "epoch": 0.94, + "learning_rate": 3.870911882086759e-05, + "loss": 1.6182, + "step": 218000 + }, + { + "epoch": 0.94, + "learning_rate": 3.8703690434314354e-05, + "loss": 1.5938, + "step": 218100 + }, + { + "epoch": 0.94, + "learning_rate": 3.8698262047761116e-05, + "loss": 1.5984, + "step": 218200 + }, + { + "epoch": 0.94, + "learning_rate": 3.8692833661207885e-05, + "loss": 1.6177, + "step": 218300 + }, + { + "epoch": 0.94, + "learning_rate": 3.8687405274654647e-05, + "loss": 1.6058, + "step": 218400 + }, + { + "epoch": 0.94, + "learning_rate": 3.8681976888101415e-05, + "loss": 1.5896, + "step": 218500 + }, + { + "epoch": 0.94, + "learning_rate": 3.867654850154818e-05, + "loss": 1.5958, + "step": 218600 + }, + { + "epoch": 0.94, + "learning_rate": 3.867112011499494e-05, + "loss": 1.6201, + "step": 218700 + }, + { + "epoch": 0.94, + "learning_rate": 3.866569172844171e-05, + "loss": 1.6015, + "step": 218800 + }, + { + "epoch": 0.94, + "learning_rate": 3.866026334188847e-05, + "loss": 1.6328, + "step": 218900 + }, + { + "epoch": 0.94, + "learning_rate": 3.865483495533524e-05, + "loss": 1.6127, + "step": 219000 + }, + { + "epoch": 0.94, + "learning_rate": 3.864940656878201e-05, + "loss": 1.6269, + "step": 219100 + }, + { + "epoch": 0.94, + "learning_rate": 3.864397818222877e-05, + "loss": 1.6029, + "step": 219200 + }, + { + "epoch": 0.94, + "learning_rate": 3.863854979567553e-05, + "loss": 1.6364, + "step": 219300 + }, + { + "epoch": 0.94, + "learning_rate": 3.86331214091223e-05, + "loss": 1.6451, + "step": 219400 + }, + { + "epoch": 0.94, + "learning_rate": 3.862769302256906e-05, + "loss": 1.5971, + "step": 219500 + }, + { + "epoch": 0.94, + "learning_rate": 3.862226463601582e-05, + "loss": 1.6272, + "step": 219600 + }, + { + "epoch": 0.94, + "learning_rate": 3.861683624946259e-05, + "loss": 1.5922, + "step": 219700 + }, + { + "epoch": 0.94, + "learning_rate": 3.8611407862909353e-05, + "loss": 1.6224, + "step": 219800 + }, + { + "epoch": 0.94, + "learning_rate": 3.860597947635612e-05, + "loss": 1.6, + "step": 219900 + }, + { + "epoch": 0.95, + "learning_rate": 3.860055108980289e-05, + "loss": 1.5939, + "step": 220000 + }, + { + "epoch": 0.95, + "eval_loss": 1.468553066253662, + "eval_runtime": 19.0907, + "eval_samples_per_second": 523.816, + "eval_steps_per_second": 16.395, + "step": 220000 + }, + { + "epoch": 0.95, + "learning_rate": 3.859517698711518e-05, + "loss": 1.5755, + "step": 220100 + }, + { + "epoch": 0.95, + "learning_rate": 3.858974860056195e-05, + "loss": 1.6354, + "step": 220200 + }, + { + "epoch": 0.95, + "learning_rate": 3.858432021400871e-05, + "loss": 1.6106, + "step": 220300 + }, + { + "epoch": 0.95, + "learning_rate": 3.8578891827455474e-05, + "loss": 1.6072, + "step": 220400 + }, + { + "epoch": 0.95, + "learning_rate": 3.857351772476778e-05, + "loss": 1.6048, + "step": 220500 + }, + { + "epoch": 0.95, + "learning_rate": 3.856808933821454e-05, + "loss": 1.6102, + "step": 220600 + }, + { + "epoch": 0.95, + "learning_rate": 3.85626609516613e-05, + "loss": 1.5699, + "step": 220700 + }, + { + "epoch": 0.95, + "learning_rate": 3.855723256510807e-05, + "loss": 1.6345, + "step": 220800 + }, + { + "epoch": 0.95, + "learning_rate": 3.855180417855483e-05, + "loss": 1.5963, + "step": 220900 + }, + { + "epoch": 0.95, + "learning_rate": 3.85463757920016e-05, + "loss": 1.6111, + "step": 221000 + }, + { + "epoch": 0.95, + "learning_rate": 3.8540947405448364e-05, + "loss": 1.6211, + "step": 221100 + }, + { + "epoch": 0.95, + "learning_rate": 3.8535519018895126e-05, + "loss": 1.6116, + "step": 221200 + }, + { + "epoch": 0.95, + "learning_rate": 3.8530090632341894e-05, + "loss": 1.5874, + "step": 221300 + }, + { + "epoch": 0.95, + "learning_rate": 3.852466224578866e-05, + "loss": 1.5935, + "step": 221400 + }, + { + "epoch": 0.95, + "learning_rate": 3.8519233859235425e-05, + "loss": 1.6263, + "step": 221500 + }, + { + "epoch": 0.95, + "learning_rate": 3.8513805472682194e-05, + "loss": 1.6139, + "step": 221600 + }, + { + "epoch": 0.95, + "learning_rate": 3.8508377086128955e-05, + "loss": 1.5891, + "step": 221700 + }, + { + "epoch": 0.95, + "learning_rate": 3.850294869957572e-05, + "loss": 1.6065, + "step": 221800 + }, + { + "epoch": 0.95, + "learning_rate": 3.8497520313022486e-05, + "loss": 1.6147, + "step": 221900 + }, + { + "epoch": 0.95, + "learning_rate": 3.849209192646925e-05, + "loss": 1.586, + "step": 222000 + }, + { + "epoch": 0.95, + "learning_rate": 3.848666353991601e-05, + "loss": 1.6067, + "step": 222100 + }, + { + "epoch": 0.95, + "learning_rate": 3.8481289437228314e-05, + "loss": 1.6126, + "step": 222200 + }, + { + "epoch": 0.96, + "learning_rate": 3.8475861050675076e-05, + "loss": 1.6568, + "step": 222300 + }, + { + "epoch": 0.96, + "learning_rate": 3.8470432664121845e-05, + "loss": 1.6035, + "step": 222400 + }, + { + "epoch": 0.96, + "learning_rate": 3.846500427756861e-05, + "loss": 1.601, + "step": 222500 + }, + { + "epoch": 0.96, + "learning_rate": 3.845957589101537e-05, + "loss": 1.5908, + "step": 222600 + }, + { + "epoch": 0.96, + "learning_rate": 3.845414750446214e-05, + "loss": 1.6119, + "step": 222700 + }, + { + "epoch": 0.96, + "learning_rate": 3.84487191179089e-05, + "loss": 1.6209, + "step": 222800 + }, + { + "epoch": 0.96, + "learning_rate": 3.844329073135566e-05, + "loss": 1.6223, + "step": 222900 + }, + { + "epoch": 0.96, + "learning_rate": 3.843786234480243e-05, + "loss": 1.6028, + "step": 223000 + }, + { + "epoch": 0.96, + "learning_rate": 3.84324339582492e-05, + "loss": 1.5922, + "step": 223100 + }, + { + "epoch": 0.96, + "learning_rate": 3.842700557169596e-05, + "loss": 1.6253, + "step": 223200 + }, + { + "epoch": 0.96, + "learning_rate": 3.842157718514273e-05, + "loss": 1.586, + "step": 223300 + }, + { + "epoch": 0.96, + "learning_rate": 3.841614879858949e-05, + "loss": 1.6246, + "step": 223400 + }, + { + "epoch": 0.96, + "learning_rate": 3.841072041203625e-05, + "loss": 1.5854, + "step": 223500 + }, + { + "epoch": 0.96, + "learning_rate": 3.840529202548302e-05, + "loss": 1.6129, + "step": 223600 + }, + { + "epoch": 0.96, + "learning_rate": 3.839986363892978e-05, + "loss": 1.6101, + "step": 223700 + }, + { + "epoch": 0.96, + "learning_rate": 3.8394435252376545e-05, + "loss": 1.5877, + "step": 223800 + }, + { + "epoch": 0.96, + "learning_rate": 3.8389006865823314e-05, + "loss": 1.6158, + "step": 223900 + }, + { + "epoch": 0.96, + "learning_rate": 3.8383578479270076e-05, + "loss": 1.6209, + "step": 224000 + }, + { + "epoch": 0.96, + "learning_rate": 3.8378150092716844e-05, + "loss": 1.614, + "step": 224100 + }, + { + "epoch": 0.96, + "learning_rate": 3.837272170616361e-05, + "loss": 1.614, + "step": 224200 + }, + { + "epoch": 0.96, + "learning_rate": 3.8367293319610375e-05, + "loss": 1.5698, + "step": 224300 + }, + { + "epoch": 0.96, + "learning_rate": 3.8361864933057143e-05, + "loss": 1.6134, + "step": 224400 + }, + { + "epoch": 0.96, + "learning_rate": 3.8356436546503905e-05, + "loss": 1.6126, + "step": 224500 + }, + { + "epoch": 0.96, + "learning_rate": 3.835100815995067e-05, + "loss": 1.6149, + "step": 224600 + }, + { + "epoch": 0.97, + "learning_rate": 3.8345579773397436e-05, + "loss": 1.5913, + "step": 224700 + }, + { + "epoch": 0.97, + "learning_rate": 3.83401513868442e-05, + "loss": 1.6153, + "step": 224800 + }, + { + "epoch": 0.97, + "learning_rate": 3.833472300029096e-05, + "loss": 1.6078, + "step": 224900 + }, + { + "epoch": 0.97, + "learning_rate": 3.832929461373773e-05, + "loss": 1.6227, + "step": 225000 + }, + { + "epoch": 0.97, + "eval_loss": 1.4662607908248901, + "eval_runtime": 19.0457, + "eval_samples_per_second": 525.054, + "eval_steps_per_second": 16.434, + "step": 225000 + }, + { + "epoch": 0.97, + "learning_rate": 3.832386622718449e-05, + "loss": 1.6016, + "step": 225100 + }, + { + "epoch": 0.97, + "learning_rate": 3.831843784063126e-05, + "loss": 1.6083, + "step": 225200 + }, + { + "epoch": 0.97, + "learning_rate": 3.831300945407803e-05, + "loss": 1.6064, + "step": 225300 + }, + { + "epoch": 0.97, + "learning_rate": 3.830758106752479e-05, + "loss": 1.5936, + "step": 225400 + }, + { + "epoch": 0.97, + "learning_rate": 3.830215268097155e-05, + "loss": 1.6125, + "step": 225500 + }, + { + "epoch": 0.97, + "learning_rate": 3.829672429441832e-05, + "loss": 1.6469, + "step": 225600 + }, + { + "epoch": 0.97, + "learning_rate": 3.829129590786508e-05, + "loss": 1.644, + "step": 225700 + }, + { + "epoch": 0.97, + "learning_rate": 3.8285867521311844e-05, + "loss": 1.5836, + "step": 225800 + }, + { + "epoch": 0.97, + "learning_rate": 3.828043913475861e-05, + "loss": 1.6373, + "step": 225900 + }, + { + "epoch": 0.97, + "learning_rate": 3.8275010748205374e-05, + "loss": 1.6034, + "step": 226000 + }, + { + "epoch": 0.97, + "learning_rate": 3.826958236165214e-05, + "loss": 1.6096, + "step": 226100 + }, + { + "epoch": 0.97, + "learning_rate": 3.826415397509891e-05, + "loss": 1.5963, + "step": 226200 + }, + { + "epoch": 0.97, + "learning_rate": 3.825872558854567e-05, + "loss": 1.597, + "step": 226300 + }, + { + "epoch": 0.97, + "learning_rate": 3.8253297201992435e-05, + "loss": 1.582, + "step": 226400 + }, + { + "epoch": 0.97, + "learning_rate": 3.8247868815439204e-05, + "loss": 1.5919, + "step": 226500 + }, + { + "epoch": 0.97, + "learning_rate": 3.8242440428885966e-05, + "loss": 1.6027, + "step": 226600 + }, + { + "epoch": 0.97, + "learning_rate": 3.8237012042332734e-05, + "loss": 1.6298, + "step": 226700 + }, + { + "epoch": 0.97, + "learning_rate": 3.8231583655779496e-05, + "loss": 1.6083, + "step": 226800 + }, + { + "epoch": 0.97, + "learning_rate": 3.822615526922626e-05, + "loss": 1.5936, + "step": 226900 + }, + { + "epoch": 0.98, + "learning_rate": 3.822072688267303e-05, + "loss": 1.6154, + "step": 227000 + }, + { + "epoch": 0.98, + "learning_rate": 3.821529849611979e-05, + "loss": 1.6154, + "step": 227100 + }, + { + "epoch": 0.98, + "learning_rate": 3.820987010956655e-05, + "loss": 1.6051, + "step": 227200 + }, + { + "epoch": 0.98, + "learning_rate": 3.8204441723013326e-05, + "loss": 1.625, + "step": 227300 + }, + { + "epoch": 0.98, + "learning_rate": 3.819901333646009e-05, + "loss": 1.6175, + "step": 227400 + }, + { + "epoch": 0.98, + "learning_rate": 3.819358494990685e-05, + "loss": 1.6182, + "step": 227500 + }, + { + "epoch": 0.98, + "learning_rate": 3.818815656335362e-05, + "loss": 1.5886, + "step": 227600 + }, + { + "epoch": 0.98, + "learning_rate": 3.818272817680038e-05, + "loss": 1.6225, + "step": 227700 + }, + { + "epoch": 0.98, + "learning_rate": 3.817729979024714e-05, + "loss": 1.6206, + "step": 227800 + }, + { + "epoch": 0.98, + "learning_rate": 3.817187140369391e-05, + "loss": 1.5953, + "step": 227900 + }, + { + "epoch": 0.98, + "learning_rate": 3.816644301714067e-05, + "loss": 1.6174, + "step": 228000 + }, + { + "epoch": 0.98, + "learning_rate": 3.816101463058744e-05, + "loss": 1.6002, + "step": 228100 + }, + { + "epoch": 0.98, + "learning_rate": 3.81555862440342e-05, + "loss": 1.6052, + "step": 228200 + }, + { + "epoch": 0.98, + "learning_rate": 3.815015785748097e-05, + "loss": 1.6123, + "step": 228300 + }, + { + "epoch": 0.98, + "learning_rate": 3.8144729470927734e-05, + "loss": 1.6096, + "step": 228400 + }, + { + "epoch": 0.98, + "learning_rate": 3.81393010843745e-05, + "loss": 1.6018, + "step": 228500 + }, + { + "epoch": 0.98, + "learning_rate": 3.8133872697821264e-05, + "loss": 1.5998, + "step": 228600 + }, + { + "epoch": 0.98, + "learning_rate": 3.812844431126803e-05, + "loss": 1.5882, + "step": 228700 + }, + { + "epoch": 0.98, + "learning_rate": 3.8123015924714795e-05, + "loss": 1.6218, + "step": 228800 + }, + { + "epoch": 0.98, + "learning_rate": 3.8117587538161557e-05, + "loss": 1.6005, + "step": 228900 + }, + { + "epoch": 0.98, + "learning_rate": 3.8112159151608325e-05, + "loss": 1.587, + "step": 229000 + }, + { + "epoch": 0.98, + "learning_rate": 3.810673076505509e-05, + "loss": 1.5893, + "step": 229100 + }, + { + "epoch": 0.98, + "learning_rate": 3.810130237850185e-05, + "loss": 1.5836, + "step": 229200 + }, + { + "epoch": 0.99, + "learning_rate": 3.809587399194862e-05, + "loss": 1.595, + "step": 229300 + }, + { + "epoch": 0.99, + "learning_rate": 3.8090445605395386e-05, + "loss": 1.6032, + "step": 229400 + }, + { + "epoch": 0.99, + "learning_rate": 3.808501721884215e-05, + "loss": 1.5978, + "step": 229500 + }, + { + "epoch": 0.99, + "learning_rate": 3.807958883228892e-05, + "loss": 1.5997, + "step": 229600 + }, + { + "epoch": 0.99, + "learning_rate": 3.807416044573568e-05, + "loss": 1.5946, + "step": 229700 + }, + { + "epoch": 0.99, + "learning_rate": 3.806873205918244e-05, + "loss": 1.5957, + "step": 229800 + }, + { + "epoch": 0.99, + "learning_rate": 3.806330367262921e-05, + "loss": 1.5728, + "step": 229900 + }, + { + "epoch": 0.99, + "learning_rate": 3.805787528607597e-05, + "loss": 1.6266, + "step": 230000 + }, + { + "epoch": 0.99, + "eval_loss": 1.4668673276901245, + "eval_runtime": 17.8239, + "eval_samples_per_second": 561.046, + "eval_steps_per_second": 17.561, + "step": 230000 + }, + { + "epoch": 0.99, + "learning_rate": 3.805244689952274e-05, + "loss": 1.6237, + "step": 230100 + }, + { + "epoch": 0.99, + "learning_rate": 3.80470185129695e-05, + "loss": 1.5906, + "step": 230200 + }, + { + "epoch": 0.99, + "learning_rate": 3.8041590126416263e-05, + "loss": 1.5936, + "step": 230300 + }, + { + "epoch": 0.99, + "learning_rate": 3.803616173986303e-05, + "loss": 1.6093, + "step": 230400 + }, + { + "epoch": 0.99, + "learning_rate": 3.80307333533098e-05, + "loss": 1.6156, + "step": 230500 + }, + { + "epoch": 0.99, + "learning_rate": 3.802530496675656e-05, + "loss": 1.5972, + "step": 230600 + }, + { + "epoch": 0.99, + "learning_rate": 3.801987658020333e-05, + "loss": 1.6244, + "step": 230700 + }, + { + "epoch": 0.99, + "learning_rate": 3.801444819365009e-05, + "loss": 1.5729, + "step": 230800 + }, + { + "epoch": 0.99, + "learning_rate": 3.8009019807096855e-05, + "loss": 1.5968, + "step": 230900 + }, + { + "epoch": 0.99, + "learning_rate": 3.8003591420543624e-05, + "loss": 1.6167, + "step": 231000 + }, + { + "epoch": 0.99, + "learning_rate": 3.7998163033990386e-05, + "loss": 1.6071, + "step": 231100 + }, + { + "epoch": 0.99, + "learning_rate": 3.799273464743715e-05, + "loss": 1.5794, + "step": 231200 + }, + { + "epoch": 0.99, + "learning_rate": 3.7987306260883916e-05, + "loss": 1.6213, + "step": 231300 + }, + { + "epoch": 0.99, + "learning_rate": 3.798187787433068e-05, + "loss": 1.6047, + "step": 231400 + }, + { + "epoch": 0.99, + "learning_rate": 3.7976449487777447e-05, + "loss": 1.6175, + "step": 231500 + }, + { + "epoch": 0.99, + "learning_rate": 3.7971021101224215e-05, + "loss": 1.618, + "step": 231600 + }, + { + "epoch": 1.0, + "learning_rate": 3.796559271467098e-05, + "loss": 1.5891, + "step": 231700 + }, + { + "epoch": 1.0, + "learning_rate": 3.796016432811774e-05, + "loss": 1.5905, + "step": 231800 + }, + { + "epoch": 1.0, + "learning_rate": 3.795473594156451e-05, + "loss": 1.5845, + "step": 231900 + }, + { + "epoch": 1.0, + "learning_rate": 3.794930755501127e-05, + "loss": 1.5899, + "step": 232000 + }, + { + "epoch": 1.0, + "learning_rate": 3.794387916845804e-05, + "loss": 1.5894, + "step": 232100 + }, + { + "epoch": 1.0, + "learning_rate": 3.79384507819048e-05, + "loss": 1.6205, + "step": 232200 + }, + { + "epoch": 1.0, + "learning_rate": 3.793302239535156e-05, + "loss": 1.5876, + "step": 232300 + }, + { + "epoch": 1.0, + "learning_rate": 3.792759400879833e-05, + "loss": 1.5929, + "step": 232400 + }, + { + "epoch": 1.0, + "learning_rate": 3.79221656222451e-05, + "loss": 1.6052, + "step": 232500 + }, + { + "epoch": 1.0, + "learning_rate": 3.791673723569186e-05, + "loss": 1.5764, + "step": 232600 + }, + { + "epoch": 1.0, + "learning_rate": 3.791130884913863e-05, + "loss": 1.6319, + "step": 232700 + }, + { + "epoch": 1.0, + "learning_rate": 3.790588046258539e-05, + "loss": 1.6024, + "step": 232800 + }, + { + "epoch": 1.0, + "learning_rate": 3.7900452076032154e-05, + "loss": 1.5373, + "step": 232900 + }, + { + "epoch": 1.0, + "learning_rate": 3.789502368947892e-05, + "loss": 1.5602, + "step": 233000 + }, + { + "epoch": 1.0, + "learning_rate": 3.7889595302925684e-05, + "loss": 1.562, + "step": 233100 + }, + { + "epoch": 1.0, + "learning_rate": 3.7884166916372446e-05, + "loss": 1.5622, + "step": 233200 + }, + { + "epoch": 1.0, + "learning_rate": 3.7878738529819215e-05, + "loss": 1.5711, + "step": 233300 + }, + { + "epoch": 1.0, + "learning_rate": 3.7873310143265976e-05, + "loss": 1.5624, + "step": 233400 + }, + { + "epoch": 1.0, + "learning_rate": 3.7867881756712745e-05, + "loss": 1.622, + "step": 233500 + }, + { + "epoch": 1.0, + "learning_rate": 3.7862453370159514e-05, + "loss": 1.5584, + "step": 233600 + }, + { + "epoch": 1.0, + "learning_rate": 3.7857024983606276e-05, + "loss": 1.5626, + "step": 233700 + }, + { + "epoch": 1.0, + "learning_rate": 3.785159659705304e-05, + "loss": 1.5756, + "step": 233800 + }, + { + "epoch": 1.0, + "learning_rate": 3.7846168210499806e-05, + "loss": 1.6049, + "step": 233900 + }, + { + "epoch": 1.01, + "learning_rate": 3.784073982394657e-05, + "loss": 1.5274, + "step": 234000 + }, + { + "epoch": 1.01, + "learning_rate": 3.783531143739334e-05, + "loss": 1.5675, + "step": 234100 + }, + { + "epoch": 1.01, + "learning_rate": 3.78298830508401e-05, + "loss": 1.5812, + "step": 234200 + }, + { + "epoch": 1.01, + "learning_rate": 3.782445466428686e-05, + "loss": 1.5798, + "step": 234300 + }, + { + "epoch": 1.01, + "learning_rate": 3.781902627773363e-05, + "loss": 1.5817, + "step": 234400 + }, + { + "epoch": 1.01, + "learning_rate": 3.781359789118039e-05, + "loss": 1.5589, + "step": 234500 + }, + { + "epoch": 1.01, + "learning_rate": 3.780816950462716e-05, + "loss": 1.5635, + "step": 234600 + }, + { + "epoch": 1.01, + "learning_rate": 3.780274111807393e-05, + "loss": 1.5495, + "step": 234700 + }, + { + "epoch": 1.01, + "learning_rate": 3.779731273152069e-05, + "loss": 1.5788, + "step": 234800 + }, + { + "epoch": 1.01, + "learning_rate": 3.779188434496745e-05, + "loss": 1.5325, + "step": 234900 + }, + { + "epoch": 1.01, + "learning_rate": 3.778645595841422e-05, + "loss": 1.582, + "step": 235000 + }, + { + "epoch": 1.01, + "eval_loss": 1.4644287824630737, + "eval_runtime": 17.7873, + "eval_samples_per_second": 562.198, + "eval_steps_per_second": 17.597, + "step": 235000 + }, + { + "epoch": 1.01, + "learning_rate": 3.778102757186098e-05, + "loss": 1.5531, + "step": 235100 + }, + { + "epoch": 1.01, + "learning_rate": 3.7775599185307744e-05, + "loss": 1.5584, + "step": 235200 + }, + { + "epoch": 1.01, + "learning_rate": 3.777017079875451e-05, + "loss": 1.5617, + "step": 235300 + }, + { + "epoch": 1.01, + "learning_rate": 3.7764742412201275e-05, + "loss": 1.5662, + "step": 235400 + }, + { + "epoch": 1.01, + "learning_rate": 3.7759314025648044e-05, + "loss": 1.5365, + "step": 235500 + }, + { + "epoch": 1.01, + "learning_rate": 3.7753885639094805e-05, + "loss": 1.5919, + "step": 235600 + }, + { + "epoch": 1.01, + "learning_rate": 3.7748457252541574e-05, + "loss": 1.5485, + "step": 235700 + }, + { + "epoch": 1.01, + "learning_rate": 3.7743028865988336e-05, + "loss": 1.5795, + "step": 235800 + }, + { + "epoch": 1.01, + "learning_rate": 3.7737600479435105e-05, + "loss": 1.5378, + "step": 235900 + }, + { + "epoch": 1.01, + "learning_rate": 3.7732172092881866e-05, + "loss": 1.5702, + "step": 236000 + }, + { + "epoch": 1.01, + "learning_rate": 3.7726743706328635e-05, + "loss": 1.586, + "step": 236100 + }, + { + "epoch": 1.01, + "learning_rate": 3.77213153197754e-05, + "loss": 1.568, + "step": 236200 + }, + { + "epoch": 1.02, + "learning_rate": 3.771588693322216e-05, + "loss": 1.5939, + "step": 236300 + }, + { + "epoch": 1.02, + "learning_rate": 3.771045854666893e-05, + "loss": 1.5841, + "step": 236400 + }, + { + "epoch": 1.02, + "learning_rate": 3.770503016011569e-05, + "loss": 1.5841, + "step": 236500 + }, + { + "epoch": 1.02, + "learning_rate": 3.769960177356245e-05, + "loss": 1.543, + "step": 236600 + }, + { + "epoch": 1.02, + "learning_rate": 3.769417338700923e-05, + "loss": 1.5428, + "step": 236700 + }, + { + "epoch": 1.02, + "learning_rate": 3.768874500045599e-05, + "loss": 1.5519, + "step": 236800 + }, + { + "epoch": 1.02, + "learning_rate": 3.768331661390275e-05, + "loss": 1.5362, + "step": 236900 + }, + { + "epoch": 1.02, + "learning_rate": 3.767788822734952e-05, + "loss": 1.543, + "step": 237000 + }, + { + "epoch": 1.02, + "learning_rate": 3.767245984079628e-05, + "loss": 1.5799, + "step": 237100 + }, + { + "epoch": 1.02, + "learning_rate": 3.766703145424304e-05, + "loss": 1.575, + "step": 237200 + }, + { + "epoch": 1.02, + "learning_rate": 3.766160306768981e-05, + "loss": 1.6244, + "step": 237300 + }, + { + "epoch": 1.02, + "learning_rate": 3.7656174681136573e-05, + "loss": 1.5605, + "step": 237400 + }, + { + "epoch": 1.02, + "learning_rate": 3.765074629458334e-05, + "loss": 1.5672, + "step": 237500 + }, + { + "epoch": 1.02, + "learning_rate": 3.7645317908030104e-05, + "loss": 1.5475, + "step": 237600 + }, + { + "epoch": 1.02, + "learning_rate": 3.7639889521476866e-05, + "loss": 1.5619, + "step": 237700 + }, + { + "epoch": 1.02, + "learning_rate": 3.7634461134923634e-05, + "loss": 1.563, + "step": 237800 + }, + { + "epoch": 1.02, + "learning_rate": 3.76290327483704e-05, + "loss": 1.5412, + "step": 237900 + }, + { + "epoch": 1.02, + "learning_rate": 3.7623604361817165e-05, + "loss": 1.5627, + "step": 238000 + }, + { + "epoch": 1.02, + "learning_rate": 3.7618175975263934e-05, + "loss": 1.5718, + "step": 238100 + }, + { + "epoch": 1.02, + "learning_rate": 3.7612747588710696e-05, + "loss": 1.5722, + "step": 238200 + }, + { + "epoch": 1.02, + "learning_rate": 3.760731920215746e-05, + "loss": 1.5315, + "step": 238300 + }, + { + "epoch": 1.02, + "learning_rate": 3.7601890815604226e-05, + "loss": 1.5888, + "step": 238400 + }, + { + "epoch": 1.02, + "learning_rate": 3.759646242905099e-05, + "loss": 1.5922, + "step": 238500 + }, + { + "epoch": 1.03, + "learning_rate": 3.759103404249775e-05, + "loss": 1.5701, + "step": 238600 + }, + { + "epoch": 1.03, + "learning_rate": 3.758560565594452e-05, + "loss": 1.543, + "step": 238700 + }, + { + "epoch": 1.03, + "learning_rate": 3.758017726939129e-05, + "loss": 1.5407, + "step": 238800 + }, + { + "epoch": 1.03, + "learning_rate": 3.757474888283805e-05, + "loss": 1.5368, + "step": 238900 + }, + { + "epoch": 1.03, + "learning_rate": 3.756932049628482e-05, + "loss": 1.5643, + "step": 239000 + }, + { + "epoch": 1.03, + "learning_rate": 3.756389210973158e-05, + "loss": 1.5682, + "step": 239100 + }, + { + "epoch": 1.03, + "learning_rate": 3.755846372317834e-05, + "loss": 1.6079, + "step": 239200 + }, + { + "epoch": 1.03, + "learning_rate": 3.755303533662511e-05, + "loss": 1.5494, + "step": 239300 + }, + { + "epoch": 1.03, + "learning_rate": 3.754760695007187e-05, + "loss": 1.5696, + "step": 239400 + }, + { + "epoch": 1.03, + "learning_rate": 3.754217856351864e-05, + "loss": 1.589, + "step": 239500 + }, + { + "epoch": 1.03, + "learning_rate": 3.75367501769654e-05, + "loss": 1.5783, + "step": 239600 + }, + { + "epoch": 1.03, + "learning_rate": 3.7531321790412164e-05, + "loss": 1.5656, + "step": 239700 + }, + { + "epoch": 1.03, + "learning_rate": 3.752589340385893e-05, + "loss": 1.5555, + "step": 239800 + }, + { + "epoch": 1.03, + "learning_rate": 3.75204650173057e-05, + "loss": 1.5458, + "step": 239900 + }, + { + "epoch": 1.03, + "learning_rate": 3.7515036630752463e-05, + "loss": 1.554, + "step": 240000 + }, + { + "epoch": 1.03, + "eval_loss": 1.4660414457321167, + "eval_runtime": 17.7415, + "eval_samples_per_second": 563.65, + "eval_steps_per_second": 17.642, + "step": 240000 + }, + { + "epoch": 1.03, + "learning_rate": 3.750960824419923e-05, + "loss": 1.5507, + "step": 240100 + }, + { + "epoch": 1.03, + "learning_rate": 3.7504179857645994e-05, + "loss": 1.576, + "step": 240200 + }, + { + "epoch": 1.03, + "learning_rate": 3.7498751471092756e-05, + "loss": 1.5729, + "step": 240300 + }, + { + "epoch": 1.03, + "learning_rate": 3.7493323084539525e-05, + "loss": 1.5668, + "step": 240400 + }, + { + "epoch": 1.03, + "learning_rate": 3.7487894697986286e-05, + "loss": 1.5422, + "step": 240500 + }, + { + "epoch": 1.03, + "learning_rate": 3.748246631143305e-05, + "loss": 1.5584, + "step": 240600 + }, + { + "epoch": 1.03, + "learning_rate": 3.747703792487982e-05, + "loss": 1.5586, + "step": 240700 + }, + { + "epoch": 1.03, + "learning_rate": 3.747160953832658e-05, + "loss": 1.581, + "step": 240800 + }, + { + "epoch": 1.03, + "learning_rate": 3.746618115177335e-05, + "loss": 1.5551, + "step": 240900 + }, + { + "epoch": 1.04, + "learning_rate": 3.7460752765220116e-05, + "loss": 1.5722, + "step": 241000 + }, + { + "epoch": 1.04, + "learning_rate": 3.745532437866688e-05, + "loss": 1.549, + "step": 241100 + }, + { + "epoch": 1.04, + "learning_rate": 3.744989599211364e-05, + "loss": 1.5682, + "step": 241200 + }, + { + "epoch": 1.04, + "learning_rate": 3.744446760556041e-05, + "loss": 1.5629, + "step": 241300 + }, + { + "epoch": 1.04, + "learning_rate": 3.743903921900717e-05, + "loss": 1.558, + "step": 241400 + }, + { + "epoch": 1.04, + "learning_rate": 3.743361083245394e-05, + "loss": 1.5554, + "step": 241500 + }, + { + "epoch": 1.04, + "learning_rate": 3.74281824459007e-05, + "loss": 1.5644, + "step": 241600 + }, + { + "epoch": 1.04, + "learning_rate": 3.742275405934746e-05, + "loss": 1.5892, + "step": 241700 + }, + { + "epoch": 1.04, + "learning_rate": 3.741732567279423e-05, + "loss": 1.5873, + "step": 241800 + }, + { + "epoch": 1.04, + "learning_rate": 3.741189728624099e-05, + "loss": 1.5837, + "step": 241900 + }, + { + "epoch": 1.04, + "learning_rate": 3.740646889968776e-05, + "loss": 1.5499, + "step": 242000 + }, + { + "epoch": 1.04, + "learning_rate": 3.740104051313453e-05, + "loss": 1.5734, + "step": 242100 + }, + { + "epoch": 1.04, + "learning_rate": 3.739561212658129e-05, + "loss": 1.5728, + "step": 242200 + }, + { + "epoch": 1.04, + "learning_rate": 3.7390183740028054e-05, + "loss": 1.5915, + "step": 242300 + }, + { + "epoch": 1.04, + "learning_rate": 3.738475535347482e-05, + "loss": 1.5816, + "step": 242400 + }, + { + "epoch": 1.04, + "learning_rate": 3.7379326966921585e-05, + "loss": 1.5696, + "step": 242500 + }, + { + "epoch": 1.04, + "learning_rate": 3.737389858036835e-05, + "loss": 1.5796, + "step": 242600 + }, + { + "epoch": 1.04, + "learning_rate": 3.7368470193815115e-05, + "loss": 1.5548, + "step": 242700 + }, + { + "epoch": 1.04, + "learning_rate": 3.736304180726188e-05, + "loss": 1.5453, + "step": 242800 + }, + { + "epoch": 1.04, + "learning_rate": 3.7357613420708646e-05, + "loss": 1.5668, + "step": 242900 + }, + { + "epoch": 1.04, + "learning_rate": 3.7352185034155415e-05, + "loss": 1.5536, + "step": 243000 + }, + { + "epoch": 1.04, + "learning_rate": 3.7346756647602176e-05, + "loss": 1.5793, + "step": 243100 + }, + { + "epoch": 1.04, + "learning_rate": 3.734132826104894e-05, + "loss": 1.571, + "step": 243200 + }, + { + "epoch": 1.05, + "learning_rate": 3.733589987449571e-05, + "loss": 1.5384, + "step": 243300 + }, + { + "epoch": 1.05, + "learning_rate": 3.733047148794247e-05, + "loss": 1.5789, + "step": 243400 + }, + { + "epoch": 1.05, + "learning_rate": 3.732504310138924e-05, + "loss": 1.5537, + "step": 243500 + }, + { + "epoch": 1.05, + "learning_rate": 3.7319614714836e-05, + "loss": 1.5811, + "step": 243600 + }, + { + "epoch": 1.05, + "learning_rate": 3.731418632828276e-05, + "loss": 1.5629, + "step": 243700 + }, + { + "epoch": 1.05, + "learning_rate": 3.730875794172953e-05, + "loss": 1.5489, + "step": 243800 + }, + { + "epoch": 1.05, + "learning_rate": 3.730332955517629e-05, + "loss": 1.5897, + "step": 243900 + }, + { + "epoch": 1.05, + "learning_rate": 3.729790116862306e-05, + "loss": 1.5541, + "step": 244000 + }, + { + "epoch": 1.05, + "learning_rate": 3.729247278206983e-05, + "loss": 1.5482, + "step": 244100 + }, + { + "epoch": 1.05, + "learning_rate": 3.728704439551659e-05, + "loss": 1.5748, + "step": 244200 + }, + { + "epoch": 1.05, + "learning_rate": 3.728161600896335e-05, + "loss": 1.5724, + "step": 244300 + }, + { + "epoch": 1.05, + "learning_rate": 3.727618762241012e-05, + "loss": 1.5728, + "step": 244400 + }, + { + "epoch": 1.05, + "learning_rate": 3.727075923585688e-05, + "loss": 1.5803, + "step": 244500 + }, + { + "epoch": 1.05, + "learning_rate": 3.7265330849303645e-05, + "loss": 1.5593, + "step": 244600 + }, + { + "epoch": 1.05, + "learning_rate": 3.7259902462750414e-05, + "loss": 1.5943, + "step": 244700 + }, + { + "epoch": 1.05, + "learning_rate": 3.7254474076197176e-05, + "loss": 1.5571, + "step": 244800 + }, + { + "epoch": 1.05, + "learning_rate": 3.724904568964394e-05, + "loss": 1.5679, + "step": 244900 + }, + { + "epoch": 1.05, + "learning_rate": 3.7243617303090706e-05, + "loss": 1.5831, + "step": 245000 + }, + { + "epoch": 1.05, + "eval_loss": 1.4603722095489502, + "eval_runtime": 17.8071, + "eval_samples_per_second": 561.575, + "eval_steps_per_second": 17.577, + "step": 245000 + }, + { + "epoch": 1.05, + "learning_rate": 3.7238188916537475e-05, + "loss": 1.5906, + "step": 245100 + }, + { + "epoch": 1.05, + "learning_rate": 3.723276052998424e-05, + "loss": 1.5713, + "step": 245200 + }, + { + "epoch": 1.05, + "learning_rate": 3.7227332143431005e-05, + "loss": 1.57, + "step": 245300 + }, + { + "epoch": 1.05, + "learning_rate": 3.722190375687777e-05, + "loss": 1.5683, + "step": 245400 + }, + { + "epoch": 1.05, + "learning_rate": 3.7216475370324536e-05, + "loss": 1.5457, + "step": 245500 + }, + { + "epoch": 1.06, + "learning_rate": 3.72110469837713e-05, + "loss": 1.5651, + "step": 245600 + }, + { + "epoch": 1.06, + "learning_rate": 3.720561859721806e-05, + "loss": 1.6014, + "step": 245700 + }, + { + "epoch": 1.06, + "learning_rate": 3.720019021066483e-05, + "loss": 1.5894, + "step": 245800 + }, + { + "epoch": 1.06, + "learning_rate": 3.719476182411159e-05, + "loss": 1.5541, + "step": 245900 + }, + { + "epoch": 1.06, + "learning_rate": 3.718933343755835e-05, + "loss": 1.5763, + "step": 246000 + }, + { + "epoch": 1.06, + "learning_rate": 3.718390505100513e-05, + "loss": 1.5676, + "step": 246100 + }, + { + "epoch": 1.06, + "learning_rate": 3.717847666445189e-05, + "loss": 1.6003, + "step": 246200 + }, + { + "epoch": 1.06, + "learning_rate": 3.717304827789865e-05, + "loss": 1.5466, + "step": 246300 + }, + { + "epoch": 1.06, + "learning_rate": 3.716761989134542e-05, + "loss": 1.5364, + "step": 246400 + }, + { + "epoch": 1.06, + "learning_rate": 3.716219150479218e-05, + "loss": 1.5713, + "step": 246500 + }, + { + "epoch": 1.06, + "learning_rate": 3.7156763118238944e-05, + "loss": 1.5586, + "step": 246600 + }, + { + "epoch": 1.06, + "learning_rate": 3.715133473168571e-05, + "loss": 1.5812, + "step": 246700 + }, + { + "epoch": 1.06, + "learning_rate": 3.7145906345132474e-05, + "loss": 1.6289, + "step": 246800 + }, + { + "epoch": 1.06, + "learning_rate": 3.7140477958579236e-05, + "loss": 1.5595, + "step": 246900 + }, + { + "epoch": 1.06, + "learning_rate": 3.7135049572026005e-05, + "loss": 1.5414, + "step": 247000 + }, + { + "epoch": 1.06, + "learning_rate": 3.712962118547277e-05, + "loss": 1.6069, + "step": 247100 + }, + { + "epoch": 1.06, + "learning_rate": 3.7124192798919535e-05, + "loss": 1.5832, + "step": 247200 + }, + { + "epoch": 1.06, + "learning_rate": 3.7118764412366304e-05, + "loss": 1.5645, + "step": 247300 + }, + { + "epoch": 1.06, + "learning_rate": 3.7113336025813066e-05, + "loss": 1.5702, + "step": 247400 + }, + { + "epoch": 1.06, + "learning_rate": 3.7107907639259834e-05, + "loss": 1.5632, + "step": 247500 + }, + { + "epoch": 1.06, + "learning_rate": 3.7102479252706596e-05, + "loss": 1.5915, + "step": 247600 + }, + { + "epoch": 1.06, + "learning_rate": 3.709705086615336e-05, + "loss": 1.5472, + "step": 247700 + }, + { + "epoch": 1.06, + "learning_rate": 3.709162247960013e-05, + "loss": 1.5432, + "step": 247800 + }, + { + "epoch": 1.06, + "learning_rate": 3.708619409304689e-05, + "loss": 1.5848, + "step": 247900 + }, + { + "epoch": 1.07, + "learning_rate": 3.708076570649365e-05, + "loss": 1.5771, + "step": 248000 + }, + { + "epoch": 1.07, + "learning_rate": 3.707533731994042e-05, + "loss": 1.5733, + "step": 248100 + }, + { + "epoch": 1.07, + "learning_rate": 3.706990893338719e-05, + "loss": 1.564, + "step": 248200 + }, + { + "epoch": 1.07, + "learning_rate": 3.706448054683395e-05, + "loss": 1.5416, + "step": 248300 + }, + { + "epoch": 1.07, + "learning_rate": 3.705905216028072e-05, + "loss": 1.5679, + "step": 248400 + }, + { + "epoch": 1.07, + "learning_rate": 3.705362377372748e-05, + "loss": 1.5614, + "step": 248500 + }, + { + "epoch": 1.07, + "learning_rate": 3.704819538717424e-05, + "loss": 1.5452, + "step": 248600 + }, + { + "epoch": 1.07, + "learning_rate": 3.704276700062101e-05, + "loss": 1.551, + "step": 248700 + }, + { + "epoch": 1.07, + "learning_rate": 3.703733861406777e-05, + "loss": 1.5544, + "step": 248800 + }, + { + "epoch": 1.07, + "learning_rate": 3.7031910227514535e-05, + "loss": 1.5753, + "step": 248900 + }, + { + "epoch": 1.07, + "learning_rate": 3.70264818409613e-05, + "loss": 1.5518, + "step": 249000 + }, + { + "epoch": 1.07, + "learning_rate": 3.7021053454408065e-05, + "loss": 1.581, + "step": 249100 + }, + { + "epoch": 1.07, + "learning_rate": 3.7015625067854834e-05, + "loss": 1.556, + "step": 249200 + }, + { + "epoch": 1.07, + "learning_rate": 3.70101966813016e-05, + "loss": 1.5366, + "step": 249300 + }, + { + "epoch": 1.07, + "learning_rate": 3.7004768294748364e-05, + "loss": 1.5959, + "step": 249400 + }, + { + "epoch": 1.07, + "learning_rate": 3.699933990819513e-05, + "loss": 1.5555, + "step": 249500 + }, + { + "epoch": 1.07, + "learning_rate": 3.6993911521641895e-05, + "loss": 1.5879, + "step": 249600 + }, + { + "epoch": 1.07, + "learning_rate": 3.698848313508866e-05, + "loss": 1.5679, + "step": 249700 + }, + { + "epoch": 1.07, + "learning_rate": 3.6983054748535425e-05, + "loss": 1.5627, + "step": 249800 + }, + { + "epoch": 1.07, + "learning_rate": 3.697762636198219e-05, + "loss": 1.581, + "step": 249900 + }, + { + "epoch": 1.07, + "learning_rate": 3.697219797542895e-05, + "loss": 1.5692, + "step": 250000 + }, + { + "epoch": 1.07, + "eval_loss": 1.4626483917236328, + "eval_runtime": 17.7967, + "eval_samples_per_second": 561.901, + "eval_steps_per_second": 17.588, + "step": 250000 + }, + { + "epoch": 1.07, + "learning_rate": 3.696676958887572e-05, + "loss": 1.5572, + "step": 250100 + }, + { + "epoch": 1.07, + "learning_rate": 3.696134120232248e-05, + "loss": 1.5778, + "step": 250200 + }, + { + "epoch": 1.08, + "learning_rate": 3.695591281576925e-05, + "loss": 1.5707, + "step": 250300 + }, + { + "epoch": 1.08, + "learning_rate": 3.695048442921602e-05, + "loss": 1.5474, + "step": 250400 + }, + { + "epoch": 1.08, + "learning_rate": 3.694505604266278e-05, + "loss": 1.5661, + "step": 250500 + }, + { + "epoch": 1.08, + "learning_rate": 3.693962765610954e-05, + "loss": 1.5771, + "step": 250600 + }, + { + "epoch": 1.08, + "learning_rate": 3.693419926955631e-05, + "loss": 1.5655, + "step": 250700 + }, + { + "epoch": 1.08, + "learning_rate": 3.692877088300307e-05, + "loss": 1.565, + "step": 250800 + }, + { + "epoch": 1.08, + "learning_rate": 3.692334249644983e-05, + "loss": 1.553, + "step": 250900 + }, + { + "epoch": 1.08, + "learning_rate": 3.69179141098966e-05, + "loss": 1.5797, + "step": 251000 + }, + { + "epoch": 1.08, + "learning_rate": 3.6912485723343364e-05, + "loss": 1.5696, + "step": 251100 + }, + { + "epoch": 1.08, + "learning_rate": 3.690705733679013e-05, + "loss": 1.5366, + "step": 251200 + }, + { + "epoch": 1.08, + "learning_rate": 3.6901628950236894e-05, + "loss": 1.5782, + "step": 251300 + }, + { + "epoch": 1.08, + "learning_rate": 3.689620056368366e-05, + "loss": 1.5443, + "step": 251400 + }, + { + "epoch": 1.08, + "learning_rate": 3.689077217713043e-05, + "loss": 1.5377, + "step": 251500 + }, + { + "epoch": 1.08, + "learning_rate": 3.688534379057719e-05, + "loss": 1.5757, + "step": 251600 + }, + { + "epoch": 1.08, + "learning_rate": 3.6879915404023955e-05, + "loss": 1.5642, + "step": 251700 + }, + { + "epoch": 1.08, + "learning_rate": 3.6874487017470724e-05, + "loss": 1.5692, + "step": 251800 + }, + { + "epoch": 1.08, + "learning_rate": 3.6869058630917486e-05, + "loss": 1.5721, + "step": 251900 + }, + { + "epoch": 1.08, + "learning_rate": 3.686363024436425e-05, + "loss": 1.5283, + "step": 252000 + }, + { + "epoch": 1.08, + "learning_rate": 3.6858201857811016e-05, + "loss": 1.5645, + "step": 252100 + }, + { + "epoch": 1.08, + "learning_rate": 3.685277347125778e-05, + "loss": 1.5916, + "step": 252200 + }, + { + "epoch": 1.08, + "learning_rate": 3.684734508470454e-05, + "loss": 1.556, + "step": 252300 + }, + { + "epoch": 1.08, + "learning_rate": 3.6841916698151315e-05, + "loss": 1.5695, + "step": 252400 + }, + { + "epoch": 1.08, + "learning_rate": 3.683648831159808e-05, + "loss": 1.5723, + "step": 252500 + }, + { + "epoch": 1.09, + "learning_rate": 3.683105992504484e-05, + "loss": 1.576, + "step": 252600 + }, + { + "epoch": 1.09, + "learning_rate": 3.682563153849161e-05, + "loss": 1.5544, + "step": 252700 + }, + { + "epoch": 1.09, + "learning_rate": 3.682020315193837e-05, + "loss": 1.5653, + "step": 252800 + }, + { + "epoch": 1.09, + "learning_rate": 3.681477476538513e-05, + "loss": 1.5586, + "step": 252900 + }, + { + "epoch": 1.09, + "learning_rate": 3.68093463788319e-05, + "loss": 1.5375, + "step": 253000 + }, + { + "epoch": 1.09, + "learning_rate": 3.680391799227866e-05, + "loss": 1.5459, + "step": 253100 + }, + { + "epoch": 1.09, + "learning_rate": 3.679848960572543e-05, + "loss": 1.5852, + "step": 253200 + }, + { + "epoch": 1.09, + "learning_rate": 3.679306121917219e-05, + "loss": 1.5484, + "step": 253300 + }, + { + "epoch": 1.09, + "learning_rate": 3.6787632832618955e-05, + "loss": 1.58, + "step": 253400 + }, + { + "epoch": 1.09, + "learning_rate": 3.678220444606573e-05, + "loss": 1.5391, + "step": 253500 + }, + { + "epoch": 1.09, + "learning_rate": 3.677677605951249e-05, + "loss": 1.542, + "step": 253600 + }, + { + "epoch": 1.09, + "learning_rate": 3.6771347672959254e-05, + "loss": 1.5494, + "step": 253700 + }, + { + "epoch": 1.09, + "learning_rate": 3.676591928640602e-05, + "loss": 1.6048, + "step": 253800 + }, + { + "epoch": 1.09, + "learning_rate": 3.6760490899852784e-05, + "loss": 1.5955, + "step": 253900 + }, + { + "epoch": 1.09, + "learning_rate": 3.6755062513299546e-05, + "loss": 1.5669, + "step": 254000 + }, + { + "epoch": 1.09, + "learning_rate": 3.6749634126746315e-05, + "loss": 1.5435, + "step": 254100 + }, + { + "epoch": 1.09, + "learning_rate": 3.674420574019308e-05, + "loss": 1.5766, + "step": 254200 + }, + { + "epoch": 1.09, + "learning_rate": 3.673877735363984e-05, + "loss": 1.5561, + "step": 254300 + }, + { + "epoch": 1.09, + "learning_rate": 3.673334896708661e-05, + "loss": 1.565, + "step": 254400 + }, + { + "epoch": 1.09, + "learning_rate": 3.6727920580533376e-05, + "loss": 1.5592, + "step": 254500 + }, + { + "epoch": 1.09, + "learning_rate": 3.672249219398014e-05, + "loss": 1.5598, + "step": 254600 + }, + { + "epoch": 1.09, + "learning_rate": 3.6717063807426906e-05, + "loss": 1.5901, + "step": 254700 + }, + { + "epoch": 1.09, + "learning_rate": 3.671163542087367e-05, + "loss": 1.5766, + "step": 254800 + }, + { + "epoch": 1.1, + "learning_rate": 3.670620703432043e-05, + "loss": 1.5855, + "step": 254900 + }, + { + "epoch": 1.1, + "learning_rate": 3.67007786477672e-05, + "loss": 1.558, + "step": 255000 + }, + { + "epoch": 1.1, + "eval_loss": 1.464188814163208, + "eval_runtime": 17.8297, + "eval_samples_per_second": 560.862, + "eval_steps_per_second": 17.555, + "step": 255000 + }, + { + "epoch": 1.1, + "learning_rate": 3.669535026121396e-05, + "loss": 1.5314, + "step": 255100 + }, + { + "epoch": 1.1, + "learning_rate": 3.668992187466073e-05, + "loss": 1.5544, + "step": 255200 + }, + { + "epoch": 1.1, + "learning_rate": 3.668449348810749e-05, + "loss": 1.5317, + "step": 255300 + }, + { + "epoch": 1.1, + "learning_rate": 3.667906510155425e-05, + "loss": 1.5621, + "step": 255400 + }, + { + "epoch": 1.1, + "learning_rate": 3.667363671500102e-05, + "loss": 1.5577, + "step": 255500 + }, + { + "epoch": 1.1, + "learning_rate": 3.666820832844779e-05, + "loss": 1.6049, + "step": 255600 + }, + { + "epoch": 1.1, + "learning_rate": 3.666277994189455e-05, + "loss": 1.5713, + "step": 255700 + }, + { + "epoch": 1.1, + "learning_rate": 3.665735155534132e-05, + "loss": 1.5628, + "step": 255800 + }, + { + "epoch": 1.1, + "learning_rate": 3.665192316878808e-05, + "loss": 1.5665, + "step": 255900 + }, + { + "epoch": 1.1, + "learning_rate": 3.6646494782234845e-05, + "loss": 1.5555, + "step": 256000 + }, + { + "epoch": 1.1, + "learning_rate": 3.664106639568161e-05, + "loss": 1.5425, + "step": 256100 + }, + { + "epoch": 1.1, + "learning_rate": 3.6635638009128375e-05, + "loss": 1.5902, + "step": 256200 + }, + { + "epoch": 1.1, + "learning_rate": 3.663020962257514e-05, + "loss": 1.5094, + "step": 256300 + }, + { + "epoch": 1.1, + "learning_rate": 3.6624781236021906e-05, + "loss": 1.5773, + "step": 256400 + }, + { + "epoch": 1.1, + "learning_rate": 3.661935284946867e-05, + "loss": 1.5582, + "step": 256500 + }, + { + "epoch": 1.1, + "learning_rate": 3.6613924462915436e-05, + "loss": 1.5661, + "step": 256600 + }, + { + "epoch": 1.1, + "learning_rate": 3.6608496076362205e-05, + "loss": 1.5714, + "step": 256700 + }, + { + "epoch": 1.1, + "learning_rate": 3.660306768980897e-05, + "loss": 1.5805, + "step": 256800 + }, + { + "epoch": 1.1, + "learning_rate": 3.659763930325573e-05, + "loss": 1.6005, + "step": 256900 + }, + { + "epoch": 1.1, + "learning_rate": 3.65922109167025e-05, + "loss": 1.5594, + "step": 257000 + }, + { + "epoch": 1.1, + "learning_rate": 3.658678253014926e-05, + "loss": 1.564, + "step": 257100 + }, + { + "epoch": 1.1, + "learning_rate": 3.658135414359603e-05, + "loss": 1.5615, + "step": 257200 + }, + { + "epoch": 1.11, + "learning_rate": 3.657592575704279e-05, + "loss": 1.5556, + "step": 257300 + }, + { + "epoch": 1.11, + "learning_rate": 3.657049737048955e-05, + "loss": 1.5579, + "step": 257400 + }, + { + "epoch": 1.11, + "learning_rate": 3.656506898393632e-05, + "loss": 1.5476, + "step": 257500 + }, + { + "epoch": 1.11, + "learning_rate": 3.655964059738308e-05, + "loss": 1.5855, + "step": 257600 + }, + { + "epoch": 1.11, + "learning_rate": 3.655421221082985e-05, + "loss": 1.5781, + "step": 257700 + }, + { + "epoch": 1.11, + "learning_rate": 3.654878382427662e-05, + "loss": 1.5664, + "step": 257800 + }, + { + "epoch": 1.11, + "learning_rate": 3.654335543772338e-05, + "loss": 1.5789, + "step": 257900 + }, + { + "epoch": 1.11, + "learning_rate": 3.653792705117014e-05, + "loss": 1.5674, + "step": 258000 + }, + { + "epoch": 1.11, + "learning_rate": 3.653249866461691e-05, + "loss": 1.5452, + "step": 258100 + }, + { + "epoch": 1.11, + "learning_rate": 3.6527070278063674e-05, + "loss": 1.576, + "step": 258200 + }, + { + "epoch": 1.11, + "learning_rate": 3.6521641891510435e-05, + "loss": 1.5243, + "step": 258300 + }, + { + "epoch": 1.11, + "learning_rate": 3.6516213504957204e-05, + "loss": 1.5706, + "step": 258400 + }, + { + "epoch": 1.11, + "learning_rate": 3.6510785118403966e-05, + "loss": 1.5672, + "step": 258500 + }, + { + "epoch": 1.11, + "learning_rate": 3.6505356731850735e-05, + "loss": 1.5574, + "step": 258600 + }, + { + "epoch": 1.11, + "learning_rate": 3.64999283452975e-05, + "loss": 1.5636, + "step": 258700 + }, + { + "epoch": 1.11, + "learning_rate": 3.6494499958744265e-05, + "loss": 1.5683, + "step": 258800 + }, + { + "epoch": 1.11, + "learning_rate": 3.648907157219103e-05, + "loss": 1.6023, + "step": 258900 + }, + { + "epoch": 1.11, + "learning_rate": 3.6483643185637796e-05, + "loss": 1.6015, + "step": 259000 + }, + { + "epoch": 1.11, + "learning_rate": 3.647821479908456e-05, + "loss": 1.5853, + "step": 259100 + }, + { + "epoch": 1.11, + "learning_rate": 3.6472786412531326e-05, + "loss": 1.5587, + "step": 259200 + }, + { + "epoch": 1.11, + "learning_rate": 3.646735802597809e-05, + "loss": 1.5462, + "step": 259300 + }, + { + "epoch": 1.11, + "learning_rate": 3.646192963942485e-05, + "loss": 1.5808, + "step": 259400 + }, + { + "epoch": 1.11, + "learning_rate": 3.645650125287162e-05, + "loss": 1.5588, + "step": 259500 + }, + { + "epoch": 1.12, + "learning_rate": 3.645107286631838e-05, + "loss": 1.5724, + "step": 259600 + }, + { + "epoch": 1.12, + "learning_rate": 3.644564447976514e-05, + "loss": 1.5439, + "step": 259700 + }, + { + "epoch": 1.12, + "learning_rate": 3.644021609321192e-05, + "loss": 1.5489, + "step": 259800 + }, + { + "epoch": 1.12, + "learning_rate": 3.643478770665868e-05, + "loss": 1.5738, + "step": 259900 + }, + { + "epoch": 1.12, + "learning_rate": 3.642935932010544e-05, + "loss": 1.5925, + "step": 260000 + }, + { + "epoch": 1.12, + "eval_loss": 1.4609049558639526, + "eval_runtime": 17.8329, + "eval_samples_per_second": 560.761, + "eval_steps_per_second": 17.552, + "step": 260000 + }, + { + "epoch": 1.12, + "learning_rate": 3.642393093355221e-05, + "loss": 1.531, + "step": 260100 + }, + { + "epoch": 1.12, + "learning_rate": 3.641850254699897e-05, + "loss": 1.5514, + "step": 260200 + }, + { + "epoch": 1.12, + "learning_rate": 3.6413074160445734e-05, + "loss": 1.5872, + "step": 260300 + }, + { + "epoch": 1.12, + "learning_rate": 3.64076457738925e-05, + "loss": 1.5746, + "step": 260400 + }, + { + "epoch": 1.12, + "learning_rate": 3.6402217387339265e-05, + "loss": 1.5698, + "step": 260500 + }, + { + "epoch": 1.12, + "learning_rate": 3.639678900078603e-05, + "loss": 1.5619, + "step": 260600 + }, + { + "epoch": 1.12, + "learning_rate": 3.6391360614232795e-05, + "loss": 1.5511, + "step": 260700 + }, + { + "epoch": 1.12, + "learning_rate": 3.6385932227679564e-05, + "loss": 1.562, + "step": 260800 + }, + { + "epoch": 1.12, + "learning_rate": 3.6380503841126326e-05, + "loss": 1.5527, + "step": 260900 + }, + { + "epoch": 1.12, + "learning_rate": 3.6375075454573094e-05, + "loss": 1.5525, + "step": 261000 + }, + { + "epoch": 1.12, + "learning_rate": 3.6369647068019856e-05, + "loss": 1.572, + "step": 261100 + }, + { + "epoch": 1.12, + "learning_rate": 3.6364218681466625e-05, + "loss": 1.6127, + "step": 261200 + }, + { + "epoch": 1.12, + "learning_rate": 3.635879029491339e-05, + "loss": 1.5688, + "step": 261300 + }, + { + "epoch": 1.12, + "learning_rate": 3.635336190836015e-05, + "loss": 1.5462, + "step": 261400 + }, + { + "epoch": 1.12, + "learning_rate": 3.634793352180692e-05, + "loss": 1.5759, + "step": 261500 + }, + { + "epoch": 1.12, + "learning_rate": 3.634250513525368e-05, + "loss": 1.5499, + "step": 261600 + }, + { + "epoch": 1.12, + "learning_rate": 3.633707674870044e-05, + "loss": 1.5334, + "step": 261700 + }, + { + "epoch": 1.12, + "learning_rate": 3.633164836214721e-05, + "loss": 1.5306, + "step": 261800 + }, + { + "epoch": 1.13, + "learning_rate": 3.632621997559398e-05, + "loss": 1.5375, + "step": 261900 + }, + { + "epoch": 1.13, + "learning_rate": 3.632079158904074e-05, + "loss": 1.5449, + "step": 262000 + }, + { + "epoch": 1.13, + "learning_rate": 3.631536320248751e-05, + "loss": 1.5735, + "step": 262100 + }, + { + "epoch": 1.13, + "learning_rate": 3.630993481593427e-05, + "loss": 1.5231, + "step": 262200 + }, + { + "epoch": 1.13, + "learning_rate": 3.630450642938103e-05, + "loss": 1.5716, + "step": 262300 + }, + { + "epoch": 1.13, + "learning_rate": 3.62990780428278e-05, + "loss": 1.5735, + "step": 262400 + }, + { + "epoch": 1.13, + "learning_rate": 3.629364965627456e-05, + "loss": 1.5733, + "step": 262500 + }, + { + "epoch": 1.13, + "learning_rate": 3.628822126972133e-05, + "loss": 1.5416, + "step": 262600 + }, + { + "epoch": 1.13, + "learning_rate": 3.6282792883168094e-05, + "loss": 1.5918, + "step": 262700 + }, + { + "epoch": 1.13, + "learning_rate": 3.6277364496614855e-05, + "loss": 1.5342, + "step": 262800 + }, + { + "epoch": 1.13, + "learning_rate": 3.6271936110061624e-05, + "loss": 1.5777, + "step": 262900 + }, + { + "epoch": 1.13, + "learning_rate": 3.626650772350839e-05, + "loss": 1.5328, + "step": 263000 + }, + { + "epoch": 1.13, + "learning_rate": 3.6261079336955155e-05, + "loss": 1.5647, + "step": 263100 + }, + { + "epoch": 1.13, + "learning_rate": 3.625565095040192e-05, + "loss": 1.5391, + "step": 263200 + }, + { + "epoch": 1.13, + "learning_rate": 3.6250222563848685e-05, + "loss": 1.5861, + "step": 263300 + }, + { + "epoch": 1.13, + "learning_rate": 3.624479417729545e-05, + "loss": 1.5629, + "step": 263400 + }, + { + "epoch": 1.13, + "learning_rate": 3.6239365790742216e-05, + "loss": 1.5676, + "step": 263500 + }, + { + "epoch": 1.13, + "learning_rate": 3.623393740418898e-05, + "loss": 1.5394, + "step": 263600 + }, + { + "epoch": 1.13, + "learning_rate": 3.622850901763574e-05, + "loss": 1.5281, + "step": 263700 + }, + { + "epoch": 1.13, + "learning_rate": 3.622308063108251e-05, + "loss": 1.548, + "step": 263800 + }, + { + "epoch": 1.13, + "learning_rate": 3.621765224452928e-05, + "loss": 1.5544, + "step": 263900 + }, + { + "epoch": 1.13, + "learning_rate": 3.621222385797604e-05, + "loss": 1.5579, + "step": 264000 + }, + { + "epoch": 1.13, + "learning_rate": 3.620679547142281e-05, + "loss": 1.5662, + "step": 264100 + }, + { + "epoch": 1.14, + "learning_rate": 3.620136708486957e-05, + "loss": 1.5606, + "step": 264200 + }, + { + "epoch": 1.14, + "learning_rate": 3.619593869831633e-05, + "loss": 1.5445, + "step": 264300 + }, + { + "epoch": 1.14, + "learning_rate": 3.61905103117631e-05, + "loss": 1.5551, + "step": 264400 + }, + { + "epoch": 1.14, + "learning_rate": 3.618508192520986e-05, + "loss": 1.5423, + "step": 264500 + }, + { + "epoch": 1.14, + "learning_rate": 3.617965353865663e-05, + "loss": 1.5331, + "step": 264600 + }, + { + "epoch": 1.14, + "learning_rate": 3.617422515210339e-05, + "loss": 1.5552, + "step": 264700 + }, + { + "epoch": 1.14, + "learning_rate": 3.6168796765550154e-05, + "loss": 1.5557, + "step": 264800 + }, + { + "epoch": 1.14, + "learning_rate": 3.616336837899692e-05, + "loss": 1.5547, + "step": 264900 + }, + { + "epoch": 1.14, + "learning_rate": 3.615793999244369e-05, + "loss": 1.5396, + "step": 265000 + }, + { + "epoch": 1.14, + "eval_loss": 1.4619945287704468, + "eval_runtime": 17.8247, + "eval_samples_per_second": 561.019, + "eval_steps_per_second": 17.56, + "step": 265000 + }, + { + "epoch": 1.14, + "learning_rate": 3.615251160589045e-05, + "loss": 1.5693, + "step": 265100 + }, + { + "epoch": 1.14, + "learning_rate": 3.614708321933722e-05, + "loss": 1.6009, + "step": 265200 + }, + { + "epoch": 1.14, + "learning_rate": 3.6141654832783984e-05, + "loss": 1.5776, + "step": 265300 + }, + { + "epoch": 1.14, + "learning_rate": 3.6136226446230745e-05, + "loss": 1.5691, + "step": 265400 + }, + { + "epoch": 1.14, + "learning_rate": 3.6130798059677514e-05, + "loss": 1.5786, + "step": 265500 + }, + { + "epoch": 1.14, + "learning_rate": 3.6125369673124276e-05, + "loss": 1.568, + "step": 265600 + }, + { + "epoch": 1.14, + "learning_rate": 3.611994128657104e-05, + "loss": 1.5471, + "step": 265700 + }, + { + "epoch": 1.14, + "learning_rate": 3.6114512900017807e-05, + "loss": 1.5895, + "step": 265800 + }, + { + "epoch": 1.14, + "learning_rate": 3.610908451346457e-05, + "loss": 1.5691, + "step": 265900 + }, + { + "epoch": 1.14, + "learning_rate": 3.610365612691134e-05, + "loss": 1.5361, + "step": 266000 + }, + { + "epoch": 1.14, + "learning_rate": 3.6098227740358106e-05, + "loss": 1.5495, + "step": 266100 + }, + { + "epoch": 1.14, + "learning_rate": 3.609279935380487e-05, + "loss": 1.5553, + "step": 266200 + }, + { + "epoch": 1.14, + "learning_rate": 3.608737096725163e-05, + "loss": 1.5584, + "step": 266300 + }, + { + "epoch": 1.14, + "learning_rate": 3.60819425806984e-05, + "loss": 1.528, + "step": 266400 + }, + { + "epoch": 1.14, + "learning_rate": 3.607651419414516e-05, + "loss": 1.5457, + "step": 266500 + }, + { + "epoch": 1.15, + "learning_rate": 3.607108580759193e-05, + "loss": 1.5393, + "step": 266600 + }, + { + "epoch": 1.15, + "learning_rate": 3.606565742103869e-05, + "loss": 1.5813, + "step": 266700 + }, + { + "epoch": 1.15, + "learning_rate": 3.606022903448545e-05, + "loss": 1.5665, + "step": 266800 + }, + { + "epoch": 1.15, + "learning_rate": 3.605480064793222e-05, + "loss": 1.5375, + "step": 266900 + }, + { + "epoch": 1.15, + "learning_rate": 3.604937226137898e-05, + "loss": 1.5525, + "step": 267000 + }, + { + "epoch": 1.15, + "learning_rate": 3.604394387482575e-05, + "loss": 1.5531, + "step": 267100 + }, + { + "epoch": 1.15, + "learning_rate": 3.603851548827252e-05, + "loss": 1.5469, + "step": 267200 + }, + { + "epoch": 1.15, + "learning_rate": 3.603308710171928e-05, + "loss": 1.587, + "step": 267300 + }, + { + "epoch": 1.15, + "learning_rate": 3.6027658715166044e-05, + "loss": 1.5445, + "step": 267400 + }, + { + "epoch": 1.15, + "learning_rate": 3.602223032861281e-05, + "loss": 1.5907, + "step": 267500 + }, + { + "epoch": 1.15, + "learning_rate": 3.6016801942059574e-05, + "loss": 1.5455, + "step": 267600 + }, + { + "epoch": 1.15, + "learning_rate": 3.6011373555506336e-05, + "loss": 1.5386, + "step": 267700 + }, + { + "epoch": 1.15, + "learning_rate": 3.6005945168953105e-05, + "loss": 1.5539, + "step": 267800 + }, + { + "epoch": 1.15, + "learning_rate": 3.600051678239987e-05, + "loss": 1.5513, + "step": 267900 + }, + { + "epoch": 1.15, + "learning_rate": 3.5995088395846636e-05, + "loss": 1.5439, + "step": 268000 + }, + { + "epoch": 1.15, + "learning_rate": 3.5989660009293404e-05, + "loss": 1.5362, + "step": 268100 + }, + { + "epoch": 1.15, + "learning_rate": 3.5984231622740166e-05, + "loss": 1.554, + "step": 268200 + }, + { + "epoch": 1.15, + "learning_rate": 3.597880323618693e-05, + "loss": 1.5568, + "step": 268300 + }, + { + "epoch": 1.15, + "learning_rate": 3.5973374849633697e-05, + "loss": 1.5624, + "step": 268400 + }, + { + "epoch": 1.15, + "learning_rate": 3.596794646308046e-05, + "loss": 1.5489, + "step": 268500 + }, + { + "epoch": 1.15, + "learning_rate": 3.596251807652723e-05, + "loss": 1.5618, + "step": 268600 + }, + { + "epoch": 1.15, + "learning_rate": 3.595708968997399e-05, + "loss": 1.5626, + "step": 268700 + }, + { + "epoch": 1.15, + "learning_rate": 3.595166130342075e-05, + "loss": 1.5691, + "step": 268800 + }, + { + "epoch": 1.16, + "learning_rate": 3.594623291686752e-05, + "loss": 1.5809, + "step": 268900 + }, + { + "epoch": 1.16, + "learning_rate": 3.594080453031428e-05, + "loss": 1.5376, + "step": 269000 + }, + { + "epoch": 1.16, + "learning_rate": 3.593537614376104e-05, + "loss": 1.5724, + "step": 269100 + }, + { + "epoch": 1.16, + "learning_rate": 3.592994775720782e-05, + "loss": 1.5474, + "step": 269200 + }, + { + "epoch": 1.16, + "learning_rate": 3.592451937065458e-05, + "loss": 1.5703, + "step": 269300 + }, + { + "epoch": 1.16, + "learning_rate": 3.591909098410134e-05, + "loss": 1.5577, + "step": 269400 + }, + { + "epoch": 1.16, + "learning_rate": 3.591366259754811e-05, + "loss": 1.5545, + "step": 269500 + }, + { + "epoch": 1.16, + "learning_rate": 3.590823421099487e-05, + "loss": 1.5388, + "step": 269600 + }, + { + "epoch": 1.16, + "learning_rate": 3.5902805824441635e-05, + "loss": 1.564, + "step": 269700 + }, + { + "epoch": 1.16, + "learning_rate": 3.5897377437888403e-05, + "loss": 1.5424, + "step": 269800 + }, + { + "epoch": 1.16, + "learning_rate": 3.5891949051335165e-05, + "loss": 1.5829, + "step": 269900 + }, + { + "epoch": 1.16, + "learning_rate": 3.5886520664781934e-05, + "loss": 1.59, + "step": 270000 + }, + { + "epoch": 1.16, + "eval_loss": 1.4590508937835693, + "eval_runtime": 17.8197, + "eval_samples_per_second": 561.177, + "eval_steps_per_second": 17.565, + "step": 270000 + }, + { + "epoch": 1.16, + "learning_rate": 3.5881092278228696e-05, + "loss": 1.5582, + "step": 270100 + }, + { + "epoch": 1.16, + "learning_rate": 3.5875663891675465e-05, + "loss": 1.5847, + "step": 270200 + }, + { + "epoch": 1.16, + "learning_rate": 3.5870235505122226e-05, + "loss": 1.5796, + "step": 270300 + }, + { + "epoch": 1.16, + "learning_rate": 3.5864807118568995e-05, + "loss": 1.5329, + "step": 270400 + }, + { + "epoch": 1.16, + "learning_rate": 3.585937873201576e-05, + "loss": 1.5347, + "step": 270500 + }, + { + "epoch": 1.16, + "learning_rate": 3.5853950345462526e-05, + "loss": 1.535, + "step": 270600 + }, + { + "epoch": 1.16, + "learning_rate": 3.584852195890929e-05, + "loss": 1.5335, + "step": 270700 + }, + { + "epoch": 1.16, + "learning_rate": 3.584309357235605e-05, + "loss": 1.5717, + "step": 270800 + }, + { + "epoch": 1.16, + "learning_rate": 3.583766518580282e-05, + "loss": 1.5496, + "step": 270900 + }, + { + "epoch": 1.16, + "learning_rate": 3.583223679924958e-05, + "loss": 1.5529, + "step": 271000 + }, + { + "epoch": 1.16, + "learning_rate": 3.582680841269634e-05, + "loss": 1.5607, + "step": 271100 + }, + { + "epoch": 1.17, + "learning_rate": 3.582138002614311e-05, + "loss": 1.5481, + "step": 271200 + }, + { + "epoch": 1.17, + "learning_rate": 3.581595163958988e-05, + "loss": 1.5442, + "step": 271300 + }, + { + "epoch": 1.17, + "learning_rate": 3.581052325303664e-05, + "loss": 1.5719, + "step": 271400 + }, + { + "epoch": 1.17, + "learning_rate": 3.580509486648341e-05, + "loss": 1.5845, + "step": 271500 + }, + { + "epoch": 1.17, + "learning_rate": 3.579966647993017e-05, + "loss": 1.5695, + "step": 271600 + }, + { + "epoch": 1.17, + "learning_rate": 3.579423809337693e-05, + "loss": 1.5496, + "step": 271700 + }, + { + "epoch": 1.17, + "learning_rate": 3.57888097068237e-05, + "loss": 1.5713, + "step": 271800 + }, + { + "epoch": 1.17, + "learning_rate": 3.5783381320270464e-05, + "loss": 1.5643, + "step": 271900 + }, + { + "epoch": 1.17, + "learning_rate": 3.577795293371723e-05, + "loss": 1.5655, + "step": 272000 + }, + { + "epoch": 1.17, + "learning_rate": 3.5772524547163994e-05, + "loss": 1.5866, + "step": 272100 + }, + { + "epoch": 1.17, + "learning_rate": 3.5767096160610756e-05, + "loss": 1.5959, + "step": 272200 + }, + { + "epoch": 1.17, + "learning_rate": 3.5761667774057525e-05, + "loss": 1.5499, + "step": 272300 + }, + { + "epoch": 1.17, + "learning_rate": 3.5756239387504294e-05, + "loss": 1.5568, + "step": 272400 + }, + { + "epoch": 1.17, + "learning_rate": 3.5750811000951055e-05, + "loss": 1.5488, + "step": 272500 + }, + { + "epoch": 1.17, + "learning_rate": 3.5745382614397824e-05, + "loss": 1.557, + "step": 272600 + }, + { + "epoch": 1.17, + "learning_rate": 3.5739954227844586e-05, + "loss": 1.5672, + "step": 272700 + }, + { + "epoch": 1.17, + "learning_rate": 3.573452584129135e-05, + "loss": 1.5689, + "step": 272800 + }, + { + "epoch": 1.17, + "learning_rate": 3.5729097454738116e-05, + "loss": 1.549, + "step": 272900 + }, + { + "epoch": 1.17, + "learning_rate": 3.572366906818488e-05, + "loss": 1.5771, + "step": 273000 + }, + { + "epoch": 1.17, + "learning_rate": 3.571824068163164e-05, + "loss": 1.5602, + "step": 273100 + }, + { + "epoch": 1.17, + "learning_rate": 3.571281229507841e-05, + "loss": 1.5542, + "step": 273200 + }, + { + "epoch": 1.17, + "learning_rate": 3.570738390852517e-05, + "loss": 1.5522, + "step": 273300 + }, + { + "epoch": 1.17, + "learning_rate": 3.570195552197194e-05, + "loss": 1.5752, + "step": 273400 + }, + { + "epoch": 1.17, + "learning_rate": 3.569652713541871e-05, + "loss": 1.5649, + "step": 273500 + }, + { + "epoch": 1.18, + "learning_rate": 3.569109874886547e-05, + "loss": 1.531, + "step": 273600 + }, + { + "epoch": 1.18, + "learning_rate": 3.568567036231223e-05, + "loss": 1.5486, + "step": 273700 + }, + { + "epoch": 1.18, + "learning_rate": 3.5680241975759e-05, + "loss": 1.5541, + "step": 273800 + }, + { + "epoch": 1.18, + "learning_rate": 3.567481358920576e-05, + "loss": 1.5403, + "step": 273900 + }, + { + "epoch": 1.18, + "learning_rate": 3.566938520265253e-05, + "loss": 1.556, + "step": 274000 + }, + { + "epoch": 1.18, + "learning_rate": 3.566395681609929e-05, + "loss": 1.573, + "step": 274100 + }, + { + "epoch": 1.18, + "learning_rate": 3.5658528429546055e-05, + "loss": 1.5342, + "step": 274200 + }, + { + "epoch": 1.18, + "learning_rate": 3.5653100042992823e-05, + "loss": 1.5544, + "step": 274300 + }, + { + "epoch": 1.18, + "learning_rate": 3.564767165643959e-05, + "loss": 1.5691, + "step": 274400 + }, + { + "epoch": 1.18, + "learning_rate": 3.5642243269886354e-05, + "loss": 1.5439, + "step": 274500 + }, + { + "epoch": 1.18, + "learning_rate": 3.563681488333312e-05, + "loss": 1.5634, + "step": 274600 + }, + { + "epoch": 1.18, + "learning_rate": 3.5631386496779884e-05, + "loss": 1.5517, + "step": 274700 + }, + { + "epoch": 1.18, + "learning_rate": 3.5625958110226646e-05, + "loss": 1.5376, + "step": 274800 + }, + { + "epoch": 1.18, + "learning_rate": 3.5620529723673415e-05, + "loss": 1.6075, + "step": 274900 + }, + { + "epoch": 1.18, + "learning_rate": 3.561510133712018e-05, + "loss": 1.5671, + "step": 275000 + }, + { + "epoch": 1.18, + "eval_loss": 1.4539235830307007, + "eval_runtime": 17.8159, + "eval_samples_per_second": 561.296, + "eval_steps_per_second": 17.569, + "step": 275000 + }, + { + "epoch": 1.18, + "learning_rate": 3.560967295056694e-05, + "loss": 1.5554, + "step": 275100 + }, + { + "epoch": 1.18, + "learning_rate": 3.560424456401371e-05, + "loss": 1.5644, + "step": 275200 + }, + { + "epoch": 1.18, + "learning_rate": 3.559881617746047e-05, + "loss": 1.5373, + "step": 275300 + }, + { + "epoch": 1.18, + "learning_rate": 3.559338779090723e-05, + "loss": 1.5611, + "step": 275400 + }, + { + "epoch": 1.18, + "learning_rate": 3.5587959404354007e-05, + "loss": 1.5701, + "step": 275500 + }, + { + "epoch": 1.18, + "learning_rate": 3.558253101780077e-05, + "loss": 1.5404, + "step": 275600 + }, + { + "epoch": 1.18, + "learning_rate": 3.557710263124753e-05, + "loss": 1.5647, + "step": 275700 + }, + { + "epoch": 1.18, + "learning_rate": 3.55716742446943e-05, + "loss": 1.5538, + "step": 275800 + }, + { + "epoch": 1.19, + "learning_rate": 3.556624585814106e-05, + "loss": 1.5497, + "step": 275900 + }, + { + "epoch": 1.19, + "learning_rate": 3.556081747158783e-05, + "loss": 1.5548, + "step": 276000 + }, + { + "epoch": 1.19, + "learning_rate": 3.555538908503459e-05, + "loss": 1.566, + "step": 276100 + }, + { + "epoch": 1.19, + "learning_rate": 3.554996069848135e-05, + "loss": 1.5541, + "step": 276200 + }, + { + "epoch": 1.19, + "learning_rate": 3.554453231192812e-05, + "loss": 1.5611, + "step": 276300 + }, + { + "epoch": 1.19, + "learning_rate": 3.5539103925374884e-05, + "loss": 1.5562, + "step": 276400 + }, + { + "epoch": 1.19, + "learning_rate": 3.553367553882165e-05, + "loss": 1.562, + "step": 276500 + }, + { + "epoch": 1.19, + "learning_rate": 3.552824715226842e-05, + "loss": 1.5387, + "step": 276600 + }, + { + "epoch": 1.19, + "learning_rate": 3.552281876571518e-05, + "loss": 1.5448, + "step": 276700 + }, + { + "epoch": 1.19, + "learning_rate": 3.5517390379161945e-05, + "loss": 1.5676, + "step": 276800 + }, + { + "epoch": 1.19, + "learning_rate": 3.5511961992608713e-05, + "loss": 1.5381, + "step": 276900 + }, + { + "epoch": 1.19, + "learning_rate": 3.5506533606055475e-05, + "loss": 1.5715, + "step": 277000 + }, + { + "epoch": 1.19, + "learning_rate": 3.550110521950224e-05, + "loss": 1.5469, + "step": 277100 + }, + { + "epoch": 1.19, + "learning_rate": 3.5495676832949006e-05, + "loss": 1.5773, + "step": 277200 + }, + { + "epoch": 1.19, + "learning_rate": 3.549024844639577e-05, + "loss": 1.5712, + "step": 277300 + }, + { + "epoch": 1.19, + "learning_rate": 3.548482005984253e-05, + "loss": 1.5421, + "step": 277400 + }, + { + "epoch": 1.19, + "learning_rate": 3.54793916732893e-05, + "loss": 1.5442, + "step": 277500 + }, + { + "epoch": 1.19, + "learning_rate": 3.547396328673607e-05, + "loss": 1.5309, + "step": 277600 + }, + { + "epoch": 1.19, + "learning_rate": 3.546853490018283e-05, + "loss": 1.5588, + "step": 277700 + }, + { + "epoch": 1.19, + "learning_rate": 3.54631065136296e-05, + "loss": 1.5598, + "step": 277800 + }, + { + "epoch": 1.19, + "learning_rate": 3.545767812707636e-05, + "loss": 1.5618, + "step": 277900 + }, + { + "epoch": 1.19, + "learning_rate": 3.545224974052313e-05, + "loss": 1.5579, + "step": 278000 + }, + { + "epoch": 1.19, + "learning_rate": 3.544682135396989e-05, + "loss": 1.5515, + "step": 278100 + }, + { + "epoch": 1.2, + "learning_rate": 3.544139296741665e-05, + "loss": 1.5779, + "step": 278200 + }, + { + "epoch": 1.2, + "learning_rate": 3.543596458086342e-05, + "loss": 1.5852, + "step": 278300 + }, + { + "epoch": 1.2, + "learning_rate": 3.543053619431018e-05, + "loss": 1.5487, + "step": 278400 + }, + { + "epoch": 1.2, + "learning_rate": 3.5425107807756944e-05, + "loss": 1.5745, + "step": 278500 + }, + { + "epoch": 1.2, + "learning_rate": 3.541967942120372e-05, + "loss": 1.5421, + "step": 278600 + }, + { + "epoch": 1.2, + "learning_rate": 3.541425103465048e-05, + "loss": 1.5352, + "step": 278700 + }, + { + "epoch": 1.2, + "learning_rate": 3.540882264809724e-05, + "loss": 1.556, + "step": 278800 + }, + { + "epoch": 1.2, + "learning_rate": 3.540339426154401e-05, + "loss": 1.5488, + "step": 278900 + }, + { + "epoch": 1.2, + "learning_rate": 3.5397965874990774e-05, + "loss": 1.5424, + "step": 279000 + }, + { + "epoch": 1.2, + "learning_rate": 3.5392537488437536e-05, + "loss": 1.5643, + "step": 279100 + }, + { + "epoch": 1.2, + "learning_rate": 3.5387109101884304e-05, + "loss": 1.5561, + "step": 279200 + }, + { + "epoch": 1.2, + "learning_rate": 3.5381680715331066e-05, + "loss": 1.5653, + "step": 279300 + }, + { + "epoch": 1.2, + "learning_rate": 3.537625232877783e-05, + "loss": 1.5528, + "step": 279400 + }, + { + "epoch": 1.2, + "learning_rate": 3.53708239422246e-05, + "loss": 1.566, + "step": 279500 + }, + { + "epoch": 1.2, + "learning_rate": 3.536539555567136e-05, + "loss": 1.5282, + "step": 279600 + }, + { + "epoch": 1.2, + "learning_rate": 3.535996716911813e-05, + "loss": 1.5516, + "step": 279700 + }, + { + "epoch": 1.2, + "learning_rate": 3.5354538782564896e-05, + "loss": 1.531, + "step": 279800 + }, + { + "epoch": 1.2, + "learning_rate": 3.534911039601166e-05, + "loss": 1.5397, + "step": 279900 + }, + { + "epoch": 1.2, + "learning_rate": 3.5343682009458426e-05, + "loss": 1.5497, + "step": 280000 + }, + { + "epoch": 1.2, + "eval_loss": 1.456358551979065, + "eval_runtime": 17.8134, + "eval_samples_per_second": 561.375, + "eval_steps_per_second": 17.571, + "step": 280000 + }, + { + "epoch": 1.2, + "learning_rate": 3.533825362290519e-05, + "loss": 1.5704, + "step": 280100 + }, + { + "epoch": 1.2, + "learning_rate": 3.533282523635195e-05, + "loss": 1.5544, + "step": 280200 + }, + { + "epoch": 1.2, + "learning_rate": 3.532739684979872e-05, + "loss": 1.5539, + "step": 280300 + }, + { + "epoch": 1.2, + "learning_rate": 3.532196846324548e-05, + "loss": 1.5754, + "step": 280400 + }, + { + "epoch": 1.21, + "learning_rate": 3.531654007669224e-05, + "loss": 1.5399, + "step": 280500 + }, + { + "epoch": 1.21, + "learning_rate": 3.531111169013901e-05, + "loss": 1.5785, + "step": 280600 + }, + { + "epoch": 1.21, + "learning_rate": 3.530568330358578e-05, + "loss": 1.5415, + "step": 280700 + }, + { + "epoch": 1.21, + "learning_rate": 3.530025491703254e-05, + "loss": 1.5825, + "step": 280800 + }, + { + "epoch": 1.21, + "learning_rate": 3.529482653047931e-05, + "loss": 1.5565, + "step": 280900 + }, + { + "epoch": 1.21, + "learning_rate": 3.528939814392607e-05, + "loss": 1.567, + "step": 281000 + }, + { + "epoch": 1.21, + "learning_rate": 3.5283969757372834e-05, + "loss": 1.5726, + "step": 281100 + }, + { + "epoch": 1.21, + "learning_rate": 3.52785413708196e-05, + "loss": 1.5311, + "step": 281200 + }, + { + "epoch": 1.21, + "learning_rate": 3.5273112984266365e-05, + "loss": 1.5515, + "step": 281300 + }, + { + "epoch": 1.21, + "learning_rate": 3.5267684597713127e-05, + "loss": 1.5515, + "step": 281400 + }, + { + "epoch": 1.21, + "learning_rate": 3.5262256211159895e-05, + "loss": 1.5597, + "step": 281500 + }, + { + "epoch": 1.21, + "learning_rate": 3.525682782460666e-05, + "loss": 1.5452, + "step": 281600 + }, + { + "epoch": 1.21, + "learning_rate": 3.5251399438053426e-05, + "loss": 1.5714, + "step": 281700 + }, + { + "epoch": 1.21, + "learning_rate": 3.5245971051500194e-05, + "loss": 1.5578, + "step": 281800 + }, + { + "epoch": 1.21, + "learning_rate": 3.5240542664946956e-05, + "loss": 1.5484, + "step": 281900 + }, + { + "epoch": 1.21, + "learning_rate": 3.5235114278393725e-05, + "loss": 1.5792, + "step": 282000 + }, + { + "epoch": 1.21, + "learning_rate": 3.522968589184049e-05, + "loss": 1.5652, + "step": 282100 + }, + { + "epoch": 1.21, + "learning_rate": 3.522425750528725e-05, + "loss": 1.5616, + "step": 282200 + }, + { + "epoch": 1.21, + "learning_rate": 3.521882911873402e-05, + "loss": 1.561, + "step": 282300 + }, + { + "epoch": 1.21, + "learning_rate": 3.521340073218078e-05, + "loss": 1.5664, + "step": 282400 + }, + { + "epoch": 1.21, + "learning_rate": 3.520797234562754e-05, + "loss": 1.5775, + "step": 282500 + }, + { + "epoch": 1.21, + "learning_rate": 3.520254395907431e-05, + "loss": 1.5332, + "step": 282600 + }, + { + "epoch": 1.21, + "learning_rate": 3.519711557252107e-05, + "loss": 1.5193, + "step": 282700 + }, + { + "epoch": 1.21, + "learning_rate": 3.519168718596784e-05, + "loss": 1.5546, + "step": 282800 + }, + { + "epoch": 1.22, + "learning_rate": 3.518625879941461e-05, + "loss": 1.5636, + "step": 282900 + }, + { + "epoch": 1.22, + "learning_rate": 3.518083041286137e-05, + "loss": 1.5514, + "step": 283000 + }, + { + "epoch": 1.22, + "learning_rate": 3.517540202630813e-05, + "loss": 1.5575, + "step": 283100 + }, + { + "epoch": 1.22, + "learning_rate": 3.51699736397549e-05, + "loss": 1.5341, + "step": 283200 + }, + { + "epoch": 1.22, + "learning_rate": 3.516454525320166e-05, + "loss": 1.5468, + "step": 283300 + }, + { + "epoch": 1.22, + "learning_rate": 3.5159116866648425e-05, + "loss": 1.5506, + "step": 283400 + }, + { + "epoch": 1.22, + "learning_rate": 3.5153688480095194e-05, + "loss": 1.5696, + "step": 283500 + }, + { + "epoch": 1.22, + "learning_rate": 3.5148260093541956e-05, + "loss": 1.5704, + "step": 283600 + }, + { + "epoch": 1.22, + "learning_rate": 3.5142831706988724e-05, + "loss": 1.5557, + "step": 283700 + }, + { + "epoch": 1.22, + "learning_rate": 3.513740332043549e-05, + "loss": 1.5561, + "step": 283800 + }, + { + "epoch": 1.22, + "learning_rate": 3.5131974933882255e-05, + "loss": 1.5315, + "step": 283900 + }, + { + "epoch": 1.22, + "learning_rate": 3.5126546547329023e-05, + "loss": 1.5445, + "step": 284000 + }, + { + "epoch": 1.22, + "learning_rate": 3.5121118160775785e-05, + "loss": 1.5352, + "step": 284100 + }, + { + "epoch": 1.22, + "learning_rate": 3.511568977422255e-05, + "loss": 1.5458, + "step": 284200 + }, + { + "epoch": 1.22, + "learning_rate": 3.5110261387669316e-05, + "loss": 1.5889, + "step": 284300 + }, + { + "epoch": 1.22, + "learning_rate": 3.510483300111608e-05, + "loss": 1.5682, + "step": 284400 + }, + { + "epoch": 1.22, + "learning_rate": 3.509940461456284e-05, + "loss": 1.5332, + "step": 284500 + }, + { + "epoch": 1.22, + "learning_rate": 3.509397622800961e-05, + "loss": 1.5871, + "step": 284600 + }, + { + "epoch": 1.22, + "learning_rate": 3.508854784145637e-05, + "loss": 1.5586, + "step": 284700 + }, + { + "epoch": 1.22, + "learning_rate": 3.508311945490313e-05, + "loss": 1.5607, + "step": 284800 + }, + { + "epoch": 1.22, + "learning_rate": 3.507769106834991e-05, + "loss": 1.576, + "step": 284900 + }, + { + "epoch": 1.22, + "learning_rate": 3.507226268179667e-05, + "loss": 1.5491, + "step": 285000 + }, + { + "epoch": 1.22, + "eval_loss": 1.4562804698944092, + "eval_runtime": 17.8003, + "eval_samples_per_second": 561.788, + "eval_steps_per_second": 17.584, + "step": 285000 + }, + { + "epoch": 1.22, + "learning_rate": 3.506683429524343e-05, + "loss": 1.5509, + "step": 285100 + }, + { + "epoch": 1.23, + "learning_rate": 3.50614059086902e-05, + "loss": 1.5571, + "step": 285200 + }, + { + "epoch": 1.23, + "learning_rate": 3.505597752213696e-05, + "loss": 1.541, + "step": 285300 + }, + { + "epoch": 1.23, + "learning_rate": 3.5050549135583724e-05, + "loss": 1.5445, + "step": 285400 + }, + { + "epoch": 1.23, + "learning_rate": 3.504512074903049e-05, + "loss": 1.5346, + "step": 285500 + }, + { + "epoch": 1.23, + "learning_rate": 3.5039692362477254e-05, + "loss": 1.5462, + "step": 285600 + }, + { + "epoch": 1.23, + "learning_rate": 3.503426397592402e-05, + "loss": 1.5539, + "step": 285700 + }, + { + "epoch": 1.23, + "learning_rate": 3.5028835589370785e-05, + "loss": 1.5642, + "step": 285800 + }, + { + "epoch": 1.23, + "learning_rate": 3.502340720281755e-05, + "loss": 1.56, + "step": 285900 + }, + { + "epoch": 1.23, + "learning_rate": 3.5017978816264315e-05, + "loss": 1.5673, + "step": 286000 + }, + { + "epoch": 1.23, + "learning_rate": 3.5012550429711084e-05, + "loss": 1.5509, + "step": 286100 + }, + { + "epoch": 1.23, + "learning_rate": 3.5007122043157846e-05, + "loss": 1.5552, + "step": 286200 + }, + { + "epoch": 1.23, + "learning_rate": 3.5001693656604614e-05, + "loss": 1.5524, + "step": 286300 + }, + { + "epoch": 1.23, + "learning_rate": 3.4996265270051376e-05, + "loss": 1.5624, + "step": 286400 + }, + { + "epoch": 1.23, + "learning_rate": 3.499083688349814e-05, + "loss": 1.5801, + "step": 286500 + }, + { + "epoch": 1.23, + "learning_rate": 3.498540849694491e-05, + "loss": 1.5274, + "step": 286600 + }, + { + "epoch": 1.23, + "learning_rate": 3.497998011039167e-05, + "loss": 1.5369, + "step": 286700 + }, + { + "epoch": 1.23, + "learning_rate": 3.497455172383843e-05, + "loss": 1.5509, + "step": 286800 + }, + { + "epoch": 1.23, + "learning_rate": 3.49691233372852e-05, + "loss": 1.5426, + "step": 286900 + }, + { + "epoch": 1.23, + "learning_rate": 3.496369495073197e-05, + "loss": 1.5654, + "step": 287000 + }, + { + "epoch": 1.23, + "learning_rate": 3.495826656417873e-05, + "loss": 1.55, + "step": 287100 + }, + { + "epoch": 1.23, + "learning_rate": 3.49528381776255e-05, + "loss": 1.5626, + "step": 287200 + }, + { + "epoch": 1.23, + "learning_rate": 3.494740979107226e-05, + "loss": 1.5696, + "step": 287300 + }, + { + "epoch": 1.23, + "learning_rate": 3.494198140451902e-05, + "loss": 1.5594, + "step": 287400 + }, + { + "epoch": 1.24, + "learning_rate": 3.493655301796579e-05, + "loss": 1.5418, + "step": 287500 + }, + { + "epoch": 1.24, + "learning_rate": 3.493112463141255e-05, + "loss": 1.5708, + "step": 287600 + }, + { + "epoch": 1.24, + "learning_rate": 3.492569624485932e-05, + "loss": 1.57, + "step": 287700 + }, + { + "epoch": 1.24, + "learning_rate": 3.492026785830608e-05, + "loss": 1.5632, + "step": 287800 + }, + { + "epoch": 1.24, + "learning_rate": 3.4914839471752845e-05, + "loss": 1.5588, + "step": 287900 + }, + { + "epoch": 1.24, + "learning_rate": 3.4909411085199614e-05, + "loss": 1.5361, + "step": 288000 + }, + { + "epoch": 1.24, + "learning_rate": 3.490398269864638e-05, + "loss": 1.5445, + "step": 288100 + }, + { + "epoch": 1.24, + "learning_rate": 3.4898554312093144e-05, + "loss": 1.559, + "step": 288200 + }, + { + "epoch": 1.24, + "learning_rate": 3.489312592553991e-05, + "loss": 1.5863, + "step": 288300 + }, + { + "epoch": 1.24, + "learning_rate": 3.4887697538986675e-05, + "loss": 1.5221, + "step": 288400 + }, + { + "epoch": 1.24, + "learning_rate": 3.4882269152433437e-05, + "loss": 1.5596, + "step": 288500 + }, + { + "epoch": 1.24, + "learning_rate": 3.4876840765880205e-05, + "loss": 1.5405, + "step": 288600 + }, + { + "epoch": 1.24, + "learning_rate": 3.487141237932697e-05, + "loss": 1.5409, + "step": 288700 + }, + { + "epoch": 1.24, + "learning_rate": 3.486598399277373e-05, + "loss": 1.5524, + "step": 288800 + }, + { + "epoch": 1.24, + "learning_rate": 3.48605556062205e-05, + "loss": 1.5533, + "step": 288900 + }, + { + "epoch": 1.24, + "learning_rate": 3.485512721966726e-05, + "loss": 1.5379, + "step": 289000 + }, + { + "epoch": 1.24, + "learning_rate": 3.484969883311403e-05, + "loss": 1.5547, + "step": 289100 + }, + { + "epoch": 1.24, + "learning_rate": 3.48442704465608e-05, + "loss": 1.5598, + "step": 289200 + }, + { + "epoch": 1.24, + "learning_rate": 3.483884206000756e-05, + "loss": 1.5472, + "step": 289300 + }, + { + "epoch": 1.24, + "learning_rate": 3.483341367345432e-05, + "loss": 1.5289, + "step": 289400 + }, + { + "epoch": 1.24, + "learning_rate": 3.482798528690109e-05, + "loss": 1.543, + "step": 289500 + }, + { + "epoch": 1.24, + "learning_rate": 3.482255690034785e-05, + "loss": 1.5766, + "step": 289600 + }, + { + "epoch": 1.24, + "learning_rate": 3.481712851379462e-05, + "loss": 1.5635, + "step": 289700 + }, + { + "epoch": 1.25, + "learning_rate": 3.481170012724138e-05, + "loss": 1.5564, + "step": 289800 + }, + { + "epoch": 1.25, + "learning_rate": 3.4806271740688143e-05, + "loss": 1.5679, + "step": 289900 + }, + { + "epoch": 1.25, + "learning_rate": 3.480084335413491e-05, + "loss": 1.5554, + "step": 290000 + }, + { + "epoch": 1.25, + "eval_loss": 1.4566130638122559, + "eval_runtime": 17.7457, + "eval_samples_per_second": 563.517, + "eval_steps_per_second": 17.638, + "step": 290000 + }, + { + "epoch": 1.25, + "learning_rate": 3.479541496758168e-05, + "loss": 1.5559, + "step": 290100 + }, + { + "epoch": 1.25, + "learning_rate": 3.478998658102844e-05, + "loss": 1.5364, + "step": 290200 + }, + { + "epoch": 1.25, + "learning_rate": 3.478455819447521e-05, + "loss": 1.5495, + "step": 290300 + }, + { + "epoch": 1.25, + "learning_rate": 3.477912980792197e-05, + "loss": 1.5425, + "step": 290400 + }, + { + "epoch": 1.25, + "learning_rate": 3.4773701421368735e-05, + "loss": 1.5416, + "step": 290500 + }, + { + "epoch": 1.25, + "learning_rate": 3.4768273034815504e-05, + "loss": 1.5573, + "step": 290600 + }, + { + "epoch": 1.25, + "learning_rate": 3.4762844648262266e-05, + "loss": 1.5505, + "step": 290700 + }, + { + "epoch": 1.25, + "learning_rate": 3.475741626170903e-05, + "loss": 1.5827, + "step": 290800 + }, + { + "epoch": 1.25, + "learning_rate": 3.4751987875155796e-05, + "loss": 1.5488, + "step": 290900 + }, + { + "epoch": 1.25, + "learning_rate": 3.474655948860256e-05, + "loss": 1.5206, + "step": 291000 + }, + { + "epoch": 1.25, + "learning_rate": 3.474113110204933e-05, + "loss": 1.562, + "step": 291100 + }, + { + "epoch": 1.25, + "learning_rate": 3.4735702715496095e-05, + "loss": 1.552, + "step": 291200 + }, + { + "epoch": 1.25, + "learning_rate": 3.473027432894286e-05, + "loss": 1.5442, + "step": 291300 + }, + { + "epoch": 1.25, + "learning_rate": 3.472484594238962e-05, + "loss": 1.5581, + "step": 291400 + }, + { + "epoch": 1.25, + "learning_rate": 3.471941755583639e-05, + "loss": 1.5588, + "step": 291500 + }, + { + "epoch": 1.25, + "learning_rate": 3.471398916928315e-05, + "loss": 1.5738, + "step": 291600 + }, + { + "epoch": 1.25, + "learning_rate": 3.470856078272992e-05, + "loss": 1.5371, + "step": 291700 + }, + { + "epoch": 1.25, + "learning_rate": 3.470313239617668e-05, + "loss": 1.5712, + "step": 291800 + }, + { + "epoch": 1.25, + "learning_rate": 3.469770400962344e-05, + "loss": 1.5889, + "step": 291900 + }, + { + "epoch": 1.25, + "learning_rate": 3.469227562307021e-05, + "loss": 1.5397, + "step": 292000 + }, + { + "epoch": 1.25, + "learning_rate": 3.468684723651697e-05, + "loss": 1.5654, + "step": 292100 + }, + { + "epoch": 1.26, + "learning_rate": 3.468141884996374e-05, + "loss": 1.5338, + "step": 292200 + }, + { + "epoch": 1.26, + "learning_rate": 3.467599046341051e-05, + "loss": 1.5349, + "step": 292300 + }, + { + "epoch": 1.26, + "learning_rate": 3.467056207685727e-05, + "loss": 1.553, + "step": 292400 + }, + { + "epoch": 1.26, + "learning_rate": 3.4665133690304034e-05, + "loss": 1.5506, + "step": 292500 + }, + { + "epoch": 1.26, + "learning_rate": 3.46597053037508e-05, + "loss": 1.5682, + "step": 292600 + }, + { + "epoch": 1.26, + "learning_rate": 3.4654276917197564e-05, + "loss": 1.5506, + "step": 292700 + }, + { + "epoch": 1.26, + "learning_rate": 3.4648848530644326e-05, + "loss": 1.549, + "step": 292800 + }, + { + "epoch": 1.26, + "learning_rate": 3.4643420144091095e-05, + "loss": 1.5393, + "step": 292900 + }, + { + "epoch": 1.26, + "learning_rate": 3.4637991757537856e-05, + "loss": 1.5481, + "step": 293000 + }, + { + "epoch": 1.26, + "learning_rate": 3.4632563370984625e-05, + "loss": 1.5265, + "step": 293100 + }, + { + "epoch": 1.26, + "learning_rate": 3.462713498443139e-05, + "loss": 1.5614, + "step": 293200 + }, + { + "epoch": 1.26, + "learning_rate": 3.4621706597878156e-05, + "loss": 1.5649, + "step": 293300 + }, + { + "epoch": 1.26, + "learning_rate": 3.461627821132492e-05, + "loss": 1.5426, + "step": 293400 + }, + { + "epoch": 1.26, + "learning_rate": 3.4610849824771686e-05, + "loss": 1.5627, + "step": 293500 + }, + { + "epoch": 1.26, + "learning_rate": 3.460542143821845e-05, + "loss": 1.5791, + "step": 293600 + }, + { + "epoch": 1.26, + "learning_rate": 3.459999305166522e-05, + "loss": 1.5435, + "step": 293700 + }, + { + "epoch": 1.26, + "learning_rate": 3.459456466511198e-05, + "loss": 1.5421, + "step": 293800 + }, + { + "epoch": 1.26, + "learning_rate": 3.458913627855874e-05, + "loss": 1.5323, + "step": 293900 + }, + { + "epoch": 1.26, + "learning_rate": 3.458370789200551e-05, + "loss": 1.5384, + "step": 294000 + }, + { + "epoch": 1.26, + "learning_rate": 3.457827950545227e-05, + "loss": 1.5803, + "step": 294100 + }, + { + "epoch": 1.26, + "learning_rate": 3.457285111889903e-05, + "loss": 1.5698, + "step": 294200 + }, + { + "epoch": 1.26, + "learning_rate": 3.456742273234581e-05, + "loss": 1.5649, + "step": 294300 + }, + { + "epoch": 1.26, + "learning_rate": 3.456199434579257e-05, + "loss": 1.587, + "step": 294400 + }, + { + "epoch": 1.27, + "learning_rate": 3.455656595923933e-05, + "loss": 1.5739, + "step": 294500 + }, + { + "epoch": 1.27, + "learning_rate": 3.45511375726861e-05, + "loss": 1.5239, + "step": 294600 + }, + { + "epoch": 1.27, + "learning_rate": 3.454570918613286e-05, + "loss": 1.5361, + "step": 294700 + }, + { + "epoch": 1.27, + "learning_rate": 3.4540280799579624e-05, + "loss": 1.5392, + "step": 294800 + }, + { + "epoch": 1.27, + "learning_rate": 3.453485241302639e-05, + "loss": 1.541, + "step": 294900 + }, + { + "epoch": 1.27, + "learning_rate": 3.4529424026473155e-05, + "loss": 1.5379, + "step": 295000 + }, + { + "epoch": 1.27, + "eval_loss": 1.4548817873001099, + "eval_runtime": 17.8195, + "eval_samples_per_second": 561.182, + "eval_steps_per_second": 17.565, + "step": 295000 + }, + { + "epoch": 1.27, + "learning_rate": 3.4523995639919924e-05, + "loss": 1.5534, + "step": 295100 + }, + { + "epoch": 1.27, + "learning_rate": 3.4518567253366685e-05, + "loss": 1.5415, + "step": 295200 + }, + { + "epoch": 1.27, + "learning_rate": 3.451313886681345e-05, + "loss": 1.5556, + "step": 295300 + }, + { + "epoch": 1.27, + "learning_rate": 3.4507710480260216e-05, + "loss": 1.5313, + "step": 295400 + }, + { + "epoch": 1.27, + "learning_rate": 3.4502282093706985e-05, + "loss": 1.5754, + "step": 295500 + }, + { + "epoch": 1.27, + "learning_rate": 3.4496853707153747e-05, + "loss": 1.5621, + "step": 295600 + }, + { + "epoch": 1.27, + "learning_rate": 3.4491425320600515e-05, + "loss": 1.5538, + "step": 295700 + }, + { + "epoch": 1.27, + "learning_rate": 3.448599693404728e-05, + "loss": 1.5616, + "step": 295800 + }, + { + "epoch": 1.27, + "learning_rate": 3.448056854749404e-05, + "loss": 1.554, + "step": 295900 + }, + { + "epoch": 1.27, + "learning_rate": 3.447514016094081e-05, + "loss": 1.5214, + "step": 296000 + }, + { + "epoch": 1.27, + "learning_rate": 3.446971177438757e-05, + "loss": 1.5608, + "step": 296100 + }, + { + "epoch": 1.27, + "learning_rate": 3.446428338783433e-05, + "loss": 1.5388, + "step": 296200 + }, + { + "epoch": 1.27, + "learning_rate": 3.44588550012811e-05, + "loss": 1.5748, + "step": 296300 + }, + { + "epoch": 1.27, + "learning_rate": 3.445342661472787e-05, + "loss": 1.5302, + "step": 296400 + }, + { + "epoch": 1.27, + "learning_rate": 3.444799822817463e-05, + "loss": 1.57, + "step": 296500 + }, + { + "epoch": 1.27, + "learning_rate": 3.44425698416214e-05, + "loss": 1.5821, + "step": 296600 + }, + { + "epoch": 1.27, + "learning_rate": 3.443714145506816e-05, + "loss": 1.5377, + "step": 296700 + }, + { + "epoch": 1.28, + "learning_rate": 3.443171306851492e-05, + "loss": 1.5542, + "step": 296800 + }, + { + "epoch": 1.28, + "learning_rate": 3.442628468196169e-05, + "loss": 1.574, + "step": 296900 + }, + { + "epoch": 1.28, + "learning_rate": 3.4420856295408453e-05, + "loss": 1.5629, + "step": 297000 + }, + { + "epoch": 1.28, + "learning_rate": 3.441542790885522e-05, + "loss": 1.4973, + "step": 297100 + }, + { + "epoch": 1.28, + "learning_rate": 3.4409999522301984e-05, + "loss": 1.5659, + "step": 297200 + }, + { + "epoch": 1.28, + "learning_rate": 3.4404571135748746e-05, + "loss": 1.5541, + "step": 297300 + }, + { + "epoch": 1.28, + "learning_rate": 3.4399142749195515e-05, + "loss": 1.553, + "step": 297400 + }, + { + "epoch": 1.28, + "learning_rate": 3.439371436264228e-05, + "loss": 1.5844, + "step": 297500 + }, + { + "epoch": 1.28, + "learning_rate": 3.4388285976089045e-05, + "loss": 1.5459, + "step": 297600 + }, + { + "epoch": 1.28, + "learning_rate": 3.4382857589535814e-05, + "loss": 1.5314, + "step": 297700 + }, + { + "epoch": 1.28, + "learning_rate": 3.4377429202982576e-05, + "loss": 1.5155, + "step": 297800 + }, + { + "epoch": 1.28, + "learning_rate": 3.437200081642934e-05, + "loss": 1.5474, + "step": 297900 + }, + { + "epoch": 1.28, + "learning_rate": 3.4366572429876106e-05, + "loss": 1.539, + "step": 298000 + }, + { + "epoch": 1.28, + "learning_rate": 3.436114404332287e-05, + "loss": 1.5794, + "step": 298100 + }, + { + "epoch": 1.28, + "learning_rate": 3.435571565676963e-05, + "loss": 1.5276, + "step": 298200 + }, + { + "epoch": 1.28, + "learning_rate": 3.43502872702164e-05, + "loss": 1.5574, + "step": 298300 + }, + { + "epoch": 1.28, + "learning_rate": 3.434485888366316e-05, + "loss": 1.5615, + "step": 298400 + }, + { + "epoch": 1.28, + "learning_rate": 3.433943049710993e-05, + "loss": 1.5511, + "step": 298500 + }, + { + "epoch": 1.28, + "learning_rate": 3.43340021105567e-05, + "loss": 1.5437, + "step": 298600 + }, + { + "epoch": 1.28, + "learning_rate": 3.432857372400346e-05, + "loss": 1.5773, + "step": 298700 + }, + { + "epoch": 1.28, + "learning_rate": 3.432314533745022e-05, + "loss": 1.5682, + "step": 298800 + }, + { + "epoch": 1.28, + "learning_rate": 3.431771695089699e-05, + "loss": 1.5283, + "step": 298900 + }, + { + "epoch": 1.28, + "learning_rate": 3.431228856434375e-05, + "loss": 1.5506, + "step": 299000 + }, + { + "epoch": 1.28, + "learning_rate": 3.430686017779052e-05, + "loss": 1.5324, + "step": 299100 + }, + { + "epoch": 1.29, + "learning_rate": 3.430143179123728e-05, + "loss": 1.5774, + "step": 299200 + }, + { + "epoch": 1.29, + "learning_rate": 3.4296003404684044e-05, + "loss": 1.5498, + "step": 299300 + }, + { + "epoch": 1.29, + "learning_rate": 3.429057501813081e-05, + "loss": 1.5267, + "step": 299400 + }, + { + "epoch": 1.29, + "learning_rate": 3.4285146631577575e-05, + "loss": 1.5507, + "step": 299500 + }, + { + "epoch": 1.29, + "learning_rate": 3.4279718245024344e-05, + "loss": 1.5379, + "step": 299600 + }, + { + "epoch": 1.29, + "learning_rate": 3.427428985847111e-05, + "loss": 1.5369, + "step": 299700 + }, + { + "epoch": 1.29, + "learning_rate": 3.4268861471917874e-05, + "loss": 1.552, + "step": 299800 + }, + { + "epoch": 1.29, + "learning_rate": 3.4263433085364636e-05, + "loss": 1.5454, + "step": 299900 + }, + { + "epoch": 1.29, + "learning_rate": 3.4258004698811405e-05, + "loss": 1.5883, + "step": 300000 + }, + { + "epoch": 1.29, + "eval_loss": 1.4549944400787354, + "eval_runtime": 17.7682, + "eval_samples_per_second": 562.803, + "eval_steps_per_second": 17.616, + "step": 300000 + }, + { + "epoch": 1.29, + "learning_rate": 3.4252576312258166e-05, + "loss": 1.5578, + "step": 300100 + }, + { + "epoch": 1.29, + "learning_rate": 3.424714792570493e-05, + "loss": 1.5529, + "step": 300200 + }, + { + "epoch": 1.29, + "learning_rate": 3.42417195391517e-05, + "loss": 1.555, + "step": 300300 + }, + { + "epoch": 1.29, + "learning_rate": 3.423629115259846e-05, + "loss": 1.5347, + "step": 300400 + }, + { + "epoch": 1.29, + "learning_rate": 3.423086276604523e-05, + "loss": 1.5489, + "step": 300500 + }, + { + "epoch": 1.29, + "learning_rate": 3.4225434379491996e-05, + "loss": 1.5477, + "step": 300600 + }, + { + "epoch": 1.29, + "learning_rate": 3.422000599293876e-05, + "loss": 1.576, + "step": 300700 + }, + { + "epoch": 1.29, + "learning_rate": 3.421457760638552e-05, + "loss": 1.5734, + "step": 300800 + }, + { + "epoch": 1.29, + "learning_rate": 3.420914921983229e-05, + "loss": 1.5729, + "step": 300900 + }, + { + "epoch": 1.29, + "learning_rate": 3.420372083327905e-05, + "loss": 1.5648, + "step": 301000 + }, + { + "epoch": 1.29, + "learning_rate": 3.419829244672582e-05, + "loss": 1.5804, + "step": 301100 + }, + { + "epoch": 1.29, + "learning_rate": 3.419286406017258e-05, + "loss": 1.554, + "step": 301200 + }, + { + "epoch": 1.29, + "learning_rate": 3.418743567361934e-05, + "loss": 1.543, + "step": 301300 + }, + { + "epoch": 1.29, + "learning_rate": 3.418200728706611e-05, + "loss": 1.5486, + "step": 301400 + }, + { + "epoch": 1.3, + "learning_rate": 3.417657890051287e-05, + "loss": 1.5243, + "step": 301500 + }, + { + "epoch": 1.3, + "learning_rate": 3.4171150513959635e-05, + "loss": 1.5519, + "step": 301600 + }, + { + "epoch": 1.3, + "learning_rate": 3.416572212740641e-05, + "loss": 1.5429, + "step": 301700 + }, + { + "epoch": 1.3, + "learning_rate": 3.416029374085317e-05, + "loss": 1.5501, + "step": 301800 + }, + { + "epoch": 1.3, + "learning_rate": 3.4154865354299934e-05, + "loss": 1.5872, + "step": 301900 + }, + { + "epoch": 1.3, + "learning_rate": 3.41494369677467e-05, + "loss": 1.5625, + "step": 302000 + }, + { + "epoch": 1.3, + "learning_rate": 3.4144008581193465e-05, + "loss": 1.5918, + "step": 302100 + }, + { + "epoch": 1.3, + "learning_rate": 3.413858019464023e-05, + "loss": 1.5764, + "step": 302200 + }, + { + "epoch": 1.3, + "learning_rate": 3.4133151808086995e-05, + "loss": 1.5277, + "step": 302300 + }, + { + "epoch": 1.3, + "learning_rate": 3.412772342153376e-05, + "loss": 1.5534, + "step": 302400 + }, + { + "epoch": 1.3, + "learning_rate": 3.4122295034980526e-05, + "loss": 1.562, + "step": 302500 + }, + { + "epoch": 1.3, + "learning_rate": 3.411686664842729e-05, + "loss": 1.5329, + "step": 302600 + }, + { + "epoch": 1.3, + "learning_rate": 3.4111438261874057e-05, + "loss": 1.575, + "step": 302700 + }, + { + "epoch": 1.3, + "learning_rate": 3.410600987532082e-05, + "loss": 1.5411, + "step": 302800 + }, + { + "epoch": 1.3, + "learning_rate": 3.410058148876759e-05, + "loss": 1.5783, + "step": 302900 + }, + { + "epoch": 1.3, + "learning_rate": 3.409515310221435e-05, + "loss": 1.5569, + "step": 303000 + }, + { + "epoch": 1.3, + "learning_rate": 3.408972471566112e-05, + "loss": 1.5529, + "step": 303100 + }, + { + "epoch": 1.3, + "learning_rate": 3.408429632910788e-05, + "loss": 1.5409, + "step": 303200 + }, + { + "epoch": 1.3, + "learning_rate": 3.407886794255464e-05, + "loss": 1.53, + "step": 303300 + }, + { + "epoch": 1.3, + "learning_rate": 3.407343955600141e-05, + "loss": 1.5868, + "step": 303400 + }, + { + "epoch": 1.3, + "learning_rate": 3.406801116944817e-05, + "loss": 1.5303, + "step": 303500 + }, + { + "epoch": 1.3, + "learning_rate": 3.4062582782894934e-05, + "loss": 1.5679, + "step": 303600 + }, + { + "epoch": 1.3, + "learning_rate": 3.405715439634171e-05, + "loss": 1.5392, + "step": 303700 + }, + { + "epoch": 1.31, + "learning_rate": 3.405172600978847e-05, + "loss": 1.5518, + "step": 303800 + }, + { + "epoch": 1.31, + "learning_rate": 3.404629762323523e-05, + "loss": 1.5485, + "step": 303900 + }, + { + "epoch": 1.31, + "learning_rate": 3.4040869236682e-05, + "loss": 1.5709, + "step": 304000 + }, + { + "epoch": 1.31, + "learning_rate": 3.4035440850128763e-05, + "loss": 1.558, + "step": 304100 + }, + { + "epoch": 1.31, + "learning_rate": 3.4030012463575525e-05, + "loss": 1.5485, + "step": 304200 + }, + { + "epoch": 1.31, + "learning_rate": 3.4024584077022294e-05, + "loss": 1.5486, + "step": 304300 + }, + { + "epoch": 1.31, + "learning_rate": 3.4019155690469056e-05, + "loss": 1.5748, + "step": 304400 + }, + { + "epoch": 1.31, + "learning_rate": 3.401372730391582e-05, + "loss": 1.5405, + "step": 304500 + }, + { + "epoch": 1.31, + "learning_rate": 3.4008298917362586e-05, + "loss": 1.5368, + "step": 304600 + }, + { + "epoch": 1.31, + "learning_rate": 3.400287053080935e-05, + "loss": 1.5462, + "step": 304700 + }, + { + "epoch": 1.31, + "learning_rate": 3.399744214425612e-05, + "loss": 1.5653, + "step": 304800 + }, + { + "epoch": 1.31, + "learning_rate": 3.3992013757702886e-05, + "loss": 1.5397, + "step": 304900 + }, + { + "epoch": 1.31, + "learning_rate": 3.398658537114965e-05, + "loss": 1.5411, + "step": 305000 + }, + { + "epoch": 1.31, + "eval_loss": 1.4525820016860962, + "eval_runtime": 17.7928, + "eval_samples_per_second": 562.025, + "eval_steps_per_second": 17.591, + "step": 305000 + }, + { + "epoch": 1.31, + "learning_rate": 3.3981156984596416e-05, + "loss": 1.5321, + "step": 305100 + }, + { + "epoch": 1.31, + "learning_rate": 3.397572859804318e-05, + "loss": 1.5782, + "step": 305200 + }, + { + "epoch": 1.31, + "learning_rate": 3.397030021148994e-05, + "loss": 1.5761, + "step": 305300 + }, + { + "epoch": 1.31, + "learning_rate": 3.396487182493671e-05, + "loss": 1.5259, + "step": 305400 + }, + { + "epoch": 1.31, + "learning_rate": 3.395944343838347e-05, + "loss": 1.5641, + "step": 305500 + }, + { + "epoch": 1.31, + "learning_rate": 3.395401505183023e-05, + "loss": 1.5323, + "step": 305600 + }, + { + "epoch": 1.31, + "learning_rate": 3.3948586665277e-05, + "loss": 1.555, + "step": 305700 + }, + { + "epoch": 1.31, + "learning_rate": 3.394315827872377e-05, + "loss": 1.5812, + "step": 305800 + }, + { + "epoch": 1.31, + "learning_rate": 3.393772989217053e-05, + "loss": 1.5379, + "step": 305900 + }, + { + "epoch": 1.31, + "learning_rate": 3.39323015056173e-05, + "loss": 1.5659, + "step": 306000 + }, + { + "epoch": 1.32, + "learning_rate": 3.392687311906406e-05, + "loss": 1.5332, + "step": 306100 + }, + { + "epoch": 1.32, + "learning_rate": 3.3921444732510824e-05, + "loss": 1.5472, + "step": 306200 + }, + { + "epoch": 1.32, + "learning_rate": 3.391601634595759e-05, + "loss": 1.5734, + "step": 306300 + }, + { + "epoch": 1.32, + "learning_rate": 3.3910587959404354e-05, + "loss": 1.5283, + "step": 306400 + }, + { + "epoch": 1.32, + "learning_rate": 3.3905159572851116e-05, + "loss": 1.5499, + "step": 306500 + }, + { + "epoch": 1.32, + "learning_rate": 3.3899731186297885e-05, + "loss": 1.5867, + "step": 306600 + }, + { + "epoch": 1.32, + "learning_rate": 3.389430279974465e-05, + "loss": 1.5538, + "step": 306700 + }, + { + "epoch": 1.32, + "learning_rate": 3.3888874413191415e-05, + "loss": 1.5579, + "step": 306800 + }, + { + "epoch": 1.32, + "learning_rate": 3.3883446026638184e-05, + "loss": 1.5155, + "step": 306900 + }, + { + "epoch": 1.32, + "learning_rate": 3.3878017640084946e-05, + "loss": 1.5217, + "step": 307000 + }, + { + "epoch": 1.32, + "learning_rate": 3.3872589253531715e-05, + "loss": 1.555, + "step": 307100 + }, + { + "epoch": 1.32, + "learning_rate": 3.3867160866978476e-05, + "loss": 1.5743, + "step": 307200 + }, + { + "epoch": 1.32, + "learning_rate": 3.386173248042524e-05, + "loss": 1.5423, + "step": 307300 + }, + { + "epoch": 1.32, + "learning_rate": 3.385630409387201e-05, + "loss": 1.54, + "step": 307400 + }, + { + "epoch": 1.32, + "learning_rate": 3.385087570731877e-05, + "loss": 1.5613, + "step": 307500 + }, + { + "epoch": 1.32, + "learning_rate": 3.384544732076553e-05, + "loss": 1.572, + "step": 307600 + }, + { + "epoch": 1.32, + "learning_rate": 3.38400189342123e-05, + "loss": 1.5829, + "step": 307700 + }, + { + "epoch": 1.32, + "learning_rate": 3.383459054765906e-05, + "loss": 1.5477, + "step": 307800 + }, + { + "epoch": 1.32, + "learning_rate": 3.382916216110583e-05, + "loss": 1.5112, + "step": 307900 + }, + { + "epoch": 1.32, + "learning_rate": 3.38237337745526e-05, + "loss": 1.5316, + "step": 308000 + }, + { + "epoch": 1.32, + "learning_rate": 3.381830538799936e-05, + "loss": 1.5668, + "step": 308100 + }, + { + "epoch": 1.32, + "learning_rate": 3.381287700144612e-05, + "loss": 1.5425, + "step": 308200 + }, + { + "epoch": 1.32, + "learning_rate": 3.380744861489289e-05, + "loss": 1.5763, + "step": 308300 + }, + { + "epoch": 1.32, + "learning_rate": 3.380202022833965e-05, + "loss": 1.5412, + "step": 308400 + }, + { + "epoch": 1.33, + "learning_rate": 3.3796591841786415e-05, + "loss": 1.5534, + "step": 308500 + }, + { + "epoch": 1.33, + "learning_rate": 3.379116345523318e-05, + "loss": 1.548, + "step": 308600 + }, + { + "epoch": 1.33, + "learning_rate": 3.3785735068679945e-05, + "loss": 1.5331, + "step": 308700 + }, + { + "epoch": 1.33, + "learning_rate": 3.3780306682126714e-05, + "loss": 1.555, + "step": 308800 + }, + { + "epoch": 1.33, + "learning_rate": 3.3774878295573476e-05, + "loss": 1.5385, + "step": 308900 + }, + { + "epoch": 1.33, + "learning_rate": 3.3769449909020244e-05, + "loss": 1.5703, + "step": 309000 + }, + { + "epoch": 1.33, + "learning_rate": 3.376402152246701e-05, + "loss": 1.5898, + "step": 309100 + }, + { + "epoch": 1.33, + "learning_rate": 3.3758593135913775e-05, + "loss": 1.5576, + "step": 309200 + }, + { + "epoch": 1.33, + "learning_rate": 3.375316474936054e-05, + "loss": 1.5749, + "step": 309300 + }, + { + "epoch": 1.33, + "learning_rate": 3.3747736362807305e-05, + "loss": 1.5374, + "step": 309400 + }, + { + "epoch": 1.33, + "learning_rate": 3.374230797625407e-05, + "loss": 1.5718, + "step": 309500 + }, + { + "epoch": 1.33, + "learning_rate": 3.373687958970083e-05, + "loss": 1.5414, + "step": 309600 + }, + { + "epoch": 1.33, + "learning_rate": 3.37314512031476e-05, + "loss": 1.5578, + "step": 309700 + }, + { + "epoch": 1.33, + "learning_rate": 3.372602281659436e-05, + "loss": 1.558, + "step": 309800 + }, + { + "epoch": 1.33, + "learning_rate": 3.372059443004112e-05, + "loss": 1.5667, + "step": 309900 + }, + { + "epoch": 1.33, + "learning_rate": 3.37151660434879e-05, + "loss": 1.5619, + "step": 310000 + }, + { + "epoch": 1.33, + "eval_loss": 1.4491908550262451, + "eval_runtime": 17.778, + "eval_samples_per_second": 562.492, + "eval_steps_per_second": 17.606, + "step": 310000 + }, + { + "epoch": 1.33, + "learning_rate": 3.370973765693466e-05, + "loss": 1.5363, + "step": 310100 + }, + { + "epoch": 1.33, + "learning_rate": 3.370430927038142e-05, + "loss": 1.5495, + "step": 310200 + }, + { + "epoch": 1.33, + "learning_rate": 3.369888088382819e-05, + "loss": 1.5621, + "step": 310300 + }, + { + "epoch": 1.33, + "learning_rate": 3.369345249727495e-05, + "loss": 1.5361, + "step": 310400 + }, + { + "epoch": 1.33, + "learning_rate": 3.368802411072171e-05, + "loss": 1.534, + "step": 310500 + }, + { + "epoch": 1.33, + "learning_rate": 3.368259572416848e-05, + "loss": 1.5424, + "step": 310600 + }, + { + "epoch": 1.33, + "learning_rate": 3.3677167337615244e-05, + "loss": 1.5508, + "step": 310700 + }, + { + "epoch": 1.34, + "learning_rate": 3.367173895106201e-05, + "loss": 1.556, + "step": 310800 + }, + { + "epoch": 1.34, + "learning_rate": 3.3666310564508774e-05, + "loss": 1.5717, + "step": 310900 + }, + { + "epoch": 1.34, + "learning_rate": 3.3660882177955536e-05, + "loss": 1.538, + "step": 311000 + }, + { + "epoch": 1.34, + "learning_rate": 3.365545379140231e-05, + "loss": 1.5567, + "step": 311100 + }, + { + "epoch": 1.34, + "learning_rate": 3.3650025404849073e-05, + "loss": 1.5566, + "step": 311200 + }, + { + "epoch": 1.34, + "learning_rate": 3.3644597018295835e-05, + "loss": 1.5158, + "step": 311300 + }, + { + "epoch": 1.34, + "learning_rate": 3.3639168631742604e-05, + "loss": 1.5473, + "step": 311400 + }, + { + "epoch": 1.34, + "learning_rate": 3.3633740245189366e-05, + "loss": 1.5722, + "step": 311500 + }, + { + "epoch": 1.34, + "learning_rate": 3.362831185863613e-05, + "loss": 1.5427, + "step": 311600 + }, + { + "epoch": 1.34, + "learning_rate": 3.3622883472082896e-05, + "loss": 1.5306, + "step": 311700 + }, + { + "epoch": 1.34, + "learning_rate": 3.361745508552966e-05, + "loss": 1.5336, + "step": 311800 + }, + { + "epoch": 1.34, + "learning_rate": 3.361202669897642e-05, + "loss": 1.5604, + "step": 311900 + }, + { + "epoch": 1.34, + "learning_rate": 3.360659831242319e-05, + "loss": 1.5639, + "step": 312000 + }, + { + "epoch": 1.34, + "learning_rate": 3.360116992586996e-05, + "loss": 1.5306, + "step": 312100 + }, + { + "epoch": 1.34, + "learning_rate": 3.359574153931672e-05, + "loss": 1.5653, + "step": 312200 + }, + { + "epoch": 1.34, + "learning_rate": 3.359031315276349e-05, + "loss": 1.5584, + "step": 312300 + }, + { + "epoch": 1.34, + "learning_rate": 3.358488476621025e-05, + "loss": 1.5973, + "step": 312400 + }, + { + "epoch": 1.34, + "learning_rate": 3.357945637965701e-05, + "loss": 1.5377, + "step": 312500 + }, + { + "epoch": 1.34, + "learning_rate": 3.357402799310378e-05, + "loss": 1.5437, + "step": 312600 + }, + { + "epoch": 1.34, + "learning_rate": 3.356859960655054e-05, + "loss": 1.5638, + "step": 312700 + }, + { + "epoch": 1.34, + "learning_rate": 3.356317121999731e-05, + "loss": 1.5189, + "step": 312800 + }, + { + "epoch": 1.34, + "learning_rate": 3.355774283344407e-05, + "loss": 1.5334, + "step": 312900 + }, + { + "epoch": 1.34, + "learning_rate": 3.3552314446890835e-05, + "loss": 1.5373, + "step": 313000 + }, + { + "epoch": 1.35, + "learning_rate": 3.35468860603376e-05, + "loss": 1.5312, + "step": 313100 + }, + { + "epoch": 1.35, + "learning_rate": 3.354145767378437e-05, + "loss": 1.5472, + "step": 313200 + }, + { + "epoch": 1.35, + "learning_rate": 3.3536029287231134e-05, + "loss": 1.5665, + "step": 313300 + }, + { + "epoch": 1.35, + "learning_rate": 3.35306009006779e-05, + "loss": 1.5235, + "step": 313400 + }, + { + "epoch": 1.35, + "learning_rate": 3.3525172514124664e-05, + "loss": 1.5534, + "step": 313500 + }, + { + "epoch": 1.35, + "learning_rate": 3.3519744127571426e-05, + "loss": 1.5593, + "step": 313600 + }, + { + "epoch": 1.35, + "learning_rate": 3.3514315741018195e-05, + "loss": 1.5607, + "step": 313700 + }, + { + "epoch": 1.35, + "learning_rate": 3.350888735446496e-05, + "loss": 1.5367, + "step": 313800 + }, + { + "epoch": 1.35, + "learning_rate": 3.350345896791172e-05, + "loss": 1.5468, + "step": 313900 + }, + { + "epoch": 1.35, + "learning_rate": 3.349803058135849e-05, + "loss": 1.5385, + "step": 314000 + }, + { + "epoch": 1.35, + "learning_rate": 3.349260219480525e-05, + "loss": 1.59, + "step": 314100 + }, + { + "epoch": 1.35, + "learning_rate": 3.348717380825202e-05, + "loss": 1.5429, + "step": 314200 + }, + { + "epoch": 1.35, + "learning_rate": 3.3481745421698786e-05, + "loss": 1.5347, + "step": 314300 + }, + { + "epoch": 1.35, + "learning_rate": 3.347631703514555e-05, + "loss": 1.5435, + "step": 314400 + }, + { + "epoch": 1.35, + "learning_rate": 3.347088864859231e-05, + "loss": 1.5158, + "step": 314500 + }, + { + "epoch": 1.35, + "learning_rate": 3.346546026203908e-05, + "loss": 1.5348, + "step": 314600 + }, + { + "epoch": 1.35, + "learning_rate": 3.346003187548584e-05, + "loss": 1.5336, + "step": 314700 + }, + { + "epoch": 1.35, + "learning_rate": 3.345460348893261e-05, + "loss": 1.5486, + "step": 314800 + }, + { + "epoch": 1.35, + "learning_rate": 3.344917510237937e-05, + "loss": 1.5407, + "step": 314900 + }, + { + "epoch": 1.35, + "learning_rate": 3.344374671582613e-05, + "loss": 1.5393, + "step": 315000 + }, + { + "epoch": 1.35, + "eval_loss": 1.4514410495758057, + "eval_runtime": 17.8233, + "eval_samples_per_second": 561.064, + "eval_steps_per_second": 17.561, + "step": 315000 + }, + { + "epoch": 1.35, + "learning_rate": 3.34383183292729e-05, + "loss": 1.5579, + "step": 315100 + }, + { + "epoch": 1.35, + "learning_rate": 3.3432889942719664e-05, + "loss": 1.5455, + "step": 315200 + }, + { + "epoch": 1.35, + "learning_rate": 3.342746155616643e-05, + "loss": 1.514, + "step": 315300 + }, + { + "epoch": 1.35, + "learning_rate": 3.34220331696132e-05, + "loss": 1.5846, + "step": 315400 + }, + { + "epoch": 1.36, + "learning_rate": 3.341660478305996e-05, + "loss": 1.5471, + "step": 315500 + }, + { + "epoch": 1.36, + "learning_rate": 3.3411176396506725e-05, + "loss": 1.5392, + "step": 315600 + }, + { + "epoch": 1.36, + "learning_rate": 3.340574800995349e-05, + "loss": 1.5526, + "step": 315700 + }, + { + "epoch": 1.36, + "learning_rate": 3.3400319623400255e-05, + "loss": 1.5548, + "step": 315800 + }, + { + "epoch": 1.36, + "learning_rate": 3.339489123684702e-05, + "loss": 1.545, + "step": 315900 + }, + { + "epoch": 1.36, + "learning_rate": 3.3389462850293786e-05, + "loss": 1.5535, + "step": 316000 + }, + { + "epoch": 1.36, + "learning_rate": 3.338403446374055e-05, + "loss": 1.5481, + "step": 316100 + }, + { + "epoch": 1.36, + "learning_rate": 3.3378606077187316e-05, + "loss": 1.5492, + "step": 316200 + }, + { + "epoch": 1.36, + "learning_rate": 3.3373177690634085e-05, + "loss": 1.549, + "step": 316300 + }, + { + "epoch": 1.36, + "learning_rate": 3.336774930408085e-05, + "loss": 1.5482, + "step": 316400 + }, + { + "epoch": 1.36, + "learning_rate": 3.336232091752761e-05, + "loss": 1.5448, + "step": 316500 + }, + { + "epoch": 1.36, + "learning_rate": 3.335689253097438e-05, + "loss": 1.5604, + "step": 316600 + }, + { + "epoch": 1.36, + "learning_rate": 3.335146414442114e-05, + "loss": 1.5651, + "step": 316700 + }, + { + "epoch": 1.36, + "learning_rate": 3.334603575786791e-05, + "loss": 1.5387, + "step": 316800 + }, + { + "epoch": 1.36, + "learning_rate": 3.334060737131467e-05, + "loss": 1.5601, + "step": 316900 + }, + { + "epoch": 1.36, + "learning_rate": 3.333517898476143e-05, + "loss": 1.5667, + "step": 317000 + }, + { + "epoch": 1.36, + "learning_rate": 3.33297505982082e-05, + "loss": 1.5659, + "step": 317100 + }, + { + "epoch": 1.36, + "learning_rate": 3.332432221165496e-05, + "loss": 1.5477, + "step": 317200 + }, + { + "epoch": 1.36, + "learning_rate": 3.3318893825101724e-05, + "loss": 1.5303, + "step": 317300 + }, + { + "epoch": 1.36, + "learning_rate": 3.33134654385485e-05, + "loss": 1.5476, + "step": 317400 + }, + { + "epoch": 1.36, + "learning_rate": 3.330803705199526e-05, + "loss": 1.56, + "step": 317500 + }, + { + "epoch": 1.36, + "learning_rate": 3.330260866544202e-05, + "loss": 1.5144, + "step": 317600 + }, + { + "epoch": 1.36, + "learning_rate": 3.329718027888879e-05, + "loss": 1.5498, + "step": 317700 + }, + { + "epoch": 1.37, + "learning_rate": 3.3291751892335554e-05, + "loss": 1.5201, + "step": 317800 + }, + { + "epoch": 1.37, + "learning_rate": 3.3286323505782316e-05, + "loss": 1.5448, + "step": 317900 + }, + { + "epoch": 1.37, + "learning_rate": 3.3280895119229084e-05, + "loss": 1.5226, + "step": 318000 + }, + { + "epoch": 1.37, + "learning_rate": 3.3275466732675846e-05, + "loss": 1.5646, + "step": 318100 + }, + { + "epoch": 1.37, + "learning_rate": 3.3270038346122615e-05, + "loss": 1.5561, + "step": 318200 + }, + { + "epoch": 1.37, + "learning_rate": 3.3264609959569377e-05, + "loss": 1.5407, + "step": 318300 + }, + { + "epoch": 1.37, + "learning_rate": 3.3259181573016145e-05, + "loss": 1.5461, + "step": 318400 + }, + { + "epoch": 1.37, + "learning_rate": 3.325375318646291e-05, + "loss": 1.5291, + "step": 318500 + }, + { + "epoch": 1.37, + "learning_rate": 3.3248324799909676e-05, + "loss": 1.5518, + "step": 318600 + }, + { + "epoch": 1.37, + "learning_rate": 3.324289641335644e-05, + "loss": 1.5508, + "step": 318700 + }, + { + "epoch": 1.37, + "learning_rate": 3.3237468026803206e-05, + "loss": 1.5272, + "step": 318800 + }, + { + "epoch": 1.37, + "learning_rate": 3.323203964024997e-05, + "loss": 1.573, + "step": 318900 + }, + { + "epoch": 1.37, + "learning_rate": 3.322661125369673e-05, + "loss": 1.5179, + "step": 319000 + }, + { + "epoch": 1.37, + "learning_rate": 3.32211828671435e-05, + "loss": 1.5202, + "step": 319100 + }, + { + "epoch": 1.37, + "learning_rate": 3.321575448059026e-05, + "loss": 1.5343, + "step": 319200 + }, + { + "epoch": 1.37, + "learning_rate": 3.321032609403702e-05, + "loss": 1.5627, + "step": 319300 + }, + { + "epoch": 1.37, + "learning_rate": 3.320489770748379e-05, + "loss": 1.5392, + "step": 319400 + }, + { + "epoch": 1.37, + "learning_rate": 3.319946932093056e-05, + "loss": 1.5463, + "step": 319500 + }, + { + "epoch": 1.37, + "learning_rate": 3.319404093437732e-05, + "loss": 1.5394, + "step": 319600 + }, + { + "epoch": 1.37, + "learning_rate": 3.318861254782409e-05, + "loss": 1.5631, + "step": 319700 + }, + { + "epoch": 1.37, + "learning_rate": 3.318318416127085e-05, + "loss": 1.553, + "step": 319800 + }, + { + "epoch": 1.37, + "learning_rate": 3.3177755774717614e-05, + "loss": 1.5476, + "step": 319900 + }, + { + "epoch": 1.37, + "learning_rate": 3.317232738816438e-05, + "loss": 1.5625, + "step": 320000 + }, + { + "epoch": 1.37, + "eval_loss": 1.449994683265686, + "eval_runtime": 17.8575, + "eval_samples_per_second": 559.99, + "eval_steps_per_second": 17.528, + "step": 320000 + }, + { + "epoch": 1.38, + "learning_rate": 3.3166899001611145e-05, + "loss": 1.5589, + "step": 320100 + }, + { + "epoch": 1.38, + "learning_rate": 3.316147061505791e-05, + "loss": 1.5389, + "step": 320200 + }, + { + "epoch": 1.38, + "learning_rate": 3.3156042228504675e-05, + "loss": 1.5409, + "step": 320300 + }, + { + "epoch": 1.38, + "learning_rate": 3.315061384195144e-05, + "loss": 1.578, + "step": 320400 + }, + { + "epoch": 1.38, + "learning_rate": 3.3145185455398206e-05, + "loss": 1.5632, + "step": 320500 + }, + { + "epoch": 1.38, + "learning_rate": 3.3139757068844974e-05, + "loss": 1.5289, + "step": 320600 + }, + { + "epoch": 1.38, + "learning_rate": 3.3134328682291736e-05, + "loss": 1.5462, + "step": 320700 + }, + { + "epoch": 1.38, + "learning_rate": 3.3128900295738505e-05, + "loss": 1.516, + "step": 320800 + }, + { + "epoch": 1.38, + "learning_rate": 3.312347190918527e-05, + "loss": 1.5305, + "step": 320900 + }, + { + "epoch": 1.38, + "learning_rate": 3.311804352263203e-05, + "loss": 1.5529, + "step": 321000 + }, + { + "epoch": 1.38, + "learning_rate": 3.31126151360788e-05, + "loss": 1.5553, + "step": 321100 + }, + { + "epoch": 1.38, + "learning_rate": 3.310718674952556e-05, + "loss": 1.5441, + "step": 321200 + }, + { + "epoch": 1.38, + "learning_rate": 3.310175836297232e-05, + "loss": 1.5222, + "step": 321300 + }, + { + "epoch": 1.38, + "learning_rate": 3.309632997641909e-05, + "loss": 1.5558, + "step": 321400 + }, + { + "epoch": 1.38, + "learning_rate": 3.309090158986585e-05, + "loss": 1.5436, + "step": 321500 + }, + { + "epoch": 1.38, + "learning_rate": 3.308547320331262e-05, + "loss": 1.5389, + "step": 321600 + }, + { + "epoch": 1.38, + "learning_rate": 3.308004481675939e-05, + "loss": 1.5522, + "step": 321700 + }, + { + "epoch": 1.38, + "learning_rate": 3.307461643020615e-05, + "loss": 1.5622, + "step": 321800 + }, + { + "epoch": 1.38, + "learning_rate": 3.306918804365291e-05, + "loss": 1.565, + "step": 321900 + }, + { + "epoch": 1.38, + "learning_rate": 3.306375965709968e-05, + "loss": 1.5737, + "step": 322000 + }, + { + "epoch": 1.38, + "learning_rate": 3.305833127054644e-05, + "loss": 1.5529, + "step": 322100 + }, + { + "epoch": 1.38, + "learning_rate": 3.305290288399321e-05, + "loss": 1.5312, + "step": 322200 + }, + { + "epoch": 1.38, + "learning_rate": 3.3047474497439974e-05, + "loss": 1.547, + "step": 322300 + }, + { + "epoch": 1.39, + "learning_rate": 3.3042046110886735e-05, + "loss": 1.5254, + "step": 322400 + }, + { + "epoch": 1.39, + "learning_rate": 3.3036617724333504e-05, + "loss": 1.542, + "step": 322500 + }, + { + "epoch": 1.39, + "learning_rate": 3.303118933778027e-05, + "loss": 1.5424, + "step": 322600 + }, + { + "epoch": 1.39, + "learning_rate": 3.3025760951227035e-05, + "loss": 1.5567, + "step": 322700 + }, + { + "epoch": 1.39, + "learning_rate": 3.30203325646738e-05, + "loss": 1.5519, + "step": 322800 + }, + { + "epoch": 1.39, + "learning_rate": 3.3014904178120565e-05, + "loss": 1.5261, + "step": 322900 + }, + { + "epoch": 1.39, + "learning_rate": 3.300947579156733e-05, + "loss": 1.5377, + "step": 323000 + }, + { + "epoch": 1.39, + "learning_rate": 3.3004047405014096e-05, + "loss": 1.5222, + "step": 323100 + }, + { + "epoch": 1.39, + "learning_rate": 3.299861901846086e-05, + "loss": 1.5657, + "step": 323200 + }, + { + "epoch": 1.39, + "learning_rate": 3.299319063190762e-05, + "loss": 1.593, + "step": 323300 + }, + { + "epoch": 1.39, + "learning_rate": 3.298776224535439e-05, + "loss": 1.579, + "step": 323400 + }, + { + "epoch": 1.39, + "learning_rate": 3.298233385880115e-05, + "loss": 1.5289, + "step": 323500 + }, + { + "epoch": 1.39, + "learning_rate": 3.297690547224792e-05, + "loss": 1.5249, + "step": 323600 + }, + { + "epoch": 1.39, + "learning_rate": 3.297147708569469e-05, + "loss": 1.5064, + "step": 323700 + }, + { + "epoch": 1.39, + "learning_rate": 3.296604869914145e-05, + "loss": 1.563, + "step": 323800 + }, + { + "epoch": 1.39, + "learning_rate": 3.296062031258821e-05, + "loss": 1.5391, + "step": 323900 + }, + { + "epoch": 1.39, + "learning_rate": 3.295519192603498e-05, + "loss": 1.5499, + "step": 324000 + }, + { + "epoch": 1.39, + "learning_rate": 3.294976353948174e-05, + "loss": 1.533, + "step": 324100 + }, + { + "epoch": 1.39, + "learning_rate": 3.294433515292851e-05, + "loss": 1.5583, + "step": 324200 + }, + { + "epoch": 1.39, + "learning_rate": 3.293890676637527e-05, + "loss": 1.5807, + "step": 324300 + }, + { + "epoch": 1.39, + "learning_rate": 3.2933478379822034e-05, + "loss": 1.5936, + "step": 324400 + }, + { + "epoch": 1.39, + "learning_rate": 3.29280499932688e-05, + "loss": 1.5425, + "step": 324500 + }, + { + "epoch": 1.39, + "learning_rate": 3.2922621606715564e-05, + "loss": 1.5584, + "step": 324600 + }, + { + "epoch": 1.39, + "learning_rate": 3.291719322016233e-05, + "loss": 1.5303, + "step": 324700 + }, + { + "epoch": 1.4, + "learning_rate": 3.29117648336091e-05, + "loss": 1.513, + "step": 324800 + }, + { + "epoch": 1.4, + "learning_rate": 3.2906336447055864e-05, + "loss": 1.5649, + "step": 324900 + }, + { + "epoch": 1.4, + "learning_rate": 3.2900908060502626e-05, + "loss": 1.5681, + "step": 325000 + }, + { + "epoch": 1.4, + "eval_loss": 1.451581597328186, + "eval_runtime": 17.7955, + "eval_samples_per_second": 561.941, + "eval_steps_per_second": 17.589, + "step": 325000 + }, + { + "epoch": 1.4, + "learning_rate": 3.2895479673949394e-05, + "loss": 1.5595, + "step": 325100 + }, + { + "epoch": 1.4, + "learning_rate": 3.2890051287396156e-05, + "loss": 1.571, + "step": 325200 + }, + { + "epoch": 1.4, + "learning_rate": 3.288462290084292e-05, + "loss": 1.5265, + "step": 325300 + }, + { + "epoch": 1.4, + "learning_rate": 3.2879194514289687e-05, + "loss": 1.5329, + "step": 325400 + }, + { + "epoch": 1.4, + "learning_rate": 3.287376612773645e-05, + "loss": 1.5374, + "step": 325500 + }, + { + "epoch": 1.4, + "learning_rate": 3.286833774118322e-05, + "loss": 1.5731, + "step": 325600 + }, + { + "epoch": 1.4, + "learning_rate": 3.2862909354629986e-05, + "loss": 1.5147, + "step": 325700 + }, + { + "epoch": 1.4, + "learning_rate": 3.285748096807675e-05, + "loss": 1.5584, + "step": 325800 + }, + { + "epoch": 1.4, + "learning_rate": 3.285205258152351e-05, + "loss": 1.5324, + "step": 325900 + }, + { + "epoch": 1.4, + "learning_rate": 3.284662419497028e-05, + "loss": 1.5545, + "step": 326000 + }, + { + "epoch": 1.4, + "learning_rate": 3.284119580841704e-05, + "loss": 1.5175, + "step": 326100 + }, + { + "epoch": 1.4, + "learning_rate": 3.283576742186381e-05, + "loss": 1.5394, + "step": 326200 + }, + { + "epoch": 1.4, + "learning_rate": 3.283033903531057e-05, + "loss": 1.5742, + "step": 326300 + }, + { + "epoch": 1.4, + "learning_rate": 3.282491064875733e-05, + "loss": 1.5592, + "step": 326400 + }, + { + "epoch": 1.4, + "learning_rate": 3.28194822622041e-05, + "loss": 1.5361, + "step": 326500 + }, + { + "epoch": 1.4, + "learning_rate": 3.281405387565086e-05, + "loss": 1.5426, + "step": 326600 + }, + { + "epoch": 1.4, + "learning_rate": 3.2808625489097625e-05, + "loss": 1.5495, + "step": 326700 + }, + { + "epoch": 1.4, + "learning_rate": 3.28031971025444e-05, + "loss": 1.5557, + "step": 326800 + }, + { + "epoch": 1.4, + "learning_rate": 3.279776871599116e-05, + "loss": 1.5362, + "step": 326900 + }, + { + "epoch": 1.4, + "learning_rate": 3.2792340329437924e-05, + "loss": 1.5475, + "step": 327000 + }, + { + "epoch": 1.41, + "learning_rate": 3.278691194288469e-05, + "loss": 1.573, + "step": 327100 + }, + { + "epoch": 1.41, + "learning_rate": 3.2781483556331455e-05, + "loss": 1.5561, + "step": 327200 + }, + { + "epoch": 1.41, + "learning_rate": 3.2776055169778216e-05, + "loss": 1.563, + "step": 327300 + }, + { + "epoch": 1.41, + "learning_rate": 3.2770626783224985e-05, + "loss": 1.5532, + "step": 327400 + }, + { + "epoch": 1.41, + "learning_rate": 3.276519839667175e-05, + "loss": 1.5384, + "step": 327500 + }, + { + "epoch": 1.41, + "learning_rate": 3.2759770010118516e-05, + "loss": 1.5402, + "step": 327600 + }, + { + "epoch": 1.41, + "learning_rate": 3.275434162356528e-05, + "loss": 1.5416, + "step": 327700 + }, + { + "epoch": 1.41, + "learning_rate": 3.2748913237012046e-05, + "loss": 1.5601, + "step": 327800 + }, + { + "epoch": 1.41, + "learning_rate": 3.274348485045881e-05, + "loss": 1.5608, + "step": 327900 + }, + { + "epoch": 1.41, + "learning_rate": 3.273805646390558e-05, + "loss": 1.5469, + "step": 328000 + }, + { + "epoch": 1.41, + "learning_rate": 3.273262807735234e-05, + "loss": 1.5253, + "step": 328100 + }, + { + "epoch": 1.41, + "learning_rate": 3.272719969079911e-05, + "loss": 1.5558, + "step": 328200 + }, + { + "epoch": 1.41, + "learning_rate": 3.272177130424587e-05, + "loss": 1.5568, + "step": 328300 + }, + { + "epoch": 1.41, + "learning_rate": 3.271634291769263e-05, + "loss": 1.4968, + "step": 328400 + }, + { + "epoch": 1.41, + "learning_rate": 3.27109145311394e-05, + "loss": 1.5454, + "step": 328500 + }, + { + "epoch": 1.41, + "learning_rate": 3.270548614458616e-05, + "loss": 1.5199, + "step": 328600 + }, + { + "epoch": 1.41, + "learning_rate": 3.270005775803292e-05, + "loss": 1.5302, + "step": 328700 + }, + { + "epoch": 1.41, + "learning_rate": 3.269462937147969e-05, + "loss": 1.5254, + "step": 328800 + }, + { + "epoch": 1.41, + "learning_rate": 3.268920098492646e-05, + "loss": 1.5374, + "step": 328900 + }, + { + "epoch": 1.41, + "learning_rate": 3.268377259837322e-05, + "loss": 1.5471, + "step": 329000 + }, + { + "epoch": 1.41, + "learning_rate": 3.267834421181999e-05, + "loss": 1.5328, + "step": 329100 + }, + { + "epoch": 1.41, + "learning_rate": 3.267291582526675e-05, + "loss": 1.5556, + "step": 329200 + }, + { + "epoch": 1.41, + "learning_rate": 3.2667487438713515e-05, + "loss": 1.5498, + "step": 329300 + }, + { + "epoch": 1.42, + "learning_rate": 3.2662059052160284e-05, + "loss": 1.5521, + "step": 329400 + }, + { + "epoch": 1.42, + "learning_rate": 3.2656630665607045e-05, + "loss": 1.551, + "step": 329500 + }, + { + "epoch": 1.42, + "learning_rate": 3.2651202279053814e-05, + "loss": 1.5019, + "step": 329600 + }, + { + "epoch": 1.42, + "learning_rate": 3.2645773892500576e-05, + "loss": 1.5463, + "step": 329700 + }, + { + "epoch": 1.42, + "learning_rate": 3.264034550594734e-05, + "loss": 1.5388, + "step": 329800 + }, + { + "epoch": 1.42, + "learning_rate": 3.2634917119394106e-05, + "loss": 1.5319, + "step": 329900 + }, + { + "epoch": 1.42, + "learning_rate": 3.2629488732840875e-05, + "loss": 1.5459, + "step": 330000 + }, + { + "epoch": 1.42, + "eval_loss": 1.4472662210464478, + "eval_runtime": 17.7977, + "eval_samples_per_second": 561.871, + "eval_steps_per_second": 17.587, + "step": 330000 + }, + { + "epoch": 1.42, + "learning_rate": 3.262406034628764e-05, + "loss": 1.5692, + "step": 330100 + }, + { + "epoch": 1.42, + "learning_rate": 3.2618631959734406e-05, + "loss": 1.5344, + "step": 330200 + }, + { + "epoch": 1.42, + "learning_rate": 3.261320357318117e-05, + "loss": 1.5314, + "step": 330300 + }, + { + "epoch": 1.42, + "learning_rate": 3.260777518662793e-05, + "loss": 1.5389, + "step": 330400 + }, + { + "epoch": 1.42, + "learning_rate": 3.26023468000747e-05, + "loss": 1.5467, + "step": 330500 + }, + { + "epoch": 1.42, + "learning_rate": 3.259691841352146e-05, + "loss": 1.5338, + "step": 330600 + }, + { + "epoch": 1.42, + "learning_rate": 3.259149002696822e-05, + "loss": 1.5669, + "step": 330700 + }, + { + "epoch": 1.42, + "learning_rate": 3.258606164041499e-05, + "loss": 1.5344, + "step": 330800 + }, + { + "epoch": 1.42, + "learning_rate": 3.258063325386175e-05, + "loss": 1.5236, + "step": 330900 + }, + { + "epoch": 1.42, + "learning_rate": 3.257520486730852e-05, + "loss": 1.5438, + "step": 331000 + }, + { + "epoch": 1.42, + "learning_rate": 3.256977648075529e-05, + "loss": 1.5269, + "step": 331100 + }, + { + "epoch": 1.42, + "learning_rate": 3.256434809420205e-05, + "loss": 1.5401, + "step": 331200 + }, + { + "epoch": 1.42, + "learning_rate": 3.255891970764881e-05, + "loss": 1.5533, + "step": 331300 + }, + { + "epoch": 1.42, + "learning_rate": 3.255349132109558e-05, + "loss": 1.5514, + "step": 331400 + }, + { + "epoch": 1.42, + "learning_rate": 3.2548062934542344e-05, + "loss": 1.5562, + "step": 331500 + }, + { + "epoch": 1.42, + "learning_rate": 3.254263454798911e-05, + "loss": 1.5358, + "step": 331600 + }, + { + "epoch": 1.43, + "learning_rate": 3.2537206161435874e-05, + "loss": 1.5287, + "step": 331700 + }, + { + "epoch": 1.43, + "learning_rate": 3.2531777774882636e-05, + "loss": 1.5347, + "step": 331800 + }, + { + "epoch": 1.43, + "learning_rate": 3.2526349388329405e-05, + "loss": 1.5572, + "step": 331900 + }, + { + "epoch": 1.43, + "learning_rate": 3.2520921001776174e-05, + "loss": 1.5723, + "step": 332000 + }, + { + "epoch": 1.43, + "learning_rate": 3.2515492615222935e-05, + "loss": 1.5383, + "step": 332100 + }, + { + "epoch": 1.43, + "learning_rate": 3.2510064228669704e-05, + "loss": 1.5197, + "step": 332200 + }, + { + "epoch": 1.43, + "learning_rate": 3.2504635842116466e-05, + "loss": 1.5499, + "step": 332300 + }, + { + "epoch": 1.43, + "learning_rate": 3.249920745556323e-05, + "loss": 1.5379, + "step": 332400 + }, + { + "epoch": 1.43, + "learning_rate": 3.2493779069009997e-05, + "loss": 1.5418, + "step": 332500 + }, + { + "epoch": 1.43, + "learning_rate": 3.248835068245676e-05, + "loss": 1.5495, + "step": 332600 + }, + { + "epoch": 1.43, + "learning_rate": 3.248292229590352e-05, + "loss": 1.5348, + "step": 332700 + }, + { + "epoch": 1.43, + "learning_rate": 3.247749390935029e-05, + "loss": 1.55, + "step": 332800 + }, + { + "epoch": 1.43, + "learning_rate": 3.247206552279705e-05, + "loss": 1.5507, + "step": 332900 + }, + { + "epoch": 1.43, + "learning_rate": 3.246663713624381e-05, + "loss": 1.5419, + "step": 333000 + }, + { + "epoch": 1.43, + "learning_rate": 3.246120874969059e-05, + "loss": 1.5527, + "step": 333100 + }, + { + "epoch": 1.43, + "learning_rate": 3.245578036313735e-05, + "loss": 1.5638, + "step": 333200 + }, + { + "epoch": 1.43, + "learning_rate": 3.245035197658411e-05, + "loss": 1.5521, + "step": 333300 + }, + { + "epoch": 1.43, + "learning_rate": 3.244492359003088e-05, + "loss": 1.5566, + "step": 333400 + }, + { + "epoch": 1.43, + "learning_rate": 3.243949520347764e-05, + "loss": 1.5747, + "step": 333500 + }, + { + "epoch": 1.43, + "learning_rate": 3.243406681692441e-05, + "loss": 1.5436, + "step": 333600 + }, + { + "epoch": 1.43, + "learning_rate": 3.242863843037117e-05, + "loss": 1.5771, + "step": 333700 + }, + { + "epoch": 1.43, + "learning_rate": 3.2423210043817935e-05, + "loss": 1.5296, + "step": 333800 + }, + { + "epoch": 1.43, + "learning_rate": 3.2417781657264703e-05, + "loss": 1.5294, + "step": 333900 + }, + { + "epoch": 1.43, + "learning_rate": 3.2412353270711465e-05, + "loss": 1.5422, + "step": 334000 + }, + { + "epoch": 1.44, + "learning_rate": 3.2406924884158234e-05, + "loss": 1.5428, + "step": 334100 + }, + { + "epoch": 1.44, + "learning_rate": 3.2401496497605e-05, + "loss": 1.5283, + "step": 334200 + }, + { + "epoch": 1.44, + "learning_rate": 3.2396068111051764e-05, + "loss": 1.559, + "step": 334300 + }, + { + "epoch": 1.44, + "learning_rate": 3.2390639724498526e-05, + "loss": 1.5509, + "step": 334400 + }, + { + "epoch": 1.44, + "learning_rate": 3.2385211337945295e-05, + "loss": 1.5446, + "step": 334500 + }, + { + "epoch": 1.44, + "learning_rate": 3.237978295139206e-05, + "loss": 1.5647, + "step": 334600 + }, + { + "epoch": 1.44, + "learning_rate": 3.237435456483882e-05, + "loss": 1.5359, + "step": 334700 + }, + { + "epoch": 1.44, + "learning_rate": 3.236892617828559e-05, + "loss": 1.5501, + "step": 334800 + }, + { + "epoch": 1.44, + "learning_rate": 3.236349779173235e-05, + "loss": 1.5473, + "step": 334900 + }, + { + "epoch": 1.44, + "learning_rate": 3.235806940517911e-05, + "loss": 1.5413, + "step": 335000 + }, + { + "epoch": 1.44, + "eval_loss": 1.4457744359970093, + "eval_runtime": 17.8184, + "eval_samples_per_second": 561.217, + "eval_steps_per_second": 17.566, + "step": 335000 + }, + { + "epoch": 1.44, + "learning_rate": 3.235264101862588e-05, + "loss": 1.5361, + "step": 335100 + }, + { + "epoch": 1.44, + "learning_rate": 3.234721263207265e-05, + "loss": 1.5402, + "step": 335200 + }, + { + "epoch": 1.44, + "learning_rate": 3.234178424551941e-05, + "loss": 1.5459, + "step": 335300 + }, + { + "epoch": 1.44, + "learning_rate": 3.233635585896618e-05, + "loss": 1.5213, + "step": 335400 + }, + { + "epoch": 1.44, + "learning_rate": 3.233092747241294e-05, + "loss": 1.5281, + "step": 335500 + }, + { + "epoch": 1.44, + "learning_rate": 3.232549908585971e-05, + "loss": 1.547, + "step": 335600 + }, + { + "epoch": 1.44, + "learning_rate": 3.232007069930647e-05, + "loss": 1.5668, + "step": 335700 + }, + { + "epoch": 1.44, + "learning_rate": 3.231464231275323e-05, + "loss": 1.533, + "step": 335800 + }, + { + "epoch": 1.44, + "learning_rate": 3.23092139262e-05, + "loss": 1.5421, + "step": 335900 + }, + { + "epoch": 1.44, + "learning_rate": 3.2303785539646764e-05, + "loss": 1.5544, + "step": 336000 + }, + { + "epoch": 1.44, + "learning_rate": 3.2298357153093526e-05, + "loss": 1.5418, + "step": 336100 + }, + { + "epoch": 1.44, + "learning_rate": 3.22929287665403e-05, + "loss": 1.5508, + "step": 336200 + }, + { + "epoch": 1.44, + "learning_rate": 3.228750037998706e-05, + "loss": 1.5832, + "step": 336300 + }, + { + "epoch": 1.45, + "learning_rate": 3.2282071993433825e-05, + "loss": 1.5215, + "step": 336400 + }, + { + "epoch": 1.45, + "learning_rate": 3.2276643606880594e-05, + "loss": 1.5246, + "step": 336500 + }, + { + "epoch": 1.45, + "learning_rate": 3.2271215220327355e-05, + "loss": 1.5504, + "step": 336600 + }, + { + "epoch": 1.45, + "learning_rate": 3.226578683377412e-05, + "loss": 1.5488, + "step": 336700 + }, + { + "epoch": 1.45, + "learning_rate": 3.2260358447220886e-05, + "loss": 1.5597, + "step": 336800 + }, + { + "epoch": 1.45, + "learning_rate": 3.225493006066765e-05, + "loss": 1.5257, + "step": 336900 + }, + { + "epoch": 1.45, + "learning_rate": 3.224950167411441e-05, + "loss": 1.5254, + "step": 337000 + }, + { + "epoch": 1.45, + "learning_rate": 3.224407328756118e-05, + "loss": 1.5362, + "step": 337100 + }, + { + "epoch": 1.45, + "learning_rate": 3.223864490100794e-05, + "loss": 1.5569, + "step": 337200 + }, + { + "epoch": 1.45, + "learning_rate": 3.223321651445471e-05, + "loss": 1.5443, + "step": 337300 + }, + { + "epoch": 1.45, + "learning_rate": 3.222778812790148e-05, + "loss": 1.5064, + "step": 337400 + }, + { + "epoch": 1.45, + "learning_rate": 3.222235974134824e-05, + "loss": 1.5525, + "step": 337500 + }, + { + "epoch": 1.45, + "learning_rate": 3.221693135479501e-05, + "loss": 1.5267, + "step": 337600 + }, + { + "epoch": 1.45, + "learning_rate": 3.221150296824177e-05, + "loss": 1.498, + "step": 337700 + }, + { + "epoch": 1.45, + "learning_rate": 3.220607458168853e-05, + "loss": 1.5337, + "step": 337800 + }, + { + "epoch": 1.45, + "learning_rate": 3.22006461951353e-05, + "loss": 1.5393, + "step": 337900 + }, + { + "epoch": 1.45, + "learning_rate": 3.219521780858206e-05, + "loss": 1.5382, + "step": 338000 + }, + { + "epoch": 1.45, + "learning_rate": 3.2189789422028824e-05, + "loss": 1.5627, + "step": 338100 + }, + { + "epoch": 1.45, + "learning_rate": 3.218436103547559e-05, + "loss": 1.531, + "step": 338200 + }, + { + "epoch": 1.45, + "learning_rate": 3.217893264892236e-05, + "loss": 1.5392, + "step": 338300 + }, + { + "epoch": 1.45, + "learning_rate": 3.217350426236912e-05, + "loss": 1.5617, + "step": 338400 + }, + { + "epoch": 1.45, + "learning_rate": 3.216807587581589e-05, + "loss": 1.5522, + "step": 338500 + }, + { + "epoch": 1.45, + "learning_rate": 3.2162647489262654e-05, + "loss": 1.5298, + "step": 338600 + }, + { + "epoch": 1.46, + "learning_rate": 3.2157219102709416e-05, + "loss": 1.5476, + "step": 338700 + }, + { + "epoch": 1.46, + "learning_rate": 3.2151790716156184e-05, + "loss": 1.542, + "step": 338800 + }, + { + "epoch": 1.46, + "learning_rate": 3.2146362329602946e-05, + "loss": 1.5461, + "step": 338900 + }, + { + "epoch": 1.46, + "learning_rate": 3.214093394304971e-05, + "loss": 1.5699, + "step": 339000 + }, + { + "epoch": 1.46, + "learning_rate": 3.213550555649648e-05, + "loss": 1.5064, + "step": 339100 + }, + { + "epoch": 1.46, + "learning_rate": 3.213007716994324e-05, + "loss": 1.5375, + "step": 339200 + }, + { + "epoch": 1.46, + "learning_rate": 3.212464878339001e-05, + "loss": 1.5728, + "step": 339300 + }, + { + "epoch": 1.46, + "learning_rate": 3.2119220396836776e-05, + "loss": 1.5027, + "step": 339400 + }, + { + "epoch": 1.46, + "learning_rate": 3.211379201028354e-05, + "loss": 1.5562, + "step": 339500 + }, + { + "epoch": 1.46, + "learning_rate": 3.2108363623730307e-05, + "loss": 1.5628, + "step": 339600 + }, + { + "epoch": 1.46, + "learning_rate": 3.210293523717707e-05, + "loss": 1.5528, + "step": 339700 + }, + { + "epoch": 1.46, + "learning_rate": 3.209750685062383e-05, + "loss": 1.5226, + "step": 339800 + }, + { + "epoch": 1.46, + "learning_rate": 3.20920784640706e-05, + "loss": 1.5621, + "step": 339900 + }, + { + "epoch": 1.46, + "learning_rate": 3.208665007751736e-05, + "loss": 1.5492, + "step": 340000 + }, + { + "epoch": 1.46, + "eval_loss": 1.4450249671936035, + "eval_runtime": 17.7966, + "eval_samples_per_second": 561.904, + "eval_steps_per_second": 17.588, + "step": 340000 + }, + { + "epoch": 1.46, + "learning_rate": 3.208122169096412e-05, + "loss": 1.5556, + "step": 340100 + }, + { + "epoch": 1.46, + "learning_rate": 3.207579330441089e-05, + "loss": 1.5418, + "step": 340200 + }, + { + "epoch": 1.46, + "learning_rate": 3.207036491785765e-05, + "loss": 1.5288, + "step": 340300 + }, + { + "epoch": 1.46, + "learning_rate": 3.206493653130442e-05, + "loss": 1.5588, + "step": 340400 + }, + { + "epoch": 1.46, + "learning_rate": 3.205950814475119e-05, + "loss": 1.54, + "step": 340500 + }, + { + "epoch": 1.46, + "learning_rate": 3.205407975819795e-05, + "loss": 1.5419, + "step": 340600 + }, + { + "epoch": 1.46, + "learning_rate": 3.2048651371644714e-05, + "loss": 1.5594, + "step": 340700 + }, + { + "epoch": 1.46, + "learning_rate": 3.204322298509148e-05, + "loss": 1.5502, + "step": 340800 + }, + { + "epoch": 1.46, + "learning_rate": 3.2037794598538245e-05, + "loss": 1.5399, + "step": 340900 + }, + { + "epoch": 1.46, + "learning_rate": 3.203236621198501e-05, + "loss": 1.5661, + "step": 341000 + }, + { + "epoch": 1.47, + "learning_rate": 3.2026937825431775e-05, + "loss": 1.5435, + "step": 341100 + }, + { + "epoch": 1.47, + "learning_rate": 3.202150943887854e-05, + "loss": 1.5386, + "step": 341200 + }, + { + "epoch": 1.47, + "learning_rate": 3.2016081052325306e-05, + "loss": 1.5634, + "step": 341300 + }, + { + "epoch": 1.47, + "learning_rate": 3.201065266577207e-05, + "loss": 1.5215, + "step": 341400 + }, + { + "epoch": 1.47, + "learning_rate": 3.2005224279218836e-05, + "loss": 1.5667, + "step": 341500 + }, + { + "epoch": 1.47, + "learning_rate": 3.1999795892665605e-05, + "loss": 1.5362, + "step": 341600 + }, + { + "epoch": 1.47, + "learning_rate": 3.199436750611237e-05, + "loss": 1.5467, + "step": 341700 + }, + { + "epoch": 1.47, + "learning_rate": 3.198893911955913e-05, + "loss": 1.5496, + "step": 341800 + }, + { + "epoch": 1.47, + "learning_rate": 3.19835107330059e-05, + "loss": 1.5406, + "step": 341900 + }, + { + "epoch": 1.47, + "learning_rate": 3.197808234645266e-05, + "loss": 1.5568, + "step": 342000 + }, + { + "epoch": 1.47, + "learning_rate": 3.197265395989942e-05, + "loss": 1.5319, + "step": 342100 + }, + { + "epoch": 1.47, + "learning_rate": 3.196722557334619e-05, + "loss": 1.5338, + "step": 342200 + }, + { + "epoch": 1.47, + "learning_rate": 3.196179718679295e-05, + "loss": 1.5516, + "step": 342300 + }, + { + "epoch": 1.47, + "learning_rate": 3.1956368800239714e-05, + "loss": 1.5399, + "step": 342400 + }, + { + "epoch": 1.47, + "learning_rate": 3.195094041368649e-05, + "loss": 1.545, + "step": 342500 + }, + { + "epoch": 1.47, + "learning_rate": 3.194551202713325e-05, + "loss": 1.5531, + "step": 342600 + }, + { + "epoch": 1.47, + "learning_rate": 3.194008364058001e-05, + "loss": 1.5147, + "step": 342700 + }, + { + "epoch": 1.47, + "learning_rate": 3.193465525402678e-05, + "loss": 1.5641, + "step": 342800 + }, + { + "epoch": 1.47, + "learning_rate": 3.192922686747354e-05, + "loss": 1.5532, + "step": 342900 + }, + { + "epoch": 1.47, + "learning_rate": 3.1923798480920305e-05, + "loss": 1.5282, + "step": 343000 + }, + { + "epoch": 1.47, + "learning_rate": 3.1918370094367074e-05, + "loss": 1.5503, + "step": 343100 + }, + { + "epoch": 1.47, + "learning_rate": 3.1912941707813836e-05, + "loss": 1.5604, + "step": 343200 + }, + { + "epoch": 1.47, + "learning_rate": 3.1907513321260604e-05, + "loss": 1.527, + "step": 343300 + }, + { + "epoch": 1.48, + "learning_rate": 3.1902084934707366e-05, + "loss": 1.547, + "step": 343400 + }, + { + "epoch": 1.48, + "learning_rate": 3.1896656548154135e-05, + "loss": 1.5202, + "step": 343500 + }, + { + "epoch": 1.48, + "learning_rate": 3.1891228161600903e-05, + "loss": 1.5646, + "step": 343600 + }, + { + "epoch": 1.48, + "learning_rate": 3.1885799775047665e-05, + "loss": 1.5329, + "step": 343700 + }, + { + "epoch": 1.48, + "learning_rate": 3.188037138849443e-05, + "loss": 1.5512, + "step": 343800 + }, + { + "epoch": 1.48, + "learning_rate": 3.1874943001941196e-05, + "loss": 1.5299, + "step": 343900 + }, + { + "epoch": 1.48, + "learning_rate": 3.186951461538796e-05, + "loss": 1.5369, + "step": 344000 + }, + { + "epoch": 1.48, + "learning_rate": 3.186408622883472e-05, + "loss": 1.5414, + "step": 344100 + }, + { + "epoch": 1.48, + "learning_rate": 3.185865784228149e-05, + "loss": 1.5322, + "step": 344200 + }, + { + "epoch": 1.48, + "learning_rate": 3.185322945572825e-05, + "loss": 1.5215, + "step": 344300 + }, + { + "epoch": 1.48, + "learning_rate": 3.184780106917501e-05, + "loss": 1.5493, + "step": 344400 + }, + { + "epoch": 1.48, + "learning_rate": 3.184237268262178e-05, + "loss": 1.5299, + "step": 344500 + }, + { + "epoch": 1.48, + "learning_rate": 3.183694429606855e-05, + "loss": 1.5392, + "step": 344600 + }, + { + "epoch": 1.48, + "learning_rate": 3.183151590951531e-05, + "loss": 1.5435, + "step": 344700 + }, + { + "epoch": 1.48, + "learning_rate": 3.182608752296208e-05, + "loss": 1.5557, + "step": 344800 + }, + { + "epoch": 1.48, + "learning_rate": 3.182065913640884e-05, + "loss": 1.5388, + "step": 344900 + }, + { + "epoch": 1.48, + "learning_rate": 3.1815230749855604e-05, + "loss": 1.5414, + "step": 345000 + }, + { + "epoch": 1.48, + "eval_loss": 1.4478392601013184, + "eval_runtime": 17.7608, + "eval_samples_per_second": 563.038, + "eval_steps_per_second": 17.623, + "step": 345000 + }, + { + "epoch": 1.48, + "learning_rate": 3.180980236330237e-05, + "loss": 1.5371, + "step": 345100 + }, + { + "epoch": 1.48, + "learning_rate": 3.1804373976749134e-05, + "loss": 1.5326, + "step": 345200 + }, + { + "epoch": 1.48, + "learning_rate": 3.17989455901959e-05, + "loss": 1.5514, + "step": 345300 + }, + { + "epoch": 1.48, + "learning_rate": 3.1793517203642665e-05, + "loss": 1.5307, + "step": 345400 + }, + { + "epoch": 1.48, + "learning_rate": 3.1788088817089427e-05, + "loss": 1.5185, + "step": 345500 + }, + { + "epoch": 1.48, + "learning_rate": 3.1782660430536195e-05, + "loss": 1.575, + "step": 345600 + }, + { + "epoch": 1.49, + "learning_rate": 3.1777232043982964e-05, + "loss": 1.5719, + "step": 345700 + }, + { + "epoch": 1.49, + "learning_rate": 3.1771803657429726e-05, + "loss": 1.5208, + "step": 345800 + }, + { + "epoch": 1.49, + "learning_rate": 3.1766375270876494e-05, + "loss": 1.5388, + "step": 345900 + }, + { + "epoch": 1.49, + "learning_rate": 3.1760946884323256e-05, + "loss": 1.5615, + "step": 346000 + }, + { + "epoch": 1.49, + "learning_rate": 3.175551849777002e-05, + "loss": 1.5374, + "step": 346100 + }, + { + "epoch": 1.49, + "learning_rate": 3.175009011121679e-05, + "loss": 1.5419, + "step": 346200 + }, + { + "epoch": 1.49, + "learning_rate": 3.174466172466355e-05, + "loss": 1.5355, + "step": 346300 + }, + { + "epoch": 1.49, + "learning_rate": 3.173923333811031e-05, + "loss": 1.5732, + "step": 346400 + }, + { + "epoch": 1.49, + "learning_rate": 3.173380495155708e-05, + "loss": 1.514, + "step": 346500 + }, + { + "epoch": 1.49, + "learning_rate": 3.172837656500384e-05, + "loss": 1.5063, + "step": 346600 + }, + { + "epoch": 1.49, + "learning_rate": 3.172294817845061e-05, + "loss": 1.5282, + "step": 346700 + }, + { + "epoch": 1.49, + "learning_rate": 3.171751979189738e-05, + "loss": 1.5447, + "step": 346800 + }, + { + "epoch": 1.49, + "learning_rate": 3.171209140534414e-05, + "loss": 1.553, + "step": 346900 + }, + { + "epoch": 1.49, + "learning_rate": 3.17066630187909e-05, + "loss": 1.5592, + "step": 347000 + }, + { + "epoch": 1.49, + "learning_rate": 3.170123463223767e-05, + "loss": 1.562, + "step": 347100 + }, + { + "epoch": 1.49, + "learning_rate": 3.169580624568443e-05, + "loss": 1.5373, + "step": 347200 + }, + { + "epoch": 1.49, + "learning_rate": 3.16903778591312e-05, + "loss": 1.5465, + "step": 347300 + }, + { + "epoch": 1.49, + "learning_rate": 3.168494947257796e-05, + "loss": 1.5292, + "step": 347400 + }, + { + "epoch": 1.49, + "learning_rate": 3.1679521086024725e-05, + "loss": 1.5545, + "step": 347500 + }, + { + "epoch": 1.49, + "learning_rate": 3.1674092699471494e-05, + "loss": 1.5225, + "step": 347600 + }, + { + "epoch": 1.49, + "learning_rate": 3.166866431291826e-05, + "loss": 1.5182, + "step": 347700 + }, + { + "epoch": 1.49, + "learning_rate": 3.1663235926365024e-05, + "loss": 1.5689, + "step": 347800 + }, + { + "epoch": 1.49, + "learning_rate": 3.165780753981179e-05, + "loss": 1.535, + "step": 347900 + }, + { + "epoch": 1.5, + "learning_rate": 3.1652379153258555e-05, + "loss": 1.5145, + "step": 348000 + }, + { + "epoch": 1.5, + "learning_rate": 3.1646950766705317e-05, + "loss": 1.5291, + "step": 348100 + }, + { + "epoch": 1.5, + "learning_rate": 3.1641522380152085e-05, + "loss": 1.5232, + "step": 348200 + }, + { + "epoch": 1.5, + "learning_rate": 3.163609399359885e-05, + "loss": 1.5489, + "step": 348300 + }, + { + "epoch": 1.5, + "learning_rate": 3.163066560704561e-05, + "loss": 1.5601, + "step": 348400 + }, + { + "epoch": 1.5, + "learning_rate": 3.162523722049238e-05, + "loss": 1.5143, + "step": 348500 + }, + { + "epoch": 1.5, + "learning_rate": 3.161980883393914e-05, + "loss": 1.5376, + "step": 348600 + }, + { + "epoch": 1.5, + "learning_rate": 3.161438044738591e-05, + "loss": 1.5192, + "step": 348700 + }, + { + "epoch": 1.5, + "learning_rate": 3.160895206083268e-05, + "loss": 1.5509, + "step": 348800 + }, + { + "epoch": 1.5, + "learning_rate": 3.160352367427944e-05, + "loss": 1.5483, + "step": 348900 + }, + { + "epoch": 1.5, + "learning_rate": 3.15980952877262e-05, + "loss": 1.5048, + "step": 349000 + }, + { + "epoch": 1.5, + "learning_rate": 3.159266690117297e-05, + "loss": 1.522, + "step": 349100 + }, + { + "epoch": 1.5, + "learning_rate": 3.158723851461973e-05, + "loss": 1.5402, + "step": 349200 + }, + { + "epoch": 1.5, + "learning_rate": 3.15818101280665e-05, + "loss": 1.5543, + "step": 349300 + }, + { + "epoch": 1.5, + "learning_rate": 3.157638174151326e-05, + "loss": 1.5409, + "step": 349400 + }, + { + "epoch": 1.5, + "learning_rate": 3.1570953354960024e-05, + "loss": 1.5243, + "step": 349500 + }, + { + "epoch": 1.5, + "learning_rate": 3.156552496840679e-05, + "loss": 1.5425, + "step": 349600 + }, + { + "epoch": 1.5, + "learning_rate": 3.1560096581853554e-05, + "loss": 1.5237, + "step": 349700 + }, + { + "epoch": 1.5, + "learning_rate": 3.155466819530032e-05, + "loss": 1.541, + "step": 349800 + }, + { + "epoch": 1.5, + "learning_rate": 3.154923980874709e-05, + "loss": 1.5575, + "step": 349900 + }, + { + "epoch": 1.5, + "learning_rate": 3.154381142219385e-05, + "loss": 1.5517, + "step": 350000 + }, + { + "epoch": 1.5, + "eval_loss": 1.4470714330673218, + "eval_runtime": 17.6899, + "eval_samples_per_second": 565.294, + "eval_steps_per_second": 17.694, + "step": 350000 + }, + { + "epoch": 1.5, + "learning_rate": 3.1538383035640615e-05, + "loss": 1.532, + "step": 350100 + }, + { + "epoch": 1.5, + "learning_rate": 3.1532954649087384e-05, + "loss": 1.5497, + "step": 350200 + }, + { + "epoch": 1.5, + "learning_rate": 3.1527526262534146e-05, + "loss": 1.545, + "step": 350300 + }, + { + "epoch": 1.51, + "learning_rate": 3.152209787598091e-05, + "loss": 1.5249, + "step": 350400 + }, + { + "epoch": 1.51, + "learning_rate": 3.1516669489427676e-05, + "loss": 1.5464, + "step": 350500 + }, + { + "epoch": 1.51, + "learning_rate": 3.151124110287444e-05, + "loss": 1.557, + "step": 350600 + }, + { + "epoch": 1.51, + "learning_rate": 3.150581271632121e-05, + "loss": 1.5432, + "step": 350700 + }, + { + "epoch": 1.51, + "learning_rate": 3.150038432976797e-05, + "loss": 1.5386, + "step": 350800 + }, + { + "epoch": 1.51, + "learning_rate": 3.149495594321474e-05, + "loss": 1.5456, + "step": 350900 + }, + { + "epoch": 1.51, + "learning_rate": 3.14895275566615e-05, + "loss": 1.538, + "step": 351000 + }, + { + "epoch": 1.51, + "learning_rate": 3.148409917010827e-05, + "loss": 1.5359, + "step": 351100 + }, + { + "epoch": 1.51, + "learning_rate": 3.147867078355503e-05, + "loss": 1.5138, + "step": 351200 + }, + { + "epoch": 1.51, + "learning_rate": 3.14732423970018e-05, + "loss": 1.5448, + "step": 351300 + }, + { + "epoch": 1.51, + "learning_rate": 3.146781401044856e-05, + "loss": 1.536, + "step": 351400 + }, + { + "epoch": 1.51, + "learning_rate": 3.146238562389532e-05, + "loss": 1.5433, + "step": 351500 + }, + { + "epoch": 1.51, + "learning_rate": 3.145695723734209e-05, + "loss": 1.5384, + "step": 351600 + }, + { + "epoch": 1.51, + "learning_rate": 3.145152885078885e-05, + "loss": 1.5072, + "step": 351700 + }, + { + "epoch": 1.51, + "learning_rate": 3.1446100464235614e-05, + "loss": 1.5633, + "step": 351800 + }, + { + "epoch": 1.51, + "learning_rate": 3.144067207768239e-05, + "loss": 1.5238, + "step": 351900 + }, + { + "epoch": 1.51, + "learning_rate": 3.143524369112915e-05, + "loss": 1.5614, + "step": 352000 + }, + { + "epoch": 1.51, + "learning_rate": 3.1429815304575914e-05, + "loss": 1.5744, + "step": 352100 + }, + { + "epoch": 1.51, + "learning_rate": 3.142438691802268e-05, + "loss": 1.5098, + "step": 352200 + }, + { + "epoch": 1.51, + "learning_rate": 3.1418958531469444e-05, + "loss": 1.5568, + "step": 352300 + }, + { + "epoch": 1.51, + "learning_rate": 3.1413530144916206e-05, + "loss": 1.5406, + "step": 352400 + }, + { + "epoch": 1.51, + "learning_rate": 3.1408101758362975e-05, + "loss": 1.5192, + "step": 352500 + }, + { + "epoch": 1.51, + "learning_rate": 3.1402673371809737e-05, + "loss": 1.5363, + "step": 352600 + }, + { + "epoch": 1.52, + "learning_rate": 3.1397244985256505e-05, + "loss": 1.5626, + "step": 352700 + }, + { + "epoch": 1.52, + "learning_rate": 3.139181659870327e-05, + "loss": 1.5644, + "step": 352800 + }, + { + "epoch": 1.52, + "learning_rate": 3.138638821215003e-05, + "loss": 1.5126, + "step": 352900 + }, + { + "epoch": 1.52, + "learning_rate": 3.13809598255968e-05, + "loss": 1.5045, + "step": 353000 + }, + { + "epoch": 1.52, + "learning_rate": 3.1375531439043566e-05, + "loss": 1.5488, + "step": 353100 + }, + { + "epoch": 1.52, + "learning_rate": 3.137010305249033e-05, + "loss": 1.5111, + "step": 353200 + }, + { + "epoch": 1.52, + "learning_rate": 3.13646746659371e-05, + "loss": 1.5539, + "step": 353300 + }, + { + "epoch": 1.52, + "learning_rate": 3.135924627938386e-05, + "loss": 1.5193, + "step": 353400 + }, + { + "epoch": 1.52, + "learning_rate": 3.135381789283062e-05, + "loss": 1.5483, + "step": 353500 + }, + { + "epoch": 1.52, + "learning_rate": 3.134838950627739e-05, + "loss": 1.5216, + "step": 353600 + }, + { + "epoch": 1.52, + "learning_rate": 3.134296111972415e-05, + "loss": 1.5613, + "step": 353700 + }, + { + "epoch": 1.52, + "learning_rate": 3.133753273317091e-05, + "loss": 1.5414, + "step": 353800 + }, + { + "epoch": 1.52, + "learning_rate": 3.133210434661768e-05, + "loss": 1.5307, + "step": 353900 + }, + { + "epoch": 1.52, + "learning_rate": 3.132667596006445e-05, + "loss": 1.5392, + "step": 354000 + }, + { + "epoch": 1.52, + "learning_rate": 3.132124757351121e-05, + "loss": 1.5457, + "step": 354100 + }, + { + "epoch": 1.52, + "learning_rate": 3.131581918695798e-05, + "loss": 1.539, + "step": 354200 + }, + { + "epoch": 1.52, + "learning_rate": 3.131039080040474e-05, + "loss": 1.5216, + "step": 354300 + }, + { + "epoch": 1.52, + "learning_rate": 3.1304962413851504e-05, + "loss": 1.5752, + "step": 354400 + }, + { + "epoch": 1.52, + "learning_rate": 3.129953402729827e-05, + "loss": 1.5301, + "step": 354500 + }, + { + "epoch": 1.52, + "learning_rate": 3.1294105640745035e-05, + "loss": 1.5457, + "step": 354600 + }, + { + "epoch": 1.52, + "learning_rate": 3.1288677254191804e-05, + "loss": 1.5437, + "step": 354700 + }, + { + "epoch": 1.52, + "learning_rate": 3.1283248867638566e-05, + "loss": 1.5305, + "step": 354800 + }, + { + "epoch": 1.52, + "learning_rate": 3.127782048108533e-05, + "loss": 1.5045, + "step": 354900 + }, + { + "epoch": 1.53, + "learning_rate": 3.1272392094532096e-05, + "loss": 1.5433, + "step": 355000 + }, + { + "epoch": 1.53, + "eval_loss": 1.4460771083831787, + "eval_runtime": 17.7968, + "eval_samples_per_second": 561.9, + "eval_steps_per_second": 17.587, + "step": 355000 + }, + { + "epoch": 1.53, + "learning_rate": 3.1266963707978865e-05, + "loss": 1.5781, + "step": 355100 + }, + { + "epoch": 1.53, + "learning_rate": 3.1261535321425627e-05, + "loss": 1.5645, + "step": 355200 + }, + { + "epoch": 1.53, + "learning_rate": 3.1256106934872395e-05, + "loss": 1.5349, + "step": 355300 + }, + { + "epoch": 1.53, + "learning_rate": 3.125067854831916e-05, + "loss": 1.5459, + "step": 355400 + }, + { + "epoch": 1.53, + "learning_rate": 3.124525016176592e-05, + "loss": 1.5356, + "step": 355500 + }, + { + "epoch": 1.53, + "learning_rate": 3.123982177521269e-05, + "loss": 1.556, + "step": 355600 + }, + { + "epoch": 1.53, + "learning_rate": 3.123439338865945e-05, + "loss": 1.5376, + "step": 355700 + }, + { + "epoch": 1.53, + "learning_rate": 3.122896500210621e-05, + "loss": 1.5308, + "step": 355800 + }, + { + "epoch": 1.53, + "learning_rate": 3.122353661555298e-05, + "loss": 1.5548, + "step": 355900 + }, + { + "epoch": 1.53, + "learning_rate": 3.121810822899974e-05, + "loss": 1.5357, + "step": 356000 + }, + { + "epoch": 1.53, + "learning_rate": 3.121267984244651e-05, + "loss": 1.5202, + "step": 356100 + }, + { + "epoch": 1.53, + "learning_rate": 3.120725145589328e-05, + "loss": 1.5478, + "step": 356200 + }, + { + "epoch": 1.53, + "learning_rate": 3.120182306934004e-05, + "loss": 1.5502, + "step": 356300 + }, + { + "epoch": 1.53, + "learning_rate": 3.11963946827868e-05, + "loss": 1.5432, + "step": 356400 + }, + { + "epoch": 1.53, + "learning_rate": 3.119096629623357e-05, + "loss": 1.5386, + "step": 356500 + }, + { + "epoch": 1.53, + "learning_rate": 3.1185537909680333e-05, + "loss": 1.5578, + "step": 356600 + }, + { + "epoch": 1.53, + "learning_rate": 3.11801095231271e-05, + "loss": 1.5365, + "step": 356700 + }, + { + "epoch": 1.53, + "learning_rate": 3.1174681136573864e-05, + "loss": 1.5718, + "step": 356800 + }, + { + "epoch": 1.53, + "learning_rate": 3.1169252750020626e-05, + "loss": 1.5305, + "step": 356900 + }, + { + "epoch": 1.53, + "learning_rate": 3.1163824363467395e-05, + "loss": 1.5333, + "step": 357000 + }, + { + "epoch": 1.53, + "learning_rate": 3.1158395976914156e-05, + "loss": 1.5484, + "step": 357100 + }, + { + "epoch": 1.53, + "learning_rate": 3.1152967590360925e-05, + "loss": 1.5521, + "step": 357200 + }, + { + "epoch": 1.53, + "learning_rate": 3.1147539203807694e-05, + "loss": 1.5207, + "step": 357300 + }, + { + "epoch": 1.54, + "learning_rate": 3.1142110817254456e-05, + "loss": 1.5525, + "step": 357400 + }, + { + "epoch": 1.54, + "learning_rate": 3.113668243070122e-05, + "loss": 1.5497, + "step": 357500 + }, + { + "epoch": 1.54, + "learning_rate": 3.1131254044147986e-05, + "loss": 1.5305, + "step": 357600 + }, + { + "epoch": 1.54, + "learning_rate": 3.112582565759475e-05, + "loss": 1.5587, + "step": 357700 + }, + { + "epoch": 1.54, + "learning_rate": 3.112039727104151e-05, + "loss": 1.5261, + "step": 357800 + }, + { + "epoch": 1.54, + "learning_rate": 3.111496888448828e-05, + "loss": 1.5192, + "step": 357900 + }, + { + "epoch": 1.54, + "learning_rate": 3.110954049793504e-05, + "loss": 1.5383, + "step": 358000 + }, + { + "epoch": 1.54, + "learning_rate": 3.110411211138181e-05, + "loss": 1.5502, + "step": 358100 + }, + { + "epoch": 1.54, + "learning_rate": 3.109868372482858e-05, + "loss": 1.5433, + "step": 358200 + }, + { + "epoch": 1.54, + "learning_rate": 3.109325533827534e-05, + "loss": 1.5171, + "step": 358300 + }, + { + "epoch": 1.54, + "learning_rate": 3.10878269517221e-05, + "loss": 1.5241, + "step": 358400 + }, + { + "epoch": 1.54, + "learning_rate": 3.108239856516887e-05, + "loss": 1.5281, + "step": 358500 + }, + { + "epoch": 1.54, + "learning_rate": 3.107697017861563e-05, + "loss": 1.5371, + "step": 358600 + }, + { + "epoch": 1.54, + "learning_rate": 3.10715417920624e-05, + "loss": 1.5413, + "step": 358700 + }, + { + "epoch": 1.54, + "learning_rate": 3.106611340550916e-05, + "loss": 1.509, + "step": 358800 + }, + { + "epoch": 1.54, + "learning_rate": 3.1060685018955924e-05, + "loss": 1.5464, + "step": 358900 + }, + { + "epoch": 1.54, + "learning_rate": 3.105525663240269e-05, + "loss": 1.5556, + "step": 359000 + }, + { + "epoch": 1.54, + "learning_rate": 3.1049828245849455e-05, + "loss": 1.5318, + "step": 359100 + }, + { + "epoch": 1.54, + "learning_rate": 3.104439985929622e-05, + "loss": 1.5198, + "step": 359200 + }, + { + "epoch": 1.54, + "learning_rate": 3.103897147274299e-05, + "loss": 1.529, + "step": 359300 + }, + { + "epoch": 1.54, + "learning_rate": 3.1033543086189754e-05, + "loss": 1.5501, + "step": 359400 + }, + { + "epoch": 1.54, + "learning_rate": 3.1028114699636516e-05, + "loss": 1.5562, + "step": 359500 + }, + { + "epoch": 1.54, + "learning_rate": 3.1022686313083285e-05, + "loss": 1.5365, + "step": 359600 + }, + { + "epoch": 1.55, + "learning_rate": 3.1017257926530046e-05, + "loss": 1.5487, + "step": 359700 + }, + { + "epoch": 1.55, + "learning_rate": 3.101182953997681e-05, + "loss": 1.5359, + "step": 359800 + }, + { + "epoch": 1.55, + "learning_rate": 3.100640115342358e-05, + "loss": 1.5248, + "step": 359900 + }, + { + "epoch": 1.55, + "learning_rate": 3.100097276687034e-05, + "loss": 1.5636, + "step": 360000 + }, + { + "epoch": 1.55, + "eval_loss": 1.4448529481887817, + "eval_runtime": 17.7635, + "eval_samples_per_second": 562.952, + "eval_steps_per_second": 17.62, + "step": 360000 + }, + { + "epoch": 1.55, + "learning_rate": 3.099554438031711e-05, + "loss": 1.5315, + "step": 360100 + }, + { + "epoch": 1.55, + "learning_rate": 3.099011599376387e-05, + "loss": 1.5473, + "step": 360200 + }, + { + "epoch": 1.55, + "learning_rate": 3.098468760721064e-05, + "loss": 1.5161, + "step": 360300 + }, + { + "epoch": 1.55, + "learning_rate": 3.09792592206574e-05, + "loss": 1.5373, + "step": 360400 + }, + { + "epoch": 1.55, + "learning_rate": 3.097383083410417e-05, + "loss": 1.5096, + "step": 360500 + }, + { + "epoch": 1.55, + "learning_rate": 3.096840244755093e-05, + "loss": 1.5239, + "step": 360600 + }, + { + "epoch": 1.55, + "learning_rate": 3.09629740609977e-05, + "loss": 1.563, + "step": 360700 + }, + { + "epoch": 1.55, + "learning_rate": 3.095754567444446e-05, + "loss": 1.5418, + "step": 360800 + }, + { + "epoch": 1.55, + "learning_rate": 3.095211728789122e-05, + "loss": 1.5406, + "step": 360900 + }, + { + "epoch": 1.55, + "learning_rate": 3.094668890133799e-05, + "loss": 1.5123, + "step": 361000 + }, + { + "epoch": 1.55, + "learning_rate": 3.0941260514784753e-05, + "loss": 1.5302, + "step": 361100 + }, + { + "epoch": 1.55, + "learning_rate": 3.0935832128231515e-05, + "loss": 1.5363, + "step": 361200 + }, + { + "epoch": 1.55, + "learning_rate": 3.0930403741678284e-05, + "loss": 1.5542, + "step": 361300 + }, + { + "epoch": 1.55, + "learning_rate": 3.092497535512505e-05, + "loss": 1.5293, + "step": 361400 + }, + { + "epoch": 1.55, + "learning_rate": 3.0919546968571814e-05, + "loss": 1.5282, + "step": 361500 + }, + { + "epoch": 1.55, + "learning_rate": 3.091411858201858e-05, + "loss": 1.5087, + "step": 361600 + }, + { + "epoch": 1.55, + "learning_rate": 3.0908690195465345e-05, + "loss": 1.5276, + "step": 361700 + }, + { + "epoch": 1.55, + "learning_rate": 3.090326180891211e-05, + "loss": 1.5427, + "step": 361800 + }, + { + "epoch": 1.55, + "learning_rate": 3.0897833422358876e-05, + "loss": 1.5427, + "step": 361900 + }, + { + "epoch": 1.56, + "learning_rate": 3.089240503580564e-05, + "loss": 1.5539, + "step": 362000 + }, + { + "epoch": 1.56, + "learning_rate": 3.0886976649252406e-05, + "loss": 1.539, + "step": 362100 + }, + { + "epoch": 1.56, + "learning_rate": 3.088154826269917e-05, + "loss": 1.5205, + "step": 362200 + }, + { + "epoch": 1.56, + "learning_rate": 3.087611987614593e-05, + "loss": 1.549, + "step": 362300 + }, + { + "epoch": 1.56, + "learning_rate": 3.08706914895927e-05, + "loss": 1.5274, + "step": 362400 + }, + { + "epoch": 1.56, + "learning_rate": 3.086526310303947e-05, + "loss": 1.5495, + "step": 362500 + }, + { + "epoch": 1.56, + "learning_rate": 3.085983471648623e-05, + "loss": 1.5387, + "step": 362600 + }, + { + "epoch": 1.56, + "learning_rate": 3.0854406329933e-05, + "loss": 1.5476, + "step": 362700 + }, + { + "epoch": 1.56, + "learning_rate": 3.084897794337976e-05, + "loss": 1.5327, + "step": 362800 + }, + { + "epoch": 1.56, + "learning_rate": 3.084354955682652e-05, + "loss": 1.5387, + "step": 362900 + }, + { + "epoch": 1.56, + "learning_rate": 3.083812117027329e-05, + "loss": 1.5682, + "step": 363000 + }, + { + "epoch": 1.56, + "learning_rate": 3.083269278372005e-05, + "loss": 1.5104, + "step": 363100 + }, + { + "epoch": 1.56, + "learning_rate": 3.0827264397166814e-05, + "loss": 1.5487, + "step": 363200 + }, + { + "epoch": 1.56, + "learning_rate": 3.082183601061358e-05, + "loss": 1.534, + "step": 363300 + }, + { + "epoch": 1.56, + "learning_rate": 3.081640762406035e-05, + "loss": 1.5264, + "step": 363400 + }, + { + "epoch": 1.56, + "learning_rate": 3.081097923750711e-05, + "loss": 1.5203, + "step": 363500 + }, + { + "epoch": 1.56, + "learning_rate": 3.080555085095388e-05, + "loss": 1.5561, + "step": 363600 + }, + { + "epoch": 1.56, + "learning_rate": 3.0800122464400643e-05, + "loss": 1.5083, + "step": 363700 + }, + { + "epoch": 1.56, + "learning_rate": 3.0794694077847405e-05, + "loss": 1.5377, + "step": 363800 + }, + { + "epoch": 1.56, + "learning_rate": 3.0789265691294174e-05, + "loss": 1.5362, + "step": 363900 + }, + { + "epoch": 1.56, + "learning_rate": 3.0783837304740936e-05, + "loss": 1.548, + "step": 364000 + }, + { + "epoch": 1.56, + "learning_rate": 3.07784089181877e-05, + "loss": 1.5398, + "step": 364100 + }, + { + "epoch": 1.56, + "learning_rate": 3.0772980531634466e-05, + "loss": 1.5074, + "step": 364200 + }, + { + "epoch": 1.57, + "learning_rate": 3.076755214508123e-05, + "loss": 1.5492, + "step": 364300 + }, + { + "epoch": 1.57, + "learning_rate": 3.0762123758528e-05, + "loss": 1.5581, + "step": 364400 + }, + { + "epoch": 1.57, + "learning_rate": 3.0756695371974766e-05, + "loss": 1.5623, + "step": 364500 + }, + { + "epoch": 1.57, + "learning_rate": 3.075126698542153e-05, + "loss": 1.5235, + "step": 364600 + }, + { + "epoch": 1.57, + "learning_rate": 3.0745838598868296e-05, + "loss": 1.5464, + "step": 364700 + }, + { + "epoch": 1.57, + "learning_rate": 3.074041021231506e-05, + "loss": 1.5277, + "step": 364800 + }, + { + "epoch": 1.57, + "learning_rate": 3.073498182576182e-05, + "loss": 1.5038, + "step": 364900 + }, + { + "epoch": 1.57, + "learning_rate": 3.072955343920859e-05, + "loss": 1.5474, + "step": 365000 + }, + { + "epoch": 1.57, + "eval_loss": 1.447064995765686, + "eval_runtime": 17.8278, + "eval_samples_per_second": 560.922, + "eval_steps_per_second": 17.557, + "step": 365000 + }, + { + "epoch": 1.57, + "learning_rate": 3.072412505265535e-05, + "loss": 1.5234, + "step": 365100 + }, + { + "epoch": 1.57, + "learning_rate": 3.071869666610211e-05, + "loss": 1.5393, + "step": 365200 + }, + { + "epoch": 1.57, + "learning_rate": 3.071326827954888e-05, + "loss": 1.5358, + "step": 365300 + }, + { + "epoch": 1.57, + "learning_rate": 3.070783989299564e-05, + "loss": 1.5163, + "step": 365400 + }, + { + "epoch": 1.57, + "learning_rate": 3.070241150644241e-05, + "loss": 1.5246, + "step": 365500 + }, + { + "epoch": 1.57, + "learning_rate": 3.069698311988918e-05, + "loss": 1.5233, + "step": 365600 + }, + { + "epoch": 1.57, + "learning_rate": 3.069155473333594e-05, + "loss": 1.5309, + "step": 365700 + }, + { + "epoch": 1.57, + "learning_rate": 3.0686126346782704e-05, + "loss": 1.5457, + "step": 365800 + }, + { + "epoch": 1.57, + "learning_rate": 3.068069796022947e-05, + "loss": 1.5595, + "step": 365900 + }, + { + "epoch": 1.57, + "learning_rate": 3.0675269573676234e-05, + "loss": 1.5376, + "step": 366000 + }, + { + "epoch": 1.57, + "learning_rate": 3.0669841187122996e-05, + "loss": 1.548, + "step": 366100 + }, + { + "epoch": 1.57, + "learning_rate": 3.0664412800569765e-05, + "loss": 1.5257, + "step": 366200 + }, + { + "epoch": 1.57, + "learning_rate": 3.065898441401653e-05, + "loss": 1.5412, + "step": 366300 + }, + { + "epoch": 1.57, + "learning_rate": 3.0653556027463295e-05, + "loss": 1.5206, + "step": 366400 + }, + { + "epoch": 1.57, + "learning_rate": 3.064812764091006e-05, + "loss": 1.5396, + "step": 366500 + }, + { + "epoch": 1.57, + "learning_rate": 3.0642699254356826e-05, + "loss": 1.5389, + "step": 366600 + }, + { + "epoch": 1.58, + "learning_rate": 3.0637270867803595e-05, + "loss": 1.5348, + "step": 366700 + }, + { + "epoch": 1.58, + "learning_rate": 3.0631842481250356e-05, + "loss": 1.5322, + "step": 366800 + }, + { + "epoch": 1.58, + "learning_rate": 3.062641409469712e-05, + "loss": 1.5203, + "step": 366900 + }, + { + "epoch": 1.58, + "learning_rate": 3.062098570814389e-05, + "loss": 1.573, + "step": 367000 + }, + { + "epoch": 1.58, + "learning_rate": 3.061555732159065e-05, + "loss": 1.53, + "step": 367100 + }, + { + "epoch": 1.58, + "learning_rate": 3.061012893503741e-05, + "loss": 1.5678, + "step": 367200 + }, + { + "epoch": 1.58, + "learning_rate": 3.060470054848418e-05, + "loss": 1.5372, + "step": 367300 + }, + { + "epoch": 1.58, + "learning_rate": 3.059927216193094e-05, + "loss": 1.5358, + "step": 367400 + }, + { + "epoch": 1.58, + "learning_rate": 3.05938437753777e-05, + "loss": 1.5054, + "step": 367500 + }, + { + "epoch": 1.58, + "learning_rate": 3.058841538882448e-05, + "loss": 1.5148, + "step": 367600 + }, + { + "epoch": 1.58, + "learning_rate": 3.058298700227124e-05, + "loss": 1.5508, + "step": 367700 + }, + { + "epoch": 1.58, + "learning_rate": 3.0577558615718e-05, + "loss": 1.5353, + "step": 367800 + }, + { + "epoch": 1.58, + "learning_rate": 3.057213022916477e-05, + "loss": 1.5434, + "step": 367900 + }, + { + "epoch": 1.58, + "learning_rate": 3.056670184261153e-05, + "loss": 1.5367, + "step": 368000 + }, + { + "epoch": 1.58, + "learning_rate": 3.0561273456058295e-05, + "loss": 1.5396, + "step": 368100 + }, + { + "epoch": 1.58, + "learning_rate": 3.055584506950506e-05, + "loss": 1.51, + "step": 368200 + }, + { + "epoch": 1.58, + "learning_rate": 3.0550416682951825e-05, + "loss": 1.5025, + "step": 368300 + }, + { + "epoch": 1.58, + "learning_rate": 3.0544988296398594e-05, + "loss": 1.5592, + "step": 368400 + }, + { + "epoch": 1.58, + "learning_rate": 3.0539559909845356e-05, + "loss": 1.5472, + "step": 368500 + }, + { + "epoch": 1.58, + "learning_rate": 3.053413152329212e-05, + "loss": 1.5681, + "step": 368600 + }, + { + "epoch": 1.58, + "learning_rate": 3.052870313673889e-05, + "loss": 1.5353, + "step": 368700 + }, + { + "epoch": 1.58, + "learning_rate": 3.0523274750185655e-05, + "loss": 1.5521, + "step": 368800 + }, + { + "epoch": 1.58, + "learning_rate": 3.051784636363242e-05, + "loss": 1.5408, + "step": 368900 + }, + { + "epoch": 1.59, + "learning_rate": 3.0512417977079182e-05, + "loss": 1.5242, + "step": 369000 + }, + { + "epoch": 1.59, + "learning_rate": 3.0506989590525947e-05, + "loss": 1.5627, + "step": 369100 + }, + { + "epoch": 1.59, + "learning_rate": 3.0501561203972713e-05, + "loss": 1.5177, + "step": 369200 + }, + { + "epoch": 1.59, + "learning_rate": 3.0496132817419474e-05, + "loss": 1.5447, + "step": 369300 + }, + { + "epoch": 1.59, + "learning_rate": 3.049070443086624e-05, + "loss": 1.5235, + "step": 369400 + }, + { + "epoch": 1.59, + "learning_rate": 3.0485276044313005e-05, + "loss": 1.5045, + "step": 369500 + }, + { + "epoch": 1.59, + "learning_rate": 3.047984765775977e-05, + "loss": 1.5419, + "step": 369600 + }, + { + "epoch": 1.59, + "learning_rate": 3.047441927120654e-05, + "loss": 1.514, + "step": 369700 + }, + { + "epoch": 1.59, + "learning_rate": 3.0468990884653304e-05, + "loss": 1.5397, + "step": 369800 + }, + { + "epoch": 1.59, + "learning_rate": 3.046356249810007e-05, + "loss": 1.5312, + "step": 369900 + }, + { + "epoch": 1.59, + "learning_rate": 3.045813411154683e-05, + "loss": 1.547, + "step": 370000 + }, + { + "epoch": 1.59, + "eval_loss": 1.443129301071167, + "eval_runtime": 17.8114, + "eval_samples_per_second": 561.438, + "eval_steps_per_second": 17.573, + "step": 370000 + }, + { + "epoch": 1.59, + "learning_rate": 3.0452705724993597e-05, + "loss": 1.5362, + "step": 370100 + }, + { + "epoch": 1.59, + "learning_rate": 3.0447277338440362e-05, + "loss": 1.5216, + "step": 370200 + }, + { + "epoch": 1.59, + "learning_rate": 3.0441848951887124e-05, + "loss": 1.542, + "step": 370300 + }, + { + "epoch": 1.59, + "learning_rate": 3.043642056533389e-05, + "loss": 1.5317, + "step": 370400 + }, + { + "epoch": 1.59, + "learning_rate": 3.0430992178780654e-05, + "loss": 1.5284, + "step": 370500 + }, + { + "epoch": 1.59, + "learning_rate": 3.042556379222742e-05, + "loss": 1.527, + "step": 370600 + }, + { + "epoch": 1.59, + "learning_rate": 3.042013540567418e-05, + "loss": 1.5274, + "step": 370700 + }, + { + "epoch": 1.59, + "learning_rate": 3.0414707019120953e-05, + "loss": 1.5469, + "step": 370800 + }, + { + "epoch": 1.59, + "learning_rate": 3.040927863256772e-05, + "loss": 1.5412, + "step": 370900 + }, + { + "epoch": 1.59, + "learning_rate": 3.040385024601448e-05, + "loss": 1.5134, + "step": 371000 + }, + { + "epoch": 1.59, + "learning_rate": 3.0398421859461246e-05, + "loss": 1.4993, + "step": 371100 + }, + { + "epoch": 1.59, + "learning_rate": 3.039299347290801e-05, + "loss": 1.5266, + "step": 371200 + }, + { + "epoch": 1.6, + "learning_rate": 3.0387565086354773e-05, + "loss": 1.5317, + "step": 371300 + }, + { + "epoch": 1.6, + "learning_rate": 3.0382136699801538e-05, + "loss": 1.5536, + "step": 371400 + }, + { + "epoch": 1.6, + "learning_rate": 3.0376708313248304e-05, + "loss": 1.5476, + "step": 371500 + }, + { + "epoch": 1.6, + "learning_rate": 3.037127992669507e-05, + "loss": 1.566, + "step": 371600 + }, + { + "epoch": 1.6, + "learning_rate": 3.036585154014183e-05, + "loss": 1.5351, + "step": 371700 + }, + { + "epoch": 1.6, + "learning_rate": 3.0360423153588603e-05, + "loss": 1.5477, + "step": 371800 + }, + { + "epoch": 1.6, + "learning_rate": 3.0354994767035368e-05, + "loss": 1.5356, + "step": 371900 + }, + { + "epoch": 1.6, + "learning_rate": 3.034956638048213e-05, + "loss": 1.5352, + "step": 372000 + }, + { + "epoch": 1.6, + "learning_rate": 3.0344137993928895e-05, + "loss": 1.5359, + "step": 372100 + }, + { + "epoch": 1.6, + "learning_rate": 3.033870960737566e-05, + "loss": 1.536, + "step": 372200 + }, + { + "epoch": 1.6, + "learning_rate": 3.0333281220822422e-05, + "loss": 1.5461, + "step": 372300 + }, + { + "epoch": 1.6, + "learning_rate": 3.0327852834269187e-05, + "loss": 1.5335, + "step": 372400 + }, + { + "epoch": 1.6, + "learning_rate": 3.0322424447715953e-05, + "loss": 1.5377, + "step": 372500 + }, + { + "epoch": 1.6, + "learning_rate": 3.0316996061162718e-05, + "loss": 1.5455, + "step": 372600 + }, + { + "epoch": 1.6, + "learning_rate": 3.031156767460948e-05, + "loss": 1.5413, + "step": 372700 + }, + { + "epoch": 1.6, + "learning_rate": 3.0306139288056245e-05, + "loss": 1.5349, + "step": 372800 + }, + { + "epoch": 1.6, + "learning_rate": 3.0300710901503017e-05, + "loss": 1.5368, + "step": 372900 + }, + { + "epoch": 1.6, + "learning_rate": 3.029528251494978e-05, + "loss": 1.517, + "step": 373000 + }, + { + "epoch": 1.6, + "learning_rate": 3.0289854128396544e-05, + "loss": 1.5187, + "step": 373100 + }, + { + "epoch": 1.6, + "learning_rate": 3.028442574184331e-05, + "loss": 1.5592, + "step": 373200 + }, + { + "epoch": 1.6, + "learning_rate": 3.027899735529007e-05, + "loss": 1.55, + "step": 373300 + }, + { + "epoch": 1.6, + "learning_rate": 3.0273568968736837e-05, + "loss": 1.5557, + "step": 373400 + }, + { + "epoch": 1.6, + "learning_rate": 3.0268140582183602e-05, + "loss": 1.5562, + "step": 373500 + }, + { + "epoch": 1.61, + "learning_rate": 3.0262712195630367e-05, + "loss": 1.5256, + "step": 373600 + }, + { + "epoch": 1.61, + "learning_rate": 3.025728380907713e-05, + "loss": 1.5164, + "step": 373700 + }, + { + "epoch": 1.61, + "learning_rate": 3.0251855422523894e-05, + "loss": 1.5493, + "step": 373800 + }, + { + "epoch": 1.61, + "learning_rate": 3.0246427035970663e-05, + "loss": 1.4844, + "step": 373900 + }, + { + "epoch": 1.61, + "learning_rate": 3.024099864941743e-05, + "loss": 1.527, + "step": 374000 + }, + { + "epoch": 1.61, + "learning_rate": 3.0235570262864194e-05, + "loss": 1.5316, + "step": 374100 + }, + { + "epoch": 1.61, + "learning_rate": 3.023014187631096e-05, + "loss": 1.5353, + "step": 374200 + }, + { + "epoch": 1.61, + "learning_rate": 3.022471348975772e-05, + "loss": 1.5089, + "step": 374300 + }, + { + "epoch": 1.61, + "learning_rate": 3.0219285103204486e-05, + "loss": 1.5295, + "step": 374400 + }, + { + "epoch": 1.61, + "learning_rate": 3.021385671665125e-05, + "loss": 1.5455, + "step": 374500 + }, + { + "epoch": 1.61, + "learning_rate": 3.0208428330098016e-05, + "loss": 1.5158, + "step": 374600 + }, + { + "epoch": 1.61, + "learning_rate": 3.020299994354478e-05, + "loss": 1.5624, + "step": 374700 + }, + { + "epoch": 1.61, + "learning_rate": 3.0197571556991544e-05, + "loss": 1.5486, + "step": 374800 + }, + { + "epoch": 1.61, + "learning_rate": 3.019214317043831e-05, + "loss": 1.5279, + "step": 374900 + }, + { + "epoch": 1.61, + "learning_rate": 3.0186714783885078e-05, + "loss": 1.545, + "step": 375000 + }, + { + "epoch": 1.61, + "eval_loss": 1.44380521774292, + "eval_runtime": 17.8282, + "eval_samples_per_second": 560.909, + "eval_steps_per_second": 17.556, + "step": 375000 + }, + { + "epoch": 1.61, + "learning_rate": 3.0181286397331843e-05, + "loss": 1.5086, + "step": 375100 + }, + { + "epoch": 1.61, + "learning_rate": 3.0175858010778608e-05, + "loss": 1.5125, + "step": 375200 + }, + { + "epoch": 1.61, + "learning_rate": 3.017042962422537e-05, + "loss": 1.5614, + "step": 375300 + }, + { + "epoch": 1.61, + "learning_rate": 3.0165001237672135e-05, + "loss": 1.5253, + "step": 375400 + }, + { + "epoch": 1.61, + "learning_rate": 3.01595728511189e-05, + "loss": 1.5391, + "step": 375500 + }, + { + "epoch": 1.61, + "learning_rate": 3.0154144464565666e-05, + "loss": 1.5503, + "step": 375600 + }, + { + "epoch": 1.61, + "learning_rate": 3.0148716078012428e-05, + "loss": 1.5573, + "step": 375700 + }, + { + "epoch": 1.61, + "learning_rate": 3.0143287691459193e-05, + "loss": 1.5312, + "step": 375800 + }, + { + "epoch": 1.61, + "learning_rate": 3.0137859304905958e-05, + "loss": 1.5472, + "step": 375900 + }, + { + "epoch": 1.62, + "learning_rate": 3.0132430918352727e-05, + "loss": 1.5398, + "step": 376000 + }, + { + "epoch": 1.62, + "learning_rate": 3.0127002531799492e-05, + "loss": 1.5737, + "step": 376100 + }, + { + "epoch": 1.62, + "learning_rate": 3.0121574145246257e-05, + "loss": 1.5449, + "step": 376200 + }, + { + "epoch": 1.62, + "learning_rate": 3.011614575869302e-05, + "loss": 1.5374, + "step": 376300 + }, + { + "epoch": 1.62, + "learning_rate": 3.0110717372139784e-05, + "loss": 1.5009, + "step": 376400 + }, + { + "epoch": 1.62, + "learning_rate": 3.010528898558655e-05, + "loss": 1.5409, + "step": 376500 + }, + { + "epoch": 1.62, + "learning_rate": 3.0099860599033315e-05, + "loss": 1.5352, + "step": 376600 + }, + { + "epoch": 1.62, + "learning_rate": 3.0094432212480077e-05, + "loss": 1.5282, + "step": 376700 + }, + { + "epoch": 1.62, + "learning_rate": 3.0089003825926842e-05, + "loss": 1.5344, + "step": 376800 + }, + { + "epoch": 1.62, + "learning_rate": 3.0083575439373607e-05, + "loss": 1.5274, + "step": 376900 + }, + { + "epoch": 1.62, + "learning_rate": 3.0078147052820373e-05, + "loss": 1.5241, + "step": 377000 + }, + { + "epoch": 1.62, + "learning_rate": 3.007271866626714e-05, + "loss": 1.5125, + "step": 377100 + }, + { + "epoch": 1.62, + "learning_rate": 3.0067290279713907e-05, + "loss": 1.5378, + "step": 377200 + }, + { + "epoch": 1.62, + "learning_rate": 3.006186189316067e-05, + "loss": 1.5027, + "step": 377300 + }, + { + "epoch": 1.62, + "learning_rate": 3.0056433506607434e-05, + "loss": 1.5362, + "step": 377400 + }, + { + "epoch": 1.62, + "learning_rate": 3.00510051200542e-05, + "loss": 1.5468, + "step": 377500 + }, + { + "epoch": 1.62, + "learning_rate": 3.0045576733500964e-05, + "loss": 1.5328, + "step": 377600 + }, + { + "epoch": 1.62, + "learning_rate": 3.0040148346947726e-05, + "loss": 1.553, + "step": 377700 + }, + { + "epoch": 1.62, + "learning_rate": 3.003471996039449e-05, + "loss": 1.5313, + "step": 377800 + }, + { + "epoch": 1.62, + "learning_rate": 3.0029291573841257e-05, + "loss": 1.5494, + "step": 377900 + }, + { + "epoch": 1.62, + "learning_rate": 3.0023863187288022e-05, + "loss": 1.5392, + "step": 378000 + }, + { + "epoch": 1.62, + "learning_rate": 3.001843480073479e-05, + "loss": 1.5492, + "step": 378100 + }, + { + "epoch": 1.62, + "learning_rate": 3.0013006414181556e-05, + "loss": 1.5513, + "step": 378200 + }, + { + "epoch": 1.63, + "learning_rate": 3.0007578027628318e-05, + "loss": 1.5523, + "step": 378300 + }, + { + "epoch": 1.63, + "learning_rate": 3.0002149641075083e-05, + "loss": 1.5254, + "step": 378400 + }, + { + "epoch": 1.63, + "learning_rate": 2.9996721254521848e-05, + "loss": 1.5396, + "step": 378500 + }, + { + "epoch": 1.63, + "learning_rate": 2.9991292867968613e-05, + "loss": 1.5326, + "step": 378600 + }, + { + "epoch": 1.63, + "learning_rate": 2.9985864481415375e-05, + "loss": 1.5232, + "step": 378700 + }, + { + "epoch": 1.63, + "learning_rate": 2.998043609486214e-05, + "loss": 1.5402, + "step": 378800 + }, + { + "epoch": 1.63, + "learning_rate": 2.9975007708308906e-05, + "loss": 1.5314, + "step": 378900 + }, + { + "epoch": 1.63, + "learning_rate": 2.996957932175567e-05, + "loss": 1.5268, + "step": 379000 + }, + { + "epoch": 1.63, + "learning_rate": 2.9964150935202433e-05, + "loss": 1.5378, + "step": 379100 + }, + { + "epoch": 1.63, + "learning_rate": 2.9958722548649205e-05, + "loss": 1.5408, + "step": 379200 + }, + { + "epoch": 1.63, + "learning_rate": 2.9953294162095967e-05, + "loss": 1.504, + "step": 379300 + }, + { + "epoch": 1.63, + "learning_rate": 2.9947865775542732e-05, + "loss": 1.5343, + "step": 379400 + }, + { + "epoch": 1.63, + "learning_rate": 2.9942437388989497e-05, + "loss": 1.5036, + "step": 379500 + }, + { + "epoch": 1.63, + "learning_rate": 2.9937009002436263e-05, + "loss": 1.5604, + "step": 379600 + }, + { + "epoch": 1.63, + "learning_rate": 2.9931580615883025e-05, + "loss": 1.5606, + "step": 379700 + }, + { + "epoch": 1.63, + "learning_rate": 2.992615222932979e-05, + "loss": 1.5496, + "step": 379800 + }, + { + "epoch": 1.63, + "learning_rate": 2.9920723842776555e-05, + "loss": 1.541, + "step": 379900 + }, + { + "epoch": 1.63, + "learning_rate": 2.991529545622332e-05, + "loss": 1.519, + "step": 380000 + }, + { + "epoch": 1.63, + "eval_loss": 1.441465973854065, + "eval_runtime": 17.8531, + "eval_samples_per_second": 560.125, + "eval_steps_per_second": 17.532, + "step": 380000 + }, + { + "epoch": 1.63, + "learning_rate": 2.9909867069670082e-05, + "loss": 1.5429, + "step": 380100 + }, + { + "epoch": 1.63, + "learning_rate": 2.9904438683116854e-05, + "loss": 1.5333, + "step": 380200 + }, + { + "epoch": 1.63, + "learning_rate": 2.9899010296563616e-05, + "loss": 1.5395, + "step": 380300 + }, + { + "epoch": 1.63, + "learning_rate": 2.989358191001038e-05, + "loss": 1.5221, + "step": 380400 + }, + { + "epoch": 1.63, + "learning_rate": 2.9888153523457147e-05, + "loss": 1.5362, + "step": 380500 + }, + { + "epoch": 1.64, + "learning_rate": 2.9882725136903912e-05, + "loss": 1.5406, + "step": 380600 + }, + { + "epoch": 1.64, + "learning_rate": 2.9877296750350674e-05, + "loss": 1.5323, + "step": 380700 + }, + { + "epoch": 1.64, + "learning_rate": 2.987186836379744e-05, + "loss": 1.5644, + "step": 380800 + }, + { + "epoch": 1.64, + "learning_rate": 2.9866439977244204e-05, + "loss": 1.5275, + "step": 380900 + }, + { + "epoch": 1.64, + "learning_rate": 2.986101159069097e-05, + "loss": 1.5485, + "step": 381000 + }, + { + "epoch": 1.64, + "learning_rate": 2.985558320413773e-05, + "loss": 1.5074, + "step": 381100 + }, + { + "epoch": 1.64, + "learning_rate": 2.9850154817584497e-05, + "loss": 1.5068, + "step": 381200 + }, + { + "epoch": 1.64, + "learning_rate": 2.9844726431031265e-05, + "loss": 1.5295, + "step": 381300 + }, + { + "epoch": 1.64, + "learning_rate": 2.983929804447803e-05, + "loss": 1.5248, + "step": 381400 + }, + { + "epoch": 1.64, + "learning_rate": 2.9833869657924796e-05, + "loss": 1.5372, + "step": 381500 + }, + { + "epoch": 1.64, + "learning_rate": 2.982844127137156e-05, + "loss": 1.5467, + "step": 381600 + }, + { + "epoch": 1.64, + "learning_rate": 2.9823012884818323e-05, + "loss": 1.508, + "step": 381700 + }, + { + "epoch": 1.64, + "learning_rate": 2.981758449826509e-05, + "loss": 1.545, + "step": 381800 + }, + { + "epoch": 1.64, + "learning_rate": 2.9812156111711854e-05, + "loss": 1.5113, + "step": 381900 + }, + { + "epoch": 1.64, + "learning_rate": 2.980672772515862e-05, + "loss": 1.5503, + "step": 382000 + }, + { + "epoch": 1.64, + "learning_rate": 2.980129933860538e-05, + "loss": 1.5146, + "step": 382100 + }, + { + "epoch": 1.64, + "learning_rate": 2.9795870952052146e-05, + "loss": 1.5072, + "step": 382200 + }, + { + "epoch": 1.64, + "learning_rate": 2.9790442565498915e-05, + "loss": 1.5394, + "step": 382300 + }, + { + "epoch": 1.64, + "learning_rate": 2.978501417894568e-05, + "loss": 1.5449, + "step": 382400 + }, + { + "epoch": 1.64, + "learning_rate": 2.9779585792392445e-05, + "loss": 1.5293, + "step": 382500 + }, + { + "epoch": 1.64, + "learning_rate": 2.977415740583921e-05, + "loss": 1.5363, + "step": 382600 + }, + { + "epoch": 1.64, + "learning_rate": 2.9768729019285972e-05, + "loss": 1.5216, + "step": 382700 + }, + { + "epoch": 1.64, + "learning_rate": 2.9763300632732738e-05, + "loss": 1.5477, + "step": 382800 + }, + { + "epoch": 1.64, + "learning_rate": 2.9757872246179503e-05, + "loss": 1.5265, + "step": 382900 + }, + { + "epoch": 1.65, + "learning_rate": 2.9752443859626268e-05, + "loss": 1.4951, + "step": 383000 + }, + { + "epoch": 1.65, + "learning_rate": 2.974701547307303e-05, + "loss": 1.5332, + "step": 383100 + }, + { + "epoch": 1.65, + "learning_rate": 2.9741587086519795e-05, + "loss": 1.5358, + "step": 383200 + }, + { + "epoch": 1.65, + "learning_rate": 2.9736158699966564e-05, + "loss": 1.5612, + "step": 383300 + }, + { + "epoch": 1.65, + "learning_rate": 2.973073031341333e-05, + "loss": 1.5337, + "step": 383400 + }, + { + "epoch": 1.65, + "learning_rate": 2.9725301926860094e-05, + "loss": 1.5483, + "step": 383500 + }, + { + "epoch": 1.65, + "learning_rate": 2.971987354030686e-05, + "loss": 1.503, + "step": 383600 + }, + { + "epoch": 1.65, + "learning_rate": 2.971444515375362e-05, + "loss": 1.555, + "step": 383700 + }, + { + "epoch": 1.65, + "learning_rate": 2.9709016767200387e-05, + "loss": 1.5329, + "step": 383800 + }, + { + "epoch": 1.65, + "learning_rate": 2.9703588380647152e-05, + "loss": 1.5388, + "step": 383900 + }, + { + "epoch": 1.65, + "learning_rate": 2.9698159994093917e-05, + "loss": 1.5073, + "step": 384000 + }, + { + "epoch": 1.65, + "learning_rate": 2.969273160754068e-05, + "loss": 1.5039, + "step": 384100 + }, + { + "epoch": 1.65, + "learning_rate": 2.9687303220987444e-05, + "loss": 1.5433, + "step": 384200 + }, + { + "epoch": 1.65, + "learning_rate": 2.968187483443421e-05, + "loss": 1.5517, + "step": 384300 + }, + { + "epoch": 1.65, + "learning_rate": 2.967644644788098e-05, + "loss": 1.5244, + "step": 384400 + }, + { + "epoch": 1.65, + "learning_rate": 2.9671018061327744e-05, + "loss": 1.5426, + "step": 384500 + }, + { + "epoch": 1.65, + "learning_rate": 2.966558967477451e-05, + "loss": 1.5364, + "step": 384600 + }, + { + "epoch": 1.65, + "learning_rate": 2.966016128822127e-05, + "loss": 1.561, + "step": 384700 + }, + { + "epoch": 1.65, + "learning_rate": 2.9654732901668036e-05, + "loss": 1.5231, + "step": 384800 + }, + { + "epoch": 1.65, + "learning_rate": 2.96493045151148e-05, + "loss": 1.5599, + "step": 384900 + }, + { + "epoch": 1.65, + "learning_rate": 2.9643876128561567e-05, + "loss": 1.5445, + "step": 385000 + }, + { + "epoch": 1.65, + "eval_loss": 1.4401415586471558, + "eval_runtime": 17.7618, + "eval_samples_per_second": 563.005, + "eval_steps_per_second": 17.622, + "step": 385000 + }, + { + "epoch": 1.65, + "learning_rate": 2.963844774200833e-05, + "loss": 1.5467, + "step": 385100 + }, + { + "epoch": 1.65, + "learning_rate": 2.9633019355455094e-05, + "loss": 1.5363, + "step": 385200 + }, + { + "epoch": 1.66, + "learning_rate": 2.962759096890186e-05, + "loss": 1.5448, + "step": 385300 + }, + { + "epoch": 1.66, + "learning_rate": 2.9622162582348628e-05, + "loss": 1.5497, + "step": 385400 + }, + { + "epoch": 1.66, + "learning_rate": 2.9616734195795393e-05, + "loss": 1.5068, + "step": 385500 + }, + { + "epoch": 1.66, + "learning_rate": 2.9611305809242158e-05, + "loss": 1.5253, + "step": 385600 + }, + { + "epoch": 1.66, + "learning_rate": 2.960587742268892e-05, + "loss": 1.5221, + "step": 385700 + }, + { + "epoch": 1.66, + "learning_rate": 2.9600449036135685e-05, + "loss": 1.5336, + "step": 385800 + }, + { + "epoch": 1.66, + "learning_rate": 2.959502064958245e-05, + "loss": 1.5336, + "step": 385900 + }, + { + "epoch": 1.66, + "learning_rate": 2.9589592263029216e-05, + "loss": 1.5239, + "step": 386000 + }, + { + "epoch": 1.66, + "learning_rate": 2.9584163876475978e-05, + "loss": 1.5261, + "step": 386100 + }, + { + "epoch": 1.66, + "learning_rate": 2.9578735489922743e-05, + "loss": 1.5146, + "step": 386200 + }, + { + "epoch": 1.66, + "learning_rate": 2.9573307103369508e-05, + "loss": 1.5527, + "step": 386300 + }, + { + "epoch": 1.66, + "learning_rate": 2.9567878716816274e-05, + "loss": 1.5197, + "step": 386400 + }, + { + "epoch": 1.66, + "learning_rate": 2.9562450330263042e-05, + "loss": 1.5463, + "step": 386500 + }, + { + "epoch": 1.66, + "learning_rate": 2.9557021943709807e-05, + "loss": 1.5406, + "step": 386600 + }, + { + "epoch": 1.66, + "learning_rate": 2.955159355715657e-05, + "loss": 1.536, + "step": 386700 + }, + { + "epoch": 1.66, + "learning_rate": 2.9546165170603335e-05, + "loss": 1.5257, + "step": 386800 + }, + { + "epoch": 1.66, + "learning_rate": 2.95407367840501e-05, + "loss": 1.5449, + "step": 386900 + }, + { + "epoch": 1.66, + "learning_rate": 2.9535308397496865e-05, + "loss": 1.5352, + "step": 387000 + }, + { + "epoch": 1.66, + "learning_rate": 2.9529880010943627e-05, + "loss": 1.5447, + "step": 387100 + }, + { + "epoch": 1.66, + "learning_rate": 2.9524451624390392e-05, + "loss": 1.5273, + "step": 387200 + }, + { + "epoch": 1.66, + "learning_rate": 2.9519023237837157e-05, + "loss": 1.515, + "step": 387300 + }, + { + "epoch": 1.66, + "learning_rate": 2.9513594851283923e-05, + "loss": 1.5312, + "step": 387400 + }, + { + "epoch": 1.66, + "learning_rate": 2.950816646473069e-05, + "loss": 1.5419, + "step": 387500 + }, + { + "epoch": 1.67, + "learning_rate": 2.9502738078177457e-05, + "loss": 1.5344, + "step": 387600 + }, + { + "epoch": 1.67, + "learning_rate": 2.949730969162422e-05, + "loss": 1.5083, + "step": 387700 + }, + { + "epoch": 1.67, + "learning_rate": 2.9491881305070984e-05, + "loss": 1.5281, + "step": 387800 + }, + { + "epoch": 1.67, + "learning_rate": 2.948645291851775e-05, + "loss": 1.5292, + "step": 387900 + }, + { + "epoch": 1.67, + "learning_rate": 2.9481024531964514e-05, + "loss": 1.5412, + "step": 388000 + }, + { + "epoch": 1.67, + "learning_rate": 2.9475596145411276e-05, + "loss": 1.5267, + "step": 388100 + }, + { + "epoch": 1.67, + "learning_rate": 2.947016775885804e-05, + "loss": 1.5315, + "step": 388200 + }, + { + "epoch": 1.67, + "learning_rate": 2.9464739372304807e-05, + "loss": 1.5206, + "step": 388300 + }, + { + "epoch": 1.67, + "learning_rate": 2.9459310985751572e-05, + "loss": 1.5419, + "step": 388400 + }, + { + "epoch": 1.67, + "learning_rate": 2.9453882599198334e-05, + "loss": 1.5717, + "step": 388500 + }, + { + "epoch": 1.67, + "learning_rate": 2.9448454212645106e-05, + "loss": 1.5388, + "step": 388600 + }, + { + "epoch": 1.67, + "learning_rate": 2.9443025826091868e-05, + "loss": 1.5255, + "step": 388700 + }, + { + "epoch": 1.67, + "learning_rate": 2.9437597439538633e-05, + "loss": 1.5424, + "step": 388800 + }, + { + "epoch": 1.67, + "learning_rate": 2.94321690529854e-05, + "loss": 1.5145, + "step": 388900 + }, + { + "epoch": 1.67, + "learning_rate": 2.9426740666432164e-05, + "loss": 1.5352, + "step": 389000 + }, + { + "epoch": 1.67, + "learning_rate": 2.9421312279878925e-05, + "loss": 1.5473, + "step": 389100 + }, + { + "epoch": 1.67, + "learning_rate": 2.941588389332569e-05, + "loss": 1.5378, + "step": 389200 + }, + { + "epoch": 1.67, + "learning_rate": 2.9410455506772456e-05, + "loss": 1.5358, + "step": 389300 + }, + { + "epoch": 1.67, + "learning_rate": 2.940502712021922e-05, + "loss": 1.5273, + "step": 389400 + }, + { + "epoch": 1.67, + "learning_rate": 2.9399598733665983e-05, + "loss": 1.5294, + "step": 389500 + }, + { + "epoch": 1.67, + "learning_rate": 2.9394170347112755e-05, + "loss": 1.5519, + "step": 389600 + }, + { + "epoch": 1.67, + "learning_rate": 2.9388741960559517e-05, + "loss": 1.5548, + "step": 389700 + }, + { + "epoch": 1.67, + "learning_rate": 2.9383313574006282e-05, + "loss": 1.5096, + "step": 389800 + }, + { + "epoch": 1.68, + "learning_rate": 2.9377885187453048e-05, + "loss": 1.5136, + "step": 389900 + }, + { + "epoch": 1.68, + "learning_rate": 2.9372456800899813e-05, + "loss": 1.5348, + "step": 390000 + }, + { + "epoch": 1.68, + "eval_loss": 1.4426006078720093, + "eval_runtime": 36.5743, + "eval_samples_per_second": 273.416, + "eval_steps_per_second": 8.558, + "step": 390000 + }, + { + "epoch": 1.68, + "learning_rate": 2.9367028414346575e-05, + "loss": 1.5546, + "step": 390100 + }, + { + "epoch": 1.68, + "learning_rate": 2.936160002779334e-05, + "loss": 1.5413, + "step": 390200 + }, + { + "epoch": 1.68, + "learning_rate": 2.9356171641240105e-05, + "loss": 1.5366, + "step": 390300 + }, + { + "epoch": 1.68, + "learning_rate": 2.935074325468687e-05, + "loss": 1.5408, + "step": 390400 + }, + { + "epoch": 1.68, + "learning_rate": 2.9345314868133632e-05, + "loss": 1.53, + "step": 390500 + }, + { + "epoch": 1.68, + "learning_rate": 2.9339886481580398e-05, + "loss": 1.4864, + "step": 390600 + }, + { + "epoch": 1.68, + "learning_rate": 2.9334458095027166e-05, + "loss": 1.5162, + "step": 390700 + }, + { + "epoch": 1.68, + "learning_rate": 2.932902970847393e-05, + "loss": 1.5158, + "step": 390800 + }, + { + "epoch": 1.68, + "learning_rate": 2.9323601321920697e-05, + "loss": 1.5527, + "step": 390900 + }, + { + "epoch": 1.68, + "learning_rate": 2.9318172935367462e-05, + "loss": 1.5186, + "step": 391000 + }, + { + "epoch": 1.68, + "learning_rate": 2.9312744548814224e-05, + "loss": 1.5328, + "step": 391100 + }, + { + "epoch": 1.68, + "learning_rate": 2.930731616226099e-05, + "loss": 1.5606, + "step": 391200 + }, + { + "epoch": 1.68, + "learning_rate": 2.9301887775707754e-05, + "loss": 1.5256, + "step": 391300 + }, + { + "epoch": 1.68, + "learning_rate": 2.929645938915452e-05, + "loss": 1.5284, + "step": 391400 + }, + { + "epoch": 1.68, + "learning_rate": 2.929103100260128e-05, + "loss": 1.5403, + "step": 391500 + }, + { + "epoch": 1.68, + "learning_rate": 2.9285602616048047e-05, + "loss": 1.5277, + "step": 391600 + }, + { + "epoch": 1.68, + "learning_rate": 2.9280174229494816e-05, + "loss": 1.5158, + "step": 391700 + }, + { + "epoch": 1.68, + "learning_rate": 2.927474584294158e-05, + "loss": 1.5156, + "step": 391800 + }, + { + "epoch": 1.68, + "learning_rate": 2.9269317456388346e-05, + "loss": 1.519, + "step": 391900 + }, + { + "epoch": 1.68, + "learning_rate": 2.926388906983511e-05, + "loss": 1.5105, + "step": 392000 + }, + { + "epoch": 1.68, + "learning_rate": 2.9258460683281873e-05, + "loss": 1.4996, + "step": 392100 + }, + { + "epoch": 1.68, + "learning_rate": 2.925303229672864e-05, + "loss": 1.538, + "step": 392200 + }, + { + "epoch": 1.69, + "learning_rate": 2.9247603910175404e-05, + "loss": 1.5354, + "step": 392300 + }, + { + "epoch": 1.69, + "learning_rate": 2.9242175523622166e-05, + "loss": 1.5445, + "step": 392400 + }, + { + "epoch": 1.69, + "learning_rate": 2.923674713706893e-05, + "loss": 1.5375, + "step": 392500 + }, + { + "epoch": 1.69, + "learning_rate": 2.9231318750515696e-05, + "loss": 1.5331, + "step": 392600 + }, + { + "epoch": 1.69, + "learning_rate": 2.922589036396246e-05, + "loss": 1.5468, + "step": 392700 + }, + { + "epoch": 1.69, + "learning_rate": 2.922046197740923e-05, + "loss": 1.5413, + "step": 392800 + }, + { + "epoch": 1.69, + "learning_rate": 2.9215033590855995e-05, + "loss": 1.5525, + "step": 392900 + }, + { + "epoch": 1.69, + "learning_rate": 2.920960520430276e-05, + "loss": 1.5149, + "step": 393000 + }, + { + "epoch": 1.69, + "learning_rate": 2.9204176817749522e-05, + "loss": 1.5498, + "step": 393100 + }, + { + "epoch": 1.69, + "learning_rate": 2.9198748431196288e-05, + "loss": 1.5412, + "step": 393200 + }, + { + "epoch": 1.69, + "learning_rate": 2.9193320044643053e-05, + "loss": 1.5368, + "step": 393300 + }, + { + "epoch": 1.69, + "learning_rate": 2.9187891658089815e-05, + "loss": 1.542, + "step": 393400 + }, + { + "epoch": 1.69, + "learning_rate": 2.918246327153658e-05, + "loss": 1.5243, + "step": 393500 + }, + { + "epoch": 1.69, + "learning_rate": 2.9177034884983345e-05, + "loss": 1.5567, + "step": 393600 + }, + { + "epoch": 1.69, + "learning_rate": 2.917160649843011e-05, + "loss": 1.5155, + "step": 393700 + }, + { + "epoch": 1.69, + "learning_rate": 2.916617811187688e-05, + "loss": 1.5208, + "step": 393800 + }, + { + "epoch": 1.69, + "learning_rate": 2.9160749725323645e-05, + "loss": 1.5428, + "step": 393900 + }, + { + "epoch": 1.69, + "learning_rate": 2.915532133877041e-05, + "loss": 1.5396, + "step": 394000 + }, + { + "epoch": 1.69, + "learning_rate": 2.914989295221717e-05, + "loss": 1.5331, + "step": 394100 + }, + { + "epoch": 1.69, + "learning_rate": 2.9144464565663937e-05, + "loss": 1.5322, + "step": 394200 + }, + { + "epoch": 1.69, + "learning_rate": 2.9139036179110702e-05, + "loss": 1.57, + "step": 394300 + }, + { + "epoch": 1.69, + "learning_rate": 2.9133607792557464e-05, + "loss": 1.5523, + "step": 394400 + }, + { + "epoch": 1.69, + "learning_rate": 2.912817940600423e-05, + "loss": 1.515, + "step": 394500 + }, + { + "epoch": 1.7, + "learning_rate": 2.9122751019450995e-05, + "loss": 1.5288, + "step": 394600 + }, + { + "epoch": 1.7, + "learning_rate": 2.911732263289776e-05, + "loss": 1.5688, + "step": 394700 + }, + { + "epoch": 1.7, + "learning_rate": 2.9111894246344522e-05, + "loss": 1.5503, + "step": 394800 + }, + { + "epoch": 1.7, + "learning_rate": 2.9106465859791294e-05, + "loss": 1.5064, + "step": 394900 + }, + { + "epoch": 1.7, + "learning_rate": 2.910103747323806e-05, + "loss": 1.525, + "step": 395000 + }, + { + "epoch": 1.7, + "eval_loss": 1.441019892692566, + "eval_runtime": 17.8041, + "eval_samples_per_second": 561.668, + "eval_steps_per_second": 17.58, + "step": 395000 + }, + { + "epoch": 1.7, + "learning_rate": 2.909560908668482e-05, + "loss": 1.5334, + "step": 395100 + }, + { + "epoch": 1.7, + "learning_rate": 2.9090180700131586e-05, + "loss": 1.5313, + "step": 395200 + }, + { + "epoch": 1.7, + "learning_rate": 2.908475231357835e-05, + "loss": 1.5513, + "step": 395300 + }, + { + "epoch": 1.7, + "learning_rate": 2.9079323927025113e-05, + "loss": 1.5522, + "step": 395400 + }, + { + "epoch": 1.7, + "learning_rate": 2.907389554047188e-05, + "loss": 1.5336, + "step": 395500 + }, + { + "epoch": 1.7, + "learning_rate": 2.9068467153918644e-05, + "loss": 1.5244, + "step": 395600 + }, + { + "epoch": 1.7, + "learning_rate": 2.906303876736541e-05, + "loss": 1.5485, + "step": 395700 + }, + { + "epoch": 1.7, + "learning_rate": 2.905761038081217e-05, + "loss": 1.562, + "step": 395800 + }, + { + "epoch": 1.7, + "learning_rate": 2.9052181994258943e-05, + "loss": 1.5201, + "step": 395900 + }, + { + "epoch": 1.7, + "learning_rate": 2.9046753607705708e-05, + "loss": 1.5629, + "step": 396000 + }, + { + "epoch": 1.7, + "learning_rate": 2.904132522115247e-05, + "loss": 1.5387, + "step": 396100 + }, + { + "epoch": 1.7, + "learning_rate": 2.9035896834599235e-05, + "loss": 1.5281, + "step": 396200 + }, + { + "epoch": 1.7, + "learning_rate": 2.9030468448046e-05, + "loss": 1.5412, + "step": 396300 + }, + { + "epoch": 1.7, + "learning_rate": 2.9025040061492763e-05, + "loss": 1.5418, + "step": 396400 + }, + { + "epoch": 1.7, + "learning_rate": 2.9019611674939528e-05, + "loss": 1.5426, + "step": 396500 + }, + { + "epoch": 1.7, + "learning_rate": 2.9014183288386293e-05, + "loss": 1.5182, + "step": 396600 + }, + { + "epoch": 1.7, + "learning_rate": 2.900875490183306e-05, + "loss": 1.5376, + "step": 396700 + }, + { + "epoch": 1.7, + "learning_rate": 2.900332651527982e-05, + "loss": 1.5392, + "step": 396800 + }, + { + "epoch": 1.71, + "learning_rate": 2.8997898128726585e-05, + "loss": 1.5274, + "step": 396900 + }, + { + "epoch": 1.71, + "learning_rate": 2.8992469742173358e-05, + "loss": 1.5162, + "step": 397000 + }, + { + "epoch": 1.71, + "learning_rate": 2.898704135562012e-05, + "loss": 1.5394, + "step": 397100 + }, + { + "epoch": 1.71, + "learning_rate": 2.8981612969066885e-05, + "loss": 1.5618, + "step": 397200 + }, + { + "epoch": 1.71, + "learning_rate": 2.897618458251365e-05, + "loss": 1.5106, + "step": 397300 + }, + { + "epoch": 1.71, + "learning_rate": 2.8970756195960412e-05, + "loss": 1.5487, + "step": 397400 + }, + { + "epoch": 1.71, + "learning_rate": 2.8965327809407177e-05, + "loss": 1.5158, + "step": 397500 + }, + { + "epoch": 1.71, + "learning_rate": 2.8959899422853942e-05, + "loss": 1.5026, + "step": 397600 + }, + { + "epoch": 1.71, + "learning_rate": 2.8954471036300708e-05, + "loss": 1.5133, + "step": 397700 + }, + { + "epoch": 1.71, + "learning_rate": 2.894904264974747e-05, + "loss": 1.5608, + "step": 397800 + }, + { + "epoch": 1.71, + "learning_rate": 2.8943614263194235e-05, + "loss": 1.5655, + "step": 397900 + }, + { + "epoch": 1.71, + "learning_rate": 2.8938185876641007e-05, + "loss": 1.5126, + "step": 398000 + }, + { + "epoch": 1.71, + "learning_rate": 2.893275749008777e-05, + "loss": 1.556, + "step": 398100 + }, + { + "epoch": 1.71, + "learning_rate": 2.8927329103534534e-05, + "loss": 1.5335, + "step": 398200 + }, + { + "epoch": 1.71, + "learning_rate": 2.89219007169813e-05, + "loss": 1.5421, + "step": 398300 + }, + { + "epoch": 1.71, + "learning_rate": 2.891647233042806e-05, + "loss": 1.5166, + "step": 398400 + }, + { + "epoch": 1.71, + "learning_rate": 2.8911043943874826e-05, + "loss": 1.5319, + "step": 398500 + }, + { + "epoch": 1.71, + "learning_rate": 2.890561555732159e-05, + "loss": 1.5482, + "step": 398600 + }, + { + "epoch": 1.71, + "learning_rate": 2.8900187170768357e-05, + "loss": 1.5344, + "step": 398700 + }, + { + "epoch": 1.71, + "learning_rate": 2.889475878421512e-05, + "loss": 1.5221, + "step": 398800 + }, + { + "epoch": 1.71, + "learning_rate": 2.8889330397661884e-05, + "loss": 1.5241, + "step": 398900 + }, + { + "epoch": 1.71, + "learning_rate": 2.888390201110865e-05, + "loss": 1.5421, + "step": 399000 + }, + { + "epoch": 1.71, + "learning_rate": 2.8878473624555418e-05, + "loss": 1.5389, + "step": 399100 + }, + { + "epoch": 1.71, + "learning_rate": 2.8873045238002183e-05, + "loss": 1.5364, + "step": 399200 + }, + { + "epoch": 1.72, + "learning_rate": 2.886761685144895e-05, + "loss": 1.5273, + "step": 399300 + }, + { + "epoch": 1.72, + "learning_rate": 2.886218846489571e-05, + "loss": 1.5398, + "step": 399400 + }, + { + "epoch": 1.72, + "learning_rate": 2.8856760078342476e-05, + "loss": 1.5343, + "step": 399500 + }, + { + "epoch": 1.72, + "learning_rate": 2.885133169178924e-05, + "loss": 1.5356, + "step": 399600 + }, + { + "epoch": 1.72, + "learning_rate": 2.8845903305236006e-05, + "loss": 1.5525, + "step": 399700 + }, + { + "epoch": 1.72, + "learning_rate": 2.8840474918682768e-05, + "loss": 1.539, + "step": 399800 + }, + { + "epoch": 1.72, + "learning_rate": 2.8835046532129533e-05, + "loss": 1.5196, + "step": 399900 + }, + { + "epoch": 1.72, + "learning_rate": 2.88296181455763e-05, + "loss": 1.5199, + "step": 400000 + }, + { + "epoch": 1.72, + "eval_loss": 1.4402216672897339, + "eval_runtime": 17.7998, + "eval_samples_per_second": 561.804, + "eval_steps_per_second": 17.584, + "step": 400000 + }, + { + "epoch": 1.72, + "learning_rate": 2.8824189759023067e-05, + "loss": 1.5239, + "step": 400100 + }, + { + "epoch": 1.72, + "learning_rate": 2.8818761372469832e-05, + "loss": 1.553, + "step": 400200 + }, + { + "epoch": 1.72, + "learning_rate": 2.8813332985916598e-05, + "loss": 1.4987, + "step": 400300 + }, + { + "epoch": 1.72, + "learning_rate": 2.880790459936336e-05, + "loss": 1.5189, + "step": 400400 + }, + { + "epoch": 1.72, + "learning_rate": 2.8802476212810125e-05, + "loss": 1.5222, + "step": 400500 + }, + { + "epoch": 1.72, + "learning_rate": 2.879704782625689e-05, + "loss": 1.5357, + "step": 400600 + }, + { + "epoch": 1.72, + "learning_rate": 2.8791619439703655e-05, + "loss": 1.5154, + "step": 400700 + }, + { + "epoch": 1.72, + "learning_rate": 2.8786191053150417e-05, + "loss": 1.5355, + "step": 400800 + }, + { + "epoch": 1.72, + "learning_rate": 2.8780762666597182e-05, + "loss": 1.5319, + "step": 400900 + }, + { + "epoch": 1.72, + "learning_rate": 2.8775334280043948e-05, + "loss": 1.5456, + "step": 401000 + }, + { + "epoch": 1.72, + "learning_rate": 2.8769905893490713e-05, + "loss": 1.5551, + "step": 401100 + }, + { + "epoch": 1.72, + "learning_rate": 2.876447750693748e-05, + "loss": 1.5271, + "step": 401200 + }, + { + "epoch": 1.72, + "learning_rate": 2.8759049120384247e-05, + "loss": 1.5595, + "step": 401300 + }, + { + "epoch": 1.72, + "learning_rate": 2.875362073383101e-05, + "loss": 1.5349, + "step": 401400 + }, + { + "epoch": 1.72, + "learning_rate": 2.8748192347277774e-05, + "loss": 1.5408, + "step": 401500 + }, + { + "epoch": 1.73, + "learning_rate": 2.874276396072454e-05, + "loss": 1.5176, + "step": 401600 + }, + { + "epoch": 1.73, + "learning_rate": 2.8737335574171305e-05, + "loss": 1.4892, + "step": 401700 + }, + { + "epoch": 1.73, + "learning_rate": 2.8731907187618066e-05, + "loss": 1.5339, + "step": 401800 + }, + { + "epoch": 1.73, + "learning_rate": 2.8726478801064832e-05, + "loss": 1.5057, + "step": 401900 + }, + { + "epoch": 1.73, + "learning_rate": 2.8721050414511597e-05, + "loss": 1.5326, + "step": 402000 + }, + { + "epoch": 1.73, + "learning_rate": 2.8715622027958362e-05, + "loss": 1.5439, + "step": 402100 + }, + { + "epoch": 1.73, + "learning_rate": 2.871019364140513e-05, + "loss": 1.5396, + "step": 402200 + }, + { + "epoch": 1.73, + "learning_rate": 2.8704765254851896e-05, + "loss": 1.5363, + "step": 402300 + }, + { + "epoch": 1.73, + "learning_rate": 2.8699336868298658e-05, + "loss": 1.5016, + "step": 402400 + }, + { + "epoch": 1.73, + "learning_rate": 2.8693908481745423e-05, + "loss": 1.522, + "step": 402500 + }, + { + "epoch": 1.73, + "learning_rate": 2.868848009519219e-05, + "loss": 1.5246, + "step": 402600 + }, + { + "epoch": 1.73, + "learning_rate": 2.8683051708638954e-05, + "loss": 1.5332, + "step": 402700 + }, + { + "epoch": 1.73, + "learning_rate": 2.8677623322085716e-05, + "loss": 1.5089, + "step": 402800 + }, + { + "epoch": 1.73, + "learning_rate": 2.867219493553248e-05, + "loss": 1.5207, + "step": 402900 + }, + { + "epoch": 1.73, + "learning_rate": 2.8666766548979246e-05, + "loss": 1.5402, + "step": 403000 + }, + { + "epoch": 1.73, + "learning_rate": 2.866133816242601e-05, + "loss": 1.5388, + "step": 403100 + }, + { + "epoch": 1.73, + "learning_rate": 2.8655909775872773e-05, + "loss": 1.5333, + "step": 403200 + }, + { + "epoch": 1.73, + "learning_rate": 2.8650481389319545e-05, + "loss": 1.5238, + "step": 403300 + }, + { + "epoch": 1.73, + "learning_rate": 2.8645053002766307e-05, + "loss": 1.4922, + "step": 403400 + }, + { + "epoch": 1.73, + "learning_rate": 2.8639624616213073e-05, + "loss": 1.5461, + "step": 403500 + }, + { + "epoch": 1.73, + "learning_rate": 2.8634196229659838e-05, + "loss": 1.5317, + "step": 403600 + }, + { + "epoch": 1.73, + "learning_rate": 2.8628767843106603e-05, + "loss": 1.543, + "step": 403700 + }, + { + "epoch": 1.73, + "learning_rate": 2.8623339456553365e-05, + "loss": 1.5036, + "step": 403800 + }, + { + "epoch": 1.74, + "learning_rate": 2.861791107000013e-05, + "loss": 1.5589, + "step": 403900 + }, + { + "epoch": 1.74, + "learning_rate": 2.8612482683446895e-05, + "loss": 1.5317, + "step": 404000 + }, + { + "epoch": 1.74, + "learning_rate": 2.860705429689366e-05, + "loss": 1.5291, + "step": 404100 + }, + { + "epoch": 1.74, + "learning_rate": 2.8601625910340423e-05, + "loss": 1.5626, + "step": 404200 + }, + { + "epoch": 1.74, + "learning_rate": 2.8596197523787195e-05, + "loss": 1.5296, + "step": 404300 + }, + { + "epoch": 1.74, + "learning_rate": 2.8590769137233957e-05, + "loss": 1.5301, + "step": 404400 + }, + { + "epoch": 1.74, + "learning_rate": 2.8585340750680722e-05, + "loss": 1.528, + "step": 404500 + }, + { + "epoch": 1.74, + "learning_rate": 2.8579912364127487e-05, + "loss": 1.5344, + "step": 404600 + }, + { + "epoch": 1.74, + "learning_rate": 2.8574483977574252e-05, + "loss": 1.5603, + "step": 404700 + }, + { + "epoch": 1.74, + "learning_rate": 2.8569055591021014e-05, + "loss": 1.5, + "step": 404800 + }, + { + "epoch": 1.74, + "learning_rate": 2.856362720446778e-05, + "loss": 1.519, + "step": 404900 + }, + { + "epoch": 1.74, + "learning_rate": 2.8558198817914545e-05, + "loss": 1.5268, + "step": 405000 + }, + { + "epoch": 1.74, + "eval_loss": 1.4383865594863892, + "eval_runtime": 17.8138, + "eval_samples_per_second": 561.361, + "eval_steps_per_second": 17.571, + "step": 405000 + }, + { + "epoch": 1.74, + "learning_rate": 2.855277043136131e-05, + "loss": 1.5201, + "step": 405100 + }, + { + "epoch": 1.74, + "learning_rate": 2.8547342044808072e-05, + "loss": 1.531, + "step": 405200 + }, + { + "epoch": 1.74, + "learning_rate": 2.8541913658254844e-05, + "loss": 1.5284, + "step": 405300 + }, + { + "epoch": 1.74, + "learning_rate": 2.8536485271701606e-05, + "loss": 1.5239, + "step": 405400 + }, + { + "epoch": 1.74, + "learning_rate": 2.853105688514837e-05, + "loss": 1.5457, + "step": 405500 + }, + { + "epoch": 1.74, + "learning_rate": 2.8525628498595136e-05, + "loss": 1.517, + "step": 405600 + }, + { + "epoch": 1.74, + "learning_rate": 2.85202001120419e-05, + "loss": 1.5408, + "step": 405700 + }, + { + "epoch": 1.74, + "learning_rate": 2.8514771725488663e-05, + "loss": 1.5529, + "step": 405800 + }, + { + "epoch": 1.74, + "learning_rate": 2.850934333893543e-05, + "loss": 1.5393, + "step": 405900 + }, + { + "epoch": 1.74, + "learning_rate": 2.8503914952382194e-05, + "loss": 1.5388, + "step": 406000 + }, + { + "epoch": 1.74, + "learning_rate": 2.849848656582896e-05, + "loss": 1.5186, + "step": 406100 + }, + { + "epoch": 1.75, + "learning_rate": 2.849305817927572e-05, + "loss": 1.5518, + "step": 406200 + }, + { + "epoch": 1.75, + "learning_rate": 2.8487629792722486e-05, + "loss": 1.543, + "step": 406300 + }, + { + "epoch": 1.75, + "learning_rate": 2.8482201406169255e-05, + "loss": 1.5168, + "step": 406400 + }, + { + "epoch": 1.75, + "learning_rate": 2.847677301961602e-05, + "loss": 1.5481, + "step": 406500 + }, + { + "epoch": 1.75, + "learning_rate": 2.8471344633062786e-05, + "loss": 1.5403, + "step": 406600 + }, + { + "epoch": 1.75, + "learning_rate": 2.846591624650955e-05, + "loss": 1.517, + "step": 406700 + }, + { + "epoch": 1.75, + "learning_rate": 2.8460487859956313e-05, + "loss": 1.5456, + "step": 406800 + }, + { + "epoch": 1.75, + "learning_rate": 2.8455059473403078e-05, + "loss": 1.5158, + "step": 406900 + }, + { + "epoch": 1.75, + "learning_rate": 2.8449631086849843e-05, + "loss": 1.5657, + "step": 407000 + }, + { + "epoch": 1.75, + "learning_rate": 2.844420270029661e-05, + "loss": 1.537, + "step": 407100 + }, + { + "epoch": 1.75, + "learning_rate": 2.843877431374337e-05, + "loss": 1.5425, + "step": 407200 + }, + { + "epoch": 1.75, + "learning_rate": 2.8433345927190136e-05, + "loss": 1.4908, + "step": 407300 + }, + { + "epoch": 1.75, + "learning_rate": 2.8427917540636904e-05, + "loss": 1.5385, + "step": 407400 + }, + { + "epoch": 1.75, + "learning_rate": 2.842248915408367e-05, + "loss": 1.5428, + "step": 407500 + }, + { + "epoch": 1.75, + "learning_rate": 2.8417060767530435e-05, + "loss": 1.5489, + "step": 407600 + }, + { + "epoch": 1.75, + "learning_rate": 2.84116323809772e-05, + "loss": 1.5266, + "step": 407700 + }, + { + "epoch": 1.75, + "learning_rate": 2.8406203994423962e-05, + "loss": 1.524, + "step": 407800 + }, + { + "epoch": 1.75, + "learning_rate": 2.8400775607870727e-05, + "loss": 1.5025, + "step": 407900 + }, + { + "epoch": 1.75, + "learning_rate": 2.8395347221317492e-05, + "loss": 1.5191, + "step": 408000 + }, + { + "epoch": 1.75, + "learning_rate": 2.8389918834764258e-05, + "loss": 1.5252, + "step": 408100 + }, + { + "epoch": 1.75, + "learning_rate": 2.838449044821102e-05, + "loss": 1.5154, + "step": 408200 + }, + { + "epoch": 1.75, + "learning_rate": 2.8379062061657785e-05, + "loss": 1.5204, + "step": 408300 + }, + { + "epoch": 1.75, + "learning_rate": 2.837363367510455e-05, + "loss": 1.5414, + "step": 408400 + }, + { + "epoch": 1.75, + "learning_rate": 2.836820528855132e-05, + "loss": 1.5467, + "step": 408500 + }, + { + "epoch": 1.76, + "learning_rate": 2.8362776901998084e-05, + "loss": 1.5474, + "step": 408600 + }, + { + "epoch": 1.76, + "learning_rate": 2.835734851544485e-05, + "loss": 1.5368, + "step": 408700 + }, + { + "epoch": 1.76, + "learning_rate": 2.835192012889161e-05, + "loss": 1.5173, + "step": 408800 + }, + { + "epoch": 1.76, + "learning_rate": 2.8346491742338376e-05, + "loss": 1.5255, + "step": 408900 + }, + { + "epoch": 1.76, + "learning_rate": 2.834106335578514e-05, + "loss": 1.5475, + "step": 409000 + }, + { + "epoch": 1.76, + "learning_rate": 2.8335634969231907e-05, + "loss": 1.5244, + "step": 409100 + }, + { + "epoch": 1.76, + "learning_rate": 2.833020658267867e-05, + "loss": 1.5163, + "step": 409200 + }, + { + "epoch": 1.76, + "learning_rate": 2.8324778196125434e-05, + "loss": 1.532, + "step": 409300 + }, + { + "epoch": 1.76, + "learning_rate": 2.83193498095722e-05, + "loss": 1.5529, + "step": 409400 + }, + { + "epoch": 1.76, + "learning_rate": 2.8313921423018968e-05, + "loss": 1.5279, + "step": 409500 + }, + { + "epoch": 1.76, + "learning_rate": 2.8308493036465733e-05, + "loss": 1.5326, + "step": 409600 + }, + { + "epoch": 1.76, + "learning_rate": 2.83030646499125e-05, + "loss": 1.4819, + "step": 409700 + }, + { + "epoch": 1.76, + "learning_rate": 2.829763626335926e-05, + "loss": 1.5172, + "step": 409800 + }, + { + "epoch": 1.76, + "learning_rate": 2.8292207876806026e-05, + "loss": 1.5296, + "step": 409900 + }, + { + "epoch": 1.76, + "learning_rate": 2.828677949025279e-05, + "loss": 1.5574, + "step": 410000 + }, + { + "epoch": 1.76, + "eval_loss": 1.4406307935714722, + "eval_runtime": 17.8024, + "eval_samples_per_second": 561.722, + "eval_steps_per_second": 17.582, + "step": 410000 + }, + { + "epoch": 1.76, + "learning_rate": 2.8281351103699556e-05, + "loss": 1.5008, + "step": 410100 + }, + { + "epoch": 1.76, + "learning_rate": 2.8275922717146318e-05, + "loss": 1.5263, + "step": 410200 + }, + { + "epoch": 1.76, + "learning_rate": 2.8270494330593083e-05, + "loss": 1.5134, + "step": 410300 + }, + { + "epoch": 1.76, + "learning_rate": 2.826506594403985e-05, + "loss": 1.532, + "step": 410400 + }, + { + "epoch": 1.76, + "learning_rate": 2.8259637557486614e-05, + "loss": 1.5147, + "step": 410500 + }, + { + "epoch": 1.76, + "learning_rate": 2.8254209170933383e-05, + "loss": 1.4813, + "step": 410600 + }, + { + "epoch": 1.76, + "learning_rate": 2.8248780784380148e-05, + "loss": 1.5178, + "step": 410700 + }, + { + "epoch": 1.76, + "learning_rate": 2.824335239782691e-05, + "loss": 1.5388, + "step": 410800 + }, + { + "epoch": 1.77, + "learning_rate": 2.8237924011273675e-05, + "loss": 1.5144, + "step": 410900 + }, + { + "epoch": 1.77, + "learning_rate": 2.823249562472044e-05, + "loss": 1.4999, + "step": 411000 + }, + { + "epoch": 1.77, + "learning_rate": 2.8227067238167205e-05, + "loss": 1.5258, + "step": 411100 + }, + { + "epoch": 1.77, + "learning_rate": 2.8221638851613967e-05, + "loss": 1.5123, + "step": 411200 + }, + { + "epoch": 1.77, + "learning_rate": 2.8216210465060733e-05, + "loss": 1.5162, + "step": 411300 + }, + { + "epoch": 1.77, + "learning_rate": 2.8210782078507498e-05, + "loss": 1.5132, + "step": 411400 + }, + { + "epoch": 1.77, + "learning_rate": 2.8205353691954263e-05, + "loss": 1.5276, + "step": 411500 + }, + { + "epoch": 1.77, + "learning_rate": 2.8199925305401032e-05, + "loss": 1.4997, + "step": 411600 + }, + { + "epoch": 1.77, + "learning_rate": 2.8194496918847797e-05, + "loss": 1.5132, + "step": 411700 + }, + { + "epoch": 1.77, + "learning_rate": 2.818906853229456e-05, + "loss": 1.5267, + "step": 411800 + }, + { + "epoch": 1.77, + "learning_rate": 2.8183640145741324e-05, + "loss": 1.5039, + "step": 411900 + }, + { + "epoch": 1.77, + "learning_rate": 2.817821175918809e-05, + "loss": 1.5142, + "step": 412000 + }, + { + "epoch": 1.77, + "learning_rate": 2.8172783372634855e-05, + "loss": 1.526, + "step": 412100 + }, + { + "epoch": 1.77, + "learning_rate": 2.8167354986081617e-05, + "loss": 1.5519, + "step": 412200 + }, + { + "epoch": 1.77, + "learning_rate": 2.8161926599528382e-05, + "loss": 1.5155, + "step": 412300 + }, + { + "epoch": 1.77, + "learning_rate": 2.8156498212975147e-05, + "loss": 1.5072, + "step": 412400 + }, + { + "epoch": 1.77, + "learning_rate": 2.8151069826421912e-05, + "loss": 1.5312, + "step": 412500 + }, + { + "epoch": 1.77, + "learning_rate": 2.8145641439868674e-05, + "loss": 1.5432, + "step": 412600 + }, + { + "epoch": 1.77, + "learning_rate": 2.8140213053315446e-05, + "loss": 1.5491, + "step": 412700 + }, + { + "epoch": 1.77, + "learning_rate": 2.8134784666762208e-05, + "loss": 1.5052, + "step": 412800 + }, + { + "epoch": 1.77, + "learning_rate": 2.8129356280208973e-05, + "loss": 1.5212, + "step": 412900 + }, + { + "epoch": 1.77, + "learning_rate": 2.812392789365574e-05, + "loss": 1.5209, + "step": 413000 + }, + { + "epoch": 1.77, + "learning_rate": 2.8118499507102504e-05, + "loss": 1.5077, + "step": 413100 + }, + { + "epoch": 1.78, + "learning_rate": 2.8113071120549266e-05, + "loss": 1.4798, + "step": 413200 + }, + { + "epoch": 1.78, + "learning_rate": 2.810764273399603e-05, + "loss": 1.5385, + "step": 413300 + }, + { + "epoch": 1.78, + "learning_rate": 2.8102214347442796e-05, + "loss": 1.5457, + "step": 413400 + }, + { + "epoch": 1.78, + "learning_rate": 2.809678596088956e-05, + "loss": 1.523, + "step": 413500 + }, + { + "epoch": 1.78, + "learning_rate": 2.8091357574336323e-05, + "loss": 1.5384, + "step": 413600 + }, + { + "epoch": 1.78, + "learning_rate": 2.8085929187783096e-05, + "loss": 1.5193, + "step": 413700 + }, + { + "epoch": 1.78, + "learning_rate": 2.8080500801229857e-05, + "loss": 1.5332, + "step": 413800 + }, + { + "epoch": 1.78, + "learning_rate": 2.8075072414676623e-05, + "loss": 1.5462, + "step": 413900 + }, + { + "epoch": 1.78, + "learning_rate": 2.8069644028123388e-05, + "loss": 1.5654, + "step": 414000 + }, + { + "epoch": 1.78, + "learning_rate": 2.8064215641570153e-05, + "loss": 1.5277, + "step": 414100 + }, + { + "epoch": 1.78, + "learning_rate": 2.8058787255016915e-05, + "loss": 1.485, + "step": 414200 + }, + { + "epoch": 1.78, + "learning_rate": 2.805335886846368e-05, + "loss": 1.4895, + "step": 414300 + }, + { + "epoch": 1.78, + "learning_rate": 2.8047930481910446e-05, + "loss": 1.5262, + "step": 414400 + }, + { + "epoch": 1.78, + "learning_rate": 2.804250209535721e-05, + "loss": 1.5392, + "step": 414500 + }, + { + "epoch": 1.78, + "learning_rate": 2.8037073708803973e-05, + "loss": 1.4971, + "step": 414600 + }, + { + "epoch": 1.78, + "learning_rate": 2.8031645322250738e-05, + "loss": 1.5281, + "step": 414700 + }, + { + "epoch": 1.78, + "learning_rate": 2.8026216935697507e-05, + "loss": 1.5242, + "step": 414800 + }, + { + "epoch": 1.78, + "learning_rate": 2.8020788549144272e-05, + "loss": 1.5357, + "step": 414900 + }, + { + "epoch": 1.78, + "learning_rate": 2.8015360162591037e-05, + "loss": 1.5335, + "step": 415000 + }, + { + "epoch": 1.78, + "eval_loss": 1.4382429122924805, + "eval_runtime": 17.7575, + "eval_samples_per_second": 563.143, + "eval_steps_per_second": 17.626, + "step": 415000 + }, + { + "epoch": 1.78, + "learning_rate": 2.8009931776037802e-05, + "loss": 1.5204, + "step": 415100 + }, + { + "epoch": 1.78, + "learning_rate": 2.8004503389484564e-05, + "loss": 1.5121, + "step": 415200 + }, + { + "epoch": 1.78, + "learning_rate": 2.799907500293133e-05, + "loss": 1.5203, + "step": 415300 + }, + { + "epoch": 1.78, + "learning_rate": 2.7993646616378095e-05, + "loss": 1.5089, + "step": 415400 + }, + { + "epoch": 1.79, + "learning_rate": 2.798821822982486e-05, + "loss": 1.5304, + "step": 415500 + }, + { + "epoch": 1.79, + "learning_rate": 2.7982789843271622e-05, + "loss": 1.524, + "step": 415600 + }, + { + "epoch": 1.79, + "learning_rate": 2.7977361456718387e-05, + "loss": 1.5185, + "step": 415700 + }, + { + "epoch": 1.79, + "learning_rate": 2.7971933070165156e-05, + "loss": 1.5323, + "step": 415800 + }, + { + "epoch": 1.79, + "learning_rate": 2.796650468361192e-05, + "loss": 1.5207, + "step": 415900 + }, + { + "epoch": 1.79, + "learning_rate": 2.7961076297058686e-05, + "loss": 1.5296, + "step": 416000 + }, + { + "epoch": 1.79, + "learning_rate": 2.795564791050545e-05, + "loss": 1.51, + "step": 416100 + }, + { + "epoch": 1.79, + "learning_rate": 2.7950219523952214e-05, + "loss": 1.5265, + "step": 416200 + }, + { + "epoch": 1.79, + "learning_rate": 2.794479113739898e-05, + "loss": 1.5355, + "step": 416300 + }, + { + "epoch": 1.79, + "learning_rate": 2.7939362750845744e-05, + "loss": 1.5426, + "step": 416400 + }, + { + "epoch": 1.79, + "learning_rate": 2.793393436429251e-05, + "loss": 1.5439, + "step": 416500 + }, + { + "epoch": 1.79, + "learning_rate": 2.792850597773927e-05, + "loss": 1.5535, + "step": 416600 + }, + { + "epoch": 1.79, + "learning_rate": 2.7923077591186036e-05, + "loss": 1.516, + "step": 416700 + }, + { + "epoch": 1.79, + "learning_rate": 2.7917649204632802e-05, + "loss": 1.5376, + "step": 416800 + }, + { + "epoch": 1.79, + "learning_rate": 2.791222081807957e-05, + "loss": 1.528, + "step": 416900 + }, + { + "epoch": 1.79, + "learning_rate": 2.7906792431526336e-05, + "loss": 1.5282, + "step": 417000 + }, + { + "epoch": 1.79, + "learning_rate": 2.79013640449731e-05, + "loss": 1.528, + "step": 417100 + }, + { + "epoch": 1.79, + "learning_rate": 2.7895935658419863e-05, + "loss": 1.5245, + "step": 417200 + }, + { + "epoch": 1.79, + "learning_rate": 2.7890507271866628e-05, + "loss": 1.5342, + "step": 417300 + }, + { + "epoch": 1.79, + "learning_rate": 2.7885078885313393e-05, + "loss": 1.5092, + "step": 417400 + }, + { + "epoch": 1.79, + "learning_rate": 2.787965049876016e-05, + "loss": 1.5356, + "step": 417500 + }, + { + "epoch": 1.79, + "learning_rate": 2.787422211220692e-05, + "loss": 1.5228, + "step": 417600 + }, + { + "epoch": 1.79, + "learning_rate": 2.7868793725653686e-05, + "loss": 1.5355, + "step": 417700 + }, + { + "epoch": 1.79, + "learning_rate": 2.786336533910045e-05, + "loss": 1.5256, + "step": 417800 + }, + { + "epoch": 1.8, + "learning_rate": 2.785793695254722e-05, + "loss": 1.4689, + "step": 417900 + }, + { + "epoch": 1.8, + "learning_rate": 2.7852508565993985e-05, + "loss": 1.5218, + "step": 418000 + }, + { + "epoch": 1.8, + "learning_rate": 2.784708017944075e-05, + "loss": 1.5373, + "step": 418100 + }, + { + "epoch": 1.8, + "learning_rate": 2.7841651792887512e-05, + "loss": 1.5249, + "step": 418200 + }, + { + "epoch": 1.8, + "learning_rate": 2.7836223406334277e-05, + "loss": 1.526, + "step": 418300 + }, + { + "epoch": 1.8, + "learning_rate": 2.7830795019781043e-05, + "loss": 1.5213, + "step": 418400 + }, + { + "epoch": 1.8, + "learning_rate": 2.7825366633227808e-05, + "loss": 1.5095, + "step": 418500 + }, + { + "epoch": 1.8, + "learning_rate": 2.781993824667457e-05, + "loss": 1.5196, + "step": 418600 + }, + { + "epoch": 1.8, + "learning_rate": 2.7814509860121335e-05, + "loss": 1.5147, + "step": 418700 + }, + { + "epoch": 1.8, + "learning_rate": 2.78090814735681e-05, + "loss": 1.5175, + "step": 418800 + }, + { + "epoch": 1.8, + "learning_rate": 2.7803653087014862e-05, + "loss": 1.535, + "step": 418900 + }, + { + "epoch": 1.8, + "learning_rate": 2.7798224700461634e-05, + "loss": 1.4684, + "step": 419000 + }, + { + "epoch": 1.8, + "learning_rate": 2.77927963139084e-05, + "loss": 1.5448, + "step": 419100 + }, + { + "epoch": 1.8, + "learning_rate": 2.778736792735516e-05, + "loss": 1.5211, + "step": 419200 + }, + { + "epoch": 1.8, + "learning_rate": 2.7781939540801927e-05, + "loss": 1.5303, + "step": 419300 + }, + { + "epoch": 1.8, + "learning_rate": 2.7776511154248692e-05, + "loss": 1.5182, + "step": 419400 + }, + { + "epoch": 1.8, + "learning_rate": 2.7771082767695457e-05, + "loss": 1.5291, + "step": 419500 + }, + { + "epoch": 1.8, + "learning_rate": 2.776565438114222e-05, + "loss": 1.5256, + "step": 419600 + }, + { + "epoch": 1.8, + "learning_rate": 2.7760225994588984e-05, + "loss": 1.5461, + "step": 419700 + }, + { + "epoch": 1.8, + "learning_rate": 2.775479760803575e-05, + "loss": 1.5385, + "step": 419800 + }, + { + "epoch": 1.8, + "learning_rate": 2.774936922148251e-05, + "loss": 1.547, + "step": 419900 + }, + { + "epoch": 1.8, + "learning_rate": 2.7743940834929283e-05, + "loss": 1.5163, + "step": 420000 + }, + { + "epoch": 1.8, + "eval_loss": 1.4372782707214355, + "eval_runtime": 17.8039, + "eval_samples_per_second": 561.676, + "eval_steps_per_second": 17.58, + "step": 420000 + }, + { + "epoch": 1.8, + "learning_rate": 2.773851244837605e-05, + "loss": 1.4974, + "step": 420100 + }, + { + "epoch": 1.81, + "learning_rate": 2.773308406182281e-05, + "loss": 1.5112, + "step": 420200 + }, + { + "epoch": 1.81, + "learning_rate": 2.7727655675269576e-05, + "loss": 1.5071, + "step": 420300 + }, + { + "epoch": 1.81, + "learning_rate": 2.772222728871634e-05, + "loss": 1.5225, + "step": 420400 + }, + { + "epoch": 1.81, + "learning_rate": 2.7716798902163106e-05, + "loss": 1.5471, + "step": 420500 + }, + { + "epoch": 1.81, + "learning_rate": 2.7711370515609868e-05, + "loss": 1.5217, + "step": 420600 + }, + { + "epoch": 1.81, + "learning_rate": 2.7705942129056633e-05, + "loss": 1.4961, + "step": 420700 + }, + { + "epoch": 1.81, + "learning_rate": 2.77005137425034e-05, + "loss": 1.5081, + "step": 420800 + }, + { + "epoch": 1.81, + "learning_rate": 2.769508535595016e-05, + "loss": 1.5024, + "step": 420900 + }, + { + "epoch": 1.81, + "learning_rate": 2.7689656969396926e-05, + "loss": 1.5408, + "step": 421000 + }, + { + "epoch": 1.81, + "learning_rate": 2.7684228582843698e-05, + "loss": 1.537, + "step": 421100 + }, + { + "epoch": 1.81, + "learning_rate": 2.767880019629046e-05, + "loss": 1.5233, + "step": 421200 + }, + { + "epoch": 1.81, + "learning_rate": 2.7673371809737225e-05, + "loss": 1.5183, + "step": 421300 + }, + { + "epoch": 1.81, + "learning_rate": 2.766794342318399e-05, + "loss": 1.5317, + "step": 421400 + }, + { + "epoch": 1.81, + "learning_rate": 2.7662515036630756e-05, + "loss": 1.5374, + "step": 421500 + }, + { + "epoch": 1.81, + "learning_rate": 2.7657086650077517e-05, + "loss": 1.525, + "step": 421600 + }, + { + "epoch": 1.81, + "learning_rate": 2.7651658263524283e-05, + "loss": 1.5106, + "step": 421700 + }, + { + "epoch": 1.81, + "learning_rate": 2.7646229876971048e-05, + "loss": 1.4967, + "step": 421800 + }, + { + "epoch": 1.81, + "learning_rate": 2.764080149041781e-05, + "loss": 1.5214, + "step": 421900 + }, + { + "epoch": 1.81, + "learning_rate": 2.7635373103864575e-05, + "loss": 1.522, + "step": 422000 + }, + { + "epoch": 1.81, + "learning_rate": 2.7629944717311347e-05, + "loss": 1.5259, + "step": 422100 + }, + { + "epoch": 1.81, + "learning_rate": 2.762451633075811e-05, + "loss": 1.4915, + "step": 422200 + }, + { + "epoch": 1.81, + "learning_rate": 2.7619087944204874e-05, + "loss": 1.531, + "step": 422300 + }, + { + "epoch": 1.81, + "learning_rate": 2.761365955765164e-05, + "loss": 1.5303, + "step": 422400 + }, + { + "epoch": 1.82, + "learning_rate": 2.7608231171098405e-05, + "loss": 1.505, + "step": 422500 + }, + { + "epoch": 1.82, + "learning_rate": 2.7602802784545167e-05, + "loss": 1.5445, + "step": 422600 + }, + { + "epoch": 1.82, + "learning_rate": 2.7597374397991932e-05, + "loss": 1.5211, + "step": 422700 + }, + { + "epoch": 1.82, + "learning_rate": 2.7591946011438697e-05, + "loss": 1.5107, + "step": 422800 + }, + { + "epoch": 1.82, + "learning_rate": 2.758651762488546e-05, + "loss": 1.4987, + "step": 422900 + }, + { + "epoch": 1.82, + "learning_rate": 2.7581089238332224e-05, + "loss": 1.5144, + "step": 423000 + }, + { + "epoch": 1.82, + "learning_rate": 2.757566085177899e-05, + "loss": 1.5307, + "step": 423100 + }, + { + "epoch": 1.82, + "learning_rate": 2.7570232465225758e-05, + "loss": 1.5336, + "step": 423200 + }, + { + "epoch": 1.82, + "learning_rate": 2.7564804078672524e-05, + "loss": 1.5058, + "step": 423300 + }, + { + "epoch": 1.82, + "learning_rate": 2.755937569211929e-05, + "loss": 1.5249, + "step": 423400 + }, + { + "epoch": 1.82, + "learning_rate": 2.7553947305566054e-05, + "loss": 1.5157, + "step": 423500 + }, + { + "epoch": 1.82, + "learning_rate": 2.7548518919012816e-05, + "loss": 1.5169, + "step": 423600 + }, + { + "epoch": 1.82, + "learning_rate": 2.754309053245958e-05, + "loss": 1.5482, + "step": 423700 + }, + { + "epoch": 1.82, + "learning_rate": 2.7537662145906346e-05, + "loss": 1.5145, + "step": 423800 + }, + { + "epoch": 1.82, + "learning_rate": 2.753223375935311e-05, + "loss": 1.5434, + "step": 423900 + }, + { + "epoch": 1.82, + "learning_rate": 2.7526805372799874e-05, + "loss": 1.505, + "step": 424000 + }, + { + "epoch": 1.82, + "learning_rate": 2.752137698624664e-05, + "loss": 1.5246, + "step": 424100 + }, + { + "epoch": 1.82, + "learning_rate": 2.7515948599693407e-05, + "loss": 1.532, + "step": 424200 + }, + { + "epoch": 1.82, + "learning_rate": 2.7510520213140173e-05, + "loss": 1.4976, + "step": 424300 + }, + { + "epoch": 1.82, + "learning_rate": 2.7505091826586938e-05, + "loss": 1.5232, + "step": 424400 + }, + { + "epoch": 1.82, + "learning_rate": 2.7499663440033703e-05, + "loss": 1.5196, + "step": 424500 + }, + { + "epoch": 1.82, + "learning_rate": 2.7494235053480465e-05, + "loss": 1.5297, + "step": 424600 + }, + { + "epoch": 1.82, + "learning_rate": 2.748880666692723e-05, + "loss": 1.5444, + "step": 424700 + }, + { + "epoch": 1.82, + "learning_rate": 2.7483378280373996e-05, + "loss": 1.5246, + "step": 424800 + }, + { + "epoch": 1.83, + "learning_rate": 2.7477949893820758e-05, + "loss": 1.5539, + "step": 424900 + }, + { + "epoch": 1.83, + "learning_rate": 2.7472521507267523e-05, + "loss": 1.5018, + "step": 425000 + }, + { + "epoch": 1.83, + "eval_loss": 1.439378261566162, + "eval_runtime": 17.7944, + "eval_samples_per_second": 561.975, + "eval_steps_per_second": 17.59, + "step": 425000 + }, + { + "epoch": 1.83, + "learning_rate": 2.7467093120714288e-05, + "loss": 1.5323, + "step": 425100 + }, + { + "epoch": 1.83, + "learning_rate": 2.7461664734161057e-05, + "loss": 1.557, + "step": 425200 + }, + { + "epoch": 1.83, + "learning_rate": 2.7456236347607822e-05, + "loss": 1.5208, + "step": 425300 + }, + { + "epoch": 1.83, + "learning_rate": 2.7450807961054587e-05, + "loss": 1.5268, + "step": 425400 + }, + { + "epoch": 1.83, + "learning_rate": 2.7445379574501353e-05, + "loss": 1.5093, + "step": 425500 + }, + { + "epoch": 1.83, + "learning_rate": 2.7439951187948114e-05, + "loss": 1.5231, + "step": 425600 + }, + { + "epoch": 1.83, + "learning_rate": 2.743452280139488e-05, + "loss": 1.5062, + "step": 425700 + }, + { + "epoch": 1.83, + "learning_rate": 2.7429094414841645e-05, + "loss": 1.5069, + "step": 425800 + }, + { + "epoch": 1.83, + "learning_rate": 2.7423666028288407e-05, + "loss": 1.5228, + "step": 425900 + }, + { + "epoch": 1.83, + "learning_rate": 2.7418237641735172e-05, + "loss": 1.5146, + "step": 426000 + }, + { + "epoch": 1.83, + "learning_rate": 2.7412809255181937e-05, + "loss": 1.5084, + "step": 426100 + }, + { + "epoch": 1.83, + "learning_rate": 2.7407380868628703e-05, + "loss": 1.5113, + "step": 426200 + }, + { + "epoch": 1.83, + "learning_rate": 2.740195248207547e-05, + "loss": 1.523, + "step": 426300 + }, + { + "epoch": 1.83, + "learning_rate": 2.7396524095522237e-05, + "loss": 1.5522, + "step": 426400 + }, + { + "epoch": 1.83, + "learning_rate": 2.7391095708969002e-05, + "loss": 1.5223, + "step": 426500 + }, + { + "epoch": 1.83, + "learning_rate": 2.7385667322415764e-05, + "loss": 1.5028, + "step": 426600 + }, + { + "epoch": 1.83, + "learning_rate": 2.738023893586253e-05, + "loss": 1.5345, + "step": 426700 + }, + { + "epoch": 1.83, + "learning_rate": 2.7374810549309294e-05, + "loss": 1.5229, + "step": 426800 + }, + { + "epoch": 1.83, + "learning_rate": 2.7369382162756056e-05, + "loss": 1.5139, + "step": 426900 + }, + { + "epoch": 1.83, + "learning_rate": 2.736395377620282e-05, + "loss": 1.5395, + "step": 427000 + }, + { + "epoch": 1.83, + "learning_rate": 2.7358525389649587e-05, + "loss": 1.542, + "step": 427100 + }, + { + "epoch": 1.84, + "learning_rate": 2.7353097003096352e-05, + "loss": 1.5018, + "step": 427200 + }, + { + "epoch": 1.84, + "learning_rate": 2.734766861654312e-05, + "loss": 1.5242, + "step": 427300 + }, + { + "epoch": 1.84, + "learning_rate": 2.7342240229989886e-05, + "loss": 1.5297, + "step": 427400 + }, + { + "epoch": 1.84, + "learning_rate": 2.733681184343665e-05, + "loss": 1.5024, + "step": 427500 + }, + { + "epoch": 1.84, + "learning_rate": 2.7331383456883413e-05, + "loss": 1.5456, + "step": 427600 + }, + { + "epoch": 1.84, + "learning_rate": 2.7325955070330178e-05, + "loss": 1.5168, + "step": 427700 + }, + { + "epoch": 1.84, + "learning_rate": 2.7320526683776943e-05, + "loss": 1.5301, + "step": 427800 + }, + { + "epoch": 1.84, + "learning_rate": 2.7315098297223705e-05, + "loss": 1.5365, + "step": 427900 + }, + { + "epoch": 1.84, + "learning_rate": 2.730966991067047e-05, + "loss": 1.5342, + "step": 428000 + }, + { + "epoch": 1.84, + "learning_rate": 2.7304241524117236e-05, + "loss": 1.543, + "step": 428100 + }, + { + "epoch": 1.84, + "learning_rate": 2.7298813137564e-05, + "loss": 1.5274, + "step": 428200 + }, + { + "epoch": 1.84, + "learning_rate": 2.7293384751010763e-05, + "loss": 1.5155, + "step": 428300 + }, + { + "epoch": 1.84, + "learning_rate": 2.7287956364457535e-05, + "loss": 1.5238, + "step": 428400 + }, + { + "epoch": 1.84, + "learning_rate": 2.72825279779043e-05, + "loss": 1.5082, + "step": 428500 + }, + { + "epoch": 1.84, + "learning_rate": 2.7277099591351062e-05, + "loss": 1.5241, + "step": 428600 + }, + { + "epoch": 1.84, + "learning_rate": 2.7271671204797827e-05, + "loss": 1.538, + "step": 428700 + }, + { + "epoch": 1.84, + "learning_rate": 2.7266242818244593e-05, + "loss": 1.5274, + "step": 428800 + }, + { + "epoch": 1.84, + "learning_rate": 2.7260814431691355e-05, + "loss": 1.5441, + "step": 428900 + }, + { + "epoch": 1.84, + "learning_rate": 2.725538604513812e-05, + "loss": 1.5111, + "step": 429000 + }, + { + "epoch": 1.84, + "learning_rate": 2.7249957658584885e-05, + "loss": 1.5125, + "step": 429100 + }, + { + "epoch": 1.84, + "learning_rate": 2.724452927203165e-05, + "loss": 1.5252, + "step": 429200 + }, + { + "epoch": 1.84, + "learning_rate": 2.7239100885478412e-05, + "loss": 1.5299, + "step": 429300 + }, + { + "epoch": 1.84, + "learning_rate": 2.7233672498925184e-05, + "loss": 1.512, + "step": 429400 + }, + { + "epoch": 1.85, + "learning_rate": 2.722824411237195e-05, + "loss": 1.5447, + "step": 429500 + }, + { + "epoch": 1.85, + "learning_rate": 2.722281572581871e-05, + "loss": 1.5315, + "step": 429600 + }, + { + "epoch": 1.85, + "learning_rate": 2.7217387339265477e-05, + "loss": 1.5102, + "step": 429700 + }, + { + "epoch": 1.85, + "learning_rate": 2.7211958952712242e-05, + "loss": 1.5258, + "step": 429800 + }, + { + "epoch": 1.85, + "learning_rate": 2.7206530566159004e-05, + "loss": 1.5251, + "step": 429900 + }, + { + "epoch": 1.85, + "learning_rate": 2.720110217960577e-05, + "loss": 1.5062, + "step": 430000 + }, + { + "epoch": 1.85, + "eval_loss": 1.4390578269958496, + "eval_runtime": 17.787, + "eval_samples_per_second": 562.209, + "eval_steps_per_second": 17.597, + "step": 430000 + }, + { + "epoch": 1.85, + "learning_rate": 2.7195673793052534e-05, + "loss": 1.5278, + "step": 430100 + }, + { + "epoch": 1.85, + "learning_rate": 2.71902454064993e-05, + "loss": 1.5421, + "step": 430200 + }, + { + "epoch": 1.85, + "learning_rate": 2.718481701994606e-05, + "loss": 1.5117, + "step": 430300 + }, + { + "epoch": 1.85, + "learning_rate": 2.7179388633392827e-05, + "loss": 1.5179, + "step": 430400 + }, + { + "epoch": 1.85, + "learning_rate": 2.71739602468396e-05, + "loss": 1.5315, + "step": 430500 + }, + { + "epoch": 1.85, + "learning_rate": 2.716853186028636e-05, + "loss": 1.5208, + "step": 430600 + }, + { + "epoch": 1.85, + "learning_rate": 2.7163103473733126e-05, + "loss": 1.5164, + "step": 430700 + }, + { + "epoch": 1.85, + "learning_rate": 2.715767508717989e-05, + "loss": 1.5265, + "step": 430800 + }, + { + "epoch": 1.85, + "learning_rate": 2.7152246700626653e-05, + "loss": 1.5322, + "step": 430900 + }, + { + "epoch": 1.85, + "learning_rate": 2.7146818314073418e-05, + "loss": 1.5207, + "step": 431000 + }, + { + "epoch": 1.85, + "learning_rate": 2.7141389927520184e-05, + "loss": 1.5297, + "step": 431100 + }, + { + "epoch": 1.85, + "learning_rate": 2.713596154096695e-05, + "loss": 1.5421, + "step": 431200 + }, + { + "epoch": 1.85, + "learning_rate": 2.713053315441371e-05, + "loss": 1.5178, + "step": 431300 + }, + { + "epoch": 1.85, + "learning_rate": 2.7125104767860476e-05, + "loss": 1.5537, + "step": 431400 + }, + { + "epoch": 1.85, + "learning_rate": 2.7119676381307248e-05, + "loss": 1.5231, + "step": 431500 + }, + { + "epoch": 1.85, + "learning_rate": 2.711424799475401e-05, + "loss": 1.5325, + "step": 431600 + }, + { + "epoch": 1.85, + "learning_rate": 2.7108819608200775e-05, + "loss": 1.5152, + "step": 431700 + }, + { + "epoch": 1.86, + "learning_rate": 2.710339122164754e-05, + "loss": 1.5053, + "step": 431800 + }, + { + "epoch": 1.86, + "learning_rate": 2.7097962835094302e-05, + "loss": 1.5255, + "step": 431900 + }, + { + "epoch": 1.86, + "learning_rate": 2.7092534448541068e-05, + "loss": 1.5174, + "step": 432000 + }, + { + "epoch": 1.86, + "learning_rate": 2.7087106061987833e-05, + "loss": 1.5404, + "step": 432100 + }, + { + "epoch": 1.86, + "learning_rate": 2.7081677675434598e-05, + "loss": 1.5016, + "step": 432200 + }, + { + "epoch": 1.86, + "learning_rate": 2.707624928888136e-05, + "loss": 1.5244, + "step": 432300 + }, + { + "epoch": 1.86, + "learning_rate": 2.7070820902328125e-05, + "loss": 1.5198, + "step": 432400 + }, + { + "epoch": 1.86, + "learning_rate": 2.706539251577489e-05, + "loss": 1.5196, + "step": 432500 + }, + { + "epoch": 1.86, + "learning_rate": 2.705996412922166e-05, + "loss": 1.5185, + "step": 432600 + }, + { + "epoch": 1.86, + "learning_rate": 2.7054535742668424e-05, + "loss": 1.5117, + "step": 432700 + }, + { + "epoch": 1.86, + "learning_rate": 2.704910735611519e-05, + "loss": 1.5151, + "step": 432800 + }, + { + "epoch": 1.86, + "learning_rate": 2.704367896956195e-05, + "loss": 1.5069, + "step": 432900 + }, + { + "epoch": 1.86, + "learning_rate": 2.7038250583008717e-05, + "loss": 1.4896, + "step": 433000 + }, + { + "epoch": 1.86, + "learning_rate": 2.7032822196455482e-05, + "loss": 1.5276, + "step": 433100 + }, + { + "epoch": 1.86, + "learning_rate": 2.7027393809902247e-05, + "loss": 1.5253, + "step": 433200 + }, + { + "epoch": 1.86, + "learning_rate": 2.702196542334901e-05, + "loss": 1.5171, + "step": 433300 + }, + { + "epoch": 1.86, + "learning_rate": 2.7016537036795774e-05, + "loss": 1.5322, + "step": 433400 + }, + { + "epoch": 1.86, + "learning_rate": 2.701110865024254e-05, + "loss": 1.53, + "step": 433500 + }, + { + "epoch": 1.86, + "learning_rate": 2.700568026368931e-05, + "loss": 1.5743, + "step": 433600 + }, + { + "epoch": 1.86, + "learning_rate": 2.7000251877136074e-05, + "loss": 1.551, + "step": 433700 + }, + { + "epoch": 1.86, + "learning_rate": 2.699482349058284e-05, + "loss": 1.5115, + "step": 433800 + }, + { + "epoch": 1.86, + "learning_rate": 2.69893951040296e-05, + "loss": 1.5231, + "step": 433900 + }, + { + "epoch": 1.86, + "learning_rate": 2.6983966717476366e-05, + "loss": 1.5368, + "step": 434000 + }, + { + "epoch": 1.86, + "learning_rate": 2.697853833092313e-05, + "loss": 1.5166, + "step": 434100 + }, + { + "epoch": 1.87, + "learning_rate": 2.6973109944369897e-05, + "loss": 1.4913, + "step": 434200 + }, + { + "epoch": 1.87, + "learning_rate": 2.696768155781666e-05, + "loss": 1.5306, + "step": 434300 + }, + { + "epoch": 1.87, + "learning_rate": 2.6962253171263424e-05, + "loss": 1.5197, + "step": 434400 + }, + { + "epoch": 1.87, + "learning_rate": 2.695682478471019e-05, + "loss": 1.5224, + "step": 434500 + }, + { + "epoch": 1.87, + "learning_rate": 2.6951396398156954e-05, + "loss": 1.55, + "step": 434600 + }, + { + "epoch": 1.87, + "learning_rate": 2.6945968011603723e-05, + "loss": 1.5211, + "step": 434700 + }, + { + "epoch": 1.87, + "learning_rate": 2.6940539625050488e-05, + "loss": 1.521, + "step": 434800 + }, + { + "epoch": 1.87, + "learning_rate": 2.693511123849725e-05, + "loss": 1.5298, + "step": 434900 + }, + { + "epoch": 1.87, + "learning_rate": 2.6929682851944015e-05, + "loss": 1.5378, + "step": 435000 + }, + { + "epoch": 1.87, + "eval_loss": 1.4357421398162842, + "eval_runtime": 17.828, + "eval_samples_per_second": 560.916, + "eval_steps_per_second": 17.557, + "step": 435000 + }, + { + "epoch": 1.87, + "learning_rate": 2.692425446539078e-05, + "loss": 1.5364, + "step": 435100 + }, + { + "epoch": 1.87, + "learning_rate": 2.6918826078837546e-05, + "loss": 1.5312, + "step": 435200 + }, + { + "epoch": 1.87, + "learning_rate": 2.6913397692284308e-05, + "loss": 1.5099, + "step": 435300 + }, + { + "epoch": 1.87, + "learning_rate": 2.6907969305731073e-05, + "loss": 1.5087, + "step": 435400 + }, + { + "epoch": 1.87, + "learning_rate": 2.6902540919177838e-05, + "loss": 1.5656, + "step": 435500 + }, + { + "epoch": 1.87, + "learning_rate": 2.6897112532624603e-05, + "loss": 1.5303, + "step": 435600 + }, + { + "epoch": 1.87, + "learning_rate": 2.6891684146071372e-05, + "loss": 1.5082, + "step": 435700 + }, + { + "epoch": 1.87, + "learning_rate": 2.6886255759518137e-05, + "loss": 1.5383, + "step": 435800 + }, + { + "epoch": 1.87, + "learning_rate": 2.68808273729649e-05, + "loss": 1.5201, + "step": 435900 + }, + { + "epoch": 1.87, + "learning_rate": 2.6875398986411665e-05, + "loss": 1.5027, + "step": 436000 + }, + { + "epoch": 1.87, + "learning_rate": 2.686997059985843e-05, + "loss": 1.5225, + "step": 436100 + }, + { + "epoch": 1.87, + "learning_rate": 2.6864542213305195e-05, + "loss": 1.4969, + "step": 436200 + }, + { + "epoch": 1.87, + "learning_rate": 2.6859113826751957e-05, + "loss": 1.5574, + "step": 436300 + }, + { + "epoch": 1.87, + "learning_rate": 2.6853685440198722e-05, + "loss": 1.5404, + "step": 436400 + }, + { + "epoch": 1.88, + "learning_rate": 2.6848257053645487e-05, + "loss": 1.5161, + "step": 436500 + }, + { + "epoch": 1.88, + "learning_rate": 2.6842828667092253e-05, + "loss": 1.5114, + "step": 436600 + }, + { + "epoch": 1.88, + "learning_rate": 2.6837400280539015e-05, + "loss": 1.514, + "step": 436700 + }, + { + "epoch": 1.88, + "learning_rate": 2.6831971893985787e-05, + "loss": 1.5196, + "step": 436800 + }, + { + "epoch": 1.88, + "learning_rate": 2.682654350743255e-05, + "loss": 1.5169, + "step": 436900 + }, + { + "epoch": 1.88, + "learning_rate": 2.6821115120879314e-05, + "loss": 1.513, + "step": 437000 + }, + { + "epoch": 1.88, + "learning_rate": 2.681568673432608e-05, + "loss": 1.5279, + "step": 437100 + }, + { + "epoch": 1.88, + "learning_rate": 2.6810258347772844e-05, + "loss": 1.5097, + "step": 437200 + }, + { + "epoch": 1.88, + "learning_rate": 2.6804829961219606e-05, + "loss": 1.522, + "step": 437300 + }, + { + "epoch": 1.88, + "learning_rate": 2.679940157466637e-05, + "loss": 1.5354, + "step": 437400 + }, + { + "epoch": 1.88, + "learning_rate": 2.6793973188113137e-05, + "loss": 1.5158, + "step": 437500 + }, + { + "epoch": 1.88, + "learning_rate": 2.6788544801559902e-05, + "loss": 1.5274, + "step": 437600 + }, + { + "epoch": 1.88, + "learning_rate": 2.6783116415006664e-05, + "loss": 1.5224, + "step": 437700 + }, + { + "epoch": 1.88, + "learning_rate": 2.6777688028453436e-05, + "loss": 1.5052, + "step": 437800 + }, + { + "epoch": 1.88, + "learning_rate": 2.6772259641900198e-05, + "loss": 1.5173, + "step": 437900 + }, + { + "epoch": 1.88, + "learning_rate": 2.6766831255346963e-05, + "loss": 1.4881, + "step": 438000 + }, + { + "epoch": 1.88, + "learning_rate": 2.6761402868793728e-05, + "loss": 1.514, + "step": 438100 + }, + { + "epoch": 1.88, + "learning_rate": 2.6755974482240494e-05, + "loss": 1.5351, + "step": 438200 + }, + { + "epoch": 1.88, + "learning_rate": 2.6750546095687255e-05, + "loss": 1.5091, + "step": 438300 + }, + { + "epoch": 1.88, + "learning_rate": 2.674511770913402e-05, + "loss": 1.5445, + "step": 438400 + }, + { + "epoch": 1.88, + "learning_rate": 2.6739689322580786e-05, + "loss": 1.5062, + "step": 438500 + }, + { + "epoch": 1.88, + "learning_rate": 2.673426093602755e-05, + "loss": 1.558, + "step": 438600 + }, + { + "epoch": 1.88, + "learning_rate": 2.6728832549474313e-05, + "loss": 1.5366, + "step": 438700 + }, + { + "epoch": 1.89, + "learning_rate": 2.672340416292108e-05, + "loss": 1.5452, + "step": 438800 + }, + { + "epoch": 1.89, + "learning_rate": 2.6717975776367847e-05, + "loss": 1.5135, + "step": 438900 + }, + { + "epoch": 1.89, + "learning_rate": 2.6712547389814612e-05, + "loss": 1.5453, + "step": 439000 + }, + { + "epoch": 1.89, + "learning_rate": 2.6707119003261377e-05, + "loss": 1.5165, + "step": 439100 + }, + { + "epoch": 1.89, + "learning_rate": 2.6701690616708143e-05, + "loss": 1.5297, + "step": 439200 + }, + { + "epoch": 1.89, + "learning_rate": 2.6696262230154905e-05, + "loss": 1.5236, + "step": 439300 + }, + { + "epoch": 1.89, + "learning_rate": 2.669083384360167e-05, + "loss": 1.5378, + "step": 439400 + }, + { + "epoch": 1.89, + "learning_rate": 2.6685405457048435e-05, + "loss": 1.5295, + "step": 439500 + }, + { + "epoch": 1.89, + "learning_rate": 2.66799770704952e-05, + "loss": 1.5415, + "step": 439600 + }, + { + "epoch": 1.89, + "learning_rate": 2.6674548683941962e-05, + "loss": 1.5155, + "step": 439700 + }, + { + "epoch": 1.89, + "learning_rate": 2.6669120297388728e-05, + "loss": 1.5277, + "step": 439800 + }, + { + "epoch": 1.89, + "learning_rate": 2.6663691910835496e-05, + "loss": 1.535, + "step": 439900 + }, + { + "epoch": 1.89, + "learning_rate": 2.665826352428226e-05, + "loss": 1.5224, + "step": 440000 + }, + { + "epoch": 1.89, + "eval_loss": 1.4352110624313354, + "eval_runtime": 17.8498, + "eval_samples_per_second": 560.232, + "eval_steps_per_second": 17.535, + "step": 440000 + }, + { + "epoch": 1.89, + "learning_rate": 2.6652835137729027e-05, + "loss": 1.5307, + "step": 440100 + }, + { + "epoch": 1.89, + "learning_rate": 2.6647406751175792e-05, + "loss": 1.5424, + "step": 440200 + }, + { + "epoch": 1.89, + "learning_rate": 2.6641978364622554e-05, + "loss": 1.5518, + "step": 440300 + }, + { + "epoch": 1.89, + "learning_rate": 2.663654997806932e-05, + "loss": 1.5074, + "step": 440400 + }, + { + "epoch": 1.89, + "learning_rate": 2.6631121591516084e-05, + "loss": 1.5107, + "step": 440500 + }, + { + "epoch": 1.89, + "learning_rate": 2.662569320496285e-05, + "loss": 1.5267, + "step": 440600 + }, + { + "epoch": 1.89, + "learning_rate": 2.662026481840961e-05, + "loss": 1.5299, + "step": 440700 + }, + { + "epoch": 1.89, + "learning_rate": 2.6614836431856377e-05, + "loss": 1.5614, + "step": 440800 + }, + { + "epoch": 1.89, + "learning_rate": 2.6609408045303142e-05, + "loss": 1.5349, + "step": 440900 + }, + { + "epoch": 1.89, + "learning_rate": 2.660397965874991e-05, + "loss": 1.5416, + "step": 441000 + }, + { + "epoch": 1.89, + "learning_rate": 2.6598551272196676e-05, + "loss": 1.5203, + "step": 441100 + }, + { + "epoch": 1.9, + "learning_rate": 2.659312288564344e-05, + "loss": 1.5251, + "step": 441200 + }, + { + "epoch": 1.9, + "learning_rate": 2.6587694499090203e-05, + "loss": 1.5086, + "step": 441300 + }, + { + "epoch": 1.9, + "learning_rate": 2.658226611253697e-05, + "loss": 1.5369, + "step": 441400 + }, + { + "epoch": 1.9, + "learning_rate": 2.6576837725983734e-05, + "loss": 1.5259, + "step": 441500 + }, + { + "epoch": 1.9, + "learning_rate": 2.65714093394305e-05, + "loss": 1.5062, + "step": 441600 + }, + { + "epoch": 1.9, + "learning_rate": 2.656598095287726e-05, + "loss": 1.4988, + "step": 441700 + }, + { + "epoch": 1.9, + "learning_rate": 2.6560552566324026e-05, + "loss": 1.5246, + "step": 441800 + }, + { + "epoch": 1.9, + "learning_rate": 2.655512417977079e-05, + "loss": 1.5177, + "step": 441900 + }, + { + "epoch": 1.9, + "learning_rate": 2.654969579321756e-05, + "loss": 1.5016, + "step": 442000 + }, + { + "epoch": 1.9, + "learning_rate": 2.6544267406664325e-05, + "loss": 1.5054, + "step": 442100 + }, + { + "epoch": 1.9, + "learning_rate": 2.653883902011109e-05, + "loss": 1.5185, + "step": 442200 + }, + { + "epoch": 1.9, + "learning_rate": 2.6533410633557852e-05, + "loss": 1.5031, + "step": 442300 + }, + { + "epoch": 1.9, + "learning_rate": 2.6527982247004618e-05, + "loss": 1.5084, + "step": 442400 + }, + { + "epoch": 1.9, + "learning_rate": 2.6522553860451383e-05, + "loss": 1.5021, + "step": 442500 + }, + { + "epoch": 1.9, + "learning_rate": 2.6517125473898148e-05, + "loss": 1.5188, + "step": 442600 + }, + { + "epoch": 1.9, + "learning_rate": 2.651169708734491e-05, + "loss": 1.5382, + "step": 442700 + }, + { + "epoch": 1.9, + "learning_rate": 2.6506268700791675e-05, + "loss": 1.5199, + "step": 442800 + }, + { + "epoch": 1.9, + "learning_rate": 2.650084031423844e-05, + "loss": 1.5069, + "step": 442900 + }, + { + "epoch": 1.9, + "learning_rate": 2.6495411927685206e-05, + "loss": 1.4994, + "step": 443000 + }, + { + "epoch": 1.9, + "learning_rate": 2.6489983541131974e-05, + "loss": 1.5141, + "step": 443100 + }, + { + "epoch": 1.9, + "learning_rate": 2.648455515457874e-05, + "loss": 1.5473, + "step": 443200 + }, + { + "epoch": 1.9, + "learning_rate": 2.64791267680255e-05, + "loss": 1.549, + "step": 443300 + }, + { + "epoch": 1.9, + "learning_rate": 2.6473698381472267e-05, + "loss": 1.5292, + "step": 443400 + }, + { + "epoch": 1.91, + "learning_rate": 2.6468269994919032e-05, + "loss": 1.535, + "step": 443500 + }, + { + "epoch": 1.91, + "learning_rate": 2.6462841608365797e-05, + "loss": 1.5304, + "step": 443600 + }, + { + "epoch": 1.91, + "learning_rate": 2.645741322181256e-05, + "loss": 1.5453, + "step": 443700 + }, + { + "epoch": 1.91, + "learning_rate": 2.6451984835259325e-05, + "loss": 1.5084, + "step": 443800 + }, + { + "epoch": 1.91, + "learning_rate": 2.644655644870609e-05, + "loss": 1.5249, + "step": 443900 + }, + { + "epoch": 1.91, + "learning_rate": 2.6441128062152855e-05, + "loss": 1.5258, + "step": 444000 + }, + { + "epoch": 1.91, + "learning_rate": 2.6435699675599624e-05, + "loss": 1.5305, + "step": 444100 + }, + { + "epoch": 1.91, + "learning_rate": 2.643027128904639e-05, + "loss": 1.5316, + "step": 444200 + }, + { + "epoch": 1.91, + "learning_rate": 2.642484290249315e-05, + "loss": 1.5522, + "step": 444300 + }, + { + "epoch": 1.91, + "learning_rate": 2.6419414515939916e-05, + "loss": 1.5034, + "step": 444400 + }, + { + "epoch": 1.91, + "learning_rate": 2.641398612938668e-05, + "loss": 1.532, + "step": 444500 + }, + { + "epoch": 1.91, + "learning_rate": 2.6408557742833447e-05, + "loss": 1.5135, + "step": 444600 + }, + { + "epoch": 1.91, + "learning_rate": 2.640312935628021e-05, + "loss": 1.501, + "step": 444700 + }, + { + "epoch": 1.91, + "learning_rate": 2.6397700969726974e-05, + "loss": 1.5275, + "step": 444800 + }, + { + "epoch": 1.91, + "learning_rate": 2.639227258317374e-05, + "loss": 1.5153, + "step": 444900 + }, + { + "epoch": 1.91, + "learning_rate": 2.6386844196620504e-05, + "loss": 1.5199, + "step": 445000 + }, + { + "epoch": 1.91, + "eval_loss": 1.4345873594284058, + "eval_runtime": 17.7853, + "eval_samples_per_second": 562.264, + "eval_steps_per_second": 17.599, + "step": 445000 + }, + { + "epoch": 1.91, + "learning_rate": 2.6381415810067273e-05, + "loss": 1.5318, + "step": 445100 + }, + { + "epoch": 1.91, + "learning_rate": 2.6375987423514038e-05, + "loss": 1.5213, + "step": 445200 + }, + { + "epoch": 1.91, + "learning_rate": 2.63705590369608e-05, + "loss": 1.5152, + "step": 445300 + }, + { + "epoch": 1.91, + "learning_rate": 2.6365130650407565e-05, + "loss": 1.5457, + "step": 445400 + }, + { + "epoch": 1.91, + "learning_rate": 2.635970226385433e-05, + "loss": 1.5134, + "step": 445500 + }, + { + "epoch": 1.91, + "learning_rate": 2.6354273877301096e-05, + "loss": 1.5224, + "step": 445600 + }, + { + "epoch": 1.91, + "learning_rate": 2.6348845490747858e-05, + "loss": 1.4994, + "step": 445700 + }, + { + "epoch": 1.92, + "learning_rate": 2.6343417104194623e-05, + "loss": 1.537, + "step": 445800 + }, + { + "epoch": 1.92, + "learning_rate": 2.6337988717641388e-05, + "loss": 1.5304, + "step": 445900 + }, + { + "epoch": 1.92, + "learning_rate": 2.6332560331088154e-05, + "loss": 1.5238, + "step": 446000 + }, + { + "epoch": 1.92, + "learning_rate": 2.6327131944534915e-05, + "loss": 1.5099, + "step": 446100 + }, + { + "epoch": 1.92, + "learning_rate": 2.6321703557981687e-05, + "loss": 1.5341, + "step": 446200 + }, + { + "epoch": 1.92, + "learning_rate": 2.631627517142845e-05, + "loss": 1.5001, + "step": 446300 + }, + { + "epoch": 1.92, + "learning_rate": 2.6310846784875215e-05, + "loss": 1.5191, + "step": 446400 + }, + { + "epoch": 1.92, + "learning_rate": 2.630541839832198e-05, + "loss": 1.5184, + "step": 446500 + }, + { + "epoch": 1.92, + "learning_rate": 2.6299990011768745e-05, + "loss": 1.5, + "step": 446600 + }, + { + "epoch": 1.92, + "learning_rate": 2.6294561625215507e-05, + "loss": 1.522, + "step": 446700 + }, + { + "epoch": 1.92, + "learning_rate": 2.6289133238662272e-05, + "loss": 1.5686, + "step": 446800 + }, + { + "epoch": 1.92, + "learning_rate": 2.6283704852109038e-05, + "loss": 1.5029, + "step": 446900 + }, + { + "epoch": 1.92, + "learning_rate": 2.6278276465555803e-05, + "loss": 1.5285, + "step": 447000 + }, + { + "epoch": 1.92, + "learning_rate": 2.6272848079002565e-05, + "loss": 1.5039, + "step": 447100 + }, + { + "epoch": 1.92, + "learning_rate": 2.6267419692449337e-05, + "loss": 1.5305, + "step": 447200 + }, + { + "epoch": 1.92, + "learning_rate": 2.62619913058961e-05, + "loss": 1.5144, + "step": 447300 + }, + { + "epoch": 1.92, + "learning_rate": 2.6256562919342864e-05, + "loss": 1.5168, + "step": 447400 + }, + { + "epoch": 1.92, + "learning_rate": 2.625113453278963e-05, + "loss": 1.5162, + "step": 447500 + }, + { + "epoch": 1.92, + "learning_rate": 2.6245706146236394e-05, + "loss": 1.5354, + "step": 447600 + }, + { + "epoch": 1.92, + "learning_rate": 2.6240277759683156e-05, + "loss": 1.5558, + "step": 447700 + }, + { + "epoch": 1.92, + "learning_rate": 2.623484937312992e-05, + "loss": 1.491, + "step": 447800 + }, + { + "epoch": 1.92, + "learning_rate": 2.6229420986576687e-05, + "loss": 1.5375, + "step": 447900 + }, + { + "epoch": 1.92, + "learning_rate": 2.6223992600023452e-05, + "loss": 1.5197, + "step": 448000 + }, + { + "epoch": 1.93, + "learning_rate": 2.6218564213470214e-05, + "loss": 1.5252, + "step": 448100 + }, + { + "epoch": 1.93, + "learning_rate": 2.621313582691698e-05, + "loss": 1.4979, + "step": 448200 + }, + { + "epoch": 1.93, + "learning_rate": 2.6207707440363748e-05, + "loss": 1.5203, + "step": 448300 + }, + { + "epoch": 1.93, + "learning_rate": 2.6202279053810513e-05, + "loss": 1.4894, + "step": 448400 + }, + { + "epoch": 1.93, + "learning_rate": 2.619685066725728e-05, + "loss": 1.5117, + "step": 448500 + }, + { + "epoch": 1.93, + "learning_rate": 2.6191422280704044e-05, + "loss": 1.5073, + "step": 448600 + }, + { + "epoch": 1.93, + "learning_rate": 2.6185993894150805e-05, + "loss": 1.5084, + "step": 448700 + }, + { + "epoch": 1.93, + "learning_rate": 2.618056550759757e-05, + "loss": 1.5031, + "step": 448800 + }, + { + "epoch": 1.93, + "learning_rate": 2.6175137121044336e-05, + "loss": 1.5309, + "step": 448900 + }, + { + "epoch": 1.93, + "learning_rate": 2.61697087344911e-05, + "loss": 1.5146, + "step": 449000 + }, + { + "epoch": 1.93, + "learning_rate": 2.6164280347937863e-05, + "loss": 1.5523, + "step": 449100 + }, + { + "epoch": 1.93, + "learning_rate": 2.615885196138463e-05, + "loss": 1.54, + "step": 449200 + }, + { + "epoch": 1.93, + "learning_rate": 2.6153423574831397e-05, + "loss": 1.5049, + "step": 449300 + }, + { + "epoch": 1.93, + "learning_rate": 2.6147995188278162e-05, + "loss": 1.531, + "step": 449400 + }, + { + "epoch": 1.93, + "learning_rate": 2.6142566801724928e-05, + "loss": 1.4931, + "step": 449500 + }, + { + "epoch": 1.93, + "learning_rate": 2.6137138415171693e-05, + "loss": 1.4995, + "step": 449600 + }, + { + "epoch": 1.93, + "learning_rate": 2.6131710028618455e-05, + "loss": 1.5199, + "step": 449700 + }, + { + "epoch": 1.93, + "learning_rate": 2.612628164206522e-05, + "loss": 1.4933, + "step": 449800 + }, + { + "epoch": 1.93, + "learning_rate": 2.6120853255511985e-05, + "loss": 1.5186, + "step": 449900 + }, + { + "epoch": 1.93, + "learning_rate": 2.611542486895875e-05, + "loss": 1.5041, + "step": 450000 + }, + { + "epoch": 1.93, + "eval_loss": 1.4356664419174194, + "eval_runtime": 17.7962, + "eval_samples_per_second": 561.918, + "eval_steps_per_second": 17.588, + "step": 450000 + }, + { + "epoch": 1.93, + "learning_rate": 2.6109996482405512e-05, + "loss": 1.487, + "step": 450100 + }, + { + "epoch": 1.93, + "learning_rate": 2.6104568095852278e-05, + "loss": 1.5048, + "step": 450200 + }, + { + "epoch": 1.93, + "learning_rate": 2.6099139709299043e-05, + "loss": 1.5171, + "step": 450300 + }, + { + "epoch": 1.93, + "learning_rate": 2.609371132274581e-05, + "loss": 1.5121, + "step": 450400 + }, + { + "epoch": 1.94, + "learning_rate": 2.6088282936192577e-05, + "loss": 1.528, + "step": 450500 + }, + { + "epoch": 1.94, + "learning_rate": 2.6082854549639342e-05, + "loss": 1.5088, + "step": 450600 + }, + { + "epoch": 1.94, + "learning_rate": 2.6077426163086104e-05, + "loss": 1.5042, + "step": 450700 + }, + { + "epoch": 1.94, + "learning_rate": 2.607199777653287e-05, + "loss": 1.5142, + "step": 450800 + }, + { + "epoch": 1.94, + "learning_rate": 2.6066569389979635e-05, + "loss": 1.532, + "step": 450900 + }, + { + "epoch": 1.94, + "learning_rate": 2.6061141003426396e-05, + "loss": 1.5488, + "step": 451000 + }, + { + "epoch": 1.94, + "learning_rate": 2.605571261687316e-05, + "loss": 1.5321, + "step": 451100 + }, + { + "epoch": 1.94, + "learning_rate": 2.6050284230319927e-05, + "loss": 1.521, + "step": 451200 + }, + { + "epoch": 1.94, + "learning_rate": 2.6044855843766692e-05, + "loss": 1.5191, + "step": 451300 + }, + { + "epoch": 1.94, + "learning_rate": 2.603942745721346e-05, + "loss": 1.501, + "step": 451400 + }, + { + "epoch": 1.94, + "learning_rate": 2.6033999070660226e-05, + "loss": 1.5219, + "step": 451500 + }, + { + "epoch": 1.94, + "learning_rate": 2.602857068410699e-05, + "loss": 1.5292, + "step": 451600 + }, + { + "epoch": 1.94, + "learning_rate": 2.6023142297553753e-05, + "loss": 1.5018, + "step": 451700 + }, + { + "epoch": 1.94, + "learning_rate": 2.601771391100052e-05, + "loss": 1.55, + "step": 451800 + }, + { + "epoch": 1.94, + "learning_rate": 2.6012285524447284e-05, + "loss": 1.511, + "step": 451900 + }, + { + "epoch": 1.94, + "learning_rate": 2.6006857137894046e-05, + "loss": 1.5349, + "step": 452000 + }, + { + "epoch": 1.94, + "learning_rate": 2.600142875134081e-05, + "loss": 1.5489, + "step": 452100 + }, + { + "epoch": 1.94, + "learning_rate": 2.5996000364787576e-05, + "loss": 1.5337, + "step": 452200 + }, + { + "epoch": 1.94, + "learning_rate": 2.599057197823434e-05, + "loss": 1.5176, + "step": 452300 + }, + { + "epoch": 1.94, + "learning_rate": 2.5985143591681103e-05, + "loss": 1.4814, + "step": 452400 + }, + { + "epoch": 1.94, + "learning_rate": 2.5979715205127875e-05, + "loss": 1.5198, + "step": 452500 + }, + { + "epoch": 1.94, + "learning_rate": 2.597428681857464e-05, + "loss": 1.4852, + "step": 452600 + }, + { + "epoch": 1.94, + "learning_rate": 2.5968858432021402e-05, + "loss": 1.5318, + "step": 452700 + }, + { + "epoch": 1.95, + "learning_rate": 2.5963430045468168e-05, + "loss": 1.5186, + "step": 452800 + }, + { + "epoch": 1.95, + "learning_rate": 2.5958001658914933e-05, + "loss": 1.5258, + "step": 452900 + }, + { + "epoch": 1.95, + "learning_rate": 2.5952573272361695e-05, + "loss": 1.5404, + "step": 453000 + }, + { + "epoch": 1.95, + "learning_rate": 2.594714488580846e-05, + "loss": 1.4918, + "step": 453100 + }, + { + "epoch": 1.95, + "learning_rate": 2.5941716499255225e-05, + "loss": 1.5234, + "step": 453200 + }, + { + "epoch": 1.95, + "learning_rate": 2.593628811270199e-05, + "loss": 1.5085, + "step": 453300 + }, + { + "epoch": 1.95, + "learning_rate": 2.5930859726148753e-05, + "loss": 1.5206, + "step": 453400 + }, + { + "epoch": 1.95, + "learning_rate": 2.5925431339595525e-05, + "loss": 1.5355, + "step": 453500 + }, + { + "epoch": 1.95, + "learning_rate": 2.592000295304229e-05, + "loss": 1.5291, + "step": 453600 + }, + { + "epoch": 1.95, + "learning_rate": 2.5914574566489052e-05, + "loss": 1.5067, + "step": 453700 + }, + { + "epoch": 1.95, + "learning_rate": 2.5909146179935817e-05, + "loss": 1.5327, + "step": 453800 + }, + { + "epoch": 1.95, + "learning_rate": 2.5903717793382582e-05, + "loss": 1.5209, + "step": 453900 + }, + { + "epoch": 1.95, + "learning_rate": 2.5898289406829344e-05, + "loss": 1.4975, + "step": 454000 + }, + { + "epoch": 1.95, + "learning_rate": 2.589286102027611e-05, + "loss": 1.526, + "step": 454100 + }, + { + "epoch": 1.95, + "learning_rate": 2.5887432633722875e-05, + "loss": 1.5319, + "step": 454200 + }, + { + "epoch": 1.95, + "learning_rate": 2.588200424716964e-05, + "loss": 1.5306, + "step": 454300 + }, + { + "epoch": 1.95, + "learning_rate": 2.5876575860616402e-05, + "loss": 1.5138, + "step": 454400 + }, + { + "epoch": 1.95, + "learning_rate": 2.5871147474063167e-05, + "loss": 1.5177, + "step": 454500 + }, + { + "epoch": 1.95, + "learning_rate": 2.586571908750994e-05, + "loss": 1.487, + "step": 454600 + }, + { + "epoch": 1.95, + "learning_rate": 2.58602907009567e-05, + "loss": 1.5251, + "step": 454700 + }, + { + "epoch": 1.95, + "learning_rate": 2.5854862314403466e-05, + "loss": 1.5141, + "step": 454800 + }, + { + "epoch": 1.95, + "learning_rate": 2.584943392785023e-05, + "loss": 1.5246, + "step": 454900 + }, + { + "epoch": 1.95, + "learning_rate": 2.5844005541296993e-05, + "loss": 1.4794, + "step": 455000 + }, + { + "epoch": 1.95, + "eval_loss": 1.437150239944458, + "eval_runtime": 17.7517, + "eval_samples_per_second": 563.327, + "eval_steps_per_second": 17.632, + "step": 455000 + }, + { + "epoch": 1.96, + "learning_rate": 2.583857715474376e-05, + "loss": 1.5201, + "step": 455100 + }, + { + "epoch": 1.96, + "learning_rate": 2.5833148768190524e-05, + "loss": 1.4924, + "step": 455200 + }, + { + "epoch": 1.96, + "learning_rate": 2.582772038163729e-05, + "loss": 1.5248, + "step": 455300 + }, + { + "epoch": 1.96, + "learning_rate": 2.582229199508405e-05, + "loss": 1.5092, + "step": 455400 + }, + { + "epoch": 1.96, + "learning_rate": 2.5816863608530816e-05, + "loss": 1.521, + "step": 455500 + }, + { + "epoch": 1.96, + "learning_rate": 2.581143522197759e-05, + "loss": 1.5166, + "step": 455600 + }, + { + "epoch": 1.96, + "learning_rate": 2.580600683542435e-05, + "loss": 1.5099, + "step": 455700 + }, + { + "epoch": 1.96, + "learning_rate": 2.5800578448871115e-05, + "loss": 1.5127, + "step": 455800 + }, + { + "epoch": 1.96, + "learning_rate": 2.579515006231788e-05, + "loss": 1.5218, + "step": 455900 + }, + { + "epoch": 1.96, + "learning_rate": 2.5789721675764643e-05, + "loss": 1.527, + "step": 456000 + }, + { + "epoch": 1.96, + "learning_rate": 2.5784293289211408e-05, + "loss": 1.532, + "step": 456100 + }, + { + "epoch": 1.96, + "learning_rate": 2.5778864902658173e-05, + "loss": 1.5171, + "step": 456200 + }, + { + "epoch": 1.96, + "learning_rate": 2.577343651610494e-05, + "loss": 1.5231, + "step": 456300 + }, + { + "epoch": 1.96, + "learning_rate": 2.57680081295517e-05, + "loss": 1.5034, + "step": 456400 + }, + { + "epoch": 1.96, + "learning_rate": 2.5762579742998466e-05, + "loss": 1.5194, + "step": 456500 + }, + { + "epoch": 1.96, + "learning_rate": 2.575715135644523e-05, + "loss": 1.5078, + "step": 456600 + }, + { + "epoch": 1.96, + "learning_rate": 2.5751722969892e-05, + "loss": 1.5181, + "step": 456700 + }, + { + "epoch": 1.96, + "learning_rate": 2.5746294583338765e-05, + "loss": 1.5411, + "step": 456800 + }, + { + "epoch": 1.96, + "learning_rate": 2.574086619678553e-05, + "loss": 1.5068, + "step": 456900 + }, + { + "epoch": 1.96, + "learning_rate": 2.5735437810232292e-05, + "loss": 1.5259, + "step": 457000 + }, + { + "epoch": 1.96, + "learning_rate": 2.5730009423679057e-05, + "loss": 1.5365, + "step": 457100 + }, + { + "epoch": 1.96, + "learning_rate": 2.5724581037125822e-05, + "loss": 1.5247, + "step": 457200 + }, + { + "epoch": 1.96, + "learning_rate": 2.5719152650572588e-05, + "loss": 1.5386, + "step": 457300 + }, + { + "epoch": 1.97, + "learning_rate": 2.571372426401935e-05, + "loss": 1.4939, + "step": 457400 + }, + { + "epoch": 1.97, + "learning_rate": 2.5708295877466115e-05, + "loss": 1.5226, + "step": 457500 + }, + { + "epoch": 1.97, + "learning_rate": 2.570286749091288e-05, + "loss": 1.5273, + "step": 457600 + }, + { + "epoch": 1.97, + "learning_rate": 2.569743910435965e-05, + "loss": 1.5175, + "step": 457700 + }, + { + "epoch": 1.97, + "learning_rate": 2.5692010717806414e-05, + "loss": 1.5396, + "step": 457800 + }, + { + "epoch": 1.97, + "learning_rate": 2.568658233125318e-05, + "loss": 1.5206, + "step": 457900 + }, + { + "epoch": 1.97, + "learning_rate": 2.568115394469994e-05, + "loss": 1.4951, + "step": 458000 + }, + { + "epoch": 1.97, + "learning_rate": 2.5675725558146706e-05, + "loss": 1.5071, + "step": 458100 + }, + { + "epoch": 1.97, + "learning_rate": 2.567029717159347e-05, + "loss": 1.5149, + "step": 458200 + }, + { + "epoch": 1.97, + "learning_rate": 2.5664868785040237e-05, + "loss": 1.5149, + "step": 458300 + }, + { + "epoch": 1.97, + "learning_rate": 2.5659440398487e-05, + "loss": 1.4868, + "step": 458400 + }, + { + "epoch": 1.97, + "learning_rate": 2.5654012011933764e-05, + "loss": 1.5077, + "step": 458500 + }, + { + "epoch": 1.97, + "learning_rate": 2.564858362538053e-05, + "loss": 1.5018, + "step": 458600 + }, + { + "epoch": 1.97, + "learning_rate": 2.5643155238827295e-05, + "loss": 1.5123, + "step": 458700 + }, + { + "epoch": 1.97, + "learning_rate": 2.5637726852274063e-05, + "loss": 1.4973, + "step": 458800 + }, + { + "epoch": 1.97, + "learning_rate": 2.563229846572083e-05, + "loss": 1.4967, + "step": 458900 + }, + { + "epoch": 1.97, + "learning_rate": 2.562687007916759e-05, + "loss": 1.516, + "step": 459000 + }, + { + "epoch": 1.97, + "learning_rate": 2.5621441692614356e-05, + "loss": 1.5316, + "step": 459100 + }, + { + "epoch": 1.97, + "learning_rate": 2.561601330606112e-05, + "loss": 1.5445, + "step": 459200 + }, + { + "epoch": 1.97, + "learning_rate": 2.5610584919507886e-05, + "loss": 1.5118, + "step": 459300 + }, + { + "epoch": 1.97, + "learning_rate": 2.5605156532954648e-05, + "loss": 1.5051, + "step": 459400 + }, + { + "epoch": 1.97, + "learning_rate": 2.5599728146401413e-05, + "loss": 1.5285, + "step": 459500 + }, + { + "epoch": 1.97, + "learning_rate": 2.559429975984818e-05, + "loss": 1.5028, + "step": 459600 + }, + { + "epoch": 1.97, + "learning_rate": 2.5588871373294944e-05, + "loss": 1.5188, + "step": 459700 + }, + { + "epoch": 1.98, + "learning_rate": 2.5583442986741712e-05, + "loss": 1.5244, + "step": 459800 + }, + { + "epoch": 1.98, + "learning_rate": 2.5578014600188478e-05, + "loss": 1.5232, + "step": 459900 + }, + { + "epoch": 1.98, + "learning_rate": 2.557258621363524e-05, + "loss": 1.5254, + "step": 460000 + }, + { + "epoch": 1.98, + "eval_loss": 1.4372631311416626, + "eval_runtime": 17.8072, + "eval_samples_per_second": 561.57, + "eval_steps_per_second": 17.577, + "step": 460000 + }, + { + "epoch": 1.98, + "learning_rate": 2.5567157827082005e-05, + "loss": 1.5217, + "step": 460100 + }, + { + "epoch": 1.98, + "learning_rate": 2.556172944052877e-05, + "loss": 1.5216, + "step": 460200 + }, + { + "epoch": 1.98, + "learning_rate": 2.5556301053975535e-05, + "loss": 1.524, + "step": 460300 + }, + { + "epoch": 1.98, + "learning_rate": 2.5550872667422297e-05, + "loss": 1.5024, + "step": 460400 + }, + { + "epoch": 1.98, + "learning_rate": 2.5545444280869063e-05, + "loss": 1.5246, + "step": 460500 + }, + { + "epoch": 1.98, + "learning_rate": 2.5540015894315828e-05, + "loss": 1.5202, + "step": 460600 + }, + { + "epoch": 1.98, + "learning_rate": 2.5534587507762593e-05, + "loss": 1.5033, + "step": 460700 + }, + { + "epoch": 1.98, + "learning_rate": 2.5529159121209355e-05, + "loss": 1.5379, + "step": 460800 + }, + { + "epoch": 1.98, + "learning_rate": 2.5523730734656127e-05, + "loss": 1.5156, + "step": 460900 + }, + { + "epoch": 1.98, + "learning_rate": 2.551830234810289e-05, + "loss": 1.514, + "step": 461000 + }, + { + "epoch": 1.98, + "learning_rate": 2.5512873961549654e-05, + "loss": 1.4981, + "step": 461100 + }, + { + "epoch": 1.98, + "learning_rate": 2.550744557499642e-05, + "loss": 1.47, + "step": 461200 + }, + { + "epoch": 1.98, + "learning_rate": 2.5502017188443185e-05, + "loss": 1.5092, + "step": 461300 + }, + { + "epoch": 1.98, + "learning_rate": 2.5496588801889946e-05, + "loss": 1.5191, + "step": 461400 + }, + { + "epoch": 1.98, + "learning_rate": 2.5491160415336712e-05, + "loss": 1.504, + "step": 461500 + }, + { + "epoch": 1.98, + "learning_rate": 2.5485732028783477e-05, + "loss": 1.5084, + "step": 461600 + }, + { + "epoch": 1.98, + "learning_rate": 2.5480303642230242e-05, + "loss": 1.4905, + "step": 461700 + }, + { + "epoch": 1.98, + "learning_rate": 2.5474875255677004e-05, + "loss": 1.5079, + "step": 461800 + }, + { + "epoch": 1.98, + "learning_rate": 2.5469446869123776e-05, + "loss": 1.4988, + "step": 461900 + }, + { + "epoch": 1.98, + "learning_rate": 2.5464018482570538e-05, + "loss": 1.5224, + "step": 462000 + }, + { + "epoch": 1.99, + "learning_rate": 2.5458590096017303e-05, + "loss": 1.5292, + "step": 462100 + }, + { + "epoch": 1.99, + "learning_rate": 2.545316170946407e-05, + "loss": 1.5291, + "step": 462200 + }, + { + "epoch": 1.99, + "learning_rate": 2.5447733322910834e-05, + "loss": 1.5155, + "step": 462300 + }, + { + "epoch": 1.99, + "learning_rate": 2.5442304936357596e-05, + "loss": 1.5209, + "step": 462400 + }, + { + "epoch": 1.99, + "learning_rate": 2.543687654980436e-05, + "loss": 1.5217, + "step": 462500 + }, + { + "epoch": 1.99, + "learning_rate": 2.5431448163251126e-05, + "loss": 1.5291, + "step": 462600 + }, + { + "epoch": 1.99, + "learning_rate": 2.542601977669789e-05, + "loss": 1.5398, + "step": 462700 + }, + { + "epoch": 1.99, + "learning_rate": 2.5420591390144653e-05, + "loss": 1.5249, + "step": 462800 + }, + { + "epoch": 1.99, + "learning_rate": 2.541516300359142e-05, + "loss": 1.5094, + "step": 462900 + }, + { + "epoch": 1.99, + "learning_rate": 2.5409734617038187e-05, + "loss": 1.5231, + "step": 463000 + }, + { + "epoch": 1.99, + "learning_rate": 2.5404306230484953e-05, + "loss": 1.4996, + "step": 463100 + }, + { + "epoch": 1.99, + "learning_rate": 2.5398877843931718e-05, + "loss": 1.5071, + "step": 463200 + }, + { + "epoch": 1.99, + "learning_rate": 2.5393449457378483e-05, + "loss": 1.5225, + "step": 463300 + }, + { + "epoch": 1.99, + "learning_rate": 2.5388021070825245e-05, + "loss": 1.5091, + "step": 463400 + }, + { + "epoch": 1.99, + "learning_rate": 2.538259268427201e-05, + "loss": 1.5096, + "step": 463500 + }, + { + "epoch": 1.99, + "learning_rate": 2.5377164297718776e-05, + "loss": 1.5081, + "step": 463600 + }, + { + "epoch": 1.99, + "learning_rate": 2.537173591116554e-05, + "loss": 1.5027, + "step": 463700 + }, + { + "epoch": 1.99, + "learning_rate": 2.5366307524612303e-05, + "loss": 1.5129, + "step": 463800 + }, + { + "epoch": 1.99, + "learning_rate": 2.5360879138059068e-05, + "loss": 1.5128, + "step": 463900 + }, + { + "epoch": 1.99, + "learning_rate": 2.5355450751505837e-05, + "loss": 1.5264, + "step": 464000 + }, + { + "epoch": 1.99, + "learning_rate": 2.5350022364952602e-05, + "loss": 1.5181, + "step": 464100 + }, + { + "epoch": 1.99, + "learning_rate": 2.5344593978399367e-05, + "loss": 1.492, + "step": 464200 + }, + { + "epoch": 1.99, + "learning_rate": 2.5339165591846132e-05, + "loss": 1.5279, + "step": 464300 + }, + { + "epoch": 2.0, + "learning_rate": 2.5333737205292894e-05, + "loss": 1.5017, + "step": 464400 + }, + { + "epoch": 2.0, + "learning_rate": 2.532830881873966e-05, + "loss": 1.4949, + "step": 464500 + }, + { + "epoch": 2.0, + "learning_rate": 2.5322880432186425e-05, + "loss": 1.5386, + "step": 464600 + }, + { + "epoch": 2.0, + "learning_rate": 2.531745204563319e-05, + "loss": 1.501, + "step": 464700 + }, + { + "epoch": 2.0, + "learning_rate": 2.5312023659079952e-05, + "loss": 1.556, + "step": 464800 + }, + { + "epoch": 2.0, + "learning_rate": 2.5306595272526717e-05, + "loss": 1.5504, + "step": 464900 + }, + { + "epoch": 2.0, + "learning_rate": 2.5301166885973486e-05, + "loss": 1.5136, + "step": 465000 + }, + { + "epoch": 2.0, + "eval_loss": 1.4351954460144043, + "eval_runtime": 17.8126, + "eval_samples_per_second": 561.4, + "eval_steps_per_second": 17.572, + "step": 465000 + }, + { + "epoch": 2.0, + "learning_rate": 2.529573849942025e-05, + "loss": 1.537, + "step": 465100 + }, + { + "epoch": 2.0, + "learning_rate": 2.5290310112867016e-05, + "loss": 1.5129, + "step": 465200 + }, + { + "epoch": 2.0, + "learning_rate": 2.528488172631378e-05, + "loss": 1.533, + "step": 465300 + }, + { + "epoch": 2.0, + "learning_rate": 2.5279453339760543e-05, + "loss": 1.5411, + "step": 465400 + }, + { + "epoch": 2.0, + "learning_rate": 2.527402495320731e-05, + "loss": 1.5306, + "step": 465500 + }, + { + "epoch": 2.0, + "learning_rate": 2.5268596566654074e-05, + "loss": 1.4965, + "step": 465600 + }, + { + "epoch": 2.0, + "learning_rate": 2.526316818010084e-05, + "loss": 1.5094, + "step": 465700 + }, + { + "epoch": 2.0, + "learning_rate": 2.52577397935476e-05, + "loss": 1.481, + "step": 465800 + }, + { + "epoch": 2.0, + "learning_rate": 2.5252311406994366e-05, + "loss": 1.4859, + "step": 465900 + }, + { + "epoch": 2.0, + "learning_rate": 2.524688302044113e-05, + "loss": 1.4919, + "step": 466000 + }, + { + "epoch": 2.0, + "learning_rate": 2.52414546338879e-05, + "loss": 1.5023, + "step": 466100 + }, + { + "epoch": 2.0, + "learning_rate": 2.5236026247334666e-05, + "loss": 1.4949, + "step": 466200 + }, + { + "epoch": 2.0, + "learning_rate": 2.523059786078143e-05, + "loss": 1.4836, + "step": 466300 + }, + { + "epoch": 2.0, + "learning_rate": 2.5225169474228193e-05, + "loss": 1.4577, + "step": 466400 + }, + { + "epoch": 2.0, + "learning_rate": 2.5219741087674958e-05, + "loss": 1.5039, + "step": 466500 + }, + { + "epoch": 2.0, + "learning_rate": 2.5214312701121723e-05, + "loss": 1.4643, + "step": 466600 + }, + { + "epoch": 2.0, + "learning_rate": 2.520888431456849e-05, + "loss": 1.5069, + "step": 466700 + }, + { + "epoch": 2.01, + "learning_rate": 2.520345592801525e-05, + "loss": 1.4909, + "step": 466800 + }, + { + "epoch": 2.01, + "learning_rate": 2.5198027541462016e-05, + "loss": 1.4872, + "step": 466900 + }, + { + "epoch": 2.01, + "learning_rate": 2.519259915490878e-05, + "loss": 1.4882, + "step": 467000 + }, + { + "epoch": 2.01, + "learning_rate": 2.518717076835555e-05, + "loss": 1.4986, + "step": 467100 + }, + { + "epoch": 2.01, + "learning_rate": 2.5181742381802315e-05, + "loss": 1.4678, + "step": 467200 + }, + { + "epoch": 2.01, + "learning_rate": 2.517631399524908e-05, + "loss": 1.4588, + "step": 467300 + }, + { + "epoch": 2.01, + "learning_rate": 2.5170885608695842e-05, + "loss": 1.4589, + "step": 467400 + }, + { + "epoch": 2.01, + "learning_rate": 2.5165457222142607e-05, + "loss": 1.4516, + "step": 467500 + }, + { + "epoch": 2.01, + "learning_rate": 2.5160028835589372e-05, + "loss": 1.4877, + "step": 467600 + }, + { + "epoch": 2.01, + "learning_rate": 2.5154600449036138e-05, + "loss": 1.46, + "step": 467700 + }, + { + "epoch": 2.01, + "learning_rate": 2.51491720624829e-05, + "loss": 1.4712, + "step": 467800 + }, + { + "epoch": 2.01, + "learning_rate": 2.5143743675929665e-05, + "loss": 1.4915, + "step": 467900 + }, + { + "epoch": 2.01, + "learning_rate": 2.513831528937643e-05, + "loss": 1.4865, + "step": 468000 + }, + { + "epoch": 2.01, + "learning_rate": 2.5132886902823195e-05, + "loss": 1.5087, + "step": 468100 + }, + { + "epoch": 2.01, + "learning_rate": 2.5127458516269964e-05, + "loss": 1.4844, + "step": 468200 + }, + { + "epoch": 2.01, + "learning_rate": 2.512203012971673e-05, + "loss": 1.4879, + "step": 468300 + }, + { + "epoch": 2.01, + "learning_rate": 2.511660174316349e-05, + "loss": 1.4974, + "step": 468400 + }, + { + "epoch": 2.01, + "learning_rate": 2.5111173356610256e-05, + "loss": 1.4683, + "step": 468500 + }, + { + "epoch": 2.01, + "learning_rate": 2.5105744970057022e-05, + "loss": 1.4931, + "step": 468600 + }, + { + "epoch": 2.01, + "learning_rate": 2.5100316583503787e-05, + "loss": 1.4729, + "step": 468700 + }, + { + "epoch": 2.01, + "learning_rate": 2.509488819695055e-05, + "loss": 1.4603, + "step": 468800 + }, + { + "epoch": 2.01, + "learning_rate": 2.5089459810397314e-05, + "loss": 1.4908, + "step": 468900 + }, + { + "epoch": 2.01, + "learning_rate": 2.508403142384408e-05, + "loss": 1.4753, + "step": 469000 + }, + { + "epoch": 2.02, + "learning_rate": 2.5078603037290845e-05, + "loss": 1.5068, + "step": 469100 + }, + { + "epoch": 2.02, + "learning_rate": 2.5073174650737613e-05, + "loss": 1.48, + "step": 469200 + }, + { + "epoch": 2.02, + "learning_rate": 2.506774626418438e-05, + "loss": 1.465, + "step": 469300 + }, + { + "epoch": 2.02, + "learning_rate": 2.506231787763114e-05, + "loss": 1.4885, + "step": 469400 + }, + { + "epoch": 2.02, + "learning_rate": 2.5056889491077906e-05, + "loss": 1.5117, + "step": 469500 + }, + { + "epoch": 2.02, + "learning_rate": 2.505146110452467e-05, + "loss": 1.4701, + "step": 469600 + }, + { + "epoch": 2.02, + "learning_rate": 2.5046032717971436e-05, + "loss": 1.4813, + "step": 469700 + }, + { + "epoch": 2.02, + "learning_rate": 2.5040604331418198e-05, + "loss": 1.489, + "step": 469800 + }, + { + "epoch": 2.02, + "learning_rate": 2.5035175944864963e-05, + "loss": 1.4867, + "step": 469900 + }, + { + "epoch": 2.02, + "learning_rate": 2.502974755831173e-05, + "loss": 1.4885, + "step": 470000 + }, + { + "epoch": 2.02, + "eval_loss": 1.4379206895828247, + "eval_runtime": 17.8048, + "eval_samples_per_second": 561.647, + "eval_steps_per_second": 17.58, + "step": 470000 + }, + { + "epoch": 2.02, + "learning_rate": 2.5024319171758494e-05, + "loss": 1.4895, + "step": 470100 + }, + { + "epoch": 2.02, + "learning_rate": 2.5018890785205256e-05, + "loss": 1.4791, + "step": 470200 + }, + { + "epoch": 2.02, + "learning_rate": 2.5013462398652028e-05, + "loss": 1.4837, + "step": 470300 + }, + { + "epoch": 2.02, + "learning_rate": 2.500803401209879e-05, + "loss": 1.4775, + "step": 470400 + }, + { + "epoch": 2.02, + "learning_rate": 2.5002605625545555e-05, + "loss": 1.519, + "step": 470500 + }, + { + "epoch": 2.02, + "learning_rate": 2.499717723899232e-05, + "loss": 1.4725, + "step": 470600 + }, + { + "epoch": 2.02, + "learning_rate": 2.4991748852439085e-05, + "loss": 1.5076, + "step": 470700 + }, + { + "epoch": 2.02, + "learning_rate": 2.4986320465885847e-05, + "loss": 1.4859, + "step": 470800 + }, + { + "epoch": 2.02, + "learning_rate": 2.4980892079332613e-05, + "loss": 1.4572, + "step": 470900 + }, + { + "epoch": 2.02, + "learning_rate": 2.4975463692779378e-05, + "loss": 1.4833, + "step": 471000 + }, + { + "epoch": 2.02, + "learning_rate": 2.4970035306226143e-05, + "loss": 1.4604, + "step": 471100 + }, + { + "epoch": 2.02, + "learning_rate": 2.496460691967291e-05, + "loss": 1.4769, + "step": 471200 + }, + { + "epoch": 2.02, + "learning_rate": 2.4959178533119674e-05, + "loss": 1.4817, + "step": 471300 + }, + { + "epoch": 2.03, + "learning_rate": 2.495375014656644e-05, + "loss": 1.4724, + "step": 471400 + }, + { + "epoch": 2.03, + "learning_rate": 2.49483217600132e-05, + "loss": 1.4648, + "step": 471500 + }, + { + "epoch": 2.03, + "learning_rate": 2.494289337345997e-05, + "loss": 1.4813, + "step": 471600 + }, + { + "epoch": 2.03, + "learning_rate": 2.4937464986906735e-05, + "loss": 1.4614, + "step": 471700 + }, + { + "epoch": 2.03, + "learning_rate": 2.4932036600353497e-05, + "loss": 1.4949, + "step": 471800 + }, + { + "epoch": 2.03, + "learning_rate": 2.4926608213800262e-05, + "loss": 1.4819, + "step": 471900 + }, + { + "epoch": 2.03, + "learning_rate": 2.4921179827247027e-05, + "loss": 1.4853, + "step": 472000 + }, + { + "epoch": 2.03, + "learning_rate": 2.4915751440693792e-05, + "loss": 1.4949, + "step": 472100 + }, + { + "epoch": 2.03, + "learning_rate": 2.4910323054140558e-05, + "loss": 1.4927, + "step": 472200 + }, + { + "epoch": 2.03, + "learning_rate": 2.4904894667587323e-05, + "loss": 1.4945, + "step": 472300 + }, + { + "epoch": 2.03, + "learning_rate": 2.4899466281034088e-05, + "loss": 1.4696, + "step": 472400 + }, + { + "epoch": 2.03, + "learning_rate": 2.489403789448085e-05, + "loss": 1.447, + "step": 472500 + }, + { + "epoch": 2.03, + "learning_rate": 2.488860950792762e-05, + "loss": 1.4982, + "step": 472600 + }, + { + "epoch": 2.03, + "learning_rate": 2.4883181121374384e-05, + "loss": 1.4921, + "step": 472700 + }, + { + "epoch": 2.03, + "learning_rate": 2.4877752734821146e-05, + "loss": 1.4724, + "step": 472800 + }, + { + "epoch": 2.03, + "learning_rate": 2.487232434826791e-05, + "loss": 1.5049, + "step": 472900 + }, + { + "epoch": 2.03, + "learning_rate": 2.4866895961714676e-05, + "loss": 1.4724, + "step": 473000 + }, + { + "epoch": 2.03, + "learning_rate": 2.486146757516144e-05, + "loss": 1.4891, + "step": 473100 + }, + { + "epoch": 2.03, + "learning_rate": 2.4856039188608207e-05, + "loss": 1.4846, + "step": 473200 + }, + { + "epoch": 2.03, + "learning_rate": 2.4850610802054972e-05, + "loss": 1.4849, + "step": 473300 + }, + { + "epoch": 2.03, + "learning_rate": 2.4845182415501737e-05, + "loss": 1.4698, + "step": 473400 + }, + { + "epoch": 2.03, + "learning_rate": 2.48397540289485e-05, + "loss": 1.4848, + "step": 473500 + }, + { + "epoch": 2.03, + "learning_rate": 2.4834325642395265e-05, + "loss": 1.5008, + "step": 473600 + }, + { + "epoch": 2.04, + "learning_rate": 2.4828897255842033e-05, + "loss": 1.4955, + "step": 473700 + }, + { + "epoch": 2.04, + "learning_rate": 2.4823468869288795e-05, + "loss": 1.5106, + "step": 473800 + }, + { + "epoch": 2.04, + "learning_rate": 2.481804048273556e-05, + "loss": 1.4705, + "step": 473900 + }, + { + "epoch": 2.04, + "learning_rate": 2.4812612096182326e-05, + "loss": 1.4965, + "step": 474000 + }, + { + "epoch": 2.04, + "learning_rate": 2.480718370962909e-05, + "loss": 1.4966, + "step": 474100 + }, + { + "epoch": 2.04, + "learning_rate": 2.4801755323075856e-05, + "loss": 1.4636, + "step": 474200 + }, + { + "epoch": 2.04, + "learning_rate": 2.479632693652262e-05, + "loss": 1.493, + "step": 474300 + }, + { + "epoch": 2.04, + "learning_rate": 2.4790898549969387e-05, + "loss": 1.4891, + "step": 474400 + }, + { + "epoch": 2.04, + "learning_rate": 2.478547016341615e-05, + "loss": 1.47, + "step": 474500 + }, + { + "epoch": 2.04, + "learning_rate": 2.4780041776862914e-05, + "loss": 1.4939, + "step": 474600 + }, + { + "epoch": 2.04, + "learning_rate": 2.4774613390309682e-05, + "loss": 1.5032, + "step": 474700 + }, + { + "epoch": 2.04, + "learning_rate": 2.4769185003756444e-05, + "loss": 1.4862, + "step": 474800 + }, + { + "epoch": 2.04, + "learning_rate": 2.476375661720321e-05, + "loss": 1.4728, + "step": 474900 + }, + { + "epoch": 2.04, + "learning_rate": 2.4758328230649975e-05, + "loss": 1.4978, + "step": 475000 + }, + { + "epoch": 2.04, + "eval_loss": 1.4373873472213745, + "eval_runtime": 17.7804, + "eval_samples_per_second": 562.418, + "eval_steps_per_second": 17.604, + "step": 475000 + }, + { + "epoch": 2.04, + "learning_rate": 2.475289984409674e-05, + "loss": 1.4943, + "step": 475100 + }, + { + "epoch": 2.04, + "learning_rate": 2.4747471457543502e-05, + "loss": 1.4735, + "step": 475200 + }, + { + "epoch": 2.04, + "learning_rate": 2.474204307099027e-05, + "loss": 1.4863, + "step": 475300 + }, + { + "epoch": 2.04, + "learning_rate": 2.4736614684437036e-05, + "loss": 1.4672, + "step": 475400 + }, + { + "epoch": 2.04, + "learning_rate": 2.4731186297883798e-05, + "loss": 1.5015, + "step": 475500 + }, + { + "epoch": 2.04, + "learning_rate": 2.4725757911330563e-05, + "loss": 1.4682, + "step": 475600 + }, + { + "epoch": 2.04, + "learning_rate": 2.472032952477733e-05, + "loss": 1.4963, + "step": 475700 + }, + { + "epoch": 2.04, + "learning_rate": 2.4714901138224094e-05, + "loss": 1.4793, + "step": 475800 + }, + { + "epoch": 2.04, + "learning_rate": 2.470947275167086e-05, + "loss": 1.4853, + "step": 475900 + }, + { + "epoch": 2.04, + "learning_rate": 2.4704044365117624e-05, + "loss": 1.4781, + "step": 476000 + }, + { + "epoch": 2.05, + "learning_rate": 2.469861597856439e-05, + "loss": 1.4825, + "step": 476100 + }, + { + "epoch": 2.05, + "learning_rate": 2.469318759201115e-05, + "loss": 1.502, + "step": 476200 + }, + { + "epoch": 2.05, + "learning_rate": 2.468775920545792e-05, + "loss": 1.4894, + "step": 476300 + }, + { + "epoch": 2.05, + "learning_rate": 2.4682330818904685e-05, + "loss": 1.5123, + "step": 476400 + }, + { + "epoch": 2.05, + "learning_rate": 2.4676902432351447e-05, + "loss": 1.4597, + "step": 476500 + }, + { + "epoch": 2.05, + "learning_rate": 2.4671474045798212e-05, + "loss": 1.4996, + "step": 476600 + }, + { + "epoch": 2.05, + "learning_rate": 2.4666045659244978e-05, + "loss": 1.4949, + "step": 476700 + }, + { + "epoch": 2.05, + "learning_rate": 2.4660617272691743e-05, + "loss": 1.4974, + "step": 476800 + }, + { + "epoch": 2.05, + "learning_rate": 2.4655188886138508e-05, + "loss": 1.4989, + "step": 476900 + }, + { + "epoch": 2.05, + "learning_rate": 2.4649760499585273e-05, + "loss": 1.4714, + "step": 477000 + }, + { + "epoch": 2.05, + "learning_rate": 2.464433211303204e-05, + "loss": 1.4798, + "step": 477100 + }, + { + "epoch": 2.05, + "learning_rate": 2.46389037264788e-05, + "loss": 1.4677, + "step": 477200 + }, + { + "epoch": 2.05, + "learning_rate": 2.4633475339925566e-05, + "loss": 1.4871, + "step": 477300 + }, + { + "epoch": 2.05, + "learning_rate": 2.4628046953372334e-05, + "loss": 1.5236, + "step": 477400 + }, + { + "epoch": 2.05, + "learning_rate": 2.4622618566819096e-05, + "loss": 1.48, + "step": 477500 + }, + { + "epoch": 2.05, + "learning_rate": 2.461719018026586e-05, + "loss": 1.473, + "step": 477600 + }, + { + "epoch": 2.05, + "learning_rate": 2.4611761793712627e-05, + "loss": 1.4726, + "step": 477700 + }, + { + "epoch": 2.05, + "learning_rate": 2.4606333407159392e-05, + "loss": 1.4829, + "step": 477800 + }, + { + "epoch": 2.05, + "learning_rate": 2.4600905020606157e-05, + "loss": 1.4723, + "step": 477900 + }, + { + "epoch": 2.05, + "learning_rate": 2.4595476634052923e-05, + "loss": 1.4876, + "step": 478000 + }, + { + "epoch": 2.05, + "learning_rate": 2.4590048247499688e-05, + "loss": 1.4885, + "step": 478100 + }, + { + "epoch": 2.05, + "learning_rate": 2.458461986094645e-05, + "loss": 1.486, + "step": 478200 + }, + { + "epoch": 2.05, + "learning_rate": 2.4579191474393215e-05, + "loss": 1.4968, + "step": 478300 + }, + { + "epoch": 2.06, + "learning_rate": 2.4573763087839984e-05, + "loss": 1.4917, + "step": 478400 + }, + { + "epoch": 2.06, + "learning_rate": 2.4568334701286746e-05, + "loss": 1.4664, + "step": 478500 + }, + { + "epoch": 2.06, + "learning_rate": 2.456290631473351e-05, + "loss": 1.5124, + "step": 478600 + }, + { + "epoch": 2.06, + "learning_rate": 2.4557477928180276e-05, + "loss": 1.4945, + "step": 478700 + }, + { + "epoch": 2.06, + "learning_rate": 2.455204954162704e-05, + "loss": 1.48, + "step": 478800 + }, + { + "epoch": 2.06, + "learning_rate": 2.4546621155073807e-05, + "loss": 1.4764, + "step": 478900 + }, + { + "epoch": 2.06, + "learning_rate": 2.4541192768520572e-05, + "loss": 1.51, + "step": 479000 + }, + { + "epoch": 2.06, + "learning_rate": 2.4535764381967337e-05, + "loss": 1.4715, + "step": 479100 + }, + { + "epoch": 2.06, + "learning_rate": 2.45303359954141e-05, + "loss": 1.4857, + "step": 479200 + }, + { + "epoch": 2.06, + "learning_rate": 2.4524907608860864e-05, + "loss": 1.5054, + "step": 479300 + }, + { + "epoch": 2.06, + "learning_rate": 2.4519479222307633e-05, + "loss": 1.4588, + "step": 479400 + }, + { + "epoch": 2.06, + "learning_rate": 2.4514050835754395e-05, + "loss": 1.4906, + "step": 479500 + }, + { + "epoch": 2.06, + "learning_rate": 2.450862244920116e-05, + "loss": 1.4416, + "step": 479600 + }, + { + "epoch": 2.06, + "learning_rate": 2.4503194062647925e-05, + "loss": 1.4945, + "step": 479700 + }, + { + "epoch": 2.06, + "learning_rate": 2.449776567609469e-05, + "loss": 1.4888, + "step": 479800 + }, + { + "epoch": 2.06, + "learning_rate": 2.4492337289541452e-05, + "loss": 1.4918, + "step": 479900 + }, + { + "epoch": 2.06, + "learning_rate": 2.448690890298822e-05, + "loss": 1.4872, + "step": 480000 + }, + { + "epoch": 2.06, + "eval_loss": 1.435674786567688, + "eval_runtime": 17.7907, + "eval_samples_per_second": 562.091, + "eval_steps_per_second": 17.593, + "step": 480000 + }, + { + "epoch": 2.06, + "learning_rate": 2.4481480516434986e-05, + "loss": 1.4666, + "step": 480100 + }, + { + "epoch": 2.06, + "learning_rate": 2.4476052129881748e-05, + "loss": 1.491, + "step": 480200 + }, + { + "epoch": 2.06, + "learning_rate": 2.4470623743328513e-05, + "loss": 1.482, + "step": 480300 + }, + { + "epoch": 2.06, + "learning_rate": 2.446519535677528e-05, + "loss": 1.4848, + "step": 480400 + }, + { + "epoch": 2.06, + "learning_rate": 2.4459766970222044e-05, + "loss": 1.4843, + "step": 480500 + }, + { + "epoch": 2.06, + "learning_rate": 2.445433858366881e-05, + "loss": 1.4776, + "step": 480600 + }, + { + "epoch": 2.07, + "learning_rate": 2.4448910197115575e-05, + "loss": 1.4761, + "step": 480700 + }, + { + "epoch": 2.07, + "learning_rate": 2.4443481810562336e-05, + "loss": 1.5118, + "step": 480800 + }, + { + "epoch": 2.07, + "learning_rate": 2.44380534240091e-05, + "loss": 1.4673, + "step": 480900 + }, + { + "epoch": 2.07, + "learning_rate": 2.443262503745587e-05, + "loss": 1.5171, + "step": 481000 + }, + { + "epoch": 2.07, + "learning_rate": 2.4427196650902636e-05, + "loss": 1.4556, + "step": 481100 + }, + { + "epoch": 2.07, + "learning_rate": 2.4421768264349397e-05, + "loss": 1.5094, + "step": 481200 + }, + { + "epoch": 2.07, + "learning_rate": 2.4416339877796163e-05, + "loss": 1.4814, + "step": 481300 + }, + { + "epoch": 2.07, + "learning_rate": 2.4410911491242928e-05, + "loss": 1.4673, + "step": 481400 + }, + { + "epoch": 2.07, + "learning_rate": 2.4405483104689693e-05, + "loss": 1.4845, + "step": 481500 + }, + { + "epoch": 2.07, + "learning_rate": 2.440005471813646e-05, + "loss": 1.507, + "step": 481600 + }, + { + "epoch": 2.07, + "learning_rate": 2.4394626331583224e-05, + "loss": 1.4716, + "step": 481700 + }, + { + "epoch": 2.07, + "learning_rate": 2.4389197945029986e-05, + "loss": 1.4578, + "step": 481800 + }, + { + "epoch": 2.07, + "learning_rate": 2.438376955847675e-05, + "loss": 1.4706, + "step": 481900 + }, + { + "epoch": 2.07, + "learning_rate": 2.4378341171923516e-05, + "loss": 1.4801, + "step": 482000 + }, + { + "epoch": 2.07, + "learning_rate": 2.4372912785370285e-05, + "loss": 1.4767, + "step": 482100 + }, + { + "epoch": 2.07, + "learning_rate": 2.4367484398817047e-05, + "loss": 1.4646, + "step": 482200 + }, + { + "epoch": 2.07, + "learning_rate": 2.4362056012263812e-05, + "loss": 1.4711, + "step": 482300 + }, + { + "epoch": 2.07, + "learning_rate": 2.4356627625710577e-05, + "loss": 1.5042, + "step": 482400 + }, + { + "epoch": 2.07, + "learning_rate": 2.435119923915734e-05, + "loss": 1.5114, + "step": 482500 + }, + { + "epoch": 2.07, + "learning_rate": 2.4345770852604108e-05, + "loss": 1.4726, + "step": 482600 + }, + { + "epoch": 2.07, + "learning_rate": 2.4340342466050873e-05, + "loss": 1.4743, + "step": 482700 + }, + { + "epoch": 2.07, + "learning_rate": 2.4334914079497635e-05, + "loss": 1.4796, + "step": 482800 + }, + { + "epoch": 2.07, + "learning_rate": 2.43294856929444e-05, + "loss": 1.4428, + "step": 482900 + }, + { + "epoch": 2.08, + "learning_rate": 2.4324057306391165e-05, + "loss": 1.5025, + "step": 483000 + }, + { + "epoch": 2.08, + "learning_rate": 2.4318628919837934e-05, + "loss": 1.5128, + "step": 483100 + }, + { + "epoch": 2.08, + "learning_rate": 2.4313200533284696e-05, + "loss": 1.4972, + "step": 483200 + }, + { + "epoch": 2.08, + "learning_rate": 2.430777214673146e-05, + "loss": 1.4691, + "step": 483300 + }, + { + "epoch": 2.08, + "learning_rate": 2.4302343760178226e-05, + "loss": 1.4655, + "step": 483400 + }, + { + "epoch": 2.08, + "learning_rate": 2.429691537362499e-05, + "loss": 1.4713, + "step": 483500 + }, + { + "epoch": 2.08, + "learning_rate": 2.4291486987071757e-05, + "loss": 1.4963, + "step": 483600 + }, + { + "epoch": 2.08, + "learning_rate": 2.4286058600518522e-05, + "loss": 1.4704, + "step": 483700 + }, + { + "epoch": 2.08, + "learning_rate": 2.4280630213965284e-05, + "loss": 1.4644, + "step": 483800 + }, + { + "epoch": 2.08, + "learning_rate": 2.427520182741205e-05, + "loss": 1.5307, + "step": 483900 + }, + { + "epoch": 2.08, + "learning_rate": 2.4269773440858815e-05, + "loss": 1.4934, + "step": 484000 + }, + { + "epoch": 2.08, + "learning_rate": 2.426434505430558e-05, + "loss": 1.4855, + "step": 484100 + }, + { + "epoch": 2.08, + "learning_rate": 2.4258916667752345e-05, + "loss": 1.4747, + "step": 484200 + }, + { + "epoch": 2.08, + "learning_rate": 2.425348828119911e-05, + "loss": 1.4986, + "step": 484300 + }, + { + "epoch": 2.08, + "learning_rate": 2.4248059894645876e-05, + "loss": 1.496, + "step": 484400 + }, + { + "epoch": 2.08, + "learning_rate": 2.4242631508092638e-05, + "loss": 1.4976, + "step": 484500 + }, + { + "epoch": 2.08, + "learning_rate": 2.4237203121539403e-05, + "loss": 1.4877, + "step": 484600 + }, + { + "epoch": 2.08, + "learning_rate": 2.423177473498617e-05, + "loss": 1.5055, + "step": 484700 + }, + { + "epoch": 2.08, + "learning_rate": 2.4226346348432933e-05, + "loss": 1.4922, + "step": 484800 + }, + { + "epoch": 2.08, + "learning_rate": 2.42209179618797e-05, + "loss": 1.4723, + "step": 484900 + }, + { + "epoch": 2.08, + "learning_rate": 2.4215489575326464e-05, + "loss": 1.5055, + "step": 485000 + }, + { + "epoch": 2.08, + "eval_loss": 1.433396816253662, + "eval_runtime": 17.803, + "eval_samples_per_second": 561.704, + "eval_steps_per_second": 17.581, + "step": 485000 + }, + { + "epoch": 2.08, + "learning_rate": 2.421006118877323e-05, + "loss": 1.4748, + "step": 485100 + }, + { + "epoch": 2.08, + "learning_rate": 2.4204632802219994e-05, + "loss": 1.451, + "step": 485200 + }, + { + "epoch": 2.08, + "learning_rate": 2.419920441566676e-05, + "loss": 1.4868, + "step": 485300 + }, + { + "epoch": 2.09, + "learning_rate": 2.4193776029113525e-05, + "loss": 1.4999, + "step": 485400 + }, + { + "epoch": 2.09, + "learning_rate": 2.4188347642560287e-05, + "loss": 1.4661, + "step": 485500 + }, + { + "epoch": 2.09, + "learning_rate": 2.4182919256007052e-05, + "loss": 1.5062, + "step": 485600 + }, + { + "epoch": 2.09, + "learning_rate": 2.417749086945382e-05, + "loss": 1.4775, + "step": 485700 + }, + { + "epoch": 2.09, + "learning_rate": 2.4172062482900583e-05, + "loss": 1.4957, + "step": 485800 + }, + { + "epoch": 2.09, + "learning_rate": 2.4166634096347348e-05, + "loss": 1.5192, + "step": 485900 + }, + { + "epoch": 2.09, + "learning_rate": 2.4161205709794113e-05, + "loss": 1.4921, + "step": 486000 + }, + { + "epoch": 2.09, + "learning_rate": 2.415577732324088e-05, + "loss": 1.4891, + "step": 486100 + }, + { + "epoch": 2.09, + "learning_rate": 2.415034893668764e-05, + "loss": 1.4821, + "step": 486200 + }, + { + "epoch": 2.09, + "learning_rate": 2.414492055013441e-05, + "loss": 1.491, + "step": 486300 + }, + { + "epoch": 2.09, + "learning_rate": 2.4139492163581174e-05, + "loss": 1.5049, + "step": 486400 + }, + { + "epoch": 2.09, + "learning_rate": 2.4134063777027936e-05, + "loss": 1.5108, + "step": 486500 + }, + { + "epoch": 2.09, + "learning_rate": 2.41286353904747e-05, + "loss": 1.4862, + "step": 486600 + }, + { + "epoch": 2.09, + "learning_rate": 2.4123207003921467e-05, + "loss": 1.4695, + "step": 486700 + }, + { + "epoch": 2.09, + "learning_rate": 2.4117778617368232e-05, + "loss": 1.4921, + "step": 486800 + }, + { + "epoch": 2.09, + "learning_rate": 2.4112350230814997e-05, + "loss": 1.4882, + "step": 486900 + }, + { + "epoch": 2.09, + "learning_rate": 2.4106921844261762e-05, + "loss": 1.4948, + "step": 487000 + }, + { + "epoch": 2.09, + "learning_rate": 2.4101493457708528e-05, + "loss": 1.4979, + "step": 487100 + }, + { + "epoch": 2.09, + "learning_rate": 2.409606507115529e-05, + "loss": 1.4862, + "step": 487200 + }, + { + "epoch": 2.09, + "learning_rate": 2.4090636684602058e-05, + "loss": 1.4722, + "step": 487300 + }, + { + "epoch": 2.09, + "learning_rate": 2.4085208298048823e-05, + "loss": 1.4701, + "step": 487400 + }, + { + "epoch": 2.09, + "learning_rate": 2.4079779911495585e-05, + "loss": 1.46, + "step": 487500 + }, + { + "epoch": 2.09, + "learning_rate": 2.407435152494235e-05, + "loss": 1.4932, + "step": 487600 + }, + { + "epoch": 2.1, + "learning_rate": 2.4068923138389116e-05, + "loss": 1.482, + "step": 487700 + }, + { + "epoch": 2.1, + "learning_rate": 2.406349475183588e-05, + "loss": 1.4611, + "step": 487800 + }, + { + "epoch": 2.1, + "learning_rate": 2.4058066365282646e-05, + "loss": 1.4995, + "step": 487900 + }, + { + "epoch": 2.1, + "learning_rate": 2.405263797872941e-05, + "loss": 1.451, + "step": 488000 + }, + { + "epoch": 2.1, + "learning_rate": 2.4047209592176177e-05, + "loss": 1.4817, + "step": 488100 + }, + { + "epoch": 2.1, + "learning_rate": 2.404178120562294e-05, + "loss": 1.4892, + "step": 488200 + }, + { + "epoch": 2.1, + "learning_rate": 2.4036352819069704e-05, + "loss": 1.4875, + "step": 488300 + }, + { + "epoch": 2.1, + "learning_rate": 2.4030924432516473e-05, + "loss": 1.4717, + "step": 488400 + }, + { + "epoch": 2.1, + "learning_rate": 2.4025496045963235e-05, + "loss": 1.4998, + "step": 488500 + }, + { + "epoch": 2.1, + "learning_rate": 2.402006765941e-05, + "loss": 1.4945, + "step": 488600 + }, + { + "epoch": 2.1, + "learning_rate": 2.4014639272856765e-05, + "loss": 1.4908, + "step": 488700 + }, + { + "epoch": 2.1, + "learning_rate": 2.400921088630353e-05, + "loss": 1.4813, + "step": 488800 + }, + { + "epoch": 2.1, + "learning_rate": 2.4003782499750296e-05, + "loss": 1.4892, + "step": 488900 + }, + { + "epoch": 2.1, + "learning_rate": 2.399835411319706e-05, + "loss": 1.4752, + "step": 489000 + }, + { + "epoch": 2.1, + "learning_rate": 2.3992925726643826e-05, + "loss": 1.5083, + "step": 489100 + }, + { + "epoch": 2.1, + "learning_rate": 2.3987497340090588e-05, + "loss": 1.4873, + "step": 489200 + }, + { + "epoch": 2.1, + "learning_rate": 2.3982068953537353e-05, + "loss": 1.4875, + "step": 489300 + }, + { + "epoch": 2.1, + "learning_rate": 2.3976640566984122e-05, + "loss": 1.4657, + "step": 489400 + }, + { + "epoch": 2.1, + "learning_rate": 2.3971212180430884e-05, + "loss": 1.4937, + "step": 489500 + }, + { + "epoch": 2.1, + "learning_rate": 2.396578379387765e-05, + "loss": 1.4957, + "step": 489600 + }, + { + "epoch": 2.1, + "learning_rate": 2.3960355407324414e-05, + "loss": 1.497, + "step": 489700 + }, + { + "epoch": 2.1, + "learning_rate": 2.395492702077118e-05, + "loss": 1.4825, + "step": 489800 + }, + { + "epoch": 2.1, + "learning_rate": 2.3949498634217945e-05, + "loss": 1.481, + "step": 489900 + }, + { + "epoch": 2.11, + "learning_rate": 2.394407024766471e-05, + "loss": 1.4926, + "step": 490000 + }, + { + "epoch": 2.11, + "eval_loss": 1.4341309070587158, + "eval_runtime": 17.8295, + "eval_samples_per_second": 560.867, + "eval_steps_per_second": 17.555, + "step": 490000 + }, + { + "epoch": 2.11, + "learning_rate": 2.3938641861111475e-05, + "loss": 1.4879, + "step": 490100 + }, + { + "epoch": 2.11, + "learning_rate": 2.3933213474558237e-05, + "loss": 1.4901, + "step": 490200 + }, + { + "epoch": 2.11, + "learning_rate": 2.3927785088005003e-05, + "loss": 1.4596, + "step": 490300 + }, + { + "epoch": 2.11, + "learning_rate": 2.392235670145177e-05, + "loss": 1.5034, + "step": 490400 + }, + { + "epoch": 2.11, + "learning_rate": 2.3916928314898533e-05, + "loss": 1.4965, + "step": 490500 + }, + { + "epoch": 2.11, + "learning_rate": 2.39114999283453e-05, + "loss": 1.4744, + "step": 490600 + }, + { + "epoch": 2.11, + "learning_rate": 2.3906071541792064e-05, + "loss": 1.4832, + "step": 490700 + }, + { + "epoch": 2.11, + "learning_rate": 2.390064315523883e-05, + "loss": 1.4838, + "step": 490800 + }, + { + "epoch": 2.11, + "learning_rate": 2.389521476868559e-05, + "loss": 1.474, + "step": 490900 + }, + { + "epoch": 2.11, + "learning_rate": 2.388978638213236e-05, + "loss": 1.502, + "step": 491000 + }, + { + "epoch": 2.11, + "learning_rate": 2.3884357995579125e-05, + "loss": 1.5034, + "step": 491100 + }, + { + "epoch": 2.11, + "learning_rate": 2.3878929609025887e-05, + "loss": 1.4979, + "step": 491200 + }, + { + "epoch": 2.11, + "learning_rate": 2.3873501222472652e-05, + "loss": 1.4854, + "step": 491300 + }, + { + "epoch": 2.11, + "learning_rate": 2.3868072835919417e-05, + "loss": 1.4684, + "step": 491400 + }, + { + "epoch": 2.11, + "learning_rate": 2.3862644449366182e-05, + "loss": 1.4853, + "step": 491500 + }, + { + "epoch": 2.11, + "learning_rate": 2.3857216062812948e-05, + "loss": 1.4748, + "step": 491600 + }, + { + "epoch": 2.11, + "learning_rate": 2.3851787676259713e-05, + "loss": 1.4767, + "step": 491700 + }, + { + "epoch": 2.11, + "learning_rate": 2.3846359289706478e-05, + "loss": 1.471, + "step": 491800 + }, + { + "epoch": 2.11, + "learning_rate": 2.384093090315324e-05, + "loss": 1.5002, + "step": 491900 + }, + { + "epoch": 2.11, + "learning_rate": 2.383550251660001e-05, + "loss": 1.4628, + "step": 492000 + }, + { + "epoch": 2.11, + "learning_rate": 2.3830074130046774e-05, + "loss": 1.4645, + "step": 492100 + }, + { + "epoch": 2.11, + "learning_rate": 2.3824645743493536e-05, + "loss": 1.4713, + "step": 492200 + }, + { + "epoch": 2.11, + "learning_rate": 2.38192173569403e-05, + "loss": 1.476, + "step": 492300 + }, + { + "epoch": 2.12, + "learning_rate": 2.3813788970387066e-05, + "loss": 1.4673, + "step": 492400 + }, + { + "epoch": 2.12, + "learning_rate": 2.380836058383383e-05, + "loss": 1.4811, + "step": 492500 + }, + { + "epoch": 2.12, + "learning_rate": 2.3802932197280597e-05, + "loss": 1.4554, + "step": 492600 + }, + { + "epoch": 2.12, + "learning_rate": 2.3797503810727362e-05, + "loss": 1.4661, + "step": 492700 + }, + { + "epoch": 2.12, + "learning_rate": 2.3792075424174127e-05, + "loss": 1.45, + "step": 492800 + }, + { + "epoch": 2.12, + "learning_rate": 2.378664703762089e-05, + "loss": 1.4734, + "step": 492900 + }, + { + "epoch": 2.12, + "learning_rate": 2.3781218651067654e-05, + "loss": 1.5128, + "step": 493000 + }, + { + "epoch": 2.12, + "learning_rate": 2.3775790264514423e-05, + "loss": 1.4665, + "step": 493100 + }, + { + "epoch": 2.12, + "learning_rate": 2.3770361877961185e-05, + "loss": 1.5025, + "step": 493200 + }, + { + "epoch": 2.12, + "learning_rate": 2.376493349140795e-05, + "loss": 1.4727, + "step": 493300 + }, + { + "epoch": 2.12, + "learning_rate": 2.3759505104854716e-05, + "loss": 1.4396, + "step": 493400 + }, + { + "epoch": 2.12, + "learning_rate": 2.375407671830148e-05, + "loss": 1.4604, + "step": 493500 + }, + { + "epoch": 2.12, + "learning_rate": 2.3748648331748246e-05, + "loss": 1.4847, + "step": 493600 + }, + { + "epoch": 2.12, + "learning_rate": 2.374321994519501e-05, + "loss": 1.4947, + "step": 493700 + }, + { + "epoch": 2.12, + "learning_rate": 2.3737791558641777e-05, + "loss": 1.4925, + "step": 493800 + }, + { + "epoch": 2.12, + "learning_rate": 2.373236317208854e-05, + "loss": 1.4761, + "step": 493900 + }, + { + "epoch": 2.12, + "learning_rate": 2.3726934785535304e-05, + "loss": 1.5098, + "step": 494000 + }, + { + "epoch": 2.12, + "learning_rate": 2.3721506398982072e-05, + "loss": 1.49, + "step": 494100 + }, + { + "epoch": 2.12, + "learning_rate": 2.3716078012428834e-05, + "loss": 1.5106, + "step": 494200 + }, + { + "epoch": 2.12, + "learning_rate": 2.37106496258756e-05, + "loss": 1.489, + "step": 494300 + }, + { + "epoch": 2.12, + "learning_rate": 2.3705221239322365e-05, + "loss": 1.5143, + "step": 494400 + }, + { + "epoch": 2.12, + "learning_rate": 2.369979285276913e-05, + "loss": 1.4895, + "step": 494500 + }, + { + "epoch": 2.12, + "learning_rate": 2.3694364466215895e-05, + "loss": 1.4879, + "step": 494600 + }, + { + "epoch": 2.13, + "learning_rate": 2.368893607966266e-05, + "loss": 1.5082, + "step": 494700 + }, + { + "epoch": 2.13, + "learning_rate": 2.3683507693109426e-05, + "loss": 1.4785, + "step": 494800 + }, + { + "epoch": 2.13, + "learning_rate": 2.3678079306556188e-05, + "loss": 1.4912, + "step": 494900 + }, + { + "epoch": 2.13, + "learning_rate": 2.3672650920002953e-05, + "loss": 1.4702, + "step": 495000 + }, + { + "epoch": 2.13, + "eval_loss": 1.4374511241912842, + "eval_runtime": 17.7875, + "eval_samples_per_second": 562.192, + "eval_steps_per_second": 17.597, + "step": 495000 + }, + { + "epoch": 2.13, + "learning_rate": 2.3667222533449718e-05, + "loss": 1.5075, + "step": 495100 + }, + { + "epoch": 2.13, + "learning_rate": 2.3661794146896483e-05, + "loss": 1.4957, + "step": 495200 + }, + { + "epoch": 2.13, + "learning_rate": 2.365636576034325e-05, + "loss": 1.4718, + "step": 495300 + }, + { + "epoch": 2.13, + "learning_rate": 2.3650937373790014e-05, + "loss": 1.4745, + "step": 495400 + }, + { + "epoch": 2.13, + "learning_rate": 2.364550898723678e-05, + "loss": 1.4938, + "step": 495500 + }, + { + "epoch": 2.13, + "learning_rate": 2.364008060068354e-05, + "loss": 1.5004, + "step": 495600 + }, + { + "epoch": 2.13, + "learning_rate": 2.363465221413031e-05, + "loss": 1.4827, + "step": 495700 + }, + { + "epoch": 2.13, + "learning_rate": 2.3629223827577075e-05, + "loss": 1.5126, + "step": 495800 + }, + { + "epoch": 2.13, + "learning_rate": 2.3623795441023837e-05, + "loss": 1.4899, + "step": 495900 + }, + { + "epoch": 2.13, + "learning_rate": 2.3618367054470602e-05, + "loss": 1.4931, + "step": 496000 + }, + { + "epoch": 2.13, + "learning_rate": 2.3612938667917367e-05, + "loss": 1.454, + "step": 496100 + }, + { + "epoch": 2.13, + "learning_rate": 2.3607510281364133e-05, + "loss": 1.4802, + "step": 496200 + }, + { + "epoch": 2.13, + "learning_rate": 2.3602081894810898e-05, + "loss": 1.4774, + "step": 496300 + }, + { + "epoch": 2.13, + "learning_rate": 2.3596653508257663e-05, + "loss": 1.4746, + "step": 496400 + }, + { + "epoch": 2.13, + "learning_rate": 2.359122512170443e-05, + "loss": 1.5036, + "step": 496500 + }, + { + "epoch": 2.13, + "learning_rate": 2.358579673515119e-05, + "loss": 1.4754, + "step": 496600 + }, + { + "epoch": 2.13, + "learning_rate": 2.358036834859796e-05, + "loss": 1.4807, + "step": 496700 + }, + { + "epoch": 2.13, + "learning_rate": 2.3574939962044724e-05, + "loss": 1.4749, + "step": 496800 + }, + { + "epoch": 2.13, + "learning_rate": 2.3569511575491486e-05, + "loss": 1.5181, + "step": 496900 + }, + { + "epoch": 2.14, + "learning_rate": 2.356408318893825e-05, + "loss": 1.5014, + "step": 497000 + }, + { + "epoch": 2.14, + "learning_rate": 2.3558654802385017e-05, + "loss": 1.5074, + "step": 497100 + }, + { + "epoch": 2.14, + "learning_rate": 2.3553226415831782e-05, + "loss": 1.4821, + "step": 497200 + }, + { + "epoch": 2.14, + "learning_rate": 2.3547798029278547e-05, + "loss": 1.4851, + "step": 497300 + }, + { + "epoch": 2.14, + "learning_rate": 2.3542369642725313e-05, + "loss": 1.4852, + "step": 497400 + }, + { + "epoch": 2.14, + "learning_rate": 2.3536941256172078e-05, + "loss": 1.4734, + "step": 497500 + }, + { + "epoch": 2.14, + "learning_rate": 2.353151286961884e-05, + "loss": 1.4846, + "step": 497600 + }, + { + "epoch": 2.14, + "learning_rate": 2.3526084483065605e-05, + "loss": 1.5076, + "step": 497700 + }, + { + "epoch": 2.14, + "learning_rate": 2.3520656096512374e-05, + "loss": 1.4697, + "step": 497800 + }, + { + "epoch": 2.14, + "learning_rate": 2.3515227709959135e-05, + "loss": 1.476, + "step": 497900 + }, + { + "epoch": 2.14, + "learning_rate": 2.35097993234059e-05, + "loss": 1.4897, + "step": 498000 + }, + { + "epoch": 2.14, + "learning_rate": 2.3504370936852666e-05, + "loss": 1.4802, + "step": 498100 + }, + { + "epoch": 2.14, + "learning_rate": 2.349894255029943e-05, + "loss": 1.4825, + "step": 498200 + }, + { + "epoch": 2.14, + "learning_rate": 2.3493514163746196e-05, + "loss": 1.4744, + "step": 498300 + }, + { + "epoch": 2.14, + "learning_rate": 2.3488085777192962e-05, + "loss": 1.462, + "step": 498400 + }, + { + "epoch": 2.14, + "learning_rate": 2.3482657390639727e-05, + "loss": 1.4743, + "step": 498500 + }, + { + "epoch": 2.14, + "learning_rate": 2.347722900408649e-05, + "loss": 1.4801, + "step": 498600 + }, + { + "epoch": 2.14, + "learning_rate": 2.3471800617533254e-05, + "loss": 1.4718, + "step": 498700 + }, + { + "epoch": 2.14, + "learning_rate": 2.3466372230980023e-05, + "loss": 1.4911, + "step": 498800 + }, + { + "epoch": 2.14, + "learning_rate": 2.3460943844426785e-05, + "loss": 1.4852, + "step": 498900 + }, + { + "epoch": 2.14, + "learning_rate": 2.345551545787355e-05, + "loss": 1.4648, + "step": 499000 + }, + { + "epoch": 2.14, + "learning_rate": 2.3450087071320315e-05, + "loss": 1.4936, + "step": 499100 + }, + { + "epoch": 2.14, + "learning_rate": 2.344465868476708e-05, + "loss": 1.4734, + "step": 499200 + }, + { + "epoch": 2.15, + "learning_rate": 2.3439230298213846e-05, + "loss": 1.4845, + "step": 499300 + }, + { + "epoch": 2.15, + "learning_rate": 2.343380191166061e-05, + "loss": 1.4689, + "step": 499400 + }, + { + "epoch": 2.15, + "learning_rate": 2.3428373525107376e-05, + "loss": 1.4944, + "step": 499500 + }, + { + "epoch": 2.15, + "learning_rate": 2.3422945138554138e-05, + "loss": 1.4626, + "step": 499600 + }, + { + "epoch": 2.15, + "learning_rate": 2.3417516752000903e-05, + "loss": 1.4614, + "step": 499700 + }, + { + "epoch": 2.15, + "learning_rate": 2.341208836544767e-05, + "loss": 1.509, + "step": 499800 + }, + { + "epoch": 2.15, + "learning_rate": 2.3406659978894434e-05, + "loss": 1.4618, + "step": 499900 + }, + { + "epoch": 2.15, + "learning_rate": 2.34012315923412e-05, + "loss": 1.4663, + "step": 500000 + }, + { + "epoch": 2.15, + "eval_loss": 1.436499834060669, + "eval_runtime": 17.8206, + "eval_samples_per_second": 561.148, + "eval_steps_per_second": 17.564, + "step": 500000 + }, + { + "epoch": 2.15, + "learning_rate": 2.3395803205787964e-05, + "loss": 1.4606, + "step": 500100 + }, + { + "epoch": 2.15, + "learning_rate": 2.339037481923473e-05, + "loss": 1.4687, + "step": 500200 + }, + { + "epoch": 2.15, + "learning_rate": 2.338494643268149e-05, + "loss": 1.4966, + "step": 500300 + }, + { + "epoch": 2.15, + "learning_rate": 2.337951804612826e-05, + "loss": 1.4548, + "step": 500400 + }, + { + "epoch": 2.15, + "learning_rate": 2.3374089659575026e-05, + "loss": 1.4535, + "step": 500500 + }, + { + "epoch": 2.15, + "learning_rate": 2.3368661273021787e-05, + "loss": 1.4985, + "step": 500600 + }, + { + "epoch": 2.15, + "learning_rate": 2.3363232886468553e-05, + "loss": 1.4927, + "step": 500700 + }, + { + "epoch": 2.15, + "learning_rate": 2.3357804499915318e-05, + "loss": 1.4969, + "step": 500800 + }, + { + "epoch": 2.15, + "learning_rate": 2.3352376113362083e-05, + "loss": 1.4861, + "step": 500900 + }, + { + "epoch": 2.15, + "learning_rate": 2.334694772680885e-05, + "loss": 1.4743, + "step": 501000 + }, + { + "epoch": 2.15, + "learning_rate": 2.3341519340255614e-05, + "loss": 1.507, + "step": 501100 + }, + { + "epoch": 2.15, + "learning_rate": 2.333609095370238e-05, + "loss": 1.5005, + "step": 501200 + }, + { + "epoch": 2.15, + "learning_rate": 2.333066256714914e-05, + "loss": 1.4792, + "step": 501300 + }, + { + "epoch": 2.15, + "learning_rate": 2.332523418059591e-05, + "loss": 1.4758, + "step": 501400 + }, + { + "epoch": 2.15, + "learning_rate": 2.3319805794042675e-05, + "loss": 1.5028, + "step": 501500 + }, + { + "epoch": 2.15, + "learning_rate": 2.3314377407489437e-05, + "loss": 1.4661, + "step": 501600 + }, + { + "epoch": 2.16, + "learning_rate": 2.3308949020936202e-05, + "loss": 1.4981, + "step": 501700 + }, + { + "epoch": 2.16, + "learning_rate": 2.3303520634382967e-05, + "loss": 1.4711, + "step": 501800 + }, + { + "epoch": 2.16, + "learning_rate": 2.3298092247829732e-05, + "loss": 1.4494, + "step": 501900 + }, + { + "epoch": 2.16, + "learning_rate": 2.3292663861276498e-05, + "loss": 1.4624, + "step": 502000 + }, + { + "epoch": 2.16, + "learning_rate": 2.3287235474723263e-05, + "loss": 1.49, + "step": 502100 + }, + { + "epoch": 2.16, + "learning_rate": 2.3281807088170028e-05, + "loss": 1.4928, + "step": 502200 + }, + { + "epoch": 2.16, + "learning_rate": 2.327637870161679e-05, + "loss": 1.5082, + "step": 502300 + }, + { + "epoch": 2.16, + "learning_rate": 2.3270950315063555e-05, + "loss": 1.4798, + "step": 502400 + }, + { + "epoch": 2.16, + "learning_rate": 2.3265521928510324e-05, + "loss": 1.4794, + "step": 502500 + }, + { + "epoch": 2.16, + "learning_rate": 2.3260093541957086e-05, + "loss": 1.5046, + "step": 502600 + }, + { + "epoch": 2.16, + "learning_rate": 2.325466515540385e-05, + "loss": 1.4967, + "step": 502700 + }, + { + "epoch": 2.16, + "learning_rate": 2.3249236768850616e-05, + "loss": 1.5038, + "step": 502800 + }, + { + "epoch": 2.16, + "learning_rate": 2.324380838229738e-05, + "loss": 1.5092, + "step": 502900 + }, + { + "epoch": 2.16, + "learning_rate": 2.3238379995744147e-05, + "loss": 1.4548, + "step": 503000 + }, + { + "epoch": 2.16, + "learning_rate": 2.3232951609190912e-05, + "loss": 1.4837, + "step": 503100 + }, + { + "epoch": 2.16, + "learning_rate": 2.3227523222637677e-05, + "loss": 1.4739, + "step": 503200 + }, + { + "epoch": 2.16, + "learning_rate": 2.322209483608444e-05, + "loss": 1.489, + "step": 503300 + }, + { + "epoch": 2.16, + "learning_rate": 2.3216666449531205e-05, + "loss": 1.4906, + "step": 503400 + }, + { + "epoch": 2.16, + "learning_rate": 2.3211238062977973e-05, + "loss": 1.5106, + "step": 503500 + }, + { + "epoch": 2.16, + "learning_rate": 2.3205809676424735e-05, + "loss": 1.4605, + "step": 503600 + }, + { + "epoch": 2.16, + "learning_rate": 2.32003812898715e-05, + "loss": 1.4719, + "step": 503700 + }, + { + "epoch": 2.16, + "learning_rate": 2.3194952903318266e-05, + "loss": 1.4849, + "step": 503800 + }, + { + "epoch": 2.16, + "learning_rate": 2.318952451676503e-05, + "loss": 1.4882, + "step": 503900 + }, + { + "epoch": 2.17, + "learning_rate": 2.3184096130211793e-05, + "loss": 1.4845, + "step": 504000 + }, + { + "epoch": 2.17, + "learning_rate": 2.317866774365856e-05, + "loss": 1.4624, + "step": 504100 + }, + { + "epoch": 2.17, + "learning_rate": 2.3173239357105327e-05, + "loss": 1.4871, + "step": 504200 + }, + { + "epoch": 2.17, + "learning_rate": 2.316781097055209e-05, + "loss": 1.4711, + "step": 504300 + }, + { + "epoch": 2.17, + "learning_rate": 2.3162382583998854e-05, + "loss": 1.4815, + "step": 504400 + }, + { + "epoch": 2.17, + "learning_rate": 2.315695419744562e-05, + "loss": 1.4643, + "step": 504500 + }, + { + "epoch": 2.17, + "learning_rate": 2.3151525810892384e-05, + "loss": 1.4835, + "step": 504600 + }, + { + "epoch": 2.17, + "learning_rate": 2.314609742433915e-05, + "loss": 1.4565, + "step": 504700 + }, + { + "epoch": 2.17, + "learning_rate": 2.3140669037785915e-05, + "loss": 1.4739, + "step": 504800 + }, + { + "epoch": 2.17, + "learning_rate": 2.313524065123268e-05, + "loss": 1.4988, + "step": 504900 + }, + { + "epoch": 2.17, + "learning_rate": 2.3129812264679442e-05, + "loss": 1.4915, + "step": 505000 + }, + { + "epoch": 2.17, + "eval_loss": 1.4346122741699219, + "eval_runtime": 17.8459, + "eval_samples_per_second": 560.352, + "eval_steps_per_second": 17.539, + "step": 505000 + }, + { + "epoch": 2.17, + "learning_rate": 2.312438387812621e-05, + "loss": 1.5072, + "step": 505100 + }, + { + "epoch": 2.17, + "learning_rate": 2.3118955491572976e-05, + "loss": 1.5014, + "step": 505200 + }, + { + "epoch": 2.17, + "learning_rate": 2.3113527105019738e-05, + "loss": 1.461, + "step": 505300 + }, + { + "epoch": 2.17, + "learning_rate": 2.3108098718466503e-05, + "loss": 1.4723, + "step": 505400 + }, + { + "epoch": 2.17, + "learning_rate": 2.310267033191327e-05, + "loss": 1.4914, + "step": 505500 + }, + { + "epoch": 2.17, + "learning_rate": 2.3097241945360034e-05, + "loss": 1.4882, + "step": 505600 + }, + { + "epoch": 2.17, + "learning_rate": 2.30918135588068e-05, + "loss": 1.4902, + "step": 505700 + }, + { + "epoch": 2.17, + "learning_rate": 2.3086385172253564e-05, + "loss": 1.4811, + "step": 505800 + }, + { + "epoch": 2.17, + "learning_rate": 2.308095678570033e-05, + "loss": 1.4857, + "step": 505900 + }, + { + "epoch": 2.17, + "learning_rate": 2.307552839914709e-05, + "loss": 1.5034, + "step": 506000 + }, + { + "epoch": 2.17, + "learning_rate": 2.3070100012593857e-05, + "loss": 1.4841, + "step": 506100 + }, + { + "epoch": 2.17, + "learning_rate": 2.3064671626040625e-05, + "loss": 1.4934, + "step": 506200 + }, + { + "epoch": 2.18, + "learning_rate": 2.3059243239487387e-05, + "loss": 1.4803, + "step": 506300 + }, + { + "epoch": 2.18, + "learning_rate": 2.3053814852934152e-05, + "loss": 1.4701, + "step": 506400 + }, + { + "epoch": 2.18, + "learning_rate": 2.3048386466380918e-05, + "loss": 1.4797, + "step": 506500 + }, + { + "epoch": 2.18, + "learning_rate": 2.3042958079827683e-05, + "loss": 1.4941, + "step": 506600 + }, + { + "epoch": 2.18, + "learning_rate": 2.3037529693274448e-05, + "loss": 1.4571, + "step": 506700 + }, + { + "epoch": 2.18, + "learning_rate": 2.3032101306721213e-05, + "loss": 1.4694, + "step": 506800 + }, + { + "epoch": 2.18, + "learning_rate": 2.302667292016798e-05, + "loss": 1.4953, + "step": 506900 + }, + { + "epoch": 2.18, + "learning_rate": 2.302124453361474e-05, + "loss": 1.5125, + "step": 507000 + }, + { + "epoch": 2.18, + "learning_rate": 2.3015816147061506e-05, + "loss": 1.5192, + "step": 507100 + }, + { + "epoch": 2.18, + "learning_rate": 2.3010387760508274e-05, + "loss": 1.4959, + "step": 507200 + }, + { + "epoch": 2.18, + "learning_rate": 2.3004959373955036e-05, + "loss": 1.4939, + "step": 507300 + }, + { + "epoch": 2.18, + "learning_rate": 2.29995309874018e-05, + "loss": 1.5133, + "step": 507400 + }, + { + "epoch": 2.18, + "learning_rate": 2.2994102600848567e-05, + "loss": 1.4837, + "step": 507500 + }, + { + "epoch": 2.18, + "learning_rate": 2.2988674214295332e-05, + "loss": 1.4846, + "step": 507600 + }, + { + "epoch": 2.18, + "learning_rate": 2.2983245827742097e-05, + "loss": 1.502, + "step": 507700 + }, + { + "epoch": 2.18, + "learning_rate": 2.2977817441188863e-05, + "loss": 1.5174, + "step": 507800 + }, + { + "epoch": 2.18, + "learning_rate": 2.2972389054635628e-05, + "loss": 1.4953, + "step": 507900 + }, + { + "epoch": 2.18, + "learning_rate": 2.296696066808239e-05, + "loss": 1.4986, + "step": 508000 + }, + { + "epoch": 2.18, + "learning_rate": 2.2961532281529155e-05, + "loss": 1.4764, + "step": 508100 + }, + { + "epoch": 2.18, + "learning_rate": 2.295610389497592e-05, + "loss": 1.4775, + "step": 508200 + }, + { + "epoch": 2.18, + "learning_rate": 2.2950675508422686e-05, + "loss": 1.4733, + "step": 508300 + }, + { + "epoch": 2.18, + "learning_rate": 2.294524712186945e-05, + "loss": 1.4912, + "step": 508400 + }, + { + "epoch": 2.18, + "learning_rate": 2.2939818735316216e-05, + "loss": 1.4955, + "step": 508500 + }, + { + "epoch": 2.18, + "learning_rate": 2.293439034876298e-05, + "loss": 1.466, + "step": 508600 + }, + { + "epoch": 2.19, + "learning_rate": 2.2928961962209743e-05, + "loss": 1.5112, + "step": 508700 + }, + { + "epoch": 2.19, + "learning_rate": 2.2923533575656512e-05, + "loss": 1.4897, + "step": 508800 + }, + { + "epoch": 2.19, + "learning_rate": 2.2918105189103277e-05, + "loss": 1.5011, + "step": 508900 + }, + { + "epoch": 2.19, + "learning_rate": 2.291267680255004e-05, + "loss": 1.4853, + "step": 509000 + }, + { + "epoch": 2.19, + "learning_rate": 2.2907248415996804e-05, + "loss": 1.4533, + "step": 509100 + }, + { + "epoch": 2.19, + "learning_rate": 2.290182002944357e-05, + "loss": 1.4893, + "step": 509200 + }, + { + "epoch": 2.19, + "learning_rate": 2.2896391642890335e-05, + "loss": 1.5081, + "step": 509300 + }, + { + "epoch": 2.19, + "learning_rate": 2.28909632563371e-05, + "loss": 1.4702, + "step": 509400 + }, + { + "epoch": 2.19, + "learning_rate": 2.2885534869783865e-05, + "loss": 1.5152, + "step": 509500 + }, + { + "epoch": 2.19, + "learning_rate": 2.288010648323063e-05, + "loss": 1.4632, + "step": 509600 + }, + { + "epoch": 2.19, + "learning_rate": 2.2874678096677392e-05, + "loss": 1.4534, + "step": 509700 + }, + { + "epoch": 2.19, + "learning_rate": 2.286924971012416e-05, + "loss": 1.484, + "step": 509800 + }, + { + "epoch": 2.19, + "learning_rate": 2.2863821323570926e-05, + "loss": 1.464, + "step": 509900 + }, + { + "epoch": 2.19, + "learning_rate": 2.2858392937017688e-05, + "loss": 1.4934, + "step": 510000 + }, + { + "epoch": 2.19, + "eval_loss": 1.4337053298950195, + "eval_runtime": 17.8118, + "eval_samples_per_second": 561.427, + "eval_steps_per_second": 17.573, + "step": 510000 + }, + { + "epoch": 2.19, + "learning_rate": 2.2852964550464454e-05, + "loss": 1.4795, + "step": 510100 + }, + { + "epoch": 2.19, + "learning_rate": 2.284753616391122e-05, + "loss": 1.4797, + "step": 510200 + }, + { + "epoch": 2.19, + "learning_rate": 2.2842107777357984e-05, + "loss": 1.4538, + "step": 510300 + }, + { + "epoch": 2.19, + "learning_rate": 2.283667939080475e-05, + "loss": 1.4941, + "step": 510400 + }, + { + "epoch": 2.19, + "learning_rate": 2.2831251004251515e-05, + "loss": 1.4898, + "step": 510500 + }, + { + "epoch": 2.19, + "learning_rate": 2.2825822617698276e-05, + "loss": 1.4943, + "step": 510600 + }, + { + "epoch": 2.19, + "learning_rate": 2.2820394231145042e-05, + "loss": 1.4766, + "step": 510700 + }, + { + "epoch": 2.19, + "learning_rate": 2.2814965844591807e-05, + "loss": 1.5113, + "step": 510800 + }, + { + "epoch": 2.19, + "learning_rate": 2.2809537458038576e-05, + "loss": 1.487, + "step": 510900 + }, + { + "epoch": 2.2, + "learning_rate": 2.2804109071485337e-05, + "loss": 1.4824, + "step": 511000 + }, + { + "epoch": 2.2, + "learning_rate": 2.2798680684932103e-05, + "loss": 1.4604, + "step": 511100 + }, + { + "epoch": 2.2, + "learning_rate": 2.2793252298378868e-05, + "loss": 1.4889, + "step": 511200 + }, + { + "epoch": 2.2, + "learning_rate": 2.278782391182563e-05, + "loss": 1.5088, + "step": 511300 + }, + { + "epoch": 2.2, + "learning_rate": 2.27823955252724e-05, + "loss": 1.4742, + "step": 511400 + }, + { + "epoch": 2.2, + "learning_rate": 2.2776967138719164e-05, + "loss": 1.4663, + "step": 511500 + }, + { + "epoch": 2.2, + "learning_rate": 2.2771538752165926e-05, + "loss": 1.4459, + "step": 511600 + }, + { + "epoch": 2.2, + "learning_rate": 2.276611036561269e-05, + "loss": 1.4881, + "step": 511700 + }, + { + "epoch": 2.2, + "learning_rate": 2.2760681979059456e-05, + "loss": 1.4614, + "step": 511800 + }, + { + "epoch": 2.2, + "learning_rate": 2.2755253592506225e-05, + "loss": 1.4643, + "step": 511900 + }, + { + "epoch": 2.2, + "learning_rate": 2.2749825205952987e-05, + "loss": 1.4864, + "step": 512000 + }, + { + "epoch": 2.2, + "learning_rate": 2.2744396819399752e-05, + "loss": 1.4967, + "step": 512100 + }, + { + "epoch": 2.2, + "learning_rate": 2.2738968432846517e-05, + "loss": 1.4663, + "step": 512200 + }, + { + "epoch": 2.2, + "learning_rate": 2.273354004629328e-05, + "loss": 1.5019, + "step": 512300 + }, + { + "epoch": 2.2, + "learning_rate": 2.2728111659740048e-05, + "loss": 1.4702, + "step": 512400 + }, + { + "epoch": 2.2, + "learning_rate": 2.2722683273186813e-05, + "loss": 1.5025, + "step": 512500 + }, + { + "epoch": 2.2, + "learning_rate": 2.2717254886633575e-05, + "loss": 1.496, + "step": 512600 + }, + { + "epoch": 2.2, + "learning_rate": 2.271182650008034e-05, + "loss": 1.4837, + "step": 512700 + }, + { + "epoch": 2.2, + "learning_rate": 2.2706398113527105e-05, + "loss": 1.4755, + "step": 512800 + }, + { + "epoch": 2.2, + "learning_rate": 2.270096972697387e-05, + "loss": 1.4758, + "step": 512900 + }, + { + "epoch": 2.2, + "learning_rate": 2.2695541340420636e-05, + "loss": 1.4985, + "step": 513000 + }, + { + "epoch": 2.2, + "learning_rate": 2.26901129538674e-05, + "loss": 1.4931, + "step": 513100 + }, + { + "epoch": 2.2, + "learning_rate": 2.2684684567314166e-05, + "loss": 1.4566, + "step": 513200 + }, + { + "epoch": 2.21, + "learning_rate": 2.267925618076093e-05, + "loss": 1.4659, + "step": 513300 + }, + { + "epoch": 2.21, + "learning_rate": 2.2673827794207694e-05, + "loss": 1.4651, + "step": 513400 + }, + { + "epoch": 2.21, + "learning_rate": 2.2668399407654462e-05, + "loss": 1.4844, + "step": 513500 + }, + { + "epoch": 2.21, + "learning_rate": 2.2662971021101224e-05, + "loss": 1.4773, + "step": 513600 + }, + { + "epoch": 2.21, + "learning_rate": 2.265754263454799e-05, + "loss": 1.4805, + "step": 513700 + }, + { + "epoch": 2.21, + "learning_rate": 2.2652114247994755e-05, + "loss": 1.4656, + "step": 513800 + }, + { + "epoch": 2.21, + "learning_rate": 2.264668586144152e-05, + "loss": 1.4908, + "step": 513900 + }, + { + "epoch": 2.21, + "learning_rate": 2.2641257474888285e-05, + "loss": 1.4835, + "step": 514000 + }, + { + "epoch": 2.21, + "learning_rate": 2.263582908833505e-05, + "loss": 1.4758, + "step": 514100 + }, + { + "epoch": 2.21, + "learning_rate": 2.2630400701781816e-05, + "loss": 1.4812, + "step": 514200 + }, + { + "epoch": 2.21, + "learning_rate": 2.2624972315228578e-05, + "loss": 1.4898, + "step": 514300 + }, + { + "epoch": 2.21, + "learning_rate": 2.2619543928675343e-05, + "loss": 1.4905, + "step": 514400 + }, + { + "epoch": 2.21, + "learning_rate": 2.261411554212211e-05, + "loss": 1.4645, + "step": 514500 + }, + { + "epoch": 2.21, + "learning_rate": 2.2608687155568873e-05, + "loss": 1.4528, + "step": 514600 + }, + { + "epoch": 2.21, + "learning_rate": 2.260325876901564e-05, + "loss": 1.4878, + "step": 514700 + }, + { + "epoch": 2.21, + "learning_rate": 2.2597830382462404e-05, + "loss": 1.4721, + "step": 514800 + }, + { + "epoch": 2.21, + "learning_rate": 2.259240199590917e-05, + "loss": 1.5166, + "step": 514900 + }, + { + "epoch": 2.21, + "learning_rate": 2.258697360935593e-05, + "loss": 1.4518, + "step": 515000 + }, + { + "epoch": 2.21, + "eval_loss": 1.4345492124557495, + "eval_runtime": 17.7486, + "eval_samples_per_second": 563.424, + "eval_steps_per_second": 17.635, + "step": 515000 + }, + { + "epoch": 2.21, + "learning_rate": 2.25815452228027e-05, + "loss": 1.5096, + "step": 515100 + }, + { + "epoch": 2.21, + "learning_rate": 2.2576116836249465e-05, + "loss": 1.5049, + "step": 515200 + }, + { + "epoch": 2.21, + "learning_rate": 2.2570688449696227e-05, + "loss": 1.4933, + "step": 515300 + }, + { + "epoch": 2.21, + "learning_rate": 2.2565260063142992e-05, + "loss": 1.4921, + "step": 515400 + }, + { + "epoch": 2.21, + "learning_rate": 2.2559831676589757e-05, + "loss": 1.4608, + "step": 515500 + }, + { + "epoch": 2.22, + "learning_rate": 2.2554403290036523e-05, + "loss": 1.496, + "step": 515600 + }, + { + "epoch": 2.22, + "learning_rate": 2.2548974903483288e-05, + "loss": 1.5036, + "step": 515700 + }, + { + "epoch": 2.22, + "learning_rate": 2.2543546516930053e-05, + "loss": 1.4806, + "step": 515800 + }, + { + "epoch": 2.22, + "learning_rate": 2.253811813037682e-05, + "loss": 1.4941, + "step": 515900 + }, + { + "epoch": 2.22, + "learning_rate": 2.253268974382358e-05, + "loss": 1.4829, + "step": 516000 + }, + { + "epoch": 2.22, + "learning_rate": 2.252726135727035e-05, + "loss": 1.4574, + "step": 516100 + }, + { + "epoch": 2.22, + "learning_rate": 2.2521832970717114e-05, + "loss": 1.4756, + "step": 516200 + }, + { + "epoch": 2.22, + "learning_rate": 2.2516404584163876e-05, + "loss": 1.5002, + "step": 516300 + }, + { + "epoch": 2.22, + "learning_rate": 2.251097619761064e-05, + "loss": 1.4989, + "step": 516400 + }, + { + "epoch": 2.22, + "learning_rate": 2.2505547811057407e-05, + "loss": 1.4672, + "step": 516500 + }, + { + "epoch": 2.22, + "learning_rate": 2.2500119424504172e-05, + "loss": 1.478, + "step": 516600 + }, + { + "epoch": 2.22, + "learning_rate": 2.2494691037950937e-05, + "loss": 1.4855, + "step": 516700 + }, + { + "epoch": 2.22, + "learning_rate": 2.2489262651397702e-05, + "loss": 1.4786, + "step": 516800 + }, + { + "epoch": 2.22, + "learning_rate": 2.2483834264844468e-05, + "loss": 1.4896, + "step": 516900 + }, + { + "epoch": 2.22, + "learning_rate": 2.247840587829123e-05, + "loss": 1.4775, + "step": 517000 + }, + { + "epoch": 2.22, + "learning_rate": 2.2472977491737995e-05, + "loss": 1.4999, + "step": 517100 + }, + { + "epoch": 2.22, + "learning_rate": 2.2467549105184763e-05, + "loss": 1.4808, + "step": 517200 + }, + { + "epoch": 2.22, + "learning_rate": 2.2462120718631525e-05, + "loss": 1.491, + "step": 517300 + }, + { + "epoch": 2.22, + "learning_rate": 2.245669233207829e-05, + "loss": 1.4508, + "step": 517400 + }, + { + "epoch": 2.22, + "learning_rate": 2.2451263945525056e-05, + "loss": 1.4689, + "step": 517500 + }, + { + "epoch": 2.22, + "learning_rate": 2.244583555897182e-05, + "loss": 1.4916, + "step": 517600 + }, + { + "epoch": 2.22, + "learning_rate": 2.2440407172418586e-05, + "loss": 1.4849, + "step": 517700 + }, + { + "epoch": 2.22, + "learning_rate": 2.243497878586535e-05, + "loss": 1.4747, + "step": 517800 + }, + { + "epoch": 2.22, + "learning_rate": 2.2429550399312117e-05, + "loss": 1.4743, + "step": 517900 + }, + { + "epoch": 2.23, + "learning_rate": 2.242412201275888e-05, + "loss": 1.5091, + "step": 518000 + }, + { + "epoch": 2.23, + "learning_rate": 2.2418693626205644e-05, + "loss": 1.4748, + "step": 518100 + }, + { + "epoch": 2.23, + "learning_rate": 2.2413265239652413e-05, + "loss": 1.5079, + "step": 518200 + }, + { + "epoch": 2.23, + "learning_rate": 2.2407836853099175e-05, + "loss": 1.5001, + "step": 518300 + }, + { + "epoch": 2.23, + "learning_rate": 2.240240846654594e-05, + "loss": 1.51, + "step": 518400 + }, + { + "epoch": 2.23, + "learning_rate": 2.2396980079992705e-05, + "loss": 1.4986, + "step": 518500 + }, + { + "epoch": 2.23, + "learning_rate": 2.239155169343947e-05, + "loss": 1.4622, + "step": 518600 + }, + { + "epoch": 2.23, + "learning_rate": 2.2386123306886236e-05, + "loss": 1.4945, + "step": 518700 + }, + { + "epoch": 2.23, + "learning_rate": 2.2380694920333e-05, + "loss": 1.5062, + "step": 518800 + }, + { + "epoch": 2.23, + "learning_rate": 2.2375266533779766e-05, + "loss": 1.4986, + "step": 518900 + }, + { + "epoch": 2.23, + "learning_rate": 2.2369838147226528e-05, + "loss": 1.4779, + "step": 519000 + }, + { + "epoch": 2.23, + "learning_rate": 2.2364409760673293e-05, + "loss": 1.4839, + "step": 519100 + }, + { + "epoch": 2.23, + "learning_rate": 2.2358981374120062e-05, + "loss": 1.511, + "step": 519200 + }, + { + "epoch": 2.23, + "learning_rate": 2.2353552987566824e-05, + "loss": 1.5019, + "step": 519300 + }, + { + "epoch": 2.23, + "learning_rate": 2.234812460101359e-05, + "loss": 1.462, + "step": 519400 + }, + { + "epoch": 2.23, + "learning_rate": 2.2342696214460354e-05, + "loss": 1.4597, + "step": 519500 + }, + { + "epoch": 2.23, + "learning_rate": 2.233726782790712e-05, + "loss": 1.4734, + "step": 519600 + }, + { + "epoch": 2.23, + "learning_rate": 2.233183944135388e-05, + "loss": 1.4984, + "step": 519700 + }, + { + "epoch": 2.23, + "learning_rate": 2.232641105480065e-05, + "loss": 1.4915, + "step": 519800 + }, + { + "epoch": 2.23, + "learning_rate": 2.2320982668247415e-05, + "loss": 1.4425, + "step": 519900 + }, + { + "epoch": 2.23, + "learning_rate": 2.2315554281694177e-05, + "loss": 1.4798, + "step": 520000 + }, + { + "epoch": 2.23, + "eval_loss": 1.4345911741256714, + "eval_runtime": 17.8258, + "eval_samples_per_second": 560.985, + "eval_steps_per_second": 17.559, + "step": 520000 + }, + { + "epoch": 2.23, + "learning_rate": 2.2310125895140943e-05, + "loss": 1.4789, + "step": 520100 + }, + { + "epoch": 2.23, + "learning_rate": 2.2304697508587708e-05, + "loss": 1.5009, + "step": 520200 + }, + { + "epoch": 2.24, + "learning_rate": 2.2299269122034473e-05, + "loss": 1.4765, + "step": 520300 + }, + { + "epoch": 2.24, + "learning_rate": 2.229384073548124e-05, + "loss": 1.4855, + "step": 520400 + }, + { + "epoch": 2.24, + "learning_rate": 2.2288412348928004e-05, + "loss": 1.4905, + "step": 520500 + }, + { + "epoch": 2.24, + "learning_rate": 2.228298396237477e-05, + "loss": 1.5065, + "step": 520600 + }, + { + "epoch": 2.24, + "learning_rate": 2.227755557582153e-05, + "loss": 1.4894, + "step": 520700 + }, + { + "epoch": 2.24, + "learning_rate": 2.22721271892683e-05, + "loss": 1.4793, + "step": 520800 + }, + { + "epoch": 2.24, + "learning_rate": 2.2266698802715065e-05, + "loss": 1.475, + "step": 520900 + }, + { + "epoch": 2.24, + "learning_rate": 2.2261270416161827e-05, + "loss": 1.4647, + "step": 521000 + }, + { + "epoch": 2.24, + "learning_rate": 2.2255842029608592e-05, + "loss": 1.4542, + "step": 521100 + }, + { + "epoch": 2.24, + "learning_rate": 2.2250413643055357e-05, + "loss": 1.4955, + "step": 521200 + }, + { + "epoch": 2.24, + "learning_rate": 2.2244985256502122e-05, + "loss": 1.4918, + "step": 521300 + }, + { + "epoch": 2.24, + "learning_rate": 2.2239556869948888e-05, + "loss": 1.5104, + "step": 521400 + }, + { + "epoch": 2.24, + "learning_rate": 2.2234128483395653e-05, + "loss": 1.5074, + "step": 521500 + }, + { + "epoch": 2.24, + "learning_rate": 2.2228700096842418e-05, + "loss": 1.4817, + "step": 521600 + }, + { + "epoch": 2.24, + "learning_rate": 2.222327171028918e-05, + "loss": 1.4727, + "step": 521700 + }, + { + "epoch": 2.24, + "learning_rate": 2.2217843323735945e-05, + "loss": 1.4676, + "step": 521800 + }, + { + "epoch": 2.24, + "learning_rate": 2.2212414937182714e-05, + "loss": 1.4566, + "step": 521900 + }, + { + "epoch": 2.24, + "learning_rate": 2.2206986550629476e-05, + "loss": 1.4915, + "step": 522000 + }, + { + "epoch": 2.24, + "learning_rate": 2.220155816407624e-05, + "loss": 1.4709, + "step": 522100 + }, + { + "epoch": 2.24, + "learning_rate": 2.2196129777523006e-05, + "loss": 1.4749, + "step": 522200 + }, + { + "epoch": 2.24, + "learning_rate": 2.219070139096977e-05, + "loss": 1.4818, + "step": 522300 + }, + { + "epoch": 2.24, + "learning_rate": 2.2185273004416537e-05, + "loss": 1.4536, + "step": 522400 + }, + { + "epoch": 2.24, + "learning_rate": 2.2179844617863302e-05, + "loss": 1.4939, + "step": 522500 + }, + { + "epoch": 2.25, + "learning_rate": 2.2174416231310067e-05, + "loss": 1.4868, + "step": 522600 + }, + { + "epoch": 2.25, + "learning_rate": 2.216898784475683e-05, + "loss": 1.4932, + "step": 522700 + }, + { + "epoch": 2.25, + "learning_rate": 2.2163559458203594e-05, + "loss": 1.4582, + "step": 522800 + }, + { + "epoch": 2.25, + "learning_rate": 2.2158131071650363e-05, + "loss": 1.4579, + "step": 522900 + }, + { + "epoch": 2.25, + "learning_rate": 2.2152702685097125e-05, + "loss": 1.477, + "step": 523000 + }, + { + "epoch": 2.25, + "learning_rate": 2.214727429854389e-05, + "loss": 1.4633, + "step": 523100 + }, + { + "epoch": 2.25, + "learning_rate": 2.2141845911990656e-05, + "loss": 1.4759, + "step": 523200 + }, + { + "epoch": 2.25, + "learning_rate": 2.213641752543742e-05, + "loss": 1.4887, + "step": 523300 + }, + { + "epoch": 2.25, + "learning_rate": 2.2130989138884186e-05, + "loss": 1.4862, + "step": 523400 + }, + { + "epoch": 2.25, + "learning_rate": 2.212556075233095e-05, + "loss": 1.4889, + "step": 523500 + }, + { + "epoch": 2.25, + "learning_rate": 2.2120132365777717e-05, + "loss": 1.4924, + "step": 523600 + }, + { + "epoch": 2.25, + "learning_rate": 2.211470397922448e-05, + "loss": 1.4896, + "step": 523700 + }, + { + "epoch": 2.25, + "learning_rate": 2.2109275592671244e-05, + "loss": 1.4904, + "step": 523800 + }, + { + "epoch": 2.25, + "learning_rate": 2.210384720611801e-05, + "loss": 1.4934, + "step": 523900 + }, + { + "epoch": 2.25, + "learning_rate": 2.2098418819564774e-05, + "loss": 1.5086, + "step": 524000 + }, + { + "epoch": 2.25, + "learning_rate": 2.209299043301154e-05, + "loss": 1.4682, + "step": 524100 + }, + { + "epoch": 2.25, + "learning_rate": 2.2087562046458305e-05, + "loss": 1.4608, + "step": 524200 + }, + { + "epoch": 2.25, + "learning_rate": 2.208213365990507e-05, + "loss": 1.4936, + "step": 524300 + }, + { + "epoch": 2.25, + "learning_rate": 2.2076705273351832e-05, + "loss": 1.4867, + "step": 524400 + }, + { + "epoch": 2.25, + "learning_rate": 2.20712768867986e-05, + "loss": 1.472, + "step": 524500 + }, + { + "epoch": 2.25, + "learning_rate": 2.2065848500245366e-05, + "loss": 1.4607, + "step": 524600 + }, + { + "epoch": 2.25, + "learning_rate": 2.2060420113692128e-05, + "loss": 1.4941, + "step": 524700 + }, + { + "epoch": 2.25, + "learning_rate": 2.2054991727138893e-05, + "loss": 1.4715, + "step": 524800 + }, + { + "epoch": 2.26, + "learning_rate": 2.2049563340585658e-05, + "loss": 1.4758, + "step": 524900 + }, + { + "epoch": 2.26, + "learning_rate": 2.2044134954032424e-05, + "loss": 1.4918, + "step": 525000 + }, + { + "epoch": 2.26, + "eval_loss": 1.4338562488555908, + "eval_runtime": 17.7905, + "eval_samples_per_second": 562.097, + "eval_steps_per_second": 17.594, + "step": 525000 + } + ], + "max_steps": 931084, + "num_train_epochs": 4, + "total_flos": 1.0159882796191924e+19, + "trial_name": null, + "trial_params": null +}