diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,31325 @@ +{ + "best_metric": 2.46012282371521, + "best_model_checkpoint": "runs/gen_Maestro_REMI/REMI_bpe20000/checkpoint-99000", + "epoch": 168.9189189189189, + "global_step": 100000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 6.666666666666667e-08, + "loss": 9.9965, + "step": 20 + }, + { + "epoch": 0.07, + "learning_rate": 1.3333333333333334e-07, + "loss": 9.9861, + "step": 40 + }, + { + "epoch": 0.1, + "learning_rate": 2.0000000000000002e-07, + "loss": 9.9647, + "step": 60 + }, + { + "epoch": 0.14, + "learning_rate": 2.6666666666666667e-07, + "loss": 9.9301, + "step": 80 + }, + { + "epoch": 0.17, + "learning_rate": 3.3333333333333335e-07, + "loss": 9.8868, + "step": 100 + }, + { + "epoch": 0.2, + "learning_rate": 4.0000000000000003e-07, + "loss": 9.8358, + "step": 120 + }, + { + "epoch": 0.24, + "learning_rate": 4.666666666666667e-07, + "loss": 9.7888, + "step": 140 + }, + { + "epoch": 0.27, + "learning_rate": 5.333333333333333e-07, + "loss": 9.7479, + "step": 160 + }, + { + "epoch": 0.3, + "learning_rate": 6.000000000000001e-07, + "loss": 9.7129, + "step": 180 + }, + { + "epoch": 0.34, + "learning_rate": 6.666666666666667e-07, + "loss": 9.6937, + "step": 200 + }, + { + "epoch": 0.37, + "learning_rate": 7.333333333333333e-07, + "loss": 9.6719, + "step": 220 + }, + { + "epoch": 0.41, + "learning_rate": 8.000000000000001e-07, + "loss": 9.6522, + "step": 240 + }, + { + "epoch": 0.44, + "learning_rate": 8.666666666666667e-07, + "loss": 9.6366, + "step": 260 + }, + { + "epoch": 0.47, + "learning_rate": 9.333333333333334e-07, + "loss": 9.624, + "step": 280 + }, + { + "epoch": 0.51, + "learning_rate": 1.0000000000000002e-06, + "loss": 9.6086, + "step": 300 + }, + { + "epoch": 0.54, + "learning_rate": 1.0666666666666667e-06, + "loss": 9.5939, + "step": 320 + }, + { + "epoch": 0.57, + "learning_rate": 1.1333333333333334e-06, + "loss": 9.5796, + "step": 340 + }, + { + "epoch": 0.61, + "learning_rate": 1.2000000000000002e-06, + "loss": 9.5681, + "step": 360 + }, + { + "epoch": 0.64, + "learning_rate": 1.2666666666666667e-06, + "loss": 9.5595, + "step": 380 + }, + { + "epoch": 0.68, + "learning_rate": 1.3333333333333334e-06, + "loss": 9.5465, + "step": 400 + }, + { + "epoch": 0.71, + "learning_rate": 1.4000000000000001e-06, + "loss": 9.5305, + "step": 420 + }, + { + "epoch": 0.74, + "learning_rate": 1.4666666666666667e-06, + "loss": 9.5193, + "step": 440 + }, + { + "epoch": 0.78, + "learning_rate": 1.5333333333333334e-06, + "loss": 9.5049, + "step": 460 + }, + { + "epoch": 0.81, + "learning_rate": 1.6000000000000001e-06, + "loss": 9.4885, + "step": 480 + }, + { + "epoch": 0.84, + "learning_rate": 1.6666666666666667e-06, + "loss": 9.4719, + "step": 500 + }, + { + "epoch": 0.88, + "learning_rate": 1.7333333333333334e-06, + "loss": 9.4612, + "step": 520 + }, + { + "epoch": 0.91, + "learning_rate": 1.8e-06, + "loss": 9.4444, + "step": 540 + }, + { + "epoch": 0.95, + "learning_rate": 1.8666666666666669e-06, + "loss": 9.4235, + "step": 560 + }, + { + "epoch": 0.98, + "learning_rate": 1.9333333333333336e-06, + "loss": 9.4066, + "step": 580 + }, + { + "epoch": 1.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 9.3919, + "step": 600 + }, + { + "epoch": 1.05, + "learning_rate": 2.0666666666666666e-06, + "loss": 9.3731, + "step": 620 + }, + { + "epoch": 1.08, + "learning_rate": 2.1333333333333334e-06, + "loss": 9.3541, + "step": 640 + }, + { + "epoch": 1.11, + "learning_rate": 2.2e-06, + "loss": 9.336, + "step": 660 + }, + { + "epoch": 1.15, + "learning_rate": 2.266666666666667e-06, + "loss": 9.3156, + "step": 680 + }, + { + "epoch": 1.18, + "learning_rate": 2.3333333333333336e-06, + "loss": 9.2945, + "step": 700 + }, + { + "epoch": 1.22, + "learning_rate": 2.4000000000000003e-06, + "loss": 9.2704, + "step": 720 + }, + { + "epoch": 1.25, + "learning_rate": 2.4666666666666666e-06, + "loss": 9.2556, + "step": 740 + }, + { + "epoch": 1.28, + "learning_rate": 2.5333333333333334e-06, + "loss": 9.2401, + "step": 760 + }, + { + "epoch": 1.32, + "learning_rate": 2.6e-06, + "loss": 9.2208, + "step": 780 + }, + { + "epoch": 1.35, + "learning_rate": 2.666666666666667e-06, + "loss": 9.1983, + "step": 800 + }, + { + "epoch": 1.39, + "learning_rate": 2.7333333333333336e-06, + "loss": 9.1781, + "step": 820 + }, + { + "epoch": 1.42, + "learning_rate": 2.8000000000000003e-06, + "loss": 9.1574, + "step": 840 + }, + { + "epoch": 1.45, + "learning_rate": 2.8666666666666666e-06, + "loss": 9.133, + "step": 860 + }, + { + "epoch": 1.49, + "learning_rate": 2.9333333333333333e-06, + "loss": 9.1153, + "step": 880 + }, + { + "epoch": 1.52, + "learning_rate": 3e-06, + "loss": 9.0907, + "step": 900 + }, + { + "epoch": 1.55, + "learning_rate": 3.066666666666667e-06, + "loss": 9.0703, + "step": 920 + }, + { + "epoch": 1.59, + "learning_rate": 3.133333333333333e-06, + "loss": 9.0484, + "step": 940 + }, + { + "epoch": 1.62, + "learning_rate": 3.2000000000000003e-06, + "loss": 9.0259, + "step": 960 + }, + { + "epoch": 1.66, + "learning_rate": 3.2666666666666666e-06, + "loss": 9.0088, + "step": 980 + }, + { + "epoch": 1.69, + "learning_rate": 3.3333333333333333e-06, + "loss": 8.9866, + "step": 1000 + }, + { + "epoch": 1.69, + "eval_loss": 8.95605182647705, + "eval_runtime": 46.5795, + "eval_samples_per_second": 21.233, + "eval_steps_per_second": 0.129, + "eval_tse_ndup": 0.0, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.0006509407349596912, + "eval_tse_type": 0.3327419239609195, + "step": 1000 + }, + { + "epoch": 1.72, + "learning_rate": 3.4000000000000005e-06, + "loss": 8.9656, + "step": 1020 + }, + { + "epoch": 1.76, + "learning_rate": 3.466666666666667e-06, + "loss": 8.946, + "step": 1040 + }, + { + "epoch": 1.79, + "learning_rate": 3.5333333333333335e-06, + "loss": 8.9318, + "step": 1060 + }, + { + "epoch": 1.82, + "learning_rate": 3.6e-06, + "loss": 8.903, + "step": 1080 + }, + { + "epoch": 1.86, + "learning_rate": 3.666666666666667e-06, + "loss": 8.8817, + "step": 1100 + }, + { + "epoch": 1.89, + "learning_rate": 3.7333333333333337e-06, + "loss": 8.8683, + "step": 1120 + }, + { + "epoch": 1.93, + "learning_rate": 3.8e-06, + "loss": 8.8524, + "step": 1140 + }, + { + "epoch": 1.96, + "learning_rate": 3.866666666666667e-06, + "loss": 8.833, + "step": 1160 + }, + { + "epoch": 1.99, + "learning_rate": 3.9333333333333335e-06, + "loss": 8.8211, + "step": 1180 + }, + { + "epoch": 2.03, + "learning_rate": 4.000000000000001e-06, + "loss": 8.7941, + "step": 1200 + }, + { + "epoch": 2.06, + "learning_rate": 4.066666666666666e-06, + "loss": 8.7745, + "step": 1220 + }, + { + "epoch": 2.09, + "learning_rate": 4.133333333333333e-06, + "loss": 8.761, + "step": 1240 + }, + { + "epoch": 2.13, + "learning_rate": 4.2000000000000004e-06, + "loss": 8.7479, + "step": 1260 + }, + { + "epoch": 2.16, + "learning_rate": 4.266666666666667e-06, + "loss": 8.7301, + "step": 1280 + }, + { + "epoch": 2.2, + "learning_rate": 4.333333333333334e-06, + "loss": 8.7187, + "step": 1300 + }, + { + "epoch": 2.23, + "learning_rate": 4.4e-06, + "loss": 8.7004, + "step": 1320 + }, + { + "epoch": 2.26, + "learning_rate": 4.4666666666666665e-06, + "loss": 8.683, + "step": 1340 + }, + { + "epoch": 2.3, + "learning_rate": 4.533333333333334e-06, + "loss": 8.6711, + "step": 1360 + }, + { + "epoch": 2.33, + "learning_rate": 4.6e-06, + "loss": 8.6576, + "step": 1380 + }, + { + "epoch": 2.36, + "learning_rate": 4.666666666666667e-06, + "loss": 8.6394, + "step": 1400 + }, + { + "epoch": 2.4, + "learning_rate": 4.7333333333333335e-06, + "loss": 8.6242, + "step": 1420 + }, + { + "epoch": 2.43, + "learning_rate": 4.800000000000001e-06, + "loss": 8.6158, + "step": 1440 + }, + { + "epoch": 2.47, + "learning_rate": 4.866666666666667e-06, + "loss": 8.5992, + "step": 1460 + }, + { + "epoch": 2.5, + "learning_rate": 4.933333333333333e-06, + "loss": 8.5956, + "step": 1480 + }, + { + "epoch": 2.53, + "learning_rate": 5e-06, + "loss": 8.5753, + "step": 1500 + }, + { + "epoch": 2.57, + "learning_rate": 5.066666666666667e-06, + "loss": 8.5659, + "step": 1520 + }, + { + "epoch": 2.6, + "learning_rate": 5.133333333333334e-06, + "loss": 8.5598, + "step": 1540 + }, + { + "epoch": 2.64, + "learning_rate": 5.2e-06, + "loss": 8.546, + "step": 1560 + }, + { + "epoch": 2.67, + "learning_rate": 5.266666666666667e-06, + "loss": 8.5332, + "step": 1580 + }, + { + "epoch": 2.7, + "learning_rate": 5.333333333333334e-06, + "loss": 8.5348, + "step": 1600 + }, + { + "epoch": 2.74, + "learning_rate": 5.4e-06, + "loss": 8.5243, + "step": 1620 + }, + { + "epoch": 2.77, + "learning_rate": 5.466666666666667e-06, + "loss": 8.5171, + "step": 1640 + }, + { + "epoch": 2.8, + "learning_rate": 5.5333333333333334e-06, + "loss": 8.5057, + "step": 1660 + }, + { + "epoch": 2.84, + "learning_rate": 5.600000000000001e-06, + "loss": 8.5059, + "step": 1680 + }, + { + "epoch": 2.87, + "learning_rate": 5.666666666666667e-06, + "loss": 8.501, + "step": 1700 + }, + { + "epoch": 2.91, + "learning_rate": 5.733333333333333e-06, + "loss": 8.4816, + "step": 1720 + }, + { + "epoch": 2.94, + "learning_rate": 5.8e-06, + "loss": 8.4922, + "step": 1740 + }, + { + "epoch": 2.97, + "learning_rate": 5.866666666666667e-06, + "loss": 8.4783, + "step": 1760 + }, + { + "epoch": 3.01, + "learning_rate": 5.933333333333334e-06, + "loss": 8.4751, + "step": 1780 + }, + { + "epoch": 3.04, + "learning_rate": 6e-06, + "loss": 8.4588, + "step": 1800 + }, + { + "epoch": 3.07, + "learning_rate": 6.066666666666667e-06, + "loss": 8.4623, + "step": 1820 + }, + { + "epoch": 3.11, + "learning_rate": 6.133333333333334e-06, + "loss": 8.4415, + "step": 1840 + }, + { + "epoch": 3.14, + "learning_rate": 6.2e-06, + "loss": 8.443, + "step": 1860 + }, + { + "epoch": 3.18, + "learning_rate": 6.266666666666666e-06, + "loss": 8.4319, + "step": 1880 + }, + { + "epoch": 3.21, + "learning_rate": 6.333333333333334e-06, + "loss": 8.4269, + "step": 1900 + }, + { + "epoch": 3.24, + "learning_rate": 6.4000000000000006e-06, + "loss": 8.4127, + "step": 1920 + }, + { + "epoch": 3.28, + "learning_rate": 6.466666666666667e-06, + "loss": 8.4157, + "step": 1940 + }, + { + "epoch": 3.31, + "learning_rate": 6.533333333333333e-06, + "loss": 8.4084, + "step": 1960 + }, + { + "epoch": 3.34, + "learning_rate": 6.6e-06, + "loss": 8.4011, + "step": 1980 + }, + { + "epoch": 3.38, + "learning_rate": 6.666666666666667e-06, + "loss": 8.3922, + "step": 2000 + }, + { + "epoch": 3.38, + "eval_loss": 8.35992431640625, + "eval_runtime": 46.4907, + "eval_samples_per_second": 21.273, + "eval_steps_per_second": 0.129, + "eval_tse_ndup": 0.0, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.0005799355217422038, + "eval_tse_type": 0.3327419239609195, + "step": 2000 + }, + { + "epoch": 3.41, + "learning_rate": 6.733333333333333e-06, + "loss": 8.379, + "step": 2020 + }, + { + "epoch": 3.45, + "learning_rate": 6.800000000000001e-06, + "loss": 8.3882, + "step": 2040 + }, + { + "epoch": 3.48, + "learning_rate": 6.866666666666667e-06, + "loss": 8.3824, + "step": 2060 + }, + { + "epoch": 3.51, + "learning_rate": 6.933333333333334e-06, + "loss": 8.3645, + "step": 2080 + }, + { + "epoch": 3.55, + "learning_rate": 7.000000000000001e-06, + "loss": 8.3618, + "step": 2100 + }, + { + "epoch": 3.58, + "learning_rate": 7.066666666666667e-06, + "loss": 8.3556, + "step": 2120 + }, + { + "epoch": 3.61, + "learning_rate": 7.133333333333333e-06, + "loss": 8.349, + "step": 2140 + }, + { + "epoch": 3.65, + "learning_rate": 7.2e-06, + "loss": 8.349, + "step": 2160 + }, + { + "epoch": 3.68, + "learning_rate": 7.266666666666668e-06, + "loss": 8.3382, + "step": 2180 + }, + { + "epoch": 3.72, + "learning_rate": 7.333333333333334e-06, + "loss": 8.3239, + "step": 2200 + }, + { + "epoch": 3.75, + "learning_rate": 7.4e-06, + "loss": 8.3113, + "step": 2220 + }, + { + "epoch": 3.78, + "learning_rate": 7.4666666666666675e-06, + "loss": 8.3119, + "step": 2240 + }, + { + "epoch": 3.82, + "learning_rate": 7.533333333333334e-06, + "loss": 8.2856, + "step": 2260 + }, + { + "epoch": 3.85, + "learning_rate": 7.6e-06, + "loss": 8.2844, + "step": 2280 + }, + { + "epoch": 3.89, + "learning_rate": 7.666666666666667e-06, + "loss": 8.2633, + "step": 2300 + }, + { + "epoch": 3.92, + "learning_rate": 7.733333333333334e-06, + "loss": 8.2571, + "step": 2320 + }, + { + "epoch": 3.95, + "learning_rate": 7.8e-06, + "loss": 8.2318, + "step": 2340 + }, + { + "epoch": 3.99, + "learning_rate": 7.866666666666667e-06, + "loss": 8.2176, + "step": 2360 + }, + { + "epoch": 4.02, + "learning_rate": 7.933333333333334e-06, + "loss": 8.221, + "step": 2380 + }, + { + "epoch": 4.05, + "learning_rate": 8.000000000000001e-06, + "loss": 8.2058, + "step": 2400 + }, + { + "epoch": 4.09, + "learning_rate": 8.066666666666667e-06, + "loss": 8.2073, + "step": 2420 + }, + { + "epoch": 4.12, + "learning_rate": 8.133333333333332e-06, + "loss": 8.1965, + "step": 2440 + }, + { + "epoch": 4.16, + "learning_rate": 8.200000000000001e-06, + "loss": 8.1608, + "step": 2460 + }, + { + "epoch": 4.19, + "learning_rate": 8.266666666666667e-06, + "loss": 8.1546, + "step": 2480 + }, + { + "epoch": 4.22, + "learning_rate": 8.333333333333334e-06, + "loss": 8.1279, + "step": 2500 + }, + { + "epoch": 4.26, + "learning_rate": 8.400000000000001e-06, + "loss": 8.1274, + "step": 2520 + }, + { + "epoch": 4.29, + "learning_rate": 8.466666666666666e-06, + "loss": 8.1081, + "step": 2540 + }, + { + "epoch": 4.32, + "learning_rate": 8.533333333333334e-06, + "loss": 8.0982, + "step": 2560 + }, + { + "epoch": 4.36, + "learning_rate": 8.599999999999999e-06, + "loss": 8.0698, + "step": 2580 + }, + { + "epoch": 4.39, + "learning_rate": 8.666666666666668e-06, + "loss": 8.0593, + "step": 2600 + }, + { + "epoch": 4.43, + "learning_rate": 8.733333333333333e-06, + "loss": 8.0368, + "step": 2620 + }, + { + "epoch": 4.46, + "learning_rate": 8.8e-06, + "loss": 8.0272, + "step": 2640 + }, + { + "epoch": 4.49, + "learning_rate": 8.866666666666668e-06, + "loss": 8.0123, + "step": 2660 + }, + { + "epoch": 4.53, + "learning_rate": 8.933333333333333e-06, + "loss": 7.9895, + "step": 2680 + }, + { + "epoch": 4.56, + "learning_rate": 9e-06, + "loss": 7.9688, + "step": 2700 + }, + { + "epoch": 4.59, + "learning_rate": 9.066666666666667e-06, + "loss": 7.956, + "step": 2720 + }, + { + "epoch": 4.63, + "learning_rate": 9.133333333333335e-06, + "loss": 7.9605, + "step": 2740 + }, + { + "epoch": 4.66, + "learning_rate": 9.2e-06, + "loss": 7.933, + "step": 2760 + }, + { + "epoch": 4.7, + "learning_rate": 9.266666666666667e-06, + "loss": 7.9012, + "step": 2780 + }, + { + "epoch": 4.73, + "learning_rate": 9.333333333333334e-06, + "loss": 7.8915, + "step": 2800 + }, + { + "epoch": 4.76, + "learning_rate": 9.4e-06, + "loss": 7.8632, + "step": 2820 + }, + { + "epoch": 4.8, + "learning_rate": 9.466666666666667e-06, + "loss": 7.8485, + "step": 2840 + }, + { + "epoch": 4.83, + "learning_rate": 9.533333333333334e-06, + "loss": 7.8243, + "step": 2860 + }, + { + "epoch": 4.86, + "learning_rate": 9.600000000000001e-06, + "loss": 7.8058, + "step": 2880 + }, + { + "epoch": 4.9, + "learning_rate": 9.666666666666667e-06, + "loss": 7.7773, + "step": 2900 + }, + { + "epoch": 4.93, + "learning_rate": 9.733333333333334e-06, + "loss": 7.7615, + "step": 2920 + }, + { + "epoch": 4.97, + "learning_rate": 9.800000000000001e-06, + "loss": 7.7201, + "step": 2940 + }, + { + "epoch": 5.0, + "learning_rate": 9.866666666666667e-06, + "loss": 7.6944, + "step": 2960 + }, + { + "epoch": 5.03, + "learning_rate": 9.933333333333334e-06, + "loss": 7.681, + "step": 2980 + }, + { + "epoch": 5.07, + "learning_rate": 1e-05, + "loss": 7.6484, + "step": 3000 + }, + { + "epoch": 5.07, + "eval_loss": 7.5473527908325195, + "eval_runtime": 46.812, + "eval_samples_per_second": 21.127, + "eval_steps_per_second": 0.128, + "eval_tse_ndup": 0.019046315020784185, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.08615238659379869, + "eval_tse_type": 0.0334121503263036, + "step": 3000 + }, + { + "epoch": 5.1, + "learning_rate": 1.0066666666666668e-05, + "loss": 7.6008, + "step": 3020 + }, + { + "epoch": 5.14, + "learning_rate": 1.0133333333333333e-05, + "loss": 7.5998, + "step": 3040 + }, + { + "epoch": 5.17, + "learning_rate": 1.02e-05, + "loss": 7.5668, + "step": 3060 + }, + { + "epoch": 5.2, + "learning_rate": 1.0266666666666668e-05, + "loss": 7.5497, + "step": 3080 + }, + { + "epoch": 5.24, + "learning_rate": 1.0333333333333333e-05, + "loss": 7.5271, + "step": 3100 + }, + { + "epoch": 5.27, + "learning_rate": 1.04e-05, + "loss": 7.4934, + "step": 3120 + }, + { + "epoch": 5.3, + "learning_rate": 1.0466666666666668e-05, + "loss": 7.464, + "step": 3140 + }, + { + "epoch": 5.34, + "learning_rate": 1.0533333333333335e-05, + "loss": 7.4618, + "step": 3160 + }, + { + "epoch": 5.37, + "learning_rate": 1.06e-05, + "loss": 7.4438, + "step": 3180 + }, + { + "epoch": 5.41, + "learning_rate": 1.0666666666666667e-05, + "loss": 7.3952, + "step": 3200 + }, + { + "epoch": 5.44, + "learning_rate": 1.0733333333333334e-05, + "loss": 7.3956, + "step": 3220 + }, + { + "epoch": 5.47, + "learning_rate": 1.08e-05, + "loss": 7.3867, + "step": 3240 + }, + { + "epoch": 5.51, + "learning_rate": 1.0866666666666667e-05, + "loss": 7.3624, + "step": 3260 + }, + { + "epoch": 5.54, + "learning_rate": 1.0933333333333334e-05, + "loss": 7.3379, + "step": 3280 + }, + { + "epoch": 5.57, + "learning_rate": 1.1000000000000001e-05, + "loss": 7.2998, + "step": 3300 + }, + { + "epoch": 5.61, + "learning_rate": 1.1066666666666667e-05, + "loss": 7.2783, + "step": 3320 + }, + { + "epoch": 5.64, + "learning_rate": 1.1133333333333334e-05, + "loss": 7.2713, + "step": 3340 + }, + { + "epoch": 5.68, + "learning_rate": 1.1200000000000001e-05, + "loss": 7.2402, + "step": 3360 + }, + { + "epoch": 5.71, + "learning_rate": 1.1266666666666667e-05, + "loss": 7.2445, + "step": 3380 + }, + { + "epoch": 5.74, + "learning_rate": 1.1333333333333334e-05, + "loss": 7.2294, + "step": 3400 + }, + { + "epoch": 5.78, + "learning_rate": 1.1400000000000001e-05, + "loss": 7.2023, + "step": 3420 + }, + { + "epoch": 5.81, + "learning_rate": 1.1466666666666666e-05, + "loss": 7.1985, + "step": 3440 + }, + { + "epoch": 5.84, + "learning_rate": 1.1533333333333334e-05, + "loss": 7.1602, + "step": 3460 + }, + { + "epoch": 5.88, + "learning_rate": 1.16e-05, + "loss": 7.1214, + "step": 3480 + }, + { + "epoch": 5.91, + "learning_rate": 1.1666666666666668e-05, + "loss": 7.1281, + "step": 3500 + }, + { + "epoch": 5.95, + "learning_rate": 1.1733333333333333e-05, + "loss": 7.0915, + "step": 3520 + }, + { + "epoch": 5.98, + "learning_rate": 1.18e-05, + "loss": 7.0716, + "step": 3540 + }, + { + "epoch": 6.01, + "learning_rate": 1.1866666666666668e-05, + "loss": 7.0603, + "step": 3560 + }, + { + "epoch": 6.05, + "learning_rate": 1.1933333333333333e-05, + "loss": 7.0538, + "step": 3580 + }, + { + "epoch": 6.08, + "learning_rate": 1.2e-05, + "loss": 7.0202, + "step": 3600 + }, + { + "epoch": 6.11, + "learning_rate": 1.2066666666666667e-05, + "loss": 7.0241, + "step": 3620 + }, + { + "epoch": 6.15, + "learning_rate": 1.2133333333333335e-05, + "loss": 7.0084, + "step": 3640 + }, + { + "epoch": 6.18, + "learning_rate": 1.22e-05, + "loss": 7.0037, + "step": 3660 + }, + { + "epoch": 6.22, + "learning_rate": 1.2266666666666667e-05, + "loss": 6.978, + "step": 3680 + }, + { + "epoch": 6.25, + "learning_rate": 1.2333333333333334e-05, + "loss": 6.9479, + "step": 3700 + }, + { + "epoch": 6.28, + "learning_rate": 1.24e-05, + "loss": 6.9353, + "step": 3720 + }, + { + "epoch": 6.32, + "learning_rate": 1.2466666666666667e-05, + "loss": 6.9249, + "step": 3740 + }, + { + "epoch": 6.35, + "learning_rate": 1.2533333333333332e-05, + "loss": 6.9284, + "step": 3760 + }, + { + "epoch": 6.39, + "learning_rate": 1.2600000000000001e-05, + "loss": 6.8827, + "step": 3780 + }, + { + "epoch": 6.42, + "learning_rate": 1.2666666666666668e-05, + "loss": 6.9025, + "step": 3800 + }, + { + "epoch": 6.45, + "learning_rate": 1.2733333333333334e-05, + "loss": 6.8757, + "step": 3820 + }, + { + "epoch": 6.49, + "learning_rate": 1.2800000000000001e-05, + "loss": 6.8597, + "step": 3840 + }, + { + "epoch": 6.52, + "learning_rate": 1.2866666666666668e-05, + "loss": 6.837, + "step": 3860 + }, + { + "epoch": 6.55, + "learning_rate": 1.2933333333333334e-05, + "loss": 6.8198, + "step": 3880 + }, + { + "epoch": 6.59, + "learning_rate": 1.3000000000000001e-05, + "loss": 6.8343, + "step": 3900 + }, + { + "epoch": 6.62, + "learning_rate": 1.3066666666666666e-05, + "loss": 6.78, + "step": 3920 + }, + { + "epoch": 6.66, + "learning_rate": 1.3133333333333334e-05, + "loss": 6.777, + "step": 3940 + }, + { + "epoch": 6.69, + "learning_rate": 1.32e-05, + "loss": 6.7962, + "step": 3960 + }, + { + "epoch": 6.72, + "learning_rate": 1.3266666666666666e-05, + "loss": 6.7663, + "step": 3980 + }, + { + "epoch": 6.76, + "learning_rate": 1.3333333333333333e-05, + "loss": 6.7458, + "step": 4000 + }, + { + "epoch": 6.76, + "eval_loss": 6.617990493774414, + "eval_runtime": 46.7629, + "eval_samples_per_second": 21.149, + "eval_steps_per_second": 0.128, + "eval_tse_ndup": 0.03450995908266761, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.04389155620161396, + "eval_tse_type": 0.0030251448519484775, + "step": 4000 + }, + { + "epoch": 6.79, + "learning_rate": 1.3400000000000002e-05, + "loss": 6.7378, + "step": 4020 + }, + { + "epoch": 6.82, + "learning_rate": 1.3466666666666666e-05, + "loss": 6.73, + "step": 4040 + }, + { + "epoch": 6.86, + "learning_rate": 1.3533333333333335e-05, + "loss": 6.7169, + "step": 4060 + }, + { + "epoch": 6.89, + "learning_rate": 1.3600000000000002e-05, + "loss": 6.711, + "step": 4080 + }, + { + "epoch": 6.93, + "learning_rate": 1.3666666666666666e-05, + "loss": 6.681, + "step": 4100 + }, + { + "epoch": 6.96, + "learning_rate": 1.3733333333333335e-05, + "loss": 6.6731, + "step": 4120 + }, + { + "epoch": 6.99, + "learning_rate": 1.3800000000000002e-05, + "loss": 6.6799, + "step": 4140 + }, + { + "epoch": 7.03, + "learning_rate": 1.3866666666666667e-05, + "loss": 6.6362, + "step": 4160 + }, + { + "epoch": 7.06, + "learning_rate": 1.3933333333333334e-05, + "loss": 6.6247, + "step": 4180 + }, + { + "epoch": 7.09, + "learning_rate": 1.4000000000000001e-05, + "loss": 6.6309, + "step": 4200 + }, + { + "epoch": 7.13, + "learning_rate": 1.4066666666666667e-05, + "loss": 6.6312, + "step": 4220 + }, + { + "epoch": 7.16, + "learning_rate": 1.4133333333333334e-05, + "loss": 6.6072, + "step": 4240 + }, + { + "epoch": 7.2, + "learning_rate": 1.42e-05, + "loss": 6.5955, + "step": 4260 + }, + { + "epoch": 7.23, + "learning_rate": 1.4266666666666667e-05, + "loss": 6.5989, + "step": 4280 + }, + { + "epoch": 7.26, + "learning_rate": 1.4333333333333334e-05, + "loss": 6.6025, + "step": 4300 + }, + { + "epoch": 7.3, + "learning_rate": 1.44e-05, + "loss": 6.5728, + "step": 4320 + }, + { + "epoch": 7.33, + "learning_rate": 1.4466666666666667e-05, + "loss": 6.578, + "step": 4340 + }, + { + "epoch": 7.36, + "learning_rate": 1.4533333333333335e-05, + "loss": 6.5628, + "step": 4360 + }, + { + "epoch": 7.4, + "learning_rate": 1.4599999999999999e-05, + "loss": 6.5257, + "step": 4380 + }, + { + "epoch": 7.43, + "learning_rate": 1.4666666666666668e-05, + "loss": 6.5414, + "step": 4400 + }, + { + "epoch": 7.47, + "learning_rate": 1.4733333333333335e-05, + "loss": 6.5253, + "step": 4420 + }, + { + "epoch": 7.5, + "learning_rate": 1.48e-05, + "loss": 6.5293, + "step": 4440 + }, + { + "epoch": 7.53, + "learning_rate": 1.4866666666666668e-05, + "loss": 6.4852, + "step": 4460 + }, + { + "epoch": 7.57, + "learning_rate": 1.4933333333333335e-05, + "loss": 6.488, + "step": 4480 + }, + { + "epoch": 7.6, + "learning_rate": 1.5e-05, + "loss": 6.5069, + "step": 4500 + }, + { + "epoch": 7.64, + "learning_rate": 1.5066666666666668e-05, + "loss": 6.498, + "step": 4520 + }, + { + "epoch": 7.67, + "learning_rate": 1.5133333333333333e-05, + "loss": 6.4785, + "step": 4540 + }, + { + "epoch": 7.7, + "learning_rate": 1.52e-05, + "loss": 6.4584, + "step": 4560 + }, + { + "epoch": 7.74, + "learning_rate": 1.5266666666666667e-05, + "loss": 6.4683, + "step": 4580 + }, + { + "epoch": 7.77, + "learning_rate": 1.5333333333333334e-05, + "loss": 6.4477, + "step": 4600 + }, + { + "epoch": 7.8, + "learning_rate": 1.54e-05, + "loss": 6.4477, + "step": 4620 + }, + { + "epoch": 7.84, + "learning_rate": 1.546666666666667e-05, + "loss": 6.4117, + "step": 4640 + }, + { + "epoch": 7.87, + "learning_rate": 1.5533333333333333e-05, + "loss": 6.4222, + "step": 4660 + }, + { + "epoch": 7.91, + "learning_rate": 1.56e-05, + "loss": 6.4158, + "step": 4680 + }, + { + "epoch": 7.94, + "learning_rate": 1.5666666666666667e-05, + "loss": 6.417, + "step": 4700 + }, + { + "epoch": 7.97, + "learning_rate": 1.5733333333333334e-05, + "loss": 6.3885, + "step": 4720 + }, + { + "epoch": 8.01, + "learning_rate": 1.58e-05, + "loss": 6.3959, + "step": 4740 + }, + { + "epoch": 8.04, + "learning_rate": 1.586666666666667e-05, + "loss": 6.3683, + "step": 4760 + }, + { + "epoch": 8.07, + "learning_rate": 1.5933333333333332e-05, + "loss": 6.3691, + "step": 4780 + }, + { + "epoch": 8.11, + "learning_rate": 1.6000000000000003e-05, + "loss": 6.3567, + "step": 4800 + }, + { + "epoch": 8.14, + "learning_rate": 1.606666666666667e-05, + "loss": 6.3445, + "step": 4820 + }, + { + "epoch": 8.18, + "learning_rate": 1.6133333333333334e-05, + "loss": 6.3392, + "step": 4840 + }, + { + "epoch": 8.21, + "learning_rate": 1.62e-05, + "loss": 6.3109, + "step": 4860 + }, + { + "epoch": 8.24, + "learning_rate": 1.6266666666666665e-05, + "loss": 6.3362, + "step": 4880 + }, + { + "epoch": 8.28, + "learning_rate": 1.6333333333333335e-05, + "loss": 6.3446, + "step": 4900 + }, + { + "epoch": 8.31, + "learning_rate": 1.6400000000000002e-05, + "loss": 6.3071, + "step": 4920 + }, + { + "epoch": 8.34, + "learning_rate": 1.6466666666666666e-05, + "loss": 6.3113, + "step": 4940 + }, + { + "epoch": 8.38, + "learning_rate": 1.6533333333333333e-05, + "loss": 6.279, + "step": 4960 + }, + { + "epoch": 8.41, + "learning_rate": 1.66e-05, + "loss": 6.2803, + "step": 4980 + }, + { + "epoch": 8.45, + "learning_rate": 1.6666666666666667e-05, + "loss": 6.2993, + "step": 5000 + }, + { + "epoch": 8.45, + "eval_loss": 6.150936603546143, + "eval_runtime": 46.9103, + "eval_samples_per_second": 21.083, + "eval_steps_per_second": 0.128, + "eval_tse_ndup": 0.02838463484700525, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.030864985103219583, + "eval_tse_type": 0.0033177451971688576, + "step": 5000 + }, + { + "epoch": 8.48, + "learning_rate": 1.6733333333333335e-05, + "loss": 6.2719, + "step": 5020 + }, + { + "epoch": 8.51, + "learning_rate": 1.6800000000000002e-05, + "loss": 6.2829, + "step": 5040 + }, + { + "epoch": 8.55, + "learning_rate": 1.6866666666666666e-05, + "loss": 6.2579, + "step": 5060 + }, + { + "epoch": 8.58, + "learning_rate": 1.6933333333333333e-05, + "loss": 6.2497, + "step": 5080 + }, + { + "epoch": 8.61, + "learning_rate": 1.7000000000000003e-05, + "loss": 6.2598, + "step": 5100 + }, + { + "epoch": 8.65, + "learning_rate": 1.7066666666666667e-05, + "loss": 6.2333, + "step": 5120 + }, + { + "epoch": 8.68, + "learning_rate": 1.7133333333333334e-05, + "loss": 6.2455, + "step": 5140 + }, + { + "epoch": 8.72, + "learning_rate": 1.7199999999999998e-05, + "loss": 6.2239, + "step": 5160 + }, + { + "epoch": 8.75, + "learning_rate": 1.726666666666667e-05, + "loss": 6.237, + "step": 5180 + }, + { + "epoch": 8.78, + "learning_rate": 1.7333333333333336e-05, + "loss": 6.2274, + "step": 5200 + }, + { + "epoch": 8.82, + "learning_rate": 1.74e-05, + "loss": 6.2016, + "step": 5220 + }, + { + "epoch": 8.85, + "learning_rate": 1.7466666666666667e-05, + "loss": 6.2061, + "step": 5240 + }, + { + "epoch": 8.89, + "learning_rate": 1.7533333333333334e-05, + "loss": 6.1996, + "step": 5260 + }, + { + "epoch": 8.92, + "learning_rate": 1.76e-05, + "loss": 6.1854, + "step": 5280 + }, + { + "epoch": 8.95, + "learning_rate": 1.7666666666666668e-05, + "loss": 6.1892, + "step": 5300 + }, + { + "epoch": 8.99, + "learning_rate": 1.7733333333333335e-05, + "loss": 6.1803, + "step": 5320 + }, + { + "epoch": 9.02, + "learning_rate": 1.78e-05, + "loss": 6.1609, + "step": 5340 + }, + { + "epoch": 9.05, + "learning_rate": 1.7866666666666666e-05, + "loss": 6.1776, + "step": 5360 + }, + { + "epoch": 9.09, + "learning_rate": 1.7933333333333337e-05, + "loss": 6.133, + "step": 5380 + }, + { + "epoch": 9.12, + "learning_rate": 1.8e-05, + "loss": 6.1621, + "step": 5400 + }, + { + "epoch": 9.16, + "learning_rate": 1.8066666666666668e-05, + "loss": 6.1211, + "step": 5420 + }, + { + "epoch": 9.19, + "learning_rate": 1.8133333333333335e-05, + "loss": 6.1205, + "step": 5440 + }, + { + "epoch": 9.22, + "learning_rate": 1.8200000000000002e-05, + "loss": 6.1046, + "step": 5460 + }, + { + "epoch": 9.26, + "learning_rate": 1.826666666666667e-05, + "loss": 6.1094, + "step": 5480 + }, + { + "epoch": 9.29, + "learning_rate": 1.8333333333333333e-05, + "loss": 6.115, + "step": 5500 + }, + { + "epoch": 9.32, + "learning_rate": 1.84e-05, + "loss": 6.0756, + "step": 5520 + }, + { + "epoch": 9.36, + "learning_rate": 1.8466666666666667e-05, + "loss": 6.0964, + "step": 5540 + }, + { + "epoch": 9.39, + "learning_rate": 1.8533333333333334e-05, + "loss": 6.0684, + "step": 5560 + }, + { + "epoch": 9.43, + "learning_rate": 1.86e-05, + "loss": 6.0754, + "step": 5580 + }, + { + "epoch": 9.46, + "learning_rate": 1.866666666666667e-05, + "loss": 6.0827, + "step": 5600 + }, + { + "epoch": 9.49, + "learning_rate": 1.8733333333333332e-05, + "loss": 6.0703, + "step": 5620 + }, + { + "epoch": 9.53, + "learning_rate": 1.88e-05, + "loss": 6.0633, + "step": 5640 + }, + { + "epoch": 9.56, + "learning_rate": 1.886666666666667e-05, + "loss": 6.0579, + "step": 5660 + }, + { + "epoch": 9.59, + "learning_rate": 1.8933333333333334e-05, + "loss": 6.0445, + "step": 5680 + }, + { + "epoch": 9.63, + "learning_rate": 1.9e-05, + "loss": 6.0616, + "step": 5700 + }, + { + "epoch": 9.66, + "learning_rate": 1.9066666666666668e-05, + "loss": 6.0371, + "step": 5720 + }, + { + "epoch": 9.7, + "learning_rate": 1.9133333333333332e-05, + "loss": 6.028, + "step": 5740 + }, + { + "epoch": 9.73, + "learning_rate": 1.9200000000000003e-05, + "loss": 6.0276, + "step": 5760 + }, + { + "epoch": 9.76, + "learning_rate": 1.926666666666667e-05, + "loss": 6.0154, + "step": 5780 + }, + { + "epoch": 9.8, + "learning_rate": 1.9333333333333333e-05, + "loss": 5.989, + "step": 5800 + }, + { + "epoch": 9.83, + "learning_rate": 1.94e-05, + "loss": 6.0047, + "step": 5820 + }, + { + "epoch": 9.86, + "learning_rate": 1.9466666666666668e-05, + "loss": 6.0087, + "step": 5840 + }, + { + "epoch": 9.9, + "learning_rate": 1.9533333333333335e-05, + "loss": 5.984, + "step": 5860 + }, + { + "epoch": 9.93, + "learning_rate": 1.9600000000000002e-05, + "loss": 5.9697, + "step": 5880 + }, + { + "epoch": 9.97, + "learning_rate": 1.9666666666666666e-05, + "loss": 5.989, + "step": 5900 + }, + { + "epoch": 10.0, + "learning_rate": 1.9733333333333333e-05, + "loss": 5.9804, + "step": 5920 + }, + { + "epoch": 10.03, + "learning_rate": 1.9800000000000004e-05, + "loss": 5.9616, + "step": 5940 + }, + { + "epoch": 10.07, + "learning_rate": 1.9866666666666667e-05, + "loss": 5.952, + "step": 5960 + }, + { + "epoch": 10.1, + "learning_rate": 1.9933333333333334e-05, + "loss": 5.9399, + "step": 5980 + }, + { + "epoch": 10.14, + "learning_rate": 2e-05, + "loss": 5.9317, + "step": 6000 + }, + { + "epoch": 10.14, + "eval_loss": 5.784234523773193, + "eval_runtime": 49.1403, + "eval_samples_per_second": 20.126, + "eval_steps_per_second": 0.122, + "eval_tse_ndup": 0.011724633008987968, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.023856265453044822, + "eval_tse_type": 0.001799306257723851, + "step": 6000 + }, + { + "epoch": 10.17, + "learning_rate": 2.0066666666666665e-05, + "loss": 5.928, + "step": 6020 + }, + { + "epoch": 10.2, + "learning_rate": 2.0133333333333336e-05, + "loss": 5.9352, + "step": 6040 + }, + { + "epoch": 10.24, + "learning_rate": 2.0200000000000003e-05, + "loss": 5.9184, + "step": 6060 + }, + { + "epoch": 10.27, + "learning_rate": 2.0266666666666667e-05, + "loss": 5.9234, + "step": 6080 + }, + { + "epoch": 10.3, + "learning_rate": 2.0333333333333334e-05, + "loss": 5.8903, + "step": 6100 + }, + { + "epoch": 10.34, + "learning_rate": 2.04e-05, + "loss": 5.9154, + "step": 6120 + }, + { + "epoch": 10.37, + "learning_rate": 2.046666666666667e-05, + "loss": 5.8967, + "step": 6140 + }, + { + "epoch": 10.41, + "learning_rate": 2.0533333333333336e-05, + "loss": 5.9033, + "step": 6160 + }, + { + "epoch": 10.44, + "learning_rate": 2.06e-05, + "loss": 5.8873, + "step": 6180 + }, + { + "epoch": 10.47, + "learning_rate": 2.0663333333333336e-05, + "loss": 5.8548, + "step": 6200 + }, + { + "epoch": 10.51, + "learning_rate": 2.0730000000000003e-05, + "loss": 5.8844, + "step": 6220 + }, + { + "epoch": 10.54, + "learning_rate": 2.0796666666666667e-05, + "loss": 5.8576, + "step": 6240 + }, + { + "epoch": 10.57, + "learning_rate": 2.0863333333333334e-05, + "loss": 5.8652, + "step": 6260 + }, + { + "epoch": 10.61, + "learning_rate": 2.093e-05, + "loss": 5.86, + "step": 6280 + }, + { + "epoch": 10.64, + "learning_rate": 2.099666666666667e-05, + "loss": 5.8448, + "step": 6300 + }, + { + "epoch": 10.68, + "learning_rate": 2.1063333333333336e-05, + "loss": 5.8298, + "step": 6320 + }, + { + "epoch": 10.71, + "learning_rate": 2.113e-05, + "loss": 5.8472, + "step": 6340 + }, + { + "epoch": 10.74, + "learning_rate": 2.1196666666666666e-05, + "loss": 5.8317, + "step": 6360 + }, + { + "epoch": 10.78, + "learning_rate": 2.1263333333333334e-05, + "loss": 5.818, + "step": 6380 + }, + { + "epoch": 10.81, + "learning_rate": 2.133e-05, + "loss": 5.8043, + "step": 6400 + }, + { + "epoch": 10.84, + "learning_rate": 2.1396666666666668e-05, + "loss": 5.8164, + "step": 6420 + }, + { + "epoch": 10.88, + "learning_rate": 2.1463333333333335e-05, + "loss": 5.8146, + "step": 6440 + }, + { + "epoch": 10.91, + "learning_rate": 2.153e-05, + "loss": 5.7853, + "step": 6460 + }, + { + "epoch": 10.95, + "learning_rate": 2.159666666666667e-05, + "loss": 5.7954, + "step": 6480 + }, + { + "epoch": 10.98, + "learning_rate": 2.1663333333333337e-05, + "loss": 5.7965, + "step": 6500 + }, + { + "epoch": 11.01, + "learning_rate": 2.173e-05, + "loss": 5.7747, + "step": 6520 + }, + { + "epoch": 11.05, + "learning_rate": 2.1796666666666667e-05, + "loss": 5.7595, + "step": 6540 + }, + { + "epoch": 11.08, + "learning_rate": 2.1863333333333335e-05, + "loss": 5.7593, + "step": 6560 + }, + { + "epoch": 11.11, + "learning_rate": 2.1930000000000002e-05, + "loss": 5.754, + "step": 6580 + }, + { + "epoch": 11.15, + "learning_rate": 2.199666666666667e-05, + "loss": 5.7569, + "step": 6600 + }, + { + "epoch": 11.18, + "learning_rate": 2.2063333333333333e-05, + "loss": 5.7343, + "step": 6620 + }, + { + "epoch": 11.22, + "learning_rate": 2.213e-05, + "loss": 5.7316, + "step": 6640 + }, + { + "epoch": 11.25, + "learning_rate": 2.2196666666666667e-05, + "loss": 5.7496, + "step": 6660 + }, + { + "epoch": 11.28, + "learning_rate": 2.2263333333333334e-05, + "loss": 5.7162, + "step": 6680 + }, + { + "epoch": 11.32, + "learning_rate": 2.233e-05, + "loss": 5.7307, + "step": 6700 + }, + { + "epoch": 11.35, + "learning_rate": 2.239666666666667e-05, + "loss": 5.7071, + "step": 6720 + }, + { + "epoch": 11.39, + "learning_rate": 2.2463333333333332e-05, + "loss": 5.7166, + "step": 6740 + }, + { + "epoch": 11.42, + "learning_rate": 2.253e-05, + "loss": 5.711, + "step": 6760 + }, + { + "epoch": 11.45, + "learning_rate": 2.259666666666667e-05, + "loss": 5.6948, + "step": 6780 + }, + { + "epoch": 11.49, + "learning_rate": 2.2663333333333334e-05, + "loss": 5.6979, + "step": 6800 + }, + { + "epoch": 11.52, + "learning_rate": 2.273e-05, + "loss": 5.6902, + "step": 6820 + }, + { + "epoch": 11.55, + "learning_rate": 2.2796666666666668e-05, + "loss": 5.6826, + "step": 6840 + }, + { + "epoch": 11.59, + "learning_rate": 2.2863333333333335e-05, + "loss": 5.6601, + "step": 6860 + }, + { + "epoch": 11.62, + "learning_rate": 2.2930000000000002e-05, + "loss": 5.66, + "step": 6880 + }, + { + "epoch": 11.66, + "learning_rate": 2.299666666666667e-05, + "loss": 5.6611, + "step": 6900 + }, + { + "epoch": 11.69, + "learning_rate": 2.3063333333333333e-05, + "loss": 5.6563, + "step": 6920 + }, + { + "epoch": 11.72, + "learning_rate": 2.313e-05, + "loss": 5.6494, + "step": 6940 + }, + { + "epoch": 11.76, + "learning_rate": 2.3196666666666668e-05, + "loss": 5.6565, + "step": 6960 + }, + { + "epoch": 11.79, + "learning_rate": 2.3263333333333335e-05, + "loss": 5.641, + "step": 6980 + }, + { + "epoch": 11.82, + "learning_rate": 2.3330000000000002e-05, + "loss": 5.6336, + "step": 7000 + }, + { + "epoch": 11.82, + "eval_loss": 5.458117961883545, + "eval_runtime": 50.0555, + "eval_samples_per_second": 19.758, + "eval_steps_per_second": 0.12, + "eval_tse_ndup": 0.017345052495841798, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.017512275737317114, + "eval_tse_type": 0.00018080833614200653, + "step": 7000 + }, + { + "epoch": 11.86, + "learning_rate": 2.3396666666666666e-05, + "loss": 5.6352, + "step": 7020 + }, + { + "epoch": 11.89, + "learning_rate": 2.3463333333333333e-05, + "loss": 5.6328, + "step": 7040 + }, + { + "epoch": 11.93, + "learning_rate": 2.3530000000000003e-05, + "loss": 5.6168, + "step": 7060 + }, + { + "epoch": 11.96, + "learning_rate": 2.3596666666666667e-05, + "loss": 5.6141, + "step": 7080 + }, + { + "epoch": 11.99, + "learning_rate": 2.3663333333333334e-05, + "loss": 5.6081, + "step": 7100 + }, + { + "epoch": 12.03, + "learning_rate": 2.373e-05, + "loss": 5.5965, + "step": 7120 + }, + { + "epoch": 12.06, + "learning_rate": 2.379666666666667e-05, + "loss": 5.6029, + "step": 7140 + }, + { + "epoch": 12.09, + "learning_rate": 2.3863333333333336e-05, + "loss": 5.6032, + "step": 7160 + }, + { + "epoch": 12.13, + "learning_rate": 2.3930000000000003e-05, + "loss": 5.5922, + "step": 7180 + }, + { + "epoch": 12.16, + "learning_rate": 2.3996666666666667e-05, + "loss": 5.5751, + "step": 7200 + }, + { + "epoch": 12.2, + "learning_rate": 2.4063333333333334e-05, + "loss": 5.5766, + "step": 7220 + }, + { + "epoch": 12.23, + "learning_rate": 2.413e-05, + "loss": 5.5592, + "step": 7240 + }, + { + "epoch": 12.26, + "learning_rate": 2.4196666666666668e-05, + "loss": 5.5591, + "step": 7260 + }, + { + "epoch": 12.3, + "learning_rate": 2.4263333333333335e-05, + "loss": 5.55, + "step": 7280 + }, + { + "epoch": 12.33, + "learning_rate": 2.433e-05, + "loss": 5.5541, + "step": 7300 + }, + { + "epoch": 12.36, + "learning_rate": 2.4396666666666666e-05, + "loss": 5.5417, + "step": 7320 + }, + { + "epoch": 12.4, + "learning_rate": 2.4463333333333337e-05, + "loss": 5.5455, + "step": 7340 + }, + { + "epoch": 12.43, + "learning_rate": 2.453e-05, + "loss": 5.5344, + "step": 7360 + }, + { + "epoch": 12.47, + "learning_rate": 2.4596666666666668e-05, + "loss": 5.5202, + "step": 7380 + }, + { + "epoch": 12.5, + "learning_rate": 2.4663333333333335e-05, + "loss": 5.5374, + "step": 7400 + }, + { + "epoch": 12.53, + "learning_rate": 2.473e-05, + "loss": 5.5242, + "step": 7420 + }, + { + "epoch": 12.57, + "learning_rate": 2.479666666666667e-05, + "loss": 5.505, + "step": 7440 + }, + { + "epoch": 12.6, + "learning_rate": 2.4863333333333336e-05, + "loss": 5.5153, + "step": 7460 + }, + { + "epoch": 12.64, + "learning_rate": 2.493e-05, + "loss": 5.4931, + "step": 7480 + }, + { + "epoch": 12.67, + "learning_rate": 2.4996666666666667e-05, + "loss": 5.4959, + "step": 7500 + }, + { + "epoch": 12.7, + "learning_rate": 2.5063333333333334e-05, + "loss": 5.4914, + "step": 7520 + }, + { + "epoch": 12.74, + "learning_rate": 2.5130000000000005e-05, + "loss": 5.4984, + "step": 7540 + }, + { + "epoch": 12.77, + "learning_rate": 2.519666666666667e-05, + "loss": 5.4908, + "step": 7560 + }, + { + "epoch": 12.8, + "learning_rate": 2.5263333333333333e-05, + "loss": 5.4752, + "step": 7580 + }, + { + "epoch": 12.84, + "learning_rate": 2.5330000000000003e-05, + "loss": 5.4862, + "step": 7600 + }, + { + "epoch": 12.87, + "learning_rate": 2.539666666666667e-05, + "loss": 5.4718, + "step": 7620 + }, + { + "epoch": 12.91, + "learning_rate": 2.5463333333333334e-05, + "loss": 5.4496, + "step": 7640 + }, + { + "epoch": 12.94, + "learning_rate": 2.5530000000000005e-05, + "loss": 5.4577, + "step": 7660 + }, + { + "epoch": 12.97, + "learning_rate": 2.559666666666667e-05, + "loss": 5.463, + "step": 7680 + }, + { + "epoch": 13.01, + "learning_rate": 2.5663333333333332e-05, + "loss": 5.4542, + "step": 7700 + }, + { + "epoch": 13.04, + "learning_rate": 2.573e-05, + "loss": 5.4298, + "step": 7720 + }, + { + "epoch": 13.07, + "learning_rate": 2.579666666666667e-05, + "loss": 5.4478, + "step": 7740 + }, + { + "epoch": 13.11, + "learning_rate": 2.5863333333333334e-05, + "loss": 5.4353, + "step": 7760 + }, + { + "epoch": 13.14, + "learning_rate": 2.5929999999999997e-05, + "loss": 5.4353, + "step": 7780 + }, + { + "epoch": 13.18, + "learning_rate": 2.5996666666666668e-05, + "loss": 5.4234, + "step": 7800 + }, + { + "epoch": 13.21, + "learning_rate": 2.6063333333333335e-05, + "loss": 5.4327, + "step": 7820 + }, + { + "epoch": 13.24, + "learning_rate": 2.613e-05, + "loss": 5.4052, + "step": 7840 + }, + { + "epoch": 13.28, + "learning_rate": 2.619666666666667e-05, + "loss": 5.4052, + "step": 7860 + }, + { + "epoch": 13.31, + "learning_rate": 2.6263333333333333e-05, + "loss": 5.409, + "step": 7880 + }, + { + "epoch": 13.34, + "learning_rate": 2.633e-05, + "loss": 5.4085, + "step": 7900 + }, + { + "epoch": 13.38, + "learning_rate": 2.639666666666667e-05, + "loss": 5.4105, + "step": 7920 + }, + { + "epoch": 13.41, + "learning_rate": 2.6463333333333335e-05, + "loss": 5.3895, + "step": 7940 + }, + { + "epoch": 13.45, + "learning_rate": 2.653e-05, + "loss": 5.404, + "step": 7960 + }, + { + "epoch": 13.48, + "learning_rate": 2.659666666666667e-05, + "loss": 5.3775, + "step": 7980 + }, + { + "epoch": 13.51, + "learning_rate": 2.6663333333333336e-05, + "loss": 5.3844, + "step": 8000 + }, + { + "epoch": 13.51, + "eval_loss": 5.198601245880127, + "eval_runtime": 47.5753, + "eval_samples_per_second": 20.788, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.019161429918440446, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.013836201150024673, + "eval_tse_type": 0.00011936861026850915, + "step": 8000 + }, + { + "epoch": 13.55, + "learning_rate": 2.673e-05, + "loss": 5.3828, + "step": 8020 + }, + { + "epoch": 13.58, + "learning_rate": 2.679666666666667e-05, + "loss": 5.3827, + "step": 8040 + }, + { + "epoch": 13.61, + "learning_rate": 2.6863333333333334e-05, + "loss": 5.3618, + "step": 8060 + }, + { + "epoch": 13.65, + "learning_rate": 2.693e-05, + "loss": 5.3752, + "step": 8080 + }, + { + "epoch": 13.68, + "learning_rate": 2.699666666666667e-05, + "loss": 5.3743, + "step": 8100 + }, + { + "epoch": 13.72, + "learning_rate": 2.7063333333333336e-05, + "loss": 5.3703, + "step": 8120 + }, + { + "epoch": 13.75, + "learning_rate": 2.713e-05, + "loss": 5.3624, + "step": 8140 + }, + { + "epoch": 13.78, + "learning_rate": 2.719666666666667e-05, + "loss": 5.3535, + "step": 8160 + }, + { + "epoch": 13.82, + "learning_rate": 2.7263333333333334e-05, + "loss": 5.3541, + "step": 8180 + }, + { + "epoch": 13.85, + "learning_rate": 2.733e-05, + "loss": 5.3412, + "step": 8200 + }, + { + "epoch": 13.89, + "learning_rate": 2.739666666666667e-05, + "loss": 5.3284, + "step": 8220 + }, + { + "epoch": 13.92, + "learning_rate": 2.7463333333333335e-05, + "loss": 5.3353, + "step": 8240 + }, + { + "epoch": 13.95, + "learning_rate": 2.753e-05, + "loss": 5.3302, + "step": 8260 + }, + { + "epoch": 13.99, + "learning_rate": 2.759666666666667e-05, + "loss": 5.3334, + "step": 8280 + }, + { + "epoch": 14.02, + "learning_rate": 2.7663333333333337e-05, + "loss": 5.3116, + "step": 8300 + }, + { + "epoch": 14.05, + "learning_rate": 2.773e-05, + "loss": 5.3125, + "step": 8320 + }, + { + "epoch": 14.09, + "learning_rate": 2.7796666666666664e-05, + "loss": 5.3136, + "step": 8340 + }, + { + "epoch": 14.12, + "learning_rate": 2.7860000000000004e-05, + "loss": 5.2997, + "step": 8360 + }, + { + "epoch": 14.16, + "learning_rate": 2.7926666666666668e-05, + "loss": 5.3007, + "step": 8380 + }, + { + "epoch": 14.19, + "learning_rate": 2.7993333333333332e-05, + "loss": 5.2872, + "step": 8400 + }, + { + "epoch": 14.22, + "learning_rate": 2.8060000000000002e-05, + "loss": 5.2895, + "step": 8420 + }, + { + "epoch": 14.26, + "learning_rate": 2.8126666666666666e-05, + "loss": 5.2961, + "step": 8440 + }, + { + "epoch": 14.29, + "learning_rate": 2.8193333333333333e-05, + "loss": 5.2973, + "step": 8460 + }, + { + "epoch": 14.32, + "learning_rate": 2.8260000000000004e-05, + "loss": 5.2681, + "step": 8480 + }, + { + "epoch": 14.36, + "learning_rate": 2.8326666666666668e-05, + "loss": 5.2751, + "step": 8500 + }, + { + "epoch": 14.39, + "learning_rate": 2.839333333333333e-05, + "loss": 5.2667, + "step": 8520 + }, + { + "epoch": 14.43, + "learning_rate": 2.8460000000000002e-05, + "loss": 5.2647, + "step": 8540 + }, + { + "epoch": 14.46, + "learning_rate": 2.852666666666667e-05, + "loss": 5.269, + "step": 8560 + }, + { + "epoch": 14.49, + "learning_rate": 2.8593333333333333e-05, + "loss": 5.2801, + "step": 8580 + }, + { + "epoch": 14.53, + "learning_rate": 2.8660000000000003e-05, + "loss": 5.2621, + "step": 8600 + }, + { + "epoch": 14.56, + "learning_rate": 2.8726666666666667e-05, + "loss": 5.2705, + "step": 8620 + }, + { + "epoch": 14.59, + "learning_rate": 2.8793333333333334e-05, + "loss": 5.2486, + "step": 8640 + }, + { + "epoch": 14.63, + "learning_rate": 2.8860000000000005e-05, + "loss": 5.2662, + "step": 8660 + }, + { + "epoch": 14.66, + "learning_rate": 2.892666666666667e-05, + "loss": 5.2371, + "step": 8680 + }, + { + "epoch": 14.7, + "learning_rate": 2.8993333333333332e-05, + "loss": 5.2561, + "step": 8700 + }, + { + "epoch": 14.73, + "learning_rate": 2.9060000000000003e-05, + "loss": 5.2491, + "step": 8720 + }, + { + "epoch": 14.76, + "learning_rate": 2.912666666666667e-05, + "loss": 5.2385, + "step": 8740 + }, + { + "epoch": 14.8, + "learning_rate": 2.9193333333333334e-05, + "loss": 5.2527, + "step": 8760 + }, + { + "epoch": 14.83, + "learning_rate": 2.9260000000000004e-05, + "loss": 5.232, + "step": 8780 + }, + { + "epoch": 14.86, + "learning_rate": 2.9326666666666668e-05, + "loss": 5.219, + "step": 8800 + }, + { + "epoch": 14.9, + "learning_rate": 2.9393333333333335e-05, + "loss": 5.2324, + "step": 8820 + }, + { + "epoch": 14.93, + "learning_rate": 2.946e-05, + "loss": 5.2374, + "step": 8840 + }, + { + "epoch": 14.97, + "learning_rate": 2.952666666666667e-05, + "loss": 5.225, + "step": 8860 + }, + { + "epoch": 15.0, + "learning_rate": 2.9593333333333333e-05, + "loss": 5.2252, + "step": 8880 + }, + { + "epoch": 15.03, + "learning_rate": 2.9659999999999997e-05, + "loss": 5.197, + "step": 8900 + }, + { + "epoch": 15.07, + "learning_rate": 2.9726666666666668e-05, + "loss": 5.1955, + "step": 8920 + }, + { + "epoch": 15.1, + "learning_rate": 2.9793333333333335e-05, + "loss": 5.1948, + "step": 8940 + }, + { + "epoch": 15.14, + "learning_rate": 2.986e-05, + "loss": 5.1977, + "step": 8960 + }, + { + "epoch": 15.17, + "learning_rate": 2.992666666666667e-05, + "loss": 5.1877, + "step": 8980 + }, + { + "epoch": 15.2, + "learning_rate": 2.9993333333333333e-05, + "loss": 5.185, + "step": 9000 + }, + { + "epoch": 15.2, + "eval_loss": 4.983438014984131, + "eval_runtime": 47.943, + "eval_samples_per_second": 20.629, + "eval_steps_per_second": 0.125, + "eval_tse_ndup": 0.006947718609787001, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.021488613614829313, + "eval_tse_type": 5.266262217728345e-05, + "step": 9000 + }, + { + "epoch": 15.24, + "learning_rate": 3.006e-05, + "loss": 5.1537, + "step": 9020 + }, + { + "epoch": 15.27, + "learning_rate": 3.012666666666667e-05, + "loss": 5.1774, + "step": 9040 + }, + { + "epoch": 15.3, + "learning_rate": 3.0193333333333335e-05, + "loss": 5.1848, + "step": 9060 + }, + { + "epoch": 15.34, + "learning_rate": 3.0259999999999998e-05, + "loss": 5.187, + "step": 9080 + }, + { + "epoch": 15.37, + "learning_rate": 3.032666666666667e-05, + "loss": 5.1824, + "step": 9100 + }, + { + "epoch": 15.41, + "learning_rate": 3.0393333333333336e-05, + "loss": 5.1581, + "step": 9120 + }, + { + "epoch": 15.44, + "learning_rate": 3.046e-05, + "loss": 5.175, + "step": 9140 + }, + { + "epoch": 15.47, + "learning_rate": 3.052666666666667e-05, + "loss": 5.1655, + "step": 9160 + }, + { + "epoch": 15.51, + "learning_rate": 3.059333333333334e-05, + "loss": 5.1884, + "step": 9180 + }, + { + "epoch": 15.54, + "learning_rate": 3.066e-05, + "loss": 5.1622, + "step": 9200 + }, + { + "epoch": 15.57, + "learning_rate": 3.072666666666667e-05, + "loss": 5.166, + "step": 9220 + }, + { + "epoch": 15.61, + "learning_rate": 3.0793333333333336e-05, + "loss": 5.1487, + "step": 9240 + }, + { + "epoch": 15.64, + "learning_rate": 3.086e-05, + "loss": 5.1449, + "step": 9260 + }, + { + "epoch": 15.68, + "learning_rate": 3.092666666666667e-05, + "loss": 5.1215, + "step": 9280 + }, + { + "epoch": 15.71, + "learning_rate": 3.0993333333333334e-05, + "loss": 5.1455, + "step": 9300 + }, + { + "epoch": 15.74, + "learning_rate": 3.106e-05, + "loss": 5.141, + "step": 9320 + }, + { + "epoch": 15.78, + "learning_rate": 3.112666666666667e-05, + "loss": 5.1339, + "step": 9340 + }, + { + "epoch": 15.81, + "learning_rate": 3.119333333333334e-05, + "loss": 5.1455, + "step": 9360 + }, + { + "epoch": 15.84, + "learning_rate": 3.126e-05, + "loss": 5.1416, + "step": 9380 + }, + { + "epoch": 15.88, + "learning_rate": 3.132666666666667e-05, + "loss": 5.1371, + "step": 9400 + }, + { + "epoch": 15.91, + "learning_rate": 3.1393333333333337e-05, + "loss": 5.1065, + "step": 9420 + }, + { + "epoch": 15.95, + "learning_rate": 3.146e-05, + "loss": 5.1175, + "step": 9440 + }, + { + "epoch": 15.98, + "learning_rate": 3.1526666666666664e-05, + "loss": 5.1194, + "step": 9460 + }, + { + "epoch": 16.01, + "learning_rate": 3.1593333333333335e-05, + "loss": 5.1007, + "step": 9480 + }, + { + "epoch": 16.05, + "learning_rate": 3.166e-05, + "loss": 5.1, + "step": 9500 + }, + { + "epoch": 16.08, + "learning_rate": 3.172666666666667e-05, + "loss": 5.0957, + "step": 9520 + }, + { + "epoch": 16.11, + "learning_rate": 3.179333333333333e-05, + "loss": 5.0958, + "step": 9540 + }, + { + "epoch": 16.15, + "learning_rate": 3.186e-05, + "loss": 5.0879, + "step": 9560 + }, + { + "epoch": 16.18, + "learning_rate": 3.192666666666667e-05, + "loss": 5.107, + "step": 9580 + }, + { + "epoch": 16.22, + "learning_rate": 3.199333333333334e-05, + "loss": 5.0921, + "step": 9600 + }, + { + "epoch": 16.25, + "learning_rate": 3.206e-05, + "loss": 5.0837, + "step": 9620 + }, + { + "epoch": 16.28, + "learning_rate": 3.2126666666666665e-05, + "loss": 5.0836, + "step": 9640 + }, + { + "epoch": 16.32, + "learning_rate": 3.2193333333333336e-05, + "loss": 5.0768, + "step": 9660 + }, + { + "epoch": 16.35, + "learning_rate": 3.226e-05, + "loss": 5.0825, + "step": 9680 + }, + { + "epoch": 16.39, + "learning_rate": 3.232666666666666e-05, + "loss": 5.0684, + "step": 9700 + }, + { + "epoch": 16.42, + "learning_rate": 3.2393333333333334e-05, + "loss": 5.08, + "step": 9720 + }, + { + "epoch": 16.45, + "learning_rate": 3.2460000000000004e-05, + "loss": 5.0809, + "step": 9740 + }, + { + "epoch": 16.49, + "learning_rate": 3.252666666666667e-05, + "loss": 5.0806, + "step": 9760 + }, + { + "epoch": 16.52, + "learning_rate": 3.259333333333334e-05, + "loss": 5.0728, + "step": 9780 + }, + { + "epoch": 16.55, + "learning_rate": 3.266e-05, + "loss": 5.045, + "step": 9800 + }, + { + "epoch": 16.59, + "learning_rate": 3.2726666666666666e-05, + "loss": 5.0504, + "step": 9820 + }, + { + "epoch": 16.62, + "learning_rate": 3.279333333333334e-05, + "loss": 5.05, + "step": 9840 + }, + { + "epoch": 16.66, + "learning_rate": 3.286e-05, + "loss": 5.05, + "step": 9860 + }, + { + "epoch": 16.69, + "learning_rate": 3.2926666666666664e-05, + "loss": 5.0503, + "step": 9880 + }, + { + "epoch": 16.72, + "learning_rate": 3.2993333333333335e-05, + "loss": 5.0635, + "step": 9900 + }, + { + "epoch": 16.76, + "learning_rate": 3.3060000000000005e-05, + "loss": 5.0389, + "step": 9920 + }, + { + "epoch": 16.79, + "learning_rate": 3.312666666666667e-05, + "loss": 5.0455, + "step": 9940 + }, + { + "epoch": 16.82, + "learning_rate": 3.319333333333334e-05, + "loss": 5.0375, + "step": 9960 + }, + { + "epoch": 16.86, + "learning_rate": 3.3260000000000003e-05, + "loss": 5.0244, + "step": 9980 + }, + { + "epoch": 16.89, + "learning_rate": 3.332666666666667e-05, + "loss": 5.0289, + "step": 10000 + }, + { + "epoch": 16.89, + "eval_loss": 4.82033109664917, + "eval_runtime": 47.23, + "eval_samples_per_second": 20.94, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.011317646952682265, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.020578981676853885, + "eval_tse_type": 2.3322018392796954e-05, + "step": 10000 + }, + { + "epoch": 16.93, + "learning_rate": 3.339333333333334e-05, + "loss": 5.0197, + "step": 10020 + }, + { + "epoch": 16.96, + "learning_rate": 3.346e-05, + "loss": 5.0374, + "step": 10040 + }, + { + "epoch": 16.99, + "learning_rate": 3.3526666666666665e-05, + "loss": 5.0181, + "step": 10060 + }, + { + "epoch": 17.03, + "learning_rate": 3.359333333333333e-05, + "loss": 5.025, + "step": 10080 + }, + { + "epoch": 17.06, + "learning_rate": 3.366e-05, + "loss": 5.0142, + "step": 10100 + }, + { + "epoch": 17.09, + "learning_rate": 3.372666666666667e-05, + "loss": 4.9989, + "step": 10120 + }, + { + "epoch": 17.13, + "learning_rate": 3.3793333333333334e-05, + "loss": 5.006, + "step": 10140 + }, + { + "epoch": 17.16, + "learning_rate": 3.3860000000000004e-05, + "loss": 5.0033, + "step": 10160 + }, + { + "epoch": 17.2, + "learning_rate": 3.392666666666667e-05, + "loss": 4.9941, + "step": 10180 + }, + { + "epoch": 17.23, + "learning_rate": 3.399333333333333e-05, + "loss": 4.9855, + "step": 10200 + }, + { + "epoch": 17.26, + "learning_rate": 3.406e-05, + "loss": 4.9799, + "step": 10220 + }, + { + "epoch": 17.3, + "learning_rate": 3.4126666666666666e-05, + "loss": 4.9858, + "step": 10240 + }, + { + "epoch": 17.33, + "learning_rate": 3.419333333333333e-05, + "loss": 4.9752, + "step": 10260 + }, + { + "epoch": 17.36, + "learning_rate": 3.426e-05, + "loss": 5.002, + "step": 10280 + }, + { + "epoch": 17.4, + "learning_rate": 3.432666666666667e-05, + "loss": 4.9834, + "step": 10300 + }, + { + "epoch": 17.43, + "learning_rate": 3.4393333333333335e-05, + "loss": 4.9915, + "step": 10320 + }, + { + "epoch": 17.47, + "learning_rate": 3.4460000000000005e-05, + "loss": 4.9699, + "step": 10340 + }, + { + "epoch": 17.5, + "learning_rate": 3.452666666666667e-05, + "loss": 4.9862, + "step": 10360 + }, + { + "epoch": 17.53, + "learning_rate": 3.459333333333333e-05, + "loss": 4.9749, + "step": 10380 + }, + { + "epoch": 17.57, + "learning_rate": 3.4660000000000004e-05, + "loss": 4.9767, + "step": 10400 + }, + { + "epoch": 17.6, + "learning_rate": 3.472666666666667e-05, + "loss": 4.9717, + "step": 10420 + }, + { + "epoch": 17.64, + "learning_rate": 3.479333333333333e-05, + "loss": 4.9541, + "step": 10440 + }, + { + "epoch": 17.67, + "learning_rate": 3.486e-05, + "loss": 4.9645, + "step": 10460 + }, + { + "epoch": 17.7, + "learning_rate": 3.4926666666666665e-05, + "loss": 4.9683, + "step": 10480 + }, + { + "epoch": 17.74, + "learning_rate": 3.4993333333333336e-05, + "loss": 4.9608, + "step": 10500 + }, + { + "epoch": 17.77, + "learning_rate": 3.5060000000000007e-05, + "loss": 4.9486, + "step": 10520 + }, + { + "epoch": 17.8, + "learning_rate": 3.512666666666667e-05, + "loss": 4.9536, + "step": 10540 + }, + { + "epoch": 17.84, + "learning_rate": 3.5193333333333334e-05, + "loss": 4.9385, + "step": 10560 + }, + { + "epoch": 17.87, + "learning_rate": 3.5260000000000005e-05, + "loss": 4.9397, + "step": 10580 + }, + { + "epoch": 17.91, + "learning_rate": 3.532666666666667e-05, + "loss": 4.9217, + "step": 10600 + }, + { + "epoch": 17.94, + "learning_rate": 3.539333333333333e-05, + "loss": 4.9301, + "step": 10620 + }, + { + "epoch": 17.97, + "learning_rate": 3.546e-05, + "loss": 4.9377, + "step": 10640 + }, + { + "epoch": 18.01, + "learning_rate": 3.5526666666666666e-05, + "loss": 4.9295, + "step": 10660 + }, + { + "epoch": 18.04, + "learning_rate": 3.559333333333334e-05, + "loss": 4.9267, + "step": 10680 + }, + { + "epoch": 18.07, + "learning_rate": 3.566e-05, + "loss": 4.9248, + "step": 10700 + }, + { + "epoch": 18.11, + "learning_rate": 3.572333333333334e-05, + "loss": 4.9083, + "step": 10720 + }, + { + "epoch": 18.14, + "learning_rate": 3.579e-05, + "loss": 4.9003, + "step": 10740 + }, + { + "epoch": 18.18, + "learning_rate": 3.5856666666666665e-05, + "loss": 4.9183, + "step": 10760 + }, + { + "epoch": 18.21, + "learning_rate": 3.5923333333333336e-05, + "loss": 4.9017, + "step": 10780 + }, + { + "epoch": 18.24, + "learning_rate": 3.599e-05, + "loss": 4.9102, + "step": 10800 + }, + { + "epoch": 18.28, + "learning_rate": 3.605666666666666e-05, + "loss": 4.9091, + "step": 10820 + }, + { + "epoch": 18.31, + "learning_rate": 3.6123333333333334e-05, + "loss": 4.9065, + "step": 10840 + }, + { + "epoch": 18.34, + "learning_rate": 3.6190000000000004e-05, + "loss": 4.8885, + "step": 10860 + }, + { + "epoch": 18.38, + "learning_rate": 3.625666666666667e-05, + "loss": 4.8856, + "step": 10880 + }, + { + "epoch": 18.41, + "learning_rate": 3.632333333333334e-05, + "loss": 4.8897, + "step": 10900 + }, + { + "epoch": 18.45, + "learning_rate": 3.639e-05, + "loss": 4.8961, + "step": 10920 + }, + { + "epoch": 18.48, + "learning_rate": 3.6456666666666666e-05, + "loss": 4.8833, + "step": 10940 + }, + { + "epoch": 18.51, + "learning_rate": 3.6523333333333337e-05, + "loss": 4.8744, + "step": 10960 + }, + { + "epoch": 18.55, + "learning_rate": 3.659e-05, + "loss": 4.8762, + "step": 10980 + }, + { + "epoch": 18.58, + "learning_rate": 3.6656666666666664e-05, + "loss": 4.9051, + "step": 11000 + }, + { + "epoch": 18.58, + "eval_loss": 4.668403148651123, + "eval_runtime": 47.1936, + "eval_samples_per_second": 20.956, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.013447760109870657, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.017324954159006013, + "eval_tse_type": 2.2820469610156163e-05, + "step": 11000 + }, + { + "epoch": 18.61, + "learning_rate": 3.6723333333333335e-05, + "loss": 4.8762, + "step": 11020 + }, + { + "epoch": 18.65, + "learning_rate": 3.6790000000000005e-05, + "loss": 4.8818, + "step": 11040 + }, + { + "epoch": 18.68, + "learning_rate": 3.685666666666667e-05, + "loss": 4.8846, + "step": 11060 + }, + { + "epoch": 18.72, + "learning_rate": 3.692333333333334e-05, + "loss": 4.869, + "step": 11080 + }, + { + "epoch": 18.75, + "learning_rate": 3.699e-05, + "loss": 4.8728, + "step": 11100 + }, + { + "epoch": 18.78, + "learning_rate": 3.705666666666667e-05, + "loss": 4.8554, + "step": 11120 + }, + { + "epoch": 18.82, + "learning_rate": 3.712333333333334e-05, + "loss": 4.8501, + "step": 11140 + }, + { + "epoch": 18.85, + "learning_rate": 3.719e-05, + "loss": 4.8576, + "step": 11160 + }, + { + "epoch": 18.89, + "learning_rate": 3.7256666666666665e-05, + "loss": 4.8459, + "step": 11180 + }, + { + "epoch": 18.92, + "learning_rate": 3.7323333333333336e-05, + "loss": 4.8468, + "step": 11200 + }, + { + "epoch": 18.95, + "learning_rate": 3.739e-05, + "loss": 4.8401, + "step": 11220 + }, + { + "epoch": 18.99, + "learning_rate": 3.745666666666667e-05, + "loss": 4.8533, + "step": 11240 + }, + { + "epoch": 19.02, + "learning_rate": 3.7523333333333334e-05, + "loss": 4.8369, + "step": 11260 + }, + { + "epoch": 19.05, + "learning_rate": 3.7590000000000004e-05, + "loss": 4.834, + "step": 11280 + }, + { + "epoch": 19.09, + "learning_rate": 3.765666666666667e-05, + "loss": 4.8195, + "step": 11300 + }, + { + "epoch": 19.12, + "learning_rate": 3.772333333333333e-05, + "loss": 4.8366, + "step": 11320 + }, + { + "epoch": 19.16, + "learning_rate": 3.779e-05, + "loss": 4.8187, + "step": 11340 + }, + { + "epoch": 19.19, + "learning_rate": 3.7856666666666666e-05, + "loss": 4.8247, + "step": 11360 + }, + { + "epoch": 19.22, + "learning_rate": 3.792333333333333e-05, + "loss": 4.8125, + "step": 11380 + }, + { + "epoch": 19.26, + "learning_rate": 3.799e-05, + "loss": 4.8191, + "step": 11400 + }, + { + "epoch": 19.29, + "learning_rate": 3.805666666666667e-05, + "loss": 4.8088, + "step": 11420 + }, + { + "epoch": 19.32, + "learning_rate": 3.8123333333333335e-05, + "loss": 4.809, + "step": 11440 + }, + { + "epoch": 19.36, + "learning_rate": 3.8190000000000005e-05, + "loss": 4.8036, + "step": 11460 + }, + { + "epoch": 19.39, + "learning_rate": 3.825666666666667e-05, + "loss": 4.8101, + "step": 11480 + }, + { + "epoch": 19.43, + "learning_rate": 3.832333333333333e-05, + "loss": 4.8087, + "step": 11500 + }, + { + "epoch": 19.46, + "learning_rate": 3.8390000000000003e-05, + "loss": 4.8172, + "step": 11520 + }, + { + "epoch": 19.49, + "learning_rate": 3.845666666666667e-05, + "loss": 4.7924, + "step": 11540 + }, + { + "epoch": 19.53, + "learning_rate": 3.852333333333333e-05, + "loss": 4.7777, + "step": 11560 + }, + { + "epoch": 19.56, + "learning_rate": 3.859e-05, + "loss": 4.7867, + "step": 11580 + }, + { + "epoch": 19.59, + "learning_rate": 3.865666666666667e-05, + "loss": 4.7873, + "step": 11600 + }, + { + "epoch": 19.63, + "learning_rate": 3.8723333333333336e-05, + "loss": 4.7886, + "step": 11620 + }, + { + "epoch": 19.66, + "learning_rate": 3.8790000000000006e-05, + "loss": 4.7865, + "step": 11640 + }, + { + "epoch": 19.7, + "learning_rate": 3.885666666666667e-05, + "loss": 4.7948, + "step": 11660 + }, + { + "epoch": 19.73, + "learning_rate": 3.8923333333333334e-05, + "loss": 4.7785, + "step": 11680 + }, + { + "epoch": 19.76, + "learning_rate": 3.8990000000000004e-05, + "loss": 4.7578, + "step": 11700 + }, + { + "epoch": 19.8, + "learning_rate": 3.905666666666667e-05, + "loss": 4.7851, + "step": 11720 + }, + { + "epoch": 19.83, + "learning_rate": 3.912333333333333e-05, + "loss": 4.7637, + "step": 11740 + }, + { + "epoch": 19.86, + "learning_rate": 3.919e-05, + "loss": 4.769, + "step": 11760 + }, + { + "epoch": 19.9, + "learning_rate": 3.9256666666666666e-05, + "loss": 4.7635, + "step": 11780 + }, + { + "epoch": 19.93, + "learning_rate": 3.932333333333334e-05, + "loss": 4.7621, + "step": 11800 + }, + { + "epoch": 19.97, + "learning_rate": 3.939e-05, + "loss": 4.7544, + "step": 11820 + }, + { + "epoch": 20.0, + "learning_rate": 3.945666666666667e-05, + "loss": 4.7671, + "step": 11840 + }, + { + "epoch": 20.03, + "learning_rate": 3.9523333333333335e-05, + "loss": 4.7392, + "step": 11860 + }, + { + "epoch": 20.07, + "learning_rate": 3.959e-05, + "loss": 4.7206, + "step": 11880 + }, + { + "epoch": 20.1, + "learning_rate": 3.965666666666667e-05, + "loss": 4.7336, + "step": 11900 + }, + { + "epoch": 20.14, + "learning_rate": 3.972333333333333e-05, + "loss": 4.7395, + "step": 11920 + }, + { + "epoch": 20.17, + "learning_rate": 3.979e-05, + "loss": 4.7248, + "step": 11940 + }, + { + "epoch": 20.2, + "learning_rate": 3.985666666666667e-05, + "loss": 4.7279, + "step": 11960 + }, + { + "epoch": 20.24, + "learning_rate": 3.992333333333334e-05, + "loss": 4.7454, + "step": 11980 + }, + { + "epoch": 20.27, + "learning_rate": 3.999e-05, + "loss": 4.7226, + "step": 12000 + }, + { + "epoch": 20.27, + "eval_loss": 4.5096211433410645, + "eval_runtime": 47.265, + "eval_samples_per_second": 20.925, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.004553048152892597, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.023718560323270747, + "eval_tse_type": 3.159757330637007e-05, + "step": 12000 + }, + { + "epoch": 20.3, + "learning_rate": 4.005666666666667e-05, + "loss": 4.7199, + "step": 12020 + }, + { + "epoch": 20.34, + "learning_rate": 4.0123333333333336e-05, + "loss": 4.7172, + "step": 12040 + }, + { + "epoch": 20.37, + "learning_rate": 4.019e-05, + "loss": 4.721, + "step": 12060 + }, + { + "epoch": 20.41, + "learning_rate": 4.025666666666667e-05, + "loss": 4.6972, + "step": 12080 + }, + { + "epoch": 20.44, + "learning_rate": 4.0323333333333334e-05, + "loss": 4.7091, + "step": 12100 + }, + { + "epoch": 20.47, + "learning_rate": 4.039e-05, + "loss": 4.7091, + "step": 12120 + }, + { + "epoch": 20.51, + "learning_rate": 4.045666666666667e-05, + "loss": 4.6999, + "step": 12140 + }, + { + "epoch": 20.54, + "learning_rate": 4.052333333333333e-05, + "loss": 4.6933, + "step": 12160 + }, + { + "epoch": 20.57, + "learning_rate": 4.059e-05, + "loss": 4.707, + "step": 12180 + }, + { + "epoch": 20.61, + "learning_rate": 4.065666666666667e-05, + "loss": 4.7006, + "step": 12200 + }, + { + "epoch": 20.64, + "learning_rate": 4.072333333333334e-05, + "loss": 4.6829, + "step": 12220 + }, + { + "epoch": 20.68, + "learning_rate": 4.079e-05, + "loss": 4.7021, + "step": 12240 + }, + { + "epoch": 20.71, + "learning_rate": 4.085666666666667e-05, + "loss": 4.7069, + "step": 12260 + }, + { + "epoch": 20.74, + "learning_rate": 4.0923333333333335e-05, + "loss": 4.6919, + "step": 12280 + }, + { + "epoch": 20.78, + "learning_rate": 4.099e-05, + "loss": 4.6995, + "step": 12300 + }, + { + "epoch": 20.81, + "learning_rate": 4.105666666666667e-05, + "loss": 4.689, + "step": 12320 + }, + { + "epoch": 20.84, + "learning_rate": 4.112333333333333e-05, + "loss": 4.6863, + "step": 12340 + }, + { + "epoch": 20.88, + "learning_rate": 4.1190000000000004e-05, + "loss": 4.6779, + "step": 12360 + }, + { + "epoch": 20.91, + "learning_rate": 4.1256666666666674e-05, + "loss": 4.6769, + "step": 12380 + }, + { + "epoch": 20.95, + "learning_rate": 4.132333333333334e-05, + "loss": 4.6919, + "step": 12400 + }, + { + "epoch": 20.98, + "learning_rate": 4.139e-05, + "loss": 4.6536, + "step": 12420 + }, + { + "epoch": 21.01, + "learning_rate": 4.145666666666667e-05, + "loss": 4.6801, + "step": 12440 + }, + { + "epoch": 21.05, + "learning_rate": 4.1523333333333336e-05, + "loss": 4.6575, + "step": 12460 + }, + { + "epoch": 21.08, + "learning_rate": 4.159e-05, + "loss": 4.6575, + "step": 12480 + }, + { + "epoch": 21.11, + "learning_rate": 4.1656666666666664e-05, + "loss": 4.6461, + "step": 12500 + }, + { + "epoch": 21.15, + "learning_rate": 4.1723333333333334e-05, + "loss": 4.6405, + "step": 12520 + }, + { + "epoch": 21.18, + "learning_rate": 4.179e-05, + "loss": 4.6446, + "step": 12540 + }, + { + "epoch": 21.22, + "learning_rate": 4.185666666666667e-05, + "loss": 4.634, + "step": 12560 + }, + { + "epoch": 21.25, + "learning_rate": 4.192333333333334e-05, + "loss": 4.6323, + "step": 12580 + }, + { + "epoch": 21.28, + "learning_rate": 4.199e-05, + "loss": 4.6389, + "step": 12600 + }, + { + "epoch": 21.32, + "learning_rate": 4.205666666666667e-05, + "loss": 4.6479, + "step": 12620 + }, + { + "epoch": 21.35, + "learning_rate": 4.212333333333334e-05, + "loss": 4.6287, + "step": 12640 + }, + { + "epoch": 21.39, + "learning_rate": 4.219e-05, + "loss": 4.6206, + "step": 12660 + }, + { + "epoch": 21.42, + "learning_rate": 4.2256666666666665e-05, + "loss": 4.6247, + "step": 12680 + }, + { + "epoch": 21.45, + "learning_rate": 4.2323333333333335e-05, + "loss": 4.6219, + "step": 12700 + }, + { + "epoch": 21.49, + "learning_rate": 4.239e-05, + "loss": 4.6233, + "step": 12720 + }, + { + "epoch": 21.52, + "learning_rate": 4.245666666666667e-05, + "loss": 4.6182, + "step": 12740 + }, + { + "epoch": 21.55, + "learning_rate": 4.252333333333334e-05, + "loss": 4.6204, + "step": 12760 + }, + { + "epoch": 21.59, + "learning_rate": 4.2590000000000004e-05, + "loss": 4.6288, + "step": 12780 + }, + { + "epoch": 21.62, + "learning_rate": 4.265666666666667e-05, + "loss": 4.6106, + "step": 12800 + }, + { + "epoch": 21.66, + "learning_rate": 4.272333333333334e-05, + "loss": 4.6268, + "step": 12820 + }, + { + "epoch": 21.69, + "learning_rate": 4.279e-05, + "loss": 4.6165, + "step": 12840 + }, + { + "epoch": 21.72, + "learning_rate": 4.2856666666666666e-05, + "loss": 4.6112, + "step": 12860 + }, + { + "epoch": 21.76, + "learning_rate": 4.292e-05, + "loss": 4.6031, + "step": 12880 + }, + { + "epoch": 21.79, + "learning_rate": 4.2986666666666666e-05, + "loss": 4.6123, + "step": 12900 + }, + { + "epoch": 21.82, + "learning_rate": 4.305333333333334e-05, + "loss": 4.5946, + "step": 12920 + }, + { + "epoch": 21.86, + "learning_rate": 4.312000000000001e-05, + "loss": 4.5956, + "step": 12940 + }, + { + "epoch": 21.89, + "learning_rate": 4.318666666666667e-05, + "loss": 4.6022, + "step": 12960 + }, + { + "epoch": 21.93, + "learning_rate": 4.3253333333333335e-05, + "loss": 4.6004, + "step": 12980 + }, + { + "epoch": 21.96, + "learning_rate": 4.332e-05, + "loss": 4.591, + "step": 13000 + }, + { + "epoch": 21.96, + "eval_loss": 4.367846488952637, + "eval_runtime": 48.351, + "eval_samples_per_second": 20.455, + "eval_steps_per_second": 0.124, + "eval_tse_ndup": 0.011010868594862631, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.025262542461155377, + "eval_tse_type": 8.074935400516795e-05, + "step": 13000 + }, + { + "epoch": 21.99, + "learning_rate": 4.338666666666667e-05, + "loss": 4.5838, + "step": 13020 + }, + { + "epoch": 22.03, + "learning_rate": 4.345333333333333e-05, + "loss": 4.5609, + "step": 13040 + }, + { + "epoch": 22.06, + "learning_rate": 4.352e-05, + "loss": 4.5916, + "step": 13060 + }, + { + "epoch": 22.09, + "learning_rate": 4.358666666666667e-05, + "loss": 4.5582, + "step": 13080 + }, + { + "epoch": 22.13, + "learning_rate": 4.365333333333334e-05, + "loss": 4.5623, + "step": 13100 + }, + { + "epoch": 22.16, + "learning_rate": 4.372e-05, + "loss": 4.563, + "step": 13120 + }, + { + "epoch": 22.2, + "learning_rate": 4.378666666666667e-05, + "loss": 4.5692, + "step": 13140 + }, + { + "epoch": 22.23, + "learning_rate": 4.3853333333333336e-05, + "loss": 4.5555, + "step": 13160 + }, + { + "epoch": 22.26, + "learning_rate": 4.392e-05, + "loss": 4.5659, + "step": 13180 + }, + { + "epoch": 22.3, + "learning_rate": 4.398666666666667e-05, + "loss": 4.5452, + "step": 13200 + }, + { + "epoch": 22.33, + "learning_rate": 4.4053333333333334e-05, + "loss": 4.5542, + "step": 13220 + }, + { + "epoch": 22.36, + "learning_rate": 4.412e-05, + "loss": 4.5599, + "step": 13240 + }, + { + "epoch": 22.4, + "learning_rate": 4.418666666666667e-05, + "loss": 4.5708, + "step": 13260 + }, + { + "epoch": 22.43, + "learning_rate": 4.425333333333334e-05, + "loss": 4.5405, + "step": 13280 + }, + { + "epoch": 22.47, + "learning_rate": 4.432e-05, + "loss": 4.534, + "step": 13300 + }, + { + "epoch": 22.5, + "learning_rate": 4.438666666666667e-05, + "loss": 4.5443, + "step": 13320 + }, + { + "epoch": 22.53, + "learning_rate": 4.445333333333334e-05, + "loss": 4.5482, + "step": 13340 + }, + { + "epoch": 22.57, + "learning_rate": 4.452e-05, + "loss": 4.5212, + "step": 13360 + }, + { + "epoch": 22.6, + "learning_rate": 4.458666666666667e-05, + "loss": 4.5322, + "step": 13380 + }, + { + "epoch": 22.64, + "learning_rate": 4.4653333333333335e-05, + "loss": 4.5235, + "step": 13400 + }, + { + "epoch": 22.67, + "learning_rate": 4.472e-05, + "loss": 4.5324, + "step": 13420 + }, + { + "epoch": 22.7, + "learning_rate": 4.478666666666667e-05, + "loss": 4.5232, + "step": 13440 + }, + { + "epoch": 22.74, + "learning_rate": 4.485333333333333e-05, + "loss": 4.5203, + "step": 13460 + }, + { + "epoch": 22.77, + "learning_rate": 4.4920000000000004e-05, + "loss": 4.5214, + "step": 13480 + }, + { + "epoch": 22.8, + "learning_rate": 4.4986666666666674e-05, + "loss": 4.5212, + "step": 13500 + }, + { + "epoch": 22.84, + "learning_rate": 4.505333333333334e-05, + "loss": 4.5101, + "step": 13520 + }, + { + "epoch": 22.87, + "learning_rate": 4.512e-05, + "loss": 4.5227, + "step": 13540 + }, + { + "epoch": 22.91, + "learning_rate": 4.518666666666667e-05, + "loss": 4.5105, + "step": 13560 + }, + { + "epoch": 22.94, + "learning_rate": 4.5253333333333336e-05, + "loss": 4.5185, + "step": 13580 + }, + { + "epoch": 22.97, + "learning_rate": 4.532e-05, + "loss": 4.5193, + "step": 13600 + }, + { + "epoch": 23.01, + "learning_rate": 4.5386666666666664e-05, + "loss": 4.4876, + "step": 13620 + }, + { + "epoch": 23.04, + "learning_rate": 4.5453333333333334e-05, + "loss": 4.4763, + "step": 13640 + }, + { + "epoch": 23.07, + "learning_rate": 4.5520000000000005e-05, + "loss": 4.4667, + "step": 13660 + }, + { + "epoch": 23.11, + "learning_rate": 4.558666666666667e-05, + "loss": 4.4732, + "step": 13680 + }, + { + "epoch": 23.14, + "learning_rate": 4.565333333333334e-05, + "loss": 4.4953, + "step": 13700 + }, + { + "epoch": 23.18, + "learning_rate": 4.572e-05, + "loss": 4.4903, + "step": 13720 + }, + { + "epoch": 23.21, + "learning_rate": 4.5786666666666666e-05, + "loss": 4.4833, + "step": 13740 + }, + { + "epoch": 23.24, + "learning_rate": 4.585333333333334e-05, + "loss": 4.475, + "step": 13760 + }, + { + "epoch": 23.28, + "learning_rate": 4.592e-05, + "loss": 4.4632, + "step": 13780 + }, + { + "epoch": 23.31, + "learning_rate": 4.5986666666666665e-05, + "loss": 4.4851, + "step": 13800 + }, + { + "epoch": 23.34, + "learning_rate": 4.6053333333333335e-05, + "loss": 4.4872, + "step": 13820 + }, + { + "epoch": 23.38, + "learning_rate": 4.612e-05, + "loss": 4.4695, + "step": 13840 + }, + { + "epoch": 23.41, + "learning_rate": 4.618666666666667e-05, + "loss": 4.45, + "step": 13860 + }, + { + "epoch": 23.45, + "learning_rate": 4.625333333333334e-05, + "loss": 4.4737, + "step": 13880 + }, + { + "epoch": 23.48, + "learning_rate": 4.6320000000000004e-05, + "loss": 4.4587, + "step": 13900 + }, + { + "epoch": 23.51, + "learning_rate": 4.638666666666667e-05, + "loss": 4.4517, + "step": 13920 + }, + { + "epoch": 23.55, + "learning_rate": 4.645333333333334e-05, + "loss": 4.4483, + "step": 13940 + }, + { + "epoch": 23.58, + "learning_rate": 4.652e-05, + "loss": 4.4679, + "step": 13960 + }, + { + "epoch": 23.61, + "learning_rate": 4.6586666666666666e-05, + "loss": 4.4671, + "step": 13980 + }, + { + "epoch": 23.65, + "learning_rate": 4.6653333333333336e-05, + "loss": 4.4681, + "step": 14000 + }, + { + "epoch": 23.65, + "eval_loss": 4.2354302406311035, + "eval_runtime": 47.1335, + "eval_samples_per_second": 20.983, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.0073013202445331455, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.022982798478565217, + "eval_tse_type": 4.739635995955511e-05, + "step": 14000 + }, + { + "epoch": 23.68, + "learning_rate": 4.672e-05, + "loss": 4.455, + "step": 14020 + }, + { + "epoch": 23.72, + "learning_rate": 4.678666666666667e-05, + "loss": 4.4507, + "step": 14040 + }, + { + "epoch": 23.75, + "learning_rate": 4.685333333333334e-05, + "loss": 4.4374, + "step": 14060 + }, + { + "epoch": 23.78, + "learning_rate": 4.6920000000000005e-05, + "loss": 4.4594, + "step": 14080 + }, + { + "epoch": 23.82, + "learning_rate": 4.698666666666667e-05, + "loss": 4.4386, + "step": 14100 + }, + { + "epoch": 23.85, + "learning_rate": 4.705333333333334e-05, + "loss": 4.4435, + "step": 14120 + }, + { + "epoch": 23.89, + "learning_rate": 4.712e-05, + "loss": 4.4422, + "step": 14140 + }, + { + "epoch": 23.92, + "learning_rate": 4.718666666666667e-05, + "loss": 4.4278, + "step": 14160 + }, + { + "epoch": 23.95, + "learning_rate": 4.725333333333334e-05, + "loss": 4.4381, + "step": 14180 + }, + { + "epoch": 23.99, + "learning_rate": 4.732e-05, + "loss": 4.4109, + "step": 14200 + }, + { + "epoch": 24.02, + "learning_rate": 4.7386666666666665e-05, + "loss": 4.3991, + "step": 14220 + }, + { + "epoch": 24.05, + "learning_rate": 4.7453333333333335e-05, + "loss": 4.4284, + "step": 14240 + }, + { + "epoch": 24.09, + "learning_rate": 4.7520000000000006e-05, + "loss": 4.4072, + "step": 14260 + }, + { + "epoch": 24.12, + "learning_rate": 4.758666666666667e-05, + "loss": 4.3891, + "step": 14280 + }, + { + "epoch": 24.16, + "learning_rate": 4.765333333333333e-05, + "loss": 4.3962, + "step": 14300 + }, + { + "epoch": 24.19, + "learning_rate": 4.7720000000000004e-05, + "loss": 4.4233, + "step": 14320 + }, + { + "epoch": 24.22, + "learning_rate": 4.778666666666667e-05, + "loss": 4.4073, + "step": 14340 + }, + { + "epoch": 24.26, + "learning_rate": 4.785333333333333e-05, + "loss": 4.3932, + "step": 14360 + }, + { + "epoch": 24.29, + "learning_rate": 4.792e-05, + "loss": 4.3902, + "step": 14380 + }, + { + "epoch": 24.32, + "learning_rate": 4.7986666666666666e-05, + "loss": 4.4077, + "step": 14400 + }, + { + "epoch": 24.36, + "learning_rate": 4.8053333333333336e-05, + "loss": 4.4033, + "step": 14420 + }, + { + "epoch": 24.39, + "learning_rate": 4.812000000000001e-05, + "loss": 4.3889, + "step": 14440 + }, + { + "epoch": 24.43, + "learning_rate": 4.818666666666667e-05, + "loss": 4.3902, + "step": 14460 + }, + { + "epoch": 24.46, + "learning_rate": 4.8253333333333334e-05, + "loss": 4.3947, + "step": 14480 + }, + { + "epoch": 24.49, + "learning_rate": 4.8320000000000005e-05, + "loss": 4.3853, + "step": 14500 + }, + { + "epoch": 24.53, + "learning_rate": 4.838666666666667e-05, + "loss": 4.3885, + "step": 14520 + }, + { + "epoch": 24.56, + "learning_rate": 4.845333333333333e-05, + "loss": 4.377, + "step": 14540 + }, + { + "epoch": 24.59, + "learning_rate": 4.852e-05, + "loss": 4.3586, + "step": 14560 + }, + { + "epoch": 24.63, + "learning_rate": 4.858666666666667e-05, + "loss": 4.3676, + "step": 14580 + }, + { + "epoch": 24.66, + "learning_rate": 4.865333333333334e-05, + "loss": 4.3796, + "step": 14600 + }, + { + "epoch": 24.7, + "learning_rate": 4.872000000000001e-05, + "loss": 4.3834, + "step": 14620 + }, + { + "epoch": 24.73, + "learning_rate": 4.878666666666667e-05, + "loss": 4.3548, + "step": 14640 + }, + { + "epoch": 24.76, + "learning_rate": 4.8853333333333335e-05, + "loss": 4.3818, + "step": 14660 + }, + { + "epoch": 24.8, + "learning_rate": 4.8920000000000006e-05, + "loss": 4.3803, + "step": 14680 + }, + { + "epoch": 24.83, + "learning_rate": 4.898666666666667e-05, + "loss": 4.3532, + "step": 14700 + }, + { + "epoch": 24.86, + "learning_rate": 4.9053333333333333e-05, + "loss": 4.3777, + "step": 14720 + }, + { + "epoch": 24.9, + "learning_rate": 4.9120000000000004e-05, + "loss": 4.3767, + "step": 14740 + }, + { + "epoch": 24.93, + "learning_rate": 4.918666666666667e-05, + "loss": 4.3639, + "step": 14760 + }, + { + "epoch": 24.97, + "learning_rate": 4.925333333333333e-05, + "loss": 4.3665, + "step": 14780 + }, + { + "epoch": 25.0, + "learning_rate": 4.932e-05, + "loss": 4.3638, + "step": 14800 + }, + { + "epoch": 25.03, + "learning_rate": 4.938666666666667e-05, + "loss": 4.3518, + "step": 14820 + }, + { + "epoch": 25.07, + "learning_rate": 4.9453333333333336e-05, + "loss": 4.3211, + "step": 14840 + }, + { + "epoch": 25.1, + "learning_rate": 4.952e-05, + "loss": 4.3176, + "step": 14860 + }, + { + "epoch": 25.14, + "learning_rate": 4.958666666666667e-05, + "loss": 4.3258, + "step": 14880 + }, + { + "epoch": 25.17, + "learning_rate": 4.9653333333333335e-05, + "loss": 4.3154, + "step": 14900 + }, + { + "epoch": 25.2, + "learning_rate": 4.972e-05, + "loss": 4.332, + "step": 14920 + }, + { + "epoch": 25.24, + "learning_rate": 4.978666666666667e-05, + "loss": 4.3278, + "step": 14940 + }, + { + "epoch": 25.27, + "learning_rate": 4.985333333333333e-05, + "loss": 4.336, + "step": 14960 + }, + { + "epoch": 25.3, + "learning_rate": 4.992e-05, + "loss": 4.3223, + "step": 14980 + }, + { + "epoch": 25.34, + "learning_rate": 4.9986666666666674e-05, + "loss": 4.301, + "step": 15000 + }, + { + "epoch": 25.34, + "eval_loss": 4.116175174713135, + "eval_runtime": 49.0128, + "eval_samples_per_second": 20.178, + "eval_steps_per_second": 0.122, + "eval_tse_ndup": 0.010817436102613395, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.02719749724053464, + "eval_tse_type": 6.84614088304685e-05, + "step": 15000 + }, + { + "epoch": 25.37, + "learning_rate": 5.0053333333333344e-05, + "loss": 4.3329, + "step": 15020 + }, + { + "epoch": 25.41, + "learning_rate": 5.012e-05, + "loss": 4.3206, + "step": 15040 + }, + { + "epoch": 25.44, + "learning_rate": 5.018666666666667e-05, + "loss": 4.3276, + "step": 15060 + }, + { + "epoch": 25.47, + "learning_rate": 5.025333333333334e-05, + "loss": 4.3132, + "step": 15080 + }, + { + "epoch": 25.51, + "learning_rate": 5.032e-05, + "loss": 4.3199, + "step": 15100 + }, + { + "epoch": 25.54, + "learning_rate": 5.038666666666667e-05, + "loss": 4.3329, + "step": 15120 + }, + { + "epoch": 25.57, + "learning_rate": 5.045333333333333e-05, + "loss": 4.3115, + "step": 15140 + }, + { + "epoch": 25.61, + "learning_rate": 5.052e-05, + "loss": 4.3041, + "step": 15160 + }, + { + "epoch": 25.64, + "learning_rate": 5.058666666666667e-05, + "loss": 4.2975, + "step": 15180 + }, + { + "epoch": 25.68, + "learning_rate": 5.065333333333333e-05, + "loss": 4.3206, + "step": 15200 + }, + { + "epoch": 25.71, + "learning_rate": 5.072e-05, + "loss": 4.3128, + "step": 15220 + }, + { + "epoch": 25.74, + "learning_rate": 5.078666666666667e-05, + "loss": 4.288, + "step": 15240 + }, + { + "epoch": 25.78, + "learning_rate": 5.085333333333333e-05, + "loss": 4.2992, + "step": 15260 + }, + { + "epoch": 25.81, + "learning_rate": 5.092e-05, + "loss": 4.2805, + "step": 15280 + }, + { + "epoch": 25.84, + "learning_rate": 5.098666666666667e-05, + "loss": 4.3033, + "step": 15300 + }, + { + "epoch": 25.88, + "learning_rate": 5.105333333333333e-05, + "loss": 4.297, + "step": 15320 + }, + { + "epoch": 25.91, + "learning_rate": 5.112e-05, + "loss": 4.2841, + "step": 15340 + }, + { + "epoch": 25.95, + "learning_rate": 5.118666666666667e-05, + "loss": 4.3054, + "step": 15360 + }, + { + "epoch": 25.98, + "learning_rate": 5.125333333333333e-05, + "loss": 4.28, + "step": 15380 + }, + { + "epoch": 26.01, + "learning_rate": 5.132e-05, + "loss": 4.2987, + "step": 15400 + }, + { + "epoch": 26.05, + "learning_rate": 5.1386666666666674e-05, + "loss": 4.269, + "step": 15420 + }, + { + "epoch": 26.08, + "learning_rate": 5.145333333333333e-05, + "loss": 4.2666, + "step": 15440 + }, + { + "epoch": 26.11, + "learning_rate": 5.152e-05, + "loss": 4.2605, + "step": 15460 + }, + { + "epoch": 26.15, + "learning_rate": 5.158666666666667e-05, + "loss": 4.2556, + "step": 15480 + }, + { + "epoch": 26.18, + "learning_rate": 5.165333333333333e-05, + "loss": 4.2584, + "step": 15500 + }, + { + "epoch": 26.22, + "learning_rate": 5.172e-05, + "loss": 4.265, + "step": 15520 + }, + { + "epoch": 26.25, + "learning_rate": 5.178666666666667e-05, + "loss": 4.2448, + "step": 15540 + }, + { + "epoch": 26.28, + "learning_rate": 5.1853333333333334e-05, + "loss": 4.2637, + "step": 15560 + }, + { + "epoch": 26.32, + "learning_rate": 5.1920000000000004e-05, + "loss": 4.2644, + "step": 15580 + }, + { + "epoch": 26.35, + "learning_rate": 5.1986666666666675e-05, + "loss": 4.2567, + "step": 15600 + }, + { + "epoch": 26.39, + "learning_rate": 5.205333333333333e-05, + "loss": 4.2591, + "step": 15620 + }, + { + "epoch": 26.42, + "learning_rate": 5.212e-05, + "loss": 4.2574, + "step": 15640 + }, + { + "epoch": 26.45, + "learning_rate": 5.218666666666667e-05, + "loss": 4.2384, + "step": 15660 + }, + { + "epoch": 26.49, + "learning_rate": 5.225333333333333e-05, + "loss": 4.2566, + "step": 15680 + }, + { + "epoch": 26.52, + "learning_rate": 5.232e-05, + "loss": 4.2485, + "step": 15700 + }, + { + "epoch": 26.55, + "learning_rate": 5.238666666666667e-05, + "loss": 4.2617, + "step": 15720 + }, + { + "epoch": 26.59, + "learning_rate": 5.2453333333333335e-05, + "loss": 4.2387, + "step": 15740 + }, + { + "epoch": 26.62, + "learning_rate": 5.2520000000000005e-05, + "loss": 4.2579, + "step": 15760 + }, + { + "epoch": 26.66, + "learning_rate": 5.2586666666666676e-05, + "loss": 4.2199, + "step": 15780 + }, + { + "epoch": 26.69, + "learning_rate": 5.265333333333333e-05, + "loss": 4.2348, + "step": 15800 + }, + { + "epoch": 26.72, + "learning_rate": 5.2720000000000003e-05, + "loss": 4.241, + "step": 15820 + }, + { + "epoch": 26.76, + "learning_rate": 5.2786666666666674e-05, + "loss": 4.2551, + "step": 15840 + }, + { + "epoch": 26.79, + "learning_rate": 5.285333333333333e-05, + "loss": 4.2267, + "step": 15860 + }, + { + "epoch": 26.82, + "learning_rate": 5.292e-05, + "loss": 4.2225, + "step": 15880 + }, + { + "epoch": 26.86, + "learning_rate": 5.298666666666667e-05, + "loss": 4.2263, + "step": 15900 + }, + { + "epoch": 26.89, + "learning_rate": 5.3053333333333336e-05, + "loss": 4.2248, + "step": 15920 + }, + { + "epoch": 26.93, + "learning_rate": 5.3120000000000006e-05, + "loss": 4.2236, + "step": 15940 + }, + { + "epoch": 26.96, + "learning_rate": 5.318666666666667e-05, + "loss": 4.2373, + "step": 15960 + }, + { + "epoch": 26.99, + "learning_rate": 5.3253333333333334e-05, + "loss": 4.216, + "step": 15980 + }, + { + "epoch": 27.03, + "learning_rate": 5.3320000000000004e-05, + "loss": 4.2041, + "step": 16000 + }, + { + "epoch": 27.03, + "eval_loss": 4.027388095855713, + "eval_runtime": 49.1139, + "eval_samples_per_second": 20.137, + "eval_steps_per_second": 0.122, + "eval_tse_ndup": 0.004653635891177064, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.02689549264930171, + "eval_tse_type": 5.4418042916526235e-05, + "step": 16000 + }, + { + "epoch": 27.06, + "learning_rate": 5.3386666666666675e-05, + "loss": 4.1844, + "step": 16020 + }, + { + "epoch": 27.09, + "learning_rate": 5.345333333333333e-05, + "loss": 4.1833, + "step": 16040 + }, + { + "epoch": 27.13, + "learning_rate": 5.352e-05, + "loss": 4.1919, + "step": 16060 + }, + { + "epoch": 27.16, + "learning_rate": 5.358666666666667e-05, + "loss": 4.2044, + "step": 16080 + }, + { + "epoch": 27.2, + "learning_rate": 5.365333333333333e-05, + "loss": 4.1999, + "step": 16100 + }, + { + "epoch": 27.23, + "learning_rate": 5.372e-05, + "loss": 4.1814, + "step": 16120 + }, + { + "epoch": 27.26, + "learning_rate": 5.378666666666667e-05, + "loss": 4.2043, + "step": 16140 + }, + { + "epoch": 27.3, + "learning_rate": 5.3853333333333335e-05, + "loss": 4.1859, + "step": 16160 + }, + { + "epoch": 27.33, + "learning_rate": 5.3920000000000006e-05, + "loss": 4.1992, + "step": 16180 + }, + { + "epoch": 27.36, + "learning_rate": 5.3986666666666676e-05, + "loss": 4.1918, + "step": 16200 + }, + { + "epoch": 27.4, + "learning_rate": 5.405333333333333e-05, + "loss": 4.183, + "step": 16220 + }, + { + "epoch": 27.43, + "learning_rate": 5.4120000000000004e-05, + "loss": 4.1757, + "step": 16240 + }, + { + "epoch": 27.47, + "learning_rate": 5.4186666666666674e-05, + "loss": 4.1887, + "step": 16260 + }, + { + "epoch": 27.5, + "learning_rate": 5.425333333333333e-05, + "loss": 4.1783, + "step": 16280 + }, + { + "epoch": 27.53, + "learning_rate": 5.432e-05, + "loss": 4.1839, + "step": 16300 + }, + { + "epoch": 27.57, + "learning_rate": 5.438666666666667e-05, + "loss": 4.1921, + "step": 16320 + }, + { + "epoch": 27.6, + "learning_rate": 5.4453333333333336e-05, + "loss": 4.1872, + "step": 16340 + }, + { + "epoch": 27.64, + "learning_rate": 5.4520000000000007e-05, + "loss": 4.1655, + "step": 16360 + }, + { + "epoch": 27.67, + "learning_rate": 5.4586666666666664e-05, + "loss": 4.1829, + "step": 16380 + }, + { + "epoch": 27.7, + "learning_rate": 5.4653333333333334e-05, + "loss": 4.1605, + "step": 16400 + }, + { + "epoch": 27.74, + "learning_rate": 5.4720000000000005e-05, + "loss": 4.1699, + "step": 16420 + }, + { + "epoch": 27.77, + "learning_rate": 5.478666666666666e-05, + "loss": 4.1841, + "step": 16440 + }, + { + "epoch": 27.8, + "learning_rate": 5.485333333333333e-05, + "loss": 4.1943, + "step": 16460 + }, + { + "epoch": 27.84, + "learning_rate": 5.492e-05, + "loss": 4.1516, + "step": 16480 + }, + { + "epoch": 27.87, + "learning_rate": 5.4986666666666666e-05, + "loss": 4.1766, + "step": 16500 + }, + { + "epoch": 27.91, + "learning_rate": 5.505333333333334e-05, + "loss": 4.1643, + "step": 16520 + }, + { + "epoch": 27.94, + "learning_rate": 5.512000000000001e-05, + "loss": 4.1652, + "step": 16540 + }, + { + "epoch": 27.97, + "learning_rate": 5.5186666666666665e-05, + "loss": 4.1694, + "step": 16560 + }, + { + "epoch": 28.01, + "learning_rate": 5.5253333333333335e-05, + "loss": 4.1623, + "step": 16580 + }, + { + "epoch": 28.04, + "learning_rate": 5.5320000000000006e-05, + "loss": 4.1371, + "step": 16600 + }, + { + "epoch": 28.07, + "learning_rate": 5.538666666666666e-05, + "loss": 4.142, + "step": 16620 + }, + { + "epoch": 28.11, + "learning_rate": 5.545333333333333e-05, + "loss": 4.1513, + "step": 16640 + }, + { + "epoch": 28.14, + "learning_rate": 5.5520000000000004e-05, + "loss": 4.1357, + "step": 16660 + }, + { + "epoch": 28.18, + "learning_rate": 5.558666666666667e-05, + "loss": 4.107, + "step": 16680 + }, + { + "epoch": 28.21, + "learning_rate": 5.565333333333334e-05, + "loss": 4.1297, + "step": 16700 + }, + { + "epoch": 28.24, + "learning_rate": 5.572000000000001e-05, + "loss": 4.1275, + "step": 16720 + }, + { + "epoch": 28.28, + "learning_rate": 5.5786666666666666e-05, + "loss": 4.1358, + "step": 16740 + }, + { + "epoch": 28.31, + "learning_rate": 5.5853333333333336e-05, + "loss": 4.1243, + "step": 16760 + }, + { + "epoch": 28.34, + "learning_rate": 5.592000000000001e-05, + "loss": 4.1366, + "step": 16780 + }, + { + "epoch": 28.38, + "learning_rate": 5.5986666666666664e-05, + "loss": 4.1266, + "step": 16800 + }, + { + "epoch": 28.41, + "learning_rate": 5.6053333333333334e-05, + "loss": 4.112, + "step": 16820 + }, + { + "epoch": 28.45, + "learning_rate": 5.6120000000000005e-05, + "loss": 4.1305, + "step": 16840 + }, + { + "epoch": 28.48, + "learning_rate": 5.618666666666667e-05, + "loss": 4.1163, + "step": 16860 + }, + { + "epoch": 28.51, + "learning_rate": 5.6250000000000005e-05, + "loss": 4.1156, + "step": 16880 + }, + { + "epoch": 28.55, + "learning_rate": 5.6316666666666676e-05, + "loss": 4.1357, + "step": 16900 + }, + { + "epoch": 28.58, + "learning_rate": 5.638333333333333e-05, + "loss": 4.1129, + "step": 16920 + }, + { + "epoch": 28.61, + "learning_rate": 5.645e-05, + "loss": 4.1258, + "step": 16940 + }, + { + "epoch": 28.65, + "learning_rate": 5.6516666666666674e-05, + "loss": 4.1345, + "step": 16960 + }, + { + "epoch": 28.68, + "learning_rate": 5.658333333333333e-05, + "loss": 4.0953, + "step": 16980 + }, + { + "epoch": 28.72, + "learning_rate": 5.665e-05, + "loss": 4.1014, + "step": 17000 + }, + { + "epoch": 28.72, + "eval_loss": 3.9104785919189453, + "eval_runtime": 47.1515, + "eval_samples_per_second": 20.975, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.0052282058364623105, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.02501951527209312, + "eval_tse_type": 8.426019548365353e-05, + "step": 17000 + }, + { + "epoch": 28.75, + "learning_rate": 5.671666666666667e-05, + "loss": 4.0944, + "step": 17020 + }, + { + "epoch": 28.78, + "learning_rate": 5.6783333333333336e-05, + "loss": 4.1039, + "step": 17040 + }, + { + "epoch": 28.82, + "learning_rate": 5.6850000000000006e-05, + "loss": 4.0933, + "step": 17060 + }, + { + "epoch": 28.85, + "learning_rate": 5.691666666666668e-05, + "loss": 4.1013, + "step": 17080 + }, + { + "epoch": 28.89, + "learning_rate": 5.6983333333333334e-05, + "loss": 4.0946, + "step": 17100 + }, + { + "epoch": 28.92, + "learning_rate": 5.7050000000000004e-05, + "loss": 4.0965, + "step": 17120 + }, + { + "epoch": 28.95, + "learning_rate": 5.7116666666666675e-05, + "loss": 4.0999, + "step": 17140 + }, + { + "epoch": 28.99, + "learning_rate": 5.718333333333333e-05, + "loss": 4.0836, + "step": 17160 + }, + { + "epoch": 29.02, + "learning_rate": 5.725e-05, + "loss": 4.0887, + "step": 17180 + }, + { + "epoch": 29.05, + "learning_rate": 5.731666666666667e-05, + "loss": 4.0653, + "step": 17200 + }, + { + "epoch": 29.09, + "learning_rate": 5.738333333333334e-05, + "loss": 4.0566, + "step": 17220 + }, + { + "epoch": 29.12, + "learning_rate": 5.745e-05, + "loss": 4.0655, + "step": 17240 + }, + { + "epoch": 29.16, + "learning_rate": 5.751666666666667e-05, + "loss": 4.0734, + "step": 17260 + }, + { + "epoch": 29.19, + "learning_rate": 5.7583333333333335e-05, + "loss": 4.0637, + "step": 17280 + }, + { + "epoch": 29.22, + "learning_rate": 5.7650000000000005e-05, + "loss": 4.0756, + "step": 17300 + }, + { + "epoch": 29.26, + "learning_rate": 5.7716666666666676e-05, + "loss": 4.0706, + "step": 17320 + }, + { + "epoch": 29.29, + "learning_rate": 5.778333333333333e-05, + "loss": 4.0541, + "step": 17340 + }, + { + "epoch": 29.32, + "learning_rate": 5.7850000000000003e-05, + "loss": 4.0837, + "step": 17360 + }, + { + "epoch": 29.36, + "learning_rate": 5.7916666666666674e-05, + "loss": 4.0597, + "step": 17380 + }, + { + "epoch": 29.39, + "learning_rate": 5.798333333333333e-05, + "loss": 4.0624, + "step": 17400 + }, + { + "epoch": 29.43, + "learning_rate": 5.805e-05, + "loss": 4.0607, + "step": 17420 + }, + { + "epoch": 29.46, + "learning_rate": 5.811666666666667e-05, + "loss": 4.0493, + "step": 17440 + }, + { + "epoch": 29.49, + "learning_rate": 5.8183333333333336e-05, + "loss": 4.0562, + "step": 17460 + }, + { + "epoch": 29.53, + "learning_rate": 5.8250000000000006e-05, + "loss": 4.0473, + "step": 17480 + }, + { + "epoch": 29.56, + "learning_rate": 5.831666666666668e-05, + "loss": 4.0315, + "step": 17500 + }, + { + "epoch": 29.59, + "learning_rate": 5.8383333333333334e-05, + "loss": 4.0591, + "step": 17520 + }, + { + "epoch": 29.63, + "learning_rate": 5.8450000000000005e-05, + "loss": 4.042, + "step": 17540 + }, + { + "epoch": 29.66, + "learning_rate": 5.851666666666666e-05, + "loss": 4.059, + "step": 17560 + }, + { + "epoch": 29.7, + "learning_rate": 5.858333333333333e-05, + "loss": 4.0502, + "step": 17580 + }, + { + "epoch": 29.73, + "learning_rate": 5.865e-05, + "loss": 4.0425, + "step": 17600 + }, + { + "epoch": 29.76, + "learning_rate": 5.8716666666666666e-05, + "loss": 4.0501, + "step": 17620 + }, + { + "epoch": 29.8, + "learning_rate": 5.878333333333334e-05, + "loss": 4.0398, + "step": 17640 + }, + { + "epoch": 29.83, + "learning_rate": 5.885000000000001e-05, + "loss": 4.0589, + "step": 17660 + }, + { + "epoch": 29.86, + "learning_rate": 5.8916666666666664e-05, + "loss": 4.0511, + "step": 17680 + }, + { + "epoch": 29.9, + "learning_rate": 5.8983333333333335e-05, + "loss": 4.0441, + "step": 17700 + }, + { + "epoch": 29.93, + "learning_rate": 5.9050000000000006e-05, + "loss": 4.0423, + "step": 17720 + }, + { + "epoch": 29.97, + "learning_rate": 5.911666666666666e-05, + "loss": 4.0582, + "step": 17740 + }, + { + "epoch": 30.0, + "learning_rate": 5.918333333333333e-05, + "loss": 4.0172, + "step": 17760 + }, + { + "epoch": 30.03, + "learning_rate": 5.9250000000000004e-05, + "loss": 3.9971, + "step": 17780 + }, + { + "epoch": 30.07, + "learning_rate": 5.931666666666667e-05, + "loss": 4.0049, + "step": 17800 + }, + { + "epoch": 30.1, + "learning_rate": 5.938333333333334e-05, + "loss": 4.0103, + "step": 17820 + }, + { + "epoch": 30.14, + "learning_rate": 5.945000000000001e-05, + "loss": 4.0012, + "step": 17840 + }, + { + "epoch": 30.17, + "learning_rate": 5.9516666666666665e-05, + "loss": 4.0021, + "step": 17860 + }, + { + "epoch": 30.2, + "learning_rate": 5.9583333333333336e-05, + "loss": 4.0068, + "step": 17880 + }, + { + "epoch": 30.24, + "learning_rate": 5.9650000000000007e-05, + "loss": 3.9981, + "step": 17900 + }, + { + "epoch": 30.27, + "learning_rate": 5.9716666666666664e-05, + "loss": 3.9985, + "step": 17920 + }, + { + "epoch": 30.3, + "learning_rate": 5.9783333333333334e-05, + "loss": 4.016, + "step": 17940 + }, + { + "epoch": 30.34, + "learning_rate": 5.9850000000000005e-05, + "loss": 4.0167, + "step": 17960 + }, + { + "epoch": 30.37, + "learning_rate": 5.991666666666667e-05, + "loss": 4.0145, + "step": 17980 + }, + { + "epoch": 30.41, + "learning_rate": 5.998333333333334e-05, + "loss": 3.9873, + "step": 18000 + }, + { + "epoch": 30.41, + "eval_loss": 3.824690818786621, + "eval_runtime": 47.1697, + "eval_samples_per_second": 20.967, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.00881404733764158, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.028798604985216605, + "eval_tse_type": 0.00015096618357487925, + "step": 18000 + }, + { + "epoch": 30.44, + "learning_rate": 6.005000000000001e-05, + "loss": 3.9943, + "step": 18020 + }, + { + "epoch": 30.47, + "learning_rate": 6.0116666666666667e-05, + "loss": 4.0006, + "step": 18040 + }, + { + "epoch": 30.51, + "learning_rate": 6.018333333333334e-05, + "loss": 4.0087, + "step": 18060 + }, + { + "epoch": 30.54, + "learning_rate": 6.025000000000001e-05, + "loss": 3.996, + "step": 18080 + }, + { + "epoch": 30.57, + "learning_rate": 6.0316666666666665e-05, + "loss": 3.9905, + "step": 18100 + }, + { + "epoch": 30.61, + "learning_rate": 6.0383333333333335e-05, + "loss": 3.9765, + "step": 18120 + }, + { + "epoch": 30.64, + "learning_rate": 6.0450000000000006e-05, + "loss": 3.9967, + "step": 18140 + }, + { + "epoch": 30.68, + "learning_rate": 6.051666666666666e-05, + "loss": 3.997, + "step": 18160 + }, + { + "epoch": 30.71, + "learning_rate": 6.058333333333333e-05, + "loss": 3.9924, + "step": 18180 + }, + { + "epoch": 30.74, + "learning_rate": 6.0650000000000004e-05, + "loss": 3.9955, + "step": 18200 + }, + { + "epoch": 30.78, + "learning_rate": 6.071666666666667e-05, + "loss": 3.9964, + "step": 18220 + }, + { + "epoch": 30.81, + "learning_rate": 6.078333333333334e-05, + "loss": 4.0035, + "step": 18240 + }, + { + "epoch": 30.84, + "learning_rate": 6.085000000000001e-05, + "loss": 3.985, + "step": 18260 + }, + { + "epoch": 30.88, + "learning_rate": 6.0916666666666666e-05, + "loss": 3.9823, + "step": 18280 + }, + { + "epoch": 30.91, + "learning_rate": 6.0983333333333336e-05, + "loss": 3.9668, + "step": 18300 + }, + { + "epoch": 30.95, + "learning_rate": 6.105e-05, + "loss": 3.9813, + "step": 18320 + }, + { + "epoch": 30.98, + "learning_rate": 6.111666666666667e-05, + "loss": 3.9721, + "step": 18340 + }, + { + "epoch": 31.01, + "learning_rate": 6.118333333333333e-05, + "loss": 3.9785, + "step": 18360 + }, + { + "epoch": 31.05, + "learning_rate": 6.125000000000001e-05, + "loss": 3.9517, + "step": 18380 + }, + { + "epoch": 31.08, + "learning_rate": 6.131666666666666e-05, + "loss": 3.9356, + "step": 18400 + }, + { + "epoch": 31.11, + "learning_rate": 6.138333333333334e-05, + "loss": 3.9674, + "step": 18420 + }, + { + "epoch": 31.15, + "learning_rate": 6.145e-05, + "loss": 3.9513, + "step": 18440 + }, + { + "epoch": 31.18, + "learning_rate": 6.151666666666667e-05, + "loss": 3.9554, + "step": 18460 + }, + { + "epoch": 31.22, + "learning_rate": 6.158e-05, + "loss": 3.9747, + "step": 18480 + }, + { + "epoch": 31.25, + "learning_rate": 6.164666666666668e-05, + "loss": 3.9409, + "step": 18500 + }, + { + "epoch": 31.28, + "learning_rate": 6.171333333333333e-05, + "loss": 3.9338, + "step": 18520 + }, + { + "epoch": 31.32, + "learning_rate": 6.178000000000001e-05, + "loss": 3.9511, + "step": 18540 + }, + { + "epoch": 31.35, + "learning_rate": 6.184666666666667e-05, + "loss": 3.9455, + "step": 18560 + }, + { + "epoch": 31.39, + "learning_rate": 6.191333333333334e-05, + "loss": 3.9652, + "step": 18580 + }, + { + "epoch": 31.42, + "learning_rate": 6.198e-05, + "loss": 3.948, + "step": 18600 + }, + { + "epoch": 31.45, + "learning_rate": 6.204666666666668e-05, + "loss": 3.9418, + "step": 18620 + }, + { + "epoch": 31.49, + "learning_rate": 6.211333333333334e-05, + "loss": 3.9469, + "step": 18640 + }, + { + "epoch": 31.52, + "learning_rate": 6.218e-05, + "loss": 3.9209, + "step": 18660 + }, + { + "epoch": 31.55, + "learning_rate": 6.224666666666667e-05, + "loss": 3.9364, + "step": 18680 + }, + { + "epoch": 31.59, + "learning_rate": 6.231333333333333e-05, + "loss": 3.9433, + "step": 18700 + }, + { + "epoch": 31.62, + "learning_rate": 6.238000000000001e-05, + "loss": 3.9114, + "step": 18720 + }, + { + "epoch": 31.66, + "learning_rate": 6.244666666666666e-05, + "loss": 3.937, + "step": 18740 + }, + { + "epoch": 31.69, + "learning_rate": 6.251333333333334e-05, + "loss": 3.9331, + "step": 18760 + }, + { + "epoch": 31.72, + "learning_rate": 6.258e-05, + "loss": 3.9161, + "step": 18780 + }, + { + "epoch": 31.76, + "learning_rate": 6.264666666666666e-05, + "loss": 3.9347, + "step": 18800 + }, + { + "epoch": 31.79, + "learning_rate": 6.271333333333334e-05, + "loss": 3.9473, + "step": 18820 + }, + { + "epoch": 31.82, + "learning_rate": 6.278e-05, + "loss": 3.9257, + "step": 18840 + }, + { + "epoch": 31.86, + "learning_rate": 6.284666666666667e-05, + "loss": 3.9495, + "step": 18860 + }, + { + "epoch": 31.89, + "learning_rate": 6.291333333333333e-05, + "loss": 3.9505, + "step": 18880 + }, + { + "epoch": 31.93, + "learning_rate": 6.298000000000001e-05, + "loss": 3.9255, + "step": 18900 + }, + { + "epoch": 31.96, + "learning_rate": 6.304666666666666e-05, + "loss": 3.9245, + "step": 18920 + }, + { + "epoch": 31.99, + "learning_rate": 6.311333333333334e-05, + "loss": 3.9306, + "step": 18940 + }, + { + "epoch": 32.03, + "learning_rate": 6.318e-05, + "loss": 3.8777, + "step": 18960 + }, + { + "epoch": 32.06, + "learning_rate": 6.324666666666667e-05, + "loss": 3.8864, + "step": 18980 + }, + { + "epoch": 32.09, + "learning_rate": 6.331333333333333e-05, + "loss": 3.9045, + "step": 19000 + }, + { + "epoch": 32.09, + "eval_loss": 3.742777109146118, + "eval_runtime": 47.4769, + "eval_samples_per_second": 20.831, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.007489709142089086, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.021463110749665632, + "eval_tse_type": 0.0001351673969216942, + "step": 19000 + }, + { + "epoch": 32.13, + "learning_rate": 6.338e-05, + "loss": 3.8955, + "step": 19020 + }, + { + "epoch": 32.16, + "learning_rate": 6.344666666666667e-05, + "loss": 3.8919, + "step": 19040 + }, + { + "epoch": 32.2, + "learning_rate": 6.351333333333333e-05, + "loss": 3.8977, + "step": 19060 + }, + { + "epoch": 32.23, + "learning_rate": 6.358000000000001e-05, + "loss": 3.8959, + "step": 19080 + }, + { + "epoch": 32.26, + "learning_rate": 6.364666666666666e-05, + "loss": 3.8859, + "step": 19100 + }, + { + "epoch": 32.3, + "learning_rate": 6.371333333333334e-05, + "loss": 3.8999, + "step": 19120 + }, + { + "epoch": 32.33, + "learning_rate": 6.378e-05, + "loss": 3.8998, + "step": 19140 + }, + { + "epoch": 32.36, + "learning_rate": 6.384666666666667e-05, + "loss": 3.885, + "step": 19160 + }, + { + "epoch": 32.4, + "learning_rate": 6.391333333333333e-05, + "loss": 3.9161, + "step": 19180 + }, + { + "epoch": 32.43, + "learning_rate": 6.398000000000001e-05, + "loss": 3.9012, + "step": 19200 + }, + { + "epoch": 32.47, + "learning_rate": 6.404666666666667e-05, + "loss": 3.8793, + "step": 19220 + }, + { + "epoch": 32.5, + "learning_rate": 6.411333333333333e-05, + "loss": 3.8778, + "step": 19240 + }, + { + "epoch": 32.53, + "learning_rate": 6.418000000000001e-05, + "loss": 3.8977, + "step": 19260 + }, + { + "epoch": 32.57, + "learning_rate": 6.424666666666666e-05, + "loss": 3.8798, + "step": 19280 + }, + { + "epoch": 32.6, + "learning_rate": 6.431333333333334e-05, + "loss": 3.8643, + "step": 19300 + }, + { + "epoch": 32.64, + "learning_rate": 6.438e-05, + "loss": 3.8896, + "step": 19320 + }, + { + "epoch": 32.67, + "learning_rate": 6.444666666666667e-05, + "loss": 3.8901, + "step": 19340 + }, + { + "epoch": 32.7, + "learning_rate": 6.451333333333333e-05, + "loss": 3.8805, + "step": 19360 + }, + { + "epoch": 32.74, + "learning_rate": 6.458000000000001e-05, + "loss": 3.892, + "step": 19380 + }, + { + "epoch": 32.77, + "learning_rate": 6.464666666666667e-05, + "loss": 3.8897, + "step": 19400 + }, + { + "epoch": 32.8, + "learning_rate": 6.471333333333334e-05, + "loss": 3.8872, + "step": 19420 + }, + { + "epoch": 32.84, + "learning_rate": 6.478000000000001e-05, + "loss": 3.9123, + "step": 19440 + }, + { + "epoch": 32.87, + "learning_rate": 6.484666666666666e-05, + "loss": 3.8569, + "step": 19460 + }, + { + "epoch": 32.91, + "learning_rate": 6.491333333333334e-05, + "loss": 3.865, + "step": 19480 + }, + { + "epoch": 32.94, + "learning_rate": 6.498e-05, + "loss": 3.8848, + "step": 19500 + }, + { + "epoch": 32.97, + "learning_rate": 6.504666666666667e-05, + "loss": 3.8906, + "step": 19520 + }, + { + "epoch": 33.01, + "learning_rate": 6.511333333333333e-05, + "loss": 3.8834, + "step": 19540 + }, + { + "epoch": 33.04, + "learning_rate": 6.518000000000001e-05, + "loss": 3.8466, + "step": 19560 + }, + { + "epoch": 33.07, + "learning_rate": 6.524666666666667e-05, + "loss": 3.8549, + "step": 19580 + }, + { + "epoch": 33.11, + "learning_rate": 6.531333333333334e-05, + "loss": 3.853, + "step": 19600 + }, + { + "epoch": 33.14, + "learning_rate": 6.538000000000001e-05, + "loss": 3.8348, + "step": 19620 + }, + { + "epoch": 33.18, + "learning_rate": 6.544666666666666e-05, + "loss": 3.8324, + "step": 19640 + }, + { + "epoch": 33.21, + "learning_rate": 6.551333333333334e-05, + "loss": 3.8482, + "step": 19660 + }, + { + "epoch": 33.24, + "learning_rate": 6.558e-05, + "loss": 3.8503, + "step": 19680 + }, + { + "epoch": 33.28, + "learning_rate": 6.564666666666667e-05, + "loss": 3.8215, + "step": 19700 + }, + { + "epoch": 33.31, + "learning_rate": 6.571333333333333e-05, + "loss": 3.8288, + "step": 19720 + }, + { + "epoch": 33.34, + "learning_rate": 6.578000000000001e-05, + "loss": 3.8424, + "step": 19740 + }, + { + "epoch": 33.38, + "learning_rate": 6.584666666666667e-05, + "loss": 3.827, + "step": 19760 + }, + { + "epoch": 33.41, + "learning_rate": 6.591333333333334e-05, + "loss": 3.8296, + "step": 19780 + }, + { + "epoch": 33.45, + "learning_rate": 6.598e-05, + "loss": 3.8509, + "step": 19800 + }, + { + "epoch": 33.48, + "learning_rate": 6.604666666666667e-05, + "loss": 3.8423, + "step": 19820 + }, + { + "epoch": 33.51, + "learning_rate": 6.611333333333334e-05, + "loss": 3.8294, + "step": 19840 + }, + { + "epoch": 33.55, + "learning_rate": 6.618e-05, + "loss": 3.8269, + "step": 19860 + }, + { + "epoch": 33.58, + "learning_rate": 6.624666666666667e-05, + "loss": 3.8302, + "step": 19880 + }, + { + "epoch": 33.61, + "learning_rate": 6.631333333333333e-05, + "loss": 3.8409, + "step": 19900 + }, + { + "epoch": 33.65, + "learning_rate": 6.638e-05, + "loss": 3.8368, + "step": 19920 + }, + { + "epoch": 33.68, + "learning_rate": 6.644666666666666e-05, + "loss": 3.8509, + "step": 19940 + }, + { + "epoch": 33.72, + "learning_rate": 6.651333333333334e-05, + "loss": 3.8311, + "step": 19960 + }, + { + "epoch": 33.75, + "learning_rate": 6.658e-05, + "loss": 3.8274, + "step": 19980 + }, + { + "epoch": 33.78, + "learning_rate": 6.664666666666667e-05, + "loss": 3.8349, + "step": 20000 + }, + { + "epoch": 33.78, + "eval_loss": 3.672532081604004, + "eval_runtime": 47.3457, + "eval_samples_per_second": 20.889, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.006003945345159851, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.030795479673931526, + "eval_tse_type": 3.86192562633412e-05, + "step": 20000 + }, + { + "epoch": 33.82, + "learning_rate": 6.671333333333334e-05, + "loss": 3.844, + "step": 20020 + }, + { + "epoch": 33.85, + "learning_rate": 6.678e-05, + "loss": 3.8501, + "step": 20040 + }, + { + "epoch": 33.89, + "learning_rate": 6.684666666666667e-05, + "loss": 3.8375, + "step": 20060 + }, + { + "epoch": 33.92, + "learning_rate": 6.691333333333334e-05, + "loss": 3.8277, + "step": 20080 + }, + { + "epoch": 33.95, + "learning_rate": 6.698e-05, + "loss": 3.8274, + "step": 20100 + }, + { + "epoch": 33.99, + "learning_rate": 6.704666666666666e-05, + "loss": 3.8322, + "step": 20120 + }, + { + "epoch": 34.02, + "learning_rate": 6.711333333333334e-05, + "loss": 3.7911, + "step": 20140 + }, + { + "epoch": 34.05, + "learning_rate": 6.718e-05, + "loss": 3.7825, + "step": 20160 + }, + { + "epoch": 34.09, + "learning_rate": 6.724666666666667e-05, + "loss": 3.8013, + "step": 20180 + }, + { + "epoch": 34.12, + "learning_rate": 6.731333333333335e-05, + "loss": 3.7964, + "step": 20200 + }, + { + "epoch": 34.16, + "learning_rate": 6.738e-05, + "loss": 3.7865, + "step": 20220 + }, + { + "epoch": 34.19, + "learning_rate": 6.744666666666667e-05, + "loss": 3.8097, + "step": 20240 + }, + { + "epoch": 34.22, + "learning_rate": 6.751333333333334e-05, + "loss": 3.8013, + "step": 20260 + }, + { + "epoch": 34.26, + "learning_rate": 6.758e-05, + "loss": 3.783, + "step": 20280 + }, + { + "epoch": 34.29, + "learning_rate": 6.764666666666666e-05, + "loss": 3.7939, + "step": 20300 + }, + { + "epoch": 34.32, + "learning_rate": 6.771333333333334e-05, + "loss": 3.8121, + "step": 20320 + }, + { + "epoch": 34.36, + "learning_rate": 6.778e-05, + "loss": 3.7952, + "step": 20340 + }, + { + "epoch": 34.39, + "learning_rate": 6.784666666666667e-05, + "loss": 3.7813, + "step": 20360 + }, + { + "epoch": 34.43, + "learning_rate": 6.791333333333335e-05, + "loss": 3.7881, + "step": 20380 + }, + { + "epoch": 34.46, + "learning_rate": 6.798e-05, + "loss": 3.7732, + "step": 20400 + }, + { + "epoch": 34.49, + "learning_rate": 6.804666666666667e-05, + "loss": 3.8046, + "step": 20420 + }, + { + "epoch": 34.53, + "learning_rate": 6.811333333333334e-05, + "loss": 3.8077, + "step": 20440 + }, + { + "epoch": 34.56, + "learning_rate": 6.818e-05, + "loss": 3.784, + "step": 20460 + }, + { + "epoch": 34.59, + "learning_rate": 6.824666666666666e-05, + "loss": 3.7724, + "step": 20480 + }, + { + "epoch": 34.63, + "learning_rate": 6.831333333333334e-05, + "loss": 3.8007, + "step": 20500 + }, + { + "epoch": 34.66, + "learning_rate": 6.837666666666667e-05, + "loss": 3.7861, + "step": 20520 + }, + { + "epoch": 34.7, + "learning_rate": 6.844333333333334e-05, + "loss": 3.7695, + "step": 20540 + }, + { + "epoch": 34.73, + "learning_rate": 6.851e-05, + "loss": 3.7881, + "step": 20560 + }, + { + "epoch": 34.76, + "learning_rate": 6.857666666666667e-05, + "loss": 3.7699, + "step": 20580 + }, + { + "epoch": 34.8, + "learning_rate": 6.864333333333333e-05, + "loss": 3.8045, + "step": 20600 + }, + { + "epoch": 34.83, + "learning_rate": 6.871000000000001e-05, + "loss": 3.7762, + "step": 20620 + }, + { + "epoch": 34.86, + "learning_rate": 6.877666666666666e-05, + "loss": 3.7835, + "step": 20640 + }, + { + "epoch": 34.9, + "learning_rate": 6.884333333333334e-05, + "loss": 3.781, + "step": 20660 + }, + { + "epoch": 34.93, + "learning_rate": 6.891e-05, + "loss": 3.7927, + "step": 20680 + }, + { + "epoch": 34.97, + "learning_rate": 6.897666666666667e-05, + "loss": 3.7805, + "step": 20700 + }, + { + "epoch": 35.0, + "learning_rate": 6.904333333333334e-05, + "loss": 3.7688, + "step": 20720 + }, + { + "epoch": 35.03, + "learning_rate": 6.911000000000001e-05, + "loss": 3.7479, + "step": 20740 + }, + { + "epoch": 35.07, + "learning_rate": 6.917666666666667e-05, + "loss": 3.7441, + "step": 20760 + }, + { + "epoch": 35.1, + "learning_rate": 6.924333333333334e-05, + "loss": 3.7364, + "step": 20780 + }, + { + "epoch": 35.14, + "learning_rate": 6.931000000000001e-05, + "loss": 3.7347, + "step": 20800 + }, + { + "epoch": 35.17, + "learning_rate": 6.937666666666666e-05, + "loss": 3.7438, + "step": 20820 + }, + { + "epoch": 35.2, + "learning_rate": 6.944333333333334e-05, + "loss": 3.7344, + "step": 20840 + }, + { + "epoch": 35.24, + "learning_rate": 6.951e-05, + "loss": 3.721, + "step": 20860 + }, + { + "epoch": 35.27, + "learning_rate": 6.957666666666667e-05, + "loss": 3.7525, + "step": 20880 + }, + { + "epoch": 35.3, + "learning_rate": 6.964333333333334e-05, + "loss": 3.7451, + "step": 20900 + }, + { + "epoch": 35.34, + "learning_rate": 6.971000000000001e-05, + "loss": 3.7693, + "step": 20920 + }, + { + "epoch": 35.37, + "learning_rate": 6.977666666666667e-05, + "loss": 3.7413, + "step": 20940 + }, + { + "epoch": 35.41, + "learning_rate": 6.984333333333334e-05, + "loss": 3.7478, + "step": 20960 + }, + { + "epoch": 35.44, + "learning_rate": 6.991000000000001e-05, + "loss": 3.7363, + "step": 20980 + }, + { + "epoch": 35.47, + "learning_rate": 6.997666666666666e-05, + "loss": 3.7421, + "step": 21000 + }, + { + "epoch": 35.47, + "eval_loss": 3.6117138862609863, + "eval_runtime": 50.7962, + "eval_samples_per_second": 19.47, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.009148751032328753, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.022986131700466112, + "eval_tse_type": 5.4418042916526235e-05, + "step": 21000 + }, + { + "epoch": 35.51, + "learning_rate": 7.004333333333334e-05, + "loss": 3.7338, + "step": 21020 + }, + { + "epoch": 35.54, + "learning_rate": 7.011e-05, + "loss": 3.7443, + "step": 21040 + }, + { + "epoch": 35.57, + "learning_rate": 7.017666666666667e-05, + "loss": 3.7358, + "step": 21060 + }, + { + "epoch": 35.61, + "learning_rate": 7.024333333333333e-05, + "loss": 3.7634, + "step": 21080 + }, + { + "epoch": 35.64, + "learning_rate": 7.031e-05, + "loss": 3.7469, + "step": 21100 + }, + { + "epoch": 35.68, + "learning_rate": 7.037666666666667e-05, + "loss": 3.7311, + "step": 21120 + }, + { + "epoch": 35.71, + "learning_rate": 7.044333333333334e-05, + "loss": 3.7281, + "step": 21140 + }, + { + "epoch": 35.74, + "learning_rate": 7.051e-05, + "loss": 3.7521, + "step": 21160 + }, + { + "epoch": 35.78, + "learning_rate": 7.057666666666666e-05, + "loss": 3.7502, + "step": 21180 + }, + { + "epoch": 35.81, + "learning_rate": 7.064333333333334e-05, + "loss": 3.7454, + "step": 21200 + }, + { + "epoch": 35.84, + "learning_rate": 7.070999999999999e-05, + "loss": 3.7254, + "step": 21220 + }, + { + "epoch": 35.88, + "learning_rate": 7.077666666666667e-05, + "loss": 3.7302, + "step": 21240 + }, + { + "epoch": 35.91, + "learning_rate": 7.084333333333333e-05, + "loss": 3.7285, + "step": 21260 + }, + { + "epoch": 35.95, + "learning_rate": 7.091e-05, + "loss": 3.7413, + "step": 21280 + }, + { + "epoch": 35.98, + "learning_rate": 7.097666666666667e-05, + "loss": 3.7399, + "step": 21300 + }, + { + "epoch": 36.01, + "learning_rate": 7.104333333333334e-05, + "loss": 3.7041, + "step": 21320 + }, + { + "epoch": 36.05, + "learning_rate": 7.111e-05, + "loss": 3.6866, + "step": 21340 + }, + { + "epoch": 36.08, + "learning_rate": 7.117666666666667e-05, + "loss": 3.6953, + "step": 21360 + }, + { + "epoch": 36.11, + "learning_rate": 7.124333333333334e-05, + "loss": 3.692, + "step": 21380 + }, + { + "epoch": 36.15, + "learning_rate": 7.130999999999999e-05, + "loss": 3.7086, + "step": 21400 + }, + { + "epoch": 36.18, + "learning_rate": 7.137666666666667e-05, + "loss": 3.7095, + "step": 21420 + }, + { + "epoch": 36.22, + "learning_rate": 7.144333333333333e-05, + "loss": 3.7011, + "step": 21440 + }, + { + "epoch": 36.25, + "learning_rate": 7.151e-05, + "loss": 3.7144, + "step": 21460 + }, + { + "epoch": 36.28, + "learning_rate": 7.157666666666668e-05, + "loss": 3.6975, + "step": 21480 + }, + { + "epoch": 36.32, + "learning_rate": 7.164333333333334e-05, + "loss": 3.6789, + "step": 21500 + }, + { + "epoch": 36.35, + "learning_rate": 7.171e-05, + "loss": 3.7143, + "step": 21520 + }, + { + "epoch": 36.39, + "learning_rate": 7.177666666666667e-05, + "loss": 3.687, + "step": 21540 + }, + { + "epoch": 36.42, + "learning_rate": 7.184333333333334e-05, + "loss": 3.7027, + "step": 21560 + }, + { + "epoch": 36.45, + "learning_rate": 7.191e-05, + "loss": 3.6983, + "step": 21580 + }, + { + "epoch": 36.49, + "learning_rate": 7.197666666666667e-05, + "loss": 3.705, + "step": 21600 + }, + { + "epoch": 36.52, + "learning_rate": 7.204333333333334e-05, + "loss": 3.6732, + "step": 21620 + }, + { + "epoch": 36.55, + "learning_rate": 7.211e-05, + "loss": 3.7052, + "step": 21640 + }, + { + "epoch": 36.59, + "learning_rate": 7.217666666666668e-05, + "loss": 3.6802, + "step": 21660 + }, + { + "epoch": 36.62, + "learning_rate": 7.224333333333334e-05, + "loss": 3.6838, + "step": 21680 + }, + { + "epoch": 36.66, + "learning_rate": 7.231e-05, + "loss": 3.7156, + "step": 21700 + }, + { + "epoch": 36.69, + "learning_rate": 7.237666666666667e-05, + "loss": 3.6974, + "step": 21720 + }, + { + "epoch": 36.72, + "learning_rate": 7.244333333333335e-05, + "loss": 3.6858, + "step": 21740 + }, + { + "epoch": 36.76, + "learning_rate": 7.251e-05, + "loss": 3.6949, + "step": 21760 + }, + { + "epoch": 36.79, + "learning_rate": 7.257666666666667e-05, + "loss": 3.6842, + "step": 21780 + }, + { + "epoch": 36.82, + "learning_rate": 7.264333333333334e-05, + "loss": 3.6957, + "step": 21800 + }, + { + "epoch": 36.86, + "learning_rate": 7.271e-05, + "loss": 3.6844, + "step": 21820 + }, + { + "epoch": 36.89, + "learning_rate": 7.277666666666668e-05, + "loss": 3.681, + "step": 21840 + }, + { + "epoch": 36.93, + "learning_rate": 7.284333333333334e-05, + "loss": 3.6908, + "step": 21860 + }, + { + "epoch": 36.96, + "learning_rate": 7.291e-05, + "loss": 3.6948, + "step": 21880 + }, + { + "epoch": 36.99, + "learning_rate": 7.297666666666667e-05, + "loss": 3.6721, + "step": 21900 + }, + { + "epoch": 37.03, + "learning_rate": 7.304333333333335e-05, + "loss": 3.6447, + "step": 21920 + }, + { + "epoch": 37.06, + "learning_rate": 7.311e-05, + "loss": 3.6511, + "step": 21940 + }, + { + "epoch": 37.09, + "learning_rate": 7.317666666666667e-05, + "loss": 3.6723, + "step": 21960 + }, + { + "epoch": 37.13, + "learning_rate": 7.324333333333334e-05, + "loss": 3.6579, + "step": 21980 + }, + { + "epoch": 37.16, + "learning_rate": 7.331e-05, + "loss": 3.6252, + "step": 22000 + }, + { + "epoch": 37.16, + "eval_loss": 3.553020715713501, + "eval_runtime": 50.5859, + "eval_samples_per_second": 19.551, + "eval_steps_per_second": 0.119, + "eval_tse_ndup": 0.007445739995200251, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.02299272267063124, + "eval_tse_type": 3.3352994045612854e-05, + "step": 22000 + }, + { + "epoch": 37.2, + "learning_rate": 7.337666666666666e-05, + "loss": 3.6621, + "step": 22020 + }, + { + "epoch": 37.23, + "learning_rate": 7.344333333333334e-05, + "loss": 3.6488, + "step": 22040 + }, + { + "epoch": 37.26, + "learning_rate": 7.351e-05, + "loss": 3.6586, + "step": 22060 + }, + { + "epoch": 37.3, + "learning_rate": 7.357666666666667e-05, + "loss": 3.6362, + "step": 22080 + }, + { + "epoch": 37.33, + "learning_rate": 7.364333333333335e-05, + "loss": 3.6319, + "step": 22100 + }, + { + "epoch": 37.36, + "learning_rate": 7.371e-05, + "loss": 3.6606, + "step": 22120 + }, + { + "epoch": 37.4, + "learning_rate": 7.377666666666667e-05, + "loss": 3.6556, + "step": 22140 + }, + { + "epoch": 37.43, + "learning_rate": 7.384333333333334e-05, + "loss": 3.641, + "step": 22160 + }, + { + "epoch": 37.47, + "learning_rate": 7.391e-05, + "loss": 3.6389, + "step": 22180 + }, + { + "epoch": 37.5, + "learning_rate": 7.397666666666667e-05, + "loss": 3.6665, + "step": 22200 + }, + { + "epoch": 37.53, + "learning_rate": 7.404333333333334e-05, + "loss": 3.6643, + "step": 22220 + }, + { + "epoch": 37.57, + "learning_rate": 7.411000000000001e-05, + "loss": 3.6308, + "step": 22240 + }, + { + "epoch": 37.6, + "learning_rate": 7.417666666666667e-05, + "loss": 3.6632, + "step": 22260 + }, + { + "epoch": 37.64, + "learning_rate": 7.424333333333333e-05, + "loss": 3.6501, + "step": 22280 + }, + { + "epoch": 37.67, + "learning_rate": 7.431e-05, + "loss": 3.6327, + "step": 22300 + }, + { + "epoch": 37.7, + "learning_rate": 7.437666666666668e-05, + "loss": 3.6671, + "step": 22320 + }, + { + "epoch": 37.74, + "learning_rate": 7.444333333333333e-05, + "loss": 3.6545, + "step": 22340 + }, + { + "epoch": 37.77, + "learning_rate": 7.451e-05, + "loss": 3.6403, + "step": 22360 + }, + { + "epoch": 37.8, + "learning_rate": 7.457666666666667e-05, + "loss": 3.6431, + "step": 22380 + }, + { + "epoch": 37.84, + "learning_rate": 7.464333333333333e-05, + "loss": 3.654, + "step": 22400 + }, + { + "epoch": 37.87, + "learning_rate": 7.471000000000001e-05, + "loss": 3.6387, + "step": 22420 + }, + { + "epoch": 37.91, + "learning_rate": 7.477666666666667e-05, + "loss": 3.6501, + "step": 22440 + }, + { + "epoch": 37.94, + "learning_rate": 7.484333333333334e-05, + "loss": 3.6447, + "step": 22460 + }, + { + "epoch": 37.97, + "learning_rate": 7.491e-05, + "loss": 3.6313, + "step": 22480 + }, + { + "epoch": 38.01, + "learning_rate": 7.497666666666668e-05, + "loss": 3.6315, + "step": 22500 + }, + { + "epoch": 38.04, + "learning_rate": 7.504333333333333e-05, + "loss": 3.6047, + "step": 22520 + }, + { + "epoch": 38.07, + "learning_rate": 7.511e-05, + "loss": 3.5979, + "step": 22540 + }, + { + "epoch": 38.11, + "learning_rate": 7.517666666666667e-05, + "loss": 3.5832, + "step": 22560 + }, + { + "epoch": 38.14, + "learning_rate": 7.524e-05, + "loss": 3.6096, + "step": 22580 + }, + { + "epoch": 38.18, + "learning_rate": 7.530666666666667e-05, + "loss": 3.5947, + "step": 22600 + }, + { + "epoch": 38.21, + "learning_rate": 7.537333333333335e-05, + "loss": 3.6199, + "step": 22620 + }, + { + "epoch": 38.24, + "learning_rate": 7.544e-05, + "loss": 3.5862, + "step": 22640 + }, + { + "epoch": 38.28, + "learning_rate": 7.550666666666667e-05, + "loss": 3.6136, + "step": 22660 + }, + { + "epoch": 38.31, + "learning_rate": 7.557333333333334e-05, + "loss": 3.6014, + "step": 22680 + }, + { + "epoch": 38.34, + "learning_rate": 7.564e-05, + "loss": 3.5834, + "step": 22700 + }, + { + "epoch": 38.38, + "learning_rate": 7.570666666666666e-05, + "loss": 3.6177, + "step": 22720 + }, + { + "epoch": 38.41, + "learning_rate": 7.577333333333334e-05, + "loss": 3.5869, + "step": 22740 + }, + { + "epoch": 38.45, + "learning_rate": 7.584e-05, + "loss": 3.6068, + "step": 22760 + }, + { + "epoch": 38.48, + "learning_rate": 7.590666666666667e-05, + "loss": 3.6107, + "step": 22780 + }, + { + "epoch": 38.51, + "learning_rate": 7.597333333333335e-05, + "loss": 3.5974, + "step": 22800 + }, + { + "epoch": 38.55, + "learning_rate": 7.604e-05, + "loss": 3.6298, + "step": 22820 + }, + { + "epoch": 38.58, + "learning_rate": 7.610666666666667e-05, + "loss": 3.6137, + "step": 22840 + }, + { + "epoch": 38.61, + "learning_rate": 7.617333333333334e-05, + "loss": 3.6151, + "step": 22860 + }, + { + "epoch": 38.65, + "learning_rate": 7.624e-05, + "loss": 3.594, + "step": 22880 + }, + { + "epoch": 38.68, + "learning_rate": 7.630666666666667e-05, + "loss": 3.6202, + "step": 22900 + }, + { + "epoch": 38.72, + "learning_rate": 7.637333333333334e-05, + "loss": 3.6162, + "step": 22920 + }, + { + "epoch": 38.75, + "learning_rate": 7.644e-05, + "loss": 3.6031, + "step": 22940 + }, + { + "epoch": 38.78, + "learning_rate": 7.650666666666667e-05, + "loss": 3.6145, + "step": 22960 + }, + { + "epoch": 38.82, + "learning_rate": 7.657333333333335e-05, + "loss": 3.6028, + "step": 22980 + }, + { + "epoch": 38.85, + "learning_rate": 7.664e-05, + "loss": 3.6068, + "step": 23000 + }, + { + "epoch": 38.85, + "eval_loss": 3.4891817569732666, + "eval_runtime": 47.271, + "eval_samples_per_second": 20.922, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.006988584288630435, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.026305780437235755, + "eval_tse_type": 9.305267063993084e-05, + "step": 23000 + }, + { + "epoch": 38.89, + "learning_rate": 7.670666666666668e-05, + "loss": 3.5932, + "step": 23020 + }, + { + "epoch": 38.92, + "learning_rate": 7.677333333333334e-05, + "loss": 3.5951, + "step": 23040 + }, + { + "epoch": 38.95, + "learning_rate": 7.684e-05, + "loss": 3.6082, + "step": 23060 + }, + { + "epoch": 38.99, + "learning_rate": 7.690666666666667e-05, + "loss": 3.5912, + "step": 23080 + }, + { + "epoch": 39.02, + "learning_rate": 7.697333333333334e-05, + "loss": 3.5746, + "step": 23100 + }, + { + "epoch": 39.05, + "learning_rate": 7.704000000000001e-05, + "loss": 3.568, + "step": 23120 + }, + { + "epoch": 39.09, + "learning_rate": 7.710666666666667e-05, + "loss": 3.557, + "step": 23140 + }, + { + "epoch": 39.12, + "learning_rate": 7.717333333333334e-05, + "loss": 3.5467, + "step": 23160 + }, + { + "epoch": 39.16, + "learning_rate": 7.724e-05, + "loss": 3.5625, + "step": 23180 + }, + { + "epoch": 39.19, + "learning_rate": 7.730666666666668e-05, + "loss": 3.5407, + "step": 23200 + }, + { + "epoch": 39.22, + "learning_rate": 7.737333333333334e-05, + "loss": 3.5679, + "step": 23220 + }, + { + "epoch": 39.26, + "learning_rate": 7.744e-05, + "loss": 3.5641, + "step": 23240 + }, + { + "epoch": 39.29, + "learning_rate": 7.750666666666667e-05, + "loss": 3.5661, + "step": 23260 + }, + { + "epoch": 39.32, + "learning_rate": 7.757333333333335e-05, + "loss": 3.554, + "step": 23280 + }, + { + "epoch": 39.36, + "learning_rate": 7.764e-05, + "loss": 3.589, + "step": 23300 + }, + { + "epoch": 39.39, + "learning_rate": 7.770666666666667e-05, + "loss": 3.5516, + "step": 23320 + }, + { + "epoch": 39.43, + "learning_rate": 7.777333333333334e-05, + "loss": 3.5599, + "step": 23340 + }, + { + "epoch": 39.46, + "learning_rate": 7.784e-05, + "loss": 3.5461, + "step": 23360 + }, + { + "epoch": 39.49, + "learning_rate": 7.790666666666668e-05, + "loss": 3.5676, + "step": 23380 + }, + { + "epoch": 39.53, + "learning_rate": 7.797333333333333e-05, + "loss": 3.5393, + "step": 23400 + }, + { + "epoch": 39.56, + "learning_rate": 7.804e-05, + "loss": 3.5668, + "step": 23420 + }, + { + "epoch": 39.59, + "learning_rate": 7.810666666666667e-05, + "loss": 3.5614, + "step": 23440 + }, + { + "epoch": 39.63, + "learning_rate": 7.817333333333333e-05, + "loss": 3.5668, + "step": 23460 + }, + { + "epoch": 39.66, + "learning_rate": 7.824e-05, + "loss": 3.558, + "step": 23480 + }, + { + "epoch": 39.7, + "learning_rate": 7.830666666666667e-05, + "loss": 3.5682, + "step": 23500 + }, + { + "epoch": 39.73, + "learning_rate": 7.837333333333334e-05, + "loss": 3.5582, + "step": 23520 + }, + { + "epoch": 39.76, + "learning_rate": 7.844e-05, + "loss": 3.5874, + "step": 23540 + }, + { + "epoch": 39.8, + "learning_rate": 7.850666666666668e-05, + "loss": 3.5481, + "step": 23560 + }, + { + "epoch": 39.83, + "learning_rate": 7.857333333333333e-05, + "loss": 3.5782, + "step": 23580 + }, + { + "epoch": 39.86, + "learning_rate": 7.864e-05, + "loss": 3.561, + "step": 23600 + }, + { + "epoch": 39.9, + "learning_rate": 7.870666666666667e-05, + "loss": 3.5682, + "step": 23620 + }, + { + "epoch": 39.93, + "learning_rate": 7.877333333333333e-05, + "loss": 3.5647, + "step": 23640 + }, + { + "epoch": 39.97, + "learning_rate": 7.884e-05, + "loss": 3.5623, + "step": 23660 + }, + { + "epoch": 40.0, + "learning_rate": 7.890666666666667e-05, + "loss": 3.5679, + "step": 23680 + }, + { + "epoch": 40.03, + "learning_rate": 7.897333333333334e-05, + "loss": 3.4902, + "step": 23700 + }, + { + "epoch": 40.07, + "learning_rate": 7.904e-05, + "loss": 3.5255, + "step": 23720 + }, + { + "epoch": 40.1, + "learning_rate": 7.910666666666668e-05, + "loss": 3.5233, + "step": 23740 + }, + { + "epoch": 40.14, + "learning_rate": 7.917333333333333e-05, + "loss": 3.4923, + "step": 23760 + }, + { + "epoch": 40.17, + "learning_rate": 7.924000000000001e-05, + "loss": 3.5447, + "step": 23780 + }, + { + "epoch": 40.2, + "learning_rate": 7.930666666666667e-05, + "loss": 3.4885, + "step": 23800 + }, + { + "epoch": 40.24, + "learning_rate": 7.937333333333333e-05, + "loss": 3.517, + "step": 23820 + }, + { + "epoch": 40.27, + "learning_rate": 7.944e-05, + "loss": 3.5105, + "step": 23840 + }, + { + "epoch": 40.3, + "learning_rate": 7.950666666666668e-05, + "loss": 3.5247, + "step": 23860 + }, + { + "epoch": 40.34, + "learning_rate": 7.957333333333334e-05, + "loss": 3.5284, + "step": 23880 + }, + { + "epoch": 40.37, + "learning_rate": 7.964e-05, + "loss": 3.5258, + "step": 23900 + }, + { + "epoch": 40.41, + "learning_rate": 7.970666666666668e-05, + "loss": 3.5195, + "step": 23920 + }, + { + "epoch": 40.44, + "learning_rate": 7.977333333333333e-05, + "loss": 3.5072, + "step": 23940 + }, + { + "epoch": 40.47, + "learning_rate": 7.984000000000001e-05, + "loss": 3.5233, + "step": 23960 + }, + { + "epoch": 40.51, + "learning_rate": 7.990666666666667e-05, + "loss": 3.5202, + "step": 23980 + }, + { + "epoch": 40.54, + "learning_rate": 7.997333333333334e-05, + "loss": 3.5159, + "step": 24000 + }, + { + "epoch": 40.54, + "eval_loss": 3.4419097900390625, + "eval_runtime": 50.9496, + "eval_samples_per_second": 19.411, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.007944092697009155, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.02406613373421399, + "eval_tse_type": 4.2130097741826756e-05, + "step": 24000 + }, + { + "epoch": 40.57, + "learning_rate": 8.004e-05, + "loss": 3.5179, + "step": 24020 + }, + { + "epoch": 40.61, + "learning_rate": 8.010666666666668e-05, + "loss": 3.536, + "step": 24040 + }, + { + "epoch": 40.64, + "learning_rate": 8.017333333333333e-05, + "loss": 3.5399, + "step": 24060 + }, + { + "epoch": 40.68, + "learning_rate": 8.024e-05, + "loss": 3.5033, + "step": 24080 + }, + { + "epoch": 40.71, + "learning_rate": 8.030666666666667e-05, + "loss": 3.527, + "step": 24100 + }, + { + "epoch": 40.74, + "learning_rate": 8.037333333333333e-05, + "loss": 3.5204, + "step": 24120 + }, + { + "epoch": 40.78, + "learning_rate": 8.044000000000001e-05, + "loss": 3.5163, + "step": 24140 + }, + { + "epoch": 40.81, + "learning_rate": 8.050666666666667e-05, + "loss": 3.5316, + "step": 24160 + }, + { + "epoch": 40.84, + "learning_rate": 8.057333333333334e-05, + "loss": 3.5257, + "step": 24180 + }, + { + "epoch": 40.88, + "learning_rate": 8.064e-05, + "loss": 3.5296, + "step": 24200 + }, + { + "epoch": 40.91, + "learning_rate": 8.070666666666668e-05, + "loss": 3.5129, + "step": 24220 + }, + { + "epoch": 40.95, + "learning_rate": 8.077333333333333e-05, + "loss": 3.5407, + "step": 24240 + }, + { + "epoch": 40.98, + "learning_rate": 8.084e-05, + "loss": 3.5082, + "step": 24260 + }, + { + "epoch": 41.01, + "learning_rate": 8.090666666666667e-05, + "loss": 3.4926, + "step": 24280 + }, + { + "epoch": 41.05, + "learning_rate": 8.097333333333333e-05, + "loss": 3.4739, + "step": 24300 + }, + { + "epoch": 41.08, + "learning_rate": 8.104000000000001e-05, + "loss": 3.4794, + "step": 24320 + }, + { + "epoch": 41.11, + "learning_rate": 8.110666666666667e-05, + "loss": 3.4631, + "step": 24340 + }, + { + "epoch": 41.15, + "learning_rate": 8.117333333333334e-05, + "loss": 3.4855, + "step": 24360 + }, + { + "epoch": 41.18, + "learning_rate": 8.124e-05, + "loss": 3.4837, + "step": 24380 + }, + { + "epoch": 41.22, + "learning_rate": 8.130666666666668e-05, + "loss": 3.4819, + "step": 24400 + }, + { + "epoch": 41.25, + "learning_rate": 8.137333333333333e-05, + "loss": 3.4535, + "step": 24420 + }, + { + "epoch": 41.28, + "learning_rate": 8.144e-05, + "loss": 3.4564, + "step": 24440 + }, + { + "epoch": 41.32, + "learning_rate": 8.150666666666667e-05, + "loss": 3.4928, + "step": 24460 + }, + { + "epoch": 41.35, + "learning_rate": 8.157333333333333e-05, + "loss": 3.4743, + "step": 24480 + }, + { + "epoch": 41.39, + "learning_rate": 8.164000000000001e-05, + "loss": 3.4782, + "step": 24500 + }, + { + "epoch": 41.42, + "learning_rate": 8.170666666666667e-05, + "loss": 3.4698, + "step": 24520 + }, + { + "epoch": 41.45, + "learning_rate": 8.177333333333334e-05, + "loss": 3.4868, + "step": 24540 + }, + { + "epoch": 41.49, + "learning_rate": 8.184e-05, + "loss": 3.4795, + "step": 24560 + }, + { + "epoch": 41.52, + "learning_rate": 8.190666666666668e-05, + "loss": 3.4912, + "step": 24580 + }, + { + "epoch": 41.55, + "learning_rate": 8.197333333333333e-05, + "loss": 3.4821, + "step": 24600 + }, + { + "epoch": 41.59, + "learning_rate": 8.203666666666667e-05, + "loss": 3.4764, + "step": 24620 + }, + { + "epoch": 41.62, + "learning_rate": 8.210333333333333e-05, + "loss": 3.4889, + "step": 24640 + }, + { + "epoch": 41.66, + "learning_rate": 8.217000000000001e-05, + "loss": 3.478, + "step": 24660 + }, + { + "epoch": 41.69, + "learning_rate": 8.223666666666667e-05, + "loss": 3.4772, + "step": 24680 + }, + { + "epoch": 41.72, + "learning_rate": 8.230333333333334e-05, + "loss": 3.4694, + "step": 24700 + }, + { + "epoch": 41.76, + "learning_rate": 8.237e-05, + "loss": 3.4743, + "step": 24720 + }, + { + "epoch": 41.79, + "learning_rate": 8.243666666666668e-05, + "loss": 3.4844, + "step": 24740 + }, + { + "epoch": 41.82, + "learning_rate": 8.250333333333333e-05, + "loss": 3.4657, + "step": 24760 + }, + { + "epoch": 41.86, + "learning_rate": 8.257e-05, + "loss": 3.4819, + "step": 24780 + }, + { + "epoch": 41.89, + "learning_rate": 8.263666666666667e-05, + "loss": 3.4704, + "step": 24800 + }, + { + "epoch": 41.93, + "learning_rate": 8.270333333333333e-05, + "loss": 3.4889, + "step": 24820 + }, + { + "epoch": 41.96, + "learning_rate": 8.277000000000001e-05, + "loss": 3.4977, + "step": 24840 + }, + { + "epoch": 41.99, + "learning_rate": 8.283666666666667e-05, + "loss": 3.4965, + "step": 24860 + }, + { + "epoch": 42.03, + "learning_rate": 8.290333333333334e-05, + "loss": 3.4475, + "step": 24880 + }, + { + "epoch": 42.06, + "learning_rate": 8.297e-05, + "loss": 3.4173, + "step": 24900 + }, + { + "epoch": 42.09, + "learning_rate": 8.303666666666668e-05, + "loss": 3.4348, + "step": 24920 + }, + { + "epoch": 42.13, + "learning_rate": 8.310333333333333e-05, + "loss": 3.4405, + "step": 24940 + }, + { + "epoch": 42.16, + "learning_rate": 8.317e-05, + "loss": 3.4271, + "step": 24960 + }, + { + "epoch": 42.2, + "learning_rate": 8.323666666666667e-05, + "loss": 3.4412, + "step": 24980 + }, + { + "epoch": 42.23, + "learning_rate": 8.330333333333333e-05, + "loss": 3.4345, + "step": 25000 + }, + { + "epoch": 42.23, + "eval_loss": 3.3929953575134277, + "eval_runtime": 47.2495, + "eval_samples_per_second": 20.931, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.0033075099350616337, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.028185384195513738, + "eval_tse_type": 6.626145455836754e-05, + "step": 25000 + }, + { + "epoch": 42.26, + "learning_rate": 8.337000000000001e-05, + "loss": 3.45, + "step": 25020 + }, + { + "epoch": 42.3, + "learning_rate": 8.343666666666667e-05, + "loss": 3.4294, + "step": 25040 + }, + { + "epoch": 42.33, + "learning_rate": 8.350333333333334e-05, + "loss": 3.4503, + "step": 25060 + }, + { + "epoch": 42.36, + "learning_rate": 8.357e-05, + "loss": 3.413, + "step": 25080 + }, + { + "epoch": 42.4, + "learning_rate": 8.363666666666668e-05, + "loss": 3.4294, + "step": 25100 + }, + { + "epoch": 42.43, + "learning_rate": 8.370333333333333e-05, + "loss": 3.4421, + "step": 25120 + }, + { + "epoch": 42.47, + "learning_rate": 8.377e-05, + "loss": 3.4619, + "step": 25140 + }, + { + "epoch": 42.5, + "learning_rate": 8.383666666666667e-05, + "loss": 3.4707, + "step": 25160 + }, + { + "epoch": 42.53, + "learning_rate": 8.390333333333333e-05, + "loss": 3.4346, + "step": 25180 + }, + { + "epoch": 42.57, + "learning_rate": 8.397000000000001e-05, + "loss": 3.4436, + "step": 25200 + }, + { + "epoch": 42.6, + "learning_rate": 8.403666666666667e-05, + "loss": 3.4434, + "step": 25220 + }, + { + "epoch": 42.64, + "learning_rate": 8.410333333333334e-05, + "loss": 3.4385, + "step": 25240 + }, + { + "epoch": 42.67, + "learning_rate": 8.417e-05, + "loss": 3.4598, + "step": 25260 + }, + { + "epoch": 42.7, + "learning_rate": 8.423666666666668e-05, + "loss": 3.4443, + "step": 25280 + }, + { + "epoch": 42.74, + "learning_rate": 8.430333333333333e-05, + "loss": 3.4465, + "step": 25300 + }, + { + "epoch": 42.77, + "learning_rate": 8.437000000000001e-05, + "loss": 3.4485, + "step": 25320 + }, + { + "epoch": 42.8, + "learning_rate": 8.443666666666667e-05, + "loss": 3.433, + "step": 25340 + }, + { + "epoch": 42.84, + "learning_rate": 8.450333333333333e-05, + "loss": 3.438, + "step": 25360 + }, + { + "epoch": 42.87, + "learning_rate": 8.457e-05, + "loss": 3.4344, + "step": 25380 + }, + { + "epoch": 42.91, + "learning_rate": 8.463666666666668e-05, + "loss": 3.4269, + "step": 25400 + }, + { + "epoch": 42.94, + "learning_rate": 8.470333333333334e-05, + "loss": 3.4329, + "step": 25420 + }, + { + "epoch": 42.97, + "learning_rate": 8.477e-05, + "loss": 3.4348, + "step": 25440 + }, + { + "epoch": 43.01, + "learning_rate": 8.483666666666668e-05, + "loss": 3.4257, + "step": 25460 + }, + { + "epoch": 43.04, + "learning_rate": 8.490333333333333e-05, + "loss": 3.3811, + "step": 25480 + }, + { + "epoch": 43.07, + "learning_rate": 8.497000000000001e-05, + "loss": 3.4155, + "step": 25500 + }, + { + "epoch": 43.11, + "learning_rate": 8.503666666666667e-05, + "loss": 3.4016, + "step": 25520 + }, + { + "epoch": 43.14, + "learning_rate": 8.510333333333334e-05, + "loss": 3.3912, + "step": 25540 + }, + { + "epoch": 43.18, + "learning_rate": 8.517e-05, + "loss": 3.3946, + "step": 25560 + }, + { + "epoch": 43.21, + "learning_rate": 8.523666666666668e-05, + "loss": 3.3945, + "step": 25580 + }, + { + "epoch": 43.24, + "learning_rate": 8.530333333333334e-05, + "loss": 3.4069, + "step": 25600 + }, + { + "epoch": 43.28, + "learning_rate": 8.537e-05, + "loss": 3.4032, + "step": 25620 + }, + { + "epoch": 43.31, + "learning_rate": 8.543666666666668e-05, + "loss": 3.3907, + "step": 25640 + }, + { + "epoch": 43.34, + "learning_rate": 8.550333333333333e-05, + "loss": 3.4155, + "step": 25660 + }, + { + "epoch": 43.38, + "learning_rate": 8.557000000000001e-05, + "loss": 3.4083, + "step": 25680 + }, + { + "epoch": 43.41, + "learning_rate": 8.563666666666667e-05, + "loss": 3.3871, + "step": 25700 + }, + { + "epoch": 43.45, + "learning_rate": 8.570333333333334e-05, + "loss": 3.4089, + "step": 25720 + }, + { + "epoch": 43.48, + "learning_rate": 8.577e-05, + "loss": 3.4084, + "step": 25740 + }, + { + "epoch": 43.51, + "learning_rate": 8.583666666666666e-05, + "loss": 3.3881, + "step": 25760 + }, + { + "epoch": 43.55, + "learning_rate": 8.590333333333334e-05, + "loss": 3.4156, + "step": 25780 + }, + { + "epoch": 43.58, + "learning_rate": 8.597e-05, + "loss": 3.4007, + "step": 25800 + }, + { + "epoch": 43.61, + "learning_rate": 8.603666666666667e-05, + "loss": 3.385, + "step": 25820 + }, + { + "epoch": 43.65, + "learning_rate": 8.610333333333333e-05, + "loss": 3.4057, + "step": 25840 + }, + { + "epoch": 43.68, + "learning_rate": 8.617000000000001e-05, + "loss": 3.3944, + "step": 25860 + }, + { + "epoch": 43.72, + "learning_rate": 8.623666666666666e-05, + "loss": 3.412, + "step": 25880 + }, + { + "epoch": 43.75, + "learning_rate": 8.630333333333334e-05, + "loss": 3.3974, + "step": 25900 + }, + { + "epoch": 43.78, + "learning_rate": 8.637e-05, + "loss": 3.4025, + "step": 25920 + }, + { + "epoch": 43.82, + "learning_rate": 8.643666666666667e-05, + "loss": 3.4, + "step": 25940 + }, + { + "epoch": 43.85, + "learning_rate": 8.650333333333334e-05, + "loss": 3.4062, + "step": 25960 + }, + { + "epoch": 43.89, + "learning_rate": 8.657e-05, + "loss": 3.3913, + "step": 25980 + }, + { + "epoch": 43.92, + "learning_rate": 8.663666666666667e-05, + "loss": 3.3936, + "step": 26000 + }, + { + "epoch": 43.92, + "eval_loss": 3.3477540016174316, + "eval_runtime": 47.4103, + "eval_samples_per_second": 20.86, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.004606297891629218, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.02700516843596235, + "eval_tse_type": 5.617346365576901e-05, + "step": 26000 + }, + { + "epoch": 43.95, + "learning_rate": 8.670333333333333e-05, + "loss": 3.399, + "step": 26020 + }, + { + "epoch": 43.99, + "learning_rate": 8.677000000000001e-05, + "loss": 3.4121, + "step": 26040 + }, + { + "epoch": 44.02, + "learning_rate": 8.683666666666666e-05, + "loss": 3.3813, + "step": 26060 + }, + { + "epoch": 44.05, + "learning_rate": 8.690333333333334e-05, + "loss": 3.3494, + "step": 26080 + }, + { + "epoch": 44.09, + "learning_rate": 8.697e-05, + "loss": 3.3565, + "step": 26100 + }, + { + "epoch": 44.12, + "learning_rate": 8.703666666666667e-05, + "loss": 3.3417, + "step": 26120 + }, + { + "epoch": 44.16, + "learning_rate": 8.710333333333333e-05, + "loss": 3.3536, + "step": 26140 + }, + { + "epoch": 44.19, + "learning_rate": 8.717000000000001e-05, + "loss": 3.3381, + "step": 26160 + }, + { + "epoch": 44.22, + "learning_rate": 8.723666666666667e-05, + "loss": 3.347, + "step": 26180 + }, + { + "epoch": 44.26, + "learning_rate": 8.730333333333333e-05, + "loss": 3.3678, + "step": 26200 + }, + { + "epoch": 44.29, + "learning_rate": 8.737000000000001e-05, + "loss": 3.361, + "step": 26220 + }, + { + "epoch": 44.32, + "learning_rate": 8.743666666666666e-05, + "loss": 3.3652, + "step": 26240 + }, + { + "epoch": 44.36, + "learning_rate": 8.750333333333334e-05, + "loss": 3.3427, + "step": 26260 + }, + { + "epoch": 44.39, + "learning_rate": 8.757e-05, + "loss": 3.3665, + "step": 26280 + }, + { + "epoch": 44.43, + "learning_rate": 8.763666666666667e-05, + "loss": 3.3604, + "step": 26300 + }, + { + "epoch": 44.46, + "learning_rate": 8.770333333333333e-05, + "loss": 3.3563, + "step": 26320 + }, + { + "epoch": 44.49, + "learning_rate": 8.777000000000001e-05, + "loss": 3.363, + "step": 26340 + }, + { + "epoch": 44.53, + "learning_rate": 8.783666666666667e-05, + "loss": 3.3858, + "step": 26360 + }, + { + "epoch": 44.56, + "learning_rate": 8.790333333333334e-05, + "loss": 3.3711, + "step": 26380 + }, + { + "epoch": 44.59, + "learning_rate": 8.797000000000001e-05, + "loss": 3.3703, + "step": 26400 + }, + { + "epoch": 44.63, + "learning_rate": 8.803666666666666e-05, + "loss": 3.3602, + "step": 26420 + }, + { + "epoch": 44.66, + "learning_rate": 8.810333333333334e-05, + "loss": 3.3624, + "step": 26440 + }, + { + "epoch": 44.7, + "learning_rate": 8.817e-05, + "loss": 3.3859, + "step": 26460 + }, + { + "epoch": 44.73, + "learning_rate": 8.823666666666667e-05, + "loss": 3.3696, + "step": 26480 + }, + { + "epoch": 44.76, + "learning_rate": 8.830333333333333e-05, + "loss": 3.3751, + "step": 26500 + }, + { + "epoch": 44.8, + "learning_rate": 8.837000000000001e-05, + "loss": 3.3658, + "step": 26520 + }, + { + "epoch": 44.83, + "learning_rate": 8.843666666666667e-05, + "loss": 3.3523, + "step": 26540 + }, + { + "epoch": 44.86, + "learning_rate": 8.850333333333334e-05, + "loss": 3.373, + "step": 26560 + }, + { + "epoch": 44.9, + "learning_rate": 8.857000000000001e-05, + "loss": 3.3698, + "step": 26580 + }, + { + "epoch": 44.93, + "learning_rate": 8.863666666666666e-05, + "loss": 3.3809, + "step": 26600 + }, + { + "epoch": 44.97, + "learning_rate": 8.870333333333334e-05, + "loss": 3.3766, + "step": 26620 + }, + { + "epoch": 45.0, + "learning_rate": 8.877e-05, + "loss": 3.3607, + "step": 26640 + }, + { + "epoch": 45.03, + "learning_rate": 8.883333333333333e-05, + "loss": 3.2998, + "step": 26660 + }, + { + "epoch": 45.07, + "learning_rate": 8.89e-05, + "loss": 3.3279, + "step": 26680 + }, + { + "epoch": 45.1, + "learning_rate": 8.896666666666667e-05, + "loss": 3.3109, + "step": 26700 + }, + { + "epoch": 45.14, + "learning_rate": 8.903333333333333e-05, + "loss": 3.3249, + "step": 26720 + }, + { + "epoch": 45.17, + "learning_rate": 8.910000000000001e-05, + "loss": 3.3232, + "step": 26740 + }, + { + "epoch": 45.2, + "learning_rate": 8.916666666666667e-05, + "loss": 3.3187, + "step": 26760 + }, + { + "epoch": 45.24, + "learning_rate": 8.923333333333334e-05, + "loss": 3.3111, + "step": 26780 + }, + { + "epoch": 45.27, + "learning_rate": 8.93e-05, + "loss": 3.3137, + "step": 26800 + }, + { + "epoch": 45.3, + "learning_rate": 8.936666666666668e-05, + "loss": 3.3132, + "step": 26820 + }, + { + "epoch": 45.34, + "learning_rate": 8.943333333333333e-05, + "loss": 3.3091, + "step": 26840 + }, + { + "epoch": 45.37, + "learning_rate": 8.950000000000001e-05, + "loss": 3.3241, + "step": 26860 + }, + { + "epoch": 45.41, + "learning_rate": 8.956666666666667e-05, + "loss": 3.3187, + "step": 26880 + }, + { + "epoch": 45.44, + "learning_rate": 8.963333333333333e-05, + "loss": 3.3425, + "step": 26900 + }, + { + "epoch": 45.47, + "learning_rate": 8.970000000000001e-05, + "loss": 3.3341, + "step": 26920 + }, + { + "epoch": 45.51, + "learning_rate": 8.976666666666666e-05, + "loss": 3.3377, + "step": 26940 + }, + { + "epoch": 45.54, + "learning_rate": 8.983333333333334e-05, + "loss": 3.327, + "step": 26960 + }, + { + "epoch": 45.57, + "learning_rate": 8.99e-05, + "loss": 3.3312, + "step": 26980 + }, + { + "epoch": 45.61, + "learning_rate": 8.996666666666667e-05, + "loss": 3.3293, + "step": 27000 + }, + { + "epoch": 45.61, + "eval_loss": 3.294952869415283, + "eval_runtime": 47.769, + "eval_samples_per_second": 20.704, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.005338363761522809, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.029210528467358388, + "eval_tse_type": 2.106504887091338e-05, + "step": 27000 + }, + { + "epoch": 45.64, + "learning_rate": 9.003333333333333e-05, + "loss": 3.3115, + "step": 27020 + }, + { + "epoch": 45.68, + "learning_rate": 9.010000000000001e-05, + "loss": 3.3484, + "step": 27040 + }, + { + "epoch": 45.71, + "learning_rate": 9.016666666666667e-05, + "loss": 3.3299, + "step": 27060 + }, + { + "epoch": 45.74, + "learning_rate": 9.023333333333334e-05, + "loss": 3.3394, + "step": 27080 + }, + { + "epoch": 45.78, + "learning_rate": 9.030000000000001e-05, + "loss": 3.3232, + "step": 27100 + }, + { + "epoch": 45.81, + "learning_rate": 9.036666666666666e-05, + "loss": 3.3264, + "step": 27120 + }, + { + "epoch": 45.84, + "learning_rate": 9.043333333333334e-05, + "loss": 3.3434, + "step": 27140 + }, + { + "epoch": 45.88, + "learning_rate": 9.05e-05, + "loss": 3.3458, + "step": 27160 + }, + { + "epoch": 45.91, + "learning_rate": 9.056666666666667e-05, + "loss": 3.3291, + "step": 27180 + }, + { + "epoch": 45.95, + "learning_rate": 9.063333333333333e-05, + "loss": 3.3573, + "step": 27200 + }, + { + "epoch": 45.98, + "learning_rate": 9.070000000000001e-05, + "loss": 3.3235, + "step": 27220 + }, + { + "epoch": 46.01, + "learning_rate": 9.076666666666667e-05, + "loss": 3.3058, + "step": 27240 + }, + { + "epoch": 46.05, + "learning_rate": 9.083333333333334e-05, + "loss": 3.2704, + "step": 27260 + }, + { + "epoch": 46.08, + "learning_rate": 9.090000000000001e-05, + "loss": 3.2543, + "step": 27280 + }, + { + "epoch": 46.11, + "learning_rate": 9.096666666666666e-05, + "loss": 3.2758, + "step": 27300 + }, + { + "epoch": 46.15, + "learning_rate": 9.103333333333334e-05, + "loss": 3.2722, + "step": 27320 + }, + { + "epoch": 46.18, + "learning_rate": 9.11e-05, + "loss": 3.2861, + "step": 27340 + }, + { + "epoch": 46.22, + "learning_rate": 9.116666666666667e-05, + "loss": 3.2896, + "step": 27360 + }, + { + "epoch": 46.25, + "learning_rate": 9.123333333333333e-05, + "loss": 3.2744, + "step": 27380 + }, + { + "epoch": 46.28, + "learning_rate": 9.130000000000001e-05, + "loss": 3.2888, + "step": 27400 + }, + { + "epoch": 46.32, + "learning_rate": 9.136666666666666e-05, + "loss": 3.2813, + "step": 27420 + }, + { + "epoch": 46.35, + "learning_rate": 9.143333333333334e-05, + "loss": 3.2856, + "step": 27440 + }, + { + "epoch": 46.39, + "learning_rate": 9.15e-05, + "loss": 3.2896, + "step": 27460 + }, + { + "epoch": 46.42, + "learning_rate": 9.156666666666667e-05, + "loss": 3.2784, + "step": 27480 + }, + { + "epoch": 46.45, + "learning_rate": 9.163333333333334e-05, + "loss": 3.297, + "step": 27500 + }, + { + "epoch": 46.49, + "learning_rate": 9.17e-05, + "loss": 3.2914, + "step": 27520 + }, + { + "epoch": 46.52, + "learning_rate": 9.176666666666667e-05, + "loss": 3.294, + "step": 27540 + }, + { + "epoch": 46.55, + "learning_rate": 9.183333333333333e-05, + "loss": 3.2912, + "step": 27560 + }, + { + "epoch": 46.59, + "learning_rate": 9.190000000000001e-05, + "loss": 3.3075, + "step": 27580 + }, + { + "epoch": 46.62, + "learning_rate": 9.196666666666666e-05, + "loss": 3.295, + "step": 27600 + }, + { + "epoch": 46.66, + "learning_rate": 9.203333333333334e-05, + "loss": 3.298, + "step": 27620 + }, + { + "epoch": 46.69, + "learning_rate": 9.21e-05, + "loss": 3.316, + "step": 27640 + }, + { + "epoch": 46.72, + "learning_rate": 9.216666666666667e-05, + "loss": 3.2933, + "step": 27660 + }, + { + "epoch": 46.76, + "learning_rate": 9.223333333333334e-05, + "loss": 3.3057, + "step": 27680 + }, + { + "epoch": 46.79, + "learning_rate": 9.230000000000001e-05, + "loss": 3.289, + "step": 27700 + }, + { + "epoch": 46.82, + "learning_rate": 9.236666666666667e-05, + "loss": 3.2987, + "step": 27720 + }, + { + "epoch": 46.86, + "learning_rate": 9.243333333333333e-05, + "loss": 3.2761, + "step": 27740 + }, + { + "epoch": 46.89, + "learning_rate": 9.250000000000001e-05, + "loss": 3.2988, + "step": 27760 + }, + { + "epoch": 46.93, + "learning_rate": 9.256666666666666e-05, + "loss": 3.2998, + "step": 27780 + }, + { + "epoch": 46.96, + "learning_rate": 9.263333333333334e-05, + "loss": 3.3402, + "step": 27800 + }, + { + "epoch": 46.99, + "learning_rate": 9.27e-05, + "loss": 3.301, + "step": 27820 + }, + { + "epoch": 47.03, + "learning_rate": 9.276666666666667e-05, + "loss": 3.2546, + "step": 27840 + }, + { + "epoch": 47.06, + "learning_rate": 9.283333333333334e-05, + "loss": 3.234, + "step": 27860 + }, + { + "epoch": 47.09, + "learning_rate": 9.290000000000001e-05, + "loss": 3.2488, + "step": 27880 + }, + { + "epoch": 47.13, + "learning_rate": 9.296666666666667e-05, + "loss": 3.2436, + "step": 27900 + }, + { + "epoch": 47.16, + "learning_rate": 9.303333333333334e-05, + "loss": 3.2488, + "step": 27920 + }, + { + "epoch": 47.2, + "learning_rate": 9.310000000000001e-05, + "loss": 3.2362, + "step": 27940 + }, + { + "epoch": 47.23, + "learning_rate": 9.316666666666666e-05, + "loss": 3.238, + "step": 27960 + }, + { + "epoch": 47.26, + "learning_rate": 9.323333333333334e-05, + "loss": 3.2507, + "step": 27980 + }, + { + "epoch": 47.3, + "learning_rate": 9.33e-05, + "loss": 3.2514, + "step": 28000 + }, + { + "epoch": 47.3, + "eval_loss": 3.2621803283691406, + "eval_runtime": 48.0971, + "eval_samples_per_second": 20.563, + "eval_steps_per_second": 0.125, + "eval_tse_ndup": 0.005875090625116292, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.025641305248088424, + "eval_tse_type": 8.60156162228963e-05, + "step": 28000 + }, + { + "epoch": 47.33, + "learning_rate": 9.336666666666667e-05, + "loss": 3.2557, + "step": 28020 + }, + { + "epoch": 47.36, + "learning_rate": 9.343333333333335e-05, + "loss": 3.2641, + "step": 28040 + }, + { + "epoch": 47.4, + "learning_rate": 9.350000000000001e-05, + "loss": 3.2384, + "step": 28060 + }, + { + "epoch": 47.43, + "learning_rate": 9.356666666666667e-05, + "loss": 3.2572, + "step": 28080 + }, + { + "epoch": 47.47, + "learning_rate": 9.363333333333334e-05, + "loss": 3.253, + "step": 28100 + }, + { + "epoch": 47.5, + "learning_rate": 9.370000000000001e-05, + "loss": 3.2743, + "step": 28120 + }, + { + "epoch": 47.53, + "learning_rate": 9.376666666666666e-05, + "loss": 3.2793, + "step": 28140 + }, + { + "epoch": 47.57, + "learning_rate": 9.383333333333334e-05, + "loss": 3.2907, + "step": 28160 + }, + { + "epoch": 47.6, + "learning_rate": 9.39e-05, + "loss": 3.2625, + "step": 28180 + }, + { + "epoch": 47.64, + "learning_rate": 9.396666666666667e-05, + "loss": 3.2671, + "step": 28200 + }, + { + "epoch": 47.67, + "learning_rate": 9.403333333333335e-05, + "loss": 3.2486, + "step": 28220 + }, + { + "epoch": 47.7, + "learning_rate": 9.41e-05, + "loss": 3.2518, + "step": 28240 + }, + { + "epoch": 47.74, + "learning_rate": 9.416666666666667e-05, + "loss": 3.2705, + "step": 28260 + }, + { + "epoch": 47.77, + "learning_rate": 9.423333333333334e-05, + "loss": 3.2757, + "step": 28280 + }, + { + "epoch": 47.8, + "learning_rate": 9.43e-05, + "loss": 3.2451, + "step": 28300 + }, + { + "epoch": 47.84, + "learning_rate": 9.436666666666667e-05, + "loss": 3.2783, + "step": 28320 + }, + { + "epoch": 47.87, + "learning_rate": 9.443333333333334e-05, + "loss": 3.2583, + "step": 28340 + }, + { + "epoch": 47.91, + "learning_rate": 9.449999999999999e-05, + "loss": 3.2671, + "step": 28360 + }, + { + "epoch": 47.94, + "learning_rate": 9.456666666666667e-05, + "loss": 3.2556, + "step": 28380 + }, + { + "epoch": 47.97, + "learning_rate": 9.463333333333333e-05, + "loss": 3.2696, + "step": 28400 + }, + { + "epoch": 48.01, + "learning_rate": 9.47e-05, + "loss": 3.2638, + "step": 28420 + }, + { + "epoch": 48.04, + "learning_rate": 9.476666666666668e-05, + "loss": 3.1904, + "step": 28440 + }, + { + "epoch": 48.07, + "learning_rate": 9.483333333333334e-05, + "loss": 3.1949, + "step": 28460 + }, + { + "epoch": 48.11, + "learning_rate": 9.49e-05, + "loss": 3.2049, + "step": 28480 + }, + { + "epoch": 48.14, + "learning_rate": 9.496666666666667e-05, + "loss": 3.2076, + "step": 28500 + }, + { + "epoch": 48.18, + "learning_rate": 9.503333333333334e-05, + "loss": 3.2015, + "step": 28520 + }, + { + "epoch": 48.21, + "learning_rate": 9.51e-05, + "loss": 3.2139, + "step": 28540 + }, + { + "epoch": 48.24, + "learning_rate": 9.516333333333334e-05, + "loss": 3.2035, + "step": 28560 + }, + { + "epoch": 48.28, + "learning_rate": 9.523000000000001e-05, + "loss": 3.2229, + "step": 28580 + }, + { + "epoch": 48.31, + "learning_rate": 9.529666666666667e-05, + "loss": 3.2199, + "step": 28600 + }, + { + "epoch": 48.34, + "learning_rate": 9.536333333333334e-05, + "loss": 3.2205, + "step": 28620 + }, + { + "epoch": 48.38, + "learning_rate": 9.543000000000001e-05, + "loss": 3.215, + "step": 28640 + }, + { + "epoch": 48.41, + "learning_rate": 9.549666666666666e-05, + "loss": 3.2398, + "step": 28660 + }, + { + "epoch": 48.45, + "learning_rate": 9.556333333333334e-05, + "loss": 3.2235, + "step": 28680 + }, + { + "epoch": 48.48, + "learning_rate": 9.563e-05, + "loss": 3.2174, + "step": 28700 + }, + { + "epoch": 48.51, + "learning_rate": 9.569666666666667e-05, + "loss": 3.2132, + "step": 28720 + }, + { + "epoch": 48.55, + "learning_rate": 9.576333333333333e-05, + "loss": 3.2246, + "step": 28740 + }, + { + "epoch": 48.58, + "learning_rate": 9.583000000000001e-05, + "loss": 3.2202, + "step": 28760 + }, + { + "epoch": 48.61, + "learning_rate": 9.589666666666667e-05, + "loss": 3.2404, + "step": 28780 + }, + { + "epoch": 48.65, + "learning_rate": 9.596333333333334e-05, + "loss": 3.2408, + "step": 28800 + }, + { + "epoch": 48.68, + "learning_rate": 9.603000000000001e-05, + "loss": 3.2429, + "step": 28820 + }, + { + "epoch": 48.72, + "learning_rate": 9.609666666666666e-05, + "loss": 3.2487, + "step": 28840 + }, + { + "epoch": 48.75, + "learning_rate": 9.616333333333334e-05, + "loss": 3.2461, + "step": 28860 + }, + { + "epoch": 48.78, + "learning_rate": 9.623e-05, + "loss": 3.2251, + "step": 28880 + }, + { + "epoch": 48.82, + "learning_rate": 9.629666666666667e-05, + "loss": 3.2366, + "step": 28900 + }, + { + "epoch": 48.85, + "learning_rate": 9.636333333333333e-05, + "loss": 3.2259, + "step": 28920 + }, + { + "epoch": 48.89, + "learning_rate": 9.643000000000001e-05, + "loss": 3.2304, + "step": 28940 + }, + { + "epoch": 48.92, + "learning_rate": 9.649666666666667e-05, + "loss": 3.2434, + "step": 28960 + }, + { + "epoch": 48.95, + "learning_rate": 9.656333333333334e-05, + "loss": 3.2253, + "step": 28980 + }, + { + "epoch": 48.99, + "learning_rate": 9.663000000000002e-05, + "loss": 3.247, + "step": 29000 + }, + { + "epoch": 48.99, + "eval_loss": 3.2123095989227295, + "eval_runtime": 47.5207, + "eval_samples_per_second": 20.812, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.005857564524854671, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.02986247211927427, + "eval_tse_type": 4.213009774182676e-05, + "step": 29000 + }, + { + "epoch": 49.02, + "learning_rate": 9.669666666666667e-05, + "loss": 3.2048, + "step": 29020 + }, + { + "epoch": 49.05, + "learning_rate": 9.676333333333334e-05, + "loss": 3.1737, + "step": 29040 + }, + { + "epoch": 49.09, + "learning_rate": 9.683e-05, + "loss": 3.1742, + "step": 29060 + }, + { + "epoch": 49.12, + "learning_rate": 9.689666666666667e-05, + "loss": 3.1847, + "step": 29080 + }, + { + "epoch": 49.16, + "learning_rate": 9.696333333333333e-05, + "loss": 3.1615, + "step": 29100 + }, + { + "epoch": 49.19, + "learning_rate": 9.703000000000001e-05, + "loss": 3.1775, + "step": 29120 + }, + { + "epoch": 49.22, + "learning_rate": 9.709666666666667e-05, + "loss": 3.169, + "step": 29140 + }, + { + "epoch": 49.26, + "learning_rate": 9.716333333333334e-05, + "loss": 3.1913, + "step": 29160 + }, + { + "epoch": 49.29, + "learning_rate": 9.723000000000002e-05, + "loss": 3.1899, + "step": 29180 + }, + { + "epoch": 49.32, + "learning_rate": 9.729666666666667e-05, + "loss": 3.1753, + "step": 29200 + }, + { + "epoch": 49.36, + "learning_rate": 9.736333333333334e-05, + "loss": 3.1989, + "step": 29220 + }, + { + "epoch": 49.39, + "learning_rate": 9.743000000000001e-05, + "loss": 3.1976, + "step": 29240 + }, + { + "epoch": 49.43, + "learning_rate": 9.749666666666667e-05, + "loss": 3.1722, + "step": 29260 + }, + { + "epoch": 49.46, + "learning_rate": 9.756333333333333e-05, + "loss": 3.1894, + "step": 29280 + }, + { + "epoch": 49.49, + "learning_rate": 9.763e-05, + "loss": 3.1922, + "step": 29300 + }, + { + "epoch": 49.53, + "learning_rate": 9.769666666666668e-05, + "loss": 3.1944, + "step": 29320 + }, + { + "epoch": 49.56, + "learning_rate": 9.776333333333334e-05, + "loss": 3.1903, + "step": 29340 + }, + { + "epoch": 49.59, + "learning_rate": 9.783e-05, + "loss": 3.1874, + "step": 29360 + }, + { + "epoch": 49.63, + "learning_rate": 9.789666666666667e-05, + "loss": 3.1963, + "step": 29380 + }, + { + "epoch": 49.66, + "learning_rate": 9.796333333333334e-05, + "loss": 3.1988, + "step": 29400 + }, + { + "epoch": 49.7, + "learning_rate": 9.803e-05, + "loss": 3.2034, + "step": 29420 + }, + { + "epoch": 49.73, + "learning_rate": 9.809666666666667e-05, + "loss": 3.1992, + "step": 29440 + }, + { + "epoch": 49.76, + "learning_rate": 9.816333333333334e-05, + "loss": 3.1975, + "step": 29460 + }, + { + "epoch": 49.8, + "learning_rate": 9.823e-05, + "loss": 3.2082, + "step": 29480 + }, + { + "epoch": 49.83, + "learning_rate": 9.829666666666666e-05, + "loss": 3.1831, + "step": 29500 + }, + { + "epoch": 49.86, + "learning_rate": 9.836333333333334e-05, + "loss": 3.2088, + "step": 29520 + }, + { + "epoch": 49.9, + "learning_rate": 9.843e-05, + "loss": 3.2198, + "step": 29540 + }, + { + "epoch": 49.93, + "learning_rate": 9.849666666666667e-05, + "loss": 3.2106, + "step": 29560 + }, + { + "epoch": 49.97, + "learning_rate": 9.856333333333335e-05, + "loss": 3.2075, + "step": 29580 + }, + { + "epoch": 50.0, + "learning_rate": 9.863e-05, + "loss": 3.1942, + "step": 29600 + }, + { + "epoch": 50.03, + "learning_rate": 9.869666666666667e-05, + "loss": 3.1266, + "step": 29620 + }, + { + "epoch": 50.07, + "learning_rate": 9.876333333333334e-05, + "loss": 3.1327, + "step": 29640 + }, + { + "epoch": 50.1, + "learning_rate": 9.883e-05, + "loss": 3.1546, + "step": 29660 + }, + { + "epoch": 50.14, + "learning_rate": 9.889666666666666e-05, + "loss": 3.1446, + "step": 29680 + }, + { + "epoch": 50.17, + "learning_rate": 9.896333333333334e-05, + "loss": 3.1516, + "step": 29700 + }, + { + "epoch": 50.2, + "learning_rate": 9.903e-05, + "loss": 3.1341, + "step": 29720 + }, + { + "epoch": 50.24, + "learning_rate": 9.909666666666667e-05, + "loss": 3.1798, + "step": 29740 + }, + { + "epoch": 50.27, + "learning_rate": 9.916333333333335e-05, + "loss": 3.1588, + "step": 29760 + }, + { + "epoch": 50.3, + "learning_rate": 9.923e-05, + "loss": 3.1727, + "step": 29780 + }, + { + "epoch": 50.34, + "learning_rate": 9.929666666666667e-05, + "loss": 3.1465, + "step": 29800 + }, + { + "epoch": 50.37, + "learning_rate": 9.936333333333334e-05, + "loss": 3.1362, + "step": 29820 + }, + { + "epoch": 50.41, + "learning_rate": 9.943e-05, + "loss": 3.1789, + "step": 29840 + }, + { + "epoch": 50.44, + "learning_rate": 9.949666666666667e-05, + "loss": 3.15, + "step": 29860 + }, + { + "epoch": 50.47, + "learning_rate": 9.956333333333334e-05, + "loss": 3.1583, + "step": 29880 + }, + { + "epoch": 50.51, + "learning_rate": 9.963e-05, + "loss": 3.1661, + "step": 29900 + }, + { + "epoch": 50.54, + "learning_rate": 9.969666666666667e-05, + "loss": 3.1609, + "step": 29920 + }, + { + "epoch": 50.57, + "learning_rate": 9.976333333333335e-05, + "loss": 3.1678, + "step": 29940 + }, + { + "epoch": 50.61, + "learning_rate": 9.983e-05, + "loss": 3.1783, + "step": 29960 + }, + { + "epoch": 50.64, + "learning_rate": 9.989666666666668e-05, + "loss": 3.1553, + "step": 29980 + }, + { + "epoch": 50.68, + "learning_rate": 9.996333333333334e-05, + "loss": 3.1724, + "step": 30000 + }, + { + "epoch": 50.68, + "eval_loss": 3.1809890270233154, + "eval_runtime": 50.8799, + "eval_samples_per_second": 19.438, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.007068895666270629, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.02828921872766767, + "eval_tse_type": 6.119376272675393e-05, + "step": 30000 + }, + { + "epoch": 50.71, + "learning_rate": 9.999999592123497e-05, + "loss": 3.1727, + "step": 30020 + }, + { + "epoch": 50.74, + "learning_rate": 9.999995765134629e-05, + "loss": 3.1707, + "step": 30040 + }, + { + "epoch": 50.78, + "learning_rate": 9.999987909739482e-05, + "loss": 3.1674, + "step": 30060 + }, + { + "epoch": 50.81, + "learning_rate": 9.999976025944386e-05, + "loss": 3.1599, + "step": 30080 + }, + { + "epoch": 50.84, + "learning_rate": 9.999960113758919e-05, + "loss": 3.1661, + "step": 30100 + }, + { + "epoch": 50.88, + "learning_rate": 9.999940173195897e-05, + "loss": 3.1731, + "step": 30120 + }, + { + "epoch": 50.91, + "learning_rate": 9.999916204271387e-05, + "loss": 3.1721, + "step": 30140 + }, + { + "epoch": 50.95, + "learning_rate": 9.9998882070047e-05, + "loss": 3.1665, + "step": 30160 + }, + { + "epoch": 50.98, + "learning_rate": 9.999856181418395e-05, + "loss": 3.1563, + "step": 30180 + }, + { + "epoch": 51.01, + "learning_rate": 9.999820127538271e-05, + "loss": 3.1503, + "step": 30200 + }, + { + "epoch": 51.05, + "learning_rate": 9.999780045393379e-05, + "loss": 3.104, + "step": 30220 + }, + { + "epoch": 51.08, + "learning_rate": 9.99973593501601e-05, + "loss": 3.0959, + "step": 30240 + }, + { + "epoch": 51.11, + "learning_rate": 9.999687796441705e-05, + "loss": 3.1194, + "step": 30260 + }, + { + "epoch": 51.15, + "learning_rate": 9.999635629709249e-05, + "loss": 3.1215, + "step": 30280 + }, + { + "epoch": 51.18, + "learning_rate": 9.99957943486067e-05, + "loss": 3.1198, + "step": 30300 + }, + { + "epoch": 51.22, + "learning_rate": 9.999519211941241e-05, + "loss": 3.1091, + "step": 30320 + }, + { + "epoch": 51.25, + "learning_rate": 9.999454960999488e-05, + "loss": 3.0948, + "step": 30340 + }, + { + "epoch": 51.28, + "learning_rate": 9.999386682087173e-05, + "loss": 3.1113, + "step": 30360 + }, + { + "epoch": 51.32, + "learning_rate": 9.999314375259307e-05, + "loss": 3.1162, + "step": 30380 + }, + { + "epoch": 51.35, + "learning_rate": 9.999238040574151e-05, + "loss": 3.1212, + "step": 30400 + }, + { + "epoch": 51.39, + "learning_rate": 9.999157678093199e-05, + "loss": 3.1081, + "step": 30420 + }, + { + "epoch": 51.42, + "learning_rate": 9.999073287881202e-05, + "loss": 3.1162, + "step": 30440 + }, + { + "epoch": 51.45, + "learning_rate": 9.998984870006152e-05, + "loss": 3.1367, + "step": 30460 + }, + { + "epoch": 51.49, + "learning_rate": 9.998892424539283e-05, + "loss": 3.1441, + "step": 30480 + }, + { + "epoch": 51.52, + "learning_rate": 9.99879595155508e-05, + "loss": 3.1363, + "step": 30500 + }, + { + "epoch": 51.55, + "learning_rate": 9.998695451131268e-05, + "loss": 3.1426, + "step": 30520 + }, + { + "epoch": 51.59, + "learning_rate": 9.998590923348818e-05, + "loss": 3.1136, + "step": 30540 + }, + { + "epoch": 51.62, + "learning_rate": 9.998482368291946e-05, + "loss": 3.1525, + "step": 30560 + }, + { + "epoch": 51.66, + "learning_rate": 9.998369786048113e-05, + "loss": 3.127, + "step": 30580 + }, + { + "epoch": 51.69, + "learning_rate": 9.998253176708026e-05, + "loss": 3.1411, + "step": 30600 + }, + { + "epoch": 51.72, + "learning_rate": 9.998132540365634e-05, + "loss": 3.157, + "step": 30620 + }, + { + "epoch": 51.76, + "learning_rate": 9.99800787711813e-05, + "loss": 3.1286, + "step": 30640 + }, + { + "epoch": 51.79, + "learning_rate": 9.997879187065955e-05, + "loss": 3.1298, + "step": 30660 + }, + { + "epoch": 51.82, + "learning_rate": 9.997746470312792e-05, + "loss": 3.1167, + "step": 30680 + }, + { + "epoch": 51.86, + "learning_rate": 9.997609726965566e-05, + "loss": 3.1338, + "step": 30700 + }, + { + "epoch": 51.89, + "learning_rate": 9.997468957134453e-05, + "loss": 3.1421, + "step": 30720 + }, + { + "epoch": 51.93, + "learning_rate": 9.997324160932864e-05, + "loss": 3.1708, + "step": 30740 + }, + { + "epoch": 51.96, + "learning_rate": 9.997175338477462e-05, + "loss": 3.1372, + "step": 30760 + }, + { + "epoch": 51.99, + "learning_rate": 9.997022489888151e-05, + "loss": 3.1345, + "step": 30780 + }, + { + "epoch": 52.03, + "learning_rate": 9.996865615288076e-05, + "loss": 3.07, + "step": 30800 + }, + { + "epoch": 52.06, + "learning_rate": 9.996704714803629e-05, + "loss": 3.0429, + "step": 30820 + }, + { + "epoch": 52.09, + "learning_rate": 9.996539788564444e-05, + "loss": 3.0817, + "step": 30840 + }, + { + "epoch": 52.13, + "learning_rate": 9.9963708367034e-05, + "loss": 3.0894, + "step": 30860 + }, + { + "epoch": 52.16, + "learning_rate": 9.996197859356618e-05, + "loss": 3.0859, + "step": 30880 + }, + { + "epoch": 52.2, + "learning_rate": 9.996020856663464e-05, + "loss": 3.0826, + "step": 30900 + }, + { + "epoch": 52.23, + "learning_rate": 9.995839828766543e-05, + "loss": 3.0649, + "step": 30920 + }, + { + "epoch": 52.26, + "learning_rate": 9.995654775811709e-05, + "loss": 3.0831, + "step": 30940 + }, + { + "epoch": 52.3, + "learning_rate": 9.995465697948054e-05, + "loss": 3.089, + "step": 30960 + }, + { + "epoch": 52.33, + "learning_rate": 9.995272595327916e-05, + "loss": 3.1017, + "step": 30980 + }, + { + "epoch": 52.36, + "learning_rate": 9.995075468106871e-05, + "loss": 3.0917, + "step": 31000 + }, + { + "epoch": 52.36, + "eval_loss": 3.1495461463928223, + "eval_runtime": 47.9091, + "eval_samples_per_second": 20.643, + "eval_steps_per_second": 0.125, + "eval_tse_ndup": 0.005384660563324352, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.025098678207101443, + "eval_tse_type": 8.777103696213909e-06, + "step": 31000 + }, + { + "epoch": 52.4, + "learning_rate": 9.994874316443746e-05, + "loss": 3.1011, + "step": 31020 + }, + { + "epoch": 52.43, + "learning_rate": 9.994669140500601e-05, + "loss": 3.1018, + "step": 31040 + }, + { + "epoch": 52.47, + "learning_rate": 9.994459940442744e-05, + "loss": 3.1035, + "step": 31060 + }, + { + "epoch": 52.5, + "learning_rate": 9.994246716438724e-05, + "loss": 3.0992, + "step": 31080 + }, + { + "epoch": 52.53, + "learning_rate": 9.994029468660333e-05, + "loss": 3.1029, + "step": 31100 + }, + { + "epoch": 52.57, + "learning_rate": 9.9938081972826e-05, + "loss": 3.1093, + "step": 31120 + }, + { + "epoch": 52.6, + "learning_rate": 9.993582902483802e-05, + "loss": 3.117, + "step": 31140 + }, + { + "epoch": 52.64, + "learning_rate": 9.993353584445455e-05, + "loss": 3.1145, + "step": 31160 + }, + { + "epoch": 52.67, + "learning_rate": 9.993120243352317e-05, + "loss": 3.0942, + "step": 31180 + }, + { + "epoch": 52.7, + "learning_rate": 9.992882879392383e-05, + "loss": 3.0882, + "step": 31200 + }, + { + "epoch": 52.74, + "learning_rate": 9.992641492756895e-05, + "loss": 3.0925, + "step": 31220 + }, + { + "epoch": 52.77, + "learning_rate": 9.992396083640336e-05, + "loss": 3.0963, + "step": 31240 + }, + { + "epoch": 52.8, + "learning_rate": 9.992146652240427e-05, + "loss": 3.1115, + "step": 31260 + }, + { + "epoch": 52.84, + "learning_rate": 9.991893198758127e-05, + "loss": 3.0974, + "step": 31280 + }, + { + "epoch": 52.87, + "learning_rate": 9.99163572339764e-05, + "loss": 3.1019, + "step": 31300 + }, + { + "epoch": 52.91, + "learning_rate": 9.991374226366414e-05, + "loss": 3.1158, + "step": 31320 + }, + { + "epoch": 52.94, + "learning_rate": 9.991108707875127e-05, + "loss": 3.1046, + "step": 31340 + }, + { + "epoch": 52.97, + "learning_rate": 9.990839168137705e-05, + "loss": 3.0875, + "step": 31360 + }, + { + "epoch": 53.01, + "learning_rate": 9.99056560737131e-05, + "loss": 3.0856, + "step": 31380 + }, + { + "epoch": 53.04, + "learning_rate": 9.990288025796345e-05, + "loss": 3.0327, + "step": 31400 + }, + { + "epoch": 53.07, + "learning_rate": 9.990006423636453e-05, + "loss": 3.0263, + "step": 31420 + }, + { + "epoch": 53.11, + "learning_rate": 9.989720801118518e-05, + "loss": 3.0481, + "step": 31440 + }, + { + "epoch": 53.14, + "learning_rate": 9.989431158472657e-05, + "loss": 3.022, + "step": 31460 + }, + { + "epoch": 53.18, + "learning_rate": 9.989137495932231e-05, + "loss": 3.0719, + "step": 31480 + }, + { + "epoch": 53.21, + "learning_rate": 9.98883981373384e-05, + "loss": 3.0342, + "step": 31500 + }, + { + "epoch": 53.24, + "learning_rate": 9.98853811211732e-05, + "loss": 3.0417, + "step": 31520 + }, + { + "epoch": 53.28, + "learning_rate": 9.988232391325747e-05, + "loss": 3.051, + "step": 31540 + }, + { + "epoch": 53.31, + "learning_rate": 9.987922651605435e-05, + "loss": 3.047, + "step": 31560 + }, + { + "epoch": 53.34, + "learning_rate": 9.987608893205935e-05, + "loss": 3.0516, + "step": 31580 + }, + { + "epoch": 53.38, + "learning_rate": 9.987291116380037e-05, + "loss": 3.057, + "step": 31600 + }, + { + "epoch": 53.41, + "learning_rate": 9.986969321383768e-05, + "loss": 3.0616, + "step": 31620 + }, + { + "epoch": 53.45, + "learning_rate": 9.986643508476392e-05, + "loss": 3.0592, + "step": 31640 + }, + { + "epoch": 53.48, + "learning_rate": 9.986313677920411e-05, + "loss": 3.0893, + "step": 31660 + }, + { + "epoch": 53.51, + "learning_rate": 9.985979829981562e-05, + "loss": 3.0649, + "step": 31680 + }, + { + "epoch": 53.55, + "learning_rate": 9.985641964928822e-05, + "loss": 3.0782, + "step": 31700 + }, + { + "epoch": 53.58, + "learning_rate": 9.985300083034403e-05, + "loss": 3.0677, + "step": 31720 + }, + { + "epoch": 53.61, + "learning_rate": 9.984954184573753e-05, + "loss": 3.0715, + "step": 31740 + }, + { + "epoch": 53.65, + "learning_rate": 9.984604269825555e-05, + "loss": 3.0784, + "step": 31760 + }, + { + "epoch": 53.68, + "learning_rate": 9.98425033907173e-05, + "loss": 3.0698, + "step": 31780 + }, + { + "epoch": 53.72, + "learning_rate": 9.983892392597433e-05, + "loss": 3.0702, + "step": 31800 + }, + { + "epoch": 53.75, + "learning_rate": 9.983530430691054e-05, + "loss": 3.0813, + "step": 31820 + }, + { + "epoch": 53.78, + "learning_rate": 9.983164453644222e-05, + "loss": 3.0766, + "step": 31840 + }, + { + "epoch": 53.82, + "learning_rate": 9.982794461751796e-05, + "loss": 3.0617, + "step": 31860 + }, + { + "epoch": 53.85, + "learning_rate": 9.982420455311872e-05, + "loss": 3.0803, + "step": 31880 + }, + { + "epoch": 53.89, + "learning_rate": 9.98204243462578e-05, + "loss": 3.0647, + "step": 31900 + }, + { + "epoch": 53.92, + "learning_rate": 9.981660399998086e-05, + "loss": 3.0745, + "step": 31920 + }, + { + "epoch": 53.95, + "learning_rate": 9.981274351736585e-05, + "loss": 3.089, + "step": 31940 + }, + { + "epoch": 53.99, + "learning_rate": 9.980884290152313e-05, + "loss": 3.0893, + "step": 31960 + }, + { + "epoch": 54.02, + "learning_rate": 9.980490215559535e-05, + "loss": 3.0372, + "step": 31980 + }, + { + "epoch": 54.05, + "learning_rate": 9.980092128275748e-05, + "loss": 3.0138, + "step": 32000 + }, + { + "epoch": 54.05, + "eval_loss": 3.1076266765594482, + "eval_runtime": 47.4505, + "eval_samples_per_second": 20.843, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004153837387933892, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.029939971117016524, + "eval_tse_type": 5.617346365576901e-05, + "step": 32000 + }, + { + "epoch": 54.09, + "learning_rate": 9.979690028621684e-05, + "loss": 3.0161, + "step": 32020 + }, + { + "epoch": 54.12, + "learning_rate": 9.979283916921308e-05, + "loss": 3.0088, + "step": 32040 + }, + { + "epoch": 54.16, + "learning_rate": 9.978873793501817e-05, + "loss": 3.0205, + "step": 32060 + }, + { + "epoch": 54.19, + "learning_rate": 9.978459658693639e-05, + "loss": 3.0207, + "step": 32080 + }, + { + "epoch": 54.22, + "learning_rate": 9.978041512830438e-05, + "loss": 3.0243, + "step": 32100 + }, + { + "epoch": 54.26, + "learning_rate": 9.977619356249103e-05, + "loss": 3.027, + "step": 32120 + }, + { + "epoch": 54.29, + "learning_rate": 9.97719318928976e-05, + "loss": 3.0175, + "step": 32140 + }, + { + "epoch": 54.32, + "learning_rate": 9.976763012295762e-05, + "loss": 3.027, + "step": 32160 + }, + { + "epoch": 54.36, + "learning_rate": 9.976328825613696e-05, + "loss": 3.0509, + "step": 32180 + }, + { + "epoch": 54.39, + "learning_rate": 9.975890629593378e-05, + "loss": 3.032, + "step": 32200 + }, + { + "epoch": 54.43, + "learning_rate": 9.975448424587858e-05, + "loss": 3.0245, + "step": 32220 + }, + { + "epoch": 54.46, + "learning_rate": 9.975002210953408e-05, + "loss": 3.0334, + "step": 32240 + }, + { + "epoch": 54.49, + "learning_rate": 9.974551989049535e-05, + "loss": 3.0441, + "step": 32260 + }, + { + "epoch": 54.53, + "learning_rate": 9.974097759238976e-05, + "loss": 3.0297, + "step": 32280 + }, + { + "epoch": 54.56, + "learning_rate": 9.973639521887696e-05, + "loss": 3.0405, + "step": 32300 + }, + { + "epoch": 54.59, + "learning_rate": 9.973177277364889e-05, + "loss": 3.0464, + "step": 32320 + }, + { + "epoch": 54.63, + "learning_rate": 9.972711026042975e-05, + "loss": 3.0385, + "step": 32340 + }, + { + "epoch": 54.66, + "learning_rate": 9.972240768297605e-05, + "loss": 3.0736, + "step": 32360 + }, + { + "epoch": 54.7, + "learning_rate": 9.971766504507657e-05, + "loss": 3.038, + "step": 32380 + }, + { + "epoch": 54.73, + "learning_rate": 9.971288235055239e-05, + "loss": 3.0331, + "step": 32400 + }, + { + "epoch": 54.76, + "learning_rate": 9.97080596032568e-05, + "loss": 3.0234, + "step": 32420 + }, + { + "epoch": 54.8, + "learning_rate": 9.970319680707543e-05, + "loss": 3.0328, + "step": 32440 + }, + { + "epoch": 54.83, + "learning_rate": 9.969829396592614e-05, + "loss": 3.0402, + "step": 32460 + }, + { + "epoch": 54.86, + "learning_rate": 9.969335108375907e-05, + "loss": 3.0281, + "step": 32480 + }, + { + "epoch": 54.9, + "learning_rate": 9.968836816455659e-05, + "loss": 3.0512, + "step": 32500 + }, + { + "epoch": 54.93, + "learning_rate": 9.968334521233337e-05, + "loss": 3.0608, + "step": 32520 + }, + { + "epoch": 54.97, + "learning_rate": 9.967828223113629e-05, + "loss": 3.047, + "step": 32540 + }, + { + "epoch": 55.0, + "learning_rate": 9.967317922504452e-05, + "loss": 3.0328, + "step": 32560 + }, + { + "epoch": 55.03, + "learning_rate": 9.966803619816946e-05, + "loss": 2.9831, + "step": 32580 + }, + { + "epoch": 55.07, + "learning_rate": 9.966311325716134e-05, + "loss": 2.9638, + "step": 32600 + }, + { + "epoch": 55.1, + "learning_rate": 9.965789220170637e-05, + "loss": 2.9943, + "step": 32620 + }, + { + "epoch": 55.14, + "learning_rate": 9.965263113778456e-05, + "loss": 2.9563, + "step": 32640 + }, + { + "epoch": 55.17, + "learning_rate": 9.964733006963469e-05, + "loss": 2.9923, + "step": 32660 + }, + { + "epoch": 55.2, + "learning_rate": 9.964198900152773e-05, + "loss": 3.0015, + "step": 32680 + }, + { + "epoch": 55.24, + "learning_rate": 9.963660793776688e-05, + "loss": 2.9952, + "step": 32700 + }, + { + "epoch": 55.27, + "learning_rate": 9.963118688268754e-05, + "loss": 3.0032, + "step": 32720 + }, + { + "epoch": 55.3, + "learning_rate": 9.962572584065739e-05, + "loss": 2.9975, + "step": 32740 + }, + { + "epoch": 55.34, + "learning_rate": 9.962022481607626e-05, + "loss": 2.9956, + "step": 32760 + }, + { + "epoch": 55.37, + "learning_rate": 9.961468381337627e-05, + "loss": 3.0038, + "step": 32780 + }, + { + "epoch": 55.41, + "learning_rate": 9.960910283702167e-05, + "loss": 2.9954, + "step": 32800 + }, + { + "epoch": 55.44, + "learning_rate": 9.960348189150896e-05, + "loss": 2.9743, + "step": 32820 + }, + { + "epoch": 55.47, + "learning_rate": 9.959782098136683e-05, + "loss": 3.0213, + "step": 32840 + }, + { + "epoch": 55.51, + "learning_rate": 9.959212011115619e-05, + "loss": 3.0029, + "step": 32860 + }, + { + "epoch": 55.54, + "learning_rate": 9.958637928547012e-05, + "loss": 2.9911, + "step": 32880 + }, + { + "epoch": 55.57, + "learning_rate": 9.958059850893389e-05, + "loss": 3.0187, + "step": 32900 + }, + { + "epoch": 55.61, + "learning_rate": 9.957477778620497e-05, + "loss": 3.0225, + "step": 32920 + }, + { + "epoch": 55.64, + "learning_rate": 9.956891712197302e-05, + "loss": 3.005, + "step": 32940 + }, + { + "epoch": 55.68, + "learning_rate": 9.956301652095986e-05, + "loss": 3.0013, + "step": 32960 + }, + { + "epoch": 55.71, + "learning_rate": 9.955707598791952e-05, + "loss": 3.0038, + "step": 32980 + }, + { + "epoch": 55.74, + "learning_rate": 9.955109552763815e-05, + "loss": 3.0344, + "step": 33000 + }, + { + "epoch": 55.74, + "eval_loss": 3.0742015838623047, + "eval_runtime": 47.6167, + "eval_samples_per_second": 20.77, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.005540551425196166, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.023727688279164206, + "eval_tse_type": 1.9309628131670596e-05, + "step": 33000 + }, + { + "epoch": 55.78, + "learning_rate": 9.954507514493412e-05, + "loss": 3.0023, + "step": 33020 + }, + { + "epoch": 55.81, + "learning_rate": 9.953901484465794e-05, + "loss": 3.0111, + "step": 33040 + }, + { + "epoch": 55.84, + "learning_rate": 9.953291463169228e-05, + "loss": 3.0387, + "step": 33060 + }, + { + "epoch": 55.88, + "learning_rate": 9.952677451095196e-05, + "loss": 3.0281, + "step": 33080 + }, + { + "epoch": 55.91, + "learning_rate": 9.9520594487384e-05, + "loss": 3.0099, + "step": 33100 + }, + { + "epoch": 55.95, + "learning_rate": 9.95143745659675e-05, + "loss": 3.032, + "step": 33120 + }, + { + "epoch": 55.98, + "learning_rate": 9.950811475171376e-05, + "loss": 3.0183, + "step": 33140 + }, + { + "epoch": 56.01, + "learning_rate": 9.950181504966617e-05, + "loss": 3.0038, + "step": 33160 + }, + { + "epoch": 56.05, + "learning_rate": 9.949547546490032e-05, + "loss": 2.9516, + "step": 33180 + }, + { + "epoch": 56.08, + "learning_rate": 9.948909600252388e-05, + "loss": 2.9413, + "step": 33200 + }, + { + "epoch": 56.11, + "learning_rate": 9.948267666767665e-05, + "loss": 2.9535, + "step": 33220 + }, + { + "epoch": 56.15, + "learning_rate": 9.947621746553062e-05, + "loss": 2.9572, + "step": 33240 + }, + { + "epoch": 56.18, + "learning_rate": 9.946971840128981e-05, + "loss": 2.9445, + "step": 33260 + }, + { + "epoch": 56.22, + "learning_rate": 9.946317948019043e-05, + "loss": 2.9524, + "step": 33280 + }, + { + "epoch": 56.25, + "learning_rate": 9.945660070750074e-05, + "loss": 2.9687, + "step": 33300 + }, + { + "epoch": 56.28, + "learning_rate": 9.944998208852116e-05, + "loss": 2.9653, + "step": 33320 + }, + { + "epoch": 56.32, + "learning_rate": 9.944332362858418e-05, + "loss": 2.9739, + "step": 33340 + }, + { + "epoch": 56.35, + "learning_rate": 9.943662533305442e-05, + "loss": 2.9763, + "step": 33360 + }, + { + "epoch": 56.39, + "learning_rate": 9.942988720732856e-05, + "loss": 2.982, + "step": 33380 + }, + { + "epoch": 56.42, + "learning_rate": 9.942310925683538e-05, + "loss": 2.9574, + "step": 33400 + }, + { + "epoch": 56.45, + "learning_rate": 9.941629148703575e-05, + "loss": 2.9716, + "step": 33420 + }, + { + "epoch": 56.49, + "learning_rate": 9.940943390342264e-05, + "loss": 2.9621, + "step": 33440 + }, + { + "epoch": 56.52, + "learning_rate": 9.940253651152109e-05, + "loss": 2.9938, + "step": 33460 + }, + { + "epoch": 56.55, + "learning_rate": 9.939559931688818e-05, + "loss": 2.9634, + "step": 33480 + }, + { + "epoch": 56.59, + "learning_rate": 9.938862232511309e-05, + "loss": 2.9957, + "step": 33500 + }, + { + "epoch": 56.62, + "learning_rate": 9.938160554181706e-05, + "loss": 2.9867, + "step": 33520 + }, + { + "epoch": 56.66, + "learning_rate": 9.937454897265337e-05, + "loss": 2.9972, + "step": 33540 + }, + { + "epoch": 56.69, + "learning_rate": 9.93674526233074e-05, + "loss": 2.981, + "step": 33560 + }, + { + "epoch": 56.72, + "learning_rate": 9.936031649949654e-05, + "loss": 2.9927, + "step": 33580 + }, + { + "epoch": 56.76, + "learning_rate": 9.935314060697024e-05, + "loss": 3.0049, + "step": 33600 + }, + { + "epoch": 56.79, + "learning_rate": 9.934592495150995e-05, + "loss": 2.9847, + "step": 33620 + }, + { + "epoch": 56.82, + "learning_rate": 9.933866953892923e-05, + "loss": 2.9842, + "step": 33640 + }, + { + "epoch": 56.86, + "learning_rate": 9.933137437507363e-05, + "loss": 3.0006, + "step": 33660 + }, + { + "epoch": 56.89, + "learning_rate": 9.932403946582072e-05, + "loss": 2.9997, + "step": 33680 + }, + { + "epoch": 56.93, + "learning_rate": 9.931666481708013e-05, + "loss": 2.9872, + "step": 33700 + }, + { + "epoch": 56.96, + "learning_rate": 9.930925043479345e-05, + "loss": 3.0034, + "step": 33720 + }, + { + "epoch": 56.99, + "learning_rate": 9.930179632493433e-05, + "loss": 2.9868, + "step": 33740 + }, + { + "epoch": 57.03, + "learning_rate": 9.929430249350839e-05, + "loss": 2.931, + "step": 33760 + }, + { + "epoch": 57.06, + "learning_rate": 9.92867689465533e-05, + "loss": 2.9095, + "step": 33780 + }, + { + "epoch": 57.09, + "learning_rate": 9.927919569013872e-05, + "loss": 2.8951, + "step": 33800 + }, + { + "epoch": 57.13, + "learning_rate": 9.927158273036625e-05, + "loss": 2.9222, + "step": 33820 + }, + { + "epoch": 57.16, + "learning_rate": 9.926393007336952e-05, + "loss": 2.9202, + "step": 33840 + }, + { + "epoch": 57.2, + "learning_rate": 9.925623772531414e-05, + "loss": 2.9256, + "step": 33860 + }, + { + "epoch": 57.23, + "learning_rate": 9.92485056923977e-05, + "loss": 2.9443, + "step": 33880 + }, + { + "epoch": 57.26, + "learning_rate": 9.924073398084976e-05, + "loss": 2.9215, + "step": 33900 + }, + { + "epoch": 57.3, + "learning_rate": 9.923292259693185e-05, + "loss": 2.9173, + "step": 33920 + }, + { + "epoch": 57.33, + "learning_rate": 9.922507154693746e-05, + "loss": 2.9559, + "step": 33940 + }, + { + "epoch": 57.36, + "learning_rate": 9.921718083719203e-05, + "loss": 2.9508, + "step": 33960 + }, + { + "epoch": 57.4, + "learning_rate": 9.920925047405296e-05, + "loss": 2.94, + "step": 33980 + }, + { + "epoch": 57.43, + "learning_rate": 9.920128046390961e-05, + "loss": 2.9482, + "step": 34000 + }, + { + "epoch": 57.43, + "eval_loss": 3.0440194606781006, + "eval_runtime": 50.7751, + "eval_samples_per_second": 19.478, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.005973692780821982, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.025863406414690565, + "eval_tse_type": 5.4418042916526235e-05, + "step": 34000 + }, + { + "epoch": 57.47, + "learning_rate": 9.919327081318328e-05, + "loss": 2.9438, + "step": 34020 + }, + { + "epoch": 57.5, + "learning_rate": 9.91852215283272e-05, + "loss": 2.9469, + "step": 34040 + }, + { + "epoch": 57.53, + "learning_rate": 9.917713261582651e-05, + "loss": 2.9494, + "step": 34060 + }, + { + "epoch": 57.57, + "learning_rate": 9.916900408219834e-05, + "loss": 2.9649, + "step": 34080 + }, + { + "epoch": 57.6, + "learning_rate": 9.916083593399166e-05, + "loss": 2.9598, + "step": 34100 + }, + { + "epoch": 57.64, + "learning_rate": 9.915262817778743e-05, + "loss": 2.9585, + "step": 34120 + }, + { + "epoch": 57.67, + "learning_rate": 9.914438082019848e-05, + "loss": 2.9675, + "step": 34140 + }, + { + "epoch": 57.7, + "learning_rate": 9.913609386786955e-05, + "loss": 2.96, + "step": 34160 + }, + { + "epoch": 57.74, + "learning_rate": 9.912776732747729e-05, + "loss": 2.9655, + "step": 34180 + }, + { + "epoch": 57.77, + "learning_rate": 9.911940120573027e-05, + "loss": 2.967, + "step": 34200 + }, + { + "epoch": 57.8, + "learning_rate": 9.911099550936887e-05, + "loss": 2.966, + "step": 34220 + }, + { + "epoch": 57.84, + "learning_rate": 9.910255024516546e-05, + "loss": 2.9928, + "step": 34240 + }, + { + "epoch": 57.87, + "learning_rate": 9.909406541992421e-05, + "loss": 2.9733, + "step": 34260 + }, + { + "epoch": 57.91, + "learning_rate": 9.90855410404812e-05, + "loss": 2.9482, + "step": 34280 + }, + { + "epoch": 57.94, + "learning_rate": 9.907697711370437e-05, + "loss": 2.9731, + "step": 34300 + }, + { + "epoch": 57.97, + "learning_rate": 9.90683736464935e-05, + "loss": 2.9786, + "step": 34320 + }, + { + "epoch": 58.01, + "learning_rate": 9.905973064578029e-05, + "loss": 2.9644, + "step": 34340 + }, + { + "epoch": 58.04, + "learning_rate": 9.905104811852822e-05, + "loss": 2.9021, + "step": 34360 + }, + { + "epoch": 58.07, + "learning_rate": 9.904232607173262e-05, + "loss": 2.9144, + "step": 34380 + }, + { + "epoch": 58.11, + "learning_rate": 9.903356451242073e-05, + "loss": 2.8858, + "step": 34400 + }, + { + "epoch": 58.14, + "learning_rate": 9.902476344765157e-05, + "loss": 2.8912, + "step": 34420 + }, + { + "epoch": 58.18, + "learning_rate": 9.901592288451599e-05, + "loss": 2.8995, + "step": 34440 + }, + { + "epoch": 58.21, + "learning_rate": 9.900704283013668e-05, + "loss": 2.9142, + "step": 34460 + }, + { + "epoch": 58.24, + "learning_rate": 9.899812329166814e-05, + "loss": 2.9125, + "step": 34480 + }, + { + "epoch": 58.28, + "learning_rate": 9.898916427629665e-05, + "loss": 2.9074, + "step": 34500 + }, + { + "epoch": 58.31, + "learning_rate": 9.898016579124037e-05, + "loss": 2.9143, + "step": 34520 + }, + { + "epoch": 58.34, + "learning_rate": 9.89711278437492e-05, + "loss": 2.9293, + "step": 34540 + }, + { + "epoch": 58.38, + "learning_rate": 9.896205044110486e-05, + "loss": 2.9196, + "step": 34560 + }, + { + "epoch": 58.41, + "learning_rate": 9.895293359062084e-05, + "loss": 2.9231, + "step": 34580 + }, + { + "epoch": 58.45, + "learning_rate": 9.894377729964241e-05, + "loss": 2.9108, + "step": 34600 + }, + { + "epoch": 58.48, + "learning_rate": 9.893504229817371e-05, + "loss": 2.9392, + "step": 34620 + }, + { + "epoch": 58.51, + "learning_rate": 9.892580911947841e-05, + "loss": 2.917, + "step": 34640 + }, + { + "epoch": 58.55, + "learning_rate": 9.891653652214241e-05, + "loss": 2.9132, + "step": 34660 + }, + { + "epoch": 58.58, + "learning_rate": 9.890722451363648e-05, + "loss": 2.9256, + "step": 34680 + }, + { + "epoch": 58.61, + "learning_rate": 9.889787310146313e-05, + "loss": 2.9259, + "step": 34700 + }, + { + "epoch": 58.65, + "learning_rate": 9.88884822931566e-05, + "loss": 2.9287, + "step": 34720 + }, + { + "epoch": 58.68, + "learning_rate": 9.887905209628295e-05, + "loss": 2.9423, + "step": 34740 + }, + { + "epoch": 58.72, + "learning_rate": 9.886958251843985e-05, + "loss": 2.9489, + "step": 34760 + }, + { + "epoch": 58.75, + "learning_rate": 9.886007356725685e-05, + "loss": 2.9277, + "step": 34780 + }, + { + "epoch": 58.78, + "learning_rate": 9.885052525039505e-05, + "loss": 2.9371, + "step": 34800 + }, + { + "epoch": 58.82, + "learning_rate": 9.884093757554743e-05, + "loss": 2.9509, + "step": 34820 + }, + { + "epoch": 58.85, + "learning_rate": 9.883131055043857e-05, + "loss": 2.9292, + "step": 34840 + }, + { + "epoch": 58.89, + "learning_rate": 9.882164418282481e-05, + "loss": 2.9503, + "step": 34860 + }, + { + "epoch": 58.92, + "learning_rate": 9.881193848049415e-05, + "loss": 2.932, + "step": 34880 + }, + { + "epoch": 58.95, + "learning_rate": 9.880219345126628e-05, + "loss": 2.951, + "step": 34900 + }, + { + "epoch": 58.99, + "learning_rate": 9.879240910299265e-05, + "loss": 2.9435, + "step": 34920 + }, + { + "epoch": 59.02, + "learning_rate": 9.87825854435563e-05, + "loss": 2.8869, + "step": 34940 + }, + { + "epoch": 59.05, + "learning_rate": 9.877272248087197e-05, + "loss": 2.8609, + "step": 34960 + }, + { + "epoch": 59.09, + "learning_rate": 9.876282022288609e-05, + "loss": 2.8696, + "step": 34980 + }, + { + "epoch": 59.12, + "learning_rate": 9.875287867757671e-05, + "loss": 2.8636, + "step": 35000 + }, + { + "epoch": 59.12, + "eval_loss": 3.0145843029022217, + "eval_runtime": 47.4119, + "eval_samples_per_second": 20.86, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.006429794500629647, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.029731352765353727, + "eval_tse_type": 1.5798786653185036e-05, + "step": 35000 + }, + { + "epoch": 59.16, + "learning_rate": 9.874289785295356e-05, + "loss": 2.8783, + "step": 35020 + }, + { + "epoch": 59.19, + "learning_rate": 9.873287775705801e-05, + "loss": 2.8789, + "step": 35040 + }, + { + "epoch": 59.22, + "learning_rate": 9.87228183979631e-05, + "loss": 2.8784, + "step": 35060 + }, + { + "epoch": 59.26, + "learning_rate": 9.871271978377341e-05, + "loss": 2.8911, + "step": 35080 + }, + { + "epoch": 59.29, + "learning_rate": 9.870258192262526e-05, + "loss": 2.887, + "step": 35100 + }, + { + "epoch": 59.32, + "learning_rate": 9.869240482268653e-05, + "loss": 2.8873, + "step": 35120 + }, + { + "epoch": 59.36, + "learning_rate": 9.868218849215673e-05, + "loss": 2.8856, + "step": 35140 + }, + { + "epoch": 59.39, + "learning_rate": 9.867193293926695e-05, + "loss": 2.9085, + "step": 35160 + }, + { + "epoch": 59.43, + "learning_rate": 9.866163817227994e-05, + "loss": 2.9092, + "step": 35180 + }, + { + "epoch": 59.46, + "learning_rate": 9.865130419948998e-05, + "loss": 2.9038, + "step": 35200 + }, + { + "epoch": 59.49, + "learning_rate": 9.864093102922299e-05, + "loss": 2.8948, + "step": 35220 + }, + { + "epoch": 59.53, + "learning_rate": 9.863051866983642e-05, + "loss": 2.901, + "step": 35240 + }, + { + "epoch": 59.56, + "learning_rate": 9.862006712971932e-05, + "loss": 2.9121, + "step": 35260 + }, + { + "epoch": 59.59, + "learning_rate": 9.860957641729233e-05, + "loss": 2.9218, + "step": 35280 + }, + { + "epoch": 59.63, + "learning_rate": 9.85990465410076e-05, + "loss": 2.9, + "step": 35300 + }, + { + "epoch": 59.66, + "learning_rate": 9.85884775093489e-05, + "loss": 2.8993, + "step": 35320 + }, + { + "epoch": 59.7, + "learning_rate": 9.857786933083146e-05, + "loss": 2.8981, + "step": 35340 + }, + { + "epoch": 59.73, + "learning_rate": 9.856722201400215e-05, + "loss": 2.9141, + "step": 35360 + }, + { + "epoch": 59.76, + "learning_rate": 9.855653556743927e-05, + "loss": 2.9163, + "step": 35380 + }, + { + "epoch": 59.8, + "learning_rate": 9.854580999975271e-05, + "loss": 2.9001, + "step": 35400 + }, + { + "epoch": 59.83, + "learning_rate": 9.85350453195839e-05, + "loss": 2.9094, + "step": 35420 + }, + { + "epoch": 59.86, + "learning_rate": 9.85242415356057e-05, + "loss": 2.9078, + "step": 35440 + }, + { + "epoch": 59.9, + "learning_rate": 9.851339865652259e-05, + "loss": 2.915, + "step": 35460 + }, + { + "epoch": 59.93, + "learning_rate": 9.85025166910704e-05, + "loss": 2.9259, + "step": 35480 + }, + { + "epoch": 59.97, + "learning_rate": 9.849159564801659e-05, + "loss": 2.9205, + "step": 35500 + }, + { + "epoch": 60.0, + "learning_rate": 9.848063553616003e-05, + "loss": 2.9246, + "step": 35520 + }, + { + "epoch": 60.03, + "learning_rate": 9.846963636433106e-05, + "loss": 2.8246, + "step": 35540 + }, + { + "epoch": 60.07, + "learning_rate": 9.845859814139157e-05, + "loss": 2.8608, + "step": 35560 + }, + { + "epoch": 60.1, + "learning_rate": 9.84475208762348e-05, + "loss": 2.8313, + "step": 35580 + }, + { + "epoch": 60.14, + "learning_rate": 9.843640457778554e-05, + "loss": 2.857, + "step": 35600 + }, + { + "epoch": 60.17, + "learning_rate": 9.842524925499999e-05, + "loss": 2.8624, + "step": 35620 + }, + { + "epoch": 60.2, + "learning_rate": 9.841405491686576e-05, + "loss": 2.8565, + "step": 35640 + }, + { + "epoch": 60.24, + "learning_rate": 9.840282157240194e-05, + "loss": 2.8457, + "step": 35660 + }, + { + "epoch": 60.27, + "learning_rate": 9.839154923065908e-05, + "loss": 2.8677, + "step": 35680 + }, + { + "epoch": 60.3, + "learning_rate": 9.838023790071903e-05, + "loss": 2.8682, + "step": 35700 + }, + { + "epoch": 60.34, + "learning_rate": 9.836888759169516e-05, + "loss": 2.8505, + "step": 35720 + }, + { + "epoch": 60.37, + "learning_rate": 9.83574983127322e-05, + "loss": 2.8816, + "step": 35740 + }, + { + "epoch": 60.41, + "learning_rate": 9.834607007300629e-05, + "loss": 2.8709, + "step": 35760 + }, + { + "epoch": 60.44, + "learning_rate": 9.833460288172495e-05, + "loss": 2.8759, + "step": 35780 + }, + { + "epoch": 60.47, + "learning_rate": 9.832309674812712e-05, + "loss": 2.8548, + "step": 35800 + }, + { + "epoch": 60.51, + "learning_rate": 9.831155168148305e-05, + "loss": 2.8642, + "step": 35820 + }, + { + "epoch": 60.54, + "learning_rate": 9.82999676910944e-05, + "loss": 2.886, + "step": 35840 + }, + { + "epoch": 60.57, + "learning_rate": 9.828834478629418e-05, + "loss": 2.8796, + "step": 35860 + }, + { + "epoch": 60.61, + "learning_rate": 9.827668297644675e-05, + "loss": 2.888, + "step": 35880 + }, + { + "epoch": 60.64, + "learning_rate": 9.826498227094784e-05, + "loss": 2.8884, + "step": 35900 + }, + { + "epoch": 60.68, + "learning_rate": 9.825324267922449e-05, + "loss": 2.9063, + "step": 35920 + }, + { + "epoch": 60.71, + "learning_rate": 9.824146421073506e-05, + "loss": 2.8783, + "step": 35940 + }, + { + "epoch": 60.74, + "learning_rate": 9.822964687496926e-05, + "loss": 2.8948, + "step": 35960 + }, + { + "epoch": 60.78, + "learning_rate": 9.82177906814481e-05, + "loss": 2.8993, + "step": 35980 + }, + { + "epoch": 60.81, + "learning_rate": 9.820589563972392e-05, + "loss": 2.8785, + "step": 36000 + }, + { + "epoch": 60.81, + "eval_loss": 2.9853713512420654, + "eval_runtime": 47.5856, + "eval_samples_per_second": 20.784, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.006577343696938163, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.026311128506210715, + "eval_tse_type": 7.208581726990321e-05, + "step": 36000 + }, + { + "epoch": 60.84, + "learning_rate": 9.819396175938032e-05, + "loss": 2.8898, + "step": 36020 + }, + { + "epoch": 60.88, + "learning_rate": 9.818198905003222e-05, + "loss": 2.8822, + "step": 36040 + }, + { + "epoch": 60.91, + "learning_rate": 9.816997752132582e-05, + "loss": 2.882, + "step": 36060 + }, + { + "epoch": 60.95, + "learning_rate": 9.815792718293857e-05, + "loss": 2.8989, + "step": 36080 + }, + { + "epoch": 60.98, + "learning_rate": 9.814583804457924e-05, + "loss": 2.9002, + "step": 36100 + }, + { + "epoch": 61.01, + "learning_rate": 9.81337101159878e-05, + "loss": 2.8762, + "step": 36120 + }, + { + "epoch": 61.05, + "learning_rate": 9.812154340693553e-05, + "loss": 2.8286, + "step": 36140 + }, + { + "epoch": 61.08, + "learning_rate": 9.810933792722492e-05, + "loss": 2.8041, + "step": 36160 + }, + { + "epoch": 61.11, + "learning_rate": 9.809709368668969e-05, + "loss": 2.8286, + "step": 36180 + }, + { + "epoch": 61.15, + "learning_rate": 9.808481069519482e-05, + "loss": 2.8265, + "step": 36200 + }, + { + "epoch": 61.18, + "learning_rate": 9.807248896263647e-05, + "loss": 2.8351, + "step": 36220 + }, + { + "epoch": 61.22, + "learning_rate": 9.806012849894208e-05, + "loss": 2.8225, + "step": 36240 + }, + { + "epoch": 61.25, + "learning_rate": 9.804772931407023e-05, + "loss": 2.8444, + "step": 36260 + }, + { + "epoch": 61.28, + "learning_rate": 9.803529141801071e-05, + "loss": 2.8221, + "step": 36280 + }, + { + "epoch": 61.32, + "learning_rate": 9.802281482078453e-05, + "loss": 2.8442, + "step": 36300 + }, + { + "epoch": 61.35, + "learning_rate": 9.801029953244383e-05, + "loss": 2.8469, + "step": 36320 + }, + { + "epoch": 61.39, + "learning_rate": 9.799774556307195e-05, + "loss": 2.8387, + "step": 36340 + }, + { + "epoch": 61.42, + "learning_rate": 9.798515292278344e-05, + "loss": 2.8459, + "step": 36360 + }, + { + "epoch": 61.45, + "learning_rate": 9.797252162172393e-05, + "loss": 2.8553, + "step": 36380 + }, + { + "epoch": 61.49, + "learning_rate": 9.795985167007023e-05, + "loss": 2.8497, + "step": 36400 + }, + { + "epoch": 61.52, + "learning_rate": 9.79471430780303e-05, + "loss": 2.8559, + "step": 36420 + }, + { + "epoch": 61.55, + "learning_rate": 9.793439585584324e-05, + "loss": 2.8482, + "step": 36440 + }, + { + "epoch": 61.59, + "learning_rate": 9.792161001377921e-05, + "loss": 2.8522, + "step": 36460 + }, + { + "epoch": 61.62, + "learning_rate": 9.790878556213957e-05, + "loss": 2.86, + "step": 36480 + }, + { + "epoch": 61.66, + "learning_rate": 9.789592251125674e-05, + "loss": 2.874, + "step": 36500 + }, + { + "epoch": 61.69, + "learning_rate": 9.788302087149426e-05, + "loss": 2.8562, + "step": 36520 + }, + { + "epoch": 61.72, + "learning_rate": 9.787008065324672e-05, + "loss": 2.8655, + "step": 36540 + }, + { + "epoch": 61.76, + "learning_rate": 9.785710186693983e-05, + "loss": 2.8679, + "step": 36560 + }, + { + "epoch": 61.79, + "learning_rate": 9.784408452303037e-05, + "loss": 2.8656, + "step": 36580 + }, + { + "epoch": 61.82, + "learning_rate": 9.78310286320062e-05, + "loss": 2.8638, + "step": 36600 + }, + { + "epoch": 61.86, + "learning_rate": 9.781793420438617e-05, + "loss": 2.8619, + "step": 36620 + }, + { + "epoch": 61.89, + "learning_rate": 9.780480125072026e-05, + "loss": 2.874, + "step": 36640 + }, + { + "epoch": 61.93, + "learning_rate": 9.779162978158944e-05, + "loss": 2.872, + "step": 36660 + }, + { + "epoch": 61.96, + "learning_rate": 9.777841980760571e-05, + "loss": 2.8837, + "step": 36680 + }, + { + "epoch": 61.99, + "learning_rate": 9.776517133941214e-05, + "loss": 2.8674, + "step": 36700 + }, + { + "epoch": 62.03, + "learning_rate": 9.775188438768276e-05, + "loss": 2.8009, + "step": 36720 + }, + { + "epoch": 62.06, + "learning_rate": 9.773855896312263e-05, + "loss": 2.7796, + "step": 36740 + }, + { + "epoch": 62.09, + "learning_rate": 9.77251950764678e-05, + "loss": 2.8039, + "step": 36760 + }, + { + "epoch": 62.13, + "learning_rate": 9.771179273848532e-05, + "loss": 2.8124, + "step": 36780 + }, + { + "epoch": 62.16, + "learning_rate": 9.769902491169436e-05, + "loss": 2.8154, + "step": 36800 + }, + { + "epoch": 62.2, + "learning_rate": 9.768554762470898e-05, + "loss": 2.8202, + "step": 36820 + }, + { + "epoch": 62.23, + "learning_rate": 9.767203191833918e-05, + "loss": 2.8323, + "step": 36840 + }, + { + "epoch": 62.26, + "learning_rate": 9.765847780347432e-05, + "loss": 2.8114, + "step": 36860 + }, + { + "epoch": 62.3, + "learning_rate": 9.764488529103471e-05, + "loss": 2.8178, + "step": 36880 + }, + { + "epoch": 62.33, + "learning_rate": 9.76312543919716e-05, + "loss": 2.8239, + "step": 36900 + }, + { + "epoch": 62.36, + "learning_rate": 9.761758511726715e-05, + "loss": 2.8225, + "step": 36920 + }, + { + "epoch": 62.4, + "learning_rate": 9.760387747793445e-05, + "loss": 2.8288, + "step": 36940 + }, + { + "epoch": 62.43, + "learning_rate": 9.759013148501747e-05, + "loss": 2.8147, + "step": 36960 + }, + { + "epoch": 62.47, + "learning_rate": 9.757634714959117e-05, + "loss": 2.8262, + "step": 36980 + }, + { + "epoch": 62.5, + "learning_rate": 9.756252448276127e-05, + "loss": 2.8296, + "step": 37000 + }, + { + "epoch": 62.5, + "eval_loss": 2.9655823707580566, + "eval_runtime": 50.843, + "eval_samples_per_second": 19.452, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.005690481399529718, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.030002900498550245, + "eval_tse_type": 0.00014043365913942254, + "step": 37000 + }, + { + "epoch": 62.53, + "learning_rate": 9.754866349566449e-05, + "loss": 2.8358, + "step": 37020 + }, + { + "epoch": 62.57, + "learning_rate": 9.753476419946837e-05, + "loss": 2.8293, + "step": 37040 + }, + { + "epoch": 62.6, + "learning_rate": 9.752082660537132e-05, + "loss": 2.8435, + "step": 37060 + }, + { + "epoch": 62.64, + "learning_rate": 9.750685072460259e-05, + "loss": 2.8594, + "step": 37080 + }, + { + "epoch": 62.67, + "learning_rate": 9.749283656842232e-05, + "loss": 2.828, + "step": 37100 + }, + { + "epoch": 62.7, + "learning_rate": 9.747878414812144e-05, + "loss": 2.8415, + "step": 37120 + }, + { + "epoch": 62.74, + "learning_rate": 9.746469347502174e-05, + "loss": 2.8504, + "step": 37140 + }, + { + "epoch": 62.77, + "learning_rate": 9.745056456047583e-05, + "loss": 2.8391, + "step": 37160 + }, + { + "epoch": 62.8, + "learning_rate": 9.74363974158671e-05, + "loss": 2.8424, + "step": 37180 + }, + { + "epoch": 62.84, + "learning_rate": 9.742219205260978e-05, + "loss": 2.8376, + "step": 37200 + }, + { + "epoch": 62.87, + "learning_rate": 9.74079484821489e-05, + "loss": 2.8369, + "step": 37220 + }, + { + "epoch": 62.91, + "learning_rate": 9.739366671596018e-05, + "loss": 2.8418, + "step": 37240 + }, + { + "epoch": 62.94, + "learning_rate": 9.737934676555024e-05, + "loss": 2.8364, + "step": 37260 + }, + { + "epoch": 62.97, + "learning_rate": 9.736498864245638e-05, + "loss": 2.8364, + "step": 37280 + }, + { + "epoch": 63.01, + "learning_rate": 9.735059235824669e-05, + "loss": 2.8414, + "step": 37300 + }, + { + "epoch": 63.04, + "learning_rate": 9.733615792451998e-05, + "loss": 2.7791, + "step": 37320 + }, + { + "epoch": 63.07, + "learning_rate": 9.732168535290583e-05, + "loss": 2.7735, + "step": 37340 + }, + { + "epoch": 63.11, + "learning_rate": 9.730717465506452e-05, + "loss": 2.7741, + "step": 37360 + }, + { + "epoch": 63.14, + "learning_rate": 9.729262584268707e-05, + "loss": 2.772, + "step": 37380 + }, + { + "epoch": 63.18, + "learning_rate": 9.727803892749518e-05, + "loss": 2.799, + "step": 37400 + }, + { + "epoch": 63.21, + "learning_rate": 9.726341392124127e-05, + "loss": 2.7911, + "step": 37420 + }, + { + "epoch": 63.24, + "learning_rate": 9.724875083570844e-05, + "loss": 2.7756, + "step": 37440 + }, + { + "epoch": 63.28, + "learning_rate": 9.723404968271049e-05, + "loss": 2.7889, + "step": 37460 + }, + { + "epoch": 63.31, + "learning_rate": 9.721931047409184e-05, + "loss": 2.8044, + "step": 37480 + }, + { + "epoch": 63.34, + "learning_rate": 9.720453322172764e-05, + "loss": 2.811, + "step": 37500 + }, + { + "epoch": 63.38, + "learning_rate": 9.718971793752363e-05, + "loss": 2.7998, + "step": 37520 + }, + { + "epoch": 63.41, + "learning_rate": 9.717486463341623e-05, + "loss": 2.7907, + "step": 37540 + }, + { + "epoch": 63.45, + "learning_rate": 9.715997332137248e-05, + "loss": 2.8197, + "step": 37560 + }, + { + "epoch": 63.48, + "learning_rate": 9.714504401339003e-05, + "loss": 2.7852, + "step": 37580 + }, + { + "epoch": 63.51, + "learning_rate": 9.713007672149716e-05, + "loss": 2.7928, + "step": 37600 + }, + { + "epoch": 63.55, + "learning_rate": 9.711507145775274e-05, + "loss": 2.7938, + "step": 37620 + }, + { + "epoch": 63.58, + "learning_rate": 9.710002823424626e-05, + "loss": 2.8138, + "step": 37640 + }, + { + "epoch": 63.61, + "learning_rate": 9.708494706309775e-05, + "loss": 2.8227, + "step": 37660 + }, + { + "epoch": 63.65, + "learning_rate": 9.706982795645784e-05, + "loss": 2.8256, + "step": 37680 + }, + { + "epoch": 63.68, + "learning_rate": 9.705467092650775e-05, + "loss": 2.8179, + "step": 37700 + }, + { + "epoch": 63.72, + "learning_rate": 9.70394759854592e-05, + "loss": 2.8439, + "step": 37720 + }, + { + "epoch": 63.75, + "learning_rate": 9.702424314555447e-05, + "loss": 2.8262, + "step": 37740 + }, + { + "epoch": 63.78, + "learning_rate": 9.700897241906642e-05, + "loss": 2.8342, + "step": 37760 + }, + { + "epoch": 63.82, + "learning_rate": 9.699366381829836e-05, + "loss": 2.8327, + "step": 37780 + }, + { + "epoch": 63.85, + "learning_rate": 9.697831735558417e-05, + "loss": 2.8321, + "step": 37800 + }, + { + "epoch": 63.89, + "learning_rate": 9.696293304328822e-05, + "loss": 2.8342, + "step": 37820 + }, + { + "epoch": 63.92, + "learning_rate": 9.694751089380536e-05, + "loss": 2.8243, + "step": 37840 + }, + { + "epoch": 63.95, + "learning_rate": 9.693205091956095e-05, + "loss": 2.8131, + "step": 37860 + }, + { + "epoch": 63.99, + "learning_rate": 9.691655313301082e-05, + "loss": 2.8244, + "step": 37880 + }, + { + "epoch": 64.02, + "learning_rate": 9.690101754664124e-05, + "loss": 2.777, + "step": 37900 + }, + { + "epoch": 64.05, + "learning_rate": 9.688544417296896e-05, + "loss": 2.7272, + "step": 37920 + }, + { + "epoch": 64.09, + "learning_rate": 9.686983302454116e-05, + "loss": 2.7529, + "step": 37940 + }, + { + "epoch": 64.12, + "learning_rate": 9.685418411393545e-05, + "loss": 2.7637, + "step": 37960 + }, + { + "epoch": 64.16, + "learning_rate": 9.68384974537599e-05, + "loss": 2.7561, + "step": 37980 + }, + { + "epoch": 64.19, + "learning_rate": 9.682277305665296e-05, + "loss": 2.7802, + "step": 38000 + }, + { + "epoch": 64.19, + "eval_loss": 2.940242052078247, + "eval_runtime": 47.6927, + "eval_samples_per_second": 20.737, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004603012748291356, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.029666812137402826, + "eval_tse_type": 3.686383552409842e-05, + "step": 38000 + }, + { + "epoch": 64.22, + "learning_rate": 9.680701093528348e-05, + "loss": 2.7769, + "step": 38020 + }, + { + "epoch": 64.26, + "learning_rate": 9.679121110235072e-05, + "loss": 2.7844, + "step": 38040 + }, + { + "epoch": 64.29, + "learning_rate": 9.677537357058433e-05, + "loss": 2.7848, + "step": 38060 + }, + { + "epoch": 64.32, + "learning_rate": 9.675949835274434e-05, + "loss": 2.7617, + "step": 38080 + }, + { + "epoch": 64.36, + "learning_rate": 9.674358546162108e-05, + "loss": 2.79, + "step": 38100 + }, + { + "epoch": 64.39, + "learning_rate": 9.672763491003531e-05, + "loss": 2.7904, + "step": 38120 + }, + { + "epoch": 64.43, + "learning_rate": 9.67116467108381e-05, + "loss": 2.7851, + "step": 38140 + }, + { + "epoch": 64.46, + "learning_rate": 9.669562087691085e-05, + "loss": 2.7901, + "step": 38160 + }, + { + "epoch": 64.49, + "learning_rate": 9.667955742116528e-05, + "loss": 2.7962, + "step": 38180 + }, + { + "epoch": 64.53, + "learning_rate": 9.666345635654342e-05, + "loss": 2.7811, + "step": 38200 + }, + { + "epoch": 64.56, + "learning_rate": 9.664731769601763e-05, + "loss": 2.795, + "step": 38220 + }, + { + "epoch": 64.59, + "learning_rate": 9.663114145259053e-05, + "loss": 2.7813, + "step": 38240 + }, + { + "epoch": 64.63, + "learning_rate": 9.6614927639295e-05, + "loss": 2.8024, + "step": 38260 + }, + { + "epoch": 64.66, + "learning_rate": 9.659867626919425e-05, + "loss": 2.7834, + "step": 38280 + }, + { + "epoch": 64.7, + "learning_rate": 9.65823873553817e-05, + "loss": 2.79, + "step": 38300 + }, + { + "epoch": 64.73, + "learning_rate": 9.656606091098104e-05, + "loss": 2.8086, + "step": 38320 + }, + { + "epoch": 64.76, + "learning_rate": 9.65496969491462e-05, + "loss": 2.7913, + "step": 38340 + }, + { + "epoch": 64.8, + "learning_rate": 9.65332954830613e-05, + "loss": 2.7653, + "step": 38360 + }, + { + "epoch": 64.83, + "learning_rate": 9.651685652594072e-05, + "loss": 2.7919, + "step": 38380 + }, + { + "epoch": 64.86, + "learning_rate": 9.650038009102905e-05, + "loss": 2.822, + "step": 38400 + }, + { + "epoch": 64.9, + "learning_rate": 9.648386619160101e-05, + "loss": 2.8155, + "step": 38420 + }, + { + "epoch": 64.93, + "learning_rate": 9.64673148409616e-05, + "loss": 2.8159, + "step": 38440 + }, + { + "epoch": 64.97, + "learning_rate": 9.645072605244592e-05, + "loss": 2.7967, + "step": 38460 + }, + { + "epoch": 65.0, + "learning_rate": 9.643409983941925e-05, + "loss": 2.8201, + "step": 38480 + }, + { + "epoch": 65.03, + "learning_rate": 9.641743621527706e-05, + "loss": 2.7267, + "step": 38500 + }, + { + "epoch": 65.07, + "learning_rate": 9.640073519344489e-05, + "loss": 2.7323, + "step": 38520 + }, + { + "epoch": 65.1, + "learning_rate": 9.638399678737848e-05, + "loss": 2.7501, + "step": 38540 + }, + { + "epoch": 65.14, + "learning_rate": 9.636722101056366e-05, + "loss": 2.7479, + "step": 38560 + }, + { + "epoch": 65.17, + "learning_rate": 9.635040787651636e-05, + "loss": 2.7484, + "step": 38580 + }, + { + "epoch": 65.2, + "learning_rate": 9.633355739878262e-05, + "loss": 2.7523, + "step": 38600 + }, + { + "epoch": 65.24, + "learning_rate": 9.631666959093857e-05, + "loss": 2.7429, + "step": 38620 + }, + { + "epoch": 65.27, + "learning_rate": 9.62997444665904e-05, + "loss": 2.7495, + "step": 38640 + }, + { + "epoch": 65.3, + "learning_rate": 9.62827820393744e-05, + "loss": 2.7659, + "step": 38660 + }, + { + "epoch": 65.34, + "learning_rate": 9.626578232295689e-05, + "loss": 2.7523, + "step": 38680 + }, + { + "epoch": 65.37, + "learning_rate": 9.624874533103421e-05, + "loss": 2.7508, + "step": 38700 + }, + { + "epoch": 65.41, + "learning_rate": 9.623167107733275e-05, + "loss": 2.754, + "step": 38720 + }, + { + "epoch": 65.44, + "learning_rate": 9.621455957560898e-05, + "loss": 2.7731, + "step": 38740 + }, + { + "epoch": 65.47, + "learning_rate": 9.619741083964929e-05, + "loss": 2.7851, + "step": 38760 + }, + { + "epoch": 65.51, + "learning_rate": 9.618022488327009e-05, + "loss": 2.7552, + "step": 38780 + }, + { + "epoch": 65.54, + "learning_rate": 9.616300172031782e-05, + "loss": 2.7665, + "step": 38800 + }, + { + "epoch": 65.57, + "learning_rate": 9.614574136466888e-05, + "loss": 2.7752, + "step": 38820 + }, + { + "epoch": 65.61, + "learning_rate": 9.61284438302296e-05, + "loss": 2.7793, + "step": 38840 + }, + { + "epoch": 65.64, + "learning_rate": 9.611110913093633e-05, + "loss": 2.7602, + "step": 38860 + }, + { + "epoch": 65.68, + "learning_rate": 9.609460675538197e-05, + "loss": 2.7783, + "step": 38880 + }, + { + "epoch": 65.71, + "learning_rate": 9.607719962482106e-05, + "loss": 2.7763, + "step": 38900 + }, + { + "epoch": 65.74, + "learning_rate": 9.605975537069267e-05, + "loss": 2.788, + "step": 38920 + }, + { + "epoch": 65.78, + "learning_rate": 9.604227400705133e-05, + "loss": 2.7826, + "step": 38940 + }, + { + "epoch": 65.81, + "learning_rate": 9.602475554798141e-05, + "loss": 2.7788, + "step": 38960 + }, + { + "epoch": 65.84, + "learning_rate": 9.600720000759728e-05, + "loss": 2.7906, + "step": 38980 + }, + { + "epoch": 65.88, + "learning_rate": 9.598960740004308e-05, + "loss": 2.783, + "step": 39000 + }, + { + "epoch": 65.88, + "eval_loss": 2.9106831550598145, + "eval_runtime": 47.4521, + "eval_samples_per_second": 20.842, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.005352210245498537, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03049848284971481, + "eval_tse_type": 3.686383552409842e-05, + "step": 39000 + }, + { + "epoch": 65.91, + "learning_rate": 9.597197773949286e-05, + "loss": 2.7798, + "step": 39020 + }, + { + "epoch": 65.95, + "learning_rate": 9.595431104015055e-05, + "loss": 2.7667, + "step": 39040 + }, + { + "epoch": 65.98, + "learning_rate": 9.593660731624987e-05, + "loss": 2.7856, + "step": 39060 + }, + { + "epoch": 66.01, + "learning_rate": 9.591886658205438e-05, + "loss": 2.7445, + "step": 39080 + }, + { + "epoch": 66.05, + "learning_rate": 9.590108885185749e-05, + "loss": 2.718, + "step": 39100 + }, + { + "epoch": 66.08, + "learning_rate": 9.588327413998237e-05, + "loss": 2.708, + "step": 39120 + }, + { + "epoch": 66.11, + "learning_rate": 9.586542246078203e-05, + "loss": 2.7218, + "step": 39140 + }, + { + "epoch": 66.15, + "learning_rate": 9.584753382863924e-05, + "loss": 2.7182, + "step": 39160 + }, + { + "epoch": 66.18, + "learning_rate": 9.582960825796656e-05, + "loss": 2.7243, + "step": 39180 + }, + { + "epoch": 66.22, + "learning_rate": 9.581164576320629e-05, + "loss": 2.7027, + "step": 39200 + }, + { + "epoch": 66.25, + "learning_rate": 9.579364635883048e-05, + "loss": 2.7287, + "step": 39220 + }, + { + "epoch": 66.28, + "learning_rate": 9.577561005934093e-05, + "loss": 2.7371, + "step": 39240 + }, + { + "epoch": 66.32, + "learning_rate": 9.575753687926916e-05, + "loss": 2.7484, + "step": 39260 + }, + { + "epoch": 66.35, + "learning_rate": 9.573942683317641e-05, + "loss": 2.7426, + "step": 39280 + }, + { + "epoch": 66.39, + "learning_rate": 9.572127993565362e-05, + "loss": 2.734, + "step": 39300 + }, + { + "epoch": 66.42, + "learning_rate": 9.57030962013214e-05, + "loss": 2.7459, + "step": 39320 + }, + { + "epoch": 66.45, + "learning_rate": 9.568487564483008e-05, + "loss": 2.7317, + "step": 39340 + }, + { + "epoch": 66.49, + "learning_rate": 9.56666182808596e-05, + "loss": 2.7504, + "step": 39360 + }, + { + "epoch": 66.52, + "learning_rate": 9.564832412411964e-05, + "loss": 2.7567, + "step": 39380 + }, + { + "epoch": 66.55, + "learning_rate": 9.562999318934942e-05, + "loss": 2.7614, + "step": 39400 + }, + { + "epoch": 66.59, + "learning_rate": 9.561162549131788e-05, + "loss": 2.7433, + "step": 39420 + }, + { + "epoch": 66.62, + "learning_rate": 9.559322104482351e-05, + "loss": 2.7592, + "step": 39440 + }, + { + "epoch": 66.66, + "learning_rate": 9.557477986469445e-05, + "loss": 2.7705, + "step": 39460 + }, + { + "epoch": 66.69, + "learning_rate": 9.555630196578845e-05, + "loss": 2.7589, + "step": 39480 + }, + { + "epoch": 66.72, + "learning_rate": 9.553778736299279e-05, + "loss": 2.7704, + "step": 39500 + }, + { + "epoch": 66.76, + "learning_rate": 9.551923607122437e-05, + "loss": 2.7775, + "step": 39520 + }, + { + "epoch": 66.79, + "learning_rate": 9.550064810542962e-05, + "loss": 2.7555, + "step": 39540 + }, + { + "epoch": 66.82, + "learning_rate": 9.548202348058455e-05, + "loss": 2.7396, + "step": 39560 + }, + { + "epoch": 66.86, + "learning_rate": 9.546336221169464e-05, + "loss": 2.7634, + "step": 39580 + }, + { + "epoch": 66.89, + "learning_rate": 9.544466431379498e-05, + "loss": 2.759, + "step": 39600 + }, + { + "epoch": 66.93, + "learning_rate": 9.54259298019501e-05, + "loss": 2.756, + "step": 39620 + }, + { + "epoch": 66.96, + "learning_rate": 9.540715869125407e-05, + "loss": 2.7706, + "step": 39640 + }, + { + "epoch": 66.99, + "learning_rate": 9.538835099683044e-05, + "loss": 2.7784, + "step": 39660 + }, + { + "epoch": 67.03, + "learning_rate": 9.536950673383222e-05, + "loss": 2.7018, + "step": 39680 + }, + { + "epoch": 67.06, + "learning_rate": 9.53506259174419e-05, + "loss": 2.6842, + "step": 39700 + }, + { + "epoch": 67.09, + "learning_rate": 9.533170856287141e-05, + "loss": 2.6832, + "step": 39720 + }, + { + "epoch": 67.13, + "learning_rate": 9.531275468536211e-05, + "loss": 2.7082, + "step": 39740 + }, + { + "epoch": 67.16, + "learning_rate": 9.529376430018482e-05, + "loss": 2.7008, + "step": 39760 + }, + { + "epoch": 67.2, + "learning_rate": 9.527473742263973e-05, + "loss": 2.7047, + "step": 39780 + }, + { + "epoch": 67.23, + "learning_rate": 9.525567406805644e-05, + "loss": 2.6971, + "step": 39800 + }, + { + "epoch": 67.26, + "learning_rate": 9.523657425179399e-05, + "loss": 2.7205, + "step": 39820 + }, + { + "epoch": 67.3, + "learning_rate": 9.521743798924075e-05, + "loss": 2.7176, + "step": 39840 + }, + { + "epoch": 67.33, + "learning_rate": 9.519826529581442e-05, + "loss": 2.7268, + "step": 39860 + }, + { + "epoch": 67.36, + "learning_rate": 9.517905618696212e-05, + "loss": 2.7313, + "step": 39880 + }, + { + "epoch": 67.4, + "learning_rate": 9.51598106781603e-05, + "loss": 2.7348, + "step": 39900 + }, + { + "epoch": 67.43, + "learning_rate": 9.51405287849147e-05, + "loss": 2.7161, + "step": 39920 + }, + { + "epoch": 67.47, + "learning_rate": 9.512121052276037e-05, + "loss": 2.7369, + "step": 39940 + }, + { + "epoch": 67.5, + "learning_rate": 9.510185590726173e-05, + "loss": 2.728, + "step": 39960 + }, + { + "epoch": 67.53, + "learning_rate": 9.508246495401242e-05, + "loss": 2.711, + "step": 39980 + }, + { + "epoch": 67.57, + "learning_rate": 9.506303767863538e-05, + "loss": 2.7312, + "step": 40000 + }, + { + "epoch": 67.57, + "eval_loss": 2.898770809173584, + "eval_runtime": 49.5003, + "eval_samples_per_second": 19.98, + "eval_steps_per_second": 0.121, + "eval_tse_ndup": 0.006783209660764984, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.02881729831959224, + "eval_tse_type": 6.495056735198292e-05, + "step": 40000 + }, + { + "epoch": 67.6, + "learning_rate": 9.504357409678281e-05, + "loss": 2.7354, + "step": 40020 + }, + { + "epoch": 67.64, + "learning_rate": 9.50240742241362e-05, + "loss": 2.7403, + "step": 40040 + }, + { + "epoch": 67.67, + "learning_rate": 9.50045380764062e-05, + "loss": 2.7457, + "step": 40060 + }, + { + "epoch": 67.7, + "learning_rate": 9.498496566933274e-05, + "loss": 2.7379, + "step": 40080 + }, + { + "epoch": 67.74, + "learning_rate": 9.4965357018685e-05, + "loss": 2.7347, + "step": 40100 + }, + { + "epoch": 67.77, + "learning_rate": 9.494571214026126e-05, + "loss": 2.7433, + "step": 40120 + }, + { + "epoch": 67.8, + "learning_rate": 9.492603104988907e-05, + "loss": 2.7424, + "step": 40140 + }, + { + "epoch": 67.84, + "learning_rate": 9.490631376342513e-05, + "loss": 2.736, + "step": 40160 + }, + { + "epoch": 67.87, + "learning_rate": 9.48865602967553e-05, + "loss": 2.7634, + "step": 40180 + }, + { + "epoch": 67.91, + "learning_rate": 9.486677066579456e-05, + "loss": 2.7654, + "step": 40200 + }, + { + "epoch": 67.94, + "learning_rate": 9.484694488648711e-05, + "loss": 2.7387, + "step": 40220 + }, + { + "epoch": 67.97, + "learning_rate": 9.482708297480619e-05, + "loss": 2.7374, + "step": 40240 + }, + { + "epoch": 68.01, + "learning_rate": 9.480718494675419e-05, + "loss": 2.7223, + "step": 40260 + }, + { + "epoch": 68.04, + "learning_rate": 9.478725081836259e-05, + "loss": 2.669, + "step": 40280 + }, + { + "epoch": 68.07, + "learning_rate": 9.476728060569197e-05, + "loss": 2.6612, + "step": 40300 + }, + { + "epoch": 68.11, + "learning_rate": 9.474727432483197e-05, + "loss": 2.676, + "step": 40320 + }, + { + "epoch": 68.14, + "learning_rate": 9.472723199190125e-05, + "loss": 2.6894, + "step": 40340 + }, + { + "epoch": 68.18, + "learning_rate": 9.47071536230476e-05, + "loss": 2.688, + "step": 40360 + }, + { + "epoch": 68.21, + "learning_rate": 9.46870392344478e-05, + "loss": 2.6987, + "step": 40380 + }, + { + "epoch": 68.24, + "learning_rate": 9.466688884230761e-05, + "loss": 2.6964, + "step": 40400 + }, + { + "epoch": 68.28, + "learning_rate": 9.464670246286187e-05, + "loss": 2.6938, + "step": 40420 + }, + { + "epoch": 68.31, + "learning_rate": 9.462648011237439e-05, + "loss": 2.7083, + "step": 40440 + }, + { + "epoch": 68.34, + "learning_rate": 9.460622180713789e-05, + "loss": 2.7013, + "step": 40460 + }, + { + "epoch": 68.38, + "learning_rate": 9.458592756347419e-05, + "loss": 2.7006, + "step": 40480 + }, + { + "epoch": 68.41, + "learning_rate": 9.456559739773398e-05, + "loss": 2.7108, + "step": 40500 + }, + { + "epoch": 68.45, + "learning_rate": 9.454523132629689e-05, + "loss": 2.6886, + "step": 40520 + }, + { + "epoch": 68.48, + "learning_rate": 9.45248293655715e-05, + "loss": 2.708, + "step": 40540 + }, + { + "epoch": 68.51, + "learning_rate": 9.450439153199532e-05, + "loss": 2.719, + "step": 40560 + }, + { + "epoch": 68.55, + "learning_rate": 9.448391784203473e-05, + "loss": 2.7169, + "step": 40580 + }, + { + "epoch": 68.58, + "learning_rate": 9.446340831218499e-05, + "loss": 2.7159, + "step": 40600 + }, + { + "epoch": 68.61, + "learning_rate": 9.444286295897028e-05, + "loss": 2.7166, + "step": 40620 + }, + { + "epoch": 68.65, + "learning_rate": 9.442228179894362e-05, + "loss": 2.7184, + "step": 40640 + }, + { + "epoch": 68.68, + "learning_rate": 9.44016648486869e-05, + "loss": 2.7184, + "step": 40660 + }, + { + "epoch": 68.72, + "learning_rate": 9.438101212481076e-05, + "loss": 2.7079, + "step": 40680 + }, + { + "epoch": 68.75, + "learning_rate": 9.43603236439548e-05, + "loss": 2.7232, + "step": 40700 + }, + { + "epoch": 68.78, + "learning_rate": 9.433959942278732e-05, + "loss": 2.733, + "step": 40720 + }, + { + "epoch": 68.82, + "learning_rate": 9.431883947800543e-05, + "loss": 2.718, + "step": 40740 + }, + { + "epoch": 68.85, + "learning_rate": 9.42980438263351e-05, + "loss": 2.7328, + "step": 40760 + }, + { + "epoch": 68.89, + "learning_rate": 9.427721248453097e-05, + "loss": 2.7245, + "step": 40780 + }, + { + "epoch": 68.92, + "learning_rate": 9.425634546937647e-05, + "loss": 2.7316, + "step": 40800 + }, + { + "epoch": 68.95, + "learning_rate": 9.42354427976838e-05, + "loss": 2.7356, + "step": 40820 + }, + { + "epoch": 68.99, + "learning_rate": 9.421450448629385e-05, + "loss": 2.7379, + "step": 40840 + }, + { + "epoch": 69.02, + "learning_rate": 9.419353055207626e-05, + "loss": 2.6732, + "step": 40860 + }, + { + "epoch": 69.05, + "learning_rate": 9.41735723343163e-05, + "loss": 2.6443, + "step": 40880 + }, + { + "epoch": 69.09, + "learning_rate": 9.41525289842147e-05, + "loss": 2.6368, + "step": 40900 + }, + { + "epoch": 69.12, + "learning_rate": 9.413145006121797e-05, + "loss": 2.6626, + "step": 40920 + }, + { + "epoch": 69.16, + "learning_rate": 9.411033558230904e-05, + "loss": 2.6547, + "step": 40940 + }, + { + "epoch": 69.19, + "learning_rate": 9.408918556449948e-05, + "loss": 2.6757, + "step": 40960 + }, + { + "epoch": 69.22, + "learning_rate": 9.406800002482944e-05, + "loss": 2.6627, + "step": 40980 + }, + { + "epoch": 69.26, + "learning_rate": 9.404677898036776e-05, + "loss": 2.6766, + "step": 41000 + }, + { + "epoch": 69.26, + "eval_loss": 2.8736915588378906, + "eval_runtime": 47.2829, + "eval_samples_per_second": 20.917, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.005726045950618904, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.031480681739473576, + "eval_tse_type": 8.250477474441074e-05, + "step": 41000 + }, + { + "epoch": 69.29, + "learning_rate": 9.402552244821182e-05, + "loss": 2.6666, + "step": 41020 + }, + { + "epoch": 69.32, + "learning_rate": 9.400423044548763e-05, + "loss": 2.6806, + "step": 41040 + }, + { + "epoch": 69.36, + "learning_rate": 9.398290298934979e-05, + "loss": 2.6727, + "step": 41060 + }, + { + "epoch": 69.39, + "learning_rate": 9.396154009698147e-05, + "loss": 2.6937, + "step": 41080 + }, + { + "epoch": 69.43, + "learning_rate": 9.394014178559429e-05, + "loss": 2.6921, + "step": 41100 + }, + { + "epoch": 69.46, + "learning_rate": 9.391870807242855e-05, + "loss": 2.7106, + "step": 41120 + }, + { + "epoch": 69.49, + "learning_rate": 9.389723897475298e-05, + "loss": 2.6732, + "step": 41140 + }, + { + "epoch": 69.53, + "learning_rate": 9.387573450986484e-05, + "loss": 2.7063, + "step": 41160 + }, + { + "epoch": 69.56, + "learning_rate": 9.385419469508991e-05, + "loss": 2.7094, + "step": 41180 + }, + { + "epoch": 69.59, + "learning_rate": 9.383261954778241e-05, + "loss": 2.6941, + "step": 41200 + }, + { + "epoch": 69.63, + "learning_rate": 9.381100908532505e-05, + "loss": 2.6923, + "step": 41220 + }, + { + "epoch": 69.66, + "learning_rate": 9.3789363325129e-05, + "loss": 2.7028, + "step": 41240 + }, + { + "epoch": 69.7, + "learning_rate": 9.376768228463385e-05, + "loss": 2.7205, + "step": 41260 + }, + { + "epoch": 69.73, + "learning_rate": 9.37459659813076e-05, + "loss": 2.709, + "step": 41280 + }, + { + "epoch": 69.76, + "learning_rate": 9.372421443264671e-05, + "loss": 2.7197, + "step": 41300 + }, + { + "epoch": 69.8, + "learning_rate": 9.370242765617603e-05, + "loss": 2.715, + "step": 41320 + }, + { + "epoch": 69.83, + "learning_rate": 9.368060566944874e-05, + "loss": 2.7041, + "step": 41340 + }, + { + "epoch": 69.86, + "learning_rate": 9.365874849004641e-05, + "loss": 2.7144, + "step": 41360 + }, + { + "epoch": 69.9, + "learning_rate": 9.363685613557901e-05, + "loss": 2.7231, + "step": 41380 + }, + { + "epoch": 69.93, + "learning_rate": 9.36149286236848e-05, + "loss": 2.6967, + "step": 41400 + }, + { + "epoch": 69.97, + "learning_rate": 9.359296597203037e-05, + "loss": 2.714, + "step": 41420 + }, + { + "epoch": 70.0, + "learning_rate": 9.357096819831064e-05, + "loss": 2.7044, + "step": 41440 + }, + { + "epoch": 70.03, + "learning_rate": 9.354893532024882e-05, + "loss": 2.6324, + "step": 41460 + }, + { + "epoch": 70.07, + "learning_rate": 9.35268673555964e-05, + "loss": 2.6222, + "step": 41480 + }, + { + "epoch": 70.1, + "learning_rate": 9.350476432213315e-05, + "loss": 2.6242, + "step": 41500 + }, + { + "epoch": 70.14, + "learning_rate": 9.348262623766705e-05, + "loss": 2.657, + "step": 41520 + }, + { + "epoch": 70.17, + "learning_rate": 9.346045312003442e-05, + "loss": 2.6573, + "step": 41540 + }, + { + "epoch": 70.2, + "learning_rate": 9.343824498709968e-05, + "loss": 2.6512, + "step": 41560 + }, + { + "epoch": 70.24, + "learning_rate": 9.341600185675554e-05, + "loss": 2.6473, + "step": 41580 + }, + { + "epoch": 70.27, + "learning_rate": 9.33937237469229e-05, + "loss": 2.6493, + "step": 41600 + }, + { + "epoch": 70.3, + "learning_rate": 9.337141067555081e-05, + "loss": 2.6827, + "step": 41620 + }, + { + "epoch": 70.34, + "learning_rate": 9.334906266061654e-05, + "loss": 2.6617, + "step": 41640 + }, + { + "epoch": 70.37, + "learning_rate": 9.332667972012543e-05, + "loss": 2.669, + "step": 41660 + }, + { + "epoch": 70.41, + "learning_rate": 9.330426187211107e-05, + "loss": 2.6711, + "step": 41680 + }, + { + "epoch": 70.44, + "learning_rate": 9.328180913463508e-05, + "loss": 2.6882, + "step": 41700 + }, + { + "epoch": 70.47, + "learning_rate": 9.325932152578725e-05, + "loss": 2.6664, + "step": 41720 + }, + { + "epoch": 70.51, + "learning_rate": 9.323679906368539e-05, + "loss": 2.6635, + "step": 41740 + }, + { + "epoch": 70.54, + "learning_rate": 9.321424176647551e-05, + "loss": 2.685, + "step": 41760 + }, + { + "epoch": 70.57, + "learning_rate": 9.319164965233156e-05, + "loss": 2.6828, + "step": 41780 + }, + { + "epoch": 70.61, + "learning_rate": 9.316902273945562e-05, + "loss": 2.6887, + "step": 41800 + }, + { + "epoch": 70.64, + "learning_rate": 9.314636104607779e-05, + "loss": 2.7013, + "step": 41820 + }, + { + "epoch": 70.68, + "learning_rate": 9.312366459045618e-05, + "loss": 2.6802, + "step": 41840 + }, + { + "epoch": 70.71, + "learning_rate": 9.31009333908769e-05, + "loss": 2.6761, + "step": 41860 + }, + { + "epoch": 70.74, + "learning_rate": 9.307816746565412e-05, + "loss": 2.6879, + "step": 41880 + }, + { + "epoch": 70.78, + "learning_rate": 9.305536683312988e-05, + "loss": 2.6834, + "step": 41900 + }, + { + "epoch": 70.81, + "learning_rate": 9.303253151167426e-05, + "loss": 2.695, + "step": 41920 + }, + { + "epoch": 70.84, + "learning_rate": 9.300966151968525e-05, + "loss": 2.6889, + "step": 41940 + }, + { + "epoch": 70.88, + "learning_rate": 9.298675687558881e-05, + "loss": 2.68, + "step": 41960 + }, + { + "epoch": 70.91, + "learning_rate": 9.296381759783878e-05, + "loss": 2.6942, + "step": 41980 + }, + { + "epoch": 70.95, + "learning_rate": 9.294084370491694e-05, + "loss": 2.7006, + "step": 42000 + }, + { + "epoch": 70.95, + "eval_loss": 2.8601796627044678, + "eval_runtime": 47.4112, + "eval_samples_per_second": 20.86, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.00567902143861135, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.033240720621940445, + "eval_tse_type": 8.60156162228963e-05, + "step": 42000 + }, + { + "epoch": 70.98, + "learning_rate": 9.291783521533294e-05, + "loss": 2.7007, + "step": 42020 + }, + { + "epoch": 71.01, + "learning_rate": 9.289479214762429e-05, + "loss": 2.671, + "step": 42040 + }, + { + "epoch": 71.05, + "learning_rate": 9.287171452035637e-05, + "loss": 2.6049, + "step": 42060 + }, + { + "epoch": 71.08, + "learning_rate": 9.284860235212242e-05, + "loss": 2.6099, + "step": 42080 + }, + { + "epoch": 71.11, + "learning_rate": 9.28254556615435e-05, + "loss": 2.639, + "step": 42100 + }, + { + "epoch": 71.15, + "learning_rate": 9.280227446726846e-05, + "loss": 2.6199, + "step": 42120 + }, + { + "epoch": 71.18, + "learning_rate": 9.2779058787974e-05, + "loss": 2.6298, + "step": 42140 + }, + { + "epoch": 71.22, + "learning_rate": 9.275580864236455e-05, + "loss": 2.64, + "step": 42160 + }, + { + "epoch": 71.25, + "learning_rate": 9.273252404917235e-05, + "loss": 2.6336, + "step": 42180 + }, + { + "epoch": 71.28, + "learning_rate": 9.270920502715736e-05, + "loss": 2.6368, + "step": 42200 + }, + { + "epoch": 71.32, + "learning_rate": 9.26858515951073e-05, + "loss": 2.6412, + "step": 42220 + }, + { + "epoch": 71.35, + "learning_rate": 9.26624637718376e-05, + "loss": 2.6558, + "step": 42240 + }, + { + "epoch": 71.39, + "learning_rate": 9.263904157619142e-05, + "loss": 2.6413, + "step": 42260 + }, + { + "epoch": 71.42, + "learning_rate": 9.26155850270396e-05, + "loss": 2.6505, + "step": 42280 + }, + { + "epoch": 71.45, + "learning_rate": 9.259209414328065e-05, + "loss": 2.6567, + "step": 42300 + }, + { + "epoch": 71.49, + "learning_rate": 9.256856894384076e-05, + "loss": 2.6604, + "step": 42320 + }, + { + "epoch": 71.52, + "learning_rate": 9.254500944767374e-05, + "loss": 2.6572, + "step": 42340 + }, + { + "epoch": 71.55, + "learning_rate": 9.252141567376107e-05, + "loss": 2.6808, + "step": 42360 + }, + { + "epoch": 71.59, + "learning_rate": 9.249778764111182e-05, + "loss": 2.6744, + "step": 42380 + }, + { + "epoch": 71.62, + "learning_rate": 9.247412536876268e-05, + "loss": 2.6585, + "step": 42400 + }, + { + "epoch": 71.66, + "learning_rate": 9.245042887577788e-05, + "loss": 2.679, + "step": 42420 + }, + { + "epoch": 71.69, + "learning_rate": 9.24266981812493e-05, + "loss": 2.6544, + "step": 42440 + }, + { + "epoch": 71.72, + "learning_rate": 9.240293330429633e-05, + "loss": 2.6672, + "step": 42460 + }, + { + "epoch": 71.76, + "learning_rate": 9.237913426406585e-05, + "loss": 2.6821, + "step": 42480 + }, + { + "epoch": 71.79, + "learning_rate": 9.235530107973237e-05, + "loss": 2.6669, + "step": 42500 + }, + { + "epoch": 71.82, + "learning_rate": 9.233143377049784e-05, + "loss": 2.6678, + "step": 42520 + }, + { + "epoch": 71.86, + "learning_rate": 9.23075323555917e-05, + "loss": 2.6784, + "step": 42540 + }, + { + "epoch": 71.89, + "learning_rate": 9.228359685427095e-05, + "loss": 2.6748, + "step": 42560 + }, + { + "epoch": 71.93, + "learning_rate": 9.225962728581991e-05, + "loss": 2.6953, + "step": 42580 + }, + { + "epoch": 71.96, + "learning_rate": 9.223562366955048e-05, + "loss": 2.6793, + "step": 42600 + }, + { + "epoch": 71.99, + "learning_rate": 9.221158602480193e-05, + "loss": 2.6778, + "step": 42620 + }, + { + "epoch": 72.03, + "learning_rate": 9.218751437094094e-05, + "loss": 2.6099, + "step": 42640 + }, + { + "epoch": 72.06, + "learning_rate": 9.216340872736163e-05, + "loss": 2.5965, + "step": 42660 + }, + { + "epoch": 72.09, + "learning_rate": 9.213926911348548e-05, + "loss": 2.5877, + "step": 42680 + }, + { + "epoch": 72.13, + "learning_rate": 9.21150955487613e-05, + "loss": 2.6262, + "step": 42700 + }, + { + "epoch": 72.16, + "learning_rate": 9.209088805266535e-05, + "loss": 2.6174, + "step": 42720 + }, + { + "epoch": 72.2, + "learning_rate": 9.206664664470115e-05, + "loss": 2.6064, + "step": 42740 + }, + { + "epoch": 72.23, + "learning_rate": 9.204237134439955e-05, + "loss": 2.6175, + "step": 42760 + }, + { + "epoch": 72.26, + "learning_rate": 9.201806217131874e-05, + "loss": 2.6192, + "step": 42780 + }, + { + "epoch": 72.3, + "learning_rate": 9.199371914504417e-05, + "loss": 2.6337, + "step": 42800 + }, + { + "epoch": 72.33, + "learning_rate": 9.196934228518858e-05, + "loss": 2.6214, + "step": 42820 + }, + { + "epoch": 72.36, + "learning_rate": 9.194493161139199e-05, + "loss": 2.6292, + "step": 42840 + }, + { + "epoch": 72.4, + "learning_rate": 9.192048714332161e-05, + "loss": 2.6487, + "step": 42860 + }, + { + "epoch": 72.43, + "learning_rate": 9.189600890067191e-05, + "loss": 2.6624, + "step": 42880 + }, + { + "epoch": 72.47, + "learning_rate": 9.187149690316457e-05, + "loss": 2.6281, + "step": 42900 + }, + { + "epoch": 72.5, + "learning_rate": 9.184695117054847e-05, + "loss": 2.6411, + "step": 42920 + }, + { + "epoch": 72.53, + "learning_rate": 9.182237172259964e-05, + "loss": 2.6444, + "step": 42940 + }, + { + "epoch": 72.57, + "learning_rate": 9.179775857912134e-05, + "loss": 2.6573, + "step": 42960 + }, + { + "epoch": 72.6, + "learning_rate": 9.17731117599439e-05, + "loss": 2.6441, + "step": 42980 + }, + { + "epoch": 72.64, + "learning_rate": 9.174843128492482e-05, + "loss": 2.6471, + "step": 43000 + }, + { + "epoch": 72.64, + "eval_loss": 2.8353114128112793, + "eval_runtime": 47.3722, + "eval_samples_per_second": 20.877, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.005513313223234023, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03286320973778243, + "eval_tse_type": 2.808673182788451e-05, + "step": 43000 + }, + { + "epoch": 72.67, + "learning_rate": 9.172371717394873e-05, + "loss": 2.6493, + "step": 43020 + }, + { + "epoch": 72.7, + "learning_rate": 9.169896944692733e-05, + "loss": 2.6458, + "step": 43040 + }, + { + "epoch": 72.74, + "learning_rate": 9.167418812379942e-05, + "loss": 2.6771, + "step": 43060 + }, + { + "epoch": 72.77, + "learning_rate": 9.165061476661911e-05, + "loss": 2.65, + "step": 43080 + }, + { + "epoch": 72.8, + "learning_rate": 9.162576798853497e-05, + "loss": 2.6617, + "step": 43100 + }, + { + "epoch": 72.84, + "learning_rate": 9.16008876733214e-05, + "loss": 2.6509, + "step": 43120 + }, + { + "epoch": 72.87, + "learning_rate": 9.157597384102406e-05, + "loss": 2.6666, + "step": 43140 + }, + { + "epoch": 72.91, + "learning_rate": 9.155102651171552e-05, + "loss": 2.6678, + "step": 43160 + }, + { + "epoch": 72.94, + "learning_rate": 9.152604570549544e-05, + "loss": 2.663, + "step": 43180 + }, + { + "epoch": 72.97, + "learning_rate": 9.150103144249037e-05, + "loss": 2.6616, + "step": 43200 + }, + { + "epoch": 73.01, + "learning_rate": 9.147598374285387e-05, + "loss": 2.6446, + "step": 43220 + }, + { + "epoch": 73.04, + "learning_rate": 9.145090262676642e-05, + "loss": 2.5908, + "step": 43240 + }, + { + "epoch": 73.07, + "learning_rate": 9.142578811443542e-05, + "loss": 2.5871, + "step": 43260 + }, + { + "epoch": 73.11, + "learning_rate": 9.140064022609517e-05, + "loss": 2.5963, + "step": 43280 + }, + { + "epoch": 73.14, + "learning_rate": 9.137545898200687e-05, + "loss": 2.6026, + "step": 43300 + }, + { + "epoch": 73.18, + "learning_rate": 9.135024440245861e-05, + "loss": 2.5953, + "step": 43320 + }, + { + "epoch": 73.21, + "learning_rate": 9.13249965077653e-05, + "loss": 2.6091, + "step": 43340 + }, + { + "epoch": 73.24, + "learning_rate": 9.129971531826872e-05, + "loss": 2.5937, + "step": 43360 + }, + { + "epoch": 73.28, + "learning_rate": 9.12744008543375e-05, + "loss": 2.6179, + "step": 43380 + }, + { + "epoch": 73.31, + "learning_rate": 9.124905313636698e-05, + "loss": 2.6367, + "step": 43400 + }, + { + "epoch": 73.34, + "learning_rate": 9.122367218477941e-05, + "loss": 2.6228, + "step": 43420 + }, + { + "epoch": 73.38, + "learning_rate": 9.119825802002375e-05, + "loss": 2.6115, + "step": 43440 + }, + { + "epoch": 73.41, + "learning_rate": 9.117281066257574e-05, + "loss": 2.618, + "step": 43460 + }, + { + "epoch": 73.45, + "learning_rate": 9.114733013293783e-05, + "loss": 2.6302, + "step": 43480 + }, + { + "epoch": 73.48, + "learning_rate": 9.112181645163926e-05, + "loss": 2.6248, + "step": 43500 + }, + { + "epoch": 73.51, + "learning_rate": 9.109626963923592e-05, + "loss": 2.6102, + "step": 43520 + }, + { + "epoch": 73.55, + "learning_rate": 9.107068971631043e-05, + "loss": 2.635, + "step": 43540 + }, + { + "epoch": 73.58, + "learning_rate": 9.104507670347204e-05, + "loss": 2.6123, + "step": 43560 + }, + { + "epoch": 73.61, + "learning_rate": 9.101943062135672e-05, + "loss": 2.6225, + "step": 43580 + }, + { + "epoch": 73.65, + "learning_rate": 9.099375149062705e-05, + "loss": 2.6396, + "step": 43600 + }, + { + "epoch": 73.68, + "learning_rate": 9.096803933197225e-05, + "loss": 2.6564, + "step": 43620 + }, + { + "epoch": 73.72, + "learning_rate": 9.094229416610811e-05, + "loss": 2.6402, + "step": 43640 + }, + { + "epoch": 73.75, + "learning_rate": 9.091651601377709e-05, + "loss": 2.6332, + "step": 43660 + }, + { + "epoch": 73.78, + "learning_rate": 9.089070489574814e-05, + "loss": 2.6279, + "step": 43680 + }, + { + "epoch": 73.82, + "learning_rate": 9.086486083281683e-05, + "loss": 2.6469, + "step": 43700 + }, + { + "epoch": 73.85, + "learning_rate": 9.083898384580527e-05, + "loss": 2.6482, + "step": 43720 + }, + { + "epoch": 73.89, + "learning_rate": 9.081307395556206e-05, + "loss": 2.6354, + "step": 43740 + }, + { + "epoch": 73.92, + "learning_rate": 9.078713118296234e-05, + "loss": 2.6315, + "step": 43760 + }, + { + "epoch": 73.95, + "learning_rate": 9.076115554890772e-05, + "loss": 2.6416, + "step": 43780 + }, + { + "epoch": 73.99, + "learning_rate": 9.073514707432631e-05, + "loss": 2.6603, + "step": 43800 + }, + { + "epoch": 74.02, + "learning_rate": 9.070910578017268e-05, + "loss": 2.5905, + "step": 43820 + }, + { + "epoch": 74.05, + "learning_rate": 9.06830316874278e-05, + "loss": 2.5618, + "step": 43840 + }, + { + "epoch": 74.09, + "learning_rate": 9.065692481709913e-05, + "loss": 2.546, + "step": 43860 + }, + { + "epoch": 74.12, + "learning_rate": 9.063078519022048e-05, + "loss": 2.5966, + "step": 43880 + }, + { + "epoch": 74.16, + "learning_rate": 9.060461282785209e-05, + "loss": 2.5812, + "step": 43900 + }, + { + "epoch": 74.19, + "learning_rate": 9.057840775108053e-05, + "loss": 2.5973, + "step": 43920 + }, + { + "epoch": 74.22, + "learning_rate": 9.055216998101879e-05, + "loss": 2.5783, + "step": 43940 + }, + { + "epoch": 74.26, + "learning_rate": 9.052589953880617e-05, + "loss": 2.601, + "step": 43960 + }, + { + "epoch": 74.29, + "learning_rate": 9.049959644560826e-05, + "loss": 2.5945, + "step": 43980 + }, + { + "epoch": 74.32, + "learning_rate": 9.047326072261701e-05, + "loss": 2.5965, + "step": 44000 + }, + { + "epoch": 74.32, + "eval_loss": 2.825916290283203, + "eval_runtime": 47.2292, + "eval_samples_per_second": 20.94, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.004614444174740817, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.032045822172236636, + "eval_tse_type": 0.00011954650185393913, + "step": 44000 + }, + { + "epoch": 74.36, + "learning_rate": 9.044689239105063e-05, + "loss": 2.6169, + "step": 44020 + }, + { + "epoch": 74.39, + "learning_rate": 9.042049147215362e-05, + "loss": 2.6112, + "step": 44040 + }, + { + "epoch": 74.43, + "learning_rate": 9.039405798719668e-05, + "loss": 2.6076, + "step": 44060 + }, + { + "epoch": 74.46, + "learning_rate": 9.036759195747686e-05, + "loss": 2.6172, + "step": 44080 + }, + { + "epoch": 74.49, + "learning_rate": 9.034109340431732e-05, + "loss": 2.6157, + "step": 44100 + }, + { + "epoch": 74.53, + "learning_rate": 9.031456234906746e-05, + "loss": 2.6086, + "step": 44120 + }, + { + "epoch": 74.56, + "learning_rate": 9.028799881310289e-05, + "loss": 2.6188, + "step": 44140 + }, + { + "epoch": 74.59, + "learning_rate": 9.026140281782536e-05, + "loss": 2.6167, + "step": 44160 + }, + { + "epoch": 74.63, + "learning_rate": 9.023477438466279e-05, + "loss": 2.6197, + "step": 44180 + }, + { + "epoch": 74.66, + "learning_rate": 9.020811353506923e-05, + "loss": 2.5992, + "step": 44200 + }, + { + "epoch": 74.7, + "learning_rate": 9.018142029052483e-05, + "loss": 2.6265, + "step": 44220 + }, + { + "epoch": 74.73, + "learning_rate": 9.015469467253588e-05, + "loss": 2.6258, + "step": 44240 + }, + { + "epoch": 74.76, + "learning_rate": 9.012793670263471e-05, + "loss": 2.6088, + "step": 44260 + }, + { + "epoch": 74.8, + "learning_rate": 9.010114640237972e-05, + "loss": 2.6176, + "step": 44280 + }, + { + "epoch": 74.83, + "learning_rate": 9.007432379335541e-05, + "loss": 2.6264, + "step": 44300 + }, + { + "epoch": 74.86, + "learning_rate": 9.004746889717224e-05, + "loss": 2.6386, + "step": 44320 + }, + { + "epoch": 74.9, + "learning_rate": 9.002058173546672e-05, + "loss": 2.6379, + "step": 44340 + }, + { + "epoch": 74.93, + "learning_rate": 8.999366232990136e-05, + "loss": 2.6381, + "step": 44360 + }, + { + "epoch": 74.97, + "learning_rate": 8.996671070216463e-05, + "loss": 2.624, + "step": 44380 + }, + { + "epoch": 75.0, + "learning_rate": 8.993972687397096e-05, + "loss": 2.6408, + "step": 44400 + }, + { + "epoch": 75.03, + "learning_rate": 8.991271086706076e-05, + "loss": 2.5559, + "step": 44420 + }, + { + "epoch": 75.07, + "learning_rate": 8.988566270320032e-05, + "loss": 2.5509, + "step": 44440 + }, + { + "epoch": 75.1, + "learning_rate": 8.985858240418187e-05, + "loss": 2.5518, + "step": 44460 + }, + { + "epoch": 75.14, + "learning_rate": 8.98314699918235e-05, + "loss": 2.5798, + "step": 44480 + }, + { + "epoch": 75.17, + "learning_rate": 8.98043254879692e-05, + "loss": 2.5557, + "step": 44500 + }, + { + "epoch": 75.2, + "learning_rate": 8.977714891448882e-05, + "loss": 2.584, + "step": 44520 + }, + { + "epoch": 75.24, + "learning_rate": 8.974994029327801e-05, + "loss": 2.5792, + "step": 44540 + }, + { + "epoch": 75.27, + "learning_rate": 8.972269964625828e-05, + "loss": 2.5928, + "step": 44560 + }, + { + "epoch": 75.3, + "learning_rate": 8.969542699537692e-05, + "loss": 2.5888, + "step": 44580 + }, + { + "epoch": 75.34, + "learning_rate": 8.966812236260701e-05, + "loss": 2.5817, + "step": 44600 + }, + { + "epoch": 75.37, + "learning_rate": 8.964078576994742e-05, + "loss": 2.5884, + "step": 44620 + }, + { + "epoch": 75.41, + "learning_rate": 8.961341723942271e-05, + "loss": 2.5919, + "step": 44640 + }, + { + "epoch": 75.44, + "learning_rate": 8.958601679308325e-05, + "loss": 2.5928, + "step": 44660 + }, + { + "epoch": 75.47, + "learning_rate": 8.955858445300506e-05, + "loss": 2.5757, + "step": 44680 + }, + { + "epoch": 75.51, + "learning_rate": 8.953112024128991e-05, + "loss": 2.5889, + "step": 44700 + }, + { + "epoch": 75.54, + "learning_rate": 8.950362418006518e-05, + "loss": 2.6046, + "step": 44720 + }, + { + "epoch": 75.57, + "learning_rate": 8.947609629148398e-05, + "loss": 2.5789, + "step": 44740 + }, + { + "epoch": 75.61, + "learning_rate": 8.944853659772501e-05, + "loss": 2.6029, + "step": 44760 + }, + { + "epoch": 75.64, + "learning_rate": 8.942094512099264e-05, + "loss": 2.6039, + "step": 44780 + }, + { + "epoch": 75.68, + "learning_rate": 8.93933218835168e-05, + "loss": 2.6185, + "step": 44800 + }, + { + "epoch": 75.71, + "learning_rate": 8.936566690755308e-05, + "loss": 2.6077, + "step": 44820 + }, + { + "epoch": 75.74, + "learning_rate": 8.933798021538254e-05, + "loss": 2.6162, + "step": 44840 + }, + { + "epoch": 75.78, + "learning_rate": 8.931026182931186e-05, + "loss": 2.6126, + "step": 44860 + }, + { + "epoch": 75.81, + "learning_rate": 8.928251177167326e-05, + "loss": 2.6081, + "step": 44880 + }, + { + "epoch": 75.84, + "learning_rate": 8.925473006482445e-05, + "loss": 2.6011, + "step": 44900 + }, + { + "epoch": 75.88, + "learning_rate": 8.922691673114866e-05, + "loss": 2.6214, + "step": 44920 + }, + { + "epoch": 75.91, + "learning_rate": 8.919907179305459e-05, + "loss": 2.6142, + "step": 44940 + }, + { + "epoch": 75.95, + "learning_rate": 8.91711952729764e-05, + "loss": 2.613, + "step": 44960 + }, + { + "epoch": 75.98, + "learning_rate": 8.914328719337365e-05, + "loss": 2.6126, + "step": 44980 + }, + { + "epoch": 76.01, + "learning_rate": 8.911534757673146e-05, + "loss": 2.6026, + "step": 45000 + }, + { + "epoch": 76.01, + "eval_loss": 2.805588483810425, + "eval_runtime": 50.8806, + "eval_samples_per_second": 19.438, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.006243438008057357, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.027783443514817426, + "eval_tse_type": 0.0002299601168408044, + "step": 45000 + }, + { + "epoch": 76.05, + "learning_rate": 8.908737644556024e-05, + "loss": 2.5423, + "step": 45020 + }, + { + "epoch": 76.08, + "learning_rate": 8.905937382239581e-05, + "loss": 2.5462, + "step": 45040 + }, + { + "epoch": 76.11, + "learning_rate": 8.903133972979938e-05, + "loss": 2.5347, + "step": 45060 + }, + { + "epoch": 76.15, + "learning_rate": 8.900327419035753e-05, + "loss": 2.5776, + "step": 45080 + }, + { + "epoch": 76.18, + "learning_rate": 8.897517722668215e-05, + "loss": 2.5492, + "step": 45100 + }, + { + "epoch": 76.22, + "learning_rate": 8.894704886141046e-05, + "loss": 2.5387, + "step": 45120 + }, + { + "epoch": 76.25, + "learning_rate": 8.891888911720496e-05, + "loss": 2.5581, + "step": 45140 + }, + { + "epoch": 76.28, + "learning_rate": 8.889069801675344e-05, + "loss": 2.5547, + "step": 45160 + }, + { + "epoch": 76.32, + "learning_rate": 8.886247558276901e-05, + "loss": 2.5697, + "step": 45180 + }, + { + "epoch": 76.35, + "learning_rate": 8.883422183798992e-05, + "loss": 2.5709, + "step": 45200 + }, + { + "epoch": 76.39, + "learning_rate": 8.880593680517975e-05, + "loss": 2.5787, + "step": 45220 + }, + { + "epoch": 76.42, + "learning_rate": 8.877762050712719e-05, + "loss": 2.5808, + "step": 45240 + }, + { + "epoch": 76.45, + "learning_rate": 8.874927296664621e-05, + "loss": 2.5762, + "step": 45260 + }, + { + "epoch": 76.49, + "learning_rate": 8.87208942065759e-05, + "loss": 2.5741, + "step": 45280 + }, + { + "epoch": 76.52, + "learning_rate": 8.869248424978049e-05, + "loss": 2.5909, + "step": 45300 + }, + { + "epoch": 76.55, + "learning_rate": 8.866404311914942e-05, + "loss": 2.5921, + "step": 45320 + }, + { + "epoch": 76.59, + "learning_rate": 8.863557083759714e-05, + "loss": 2.5934, + "step": 45340 + }, + { + "epoch": 76.62, + "learning_rate": 8.860706742806328e-05, + "loss": 2.5914, + "step": 45360 + }, + { + "epoch": 76.66, + "learning_rate": 8.857853291351254e-05, + "loss": 2.5862, + "step": 45380 + }, + { + "epoch": 76.69, + "learning_rate": 8.854996731693464e-05, + "loss": 2.592, + "step": 45400 + }, + { + "epoch": 76.72, + "learning_rate": 8.852280123141984e-05, + "loss": 2.5957, + "step": 45420 + }, + { + "epoch": 76.76, + "learning_rate": 8.849417509110805e-05, + "loss": 2.6099, + "step": 45440 + }, + { + "epoch": 76.79, + "learning_rate": 8.846551793673467e-05, + "loss": 2.5944, + "step": 45460 + }, + { + "epoch": 76.82, + "learning_rate": 8.843682979138825e-05, + "loss": 2.6035, + "step": 45480 + }, + { + "epoch": 76.86, + "learning_rate": 8.840811067818233e-05, + "loss": 2.6028, + "step": 45500 + }, + { + "epoch": 76.89, + "learning_rate": 8.837936062025538e-05, + "loss": 2.5908, + "step": 45520 + }, + { + "epoch": 76.93, + "learning_rate": 8.835057964077079e-05, + "loss": 2.5991, + "step": 45540 + }, + { + "epoch": 76.96, + "learning_rate": 8.832176776291688e-05, + "loss": 2.6049, + "step": 45560 + }, + { + "epoch": 76.99, + "learning_rate": 8.829292500990683e-05, + "loss": 2.6029, + "step": 45580 + }, + { + "epoch": 77.03, + "learning_rate": 8.826405140497878e-05, + "loss": 2.5246, + "step": 45600 + }, + { + "epoch": 77.06, + "learning_rate": 8.823514697139564e-05, + "loss": 2.5371, + "step": 45620 + }, + { + "epoch": 77.09, + "learning_rate": 8.820621173244519e-05, + "loss": 2.532, + "step": 45640 + }, + { + "epoch": 77.13, + "learning_rate": 8.817724571144004e-05, + "loss": 2.5445, + "step": 45660 + }, + { + "epoch": 77.16, + "learning_rate": 8.814824893171758e-05, + "loss": 2.5406, + "step": 45680 + }, + { + "epoch": 77.2, + "learning_rate": 8.811922141664e-05, + "loss": 2.5547, + "step": 45700 + }, + { + "epoch": 77.23, + "learning_rate": 8.809016318959424e-05, + "loss": 2.5516, + "step": 45720 + }, + { + "epoch": 77.26, + "learning_rate": 8.806107427399197e-05, + "loss": 2.5474, + "step": 45740 + }, + { + "epoch": 77.3, + "learning_rate": 8.803195469326964e-05, + "loss": 2.5611, + "step": 45760 + }, + { + "epoch": 77.33, + "learning_rate": 8.800280447088836e-05, + "loss": 2.5457, + "step": 45780 + }, + { + "epoch": 77.36, + "learning_rate": 8.797362363033392e-05, + "loss": 2.5677, + "step": 45800 + }, + { + "epoch": 77.4, + "learning_rate": 8.794441219511681e-05, + "loss": 2.5555, + "step": 45820 + }, + { + "epoch": 77.43, + "learning_rate": 8.791517018877216e-05, + "loss": 2.5579, + "step": 45840 + }, + { + "epoch": 77.47, + "learning_rate": 8.78858976348597e-05, + "loss": 2.5605, + "step": 45860 + }, + { + "epoch": 77.5, + "learning_rate": 8.785659455696384e-05, + "loss": 2.5729, + "step": 45880 + }, + { + "epoch": 77.53, + "learning_rate": 8.782726097869349e-05, + "loss": 2.5629, + "step": 45900 + }, + { + "epoch": 77.57, + "learning_rate": 8.779789692368223e-05, + "loss": 2.5627, + "step": 45920 + }, + { + "epoch": 77.6, + "learning_rate": 8.776850241558814e-05, + "loss": 2.5917, + "step": 45940 + }, + { + "epoch": 77.64, + "learning_rate": 8.773907747809383e-05, + "loss": 2.5836, + "step": 45960 + }, + { + "epoch": 77.67, + "learning_rate": 8.770962213490643e-05, + "loss": 2.5908, + "step": 45980 + }, + { + "epoch": 77.7, + "learning_rate": 8.768013640975761e-05, + "loss": 2.5803, + "step": 46000 + }, + { + "epoch": 77.7, + "eval_loss": 2.7924163341522217, + "eval_runtime": 49.1131, + "eval_samples_per_second": 20.137, + "eval_steps_per_second": 0.122, + "eval_tse_ndup": 0.006478176525450504, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.031434061591553826, + "eval_tse_type": 8.426019548365353e-05, + "step": 46000 + }, + { + "epoch": 77.74, + "learning_rate": 8.765062032640346e-05, + "loss": 2.5695, + "step": 46020 + }, + { + "epoch": 77.77, + "learning_rate": 8.762107390862455e-05, + "loss": 2.5713, + "step": 46040 + }, + { + "epoch": 77.8, + "learning_rate": 8.759149718022594e-05, + "loss": 2.5869, + "step": 46060 + }, + { + "epoch": 77.84, + "learning_rate": 8.756189016503702e-05, + "loss": 2.5722, + "step": 46080 + }, + { + "epoch": 77.87, + "learning_rate": 8.753225288691165e-05, + "loss": 2.5811, + "step": 46100 + }, + { + "epoch": 77.91, + "learning_rate": 8.750258536972804e-05, + "loss": 2.5708, + "step": 46120 + }, + { + "epoch": 77.94, + "learning_rate": 8.747288763738877e-05, + "loss": 2.5799, + "step": 46140 + }, + { + "epoch": 77.97, + "learning_rate": 8.744315971382078e-05, + "loss": 2.6076, + "step": 46160 + }, + { + "epoch": 78.01, + "learning_rate": 8.741340162297531e-05, + "loss": 2.5588, + "step": 46180 + }, + { + "epoch": 78.04, + "learning_rate": 8.738361338882792e-05, + "loss": 2.5079, + "step": 46200 + }, + { + "epoch": 78.07, + "learning_rate": 8.735379503537844e-05, + "loss": 2.5229, + "step": 46220 + }, + { + "epoch": 78.11, + "learning_rate": 8.732394658665101e-05, + "loss": 2.5249, + "step": 46240 + }, + { + "epoch": 78.14, + "learning_rate": 8.729406806669396e-05, + "loss": 2.5257, + "step": 46260 + }, + { + "epoch": 78.18, + "learning_rate": 8.726415949957987e-05, + "loss": 2.5111, + "step": 46280 + }, + { + "epoch": 78.21, + "learning_rate": 8.723422090940555e-05, + "loss": 2.5299, + "step": 46300 + }, + { + "epoch": 78.24, + "learning_rate": 8.720425232029198e-05, + "loss": 2.5404, + "step": 46320 + }, + { + "epoch": 78.28, + "learning_rate": 8.717425375638429e-05, + "loss": 2.5249, + "step": 46340 + }, + { + "epoch": 78.31, + "learning_rate": 8.714422524185181e-05, + "loss": 2.5518, + "step": 46360 + }, + { + "epoch": 78.34, + "learning_rate": 8.711416680088795e-05, + "loss": 2.5537, + "step": 46380 + }, + { + "epoch": 78.38, + "learning_rate": 8.708407845771027e-05, + "loss": 2.54, + "step": 46400 + }, + { + "epoch": 78.41, + "learning_rate": 8.70539602365604e-05, + "loss": 2.5449, + "step": 46420 + }, + { + "epoch": 78.45, + "learning_rate": 8.702381216170403e-05, + "loss": 2.535, + "step": 46440 + }, + { + "epoch": 78.48, + "learning_rate": 8.699363425743093e-05, + "loss": 2.5448, + "step": 46460 + }, + { + "epoch": 78.51, + "learning_rate": 8.69634265480549e-05, + "loss": 2.5534, + "step": 46480 + }, + { + "epoch": 78.55, + "learning_rate": 8.693318905791375e-05, + "loss": 2.5649, + "step": 46500 + }, + { + "epoch": 78.58, + "learning_rate": 8.690292181136924e-05, + "loss": 2.5664, + "step": 46520 + }, + { + "epoch": 78.61, + "learning_rate": 8.687262483280719e-05, + "loss": 2.5567, + "step": 46540 + }, + { + "epoch": 78.65, + "learning_rate": 8.684229814663731e-05, + "loss": 2.5679, + "step": 46560 + }, + { + "epoch": 78.68, + "learning_rate": 8.681194177729328e-05, + "loss": 2.5707, + "step": 46580 + }, + { + "epoch": 78.72, + "learning_rate": 8.678155574923265e-05, + "loss": 2.5715, + "step": 46600 + }, + { + "epoch": 78.75, + "learning_rate": 8.675114008693689e-05, + "loss": 2.5694, + "step": 46620 + }, + { + "epoch": 78.78, + "learning_rate": 8.672069481491141e-05, + "loss": 2.5666, + "step": 46640 + }, + { + "epoch": 78.82, + "learning_rate": 8.669021995768534e-05, + "loss": 2.5612, + "step": 46660 + }, + { + "epoch": 78.85, + "learning_rate": 8.665971553981175e-05, + "loss": 2.5749, + "step": 46680 + }, + { + "epoch": 78.89, + "learning_rate": 8.662918158586753e-05, + "loss": 2.5809, + "step": 46700 + }, + { + "epoch": 78.92, + "learning_rate": 8.65986181204533e-05, + "loss": 2.5734, + "step": 46720 + }, + { + "epoch": 78.95, + "learning_rate": 8.656802516819349e-05, + "loss": 2.5695, + "step": 46740 + }, + { + "epoch": 78.99, + "learning_rate": 8.653740275373631e-05, + "loss": 2.5787, + "step": 46760 + }, + { + "epoch": 79.02, + "learning_rate": 8.650675090175366e-05, + "loss": 2.5187, + "step": 46780 + }, + { + "epoch": 79.05, + "learning_rate": 8.647606963694122e-05, + "loss": 2.5092, + "step": 46800 + }, + { + "epoch": 79.09, + "learning_rate": 8.644535898401831e-05, + "loss": 2.5123, + "step": 46820 + }, + { + "epoch": 79.12, + "learning_rate": 8.641461896772793e-05, + "loss": 2.494, + "step": 46840 + }, + { + "epoch": 79.16, + "learning_rate": 8.638384961283679e-05, + "loss": 2.5321, + "step": 46860 + }, + { + "epoch": 79.19, + "learning_rate": 8.63530509441352e-05, + "loss": 2.4998, + "step": 46880 + }, + { + "epoch": 79.22, + "learning_rate": 8.632222298643706e-05, + "loss": 2.5165, + "step": 46900 + }, + { + "epoch": 79.26, + "learning_rate": 8.629136576457991e-05, + "loss": 2.5273, + "step": 46920 + }, + { + "epoch": 79.29, + "learning_rate": 8.626047930342488e-05, + "loss": 2.5145, + "step": 46940 + }, + { + "epoch": 79.32, + "learning_rate": 8.622956362785662e-05, + "loss": 2.539, + "step": 46960 + }, + { + "epoch": 79.36, + "learning_rate": 8.619861876278332e-05, + "loss": 2.5233, + "step": 46980 + }, + { + "epoch": 79.39, + "learning_rate": 8.616764473313671e-05, + "loss": 2.54, + "step": 47000 + }, + { + "epoch": 79.39, + "eval_loss": 2.7760818004608154, + "eval_runtime": 49.6014, + "eval_samples_per_second": 19.939, + "eval_steps_per_second": 0.121, + "eval_tse_ndup": 0.005016615626178201, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.029835610583169678, + "eval_tse_type": 0.00032650825749915734, + "step": 47000 + }, + { + "epoch": 79.43, + "learning_rate": 8.6136641563872e-05, + "loss": 2.5239, + "step": 47020 + }, + { + "epoch": 79.46, + "learning_rate": 8.610560927996788e-05, + "loss": 2.5344, + "step": 47040 + }, + { + "epoch": 79.49, + "learning_rate": 8.607454790642654e-05, + "loss": 2.5406, + "step": 47060 + }, + { + "epoch": 79.53, + "learning_rate": 8.604345746827351e-05, + "loss": 2.557, + "step": 47080 + }, + { + "epoch": 79.56, + "learning_rate": 8.601233799055784e-05, + "loss": 2.5437, + "step": 47100 + }, + { + "epoch": 79.59, + "learning_rate": 8.59811894983519e-05, + "loss": 2.5632, + "step": 47120 + }, + { + "epoch": 79.63, + "learning_rate": 8.595001201675147e-05, + "loss": 2.5543, + "step": 47140 + }, + { + "epoch": 79.66, + "learning_rate": 8.591880557087573e-05, + "loss": 2.5386, + "step": 47160 + }, + { + "epoch": 79.7, + "learning_rate": 8.588757018586708e-05, + "loss": 2.5332, + "step": 47180 + }, + { + "epoch": 79.73, + "learning_rate": 8.585630588689135e-05, + "loss": 2.5485, + "step": 47200 + }, + { + "epoch": 79.76, + "learning_rate": 8.582501269913761e-05, + "loss": 2.5439, + "step": 47220 + }, + { + "epoch": 79.8, + "learning_rate": 8.579369064781819e-05, + "loss": 2.5492, + "step": 47240 + }, + { + "epoch": 79.83, + "learning_rate": 8.576233975816877e-05, + "loss": 2.5538, + "step": 47260 + }, + { + "epoch": 79.86, + "learning_rate": 8.573096005544811e-05, + "loss": 2.5602, + "step": 47280 + }, + { + "epoch": 79.9, + "learning_rate": 8.569955156493834e-05, + "loss": 2.5648, + "step": 47300 + }, + { + "epoch": 79.93, + "learning_rate": 8.566811431194468e-05, + "loss": 2.5615, + "step": 47320 + }, + { + "epoch": 79.97, + "learning_rate": 8.563664832179556e-05, + "loss": 2.5485, + "step": 47340 + }, + { + "epoch": 80.0, + "learning_rate": 8.560515361984256e-05, + "loss": 2.5707, + "step": 47360 + }, + { + "epoch": 80.03, + "learning_rate": 8.55736302314604e-05, + "loss": 2.4823, + "step": 47380 + }, + { + "epoch": 80.07, + "learning_rate": 8.55420781820469e-05, + "loss": 2.4796, + "step": 47400 + }, + { + "epoch": 80.1, + "learning_rate": 8.551049749702297e-05, + "loss": 2.5203, + "step": 47420 + }, + { + "epoch": 80.14, + "learning_rate": 8.548046934569051e-05, + "loss": 2.5065, + "step": 47440 + }, + { + "epoch": 80.17, + "learning_rate": 8.544883289443053e-05, + "loss": 2.4937, + "step": 47460 + }, + { + "epoch": 80.2, + "learning_rate": 8.541716788268617e-05, + "loss": 2.5035, + "step": 47480 + }, + { + "epoch": 80.24, + "learning_rate": 8.538547433596933e-05, + "loss": 2.5136, + "step": 47500 + }, + { + "epoch": 80.27, + "learning_rate": 8.535375227981497e-05, + "loss": 2.5127, + "step": 47520 + }, + { + "epoch": 80.3, + "learning_rate": 8.532200173978097e-05, + "loss": 2.5223, + "step": 47540 + }, + { + "epoch": 80.34, + "learning_rate": 8.529022274144816e-05, + "loss": 2.5202, + "step": 47560 + }, + { + "epoch": 80.37, + "learning_rate": 8.525841531042031e-05, + "loss": 2.5101, + "step": 47580 + }, + { + "epoch": 80.41, + "learning_rate": 8.522657947232407e-05, + "loss": 2.5079, + "step": 47600 + }, + { + "epoch": 80.44, + "learning_rate": 8.519471525280903e-05, + "loss": 2.5074, + "step": 47620 + }, + { + "epoch": 80.47, + "learning_rate": 8.516282267754761e-05, + "loss": 2.532, + "step": 47640 + }, + { + "epoch": 80.51, + "learning_rate": 8.513090177223506e-05, + "loss": 2.5243, + "step": 47660 + }, + { + "epoch": 80.54, + "learning_rate": 8.509895256258948e-05, + "loss": 2.5215, + "step": 47680 + }, + { + "epoch": 80.57, + "learning_rate": 8.506697507435182e-05, + "loss": 2.5225, + "step": 47700 + }, + { + "epoch": 80.61, + "learning_rate": 8.50349693332857e-05, + "loss": 2.5225, + "step": 47720 + }, + { + "epoch": 80.64, + "learning_rate": 8.50029353651776e-05, + "loss": 2.5449, + "step": 47740 + }, + { + "epoch": 80.68, + "learning_rate": 8.497087319583672e-05, + "loss": 2.5336, + "step": 47760 + }, + { + "epoch": 80.71, + "learning_rate": 8.493878285109495e-05, + "loss": 2.5283, + "step": 47780 + }, + { + "epoch": 80.74, + "learning_rate": 8.49066643568069e-05, + "loss": 2.5366, + "step": 47800 + }, + { + "epoch": 80.78, + "learning_rate": 8.487451773884987e-05, + "loss": 2.5433, + "step": 47820 + }, + { + "epoch": 80.81, + "learning_rate": 8.484234302312382e-05, + "loss": 2.5496, + "step": 47840 + }, + { + "epoch": 80.84, + "learning_rate": 8.48101402355513e-05, + "loss": 2.5276, + "step": 47860 + }, + { + "epoch": 80.88, + "learning_rate": 8.477790940207756e-05, + "loss": 2.547, + "step": 47880 + }, + { + "epoch": 80.91, + "learning_rate": 8.474565054867037e-05, + "loss": 2.5479, + "step": 47900 + }, + { + "epoch": 80.95, + "learning_rate": 8.471336370132012e-05, + "loss": 2.56, + "step": 47920 + }, + { + "epoch": 80.98, + "learning_rate": 8.468104888603973e-05, + "loss": 2.5615, + "step": 47940 + }, + { + "epoch": 81.01, + "learning_rate": 8.464870612886467e-05, + "loss": 2.5124, + "step": 47960 + }, + { + "epoch": 81.05, + "learning_rate": 8.46163354558529e-05, + "loss": 2.4734, + "step": 47980 + }, + { + "epoch": 81.08, + "learning_rate": 8.458393689308491e-05, + "loss": 2.469, + "step": 48000 + }, + { + "epoch": 81.08, + "eval_loss": 2.7601113319396973, + "eval_runtime": 50.7409, + "eval_samples_per_second": 19.491, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.005318529479795045, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03067736105780271, + "eval_tse_type": 0.00011779108111469634, + "step": 48000 + }, + { + "epoch": 81.11, + "learning_rate": 8.455151046666363e-05, + "loss": 2.4842, + "step": 48020 + }, + { + "epoch": 81.15, + "learning_rate": 8.451905620271443e-05, + "loss": 2.4875, + "step": 48040 + }, + { + "epoch": 81.18, + "learning_rate": 8.448657412738515e-05, + "loss": 2.4963, + "step": 48060 + }, + { + "epoch": 81.22, + "learning_rate": 8.445406426684598e-05, + "loss": 2.5064, + "step": 48080 + }, + { + "epoch": 81.25, + "learning_rate": 8.442152664728958e-05, + "loss": 2.4863, + "step": 48100 + }, + { + "epoch": 81.28, + "learning_rate": 8.438896129493086e-05, + "loss": 2.4981, + "step": 48120 + }, + { + "epoch": 81.32, + "learning_rate": 8.435636823600716e-05, + "loss": 2.5012, + "step": 48140 + }, + { + "epoch": 81.35, + "learning_rate": 8.432374749677814e-05, + "loss": 2.5092, + "step": 48160 + }, + { + "epoch": 81.39, + "learning_rate": 8.42910991035257e-05, + "loss": 2.5109, + "step": 48180 + }, + { + "epoch": 81.42, + "learning_rate": 8.425842308255412e-05, + "loss": 2.5033, + "step": 48200 + }, + { + "epoch": 81.45, + "learning_rate": 8.422571946018982e-05, + "loss": 2.5004, + "step": 48220 + }, + { + "epoch": 81.49, + "learning_rate": 8.419298826278154e-05, + "loss": 2.5159, + "step": 48240 + }, + { + "epoch": 81.52, + "learning_rate": 8.416022951670022e-05, + "loss": 2.5198, + "step": 48260 + }, + { + "epoch": 81.55, + "learning_rate": 8.412744324833898e-05, + "loss": 2.5176, + "step": 48280 + }, + { + "epoch": 81.59, + "learning_rate": 8.409462948411315e-05, + "loss": 2.516, + "step": 48300 + }, + { + "epoch": 81.62, + "learning_rate": 8.406178825046015e-05, + "loss": 2.5264, + "step": 48320 + }, + { + "epoch": 81.66, + "learning_rate": 8.402891957383959e-05, + "loss": 2.5022, + "step": 48340 + }, + { + "epoch": 81.69, + "learning_rate": 8.399602348073316e-05, + "loss": 2.5241, + "step": 48360 + }, + { + "epoch": 81.72, + "learning_rate": 8.396309999764467e-05, + "loss": 2.5332, + "step": 48380 + }, + { + "epoch": 81.76, + "learning_rate": 8.393014915109995e-05, + "loss": 2.5321, + "step": 48400 + }, + { + "epoch": 81.79, + "learning_rate": 8.389717096764691e-05, + "loss": 2.5321, + "step": 48420 + }, + { + "epoch": 81.82, + "learning_rate": 8.386416547385547e-05, + "loss": 2.5354, + "step": 48440 + }, + { + "epoch": 81.86, + "learning_rate": 8.383113269631757e-05, + "loss": 2.5315, + "step": 48460 + }, + { + "epoch": 81.89, + "learning_rate": 8.379807266164714e-05, + "loss": 2.5348, + "step": 48480 + }, + { + "epoch": 81.93, + "learning_rate": 8.376498539648001e-05, + "loss": 2.5295, + "step": 48500 + }, + { + "epoch": 81.96, + "learning_rate": 8.373187092747403e-05, + "loss": 2.517, + "step": 48520 + }, + { + "epoch": 81.99, + "learning_rate": 8.369872928130891e-05, + "loss": 2.5343, + "step": 48540 + }, + { + "epoch": 82.03, + "learning_rate": 8.366556048468628e-05, + "loss": 2.4666, + "step": 48560 + }, + { + "epoch": 82.06, + "learning_rate": 8.363236456432964e-05, + "loss": 2.4525, + "step": 48580 + }, + { + "epoch": 82.09, + "learning_rate": 8.359914154698434e-05, + "loss": 2.4665, + "step": 48600 + }, + { + "epoch": 82.13, + "learning_rate": 8.356589145941757e-05, + "loss": 2.467, + "step": 48620 + }, + { + "epoch": 82.16, + "learning_rate": 8.353261432841832e-05, + "loss": 2.4681, + "step": 48640 + }, + { + "epoch": 82.2, + "learning_rate": 8.34993101807974e-05, + "loss": 2.4744, + "step": 48660 + }, + { + "epoch": 82.23, + "learning_rate": 8.346597904338731e-05, + "loss": 2.491, + "step": 48680 + }, + { + "epoch": 82.26, + "learning_rate": 8.343262094304238e-05, + "loss": 2.4709, + "step": 48700 + }, + { + "epoch": 82.3, + "learning_rate": 8.339923590663863e-05, + "loss": 2.491, + "step": 48720 + }, + { + "epoch": 82.33, + "learning_rate": 8.336582396107378e-05, + "loss": 2.4757, + "step": 48740 + }, + { + "epoch": 82.36, + "learning_rate": 8.33323851332672e-05, + "loss": 2.4961, + "step": 48760 + }, + { + "epoch": 82.4, + "learning_rate": 8.329891945015998e-05, + "loss": 2.499, + "step": 48780 + }, + { + "epoch": 82.43, + "learning_rate": 8.326542693871482e-05, + "loss": 2.4982, + "step": 48800 + }, + { + "epoch": 82.47, + "learning_rate": 8.323190762591601e-05, + "loss": 2.4993, + "step": 48820 + }, + { + "epoch": 82.5, + "learning_rate": 8.319836153876947e-05, + "loss": 2.4876, + "step": 48840 + }, + { + "epoch": 82.53, + "learning_rate": 8.316478870430269e-05, + "loss": 2.5086, + "step": 48860 + }, + { + "epoch": 82.57, + "learning_rate": 8.313118914956466e-05, + "loss": 2.5213, + "step": 48880 + }, + { + "epoch": 82.6, + "learning_rate": 8.309756290162595e-05, + "loss": 2.5103, + "step": 48900 + }, + { + "epoch": 82.64, + "learning_rate": 8.306390998757863e-05, + "loss": 2.512, + "step": 48920 + }, + { + "epoch": 82.67, + "learning_rate": 8.303023043453624e-05, + "loss": 2.5165, + "step": 48940 + }, + { + "epoch": 82.7, + "learning_rate": 8.299652426963379e-05, + "loss": 2.5069, + "step": 48960 + }, + { + "epoch": 82.74, + "learning_rate": 8.296279152002771e-05, + "loss": 2.5251, + "step": 48980 + }, + { + "epoch": 82.77, + "learning_rate": 8.29290322128959e-05, + "loss": 2.5204, + "step": 49000 + }, + { + "epoch": 82.77, + "eval_loss": 2.746349811553955, + "eval_runtime": 47.9582, + "eval_samples_per_second": 20.622, + "eval_steps_per_second": 0.125, + "eval_tse_ndup": 0.004133415871221496, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03191453978257238, + "eval_tse_type": 0.0001720312324457926, + "step": 49000 + }, + { + "epoch": 82.8, + "learning_rate": 8.28952463754376e-05, + "loss": 2.5142, + "step": 49020 + }, + { + "epoch": 82.84, + "learning_rate": 8.286143403487345e-05, + "loss": 2.5042, + "step": 49040 + }, + { + "epoch": 82.87, + "learning_rate": 8.282759521844545e-05, + "loss": 2.5232, + "step": 49060 + }, + { + "epoch": 82.91, + "learning_rate": 8.279372995341692e-05, + "loss": 2.5167, + "step": 49080 + }, + { + "epoch": 82.94, + "learning_rate": 8.27598382670725e-05, + "loss": 2.5243, + "step": 49100 + }, + { + "epoch": 82.97, + "learning_rate": 8.272592018671809e-05, + "loss": 2.5325, + "step": 49120 + }, + { + "epoch": 83.01, + "learning_rate": 8.269197573968088e-05, + "loss": 2.5253, + "step": 49140 + }, + { + "epoch": 83.04, + "learning_rate": 8.265800495330932e-05, + "loss": 2.442, + "step": 49160 + }, + { + "epoch": 83.07, + "learning_rate": 8.262400785497303e-05, + "loss": 2.4528, + "step": 49180 + }, + { + "epoch": 83.11, + "learning_rate": 8.25899844720629e-05, + "loss": 2.442, + "step": 49200 + }, + { + "epoch": 83.14, + "learning_rate": 8.25559348319909e-05, + "loss": 2.4649, + "step": 49220 + }, + { + "epoch": 83.18, + "learning_rate": 8.252185896219024e-05, + "loss": 2.4605, + "step": 49240 + }, + { + "epoch": 83.21, + "learning_rate": 8.248775689011524e-05, + "loss": 2.4585, + "step": 49260 + }, + { + "epoch": 83.24, + "learning_rate": 8.245362864324131e-05, + "loss": 2.4688, + "step": 49280 + }, + { + "epoch": 83.28, + "learning_rate": 8.241947424906496e-05, + "loss": 2.4734, + "step": 49300 + }, + { + "epoch": 83.31, + "learning_rate": 8.238529373510378e-05, + "loss": 2.4756, + "step": 49320 + }, + { + "epoch": 83.34, + "learning_rate": 8.235108712889637e-05, + "loss": 2.4654, + "step": 49340 + }, + { + "epoch": 83.38, + "learning_rate": 8.23168544580024e-05, + "loss": 2.4779, + "step": 49360 + }, + { + "epoch": 83.41, + "learning_rate": 8.228259575000251e-05, + "loss": 2.496, + "step": 49380 + }, + { + "epoch": 83.45, + "learning_rate": 8.224831103249832e-05, + "loss": 2.4779, + "step": 49400 + }, + { + "epoch": 83.48, + "learning_rate": 8.22140003331124e-05, + "loss": 2.4841, + "step": 49420 + }, + { + "epoch": 83.51, + "learning_rate": 8.217966367948827e-05, + "loss": 2.4896, + "step": 49440 + }, + { + "epoch": 83.55, + "learning_rate": 8.214530109929034e-05, + "loss": 2.5025, + "step": 49460 + }, + { + "epoch": 83.58, + "learning_rate": 8.211091262020393e-05, + "loss": 2.4928, + "step": 49480 + }, + { + "epoch": 83.61, + "learning_rate": 8.207649826993522e-05, + "loss": 2.4943, + "step": 49500 + }, + { + "epoch": 83.65, + "learning_rate": 8.204205807621122e-05, + "loss": 2.5013, + "step": 49520 + }, + { + "epoch": 83.68, + "learning_rate": 8.200759206677979e-05, + "loss": 2.4941, + "step": 49540 + }, + { + "epoch": 83.72, + "learning_rate": 8.197310026940954e-05, + "loss": 2.4958, + "step": 49560 + }, + { + "epoch": 83.75, + "learning_rate": 8.193858271188992e-05, + "loss": 2.5129, + "step": 49580 + }, + { + "epoch": 83.78, + "learning_rate": 8.190403942203109e-05, + "loss": 2.5089, + "step": 49600 + }, + { + "epoch": 83.82, + "learning_rate": 8.186947042766393e-05, + "loss": 2.5179, + "step": 49620 + }, + { + "epoch": 83.85, + "learning_rate": 8.18348757566401e-05, + "loss": 2.5085, + "step": 49640 + }, + { + "epoch": 83.89, + "learning_rate": 8.180025543683188e-05, + "loss": 2.5043, + "step": 49660 + }, + { + "epoch": 83.92, + "learning_rate": 8.176560949613224e-05, + "loss": 2.5093, + "step": 49680 + }, + { + "epoch": 83.95, + "learning_rate": 8.173093796245477e-05, + "loss": 2.5181, + "step": 49700 + }, + { + "epoch": 83.99, + "learning_rate": 8.169624086373371e-05, + "loss": 2.5226, + "step": 49720 + }, + { + "epoch": 84.02, + "learning_rate": 8.166151822792389e-05, + "loss": 2.4702, + "step": 49740 + }, + { + "epoch": 84.05, + "learning_rate": 8.162677008300073e-05, + "loss": 2.4166, + "step": 49760 + }, + { + "epoch": 84.09, + "learning_rate": 8.159199645696016e-05, + "loss": 2.4464, + "step": 49780 + }, + { + "epoch": 84.12, + "learning_rate": 8.155893793585413e-05, + "loss": 2.45, + "step": 49800 + }, + { + "epoch": 84.16, + "learning_rate": 8.152411470223569e-05, + "loss": 2.432, + "step": 49820 + }, + { + "epoch": 84.19, + "learning_rate": 8.148926607020743e-05, + "loss": 2.4586, + "step": 49840 + }, + { + "epoch": 84.22, + "learning_rate": 8.145439206784626e-05, + "loss": 2.4687, + "step": 49860 + }, + { + "epoch": 84.26, + "learning_rate": 8.141949272324953e-05, + "loss": 2.4685, + "step": 49880 + }, + { + "epoch": 84.29, + "learning_rate": 8.138456806453503e-05, + "loss": 2.4564, + "step": 49900 + }, + { + "epoch": 84.32, + "learning_rate": 8.13496181198409e-05, + "loss": 2.4827, + "step": 49920 + }, + { + "epoch": 84.36, + "learning_rate": 8.131464291732572e-05, + "loss": 2.4427, + "step": 49940 + }, + { + "epoch": 84.39, + "learning_rate": 8.127964248516832e-05, + "loss": 2.4619, + "step": 49960 + }, + { + "epoch": 84.43, + "learning_rate": 8.124461685156795e-05, + "loss": 2.4702, + "step": 49980 + }, + { + "epoch": 84.46, + "learning_rate": 8.120956604474415e-05, + "loss": 2.4752, + "step": 50000 + }, + { + "epoch": 84.46, + "eval_loss": 2.737753391265869, + "eval_runtime": 47.6026, + "eval_samples_per_second": 20.776, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004968291666702018, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03121488428037335, + "eval_tse_type": 0.00018977318537182926, + "step": 50000 + }, + { + "epoch": 84.49, + "learning_rate": 8.117449009293668e-05, + "loss": 2.4778, + "step": 50020 + }, + { + "epoch": 84.53, + "learning_rate": 8.113938902440564e-05, + "loss": 2.4765, + "step": 50040 + }, + { + "epoch": 84.56, + "learning_rate": 8.110426286743129e-05, + "loss": 2.4681, + "step": 50060 + }, + { + "epoch": 84.59, + "learning_rate": 8.106911165031415e-05, + "loss": 2.5011, + "step": 50080 + }, + { + "epoch": 84.63, + "learning_rate": 8.103393540137496e-05, + "loss": 2.4837, + "step": 50100 + }, + { + "epoch": 84.66, + "learning_rate": 8.099873414895453e-05, + "loss": 2.4784, + "step": 50120 + }, + { + "epoch": 84.7, + "learning_rate": 8.096350792141392e-05, + "loss": 2.4975, + "step": 50140 + }, + { + "epoch": 84.73, + "learning_rate": 8.092825674713425e-05, + "loss": 2.4959, + "step": 50160 + }, + { + "epoch": 84.76, + "learning_rate": 8.089298065451672e-05, + "loss": 2.4922, + "step": 50180 + }, + { + "epoch": 84.8, + "learning_rate": 8.085767967198269e-05, + "loss": 2.4765, + "step": 50200 + }, + { + "epoch": 84.83, + "learning_rate": 8.082235382797349e-05, + "loss": 2.4971, + "step": 50220 + }, + { + "epoch": 84.86, + "learning_rate": 8.078700315095055e-05, + "loss": 2.5075, + "step": 50240 + }, + { + "epoch": 84.9, + "learning_rate": 8.075162766939526e-05, + "loss": 2.4855, + "step": 50260 + }, + { + "epoch": 84.93, + "learning_rate": 8.071622741180898e-05, + "loss": 2.5004, + "step": 50280 + }, + { + "epoch": 84.97, + "learning_rate": 8.068080240671308e-05, + "loss": 2.5046, + "step": 50300 + }, + { + "epoch": 85.0, + "learning_rate": 8.064535268264883e-05, + "loss": 2.5152, + "step": 50320 + }, + { + "epoch": 85.03, + "learning_rate": 8.060987826817745e-05, + "loss": 2.4181, + "step": 50340 + }, + { + "epoch": 85.07, + "learning_rate": 8.057437919188005e-05, + "loss": 2.4209, + "step": 50360 + }, + { + "epoch": 85.1, + "learning_rate": 8.053885548235755e-05, + "loss": 2.4479, + "step": 50380 + }, + { + "epoch": 85.14, + "learning_rate": 8.05033071682308e-05, + "loss": 2.4261, + "step": 50400 + }, + { + "epoch": 85.17, + "learning_rate": 8.046773427814042e-05, + "loss": 2.4532, + "step": 50420 + }, + { + "epoch": 85.2, + "learning_rate": 8.043213684074684e-05, + "loss": 2.4296, + "step": 50440 + }, + { + "epoch": 85.24, + "learning_rate": 8.039651488473028e-05, + "loss": 2.4408, + "step": 50460 + }, + { + "epoch": 85.27, + "learning_rate": 8.03608684387907e-05, + "loss": 2.4477, + "step": 50480 + }, + { + "epoch": 85.3, + "learning_rate": 8.03251975316478e-05, + "loss": 2.4433, + "step": 50500 + }, + { + "epoch": 85.34, + "learning_rate": 8.0289502192041e-05, + "loss": 2.457, + "step": 50520 + }, + { + "epoch": 85.37, + "learning_rate": 8.025378244872936e-05, + "loss": 2.4557, + "step": 50540 + }, + { + "epoch": 85.41, + "learning_rate": 8.021803833049166e-05, + "loss": 2.446, + "step": 50560 + }, + { + "epoch": 85.44, + "learning_rate": 8.01822698661263e-05, + "loss": 2.4528, + "step": 50580 + }, + { + "epoch": 85.47, + "learning_rate": 8.014647708445124e-05, + "loss": 2.4858, + "step": 50600 + }, + { + "epoch": 85.51, + "learning_rate": 8.011066001430412e-05, + "loss": 2.4548, + "step": 50620 + }, + { + "epoch": 85.54, + "learning_rate": 8.007481868454208e-05, + "loss": 2.4599, + "step": 50640 + }, + { + "epoch": 85.57, + "learning_rate": 8.003895312404183e-05, + "loss": 2.4664, + "step": 50660 + }, + { + "epoch": 85.61, + "learning_rate": 8.000306336169963e-05, + "loss": 2.4781, + "step": 50680 + }, + { + "epoch": 85.64, + "learning_rate": 7.99671494264312e-05, + "loss": 2.4777, + "step": 50700 + }, + { + "epoch": 85.68, + "learning_rate": 7.993121134717177e-05, + "loss": 2.4732, + "step": 50720 + }, + { + "epoch": 85.71, + "learning_rate": 7.989524915287595e-05, + "loss": 2.4783, + "step": 50740 + }, + { + "epoch": 85.74, + "learning_rate": 7.985926287251787e-05, + "loss": 2.478, + "step": 50760 + }, + { + "epoch": 85.78, + "learning_rate": 7.982325253509102e-05, + "loss": 2.4904, + "step": 50780 + }, + { + "epoch": 85.81, + "learning_rate": 7.978721816960826e-05, + "loss": 2.4812, + "step": 50800 + }, + { + "epoch": 85.84, + "learning_rate": 7.975115980510187e-05, + "loss": 2.487, + "step": 50820 + }, + { + "epoch": 85.88, + "learning_rate": 7.971507747062337e-05, + "loss": 2.4845, + "step": 50840 + }, + { + "epoch": 85.91, + "learning_rate": 7.967897119524368e-05, + "loss": 2.4728, + "step": 50860 + }, + { + "epoch": 85.95, + "learning_rate": 7.964284100805297e-05, + "loss": 2.5053, + "step": 50880 + }, + { + "epoch": 85.98, + "learning_rate": 7.960668693816067e-05, + "loss": 2.5008, + "step": 50900 + }, + { + "epoch": 86.01, + "learning_rate": 7.957050901469545e-05, + "loss": 2.4573, + "step": 50920 + }, + { + "epoch": 86.05, + "learning_rate": 7.953430726680524e-05, + "loss": 2.4049, + "step": 50940 + }, + { + "epoch": 86.08, + "learning_rate": 7.949808172365713e-05, + "loss": 2.4079, + "step": 50960 + }, + { + "epoch": 86.11, + "learning_rate": 7.946183241443736e-05, + "loss": 2.4051, + "step": 50980 + }, + { + "epoch": 86.15, + "learning_rate": 7.942555936835135e-05, + "loss": 2.4396, + "step": 51000 + }, + { + "epoch": 86.15, + "eval_loss": 2.71986985206604, + "eval_runtime": 47.2943, + "eval_samples_per_second": 20.912, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.004861320099787565, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03205852765832834, + "eval_tse_type": 0.0002071396472306482, + "step": 51000 + }, + { + "epoch": 86.18, + "learning_rate": 7.938926261462366e-05, + "loss": 2.428, + "step": 51020 + }, + { + "epoch": 86.22, + "learning_rate": 7.935294218249791e-05, + "loss": 2.4384, + "step": 51040 + }, + { + "epoch": 86.25, + "learning_rate": 7.931659810123683e-05, + "loss": 2.438, + "step": 51060 + }, + { + "epoch": 86.28, + "learning_rate": 7.928023040012216e-05, + "loss": 2.45, + "step": 51080 + }, + { + "epoch": 86.32, + "learning_rate": 7.924383910845474e-05, + "loss": 2.431, + "step": 51100 + }, + { + "epoch": 86.35, + "learning_rate": 7.920742425555436e-05, + "loss": 2.4443, + "step": 51120 + }, + { + "epoch": 86.39, + "learning_rate": 7.91709858707598e-05, + "loss": 2.4404, + "step": 51140 + }, + { + "epoch": 86.42, + "learning_rate": 7.913452398342881e-05, + "loss": 2.4507, + "step": 51160 + }, + { + "epoch": 86.45, + "learning_rate": 7.909803862293808e-05, + "loss": 2.4397, + "step": 51180 + }, + { + "epoch": 86.49, + "learning_rate": 7.906152981868321e-05, + "loss": 2.4595, + "step": 51200 + }, + { + "epoch": 86.52, + "learning_rate": 7.902499760007867e-05, + "loss": 2.4458, + "step": 51220 + }, + { + "epoch": 86.55, + "learning_rate": 7.898844199655784e-05, + "loss": 2.4712, + "step": 51240 + }, + { + "epoch": 86.59, + "learning_rate": 7.895186303757287e-05, + "loss": 2.4648, + "step": 51260 + }, + { + "epoch": 86.62, + "learning_rate": 7.89152607525948e-05, + "loss": 2.4638, + "step": 51280 + }, + { + "epoch": 86.66, + "learning_rate": 7.887863517111338e-05, + "loss": 2.4718, + "step": 51300 + }, + { + "epoch": 86.69, + "learning_rate": 7.884198632263724e-05, + "loss": 2.475, + "step": 51320 + }, + { + "epoch": 86.72, + "learning_rate": 7.880531423669366e-05, + "loss": 2.4567, + "step": 51340 + }, + { + "epoch": 86.76, + "learning_rate": 7.876861894282869e-05, + "loss": 2.4677, + "step": 51360 + }, + { + "epoch": 86.79, + "learning_rate": 7.873190047060706e-05, + "loss": 2.477, + "step": 51380 + }, + { + "epoch": 86.82, + "learning_rate": 7.869515884961218e-05, + "loss": 2.4794, + "step": 51400 + }, + { + "epoch": 86.86, + "learning_rate": 7.865839410944612e-05, + "loss": 2.4674, + "step": 51420 + }, + { + "epoch": 86.89, + "learning_rate": 7.862160627972955e-05, + "loss": 2.4613, + "step": 51440 + }, + { + "epoch": 86.93, + "learning_rate": 7.858479539010177e-05, + "loss": 2.4816, + "step": 51460 + }, + { + "epoch": 86.96, + "learning_rate": 7.854796147022065e-05, + "loss": 2.4687, + "step": 51480 + }, + { + "epoch": 86.99, + "learning_rate": 7.85111045497626e-05, + "loss": 2.4811, + "step": 51500 + }, + { + "epoch": 87.03, + "learning_rate": 7.84742246584226e-05, + "loss": 2.4159, + "step": 51520 + }, + { + "epoch": 87.06, + "learning_rate": 7.84373218259141e-05, + "loss": 2.4262, + "step": 51540 + }, + { + "epoch": 87.09, + "learning_rate": 7.840039608196904e-05, + "loss": 2.3902, + "step": 51560 + }, + { + "epoch": 87.13, + "learning_rate": 7.836344745633783e-05, + "loss": 2.4049, + "step": 51580 + }, + { + "epoch": 87.16, + "learning_rate": 7.832647597878931e-05, + "loss": 2.4193, + "step": 51600 + }, + { + "epoch": 87.2, + "learning_rate": 7.828948167911074e-05, + "loss": 2.4177, + "step": 51620 + }, + { + "epoch": 87.23, + "learning_rate": 7.825246458710773e-05, + "loss": 2.4203, + "step": 51640 + }, + { + "epoch": 87.26, + "learning_rate": 7.821542473260432e-05, + "loss": 2.4161, + "step": 51660 + }, + { + "epoch": 87.3, + "learning_rate": 7.817836214544283e-05, + "loss": 2.4194, + "step": 51680 + }, + { + "epoch": 87.33, + "learning_rate": 7.814127685548391e-05, + "loss": 2.4312, + "step": 51700 + }, + { + "epoch": 87.36, + "learning_rate": 7.810416889260653e-05, + "loss": 2.4292, + "step": 51720 + }, + { + "epoch": 87.4, + "learning_rate": 7.80670382867079e-05, + "loss": 2.4251, + "step": 51740 + }, + { + "epoch": 87.43, + "learning_rate": 7.802988506770347e-05, + "loss": 2.439, + "step": 51760 + }, + { + "epoch": 87.47, + "learning_rate": 7.799270926552693e-05, + "loss": 2.4419, + "step": 51780 + }, + { + "epoch": 87.5, + "learning_rate": 7.795551091013013e-05, + "loss": 2.4607, + "step": 51800 + }, + { + "epoch": 87.53, + "learning_rate": 7.791829003148312e-05, + "loss": 2.4635, + "step": 51820 + }, + { + "epoch": 87.57, + "learning_rate": 7.78829093619214e-05, + "loss": 2.4433, + "step": 51840 + }, + { + "epoch": 87.6, + "learning_rate": 7.784564464920654e-05, + "loss": 2.4617, + "step": 51860 + }, + { + "epoch": 87.64, + "learning_rate": 7.780835750175874e-05, + "loss": 2.4412, + "step": 51880 + }, + { + "epoch": 87.67, + "learning_rate": 7.777104794961957e-05, + "loss": 2.46, + "step": 51900 + }, + { + "epoch": 87.7, + "learning_rate": 7.773371602284869e-05, + "loss": 2.4576, + "step": 51920 + }, + { + "epoch": 87.74, + "learning_rate": 7.769636175152374e-05, + "loss": 2.4639, + "step": 51940 + }, + { + "epoch": 87.77, + "learning_rate": 7.765898516574038e-05, + "loss": 2.4442, + "step": 51960 + }, + { + "epoch": 87.8, + "learning_rate": 7.762158629561225e-05, + "loss": 2.4641, + "step": 51980 + }, + { + "epoch": 87.84, + "learning_rate": 7.758416517127094e-05, + "loss": 2.4595, + "step": 52000 + }, + { + "epoch": 87.84, + "eval_loss": 2.7032272815704346, + "eval_runtime": 50.8829, + "eval_samples_per_second": 19.437, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.004222459898376891, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03180085910706684, + "eval_tse_type": 9.6548140658353e-05, + "step": 52000 + }, + { + "epoch": 87.87, + "learning_rate": 7.7546721822866e-05, + "loss": 2.4544, + "step": 52020 + }, + { + "epoch": 87.91, + "learning_rate": 7.750925628056482e-05, + "loss": 2.4573, + "step": 52040 + }, + { + "epoch": 87.94, + "learning_rate": 7.747176857455275e-05, + "loss": 2.4697, + "step": 52060 + }, + { + "epoch": 87.97, + "learning_rate": 7.743425873503294e-05, + "loss": 2.4598, + "step": 52080 + }, + { + "epoch": 88.01, + "learning_rate": 7.739672679222638e-05, + "loss": 2.4552, + "step": 52100 + }, + { + "epoch": 88.04, + "learning_rate": 7.735917277637189e-05, + "loss": 2.3728, + "step": 52120 + }, + { + "epoch": 88.07, + "learning_rate": 7.732159671772605e-05, + "loss": 2.4024, + "step": 52140 + }, + { + "epoch": 88.11, + "learning_rate": 7.728399864656324e-05, + "loss": 2.3929, + "step": 52160 + }, + { + "epoch": 88.14, + "learning_rate": 7.724637859317551e-05, + "loss": 2.4046, + "step": 52180 + }, + { + "epoch": 88.18, + "learning_rate": 7.720873658787268e-05, + "loss": 2.4109, + "step": 52200 + }, + { + "epoch": 88.21, + "learning_rate": 7.717107266098225e-05, + "loss": 2.4293, + "step": 52220 + }, + { + "epoch": 88.24, + "learning_rate": 7.713338684284932e-05, + "loss": 2.4281, + "step": 52240 + }, + { + "epoch": 88.28, + "learning_rate": 7.709567916383672e-05, + "loss": 2.4039, + "step": 52260 + }, + { + "epoch": 88.31, + "learning_rate": 7.705794965432481e-05, + "loss": 2.4153, + "step": 52280 + }, + { + "epoch": 88.34, + "learning_rate": 7.702019834471159e-05, + "loss": 2.4153, + "step": 52300 + }, + { + "epoch": 88.38, + "learning_rate": 7.698242526541262e-05, + "loss": 2.4314, + "step": 52320 + }, + { + "epoch": 88.41, + "learning_rate": 7.694463044686095e-05, + "loss": 2.4365, + "step": 52340 + }, + { + "epoch": 88.45, + "learning_rate": 7.690681391950723e-05, + "loss": 2.4258, + "step": 52360 + }, + { + "epoch": 88.48, + "learning_rate": 7.686897571381952e-05, + "loss": 2.4224, + "step": 52380 + }, + { + "epoch": 88.51, + "learning_rate": 7.68311158602834e-05, + "loss": 2.4431, + "step": 52400 + }, + { + "epoch": 88.55, + "learning_rate": 7.679323438940184e-05, + "loss": 2.4327, + "step": 52420 + }, + { + "epoch": 88.58, + "learning_rate": 7.67553313316953e-05, + "loss": 2.4403, + "step": 52440 + }, + { + "epoch": 88.61, + "learning_rate": 7.671740671770153e-05, + "loss": 2.4478, + "step": 52460 + }, + { + "epoch": 88.65, + "learning_rate": 7.667946057797578e-05, + "loss": 2.4315, + "step": 52480 + }, + { + "epoch": 88.68, + "learning_rate": 7.664149294309051e-05, + "loss": 2.4405, + "step": 52500 + }, + { + "epoch": 88.72, + "learning_rate": 7.66035038436356e-05, + "loss": 2.4434, + "step": 52520 + }, + { + "epoch": 88.75, + "learning_rate": 7.656549331021814e-05, + "loss": 2.4497, + "step": 52540 + }, + { + "epoch": 88.78, + "learning_rate": 7.652746137346255e-05, + "loss": 2.4517, + "step": 52560 + }, + { + "epoch": 88.82, + "learning_rate": 7.648940806401048e-05, + "loss": 2.4465, + "step": 52580 + }, + { + "epoch": 88.85, + "learning_rate": 7.645133341252078e-05, + "loss": 2.436, + "step": 52600 + }, + { + "epoch": 88.89, + "learning_rate": 7.641323744966953e-05, + "loss": 2.441, + "step": 52620 + }, + { + "epoch": 88.92, + "learning_rate": 7.637512020614995e-05, + "loss": 2.4451, + "step": 52640 + }, + { + "epoch": 88.95, + "learning_rate": 7.633698171267241e-05, + "loss": 2.4658, + "step": 52660 + }, + { + "epoch": 88.99, + "learning_rate": 7.629882199996441e-05, + "loss": 2.4586, + "step": 52680 + }, + { + "epoch": 89.02, + "learning_rate": 7.626064109877054e-05, + "loss": 2.3964, + "step": 52700 + }, + { + "epoch": 89.05, + "learning_rate": 7.622243903985245e-05, + "loss": 2.3635, + "step": 52720 + }, + { + "epoch": 89.09, + "learning_rate": 7.618421585398885e-05, + "loss": 2.3716, + "step": 52740 + }, + { + "epoch": 89.12, + "learning_rate": 7.61459715719755e-05, + "loss": 2.3746, + "step": 52760 + }, + { + "epoch": 89.16, + "learning_rate": 7.610770622462508e-05, + "loss": 2.3948, + "step": 52780 + }, + { + "epoch": 89.19, + "learning_rate": 7.606941984276734e-05, + "loss": 2.3839, + "step": 52800 + }, + { + "epoch": 89.22, + "learning_rate": 7.60311124572489e-05, + "loss": 2.4137, + "step": 52820 + }, + { + "epoch": 89.26, + "learning_rate": 7.599278409893334e-05, + "loss": 2.4058, + "step": 52840 + }, + { + "epoch": 89.29, + "learning_rate": 7.59544347987011e-05, + "loss": 2.4233, + "step": 52860 + }, + { + "epoch": 89.32, + "learning_rate": 7.591606458744955e-05, + "loss": 2.4234, + "step": 52880 + }, + { + "epoch": 89.36, + "learning_rate": 7.587767349609284e-05, + "loss": 2.4087, + "step": 52900 + }, + { + "epoch": 89.39, + "learning_rate": 7.583926155556203e-05, + "loss": 2.418, + "step": 52920 + }, + { + "epoch": 89.43, + "learning_rate": 7.580082879680488e-05, + "loss": 2.41, + "step": 52940 + }, + { + "epoch": 89.46, + "learning_rate": 7.5762375250786e-05, + "loss": 2.4289, + "step": 52960 + }, + { + "epoch": 89.49, + "learning_rate": 7.572390094848669e-05, + "loss": 2.4167, + "step": 52980 + }, + { + "epoch": 89.53, + "learning_rate": 7.568540592090503e-05, + "loss": 2.4164, + "step": 53000 + }, + { + "epoch": 89.53, + "eval_loss": 2.6998112201690674, + "eval_runtime": 47.7256, + "eval_samples_per_second": 20.723, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.006779113199156035, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03221942088756424, + "eval_tse_type": 0.00011761318952926637, + "step": 53000 + }, + { + "epoch": 89.56, + "learning_rate": 7.564689019905575e-05, + "loss": 2.4375, + "step": 53020 + }, + { + "epoch": 89.59, + "learning_rate": 7.560835381397027e-05, + "loss": 2.4225, + "step": 53040 + }, + { + "epoch": 89.63, + "learning_rate": 7.556979679669666e-05, + "loss": 2.4205, + "step": 53060 + }, + { + "epoch": 89.66, + "learning_rate": 7.553121917829962e-05, + "loss": 2.4393, + "step": 53080 + }, + { + "epoch": 89.7, + "learning_rate": 7.549262098986045e-05, + "loss": 2.4471, + "step": 53100 + }, + { + "epoch": 89.73, + "learning_rate": 7.545400226247699e-05, + "loss": 2.4393, + "step": 53120 + }, + { + "epoch": 89.76, + "learning_rate": 7.541536302726367e-05, + "loss": 2.428, + "step": 53140 + }, + { + "epoch": 89.8, + "learning_rate": 7.537670331535138e-05, + "loss": 2.4348, + "step": 53160 + }, + { + "epoch": 89.83, + "learning_rate": 7.533802315788762e-05, + "loss": 2.4507, + "step": 53180 + }, + { + "epoch": 89.86, + "learning_rate": 7.529932258603626e-05, + "loss": 2.4357, + "step": 53200 + }, + { + "epoch": 89.9, + "learning_rate": 7.526060163097766e-05, + "loss": 2.4426, + "step": 53220 + }, + { + "epoch": 89.93, + "learning_rate": 7.522186032390857e-05, + "loss": 2.4383, + "step": 53240 + }, + { + "epoch": 89.97, + "learning_rate": 7.518309869604219e-05, + "loss": 2.4451, + "step": 53260 + }, + { + "epoch": 90.0, + "learning_rate": 7.514431677860805e-05, + "loss": 2.4421, + "step": 53280 + }, + { + "epoch": 90.03, + "learning_rate": 7.510551460285202e-05, + "loss": 2.3711, + "step": 53300 + }, + { + "epoch": 90.07, + "learning_rate": 7.506669220003637e-05, + "loss": 2.3711, + "step": 53320 + }, + { + "epoch": 90.1, + "learning_rate": 7.502784960143955e-05, + "loss": 2.3738, + "step": 53340 + }, + { + "epoch": 90.14, + "learning_rate": 7.498898683835637e-05, + "loss": 2.3822, + "step": 53360 + }, + { + "epoch": 90.17, + "learning_rate": 7.495010394209785e-05, + "loss": 2.3925, + "step": 53380 + }, + { + "epoch": 90.2, + "learning_rate": 7.491120094399124e-05, + "loss": 2.3825, + "step": 53400 + }, + { + "epoch": 90.24, + "learning_rate": 7.487227787537997e-05, + "loss": 2.3945, + "step": 53420 + }, + { + "epoch": 90.27, + "learning_rate": 7.483333476762366e-05, + "loss": 2.3954, + "step": 53440 + }, + { + "epoch": 90.3, + "learning_rate": 7.479437165209808e-05, + "loss": 2.4023, + "step": 53460 + }, + { + "epoch": 90.34, + "learning_rate": 7.475538856019511e-05, + "loss": 2.393, + "step": 53480 + }, + { + "epoch": 90.37, + "learning_rate": 7.47163855233227e-05, + "loss": 2.3872, + "step": 53500 + }, + { + "epoch": 90.41, + "learning_rate": 7.467736257290492e-05, + "loss": 2.405, + "step": 53520 + }, + { + "epoch": 90.44, + "learning_rate": 7.463831974038182e-05, + "loss": 2.4058, + "step": 53540 + }, + { + "epoch": 90.47, + "learning_rate": 7.459925705720954e-05, + "loss": 2.401, + "step": 53560 + }, + { + "epoch": 90.51, + "learning_rate": 7.456017455486017e-05, + "loss": 2.4124, + "step": 53580 + }, + { + "epoch": 90.54, + "learning_rate": 7.452107226482176e-05, + "loss": 2.4088, + "step": 53600 + }, + { + "epoch": 90.57, + "learning_rate": 7.448195021859834e-05, + "loss": 2.4122, + "step": 53620 + }, + { + "epoch": 90.61, + "learning_rate": 7.444280844770981e-05, + "loss": 2.4161, + "step": 53640 + }, + { + "epoch": 90.64, + "learning_rate": 7.440364698369202e-05, + "loss": 2.4245, + "step": 53660 + }, + { + "epoch": 90.68, + "learning_rate": 7.436446585809663e-05, + "loss": 2.4139, + "step": 53680 + }, + { + "epoch": 90.71, + "learning_rate": 7.432526510249117e-05, + "loss": 2.4357, + "step": 53700 + }, + { + "epoch": 90.74, + "learning_rate": 7.4286044748459e-05, + "loss": 2.4225, + "step": 53720 + }, + { + "epoch": 90.78, + "learning_rate": 7.424680482759921e-05, + "loss": 2.4384, + "step": 53740 + }, + { + "epoch": 90.81, + "learning_rate": 7.420754537152674e-05, + "loss": 2.43, + "step": 53760 + }, + { + "epoch": 90.84, + "learning_rate": 7.416826641187219e-05, + "loss": 2.4221, + "step": 53780 + }, + { + "epoch": 90.88, + "learning_rate": 7.412896798028194e-05, + "loss": 2.4463, + "step": 53800 + }, + { + "epoch": 90.91, + "learning_rate": 7.4089650108418e-05, + "loss": 2.4299, + "step": 53820 + }, + { + "epoch": 90.95, + "learning_rate": 7.405031282795807e-05, + "loss": 2.42, + "step": 53840 + }, + { + "epoch": 90.98, + "learning_rate": 7.401095617059552e-05, + "loss": 2.4391, + "step": 53860 + }, + { + "epoch": 91.01, + "learning_rate": 7.397158016803925e-05, + "loss": 2.4055, + "step": 53880 + }, + { + "epoch": 91.05, + "learning_rate": 7.393218485201383e-05, + "loss": 2.3565, + "step": 53900 + }, + { + "epoch": 91.08, + "learning_rate": 7.389277025425933e-05, + "loss": 2.3671, + "step": 53920 + }, + { + "epoch": 91.11, + "learning_rate": 7.38533364065314e-05, + "loss": 2.3723, + "step": 53940 + }, + { + "epoch": 91.15, + "learning_rate": 7.381388334060118e-05, + "loss": 2.3535, + "step": 53960 + }, + { + "epoch": 91.18, + "learning_rate": 7.377441108825526e-05, + "loss": 2.3746, + "step": 53980 + }, + { + "epoch": 91.22, + "learning_rate": 7.373491968129577e-05, + "loss": 2.3736, + "step": 54000 + }, + { + "epoch": 91.22, + "eval_loss": 2.683696985244751, + "eval_runtime": 47.5331, + "eval_samples_per_second": 20.807, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.003875717581254406, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03362074664529218, + "eval_tse_type": 0.00017729749466352098, + "step": 54000 + }, + { + "epoch": 91.25, + "learning_rate": 7.369540915154018e-05, + "loss": 2.383, + "step": 54020 + }, + { + "epoch": 91.28, + "learning_rate": 7.365587953082142e-05, + "loss": 2.3846, + "step": 54040 + }, + { + "epoch": 91.32, + "learning_rate": 7.361633085098781e-05, + "loss": 2.4035, + "step": 54060 + }, + { + "epoch": 91.35, + "learning_rate": 7.3576763143903e-05, + "loss": 2.3946, + "step": 54080 + }, + { + "epoch": 91.39, + "learning_rate": 7.353717644144598e-05, + "loss": 2.3772, + "step": 54100 + }, + { + "epoch": 91.42, + "learning_rate": 7.349757077551101e-05, + "loss": 2.3934, + "step": 54120 + }, + { + "epoch": 91.45, + "learning_rate": 7.34579461780077e-05, + "loss": 2.3944, + "step": 54140 + }, + { + "epoch": 91.49, + "learning_rate": 7.341830268086084e-05, + "loss": 2.4028, + "step": 54160 + }, + { + "epoch": 91.52, + "learning_rate": 7.33786403160105e-05, + "loss": 2.3973, + "step": 54180 + }, + { + "epoch": 91.55, + "learning_rate": 7.333895911541194e-05, + "loss": 2.4226, + "step": 54200 + }, + { + "epoch": 91.59, + "learning_rate": 7.329925911103556e-05, + "loss": 2.4169, + "step": 54220 + }, + { + "epoch": 91.62, + "learning_rate": 7.325954033486695e-05, + "loss": 2.4138, + "step": 54240 + }, + { + "epoch": 91.66, + "learning_rate": 7.321980281890682e-05, + "loss": 2.3961, + "step": 54260 + }, + { + "epoch": 91.69, + "learning_rate": 7.318004659517095e-05, + "loss": 2.3989, + "step": 54280 + }, + { + "epoch": 91.72, + "learning_rate": 7.314226088371854e-05, + "loss": 2.4183, + "step": 54300 + }, + { + "epoch": 91.76, + "learning_rate": 7.31024682719625e-05, + "loss": 2.4142, + "step": 54320 + }, + { + "epoch": 91.79, + "learning_rate": 7.306265704696504e-05, + "loss": 2.4108, + "step": 54340 + }, + { + "epoch": 91.82, + "learning_rate": 7.302282724080138e-05, + "loss": 2.4308, + "step": 54360 + }, + { + "epoch": 91.86, + "learning_rate": 7.298297888556164e-05, + "loss": 2.4199, + "step": 54380 + }, + { + "epoch": 91.89, + "learning_rate": 7.294311201335093e-05, + "loss": 2.4239, + "step": 54400 + }, + { + "epoch": 91.93, + "learning_rate": 7.290322665628928e-05, + "loss": 2.414, + "step": 54420 + }, + { + "epoch": 91.96, + "learning_rate": 7.286332284651159e-05, + "loss": 2.423, + "step": 54440 + }, + { + "epoch": 91.99, + "learning_rate": 7.282340061616766e-05, + "loss": 2.4306, + "step": 54460 + }, + { + "epoch": 92.03, + "learning_rate": 7.278345999742208e-05, + "loss": 2.3636, + "step": 54480 + }, + { + "epoch": 92.06, + "learning_rate": 7.274350102245431e-05, + "loss": 2.3382, + "step": 54500 + }, + { + "epoch": 92.09, + "learning_rate": 7.270352372345855e-05, + "loss": 2.3477, + "step": 54520 + }, + { + "epoch": 92.13, + "learning_rate": 7.266352813264378e-05, + "loss": 2.361, + "step": 54540 + }, + { + "epoch": 92.16, + "learning_rate": 7.262351428223378e-05, + "loss": 2.3566, + "step": 54560 + }, + { + "epoch": 92.2, + "learning_rate": 7.258348220446695e-05, + "loss": 2.3907, + "step": 54580 + }, + { + "epoch": 92.23, + "learning_rate": 7.25434319315964e-05, + "loss": 2.3643, + "step": 54600 + }, + { + "epoch": 92.26, + "learning_rate": 7.250336349588994e-05, + "loss": 2.3844, + "step": 54620 + }, + { + "epoch": 92.3, + "learning_rate": 7.246327692962996e-05, + "loss": 2.3964, + "step": 54640 + }, + { + "epoch": 92.33, + "learning_rate": 7.24231722651135e-05, + "loss": 2.3855, + "step": 54660 + }, + { + "epoch": 92.36, + "learning_rate": 7.238304953465217e-05, + "loss": 2.3828, + "step": 54680 + }, + { + "epoch": 92.4, + "learning_rate": 7.234290877057208e-05, + "loss": 2.3719, + "step": 54700 + }, + { + "epoch": 92.43, + "learning_rate": 7.230275000521398e-05, + "loss": 2.3786, + "step": 54720 + }, + { + "epoch": 92.47, + "learning_rate": 7.226257327093304e-05, + "loss": 2.3923, + "step": 54740 + }, + { + "epoch": 92.5, + "learning_rate": 7.222237860009892e-05, + "loss": 2.3972, + "step": 54760 + }, + { + "epoch": 92.53, + "learning_rate": 7.218216602509574e-05, + "loss": 2.3888, + "step": 54780 + }, + { + "epoch": 92.57, + "learning_rate": 7.214193557832206e-05, + "loss": 2.3937, + "step": 54800 + }, + { + "epoch": 92.6, + "learning_rate": 7.21016872921908e-05, + "loss": 2.3849, + "step": 54820 + }, + { + "epoch": 92.64, + "learning_rate": 7.206142119912931e-05, + "loss": 2.3849, + "step": 54840 + }, + { + "epoch": 92.67, + "learning_rate": 7.202113733157923e-05, + "loss": 2.4095, + "step": 54860 + }, + { + "epoch": 92.7, + "learning_rate": 7.198083572199657e-05, + "loss": 2.4029, + "step": 54880 + }, + { + "epoch": 92.74, + "learning_rate": 7.194051640285157e-05, + "loss": 2.3937, + "step": 54900 + }, + { + "epoch": 92.77, + "learning_rate": 7.190017940662878e-05, + "loss": 2.4132, + "step": 54920 + }, + { + "epoch": 92.8, + "learning_rate": 7.185982476582705e-05, + "loss": 2.3987, + "step": 54940 + }, + { + "epoch": 92.84, + "learning_rate": 7.181945251295931e-05, + "loss": 2.4036, + "step": 54960 + }, + { + "epoch": 92.87, + "learning_rate": 7.17790626805528e-05, + "loss": 2.4164, + "step": 54980 + }, + { + "epoch": 92.91, + "learning_rate": 7.173865530114886e-05, + "loss": 2.4191, + "step": 55000 + }, + { + "epoch": 92.91, + "eval_loss": 2.6698572635650635, + "eval_runtime": 50.8701, + "eval_samples_per_second": 19.442, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.004103632794998836, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03406290900649943, + "eval_tse_type": 0.00011936861026850915, + "step": 55000 + }, + { + "epoch": 92.94, + "learning_rate": 7.1698230407303e-05, + "loss": 2.4115, + "step": 55020 + }, + { + "epoch": 92.97, + "learning_rate": 7.16577880315848e-05, + "loss": 2.4227, + "step": 55040 + }, + { + "epoch": 93.01, + "learning_rate": 7.161732820657799e-05, + "loss": 2.3872, + "step": 55060 + }, + { + "epoch": 93.04, + "learning_rate": 7.157685096488029e-05, + "loss": 2.3322, + "step": 55080 + }, + { + "epoch": 93.07, + "learning_rate": 7.153635633910349e-05, + "loss": 2.3608, + "step": 55100 + }, + { + "epoch": 93.11, + "learning_rate": 7.149584436187338e-05, + "loss": 2.3444, + "step": 55120 + }, + { + "epoch": 93.14, + "learning_rate": 7.145531506582975e-05, + "loss": 2.3532, + "step": 55140 + }, + { + "epoch": 93.18, + "learning_rate": 7.141476848362627e-05, + "loss": 2.3356, + "step": 55160 + }, + { + "epoch": 93.21, + "learning_rate": 7.137420464793063e-05, + "loss": 2.3647, + "step": 55180 + }, + { + "epoch": 93.24, + "learning_rate": 7.133362359142439e-05, + "loss": 2.3639, + "step": 55200 + }, + { + "epoch": 93.28, + "learning_rate": 7.129302534680293e-05, + "loss": 2.3628, + "step": 55220 + }, + { + "epoch": 93.31, + "learning_rate": 7.125240994677557e-05, + "loss": 2.371, + "step": 55240 + }, + { + "epoch": 93.34, + "learning_rate": 7.121177742406534e-05, + "loss": 2.3581, + "step": 55260 + }, + { + "epoch": 93.38, + "learning_rate": 7.11711278114092e-05, + "loss": 2.3809, + "step": 55280 + }, + { + "epoch": 93.41, + "learning_rate": 7.113046114155777e-05, + "loss": 2.3773, + "step": 55300 + }, + { + "epoch": 93.45, + "learning_rate": 7.108977744727547e-05, + "loss": 2.3713, + "step": 55320 + }, + { + "epoch": 93.48, + "learning_rate": 7.104907676134041e-05, + "loss": 2.3754, + "step": 55340 + }, + { + "epoch": 93.51, + "learning_rate": 7.100835911654437e-05, + "loss": 2.3823, + "step": 55360 + }, + { + "epoch": 93.55, + "learning_rate": 7.096762454569289e-05, + "loss": 2.3841, + "step": 55380 + }, + { + "epoch": 93.58, + "learning_rate": 7.0926873081605e-05, + "loss": 2.3738, + "step": 55400 + }, + { + "epoch": 93.61, + "learning_rate": 7.088610475711345e-05, + "loss": 2.3947, + "step": 55420 + }, + { + "epoch": 93.65, + "learning_rate": 7.084531960506456e-05, + "loss": 2.3839, + "step": 55440 + }, + { + "epoch": 93.68, + "learning_rate": 7.080451765831817e-05, + "loss": 2.3935, + "step": 55460 + }, + { + "epoch": 93.72, + "learning_rate": 7.076369894974768e-05, + "loss": 2.4017, + "step": 55480 + }, + { + "epoch": 93.75, + "learning_rate": 7.072286351223999e-05, + "loss": 2.3954, + "step": 55500 + }, + { + "epoch": 93.78, + "learning_rate": 7.068201137869546e-05, + "loss": 2.4097, + "step": 55520 + }, + { + "epoch": 93.82, + "learning_rate": 7.064114258202792e-05, + "loss": 2.3989, + "step": 55540 + }, + { + "epoch": 93.85, + "learning_rate": 7.060025715516463e-05, + "loss": 2.4131, + "step": 55560 + }, + { + "epoch": 93.89, + "learning_rate": 7.055935513104623e-05, + "loss": 2.3959, + "step": 55580 + }, + { + "epoch": 93.92, + "learning_rate": 7.051843654262676e-05, + "loss": 2.4057, + "step": 55600 + }, + { + "epoch": 93.95, + "learning_rate": 7.047750142287356e-05, + "loss": 2.4038, + "step": 55620 + }, + { + "epoch": 93.99, + "learning_rate": 7.043654980476735e-05, + "loss": 2.397, + "step": 55640 + }, + { + "epoch": 94.02, + "learning_rate": 7.039558172130208e-05, + "loss": 2.3503, + "step": 55660 + }, + { + "epoch": 94.05, + "learning_rate": 7.0354597205485e-05, + "loss": 2.3315, + "step": 55680 + }, + { + "epoch": 94.09, + "learning_rate": 7.031359629033661e-05, + "loss": 2.3534, + "step": 55700 + }, + { + "epoch": 94.12, + "learning_rate": 7.027257900889059e-05, + "loss": 2.3252, + "step": 55720 + }, + { + "epoch": 94.16, + "learning_rate": 7.023154539419384e-05, + "loss": 2.3405, + "step": 55740 + }, + { + "epoch": 94.19, + "learning_rate": 7.019049547930638e-05, + "loss": 2.3479, + "step": 55760 + }, + { + "epoch": 94.22, + "learning_rate": 7.01494292973014e-05, + "loss": 2.3548, + "step": 55780 + }, + { + "epoch": 94.26, + "learning_rate": 7.010834688126518e-05, + "loss": 2.3475, + "step": 55800 + }, + { + "epoch": 94.29, + "learning_rate": 7.006724826429706e-05, + "loss": 2.3467, + "step": 55820 + }, + { + "epoch": 94.32, + "learning_rate": 7.002613347950949e-05, + "loss": 2.3527, + "step": 55840 + }, + { + "epoch": 94.36, + "learning_rate": 6.998500256002789e-05, + "loss": 2.3518, + "step": 55860 + }, + { + "epoch": 94.39, + "learning_rate": 6.994385553899069e-05, + "loss": 2.3593, + "step": 55880 + }, + { + "epoch": 94.43, + "learning_rate": 6.990269244954933e-05, + "loss": 2.3759, + "step": 55900 + }, + { + "epoch": 94.46, + "learning_rate": 6.986151332486813e-05, + "loss": 2.3658, + "step": 55920 + }, + { + "epoch": 94.49, + "learning_rate": 6.98203181981244e-05, + "loss": 2.3621, + "step": 55940 + }, + { + "epoch": 94.53, + "learning_rate": 6.97791071025083e-05, + "loss": 2.3945, + "step": 55960 + }, + { + "epoch": 94.56, + "learning_rate": 6.973788007122283e-05, + "loss": 2.3772, + "step": 55980 + }, + { + "epoch": 94.59, + "learning_rate": 6.969663713748392e-05, + "loss": 2.3798, + "step": 56000 + }, + { + "epoch": 94.59, + "eval_loss": 2.668010711669922, + "eval_runtime": 47.8955, + "eval_samples_per_second": 20.649, + "eval_steps_per_second": 0.125, + "eval_tse_ndup": 0.005050061496517336, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.034623850337323764, + "eval_tse_type": 0.00020538422649140546, + "step": 56000 + }, + { + "epoch": 94.63, + "learning_rate": 6.965537833452024e-05, + "loss": 2.3747, + "step": 56020 + }, + { + "epoch": 94.66, + "learning_rate": 6.961410369557323e-05, + "loss": 2.368, + "step": 56040 + }, + { + "epoch": 94.7, + "learning_rate": 6.957281325389717e-05, + "loss": 2.3824, + "step": 56060 + }, + { + "epoch": 94.73, + "learning_rate": 6.9531507042759e-05, + "loss": 2.3905, + "step": 56080 + }, + { + "epoch": 94.76, + "learning_rate": 6.949018509543835e-05, + "loss": 2.3951, + "step": 56100 + }, + { + "epoch": 94.8, + "learning_rate": 6.944884744522764e-05, + "loss": 2.3906, + "step": 56120 + }, + { + "epoch": 94.83, + "learning_rate": 6.940749412543181e-05, + "loss": 2.3845, + "step": 56140 + }, + { + "epoch": 94.86, + "learning_rate": 6.936612516936852e-05, + "loss": 2.3946, + "step": 56160 + }, + { + "epoch": 94.9, + "learning_rate": 6.932474061036797e-05, + "loss": 2.3913, + "step": 56180 + }, + { + "epoch": 94.93, + "learning_rate": 6.928334048177296e-05, + "loss": 2.3949, + "step": 56200 + }, + { + "epoch": 94.97, + "learning_rate": 6.924192481693882e-05, + "loss": 2.4153, + "step": 56220 + }, + { + "epoch": 95.0, + "learning_rate": 6.920049364923342e-05, + "loss": 2.3889, + "step": 56240 + }, + { + "epoch": 95.03, + "learning_rate": 6.915904701203705e-05, + "loss": 2.3298, + "step": 56260 + }, + { + "epoch": 95.07, + "learning_rate": 6.911758493874258e-05, + "loss": 2.3281, + "step": 56280 + }, + { + "epoch": 95.1, + "learning_rate": 6.907610746275523e-05, + "loss": 2.323, + "step": 56300 + }, + { + "epoch": 95.14, + "learning_rate": 6.903461461749266e-05, + "loss": 2.3401, + "step": 56320 + }, + { + "epoch": 95.17, + "learning_rate": 6.89931064363849e-05, + "loss": 2.3285, + "step": 56340 + }, + { + "epoch": 95.2, + "learning_rate": 6.895365948996552e-05, + "loss": 2.3299, + "step": 56360 + }, + { + "epoch": 95.24, + "learning_rate": 6.891212150015955e-05, + "loss": 2.3394, + "step": 56380 + }, + { + "epoch": 95.27, + "learning_rate": 6.887056827319885e-05, + "loss": 2.34, + "step": 56400 + }, + { + "epoch": 95.3, + "learning_rate": 6.882899984256216e-05, + "loss": 2.369, + "step": 56420 + }, + { + "epoch": 95.34, + "learning_rate": 6.878741624174039e-05, + "loss": 2.3588, + "step": 56440 + }, + { + "epoch": 95.37, + "learning_rate": 6.87458175042367e-05, + "loss": 2.3518, + "step": 56460 + }, + { + "epoch": 95.41, + "learning_rate": 6.870420366356642e-05, + "loss": 2.3542, + "step": 56480 + }, + { + "epoch": 95.44, + "learning_rate": 6.86625747532571e-05, + "loss": 2.3635, + "step": 56500 + }, + { + "epoch": 95.47, + "learning_rate": 6.862093080684838e-05, + "loss": 2.3609, + "step": 56520 + }, + { + "epoch": 95.51, + "learning_rate": 6.857927185789204e-05, + "loss": 2.3498, + "step": 56540 + }, + { + "epoch": 95.54, + "learning_rate": 6.853759793995196e-05, + "loss": 2.3651, + "step": 56560 + }, + { + "epoch": 95.57, + "learning_rate": 6.849590908660404e-05, + "loss": 2.3662, + "step": 56580 + }, + { + "epoch": 95.61, + "learning_rate": 6.845420533143627e-05, + "loss": 2.3717, + "step": 56600 + }, + { + "epoch": 95.64, + "learning_rate": 6.841248670804853e-05, + "loss": 2.3478, + "step": 56620 + }, + { + "epoch": 95.68, + "learning_rate": 6.837075325005286e-05, + "loss": 2.3753, + "step": 56640 + }, + { + "epoch": 95.71, + "learning_rate": 6.832900499107311e-05, + "loss": 2.3746, + "step": 56660 + }, + { + "epoch": 95.74, + "learning_rate": 6.82872419647451e-05, + "loss": 2.3741, + "step": 56680 + }, + { + "epoch": 95.78, + "learning_rate": 6.824546420471653e-05, + "loss": 2.377, + "step": 56700 + }, + { + "epoch": 95.81, + "learning_rate": 6.820367174464703e-05, + "loss": 2.3812, + "step": 56720 + }, + { + "epoch": 95.84, + "learning_rate": 6.816186461820798e-05, + "loss": 2.3846, + "step": 56740 + }, + { + "epoch": 95.88, + "learning_rate": 6.812004285908266e-05, + "loss": 2.4012, + "step": 56760 + }, + { + "epoch": 95.91, + "learning_rate": 6.807820650096609e-05, + "loss": 2.3778, + "step": 56780 + }, + { + "epoch": 95.95, + "learning_rate": 6.803635557756507e-05, + "loss": 2.3783, + "step": 56800 + }, + { + "epoch": 95.98, + "learning_rate": 6.799449012259816e-05, + "loss": 2.3772, + "step": 56820 + }, + { + "epoch": 96.01, + "learning_rate": 6.795261016979555e-05, + "loss": 2.3434, + "step": 56840 + }, + { + "epoch": 96.05, + "learning_rate": 6.791071575289922e-05, + "loss": 2.3278, + "step": 56860 + }, + { + "epoch": 96.08, + "learning_rate": 6.786880690566268e-05, + "loss": 2.3018, + "step": 56880 + }, + { + "epoch": 96.11, + "learning_rate": 6.78268836618512e-05, + "loss": 2.3097, + "step": 56900 + }, + { + "epoch": 96.15, + "learning_rate": 6.778494605524151e-05, + "loss": 2.3278, + "step": 56920 + }, + { + "epoch": 96.18, + "learning_rate": 6.774299411962203e-05, + "loss": 2.3357, + "step": 56940 + }, + { + "epoch": 96.22, + "learning_rate": 6.770102788879267e-05, + "loss": 2.3269, + "step": 56960 + }, + { + "epoch": 96.25, + "learning_rate": 6.765904739656486e-05, + "loss": 2.3179, + "step": 56980 + }, + { + "epoch": 96.28, + "learning_rate": 6.761705267676153e-05, + "loss": 2.3506, + "step": 57000 + }, + { + "epoch": 96.28, + "eval_loss": 2.652446985244751, + "eval_runtime": 47.4592, + "eval_samples_per_second": 20.839, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004419710815808129, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.032274585313776694, + "eval_tse_type": 0.00025804684866868893, + "step": 57000 + }, + { + "epoch": 96.32, + "learning_rate": 6.757504376321704e-05, + "loss": 2.333, + "step": 57020 + }, + { + "epoch": 96.35, + "learning_rate": 6.753302068977725e-05, + "loss": 2.3232, + "step": 57040 + }, + { + "epoch": 96.39, + "learning_rate": 6.749098349029935e-05, + "loss": 2.3507, + "step": 57060 + }, + { + "epoch": 96.42, + "learning_rate": 6.7448932198652e-05, + "loss": 2.3556, + "step": 57080 + }, + { + "epoch": 96.45, + "learning_rate": 6.740686684871515e-05, + "loss": 2.3533, + "step": 57100 + }, + { + "epoch": 96.49, + "learning_rate": 6.736478747438007e-05, + "loss": 2.3587, + "step": 57120 + }, + { + "epoch": 96.52, + "learning_rate": 6.732269410954938e-05, + "loss": 2.3467, + "step": 57140 + }, + { + "epoch": 96.55, + "learning_rate": 6.728058678813694e-05, + "loss": 2.3415, + "step": 57160 + }, + { + "epoch": 96.59, + "learning_rate": 6.723846554406782e-05, + "loss": 2.3442, + "step": 57180 + }, + { + "epoch": 96.62, + "learning_rate": 6.719633041127839e-05, + "loss": 2.3605, + "step": 57200 + }, + { + "epoch": 96.66, + "learning_rate": 6.715418142371614e-05, + "loss": 2.365, + "step": 57220 + }, + { + "epoch": 96.69, + "learning_rate": 6.711201861533978e-05, + "loss": 2.3759, + "step": 57240 + }, + { + "epoch": 96.72, + "learning_rate": 6.70698420201191e-05, + "loss": 2.3712, + "step": 57260 + }, + { + "epoch": 96.76, + "learning_rate": 6.7027651672035e-05, + "loss": 2.3813, + "step": 57280 + }, + { + "epoch": 96.79, + "learning_rate": 6.698544760507952e-05, + "loss": 2.3772, + "step": 57300 + }, + { + "epoch": 96.82, + "learning_rate": 6.694322985325569e-05, + "loss": 2.3764, + "step": 57320 + }, + { + "epoch": 96.86, + "learning_rate": 6.69009984505776e-05, + "loss": 2.365, + "step": 57340 + }, + { + "epoch": 96.89, + "learning_rate": 6.685875343107033e-05, + "loss": 2.3746, + "step": 57360 + }, + { + "epoch": 96.93, + "learning_rate": 6.681649482876994e-05, + "loss": 2.3786, + "step": 57380 + }, + { + "epoch": 96.96, + "learning_rate": 6.677422267772338e-05, + "loss": 2.3717, + "step": 57400 + }, + { + "epoch": 96.99, + "learning_rate": 6.673193701198862e-05, + "loss": 2.3775, + "step": 57420 + }, + { + "epoch": 97.03, + "learning_rate": 6.66896378656344e-05, + "loss": 2.3134, + "step": 57440 + }, + { + "epoch": 97.06, + "learning_rate": 6.664732527274041e-05, + "loss": 2.3173, + "step": 57460 + }, + { + "epoch": 97.09, + "learning_rate": 6.660499926739714e-05, + "loss": 2.3021, + "step": 57480 + }, + { + "epoch": 97.13, + "learning_rate": 6.656265988370588e-05, + "loss": 2.318, + "step": 57500 + }, + { + "epoch": 97.16, + "learning_rate": 6.652030715577871e-05, + "loss": 2.3125, + "step": 57520 + }, + { + "epoch": 97.2, + "learning_rate": 6.647794111773843e-05, + "loss": 2.3126, + "step": 57540 + }, + { + "epoch": 97.23, + "learning_rate": 6.643556180371866e-05, + "loss": 2.3302, + "step": 57560 + }, + { + "epoch": 97.26, + "learning_rate": 6.63931692478636e-05, + "loss": 2.3208, + "step": 57580 + }, + { + "epoch": 97.3, + "learning_rate": 6.635076348432815e-05, + "loss": 2.3349, + "step": 57600 + }, + { + "epoch": 97.33, + "learning_rate": 6.630834454727792e-05, + "loss": 2.34, + "step": 57620 + }, + { + "epoch": 97.36, + "learning_rate": 6.626591247088903e-05, + "loss": 2.3404, + "step": 57640 + }, + { + "epoch": 97.4, + "learning_rate": 6.622346728934827e-05, + "loss": 2.3362, + "step": 57660 + }, + { + "epoch": 97.43, + "learning_rate": 6.618100903685294e-05, + "loss": 2.3237, + "step": 57680 + }, + { + "epoch": 97.47, + "learning_rate": 6.61385377476109e-05, + "loss": 2.3294, + "step": 57700 + }, + { + "epoch": 97.5, + "learning_rate": 6.609605345584047e-05, + "loss": 2.3433, + "step": 57720 + }, + { + "epoch": 97.53, + "learning_rate": 6.605355619577054e-05, + "loss": 2.3468, + "step": 57740 + }, + { + "epoch": 97.57, + "learning_rate": 6.601104600164032e-05, + "loss": 2.3455, + "step": 57760 + }, + { + "epoch": 97.6, + "learning_rate": 6.596852290769952e-05, + "loss": 2.3537, + "step": 57780 + }, + { + "epoch": 97.64, + "learning_rate": 6.592598694820826e-05, + "loss": 2.3462, + "step": 57800 + }, + { + "epoch": 97.67, + "learning_rate": 6.588343815743697e-05, + "loss": 2.3587, + "step": 57820 + }, + { + "epoch": 97.7, + "learning_rate": 6.584087656966644e-05, + "loss": 2.3575, + "step": 57840 + }, + { + "epoch": 97.74, + "learning_rate": 6.57983022191878e-05, + "loss": 2.3752, + "step": 57860 + }, + { + "epoch": 97.77, + "learning_rate": 6.575571514030239e-05, + "loss": 2.3578, + "step": 57880 + }, + { + "epoch": 97.8, + "learning_rate": 6.571311536732188e-05, + "loss": 2.3469, + "step": 57900 + }, + { + "epoch": 97.84, + "learning_rate": 6.567050293456812e-05, + "loss": 2.3702, + "step": 57920 + }, + { + "epoch": 97.87, + "learning_rate": 6.562787787637321e-05, + "loss": 2.3785, + "step": 57940 + }, + { + "epoch": 97.91, + "learning_rate": 6.558524022707935e-05, + "loss": 2.3621, + "step": 57960 + }, + { + "epoch": 97.94, + "learning_rate": 6.554259002103895e-05, + "loss": 2.3432, + "step": 57980 + }, + { + "epoch": 97.97, + "learning_rate": 6.549992729261451e-05, + "loss": 2.3689, + "step": 58000 + }, + { + "epoch": 97.97, + "eval_loss": 2.6414411067962646, + "eval_runtime": 50.7321, + "eval_samples_per_second": 19.495, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.004525041405393203, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03392460574295638, + "eval_tse_type": 0.00019714079755148143, + "step": 58000 + }, + { + "epoch": 98.01, + "learning_rate": 6.54572520761786e-05, + "loss": 2.3433, + "step": 58020 + }, + { + "epoch": 98.04, + "learning_rate": 6.54145644061139e-05, + "loss": 2.2908, + "step": 58040 + }, + { + "epoch": 98.07, + "learning_rate": 6.537186431681306e-05, + "loss": 2.2987, + "step": 58060 + }, + { + "epoch": 98.11, + "learning_rate": 6.532915184267881e-05, + "loss": 2.3084, + "step": 58080 + }, + { + "epoch": 98.14, + "learning_rate": 6.528642701812378e-05, + "loss": 2.3045, + "step": 58100 + }, + { + "epoch": 98.18, + "learning_rate": 6.524368987757061e-05, + "loss": 2.3313, + "step": 58120 + }, + { + "epoch": 98.21, + "learning_rate": 6.520094045545184e-05, + "loss": 2.3024, + "step": 58140 + }, + { + "epoch": 98.24, + "learning_rate": 6.515817878620992e-05, + "loss": 2.3164, + "step": 58160 + }, + { + "epoch": 98.28, + "learning_rate": 6.511540490429713e-05, + "loss": 2.3052, + "step": 58180 + }, + { + "epoch": 98.31, + "learning_rate": 6.507261884417561e-05, + "loss": 2.3216, + "step": 58200 + }, + { + "epoch": 98.34, + "learning_rate": 6.502982064031736e-05, + "loss": 2.322, + "step": 58220 + }, + { + "epoch": 98.38, + "learning_rate": 6.498701032720406e-05, + "loss": 2.3298, + "step": 58240 + }, + { + "epoch": 98.41, + "learning_rate": 6.494418793932728e-05, + "loss": 2.3179, + "step": 58260 + }, + { + "epoch": 98.45, + "learning_rate": 6.490135351118817e-05, + "loss": 2.3315, + "step": 58280 + }, + { + "epoch": 98.48, + "learning_rate": 6.485850707729771e-05, + "loss": 2.34, + "step": 58300 + }, + { + "epoch": 98.51, + "learning_rate": 6.481564867217646e-05, + "loss": 2.3419, + "step": 58320 + }, + { + "epoch": 98.55, + "learning_rate": 6.477277833035467e-05, + "loss": 2.3288, + "step": 58340 + }, + { + "epoch": 98.58, + "learning_rate": 6.472989608637221e-05, + "loss": 2.3389, + "step": 58360 + }, + { + "epoch": 98.61, + "learning_rate": 6.468700197477853e-05, + "loss": 2.3366, + "step": 58380 + }, + { + "epoch": 98.65, + "learning_rate": 6.464409603013264e-05, + "loss": 2.3486, + "step": 58400 + }, + { + "epoch": 98.68, + "learning_rate": 6.46011782870031e-05, + "loss": 2.3382, + "step": 58420 + }, + { + "epoch": 98.72, + "learning_rate": 6.455824877996793e-05, + "loss": 2.34, + "step": 58440 + }, + { + "epoch": 98.75, + "learning_rate": 6.451530754361465e-05, + "loss": 2.3462, + "step": 58460 + }, + { + "epoch": 98.78, + "learning_rate": 6.447235461254029e-05, + "loss": 2.3459, + "step": 58480 + }, + { + "epoch": 98.82, + "learning_rate": 6.442939002135118e-05, + "loss": 2.3578, + "step": 58500 + }, + { + "epoch": 98.85, + "learning_rate": 6.43864138046632e-05, + "loss": 2.3432, + "step": 58520 + }, + { + "epoch": 98.89, + "learning_rate": 6.434342599710145e-05, + "loss": 2.3605, + "step": 58540 + }, + { + "epoch": 98.92, + "learning_rate": 6.430042663330046e-05, + "loss": 2.3593, + "step": 58560 + }, + { + "epoch": 98.95, + "learning_rate": 6.425741574790402e-05, + "loss": 2.3643, + "step": 58580 + }, + { + "epoch": 98.99, + "learning_rate": 6.421439337556523e-05, + "loss": 2.3728, + "step": 58600 + }, + { + "epoch": 99.02, + "learning_rate": 6.417135955094644e-05, + "loss": 2.3148, + "step": 58620 + }, + { + "epoch": 99.05, + "learning_rate": 6.412831430871922e-05, + "loss": 2.2854, + "step": 58640 + }, + { + "epoch": 99.09, + "learning_rate": 6.408525768356435e-05, + "loss": 2.2796, + "step": 58660 + }, + { + "epoch": 99.12, + "learning_rate": 6.404218971017179e-05, + "loss": 2.2844, + "step": 58680 + }, + { + "epoch": 99.16, + "learning_rate": 6.399911042324059e-05, + "loss": 2.309, + "step": 58700 + }, + { + "epoch": 99.19, + "learning_rate": 6.395601985747899e-05, + "loss": 2.3022, + "step": 58720 + }, + { + "epoch": 99.22, + "learning_rate": 6.391291804760427e-05, + "loss": 2.3183, + "step": 58740 + }, + { + "epoch": 99.26, + "learning_rate": 6.386980502834277e-05, + "loss": 2.3138, + "step": 58760 + }, + { + "epoch": 99.29, + "learning_rate": 6.382668083442989e-05, + "loss": 2.3145, + "step": 58780 + }, + { + "epoch": 99.32, + "learning_rate": 6.378354550060997e-05, + "loss": 2.3048, + "step": 58800 + }, + { + "epoch": 99.36, + "learning_rate": 6.374039906163642e-05, + "loss": 2.3131, + "step": 58820 + }, + { + "epoch": 99.39, + "learning_rate": 6.369724155227152e-05, + "loss": 2.3216, + "step": 58840 + }, + { + "epoch": 99.43, + "learning_rate": 6.365407300728653e-05, + "loss": 2.3241, + "step": 58860 + }, + { + "epoch": 99.46, + "learning_rate": 6.361089346146152e-05, + "loss": 2.3188, + "step": 58880 + }, + { + "epoch": 99.49, + "learning_rate": 6.356770294958549e-05, + "loss": 2.3232, + "step": 58900 + }, + { + "epoch": 99.53, + "learning_rate": 6.352450150645626e-05, + "loss": 2.3165, + "step": 58920 + }, + { + "epoch": 99.56, + "learning_rate": 6.348128916688045e-05, + "loss": 2.3247, + "step": 58940 + }, + { + "epoch": 99.59, + "learning_rate": 6.343806596567345e-05, + "loss": 2.3515, + "step": 58960 + }, + { + "epoch": 99.63, + "learning_rate": 6.339483193765941e-05, + "loss": 2.3195, + "step": 58980 + }, + { + "epoch": 99.66, + "learning_rate": 6.335158711767125e-05, + "loss": 2.3521, + "step": 59000 + }, + { + "epoch": 99.66, + "eval_loss": 2.6360507011413574, + "eval_runtime": 47.5023, + "eval_samples_per_second": 20.82, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.0036123021520311147, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03374268969161762, + "eval_tse_type": 0.00028112071577424743, + "step": 59000 + }, + { + "epoch": 99.7, + "learning_rate": 6.330833154055049e-05, + "loss": 2.3336, + "step": 59020 + }, + { + "epoch": 99.73, + "learning_rate": 6.326506524114739e-05, + "loss": 2.3363, + "step": 59040 + }, + { + "epoch": 99.76, + "learning_rate": 6.322178825432082e-05, + "loss": 2.3469, + "step": 59060 + }, + { + "epoch": 99.8, + "learning_rate": 6.317850061493827e-05, + "loss": 2.3395, + "step": 59080 + }, + { + "epoch": 99.83, + "learning_rate": 6.31352023578758e-05, + "loss": 2.3385, + "step": 59100 + }, + { + "epoch": 99.86, + "learning_rate": 6.309189351801805e-05, + "loss": 2.3523, + "step": 59120 + }, + { + "epoch": 99.9, + "learning_rate": 6.304857413025816e-05, + "loss": 2.3339, + "step": 59140 + }, + { + "epoch": 99.93, + "learning_rate": 6.300524422949776e-05, + "loss": 2.3516, + "step": 59160 + }, + { + "epoch": 99.97, + "learning_rate": 6.2961903850647e-05, + "loss": 2.352, + "step": 59180 + }, + { + "epoch": 100.0, + "learning_rate": 6.291855302862442e-05, + "loss": 2.3407, + "step": 59200 + }, + { + "epoch": 100.03, + "learning_rate": 6.287519179835702e-05, + "loss": 2.2646, + "step": 59220 + }, + { + "epoch": 100.07, + "learning_rate": 6.283182019478013e-05, + "loss": 2.2781, + "step": 59240 + }, + { + "epoch": 100.1, + "learning_rate": 6.278843825283749e-05, + "loss": 2.2856, + "step": 59260 + }, + { + "epoch": 100.14, + "learning_rate": 6.274504600748112e-05, + "loss": 2.2917, + "step": 59280 + }, + { + "epoch": 100.17, + "learning_rate": 6.27016434936714e-05, + "loss": 2.3064, + "step": 59300 + }, + { + "epoch": 100.2, + "learning_rate": 6.265823074637692e-05, + "loss": 2.3053, + "step": 59320 + }, + { + "epoch": 100.24, + "learning_rate": 6.261480780057458e-05, + "loss": 2.2988, + "step": 59340 + }, + { + "epoch": 100.27, + "learning_rate": 6.257137469124944e-05, + "loss": 2.307, + "step": 59360 + }, + { + "epoch": 100.3, + "learning_rate": 6.252793145339477e-05, + "loss": 2.2935, + "step": 59380 + }, + { + "epoch": 100.34, + "learning_rate": 6.248447812201201e-05, + "loss": 2.2814, + "step": 59400 + }, + { + "epoch": 100.37, + "learning_rate": 6.244101473211072e-05, + "loss": 2.299, + "step": 59420 + }, + { + "epoch": 100.41, + "learning_rate": 6.23975413187086e-05, + "loss": 2.3196, + "step": 59440 + }, + { + "epoch": 100.44, + "learning_rate": 6.235405791683134e-05, + "loss": 2.3243, + "step": 59460 + }, + { + "epoch": 100.47, + "learning_rate": 6.231056456151278e-05, + "loss": 2.3026, + "step": 59480 + }, + { + "epoch": 100.51, + "learning_rate": 6.226706128779468e-05, + "loss": 2.3176, + "step": 59500 + }, + { + "epoch": 100.54, + "learning_rate": 6.222354813072689e-05, + "loss": 2.317, + "step": 59520 + }, + { + "epoch": 100.57, + "learning_rate": 6.218002512536714e-05, + "loss": 2.3105, + "step": 59540 + }, + { + "epoch": 100.61, + "learning_rate": 6.213649230678116e-05, + "loss": 2.323, + "step": 59560 + }, + { + "epoch": 100.64, + "learning_rate": 6.209294971004253e-05, + "loss": 2.3389, + "step": 59580 + }, + { + "epoch": 100.68, + "learning_rate": 6.204939737023275e-05, + "loss": 2.3472, + "step": 59600 + }, + { + "epoch": 100.71, + "learning_rate": 6.200583532244114e-05, + "loss": 2.3281, + "step": 59620 + }, + { + "epoch": 100.74, + "learning_rate": 6.196226360176486e-05, + "loss": 2.3392, + "step": 59640 + }, + { + "epoch": 100.78, + "learning_rate": 6.191868224330886e-05, + "loss": 2.3325, + "step": 59660 + }, + { + "epoch": 100.81, + "learning_rate": 6.187509128218586e-05, + "loss": 2.334, + "step": 59680 + }, + { + "epoch": 100.84, + "learning_rate": 6.183149075351631e-05, + "loss": 2.329, + "step": 59700 + }, + { + "epoch": 100.88, + "learning_rate": 6.178788069242835e-05, + "loss": 2.3408, + "step": 59720 + }, + { + "epoch": 100.91, + "learning_rate": 6.174426113405783e-05, + "loss": 2.3437, + "step": 59740 + }, + { + "epoch": 100.95, + "learning_rate": 6.170063211354825e-05, + "loss": 2.3345, + "step": 59760 + }, + { + "epoch": 100.98, + "learning_rate": 6.165699366605072e-05, + "loss": 2.3379, + "step": 59780 + }, + { + "epoch": 101.01, + "learning_rate": 6.161334582672393e-05, + "loss": 2.2955, + "step": 59800 + }, + { + "epoch": 101.05, + "learning_rate": 6.156968863073417e-05, + "loss": 2.2768, + "step": 59820 + }, + { + "epoch": 101.08, + "learning_rate": 6.15282056599715e-05, + "loss": 2.2786, + "step": 59840 + }, + { + "epoch": 101.11, + "learning_rate": 6.148453031966447e-05, + "loss": 2.272, + "step": 59860 + }, + { + "epoch": 101.15, + "learning_rate": 6.14408457264788e-05, + "loss": 2.2834, + "step": 59880 + }, + { + "epoch": 101.18, + "learning_rate": 6.139715191561038e-05, + "loss": 2.2731, + "step": 59900 + }, + { + "epoch": 101.22, + "learning_rate": 6.135344892226253e-05, + "loss": 2.2926, + "step": 59920 + }, + { + "epoch": 101.25, + "learning_rate": 6.130973678164593e-05, + "loss": 2.2875, + "step": 59940 + }, + { + "epoch": 101.28, + "learning_rate": 6.126601552897869e-05, + "loss": 2.2858, + "step": 59960 + }, + { + "epoch": 101.32, + "learning_rate": 6.122228519948622e-05, + "loss": 2.2891, + "step": 59980 + }, + { + "epoch": 101.35, + "learning_rate": 6.117854582840129e-05, + "loss": 2.2937, + "step": 60000 + }, + { + "epoch": 101.35, + "eval_loss": 2.6247527599334717, + "eval_runtime": 47.4643, + "eval_samples_per_second": 20.837, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004146178190892868, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.033031864075150866, + "eval_tse_type": 0.00033554435769788256, + "step": 60000 + }, + { + "epoch": 101.39, + "learning_rate": 6.113479745096387e-05, + "loss": 2.3096, + "step": 60020 + }, + { + "epoch": 101.42, + "learning_rate": 6.109104010242128e-05, + "loss": 2.3123, + "step": 60040 + }, + { + "epoch": 101.45, + "learning_rate": 6.104727381802798e-05, + "loss": 2.3019, + "step": 60060 + }, + { + "epoch": 101.49, + "learning_rate": 6.100349863304573e-05, + "loss": 2.2997, + "step": 60080 + }, + { + "epoch": 101.52, + "learning_rate": 6.0959714582743364e-05, + "loss": 2.3064, + "step": 60100 + }, + { + "epoch": 101.55, + "learning_rate": 6.0915921702396916e-05, + "loss": 2.319, + "step": 60120 + }, + { + "epoch": 101.59, + "learning_rate": 6.0872120027289536e-05, + "loss": 2.3174, + "step": 60140 + }, + { + "epoch": 101.62, + "learning_rate": 6.08283095927114e-05, + "loss": 2.3142, + "step": 60160 + }, + { + "epoch": 101.66, + "learning_rate": 6.078449043395982e-05, + "loss": 2.3278, + "step": 60180 + }, + { + "epoch": 101.69, + "learning_rate": 6.074066258633908e-05, + "loss": 2.3256, + "step": 60200 + }, + { + "epoch": 101.72, + "learning_rate": 6.0696826085160505e-05, + "loss": 2.3238, + "step": 60220 + }, + { + "epoch": 101.76, + "learning_rate": 6.065298096574235e-05, + "loss": 2.3055, + "step": 60240 + }, + { + "epoch": 101.79, + "learning_rate": 6.060912726340986e-05, + "loss": 2.3252, + "step": 60260 + }, + { + "epoch": 101.82, + "learning_rate": 6.0565265013495144e-05, + "loss": 2.3365, + "step": 60280 + }, + { + "epoch": 101.86, + "learning_rate": 6.052139425133724e-05, + "loss": 2.3171, + "step": 60300 + }, + { + "epoch": 101.89, + "learning_rate": 6.0477515012282024e-05, + "loss": 2.3257, + "step": 60320 + }, + { + "epoch": 101.93, + "learning_rate": 6.043362733168223e-05, + "loss": 2.3344, + "step": 60340 + }, + { + "epoch": 101.96, + "learning_rate": 6.038973124489733e-05, + "loss": 2.3307, + "step": 60360 + }, + { + "epoch": 101.99, + "learning_rate": 6.034582678729362e-05, + "loss": 2.3429, + "step": 60380 + }, + { + "epoch": 102.03, + "learning_rate": 6.0301913994244165e-05, + "loss": 2.2761, + "step": 60400 + }, + { + "epoch": 102.06, + "learning_rate": 6.0257992901128655e-05, + "loss": 2.2577, + "step": 60420 + }, + { + "epoch": 102.09, + "learning_rate": 6.0214063543333555e-05, + "loss": 2.2687, + "step": 60440 + }, + { + "epoch": 102.13, + "learning_rate": 6.0170125956251934e-05, + "loss": 2.2709, + "step": 60460 + }, + { + "epoch": 102.16, + "learning_rate": 6.0126180175283554e-05, + "loss": 2.273, + "step": 60480 + }, + { + "epoch": 102.2, + "learning_rate": 6.0082226235834684e-05, + "loss": 2.2673, + "step": 60500 + }, + { + "epoch": 102.23, + "learning_rate": 6.003826417331825e-05, + "loss": 2.2841, + "step": 60520 + }, + { + "epoch": 102.26, + "learning_rate": 5.999429402315367e-05, + "loss": 2.2968, + "step": 60540 + }, + { + "epoch": 102.3, + "learning_rate": 5.995031582076693e-05, + "loss": 2.271, + "step": 60560 + }, + { + "epoch": 102.33, + "learning_rate": 5.990632960159046e-05, + "loss": 2.2915, + "step": 60580 + }, + { + "epoch": 102.36, + "learning_rate": 5.9862335401063155e-05, + "loss": 2.3, + "step": 60600 + }, + { + "epoch": 102.4, + "learning_rate": 5.981833325463034e-05, + "loss": 2.3028, + "step": 60620 + }, + { + "epoch": 102.43, + "learning_rate": 5.9774323197743776e-05, + "loss": 2.2911, + "step": 60640 + }, + { + "epoch": 102.47, + "learning_rate": 5.9730305265861565e-05, + "loss": 2.2932, + "step": 60660 + }, + { + "epoch": 102.5, + "learning_rate": 5.9686279494448125e-05, + "loss": 2.3085, + "step": 60680 + }, + { + "epoch": 102.53, + "learning_rate": 5.964224591897428e-05, + "loss": 2.2809, + "step": 60700 + }, + { + "epoch": 102.57, + "learning_rate": 5.959820457491704e-05, + "loss": 2.3003, + "step": 60720 + }, + { + "epoch": 102.6, + "learning_rate": 5.955415549775974e-05, + "loss": 2.2935, + "step": 60740 + }, + { + "epoch": 102.64, + "learning_rate": 5.9510098722991924e-05, + "loss": 2.3131, + "step": 60760 + }, + { + "epoch": 102.67, + "learning_rate": 5.946603428610935e-05, + "loss": 2.3157, + "step": 60780 + }, + { + "epoch": 102.7, + "learning_rate": 5.9421962222613924e-05, + "loss": 2.3178, + "step": 60800 + }, + { + "epoch": 102.74, + "learning_rate": 5.937788256801371e-05, + "loss": 2.317, + "step": 60820 + }, + { + "epoch": 102.77, + "learning_rate": 5.9333795357822906e-05, + "loss": 2.3317, + "step": 60840 + }, + { + "epoch": 102.8, + "learning_rate": 5.928970062756176e-05, + "loss": 2.3107, + "step": 60860 + }, + { + "epoch": 102.84, + "learning_rate": 5.924559841275661e-05, + "loss": 2.325, + "step": 60880 + }, + { + "epoch": 102.87, + "learning_rate": 5.920148874893982e-05, + "loss": 2.3346, + "step": 60900 + }, + { + "epoch": 102.91, + "learning_rate": 5.915737167164975e-05, + "loss": 2.3138, + "step": 60920 + }, + { + "epoch": 102.94, + "learning_rate": 5.9113247216430725e-05, + "loss": 2.3153, + "step": 60940 + }, + { + "epoch": 102.97, + "learning_rate": 5.906911541883302e-05, + "loss": 2.3074, + "step": 60960 + }, + { + "epoch": 103.01, + "learning_rate": 5.902497631441283e-05, + "loss": 2.3062, + "step": 60980 + }, + { + "epoch": 103.04, + "learning_rate": 5.898082993873223e-05, + "loss": 2.2662, + "step": 61000 + }, + { + "epoch": 103.04, + "eval_loss": 2.6157455444335938, + "eval_runtime": 47.4833, + "eval_samples_per_second": 20.828, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004475091648603663, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.033247145760106664, + "eval_tse_type": 0.00043183350185372433, + "step": 61000 + }, + { + "epoch": 103.07, + "learning_rate": 5.8936676327359154e-05, + "loss": 2.2542, + "step": 61020 + }, + { + "epoch": 103.11, + "learning_rate": 5.88925155158674e-05, + "loss": 2.2555, + "step": 61040 + }, + { + "epoch": 103.14, + "learning_rate": 5.884834753983649e-05, + "loss": 2.2724, + "step": 61060 + }, + { + "epoch": 103.18, + "learning_rate": 5.880417243485179e-05, + "loss": 2.2722, + "step": 61080 + }, + { + "epoch": 103.21, + "learning_rate": 5.8759990236504405e-05, + "loss": 2.272, + "step": 61100 + }, + { + "epoch": 103.24, + "learning_rate": 5.8715800980391086e-05, + "loss": 2.2718, + "step": 61120 + }, + { + "epoch": 103.28, + "learning_rate": 5.867160470211436e-05, + "loss": 2.2786, + "step": 61140 + }, + { + "epoch": 103.31, + "learning_rate": 5.8627401437282334e-05, + "loss": 2.2754, + "step": 61160 + }, + { + "epoch": 103.34, + "learning_rate": 5.858319122150881e-05, + "loss": 2.2702, + "step": 61180 + }, + { + "epoch": 103.38, + "learning_rate": 5.853897409041314e-05, + "loss": 2.2912, + "step": 61200 + }, + { + "epoch": 103.41, + "learning_rate": 5.849475007962031e-05, + "loss": 2.2892, + "step": 61220 + }, + { + "epoch": 103.45, + "learning_rate": 5.8450519224760746e-05, + "loss": 2.2952, + "step": 61240 + }, + { + "epoch": 103.48, + "learning_rate": 5.840628156147049e-05, + "loss": 2.2924, + "step": 61260 + }, + { + "epoch": 103.51, + "learning_rate": 5.8362037125391e-05, + "loss": 2.3042, + "step": 61280 + }, + { + "epoch": 103.55, + "learning_rate": 5.831778595216924e-05, + "loss": 2.2761, + "step": 61300 + }, + { + "epoch": 103.58, + "learning_rate": 5.8273528077457585e-05, + "loss": 2.3046, + "step": 61320 + }, + { + "epoch": 103.61, + "learning_rate": 5.822926353691378e-05, + "loss": 2.3016, + "step": 61340 + }, + { + "epoch": 103.65, + "learning_rate": 5.818499236620101e-05, + "loss": 2.3065, + "step": 61360 + }, + { + "epoch": 103.68, + "learning_rate": 5.81407146009877e-05, + "loss": 2.3009, + "step": 61380 + }, + { + "epoch": 103.72, + "learning_rate": 5.80964302769477e-05, + "loss": 2.3058, + "step": 61400 + }, + { + "epoch": 103.75, + "learning_rate": 5.805213942976004e-05, + "loss": 2.2974, + "step": 61420 + }, + { + "epoch": 103.78, + "learning_rate": 5.80078420951091e-05, + "loss": 2.3086, + "step": 61440 + }, + { + "epoch": 103.82, + "learning_rate": 5.7963538308684406e-05, + "loss": 2.3129, + "step": 61460 + }, + { + "epoch": 103.85, + "learning_rate": 5.791922810618075e-05, + "loss": 2.3136, + "step": 61480 + }, + { + "epoch": 103.89, + "learning_rate": 5.787491152329804e-05, + "loss": 2.295, + "step": 61500 + }, + { + "epoch": 103.92, + "learning_rate": 5.783058859574136e-05, + "loss": 2.2978, + "step": 61520 + }, + { + "epoch": 103.95, + "learning_rate": 5.7786259359220887e-05, + "loss": 2.3184, + "step": 61540 + }, + { + "epoch": 103.99, + "learning_rate": 5.774192384945188e-05, + "loss": 2.3166, + "step": 61560 + }, + { + "epoch": 104.02, + "learning_rate": 5.769758210215466e-05, + "loss": 2.2875, + "step": 61580 + }, + { + "epoch": 104.05, + "learning_rate": 5.7653234153054556e-05, + "loss": 2.2524, + "step": 61600 + }, + { + "epoch": 104.09, + "learning_rate": 5.7608880037881965e-05, + "loss": 2.2642, + "step": 61620 + }, + { + "epoch": 104.12, + "learning_rate": 5.756451979237214e-05, + "loss": 2.269, + "step": 61640 + }, + { + "epoch": 104.16, + "learning_rate": 5.752015345226537e-05, + "loss": 2.2599, + "step": 61660 + }, + { + "epoch": 104.19, + "learning_rate": 5.7475781053306776e-05, + "loss": 2.2463, + "step": 61680 + }, + { + "epoch": 104.22, + "learning_rate": 5.7431402631246424e-05, + "loss": 2.2711, + "step": 61700 + }, + { + "epoch": 104.26, + "learning_rate": 5.7387018221839197e-05, + "loss": 2.2536, + "step": 61720 + }, + { + "epoch": 104.29, + "learning_rate": 5.73426278608448e-05, + "loss": 2.2713, + "step": 61740 + }, + { + "epoch": 104.32, + "learning_rate": 5.729823158402777e-05, + "loss": 2.2823, + "step": 61760 + }, + { + "epoch": 104.36, + "learning_rate": 5.725382942715738e-05, + "loss": 2.2796, + "step": 61780 + }, + { + "epoch": 104.39, + "learning_rate": 5.720942142600764e-05, + "loss": 2.2541, + "step": 61800 + }, + { + "epoch": 104.43, + "learning_rate": 5.716500761635727e-05, + "loss": 2.2788, + "step": 61820 + }, + { + "epoch": 104.46, + "learning_rate": 5.712058803398969e-05, + "loss": 2.2683, + "step": 61840 + }, + { + "epoch": 104.49, + "learning_rate": 5.707616271469293e-05, + "loss": 2.2898, + "step": 61860 + }, + { + "epoch": 104.53, + "learning_rate": 5.7031731694259696e-05, + "loss": 2.2814, + "step": 61880 + }, + { + "epoch": 104.56, + "learning_rate": 5.698729500848722e-05, + "loss": 2.2925, + "step": 61900 + }, + { + "epoch": 104.59, + "learning_rate": 5.694285269317738e-05, + "loss": 2.306, + "step": 61920 + }, + { + "epoch": 104.63, + "learning_rate": 5.689840478413652e-05, + "loss": 2.2874, + "step": 61940 + }, + { + "epoch": 104.66, + "learning_rate": 5.685395131717553e-05, + "loss": 2.2879, + "step": 61960 + }, + { + "epoch": 104.7, + "learning_rate": 5.680949232810977e-05, + "loss": 2.2877, + "step": 61980 + }, + { + "epoch": 104.73, + "learning_rate": 5.676502785275901e-05, + "loss": 2.296, + "step": 62000 + }, + { + "epoch": 104.73, + "eval_loss": 2.6081273555755615, + "eval_runtime": 50.8346, + "eval_samples_per_second": 19.455, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.004712840923631677, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03142978121741027, + "eval_tse_type": 0.00035712739957381837, + "step": 62000 + }, + { + "epoch": 104.76, + "learning_rate": 5.672055792694753e-05, + "loss": 2.299, + "step": 62020 + }, + { + "epoch": 104.8, + "learning_rate": 5.66760825865039e-05, + "loss": 2.3076, + "step": 62040 + }, + { + "epoch": 104.83, + "learning_rate": 5.663160186726112e-05, + "loss": 2.3144, + "step": 62060 + }, + { + "epoch": 104.86, + "learning_rate": 5.658711580505649e-05, + "loss": 2.3053, + "step": 62080 + }, + { + "epoch": 104.9, + "learning_rate": 5.654262443573164e-05, + "loss": 2.2974, + "step": 62100 + }, + { + "epoch": 104.93, + "learning_rate": 5.649812779513245e-05, + "loss": 2.2965, + "step": 62120 + }, + { + "epoch": 104.97, + "learning_rate": 5.6453625919109074e-05, + "loss": 2.2986, + "step": 62140 + }, + { + "epoch": 105.0, + "learning_rate": 5.6409118843515854e-05, + "loss": 2.2909, + "step": 62160 + }, + { + "epoch": 105.03, + "learning_rate": 5.6364606604211345e-05, + "loss": 2.2567, + "step": 62180 + }, + { + "epoch": 105.07, + "learning_rate": 5.632008923705825e-05, + "loss": 2.2407, + "step": 62200 + }, + { + "epoch": 105.1, + "learning_rate": 5.627556677792343e-05, + "loss": 2.2438, + "step": 62220 + }, + { + "epoch": 105.14, + "learning_rate": 5.623103926267779e-05, + "loss": 2.2538, + "step": 62240 + }, + { + "epoch": 105.17, + "learning_rate": 5.6186506727196364e-05, + "loss": 2.2554, + "step": 62260 + }, + { + "epoch": 105.2, + "learning_rate": 5.614196920735821e-05, + "loss": 2.2541, + "step": 62280 + }, + { + "epoch": 105.24, + "learning_rate": 5.609742673904641e-05, + "loss": 2.2575, + "step": 62300 + }, + { + "epoch": 105.27, + "learning_rate": 5.6052879358148e-05, + "loss": 2.2585, + "step": 62320 + }, + { + "epoch": 105.3, + "learning_rate": 5.600832710055404e-05, + "loss": 2.2517, + "step": 62340 + }, + { + "epoch": 105.34, + "learning_rate": 5.596377000215945e-05, + "loss": 2.2803, + "step": 62360 + }, + { + "epoch": 105.37, + "learning_rate": 5.5919208098863084e-05, + "loss": 2.2715, + "step": 62380 + }, + { + "epoch": 105.41, + "learning_rate": 5.5874641426567684e-05, + "loss": 2.2758, + "step": 62400 + }, + { + "epoch": 105.44, + "learning_rate": 5.583007002117978e-05, + "loss": 2.2809, + "step": 62420 + }, + { + "epoch": 105.47, + "learning_rate": 5.5785493918609776e-05, + "loss": 2.2825, + "step": 62440 + }, + { + "epoch": 105.51, + "learning_rate": 5.5740913154771814e-05, + "loss": 2.275, + "step": 62460 + }, + { + "epoch": 105.54, + "learning_rate": 5.56963277655838e-05, + "loss": 2.2765, + "step": 62480 + }, + { + "epoch": 105.57, + "learning_rate": 5.5651737786967404e-05, + "loss": 2.2791, + "step": 62500 + }, + { + "epoch": 105.61, + "learning_rate": 5.560714325484796e-05, + "loss": 2.2957, + "step": 62520 + }, + { + "epoch": 105.64, + "learning_rate": 5.556254420515448e-05, + "loss": 2.2953, + "step": 62540 + }, + { + "epoch": 105.68, + "learning_rate": 5.552017095627057e-05, + "loss": 2.2706, + "step": 62560 + }, + { + "epoch": 105.71, + "learning_rate": 5.5475563200662275e-05, + "loss": 2.2801, + "step": 62580 + }, + { + "epoch": 105.74, + "learning_rate": 5.54309510334916e-05, + "loss": 2.2807, + "step": 62600 + }, + { + "epoch": 105.78, + "learning_rate": 5.538633449070177e-05, + "loss": 2.291, + "step": 62620 + }, + { + "epoch": 105.81, + "learning_rate": 5.5341713608239534e-05, + "loss": 2.2895, + "step": 62640 + }, + { + "epoch": 105.84, + "learning_rate": 5.529708842205512e-05, + "loss": 2.2955, + "step": 62660 + }, + { + "epoch": 105.88, + "learning_rate": 5.525245896810225e-05, + "loss": 2.288, + "step": 62680 + }, + { + "epoch": 105.91, + "learning_rate": 5.520782528233807e-05, + "loss": 2.284, + "step": 62700 + }, + { + "epoch": 105.95, + "learning_rate": 5.516318740072311e-05, + "loss": 2.294, + "step": 62720 + }, + { + "epoch": 105.98, + "learning_rate": 5.511854535922131e-05, + "loss": 2.2958, + "step": 62740 + }, + { + "epoch": 106.01, + "learning_rate": 5.5073899193799985e-05, + "loss": 2.2622, + "step": 62760 + }, + { + "epoch": 106.05, + "learning_rate": 5.502924894042971e-05, + "loss": 2.2278, + "step": 62780 + }, + { + "epoch": 106.08, + "learning_rate": 5.498459463508443e-05, + "loss": 2.2378, + "step": 62800 + }, + { + "epoch": 106.11, + "learning_rate": 5.4939936313741245e-05, + "loss": 2.2397, + "step": 62820 + }, + { + "epoch": 106.15, + "learning_rate": 5.4895274012380625e-05, + "loss": 2.2511, + "step": 62840 + }, + { + "epoch": 106.18, + "learning_rate": 5.485060776698615e-05, + "loss": 2.2478, + "step": 62860 + }, + { + "epoch": 106.22, + "learning_rate": 5.480593761354461e-05, + "loss": 2.2558, + "step": 62880 + }, + { + "epoch": 106.25, + "learning_rate": 5.476126358804594e-05, + "loss": 2.2576, + "step": 62900 + }, + { + "epoch": 106.28, + "learning_rate": 5.471658572648318e-05, + "loss": 2.2568, + "step": 62920 + }, + { + "epoch": 106.32, + "learning_rate": 5.467190406485252e-05, + "loss": 2.262, + "step": 62940 + }, + { + "epoch": 106.35, + "learning_rate": 5.462721863915312e-05, + "loss": 2.254, + "step": 62960 + }, + { + "epoch": 106.39, + "learning_rate": 5.458252948538724e-05, + "loss": 2.2522, + "step": 62980 + }, + { + "epoch": 106.42, + "learning_rate": 5.4537836639560125e-05, + "loss": 2.2672, + "step": 63000 + }, + { + "epoch": 106.42, + "eval_loss": 2.597033977508545, + "eval_runtime": 48.9564, + "eval_samples_per_second": 20.202, + "eval_steps_per_second": 0.123, + "eval_tse_ndup": 0.004876015186855477, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03177431396473742, + "eval_tse_type": 0.0003672788379025068, + "step": 63000 + }, + { + "epoch": 106.45, + "learning_rate": 5.449314013768e-05, + "loss": 2.2633, + "step": 63020 + }, + { + "epoch": 106.49, + "learning_rate": 5.444844001575802e-05, + "loss": 2.2474, + "step": 63040 + }, + { + "epoch": 106.52, + "learning_rate": 5.440373630980827e-05, + "loss": 2.2761, + "step": 63060 + }, + { + "epoch": 106.55, + "learning_rate": 5.435902905584771e-05, + "loss": 2.27, + "step": 63080 + }, + { + "epoch": 106.59, + "learning_rate": 5.4314318289896185e-05, + "loss": 2.2901, + "step": 63100 + }, + { + "epoch": 106.62, + "learning_rate": 5.4269604047976316e-05, + "loss": 2.2814, + "step": 63120 + }, + { + "epoch": 106.66, + "learning_rate": 5.4224886366113605e-05, + "loss": 2.2735, + "step": 63140 + }, + { + "epoch": 106.69, + "learning_rate": 5.418016528033625e-05, + "loss": 2.2716, + "step": 63160 + }, + { + "epoch": 106.72, + "learning_rate": 5.4135440826675235e-05, + "loss": 2.2765, + "step": 63180 + }, + { + "epoch": 106.76, + "learning_rate": 5.4090713041164245e-05, + "loss": 2.2914, + "step": 63200 + }, + { + "epoch": 106.79, + "learning_rate": 5.404598195983963e-05, + "loss": 2.2833, + "step": 63220 + }, + { + "epoch": 106.82, + "learning_rate": 5.400124761874045e-05, + "loss": 2.2652, + "step": 63240 + }, + { + "epoch": 106.86, + "learning_rate": 5.3956510053908306e-05, + "loss": 2.2812, + "step": 63260 + }, + { + "epoch": 106.89, + "learning_rate": 5.3911769301387505e-05, + "loss": 2.2869, + "step": 63280 + }, + { + "epoch": 106.93, + "learning_rate": 5.3867025397224814e-05, + "loss": 2.2808, + "step": 63300 + }, + { + "epoch": 106.96, + "learning_rate": 5.3822278377469616e-05, + "loss": 2.3026, + "step": 63320 + }, + { + "epoch": 106.99, + "learning_rate": 5.377752827817376e-05, + "loss": 2.2837, + "step": 63340 + }, + { + "epoch": 107.03, + "learning_rate": 5.373277513539162e-05, + "loss": 2.2491, + "step": 63360 + }, + { + "epoch": 107.06, + "learning_rate": 5.3688018985179956e-05, + "loss": 2.2317, + "step": 63380 + }, + { + "epoch": 107.09, + "learning_rate": 5.3643259863598015e-05, + "loss": 2.2223, + "step": 63400 + }, + { + "epoch": 107.13, + "learning_rate": 5.3598497806707406e-05, + "loss": 2.2341, + "step": 63420 + }, + { + "epoch": 107.16, + "learning_rate": 5.3553732850572104e-05, + "loss": 2.2288, + "step": 63440 + }, + { + "epoch": 107.2, + "learning_rate": 5.350896503125845e-05, + "loss": 2.2444, + "step": 63460 + }, + { + "epoch": 107.23, + "learning_rate": 5.346419438483503e-05, + "loss": 2.2453, + "step": 63480 + }, + { + "epoch": 107.26, + "learning_rate": 5.341942094737279e-05, + "loss": 2.2444, + "step": 63500 + }, + { + "epoch": 107.3, + "learning_rate": 5.3374644754944836e-05, + "loss": 2.245, + "step": 63520 + }, + { + "epoch": 107.33, + "learning_rate": 5.332986584362656e-05, + "loss": 2.2478, + "step": 63540 + }, + { + "epoch": 107.36, + "learning_rate": 5.328508424949551e-05, + "loss": 2.2563, + "step": 63560 + }, + { + "epoch": 107.4, + "learning_rate": 5.324030000863143e-05, + "loss": 2.2545, + "step": 63580 + }, + { + "epoch": 107.43, + "learning_rate": 5.319551315711615e-05, + "loss": 2.2581, + "step": 63600 + }, + { + "epoch": 107.47, + "learning_rate": 5.3150723731033644e-05, + "loss": 2.244, + "step": 63620 + }, + { + "epoch": 107.5, + "learning_rate": 5.3105931766469943e-05, + "loss": 2.2545, + "step": 63640 + }, + { + "epoch": 107.53, + "learning_rate": 5.30611372995131e-05, + "loss": 2.2705, + "step": 63660 + }, + { + "epoch": 107.57, + "learning_rate": 5.301634036625324e-05, + "loss": 2.2615, + "step": 63680 + }, + { + "epoch": 107.6, + "learning_rate": 5.297154100278241e-05, + "loss": 2.2632, + "step": 63700 + }, + { + "epoch": 107.64, + "learning_rate": 5.292673924519469e-05, + "loss": 2.2676, + "step": 63720 + }, + { + "epoch": 107.67, + "learning_rate": 5.2881935129586e-05, + "loss": 2.277, + "step": 63740 + }, + { + "epoch": 107.7, + "learning_rate": 5.283712869205426e-05, + "loss": 2.2824, + "step": 63760 + }, + { + "epoch": 107.74, + "learning_rate": 5.279231996869916e-05, + "loss": 2.2842, + "step": 63780 + }, + { + "epoch": 107.77, + "learning_rate": 5.27475089956223e-05, + "loss": 2.274, + "step": 63800 + }, + { + "epoch": 107.8, + "learning_rate": 5.270269580892706e-05, + "loss": 2.2785, + "step": 63820 + }, + { + "epoch": 107.84, + "learning_rate": 5.2657880444718625e-05, + "loss": 2.2765, + "step": 63840 + }, + { + "epoch": 107.87, + "learning_rate": 5.2613062939103927e-05, + "loss": 2.2733, + "step": 63860 + }, + { + "epoch": 107.91, + "learning_rate": 5.256824332819162e-05, + "loss": 2.2809, + "step": 63880 + }, + { + "epoch": 107.94, + "learning_rate": 5.2523421648092044e-05, + "loss": 2.2792, + "step": 63900 + }, + { + "epoch": 107.97, + "learning_rate": 5.247859793491723e-05, + "loss": 2.283, + "step": 63920 + }, + { + "epoch": 108.01, + "learning_rate": 5.243377222478083e-05, + "loss": 2.2723, + "step": 63940 + }, + { + "epoch": 108.04, + "learning_rate": 5.2388944553798106e-05, + "loss": 2.2172, + "step": 63960 + }, + { + "epoch": 108.07, + "learning_rate": 5.2344114958085896e-05, + "loss": 2.2221, + "step": 63980 + }, + { + "epoch": 108.11, + "learning_rate": 5.2299283473762606e-05, + "loss": 2.2378, + "step": 64000 + }, + { + "epoch": 108.11, + "eval_loss": 2.5892648696899414, + "eval_runtime": 47.4999, + "eval_samples_per_second": 20.821, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.00398650239969483, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03195638244943147, + "eval_tse_type": 0.0003826817211549264, + "step": 64000 + }, + { + "epoch": 108.14, + "learning_rate": 5.225445013694816e-05, + "loss": 2.2405, + "step": 64020 + }, + { + "epoch": 108.18, + "learning_rate": 5.2209614983763954e-05, + "loss": 2.2324, + "step": 64040 + }, + { + "epoch": 108.21, + "learning_rate": 5.216477805033287e-05, + "loss": 2.2271, + "step": 64060 + }, + { + "epoch": 108.24, + "learning_rate": 5.2119939372779216e-05, + "loss": 2.2343, + "step": 64080 + }, + { + "epoch": 108.28, + "learning_rate": 5.207509898722869e-05, + "loss": 2.2378, + "step": 64100 + }, + { + "epoch": 108.31, + "learning_rate": 5.20302569298084e-05, + "loss": 2.2396, + "step": 64120 + }, + { + "epoch": 108.34, + "learning_rate": 5.198541323664676e-05, + "loss": 2.2535, + "step": 64140 + }, + { + "epoch": 108.38, + "learning_rate": 5.1940567943873545e-05, + "loss": 2.2533, + "step": 64160 + }, + { + "epoch": 108.41, + "learning_rate": 5.1895721087619774e-05, + "loss": 2.2434, + "step": 64180 + }, + { + "epoch": 108.45, + "learning_rate": 5.185087270401778e-05, + "loss": 2.246, + "step": 64200 + }, + { + "epoch": 108.48, + "learning_rate": 5.180602282920107e-05, + "loss": 2.255, + "step": 64220 + }, + { + "epoch": 108.51, + "learning_rate": 5.1761171499304403e-05, + "loss": 2.2554, + "step": 64240 + }, + { + "epoch": 108.55, + "learning_rate": 5.171631875046366e-05, + "loss": 2.2481, + "step": 64260 + }, + { + "epoch": 108.58, + "learning_rate": 5.167146461881589e-05, + "loss": 2.2584, + "step": 64280 + }, + { + "epoch": 108.61, + "learning_rate": 5.162660914049927e-05, + "loss": 2.2496, + "step": 64300 + }, + { + "epoch": 108.65, + "learning_rate": 5.158175235165306e-05, + "loss": 2.266, + "step": 64320 + }, + { + "epoch": 108.68, + "learning_rate": 5.153689428841754e-05, + "loss": 2.2703, + "step": 64340 + }, + { + "epoch": 108.72, + "learning_rate": 5.1492034986934046e-05, + "loss": 2.2663, + "step": 64360 + }, + { + "epoch": 108.75, + "learning_rate": 5.144717448334493e-05, + "loss": 2.2624, + "step": 64380 + }, + { + "epoch": 108.78, + "learning_rate": 5.140231281379345e-05, + "loss": 2.2639, + "step": 64400 + }, + { + "epoch": 108.82, + "learning_rate": 5.135745001442388e-05, + "loss": 2.26, + "step": 64420 + }, + { + "epoch": 108.85, + "learning_rate": 5.1312586121381335e-05, + "loss": 2.2627, + "step": 64440 + }, + { + "epoch": 108.89, + "learning_rate": 5.1267721170811886e-05, + "loss": 2.2613, + "step": 64460 + }, + { + "epoch": 108.92, + "learning_rate": 5.122285519886236e-05, + "loss": 2.2713, + "step": 64480 + }, + { + "epoch": 108.95, + "learning_rate": 5.117798824168052e-05, + "loss": 2.2873, + "step": 64500 + }, + { + "epoch": 108.99, + "learning_rate": 5.113312033541481e-05, + "loss": 2.2707, + "step": 64520 + }, + { + "epoch": 109.02, + "learning_rate": 5.1088251516214515e-05, + "loss": 2.2342, + "step": 64540 + }, + { + "epoch": 109.05, + "learning_rate": 5.104338182022962e-05, + "loss": 2.2223, + "step": 64560 + }, + { + "epoch": 109.09, + "learning_rate": 5.099851128361085e-05, + "loss": 2.2194, + "step": 64580 + }, + { + "epoch": 109.12, + "learning_rate": 5.0953639942509565e-05, + "loss": 2.2193, + "step": 64600 + }, + { + "epoch": 109.16, + "learning_rate": 5.090876783307781e-05, + "loss": 2.2249, + "step": 64620 + }, + { + "epoch": 109.19, + "learning_rate": 5.086389499146823e-05, + "loss": 2.2338, + "step": 64640 + }, + { + "epoch": 109.22, + "learning_rate": 5.081902145383406e-05, + "loss": 2.2304, + "step": 64660 + }, + { + "epoch": 109.26, + "learning_rate": 5.07741472563291e-05, + "loss": 2.2413, + "step": 64680 + }, + { + "epoch": 109.29, + "learning_rate": 5.072927243510766e-05, + "loss": 2.2229, + "step": 64700 + }, + { + "epoch": 109.32, + "learning_rate": 5.06843970263246e-05, + "loss": 2.226, + "step": 64720 + }, + { + "epoch": 109.36, + "learning_rate": 5.063952106613522e-05, + "loss": 2.2317, + "step": 64740 + }, + { + "epoch": 109.39, + "learning_rate": 5.059464459069527e-05, + "loss": 2.2323, + "step": 64760 + }, + { + "epoch": 109.43, + "learning_rate": 5.0549767636160915e-05, + "loss": 2.2496, + "step": 64780 + }, + { + "epoch": 109.46, + "learning_rate": 5.0504890238688705e-05, + "loss": 2.2398, + "step": 64800 + }, + { + "epoch": 109.49, + "learning_rate": 5.046001243443554e-05, + "loss": 2.257, + "step": 64820 + }, + { + "epoch": 109.53, + "learning_rate": 5.041513425955868e-05, + "loss": 2.2599, + "step": 64840 + }, + { + "epoch": 109.56, + "learning_rate": 5.0370255750215636e-05, + "loss": 2.2503, + "step": 64860 + }, + { + "epoch": 109.59, + "learning_rate": 5.0325376942564215e-05, + "loss": 2.2577, + "step": 64880 + }, + { + "epoch": 109.63, + "learning_rate": 5.028049787276249e-05, + "loss": 2.2485, + "step": 64900 + }, + { + "epoch": 109.66, + "learning_rate": 5.023561857696867e-05, + "loss": 2.2531, + "step": 64920 + }, + { + "epoch": 109.7, + "learning_rate": 5.019073909134124e-05, + "loss": 2.2586, + "step": 64940 + }, + { + "epoch": 109.73, + "learning_rate": 5.0145859452038755e-05, + "loss": 2.2691, + "step": 64960 + }, + { + "epoch": 109.76, + "learning_rate": 5.010097969521996e-05, + "loss": 2.2694, + "step": 64980 + }, + { + "epoch": 109.8, + "learning_rate": 5.0056099857043624e-05, + "loss": 2.2457, + "step": 65000 + }, + { + "epoch": 109.8, + "eval_loss": 2.5843918323516846, + "eval_runtime": 47.5161, + "eval_samples_per_second": 20.814, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004661628299613522, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03039183561700737, + "eval_tse_type": 0.00044763228850690933, + "step": 65000 + }, + { + "epoch": 109.83, + "learning_rate": 5.001121997366867e-05, + "loss": 2.2672, + "step": 65020 + }, + { + "epoch": 109.86, + "learning_rate": 4.9966340081253966e-05, + "loss": 2.2523, + "step": 65040 + }, + { + "epoch": 109.9, + "learning_rate": 4.992146021595847e-05, + "loss": 2.2544, + "step": 65060 + }, + { + "epoch": 109.93, + "learning_rate": 4.9876580413941045e-05, + "loss": 2.2535, + "step": 65080 + }, + { + "epoch": 109.97, + "learning_rate": 4.983170071136055e-05, + "loss": 2.2583, + "step": 65100 + }, + { + "epoch": 110.0, + "learning_rate": 4.978682114437576e-05, + "loss": 2.2705, + "step": 65120 + }, + { + "epoch": 110.03, + "learning_rate": 4.974194174914531e-05, + "loss": 2.1967, + "step": 65140 + }, + { + "epoch": 110.07, + "learning_rate": 4.9697062561827764e-05, + "loss": 2.2078, + "step": 65160 + }, + { + "epoch": 110.1, + "learning_rate": 4.9652183618581404e-05, + "loss": 2.2159, + "step": 65180 + }, + { + "epoch": 110.14, + "learning_rate": 4.960730495556446e-05, + "loss": 2.2046, + "step": 65200 + }, + { + "epoch": 110.17, + "learning_rate": 4.9562426608934774e-05, + "loss": 2.2123, + "step": 65220 + }, + { + "epoch": 110.2, + "learning_rate": 4.951754861485008e-05, + "loss": 2.2236, + "step": 65240 + }, + { + "epoch": 110.24, + "learning_rate": 4.947267100946777e-05, + "loss": 2.2242, + "step": 65260 + }, + { + "epoch": 110.27, + "learning_rate": 4.942779382894489e-05, + "loss": 2.2472, + "step": 65280 + }, + { + "epoch": 110.3, + "learning_rate": 4.93829171094382e-05, + "loss": 2.2312, + "step": 65300 + }, + { + "epoch": 110.34, + "learning_rate": 4.933804088710403e-05, + "loss": 2.227, + "step": 65320 + }, + { + "epoch": 110.37, + "learning_rate": 4.9293165198098376e-05, + "loss": 2.2231, + "step": 65340 + }, + { + "epoch": 110.41, + "learning_rate": 4.924829007857674e-05, + "loss": 2.2399, + "step": 65360 + }, + { + "epoch": 110.44, + "learning_rate": 4.920341556469421e-05, + "loss": 2.2482, + "step": 65380 + }, + { + "epoch": 110.47, + "learning_rate": 4.915854169260539e-05, + "loss": 2.2318, + "step": 65400 + }, + { + "epoch": 110.51, + "learning_rate": 4.911366849846432e-05, + "loss": 2.2382, + "step": 65420 + }, + { + "epoch": 110.54, + "learning_rate": 4.9068796018424535e-05, + "loss": 2.2403, + "step": 65440 + }, + { + "epoch": 110.57, + "learning_rate": 4.9023924288638975e-05, + "loss": 2.2393, + "step": 65460 + }, + { + "epoch": 110.61, + "learning_rate": 4.897905334525999e-05, + "loss": 2.2485, + "step": 65480 + }, + { + "epoch": 110.64, + "learning_rate": 4.893418322443928e-05, + "loss": 2.2434, + "step": 65500 + }, + { + "epoch": 110.68, + "learning_rate": 4.8889313962327876e-05, + "loss": 2.2399, + "step": 65520 + }, + { + "epoch": 110.71, + "learning_rate": 4.884444559507618e-05, + "loss": 2.2565, + "step": 65540 + }, + { + "epoch": 110.74, + "learning_rate": 4.879957815883378e-05, + "loss": 2.2508, + "step": 65560 + }, + { + "epoch": 110.78, + "learning_rate": 4.875471168974959e-05, + "loss": 2.2577, + "step": 65580 + }, + { + "epoch": 110.81, + "learning_rate": 4.870984622397169e-05, + "loss": 2.2534, + "step": 65600 + }, + { + "epoch": 110.84, + "learning_rate": 4.866498179764739e-05, + "loss": 2.2419, + "step": 65620 + }, + { + "epoch": 110.88, + "learning_rate": 4.862011844692313e-05, + "loss": 2.2655, + "step": 65640 + }, + { + "epoch": 110.91, + "learning_rate": 4.857525620794451e-05, + "loss": 2.2519, + "step": 65660 + }, + { + "epoch": 110.95, + "learning_rate": 4.853039511685626e-05, + "loss": 2.2565, + "step": 65680 + }, + { + "epoch": 110.98, + "learning_rate": 4.848553520980208e-05, + "loss": 2.2471, + "step": 65700 + }, + { + "epoch": 111.01, + "learning_rate": 4.844067652292487e-05, + "loss": 2.2414, + "step": 65720 + }, + { + "epoch": 111.05, + "learning_rate": 4.839581909236638e-05, + "loss": 2.2022, + "step": 65740 + }, + { + "epoch": 111.08, + "learning_rate": 4.83509629542675e-05, + "loss": 2.188, + "step": 65760 + }, + { + "epoch": 111.11, + "learning_rate": 4.830610814476797e-05, + "loss": 2.1921, + "step": 65780 + }, + { + "epoch": 111.15, + "learning_rate": 4.826125470000651e-05, + "loss": 2.2088, + "step": 65800 + }, + { + "epoch": 111.18, + "learning_rate": 4.821640265612075e-05, + "loss": 2.2002, + "step": 65820 + }, + { + "epoch": 111.22, + "learning_rate": 4.817155204924714e-05, + "loss": 2.2228, + "step": 65840 + }, + { + "epoch": 111.25, + "learning_rate": 4.812670291552103e-05, + "loss": 2.2169, + "step": 65860 + }, + { + "epoch": 111.28, + "learning_rate": 4.808185529107652e-05, + "loss": 2.2194, + "step": 65880 + }, + { + "epoch": 111.32, + "learning_rate": 4.8037009212046586e-05, + "loss": 2.2201, + "step": 65900 + }, + { + "epoch": 111.35, + "learning_rate": 4.799216471456284e-05, + "loss": 2.2295, + "step": 65920 + }, + { + "epoch": 111.39, + "learning_rate": 4.794732183475574e-05, + "loss": 2.2339, + "step": 65940 + }, + { + "epoch": 111.42, + "learning_rate": 4.790248060875436e-05, + "loss": 2.2344, + "step": 65960 + }, + { + "epoch": 111.45, + "learning_rate": 4.785764107268647e-05, + "loss": 2.2413, + "step": 65980 + }, + { + "epoch": 111.49, + "learning_rate": 4.7812803262678475e-05, + "loss": 2.2349, + "step": 66000 + }, + { + "epoch": 111.49, + "eval_loss": 2.5746378898620605, + "eval_runtime": 50.8615, + "eval_samples_per_second": 19.445, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.005091489181962179, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.031770601317693443, + "eval_tse_type": 0.00035849893660813784, + "step": 66000 + }, + { + "epoch": 111.52, + "learning_rate": 4.776796721485537e-05, + "loss": 2.2249, + "step": 66020 + }, + { + "epoch": 111.55, + "learning_rate": 4.772313296534079e-05, + "loss": 2.2444, + "step": 66040 + }, + { + "epoch": 111.59, + "learning_rate": 4.767830055025682e-05, + "loss": 2.226, + "step": 66060 + }, + { + "epoch": 111.62, + "learning_rate": 4.7633470005724176e-05, + "loss": 2.2421, + "step": 66080 + }, + { + "epoch": 111.66, + "learning_rate": 4.758864136786202e-05, + "loss": 2.241, + "step": 66100 + }, + { + "epoch": 111.69, + "learning_rate": 4.7543814672787945e-05, + "loss": 2.2329, + "step": 66120 + }, + { + "epoch": 111.72, + "learning_rate": 4.7498989956618035e-05, + "loss": 2.2463, + "step": 66140 + }, + { + "epoch": 111.76, + "learning_rate": 4.745416725546673e-05, + "loss": 2.2587, + "step": 66160 + }, + { + "epoch": 111.79, + "learning_rate": 4.74093466054469e-05, + "loss": 2.2374, + "step": 66180 + }, + { + "epoch": 111.82, + "learning_rate": 4.7364528042669705e-05, + "loss": 2.2524, + "step": 66200 + }, + { + "epoch": 111.86, + "learning_rate": 4.731971160324465e-05, + "loss": 2.2392, + "step": 66220 + }, + { + "epoch": 111.89, + "learning_rate": 4.727489732327958e-05, + "loss": 2.2469, + "step": 66240 + }, + { + "epoch": 111.93, + "learning_rate": 4.7230085238880475e-05, + "loss": 2.2462, + "step": 66260 + }, + { + "epoch": 111.96, + "learning_rate": 4.71852753861517e-05, + "loss": 2.2592, + "step": 66280 + }, + { + "epoch": 111.99, + "learning_rate": 4.714270812602657e-05, + "loss": 2.2527, + "step": 66300 + }, + { + "epoch": 112.03, + "learning_rate": 4.709790272889296e-05, + "loss": 2.2011, + "step": 66320 + }, + { + "epoch": 112.06, + "learning_rate": 4.705309966992672e-05, + "loss": 2.1849, + "step": 66340 + }, + { + "epoch": 112.09, + "learning_rate": 4.700829898522483e-05, + "loss": 2.2052, + "step": 66360 + }, + { + "epoch": 112.13, + "learning_rate": 4.6963500710882435e-05, + "loss": 2.1981, + "step": 66380 + }, + { + "epoch": 112.16, + "learning_rate": 4.691870488299264e-05, + "loss": 2.2063, + "step": 66400 + }, + { + "epoch": 112.2, + "learning_rate": 4.68739115376467e-05, + "loss": 2.2066, + "step": 66420 + }, + { + "epoch": 112.23, + "learning_rate": 4.682912071093374e-05, + "loss": 2.2099, + "step": 66440 + }, + { + "epoch": 112.26, + "learning_rate": 4.6784332438940963e-05, + "loss": 2.2131, + "step": 66460 + }, + { + "epoch": 112.3, + "learning_rate": 4.673954675775347e-05, + "loss": 2.2188, + "step": 66480 + }, + { + "epoch": 112.33, + "learning_rate": 4.669476370345425e-05, + "loss": 2.2169, + "step": 66500 + }, + { + "epoch": 112.36, + "learning_rate": 4.664998331212422e-05, + "loss": 2.2119, + "step": 66520 + }, + { + "epoch": 112.4, + "learning_rate": 4.660520561984211e-05, + "loss": 2.2177, + "step": 66540 + }, + { + "epoch": 112.43, + "learning_rate": 4.6560430662684545e-05, + "loss": 2.2199, + "step": 66560 + }, + { + "epoch": 112.47, + "learning_rate": 4.6515658476725834e-05, + "loss": 2.2066, + "step": 66580 + }, + { + "epoch": 112.5, + "learning_rate": 4.6470889098038174e-05, + "loss": 2.2315, + "step": 66600 + }, + { + "epoch": 112.53, + "learning_rate": 4.6426122562691427e-05, + "loss": 2.2245, + "step": 66620 + }, + { + "epoch": 112.57, + "learning_rate": 4.638135890675317e-05, + "loss": 2.2265, + "step": 66640 + }, + { + "epoch": 112.6, + "learning_rate": 4.633659816628869e-05, + "loss": 2.2283, + "step": 66660 + }, + { + "epoch": 112.64, + "learning_rate": 4.629184037736089e-05, + "loss": 2.2289, + "step": 66680 + }, + { + "epoch": 112.67, + "learning_rate": 4.624708557603034e-05, + "loss": 2.2208, + "step": 66700 + }, + { + "epoch": 112.7, + "learning_rate": 4.620233379835513e-05, + "loss": 2.2176, + "step": 66720 + }, + { + "epoch": 112.74, + "learning_rate": 4.615758508039098e-05, + "loss": 2.2373, + "step": 66740 + }, + { + "epoch": 112.77, + "learning_rate": 4.6112839458191146e-05, + "loss": 2.2379, + "step": 66760 + }, + { + "epoch": 112.8, + "learning_rate": 4.606809696780634e-05, + "loss": 2.246, + "step": 66780 + }, + { + "epoch": 112.84, + "learning_rate": 4.602335764528481e-05, + "loss": 2.2412, + "step": 66800 + }, + { + "epoch": 112.87, + "learning_rate": 4.5978621526672176e-05, + "loss": 2.2338, + "step": 66820 + }, + { + "epoch": 112.91, + "learning_rate": 4.593388864801156e-05, + "loss": 2.2458, + "step": 66840 + }, + { + "epoch": 112.94, + "learning_rate": 4.5889159045343404e-05, + "loss": 2.2484, + "step": 66860 + }, + { + "epoch": 112.97, + "learning_rate": 4.584443275470555e-05, + "loss": 2.2505, + "step": 66880 + }, + { + "epoch": 113.01, + "learning_rate": 4.579970981213319e-05, + "loss": 2.2372, + "step": 66900 + }, + { + "epoch": 113.04, + "learning_rate": 4.575499025365874e-05, + "loss": 2.18, + "step": 66920 + }, + { + "epoch": 113.07, + "learning_rate": 4.571027411531199e-05, + "loss": 2.1834, + "step": 66940 + }, + { + "epoch": 113.11, + "learning_rate": 4.566556143311989e-05, + "loss": 2.2036, + "step": 66960 + }, + { + "epoch": 113.14, + "learning_rate": 4.562085224310667e-05, + "loss": 2.1953, + "step": 66980 + }, + { + "epoch": 113.18, + "learning_rate": 4.5576146581293685e-05, + "loss": 2.1919, + "step": 67000 + }, + { + "epoch": 113.18, + "eval_loss": 2.567063331604004, + "eval_runtime": 47.8109, + "eval_samples_per_second": 20.686, + "eval_steps_per_second": 0.125, + "eval_tse_ndup": 0.00366446102423603, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03183495542273439, + "eval_tse_type": 0.0004318335018537243, + "step": 67000 + }, + { + "epoch": 113.21, + "learning_rate": 4.5531444483699496e-05, + "loss": 2.2279, + "step": 67020 + }, + { + "epoch": 113.24, + "learning_rate": 4.548674598633979e-05, + "loss": 2.1915, + "step": 67040 + }, + { + "epoch": 113.28, + "learning_rate": 4.54420511252273e-05, + "loss": 2.2072, + "step": 67060 + }, + { + "epoch": 113.31, + "learning_rate": 4.539735993637192e-05, + "loss": 2.2144, + "step": 67080 + }, + { + "epoch": 113.34, + "learning_rate": 4.5352672455780474e-05, + "loss": 2.1916, + "step": 67100 + }, + { + "epoch": 113.38, + "learning_rate": 4.530798871945693e-05, + "loss": 2.2115, + "step": 67120 + }, + { + "epoch": 113.41, + "learning_rate": 4.5263308763402084e-05, + "loss": 2.2184, + "step": 67140 + }, + { + "epoch": 113.45, + "learning_rate": 4.5218632623613833e-05, + "loss": 2.2086, + "step": 67160 + }, + { + "epoch": 113.48, + "learning_rate": 4.517396033608692e-05, + "loss": 2.2246, + "step": 67180 + }, + { + "epoch": 113.51, + "learning_rate": 4.512929193681298e-05, + "loss": 2.2148, + "step": 67200 + }, + { + "epoch": 113.55, + "learning_rate": 4.508462746178057e-05, + "loss": 2.2275, + "step": 67220 + }, + { + "epoch": 113.58, + "learning_rate": 4.503996694697502e-05, + "loss": 2.2257, + "step": 67240 + }, + { + "epoch": 113.61, + "learning_rate": 4.4995310428378524e-05, + "loss": 2.2275, + "step": 67260 + }, + { + "epoch": 113.65, + "learning_rate": 4.495065794196999e-05, + "loss": 2.2067, + "step": 67280 + }, + { + "epoch": 113.68, + "learning_rate": 4.4906009523725165e-05, + "loss": 2.2119, + "step": 67300 + }, + { + "epoch": 113.72, + "learning_rate": 4.486136520961647e-05, + "loss": 2.2324, + "step": 67320 + }, + { + "epoch": 113.75, + "learning_rate": 4.481672503561301e-05, + "loss": 2.2291, + "step": 67340 + }, + { + "epoch": 113.78, + "learning_rate": 4.477208903768057e-05, + "loss": 2.2385, + "step": 67360 + }, + { + "epoch": 113.82, + "learning_rate": 4.4727457251781544e-05, + "loss": 2.2394, + "step": 67380 + }, + { + "epoch": 113.85, + "learning_rate": 4.4682829713875e-05, + "loss": 2.2375, + "step": 67400 + }, + { + "epoch": 113.89, + "learning_rate": 4.463820645991651e-05, + "loss": 2.2284, + "step": 67420 + }, + { + "epoch": 113.92, + "learning_rate": 4.4593587525858224e-05, + "loss": 2.2405, + "step": 67440 + }, + { + "epoch": 113.95, + "learning_rate": 4.4548972947648806e-05, + "loss": 2.2442, + "step": 67460 + }, + { + "epoch": 113.99, + "learning_rate": 4.450436276123343e-05, + "loss": 2.226, + "step": 67480 + }, + { + "epoch": 114.02, + "learning_rate": 4.445975700255373e-05, + "loss": 2.1975, + "step": 67500 + }, + { + "epoch": 114.05, + "learning_rate": 4.441515570754774e-05, + "loss": 2.1857, + "step": 67520 + }, + { + "epoch": 114.09, + "learning_rate": 4.437055891214992e-05, + "loss": 2.1835, + "step": 67540 + }, + { + "epoch": 114.12, + "learning_rate": 4.4325966652291103e-05, + "loss": 2.1798, + "step": 67560 + }, + { + "epoch": 114.16, + "learning_rate": 4.428137896389848e-05, + "loss": 2.1794, + "step": 67580 + }, + { + "epoch": 114.19, + "learning_rate": 4.423679588289552e-05, + "loss": 2.1927, + "step": 67600 + }, + { + "epoch": 114.22, + "learning_rate": 4.419221744520201e-05, + "loss": 2.1983, + "step": 67620 + }, + { + "epoch": 114.26, + "learning_rate": 4.414764368673404e-05, + "loss": 2.1889, + "step": 67640 + }, + { + "epoch": 114.29, + "learning_rate": 4.410307464340381e-05, + "loss": 2.1983, + "step": 67660 + }, + { + "epoch": 114.32, + "learning_rate": 4.405851035111985e-05, + "loss": 2.1973, + "step": 67680 + }, + { + "epoch": 114.36, + "learning_rate": 4.4013950845786764e-05, + "loss": 2.2014, + "step": 67700 + }, + { + "epoch": 114.39, + "learning_rate": 4.3969396163305386e-05, + "loss": 2.2061, + "step": 67720 + }, + { + "epoch": 114.43, + "learning_rate": 4.392484633957258e-05, + "loss": 2.2058, + "step": 67740 + }, + { + "epoch": 114.46, + "learning_rate": 4.3880301410481345e-05, + "loss": 2.2005, + "step": 67760 + }, + { + "epoch": 114.49, + "learning_rate": 4.383576141192074e-05, + "loss": 2.2272, + "step": 67780 + }, + { + "epoch": 114.53, + "learning_rate": 4.37912263797758e-05, + "loss": 2.2158, + "step": 67800 + }, + { + "epoch": 114.56, + "learning_rate": 4.3746696349927655e-05, + "loss": 2.2176, + "step": 67820 + }, + { + "epoch": 114.59, + "learning_rate": 4.370217135825329e-05, + "loss": 2.2191, + "step": 67840 + }, + { + "epoch": 114.63, + "learning_rate": 4.3657651440625726e-05, + "loss": 2.2179, + "step": 67860 + }, + { + "epoch": 114.66, + "learning_rate": 4.361313663291382e-05, + "loss": 2.2229, + "step": 67880 + }, + { + "epoch": 114.7, + "learning_rate": 4.356862697098238e-05, + "loss": 2.2155, + "step": 67900 + }, + { + "epoch": 114.73, + "learning_rate": 4.3524122490692027e-05, + "loss": 2.2132, + "step": 67920 + }, + { + "epoch": 114.76, + "learning_rate": 4.347962322789921e-05, + "loss": 2.2158, + "step": 67940 + }, + { + "epoch": 114.8, + "learning_rate": 4.3435129218456193e-05, + "loss": 2.2274, + "step": 67960 + }, + { + "epoch": 114.83, + "learning_rate": 4.339064049821097e-05, + "loss": 2.2316, + "step": 67980 + }, + { + "epoch": 114.86, + "learning_rate": 4.334615710300735e-05, + "loss": 2.2477, + "step": 68000 + }, + { + "epoch": 114.86, + "eval_loss": 2.5606555938720703, + "eval_runtime": 47.4493, + "eval_samples_per_second": 20.843, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004817544469031207, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03099397112054427, + "eval_tse_type": 0.0007074345579148409, + "step": 68000 + }, + { + "epoch": 114.9, + "learning_rate": 4.330167906868474e-05, + "loss": 2.2318, + "step": 68020 + }, + { + "epoch": 114.93, + "learning_rate": 4.325720643107832e-05, + "loss": 2.2342, + "step": 68040 + }, + { + "epoch": 114.97, + "learning_rate": 4.3212739226018904e-05, + "loss": 2.2288, + "step": 68060 + }, + { + "epoch": 115.0, + "learning_rate": 4.31682774893329e-05, + "loss": 2.2382, + "step": 68080 + }, + { + "epoch": 115.03, + "learning_rate": 4.3123821256842324e-05, + "loss": 2.1705, + "step": 68100 + }, + { + "epoch": 115.07, + "learning_rate": 4.3079370564364755e-05, + "loss": 2.1913, + "step": 68120 + }, + { + "epoch": 115.1, + "learning_rate": 4.303492544771334e-05, + "loss": 2.1782, + "step": 68140 + }, + { + "epoch": 115.14, + "learning_rate": 4.2990485942696665e-05, + "loss": 2.1838, + "step": 68160 + }, + { + "epoch": 115.17, + "learning_rate": 4.2946052085118857e-05, + "loss": 2.195, + "step": 68180 + }, + { + "epoch": 115.2, + "learning_rate": 4.2901623910779494e-05, + "loss": 2.1826, + "step": 68200 + }, + { + "epoch": 115.24, + "learning_rate": 4.2857201455473525e-05, + "loss": 2.184, + "step": 68220 + }, + { + "epoch": 115.27, + "learning_rate": 4.281278475499133e-05, + "loss": 2.1998, + "step": 68240 + }, + { + "epoch": 115.3, + "learning_rate": 4.276837384511864e-05, + "loss": 2.2042, + "step": 68260 + }, + { + "epoch": 115.34, + "learning_rate": 4.2723968761636534e-05, + "loss": 2.1992, + "step": 68280 + }, + { + "epoch": 115.37, + "learning_rate": 4.267956954032136e-05, + "loss": 2.2026, + "step": 68300 + }, + { + "epoch": 115.41, + "learning_rate": 4.263517621694478e-05, + "loss": 2.1895, + "step": 68320 + }, + { + "epoch": 115.44, + "learning_rate": 4.259078882727373e-05, + "loss": 2.2081, + "step": 68340 + }, + { + "epoch": 115.47, + "learning_rate": 4.2546407407070254e-05, + "loss": 2.1983, + "step": 68360 + }, + { + "epoch": 115.51, + "learning_rate": 4.250203199209173e-05, + "loss": 2.207, + "step": 68380 + }, + { + "epoch": 115.54, + "learning_rate": 4.245766261809059e-05, + "loss": 2.2221, + "step": 68400 + }, + { + "epoch": 115.57, + "learning_rate": 4.241329932081446e-05, + "loss": 2.2158, + "step": 68420 + }, + { + "epoch": 115.61, + "learning_rate": 4.2368942136006015e-05, + "loss": 2.1987, + "step": 68440 + }, + { + "epoch": 115.64, + "learning_rate": 4.2324591099403064e-05, + "loss": 2.226, + "step": 68460 + }, + { + "epoch": 115.68, + "learning_rate": 4.228024624673844e-05, + "loss": 2.2055, + "step": 68480 + }, + { + "epoch": 115.71, + "learning_rate": 4.223590761373996e-05, + "loss": 2.2182, + "step": 68500 + }, + { + "epoch": 115.74, + "learning_rate": 4.219157523613051e-05, + "loss": 2.2134, + "step": 68520 + }, + { + "epoch": 115.78, + "learning_rate": 4.2147249149627824e-05, + "loss": 2.2096, + "step": 68540 + }, + { + "epoch": 115.81, + "learning_rate": 4.210292938994469e-05, + "loss": 2.2034, + "step": 68560 + }, + { + "epoch": 115.84, + "learning_rate": 4.205861599278868e-05, + "loss": 2.2085, + "step": 68580 + }, + { + "epoch": 115.88, + "learning_rate": 4.201430899386233e-05, + "loss": 2.22, + "step": 68600 + }, + { + "epoch": 115.91, + "learning_rate": 4.197000842886301e-05, + "loss": 2.2176, + "step": 68620 + }, + { + "epoch": 115.95, + "learning_rate": 4.192571433348284e-05, + "loss": 2.2275, + "step": 68640 + }, + { + "epoch": 115.98, + "learning_rate": 4.18814267434088e-05, + "loss": 2.2078, + "step": 68660 + }, + { + "epoch": 116.01, + "learning_rate": 4.183714569432258e-05, + "loss": 2.1973, + "step": 68680 + }, + { + "epoch": 116.05, + "learning_rate": 4.1792871221900655e-05, + "loss": 2.1775, + "step": 68700 + }, + { + "epoch": 116.08, + "learning_rate": 4.174860336181412e-05, + "loss": 2.1743, + "step": 68720 + }, + { + "epoch": 116.11, + "learning_rate": 4.170655505189254e-05, + "loss": 2.1681, + "step": 68740 + }, + { + "epoch": 116.15, + "learning_rate": 4.1662300188439016e-05, + "loss": 2.1694, + "step": 68760 + }, + { + "epoch": 116.18, + "learning_rate": 4.161805204251963e-05, + "loss": 2.177, + "step": 68780 + }, + { + "epoch": 116.22, + "learning_rate": 4.157381064978432e-05, + "loss": 2.1831, + "step": 68800 + }, + { + "epoch": 116.25, + "learning_rate": 4.1529576045877565e-05, + "loss": 2.1831, + "step": 68820 + }, + { + "epoch": 116.28, + "learning_rate": 4.1485348266438386e-05, + "loss": 2.1931, + "step": 68840 + }, + { + "epoch": 116.32, + "learning_rate": 4.144112734710034e-05, + "loss": 2.193, + "step": 68860 + }, + { + "epoch": 116.35, + "learning_rate": 4.139691332349138e-05, + "loss": 2.2098, + "step": 68880 + }, + { + "epoch": 116.39, + "learning_rate": 4.135270623123398e-05, + "loss": 2.1779, + "step": 68900 + }, + { + "epoch": 116.42, + "learning_rate": 4.130850610594497e-05, + "loss": 2.1898, + "step": 68920 + }, + { + "epoch": 116.45, + "learning_rate": 4.126431298323561e-05, + "loss": 2.2044, + "step": 68940 + }, + { + "epoch": 116.49, + "learning_rate": 4.122012689871149e-05, + "loss": 2.205, + "step": 68960 + }, + { + "epoch": 116.52, + "learning_rate": 4.117594788797253e-05, + "loss": 2.2057, + "step": 68980 + }, + { + "epoch": 116.55, + "learning_rate": 4.113177598661301e-05, + "loss": 2.1895, + "step": 69000 + }, + { + "epoch": 116.55, + "eval_loss": 2.554255247116089, + "eval_runtime": 50.962, + "eval_samples_per_second": 19.407, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.004456970751369127, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.033351287520319316, + "eval_tse_type": 0.0006056201550387597, + "step": 69000 + }, + { + "epoch": 116.59, + "learning_rate": 4.108761123022136e-05, + "loss": 2.2017, + "step": 69020 + }, + { + "epoch": 116.62, + "learning_rate": 4.104345365438039e-05, + "loss": 2.2165, + "step": 69040 + }, + { + "epoch": 116.66, + "learning_rate": 4.099930329466703e-05, + "loss": 2.2083, + "step": 69060 + }, + { + "epoch": 116.69, + "learning_rate": 4.095516018665244e-05, + "loss": 2.2081, + "step": 69080 + }, + { + "epoch": 116.72, + "learning_rate": 4.09110243659019e-05, + "loss": 2.2065, + "step": 69100 + }, + { + "epoch": 116.76, + "learning_rate": 4.0866895867974874e-05, + "loss": 2.2027, + "step": 69120 + }, + { + "epoch": 116.79, + "learning_rate": 4.082277472842488e-05, + "loss": 2.208, + "step": 69140 + }, + { + "epoch": 116.82, + "learning_rate": 4.077866098279952e-05, + "loss": 2.1954, + "step": 69160 + }, + { + "epoch": 116.86, + "learning_rate": 4.073455466664048e-05, + "loss": 2.2217, + "step": 69180 + }, + { + "epoch": 116.89, + "learning_rate": 4.069045581548335e-05, + "loss": 2.2172, + "step": 69200 + }, + { + "epoch": 116.93, + "learning_rate": 4.0646364464857854e-05, + "loss": 2.1965, + "step": 69220 + }, + { + "epoch": 116.96, + "learning_rate": 4.060228065028753e-05, + "loss": 2.2285, + "step": 69240 + }, + { + "epoch": 116.99, + "learning_rate": 4.055820440728996e-05, + "loss": 2.2177, + "step": 69260 + }, + { + "epoch": 117.03, + "learning_rate": 4.051413577137656e-05, + "loss": 2.1659, + "step": 69280 + }, + { + "epoch": 117.06, + "learning_rate": 4.047007477805263e-05, + "loss": 2.1683, + "step": 69300 + }, + { + "epoch": 117.09, + "learning_rate": 4.0426021462817325e-05, + "loss": 2.1777, + "step": 69320 + }, + { + "epoch": 117.13, + "learning_rate": 4.03819758611636e-05, + "loss": 2.1729, + "step": 69340 + }, + { + "epoch": 117.16, + "learning_rate": 4.0337938008578204e-05, + "loss": 2.1827, + "step": 69360 + }, + { + "epoch": 117.2, + "learning_rate": 4.029390794054161e-05, + "loss": 2.1744, + "step": 69380 + }, + { + "epoch": 117.23, + "learning_rate": 4.02498856925281e-05, + "loss": 2.172, + "step": 69400 + }, + { + "epoch": 117.26, + "learning_rate": 4.020587130000558e-05, + "loss": 2.1886, + "step": 69420 + }, + { + "epoch": 117.3, + "learning_rate": 4.0161864798435646e-05, + "loss": 2.1896, + "step": 69440 + }, + { + "epoch": 117.33, + "learning_rate": 4.0117866223273546e-05, + "loss": 2.17, + "step": 69460 + }, + { + "epoch": 117.36, + "learning_rate": 4.007387560996814e-05, + "loss": 2.2075, + "step": 69480 + }, + { + "epoch": 117.4, + "learning_rate": 4.002989299396187e-05, + "loss": 2.1785, + "step": 69500 + }, + { + "epoch": 117.43, + "learning_rate": 3.998591841069072e-05, + "loss": 2.1874, + "step": 69520 + }, + { + "epoch": 117.47, + "learning_rate": 3.994195189558423e-05, + "loss": 2.2015, + "step": 69540 + }, + { + "epoch": 117.5, + "learning_rate": 3.9897993484065435e-05, + "loss": 2.1784, + "step": 69560 + }, + { + "epoch": 117.53, + "learning_rate": 3.985404321155083e-05, + "loss": 2.1873, + "step": 69580 + }, + { + "epoch": 117.57, + "learning_rate": 3.9810101113450356e-05, + "loss": 2.1961, + "step": 69600 + }, + { + "epoch": 117.6, + "learning_rate": 3.976616722516735e-05, + "loss": 2.1805, + "step": 69620 + }, + { + "epoch": 117.64, + "learning_rate": 3.9722241582098574e-05, + "loss": 2.2068, + "step": 69640 + }, + { + "epoch": 117.67, + "learning_rate": 3.967832421963411e-05, + "loss": 2.1996, + "step": 69660 + }, + { + "epoch": 117.7, + "learning_rate": 3.963441517315738e-05, + "loss": 2.2086, + "step": 69680 + }, + { + "epoch": 117.74, + "learning_rate": 3.959051447804515e-05, + "loss": 2.2043, + "step": 69700 + }, + { + "epoch": 117.77, + "learning_rate": 3.954662216966736e-05, + "loss": 2.2006, + "step": 69720 + }, + { + "epoch": 117.8, + "learning_rate": 3.950273828338731e-05, + "loss": 2.2012, + "step": 69740 + }, + { + "epoch": 117.84, + "learning_rate": 3.945886285456138e-05, + "loss": 2.2014, + "step": 69760 + }, + { + "epoch": 117.87, + "learning_rate": 3.941499591853928e-05, + "loss": 2.2047, + "step": 69780 + }, + { + "epoch": 117.91, + "learning_rate": 3.937113751066377e-05, + "loss": 2.2125, + "step": 69800 + }, + { + "epoch": 117.94, + "learning_rate": 3.932728766627079e-05, + "loss": 2.1946, + "step": 69820 + }, + { + "epoch": 117.97, + "learning_rate": 3.9283446420689365e-05, + "loss": 2.2136, + "step": 69840 + }, + { + "epoch": 118.01, + "learning_rate": 3.9239613809241595e-05, + "loss": 2.1912, + "step": 69860 + }, + { + "epoch": 118.04, + "learning_rate": 3.919578986724263e-05, + "loss": 2.1715, + "step": 69880 + }, + { + "epoch": 118.07, + "learning_rate": 3.91519746300006e-05, + "loss": 2.1701, + "step": 69900 + }, + { + "epoch": 118.11, + "learning_rate": 3.91081681328167e-05, + "loss": 2.1539, + "step": 69920 + }, + { + "epoch": 118.14, + "learning_rate": 3.9064370410984976e-05, + "loss": 2.16, + "step": 69940 + }, + { + "epoch": 118.18, + "learning_rate": 3.902058149979252e-05, + "loss": 2.1514, + "step": 69960 + }, + { + "epoch": 118.21, + "learning_rate": 3.8976801434519213e-05, + "loss": 2.1707, + "step": 69980 + }, + { + "epoch": 118.24, + "learning_rate": 3.89330302504379e-05, + "loss": 2.1637, + "step": 70000 + }, + { + "epoch": 118.24, + "eval_loss": 2.547391176223755, + "eval_runtime": 49.2899, + "eval_samples_per_second": 20.065, + "eval_steps_per_second": 0.122, + "eval_tse_ndup": 0.0035748532727668705, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03135054931534757, + "eval_tse_type": 0.0003428075258099138, + "step": 70000 + }, + { + "epoch": 118.28, + "learning_rate": 3.888926798281422e-05, + "loss": 2.1963, + "step": 70020 + }, + { + "epoch": 118.31, + "learning_rate": 3.884551466690664e-05, + "loss": 2.1775, + "step": 70040 + }, + { + "epoch": 118.34, + "learning_rate": 3.880177033796643e-05, + "loss": 2.1822, + "step": 70060 + }, + { + "epoch": 118.38, + "learning_rate": 3.8758035031237565e-05, + "loss": 2.1762, + "step": 70080 + }, + { + "epoch": 118.41, + "learning_rate": 3.871430878195686e-05, + "loss": 2.1821, + "step": 70100 + }, + { + "epoch": 118.45, + "learning_rate": 3.867059162535369e-05, + "loss": 2.1828, + "step": 70120 + }, + { + "epoch": 118.48, + "learning_rate": 3.8626883596650226e-05, + "loss": 2.1742, + "step": 70140 + }, + { + "epoch": 118.51, + "learning_rate": 3.858318473106124e-05, + "loss": 2.1713, + "step": 70160 + }, + { + "epoch": 118.55, + "learning_rate": 3.853949506379408e-05, + "loss": 2.1893, + "step": 70180 + }, + { + "epoch": 118.58, + "learning_rate": 3.849581463004877e-05, + "loss": 2.1871, + "step": 70200 + }, + { + "epoch": 118.61, + "learning_rate": 3.84521434650178e-05, + "loss": 2.207, + "step": 70220 + }, + { + "epoch": 118.65, + "learning_rate": 3.840848160388628e-05, + "loss": 2.1941, + "step": 70240 + }, + { + "epoch": 118.68, + "learning_rate": 3.836482908183175e-05, + "loss": 2.1886, + "step": 70260 + }, + { + "epoch": 118.72, + "learning_rate": 3.832118593402426e-05, + "loss": 2.1996, + "step": 70280 + }, + { + "epoch": 118.75, + "learning_rate": 3.827755219562635e-05, + "loss": 2.1953, + "step": 70300 + }, + { + "epoch": 118.78, + "learning_rate": 3.823392790179288e-05, + "loss": 2.1966, + "step": 70320 + }, + { + "epoch": 118.82, + "learning_rate": 3.819031308767119e-05, + "loss": 2.1935, + "step": 70340 + }, + { + "epoch": 118.85, + "learning_rate": 3.814670778840094e-05, + "loss": 2.2139, + "step": 70360 + }, + { + "epoch": 118.89, + "learning_rate": 3.810311203911412e-05, + "loss": 2.205, + "step": 70380 + }, + { + "epoch": 118.92, + "learning_rate": 3.8059525874935045e-05, + "loss": 2.1983, + "step": 70400 + }, + { + "epoch": 118.95, + "learning_rate": 3.8015949330980296e-05, + "loss": 2.1951, + "step": 70420 + }, + { + "epoch": 118.99, + "learning_rate": 3.797238244235874e-05, + "loss": 2.1979, + "step": 70440 + }, + { + "epoch": 119.02, + "learning_rate": 3.792882524417137e-05, + "loss": 2.18, + "step": 70460 + }, + { + "epoch": 119.05, + "learning_rate": 3.7885277771511495e-05, + "loss": 2.1545, + "step": 70480 + }, + { + "epoch": 119.09, + "learning_rate": 3.7841740059464495e-05, + "loss": 2.1536, + "step": 70500 + }, + { + "epoch": 119.12, + "learning_rate": 3.7798212143107924e-05, + "loss": 2.166, + "step": 70520 + }, + { + "epoch": 119.16, + "learning_rate": 3.775469405751143e-05, + "loss": 2.1646, + "step": 70540 + }, + { + "epoch": 119.19, + "learning_rate": 3.7711185837736765e-05, + "loss": 2.1532, + "step": 70560 + }, + { + "epoch": 119.22, + "learning_rate": 3.7667687518837716e-05, + "loss": 2.1757, + "step": 70580 + }, + { + "epoch": 119.26, + "learning_rate": 3.762419913586006e-05, + "loss": 2.1677, + "step": 70600 + }, + { + "epoch": 119.29, + "learning_rate": 3.758072072384167e-05, + "loss": 2.1733, + "step": 70620 + }, + { + "epoch": 119.32, + "learning_rate": 3.753725231781223e-05, + "loss": 2.1643, + "step": 70640 + }, + { + "epoch": 119.36, + "learning_rate": 3.749379395279352e-05, + "loss": 2.171, + "step": 70660 + }, + { + "epoch": 119.39, + "learning_rate": 3.7450345663799113e-05, + "loss": 2.1729, + "step": 70680 + }, + { + "epoch": 119.43, + "learning_rate": 3.740690748583453e-05, + "loss": 2.179, + "step": 70700 + }, + { + "epoch": 119.46, + "learning_rate": 3.736347945389715e-05, + "loss": 2.1875, + "step": 70720 + }, + { + "epoch": 119.49, + "learning_rate": 3.732006160297611e-05, + "loss": 2.184, + "step": 70740 + }, + { + "epoch": 119.53, + "learning_rate": 3.7276653968052434e-05, + "loss": 2.1702, + "step": 70760 + }, + { + "epoch": 119.56, + "learning_rate": 3.723325658409882e-05, + "loss": 2.1874, + "step": 70780 + }, + { + "epoch": 119.59, + "learning_rate": 3.71898694860798e-05, + "loss": 2.17, + "step": 70800 + }, + { + "epoch": 119.63, + "learning_rate": 3.714649270895153e-05, + "loss": 2.1942, + "step": 70820 + }, + { + "epoch": 119.66, + "learning_rate": 3.7103126287661935e-05, + "loss": 2.1807, + "step": 70840 + }, + { + "epoch": 119.7, + "learning_rate": 3.7059770257150555e-05, + "loss": 2.1919, + "step": 70860 + }, + { + "epoch": 119.73, + "learning_rate": 3.7016424652348536e-05, + "loss": 2.1939, + "step": 70880 + }, + { + "epoch": 119.76, + "learning_rate": 3.697308950817868e-05, + "loss": 2.1762, + "step": 70900 + }, + { + "epoch": 119.8, + "learning_rate": 3.69297648595553e-05, + "loss": 2.1842, + "step": 70920 + }, + { + "epoch": 119.83, + "learning_rate": 3.6886450741384306e-05, + "loss": 2.1964, + "step": 70940 + }, + { + "epoch": 119.86, + "learning_rate": 3.6843147188563075e-05, + "loss": 2.2087, + "step": 70960 + }, + { + "epoch": 119.9, + "learning_rate": 3.6799854235980513e-05, + "loss": 2.1909, + "step": 70980 + }, + { + "epoch": 119.93, + "learning_rate": 3.675657191851698e-05, + "loss": 2.2071, + "step": 71000 + }, + { + "epoch": 119.93, + "eval_loss": 2.5382301807403564, + "eval_runtime": 47.3996, + "eval_samples_per_second": 20.865, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.004431673769041034, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03141421335139171, + "eval_tse_type": 0.00010747656849457508, + "step": 71000 + }, + { + "epoch": 119.97, + "learning_rate": 3.671330027104425e-05, + "loss": 2.1844, + "step": 71020 + }, + { + "epoch": 120.0, + "learning_rate": 3.6670039328425505e-05, + "loss": 2.1934, + "step": 71040 + }, + { + "epoch": 120.03, + "learning_rate": 3.662678912551529e-05, + "loss": 2.1381, + "step": 71060 + }, + { + "epoch": 120.07, + "learning_rate": 3.658354969715955e-05, + "loss": 2.1403, + "step": 71080 + }, + { + "epoch": 120.1, + "learning_rate": 3.654032107819547e-05, + "loss": 2.1517, + "step": 71100 + }, + { + "epoch": 120.14, + "learning_rate": 3.649710330345161e-05, + "loss": 2.1495, + "step": 71120 + }, + { + "epoch": 120.17, + "learning_rate": 3.64538964077477e-05, + "loss": 2.1716, + "step": 71140 + }, + { + "epoch": 120.2, + "learning_rate": 3.641070042589478e-05, + "loss": 2.1473, + "step": 71160 + }, + { + "epoch": 120.24, + "learning_rate": 3.636751539269511e-05, + "loss": 2.154, + "step": 71180 + }, + { + "epoch": 120.27, + "learning_rate": 3.6324341342942017e-05, + "loss": 2.1597, + "step": 71200 + }, + { + "epoch": 120.3, + "learning_rate": 3.628117831142011e-05, + "loss": 2.1724, + "step": 71220 + }, + { + "epoch": 120.34, + "learning_rate": 3.623802633290504e-05, + "loss": 2.1653, + "step": 71240 + }, + { + "epoch": 120.37, + "learning_rate": 3.6197042222829426e-05, + "loss": 2.1706, + "step": 71260 + }, + { + "epoch": 120.41, + "learning_rate": 3.6153911897667496e-05, + "loss": 2.1671, + "step": 71280 + }, + { + "epoch": 120.44, + "learning_rate": 3.6110792728048635e-05, + "loss": 2.1789, + "step": 71300 + }, + { + "epoch": 120.47, + "learning_rate": 3.6067684748713235e-05, + "loss": 2.1671, + "step": 71320 + }, + { + "epoch": 120.51, + "learning_rate": 3.602458799439256e-05, + "loss": 2.1657, + "step": 71340 + }, + { + "epoch": 120.54, + "learning_rate": 3.598150249980892e-05, + "loss": 2.1863, + "step": 71360 + }, + { + "epoch": 120.57, + "learning_rate": 3.593842829967552e-05, + "loss": 2.1698, + "step": 71380 + }, + { + "epoch": 120.61, + "learning_rate": 3.5895365428696446e-05, + "loss": 2.1783, + "step": 71400 + }, + { + "epoch": 120.64, + "learning_rate": 3.58523139215667e-05, + "loss": 2.1882, + "step": 71420 + }, + { + "epoch": 120.68, + "learning_rate": 3.5809273812972074e-05, + "loss": 2.1899, + "step": 71440 + }, + { + "epoch": 120.71, + "learning_rate": 3.576624513758924e-05, + "loss": 2.1927, + "step": 71460 + }, + { + "epoch": 120.74, + "learning_rate": 3.5723227930085576e-05, + "loss": 2.1816, + "step": 71480 + }, + { + "epoch": 120.78, + "learning_rate": 3.568022222511931e-05, + "loss": 2.191, + "step": 71500 + }, + { + "epoch": 120.81, + "learning_rate": 3.563722805733937e-05, + "loss": 2.1844, + "step": 71520 + }, + { + "epoch": 120.84, + "learning_rate": 3.559424546138535e-05, + "loss": 2.1694, + "step": 71540 + }, + { + "epoch": 120.88, + "learning_rate": 3.5551274471887566e-05, + "loss": 2.2022, + "step": 71560 + }, + { + "epoch": 120.91, + "learning_rate": 3.550831512346695e-05, + "loss": 2.1886, + "step": 71580 + }, + { + "epoch": 120.95, + "learning_rate": 3.546536745073511e-05, + "loss": 2.1777, + "step": 71600 + }, + { + "epoch": 120.98, + "learning_rate": 3.542243148829417e-05, + "loss": 2.1868, + "step": 71620 + }, + { + "epoch": 121.01, + "learning_rate": 3.5379507270736865e-05, + "loss": 2.1728, + "step": 71640 + }, + { + "epoch": 121.05, + "learning_rate": 3.533659483264652e-05, + "loss": 2.1448, + "step": 71660 + }, + { + "epoch": 121.08, + "learning_rate": 3.529369420859682e-05, + "loss": 2.1491, + "step": 71680 + }, + { + "epoch": 121.11, + "learning_rate": 3.525080543315209e-05, + "loss": 2.141, + "step": 71700 + }, + { + "epoch": 121.15, + "learning_rate": 3.520792854086702e-05, + "loss": 2.1434, + "step": 71720 + }, + { + "epoch": 121.18, + "learning_rate": 3.516506356628675e-05, + "loss": 2.1548, + "step": 71740 + }, + { + "epoch": 121.22, + "learning_rate": 3.51222105439468e-05, + "loss": 2.1519, + "step": 71760 + }, + { + "epoch": 121.25, + "learning_rate": 3.507936950837309e-05, + "loss": 2.1439, + "step": 71780 + }, + { + "epoch": 121.28, + "learning_rate": 3.5036540494081886e-05, + "loss": 2.1713, + "step": 71800 + }, + { + "epoch": 121.32, + "learning_rate": 3.499372353557971e-05, + "loss": 2.1584, + "step": 71820 + }, + { + "epoch": 121.35, + "learning_rate": 3.495091866736346e-05, + "loss": 2.1696, + "step": 71840 + }, + { + "epoch": 121.39, + "learning_rate": 3.49081259239202e-05, + "loss": 2.165, + "step": 71860 + }, + { + "epoch": 121.42, + "learning_rate": 3.4865345339727307e-05, + "loss": 2.1762, + "step": 71880 + }, + { + "epoch": 121.45, + "learning_rate": 3.48225769492523e-05, + "loss": 2.1646, + "step": 71900 + }, + { + "epoch": 121.49, + "learning_rate": 3.477982078695291e-05, + "loss": 2.1753, + "step": 71920 + }, + { + "epoch": 121.52, + "learning_rate": 3.473707688727701e-05, + "loss": 2.1753, + "step": 71940 + }, + { + "epoch": 121.55, + "learning_rate": 3.4694345284662566e-05, + "loss": 2.1714, + "step": 71960 + }, + { + "epoch": 121.59, + "learning_rate": 3.4651626013537684e-05, + "loss": 2.1697, + "step": 71980 + }, + { + "epoch": 121.62, + "learning_rate": 3.460891910832049e-05, + "loss": 2.1835, + "step": 72000 + }, + { + "epoch": 121.62, + "eval_loss": 2.5357210636138916, + "eval_runtime": 47.4231, + "eval_samples_per_second": 20.855, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.003946419283396043, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.032960075494760176, + "eval_tse_type": 0.0004335889225929671, + "step": 72000 + }, + { + "epoch": 121.66, + "learning_rate": 3.45662246034192e-05, + "loss": 2.183, + "step": 72020 + }, + { + "epoch": 121.69, + "learning_rate": 3.452354253323194e-05, + "loss": 2.1676, + "step": 72040 + }, + { + "epoch": 121.72, + "learning_rate": 3.448087293214693e-05, + "loss": 2.1642, + "step": 72060 + }, + { + "epoch": 121.76, + "learning_rate": 3.443821583454231e-05, + "loss": 2.1796, + "step": 72080 + }, + { + "epoch": 121.79, + "learning_rate": 3.43955712747861e-05, + "loss": 2.166, + "step": 72100 + }, + { + "epoch": 121.82, + "learning_rate": 3.435293928723627e-05, + "loss": 2.185, + "step": 72120 + }, + { + "epoch": 121.86, + "learning_rate": 3.4310319906240626e-05, + "loss": 2.1824, + "step": 72140 + }, + { + "epoch": 121.89, + "learning_rate": 3.426771316613686e-05, + "loss": 2.1815, + "step": 72160 + }, + { + "epoch": 121.93, + "learning_rate": 3.4225119101252425e-05, + "loss": 2.1774, + "step": 72180 + }, + { + "epoch": 121.96, + "learning_rate": 3.4182537745904614e-05, + "loss": 2.1728, + "step": 72200 + }, + { + "epoch": 121.99, + "learning_rate": 3.4139969134400415e-05, + "loss": 2.1813, + "step": 72220 + }, + { + "epoch": 122.03, + "learning_rate": 3.409741330103664e-05, + "loss": 2.1459, + "step": 72240 + }, + { + "epoch": 122.06, + "learning_rate": 3.405487028009974e-05, + "loss": 2.1341, + "step": 72260 + }, + { + "epoch": 122.09, + "learning_rate": 3.401234010586583e-05, + "loss": 2.1337, + "step": 72280 + }, + { + "epoch": 122.13, + "learning_rate": 3.396982281260075e-05, + "loss": 2.146, + "step": 72300 + }, + { + "epoch": 122.16, + "learning_rate": 3.392731843455987e-05, + "loss": 2.1448, + "step": 72320 + }, + { + "epoch": 122.2, + "learning_rate": 3.388482700598823e-05, + "loss": 2.1488, + "step": 72340 + }, + { + "epoch": 122.23, + "learning_rate": 3.384234856112039e-05, + "loss": 2.1569, + "step": 72360 + }, + { + "epoch": 122.26, + "learning_rate": 3.379988313418046e-05, + "loss": 2.1538, + "step": 72380 + }, + { + "epoch": 122.3, + "learning_rate": 3.3757430759382105e-05, + "loss": 2.1583, + "step": 72400 + }, + { + "epoch": 122.33, + "learning_rate": 3.371499147092839e-05, + "loss": 2.1588, + "step": 72420 + }, + { + "epoch": 122.36, + "learning_rate": 3.3672565303011926e-05, + "loss": 2.1596, + "step": 72440 + }, + { + "epoch": 122.4, + "learning_rate": 3.363015228981468e-05, + "loss": 2.1707, + "step": 72460 + }, + { + "epoch": 122.43, + "learning_rate": 3.358775246550806e-05, + "loss": 2.1598, + "step": 72480 + }, + { + "epoch": 122.47, + "learning_rate": 3.354536586425283e-05, + "loss": 2.1622, + "step": 72500 + }, + { + "epoch": 122.5, + "learning_rate": 3.3502992520199104e-05, + "loss": 2.1624, + "step": 72520 + }, + { + "epoch": 122.53, + "learning_rate": 3.346063246748637e-05, + "loss": 2.1513, + "step": 72540 + }, + { + "epoch": 122.57, + "learning_rate": 3.3418285740243286e-05, + "loss": 2.161, + "step": 72560 + }, + { + "epoch": 122.6, + "learning_rate": 3.337595237258791e-05, + "loss": 2.1552, + "step": 72580 + }, + { + "epoch": 122.64, + "learning_rate": 3.333363239862741e-05, + "loss": 2.1554, + "step": 72600 + }, + { + "epoch": 122.67, + "learning_rate": 3.3291325852458274e-05, + "loss": 2.157, + "step": 72620 + }, + { + "epoch": 122.7, + "learning_rate": 3.3249032768166096e-05, + "loss": 2.1671, + "step": 72640 + }, + { + "epoch": 122.74, + "learning_rate": 3.3206753179825664e-05, + "loss": 2.1841, + "step": 72660 + }, + { + "epoch": 122.77, + "learning_rate": 3.3164487121500874e-05, + "loss": 2.1698, + "step": 72680 + }, + { + "epoch": 122.8, + "learning_rate": 3.312223462724472e-05, + "loss": 2.1752, + "step": 72700 + }, + { + "epoch": 122.84, + "learning_rate": 3.3079995731099285e-05, + "loss": 2.1687, + "step": 72720 + }, + { + "epoch": 122.87, + "learning_rate": 3.303777046709565e-05, + "loss": 2.188, + "step": 72740 + }, + { + "epoch": 122.91, + "learning_rate": 3.2995558869254014e-05, + "loss": 2.1668, + "step": 72760 + }, + { + "epoch": 122.94, + "learning_rate": 3.2953360971583436e-05, + "loss": 2.1812, + "step": 72780 + }, + { + "epoch": 122.97, + "learning_rate": 3.291117680808202e-05, + "loss": 2.1768, + "step": 72800 + }, + { + "epoch": 123.01, + "learning_rate": 3.286900641273681e-05, + "loss": 2.1734, + "step": 72820 + }, + { + "epoch": 123.04, + "learning_rate": 3.282684981952369e-05, + "loss": 2.1362, + "step": 72840 + }, + { + "epoch": 123.07, + "learning_rate": 3.278470706240751e-05, + "loss": 2.1436, + "step": 72860 + }, + { + "epoch": 123.11, + "learning_rate": 3.274257817534188e-05, + "loss": 2.1404, + "step": 72880 + }, + { + "epoch": 123.14, + "learning_rate": 3.2700463192269325e-05, + "loss": 2.1367, + "step": 72900 + }, + { + "epoch": 123.18, + "learning_rate": 3.2658362147121086e-05, + "loss": 2.116, + "step": 72920 + }, + { + "epoch": 123.21, + "learning_rate": 3.261627507381725e-05, + "loss": 2.136, + "step": 72940 + }, + { + "epoch": 123.24, + "learning_rate": 3.257420200626661e-05, + "loss": 2.1531, + "step": 72960 + }, + { + "epoch": 123.28, + "learning_rate": 3.2532142978366654e-05, + "loss": 2.1544, + "step": 72980 + }, + { + "epoch": 123.31, + "learning_rate": 3.24900980240036e-05, + "loss": 2.1463, + "step": 73000 + }, + { + "epoch": 123.31, + "eval_loss": 2.5296030044555664, + "eval_runtime": 47.4683, + "eval_samples_per_second": 20.835, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004114381317956316, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03209363938463498, + "eval_tse_type": 0.0007429229398547771, + "step": 73000 + }, + { + "epoch": 123.34, + "learning_rate": 3.24480671770523e-05, + "loss": 2.1407, + "step": 73020 + }, + { + "epoch": 123.38, + "learning_rate": 3.2406050471376245e-05, + "loss": 2.1516, + "step": 73040 + }, + { + "epoch": 123.41, + "learning_rate": 3.236404794082754e-05, + "loss": 2.1571, + "step": 73060 + }, + { + "epoch": 123.45, + "learning_rate": 3.2322059619246856e-05, + "loss": 2.1435, + "step": 73080 + }, + { + "epoch": 123.48, + "learning_rate": 3.228008554046347e-05, + "loss": 2.1523, + "step": 73100 + }, + { + "epoch": 123.51, + "learning_rate": 3.223812573829506e-05, + "loss": 2.1632, + "step": 73120 + }, + { + "epoch": 123.55, + "learning_rate": 3.2196180246547954e-05, + "loss": 2.1716, + "step": 73140 + }, + { + "epoch": 123.58, + "learning_rate": 3.215424909901683e-05, + "loss": 2.1545, + "step": 73160 + }, + { + "epoch": 123.61, + "learning_rate": 3.2112332329484895e-05, + "loss": 2.1701, + "step": 73180 + }, + { + "epoch": 123.65, + "learning_rate": 3.2070429971723695e-05, + "loss": 2.1641, + "step": 73200 + }, + { + "epoch": 123.68, + "learning_rate": 3.2028542059493224e-05, + "loss": 2.1693, + "step": 73220 + }, + { + "epoch": 123.72, + "learning_rate": 3.198666862654182e-05, + "loss": 2.1524, + "step": 73240 + }, + { + "epoch": 123.75, + "learning_rate": 3.1944809706606124e-05, + "loss": 2.1498, + "step": 73260 + }, + { + "epoch": 123.78, + "learning_rate": 3.190296533341116e-05, + "loss": 2.1765, + "step": 73280 + }, + { + "epoch": 123.82, + "learning_rate": 3.186113554067013e-05, + "loss": 2.1713, + "step": 73300 + }, + { + "epoch": 123.85, + "learning_rate": 3.181932036208458e-05, + "loss": 2.1694, + "step": 73320 + }, + { + "epoch": 123.89, + "learning_rate": 3.177751983134423e-05, + "loss": 2.178, + "step": 73340 + }, + { + "epoch": 123.92, + "learning_rate": 3.1735733982127e-05, + "loss": 2.1723, + "step": 73360 + }, + { + "epoch": 123.95, + "learning_rate": 3.169396284809904e-05, + "loss": 2.1635, + "step": 73380 + }, + { + "epoch": 123.99, + "learning_rate": 3.165220646291454e-05, + "loss": 2.1729, + "step": 73400 + }, + { + "epoch": 124.02, + "learning_rate": 3.1610464860215904e-05, + "loss": 2.1563, + "step": 73420 + }, + { + "epoch": 124.05, + "learning_rate": 3.156873807363356e-05, + "loss": 2.1293, + "step": 73440 + }, + { + "epoch": 124.09, + "learning_rate": 3.152702613678607e-05, + "loss": 2.1267, + "step": 73460 + }, + { + "epoch": 124.12, + "learning_rate": 3.148532908327993e-05, + "loss": 2.139, + "step": 73480 + }, + { + "epoch": 124.16, + "learning_rate": 3.144364694670976e-05, + "loss": 2.1172, + "step": 73500 + }, + { + "epoch": 124.19, + "learning_rate": 3.1401979760658054e-05, + "loss": 2.1292, + "step": 73520 + }, + { + "epoch": 124.22, + "learning_rate": 3.1360327558695335e-05, + "loss": 2.1394, + "step": 73540 + }, + { + "epoch": 124.26, + "learning_rate": 3.1318690374380046e-05, + "loss": 2.1241, + "step": 73560 + }, + { + "epoch": 124.29, + "learning_rate": 3.127706824125848e-05, + "loss": 2.1293, + "step": 73580 + }, + { + "epoch": 124.32, + "learning_rate": 3.123546119286487e-05, + "loss": 2.1459, + "step": 73600 + }, + { + "epoch": 124.36, + "learning_rate": 3.119386926272124e-05, + "loss": 2.1625, + "step": 73620 + }, + { + "epoch": 124.39, + "learning_rate": 3.115229248433747e-05, + "loss": 2.16, + "step": 73640 + }, + { + "epoch": 124.43, + "learning_rate": 3.1110730891211206e-05, + "loss": 2.1519, + "step": 73660 + }, + { + "epoch": 124.46, + "learning_rate": 3.1069184516827887e-05, + "loss": 2.1588, + "step": 73680 + }, + { + "epoch": 124.49, + "learning_rate": 3.1027653394660676e-05, + "loss": 2.1675, + "step": 73700 + }, + { + "epoch": 124.53, + "learning_rate": 3.098613755817044e-05, + "loss": 2.1515, + "step": 73720 + }, + { + "epoch": 124.56, + "learning_rate": 3.094463704080575e-05, + "loss": 2.1502, + "step": 73740 + }, + { + "epoch": 124.59, + "learning_rate": 3.09031518760028e-05, + "loss": 2.1526, + "step": 73760 + }, + { + "epoch": 124.63, + "learning_rate": 3.0861682097185464e-05, + "loss": 2.1701, + "step": 73780 + }, + { + "epoch": 124.66, + "learning_rate": 3.0820227737765176e-05, + "loss": 2.154, + "step": 73800 + }, + { + "epoch": 124.7, + "learning_rate": 3.077878883114096e-05, + "loss": 2.1478, + "step": 73820 + }, + { + "epoch": 124.73, + "learning_rate": 3.073736541069943e-05, + "loss": 2.1544, + "step": 73840 + }, + { + "epoch": 124.76, + "learning_rate": 3.069595750981465e-05, + "loss": 2.1489, + "step": 73860 + }, + { + "epoch": 124.8, + "learning_rate": 3.065456516184824e-05, + "loss": 2.1618, + "step": 73880 + }, + { + "epoch": 124.83, + "learning_rate": 3.061318840014925e-05, + "loss": 2.1645, + "step": 73900 + }, + { + "epoch": 124.86, + "learning_rate": 3.057182725805421e-05, + "loss": 2.1651, + "step": 73920 + }, + { + "epoch": 124.9, + "learning_rate": 3.053048176888702e-05, + "loss": 2.1648, + "step": 73940 + }, + { + "epoch": 124.93, + "learning_rate": 3.0489151965958994e-05, + "loss": 2.1671, + "step": 73960 + }, + { + "epoch": 124.97, + "learning_rate": 3.0447837882568864e-05, + "loss": 2.1627, + "step": 73980 + }, + { + "epoch": 125.0, + "learning_rate": 3.0406539552002557e-05, + "loss": 2.1569, + "step": 74000 + }, + { + "epoch": 125.0, + "eval_loss": 2.520303726196289, + "eval_runtime": 47.0319, + "eval_samples_per_second": 21.028, + "eval_steps_per_second": 0.128, + "eval_tse_ndup": 0.004721879915856345, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03195639919245855, + "eval_tse_type": 0.0003159757330637007, + "step": 74000 + }, + { + "epoch": 125.03, + "learning_rate": 3.0365257007533465e-05, + "loss": 2.1132, + "step": 74020 + }, + { + "epoch": 125.07, + "learning_rate": 3.0323990282422122e-05, + "loss": 2.1262, + "step": 74040 + }, + { + "epoch": 125.1, + "learning_rate": 3.0282739409916445e-05, + "loss": 2.1337, + "step": 74060 + }, + { + "epoch": 125.14, + "learning_rate": 3.0241504423251477e-05, + "loss": 2.1275, + "step": 74080 + }, + { + "epoch": 125.17, + "learning_rate": 3.0200285355649506e-05, + "loss": 2.1291, + "step": 74100 + }, + { + "epoch": 125.2, + "learning_rate": 3.0159082240320013e-05, + "loss": 2.136, + "step": 74120 + }, + { + "epoch": 125.24, + "learning_rate": 3.0117895110459583e-05, + "loss": 2.1255, + "step": 74140 + }, + { + "epoch": 125.27, + "learning_rate": 3.0076723999251953e-05, + "loss": 2.1314, + "step": 74160 + }, + { + "epoch": 125.3, + "learning_rate": 3.003556893986792e-05, + "loss": 2.1267, + "step": 74180 + }, + { + "epoch": 125.34, + "learning_rate": 2.9994429965465427e-05, + "loss": 2.1344, + "step": 74200 + }, + { + "epoch": 125.37, + "learning_rate": 2.9953307109189332e-05, + "loss": 2.1353, + "step": 74220 + }, + { + "epoch": 125.41, + "learning_rate": 2.9912200404171618e-05, + "loss": 2.1274, + "step": 74240 + }, + { + "epoch": 125.44, + "learning_rate": 2.9871109883531228e-05, + "loss": 2.1505, + "step": 74260 + }, + { + "epoch": 125.47, + "learning_rate": 2.9830035580374022e-05, + "loss": 2.1448, + "step": 74280 + }, + { + "epoch": 125.51, + "learning_rate": 2.9788977527792842e-05, + "loss": 2.1497, + "step": 74300 + }, + { + "epoch": 125.54, + "learning_rate": 2.9747935758867408e-05, + "loss": 2.146, + "step": 74320 + }, + { + "epoch": 125.57, + "learning_rate": 2.9706910306664337e-05, + "loss": 2.1499, + "step": 74340 + }, + { + "epoch": 125.61, + "learning_rate": 2.9665901204237085e-05, + "loss": 2.1461, + "step": 74360 + }, + { + "epoch": 125.64, + "learning_rate": 2.9624908484625957e-05, + "loss": 2.175, + "step": 74380 + }, + { + "epoch": 125.68, + "learning_rate": 2.9583932180858066e-05, + "loss": 2.1532, + "step": 74400 + }, + { + "epoch": 125.71, + "learning_rate": 2.9542972325947238e-05, + "loss": 2.1518, + "step": 74420 + }, + { + "epoch": 125.74, + "learning_rate": 2.9502028952894122e-05, + "loss": 2.1628, + "step": 74440 + }, + { + "epoch": 125.78, + "learning_rate": 2.9461102094686026e-05, + "loss": 2.154, + "step": 74460 + }, + { + "epoch": 125.81, + "learning_rate": 2.9420191784297014e-05, + "loss": 2.1549, + "step": 74480 + }, + { + "epoch": 125.84, + "learning_rate": 2.9379298054687747e-05, + "loss": 2.1616, + "step": 74500 + }, + { + "epoch": 125.88, + "learning_rate": 2.9338420938805577e-05, + "loss": 2.16, + "step": 74520 + }, + { + "epoch": 125.91, + "learning_rate": 2.9297560469584494e-05, + "loss": 2.1573, + "step": 74540 + }, + { + "epoch": 125.95, + "learning_rate": 2.9256716679944983e-05, + "loss": 2.1604, + "step": 74560 + }, + { + "epoch": 125.98, + "learning_rate": 2.9215889602794188e-05, + "loss": 2.1683, + "step": 74580 + }, + { + "epoch": 126.01, + "learning_rate": 2.917507927102573e-05, + "loss": 2.1477, + "step": 74600 + }, + { + "epoch": 126.05, + "learning_rate": 2.9134285717519772e-05, + "loss": 2.1189, + "step": 74620 + }, + { + "epoch": 126.08, + "learning_rate": 2.9093508975142896e-05, + "loss": 2.1301, + "step": 74640 + }, + { + "epoch": 126.11, + "learning_rate": 2.9052749076748264e-05, + "loss": 2.1255, + "step": 74660 + }, + { + "epoch": 126.15, + "learning_rate": 2.9012006055175322e-05, + "loss": 2.1163, + "step": 74680 + }, + { + "epoch": 126.18, + "learning_rate": 2.897127994325002e-05, + "loss": 2.1249, + "step": 74700 + }, + { + "epoch": 126.22, + "learning_rate": 2.8930570773784643e-05, + "loss": 2.1243, + "step": 74720 + }, + { + "epoch": 126.25, + "learning_rate": 2.8889878579577835e-05, + "loss": 2.1459, + "step": 74740 + }, + { + "epoch": 126.28, + "learning_rate": 2.884920339341457e-05, + "loss": 2.1288, + "step": 74760 + }, + { + "epoch": 126.32, + "learning_rate": 2.8808545248066087e-05, + "loss": 2.1312, + "step": 74780 + }, + { + "epoch": 126.35, + "learning_rate": 2.876790417628994e-05, + "loss": 2.1255, + "step": 74800 + }, + { + "epoch": 126.39, + "learning_rate": 2.87272802108299e-05, + "loss": 2.1151, + "step": 74820 + }, + { + "epoch": 126.42, + "learning_rate": 2.8686673384415956e-05, + "loss": 2.1279, + "step": 74840 + }, + { + "epoch": 126.45, + "learning_rate": 2.8646083729764306e-05, + "loss": 2.1323, + "step": 74860 + }, + { + "epoch": 126.49, + "learning_rate": 2.860551127957729e-05, + "loss": 2.1408, + "step": 74880 + }, + { + "epoch": 126.52, + "learning_rate": 2.8564956066543435e-05, + "loss": 2.1489, + "step": 74900 + }, + { + "epoch": 126.55, + "learning_rate": 2.8524418123337294e-05, + "loss": 2.1326, + "step": 74920 + }, + { + "epoch": 126.59, + "learning_rate": 2.8483897482619565e-05, + "loss": 2.1237, + "step": 74940 + }, + { + "epoch": 126.62, + "learning_rate": 2.8443394177037014e-05, + "loss": 2.1549, + "step": 74960 + }, + { + "epoch": 126.66, + "learning_rate": 2.8402908239222413e-05, + "loss": 2.1481, + "step": 74980 + }, + { + "epoch": 126.69, + "learning_rate": 2.8362439701794574e-05, + "loss": 2.1478, + "step": 75000 + }, + { + "epoch": 126.69, + "eval_loss": 2.51645827293396, + "eval_runtime": 47.3559, + "eval_samples_per_second": 20.884, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.004002848352361593, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03201635846077423, + "eval_tse_type": 0.0004039481567263266, + "step": 75000 + }, + { + "epoch": 126.72, + "learning_rate": 2.8321988597358207e-05, + "loss": 2.1553, + "step": 75020 + }, + { + "epoch": 126.76, + "learning_rate": 2.828155495850412e-05, + "loss": 2.1474, + "step": 75040 + }, + { + "epoch": 126.79, + "learning_rate": 2.8241138817808888e-05, + "loss": 2.1534, + "step": 75060 + }, + { + "epoch": 126.82, + "learning_rate": 2.8200740207835107e-05, + "loss": 2.1607, + "step": 75080 + }, + { + "epoch": 126.86, + "learning_rate": 2.81603591611312e-05, + "loss": 2.1395, + "step": 75100 + }, + { + "epoch": 126.89, + "learning_rate": 2.811999571023144e-05, + "loss": 2.1534, + "step": 75120 + }, + { + "epoch": 126.93, + "learning_rate": 2.807964988765596e-05, + "loss": 2.1645, + "step": 75140 + }, + { + "epoch": 126.96, + "learning_rate": 2.8039321725910595e-05, + "loss": 2.1695, + "step": 75160 + }, + { + "epoch": 126.99, + "learning_rate": 2.79990112574871e-05, + "loss": 2.1605, + "step": 75180 + }, + { + "epoch": 127.03, + "learning_rate": 2.7958718514862824e-05, + "loss": 2.1235, + "step": 75200 + }, + { + "epoch": 127.06, + "learning_rate": 2.7918443530500937e-05, + "loss": 2.1245, + "step": 75220 + }, + { + "epoch": 127.09, + "learning_rate": 2.787818633685025e-05, + "loss": 2.1131, + "step": 75240 + }, + { + "epoch": 127.13, + "learning_rate": 2.7837946966345262e-05, + "loss": 2.1248, + "step": 75260 + }, + { + "epoch": 127.16, + "learning_rate": 2.7797725451406133e-05, + "loss": 2.1318, + "step": 75280 + }, + { + "epoch": 127.2, + "learning_rate": 2.775752182443856e-05, + "loss": 2.1272, + "step": 75300 + }, + { + "epoch": 127.23, + "learning_rate": 2.771934497705564e-05, + "loss": 2.1192, + "step": 75320 + }, + { + "epoch": 127.26, + "learning_rate": 2.7679176324785106e-05, + "loss": 2.1222, + "step": 75340 + }, + { + "epoch": 127.3, + "learning_rate": 2.7639025655999024e-05, + "loss": 2.1199, + "step": 75360 + }, + { + "epoch": 127.33, + "learning_rate": 2.7598893003046088e-05, + "loss": 2.1319, + "step": 75380 + }, + { + "epoch": 127.36, + "learning_rate": 2.7558778398260442e-05, + "loss": 2.1273, + "step": 75400 + }, + { + "epoch": 127.4, + "learning_rate": 2.7518681873961727e-05, + "loss": 2.1405, + "step": 75420 + }, + { + "epoch": 127.43, + "learning_rate": 2.7478603462454944e-05, + "loss": 2.1495, + "step": 75440 + }, + { + "epoch": 127.47, + "learning_rate": 2.7438543196030542e-05, + "loss": 2.1265, + "step": 75460 + }, + { + "epoch": 127.5, + "learning_rate": 2.7398501106964427e-05, + "loss": 2.133, + "step": 75480 + }, + { + "epoch": 127.53, + "learning_rate": 2.7358477227517708e-05, + "loss": 2.1346, + "step": 75500 + }, + { + "epoch": 127.57, + "learning_rate": 2.731847158993694e-05, + "loss": 2.1308, + "step": 75520 + }, + { + "epoch": 127.6, + "learning_rate": 2.7278484226453926e-05, + "loss": 2.1398, + "step": 75540 + }, + { + "epoch": 127.64, + "learning_rate": 2.72385151692858e-05, + "loss": 2.1329, + "step": 75560 + }, + { + "epoch": 127.67, + "learning_rate": 2.7198564450634856e-05, + "loss": 2.1302, + "step": 75580 + }, + { + "epoch": 127.7, + "learning_rate": 2.7158632102688676e-05, + "loss": 2.1397, + "step": 75600 + }, + { + "epoch": 127.74, + "learning_rate": 2.7118718157620076e-05, + "loss": 2.1417, + "step": 75620 + }, + { + "epoch": 127.77, + "learning_rate": 2.708081698476026e-05, + "loss": 2.1446, + "step": 75640 + }, + { + "epoch": 127.8, + "learning_rate": 2.704093901778363e-05, + "loss": 2.1366, + "step": 75660 + }, + { + "epoch": 127.84, + "learning_rate": 2.7001079548507736e-05, + "loss": 2.1504, + "step": 75680 + }, + { + "epoch": 127.87, + "learning_rate": 2.6961238609046646e-05, + "loss": 2.1425, + "step": 75700 + }, + { + "epoch": 127.91, + "learning_rate": 2.6921416231499498e-05, + "loss": 2.1452, + "step": 75720 + }, + { + "epoch": 127.94, + "learning_rate": 2.6881612447950423e-05, + "loss": 2.1481, + "step": 75740 + }, + { + "epoch": 127.97, + "learning_rate": 2.684182729046863e-05, + "loss": 2.1524, + "step": 75760 + }, + { + "epoch": 128.01, + "learning_rate": 2.68020607911083e-05, + "loss": 2.1465, + "step": 75780 + }, + { + "epoch": 128.04, + "learning_rate": 2.676231298190861e-05, + "loss": 2.1048, + "step": 75800 + }, + { + "epoch": 128.07, + "learning_rate": 2.6722583894893582e-05, + "loss": 2.1034, + "step": 75820 + }, + { + "epoch": 128.11, + "learning_rate": 2.6682873562072298e-05, + "loss": 2.1174, + "step": 75840 + }, + { + "epoch": 128.14, + "learning_rate": 2.664318201543864e-05, + "loss": 2.1147, + "step": 75860 + }, + { + "epoch": 128.18, + "learning_rate": 2.660350928697134e-05, + "loss": 2.1131, + "step": 75880 + }, + { + "epoch": 128.21, + "learning_rate": 2.656385540863402e-05, + "loss": 2.1176, + "step": 75900 + }, + { + "epoch": 128.24, + "learning_rate": 2.6524220412375077e-05, + "loss": 2.1202, + "step": 75920 + }, + { + "epoch": 128.28, + "learning_rate": 2.6484604330127755e-05, + "loss": 2.1247, + "step": 75940 + }, + { + "epoch": 128.31, + "learning_rate": 2.6445007193809945e-05, + "loss": 2.1292, + "step": 75960 + }, + { + "epoch": 128.34, + "learning_rate": 2.6405429035324403e-05, + "loss": 2.1214, + "step": 75980 + }, + { + "epoch": 128.38, + "learning_rate": 2.6365869886558537e-05, + "loss": 2.1315, + "step": 76000 + }, + { + "epoch": 128.38, + "eval_loss": 2.513632297515869, + "eval_runtime": 47.4453, + "eval_samples_per_second": 20.845, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.0034698624169113494, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03335803456619464, + "eval_tse_type": 0.00045816481294236604, + "step": 76000 + }, + { + "epoch": 128.41, + "learning_rate": 2.6326329779384395e-05, + "loss": 2.125, + "step": 76020 + }, + { + "epoch": 128.45, + "learning_rate": 2.6286808745658766e-05, + "loss": 2.1255, + "step": 76040 + }, + { + "epoch": 128.48, + "learning_rate": 2.6247306817223007e-05, + "loss": 2.1307, + "step": 76060 + }, + { + "epoch": 128.51, + "learning_rate": 2.6207824025903137e-05, + "loss": 2.1304, + "step": 76080 + }, + { + "epoch": 128.55, + "learning_rate": 2.6168360403509707e-05, + "loss": 2.1369, + "step": 76100 + }, + { + "epoch": 128.58, + "learning_rate": 2.6128915981837815e-05, + "loss": 2.1337, + "step": 76120 + }, + { + "epoch": 128.61, + "learning_rate": 2.60894907926672e-05, + "loss": 2.1399, + "step": 76140 + }, + { + "epoch": 128.65, + "learning_rate": 2.6050084867761954e-05, + "loss": 2.1461, + "step": 76160 + }, + { + "epoch": 128.68, + "learning_rate": 2.6010698238870744e-05, + "loss": 2.1314, + "step": 76180 + }, + { + "epoch": 128.72, + "learning_rate": 2.597133093772666e-05, + "loss": 2.1386, + "step": 76200 + }, + { + "epoch": 128.75, + "learning_rate": 2.5931982996047255e-05, + "loss": 2.1401, + "step": 76220 + }, + { + "epoch": 128.78, + "learning_rate": 2.589265444553441e-05, + "loss": 2.1452, + "step": 76240 + }, + { + "epoch": 128.82, + "learning_rate": 2.5853345317874445e-05, + "loss": 2.1348, + "step": 76260 + }, + { + "epoch": 128.85, + "learning_rate": 2.581405564473801e-05, + "loss": 2.1289, + "step": 76280 + }, + { + "epoch": 128.89, + "learning_rate": 2.5774785457780103e-05, + "loss": 2.1505, + "step": 76300 + }, + { + "epoch": 128.92, + "learning_rate": 2.5735534788640008e-05, + "loss": 2.1362, + "step": 76320 + }, + { + "epoch": 128.95, + "learning_rate": 2.5696303668941226e-05, + "loss": 2.1446, + "step": 76340 + }, + { + "epoch": 128.99, + "learning_rate": 2.5657092130291638e-05, + "loss": 2.1474, + "step": 76360 + }, + { + "epoch": 129.02, + "learning_rate": 2.561790020428322e-05, + "loss": 2.1166, + "step": 76380 + }, + { + "epoch": 129.05, + "learning_rate": 2.5578727922492206e-05, + "loss": 2.1161, + "step": 76400 + }, + { + "epoch": 129.09, + "learning_rate": 2.5539575316479007e-05, + "loss": 2.1048, + "step": 76420 + }, + { + "epoch": 129.12, + "learning_rate": 2.550044241778817e-05, + "loss": 2.1115, + "step": 76440 + }, + { + "epoch": 129.16, + "learning_rate": 2.546132925794838e-05, + "loss": 2.1134, + "step": 76460 + }, + { + "epoch": 129.19, + "learning_rate": 2.5422235868472345e-05, + "loss": 2.1202, + "step": 76480 + }, + { + "epoch": 129.22, + "learning_rate": 2.5383162280856986e-05, + "loss": 2.1169, + "step": 76500 + }, + { + "epoch": 129.26, + "learning_rate": 2.5344108526583123e-05, + "loss": 2.1021, + "step": 76520 + }, + { + "epoch": 129.29, + "learning_rate": 2.5305074637115677e-05, + "loss": 2.1083, + "step": 76540 + }, + { + "epoch": 129.32, + "learning_rate": 2.5266060643903556e-05, + "loss": 2.1105, + "step": 76560 + }, + { + "epoch": 129.36, + "learning_rate": 2.522706657837962e-05, + "loss": 2.1248, + "step": 76580 + }, + { + "epoch": 129.39, + "learning_rate": 2.5188092471960712e-05, + "loss": 2.1235, + "step": 76600 + }, + { + "epoch": 129.43, + "learning_rate": 2.5149138356047525e-05, + "loss": 2.1311, + "step": 76620 + }, + { + "epoch": 129.46, + "learning_rate": 2.5110204262024706e-05, + "loss": 2.1225, + "step": 76640 + }, + { + "epoch": 129.49, + "learning_rate": 2.507129022126074e-05, + "loss": 2.1256, + "step": 76660 + }, + { + "epoch": 129.53, + "learning_rate": 2.5032396265107984e-05, + "loss": 2.1278, + "step": 76680 + }, + { + "epoch": 129.56, + "learning_rate": 2.499352242490259e-05, + "loss": 2.1273, + "step": 76700 + }, + { + "epoch": 129.59, + "learning_rate": 2.4954668731964496e-05, + "loss": 2.1306, + "step": 76720 + }, + { + "epoch": 129.63, + "learning_rate": 2.491583521759746e-05, + "loss": 2.1217, + "step": 76740 + }, + { + "epoch": 129.66, + "learning_rate": 2.4877021913088893e-05, + "loss": 2.1251, + "step": 76760 + }, + { + "epoch": 129.7, + "learning_rate": 2.483822884971e-05, + "loss": 2.129, + "step": 76780 + }, + { + "epoch": 129.73, + "learning_rate": 2.479945605871564e-05, + "loss": 2.1383, + "step": 76800 + }, + { + "epoch": 129.76, + "learning_rate": 2.4760703571344363e-05, + "loss": 2.131, + "step": 76820 + }, + { + "epoch": 129.8, + "learning_rate": 2.4721971418818357e-05, + "loss": 2.1305, + "step": 76840 + }, + { + "epoch": 129.83, + "learning_rate": 2.4683259632343362e-05, + "loss": 2.1351, + "step": 76860 + }, + { + "epoch": 129.86, + "learning_rate": 2.464456824310885e-05, + "loss": 2.1423, + "step": 76880 + }, + { + "epoch": 129.9, + "learning_rate": 2.460589728228771e-05, + "loss": 2.1416, + "step": 76900 + }, + { + "epoch": 129.93, + "learning_rate": 2.4567246781036457e-05, + "loss": 2.1385, + "step": 76920 + }, + { + "epoch": 129.97, + "learning_rate": 2.45286167704951e-05, + "loss": 2.1273, + "step": 76940 + }, + { + "epoch": 130.0, + "learning_rate": 2.4490007281787164e-05, + "loss": 2.1367, + "step": 76960 + }, + { + "epoch": 130.03, + "learning_rate": 2.4451418346019576e-05, + "loss": 2.0997, + "step": 76980 + }, + { + "epoch": 130.07, + "learning_rate": 2.4412849994282742e-05, + "loss": 2.1031, + "step": 77000 + }, + { + "epoch": 130.07, + "eval_loss": 2.5073578357696533, + "eval_runtime": 50.5928, + "eval_samples_per_second": 19.548, + "eval_steps_per_second": 0.119, + "eval_tse_ndup": 0.00484687591380415, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.0312417529380153, + "eval_tse_type": 0.0004991073247496958, + "step": 77000 + }, + { + "epoch": 130.1, + "learning_rate": 2.437430225765055e-05, + "loss": 2.1186, + "step": 77020 + }, + { + "epoch": 130.14, + "learning_rate": 2.4335775167180153e-05, + "loss": 2.1044, + "step": 77040 + }, + { + "epoch": 130.17, + "learning_rate": 2.4297268753912172e-05, + "loss": 2.105, + "step": 77060 + }, + { + "epoch": 130.2, + "learning_rate": 2.425878304887047e-05, + "loss": 2.1048, + "step": 77080 + }, + { + "epoch": 130.24, + "learning_rate": 2.422031808306236e-05, + "loss": 2.1283, + "step": 77100 + }, + { + "epoch": 130.27, + "learning_rate": 2.4181873887478312e-05, + "loss": 2.1099, + "step": 77120 + }, + { + "epoch": 130.3, + "learning_rate": 2.4143450493092146e-05, + "loss": 2.1001, + "step": 77140 + }, + { + "epoch": 130.34, + "learning_rate": 2.410504793086089e-05, + "loss": 2.1125, + "step": 77160 + }, + { + "epoch": 130.37, + "learning_rate": 2.40666662317248e-05, + "loss": 2.122, + "step": 77180 + }, + { + "epoch": 130.41, + "learning_rate": 2.4028305426607333e-05, + "loss": 2.1181, + "step": 77200 + }, + { + "epoch": 130.44, + "learning_rate": 2.3989965546415045e-05, + "loss": 2.1204, + "step": 77220 + }, + { + "epoch": 130.47, + "learning_rate": 2.395164662203775e-05, + "loss": 2.1112, + "step": 77240 + }, + { + "epoch": 130.51, + "learning_rate": 2.3913348684348264e-05, + "loss": 2.1231, + "step": 77260 + }, + { + "epoch": 130.54, + "learning_rate": 2.3875071764202563e-05, + "loss": 2.1233, + "step": 77280 + }, + { + "epoch": 130.57, + "learning_rate": 2.383681589243967e-05, + "loss": 2.1161, + "step": 77300 + }, + { + "epoch": 130.61, + "learning_rate": 2.3798581099881645e-05, + "loss": 2.1275, + "step": 77320 + }, + { + "epoch": 130.64, + "learning_rate": 2.376036741733359e-05, + "loss": 2.1325, + "step": 77340 + }, + { + "epoch": 130.68, + "learning_rate": 2.3722174875583548e-05, + "loss": 2.1176, + "step": 77360 + }, + { + "epoch": 130.71, + "learning_rate": 2.3684003505402574e-05, + "loss": 2.1128, + "step": 77380 + }, + { + "epoch": 130.74, + "learning_rate": 2.3645853337544654e-05, + "loss": 2.1256, + "step": 77400 + }, + { + "epoch": 130.78, + "learning_rate": 2.3607724402746684e-05, + "loss": 2.1345, + "step": 77420 + }, + { + "epoch": 130.81, + "learning_rate": 2.3569616731728462e-05, + "loss": 2.1257, + "step": 77440 + }, + { + "epoch": 130.84, + "learning_rate": 2.3531530355192643e-05, + "loss": 2.1385, + "step": 77460 + }, + { + "epoch": 130.88, + "learning_rate": 2.3493465303824767e-05, + "loss": 2.1472, + "step": 77480 + }, + { + "epoch": 130.91, + "learning_rate": 2.3455421608293106e-05, + "loss": 2.1227, + "step": 77500 + }, + { + "epoch": 130.95, + "learning_rate": 2.3417399299248803e-05, + "loss": 2.1366, + "step": 77520 + }, + { + "epoch": 130.98, + "learning_rate": 2.3379398407325747e-05, + "loss": 2.1243, + "step": 77540 + }, + { + "epoch": 131.01, + "learning_rate": 2.334141896314057e-05, + "loss": 2.1237, + "step": 77560 + }, + { + "epoch": 131.05, + "learning_rate": 2.3303460997292637e-05, + "loss": 2.1022, + "step": 77580 + }, + { + "epoch": 131.08, + "learning_rate": 2.326552454036395e-05, + "loss": 2.0869, + "step": 77600 + }, + { + "epoch": 131.11, + "learning_rate": 2.3227609622919287e-05, + "loss": 2.094, + "step": 77620 + }, + { + "epoch": 131.15, + "learning_rate": 2.3189716275505967e-05, + "loss": 2.1118, + "step": 77640 + }, + { + "epoch": 131.18, + "learning_rate": 2.3151844528654e-05, + "loss": 2.1038, + "step": 77660 + }, + { + "epoch": 131.22, + "learning_rate": 2.311399441287595e-05, + "loss": 2.0885, + "step": 77680 + }, + { + "epoch": 131.25, + "learning_rate": 2.307616595866699e-05, + "loss": 2.1073, + "step": 77700 + }, + { + "epoch": 131.28, + "learning_rate": 2.3038359196504828e-05, + "loss": 2.113, + "step": 77720 + }, + { + "epoch": 131.32, + "learning_rate": 2.300057415684964e-05, + "loss": 2.1125, + "step": 77740 + }, + { + "epoch": 131.35, + "learning_rate": 2.2962810870144225e-05, + "loss": 2.1142, + "step": 77760 + }, + { + "epoch": 131.39, + "learning_rate": 2.2925069366813717e-05, + "loss": 2.1199, + "step": 77780 + }, + { + "epoch": 131.42, + "learning_rate": 2.288734967726579e-05, + "loss": 2.1166, + "step": 77800 + }, + { + "epoch": 131.45, + "learning_rate": 2.2849651831890517e-05, + "loss": 2.1075, + "step": 77820 + }, + { + "epoch": 131.49, + "learning_rate": 2.2811975861060368e-05, + "loss": 2.1187, + "step": 77840 + }, + { + "epoch": 131.52, + "learning_rate": 2.2774321795130215e-05, + "loss": 2.1093, + "step": 77860 + }, + { + "epoch": 131.55, + "learning_rate": 2.2736689664437217e-05, + "loss": 2.1236, + "step": 77880 + }, + { + "epoch": 131.59, + "learning_rate": 2.2699079499300918e-05, + "loss": 2.1122, + "step": 77900 + }, + { + "epoch": 131.62, + "learning_rate": 2.2661491330023154e-05, + "loss": 2.1163, + "step": 77920 + }, + { + "epoch": 131.66, + "learning_rate": 2.2623925186888056e-05, + "loss": 2.123, + "step": 77940 + }, + { + "epoch": 131.69, + "learning_rate": 2.2586381100161923e-05, + "loss": 2.1282, + "step": 77960 + }, + { + "epoch": 131.72, + "learning_rate": 2.2548859100093407e-05, + "loss": 2.1131, + "step": 77980 + }, + { + "epoch": 131.76, + "learning_rate": 2.2511359216913304e-05, + "loss": 2.1234, + "step": 78000 + }, + { + "epoch": 131.76, + "eval_loss": 2.5044639110565186, + "eval_runtime": 47.4669, + "eval_samples_per_second": 20.836, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.0041822542590794895, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.0324066122092436, + "eval_tse_type": 0.0006477502527805865, + "step": 78000 + }, + { + "epoch": 131.79, + "learning_rate": 2.247388148083456e-05, + "loss": 2.1487, + "step": 78020 + }, + { + "epoch": 131.82, + "learning_rate": 2.2436425922052324e-05, + "loss": 2.1194, + "step": 78040 + }, + { + "epoch": 131.86, + "learning_rate": 2.2398992570743866e-05, + "loss": 2.1294, + "step": 78060 + }, + { + "epoch": 131.89, + "learning_rate": 2.2361581457068574e-05, + "loss": 2.1263, + "step": 78080 + }, + { + "epoch": 131.93, + "learning_rate": 2.2324192611167875e-05, + "loss": 2.1248, + "step": 78100 + }, + { + "epoch": 131.96, + "learning_rate": 2.228682606316529e-05, + "loss": 2.1209, + "step": 78120 + }, + { + "epoch": 131.99, + "learning_rate": 2.224948184316642e-05, + "loss": 2.1283, + "step": 78140 + }, + { + "epoch": 132.03, + "learning_rate": 2.2212159981258774e-05, + "loss": 2.0941, + "step": 78160 + }, + { + "epoch": 132.06, + "learning_rate": 2.2174860507511924e-05, + "loss": 2.0923, + "step": 78180 + }, + { + "epoch": 132.09, + "learning_rate": 2.2137583451977377e-05, + "loss": 2.0967, + "step": 78200 + }, + { + "epoch": 132.13, + "learning_rate": 2.210032884468861e-05, + "loss": 2.1014, + "step": 78220 + }, + { + "epoch": 132.16, + "learning_rate": 2.2063096715660947e-05, + "loss": 2.0929, + "step": 78240 + }, + { + "epoch": 132.2, + "learning_rate": 2.2025887094891657e-05, + "loss": 2.0848, + "step": 78260 + }, + { + "epoch": 132.23, + "learning_rate": 2.1988700012359862e-05, + "loss": 2.0958, + "step": 78280 + }, + { + "epoch": 132.26, + "learning_rate": 2.1951535498026527e-05, + "loss": 2.1127, + "step": 78300 + }, + { + "epoch": 132.3, + "learning_rate": 2.1914393581834418e-05, + "loss": 2.101, + "step": 78320 + }, + { + "epoch": 132.33, + "learning_rate": 2.1877274293708116e-05, + "loss": 2.1041, + "step": 78340 + }, + { + "epoch": 132.36, + "learning_rate": 2.1840177663553974e-05, + "loss": 2.1125, + "step": 78360 + }, + { + "epoch": 132.4, + "learning_rate": 2.180310372126005e-05, + "loss": 2.1062, + "step": 78380 + }, + { + "epoch": 132.43, + "learning_rate": 2.1766052496696153e-05, + "loss": 2.1232, + "step": 78400 + }, + { + "epoch": 132.47, + "learning_rate": 2.1729024019713794e-05, + "loss": 2.1115, + "step": 78420 + }, + { + "epoch": 132.5, + "learning_rate": 2.1692018320146153e-05, + "loss": 2.1086, + "step": 78440 + }, + { + "epoch": 132.53, + "learning_rate": 2.165503542780806e-05, + "loss": 2.104, + "step": 78460 + }, + { + "epoch": 132.57, + "learning_rate": 2.1618075372495916e-05, + "loss": 2.1271, + "step": 78480 + }, + { + "epoch": 132.6, + "learning_rate": 2.158113818398784e-05, + "loss": 2.1185, + "step": 78500 + }, + { + "epoch": 132.64, + "learning_rate": 2.1544223892043406e-05, + "loss": 2.1248, + "step": 78520 + }, + { + "epoch": 132.67, + "learning_rate": 2.150733252640381e-05, + "loss": 2.1102, + "step": 78540 + }, + { + "epoch": 132.7, + "learning_rate": 2.147046411679176e-05, + "loss": 2.1075, + "step": 78560 + }, + { + "epoch": 132.74, + "learning_rate": 2.1433618692911467e-05, + "loss": 2.1251, + "step": 78580 + }, + { + "epoch": 132.77, + "learning_rate": 2.139679628444864e-05, + "loss": 2.1113, + "step": 78600 + }, + { + "epoch": 132.8, + "learning_rate": 2.135999692107039e-05, + "loss": 2.1243, + "step": 78620 + }, + { + "epoch": 132.84, + "learning_rate": 2.1323220632425316e-05, + "loss": 2.1215, + "step": 78640 + }, + { + "epoch": 132.87, + "learning_rate": 2.128646744814342e-05, + "loss": 2.13, + "step": 78660 + }, + { + "epoch": 132.91, + "learning_rate": 2.124973739783609e-05, + "loss": 2.1184, + "step": 78680 + }, + { + "epoch": 132.94, + "learning_rate": 2.121303051109601e-05, + "loss": 2.1202, + "step": 78700 + }, + { + "epoch": 132.97, + "learning_rate": 2.117634681749731e-05, + "loss": 2.1065, + "step": 78720 + }, + { + "epoch": 133.01, + "learning_rate": 2.1139686346595377e-05, + "loss": 2.1247, + "step": 78740 + }, + { + "epoch": 133.04, + "learning_rate": 2.110304912792686e-05, + "loss": 2.0774, + "step": 78760 + }, + { + "epoch": 133.07, + "learning_rate": 2.1066435191009715e-05, + "loss": 2.0867, + "step": 78780 + }, + { + "epoch": 133.11, + "learning_rate": 2.102984456534314e-05, + "loss": 2.1156, + "step": 78800 + }, + { + "epoch": 133.14, + "learning_rate": 2.0993277280407548e-05, + "loss": 2.089, + "step": 78820 + }, + { + "epoch": 133.18, + "learning_rate": 2.0956733365664495e-05, + "loss": 2.0993, + "step": 78840 + }, + { + "epoch": 133.21, + "learning_rate": 2.0920212850556797e-05, + "loss": 2.0846, + "step": 78860 + }, + { + "epoch": 133.24, + "learning_rate": 2.0883715764508383e-05, + "loss": 2.0936, + "step": 78880 + }, + { + "epoch": 133.28, + "learning_rate": 2.0847242136924256e-05, + "loss": 2.1128, + "step": 78900 + }, + { + "epoch": 133.31, + "learning_rate": 2.0810791997190577e-05, + "loss": 2.093, + "step": 78920 + }, + { + "epoch": 133.34, + "learning_rate": 2.0774365374674565e-05, + "loss": 2.1066, + "step": 78940 + }, + { + "epoch": 133.38, + "learning_rate": 2.0737962298724512e-05, + "loss": 2.1072, + "step": 78960 + }, + { + "epoch": 133.41, + "learning_rate": 2.0701582798669676e-05, + "loss": 2.0962, + "step": 78980 + }, + { + "epoch": 133.45, + "learning_rate": 2.066522690382037e-05, + "loss": 2.1186, + "step": 79000 + }, + { + "epoch": 133.45, + "eval_loss": 2.498960256576538, + "eval_runtime": 48.7571, + "eval_samples_per_second": 20.284, + "eval_steps_per_second": 0.123, + "eval_tse_ndup": 0.003615034085210324, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03289195640099343, + "eval_tse_type": 0.00032311098298162106, + "step": 79000 + }, + { + "epoch": 133.48, + "learning_rate": 2.062889464346794e-05, + "loss": 2.1045, + "step": 79020 + }, + { + "epoch": 133.51, + "learning_rate": 2.0592586046884566e-05, + "loss": 2.107, + "step": 79040 + }, + { + "epoch": 133.55, + "learning_rate": 2.0556301143323458e-05, + "loss": 2.1035, + "step": 79060 + }, + { + "epoch": 133.58, + "learning_rate": 2.0520039962018693e-05, + "loss": 2.1125, + "step": 79080 + }, + { + "epoch": 133.61, + "learning_rate": 2.0483802532185286e-05, + "loss": 2.1083, + "step": 79100 + }, + { + "epoch": 133.65, + "learning_rate": 2.044758888301903e-05, + "loss": 2.121, + "step": 79120 + }, + { + "epoch": 133.68, + "learning_rate": 2.0411399043696627e-05, + "loss": 2.108, + "step": 79140 + }, + { + "epoch": 133.72, + "learning_rate": 2.0375233043375586e-05, + "loss": 2.1161, + "step": 79160 + }, + { + "epoch": 133.75, + "learning_rate": 2.033909091119419e-05, + "loss": 2.1127, + "step": 79180 + }, + { + "epoch": 133.78, + "learning_rate": 2.0302972676271524e-05, + "loss": 2.1105, + "step": 79200 + }, + { + "epoch": 133.82, + "learning_rate": 2.0266878367707347e-05, + "loss": 2.128, + "step": 79220 + }, + { + "epoch": 133.85, + "learning_rate": 2.0230808014582263e-05, + "loss": 2.1113, + "step": 79240 + }, + { + "epoch": 133.89, + "learning_rate": 2.0194761645957444e-05, + "loss": 2.1084, + "step": 79260 + }, + { + "epoch": 133.92, + "learning_rate": 2.015873929087482e-05, + "loss": 2.109, + "step": 79280 + }, + { + "epoch": 133.95, + "learning_rate": 2.012274097835695e-05, + "loss": 2.1202, + "step": 79300 + }, + { + "epoch": 133.99, + "learning_rate": 2.0086766737407032e-05, + "loss": 2.1194, + "step": 79320 + }, + { + "epoch": 134.02, + "learning_rate": 2.0050816597008864e-05, + "loss": 2.091, + "step": 79340 + }, + { + "epoch": 134.05, + "learning_rate": 2.001489058612679e-05, + "loss": 2.0843, + "step": 79360 + }, + { + "epoch": 134.09, + "learning_rate": 1.9978988733705807e-05, + "loss": 2.0809, + "step": 79380 + }, + { + "epoch": 134.12, + "learning_rate": 1.994311106867134e-05, + "loss": 2.0981, + "step": 79400 + }, + { + "epoch": 134.16, + "learning_rate": 1.9907257619929405e-05, + "loss": 2.0908, + "step": 79420 + }, + { + "epoch": 134.19, + "learning_rate": 1.9871428416366432e-05, + "loss": 2.0987, + "step": 79440 + }, + { + "epoch": 134.22, + "learning_rate": 1.983562348684942e-05, + "loss": 2.1047, + "step": 79460 + }, + { + "epoch": 134.26, + "learning_rate": 1.979984286022574e-05, + "loss": 2.102, + "step": 79480 + }, + { + "epoch": 134.29, + "learning_rate": 1.9764086565323177e-05, + "loss": 2.1032, + "step": 79500 + }, + { + "epoch": 134.32, + "learning_rate": 1.9728354630949936e-05, + "loss": 2.0998, + "step": 79520 + }, + { + "epoch": 134.36, + "learning_rate": 1.96926470858946e-05, + "loss": 2.0928, + "step": 79540 + }, + { + "epoch": 134.39, + "learning_rate": 1.9656963958926105e-05, + "loss": 2.105, + "step": 79560 + }, + { + "epoch": 134.43, + "learning_rate": 1.9621305278793656e-05, + "loss": 2.1176, + "step": 79580 + }, + { + "epoch": 134.46, + "learning_rate": 1.9585671074226858e-05, + "loss": 2.0935, + "step": 79600 + }, + { + "epoch": 134.49, + "learning_rate": 1.955006137393554e-05, + "loss": 2.0976, + "step": 79620 + }, + { + "epoch": 134.53, + "learning_rate": 1.951625488187545e-05, + "loss": 2.0994, + "step": 79640 + }, + { + "epoch": 134.56, + "learning_rate": 1.948069304742313e-05, + "loss": 2.1019, + "step": 79660 + }, + { + "epoch": 134.59, + "learning_rate": 1.944515580182522e-05, + "loss": 2.1038, + "step": 79680 + }, + { + "epoch": 134.63, + "learning_rate": 1.940964317371337e-05, + "loss": 2.1065, + "step": 79700 + }, + { + "epoch": 134.66, + "learning_rate": 1.9374155191699496e-05, + "loss": 2.09, + "step": 79720 + }, + { + "epoch": 134.7, + "learning_rate": 1.9338691884375605e-05, + "loss": 2.1042, + "step": 79740 + }, + { + "epoch": 134.73, + "learning_rate": 1.9303253280313872e-05, + "loss": 2.1115, + "step": 79760 + }, + { + "epoch": 134.76, + "learning_rate": 1.9267839408066507e-05, + "loss": 2.1009, + "step": 79780 + }, + { + "epoch": 134.8, + "learning_rate": 1.9232450296165838e-05, + "loss": 2.112, + "step": 79800 + }, + { + "epoch": 134.83, + "learning_rate": 1.919708597312424e-05, + "loss": 2.1046, + "step": 79820 + }, + { + "epoch": 134.86, + "learning_rate": 1.9161746467434104e-05, + "loss": 2.1017, + "step": 79840 + }, + { + "epoch": 134.9, + "learning_rate": 1.912643180756785e-05, + "loss": 2.1197, + "step": 79860 + }, + { + "epoch": 134.93, + "learning_rate": 1.9091142021977814e-05, + "loss": 2.124, + "step": 79880 + }, + { + "epoch": 134.97, + "learning_rate": 1.9055877139096402e-05, + "loss": 2.1029, + "step": 79900 + }, + { + "epoch": 135.0, + "learning_rate": 1.9020637187335844e-05, + "loss": 2.1145, + "step": 79920 + }, + { + "epoch": 135.03, + "learning_rate": 1.8985422195088347e-05, + "loss": 2.0876, + "step": 79940 + }, + { + "epoch": 135.07, + "learning_rate": 1.8950232190726003e-05, + "loss": 2.0874, + "step": 79960 + }, + { + "epoch": 135.1, + "learning_rate": 1.8915067202600755e-05, + "loss": 2.0823, + "step": 79980 + }, + { + "epoch": 135.14, + "learning_rate": 1.8879927259044416e-05, + "loss": 2.095, + "step": 80000 + }, + { + "epoch": 135.14, + "eval_loss": 2.4958672523498535, + "eval_runtime": 47.6442, + "eval_samples_per_second": 20.758, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.0038424369886962398, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03117974620576516, + "eval_tse_type": 0.000579288843950118, + "step": 80000 + }, + { + "epoch": 135.17, + "learning_rate": 1.8844812388368556e-05, + "loss": 2.0736, + "step": 80020 + }, + { + "epoch": 135.2, + "learning_rate": 1.8809722618864657e-05, + "loss": 2.0829, + "step": 80040 + }, + { + "epoch": 135.24, + "learning_rate": 1.877465797880386e-05, + "loss": 2.0836, + "step": 80060 + }, + { + "epoch": 135.27, + "learning_rate": 1.873961849643714e-05, + "loss": 2.1014, + "step": 80080 + }, + { + "epoch": 135.3, + "learning_rate": 1.8704604199995156e-05, + "loss": 2.0869, + "step": 80100 + }, + { + "epoch": 135.34, + "learning_rate": 1.8669615117688316e-05, + "loss": 2.1022, + "step": 80120 + }, + { + "epoch": 135.37, + "learning_rate": 1.8634651277706693e-05, + "loss": 2.0988, + "step": 80140 + }, + { + "epoch": 135.41, + "learning_rate": 1.8599712708219992e-05, + "loss": 2.1048, + "step": 80160 + }, + { + "epoch": 135.44, + "learning_rate": 1.8564799437377605e-05, + "loss": 2.0974, + "step": 80180 + }, + { + "epoch": 135.47, + "learning_rate": 1.8529911493308526e-05, + "loss": 2.0889, + "step": 80200 + }, + { + "epoch": 135.51, + "learning_rate": 1.8495048904121338e-05, + "loss": 2.0966, + "step": 80220 + }, + { + "epoch": 135.54, + "learning_rate": 1.84602116979042e-05, + "loss": 2.0896, + "step": 80240 + }, + { + "epoch": 135.57, + "learning_rate": 1.8425399902724817e-05, + "loss": 2.0981, + "step": 80260 + }, + { + "epoch": 135.61, + "learning_rate": 1.8390613546630448e-05, + "loss": 2.1059, + "step": 80280 + }, + { + "epoch": 135.64, + "learning_rate": 1.835585265764779e-05, + "loss": 2.1071, + "step": 80300 + }, + { + "epoch": 135.68, + "learning_rate": 1.832111726378308e-05, + "loss": 2.0962, + "step": 80320 + }, + { + "epoch": 135.71, + "learning_rate": 1.8286407393022008e-05, + "loss": 2.1241, + "step": 80340 + }, + { + "epoch": 135.74, + "learning_rate": 1.8251723073329685e-05, + "loss": 2.1039, + "step": 80360 + }, + { + "epoch": 135.78, + "learning_rate": 1.8217064332650652e-05, + "loss": 2.1091, + "step": 80380 + }, + { + "epoch": 135.81, + "learning_rate": 1.8182431198908783e-05, + "loss": 2.0984, + "step": 80400 + }, + { + "epoch": 135.84, + "learning_rate": 1.8147823700007444e-05, + "loss": 2.1158, + "step": 80420 + }, + { + "epoch": 135.88, + "learning_rate": 1.8113241863829204e-05, + "loss": 2.1005, + "step": 80440 + }, + { + "epoch": 135.91, + "learning_rate": 1.8078685718236054e-05, + "loss": 2.1054, + "step": 80460 + }, + { + "epoch": 135.95, + "learning_rate": 1.8044155291069255e-05, + "loss": 2.1138, + "step": 80480 + }, + { + "epoch": 135.98, + "learning_rate": 1.8009650610149343e-05, + "loss": 2.0984, + "step": 80500 + }, + { + "epoch": 136.01, + "learning_rate": 1.7975171703276133e-05, + "loss": 2.0964, + "step": 80520 + }, + { + "epoch": 136.05, + "learning_rate": 1.794071859822862e-05, + "loss": 2.0772, + "step": 80540 + }, + { + "epoch": 136.08, + "learning_rate": 1.7906291322765097e-05, + "loss": 2.079, + "step": 80560 + }, + { + "epoch": 136.11, + "learning_rate": 1.787188990462296e-05, + "loss": 2.0747, + "step": 80580 + }, + { + "epoch": 136.15, + "learning_rate": 1.7837514371518837e-05, + "loss": 2.0938, + "step": 80600 + }, + { + "epoch": 136.18, + "learning_rate": 1.7803164751148432e-05, + "loss": 2.0796, + "step": 80620 + }, + { + "epoch": 136.22, + "learning_rate": 1.7768841071186676e-05, + "loss": 2.0693, + "step": 80640 + }, + { + "epoch": 136.25, + "learning_rate": 1.7734543359287485e-05, + "loss": 2.0829, + "step": 80660 + }, + { + "epoch": 136.28, + "learning_rate": 1.7700271643083925e-05, + "loss": 2.0851, + "step": 80680 + }, + { + "epoch": 136.32, + "learning_rate": 1.7666025950188097e-05, + "loss": 2.0852, + "step": 80700 + }, + { + "epoch": 136.35, + "learning_rate": 1.7631806308191145e-05, + "loss": 2.0868, + "step": 80720 + }, + { + "epoch": 136.39, + "learning_rate": 1.7597612744663224e-05, + "loss": 2.0901, + "step": 80740 + }, + { + "epoch": 136.42, + "learning_rate": 1.7563445287153424e-05, + "loss": 2.0945, + "step": 80760 + }, + { + "epoch": 136.45, + "learning_rate": 1.7529303963189913e-05, + "loss": 2.0927, + "step": 80780 + }, + { + "epoch": 136.49, + "learning_rate": 1.7495188800279695e-05, + "loss": 2.0838, + "step": 80800 + }, + { + "epoch": 136.52, + "learning_rate": 1.7461099825908754e-05, + "loss": 2.0936, + "step": 80820 + }, + { + "epoch": 136.55, + "learning_rate": 1.7427037067541955e-05, + "loss": 2.1008, + "step": 80840 + }, + { + "epoch": 136.59, + "learning_rate": 1.7393000552623056e-05, + "loss": 2.0923, + "step": 80860 + }, + { + "epoch": 136.62, + "learning_rate": 1.7358990308574676e-05, + "loss": 2.0969, + "step": 80880 + }, + { + "epoch": 136.66, + "learning_rate": 1.732500636279822e-05, + "loss": 2.0988, + "step": 80900 + }, + { + "epoch": 136.69, + "learning_rate": 1.7291048742673954e-05, + "loss": 2.0989, + "step": 80920 + }, + { + "epoch": 136.72, + "learning_rate": 1.7257117475560923e-05, + "loss": 2.1037, + "step": 80940 + }, + { + "epoch": 136.76, + "learning_rate": 1.7223212588796933e-05, + "loss": 2.1032, + "step": 80960 + }, + { + "epoch": 136.79, + "learning_rate": 1.718933410969854e-05, + "loss": 2.1054, + "step": 80980 + }, + { + "epoch": 136.82, + "learning_rate": 1.7155482065561024e-05, + "loss": 2.0961, + "step": 81000 + }, + { + "epoch": 136.82, + "eval_loss": 2.489452362060547, + "eval_runtime": 47.4712, + "eval_samples_per_second": 20.834, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.003943252625860958, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03198987826687528, + "eval_tse_type": 0.00048107834873431016, + "step": 81000 + }, + { + "epoch": 136.86, + "learning_rate": 1.7121656483658383e-05, + "loss": 2.1133, + "step": 81020 + }, + { + "epoch": 136.89, + "learning_rate": 1.7087857391243246e-05, + "loss": 2.1203, + "step": 81040 + }, + { + "epoch": 136.93, + "learning_rate": 1.7054084815546933e-05, + "loss": 2.1088, + "step": 81060 + }, + { + "epoch": 136.96, + "learning_rate": 1.7020338783779414e-05, + "loss": 2.1037, + "step": 81080 + }, + { + "epoch": 136.99, + "learning_rate": 1.698661932312926e-05, + "loss": 2.1095, + "step": 81100 + }, + { + "epoch": 137.03, + "learning_rate": 1.6952926460763636e-05, + "loss": 2.0793, + "step": 81120 + }, + { + "epoch": 137.06, + "learning_rate": 1.6919260223828226e-05, + "loss": 2.0671, + "step": 81140 + }, + { + "epoch": 137.09, + "learning_rate": 1.6885620639447375e-05, + "loss": 2.0802, + "step": 81160 + }, + { + "epoch": 137.13, + "learning_rate": 1.6852007734723846e-05, + "loss": 2.0895, + "step": 81180 + }, + { + "epoch": 137.16, + "learning_rate": 1.6818421536738953e-05, + "loss": 2.0869, + "step": 81200 + }, + { + "epoch": 137.2, + "learning_rate": 1.6784862072552504e-05, + "loss": 2.0769, + "step": 81220 + }, + { + "epoch": 137.23, + "learning_rate": 1.6751329369202745e-05, + "loss": 2.0735, + "step": 81240 + }, + { + "epoch": 137.26, + "learning_rate": 1.6717823453706382e-05, + "loss": 2.085, + "step": 81260 + }, + { + "epoch": 137.3, + "learning_rate": 1.668434435305849e-05, + "loss": 2.0848, + "step": 81280 + }, + { + "epoch": 137.33, + "learning_rate": 1.6650892094232624e-05, + "loss": 2.0724, + "step": 81300 + }, + { + "epoch": 137.36, + "learning_rate": 1.661746670418063e-05, + "loss": 2.0938, + "step": 81320 + }, + { + "epoch": 137.4, + "learning_rate": 1.6584068209832743e-05, + "loss": 2.1014, + "step": 81340 + }, + { + "epoch": 137.43, + "learning_rate": 1.655069663809754e-05, + "loss": 2.0856, + "step": 81360 + }, + { + "epoch": 137.47, + "learning_rate": 1.6517352015861892e-05, + "loss": 2.0853, + "step": 81380 + }, + { + "epoch": 137.5, + "learning_rate": 1.648403436999097e-05, + "loss": 2.1067, + "step": 81400 + }, + { + "epoch": 137.53, + "learning_rate": 1.6450743727328167e-05, + "loss": 2.1053, + "step": 81420 + }, + { + "epoch": 137.57, + "learning_rate": 1.641748011469517e-05, + "loss": 2.0821, + "step": 81440 + }, + { + "epoch": 137.6, + "learning_rate": 1.6384243558891877e-05, + "loss": 2.0904, + "step": 81460 + }, + { + "epoch": 137.64, + "learning_rate": 1.6351034086696386e-05, + "loss": 2.0979, + "step": 81480 + }, + { + "epoch": 137.67, + "learning_rate": 1.6317851724864926e-05, + "loss": 2.0905, + "step": 81500 + }, + { + "epoch": 137.7, + "learning_rate": 1.6284696500131975e-05, + "loss": 2.0921, + "step": 81520 + }, + { + "epoch": 137.74, + "learning_rate": 1.62515684392101e-05, + "loss": 2.1098, + "step": 81540 + }, + { + "epoch": 137.77, + "learning_rate": 1.6218467568789946e-05, + "loss": 2.085, + "step": 81560 + }, + { + "epoch": 137.8, + "learning_rate": 1.6185393915540308e-05, + "loss": 2.0945, + "step": 81580 + }, + { + "epoch": 137.84, + "learning_rate": 1.6152347506108023e-05, + "loss": 2.0968, + "step": 81600 + }, + { + "epoch": 137.87, + "learning_rate": 1.6119328367118008e-05, + "loss": 2.1083, + "step": 81620 + }, + { + "epoch": 137.91, + "learning_rate": 1.608633652517315e-05, + "loss": 2.0924, + "step": 81640 + }, + { + "epoch": 137.94, + "learning_rate": 1.605337200685439e-05, + "loss": 2.0951, + "step": 81660 + }, + { + "epoch": 137.97, + "learning_rate": 1.6020434838720684e-05, + "loss": 2.0926, + "step": 81680 + }, + { + "epoch": 138.01, + "learning_rate": 1.5987525047308864e-05, + "loss": 2.089, + "step": 81700 + }, + { + "epoch": 138.04, + "learning_rate": 1.5954642659133778e-05, + "loss": 2.0787, + "step": 81720 + }, + { + "epoch": 138.07, + "learning_rate": 1.5921787700688166e-05, + "loss": 2.0714, + "step": 81740 + }, + { + "epoch": 138.11, + "learning_rate": 1.58889601984427e-05, + "loss": 2.0713, + "step": 81760 + }, + { + "epoch": 138.14, + "learning_rate": 1.5856160178845857e-05, + "loss": 2.067, + "step": 81780 + }, + { + "epoch": 138.18, + "learning_rate": 1.5825025640101894e-05, + "loss": 2.0735, + "step": 81800 + }, + { + "epoch": 138.21, + "learning_rate": 1.5792279287658634e-05, + "loss": 2.0742, + "step": 81820 + }, + { + "epoch": 138.24, + "learning_rate": 1.5759560495758075e-05, + "loss": 2.0855, + "step": 81840 + }, + { + "epoch": 138.28, + "learning_rate": 1.5726869290761158e-05, + "loss": 2.084, + "step": 81860 + }, + { + "epoch": 138.31, + "learning_rate": 1.5694205699006615e-05, + "loss": 2.0832, + "step": 81880 + }, + { + "epoch": 138.34, + "learning_rate": 1.56615697468109e-05, + "loss": 2.0875, + "step": 81900 + }, + { + "epoch": 138.38, + "learning_rate": 1.5628961460468234e-05, + "loss": 2.0844, + "step": 81920 + }, + { + "epoch": 138.41, + "learning_rate": 1.5596380866250465e-05, + "loss": 2.0827, + "step": 81940 + }, + { + "epoch": 138.45, + "learning_rate": 1.5563827990407265e-05, + "loss": 2.0701, + "step": 81960 + }, + { + "epoch": 138.48, + "learning_rate": 1.553130285916584e-05, + "loss": 2.074, + "step": 81980 + }, + { + "epoch": 138.51, + "learning_rate": 1.5498805498731144e-05, + "loss": 2.0887, + "step": 82000 + }, + { + "epoch": 138.51, + "eval_loss": 2.4867608547210693, + "eval_runtime": 50.3039, + "eval_samples_per_second": 19.66, + "eval_steps_per_second": 0.119, + "eval_tse_ndup": 0.0037981136751464254, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.031081833727716383, + "eval_tse_type": 0.0005038057521626784, + "step": 82000 + }, + { + "epoch": 138.55, + "learning_rate": 1.546633593528566e-05, + "loss": 2.0885, + "step": 82020 + }, + { + "epoch": 138.58, + "learning_rate": 1.5433894194989575e-05, + "loss": 2.0813, + "step": 82040 + }, + { + "epoch": 138.61, + "learning_rate": 1.540148030398061e-05, + "loss": 2.0962, + "step": 82060 + }, + { + "epoch": 138.65, + "learning_rate": 1.5369094288374026e-05, + "loss": 2.0909, + "step": 82080 + }, + { + "epoch": 138.68, + "learning_rate": 1.5336736174262667e-05, + "loss": 2.0864, + "step": 82100 + }, + { + "epoch": 138.72, + "learning_rate": 1.5304405987716876e-05, + "loss": 2.0911, + "step": 82120 + }, + { + "epoch": 138.75, + "learning_rate": 1.5272103754784517e-05, + "loss": 2.0987, + "step": 82140 + }, + { + "epoch": 138.78, + "learning_rate": 1.5239829501490871e-05, + "loss": 2.0966, + "step": 82160 + }, + { + "epoch": 138.82, + "learning_rate": 1.520758325383877e-05, + "loss": 2.0958, + "step": 82180 + }, + { + "epoch": 138.85, + "learning_rate": 1.5175365037808432e-05, + "loss": 2.09, + "step": 82200 + }, + { + "epoch": 138.89, + "learning_rate": 1.5143174879357452e-05, + "loss": 2.0863, + "step": 82220 + }, + { + "epoch": 138.92, + "learning_rate": 1.5111012804420887e-05, + "loss": 2.1026, + "step": 82240 + }, + { + "epoch": 138.95, + "learning_rate": 1.5078878838911137e-05, + "loss": 2.1075, + "step": 82260 + }, + { + "epoch": 138.99, + "learning_rate": 1.5046773008717969e-05, + "loss": 2.101, + "step": 82280 + }, + { + "epoch": 139.02, + "learning_rate": 1.501469533970844e-05, + "loss": 2.069, + "step": 82300 + }, + { + "epoch": 139.05, + "learning_rate": 1.4982645857726946e-05, + "loss": 2.0658, + "step": 82320 + }, + { + "epoch": 139.09, + "learning_rate": 1.495062458859523e-05, + "loss": 2.0736, + "step": 82340 + }, + { + "epoch": 139.12, + "learning_rate": 1.49186315581122e-05, + "loss": 2.085, + "step": 82360 + }, + { + "epoch": 139.16, + "learning_rate": 1.4886666792054083e-05, + "loss": 2.073, + "step": 82380 + }, + { + "epoch": 139.19, + "learning_rate": 1.4854730316174303e-05, + "loss": 2.0727, + "step": 82400 + }, + { + "epoch": 139.22, + "learning_rate": 1.482282215620352e-05, + "loss": 2.0818, + "step": 82420 + }, + { + "epoch": 139.26, + "learning_rate": 1.479094233784954e-05, + "loss": 2.0733, + "step": 82440 + }, + { + "epoch": 139.29, + "learning_rate": 1.475909088679735e-05, + "loss": 2.0868, + "step": 82460 + }, + { + "epoch": 139.32, + "learning_rate": 1.4727267828709134e-05, + "loss": 2.0854, + "step": 82480 + }, + { + "epoch": 139.36, + "learning_rate": 1.4695473189224112e-05, + "loss": 2.0749, + "step": 82500 + }, + { + "epoch": 139.39, + "learning_rate": 1.466370699395868e-05, + "loss": 2.0767, + "step": 82520 + }, + { + "epoch": 139.43, + "learning_rate": 1.463196926850624e-05, + "loss": 2.0768, + "step": 82540 + }, + { + "epoch": 139.46, + "learning_rate": 1.4600260038437375e-05, + "loss": 2.0858, + "step": 82560 + }, + { + "epoch": 139.49, + "learning_rate": 1.4568579329299582e-05, + "loss": 2.0794, + "step": 82580 + }, + { + "epoch": 139.53, + "learning_rate": 1.4536927166617454e-05, + "loss": 2.0875, + "step": 82600 + }, + { + "epoch": 139.56, + "learning_rate": 1.4505303575892564e-05, + "loss": 2.0768, + "step": 82620 + }, + { + "epoch": 139.59, + "learning_rate": 1.447370858260348e-05, + "loss": 2.0823, + "step": 82640 + }, + { + "epoch": 139.63, + "learning_rate": 1.4442142212205718e-05, + "loss": 2.0902, + "step": 82660 + }, + { + "epoch": 139.66, + "learning_rate": 1.4410604490131696e-05, + "loss": 2.0906, + "step": 82680 + }, + { + "epoch": 139.7, + "learning_rate": 1.4379095441790846e-05, + "loss": 2.0819, + "step": 82700 + }, + { + "epoch": 139.73, + "learning_rate": 1.4347615092569389e-05, + "loss": 2.0879, + "step": 82720 + }, + { + "epoch": 139.76, + "learning_rate": 1.4316163467830495e-05, + "loss": 2.0897, + "step": 82740 + }, + { + "epoch": 139.8, + "learning_rate": 1.4284740592914164e-05, + "loss": 2.1008, + "step": 82760 + }, + { + "epoch": 139.83, + "learning_rate": 1.4253346493137248e-05, + "loss": 2.088, + "step": 82780 + }, + { + "epoch": 139.86, + "learning_rate": 1.4221981193793415e-05, + "loss": 2.0853, + "step": 82800 + }, + { + "epoch": 139.9, + "learning_rate": 1.4190644720153085e-05, + "loss": 2.0898, + "step": 82820 + }, + { + "epoch": 139.93, + "learning_rate": 1.4159337097463515e-05, + "loss": 2.0775, + "step": 82840 + }, + { + "epoch": 139.97, + "learning_rate": 1.412805835094868e-05, + "loss": 2.1072, + "step": 82860 + }, + { + "epoch": 140.0, + "learning_rate": 1.4096808505809311e-05, + "loss": 2.0776, + "step": 82880 + }, + { + "epoch": 140.03, + "learning_rate": 1.4065587587222839e-05, + "loss": 2.0555, + "step": 82900 + }, + { + "epoch": 140.07, + "learning_rate": 1.4034395620343394e-05, + "loss": 2.0714, + "step": 82920 + }, + { + "epoch": 140.1, + "learning_rate": 1.40032326303018e-05, + "loss": 2.0684, + "step": 82940 + }, + { + "epoch": 140.14, + "learning_rate": 1.3972098642205472e-05, + "loss": 2.0549, + "step": 82960 + }, + { + "epoch": 140.17, + "learning_rate": 1.394099368113853e-05, + "loss": 2.0696, + "step": 82980 + }, + { + "epoch": 140.2, + "learning_rate": 1.390991777216168e-05, + "loss": 2.0607, + "step": 83000 + }, + { + "epoch": 140.2, + "eval_loss": 2.4845190048217773, + "eval_runtime": 50.7394, + "eval_samples_per_second": 19.492, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.004089683705445265, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.0313907972493957, + "eval_tse_type": 0.0005622169893882281, + "step": 83000 + }, + { + "epoch": 140.24, + "learning_rate": 1.3878870940312227e-05, + "loss": 2.0765, + "step": 83020 + }, + { + "epoch": 140.27, + "learning_rate": 1.3847853210604017e-05, + "loss": 2.0747, + "step": 83040 + }, + { + "epoch": 140.3, + "learning_rate": 1.3816864608027475e-05, + "loss": 2.0634, + "step": 83060 + }, + { + "epoch": 140.34, + "learning_rate": 1.37859051575496e-05, + "loss": 2.0887, + "step": 83080 + }, + { + "epoch": 140.37, + "learning_rate": 1.3754974884113819e-05, + "loss": 2.0808, + "step": 83100 + }, + { + "epoch": 140.41, + "learning_rate": 1.3724073812640109e-05, + "loss": 2.0821, + "step": 83120 + }, + { + "epoch": 140.44, + "learning_rate": 1.369320196802491e-05, + "loss": 2.0861, + "step": 83140 + }, + { + "epoch": 140.47, + "learning_rate": 1.366235937514112e-05, + "loss": 2.0711, + "step": 83160 + }, + { + "epoch": 140.51, + "learning_rate": 1.363154605883803e-05, + "loss": 2.0828, + "step": 83180 + }, + { + "epoch": 140.54, + "learning_rate": 1.3600762043941374e-05, + "loss": 2.0885, + "step": 83200 + }, + { + "epoch": 140.57, + "learning_rate": 1.3570007355253317e-05, + "loss": 2.0764, + "step": 83220 + }, + { + "epoch": 140.61, + "learning_rate": 1.3539282017552318e-05, + "loss": 2.0746, + "step": 83240 + }, + { + "epoch": 140.64, + "learning_rate": 1.350858605559323e-05, + "loss": 2.0943, + "step": 83260 + }, + { + "epoch": 140.68, + "learning_rate": 1.347791949410725e-05, + "loss": 2.075, + "step": 83280 + }, + { + "epoch": 140.71, + "learning_rate": 1.3447282357801877e-05, + "loss": 2.0678, + "step": 83300 + }, + { + "epoch": 140.74, + "learning_rate": 1.3416674671360874e-05, + "loss": 2.0791, + "step": 83320 + }, + { + "epoch": 140.78, + "learning_rate": 1.3386096459444314e-05, + "loss": 2.0869, + "step": 83340 + }, + { + "epoch": 140.81, + "learning_rate": 1.3355547746688513e-05, + "loss": 2.0998, + "step": 83360 + }, + { + "epoch": 140.84, + "learning_rate": 1.332502855770601e-05, + "loss": 2.0936, + "step": 83380 + }, + { + "epoch": 140.88, + "learning_rate": 1.3294538917085586e-05, + "loss": 2.089, + "step": 83400 + }, + { + "epoch": 140.91, + "learning_rate": 1.3264078849392141e-05, + "loss": 2.086, + "step": 83420 + }, + { + "epoch": 140.95, + "learning_rate": 1.3233648379166875e-05, + "loss": 2.0927, + "step": 83440 + }, + { + "epoch": 140.98, + "learning_rate": 1.320324753092701e-05, + "loss": 2.0846, + "step": 83460 + }, + { + "epoch": 141.01, + "learning_rate": 1.3172876329165978e-05, + "loss": 2.0788, + "step": 83480 + }, + { + "epoch": 141.05, + "learning_rate": 1.3142534798353318e-05, + "loss": 2.0621, + "step": 83500 + }, + { + "epoch": 141.08, + "learning_rate": 1.3112222962934639e-05, + "loss": 2.0617, + "step": 83520 + }, + { + "epoch": 141.11, + "learning_rate": 1.3081940847331659e-05, + "loss": 2.0751, + "step": 83540 + }, + { + "epoch": 141.15, + "learning_rate": 1.3051688475942109e-05, + "loss": 2.0809, + "step": 83560 + }, + { + "epoch": 141.18, + "learning_rate": 1.3021465873139782e-05, + "loss": 2.0623, + "step": 83580 + }, + { + "epoch": 141.22, + "learning_rate": 1.299127306327449e-05, + "loss": 2.0517, + "step": 83600 + }, + { + "epoch": 141.25, + "learning_rate": 1.2961110070672034e-05, + "loss": 2.0704, + "step": 83620 + }, + { + "epoch": 141.28, + "learning_rate": 1.293097691963419e-05, + "loss": 2.0659, + "step": 83640 + }, + { + "epoch": 141.32, + "learning_rate": 1.2900873634438699e-05, + "loss": 2.067, + "step": 83660 + }, + { + "epoch": 141.35, + "learning_rate": 1.2870800239339236e-05, + "loss": 2.0792, + "step": 83680 + }, + { + "epoch": 141.39, + "learning_rate": 1.2840756758565381e-05, + "loss": 2.0955, + "step": 83700 + }, + { + "epoch": 141.42, + "learning_rate": 1.2810743216322623e-05, + "loss": 2.0694, + "step": 83720 + }, + { + "epoch": 141.45, + "learning_rate": 1.2780759636792344e-05, + "loss": 2.068, + "step": 83740 + }, + { + "epoch": 141.49, + "learning_rate": 1.2750806044131758e-05, + "loss": 2.0703, + "step": 83760 + }, + { + "epoch": 141.52, + "learning_rate": 1.2720882462473943e-05, + "loss": 2.0855, + "step": 83780 + }, + { + "epoch": 141.55, + "learning_rate": 1.2690988915927788e-05, + "loss": 2.0764, + "step": 83800 + }, + { + "epoch": 141.59, + "learning_rate": 1.2661125428577997e-05, + "loss": 2.0934, + "step": 83820 + }, + { + "epoch": 141.62, + "learning_rate": 1.2631292024485009e-05, + "loss": 2.0795, + "step": 83840 + }, + { + "epoch": 141.66, + "learning_rate": 1.2601488727685078e-05, + "loss": 2.0796, + "step": 83860 + }, + { + "epoch": 141.69, + "learning_rate": 1.2571715562190183e-05, + "loss": 2.0791, + "step": 83880 + }, + { + "epoch": 141.72, + "learning_rate": 1.2541972551988024e-05, + "loss": 2.0793, + "step": 83900 + }, + { + "epoch": 141.76, + "learning_rate": 1.2512259721042019e-05, + "loss": 2.067, + "step": 83920 + }, + { + "epoch": 141.79, + "learning_rate": 1.248257709329122e-05, + "loss": 2.0877, + "step": 83940 + }, + { + "epoch": 141.82, + "learning_rate": 1.2452924692650442e-05, + "loss": 2.0859, + "step": 83960 + }, + { + "epoch": 141.86, + "learning_rate": 1.242330254301004e-05, + "loss": 2.0762, + "step": 83980 + }, + { + "epoch": 141.89, + "learning_rate": 1.2393710668236058e-05, + "loss": 2.073, + "step": 84000 + }, + { + "epoch": 141.89, + "eval_loss": 2.4791016578674316, + "eval_runtime": 47.4414, + "eval_samples_per_second": 20.847, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.00437610540721674, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03073634182479321, + "eval_tse_type": 0.000670824119886145, + "step": 84000 + }, + { + "epoch": 141.93, + "learning_rate": 1.2364149092170146e-05, + "loss": 2.0908, + "step": 84020 + }, + { + "epoch": 141.96, + "learning_rate": 1.2334617838629525e-05, + "loss": 2.0951, + "step": 84040 + }, + { + "epoch": 141.99, + "learning_rate": 1.2305116931407008e-05, + "loss": 2.0695, + "step": 84060 + }, + { + "epoch": 142.03, + "learning_rate": 1.2275646394270907e-05, + "loss": 2.0729, + "step": 84080 + }, + { + "epoch": 142.06, + "learning_rate": 1.2246206250965125e-05, + "loss": 2.0615, + "step": 84100 + }, + { + "epoch": 142.09, + "learning_rate": 1.2216796525209056e-05, + "loss": 2.0634, + "step": 84120 + }, + { + "epoch": 142.13, + "learning_rate": 1.2187417240697591e-05, + "loss": 2.0627, + "step": 84140 + }, + { + "epoch": 142.16, + "learning_rate": 1.2158068421101048e-05, + "loss": 2.052, + "step": 84160 + }, + { + "epoch": 142.2, + "learning_rate": 1.2128750090065304e-05, + "loss": 2.0605, + "step": 84180 + }, + { + "epoch": 142.23, + "learning_rate": 1.2099462271211558e-05, + "loss": 2.0677, + "step": 84200 + }, + { + "epoch": 142.26, + "learning_rate": 1.2071667126701514e-05, + "loss": 2.0704, + "step": 84220 + }, + { + "epoch": 142.3, + "learning_rate": 1.2042438874450202e-05, + "loss": 2.0755, + "step": 84240 + }, + { + "epoch": 142.33, + "learning_rate": 1.2013241203920295e-05, + "loss": 2.0658, + "step": 84260 + }, + { + "epoch": 142.36, + "learning_rate": 1.1984074138635825e-05, + "loss": 2.0637, + "step": 84280 + }, + { + "epoch": 142.4, + "learning_rate": 1.1954937702096175e-05, + "loss": 2.0796, + "step": 84300 + }, + { + "epoch": 142.43, + "learning_rate": 1.1925831917776043e-05, + "loss": 2.0878, + "step": 84320 + }, + { + "epoch": 142.47, + "learning_rate": 1.1896756809125442e-05, + "loss": 2.0624, + "step": 84340 + }, + { + "epoch": 142.5, + "learning_rate": 1.1867712399569642e-05, + "loss": 2.0717, + "step": 84360 + }, + { + "epoch": 142.53, + "learning_rate": 1.1838698712509206e-05, + "loss": 2.0777, + "step": 84380 + }, + { + "epoch": 142.57, + "learning_rate": 1.1809715771319946e-05, + "loss": 2.0743, + "step": 84400 + }, + { + "epoch": 142.6, + "learning_rate": 1.1780763599352885e-05, + "loss": 2.069, + "step": 84420 + }, + { + "epoch": 142.64, + "learning_rate": 1.1751842219934273e-05, + "loss": 2.068, + "step": 84440 + }, + { + "epoch": 142.67, + "learning_rate": 1.1722951656365538e-05, + "loss": 2.0725, + "step": 84460 + }, + { + "epoch": 142.7, + "learning_rate": 1.1694091931923302e-05, + "loss": 2.0772, + "step": 84480 + }, + { + "epoch": 142.74, + "learning_rate": 1.1665263069859295e-05, + "loss": 2.0907, + "step": 84500 + }, + { + "epoch": 142.77, + "learning_rate": 1.1636465093400423e-05, + "loss": 2.0793, + "step": 84520 + }, + { + "epoch": 142.8, + "learning_rate": 1.1607698025748697e-05, + "loss": 2.0671, + "step": 84540 + }, + { + "epoch": 142.84, + "learning_rate": 1.1578961890081225e-05, + "loss": 2.0792, + "step": 84560 + }, + { + "epoch": 142.87, + "learning_rate": 1.1550256709550206e-05, + "loss": 2.0789, + "step": 84580 + }, + { + "epoch": 142.91, + "learning_rate": 1.152158250728284e-05, + "loss": 2.089, + "step": 84600 + }, + { + "epoch": 142.94, + "learning_rate": 1.1492939306381473e-05, + "loss": 2.0869, + "step": 84620 + }, + { + "epoch": 142.97, + "learning_rate": 1.1464327129923369e-05, + "loss": 2.0719, + "step": 84640 + }, + { + "epoch": 143.01, + "learning_rate": 1.1435746000960861e-05, + "loss": 2.0714, + "step": 84660 + }, + { + "epoch": 143.04, + "learning_rate": 1.1407195942521248e-05, + "loss": 2.0597, + "step": 84680 + }, + { + "epoch": 143.07, + "learning_rate": 1.1378676977606789e-05, + "loss": 2.0419, + "step": 84700 + }, + { + "epoch": 143.11, + "learning_rate": 1.1350189129194722e-05, + "loss": 2.058, + "step": 84720 + }, + { + "epoch": 143.14, + "learning_rate": 1.1321732420237163e-05, + "loss": 2.0666, + "step": 84740 + }, + { + "epoch": 143.18, + "learning_rate": 1.1293306873661175e-05, + "loss": 2.0583, + "step": 84760 + }, + { + "epoch": 143.21, + "learning_rate": 1.1264912512368714e-05, + "loss": 2.0638, + "step": 84780 + }, + { + "epoch": 143.24, + "learning_rate": 1.1236549359236614e-05, + "loss": 2.0691, + "step": 84800 + }, + { + "epoch": 143.28, + "learning_rate": 1.120821743711652e-05, + "loss": 2.0557, + "step": 84820 + }, + { + "epoch": 143.31, + "learning_rate": 1.117991676883498e-05, + "loss": 2.0667, + "step": 84840 + }, + { + "epoch": 143.34, + "learning_rate": 1.1151647377193347e-05, + "loss": 2.0707, + "step": 84860 + }, + { + "epoch": 143.38, + "learning_rate": 1.1123409284967717e-05, + "loss": 2.0611, + "step": 84880 + }, + { + "epoch": 143.41, + "learning_rate": 1.109520251490903e-05, + "loss": 2.0766, + "step": 84900 + }, + { + "epoch": 143.45, + "learning_rate": 1.1067027089742976e-05, + "loss": 2.0798, + "step": 84920 + }, + { + "epoch": 143.48, + "learning_rate": 1.1038883032169995e-05, + "loss": 2.0705, + "step": 84940 + }, + { + "epoch": 143.51, + "learning_rate": 1.1010770364865203e-05, + "loss": 2.0785, + "step": 84960 + }, + { + "epoch": 143.55, + "learning_rate": 1.0982689110478511e-05, + "loss": 2.0649, + "step": 84980 + }, + { + "epoch": 143.58, + "learning_rate": 1.0954639291634482e-05, + "loss": 2.0784, + "step": 85000 + }, + { + "epoch": 143.58, + "eval_loss": 2.477313995361328, + "eval_runtime": 47.1876, + "eval_samples_per_second": 20.959, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.004345092321614098, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.030822836551891573, + "eval_tse_type": 0.0006871029754330882, + "step": 85000 + }, + { + "epoch": 143.61, + "learning_rate": 1.0926620930932319e-05, + "loss": 2.0825, + "step": 85020 + }, + { + "epoch": 143.65, + "learning_rate": 1.0898634050945915e-05, + "loss": 2.0769, + "step": 85040 + }, + { + "epoch": 143.68, + "learning_rate": 1.087067867422381e-05, + "loss": 2.0559, + "step": 85060 + }, + { + "epoch": 143.72, + "learning_rate": 1.0842754823289148e-05, + "loss": 2.092, + "step": 85080 + }, + { + "epoch": 143.75, + "learning_rate": 1.0814862520639641e-05, + "loss": 2.0753, + "step": 85100 + }, + { + "epoch": 143.78, + "learning_rate": 1.0787001788747626e-05, + "loss": 2.0753, + "step": 85120 + }, + { + "epoch": 143.82, + "learning_rate": 1.0759172650060018e-05, + "loss": 2.0671, + "step": 85140 + }, + { + "epoch": 143.85, + "learning_rate": 1.0731375126998222e-05, + "loss": 2.0752, + "step": 85160 + }, + { + "epoch": 143.89, + "learning_rate": 1.0703609241958212e-05, + "loss": 2.0686, + "step": 85180 + }, + { + "epoch": 143.92, + "learning_rate": 1.0675875017310455e-05, + "loss": 2.0807, + "step": 85200 + }, + { + "epoch": 143.95, + "learning_rate": 1.0648172475399931e-05, + "loss": 2.0675, + "step": 85220 + }, + { + "epoch": 143.99, + "learning_rate": 1.0620501638546049e-05, + "loss": 2.064, + "step": 85240 + }, + { + "epoch": 144.02, + "learning_rate": 1.0592862529042719e-05, + "loss": 2.0738, + "step": 85260 + }, + { + "epoch": 144.05, + "learning_rate": 1.0565255169158272e-05, + "loss": 2.0472, + "step": 85280 + }, + { + "epoch": 144.09, + "learning_rate": 1.0537679581135456e-05, + "loss": 2.058, + "step": 85300 + }, + { + "epoch": 144.12, + "learning_rate": 1.0510135787191444e-05, + "loss": 2.0489, + "step": 85320 + }, + { + "epoch": 144.16, + "learning_rate": 1.0482623809517727e-05, + "loss": 2.0532, + "step": 85340 + }, + { + "epoch": 144.19, + "learning_rate": 1.0455143670280265e-05, + "loss": 2.0643, + "step": 85360 + }, + { + "epoch": 144.22, + "learning_rate": 1.042769539161927e-05, + "loss": 2.0722, + "step": 85380 + }, + { + "epoch": 144.26, + "learning_rate": 1.0400278995649332e-05, + "loss": 2.0695, + "step": 85400 + }, + { + "epoch": 144.29, + "learning_rate": 1.0372894504459353e-05, + "loss": 2.0644, + "step": 85420 + }, + { + "epoch": 144.32, + "learning_rate": 1.0345541940112519e-05, + "loss": 2.0736, + "step": 85440 + }, + { + "epoch": 144.36, + "learning_rate": 1.031822132464631e-05, + "loss": 2.0559, + "step": 85460 + }, + { + "epoch": 144.39, + "learning_rate": 1.0290932680072412e-05, + "loss": 2.0542, + "step": 85480 + }, + { + "epoch": 144.43, + "learning_rate": 1.0263676028376856e-05, + "loss": 2.0653, + "step": 85500 + }, + { + "epoch": 144.46, + "learning_rate": 1.0236451391519786e-05, + "loss": 2.0632, + "step": 85520 + }, + { + "epoch": 144.49, + "learning_rate": 1.0209258791435621e-05, + "loss": 2.0596, + "step": 85540 + }, + { + "epoch": 144.53, + "learning_rate": 1.0182098250032918e-05, + "loss": 2.0622, + "step": 85560 + }, + { + "epoch": 144.56, + "learning_rate": 1.0154969789194468e-05, + "loss": 2.0696, + "step": 85580 + }, + { + "epoch": 144.59, + "learning_rate": 1.012787343077719e-05, + "loss": 2.0648, + "step": 85600 + }, + { + "epoch": 144.63, + "learning_rate": 1.0100809196612093e-05, + "loss": 2.0664, + "step": 85620 + }, + { + "epoch": 144.66, + "learning_rate": 1.0073777108504362e-05, + "loss": 2.0762, + "step": 85640 + }, + { + "epoch": 144.7, + "learning_rate": 1.0046777188233264e-05, + "loss": 2.0692, + "step": 85660 + }, + { + "epoch": 144.73, + "learning_rate": 1.0019809457552155e-05, + "loss": 2.0609, + "step": 85680 + }, + { + "epoch": 144.76, + "learning_rate": 9.992873938188407e-06, + "loss": 2.0804, + "step": 85700 + }, + { + "epoch": 144.8, + "learning_rate": 9.965970651843526e-06, + "loss": 2.0863, + "step": 85720 + }, + { + "epoch": 144.83, + "learning_rate": 9.939099620193004e-06, + "loss": 2.0718, + "step": 85740 + }, + { + "epoch": 144.86, + "learning_rate": 9.912260864886319e-06, + "loss": 2.0668, + "step": 85760 + }, + { + "epoch": 144.9, + "learning_rate": 9.885454407546985e-06, + "loss": 2.0816, + "step": 85780 + }, + { + "epoch": 144.93, + "learning_rate": 9.858680269772475e-06, + "loss": 2.0645, + "step": 85800 + }, + { + "epoch": 144.97, + "learning_rate": 9.83193847313425e-06, + "loss": 2.0805, + "step": 85820 + }, + { + "epoch": 145.0, + "learning_rate": 9.805229039177654e-06, + "loss": 2.0684, + "step": 85840 + }, + { + "epoch": 145.03, + "learning_rate": 9.778551989422014e-06, + "loss": 2.0476, + "step": 85860 + }, + { + "epoch": 145.07, + "learning_rate": 9.751907345360567e-06, + "loss": 2.053, + "step": 85880 + }, + { + "epoch": 145.1, + "learning_rate": 9.725295128460393e-06, + "loss": 2.0607, + "step": 85900 + }, + { + "epoch": 145.14, + "learning_rate": 9.698715360162492e-06, + "loss": 2.0643, + "step": 85920 + }, + { + "epoch": 145.17, + "learning_rate": 9.672168061881687e-06, + "loss": 2.0697, + "step": 85940 + }, + { + "epoch": 145.2, + "learning_rate": 9.645653255006687e-06, + "loss": 2.0515, + "step": 85960 + }, + { + "epoch": 145.24, + "learning_rate": 9.619170960899953e-06, + "loss": 2.0523, + "step": 85980 + }, + { + "epoch": 145.27, + "learning_rate": 9.592721200897804e-06, + "loss": 2.0519, + "step": 86000 + }, + { + "epoch": 145.27, + "eval_loss": 2.4758903980255127, + "eval_runtime": 49.7273, + "eval_samples_per_second": 19.888, + "eval_steps_per_second": 0.121, + "eval_tse_ndup": 0.0031375304058157056, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.031715138351557606, + "eval_tse_type": 0.0005146297351445141, + "step": 86000 + }, + { + "epoch": 145.3, + "learning_rate": 9.566303996310361e-06, + "loss": 2.0626, + "step": 86020 + }, + { + "epoch": 145.34, + "learning_rate": 9.539919368421456e-06, + "loss": 2.0577, + "step": 86040 + }, + { + "epoch": 145.37, + "learning_rate": 9.513567338488738e-06, + "loss": 2.0672, + "step": 86060 + }, + { + "epoch": 145.41, + "learning_rate": 9.48724792774351e-06, + "loss": 2.0477, + "step": 86080 + }, + { + "epoch": 145.44, + "learning_rate": 9.460961157390907e-06, + "loss": 2.0584, + "step": 86100 + }, + { + "epoch": 145.47, + "learning_rate": 9.434707048609664e-06, + "loss": 2.0671, + "step": 86120 + }, + { + "epoch": 145.51, + "learning_rate": 9.408485622552254e-06, + "loss": 2.0552, + "step": 86140 + }, + { + "epoch": 145.54, + "learning_rate": 9.382296900344805e-06, + "loss": 2.0686, + "step": 86160 + }, + { + "epoch": 145.57, + "learning_rate": 9.356140903087102e-06, + "loss": 2.0663, + "step": 86180 + }, + { + "epoch": 145.61, + "learning_rate": 9.330017651852569e-06, + "loss": 2.0723, + "step": 86200 + }, + { + "epoch": 145.64, + "learning_rate": 9.303927167688203e-06, + "loss": 2.0704, + "step": 86220 + }, + { + "epoch": 145.68, + "learning_rate": 9.27786947161468e-06, + "loss": 2.0787, + "step": 86240 + }, + { + "epoch": 145.71, + "learning_rate": 9.251844584626184e-06, + "loss": 2.0757, + "step": 86260 + }, + { + "epoch": 145.74, + "learning_rate": 9.227151350500151e-06, + "loss": 2.0742, + "step": 86280 + }, + { + "epoch": 145.78, + "learning_rate": 9.201190501511964e-06, + "loss": 2.053, + "step": 86300 + }, + { + "epoch": 145.81, + "learning_rate": 9.175262523387678e-06, + "loss": 2.0615, + "step": 86320 + }, + { + "epoch": 145.84, + "learning_rate": 9.149367437016992e-06, + "loss": 2.0679, + "step": 86340 + }, + { + "epoch": 145.88, + "learning_rate": 9.12350526326311e-06, + "loss": 2.0665, + "step": 86360 + }, + { + "epoch": 145.91, + "learning_rate": 9.09767602296272e-06, + "loss": 2.0727, + "step": 86380 + }, + { + "epoch": 145.95, + "learning_rate": 9.071879736925987e-06, + "loss": 2.0713, + "step": 86400 + }, + { + "epoch": 145.98, + "learning_rate": 9.046116425936491e-06, + "loss": 2.0673, + "step": 86420 + }, + { + "epoch": 146.01, + "learning_rate": 9.020386110751266e-06, + "loss": 2.0721, + "step": 86440 + }, + { + "epoch": 146.05, + "learning_rate": 8.994688812100776e-06, + "loss": 2.0604, + "step": 86460 + }, + { + "epoch": 146.08, + "learning_rate": 8.969024550688881e-06, + "loss": 2.0532, + "step": 86480 + }, + { + "epoch": 146.11, + "learning_rate": 8.94339334719278e-06, + "loss": 2.053, + "step": 86500 + }, + { + "epoch": 146.15, + "learning_rate": 8.917795222263076e-06, + "loss": 2.0612, + "step": 86520 + }, + { + "epoch": 146.18, + "learning_rate": 8.892230196523754e-06, + "loss": 2.0486, + "step": 86540 + }, + { + "epoch": 146.22, + "learning_rate": 8.866698290572051e-06, + "loss": 2.0646, + "step": 86560 + }, + { + "epoch": 146.25, + "learning_rate": 8.841199524978583e-06, + "loss": 2.0675, + "step": 86580 + }, + { + "epoch": 146.28, + "learning_rate": 8.81573392028724e-06, + "loss": 2.0645, + "step": 86600 + }, + { + "epoch": 146.32, + "learning_rate": 8.790301497015207e-06, + "loss": 2.0607, + "step": 86620 + }, + { + "epoch": 146.35, + "learning_rate": 8.764902275652914e-06, + "loss": 2.05, + "step": 86640 + }, + { + "epoch": 146.39, + "learning_rate": 8.739536276664063e-06, + "loss": 2.0596, + "step": 86660 + }, + { + "epoch": 146.42, + "learning_rate": 8.714203520485582e-06, + "loss": 2.0611, + "step": 86680 + }, + { + "epoch": 146.45, + "learning_rate": 8.688904027527605e-06, + "loss": 2.0514, + "step": 86700 + }, + { + "epoch": 146.49, + "learning_rate": 8.663637818173504e-06, + "loss": 2.046, + "step": 86720 + }, + { + "epoch": 146.52, + "learning_rate": 8.638404912779763e-06, + "loss": 2.0686, + "step": 86740 + }, + { + "epoch": 146.55, + "learning_rate": 8.613205331676133e-06, + "loss": 2.0547, + "step": 86760 + }, + { + "epoch": 146.59, + "learning_rate": 8.588039095165412e-06, + "loss": 2.0611, + "step": 86780 + }, + { + "epoch": 146.62, + "learning_rate": 8.562906223523603e-06, + "loss": 2.0738, + "step": 86800 + }, + { + "epoch": 146.66, + "learning_rate": 8.53780673699981e-06, + "loss": 2.0655, + "step": 86820 + }, + { + "epoch": 146.69, + "learning_rate": 8.512740655816232e-06, + "loss": 2.0479, + "step": 86840 + }, + { + "epoch": 146.72, + "learning_rate": 8.487708000168165e-06, + "loss": 2.0562, + "step": 86860 + }, + { + "epoch": 146.76, + "learning_rate": 8.46270879022394e-06, + "loss": 2.0621, + "step": 86880 + }, + { + "epoch": 146.79, + "learning_rate": 8.437743046125013e-06, + "loss": 2.0607, + "step": 86900 + }, + { + "epoch": 146.82, + "learning_rate": 8.412810787985797e-06, + "loss": 2.0686, + "step": 86920 + }, + { + "epoch": 146.86, + "learning_rate": 8.387912035893774e-06, + "loss": 2.0563, + "step": 86940 + }, + { + "epoch": 146.89, + "learning_rate": 8.36304680990942e-06, + "loss": 2.0744, + "step": 86960 + }, + { + "epoch": 146.93, + "learning_rate": 8.338215130066195e-06, + "loss": 2.0676, + "step": 86980 + }, + { + "epoch": 146.96, + "learning_rate": 8.313417016370556e-06, + "loss": 2.0607, + "step": 87000 + }, + { + "epoch": 146.96, + "eval_loss": 2.4719398021698, + "eval_runtime": 47.3991, + "eval_samples_per_second": 20.865, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.005302481808746275, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.030851745762280017, + "eval_tse_type": 0.00048098528255252214, + "step": 87000 + }, + { + "epoch": 146.99, + "learning_rate": 8.288652488801858e-06, + "loss": 2.0697, + "step": 87020 + }, + { + "epoch": 147.03, + "learning_rate": 8.263921567312454e-06, + "loss": 2.0586, + "step": 87040 + }, + { + "epoch": 147.06, + "learning_rate": 8.2392242718276e-06, + "loss": 2.0528, + "step": 87060 + }, + { + "epoch": 147.09, + "learning_rate": 8.21456062224546e-06, + "loss": 2.0655, + "step": 87080 + }, + { + "epoch": 147.13, + "learning_rate": 8.189930638437094e-06, + "loss": 2.0426, + "step": 87100 + }, + { + "epoch": 147.16, + "learning_rate": 8.165334340246427e-06, + "loss": 2.0635, + "step": 87120 + }, + { + "epoch": 147.2, + "learning_rate": 8.140771747490273e-06, + "loss": 2.0533, + "step": 87140 + }, + { + "epoch": 147.23, + "learning_rate": 8.116242879958236e-06, + "loss": 2.0411, + "step": 87160 + }, + { + "epoch": 147.26, + "learning_rate": 8.091747757412804e-06, + "loss": 2.0435, + "step": 87180 + }, + { + "epoch": 147.3, + "learning_rate": 8.067286399589246e-06, + "loss": 2.0477, + "step": 87200 + }, + { + "epoch": 147.33, + "learning_rate": 8.042858826195648e-06, + "loss": 2.074, + "step": 87220 + }, + { + "epoch": 147.36, + "learning_rate": 8.01846505691286e-06, + "loss": 2.0472, + "step": 87240 + }, + { + "epoch": 147.4, + "learning_rate": 7.99410511139448e-06, + "loss": 2.0456, + "step": 87260 + }, + { + "epoch": 147.43, + "learning_rate": 7.969779009266915e-06, + "loss": 2.0606, + "step": 87280 + }, + { + "epoch": 147.47, + "learning_rate": 7.945486770129234e-06, + "loss": 2.0632, + "step": 87300 + }, + { + "epoch": 147.5, + "learning_rate": 7.921228413553272e-06, + "loss": 2.0693, + "step": 87320 + }, + { + "epoch": 147.53, + "learning_rate": 7.897003959083538e-06, + "loss": 2.0688, + "step": 87340 + }, + { + "epoch": 147.57, + "learning_rate": 7.87281342623724e-06, + "loss": 2.0478, + "step": 87360 + }, + { + "epoch": 147.6, + "learning_rate": 7.848656834504276e-06, + "loss": 2.0476, + "step": 87380 + }, + { + "epoch": 147.64, + "learning_rate": 7.824534203347122e-06, + "loss": 2.0542, + "step": 87400 + }, + { + "epoch": 147.67, + "learning_rate": 7.800445552201013e-06, + "loss": 2.0665, + "step": 87420 + }, + { + "epoch": 147.7, + "learning_rate": 7.77639090047369e-06, + "loss": 2.0564, + "step": 87440 + }, + { + "epoch": 147.74, + "learning_rate": 7.752370267545584e-06, + "loss": 2.0552, + "step": 87460 + }, + { + "epoch": 147.77, + "learning_rate": 7.728383672769641e-06, + "loss": 2.0633, + "step": 87480 + }, + { + "epoch": 147.8, + "learning_rate": 7.704431135471473e-06, + "loss": 2.0697, + "step": 87500 + }, + { + "epoch": 147.84, + "learning_rate": 7.680512674949197e-06, + "loss": 2.0672, + "step": 87520 + }, + { + "epoch": 147.87, + "learning_rate": 7.656628310473468e-06, + "loss": 2.0613, + "step": 87540 + }, + { + "epoch": 147.91, + "learning_rate": 7.632778061287493e-06, + "loss": 2.0639, + "step": 87560 + }, + { + "epoch": 147.94, + "learning_rate": 7.608961946606996e-06, + "loss": 2.0635, + "step": 87580 + }, + { + "epoch": 147.97, + "learning_rate": 7.5851799856201945e-06, + "loss": 2.0685, + "step": 87600 + }, + { + "epoch": 148.01, + "learning_rate": 7.56143219748775e-06, + "loss": 2.054, + "step": 87620 + }, + { + "epoch": 148.04, + "learning_rate": 7.537718601342858e-06, + "loss": 2.0421, + "step": 87640 + }, + { + "epoch": 148.07, + "learning_rate": 7.514039216291147e-06, + "loss": 2.041, + "step": 87660 + }, + { + "epoch": 148.11, + "learning_rate": 7.490394061410638e-06, + "loss": 2.0385, + "step": 87680 + }, + { + "epoch": 148.14, + "learning_rate": 7.4667831557518165e-06, + "loss": 2.0476, + "step": 87700 + }, + { + "epoch": 148.18, + "learning_rate": 7.443206518337564e-06, + "loss": 2.0508, + "step": 87720 + }, + { + "epoch": 148.21, + "learning_rate": 7.419664168163165e-06, + "loss": 2.0585, + "step": 87740 + }, + { + "epoch": 148.24, + "learning_rate": 7.396156124196241e-06, + "loss": 2.0495, + "step": 87760 + }, + { + "epoch": 148.28, + "learning_rate": 7.372682405376807e-06, + "loss": 2.0559, + "step": 87780 + }, + { + "epoch": 148.31, + "learning_rate": 7.34924303061722e-06, + "loss": 2.0514, + "step": 87800 + }, + { + "epoch": 148.34, + "learning_rate": 7.325838018802156e-06, + "loss": 2.059, + "step": 87820 + }, + { + "epoch": 148.38, + "learning_rate": 7.302467388788614e-06, + "loss": 2.0384, + "step": 87840 + }, + { + "epoch": 148.41, + "learning_rate": 7.279131159405888e-06, + "loss": 2.0525, + "step": 87860 + }, + { + "epoch": 148.45, + "learning_rate": 7.255829349455567e-06, + "loss": 2.0589, + "step": 87880 + }, + { + "epoch": 148.48, + "learning_rate": 7.232561977711472e-06, + "loss": 2.053, + "step": 87900 + }, + { + "epoch": 148.51, + "learning_rate": 7.209329062919723e-06, + "loss": 2.0661, + "step": 87920 + }, + { + "epoch": 148.55, + "learning_rate": 7.186130623798648e-06, + "loss": 2.0559, + "step": 87940 + }, + { + "epoch": 148.58, + "learning_rate": 7.1629666790388236e-06, + "loss": 2.0615, + "step": 87960 + }, + { + "epoch": 148.61, + "learning_rate": 7.139837247303028e-06, + "loss": 2.0704, + "step": 87980 + }, + { + "epoch": 148.65, + "learning_rate": 7.11674234722619e-06, + "loss": 2.0672, + "step": 88000 + }, + { + "epoch": 148.65, + "eval_loss": 2.4698731899261475, + "eval_runtime": 47.5167, + "eval_samples_per_second": 20.814, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.003816046407276121, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03154199793926685, + "eval_tse_type": 0.000774391320397387, + "step": 88000 + }, + { + "epoch": 148.68, + "learning_rate": 7.093681997415508e-06, + "loss": 2.062, + "step": 88020 + }, + { + "epoch": 148.72, + "learning_rate": 7.070656216450239e-06, + "loss": 2.0625, + "step": 88040 + }, + { + "epoch": 148.75, + "learning_rate": 7.047665022881866e-06, + "loss": 2.0724, + "step": 88060 + }, + { + "epoch": 148.78, + "learning_rate": 7.0247084352339675e-06, + "loss": 2.0601, + "step": 88080 + }, + { + "epoch": 148.82, + "learning_rate": 7.001786472002259e-06, + "loss": 2.0529, + "step": 88100 + }, + { + "epoch": 148.85, + "learning_rate": 6.978899151654555e-06, + "loss": 2.0565, + "step": 88120 + }, + { + "epoch": 148.89, + "learning_rate": 6.95604649263073e-06, + "loss": 2.0474, + "step": 88140 + }, + { + "epoch": 148.92, + "learning_rate": 6.933228513342804e-06, + "loss": 2.0644, + "step": 88160 + }, + { + "epoch": 148.95, + "learning_rate": 6.910445232174772e-06, + "loss": 2.0612, + "step": 88180 + }, + { + "epoch": 148.99, + "learning_rate": 6.887696667482729e-06, + "loss": 2.0532, + "step": 88200 + }, + { + "epoch": 149.02, + "learning_rate": 6.8649828375947745e-06, + "loss": 2.0399, + "step": 88220 + }, + { + "epoch": 149.05, + "learning_rate": 6.8423037608110415e-06, + "loss": 2.0546, + "step": 88240 + }, + { + "epoch": 149.09, + "learning_rate": 6.8196594554036545e-06, + "loss": 2.0431, + "step": 88260 + }, + { + "epoch": 149.12, + "learning_rate": 6.797049939616701e-06, + "loss": 2.0565, + "step": 88280 + }, + { + "epoch": 149.16, + "learning_rate": 6.774475231666272e-06, + "loss": 2.0495, + "step": 88300 + }, + { + "epoch": 149.19, + "learning_rate": 6.751935349740407e-06, + "loss": 2.0564, + "step": 88320 + }, + { + "epoch": 149.22, + "learning_rate": 6.729430311999085e-06, + "loss": 2.0444, + "step": 88340 + }, + { + "epoch": 149.26, + "learning_rate": 6.706960136574175e-06, + "loss": 2.0734, + "step": 88360 + }, + { + "epoch": 149.29, + "learning_rate": 6.684524841569534e-06, + "loss": 2.0536, + "step": 88380 + }, + { + "epoch": 149.32, + "learning_rate": 6.662124445060863e-06, + "loss": 2.0487, + "step": 88400 + }, + { + "epoch": 149.36, + "learning_rate": 6.639758965095744e-06, + "loss": 2.048, + "step": 88420 + }, + { + "epoch": 149.39, + "learning_rate": 6.617428419693639e-06, + "loss": 2.0559, + "step": 88440 + }, + { + "epoch": 149.43, + "learning_rate": 6.595132826845879e-06, + "loss": 2.0456, + "step": 88460 + }, + { + "epoch": 149.46, + "learning_rate": 6.5728722045156285e-06, + "loss": 2.0585, + "step": 88480 + }, + { + "epoch": 149.49, + "learning_rate": 6.550646570637836e-06, + "loss": 2.0504, + "step": 88500 + }, + { + "epoch": 149.53, + "learning_rate": 6.528455943119305e-06, + "loss": 2.046, + "step": 88520 + }, + { + "epoch": 149.56, + "learning_rate": 6.506300339838656e-06, + "loss": 2.045, + "step": 88540 + }, + { + "epoch": 149.59, + "learning_rate": 6.484179778646216e-06, + "loss": 2.0529, + "step": 88560 + }, + { + "epoch": 149.63, + "learning_rate": 6.462094277364139e-06, + "loss": 2.0551, + "step": 88580 + }, + { + "epoch": 149.66, + "learning_rate": 6.440043853786315e-06, + "loss": 2.033, + "step": 88600 + }, + { + "epoch": 149.7, + "learning_rate": 6.418028525678382e-06, + "loss": 2.0499, + "step": 88620 + }, + { + "epoch": 149.73, + "learning_rate": 6.396048310777669e-06, + "loss": 2.0713, + "step": 88640 + }, + { + "epoch": 149.76, + "learning_rate": 6.374103226793243e-06, + "loss": 2.0577, + "step": 88660 + }, + { + "epoch": 149.8, + "learning_rate": 6.352193291405883e-06, + "loss": 2.0662, + "step": 88680 + }, + { + "epoch": 149.83, + "learning_rate": 6.330318522268008e-06, + "loss": 2.0467, + "step": 88700 + }, + { + "epoch": 149.86, + "learning_rate": 6.308478937003731e-06, + "loss": 2.0531, + "step": 88720 + }, + { + "epoch": 149.9, + "learning_rate": 6.28667455320881e-06, + "loss": 2.0586, + "step": 88740 + }, + { + "epoch": 149.93, + "learning_rate": 6.264905388450659e-06, + "loss": 2.0659, + "step": 88760 + }, + { + "epoch": 149.97, + "learning_rate": 6.2431714602682714e-06, + "loss": 2.0599, + "step": 88780 + }, + { + "epoch": 150.0, + "learning_rate": 6.221472786172294e-06, + "loss": 2.0527, + "step": 88800 + }, + { + "epoch": 150.03, + "learning_rate": 6.199809383644956e-06, + "loss": 2.04, + "step": 88820 + }, + { + "epoch": 150.07, + "learning_rate": 6.178181270140077e-06, + "loss": 2.0388, + "step": 88840 + }, + { + "epoch": 150.1, + "learning_rate": 6.156588463083035e-06, + "loss": 2.0466, + "step": 88860 + }, + { + "epoch": 150.14, + "learning_rate": 6.135030979870743e-06, + "loss": 2.0452, + "step": 88880 + }, + { + "epoch": 150.17, + "learning_rate": 6.113508837871718e-06, + "loss": 2.0419, + "step": 88900 + }, + { + "epoch": 150.2, + "learning_rate": 6.092022054425928e-06, + "loss": 2.0531, + "step": 88920 + }, + { + "epoch": 150.24, + "learning_rate": 6.070570646844886e-06, + "loss": 2.0525, + "step": 88940 + }, + { + "epoch": 150.27, + "learning_rate": 6.049154632411624e-06, + "loss": 2.0425, + "step": 88960 + }, + { + "epoch": 150.3, + "learning_rate": 6.027774028380623e-06, + "loss": 2.0572, + "step": 88980 + }, + { + "epoch": 150.34, + "learning_rate": 6.0064288519778635e-06, + "loss": 2.0491, + "step": 89000 + }, + { + "epoch": 150.34, + "eval_loss": 2.4691731929779053, + "eval_runtime": 47.5747, + "eval_samples_per_second": 20.788, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004018707610340331, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.031120958485009845, + "eval_tse_type": 0.0008390911133580498, + "step": 89000 + }, + { + "epoch": 150.37, + "learning_rate": 5.985119120400745e-06, + "loss": 2.0614, + "step": 89020 + }, + { + "epoch": 150.41, + "learning_rate": 5.963844850818151e-06, + "loss": 2.0443, + "step": 89040 + }, + { + "epoch": 150.44, + "learning_rate": 5.94260606037037e-06, + "loss": 2.0417, + "step": 89060 + }, + { + "epoch": 150.47, + "learning_rate": 5.921402766169126e-06, + "loss": 2.0667, + "step": 89080 + }, + { + "epoch": 150.51, + "learning_rate": 5.900234985297498e-06, + "loss": 2.0578, + "step": 89100 + }, + { + "epoch": 150.54, + "learning_rate": 5.879102734810016e-06, + "loss": 2.0481, + "step": 89120 + }, + { + "epoch": 150.57, + "learning_rate": 5.858006031732549e-06, + "loss": 2.0737, + "step": 89140 + }, + { + "epoch": 150.61, + "learning_rate": 5.836944893062318e-06, + "loss": 2.0507, + "step": 89160 + }, + { + "epoch": 150.64, + "learning_rate": 5.8159193357679e-06, + "loss": 2.0488, + "step": 89180 + }, + { + "epoch": 150.68, + "learning_rate": 5.794929376789215e-06, + "loss": 2.039, + "step": 89200 + }, + { + "epoch": 150.71, + "learning_rate": 5.773975033037499e-06, + "loss": 2.0547, + "step": 89220 + }, + { + "epoch": 150.74, + "learning_rate": 5.753056321395267e-06, + "loss": 2.0551, + "step": 89240 + }, + { + "epoch": 150.78, + "learning_rate": 5.732173258716366e-06, + "loss": 2.0427, + "step": 89260 + }, + { + "epoch": 150.81, + "learning_rate": 5.711325861825906e-06, + "loss": 2.0503, + "step": 89280 + }, + { + "epoch": 150.84, + "learning_rate": 5.690514147520243e-06, + "loss": 2.0522, + "step": 89300 + }, + { + "epoch": 150.88, + "learning_rate": 5.66973813256701e-06, + "loss": 2.0386, + "step": 89320 + }, + { + "epoch": 150.91, + "learning_rate": 5.6489978337050555e-06, + "loss": 2.0557, + "step": 89340 + }, + { + "epoch": 150.95, + "learning_rate": 5.6282932676444856e-06, + "loss": 2.0451, + "step": 89360 + }, + { + "epoch": 150.98, + "learning_rate": 5.607624451066568e-06, + "loss": 2.0659, + "step": 89380 + }, + { + "epoch": 151.01, + "learning_rate": 5.586991400623798e-06, + "loss": 2.064, + "step": 89400 + }, + { + "epoch": 151.05, + "learning_rate": 5.566394132939884e-06, + "loss": 2.0434, + "step": 89420 + }, + { + "epoch": 151.08, + "learning_rate": 5.54583266460964e-06, + "loss": 2.0484, + "step": 89440 + }, + { + "epoch": 151.11, + "learning_rate": 5.525307012199077e-06, + "loss": 2.0436, + "step": 89460 + }, + { + "epoch": 151.15, + "learning_rate": 5.504817192245343e-06, + "loss": 2.0486, + "step": 89480 + }, + { + "epoch": 151.18, + "learning_rate": 5.484363221256733e-06, + "loss": 2.0363, + "step": 89500 + }, + { + "epoch": 151.22, + "learning_rate": 5.463945115712609e-06, + "loss": 2.0417, + "step": 89520 + }, + { + "epoch": 151.25, + "learning_rate": 5.443562892063497e-06, + "loss": 2.0627, + "step": 89540 + }, + { + "epoch": 151.28, + "learning_rate": 5.423216566730971e-06, + "loss": 2.0468, + "step": 89560 + }, + { + "epoch": 151.32, + "learning_rate": 5.4029061561077064e-06, + "loss": 2.0507, + "step": 89580 + }, + { + "epoch": 151.35, + "learning_rate": 5.382631676557437e-06, + "loss": 2.0686, + "step": 89600 + }, + { + "epoch": 151.39, + "learning_rate": 5.3623931444149235e-06, + "loss": 2.0343, + "step": 89620 + }, + { + "epoch": 151.42, + "learning_rate": 5.342190575986022e-06, + "loss": 2.0584, + "step": 89640 + }, + { + "epoch": 151.45, + "learning_rate": 5.322023987547547e-06, + "loss": 2.0555, + "step": 89660 + }, + { + "epoch": 151.49, + "learning_rate": 5.301893395347363e-06, + "loss": 2.0492, + "step": 89680 + }, + { + "epoch": 151.52, + "learning_rate": 5.281798815604327e-06, + "loss": 2.0453, + "step": 89700 + }, + { + "epoch": 151.55, + "learning_rate": 5.261740264508275e-06, + "loss": 2.042, + "step": 89720 + }, + { + "epoch": 151.59, + "learning_rate": 5.2417177582200325e-06, + "loss": 2.0472, + "step": 89740 + }, + { + "epoch": 151.62, + "learning_rate": 5.2217313128713415e-06, + "loss": 2.046, + "step": 89760 + }, + { + "epoch": 151.66, + "learning_rate": 5.20178094456496e-06, + "loss": 2.0422, + "step": 89780 + }, + { + "epoch": 151.69, + "learning_rate": 5.1818666693745076e-06, + "loss": 2.0479, + "step": 89800 + }, + { + "epoch": 151.72, + "learning_rate": 5.161988503344561e-06, + "loss": 2.042, + "step": 89820 + }, + { + "epoch": 151.76, + "learning_rate": 5.1421464624906155e-06, + "loss": 2.0542, + "step": 89840 + }, + { + "epoch": 151.79, + "learning_rate": 5.122340562799027e-06, + "loss": 2.0586, + "step": 89860 + }, + { + "epoch": 151.82, + "learning_rate": 5.1025708202270765e-06, + "loss": 2.0389, + "step": 89880 + }, + { + "epoch": 151.86, + "learning_rate": 5.0828372507028545e-06, + "loss": 2.0604, + "step": 89900 + }, + { + "epoch": 151.89, + "learning_rate": 5.063139870125367e-06, + "loss": 2.05, + "step": 89920 + }, + { + "epoch": 151.93, + "learning_rate": 5.043478694364423e-06, + "loss": 2.0452, + "step": 89940 + }, + { + "epoch": 151.96, + "learning_rate": 5.023853739260681e-06, + "loss": 2.0501, + "step": 89960 + }, + { + "epoch": 151.99, + "learning_rate": 5.0042650206256146e-06, + "loss": 2.0592, + "step": 89980 + }, + { + "epoch": 152.03, + "learning_rate": 4.9847125542415055e-06, + "loss": 2.0495, + "step": 90000 + }, + { + "epoch": 152.03, + "eval_loss": 2.4667632579803467, + "eval_runtime": 47.3318, + "eval_samples_per_second": 20.895, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.0038151567924477906, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.030863016647171955, + "eval_tse_type": 0.0009181781128057627, + "step": 90000 + }, + { + "epoch": 152.06, + "learning_rate": 4.965196355861423e-06, + "loss": 2.0334, + "step": 90020 + }, + { + "epoch": 152.09, + "learning_rate": 4.9457164412092025e-06, + "loss": 2.0306, + "step": 90040 + }, + { + "epoch": 152.13, + "learning_rate": 4.926272825979466e-06, + "loss": 2.036, + "step": 90060 + }, + { + "epoch": 152.16, + "learning_rate": 4.906865525837589e-06, + "loss": 2.0366, + "step": 90080 + }, + { + "epoch": 152.2, + "learning_rate": 4.887494556419675e-06, + "loss": 2.0381, + "step": 90100 + }, + { + "epoch": 152.23, + "learning_rate": 4.868159933332572e-06, + "loss": 2.0472, + "step": 90120 + }, + { + "epoch": 152.26, + "learning_rate": 4.8488616721538205e-06, + "loss": 2.0408, + "step": 90140 + }, + { + "epoch": 152.3, + "learning_rate": 4.82959978843171e-06, + "loss": 2.0502, + "step": 90160 + }, + { + "epoch": 152.33, + "learning_rate": 4.810374297685161e-06, + "loss": 2.0595, + "step": 90180 + }, + { + "epoch": 152.36, + "learning_rate": 4.791185215403821e-06, + "loss": 2.0581, + "step": 90200 + }, + { + "epoch": 152.4, + "learning_rate": 4.772032557047984e-06, + "loss": 2.0243, + "step": 90220 + }, + { + "epoch": 152.43, + "learning_rate": 4.7529163380486074e-06, + "loss": 2.0409, + "step": 90240 + }, + { + "epoch": 152.47, + "learning_rate": 4.734789695981407e-06, + "loss": 2.0498, + "step": 90260 + }, + { + "epoch": 152.5, + "learning_rate": 4.715744577999176e-06, + "loss": 2.0373, + "step": 90280 + }, + { + "epoch": 152.53, + "learning_rate": 4.696735944723624e-06, + "loss": 2.0461, + "step": 90300 + }, + { + "epoch": 152.57, + "learning_rate": 4.677763811469638e-06, + "loss": 2.0276, + "step": 90320 + }, + { + "epoch": 152.6, + "learning_rate": 4.658828193522735e-06, + "loss": 2.0582, + "step": 90340 + }, + { + "epoch": 152.64, + "learning_rate": 4.63992910613899e-06, + "loss": 2.045, + "step": 90360 + }, + { + "epoch": 152.67, + "learning_rate": 4.621066564545068e-06, + "loss": 2.056, + "step": 90380 + }, + { + "epoch": 152.7, + "learning_rate": 4.602240583938183e-06, + "loss": 2.0578, + "step": 90400 + }, + { + "epoch": 152.74, + "learning_rate": 4.583451179486053e-06, + "loss": 2.0609, + "step": 90420 + }, + { + "epoch": 152.77, + "learning_rate": 4.564698366327014e-06, + "loss": 2.0539, + "step": 90440 + }, + { + "epoch": 152.8, + "learning_rate": 4.545982159569822e-06, + "loss": 2.0348, + "step": 90460 + }, + { + "epoch": 152.84, + "learning_rate": 4.527302574293812e-06, + "loss": 2.0458, + "step": 90480 + }, + { + "epoch": 152.87, + "learning_rate": 4.5086596255487834e-06, + "loss": 2.0601, + "step": 90500 + }, + { + "epoch": 152.91, + "learning_rate": 4.490053328355015e-06, + "loss": 2.0599, + "step": 90520 + }, + { + "epoch": 152.94, + "learning_rate": 4.471483697703288e-06, + "loss": 2.0442, + "step": 90540 + }, + { + "epoch": 152.97, + "learning_rate": 4.4529507485547905e-06, + "loss": 2.0572, + "step": 90560 + }, + { + "epoch": 153.01, + "learning_rate": 4.434454495841195e-06, + "loss": 2.0633, + "step": 90580 + }, + { + "epoch": 153.04, + "learning_rate": 4.4159949544646025e-06, + "loss": 2.0344, + "step": 90600 + }, + { + "epoch": 153.07, + "learning_rate": 4.397572139297529e-06, + "loss": 2.034, + "step": 90620 + }, + { + "epoch": 153.11, + "learning_rate": 4.3791860651829e-06, + "loss": 2.044, + "step": 90640 + }, + { + "epoch": 153.14, + "learning_rate": 4.360836746934055e-06, + "loss": 2.0489, + "step": 90660 + }, + { + "epoch": 153.18, + "learning_rate": 4.342524199334702e-06, + "loss": 2.0524, + "step": 90680 + }, + { + "epoch": 153.21, + "learning_rate": 4.324248437138922e-06, + "loss": 2.0412, + "step": 90700 + }, + { + "epoch": 153.24, + "learning_rate": 4.306009475071177e-06, + "loss": 2.0381, + "step": 90720 + }, + { + "epoch": 153.28, + "learning_rate": 4.28780732782626e-06, + "loss": 2.0374, + "step": 90740 + }, + { + "epoch": 153.31, + "learning_rate": 4.269642010069319e-06, + "loss": 2.0404, + "step": 90760 + }, + { + "epoch": 153.34, + "learning_rate": 4.2515135364358305e-06, + "loss": 2.0323, + "step": 90780 + }, + { + "epoch": 153.38, + "learning_rate": 4.233421921531555e-06, + "loss": 2.0501, + "step": 90800 + }, + { + "epoch": 153.41, + "learning_rate": 4.215367179932605e-06, + "loss": 2.0328, + "step": 90820 + }, + { + "epoch": 153.45, + "learning_rate": 4.197349326185346e-06, + "loss": 2.0545, + "step": 90840 + }, + { + "epoch": 153.48, + "learning_rate": 4.179368374806436e-06, + "loss": 2.0653, + "step": 90860 + }, + { + "epoch": 153.51, + "learning_rate": 4.161424340282804e-06, + "loss": 2.053, + "step": 90880 + }, + { + "epoch": 153.55, + "learning_rate": 4.143517237071642e-06, + "loss": 2.0513, + "step": 90900 + }, + { + "epoch": 153.58, + "learning_rate": 4.125647079600381e-06, + "loss": 2.0428, + "step": 90920 + }, + { + "epoch": 153.61, + "learning_rate": 4.107813882266659e-06, + "loss": 2.0369, + "step": 90940 + }, + { + "epoch": 153.65, + "learning_rate": 4.0900176594384e-06, + "loss": 2.0453, + "step": 90960 + }, + { + "epoch": 153.68, + "learning_rate": 4.072258425453668e-06, + "loss": 2.0469, + "step": 90980 + }, + { + "epoch": 153.72, + "learning_rate": 4.054536194620778e-06, + "loss": 2.0482, + "step": 91000 + }, + { + "epoch": 153.72, + "eval_loss": 2.464383602142334, + "eval_runtime": 47.2598, + "eval_samples_per_second": 20.927, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.003930989641376842, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03078084773158803, + "eval_tse_type": 0.0006516914951408478, + "step": 91000 + }, + { + "epoch": 153.75, + "learning_rate": 4.036850981218182e-06, + "loss": 2.0542, + "step": 91020 + }, + { + "epoch": 153.78, + "learning_rate": 4.019202799494565e-06, + "loss": 2.045, + "step": 91040 + }, + { + "epoch": 153.82, + "learning_rate": 4.001591663668752e-06, + "loss": 2.055, + "step": 91060 + }, + { + "epoch": 153.85, + "learning_rate": 3.984017587929695e-06, + "loss": 2.0465, + "step": 91080 + }, + { + "epoch": 153.89, + "learning_rate": 3.9664805864365165e-06, + "loss": 2.0329, + "step": 91100 + }, + { + "epoch": 153.92, + "learning_rate": 3.948980673318464e-06, + "loss": 2.0599, + "step": 91120 + }, + { + "epoch": 153.95, + "learning_rate": 3.93151786267491e-06, + "loss": 2.0504, + "step": 91140 + }, + { + "epoch": 153.99, + "learning_rate": 3.914092168575306e-06, + "loss": 2.047, + "step": 91160 + }, + { + "epoch": 154.02, + "learning_rate": 3.896703605059232e-06, + "loss": 2.0447, + "step": 91180 + }, + { + "epoch": 154.05, + "learning_rate": 3.879352186136353e-06, + "loss": 2.0373, + "step": 91200 + }, + { + "epoch": 154.09, + "learning_rate": 3.862037925786366e-06, + "loss": 2.0415, + "step": 91220 + }, + { + "epoch": 154.12, + "learning_rate": 3.844760837959072e-06, + "loss": 2.0438, + "step": 91240 + }, + { + "epoch": 154.16, + "learning_rate": 3.827520936574303e-06, + "loss": 2.0415, + "step": 91260 + }, + { + "epoch": 154.19, + "learning_rate": 3.8103182355219535e-06, + "loss": 2.0403, + "step": 91280 + }, + { + "epoch": 154.22, + "learning_rate": 3.7931527486619046e-06, + "loss": 2.0389, + "step": 91300 + }, + { + "epoch": 154.26, + "learning_rate": 3.7760244898240925e-06, + "loss": 2.0378, + "step": 91320 + }, + { + "epoch": 154.29, + "learning_rate": 3.758933472808446e-06, + "loss": 2.0454, + "step": 91340 + }, + { + "epoch": 154.32, + "learning_rate": 3.7418797113848824e-06, + "loss": 2.0406, + "step": 91360 + }, + { + "epoch": 154.36, + "learning_rate": 3.724863219293312e-06, + "loss": 2.0296, + "step": 91380 + }, + { + "epoch": 154.39, + "learning_rate": 3.707884010243623e-06, + "loss": 2.0471, + "step": 91400 + }, + { + "epoch": 154.43, + "learning_rate": 3.6909420979156505e-06, + "loss": 2.0437, + "step": 91420 + }, + { + "epoch": 154.46, + "learning_rate": 3.6740374959591874e-06, + "loss": 2.033, + "step": 91440 + }, + { + "epoch": 154.49, + "learning_rate": 3.6571702179939603e-06, + "loss": 2.0283, + "step": 91460 + }, + { + "epoch": 154.53, + "learning_rate": 3.640340277609644e-06, + "loss": 2.0348, + "step": 91480 + }, + { + "epoch": 154.56, + "learning_rate": 3.6235476883658025e-06, + "loss": 2.0474, + "step": 91500 + }, + { + "epoch": 154.59, + "learning_rate": 3.6067924637919413e-06, + "loss": 2.0478, + "step": 91520 + }, + { + "epoch": 154.63, + "learning_rate": 3.5900746173874124e-06, + "loss": 2.0538, + "step": 91540 + }, + { + "epoch": 154.66, + "learning_rate": 3.5733941626215182e-06, + "loss": 2.0531, + "step": 91560 + }, + { + "epoch": 154.7, + "learning_rate": 3.5567511129333707e-06, + "loss": 2.0393, + "step": 91580 + }, + { + "epoch": 154.73, + "learning_rate": 3.5401454817319935e-06, + "loss": 2.0603, + "step": 91600 + }, + { + "epoch": 154.76, + "learning_rate": 3.523577282396229e-06, + "loss": 2.0581, + "step": 91620 + }, + { + "epoch": 154.8, + "learning_rate": 3.507046528274782e-06, + "loss": 2.0451, + "step": 91640 + }, + { + "epoch": 154.83, + "learning_rate": 3.4905532326861944e-06, + "loss": 2.0545, + "step": 91660 + }, + { + "epoch": 154.86, + "learning_rate": 3.4740974089187917e-06, + "loss": 2.0328, + "step": 91680 + }, + { + "epoch": 154.9, + "learning_rate": 3.4576790702307627e-06, + "loss": 2.0388, + "step": 91700 + }, + { + "epoch": 154.93, + "learning_rate": 3.441298229850043e-06, + "loss": 2.0402, + "step": 91720 + }, + { + "epoch": 154.97, + "learning_rate": 3.4249549009743866e-06, + "loss": 2.032, + "step": 91740 + }, + { + "epoch": 155.0, + "learning_rate": 3.4086490967713214e-06, + "loss": 2.0543, + "step": 91760 + }, + { + "epoch": 155.03, + "learning_rate": 3.392380830378139e-06, + "loss": 2.0326, + "step": 91780 + }, + { + "epoch": 155.07, + "learning_rate": 3.3761501149018936e-06, + "loss": 2.0247, + "step": 91800 + }, + { + "epoch": 155.1, + "learning_rate": 3.3599569634193697e-06, + "loss": 2.0473, + "step": 91820 + }, + { + "epoch": 155.14, + "learning_rate": 3.343801388977097e-06, + "loss": 2.0431, + "step": 91840 + }, + { + "epoch": 155.17, + "learning_rate": 3.327683404591331e-06, + "loss": 2.0301, + "step": 91860 + }, + { + "epoch": 155.2, + "learning_rate": 3.31160302324805e-06, + "loss": 2.0392, + "step": 91880 + }, + { + "epoch": 155.24, + "learning_rate": 3.295560257902908e-06, + "loss": 2.0383, + "step": 91900 + }, + { + "epoch": 155.27, + "learning_rate": 3.279555121481287e-06, + "loss": 2.0394, + "step": 91920 + }, + { + "epoch": 155.3, + "learning_rate": 3.2635876268782395e-06, + "loss": 2.0452, + "step": 91940 + }, + { + "epoch": 155.34, + "learning_rate": 3.2476577869584748e-06, + "loss": 2.0403, + "step": 91960 + }, + { + "epoch": 155.37, + "learning_rate": 3.2317656145563813e-06, + "loss": 2.038, + "step": 91980 + }, + { + "epoch": 155.41, + "learning_rate": 3.215911122475995e-06, + "loss": 2.0428, + "step": 92000 + }, + { + "epoch": 155.41, + "eval_loss": 2.4640307426452637, + "eval_runtime": 47.2345, + "eval_samples_per_second": 20.938, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.0037396565895704854, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03079625005469537, + "eval_tse_type": 0.0007390321312212111, + "step": 92000 + }, + { + "epoch": 155.44, + "learning_rate": 3.2000943234910097e-06, + "loss": 2.0297, + "step": 92020 + }, + { + "epoch": 155.47, + "learning_rate": 3.184315230344703e-06, + "loss": 2.0349, + "step": 92040 + }, + { + "epoch": 155.51, + "learning_rate": 3.1685738557500233e-06, + "loss": 2.0317, + "step": 92060 + }, + { + "epoch": 155.54, + "learning_rate": 3.1528702123895194e-06, + "loss": 2.0476, + "step": 92080 + }, + { + "epoch": 155.57, + "learning_rate": 3.1372043129153205e-06, + "loss": 2.0595, + "step": 92100 + }, + { + "epoch": 155.61, + "learning_rate": 3.121576169949164e-06, + "loss": 2.0417, + "step": 92120 + }, + { + "epoch": 155.64, + "learning_rate": 3.1059857960823614e-06, + "loss": 2.0464, + "step": 92140 + }, + { + "epoch": 155.68, + "learning_rate": 3.0904332038757977e-06, + "loss": 2.0519, + "step": 92160 + }, + { + "epoch": 155.71, + "learning_rate": 3.0749184058599114e-06, + "loss": 2.0388, + "step": 92180 + }, + { + "epoch": 155.74, + "learning_rate": 3.059441414534686e-06, + "loss": 2.0524, + "step": 92200 + }, + { + "epoch": 155.78, + "learning_rate": 3.044002242369681e-06, + "loss": 2.0664, + "step": 92220 + }, + { + "epoch": 155.81, + "learning_rate": 3.0286009018039354e-06, + "loss": 2.0385, + "step": 92240 + }, + { + "epoch": 155.84, + "learning_rate": 3.0132374052460388e-06, + "loss": 2.0547, + "step": 92260 + }, + { + "epoch": 155.88, + "learning_rate": 2.9979117650740797e-06, + "loss": 2.0344, + "step": 92280 + }, + { + "epoch": 155.91, + "learning_rate": 2.982623993635658e-06, + "loss": 2.0401, + "step": 92300 + }, + { + "epoch": 155.95, + "learning_rate": 2.9673741032478443e-06, + "loss": 2.0388, + "step": 92320 + }, + { + "epoch": 155.98, + "learning_rate": 2.952162106197204e-06, + "loss": 2.0434, + "step": 92340 + }, + { + "epoch": 156.01, + "learning_rate": 2.9369880147397667e-06, + "loss": 2.0322, + "step": 92360 + }, + { + "epoch": 156.05, + "learning_rate": 2.9218518411010252e-06, + "loss": 2.0325, + "step": 92380 + }, + { + "epoch": 156.08, + "learning_rate": 2.9067535974759364e-06, + "loss": 2.0453, + "step": 92400 + }, + { + "epoch": 156.11, + "learning_rate": 2.8916932960288466e-06, + "loss": 2.0271, + "step": 92420 + }, + { + "epoch": 156.15, + "learning_rate": 2.876670948893606e-06, + "loss": 2.0282, + "step": 92440 + }, + { + "epoch": 156.18, + "learning_rate": 2.861686568173427e-06, + "loss": 2.0411, + "step": 92460 + }, + { + "epoch": 156.22, + "learning_rate": 2.846740165940959e-06, + "loss": 2.0425, + "step": 92480 + }, + { + "epoch": 156.25, + "learning_rate": 2.8318317542382457e-06, + "loss": 2.0407, + "step": 92500 + }, + { + "epoch": 156.28, + "learning_rate": 2.8169613450767295e-06, + "loss": 2.0367, + "step": 92520 + }, + { + "epoch": 156.32, + "learning_rate": 2.802128950437244e-06, + "loss": 2.0385, + "step": 92540 + }, + { + "epoch": 156.35, + "learning_rate": 2.7873345822699514e-06, + "loss": 2.0454, + "step": 92560 + }, + { + "epoch": 156.39, + "learning_rate": 2.7725782524944286e-06, + "loss": 2.0423, + "step": 92580 + }, + { + "epoch": 156.42, + "learning_rate": 2.7578599729995768e-06, + "loss": 2.0435, + "step": 92600 + }, + { + "epoch": 156.45, + "learning_rate": 2.7431797556436546e-06, + "loss": 2.0424, + "step": 92620 + }, + { + "epoch": 156.49, + "learning_rate": 2.7285376122542283e-06, + "loss": 2.029, + "step": 92640 + }, + { + "epoch": 156.52, + "learning_rate": 2.7139335546282286e-06, + "loss": 2.0291, + "step": 92660 + }, + { + "epoch": 156.55, + "learning_rate": 2.6993675945318875e-06, + "loss": 2.0365, + "step": 92680 + }, + { + "epoch": 156.59, + "learning_rate": 2.684839743700712e-06, + "loss": 2.0487, + "step": 92700 + }, + { + "epoch": 156.62, + "learning_rate": 2.67035001383954e-06, + "loss": 2.038, + "step": 92720 + }, + { + "epoch": 156.66, + "learning_rate": 2.6558984166224875e-06, + "loss": 2.0447, + "step": 92740 + }, + { + "epoch": 156.69, + "learning_rate": 2.641484963692953e-06, + "loss": 2.0436, + "step": 92760 + }, + { + "epoch": 156.72, + "learning_rate": 2.6271096666635685e-06, + "loss": 2.0558, + "step": 92780 + }, + { + "epoch": 156.76, + "learning_rate": 2.612772537116276e-06, + "loss": 2.0376, + "step": 92800 + }, + { + "epoch": 156.79, + "learning_rate": 2.598473586602246e-06, + "loss": 2.051, + "step": 92820 + }, + { + "epoch": 156.82, + "learning_rate": 2.5842128266418573e-06, + "loss": 2.0496, + "step": 92840 + }, + { + "epoch": 156.86, + "learning_rate": 2.56999026872477e-06, + "loss": 2.035, + "step": 92860 + }, + { + "epoch": 156.89, + "learning_rate": 2.5558059243098286e-06, + "loss": 2.0461, + "step": 92880 + }, + { + "epoch": 156.93, + "learning_rate": 2.5416598048251194e-06, + "loss": 2.0444, + "step": 92900 + }, + { + "epoch": 156.96, + "learning_rate": 2.527551921667898e-06, + "loss": 2.0564, + "step": 92920 + }, + { + "epoch": 156.99, + "learning_rate": 2.5134822862046326e-06, + "loss": 2.0435, + "step": 92940 + }, + { + "epoch": 157.03, + "learning_rate": 2.4994509097709894e-06, + "loss": 2.0238, + "step": 92960 + }, + { + "epoch": 157.06, + "learning_rate": 2.485457803671781e-06, + "loss": 2.0347, + "step": 92980 + }, + { + "epoch": 157.09, + "learning_rate": 2.4715029791810006e-06, + "loss": 2.0331, + "step": 93000 + }, + { + "epoch": 157.09, + "eval_loss": 2.4623606204986572, + "eval_runtime": 47.7365, + "eval_samples_per_second": 20.718, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.0040481646977106375, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.031722884505978204, + "eval_tse_type": 0.0006198600738540071, + "step": 93000 + }, + { + "epoch": 157.13, + "learning_rate": 2.4575864475418098e-06, + "loss": 2.0365, + "step": 93020 + }, + { + "epoch": 157.16, + "learning_rate": 2.443708219966501e-06, + "loss": 2.0401, + "step": 93040 + }, + { + "epoch": 157.2, + "learning_rate": 2.4298683076365014e-06, + "loss": 2.0307, + "step": 93060 + }, + { + "epoch": 157.23, + "learning_rate": 2.4160667217023867e-06, + "loss": 2.0425, + "step": 93080 + }, + { + "epoch": 157.26, + "learning_rate": 2.4023034732838514e-06, + "loss": 2.0362, + "step": 93100 + }, + { + "epoch": 157.3, + "learning_rate": 2.3885785734696853e-06, + "loss": 2.0341, + "step": 93120 + }, + { + "epoch": 157.33, + "learning_rate": 2.3748920333178048e-06, + "loss": 2.0471, + "step": 93140 + }, + { + "epoch": 157.36, + "learning_rate": 2.361243863855184e-06, + "loss": 2.0375, + "step": 93160 + }, + { + "epoch": 157.4, + "learning_rate": 2.347634076077937e-06, + "loss": 2.0435, + "step": 93180 + }, + { + "epoch": 157.43, + "learning_rate": 2.3340626809512044e-06, + "loss": 2.0329, + "step": 93200 + }, + { + "epoch": 157.47, + "learning_rate": 2.3205296894092233e-06, + "loss": 2.0434, + "step": 93220 + }, + { + "epoch": 157.5, + "learning_rate": 2.3070351123552714e-06, + "loss": 2.0314, + "step": 93240 + }, + { + "epoch": 157.53, + "learning_rate": 2.2935789606616965e-06, + "loss": 2.0237, + "step": 93260 + }, + { + "epoch": 157.57, + "learning_rate": 2.2801612451698705e-06, + "loss": 2.0441, + "step": 93280 + }, + { + "epoch": 157.6, + "learning_rate": 2.266781976690202e-06, + "loss": 2.0352, + "step": 93300 + }, + { + "epoch": 157.64, + "learning_rate": 2.253441166002135e-06, + "loss": 2.0399, + "step": 93320 + }, + { + "epoch": 157.67, + "learning_rate": 2.2401388238541098e-06, + "loss": 2.0415, + "step": 93340 + }, + { + "epoch": 157.7, + "learning_rate": 2.2268749609635877e-06, + "loss": 2.044, + "step": 93360 + }, + { + "epoch": 157.74, + "learning_rate": 2.213649588017025e-06, + "loss": 2.0574, + "step": 93380 + }, + { + "epoch": 157.77, + "learning_rate": 2.20046271566986e-06, + "loss": 2.0417, + "step": 93400 + }, + { + "epoch": 157.8, + "learning_rate": 2.1873143545465323e-06, + "loss": 2.0478, + "step": 93420 + }, + { + "epoch": 157.84, + "learning_rate": 2.1742045152404246e-06, + "loss": 2.0399, + "step": 93440 + }, + { + "epoch": 157.87, + "learning_rate": 2.1611332083138923e-06, + "loss": 2.0469, + "step": 93460 + }, + { + "epoch": 157.91, + "learning_rate": 2.1481004442982676e-06, + "loss": 2.0407, + "step": 93480 + }, + { + "epoch": 157.94, + "learning_rate": 2.1351062336938065e-06, + "loss": 2.0424, + "step": 93500 + }, + { + "epoch": 157.97, + "learning_rate": 2.1221505869697065e-06, + "loss": 2.0346, + "step": 93520 + }, + { + "epoch": 158.01, + "learning_rate": 2.1092335145640963e-06, + "loss": 2.0366, + "step": 93540 + }, + { + "epoch": 158.04, + "learning_rate": 2.096355026884045e-06, + "loss": 2.039, + "step": 93560 + }, + { + "epoch": 158.07, + "learning_rate": 2.083515134305497e-06, + "loss": 2.0204, + "step": 93580 + }, + { + "epoch": 158.11, + "learning_rate": 2.070713847173328e-06, + "loss": 2.0283, + "step": 93600 + }, + { + "epoch": 158.14, + "learning_rate": 2.0579511758013093e-06, + "loss": 2.0306, + "step": 93620 + }, + { + "epoch": 158.18, + "learning_rate": 2.045227130472088e-06, + "loss": 2.0376, + "step": 93640 + }, + { + "epoch": 158.21, + "learning_rate": 2.032541721437209e-06, + "loss": 2.0393, + "step": 93660 + }, + { + "epoch": 158.24, + "learning_rate": 2.0198949589170513e-06, + "loss": 2.0418, + "step": 93680 + }, + { + "epoch": 158.28, + "learning_rate": 2.007286853100915e-06, + "loss": 2.0292, + "step": 93700 + }, + { + "epoch": 158.31, + "learning_rate": 1.9947174141469073e-06, + "loss": 2.0395, + "step": 93720 + }, + { + "epoch": 158.34, + "learning_rate": 1.9821866521819945e-06, + "loss": 2.0445, + "step": 93740 + }, + { + "epoch": 158.38, + "learning_rate": 1.969694577301995e-06, + "loss": 2.0421, + "step": 93760 + }, + { + "epoch": 158.41, + "learning_rate": 1.9572411995715356e-06, + "loss": 2.0373, + "step": 93780 + }, + { + "epoch": 158.45, + "learning_rate": 1.9448265290240964e-06, + "loss": 2.0443, + "step": 93800 + }, + { + "epoch": 158.48, + "learning_rate": 1.9324505756619314e-06, + "loss": 2.0406, + "step": 93820 + }, + { + "epoch": 158.51, + "learning_rate": 1.920113349456143e-06, + "loss": 2.0412, + "step": 93840 + }, + { + "epoch": 158.55, + "learning_rate": 1.907814860346596e-06, + "loss": 2.0393, + "step": 93860 + }, + { + "epoch": 158.58, + "learning_rate": 1.8955551182419706e-06, + "loss": 2.0345, + "step": 93880 + }, + { + "epoch": 158.61, + "learning_rate": 1.8833341330197097e-06, + "loss": 2.0256, + "step": 93900 + }, + { + "epoch": 158.65, + "learning_rate": 1.8711519145260537e-06, + "loss": 2.0332, + "step": 93920 + }, + { + "epoch": 158.68, + "learning_rate": 1.8590084725759849e-06, + "loss": 2.0308, + "step": 93940 + }, + { + "epoch": 158.72, + "learning_rate": 1.8469038169532603e-06, + "loss": 2.0421, + "step": 93960 + }, + { + "epoch": 158.75, + "learning_rate": 1.8348379574103791e-06, + "loss": 2.0384, + "step": 93980 + }, + { + "epoch": 158.78, + "learning_rate": 1.822810903668587e-06, + "loss": 2.0366, + "step": 94000 + }, + { + "epoch": 158.78, + "eval_loss": 2.461648464202881, + "eval_runtime": 47.3845, + "eval_samples_per_second": 20.872, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.0035718473484585298, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03096087466625037, + "eval_tse_type": 0.0006987710211795764, + "step": 94000 + }, + { + "epoch": 158.82, + "learning_rate": 1.8114211553132587e-06, + "loss": 2.0566, + "step": 94020 + }, + { + "epoch": 158.85, + "learning_rate": 1.799469800725917e-06, + "loss": 2.0375, + "step": 94040 + }, + { + "epoch": 158.89, + "learning_rate": 1.7875572804351482e-06, + "loss": 2.0472, + "step": 94060 + }, + { + "epoch": 158.92, + "learning_rate": 1.7756836040386637e-06, + "loss": 2.0366, + "step": 94080 + }, + { + "epoch": 158.95, + "learning_rate": 1.7638487811028615e-06, + "loss": 2.0499, + "step": 94100 + }, + { + "epoch": 158.99, + "learning_rate": 1.7520528211628416e-06, + "loss": 2.0325, + "step": 94120 + }, + { + "epoch": 159.02, + "learning_rate": 1.7402957337224024e-06, + "loss": 2.0452, + "step": 94140 + }, + { + "epoch": 159.05, + "learning_rate": 1.7285775282540217e-06, + "loss": 2.0372, + "step": 94160 + }, + { + "epoch": 159.09, + "learning_rate": 1.716898214198831e-06, + "loss": 2.0332, + "step": 94180 + }, + { + "epoch": 159.12, + "learning_rate": 1.7052578009666586e-06, + "loss": 2.0265, + "step": 94200 + }, + { + "epoch": 159.16, + "learning_rate": 1.6936562979359694e-06, + "loss": 2.0251, + "step": 94220 + }, + { + "epoch": 159.19, + "learning_rate": 1.6820937144538807e-06, + "loss": 2.0177, + "step": 94240 + }, + { + "epoch": 159.22, + "learning_rate": 1.6705700598361573e-06, + "loss": 2.0404, + "step": 94260 + }, + { + "epoch": 159.26, + "learning_rate": 1.659085343367206e-06, + "loss": 2.0293, + "step": 94280 + }, + { + "epoch": 159.29, + "learning_rate": 1.6476395743000528e-06, + "loss": 2.0256, + "step": 94300 + }, + { + "epoch": 159.32, + "learning_rate": 1.6362327618563489e-06, + "loss": 2.0374, + "step": 94320 + }, + { + "epoch": 159.36, + "learning_rate": 1.6248649152263429e-06, + "loss": 2.0277, + "step": 94340 + }, + { + "epoch": 159.39, + "learning_rate": 1.613536043568925e-06, + "loss": 2.0407, + "step": 94360 + }, + { + "epoch": 159.43, + "learning_rate": 1.6022461560115497e-06, + "loss": 2.0404, + "step": 94380 + }, + { + "epoch": 159.46, + "learning_rate": 1.5909952616502743e-06, + "loss": 2.0291, + "step": 94400 + }, + { + "epoch": 159.49, + "learning_rate": 1.5797833695497477e-06, + "loss": 2.0519, + "step": 94420 + }, + { + "epoch": 159.53, + "learning_rate": 1.5686104887431884e-06, + "loss": 2.0377, + "step": 94440 + }, + { + "epoch": 159.56, + "learning_rate": 1.557476628232385e-06, + "loss": 2.0296, + "step": 94460 + }, + { + "epoch": 159.59, + "learning_rate": 1.546381796987678e-06, + "loss": 2.0499, + "step": 94480 + }, + { + "epoch": 159.63, + "learning_rate": 1.5353260039479844e-06, + "loss": 2.0457, + "step": 94500 + }, + { + "epoch": 159.66, + "learning_rate": 1.5243092580207507e-06, + "loss": 2.0564, + "step": 94520 + }, + { + "epoch": 159.7, + "learning_rate": 1.5133315680819715e-06, + "loss": 2.0416, + "step": 94540 + }, + { + "epoch": 159.73, + "learning_rate": 1.5023929429761718e-06, + "loss": 2.0355, + "step": 94560 + }, + { + "epoch": 159.76, + "learning_rate": 1.4914933915164075e-06, + "loss": 2.0412, + "step": 94580 + }, + { + "epoch": 159.8, + "learning_rate": 1.480632922484254e-06, + "loss": 2.0411, + "step": 94600 + }, + { + "epoch": 159.83, + "learning_rate": 1.4698115446297845e-06, + "loss": 2.0417, + "step": 94620 + }, + { + "epoch": 159.86, + "learning_rate": 1.459029266671591e-06, + "loss": 2.0415, + "step": 94640 + }, + { + "epoch": 159.9, + "learning_rate": 1.4482860972967637e-06, + "loss": 2.0404, + "step": 94660 + }, + { + "epoch": 159.93, + "learning_rate": 1.4375820451608846e-06, + "loss": 2.0273, + "step": 94680 + }, + { + "epoch": 159.97, + "learning_rate": 1.4269171188879994e-06, + "loss": 2.0302, + "step": 94700 + }, + { + "epoch": 160.0, + "learning_rate": 1.4162913270706568e-06, + "loss": 2.0465, + "step": 94720 + }, + { + "epoch": 160.03, + "learning_rate": 1.4057046782698757e-06, + "loss": 2.0431, + "step": 94740 + }, + { + "epoch": 160.07, + "learning_rate": 1.3951571810151109e-06, + "loss": 2.0337, + "step": 94760 + }, + { + "epoch": 160.1, + "learning_rate": 1.3846488438042981e-06, + "loss": 2.0281, + "step": 94780 + }, + { + "epoch": 160.14, + "learning_rate": 1.3741796751038094e-06, + "loss": 2.0424, + "step": 94800 + }, + { + "epoch": 160.17, + "learning_rate": 1.3637496833484754e-06, + "loss": 2.0511, + "step": 94820 + }, + { + "epoch": 160.2, + "learning_rate": 1.3533588769415406e-06, + "loss": 2.0362, + "step": 94840 + }, + { + "epoch": 160.24, + "learning_rate": 1.3430072642546865e-06, + "loss": 2.0352, + "step": 94860 + }, + { + "epoch": 160.27, + "learning_rate": 1.3326948536280415e-06, + "loss": 2.031, + "step": 94880 + }, + { + "epoch": 160.3, + "learning_rate": 1.3224216533701095e-06, + "loss": 2.028, + "step": 94900 + }, + { + "epoch": 160.34, + "learning_rate": 1.3121876717578308e-06, + "loss": 2.0338, + "step": 94920 + }, + { + "epoch": 160.37, + "learning_rate": 1.3019929170365374e-06, + "loss": 2.0436, + "step": 94940 + }, + { + "epoch": 160.41, + "learning_rate": 1.291837397419965e-06, + "loss": 2.0394, + "step": 94960 + }, + { + "epoch": 160.44, + "learning_rate": 1.2817211210902181e-06, + "loss": 2.0197, + "step": 94980 + }, + { + "epoch": 160.47, + "learning_rate": 1.2716440961978105e-06, + "loss": 2.0417, + "step": 95000 + }, + { + "epoch": 160.47, + "eval_loss": 2.461150884628296, + "eval_runtime": 50.8979, + "eval_samples_per_second": 19.431, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.004373840688530554, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.030682119776855976, + "eval_tse_type": 0.0005216763445801307, + "step": 95000 + }, + { + "epoch": 160.51, + "learning_rate": 1.2616063308616144e-06, + "loss": 2.0446, + "step": 95020 + }, + { + "epoch": 160.54, + "learning_rate": 1.2516078331688718e-06, + "loss": 2.0292, + "step": 95040 + }, + { + "epoch": 160.57, + "learning_rate": 1.2416486111752057e-06, + "loss": 2.0138, + "step": 95060 + }, + { + "epoch": 160.61, + "learning_rate": 1.2317286729045586e-06, + "loss": 2.0353, + "step": 95080 + }, + { + "epoch": 160.64, + "learning_rate": 1.221848026349276e-06, + "loss": 2.0454, + "step": 95100 + }, + { + "epoch": 160.68, + "learning_rate": 1.2120066794699847e-06, + "loss": 2.0298, + "step": 95120 + }, + { + "epoch": 160.71, + "learning_rate": 1.2022046401957032e-06, + "loss": 2.0274, + "step": 95140 + }, + { + "epoch": 160.74, + "learning_rate": 1.1924419164237422e-06, + "loss": 2.049, + "step": 95160 + }, + { + "epoch": 160.78, + "learning_rate": 1.182718516019765e-06, + "loss": 2.0294, + "step": 95180 + }, + { + "epoch": 160.81, + "learning_rate": 1.1730344468177335e-06, + "loss": 2.0408, + "step": 95200 + }, + { + "epoch": 160.84, + "learning_rate": 1.1633897166199226e-06, + "loss": 2.0379, + "step": 95220 + }, + { + "epoch": 160.88, + "learning_rate": 1.1537843331969278e-06, + "loss": 2.0456, + "step": 95240 + }, + { + "epoch": 160.91, + "learning_rate": 1.1442183042876254e-06, + "loss": 2.0278, + "step": 95260 + }, + { + "epoch": 160.95, + "learning_rate": 1.1346916375991945e-06, + "loss": 2.0417, + "step": 95280 + }, + { + "epoch": 160.98, + "learning_rate": 1.12520434080709e-06, + "loss": 2.0421, + "step": 95300 + }, + { + "epoch": 161.01, + "learning_rate": 1.1157564215550698e-06, + "loss": 2.0323, + "step": 95320 + }, + { + "epoch": 161.05, + "learning_rate": 1.1063478874551503e-06, + "loss": 2.0476, + "step": 95340 + }, + { + "epoch": 161.08, + "learning_rate": 1.0969787460876012e-06, + "loss": 2.028, + "step": 95360 + }, + { + "epoch": 161.11, + "learning_rate": 1.0876490050009901e-06, + "loss": 2.0345, + "step": 95380 + }, + { + "epoch": 161.15, + "learning_rate": 1.0783586717121097e-06, + "loss": 2.046, + "step": 95400 + }, + { + "epoch": 161.18, + "learning_rate": 1.0691077537060224e-06, + "loss": 2.0359, + "step": 95420 + }, + { + "epoch": 161.22, + "learning_rate": 1.0598962584360051e-06, + "loss": 2.0275, + "step": 95440 + }, + { + "epoch": 161.25, + "learning_rate": 1.0507241933236157e-06, + "loss": 2.0295, + "step": 95460 + }, + { + "epoch": 161.28, + "learning_rate": 1.0415915657586206e-06, + "loss": 2.0444, + "step": 95480 + }, + { + "epoch": 161.32, + "learning_rate": 1.032498383099001e-06, + "loss": 2.0354, + "step": 95500 + }, + { + "epoch": 161.35, + "learning_rate": 1.023444652670974e-06, + "loss": 2.0188, + "step": 95520 + }, + { + "epoch": 161.39, + "learning_rate": 1.014430381768966e-06, + "loss": 2.0382, + "step": 95540 + }, + { + "epoch": 161.42, + "learning_rate": 1.0054555776556228e-06, + "loss": 2.0387, + "step": 95560 + }, + { + "epoch": 161.45, + "learning_rate": 9.965202475617664e-07, + "loss": 2.0456, + "step": 95580 + }, + { + "epoch": 161.49, + "learning_rate": 9.87624398686432e-07, + "loss": 2.0232, + "step": 95600 + }, + { + "epoch": 161.52, + "learning_rate": 9.787680381968589e-07, + "loss": 2.0324, + "step": 95620 + }, + { + "epoch": 161.55, + "learning_rate": 9.699511732284393e-07, + "loss": 2.0379, + "step": 95640 + }, + { + "epoch": 161.59, + "learning_rate": 9.611738108847745e-07, + "loss": 2.0311, + "step": 95660 + }, + { + "epoch": 161.62, + "learning_rate": 9.524359582376241e-07, + "loss": 2.0372, + "step": 95680 + }, + { + "epoch": 161.66, + "learning_rate": 9.437376223269178e-07, + "loss": 2.0318, + "step": 95700 + }, + { + "epoch": 161.69, + "learning_rate": 9.350788101607444e-07, + "loss": 2.0336, + "step": 95720 + }, + { + "epoch": 161.72, + "learning_rate": 9.264595287153511e-07, + "loss": 2.039, + "step": 95740 + }, + { + "epoch": 161.76, + "learning_rate": 9.178797849351494e-07, + "loss": 2.0431, + "step": 95760 + }, + { + "epoch": 161.79, + "learning_rate": 9.093395857326714e-07, + "loss": 2.0399, + "step": 95780 + }, + { + "epoch": 161.82, + "learning_rate": 9.008389379886073e-07, + "loss": 2.0331, + "step": 95800 + }, + { + "epoch": 161.86, + "learning_rate": 8.923778485517676e-07, + "loss": 2.0455, + "step": 95820 + }, + { + "epoch": 161.89, + "learning_rate": 8.839563242391158e-07, + "loss": 2.03, + "step": 95840 + }, + { + "epoch": 161.93, + "learning_rate": 8.755743718357023e-07, + "loss": 2.0234, + "step": 95860 + }, + { + "epoch": 161.96, + "learning_rate": 8.672319980947252e-07, + "loss": 2.0279, + "step": 95880 + }, + { + "epoch": 161.99, + "learning_rate": 8.589292097374857e-07, + "loss": 2.0426, + "step": 95900 + }, + { + "epoch": 162.03, + "learning_rate": 8.506660134533828e-07, + "loss": 2.0192, + "step": 95920 + }, + { + "epoch": 162.06, + "learning_rate": 8.424424158999355e-07, + "loss": 2.0289, + "step": 95940 + }, + { + "epoch": 162.09, + "learning_rate": 8.342584237027329e-07, + "loss": 2.0328, + "step": 95960 + }, + { + "epoch": 162.13, + "learning_rate": 8.261140434554892e-07, + "loss": 2.0473, + "step": 95980 + }, + { + "epoch": 162.16, + "learning_rate": 8.180092817199669e-07, + "loss": 2.0338, + "step": 96000 + }, + { + "epoch": 162.16, + "eval_loss": 2.460881233215332, + "eval_runtime": 47.5752, + "eval_samples_per_second": 20.788, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.0036474163620149, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03153410969713916, + "eval_tse_type": 0.0005627184100017492, + "step": 96000 + }, + { + "epoch": 162.2, + "learning_rate": 8.09944145026037e-07, + "loss": 2.0297, + "step": 96020 + }, + { + "epoch": 162.23, + "learning_rate": 8.019186398716239e-07, + "loss": 2.0275, + "step": 96040 + }, + { + "epoch": 162.26, + "learning_rate": 7.939327727227441e-07, + "loss": 2.0375, + "step": 96060 + }, + { + "epoch": 162.3, + "learning_rate": 7.859865500134622e-07, + "loss": 2.0264, + "step": 96080 + }, + { + "epoch": 162.33, + "learning_rate": 7.780799781459014e-07, + "loss": 2.0267, + "step": 96100 + }, + { + "epoch": 162.36, + "learning_rate": 7.702130634902493e-07, + "loss": 2.0366, + "step": 96120 + }, + { + "epoch": 162.4, + "learning_rate": 7.62385812384736e-07, + "loss": 2.026, + "step": 96140 + }, + { + "epoch": 162.43, + "learning_rate": 7.545982311356337e-07, + "loss": 2.0294, + "step": 96160 + }, + { + "epoch": 162.47, + "learning_rate": 7.468503260172566e-07, + "loss": 2.0351, + "step": 96180 + }, + { + "epoch": 162.5, + "learning_rate": 7.391421032719559e-07, + "loss": 2.0421, + "step": 96200 + }, + { + "epoch": 162.53, + "learning_rate": 7.314735691101082e-07, + "loss": 2.0266, + "step": 96220 + }, + { + "epoch": 162.57, + "learning_rate": 7.238447297101048e-07, + "loss": 2.0454, + "step": 96240 + }, + { + "epoch": 162.6, + "learning_rate": 7.162555912183677e-07, + "loss": 2.0501, + "step": 96260 + }, + { + "epoch": 162.64, + "learning_rate": 7.087061597493283e-07, + "loss": 2.0174, + "step": 96280 + }, + { + "epoch": 162.67, + "learning_rate": 7.011964413854377e-07, + "loss": 2.024, + "step": 96300 + }, + { + "epoch": 162.7, + "learning_rate": 6.937264421771228e-07, + "loss": 2.0307, + "step": 96320 + }, + { + "epoch": 162.74, + "learning_rate": 6.862961681428304e-07, + "loss": 2.043, + "step": 96340 + }, + { + "epoch": 162.77, + "learning_rate": 6.789056252690107e-07, + "loss": 2.033, + "step": 96360 + }, + { + "epoch": 162.8, + "learning_rate": 6.71554819510084e-07, + "loss": 2.0393, + "step": 96380 + }, + { + "epoch": 162.84, + "learning_rate": 6.642437567884574e-07, + "loss": 2.0439, + "step": 96400 + }, + { + "epoch": 162.87, + "learning_rate": 6.569724429945246e-07, + "loss": 2.0437, + "step": 96420 + }, + { + "epoch": 162.91, + "learning_rate": 6.497408839866548e-07, + "loss": 2.0499, + "step": 96440 + }, + { + "epoch": 162.94, + "learning_rate": 6.425490855911819e-07, + "loss": 2.0207, + "step": 96460 + }, + { + "epoch": 162.97, + "learning_rate": 6.353970536024045e-07, + "loss": 2.0393, + "step": 96480 + }, + { + "epoch": 163.01, + "learning_rate": 6.282847937825909e-07, + "loss": 2.0272, + "step": 96500 + }, + { + "epoch": 163.04, + "learning_rate": 6.212123118619628e-07, + "loss": 2.0307, + "step": 96520 + }, + { + "epoch": 163.07, + "learning_rate": 6.14179613538679e-07, + "loss": 2.0236, + "step": 96540 + }, + { + "epoch": 163.11, + "learning_rate": 6.071867044788737e-07, + "loss": 2.0165, + "step": 96560 + }, + { + "epoch": 163.14, + "learning_rate": 6.002335903165957e-07, + "loss": 2.0405, + "step": 96580 + }, + { + "epoch": 163.18, + "learning_rate": 5.93320276653847e-07, + "loss": 2.0313, + "step": 96600 + }, + { + "epoch": 163.21, + "learning_rate": 5.864467690605613e-07, + "loss": 2.0332, + "step": 96620 + }, + { + "epoch": 163.24, + "learning_rate": 5.796130730745975e-07, + "loss": 2.0295, + "step": 96640 + }, + { + "epoch": 163.28, + "learning_rate": 5.728191942017403e-07, + "loss": 2.0291, + "step": 96660 + }, + { + "epoch": 163.31, + "learning_rate": 5.660651379157e-07, + "loss": 2.0359, + "step": 96680 + }, + { + "epoch": 163.34, + "learning_rate": 5.593509096580851e-07, + "loss": 2.0324, + "step": 96700 + }, + { + "epoch": 163.38, + "learning_rate": 5.526765148384461e-07, + "loss": 2.0317, + "step": 96720 + }, + { + "epoch": 163.41, + "learning_rate": 5.460419588342092e-07, + "loss": 2.0299, + "step": 96740 + }, + { + "epoch": 163.45, + "learning_rate": 5.394472469907208e-07, + "loss": 2.0411, + "step": 96760 + }, + { + "epoch": 163.48, + "learning_rate": 5.328923846212197e-07, + "loss": 2.0363, + "step": 96780 + }, + { + "epoch": 163.51, + "learning_rate": 5.263773770068425e-07, + "loss": 2.0437, + "step": 96800 + }, + { + "epoch": 163.55, + "learning_rate": 5.199022293966127e-07, + "loss": 2.0333, + "step": 96820 + }, + { + "epoch": 163.58, + "learning_rate": 5.134669470074404e-07, + "loss": 2.0408, + "step": 96840 + }, + { + "epoch": 163.61, + "learning_rate": 5.070715350241117e-07, + "loss": 2.0397, + "step": 96860 + }, + { + "epoch": 163.65, + "learning_rate": 5.007159985992937e-07, + "loss": 2.0225, + "step": 96880 + }, + { + "epoch": 163.68, + "learning_rate": 4.944003428535348e-07, + "loss": 2.0372, + "step": 96900 + }, + { + "epoch": 163.72, + "learning_rate": 4.881245728752426e-07, + "loss": 2.0308, + "step": 96920 + }, + { + "epoch": 163.75, + "learning_rate": 4.81888693720689e-07, + "loss": 2.0294, + "step": 96940 + }, + { + "epoch": 163.78, + "learning_rate": 4.756927104140163e-07, + "loss": 2.0314, + "step": 96960 + }, + { + "epoch": 163.82, + "learning_rate": 4.6953662794720354e-07, + "loss": 2.0403, + "step": 96980 + }, + { + "epoch": 163.85, + "learning_rate": 4.634204512800999e-07, + "loss": 2.0377, + "step": 97000 + }, + { + "epoch": 163.85, + "eval_loss": 2.460573434829712, + "eval_runtime": 50.8928, + "eval_samples_per_second": 19.433, + "eval_steps_per_second": 0.118, + "eval_tse_ndup": 0.004185809636906782, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.031060657384459112, + "eval_tse_type": 0.0006161526794742163, + "step": 97000 + }, + { + "epoch": 163.89, + "learning_rate": 4.576470506819086e-07, + "loss": 2.0255, + "step": 97020 + }, + { + "epoch": 163.92, + "learning_rate": 4.5160870446823713e-07, + "loss": 2.0354, + "step": 97040 + }, + { + "epoch": 163.95, + "learning_rate": 4.456102784984817e-07, + "loss": 2.0318, + "step": 97060 + }, + { + "epoch": 163.99, + "learning_rate": 4.396517776054598e-07, + "loss": 2.047, + "step": 97080 + }, + { + "epoch": 164.02, + "learning_rate": 4.3373320658983694e-07, + "loss": 2.0353, + "step": 97100 + }, + { + "epoch": 164.05, + "learning_rate": 4.2785457022009867e-07, + "loss": 2.0469, + "step": 97120 + }, + { + "epoch": 164.09, + "learning_rate": 4.220158732325452e-07, + "loss": 2.0289, + "step": 97140 + }, + { + "epoch": 164.12, + "learning_rate": 4.162171203313192e-07, + "loss": 2.0297, + "step": 97160 + }, + { + "epoch": 164.16, + "learning_rate": 4.1045831618837236e-07, + "loss": 2.0404, + "step": 97180 + }, + { + "epoch": 164.19, + "learning_rate": 4.0473946544346e-07, + "loss": 2.0352, + "step": 97200 + }, + { + "epoch": 164.22, + "learning_rate": 3.9906057270416854e-07, + "loss": 2.0294, + "step": 97220 + }, + { + "epoch": 164.26, + "learning_rate": 3.9342164254587145e-07, + "loss": 2.025, + "step": 97240 + }, + { + "epoch": 164.29, + "learning_rate": 3.878226795117512e-07, + "loss": 2.0393, + "step": 97260 + }, + { + "epoch": 164.32, + "learning_rate": 3.8226368811279945e-07, + "loss": 2.0302, + "step": 97280 + }, + { + "epoch": 164.36, + "learning_rate": 3.76744672827789e-07, + "loss": 2.0421, + "step": 97300 + }, + { + "epoch": 164.39, + "learning_rate": 3.7126563810329087e-07, + "loss": 2.0435, + "step": 97320 + }, + { + "epoch": 164.43, + "learning_rate": 3.6582658835366267e-07, + "loss": 2.0435, + "step": 97340 + }, + { + "epoch": 164.46, + "learning_rate": 3.6042752796105473e-07, + "loss": 2.0413, + "step": 97360 + }, + { + "epoch": 164.49, + "learning_rate": 3.550684612753874e-07, + "loss": 2.0299, + "step": 97380 + }, + { + "epoch": 164.53, + "learning_rate": 3.49749392614368e-07, + "loss": 2.0322, + "step": 97400 + }, + { + "epoch": 164.56, + "learning_rate": 3.444703262634741e-07, + "loss": 2.0382, + "step": 97420 + }, + { + "epoch": 164.59, + "learning_rate": 3.3923126647594785e-07, + "loss": 2.025, + "step": 97440 + }, + { + "epoch": 164.63, + "learning_rate": 3.340322174728072e-07, + "loss": 2.0339, + "step": 97460 + }, + { + "epoch": 164.66, + "learning_rate": 3.288731834428405e-07, + "loss": 2.0242, + "step": 97480 + }, + { + "epoch": 164.7, + "learning_rate": 3.2375416854257823e-07, + "loss": 2.0252, + "step": 97500 + }, + { + "epoch": 164.73, + "learning_rate": 3.186751768963159e-07, + "loss": 2.0241, + "step": 97520 + }, + { + "epoch": 164.76, + "learning_rate": 3.13636212596119e-07, + "loss": 2.0321, + "step": 97540 + }, + { + "epoch": 164.8, + "learning_rate": 3.0863727970177894e-07, + "loss": 2.0245, + "step": 97560 + }, + { + "epoch": 164.83, + "learning_rate": 3.0367838224084623e-07, + "loss": 2.0428, + "step": 97580 + }, + { + "epoch": 164.86, + "learning_rate": 2.98759524208625e-07, + "loss": 2.0502, + "step": 97600 + }, + { + "epoch": 164.9, + "learning_rate": 2.9388070956813953e-07, + "loss": 2.0205, + "step": 97620 + }, + { + "epoch": 164.93, + "learning_rate": 2.8904194225016224e-07, + "loss": 2.0319, + "step": 97640 + }, + { + "epoch": 164.97, + "learning_rate": 2.84243226153208e-07, + "loss": 2.0298, + "step": 97660 + }, + { + "epoch": 165.0, + "learning_rate": 2.7948456514351184e-07, + "loss": 2.0271, + "step": 97680 + }, + { + "epoch": 165.03, + "learning_rate": 2.7476596305504034e-07, + "loss": 2.0305, + "step": 97700 + }, + { + "epoch": 165.07, + "learning_rate": 2.700874236894857e-07, + "loss": 2.0345, + "step": 97720 + }, + { + "epoch": 165.1, + "learning_rate": 2.654489508162661e-07, + "loss": 2.023, + "step": 97740 + }, + { + "epoch": 165.14, + "learning_rate": 2.608505481725143e-07, + "loss": 2.0366, + "step": 97760 + }, + { + "epoch": 165.17, + "learning_rate": 2.562922194630779e-07, + "loss": 2.0265, + "step": 97780 + }, + { + "epoch": 165.2, + "learning_rate": 2.517739683605191e-07, + "loss": 2.0379, + "step": 97800 + }, + { + "epoch": 165.24, + "learning_rate": 2.4729579850511497e-07, + "loss": 2.0206, + "step": 97820 + }, + { + "epoch": 165.27, + "learning_rate": 2.42857713504846e-07, + "loss": 2.0289, + "step": 97840 + }, + { + "epoch": 165.3, + "learning_rate": 2.3845971693539083e-07, + "loss": 2.028, + "step": 97860 + }, + { + "epoch": 165.34, + "learning_rate": 2.3410181234014283e-07, + "loss": 2.0354, + "step": 97880 + }, + { + "epoch": 165.37, + "learning_rate": 2.297840032301879e-07, + "loss": 2.0446, + "step": 97900 + }, + { + "epoch": 165.41, + "learning_rate": 2.2550629308429882e-07, + "loss": 2.0472, + "step": 97920 + }, + { + "epoch": 165.44, + "learning_rate": 2.212686853489576e-07, + "loss": 2.028, + "step": 97940 + }, + { + "epoch": 165.47, + "learning_rate": 2.170711834383221e-07, + "loss": 2.04, + "step": 97960 + }, + { + "epoch": 165.51, + "learning_rate": 2.1291379073424268e-07, + "loss": 2.0332, + "step": 97980 + }, + { + "epoch": 165.54, + "learning_rate": 2.087965105862677e-07, + "loss": 2.0401, + "step": 98000 + }, + { + "epoch": 165.54, + "eval_loss": 2.4602303504943848, + "eval_runtime": 47.3856, + "eval_samples_per_second": 20.871, + "eval_steps_per_second": 0.127, + "eval_tse_ndup": 0.0037392305241050337, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.03098927771360855, + "eval_tse_type": 0.000635462307605887, + "step": 98000 + }, + { + "epoch": 165.57, + "learning_rate": 2.0471934631160482e-07, + "loss": 2.0303, + "step": 98020 + }, + { + "epoch": 165.61, + "learning_rate": 2.0068230119514864e-07, + "loss": 2.0269, + "step": 98040 + }, + { + "epoch": 165.64, + "learning_rate": 1.9668537848948066e-07, + "loss": 2.0231, + "step": 98060 + }, + { + "epoch": 165.68, + "learning_rate": 1.9272858141485272e-07, + "loss": 2.0244, + "step": 98080 + }, + { + "epoch": 165.71, + "learning_rate": 1.888119131591759e-07, + "loss": 2.0409, + "step": 98100 + }, + { + "epoch": 165.74, + "learning_rate": 1.8493537687804263e-07, + "loss": 2.0392, + "step": 98120 + }, + { + "epoch": 165.78, + "learning_rate": 1.8109897569470457e-07, + "loss": 2.0344, + "step": 98140 + }, + { + "epoch": 165.81, + "learning_rate": 1.7730271270008925e-07, + "loss": 2.0337, + "step": 98160 + }, + { + "epoch": 165.84, + "learning_rate": 1.7354659095277226e-07, + "loss": 2.0382, + "step": 98180 + }, + { + "epoch": 165.88, + "learning_rate": 1.6983061347898843e-07, + "loss": 2.0311, + "step": 98200 + }, + { + "epoch": 165.91, + "learning_rate": 1.6615478327264845e-07, + "loss": 2.0391, + "step": 98220 + }, + { + "epoch": 165.95, + "learning_rate": 1.625191032952833e-07, + "loss": 2.0296, + "step": 98240 + }, + { + "epoch": 165.98, + "learning_rate": 1.5892357647610544e-07, + "loss": 2.0245, + "step": 98260 + }, + { + "epoch": 166.01, + "learning_rate": 1.553682057119643e-07, + "loss": 2.0344, + "step": 98280 + }, + { + "epoch": 166.05, + "learning_rate": 1.518529938673574e-07, + "loss": 2.031, + "step": 98300 + }, + { + "epoch": 166.08, + "learning_rate": 1.4837794377443038e-07, + "loss": 2.0323, + "step": 98320 + }, + { + "epoch": 166.11, + "learning_rate": 1.449430582329603e-07, + "loss": 2.0404, + "step": 98340 + }, + { + "epoch": 166.15, + "learning_rate": 1.4154834001038341e-07, + "loss": 2.0244, + "step": 98360 + }, + { + "epoch": 166.18, + "learning_rate": 1.381937918417564e-07, + "loss": 2.0396, + "step": 98380 + }, + { + "epoch": 166.22, + "learning_rate": 1.348794164297784e-07, + "loss": 2.0272, + "step": 98400 + }, + { + "epoch": 166.25, + "learning_rate": 1.3160521644478008e-07, + "loss": 2.0396, + "step": 98420 + }, + { + "epoch": 166.28, + "learning_rate": 1.2837119452472902e-07, + "loss": 2.0456, + "step": 98440 + }, + { + "epoch": 166.32, + "learning_rate": 1.2517735327521873e-07, + "loss": 2.0285, + "step": 98460 + }, + { + "epoch": 166.35, + "learning_rate": 1.220236952694631e-07, + "loss": 2.0264, + "step": 98480 + }, + { + "epoch": 166.39, + "learning_rate": 1.1891022304831856e-07, + "loss": 2.0328, + "step": 98500 + }, + { + "epoch": 166.42, + "learning_rate": 1.1583693912023963e-07, + "loss": 2.0394, + "step": 98520 + }, + { + "epoch": 166.45, + "learning_rate": 1.1280384596132898e-07, + "loss": 2.0352, + "step": 98540 + }, + { + "epoch": 166.49, + "learning_rate": 1.0981094601528186e-07, + "loss": 2.0252, + "step": 98560 + }, + { + "epoch": 166.52, + "learning_rate": 1.0685824169343606e-07, + "loss": 2.0312, + "step": 98580 + }, + { + "epoch": 166.55, + "learning_rate": 1.0394573537472196e-07, + "loss": 2.0217, + "step": 98600 + }, + { + "epoch": 166.59, + "learning_rate": 1.0107342940570141e-07, + "loss": 2.0388, + "step": 98620 + }, + { + "epoch": 166.62, + "learning_rate": 9.824132610053438e-08, + "loss": 2.0367, + "step": 98640 + }, + { + "epoch": 166.66, + "learning_rate": 9.544942774100119e-08, + "loss": 2.0282, + "step": 98660 + }, + { + "epoch": 166.69, + "learning_rate": 9.269773657647474e-08, + "loss": 2.0423, + "step": 98680 + }, + { + "epoch": 166.72, + "learning_rate": 8.998625482394829e-08, + "loss": 2.0335, + "step": 98700 + }, + { + "epoch": 166.76, + "learning_rate": 8.731498466801324e-08, + "loss": 2.0445, + "step": 98720 + }, + { + "epoch": 166.79, + "learning_rate": 8.468392826087024e-08, + "loss": 2.0335, + "step": 98740 + }, + { + "epoch": 166.82, + "learning_rate": 8.20930877223014e-08, + "loss": 2.0409, + "step": 98760 + }, + { + "epoch": 166.86, + "learning_rate": 7.954246513970365e-08, + "loss": 2.0339, + "step": 98780 + }, + { + "epoch": 166.89, + "learning_rate": 7.70320625680776e-08, + "loss": 2.0261, + "step": 98800 + }, + { + "epoch": 166.93, + "learning_rate": 7.456188202999426e-08, + "loss": 2.0179, + "step": 98820 + }, + { + "epoch": 166.96, + "learning_rate": 7.213192551564496e-08, + "loss": 2.0135, + "step": 98840 + }, + { + "epoch": 166.99, + "learning_rate": 6.9742194982797e-08, + "loss": 2.0312, + "step": 98860 + }, + { + "epoch": 167.03, + "learning_rate": 6.739269235681577e-08, + "loss": 2.0308, + "step": 98880 + }, + { + "epoch": 167.06, + "learning_rate": 6.508341953064822e-08, + "loss": 2.023, + "step": 98900 + }, + { + "epoch": 167.09, + "learning_rate": 6.281437836483384e-08, + "loss": 2.0342, + "step": 98920 + }, + { + "epoch": 167.13, + "learning_rate": 6.058557068750471e-08, + "loss": 2.0181, + "step": 98940 + }, + { + "epoch": 167.16, + "learning_rate": 5.839699829436884e-08, + "loss": 2.0376, + "step": 98960 + }, + { + "epoch": 167.2, + "learning_rate": 5.624866294871578e-08, + "loss": 2.0313, + "step": 98980 + }, + { + "epoch": 167.23, + "learning_rate": 5.414056638142206e-08, + "loss": 2.0296, + "step": 99000 + }, + { + "epoch": 167.23, + "eval_loss": 2.46012282371521, + "eval_runtime": 47.5343, + "eval_samples_per_second": 20.806, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.004190255604696678, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.030844263664246436, + "eval_tse_type": 0.0005863105269070891, + "step": 99000 + }, + { + "epoch": 167.26, + "learning_rate": 5.207271029094574e-08, + "loss": 2.0366, + "step": 99020 + }, + { + "epoch": 167.3, + "learning_rate": 5.004509634332078e-08, + "loss": 2.0318, + "step": 99040 + }, + { + "epoch": 167.33, + "learning_rate": 4.805772617215154e-08, + "loss": 2.0288, + "step": 99060 + }, + { + "epoch": 167.36, + "learning_rate": 4.611060137863499e-08, + "loss": 2.0477, + "step": 99080 + }, + { + "epoch": 167.4, + "learning_rate": 4.4203723531532905e-08, + "loss": 2.0369, + "step": 99100 + }, + { + "epoch": 167.43, + "learning_rate": 4.233709416717746e-08, + "loss": 2.0284, + "step": 99120 + }, + { + "epoch": 167.47, + "learning_rate": 4.051071478948232e-08, + "loss": 2.0401, + "step": 99140 + }, + { + "epoch": 167.5, + "learning_rate": 3.872458686992597e-08, + "loss": 2.0314, + "step": 99160 + }, + { + "epoch": 167.53, + "learning_rate": 3.697871184756285e-08, + "loss": 2.0251, + "step": 99180 + }, + { + "epoch": 167.57, + "learning_rate": 3.5273091129012224e-08, + "loss": 2.0314, + "step": 99200 + }, + { + "epoch": 167.6, + "learning_rate": 3.360772608845819e-08, + "loss": 2.0299, + "step": 99220 + }, + { + "epoch": 167.64, + "learning_rate": 3.1982618067660787e-08, + "loss": 2.0309, + "step": 99240 + }, + { + "epoch": 167.67, + "learning_rate": 3.0397768375939325e-08, + "loss": 2.0262, + "step": 99260 + }, + { + "epoch": 167.7, + "learning_rate": 2.8853178290172422e-08, + "loss": 2.0367, + "step": 99280 + }, + { + "epoch": 167.74, + "learning_rate": 2.7348849054809057e-08, + "loss": 2.0461, + "step": 99300 + }, + { + "epoch": 167.77, + "learning_rate": 2.5884781881868603e-08, + "loss": 2.0393, + "step": 99320 + }, + { + "epoch": 167.8, + "learning_rate": 2.446097795091862e-08, + "loss": 2.0259, + "step": 99340 + }, + { + "epoch": 167.84, + "learning_rate": 2.307743840909149e-08, + "loss": 2.0369, + "step": 99360 + }, + { + "epoch": 167.87, + "learning_rate": 2.1734164371084443e-08, + "loss": 2.0265, + "step": 99380 + }, + { + "epoch": 167.91, + "learning_rate": 2.0431156919137328e-08, + "loss": 2.041, + "step": 99400 + }, + { + "epoch": 167.94, + "learning_rate": 1.916841710307149e-08, + "loss": 2.033, + "step": 99420 + }, + { + "epoch": 167.97, + "learning_rate": 1.7945945940250898e-08, + "loss": 2.0241, + "step": 99440 + }, + { + "epoch": 168.01, + "learning_rate": 1.6763744415598805e-08, + "loss": 2.0241, + "step": 99460 + }, + { + "epoch": 168.04, + "learning_rate": 1.5621813481586646e-08, + "loss": 2.0468, + "step": 99480 + }, + { + "epoch": 168.07, + "learning_rate": 1.4520154058256241e-08, + "loss": 2.0104, + "step": 99500 + }, + { + "epoch": 168.11, + "learning_rate": 1.3458767033192043e-08, + "loss": 2.026, + "step": 99520 + }, + { + "epoch": 168.14, + "learning_rate": 1.2437653261537785e-08, + "loss": 2.0396, + "step": 99540 + }, + { + "epoch": 168.18, + "learning_rate": 1.1456813565974278e-08, + "loss": 2.037, + "step": 99560 + }, + { + "epoch": 168.21, + "learning_rate": 1.0516248736763823e-08, + "loss": 2.0301, + "step": 99580 + }, + { + "epoch": 168.24, + "learning_rate": 9.615959531683594e-09, + "loss": 2.036, + "step": 99600 + }, + { + "epoch": 168.28, + "learning_rate": 8.755946676097804e-09, + "loss": 2.0351, + "step": 99620 + }, + { + "epoch": 168.31, + "learning_rate": 7.936210862891091e-09, + "loss": 2.0232, + "step": 99640 + }, + { + "epoch": 168.34, + "learning_rate": 7.156752752518481e-09, + "loss": 2.037, + "step": 99660 + }, + { + "epoch": 168.38, + "learning_rate": 6.417572972966524e-09, + "loss": 2.0342, + "step": 99680 + }, + { + "epoch": 168.41, + "learning_rate": 5.718672119786606e-09, + "loss": 2.0436, + "step": 99700 + }, + { + "epoch": 168.45, + "learning_rate": 5.060050756067192e-09, + "loss": 2.0383, + "step": 99720 + }, + { + "epoch": 168.48, + "learning_rate": 4.4417094124449275e-09, + "loss": 2.0547, + "step": 99740 + }, + { + "epoch": 168.51, + "learning_rate": 3.863648587110191e-09, + "loss": 2.0211, + "step": 99760 + }, + { + "epoch": 168.55, + "learning_rate": 3.3258687457959904e-09, + "loss": 2.0316, + "step": 99780 + }, + { + "epoch": 168.58, + "learning_rate": 2.8283703217835135e-09, + "loss": 2.0264, + "step": 99800 + }, + { + "epoch": 168.61, + "learning_rate": 2.371153715891028e-09, + "loss": 2.0253, + "step": 99820 + }, + { + "epoch": 168.65, + "learning_rate": 1.954219296496085e-09, + "loss": 2.0317, + "step": 99840 + }, + { + "epoch": 168.68, + "learning_rate": 1.5775673995133134e-09, + "loss": 2.0227, + "step": 99860 + }, + { + "epoch": 168.72, + "learning_rate": 1.241198328405524e-09, + "loss": 2.0267, + "step": 99880 + }, + { + "epoch": 168.75, + "learning_rate": 9.45112354178157e-10, + "loss": 2.0422, + "step": 99900 + }, + { + "epoch": 168.78, + "learning_rate": 6.893097153848338e-10, + "loss": 2.0353, + "step": 99920 + }, + { + "epoch": 168.82, + "learning_rate": 4.737906181162544e-10, + "loss": 2.0309, + "step": 99940 + }, + { + "epoch": 168.85, + "learning_rate": 2.985552360168509e-10, + "loss": 2.017, + "step": 99960 + }, + { + "epoch": 168.89, + "learning_rate": 1.6360371026258315e-10, + "loss": 2.0289, + "step": 99980 + }, + { + "epoch": 168.92, + "learning_rate": 6.893614959424533e-11, + "loss": 2.0345, + "step": 100000 + }, + { + "epoch": 168.92, + "eval_loss": 2.46016526222229, + "eval_runtime": 47.7026, + "eval_samples_per_second": 20.733, + "eval_steps_per_second": 0.126, + "eval_tse_ndup": 0.003877539938720142, + "eval_tse_nnof": 0.0, + "eval_tse_nnon": 0.0, + "eval_tse_time": 0.030767411587534408, + "eval_tse_type": 0.000635462307605887, + "step": 100000 + }, + { + "epoch": 168.92, + "step": 100000, + "total_flos": 1.1144698192283566e+18, + "train_loss": 3.096821060371399, + "train_runtime": 63835.4946, + "train_samples_per_second": 200.515, + "train_steps_per_second": 1.567 + } + ], + "max_steps": 100000, + "num_train_epochs": 169, + "total_flos": 1.1144698192283566e+18, + "trial_name": null, + "trial_params": null +}