{ "best_metric": 2.46012282371521, "best_model_checkpoint": "runs/gen_Maestro_REMI/REMI_bpe20000/checkpoint-99000", "epoch": 168.9189189189189, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 6.666666666666667e-08, "loss": 9.9965, "step": 20 }, { "epoch": 0.07, "learning_rate": 1.3333333333333334e-07, "loss": 9.9861, "step": 40 }, { "epoch": 0.1, "learning_rate": 2.0000000000000002e-07, "loss": 9.9647, "step": 60 }, { "epoch": 0.14, "learning_rate": 2.6666666666666667e-07, "loss": 9.9301, "step": 80 }, { "epoch": 0.17, "learning_rate": 3.3333333333333335e-07, "loss": 9.8868, "step": 100 }, { "epoch": 0.2, "learning_rate": 4.0000000000000003e-07, "loss": 9.8358, "step": 120 }, { "epoch": 0.24, "learning_rate": 4.666666666666667e-07, "loss": 9.7888, "step": 140 }, { "epoch": 0.27, "learning_rate": 5.333333333333333e-07, "loss": 9.7479, "step": 160 }, { "epoch": 0.3, "learning_rate": 6.000000000000001e-07, "loss": 9.7129, "step": 180 }, { "epoch": 0.34, "learning_rate": 6.666666666666667e-07, "loss": 9.6937, "step": 200 }, { "epoch": 0.37, "learning_rate": 7.333333333333333e-07, "loss": 9.6719, "step": 220 }, { "epoch": 0.41, "learning_rate": 8.000000000000001e-07, "loss": 9.6522, "step": 240 }, { "epoch": 0.44, "learning_rate": 8.666666666666667e-07, "loss": 9.6366, "step": 260 }, { "epoch": 0.47, "learning_rate": 9.333333333333334e-07, "loss": 9.624, "step": 280 }, { "epoch": 0.51, "learning_rate": 1.0000000000000002e-06, "loss": 9.6086, "step": 300 }, { "epoch": 0.54, "learning_rate": 1.0666666666666667e-06, "loss": 9.5939, "step": 320 }, { "epoch": 0.57, "learning_rate": 1.1333333333333334e-06, "loss": 9.5796, "step": 340 }, { "epoch": 0.61, "learning_rate": 1.2000000000000002e-06, "loss": 9.5681, "step": 360 }, { "epoch": 0.64, "learning_rate": 1.2666666666666667e-06, "loss": 9.5595, "step": 380 }, { "epoch": 0.68, "learning_rate": 1.3333333333333334e-06, "loss": 9.5465, "step": 400 }, { "epoch": 0.71, "learning_rate": 1.4000000000000001e-06, "loss": 9.5305, "step": 420 }, { "epoch": 0.74, "learning_rate": 1.4666666666666667e-06, "loss": 9.5193, "step": 440 }, { "epoch": 0.78, "learning_rate": 1.5333333333333334e-06, "loss": 9.5049, "step": 460 }, { "epoch": 0.81, "learning_rate": 1.6000000000000001e-06, "loss": 9.4885, "step": 480 }, { "epoch": 0.84, "learning_rate": 1.6666666666666667e-06, "loss": 9.4719, "step": 500 }, { "epoch": 0.88, "learning_rate": 1.7333333333333334e-06, "loss": 9.4612, "step": 520 }, { "epoch": 0.91, "learning_rate": 1.8e-06, "loss": 9.4444, "step": 540 }, { "epoch": 0.95, "learning_rate": 1.8666666666666669e-06, "loss": 9.4235, "step": 560 }, { "epoch": 0.98, "learning_rate": 1.9333333333333336e-06, "loss": 9.4066, "step": 580 }, { "epoch": 1.01, "learning_rate": 2.0000000000000003e-06, "loss": 9.3919, "step": 600 }, { "epoch": 1.05, "learning_rate": 2.0666666666666666e-06, "loss": 9.3731, "step": 620 }, { "epoch": 1.08, "learning_rate": 2.1333333333333334e-06, "loss": 9.3541, "step": 640 }, { "epoch": 1.11, "learning_rate": 2.2e-06, "loss": 9.336, "step": 660 }, { "epoch": 1.15, "learning_rate": 2.266666666666667e-06, "loss": 9.3156, "step": 680 }, { "epoch": 1.18, "learning_rate": 2.3333333333333336e-06, "loss": 9.2945, "step": 700 }, { "epoch": 1.22, "learning_rate": 2.4000000000000003e-06, "loss": 9.2704, "step": 720 }, { "epoch": 1.25, "learning_rate": 2.4666666666666666e-06, "loss": 9.2556, "step": 740 }, { "epoch": 1.28, "learning_rate": 2.5333333333333334e-06, "loss": 9.2401, "step": 760 }, { "epoch": 1.32, "learning_rate": 2.6e-06, "loss": 9.2208, "step": 780 }, { "epoch": 1.35, "learning_rate": 2.666666666666667e-06, "loss": 9.1983, "step": 800 }, { "epoch": 1.39, "learning_rate": 2.7333333333333336e-06, "loss": 9.1781, "step": 820 }, { "epoch": 1.42, "learning_rate": 2.8000000000000003e-06, "loss": 9.1574, "step": 840 }, { "epoch": 1.45, "learning_rate": 2.8666666666666666e-06, "loss": 9.133, "step": 860 }, { "epoch": 1.49, "learning_rate": 2.9333333333333333e-06, "loss": 9.1153, "step": 880 }, { "epoch": 1.52, "learning_rate": 3e-06, "loss": 9.0907, "step": 900 }, { "epoch": 1.55, "learning_rate": 3.066666666666667e-06, "loss": 9.0703, "step": 920 }, { "epoch": 1.59, "learning_rate": 3.133333333333333e-06, "loss": 9.0484, "step": 940 }, { "epoch": 1.62, "learning_rate": 3.2000000000000003e-06, "loss": 9.0259, "step": 960 }, { "epoch": 1.66, "learning_rate": 3.2666666666666666e-06, "loss": 9.0088, "step": 980 }, { "epoch": 1.69, "learning_rate": 3.3333333333333333e-06, "loss": 8.9866, "step": 1000 }, { "epoch": 1.69, "eval_loss": 8.95605182647705, "eval_runtime": 46.5795, "eval_samples_per_second": 21.233, "eval_steps_per_second": 0.129, "eval_tse_ndup": 0.0, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.0006509407349596912, "eval_tse_type": 0.3327419239609195, "step": 1000 }, { "epoch": 1.72, "learning_rate": 3.4000000000000005e-06, "loss": 8.9656, "step": 1020 }, { "epoch": 1.76, "learning_rate": 3.466666666666667e-06, "loss": 8.946, "step": 1040 }, { "epoch": 1.79, "learning_rate": 3.5333333333333335e-06, "loss": 8.9318, "step": 1060 }, { "epoch": 1.82, "learning_rate": 3.6e-06, "loss": 8.903, "step": 1080 }, { "epoch": 1.86, "learning_rate": 3.666666666666667e-06, "loss": 8.8817, "step": 1100 }, { "epoch": 1.89, "learning_rate": 3.7333333333333337e-06, "loss": 8.8683, "step": 1120 }, { "epoch": 1.93, "learning_rate": 3.8e-06, "loss": 8.8524, "step": 1140 }, { "epoch": 1.96, "learning_rate": 3.866666666666667e-06, "loss": 8.833, "step": 1160 }, { "epoch": 1.99, "learning_rate": 3.9333333333333335e-06, "loss": 8.8211, "step": 1180 }, { "epoch": 2.03, "learning_rate": 4.000000000000001e-06, "loss": 8.7941, "step": 1200 }, { "epoch": 2.06, "learning_rate": 4.066666666666666e-06, "loss": 8.7745, "step": 1220 }, { "epoch": 2.09, "learning_rate": 4.133333333333333e-06, "loss": 8.761, "step": 1240 }, { "epoch": 2.13, "learning_rate": 4.2000000000000004e-06, "loss": 8.7479, "step": 1260 }, { "epoch": 2.16, "learning_rate": 4.266666666666667e-06, "loss": 8.7301, "step": 1280 }, { "epoch": 2.2, "learning_rate": 4.333333333333334e-06, "loss": 8.7187, "step": 1300 }, { "epoch": 2.23, "learning_rate": 4.4e-06, "loss": 8.7004, "step": 1320 }, { "epoch": 2.26, "learning_rate": 4.4666666666666665e-06, "loss": 8.683, "step": 1340 }, { "epoch": 2.3, "learning_rate": 4.533333333333334e-06, "loss": 8.6711, "step": 1360 }, { "epoch": 2.33, "learning_rate": 4.6e-06, "loss": 8.6576, "step": 1380 }, { "epoch": 2.36, "learning_rate": 4.666666666666667e-06, "loss": 8.6394, "step": 1400 }, { "epoch": 2.4, "learning_rate": 4.7333333333333335e-06, "loss": 8.6242, "step": 1420 }, { "epoch": 2.43, "learning_rate": 4.800000000000001e-06, "loss": 8.6158, "step": 1440 }, { "epoch": 2.47, "learning_rate": 4.866666666666667e-06, "loss": 8.5992, "step": 1460 }, { "epoch": 2.5, "learning_rate": 4.933333333333333e-06, "loss": 8.5956, "step": 1480 }, { "epoch": 2.53, "learning_rate": 5e-06, "loss": 8.5753, "step": 1500 }, { "epoch": 2.57, "learning_rate": 5.066666666666667e-06, "loss": 8.5659, "step": 1520 }, { "epoch": 2.6, "learning_rate": 5.133333333333334e-06, "loss": 8.5598, "step": 1540 }, { "epoch": 2.64, "learning_rate": 5.2e-06, "loss": 8.546, "step": 1560 }, { "epoch": 2.67, "learning_rate": 5.266666666666667e-06, "loss": 8.5332, "step": 1580 }, { "epoch": 2.7, "learning_rate": 5.333333333333334e-06, "loss": 8.5348, "step": 1600 }, { "epoch": 2.74, "learning_rate": 5.4e-06, "loss": 8.5243, "step": 1620 }, { "epoch": 2.77, "learning_rate": 5.466666666666667e-06, "loss": 8.5171, "step": 1640 }, { "epoch": 2.8, "learning_rate": 5.5333333333333334e-06, "loss": 8.5057, "step": 1660 }, { "epoch": 2.84, "learning_rate": 5.600000000000001e-06, "loss": 8.5059, "step": 1680 }, { "epoch": 2.87, "learning_rate": 5.666666666666667e-06, "loss": 8.501, "step": 1700 }, { "epoch": 2.91, "learning_rate": 5.733333333333333e-06, "loss": 8.4816, "step": 1720 }, { "epoch": 2.94, "learning_rate": 5.8e-06, "loss": 8.4922, "step": 1740 }, { "epoch": 2.97, "learning_rate": 5.866666666666667e-06, "loss": 8.4783, "step": 1760 }, { "epoch": 3.01, "learning_rate": 5.933333333333334e-06, "loss": 8.4751, "step": 1780 }, { "epoch": 3.04, "learning_rate": 6e-06, "loss": 8.4588, "step": 1800 }, { "epoch": 3.07, "learning_rate": 6.066666666666667e-06, "loss": 8.4623, "step": 1820 }, { "epoch": 3.11, "learning_rate": 6.133333333333334e-06, "loss": 8.4415, "step": 1840 }, { "epoch": 3.14, "learning_rate": 6.2e-06, "loss": 8.443, "step": 1860 }, { "epoch": 3.18, "learning_rate": 6.266666666666666e-06, "loss": 8.4319, "step": 1880 }, { "epoch": 3.21, "learning_rate": 6.333333333333334e-06, "loss": 8.4269, "step": 1900 }, { "epoch": 3.24, "learning_rate": 6.4000000000000006e-06, "loss": 8.4127, "step": 1920 }, { "epoch": 3.28, "learning_rate": 6.466666666666667e-06, "loss": 8.4157, "step": 1940 }, { "epoch": 3.31, "learning_rate": 6.533333333333333e-06, "loss": 8.4084, "step": 1960 }, { "epoch": 3.34, "learning_rate": 6.6e-06, "loss": 8.4011, "step": 1980 }, { "epoch": 3.38, "learning_rate": 6.666666666666667e-06, "loss": 8.3922, "step": 2000 }, { "epoch": 3.38, "eval_loss": 8.35992431640625, "eval_runtime": 46.4907, "eval_samples_per_second": 21.273, "eval_steps_per_second": 0.129, "eval_tse_ndup": 0.0, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.0005799355217422038, "eval_tse_type": 0.3327419239609195, "step": 2000 }, { "epoch": 3.41, "learning_rate": 6.733333333333333e-06, "loss": 8.379, "step": 2020 }, { "epoch": 3.45, "learning_rate": 6.800000000000001e-06, "loss": 8.3882, "step": 2040 }, { "epoch": 3.48, "learning_rate": 6.866666666666667e-06, "loss": 8.3824, "step": 2060 }, { "epoch": 3.51, "learning_rate": 6.933333333333334e-06, "loss": 8.3645, "step": 2080 }, { "epoch": 3.55, "learning_rate": 7.000000000000001e-06, "loss": 8.3618, "step": 2100 }, { "epoch": 3.58, "learning_rate": 7.066666666666667e-06, "loss": 8.3556, "step": 2120 }, { "epoch": 3.61, "learning_rate": 7.133333333333333e-06, "loss": 8.349, "step": 2140 }, { "epoch": 3.65, "learning_rate": 7.2e-06, "loss": 8.349, "step": 2160 }, { "epoch": 3.68, "learning_rate": 7.266666666666668e-06, "loss": 8.3382, "step": 2180 }, { "epoch": 3.72, "learning_rate": 7.333333333333334e-06, "loss": 8.3239, "step": 2200 }, { "epoch": 3.75, "learning_rate": 7.4e-06, "loss": 8.3113, "step": 2220 }, { "epoch": 3.78, "learning_rate": 7.4666666666666675e-06, "loss": 8.3119, "step": 2240 }, { "epoch": 3.82, "learning_rate": 7.533333333333334e-06, "loss": 8.2856, "step": 2260 }, { "epoch": 3.85, "learning_rate": 7.6e-06, "loss": 8.2844, "step": 2280 }, { "epoch": 3.89, "learning_rate": 7.666666666666667e-06, "loss": 8.2633, "step": 2300 }, { "epoch": 3.92, "learning_rate": 7.733333333333334e-06, "loss": 8.2571, "step": 2320 }, { "epoch": 3.95, "learning_rate": 7.8e-06, "loss": 8.2318, "step": 2340 }, { "epoch": 3.99, "learning_rate": 7.866666666666667e-06, "loss": 8.2176, "step": 2360 }, { "epoch": 4.02, "learning_rate": 7.933333333333334e-06, "loss": 8.221, "step": 2380 }, { "epoch": 4.05, "learning_rate": 8.000000000000001e-06, "loss": 8.2058, "step": 2400 }, { "epoch": 4.09, "learning_rate": 8.066666666666667e-06, "loss": 8.2073, "step": 2420 }, { "epoch": 4.12, "learning_rate": 8.133333333333332e-06, "loss": 8.1965, "step": 2440 }, { "epoch": 4.16, "learning_rate": 8.200000000000001e-06, "loss": 8.1608, "step": 2460 }, { "epoch": 4.19, "learning_rate": 8.266666666666667e-06, "loss": 8.1546, "step": 2480 }, { "epoch": 4.22, "learning_rate": 8.333333333333334e-06, "loss": 8.1279, "step": 2500 }, { "epoch": 4.26, "learning_rate": 8.400000000000001e-06, "loss": 8.1274, "step": 2520 }, { "epoch": 4.29, "learning_rate": 8.466666666666666e-06, "loss": 8.1081, "step": 2540 }, { "epoch": 4.32, "learning_rate": 8.533333333333334e-06, "loss": 8.0982, "step": 2560 }, { "epoch": 4.36, "learning_rate": 8.599999999999999e-06, "loss": 8.0698, "step": 2580 }, { "epoch": 4.39, "learning_rate": 8.666666666666668e-06, "loss": 8.0593, "step": 2600 }, { "epoch": 4.43, "learning_rate": 8.733333333333333e-06, "loss": 8.0368, "step": 2620 }, { "epoch": 4.46, "learning_rate": 8.8e-06, "loss": 8.0272, "step": 2640 }, { "epoch": 4.49, "learning_rate": 8.866666666666668e-06, "loss": 8.0123, "step": 2660 }, { "epoch": 4.53, "learning_rate": 8.933333333333333e-06, "loss": 7.9895, "step": 2680 }, { "epoch": 4.56, "learning_rate": 9e-06, "loss": 7.9688, "step": 2700 }, { "epoch": 4.59, "learning_rate": 9.066666666666667e-06, "loss": 7.956, "step": 2720 }, { "epoch": 4.63, "learning_rate": 9.133333333333335e-06, "loss": 7.9605, "step": 2740 }, { "epoch": 4.66, "learning_rate": 9.2e-06, "loss": 7.933, "step": 2760 }, { "epoch": 4.7, "learning_rate": 9.266666666666667e-06, "loss": 7.9012, "step": 2780 }, { "epoch": 4.73, "learning_rate": 9.333333333333334e-06, "loss": 7.8915, "step": 2800 }, { "epoch": 4.76, "learning_rate": 9.4e-06, "loss": 7.8632, "step": 2820 }, { "epoch": 4.8, "learning_rate": 9.466666666666667e-06, "loss": 7.8485, "step": 2840 }, { "epoch": 4.83, "learning_rate": 9.533333333333334e-06, "loss": 7.8243, "step": 2860 }, { "epoch": 4.86, "learning_rate": 9.600000000000001e-06, "loss": 7.8058, "step": 2880 }, { "epoch": 4.9, "learning_rate": 9.666666666666667e-06, "loss": 7.7773, "step": 2900 }, { "epoch": 4.93, "learning_rate": 9.733333333333334e-06, "loss": 7.7615, "step": 2920 }, { "epoch": 4.97, "learning_rate": 9.800000000000001e-06, "loss": 7.7201, "step": 2940 }, { "epoch": 5.0, "learning_rate": 9.866666666666667e-06, "loss": 7.6944, "step": 2960 }, { "epoch": 5.03, "learning_rate": 9.933333333333334e-06, "loss": 7.681, "step": 2980 }, { "epoch": 5.07, "learning_rate": 1e-05, "loss": 7.6484, "step": 3000 }, { "epoch": 5.07, "eval_loss": 7.5473527908325195, "eval_runtime": 46.812, "eval_samples_per_second": 21.127, "eval_steps_per_second": 0.128, "eval_tse_ndup": 0.019046315020784185, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.08615238659379869, "eval_tse_type": 0.0334121503263036, "step": 3000 }, { "epoch": 5.1, "learning_rate": 1.0066666666666668e-05, "loss": 7.6008, "step": 3020 }, { "epoch": 5.14, "learning_rate": 1.0133333333333333e-05, "loss": 7.5998, "step": 3040 }, { "epoch": 5.17, "learning_rate": 1.02e-05, "loss": 7.5668, "step": 3060 }, { "epoch": 5.2, "learning_rate": 1.0266666666666668e-05, "loss": 7.5497, "step": 3080 }, { "epoch": 5.24, "learning_rate": 1.0333333333333333e-05, "loss": 7.5271, "step": 3100 }, { "epoch": 5.27, "learning_rate": 1.04e-05, "loss": 7.4934, "step": 3120 }, { "epoch": 5.3, "learning_rate": 1.0466666666666668e-05, "loss": 7.464, "step": 3140 }, { "epoch": 5.34, "learning_rate": 1.0533333333333335e-05, "loss": 7.4618, "step": 3160 }, { "epoch": 5.37, "learning_rate": 1.06e-05, "loss": 7.4438, "step": 3180 }, { "epoch": 5.41, "learning_rate": 1.0666666666666667e-05, "loss": 7.3952, "step": 3200 }, { "epoch": 5.44, "learning_rate": 1.0733333333333334e-05, "loss": 7.3956, "step": 3220 }, { "epoch": 5.47, "learning_rate": 1.08e-05, "loss": 7.3867, "step": 3240 }, { "epoch": 5.51, "learning_rate": 1.0866666666666667e-05, "loss": 7.3624, "step": 3260 }, { "epoch": 5.54, "learning_rate": 1.0933333333333334e-05, "loss": 7.3379, "step": 3280 }, { "epoch": 5.57, "learning_rate": 1.1000000000000001e-05, "loss": 7.2998, "step": 3300 }, { "epoch": 5.61, "learning_rate": 1.1066666666666667e-05, "loss": 7.2783, "step": 3320 }, { "epoch": 5.64, "learning_rate": 1.1133333333333334e-05, "loss": 7.2713, "step": 3340 }, { "epoch": 5.68, "learning_rate": 1.1200000000000001e-05, "loss": 7.2402, "step": 3360 }, { "epoch": 5.71, "learning_rate": 1.1266666666666667e-05, "loss": 7.2445, "step": 3380 }, { "epoch": 5.74, "learning_rate": 1.1333333333333334e-05, "loss": 7.2294, "step": 3400 }, { "epoch": 5.78, "learning_rate": 1.1400000000000001e-05, "loss": 7.2023, "step": 3420 }, { "epoch": 5.81, "learning_rate": 1.1466666666666666e-05, "loss": 7.1985, "step": 3440 }, { "epoch": 5.84, "learning_rate": 1.1533333333333334e-05, "loss": 7.1602, "step": 3460 }, { "epoch": 5.88, "learning_rate": 1.16e-05, "loss": 7.1214, "step": 3480 }, { "epoch": 5.91, "learning_rate": 1.1666666666666668e-05, "loss": 7.1281, "step": 3500 }, { "epoch": 5.95, "learning_rate": 1.1733333333333333e-05, "loss": 7.0915, "step": 3520 }, { "epoch": 5.98, "learning_rate": 1.18e-05, "loss": 7.0716, "step": 3540 }, { "epoch": 6.01, "learning_rate": 1.1866666666666668e-05, "loss": 7.0603, "step": 3560 }, { "epoch": 6.05, "learning_rate": 1.1933333333333333e-05, "loss": 7.0538, "step": 3580 }, { "epoch": 6.08, "learning_rate": 1.2e-05, "loss": 7.0202, "step": 3600 }, { "epoch": 6.11, "learning_rate": 1.2066666666666667e-05, "loss": 7.0241, "step": 3620 }, { "epoch": 6.15, "learning_rate": 1.2133333333333335e-05, "loss": 7.0084, "step": 3640 }, { "epoch": 6.18, "learning_rate": 1.22e-05, "loss": 7.0037, "step": 3660 }, { "epoch": 6.22, "learning_rate": 1.2266666666666667e-05, "loss": 6.978, "step": 3680 }, { "epoch": 6.25, "learning_rate": 1.2333333333333334e-05, "loss": 6.9479, "step": 3700 }, { "epoch": 6.28, "learning_rate": 1.24e-05, "loss": 6.9353, "step": 3720 }, { "epoch": 6.32, "learning_rate": 1.2466666666666667e-05, "loss": 6.9249, "step": 3740 }, { "epoch": 6.35, "learning_rate": 1.2533333333333332e-05, "loss": 6.9284, "step": 3760 }, { "epoch": 6.39, "learning_rate": 1.2600000000000001e-05, "loss": 6.8827, "step": 3780 }, { "epoch": 6.42, "learning_rate": 1.2666666666666668e-05, "loss": 6.9025, "step": 3800 }, { "epoch": 6.45, "learning_rate": 1.2733333333333334e-05, "loss": 6.8757, "step": 3820 }, { "epoch": 6.49, "learning_rate": 1.2800000000000001e-05, "loss": 6.8597, "step": 3840 }, { "epoch": 6.52, "learning_rate": 1.2866666666666668e-05, "loss": 6.837, "step": 3860 }, { "epoch": 6.55, "learning_rate": 1.2933333333333334e-05, "loss": 6.8198, "step": 3880 }, { "epoch": 6.59, "learning_rate": 1.3000000000000001e-05, "loss": 6.8343, "step": 3900 }, { "epoch": 6.62, "learning_rate": 1.3066666666666666e-05, "loss": 6.78, "step": 3920 }, { "epoch": 6.66, "learning_rate": 1.3133333333333334e-05, "loss": 6.777, "step": 3940 }, { "epoch": 6.69, "learning_rate": 1.32e-05, "loss": 6.7962, "step": 3960 }, { "epoch": 6.72, "learning_rate": 1.3266666666666666e-05, "loss": 6.7663, "step": 3980 }, { "epoch": 6.76, "learning_rate": 1.3333333333333333e-05, "loss": 6.7458, "step": 4000 }, { "epoch": 6.76, "eval_loss": 6.617990493774414, "eval_runtime": 46.7629, "eval_samples_per_second": 21.149, "eval_steps_per_second": 0.128, "eval_tse_ndup": 0.03450995908266761, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.04389155620161396, "eval_tse_type": 0.0030251448519484775, "step": 4000 }, { "epoch": 6.79, "learning_rate": 1.3400000000000002e-05, "loss": 6.7378, "step": 4020 }, { "epoch": 6.82, "learning_rate": 1.3466666666666666e-05, "loss": 6.73, "step": 4040 }, { "epoch": 6.86, "learning_rate": 1.3533333333333335e-05, "loss": 6.7169, "step": 4060 }, { "epoch": 6.89, "learning_rate": 1.3600000000000002e-05, "loss": 6.711, "step": 4080 }, { "epoch": 6.93, "learning_rate": 1.3666666666666666e-05, "loss": 6.681, "step": 4100 }, { "epoch": 6.96, "learning_rate": 1.3733333333333335e-05, "loss": 6.6731, "step": 4120 }, { "epoch": 6.99, "learning_rate": 1.3800000000000002e-05, "loss": 6.6799, "step": 4140 }, { "epoch": 7.03, "learning_rate": 1.3866666666666667e-05, "loss": 6.6362, "step": 4160 }, { "epoch": 7.06, "learning_rate": 1.3933333333333334e-05, "loss": 6.6247, "step": 4180 }, { "epoch": 7.09, "learning_rate": 1.4000000000000001e-05, "loss": 6.6309, "step": 4200 }, { "epoch": 7.13, "learning_rate": 1.4066666666666667e-05, "loss": 6.6312, "step": 4220 }, { "epoch": 7.16, "learning_rate": 1.4133333333333334e-05, "loss": 6.6072, "step": 4240 }, { "epoch": 7.2, "learning_rate": 1.42e-05, "loss": 6.5955, "step": 4260 }, { "epoch": 7.23, "learning_rate": 1.4266666666666667e-05, "loss": 6.5989, "step": 4280 }, { "epoch": 7.26, "learning_rate": 1.4333333333333334e-05, "loss": 6.6025, "step": 4300 }, { "epoch": 7.3, "learning_rate": 1.44e-05, "loss": 6.5728, "step": 4320 }, { "epoch": 7.33, "learning_rate": 1.4466666666666667e-05, "loss": 6.578, "step": 4340 }, { "epoch": 7.36, "learning_rate": 1.4533333333333335e-05, "loss": 6.5628, "step": 4360 }, { "epoch": 7.4, "learning_rate": 1.4599999999999999e-05, "loss": 6.5257, "step": 4380 }, { "epoch": 7.43, "learning_rate": 1.4666666666666668e-05, "loss": 6.5414, "step": 4400 }, { "epoch": 7.47, "learning_rate": 1.4733333333333335e-05, "loss": 6.5253, "step": 4420 }, { "epoch": 7.5, "learning_rate": 1.48e-05, "loss": 6.5293, "step": 4440 }, { "epoch": 7.53, "learning_rate": 1.4866666666666668e-05, "loss": 6.4852, "step": 4460 }, { "epoch": 7.57, "learning_rate": 1.4933333333333335e-05, "loss": 6.488, "step": 4480 }, { "epoch": 7.6, "learning_rate": 1.5e-05, "loss": 6.5069, "step": 4500 }, { "epoch": 7.64, "learning_rate": 1.5066666666666668e-05, "loss": 6.498, "step": 4520 }, { "epoch": 7.67, "learning_rate": 1.5133333333333333e-05, "loss": 6.4785, "step": 4540 }, { "epoch": 7.7, "learning_rate": 1.52e-05, "loss": 6.4584, "step": 4560 }, { "epoch": 7.74, "learning_rate": 1.5266666666666667e-05, "loss": 6.4683, "step": 4580 }, { "epoch": 7.77, "learning_rate": 1.5333333333333334e-05, "loss": 6.4477, "step": 4600 }, { "epoch": 7.8, "learning_rate": 1.54e-05, "loss": 6.4477, "step": 4620 }, { "epoch": 7.84, "learning_rate": 1.546666666666667e-05, "loss": 6.4117, "step": 4640 }, { "epoch": 7.87, "learning_rate": 1.5533333333333333e-05, "loss": 6.4222, "step": 4660 }, { "epoch": 7.91, "learning_rate": 1.56e-05, "loss": 6.4158, "step": 4680 }, { "epoch": 7.94, "learning_rate": 1.5666666666666667e-05, "loss": 6.417, "step": 4700 }, { "epoch": 7.97, "learning_rate": 1.5733333333333334e-05, "loss": 6.3885, "step": 4720 }, { "epoch": 8.01, "learning_rate": 1.58e-05, "loss": 6.3959, "step": 4740 }, { "epoch": 8.04, "learning_rate": 1.586666666666667e-05, "loss": 6.3683, "step": 4760 }, { "epoch": 8.07, "learning_rate": 1.5933333333333332e-05, "loss": 6.3691, "step": 4780 }, { "epoch": 8.11, "learning_rate": 1.6000000000000003e-05, "loss": 6.3567, "step": 4800 }, { "epoch": 8.14, "learning_rate": 1.606666666666667e-05, "loss": 6.3445, "step": 4820 }, { "epoch": 8.18, "learning_rate": 1.6133333333333334e-05, "loss": 6.3392, "step": 4840 }, { "epoch": 8.21, "learning_rate": 1.62e-05, "loss": 6.3109, "step": 4860 }, { "epoch": 8.24, "learning_rate": 1.6266666666666665e-05, "loss": 6.3362, "step": 4880 }, { "epoch": 8.28, "learning_rate": 1.6333333333333335e-05, "loss": 6.3446, "step": 4900 }, { "epoch": 8.31, "learning_rate": 1.6400000000000002e-05, "loss": 6.3071, "step": 4920 }, { "epoch": 8.34, "learning_rate": 1.6466666666666666e-05, "loss": 6.3113, "step": 4940 }, { "epoch": 8.38, "learning_rate": 1.6533333333333333e-05, "loss": 6.279, "step": 4960 }, { "epoch": 8.41, "learning_rate": 1.66e-05, "loss": 6.2803, "step": 4980 }, { "epoch": 8.45, "learning_rate": 1.6666666666666667e-05, "loss": 6.2993, "step": 5000 }, { "epoch": 8.45, "eval_loss": 6.150936603546143, "eval_runtime": 46.9103, "eval_samples_per_second": 21.083, "eval_steps_per_second": 0.128, "eval_tse_ndup": 0.02838463484700525, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.030864985103219583, "eval_tse_type": 0.0033177451971688576, "step": 5000 }, { "epoch": 8.48, "learning_rate": 1.6733333333333335e-05, "loss": 6.2719, "step": 5020 }, { "epoch": 8.51, "learning_rate": 1.6800000000000002e-05, "loss": 6.2829, "step": 5040 }, { "epoch": 8.55, "learning_rate": 1.6866666666666666e-05, "loss": 6.2579, "step": 5060 }, { "epoch": 8.58, "learning_rate": 1.6933333333333333e-05, "loss": 6.2497, "step": 5080 }, { "epoch": 8.61, "learning_rate": 1.7000000000000003e-05, "loss": 6.2598, "step": 5100 }, { "epoch": 8.65, "learning_rate": 1.7066666666666667e-05, "loss": 6.2333, "step": 5120 }, { "epoch": 8.68, "learning_rate": 1.7133333333333334e-05, "loss": 6.2455, "step": 5140 }, { "epoch": 8.72, "learning_rate": 1.7199999999999998e-05, "loss": 6.2239, "step": 5160 }, { "epoch": 8.75, "learning_rate": 1.726666666666667e-05, "loss": 6.237, "step": 5180 }, { "epoch": 8.78, "learning_rate": 1.7333333333333336e-05, "loss": 6.2274, "step": 5200 }, { "epoch": 8.82, "learning_rate": 1.74e-05, "loss": 6.2016, "step": 5220 }, { "epoch": 8.85, "learning_rate": 1.7466666666666667e-05, "loss": 6.2061, "step": 5240 }, { "epoch": 8.89, "learning_rate": 1.7533333333333334e-05, "loss": 6.1996, "step": 5260 }, { "epoch": 8.92, "learning_rate": 1.76e-05, "loss": 6.1854, "step": 5280 }, { "epoch": 8.95, "learning_rate": 1.7666666666666668e-05, "loss": 6.1892, "step": 5300 }, { "epoch": 8.99, "learning_rate": 1.7733333333333335e-05, "loss": 6.1803, "step": 5320 }, { "epoch": 9.02, "learning_rate": 1.78e-05, "loss": 6.1609, "step": 5340 }, { "epoch": 9.05, "learning_rate": 1.7866666666666666e-05, "loss": 6.1776, "step": 5360 }, { "epoch": 9.09, "learning_rate": 1.7933333333333337e-05, "loss": 6.133, "step": 5380 }, { "epoch": 9.12, "learning_rate": 1.8e-05, "loss": 6.1621, "step": 5400 }, { "epoch": 9.16, "learning_rate": 1.8066666666666668e-05, "loss": 6.1211, "step": 5420 }, { "epoch": 9.19, "learning_rate": 1.8133333333333335e-05, "loss": 6.1205, "step": 5440 }, { "epoch": 9.22, "learning_rate": 1.8200000000000002e-05, "loss": 6.1046, "step": 5460 }, { "epoch": 9.26, "learning_rate": 1.826666666666667e-05, "loss": 6.1094, "step": 5480 }, { "epoch": 9.29, "learning_rate": 1.8333333333333333e-05, "loss": 6.115, "step": 5500 }, { "epoch": 9.32, "learning_rate": 1.84e-05, "loss": 6.0756, "step": 5520 }, { "epoch": 9.36, "learning_rate": 1.8466666666666667e-05, "loss": 6.0964, "step": 5540 }, { "epoch": 9.39, "learning_rate": 1.8533333333333334e-05, "loss": 6.0684, "step": 5560 }, { "epoch": 9.43, "learning_rate": 1.86e-05, "loss": 6.0754, "step": 5580 }, { "epoch": 9.46, "learning_rate": 1.866666666666667e-05, "loss": 6.0827, "step": 5600 }, { "epoch": 9.49, "learning_rate": 1.8733333333333332e-05, "loss": 6.0703, "step": 5620 }, { "epoch": 9.53, "learning_rate": 1.88e-05, "loss": 6.0633, "step": 5640 }, { "epoch": 9.56, "learning_rate": 1.886666666666667e-05, "loss": 6.0579, "step": 5660 }, { "epoch": 9.59, "learning_rate": 1.8933333333333334e-05, "loss": 6.0445, "step": 5680 }, { "epoch": 9.63, "learning_rate": 1.9e-05, "loss": 6.0616, "step": 5700 }, { "epoch": 9.66, "learning_rate": 1.9066666666666668e-05, "loss": 6.0371, "step": 5720 }, { "epoch": 9.7, "learning_rate": 1.9133333333333332e-05, "loss": 6.028, "step": 5740 }, { "epoch": 9.73, "learning_rate": 1.9200000000000003e-05, "loss": 6.0276, "step": 5760 }, { "epoch": 9.76, "learning_rate": 1.926666666666667e-05, "loss": 6.0154, "step": 5780 }, { "epoch": 9.8, "learning_rate": 1.9333333333333333e-05, "loss": 5.989, "step": 5800 }, { "epoch": 9.83, "learning_rate": 1.94e-05, "loss": 6.0047, "step": 5820 }, { "epoch": 9.86, "learning_rate": 1.9466666666666668e-05, "loss": 6.0087, "step": 5840 }, { "epoch": 9.9, "learning_rate": 1.9533333333333335e-05, "loss": 5.984, "step": 5860 }, { "epoch": 9.93, "learning_rate": 1.9600000000000002e-05, "loss": 5.9697, "step": 5880 }, { "epoch": 9.97, "learning_rate": 1.9666666666666666e-05, "loss": 5.989, "step": 5900 }, { "epoch": 10.0, "learning_rate": 1.9733333333333333e-05, "loss": 5.9804, "step": 5920 }, { "epoch": 10.03, "learning_rate": 1.9800000000000004e-05, "loss": 5.9616, "step": 5940 }, { "epoch": 10.07, "learning_rate": 1.9866666666666667e-05, "loss": 5.952, "step": 5960 }, { "epoch": 10.1, "learning_rate": 1.9933333333333334e-05, "loss": 5.9399, "step": 5980 }, { "epoch": 10.14, "learning_rate": 2e-05, "loss": 5.9317, "step": 6000 }, { "epoch": 10.14, "eval_loss": 5.784234523773193, "eval_runtime": 49.1403, "eval_samples_per_second": 20.126, "eval_steps_per_second": 0.122, "eval_tse_ndup": 0.011724633008987968, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.023856265453044822, "eval_tse_type": 0.001799306257723851, "step": 6000 }, { "epoch": 10.17, "learning_rate": 2.0066666666666665e-05, "loss": 5.928, "step": 6020 }, { "epoch": 10.2, "learning_rate": 2.0133333333333336e-05, "loss": 5.9352, "step": 6040 }, { "epoch": 10.24, "learning_rate": 2.0200000000000003e-05, "loss": 5.9184, "step": 6060 }, { "epoch": 10.27, "learning_rate": 2.0266666666666667e-05, "loss": 5.9234, "step": 6080 }, { "epoch": 10.3, "learning_rate": 2.0333333333333334e-05, "loss": 5.8903, "step": 6100 }, { "epoch": 10.34, "learning_rate": 2.04e-05, "loss": 5.9154, "step": 6120 }, { "epoch": 10.37, "learning_rate": 2.046666666666667e-05, "loss": 5.8967, "step": 6140 }, { "epoch": 10.41, "learning_rate": 2.0533333333333336e-05, "loss": 5.9033, "step": 6160 }, { "epoch": 10.44, "learning_rate": 2.06e-05, "loss": 5.8873, "step": 6180 }, { "epoch": 10.47, "learning_rate": 2.0663333333333336e-05, "loss": 5.8548, "step": 6200 }, { "epoch": 10.51, "learning_rate": 2.0730000000000003e-05, "loss": 5.8844, "step": 6220 }, { "epoch": 10.54, "learning_rate": 2.0796666666666667e-05, "loss": 5.8576, "step": 6240 }, { "epoch": 10.57, "learning_rate": 2.0863333333333334e-05, "loss": 5.8652, "step": 6260 }, { "epoch": 10.61, "learning_rate": 2.093e-05, "loss": 5.86, "step": 6280 }, { "epoch": 10.64, "learning_rate": 2.099666666666667e-05, "loss": 5.8448, "step": 6300 }, { "epoch": 10.68, "learning_rate": 2.1063333333333336e-05, "loss": 5.8298, "step": 6320 }, { "epoch": 10.71, "learning_rate": 2.113e-05, "loss": 5.8472, "step": 6340 }, { "epoch": 10.74, "learning_rate": 2.1196666666666666e-05, "loss": 5.8317, "step": 6360 }, { "epoch": 10.78, "learning_rate": 2.1263333333333334e-05, "loss": 5.818, "step": 6380 }, { "epoch": 10.81, "learning_rate": 2.133e-05, "loss": 5.8043, "step": 6400 }, { "epoch": 10.84, "learning_rate": 2.1396666666666668e-05, "loss": 5.8164, "step": 6420 }, { "epoch": 10.88, "learning_rate": 2.1463333333333335e-05, "loss": 5.8146, "step": 6440 }, { "epoch": 10.91, "learning_rate": 2.153e-05, "loss": 5.7853, "step": 6460 }, { "epoch": 10.95, "learning_rate": 2.159666666666667e-05, "loss": 5.7954, "step": 6480 }, { "epoch": 10.98, "learning_rate": 2.1663333333333337e-05, "loss": 5.7965, "step": 6500 }, { "epoch": 11.01, "learning_rate": 2.173e-05, "loss": 5.7747, "step": 6520 }, { "epoch": 11.05, "learning_rate": 2.1796666666666667e-05, "loss": 5.7595, "step": 6540 }, { "epoch": 11.08, "learning_rate": 2.1863333333333335e-05, "loss": 5.7593, "step": 6560 }, { "epoch": 11.11, "learning_rate": 2.1930000000000002e-05, "loss": 5.754, "step": 6580 }, { "epoch": 11.15, "learning_rate": 2.199666666666667e-05, "loss": 5.7569, "step": 6600 }, { "epoch": 11.18, "learning_rate": 2.2063333333333333e-05, "loss": 5.7343, "step": 6620 }, { "epoch": 11.22, "learning_rate": 2.213e-05, "loss": 5.7316, "step": 6640 }, { "epoch": 11.25, "learning_rate": 2.2196666666666667e-05, "loss": 5.7496, "step": 6660 }, { "epoch": 11.28, "learning_rate": 2.2263333333333334e-05, "loss": 5.7162, "step": 6680 }, { "epoch": 11.32, "learning_rate": 2.233e-05, "loss": 5.7307, "step": 6700 }, { "epoch": 11.35, "learning_rate": 2.239666666666667e-05, "loss": 5.7071, "step": 6720 }, { "epoch": 11.39, "learning_rate": 2.2463333333333332e-05, "loss": 5.7166, "step": 6740 }, { "epoch": 11.42, "learning_rate": 2.253e-05, "loss": 5.711, "step": 6760 }, { "epoch": 11.45, "learning_rate": 2.259666666666667e-05, "loss": 5.6948, "step": 6780 }, { "epoch": 11.49, "learning_rate": 2.2663333333333334e-05, "loss": 5.6979, "step": 6800 }, { "epoch": 11.52, "learning_rate": 2.273e-05, "loss": 5.6902, "step": 6820 }, { "epoch": 11.55, "learning_rate": 2.2796666666666668e-05, "loss": 5.6826, "step": 6840 }, { "epoch": 11.59, "learning_rate": 2.2863333333333335e-05, "loss": 5.6601, "step": 6860 }, { "epoch": 11.62, "learning_rate": 2.2930000000000002e-05, "loss": 5.66, "step": 6880 }, { "epoch": 11.66, "learning_rate": 2.299666666666667e-05, "loss": 5.6611, "step": 6900 }, { "epoch": 11.69, "learning_rate": 2.3063333333333333e-05, "loss": 5.6563, "step": 6920 }, { "epoch": 11.72, "learning_rate": 2.313e-05, "loss": 5.6494, "step": 6940 }, { "epoch": 11.76, "learning_rate": 2.3196666666666668e-05, "loss": 5.6565, "step": 6960 }, { "epoch": 11.79, "learning_rate": 2.3263333333333335e-05, "loss": 5.641, "step": 6980 }, { "epoch": 11.82, "learning_rate": 2.3330000000000002e-05, "loss": 5.6336, "step": 7000 }, { "epoch": 11.82, "eval_loss": 5.458117961883545, "eval_runtime": 50.0555, "eval_samples_per_second": 19.758, "eval_steps_per_second": 0.12, "eval_tse_ndup": 0.017345052495841798, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.017512275737317114, "eval_tse_type": 0.00018080833614200653, "step": 7000 }, { "epoch": 11.86, "learning_rate": 2.3396666666666666e-05, "loss": 5.6352, "step": 7020 }, { "epoch": 11.89, "learning_rate": 2.3463333333333333e-05, "loss": 5.6328, "step": 7040 }, { "epoch": 11.93, "learning_rate": 2.3530000000000003e-05, "loss": 5.6168, "step": 7060 }, { "epoch": 11.96, "learning_rate": 2.3596666666666667e-05, "loss": 5.6141, "step": 7080 }, { "epoch": 11.99, "learning_rate": 2.3663333333333334e-05, "loss": 5.6081, "step": 7100 }, { "epoch": 12.03, "learning_rate": 2.373e-05, "loss": 5.5965, "step": 7120 }, { "epoch": 12.06, "learning_rate": 2.379666666666667e-05, "loss": 5.6029, "step": 7140 }, { "epoch": 12.09, "learning_rate": 2.3863333333333336e-05, "loss": 5.6032, "step": 7160 }, { "epoch": 12.13, "learning_rate": 2.3930000000000003e-05, "loss": 5.5922, "step": 7180 }, { "epoch": 12.16, "learning_rate": 2.3996666666666667e-05, "loss": 5.5751, "step": 7200 }, { "epoch": 12.2, "learning_rate": 2.4063333333333334e-05, "loss": 5.5766, "step": 7220 }, { "epoch": 12.23, "learning_rate": 2.413e-05, "loss": 5.5592, "step": 7240 }, { "epoch": 12.26, "learning_rate": 2.4196666666666668e-05, "loss": 5.5591, "step": 7260 }, { "epoch": 12.3, "learning_rate": 2.4263333333333335e-05, "loss": 5.55, "step": 7280 }, { "epoch": 12.33, "learning_rate": 2.433e-05, "loss": 5.5541, "step": 7300 }, { "epoch": 12.36, "learning_rate": 2.4396666666666666e-05, "loss": 5.5417, "step": 7320 }, { "epoch": 12.4, "learning_rate": 2.4463333333333337e-05, "loss": 5.5455, "step": 7340 }, { "epoch": 12.43, "learning_rate": 2.453e-05, "loss": 5.5344, "step": 7360 }, { "epoch": 12.47, "learning_rate": 2.4596666666666668e-05, "loss": 5.5202, "step": 7380 }, { "epoch": 12.5, "learning_rate": 2.4663333333333335e-05, "loss": 5.5374, "step": 7400 }, { "epoch": 12.53, "learning_rate": 2.473e-05, "loss": 5.5242, "step": 7420 }, { "epoch": 12.57, "learning_rate": 2.479666666666667e-05, "loss": 5.505, "step": 7440 }, { "epoch": 12.6, "learning_rate": 2.4863333333333336e-05, "loss": 5.5153, "step": 7460 }, { "epoch": 12.64, "learning_rate": 2.493e-05, "loss": 5.4931, "step": 7480 }, { "epoch": 12.67, "learning_rate": 2.4996666666666667e-05, "loss": 5.4959, "step": 7500 }, { "epoch": 12.7, "learning_rate": 2.5063333333333334e-05, "loss": 5.4914, "step": 7520 }, { "epoch": 12.74, "learning_rate": 2.5130000000000005e-05, "loss": 5.4984, "step": 7540 }, { "epoch": 12.77, "learning_rate": 2.519666666666667e-05, "loss": 5.4908, "step": 7560 }, { "epoch": 12.8, "learning_rate": 2.5263333333333333e-05, "loss": 5.4752, "step": 7580 }, { "epoch": 12.84, "learning_rate": 2.5330000000000003e-05, "loss": 5.4862, "step": 7600 }, { "epoch": 12.87, "learning_rate": 2.539666666666667e-05, "loss": 5.4718, "step": 7620 }, { "epoch": 12.91, "learning_rate": 2.5463333333333334e-05, "loss": 5.4496, "step": 7640 }, { "epoch": 12.94, "learning_rate": 2.5530000000000005e-05, "loss": 5.4577, "step": 7660 }, { "epoch": 12.97, "learning_rate": 2.559666666666667e-05, "loss": 5.463, "step": 7680 }, { "epoch": 13.01, "learning_rate": 2.5663333333333332e-05, "loss": 5.4542, "step": 7700 }, { "epoch": 13.04, "learning_rate": 2.573e-05, "loss": 5.4298, "step": 7720 }, { "epoch": 13.07, "learning_rate": 2.579666666666667e-05, "loss": 5.4478, "step": 7740 }, { "epoch": 13.11, "learning_rate": 2.5863333333333334e-05, "loss": 5.4353, "step": 7760 }, { "epoch": 13.14, "learning_rate": 2.5929999999999997e-05, "loss": 5.4353, "step": 7780 }, { "epoch": 13.18, "learning_rate": 2.5996666666666668e-05, "loss": 5.4234, "step": 7800 }, { "epoch": 13.21, "learning_rate": 2.6063333333333335e-05, "loss": 5.4327, "step": 7820 }, { "epoch": 13.24, "learning_rate": 2.613e-05, "loss": 5.4052, "step": 7840 }, { "epoch": 13.28, "learning_rate": 2.619666666666667e-05, "loss": 5.4052, "step": 7860 }, { "epoch": 13.31, "learning_rate": 2.6263333333333333e-05, "loss": 5.409, "step": 7880 }, { "epoch": 13.34, "learning_rate": 2.633e-05, "loss": 5.4085, "step": 7900 }, { "epoch": 13.38, "learning_rate": 2.639666666666667e-05, "loss": 5.4105, "step": 7920 }, { "epoch": 13.41, "learning_rate": 2.6463333333333335e-05, "loss": 5.3895, "step": 7940 }, { "epoch": 13.45, "learning_rate": 2.653e-05, "loss": 5.404, "step": 7960 }, { "epoch": 13.48, "learning_rate": 2.659666666666667e-05, "loss": 5.3775, "step": 7980 }, { "epoch": 13.51, "learning_rate": 2.6663333333333336e-05, "loss": 5.3844, "step": 8000 }, { "epoch": 13.51, "eval_loss": 5.198601245880127, "eval_runtime": 47.5753, "eval_samples_per_second": 20.788, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.019161429918440446, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.013836201150024673, "eval_tse_type": 0.00011936861026850915, "step": 8000 }, { "epoch": 13.55, "learning_rate": 2.673e-05, "loss": 5.3828, "step": 8020 }, { "epoch": 13.58, "learning_rate": 2.679666666666667e-05, "loss": 5.3827, "step": 8040 }, { "epoch": 13.61, "learning_rate": 2.6863333333333334e-05, "loss": 5.3618, "step": 8060 }, { "epoch": 13.65, "learning_rate": 2.693e-05, "loss": 5.3752, "step": 8080 }, { "epoch": 13.68, "learning_rate": 2.699666666666667e-05, "loss": 5.3743, "step": 8100 }, { "epoch": 13.72, "learning_rate": 2.7063333333333336e-05, "loss": 5.3703, "step": 8120 }, { "epoch": 13.75, "learning_rate": 2.713e-05, "loss": 5.3624, "step": 8140 }, { "epoch": 13.78, "learning_rate": 2.719666666666667e-05, "loss": 5.3535, "step": 8160 }, { "epoch": 13.82, "learning_rate": 2.7263333333333334e-05, "loss": 5.3541, "step": 8180 }, { "epoch": 13.85, "learning_rate": 2.733e-05, "loss": 5.3412, "step": 8200 }, { "epoch": 13.89, "learning_rate": 2.739666666666667e-05, "loss": 5.3284, "step": 8220 }, { "epoch": 13.92, "learning_rate": 2.7463333333333335e-05, "loss": 5.3353, "step": 8240 }, { "epoch": 13.95, "learning_rate": 2.753e-05, "loss": 5.3302, "step": 8260 }, { "epoch": 13.99, "learning_rate": 2.759666666666667e-05, "loss": 5.3334, "step": 8280 }, { "epoch": 14.02, "learning_rate": 2.7663333333333337e-05, "loss": 5.3116, "step": 8300 }, { "epoch": 14.05, "learning_rate": 2.773e-05, "loss": 5.3125, "step": 8320 }, { "epoch": 14.09, "learning_rate": 2.7796666666666664e-05, "loss": 5.3136, "step": 8340 }, { "epoch": 14.12, "learning_rate": 2.7860000000000004e-05, "loss": 5.2997, "step": 8360 }, { "epoch": 14.16, "learning_rate": 2.7926666666666668e-05, "loss": 5.3007, "step": 8380 }, { "epoch": 14.19, "learning_rate": 2.7993333333333332e-05, "loss": 5.2872, "step": 8400 }, { "epoch": 14.22, "learning_rate": 2.8060000000000002e-05, "loss": 5.2895, "step": 8420 }, { "epoch": 14.26, "learning_rate": 2.8126666666666666e-05, "loss": 5.2961, "step": 8440 }, { "epoch": 14.29, "learning_rate": 2.8193333333333333e-05, "loss": 5.2973, "step": 8460 }, { "epoch": 14.32, "learning_rate": 2.8260000000000004e-05, "loss": 5.2681, "step": 8480 }, { "epoch": 14.36, "learning_rate": 2.8326666666666668e-05, "loss": 5.2751, "step": 8500 }, { "epoch": 14.39, "learning_rate": 2.839333333333333e-05, "loss": 5.2667, "step": 8520 }, { "epoch": 14.43, "learning_rate": 2.8460000000000002e-05, "loss": 5.2647, "step": 8540 }, { "epoch": 14.46, "learning_rate": 2.852666666666667e-05, "loss": 5.269, "step": 8560 }, { "epoch": 14.49, "learning_rate": 2.8593333333333333e-05, "loss": 5.2801, "step": 8580 }, { "epoch": 14.53, "learning_rate": 2.8660000000000003e-05, "loss": 5.2621, "step": 8600 }, { "epoch": 14.56, "learning_rate": 2.8726666666666667e-05, "loss": 5.2705, "step": 8620 }, { "epoch": 14.59, "learning_rate": 2.8793333333333334e-05, "loss": 5.2486, "step": 8640 }, { "epoch": 14.63, "learning_rate": 2.8860000000000005e-05, "loss": 5.2662, "step": 8660 }, { "epoch": 14.66, "learning_rate": 2.892666666666667e-05, "loss": 5.2371, "step": 8680 }, { "epoch": 14.7, "learning_rate": 2.8993333333333332e-05, "loss": 5.2561, "step": 8700 }, { "epoch": 14.73, "learning_rate": 2.9060000000000003e-05, "loss": 5.2491, "step": 8720 }, { "epoch": 14.76, "learning_rate": 2.912666666666667e-05, "loss": 5.2385, "step": 8740 }, { "epoch": 14.8, "learning_rate": 2.9193333333333334e-05, "loss": 5.2527, "step": 8760 }, { "epoch": 14.83, "learning_rate": 2.9260000000000004e-05, "loss": 5.232, "step": 8780 }, { "epoch": 14.86, "learning_rate": 2.9326666666666668e-05, "loss": 5.219, "step": 8800 }, { "epoch": 14.9, "learning_rate": 2.9393333333333335e-05, "loss": 5.2324, "step": 8820 }, { "epoch": 14.93, "learning_rate": 2.946e-05, "loss": 5.2374, "step": 8840 }, { "epoch": 14.97, "learning_rate": 2.952666666666667e-05, "loss": 5.225, "step": 8860 }, { "epoch": 15.0, "learning_rate": 2.9593333333333333e-05, "loss": 5.2252, "step": 8880 }, { "epoch": 15.03, "learning_rate": 2.9659999999999997e-05, "loss": 5.197, "step": 8900 }, { "epoch": 15.07, "learning_rate": 2.9726666666666668e-05, "loss": 5.1955, "step": 8920 }, { "epoch": 15.1, "learning_rate": 2.9793333333333335e-05, "loss": 5.1948, "step": 8940 }, { "epoch": 15.14, "learning_rate": 2.986e-05, "loss": 5.1977, "step": 8960 }, { "epoch": 15.17, "learning_rate": 2.992666666666667e-05, "loss": 5.1877, "step": 8980 }, { "epoch": 15.2, "learning_rate": 2.9993333333333333e-05, "loss": 5.185, "step": 9000 }, { "epoch": 15.2, "eval_loss": 4.983438014984131, "eval_runtime": 47.943, "eval_samples_per_second": 20.629, "eval_steps_per_second": 0.125, "eval_tse_ndup": 0.006947718609787001, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.021488613614829313, "eval_tse_type": 5.266262217728345e-05, "step": 9000 }, { "epoch": 15.24, "learning_rate": 3.006e-05, "loss": 5.1537, "step": 9020 }, { "epoch": 15.27, "learning_rate": 3.012666666666667e-05, "loss": 5.1774, "step": 9040 }, { "epoch": 15.3, "learning_rate": 3.0193333333333335e-05, "loss": 5.1848, "step": 9060 }, { "epoch": 15.34, "learning_rate": 3.0259999999999998e-05, "loss": 5.187, "step": 9080 }, { "epoch": 15.37, "learning_rate": 3.032666666666667e-05, "loss": 5.1824, "step": 9100 }, { "epoch": 15.41, "learning_rate": 3.0393333333333336e-05, "loss": 5.1581, "step": 9120 }, { "epoch": 15.44, "learning_rate": 3.046e-05, "loss": 5.175, "step": 9140 }, { "epoch": 15.47, "learning_rate": 3.052666666666667e-05, "loss": 5.1655, "step": 9160 }, { "epoch": 15.51, "learning_rate": 3.059333333333334e-05, "loss": 5.1884, "step": 9180 }, { "epoch": 15.54, "learning_rate": 3.066e-05, "loss": 5.1622, "step": 9200 }, { "epoch": 15.57, "learning_rate": 3.072666666666667e-05, "loss": 5.166, "step": 9220 }, { "epoch": 15.61, "learning_rate": 3.0793333333333336e-05, "loss": 5.1487, "step": 9240 }, { "epoch": 15.64, "learning_rate": 3.086e-05, "loss": 5.1449, "step": 9260 }, { "epoch": 15.68, "learning_rate": 3.092666666666667e-05, "loss": 5.1215, "step": 9280 }, { "epoch": 15.71, "learning_rate": 3.0993333333333334e-05, "loss": 5.1455, "step": 9300 }, { "epoch": 15.74, "learning_rate": 3.106e-05, "loss": 5.141, "step": 9320 }, { "epoch": 15.78, "learning_rate": 3.112666666666667e-05, "loss": 5.1339, "step": 9340 }, { "epoch": 15.81, "learning_rate": 3.119333333333334e-05, "loss": 5.1455, "step": 9360 }, { "epoch": 15.84, "learning_rate": 3.126e-05, "loss": 5.1416, "step": 9380 }, { "epoch": 15.88, "learning_rate": 3.132666666666667e-05, "loss": 5.1371, "step": 9400 }, { "epoch": 15.91, "learning_rate": 3.1393333333333337e-05, "loss": 5.1065, "step": 9420 }, { "epoch": 15.95, "learning_rate": 3.146e-05, "loss": 5.1175, "step": 9440 }, { "epoch": 15.98, "learning_rate": 3.1526666666666664e-05, "loss": 5.1194, "step": 9460 }, { "epoch": 16.01, "learning_rate": 3.1593333333333335e-05, "loss": 5.1007, "step": 9480 }, { "epoch": 16.05, "learning_rate": 3.166e-05, "loss": 5.1, "step": 9500 }, { "epoch": 16.08, "learning_rate": 3.172666666666667e-05, "loss": 5.0957, "step": 9520 }, { "epoch": 16.11, "learning_rate": 3.179333333333333e-05, "loss": 5.0958, "step": 9540 }, { "epoch": 16.15, "learning_rate": 3.186e-05, "loss": 5.0879, "step": 9560 }, { "epoch": 16.18, "learning_rate": 3.192666666666667e-05, "loss": 5.107, "step": 9580 }, { "epoch": 16.22, "learning_rate": 3.199333333333334e-05, "loss": 5.0921, "step": 9600 }, { "epoch": 16.25, "learning_rate": 3.206e-05, "loss": 5.0837, "step": 9620 }, { "epoch": 16.28, "learning_rate": 3.2126666666666665e-05, "loss": 5.0836, "step": 9640 }, { "epoch": 16.32, "learning_rate": 3.2193333333333336e-05, "loss": 5.0768, "step": 9660 }, { "epoch": 16.35, "learning_rate": 3.226e-05, "loss": 5.0825, "step": 9680 }, { "epoch": 16.39, "learning_rate": 3.232666666666666e-05, "loss": 5.0684, "step": 9700 }, { "epoch": 16.42, "learning_rate": 3.2393333333333334e-05, "loss": 5.08, "step": 9720 }, { "epoch": 16.45, "learning_rate": 3.2460000000000004e-05, "loss": 5.0809, "step": 9740 }, { "epoch": 16.49, "learning_rate": 3.252666666666667e-05, "loss": 5.0806, "step": 9760 }, { "epoch": 16.52, "learning_rate": 3.259333333333334e-05, "loss": 5.0728, "step": 9780 }, { "epoch": 16.55, "learning_rate": 3.266e-05, "loss": 5.045, "step": 9800 }, { "epoch": 16.59, "learning_rate": 3.2726666666666666e-05, "loss": 5.0504, "step": 9820 }, { "epoch": 16.62, "learning_rate": 3.279333333333334e-05, "loss": 5.05, "step": 9840 }, { "epoch": 16.66, "learning_rate": 3.286e-05, "loss": 5.05, "step": 9860 }, { "epoch": 16.69, "learning_rate": 3.2926666666666664e-05, "loss": 5.0503, "step": 9880 }, { "epoch": 16.72, "learning_rate": 3.2993333333333335e-05, "loss": 5.0635, "step": 9900 }, { "epoch": 16.76, "learning_rate": 3.3060000000000005e-05, "loss": 5.0389, "step": 9920 }, { "epoch": 16.79, "learning_rate": 3.312666666666667e-05, "loss": 5.0455, "step": 9940 }, { "epoch": 16.82, "learning_rate": 3.319333333333334e-05, "loss": 5.0375, "step": 9960 }, { "epoch": 16.86, "learning_rate": 3.3260000000000003e-05, "loss": 5.0244, "step": 9980 }, { "epoch": 16.89, "learning_rate": 3.332666666666667e-05, "loss": 5.0289, "step": 10000 }, { "epoch": 16.89, "eval_loss": 4.82033109664917, "eval_runtime": 47.23, "eval_samples_per_second": 20.94, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.011317646952682265, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.020578981676853885, "eval_tse_type": 2.3322018392796954e-05, "step": 10000 }, { "epoch": 16.93, "learning_rate": 3.339333333333334e-05, "loss": 5.0197, "step": 10020 }, { "epoch": 16.96, "learning_rate": 3.346e-05, "loss": 5.0374, "step": 10040 }, { "epoch": 16.99, "learning_rate": 3.3526666666666665e-05, "loss": 5.0181, "step": 10060 }, { "epoch": 17.03, "learning_rate": 3.359333333333333e-05, "loss": 5.025, "step": 10080 }, { "epoch": 17.06, "learning_rate": 3.366e-05, "loss": 5.0142, "step": 10100 }, { "epoch": 17.09, "learning_rate": 3.372666666666667e-05, "loss": 4.9989, "step": 10120 }, { "epoch": 17.13, "learning_rate": 3.3793333333333334e-05, "loss": 5.006, "step": 10140 }, { "epoch": 17.16, "learning_rate": 3.3860000000000004e-05, "loss": 5.0033, "step": 10160 }, { "epoch": 17.2, "learning_rate": 3.392666666666667e-05, "loss": 4.9941, "step": 10180 }, { "epoch": 17.23, "learning_rate": 3.399333333333333e-05, "loss": 4.9855, "step": 10200 }, { "epoch": 17.26, "learning_rate": 3.406e-05, "loss": 4.9799, "step": 10220 }, { "epoch": 17.3, "learning_rate": 3.4126666666666666e-05, "loss": 4.9858, "step": 10240 }, { "epoch": 17.33, "learning_rate": 3.419333333333333e-05, "loss": 4.9752, "step": 10260 }, { "epoch": 17.36, "learning_rate": 3.426e-05, "loss": 5.002, "step": 10280 }, { "epoch": 17.4, "learning_rate": 3.432666666666667e-05, "loss": 4.9834, "step": 10300 }, { "epoch": 17.43, "learning_rate": 3.4393333333333335e-05, "loss": 4.9915, "step": 10320 }, { "epoch": 17.47, "learning_rate": 3.4460000000000005e-05, "loss": 4.9699, "step": 10340 }, { "epoch": 17.5, "learning_rate": 3.452666666666667e-05, "loss": 4.9862, "step": 10360 }, { "epoch": 17.53, "learning_rate": 3.459333333333333e-05, "loss": 4.9749, "step": 10380 }, { "epoch": 17.57, "learning_rate": 3.4660000000000004e-05, "loss": 4.9767, "step": 10400 }, { "epoch": 17.6, "learning_rate": 3.472666666666667e-05, "loss": 4.9717, "step": 10420 }, { "epoch": 17.64, "learning_rate": 3.479333333333333e-05, "loss": 4.9541, "step": 10440 }, { "epoch": 17.67, "learning_rate": 3.486e-05, "loss": 4.9645, "step": 10460 }, { "epoch": 17.7, "learning_rate": 3.4926666666666665e-05, "loss": 4.9683, "step": 10480 }, { "epoch": 17.74, "learning_rate": 3.4993333333333336e-05, "loss": 4.9608, "step": 10500 }, { "epoch": 17.77, "learning_rate": 3.5060000000000007e-05, "loss": 4.9486, "step": 10520 }, { "epoch": 17.8, "learning_rate": 3.512666666666667e-05, "loss": 4.9536, "step": 10540 }, { "epoch": 17.84, "learning_rate": 3.5193333333333334e-05, "loss": 4.9385, "step": 10560 }, { "epoch": 17.87, "learning_rate": 3.5260000000000005e-05, "loss": 4.9397, "step": 10580 }, { "epoch": 17.91, "learning_rate": 3.532666666666667e-05, "loss": 4.9217, "step": 10600 }, { "epoch": 17.94, "learning_rate": 3.539333333333333e-05, "loss": 4.9301, "step": 10620 }, { "epoch": 17.97, "learning_rate": 3.546e-05, "loss": 4.9377, "step": 10640 }, { "epoch": 18.01, "learning_rate": 3.5526666666666666e-05, "loss": 4.9295, "step": 10660 }, { "epoch": 18.04, "learning_rate": 3.559333333333334e-05, "loss": 4.9267, "step": 10680 }, { "epoch": 18.07, "learning_rate": 3.566e-05, "loss": 4.9248, "step": 10700 }, { "epoch": 18.11, "learning_rate": 3.572333333333334e-05, "loss": 4.9083, "step": 10720 }, { "epoch": 18.14, "learning_rate": 3.579e-05, "loss": 4.9003, "step": 10740 }, { "epoch": 18.18, "learning_rate": 3.5856666666666665e-05, "loss": 4.9183, "step": 10760 }, { "epoch": 18.21, "learning_rate": 3.5923333333333336e-05, "loss": 4.9017, "step": 10780 }, { "epoch": 18.24, "learning_rate": 3.599e-05, "loss": 4.9102, "step": 10800 }, { "epoch": 18.28, "learning_rate": 3.605666666666666e-05, "loss": 4.9091, "step": 10820 }, { "epoch": 18.31, "learning_rate": 3.6123333333333334e-05, "loss": 4.9065, "step": 10840 }, { "epoch": 18.34, "learning_rate": 3.6190000000000004e-05, "loss": 4.8885, "step": 10860 }, { "epoch": 18.38, "learning_rate": 3.625666666666667e-05, "loss": 4.8856, "step": 10880 }, { "epoch": 18.41, "learning_rate": 3.632333333333334e-05, "loss": 4.8897, "step": 10900 }, { "epoch": 18.45, "learning_rate": 3.639e-05, "loss": 4.8961, "step": 10920 }, { "epoch": 18.48, "learning_rate": 3.6456666666666666e-05, "loss": 4.8833, "step": 10940 }, { "epoch": 18.51, "learning_rate": 3.6523333333333337e-05, "loss": 4.8744, "step": 10960 }, { "epoch": 18.55, "learning_rate": 3.659e-05, "loss": 4.8762, "step": 10980 }, { "epoch": 18.58, "learning_rate": 3.6656666666666664e-05, "loss": 4.9051, "step": 11000 }, { "epoch": 18.58, "eval_loss": 4.668403148651123, "eval_runtime": 47.1936, "eval_samples_per_second": 20.956, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.013447760109870657, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.017324954159006013, "eval_tse_type": 2.2820469610156163e-05, "step": 11000 }, { "epoch": 18.61, "learning_rate": 3.6723333333333335e-05, "loss": 4.8762, "step": 11020 }, { "epoch": 18.65, "learning_rate": 3.6790000000000005e-05, "loss": 4.8818, "step": 11040 }, { "epoch": 18.68, "learning_rate": 3.685666666666667e-05, "loss": 4.8846, "step": 11060 }, { "epoch": 18.72, "learning_rate": 3.692333333333334e-05, "loss": 4.869, "step": 11080 }, { "epoch": 18.75, "learning_rate": 3.699e-05, "loss": 4.8728, "step": 11100 }, { "epoch": 18.78, "learning_rate": 3.705666666666667e-05, "loss": 4.8554, "step": 11120 }, { "epoch": 18.82, "learning_rate": 3.712333333333334e-05, "loss": 4.8501, "step": 11140 }, { "epoch": 18.85, "learning_rate": 3.719e-05, "loss": 4.8576, "step": 11160 }, { "epoch": 18.89, "learning_rate": 3.7256666666666665e-05, "loss": 4.8459, "step": 11180 }, { "epoch": 18.92, "learning_rate": 3.7323333333333336e-05, "loss": 4.8468, "step": 11200 }, { "epoch": 18.95, "learning_rate": 3.739e-05, "loss": 4.8401, "step": 11220 }, { "epoch": 18.99, "learning_rate": 3.745666666666667e-05, "loss": 4.8533, "step": 11240 }, { "epoch": 19.02, "learning_rate": 3.7523333333333334e-05, "loss": 4.8369, "step": 11260 }, { "epoch": 19.05, "learning_rate": 3.7590000000000004e-05, "loss": 4.834, "step": 11280 }, { "epoch": 19.09, "learning_rate": 3.765666666666667e-05, "loss": 4.8195, "step": 11300 }, { "epoch": 19.12, "learning_rate": 3.772333333333333e-05, "loss": 4.8366, "step": 11320 }, { "epoch": 19.16, "learning_rate": 3.779e-05, "loss": 4.8187, "step": 11340 }, { "epoch": 19.19, "learning_rate": 3.7856666666666666e-05, "loss": 4.8247, "step": 11360 }, { "epoch": 19.22, "learning_rate": 3.792333333333333e-05, "loss": 4.8125, "step": 11380 }, { "epoch": 19.26, "learning_rate": 3.799e-05, "loss": 4.8191, "step": 11400 }, { "epoch": 19.29, "learning_rate": 3.805666666666667e-05, "loss": 4.8088, "step": 11420 }, { "epoch": 19.32, "learning_rate": 3.8123333333333335e-05, "loss": 4.809, "step": 11440 }, { "epoch": 19.36, "learning_rate": 3.8190000000000005e-05, "loss": 4.8036, "step": 11460 }, { "epoch": 19.39, "learning_rate": 3.825666666666667e-05, "loss": 4.8101, "step": 11480 }, { "epoch": 19.43, "learning_rate": 3.832333333333333e-05, "loss": 4.8087, "step": 11500 }, { "epoch": 19.46, "learning_rate": 3.8390000000000003e-05, "loss": 4.8172, "step": 11520 }, { "epoch": 19.49, "learning_rate": 3.845666666666667e-05, "loss": 4.7924, "step": 11540 }, { "epoch": 19.53, "learning_rate": 3.852333333333333e-05, "loss": 4.7777, "step": 11560 }, { "epoch": 19.56, "learning_rate": 3.859e-05, "loss": 4.7867, "step": 11580 }, { "epoch": 19.59, "learning_rate": 3.865666666666667e-05, "loss": 4.7873, "step": 11600 }, { "epoch": 19.63, "learning_rate": 3.8723333333333336e-05, "loss": 4.7886, "step": 11620 }, { "epoch": 19.66, "learning_rate": 3.8790000000000006e-05, "loss": 4.7865, "step": 11640 }, { "epoch": 19.7, "learning_rate": 3.885666666666667e-05, "loss": 4.7948, "step": 11660 }, { "epoch": 19.73, "learning_rate": 3.8923333333333334e-05, "loss": 4.7785, "step": 11680 }, { "epoch": 19.76, "learning_rate": 3.8990000000000004e-05, "loss": 4.7578, "step": 11700 }, { "epoch": 19.8, "learning_rate": 3.905666666666667e-05, "loss": 4.7851, "step": 11720 }, { "epoch": 19.83, "learning_rate": 3.912333333333333e-05, "loss": 4.7637, "step": 11740 }, { "epoch": 19.86, "learning_rate": 3.919e-05, "loss": 4.769, "step": 11760 }, { "epoch": 19.9, "learning_rate": 3.9256666666666666e-05, "loss": 4.7635, "step": 11780 }, { "epoch": 19.93, "learning_rate": 3.932333333333334e-05, "loss": 4.7621, "step": 11800 }, { "epoch": 19.97, "learning_rate": 3.939e-05, "loss": 4.7544, "step": 11820 }, { "epoch": 20.0, "learning_rate": 3.945666666666667e-05, "loss": 4.7671, "step": 11840 }, { "epoch": 20.03, "learning_rate": 3.9523333333333335e-05, "loss": 4.7392, "step": 11860 }, { "epoch": 20.07, "learning_rate": 3.959e-05, "loss": 4.7206, "step": 11880 }, { "epoch": 20.1, "learning_rate": 3.965666666666667e-05, "loss": 4.7336, "step": 11900 }, { "epoch": 20.14, "learning_rate": 3.972333333333333e-05, "loss": 4.7395, "step": 11920 }, { "epoch": 20.17, "learning_rate": 3.979e-05, "loss": 4.7248, "step": 11940 }, { "epoch": 20.2, "learning_rate": 3.985666666666667e-05, "loss": 4.7279, "step": 11960 }, { "epoch": 20.24, "learning_rate": 3.992333333333334e-05, "loss": 4.7454, "step": 11980 }, { "epoch": 20.27, "learning_rate": 3.999e-05, "loss": 4.7226, "step": 12000 }, { "epoch": 20.27, "eval_loss": 4.5096211433410645, "eval_runtime": 47.265, "eval_samples_per_second": 20.925, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.004553048152892597, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.023718560323270747, "eval_tse_type": 3.159757330637007e-05, "step": 12000 }, { "epoch": 20.3, "learning_rate": 4.005666666666667e-05, "loss": 4.7199, "step": 12020 }, { "epoch": 20.34, "learning_rate": 4.0123333333333336e-05, "loss": 4.7172, "step": 12040 }, { "epoch": 20.37, "learning_rate": 4.019e-05, "loss": 4.721, "step": 12060 }, { "epoch": 20.41, "learning_rate": 4.025666666666667e-05, "loss": 4.6972, "step": 12080 }, { "epoch": 20.44, "learning_rate": 4.0323333333333334e-05, "loss": 4.7091, "step": 12100 }, { "epoch": 20.47, "learning_rate": 4.039e-05, "loss": 4.7091, "step": 12120 }, { "epoch": 20.51, "learning_rate": 4.045666666666667e-05, "loss": 4.6999, "step": 12140 }, { "epoch": 20.54, "learning_rate": 4.052333333333333e-05, "loss": 4.6933, "step": 12160 }, { "epoch": 20.57, "learning_rate": 4.059e-05, "loss": 4.707, "step": 12180 }, { "epoch": 20.61, "learning_rate": 4.065666666666667e-05, "loss": 4.7006, "step": 12200 }, { "epoch": 20.64, "learning_rate": 4.072333333333334e-05, "loss": 4.6829, "step": 12220 }, { "epoch": 20.68, "learning_rate": 4.079e-05, "loss": 4.7021, "step": 12240 }, { "epoch": 20.71, "learning_rate": 4.085666666666667e-05, "loss": 4.7069, "step": 12260 }, { "epoch": 20.74, "learning_rate": 4.0923333333333335e-05, "loss": 4.6919, "step": 12280 }, { "epoch": 20.78, "learning_rate": 4.099e-05, "loss": 4.6995, "step": 12300 }, { "epoch": 20.81, "learning_rate": 4.105666666666667e-05, "loss": 4.689, "step": 12320 }, { "epoch": 20.84, "learning_rate": 4.112333333333333e-05, "loss": 4.6863, "step": 12340 }, { "epoch": 20.88, "learning_rate": 4.1190000000000004e-05, "loss": 4.6779, "step": 12360 }, { "epoch": 20.91, "learning_rate": 4.1256666666666674e-05, "loss": 4.6769, "step": 12380 }, { "epoch": 20.95, "learning_rate": 4.132333333333334e-05, "loss": 4.6919, "step": 12400 }, { "epoch": 20.98, "learning_rate": 4.139e-05, "loss": 4.6536, "step": 12420 }, { "epoch": 21.01, "learning_rate": 4.145666666666667e-05, "loss": 4.6801, "step": 12440 }, { "epoch": 21.05, "learning_rate": 4.1523333333333336e-05, "loss": 4.6575, "step": 12460 }, { "epoch": 21.08, "learning_rate": 4.159e-05, "loss": 4.6575, "step": 12480 }, { "epoch": 21.11, "learning_rate": 4.1656666666666664e-05, "loss": 4.6461, "step": 12500 }, { "epoch": 21.15, "learning_rate": 4.1723333333333334e-05, "loss": 4.6405, "step": 12520 }, { "epoch": 21.18, "learning_rate": 4.179e-05, "loss": 4.6446, "step": 12540 }, { "epoch": 21.22, "learning_rate": 4.185666666666667e-05, "loss": 4.634, "step": 12560 }, { "epoch": 21.25, "learning_rate": 4.192333333333334e-05, "loss": 4.6323, "step": 12580 }, { "epoch": 21.28, "learning_rate": 4.199e-05, "loss": 4.6389, "step": 12600 }, { "epoch": 21.32, "learning_rate": 4.205666666666667e-05, "loss": 4.6479, "step": 12620 }, { "epoch": 21.35, "learning_rate": 4.212333333333334e-05, "loss": 4.6287, "step": 12640 }, { "epoch": 21.39, "learning_rate": 4.219e-05, "loss": 4.6206, "step": 12660 }, { "epoch": 21.42, "learning_rate": 4.2256666666666665e-05, "loss": 4.6247, "step": 12680 }, { "epoch": 21.45, "learning_rate": 4.2323333333333335e-05, "loss": 4.6219, "step": 12700 }, { "epoch": 21.49, "learning_rate": 4.239e-05, "loss": 4.6233, "step": 12720 }, { "epoch": 21.52, "learning_rate": 4.245666666666667e-05, "loss": 4.6182, "step": 12740 }, { "epoch": 21.55, "learning_rate": 4.252333333333334e-05, "loss": 4.6204, "step": 12760 }, { "epoch": 21.59, "learning_rate": 4.2590000000000004e-05, "loss": 4.6288, "step": 12780 }, { "epoch": 21.62, "learning_rate": 4.265666666666667e-05, "loss": 4.6106, "step": 12800 }, { "epoch": 21.66, "learning_rate": 4.272333333333334e-05, "loss": 4.6268, "step": 12820 }, { "epoch": 21.69, "learning_rate": 4.279e-05, "loss": 4.6165, "step": 12840 }, { "epoch": 21.72, "learning_rate": 4.2856666666666666e-05, "loss": 4.6112, "step": 12860 }, { "epoch": 21.76, "learning_rate": 4.292e-05, "loss": 4.6031, "step": 12880 }, { "epoch": 21.79, "learning_rate": 4.2986666666666666e-05, "loss": 4.6123, "step": 12900 }, { "epoch": 21.82, "learning_rate": 4.305333333333334e-05, "loss": 4.5946, "step": 12920 }, { "epoch": 21.86, "learning_rate": 4.312000000000001e-05, "loss": 4.5956, "step": 12940 }, { "epoch": 21.89, "learning_rate": 4.318666666666667e-05, "loss": 4.6022, "step": 12960 }, { "epoch": 21.93, "learning_rate": 4.3253333333333335e-05, "loss": 4.6004, "step": 12980 }, { "epoch": 21.96, "learning_rate": 4.332e-05, "loss": 4.591, "step": 13000 }, { "epoch": 21.96, "eval_loss": 4.367846488952637, "eval_runtime": 48.351, "eval_samples_per_second": 20.455, "eval_steps_per_second": 0.124, "eval_tse_ndup": 0.011010868594862631, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.025262542461155377, "eval_tse_type": 8.074935400516795e-05, "step": 13000 }, { "epoch": 21.99, "learning_rate": 4.338666666666667e-05, "loss": 4.5838, "step": 13020 }, { "epoch": 22.03, "learning_rate": 4.345333333333333e-05, "loss": 4.5609, "step": 13040 }, { "epoch": 22.06, "learning_rate": 4.352e-05, "loss": 4.5916, "step": 13060 }, { "epoch": 22.09, "learning_rate": 4.358666666666667e-05, "loss": 4.5582, "step": 13080 }, { "epoch": 22.13, "learning_rate": 4.365333333333334e-05, "loss": 4.5623, "step": 13100 }, { "epoch": 22.16, "learning_rate": 4.372e-05, "loss": 4.563, "step": 13120 }, { "epoch": 22.2, "learning_rate": 4.378666666666667e-05, "loss": 4.5692, "step": 13140 }, { "epoch": 22.23, "learning_rate": 4.3853333333333336e-05, "loss": 4.5555, "step": 13160 }, { "epoch": 22.26, "learning_rate": 4.392e-05, "loss": 4.5659, "step": 13180 }, { "epoch": 22.3, "learning_rate": 4.398666666666667e-05, "loss": 4.5452, "step": 13200 }, { "epoch": 22.33, "learning_rate": 4.4053333333333334e-05, "loss": 4.5542, "step": 13220 }, { "epoch": 22.36, "learning_rate": 4.412e-05, "loss": 4.5599, "step": 13240 }, { "epoch": 22.4, "learning_rate": 4.418666666666667e-05, "loss": 4.5708, "step": 13260 }, { "epoch": 22.43, "learning_rate": 4.425333333333334e-05, "loss": 4.5405, "step": 13280 }, { "epoch": 22.47, "learning_rate": 4.432e-05, "loss": 4.534, "step": 13300 }, { "epoch": 22.5, "learning_rate": 4.438666666666667e-05, "loss": 4.5443, "step": 13320 }, { "epoch": 22.53, "learning_rate": 4.445333333333334e-05, "loss": 4.5482, "step": 13340 }, { "epoch": 22.57, "learning_rate": 4.452e-05, "loss": 4.5212, "step": 13360 }, { "epoch": 22.6, "learning_rate": 4.458666666666667e-05, "loss": 4.5322, "step": 13380 }, { "epoch": 22.64, "learning_rate": 4.4653333333333335e-05, "loss": 4.5235, "step": 13400 }, { "epoch": 22.67, "learning_rate": 4.472e-05, "loss": 4.5324, "step": 13420 }, { "epoch": 22.7, "learning_rate": 4.478666666666667e-05, "loss": 4.5232, "step": 13440 }, { "epoch": 22.74, "learning_rate": 4.485333333333333e-05, "loss": 4.5203, "step": 13460 }, { "epoch": 22.77, "learning_rate": 4.4920000000000004e-05, "loss": 4.5214, "step": 13480 }, { "epoch": 22.8, "learning_rate": 4.4986666666666674e-05, "loss": 4.5212, "step": 13500 }, { "epoch": 22.84, "learning_rate": 4.505333333333334e-05, "loss": 4.5101, "step": 13520 }, { "epoch": 22.87, "learning_rate": 4.512e-05, "loss": 4.5227, "step": 13540 }, { "epoch": 22.91, "learning_rate": 4.518666666666667e-05, "loss": 4.5105, "step": 13560 }, { "epoch": 22.94, "learning_rate": 4.5253333333333336e-05, "loss": 4.5185, "step": 13580 }, { "epoch": 22.97, "learning_rate": 4.532e-05, "loss": 4.5193, "step": 13600 }, { "epoch": 23.01, "learning_rate": 4.5386666666666664e-05, "loss": 4.4876, "step": 13620 }, { "epoch": 23.04, "learning_rate": 4.5453333333333334e-05, "loss": 4.4763, "step": 13640 }, { "epoch": 23.07, "learning_rate": 4.5520000000000005e-05, "loss": 4.4667, "step": 13660 }, { "epoch": 23.11, "learning_rate": 4.558666666666667e-05, "loss": 4.4732, "step": 13680 }, { "epoch": 23.14, "learning_rate": 4.565333333333334e-05, "loss": 4.4953, "step": 13700 }, { "epoch": 23.18, "learning_rate": 4.572e-05, "loss": 4.4903, "step": 13720 }, { "epoch": 23.21, "learning_rate": 4.5786666666666666e-05, "loss": 4.4833, "step": 13740 }, { "epoch": 23.24, "learning_rate": 4.585333333333334e-05, "loss": 4.475, "step": 13760 }, { "epoch": 23.28, "learning_rate": 4.592e-05, "loss": 4.4632, "step": 13780 }, { "epoch": 23.31, "learning_rate": 4.5986666666666665e-05, "loss": 4.4851, "step": 13800 }, { "epoch": 23.34, "learning_rate": 4.6053333333333335e-05, "loss": 4.4872, "step": 13820 }, { "epoch": 23.38, "learning_rate": 4.612e-05, "loss": 4.4695, "step": 13840 }, { "epoch": 23.41, "learning_rate": 4.618666666666667e-05, "loss": 4.45, "step": 13860 }, { "epoch": 23.45, "learning_rate": 4.625333333333334e-05, "loss": 4.4737, "step": 13880 }, { "epoch": 23.48, "learning_rate": 4.6320000000000004e-05, "loss": 4.4587, "step": 13900 }, { "epoch": 23.51, "learning_rate": 4.638666666666667e-05, "loss": 4.4517, "step": 13920 }, { "epoch": 23.55, "learning_rate": 4.645333333333334e-05, "loss": 4.4483, "step": 13940 }, { "epoch": 23.58, "learning_rate": 4.652e-05, "loss": 4.4679, "step": 13960 }, { "epoch": 23.61, "learning_rate": 4.6586666666666666e-05, "loss": 4.4671, "step": 13980 }, { "epoch": 23.65, "learning_rate": 4.6653333333333336e-05, "loss": 4.4681, "step": 14000 }, { "epoch": 23.65, "eval_loss": 4.2354302406311035, "eval_runtime": 47.1335, "eval_samples_per_second": 20.983, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.0073013202445331455, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.022982798478565217, "eval_tse_type": 4.739635995955511e-05, "step": 14000 }, { "epoch": 23.68, "learning_rate": 4.672e-05, "loss": 4.455, "step": 14020 }, { "epoch": 23.72, "learning_rate": 4.678666666666667e-05, "loss": 4.4507, "step": 14040 }, { "epoch": 23.75, "learning_rate": 4.685333333333334e-05, "loss": 4.4374, "step": 14060 }, { "epoch": 23.78, "learning_rate": 4.6920000000000005e-05, "loss": 4.4594, "step": 14080 }, { "epoch": 23.82, "learning_rate": 4.698666666666667e-05, "loss": 4.4386, "step": 14100 }, { "epoch": 23.85, "learning_rate": 4.705333333333334e-05, "loss": 4.4435, "step": 14120 }, { "epoch": 23.89, "learning_rate": 4.712e-05, "loss": 4.4422, "step": 14140 }, { "epoch": 23.92, "learning_rate": 4.718666666666667e-05, "loss": 4.4278, "step": 14160 }, { "epoch": 23.95, "learning_rate": 4.725333333333334e-05, "loss": 4.4381, "step": 14180 }, { "epoch": 23.99, "learning_rate": 4.732e-05, "loss": 4.4109, "step": 14200 }, { "epoch": 24.02, "learning_rate": 4.7386666666666665e-05, "loss": 4.3991, "step": 14220 }, { "epoch": 24.05, "learning_rate": 4.7453333333333335e-05, "loss": 4.4284, "step": 14240 }, { "epoch": 24.09, "learning_rate": 4.7520000000000006e-05, "loss": 4.4072, "step": 14260 }, { "epoch": 24.12, "learning_rate": 4.758666666666667e-05, "loss": 4.3891, "step": 14280 }, { "epoch": 24.16, "learning_rate": 4.765333333333333e-05, "loss": 4.3962, "step": 14300 }, { "epoch": 24.19, "learning_rate": 4.7720000000000004e-05, "loss": 4.4233, "step": 14320 }, { "epoch": 24.22, "learning_rate": 4.778666666666667e-05, "loss": 4.4073, "step": 14340 }, { "epoch": 24.26, "learning_rate": 4.785333333333333e-05, "loss": 4.3932, "step": 14360 }, { "epoch": 24.29, "learning_rate": 4.792e-05, "loss": 4.3902, "step": 14380 }, { "epoch": 24.32, "learning_rate": 4.7986666666666666e-05, "loss": 4.4077, "step": 14400 }, { "epoch": 24.36, "learning_rate": 4.8053333333333336e-05, "loss": 4.4033, "step": 14420 }, { "epoch": 24.39, "learning_rate": 4.812000000000001e-05, "loss": 4.3889, "step": 14440 }, { "epoch": 24.43, "learning_rate": 4.818666666666667e-05, "loss": 4.3902, "step": 14460 }, { "epoch": 24.46, "learning_rate": 4.8253333333333334e-05, "loss": 4.3947, "step": 14480 }, { "epoch": 24.49, "learning_rate": 4.8320000000000005e-05, "loss": 4.3853, "step": 14500 }, { "epoch": 24.53, "learning_rate": 4.838666666666667e-05, "loss": 4.3885, "step": 14520 }, { "epoch": 24.56, "learning_rate": 4.845333333333333e-05, "loss": 4.377, "step": 14540 }, { "epoch": 24.59, "learning_rate": 4.852e-05, "loss": 4.3586, "step": 14560 }, { "epoch": 24.63, "learning_rate": 4.858666666666667e-05, "loss": 4.3676, "step": 14580 }, { "epoch": 24.66, "learning_rate": 4.865333333333334e-05, "loss": 4.3796, "step": 14600 }, { "epoch": 24.7, "learning_rate": 4.872000000000001e-05, "loss": 4.3834, "step": 14620 }, { "epoch": 24.73, "learning_rate": 4.878666666666667e-05, "loss": 4.3548, "step": 14640 }, { "epoch": 24.76, "learning_rate": 4.8853333333333335e-05, "loss": 4.3818, "step": 14660 }, { "epoch": 24.8, "learning_rate": 4.8920000000000006e-05, "loss": 4.3803, "step": 14680 }, { "epoch": 24.83, "learning_rate": 4.898666666666667e-05, "loss": 4.3532, "step": 14700 }, { "epoch": 24.86, "learning_rate": 4.9053333333333333e-05, "loss": 4.3777, "step": 14720 }, { "epoch": 24.9, "learning_rate": 4.9120000000000004e-05, "loss": 4.3767, "step": 14740 }, { "epoch": 24.93, "learning_rate": 4.918666666666667e-05, "loss": 4.3639, "step": 14760 }, { "epoch": 24.97, "learning_rate": 4.925333333333333e-05, "loss": 4.3665, "step": 14780 }, { "epoch": 25.0, "learning_rate": 4.932e-05, "loss": 4.3638, "step": 14800 }, { "epoch": 25.03, "learning_rate": 4.938666666666667e-05, "loss": 4.3518, "step": 14820 }, { "epoch": 25.07, "learning_rate": 4.9453333333333336e-05, "loss": 4.3211, "step": 14840 }, { "epoch": 25.1, "learning_rate": 4.952e-05, "loss": 4.3176, "step": 14860 }, { "epoch": 25.14, "learning_rate": 4.958666666666667e-05, "loss": 4.3258, "step": 14880 }, { "epoch": 25.17, "learning_rate": 4.9653333333333335e-05, "loss": 4.3154, "step": 14900 }, { "epoch": 25.2, "learning_rate": 4.972e-05, "loss": 4.332, "step": 14920 }, { "epoch": 25.24, "learning_rate": 4.978666666666667e-05, "loss": 4.3278, "step": 14940 }, { "epoch": 25.27, "learning_rate": 4.985333333333333e-05, "loss": 4.336, "step": 14960 }, { "epoch": 25.3, "learning_rate": 4.992e-05, "loss": 4.3223, "step": 14980 }, { "epoch": 25.34, "learning_rate": 4.9986666666666674e-05, "loss": 4.301, "step": 15000 }, { "epoch": 25.34, "eval_loss": 4.116175174713135, "eval_runtime": 49.0128, "eval_samples_per_second": 20.178, "eval_steps_per_second": 0.122, "eval_tse_ndup": 0.010817436102613395, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.02719749724053464, "eval_tse_type": 6.84614088304685e-05, "step": 15000 }, { "epoch": 25.37, "learning_rate": 5.0053333333333344e-05, "loss": 4.3329, "step": 15020 }, { "epoch": 25.41, "learning_rate": 5.012e-05, "loss": 4.3206, "step": 15040 }, { "epoch": 25.44, "learning_rate": 5.018666666666667e-05, "loss": 4.3276, "step": 15060 }, { "epoch": 25.47, "learning_rate": 5.025333333333334e-05, "loss": 4.3132, "step": 15080 }, { "epoch": 25.51, "learning_rate": 5.032e-05, "loss": 4.3199, "step": 15100 }, { "epoch": 25.54, "learning_rate": 5.038666666666667e-05, "loss": 4.3329, "step": 15120 }, { "epoch": 25.57, "learning_rate": 5.045333333333333e-05, "loss": 4.3115, "step": 15140 }, { "epoch": 25.61, "learning_rate": 5.052e-05, "loss": 4.3041, "step": 15160 }, { "epoch": 25.64, "learning_rate": 5.058666666666667e-05, "loss": 4.2975, "step": 15180 }, { "epoch": 25.68, "learning_rate": 5.065333333333333e-05, "loss": 4.3206, "step": 15200 }, { "epoch": 25.71, "learning_rate": 5.072e-05, "loss": 4.3128, "step": 15220 }, { "epoch": 25.74, "learning_rate": 5.078666666666667e-05, "loss": 4.288, "step": 15240 }, { "epoch": 25.78, "learning_rate": 5.085333333333333e-05, "loss": 4.2992, "step": 15260 }, { "epoch": 25.81, "learning_rate": 5.092e-05, "loss": 4.2805, "step": 15280 }, { "epoch": 25.84, "learning_rate": 5.098666666666667e-05, "loss": 4.3033, "step": 15300 }, { "epoch": 25.88, "learning_rate": 5.105333333333333e-05, "loss": 4.297, "step": 15320 }, { "epoch": 25.91, "learning_rate": 5.112e-05, "loss": 4.2841, "step": 15340 }, { "epoch": 25.95, "learning_rate": 5.118666666666667e-05, "loss": 4.3054, "step": 15360 }, { "epoch": 25.98, "learning_rate": 5.125333333333333e-05, "loss": 4.28, "step": 15380 }, { "epoch": 26.01, "learning_rate": 5.132e-05, "loss": 4.2987, "step": 15400 }, { "epoch": 26.05, "learning_rate": 5.1386666666666674e-05, "loss": 4.269, "step": 15420 }, { "epoch": 26.08, "learning_rate": 5.145333333333333e-05, "loss": 4.2666, "step": 15440 }, { "epoch": 26.11, "learning_rate": 5.152e-05, "loss": 4.2605, "step": 15460 }, { "epoch": 26.15, "learning_rate": 5.158666666666667e-05, "loss": 4.2556, "step": 15480 }, { "epoch": 26.18, "learning_rate": 5.165333333333333e-05, "loss": 4.2584, "step": 15500 }, { "epoch": 26.22, "learning_rate": 5.172e-05, "loss": 4.265, "step": 15520 }, { "epoch": 26.25, "learning_rate": 5.178666666666667e-05, "loss": 4.2448, "step": 15540 }, { "epoch": 26.28, "learning_rate": 5.1853333333333334e-05, "loss": 4.2637, "step": 15560 }, { "epoch": 26.32, "learning_rate": 5.1920000000000004e-05, "loss": 4.2644, "step": 15580 }, { "epoch": 26.35, "learning_rate": 5.1986666666666675e-05, "loss": 4.2567, "step": 15600 }, { "epoch": 26.39, "learning_rate": 5.205333333333333e-05, "loss": 4.2591, "step": 15620 }, { "epoch": 26.42, "learning_rate": 5.212e-05, "loss": 4.2574, "step": 15640 }, { "epoch": 26.45, "learning_rate": 5.218666666666667e-05, "loss": 4.2384, "step": 15660 }, { "epoch": 26.49, "learning_rate": 5.225333333333333e-05, "loss": 4.2566, "step": 15680 }, { "epoch": 26.52, "learning_rate": 5.232e-05, "loss": 4.2485, "step": 15700 }, { "epoch": 26.55, "learning_rate": 5.238666666666667e-05, "loss": 4.2617, "step": 15720 }, { "epoch": 26.59, "learning_rate": 5.2453333333333335e-05, "loss": 4.2387, "step": 15740 }, { "epoch": 26.62, "learning_rate": 5.2520000000000005e-05, "loss": 4.2579, "step": 15760 }, { "epoch": 26.66, "learning_rate": 5.2586666666666676e-05, "loss": 4.2199, "step": 15780 }, { "epoch": 26.69, "learning_rate": 5.265333333333333e-05, "loss": 4.2348, "step": 15800 }, { "epoch": 26.72, "learning_rate": 5.2720000000000003e-05, "loss": 4.241, "step": 15820 }, { "epoch": 26.76, "learning_rate": 5.2786666666666674e-05, "loss": 4.2551, "step": 15840 }, { "epoch": 26.79, "learning_rate": 5.285333333333333e-05, "loss": 4.2267, "step": 15860 }, { "epoch": 26.82, "learning_rate": 5.292e-05, "loss": 4.2225, "step": 15880 }, { "epoch": 26.86, "learning_rate": 5.298666666666667e-05, "loss": 4.2263, "step": 15900 }, { "epoch": 26.89, "learning_rate": 5.3053333333333336e-05, "loss": 4.2248, "step": 15920 }, { "epoch": 26.93, "learning_rate": 5.3120000000000006e-05, "loss": 4.2236, "step": 15940 }, { "epoch": 26.96, "learning_rate": 5.318666666666667e-05, "loss": 4.2373, "step": 15960 }, { "epoch": 26.99, "learning_rate": 5.3253333333333334e-05, "loss": 4.216, "step": 15980 }, { "epoch": 27.03, "learning_rate": 5.3320000000000004e-05, "loss": 4.2041, "step": 16000 }, { "epoch": 27.03, "eval_loss": 4.027388095855713, "eval_runtime": 49.1139, "eval_samples_per_second": 20.137, "eval_steps_per_second": 0.122, "eval_tse_ndup": 0.004653635891177064, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.02689549264930171, "eval_tse_type": 5.4418042916526235e-05, "step": 16000 }, { "epoch": 27.06, "learning_rate": 5.3386666666666675e-05, "loss": 4.1844, "step": 16020 }, { "epoch": 27.09, "learning_rate": 5.345333333333333e-05, "loss": 4.1833, "step": 16040 }, { "epoch": 27.13, "learning_rate": 5.352e-05, "loss": 4.1919, "step": 16060 }, { "epoch": 27.16, "learning_rate": 5.358666666666667e-05, "loss": 4.2044, "step": 16080 }, { "epoch": 27.2, "learning_rate": 5.365333333333333e-05, "loss": 4.1999, "step": 16100 }, { "epoch": 27.23, "learning_rate": 5.372e-05, "loss": 4.1814, "step": 16120 }, { "epoch": 27.26, "learning_rate": 5.378666666666667e-05, "loss": 4.2043, "step": 16140 }, { "epoch": 27.3, "learning_rate": 5.3853333333333335e-05, "loss": 4.1859, "step": 16160 }, { "epoch": 27.33, "learning_rate": 5.3920000000000006e-05, "loss": 4.1992, "step": 16180 }, { "epoch": 27.36, "learning_rate": 5.3986666666666676e-05, "loss": 4.1918, "step": 16200 }, { "epoch": 27.4, "learning_rate": 5.405333333333333e-05, "loss": 4.183, "step": 16220 }, { "epoch": 27.43, "learning_rate": 5.4120000000000004e-05, "loss": 4.1757, "step": 16240 }, { "epoch": 27.47, "learning_rate": 5.4186666666666674e-05, "loss": 4.1887, "step": 16260 }, { "epoch": 27.5, "learning_rate": 5.425333333333333e-05, "loss": 4.1783, "step": 16280 }, { "epoch": 27.53, "learning_rate": 5.432e-05, "loss": 4.1839, "step": 16300 }, { "epoch": 27.57, "learning_rate": 5.438666666666667e-05, "loss": 4.1921, "step": 16320 }, { "epoch": 27.6, "learning_rate": 5.4453333333333336e-05, "loss": 4.1872, "step": 16340 }, { "epoch": 27.64, "learning_rate": 5.4520000000000007e-05, "loss": 4.1655, "step": 16360 }, { "epoch": 27.67, "learning_rate": 5.4586666666666664e-05, "loss": 4.1829, "step": 16380 }, { "epoch": 27.7, "learning_rate": 5.4653333333333334e-05, "loss": 4.1605, "step": 16400 }, { "epoch": 27.74, "learning_rate": 5.4720000000000005e-05, "loss": 4.1699, "step": 16420 }, { "epoch": 27.77, "learning_rate": 5.478666666666666e-05, "loss": 4.1841, "step": 16440 }, { "epoch": 27.8, "learning_rate": 5.485333333333333e-05, "loss": 4.1943, "step": 16460 }, { "epoch": 27.84, "learning_rate": 5.492e-05, "loss": 4.1516, "step": 16480 }, { "epoch": 27.87, "learning_rate": 5.4986666666666666e-05, "loss": 4.1766, "step": 16500 }, { "epoch": 27.91, "learning_rate": 5.505333333333334e-05, "loss": 4.1643, "step": 16520 }, { "epoch": 27.94, "learning_rate": 5.512000000000001e-05, "loss": 4.1652, "step": 16540 }, { "epoch": 27.97, "learning_rate": 5.5186666666666665e-05, "loss": 4.1694, "step": 16560 }, { "epoch": 28.01, "learning_rate": 5.5253333333333335e-05, "loss": 4.1623, "step": 16580 }, { "epoch": 28.04, "learning_rate": 5.5320000000000006e-05, "loss": 4.1371, "step": 16600 }, { "epoch": 28.07, "learning_rate": 5.538666666666666e-05, "loss": 4.142, "step": 16620 }, { "epoch": 28.11, "learning_rate": 5.545333333333333e-05, "loss": 4.1513, "step": 16640 }, { "epoch": 28.14, "learning_rate": 5.5520000000000004e-05, "loss": 4.1357, "step": 16660 }, { "epoch": 28.18, "learning_rate": 5.558666666666667e-05, "loss": 4.107, "step": 16680 }, { "epoch": 28.21, "learning_rate": 5.565333333333334e-05, "loss": 4.1297, "step": 16700 }, { "epoch": 28.24, "learning_rate": 5.572000000000001e-05, "loss": 4.1275, "step": 16720 }, { "epoch": 28.28, "learning_rate": 5.5786666666666666e-05, "loss": 4.1358, "step": 16740 }, { "epoch": 28.31, "learning_rate": 5.5853333333333336e-05, "loss": 4.1243, "step": 16760 }, { "epoch": 28.34, "learning_rate": 5.592000000000001e-05, "loss": 4.1366, "step": 16780 }, { "epoch": 28.38, "learning_rate": 5.5986666666666664e-05, "loss": 4.1266, "step": 16800 }, { "epoch": 28.41, "learning_rate": 5.6053333333333334e-05, "loss": 4.112, "step": 16820 }, { "epoch": 28.45, "learning_rate": 5.6120000000000005e-05, "loss": 4.1305, "step": 16840 }, { "epoch": 28.48, "learning_rate": 5.618666666666667e-05, "loss": 4.1163, "step": 16860 }, { "epoch": 28.51, "learning_rate": 5.6250000000000005e-05, "loss": 4.1156, "step": 16880 }, { "epoch": 28.55, "learning_rate": 5.6316666666666676e-05, "loss": 4.1357, "step": 16900 }, { "epoch": 28.58, "learning_rate": 5.638333333333333e-05, "loss": 4.1129, "step": 16920 }, { "epoch": 28.61, "learning_rate": 5.645e-05, "loss": 4.1258, "step": 16940 }, { "epoch": 28.65, "learning_rate": 5.6516666666666674e-05, "loss": 4.1345, "step": 16960 }, { "epoch": 28.68, "learning_rate": 5.658333333333333e-05, "loss": 4.0953, "step": 16980 }, { "epoch": 28.72, "learning_rate": 5.665e-05, "loss": 4.1014, "step": 17000 }, { "epoch": 28.72, "eval_loss": 3.9104785919189453, "eval_runtime": 47.1515, "eval_samples_per_second": 20.975, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.0052282058364623105, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.02501951527209312, "eval_tse_type": 8.426019548365353e-05, "step": 17000 }, { "epoch": 28.75, "learning_rate": 5.671666666666667e-05, "loss": 4.0944, "step": 17020 }, { "epoch": 28.78, "learning_rate": 5.6783333333333336e-05, "loss": 4.1039, "step": 17040 }, { "epoch": 28.82, "learning_rate": 5.6850000000000006e-05, "loss": 4.0933, "step": 17060 }, { "epoch": 28.85, "learning_rate": 5.691666666666668e-05, "loss": 4.1013, "step": 17080 }, { "epoch": 28.89, "learning_rate": 5.6983333333333334e-05, "loss": 4.0946, "step": 17100 }, { "epoch": 28.92, "learning_rate": 5.7050000000000004e-05, "loss": 4.0965, "step": 17120 }, { "epoch": 28.95, "learning_rate": 5.7116666666666675e-05, "loss": 4.0999, "step": 17140 }, { "epoch": 28.99, "learning_rate": 5.718333333333333e-05, "loss": 4.0836, "step": 17160 }, { "epoch": 29.02, "learning_rate": 5.725e-05, "loss": 4.0887, "step": 17180 }, { "epoch": 29.05, "learning_rate": 5.731666666666667e-05, "loss": 4.0653, "step": 17200 }, { "epoch": 29.09, "learning_rate": 5.738333333333334e-05, "loss": 4.0566, "step": 17220 }, { "epoch": 29.12, "learning_rate": 5.745e-05, "loss": 4.0655, "step": 17240 }, { "epoch": 29.16, "learning_rate": 5.751666666666667e-05, "loss": 4.0734, "step": 17260 }, { "epoch": 29.19, "learning_rate": 5.7583333333333335e-05, "loss": 4.0637, "step": 17280 }, { "epoch": 29.22, "learning_rate": 5.7650000000000005e-05, "loss": 4.0756, "step": 17300 }, { "epoch": 29.26, "learning_rate": 5.7716666666666676e-05, "loss": 4.0706, "step": 17320 }, { "epoch": 29.29, "learning_rate": 5.778333333333333e-05, "loss": 4.0541, "step": 17340 }, { "epoch": 29.32, "learning_rate": 5.7850000000000003e-05, "loss": 4.0837, "step": 17360 }, { "epoch": 29.36, "learning_rate": 5.7916666666666674e-05, "loss": 4.0597, "step": 17380 }, { "epoch": 29.39, "learning_rate": 5.798333333333333e-05, "loss": 4.0624, "step": 17400 }, { "epoch": 29.43, "learning_rate": 5.805e-05, "loss": 4.0607, "step": 17420 }, { "epoch": 29.46, "learning_rate": 5.811666666666667e-05, "loss": 4.0493, "step": 17440 }, { "epoch": 29.49, "learning_rate": 5.8183333333333336e-05, "loss": 4.0562, "step": 17460 }, { "epoch": 29.53, "learning_rate": 5.8250000000000006e-05, "loss": 4.0473, "step": 17480 }, { "epoch": 29.56, "learning_rate": 5.831666666666668e-05, "loss": 4.0315, "step": 17500 }, { "epoch": 29.59, "learning_rate": 5.8383333333333334e-05, "loss": 4.0591, "step": 17520 }, { "epoch": 29.63, "learning_rate": 5.8450000000000005e-05, "loss": 4.042, "step": 17540 }, { "epoch": 29.66, "learning_rate": 5.851666666666666e-05, "loss": 4.059, "step": 17560 }, { "epoch": 29.7, "learning_rate": 5.858333333333333e-05, "loss": 4.0502, "step": 17580 }, { "epoch": 29.73, "learning_rate": 5.865e-05, "loss": 4.0425, "step": 17600 }, { "epoch": 29.76, "learning_rate": 5.8716666666666666e-05, "loss": 4.0501, "step": 17620 }, { "epoch": 29.8, "learning_rate": 5.878333333333334e-05, "loss": 4.0398, "step": 17640 }, { "epoch": 29.83, "learning_rate": 5.885000000000001e-05, "loss": 4.0589, "step": 17660 }, { "epoch": 29.86, "learning_rate": 5.8916666666666664e-05, "loss": 4.0511, "step": 17680 }, { "epoch": 29.9, "learning_rate": 5.8983333333333335e-05, "loss": 4.0441, "step": 17700 }, { "epoch": 29.93, "learning_rate": 5.9050000000000006e-05, "loss": 4.0423, "step": 17720 }, { "epoch": 29.97, "learning_rate": 5.911666666666666e-05, "loss": 4.0582, "step": 17740 }, { "epoch": 30.0, "learning_rate": 5.918333333333333e-05, "loss": 4.0172, "step": 17760 }, { "epoch": 30.03, "learning_rate": 5.9250000000000004e-05, "loss": 3.9971, "step": 17780 }, { "epoch": 30.07, "learning_rate": 5.931666666666667e-05, "loss": 4.0049, "step": 17800 }, { "epoch": 30.1, "learning_rate": 5.938333333333334e-05, "loss": 4.0103, "step": 17820 }, { "epoch": 30.14, "learning_rate": 5.945000000000001e-05, "loss": 4.0012, "step": 17840 }, { "epoch": 30.17, "learning_rate": 5.9516666666666665e-05, "loss": 4.0021, "step": 17860 }, { "epoch": 30.2, "learning_rate": 5.9583333333333336e-05, "loss": 4.0068, "step": 17880 }, { "epoch": 30.24, "learning_rate": 5.9650000000000007e-05, "loss": 3.9981, "step": 17900 }, { "epoch": 30.27, "learning_rate": 5.9716666666666664e-05, "loss": 3.9985, "step": 17920 }, { "epoch": 30.3, "learning_rate": 5.9783333333333334e-05, "loss": 4.016, "step": 17940 }, { "epoch": 30.34, "learning_rate": 5.9850000000000005e-05, "loss": 4.0167, "step": 17960 }, { "epoch": 30.37, "learning_rate": 5.991666666666667e-05, "loss": 4.0145, "step": 17980 }, { "epoch": 30.41, "learning_rate": 5.998333333333334e-05, "loss": 3.9873, "step": 18000 }, { "epoch": 30.41, "eval_loss": 3.824690818786621, "eval_runtime": 47.1697, "eval_samples_per_second": 20.967, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.00881404733764158, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.028798604985216605, "eval_tse_type": 0.00015096618357487925, "step": 18000 }, { "epoch": 30.44, "learning_rate": 6.005000000000001e-05, "loss": 3.9943, "step": 18020 }, { "epoch": 30.47, "learning_rate": 6.0116666666666667e-05, "loss": 4.0006, "step": 18040 }, { "epoch": 30.51, "learning_rate": 6.018333333333334e-05, "loss": 4.0087, "step": 18060 }, { "epoch": 30.54, "learning_rate": 6.025000000000001e-05, "loss": 3.996, "step": 18080 }, { "epoch": 30.57, "learning_rate": 6.0316666666666665e-05, "loss": 3.9905, "step": 18100 }, { "epoch": 30.61, "learning_rate": 6.0383333333333335e-05, "loss": 3.9765, "step": 18120 }, { "epoch": 30.64, "learning_rate": 6.0450000000000006e-05, "loss": 3.9967, "step": 18140 }, { "epoch": 30.68, "learning_rate": 6.051666666666666e-05, "loss": 3.997, "step": 18160 }, { "epoch": 30.71, "learning_rate": 6.058333333333333e-05, "loss": 3.9924, "step": 18180 }, { "epoch": 30.74, "learning_rate": 6.0650000000000004e-05, "loss": 3.9955, "step": 18200 }, { "epoch": 30.78, "learning_rate": 6.071666666666667e-05, "loss": 3.9964, "step": 18220 }, { "epoch": 30.81, "learning_rate": 6.078333333333334e-05, "loss": 4.0035, "step": 18240 }, { "epoch": 30.84, "learning_rate": 6.085000000000001e-05, "loss": 3.985, "step": 18260 }, { "epoch": 30.88, "learning_rate": 6.0916666666666666e-05, "loss": 3.9823, "step": 18280 }, { "epoch": 30.91, "learning_rate": 6.0983333333333336e-05, "loss": 3.9668, "step": 18300 }, { "epoch": 30.95, "learning_rate": 6.105e-05, "loss": 3.9813, "step": 18320 }, { "epoch": 30.98, "learning_rate": 6.111666666666667e-05, "loss": 3.9721, "step": 18340 }, { "epoch": 31.01, "learning_rate": 6.118333333333333e-05, "loss": 3.9785, "step": 18360 }, { "epoch": 31.05, "learning_rate": 6.125000000000001e-05, "loss": 3.9517, "step": 18380 }, { "epoch": 31.08, "learning_rate": 6.131666666666666e-05, "loss": 3.9356, "step": 18400 }, { "epoch": 31.11, "learning_rate": 6.138333333333334e-05, "loss": 3.9674, "step": 18420 }, { "epoch": 31.15, "learning_rate": 6.145e-05, "loss": 3.9513, "step": 18440 }, { "epoch": 31.18, "learning_rate": 6.151666666666667e-05, "loss": 3.9554, "step": 18460 }, { "epoch": 31.22, "learning_rate": 6.158e-05, "loss": 3.9747, "step": 18480 }, { "epoch": 31.25, "learning_rate": 6.164666666666668e-05, "loss": 3.9409, "step": 18500 }, { "epoch": 31.28, "learning_rate": 6.171333333333333e-05, "loss": 3.9338, "step": 18520 }, { "epoch": 31.32, "learning_rate": 6.178000000000001e-05, "loss": 3.9511, "step": 18540 }, { "epoch": 31.35, "learning_rate": 6.184666666666667e-05, "loss": 3.9455, "step": 18560 }, { "epoch": 31.39, "learning_rate": 6.191333333333334e-05, "loss": 3.9652, "step": 18580 }, { "epoch": 31.42, "learning_rate": 6.198e-05, "loss": 3.948, "step": 18600 }, { "epoch": 31.45, "learning_rate": 6.204666666666668e-05, "loss": 3.9418, "step": 18620 }, { "epoch": 31.49, "learning_rate": 6.211333333333334e-05, "loss": 3.9469, "step": 18640 }, { "epoch": 31.52, "learning_rate": 6.218e-05, "loss": 3.9209, "step": 18660 }, { "epoch": 31.55, "learning_rate": 6.224666666666667e-05, "loss": 3.9364, "step": 18680 }, { "epoch": 31.59, "learning_rate": 6.231333333333333e-05, "loss": 3.9433, "step": 18700 }, { "epoch": 31.62, "learning_rate": 6.238000000000001e-05, "loss": 3.9114, "step": 18720 }, { "epoch": 31.66, "learning_rate": 6.244666666666666e-05, "loss": 3.937, "step": 18740 }, { "epoch": 31.69, "learning_rate": 6.251333333333334e-05, "loss": 3.9331, "step": 18760 }, { "epoch": 31.72, "learning_rate": 6.258e-05, "loss": 3.9161, "step": 18780 }, { "epoch": 31.76, "learning_rate": 6.264666666666666e-05, "loss": 3.9347, "step": 18800 }, { "epoch": 31.79, "learning_rate": 6.271333333333334e-05, "loss": 3.9473, "step": 18820 }, { "epoch": 31.82, "learning_rate": 6.278e-05, "loss": 3.9257, "step": 18840 }, { "epoch": 31.86, "learning_rate": 6.284666666666667e-05, "loss": 3.9495, "step": 18860 }, { "epoch": 31.89, "learning_rate": 6.291333333333333e-05, "loss": 3.9505, "step": 18880 }, { "epoch": 31.93, "learning_rate": 6.298000000000001e-05, "loss": 3.9255, "step": 18900 }, { "epoch": 31.96, "learning_rate": 6.304666666666666e-05, "loss": 3.9245, "step": 18920 }, { "epoch": 31.99, "learning_rate": 6.311333333333334e-05, "loss": 3.9306, "step": 18940 }, { "epoch": 32.03, "learning_rate": 6.318e-05, "loss": 3.8777, "step": 18960 }, { "epoch": 32.06, "learning_rate": 6.324666666666667e-05, "loss": 3.8864, "step": 18980 }, { "epoch": 32.09, "learning_rate": 6.331333333333333e-05, "loss": 3.9045, "step": 19000 }, { "epoch": 32.09, "eval_loss": 3.742777109146118, "eval_runtime": 47.4769, "eval_samples_per_second": 20.831, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.007489709142089086, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.021463110749665632, "eval_tse_type": 0.0001351673969216942, "step": 19000 }, { "epoch": 32.13, "learning_rate": 6.338e-05, "loss": 3.8955, "step": 19020 }, { "epoch": 32.16, "learning_rate": 6.344666666666667e-05, "loss": 3.8919, "step": 19040 }, { "epoch": 32.2, "learning_rate": 6.351333333333333e-05, "loss": 3.8977, "step": 19060 }, { "epoch": 32.23, "learning_rate": 6.358000000000001e-05, "loss": 3.8959, "step": 19080 }, { "epoch": 32.26, "learning_rate": 6.364666666666666e-05, "loss": 3.8859, "step": 19100 }, { "epoch": 32.3, "learning_rate": 6.371333333333334e-05, "loss": 3.8999, "step": 19120 }, { "epoch": 32.33, "learning_rate": 6.378e-05, "loss": 3.8998, "step": 19140 }, { "epoch": 32.36, "learning_rate": 6.384666666666667e-05, "loss": 3.885, "step": 19160 }, { "epoch": 32.4, "learning_rate": 6.391333333333333e-05, "loss": 3.9161, "step": 19180 }, { "epoch": 32.43, "learning_rate": 6.398000000000001e-05, "loss": 3.9012, "step": 19200 }, { "epoch": 32.47, "learning_rate": 6.404666666666667e-05, "loss": 3.8793, "step": 19220 }, { "epoch": 32.5, "learning_rate": 6.411333333333333e-05, "loss": 3.8778, "step": 19240 }, { "epoch": 32.53, "learning_rate": 6.418000000000001e-05, "loss": 3.8977, "step": 19260 }, { "epoch": 32.57, "learning_rate": 6.424666666666666e-05, "loss": 3.8798, "step": 19280 }, { "epoch": 32.6, "learning_rate": 6.431333333333334e-05, "loss": 3.8643, "step": 19300 }, { "epoch": 32.64, "learning_rate": 6.438e-05, "loss": 3.8896, "step": 19320 }, { "epoch": 32.67, "learning_rate": 6.444666666666667e-05, "loss": 3.8901, "step": 19340 }, { "epoch": 32.7, "learning_rate": 6.451333333333333e-05, "loss": 3.8805, "step": 19360 }, { "epoch": 32.74, "learning_rate": 6.458000000000001e-05, "loss": 3.892, "step": 19380 }, { "epoch": 32.77, "learning_rate": 6.464666666666667e-05, "loss": 3.8897, "step": 19400 }, { "epoch": 32.8, "learning_rate": 6.471333333333334e-05, "loss": 3.8872, "step": 19420 }, { "epoch": 32.84, "learning_rate": 6.478000000000001e-05, "loss": 3.9123, "step": 19440 }, { "epoch": 32.87, "learning_rate": 6.484666666666666e-05, "loss": 3.8569, "step": 19460 }, { "epoch": 32.91, "learning_rate": 6.491333333333334e-05, "loss": 3.865, "step": 19480 }, { "epoch": 32.94, "learning_rate": 6.498e-05, "loss": 3.8848, "step": 19500 }, { "epoch": 32.97, "learning_rate": 6.504666666666667e-05, "loss": 3.8906, "step": 19520 }, { "epoch": 33.01, "learning_rate": 6.511333333333333e-05, "loss": 3.8834, "step": 19540 }, { "epoch": 33.04, "learning_rate": 6.518000000000001e-05, "loss": 3.8466, "step": 19560 }, { "epoch": 33.07, "learning_rate": 6.524666666666667e-05, "loss": 3.8549, "step": 19580 }, { "epoch": 33.11, "learning_rate": 6.531333333333334e-05, "loss": 3.853, "step": 19600 }, { "epoch": 33.14, "learning_rate": 6.538000000000001e-05, "loss": 3.8348, "step": 19620 }, { "epoch": 33.18, "learning_rate": 6.544666666666666e-05, "loss": 3.8324, "step": 19640 }, { "epoch": 33.21, "learning_rate": 6.551333333333334e-05, "loss": 3.8482, "step": 19660 }, { "epoch": 33.24, "learning_rate": 6.558e-05, "loss": 3.8503, "step": 19680 }, { "epoch": 33.28, "learning_rate": 6.564666666666667e-05, "loss": 3.8215, "step": 19700 }, { "epoch": 33.31, "learning_rate": 6.571333333333333e-05, "loss": 3.8288, "step": 19720 }, { "epoch": 33.34, "learning_rate": 6.578000000000001e-05, "loss": 3.8424, "step": 19740 }, { "epoch": 33.38, "learning_rate": 6.584666666666667e-05, "loss": 3.827, "step": 19760 }, { "epoch": 33.41, "learning_rate": 6.591333333333334e-05, "loss": 3.8296, "step": 19780 }, { "epoch": 33.45, "learning_rate": 6.598e-05, "loss": 3.8509, "step": 19800 }, { "epoch": 33.48, "learning_rate": 6.604666666666667e-05, "loss": 3.8423, "step": 19820 }, { "epoch": 33.51, "learning_rate": 6.611333333333334e-05, "loss": 3.8294, "step": 19840 }, { "epoch": 33.55, "learning_rate": 6.618e-05, "loss": 3.8269, "step": 19860 }, { "epoch": 33.58, "learning_rate": 6.624666666666667e-05, "loss": 3.8302, "step": 19880 }, { "epoch": 33.61, "learning_rate": 6.631333333333333e-05, "loss": 3.8409, "step": 19900 }, { "epoch": 33.65, "learning_rate": 6.638e-05, "loss": 3.8368, "step": 19920 }, { "epoch": 33.68, "learning_rate": 6.644666666666666e-05, "loss": 3.8509, "step": 19940 }, { "epoch": 33.72, "learning_rate": 6.651333333333334e-05, "loss": 3.8311, "step": 19960 }, { "epoch": 33.75, "learning_rate": 6.658e-05, "loss": 3.8274, "step": 19980 }, { "epoch": 33.78, "learning_rate": 6.664666666666667e-05, "loss": 3.8349, "step": 20000 }, { "epoch": 33.78, "eval_loss": 3.672532081604004, "eval_runtime": 47.3457, "eval_samples_per_second": 20.889, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.006003945345159851, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.030795479673931526, "eval_tse_type": 3.86192562633412e-05, "step": 20000 }, { "epoch": 33.82, "learning_rate": 6.671333333333334e-05, "loss": 3.844, "step": 20020 }, { "epoch": 33.85, "learning_rate": 6.678e-05, "loss": 3.8501, "step": 20040 }, { "epoch": 33.89, "learning_rate": 6.684666666666667e-05, "loss": 3.8375, "step": 20060 }, { "epoch": 33.92, "learning_rate": 6.691333333333334e-05, "loss": 3.8277, "step": 20080 }, { "epoch": 33.95, "learning_rate": 6.698e-05, "loss": 3.8274, "step": 20100 }, { "epoch": 33.99, "learning_rate": 6.704666666666666e-05, "loss": 3.8322, "step": 20120 }, { "epoch": 34.02, "learning_rate": 6.711333333333334e-05, "loss": 3.7911, "step": 20140 }, { "epoch": 34.05, "learning_rate": 6.718e-05, "loss": 3.7825, "step": 20160 }, { "epoch": 34.09, "learning_rate": 6.724666666666667e-05, "loss": 3.8013, "step": 20180 }, { "epoch": 34.12, "learning_rate": 6.731333333333335e-05, "loss": 3.7964, "step": 20200 }, { "epoch": 34.16, "learning_rate": 6.738e-05, "loss": 3.7865, "step": 20220 }, { "epoch": 34.19, "learning_rate": 6.744666666666667e-05, "loss": 3.8097, "step": 20240 }, { "epoch": 34.22, "learning_rate": 6.751333333333334e-05, "loss": 3.8013, "step": 20260 }, { "epoch": 34.26, "learning_rate": 6.758e-05, "loss": 3.783, "step": 20280 }, { "epoch": 34.29, "learning_rate": 6.764666666666666e-05, "loss": 3.7939, "step": 20300 }, { "epoch": 34.32, "learning_rate": 6.771333333333334e-05, "loss": 3.8121, "step": 20320 }, { "epoch": 34.36, "learning_rate": 6.778e-05, "loss": 3.7952, "step": 20340 }, { "epoch": 34.39, "learning_rate": 6.784666666666667e-05, "loss": 3.7813, "step": 20360 }, { "epoch": 34.43, "learning_rate": 6.791333333333335e-05, "loss": 3.7881, "step": 20380 }, { "epoch": 34.46, "learning_rate": 6.798e-05, "loss": 3.7732, "step": 20400 }, { "epoch": 34.49, "learning_rate": 6.804666666666667e-05, "loss": 3.8046, "step": 20420 }, { "epoch": 34.53, "learning_rate": 6.811333333333334e-05, "loss": 3.8077, "step": 20440 }, { "epoch": 34.56, "learning_rate": 6.818e-05, "loss": 3.784, "step": 20460 }, { "epoch": 34.59, "learning_rate": 6.824666666666666e-05, "loss": 3.7724, "step": 20480 }, { "epoch": 34.63, "learning_rate": 6.831333333333334e-05, "loss": 3.8007, "step": 20500 }, { "epoch": 34.66, "learning_rate": 6.837666666666667e-05, "loss": 3.7861, "step": 20520 }, { "epoch": 34.7, "learning_rate": 6.844333333333334e-05, "loss": 3.7695, "step": 20540 }, { "epoch": 34.73, "learning_rate": 6.851e-05, "loss": 3.7881, "step": 20560 }, { "epoch": 34.76, "learning_rate": 6.857666666666667e-05, "loss": 3.7699, "step": 20580 }, { "epoch": 34.8, "learning_rate": 6.864333333333333e-05, "loss": 3.8045, "step": 20600 }, { "epoch": 34.83, "learning_rate": 6.871000000000001e-05, "loss": 3.7762, "step": 20620 }, { "epoch": 34.86, "learning_rate": 6.877666666666666e-05, "loss": 3.7835, "step": 20640 }, { "epoch": 34.9, "learning_rate": 6.884333333333334e-05, "loss": 3.781, "step": 20660 }, { "epoch": 34.93, "learning_rate": 6.891e-05, "loss": 3.7927, "step": 20680 }, { "epoch": 34.97, "learning_rate": 6.897666666666667e-05, "loss": 3.7805, "step": 20700 }, { "epoch": 35.0, "learning_rate": 6.904333333333334e-05, "loss": 3.7688, "step": 20720 }, { "epoch": 35.03, "learning_rate": 6.911000000000001e-05, "loss": 3.7479, "step": 20740 }, { "epoch": 35.07, "learning_rate": 6.917666666666667e-05, "loss": 3.7441, "step": 20760 }, { "epoch": 35.1, "learning_rate": 6.924333333333334e-05, "loss": 3.7364, "step": 20780 }, { "epoch": 35.14, "learning_rate": 6.931000000000001e-05, "loss": 3.7347, "step": 20800 }, { "epoch": 35.17, "learning_rate": 6.937666666666666e-05, "loss": 3.7438, "step": 20820 }, { "epoch": 35.2, "learning_rate": 6.944333333333334e-05, "loss": 3.7344, "step": 20840 }, { "epoch": 35.24, "learning_rate": 6.951e-05, "loss": 3.721, "step": 20860 }, { "epoch": 35.27, "learning_rate": 6.957666666666667e-05, "loss": 3.7525, "step": 20880 }, { "epoch": 35.3, "learning_rate": 6.964333333333334e-05, "loss": 3.7451, "step": 20900 }, { "epoch": 35.34, "learning_rate": 6.971000000000001e-05, "loss": 3.7693, "step": 20920 }, { "epoch": 35.37, "learning_rate": 6.977666666666667e-05, "loss": 3.7413, "step": 20940 }, { "epoch": 35.41, "learning_rate": 6.984333333333334e-05, "loss": 3.7478, "step": 20960 }, { "epoch": 35.44, "learning_rate": 6.991000000000001e-05, "loss": 3.7363, "step": 20980 }, { "epoch": 35.47, "learning_rate": 6.997666666666666e-05, "loss": 3.7421, "step": 21000 }, { "epoch": 35.47, "eval_loss": 3.6117138862609863, "eval_runtime": 50.7962, "eval_samples_per_second": 19.47, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.009148751032328753, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.022986131700466112, "eval_tse_type": 5.4418042916526235e-05, "step": 21000 }, { "epoch": 35.51, "learning_rate": 7.004333333333334e-05, "loss": 3.7338, "step": 21020 }, { "epoch": 35.54, "learning_rate": 7.011e-05, "loss": 3.7443, "step": 21040 }, { "epoch": 35.57, "learning_rate": 7.017666666666667e-05, "loss": 3.7358, "step": 21060 }, { "epoch": 35.61, "learning_rate": 7.024333333333333e-05, "loss": 3.7634, "step": 21080 }, { "epoch": 35.64, "learning_rate": 7.031e-05, "loss": 3.7469, "step": 21100 }, { "epoch": 35.68, "learning_rate": 7.037666666666667e-05, "loss": 3.7311, "step": 21120 }, { "epoch": 35.71, "learning_rate": 7.044333333333334e-05, "loss": 3.7281, "step": 21140 }, { "epoch": 35.74, "learning_rate": 7.051e-05, "loss": 3.7521, "step": 21160 }, { "epoch": 35.78, "learning_rate": 7.057666666666666e-05, "loss": 3.7502, "step": 21180 }, { "epoch": 35.81, "learning_rate": 7.064333333333334e-05, "loss": 3.7454, "step": 21200 }, { "epoch": 35.84, "learning_rate": 7.070999999999999e-05, "loss": 3.7254, "step": 21220 }, { "epoch": 35.88, "learning_rate": 7.077666666666667e-05, "loss": 3.7302, "step": 21240 }, { "epoch": 35.91, "learning_rate": 7.084333333333333e-05, "loss": 3.7285, "step": 21260 }, { "epoch": 35.95, "learning_rate": 7.091e-05, "loss": 3.7413, "step": 21280 }, { "epoch": 35.98, "learning_rate": 7.097666666666667e-05, "loss": 3.7399, "step": 21300 }, { "epoch": 36.01, "learning_rate": 7.104333333333334e-05, "loss": 3.7041, "step": 21320 }, { "epoch": 36.05, "learning_rate": 7.111e-05, "loss": 3.6866, "step": 21340 }, { "epoch": 36.08, "learning_rate": 7.117666666666667e-05, "loss": 3.6953, "step": 21360 }, { "epoch": 36.11, "learning_rate": 7.124333333333334e-05, "loss": 3.692, "step": 21380 }, { "epoch": 36.15, "learning_rate": 7.130999999999999e-05, "loss": 3.7086, "step": 21400 }, { "epoch": 36.18, "learning_rate": 7.137666666666667e-05, "loss": 3.7095, "step": 21420 }, { "epoch": 36.22, "learning_rate": 7.144333333333333e-05, "loss": 3.7011, "step": 21440 }, { "epoch": 36.25, "learning_rate": 7.151e-05, "loss": 3.7144, "step": 21460 }, { "epoch": 36.28, "learning_rate": 7.157666666666668e-05, "loss": 3.6975, "step": 21480 }, { "epoch": 36.32, "learning_rate": 7.164333333333334e-05, "loss": 3.6789, "step": 21500 }, { "epoch": 36.35, "learning_rate": 7.171e-05, "loss": 3.7143, "step": 21520 }, { "epoch": 36.39, "learning_rate": 7.177666666666667e-05, "loss": 3.687, "step": 21540 }, { "epoch": 36.42, "learning_rate": 7.184333333333334e-05, "loss": 3.7027, "step": 21560 }, { "epoch": 36.45, "learning_rate": 7.191e-05, "loss": 3.6983, "step": 21580 }, { "epoch": 36.49, "learning_rate": 7.197666666666667e-05, "loss": 3.705, "step": 21600 }, { "epoch": 36.52, "learning_rate": 7.204333333333334e-05, "loss": 3.6732, "step": 21620 }, { "epoch": 36.55, "learning_rate": 7.211e-05, "loss": 3.7052, "step": 21640 }, { "epoch": 36.59, "learning_rate": 7.217666666666668e-05, "loss": 3.6802, "step": 21660 }, { "epoch": 36.62, "learning_rate": 7.224333333333334e-05, "loss": 3.6838, "step": 21680 }, { "epoch": 36.66, "learning_rate": 7.231e-05, "loss": 3.7156, "step": 21700 }, { "epoch": 36.69, "learning_rate": 7.237666666666667e-05, "loss": 3.6974, "step": 21720 }, { "epoch": 36.72, "learning_rate": 7.244333333333335e-05, "loss": 3.6858, "step": 21740 }, { "epoch": 36.76, "learning_rate": 7.251e-05, "loss": 3.6949, "step": 21760 }, { "epoch": 36.79, "learning_rate": 7.257666666666667e-05, "loss": 3.6842, "step": 21780 }, { "epoch": 36.82, "learning_rate": 7.264333333333334e-05, "loss": 3.6957, "step": 21800 }, { "epoch": 36.86, "learning_rate": 7.271e-05, "loss": 3.6844, "step": 21820 }, { "epoch": 36.89, "learning_rate": 7.277666666666668e-05, "loss": 3.681, "step": 21840 }, { "epoch": 36.93, "learning_rate": 7.284333333333334e-05, "loss": 3.6908, "step": 21860 }, { "epoch": 36.96, "learning_rate": 7.291e-05, "loss": 3.6948, "step": 21880 }, { "epoch": 36.99, "learning_rate": 7.297666666666667e-05, "loss": 3.6721, "step": 21900 }, { "epoch": 37.03, "learning_rate": 7.304333333333335e-05, "loss": 3.6447, "step": 21920 }, { "epoch": 37.06, "learning_rate": 7.311e-05, "loss": 3.6511, "step": 21940 }, { "epoch": 37.09, "learning_rate": 7.317666666666667e-05, "loss": 3.6723, "step": 21960 }, { "epoch": 37.13, "learning_rate": 7.324333333333334e-05, "loss": 3.6579, "step": 21980 }, { "epoch": 37.16, "learning_rate": 7.331e-05, "loss": 3.6252, "step": 22000 }, { "epoch": 37.16, "eval_loss": 3.553020715713501, "eval_runtime": 50.5859, "eval_samples_per_second": 19.551, "eval_steps_per_second": 0.119, "eval_tse_ndup": 0.007445739995200251, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.02299272267063124, "eval_tse_type": 3.3352994045612854e-05, "step": 22000 }, { "epoch": 37.2, "learning_rate": 7.337666666666666e-05, "loss": 3.6621, "step": 22020 }, { "epoch": 37.23, "learning_rate": 7.344333333333334e-05, "loss": 3.6488, "step": 22040 }, { "epoch": 37.26, "learning_rate": 7.351e-05, "loss": 3.6586, "step": 22060 }, { "epoch": 37.3, "learning_rate": 7.357666666666667e-05, "loss": 3.6362, "step": 22080 }, { "epoch": 37.33, "learning_rate": 7.364333333333335e-05, "loss": 3.6319, "step": 22100 }, { "epoch": 37.36, "learning_rate": 7.371e-05, "loss": 3.6606, "step": 22120 }, { "epoch": 37.4, "learning_rate": 7.377666666666667e-05, "loss": 3.6556, "step": 22140 }, { "epoch": 37.43, "learning_rate": 7.384333333333334e-05, "loss": 3.641, "step": 22160 }, { "epoch": 37.47, "learning_rate": 7.391e-05, "loss": 3.6389, "step": 22180 }, { "epoch": 37.5, "learning_rate": 7.397666666666667e-05, "loss": 3.6665, "step": 22200 }, { "epoch": 37.53, "learning_rate": 7.404333333333334e-05, "loss": 3.6643, "step": 22220 }, { "epoch": 37.57, "learning_rate": 7.411000000000001e-05, "loss": 3.6308, "step": 22240 }, { "epoch": 37.6, "learning_rate": 7.417666666666667e-05, "loss": 3.6632, "step": 22260 }, { "epoch": 37.64, "learning_rate": 7.424333333333333e-05, "loss": 3.6501, "step": 22280 }, { "epoch": 37.67, "learning_rate": 7.431e-05, "loss": 3.6327, "step": 22300 }, { "epoch": 37.7, "learning_rate": 7.437666666666668e-05, "loss": 3.6671, "step": 22320 }, { "epoch": 37.74, "learning_rate": 7.444333333333333e-05, "loss": 3.6545, "step": 22340 }, { "epoch": 37.77, "learning_rate": 7.451e-05, "loss": 3.6403, "step": 22360 }, { "epoch": 37.8, "learning_rate": 7.457666666666667e-05, "loss": 3.6431, "step": 22380 }, { "epoch": 37.84, "learning_rate": 7.464333333333333e-05, "loss": 3.654, "step": 22400 }, { "epoch": 37.87, "learning_rate": 7.471000000000001e-05, "loss": 3.6387, "step": 22420 }, { "epoch": 37.91, "learning_rate": 7.477666666666667e-05, "loss": 3.6501, "step": 22440 }, { "epoch": 37.94, "learning_rate": 7.484333333333334e-05, "loss": 3.6447, "step": 22460 }, { "epoch": 37.97, "learning_rate": 7.491e-05, "loss": 3.6313, "step": 22480 }, { "epoch": 38.01, "learning_rate": 7.497666666666668e-05, "loss": 3.6315, "step": 22500 }, { "epoch": 38.04, "learning_rate": 7.504333333333333e-05, "loss": 3.6047, "step": 22520 }, { "epoch": 38.07, "learning_rate": 7.511e-05, "loss": 3.5979, "step": 22540 }, { "epoch": 38.11, "learning_rate": 7.517666666666667e-05, "loss": 3.5832, "step": 22560 }, { "epoch": 38.14, "learning_rate": 7.524e-05, "loss": 3.6096, "step": 22580 }, { "epoch": 38.18, "learning_rate": 7.530666666666667e-05, "loss": 3.5947, "step": 22600 }, { "epoch": 38.21, "learning_rate": 7.537333333333335e-05, "loss": 3.6199, "step": 22620 }, { "epoch": 38.24, "learning_rate": 7.544e-05, "loss": 3.5862, "step": 22640 }, { "epoch": 38.28, "learning_rate": 7.550666666666667e-05, "loss": 3.6136, "step": 22660 }, { "epoch": 38.31, "learning_rate": 7.557333333333334e-05, "loss": 3.6014, "step": 22680 }, { "epoch": 38.34, "learning_rate": 7.564e-05, "loss": 3.5834, "step": 22700 }, { "epoch": 38.38, "learning_rate": 7.570666666666666e-05, "loss": 3.6177, "step": 22720 }, { "epoch": 38.41, "learning_rate": 7.577333333333334e-05, "loss": 3.5869, "step": 22740 }, { "epoch": 38.45, "learning_rate": 7.584e-05, "loss": 3.6068, "step": 22760 }, { "epoch": 38.48, "learning_rate": 7.590666666666667e-05, "loss": 3.6107, "step": 22780 }, { "epoch": 38.51, "learning_rate": 7.597333333333335e-05, "loss": 3.5974, "step": 22800 }, { "epoch": 38.55, "learning_rate": 7.604e-05, "loss": 3.6298, "step": 22820 }, { "epoch": 38.58, "learning_rate": 7.610666666666667e-05, "loss": 3.6137, "step": 22840 }, { "epoch": 38.61, "learning_rate": 7.617333333333334e-05, "loss": 3.6151, "step": 22860 }, { "epoch": 38.65, "learning_rate": 7.624e-05, "loss": 3.594, "step": 22880 }, { "epoch": 38.68, "learning_rate": 7.630666666666667e-05, "loss": 3.6202, "step": 22900 }, { "epoch": 38.72, "learning_rate": 7.637333333333334e-05, "loss": 3.6162, "step": 22920 }, { "epoch": 38.75, "learning_rate": 7.644e-05, "loss": 3.6031, "step": 22940 }, { "epoch": 38.78, "learning_rate": 7.650666666666667e-05, "loss": 3.6145, "step": 22960 }, { "epoch": 38.82, "learning_rate": 7.657333333333335e-05, "loss": 3.6028, "step": 22980 }, { "epoch": 38.85, "learning_rate": 7.664e-05, "loss": 3.6068, "step": 23000 }, { "epoch": 38.85, "eval_loss": 3.4891817569732666, "eval_runtime": 47.271, "eval_samples_per_second": 20.922, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.006988584288630435, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.026305780437235755, "eval_tse_type": 9.305267063993084e-05, "step": 23000 }, { "epoch": 38.89, "learning_rate": 7.670666666666668e-05, "loss": 3.5932, "step": 23020 }, { "epoch": 38.92, "learning_rate": 7.677333333333334e-05, "loss": 3.5951, "step": 23040 }, { "epoch": 38.95, "learning_rate": 7.684e-05, "loss": 3.6082, "step": 23060 }, { "epoch": 38.99, "learning_rate": 7.690666666666667e-05, "loss": 3.5912, "step": 23080 }, { "epoch": 39.02, "learning_rate": 7.697333333333334e-05, "loss": 3.5746, "step": 23100 }, { "epoch": 39.05, "learning_rate": 7.704000000000001e-05, "loss": 3.568, "step": 23120 }, { "epoch": 39.09, "learning_rate": 7.710666666666667e-05, "loss": 3.557, "step": 23140 }, { "epoch": 39.12, "learning_rate": 7.717333333333334e-05, "loss": 3.5467, "step": 23160 }, { "epoch": 39.16, "learning_rate": 7.724e-05, "loss": 3.5625, "step": 23180 }, { "epoch": 39.19, "learning_rate": 7.730666666666668e-05, "loss": 3.5407, "step": 23200 }, { "epoch": 39.22, "learning_rate": 7.737333333333334e-05, "loss": 3.5679, "step": 23220 }, { "epoch": 39.26, "learning_rate": 7.744e-05, "loss": 3.5641, "step": 23240 }, { "epoch": 39.29, "learning_rate": 7.750666666666667e-05, "loss": 3.5661, "step": 23260 }, { "epoch": 39.32, "learning_rate": 7.757333333333335e-05, "loss": 3.554, "step": 23280 }, { "epoch": 39.36, "learning_rate": 7.764e-05, "loss": 3.589, "step": 23300 }, { "epoch": 39.39, "learning_rate": 7.770666666666667e-05, "loss": 3.5516, "step": 23320 }, { "epoch": 39.43, "learning_rate": 7.777333333333334e-05, "loss": 3.5599, "step": 23340 }, { "epoch": 39.46, "learning_rate": 7.784e-05, "loss": 3.5461, "step": 23360 }, { "epoch": 39.49, "learning_rate": 7.790666666666668e-05, "loss": 3.5676, "step": 23380 }, { "epoch": 39.53, "learning_rate": 7.797333333333333e-05, "loss": 3.5393, "step": 23400 }, { "epoch": 39.56, "learning_rate": 7.804e-05, "loss": 3.5668, "step": 23420 }, { "epoch": 39.59, "learning_rate": 7.810666666666667e-05, "loss": 3.5614, "step": 23440 }, { "epoch": 39.63, "learning_rate": 7.817333333333333e-05, "loss": 3.5668, "step": 23460 }, { "epoch": 39.66, "learning_rate": 7.824e-05, "loss": 3.558, "step": 23480 }, { "epoch": 39.7, "learning_rate": 7.830666666666667e-05, "loss": 3.5682, "step": 23500 }, { "epoch": 39.73, "learning_rate": 7.837333333333334e-05, "loss": 3.5582, "step": 23520 }, { "epoch": 39.76, "learning_rate": 7.844e-05, "loss": 3.5874, "step": 23540 }, { "epoch": 39.8, "learning_rate": 7.850666666666668e-05, "loss": 3.5481, "step": 23560 }, { "epoch": 39.83, "learning_rate": 7.857333333333333e-05, "loss": 3.5782, "step": 23580 }, { "epoch": 39.86, "learning_rate": 7.864e-05, "loss": 3.561, "step": 23600 }, { "epoch": 39.9, "learning_rate": 7.870666666666667e-05, "loss": 3.5682, "step": 23620 }, { "epoch": 39.93, "learning_rate": 7.877333333333333e-05, "loss": 3.5647, "step": 23640 }, { "epoch": 39.97, "learning_rate": 7.884e-05, "loss": 3.5623, "step": 23660 }, { "epoch": 40.0, "learning_rate": 7.890666666666667e-05, "loss": 3.5679, "step": 23680 }, { "epoch": 40.03, "learning_rate": 7.897333333333334e-05, "loss": 3.4902, "step": 23700 }, { "epoch": 40.07, "learning_rate": 7.904e-05, "loss": 3.5255, "step": 23720 }, { "epoch": 40.1, "learning_rate": 7.910666666666668e-05, "loss": 3.5233, "step": 23740 }, { "epoch": 40.14, "learning_rate": 7.917333333333333e-05, "loss": 3.4923, "step": 23760 }, { "epoch": 40.17, "learning_rate": 7.924000000000001e-05, "loss": 3.5447, "step": 23780 }, { "epoch": 40.2, "learning_rate": 7.930666666666667e-05, "loss": 3.4885, "step": 23800 }, { "epoch": 40.24, "learning_rate": 7.937333333333333e-05, "loss": 3.517, "step": 23820 }, { "epoch": 40.27, "learning_rate": 7.944e-05, "loss": 3.5105, "step": 23840 }, { "epoch": 40.3, "learning_rate": 7.950666666666668e-05, "loss": 3.5247, "step": 23860 }, { "epoch": 40.34, "learning_rate": 7.957333333333334e-05, "loss": 3.5284, "step": 23880 }, { "epoch": 40.37, "learning_rate": 7.964e-05, "loss": 3.5258, "step": 23900 }, { "epoch": 40.41, "learning_rate": 7.970666666666668e-05, "loss": 3.5195, "step": 23920 }, { "epoch": 40.44, "learning_rate": 7.977333333333333e-05, "loss": 3.5072, "step": 23940 }, { "epoch": 40.47, "learning_rate": 7.984000000000001e-05, "loss": 3.5233, "step": 23960 }, { "epoch": 40.51, "learning_rate": 7.990666666666667e-05, "loss": 3.5202, "step": 23980 }, { "epoch": 40.54, "learning_rate": 7.997333333333334e-05, "loss": 3.5159, "step": 24000 }, { "epoch": 40.54, "eval_loss": 3.4419097900390625, "eval_runtime": 50.9496, "eval_samples_per_second": 19.411, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.007944092697009155, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.02406613373421399, "eval_tse_type": 4.2130097741826756e-05, "step": 24000 }, { "epoch": 40.57, "learning_rate": 8.004e-05, "loss": 3.5179, "step": 24020 }, { "epoch": 40.61, "learning_rate": 8.010666666666668e-05, "loss": 3.536, "step": 24040 }, { "epoch": 40.64, "learning_rate": 8.017333333333333e-05, "loss": 3.5399, "step": 24060 }, { "epoch": 40.68, "learning_rate": 8.024e-05, "loss": 3.5033, "step": 24080 }, { "epoch": 40.71, "learning_rate": 8.030666666666667e-05, "loss": 3.527, "step": 24100 }, { "epoch": 40.74, "learning_rate": 8.037333333333333e-05, "loss": 3.5204, "step": 24120 }, { "epoch": 40.78, "learning_rate": 8.044000000000001e-05, "loss": 3.5163, "step": 24140 }, { "epoch": 40.81, "learning_rate": 8.050666666666667e-05, "loss": 3.5316, "step": 24160 }, { "epoch": 40.84, "learning_rate": 8.057333333333334e-05, "loss": 3.5257, "step": 24180 }, { "epoch": 40.88, "learning_rate": 8.064e-05, "loss": 3.5296, "step": 24200 }, { "epoch": 40.91, "learning_rate": 8.070666666666668e-05, "loss": 3.5129, "step": 24220 }, { "epoch": 40.95, "learning_rate": 8.077333333333333e-05, "loss": 3.5407, "step": 24240 }, { "epoch": 40.98, "learning_rate": 8.084e-05, "loss": 3.5082, "step": 24260 }, { "epoch": 41.01, "learning_rate": 8.090666666666667e-05, "loss": 3.4926, "step": 24280 }, { "epoch": 41.05, "learning_rate": 8.097333333333333e-05, "loss": 3.4739, "step": 24300 }, { "epoch": 41.08, "learning_rate": 8.104000000000001e-05, "loss": 3.4794, "step": 24320 }, { "epoch": 41.11, "learning_rate": 8.110666666666667e-05, "loss": 3.4631, "step": 24340 }, { "epoch": 41.15, "learning_rate": 8.117333333333334e-05, "loss": 3.4855, "step": 24360 }, { "epoch": 41.18, "learning_rate": 8.124e-05, "loss": 3.4837, "step": 24380 }, { "epoch": 41.22, "learning_rate": 8.130666666666668e-05, "loss": 3.4819, "step": 24400 }, { "epoch": 41.25, "learning_rate": 8.137333333333333e-05, "loss": 3.4535, "step": 24420 }, { "epoch": 41.28, "learning_rate": 8.144e-05, "loss": 3.4564, "step": 24440 }, { "epoch": 41.32, "learning_rate": 8.150666666666667e-05, "loss": 3.4928, "step": 24460 }, { "epoch": 41.35, "learning_rate": 8.157333333333333e-05, "loss": 3.4743, "step": 24480 }, { "epoch": 41.39, "learning_rate": 8.164000000000001e-05, "loss": 3.4782, "step": 24500 }, { "epoch": 41.42, "learning_rate": 8.170666666666667e-05, "loss": 3.4698, "step": 24520 }, { "epoch": 41.45, "learning_rate": 8.177333333333334e-05, "loss": 3.4868, "step": 24540 }, { "epoch": 41.49, "learning_rate": 8.184e-05, "loss": 3.4795, "step": 24560 }, { "epoch": 41.52, "learning_rate": 8.190666666666668e-05, "loss": 3.4912, "step": 24580 }, { "epoch": 41.55, "learning_rate": 8.197333333333333e-05, "loss": 3.4821, "step": 24600 }, { "epoch": 41.59, "learning_rate": 8.203666666666667e-05, "loss": 3.4764, "step": 24620 }, { "epoch": 41.62, "learning_rate": 8.210333333333333e-05, "loss": 3.4889, "step": 24640 }, { "epoch": 41.66, "learning_rate": 8.217000000000001e-05, "loss": 3.478, "step": 24660 }, { "epoch": 41.69, "learning_rate": 8.223666666666667e-05, "loss": 3.4772, "step": 24680 }, { "epoch": 41.72, "learning_rate": 8.230333333333334e-05, "loss": 3.4694, "step": 24700 }, { "epoch": 41.76, "learning_rate": 8.237e-05, "loss": 3.4743, "step": 24720 }, { "epoch": 41.79, "learning_rate": 8.243666666666668e-05, "loss": 3.4844, "step": 24740 }, { "epoch": 41.82, "learning_rate": 8.250333333333333e-05, "loss": 3.4657, "step": 24760 }, { "epoch": 41.86, "learning_rate": 8.257e-05, "loss": 3.4819, "step": 24780 }, { "epoch": 41.89, "learning_rate": 8.263666666666667e-05, "loss": 3.4704, "step": 24800 }, { "epoch": 41.93, "learning_rate": 8.270333333333333e-05, "loss": 3.4889, "step": 24820 }, { "epoch": 41.96, "learning_rate": 8.277000000000001e-05, "loss": 3.4977, "step": 24840 }, { "epoch": 41.99, "learning_rate": 8.283666666666667e-05, "loss": 3.4965, "step": 24860 }, { "epoch": 42.03, "learning_rate": 8.290333333333334e-05, "loss": 3.4475, "step": 24880 }, { "epoch": 42.06, "learning_rate": 8.297e-05, "loss": 3.4173, "step": 24900 }, { "epoch": 42.09, "learning_rate": 8.303666666666668e-05, "loss": 3.4348, "step": 24920 }, { "epoch": 42.13, "learning_rate": 8.310333333333333e-05, "loss": 3.4405, "step": 24940 }, { "epoch": 42.16, "learning_rate": 8.317e-05, "loss": 3.4271, "step": 24960 }, { "epoch": 42.2, "learning_rate": 8.323666666666667e-05, "loss": 3.4412, "step": 24980 }, { "epoch": 42.23, "learning_rate": 8.330333333333333e-05, "loss": 3.4345, "step": 25000 }, { "epoch": 42.23, "eval_loss": 3.3929953575134277, "eval_runtime": 47.2495, "eval_samples_per_second": 20.931, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.0033075099350616337, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.028185384195513738, "eval_tse_type": 6.626145455836754e-05, "step": 25000 }, { "epoch": 42.26, "learning_rate": 8.337000000000001e-05, "loss": 3.45, "step": 25020 }, { "epoch": 42.3, "learning_rate": 8.343666666666667e-05, "loss": 3.4294, "step": 25040 }, { "epoch": 42.33, "learning_rate": 8.350333333333334e-05, "loss": 3.4503, "step": 25060 }, { "epoch": 42.36, "learning_rate": 8.357e-05, "loss": 3.413, "step": 25080 }, { "epoch": 42.4, "learning_rate": 8.363666666666668e-05, "loss": 3.4294, "step": 25100 }, { "epoch": 42.43, "learning_rate": 8.370333333333333e-05, "loss": 3.4421, "step": 25120 }, { "epoch": 42.47, "learning_rate": 8.377e-05, "loss": 3.4619, "step": 25140 }, { "epoch": 42.5, "learning_rate": 8.383666666666667e-05, "loss": 3.4707, "step": 25160 }, { "epoch": 42.53, "learning_rate": 8.390333333333333e-05, "loss": 3.4346, "step": 25180 }, { "epoch": 42.57, "learning_rate": 8.397000000000001e-05, "loss": 3.4436, "step": 25200 }, { "epoch": 42.6, "learning_rate": 8.403666666666667e-05, "loss": 3.4434, "step": 25220 }, { "epoch": 42.64, "learning_rate": 8.410333333333334e-05, "loss": 3.4385, "step": 25240 }, { "epoch": 42.67, "learning_rate": 8.417e-05, "loss": 3.4598, "step": 25260 }, { "epoch": 42.7, "learning_rate": 8.423666666666668e-05, "loss": 3.4443, "step": 25280 }, { "epoch": 42.74, "learning_rate": 8.430333333333333e-05, "loss": 3.4465, "step": 25300 }, { "epoch": 42.77, "learning_rate": 8.437000000000001e-05, "loss": 3.4485, "step": 25320 }, { "epoch": 42.8, "learning_rate": 8.443666666666667e-05, "loss": 3.433, "step": 25340 }, { "epoch": 42.84, "learning_rate": 8.450333333333333e-05, "loss": 3.438, "step": 25360 }, { "epoch": 42.87, "learning_rate": 8.457e-05, "loss": 3.4344, "step": 25380 }, { "epoch": 42.91, "learning_rate": 8.463666666666668e-05, "loss": 3.4269, "step": 25400 }, { "epoch": 42.94, "learning_rate": 8.470333333333334e-05, "loss": 3.4329, "step": 25420 }, { "epoch": 42.97, "learning_rate": 8.477e-05, "loss": 3.4348, "step": 25440 }, { "epoch": 43.01, "learning_rate": 8.483666666666668e-05, "loss": 3.4257, "step": 25460 }, { "epoch": 43.04, "learning_rate": 8.490333333333333e-05, "loss": 3.3811, "step": 25480 }, { "epoch": 43.07, "learning_rate": 8.497000000000001e-05, "loss": 3.4155, "step": 25500 }, { "epoch": 43.11, "learning_rate": 8.503666666666667e-05, "loss": 3.4016, "step": 25520 }, { "epoch": 43.14, "learning_rate": 8.510333333333334e-05, "loss": 3.3912, "step": 25540 }, { "epoch": 43.18, "learning_rate": 8.517e-05, "loss": 3.3946, "step": 25560 }, { "epoch": 43.21, "learning_rate": 8.523666666666668e-05, "loss": 3.3945, "step": 25580 }, { "epoch": 43.24, "learning_rate": 8.530333333333334e-05, "loss": 3.4069, "step": 25600 }, { "epoch": 43.28, "learning_rate": 8.537e-05, "loss": 3.4032, "step": 25620 }, { "epoch": 43.31, "learning_rate": 8.543666666666668e-05, "loss": 3.3907, "step": 25640 }, { "epoch": 43.34, "learning_rate": 8.550333333333333e-05, "loss": 3.4155, "step": 25660 }, { "epoch": 43.38, "learning_rate": 8.557000000000001e-05, "loss": 3.4083, "step": 25680 }, { "epoch": 43.41, "learning_rate": 8.563666666666667e-05, "loss": 3.3871, "step": 25700 }, { "epoch": 43.45, "learning_rate": 8.570333333333334e-05, "loss": 3.4089, "step": 25720 }, { "epoch": 43.48, "learning_rate": 8.577e-05, "loss": 3.4084, "step": 25740 }, { "epoch": 43.51, "learning_rate": 8.583666666666666e-05, "loss": 3.3881, "step": 25760 }, { "epoch": 43.55, "learning_rate": 8.590333333333334e-05, "loss": 3.4156, "step": 25780 }, { "epoch": 43.58, "learning_rate": 8.597e-05, "loss": 3.4007, "step": 25800 }, { "epoch": 43.61, "learning_rate": 8.603666666666667e-05, "loss": 3.385, "step": 25820 }, { "epoch": 43.65, "learning_rate": 8.610333333333333e-05, "loss": 3.4057, "step": 25840 }, { "epoch": 43.68, "learning_rate": 8.617000000000001e-05, "loss": 3.3944, "step": 25860 }, { "epoch": 43.72, "learning_rate": 8.623666666666666e-05, "loss": 3.412, "step": 25880 }, { "epoch": 43.75, "learning_rate": 8.630333333333334e-05, "loss": 3.3974, "step": 25900 }, { "epoch": 43.78, "learning_rate": 8.637e-05, "loss": 3.4025, "step": 25920 }, { "epoch": 43.82, "learning_rate": 8.643666666666667e-05, "loss": 3.4, "step": 25940 }, { "epoch": 43.85, "learning_rate": 8.650333333333334e-05, "loss": 3.4062, "step": 25960 }, { "epoch": 43.89, "learning_rate": 8.657e-05, "loss": 3.3913, "step": 25980 }, { "epoch": 43.92, "learning_rate": 8.663666666666667e-05, "loss": 3.3936, "step": 26000 }, { "epoch": 43.92, "eval_loss": 3.3477540016174316, "eval_runtime": 47.4103, "eval_samples_per_second": 20.86, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.004606297891629218, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.02700516843596235, "eval_tse_type": 5.617346365576901e-05, "step": 26000 }, { "epoch": 43.95, "learning_rate": 8.670333333333333e-05, "loss": 3.399, "step": 26020 }, { "epoch": 43.99, "learning_rate": 8.677000000000001e-05, "loss": 3.4121, "step": 26040 }, { "epoch": 44.02, "learning_rate": 8.683666666666666e-05, "loss": 3.3813, "step": 26060 }, { "epoch": 44.05, "learning_rate": 8.690333333333334e-05, "loss": 3.3494, "step": 26080 }, { "epoch": 44.09, "learning_rate": 8.697e-05, "loss": 3.3565, "step": 26100 }, { "epoch": 44.12, "learning_rate": 8.703666666666667e-05, "loss": 3.3417, "step": 26120 }, { "epoch": 44.16, "learning_rate": 8.710333333333333e-05, "loss": 3.3536, "step": 26140 }, { "epoch": 44.19, "learning_rate": 8.717000000000001e-05, "loss": 3.3381, "step": 26160 }, { "epoch": 44.22, "learning_rate": 8.723666666666667e-05, "loss": 3.347, "step": 26180 }, { "epoch": 44.26, "learning_rate": 8.730333333333333e-05, "loss": 3.3678, "step": 26200 }, { "epoch": 44.29, "learning_rate": 8.737000000000001e-05, "loss": 3.361, "step": 26220 }, { "epoch": 44.32, "learning_rate": 8.743666666666666e-05, "loss": 3.3652, "step": 26240 }, { "epoch": 44.36, "learning_rate": 8.750333333333334e-05, "loss": 3.3427, "step": 26260 }, { "epoch": 44.39, "learning_rate": 8.757e-05, "loss": 3.3665, "step": 26280 }, { "epoch": 44.43, "learning_rate": 8.763666666666667e-05, "loss": 3.3604, "step": 26300 }, { "epoch": 44.46, "learning_rate": 8.770333333333333e-05, "loss": 3.3563, "step": 26320 }, { "epoch": 44.49, "learning_rate": 8.777000000000001e-05, "loss": 3.363, "step": 26340 }, { "epoch": 44.53, "learning_rate": 8.783666666666667e-05, "loss": 3.3858, "step": 26360 }, { "epoch": 44.56, "learning_rate": 8.790333333333334e-05, "loss": 3.3711, "step": 26380 }, { "epoch": 44.59, "learning_rate": 8.797000000000001e-05, "loss": 3.3703, "step": 26400 }, { "epoch": 44.63, "learning_rate": 8.803666666666666e-05, "loss": 3.3602, "step": 26420 }, { "epoch": 44.66, "learning_rate": 8.810333333333334e-05, "loss": 3.3624, "step": 26440 }, { "epoch": 44.7, "learning_rate": 8.817e-05, "loss": 3.3859, "step": 26460 }, { "epoch": 44.73, "learning_rate": 8.823666666666667e-05, "loss": 3.3696, "step": 26480 }, { "epoch": 44.76, "learning_rate": 8.830333333333333e-05, "loss": 3.3751, "step": 26500 }, { "epoch": 44.8, "learning_rate": 8.837000000000001e-05, "loss": 3.3658, "step": 26520 }, { "epoch": 44.83, "learning_rate": 8.843666666666667e-05, "loss": 3.3523, "step": 26540 }, { "epoch": 44.86, "learning_rate": 8.850333333333334e-05, "loss": 3.373, "step": 26560 }, { "epoch": 44.9, "learning_rate": 8.857000000000001e-05, "loss": 3.3698, "step": 26580 }, { "epoch": 44.93, "learning_rate": 8.863666666666666e-05, "loss": 3.3809, "step": 26600 }, { "epoch": 44.97, "learning_rate": 8.870333333333334e-05, "loss": 3.3766, "step": 26620 }, { "epoch": 45.0, "learning_rate": 8.877e-05, "loss": 3.3607, "step": 26640 }, { "epoch": 45.03, "learning_rate": 8.883333333333333e-05, "loss": 3.2998, "step": 26660 }, { "epoch": 45.07, "learning_rate": 8.89e-05, "loss": 3.3279, "step": 26680 }, { "epoch": 45.1, "learning_rate": 8.896666666666667e-05, "loss": 3.3109, "step": 26700 }, { "epoch": 45.14, "learning_rate": 8.903333333333333e-05, "loss": 3.3249, "step": 26720 }, { "epoch": 45.17, "learning_rate": 8.910000000000001e-05, "loss": 3.3232, "step": 26740 }, { "epoch": 45.2, "learning_rate": 8.916666666666667e-05, "loss": 3.3187, "step": 26760 }, { "epoch": 45.24, "learning_rate": 8.923333333333334e-05, "loss": 3.3111, "step": 26780 }, { "epoch": 45.27, "learning_rate": 8.93e-05, "loss": 3.3137, "step": 26800 }, { "epoch": 45.3, "learning_rate": 8.936666666666668e-05, "loss": 3.3132, "step": 26820 }, { "epoch": 45.34, "learning_rate": 8.943333333333333e-05, "loss": 3.3091, "step": 26840 }, { "epoch": 45.37, "learning_rate": 8.950000000000001e-05, "loss": 3.3241, "step": 26860 }, { "epoch": 45.41, "learning_rate": 8.956666666666667e-05, "loss": 3.3187, "step": 26880 }, { "epoch": 45.44, "learning_rate": 8.963333333333333e-05, "loss": 3.3425, "step": 26900 }, { "epoch": 45.47, "learning_rate": 8.970000000000001e-05, "loss": 3.3341, "step": 26920 }, { "epoch": 45.51, "learning_rate": 8.976666666666666e-05, "loss": 3.3377, "step": 26940 }, { "epoch": 45.54, "learning_rate": 8.983333333333334e-05, "loss": 3.327, "step": 26960 }, { "epoch": 45.57, "learning_rate": 8.99e-05, "loss": 3.3312, "step": 26980 }, { "epoch": 45.61, "learning_rate": 8.996666666666667e-05, "loss": 3.3293, "step": 27000 }, { "epoch": 45.61, "eval_loss": 3.294952869415283, "eval_runtime": 47.769, "eval_samples_per_second": 20.704, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.005338363761522809, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.029210528467358388, "eval_tse_type": 2.106504887091338e-05, "step": 27000 }, { "epoch": 45.64, "learning_rate": 9.003333333333333e-05, "loss": 3.3115, "step": 27020 }, { "epoch": 45.68, "learning_rate": 9.010000000000001e-05, "loss": 3.3484, "step": 27040 }, { "epoch": 45.71, "learning_rate": 9.016666666666667e-05, "loss": 3.3299, "step": 27060 }, { "epoch": 45.74, "learning_rate": 9.023333333333334e-05, "loss": 3.3394, "step": 27080 }, { "epoch": 45.78, "learning_rate": 9.030000000000001e-05, "loss": 3.3232, "step": 27100 }, { "epoch": 45.81, "learning_rate": 9.036666666666666e-05, "loss": 3.3264, "step": 27120 }, { "epoch": 45.84, "learning_rate": 9.043333333333334e-05, "loss": 3.3434, "step": 27140 }, { "epoch": 45.88, "learning_rate": 9.05e-05, "loss": 3.3458, "step": 27160 }, { "epoch": 45.91, "learning_rate": 9.056666666666667e-05, "loss": 3.3291, "step": 27180 }, { "epoch": 45.95, "learning_rate": 9.063333333333333e-05, "loss": 3.3573, "step": 27200 }, { "epoch": 45.98, "learning_rate": 9.070000000000001e-05, "loss": 3.3235, "step": 27220 }, { "epoch": 46.01, "learning_rate": 9.076666666666667e-05, "loss": 3.3058, "step": 27240 }, { "epoch": 46.05, "learning_rate": 9.083333333333334e-05, "loss": 3.2704, "step": 27260 }, { "epoch": 46.08, "learning_rate": 9.090000000000001e-05, "loss": 3.2543, "step": 27280 }, { "epoch": 46.11, "learning_rate": 9.096666666666666e-05, "loss": 3.2758, "step": 27300 }, { "epoch": 46.15, "learning_rate": 9.103333333333334e-05, "loss": 3.2722, "step": 27320 }, { "epoch": 46.18, "learning_rate": 9.11e-05, "loss": 3.2861, "step": 27340 }, { "epoch": 46.22, "learning_rate": 9.116666666666667e-05, "loss": 3.2896, "step": 27360 }, { "epoch": 46.25, "learning_rate": 9.123333333333333e-05, "loss": 3.2744, "step": 27380 }, { "epoch": 46.28, "learning_rate": 9.130000000000001e-05, "loss": 3.2888, "step": 27400 }, { "epoch": 46.32, "learning_rate": 9.136666666666666e-05, "loss": 3.2813, "step": 27420 }, { "epoch": 46.35, "learning_rate": 9.143333333333334e-05, "loss": 3.2856, "step": 27440 }, { "epoch": 46.39, "learning_rate": 9.15e-05, "loss": 3.2896, "step": 27460 }, { "epoch": 46.42, "learning_rate": 9.156666666666667e-05, "loss": 3.2784, "step": 27480 }, { "epoch": 46.45, "learning_rate": 9.163333333333334e-05, "loss": 3.297, "step": 27500 }, { "epoch": 46.49, "learning_rate": 9.17e-05, "loss": 3.2914, "step": 27520 }, { "epoch": 46.52, "learning_rate": 9.176666666666667e-05, "loss": 3.294, "step": 27540 }, { "epoch": 46.55, "learning_rate": 9.183333333333333e-05, "loss": 3.2912, "step": 27560 }, { "epoch": 46.59, "learning_rate": 9.190000000000001e-05, "loss": 3.3075, "step": 27580 }, { "epoch": 46.62, "learning_rate": 9.196666666666666e-05, "loss": 3.295, "step": 27600 }, { "epoch": 46.66, "learning_rate": 9.203333333333334e-05, "loss": 3.298, "step": 27620 }, { "epoch": 46.69, "learning_rate": 9.21e-05, "loss": 3.316, "step": 27640 }, { "epoch": 46.72, "learning_rate": 9.216666666666667e-05, "loss": 3.2933, "step": 27660 }, { "epoch": 46.76, "learning_rate": 9.223333333333334e-05, "loss": 3.3057, "step": 27680 }, { "epoch": 46.79, "learning_rate": 9.230000000000001e-05, "loss": 3.289, "step": 27700 }, { "epoch": 46.82, "learning_rate": 9.236666666666667e-05, "loss": 3.2987, "step": 27720 }, { "epoch": 46.86, "learning_rate": 9.243333333333333e-05, "loss": 3.2761, "step": 27740 }, { "epoch": 46.89, "learning_rate": 9.250000000000001e-05, "loss": 3.2988, "step": 27760 }, { "epoch": 46.93, "learning_rate": 9.256666666666666e-05, "loss": 3.2998, "step": 27780 }, { "epoch": 46.96, "learning_rate": 9.263333333333334e-05, "loss": 3.3402, "step": 27800 }, { "epoch": 46.99, "learning_rate": 9.27e-05, "loss": 3.301, "step": 27820 }, { "epoch": 47.03, "learning_rate": 9.276666666666667e-05, "loss": 3.2546, "step": 27840 }, { "epoch": 47.06, "learning_rate": 9.283333333333334e-05, "loss": 3.234, "step": 27860 }, { "epoch": 47.09, "learning_rate": 9.290000000000001e-05, "loss": 3.2488, "step": 27880 }, { "epoch": 47.13, "learning_rate": 9.296666666666667e-05, "loss": 3.2436, "step": 27900 }, { "epoch": 47.16, "learning_rate": 9.303333333333334e-05, "loss": 3.2488, "step": 27920 }, { "epoch": 47.2, "learning_rate": 9.310000000000001e-05, "loss": 3.2362, "step": 27940 }, { "epoch": 47.23, "learning_rate": 9.316666666666666e-05, "loss": 3.238, "step": 27960 }, { "epoch": 47.26, "learning_rate": 9.323333333333334e-05, "loss": 3.2507, "step": 27980 }, { "epoch": 47.3, "learning_rate": 9.33e-05, "loss": 3.2514, "step": 28000 }, { "epoch": 47.3, "eval_loss": 3.2621803283691406, "eval_runtime": 48.0971, "eval_samples_per_second": 20.563, "eval_steps_per_second": 0.125, "eval_tse_ndup": 0.005875090625116292, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.025641305248088424, "eval_tse_type": 8.60156162228963e-05, "step": 28000 }, { "epoch": 47.33, "learning_rate": 9.336666666666667e-05, "loss": 3.2557, "step": 28020 }, { "epoch": 47.36, "learning_rate": 9.343333333333335e-05, "loss": 3.2641, "step": 28040 }, { "epoch": 47.4, "learning_rate": 9.350000000000001e-05, "loss": 3.2384, "step": 28060 }, { "epoch": 47.43, "learning_rate": 9.356666666666667e-05, "loss": 3.2572, "step": 28080 }, { "epoch": 47.47, "learning_rate": 9.363333333333334e-05, "loss": 3.253, "step": 28100 }, { "epoch": 47.5, "learning_rate": 9.370000000000001e-05, "loss": 3.2743, "step": 28120 }, { "epoch": 47.53, "learning_rate": 9.376666666666666e-05, "loss": 3.2793, "step": 28140 }, { "epoch": 47.57, "learning_rate": 9.383333333333334e-05, "loss": 3.2907, "step": 28160 }, { "epoch": 47.6, "learning_rate": 9.39e-05, "loss": 3.2625, "step": 28180 }, { "epoch": 47.64, "learning_rate": 9.396666666666667e-05, "loss": 3.2671, "step": 28200 }, { "epoch": 47.67, "learning_rate": 9.403333333333335e-05, "loss": 3.2486, "step": 28220 }, { "epoch": 47.7, "learning_rate": 9.41e-05, "loss": 3.2518, "step": 28240 }, { "epoch": 47.74, "learning_rate": 9.416666666666667e-05, "loss": 3.2705, "step": 28260 }, { "epoch": 47.77, "learning_rate": 9.423333333333334e-05, "loss": 3.2757, "step": 28280 }, { "epoch": 47.8, "learning_rate": 9.43e-05, "loss": 3.2451, "step": 28300 }, { "epoch": 47.84, "learning_rate": 9.436666666666667e-05, "loss": 3.2783, "step": 28320 }, { "epoch": 47.87, "learning_rate": 9.443333333333334e-05, "loss": 3.2583, "step": 28340 }, { "epoch": 47.91, "learning_rate": 9.449999999999999e-05, "loss": 3.2671, "step": 28360 }, { "epoch": 47.94, "learning_rate": 9.456666666666667e-05, "loss": 3.2556, "step": 28380 }, { "epoch": 47.97, "learning_rate": 9.463333333333333e-05, "loss": 3.2696, "step": 28400 }, { "epoch": 48.01, "learning_rate": 9.47e-05, "loss": 3.2638, "step": 28420 }, { "epoch": 48.04, "learning_rate": 9.476666666666668e-05, "loss": 3.1904, "step": 28440 }, { "epoch": 48.07, "learning_rate": 9.483333333333334e-05, "loss": 3.1949, "step": 28460 }, { "epoch": 48.11, "learning_rate": 9.49e-05, "loss": 3.2049, "step": 28480 }, { "epoch": 48.14, "learning_rate": 9.496666666666667e-05, "loss": 3.2076, "step": 28500 }, { "epoch": 48.18, "learning_rate": 9.503333333333334e-05, "loss": 3.2015, "step": 28520 }, { "epoch": 48.21, "learning_rate": 9.51e-05, "loss": 3.2139, "step": 28540 }, { "epoch": 48.24, "learning_rate": 9.516333333333334e-05, "loss": 3.2035, "step": 28560 }, { "epoch": 48.28, "learning_rate": 9.523000000000001e-05, "loss": 3.2229, "step": 28580 }, { "epoch": 48.31, "learning_rate": 9.529666666666667e-05, "loss": 3.2199, "step": 28600 }, { "epoch": 48.34, "learning_rate": 9.536333333333334e-05, "loss": 3.2205, "step": 28620 }, { "epoch": 48.38, "learning_rate": 9.543000000000001e-05, "loss": 3.215, "step": 28640 }, { "epoch": 48.41, "learning_rate": 9.549666666666666e-05, "loss": 3.2398, "step": 28660 }, { "epoch": 48.45, "learning_rate": 9.556333333333334e-05, "loss": 3.2235, "step": 28680 }, { "epoch": 48.48, "learning_rate": 9.563e-05, "loss": 3.2174, "step": 28700 }, { "epoch": 48.51, "learning_rate": 9.569666666666667e-05, "loss": 3.2132, "step": 28720 }, { "epoch": 48.55, "learning_rate": 9.576333333333333e-05, "loss": 3.2246, "step": 28740 }, { "epoch": 48.58, "learning_rate": 9.583000000000001e-05, "loss": 3.2202, "step": 28760 }, { "epoch": 48.61, "learning_rate": 9.589666666666667e-05, "loss": 3.2404, "step": 28780 }, { "epoch": 48.65, "learning_rate": 9.596333333333334e-05, "loss": 3.2408, "step": 28800 }, { "epoch": 48.68, "learning_rate": 9.603000000000001e-05, "loss": 3.2429, "step": 28820 }, { "epoch": 48.72, "learning_rate": 9.609666666666666e-05, "loss": 3.2487, "step": 28840 }, { "epoch": 48.75, "learning_rate": 9.616333333333334e-05, "loss": 3.2461, "step": 28860 }, { "epoch": 48.78, "learning_rate": 9.623e-05, "loss": 3.2251, "step": 28880 }, { "epoch": 48.82, "learning_rate": 9.629666666666667e-05, "loss": 3.2366, "step": 28900 }, { "epoch": 48.85, "learning_rate": 9.636333333333333e-05, "loss": 3.2259, "step": 28920 }, { "epoch": 48.89, "learning_rate": 9.643000000000001e-05, "loss": 3.2304, "step": 28940 }, { "epoch": 48.92, "learning_rate": 9.649666666666667e-05, "loss": 3.2434, "step": 28960 }, { "epoch": 48.95, "learning_rate": 9.656333333333334e-05, "loss": 3.2253, "step": 28980 }, { "epoch": 48.99, "learning_rate": 9.663000000000002e-05, "loss": 3.247, "step": 29000 }, { "epoch": 48.99, "eval_loss": 3.2123095989227295, "eval_runtime": 47.5207, "eval_samples_per_second": 20.812, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.005857564524854671, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.02986247211927427, "eval_tse_type": 4.213009774182676e-05, "step": 29000 }, { "epoch": 49.02, "learning_rate": 9.669666666666667e-05, "loss": 3.2048, "step": 29020 }, { "epoch": 49.05, "learning_rate": 9.676333333333334e-05, "loss": 3.1737, "step": 29040 }, { "epoch": 49.09, "learning_rate": 9.683e-05, "loss": 3.1742, "step": 29060 }, { "epoch": 49.12, "learning_rate": 9.689666666666667e-05, "loss": 3.1847, "step": 29080 }, { "epoch": 49.16, "learning_rate": 9.696333333333333e-05, "loss": 3.1615, "step": 29100 }, { "epoch": 49.19, "learning_rate": 9.703000000000001e-05, "loss": 3.1775, "step": 29120 }, { "epoch": 49.22, "learning_rate": 9.709666666666667e-05, "loss": 3.169, "step": 29140 }, { "epoch": 49.26, "learning_rate": 9.716333333333334e-05, "loss": 3.1913, "step": 29160 }, { "epoch": 49.29, "learning_rate": 9.723000000000002e-05, "loss": 3.1899, "step": 29180 }, { "epoch": 49.32, "learning_rate": 9.729666666666667e-05, "loss": 3.1753, "step": 29200 }, { "epoch": 49.36, "learning_rate": 9.736333333333334e-05, "loss": 3.1989, "step": 29220 }, { "epoch": 49.39, "learning_rate": 9.743000000000001e-05, "loss": 3.1976, "step": 29240 }, { "epoch": 49.43, "learning_rate": 9.749666666666667e-05, "loss": 3.1722, "step": 29260 }, { "epoch": 49.46, "learning_rate": 9.756333333333333e-05, "loss": 3.1894, "step": 29280 }, { "epoch": 49.49, "learning_rate": 9.763e-05, "loss": 3.1922, "step": 29300 }, { "epoch": 49.53, "learning_rate": 9.769666666666668e-05, "loss": 3.1944, "step": 29320 }, { "epoch": 49.56, "learning_rate": 9.776333333333334e-05, "loss": 3.1903, "step": 29340 }, { "epoch": 49.59, "learning_rate": 9.783e-05, "loss": 3.1874, "step": 29360 }, { "epoch": 49.63, "learning_rate": 9.789666666666667e-05, "loss": 3.1963, "step": 29380 }, { "epoch": 49.66, "learning_rate": 9.796333333333334e-05, "loss": 3.1988, "step": 29400 }, { "epoch": 49.7, "learning_rate": 9.803e-05, "loss": 3.2034, "step": 29420 }, { "epoch": 49.73, "learning_rate": 9.809666666666667e-05, "loss": 3.1992, "step": 29440 }, { "epoch": 49.76, "learning_rate": 9.816333333333334e-05, "loss": 3.1975, "step": 29460 }, { "epoch": 49.8, "learning_rate": 9.823e-05, "loss": 3.2082, "step": 29480 }, { "epoch": 49.83, "learning_rate": 9.829666666666666e-05, "loss": 3.1831, "step": 29500 }, { "epoch": 49.86, "learning_rate": 9.836333333333334e-05, "loss": 3.2088, "step": 29520 }, { "epoch": 49.9, "learning_rate": 9.843e-05, "loss": 3.2198, "step": 29540 }, { "epoch": 49.93, "learning_rate": 9.849666666666667e-05, "loss": 3.2106, "step": 29560 }, { "epoch": 49.97, "learning_rate": 9.856333333333335e-05, "loss": 3.2075, "step": 29580 }, { "epoch": 50.0, "learning_rate": 9.863e-05, "loss": 3.1942, "step": 29600 }, { "epoch": 50.03, "learning_rate": 9.869666666666667e-05, "loss": 3.1266, "step": 29620 }, { "epoch": 50.07, "learning_rate": 9.876333333333334e-05, "loss": 3.1327, "step": 29640 }, { "epoch": 50.1, "learning_rate": 9.883e-05, "loss": 3.1546, "step": 29660 }, { "epoch": 50.14, "learning_rate": 9.889666666666666e-05, "loss": 3.1446, "step": 29680 }, { "epoch": 50.17, "learning_rate": 9.896333333333334e-05, "loss": 3.1516, "step": 29700 }, { "epoch": 50.2, "learning_rate": 9.903e-05, "loss": 3.1341, "step": 29720 }, { "epoch": 50.24, "learning_rate": 9.909666666666667e-05, "loss": 3.1798, "step": 29740 }, { "epoch": 50.27, "learning_rate": 9.916333333333335e-05, "loss": 3.1588, "step": 29760 }, { "epoch": 50.3, "learning_rate": 9.923e-05, "loss": 3.1727, "step": 29780 }, { "epoch": 50.34, "learning_rate": 9.929666666666667e-05, "loss": 3.1465, "step": 29800 }, { "epoch": 50.37, "learning_rate": 9.936333333333334e-05, "loss": 3.1362, "step": 29820 }, { "epoch": 50.41, "learning_rate": 9.943e-05, "loss": 3.1789, "step": 29840 }, { "epoch": 50.44, "learning_rate": 9.949666666666667e-05, "loss": 3.15, "step": 29860 }, { "epoch": 50.47, "learning_rate": 9.956333333333334e-05, "loss": 3.1583, "step": 29880 }, { "epoch": 50.51, "learning_rate": 9.963e-05, "loss": 3.1661, "step": 29900 }, { "epoch": 50.54, "learning_rate": 9.969666666666667e-05, "loss": 3.1609, "step": 29920 }, { "epoch": 50.57, "learning_rate": 9.976333333333335e-05, "loss": 3.1678, "step": 29940 }, { "epoch": 50.61, "learning_rate": 9.983e-05, "loss": 3.1783, "step": 29960 }, { "epoch": 50.64, "learning_rate": 9.989666666666668e-05, "loss": 3.1553, "step": 29980 }, { "epoch": 50.68, "learning_rate": 9.996333333333334e-05, "loss": 3.1724, "step": 30000 }, { "epoch": 50.68, "eval_loss": 3.1809890270233154, "eval_runtime": 50.8799, "eval_samples_per_second": 19.438, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.007068895666270629, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.02828921872766767, "eval_tse_type": 6.119376272675393e-05, "step": 30000 }, { "epoch": 50.71, "learning_rate": 9.999999592123497e-05, "loss": 3.1727, "step": 30020 }, { "epoch": 50.74, "learning_rate": 9.999995765134629e-05, "loss": 3.1707, "step": 30040 }, { "epoch": 50.78, "learning_rate": 9.999987909739482e-05, "loss": 3.1674, "step": 30060 }, { "epoch": 50.81, "learning_rate": 9.999976025944386e-05, "loss": 3.1599, "step": 30080 }, { "epoch": 50.84, "learning_rate": 9.999960113758919e-05, "loss": 3.1661, "step": 30100 }, { "epoch": 50.88, "learning_rate": 9.999940173195897e-05, "loss": 3.1731, "step": 30120 }, { "epoch": 50.91, "learning_rate": 9.999916204271387e-05, "loss": 3.1721, "step": 30140 }, { "epoch": 50.95, "learning_rate": 9.9998882070047e-05, "loss": 3.1665, "step": 30160 }, { "epoch": 50.98, "learning_rate": 9.999856181418395e-05, "loss": 3.1563, "step": 30180 }, { "epoch": 51.01, "learning_rate": 9.999820127538271e-05, "loss": 3.1503, "step": 30200 }, { "epoch": 51.05, "learning_rate": 9.999780045393379e-05, "loss": 3.104, "step": 30220 }, { "epoch": 51.08, "learning_rate": 9.99973593501601e-05, "loss": 3.0959, "step": 30240 }, { "epoch": 51.11, "learning_rate": 9.999687796441705e-05, "loss": 3.1194, "step": 30260 }, { "epoch": 51.15, "learning_rate": 9.999635629709249e-05, "loss": 3.1215, "step": 30280 }, { "epoch": 51.18, "learning_rate": 9.99957943486067e-05, "loss": 3.1198, "step": 30300 }, { "epoch": 51.22, "learning_rate": 9.999519211941241e-05, "loss": 3.1091, "step": 30320 }, { "epoch": 51.25, "learning_rate": 9.999454960999488e-05, "loss": 3.0948, "step": 30340 }, { "epoch": 51.28, "learning_rate": 9.999386682087173e-05, "loss": 3.1113, "step": 30360 }, { "epoch": 51.32, "learning_rate": 9.999314375259307e-05, "loss": 3.1162, "step": 30380 }, { "epoch": 51.35, "learning_rate": 9.999238040574151e-05, "loss": 3.1212, "step": 30400 }, { "epoch": 51.39, "learning_rate": 9.999157678093199e-05, "loss": 3.1081, "step": 30420 }, { "epoch": 51.42, "learning_rate": 9.999073287881202e-05, "loss": 3.1162, "step": 30440 }, { "epoch": 51.45, "learning_rate": 9.998984870006152e-05, "loss": 3.1367, "step": 30460 }, { "epoch": 51.49, "learning_rate": 9.998892424539283e-05, "loss": 3.1441, "step": 30480 }, { "epoch": 51.52, "learning_rate": 9.99879595155508e-05, "loss": 3.1363, "step": 30500 }, { "epoch": 51.55, "learning_rate": 9.998695451131268e-05, "loss": 3.1426, "step": 30520 }, { "epoch": 51.59, "learning_rate": 9.998590923348818e-05, "loss": 3.1136, "step": 30540 }, { "epoch": 51.62, "learning_rate": 9.998482368291946e-05, "loss": 3.1525, "step": 30560 }, { "epoch": 51.66, "learning_rate": 9.998369786048113e-05, "loss": 3.127, "step": 30580 }, { "epoch": 51.69, "learning_rate": 9.998253176708026e-05, "loss": 3.1411, "step": 30600 }, { "epoch": 51.72, "learning_rate": 9.998132540365634e-05, "loss": 3.157, "step": 30620 }, { "epoch": 51.76, "learning_rate": 9.99800787711813e-05, "loss": 3.1286, "step": 30640 }, { "epoch": 51.79, "learning_rate": 9.997879187065955e-05, "loss": 3.1298, "step": 30660 }, { "epoch": 51.82, "learning_rate": 9.997746470312792e-05, "loss": 3.1167, "step": 30680 }, { "epoch": 51.86, "learning_rate": 9.997609726965566e-05, "loss": 3.1338, "step": 30700 }, { "epoch": 51.89, "learning_rate": 9.997468957134453e-05, "loss": 3.1421, "step": 30720 }, { "epoch": 51.93, "learning_rate": 9.997324160932864e-05, "loss": 3.1708, "step": 30740 }, { "epoch": 51.96, "learning_rate": 9.997175338477462e-05, "loss": 3.1372, "step": 30760 }, { "epoch": 51.99, "learning_rate": 9.997022489888151e-05, "loss": 3.1345, "step": 30780 }, { "epoch": 52.03, "learning_rate": 9.996865615288076e-05, "loss": 3.07, "step": 30800 }, { "epoch": 52.06, "learning_rate": 9.996704714803629e-05, "loss": 3.0429, "step": 30820 }, { "epoch": 52.09, "learning_rate": 9.996539788564444e-05, "loss": 3.0817, "step": 30840 }, { "epoch": 52.13, "learning_rate": 9.9963708367034e-05, "loss": 3.0894, "step": 30860 }, { "epoch": 52.16, "learning_rate": 9.996197859356618e-05, "loss": 3.0859, "step": 30880 }, { "epoch": 52.2, "learning_rate": 9.996020856663464e-05, "loss": 3.0826, "step": 30900 }, { "epoch": 52.23, "learning_rate": 9.995839828766543e-05, "loss": 3.0649, "step": 30920 }, { "epoch": 52.26, "learning_rate": 9.995654775811709e-05, "loss": 3.0831, "step": 30940 }, { "epoch": 52.3, "learning_rate": 9.995465697948054e-05, "loss": 3.089, "step": 30960 }, { "epoch": 52.33, "learning_rate": 9.995272595327916e-05, "loss": 3.1017, "step": 30980 }, { "epoch": 52.36, "learning_rate": 9.995075468106871e-05, "loss": 3.0917, "step": 31000 }, { "epoch": 52.36, "eval_loss": 3.1495461463928223, "eval_runtime": 47.9091, "eval_samples_per_second": 20.643, "eval_steps_per_second": 0.125, "eval_tse_ndup": 0.005384660563324352, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.025098678207101443, "eval_tse_type": 8.777103696213909e-06, "step": 31000 }, { "epoch": 52.4, "learning_rate": 9.994874316443746e-05, "loss": 3.1011, "step": 31020 }, { "epoch": 52.43, "learning_rate": 9.994669140500601e-05, "loss": 3.1018, "step": 31040 }, { "epoch": 52.47, "learning_rate": 9.994459940442744e-05, "loss": 3.1035, "step": 31060 }, { "epoch": 52.5, "learning_rate": 9.994246716438724e-05, "loss": 3.0992, "step": 31080 }, { "epoch": 52.53, "learning_rate": 9.994029468660333e-05, "loss": 3.1029, "step": 31100 }, { "epoch": 52.57, "learning_rate": 9.9938081972826e-05, "loss": 3.1093, "step": 31120 }, { "epoch": 52.6, "learning_rate": 9.993582902483802e-05, "loss": 3.117, "step": 31140 }, { "epoch": 52.64, "learning_rate": 9.993353584445455e-05, "loss": 3.1145, "step": 31160 }, { "epoch": 52.67, "learning_rate": 9.993120243352317e-05, "loss": 3.0942, "step": 31180 }, { "epoch": 52.7, "learning_rate": 9.992882879392383e-05, "loss": 3.0882, "step": 31200 }, { "epoch": 52.74, "learning_rate": 9.992641492756895e-05, "loss": 3.0925, "step": 31220 }, { "epoch": 52.77, "learning_rate": 9.992396083640336e-05, "loss": 3.0963, "step": 31240 }, { "epoch": 52.8, "learning_rate": 9.992146652240427e-05, "loss": 3.1115, "step": 31260 }, { "epoch": 52.84, "learning_rate": 9.991893198758127e-05, "loss": 3.0974, "step": 31280 }, { "epoch": 52.87, "learning_rate": 9.99163572339764e-05, "loss": 3.1019, "step": 31300 }, { "epoch": 52.91, "learning_rate": 9.991374226366414e-05, "loss": 3.1158, "step": 31320 }, { "epoch": 52.94, "learning_rate": 9.991108707875127e-05, "loss": 3.1046, "step": 31340 }, { "epoch": 52.97, "learning_rate": 9.990839168137705e-05, "loss": 3.0875, "step": 31360 }, { "epoch": 53.01, "learning_rate": 9.99056560737131e-05, "loss": 3.0856, "step": 31380 }, { "epoch": 53.04, "learning_rate": 9.990288025796345e-05, "loss": 3.0327, "step": 31400 }, { "epoch": 53.07, "learning_rate": 9.990006423636453e-05, "loss": 3.0263, "step": 31420 }, { "epoch": 53.11, "learning_rate": 9.989720801118518e-05, "loss": 3.0481, "step": 31440 }, { "epoch": 53.14, "learning_rate": 9.989431158472657e-05, "loss": 3.022, "step": 31460 }, { "epoch": 53.18, "learning_rate": 9.989137495932231e-05, "loss": 3.0719, "step": 31480 }, { "epoch": 53.21, "learning_rate": 9.98883981373384e-05, "loss": 3.0342, "step": 31500 }, { "epoch": 53.24, "learning_rate": 9.98853811211732e-05, "loss": 3.0417, "step": 31520 }, { "epoch": 53.28, "learning_rate": 9.988232391325747e-05, "loss": 3.051, "step": 31540 }, { "epoch": 53.31, "learning_rate": 9.987922651605435e-05, "loss": 3.047, "step": 31560 }, { "epoch": 53.34, "learning_rate": 9.987608893205935e-05, "loss": 3.0516, "step": 31580 }, { "epoch": 53.38, "learning_rate": 9.987291116380037e-05, "loss": 3.057, "step": 31600 }, { "epoch": 53.41, "learning_rate": 9.986969321383768e-05, "loss": 3.0616, "step": 31620 }, { "epoch": 53.45, "learning_rate": 9.986643508476392e-05, "loss": 3.0592, "step": 31640 }, { "epoch": 53.48, "learning_rate": 9.986313677920411e-05, "loss": 3.0893, "step": 31660 }, { "epoch": 53.51, "learning_rate": 9.985979829981562e-05, "loss": 3.0649, "step": 31680 }, { "epoch": 53.55, "learning_rate": 9.985641964928822e-05, "loss": 3.0782, "step": 31700 }, { "epoch": 53.58, "learning_rate": 9.985300083034403e-05, "loss": 3.0677, "step": 31720 }, { "epoch": 53.61, "learning_rate": 9.984954184573753e-05, "loss": 3.0715, "step": 31740 }, { "epoch": 53.65, "learning_rate": 9.984604269825555e-05, "loss": 3.0784, "step": 31760 }, { "epoch": 53.68, "learning_rate": 9.98425033907173e-05, "loss": 3.0698, "step": 31780 }, { "epoch": 53.72, "learning_rate": 9.983892392597433e-05, "loss": 3.0702, "step": 31800 }, { "epoch": 53.75, "learning_rate": 9.983530430691054e-05, "loss": 3.0813, "step": 31820 }, { "epoch": 53.78, "learning_rate": 9.983164453644222e-05, "loss": 3.0766, "step": 31840 }, { "epoch": 53.82, "learning_rate": 9.982794461751796e-05, "loss": 3.0617, "step": 31860 }, { "epoch": 53.85, "learning_rate": 9.982420455311872e-05, "loss": 3.0803, "step": 31880 }, { "epoch": 53.89, "learning_rate": 9.98204243462578e-05, "loss": 3.0647, "step": 31900 }, { "epoch": 53.92, "learning_rate": 9.981660399998086e-05, "loss": 3.0745, "step": 31920 }, { "epoch": 53.95, "learning_rate": 9.981274351736585e-05, "loss": 3.089, "step": 31940 }, { "epoch": 53.99, "learning_rate": 9.980884290152313e-05, "loss": 3.0893, "step": 31960 }, { "epoch": 54.02, "learning_rate": 9.980490215559535e-05, "loss": 3.0372, "step": 31980 }, { "epoch": 54.05, "learning_rate": 9.980092128275748e-05, "loss": 3.0138, "step": 32000 }, { "epoch": 54.05, "eval_loss": 3.1076266765594482, "eval_runtime": 47.4505, "eval_samples_per_second": 20.843, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004153837387933892, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.029939971117016524, "eval_tse_type": 5.617346365576901e-05, "step": 32000 }, { "epoch": 54.09, "learning_rate": 9.979690028621684e-05, "loss": 3.0161, "step": 32020 }, { "epoch": 54.12, "learning_rate": 9.979283916921308e-05, "loss": 3.0088, "step": 32040 }, { "epoch": 54.16, "learning_rate": 9.978873793501817e-05, "loss": 3.0205, "step": 32060 }, { "epoch": 54.19, "learning_rate": 9.978459658693639e-05, "loss": 3.0207, "step": 32080 }, { "epoch": 54.22, "learning_rate": 9.978041512830438e-05, "loss": 3.0243, "step": 32100 }, { "epoch": 54.26, "learning_rate": 9.977619356249103e-05, "loss": 3.027, "step": 32120 }, { "epoch": 54.29, "learning_rate": 9.97719318928976e-05, "loss": 3.0175, "step": 32140 }, { "epoch": 54.32, "learning_rate": 9.976763012295762e-05, "loss": 3.027, "step": 32160 }, { "epoch": 54.36, "learning_rate": 9.976328825613696e-05, "loss": 3.0509, "step": 32180 }, { "epoch": 54.39, "learning_rate": 9.975890629593378e-05, "loss": 3.032, "step": 32200 }, { "epoch": 54.43, "learning_rate": 9.975448424587858e-05, "loss": 3.0245, "step": 32220 }, { "epoch": 54.46, "learning_rate": 9.975002210953408e-05, "loss": 3.0334, "step": 32240 }, { "epoch": 54.49, "learning_rate": 9.974551989049535e-05, "loss": 3.0441, "step": 32260 }, { "epoch": 54.53, "learning_rate": 9.974097759238976e-05, "loss": 3.0297, "step": 32280 }, { "epoch": 54.56, "learning_rate": 9.973639521887696e-05, "loss": 3.0405, "step": 32300 }, { "epoch": 54.59, "learning_rate": 9.973177277364889e-05, "loss": 3.0464, "step": 32320 }, { "epoch": 54.63, "learning_rate": 9.972711026042975e-05, "loss": 3.0385, "step": 32340 }, { "epoch": 54.66, "learning_rate": 9.972240768297605e-05, "loss": 3.0736, "step": 32360 }, { "epoch": 54.7, "learning_rate": 9.971766504507657e-05, "loss": 3.038, "step": 32380 }, { "epoch": 54.73, "learning_rate": 9.971288235055239e-05, "loss": 3.0331, "step": 32400 }, { "epoch": 54.76, "learning_rate": 9.97080596032568e-05, "loss": 3.0234, "step": 32420 }, { "epoch": 54.8, "learning_rate": 9.970319680707543e-05, "loss": 3.0328, "step": 32440 }, { "epoch": 54.83, "learning_rate": 9.969829396592614e-05, "loss": 3.0402, "step": 32460 }, { "epoch": 54.86, "learning_rate": 9.969335108375907e-05, "loss": 3.0281, "step": 32480 }, { "epoch": 54.9, "learning_rate": 9.968836816455659e-05, "loss": 3.0512, "step": 32500 }, { "epoch": 54.93, "learning_rate": 9.968334521233337e-05, "loss": 3.0608, "step": 32520 }, { "epoch": 54.97, "learning_rate": 9.967828223113629e-05, "loss": 3.047, "step": 32540 }, { "epoch": 55.0, "learning_rate": 9.967317922504452e-05, "loss": 3.0328, "step": 32560 }, { "epoch": 55.03, "learning_rate": 9.966803619816946e-05, "loss": 2.9831, "step": 32580 }, { "epoch": 55.07, "learning_rate": 9.966311325716134e-05, "loss": 2.9638, "step": 32600 }, { "epoch": 55.1, "learning_rate": 9.965789220170637e-05, "loss": 2.9943, "step": 32620 }, { "epoch": 55.14, "learning_rate": 9.965263113778456e-05, "loss": 2.9563, "step": 32640 }, { "epoch": 55.17, "learning_rate": 9.964733006963469e-05, "loss": 2.9923, "step": 32660 }, { "epoch": 55.2, "learning_rate": 9.964198900152773e-05, "loss": 3.0015, "step": 32680 }, { "epoch": 55.24, "learning_rate": 9.963660793776688e-05, "loss": 2.9952, "step": 32700 }, { "epoch": 55.27, "learning_rate": 9.963118688268754e-05, "loss": 3.0032, "step": 32720 }, { "epoch": 55.3, "learning_rate": 9.962572584065739e-05, "loss": 2.9975, "step": 32740 }, { "epoch": 55.34, "learning_rate": 9.962022481607626e-05, "loss": 2.9956, "step": 32760 }, { "epoch": 55.37, "learning_rate": 9.961468381337627e-05, "loss": 3.0038, "step": 32780 }, { "epoch": 55.41, "learning_rate": 9.960910283702167e-05, "loss": 2.9954, "step": 32800 }, { "epoch": 55.44, "learning_rate": 9.960348189150896e-05, "loss": 2.9743, "step": 32820 }, { "epoch": 55.47, "learning_rate": 9.959782098136683e-05, "loss": 3.0213, "step": 32840 }, { "epoch": 55.51, "learning_rate": 9.959212011115619e-05, "loss": 3.0029, "step": 32860 }, { "epoch": 55.54, "learning_rate": 9.958637928547012e-05, "loss": 2.9911, "step": 32880 }, { "epoch": 55.57, "learning_rate": 9.958059850893389e-05, "loss": 3.0187, "step": 32900 }, { "epoch": 55.61, "learning_rate": 9.957477778620497e-05, "loss": 3.0225, "step": 32920 }, { "epoch": 55.64, "learning_rate": 9.956891712197302e-05, "loss": 3.005, "step": 32940 }, { "epoch": 55.68, "learning_rate": 9.956301652095986e-05, "loss": 3.0013, "step": 32960 }, { "epoch": 55.71, "learning_rate": 9.955707598791952e-05, "loss": 3.0038, "step": 32980 }, { "epoch": 55.74, "learning_rate": 9.955109552763815e-05, "loss": 3.0344, "step": 33000 }, { "epoch": 55.74, "eval_loss": 3.0742015838623047, "eval_runtime": 47.6167, "eval_samples_per_second": 20.77, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.005540551425196166, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.023727688279164206, "eval_tse_type": 1.9309628131670596e-05, "step": 33000 }, { "epoch": 55.78, "learning_rate": 9.954507514493412e-05, "loss": 3.0023, "step": 33020 }, { "epoch": 55.81, "learning_rate": 9.953901484465794e-05, "loss": 3.0111, "step": 33040 }, { "epoch": 55.84, "learning_rate": 9.953291463169228e-05, "loss": 3.0387, "step": 33060 }, { "epoch": 55.88, "learning_rate": 9.952677451095196e-05, "loss": 3.0281, "step": 33080 }, { "epoch": 55.91, "learning_rate": 9.9520594487384e-05, "loss": 3.0099, "step": 33100 }, { "epoch": 55.95, "learning_rate": 9.95143745659675e-05, "loss": 3.032, "step": 33120 }, { "epoch": 55.98, "learning_rate": 9.950811475171376e-05, "loss": 3.0183, "step": 33140 }, { "epoch": 56.01, "learning_rate": 9.950181504966617e-05, "loss": 3.0038, "step": 33160 }, { "epoch": 56.05, "learning_rate": 9.949547546490032e-05, "loss": 2.9516, "step": 33180 }, { "epoch": 56.08, "learning_rate": 9.948909600252388e-05, "loss": 2.9413, "step": 33200 }, { "epoch": 56.11, "learning_rate": 9.948267666767665e-05, "loss": 2.9535, "step": 33220 }, { "epoch": 56.15, "learning_rate": 9.947621746553062e-05, "loss": 2.9572, "step": 33240 }, { "epoch": 56.18, "learning_rate": 9.946971840128981e-05, "loss": 2.9445, "step": 33260 }, { "epoch": 56.22, "learning_rate": 9.946317948019043e-05, "loss": 2.9524, "step": 33280 }, { "epoch": 56.25, "learning_rate": 9.945660070750074e-05, "loss": 2.9687, "step": 33300 }, { "epoch": 56.28, "learning_rate": 9.944998208852116e-05, "loss": 2.9653, "step": 33320 }, { "epoch": 56.32, "learning_rate": 9.944332362858418e-05, "loss": 2.9739, "step": 33340 }, { "epoch": 56.35, "learning_rate": 9.943662533305442e-05, "loss": 2.9763, "step": 33360 }, { "epoch": 56.39, "learning_rate": 9.942988720732856e-05, "loss": 2.982, "step": 33380 }, { "epoch": 56.42, "learning_rate": 9.942310925683538e-05, "loss": 2.9574, "step": 33400 }, { "epoch": 56.45, "learning_rate": 9.941629148703575e-05, "loss": 2.9716, "step": 33420 }, { "epoch": 56.49, "learning_rate": 9.940943390342264e-05, "loss": 2.9621, "step": 33440 }, { "epoch": 56.52, "learning_rate": 9.940253651152109e-05, "loss": 2.9938, "step": 33460 }, { "epoch": 56.55, "learning_rate": 9.939559931688818e-05, "loss": 2.9634, "step": 33480 }, { "epoch": 56.59, "learning_rate": 9.938862232511309e-05, "loss": 2.9957, "step": 33500 }, { "epoch": 56.62, "learning_rate": 9.938160554181706e-05, "loss": 2.9867, "step": 33520 }, { "epoch": 56.66, "learning_rate": 9.937454897265337e-05, "loss": 2.9972, "step": 33540 }, { "epoch": 56.69, "learning_rate": 9.93674526233074e-05, "loss": 2.981, "step": 33560 }, { "epoch": 56.72, "learning_rate": 9.936031649949654e-05, "loss": 2.9927, "step": 33580 }, { "epoch": 56.76, "learning_rate": 9.935314060697024e-05, "loss": 3.0049, "step": 33600 }, { "epoch": 56.79, "learning_rate": 9.934592495150995e-05, "loss": 2.9847, "step": 33620 }, { "epoch": 56.82, "learning_rate": 9.933866953892923e-05, "loss": 2.9842, "step": 33640 }, { "epoch": 56.86, "learning_rate": 9.933137437507363e-05, "loss": 3.0006, "step": 33660 }, { "epoch": 56.89, "learning_rate": 9.932403946582072e-05, "loss": 2.9997, "step": 33680 }, { "epoch": 56.93, "learning_rate": 9.931666481708013e-05, "loss": 2.9872, "step": 33700 }, { "epoch": 56.96, "learning_rate": 9.930925043479345e-05, "loss": 3.0034, "step": 33720 }, { "epoch": 56.99, "learning_rate": 9.930179632493433e-05, "loss": 2.9868, "step": 33740 }, { "epoch": 57.03, "learning_rate": 9.929430249350839e-05, "loss": 2.931, "step": 33760 }, { "epoch": 57.06, "learning_rate": 9.92867689465533e-05, "loss": 2.9095, "step": 33780 }, { "epoch": 57.09, "learning_rate": 9.927919569013872e-05, "loss": 2.8951, "step": 33800 }, { "epoch": 57.13, "learning_rate": 9.927158273036625e-05, "loss": 2.9222, "step": 33820 }, { "epoch": 57.16, "learning_rate": 9.926393007336952e-05, "loss": 2.9202, "step": 33840 }, { "epoch": 57.2, "learning_rate": 9.925623772531414e-05, "loss": 2.9256, "step": 33860 }, { "epoch": 57.23, "learning_rate": 9.92485056923977e-05, "loss": 2.9443, "step": 33880 }, { "epoch": 57.26, "learning_rate": 9.924073398084976e-05, "loss": 2.9215, "step": 33900 }, { "epoch": 57.3, "learning_rate": 9.923292259693185e-05, "loss": 2.9173, "step": 33920 }, { "epoch": 57.33, "learning_rate": 9.922507154693746e-05, "loss": 2.9559, "step": 33940 }, { "epoch": 57.36, "learning_rate": 9.921718083719203e-05, "loss": 2.9508, "step": 33960 }, { "epoch": 57.4, "learning_rate": 9.920925047405296e-05, "loss": 2.94, "step": 33980 }, { "epoch": 57.43, "learning_rate": 9.920128046390961e-05, "loss": 2.9482, "step": 34000 }, { "epoch": 57.43, "eval_loss": 3.0440194606781006, "eval_runtime": 50.7751, "eval_samples_per_second": 19.478, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.005973692780821982, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.025863406414690565, "eval_tse_type": 5.4418042916526235e-05, "step": 34000 }, { "epoch": 57.47, "learning_rate": 9.919327081318328e-05, "loss": 2.9438, "step": 34020 }, { "epoch": 57.5, "learning_rate": 9.91852215283272e-05, "loss": 2.9469, "step": 34040 }, { "epoch": 57.53, "learning_rate": 9.917713261582651e-05, "loss": 2.9494, "step": 34060 }, { "epoch": 57.57, "learning_rate": 9.916900408219834e-05, "loss": 2.9649, "step": 34080 }, { "epoch": 57.6, "learning_rate": 9.916083593399166e-05, "loss": 2.9598, "step": 34100 }, { "epoch": 57.64, "learning_rate": 9.915262817778743e-05, "loss": 2.9585, "step": 34120 }, { "epoch": 57.67, "learning_rate": 9.914438082019848e-05, "loss": 2.9675, "step": 34140 }, { "epoch": 57.7, "learning_rate": 9.913609386786955e-05, "loss": 2.96, "step": 34160 }, { "epoch": 57.74, "learning_rate": 9.912776732747729e-05, "loss": 2.9655, "step": 34180 }, { "epoch": 57.77, "learning_rate": 9.911940120573027e-05, "loss": 2.967, "step": 34200 }, { "epoch": 57.8, "learning_rate": 9.911099550936887e-05, "loss": 2.966, "step": 34220 }, { "epoch": 57.84, "learning_rate": 9.910255024516546e-05, "loss": 2.9928, "step": 34240 }, { "epoch": 57.87, "learning_rate": 9.909406541992421e-05, "loss": 2.9733, "step": 34260 }, { "epoch": 57.91, "learning_rate": 9.90855410404812e-05, "loss": 2.9482, "step": 34280 }, { "epoch": 57.94, "learning_rate": 9.907697711370437e-05, "loss": 2.9731, "step": 34300 }, { "epoch": 57.97, "learning_rate": 9.90683736464935e-05, "loss": 2.9786, "step": 34320 }, { "epoch": 58.01, "learning_rate": 9.905973064578029e-05, "loss": 2.9644, "step": 34340 }, { "epoch": 58.04, "learning_rate": 9.905104811852822e-05, "loss": 2.9021, "step": 34360 }, { "epoch": 58.07, "learning_rate": 9.904232607173262e-05, "loss": 2.9144, "step": 34380 }, { "epoch": 58.11, "learning_rate": 9.903356451242073e-05, "loss": 2.8858, "step": 34400 }, { "epoch": 58.14, "learning_rate": 9.902476344765157e-05, "loss": 2.8912, "step": 34420 }, { "epoch": 58.18, "learning_rate": 9.901592288451599e-05, "loss": 2.8995, "step": 34440 }, { "epoch": 58.21, "learning_rate": 9.900704283013668e-05, "loss": 2.9142, "step": 34460 }, { "epoch": 58.24, "learning_rate": 9.899812329166814e-05, "loss": 2.9125, "step": 34480 }, { "epoch": 58.28, "learning_rate": 9.898916427629665e-05, "loss": 2.9074, "step": 34500 }, { "epoch": 58.31, "learning_rate": 9.898016579124037e-05, "loss": 2.9143, "step": 34520 }, { "epoch": 58.34, "learning_rate": 9.89711278437492e-05, "loss": 2.9293, "step": 34540 }, { "epoch": 58.38, "learning_rate": 9.896205044110486e-05, "loss": 2.9196, "step": 34560 }, { "epoch": 58.41, "learning_rate": 9.895293359062084e-05, "loss": 2.9231, "step": 34580 }, { "epoch": 58.45, "learning_rate": 9.894377729964241e-05, "loss": 2.9108, "step": 34600 }, { "epoch": 58.48, "learning_rate": 9.893504229817371e-05, "loss": 2.9392, "step": 34620 }, { "epoch": 58.51, "learning_rate": 9.892580911947841e-05, "loss": 2.917, "step": 34640 }, { "epoch": 58.55, "learning_rate": 9.891653652214241e-05, "loss": 2.9132, "step": 34660 }, { "epoch": 58.58, "learning_rate": 9.890722451363648e-05, "loss": 2.9256, "step": 34680 }, { "epoch": 58.61, "learning_rate": 9.889787310146313e-05, "loss": 2.9259, "step": 34700 }, { "epoch": 58.65, "learning_rate": 9.88884822931566e-05, "loss": 2.9287, "step": 34720 }, { "epoch": 58.68, "learning_rate": 9.887905209628295e-05, "loss": 2.9423, "step": 34740 }, { "epoch": 58.72, "learning_rate": 9.886958251843985e-05, "loss": 2.9489, "step": 34760 }, { "epoch": 58.75, "learning_rate": 9.886007356725685e-05, "loss": 2.9277, "step": 34780 }, { "epoch": 58.78, "learning_rate": 9.885052525039505e-05, "loss": 2.9371, "step": 34800 }, { "epoch": 58.82, "learning_rate": 9.884093757554743e-05, "loss": 2.9509, "step": 34820 }, { "epoch": 58.85, "learning_rate": 9.883131055043857e-05, "loss": 2.9292, "step": 34840 }, { "epoch": 58.89, "learning_rate": 9.882164418282481e-05, "loss": 2.9503, "step": 34860 }, { "epoch": 58.92, "learning_rate": 9.881193848049415e-05, "loss": 2.932, "step": 34880 }, { "epoch": 58.95, "learning_rate": 9.880219345126628e-05, "loss": 2.951, "step": 34900 }, { "epoch": 58.99, "learning_rate": 9.879240910299265e-05, "loss": 2.9435, "step": 34920 }, { "epoch": 59.02, "learning_rate": 9.87825854435563e-05, "loss": 2.8869, "step": 34940 }, { "epoch": 59.05, "learning_rate": 9.877272248087197e-05, "loss": 2.8609, "step": 34960 }, { "epoch": 59.09, "learning_rate": 9.876282022288609e-05, "loss": 2.8696, "step": 34980 }, { "epoch": 59.12, "learning_rate": 9.875287867757671e-05, "loss": 2.8636, "step": 35000 }, { "epoch": 59.12, "eval_loss": 3.0145843029022217, "eval_runtime": 47.4119, "eval_samples_per_second": 20.86, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.006429794500629647, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.029731352765353727, "eval_tse_type": 1.5798786653185036e-05, "step": 35000 }, { "epoch": 59.16, "learning_rate": 9.874289785295356e-05, "loss": 2.8783, "step": 35020 }, { "epoch": 59.19, "learning_rate": 9.873287775705801e-05, "loss": 2.8789, "step": 35040 }, { "epoch": 59.22, "learning_rate": 9.87228183979631e-05, "loss": 2.8784, "step": 35060 }, { "epoch": 59.26, "learning_rate": 9.871271978377341e-05, "loss": 2.8911, "step": 35080 }, { "epoch": 59.29, "learning_rate": 9.870258192262526e-05, "loss": 2.887, "step": 35100 }, { "epoch": 59.32, "learning_rate": 9.869240482268653e-05, "loss": 2.8873, "step": 35120 }, { "epoch": 59.36, "learning_rate": 9.868218849215673e-05, "loss": 2.8856, "step": 35140 }, { "epoch": 59.39, "learning_rate": 9.867193293926695e-05, "loss": 2.9085, "step": 35160 }, { "epoch": 59.43, "learning_rate": 9.866163817227994e-05, "loss": 2.9092, "step": 35180 }, { "epoch": 59.46, "learning_rate": 9.865130419948998e-05, "loss": 2.9038, "step": 35200 }, { "epoch": 59.49, "learning_rate": 9.864093102922299e-05, "loss": 2.8948, "step": 35220 }, { "epoch": 59.53, "learning_rate": 9.863051866983642e-05, "loss": 2.901, "step": 35240 }, { "epoch": 59.56, "learning_rate": 9.862006712971932e-05, "loss": 2.9121, "step": 35260 }, { "epoch": 59.59, "learning_rate": 9.860957641729233e-05, "loss": 2.9218, "step": 35280 }, { "epoch": 59.63, "learning_rate": 9.85990465410076e-05, "loss": 2.9, "step": 35300 }, { "epoch": 59.66, "learning_rate": 9.85884775093489e-05, "loss": 2.8993, "step": 35320 }, { "epoch": 59.7, "learning_rate": 9.857786933083146e-05, "loss": 2.8981, "step": 35340 }, { "epoch": 59.73, "learning_rate": 9.856722201400215e-05, "loss": 2.9141, "step": 35360 }, { "epoch": 59.76, "learning_rate": 9.855653556743927e-05, "loss": 2.9163, "step": 35380 }, { "epoch": 59.8, "learning_rate": 9.854580999975271e-05, "loss": 2.9001, "step": 35400 }, { "epoch": 59.83, "learning_rate": 9.85350453195839e-05, "loss": 2.9094, "step": 35420 }, { "epoch": 59.86, "learning_rate": 9.85242415356057e-05, "loss": 2.9078, "step": 35440 }, { "epoch": 59.9, "learning_rate": 9.851339865652259e-05, "loss": 2.915, "step": 35460 }, { "epoch": 59.93, "learning_rate": 9.85025166910704e-05, "loss": 2.9259, "step": 35480 }, { "epoch": 59.97, "learning_rate": 9.849159564801659e-05, "loss": 2.9205, "step": 35500 }, { "epoch": 60.0, "learning_rate": 9.848063553616003e-05, "loss": 2.9246, "step": 35520 }, { "epoch": 60.03, "learning_rate": 9.846963636433106e-05, "loss": 2.8246, "step": 35540 }, { "epoch": 60.07, "learning_rate": 9.845859814139157e-05, "loss": 2.8608, "step": 35560 }, { "epoch": 60.1, "learning_rate": 9.84475208762348e-05, "loss": 2.8313, "step": 35580 }, { "epoch": 60.14, "learning_rate": 9.843640457778554e-05, "loss": 2.857, "step": 35600 }, { "epoch": 60.17, "learning_rate": 9.842524925499999e-05, "loss": 2.8624, "step": 35620 }, { "epoch": 60.2, "learning_rate": 9.841405491686576e-05, "loss": 2.8565, "step": 35640 }, { "epoch": 60.24, "learning_rate": 9.840282157240194e-05, "loss": 2.8457, "step": 35660 }, { "epoch": 60.27, "learning_rate": 9.839154923065908e-05, "loss": 2.8677, "step": 35680 }, { "epoch": 60.3, "learning_rate": 9.838023790071903e-05, "loss": 2.8682, "step": 35700 }, { "epoch": 60.34, "learning_rate": 9.836888759169516e-05, "loss": 2.8505, "step": 35720 }, { "epoch": 60.37, "learning_rate": 9.83574983127322e-05, "loss": 2.8816, "step": 35740 }, { "epoch": 60.41, "learning_rate": 9.834607007300629e-05, "loss": 2.8709, "step": 35760 }, { "epoch": 60.44, "learning_rate": 9.833460288172495e-05, "loss": 2.8759, "step": 35780 }, { "epoch": 60.47, "learning_rate": 9.832309674812712e-05, "loss": 2.8548, "step": 35800 }, { "epoch": 60.51, "learning_rate": 9.831155168148305e-05, "loss": 2.8642, "step": 35820 }, { "epoch": 60.54, "learning_rate": 9.82999676910944e-05, "loss": 2.886, "step": 35840 }, { "epoch": 60.57, "learning_rate": 9.828834478629418e-05, "loss": 2.8796, "step": 35860 }, { "epoch": 60.61, "learning_rate": 9.827668297644675e-05, "loss": 2.888, "step": 35880 }, { "epoch": 60.64, "learning_rate": 9.826498227094784e-05, "loss": 2.8884, "step": 35900 }, { "epoch": 60.68, "learning_rate": 9.825324267922449e-05, "loss": 2.9063, "step": 35920 }, { "epoch": 60.71, "learning_rate": 9.824146421073506e-05, "loss": 2.8783, "step": 35940 }, { "epoch": 60.74, "learning_rate": 9.822964687496926e-05, "loss": 2.8948, "step": 35960 }, { "epoch": 60.78, "learning_rate": 9.82177906814481e-05, "loss": 2.8993, "step": 35980 }, { "epoch": 60.81, "learning_rate": 9.820589563972392e-05, "loss": 2.8785, "step": 36000 }, { "epoch": 60.81, "eval_loss": 2.9853713512420654, "eval_runtime": 47.5856, "eval_samples_per_second": 20.784, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.006577343696938163, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.026311128506210715, "eval_tse_type": 7.208581726990321e-05, "step": 36000 }, { "epoch": 60.84, "learning_rate": 9.819396175938032e-05, "loss": 2.8898, "step": 36020 }, { "epoch": 60.88, "learning_rate": 9.818198905003222e-05, "loss": 2.8822, "step": 36040 }, { "epoch": 60.91, "learning_rate": 9.816997752132582e-05, "loss": 2.882, "step": 36060 }, { "epoch": 60.95, "learning_rate": 9.815792718293857e-05, "loss": 2.8989, "step": 36080 }, { "epoch": 60.98, "learning_rate": 9.814583804457924e-05, "loss": 2.9002, "step": 36100 }, { "epoch": 61.01, "learning_rate": 9.81337101159878e-05, "loss": 2.8762, "step": 36120 }, { "epoch": 61.05, "learning_rate": 9.812154340693553e-05, "loss": 2.8286, "step": 36140 }, { "epoch": 61.08, "learning_rate": 9.810933792722492e-05, "loss": 2.8041, "step": 36160 }, { "epoch": 61.11, "learning_rate": 9.809709368668969e-05, "loss": 2.8286, "step": 36180 }, { "epoch": 61.15, "learning_rate": 9.808481069519482e-05, "loss": 2.8265, "step": 36200 }, { "epoch": 61.18, "learning_rate": 9.807248896263647e-05, "loss": 2.8351, "step": 36220 }, { "epoch": 61.22, "learning_rate": 9.806012849894208e-05, "loss": 2.8225, "step": 36240 }, { "epoch": 61.25, "learning_rate": 9.804772931407023e-05, "loss": 2.8444, "step": 36260 }, { "epoch": 61.28, "learning_rate": 9.803529141801071e-05, "loss": 2.8221, "step": 36280 }, { "epoch": 61.32, "learning_rate": 9.802281482078453e-05, "loss": 2.8442, "step": 36300 }, { "epoch": 61.35, "learning_rate": 9.801029953244383e-05, "loss": 2.8469, "step": 36320 }, { "epoch": 61.39, "learning_rate": 9.799774556307195e-05, "loss": 2.8387, "step": 36340 }, { "epoch": 61.42, "learning_rate": 9.798515292278344e-05, "loss": 2.8459, "step": 36360 }, { "epoch": 61.45, "learning_rate": 9.797252162172393e-05, "loss": 2.8553, "step": 36380 }, { "epoch": 61.49, "learning_rate": 9.795985167007023e-05, "loss": 2.8497, "step": 36400 }, { "epoch": 61.52, "learning_rate": 9.79471430780303e-05, "loss": 2.8559, "step": 36420 }, { "epoch": 61.55, "learning_rate": 9.793439585584324e-05, "loss": 2.8482, "step": 36440 }, { "epoch": 61.59, "learning_rate": 9.792161001377921e-05, "loss": 2.8522, "step": 36460 }, { "epoch": 61.62, "learning_rate": 9.790878556213957e-05, "loss": 2.86, "step": 36480 }, { "epoch": 61.66, "learning_rate": 9.789592251125674e-05, "loss": 2.874, "step": 36500 }, { "epoch": 61.69, "learning_rate": 9.788302087149426e-05, "loss": 2.8562, "step": 36520 }, { "epoch": 61.72, "learning_rate": 9.787008065324672e-05, "loss": 2.8655, "step": 36540 }, { "epoch": 61.76, "learning_rate": 9.785710186693983e-05, "loss": 2.8679, "step": 36560 }, { "epoch": 61.79, "learning_rate": 9.784408452303037e-05, "loss": 2.8656, "step": 36580 }, { "epoch": 61.82, "learning_rate": 9.78310286320062e-05, "loss": 2.8638, "step": 36600 }, { "epoch": 61.86, "learning_rate": 9.781793420438617e-05, "loss": 2.8619, "step": 36620 }, { "epoch": 61.89, "learning_rate": 9.780480125072026e-05, "loss": 2.874, "step": 36640 }, { "epoch": 61.93, "learning_rate": 9.779162978158944e-05, "loss": 2.872, "step": 36660 }, { "epoch": 61.96, "learning_rate": 9.777841980760571e-05, "loss": 2.8837, "step": 36680 }, { "epoch": 61.99, "learning_rate": 9.776517133941214e-05, "loss": 2.8674, "step": 36700 }, { "epoch": 62.03, "learning_rate": 9.775188438768276e-05, "loss": 2.8009, "step": 36720 }, { "epoch": 62.06, "learning_rate": 9.773855896312263e-05, "loss": 2.7796, "step": 36740 }, { "epoch": 62.09, "learning_rate": 9.77251950764678e-05, "loss": 2.8039, "step": 36760 }, { "epoch": 62.13, "learning_rate": 9.771179273848532e-05, "loss": 2.8124, "step": 36780 }, { "epoch": 62.16, "learning_rate": 9.769902491169436e-05, "loss": 2.8154, "step": 36800 }, { "epoch": 62.2, "learning_rate": 9.768554762470898e-05, "loss": 2.8202, "step": 36820 }, { "epoch": 62.23, "learning_rate": 9.767203191833918e-05, "loss": 2.8323, "step": 36840 }, { "epoch": 62.26, "learning_rate": 9.765847780347432e-05, "loss": 2.8114, "step": 36860 }, { "epoch": 62.3, "learning_rate": 9.764488529103471e-05, "loss": 2.8178, "step": 36880 }, { "epoch": 62.33, "learning_rate": 9.76312543919716e-05, "loss": 2.8239, "step": 36900 }, { "epoch": 62.36, "learning_rate": 9.761758511726715e-05, "loss": 2.8225, "step": 36920 }, { "epoch": 62.4, "learning_rate": 9.760387747793445e-05, "loss": 2.8288, "step": 36940 }, { "epoch": 62.43, "learning_rate": 9.759013148501747e-05, "loss": 2.8147, "step": 36960 }, { "epoch": 62.47, "learning_rate": 9.757634714959117e-05, "loss": 2.8262, "step": 36980 }, { "epoch": 62.5, "learning_rate": 9.756252448276127e-05, "loss": 2.8296, "step": 37000 }, { "epoch": 62.5, "eval_loss": 2.9655823707580566, "eval_runtime": 50.843, "eval_samples_per_second": 19.452, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.005690481399529718, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.030002900498550245, "eval_tse_type": 0.00014043365913942254, "step": 37000 }, { "epoch": 62.53, "learning_rate": 9.754866349566449e-05, "loss": 2.8358, "step": 37020 }, { "epoch": 62.57, "learning_rate": 9.753476419946837e-05, "loss": 2.8293, "step": 37040 }, { "epoch": 62.6, "learning_rate": 9.752082660537132e-05, "loss": 2.8435, "step": 37060 }, { "epoch": 62.64, "learning_rate": 9.750685072460259e-05, "loss": 2.8594, "step": 37080 }, { "epoch": 62.67, "learning_rate": 9.749283656842232e-05, "loss": 2.828, "step": 37100 }, { "epoch": 62.7, "learning_rate": 9.747878414812144e-05, "loss": 2.8415, "step": 37120 }, { "epoch": 62.74, "learning_rate": 9.746469347502174e-05, "loss": 2.8504, "step": 37140 }, { "epoch": 62.77, "learning_rate": 9.745056456047583e-05, "loss": 2.8391, "step": 37160 }, { "epoch": 62.8, "learning_rate": 9.74363974158671e-05, "loss": 2.8424, "step": 37180 }, { "epoch": 62.84, "learning_rate": 9.742219205260978e-05, "loss": 2.8376, "step": 37200 }, { "epoch": 62.87, "learning_rate": 9.74079484821489e-05, "loss": 2.8369, "step": 37220 }, { "epoch": 62.91, "learning_rate": 9.739366671596018e-05, "loss": 2.8418, "step": 37240 }, { "epoch": 62.94, "learning_rate": 9.737934676555024e-05, "loss": 2.8364, "step": 37260 }, { "epoch": 62.97, "learning_rate": 9.736498864245638e-05, "loss": 2.8364, "step": 37280 }, { "epoch": 63.01, "learning_rate": 9.735059235824669e-05, "loss": 2.8414, "step": 37300 }, { "epoch": 63.04, "learning_rate": 9.733615792451998e-05, "loss": 2.7791, "step": 37320 }, { "epoch": 63.07, "learning_rate": 9.732168535290583e-05, "loss": 2.7735, "step": 37340 }, { "epoch": 63.11, "learning_rate": 9.730717465506452e-05, "loss": 2.7741, "step": 37360 }, { "epoch": 63.14, "learning_rate": 9.729262584268707e-05, "loss": 2.772, "step": 37380 }, { "epoch": 63.18, "learning_rate": 9.727803892749518e-05, "loss": 2.799, "step": 37400 }, { "epoch": 63.21, "learning_rate": 9.726341392124127e-05, "loss": 2.7911, "step": 37420 }, { "epoch": 63.24, "learning_rate": 9.724875083570844e-05, "loss": 2.7756, "step": 37440 }, { "epoch": 63.28, "learning_rate": 9.723404968271049e-05, "loss": 2.7889, "step": 37460 }, { "epoch": 63.31, "learning_rate": 9.721931047409184e-05, "loss": 2.8044, "step": 37480 }, { "epoch": 63.34, "learning_rate": 9.720453322172764e-05, "loss": 2.811, "step": 37500 }, { "epoch": 63.38, "learning_rate": 9.718971793752363e-05, "loss": 2.7998, "step": 37520 }, { "epoch": 63.41, "learning_rate": 9.717486463341623e-05, "loss": 2.7907, "step": 37540 }, { "epoch": 63.45, "learning_rate": 9.715997332137248e-05, "loss": 2.8197, "step": 37560 }, { "epoch": 63.48, "learning_rate": 9.714504401339003e-05, "loss": 2.7852, "step": 37580 }, { "epoch": 63.51, "learning_rate": 9.713007672149716e-05, "loss": 2.7928, "step": 37600 }, { "epoch": 63.55, "learning_rate": 9.711507145775274e-05, "loss": 2.7938, "step": 37620 }, { "epoch": 63.58, "learning_rate": 9.710002823424626e-05, "loss": 2.8138, "step": 37640 }, { "epoch": 63.61, "learning_rate": 9.708494706309775e-05, "loss": 2.8227, "step": 37660 }, { "epoch": 63.65, "learning_rate": 9.706982795645784e-05, "loss": 2.8256, "step": 37680 }, { "epoch": 63.68, "learning_rate": 9.705467092650775e-05, "loss": 2.8179, "step": 37700 }, { "epoch": 63.72, "learning_rate": 9.70394759854592e-05, "loss": 2.8439, "step": 37720 }, { "epoch": 63.75, "learning_rate": 9.702424314555447e-05, "loss": 2.8262, "step": 37740 }, { "epoch": 63.78, "learning_rate": 9.700897241906642e-05, "loss": 2.8342, "step": 37760 }, { "epoch": 63.82, "learning_rate": 9.699366381829836e-05, "loss": 2.8327, "step": 37780 }, { "epoch": 63.85, "learning_rate": 9.697831735558417e-05, "loss": 2.8321, "step": 37800 }, { "epoch": 63.89, "learning_rate": 9.696293304328822e-05, "loss": 2.8342, "step": 37820 }, { "epoch": 63.92, "learning_rate": 9.694751089380536e-05, "loss": 2.8243, "step": 37840 }, { "epoch": 63.95, "learning_rate": 9.693205091956095e-05, "loss": 2.8131, "step": 37860 }, { "epoch": 63.99, "learning_rate": 9.691655313301082e-05, "loss": 2.8244, "step": 37880 }, { "epoch": 64.02, "learning_rate": 9.690101754664124e-05, "loss": 2.777, "step": 37900 }, { "epoch": 64.05, "learning_rate": 9.688544417296896e-05, "loss": 2.7272, "step": 37920 }, { "epoch": 64.09, "learning_rate": 9.686983302454116e-05, "loss": 2.7529, "step": 37940 }, { "epoch": 64.12, "learning_rate": 9.685418411393545e-05, "loss": 2.7637, "step": 37960 }, { "epoch": 64.16, "learning_rate": 9.68384974537599e-05, "loss": 2.7561, "step": 37980 }, { "epoch": 64.19, "learning_rate": 9.682277305665296e-05, "loss": 2.7802, "step": 38000 }, { "epoch": 64.19, "eval_loss": 2.940242052078247, "eval_runtime": 47.6927, "eval_samples_per_second": 20.737, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004603012748291356, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.029666812137402826, "eval_tse_type": 3.686383552409842e-05, "step": 38000 }, { "epoch": 64.22, "learning_rate": 9.680701093528348e-05, "loss": 2.7769, "step": 38020 }, { "epoch": 64.26, "learning_rate": 9.679121110235072e-05, "loss": 2.7844, "step": 38040 }, { "epoch": 64.29, "learning_rate": 9.677537357058433e-05, "loss": 2.7848, "step": 38060 }, { "epoch": 64.32, "learning_rate": 9.675949835274434e-05, "loss": 2.7617, "step": 38080 }, { "epoch": 64.36, "learning_rate": 9.674358546162108e-05, "loss": 2.79, "step": 38100 }, { "epoch": 64.39, "learning_rate": 9.672763491003531e-05, "loss": 2.7904, "step": 38120 }, { "epoch": 64.43, "learning_rate": 9.67116467108381e-05, "loss": 2.7851, "step": 38140 }, { "epoch": 64.46, "learning_rate": 9.669562087691085e-05, "loss": 2.7901, "step": 38160 }, { "epoch": 64.49, "learning_rate": 9.667955742116528e-05, "loss": 2.7962, "step": 38180 }, { "epoch": 64.53, "learning_rate": 9.666345635654342e-05, "loss": 2.7811, "step": 38200 }, { "epoch": 64.56, "learning_rate": 9.664731769601763e-05, "loss": 2.795, "step": 38220 }, { "epoch": 64.59, "learning_rate": 9.663114145259053e-05, "loss": 2.7813, "step": 38240 }, { "epoch": 64.63, "learning_rate": 9.6614927639295e-05, "loss": 2.8024, "step": 38260 }, { "epoch": 64.66, "learning_rate": 9.659867626919425e-05, "loss": 2.7834, "step": 38280 }, { "epoch": 64.7, "learning_rate": 9.65823873553817e-05, "loss": 2.79, "step": 38300 }, { "epoch": 64.73, "learning_rate": 9.656606091098104e-05, "loss": 2.8086, "step": 38320 }, { "epoch": 64.76, "learning_rate": 9.65496969491462e-05, "loss": 2.7913, "step": 38340 }, { "epoch": 64.8, "learning_rate": 9.65332954830613e-05, "loss": 2.7653, "step": 38360 }, { "epoch": 64.83, "learning_rate": 9.651685652594072e-05, "loss": 2.7919, "step": 38380 }, { "epoch": 64.86, "learning_rate": 9.650038009102905e-05, "loss": 2.822, "step": 38400 }, { "epoch": 64.9, "learning_rate": 9.648386619160101e-05, "loss": 2.8155, "step": 38420 }, { "epoch": 64.93, "learning_rate": 9.64673148409616e-05, "loss": 2.8159, "step": 38440 }, { "epoch": 64.97, "learning_rate": 9.645072605244592e-05, "loss": 2.7967, "step": 38460 }, { "epoch": 65.0, "learning_rate": 9.643409983941925e-05, "loss": 2.8201, "step": 38480 }, { "epoch": 65.03, "learning_rate": 9.641743621527706e-05, "loss": 2.7267, "step": 38500 }, { "epoch": 65.07, "learning_rate": 9.640073519344489e-05, "loss": 2.7323, "step": 38520 }, { "epoch": 65.1, "learning_rate": 9.638399678737848e-05, "loss": 2.7501, "step": 38540 }, { "epoch": 65.14, "learning_rate": 9.636722101056366e-05, "loss": 2.7479, "step": 38560 }, { "epoch": 65.17, "learning_rate": 9.635040787651636e-05, "loss": 2.7484, "step": 38580 }, { "epoch": 65.2, "learning_rate": 9.633355739878262e-05, "loss": 2.7523, "step": 38600 }, { "epoch": 65.24, "learning_rate": 9.631666959093857e-05, "loss": 2.7429, "step": 38620 }, { "epoch": 65.27, "learning_rate": 9.62997444665904e-05, "loss": 2.7495, "step": 38640 }, { "epoch": 65.3, "learning_rate": 9.62827820393744e-05, "loss": 2.7659, "step": 38660 }, { "epoch": 65.34, "learning_rate": 9.626578232295689e-05, "loss": 2.7523, "step": 38680 }, { "epoch": 65.37, "learning_rate": 9.624874533103421e-05, "loss": 2.7508, "step": 38700 }, { "epoch": 65.41, "learning_rate": 9.623167107733275e-05, "loss": 2.754, "step": 38720 }, { "epoch": 65.44, "learning_rate": 9.621455957560898e-05, "loss": 2.7731, "step": 38740 }, { "epoch": 65.47, "learning_rate": 9.619741083964929e-05, "loss": 2.7851, "step": 38760 }, { "epoch": 65.51, "learning_rate": 9.618022488327009e-05, "loss": 2.7552, "step": 38780 }, { "epoch": 65.54, "learning_rate": 9.616300172031782e-05, "loss": 2.7665, "step": 38800 }, { "epoch": 65.57, "learning_rate": 9.614574136466888e-05, "loss": 2.7752, "step": 38820 }, { "epoch": 65.61, "learning_rate": 9.61284438302296e-05, "loss": 2.7793, "step": 38840 }, { "epoch": 65.64, "learning_rate": 9.611110913093633e-05, "loss": 2.7602, "step": 38860 }, { "epoch": 65.68, "learning_rate": 9.609460675538197e-05, "loss": 2.7783, "step": 38880 }, { "epoch": 65.71, "learning_rate": 9.607719962482106e-05, "loss": 2.7763, "step": 38900 }, { "epoch": 65.74, "learning_rate": 9.605975537069267e-05, "loss": 2.788, "step": 38920 }, { "epoch": 65.78, "learning_rate": 9.604227400705133e-05, "loss": 2.7826, "step": 38940 }, { "epoch": 65.81, "learning_rate": 9.602475554798141e-05, "loss": 2.7788, "step": 38960 }, { "epoch": 65.84, "learning_rate": 9.600720000759728e-05, "loss": 2.7906, "step": 38980 }, { "epoch": 65.88, "learning_rate": 9.598960740004308e-05, "loss": 2.783, "step": 39000 }, { "epoch": 65.88, "eval_loss": 2.9106831550598145, "eval_runtime": 47.4521, "eval_samples_per_second": 20.842, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.005352210245498537, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03049848284971481, "eval_tse_type": 3.686383552409842e-05, "step": 39000 }, { "epoch": 65.91, "learning_rate": 9.597197773949286e-05, "loss": 2.7798, "step": 39020 }, { "epoch": 65.95, "learning_rate": 9.595431104015055e-05, "loss": 2.7667, "step": 39040 }, { "epoch": 65.98, "learning_rate": 9.593660731624987e-05, "loss": 2.7856, "step": 39060 }, { "epoch": 66.01, "learning_rate": 9.591886658205438e-05, "loss": 2.7445, "step": 39080 }, { "epoch": 66.05, "learning_rate": 9.590108885185749e-05, "loss": 2.718, "step": 39100 }, { "epoch": 66.08, "learning_rate": 9.588327413998237e-05, "loss": 2.708, "step": 39120 }, { "epoch": 66.11, "learning_rate": 9.586542246078203e-05, "loss": 2.7218, "step": 39140 }, { "epoch": 66.15, "learning_rate": 9.584753382863924e-05, "loss": 2.7182, "step": 39160 }, { "epoch": 66.18, "learning_rate": 9.582960825796656e-05, "loss": 2.7243, "step": 39180 }, { "epoch": 66.22, "learning_rate": 9.581164576320629e-05, "loss": 2.7027, "step": 39200 }, { "epoch": 66.25, "learning_rate": 9.579364635883048e-05, "loss": 2.7287, "step": 39220 }, { "epoch": 66.28, "learning_rate": 9.577561005934093e-05, "loss": 2.7371, "step": 39240 }, { "epoch": 66.32, "learning_rate": 9.575753687926916e-05, "loss": 2.7484, "step": 39260 }, { "epoch": 66.35, "learning_rate": 9.573942683317641e-05, "loss": 2.7426, "step": 39280 }, { "epoch": 66.39, "learning_rate": 9.572127993565362e-05, "loss": 2.734, "step": 39300 }, { "epoch": 66.42, "learning_rate": 9.57030962013214e-05, "loss": 2.7459, "step": 39320 }, { "epoch": 66.45, "learning_rate": 9.568487564483008e-05, "loss": 2.7317, "step": 39340 }, { "epoch": 66.49, "learning_rate": 9.56666182808596e-05, "loss": 2.7504, "step": 39360 }, { "epoch": 66.52, "learning_rate": 9.564832412411964e-05, "loss": 2.7567, "step": 39380 }, { "epoch": 66.55, "learning_rate": 9.562999318934942e-05, "loss": 2.7614, "step": 39400 }, { "epoch": 66.59, "learning_rate": 9.561162549131788e-05, "loss": 2.7433, "step": 39420 }, { "epoch": 66.62, "learning_rate": 9.559322104482351e-05, "loss": 2.7592, "step": 39440 }, { "epoch": 66.66, "learning_rate": 9.557477986469445e-05, "loss": 2.7705, "step": 39460 }, { "epoch": 66.69, "learning_rate": 9.555630196578845e-05, "loss": 2.7589, "step": 39480 }, { "epoch": 66.72, "learning_rate": 9.553778736299279e-05, "loss": 2.7704, "step": 39500 }, { "epoch": 66.76, "learning_rate": 9.551923607122437e-05, "loss": 2.7775, "step": 39520 }, { "epoch": 66.79, "learning_rate": 9.550064810542962e-05, "loss": 2.7555, "step": 39540 }, { "epoch": 66.82, "learning_rate": 9.548202348058455e-05, "loss": 2.7396, "step": 39560 }, { "epoch": 66.86, "learning_rate": 9.546336221169464e-05, "loss": 2.7634, "step": 39580 }, { "epoch": 66.89, "learning_rate": 9.544466431379498e-05, "loss": 2.759, "step": 39600 }, { "epoch": 66.93, "learning_rate": 9.54259298019501e-05, "loss": 2.756, "step": 39620 }, { "epoch": 66.96, "learning_rate": 9.540715869125407e-05, "loss": 2.7706, "step": 39640 }, { "epoch": 66.99, "learning_rate": 9.538835099683044e-05, "loss": 2.7784, "step": 39660 }, { "epoch": 67.03, "learning_rate": 9.536950673383222e-05, "loss": 2.7018, "step": 39680 }, { "epoch": 67.06, "learning_rate": 9.53506259174419e-05, "loss": 2.6842, "step": 39700 }, { "epoch": 67.09, "learning_rate": 9.533170856287141e-05, "loss": 2.6832, "step": 39720 }, { "epoch": 67.13, "learning_rate": 9.531275468536211e-05, "loss": 2.7082, "step": 39740 }, { "epoch": 67.16, "learning_rate": 9.529376430018482e-05, "loss": 2.7008, "step": 39760 }, { "epoch": 67.2, "learning_rate": 9.527473742263973e-05, "loss": 2.7047, "step": 39780 }, { "epoch": 67.23, "learning_rate": 9.525567406805644e-05, "loss": 2.6971, "step": 39800 }, { "epoch": 67.26, "learning_rate": 9.523657425179399e-05, "loss": 2.7205, "step": 39820 }, { "epoch": 67.3, "learning_rate": 9.521743798924075e-05, "loss": 2.7176, "step": 39840 }, { "epoch": 67.33, "learning_rate": 9.519826529581442e-05, "loss": 2.7268, "step": 39860 }, { "epoch": 67.36, "learning_rate": 9.517905618696212e-05, "loss": 2.7313, "step": 39880 }, { "epoch": 67.4, "learning_rate": 9.51598106781603e-05, "loss": 2.7348, "step": 39900 }, { "epoch": 67.43, "learning_rate": 9.51405287849147e-05, "loss": 2.7161, "step": 39920 }, { "epoch": 67.47, "learning_rate": 9.512121052276037e-05, "loss": 2.7369, "step": 39940 }, { "epoch": 67.5, "learning_rate": 9.510185590726173e-05, "loss": 2.728, "step": 39960 }, { "epoch": 67.53, "learning_rate": 9.508246495401242e-05, "loss": 2.711, "step": 39980 }, { "epoch": 67.57, "learning_rate": 9.506303767863538e-05, "loss": 2.7312, "step": 40000 }, { "epoch": 67.57, "eval_loss": 2.898770809173584, "eval_runtime": 49.5003, "eval_samples_per_second": 19.98, "eval_steps_per_second": 0.121, "eval_tse_ndup": 0.006783209660764984, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.02881729831959224, "eval_tse_type": 6.495056735198292e-05, "step": 40000 }, { "epoch": 67.6, "learning_rate": 9.504357409678281e-05, "loss": 2.7354, "step": 40020 }, { "epoch": 67.64, "learning_rate": 9.50240742241362e-05, "loss": 2.7403, "step": 40040 }, { "epoch": 67.67, "learning_rate": 9.50045380764062e-05, "loss": 2.7457, "step": 40060 }, { "epoch": 67.7, "learning_rate": 9.498496566933274e-05, "loss": 2.7379, "step": 40080 }, { "epoch": 67.74, "learning_rate": 9.4965357018685e-05, "loss": 2.7347, "step": 40100 }, { "epoch": 67.77, "learning_rate": 9.494571214026126e-05, "loss": 2.7433, "step": 40120 }, { "epoch": 67.8, "learning_rate": 9.492603104988907e-05, "loss": 2.7424, "step": 40140 }, { "epoch": 67.84, "learning_rate": 9.490631376342513e-05, "loss": 2.736, "step": 40160 }, { "epoch": 67.87, "learning_rate": 9.48865602967553e-05, "loss": 2.7634, "step": 40180 }, { "epoch": 67.91, "learning_rate": 9.486677066579456e-05, "loss": 2.7654, "step": 40200 }, { "epoch": 67.94, "learning_rate": 9.484694488648711e-05, "loss": 2.7387, "step": 40220 }, { "epoch": 67.97, "learning_rate": 9.482708297480619e-05, "loss": 2.7374, "step": 40240 }, { "epoch": 68.01, "learning_rate": 9.480718494675419e-05, "loss": 2.7223, "step": 40260 }, { "epoch": 68.04, "learning_rate": 9.478725081836259e-05, "loss": 2.669, "step": 40280 }, { "epoch": 68.07, "learning_rate": 9.476728060569197e-05, "loss": 2.6612, "step": 40300 }, { "epoch": 68.11, "learning_rate": 9.474727432483197e-05, "loss": 2.676, "step": 40320 }, { "epoch": 68.14, "learning_rate": 9.472723199190125e-05, "loss": 2.6894, "step": 40340 }, { "epoch": 68.18, "learning_rate": 9.47071536230476e-05, "loss": 2.688, "step": 40360 }, { "epoch": 68.21, "learning_rate": 9.46870392344478e-05, "loss": 2.6987, "step": 40380 }, { "epoch": 68.24, "learning_rate": 9.466688884230761e-05, "loss": 2.6964, "step": 40400 }, { "epoch": 68.28, "learning_rate": 9.464670246286187e-05, "loss": 2.6938, "step": 40420 }, { "epoch": 68.31, "learning_rate": 9.462648011237439e-05, "loss": 2.7083, "step": 40440 }, { "epoch": 68.34, "learning_rate": 9.460622180713789e-05, "loss": 2.7013, "step": 40460 }, { "epoch": 68.38, "learning_rate": 9.458592756347419e-05, "loss": 2.7006, "step": 40480 }, { "epoch": 68.41, "learning_rate": 9.456559739773398e-05, "loss": 2.7108, "step": 40500 }, { "epoch": 68.45, "learning_rate": 9.454523132629689e-05, "loss": 2.6886, "step": 40520 }, { "epoch": 68.48, "learning_rate": 9.45248293655715e-05, "loss": 2.708, "step": 40540 }, { "epoch": 68.51, "learning_rate": 9.450439153199532e-05, "loss": 2.719, "step": 40560 }, { "epoch": 68.55, "learning_rate": 9.448391784203473e-05, "loss": 2.7169, "step": 40580 }, { "epoch": 68.58, "learning_rate": 9.446340831218499e-05, "loss": 2.7159, "step": 40600 }, { "epoch": 68.61, "learning_rate": 9.444286295897028e-05, "loss": 2.7166, "step": 40620 }, { "epoch": 68.65, "learning_rate": 9.442228179894362e-05, "loss": 2.7184, "step": 40640 }, { "epoch": 68.68, "learning_rate": 9.44016648486869e-05, "loss": 2.7184, "step": 40660 }, { "epoch": 68.72, "learning_rate": 9.438101212481076e-05, "loss": 2.7079, "step": 40680 }, { "epoch": 68.75, "learning_rate": 9.43603236439548e-05, "loss": 2.7232, "step": 40700 }, { "epoch": 68.78, "learning_rate": 9.433959942278732e-05, "loss": 2.733, "step": 40720 }, { "epoch": 68.82, "learning_rate": 9.431883947800543e-05, "loss": 2.718, "step": 40740 }, { "epoch": 68.85, "learning_rate": 9.42980438263351e-05, "loss": 2.7328, "step": 40760 }, { "epoch": 68.89, "learning_rate": 9.427721248453097e-05, "loss": 2.7245, "step": 40780 }, { "epoch": 68.92, "learning_rate": 9.425634546937647e-05, "loss": 2.7316, "step": 40800 }, { "epoch": 68.95, "learning_rate": 9.42354427976838e-05, "loss": 2.7356, "step": 40820 }, { "epoch": 68.99, "learning_rate": 9.421450448629385e-05, "loss": 2.7379, "step": 40840 }, { "epoch": 69.02, "learning_rate": 9.419353055207626e-05, "loss": 2.6732, "step": 40860 }, { "epoch": 69.05, "learning_rate": 9.41735723343163e-05, "loss": 2.6443, "step": 40880 }, { "epoch": 69.09, "learning_rate": 9.41525289842147e-05, "loss": 2.6368, "step": 40900 }, { "epoch": 69.12, "learning_rate": 9.413145006121797e-05, "loss": 2.6626, "step": 40920 }, { "epoch": 69.16, "learning_rate": 9.411033558230904e-05, "loss": 2.6547, "step": 40940 }, { "epoch": 69.19, "learning_rate": 9.408918556449948e-05, "loss": 2.6757, "step": 40960 }, { "epoch": 69.22, "learning_rate": 9.406800002482944e-05, "loss": 2.6627, "step": 40980 }, { "epoch": 69.26, "learning_rate": 9.404677898036776e-05, "loss": 2.6766, "step": 41000 }, { "epoch": 69.26, "eval_loss": 2.8736915588378906, "eval_runtime": 47.2829, "eval_samples_per_second": 20.917, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.005726045950618904, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.031480681739473576, "eval_tse_type": 8.250477474441074e-05, "step": 41000 }, { "epoch": 69.29, "learning_rate": 9.402552244821182e-05, "loss": 2.6666, "step": 41020 }, { "epoch": 69.32, "learning_rate": 9.400423044548763e-05, "loss": 2.6806, "step": 41040 }, { "epoch": 69.36, "learning_rate": 9.398290298934979e-05, "loss": 2.6727, "step": 41060 }, { "epoch": 69.39, "learning_rate": 9.396154009698147e-05, "loss": 2.6937, "step": 41080 }, { "epoch": 69.43, "learning_rate": 9.394014178559429e-05, "loss": 2.6921, "step": 41100 }, { "epoch": 69.46, "learning_rate": 9.391870807242855e-05, "loss": 2.7106, "step": 41120 }, { "epoch": 69.49, "learning_rate": 9.389723897475298e-05, "loss": 2.6732, "step": 41140 }, { "epoch": 69.53, "learning_rate": 9.387573450986484e-05, "loss": 2.7063, "step": 41160 }, { "epoch": 69.56, "learning_rate": 9.385419469508991e-05, "loss": 2.7094, "step": 41180 }, { "epoch": 69.59, "learning_rate": 9.383261954778241e-05, "loss": 2.6941, "step": 41200 }, { "epoch": 69.63, "learning_rate": 9.381100908532505e-05, "loss": 2.6923, "step": 41220 }, { "epoch": 69.66, "learning_rate": 9.3789363325129e-05, "loss": 2.7028, "step": 41240 }, { "epoch": 69.7, "learning_rate": 9.376768228463385e-05, "loss": 2.7205, "step": 41260 }, { "epoch": 69.73, "learning_rate": 9.37459659813076e-05, "loss": 2.709, "step": 41280 }, { "epoch": 69.76, "learning_rate": 9.372421443264671e-05, "loss": 2.7197, "step": 41300 }, { "epoch": 69.8, "learning_rate": 9.370242765617603e-05, "loss": 2.715, "step": 41320 }, { "epoch": 69.83, "learning_rate": 9.368060566944874e-05, "loss": 2.7041, "step": 41340 }, { "epoch": 69.86, "learning_rate": 9.365874849004641e-05, "loss": 2.7144, "step": 41360 }, { "epoch": 69.9, "learning_rate": 9.363685613557901e-05, "loss": 2.7231, "step": 41380 }, { "epoch": 69.93, "learning_rate": 9.36149286236848e-05, "loss": 2.6967, "step": 41400 }, { "epoch": 69.97, "learning_rate": 9.359296597203037e-05, "loss": 2.714, "step": 41420 }, { "epoch": 70.0, "learning_rate": 9.357096819831064e-05, "loss": 2.7044, "step": 41440 }, { "epoch": 70.03, "learning_rate": 9.354893532024882e-05, "loss": 2.6324, "step": 41460 }, { "epoch": 70.07, "learning_rate": 9.35268673555964e-05, "loss": 2.6222, "step": 41480 }, { "epoch": 70.1, "learning_rate": 9.350476432213315e-05, "loss": 2.6242, "step": 41500 }, { "epoch": 70.14, "learning_rate": 9.348262623766705e-05, "loss": 2.657, "step": 41520 }, { "epoch": 70.17, "learning_rate": 9.346045312003442e-05, "loss": 2.6573, "step": 41540 }, { "epoch": 70.2, "learning_rate": 9.343824498709968e-05, "loss": 2.6512, "step": 41560 }, { "epoch": 70.24, "learning_rate": 9.341600185675554e-05, "loss": 2.6473, "step": 41580 }, { "epoch": 70.27, "learning_rate": 9.33937237469229e-05, "loss": 2.6493, "step": 41600 }, { "epoch": 70.3, "learning_rate": 9.337141067555081e-05, "loss": 2.6827, "step": 41620 }, { "epoch": 70.34, "learning_rate": 9.334906266061654e-05, "loss": 2.6617, "step": 41640 }, { "epoch": 70.37, "learning_rate": 9.332667972012543e-05, "loss": 2.669, "step": 41660 }, { "epoch": 70.41, "learning_rate": 9.330426187211107e-05, "loss": 2.6711, "step": 41680 }, { "epoch": 70.44, "learning_rate": 9.328180913463508e-05, "loss": 2.6882, "step": 41700 }, { "epoch": 70.47, "learning_rate": 9.325932152578725e-05, "loss": 2.6664, "step": 41720 }, { "epoch": 70.51, "learning_rate": 9.323679906368539e-05, "loss": 2.6635, "step": 41740 }, { "epoch": 70.54, "learning_rate": 9.321424176647551e-05, "loss": 2.685, "step": 41760 }, { "epoch": 70.57, "learning_rate": 9.319164965233156e-05, "loss": 2.6828, "step": 41780 }, { "epoch": 70.61, "learning_rate": 9.316902273945562e-05, "loss": 2.6887, "step": 41800 }, { "epoch": 70.64, "learning_rate": 9.314636104607779e-05, "loss": 2.7013, "step": 41820 }, { "epoch": 70.68, "learning_rate": 9.312366459045618e-05, "loss": 2.6802, "step": 41840 }, { "epoch": 70.71, "learning_rate": 9.31009333908769e-05, "loss": 2.6761, "step": 41860 }, { "epoch": 70.74, "learning_rate": 9.307816746565412e-05, "loss": 2.6879, "step": 41880 }, { "epoch": 70.78, "learning_rate": 9.305536683312988e-05, "loss": 2.6834, "step": 41900 }, { "epoch": 70.81, "learning_rate": 9.303253151167426e-05, "loss": 2.695, "step": 41920 }, { "epoch": 70.84, "learning_rate": 9.300966151968525e-05, "loss": 2.6889, "step": 41940 }, { "epoch": 70.88, "learning_rate": 9.298675687558881e-05, "loss": 2.68, "step": 41960 }, { "epoch": 70.91, "learning_rate": 9.296381759783878e-05, "loss": 2.6942, "step": 41980 }, { "epoch": 70.95, "learning_rate": 9.294084370491694e-05, "loss": 2.7006, "step": 42000 }, { "epoch": 70.95, "eval_loss": 2.8601796627044678, "eval_runtime": 47.4112, "eval_samples_per_second": 20.86, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.00567902143861135, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.033240720621940445, "eval_tse_type": 8.60156162228963e-05, "step": 42000 }, { "epoch": 70.98, "learning_rate": 9.291783521533294e-05, "loss": 2.7007, "step": 42020 }, { "epoch": 71.01, "learning_rate": 9.289479214762429e-05, "loss": 2.671, "step": 42040 }, { "epoch": 71.05, "learning_rate": 9.287171452035637e-05, "loss": 2.6049, "step": 42060 }, { "epoch": 71.08, "learning_rate": 9.284860235212242e-05, "loss": 2.6099, "step": 42080 }, { "epoch": 71.11, "learning_rate": 9.28254556615435e-05, "loss": 2.639, "step": 42100 }, { "epoch": 71.15, "learning_rate": 9.280227446726846e-05, "loss": 2.6199, "step": 42120 }, { "epoch": 71.18, "learning_rate": 9.2779058787974e-05, "loss": 2.6298, "step": 42140 }, { "epoch": 71.22, "learning_rate": 9.275580864236455e-05, "loss": 2.64, "step": 42160 }, { "epoch": 71.25, "learning_rate": 9.273252404917235e-05, "loss": 2.6336, "step": 42180 }, { "epoch": 71.28, "learning_rate": 9.270920502715736e-05, "loss": 2.6368, "step": 42200 }, { "epoch": 71.32, "learning_rate": 9.26858515951073e-05, "loss": 2.6412, "step": 42220 }, { "epoch": 71.35, "learning_rate": 9.26624637718376e-05, "loss": 2.6558, "step": 42240 }, { "epoch": 71.39, "learning_rate": 9.263904157619142e-05, "loss": 2.6413, "step": 42260 }, { "epoch": 71.42, "learning_rate": 9.26155850270396e-05, "loss": 2.6505, "step": 42280 }, { "epoch": 71.45, "learning_rate": 9.259209414328065e-05, "loss": 2.6567, "step": 42300 }, { "epoch": 71.49, "learning_rate": 9.256856894384076e-05, "loss": 2.6604, "step": 42320 }, { "epoch": 71.52, "learning_rate": 9.254500944767374e-05, "loss": 2.6572, "step": 42340 }, { "epoch": 71.55, "learning_rate": 9.252141567376107e-05, "loss": 2.6808, "step": 42360 }, { "epoch": 71.59, "learning_rate": 9.249778764111182e-05, "loss": 2.6744, "step": 42380 }, { "epoch": 71.62, "learning_rate": 9.247412536876268e-05, "loss": 2.6585, "step": 42400 }, { "epoch": 71.66, "learning_rate": 9.245042887577788e-05, "loss": 2.679, "step": 42420 }, { "epoch": 71.69, "learning_rate": 9.24266981812493e-05, "loss": 2.6544, "step": 42440 }, { "epoch": 71.72, "learning_rate": 9.240293330429633e-05, "loss": 2.6672, "step": 42460 }, { "epoch": 71.76, "learning_rate": 9.237913426406585e-05, "loss": 2.6821, "step": 42480 }, { "epoch": 71.79, "learning_rate": 9.235530107973237e-05, "loss": 2.6669, "step": 42500 }, { "epoch": 71.82, "learning_rate": 9.233143377049784e-05, "loss": 2.6678, "step": 42520 }, { "epoch": 71.86, "learning_rate": 9.23075323555917e-05, "loss": 2.6784, "step": 42540 }, { "epoch": 71.89, "learning_rate": 9.228359685427095e-05, "loss": 2.6748, "step": 42560 }, { "epoch": 71.93, "learning_rate": 9.225962728581991e-05, "loss": 2.6953, "step": 42580 }, { "epoch": 71.96, "learning_rate": 9.223562366955048e-05, "loss": 2.6793, "step": 42600 }, { "epoch": 71.99, "learning_rate": 9.221158602480193e-05, "loss": 2.6778, "step": 42620 }, { "epoch": 72.03, "learning_rate": 9.218751437094094e-05, "loss": 2.6099, "step": 42640 }, { "epoch": 72.06, "learning_rate": 9.216340872736163e-05, "loss": 2.5965, "step": 42660 }, { "epoch": 72.09, "learning_rate": 9.213926911348548e-05, "loss": 2.5877, "step": 42680 }, { "epoch": 72.13, "learning_rate": 9.21150955487613e-05, "loss": 2.6262, "step": 42700 }, { "epoch": 72.16, "learning_rate": 9.209088805266535e-05, "loss": 2.6174, "step": 42720 }, { "epoch": 72.2, "learning_rate": 9.206664664470115e-05, "loss": 2.6064, "step": 42740 }, { "epoch": 72.23, "learning_rate": 9.204237134439955e-05, "loss": 2.6175, "step": 42760 }, { "epoch": 72.26, "learning_rate": 9.201806217131874e-05, "loss": 2.6192, "step": 42780 }, { "epoch": 72.3, "learning_rate": 9.199371914504417e-05, "loss": 2.6337, "step": 42800 }, { "epoch": 72.33, "learning_rate": 9.196934228518858e-05, "loss": 2.6214, "step": 42820 }, { "epoch": 72.36, "learning_rate": 9.194493161139199e-05, "loss": 2.6292, "step": 42840 }, { "epoch": 72.4, "learning_rate": 9.192048714332161e-05, "loss": 2.6487, "step": 42860 }, { "epoch": 72.43, "learning_rate": 9.189600890067191e-05, "loss": 2.6624, "step": 42880 }, { "epoch": 72.47, "learning_rate": 9.187149690316457e-05, "loss": 2.6281, "step": 42900 }, { "epoch": 72.5, "learning_rate": 9.184695117054847e-05, "loss": 2.6411, "step": 42920 }, { "epoch": 72.53, "learning_rate": 9.182237172259964e-05, "loss": 2.6444, "step": 42940 }, { "epoch": 72.57, "learning_rate": 9.179775857912134e-05, "loss": 2.6573, "step": 42960 }, { "epoch": 72.6, "learning_rate": 9.17731117599439e-05, "loss": 2.6441, "step": 42980 }, { "epoch": 72.64, "learning_rate": 9.174843128492482e-05, "loss": 2.6471, "step": 43000 }, { "epoch": 72.64, "eval_loss": 2.8353114128112793, "eval_runtime": 47.3722, "eval_samples_per_second": 20.877, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.005513313223234023, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03286320973778243, "eval_tse_type": 2.808673182788451e-05, "step": 43000 }, { "epoch": 72.67, "learning_rate": 9.172371717394873e-05, "loss": 2.6493, "step": 43020 }, { "epoch": 72.7, "learning_rate": 9.169896944692733e-05, "loss": 2.6458, "step": 43040 }, { "epoch": 72.74, "learning_rate": 9.167418812379942e-05, "loss": 2.6771, "step": 43060 }, { "epoch": 72.77, "learning_rate": 9.165061476661911e-05, "loss": 2.65, "step": 43080 }, { "epoch": 72.8, "learning_rate": 9.162576798853497e-05, "loss": 2.6617, "step": 43100 }, { "epoch": 72.84, "learning_rate": 9.16008876733214e-05, "loss": 2.6509, "step": 43120 }, { "epoch": 72.87, "learning_rate": 9.157597384102406e-05, "loss": 2.6666, "step": 43140 }, { "epoch": 72.91, "learning_rate": 9.155102651171552e-05, "loss": 2.6678, "step": 43160 }, { "epoch": 72.94, "learning_rate": 9.152604570549544e-05, "loss": 2.663, "step": 43180 }, { "epoch": 72.97, "learning_rate": 9.150103144249037e-05, "loss": 2.6616, "step": 43200 }, { "epoch": 73.01, "learning_rate": 9.147598374285387e-05, "loss": 2.6446, "step": 43220 }, { "epoch": 73.04, "learning_rate": 9.145090262676642e-05, "loss": 2.5908, "step": 43240 }, { "epoch": 73.07, "learning_rate": 9.142578811443542e-05, "loss": 2.5871, "step": 43260 }, { "epoch": 73.11, "learning_rate": 9.140064022609517e-05, "loss": 2.5963, "step": 43280 }, { "epoch": 73.14, "learning_rate": 9.137545898200687e-05, "loss": 2.6026, "step": 43300 }, { "epoch": 73.18, "learning_rate": 9.135024440245861e-05, "loss": 2.5953, "step": 43320 }, { "epoch": 73.21, "learning_rate": 9.13249965077653e-05, "loss": 2.6091, "step": 43340 }, { "epoch": 73.24, "learning_rate": 9.129971531826872e-05, "loss": 2.5937, "step": 43360 }, { "epoch": 73.28, "learning_rate": 9.12744008543375e-05, "loss": 2.6179, "step": 43380 }, { "epoch": 73.31, "learning_rate": 9.124905313636698e-05, "loss": 2.6367, "step": 43400 }, { "epoch": 73.34, "learning_rate": 9.122367218477941e-05, "loss": 2.6228, "step": 43420 }, { "epoch": 73.38, "learning_rate": 9.119825802002375e-05, "loss": 2.6115, "step": 43440 }, { "epoch": 73.41, "learning_rate": 9.117281066257574e-05, "loss": 2.618, "step": 43460 }, { "epoch": 73.45, "learning_rate": 9.114733013293783e-05, "loss": 2.6302, "step": 43480 }, { "epoch": 73.48, "learning_rate": 9.112181645163926e-05, "loss": 2.6248, "step": 43500 }, { "epoch": 73.51, "learning_rate": 9.109626963923592e-05, "loss": 2.6102, "step": 43520 }, { "epoch": 73.55, "learning_rate": 9.107068971631043e-05, "loss": 2.635, "step": 43540 }, { "epoch": 73.58, "learning_rate": 9.104507670347204e-05, "loss": 2.6123, "step": 43560 }, { "epoch": 73.61, "learning_rate": 9.101943062135672e-05, "loss": 2.6225, "step": 43580 }, { "epoch": 73.65, "learning_rate": 9.099375149062705e-05, "loss": 2.6396, "step": 43600 }, { "epoch": 73.68, "learning_rate": 9.096803933197225e-05, "loss": 2.6564, "step": 43620 }, { "epoch": 73.72, "learning_rate": 9.094229416610811e-05, "loss": 2.6402, "step": 43640 }, { "epoch": 73.75, "learning_rate": 9.091651601377709e-05, "loss": 2.6332, "step": 43660 }, { "epoch": 73.78, "learning_rate": 9.089070489574814e-05, "loss": 2.6279, "step": 43680 }, { "epoch": 73.82, "learning_rate": 9.086486083281683e-05, "loss": 2.6469, "step": 43700 }, { "epoch": 73.85, "learning_rate": 9.083898384580527e-05, "loss": 2.6482, "step": 43720 }, { "epoch": 73.89, "learning_rate": 9.081307395556206e-05, "loss": 2.6354, "step": 43740 }, { "epoch": 73.92, "learning_rate": 9.078713118296234e-05, "loss": 2.6315, "step": 43760 }, { "epoch": 73.95, "learning_rate": 9.076115554890772e-05, "loss": 2.6416, "step": 43780 }, { "epoch": 73.99, "learning_rate": 9.073514707432631e-05, "loss": 2.6603, "step": 43800 }, { "epoch": 74.02, "learning_rate": 9.070910578017268e-05, "loss": 2.5905, "step": 43820 }, { "epoch": 74.05, "learning_rate": 9.06830316874278e-05, "loss": 2.5618, "step": 43840 }, { "epoch": 74.09, "learning_rate": 9.065692481709913e-05, "loss": 2.546, "step": 43860 }, { "epoch": 74.12, "learning_rate": 9.063078519022048e-05, "loss": 2.5966, "step": 43880 }, { "epoch": 74.16, "learning_rate": 9.060461282785209e-05, "loss": 2.5812, "step": 43900 }, { "epoch": 74.19, "learning_rate": 9.057840775108053e-05, "loss": 2.5973, "step": 43920 }, { "epoch": 74.22, "learning_rate": 9.055216998101879e-05, "loss": 2.5783, "step": 43940 }, { "epoch": 74.26, "learning_rate": 9.052589953880617e-05, "loss": 2.601, "step": 43960 }, { "epoch": 74.29, "learning_rate": 9.049959644560826e-05, "loss": 2.5945, "step": 43980 }, { "epoch": 74.32, "learning_rate": 9.047326072261701e-05, "loss": 2.5965, "step": 44000 }, { "epoch": 74.32, "eval_loss": 2.825916290283203, "eval_runtime": 47.2292, "eval_samples_per_second": 20.94, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.004614444174740817, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.032045822172236636, "eval_tse_type": 0.00011954650185393913, "step": 44000 }, { "epoch": 74.36, "learning_rate": 9.044689239105063e-05, "loss": 2.6169, "step": 44020 }, { "epoch": 74.39, "learning_rate": 9.042049147215362e-05, "loss": 2.6112, "step": 44040 }, { "epoch": 74.43, "learning_rate": 9.039405798719668e-05, "loss": 2.6076, "step": 44060 }, { "epoch": 74.46, "learning_rate": 9.036759195747686e-05, "loss": 2.6172, "step": 44080 }, { "epoch": 74.49, "learning_rate": 9.034109340431732e-05, "loss": 2.6157, "step": 44100 }, { "epoch": 74.53, "learning_rate": 9.031456234906746e-05, "loss": 2.6086, "step": 44120 }, { "epoch": 74.56, "learning_rate": 9.028799881310289e-05, "loss": 2.6188, "step": 44140 }, { "epoch": 74.59, "learning_rate": 9.026140281782536e-05, "loss": 2.6167, "step": 44160 }, { "epoch": 74.63, "learning_rate": 9.023477438466279e-05, "loss": 2.6197, "step": 44180 }, { "epoch": 74.66, "learning_rate": 9.020811353506923e-05, "loss": 2.5992, "step": 44200 }, { "epoch": 74.7, "learning_rate": 9.018142029052483e-05, "loss": 2.6265, "step": 44220 }, { "epoch": 74.73, "learning_rate": 9.015469467253588e-05, "loss": 2.6258, "step": 44240 }, { "epoch": 74.76, "learning_rate": 9.012793670263471e-05, "loss": 2.6088, "step": 44260 }, { "epoch": 74.8, "learning_rate": 9.010114640237972e-05, "loss": 2.6176, "step": 44280 }, { "epoch": 74.83, "learning_rate": 9.007432379335541e-05, "loss": 2.6264, "step": 44300 }, { "epoch": 74.86, "learning_rate": 9.004746889717224e-05, "loss": 2.6386, "step": 44320 }, { "epoch": 74.9, "learning_rate": 9.002058173546672e-05, "loss": 2.6379, "step": 44340 }, { "epoch": 74.93, "learning_rate": 8.999366232990136e-05, "loss": 2.6381, "step": 44360 }, { "epoch": 74.97, "learning_rate": 8.996671070216463e-05, "loss": 2.624, "step": 44380 }, { "epoch": 75.0, "learning_rate": 8.993972687397096e-05, "loss": 2.6408, "step": 44400 }, { "epoch": 75.03, "learning_rate": 8.991271086706076e-05, "loss": 2.5559, "step": 44420 }, { "epoch": 75.07, "learning_rate": 8.988566270320032e-05, "loss": 2.5509, "step": 44440 }, { "epoch": 75.1, "learning_rate": 8.985858240418187e-05, "loss": 2.5518, "step": 44460 }, { "epoch": 75.14, "learning_rate": 8.98314699918235e-05, "loss": 2.5798, "step": 44480 }, { "epoch": 75.17, "learning_rate": 8.98043254879692e-05, "loss": 2.5557, "step": 44500 }, { "epoch": 75.2, "learning_rate": 8.977714891448882e-05, "loss": 2.584, "step": 44520 }, { "epoch": 75.24, "learning_rate": 8.974994029327801e-05, "loss": 2.5792, "step": 44540 }, { "epoch": 75.27, "learning_rate": 8.972269964625828e-05, "loss": 2.5928, "step": 44560 }, { "epoch": 75.3, "learning_rate": 8.969542699537692e-05, "loss": 2.5888, "step": 44580 }, { "epoch": 75.34, "learning_rate": 8.966812236260701e-05, "loss": 2.5817, "step": 44600 }, { "epoch": 75.37, "learning_rate": 8.964078576994742e-05, "loss": 2.5884, "step": 44620 }, { "epoch": 75.41, "learning_rate": 8.961341723942271e-05, "loss": 2.5919, "step": 44640 }, { "epoch": 75.44, "learning_rate": 8.958601679308325e-05, "loss": 2.5928, "step": 44660 }, { "epoch": 75.47, "learning_rate": 8.955858445300506e-05, "loss": 2.5757, "step": 44680 }, { "epoch": 75.51, "learning_rate": 8.953112024128991e-05, "loss": 2.5889, "step": 44700 }, { "epoch": 75.54, "learning_rate": 8.950362418006518e-05, "loss": 2.6046, "step": 44720 }, { "epoch": 75.57, "learning_rate": 8.947609629148398e-05, "loss": 2.5789, "step": 44740 }, { "epoch": 75.61, "learning_rate": 8.944853659772501e-05, "loss": 2.6029, "step": 44760 }, { "epoch": 75.64, "learning_rate": 8.942094512099264e-05, "loss": 2.6039, "step": 44780 }, { "epoch": 75.68, "learning_rate": 8.93933218835168e-05, "loss": 2.6185, "step": 44800 }, { "epoch": 75.71, "learning_rate": 8.936566690755308e-05, "loss": 2.6077, "step": 44820 }, { "epoch": 75.74, "learning_rate": 8.933798021538254e-05, "loss": 2.6162, "step": 44840 }, { "epoch": 75.78, "learning_rate": 8.931026182931186e-05, "loss": 2.6126, "step": 44860 }, { "epoch": 75.81, "learning_rate": 8.928251177167326e-05, "loss": 2.6081, "step": 44880 }, { "epoch": 75.84, "learning_rate": 8.925473006482445e-05, "loss": 2.6011, "step": 44900 }, { "epoch": 75.88, "learning_rate": 8.922691673114866e-05, "loss": 2.6214, "step": 44920 }, { "epoch": 75.91, "learning_rate": 8.919907179305459e-05, "loss": 2.6142, "step": 44940 }, { "epoch": 75.95, "learning_rate": 8.91711952729764e-05, "loss": 2.613, "step": 44960 }, { "epoch": 75.98, "learning_rate": 8.914328719337365e-05, "loss": 2.6126, "step": 44980 }, { "epoch": 76.01, "learning_rate": 8.911534757673146e-05, "loss": 2.6026, "step": 45000 }, { "epoch": 76.01, "eval_loss": 2.805588483810425, "eval_runtime": 50.8806, "eval_samples_per_second": 19.438, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.006243438008057357, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.027783443514817426, "eval_tse_type": 0.0002299601168408044, "step": 45000 }, { "epoch": 76.05, "learning_rate": 8.908737644556024e-05, "loss": 2.5423, "step": 45020 }, { "epoch": 76.08, "learning_rate": 8.905937382239581e-05, "loss": 2.5462, "step": 45040 }, { "epoch": 76.11, "learning_rate": 8.903133972979938e-05, "loss": 2.5347, "step": 45060 }, { "epoch": 76.15, "learning_rate": 8.900327419035753e-05, "loss": 2.5776, "step": 45080 }, { "epoch": 76.18, "learning_rate": 8.897517722668215e-05, "loss": 2.5492, "step": 45100 }, { "epoch": 76.22, "learning_rate": 8.894704886141046e-05, "loss": 2.5387, "step": 45120 }, { "epoch": 76.25, "learning_rate": 8.891888911720496e-05, "loss": 2.5581, "step": 45140 }, { "epoch": 76.28, "learning_rate": 8.889069801675344e-05, "loss": 2.5547, "step": 45160 }, { "epoch": 76.32, "learning_rate": 8.886247558276901e-05, "loss": 2.5697, "step": 45180 }, { "epoch": 76.35, "learning_rate": 8.883422183798992e-05, "loss": 2.5709, "step": 45200 }, { "epoch": 76.39, "learning_rate": 8.880593680517975e-05, "loss": 2.5787, "step": 45220 }, { "epoch": 76.42, "learning_rate": 8.877762050712719e-05, "loss": 2.5808, "step": 45240 }, { "epoch": 76.45, "learning_rate": 8.874927296664621e-05, "loss": 2.5762, "step": 45260 }, { "epoch": 76.49, "learning_rate": 8.87208942065759e-05, "loss": 2.5741, "step": 45280 }, { "epoch": 76.52, "learning_rate": 8.869248424978049e-05, "loss": 2.5909, "step": 45300 }, { "epoch": 76.55, "learning_rate": 8.866404311914942e-05, "loss": 2.5921, "step": 45320 }, { "epoch": 76.59, "learning_rate": 8.863557083759714e-05, "loss": 2.5934, "step": 45340 }, { "epoch": 76.62, "learning_rate": 8.860706742806328e-05, "loss": 2.5914, "step": 45360 }, { "epoch": 76.66, "learning_rate": 8.857853291351254e-05, "loss": 2.5862, "step": 45380 }, { "epoch": 76.69, "learning_rate": 8.854996731693464e-05, "loss": 2.592, "step": 45400 }, { "epoch": 76.72, "learning_rate": 8.852280123141984e-05, "loss": 2.5957, "step": 45420 }, { "epoch": 76.76, "learning_rate": 8.849417509110805e-05, "loss": 2.6099, "step": 45440 }, { "epoch": 76.79, "learning_rate": 8.846551793673467e-05, "loss": 2.5944, "step": 45460 }, { "epoch": 76.82, "learning_rate": 8.843682979138825e-05, "loss": 2.6035, "step": 45480 }, { "epoch": 76.86, "learning_rate": 8.840811067818233e-05, "loss": 2.6028, "step": 45500 }, { "epoch": 76.89, "learning_rate": 8.837936062025538e-05, "loss": 2.5908, "step": 45520 }, { "epoch": 76.93, "learning_rate": 8.835057964077079e-05, "loss": 2.5991, "step": 45540 }, { "epoch": 76.96, "learning_rate": 8.832176776291688e-05, "loss": 2.6049, "step": 45560 }, { "epoch": 76.99, "learning_rate": 8.829292500990683e-05, "loss": 2.6029, "step": 45580 }, { "epoch": 77.03, "learning_rate": 8.826405140497878e-05, "loss": 2.5246, "step": 45600 }, { "epoch": 77.06, "learning_rate": 8.823514697139564e-05, "loss": 2.5371, "step": 45620 }, { "epoch": 77.09, "learning_rate": 8.820621173244519e-05, "loss": 2.532, "step": 45640 }, { "epoch": 77.13, "learning_rate": 8.817724571144004e-05, "loss": 2.5445, "step": 45660 }, { "epoch": 77.16, "learning_rate": 8.814824893171758e-05, "loss": 2.5406, "step": 45680 }, { "epoch": 77.2, "learning_rate": 8.811922141664e-05, "loss": 2.5547, "step": 45700 }, { "epoch": 77.23, "learning_rate": 8.809016318959424e-05, "loss": 2.5516, "step": 45720 }, { "epoch": 77.26, "learning_rate": 8.806107427399197e-05, "loss": 2.5474, "step": 45740 }, { "epoch": 77.3, "learning_rate": 8.803195469326964e-05, "loss": 2.5611, "step": 45760 }, { "epoch": 77.33, "learning_rate": 8.800280447088836e-05, "loss": 2.5457, "step": 45780 }, { "epoch": 77.36, "learning_rate": 8.797362363033392e-05, "loss": 2.5677, "step": 45800 }, { "epoch": 77.4, "learning_rate": 8.794441219511681e-05, "loss": 2.5555, "step": 45820 }, { "epoch": 77.43, "learning_rate": 8.791517018877216e-05, "loss": 2.5579, "step": 45840 }, { "epoch": 77.47, "learning_rate": 8.78858976348597e-05, "loss": 2.5605, "step": 45860 }, { "epoch": 77.5, "learning_rate": 8.785659455696384e-05, "loss": 2.5729, "step": 45880 }, { "epoch": 77.53, "learning_rate": 8.782726097869349e-05, "loss": 2.5629, "step": 45900 }, { "epoch": 77.57, "learning_rate": 8.779789692368223e-05, "loss": 2.5627, "step": 45920 }, { "epoch": 77.6, "learning_rate": 8.776850241558814e-05, "loss": 2.5917, "step": 45940 }, { "epoch": 77.64, "learning_rate": 8.773907747809383e-05, "loss": 2.5836, "step": 45960 }, { "epoch": 77.67, "learning_rate": 8.770962213490643e-05, "loss": 2.5908, "step": 45980 }, { "epoch": 77.7, "learning_rate": 8.768013640975761e-05, "loss": 2.5803, "step": 46000 }, { "epoch": 77.7, "eval_loss": 2.7924163341522217, "eval_runtime": 49.1131, "eval_samples_per_second": 20.137, "eval_steps_per_second": 0.122, "eval_tse_ndup": 0.006478176525450504, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.031434061591553826, "eval_tse_type": 8.426019548365353e-05, "step": 46000 }, { "epoch": 77.74, "learning_rate": 8.765062032640346e-05, "loss": 2.5695, "step": 46020 }, { "epoch": 77.77, "learning_rate": 8.762107390862455e-05, "loss": 2.5713, "step": 46040 }, { "epoch": 77.8, "learning_rate": 8.759149718022594e-05, "loss": 2.5869, "step": 46060 }, { "epoch": 77.84, "learning_rate": 8.756189016503702e-05, "loss": 2.5722, "step": 46080 }, { "epoch": 77.87, "learning_rate": 8.753225288691165e-05, "loss": 2.5811, "step": 46100 }, { "epoch": 77.91, "learning_rate": 8.750258536972804e-05, "loss": 2.5708, "step": 46120 }, { "epoch": 77.94, "learning_rate": 8.747288763738877e-05, "loss": 2.5799, "step": 46140 }, { "epoch": 77.97, "learning_rate": 8.744315971382078e-05, "loss": 2.6076, "step": 46160 }, { "epoch": 78.01, "learning_rate": 8.741340162297531e-05, "loss": 2.5588, "step": 46180 }, { "epoch": 78.04, "learning_rate": 8.738361338882792e-05, "loss": 2.5079, "step": 46200 }, { "epoch": 78.07, "learning_rate": 8.735379503537844e-05, "loss": 2.5229, "step": 46220 }, { "epoch": 78.11, "learning_rate": 8.732394658665101e-05, "loss": 2.5249, "step": 46240 }, { "epoch": 78.14, "learning_rate": 8.729406806669396e-05, "loss": 2.5257, "step": 46260 }, { "epoch": 78.18, "learning_rate": 8.726415949957987e-05, "loss": 2.5111, "step": 46280 }, { "epoch": 78.21, "learning_rate": 8.723422090940555e-05, "loss": 2.5299, "step": 46300 }, { "epoch": 78.24, "learning_rate": 8.720425232029198e-05, "loss": 2.5404, "step": 46320 }, { "epoch": 78.28, "learning_rate": 8.717425375638429e-05, "loss": 2.5249, "step": 46340 }, { "epoch": 78.31, "learning_rate": 8.714422524185181e-05, "loss": 2.5518, "step": 46360 }, { "epoch": 78.34, "learning_rate": 8.711416680088795e-05, "loss": 2.5537, "step": 46380 }, { "epoch": 78.38, "learning_rate": 8.708407845771027e-05, "loss": 2.54, "step": 46400 }, { "epoch": 78.41, "learning_rate": 8.70539602365604e-05, "loss": 2.5449, "step": 46420 }, { "epoch": 78.45, "learning_rate": 8.702381216170403e-05, "loss": 2.535, "step": 46440 }, { "epoch": 78.48, "learning_rate": 8.699363425743093e-05, "loss": 2.5448, "step": 46460 }, { "epoch": 78.51, "learning_rate": 8.69634265480549e-05, "loss": 2.5534, "step": 46480 }, { "epoch": 78.55, "learning_rate": 8.693318905791375e-05, "loss": 2.5649, "step": 46500 }, { "epoch": 78.58, "learning_rate": 8.690292181136924e-05, "loss": 2.5664, "step": 46520 }, { "epoch": 78.61, "learning_rate": 8.687262483280719e-05, "loss": 2.5567, "step": 46540 }, { "epoch": 78.65, "learning_rate": 8.684229814663731e-05, "loss": 2.5679, "step": 46560 }, { "epoch": 78.68, "learning_rate": 8.681194177729328e-05, "loss": 2.5707, "step": 46580 }, { "epoch": 78.72, "learning_rate": 8.678155574923265e-05, "loss": 2.5715, "step": 46600 }, { "epoch": 78.75, "learning_rate": 8.675114008693689e-05, "loss": 2.5694, "step": 46620 }, { "epoch": 78.78, "learning_rate": 8.672069481491141e-05, "loss": 2.5666, "step": 46640 }, { "epoch": 78.82, "learning_rate": 8.669021995768534e-05, "loss": 2.5612, "step": 46660 }, { "epoch": 78.85, "learning_rate": 8.665971553981175e-05, "loss": 2.5749, "step": 46680 }, { "epoch": 78.89, "learning_rate": 8.662918158586753e-05, "loss": 2.5809, "step": 46700 }, { "epoch": 78.92, "learning_rate": 8.65986181204533e-05, "loss": 2.5734, "step": 46720 }, { "epoch": 78.95, "learning_rate": 8.656802516819349e-05, "loss": 2.5695, "step": 46740 }, { "epoch": 78.99, "learning_rate": 8.653740275373631e-05, "loss": 2.5787, "step": 46760 }, { "epoch": 79.02, "learning_rate": 8.650675090175366e-05, "loss": 2.5187, "step": 46780 }, { "epoch": 79.05, "learning_rate": 8.647606963694122e-05, "loss": 2.5092, "step": 46800 }, { "epoch": 79.09, "learning_rate": 8.644535898401831e-05, "loss": 2.5123, "step": 46820 }, { "epoch": 79.12, "learning_rate": 8.641461896772793e-05, "loss": 2.494, "step": 46840 }, { "epoch": 79.16, "learning_rate": 8.638384961283679e-05, "loss": 2.5321, "step": 46860 }, { "epoch": 79.19, "learning_rate": 8.63530509441352e-05, "loss": 2.4998, "step": 46880 }, { "epoch": 79.22, "learning_rate": 8.632222298643706e-05, "loss": 2.5165, "step": 46900 }, { "epoch": 79.26, "learning_rate": 8.629136576457991e-05, "loss": 2.5273, "step": 46920 }, { "epoch": 79.29, "learning_rate": 8.626047930342488e-05, "loss": 2.5145, "step": 46940 }, { "epoch": 79.32, "learning_rate": 8.622956362785662e-05, "loss": 2.539, "step": 46960 }, { "epoch": 79.36, "learning_rate": 8.619861876278332e-05, "loss": 2.5233, "step": 46980 }, { "epoch": 79.39, "learning_rate": 8.616764473313671e-05, "loss": 2.54, "step": 47000 }, { "epoch": 79.39, "eval_loss": 2.7760818004608154, "eval_runtime": 49.6014, "eval_samples_per_second": 19.939, "eval_steps_per_second": 0.121, "eval_tse_ndup": 0.005016615626178201, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.029835610583169678, "eval_tse_type": 0.00032650825749915734, "step": 47000 }, { "epoch": 79.43, "learning_rate": 8.6136641563872e-05, "loss": 2.5239, "step": 47020 }, { "epoch": 79.46, "learning_rate": 8.610560927996788e-05, "loss": 2.5344, "step": 47040 }, { "epoch": 79.49, "learning_rate": 8.607454790642654e-05, "loss": 2.5406, "step": 47060 }, { "epoch": 79.53, "learning_rate": 8.604345746827351e-05, "loss": 2.557, "step": 47080 }, { "epoch": 79.56, "learning_rate": 8.601233799055784e-05, "loss": 2.5437, "step": 47100 }, { "epoch": 79.59, "learning_rate": 8.59811894983519e-05, "loss": 2.5632, "step": 47120 }, { "epoch": 79.63, "learning_rate": 8.595001201675147e-05, "loss": 2.5543, "step": 47140 }, { "epoch": 79.66, "learning_rate": 8.591880557087573e-05, "loss": 2.5386, "step": 47160 }, { "epoch": 79.7, "learning_rate": 8.588757018586708e-05, "loss": 2.5332, "step": 47180 }, { "epoch": 79.73, "learning_rate": 8.585630588689135e-05, "loss": 2.5485, "step": 47200 }, { "epoch": 79.76, "learning_rate": 8.582501269913761e-05, "loss": 2.5439, "step": 47220 }, { "epoch": 79.8, "learning_rate": 8.579369064781819e-05, "loss": 2.5492, "step": 47240 }, { "epoch": 79.83, "learning_rate": 8.576233975816877e-05, "loss": 2.5538, "step": 47260 }, { "epoch": 79.86, "learning_rate": 8.573096005544811e-05, "loss": 2.5602, "step": 47280 }, { "epoch": 79.9, "learning_rate": 8.569955156493834e-05, "loss": 2.5648, "step": 47300 }, { "epoch": 79.93, "learning_rate": 8.566811431194468e-05, "loss": 2.5615, "step": 47320 }, { "epoch": 79.97, "learning_rate": 8.563664832179556e-05, "loss": 2.5485, "step": 47340 }, { "epoch": 80.0, "learning_rate": 8.560515361984256e-05, "loss": 2.5707, "step": 47360 }, { "epoch": 80.03, "learning_rate": 8.55736302314604e-05, "loss": 2.4823, "step": 47380 }, { "epoch": 80.07, "learning_rate": 8.55420781820469e-05, "loss": 2.4796, "step": 47400 }, { "epoch": 80.1, "learning_rate": 8.551049749702297e-05, "loss": 2.5203, "step": 47420 }, { "epoch": 80.14, "learning_rate": 8.548046934569051e-05, "loss": 2.5065, "step": 47440 }, { "epoch": 80.17, "learning_rate": 8.544883289443053e-05, "loss": 2.4937, "step": 47460 }, { "epoch": 80.2, "learning_rate": 8.541716788268617e-05, "loss": 2.5035, "step": 47480 }, { "epoch": 80.24, "learning_rate": 8.538547433596933e-05, "loss": 2.5136, "step": 47500 }, { "epoch": 80.27, "learning_rate": 8.535375227981497e-05, "loss": 2.5127, "step": 47520 }, { "epoch": 80.3, "learning_rate": 8.532200173978097e-05, "loss": 2.5223, "step": 47540 }, { "epoch": 80.34, "learning_rate": 8.529022274144816e-05, "loss": 2.5202, "step": 47560 }, { "epoch": 80.37, "learning_rate": 8.525841531042031e-05, "loss": 2.5101, "step": 47580 }, { "epoch": 80.41, "learning_rate": 8.522657947232407e-05, "loss": 2.5079, "step": 47600 }, { "epoch": 80.44, "learning_rate": 8.519471525280903e-05, "loss": 2.5074, "step": 47620 }, { "epoch": 80.47, "learning_rate": 8.516282267754761e-05, "loss": 2.532, "step": 47640 }, { "epoch": 80.51, "learning_rate": 8.513090177223506e-05, "loss": 2.5243, "step": 47660 }, { "epoch": 80.54, "learning_rate": 8.509895256258948e-05, "loss": 2.5215, "step": 47680 }, { "epoch": 80.57, "learning_rate": 8.506697507435182e-05, "loss": 2.5225, "step": 47700 }, { "epoch": 80.61, "learning_rate": 8.50349693332857e-05, "loss": 2.5225, "step": 47720 }, { "epoch": 80.64, "learning_rate": 8.50029353651776e-05, "loss": 2.5449, "step": 47740 }, { "epoch": 80.68, "learning_rate": 8.497087319583672e-05, "loss": 2.5336, "step": 47760 }, { "epoch": 80.71, "learning_rate": 8.493878285109495e-05, "loss": 2.5283, "step": 47780 }, { "epoch": 80.74, "learning_rate": 8.49066643568069e-05, "loss": 2.5366, "step": 47800 }, { "epoch": 80.78, "learning_rate": 8.487451773884987e-05, "loss": 2.5433, "step": 47820 }, { "epoch": 80.81, "learning_rate": 8.484234302312382e-05, "loss": 2.5496, "step": 47840 }, { "epoch": 80.84, "learning_rate": 8.48101402355513e-05, "loss": 2.5276, "step": 47860 }, { "epoch": 80.88, "learning_rate": 8.477790940207756e-05, "loss": 2.547, "step": 47880 }, { "epoch": 80.91, "learning_rate": 8.474565054867037e-05, "loss": 2.5479, "step": 47900 }, { "epoch": 80.95, "learning_rate": 8.471336370132012e-05, "loss": 2.56, "step": 47920 }, { "epoch": 80.98, "learning_rate": 8.468104888603973e-05, "loss": 2.5615, "step": 47940 }, { "epoch": 81.01, "learning_rate": 8.464870612886467e-05, "loss": 2.5124, "step": 47960 }, { "epoch": 81.05, "learning_rate": 8.46163354558529e-05, "loss": 2.4734, "step": 47980 }, { "epoch": 81.08, "learning_rate": 8.458393689308491e-05, "loss": 2.469, "step": 48000 }, { "epoch": 81.08, "eval_loss": 2.7601113319396973, "eval_runtime": 50.7409, "eval_samples_per_second": 19.491, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.005318529479795045, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03067736105780271, "eval_tse_type": 0.00011779108111469634, "step": 48000 }, { "epoch": 81.11, "learning_rate": 8.455151046666363e-05, "loss": 2.4842, "step": 48020 }, { "epoch": 81.15, "learning_rate": 8.451905620271443e-05, "loss": 2.4875, "step": 48040 }, { "epoch": 81.18, "learning_rate": 8.448657412738515e-05, "loss": 2.4963, "step": 48060 }, { "epoch": 81.22, "learning_rate": 8.445406426684598e-05, "loss": 2.5064, "step": 48080 }, { "epoch": 81.25, "learning_rate": 8.442152664728958e-05, "loss": 2.4863, "step": 48100 }, { "epoch": 81.28, "learning_rate": 8.438896129493086e-05, "loss": 2.4981, "step": 48120 }, { "epoch": 81.32, "learning_rate": 8.435636823600716e-05, "loss": 2.5012, "step": 48140 }, { "epoch": 81.35, "learning_rate": 8.432374749677814e-05, "loss": 2.5092, "step": 48160 }, { "epoch": 81.39, "learning_rate": 8.42910991035257e-05, "loss": 2.5109, "step": 48180 }, { "epoch": 81.42, "learning_rate": 8.425842308255412e-05, "loss": 2.5033, "step": 48200 }, { "epoch": 81.45, "learning_rate": 8.422571946018982e-05, "loss": 2.5004, "step": 48220 }, { "epoch": 81.49, "learning_rate": 8.419298826278154e-05, "loss": 2.5159, "step": 48240 }, { "epoch": 81.52, "learning_rate": 8.416022951670022e-05, "loss": 2.5198, "step": 48260 }, { "epoch": 81.55, "learning_rate": 8.412744324833898e-05, "loss": 2.5176, "step": 48280 }, { "epoch": 81.59, "learning_rate": 8.409462948411315e-05, "loss": 2.516, "step": 48300 }, { "epoch": 81.62, "learning_rate": 8.406178825046015e-05, "loss": 2.5264, "step": 48320 }, { "epoch": 81.66, "learning_rate": 8.402891957383959e-05, "loss": 2.5022, "step": 48340 }, { "epoch": 81.69, "learning_rate": 8.399602348073316e-05, "loss": 2.5241, "step": 48360 }, { "epoch": 81.72, "learning_rate": 8.396309999764467e-05, "loss": 2.5332, "step": 48380 }, { "epoch": 81.76, "learning_rate": 8.393014915109995e-05, "loss": 2.5321, "step": 48400 }, { "epoch": 81.79, "learning_rate": 8.389717096764691e-05, "loss": 2.5321, "step": 48420 }, { "epoch": 81.82, "learning_rate": 8.386416547385547e-05, "loss": 2.5354, "step": 48440 }, { "epoch": 81.86, "learning_rate": 8.383113269631757e-05, "loss": 2.5315, "step": 48460 }, { "epoch": 81.89, "learning_rate": 8.379807266164714e-05, "loss": 2.5348, "step": 48480 }, { "epoch": 81.93, "learning_rate": 8.376498539648001e-05, "loss": 2.5295, "step": 48500 }, { "epoch": 81.96, "learning_rate": 8.373187092747403e-05, "loss": 2.517, "step": 48520 }, { "epoch": 81.99, "learning_rate": 8.369872928130891e-05, "loss": 2.5343, "step": 48540 }, { "epoch": 82.03, "learning_rate": 8.366556048468628e-05, "loss": 2.4666, "step": 48560 }, { "epoch": 82.06, "learning_rate": 8.363236456432964e-05, "loss": 2.4525, "step": 48580 }, { "epoch": 82.09, "learning_rate": 8.359914154698434e-05, "loss": 2.4665, "step": 48600 }, { "epoch": 82.13, "learning_rate": 8.356589145941757e-05, "loss": 2.467, "step": 48620 }, { "epoch": 82.16, "learning_rate": 8.353261432841832e-05, "loss": 2.4681, "step": 48640 }, { "epoch": 82.2, "learning_rate": 8.34993101807974e-05, "loss": 2.4744, "step": 48660 }, { "epoch": 82.23, "learning_rate": 8.346597904338731e-05, "loss": 2.491, "step": 48680 }, { "epoch": 82.26, "learning_rate": 8.343262094304238e-05, "loss": 2.4709, "step": 48700 }, { "epoch": 82.3, "learning_rate": 8.339923590663863e-05, "loss": 2.491, "step": 48720 }, { "epoch": 82.33, "learning_rate": 8.336582396107378e-05, "loss": 2.4757, "step": 48740 }, { "epoch": 82.36, "learning_rate": 8.33323851332672e-05, "loss": 2.4961, "step": 48760 }, { "epoch": 82.4, "learning_rate": 8.329891945015998e-05, "loss": 2.499, "step": 48780 }, { "epoch": 82.43, "learning_rate": 8.326542693871482e-05, "loss": 2.4982, "step": 48800 }, { "epoch": 82.47, "learning_rate": 8.323190762591601e-05, "loss": 2.4993, "step": 48820 }, { "epoch": 82.5, "learning_rate": 8.319836153876947e-05, "loss": 2.4876, "step": 48840 }, { "epoch": 82.53, "learning_rate": 8.316478870430269e-05, "loss": 2.5086, "step": 48860 }, { "epoch": 82.57, "learning_rate": 8.313118914956466e-05, "loss": 2.5213, "step": 48880 }, { "epoch": 82.6, "learning_rate": 8.309756290162595e-05, "loss": 2.5103, "step": 48900 }, { "epoch": 82.64, "learning_rate": 8.306390998757863e-05, "loss": 2.512, "step": 48920 }, { "epoch": 82.67, "learning_rate": 8.303023043453624e-05, "loss": 2.5165, "step": 48940 }, { "epoch": 82.7, "learning_rate": 8.299652426963379e-05, "loss": 2.5069, "step": 48960 }, { "epoch": 82.74, "learning_rate": 8.296279152002771e-05, "loss": 2.5251, "step": 48980 }, { "epoch": 82.77, "learning_rate": 8.29290322128959e-05, "loss": 2.5204, "step": 49000 }, { "epoch": 82.77, "eval_loss": 2.746349811553955, "eval_runtime": 47.9582, "eval_samples_per_second": 20.622, "eval_steps_per_second": 0.125, "eval_tse_ndup": 0.004133415871221496, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03191453978257238, "eval_tse_type": 0.0001720312324457926, "step": 49000 }, { "epoch": 82.8, "learning_rate": 8.28952463754376e-05, "loss": 2.5142, "step": 49020 }, { "epoch": 82.84, "learning_rate": 8.286143403487345e-05, "loss": 2.5042, "step": 49040 }, { "epoch": 82.87, "learning_rate": 8.282759521844545e-05, "loss": 2.5232, "step": 49060 }, { "epoch": 82.91, "learning_rate": 8.279372995341692e-05, "loss": 2.5167, "step": 49080 }, { "epoch": 82.94, "learning_rate": 8.27598382670725e-05, "loss": 2.5243, "step": 49100 }, { "epoch": 82.97, "learning_rate": 8.272592018671809e-05, "loss": 2.5325, "step": 49120 }, { "epoch": 83.01, "learning_rate": 8.269197573968088e-05, "loss": 2.5253, "step": 49140 }, { "epoch": 83.04, "learning_rate": 8.265800495330932e-05, "loss": 2.442, "step": 49160 }, { "epoch": 83.07, "learning_rate": 8.262400785497303e-05, "loss": 2.4528, "step": 49180 }, { "epoch": 83.11, "learning_rate": 8.25899844720629e-05, "loss": 2.442, "step": 49200 }, { "epoch": 83.14, "learning_rate": 8.25559348319909e-05, "loss": 2.4649, "step": 49220 }, { "epoch": 83.18, "learning_rate": 8.252185896219024e-05, "loss": 2.4605, "step": 49240 }, { "epoch": 83.21, "learning_rate": 8.248775689011524e-05, "loss": 2.4585, "step": 49260 }, { "epoch": 83.24, "learning_rate": 8.245362864324131e-05, "loss": 2.4688, "step": 49280 }, { "epoch": 83.28, "learning_rate": 8.241947424906496e-05, "loss": 2.4734, "step": 49300 }, { "epoch": 83.31, "learning_rate": 8.238529373510378e-05, "loss": 2.4756, "step": 49320 }, { "epoch": 83.34, "learning_rate": 8.235108712889637e-05, "loss": 2.4654, "step": 49340 }, { "epoch": 83.38, "learning_rate": 8.23168544580024e-05, "loss": 2.4779, "step": 49360 }, { "epoch": 83.41, "learning_rate": 8.228259575000251e-05, "loss": 2.496, "step": 49380 }, { "epoch": 83.45, "learning_rate": 8.224831103249832e-05, "loss": 2.4779, "step": 49400 }, { "epoch": 83.48, "learning_rate": 8.22140003331124e-05, "loss": 2.4841, "step": 49420 }, { "epoch": 83.51, "learning_rate": 8.217966367948827e-05, "loss": 2.4896, "step": 49440 }, { "epoch": 83.55, "learning_rate": 8.214530109929034e-05, "loss": 2.5025, "step": 49460 }, { "epoch": 83.58, "learning_rate": 8.211091262020393e-05, "loss": 2.4928, "step": 49480 }, { "epoch": 83.61, "learning_rate": 8.207649826993522e-05, "loss": 2.4943, "step": 49500 }, { "epoch": 83.65, "learning_rate": 8.204205807621122e-05, "loss": 2.5013, "step": 49520 }, { "epoch": 83.68, "learning_rate": 8.200759206677979e-05, "loss": 2.4941, "step": 49540 }, { "epoch": 83.72, "learning_rate": 8.197310026940954e-05, "loss": 2.4958, "step": 49560 }, { "epoch": 83.75, "learning_rate": 8.193858271188992e-05, "loss": 2.5129, "step": 49580 }, { "epoch": 83.78, "learning_rate": 8.190403942203109e-05, "loss": 2.5089, "step": 49600 }, { "epoch": 83.82, "learning_rate": 8.186947042766393e-05, "loss": 2.5179, "step": 49620 }, { "epoch": 83.85, "learning_rate": 8.18348757566401e-05, "loss": 2.5085, "step": 49640 }, { "epoch": 83.89, "learning_rate": 8.180025543683188e-05, "loss": 2.5043, "step": 49660 }, { "epoch": 83.92, "learning_rate": 8.176560949613224e-05, "loss": 2.5093, "step": 49680 }, { "epoch": 83.95, "learning_rate": 8.173093796245477e-05, "loss": 2.5181, "step": 49700 }, { "epoch": 83.99, "learning_rate": 8.169624086373371e-05, "loss": 2.5226, "step": 49720 }, { "epoch": 84.02, "learning_rate": 8.166151822792389e-05, "loss": 2.4702, "step": 49740 }, { "epoch": 84.05, "learning_rate": 8.162677008300073e-05, "loss": 2.4166, "step": 49760 }, { "epoch": 84.09, "learning_rate": 8.159199645696016e-05, "loss": 2.4464, "step": 49780 }, { "epoch": 84.12, "learning_rate": 8.155893793585413e-05, "loss": 2.45, "step": 49800 }, { "epoch": 84.16, "learning_rate": 8.152411470223569e-05, "loss": 2.432, "step": 49820 }, { "epoch": 84.19, "learning_rate": 8.148926607020743e-05, "loss": 2.4586, "step": 49840 }, { "epoch": 84.22, "learning_rate": 8.145439206784626e-05, "loss": 2.4687, "step": 49860 }, { "epoch": 84.26, "learning_rate": 8.141949272324953e-05, "loss": 2.4685, "step": 49880 }, { "epoch": 84.29, "learning_rate": 8.138456806453503e-05, "loss": 2.4564, "step": 49900 }, { "epoch": 84.32, "learning_rate": 8.13496181198409e-05, "loss": 2.4827, "step": 49920 }, { "epoch": 84.36, "learning_rate": 8.131464291732572e-05, "loss": 2.4427, "step": 49940 }, { "epoch": 84.39, "learning_rate": 8.127964248516832e-05, "loss": 2.4619, "step": 49960 }, { "epoch": 84.43, "learning_rate": 8.124461685156795e-05, "loss": 2.4702, "step": 49980 }, { "epoch": 84.46, "learning_rate": 8.120956604474415e-05, "loss": 2.4752, "step": 50000 }, { "epoch": 84.46, "eval_loss": 2.737753391265869, "eval_runtime": 47.6026, "eval_samples_per_second": 20.776, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004968291666702018, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03121488428037335, "eval_tse_type": 0.00018977318537182926, "step": 50000 }, { "epoch": 84.49, "learning_rate": 8.117449009293668e-05, "loss": 2.4778, "step": 50020 }, { "epoch": 84.53, "learning_rate": 8.113938902440564e-05, "loss": 2.4765, "step": 50040 }, { "epoch": 84.56, "learning_rate": 8.110426286743129e-05, "loss": 2.4681, "step": 50060 }, { "epoch": 84.59, "learning_rate": 8.106911165031415e-05, "loss": 2.5011, "step": 50080 }, { "epoch": 84.63, "learning_rate": 8.103393540137496e-05, "loss": 2.4837, "step": 50100 }, { "epoch": 84.66, "learning_rate": 8.099873414895453e-05, "loss": 2.4784, "step": 50120 }, { "epoch": 84.7, "learning_rate": 8.096350792141392e-05, "loss": 2.4975, "step": 50140 }, { "epoch": 84.73, "learning_rate": 8.092825674713425e-05, "loss": 2.4959, "step": 50160 }, { "epoch": 84.76, "learning_rate": 8.089298065451672e-05, "loss": 2.4922, "step": 50180 }, { "epoch": 84.8, "learning_rate": 8.085767967198269e-05, "loss": 2.4765, "step": 50200 }, { "epoch": 84.83, "learning_rate": 8.082235382797349e-05, "loss": 2.4971, "step": 50220 }, { "epoch": 84.86, "learning_rate": 8.078700315095055e-05, "loss": 2.5075, "step": 50240 }, { "epoch": 84.9, "learning_rate": 8.075162766939526e-05, "loss": 2.4855, "step": 50260 }, { "epoch": 84.93, "learning_rate": 8.071622741180898e-05, "loss": 2.5004, "step": 50280 }, { "epoch": 84.97, "learning_rate": 8.068080240671308e-05, "loss": 2.5046, "step": 50300 }, { "epoch": 85.0, "learning_rate": 8.064535268264883e-05, "loss": 2.5152, "step": 50320 }, { "epoch": 85.03, "learning_rate": 8.060987826817745e-05, "loss": 2.4181, "step": 50340 }, { "epoch": 85.07, "learning_rate": 8.057437919188005e-05, "loss": 2.4209, "step": 50360 }, { "epoch": 85.1, "learning_rate": 8.053885548235755e-05, "loss": 2.4479, "step": 50380 }, { "epoch": 85.14, "learning_rate": 8.05033071682308e-05, "loss": 2.4261, "step": 50400 }, { "epoch": 85.17, "learning_rate": 8.046773427814042e-05, "loss": 2.4532, "step": 50420 }, { "epoch": 85.2, "learning_rate": 8.043213684074684e-05, "loss": 2.4296, "step": 50440 }, { "epoch": 85.24, "learning_rate": 8.039651488473028e-05, "loss": 2.4408, "step": 50460 }, { "epoch": 85.27, "learning_rate": 8.03608684387907e-05, "loss": 2.4477, "step": 50480 }, { "epoch": 85.3, "learning_rate": 8.03251975316478e-05, "loss": 2.4433, "step": 50500 }, { "epoch": 85.34, "learning_rate": 8.0289502192041e-05, "loss": 2.457, "step": 50520 }, { "epoch": 85.37, "learning_rate": 8.025378244872936e-05, "loss": 2.4557, "step": 50540 }, { "epoch": 85.41, "learning_rate": 8.021803833049166e-05, "loss": 2.446, "step": 50560 }, { "epoch": 85.44, "learning_rate": 8.01822698661263e-05, "loss": 2.4528, "step": 50580 }, { "epoch": 85.47, "learning_rate": 8.014647708445124e-05, "loss": 2.4858, "step": 50600 }, { "epoch": 85.51, "learning_rate": 8.011066001430412e-05, "loss": 2.4548, "step": 50620 }, { "epoch": 85.54, "learning_rate": 8.007481868454208e-05, "loss": 2.4599, "step": 50640 }, { "epoch": 85.57, "learning_rate": 8.003895312404183e-05, "loss": 2.4664, "step": 50660 }, { "epoch": 85.61, "learning_rate": 8.000306336169963e-05, "loss": 2.4781, "step": 50680 }, { "epoch": 85.64, "learning_rate": 7.99671494264312e-05, "loss": 2.4777, "step": 50700 }, { "epoch": 85.68, "learning_rate": 7.993121134717177e-05, "loss": 2.4732, "step": 50720 }, { "epoch": 85.71, "learning_rate": 7.989524915287595e-05, "loss": 2.4783, "step": 50740 }, { "epoch": 85.74, "learning_rate": 7.985926287251787e-05, "loss": 2.478, "step": 50760 }, { "epoch": 85.78, "learning_rate": 7.982325253509102e-05, "loss": 2.4904, "step": 50780 }, { "epoch": 85.81, "learning_rate": 7.978721816960826e-05, "loss": 2.4812, "step": 50800 }, { "epoch": 85.84, "learning_rate": 7.975115980510187e-05, "loss": 2.487, "step": 50820 }, { "epoch": 85.88, "learning_rate": 7.971507747062337e-05, "loss": 2.4845, "step": 50840 }, { "epoch": 85.91, "learning_rate": 7.967897119524368e-05, "loss": 2.4728, "step": 50860 }, { "epoch": 85.95, "learning_rate": 7.964284100805297e-05, "loss": 2.5053, "step": 50880 }, { "epoch": 85.98, "learning_rate": 7.960668693816067e-05, "loss": 2.5008, "step": 50900 }, { "epoch": 86.01, "learning_rate": 7.957050901469545e-05, "loss": 2.4573, "step": 50920 }, { "epoch": 86.05, "learning_rate": 7.953430726680524e-05, "loss": 2.4049, "step": 50940 }, { "epoch": 86.08, "learning_rate": 7.949808172365713e-05, "loss": 2.4079, "step": 50960 }, { "epoch": 86.11, "learning_rate": 7.946183241443736e-05, "loss": 2.4051, "step": 50980 }, { "epoch": 86.15, "learning_rate": 7.942555936835135e-05, "loss": 2.4396, "step": 51000 }, { "epoch": 86.15, "eval_loss": 2.71986985206604, "eval_runtime": 47.2943, "eval_samples_per_second": 20.912, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.004861320099787565, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03205852765832834, "eval_tse_type": 0.0002071396472306482, "step": 51000 }, { "epoch": 86.18, "learning_rate": 7.938926261462366e-05, "loss": 2.428, "step": 51020 }, { "epoch": 86.22, "learning_rate": 7.935294218249791e-05, "loss": 2.4384, "step": 51040 }, { "epoch": 86.25, "learning_rate": 7.931659810123683e-05, "loss": 2.438, "step": 51060 }, { "epoch": 86.28, "learning_rate": 7.928023040012216e-05, "loss": 2.45, "step": 51080 }, { "epoch": 86.32, "learning_rate": 7.924383910845474e-05, "loss": 2.431, "step": 51100 }, { "epoch": 86.35, "learning_rate": 7.920742425555436e-05, "loss": 2.4443, "step": 51120 }, { "epoch": 86.39, "learning_rate": 7.91709858707598e-05, "loss": 2.4404, "step": 51140 }, { "epoch": 86.42, "learning_rate": 7.913452398342881e-05, "loss": 2.4507, "step": 51160 }, { "epoch": 86.45, "learning_rate": 7.909803862293808e-05, "loss": 2.4397, "step": 51180 }, { "epoch": 86.49, "learning_rate": 7.906152981868321e-05, "loss": 2.4595, "step": 51200 }, { "epoch": 86.52, "learning_rate": 7.902499760007867e-05, "loss": 2.4458, "step": 51220 }, { "epoch": 86.55, "learning_rate": 7.898844199655784e-05, "loss": 2.4712, "step": 51240 }, { "epoch": 86.59, "learning_rate": 7.895186303757287e-05, "loss": 2.4648, "step": 51260 }, { "epoch": 86.62, "learning_rate": 7.89152607525948e-05, "loss": 2.4638, "step": 51280 }, { "epoch": 86.66, "learning_rate": 7.887863517111338e-05, "loss": 2.4718, "step": 51300 }, { "epoch": 86.69, "learning_rate": 7.884198632263724e-05, "loss": 2.475, "step": 51320 }, { "epoch": 86.72, "learning_rate": 7.880531423669366e-05, "loss": 2.4567, "step": 51340 }, { "epoch": 86.76, "learning_rate": 7.876861894282869e-05, "loss": 2.4677, "step": 51360 }, { "epoch": 86.79, "learning_rate": 7.873190047060706e-05, "loss": 2.477, "step": 51380 }, { "epoch": 86.82, "learning_rate": 7.869515884961218e-05, "loss": 2.4794, "step": 51400 }, { "epoch": 86.86, "learning_rate": 7.865839410944612e-05, "loss": 2.4674, "step": 51420 }, { "epoch": 86.89, "learning_rate": 7.862160627972955e-05, "loss": 2.4613, "step": 51440 }, { "epoch": 86.93, "learning_rate": 7.858479539010177e-05, "loss": 2.4816, "step": 51460 }, { "epoch": 86.96, "learning_rate": 7.854796147022065e-05, "loss": 2.4687, "step": 51480 }, { "epoch": 86.99, "learning_rate": 7.85111045497626e-05, "loss": 2.4811, "step": 51500 }, { "epoch": 87.03, "learning_rate": 7.84742246584226e-05, "loss": 2.4159, "step": 51520 }, { "epoch": 87.06, "learning_rate": 7.84373218259141e-05, "loss": 2.4262, "step": 51540 }, { "epoch": 87.09, "learning_rate": 7.840039608196904e-05, "loss": 2.3902, "step": 51560 }, { "epoch": 87.13, "learning_rate": 7.836344745633783e-05, "loss": 2.4049, "step": 51580 }, { "epoch": 87.16, "learning_rate": 7.832647597878931e-05, "loss": 2.4193, "step": 51600 }, { "epoch": 87.2, "learning_rate": 7.828948167911074e-05, "loss": 2.4177, "step": 51620 }, { "epoch": 87.23, "learning_rate": 7.825246458710773e-05, "loss": 2.4203, "step": 51640 }, { "epoch": 87.26, "learning_rate": 7.821542473260432e-05, "loss": 2.4161, "step": 51660 }, { "epoch": 87.3, "learning_rate": 7.817836214544283e-05, "loss": 2.4194, "step": 51680 }, { "epoch": 87.33, "learning_rate": 7.814127685548391e-05, "loss": 2.4312, "step": 51700 }, { "epoch": 87.36, "learning_rate": 7.810416889260653e-05, "loss": 2.4292, "step": 51720 }, { "epoch": 87.4, "learning_rate": 7.80670382867079e-05, "loss": 2.4251, "step": 51740 }, { "epoch": 87.43, "learning_rate": 7.802988506770347e-05, "loss": 2.439, "step": 51760 }, { "epoch": 87.47, "learning_rate": 7.799270926552693e-05, "loss": 2.4419, "step": 51780 }, { "epoch": 87.5, "learning_rate": 7.795551091013013e-05, "loss": 2.4607, "step": 51800 }, { "epoch": 87.53, "learning_rate": 7.791829003148312e-05, "loss": 2.4635, "step": 51820 }, { "epoch": 87.57, "learning_rate": 7.78829093619214e-05, "loss": 2.4433, "step": 51840 }, { "epoch": 87.6, "learning_rate": 7.784564464920654e-05, "loss": 2.4617, "step": 51860 }, { "epoch": 87.64, "learning_rate": 7.780835750175874e-05, "loss": 2.4412, "step": 51880 }, { "epoch": 87.67, "learning_rate": 7.777104794961957e-05, "loss": 2.46, "step": 51900 }, { "epoch": 87.7, "learning_rate": 7.773371602284869e-05, "loss": 2.4576, "step": 51920 }, { "epoch": 87.74, "learning_rate": 7.769636175152374e-05, "loss": 2.4639, "step": 51940 }, { "epoch": 87.77, "learning_rate": 7.765898516574038e-05, "loss": 2.4442, "step": 51960 }, { "epoch": 87.8, "learning_rate": 7.762158629561225e-05, "loss": 2.4641, "step": 51980 }, { "epoch": 87.84, "learning_rate": 7.758416517127094e-05, "loss": 2.4595, "step": 52000 }, { "epoch": 87.84, "eval_loss": 2.7032272815704346, "eval_runtime": 50.8829, "eval_samples_per_second": 19.437, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.004222459898376891, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03180085910706684, "eval_tse_type": 9.6548140658353e-05, "step": 52000 }, { "epoch": 87.87, "learning_rate": 7.7546721822866e-05, "loss": 2.4544, "step": 52020 }, { "epoch": 87.91, "learning_rate": 7.750925628056482e-05, "loss": 2.4573, "step": 52040 }, { "epoch": 87.94, "learning_rate": 7.747176857455275e-05, "loss": 2.4697, "step": 52060 }, { "epoch": 87.97, "learning_rate": 7.743425873503294e-05, "loss": 2.4598, "step": 52080 }, { "epoch": 88.01, "learning_rate": 7.739672679222638e-05, "loss": 2.4552, "step": 52100 }, { "epoch": 88.04, "learning_rate": 7.735917277637189e-05, "loss": 2.3728, "step": 52120 }, { "epoch": 88.07, "learning_rate": 7.732159671772605e-05, "loss": 2.4024, "step": 52140 }, { "epoch": 88.11, "learning_rate": 7.728399864656324e-05, "loss": 2.3929, "step": 52160 }, { "epoch": 88.14, "learning_rate": 7.724637859317551e-05, "loss": 2.4046, "step": 52180 }, { "epoch": 88.18, "learning_rate": 7.720873658787268e-05, "loss": 2.4109, "step": 52200 }, { "epoch": 88.21, "learning_rate": 7.717107266098225e-05, "loss": 2.4293, "step": 52220 }, { "epoch": 88.24, "learning_rate": 7.713338684284932e-05, "loss": 2.4281, "step": 52240 }, { "epoch": 88.28, "learning_rate": 7.709567916383672e-05, "loss": 2.4039, "step": 52260 }, { "epoch": 88.31, "learning_rate": 7.705794965432481e-05, "loss": 2.4153, "step": 52280 }, { "epoch": 88.34, "learning_rate": 7.702019834471159e-05, "loss": 2.4153, "step": 52300 }, { "epoch": 88.38, "learning_rate": 7.698242526541262e-05, "loss": 2.4314, "step": 52320 }, { "epoch": 88.41, "learning_rate": 7.694463044686095e-05, "loss": 2.4365, "step": 52340 }, { "epoch": 88.45, "learning_rate": 7.690681391950723e-05, "loss": 2.4258, "step": 52360 }, { "epoch": 88.48, "learning_rate": 7.686897571381952e-05, "loss": 2.4224, "step": 52380 }, { "epoch": 88.51, "learning_rate": 7.68311158602834e-05, "loss": 2.4431, "step": 52400 }, { "epoch": 88.55, "learning_rate": 7.679323438940184e-05, "loss": 2.4327, "step": 52420 }, { "epoch": 88.58, "learning_rate": 7.67553313316953e-05, "loss": 2.4403, "step": 52440 }, { "epoch": 88.61, "learning_rate": 7.671740671770153e-05, "loss": 2.4478, "step": 52460 }, { "epoch": 88.65, "learning_rate": 7.667946057797578e-05, "loss": 2.4315, "step": 52480 }, { "epoch": 88.68, "learning_rate": 7.664149294309051e-05, "loss": 2.4405, "step": 52500 }, { "epoch": 88.72, "learning_rate": 7.66035038436356e-05, "loss": 2.4434, "step": 52520 }, { "epoch": 88.75, "learning_rate": 7.656549331021814e-05, "loss": 2.4497, "step": 52540 }, { "epoch": 88.78, "learning_rate": 7.652746137346255e-05, "loss": 2.4517, "step": 52560 }, { "epoch": 88.82, "learning_rate": 7.648940806401048e-05, "loss": 2.4465, "step": 52580 }, { "epoch": 88.85, "learning_rate": 7.645133341252078e-05, "loss": 2.436, "step": 52600 }, { "epoch": 88.89, "learning_rate": 7.641323744966953e-05, "loss": 2.441, "step": 52620 }, { "epoch": 88.92, "learning_rate": 7.637512020614995e-05, "loss": 2.4451, "step": 52640 }, { "epoch": 88.95, "learning_rate": 7.633698171267241e-05, "loss": 2.4658, "step": 52660 }, { "epoch": 88.99, "learning_rate": 7.629882199996441e-05, "loss": 2.4586, "step": 52680 }, { "epoch": 89.02, "learning_rate": 7.626064109877054e-05, "loss": 2.3964, "step": 52700 }, { "epoch": 89.05, "learning_rate": 7.622243903985245e-05, "loss": 2.3635, "step": 52720 }, { "epoch": 89.09, "learning_rate": 7.618421585398885e-05, "loss": 2.3716, "step": 52740 }, { "epoch": 89.12, "learning_rate": 7.61459715719755e-05, "loss": 2.3746, "step": 52760 }, { "epoch": 89.16, "learning_rate": 7.610770622462508e-05, "loss": 2.3948, "step": 52780 }, { "epoch": 89.19, "learning_rate": 7.606941984276734e-05, "loss": 2.3839, "step": 52800 }, { "epoch": 89.22, "learning_rate": 7.60311124572489e-05, "loss": 2.4137, "step": 52820 }, { "epoch": 89.26, "learning_rate": 7.599278409893334e-05, "loss": 2.4058, "step": 52840 }, { "epoch": 89.29, "learning_rate": 7.59544347987011e-05, "loss": 2.4233, "step": 52860 }, { "epoch": 89.32, "learning_rate": 7.591606458744955e-05, "loss": 2.4234, "step": 52880 }, { "epoch": 89.36, "learning_rate": 7.587767349609284e-05, "loss": 2.4087, "step": 52900 }, { "epoch": 89.39, "learning_rate": 7.583926155556203e-05, "loss": 2.418, "step": 52920 }, { "epoch": 89.43, "learning_rate": 7.580082879680488e-05, "loss": 2.41, "step": 52940 }, { "epoch": 89.46, "learning_rate": 7.5762375250786e-05, "loss": 2.4289, "step": 52960 }, { "epoch": 89.49, "learning_rate": 7.572390094848669e-05, "loss": 2.4167, "step": 52980 }, { "epoch": 89.53, "learning_rate": 7.568540592090503e-05, "loss": 2.4164, "step": 53000 }, { "epoch": 89.53, "eval_loss": 2.6998112201690674, "eval_runtime": 47.7256, "eval_samples_per_second": 20.723, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.006779113199156035, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03221942088756424, "eval_tse_type": 0.00011761318952926637, "step": 53000 }, { "epoch": 89.56, "learning_rate": 7.564689019905575e-05, "loss": 2.4375, "step": 53020 }, { "epoch": 89.59, "learning_rate": 7.560835381397027e-05, "loss": 2.4225, "step": 53040 }, { "epoch": 89.63, "learning_rate": 7.556979679669666e-05, "loss": 2.4205, "step": 53060 }, { "epoch": 89.66, "learning_rate": 7.553121917829962e-05, "loss": 2.4393, "step": 53080 }, { "epoch": 89.7, "learning_rate": 7.549262098986045e-05, "loss": 2.4471, "step": 53100 }, { "epoch": 89.73, "learning_rate": 7.545400226247699e-05, "loss": 2.4393, "step": 53120 }, { "epoch": 89.76, "learning_rate": 7.541536302726367e-05, "loss": 2.428, "step": 53140 }, { "epoch": 89.8, "learning_rate": 7.537670331535138e-05, "loss": 2.4348, "step": 53160 }, { "epoch": 89.83, "learning_rate": 7.533802315788762e-05, "loss": 2.4507, "step": 53180 }, { "epoch": 89.86, "learning_rate": 7.529932258603626e-05, "loss": 2.4357, "step": 53200 }, { "epoch": 89.9, "learning_rate": 7.526060163097766e-05, "loss": 2.4426, "step": 53220 }, { "epoch": 89.93, "learning_rate": 7.522186032390857e-05, "loss": 2.4383, "step": 53240 }, { "epoch": 89.97, "learning_rate": 7.518309869604219e-05, "loss": 2.4451, "step": 53260 }, { "epoch": 90.0, "learning_rate": 7.514431677860805e-05, "loss": 2.4421, "step": 53280 }, { "epoch": 90.03, "learning_rate": 7.510551460285202e-05, "loss": 2.3711, "step": 53300 }, { "epoch": 90.07, "learning_rate": 7.506669220003637e-05, "loss": 2.3711, "step": 53320 }, { "epoch": 90.1, "learning_rate": 7.502784960143955e-05, "loss": 2.3738, "step": 53340 }, { "epoch": 90.14, "learning_rate": 7.498898683835637e-05, "loss": 2.3822, "step": 53360 }, { "epoch": 90.17, "learning_rate": 7.495010394209785e-05, "loss": 2.3925, "step": 53380 }, { "epoch": 90.2, "learning_rate": 7.491120094399124e-05, "loss": 2.3825, "step": 53400 }, { "epoch": 90.24, "learning_rate": 7.487227787537997e-05, "loss": 2.3945, "step": 53420 }, { "epoch": 90.27, "learning_rate": 7.483333476762366e-05, "loss": 2.3954, "step": 53440 }, { "epoch": 90.3, "learning_rate": 7.479437165209808e-05, "loss": 2.4023, "step": 53460 }, { "epoch": 90.34, "learning_rate": 7.475538856019511e-05, "loss": 2.393, "step": 53480 }, { "epoch": 90.37, "learning_rate": 7.47163855233227e-05, "loss": 2.3872, "step": 53500 }, { "epoch": 90.41, "learning_rate": 7.467736257290492e-05, "loss": 2.405, "step": 53520 }, { "epoch": 90.44, "learning_rate": 7.463831974038182e-05, "loss": 2.4058, "step": 53540 }, { "epoch": 90.47, "learning_rate": 7.459925705720954e-05, "loss": 2.401, "step": 53560 }, { "epoch": 90.51, "learning_rate": 7.456017455486017e-05, "loss": 2.4124, "step": 53580 }, { "epoch": 90.54, "learning_rate": 7.452107226482176e-05, "loss": 2.4088, "step": 53600 }, { "epoch": 90.57, "learning_rate": 7.448195021859834e-05, "loss": 2.4122, "step": 53620 }, { "epoch": 90.61, "learning_rate": 7.444280844770981e-05, "loss": 2.4161, "step": 53640 }, { "epoch": 90.64, "learning_rate": 7.440364698369202e-05, "loss": 2.4245, "step": 53660 }, { "epoch": 90.68, "learning_rate": 7.436446585809663e-05, "loss": 2.4139, "step": 53680 }, { "epoch": 90.71, "learning_rate": 7.432526510249117e-05, "loss": 2.4357, "step": 53700 }, { "epoch": 90.74, "learning_rate": 7.4286044748459e-05, "loss": 2.4225, "step": 53720 }, { "epoch": 90.78, "learning_rate": 7.424680482759921e-05, "loss": 2.4384, "step": 53740 }, { "epoch": 90.81, "learning_rate": 7.420754537152674e-05, "loss": 2.43, "step": 53760 }, { "epoch": 90.84, "learning_rate": 7.416826641187219e-05, "loss": 2.4221, "step": 53780 }, { "epoch": 90.88, "learning_rate": 7.412896798028194e-05, "loss": 2.4463, "step": 53800 }, { "epoch": 90.91, "learning_rate": 7.4089650108418e-05, "loss": 2.4299, "step": 53820 }, { "epoch": 90.95, "learning_rate": 7.405031282795807e-05, "loss": 2.42, "step": 53840 }, { "epoch": 90.98, "learning_rate": 7.401095617059552e-05, "loss": 2.4391, "step": 53860 }, { "epoch": 91.01, "learning_rate": 7.397158016803925e-05, "loss": 2.4055, "step": 53880 }, { "epoch": 91.05, "learning_rate": 7.393218485201383e-05, "loss": 2.3565, "step": 53900 }, { "epoch": 91.08, "learning_rate": 7.389277025425933e-05, "loss": 2.3671, "step": 53920 }, { "epoch": 91.11, "learning_rate": 7.38533364065314e-05, "loss": 2.3723, "step": 53940 }, { "epoch": 91.15, "learning_rate": 7.381388334060118e-05, "loss": 2.3535, "step": 53960 }, { "epoch": 91.18, "learning_rate": 7.377441108825526e-05, "loss": 2.3746, "step": 53980 }, { "epoch": 91.22, "learning_rate": 7.373491968129577e-05, "loss": 2.3736, "step": 54000 }, { "epoch": 91.22, "eval_loss": 2.683696985244751, "eval_runtime": 47.5331, "eval_samples_per_second": 20.807, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.003875717581254406, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03362074664529218, "eval_tse_type": 0.00017729749466352098, "step": 54000 }, { "epoch": 91.25, "learning_rate": 7.369540915154018e-05, "loss": 2.383, "step": 54020 }, { "epoch": 91.28, "learning_rate": 7.365587953082142e-05, "loss": 2.3846, "step": 54040 }, { "epoch": 91.32, "learning_rate": 7.361633085098781e-05, "loss": 2.4035, "step": 54060 }, { "epoch": 91.35, "learning_rate": 7.3576763143903e-05, "loss": 2.3946, "step": 54080 }, { "epoch": 91.39, "learning_rate": 7.353717644144598e-05, "loss": 2.3772, "step": 54100 }, { "epoch": 91.42, "learning_rate": 7.349757077551101e-05, "loss": 2.3934, "step": 54120 }, { "epoch": 91.45, "learning_rate": 7.34579461780077e-05, "loss": 2.3944, "step": 54140 }, { "epoch": 91.49, "learning_rate": 7.341830268086084e-05, "loss": 2.4028, "step": 54160 }, { "epoch": 91.52, "learning_rate": 7.33786403160105e-05, "loss": 2.3973, "step": 54180 }, { "epoch": 91.55, "learning_rate": 7.333895911541194e-05, "loss": 2.4226, "step": 54200 }, { "epoch": 91.59, "learning_rate": 7.329925911103556e-05, "loss": 2.4169, "step": 54220 }, { "epoch": 91.62, "learning_rate": 7.325954033486695e-05, "loss": 2.4138, "step": 54240 }, { "epoch": 91.66, "learning_rate": 7.321980281890682e-05, "loss": 2.3961, "step": 54260 }, { "epoch": 91.69, "learning_rate": 7.318004659517095e-05, "loss": 2.3989, "step": 54280 }, { "epoch": 91.72, "learning_rate": 7.314226088371854e-05, "loss": 2.4183, "step": 54300 }, { "epoch": 91.76, "learning_rate": 7.31024682719625e-05, "loss": 2.4142, "step": 54320 }, { "epoch": 91.79, "learning_rate": 7.306265704696504e-05, "loss": 2.4108, "step": 54340 }, { "epoch": 91.82, "learning_rate": 7.302282724080138e-05, "loss": 2.4308, "step": 54360 }, { "epoch": 91.86, "learning_rate": 7.298297888556164e-05, "loss": 2.4199, "step": 54380 }, { "epoch": 91.89, "learning_rate": 7.294311201335093e-05, "loss": 2.4239, "step": 54400 }, { "epoch": 91.93, "learning_rate": 7.290322665628928e-05, "loss": 2.414, "step": 54420 }, { "epoch": 91.96, "learning_rate": 7.286332284651159e-05, "loss": 2.423, "step": 54440 }, { "epoch": 91.99, "learning_rate": 7.282340061616766e-05, "loss": 2.4306, "step": 54460 }, { "epoch": 92.03, "learning_rate": 7.278345999742208e-05, "loss": 2.3636, "step": 54480 }, { "epoch": 92.06, "learning_rate": 7.274350102245431e-05, "loss": 2.3382, "step": 54500 }, { "epoch": 92.09, "learning_rate": 7.270352372345855e-05, "loss": 2.3477, "step": 54520 }, { "epoch": 92.13, "learning_rate": 7.266352813264378e-05, "loss": 2.361, "step": 54540 }, { "epoch": 92.16, "learning_rate": 7.262351428223378e-05, "loss": 2.3566, "step": 54560 }, { "epoch": 92.2, "learning_rate": 7.258348220446695e-05, "loss": 2.3907, "step": 54580 }, { "epoch": 92.23, "learning_rate": 7.25434319315964e-05, "loss": 2.3643, "step": 54600 }, { "epoch": 92.26, "learning_rate": 7.250336349588994e-05, "loss": 2.3844, "step": 54620 }, { "epoch": 92.3, "learning_rate": 7.246327692962996e-05, "loss": 2.3964, "step": 54640 }, { "epoch": 92.33, "learning_rate": 7.24231722651135e-05, "loss": 2.3855, "step": 54660 }, { "epoch": 92.36, "learning_rate": 7.238304953465217e-05, "loss": 2.3828, "step": 54680 }, { "epoch": 92.4, "learning_rate": 7.234290877057208e-05, "loss": 2.3719, "step": 54700 }, { "epoch": 92.43, "learning_rate": 7.230275000521398e-05, "loss": 2.3786, "step": 54720 }, { "epoch": 92.47, "learning_rate": 7.226257327093304e-05, "loss": 2.3923, "step": 54740 }, { "epoch": 92.5, "learning_rate": 7.222237860009892e-05, "loss": 2.3972, "step": 54760 }, { "epoch": 92.53, "learning_rate": 7.218216602509574e-05, "loss": 2.3888, "step": 54780 }, { "epoch": 92.57, "learning_rate": 7.214193557832206e-05, "loss": 2.3937, "step": 54800 }, { "epoch": 92.6, "learning_rate": 7.21016872921908e-05, "loss": 2.3849, "step": 54820 }, { "epoch": 92.64, "learning_rate": 7.206142119912931e-05, "loss": 2.3849, "step": 54840 }, { "epoch": 92.67, "learning_rate": 7.202113733157923e-05, "loss": 2.4095, "step": 54860 }, { "epoch": 92.7, "learning_rate": 7.198083572199657e-05, "loss": 2.4029, "step": 54880 }, { "epoch": 92.74, "learning_rate": 7.194051640285157e-05, "loss": 2.3937, "step": 54900 }, { "epoch": 92.77, "learning_rate": 7.190017940662878e-05, "loss": 2.4132, "step": 54920 }, { "epoch": 92.8, "learning_rate": 7.185982476582705e-05, "loss": 2.3987, "step": 54940 }, { "epoch": 92.84, "learning_rate": 7.181945251295931e-05, "loss": 2.4036, "step": 54960 }, { "epoch": 92.87, "learning_rate": 7.17790626805528e-05, "loss": 2.4164, "step": 54980 }, { "epoch": 92.91, "learning_rate": 7.173865530114886e-05, "loss": 2.4191, "step": 55000 }, { "epoch": 92.91, "eval_loss": 2.6698572635650635, "eval_runtime": 50.8701, "eval_samples_per_second": 19.442, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.004103632794998836, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03406290900649943, "eval_tse_type": 0.00011936861026850915, "step": 55000 }, { "epoch": 92.94, "learning_rate": 7.1698230407303e-05, "loss": 2.4115, "step": 55020 }, { "epoch": 92.97, "learning_rate": 7.16577880315848e-05, "loss": 2.4227, "step": 55040 }, { "epoch": 93.01, "learning_rate": 7.161732820657799e-05, "loss": 2.3872, "step": 55060 }, { "epoch": 93.04, "learning_rate": 7.157685096488029e-05, "loss": 2.3322, "step": 55080 }, { "epoch": 93.07, "learning_rate": 7.153635633910349e-05, "loss": 2.3608, "step": 55100 }, { "epoch": 93.11, "learning_rate": 7.149584436187338e-05, "loss": 2.3444, "step": 55120 }, { "epoch": 93.14, "learning_rate": 7.145531506582975e-05, "loss": 2.3532, "step": 55140 }, { "epoch": 93.18, "learning_rate": 7.141476848362627e-05, "loss": 2.3356, "step": 55160 }, { "epoch": 93.21, "learning_rate": 7.137420464793063e-05, "loss": 2.3647, "step": 55180 }, { "epoch": 93.24, "learning_rate": 7.133362359142439e-05, "loss": 2.3639, "step": 55200 }, { "epoch": 93.28, "learning_rate": 7.129302534680293e-05, "loss": 2.3628, "step": 55220 }, { "epoch": 93.31, "learning_rate": 7.125240994677557e-05, "loss": 2.371, "step": 55240 }, { "epoch": 93.34, "learning_rate": 7.121177742406534e-05, "loss": 2.3581, "step": 55260 }, { "epoch": 93.38, "learning_rate": 7.11711278114092e-05, "loss": 2.3809, "step": 55280 }, { "epoch": 93.41, "learning_rate": 7.113046114155777e-05, "loss": 2.3773, "step": 55300 }, { "epoch": 93.45, "learning_rate": 7.108977744727547e-05, "loss": 2.3713, "step": 55320 }, { "epoch": 93.48, "learning_rate": 7.104907676134041e-05, "loss": 2.3754, "step": 55340 }, { "epoch": 93.51, "learning_rate": 7.100835911654437e-05, "loss": 2.3823, "step": 55360 }, { "epoch": 93.55, "learning_rate": 7.096762454569289e-05, "loss": 2.3841, "step": 55380 }, { "epoch": 93.58, "learning_rate": 7.0926873081605e-05, "loss": 2.3738, "step": 55400 }, { "epoch": 93.61, "learning_rate": 7.088610475711345e-05, "loss": 2.3947, "step": 55420 }, { "epoch": 93.65, "learning_rate": 7.084531960506456e-05, "loss": 2.3839, "step": 55440 }, { "epoch": 93.68, "learning_rate": 7.080451765831817e-05, "loss": 2.3935, "step": 55460 }, { "epoch": 93.72, "learning_rate": 7.076369894974768e-05, "loss": 2.4017, "step": 55480 }, { "epoch": 93.75, "learning_rate": 7.072286351223999e-05, "loss": 2.3954, "step": 55500 }, { "epoch": 93.78, "learning_rate": 7.068201137869546e-05, "loss": 2.4097, "step": 55520 }, { "epoch": 93.82, "learning_rate": 7.064114258202792e-05, "loss": 2.3989, "step": 55540 }, { "epoch": 93.85, "learning_rate": 7.060025715516463e-05, "loss": 2.4131, "step": 55560 }, { "epoch": 93.89, "learning_rate": 7.055935513104623e-05, "loss": 2.3959, "step": 55580 }, { "epoch": 93.92, "learning_rate": 7.051843654262676e-05, "loss": 2.4057, "step": 55600 }, { "epoch": 93.95, "learning_rate": 7.047750142287356e-05, "loss": 2.4038, "step": 55620 }, { "epoch": 93.99, "learning_rate": 7.043654980476735e-05, "loss": 2.397, "step": 55640 }, { "epoch": 94.02, "learning_rate": 7.039558172130208e-05, "loss": 2.3503, "step": 55660 }, { "epoch": 94.05, "learning_rate": 7.0354597205485e-05, "loss": 2.3315, "step": 55680 }, { "epoch": 94.09, "learning_rate": 7.031359629033661e-05, "loss": 2.3534, "step": 55700 }, { "epoch": 94.12, "learning_rate": 7.027257900889059e-05, "loss": 2.3252, "step": 55720 }, { "epoch": 94.16, "learning_rate": 7.023154539419384e-05, "loss": 2.3405, "step": 55740 }, { "epoch": 94.19, "learning_rate": 7.019049547930638e-05, "loss": 2.3479, "step": 55760 }, { "epoch": 94.22, "learning_rate": 7.01494292973014e-05, "loss": 2.3548, "step": 55780 }, { "epoch": 94.26, "learning_rate": 7.010834688126518e-05, "loss": 2.3475, "step": 55800 }, { "epoch": 94.29, "learning_rate": 7.006724826429706e-05, "loss": 2.3467, "step": 55820 }, { "epoch": 94.32, "learning_rate": 7.002613347950949e-05, "loss": 2.3527, "step": 55840 }, { "epoch": 94.36, "learning_rate": 6.998500256002789e-05, "loss": 2.3518, "step": 55860 }, { "epoch": 94.39, "learning_rate": 6.994385553899069e-05, "loss": 2.3593, "step": 55880 }, { "epoch": 94.43, "learning_rate": 6.990269244954933e-05, "loss": 2.3759, "step": 55900 }, { "epoch": 94.46, "learning_rate": 6.986151332486813e-05, "loss": 2.3658, "step": 55920 }, { "epoch": 94.49, "learning_rate": 6.98203181981244e-05, "loss": 2.3621, "step": 55940 }, { "epoch": 94.53, "learning_rate": 6.97791071025083e-05, "loss": 2.3945, "step": 55960 }, { "epoch": 94.56, "learning_rate": 6.973788007122283e-05, "loss": 2.3772, "step": 55980 }, { "epoch": 94.59, "learning_rate": 6.969663713748392e-05, "loss": 2.3798, "step": 56000 }, { "epoch": 94.59, "eval_loss": 2.668010711669922, "eval_runtime": 47.8955, "eval_samples_per_second": 20.649, "eval_steps_per_second": 0.125, "eval_tse_ndup": 0.005050061496517336, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.034623850337323764, "eval_tse_type": 0.00020538422649140546, "step": 56000 }, { "epoch": 94.63, "learning_rate": 6.965537833452024e-05, "loss": 2.3747, "step": 56020 }, { "epoch": 94.66, "learning_rate": 6.961410369557323e-05, "loss": 2.368, "step": 56040 }, { "epoch": 94.7, "learning_rate": 6.957281325389717e-05, "loss": 2.3824, "step": 56060 }, { "epoch": 94.73, "learning_rate": 6.9531507042759e-05, "loss": 2.3905, "step": 56080 }, { "epoch": 94.76, "learning_rate": 6.949018509543835e-05, "loss": 2.3951, "step": 56100 }, { "epoch": 94.8, "learning_rate": 6.944884744522764e-05, "loss": 2.3906, "step": 56120 }, { "epoch": 94.83, "learning_rate": 6.940749412543181e-05, "loss": 2.3845, "step": 56140 }, { "epoch": 94.86, "learning_rate": 6.936612516936852e-05, "loss": 2.3946, "step": 56160 }, { "epoch": 94.9, "learning_rate": 6.932474061036797e-05, "loss": 2.3913, "step": 56180 }, { "epoch": 94.93, "learning_rate": 6.928334048177296e-05, "loss": 2.3949, "step": 56200 }, { "epoch": 94.97, "learning_rate": 6.924192481693882e-05, "loss": 2.4153, "step": 56220 }, { "epoch": 95.0, "learning_rate": 6.920049364923342e-05, "loss": 2.3889, "step": 56240 }, { "epoch": 95.03, "learning_rate": 6.915904701203705e-05, "loss": 2.3298, "step": 56260 }, { "epoch": 95.07, "learning_rate": 6.911758493874258e-05, "loss": 2.3281, "step": 56280 }, { "epoch": 95.1, "learning_rate": 6.907610746275523e-05, "loss": 2.323, "step": 56300 }, { "epoch": 95.14, "learning_rate": 6.903461461749266e-05, "loss": 2.3401, "step": 56320 }, { "epoch": 95.17, "learning_rate": 6.89931064363849e-05, "loss": 2.3285, "step": 56340 }, { "epoch": 95.2, "learning_rate": 6.895365948996552e-05, "loss": 2.3299, "step": 56360 }, { "epoch": 95.24, "learning_rate": 6.891212150015955e-05, "loss": 2.3394, "step": 56380 }, { "epoch": 95.27, "learning_rate": 6.887056827319885e-05, "loss": 2.34, "step": 56400 }, { "epoch": 95.3, "learning_rate": 6.882899984256216e-05, "loss": 2.369, "step": 56420 }, { "epoch": 95.34, "learning_rate": 6.878741624174039e-05, "loss": 2.3588, "step": 56440 }, { "epoch": 95.37, "learning_rate": 6.87458175042367e-05, "loss": 2.3518, "step": 56460 }, { "epoch": 95.41, "learning_rate": 6.870420366356642e-05, "loss": 2.3542, "step": 56480 }, { "epoch": 95.44, "learning_rate": 6.86625747532571e-05, "loss": 2.3635, "step": 56500 }, { "epoch": 95.47, "learning_rate": 6.862093080684838e-05, "loss": 2.3609, "step": 56520 }, { "epoch": 95.51, "learning_rate": 6.857927185789204e-05, "loss": 2.3498, "step": 56540 }, { "epoch": 95.54, "learning_rate": 6.853759793995196e-05, "loss": 2.3651, "step": 56560 }, { "epoch": 95.57, "learning_rate": 6.849590908660404e-05, "loss": 2.3662, "step": 56580 }, { "epoch": 95.61, "learning_rate": 6.845420533143627e-05, "loss": 2.3717, "step": 56600 }, { "epoch": 95.64, "learning_rate": 6.841248670804853e-05, "loss": 2.3478, "step": 56620 }, { "epoch": 95.68, "learning_rate": 6.837075325005286e-05, "loss": 2.3753, "step": 56640 }, { "epoch": 95.71, "learning_rate": 6.832900499107311e-05, "loss": 2.3746, "step": 56660 }, { "epoch": 95.74, "learning_rate": 6.82872419647451e-05, "loss": 2.3741, "step": 56680 }, { "epoch": 95.78, "learning_rate": 6.824546420471653e-05, "loss": 2.377, "step": 56700 }, { "epoch": 95.81, "learning_rate": 6.820367174464703e-05, "loss": 2.3812, "step": 56720 }, { "epoch": 95.84, "learning_rate": 6.816186461820798e-05, "loss": 2.3846, "step": 56740 }, { "epoch": 95.88, "learning_rate": 6.812004285908266e-05, "loss": 2.4012, "step": 56760 }, { "epoch": 95.91, "learning_rate": 6.807820650096609e-05, "loss": 2.3778, "step": 56780 }, { "epoch": 95.95, "learning_rate": 6.803635557756507e-05, "loss": 2.3783, "step": 56800 }, { "epoch": 95.98, "learning_rate": 6.799449012259816e-05, "loss": 2.3772, "step": 56820 }, { "epoch": 96.01, "learning_rate": 6.795261016979555e-05, "loss": 2.3434, "step": 56840 }, { "epoch": 96.05, "learning_rate": 6.791071575289922e-05, "loss": 2.3278, "step": 56860 }, { "epoch": 96.08, "learning_rate": 6.786880690566268e-05, "loss": 2.3018, "step": 56880 }, { "epoch": 96.11, "learning_rate": 6.78268836618512e-05, "loss": 2.3097, "step": 56900 }, { "epoch": 96.15, "learning_rate": 6.778494605524151e-05, "loss": 2.3278, "step": 56920 }, { "epoch": 96.18, "learning_rate": 6.774299411962203e-05, "loss": 2.3357, "step": 56940 }, { "epoch": 96.22, "learning_rate": 6.770102788879267e-05, "loss": 2.3269, "step": 56960 }, { "epoch": 96.25, "learning_rate": 6.765904739656486e-05, "loss": 2.3179, "step": 56980 }, { "epoch": 96.28, "learning_rate": 6.761705267676153e-05, "loss": 2.3506, "step": 57000 }, { "epoch": 96.28, "eval_loss": 2.652446985244751, "eval_runtime": 47.4592, "eval_samples_per_second": 20.839, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004419710815808129, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.032274585313776694, "eval_tse_type": 0.00025804684866868893, "step": 57000 }, { "epoch": 96.32, "learning_rate": 6.757504376321704e-05, "loss": 2.333, "step": 57020 }, { "epoch": 96.35, "learning_rate": 6.753302068977725e-05, "loss": 2.3232, "step": 57040 }, { "epoch": 96.39, "learning_rate": 6.749098349029935e-05, "loss": 2.3507, "step": 57060 }, { "epoch": 96.42, "learning_rate": 6.7448932198652e-05, "loss": 2.3556, "step": 57080 }, { "epoch": 96.45, "learning_rate": 6.740686684871515e-05, "loss": 2.3533, "step": 57100 }, { "epoch": 96.49, "learning_rate": 6.736478747438007e-05, "loss": 2.3587, "step": 57120 }, { "epoch": 96.52, "learning_rate": 6.732269410954938e-05, "loss": 2.3467, "step": 57140 }, { "epoch": 96.55, "learning_rate": 6.728058678813694e-05, "loss": 2.3415, "step": 57160 }, { "epoch": 96.59, "learning_rate": 6.723846554406782e-05, "loss": 2.3442, "step": 57180 }, { "epoch": 96.62, "learning_rate": 6.719633041127839e-05, "loss": 2.3605, "step": 57200 }, { "epoch": 96.66, "learning_rate": 6.715418142371614e-05, "loss": 2.365, "step": 57220 }, { "epoch": 96.69, "learning_rate": 6.711201861533978e-05, "loss": 2.3759, "step": 57240 }, { "epoch": 96.72, "learning_rate": 6.70698420201191e-05, "loss": 2.3712, "step": 57260 }, { "epoch": 96.76, "learning_rate": 6.7027651672035e-05, "loss": 2.3813, "step": 57280 }, { "epoch": 96.79, "learning_rate": 6.698544760507952e-05, "loss": 2.3772, "step": 57300 }, { "epoch": 96.82, "learning_rate": 6.694322985325569e-05, "loss": 2.3764, "step": 57320 }, { "epoch": 96.86, "learning_rate": 6.69009984505776e-05, "loss": 2.365, "step": 57340 }, { "epoch": 96.89, "learning_rate": 6.685875343107033e-05, "loss": 2.3746, "step": 57360 }, { "epoch": 96.93, "learning_rate": 6.681649482876994e-05, "loss": 2.3786, "step": 57380 }, { "epoch": 96.96, "learning_rate": 6.677422267772338e-05, "loss": 2.3717, "step": 57400 }, { "epoch": 96.99, "learning_rate": 6.673193701198862e-05, "loss": 2.3775, "step": 57420 }, { "epoch": 97.03, "learning_rate": 6.66896378656344e-05, "loss": 2.3134, "step": 57440 }, { "epoch": 97.06, "learning_rate": 6.664732527274041e-05, "loss": 2.3173, "step": 57460 }, { "epoch": 97.09, "learning_rate": 6.660499926739714e-05, "loss": 2.3021, "step": 57480 }, { "epoch": 97.13, "learning_rate": 6.656265988370588e-05, "loss": 2.318, "step": 57500 }, { "epoch": 97.16, "learning_rate": 6.652030715577871e-05, "loss": 2.3125, "step": 57520 }, { "epoch": 97.2, "learning_rate": 6.647794111773843e-05, "loss": 2.3126, "step": 57540 }, { "epoch": 97.23, "learning_rate": 6.643556180371866e-05, "loss": 2.3302, "step": 57560 }, { "epoch": 97.26, "learning_rate": 6.63931692478636e-05, "loss": 2.3208, "step": 57580 }, { "epoch": 97.3, "learning_rate": 6.635076348432815e-05, "loss": 2.3349, "step": 57600 }, { "epoch": 97.33, "learning_rate": 6.630834454727792e-05, "loss": 2.34, "step": 57620 }, { "epoch": 97.36, "learning_rate": 6.626591247088903e-05, "loss": 2.3404, "step": 57640 }, { "epoch": 97.4, "learning_rate": 6.622346728934827e-05, "loss": 2.3362, "step": 57660 }, { "epoch": 97.43, "learning_rate": 6.618100903685294e-05, "loss": 2.3237, "step": 57680 }, { "epoch": 97.47, "learning_rate": 6.61385377476109e-05, "loss": 2.3294, "step": 57700 }, { "epoch": 97.5, "learning_rate": 6.609605345584047e-05, "loss": 2.3433, "step": 57720 }, { "epoch": 97.53, "learning_rate": 6.605355619577054e-05, "loss": 2.3468, "step": 57740 }, { "epoch": 97.57, "learning_rate": 6.601104600164032e-05, "loss": 2.3455, "step": 57760 }, { "epoch": 97.6, "learning_rate": 6.596852290769952e-05, "loss": 2.3537, "step": 57780 }, { "epoch": 97.64, "learning_rate": 6.592598694820826e-05, "loss": 2.3462, "step": 57800 }, { "epoch": 97.67, "learning_rate": 6.588343815743697e-05, "loss": 2.3587, "step": 57820 }, { "epoch": 97.7, "learning_rate": 6.584087656966644e-05, "loss": 2.3575, "step": 57840 }, { "epoch": 97.74, "learning_rate": 6.57983022191878e-05, "loss": 2.3752, "step": 57860 }, { "epoch": 97.77, "learning_rate": 6.575571514030239e-05, "loss": 2.3578, "step": 57880 }, { "epoch": 97.8, "learning_rate": 6.571311536732188e-05, "loss": 2.3469, "step": 57900 }, { "epoch": 97.84, "learning_rate": 6.567050293456812e-05, "loss": 2.3702, "step": 57920 }, { "epoch": 97.87, "learning_rate": 6.562787787637321e-05, "loss": 2.3785, "step": 57940 }, { "epoch": 97.91, "learning_rate": 6.558524022707935e-05, "loss": 2.3621, "step": 57960 }, { "epoch": 97.94, "learning_rate": 6.554259002103895e-05, "loss": 2.3432, "step": 57980 }, { "epoch": 97.97, "learning_rate": 6.549992729261451e-05, "loss": 2.3689, "step": 58000 }, { "epoch": 97.97, "eval_loss": 2.6414411067962646, "eval_runtime": 50.7321, "eval_samples_per_second": 19.495, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.004525041405393203, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03392460574295638, "eval_tse_type": 0.00019714079755148143, "step": 58000 }, { "epoch": 98.01, "learning_rate": 6.54572520761786e-05, "loss": 2.3433, "step": 58020 }, { "epoch": 98.04, "learning_rate": 6.54145644061139e-05, "loss": 2.2908, "step": 58040 }, { "epoch": 98.07, "learning_rate": 6.537186431681306e-05, "loss": 2.2987, "step": 58060 }, { "epoch": 98.11, "learning_rate": 6.532915184267881e-05, "loss": 2.3084, "step": 58080 }, { "epoch": 98.14, "learning_rate": 6.528642701812378e-05, "loss": 2.3045, "step": 58100 }, { "epoch": 98.18, "learning_rate": 6.524368987757061e-05, "loss": 2.3313, "step": 58120 }, { "epoch": 98.21, "learning_rate": 6.520094045545184e-05, "loss": 2.3024, "step": 58140 }, { "epoch": 98.24, "learning_rate": 6.515817878620992e-05, "loss": 2.3164, "step": 58160 }, { "epoch": 98.28, "learning_rate": 6.511540490429713e-05, "loss": 2.3052, "step": 58180 }, { "epoch": 98.31, "learning_rate": 6.507261884417561e-05, "loss": 2.3216, "step": 58200 }, { "epoch": 98.34, "learning_rate": 6.502982064031736e-05, "loss": 2.322, "step": 58220 }, { "epoch": 98.38, "learning_rate": 6.498701032720406e-05, "loss": 2.3298, "step": 58240 }, { "epoch": 98.41, "learning_rate": 6.494418793932728e-05, "loss": 2.3179, "step": 58260 }, { "epoch": 98.45, "learning_rate": 6.490135351118817e-05, "loss": 2.3315, "step": 58280 }, { "epoch": 98.48, "learning_rate": 6.485850707729771e-05, "loss": 2.34, "step": 58300 }, { "epoch": 98.51, "learning_rate": 6.481564867217646e-05, "loss": 2.3419, "step": 58320 }, { "epoch": 98.55, "learning_rate": 6.477277833035467e-05, "loss": 2.3288, "step": 58340 }, { "epoch": 98.58, "learning_rate": 6.472989608637221e-05, "loss": 2.3389, "step": 58360 }, { "epoch": 98.61, "learning_rate": 6.468700197477853e-05, "loss": 2.3366, "step": 58380 }, { "epoch": 98.65, "learning_rate": 6.464409603013264e-05, "loss": 2.3486, "step": 58400 }, { "epoch": 98.68, "learning_rate": 6.46011782870031e-05, "loss": 2.3382, "step": 58420 }, { "epoch": 98.72, "learning_rate": 6.455824877996793e-05, "loss": 2.34, "step": 58440 }, { "epoch": 98.75, "learning_rate": 6.451530754361465e-05, "loss": 2.3462, "step": 58460 }, { "epoch": 98.78, "learning_rate": 6.447235461254029e-05, "loss": 2.3459, "step": 58480 }, { "epoch": 98.82, "learning_rate": 6.442939002135118e-05, "loss": 2.3578, "step": 58500 }, { "epoch": 98.85, "learning_rate": 6.43864138046632e-05, "loss": 2.3432, "step": 58520 }, { "epoch": 98.89, "learning_rate": 6.434342599710145e-05, "loss": 2.3605, "step": 58540 }, { "epoch": 98.92, "learning_rate": 6.430042663330046e-05, "loss": 2.3593, "step": 58560 }, { "epoch": 98.95, "learning_rate": 6.425741574790402e-05, "loss": 2.3643, "step": 58580 }, { "epoch": 98.99, "learning_rate": 6.421439337556523e-05, "loss": 2.3728, "step": 58600 }, { "epoch": 99.02, "learning_rate": 6.417135955094644e-05, "loss": 2.3148, "step": 58620 }, { "epoch": 99.05, "learning_rate": 6.412831430871922e-05, "loss": 2.2854, "step": 58640 }, { "epoch": 99.09, "learning_rate": 6.408525768356435e-05, "loss": 2.2796, "step": 58660 }, { "epoch": 99.12, "learning_rate": 6.404218971017179e-05, "loss": 2.2844, "step": 58680 }, { "epoch": 99.16, "learning_rate": 6.399911042324059e-05, "loss": 2.309, "step": 58700 }, { "epoch": 99.19, "learning_rate": 6.395601985747899e-05, "loss": 2.3022, "step": 58720 }, { "epoch": 99.22, "learning_rate": 6.391291804760427e-05, "loss": 2.3183, "step": 58740 }, { "epoch": 99.26, "learning_rate": 6.386980502834277e-05, "loss": 2.3138, "step": 58760 }, { "epoch": 99.29, "learning_rate": 6.382668083442989e-05, "loss": 2.3145, "step": 58780 }, { "epoch": 99.32, "learning_rate": 6.378354550060997e-05, "loss": 2.3048, "step": 58800 }, { "epoch": 99.36, "learning_rate": 6.374039906163642e-05, "loss": 2.3131, "step": 58820 }, { "epoch": 99.39, "learning_rate": 6.369724155227152e-05, "loss": 2.3216, "step": 58840 }, { "epoch": 99.43, "learning_rate": 6.365407300728653e-05, "loss": 2.3241, "step": 58860 }, { "epoch": 99.46, "learning_rate": 6.361089346146152e-05, "loss": 2.3188, "step": 58880 }, { "epoch": 99.49, "learning_rate": 6.356770294958549e-05, "loss": 2.3232, "step": 58900 }, { "epoch": 99.53, "learning_rate": 6.352450150645626e-05, "loss": 2.3165, "step": 58920 }, { "epoch": 99.56, "learning_rate": 6.348128916688045e-05, "loss": 2.3247, "step": 58940 }, { "epoch": 99.59, "learning_rate": 6.343806596567345e-05, "loss": 2.3515, "step": 58960 }, { "epoch": 99.63, "learning_rate": 6.339483193765941e-05, "loss": 2.3195, "step": 58980 }, { "epoch": 99.66, "learning_rate": 6.335158711767125e-05, "loss": 2.3521, "step": 59000 }, { "epoch": 99.66, "eval_loss": 2.6360507011413574, "eval_runtime": 47.5023, "eval_samples_per_second": 20.82, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.0036123021520311147, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03374268969161762, "eval_tse_type": 0.00028112071577424743, "step": 59000 }, { "epoch": 99.7, "learning_rate": 6.330833154055049e-05, "loss": 2.3336, "step": 59020 }, { "epoch": 99.73, "learning_rate": 6.326506524114739e-05, "loss": 2.3363, "step": 59040 }, { "epoch": 99.76, "learning_rate": 6.322178825432082e-05, "loss": 2.3469, "step": 59060 }, { "epoch": 99.8, "learning_rate": 6.317850061493827e-05, "loss": 2.3395, "step": 59080 }, { "epoch": 99.83, "learning_rate": 6.31352023578758e-05, "loss": 2.3385, "step": 59100 }, { "epoch": 99.86, "learning_rate": 6.309189351801805e-05, "loss": 2.3523, "step": 59120 }, { "epoch": 99.9, "learning_rate": 6.304857413025816e-05, "loss": 2.3339, "step": 59140 }, { "epoch": 99.93, "learning_rate": 6.300524422949776e-05, "loss": 2.3516, "step": 59160 }, { "epoch": 99.97, "learning_rate": 6.2961903850647e-05, "loss": 2.352, "step": 59180 }, { "epoch": 100.0, "learning_rate": 6.291855302862442e-05, "loss": 2.3407, "step": 59200 }, { "epoch": 100.03, "learning_rate": 6.287519179835702e-05, "loss": 2.2646, "step": 59220 }, { "epoch": 100.07, "learning_rate": 6.283182019478013e-05, "loss": 2.2781, "step": 59240 }, { "epoch": 100.1, "learning_rate": 6.278843825283749e-05, "loss": 2.2856, "step": 59260 }, { "epoch": 100.14, "learning_rate": 6.274504600748112e-05, "loss": 2.2917, "step": 59280 }, { "epoch": 100.17, "learning_rate": 6.27016434936714e-05, "loss": 2.3064, "step": 59300 }, { "epoch": 100.2, "learning_rate": 6.265823074637692e-05, "loss": 2.3053, "step": 59320 }, { "epoch": 100.24, "learning_rate": 6.261480780057458e-05, "loss": 2.2988, "step": 59340 }, { "epoch": 100.27, "learning_rate": 6.257137469124944e-05, "loss": 2.307, "step": 59360 }, { "epoch": 100.3, "learning_rate": 6.252793145339477e-05, "loss": 2.2935, "step": 59380 }, { "epoch": 100.34, "learning_rate": 6.248447812201201e-05, "loss": 2.2814, "step": 59400 }, { "epoch": 100.37, "learning_rate": 6.244101473211072e-05, "loss": 2.299, "step": 59420 }, { "epoch": 100.41, "learning_rate": 6.23975413187086e-05, "loss": 2.3196, "step": 59440 }, { "epoch": 100.44, "learning_rate": 6.235405791683134e-05, "loss": 2.3243, "step": 59460 }, { "epoch": 100.47, "learning_rate": 6.231056456151278e-05, "loss": 2.3026, "step": 59480 }, { "epoch": 100.51, "learning_rate": 6.226706128779468e-05, "loss": 2.3176, "step": 59500 }, { "epoch": 100.54, "learning_rate": 6.222354813072689e-05, "loss": 2.317, "step": 59520 }, { "epoch": 100.57, "learning_rate": 6.218002512536714e-05, "loss": 2.3105, "step": 59540 }, { "epoch": 100.61, "learning_rate": 6.213649230678116e-05, "loss": 2.323, "step": 59560 }, { "epoch": 100.64, "learning_rate": 6.209294971004253e-05, "loss": 2.3389, "step": 59580 }, { "epoch": 100.68, "learning_rate": 6.204939737023275e-05, "loss": 2.3472, "step": 59600 }, { "epoch": 100.71, "learning_rate": 6.200583532244114e-05, "loss": 2.3281, "step": 59620 }, { "epoch": 100.74, "learning_rate": 6.196226360176486e-05, "loss": 2.3392, "step": 59640 }, { "epoch": 100.78, "learning_rate": 6.191868224330886e-05, "loss": 2.3325, "step": 59660 }, { "epoch": 100.81, "learning_rate": 6.187509128218586e-05, "loss": 2.334, "step": 59680 }, { "epoch": 100.84, "learning_rate": 6.183149075351631e-05, "loss": 2.329, "step": 59700 }, { "epoch": 100.88, "learning_rate": 6.178788069242835e-05, "loss": 2.3408, "step": 59720 }, { "epoch": 100.91, "learning_rate": 6.174426113405783e-05, "loss": 2.3437, "step": 59740 }, { "epoch": 100.95, "learning_rate": 6.170063211354825e-05, "loss": 2.3345, "step": 59760 }, { "epoch": 100.98, "learning_rate": 6.165699366605072e-05, "loss": 2.3379, "step": 59780 }, { "epoch": 101.01, "learning_rate": 6.161334582672393e-05, "loss": 2.2955, "step": 59800 }, { "epoch": 101.05, "learning_rate": 6.156968863073417e-05, "loss": 2.2768, "step": 59820 }, { "epoch": 101.08, "learning_rate": 6.15282056599715e-05, "loss": 2.2786, "step": 59840 }, { "epoch": 101.11, "learning_rate": 6.148453031966447e-05, "loss": 2.272, "step": 59860 }, { "epoch": 101.15, "learning_rate": 6.14408457264788e-05, "loss": 2.2834, "step": 59880 }, { "epoch": 101.18, "learning_rate": 6.139715191561038e-05, "loss": 2.2731, "step": 59900 }, { "epoch": 101.22, "learning_rate": 6.135344892226253e-05, "loss": 2.2926, "step": 59920 }, { "epoch": 101.25, "learning_rate": 6.130973678164593e-05, "loss": 2.2875, "step": 59940 }, { "epoch": 101.28, "learning_rate": 6.126601552897869e-05, "loss": 2.2858, "step": 59960 }, { "epoch": 101.32, "learning_rate": 6.122228519948622e-05, "loss": 2.2891, "step": 59980 }, { "epoch": 101.35, "learning_rate": 6.117854582840129e-05, "loss": 2.2937, "step": 60000 }, { "epoch": 101.35, "eval_loss": 2.6247527599334717, "eval_runtime": 47.4643, "eval_samples_per_second": 20.837, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004146178190892868, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.033031864075150866, "eval_tse_type": 0.00033554435769788256, "step": 60000 }, { "epoch": 101.39, "learning_rate": 6.113479745096387e-05, "loss": 2.3096, "step": 60020 }, { "epoch": 101.42, "learning_rate": 6.109104010242128e-05, "loss": 2.3123, "step": 60040 }, { "epoch": 101.45, "learning_rate": 6.104727381802798e-05, "loss": 2.3019, "step": 60060 }, { "epoch": 101.49, "learning_rate": 6.100349863304573e-05, "loss": 2.2997, "step": 60080 }, { "epoch": 101.52, "learning_rate": 6.0959714582743364e-05, "loss": 2.3064, "step": 60100 }, { "epoch": 101.55, "learning_rate": 6.0915921702396916e-05, "loss": 2.319, "step": 60120 }, { "epoch": 101.59, "learning_rate": 6.0872120027289536e-05, "loss": 2.3174, "step": 60140 }, { "epoch": 101.62, "learning_rate": 6.08283095927114e-05, "loss": 2.3142, "step": 60160 }, { "epoch": 101.66, "learning_rate": 6.078449043395982e-05, "loss": 2.3278, "step": 60180 }, { "epoch": 101.69, "learning_rate": 6.074066258633908e-05, "loss": 2.3256, "step": 60200 }, { "epoch": 101.72, "learning_rate": 6.0696826085160505e-05, "loss": 2.3238, "step": 60220 }, { "epoch": 101.76, "learning_rate": 6.065298096574235e-05, "loss": 2.3055, "step": 60240 }, { "epoch": 101.79, "learning_rate": 6.060912726340986e-05, "loss": 2.3252, "step": 60260 }, { "epoch": 101.82, "learning_rate": 6.0565265013495144e-05, "loss": 2.3365, "step": 60280 }, { "epoch": 101.86, "learning_rate": 6.052139425133724e-05, "loss": 2.3171, "step": 60300 }, { "epoch": 101.89, "learning_rate": 6.0477515012282024e-05, "loss": 2.3257, "step": 60320 }, { "epoch": 101.93, "learning_rate": 6.043362733168223e-05, "loss": 2.3344, "step": 60340 }, { "epoch": 101.96, "learning_rate": 6.038973124489733e-05, "loss": 2.3307, "step": 60360 }, { "epoch": 101.99, "learning_rate": 6.034582678729362e-05, "loss": 2.3429, "step": 60380 }, { "epoch": 102.03, "learning_rate": 6.0301913994244165e-05, "loss": 2.2761, "step": 60400 }, { "epoch": 102.06, "learning_rate": 6.0257992901128655e-05, "loss": 2.2577, "step": 60420 }, { "epoch": 102.09, "learning_rate": 6.0214063543333555e-05, "loss": 2.2687, "step": 60440 }, { "epoch": 102.13, "learning_rate": 6.0170125956251934e-05, "loss": 2.2709, "step": 60460 }, { "epoch": 102.16, "learning_rate": 6.0126180175283554e-05, "loss": 2.273, "step": 60480 }, { "epoch": 102.2, "learning_rate": 6.0082226235834684e-05, "loss": 2.2673, "step": 60500 }, { "epoch": 102.23, "learning_rate": 6.003826417331825e-05, "loss": 2.2841, "step": 60520 }, { "epoch": 102.26, "learning_rate": 5.999429402315367e-05, "loss": 2.2968, "step": 60540 }, { "epoch": 102.3, "learning_rate": 5.995031582076693e-05, "loss": 2.271, "step": 60560 }, { "epoch": 102.33, "learning_rate": 5.990632960159046e-05, "loss": 2.2915, "step": 60580 }, { "epoch": 102.36, "learning_rate": 5.9862335401063155e-05, "loss": 2.3, "step": 60600 }, { "epoch": 102.4, "learning_rate": 5.981833325463034e-05, "loss": 2.3028, "step": 60620 }, { "epoch": 102.43, "learning_rate": 5.9774323197743776e-05, "loss": 2.2911, "step": 60640 }, { "epoch": 102.47, "learning_rate": 5.9730305265861565e-05, "loss": 2.2932, "step": 60660 }, { "epoch": 102.5, "learning_rate": 5.9686279494448125e-05, "loss": 2.3085, "step": 60680 }, { "epoch": 102.53, "learning_rate": 5.964224591897428e-05, "loss": 2.2809, "step": 60700 }, { "epoch": 102.57, "learning_rate": 5.959820457491704e-05, "loss": 2.3003, "step": 60720 }, { "epoch": 102.6, "learning_rate": 5.955415549775974e-05, "loss": 2.2935, "step": 60740 }, { "epoch": 102.64, "learning_rate": 5.9510098722991924e-05, "loss": 2.3131, "step": 60760 }, { "epoch": 102.67, "learning_rate": 5.946603428610935e-05, "loss": 2.3157, "step": 60780 }, { "epoch": 102.7, "learning_rate": 5.9421962222613924e-05, "loss": 2.3178, "step": 60800 }, { "epoch": 102.74, "learning_rate": 5.937788256801371e-05, "loss": 2.317, "step": 60820 }, { "epoch": 102.77, "learning_rate": 5.9333795357822906e-05, "loss": 2.3317, "step": 60840 }, { "epoch": 102.8, "learning_rate": 5.928970062756176e-05, "loss": 2.3107, "step": 60860 }, { "epoch": 102.84, "learning_rate": 5.924559841275661e-05, "loss": 2.325, "step": 60880 }, { "epoch": 102.87, "learning_rate": 5.920148874893982e-05, "loss": 2.3346, "step": 60900 }, { "epoch": 102.91, "learning_rate": 5.915737167164975e-05, "loss": 2.3138, "step": 60920 }, { "epoch": 102.94, "learning_rate": 5.9113247216430725e-05, "loss": 2.3153, "step": 60940 }, { "epoch": 102.97, "learning_rate": 5.906911541883302e-05, "loss": 2.3074, "step": 60960 }, { "epoch": 103.01, "learning_rate": 5.902497631441283e-05, "loss": 2.3062, "step": 60980 }, { "epoch": 103.04, "learning_rate": 5.898082993873223e-05, "loss": 2.2662, "step": 61000 }, { "epoch": 103.04, "eval_loss": 2.6157455444335938, "eval_runtime": 47.4833, "eval_samples_per_second": 20.828, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004475091648603663, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.033247145760106664, "eval_tse_type": 0.00043183350185372433, "step": 61000 }, { "epoch": 103.07, "learning_rate": 5.8936676327359154e-05, "loss": 2.2542, "step": 61020 }, { "epoch": 103.11, "learning_rate": 5.88925155158674e-05, "loss": 2.2555, "step": 61040 }, { "epoch": 103.14, "learning_rate": 5.884834753983649e-05, "loss": 2.2724, "step": 61060 }, { "epoch": 103.18, "learning_rate": 5.880417243485179e-05, "loss": 2.2722, "step": 61080 }, { "epoch": 103.21, "learning_rate": 5.8759990236504405e-05, "loss": 2.272, "step": 61100 }, { "epoch": 103.24, "learning_rate": 5.8715800980391086e-05, "loss": 2.2718, "step": 61120 }, { "epoch": 103.28, "learning_rate": 5.867160470211436e-05, "loss": 2.2786, "step": 61140 }, { "epoch": 103.31, "learning_rate": 5.8627401437282334e-05, "loss": 2.2754, "step": 61160 }, { "epoch": 103.34, "learning_rate": 5.858319122150881e-05, "loss": 2.2702, "step": 61180 }, { "epoch": 103.38, "learning_rate": 5.853897409041314e-05, "loss": 2.2912, "step": 61200 }, { "epoch": 103.41, "learning_rate": 5.849475007962031e-05, "loss": 2.2892, "step": 61220 }, { "epoch": 103.45, "learning_rate": 5.8450519224760746e-05, "loss": 2.2952, "step": 61240 }, { "epoch": 103.48, "learning_rate": 5.840628156147049e-05, "loss": 2.2924, "step": 61260 }, { "epoch": 103.51, "learning_rate": 5.8362037125391e-05, "loss": 2.3042, "step": 61280 }, { "epoch": 103.55, "learning_rate": 5.831778595216924e-05, "loss": 2.2761, "step": 61300 }, { "epoch": 103.58, "learning_rate": 5.8273528077457585e-05, "loss": 2.3046, "step": 61320 }, { "epoch": 103.61, "learning_rate": 5.822926353691378e-05, "loss": 2.3016, "step": 61340 }, { "epoch": 103.65, "learning_rate": 5.818499236620101e-05, "loss": 2.3065, "step": 61360 }, { "epoch": 103.68, "learning_rate": 5.81407146009877e-05, "loss": 2.3009, "step": 61380 }, { "epoch": 103.72, "learning_rate": 5.80964302769477e-05, "loss": 2.3058, "step": 61400 }, { "epoch": 103.75, "learning_rate": 5.805213942976004e-05, "loss": 2.2974, "step": 61420 }, { "epoch": 103.78, "learning_rate": 5.80078420951091e-05, "loss": 2.3086, "step": 61440 }, { "epoch": 103.82, "learning_rate": 5.7963538308684406e-05, "loss": 2.3129, "step": 61460 }, { "epoch": 103.85, "learning_rate": 5.791922810618075e-05, "loss": 2.3136, "step": 61480 }, { "epoch": 103.89, "learning_rate": 5.787491152329804e-05, "loss": 2.295, "step": 61500 }, { "epoch": 103.92, "learning_rate": 5.783058859574136e-05, "loss": 2.2978, "step": 61520 }, { "epoch": 103.95, "learning_rate": 5.7786259359220887e-05, "loss": 2.3184, "step": 61540 }, { "epoch": 103.99, "learning_rate": 5.774192384945188e-05, "loss": 2.3166, "step": 61560 }, { "epoch": 104.02, "learning_rate": 5.769758210215466e-05, "loss": 2.2875, "step": 61580 }, { "epoch": 104.05, "learning_rate": 5.7653234153054556e-05, "loss": 2.2524, "step": 61600 }, { "epoch": 104.09, "learning_rate": 5.7608880037881965e-05, "loss": 2.2642, "step": 61620 }, { "epoch": 104.12, "learning_rate": 5.756451979237214e-05, "loss": 2.269, "step": 61640 }, { "epoch": 104.16, "learning_rate": 5.752015345226537e-05, "loss": 2.2599, "step": 61660 }, { "epoch": 104.19, "learning_rate": 5.7475781053306776e-05, "loss": 2.2463, "step": 61680 }, { "epoch": 104.22, "learning_rate": 5.7431402631246424e-05, "loss": 2.2711, "step": 61700 }, { "epoch": 104.26, "learning_rate": 5.7387018221839197e-05, "loss": 2.2536, "step": 61720 }, { "epoch": 104.29, "learning_rate": 5.73426278608448e-05, "loss": 2.2713, "step": 61740 }, { "epoch": 104.32, "learning_rate": 5.729823158402777e-05, "loss": 2.2823, "step": 61760 }, { "epoch": 104.36, "learning_rate": 5.725382942715738e-05, "loss": 2.2796, "step": 61780 }, { "epoch": 104.39, "learning_rate": 5.720942142600764e-05, "loss": 2.2541, "step": 61800 }, { "epoch": 104.43, "learning_rate": 5.716500761635727e-05, "loss": 2.2788, "step": 61820 }, { "epoch": 104.46, "learning_rate": 5.712058803398969e-05, "loss": 2.2683, "step": 61840 }, { "epoch": 104.49, "learning_rate": 5.707616271469293e-05, "loss": 2.2898, "step": 61860 }, { "epoch": 104.53, "learning_rate": 5.7031731694259696e-05, "loss": 2.2814, "step": 61880 }, { "epoch": 104.56, "learning_rate": 5.698729500848722e-05, "loss": 2.2925, "step": 61900 }, { "epoch": 104.59, "learning_rate": 5.694285269317738e-05, "loss": 2.306, "step": 61920 }, { "epoch": 104.63, "learning_rate": 5.689840478413652e-05, "loss": 2.2874, "step": 61940 }, { "epoch": 104.66, "learning_rate": 5.685395131717553e-05, "loss": 2.2879, "step": 61960 }, { "epoch": 104.7, "learning_rate": 5.680949232810977e-05, "loss": 2.2877, "step": 61980 }, { "epoch": 104.73, "learning_rate": 5.676502785275901e-05, "loss": 2.296, "step": 62000 }, { "epoch": 104.73, "eval_loss": 2.6081273555755615, "eval_runtime": 50.8346, "eval_samples_per_second": 19.455, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.004712840923631677, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03142978121741027, "eval_tse_type": 0.00035712739957381837, "step": 62000 }, { "epoch": 104.76, "learning_rate": 5.672055792694753e-05, "loss": 2.299, "step": 62020 }, { "epoch": 104.8, "learning_rate": 5.66760825865039e-05, "loss": 2.3076, "step": 62040 }, { "epoch": 104.83, "learning_rate": 5.663160186726112e-05, "loss": 2.3144, "step": 62060 }, { "epoch": 104.86, "learning_rate": 5.658711580505649e-05, "loss": 2.3053, "step": 62080 }, { "epoch": 104.9, "learning_rate": 5.654262443573164e-05, "loss": 2.2974, "step": 62100 }, { "epoch": 104.93, "learning_rate": 5.649812779513245e-05, "loss": 2.2965, "step": 62120 }, { "epoch": 104.97, "learning_rate": 5.6453625919109074e-05, "loss": 2.2986, "step": 62140 }, { "epoch": 105.0, "learning_rate": 5.6409118843515854e-05, "loss": 2.2909, "step": 62160 }, { "epoch": 105.03, "learning_rate": 5.6364606604211345e-05, "loss": 2.2567, "step": 62180 }, { "epoch": 105.07, "learning_rate": 5.632008923705825e-05, "loss": 2.2407, "step": 62200 }, { "epoch": 105.1, "learning_rate": 5.627556677792343e-05, "loss": 2.2438, "step": 62220 }, { "epoch": 105.14, "learning_rate": 5.623103926267779e-05, "loss": 2.2538, "step": 62240 }, { "epoch": 105.17, "learning_rate": 5.6186506727196364e-05, "loss": 2.2554, "step": 62260 }, { "epoch": 105.2, "learning_rate": 5.614196920735821e-05, "loss": 2.2541, "step": 62280 }, { "epoch": 105.24, "learning_rate": 5.609742673904641e-05, "loss": 2.2575, "step": 62300 }, { "epoch": 105.27, "learning_rate": 5.6052879358148e-05, "loss": 2.2585, "step": 62320 }, { "epoch": 105.3, "learning_rate": 5.600832710055404e-05, "loss": 2.2517, "step": 62340 }, { "epoch": 105.34, "learning_rate": 5.596377000215945e-05, "loss": 2.2803, "step": 62360 }, { "epoch": 105.37, "learning_rate": 5.5919208098863084e-05, "loss": 2.2715, "step": 62380 }, { "epoch": 105.41, "learning_rate": 5.5874641426567684e-05, "loss": 2.2758, "step": 62400 }, { "epoch": 105.44, "learning_rate": 5.583007002117978e-05, "loss": 2.2809, "step": 62420 }, { "epoch": 105.47, "learning_rate": 5.5785493918609776e-05, "loss": 2.2825, "step": 62440 }, { "epoch": 105.51, "learning_rate": 5.5740913154771814e-05, "loss": 2.275, "step": 62460 }, { "epoch": 105.54, "learning_rate": 5.56963277655838e-05, "loss": 2.2765, "step": 62480 }, { "epoch": 105.57, "learning_rate": 5.5651737786967404e-05, "loss": 2.2791, "step": 62500 }, { "epoch": 105.61, "learning_rate": 5.560714325484796e-05, "loss": 2.2957, "step": 62520 }, { "epoch": 105.64, "learning_rate": 5.556254420515448e-05, "loss": 2.2953, "step": 62540 }, { "epoch": 105.68, "learning_rate": 5.552017095627057e-05, "loss": 2.2706, "step": 62560 }, { "epoch": 105.71, "learning_rate": 5.5475563200662275e-05, "loss": 2.2801, "step": 62580 }, { "epoch": 105.74, "learning_rate": 5.54309510334916e-05, "loss": 2.2807, "step": 62600 }, { "epoch": 105.78, "learning_rate": 5.538633449070177e-05, "loss": 2.291, "step": 62620 }, { "epoch": 105.81, "learning_rate": 5.5341713608239534e-05, "loss": 2.2895, "step": 62640 }, { "epoch": 105.84, "learning_rate": 5.529708842205512e-05, "loss": 2.2955, "step": 62660 }, { "epoch": 105.88, "learning_rate": 5.525245896810225e-05, "loss": 2.288, "step": 62680 }, { "epoch": 105.91, "learning_rate": 5.520782528233807e-05, "loss": 2.284, "step": 62700 }, { "epoch": 105.95, "learning_rate": 5.516318740072311e-05, "loss": 2.294, "step": 62720 }, { "epoch": 105.98, "learning_rate": 5.511854535922131e-05, "loss": 2.2958, "step": 62740 }, { "epoch": 106.01, "learning_rate": 5.5073899193799985e-05, "loss": 2.2622, "step": 62760 }, { "epoch": 106.05, "learning_rate": 5.502924894042971e-05, "loss": 2.2278, "step": 62780 }, { "epoch": 106.08, "learning_rate": 5.498459463508443e-05, "loss": 2.2378, "step": 62800 }, { "epoch": 106.11, "learning_rate": 5.4939936313741245e-05, "loss": 2.2397, "step": 62820 }, { "epoch": 106.15, "learning_rate": 5.4895274012380625e-05, "loss": 2.2511, "step": 62840 }, { "epoch": 106.18, "learning_rate": 5.485060776698615e-05, "loss": 2.2478, "step": 62860 }, { "epoch": 106.22, "learning_rate": 5.480593761354461e-05, "loss": 2.2558, "step": 62880 }, { "epoch": 106.25, "learning_rate": 5.476126358804594e-05, "loss": 2.2576, "step": 62900 }, { "epoch": 106.28, "learning_rate": 5.471658572648318e-05, "loss": 2.2568, "step": 62920 }, { "epoch": 106.32, "learning_rate": 5.467190406485252e-05, "loss": 2.262, "step": 62940 }, { "epoch": 106.35, "learning_rate": 5.462721863915312e-05, "loss": 2.254, "step": 62960 }, { "epoch": 106.39, "learning_rate": 5.458252948538724e-05, "loss": 2.2522, "step": 62980 }, { "epoch": 106.42, "learning_rate": 5.4537836639560125e-05, "loss": 2.2672, "step": 63000 }, { "epoch": 106.42, "eval_loss": 2.597033977508545, "eval_runtime": 48.9564, "eval_samples_per_second": 20.202, "eval_steps_per_second": 0.123, "eval_tse_ndup": 0.004876015186855477, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03177431396473742, "eval_tse_type": 0.0003672788379025068, "step": 63000 }, { "epoch": 106.45, "learning_rate": 5.449314013768e-05, "loss": 2.2633, "step": 63020 }, { "epoch": 106.49, "learning_rate": 5.444844001575802e-05, "loss": 2.2474, "step": 63040 }, { "epoch": 106.52, "learning_rate": 5.440373630980827e-05, "loss": 2.2761, "step": 63060 }, { "epoch": 106.55, "learning_rate": 5.435902905584771e-05, "loss": 2.27, "step": 63080 }, { "epoch": 106.59, "learning_rate": 5.4314318289896185e-05, "loss": 2.2901, "step": 63100 }, { "epoch": 106.62, "learning_rate": 5.4269604047976316e-05, "loss": 2.2814, "step": 63120 }, { "epoch": 106.66, "learning_rate": 5.4224886366113605e-05, "loss": 2.2735, "step": 63140 }, { "epoch": 106.69, "learning_rate": 5.418016528033625e-05, "loss": 2.2716, "step": 63160 }, { "epoch": 106.72, "learning_rate": 5.4135440826675235e-05, "loss": 2.2765, "step": 63180 }, { "epoch": 106.76, "learning_rate": 5.4090713041164245e-05, "loss": 2.2914, "step": 63200 }, { "epoch": 106.79, "learning_rate": 5.404598195983963e-05, "loss": 2.2833, "step": 63220 }, { "epoch": 106.82, "learning_rate": 5.400124761874045e-05, "loss": 2.2652, "step": 63240 }, { "epoch": 106.86, "learning_rate": 5.3956510053908306e-05, "loss": 2.2812, "step": 63260 }, { "epoch": 106.89, "learning_rate": 5.3911769301387505e-05, "loss": 2.2869, "step": 63280 }, { "epoch": 106.93, "learning_rate": 5.3867025397224814e-05, "loss": 2.2808, "step": 63300 }, { "epoch": 106.96, "learning_rate": 5.3822278377469616e-05, "loss": 2.3026, "step": 63320 }, { "epoch": 106.99, "learning_rate": 5.377752827817376e-05, "loss": 2.2837, "step": 63340 }, { "epoch": 107.03, "learning_rate": 5.373277513539162e-05, "loss": 2.2491, "step": 63360 }, { "epoch": 107.06, "learning_rate": 5.3688018985179956e-05, "loss": 2.2317, "step": 63380 }, { "epoch": 107.09, "learning_rate": 5.3643259863598015e-05, "loss": 2.2223, "step": 63400 }, { "epoch": 107.13, "learning_rate": 5.3598497806707406e-05, "loss": 2.2341, "step": 63420 }, { "epoch": 107.16, "learning_rate": 5.3553732850572104e-05, "loss": 2.2288, "step": 63440 }, { "epoch": 107.2, "learning_rate": 5.350896503125845e-05, "loss": 2.2444, "step": 63460 }, { "epoch": 107.23, "learning_rate": 5.346419438483503e-05, "loss": 2.2453, "step": 63480 }, { "epoch": 107.26, "learning_rate": 5.341942094737279e-05, "loss": 2.2444, "step": 63500 }, { "epoch": 107.3, "learning_rate": 5.3374644754944836e-05, "loss": 2.245, "step": 63520 }, { "epoch": 107.33, "learning_rate": 5.332986584362656e-05, "loss": 2.2478, "step": 63540 }, { "epoch": 107.36, "learning_rate": 5.328508424949551e-05, "loss": 2.2563, "step": 63560 }, { "epoch": 107.4, "learning_rate": 5.324030000863143e-05, "loss": 2.2545, "step": 63580 }, { "epoch": 107.43, "learning_rate": 5.319551315711615e-05, "loss": 2.2581, "step": 63600 }, { "epoch": 107.47, "learning_rate": 5.3150723731033644e-05, "loss": 2.244, "step": 63620 }, { "epoch": 107.5, "learning_rate": 5.3105931766469943e-05, "loss": 2.2545, "step": 63640 }, { "epoch": 107.53, "learning_rate": 5.30611372995131e-05, "loss": 2.2705, "step": 63660 }, { "epoch": 107.57, "learning_rate": 5.301634036625324e-05, "loss": 2.2615, "step": 63680 }, { "epoch": 107.6, "learning_rate": 5.297154100278241e-05, "loss": 2.2632, "step": 63700 }, { "epoch": 107.64, "learning_rate": 5.292673924519469e-05, "loss": 2.2676, "step": 63720 }, { "epoch": 107.67, "learning_rate": 5.2881935129586e-05, "loss": 2.277, "step": 63740 }, { "epoch": 107.7, "learning_rate": 5.283712869205426e-05, "loss": 2.2824, "step": 63760 }, { "epoch": 107.74, "learning_rate": 5.279231996869916e-05, "loss": 2.2842, "step": 63780 }, { "epoch": 107.77, "learning_rate": 5.27475089956223e-05, "loss": 2.274, "step": 63800 }, { "epoch": 107.8, "learning_rate": 5.270269580892706e-05, "loss": 2.2785, "step": 63820 }, { "epoch": 107.84, "learning_rate": 5.2657880444718625e-05, "loss": 2.2765, "step": 63840 }, { "epoch": 107.87, "learning_rate": 5.2613062939103927e-05, "loss": 2.2733, "step": 63860 }, { "epoch": 107.91, "learning_rate": 5.256824332819162e-05, "loss": 2.2809, "step": 63880 }, { "epoch": 107.94, "learning_rate": 5.2523421648092044e-05, "loss": 2.2792, "step": 63900 }, { "epoch": 107.97, "learning_rate": 5.247859793491723e-05, "loss": 2.283, "step": 63920 }, { "epoch": 108.01, "learning_rate": 5.243377222478083e-05, "loss": 2.2723, "step": 63940 }, { "epoch": 108.04, "learning_rate": 5.2388944553798106e-05, "loss": 2.2172, "step": 63960 }, { "epoch": 108.07, "learning_rate": 5.2344114958085896e-05, "loss": 2.2221, "step": 63980 }, { "epoch": 108.11, "learning_rate": 5.2299283473762606e-05, "loss": 2.2378, "step": 64000 }, { "epoch": 108.11, "eval_loss": 2.5892648696899414, "eval_runtime": 47.4999, "eval_samples_per_second": 20.821, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.00398650239969483, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03195638244943147, "eval_tse_type": 0.0003826817211549264, "step": 64000 }, { "epoch": 108.14, "learning_rate": 5.225445013694816e-05, "loss": 2.2405, "step": 64020 }, { "epoch": 108.18, "learning_rate": 5.2209614983763954e-05, "loss": 2.2324, "step": 64040 }, { "epoch": 108.21, "learning_rate": 5.216477805033287e-05, "loss": 2.2271, "step": 64060 }, { "epoch": 108.24, "learning_rate": 5.2119939372779216e-05, "loss": 2.2343, "step": 64080 }, { "epoch": 108.28, "learning_rate": 5.207509898722869e-05, "loss": 2.2378, "step": 64100 }, { "epoch": 108.31, "learning_rate": 5.20302569298084e-05, "loss": 2.2396, "step": 64120 }, { "epoch": 108.34, "learning_rate": 5.198541323664676e-05, "loss": 2.2535, "step": 64140 }, { "epoch": 108.38, "learning_rate": 5.1940567943873545e-05, "loss": 2.2533, "step": 64160 }, { "epoch": 108.41, "learning_rate": 5.1895721087619774e-05, "loss": 2.2434, "step": 64180 }, { "epoch": 108.45, "learning_rate": 5.185087270401778e-05, "loss": 2.246, "step": 64200 }, { "epoch": 108.48, "learning_rate": 5.180602282920107e-05, "loss": 2.255, "step": 64220 }, { "epoch": 108.51, "learning_rate": 5.1761171499304403e-05, "loss": 2.2554, "step": 64240 }, { "epoch": 108.55, "learning_rate": 5.171631875046366e-05, "loss": 2.2481, "step": 64260 }, { "epoch": 108.58, "learning_rate": 5.167146461881589e-05, "loss": 2.2584, "step": 64280 }, { "epoch": 108.61, "learning_rate": 5.162660914049927e-05, "loss": 2.2496, "step": 64300 }, { "epoch": 108.65, "learning_rate": 5.158175235165306e-05, "loss": 2.266, "step": 64320 }, { "epoch": 108.68, "learning_rate": 5.153689428841754e-05, "loss": 2.2703, "step": 64340 }, { "epoch": 108.72, "learning_rate": 5.1492034986934046e-05, "loss": 2.2663, "step": 64360 }, { "epoch": 108.75, "learning_rate": 5.144717448334493e-05, "loss": 2.2624, "step": 64380 }, { "epoch": 108.78, "learning_rate": 5.140231281379345e-05, "loss": 2.2639, "step": 64400 }, { "epoch": 108.82, "learning_rate": 5.135745001442388e-05, "loss": 2.26, "step": 64420 }, { "epoch": 108.85, "learning_rate": 5.1312586121381335e-05, "loss": 2.2627, "step": 64440 }, { "epoch": 108.89, "learning_rate": 5.1267721170811886e-05, "loss": 2.2613, "step": 64460 }, { "epoch": 108.92, "learning_rate": 5.122285519886236e-05, "loss": 2.2713, "step": 64480 }, { "epoch": 108.95, "learning_rate": 5.117798824168052e-05, "loss": 2.2873, "step": 64500 }, { "epoch": 108.99, "learning_rate": 5.113312033541481e-05, "loss": 2.2707, "step": 64520 }, { "epoch": 109.02, "learning_rate": 5.1088251516214515e-05, "loss": 2.2342, "step": 64540 }, { "epoch": 109.05, "learning_rate": 5.104338182022962e-05, "loss": 2.2223, "step": 64560 }, { "epoch": 109.09, "learning_rate": 5.099851128361085e-05, "loss": 2.2194, "step": 64580 }, { "epoch": 109.12, "learning_rate": 5.0953639942509565e-05, "loss": 2.2193, "step": 64600 }, { "epoch": 109.16, "learning_rate": 5.090876783307781e-05, "loss": 2.2249, "step": 64620 }, { "epoch": 109.19, "learning_rate": 5.086389499146823e-05, "loss": 2.2338, "step": 64640 }, { "epoch": 109.22, "learning_rate": 5.081902145383406e-05, "loss": 2.2304, "step": 64660 }, { "epoch": 109.26, "learning_rate": 5.07741472563291e-05, "loss": 2.2413, "step": 64680 }, { "epoch": 109.29, "learning_rate": 5.072927243510766e-05, "loss": 2.2229, "step": 64700 }, { "epoch": 109.32, "learning_rate": 5.06843970263246e-05, "loss": 2.226, "step": 64720 }, { "epoch": 109.36, "learning_rate": 5.063952106613522e-05, "loss": 2.2317, "step": 64740 }, { "epoch": 109.39, "learning_rate": 5.059464459069527e-05, "loss": 2.2323, "step": 64760 }, { "epoch": 109.43, "learning_rate": 5.0549767636160915e-05, "loss": 2.2496, "step": 64780 }, { "epoch": 109.46, "learning_rate": 5.0504890238688705e-05, "loss": 2.2398, "step": 64800 }, { "epoch": 109.49, "learning_rate": 5.046001243443554e-05, "loss": 2.257, "step": 64820 }, { "epoch": 109.53, "learning_rate": 5.041513425955868e-05, "loss": 2.2599, "step": 64840 }, { "epoch": 109.56, "learning_rate": 5.0370255750215636e-05, "loss": 2.2503, "step": 64860 }, { "epoch": 109.59, "learning_rate": 5.0325376942564215e-05, "loss": 2.2577, "step": 64880 }, { "epoch": 109.63, "learning_rate": 5.028049787276249e-05, "loss": 2.2485, "step": 64900 }, { "epoch": 109.66, "learning_rate": 5.023561857696867e-05, "loss": 2.2531, "step": 64920 }, { "epoch": 109.7, "learning_rate": 5.019073909134124e-05, "loss": 2.2586, "step": 64940 }, { "epoch": 109.73, "learning_rate": 5.0145859452038755e-05, "loss": 2.2691, "step": 64960 }, { "epoch": 109.76, "learning_rate": 5.010097969521996e-05, "loss": 2.2694, "step": 64980 }, { "epoch": 109.8, "learning_rate": 5.0056099857043624e-05, "loss": 2.2457, "step": 65000 }, { "epoch": 109.8, "eval_loss": 2.5843918323516846, "eval_runtime": 47.5161, "eval_samples_per_second": 20.814, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004661628299613522, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03039183561700737, "eval_tse_type": 0.00044763228850690933, "step": 65000 }, { "epoch": 109.83, "learning_rate": 5.001121997366867e-05, "loss": 2.2672, "step": 65020 }, { "epoch": 109.86, "learning_rate": 4.9966340081253966e-05, "loss": 2.2523, "step": 65040 }, { "epoch": 109.9, "learning_rate": 4.992146021595847e-05, "loss": 2.2544, "step": 65060 }, { "epoch": 109.93, "learning_rate": 4.9876580413941045e-05, "loss": 2.2535, "step": 65080 }, { "epoch": 109.97, "learning_rate": 4.983170071136055e-05, "loss": 2.2583, "step": 65100 }, { "epoch": 110.0, "learning_rate": 4.978682114437576e-05, "loss": 2.2705, "step": 65120 }, { "epoch": 110.03, "learning_rate": 4.974194174914531e-05, "loss": 2.1967, "step": 65140 }, { "epoch": 110.07, "learning_rate": 4.9697062561827764e-05, "loss": 2.2078, "step": 65160 }, { "epoch": 110.1, "learning_rate": 4.9652183618581404e-05, "loss": 2.2159, "step": 65180 }, { "epoch": 110.14, "learning_rate": 4.960730495556446e-05, "loss": 2.2046, "step": 65200 }, { "epoch": 110.17, "learning_rate": 4.9562426608934774e-05, "loss": 2.2123, "step": 65220 }, { "epoch": 110.2, "learning_rate": 4.951754861485008e-05, "loss": 2.2236, "step": 65240 }, { "epoch": 110.24, "learning_rate": 4.947267100946777e-05, "loss": 2.2242, "step": 65260 }, { "epoch": 110.27, "learning_rate": 4.942779382894489e-05, "loss": 2.2472, "step": 65280 }, { "epoch": 110.3, "learning_rate": 4.93829171094382e-05, "loss": 2.2312, "step": 65300 }, { "epoch": 110.34, "learning_rate": 4.933804088710403e-05, "loss": 2.227, "step": 65320 }, { "epoch": 110.37, "learning_rate": 4.9293165198098376e-05, "loss": 2.2231, "step": 65340 }, { "epoch": 110.41, "learning_rate": 4.924829007857674e-05, "loss": 2.2399, "step": 65360 }, { "epoch": 110.44, "learning_rate": 4.920341556469421e-05, "loss": 2.2482, "step": 65380 }, { "epoch": 110.47, "learning_rate": 4.915854169260539e-05, "loss": 2.2318, "step": 65400 }, { "epoch": 110.51, "learning_rate": 4.911366849846432e-05, "loss": 2.2382, "step": 65420 }, { "epoch": 110.54, "learning_rate": 4.9068796018424535e-05, "loss": 2.2403, "step": 65440 }, { "epoch": 110.57, "learning_rate": 4.9023924288638975e-05, "loss": 2.2393, "step": 65460 }, { "epoch": 110.61, "learning_rate": 4.897905334525999e-05, "loss": 2.2485, "step": 65480 }, { "epoch": 110.64, "learning_rate": 4.893418322443928e-05, "loss": 2.2434, "step": 65500 }, { "epoch": 110.68, "learning_rate": 4.8889313962327876e-05, "loss": 2.2399, "step": 65520 }, { "epoch": 110.71, "learning_rate": 4.884444559507618e-05, "loss": 2.2565, "step": 65540 }, { "epoch": 110.74, "learning_rate": 4.879957815883378e-05, "loss": 2.2508, "step": 65560 }, { "epoch": 110.78, "learning_rate": 4.875471168974959e-05, "loss": 2.2577, "step": 65580 }, { "epoch": 110.81, "learning_rate": 4.870984622397169e-05, "loss": 2.2534, "step": 65600 }, { "epoch": 110.84, "learning_rate": 4.866498179764739e-05, "loss": 2.2419, "step": 65620 }, { "epoch": 110.88, "learning_rate": 4.862011844692313e-05, "loss": 2.2655, "step": 65640 }, { "epoch": 110.91, "learning_rate": 4.857525620794451e-05, "loss": 2.2519, "step": 65660 }, { "epoch": 110.95, "learning_rate": 4.853039511685626e-05, "loss": 2.2565, "step": 65680 }, { "epoch": 110.98, "learning_rate": 4.848553520980208e-05, "loss": 2.2471, "step": 65700 }, { "epoch": 111.01, "learning_rate": 4.844067652292487e-05, "loss": 2.2414, "step": 65720 }, { "epoch": 111.05, "learning_rate": 4.839581909236638e-05, "loss": 2.2022, "step": 65740 }, { "epoch": 111.08, "learning_rate": 4.83509629542675e-05, "loss": 2.188, "step": 65760 }, { "epoch": 111.11, "learning_rate": 4.830610814476797e-05, "loss": 2.1921, "step": 65780 }, { "epoch": 111.15, "learning_rate": 4.826125470000651e-05, "loss": 2.2088, "step": 65800 }, { "epoch": 111.18, "learning_rate": 4.821640265612075e-05, "loss": 2.2002, "step": 65820 }, { "epoch": 111.22, "learning_rate": 4.817155204924714e-05, "loss": 2.2228, "step": 65840 }, { "epoch": 111.25, "learning_rate": 4.812670291552103e-05, "loss": 2.2169, "step": 65860 }, { "epoch": 111.28, "learning_rate": 4.808185529107652e-05, "loss": 2.2194, "step": 65880 }, { "epoch": 111.32, "learning_rate": 4.8037009212046586e-05, "loss": 2.2201, "step": 65900 }, { "epoch": 111.35, "learning_rate": 4.799216471456284e-05, "loss": 2.2295, "step": 65920 }, { "epoch": 111.39, "learning_rate": 4.794732183475574e-05, "loss": 2.2339, "step": 65940 }, { "epoch": 111.42, "learning_rate": 4.790248060875436e-05, "loss": 2.2344, "step": 65960 }, { "epoch": 111.45, "learning_rate": 4.785764107268647e-05, "loss": 2.2413, "step": 65980 }, { "epoch": 111.49, "learning_rate": 4.7812803262678475e-05, "loss": 2.2349, "step": 66000 }, { "epoch": 111.49, "eval_loss": 2.5746378898620605, "eval_runtime": 50.8615, "eval_samples_per_second": 19.445, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.005091489181962179, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.031770601317693443, "eval_tse_type": 0.00035849893660813784, "step": 66000 }, { "epoch": 111.52, "learning_rate": 4.776796721485537e-05, "loss": 2.2249, "step": 66020 }, { "epoch": 111.55, "learning_rate": 4.772313296534079e-05, "loss": 2.2444, "step": 66040 }, { "epoch": 111.59, "learning_rate": 4.767830055025682e-05, "loss": 2.226, "step": 66060 }, { "epoch": 111.62, "learning_rate": 4.7633470005724176e-05, "loss": 2.2421, "step": 66080 }, { "epoch": 111.66, "learning_rate": 4.758864136786202e-05, "loss": 2.241, "step": 66100 }, { "epoch": 111.69, "learning_rate": 4.7543814672787945e-05, "loss": 2.2329, "step": 66120 }, { "epoch": 111.72, "learning_rate": 4.7498989956618035e-05, "loss": 2.2463, "step": 66140 }, { "epoch": 111.76, "learning_rate": 4.745416725546673e-05, "loss": 2.2587, "step": 66160 }, { "epoch": 111.79, "learning_rate": 4.74093466054469e-05, "loss": 2.2374, "step": 66180 }, { "epoch": 111.82, "learning_rate": 4.7364528042669705e-05, "loss": 2.2524, "step": 66200 }, { "epoch": 111.86, "learning_rate": 4.731971160324465e-05, "loss": 2.2392, "step": 66220 }, { "epoch": 111.89, "learning_rate": 4.727489732327958e-05, "loss": 2.2469, "step": 66240 }, { "epoch": 111.93, "learning_rate": 4.7230085238880475e-05, "loss": 2.2462, "step": 66260 }, { "epoch": 111.96, "learning_rate": 4.71852753861517e-05, "loss": 2.2592, "step": 66280 }, { "epoch": 111.99, "learning_rate": 4.714270812602657e-05, "loss": 2.2527, "step": 66300 }, { "epoch": 112.03, "learning_rate": 4.709790272889296e-05, "loss": 2.2011, "step": 66320 }, { "epoch": 112.06, "learning_rate": 4.705309966992672e-05, "loss": 2.1849, "step": 66340 }, { "epoch": 112.09, "learning_rate": 4.700829898522483e-05, "loss": 2.2052, "step": 66360 }, { "epoch": 112.13, "learning_rate": 4.6963500710882435e-05, "loss": 2.1981, "step": 66380 }, { "epoch": 112.16, "learning_rate": 4.691870488299264e-05, "loss": 2.2063, "step": 66400 }, { "epoch": 112.2, "learning_rate": 4.68739115376467e-05, "loss": 2.2066, "step": 66420 }, { "epoch": 112.23, "learning_rate": 4.682912071093374e-05, "loss": 2.2099, "step": 66440 }, { "epoch": 112.26, "learning_rate": 4.6784332438940963e-05, "loss": 2.2131, "step": 66460 }, { "epoch": 112.3, "learning_rate": 4.673954675775347e-05, "loss": 2.2188, "step": 66480 }, { "epoch": 112.33, "learning_rate": 4.669476370345425e-05, "loss": 2.2169, "step": 66500 }, { "epoch": 112.36, "learning_rate": 4.664998331212422e-05, "loss": 2.2119, "step": 66520 }, { "epoch": 112.4, "learning_rate": 4.660520561984211e-05, "loss": 2.2177, "step": 66540 }, { "epoch": 112.43, "learning_rate": 4.6560430662684545e-05, "loss": 2.2199, "step": 66560 }, { "epoch": 112.47, "learning_rate": 4.6515658476725834e-05, "loss": 2.2066, "step": 66580 }, { "epoch": 112.5, "learning_rate": 4.6470889098038174e-05, "loss": 2.2315, "step": 66600 }, { "epoch": 112.53, "learning_rate": 4.6426122562691427e-05, "loss": 2.2245, "step": 66620 }, { "epoch": 112.57, "learning_rate": 4.638135890675317e-05, "loss": 2.2265, "step": 66640 }, { "epoch": 112.6, "learning_rate": 4.633659816628869e-05, "loss": 2.2283, "step": 66660 }, { "epoch": 112.64, "learning_rate": 4.629184037736089e-05, "loss": 2.2289, "step": 66680 }, { "epoch": 112.67, "learning_rate": 4.624708557603034e-05, "loss": 2.2208, "step": 66700 }, { "epoch": 112.7, "learning_rate": 4.620233379835513e-05, "loss": 2.2176, "step": 66720 }, { "epoch": 112.74, "learning_rate": 4.615758508039098e-05, "loss": 2.2373, "step": 66740 }, { "epoch": 112.77, "learning_rate": 4.6112839458191146e-05, "loss": 2.2379, "step": 66760 }, { "epoch": 112.8, "learning_rate": 4.606809696780634e-05, "loss": 2.246, "step": 66780 }, { "epoch": 112.84, "learning_rate": 4.602335764528481e-05, "loss": 2.2412, "step": 66800 }, { "epoch": 112.87, "learning_rate": 4.5978621526672176e-05, "loss": 2.2338, "step": 66820 }, { "epoch": 112.91, "learning_rate": 4.593388864801156e-05, "loss": 2.2458, "step": 66840 }, { "epoch": 112.94, "learning_rate": 4.5889159045343404e-05, "loss": 2.2484, "step": 66860 }, { "epoch": 112.97, "learning_rate": 4.584443275470555e-05, "loss": 2.2505, "step": 66880 }, { "epoch": 113.01, "learning_rate": 4.579970981213319e-05, "loss": 2.2372, "step": 66900 }, { "epoch": 113.04, "learning_rate": 4.575499025365874e-05, "loss": 2.18, "step": 66920 }, { "epoch": 113.07, "learning_rate": 4.571027411531199e-05, "loss": 2.1834, "step": 66940 }, { "epoch": 113.11, "learning_rate": 4.566556143311989e-05, "loss": 2.2036, "step": 66960 }, { "epoch": 113.14, "learning_rate": 4.562085224310667e-05, "loss": 2.1953, "step": 66980 }, { "epoch": 113.18, "learning_rate": 4.5576146581293685e-05, "loss": 2.1919, "step": 67000 }, { "epoch": 113.18, "eval_loss": 2.567063331604004, "eval_runtime": 47.8109, "eval_samples_per_second": 20.686, "eval_steps_per_second": 0.125, "eval_tse_ndup": 0.00366446102423603, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03183495542273439, "eval_tse_type": 0.0004318335018537243, "step": 67000 }, { "epoch": 113.21, "learning_rate": 4.5531444483699496e-05, "loss": 2.2279, "step": 67020 }, { "epoch": 113.24, "learning_rate": 4.548674598633979e-05, "loss": 2.1915, "step": 67040 }, { "epoch": 113.28, "learning_rate": 4.54420511252273e-05, "loss": 2.2072, "step": 67060 }, { "epoch": 113.31, "learning_rate": 4.539735993637192e-05, "loss": 2.2144, "step": 67080 }, { "epoch": 113.34, "learning_rate": 4.5352672455780474e-05, "loss": 2.1916, "step": 67100 }, { "epoch": 113.38, "learning_rate": 4.530798871945693e-05, "loss": 2.2115, "step": 67120 }, { "epoch": 113.41, "learning_rate": 4.5263308763402084e-05, "loss": 2.2184, "step": 67140 }, { "epoch": 113.45, "learning_rate": 4.5218632623613833e-05, "loss": 2.2086, "step": 67160 }, { "epoch": 113.48, "learning_rate": 4.517396033608692e-05, "loss": 2.2246, "step": 67180 }, { "epoch": 113.51, "learning_rate": 4.512929193681298e-05, "loss": 2.2148, "step": 67200 }, { "epoch": 113.55, "learning_rate": 4.508462746178057e-05, "loss": 2.2275, "step": 67220 }, { "epoch": 113.58, "learning_rate": 4.503996694697502e-05, "loss": 2.2257, "step": 67240 }, { "epoch": 113.61, "learning_rate": 4.4995310428378524e-05, "loss": 2.2275, "step": 67260 }, { "epoch": 113.65, "learning_rate": 4.495065794196999e-05, "loss": 2.2067, "step": 67280 }, { "epoch": 113.68, "learning_rate": 4.4906009523725165e-05, "loss": 2.2119, "step": 67300 }, { "epoch": 113.72, "learning_rate": 4.486136520961647e-05, "loss": 2.2324, "step": 67320 }, { "epoch": 113.75, "learning_rate": 4.481672503561301e-05, "loss": 2.2291, "step": 67340 }, { "epoch": 113.78, "learning_rate": 4.477208903768057e-05, "loss": 2.2385, "step": 67360 }, { "epoch": 113.82, "learning_rate": 4.4727457251781544e-05, "loss": 2.2394, "step": 67380 }, { "epoch": 113.85, "learning_rate": 4.4682829713875e-05, "loss": 2.2375, "step": 67400 }, { "epoch": 113.89, "learning_rate": 4.463820645991651e-05, "loss": 2.2284, "step": 67420 }, { "epoch": 113.92, "learning_rate": 4.4593587525858224e-05, "loss": 2.2405, "step": 67440 }, { "epoch": 113.95, "learning_rate": 4.4548972947648806e-05, "loss": 2.2442, "step": 67460 }, { "epoch": 113.99, "learning_rate": 4.450436276123343e-05, "loss": 2.226, "step": 67480 }, { "epoch": 114.02, "learning_rate": 4.445975700255373e-05, "loss": 2.1975, "step": 67500 }, { "epoch": 114.05, "learning_rate": 4.441515570754774e-05, "loss": 2.1857, "step": 67520 }, { "epoch": 114.09, "learning_rate": 4.437055891214992e-05, "loss": 2.1835, "step": 67540 }, { "epoch": 114.12, "learning_rate": 4.4325966652291103e-05, "loss": 2.1798, "step": 67560 }, { "epoch": 114.16, "learning_rate": 4.428137896389848e-05, "loss": 2.1794, "step": 67580 }, { "epoch": 114.19, "learning_rate": 4.423679588289552e-05, "loss": 2.1927, "step": 67600 }, { "epoch": 114.22, "learning_rate": 4.419221744520201e-05, "loss": 2.1983, "step": 67620 }, { "epoch": 114.26, "learning_rate": 4.414764368673404e-05, "loss": 2.1889, "step": 67640 }, { "epoch": 114.29, "learning_rate": 4.410307464340381e-05, "loss": 2.1983, "step": 67660 }, { "epoch": 114.32, "learning_rate": 4.405851035111985e-05, "loss": 2.1973, "step": 67680 }, { "epoch": 114.36, "learning_rate": 4.4013950845786764e-05, "loss": 2.2014, "step": 67700 }, { "epoch": 114.39, "learning_rate": 4.3969396163305386e-05, "loss": 2.2061, "step": 67720 }, { "epoch": 114.43, "learning_rate": 4.392484633957258e-05, "loss": 2.2058, "step": 67740 }, { "epoch": 114.46, "learning_rate": 4.3880301410481345e-05, "loss": 2.2005, "step": 67760 }, { "epoch": 114.49, "learning_rate": 4.383576141192074e-05, "loss": 2.2272, "step": 67780 }, { "epoch": 114.53, "learning_rate": 4.37912263797758e-05, "loss": 2.2158, "step": 67800 }, { "epoch": 114.56, "learning_rate": 4.3746696349927655e-05, "loss": 2.2176, "step": 67820 }, { "epoch": 114.59, "learning_rate": 4.370217135825329e-05, "loss": 2.2191, "step": 67840 }, { "epoch": 114.63, "learning_rate": 4.3657651440625726e-05, "loss": 2.2179, "step": 67860 }, { "epoch": 114.66, "learning_rate": 4.361313663291382e-05, "loss": 2.2229, "step": 67880 }, { "epoch": 114.7, "learning_rate": 4.356862697098238e-05, "loss": 2.2155, "step": 67900 }, { "epoch": 114.73, "learning_rate": 4.3524122490692027e-05, "loss": 2.2132, "step": 67920 }, { "epoch": 114.76, "learning_rate": 4.347962322789921e-05, "loss": 2.2158, "step": 67940 }, { "epoch": 114.8, "learning_rate": 4.3435129218456193e-05, "loss": 2.2274, "step": 67960 }, { "epoch": 114.83, "learning_rate": 4.339064049821097e-05, "loss": 2.2316, "step": 67980 }, { "epoch": 114.86, "learning_rate": 4.334615710300735e-05, "loss": 2.2477, "step": 68000 }, { "epoch": 114.86, "eval_loss": 2.5606555938720703, "eval_runtime": 47.4493, "eval_samples_per_second": 20.843, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004817544469031207, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03099397112054427, "eval_tse_type": 0.0007074345579148409, "step": 68000 }, { "epoch": 114.9, "learning_rate": 4.330167906868474e-05, "loss": 2.2318, "step": 68020 }, { "epoch": 114.93, "learning_rate": 4.325720643107832e-05, "loss": 2.2342, "step": 68040 }, { "epoch": 114.97, "learning_rate": 4.3212739226018904e-05, "loss": 2.2288, "step": 68060 }, { "epoch": 115.0, "learning_rate": 4.31682774893329e-05, "loss": 2.2382, "step": 68080 }, { "epoch": 115.03, "learning_rate": 4.3123821256842324e-05, "loss": 2.1705, "step": 68100 }, { "epoch": 115.07, "learning_rate": 4.3079370564364755e-05, "loss": 2.1913, "step": 68120 }, { "epoch": 115.1, "learning_rate": 4.303492544771334e-05, "loss": 2.1782, "step": 68140 }, { "epoch": 115.14, "learning_rate": 4.2990485942696665e-05, "loss": 2.1838, "step": 68160 }, { "epoch": 115.17, "learning_rate": 4.2946052085118857e-05, "loss": 2.195, "step": 68180 }, { "epoch": 115.2, "learning_rate": 4.2901623910779494e-05, "loss": 2.1826, "step": 68200 }, { "epoch": 115.24, "learning_rate": 4.2857201455473525e-05, "loss": 2.184, "step": 68220 }, { "epoch": 115.27, "learning_rate": 4.281278475499133e-05, "loss": 2.1998, "step": 68240 }, { "epoch": 115.3, "learning_rate": 4.276837384511864e-05, "loss": 2.2042, "step": 68260 }, { "epoch": 115.34, "learning_rate": 4.2723968761636534e-05, "loss": 2.1992, "step": 68280 }, { "epoch": 115.37, "learning_rate": 4.267956954032136e-05, "loss": 2.2026, "step": 68300 }, { "epoch": 115.41, "learning_rate": 4.263517621694478e-05, "loss": 2.1895, "step": 68320 }, { "epoch": 115.44, "learning_rate": 4.259078882727373e-05, "loss": 2.2081, "step": 68340 }, { "epoch": 115.47, "learning_rate": 4.2546407407070254e-05, "loss": 2.1983, "step": 68360 }, { "epoch": 115.51, "learning_rate": 4.250203199209173e-05, "loss": 2.207, "step": 68380 }, { "epoch": 115.54, "learning_rate": 4.245766261809059e-05, "loss": 2.2221, "step": 68400 }, { "epoch": 115.57, "learning_rate": 4.241329932081446e-05, "loss": 2.2158, "step": 68420 }, { "epoch": 115.61, "learning_rate": 4.2368942136006015e-05, "loss": 2.1987, "step": 68440 }, { "epoch": 115.64, "learning_rate": 4.2324591099403064e-05, "loss": 2.226, "step": 68460 }, { "epoch": 115.68, "learning_rate": 4.228024624673844e-05, "loss": 2.2055, "step": 68480 }, { "epoch": 115.71, "learning_rate": 4.223590761373996e-05, "loss": 2.2182, "step": 68500 }, { "epoch": 115.74, "learning_rate": 4.219157523613051e-05, "loss": 2.2134, "step": 68520 }, { "epoch": 115.78, "learning_rate": 4.2147249149627824e-05, "loss": 2.2096, "step": 68540 }, { "epoch": 115.81, "learning_rate": 4.210292938994469e-05, "loss": 2.2034, "step": 68560 }, { "epoch": 115.84, "learning_rate": 4.205861599278868e-05, "loss": 2.2085, "step": 68580 }, { "epoch": 115.88, "learning_rate": 4.201430899386233e-05, "loss": 2.22, "step": 68600 }, { "epoch": 115.91, "learning_rate": 4.197000842886301e-05, "loss": 2.2176, "step": 68620 }, { "epoch": 115.95, "learning_rate": 4.192571433348284e-05, "loss": 2.2275, "step": 68640 }, { "epoch": 115.98, "learning_rate": 4.18814267434088e-05, "loss": 2.2078, "step": 68660 }, { "epoch": 116.01, "learning_rate": 4.183714569432258e-05, "loss": 2.1973, "step": 68680 }, { "epoch": 116.05, "learning_rate": 4.1792871221900655e-05, "loss": 2.1775, "step": 68700 }, { "epoch": 116.08, "learning_rate": 4.174860336181412e-05, "loss": 2.1743, "step": 68720 }, { "epoch": 116.11, "learning_rate": 4.170655505189254e-05, "loss": 2.1681, "step": 68740 }, { "epoch": 116.15, "learning_rate": 4.1662300188439016e-05, "loss": 2.1694, "step": 68760 }, { "epoch": 116.18, "learning_rate": 4.161805204251963e-05, "loss": 2.177, "step": 68780 }, { "epoch": 116.22, "learning_rate": 4.157381064978432e-05, "loss": 2.1831, "step": 68800 }, { "epoch": 116.25, "learning_rate": 4.1529576045877565e-05, "loss": 2.1831, "step": 68820 }, { "epoch": 116.28, "learning_rate": 4.1485348266438386e-05, "loss": 2.1931, "step": 68840 }, { "epoch": 116.32, "learning_rate": 4.144112734710034e-05, "loss": 2.193, "step": 68860 }, { "epoch": 116.35, "learning_rate": 4.139691332349138e-05, "loss": 2.2098, "step": 68880 }, { "epoch": 116.39, "learning_rate": 4.135270623123398e-05, "loss": 2.1779, "step": 68900 }, { "epoch": 116.42, "learning_rate": 4.130850610594497e-05, "loss": 2.1898, "step": 68920 }, { "epoch": 116.45, "learning_rate": 4.126431298323561e-05, "loss": 2.2044, "step": 68940 }, { "epoch": 116.49, "learning_rate": 4.122012689871149e-05, "loss": 2.205, "step": 68960 }, { "epoch": 116.52, "learning_rate": 4.117594788797253e-05, "loss": 2.2057, "step": 68980 }, { "epoch": 116.55, "learning_rate": 4.113177598661301e-05, "loss": 2.1895, "step": 69000 }, { "epoch": 116.55, "eval_loss": 2.554255247116089, "eval_runtime": 50.962, "eval_samples_per_second": 19.407, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.004456970751369127, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.033351287520319316, "eval_tse_type": 0.0006056201550387597, "step": 69000 }, { "epoch": 116.59, "learning_rate": 4.108761123022136e-05, "loss": 2.2017, "step": 69020 }, { "epoch": 116.62, "learning_rate": 4.104345365438039e-05, "loss": 2.2165, "step": 69040 }, { "epoch": 116.66, "learning_rate": 4.099930329466703e-05, "loss": 2.2083, "step": 69060 }, { "epoch": 116.69, "learning_rate": 4.095516018665244e-05, "loss": 2.2081, "step": 69080 }, { "epoch": 116.72, "learning_rate": 4.09110243659019e-05, "loss": 2.2065, "step": 69100 }, { "epoch": 116.76, "learning_rate": 4.0866895867974874e-05, "loss": 2.2027, "step": 69120 }, { "epoch": 116.79, "learning_rate": 4.082277472842488e-05, "loss": 2.208, "step": 69140 }, { "epoch": 116.82, "learning_rate": 4.077866098279952e-05, "loss": 2.1954, "step": 69160 }, { "epoch": 116.86, "learning_rate": 4.073455466664048e-05, "loss": 2.2217, "step": 69180 }, { "epoch": 116.89, "learning_rate": 4.069045581548335e-05, "loss": 2.2172, "step": 69200 }, { "epoch": 116.93, "learning_rate": 4.0646364464857854e-05, "loss": 2.1965, "step": 69220 }, { "epoch": 116.96, "learning_rate": 4.060228065028753e-05, "loss": 2.2285, "step": 69240 }, { "epoch": 116.99, "learning_rate": 4.055820440728996e-05, "loss": 2.2177, "step": 69260 }, { "epoch": 117.03, "learning_rate": 4.051413577137656e-05, "loss": 2.1659, "step": 69280 }, { "epoch": 117.06, "learning_rate": 4.047007477805263e-05, "loss": 2.1683, "step": 69300 }, { "epoch": 117.09, "learning_rate": 4.0426021462817325e-05, "loss": 2.1777, "step": 69320 }, { "epoch": 117.13, "learning_rate": 4.03819758611636e-05, "loss": 2.1729, "step": 69340 }, { "epoch": 117.16, "learning_rate": 4.0337938008578204e-05, "loss": 2.1827, "step": 69360 }, { "epoch": 117.2, "learning_rate": 4.029390794054161e-05, "loss": 2.1744, "step": 69380 }, { "epoch": 117.23, "learning_rate": 4.02498856925281e-05, "loss": 2.172, "step": 69400 }, { "epoch": 117.26, "learning_rate": 4.020587130000558e-05, "loss": 2.1886, "step": 69420 }, { "epoch": 117.3, "learning_rate": 4.0161864798435646e-05, "loss": 2.1896, "step": 69440 }, { "epoch": 117.33, "learning_rate": 4.0117866223273546e-05, "loss": 2.17, "step": 69460 }, { "epoch": 117.36, "learning_rate": 4.007387560996814e-05, "loss": 2.2075, "step": 69480 }, { "epoch": 117.4, "learning_rate": 4.002989299396187e-05, "loss": 2.1785, "step": 69500 }, { "epoch": 117.43, "learning_rate": 3.998591841069072e-05, "loss": 2.1874, "step": 69520 }, { "epoch": 117.47, "learning_rate": 3.994195189558423e-05, "loss": 2.2015, "step": 69540 }, { "epoch": 117.5, "learning_rate": 3.9897993484065435e-05, "loss": 2.1784, "step": 69560 }, { "epoch": 117.53, "learning_rate": 3.985404321155083e-05, "loss": 2.1873, "step": 69580 }, { "epoch": 117.57, "learning_rate": 3.9810101113450356e-05, "loss": 2.1961, "step": 69600 }, { "epoch": 117.6, "learning_rate": 3.976616722516735e-05, "loss": 2.1805, "step": 69620 }, { "epoch": 117.64, "learning_rate": 3.9722241582098574e-05, "loss": 2.2068, "step": 69640 }, { "epoch": 117.67, "learning_rate": 3.967832421963411e-05, "loss": 2.1996, "step": 69660 }, { "epoch": 117.7, "learning_rate": 3.963441517315738e-05, "loss": 2.2086, "step": 69680 }, { "epoch": 117.74, "learning_rate": 3.959051447804515e-05, "loss": 2.2043, "step": 69700 }, { "epoch": 117.77, "learning_rate": 3.954662216966736e-05, "loss": 2.2006, "step": 69720 }, { "epoch": 117.8, "learning_rate": 3.950273828338731e-05, "loss": 2.2012, "step": 69740 }, { "epoch": 117.84, "learning_rate": 3.945886285456138e-05, "loss": 2.2014, "step": 69760 }, { "epoch": 117.87, "learning_rate": 3.941499591853928e-05, "loss": 2.2047, "step": 69780 }, { "epoch": 117.91, "learning_rate": 3.937113751066377e-05, "loss": 2.2125, "step": 69800 }, { "epoch": 117.94, "learning_rate": 3.932728766627079e-05, "loss": 2.1946, "step": 69820 }, { "epoch": 117.97, "learning_rate": 3.9283446420689365e-05, "loss": 2.2136, "step": 69840 }, { "epoch": 118.01, "learning_rate": 3.9239613809241595e-05, "loss": 2.1912, "step": 69860 }, { "epoch": 118.04, "learning_rate": 3.919578986724263e-05, "loss": 2.1715, "step": 69880 }, { "epoch": 118.07, "learning_rate": 3.91519746300006e-05, "loss": 2.1701, "step": 69900 }, { "epoch": 118.11, "learning_rate": 3.91081681328167e-05, "loss": 2.1539, "step": 69920 }, { "epoch": 118.14, "learning_rate": 3.9064370410984976e-05, "loss": 2.16, "step": 69940 }, { "epoch": 118.18, "learning_rate": 3.902058149979252e-05, "loss": 2.1514, "step": 69960 }, { "epoch": 118.21, "learning_rate": 3.8976801434519213e-05, "loss": 2.1707, "step": 69980 }, { "epoch": 118.24, "learning_rate": 3.89330302504379e-05, "loss": 2.1637, "step": 70000 }, { "epoch": 118.24, "eval_loss": 2.547391176223755, "eval_runtime": 49.2899, "eval_samples_per_second": 20.065, "eval_steps_per_second": 0.122, "eval_tse_ndup": 0.0035748532727668705, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03135054931534757, "eval_tse_type": 0.0003428075258099138, "step": 70000 }, { "epoch": 118.28, "learning_rate": 3.888926798281422e-05, "loss": 2.1963, "step": 70020 }, { "epoch": 118.31, "learning_rate": 3.884551466690664e-05, "loss": 2.1775, "step": 70040 }, { "epoch": 118.34, "learning_rate": 3.880177033796643e-05, "loss": 2.1822, "step": 70060 }, { "epoch": 118.38, "learning_rate": 3.8758035031237565e-05, "loss": 2.1762, "step": 70080 }, { "epoch": 118.41, "learning_rate": 3.871430878195686e-05, "loss": 2.1821, "step": 70100 }, { "epoch": 118.45, "learning_rate": 3.867059162535369e-05, "loss": 2.1828, "step": 70120 }, { "epoch": 118.48, "learning_rate": 3.8626883596650226e-05, "loss": 2.1742, "step": 70140 }, { "epoch": 118.51, "learning_rate": 3.858318473106124e-05, "loss": 2.1713, "step": 70160 }, { "epoch": 118.55, "learning_rate": 3.853949506379408e-05, "loss": 2.1893, "step": 70180 }, { "epoch": 118.58, "learning_rate": 3.849581463004877e-05, "loss": 2.1871, "step": 70200 }, { "epoch": 118.61, "learning_rate": 3.84521434650178e-05, "loss": 2.207, "step": 70220 }, { "epoch": 118.65, "learning_rate": 3.840848160388628e-05, "loss": 2.1941, "step": 70240 }, { "epoch": 118.68, "learning_rate": 3.836482908183175e-05, "loss": 2.1886, "step": 70260 }, { "epoch": 118.72, "learning_rate": 3.832118593402426e-05, "loss": 2.1996, "step": 70280 }, { "epoch": 118.75, "learning_rate": 3.827755219562635e-05, "loss": 2.1953, "step": 70300 }, { "epoch": 118.78, "learning_rate": 3.823392790179288e-05, "loss": 2.1966, "step": 70320 }, { "epoch": 118.82, "learning_rate": 3.819031308767119e-05, "loss": 2.1935, "step": 70340 }, { "epoch": 118.85, "learning_rate": 3.814670778840094e-05, "loss": 2.2139, "step": 70360 }, { "epoch": 118.89, "learning_rate": 3.810311203911412e-05, "loss": 2.205, "step": 70380 }, { "epoch": 118.92, "learning_rate": 3.8059525874935045e-05, "loss": 2.1983, "step": 70400 }, { "epoch": 118.95, "learning_rate": 3.8015949330980296e-05, "loss": 2.1951, "step": 70420 }, { "epoch": 118.99, "learning_rate": 3.797238244235874e-05, "loss": 2.1979, "step": 70440 }, { "epoch": 119.02, "learning_rate": 3.792882524417137e-05, "loss": 2.18, "step": 70460 }, { "epoch": 119.05, "learning_rate": 3.7885277771511495e-05, "loss": 2.1545, "step": 70480 }, { "epoch": 119.09, "learning_rate": 3.7841740059464495e-05, "loss": 2.1536, "step": 70500 }, { "epoch": 119.12, "learning_rate": 3.7798212143107924e-05, "loss": 2.166, "step": 70520 }, { "epoch": 119.16, "learning_rate": 3.775469405751143e-05, "loss": 2.1646, "step": 70540 }, { "epoch": 119.19, "learning_rate": 3.7711185837736765e-05, "loss": 2.1532, "step": 70560 }, { "epoch": 119.22, "learning_rate": 3.7667687518837716e-05, "loss": 2.1757, "step": 70580 }, { "epoch": 119.26, "learning_rate": 3.762419913586006e-05, "loss": 2.1677, "step": 70600 }, { "epoch": 119.29, "learning_rate": 3.758072072384167e-05, "loss": 2.1733, "step": 70620 }, { "epoch": 119.32, "learning_rate": 3.753725231781223e-05, "loss": 2.1643, "step": 70640 }, { "epoch": 119.36, "learning_rate": 3.749379395279352e-05, "loss": 2.171, "step": 70660 }, { "epoch": 119.39, "learning_rate": 3.7450345663799113e-05, "loss": 2.1729, "step": 70680 }, { "epoch": 119.43, "learning_rate": 3.740690748583453e-05, "loss": 2.179, "step": 70700 }, { "epoch": 119.46, "learning_rate": 3.736347945389715e-05, "loss": 2.1875, "step": 70720 }, { "epoch": 119.49, "learning_rate": 3.732006160297611e-05, "loss": 2.184, "step": 70740 }, { "epoch": 119.53, "learning_rate": 3.7276653968052434e-05, "loss": 2.1702, "step": 70760 }, { "epoch": 119.56, "learning_rate": 3.723325658409882e-05, "loss": 2.1874, "step": 70780 }, { "epoch": 119.59, "learning_rate": 3.71898694860798e-05, "loss": 2.17, "step": 70800 }, { "epoch": 119.63, "learning_rate": 3.714649270895153e-05, "loss": 2.1942, "step": 70820 }, { "epoch": 119.66, "learning_rate": 3.7103126287661935e-05, "loss": 2.1807, "step": 70840 }, { "epoch": 119.7, "learning_rate": 3.7059770257150555e-05, "loss": 2.1919, "step": 70860 }, { "epoch": 119.73, "learning_rate": 3.7016424652348536e-05, "loss": 2.1939, "step": 70880 }, { "epoch": 119.76, "learning_rate": 3.697308950817868e-05, "loss": 2.1762, "step": 70900 }, { "epoch": 119.8, "learning_rate": 3.69297648595553e-05, "loss": 2.1842, "step": 70920 }, { "epoch": 119.83, "learning_rate": 3.6886450741384306e-05, "loss": 2.1964, "step": 70940 }, { "epoch": 119.86, "learning_rate": 3.6843147188563075e-05, "loss": 2.2087, "step": 70960 }, { "epoch": 119.9, "learning_rate": 3.6799854235980513e-05, "loss": 2.1909, "step": 70980 }, { "epoch": 119.93, "learning_rate": 3.675657191851698e-05, "loss": 2.2071, "step": 71000 }, { "epoch": 119.93, "eval_loss": 2.5382301807403564, "eval_runtime": 47.3996, "eval_samples_per_second": 20.865, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.004431673769041034, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03141421335139171, "eval_tse_type": 0.00010747656849457508, "step": 71000 }, { "epoch": 119.97, "learning_rate": 3.671330027104425e-05, "loss": 2.1844, "step": 71020 }, { "epoch": 120.0, "learning_rate": 3.6670039328425505e-05, "loss": 2.1934, "step": 71040 }, { "epoch": 120.03, "learning_rate": 3.662678912551529e-05, "loss": 2.1381, "step": 71060 }, { "epoch": 120.07, "learning_rate": 3.658354969715955e-05, "loss": 2.1403, "step": 71080 }, { "epoch": 120.1, "learning_rate": 3.654032107819547e-05, "loss": 2.1517, "step": 71100 }, { "epoch": 120.14, "learning_rate": 3.649710330345161e-05, "loss": 2.1495, "step": 71120 }, { "epoch": 120.17, "learning_rate": 3.64538964077477e-05, "loss": 2.1716, "step": 71140 }, { "epoch": 120.2, "learning_rate": 3.641070042589478e-05, "loss": 2.1473, "step": 71160 }, { "epoch": 120.24, "learning_rate": 3.636751539269511e-05, "loss": 2.154, "step": 71180 }, { "epoch": 120.27, "learning_rate": 3.6324341342942017e-05, "loss": 2.1597, "step": 71200 }, { "epoch": 120.3, "learning_rate": 3.628117831142011e-05, "loss": 2.1724, "step": 71220 }, { "epoch": 120.34, "learning_rate": 3.623802633290504e-05, "loss": 2.1653, "step": 71240 }, { "epoch": 120.37, "learning_rate": 3.6197042222829426e-05, "loss": 2.1706, "step": 71260 }, { "epoch": 120.41, "learning_rate": 3.6153911897667496e-05, "loss": 2.1671, "step": 71280 }, { "epoch": 120.44, "learning_rate": 3.6110792728048635e-05, "loss": 2.1789, "step": 71300 }, { "epoch": 120.47, "learning_rate": 3.6067684748713235e-05, "loss": 2.1671, "step": 71320 }, { "epoch": 120.51, "learning_rate": 3.602458799439256e-05, "loss": 2.1657, "step": 71340 }, { "epoch": 120.54, "learning_rate": 3.598150249980892e-05, "loss": 2.1863, "step": 71360 }, { "epoch": 120.57, "learning_rate": 3.593842829967552e-05, "loss": 2.1698, "step": 71380 }, { "epoch": 120.61, "learning_rate": 3.5895365428696446e-05, "loss": 2.1783, "step": 71400 }, { "epoch": 120.64, "learning_rate": 3.58523139215667e-05, "loss": 2.1882, "step": 71420 }, { "epoch": 120.68, "learning_rate": 3.5809273812972074e-05, "loss": 2.1899, "step": 71440 }, { "epoch": 120.71, "learning_rate": 3.576624513758924e-05, "loss": 2.1927, "step": 71460 }, { "epoch": 120.74, "learning_rate": 3.5723227930085576e-05, "loss": 2.1816, "step": 71480 }, { "epoch": 120.78, "learning_rate": 3.568022222511931e-05, "loss": 2.191, "step": 71500 }, { "epoch": 120.81, "learning_rate": 3.563722805733937e-05, "loss": 2.1844, "step": 71520 }, { "epoch": 120.84, "learning_rate": 3.559424546138535e-05, "loss": 2.1694, "step": 71540 }, { "epoch": 120.88, "learning_rate": 3.5551274471887566e-05, "loss": 2.2022, "step": 71560 }, { "epoch": 120.91, "learning_rate": 3.550831512346695e-05, "loss": 2.1886, "step": 71580 }, { "epoch": 120.95, "learning_rate": 3.546536745073511e-05, "loss": 2.1777, "step": 71600 }, { "epoch": 120.98, "learning_rate": 3.542243148829417e-05, "loss": 2.1868, "step": 71620 }, { "epoch": 121.01, "learning_rate": 3.5379507270736865e-05, "loss": 2.1728, "step": 71640 }, { "epoch": 121.05, "learning_rate": 3.533659483264652e-05, "loss": 2.1448, "step": 71660 }, { "epoch": 121.08, "learning_rate": 3.529369420859682e-05, "loss": 2.1491, "step": 71680 }, { "epoch": 121.11, "learning_rate": 3.525080543315209e-05, "loss": 2.141, "step": 71700 }, { "epoch": 121.15, "learning_rate": 3.520792854086702e-05, "loss": 2.1434, "step": 71720 }, { "epoch": 121.18, "learning_rate": 3.516506356628675e-05, "loss": 2.1548, "step": 71740 }, { "epoch": 121.22, "learning_rate": 3.51222105439468e-05, "loss": 2.1519, "step": 71760 }, { "epoch": 121.25, "learning_rate": 3.507936950837309e-05, "loss": 2.1439, "step": 71780 }, { "epoch": 121.28, "learning_rate": 3.5036540494081886e-05, "loss": 2.1713, "step": 71800 }, { "epoch": 121.32, "learning_rate": 3.499372353557971e-05, "loss": 2.1584, "step": 71820 }, { "epoch": 121.35, "learning_rate": 3.495091866736346e-05, "loss": 2.1696, "step": 71840 }, { "epoch": 121.39, "learning_rate": 3.49081259239202e-05, "loss": 2.165, "step": 71860 }, { "epoch": 121.42, "learning_rate": 3.4865345339727307e-05, "loss": 2.1762, "step": 71880 }, { "epoch": 121.45, "learning_rate": 3.48225769492523e-05, "loss": 2.1646, "step": 71900 }, { "epoch": 121.49, "learning_rate": 3.477982078695291e-05, "loss": 2.1753, "step": 71920 }, { "epoch": 121.52, "learning_rate": 3.473707688727701e-05, "loss": 2.1753, "step": 71940 }, { "epoch": 121.55, "learning_rate": 3.4694345284662566e-05, "loss": 2.1714, "step": 71960 }, { "epoch": 121.59, "learning_rate": 3.4651626013537684e-05, "loss": 2.1697, "step": 71980 }, { "epoch": 121.62, "learning_rate": 3.460891910832049e-05, "loss": 2.1835, "step": 72000 }, { "epoch": 121.62, "eval_loss": 2.5357210636138916, "eval_runtime": 47.4231, "eval_samples_per_second": 20.855, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.003946419283396043, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.032960075494760176, "eval_tse_type": 0.0004335889225929671, "step": 72000 }, { "epoch": 121.66, "learning_rate": 3.45662246034192e-05, "loss": 2.183, "step": 72020 }, { "epoch": 121.69, "learning_rate": 3.452354253323194e-05, "loss": 2.1676, "step": 72040 }, { "epoch": 121.72, "learning_rate": 3.448087293214693e-05, "loss": 2.1642, "step": 72060 }, { "epoch": 121.76, "learning_rate": 3.443821583454231e-05, "loss": 2.1796, "step": 72080 }, { "epoch": 121.79, "learning_rate": 3.43955712747861e-05, "loss": 2.166, "step": 72100 }, { "epoch": 121.82, "learning_rate": 3.435293928723627e-05, "loss": 2.185, "step": 72120 }, { "epoch": 121.86, "learning_rate": 3.4310319906240626e-05, "loss": 2.1824, "step": 72140 }, { "epoch": 121.89, "learning_rate": 3.426771316613686e-05, "loss": 2.1815, "step": 72160 }, { "epoch": 121.93, "learning_rate": 3.4225119101252425e-05, "loss": 2.1774, "step": 72180 }, { "epoch": 121.96, "learning_rate": 3.4182537745904614e-05, "loss": 2.1728, "step": 72200 }, { "epoch": 121.99, "learning_rate": 3.4139969134400415e-05, "loss": 2.1813, "step": 72220 }, { "epoch": 122.03, "learning_rate": 3.409741330103664e-05, "loss": 2.1459, "step": 72240 }, { "epoch": 122.06, "learning_rate": 3.405487028009974e-05, "loss": 2.1341, "step": 72260 }, { "epoch": 122.09, "learning_rate": 3.401234010586583e-05, "loss": 2.1337, "step": 72280 }, { "epoch": 122.13, "learning_rate": 3.396982281260075e-05, "loss": 2.146, "step": 72300 }, { "epoch": 122.16, "learning_rate": 3.392731843455987e-05, "loss": 2.1448, "step": 72320 }, { "epoch": 122.2, "learning_rate": 3.388482700598823e-05, "loss": 2.1488, "step": 72340 }, { "epoch": 122.23, "learning_rate": 3.384234856112039e-05, "loss": 2.1569, "step": 72360 }, { "epoch": 122.26, "learning_rate": 3.379988313418046e-05, "loss": 2.1538, "step": 72380 }, { "epoch": 122.3, "learning_rate": 3.3757430759382105e-05, "loss": 2.1583, "step": 72400 }, { "epoch": 122.33, "learning_rate": 3.371499147092839e-05, "loss": 2.1588, "step": 72420 }, { "epoch": 122.36, "learning_rate": 3.3672565303011926e-05, "loss": 2.1596, "step": 72440 }, { "epoch": 122.4, "learning_rate": 3.363015228981468e-05, "loss": 2.1707, "step": 72460 }, { "epoch": 122.43, "learning_rate": 3.358775246550806e-05, "loss": 2.1598, "step": 72480 }, { "epoch": 122.47, "learning_rate": 3.354536586425283e-05, "loss": 2.1622, "step": 72500 }, { "epoch": 122.5, "learning_rate": 3.3502992520199104e-05, "loss": 2.1624, "step": 72520 }, { "epoch": 122.53, "learning_rate": 3.346063246748637e-05, "loss": 2.1513, "step": 72540 }, { "epoch": 122.57, "learning_rate": 3.3418285740243286e-05, "loss": 2.161, "step": 72560 }, { "epoch": 122.6, "learning_rate": 3.337595237258791e-05, "loss": 2.1552, "step": 72580 }, { "epoch": 122.64, "learning_rate": 3.333363239862741e-05, "loss": 2.1554, "step": 72600 }, { "epoch": 122.67, "learning_rate": 3.3291325852458274e-05, "loss": 2.157, "step": 72620 }, { "epoch": 122.7, "learning_rate": 3.3249032768166096e-05, "loss": 2.1671, "step": 72640 }, { "epoch": 122.74, "learning_rate": 3.3206753179825664e-05, "loss": 2.1841, "step": 72660 }, { "epoch": 122.77, "learning_rate": 3.3164487121500874e-05, "loss": 2.1698, "step": 72680 }, { "epoch": 122.8, "learning_rate": 3.312223462724472e-05, "loss": 2.1752, "step": 72700 }, { "epoch": 122.84, "learning_rate": 3.3079995731099285e-05, "loss": 2.1687, "step": 72720 }, { "epoch": 122.87, "learning_rate": 3.303777046709565e-05, "loss": 2.188, "step": 72740 }, { "epoch": 122.91, "learning_rate": 3.2995558869254014e-05, "loss": 2.1668, "step": 72760 }, { "epoch": 122.94, "learning_rate": 3.2953360971583436e-05, "loss": 2.1812, "step": 72780 }, { "epoch": 122.97, "learning_rate": 3.291117680808202e-05, "loss": 2.1768, "step": 72800 }, { "epoch": 123.01, "learning_rate": 3.286900641273681e-05, "loss": 2.1734, "step": 72820 }, { "epoch": 123.04, "learning_rate": 3.282684981952369e-05, "loss": 2.1362, "step": 72840 }, { "epoch": 123.07, "learning_rate": 3.278470706240751e-05, "loss": 2.1436, "step": 72860 }, { "epoch": 123.11, "learning_rate": 3.274257817534188e-05, "loss": 2.1404, "step": 72880 }, { "epoch": 123.14, "learning_rate": 3.2700463192269325e-05, "loss": 2.1367, "step": 72900 }, { "epoch": 123.18, "learning_rate": 3.2658362147121086e-05, "loss": 2.116, "step": 72920 }, { "epoch": 123.21, "learning_rate": 3.261627507381725e-05, "loss": 2.136, "step": 72940 }, { "epoch": 123.24, "learning_rate": 3.257420200626661e-05, "loss": 2.1531, "step": 72960 }, { "epoch": 123.28, "learning_rate": 3.2532142978366654e-05, "loss": 2.1544, "step": 72980 }, { "epoch": 123.31, "learning_rate": 3.24900980240036e-05, "loss": 2.1463, "step": 73000 }, { "epoch": 123.31, "eval_loss": 2.5296030044555664, "eval_runtime": 47.4683, "eval_samples_per_second": 20.835, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004114381317956316, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03209363938463498, "eval_tse_type": 0.0007429229398547771, "step": 73000 }, { "epoch": 123.34, "learning_rate": 3.24480671770523e-05, "loss": 2.1407, "step": 73020 }, { "epoch": 123.38, "learning_rate": 3.2406050471376245e-05, "loss": 2.1516, "step": 73040 }, { "epoch": 123.41, "learning_rate": 3.236404794082754e-05, "loss": 2.1571, "step": 73060 }, { "epoch": 123.45, "learning_rate": 3.2322059619246856e-05, "loss": 2.1435, "step": 73080 }, { "epoch": 123.48, "learning_rate": 3.228008554046347e-05, "loss": 2.1523, "step": 73100 }, { "epoch": 123.51, "learning_rate": 3.223812573829506e-05, "loss": 2.1632, "step": 73120 }, { "epoch": 123.55, "learning_rate": 3.2196180246547954e-05, "loss": 2.1716, "step": 73140 }, { "epoch": 123.58, "learning_rate": 3.215424909901683e-05, "loss": 2.1545, "step": 73160 }, { "epoch": 123.61, "learning_rate": 3.2112332329484895e-05, "loss": 2.1701, "step": 73180 }, { "epoch": 123.65, "learning_rate": 3.2070429971723695e-05, "loss": 2.1641, "step": 73200 }, { "epoch": 123.68, "learning_rate": 3.2028542059493224e-05, "loss": 2.1693, "step": 73220 }, { "epoch": 123.72, "learning_rate": 3.198666862654182e-05, "loss": 2.1524, "step": 73240 }, { "epoch": 123.75, "learning_rate": 3.1944809706606124e-05, "loss": 2.1498, "step": 73260 }, { "epoch": 123.78, "learning_rate": 3.190296533341116e-05, "loss": 2.1765, "step": 73280 }, { "epoch": 123.82, "learning_rate": 3.186113554067013e-05, "loss": 2.1713, "step": 73300 }, { "epoch": 123.85, "learning_rate": 3.181932036208458e-05, "loss": 2.1694, "step": 73320 }, { "epoch": 123.89, "learning_rate": 3.177751983134423e-05, "loss": 2.178, "step": 73340 }, { "epoch": 123.92, "learning_rate": 3.1735733982127e-05, "loss": 2.1723, "step": 73360 }, { "epoch": 123.95, "learning_rate": 3.169396284809904e-05, "loss": 2.1635, "step": 73380 }, { "epoch": 123.99, "learning_rate": 3.165220646291454e-05, "loss": 2.1729, "step": 73400 }, { "epoch": 124.02, "learning_rate": 3.1610464860215904e-05, "loss": 2.1563, "step": 73420 }, { "epoch": 124.05, "learning_rate": 3.156873807363356e-05, "loss": 2.1293, "step": 73440 }, { "epoch": 124.09, "learning_rate": 3.152702613678607e-05, "loss": 2.1267, "step": 73460 }, { "epoch": 124.12, "learning_rate": 3.148532908327993e-05, "loss": 2.139, "step": 73480 }, { "epoch": 124.16, "learning_rate": 3.144364694670976e-05, "loss": 2.1172, "step": 73500 }, { "epoch": 124.19, "learning_rate": 3.1401979760658054e-05, "loss": 2.1292, "step": 73520 }, { "epoch": 124.22, "learning_rate": 3.1360327558695335e-05, "loss": 2.1394, "step": 73540 }, { "epoch": 124.26, "learning_rate": 3.1318690374380046e-05, "loss": 2.1241, "step": 73560 }, { "epoch": 124.29, "learning_rate": 3.127706824125848e-05, "loss": 2.1293, "step": 73580 }, { "epoch": 124.32, "learning_rate": 3.123546119286487e-05, "loss": 2.1459, "step": 73600 }, { "epoch": 124.36, "learning_rate": 3.119386926272124e-05, "loss": 2.1625, "step": 73620 }, { "epoch": 124.39, "learning_rate": 3.115229248433747e-05, "loss": 2.16, "step": 73640 }, { "epoch": 124.43, "learning_rate": 3.1110730891211206e-05, "loss": 2.1519, "step": 73660 }, { "epoch": 124.46, "learning_rate": 3.1069184516827887e-05, "loss": 2.1588, "step": 73680 }, { "epoch": 124.49, "learning_rate": 3.1027653394660676e-05, "loss": 2.1675, "step": 73700 }, { "epoch": 124.53, "learning_rate": 3.098613755817044e-05, "loss": 2.1515, "step": 73720 }, { "epoch": 124.56, "learning_rate": 3.094463704080575e-05, "loss": 2.1502, "step": 73740 }, { "epoch": 124.59, "learning_rate": 3.09031518760028e-05, "loss": 2.1526, "step": 73760 }, { "epoch": 124.63, "learning_rate": 3.0861682097185464e-05, "loss": 2.1701, "step": 73780 }, { "epoch": 124.66, "learning_rate": 3.0820227737765176e-05, "loss": 2.154, "step": 73800 }, { "epoch": 124.7, "learning_rate": 3.077878883114096e-05, "loss": 2.1478, "step": 73820 }, { "epoch": 124.73, "learning_rate": 3.073736541069943e-05, "loss": 2.1544, "step": 73840 }, { "epoch": 124.76, "learning_rate": 3.069595750981465e-05, "loss": 2.1489, "step": 73860 }, { "epoch": 124.8, "learning_rate": 3.065456516184824e-05, "loss": 2.1618, "step": 73880 }, { "epoch": 124.83, "learning_rate": 3.061318840014925e-05, "loss": 2.1645, "step": 73900 }, { "epoch": 124.86, "learning_rate": 3.057182725805421e-05, "loss": 2.1651, "step": 73920 }, { "epoch": 124.9, "learning_rate": 3.053048176888702e-05, "loss": 2.1648, "step": 73940 }, { "epoch": 124.93, "learning_rate": 3.0489151965958994e-05, "loss": 2.1671, "step": 73960 }, { "epoch": 124.97, "learning_rate": 3.0447837882568864e-05, "loss": 2.1627, "step": 73980 }, { "epoch": 125.0, "learning_rate": 3.0406539552002557e-05, "loss": 2.1569, "step": 74000 }, { "epoch": 125.0, "eval_loss": 2.520303726196289, "eval_runtime": 47.0319, "eval_samples_per_second": 21.028, "eval_steps_per_second": 0.128, "eval_tse_ndup": 0.004721879915856345, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03195639919245855, "eval_tse_type": 0.0003159757330637007, "step": 74000 }, { "epoch": 125.03, "learning_rate": 3.0365257007533465e-05, "loss": 2.1132, "step": 74020 }, { "epoch": 125.07, "learning_rate": 3.0323990282422122e-05, "loss": 2.1262, "step": 74040 }, { "epoch": 125.1, "learning_rate": 3.0282739409916445e-05, "loss": 2.1337, "step": 74060 }, { "epoch": 125.14, "learning_rate": 3.0241504423251477e-05, "loss": 2.1275, "step": 74080 }, { "epoch": 125.17, "learning_rate": 3.0200285355649506e-05, "loss": 2.1291, "step": 74100 }, { "epoch": 125.2, "learning_rate": 3.0159082240320013e-05, "loss": 2.136, "step": 74120 }, { "epoch": 125.24, "learning_rate": 3.0117895110459583e-05, "loss": 2.1255, "step": 74140 }, { "epoch": 125.27, "learning_rate": 3.0076723999251953e-05, "loss": 2.1314, "step": 74160 }, { "epoch": 125.3, "learning_rate": 3.003556893986792e-05, "loss": 2.1267, "step": 74180 }, { "epoch": 125.34, "learning_rate": 2.9994429965465427e-05, "loss": 2.1344, "step": 74200 }, { "epoch": 125.37, "learning_rate": 2.9953307109189332e-05, "loss": 2.1353, "step": 74220 }, { "epoch": 125.41, "learning_rate": 2.9912200404171618e-05, "loss": 2.1274, "step": 74240 }, { "epoch": 125.44, "learning_rate": 2.9871109883531228e-05, "loss": 2.1505, "step": 74260 }, { "epoch": 125.47, "learning_rate": 2.9830035580374022e-05, "loss": 2.1448, "step": 74280 }, { "epoch": 125.51, "learning_rate": 2.9788977527792842e-05, "loss": 2.1497, "step": 74300 }, { "epoch": 125.54, "learning_rate": 2.9747935758867408e-05, "loss": 2.146, "step": 74320 }, { "epoch": 125.57, "learning_rate": 2.9706910306664337e-05, "loss": 2.1499, "step": 74340 }, { "epoch": 125.61, "learning_rate": 2.9665901204237085e-05, "loss": 2.1461, "step": 74360 }, { "epoch": 125.64, "learning_rate": 2.9624908484625957e-05, "loss": 2.175, "step": 74380 }, { "epoch": 125.68, "learning_rate": 2.9583932180858066e-05, "loss": 2.1532, "step": 74400 }, { "epoch": 125.71, "learning_rate": 2.9542972325947238e-05, "loss": 2.1518, "step": 74420 }, { "epoch": 125.74, "learning_rate": 2.9502028952894122e-05, "loss": 2.1628, "step": 74440 }, { "epoch": 125.78, "learning_rate": 2.9461102094686026e-05, "loss": 2.154, "step": 74460 }, { "epoch": 125.81, "learning_rate": 2.9420191784297014e-05, "loss": 2.1549, "step": 74480 }, { "epoch": 125.84, "learning_rate": 2.9379298054687747e-05, "loss": 2.1616, "step": 74500 }, { "epoch": 125.88, "learning_rate": 2.9338420938805577e-05, "loss": 2.16, "step": 74520 }, { "epoch": 125.91, "learning_rate": 2.9297560469584494e-05, "loss": 2.1573, "step": 74540 }, { "epoch": 125.95, "learning_rate": 2.9256716679944983e-05, "loss": 2.1604, "step": 74560 }, { "epoch": 125.98, "learning_rate": 2.9215889602794188e-05, "loss": 2.1683, "step": 74580 }, { "epoch": 126.01, "learning_rate": 2.917507927102573e-05, "loss": 2.1477, "step": 74600 }, { "epoch": 126.05, "learning_rate": 2.9134285717519772e-05, "loss": 2.1189, "step": 74620 }, { "epoch": 126.08, "learning_rate": 2.9093508975142896e-05, "loss": 2.1301, "step": 74640 }, { "epoch": 126.11, "learning_rate": 2.9052749076748264e-05, "loss": 2.1255, "step": 74660 }, { "epoch": 126.15, "learning_rate": 2.9012006055175322e-05, "loss": 2.1163, "step": 74680 }, { "epoch": 126.18, "learning_rate": 2.897127994325002e-05, "loss": 2.1249, "step": 74700 }, { "epoch": 126.22, "learning_rate": 2.8930570773784643e-05, "loss": 2.1243, "step": 74720 }, { "epoch": 126.25, "learning_rate": 2.8889878579577835e-05, "loss": 2.1459, "step": 74740 }, { "epoch": 126.28, "learning_rate": 2.884920339341457e-05, "loss": 2.1288, "step": 74760 }, { "epoch": 126.32, "learning_rate": 2.8808545248066087e-05, "loss": 2.1312, "step": 74780 }, { "epoch": 126.35, "learning_rate": 2.876790417628994e-05, "loss": 2.1255, "step": 74800 }, { "epoch": 126.39, "learning_rate": 2.87272802108299e-05, "loss": 2.1151, "step": 74820 }, { "epoch": 126.42, "learning_rate": 2.8686673384415956e-05, "loss": 2.1279, "step": 74840 }, { "epoch": 126.45, "learning_rate": 2.8646083729764306e-05, "loss": 2.1323, "step": 74860 }, { "epoch": 126.49, "learning_rate": 2.860551127957729e-05, "loss": 2.1408, "step": 74880 }, { "epoch": 126.52, "learning_rate": 2.8564956066543435e-05, "loss": 2.1489, "step": 74900 }, { "epoch": 126.55, "learning_rate": 2.8524418123337294e-05, "loss": 2.1326, "step": 74920 }, { "epoch": 126.59, "learning_rate": 2.8483897482619565e-05, "loss": 2.1237, "step": 74940 }, { "epoch": 126.62, "learning_rate": 2.8443394177037014e-05, "loss": 2.1549, "step": 74960 }, { "epoch": 126.66, "learning_rate": 2.8402908239222413e-05, "loss": 2.1481, "step": 74980 }, { "epoch": 126.69, "learning_rate": 2.8362439701794574e-05, "loss": 2.1478, "step": 75000 }, { "epoch": 126.69, "eval_loss": 2.51645827293396, "eval_runtime": 47.3559, "eval_samples_per_second": 20.884, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.004002848352361593, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03201635846077423, "eval_tse_type": 0.0004039481567263266, "step": 75000 }, { "epoch": 126.72, "learning_rate": 2.8321988597358207e-05, "loss": 2.1553, "step": 75020 }, { "epoch": 126.76, "learning_rate": 2.828155495850412e-05, "loss": 2.1474, "step": 75040 }, { "epoch": 126.79, "learning_rate": 2.8241138817808888e-05, "loss": 2.1534, "step": 75060 }, { "epoch": 126.82, "learning_rate": 2.8200740207835107e-05, "loss": 2.1607, "step": 75080 }, { "epoch": 126.86, "learning_rate": 2.81603591611312e-05, "loss": 2.1395, "step": 75100 }, { "epoch": 126.89, "learning_rate": 2.811999571023144e-05, "loss": 2.1534, "step": 75120 }, { "epoch": 126.93, "learning_rate": 2.807964988765596e-05, "loss": 2.1645, "step": 75140 }, { "epoch": 126.96, "learning_rate": 2.8039321725910595e-05, "loss": 2.1695, "step": 75160 }, { "epoch": 126.99, "learning_rate": 2.79990112574871e-05, "loss": 2.1605, "step": 75180 }, { "epoch": 127.03, "learning_rate": 2.7958718514862824e-05, "loss": 2.1235, "step": 75200 }, { "epoch": 127.06, "learning_rate": 2.7918443530500937e-05, "loss": 2.1245, "step": 75220 }, { "epoch": 127.09, "learning_rate": 2.787818633685025e-05, "loss": 2.1131, "step": 75240 }, { "epoch": 127.13, "learning_rate": 2.7837946966345262e-05, "loss": 2.1248, "step": 75260 }, { "epoch": 127.16, "learning_rate": 2.7797725451406133e-05, "loss": 2.1318, "step": 75280 }, { "epoch": 127.2, "learning_rate": 2.775752182443856e-05, "loss": 2.1272, "step": 75300 }, { "epoch": 127.23, "learning_rate": 2.771934497705564e-05, "loss": 2.1192, "step": 75320 }, { "epoch": 127.26, "learning_rate": 2.7679176324785106e-05, "loss": 2.1222, "step": 75340 }, { "epoch": 127.3, "learning_rate": 2.7639025655999024e-05, "loss": 2.1199, "step": 75360 }, { "epoch": 127.33, "learning_rate": 2.7598893003046088e-05, "loss": 2.1319, "step": 75380 }, { "epoch": 127.36, "learning_rate": 2.7558778398260442e-05, "loss": 2.1273, "step": 75400 }, { "epoch": 127.4, "learning_rate": 2.7518681873961727e-05, "loss": 2.1405, "step": 75420 }, { "epoch": 127.43, "learning_rate": 2.7478603462454944e-05, "loss": 2.1495, "step": 75440 }, { "epoch": 127.47, "learning_rate": 2.7438543196030542e-05, "loss": 2.1265, "step": 75460 }, { "epoch": 127.5, "learning_rate": 2.7398501106964427e-05, "loss": 2.133, "step": 75480 }, { "epoch": 127.53, "learning_rate": 2.7358477227517708e-05, "loss": 2.1346, "step": 75500 }, { "epoch": 127.57, "learning_rate": 2.731847158993694e-05, "loss": 2.1308, "step": 75520 }, { "epoch": 127.6, "learning_rate": 2.7278484226453926e-05, "loss": 2.1398, "step": 75540 }, { "epoch": 127.64, "learning_rate": 2.72385151692858e-05, "loss": 2.1329, "step": 75560 }, { "epoch": 127.67, "learning_rate": 2.7198564450634856e-05, "loss": 2.1302, "step": 75580 }, { "epoch": 127.7, "learning_rate": 2.7158632102688676e-05, "loss": 2.1397, "step": 75600 }, { "epoch": 127.74, "learning_rate": 2.7118718157620076e-05, "loss": 2.1417, "step": 75620 }, { "epoch": 127.77, "learning_rate": 2.708081698476026e-05, "loss": 2.1446, "step": 75640 }, { "epoch": 127.8, "learning_rate": 2.704093901778363e-05, "loss": 2.1366, "step": 75660 }, { "epoch": 127.84, "learning_rate": 2.7001079548507736e-05, "loss": 2.1504, "step": 75680 }, { "epoch": 127.87, "learning_rate": 2.6961238609046646e-05, "loss": 2.1425, "step": 75700 }, { "epoch": 127.91, "learning_rate": 2.6921416231499498e-05, "loss": 2.1452, "step": 75720 }, { "epoch": 127.94, "learning_rate": 2.6881612447950423e-05, "loss": 2.1481, "step": 75740 }, { "epoch": 127.97, "learning_rate": 2.684182729046863e-05, "loss": 2.1524, "step": 75760 }, { "epoch": 128.01, "learning_rate": 2.68020607911083e-05, "loss": 2.1465, "step": 75780 }, { "epoch": 128.04, "learning_rate": 2.676231298190861e-05, "loss": 2.1048, "step": 75800 }, { "epoch": 128.07, "learning_rate": 2.6722583894893582e-05, "loss": 2.1034, "step": 75820 }, { "epoch": 128.11, "learning_rate": 2.6682873562072298e-05, "loss": 2.1174, "step": 75840 }, { "epoch": 128.14, "learning_rate": 2.664318201543864e-05, "loss": 2.1147, "step": 75860 }, { "epoch": 128.18, "learning_rate": 2.660350928697134e-05, "loss": 2.1131, "step": 75880 }, { "epoch": 128.21, "learning_rate": 2.656385540863402e-05, "loss": 2.1176, "step": 75900 }, { "epoch": 128.24, "learning_rate": 2.6524220412375077e-05, "loss": 2.1202, "step": 75920 }, { "epoch": 128.28, "learning_rate": 2.6484604330127755e-05, "loss": 2.1247, "step": 75940 }, { "epoch": 128.31, "learning_rate": 2.6445007193809945e-05, "loss": 2.1292, "step": 75960 }, { "epoch": 128.34, "learning_rate": 2.6405429035324403e-05, "loss": 2.1214, "step": 75980 }, { "epoch": 128.38, "learning_rate": 2.6365869886558537e-05, "loss": 2.1315, "step": 76000 }, { "epoch": 128.38, "eval_loss": 2.513632297515869, "eval_runtime": 47.4453, "eval_samples_per_second": 20.845, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.0034698624169113494, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03335803456619464, "eval_tse_type": 0.00045816481294236604, "step": 76000 }, { "epoch": 128.41, "learning_rate": 2.6326329779384395e-05, "loss": 2.125, "step": 76020 }, { "epoch": 128.45, "learning_rate": 2.6286808745658766e-05, "loss": 2.1255, "step": 76040 }, { "epoch": 128.48, "learning_rate": 2.6247306817223007e-05, "loss": 2.1307, "step": 76060 }, { "epoch": 128.51, "learning_rate": 2.6207824025903137e-05, "loss": 2.1304, "step": 76080 }, { "epoch": 128.55, "learning_rate": 2.6168360403509707e-05, "loss": 2.1369, "step": 76100 }, { "epoch": 128.58, "learning_rate": 2.6128915981837815e-05, "loss": 2.1337, "step": 76120 }, { "epoch": 128.61, "learning_rate": 2.60894907926672e-05, "loss": 2.1399, "step": 76140 }, { "epoch": 128.65, "learning_rate": 2.6050084867761954e-05, "loss": 2.1461, "step": 76160 }, { "epoch": 128.68, "learning_rate": 2.6010698238870744e-05, "loss": 2.1314, "step": 76180 }, { "epoch": 128.72, "learning_rate": 2.597133093772666e-05, "loss": 2.1386, "step": 76200 }, { "epoch": 128.75, "learning_rate": 2.5931982996047255e-05, "loss": 2.1401, "step": 76220 }, { "epoch": 128.78, "learning_rate": 2.589265444553441e-05, "loss": 2.1452, "step": 76240 }, { "epoch": 128.82, "learning_rate": 2.5853345317874445e-05, "loss": 2.1348, "step": 76260 }, { "epoch": 128.85, "learning_rate": 2.581405564473801e-05, "loss": 2.1289, "step": 76280 }, { "epoch": 128.89, "learning_rate": 2.5774785457780103e-05, "loss": 2.1505, "step": 76300 }, { "epoch": 128.92, "learning_rate": 2.5735534788640008e-05, "loss": 2.1362, "step": 76320 }, { "epoch": 128.95, "learning_rate": 2.5696303668941226e-05, "loss": 2.1446, "step": 76340 }, { "epoch": 128.99, "learning_rate": 2.5657092130291638e-05, "loss": 2.1474, "step": 76360 }, { "epoch": 129.02, "learning_rate": 2.561790020428322e-05, "loss": 2.1166, "step": 76380 }, { "epoch": 129.05, "learning_rate": 2.5578727922492206e-05, "loss": 2.1161, "step": 76400 }, { "epoch": 129.09, "learning_rate": 2.5539575316479007e-05, "loss": 2.1048, "step": 76420 }, { "epoch": 129.12, "learning_rate": 2.550044241778817e-05, "loss": 2.1115, "step": 76440 }, { "epoch": 129.16, "learning_rate": 2.546132925794838e-05, "loss": 2.1134, "step": 76460 }, { "epoch": 129.19, "learning_rate": 2.5422235868472345e-05, "loss": 2.1202, "step": 76480 }, { "epoch": 129.22, "learning_rate": 2.5383162280856986e-05, "loss": 2.1169, "step": 76500 }, { "epoch": 129.26, "learning_rate": 2.5344108526583123e-05, "loss": 2.1021, "step": 76520 }, { "epoch": 129.29, "learning_rate": 2.5305074637115677e-05, "loss": 2.1083, "step": 76540 }, { "epoch": 129.32, "learning_rate": 2.5266060643903556e-05, "loss": 2.1105, "step": 76560 }, { "epoch": 129.36, "learning_rate": 2.522706657837962e-05, "loss": 2.1248, "step": 76580 }, { "epoch": 129.39, "learning_rate": 2.5188092471960712e-05, "loss": 2.1235, "step": 76600 }, { "epoch": 129.43, "learning_rate": 2.5149138356047525e-05, "loss": 2.1311, "step": 76620 }, { "epoch": 129.46, "learning_rate": 2.5110204262024706e-05, "loss": 2.1225, "step": 76640 }, { "epoch": 129.49, "learning_rate": 2.507129022126074e-05, "loss": 2.1256, "step": 76660 }, { "epoch": 129.53, "learning_rate": 2.5032396265107984e-05, "loss": 2.1278, "step": 76680 }, { "epoch": 129.56, "learning_rate": 2.499352242490259e-05, "loss": 2.1273, "step": 76700 }, { "epoch": 129.59, "learning_rate": 2.4954668731964496e-05, "loss": 2.1306, "step": 76720 }, { "epoch": 129.63, "learning_rate": 2.491583521759746e-05, "loss": 2.1217, "step": 76740 }, { "epoch": 129.66, "learning_rate": 2.4877021913088893e-05, "loss": 2.1251, "step": 76760 }, { "epoch": 129.7, "learning_rate": 2.483822884971e-05, "loss": 2.129, "step": 76780 }, { "epoch": 129.73, "learning_rate": 2.479945605871564e-05, "loss": 2.1383, "step": 76800 }, { "epoch": 129.76, "learning_rate": 2.4760703571344363e-05, "loss": 2.131, "step": 76820 }, { "epoch": 129.8, "learning_rate": 2.4721971418818357e-05, "loss": 2.1305, "step": 76840 }, { "epoch": 129.83, "learning_rate": 2.4683259632343362e-05, "loss": 2.1351, "step": 76860 }, { "epoch": 129.86, "learning_rate": 2.464456824310885e-05, "loss": 2.1423, "step": 76880 }, { "epoch": 129.9, "learning_rate": 2.460589728228771e-05, "loss": 2.1416, "step": 76900 }, { "epoch": 129.93, "learning_rate": 2.4567246781036457e-05, "loss": 2.1385, "step": 76920 }, { "epoch": 129.97, "learning_rate": 2.45286167704951e-05, "loss": 2.1273, "step": 76940 }, { "epoch": 130.0, "learning_rate": 2.4490007281787164e-05, "loss": 2.1367, "step": 76960 }, { "epoch": 130.03, "learning_rate": 2.4451418346019576e-05, "loss": 2.0997, "step": 76980 }, { "epoch": 130.07, "learning_rate": 2.4412849994282742e-05, "loss": 2.1031, "step": 77000 }, { "epoch": 130.07, "eval_loss": 2.5073578357696533, "eval_runtime": 50.5928, "eval_samples_per_second": 19.548, "eval_steps_per_second": 0.119, "eval_tse_ndup": 0.00484687591380415, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.0312417529380153, "eval_tse_type": 0.0004991073247496958, "step": 77000 }, { "epoch": 130.1, "learning_rate": 2.437430225765055e-05, "loss": 2.1186, "step": 77020 }, { "epoch": 130.14, "learning_rate": 2.4335775167180153e-05, "loss": 2.1044, "step": 77040 }, { "epoch": 130.17, "learning_rate": 2.4297268753912172e-05, "loss": 2.105, "step": 77060 }, { "epoch": 130.2, "learning_rate": 2.425878304887047e-05, "loss": 2.1048, "step": 77080 }, { "epoch": 130.24, "learning_rate": 2.422031808306236e-05, "loss": 2.1283, "step": 77100 }, { "epoch": 130.27, "learning_rate": 2.4181873887478312e-05, "loss": 2.1099, "step": 77120 }, { "epoch": 130.3, "learning_rate": 2.4143450493092146e-05, "loss": 2.1001, "step": 77140 }, { "epoch": 130.34, "learning_rate": 2.410504793086089e-05, "loss": 2.1125, "step": 77160 }, { "epoch": 130.37, "learning_rate": 2.40666662317248e-05, "loss": 2.122, "step": 77180 }, { "epoch": 130.41, "learning_rate": 2.4028305426607333e-05, "loss": 2.1181, "step": 77200 }, { "epoch": 130.44, "learning_rate": 2.3989965546415045e-05, "loss": 2.1204, "step": 77220 }, { "epoch": 130.47, "learning_rate": 2.395164662203775e-05, "loss": 2.1112, "step": 77240 }, { "epoch": 130.51, "learning_rate": 2.3913348684348264e-05, "loss": 2.1231, "step": 77260 }, { "epoch": 130.54, "learning_rate": 2.3875071764202563e-05, "loss": 2.1233, "step": 77280 }, { "epoch": 130.57, "learning_rate": 2.383681589243967e-05, "loss": 2.1161, "step": 77300 }, { "epoch": 130.61, "learning_rate": 2.3798581099881645e-05, "loss": 2.1275, "step": 77320 }, { "epoch": 130.64, "learning_rate": 2.376036741733359e-05, "loss": 2.1325, "step": 77340 }, { "epoch": 130.68, "learning_rate": 2.3722174875583548e-05, "loss": 2.1176, "step": 77360 }, { "epoch": 130.71, "learning_rate": 2.3684003505402574e-05, "loss": 2.1128, "step": 77380 }, { "epoch": 130.74, "learning_rate": 2.3645853337544654e-05, "loss": 2.1256, "step": 77400 }, { "epoch": 130.78, "learning_rate": 2.3607724402746684e-05, "loss": 2.1345, "step": 77420 }, { "epoch": 130.81, "learning_rate": 2.3569616731728462e-05, "loss": 2.1257, "step": 77440 }, { "epoch": 130.84, "learning_rate": 2.3531530355192643e-05, "loss": 2.1385, "step": 77460 }, { "epoch": 130.88, "learning_rate": 2.3493465303824767e-05, "loss": 2.1472, "step": 77480 }, { "epoch": 130.91, "learning_rate": 2.3455421608293106e-05, "loss": 2.1227, "step": 77500 }, { "epoch": 130.95, "learning_rate": 2.3417399299248803e-05, "loss": 2.1366, "step": 77520 }, { "epoch": 130.98, "learning_rate": 2.3379398407325747e-05, "loss": 2.1243, "step": 77540 }, { "epoch": 131.01, "learning_rate": 2.334141896314057e-05, "loss": 2.1237, "step": 77560 }, { "epoch": 131.05, "learning_rate": 2.3303460997292637e-05, "loss": 2.1022, "step": 77580 }, { "epoch": 131.08, "learning_rate": 2.326552454036395e-05, "loss": 2.0869, "step": 77600 }, { "epoch": 131.11, "learning_rate": 2.3227609622919287e-05, "loss": 2.094, "step": 77620 }, { "epoch": 131.15, "learning_rate": 2.3189716275505967e-05, "loss": 2.1118, "step": 77640 }, { "epoch": 131.18, "learning_rate": 2.3151844528654e-05, "loss": 2.1038, "step": 77660 }, { "epoch": 131.22, "learning_rate": 2.311399441287595e-05, "loss": 2.0885, "step": 77680 }, { "epoch": 131.25, "learning_rate": 2.307616595866699e-05, "loss": 2.1073, "step": 77700 }, { "epoch": 131.28, "learning_rate": 2.3038359196504828e-05, "loss": 2.113, "step": 77720 }, { "epoch": 131.32, "learning_rate": 2.300057415684964e-05, "loss": 2.1125, "step": 77740 }, { "epoch": 131.35, "learning_rate": 2.2962810870144225e-05, "loss": 2.1142, "step": 77760 }, { "epoch": 131.39, "learning_rate": 2.2925069366813717e-05, "loss": 2.1199, "step": 77780 }, { "epoch": 131.42, "learning_rate": 2.288734967726579e-05, "loss": 2.1166, "step": 77800 }, { "epoch": 131.45, "learning_rate": 2.2849651831890517e-05, "loss": 2.1075, "step": 77820 }, { "epoch": 131.49, "learning_rate": 2.2811975861060368e-05, "loss": 2.1187, "step": 77840 }, { "epoch": 131.52, "learning_rate": 2.2774321795130215e-05, "loss": 2.1093, "step": 77860 }, { "epoch": 131.55, "learning_rate": 2.2736689664437217e-05, "loss": 2.1236, "step": 77880 }, { "epoch": 131.59, "learning_rate": 2.2699079499300918e-05, "loss": 2.1122, "step": 77900 }, { "epoch": 131.62, "learning_rate": 2.2661491330023154e-05, "loss": 2.1163, "step": 77920 }, { "epoch": 131.66, "learning_rate": 2.2623925186888056e-05, "loss": 2.123, "step": 77940 }, { "epoch": 131.69, "learning_rate": 2.2586381100161923e-05, "loss": 2.1282, "step": 77960 }, { "epoch": 131.72, "learning_rate": 2.2548859100093407e-05, "loss": 2.1131, "step": 77980 }, { "epoch": 131.76, "learning_rate": 2.2511359216913304e-05, "loss": 2.1234, "step": 78000 }, { "epoch": 131.76, "eval_loss": 2.5044639110565186, "eval_runtime": 47.4669, "eval_samples_per_second": 20.836, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.0041822542590794895, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.0324066122092436, "eval_tse_type": 0.0006477502527805865, "step": 78000 }, { "epoch": 131.79, "learning_rate": 2.247388148083456e-05, "loss": 2.1487, "step": 78020 }, { "epoch": 131.82, "learning_rate": 2.2436425922052324e-05, "loss": 2.1194, "step": 78040 }, { "epoch": 131.86, "learning_rate": 2.2398992570743866e-05, "loss": 2.1294, "step": 78060 }, { "epoch": 131.89, "learning_rate": 2.2361581457068574e-05, "loss": 2.1263, "step": 78080 }, { "epoch": 131.93, "learning_rate": 2.2324192611167875e-05, "loss": 2.1248, "step": 78100 }, { "epoch": 131.96, "learning_rate": 2.228682606316529e-05, "loss": 2.1209, "step": 78120 }, { "epoch": 131.99, "learning_rate": 2.224948184316642e-05, "loss": 2.1283, "step": 78140 }, { "epoch": 132.03, "learning_rate": 2.2212159981258774e-05, "loss": 2.0941, "step": 78160 }, { "epoch": 132.06, "learning_rate": 2.2174860507511924e-05, "loss": 2.0923, "step": 78180 }, { "epoch": 132.09, "learning_rate": 2.2137583451977377e-05, "loss": 2.0967, "step": 78200 }, { "epoch": 132.13, "learning_rate": 2.210032884468861e-05, "loss": 2.1014, "step": 78220 }, { "epoch": 132.16, "learning_rate": 2.2063096715660947e-05, "loss": 2.0929, "step": 78240 }, { "epoch": 132.2, "learning_rate": 2.2025887094891657e-05, "loss": 2.0848, "step": 78260 }, { "epoch": 132.23, "learning_rate": 2.1988700012359862e-05, "loss": 2.0958, "step": 78280 }, { "epoch": 132.26, "learning_rate": 2.1951535498026527e-05, "loss": 2.1127, "step": 78300 }, { "epoch": 132.3, "learning_rate": 2.1914393581834418e-05, "loss": 2.101, "step": 78320 }, { "epoch": 132.33, "learning_rate": 2.1877274293708116e-05, "loss": 2.1041, "step": 78340 }, { "epoch": 132.36, "learning_rate": 2.1840177663553974e-05, "loss": 2.1125, "step": 78360 }, { "epoch": 132.4, "learning_rate": 2.180310372126005e-05, "loss": 2.1062, "step": 78380 }, { "epoch": 132.43, "learning_rate": 2.1766052496696153e-05, "loss": 2.1232, "step": 78400 }, { "epoch": 132.47, "learning_rate": 2.1729024019713794e-05, "loss": 2.1115, "step": 78420 }, { "epoch": 132.5, "learning_rate": 2.1692018320146153e-05, "loss": 2.1086, "step": 78440 }, { "epoch": 132.53, "learning_rate": 2.165503542780806e-05, "loss": 2.104, "step": 78460 }, { "epoch": 132.57, "learning_rate": 2.1618075372495916e-05, "loss": 2.1271, "step": 78480 }, { "epoch": 132.6, "learning_rate": 2.158113818398784e-05, "loss": 2.1185, "step": 78500 }, { "epoch": 132.64, "learning_rate": 2.1544223892043406e-05, "loss": 2.1248, "step": 78520 }, { "epoch": 132.67, "learning_rate": 2.150733252640381e-05, "loss": 2.1102, "step": 78540 }, { "epoch": 132.7, "learning_rate": 2.147046411679176e-05, "loss": 2.1075, "step": 78560 }, { "epoch": 132.74, "learning_rate": 2.1433618692911467e-05, "loss": 2.1251, "step": 78580 }, { "epoch": 132.77, "learning_rate": 2.139679628444864e-05, "loss": 2.1113, "step": 78600 }, { "epoch": 132.8, "learning_rate": 2.135999692107039e-05, "loss": 2.1243, "step": 78620 }, { "epoch": 132.84, "learning_rate": 2.1323220632425316e-05, "loss": 2.1215, "step": 78640 }, { "epoch": 132.87, "learning_rate": 2.128646744814342e-05, "loss": 2.13, "step": 78660 }, { "epoch": 132.91, "learning_rate": 2.124973739783609e-05, "loss": 2.1184, "step": 78680 }, { "epoch": 132.94, "learning_rate": 2.121303051109601e-05, "loss": 2.1202, "step": 78700 }, { "epoch": 132.97, "learning_rate": 2.117634681749731e-05, "loss": 2.1065, "step": 78720 }, { "epoch": 133.01, "learning_rate": 2.1139686346595377e-05, "loss": 2.1247, "step": 78740 }, { "epoch": 133.04, "learning_rate": 2.110304912792686e-05, "loss": 2.0774, "step": 78760 }, { "epoch": 133.07, "learning_rate": 2.1066435191009715e-05, "loss": 2.0867, "step": 78780 }, { "epoch": 133.11, "learning_rate": 2.102984456534314e-05, "loss": 2.1156, "step": 78800 }, { "epoch": 133.14, "learning_rate": 2.0993277280407548e-05, "loss": 2.089, "step": 78820 }, { "epoch": 133.18, "learning_rate": 2.0956733365664495e-05, "loss": 2.0993, "step": 78840 }, { "epoch": 133.21, "learning_rate": 2.0920212850556797e-05, "loss": 2.0846, "step": 78860 }, { "epoch": 133.24, "learning_rate": 2.0883715764508383e-05, "loss": 2.0936, "step": 78880 }, { "epoch": 133.28, "learning_rate": 2.0847242136924256e-05, "loss": 2.1128, "step": 78900 }, { "epoch": 133.31, "learning_rate": 2.0810791997190577e-05, "loss": 2.093, "step": 78920 }, { "epoch": 133.34, "learning_rate": 2.0774365374674565e-05, "loss": 2.1066, "step": 78940 }, { "epoch": 133.38, "learning_rate": 2.0737962298724512e-05, "loss": 2.1072, "step": 78960 }, { "epoch": 133.41, "learning_rate": 2.0701582798669676e-05, "loss": 2.0962, "step": 78980 }, { "epoch": 133.45, "learning_rate": 2.066522690382037e-05, "loss": 2.1186, "step": 79000 }, { "epoch": 133.45, "eval_loss": 2.498960256576538, "eval_runtime": 48.7571, "eval_samples_per_second": 20.284, "eval_steps_per_second": 0.123, "eval_tse_ndup": 0.003615034085210324, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03289195640099343, "eval_tse_type": 0.00032311098298162106, "step": 79000 }, { "epoch": 133.48, "learning_rate": 2.062889464346794e-05, "loss": 2.1045, "step": 79020 }, { "epoch": 133.51, "learning_rate": 2.0592586046884566e-05, "loss": 2.107, "step": 79040 }, { "epoch": 133.55, "learning_rate": 2.0556301143323458e-05, "loss": 2.1035, "step": 79060 }, { "epoch": 133.58, "learning_rate": 2.0520039962018693e-05, "loss": 2.1125, "step": 79080 }, { "epoch": 133.61, "learning_rate": 2.0483802532185286e-05, "loss": 2.1083, "step": 79100 }, { "epoch": 133.65, "learning_rate": 2.044758888301903e-05, "loss": 2.121, "step": 79120 }, { "epoch": 133.68, "learning_rate": 2.0411399043696627e-05, "loss": 2.108, "step": 79140 }, { "epoch": 133.72, "learning_rate": 2.0375233043375586e-05, "loss": 2.1161, "step": 79160 }, { "epoch": 133.75, "learning_rate": 2.033909091119419e-05, "loss": 2.1127, "step": 79180 }, { "epoch": 133.78, "learning_rate": 2.0302972676271524e-05, "loss": 2.1105, "step": 79200 }, { "epoch": 133.82, "learning_rate": 2.0266878367707347e-05, "loss": 2.128, "step": 79220 }, { "epoch": 133.85, "learning_rate": 2.0230808014582263e-05, "loss": 2.1113, "step": 79240 }, { "epoch": 133.89, "learning_rate": 2.0194761645957444e-05, "loss": 2.1084, "step": 79260 }, { "epoch": 133.92, "learning_rate": 2.015873929087482e-05, "loss": 2.109, "step": 79280 }, { "epoch": 133.95, "learning_rate": 2.012274097835695e-05, "loss": 2.1202, "step": 79300 }, { "epoch": 133.99, "learning_rate": 2.0086766737407032e-05, "loss": 2.1194, "step": 79320 }, { "epoch": 134.02, "learning_rate": 2.0050816597008864e-05, "loss": 2.091, "step": 79340 }, { "epoch": 134.05, "learning_rate": 2.001489058612679e-05, "loss": 2.0843, "step": 79360 }, { "epoch": 134.09, "learning_rate": 1.9978988733705807e-05, "loss": 2.0809, "step": 79380 }, { "epoch": 134.12, "learning_rate": 1.994311106867134e-05, "loss": 2.0981, "step": 79400 }, { "epoch": 134.16, "learning_rate": 1.9907257619929405e-05, "loss": 2.0908, "step": 79420 }, { "epoch": 134.19, "learning_rate": 1.9871428416366432e-05, "loss": 2.0987, "step": 79440 }, { "epoch": 134.22, "learning_rate": 1.983562348684942e-05, "loss": 2.1047, "step": 79460 }, { "epoch": 134.26, "learning_rate": 1.979984286022574e-05, "loss": 2.102, "step": 79480 }, { "epoch": 134.29, "learning_rate": 1.9764086565323177e-05, "loss": 2.1032, "step": 79500 }, { "epoch": 134.32, "learning_rate": 1.9728354630949936e-05, "loss": 2.0998, "step": 79520 }, { "epoch": 134.36, "learning_rate": 1.96926470858946e-05, "loss": 2.0928, "step": 79540 }, { "epoch": 134.39, "learning_rate": 1.9656963958926105e-05, "loss": 2.105, "step": 79560 }, { "epoch": 134.43, "learning_rate": 1.9621305278793656e-05, "loss": 2.1176, "step": 79580 }, { "epoch": 134.46, "learning_rate": 1.9585671074226858e-05, "loss": 2.0935, "step": 79600 }, { "epoch": 134.49, "learning_rate": 1.955006137393554e-05, "loss": 2.0976, "step": 79620 }, { "epoch": 134.53, "learning_rate": 1.951625488187545e-05, "loss": 2.0994, "step": 79640 }, { "epoch": 134.56, "learning_rate": 1.948069304742313e-05, "loss": 2.1019, "step": 79660 }, { "epoch": 134.59, "learning_rate": 1.944515580182522e-05, "loss": 2.1038, "step": 79680 }, { "epoch": 134.63, "learning_rate": 1.940964317371337e-05, "loss": 2.1065, "step": 79700 }, { "epoch": 134.66, "learning_rate": 1.9374155191699496e-05, "loss": 2.09, "step": 79720 }, { "epoch": 134.7, "learning_rate": 1.9338691884375605e-05, "loss": 2.1042, "step": 79740 }, { "epoch": 134.73, "learning_rate": 1.9303253280313872e-05, "loss": 2.1115, "step": 79760 }, { "epoch": 134.76, "learning_rate": 1.9267839408066507e-05, "loss": 2.1009, "step": 79780 }, { "epoch": 134.8, "learning_rate": 1.9232450296165838e-05, "loss": 2.112, "step": 79800 }, { "epoch": 134.83, "learning_rate": 1.919708597312424e-05, "loss": 2.1046, "step": 79820 }, { "epoch": 134.86, "learning_rate": 1.9161746467434104e-05, "loss": 2.1017, "step": 79840 }, { "epoch": 134.9, "learning_rate": 1.912643180756785e-05, "loss": 2.1197, "step": 79860 }, { "epoch": 134.93, "learning_rate": 1.9091142021977814e-05, "loss": 2.124, "step": 79880 }, { "epoch": 134.97, "learning_rate": 1.9055877139096402e-05, "loss": 2.1029, "step": 79900 }, { "epoch": 135.0, "learning_rate": 1.9020637187335844e-05, "loss": 2.1145, "step": 79920 }, { "epoch": 135.03, "learning_rate": 1.8985422195088347e-05, "loss": 2.0876, "step": 79940 }, { "epoch": 135.07, "learning_rate": 1.8950232190726003e-05, "loss": 2.0874, "step": 79960 }, { "epoch": 135.1, "learning_rate": 1.8915067202600755e-05, "loss": 2.0823, "step": 79980 }, { "epoch": 135.14, "learning_rate": 1.8879927259044416e-05, "loss": 2.095, "step": 80000 }, { "epoch": 135.14, "eval_loss": 2.4958672523498535, "eval_runtime": 47.6442, "eval_samples_per_second": 20.758, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.0038424369886962398, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03117974620576516, "eval_tse_type": 0.000579288843950118, "step": 80000 }, { "epoch": 135.17, "learning_rate": 1.8844812388368556e-05, "loss": 2.0736, "step": 80020 }, { "epoch": 135.2, "learning_rate": 1.8809722618864657e-05, "loss": 2.0829, "step": 80040 }, { "epoch": 135.24, "learning_rate": 1.877465797880386e-05, "loss": 2.0836, "step": 80060 }, { "epoch": 135.27, "learning_rate": 1.873961849643714e-05, "loss": 2.1014, "step": 80080 }, { "epoch": 135.3, "learning_rate": 1.8704604199995156e-05, "loss": 2.0869, "step": 80100 }, { "epoch": 135.34, "learning_rate": 1.8669615117688316e-05, "loss": 2.1022, "step": 80120 }, { "epoch": 135.37, "learning_rate": 1.8634651277706693e-05, "loss": 2.0988, "step": 80140 }, { "epoch": 135.41, "learning_rate": 1.8599712708219992e-05, "loss": 2.1048, "step": 80160 }, { "epoch": 135.44, "learning_rate": 1.8564799437377605e-05, "loss": 2.0974, "step": 80180 }, { "epoch": 135.47, "learning_rate": 1.8529911493308526e-05, "loss": 2.0889, "step": 80200 }, { "epoch": 135.51, "learning_rate": 1.8495048904121338e-05, "loss": 2.0966, "step": 80220 }, { "epoch": 135.54, "learning_rate": 1.84602116979042e-05, "loss": 2.0896, "step": 80240 }, { "epoch": 135.57, "learning_rate": 1.8425399902724817e-05, "loss": 2.0981, "step": 80260 }, { "epoch": 135.61, "learning_rate": 1.8390613546630448e-05, "loss": 2.1059, "step": 80280 }, { "epoch": 135.64, "learning_rate": 1.835585265764779e-05, "loss": 2.1071, "step": 80300 }, { "epoch": 135.68, "learning_rate": 1.832111726378308e-05, "loss": 2.0962, "step": 80320 }, { "epoch": 135.71, "learning_rate": 1.8286407393022008e-05, "loss": 2.1241, "step": 80340 }, { "epoch": 135.74, "learning_rate": 1.8251723073329685e-05, "loss": 2.1039, "step": 80360 }, { "epoch": 135.78, "learning_rate": 1.8217064332650652e-05, "loss": 2.1091, "step": 80380 }, { "epoch": 135.81, "learning_rate": 1.8182431198908783e-05, "loss": 2.0984, "step": 80400 }, { "epoch": 135.84, "learning_rate": 1.8147823700007444e-05, "loss": 2.1158, "step": 80420 }, { "epoch": 135.88, "learning_rate": 1.8113241863829204e-05, "loss": 2.1005, "step": 80440 }, { "epoch": 135.91, "learning_rate": 1.8078685718236054e-05, "loss": 2.1054, "step": 80460 }, { "epoch": 135.95, "learning_rate": 1.8044155291069255e-05, "loss": 2.1138, "step": 80480 }, { "epoch": 135.98, "learning_rate": 1.8009650610149343e-05, "loss": 2.0984, "step": 80500 }, { "epoch": 136.01, "learning_rate": 1.7975171703276133e-05, "loss": 2.0964, "step": 80520 }, { "epoch": 136.05, "learning_rate": 1.794071859822862e-05, "loss": 2.0772, "step": 80540 }, { "epoch": 136.08, "learning_rate": 1.7906291322765097e-05, "loss": 2.079, "step": 80560 }, { "epoch": 136.11, "learning_rate": 1.787188990462296e-05, "loss": 2.0747, "step": 80580 }, { "epoch": 136.15, "learning_rate": 1.7837514371518837e-05, "loss": 2.0938, "step": 80600 }, { "epoch": 136.18, "learning_rate": 1.7803164751148432e-05, "loss": 2.0796, "step": 80620 }, { "epoch": 136.22, "learning_rate": 1.7768841071186676e-05, "loss": 2.0693, "step": 80640 }, { "epoch": 136.25, "learning_rate": 1.7734543359287485e-05, "loss": 2.0829, "step": 80660 }, { "epoch": 136.28, "learning_rate": 1.7700271643083925e-05, "loss": 2.0851, "step": 80680 }, { "epoch": 136.32, "learning_rate": 1.7666025950188097e-05, "loss": 2.0852, "step": 80700 }, { "epoch": 136.35, "learning_rate": 1.7631806308191145e-05, "loss": 2.0868, "step": 80720 }, { "epoch": 136.39, "learning_rate": 1.7597612744663224e-05, "loss": 2.0901, "step": 80740 }, { "epoch": 136.42, "learning_rate": 1.7563445287153424e-05, "loss": 2.0945, "step": 80760 }, { "epoch": 136.45, "learning_rate": 1.7529303963189913e-05, "loss": 2.0927, "step": 80780 }, { "epoch": 136.49, "learning_rate": 1.7495188800279695e-05, "loss": 2.0838, "step": 80800 }, { "epoch": 136.52, "learning_rate": 1.7461099825908754e-05, "loss": 2.0936, "step": 80820 }, { "epoch": 136.55, "learning_rate": 1.7427037067541955e-05, "loss": 2.1008, "step": 80840 }, { "epoch": 136.59, "learning_rate": 1.7393000552623056e-05, "loss": 2.0923, "step": 80860 }, { "epoch": 136.62, "learning_rate": 1.7358990308574676e-05, "loss": 2.0969, "step": 80880 }, { "epoch": 136.66, "learning_rate": 1.732500636279822e-05, "loss": 2.0988, "step": 80900 }, { "epoch": 136.69, "learning_rate": 1.7291048742673954e-05, "loss": 2.0989, "step": 80920 }, { "epoch": 136.72, "learning_rate": 1.7257117475560923e-05, "loss": 2.1037, "step": 80940 }, { "epoch": 136.76, "learning_rate": 1.7223212588796933e-05, "loss": 2.1032, "step": 80960 }, { "epoch": 136.79, "learning_rate": 1.718933410969854e-05, "loss": 2.1054, "step": 80980 }, { "epoch": 136.82, "learning_rate": 1.7155482065561024e-05, "loss": 2.0961, "step": 81000 }, { "epoch": 136.82, "eval_loss": 2.489452362060547, "eval_runtime": 47.4712, "eval_samples_per_second": 20.834, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.003943252625860958, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03198987826687528, "eval_tse_type": 0.00048107834873431016, "step": 81000 }, { "epoch": 136.86, "learning_rate": 1.7121656483658383e-05, "loss": 2.1133, "step": 81020 }, { "epoch": 136.89, "learning_rate": 1.7087857391243246e-05, "loss": 2.1203, "step": 81040 }, { "epoch": 136.93, "learning_rate": 1.7054084815546933e-05, "loss": 2.1088, "step": 81060 }, { "epoch": 136.96, "learning_rate": 1.7020338783779414e-05, "loss": 2.1037, "step": 81080 }, { "epoch": 136.99, "learning_rate": 1.698661932312926e-05, "loss": 2.1095, "step": 81100 }, { "epoch": 137.03, "learning_rate": 1.6952926460763636e-05, "loss": 2.0793, "step": 81120 }, { "epoch": 137.06, "learning_rate": 1.6919260223828226e-05, "loss": 2.0671, "step": 81140 }, { "epoch": 137.09, "learning_rate": 1.6885620639447375e-05, "loss": 2.0802, "step": 81160 }, { "epoch": 137.13, "learning_rate": 1.6852007734723846e-05, "loss": 2.0895, "step": 81180 }, { "epoch": 137.16, "learning_rate": 1.6818421536738953e-05, "loss": 2.0869, "step": 81200 }, { "epoch": 137.2, "learning_rate": 1.6784862072552504e-05, "loss": 2.0769, "step": 81220 }, { "epoch": 137.23, "learning_rate": 1.6751329369202745e-05, "loss": 2.0735, "step": 81240 }, { "epoch": 137.26, "learning_rate": 1.6717823453706382e-05, "loss": 2.085, "step": 81260 }, { "epoch": 137.3, "learning_rate": 1.668434435305849e-05, "loss": 2.0848, "step": 81280 }, { "epoch": 137.33, "learning_rate": 1.6650892094232624e-05, "loss": 2.0724, "step": 81300 }, { "epoch": 137.36, "learning_rate": 1.661746670418063e-05, "loss": 2.0938, "step": 81320 }, { "epoch": 137.4, "learning_rate": 1.6584068209832743e-05, "loss": 2.1014, "step": 81340 }, { "epoch": 137.43, "learning_rate": 1.655069663809754e-05, "loss": 2.0856, "step": 81360 }, { "epoch": 137.47, "learning_rate": 1.6517352015861892e-05, "loss": 2.0853, "step": 81380 }, { "epoch": 137.5, "learning_rate": 1.648403436999097e-05, "loss": 2.1067, "step": 81400 }, { "epoch": 137.53, "learning_rate": 1.6450743727328167e-05, "loss": 2.1053, "step": 81420 }, { "epoch": 137.57, "learning_rate": 1.641748011469517e-05, "loss": 2.0821, "step": 81440 }, { "epoch": 137.6, "learning_rate": 1.6384243558891877e-05, "loss": 2.0904, "step": 81460 }, { "epoch": 137.64, "learning_rate": 1.6351034086696386e-05, "loss": 2.0979, "step": 81480 }, { "epoch": 137.67, "learning_rate": 1.6317851724864926e-05, "loss": 2.0905, "step": 81500 }, { "epoch": 137.7, "learning_rate": 1.6284696500131975e-05, "loss": 2.0921, "step": 81520 }, { "epoch": 137.74, "learning_rate": 1.62515684392101e-05, "loss": 2.1098, "step": 81540 }, { "epoch": 137.77, "learning_rate": 1.6218467568789946e-05, "loss": 2.085, "step": 81560 }, { "epoch": 137.8, "learning_rate": 1.6185393915540308e-05, "loss": 2.0945, "step": 81580 }, { "epoch": 137.84, "learning_rate": 1.6152347506108023e-05, "loss": 2.0968, "step": 81600 }, { "epoch": 137.87, "learning_rate": 1.6119328367118008e-05, "loss": 2.1083, "step": 81620 }, { "epoch": 137.91, "learning_rate": 1.608633652517315e-05, "loss": 2.0924, "step": 81640 }, { "epoch": 137.94, "learning_rate": 1.605337200685439e-05, "loss": 2.0951, "step": 81660 }, { "epoch": 137.97, "learning_rate": 1.6020434838720684e-05, "loss": 2.0926, "step": 81680 }, { "epoch": 138.01, "learning_rate": 1.5987525047308864e-05, "loss": 2.089, "step": 81700 }, { "epoch": 138.04, "learning_rate": 1.5954642659133778e-05, "loss": 2.0787, "step": 81720 }, { "epoch": 138.07, "learning_rate": 1.5921787700688166e-05, "loss": 2.0714, "step": 81740 }, { "epoch": 138.11, "learning_rate": 1.58889601984427e-05, "loss": 2.0713, "step": 81760 }, { "epoch": 138.14, "learning_rate": 1.5856160178845857e-05, "loss": 2.067, "step": 81780 }, { "epoch": 138.18, "learning_rate": 1.5825025640101894e-05, "loss": 2.0735, "step": 81800 }, { "epoch": 138.21, "learning_rate": 1.5792279287658634e-05, "loss": 2.0742, "step": 81820 }, { "epoch": 138.24, "learning_rate": 1.5759560495758075e-05, "loss": 2.0855, "step": 81840 }, { "epoch": 138.28, "learning_rate": 1.5726869290761158e-05, "loss": 2.084, "step": 81860 }, { "epoch": 138.31, "learning_rate": 1.5694205699006615e-05, "loss": 2.0832, "step": 81880 }, { "epoch": 138.34, "learning_rate": 1.56615697468109e-05, "loss": 2.0875, "step": 81900 }, { "epoch": 138.38, "learning_rate": 1.5628961460468234e-05, "loss": 2.0844, "step": 81920 }, { "epoch": 138.41, "learning_rate": 1.5596380866250465e-05, "loss": 2.0827, "step": 81940 }, { "epoch": 138.45, "learning_rate": 1.5563827990407265e-05, "loss": 2.0701, "step": 81960 }, { "epoch": 138.48, "learning_rate": 1.553130285916584e-05, "loss": 2.074, "step": 81980 }, { "epoch": 138.51, "learning_rate": 1.5498805498731144e-05, "loss": 2.0887, "step": 82000 }, { "epoch": 138.51, "eval_loss": 2.4867608547210693, "eval_runtime": 50.3039, "eval_samples_per_second": 19.66, "eval_steps_per_second": 0.119, "eval_tse_ndup": 0.0037981136751464254, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.031081833727716383, "eval_tse_type": 0.0005038057521626784, "step": 82000 }, { "epoch": 138.55, "learning_rate": 1.546633593528566e-05, "loss": 2.0885, "step": 82020 }, { "epoch": 138.58, "learning_rate": 1.5433894194989575e-05, "loss": 2.0813, "step": 82040 }, { "epoch": 138.61, "learning_rate": 1.540148030398061e-05, "loss": 2.0962, "step": 82060 }, { "epoch": 138.65, "learning_rate": 1.5369094288374026e-05, "loss": 2.0909, "step": 82080 }, { "epoch": 138.68, "learning_rate": 1.5336736174262667e-05, "loss": 2.0864, "step": 82100 }, { "epoch": 138.72, "learning_rate": 1.5304405987716876e-05, "loss": 2.0911, "step": 82120 }, { "epoch": 138.75, "learning_rate": 1.5272103754784517e-05, "loss": 2.0987, "step": 82140 }, { "epoch": 138.78, "learning_rate": 1.5239829501490871e-05, "loss": 2.0966, "step": 82160 }, { "epoch": 138.82, "learning_rate": 1.520758325383877e-05, "loss": 2.0958, "step": 82180 }, { "epoch": 138.85, "learning_rate": 1.5175365037808432e-05, "loss": 2.09, "step": 82200 }, { "epoch": 138.89, "learning_rate": 1.5143174879357452e-05, "loss": 2.0863, "step": 82220 }, { "epoch": 138.92, "learning_rate": 1.5111012804420887e-05, "loss": 2.1026, "step": 82240 }, { "epoch": 138.95, "learning_rate": 1.5078878838911137e-05, "loss": 2.1075, "step": 82260 }, { "epoch": 138.99, "learning_rate": 1.5046773008717969e-05, "loss": 2.101, "step": 82280 }, { "epoch": 139.02, "learning_rate": 1.501469533970844e-05, "loss": 2.069, "step": 82300 }, { "epoch": 139.05, "learning_rate": 1.4982645857726946e-05, "loss": 2.0658, "step": 82320 }, { "epoch": 139.09, "learning_rate": 1.495062458859523e-05, "loss": 2.0736, "step": 82340 }, { "epoch": 139.12, "learning_rate": 1.49186315581122e-05, "loss": 2.085, "step": 82360 }, { "epoch": 139.16, "learning_rate": 1.4886666792054083e-05, "loss": 2.073, "step": 82380 }, { "epoch": 139.19, "learning_rate": 1.4854730316174303e-05, "loss": 2.0727, "step": 82400 }, { "epoch": 139.22, "learning_rate": 1.482282215620352e-05, "loss": 2.0818, "step": 82420 }, { "epoch": 139.26, "learning_rate": 1.479094233784954e-05, "loss": 2.0733, "step": 82440 }, { "epoch": 139.29, "learning_rate": 1.475909088679735e-05, "loss": 2.0868, "step": 82460 }, { "epoch": 139.32, "learning_rate": 1.4727267828709134e-05, "loss": 2.0854, "step": 82480 }, { "epoch": 139.36, "learning_rate": 1.4695473189224112e-05, "loss": 2.0749, "step": 82500 }, { "epoch": 139.39, "learning_rate": 1.466370699395868e-05, "loss": 2.0767, "step": 82520 }, { "epoch": 139.43, "learning_rate": 1.463196926850624e-05, "loss": 2.0768, "step": 82540 }, { "epoch": 139.46, "learning_rate": 1.4600260038437375e-05, "loss": 2.0858, "step": 82560 }, { "epoch": 139.49, "learning_rate": 1.4568579329299582e-05, "loss": 2.0794, "step": 82580 }, { "epoch": 139.53, "learning_rate": 1.4536927166617454e-05, "loss": 2.0875, "step": 82600 }, { "epoch": 139.56, "learning_rate": 1.4505303575892564e-05, "loss": 2.0768, "step": 82620 }, { "epoch": 139.59, "learning_rate": 1.447370858260348e-05, "loss": 2.0823, "step": 82640 }, { "epoch": 139.63, "learning_rate": 1.4442142212205718e-05, "loss": 2.0902, "step": 82660 }, { "epoch": 139.66, "learning_rate": 1.4410604490131696e-05, "loss": 2.0906, "step": 82680 }, { "epoch": 139.7, "learning_rate": 1.4379095441790846e-05, "loss": 2.0819, "step": 82700 }, { "epoch": 139.73, "learning_rate": 1.4347615092569389e-05, "loss": 2.0879, "step": 82720 }, { "epoch": 139.76, "learning_rate": 1.4316163467830495e-05, "loss": 2.0897, "step": 82740 }, { "epoch": 139.8, "learning_rate": 1.4284740592914164e-05, "loss": 2.1008, "step": 82760 }, { "epoch": 139.83, "learning_rate": 1.4253346493137248e-05, "loss": 2.088, "step": 82780 }, { "epoch": 139.86, "learning_rate": 1.4221981193793415e-05, "loss": 2.0853, "step": 82800 }, { "epoch": 139.9, "learning_rate": 1.4190644720153085e-05, "loss": 2.0898, "step": 82820 }, { "epoch": 139.93, "learning_rate": 1.4159337097463515e-05, "loss": 2.0775, "step": 82840 }, { "epoch": 139.97, "learning_rate": 1.412805835094868e-05, "loss": 2.1072, "step": 82860 }, { "epoch": 140.0, "learning_rate": 1.4096808505809311e-05, "loss": 2.0776, "step": 82880 }, { "epoch": 140.03, "learning_rate": 1.4065587587222839e-05, "loss": 2.0555, "step": 82900 }, { "epoch": 140.07, "learning_rate": 1.4034395620343394e-05, "loss": 2.0714, "step": 82920 }, { "epoch": 140.1, "learning_rate": 1.40032326303018e-05, "loss": 2.0684, "step": 82940 }, { "epoch": 140.14, "learning_rate": 1.3972098642205472e-05, "loss": 2.0549, "step": 82960 }, { "epoch": 140.17, "learning_rate": 1.394099368113853e-05, "loss": 2.0696, "step": 82980 }, { "epoch": 140.2, "learning_rate": 1.390991777216168e-05, "loss": 2.0607, "step": 83000 }, { "epoch": 140.2, "eval_loss": 2.4845190048217773, "eval_runtime": 50.7394, "eval_samples_per_second": 19.492, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.004089683705445265, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.0313907972493957, "eval_tse_type": 0.0005622169893882281, "step": 83000 }, { "epoch": 140.24, "learning_rate": 1.3878870940312227e-05, "loss": 2.0765, "step": 83020 }, { "epoch": 140.27, "learning_rate": 1.3847853210604017e-05, "loss": 2.0747, "step": 83040 }, { "epoch": 140.3, "learning_rate": 1.3816864608027475e-05, "loss": 2.0634, "step": 83060 }, { "epoch": 140.34, "learning_rate": 1.37859051575496e-05, "loss": 2.0887, "step": 83080 }, { "epoch": 140.37, "learning_rate": 1.3754974884113819e-05, "loss": 2.0808, "step": 83100 }, { "epoch": 140.41, "learning_rate": 1.3724073812640109e-05, "loss": 2.0821, "step": 83120 }, { "epoch": 140.44, "learning_rate": 1.369320196802491e-05, "loss": 2.0861, "step": 83140 }, { "epoch": 140.47, "learning_rate": 1.366235937514112e-05, "loss": 2.0711, "step": 83160 }, { "epoch": 140.51, "learning_rate": 1.363154605883803e-05, "loss": 2.0828, "step": 83180 }, { "epoch": 140.54, "learning_rate": 1.3600762043941374e-05, "loss": 2.0885, "step": 83200 }, { "epoch": 140.57, "learning_rate": 1.3570007355253317e-05, "loss": 2.0764, "step": 83220 }, { "epoch": 140.61, "learning_rate": 1.3539282017552318e-05, "loss": 2.0746, "step": 83240 }, { "epoch": 140.64, "learning_rate": 1.350858605559323e-05, "loss": 2.0943, "step": 83260 }, { "epoch": 140.68, "learning_rate": 1.347791949410725e-05, "loss": 2.075, "step": 83280 }, { "epoch": 140.71, "learning_rate": 1.3447282357801877e-05, "loss": 2.0678, "step": 83300 }, { "epoch": 140.74, "learning_rate": 1.3416674671360874e-05, "loss": 2.0791, "step": 83320 }, { "epoch": 140.78, "learning_rate": 1.3386096459444314e-05, "loss": 2.0869, "step": 83340 }, { "epoch": 140.81, "learning_rate": 1.3355547746688513e-05, "loss": 2.0998, "step": 83360 }, { "epoch": 140.84, "learning_rate": 1.332502855770601e-05, "loss": 2.0936, "step": 83380 }, { "epoch": 140.88, "learning_rate": 1.3294538917085586e-05, "loss": 2.089, "step": 83400 }, { "epoch": 140.91, "learning_rate": 1.3264078849392141e-05, "loss": 2.086, "step": 83420 }, { "epoch": 140.95, "learning_rate": 1.3233648379166875e-05, "loss": 2.0927, "step": 83440 }, { "epoch": 140.98, "learning_rate": 1.320324753092701e-05, "loss": 2.0846, "step": 83460 }, { "epoch": 141.01, "learning_rate": 1.3172876329165978e-05, "loss": 2.0788, "step": 83480 }, { "epoch": 141.05, "learning_rate": 1.3142534798353318e-05, "loss": 2.0621, "step": 83500 }, { "epoch": 141.08, "learning_rate": 1.3112222962934639e-05, "loss": 2.0617, "step": 83520 }, { "epoch": 141.11, "learning_rate": 1.3081940847331659e-05, "loss": 2.0751, "step": 83540 }, { "epoch": 141.15, "learning_rate": 1.3051688475942109e-05, "loss": 2.0809, "step": 83560 }, { "epoch": 141.18, "learning_rate": 1.3021465873139782e-05, "loss": 2.0623, "step": 83580 }, { "epoch": 141.22, "learning_rate": 1.299127306327449e-05, "loss": 2.0517, "step": 83600 }, { "epoch": 141.25, "learning_rate": 1.2961110070672034e-05, "loss": 2.0704, "step": 83620 }, { "epoch": 141.28, "learning_rate": 1.293097691963419e-05, "loss": 2.0659, "step": 83640 }, { "epoch": 141.32, "learning_rate": 1.2900873634438699e-05, "loss": 2.067, "step": 83660 }, { "epoch": 141.35, "learning_rate": 1.2870800239339236e-05, "loss": 2.0792, "step": 83680 }, { "epoch": 141.39, "learning_rate": 1.2840756758565381e-05, "loss": 2.0955, "step": 83700 }, { "epoch": 141.42, "learning_rate": 1.2810743216322623e-05, "loss": 2.0694, "step": 83720 }, { "epoch": 141.45, "learning_rate": 1.2780759636792344e-05, "loss": 2.068, "step": 83740 }, { "epoch": 141.49, "learning_rate": 1.2750806044131758e-05, "loss": 2.0703, "step": 83760 }, { "epoch": 141.52, "learning_rate": 1.2720882462473943e-05, "loss": 2.0855, "step": 83780 }, { "epoch": 141.55, "learning_rate": 1.2690988915927788e-05, "loss": 2.0764, "step": 83800 }, { "epoch": 141.59, "learning_rate": 1.2661125428577997e-05, "loss": 2.0934, "step": 83820 }, { "epoch": 141.62, "learning_rate": 1.2631292024485009e-05, "loss": 2.0795, "step": 83840 }, { "epoch": 141.66, "learning_rate": 1.2601488727685078e-05, "loss": 2.0796, "step": 83860 }, { "epoch": 141.69, "learning_rate": 1.2571715562190183e-05, "loss": 2.0791, "step": 83880 }, { "epoch": 141.72, "learning_rate": 1.2541972551988024e-05, "loss": 2.0793, "step": 83900 }, { "epoch": 141.76, "learning_rate": 1.2512259721042019e-05, "loss": 2.067, "step": 83920 }, { "epoch": 141.79, "learning_rate": 1.248257709329122e-05, "loss": 2.0877, "step": 83940 }, { "epoch": 141.82, "learning_rate": 1.2452924692650442e-05, "loss": 2.0859, "step": 83960 }, { "epoch": 141.86, "learning_rate": 1.242330254301004e-05, "loss": 2.0762, "step": 83980 }, { "epoch": 141.89, "learning_rate": 1.2393710668236058e-05, "loss": 2.073, "step": 84000 }, { "epoch": 141.89, "eval_loss": 2.4791016578674316, "eval_runtime": 47.4414, "eval_samples_per_second": 20.847, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.00437610540721674, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03073634182479321, "eval_tse_type": 0.000670824119886145, "step": 84000 }, { "epoch": 141.93, "learning_rate": 1.2364149092170146e-05, "loss": 2.0908, "step": 84020 }, { "epoch": 141.96, "learning_rate": 1.2334617838629525e-05, "loss": 2.0951, "step": 84040 }, { "epoch": 141.99, "learning_rate": 1.2305116931407008e-05, "loss": 2.0695, "step": 84060 }, { "epoch": 142.03, "learning_rate": 1.2275646394270907e-05, "loss": 2.0729, "step": 84080 }, { "epoch": 142.06, "learning_rate": 1.2246206250965125e-05, "loss": 2.0615, "step": 84100 }, { "epoch": 142.09, "learning_rate": 1.2216796525209056e-05, "loss": 2.0634, "step": 84120 }, { "epoch": 142.13, "learning_rate": 1.2187417240697591e-05, "loss": 2.0627, "step": 84140 }, { "epoch": 142.16, "learning_rate": 1.2158068421101048e-05, "loss": 2.052, "step": 84160 }, { "epoch": 142.2, "learning_rate": 1.2128750090065304e-05, "loss": 2.0605, "step": 84180 }, { "epoch": 142.23, "learning_rate": 1.2099462271211558e-05, "loss": 2.0677, "step": 84200 }, { "epoch": 142.26, "learning_rate": 1.2071667126701514e-05, "loss": 2.0704, "step": 84220 }, { "epoch": 142.3, "learning_rate": 1.2042438874450202e-05, "loss": 2.0755, "step": 84240 }, { "epoch": 142.33, "learning_rate": 1.2013241203920295e-05, "loss": 2.0658, "step": 84260 }, { "epoch": 142.36, "learning_rate": 1.1984074138635825e-05, "loss": 2.0637, "step": 84280 }, { "epoch": 142.4, "learning_rate": 1.1954937702096175e-05, "loss": 2.0796, "step": 84300 }, { "epoch": 142.43, "learning_rate": 1.1925831917776043e-05, "loss": 2.0878, "step": 84320 }, { "epoch": 142.47, "learning_rate": 1.1896756809125442e-05, "loss": 2.0624, "step": 84340 }, { "epoch": 142.5, "learning_rate": 1.1867712399569642e-05, "loss": 2.0717, "step": 84360 }, { "epoch": 142.53, "learning_rate": 1.1838698712509206e-05, "loss": 2.0777, "step": 84380 }, { "epoch": 142.57, "learning_rate": 1.1809715771319946e-05, "loss": 2.0743, "step": 84400 }, { "epoch": 142.6, "learning_rate": 1.1780763599352885e-05, "loss": 2.069, "step": 84420 }, { "epoch": 142.64, "learning_rate": 1.1751842219934273e-05, "loss": 2.068, "step": 84440 }, { "epoch": 142.67, "learning_rate": 1.1722951656365538e-05, "loss": 2.0725, "step": 84460 }, { "epoch": 142.7, "learning_rate": 1.1694091931923302e-05, "loss": 2.0772, "step": 84480 }, { "epoch": 142.74, "learning_rate": 1.1665263069859295e-05, "loss": 2.0907, "step": 84500 }, { "epoch": 142.77, "learning_rate": 1.1636465093400423e-05, "loss": 2.0793, "step": 84520 }, { "epoch": 142.8, "learning_rate": 1.1607698025748697e-05, "loss": 2.0671, "step": 84540 }, { "epoch": 142.84, "learning_rate": 1.1578961890081225e-05, "loss": 2.0792, "step": 84560 }, { "epoch": 142.87, "learning_rate": 1.1550256709550206e-05, "loss": 2.0789, "step": 84580 }, { "epoch": 142.91, "learning_rate": 1.152158250728284e-05, "loss": 2.089, "step": 84600 }, { "epoch": 142.94, "learning_rate": 1.1492939306381473e-05, "loss": 2.0869, "step": 84620 }, { "epoch": 142.97, "learning_rate": 1.1464327129923369e-05, "loss": 2.0719, "step": 84640 }, { "epoch": 143.01, "learning_rate": 1.1435746000960861e-05, "loss": 2.0714, "step": 84660 }, { "epoch": 143.04, "learning_rate": 1.1407195942521248e-05, "loss": 2.0597, "step": 84680 }, { "epoch": 143.07, "learning_rate": 1.1378676977606789e-05, "loss": 2.0419, "step": 84700 }, { "epoch": 143.11, "learning_rate": 1.1350189129194722e-05, "loss": 2.058, "step": 84720 }, { "epoch": 143.14, "learning_rate": 1.1321732420237163e-05, "loss": 2.0666, "step": 84740 }, { "epoch": 143.18, "learning_rate": 1.1293306873661175e-05, "loss": 2.0583, "step": 84760 }, { "epoch": 143.21, "learning_rate": 1.1264912512368714e-05, "loss": 2.0638, "step": 84780 }, { "epoch": 143.24, "learning_rate": 1.1236549359236614e-05, "loss": 2.0691, "step": 84800 }, { "epoch": 143.28, "learning_rate": 1.120821743711652e-05, "loss": 2.0557, "step": 84820 }, { "epoch": 143.31, "learning_rate": 1.117991676883498e-05, "loss": 2.0667, "step": 84840 }, { "epoch": 143.34, "learning_rate": 1.1151647377193347e-05, "loss": 2.0707, "step": 84860 }, { "epoch": 143.38, "learning_rate": 1.1123409284967717e-05, "loss": 2.0611, "step": 84880 }, { "epoch": 143.41, "learning_rate": 1.109520251490903e-05, "loss": 2.0766, "step": 84900 }, { "epoch": 143.45, "learning_rate": 1.1067027089742976e-05, "loss": 2.0798, "step": 84920 }, { "epoch": 143.48, "learning_rate": 1.1038883032169995e-05, "loss": 2.0705, "step": 84940 }, { "epoch": 143.51, "learning_rate": 1.1010770364865203e-05, "loss": 2.0785, "step": 84960 }, { "epoch": 143.55, "learning_rate": 1.0982689110478511e-05, "loss": 2.0649, "step": 84980 }, { "epoch": 143.58, "learning_rate": 1.0954639291634482e-05, "loss": 2.0784, "step": 85000 }, { "epoch": 143.58, "eval_loss": 2.477313995361328, "eval_runtime": 47.1876, "eval_samples_per_second": 20.959, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.004345092321614098, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.030822836551891573, "eval_tse_type": 0.0006871029754330882, "step": 85000 }, { "epoch": 143.61, "learning_rate": 1.0926620930932319e-05, "loss": 2.0825, "step": 85020 }, { "epoch": 143.65, "learning_rate": 1.0898634050945915e-05, "loss": 2.0769, "step": 85040 }, { "epoch": 143.68, "learning_rate": 1.087067867422381e-05, "loss": 2.0559, "step": 85060 }, { "epoch": 143.72, "learning_rate": 1.0842754823289148e-05, "loss": 2.092, "step": 85080 }, { "epoch": 143.75, "learning_rate": 1.0814862520639641e-05, "loss": 2.0753, "step": 85100 }, { "epoch": 143.78, "learning_rate": 1.0787001788747626e-05, "loss": 2.0753, "step": 85120 }, { "epoch": 143.82, "learning_rate": 1.0759172650060018e-05, "loss": 2.0671, "step": 85140 }, { "epoch": 143.85, "learning_rate": 1.0731375126998222e-05, "loss": 2.0752, "step": 85160 }, { "epoch": 143.89, "learning_rate": 1.0703609241958212e-05, "loss": 2.0686, "step": 85180 }, { "epoch": 143.92, "learning_rate": 1.0675875017310455e-05, "loss": 2.0807, "step": 85200 }, { "epoch": 143.95, "learning_rate": 1.0648172475399931e-05, "loss": 2.0675, "step": 85220 }, { "epoch": 143.99, "learning_rate": 1.0620501638546049e-05, "loss": 2.064, "step": 85240 }, { "epoch": 144.02, "learning_rate": 1.0592862529042719e-05, "loss": 2.0738, "step": 85260 }, { "epoch": 144.05, "learning_rate": 1.0565255169158272e-05, "loss": 2.0472, "step": 85280 }, { "epoch": 144.09, "learning_rate": 1.0537679581135456e-05, "loss": 2.058, "step": 85300 }, { "epoch": 144.12, "learning_rate": 1.0510135787191444e-05, "loss": 2.0489, "step": 85320 }, { "epoch": 144.16, "learning_rate": 1.0482623809517727e-05, "loss": 2.0532, "step": 85340 }, { "epoch": 144.19, "learning_rate": 1.0455143670280265e-05, "loss": 2.0643, "step": 85360 }, { "epoch": 144.22, "learning_rate": 1.042769539161927e-05, "loss": 2.0722, "step": 85380 }, { "epoch": 144.26, "learning_rate": 1.0400278995649332e-05, "loss": 2.0695, "step": 85400 }, { "epoch": 144.29, "learning_rate": 1.0372894504459353e-05, "loss": 2.0644, "step": 85420 }, { "epoch": 144.32, "learning_rate": 1.0345541940112519e-05, "loss": 2.0736, "step": 85440 }, { "epoch": 144.36, "learning_rate": 1.031822132464631e-05, "loss": 2.0559, "step": 85460 }, { "epoch": 144.39, "learning_rate": 1.0290932680072412e-05, "loss": 2.0542, "step": 85480 }, { "epoch": 144.43, "learning_rate": 1.0263676028376856e-05, "loss": 2.0653, "step": 85500 }, { "epoch": 144.46, "learning_rate": 1.0236451391519786e-05, "loss": 2.0632, "step": 85520 }, { "epoch": 144.49, "learning_rate": 1.0209258791435621e-05, "loss": 2.0596, "step": 85540 }, { "epoch": 144.53, "learning_rate": 1.0182098250032918e-05, "loss": 2.0622, "step": 85560 }, { "epoch": 144.56, "learning_rate": 1.0154969789194468e-05, "loss": 2.0696, "step": 85580 }, { "epoch": 144.59, "learning_rate": 1.012787343077719e-05, "loss": 2.0648, "step": 85600 }, { "epoch": 144.63, "learning_rate": 1.0100809196612093e-05, "loss": 2.0664, "step": 85620 }, { "epoch": 144.66, "learning_rate": 1.0073777108504362e-05, "loss": 2.0762, "step": 85640 }, { "epoch": 144.7, "learning_rate": 1.0046777188233264e-05, "loss": 2.0692, "step": 85660 }, { "epoch": 144.73, "learning_rate": 1.0019809457552155e-05, "loss": 2.0609, "step": 85680 }, { "epoch": 144.76, "learning_rate": 9.992873938188407e-06, "loss": 2.0804, "step": 85700 }, { "epoch": 144.8, "learning_rate": 9.965970651843526e-06, "loss": 2.0863, "step": 85720 }, { "epoch": 144.83, "learning_rate": 9.939099620193004e-06, "loss": 2.0718, "step": 85740 }, { "epoch": 144.86, "learning_rate": 9.912260864886319e-06, "loss": 2.0668, "step": 85760 }, { "epoch": 144.9, "learning_rate": 9.885454407546985e-06, "loss": 2.0816, "step": 85780 }, { "epoch": 144.93, "learning_rate": 9.858680269772475e-06, "loss": 2.0645, "step": 85800 }, { "epoch": 144.97, "learning_rate": 9.83193847313425e-06, "loss": 2.0805, "step": 85820 }, { "epoch": 145.0, "learning_rate": 9.805229039177654e-06, "loss": 2.0684, "step": 85840 }, { "epoch": 145.03, "learning_rate": 9.778551989422014e-06, "loss": 2.0476, "step": 85860 }, { "epoch": 145.07, "learning_rate": 9.751907345360567e-06, "loss": 2.053, "step": 85880 }, { "epoch": 145.1, "learning_rate": 9.725295128460393e-06, "loss": 2.0607, "step": 85900 }, { "epoch": 145.14, "learning_rate": 9.698715360162492e-06, "loss": 2.0643, "step": 85920 }, { "epoch": 145.17, "learning_rate": 9.672168061881687e-06, "loss": 2.0697, "step": 85940 }, { "epoch": 145.2, "learning_rate": 9.645653255006687e-06, "loss": 2.0515, "step": 85960 }, { "epoch": 145.24, "learning_rate": 9.619170960899953e-06, "loss": 2.0523, "step": 85980 }, { "epoch": 145.27, "learning_rate": 9.592721200897804e-06, "loss": 2.0519, "step": 86000 }, { "epoch": 145.27, "eval_loss": 2.4758903980255127, "eval_runtime": 49.7273, "eval_samples_per_second": 19.888, "eval_steps_per_second": 0.121, "eval_tse_ndup": 0.0031375304058157056, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.031715138351557606, "eval_tse_type": 0.0005146297351445141, "step": 86000 }, { "epoch": 145.3, "learning_rate": 9.566303996310361e-06, "loss": 2.0626, "step": 86020 }, { "epoch": 145.34, "learning_rate": 9.539919368421456e-06, "loss": 2.0577, "step": 86040 }, { "epoch": 145.37, "learning_rate": 9.513567338488738e-06, "loss": 2.0672, "step": 86060 }, { "epoch": 145.41, "learning_rate": 9.48724792774351e-06, "loss": 2.0477, "step": 86080 }, { "epoch": 145.44, "learning_rate": 9.460961157390907e-06, "loss": 2.0584, "step": 86100 }, { "epoch": 145.47, "learning_rate": 9.434707048609664e-06, "loss": 2.0671, "step": 86120 }, { "epoch": 145.51, "learning_rate": 9.408485622552254e-06, "loss": 2.0552, "step": 86140 }, { "epoch": 145.54, "learning_rate": 9.382296900344805e-06, "loss": 2.0686, "step": 86160 }, { "epoch": 145.57, "learning_rate": 9.356140903087102e-06, "loss": 2.0663, "step": 86180 }, { "epoch": 145.61, "learning_rate": 9.330017651852569e-06, "loss": 2.0723, "step": 86200 }, { "epoch": 145.64, "learning_rate": 9.303927167688203e-06, "loss": 2.0704, "step": 86220 }, { "epoch": 145.68, "learning_rate": 9.27786947161468e-06, "loss": 2.0787, "step": 86240 }, { "epoch": 145.71, "learning_rate": 9.251844584626184e-06, "loss": 2.0757, "step": 86260 }, { "epoch": 145.74, "learning_rate": 9.227151350500151e-06, "loss": 2.0742, "step": 86280 }, { "epoch": 145.78, "learning_rate": 9.201190501511964e-06, "loss": 2.053, "step": 86300 }, { "epoch": 145.81, "learning_rate": 9.175262523387678e-06, "loss": 2.0615, "step": 86320 }, { "epoch": 145.84, "learning_rate": 9.149367437016992e-06, "loss": 2.0679, "step": 86340 }, { "epoch": 145.88, "learning_rate": 9.12350526326311e-06, "loss": 2.0665, "step": 86360 }, { "epoch": 145.91, "learning_rate": 9.09767602296272e-06, "loss": 2.0727, "step": 86380 }, { "epoch": 145.95, "learning_rate": 9.071879736925987e-06, "loss": 2.0713, "step": 86400 }, { "epoch": 145.98, "learning_rate": 9.046116425936491e-06, "loss": 2.0673, "step": 86420 }, { "epoch": 146.01, "learning_rate": 9.020386110751266e-06, "loss": 2.0721, "step": 86440 }, { "epoch": 146.05, "learning_rate": 8.994688812100776e-06, "loss": 2.0604, "step": 86460 }, { "epoch": 146.08, "learning_rate": 8.969024550688881e-06, "loss": 2.0532, "step": 86480 }, { "epoch": 146.11, "learning_rate": 8.94339334719278e-06, "loss": 2.053, "step": 86500 }, { "epoch": 146.15, "learning_rate": 8.917795222263076e-06, "loss": 2.0612, "step": 86520 }, { "epoch": 146.18, "learning_rate": 8.892230196523754e-06, "loss": 2.0486, "step": 86540 }, { "epoch": 146.22, "learning_rate": 8.866698290572051e-06, "loss": 2.0646, "step": 86560 }, { "epoch": 146.25, "learning_rate": 8.841199524978583e-06, "loss": 2.0675, "step": 86580 }, { "epoch": 146.28, "learning_rate": 8.81573392028724e-06, "loss": 2.0645, "step": 86600 }, { "epoch": 146.32, "learning_rate": 8.790301497015207e-06, "loss": 2.0607, "step": 86620 }, { "epoch": 146.35, "learning_rate": 8.764902275652914e-06, "loss": 2.05, "step": 86640 }, { "epoch": 146.39, "learning_rate": 8.739536276664063e-06, "loss": 2.0596, "step": 86660 }, { "epoch": 146.42, "learning_rate": 8.714203520485582e-06, "loss": 2.0611, "step": 86680 }, { "epoch": 146.45, "learning_rate": 8.688904027527605e-06, "loss": 2.0514, "step": 86700 }, { "epoch": 146.49, "learning_rate": 8.663637818173504e-06, "loss": 2.046, "step": 86720 }, { "epoch": 146.52, "learning_rate": 8.638404912779763e-06, "loss": 2.0686, "step": 86740 }, { "epoch": 146.55, "learning_rate": 8.613205331676133e-06, "loss": 2.0547, "step": 86760 }, { "epoch": 146.59, "learning_rate": 8.588039095165412e-06, "loss": 2.0611, "step": 86780 }, { "epoch": 146.62, "learning_rate": 8.562906223523603e-06, "loss": 2.0738, "step": 86800 }, { "epoch": 146.66, "learning_rate": 8.53780673699981e-06, "loss": 2.0655, "step": 86820 }, { "epoch": 146.69, "learning_rate": 8.512740655816232e-06, "loss": 2.0479, "step": 86840 }, { "epoch": 146.72, "learning_rate": 8.487708000168165e-06, "loss": 2.0562, "step": 86860 }, { "epoch": 146.76, "learning_rate": 8.46270879022394e-06, "loss": 2.0621, "step": 86880 }, { "epoch": 146.79, "learning_rate": 8.437743046125013e-06, "loss": 2.0607, "step": 86900 }, { "epoch": 146.82, "learning_rate": 8.412810787985797e-06, "loss": 2.0686, "step": 86920 }, { "epoch": 146.86, "learning_rate": 8.387912035893774e-06, "loss": 2.0563, "step": 86940 }, { "epoch": 146.89, "learning_rate": 8.36304680990942e-06, "loss": 2.0744, "step": 86960 }, { "epoch": 146.93, "learning_rate": 8.338215130066195e-06, "loss": 2.0676, "step": 86980 }, { "epoch": 146.96, "learning_rate": 8.313417016370556e-06, "loss": 2.0607, "step": 87000 }, { "epoch": 146.96, "eval_loss": 2.4719398021698, "eval_runtime": 47.3991, "eval_samples_per_second": 20.865, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.005302481808746275, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.030851745762280017, "eval_tse_type": 0.00048098528255252214, "step": 87000 }, { "epoch": 146.99, "learning_rate": 8.288652488801858e-06, "loss": 2.0697, "step": 87020 }, { "epoch": 147.03, "learning_rate": 8.263921567312454e-06, "loss": 2.0586, "step": 87040 }, { "epoch": 147.06, "learning_rate": 8.2392242718276e-06, "loss": 2.0528, "step": 87060 }, { "epoch": 147.09, "learning_rate": 8.21456062224546e-06, "loss": 2.0655, "step": 87080 }, { "epoch": 147.13, "learning_rate": 8.189930638437094e-06, "loss": 2.0426, "step": 87100 }, { "epoch": 147.16, "learning_rate": 8.165334340246427e-06, "loss": 2.0635, "step": 87120 }, { "epoch": 147.2, "learning_rate": 8.140771747490273e-06, "loss": 2.0533, "step": 87140 }, { "epoch": 147.23, "learning_rate": 8.116242879958236e-06, "loss": 2.0411, "step": 87160 }, { "epoch": 147.26, "learning_rate": 8.091747757412804e-06, "loss": 2.0435, "step": 87180 }, { "epoch": 147.3, "learning_rate": 8.067286399589246e-06, "loss": 2.0477, "step": 87200 }, { "epoch": 147.33, "learning_rate": 8.042858826195648e-06, "loss": 2.074, "step": 87220 }, { "epoch": 147.36, "learning_rate": 8.01846505691286e-06, "loss": 2.0472, "step": 87240 }, { "epoch": 147.4, "learning_rate": 7.99410511139448e-06, "loss": 2.0456, "step": 87260 }, { "epoch": 147.43, "learning_rate": 7.969779009266915e-06, "loss": 2.0606, "step": 87280 }, { "epoch": 147.47, "learning_rate": 7.945486770129234e-06, "loss": 2.0632, "step": 87300 }, { "epoch": 147.5, "learning_rate": 7.921228413553272e-06, "loss": 2.0693, "step": 87320 }, { "epoch": 147.53, "learning_rate": 7.897003959083538e-06, "loss": 2.0688, "step": 87340 }, { "epoch": 147.57, "learning_rate": 7.87281342623724e-06, "loss": 2.0478, "step": 87360 }, { "epoch": 147.6, "learning_rate": 7.848656834504276e-06, "loss": 2.0476, "step": 87380 }, { "epoch": 147.64, "learning_rate": 7.824534203347122e-06, "loss": 2.0542, "step": 87400 }, { "epoch": 147.67, "learning_rate": 7.800445552201013e-06, "loss": 2.0665, "step": 87420 }, { "epoch": 147.7, "learning_rate": 7.77639090047369e-06, "loss": 2.0564, "step": 87440 }, { "epoch": 147.74, "learning_rate": 7.752370267545584e-06, "loss": 2.0552, "step": 87460 }, { "epoch": 147.77, "learning_rate": 7.728383672769641e-06, "loss": 2.0633, "step": 87480 }, { "epoch": 147.8, "learning_rate": 7.704431135471473e-06, "loss": 2.0697, "step": 87500 }, { "epoch": 147.84, "learning_rate": 7.680512674949197e-06, "loss": 2.0672, "step": 87520 }, { "epoch": 147.87, "learning_rate": 7.656628310473468e-06, "loss": 2.0613, "step": 87540 }, { "epoch": 147.91, "learning_rate": 7.632778061287493e-06, "loss": 2.0639, "step": 87560 }, { "epoch": 147.94, "learning_rate": 7.608961946606996e-06, "loss": 2.0635, "step": 87580 }, { "epoch": 147.97, "learning_rate": 7.5851799856201945e-06, "loss": 2.0685, "step": 87600 }, { "epoch": 148.01, "learning_rate": 7.56143219748775e-06, "loss": 2.054, "step": 87620 }, { "epoch": 148.04, "learning_rate": 7.537718601342858e-06, "loss": 2.0421, "step": 87640 }, { "epoch": 148.07, "learning_rate": 7.514039216291147e-06, "loss": 2.041, "step": 87660 }, { "epoch": 148.11, "learning_rate": 7.490394061410638e-06, "loss": 2.0385, "step": 87680 }, { "epoch": 148.14, "learning_rate": 7.4667831557518165e-06, "loss": 2.0476, "step": 87700 }, { "epoch": 148.18, "learning_rate": 7.443206518337564e-06, "loss": 2.0508, "step": 87720 }, { "epoch": 148.21, "learning_rate": 7.419664168163165e-06, "loss": 2.0585, "step": 87740 }, { "epoch": 148.24, "learning_rate": 7.396156124196241e-06, "loss": 2.0495, "step": 87760 }, { "epoch": 148.28, "learning_rate": 7.372682405376807e-06, "loss": 2.0559, "step": 87780 }, { "epoch": 148.31, "learning_rate": 7.34924303061722e-06, "loss": 2.0514, "step": 87800 }, { "epoch": 148.34, "learning_rate": 7.325838018802156e-06, "loss": 2.059, "step": 87820 }, { "epoch": 148.38, "learning_rate": 7.302467388788614e-06, "loss": 2.0384, "step": 87840 }, { "epoch": 148.41, "learning_rate": 7.279131159405888e-06, "loss": 2.0525, "step": 87860 }, { "epoch": 148.45, "learning_rate": 7.255829349455567e-06, "loss": 2.0589, "step": 87880 }, { "epoch": 148.48, "learning_rate": 7.232561977711472e-06, "loss": 2.053, "step": 87900 }, { "epoch": 148.51, "learning_rate": 7.209329062919723e-06, "loss": 2.0661, "step": 87920 }, { "epoch": 148.55, "learning_rate": 7.186130623798648e-06, "loss": 2.0559, "step": 87940 }, { "epoch": 148.58, "learning_rate": 7.1629666790388236e-06, "loss": 2.0615, "step": 87960 }, { "epoch": 148.61, "learning_rate": 7.139837247303028e-06, "loss": 2.0704, "step": 87980 }, { "epoch": 148.65, "learning_rate": 7.11674234722619e-06, "loss": 2.0672, "step": 88000 }, { "epoch": 148.65, "eval_loss": 2.4698731899261475, "eval_runtime": 47.5167, "eval_samples_per_second": 20.814, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.003816046407276121, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03154199793926685, "eval_tse_type": 0.000774391320397387, "step": 88000 }, { "epoch": 148.68, "learning_rate": 7.093681997415508e-06, "loss": 2.062, "step": 88020 }, { "epoch": 148.72, "learning_rate": 7.070656216450239e-06, "loss": 2.0625, "step": 88040 }, { "epoch": 148.75, "learning_rate": 7.047665022881866e-06, "loss": 2.0724, "step": 88060 }, { "epoch": 148.78, "learning_rate": 7.0247084352339675e-06, "loss": 2.0601, "step": 88080 }, { "epoch": 148.82, "learning_rate": 7.001786472002259e-06, "loss": 2.0529, "step": 88100 }, { "epoch": 148.85, "learning_rate": 6.978899151654555e-06, "loss": 2.0565, "step": 88120 }, { "epoch": 148.89, "learning_rate": 6.95604649263073e-06, "loss": 2.0474, "step": 88140 }, { "epoch": 148.92, "learning_rate": 6.933228513342804e-06, "loss": 2.0644, "step": 88160 }, { "epoch": 148.95, "learning_rate": 6.910445232174772e-06, "loss": 2.0612, "step": 88180 }, { "epoch": 148.99, "learning_rate": 6.887696667482729e-06, "loss": 2.0532, "step": 88200 }, { "epoch": 149.02, "learning_rate": 6.8649828375947745e-06, "loss": 2.0399, "step": 88220 }, { "epoch": 149.05, "learning_rate": 6.8423037608110415e-06, "loss": 2.0546, "step": 88240 }, { "epoch": 149.09, "learning_rate": 6.8196594554036545e-06, "loss": 2.0431, "step": 88260 }, { "epoch": 149.12, "learning_rate": 6.797049939616701e-06, "loss": 2.0565, "step": 88280 }, { "epoch": 149.16, "learning_rate": 6.774475231666272e-06, "loss": 2.0495, "step": 88300 }, { "epoch": 149.19, "learning_rate": 6.751935349740407e-06, "loss": 2.0564, "step": 88320 }, { "epoch": 149.22, "learning_rate": 6.729430311999085e-06, "loss": 2.0444, "step": 88340 }, { "epoch": 149.26, "learning_rate": 6.706960136574175e-06, "loss": 2.0734, "step": 88360 }, { "epoch": 149.29, "learning_rate": 6.684524841569534e-06, "loss": 2.0536, "step": 88380 }, { "epoch": 149.32, "learning_rate": 6.662124445060863e-06, "loss": 2.0487, "step": 88400 }, { "epoch": 149.36, "learning_rate": 6.639758965095744e-06, "loss": 2.048, "step": 88420 }, { "epoch": 149.39, "learning_rate": 6.617428419693639e-06, "loss": 2.0559, "step": 88440 }, { "epoch": 149.43, "learning_rate": 6.595132826845879e-06, "loss": 2.0456, "step": 88460 }, { "epoch": 149.46, "learning_rate": 6.5728722045156285e-06, "loss": 2.0585, "step": 88480 }, { "epoch": 149.49, "learning_rate": 6.550646570637836e-06, "loss": 2.0504, "step": 88500 }, { "epoch": 149.53, "learning_rate": 6.528455943119305e-06, "loss": 2.046, "step": 88520 }, { "epoch": 149.56, "learning_rate": 6.506300339838656e-06, "loss": 2.045, "step": 88540 }, { "epoch": 149.59, "learning_rate": 6.484179778646216e-06, "loss": 2.0529, "step": 88560 }, { "epoch": 149.63, "learning_rate": 6.462094277364139e-06, "loss": 2.0551, "step": 88580 }, { "epoch": 149.66, "learning_rate": 6.440043853786315e-06, "loss": 2.033, "step": 88600 }, { "epoch": 149.7, "learning_rate": 6.418028525678382e-06, "loss": 2.0499, "step": 88620 }, { "epoch": 149.73, "learning_rate": 6.396048310777669e-06, "loss": 2.0713, "step": 88640 }, { "epoch": 149.76, "learning_rate": 6.374103226793243e-06, "loss": 2.0577, "step": 88660 }, { "epoch": 149.8, "learning_rate": 6.352193291405883e-06, "loss": 2.0662, "step": 88680 }, { "epoch": 149.83, "learning_rate": 6.330318522268008e-06, "loss": 2.0467, "step": 88700 }, { "epoch": 149.86, "learning_rate": 6.308478937003731e-06, "loss": 2.0531, "step": 88720 }, { "epoch": 149.9, "learning_rate": 6.28667455320881e-06, "loss": 2.0586, "step": 88740 }, { "epoch": 149.93, "learning_rate": 6.264905388450659e-06, "loss": 2.0659, "step": 88760 }, { "epoch": 149.97, "learning_rate": 6.2431714602682714e-06, "loss": 2.0599, "step": 88780 }, { "epoch": 150.0, "learning_rate": 6.221472786172294e-06, "loss": 2.0527, "step": 88800 }, { "epoch": 150.03, "learning_rate": 6.199809383644956e-06, "loss": 2.04, "step": 88820 }, { "epoch": 150.07, "learning_rate": 6.178181270140077e-06, "loss": 2.0388, "step": 88840 }, { "epoch": 150.1, "learning_rate": 6.156588463083035e-06, "loss": 2.0466, "step": 88860 }, { "epoch": 150.14, "learning_rate": 6.135030979870743e-06, "loss": 2.0452, "step": 88880 }, { "epoch": 150.17, "learning_rate": 6.113508837871718e-06, "loss": 2.0419, "step": 88900 }, { "epoch": 150.2, "learning_rate": 6.092022054425928e-06, "loss": 2.0531, "step": 88920 }, { "epoch": 150.24, "learning_rate": 6.070570646844886e-06, "loss": 2.0525, "step": 88940 }, { "epoch": 150.27, "learning_rate": 6.049154632411624e-06, "loss": 2.0425, "step": 88960 }, { "epoch": 150.3, "learning_rate": 6.027774028380623e-06, "loss": 2.0572, "step": 88980 }, { "epoch": 150.34, "learning_rate": 6.0064288519778635e-06, "loss": 2.0491, "step": 89000 }, { "epoch": 150.34, "eval_loss": 2.4691731929779053, "eval_runtime": 47.5747, "eval_samples_per_second": 20.788, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004018707610340331, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.031120958485009845, "eval_tse_type": 0.0008390911133580498, "step": 89000 }, { "epoch": 150.37, "learning_rate": 5.985119120400745e-06, "loss": 2.0614, "step": 89020 }, { "epoch": 150.41, "learning_rate": 5.963844850818151e-06, "loss": 2.0443, "step": 89040 }, { "epoch": 150.44, "learning_rate": 5.94260606037037e-06, "loss": 2.0417, "step": 89060 }, { "epoch": 150.47, "learning_rate": 5.921402766169126e-06, "loss": 2.0667, "step": 89080 }, { "epoch": 150.51, "learning_rate": 5.900234985297498e-06, "loss": 2.0578, "step": 89100 }, { "epoch": 150.54, "learning_rate": 5.879102734810016e-06, "loss": 2.0481, "step": 89120 }, { "epoch": 150.57, "learning_rate": 5.858006031732549e-06, "loss": 2.0737, "step": 89140 }, { "epoch": 150.61, "learning_rate": 5.836944893062318e-06, "loss": 2.0507, "step": 89160 }, { "epoch": 150.64, "learning_rate": 5.8159193357679e-06, "loss": 2.0488, "step": 89180 }, { "epoch": 150.68, "learning_rate": 5.794929376789215e-06, "loss": 2.039, "step": 89200 }, { "epoch": 150.71, "learning_rate": 5.773975033037499e-06, "loss": 2.0547, "step": 89220 }, { "epoch": 150.74, "learning_rate": 5.753056321395267e-06, "loss": 2.0551, "step": 89240 }, { "epoch": 150.78, "learning_rate": 5.732173258716366e-06, "loss": 2.0427, "step": 89260 }, { "epoch": 150.81, "learning_rate": 5.711325861825906e-06, "loss": 2.0503, "step": 89280 }, { "epoch": 150.84, "learning_rate": 5.690514147520243e-06, "loss": 2.0522, "step": 89300 }, { "epoch": 150.88, "learning_rate": 5.66973813256701e-06, "loss": 2.0386, "step": 89320 }, { "epoch": 150.91, "learning_rate": 5.6489978337050555e-06, "loss": 2.0557, "step": 89340 }, { "epoch": 150.95, "learning_rate": 5.6282932676444856e-06, "loss": 2.0451, "step": 89360 }, { "epoch": 150.98, "learning_rate": 5.607624451066568e-06, "loss": 2.0659, "step": 89380 }, { "epoch": 151.01, "learning_rate": 5.586991400623798e-06, "loss": 2.064, "step": 89400 }, { "epoch": 151.05, "learning_rate": 5.566394132939884e-06, "loss": 2.0434, "step": 89420 }, { "epoch": 151.08, "learning_rate": 5.54583266460964e-06, "loss": 2.0484, "step": 89440 }, { "epoch": 151.11, "learning_rate": 5.525307012199077e-06, "loss": 2.0436, "step": 89460 }, { "epoch": 151.15, "learning_rate": 5.504817192245343e-06, "loss": 2.0486, "step": 89480 }, { "epoch": 151.18, "learning_rate": 5.484363221256733e-06, "loss": 2.0363, "step": 89500 }, { "epoch": 151.22, "learning_rate": 5.463945115712609e-06, "loss": 2.0417, "step": 89520 }, { "epoch": 151.25, "learning_rate": 5.443562892063497e-06, "loss": 2.0627, "step": 89540 }, { "epoch": 151.28, "learning_rate": 5.423216566730971e-06, "loss": 2.0468, "step": 89560 }, { "epoch": 151.32, "learning_rate": 5.4029061561077064e-06, "loss": 2.0507, "step": 89580 }, { "epoch": 151.35, "learning_rate": 5.382631676557437e-06, "loss": 2.0686, "step": 89600 }, { "epoch": 151.39, "learning_rate": 5.3623931444149235e-06, "loss": 2.0343, "step": 89620 }, { "epoch": 151.42, "learning_rate": 5.342190575986022e-06, "loss": 2.0584, "step": 89640 }, { "epoch": 151.45, "learning_rate": 5.322023987547547e-06, "loss": 2.0555, "step": 89660 }, { "epoch": 151.49, "learning_rate": 5.301893395347363e-06, "loss": 2.0492, "step": 89680 }, { "epoch": 151.52, "learning_rate": 5.281798815604327e-06, "loss": 2.0453, "step": 89700 }, { "epoch": 151.55, "learning_rate": 5.261740264508275e-06, "loss": 2.042, "step": 89720 }, { "epoch": 151.59, "learning_rate": 5.2417177582200325e-06, "loss": 2.0472, "step": 89740 }, { "epoch": 151.62, "learning_rate": 5.2217313128713415e-06, "loss": 2.046, "step": 89760 }, { "epoch": 151.66, "learning_rate": 5.20178094456496e-06, "loss": 2.0422, "step": 89780 }, { "epoch": 151.69, "learning_rate": 5.1818666693745076e-06, "loss": 2.0479, "step": 89800 }, { "epoch": 151.72, "learning_rate": 5.161988503344561e-06, "loss": 2.042, "step": 89820 }, { "epoch": 151.76, "learning_rate": 5.1421464624906155e-06, "loss": 2.0542, "step": 89840 }, { "epoch": 151.79, "learning_rate": 5.122340562799027e-06, "loss": 2.0586, "step": 89860 }, { "epoch": 151.82, "learning_rate": 5.1025708202270765e-06, "loss": 2.0389, "step": 89880 }, { "epoch": 151.86, "learning_rate": 5.0828372507028545e-06, "loss": 2.0604, "step": 89900 }, { "epoch": 151.89, "learning_rate": 5.063139870125367e-06, "loss": 2.05, "step": 89920 }, { "epoch": 151.93, "learning_rate": 5.043478694364423e-06, "loss": 2.0452, "step": 89940 }, { "epoch": 151.96, "learning_rate": 5.023853739260681e-06, "loss": 2.0501, "step": 89960 }, { "epoch": 151.99, "learning_rate": 5.0042650206256146e-06, "loss": 2.0592, "step": 89980 }, { "epoch": 152.03, "learning_rate": 4.9847125542415055e-06, "loss": 2.0495, "step": 90000 }, { "epoch": 152.03, "eval_loss": 2.4667632579803467, "eval_runtime": 47.3318, "eval_samples_per_second": 20.895, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.0038151567924477906, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.030863016647171955, "eval_tse_type": 0.0009181781128057627, "step": 90000 }, { "epoch": 152.06, "learning_rate": 4.965196355861423e-06, "loss": 2.0334, "step": 90020 }, { "epoch": 152.09, "learning_rate": 4.9457164412092025e-06, "loss": 2.0306, "step": 90040 }, { "epoch": 152.13, "learning_rate": 4.926272825979466e-06, "loss": 2.036, "step": 90060 }, { "epoch": 152.16, "learning_rate": 4.906865525837589e-06, "loss": 2.0366, "step": 90080 }, { "epoch": 152.2, "learning_rate": 4.887494556419675e-06, "loss": 2.0381, "step": 90100 }, { "epoch": 152.23, "learning_rate": 4.868159933332572e-06, "loss": 2.0472, "step": 90120 }, { "epoch": 152.26, "learning_rate": 4.8488616721538205e-06, "loss": 2.0408, "step": 90140 }, { "epoch": 152.3, "learning_rate": 4.82959978843171e-06, "loss": 2.0502, "step": 90160 }, { "epoch": 152.33, "learning_rate": 4.810374297685161e-06, "loss": 2.0595, "step": 90180 }, { "epoch": 152.36, "learning_rate": 4.791185215403821e-06, "loss": 2.0581, "step": 90200 }, { "epoch": 152.4, "learning_rate": 4.772032557047984e-06, "loss": 2.0243, "step": 90220 }, { "epoch": 152.43, "learning_rate": 4.7529163380486074e-06, "loss": 2.0409, "step": 90240 }, { "epoch": 152.47, "learning_rate": 4.734789695981407e-06, "loss": 2.0498, "step": 90260 }, { "epoch": 152.5, "learning_rate": 4.715744577999176e-06, "loss": 2.0373, "step": 90280 }, { "epoch": 152.53, "learning_rate": 4.696735944723624e-06, "loss": 2.0461, "step": 90300 }, { "epoch": 152.57, "learning_rate": 4.677763811469638e-06, "loss": 2.0276, "step": 90320 }, { "epoch": 152.6, "learning_rate": 4.658828193522735e-06, "loss": 2.0582, "step": 90340 }, { "epoch": 152.64, "learning_rate": 4.63992910613899e-06, "loss": 2.045, "step": 90360 }, { "epoch": 152.67, "learning_rate": 4.621066564545068e-06, "loss": 2.056, "step": 90380 }, { "epoch": 152.7, "learning_rate": 4.602240583938183e-06, "loss": 2.0578, "step": 90400 }, { "epoch": 152.74, "learning_rate": 4.583451179486053e-06, "loss": 2.0609, "step": 90420 }, { "epoch": 152.77, "learning_rate": 4.564698366327014e-06, "loss": 2.0539, "step": 90440 }, { "epoch": 152.8, "learning_rate": 4.545982159569822e-06, "loss": 2.0348, "step": 90460 }, { "epoch": 152.84, "learning_rate": 4.527302574293812e-06, "loss": 2.0458, "step": 90480 }, { "epoch": 152.87, "learning_rate": 4.5086596255487834e-06, "loss": 2.0601, "step": 90500 }, { "epoch": 152.91, "learning_rate": 4.490053328355015e-06, "loss": 2.0599, "step": 90520 }, { "epoch": 152.94, "learning_rate": 4.471483697703288e-06, "loss": 2.0442, "step": 90540 }, { "epoch": 152.97, "learning_rate": 4.4529507485547905e-06, "loss": 2.0572, "step": 90560 }, { "epoch": 153.01, "learning_rate": 4.434454495841195e-06, "loss": 2.0633, "step": 90580 }, { "epoch": 153.04, "learning_rate": 4.4159949544646025e-06, "loss": 2.0344, "step": 90600 }, { "epoch": 153.07, "learning_rate": 4.397572139297529e-06, "loss": 2.034, "step": 90620 }, { "epoch": 153.11, "learning_rate": 4.3791860651829e-06, "loss": 2.044, "step": 90640 }, { "epoch": 153.14, "learning_rate": 4.360836746934055e-06, "loss": 2.0489, "step": 90660 }, { "epoch": 153.18, "learning_rate": 4.342524199334702e-06, "loss": 2.0524, "step": 90680 }, { "epoch": 153.21, "learning_rate": 4.324248437138922e-06, "loss": 2.0412, "step": 90700 }, { "epoch": 153.24, "learning_rate": 4.306009475071177e-06, "loss": 2.0381, "step": 90720 }, { "epoch": 153.28, "learning_rate": 4.28780732782626e-06, "loss": 2.0374, "step": 90740 }, { "epoch": 153.31, "learning_rate": 4.269642010069319e-06, "loss": 2.0404, "step": 90760 }, { "epoch": 153.34, "learning_rate": 4.2515135364358305e-06, "loss": 2.0323, "step": 90780 }, { "epoch": 153.38, "learning_rate": 4.233421921531555e-06, "loss": 2.0501, "step": 90800 }, { "epoch": 153.41, "learning_rate": 4.215367179932605e-06, "loss": 2.0328, "step": 90820 }, { "epoch": 153.45, "learning_rate": 4.197349326185346e-06, "loss": 2.0545, "step": 90840 }, { "epoch": 153.48, "learning_rate": 4.179368374806436e-06, "loss": 2.0653, "step": 90860 }, { "epoch": 153.51, "learning_rate": 4.161424340282804e-06, "loss": 2.053, "step": 90880 }, { "epoch": 153.55, "learning_rate": 4.143517237071642e-06, "loss": 2.0513, "step": 90900 }, { "epoch": 153.58, "learning_rate": 4.125647079600381e-06, "loss": 2.0428, "step": 90920 }, { "epoch": 153.61, "learning_rate": 4.107813882266659e-06, "loss": 2.0369, "step": 90940 }, { "epoch": 153.65, "learning_rate": 4.0900176594384e-06, "loss": 2.0453, "step": 90960 }, { "epoch": 153.68, "learning_rate": 4.072258425453668e-06, "loss": 2.0469, "step": 90980 }, { "epoch": 153.72, "learning_rate": 4.054536194620778e-06, "loss": 2.0482, "step": 91000 }, { "epoch": 153.72, "eval_loss": 2.464383602142334, "eval_runtime": 47.2598, "eval_samples_per_second": 20.927, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.003930989641376842, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03078084773158803, "eval_tse_type": 0.0006516914951408478, "step": 91000 }, { "epoch": 153.75, "learning_rate": 4.036850981218182e-06, "loss": 2.0542, "step": 91020 }, { "epoch": 153.78, "learning_rate": 4.019202799494565e-06, "loss": 2.045, "step": 91040 }, { "epoch": 153.82, "learning_rate": 4.001591663668752e-06, "loss": 2.055, "step": 91060 }, { "epoch": 153.85, "learning_rate": 3.984017587929695e-06, "loss": 2.0465, "step": 91080 }, { "epoch": 153.89, "learning_rate": 3.9664805864365165e-06, "loss": 2.0329, "step": 91100 }, { "epoch": 153.92, "learning_rate": 3.948980673318464e-06, "loss": 2.0599, "step": 91120 }, { "epoch": 153.95, "learning_rate": 3.93151786267491e-06, "loss": 2.0504, "step": 91140 }, { "epoch": 153.99, "learning_rate": 3.914092168575306e-06, "loss": 2.047, "step": 91160 }, { "epoch": 154.02, "learning_rate": 3.896703605059232e-06, "loss": 2.0447, "step": 91180 }, { "epoch": 154.05, "learning_rate": 3.879352186136353e-06, "loss": 2.0373, "step": 91200 }, { "epoch": 154.09, "learning_rate": 3.862037925786366e-06, "loss": 2.0415, "step": 91220 }, { "epoch": 154.12, "learning_rate": 3.844760837959072e-06, "loss": 2.0438, "step": 91240 }, { "epoch": 154.16, "learning_rate": 3.827520936574303e-06, "loss": 2.0415, "step": 91260 }, { "epoch": 154.19, "learning_rate": 3.8103182355219535e-06, "loss": 2.0403, "step": 91280 }, { "epoch": 154.22, "learning_rate": 3.7931527486619046e-06, "loss": 2.0389, "step": 91300 }, { "epoch": 154.26, "learning_rate": 3.7760244898240925e-06, "loss": 2.0378, "step": 91320 }, { "epoch": 154.29, "learning_rate": 3.758933472808446e-06, "loss": 2.0454, "step": 91340 }, { "epoch": 154.32, "learning_rate": 3.7418797113848824e-06, "loss": 2.0406, "step": 91360 }, { "epoch": 154.36, "learning_rate": 3.724863219293312e-06, "loss": 2.0296, "step": 91380 }, { "epoch": 154.39, "learning_rate": 3.707884010243623e-06, "loss": 2.0471, "step": 91400 }, { "epoch": 154.43, "learning_rate": 3.6909420979156505e-06, "loss": 2.0437, "step": 91420 }, { "epoch": 154.46, "learning_rate": 3.6740374959591874e-06, "loss": 2.033, "step": 91440 }, { "epoch": 154.49, "learning_rate": 3.6571702179939603e-06, "loss": 2.0283, "step": 91460 }, { "epoch": 154.53, "learning_rate": 3.640340277609644e-06, "loss": 2.0348, "step": 91480 }, { "epoch": 154.56, "learning_rate": 3.6235476883658025e-06, "loss": 2.0474, "step": 91500 }, { "epoch": 154.59, "learning_rate": 3.6067924637919413e-06, "loss": 2.0478, "step": 91520 }, { "epoch": 154.63, "learning_rate": 3.5900746173874124e-06, "loss": 2.0538, "step": 91540 }, { "epoch": 154.66, "learning_rate": 3.5733941626215182e-06, "loss": 2.0531, "step": 91560 }, { "epoch": 154.7, "learning_rate": 3.5567511129333707e-06, "loss": 2.0393, "step": 91580 }, { "epoch": 154.73, "learning_rate": 3.5401454817319935e-06, "loss": 2.0603, "step": 91600 }, { "epoch": 154.76, "learning_rate": 3.523577282396229e-06, "loss": 2.0581, "step": 91620 }, { "epoch": 154.8, "learning_rate": 3.507046528274782e-06, "loss": 2.0451, "step": 91640 }, { "epoch": 154.83, "learning_rate": 3.4905532326861944e-06, "loss": 2.0545, "step": 91660 }, { "epoch": 154.86, "learning_rate": 3.4740974089187917e-06, "loss": 2.0328, "step": 91680 }, { "epoch": 154.9, "learning_rate": 3.4576790702307627e-06, "loss": 2.0388, "step": 91700 }, { "epoch": 154.93, "learning_rate": 3.441298229850043e-06, "loss": 2.0402, "step": 91720 }, { "epoch": 154.97, "learning_rate": 3.4249549009743866e-06, "loss": 2.032, "step": 91740 }, { "epoch": 155.0, "learning_rate": 3.4086490967713214e-06, "loss": 2.0543, "step": 91760 }, { "epoch": 155.03, "learning_rate": 3.392380830378139e-06, "loss": 2.0326, "step": 91780 }, { "epoch": 155.07, "learning_rate": 3.3761501149018936e-06, "loss": 2.0247, "step": 91800 }, { "epoch": 155.1, "learning_rate": 3.3599569634193697e-06, "loss": 2.0473, "step": 91820 }, { "epoch": 155.14, "learning_rate": 3.343801388977097e-06, "loss": 2.0431, "step": 91840 }, { "epoch": 155.17, "learning_rate": 3.327683404591331e-06, "loss": 2.0301, "step": 91860 }, { "epoch": 155.2, "learning_rate": 3.31160302324805e-06, "loss": 2.0392, "step": 91880 }, { "epoch": 155.24, "learning_rate": 3.295560257902908e-06, "loss": 2.0383, "step": 91900 }, { "epoch": 155.27, "learning_rate": 3.279555121481287e-06, "loss": 2.0394, "step": 91920 }, { "epoch": 155.3, "learning_rate": 3.2635876268782395e-06, "loss": 2.0452, "step": 91940 }, { "epoch": 155.34, "learning_rate": 3.2476577869584748e-06, "loss": 2.0403, "step": 91960 }, { "epoch": 155.37, "learning_rate": 3.2317656145563813e-06, "loss": 2.038, "step": 91980 }, { "epoch": 155.41, "learning_rate": 3.215911122475995e-06, "loss": 2.0428, "step": 92000 }, { "epoch": 155.41, "eval_loss": 2.4640307426452637, "eval_runtime": 47.2345, "eval_samples_per_second": 20.938, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.0037396565895704854, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03079625005469537, "eval_tse_type": 0.0007390321312212111, "step": 92000 }, { "epoch": 155.44, "learning_rate": 3.2000943234910097e-06, "loss": 2.0297, "step": 92020 }, { "epoch": 155.47, "learning_rate": 3.184315230344703e-06, "loss": 2.0349, "step": 92040 }, { "epoch": 155.51, "learning_rate": 3.1685738557500233e-06, "loss": 2.0317, "step": 92060 }, { "epoch": 155.54, "learning_rate": 3.1528702123895194e-06, "loss": 2.0476, "step": 92080 }, { "epoch": 155.57, "learning_rate": 3.1372043129153205e-06, "loss": 2.0595, "step": 92100 }, { "epoch": 155.61, "learning_rate": 3.121576169949164e-06, "loss": 2.0417, "step": 92120 }, { "epoch": 155.64, "learning_rate": 3.1059857960823614e-06, "loss": 2.0464, "step": 92140 }, { "epoch": 155.68, "learning_rate": 3.0904332038757977e-06, "loss": 2.0519, "step": 92160 }, { "epoch": 155.71, "learning_rate": 3.0749184058599114e-06, "loss": 2.0388, "step": 92180 }, { "epoch": 155.74, "learning_rate": 3.059441414534686e-06, "loss": 2.0524, "step": 92200 }, { "epoch": 155.78, "learning_rate": 3.044002242369681e-06, "loss": 2.0664, "step": 92220 }, { "epoch": 155.81, "learning_rate": 3.0286009018039354e-06, "loss": 2.0385, "step": 92240 }, { "epoch": 155.84, "learning_rate": 3.0132374052460388e-06, "loss": 2.0547, "step": 92260 }, { "epoch": 155.88, "learning_rate": 2.9979117650740797e-06, "loss": 2.0344, "step": 92280 }, { "epoch": 155.91, "learning_rate": 2.982623993635658e-06, "loss": 2.0401, "step": 92300 }, { "epoch": 155.95, "learning_rate": 2.9673741032478443e-06, "loss": 2.0388, "step": 92320 }, { "epoch": 155.98, "learning_rate": 2.952162106197204e-06, "loss": 2.0434, "step": 92340 }, { "epoch": 156.01, "learning_rate": 2.9369880147397667e-06, "loss": 2.0322, "step": 92360 }, { "epoch": 156.05, "learning_rate": 2.9218518411010252e-06, "loss": 2.0325, "step": 92380 }, { "epoch": 156.08, "learning_rate": 2.9067535974759364e-06, "loss": 2.0453, "step": 92400 }, { "epoch": 156.11, "learning_rate": 2.8916932960288466e-06, "loss": 2.0271, "step": 92420 }, { "epoch": 156.15, "learning_rate": 2.876670948893606e-06, "loss": 2.0282, "step": 92440 }, { "epoch": 156.18, "learning_rate": 2.861686568173427e-06, "loss": 2.0411, "step": 92460 }, { "epoch": 156.22, "learning_rate": 2.846740165940959e-06, "loss": 2.0425, "step": 92480 }, { "epoch": 156.25, "learning_rate": 2.8318317542382457e-06, "loss": 2.0407, "step": 92500 }, { "epoch": 156.28, "learning_rate": 2.8169613450767295e-06, "loss": 2.0367, "step": 92520 }, { "epoch": 156.32, "learning_rate": 2.802128950437244e-06, "loss": 2.0385, "step": 92540 }, { "epoch": 156.35, "learning_rate": 2.7873345822699514e-06, "loss": 2.0454, "step": 92560 }, { "epoch": 156.39, "learning_rate": 2.7725782524944286e-06, "loss": 2.0423, "step": 92580 }, { "epoch": 156.42, "learning_rate": 2.7578599729995768e-06, "loss": 2.0435, "step": 92600 }, { "epoch": 156.45, "learning_rate": 2.7431797556436546e-06, "loss": 2.0424, "step": 92620 }, { "epoch": 156.49, "learning_rate": 2.7285376122542283e-06, "loss": 2.029, "step": 92640 }, { "epoch": 156.52, "learning_rate": 2.7139335546282286e-06, "loss": 2.0291, "step": 92660 }, { "epoch": 156.55, "learning_rate": 2.6993675945318875e-06, "loss": 2.0365, "step": 92680 }, { "epoch": 156.59, "learning_rate": 2.684839743700712e-06, "loss": 2.0487, "step": 92700 }, { "epoch": 156.62, "learning_rate": 2.67035001383954e-06, "loss": 2.038, "step": 92720 }, { "epoch": 156.66, "learning_rate": 2.6558984166224875e-06, "loss": 2.0447, "step": 92740 }, { "epoch": 156.69, "learning_rate": 2.641484963692953e-06, "loss": 2.0436, "step": 92760 }, { "epoch": 156.72, "learning_rate": 2.6271096666635685e-06, "loss": 2.0558, "step": 92780 }, { "epoch": 156.76, "learning_rate": 2.612772537116276e-06, "loss": 2.0376, "step": 92800 }, { "epoch": 156.79, "learning_rate": 2.598473586602246e-06, "loss": 2.051, "step": 92820 }, { "epoch": 156.82, "learning_rate": 2.5842128266418573e-06, "loss": 2.0496, "step": 92840 }, { "epoch": 156.86, "learning_rate": 2.56999026872477e-06, "loss": 2.035, "step": 92860 }, { "epoch": 156.89, "learning_rate": 2.5558059243098286e-06, "loss": 2.0461, "step": 92880 }, { "epoch": 156.93, "learning_rate": 2.5416598048251194e-06, "loss": 2.0444, "step": 92900 }, { "epoch": 156.96, "learning_rate": 2.527551921667898e-06, "loss": 2.0564, "step": 92920 }, { "epoch": 156.99, "learning_rate": 2.5134822862046326e-06, "loss": 2.0435, "step": 92940 }, { "epoch": 157.03, "learning_rate": 2.4994509097709894e-06, "loss": 2.0238, "step": 92960 }, { "epoch": 157.06, "learning_rate": 2.485457803671781e-06, "loss": 2.0347, "step": 92980 }, { "epoch": 157.09, "learning_rate": 2.4715029791810006e-06, "loss": 2.0331, "step": 93000 }, { "epoch": 157.09, "eval_loss": 2.4623606204986572, "eval_runtime": 47.7365, "eval_samples_per_second": 20.718, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.0040481646977106375, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.031722884505978204, "eval_tse_type": 0.0006198600738540071, "step": 93000 }, { "epoch": 157.13, "learning_rate": 2.4575864475418098e-06, "loss": 2.0365, "step": 93020 }, { "epoch": 157.16, "learning_rate": 2.443708219966501e-06, "loss": 2.0401, "step": 93040 }, { "epoch": 157.2, "learning_rate": 2.4298683076365014e-06, "loss": 2.0307, "step": 93060 }, { "epoch": 157.23, "learning_rate": 2.4160667217023867e-06, "loss": 2.0425, "step": 93080 }, { "epoch": 157.26, "learning_rate": 2.4023034732838514e-06, "loss": 2.0362, "step": 93100 }, { "epoch": 157.3, "learning_rate": 2.3885785734696853e-06, "loss": 2.0341, "step": 93120 }, { "epoch": 157.33, "learning_rate": 2.3748920333178048e-06, "loss": 2.0471, "step": 93140 }, { "epoch": 157.36, "learning_rate": 2.361243863855184e-06, "loss": 2.0375, "step": 93160 }, { "epoch": 157.4, "learning_rate": 2.347634076077937e-06, "loss": 2.0435, "step": 93180 }, { "epoch": 157.43, "learning_rate": 2.3340626809512044e-06, "loss": 2.0329, "step": 93200 }, { "epoch": 157.47, "learning_rate": 2.3205296894092233e-06, "loss": 2.0434, "step": 93220 }, { "epoch": 157.5, "learning_rate": 2.3070351123552714e-06, "loss": 2.0314, "step": 93240 }, { "epoch": 157.53, "learning_rate": 2.2935789606616965e-06, "loss": 2.0237, "step": 93260 }, { "epoch": 157.57, "learning_rate": 2.2801612451698705e-06, "loss": 2.0441, "step": 93280 }, { "epoch": 157.6, "learning_rate": 2.266781976690202e-06, "loss": 2.0352, "step": 93300 }, { "epoch": 157.64, "learning_rate": 2.253441166002135e-06, "loss": 2.0399, "step": 93320 }, { "epoch": 157.67, "learning_rate": 2.2401388238541098e-06, "loss": 2.0415, "step": 93340 }, { "epoch": 157.7, "learning_rate": 2.2268749609635877e-06, "loss": 2.044, "step": 93360 }, { "epoch": 157.74, "learning_rate": 2.213649588017025e-06, "loss": 2.0574, "step": 93380 }, { "epoch": 157.77, "learning_rate": 2.20046271566986e-06, "loss": 2.0417, "step": 93400 }, { "epoch": 157.8, "learning_rate": 2.1873143545465323e-06, "loss": 2.0478, "step": 93420 }, { "epoch": 157.84, "learning_rate": 2.1742045152404246e-06, "loss": 2.0399, "step": 93440 }, { "epoch": 157.87, "learning_rate": 2.1611332083138923e-06, "loss": 2.0469, "step": 93460 }, { "epoch": 157.91, "learning_rate": 2.1481004442982676e-06, "loss": 2.0407, "step": 93480 }, { "epoch": 157.94, "learning_rate": 2.1351062336938065e-06, "loss": 2.0424, "step": 93500 }, { "epoch": 157.97, "learning_rate": 2.1221505869697065e-06, "loss": 2.0346, "step": 93520 }, { "epoch": 158.01, "learning_rate": 2.1092335145640963e-06, "loss": 2.0366, "step": 93540 }, { "epoch": 158.04, "learning_rate": 2.096355026884045e-06, "loss": 2.039, "step": 93560 }, { "epoch": 158.07, "learning_rate": 2.083515134305497e-06, "loss": 2.0204, "step": 93580 }, { "epoch": 158.11, "learning_rate": 2.070713847173328e-06, "loss": 2.0283, "step": 93600 }, { "epoch": 158.14, "learning_rate": 2.0579511758013093e-06, "loss": 2.0306, "step": 93620 }, { "epoch": 158.18, "learning_rate": 2.045227130472088e-06, "loss": 2.0376, "step": 93640 }, { "epoch": 158.21, "learning_rate": 2.032541721437209e-06, "loss": 2.0393, "step": 93660 }, { "epoch": 158.24, "learning_rate": 2.0198949589170513e-06, "loss": 2.0418, "step": 93680 }, { "epoch": 158.28, "learning_rate": 2.007286853100915e-06, "loss": 2.0292, "step": 93700 }, { "epoch": 158.31, "learning_rate": 1.9947174141469073e-06, "loss": 2.0395, "step": 93720 }, { "epoch": 158.34, "learning_rate": 1.9821866521819945e-06, "loss": 2.0445, "step": 93740 }, { "epoch": 158.38, "learning_rate": 1.969694577301995e-06, "loss": 2.0421, "step": 93760 }, { "epoch": 158.41, "learning_rate": 1.9572411995715356e-06, "loss": 2.0373, "step": 93780 }, { "epoch": 158.45, "learning_rate": 1.9448265290240964e-06, "loss": 2.0443, "step": 93800 }, { "epoch": 158.48, "learning_rate": 1.9324505756619314e-06, "loss": 2.0406, "step": 93820 }, { "epoch": 158.51, "learning_rate": 1.920113349456143e-06, "loss": 2.0412, "step": 93840 }, { "epoch": 158.55, "learning_rate": 1.907814860346596e-06, "loss": 2.0393, "step": 93860 }, { "epoch": 158.58, "learning_rate": 1.8955551182419706e-06, "loss": 2.0345, "step": 93880 }, { "epoch": 158.61, "learning_rate": 1.8833341330197097e-06, "loss": 2.0256, "step": 93900 }, { "epoch": 158.65, "learning_rate": 1.8711519145260537e-06, "loss": 2.0332, "step": 93920 }, { "epoch": 158.68, "learning_rate": 1.8590084725759849e-06, "loss": 2.0308, "step": 93940 }, { "epoch": 158.72, "learning_rate": 1.8469038169532603e-06, "loss": 2.0421, "step": 93960 }, { "epoch": 158.75, "learning_rate": 1.8348379574103791e-06, "loss": 2.0384, "step": 93980 }, { "epoch": 158.78, "learning_rate": 1.822810903668587e-06, "loss": 2.0366, "step": 94000 }, { "epoch": 158.78, "eval_loss": 2.461648464202881, "eval_runtime": 47.3845, "eval_samples_per_second": 20.872, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.0035718473484585298, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03096087466625037, "eval_tse_type": 0.0006987710211795764, "step": 94000 }, { "epoch": 158.82, "learning_rate": 1.8114211553132587e-06, "loss": 2.0566, "step": 94020 }, { "epoch": 158.85, "learning_rate": 1.799469800725917e-06, "loss": 2.0375, "step": 94040 }, { "epoch": 158.89, "learning_rate": 1.7875572804351482e-06, "loss": 2.0472, "step": 94060 }, { "epoch": 158.92, "learning_rate": 1.7756836040386637e-06, "loss": 2.0366, "step": 94080 }, { "epoch": 158.95, "learning_rate": 1.7638487811028615e-06, "loss": 2.0499, "step": 94100 }, { "epoch": 158.99, "learning_rate": 1.7520528211628416e-06, "loss": 2.0325, "step": 94120 }, { "epoch": 159.02, "learning_rate": 1.7402957337224024e-06, "loss": 2.0452, "step": 94140 }, { "epoch": 159.05, "learning_rate": 1.7285775282540217e-06, "loss": 2.0372, "step": 94160 }, { "epoch": 159.09, "learning_rate": 1.716898214198831e-06, "loss": 2.0332, "step": 94180 }, { "epoch": 159.12, "learning_rate": 1.7052578009666586e-06, "loss": 2.0265, "step": 94200 }, { "epoch": 159.16, "learning_rate": 1.6936562979359694e-06, "loss": 2.0251, "step": 94220 }, { "epoch": 159.19, "learning_rate": 1.6820937144538807e-06, "loss": 2.0177, "step": 94240 }, { "epoch": 159.22, "learning_rate": 1.6705700598361573e-06, "loss": 2.0404, "step": 94260 }, { "epoch": 159.26, "learning_rate": 1.659085343367206e-06, "loss": 2.0293, "step": 94280 }, { "epoch": 159.29, "learning_rate": 1.6476395743000528e-06, "loss": 2.0256, "step": 94300 }, { "epoch": 159.32, "learning_rate": 1.6362327618563489e-06, "loss": 2.0374, "step": 94320 }, { "epoch": 159.36, "learning_rate": 1.6248649152263429e-06, "loss": 2.0277, "step": 94340 }, { "epoch": 159.39, "learning_rate": 1.613536043568925e-06, "loss": 2.0407, "step": 94360 }, { "epoch": 159.43, "learning_rate": 1.6022461560115497e-06, "loss": 2.0404, "step": 94380 }, { "epoch": 159.46, "learning_rate": 1.5909952616502743e-06, "loss": 2.0291, "step": 94400 }, { "epoch": 159.49, "learning_rate": 1.5797833695497477e-06, "loss": 2.0519, "step": 94420 }, { "epoch": 159.53, "learning_rate": 1.5686104887431884e-06, "loss": 2.0377, "step": 94440 }, { "epoch": 159.56, "learning_rate": 1.557476628232385e-06, "loss": 2.0296, "step": 94460 }, { "epoch": 159.59, "learning_rate": 1.546381796987678e-06, "loss": 2.0499, "step": 94480 }, { "epoch": 159.63, "learning_rate": 1.5353260039479844e-06, "loss": 2.0457, "step": 94500 }, { "epoch": 159.66, "learning_rate": 1.5243092580207507e-06, "loss": 2.0564, "step": 94520 }, { "epoch": 159.7, "learning_rate": 1.5133315680819715e-06, "loss": 2.0416, "step": 94540 }, { "epoch": 159.73, "learning_rate": 1.5023929429761718e-06, "loss": 2.0355, "step": 94560 }, { "epoch": 159.76, "learning_rate": 1.4914933915164075e-06, "loss": 2.0412, "step": 94580 }, { "epoch": 159.8, "learning_rate": 1.480632922484254e-06, "loss": 2.0411, "step": 94600 }, { "epoch": 159.83, "learning_rate": 1.4698115446297845e-06, "loss": 2.0417, "step": 94620 }, { "epoch": 159.86, "learning_rate": 1.459029266671591e-06, "loss": 2.0415, "step": 94640 }, { "epoch": 159.9, "learning_rate": 1.4482860972967637e-06, "loss": 2.0404, "step": 94660 }, { "epoch": 159.93, "learning_rate": 1.4375820451608846e-06, "loss": 2.0273, "step": 94680 }, { "epoch": 159.97, "learning_rate": 1.4269171188879994e-06, "loss": 2.0302, "step": 94700 }, { "epoch": 160.0, "learning_rate": 1.4162913270706568e-06, "loss": 2.0465, "step": 94720 }, { "epoch": 160.03, "learning_rate": 1.4057046782698757e-06, "loss": 2.0431, "step": 94740 }, { "epoch": 160.07, "learning_rate": 1.3951571810151109e-06, "loss": 2.0337, "step": 94760 }, { "epoch": 160.1, "learning_rate": 1.3846488438042981e-06, "loss": 2.0281, "step": 94780 }, { "epoch": 160.14, "learning_rate": 1.3741796751038094e-06, "loss": 2.0424, "step": 94800 }, { "epoch": 160.17, "learning_rate": 1.3637496833484754e-06, "loss": 2.0511, "step": 94820 }, { "epoch": 160.2, "learning_rate": 1.3533588769415406e-06, "loss": 2.0362, "step": 94840 }, { "epoch": 160.24, "learning_rate": 1.3430072642546865e-06, "loss": 2.0352, "step": 94860 }, { "epoch": 160.27, "learning_rate": 1.3326948536280415e-06, "loss": 2.031, "step": 94880 }, { "epoch": 160.3, "learning_rate": 1.3224216533701095e-06, "loss": 2.028, "step": 94900 }, { "epoch": 160.34, "learning_rate": 1.3121876717578308e-06, "loss": 2.0338, "step": 94920 }, { "epoch": 160.37, "learning_rate": 1.3019929170365374e-06, "loss": 2.0436, "step": 94940 }, { "epoch": 160.41, "learning_rate": 1.291837397419965e-06, "loss": 2.0394, "step": 94960 }, { "epoch": 160.44, "learning_rate": 1.2817211210902181e-06, "loss": 2.0197, "step": 94980 }, { "epoch": 160.47, "learning_rate": 1.2716440961978105e-06, "loss": 2.0417, "step": 95000 }, { "epoch": 160.47, "eval_loss": 2.461150884628296, "eval_runtime": 50.8979, "eval_samples_per_second": 19.431, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.004373840688530554, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.030682119776855976, "eval_tse_type": 0.0005216763445801307, "step": 95000 }, { "epoch": 160.51, "learning_rate": 1.2616063308616144e-06, "loss": 2.0446, "step": 95020 }, { "epoch": 160.54, "learning_rate": 1.2516078331688718e-06, "loss": 2.0292, "step": 95040 }, { "epoch": 160.57, "learning_rate": 1.2416486111752057e-06, "loss": 2.0138, "step": 95060 }, { "epoch": 160.61, "learning_rate": 1.2317286729045586e-06, "loss": 2.0353, "step": 95080 }, { "epoch": 160.64, "learning_rate": 1.221848026349276e-06, "loss": 2.0454, "step": 95100 }, { "epoch": 160.68, "learning_rate": 1.2120066794699847e-06, "loss": 2.0298, "step": 95120 }, { "epoch": 160.71, "learning_rate": 1.2022046401957032e-06, "loss": 2.0274, "step": 95140 }, { "epoch": 160.74, "learning_rate": 1.1924419164237422e-06, "loss": 2.049, "step": 95160 }, { "epoch": 160.78, "learning_rate": 1.182718516019765e-06, "loss": 2.0294, "step": 95180 }, { "epoch": 160.81, "learning_rate": 1.1730344468177335e-06, "loss": 2.0408, "step": 95200 }, { "epoch": 160.84, "learning_rate": 1.1633897166199226e-06, "loss": 2.0379, "step": 95220 }, { "epoch": 160.88, "learning_rate": 1.1537843331969278e-06, "loss": 2.0456, "step": 95240 }, { "epoch": 160.91, "learning_rate": 1.1442183042876254e-06, "loss": 2.0278, "step": 95260 }, { "epoch": 160.95, "learning_rate": 1.1346916375991945e-06, "loss": 2.0417, "step": 95280 }, { "epoch": 160.98, "learning_rate": 1.12520434080709e-06, "loss": 2.0421, "step": 95300 }, { "epoch": 161.01, "learning_rate": 1.1157564215550698e-06, "loss": 2.0323, "step": 95320 }, { "epoch": 161.05, "learning_rate": 1.1063478874551503e-06, "loss": 2.0476, "step": 95340 }, { "epoch": 161.08, "learning_rate": 1.0969787460876012e-06, "loss": 2.028, "step": 95360 }, { "epoch": 161.11, "learning_rate": 1.0876490050009901e-06, "loss": 2.0345, "step": 95380 }, { "epoch": 161.15, "learning_rate": 1.0783586717121097e-06, "loss": 2.046, "step": 95400 }, { "epoch": 161.18, "learning_rate": 1.0691077537060224e-06, "loss": 2.0359, "step": 95420 }, { "epoch": 161.22, "learning_rate": 1.0598962584360051e-06, "loss": 2.0275, "step": 95440 }, { "epoch": 161.25, "learning_rate": 1.0507241933236157e-06, "loss": 2.0295, "step": 95460 }, { "epoch": 161.28, "learning_rate": 1.0415915657586206e-06, "loss": 2.0444, "step": 95480 }, { "epoch": 161.32, "learning_rate": 1.032498383099001e-06, "loss": 2.0354, "step": 95500 }, { "epoch": 161.35, "learning_rate": 1.023444652670974e-06, "loss": 2.0188, "step": 95520 }, { "epoch": 161.39, "learning_rate": 1.014430381768966e-06, "loss": 2.0382, "step": 95540 }, { "epoch": 161.42, "learning_rate": 1.0054555776556228e-06, "loss": 2.0387, "step": 95560 }, { "epoch": 161.45, "learning_rate": 9.965202475617664e-07, "loss": 2.0456, "step": 95580 }, { "epoch": 161.49, "learning_rate": 9.87624398686432e-07, "loss": 2.0232, "step": 95600 }, { "epoch": 161.52, "learning_rate": 9.787680381968589e-07, "loss": 2.0324, "step": 95620 }, { "epoch": 161.55, "learning_rate": 9.699511732284393e-07, "loss": 2.0379, "step": 95640 }, { "epoch": 161.59, "learning_rate": 9.611738108847745e-07, "loss": 2.0311, "step": 95660 }, { "epoch": 161.62, "learning_rate": 9.524359582376241e-07, "loss": 2.0372, "step": 95680 }, { "epoch": 161.66, "learning_rate": 9.437376223269178e-07, "loss": 2.0318, "step": 95700 }, { "epoch": 161.69, "learning_rate": 9.350788101607444e-07, "loss": 2.0336, "step": 95720 }, { "epoch": 161.72, "learning_rate": 9.264595287153511e-07, "loss": 2.039, "step": 95740 }, { "epoch": 161.76, "learning_rate": 9.178797849351494e-07, "loss": 2.0431, "step": 95760 }, { "epoch": 161.79, "learning_rate": 9.093395857326714e-07, "loss": 2.0399, "step": 95780 }, { "epoch": 161.82, "learning_rate": 9.008389379886073e-07, "loss": 2.0331, "step": 95800 }, { "epoch": 161.86, "learning_rate": 8.923778485517676e-07, "loss": 2.0455, "step": 95820 }, { "epoch": 161.89, "learning_rate": 8.839563242391158e-07, "loss": 2.03, "step": 95840 }, { "epoch": 161.93, "learning_rate": 8.755743718357023e-07, "loss": 2.0234, "step": 95860 }, { "epoch": 161.96, "learning_rate": 8.672319980947252e-07, "loss": 2.0279, "step": 95880 }, { "epoch": 161.99, "learning_rate": 8.589292097374857e-07, "loss": 2.0426, "step": 95900 }, { "epoch": 162.03, "learning_rate": 8.506660134533828e-07, "loss": 2.0192, "step": 95920 }, { "epoch": 162.06, "learning_rate": 8.424424158999355e-07, "loss": 2.0289, "step": 95940 }, { "epoch": 162.09, "learning_rate": 8.342584237027329e-07, "loss": 2.0328, "step": 95960 }, { "epoch": 162.13, "learning_rate": 8.261140434554892e-07, "loss": 2.0473, "step": 95980 }, { "epoch": 162.16, "learning_rate": 8.180092817199669e-07, "loss": 2.0338, "step": 96000 }, { "epoch": 162.16, "eval_loss": 2.460881233215332, "eval_runtime": 47.5752, "eval_samples_per_second": 20.788, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.0036474163620149, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03153410969713916, "eval_tse_type": 0.0005627184100017492, "step": 96000 }, { "epoch": 162.2, "learning_rate": 8.09944145026037e-07, "loss": 2.0297, "step": 96020 }, { "epoch": 162.23, "learning_rate": 8.019186398716239e-07, "loss": 2.0275, "step": 96040 }, { "epoch": 162.26, "learning_rate": 7.939327727227441e-07, "loss": 2.0375, "step": 96060 }, { "epoch": 162.3, "learning_rate": 7.859865500134622e-07, "loss": 2.0264, "step": 96080 }, { "epoch": 162.33, "learning_rate": 7.780799781459014e-07, "loss": 2.0267, "step": 96100 }, { "epoch": 162.36, "learning_rate": 7.702130634902493e-07, "loss": 2.0366, "step": 96120 }, { "epoch": 162.4, "learning_rate": 7.62385812384736e-07, "loss": 2.026, "step": 96140 }, { "epoch": 162.43, "learning_rate": 7.545982311356337e-07, "loss": 2.0294, "step": 96160 }, { "epoch": 162.47, "learning_rate": 7.468503260172566e-07, "loss": 2.0351, "step": 96180 }, { "epoch": 162.5, "learning_rate": 7.391421032719559e-07, "loss": 2.0421, "step": 96200 }, { "epoch": 162.53, "learning_rate": 7.314735691101082e-07, "loss": 2.0266, "step": 96220 }, { "epoch": 162.57, "learning_rate": 7.238447297101048e-07, "loss": 2.0454, "step": 96240 }, { "epoch": 162.6, "learning_rate": 7.162555912183677e-07, "loss": 2.0501, "step": 96260 }, { "epoch": 162.64, "learning_rate": 7.087061597493283e-07, "loss": 2.0174, "step": 96280 }, { "epoch": 162.67, "learning_rate": 7.011964413854377e-07, "loss": 2.024, "step": 96300 }, { "epoch": 162.7, "learning_rate": 6.937264421771228e-07, "loss": 2.0307, "step": 96320 }, { "epoch": 162.74, "learning_rate": 6.862961681428304e-07, "loss": 2.043, "step": 96340 }, { "epoch": 162.77, "learning_rate": 6.789056252690107e-07, "loss": 2.033, "step": 96360 }, { "epoch": 162.8, "learning_rate": 6.71554819510084e-07, "loss": 2.0393, "step": 96380 }, { "epoch": 162.84, "learning_rate": 6.642437567884574e-07, "loss": 2.0439, "step": 96400 }, { "epoch": 162.87, "learning_rate": 6.569724429945246e-07, "loss": 2.0437, "step": 96420 }, { "epoch": 162.91, "learning_rate": 6.497408839866548e-07, "loss": 2.0499, "step": 96440 }, { "epoch": 162.94, "learning_rate": 6.425490855911819e-07, "loss": 2.0207, "step": 96460 }, { "epoch": 162.97, "learning_rate": 6.353970536024045e-07, "loss": 2.0393, "step": 96480 }, { "epoch": 163.01, "learning_rate": 6.282847937825909e-07, "loss": 2.0272, "step": 96500 }, { "epoch": 163.04, "learning_rate": 6.212123118619628e-07, "loss": 2.0307, "step": 96520 }, { "epoch": 163.07, "learning_rate": 6.14179613538679e-07, "loss": 2.0236, "step": 96540 }, { "epoch": 163.11, "learning_rate": 6.071867044788737e-07, "loss": 2.0165, "step": 96560 }, { "epoch": 163.14, "learning_rate": 6.002335903165957e-07, "loss": 2.0405, "step": 96580 }, { "epoch": 163.18, "learning_rate": 5.93320276653847e-07, "loss": 2.0313, "step": 96600 }, { "epoch": 163.21, "learning_rate": 5.864467690605613e-07, "loss": 2.0332, "step": 96620 }, { "epoch": 163.24, "learning_rate": 5.796130730745975e-07, "loss": 2.0295, "step": 96640 }, { "epoch": 163.28, "learning_rate": 5.728191942017403e-07, "loss": 2.0291, "step": 96660 }, { "epoch": 163.31, "learning_rate": 5.660651379157e-07, "loss": 2.0359, "step": 96680 }, { "epoch": 163.34, "learning_rate": 5.593509096580851e-07, "loss": 2.0324, "step": 96700 }, { "epoch": 163.38, "learning_rate": 5.526765148384461e-07, "loss": 2.0317, "step": 96720 }, { "epoch": 163.41, "learning_rate": 5.460419588342092e-07, "loss": 2.0299, "step": 96740 }, { "epoch": 163.45, "learning_rate": 5.394472469907208e-07, "loss": 2.0411, "step": 96760 }, { "epoch": 163.48, "learning_rate": 5.328923846212197e-07, "loss": 2.0363, "step": 96780 }, { "epoch": 163.51, "learning_rate": 5.263773770068425e-07, "loss": 2.0437, "step": 96800 }, { "epoch": 163.55, "learning_rate": 5.199022293966127e-07, "loss": 2.0333, "step": 96820 }, { "epoch": 163.58, "learning_rate": 5.134669470074404e-07, "loss": 2.0408, "step": 96840 }, { "epoch": 163.61, "learning_rate": 5.070715350241117e-07, "loss": 2.0397, "step": 96860 }, { "epoch": 163.65, "learning_rate": 5.007159985992937e-07, "loss": 2.0225, "step": 96880 }, { "epoch": 163.68, "learning_rate": 4.944003428535348e-07, "loss": 2.0372, "step": 96900 }, { "epoch": 163.72, "learning_rate": 4.881245728752426e-07, "loss": 2.0308, "step": 96920 }, { "epoch": 163.75, "learning_rate": 4.81888693720689e-07, "loss": 2.0294, "step": 96940 }, { "epoch": 163.78, "learning_rate": 4.756927104140163e-07, "loss": 2.0314, "step": 96960 }, { "epoch": 163.82, "learning_rate": 4.6953662794720354e-07, "loss": 2.0403, "step": 96980 }, { "epoch": 163.85, "learning_rate": 4.634204512800999e-07, "loss": 2.0377, "step": 97000 }, { "epoch": 163.85, "eval_loss": 2.460573434829712, "eval_runtime": 50.8928, "eval_samples_per_second": 19.433, "eval_steps_per_second": 0.118, "eval_tse_ndup": 0.004185809636906782, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.031060657384459112, "eval_tse_type": 0.0006161526794742163, "step": 97000 }, { "epoch": 163.89, "learning_rate": 4.576470506819086e-07, "loss": 2.0255, "step": 97020 }, { "epoch": 163.92, "learning_rate": 4.5160870446823713e-07, "loss": 2.0354, "step": 97040 }, { "epoch": 163.95, "learning_rate": 4.456102784984817e-07, "loss": 2.0318, "step": 97060 }, { "epoch": 163.99, "learning_rate": 4.396517776054598e-07, "loss": 2.047, "step": 97080 }, { "epoch": 164.02, "learning_rate": 4.3373320658983694e-07, "loss": 2.0353, "step": 97100 }, { "epoch": 164.05, "learning_rate": 4.2785457022009867e-07, "loss": 2.0469, "step": 97120 }, { "epoch": 164.09, "learning_rate": 4.220158732325452e-07, "loss": 2.0289, "step": 97140 }, { "epoch": 164.12, "learning_rate": 4.162171203313192e-07, "loss": 2.0297, "step": 97160 }, { "epoch": 164.16, "learning_rate": 4.1045831618837236e-07, "loss": 2.0404, "step": 97180 }, { "epoch": 164.19, "learning_rate": 4.0473946544346e-07, "loss": 2.0352, "step": 97200 }, { "epoch": 164.22, "learning_rate": 3.9906057270416854e-07, "loss": 2.0294, "step": 97220 }, { "epoch": 164.26, "learning_rate": 3.9342164254587145e-07, "loss": 2.025, "step": 97240 }, { "epoch": 164.29, "learning_rate": 3.878226795117512e-07, "loss": 2.0393, "step": 97260 }, { "epoch": 164.32, "learning_rate": 3.8226368811279945e-07, "loss": 2.0302, "step": 97280 }, { "epoch": 164.36, "learning_rate": 3.76744672827789e-07, "loss": 2.0421, "step": 97300 }, { "epoch": 164.39, "learning_rate": 3.7126563810329087e-07, "loss": 2.0435, "step": 97320 }, { "epoch": 164.43, "learning_rate": 3.6582658835366267e-07, "loss": 2.0435, "step": 97340 }, { "epoch": 164.46, "learning_rate": 3.6042752796105473e-07, "loss": 2.0413, "step": 97360 }, { "epoch": 164.49, "learning_rate": 3.550684612753874e-07, "loss": 2.0299, "step": 97380 }, { "epoch": 164.53, "learning_rate": 3.49749392614368e-07, "loss": 2.0322, "step": 97400 }, { "epoch": 164.56, "learning_rate": 3.444703262634741e-07, "loss": 2.0382, "step": 97420 }, { "epoch": 164.59, "learning_rate": 3.3923126647594785e-07, "loss": 2.025, "step": 97440 }, { "epoch": 164.63, "learning_rate": 3.340322174728072e-07, "loss": 2.0339, "step": 97460 }, { "epoch": 164.66, "learning_rate": 3.288731834428405e-07, "loss": 2.0242, "step": 97480 }, { "epoch": 164.7, "learning_rate": 3.2375416854257823e-07, "loss": 2.0252, "step": 97500 }, { "epoch": 164.73, "learning_rate": 3.186751768963159e-07, "loss": 2.0241, "step": 97520 }, { "epoch": 164.76, "learning_rate": 3.13636212596119e-07, "loss": 2.0321, "step": 97540 }, { "epoch": 164.8, "learning_rate": 3.0863727970177894e-07, "loss": 2.0245, "step": 97560 }, { "epoch": 164.83, "learning_rate": 3.0367838224084623e-07, "loss": 2.0428, "step": 97580 }, { "epoch": 164.86, "learning_rate": 2.98759524208625e-07, "loss": 2.0502, "step": 97600 }, { "epoch": 164.9, "learning_rate": 2.9388070956813953e-07, "loss": 2.0205, "step": 97620 }, { "epoch": 164.93, "learning_rate": 2.8904194225016224e-07, "loss": 2.0319, "step": 97640 }, { "epoch": 164.97, "learning_rate": 2.84243226153208e-07, "loss": 2.0298, "step": 97660 }, { "epoch": 165.0, "learning_rate": 2.7948456514351184e-07, "loss": 2.0271, "step": 97680 }, { "epoch": 165.03, "learning_rate": 2.7476596305504034e-07, "loss": 2.0305, "step": 97700 }, { "epoch": 165.07, "learning_rate": 2.700874236894857e-07, "loss": 2.0345, "step": 97720 }, { "epoch": 165.1, "learning_rate": 2.654489508162661e-07, "loss": 2.023, "step": 97740 }, { "epoch": 165.14, "learning_rate": 2.608505481725143e-07, "loss": 2.0366, "step": 97760 }, { "epoch": 165.17, "learning_rate": 2.562922194630779e-07, "loss": 2.0265, "step": 97780 }, { "epoch": 165.2, "learning_rate": 2.517739683605191e-07, "loss": 2.0379, "step": 97800 }, { "epoch": 165.24, "learning_rate": 2.4729579850511497e-07, "loss": 2.0206, "step": 97820 }, { "epoch": 165.27, "learning_rate": 2.42857713504846e-07, "loss": 2.0289, "step": 97840 }, { "epoch": 165.3, "learning_rate": 2.3845971693539083e-07, "loss": 2.028, "step": 97860 }, { "epoch": 165.34, "learning_rate": 2.3410181234014283e-07, "loss": 2.0354, "step": 97880 }, { "epoch": 165.37, "learning_rate": 2.297840032301879e-07, "loss": 2.0446, "step": 97900 }, { "epoch": 165.41, "learning_rate": 2.2550629308429882e-07, "loss": 2.0472, "step": 97920 }, { "epoch": 165.44, "learning_rate": 2.212686853489576e-07, "loss": 2.028, "step": 97940 }, { "epoch": 165.47, "learning_rate": 2.170711834383221e-07, "loss": 2.04, "step": 97960 }, { "epoch": 165.51, "learning_rate": 2.1291379073424268e-07, "loss": 2.0332, "step": 97980 }, { "epoch": 165.54, "learning_rate": 2.087965105862677e-07, "loss": 2.0401, "step": 98000 }, { "epoch": 165.54, "eval_loss": 2.4602303504943848, "eval_runtime": 47.3856, "eval_samples_per_second": 20.871, "eval_steps_per_second": 0.127, "eval_tse_ndup": 0.0037392305241050337, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.03098927771360855, "eval_tse_type": 0.000635462307605887, "step": 98000 }, { "epoch": 165.57, "learning_rate": 2.0471934631160482e-07, "loss": 2.0303, "step": 98020 }, { "epoch": 165.61, "learning_rate": 2.0068230119514864e-07, "loss": 2.0269, "step": 98040 }, { "epoch": 165.64, "learning_rate": 1.9668537848948066e-07, "loss": 2.0231, "step": 98060 }, { "epoch": 165.68, "learning_rate": 1.9272858141485272e-07, "loss": 2.0244, "step": 98080 }, { "epoch": 165.71, "learning_rate": 1.888119131591759e-07, "loss": 2.0409, "step": 98100 }, { "epoch": 165.74, "learning_rate": 1.8493537687804263e-07, "loss": 2.0392, "step": 98120 }, { "epoch": 165.78, "learning_rate": 1.8109897569470457e-07, "loss": 2.0344, "step": 98140 }, { "epoch": 165.81, "learning_rate": 1.7730271270008925e-07, "loss": 2.0337, "step": 98160 }, { "epoch": 165.84, "learning_rate": 1.7354659095277226e-07, "loss": 2.0382, "step": 98180 }, { "epoch": 165.88, "learning_rate": 1.6983061347898843e-07, "loss": 2.0311, "step": 98200 }, { "epoch": 165.91, "learning_rate": 1.6615478327264845e-07, "loss": 2.0391, "step": 98220 }, { "epoch": 165.95, "learning_rate": 1.625191032952833e-07, "loss": 2.0296, "step": 98240 }, { "epoch": 165.98, "learning_rate": 1.5892357647610544e-07, "loss": 2.0245, "step": 98260 }, { "epoch": 166.01, "learning_rate": 1.553682057119643e-07, "loss": 2.0344, "step": 98280 }, { "epoch": 166.05, "learning_rate": 1.518529938673574e-07, "loss": 2.031, "step": 98300 }, { "epoch": 166.08, "learning_rate": 1.4837794377443038e-07, "loss": 2.0323, "step": 98320 }, { "epoch": 166.11, "learning_rate": 1.449430582329603e-07, "loss": 2.0404, "step": 98340 }, { "epoch": 166.15, "learning_rate": 1.4154834001038341e-07, "loss": 2.0244, "step": 98360 }, { "epoch": 166.18, "learning_rate": 1.381937918417564e-07, "loss": 2.0396, "step": 98380 }, { "epoch": 166.22, "learning_rate": 1.348794164297784e-07, "loss": 2.0272, "step": 98400 }, { "epoch": 166.25, "learning_rate": 1.3160521644478008e-07, "loss": 2.0396, "step": 98420 }, { "epoch": 166.28, "learning_rate": 1.2837119452472902e-07, "loss": 2.0456, "step": 98440 }, { "epoch": 166.32, "learning_rate": 1.2517735327521873e-07, "loss": 2.0285, "step": 98460 }, { "epoch": 166.35, "learning_rate": 1.220236952694631e-07, "loss": 2.0264, "step": 98480 }, { "epoch": 166.39, "learning_rate": 1.1891022304831856e-07, "loss": 2.0328, "step": 98500 }, { "epoch": 166.42, "learning_rate": 1.1583693912023963e-07, "loss": 2.0394, "step": 98520 }, { "epoch": 166.45, "learning_rate": 1.1280384596132898e-07, "loss": 2.0352, "step": 98540 }, { "epoch": 166.49, "learning_rate": 1.0981094601528186e-07, "loss": 2.0252, "step": 98560 }, { "epoch": 166.52, "learning_rate": 1.0685824169343606e-07, "loss": 2.0312, "step": 98580 }, { "epoch": 166.55, "learning_rate": 1.0394573537472196e-07, "loss": 2.0217, "step": 98600 }, { "epoch": 166.59, "learning_rate": 1.0107342940570141e-07, "loss": 2.0388, "step": 98620 }, { "epoch": 166.62, "learning_rate": 9.824132610053438e-08, "loss": 2.0367, "step": 98640 }, { "epoch": 166.66, "learning_rate": 9.544942774100119e-08, "loss": 2.0282, "step": 98660 }, { "epoch": 166.69, "learning_rate": 9.269773657647474e-08, "loss": 2.0423, "step": 98680 }, { "epoch": 166.72, "learning_rate": 8.998625482394829e-08, "loss": 2.0335, "step": 98700 }, { "epoch": 166.76, "learning_rate": 8.731498466801324e-08, "loss": 2.0445, "step": 98720 }, { "epoch": 166.79, "learning_rate": 8.468392826087024e-08, "loss": 2.0335, "step": 98740 }, { "epoch": 166.82, "learning_rate": 8.20930877223014e-08, "loss": 2.0409, "step": 98760 }, { "epoch": 166.86, "learning_rate": 7.954246513970365e-08, "loss": 2.0339, "step": 98780 }, { "epoch": 166.89, "learning_rate": 7.70320625680776e-08, "loss": 2.0261, "step": 98800 }, { "epoch": 166.93, "learning_rate": 7.456188202999426e-08, "loss": 2.0179, "step": 98820 }, { "epoch": 166.96, "learning_rate": 7.213192551564496e-08, "loss": 2.0135, "step": 98840 }, { "epoch": 166.99, "learning_rate": 6.9742194982797e-08, "loss": 2.0312, "step": 98860 }, { "epoch": 167.03, "learning_rate": 6.739269235681577e-08, "loss": 2.0308, "step": 98880 }, { "epoch": 167.06, "learning_rate": 6.508341953064822e-08, "loss": 2.023, "step": 98900 }, { "epoch": 167.09, "learning_rate": 6.281437836483384e-08, "loss": 2.0342, "step": 98920 }, { "epoch": 167.13, "learning_rate": 6.058557068750471e-08, "loss": 2.0181, "step": 98940 }, { "epoch": 167.16, "learning_rate": 5.839699829436884e-08, "loss": 2.0376, "step": 98960 }, { "epoch": 167.2, "learning_rate": 5.624866294871578e-08, "loss": 2.0313, "step": 98980 }, { "epoch": 167.23, "learning_rate": 5.414056638142206e-08, "loss": 2.0296, "step": 99000 }, { "epoch": 167.23, "eval_loss": 2.46012282371521, "eval_runtime": 47.5343, "eval_samples_per_second": 20.806, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.004190255604696678, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.030844263664246436, "eval_tse_type": 0.0005863105269070891, "step": 99000 }, { "epoch": 167.26, "learning_rate": 5.207271029094574e-08, "loss": 2.0366, "step": 99020 }, { "epoch": 167.3, "learning_rate": 5.004509634332078e-08, "loss": 2.0318, "step": 99040 }, { "epoch": 167.33, "learning_rate": 4.805772617215154e-08, "loss": 2.0288, "step": 99060 }, { "epoch": 167.36, "learning_rate": 4.611060137863499e-08, "loss": 2.0477, "step": 99080 }, { "epoch": 167.4, "learning_rate": 4.4203723531532905e-08, "loss": 2.0369, "step": 99100 }, { "epoch": 167.43, "learning_rate": 4.233709416717746e-08, "loss": 2.0284, "step": 99120 }, { "epoch": 167.47, "learning_rate": 4.051071478948232e-08, "loss": 2.0401, "step": 99140 }, { "epoch": 167.5, "learning_rate": 3.872458686992597e-08, "loss": 2.0314, "step": 99160 }, { "epoch": 167.53, "learning_rate": 3.697871184756285e-08, "loss": 2.0251, "step": 99180 }, { "epoch": 167.57, "learning_rate": 3.5273091129012224e-08, "loss": 2.0314, "step": 99200 }, { "epoch": 167.6, "learning_rate": 3.360772608845819e-08, "loss": 2.0299, "step": 99220 }, { "epoch": 167.64, "learning_rate": 3.1982618067660787e-08, "loss": 2.0309, "step": 99240 }, { "epoch": 167.67, "learning_rate": 3.0397768375939325e-08, "loss": 2.0262, "step": 99260 }, { "epoch": 167.7, "learning_rate": 2.8853178290172422e-08, "loss": 2.0367, "step": 99280 }, { "epoch": 167.74, "learning_rate": 2.7348849054809057e-08, "loss": 2.0461, "step": 99300 }, { "epoch": 167.77, "learning_rate": 2.5884781881868603e-08, "loss": 2.0393, "step": 99320 }, { "epoch": 167.8, "learning_rate": 2.446097795091862e-08, "loss": 2.0259, "step": 99340 }, { "epoch": 167.84, "learning_rate": 2.307743840909149e-08, "loss": 2.0369, "step": 99360 }, { "epoch": 167.87, "learning_rate": 2.1734164371084443e-08, "loss": 2.0265, "step": 99380 }, { "epoch": 167.91, "learning_rate": 2.0431156919137328e-08, "loss": 2.041, "step": 99400 }, { "epoch": 167.94, "learning_rate": 1.916841710307149e-08, "loss": 2.033, "step": 99420 }, { "epoch": 167.97, "learning_rate": 1.7945945940250898e-08, "loss": 2.0241, "step": 99440 }, { "epoch": 168.01, "learning_rate": 1.6763744415598805e-08, "loss": 2.0241, "step": 99460 }, { "epoch": 168.04, "learning_rate": 1.5621813481586646e-08, "loss": 2.0468, "step": 99480 }, { "epoch": 168.07, "learning_rate": 1.4520154058256241e-08, "loss": 2.0104, "step": 99500 }, { "epoch": 168.11, "learning_rate": 1.3458767033192043e-08, "loss": 2.026, "step": 99520 }, { "epoch": 168.14, "learning_rate": 1.2437653261537785e-08, "loss": 2.0396, "step": 99540 }, { "epoch": 168.18, "learning_rate": 1.1456813565974278e-08, "loss": 2.037, "step": 99560 }, { "epoch": 168.21, "learning_rate": 1.0516248736763823e-08, "loss": 2.0301, "step": 99580 }, { "epoch": 168.24, "learning_rate": 9.615959531683594e-09, "loss": 2.036, "step": 99600 }, { "epoch": 168.28, "learning_rate": 8.755946676097804e-09, "loss": 2.0351, "step": 99620 }, { "epoch": 168.31, "learning_rate": 7.936210862891091e-09, "loss": 2.0232, "step": 99640 }, { "epoch": 168.34, "learning_rate": 7.156752752518481e-09, "loss": 2.037, "step": 99660 }, { "epoch": 168.38, "learning_rate": 6.417572972966524e-09, "loss": 2.0342, "step": 99680 }, { "epoch": 168.41, "learning_rate": 5.718672119786606e-09, "loss": 2.0436, "step": 99700 }, { "epoch": 168.45, "learning_rate": 5.060050756067192e-09, "loss": 2.0383, "step": 99720 }, { "epoch": 168.48, "learning_rate": 4.4417094124449275e-09, "loss": 2.0547, "step": 99740 }, { "epoch": 168.51, "learning_rate": 3.863648587110191e-09, "loss": 2.0211, "step": 99760 }, { "epoch": 168.55, "learning_rate": 3.3258687457959904e-09, "loss": 2.0316, "step": 99780 }, { "epoch": 168.58, "learning_rate": 2.8283703217835135e-09, "loss": 2.0264, "step": 99800 }, { "epoch": 168.61, "learning_rate": 2.371153715891028e-09, "loss": 2.0253, "step": 99820 }, { "epoch": 168.65, "learning_rate": 1.954219296496085e-09, "loss": 2.0317, "step": 99840 }, { "epoch": 168.68, "learning_rate": 1.5775673995133134e-09, "loss": 2.0227, "step": 99860 }, { "epoch": 168.72, "learning_rate": 1.241198328405524e-09, "loss": 2.0267, "step": 99880 }, { "epoch": 168.75, "learning_rate": 9.45112354178157e-10, "loss": 2.0422, "step": 99900 }, { "epoch": 168.78, "learning_rate": 6.893097153848338e-10, "loss": 2.0353, "step": 99920 }, { "epoch": 168.82, "learning_rate": 4.737906181162544e-10, "loss": 2.0309, "step": 99940 }, { "epoch": 168.85, "learning_rate": 2.985552360168509e-10, "loss": 2.017, "step": 99960 }, { "epoch": 168.89, "learning_rate": 1.6360371026258315e-10, "loss": 2.0289, "step": 99980 }, { "epoch": 168.92, "learning_rate": 6.893614959424533e-11, "loss": 2.0345, "step": 100000 }, { "epoch": 168.92, "eval_loss": 2.46016526222229, "eval_runtime": 47.7026, "eval_samples_per_second": 20.733, "eval_steps_per_second": 0.126, "eval_tse_ndup": 0.003877539938720142, "eval_tse_nnof": 0.0, "eval_tse_nnon": 0.0, "eval_tse_time": 0.030767411587534408, "eval_tse_type": 0.000635462307605887, "step": 100000 }, { "epoch": 168.92, "step": 100000, "total_flos": 1.1144698192283566e+18, "train_loss": 3.096821060371399, "train_runtime": 63835.4946, "train_samples_per_second": 200.515, "train_steps_per_second": 1.567 } ], "max_steps": 100000, "num_train_epochs": 169, "total_flos": 1.1144698192283566e+18, "trial_name": null, "trial_params": null }