{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.11565268222492, "global_step": 68000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 7.142857142857143e-06, "loss": 9.9046, "step": 1000 }, { "epoch": 0.09, "eval_loss": 8.965944290161133, "eval_rouge-1": 0.0801, "eval_rouge-2": 0.0016, "eval_rouge-l": 0.0788, "eval_runtime": 123.903, "eval_samples_per_second": 33.688, "step": 1000 }, { "epoch": 0.18, "learning_rate": 1.4285714285714285e-05, "loss": 8.3615, "step": 2000 }, { "epoch": 0.18, "eval_loss": 8.027783393859863, "eval_rouge-1": 0.1331, "eval_rouge-2": 0.0035, "eval_rouge-l": 0.1326, "eval_runtime": 188.3863, "eval_samples_per_second": 22.157, "step": 2000 }, { "epoch": 0.27, "learning_rate": 2.1428571428571428e-05, "loss": 7.8626, "step": 3000 }, { "epoch": 0.27, "eval_loss": 7.678055286407471, "eval_rouge-1": 0.1526, "eval_rouge-2": 0.0104, "eval_rouge-l": 0.1512, "eval_runtime": 171.3007, "eval_samples_per_second": 24.367, "step": 3000 }, { "epoch": 0.36, "learning_rate": 2.857142857142857e-05, "loss": 7.5789, "step": 4000 }, { "epoch": 0.36, "eval_loss": 7.365678787231445, "eval_rouge-1": 0.1554, "eval_rouge-2": 0.0185, "eval_rouge-l": 0.1514, "eval_runtime": 183.8437, "eval_samples_per_second": 22.704, "step": 4000 }, { "epoch": 0.45, "learning_rate": 3.571428571428572e-05, "loss": 7.2362, "step": 5000 }, { "epoch": 0.45, "eval_loss": 7.026582717895508, "eval_rouge-1": 0.1762, "eval_rouge-2": 0.0294, "eval_rouge-l": 0.1713, "eval_runtime": 186.2412, "eval_samples_per_second": 22.412, "step": 5000 }, { "epoch": 0.54, "learning_rate": 4.2857142857142856e-05, "loss": 6.8796, "step": 6000 }, { "epoch": 0.54, "eval_loss": 6.7096028327941895, "eval_rouge-1": 0.1867, "eval_rouge-2": 0.0358, "eval_rouge-l": 0.181, "eval_runtime": 187.2555, "eval_samples_per_second": 22.29, "step": 6000 }, { "epoch": 0.63, "learning_rate": 5e-05, "loss": 6.5408, "step": 7000 }, { "epoch": 0.63, "eval_loss": 6.399764060974121, "eval_rouge-1": 0.1998, "eval_rouge-2": 0.0414, "eval_rouge-l": 0.1933, "eval_runtime": 215.0406, "eval_samples_per_second": 19.41, "step": 7000 }, { "epoch": 0.72, "learning_rate": 4.984689346847537e-05, "loss": 6.203, "step": 8000 }, { "epoch": 0.72, "eval_loss": 6.118791580200195, "eval_rouge-1": 0.2166, "eval_rouge-2": 0.0499, "eval_rouge-l": 0.2054, "eval_runtime": 223.5995, "eval_samples_per_second": 18.667, "step": 8000 }, { "epoch": 0.81, "learning_rate": 4.969378693695073e-05, "loss": 5.8343, "step": 9000 }, { "epoch": 0.81, "eval_loss": 5.871669769287109, "eval_rouge-1": 0.2189, "eval_rouge-2": 0.0564, "eval_rouge-l": 0.2095, "eval_runtime": 173.6387, "eval_samples_per_second": 24.038, "step": 9000 }, { "epoch": 0.9, "learning_rate": 4.9540680405426096e-05, "loss": 5.6021, "step": 10000 }, { "epoch": 0.9, "eval_loss": 5.661921501159668, "eval_rouge-1": 0.2366, "eval_rouge-2": 0.0626, "eval_rouge-l": 0.2242, "eval_runtime": 196.6013, "eval_samples_per_second": 21.231, "step": 10000 }, { "epoch": 0.99, "learning_rate": 4.938757387390146e-05, "loss": 5.3839, "step": 11000 }, { "epoch": 0.99, "eval_loss": 5.473762512207031, "eval_rouge-1": 0.2505, "eval_rouge-2": 0.0733, "eval_rouge-l": 0.2382, "eval_runtime": 195.5436, "eval_samples_per_second": 21.346, "step": 11000 }, { "epoch": 1.08, "learning_rate": 4.923446734237683e-05, "loss": 5.1467, "step": 12000 }, { "epoch": 1.08, "eval_loss": 5.313476085662842, "eval_rouge-1": 0.2547, "eval_rouge-2": 0.0778, "eval_rouge-l": 0.2423, "eval_runtime": 184.8247, "eval_samples_per_second": 22.584, "step": 12000 }, { "epoch": 1.17, "learning_rate": 4.90813608108522e-05, "loss": 4.9939, "step": 13000 }, { "epoch": 1.17, "eval_loss": 5.17720365524292, "eval_rouge-1": 0.2666, "eval_rouge-2": 0.0886, "eval_rouge-l": 0.2517, "eval_runtime": 201.6984, "eval_samples_per_second": 20.694, "step": 13000 }, { "epoch": 1.26, "learning_rate": 4.892825427932756e-05, "loss": 4.8392, "step": 14000 }, { "epoch": 1.26, "eval_loss": 5.034477710723877, "eval_rouge-1": 0.2827, "eval_rouge-2": 0.0953, "eval_rouge-l": 0.2666, "eval_runtime": 190.2704, "eval_samples_per_second": 21.937, "step": 14000 }, { "epoch": 1.35, "learning_rate": 4.8775147747802924e-05, "loss": 4.7284, "step": 15000 }, { "epoch": 1.35, "eval_loss": 4.909021854400635, "eval_rouge-1": 0.288, "eval_rouge-2": 0.1, "eval_rouge-l": 0.2726, "eval_runtime": 181.3965, "eval_samples_per_second": 23.01, "step": 15000 }, { "epoch": 1.44, "learning_rate": 4.862204121627829e-05, "loss": 4.6145, "step": 16000 }, { "epoch": 1.44, "eval_loss": 4.801566123962402, "eval_rouge-1": 0.2952, "eval_rouge-2": 0.1042, "eval_rouge-l": 0.2789, "eval_runtime": 182.1022, "eval_samples_per_second": 22.921, "step": 16000 }, { "epoch": 1.53, "learning_rate": 4.846893468475365e-05, "loss": 4.3908, "step": 17000 }, { "epoch": 1.53, "eval_loss": 4.704518795013428, "eval_rouge-1": 0.3059, "eval_rouge-2": 0.1151, "eval_rouge-l": 0.2893, "eval_runtime": 174.1469, "eval_samples_per_second": 23.968, "step": 17000 }, { "epoch": 1.62, "learning_rate": 4.831582815322902e-05, "loss": 4.304, "step": 18000 }, { "epoch": 1.62, "eval_loss": 4.603546619415283, "eval_rouge-1": 0.3086, "eval_rouge-2": 0.1182, "eval_rouge-l": 0.2909, "eval_runtime": 182.7874, "eval_samples_per_second": 22.835, "step": 18000 }, { "epoch": 1.71, "learning_rate": 4.8162721621704385e-05, "loss": 4.2481, "step": 19000 }, { "epoch": 1.71, "eval_loss": 4.504889488220215, "eval_rouge-1": 0.3163, "eval_rouge-2": 0.1253, "eval_rouge-l": 0.2994, "eval_runtime": 179.5495, "eval_samples_per_second": 23.247, "step": 19000 }, { "epoch": 1.8, "learning_rate": 4.800961509017975e-05, "loss": 4.1473, "step": 20000 }, { "epoch": 1.8, "eval_loss": 4.4253692626953125, "eval_rouge-1": 0.3199, "eval_rouge-2": 0.1264, "eval_rouge-l": 0.3014, "eval_runtime": 182.3622, "eval_samples_per_second": 22.889, "step": 20000 }, { "epoch": 1.89, "learning_rate": 4.785650855865512e-05, "loss": 4.0769, "step": 21000 }, { "epoch": 1.89, "eval_loss": 4.35316276550293, "eval_rouge-1": 0.324, "eval_rouge-2": 0.1333, "eval_rouge-l": 0.306, "eval_runtime": 175.6652, "eval_samples_per_second": 23.761, "step": 21000 }, { "epoch": 1.98, "learning_rate": 4.770340202713048e-05, "loss": 3.999, "step": 22000 }, { "epoch": 1.98, "eval_loss": 4.278110027313232, "eval_rouge-1": 0.3268, "eval_rouge-2": 0.1368, "eval_rouge-l": 0.3089, "eval_runtime": 189.2177, "eval_samples_per_second": 22.059, "step": 22000 }, { "epoch": 2.07, "learning_rate": 4.7550295495605846e-05, "loss": 3.8351, "step": 23000 }, { "epoch": 2.07, "eval_loss": 4.208358287811279, "eval_rouge-1": 0.3308, "eval_rouge-2": 0.1427, "eval_rouge-l": 0.3144, "eval_runtime": 178.6224, "eval_samples_per_second": 23.368, "step": 23000 }, { "epoch": 2.16, "learning_rate": 4.7397188964081206e-05, "loss": 3.7519, "step": 24000 }, { "epoch": 2.16, "eval_loss": 4.136488914489746, "eval_rouge-1": 0.3361, "eval_rouge-2": 0.1442, "eval_rouge-l": 0.3184, "eval_runtime": 191.1592, "eval_samples_per_second": 21.835, "step": 24000 }, { "epoch": 2.25, "learning_rate": 4.724408243255657e-05, "loss": 3.6917, "step": 25000 }, { "epoch": 2.25, "eval_loss": 4.072964668273926, "eval_rouge-1": 0.3394, "eval_rouge-2": 0.1488, "eval_rouge-l": 0.3217, "eval_runtime": 178.8323, "eval_samples_per_second": 23.34, "step": 25000 }, { "epoch": 2.34, "learning_rate": 4.709097590103194e-05, "loss": 3.6526, "step": 26000 }, { "epoch": 2.34, "eval_loss": 4.014794826507568, "eval_rouge-1": 0.3437, "eval_rouge-2": 0.1511, "eval_rouge-l": 0.3254, "eval_runtime": 182.0303, "eval_samples_per_second": 22.93, "step": 26000 }, { "epoch": 2.43, "learning_rate": 4.693786936950731e-05, "loss": 3.485, "step": 27000 }, { "epoch": 2.43, "eval_loss": 3.9730067253112793, "eval_rouge-1": 0.3472, "eval_rouge-2": 0.1575, "eval_rouge-l": 0.329, "eval_runtime": 106.9008, "eval_samples_per_second": 39.046, "step": 27000 }, { "epoch": 2.52, "learning_rate": 4.6784762837982674e-05, "loss": 3.4744, "step": 28000 }, { "epoch": 2.52, "eval_loss": 3.905726909637451, "eval_rouge-1": 0.3524, "eval_rouge-2": 0.1605, "eval_rouge-l": 0.3328, "eval_runtime": 109.2943, "eval_samples_per_second": 38.19, "step": 28000 }, { "epoch": 2.61, "learning_rate": 4.663165630645804e-05, "loss": 3.4398, "step": 29000 }, { "epoch": 2.61, "eval_loss": 3.861633062362671, "eval_rouge-1": 0.3555, "eval_rouge-2": 0.1648, "eval_rouge-l": 0.3369, "eval_runtime": 109.0651, "eval_samples_per_second": 38.271, "step": 29000 }, { "epoch": 2.7, "learning_rate": 4.64785497749334e-05, "loss": 3.4033, "step": 30000 }, { "epoch": 2.7, "eval_loss": 3.814605712890625, "eval_rouge-1": 0.3616, "eval_rouge-2": 0.1696, "eval_rouge-l": 0.3426, "eval_runtime": 111.7482, "eval_samples_per_second": 37.352, "step": 30000 }, { "epoch": 2.79, "learning_rate": 4.632544324340877e-05, "loss": 3.3852, "step": 31000 }, { "epoch": 2.79, "eval_loss": 3.7633745670318604, "eval_rouge-1": 0.3618, "eval_rouge-2": 0.1729, "eval_rouge-l": 0.3431, "eval_runtime": 108.2348, "eval_samples_per_second": 38.564, "step": 31000 }, { "epoch": 2.88, "learning_rate": 4.617233671188413e-05, "loss": 3.3591, "step": 32000 }, { "epoch": 2.88, "eval_loss": 3.70929217338562, "eval_rouge-1": 0.3665, "eval_rouge-2": 0.1758, "eval_rouge-l": 0.3483, "eval_runtime": 107.2167, "eval_samples_per_second": 38.93, "step": 32000 }, { "epoch": 2.97, "learning_rate": 4.6019230180359495e-05, "loss": 3.3347, "step": 33000 }, { "epoch": 2.97, "eval_loss": 3.6575114727020264, "eval_rouge-1": 0.3664, "eval_rouge-2": 0.1771, "eval_rouge-l": 0.3479, "eval_runtime": 111.8035, "eval_samples_per_second": 37.333, "step": 33000 }, { "epoch": 3.06, "learning_rate": 4.586612364883486e-05, "loss": 3.1412, "step": 34000 }, { "epoch": 3.06, "eval_loss": 3.6294667720794678, "eval_rouge-1": 0.3687, "eval_rouge-2": 0.1799, "eval_rouge-l": 0.3504, "eval_runtime": 108.0977, "eval_samples_per_second": 38.613, "step": 34000 }, { "epoch": 3.15, "learning_rate": 4.571301711731022e-05, "loss": 3.0681, "step": 35000 }, { "epoch": 3.15, "eval_loss": 3.5905728340148926, "eval_rouge-1": 0.369, "eval_rouge-2": 0.1812, "eval_rouge-l": 0.3495, "eval_runtime": 109.8754, "eval_samples_per_second": 37.988, "step": 35000 }, { "epoch": 3.24, "learning_rate": 4.5559910585785595e-05, "loss": 3.047, "step": 36000 }, { "epoch": 3.24, "eval_loss": 3.5687978267669678, "eval_rouge-1": 0.3751, "eval_rouge-2": 0.1869, "eval_rouge-l": 0.3562, "eval_runtime": 112.5879, "eval_samples_per_second": 37.073, "step": 36000 }, { "epoch": 3.33, "learning_rate": 4.540680405426096e-05, "loss": 3.0482, "step": 37000 }, { "epoch": 3.33, "eval_loss": 3.522142171859741, "eval_rouge-1": 0.374, "eval_rouge-2": 0.1885, "eval_rouge-l": 0.3553, "eval_runtime": 110.8835, "eval_samples_per_second": 37.643, "step": 37000 }, { "epoch": 3.42, "learning_rate": 4.525369752273632e-05, "loss": 3.0085, "step": 38000 }, { "epoch": 3.42, "eval_loss": 3.497309446334839, "eval_rouge-1": 0.3754, "eval_rouge-2": 0.1889, "eval_rouge-l": 0.3563, "eval_runtime": 116.7032, "eval_samples_per_second": 35.766, "step": 38000 }, { "epoch": 3.51, "learning_rate": 4.510059099121169e-05, "loss": 3.0179, "step": 39000 }, { "epoch": 3.51, "eval_loss": 3.45327091217041, "eval_rouge-1": 0.3766, "eval_rouge-2": 0.1913, "eval_rouge-l": 0.3568, "eval_runtime": 110.7354, "eval_samples_per_second": 37.693, "step": 39000 }, { "epoch": 3.6, "learning_rate": 4.494748445968705e-05, "loss": 3.0008, "step": 40000 }, { "epoch": 3.6, "eval_loss": 3.4268550872802734, "eval_rouge-1": 0.3802, "eval_rouge-2": 0.1942, "eval_rouge-l": 0.3614, "eval_runtime": 108.7986, "eval_samples_per_second": 38.364, "step": 40000 }, { "epoch": 3.69, "learning_rate": 4.4794377928162417e-05, "loss": 2.973, "step": 41000 }, { "epoch": 3.69, "eval_loss": 3.393805980682373, "eval_rouge-1": 0.3835, "eval_rouge-2": 0.197, "eval_rouge-l": 0.3648, "eval_runtime": 112.0812, "eval_samples_per_second": 37.241, "step": 41000 }, { "epoch": 3.78, "learning_rate": 4.4641271396637783e-05, "loss": 2.9718, "step": 42000 }, { "epoch": 3.78, "eval_loss": 3.350437879562378, "eval_rouge-1": 0.3877, "eval_rouge-2": 0.2013, "eval_rouge-l": 0.3682, "eval_runtime": 111.3062, "eval_samples_per_second": 37.5, "step": 42000 }, { "epoch": 3.87, "learning_rate": 4.4488164865113144e-05, "loss": 2.9702, "step": 43000 }, { "epoch": 3.87, "eval_loss": 3.321171998977661, "eval_rouge-1": 0.3859, "eval_rouge-2": 0.1992, "eval_rouge-l": 0.3672, "eval_runtime": 109.8134, "eval_samples_per_second": 38.01, "step": 43000 }, { "epoch": 3.96, "learning_rate": 4.433505833358852e-05, "loss": 2.6419, "step": 44000 }, { "epoch": 3.96, "eval_loss": 3.341356039047241, "eval_rouge-1": 0.3839, "eval_rouge-2": 0.2012, "eval_rouge-l": 0.3653, "eval_runtime": 182.4115, "eval_samples_per_second": 22.882, "step": 44000 }, { "epoch": 4.05, "learning_rate": 4.418195180206388e-05, "loss": 2.6177, "step": 45000 }, { "epoch": 4.05, "eval_loss": 3.31478214263916, "eval_rouge-1": 0.3896, "eval_rouge-2": 0.2053, "eval_rouge-l": 0.37, "eval_runtime": 185.1687, "eval_samples_per_second": 22.542, "step": 45000 }, { "epoch": 4.14, "learning_rate": 4.4028845270539244e-05, "loss": 2.5985, "step": 46000 }, { "epoch": 4.14, "eval_loss": 3.3024539947509766, "eval_rouge-1": 0.3911, "eval_rouge-2": 0.2066, "eval_rouge-l": 0.3717, "eval_runtime": 183.1915, "eval_samples_per_second": 22.785, "step": 46000 }, { "epoch": 4.23, "learning_rate": 4.387573873901461e-05, "loss": 2.615, "step": 47000 }, { "epoch": 4.23, "eval_loss": 3.285188913345337, "eval_rouge-1": 0.3894, "eval_rouge-2": 0.2047, "eval_rouge-l": 0.371, "eval_runtime": 183.8267, "eval_samples_per_second": 22.706, "step": 47000 }, { "epoch": 4.32, "learning_rate": 4.372263220748997e-05, "loss": 2.6202, "step": 48000 }, { "epoch": 4.32, "eval_loss": 3.2647767066955566, "eval_rouge-1": 0.394, "eval_rouge-2": 0.2099, "eval_rouge-l": 0.3747, "eval_runtime": 186.2005, "eval_samples_per_second": 22.417, "step": 48000 }, { "epoch": 4.41, "learning_rate": 4.356952567596534e-05, "loss": 2.6072, "step": 49000 }, { "epoch": 4.41, "eval_loss": 3.2533161640167236, "eval_rouge-1": 0.3925, "eval_rouge-2": 0.2099, "eval_rouge-l": 0.373, "eval_runtime": 180.2911, "eval_samples_per_second": 23.151, "step": 49000 }, { "epoch": 4.5, "learning_rate": 4.3416419144440705e-05, "loss": 2.612, "step": 50000 }, { "epoch": 4.5, "eval_loss": 3.2256064414978027, "eval_rouge-1": 0.3933, "eval_rouge-2": 0.2113, "eval_rouge-l": 0.3738, "eval_runtime": 182.776, "eval_samples_per_second": 22.837, "step": 50000 }, { "epoch": 4.59, "learning_rate": 4.3263312612916065e-05, "loss": 2.6108, "step": 51000 }, { "epoch": 4.59, "eval_loss": 3.2123188972473145, "eval_rouge-1": 0.3951, "eval_rouge-2": 0.2122, "eval_rouge-l": 0.3767, "eval_runtime": 182.0897, "eval_samples_per_second": 22.923, "step": 51000 }, { "epoch": 4.68, "learning_rate": 4.311020608139143e-05, "loss": 2.6334, "step": 52000 }, { "epoch": 4.68, "eval_loss": 3.193678140640259, "eval_rouge-1": 0.3971, "eval_rouge-2": 0.2123, "eval_rouge-l": 0.3779, "eval_runtime": 184.0687, "eval_samples_per_second": 22.676, "step": 52000 }, { "epoch": 4.77, "learning_rate": 4.29570995498668e-05, "loss": 2.6086, "step": 53000 }, { "epoch": 4.77, "eval_loss": 3.1697912216186523, "eval_rouge-1": 0.3979, "eval_rouge-2": 0.215, "eval_rouge-l": 0.3786, "eval_runtime": 183.0622, "eval_samples_per_second": 22.801, "step": 53000 }, { "epoch": 4.86, "learning_rate": 4.2803993018342166e-05, "loss": 2.3308, "step": 54000 }, { "epoch": 4.86, "eval_loss": 3.189683437347412, "eval_rouge-1": 0.3961, "eval_rouge-2": 0.2152, "eval_rouge-l": 0.378, "eval_runtime": 207.2919, "eval_samples_per_second": 20.136, "step": 54000 }, { "epoch": 4.95, "learning_rate": 4.265088648681753e-05, "loss": 2.3303, "step": 55000 }, { "epoch": 4.95, "eval_loss": 3.19010591506958, "eval_rouge-1": 0.3982, "eval_rouge-2": 0.2151, "eval_rouge-l": 0.3788, "eval_runtime": 209.6218, "eval_samples_per_second": 19.912, "step": 55000 }, { "epoch": 5.04, "learning_rate": 4.249777995529289e-05, "loss": 2.3079, "step": 56000 }, { "epoch": 5.04, "eval_loss": 3.1867759227752686, "eval_rouge-1": 0.3958, "eval_rouge-2": 0.2146, "eval_rouge-l": 0.3764, "eval_runtime": 209.3472, "eval_samples_per_second": 19.938, "step": 56000 }, { "epoch": 5.13, "learning_rate": 4.234467342376826e-05, "loss": 2.2662, "step": 57000 }, { "epoch": 5.13, "eval_loss": 3.1843576431274414, "eval_rouge-1": 0.3966, "eval_rouge-2": 0.2116, "eval_rouge-l": 0.3765, "eval_runtime": 216.9453, "eval_samples_per_second": 19.24, "step": 57000 }, { "epoch": 5.22, "learning_rate": 4.219156689224362e-05, "loss": 2.2889, "step": 58000 }, { "epoch": 5.22, "eval_loss": 3.1772937774658203, "eval_rouge-1": 0.3928, "eval_rouge-2": 0.2142, "eval_rouge-l": 0.375, "eval_runtime": 208.5252, "eval_samples_per_second": 20.017, "step": 58000 }, { "epoch": 5.31, "learning_rate": 4.203846036071899e-05, "loss": 2.3055, "step": 59000 }, { "epoch": 5.31, "eval_loss": 3.1666276454925537, "eval_rouge-1": 0.3996, "eval_rouge-2": 0.2174, "eval_rouge-l": 0.3811, "eval_runtime": 208.7841, "eval_samples_per_second": 19.992, "step": 59000 }, { "epoch": 5.4, "learning_rate": 4.1885353829194354e-05, "loss": 2.1645, "step": 60000 }, { "epoch": 5.4, "eval_loss": 3.172426700592041, "eval_rouge-1": 0.3976, "eval_rouge-2": 0.2148, "eval_rouge-l": 0.378, "eval_runtime": 211.5986, "eval_samples_per_second": 19.726, "step": 60000 }, { "epoch": 5.49, "learning_rate": 4.173224729766972e-05, "loss": 2.2381, "step": 61000 }, { "epoch": 5.49, "eval_loss": 3.160379409790039, "eval_rouge-1": 0.3999, "eval_rouge-2": 0.2183, "eval_rouge-l": 0.3808, "eval_runtime": 207.4561, "eval_samples_per_second": 20.12, "step": 61000 }, { "epoch": 5.58, "learning_rate": 4.157914076614509e-05, "loss": 2.2566, "step": 62000 }, { "epoch": 5.58, "eval_loss": 3.148230791091919, "eval_rouge-1": 0.398, "eval_rouge-2": 0.2167, "eval_rouge-l": 0.379, "eval_runtime": 205.4861, "eval_samples_per_second": 20.313, "step": 62000 }, { "epoch": 5.67, "learning_rate": 4.1426034234620455e-05, "loss": 2.2723, "step": 63000 }, { "epoch": 5.67, "eval_loss": 3.145291566848755, "eval_rouge-1": 0.3968, "eval_rouge-2": 0.2183, "eval_rouge-l": 0.3781, "eval_runtime": 201.6935, "eval_samples_per_second": 20.695, "step": 63000 }, { "epoch": 5.76, "learning_rate": 4.1272927703095815e-05, "loss": 2.288, "step": 64000 }, { "epoch": 5.76, "eval_loss": 3.124877452850342, "eval_rouge-1": 0.3998, "eval_rouge-2": 0.2156, "eval_rouge-l": 0.3792, "eval_runtime": 203.9639, "eval_samples_per_second": 20.464, "step": 64000 }, { "epoch": 5.85, "learning_rate": 4.111982117157118e-05, "loss": 2.2889, "step": 65000 }, { "epoch": 5.85, "eval_loss": 3.128246545791626, "eval_rouge-1": 0.4007, "eval_rouge-2": 0.2195, "eval_rouge-l": 0.3822, "eval_runtime": 212.1813, "eval_samples_per_second": 19.672, "step": 65000 }, { "epoch": 5.94, "learning_rate": 4.096671464004654e-05, "loss": 2.294, "step": 66000 }, { "epoch": 5.94, "eval_loss": 3.1073431968688965, "eval_rouge-1": 0.3977, "eval_rouge-2": 0.2188, "eval_rouge-l": 0.3797, "eval_runtime": 205.3765, "eval_samples_per_second": 20.324, "step": 66000 }, { "epoch": 6.03, "learning_rate": 4.081360810852191e-05, "loss": 2.2157, "step": 67000 }, { "epoch": 6.03, "eval_loss": 3.1187493801116943, "eval_rouge-1": 0.3977, "eval_rouge-2": 0.2172, "eval_rouge-l": 0.3789, "eval_runtime": 204.9076, "eval_samples_per_second": 20.37, "step": 67000 }, { "epoch": 6.12, "learning_rate": 4.0660501576997276e-05, "loss": 1.9904, "step": 68000 }, { "epoch": 6.12, "eval_loss": 3.1339566707611084, "eval_rouge-1": 0.4014, "eval_rouge-2": 0.2191, "eval_rouge-l": 0.3822, "eval_runtime": 206.8966, "eval_samples_per_second": 20.174, "step": 68000 } ], "max_steps": 333570, "num_train_epochs": 30, "total_flos": 2.6905869526822502e+17, "trial_name": null, "trial_params": null }