|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.11565268222492, |
|
"global_step": 68000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 9.9046, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 8.965944290161133, |
|
"eval_rouge-1": 0.0801, |
|
"eval_rouge-2": 0.0016, |
|
"eval_rouge-l": 0.0788, |
|
"eval_runtime": 123.903, |
|
"eval_samples_per_second": 33.688, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 8.3615, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 8.027783393859863, |
|
"eval_rouge-1": 0.1331, |
|
"eval_rouge-2": 0.0035, |
|
"eval_rouge-l": 0.1326, |
|
"eval_runtime": 188.3863, |
|
"eval_samples_per_second": 22.157, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 7.8626, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 7.678055286407471, |
|
"eval_rouge-1": 0.1526, |
|
"eval_rouge-2": 0.0104, |
|
"eval_rouge-l": 0.1512, |
|
"eval_runtime": 171.3007, |
|
"eval_samples_per_second": 24.367, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 7.5789, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 7.365678787231445, |
|
"eval_rouge-1": 0.1554, |
|
"eval_rouge-2": 0.0185, |
|
"eval_rouge-l": 0.1514, |
|
"eval_runtime": 183.8437, |
|
"eval_samples_per_second": 22.704, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 7.2362, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 7.026582717895508, |
|
"eval_rouge-1": 0.1762, |
|
"eval_rouge-2": 0.0294, |
|
"eval_rouge-l": 0.1713, |
|
"eval_runtime": 186.2412, |
|
"eval_samples_per_second": 22.412, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 6.8796, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 6.7096028327941895, |
|
"eval_rouge-1": 0.1867, |
|
"eval_rouge-2": 0.0358, |
|
"eval_rouge-l": 0.181, |
|
"eval_runtime": 187.2555, |
|
"eval_samples_per_second": 22.29, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 6.5408, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 6.399764060974121, |
|
"eval_rouge-1": 0.1998, |
|
"eval_rouge-2": 0.0414, |
|
"eval_rouge-l": 0.1933, |
|
"eval_runtime": 215.0406, |
|
"eval_samples_per_second": 19.41, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.984689346847537e-05, |
|
"loss": 6.203, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 6.118791580200195, |
|
"eval_rouge-1": 0.2166, |
|
"eval_rouge-2": 0.0499, |
|
"eval_rouge-l": 0.2054, |
|
"eval_runtime": 223.5995, |
|
"eval_samples_per_second": 18.667, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.969378693695073e-05, |
|
"loss": 5.8343, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 5.871669769287109, |
|
"eval_rouge-1": 0.2189, |
|
"eval_rouge-2": 0.0564, |
|
"eval_rouge-l": 0.2095, |
|
"eval_runtime": 173.6387, |
|
"eval_samples_per_second": 24.038, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.9540680405426096e-05, |
|
"loss": 5.6021, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 5.661921501159668, |
|
"eval_rouge-1": 0.2366, |
|
"eval_rouge-2": 0.0626, |
|
"eval_rouge-l": 0.2242, |
|
"eval_runtime": 196.6013, |
|
"eval_samples_per_second": 21.231, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.938757387390146e-05, |
|
"loss": 5.3839, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 5.473762512207031, |
|
"eval_rouge-1": 0.2505, |
|
"eval_rouge-2": 0.0733, |
|
"eval_rouge-l": 0.2382, |
|
"eval_runtime": 195.5436, |
|
"eval_samples_per_second": 21.346, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.923446734237683e-05, |
|
"loss": 5.1467, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 5.313476085662842, |
|
"eval_rouge-1": 0.2547, |
|
"eval_rouge-2": 0.0778, |
|
"eval_rouge-l": 0.2423, |
|
"eval_runtime": 184.8247, |
|
"eval_samples_per_second": 22.584, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.90813608108522e-05, |
|
"loss": 4.9939, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 5.17720365524292, |
|
"eval_rouge-1": 0.2666, |
|
"eval_rouge-2": 0.0886, |
|
"eval_rouge-l": 0.2517, |
|
"eval_runtime": 201.6984, |
|
"eval_samples_per_second": 20.694, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.892825427932756e-05, |
|
"loss": 4.8392, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 5.034477710723877, |
|
"eval_rouge-1": 0.2827, |
|
"eval_rouge-2": 0.0953, |
|
"eval_rouge-l": 0.2666, |
|
"eval_runtime": 190.2704, |
|
"eval_samples_per_second": 21.937, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.8775147747802924e-05, |
|
"loss": 4.7284, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 4.909021854400635, |
|
"eval_rouge-1": 0.288, |
|
"eval_rouge-2": 0.1, |
|
"eval_rouge-l": 0.2726, |
|
"eval_runtime": 181.3965, |
|
"eval_samples_per_second": 23.01, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.862204121627829e-05, |
|
"loss": 4.6145, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 4.801566123962402, |
|
"eval_rouge-1": 0.2952, |
|
"eval_rouge-2": 0.1042, |
|
"eval_rouge-l": 0.2789, |
|
"eval_runtime": 182.1022, |
|
"eval_samples_per_second": 22.921, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.846893468475365e-05, |
|
"loss": 4.3908, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 4.704518795013428, |
|
"eval_rouge-1": 0.3059, |
|
"eval_rouge-2": 0.1151, |
|
"eval_rouge-l": 0.2893, |
|
"eval_runtime": 174.1469, |
|
"eval_samples_per_second": 23.968, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.831582815322902e-05, |
|
"loss": 4.304, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 4.603546619415283, |
|
"eval_rouge-1": 0.3086, |
|
"eval_rouge-2": 0.1182, |
|
"eval_rouge-l": 0.2909, |
|
"eval_runtime": 182.7874, |
|
"eval_samples_per_second": 22.835, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.8162721621704385e-05, |
|
"loss": 4.2481, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 4.504889488220215, |
|
"eval_rouge-1": 0.3163, |
|
"eval_rouge-2": 0.1253, |
|
"eval_rouge-l": 0.2994, |
|
"eval_runtime": 179.5495, |
|
"eval_samples_per_second": 23.247, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.800961509017975e-05, |
|
"loss": 4.1473, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 4.4253692626953125, |
|
"eval_rouge-1": 0.3199, |
|
"eval_rouge-2": 0.1264, |
|
"eval_rouge-l": 0.3014, |
|
"eval_runtime": 182.3622, |
|
"eval_samples_per_second": 22.889, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.785650855865512e-05, |
|
"loss": 4.0769, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 4.35316276550293, |
|
"eval_rouge-1": 0.324, |
|
"eval_rouge-2": 0.1333, |
|
"eval_rouge-l": 0.306, |
|
"eval_runtime": 175.6652, |
|
"eval_samples_per_second": 23.761, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.770340202713048e-05, |
|
"loss": 3.999, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 4.278110027313232, |
|
"eval_rouge-1": 0.3268, |
|
"eval_rouge-2": 0.1368, |
|
"eval_rouge-l": 0.3089, |
|
"eval_runtime": 189.2177, |
|
"eval_samples_per_second": 22.059, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.7550295495605846e-05, |
|
"loss": 3.8351, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 4.208358287811279, |
|
"eval_rouge-1": 0.3308, |
|
"eval_rouge-2": 0.1427, |
|
"eval_rouge-l": 0.3144, |
|
"eval_runtime": 178.6224, |
|
"eval_samples_per_second": 23.368, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.7397188964081206e-05, |
|
"loss": 3.7519, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 4.136488914489746, |
|
"eval_rouge-1": 0.3361, |
|
"eval_rouge-2": 0.1442, |
|
"eval_rouge-l": 0.3184, |
|
"eval_runtime": 191.1592, |
|
"eval_samples_per_second": 21.835, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.724408243255657e-05, |
|
"loss": 3.6917, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 4.072964668273926, |
|
"eval_rouge-1": 0.3394, |
|
"eval_rouge-2": 0.1488, |
|
"eval_rouge-l": 0.3217, |
|
"eval_runtime": 178.8323, |
|
"eval_samples_per_second": 23.34, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.709097590103194e-05, |
|
"loss": 3.6526, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 4.014794826507568, |
|
"eval_rouge-1": 0.3437, |
|
"eval_rouge-2": 0.1511, |
|
"eval_rouge-l": 0.3254, |
|
"eval_runtime": 182.0303, |
|
"eval_samples_per_second": 22.93, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.693786936950731e-05, |
|
"loss": 3.485, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 3.9730067253112793, |
|
"eval_rouge-1": 0.3472, |
|
"eval_rouge-2": 0.1575, |
|
"eval_rouge-l": 0.329, |
|
"eval_runtime": 106.9008, |
|
"eval_samples_per_second": 39.046, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.6784762837982674e-05, |
|
"loss": 3.4744, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 3.905726909637451, |
|
"eval_rouge-1": 0.3524, |
|
"eval_rouge-2": 0.1605, |
|
"eval_rouge-l": 0.3328, |
|
"eval_runtime": 109.2943, |
|
"eval_samples_per_second": 38.19, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.663165630645804e-05, |
|
"loss": 3.4398, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 3.861633062362671, |
|
"eval_rouge-1": 0.3555, |
|
"eval_rouge-2": 0.1648, |
|
"eval_rouge-l": 0.3369, |
|
"eval_runtime": 109.0651, |
|
"eval_samples_per_second": 38.271, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.64785497749334e-05, |
|
"loss": 3.4033, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 3.814605712890625, |
|
"eval_rouge-1": 0.3616, |
|
"eval_rouge-2": 0.1696, |
|
"eval_rouge-l": 0.3426, |
|
"eval_runtime": 111.7482, |
|
"eval_samples_per_second": 37.352, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.632544324340877e-05, |
|
"loss": 3.3852, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 3.7633745670318604, |
|
"eval_rouge-1": 0.3618, |
|
"eval_rouge-2": 0.1729, |
|
"eval_rouge-l": 0.3431, |
|
"eval_runtime": 108.2348, |
|
"eval_samples_per_second": 38.564, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.617233671188413e-05, |
|
"loss": 3.3591, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_loss": 3.70929217338562, |
|
"eval_rouge-1": 0.3665, |
|
"eval_rouge-2": 0.1758, |
|
"eval_rouge-l": 0.3483, |
|
"eval_runtime": 107.2167, |
|
"eval_samples_per_second": 38.93, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.6019230180359495e-05, |
|
"loss": 3.3347, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_loss": 3.6575114727020264, |
|
"eval_rouge-1": 0.3664, |
|
"eval_rouge-2": 0.1771, |
|
"eval_rouge-l": 0.3479, |
|
"eval_runtime": 111.8035, |
|
"eval_samples_per_second": 37.333, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 4.586612364883486e-05, |
|
"loss": 3.1412, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_loss": 3.6294667720794678, |
|
"eval_rouge-1": 0.3687, |
|
"eval_rouge-2": 0.1799, |
|
"eval_rouge-l": 0.3504, |
|
"eval_runtime": 108.0977, |
|
"eval_samples_per_second": 38.613, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.571301711731022e-05, |
|
"loss": 3.0681, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 3.5905728340148926, |
|
"eval_rouge-1": 0.369, |
|
"eval_rouge-2": 0.1812, |
|
"eval_rouge-l": 0.3495, |
|
"eval_runtime": 109.8754, |
|
"eval_samples_per_second": 37.988, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 4.5559910585785595e-05, |
|
"loss": 3.047, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_loss": 3.5687978267669678, |
|
"eval_rouge-1": 0.3751, |
|
"eval_rouge-2": 0.1869, |
|
"eval_rouge-l": 0.3562, |
|
"eval_runtime": 112.5879, |
|
"eval_samples_per_second": 37.073, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 4.540680405426096e-05, |
|
"loss": 3.0482, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 3.522142171859741, |
|
"eval_rouge-1": 0.374, |
|
"eval_rouge-2": 0.1885, |
|
"eval_rouge-l": 0.3553, |
|
"eval_runtime": 110.8835, |
|
"eval_samples_per_second": 37.643, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 4.525369752273632e-05, |
|
"loss": 3.0085, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 3.497309446334839, |
|
"eval_rouge-1": 0.3754, |
|
"eval_rouge-2": 0.1889, |
|
"eval_rouge-l": 0.3563, |
|
"eval_runtime": 116.7032, |
|
"eval_samples_per_second": 35.766, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 4.510059099121169e-05, |
|
"loss": 3.0179, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 3.45327091217041, |
|
"eval_rouge-1": 0.3766, |
|
"eval_rouge-2": 0.1913, |
|
"eval_rouge-l": 0.3568, |
|
"eval_runtime": 110.7354, |
|
"eval_samples_per_second": 37.693, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 4.494748445968705e-05, |
|
"loss": 3.0008, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 3.4268550872802734, |
|
"eval_rouge-1": 0.3802, |
|
"eval_rouge-2": 0.1942, |
|
"eval_rouge-l": 0.3614, |
|
"eval_runtime": 108.7986, |
|
"eval_samples_per_second": 38.364, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 4.4794377928162417e-05, |
|
"loss": 2.973, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_loss": 3.393805980682373, |
|
"eval_rouge-1": 0.3835, |
|
"eval_rouge-2": 0.197, |
|
"eval_rouge-l": 0.3648, |
|
"eval_runtime": 112.0812, |
|
"eval_samples_per_second": 37.241, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 4.4641271396637783e-05, |
|
"loss": 2.9718, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 3.350437879562378, |
|
"eval_rouge-1": 0.3877, |
|
"eval_rouge-2": 0.2013, |
|
"eval_rouge-l": 0.3682, |
|
"eval_runtime": 111.3062, |
|
"eval_samples_per_second": 37.5, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.4488164865113144e-05, |
|
"loss": 2.9702, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 3.321171998977661, |
|
"eval_rouge-1": 0.3859, |
|
"eval_rouge-2": 0.1992, |
|
"eval_rouge-l": 0.3672, |
|
"eval_runtime": 109.8134, |
|
"eval_samples_per_second": 38.01, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 4.433505833358852e-05, |
|
"loss": 2.6419, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"eval_loss": 3.341356039047241, |
|
"eval_rouge-1": 0.3839, |
|
"eval_rouge-2": 0.2012, |
|
"eval_rouge-l": 0.3653, |
|
"eval_runtime": 182.4115, |
|
"eval_samples_per_second": 22.882, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.418195180206388e-05, |
|
"loss": 2.6177, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_loss": 3.31478214263916, |
|
"eval_rouge-1": 0.3896, |
|
"eval_rouge-2": 0.2053, |
|
"eval_rouge-l": 0.37, |
|
"eval_runtime": 185.1687, |
|
"eval_samples_per_second": 22.542, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 4.4028845270539244e-05, |
|
"loss": 2.5985, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"eval_loss": 3.3024539947509766, |
|
"eval_rouge-1": 0.3911, |
|
"eval_rouge-2": 0.2066, |
|
"eval_rouge-l": 0.3717, |
|
"eval_runtime": 183.1915, |
|
"eval_samples_per_second": 22.785, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 4.387573873901461e-05, |
|
"loss": 2.615, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_loss": 3.285188913345337, |
|
"eval_rouge-1": 0.3894, |
|
"eval_rouge-2": 0.2047, |
|
"eval_rouge-l": 0.371, |
|
"eval_runtime": 183.8267, |
|
"eval_samples_per_second": 22.706, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 4.372263220748997e-05, |
|
"loss": 2.6202, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"eval_loss": 3.2647767066955566, |
|
"eval_rouge-1": 0.394, |
|
"eval_rouge-2": 0.2099, |
|
"eval_rouge-l": 0.3747, |
|
"eval_runtime": 186.2005, |
|
"eval_samples_per_second": 22.417, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 4.356952567596534e-05, |
|
"loss": 2.6072, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_loss": 3.2533161640167236, |
|
"eval_rouge-1": 0.3925, |
|
"eval_rouge-2": 0.2099, |
|
"eval_rouge-l": 0.373, |
|
"eval_runtime": 180.2911, |
|
"eval_samples_per_second": 23.151, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 4.3416419144440705e-05, |
|
"loss": 2.612, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 3.2256064414978027, |
|
"eval_rouge-1": 0.3933, |
|
"eval_rouge-2": 0.2113, |
|
"eval_rouge-l": 0.3738, |
|
"eval_runtime": 182.776, |
|
"eval_samples_per_second": 22.837, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 4.3263312612916065e-05, |
|
"loss": 2.6108, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 3.2123188972473145, |
|
"eval_rouge-1": 0.3951, |
|
"eval_rouge-2": 0.2122, |
|
"eval_rouge-l": 0.3767, |
|
"eval_runtime": 182.0897, |
|
"eval_samples_per_second": 22.923, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 4.311020608139143e-05, |
|
"loss": 2.6334, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_loss": 3.193678140640259, |
|
"eval_rouge-1": 0.3971, |
|
"eval_rouge-2": 0.2123, |
|
"eval_rouge-l": 0.3779, |
|
"eval_runtime": 184.0687, |
|
"eval_samples_per_second": 22.676, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 4.29570995498668e-05, |
|
"loss": 2.6086, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_loss": 3.1697912216186523, |
|
"eval_rouge-1": 0.3979, |
|
"eval_rouge-2": 0.215, |
|
"eval_rouge-l": 0.3786, |
|
"eval_runtime": 183.0622, |
|
"eval_samples_per_second": 22.801, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 4.2803993018342166e-05, |
|
"loss": 2.3308, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_loss": 3.189683437347412, |
|
"eval_rouge-1": 0.3961, |
|
"eval_rouge-2": 0.2152, |
|
"eval_rouge-l": 0.378, |
|
"eval_runtime": 207.2919, |
|
"eval_samples_per_second": 20.136, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 4.265088648681753e-05, |
|
"loss": 2.3303, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 3.19010591506958, |
|
"eval_rouge-1": 0.3982, |
|
"eval_rouge-2": 0.2151, |
|
"eval_rouge-l": 0.3788, |
|
"eval_runtime": 209.6218, |
|
"eval_samples_per_second": 19.912, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 4.249777995529289e-05, |
|
"loss": 2.3079, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_loss": 3.1867759227752686, |
|
"eval_rouge-1": 0.3958, |
|
"eval_rouge-2": 0.2146, |
|
"eval_rouge-l": 0.3764, |
|
"eval_runtime": 209.3472, |
|
"eval_samples_per_second": 19.938, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 4.234467342376826e-05, |
|
"loss": 2.2662, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 3.1843576431274414, |
|
"eval_rouge-1": 0.3966, |
|
"eval_rouge-2": 0.2116, |
|
"eval_rouge-l": 0.3765, |
|
"eval_runtime": 216.9453, |
|
"eval_samples_per_second": 19.24, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 4.219156689224362e-05, |
|
"loss": 2.2889, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 3.1772937774658203, |
|
"eval_rouge-1": 0.3928, |
|
"eval_rouge-2": 0.2142, |
|
"eval_rouge-l": 0.375, |
|
"eval_runtime": 208.5252, |
|
"eval_samples_per_second": 20.017, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 4.203846036071899e-05, |
|
"loss": 2.3055, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"eval_loss": 3.1666276454925537, |
|
"eval_rouge-1": 0.3996, |
|
"eval_rouge-2": 0.2174, |
|
"eval_rouge-l": 0.3811, |
|
"eval_runtime": 208.7841, |
|
"eval_samples_per_second": 19.992, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 4.1885353829194354e-05, |
|
"loss": 2.1645, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_loss": 3.172426700592041, |
|
"eval_rouge-1": 0.3976, |
|
"eval_rouge-2": 0.2148, |
|
"eval_rouge-l": 0.378, |
|
"eval_runtime": 211.5986, |
|
"eval_samples_per_second": 19.726, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 4.173224729766972e-05, |
|
"loss": 2.2381, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 3.160379409790039, |
|
"eval_rouge-1": 0.3999, |
|
"eval_rouge-2": 0.2183, |
|
"eval_rouge-l": 0.3808, |
|
"eval_runtime": 207.4561, |
|
"eval_samples_per_second": 20.12, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 4.157914076614509e-05, |
|
"loss": 2.2566, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"eval_loss": 3.148230791091919, |
|
"eval_rouge-1": 0.398, |
|
"eval_rouge-2": 0.2167, |
|
"eval_rouge-l": 0.379, |
|
"eval_runtime": 205.4861, |
|
"eval_samples_per_second": 20.313, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 4.1426034234620455e-05, |
|
"loss": 2.2723, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"eval_loss": 3.145291566848755, |
|
"eval_rouge-1": 0.3968, |
|
"eval_rouge-2": 0.2183, |
|
"eval_rouge-l": 0.3781, |
|
"eval_runtime": 201.6935, |
|
"eval_samples_per_second": 20.695, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 4.1272927703095815e-05, |
|
"loss": 2.288, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_loss": 3.124877452850342, |
|
"eval_rouge-1": 0.3998, |
|
"eval_rouge-2": 0.2156, |
|
"eval_rouge-l": 0.3792, |
|
"eval_runtime": 203.9639, |
|
"eval_samples_per_second": 20.464, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 4.111982117157118e-05, |
|
"loss": 2.2889, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"eval_loss": 3.128246545791626, |
|
"eval_rouge-1": 0.4007, |
|
"eval_rouge-2": 0.2195, |
|
"eval_rouge-l": 0.3822, |
|
"eval_runtime": 212.1813, |
|
"eval_samples_per_second": 19.672, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 4.096671464004654e-05, |
|
"loss": 2.294, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_loss": 3.1073431968688965, |
|
"eval_rouge-1": 0.3977, |
|
"eval_rouge-2": 0.2188, |
|
"eval_rouge-l": 0.3797, |
|
"eval_runtime": 205.3765, |
|
"eval_samples_per_second": 20.324, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 4.081360810852191e-05, |
|
"loss": 2.2157, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": 3.1187493801116943, |
|
"eval_rouge-1": 0.3977, |
|
"eval_rouge-2": 0.2172, |
|
"eval_rouge-l": 0.3789, |
|
"eval_runtime": 204.9076, |
|
"eval_samples_per_second": 20.37, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 4.0660501576997276e-05, |
|
"loss": 1.9904, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_loss": 3.1339566707611084, |
|
"eval_rouge-1": 0.4014, |
|
"eval_rouge-2": 0.2191, |
|
"eval_rouge-l": 0.3822, |
|
"eval_runtime": 206.8966, |
|
"eval_samples_per_second": 20.174, |
|
"step": 68000 |
|
} |
|
], |
|
"max_steps": 333570, |
|
"num_train_epochs": 30, |
|
"total_flos": 2.6905869526822502e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|