{ "best_metric": 2.732292890548706, "best_model_checkpoint": "flan-t5-base-tldr-100k/checkpoint-45000", "epoch": 5.0, "global_step": 56250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.955555555555556e-05, "loss": 3.2897, "step": 500 }, { "epoch": 0.09, "learning_rate": 4.9111111111111114e-05, "loss": 3.2632, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.866666666666667e-05, "loss": 3.2601, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.8222222222222225e-05, "loss": 3.2293, "step": 2000 }, { "epoch": 0.22, "learning_rate": 4.7777777777777784e-05, "loss": 3.2164, "step": 2500 }, { "epoch": 0.27, "learning_rate": 4.7333333333333336e-05, "loss": 3.1959, "step": 3000 }, { "epoch": 0.31, "learning_rate": 4.6888888888888895e-05, "loss": 3.1905, "step": 3500 }, { "epoch": 0.36, "learning_rate": 4.644444444444445e-05, "loss": 3.233, "step": 4000 }, { "epoch": 0.4, "learning_rate": 4.600000000000001e-05, "loss": 3.1792, "step": 4500 }, { "epoch": 0.44, "learning_rate": 4.555555555555556e-05, "loss": 3.2026, "step": 5000 }, { "epoch": 0.49, "learning_rate": 4.511111111111112e-05, "loss": 3.1937, "step": 5500 }, { "epoch": 0.53, "learning_rate": 4.466666666666667e-05, "loss": 3.1751, "step": 6000 }, { "epoch": 0.58, "learning_rate": 4.422222222222222e-05, "loss": 3.173, "step": 6500 }, { "epoch": 0.62, "learning_rate": 4.377777777777778e-05, "loss": 3.1968, "step": 7000 }, { "epoch": 0.67, "learning_rate": 4.3333333333333334e-05, "loss": 3.1666, "step": 7500 }, { "epoch": 0.71, "learning_rate": 4.2888888888888886e-05, "loss": 3.1855, "step": 8000 }, { "epoch": 0.76, "learning_rate": 4.2444444444444445e-05, "loss": 3.1626, "step": 8500 }, { "epoch": 0.8, "learning_rate": 4.2e-05, "loss": 3.1699, "step": 9000 }, { "epoch": 0.84, "learning_rate": 4.155555555555556e-05, "loss": 3.1657, "step": 9500 }, { "epoch": 0.89, "learning_rate": 4.111111111111111e-05, "loss": 3.1547, "step": 10000 }, { "epoch": 0.93, "learning_rate": 4.066666666666667e-05, "loss": 3.1496, "step": 10500 }, { "epoch": 0.98, "learning_rate": 4.022222222222222e-05, "loss": 3.1467, "step": 11000 }, { "epoch": 1.0, "eval_gen_len": 16.4911, "eval_loss": 2.953094482421875, "eval_rouge1": 15.909, "eval_rouge2": 3.8156, "eval_rougeL": 13.4884, "eval_rougeLsum": 13.9568, "eval_runtime": 1704.954, "eval_samples_per_second": 5.865, "eval_steps_per_second": 0.733, "step": 11250 }, { "epoch": 1.02, "learning_rate": 3.977777777777778e-05, "loss": 3.0983, "step": 11500 }, { "epoch": 1.07, "learning_rate": 3.933333333333333e-05, "loss": 3.0641, "step": 12000 }, { "epoch": 1.11, "learning_rate": 3.888888888888889e-05, "loss": 3.0471, "step": 12500 }, { "epoch": 1.16, "learning_rate": 3.844444444444444e-05, "loss": 3.0583, "step": 13000 }, { "epoch": 1.2, "learning_rate": 3.8e-05, "loss": 3.069, "step": 13500 }, { "epoch": 1.24, "learning_rate": 3.7555555555555554e-05, "loss": 3.0642, "step": 14000 }, { "epoch": 1.29, "learning_rate": 3.7111111111111113e-05, "loss": 3.0729, "step": 14500 }, { "epoch": 1.33, "learning_rate": 3.6666666666666666e-05, "loss": 3.0753, "step": 15000 }, { "epoch": 1.38, "learning_rate": 3.6222222222222225e-05, "loss": 3.0415, "step": 15500 }, { "epoch": 1.42, "learning_rate": 3.577777777777778e-05, "loss": 3.0632, "step": 16000 }, { "epoch": 1.47, "learning_rate": 3.5333333333333336e-05, "loss": 3.0757, "step": 16500 }, { "epoch": 1.51, "learning_rate": 3.4888888888888895e-05, "loss": 3.0589, "step": 17000 }, { "epoch": 1.56, "learning_rate": 3.444444444444445e-05, "loss": 3.0586, "step": 17500 }, { "epoch": 1.6, "learning_rate": 3.4000000000000007e-05, "loss": 3.0582, "step": 18000 }, { "epoch": 1.64, "learning_rate": 3.355555555555556e-05, "loss": 3.0657, "step": 18500 }, { "epoch": 1.69, "learning_rate": 3.311111111111112e-05, "loss": 3.0653, "step": 19000 }, { "epoch": 1.73, "learning_rate": 3.266666666666667e-05, "loss": 3.0513, "step": 19500 }, { "epoch": 1.78, "learning_rate": 3.222222222222223e-05, "loss": 3.082, "step": 20000 }, { "epoch": 1.82, "learning_rate": 3.177777777777778e-05, "loss": 3.0785, "step": 20500 }, { "epoch": 1.87, "learning_rate": 3.1333333333333334e-05, "loss": 3.0654, "step": 21000 }, { "epoch": 1.91, "learning_rate": 3.088888888888889e-05, "loss": 3.0531, "step": 21500 }, { "epoch": 1.96, "learning_rate": 3.044444444444445e-05, "loss": 3.0518, "step": 22000 }, { "epoch": 2.0, "learning_rate": 3e-05, "loss": 3.0673, "step": 22500 }, { "epoch": 2.0, "eval_gen_len": 16.4639, "eval_loss": 2.93178129196167, "eval_rouge1": 16.4972, "eval_rouge2": 3.9757, "eval_rougeL": 13.9517, "eval_rougeLsum": 14.4336, "eval_runtime": 1398.1721, "eval_samples_per_second": 7.152, "eval_steps_per_second": 0.894, "step": 22500 }, { "epoch": 2.04, "learning_rate": 2.955555555555556e-05, "loss": 2.9751, "step": 23000 }, { "epoch": 2.09, "learning_rate": 2.9111111111111112e-05, "loss": 2.9883, "step": 23500 }, { "epoch": 2.13, "learning_rate": 2.8666666666666668e-05, "loss": 2.9911, "step": 24000 }, { "epoch": 2.18, "learning_rate": 2.8222222222222223e-05, "loss": 2.9796, "step": 24500 }, { "epoch": 2.22, "learning_rate": 2.777777777777778e-05, "loss": 2.9767, "step": 25000 }, { "epoch": 2.27, "learning_rate": 2.733333333333333e-05, "loss": 2.9795, "step": 25500 }, { "epoch": 2.31, "learning_rate": 2.688888888888889e-05, "loss": 2.9654, "step": 26000 }, { "epoch": 2.36, "learning_rate": 2.6444444444444443e-05, "loss": 2.9904, "step": 26500 }, { "epoch": 2.4, "learning_rate": 2.6000000000000002e-05, "loss": 2.9785, "step": 27000 }, { "epoch": 2.44, "learning_rate": 2.5555555555555554e-05, "loss": 2.9856, "step": 27500 }, { "epoch": 2.49, "learning_rate": 2.5111111111111113e-05, "loss": 3.007, "step": 28000 }, { "epoch": 2.53, "learning_rate": 2.466666666666667e-05, "loss": 2.9819, "step": 28500 }, { "epoch": 2.58, "learning_rate": 2.4222222222222224e-05, "loss": 3.0003, "step": 29000 }, { "epoch": 2.62, "learning_rate": 2.377777777777778e-05, "loss": 2.981, "step": 29500 }, { "epoch": 2.67, "learning_rate": 2.3333333333333336e-05, "loss": 2.9926, "step": 30000 }, { "epoch": 2.71, "learning_rate": 2.288888888888889e-05, "loss": 2.9901, "step": 30500 }, { "epoch": 2.76, "learning_rate": 2.2444444444444447e-05, "loss": 2.9893, "step": 31000 }, { "epoch": 2.8, "learning_rate": 2.2000000000000003e-05, "loss": 2.9668, "step": 31500 }, { "epoch": 2.84, "learning_rate": 2.1555555555555555e-05, "loss": 2.9903, "step": 32000 }, { "epoch": 2.89, "learning_rate": 2.111111111111111e-05, "loss": 2.9687, "step": 32500 }, { "epoch": 2.93, "learning_rate": 2.0666666666666666e-05, "loss": 2.9839, "step": 33000 }, { "epoch": 2.98, "learning_rate": 2.0222222222222222e-05, "loss": 2.9952, "step": 33500 }, { "epoch": 3.0, "eval_gen_len": 16.2585, "eval_loss": 2.924546003341675, "eval_rouge1": 16.5997, "eval_rouge2": 4.1068, "eval_rougeL": 14.0299, "eval_rougeLsum": 14.5147, "eval_runtime": 945.9838, "eval_samples_per_second": 10.571, "eval_steps_per_second": 1.321, "step": 33750 }, { "epoch": 3.02, "learning_rate": 1.9777777777777778e-05, "loss": 2.9548, "step": 34000 }, { "epoch": 3.07, "learning_rate": 1.9333333333333333e-05, "loss": 2.9566, "step": 34500 }, { "epoch": 3.11, "learning_rate": 1.888888888888889e-05, "loss": 2.953, "step": 35000 }, { "epoch": 3.16, "learning_rate": 1.8444444444444445e-05, "loss": 2.9514, "step": 35500 }, { "epoch": 3.2, "learning_rate": 1.8e-05, "loss": 2.9362, "step": 36000 }, { "epoch": 3.24, "learning_rate": 1.7555555555555556e-05, "loss": 2.9482, "step": 36500 }, { "epoch": 3.29, "learning_rate": 1.7111111111111112e-05, "loss": 2.9441, "step": 37000 }, { "epoch": 3.33, "learning_rate": 1.6666666666666667e-05, "loss": 2.9366, "step": 37500 }, { "epoch": 3.38, "learning_rate": 1.6222222222222223e-05, "loss": 2.9558, "step": 38000 }, { "epoch": 3.42, "learning_rate": 1.577777777777778e-05, "loss": 2.959, "step": 38500 }, { "epoch": 3.47, "learning_rate": 1.5333333333333334e-05, "loss": 2.9634, "step": 39000 }, { "epoch": 3.51, "learning_rate": 1.4888888888888888e-05, "loss": 2.9464, "step": 39500 }, { "epoch": 3.56, "learning_rate": 1.4444444444444444e-05, "loss": 2.9669, "step": 40000 }, { "epoch": 3.6, "learning_rate": 1.4000000000000001e-05, "loss": 2.9404, "step": 40500 }, { "epoch": 3.64, "learning_rate": 1.3555555555555557e-05, "loss": 2.9686, "step": 41000 }, { "epoch": 3.69, "learning_rate": 1.3111111111111113e-05, "loss": 2.9384, "step": 41500 }, { "epoch": 3.73, "learning_rate": 1.2666666666666668e-05, "loss": 2.9304, "step": 42000 }, { "epoch": 3.78, "learning_rate": 1.2222222222222222e-05, "loss": 2.9474, "step": 42500 }, { "epoch": 3.82, "learning_rate": 1.1777777777777778e-05, "loss": 2.9455, "step": 43000 }, { "epoch": 3.87, "learning_rate": 1.1333333333333334e-05, "loss": 2.9456, "step": 43500 }, { "epoch": 3.91, "learning_rate": 1.088888888888889e-05, "loss": 2.9359, "step": 44000 }, { "epoch": 3.96, "learning_rate": 1.0444444444444445e-05, "loss": 2.9445, "step": 44500 }, { "epoch": 4.0, "learning_rate": 1e-05, "loss": 2.9524, "step": 45000 }, { "epoch": 4.0, "eval_gen_len": 16.0925, "eval_loss": 2.732292890548706, "eval_rouge1": 17.0772, "eval_rouge2": 4.4204, "eval_rougeL": 14.549, "eval_rougeLsum": 15.0148, "eval_runtime": 957.2679, "eval_samples_per_second": 10.446, "eval_steps_per_second": 1.306, "step": 45000 }, { "epoch": 4.04, "learning_rate": 9.555555555555556e-06, "loss": 2.8978, "step": 45500 }, { "epoch": 4.09, "learning_rate": 9.111111111111112e-06, "loss": 2.8935, "step": 46000 }, { "epoch": 4.13, "learning_rate": 8.666666666666668e-06, "loss": 2.9088, "step": 46500 }, { "epoch": 4.18, "learning_rate": 8.222222222222223e-06, "loss": 2.9148, "step": 47000 }, { "epoch": 4.22, "learning_rate": 7.777777777777777e-06, "loss": 2.9336, "step": 47500 }, { "epoch": 4.27, "learning_rate": 7.333333333333334e-06, "loss": 2.9111, "step": 48000 }, { "epoch": 4.31, "learning_rate": 6.888888888888889e-06, "loss": 2.934, "step": 48500 }, { "epoch": 4.36, "learning_rate": 6.4444444444444445e-06, "loss": 2.9219, "step": 49000 }, { "epoch": 4.4, "learning_rate": 6e-06, "loss": 2.914, "step": 49500 }, { "epoch": 4.44, "learning_rate": 5.555555555555556e-06, "loss": 2.9088, "step": 50000 }, { "epoch": 4.49, "learning_rate": 5.1111111111111115e-06, "loss": 2.93, "step": 50500 }, { "epoch": 4.53, "learning_rate": 4.666666666666667e-06, "loss": 2.9101, "step": 51000 }, { "epoch": 4.58, "learning_rate": 4.222222222222223e-06, "loss": 2.9189, "step": 51500 }, { "epoch": 4.62, "learning_rate": 3.777777777777778e-06, "loss": 2.9197, "step": 52000 }, { "epoch": 4.67, "learning_rate": 3.3333333333333333e-06, "loss": 2.9263, "step": 52500 }, { "epoch": 4.71, "learning_rate": 2.888888888888889e-06, "loss": 2.925, "step": 53000 }, { "epoch": 4.76, "learning_rate": 2.4444444444444447e-06, "loss": 2.9124, "step": 53500 }, { "epoch": 4.8, "learning_rate": 2.0000000000000003e-06, "loss": 2.8981, "step": 54000 }, { "epoch": 4.84, "learning_rate": 1.5555555555555556e-06, "loss": 2.9145, "step": 54500 }, { "epoch": 4.89, "learning_rate": 1.1111111111111112e-06, "loss": 2.9135, "step": 55000 }, { "epoch": 4.93, "learning_rate": 6.666666666666667e-07, "loss": 2.9066, "step": 55500 }, { "epoch": 4.98, "learning_rate": 2.2222222222222224e-07, "loss": 2.9223, "step": 56000 }, { "epoch": 5.0, "eval_gen_len": 16.2163, "eval_loss": 2.732771158218384, "eval_rouge1": 17.1468, "eval_rouge2": 4.4384, "eval_rougeL": 14.5798, "eval_rougeLsum": 15.0572, "eval_runtime": 1011.7448, "eval_samples_per_second": 9.884, "eval_steps_per_second": 1.235, "step": 56250 } ], "max_steps": 56250, "num_train_epochs": 5, "total_flos": 3.081408086016e+17, "trial_name": null, "trial_params": null }