|
{ |
|
"best_metric": 31.7626, |
|
"best_model_checkpoint": "/content/drive/Shareddrives/UCM_SHARED/TFM_ESG/Ejemplo Oficial de T5-Summarization de HF/t5-small_adafactor/checkpoint-8100", |
|
"epoch": 0.9880028228652082, |
|
"global_step": 8400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0009823570924488356, |
|
"loss": 2.9361, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_gen_len": 18.8845, |
|
"eval_loss": 2.6215693950653076, |
|
"eval_rouge1": 26.8542, |
|
"eval_rouge2": 6.8667, |
|
"eval_rougeL": 21.1484, |
|
"eval_rougeLsum": 21.1563, |
|
"eval_runtime": 342.0879, |
|
"eval_samples_per_second": 33.126, |
|
"eval_steps_per_second": 1.383, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009648318042813455, |
|
"loss": 2.8543, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_gen_len": 18.8097, |
|
"eval_loss": 2.5835769176483154, |
|
"eval_rouge1": 27.2234, |
|
"eval_rouge2": 7.1753, |
|
"eval_rougeL": 21.5276, |
|
"eval_rougeLsum": 21.5308, |
|
"eval_runtime": 342.5643, |
|
"eval_samples_per_second": 33.08, |
|
"eval_steps_per_second": 1.381, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009471888967301812, |
|
"loss": 2.814, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_gen_len": 18.8294, |
|
"eval_loss": 2.564615488052368, |
|
"eval_rouge1": 28.1695, |
|
"eval_rouge2": 7.7873, |
|
"eval_rougeL": 22.2229, |
|
"eval_rougeLsum": 22.2251, |
|
"eval_runtime": 341.6846, |
|
"eval_samples_per_second": 33.165, |
|
"eval_steps_per_second": 1.384, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009295459891790166, |
|
"loss": 2.7861, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_gen_len": 18.7867, |
|
"eval_loss": 2.5409207344055176, |
|
"eval_rouge1": 28.5349, |
|
"eval_rouge2": 7.9732, |
|
"eval_rougeL": 22.6959, |
|
"eval_rougeLsum": 22.7078, |
|
"eval_runtime": 341.6917, |
|
"eval_samples_per_second": 33.164, |
|
"eval_steps_per_second": 1.384, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009119030816278523, |
|
"loss": 2.76, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_gen_len": 18.7526, |
|
"eval_loss": 2.5160539150238037, |
|
"eval_rouge1": 28.5629, |
|
"eval_rouge2": 7.9485, |
|
"eval_rougeL": 22.6545, |
|
"eval_rougeLsum": 22.6617, |
|
"eval_runtime": 341.8755, |
|
"eval_samples_per_second": 33.147, |
|
"eval_steps_per_second": 1.384, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0008942601740766878, |
|
"loss": 2.7545, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_gen_len": 18.7481, |
|
"eval_loss": 2.5028903484344482, |
|
"eval_rouge1": 29.1851, |
|
"eval_rouge2": 8.3586, |
|
"eval_rougeL": 23.1781, |
|
"eval_rougeLsum": 23.1815, |
|
"eval_runtime": 341.7767, |
|
"eval_samples_per_second": 33.156, |
|
"eval_steps_per_second": 1.384, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0008767348859091978, |
|
"loss": 2.7294, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_gen_len": 18.6948, |
|
"eval_loss": 2.4874510765075684, |
|
"eval_rouge1": 28.7611, |
|
"eval_rouge2": 8.0309, |
|
"eval_rougeL": 22.8289, |
|
"eval_rougeLsum": 22.8364, |
|
"eval_runtime": 341.7005, |
|
"eval_samples_per_second": 33.164, |
|
"eval_steps_per_second": 1.384, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0008590919783580335, |
|
"loss": 2.7005, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_gen_len": 18.7843, |
|
"eval_loss": 2.4769885540008545, |
|
"eval_rouge1": 29.1072, |
|
"eval_rouge2": 8.2713, |
|
"eval_rougeL": 23.0568, |
|
"eval_rougeLsum": 23.0602, |
|
"eval_runtime": 341.9693, |
|
"eval_samples_per_second": 33.137, |
|
"eval_steps_per_second": 1.383, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0008414490708068689, |
|
"loss": 2.7057, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_gen_len": 18.6973, |
|
"eval_loss": 2.46754789352417, |
|
"eval_rouge1": 29.0607, |
|
"eval_rouge2": 8.4308, |
|
"eval_rougeL": 23.1094, |
|
"eval_rougeLsum": 23.1125, |
|
"eval_runtime": 341.5228, |
|
"eval_samples_per_second": 33.181, |
|
"eval_steps_per_second": 1.385, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008238061632557046, |
|
"loss": 2.6779, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_gen_len": 18.7927, |
|
"eval_loss": 2.4461910724639893, |
|
"eval_rouge1": 29.4815, |
|
"eval_rouge2": 8.6203, |
|
"eval_rougeL": 23.3742, |
|
"eval_rougeLsum": 23.3745, |
|
"eval_runtime": 341.6546, |
|
"eval_samples_per_second": 33.168, |
|
"eval_steps_per_second": 1.384, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00080616325570454, |
|
"loss": 2.6944, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_gen_len": 18.8219, |
|
"eval_loss": 2.4397881031036377, |
|
"eval_rouge1": 29.3817, |
|
"eval_rouge2": 8.5247, |
|
"eval_rougeL": 23.3088, |
|
"eval_rougeLsum": 23.3137, |
|
"eval_runtime": 341.8781, |
|
"eval_samples_per_second": 33.146, |
|
"eval_steps_per_second": 1.384, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0007885203481533757, |
|
"loss": 2.6589, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_gen_len": 18.7917, |
|
"eval_loss": 2.429356813430786, |
|
"eval_rouge1": 29.1914, |
|
"eval_rouge2": 8.5565, |
|
"eval_rougeL": 23.2017, |
|
"eval_rougeLsum": 23.2039, |
|
"eval_runtime": 341.5952, |
|
"eval_samples_per_second": 33.174, |
|
"eval_steps_per_second": 1.385, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0007708774406022112, |
|
"loss": 2.6473, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_gen_len": 18.7605, |
|
"eval_loss": 2.417249917984009, |
|
"eval_rouge1": 29.9482, |
|
"eval_rouge2": 8.8321, |
|
"eval_rougeL": 23.7848, |
|
"eval_rougeLsum": 23.7954, |
|
"eval_runtime": 341.816, |
|
"eval_samples_per_second": 33.152, |
|
"eval_steps_per_second": 1.384, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0007532345330510469, |
|
"loss": 2.6498, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_gen_len": 18.8079, |
|
"eval_loss": 2.4158124923706055, |
|
"eval_rouge1": 29.5843, |
|
"eval_rouge2": 8.9229, |
|
"eval_rougeL": 23.6193, |
|
"eval_rougeLsum": 23.6255, |
|
"eval_runtime": 341.8291, |
|
"eval_samples_per_second": 33.151, |
|
"eval_steps_per_second": 1.384, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0007355916254998823, |
|
"loss": 2.6321, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_gen_len": 18.7639, |
|
"eval_loss": 2.4022669792175293, |
|
"eval_rouge1": 29.9147, |
|
"eval_rouge2": 9.0085, |
|
"eval_rougeL": 23.8569, |
|
"eval_rougeLsum": 23.8709, |
|
"eval_runtime": 341.59, |
|
"eval_samples_per_second": 33.174, |
|
"eval_steps_per_second": 1.385, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000717948717948718, |
|
"loss": 2.614, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_gen_len": 18.6922, |
|
"eval_loss": 2.3971035480499268, |
|
"eval_rouge1": 29.711, |
|
"eval_rouge2": 9.0017, |
|
"eval_rougeL": 23.761, |
|
"eval_rougeLsum": 23.7742, |
|
"eval_runtime": 341.4293, |
|
"eval_samples_per_second": 33.19, |
|
"eval_steps_per_second": 1.385, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0007003058103975535, |
|
"loss": 2.6212, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_gen_len": 18.7082, |
|
"eval_loss": 2.390623092651367, |
|
"eval_rouge1": 29.498, |
|
"eval_rouge2": 8.6483, |
|
"eval_rougeL": 23.5006, |
|
"eval_rougeLsum": 23.5112, |
|
"eval_runtime": 341.8905, |
|
"eval_samples_per_second": 33.145, |
|
"eval_steps_per_second": 1.383, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0006826629028463891, |
|
"loss": 2.6151, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_gen_len": 18.8923, |
|
"eval_loss": 2.3796188831329346, |
|
"eval_rouge1": 29.8502, |
|
"eval_rouge2": 8.9427, |
|
"eval_rougeL": 23.6665, |
|
"eval_rougeLsum": 23.6781, |
|
"eval_runtime": 341.6707, |
|
"eval_samples_per_second": 33.166, |
|
"eval_steps_per_second": 1.384, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0006650199952952247, |
|
"loss": 2.6123, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_gen_len": 18.7541, |
|
"eval_loss": 2.371220588684082, |
|
"eval_rouge1": 30.0211, |
|
"eval_rouge2": 9.2412, |
|
"eval_rougeL": 24.0388, |
|
"eval_rougeLsum": 24.045, |
|
"eval_runtime": 341.6315, |
|
"eval_samples_per_second": 33.17, |
|
"eval_steps_per_second": 1.385, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0006473770877440603, |
|
"loss": 2.5928, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_gen_len": 18.7963, |
|
"eval_loss": 2.359456777572632, |
|
"eval_rouge1": 30.3185, |
|
"eval_rouge2": 9.2796, |
|
"eval_rougeL": 24.2004, |
|
"eval_rougeLsum": 24.2101, |
|
"eval_runtime": 341.8833, |
|
"eval_samples_per_second": 33.146, |
|
"eval_steps_per_second": 1.384, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006297341801928958, |
|
"loss": 2.5746, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_gen_len": 18.8229, |
|
"eval_loss": 2.366935968399048, |
|
"eval_rouge1": 29.8303, |
|
"eval_rouge2": 9.1665, |
|
"eval_rougeL": 23.8553, |
|
"eval_rougeLsum": 23.8672, |
|
"eval_runtime": 341.7148, |
|
"eval_samples_per_second": 33.162, |
|
"eval_steps_per_second": 1.384, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006120912726417314, |
|
"loss": 2.5619, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_gen_len": 18.8087, |
|
"eval_loss": 2.3527944087982178, |
|
"eval_rouge1": 30.4684, |
|
"eval_rouge2": 9.4451, |
|
"eval_rougeL": 24.2867, |
|
"eval_rougeLsum": 24.2939, |
|
"eval_runtime": 341.8038, |
|
"eval_samples_per_second": 33.154, |
|
"eval_steps_per_second": 1.384, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.000594448365090567, |
|
"loss": 2.5677, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_gen_len": 18.8491, |
|
"eval_loss": 2.343312978744507, |
|
"eval_rouge1": 30.496, |
|
"eval_rouge2": 9.4356, |
|
"eval_rougeL": 24.31, |
|
"eval_rougeLsum": 24.3125, |
|
"eval_runtime": 342.0548, |
|
"eval_samples_per_second": 33.129, |
|
"eval_steps_per_second": 1.383, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0005768054575394025, |
|
"loss": 2.5456, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_gen_len": 18.809, |
|
"eval_loss": 2.3360698223114014, |
|
"eval_rouge1": 30.5752, |
|
"eval_rouge2": 9.4803, |
|
"eval_rougeL": 24.3462, |
|
"eval_rougeLsum": 24.3559, |
|
"eval_runtime": 341.7402, |
|
"eval_samples_per_second": 33.16, |
|
"eval_steps_per_second": 1.384, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0005591625499882381, |
|
"loss": 2.5669, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_gen_len": 18.8287, |
|
"eval_loss": 2.327768325805664, |
|
"eval_rouge1": 30.0561, |
|
"eval_rouge2": 9.2402, |
|
"eval_rougeL": 24.0535, |
|
"eval_rougeLsum": 24.0713, |
|
"eval_runtime": 341.71, |
|
"eval_samples_per_second": 33.163, |
|
"eval_steps_per_second": 1.384, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0005415196424370737, |
|
"loss": 2.5446, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_gen_len": 18.8118, |
|
"eval_loss": 2.3254404067993164, |
|
"eval_rouge1": 30.3142, |
|
"eval_rouge2": 9.4929, |
|
"eval_rougeL": 24.335, |
|
"eval_rougeLsum": 24.3432, |
|
"eval_runtime": 341.8271, |
|
"eval_samples_per_second": 33.151, |
|
"eval_steps_per_second": 1.384, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0005238767348859092, |
|
"loss": 2.5536, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_gen_len": 18.9035, |
|
"eval_loss": 2.3251070976257324, |
|
"eval_rouge1": 30.2523, |
|
"eval_rouge2": 9.3005, |
|
"eval_rougeL": 24.163, |
|
"eval_rougeLsum": 24.1711, |
|
"eval_runtime": 341.8151, |
|
"eval_samples_per_second": 33.152, |
|
"eval_steps_per_second": 1.384, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0005062338273347448, |
|
"loss": 2.5154, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_gen_len": 18.7458, |
|
"eval_loss": 2.313904047012329, |
|
"eval_rouge1": 30.7718, |
|
"eval_rouge2": 9.8676, |
|
"eval_rougeL": 24.7111, |
|
"eval_rougeLsum": 24.7266, |
|
"eval_runtime": 341.6852, |
|
"eval_samples_per_second": 33.165, |
|
"eval_steps_per_second": 1.384, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004885909197835804, |
|
"loss": 2.5176, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_gen_len": 18.7983, |
|
"eval_loss": 2.309589385986328, |
|
"eval_rouge1": 30.5617, |
|
"eval_rouge2": 9.6562, |
|
"eval_rougeL": 24.4754, |
|
"eval_rougeLsum": 24.4862, |
|
"eval_runtime": 341.8284, |
|
"eval_samples_per_second": 33.151, |
|
"eval_steps_per_second": 1.384, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004709480122324159, |
|
"loss": 2.5307, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_gen_len": 18.8217, |
|
"eval_loss": 2.3089160919189453, |
|
"eval_rouge1": 30.601, |
|
"eval_rouge2": 9.6672, |
|
"eval_rougeL": 24.4465, |
|
"eval_rougeLsum": 24.4567, |
|
"eval_runtime": 341.6213, |
|
"eval_samples_per_second": 33.171, |
|
"eval_steps_per_second": 1.385, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00045330510468125144, |
|
"loss": 2.515, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_gen_len": 18.8096, |
|
"eval_loss": 2.2991859912872314, |
|
"eval_rouge1": 31.1807, |
|
"eval_rouge2": 10.0105, |
|
"eval_rougeL": 24.9947, |
|
"eval_rougeLsum": 25.0096, |
|
"eval_runtime": 341.7452, |
|
"eval_samples_per_second": 33.159, |
|
"eval_steps_per_second": 1.384, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.000435662197130087, |
|
"loss": 2.5168, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_gen_len": 18.8254, |
|
"eval_loss": 2.292003631591797, |
|
"eval_rouge1": 30.8999, |
|
"eval_rouge2": 9.8832, |
|
"eval_rougeL": 24.7713, |
|
"eval_rougeLsum": 24.7823, |
|
"eval_runtime": 341.8542, |
|
"eval_samples_per_second": 33.149, |
|
"eval_steps_per_second": 1.384, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004180192895789226, |
|
"loss": 2.5021, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_gen_len": 18.8787, |
|
"eval_loss": 2.285507917404175, |
|
"eval_rouge1": 31.0569, |
|
"eval_rouge2": 9.8752, |
|
"eval_rougeL": 24.791, |
|
"eval_rougeLsum": 24.8033, |
|
"eval_runtime": 341.7747, |
|
"eval_samples_per_second": 33.156, |
|
"eval_steps_per_second": 1.384, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00040037638202775815, |
|
"loss": 2.501, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_gen_len": 18.8315, |
|
"eval_loss": 2.2868235111236572, |
|
"eval_rouge1": 30.7132, |
|
"eval_rouge2": 9.8654, |
|
"eval_rougeL": 24.6084, |
|
"eval_rougeLsum": 24.6244, |
|
"eval_runtime": 341.9154, |
|
"eval_samples_per_second": 33.143, |
|
"eval_steps_per_second": 1.383, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003827334744765937, |
|
"loss": 2.4849, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_gen_len": 18.8196, |
|
"eval_loss": 2.2783043384552, |
|
"eval_rouge1": 31.3434, |
|
"eval_rouge2": 10.206, |
|
"eval_rougeL": 25.0954, |
|
"eval_rougeLsum": 25.1114, |
|
"eval_runtime": 341.9076, |
|
"eval_samples_per_second": 33.143, |
|
"eval_steps_per_second": 1.383, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0003650905669254293, |
|
"loss": 2.4939, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_gen_len": 18.8579, |
|
"eval_loss": 2.275907039642334, |
|
"eval_rouge1": 31.1467, |
|
"eval_rouge2": 10.0457, |
|
"eval_rougeL": 24.964, |
|
"eval_rougeLsum": 24.9793, |
|
"eval_runtime": 341.6475, |
|
"eval_samples_per_second": 33.169, |
|
"eval_steps_per_second": 1.384, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00034744765937426485, |
|
"loss": 2.4624, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_gen_len": 18.8098, |
|
"eval_loss": 2.2712931632995605, |
|
"eval_rouge1": 31.4288, |
|
"eval_rouge2": 10.1719, |
|
"eval_rougeL": 25.1096, |
|
"eval_rougeLsum": 25.1276, |
|
"eval_runtime": 341.723, |
|
"eval_samples_per_second": 33.161, |
|
"eval_steps_per_second": 1.384, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0003298047518231004, |
|
"loss": 2.456, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_gen_len": 18.8262, |
|
"eval_loss": 2.2674217224121094, |
|
"eval_rouge1": 31.1515, |
|
"eval_rouge2": 10.1208, |
|
"eval_rougeL": 25.0456, |
|
"eval_rougeLsum": 25.0532, |
|
"eval_runtime": 341.9008, |
|
"eval_samples_per_second": 33.144, |
|
"eval_steps_per_second": 1.383, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.000312161844271936, |
|
"loss": 2.4667, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_gen_len": 18.7816, |
|
"eval_loss": 2.262035608291626, |
|
"eval_rouge1": 31.3741, |
|
"eval_rouge2": 10.1733, |
|
"eval_rougeL": 25.1421, |
|
"eval_rougeLsum": 25.159, |
|
"eval_runtime": 341.6066, |
|
"eval_samples_per_second": 33.173, |
|
"eval_steps_per_second": 1.385, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.000294636556104446, |
|
"loss": 2.4658, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_gen_len": 18.8, |
|
"eval_loss": 2.258843183517456, |
|
"eval_rouge1": 31.3913, |
|
"eval_rouge2": 10.2645, |
|
"eval_rougeL": 25.1746, |
|
"eval_rougeLsum": 25.188, |
|
"eval_runtime": 341.9247, |
|
"eval_samples_per_second": 33.142, |
|
"eval_steps_per_second": 1.383, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00027699364855328156, |
|
"loss": 2.4943, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_gen_len": 18.8074, |
|
"eval_loss": 2.2533156871795654, |
|
"eval_rouge1": 31.3905, |
|
"eval_rouge2": 10.164, |
|
"eval_rougeL": 25.093, |
|
"eval_rougeLsum": 25.107, |
|
"eval_runtime": 341.8317, |
|
"eval_samples_per_second": 33.151, |
|
"eval_steps_per_second": 1.384, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00025935074100211713, |
|
"loss": 2.473, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_gen_len": 18.7439, |
|
"eval_loss": 2.2551848888397217, |
|
"eval_rouge1": 31.1105, |
|
"eval_rouge2": 10.1939, |
|
"eval_rougeL": 24.9214, |
|
"eval_rougeLsum": 24.9321, |
|
"eval_runtime": 341.6034, |
|
"eval_samples_per_second": 33.173, |
|
"eval_steps_per_second": 1.385, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00024170783345095273, |
|
"loss": 2.4687, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_gen_len": 18.7709, |
|
"eval_loss": 2.247098684310913, |
|
"eval_rouge1": 31.3387, |
|
"eval_rouge2": 10.2446, |
|
"eval_rougeL": 25.0746, |
|
"eval_rougeLsum": 25.0936, |
|
"eval_runtime": 341.7347, |
|
"eval_samples_per_second": 33.16, |
|
"eval_steps_per_second": 1.384, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002240649258997883, |
|
"loss": 2.4286, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_gen_len": 18.8313, |
|
"eval_loss": 2.2467916011810303, |
|
"eval_rouge1": 31.1953, |
|
"eval_rouge2": 10.1277, |
|
"eval_rougeL": 24.9535, |
|
"eval_rougeLsum": 24.9714, |
|
"eval_runtime": 341.8485, |
|
"eval_samples_per_second": 33.149, |
|
"eval_steps_per_second": 1.384, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00020642201834862386, |
|
"loss": 2.4492, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_gen_len": 18.7977, |
|
"eval_loss": 2.2422139644622803, |
|
"eval_rouge1": 31.7203, |
|
"eval_rouge2": 10.4934, |
|
"eval_rougeL": 25.3862, |
|
"eval_rougeLsum": 25.3946, |
|
"eval_runtime": 341.6909, |
|
"eval_samples_per_second": 33.164, |
|
"eval_steps_per_second": 1.384, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00018877911079745943, |
|
"loss": 2.4427, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_gen_len": 18.8144, |
|
"eval_loss": 2.2402756214141846, |
|
"eval_rouge1": 31.5498, |
|
"eval_rouge2": 10.4086, |
|
"eval_rougeL": 25.2384, |
|
"eval_rougeLsum": 25.2593, |
|
"eval_runtime": 341.7269, |
|
"eval_samples_per_second": 33.161, |
|
"eval_steps_per_second": 1.384, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.000171136203246295, |
|
"loss": 2.4641, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_gen_len": 18.7966, |
|
"eval_loss": 2.2365546226501465, |
|
"eval_rouge1": 31.4038, |
|
"eval_rouge2": 10.3691, |
|
"eval_rougeL": 25.1786, |
|
"eval_rougeLsum": 25.195, |
|
"eval_runtime": 341.8284, |
|
"eval_samples_per_second": 33.151, |
|
"eval_steps_per_second": 1.384, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00015349329569513056, |
|
"loss": 2.4276, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_gen_len": 18.7789, |
|
"eval_loss": 2.2344412803649902, |
|
"eval_rouge1": 31.5076, |
|
"eval_rouge2": 10.4403, |
|
"eval_rougeL": 25.299, |
|
"eval_rougeLsum": 25.3201, |
|
"eval_runtime": 342.6233, |
|
"eval_samples_per_second": 33.074, |
|
"eval_steps_per_second": 1.381, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00013585038814396613, |
|
"loss": 2.4402, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_gen_len": 18.7713, |
|
"eval_loss": 2.2317440509796143, |
|
"eval_rouge1": 31.5216, |
|
"eval_rouge2": 10.3901, |
|
"eval_rougeL": 25.276, |
|
"eval_rougeLsum": 25.2943, |
|
"eval_runtime": 345.4971, |
|
"eval_samples_per_second": 32.799, |
|
"eval_steps_per_second": 1.369, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00011820748059280171, |
|
"loss": 2.44, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_gen_len": 18.7671, |
|
"eval_loss": 2.2292771339416504, |
|
"eval_rouge1": 31.4244, |
|
"eval_rouge2": 10.4211, |
|
"eval_rougeL": 25.2592, |
|
"eval_rougeLsum": 25.2735, |
|
"eval_runtime": 342.0133, |
|
"eval_samples_per_second": 33.133, |
|
"eval_steps_per_second": 1.383, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00010056457304163728, |
|
"loss": 2.4251, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_gen_len": 18.7972, |
|
"eval_loss": 2.226907968521118, |
|
"eval_rouge1": 31.4887, |
|
"eval_rouge2": 10.3959, |
|
"eval_rougeL": 25.2335, |
|
"eval_rougeLsum": 25.2545, |
|
"eval_runtime": 341.7402, |
|
"eval_samples_per_second": 33.16, |
|
"eval_steps_per_second": 1.384, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.292166549047284e-05, |
|
"loss": 2.456, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_gen_len": 18.786, |
|
"eval_loss": 2.224229097366333, |
|
"eval_rouge1": 31.4508, |
|
"eval_rouge2": 10.4079, |
|
"eval_rougeL": 25.2328, |
|
"eval_rougeLsum": 25.2564, |
|
"eval_runtime": 341.8379, |
|
"eval_samples_per_second": 33.15, |
|
"eval_steps_per_second": 1.384, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.527875793930841e-05, |
|
"loss": 2.4181, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_gen_len": 18.8012, |
|
"eval_loss": 2.2223522663116455, |
|
"eval_rouge1": 31.6181, |
|
"eval_rouge2": 10.5558, |
|
"eval_rougeL": 25.3867, |
|
"eval_rougeLsum": 25.4042, |
|
"eval_runtime": 342.1259, |
|
"eval_samples_per_second": 33.122, |
|
"eval_steps_per_second": 1.383, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.763585038814397e-05, |
|
"loss": 2.4288, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_gen_len": 18.7953, |
|
"eval_loss": 2.22170352935791, |
|
"eval_rouge1": 31.7626, |
|
"eval_rouge2": 10.6059, |
|
"eval_rougeL": 25.4827, |
|
"eval_rougeLsum": 25.4958, |
|
"eval_runtime": 341.8728, |
|
"eval_samples_per_second": 33.147, |
|
"eval_steps_per_second": 1.384, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.9992942836979537e-05, |
|
"loss": 2.4327, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_gen_len": 18.7827, |
|
"eval_loss": 2.220174789428711, |
|
"eval_rouge1": 31.6839, |
|
"eval_rouge2": 10.5615, |
|
"eval_rougeL": 25.4137, |
|
"eval_rougeLsum": 25.433, |
|
"eval_runtime": 342.1089, |
|
"eval_samples_per_second": 33.124, |
|
"eval_steps_per_second": 1.383, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2350035285815103e-05, |
|
"loss": 2.4118, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_gen_len": 18.7979, |
|
"eval_loss": 2.2196593284606934, |
|
"eval_rouge1": 31.6519, |
|
"eval_rouge2": 10.4949, |
|
"eval_rougeL": 25.3751, |
|
"eval_rougeLsum": 25.3984, |
|
"eval_runtime": 342.1132, |
|
"eval_samples_per_second": 33.124, |
|
"eval_steps_per_second": 1.383, |
|
"step": 8400 |
|
} |
|
], |
|
"max_steps": 8502, |
|
"num_train_epochs": 1, |
|
"total_flos": 4.261943991730176e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|