|
{ |
|
"best_metric": 38.5992, |
|
"best_model_checkpoint": "pegasus-newsroom-cnn_full-adafactor-bs6/checkpoint-897", |
|
"epoch": 0.6356132321902493, |
|
"global_step": 1901, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.839464882943145e-05, |
|
"loss": 3.3516, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_gen_len": 75.2876, |
|
"eval_loss": 3.0071940422058105, |
|
"eval_rouge1": 37.4459, |
|
"eval_rouge2": 16.5527, |
|
"eval_rougeL": 26.0178, |
|
"eval_rougeLsum": 34.4955, |
|
"eval_runtime": 1262.9542, |
|
"eval_samples_per_second": 1.979, |
|
"eval_steps_per_second": 0.33, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00015812709030100337, |
|
"loss": 3.1391, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_gen_len": 72.434, |
|
"eval_loss": 2.974590539932251, |
|
"eval_rouge1": 37.9369, |
|
"eval_rouge2": 16.9058, |
|
"eval_rougeL": 26.566, |
|
"eval_rougeLsum": 35.0327, |
|
"eval_runtime": 1247.518, |
|
"eval_samples_per_second": 2.004, |
|
"eval_steps_per_second": 0.334, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001405351170568562, |
|
"loss": 3.1086, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_gen_len": 66.2268, |
|
"eval_loss": 2.9494450092315674, |
|
"eval_rouge1": 38.5992, |
|
"eval_rouge2": 17.5271, |
|
"eval_rougeL": 27.3099, |
|
"eval_rougeLsum": 35.6531, |
|
"eval_runtime": 1158.9422, |
|
"eval_samples_per_second": 2.157, |
|
"eval_steps_per_second": 0.36, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00012060200668896322, |
|
"loss": 3.1037, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_gen_len": 70.6692, |
|
"eval_loss": 2.95255184173584, |
|
"eval_rouge1": 38.3805, |
|
"eval_rouge2": 17.5267, |
|
"eval_rougeL": 27.1742, |
|
"eval_rougeLsum": 35.5035, |
|
"eval_runtime": 1183.4051, |
|
"eval_samples_per_second": 2.113, |
|
"eval_steps_per_second": 0.352, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00010100334448160536, |
|
"loss": 3.2294, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_gen_len": 56.5128, |
|
"eval_loss": 3.183689832687378, |
|
"eval_rouge1": 36.2619, |
|
"eval_rouge2": 15.599, |
|
"eval_rougeL": 25.8333, |
|
"eval_rougeLsum": 33.3692, |
|
"eval_runtime": 997.3694, |
|
"eval_samples_per_second": 2.507, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.127090301003345e-05, |
|
"loss": 3.695, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_gen_len": 47.1888, |
|
"eval_loss": 3.4275190830230713, |
|
"eval_rouge1": 33.0969, |
|
"eval_rouge2": 13.6577, |
|
"eval_rougeL": 23.9839, |
|
"eval_rougeLsum": 30.6005, |
|
"eval_runtime": 927.662, |
|
"eval_samples_per_second": 2.695, |
|
"eval_steps_per_second": 0.45, |
|
"step": 1794 |
|
} |
|
], |
|
"max_steps": 2990, |
|
"num_train_epochs": 1, |
|
"total_flos": 2.4881742419263488e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|