|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 17055, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4752, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_gen_len": 34.6, |
|
"eval_loss": 0.53515625, |
|
"eval_rouge1": 18.7991, |
|
"eval_rouge2": 11.6667, |
|
"eval_rougeL": 18.5494, |
|
"eval_rougeLsum": 18.6653, |
|
"eval_runtime": 2021.8236, |
|
"eval_samples_per_second": 1.688, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2688, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_gen_len": 33.6, |
|
"eval_loss": 0.61572265625, |
|
"eval_rouge1": 20.5977, |
|
"eval_rouge2": 12.9504, |
|
"eval_rougeL": 20.3605, |
|
"eval_rougeLsum": 20.4447, |
|
"eval_runtime": 1913.6134, |
|
"eval_samples_per_second": 1.784, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1309, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_gen_len": 33.2, |
|
"eval_loss": 0.89453125, |
|
"eval_rouge1": 21.4721, |
|
"eval_rouge2": 13.2325, |
|
"eval_rougeL": 21.2104, |
|
"eval_rougeLsum": 21.2775, |
|
"eval_runtime": 1900.2716, |
|
"eval_samples_per_second": 1.796, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 17055, |
|
"total_flos": 1.095854652102017e+18, |
|
"train_runtime": 101377.3904, |
|
"train_samples_per_second": 0.168 |
|
} |
|
], |
|
"max_steps": 17055, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.095854652102017e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|