|
{ |
|
"best_metric": 43.53, |
|
"best_model_checkpoint": "opus_big_lsp_adapt_wce_precision_3_ubweight_1.5/checkpoint-80000", |
|
"epoch": 1.9716574245224892, |
|
"global_step": 80000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9753635243376465e-05, |
|
"loss": 0.1796, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_bleu": 42.245, |
|
"eval_gen_len": 33.5858, |
|
"eval_loss": 0.09831776469945908, |
|
"eval_runtime": 132.7068, |
|
"eval_samples_per_second": 7.859, |
|
"eval_steps_per_second": 0.497, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9507208872458414e-05, |
|
"loss": 0.1487, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_bleu": 42.2116, |
|
"eval_gen_len": 34.3643, |
|
"eval_loss": 0.09730643033981323, |
|
"eval_runtime": 116.1316, |
|
"eval_samples_per_second": 8.981, |
|
"eval_steps_per_second": 0.568, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9260844115834877e-05, |
|
"loss": 0.1446, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_bleu": 42.0306, |
|
"eval_gen_len": 33.4008, |
|
"eval_loss": 0.09792491048574448, |
|
"eval_runtime": 139.3867, |
|
"eval_samples_per_second": 7.483, |
|
"eval_steps_per_second": 0.474, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.901447935921134e-05, |
|
"loss": 0.1406, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_bleu": 42.7137, |
|
"eval_gen_len": 34.441, |
|
"eval_loss": 0.0964345782995224, |
|
"eval_runtime": 121.1927, |
|
"eval_samples_per_second": 8.606, |
|
"eval_steps_per_second": 0.545, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.8768114602587803e-05, |
|
"loss": 0.1391, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 42.8852, |
|
"eval_gen_len": 33.8102, |
|
"eval_loss": 0.09575977176427841, |
|
"eval_runtime": 107.5204, |
|
"eval_samples_per_second": 9.7, |
|
"eval_steps_per_second": 0.614, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.8521749845964266e-05, |
|
"loss": 0.1239, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_bleu": 42.9831, |
|
"eval_gen_len": 34.3087, |
|
"eval_loss": 0.09662675112485886, |
|
"eval_runtime": 114.4632, |
|
"eval_samples_per_second": 9.112, |
|
"eval_steps_per_second": 0.577, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.827538508934073e-05, |
|
"loss": 0.1242, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_bleu": 42.7598, |
|
"eval_gen_len": 34.2502, |
|
"eval_loss": 0.09706231206655502, |
|
"eval_runtime": 120.5178, |
|
"eval_samples_per_second": 8.654, |
|
"eval_steps_per_second": 0.548, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.8029020332717192e-05, |
|
"loss": 0.1248, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_bleu": 42.9725, |
|
"eval_gen_len": 34.6242, |
|
"eval_loss": 0.09721297025680542, |
|
"eval_runtime": 217.1644, |
|
"eval_samples_per_second": 4.803, |
|
"eval_steps_per_second": 0.304, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.7782655576093655e-05, |
|
"loss": 0.1243, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_bleu": 42.7137, |
|
"eval_gen_len": 34.441, |
|
"eval_loss": 0.09667866677045822, |
|
"eval_runtime": 206.9894, |
|
"eval_samples_per_second": 5.039, |
|
"eval_steps_per_second": 0.319, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.753629081947012e-05, |
|
"loss": 0.1241, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_bleu": 43.53, |
|
"eval_gen_len": 33.9923, |
|
"eval_loss": 0.0961531400680542, |
|
"eval_runtime": 111.5933, |
|
"eval_samples_per_second": 9.346, |
|
"eval_steps_per_second": 0.591, |
|
"step": 80000 |
|
} |
|
], |
|
"max_steps": 649200, |
|
"num_train_epochs": 16, |
|
"total_flos": 2.5844916958632346e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|