|
{ |
|
"best_metric": 40.6424, |
|
"best_model_checkpoint": "M2M100_enfr_FT_wang_2022/checkpoint-256000", |
|
"epoch": 3.130158341994253, |
|
"global_step": 256000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.975551751543682e-05, |
|
"loss": 0.2244, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_bleu": 35.6567, |
|
"eval_gen_len": 44.8111, |
|
"eval_loss": 0.13247372210025787, |
|
"eval_runtime": 433.7309, |
|
"eval_samples_per_second": 2.405, |
|
"eval_steps_per_second": 0.302, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.951108088280247e-05, |
|
"loss": 0.1522, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_bleu": 36.512, |
|
"eval_gen_len": 44.4851, |
|
"eval_loss": 0.1262633502483368, |
|
"eval_runtime": 417.8212, |
|
"eval_samples_per_second": 2.496, |
|
"eval_steps_per_second": 0.314, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9266613682215567e-05, |
|
"loss": 0.1435, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_bleu": 37.957, |
|
"eval_gen_len": 44.5302, |
|
"eval_loss": 0.12126067280769348, |
|
"eval_runtime": 414.4742, |
|
"eval_samples_per_second": 2.516, |
|
"eval_steps_per_second": 0.316, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.9022177049581222e-05, |
|
"loss": 0.1384, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_bleu": 38.0569, |
|
"eval_gen_len": 44.5034, |
|
"eval_loss": 0.1191168949007988, |
|
"eval_runtime": 415.8111, |
|
"eval_samples_per_second": 2.508, |
|
"eval_steps_per_second": 0.315, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.8777709848994316e-05, |
|
"loss": 0.1345, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_bleu": 38.4966, |
|
"eval_gen_len": 44.8821, |
|
"eval_loss": 0.11713190376758575, |
|
"eval_runtime": 423.2079, |
|
"eval_samples_per_second": 2.465, |
|
"eval_steps_per_second": 0.31, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.853325793238369e-05, |
|
"loss": 0.1213, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_bleu": 39.0368, |
|
"eval_gen_len": 44.6012, |
|
"eval_loss": 0.1156671866774559, |
|
"eval_runtime": 417.9213, |
|
"eval_samples_per_second": 2.496, |
|
"eval_steps_per_second": 0.313, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.8288806015773065e-05, |
|
"loss": 0.1199, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_bleu": 39.6906, |
|
"eval_gen_len": 44.8178, |
|
"eval_loss": 0.11390843987464905, |
|
"eval_runtime": 418.2954, |
|
"eval_samples_per_second": 2.493, |
|
"eval_steps_per_second": 0.313, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.804436938313872e-05, |
|
"loss": 0.1195, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_bleu": 39.6284, |
|
"eval_gen_len": 44.8552, |
|
"eval_loss": 0.11290750652551651, |
|
"eval_runtime": 422.1564, |
|
"eval_samples_per_second": 2.471, |
|
"eval_steps_per_second": 0.31, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.7799902182551813e-05, |
|
"loss": 0.1185, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_bleu": 39.0079, |
|
"eval_gen_len": 44.5618, |
|
"eval_loss": 0.11247587949037552, |
|
"eval_runtime": 408.9786, |
|
"eval_samples_per_second": 2.55, |
|
"eval_steps_per_second": 0.32, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7555465549917468e-05, |
|
"loss": 0.1175, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_bleu": 39.2898, |
|
"eval_gen_len": 44.745, |
|
"eval_loss": 0.1103997528553009, |
|
"eval_runtime": 413.1156, |
|
"eval_samples_per_second": 2.525, |
|
"eval_steps_per_second": 0.317, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.7310998349330562e-05, |
|
"loss": 0.1062, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_bleu": 39.8593, |
|
"eval_gen_len": 45.1151, |
|
"eval_loss": 0.1111496165394783, |
|
"eval_runtime": 415.6075, |
|
"eval_samples_per_second": 2.51, |
|
"eval_steps_per_second": 0.315, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.7066546432719936e-05, |
|
"loss": 0.1047, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_bleu": 39.913, |
|
"eval_gen_len": 44.8102, |
|
"eval_loss": 0.11086419969797134, |
|
"eval_runtime": 418.8388, |
|
"eval_samples_per_second": 2.49, |
|
"eval_steps_per_second": 0.313, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.682210980008559e-05, |
|
"loss": 0.1055, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_bleu": 40.2278, |
|
"eval_gen_len": 45.2848, |
|
"eval_loss": 0.11025020480155945, |
|
"eval_runtime": 421.9598, |
|
"eval_samples_per_second": 2.472, |
|
"eval_steps_per_second": 0.31, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.6577657883474966e-05, |
|
"loss": 0.1059, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_bleu": 40.2198, |
|
"eval_gen_len": 45.0719, |
|
"eval_loss": 0.10949720442295074, |
|
"eval_runtime": 416.4666, |
|
"eval_samples_per_second": 2.504, |
|
"eval_steps_per_second": 0.315, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.633320596686434e-05, |
|
"loss": 0.106, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_bleu": 39.8973, |
|
"eval_gen_len": 44.954, |
|
"eval_loss": 0.10881481319665909, |
|
"eval_runtime": 421.0124, |
|
"eval_samples_per_second": 2.477, |
|
"eval_steps_per_second": 0.311, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.6088769334229995e-05, |
|
"loss": 0.0971, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_bleu": 40.6424, |
|
"eval_gen_len": 44.9732, |
|
"eval_loss": 0.11019956320524216, |
|
"eval_runtime": 418.4973, |
|
"eval_samples_per_second": 2.492, |
|
"eval_steps_per_second": 0.313, |
|
"step": 256000 |
|
} |
|
], |
|
"max_steps": 1308560, |
|
"num_train_epochs": 16, |
|
"total_flos": 8.283927408492872e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|