Ethan Sim
stage best wang model
2e7b5b1
{
"best_metric": 40.6424,
"best_model_checkpoint": "M2M100_enfr_FT_wang_2022/checkpoint-256000",
"epoch": 3.130158341994253,
"global_step": 256000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.975551751543682e-05,
"loss": 0.2244,
"step": 16000
},
{
"epoch": 0.2,
"eval_bleu": 35.6567,
"eval_gen_len": 44.8111,
"eval_loss": 0.13247372210025787,
"eval_runtime": 433.7309,
"eval_samples_per_second": 2.405,
"eval_steps_per_second": 0.302,
"step": 16000
},
{
"epoch": 0.39,
"learning_rate": 1.951108088280247e-05,
"loss": 0.1522,
"step": 32000
},
{
"epoch": 0.39,
"eval_bleu": 36.512,
"eval_gen_len": 44.4851,
"eval_loss": 0.1262633502483368,
"eval_runtime": 417.8212,
"eval_samples_per_second": 2.496,
"eval_steps_per_second": 0.314,
"step": 32000
},
{
"epoch": 0.59,
"learning_rate": 1.9266613682215567e-05,
"loss": 0.1435,
"step": 48000
},
{
"epoch": 0.59,
"eval_bleu": 37.957,
"eval_gen_len": 44.5302,
"eval_loss": 0.12126067280769348,
"eval_runtime": 414.4742,
"eval_samples_per_second": 2.516,
"eval_steps_per_second": 0.316,
"step": 48000
},
{
"epoch": 0.78,
"learning_rate": 1.9022177049581222e-05,
"loss": 0.1384,
"step": 64000
},
{
"epoch": 0.78,
"eval_bleu": 38.0569,
"eval_gen_len": 44.5034,
"eval_loss": 0.1191168949007988,
"eval_runtime": 415.8111,
"eval_samples_per_second": 2.508,
"eval_steps_per_second": 0.315,
"step": 64000
},
{
"epoch": 0.98,
"learning_rate": 1.8777709848994316e-05,
"loss": 0.1345,
"step": 80000
},
{
"epoch": 0.98,
"eval_bleu": 38.4966,
"eval_gen_len": 44.8821,
"eval_loss": 0.11713190376758575,
"eval_runtime": 423.2079,
"eval_samples_per_second": 2.465,
"eval_steps_per_second": 0.31,
"step": 80000
},
{
"epoch": 1.17,
"learning_rate": 1.853325793238369e-05,
"loss": 0.1213,
"step": 96000
},
{
"epoch": 1.17,
"eval_bleu": 39.0368,
"eval_gen_len": 44.6012,
"eval_loss": 0.1156671866774559,
"eval_runtime": 417.9213,
"eval_samples_per_second": 2.496,
"eval_steps_per_second": 0.313,
"step": 96000
},
{
"epoch": 1.37,
"learning_rate": 1.8288806015773065e-05,
"loss": 0.1199,
"step": 112000
},
{
"epoch": 1.37,
"eval_bleu": 39.6906,
"eval_gen_len": 44.8178,
"eval_loss": 0.11390843987464905,
"eval_runtime": 418.2954,
"eval_samples_per_second": 2.493,
"eval_steps_per_second": 0.313,
"step": 112000
},
{
"epoch": 1.57,
"learning_rate": 1.804436938313872e-05,
"loss": 0.1195,
"step": 128000
},
{
"epoch": 1.57,
"eval_bleu": 39.6284,
"eval_gen_len": 44.8552,
"eval_loss": 0.11290750652551651,
"eval_runtime": 422.1564,
"eval_samples_per_second": 2.471,
"eval_steps_per_second": 0.31,
"step": 128000
},
{
"epoch": 1.76,
"learning_rate": 1.7799902182551813e-05,
"loss": 0.1185,
"step": 144000
},
{
"epoch": 1.76,
"eval_bleu": 39.0079,
"eval_gen_len": 44.5618,
"eval_loss": 0.11247587949037552,
"eval_runtime": 408.9786,
"eval_samples_per_second": 2.55,
"eval_steps_per_second": 0.32,
"step": 144000
},
{
"epoch": 1.96,
"learning_rate": 1.7555465549917468e-05,
"loss": 0.1175,
"step": 160000
},
{
"epoch": 1.96,
"eval_bleu": 39.2898,
"eval_gen_len": 44.745,
"eval_loss": 0.1103997528553009,
"eval_runtime": 413.1156,
"eval_samples_per_second": 2.525,
"eval_steps_per_second": 0.317,
"step": 160000
},
{
"epoch": 2.15,
"learning_rate": 1.7310998349330562e-05,
"loss": 0.1062,
"step": 176000
},
{
"epoch": 2.15,
"eval_bleu": 39.8593,
"eval_gen_len": 45.1151,
"eval_loss": 0.1111496165394783,
"eval_runtime": 415.6075,
"eval_samples_per_second": 2.51,
"eval_steps_per_second": 0.315,
"step": 176000
},
{
"epoch": 2.35,
"learning_rate": 1.7066546432719936e-05,
"loss": 0.1047,
"step": 192000
},
{
"epoch": 2.35,
"eval_bleu": 39.913,
"eval_gen_len": 44.8102,
"eval_loss": 0.11086419969797134,
"eval_runtime": 418.8388,
"eval_samples_per_second": 2.49,
"eval_steps_per_second": 0.313,
"step": 192000
},
{
"epoch": 2.54,
"learning_rate": 1.682210980008559e-05,
"loss": 0.1055,
"step": 208000
},
{
"epoch": 2.54,
"eval_bleu": 40.2278,
"eval_gen_len": 45.2848,
"eval_loss": 0.11025020480155945,
"eval_runtime": 421.9598,
"eval_samples_per_second": 2.472,
"eval_steps_per_second": 0.31,
"step": 208000
},
{
"epoch": 2.74,
"learning_rate": 1.6577657883474966e-05,
"loss": 0.1059,
"step": 224000
},
{
"epoch": 2.74,
"eval_bleu": 40.2198,
"eval_gen_len": 45.0719,
"eval_loss": 0.10949720442295074,
"eval_runtime": 416.4666,
"eval_samples_per_second": 2.504,
"eval_steps_per_second": 0.315,
"step": 224000
},
{
"epoch": 2.93,
"learning_rate": 1.633320596686434e-05,
"loss": 0.106,
"step": 240000
},
{
"epoch": 2.93,
"eval_bleu": 39.8973,
"eval_gen_len": 44.954,
"eval_loss": 0.10881481319665909,
"eval_runtime": 421.0124,
"eval_samples_per_second": 2.477,
"eval_steps_per_second": 0.311,
"step": 240000
},
{
"epoch": 3.13,
"learning_rate": 1.6088769334229995e-05,
"loss": 0.0971,
"step": 256000
},
{
"epoch": 3.13,
"eval_bleu": 40.6424,
"eval_gen_len": 44.9732,
"eval_loss": 0.11019956320524216,
"eval_runtime": 418.4973,
"eval_samples_per_second": 2.492,
"eval_steps_per_second": 0.313,
"step": 256000
}
],
"max_steps": 1308560,
"num_train_epochs": 16,
"total_flos": 8.283927408492872e+17,
"trial_name": null,
"trial_params": null
}