|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.51086956521739, |
|
"global_step": 3500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 62.8839, |
|
"eval_gen_len": 18.6587, |
|
"eval_loss": 0.6685489416122437, |
|
"eval_runtime": 219.9201, |
|
"eval_samples_per_second": 6.689, |
|
"eval_steps_per_second": 0.418, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.456521739130435e-05, |
|
"loss": 0.9723, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 70.5409, |
|
"eval_gen_len": 18.0659, |
|
"eval_loss": 0.5173786878585815, |
|
"eval_runtime": 185.8072, |
|
"eval_samples_per_second": 7.917, |
|
"eval_steps_per_second": 0.495, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.91304347826087e-05, |
|
"loss": 0.4138, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 70.6822, |
|
"eval_gen_len": 18.2522, |
|
"eval_loss": 0.4692835211753845, |
|
"eval_runtime": 305.3472, |
|
"eval_samples_per_second": 4.817, |
|
"eval_steps_per_second": 0.301, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 76.1884, |
|
"eval_gen_len": 18.2624, |
|
"eval_loss": 0.43825143575668335, |
|
"eval_runtime": 180.0844, |
|
"eval_samples_per_second": 8.168, |
|
"eval_steps_per_second": 0.511, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.3695652173913045e-05, |
|
"loss": 0.2544, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 76.4649, |
|
"eval_gen_len": 17.9884, |
|
"eval_loss": 0.42623549699783325, |
|
"eval_runtime": 265.6449, |
|
"eval_samples_per_second": 5.537, |
|
"eval_steps_per_second": 0.346, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 1.8260869565217393e-05, |
|
"loss": 0.1709, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 77.8959, |
|
"eval_gen_len": 17.9952, |
|
"eval_loss": 0.41967156529426575, |
|
"eval_runtime": 222.1851, |
|
"eval_samples_per_second": 6.621, |
|
"eval_steps_per_second": 0.414, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 1.282608695652174e-05, |
|
"loss": 0.1287, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 78.8909, |
|
"eval_gen_len": 18.0598, |
|
"eval_loss": 0.4178922474384308, |
|
"eval_runtime": 226.0293, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 0.407, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 78.7098, |
|
"eval_gen_len": 18.0034, |
|
"eval_loss": 0.4161533713340759, |
|
"eval_runtime": 218.1192, |
|
"eval_samples_per_second": 6.744, |
|
"eval_steps_per_second": 0.422, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 7.391304347826087e-06, |
|
"loss": 0.1028, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 79.2546, |
|
"eval_gen_len": 18.0659, |
|
"eval_loss": 0.4158268868923187, |
|
"eval_runtime": 170.3307, |
|
"eval_samples_per_second": 8.636, |
|
"eval_steps_per_second": 0.54, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 1.956521739130435e-06, |
|
"loss": 0.084, |
|
"step": 3500 |
|
} |
|
], |
|
"max_steps": 3680, |
|
"num_train_epochs": 10, |
|
"total_flos": 2284115790790656.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|