|
{ |
|
"best_metric": 42.5736, |
|
"best_model_checkpoint": "opus_base_simple_tfidf_wce/checkpoint-60000", |
|
"epoch": 2.9574132492113563, |
|
"global_step": 60000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9753918572555207e-05, |
|
"loss": 0.2098, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_bleu": 35.7996, |
|
"eval_gen_len": 39.4171, |
|
"eval_loss": 0.10897957533597946, |
|
"eval_runtime": 150.8888, |
|
"eval_samples_per_second": 6.912, |
|
"eval_steps_per_second": 0.219, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.950746746845426e-05, |
|
"loss": 0.1762, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_bleu": 36.8077, |
|
"eval_gen_len": 39.0182, |
|
"eval_loss": 0.10654434561729431, |
|
"eval_runtime": 116.4461, |
|
"eval_samples_per_second": 8.957, |
|
"eval_steps_per_second": 0.283, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9261016364353314e-05, |
|
"loss": 0.1691, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_bleu": 41.2732, |
|
"eval_gen_len": 34.2013, |
|
"eval_loss": 0.10531464219093323, |
|
"eval_runtime": 133.3831, |
|
"eval_samples_per_second": 7.82, |
|
"eval_steps_per_second": 0.247, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.9014565260252367e-05, |
|
"loss": 0.1635, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_bleu": 41.6402, |
|
"eval_gen_len": 34.6366, |
|
"eval_loss": 0.10416054725646973, |
|
"eval_runtime": 104.9246, |
|
"eval_samples_per_second": 9.94, |
|
"eval_steps_per_second": 0.315, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.876811415615142e-05, |
|
"loss": 0.1611, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 41.7834, |
|
"eval_gen_len": 35.3193, |
|
"eval_loss": 0.10361454635858536, |
|
"eval_runtime": 153.2125, |
|
"eval_samples_per_second": 6.808, |
|
"eval_steps_per_second": 0.215, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.8521786277602524e-05, |
|
"loss": 0.1493, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_bleu": 41.5067, |
|
"eval_gen_len": 35.8773, |
|
"eval_loss": 0.10355237871408463, |
|
"eval_runtime": 92.4584, |
|
"eval_samples_per_second": 11.281, |
|
"eval_steps_per_second": 0.357, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.827545839905363e-05, |
|
"loss": 0.1491, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_bleu": 41.1362, |
|
"eval_gen_len": 36.0518, |
|
"eval_loss": 0.10270049422979355, |
|
"eval_runtime": 90.7046, |
|
"eval_samples_per_second": 11.499, |
|
"eval_steps_per_second": 0.364, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.8029130520504733e-05, |
|
"loss": 0.1486, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_bleu": 42.0407, |
|
"eval_gen_len": 34.8581, |
|
"eval_loss": 0.10218680649995804, |
|
"eval_runtime": 93.8654, |
|
"eval_samples_per_second": 11.112, |
|
"eval_steps_per_second": 0.352, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.778286425473186e-05, |
|
"loss": 0.1472, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_bleu": 40.0238, |
|
"eval_gen_len": 37.8265, |
|
"eval_loss": 0.10189016908407211, |
|
"eval_runtime": 108.5416, |
|
"eval_samples_per_second": 9.609, |
|
"eval_steps_per_second": 0.304, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7536474763406942e-05, |
|
"loss": 0.1463, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_bleu": 42.5557, |
|
"eval_gen_len": 34.1285, |
|
"eval_loss": 0.10188464820384979, |
|
"eval_runtime": 71.1911, |
|
"eval_samples_per_second": 14.651, |
|
"eval_steps_per_second": 0.464, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.7290146884858045e-05, |
|
"loss": 0.1388, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_bleu": 42.1819, |
|
"eval_gen_len": 35.1783, |
|
"eval_loss": 0.10227210074663162, |
|
"eval_runtime": 93.0555, |
|
"eval_samples_per_second": 11.208, |
|
"eval_steps_per_second": 0.355, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.704381900630915e-05, |
|
"loss": 0.1378, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_bleu": 42.5524, |
|
"eval_gen_len": 35.2148, |
|
"eval_loss": 0.10202094167470932, |
|
"eval_runtime": 104.8789, |
|
"eval_samples_per_second": 9.945, |
|
"eval_steps_per_second": 0.315, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.679755274053628e-05, |
|
"loss": 0.1367, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_bleu": 42.2979, |
|
"eval_gen_len": 34.6817, |
|
"eval_loss": 0.10162690281867981, |
|
"eval_runtime": 90.8884, |
|
"eval_samples_per_second": 11.476, |
|
"eval_steps_per_second": 0.363, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.6551163249211358e-05, |
|
"loss": 0.137, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_bleu": 42.4162, |
|
"eval_gen_len": 35.652, |
|
"eval_loss": 0.10157214105129242, |
|
"eval_runtime": 132.7314, |
|
"eval_samples_per_second": 7.858, |
|
"eval_steps_per_second": 0.249, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.6304773757886436e-05, |
|
"loss": 0.1373, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_bleu": 42.5736, |
|
"eval_gen_len": 34.418, |
|
"eval_loss": 0.10088738054037094, |
|
"eval_runtime": 113.8031, |
|
"eval_samples_per_second": 9.165, |
|
"eval_steps_per_second": 0.29, |
|
"step": 60000 |
|
} |
|
], |
|
"max_steps": 324608, |
|
"num_train_epochs": 16, |
|
"total_flos": 1.0348852591617638e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|