{ "best_metric": 42.5736, "best_model_checkpoint": "opus_base_simple_tfidf_wce/checkpoint-60000", "epoch": 2.9574132492113563, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753918572555207e-05, "loss": 0.2098, "step": 4000 }, { "epoch": 0.2, "eval_bleu": 35.7996, "eval_gen_len": 39.4171, "eval_loss": 0.10897957533597946, "eval_runtime": 150.8888, "eval_samples_per_second": 6.912, "eval_steps_per_second": 0.219, "step": 4000 }, { "epoch": 0.39, "learning_rate": 1.950746746845426e-05, "loss": 0.1762, "step": 8000 }, { "epoch": 0.39, "eval_bleu": 36.8077, "eval_gen_len": 39.0182, "eval_loss": 0.10654434561729431, "eval_runtime": 116.4461, "eval_samples_per_second": 8.957, "eval_steps_per_second": 0.283, "step": 8000 }, { "epoch": 0.59, "learning_rate": 1.9261016364353314e-05, "loss": 0.1691, "step": 12000 }, { "epoch": 0.59, "eval_bleu": 41.2732, "eval_gen_len": 34.2013, "eval_loss": 0.10531464219093323, "eval_runtime": 133.3831, "eval_samples_per_second": 7.82, "eval_steps_per_second": 0.247, "step": 12000 }, { "epoch": 0.79, "learning_rate": 1.9014565260252367e-05, "loss": 0.1635, "step": 16000 }, { "epoch": 0.79, "eval_bleu": 41.6402, "eval_gen_len": 34.6366, "eval_loss": 0.10416054725646973, "eval_runtime": 104.9246, "eval_samples_per_second": 9.94, "eval_steps_per_second": 0.315, "step": 16000 }, { "epoch": 0.99, "learning_rate": 1.876811415615142e-05, "loss": 0.1611, "step": 20000 }, { "epoch": 0.99, "eval_bleu": 41.7834, "eval_gen_len": 35.3193, "eval_loss": 0.10361454635858536, "eval_runtime": 153.2125, "eval_samples_per_second": 6.808, "eval_steps_per_second": 0.215, "step": 20000 }, { "epoch": 1.18, "learning_rate": 1.8521786277602524e-05, "loss": 0.1493, "step": 24000 }, { "epoch": 1.18, "eval_bleu": 41.5067, "eval_gen_len": 35.8773, "eval_loss": 0.10355237871408463, "eval_runtime": 92.4584, "eval_samples_per_second": 11.281, "eval_steps_per_second": 0.357, "step": 24000 }, { "epoch": 1.38, "learning_rate": 1.827545839905363e-05, "loss": 0.1491, "step": 28000 }, { "epoch": 1.38, "eval_bleu": 41.1362, "eval_gen_len": 36.0518, "eval_loss": 0.10270049422979355, "eval_runtime": 90.7046, "eval_samples_per_second": 11.499, "eval_steps_per_second": 0.364, "step": 28000 }, { "epoch": 1.58, "learning_rate": 1.8029130520504733e-05, "loss": 0.1486, "step": 32000 }, { "epoch": 1.58, "eval_bleu": 42.0407, "eval_gen_len": 34.8581, "eval_loss": 0.10218680649995804, "eval_runtime": 93.8654, "eval_samples_per_second": 11.112, "eval_steps_per_second": 0.352, "step": 32000 }, { "epoch": 1.77, "learning_rate": 1.778286425473186e-05, "loss": 0.1472, "step": 36000 }, { "epoch": 1.77, "eval_bleu": 40.0238, "eval_gen_len": 37.8265, "eval_loss": 0.10189016908407211, "eval_runtime": 108.5416, "eval_samples_per_second": 9.609, "eval_steps_per_second": 0.304, "step": 36000 }, { "epoch": 1.97, "learning_rate": 1.7536474763406942e-05, "loss": 0.1463, "step": 40000 }, { "epoch": 1.97, "eval_bleu": 42.5557, "eval_gen_len": 34.1285, "eval_loss": 0.10188464820384979, "eval_runtime": 71.1911, "eval_samples_per_second": 14.651, "eval_steps_per_second": 0.464, "step": 40000 }, { "epoch": 2.17, "learning_rate": 1.7290146884858045e-05, "loss": 0.1388, "step": 44000 }, { "epoch": 2.17, "eval_bleu": 42.1819, "eval_gen_len": 35.1783, "eval_loss": 0.10227210074663162, "eval_runtime": 93.0555, "eval_samples_per_second": 11.208, "eval_steps_per_second": 0.355, "step": 44000 }, { "epoch": 2.37, "learning_rate": 1.704381900630915e-05, "loss": 0.1378, "step": 48000 }, { "epoch": 2.37, "eval_bleu": 42.5524, "eval_gen_len": 35.2148, "eval_loss": 0.10202094167470932, "eval_runtime": 104.8789, "eval_samples_per_second": 9.945, "eval_steps_per_second": 0.315, "step": 48000 }, { "epoch": 2.56, "learning_rate": 1.679755274053628e-05, "loss": 0.1367, "step": 52000 }, { "epoch": 2.56, "eval_bleu": 42.2979, "eval_gen_len": 34.6817, "eval_loss": 0.10162690281867981, "eval_runtime": 90.8884, "eval_samples_per_second": 11.476, "eval_steps_per_second": 0.363, "step": 52000 }, { "epoch": 2.76, "learning_rate": 1.6551163249211358e-05, "loss": 0.137, "step": 56000 }, { "epoch": 2.76, "eval_bleu": 42.4162, "eval_gen_len": 35.652, "eval_loss": 0.10157214105129242, "eval_runtime": 132.7314, "eval_samples_per_second": 7.858, "eval_steps_per_second": 0.249, "step": 56000 }, { "epoch": 2.96, "learning_rate": 1.6304773757886436e-05, "loss": 0.1373, "step": 60000 }, { "epoch": 2.96, "eval_bleu": 42.5736, "eval_gen_len": 34.418, "eval_loss": 0.10088738054037094, "eval_runtime": 113.8031, "eval_samples_per_second": 9.165, "eval_steps_per_second": 0.29, "step": 60000 } ], "max_steps": 324608, "num_train_epochs": 16, "total_flos": 1.0348852591617638e+17, "trial_name": null, "trial_params": null }