{ "best_metric": 42.252, "best_model_checkpoint": "opus_base_AoN_tfidf_wce_unsampled/checkpoint-32000", "epoch": 1.5772870662460567, "global_step": 32000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753918572555207e-05, "loss": 0.2121, "step": 4000 }, { "epoch": 0.2, "eval_bleu": 38.2399, "eval_gen_len": 37.2244, "eval_loss": 0.10916104912757874, "eval_runtime": 155.3597, "eval_samples_per_second": 6.713, "eval_steps_per_second": 0.212, "step": 4000 }, { "epoch": 0.39, "learning_rate": 1.950746746845426e-05, "loss": 0.178, "step": 8000 }, { "epoch": 0.39, "eval_bleu": 36.8164, "eval_gen_len": 39.0585, "eval_loss": 0.10669375211000443, "eval_runtime": 186.5644, "eval_samples_per_second": 5.591, "eval_steps_per_second": 0.177, "step": 8000 }, { "epoch": 0.59, "learning_rate": 1.9261016364353314e-05, "loss": 0.1706, "step": 12000 }, { "epoch": 0.59, "eval_bleu": 41.0455, "eval_gen_len": 34.4372, "eval_loss": 0.1054777055978775, "eval_runtime": 122.7383, "eval_samples_per_second": 8.498, "eval_steps_per_second": 0.269, "step": 12000 }, { "epoch": 0.79, "learning_rate": 1.9014565260252367e-05, "loss": 0.1648, "step": 16000 }, { "epoch": 0.79, "eval_bleu": 41.6399, "eval_gen_len": 35.9128, "eval_loss": 0.1043192595243454, "eval_runtime": 99.4427, "eval_samples_per_second": 10.488, "eval_steps_per_second": 0.332, "step": 16000 }, { "epoch": 0.99, "learning_rate": 1.876811415615142e-05, "loss": 0.1623, "step": 20000 }, { "epoch": 0.99, "eval_bleu": 41.8565, "eval_gen_len": 34.7057, "eval_loss": 0.10375376045703888, "eval_runtime": 116.1466, "eval_samples_per_second": 8.98, "eval_steps_per_second": 0.284, "step": 20000 }, { "epoch": 1.18, "learning_rate": 1.8521786277602524e-05, "loss": 0.1503, "step": 24000 }, { "epoch": 1.18, "eval_bleu": 41.5048, "eval_gen_len": 35.9981, "eval_loss": 0.103697270154953, "eval_runtime": 103.5695, "eval_samples_per_second": 10.071, "eval_steps_per_second": 0.319, "step": 24000 }, { "epoch": 1.38, "learning_rate": 1.827545839905363e-05, "loss": 0.15, "step": 28000 }, { "epoch": 1.38, "eval_bleu": 41.0864, "eval_gen_len": 36.0988, "eval_loss": 0.10295107960700989, "eval_runtime": 105.9932, "eval_samples_per_second": 9.84, "eval_steps_per_second": 0.311, "step": 28000 }, { "epoch": 1.58, "learning_rate": 1.8029130520504733e-05, "loss": 0.1495, "step": 32000 }, { "epoch": 1.58, "eval_bleu": 42.252, "eval_gen_len": 34.8677, "eval_loss": 0.1023373156785965, "eval_runtime": 91.3481, "eval_samples_per_second": 11.418, "eval_steps_per_second": 0.361, "step": 32000 } ], "max_steps": 324608, "num_train_epochs": 16, "total_flos": 5.51999915163648e+16, "trial_name": null, "trial_params": null }