{ "best_metric": 43.53, "best_model_checkpoint": "opus_big_lsp_adapt_wce_precision_3_ubweight_1.5/checkpoint-80000", "epoch": 1.9716574245224892, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753635243376465e-05, "loss": 0.1796, "step": 8000 }, { "epoch": 0.2, "eval_bleu": 42.245, "eval_gen_len": 33.5858, "eval_loss": 0.09831776469945908, "eval_runtime": 132.7068, "eval_samples_per_second": 7.859, "eval_steps_per_second": 0.497, "step": 8000 }, { "epoch": 0.39, "learning_rate": 1.9507208872458414e-05, "loss": 0.1487, "step": 16000 }, { "epoch": 0.39, "eval_bleu": 42.2116, "eval_gen_len": 34.3643, "eval_loss": 0.09730643033981323, "eval_runtime": 116.1316, "eval_samples_per_second": 8.981, "eval_steps_per_second": 0.568, "step": 16000 }, { "epoch": 0.59, "learning_rate": 1.9260844115834877e-05, "loss": 0.1446, "step": 24000 }, { "epoch": 0.59, "eval_bleu": 42.0306, "eval_gen_len": 33.4008, "eval_loss": 0.09792491048574448, "eval_runtime": 139.3867, "eval_samples_per_second": 7.483, "eval_steps_per_second": 0.474, "step": 24000 }, { "epoch": 0.79, "learning_rate": 1.901447935921134e-05, "loss": 0.1406, "step": 32000 }, { "epoch": 0.79, "eval_bleu": 42.7137, "eval_gen_len": 34.441, "eval_loss": 0.0964345782995224, "eval_runtime": 121.1927, "eval_samples_per_second": 8.606, "eval_steps_per_second": 0.545, "step": 32000 }, { "epoch": 0.99, "learning_rate": 1.8768114602587803e-05, "loss": 0.1391, "step": 40000 }, { "epoch": 0.99, "eval_bleu": 42.8852, "eval_gen_len": 33.8102, "eval_loss": 0.09575977176427841, "eval_runtime": 107.5204, "eval_samples_per_second": 9.7, "eval_steps_per_second": 0.614, "step": 40000 }, { "epoch": 1.18, "learning_rate": 1.8521749845964266e-05, "loss": 0.1239, "step": 48000 }, { "epoch": 1.18, "eval_bleu": 42.9831, "eval_gen_len": 34.3087, "eval_loss": 0.09662675112485886, "eval_runtime": 114.4632, "eval_samples_per_second": 9.112, "eval_steps_per_second": 0.577, "step": 48000 }, { "epoch": 1.38, "learning_rate": 1.827538508934073e-05, "loss": 0.1242, "step": 56000 }, { "epoch": 1.38, "eval_bleu": 42.7598, "eval_gen_len": 34.2502, "eval_loss": 0.09706231206655502, "eval_runtime": 120.5178, "eval_samples_per_second": 8.654, "eval_steps_per_second": 0.548, "step": 56000 }, { "epoch": 1.58, "learning_rate": 1.8029020332717192e-05, "loss": 0.1248, "step": 64000 }, { "epoch": 1.58, "eval_bleu": 42.9725, "eval_gen_len": 34.6242, "eval_loss": 0.09721297025680542, "eval_runtime": 217.1644, "eval_samples_per_second": 4.803, "eval_steps_per_second": 0.304, "step": 64000 }, { "epoch": 1.77, "learning_rate": 1.7782655576093655e-05, "loss": 0.1243, "step": 72000 }, { "epoch": 1.77, "eval_bleu": 42.7137, "eval_gen_len": 34.441, "eval_loss": 0.09667866677045822, "eval_runtime": 206.9894, "eval_samples_per_second": 5.039, "eval_steps_per_second": 0.319, "step": 72000 }, { "epoch": 1.97, "learning_rate": 1.753629081947012e-05, "loss": 0.1241, "step": 80000 }, { "epoch": 1.97, "eval_bleu": 43.53, "eval_gen_len": 33.9923, "eval_loss": 0.0961531400680542, "eval_runtime": 111.5933, "eval_samples_per_second": 9.346, "eval_steps_per_second": 0.591, "step": 80000 } ], "max_steps": 649200, "num_train_epochs": 16, "total_flos": 2.5844916958632346e+17, "trial_name": null, "trial_params": null }