{ "best_metric": 43.7339, "best_model_checkpoint": "opus_big_lsp_adapt_wce_prop_0.8_weight_1.75/checkpoint-80000", "epoch": 1.9716574245224892, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753635243376465e-05, "loss": 0.1872, "step": 8000 }, { "epoch": 0.2, "eval_bleu": 42.0446, "eval_gen_len": 33.721, "eval_loss": 0.09888985753059387, "eval_runtime": 130.4471, "eval_samples_per_second": 7.996, "eval_steps_per_second": 0.506, "step": 8000 }, { "epoch": 0.39, "learning_rate": 1.950723967960567e-05, "loss": 0.1556, "step": 16000 }, { "epoch": 0.39, "eval_bleu": 42.2512, "eval_gen_len": 34.093, "eval_loss": 0.09752173721790314, "eval_runtime": 189.6683, "eval_samples_per_second": 5.499, "eval_steps_per_second": 0.348, "step": 16000 }, { "epoch": 0.59, "learning_rate": 1.9260874922982132e-05, "loss": 0.1511, "step": 24000 }, { "epoch": 0.59, "eval_bleu": 42.4039, "eval_gen_len": 33.7776, "eval_loss": 0.09804832935333252, "eval_runtime": 125.3237, "eval_samples_per_second": 8.322, "eval_steps_per_second": 0.527, "step": 24000 }, { "epoch": 0.79, "learning_rate": 1.901447935921134e-05, "loss": 0.1468, "step": 32000 }, { "epoch": 0.79, "eval_bleu": 42.384, "eval_gen_len": 34.0077, "eval_loss": 0.09726251661777496, "eval_runtime": 127.0486, "eval_samples_per_second": 8.209, "eval_steps_per_second": 0.519, "step": 32000 }, { "epoch": 0.99, "learning_rate": 1.8768114602587803e-05, "loss": 0.1453, "step": 40000 }, { "epoch": 0.99, "eval_bleu": 42.5465, "eval_gen_len": 34.2138, "eval_loss": 0.09616752713918686, "eval_runtime": 149.6762, "eval_samples_per_second": 6.968, "eval_steps_per_second": 0.441, "step": 40000 }, { "epoch": 1.18, "learning_rate": 1.8521749845964266e-05, "loss": 0.1292, "step": 48000 }, { "epoch": 1.18, "eval_bleu": 42.7614, "eval_gen_len": 33.6558, "eval_loss": 0.09714562445878983, "eval_runtime": 140.3139, "eval_samples_per_second": 7.433, "eval_steps_per_second": 0.47, "step": 48000 }, { "epoch": 1.38, "learning_rate": 1.827535428219347e-05, "loss": 0.1296, "step": 56000 }, { "epoch": 1.38, "eval_bleu": 42.2625, "eval_gen_len": 34.7987, "eval_loss": 0.09766771644353867, "eval_runtime": 149.3991, "eval_samples_per_second": 6.981, "eval_steps_per_second": 0.442, "step": 56000 }, { "epoch": 1.58, "learning_rate": 1.8029020332717192e-05, "loss": 0.1301, "step": 64000 }, { "epoch": 1.58, "eval_bleu": 42.8366, "eval_gen_len": 34.4842, "eval_loss": 0.0971095860004425, "eval_runtime": 150.9733, "eval_samples_per_second": 6.909, "eval_steps_per_second": 0.437, "step": 64000 }, { "epoch": 1.77, "learning_rate": 1.7782624768946397e-05, "loss": 0.1295, "step": 72000 }, { "epoch": 1.77, "eval_bleu": 42.763, "eval_gen_len": 34.9012, "eval_loss": 0.09676354378461838, "eval_runtime": 156.8388, "eval_samples_per_second": 6.65, "eval_steps_per_second": 0.421, "step": 72000 }, { "epoch": 1.97, "learning_rate": 1.753626001232286e-05, "loss": 0.1293, "step": 80000 }, { "epoch": 1.97, "eval_bleu": 43.7339, "eval_gen_len": 33.8734, "eval_loss": 0.09614978730678558, "eval_runtime": 162.1547, "eval_samples_per_second": 6.432, "eval_steps_per_second": 0.407, "step": 80000 } ], "max_steps": 649200, "num_train_epochs": 16, "total_flos": 2.5844916958632346e+17, "trial_name": null, "trial_params": null }