{ "best_metric": 40.4443, "best_model_checkpoint": "M2M100_enfr_FT/checkpoint-160000", "epoch": 1.9716817212781428, "global_step": 160000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.975361680365747e-05, "loss": 0.2246, "step": 16000 }, { "epoch": 0.2, "eval_bleu": 35.632, "eval_gen_len": 45.0096, "eval_loss": 0.13251402974128723, "eval_runtime": 285.0264, "eval_samples_per_second": 3.659, "eval_steps_per_second": 0.46, "step": 16000 }, { "epoch": 0.39, "learning_rate": 1.950723360731494e-05, "loss": 0.1526, "step": 32000 }, { "epoch": 0.39, "eval_bleu": 36.2601, "eval_gen_len": 45.5829, "eval_loss": 0.12710681557655334, "eval_runtime": 288.5488, "eval_samples_per_second": 3.615, "eval_steps_per_second": 0.454, "step": 32000 }, { "epoch": 0.59, "learning_rate": 1.926085041097241e-05, "loss": 0.1444, "step": 48000 }, { "epoch": 0.59, "eval_bleu": 37.0395, "eval_gen_len": 44.7641, "eval_loss": 0.12176303565502167, "eval_runtime": 283.8778, "eval_samples_per_second": 3.674, "eval_steps_per_second": 0.461, "step": 48000 }, { "epoch": 0.79, "learning_rate": 1.9014498022156776e-05, "loss": 0.1381, "step": 64000 }, { "epoch": 0.79, "eval_bleu": 37.6851, "eval_gen_len": 45.0633, "eval_loss": 0.11842654645442963, "eval_runtime": 281.5142, "eval_samples_per_second": 3.705, "eval_steps_per_second": 0.465, "step": 64000 }, { "epoch": 0.99, "learning_rate": 1.8768114825814246e-05, "loss": 0.135, "step": 80000 }, { "epoch": 0.99, "eval_bleu": 38.8165, "eval_gen_len": 45.2244, "eval_loss": 0.11733223497867584, "eval_runtime": 282.3739, "eval_samples_per_second": 3.694, "eval_steps_per_second": 0.464, "step": 80000 }, { "epoch": 1.18, "learning_rate": 1.852174703323516e-05, "loss": 0.1205, "step": 96000 }, { "epoch": 1.18, "eval_bleu": 39.4115, "eval_gen_len": 45.0767, "eval_loss": 0.11619798094034195, "eval_runtime": 279.629, "eval_samples_per_second": 3.73, "eval_steps_per_second": 0.468, "step": 96000 }, { "epoch": 1.38, "learning_rate": 1.827537924065608e-05, "loss": 0.1203, "step": 112000 }, { "epoch": 1.38, "eval_bleu": 38.9974, "eval_gen_len": 45.0451, "eval_loss": 0.11450415849685669, "eval_runtime": 283.1117, "eval_samples_per_second": 3.684, "eval_steps_per_second": 0.463, "step": 112000 }, { "epoch": 1.58, "learning_rate": 1.802899604431355e-05, "loss": 0.1199, "step": 128000 }, { "epoch": 1.58, "eval_bleu": 38.9989, "eval_gen_len": 45.0268, "eval_loss": 0.1141299158334732, "eval_runtime": 283.9785, "eval_samples_per_second": 3.673, "eval_steps_per_second": 0.461, "step": 128000 }, { "epoch": 1.77, "learning_rate": 1.7782628251734466e-05, "loss": 0.1187, "step": 144000 }, { "epoch": 1.77, "eval_bleu": 39.3658, "eval_gen_len": 44.8562, "eval_loss": 0.11224538832902908, "eval_runtime": 281.1031, "eval_samples_per_second": 3.71, "eval_steps_per_second": 0.466, "step": 144000 }, { "epoch": 1.97, "learning_rate": 1.7536275862918832e-05, "loss": 0.1178, "step": 160000 }, { "epoch": 1.97, "eval_bleu": 40.4443, "eval_gen_len": 44.8341, "eval_loss": 0.11058922857046127, "eval_runtime": 283.3732, "eval_samples_per_second": 3.681, "eval_steps_per_second": 0.462, "step": 160000 } ], "max_steps": 1298384, "num_train_epochs": 16, "total_flos": 5.178177255993508e+17, "trial_name": null, "trial_params": null }