{ "best_metric": 40.6424, "best_model_checkpoint": "M2M100_enfr_FT_wang_2022/checkpoint-256000", "epoch": 3.130158341994253, "global_step": 256000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.975551751543682e-05, "loss": 0.2244, "step": 16000 }, { "epoch": 0.2, "eval_bleu": 35.6567, "eval_gen_len": 44.8111, "eval_loss": 0.13247372210025787, "eval_runtime": 433.7309, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.302, "step": 16000 }, { "epoch": 0.39, "learning_rate": 1.951108088280247e-05, "loss": 0.1522, "step": 32000 }, { "epoch": 0.39, "eval_bleu": 36.512, "eval_gen_len": 44.4851, "eval_loss": 0.1262633502483368, "eval_runtime": 417.8212, "eval_samples_per_second": 2.496, "eval_steps_per_second": 0.314, "step": 32000 }, { "epoch": 0.59, "learning_rate": 1.9266613682215567e-05, "loss": 0.1435, "step": 48000 }, { "epoch": 0.59, "eval_bleu": 37.957, "eval_gen_len": 44.5302, "eval_loss": 0.12126067280769348, "eval_runtime": 414.4742, "eval_samples_per_second": 2.516, "eval_steps_per_second": 0.316, "step": 48000 }, { "epoch": 0.78, "learning_rate": 1.9022177049581222e-05, "loss": 0.1384, "step": 64000 }, { "epoch": 0.78, "eval_bleu": 38.0569, "eval_gen_len": 44.5034, "eval_loss": 0.1191168949007988, "eval_runtime": 415.8111, "eval_samples_per_second": 2.508, "eval_steps_per_second": 0.315, "step": 64000 }, { "epoch": 0.98, "learning_rate": 1.8777709848994316e-05, "loss": 0.1345, "step": 80000 }, { "epoch": 0.98, "eval_bleu": 38.4966, "eval_gen_len": 44.8821, "eval_loss": 0.11713190376758575, "eval_runtime": 423.2079, "eval_samples_per_second": 2.465, "eval_steps_per_second": 0.31, "step": 80000 }, { "epoch": 1.17, "learning_rate": 1.853325793238369e-05, "loss": 0.1213, "step": 96000 }, { "epoch": 1.17, "eval_bleu": 39.0368, "eval_gen_len": 44.6012, "eval_loss": 0.1156671866774559, "eval_runtime": 417.9213, "eval_samples_per_second": 2.496, "eval_steps_per_second": 0.313, "step": 96000 }, { "epoch": 1.37, "learning_rate": 1.8288806015773065e-05, "loss": 0.1199, "step": 112000 }, { "epoch": 1.37, "eval_bleu": 39.6906, "eval_gen_len": 44.8178, "eval_loss": 0.11390843987464905, "eval_runtime": 418.2954, "eval_samples_per_second": 2.493, "eval_steps_per_second": 0.313, "step": 112000 }, { "epoch": 1.57, "learning_rate": 1.804436938313872e-05, "loss": 0.1195, "step": 128000 }, { "epoch": 1.57, "eval_bleu": 39.6284, "eval_gen_len": 44.8552, "eval_loss": 0.11290750652551651, "eval_runtime": 422.1564, "eval_samples_per_second": 2.471, "eval_steps_per_second": 0.31, "step": 128000 }, { "epoch": 1.76, "learning_rate": 1.7799902182551813e-05, "loss": 0.1185, "step": 144000 }, { "epoch": 1.76, "eval_bleu": 39.0079, "eval_gen_len": 44.5618, "eval_loss": 0.11247587949037552, "eval_runtime": 408.9786, "eval_samples_per_second": 2.55, "eval_steps_per_second": 0.32, "step": 144000 }, { "epoch": 1.96, "learning_rate": 1.7555465549917468e-05, "loss": 0.1175, "step": 160000 }, { "epoch": 1.96, "eval_bleu": 39.2898, "eval_gen_len": 44.745, "eval_loss": 0.1103997528553009, "eval_runtime": 413.1156, "eval_samples_per_second": 2.525, "eval_steps_per_second": 0.317, "step": 160000 }, { "epoch": 2.15, "learning_rate": 1.7310998349330562e-05, "loss": 0.1062, "step": 176000 }, { "epoch": 2.15, "eval_bleu": 39.8593, "eval_gen_len": 45.1151, "eval_loss": 0.1111496165394783, "eval_runtime": 415.6075, "eval_samples_per_second": 2.51, "eval_steps_per_second": 0.315, "step": 176000 }, { "epoch": 2.35, "learning_rate": 1.7066546432719936e-05, "loss": 0.1047, "step": 192000 }, { "epoch": 2.35, "eval_bleu": 39.913, "eval_gen_len": 44.8102, "eval_loss": 0.11086419969797134, "eval_runtime": 418.8388, "eval_samples_per_second": 2.49, "eval_steps_per_second": 0.313, "step": 192000 }, { "epoch": 2.54, "learning_rate": 1.682210980008559e-05, "loss": 0.1055, "step": 208000 }, { "epoch": 2.54, "eval_bleu": 40.2278, "eval_gen_len": 45.2848, "eval_loss": 0.11025020480155945, "eval_runtime": 421.9598, "eval_samples_per_second": 2.472, "eval_steps_per_second": 0.31, "step": 208000 }, { "epoch": 2.74, "learning_rate": 1.6577657883474966e-05, "loss": 0.1059, "step": 224000 }, { "epoch": 2.74, "eval_bleu": 40.2198, "eval_gen_len": 45.0719, "eval_loss": 0.10949720442295074, "eval_runtime": 416.4666, "eval_samples_per_second": 2.504, "eval_steps_per_second": 0.315, "step": 224000 }, { "epoch": 2.93, "learning_rate": 1.633320596686434e-05, "loss": 0.106, "step": 240000 }, { "epoch": 2.93, "eval_bleu": 39.8973, "eval_gen_len": 44.954, "eval_loss": 0.10881481319665909, "eval_runtime": 421.0124, "eval_samples_per_second": 2.477, "eval_steps_per_second": 0.311, "step": 240000 }, { "epoch": 3.13, "learning_rate": 1.6088769334229995e-05, "loss": 0.0971, "step": 256000 }, { "epoch": 3.13, "eval_bleu": 40.6424, "eval_gen_len": 44.9732, "eval_loss": 0.11019956320524216, "eval_runtime": 418.4973, "eval_samples_per_second": 2.492, "eval_steps_per_second": 0.313, "step": 256000 } ], "max_steps": 1308560, "num_train_epochs": 16, "total_flos": 8.283927408492872e+17, "trial_name": null, "trial_params": null }