{ "best_metric": 4.029151439666748, "best_model_checkpoint": "continued-finetuned-en-to-lg/checkpoint-39", "epoch": 26.0, "eval_steps": 50, "global_step": 1027, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9873417721518988, "eval_bleu": 0.8383, "eval_gen_len": 20.509, "eval_loss": 4.029151439666748, "eval_runtime": 478.8606, "eval_samples_per_second": 25.387, "eval_steps_per_second": 0.794, "step": 39 }, { "epoch": 2.0, "eval_bleu": 0.8282, "eval_gen_len": 20.3936, "eval_loss": 4.243524074554443, "eval_runtime": 474.9597, "eval_samples_per_second": 25.596, "eval_steps_per_second": 0.8, "step": 79 }, { "epoch": 2.9873417721518987, "eval_bleu": 0.8032, "eval_gen_len": 20.33, "eval_loss": 4.359070301055908, "eval_runtime": 475.0262, "eval_samples_per_second": 25.592, "eval_steps_per_second": 0.8, "step": 118 }, { "epoch": 4.0, "eval_bleu": 0.8038, "eval_gen_len": 20.2033, "eval_loss": 4.451895236968994, "eval_runtime": 473.6738, "eval_samples_per_second": 25.665, "eval_steps_per_second": 0.802, "step": 158 }, { "epoch": 4.987341772151899, "eval_bleu": 0.8017, "eval_gen_len": 20.0283, "eval_loss": 4.547088146209717, "eval_runtime": 476.3005, "eval_samples_per_second": 25.524, "eval_steps_per_second": 0.798, "step": 197 }, { "epoch": 6.0, "eval_bleu": 0.7828, "eval_gen_len": 20.1771, "eval_loss": 4.555694103240967, "eval_runtime": 473.4342, "eval_samples_per_second": 25.678, "eval_steps_per_second": 0.803, "step": 237 }, { "epoch": 6.987341772151899, "eval_bleu": 0.771, "eval_gen_len": 20.3172, "eval_loss": 4.672959327697754, "eval_runtime": 476.2521, "eval_samples_per_second": 25.526, "eval_steps_per_second": 0.798, "step": 276 }, { "epoch": 8.0, "eval_bleu": 0.7971, "eval_gen_len": 20.2095, "eval_loss": 4.719875812530518, "eval_runtime": 472.5874, "eval_samples_per_second": 25.724, "eval_steps_per_second": 0.804, "step": 316 }, { "epoch": 8.987341772151899, "eval_bleu": 0.7587, "eval_gen_len": 20.4253, "eval_loss": 4.758220672607422, "eval_runtime": 477.573, "eval_samples_per_second": 25.456, "eval_steps_per_second": 0.796, "step": 355 }, { "epoch": 10.0, "eval_bleu": 0.7975, "eval_gen_len": 20.3643, "eval_loss": 4.771291255950928, "eval_runtime": 477.1681, "eval_samples_per_second": 25.477, "eval_steps_per_second": 0.796, "step": 395 }, { "epoch": 10.987341772151899, "eval_bleu": 0.7619, "eval_gen_len": 20.4121, "eval_loss": 4.748700141906738, "eval_runtime": 479.5702, "eval_samples_per_second": 25.35, "eval_steps_per_second": 0.792, "step": 434 }, { "epoch": 12.0, "eval_bleu": 0.7865, "eval_gen_len": 20.3477, "eval_loss": 4.832670211791992, "eval_runtime": 473.4727, "eval_samples_per_second": 25.676, "eval_steps_per_second": 0.803, "step": 474 }, { "epoch": 12.658227848101266, "grad_norm": 1.2026784420013428, "learning_rate": 1.1452991452991454e-05, "loss": 0.1585, "step": 500 }, { "epoch": 12.987341772151899, "eval_bleu": 0.7909, "eval_gen_len": 20.307, "eval_loss": 4.844118595123291, "eval_runtime": 475.2982, "eval_samples_per_second": 25.578, "eval_steps_per_second": 0.799, "step": 513 }, { "epoch": 14.0, "eval_bleu": 0.8004, "eval_gen_len": 20.3038, "eval_loss": 4.876136302947998, "eval_runtime": 475.2478, "eval_samples_per_second": 25.58, "eval_steps_per_second": 0.8, "step": 553 }, { "epoch": 14.987341772151899, "eval_bleu": 0.7823, "eval_gen_len": 20.2747, "eval_loss": 4.92393684387207, "eval_runtime": 476.8033, "eval_samples_per_second": 25.497, "eval_steps_per_second": 0.797, "step": 592 }, { "epoch": 16.0, "eval_bleu": 0.79, "eval_gen_len": 20.226, "eval_loss": 4.880424976348877, "eval_runtime": 474.5946, "eval_samples_per_second": 25.616, "eval_steps_per_second": 0.801, "step": 632 }, { "epoch": 16.9873417721519, "eval_bleu": 0.7821, "eval_gen_len": 20.3653, "eval_loss": 4.9470534324646, "eval_runtime": 477.2511, "eval_samples_per_second": 25.473, "eval_steps_per_second": 0.796, "step": 671 }, { "epoch": 18.0, "eval_bleu": 0.7876, "eval_gen_len": 20.3285, "eval_loss": 4.9639482498168945, "eval_runtime": 477.3388, "eval_samples_per_second": 25.468, "eval_steps_per_second": 0.796, "step": 711 }, { "epoch": 18.9873417721519, "eval_bleu": 0.7895, "eval_gen_len": 20.3364, "eval_loss": 4.970686912536621, "eval_runtime": 476.4474, "eval_samples_per_second": 25.516, "eval_steps_per_second": 0.798, "step": 750 }, { "epoch": 20.0, "eval_bleu": 0.784, "eval_gen_len": 20.3641, "eval_loss": 4.984948635101318, "eval_runtime": 477.378, "eval_samples_per_second": 25.466, "eval_steps_per_second": 0.796, "step": 790 }, { "epoch": 20.9873417721519, "eval_bleu": 0.7838, "eval_gen_len": 20.3852, "eval_loss": 5.011585712432861, "eval_runtime": 477.6336, "eval_samples_per_second": 25.453, "eval_steps_per_second": 0.796, "step": 829 }, { "epoch": 22.0, "eval_bleu": 0.7874, "eval_gen_len": 20.3104, "eval_loss": 5.026498794555664, "eval_runtime": 477.8381, "eval_samples_per_second": 25.442, "eval_steps_per_second": 0.795, "step": 869 }, { "epoch": 22.9873417721519, "eval_bleu": 0.7886, "eval_gen_len": 20.3484, "eval_loss": 5.026541233062744, "eval_runtime": 478.7469, "eval_samples_per_second": 25.393, "eval_steps_per_second": 0.794, "step": 908 }, { "epoch": 24.0, "eval_bleu": 0.7833, "eval_gen_len": 20.3558, "eval_loss": 5.028345584869385, "eval_runtime": 476.6444, "eval_samples_per_second": 25.505, "eval_steps_per_second": 0.797, "step": 948 }, { "epoch": 24.9873417721519, "eval_bleu": 0.7835, "eval_gen_len": 20.313, "eval_loss": 5.029706001281738, "eval_runtime": 478.6376, "eval_samples_per_second": 25.399, "eval_steps_per_second": 0.794, "step": 987 }, { "epoch": 25.31645569620253, "grad_norm": 0.5030060410499573, "learning_rate": 2.9059829059829063e-06, "loss": 0.0263, "step": 1000 }, { "epoch": 26.0, "eval_bleu": 0.7812, "eval_gen_len": 20.3503, "eval_loss": 5.040848731994629, "eval_runtime": 478.615, "eval_samples_per_second": 25.4, "eval_steps_per_second": 0.794, "step": 1027 } ], "logging_steps": 500, "max_steps": 1170, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 346447645507584.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }