{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.51086956521739, "global_step": 3500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 62.8839, "eval_gen_len": 18.6587, "eval_loss": 0.6685489416122437, "eval_runtime": 219.9201, "eval_samples_per_second": 6.689, "eval_steps_per_second": 0.418, "step": 368 }, { "epoch": 1.36, "learning_rate": 3.456521739130435e-05, "loss": 0.9723, "step": 500 }, { "epoch": 2.0, "eval_bleu": 70.5409, "eval_gen_len": 18.0659, "eval_loss": 0.5173786878585815, "eval_runtime": 185.8072, "eval_samples_per_second": 7.917, "eval_steps_per_second": 0.495, "step": 736 }, { "epoch": 2.72, "learning_rate": 2.91304347826087e-05, "loss": 0.4138, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 70.6822, "eval_gen_len": 18.2522, "eval_loss": 0.4692835211753845, "eval_runtime": 305.3472, "eval_samples_per_second": 4.817, "eval_steps_per_second": 0.301, "step": 1104 }, { "epoch": 4.0, "eval_bleu": 76.1884, "eval_gen_len": 18.2624, "eval_loss": 0.43825143575668335, "eval_runtime": 180.0844, "eval_samples_per_second": 8.168, "eval_steps_per_second": 0.511, "step": 1472 }, { "epoch": 4.08, "learning_rate": 2.3695652173913045e-05, "loss": 0.2544, "step": 1500 }, { "epoch": 5.0, "eval_bleu": 76.4649, "eval_gen_len": 17.9884, "eval_loss": 0.42623549699783325, "eval_runtime": 265.6449, "eval_samples_per_second": 5.537, "eval_steps_per_second": 0.346, "step": 1840 }, { "epoch": 5.43, "learning_rate": 1.8260869565217393e-05, "loss": 0.1709, "step": 2000 }, { "epoch": 6.0, "eval_bleu": 77.8959, "eval_gen_len": 17.9952, "eval_loss": 0.41967156529426575, "eval_runtime": 222.1851, "eval_samples_per_second": 6.621, "eval_steps_per_second": 0.414, "step": 2208 }, { "epoch": 6.79, "learning_rate": 1.282608695652174e-05, "loss": 0.1287, "step": 2500 }, { "epoch": 7.0, "eval_bleu": 78.8909, "eval_gen_len": 18.0598, "eval_loss": 0.4178922474384308, "eval_runtime": 226.0293, "eval_samples_per_second": 6.508, "eval_steps_per_second": 0.407, "step": 2576 }, { "epoch": 8.0, "eval_bleu": 78.7098, "eval_gen_len": 18.0034, "eval_loss": 0.4161533713340759, "eval_runtime": 218.1192, "eval_samples_per_second": 6.744, "eval_steps_per_second": 0.422, "step": 2944 }, { "epoch": 8.15, "learning_rate": 7.391304347826087e-06, "loss": 0.1028, "step": 3000 }, { "epoch": 9.0, "eval_bleu": 79.2546, "eval_gen_len": 18.0659, "eval_loss": 0.4158268868923187, "eval_runtime": 170.3307, "eval_samples_per_second": 8.636, "eval_steps_per_second": 0.54, "step": 3312 }, { "epoch": 9.51, "learning_rate": 1.956521739130435e-06, "loss": 0.084, "step": 3500 } ], "max_steps": 3680, "num_train_epochs": 10, "total_flos": 2284115790790656.0, "trial_name": null, "trial_params": null }