{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.175986495041148, "global_step": 17000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.42, "learning_rate": 3.3333333333333335e-05, "loss": 4.4431, "step": 1000 }, { "epoch": 0.42, "eval_loss": 3.1658337116241455, "eval_rouge-1": 0.1104, "eval_rouge-2": 0.0147, "eval_rouge-l": 0.1071, "eval_runtime": 938.5942, "eval_samples_per_second": 4.447, "step": 1000 }, { "epoch": 0.84, "learning_rate": 4.8873366381252815e-05, "loss": 3.0017, "step": 2000 }, { "epoch": 0.84, "eval_loss": 2.450249195098877, "eval_rouge-1": 0.2256, "eval_rouge-2": 0.0714, "eval_rouge-l": 0.2134, "eval_runtime": 828.0492, "eval_samples_per_second": 5.041, "step": 2000 }, { "epoch": 1.27, "learning_rate": 4.662009914375845e-05, "loss": 2.258, "step": 3000 }, { "epoch": 1.27, "eval_loss": 1.915558099746704, "eval_rouge-1": 0.3442, "eval_rouge-2": 0.1597, "eval_rouge-l": 0.3284, "eval_runtime": 854.823, "eval_samples_per_second": 4.883, "step": 3000 }, { "epoch": 1.69, "learning_rate": 4.4366831906264086e-05, "loss": 1.8476, "step": 4000 }, { "epoch": 1.69, "eval_loss": 1.7214736938476562, "eval_rouge-1": 0.3784, "eval_rouge-2": 0.1998, "eval_rouge-l": 0.362, "eval_runtime": 711.98, "eval_samples_per_second": 5.863, "step": 4000 }, { "epoch": 2.11, "learning_rate": 4.211356466876972e-05, "loss": 1.6932, "step": 5000 }, { "epoch": 2.11, "eval_loss": 1.6499994993209839, "eval_rouge-1": 0.3992, "eval_rouge-2": 0.2227, "eval_rouge-l": 0.3822, "eval_runtime": 721.767, "eval_samples_per_second": 5.783, "step": 5000 }, { "epoch": 2.53, "learning_rate": 3.986029743127535e-05, "loss": 1.5271, "step": 6000 }, { "epoch": 2.53, "eval_loss": 1.5838263034820557, "eval_rouge-1": 0.3999, "eval_rouge-2": 0.2255, "eval_rouge-l": 0.3825, "eval_runtime": 791.7162, "eval_samples_per_second": 5.272, "step": 6000 }, { "epoch": 2.95, "learning_rate": 3.760703019378098e-05, "loss": 1.4984, "step": 7000 }, { "epoch": 2.95, "eval_loss": 1.5331367254257202, "eval_rouge-1": 0.4063, "eval_rouge-2": 0.2319, "eval_rouge-l": 0.39, "eval_runtime": 787.263, "eval_samples_per_second": 5.302, "step": 7000 }, { "epoch": 3.38, "learning_rate": 3.535376295628662e-05, "loss": 1.3435, "step": 8000 }, { "epoch": 3.38, "eval_loss": 1.5344029664993286, "eval_rouge-1": 0.419, "eval_rouge-2": 0.2391, "eval_rouge-l": 0.4009, "eval_runtime": 801.0657, "eval_samples_per_second": 5.211, "step": 8000 }, { "epoch": 3.8, "learning_rate": 3.310049571879225e-05, "loss": 1.275, "step": 9000 }, { "epoch": 3.8, "eval_loss": 1.5167639255523682, "eval_rouge-1": 0.4172, "eval_rouge-2": 0.2399, "eval_rouge-l": 0.398, "eval_runtime": 714.2604, "eval_samples_per_second": 5.844, "step": 9000 }, { "epoch": 4.22, "learning_rate": 3.0847228481297885e-05, "loss": 1.2267, "step": 10000 }, { "epoch": 4.22, "eval_loss": 1.5133156776428223, "eval_rouge-1": 0.4215, "eval_rouge-2": 0.2435, "eval_rouge-l": 0.402, "eval_runtime": 712.9933, "eval_samples_per_second": 5.854, "step": 10000 }, { "epoch": 4.64, "learning_rate": 2.859396124380352e-05, "loss": 1.1519, "step": 11000 }, { "epoch": 4.64, "eval_loss": 1.4870655536651611, "eval_rouge-1": 0.4202, "eval_rouge-2": 0.244, "eval_rouge-l": 0.4013, "eval_runtime": 437.1167, "eval_samples_per_second": 9.549, "step": 11000 }, { "epoch": 5.07, "learning_rate": 2.6340694006309152e-05, "loss": 1.1841, "step": 12000 }, { "epoch": 5.07, "eval_loss": 1.5044740438461304, "eval_rouge-1": 0.4269, "eval_rouge-2": 0.2471, "eval_rouge-l": 0.4076, "eval_runtime": 446.0452, "eval_samples_per_second": 9.358, "step": 12000 }, { "epoch": 5.49, "learning_rate": 2.4087426768814784e-05, "loss": 1.0586, "step": 13000 }, { "epoch": 5.49, "eval_loss": 1.4930425882339478, "eval_rouge-1": 0.4285, "eval_rouge-2": 0.2518, "eval_rouge-l": 0.4109, "eval_runtime": 430.9672, "eval_samples_per_second": 9.685, "step": 13000 }, { "epoch": 5.91, "learning_rate": 2.1834159531320416e-05, "loss": 1.0657, "step": 14000 }, { "epoch": 5.91, "eval_loss": 1.488339900970459, "eval_rouge-1": 0.4273, "eval_rouge-2": 0.2514, "eval_rouge-l": 0.4084, "eval_runtime": 438.2586, "eval_samples_per_second": 9.524, "step": 14000 }, { "epoch": 6.33, "learning_rate": 1.958089229382605e-05, "loss": 0.9725, "step": 15000 }, { "epoch": 6.33, "eval_loss": 1.5135878324508667, "eval_rouge-1": 0.4312, "eval_rouge-2": 0.2537, "eval_rouge-l": 0.4116, "eval_runtime": 436.2911, "eval_samples_per_second": 9.567, "step": 15000 }, { "epoch": 6.75, "learning_rate": 1.732762505633168e-05, "loss": 0.9176, "step": 16000 }, { "epoch": 6.75, "eval_loss": 1.5166784524917603, "eval_rouge-1": 0.433, "eval_rouge-2": 0.2559, "eval_rouge-l": 0.4146, "eval_runtime": 820.6736, "eval_samples_per_second": 5.086, "step": 16000 }, { "epoch": 7.18, "learning_rate": 1.5074357818837314e-05, "loss": 0.8982, "step": 17000 }, { "epoch": 7.18, "eval_loss": 1.535448431968689, "eval_rouge-1": 0.4293, "eval_rouge-2": 0.2538, "eval_rouge-l": 0.4101, "eval_runtime": 828.443, "eval_samples_per_second": 5.038, "step": 17000 } ], "max_steps": 23690, "num_train_epochs": 10, "total_flos": 1.317233087652327e+17, "trial_name": null, "trial_params": null }