|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.175986495041148, |
|
"global_step": 17000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 4.4431, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 3.1658337116241455, |
|
"eval_rouge-1": 0.1104, |
|
"eval_rouge-2": 0.0147, |
|
"eval_rouge-l": 0.1071, |
|
"eval_runtime": 938.5942, |
|
"eval_samples_per_second": 4.447, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.8873366381252815e-05, |
|
"loss": 3.0017, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.450249195098877, |
|
"eval_rouge-1": 0.2256, |
|
"eval_rouge-2": 0.0714, |
|
"eval_rouge-l": 0.2134, |
|
"eval_runtime": 828.0492, |
|
"eval_samples_per_second": 5.041, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.662009914375845e-05, |
|
"loss": 2.258, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 1.915558099746704, |
|
"eval_rouge-1": 0.3442, |
|
"eval_rouge-2": 0.1597, |
|
"eval_rouge-l": 0.3284, |
|
"eval_runtime": 854.823, |
|
"eval_samples_per_second": 4.883, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.4366831906264086e-05, |
|
"loss": 1.8476, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 1.7214736938476562, |
|
"eval_rouge-1": 0.3784, |
|
"eval_rouge-2": 0.1998, |
|
"eval_rouge-l": 0.362, |
|
"eval_runtime": 711.98, |
|
"eval_samples_per_second": 5.863, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.211356466876972e-05, |
|
"loss": 1.6932, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.6499994993209839, |
|
"eval_rouge-1": 0.3992, |
|
"eval_rouge-2": 0.2227, |
|
"eval_rouge-l": 0.3822, |
|
"eval_runtime": 721.767, |
|
"eval_samples_per_second": 5.783, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.986029743127535e-05, |
|
"loss": 1.5271, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 1.5838263034820557, |
|
"eval_rouge-1": 0.3999, |
|
"eval_rouge-2": 0.2255, |
|
"eval_rouge-l": 0.3825, |
|
"eval_runtime": 791.7162, |
|
"eval_samples_per_second": 5.272, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.760703019378098e-05, |
|
"loss": 1.4984, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 1.5331367254257202, |
|
"eval_rouge-1": 0.4063, |
|
"eval_rouge-2": 0.2319, |
|
"eval_rouge-l": 0.39, |
|
"eval_runtime": 787.263, |
|
"eval_samples_per_second": 5.302, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.535376295628662e-05, |
|
"loss": 1.3435, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 1.5344029664993286, |
|
"eval_rouge-1": 0.419, |
|
"eval_rouge-2": 0.2391, |
|
"eval_rouge-l": 0.4009, |
|
"eval_runtime": 801.0657, |
|
"eval_samples_per_second": 5.211, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3.310049571879225e-05, |
|
"loss": 1.275, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 1.5167639255523682, |
|
"eval_rouge-1": 0.4172, |
|
"eval_rouge-2": 0.2399, |
|
"eval_rouge-l": 0.398, |
|
"eval_runtime": 714.2604, |
|
"eval_samples_per_second": 5.844, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.0847228481297885e-05, |
|
"loss": 1.2267, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 1.5133156776428223, |
|
"eval_rouge-1": 0.4215, |
|
"eval_rouge-2": 0.2435, |
|
"eval_rouge-l": 0.402, |
|
"eval_runtime": 712.9933, |
|
"eval_samples_per_second": 5.854, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 2.859396124380352e-05, |
|
"loss": 1.1519, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_loss": 1.4870655536651611, |
|
"eval_rouge-1": 0.4202, |
|
"eval_rouge-2": 0.244, |
|
"eval_rouge-l": 0.4013, |
|
"eval_runtime": 437.1167, |
|
"eval_samples_per_second": 9.549, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.6340694006309152e-05, |
|
"loss": 1.1841, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_loss": 1.5044740438461304, |
|
"eval_rouge-1": 0.4269, |
|
"eval_rouge-2": 0.2471, |
|
"eval_rouge-l": 0.4076, |
|
"eval_runtime": 446.0452, |
|
"eval_samples_per_second": 9.358, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 2.4087426768814784e-05, |
|
"loss": 1.0586, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 1.4930425882339478, |
|
"eval_rouge-1": 0.4285, |
|
"eval_rouge-2": 0.2518, |
|
"eval_rouge-l": 0.4109, |
|
"eval_runtime": 430.9672, |
|
"eval_samples_per_second": 9.685, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 2.1834159531320416e-05, |
|
"loss": 1.0657, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"eval_loss": 1.488339900970459, |
|
"eval_rouge-1": 0.4273, |
|
"eval_rouge-2": 0.2514, |
|
"eval_rouge-l": 0.4084, |
|
"eval_runtime": 438.2586, |
|
"eval_samples_per_second": 9.524, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.958089229382605e-05, |
|
"loss": 0.9725, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 1.5135878324508667, |
|
"eval_rouge-1": 0.4312, |
|
"eval_rouge-2": 0.2537, |
|
"eval_rouge-l": 0.4116, |
|
"eval_runtime": 436.2911, |
|
"eval_samples_per_second": 9.567, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 1.732762505633168e-05, |
|
"loss": 0.9176, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"eval_loss": 1.5166784524917603, |
|
"eval_rouge-1": 0.433, |
|
"eval_rouge-2": 0.2559, |
|
"eval_rouge-l": 0.4146, |
|
"eval_runtime": 820.6736, |
|
"eval_samples_per_second": 5.086, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 1.5074357818837314e-05, |
|
"loss": 0.8982, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"eval_loss": 1.535448431968689, |
|
"eval_rouge-1": 0.4293, |
|
"eval_rouge-2": 0.2538, |
|
"eval_rouge-l": 0.4101, |
|
"eval_runtime": 828.443, |
|
"eval_samples_per_second": 5.038, |
|
"step": 17000 |
|
} |
|
], |
|
"max_steps": 23690, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.317233087652327e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|