|
{ |
|
"best_metric": 0.6162260174751282, |
|
"best_model_checkpoint": "flan_t5_summarization/checkpoint-2720", |
|
"epoch": 10.0, |
|
"global_step": 2720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.120025634765625, |
|
"eval_rouge1": 9.2565, |
|
"eval_rouge2": 1.2805, |
|
"eval_rougeL": 9.2358, |
|
"eval_rougeLsum": 9.284, |
|
"eval_runtime": 4.0606, |
|
"eval_samples_per_second": 16.5, |
|
"eval_steps_per_second": 1.478, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.0808823529411765e-05, |
|
"loss": 1.5343, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8922988772392273, |
|
"eval_rouge1": 10.9045, |
|
"eval_rouge2": 2.9468, |
|
"eval_rougeL": 10.9112, |
|
"eval_rougeLsum": 10.8827, |
|
"eval_runtime": 4.0719, |
|
"eval_samples_per_second": 16.454, |
|
"eval_steps_per_second": 1.474, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7884227633476257, |
|
"eval_rouge1": 13.637, |
|
"eval_rouge2": 4.8447, |
|
"eval_rougeL": 13.3594, |
|
"eval_rougeLsum": 13.3459, |
|
"eval_runtime": 4.0204, |
|
"eval_samples_per_second": 16.665, |
|
"eval_steps_per_second": 1.492, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 3.161764705882353e-05, |
|
"loss": 0.9949, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 18.91044776119403, |
|
"eval_loss": 0.7256659269332886, |
|
"eval_rouge1": 15.2005, |
|
"eval_rouge2": 6.3919, |
|
"eval_rougeL": 14.781, |
|
"eval_rougeLsum": 14.7204, |
|
"eval_runtime": 4.0736, |
|
"eval_samples_per_second": 16.447, |
|
"eval_steps_per_second": 1.473, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 18.73134328358209, |
|
"eval_loss": 0.6852018237113953, |
|
"eval_rouge1": 15.1627, |
|
"eval_rouge2": 6.3116, |
|
"eval_rougeL": 14.7187, |
|
"eval_rougeLsum": 14.6983, |
|
"eval_runtime": 4.0455, |
|
"eval_samples_per_second": 16.561, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 2.2426470588235296e-05, |
|
"loss": 0.8504, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 18.73134328358209, |
|
"eval_loss": 0.6580936312675476, |
|
"eval_rouge1": 16.0795, |
|
"eval_rouge2": 6.9284, |
|
"eval_rougeL": 15.3516, |
|
"eval_rougeLsum": 15.3571, |
|
"eval_runtime": 4.0423, |
|
"eval_samples_per_second": 16.575, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 18.91044776119403, |
|
"eval_loss": 0.6391794681549072, |
|
"eval_rouge1": 16.0518, |
|
"eval_rouge2": 6.9377, |
|
"eval_rougeL": 15.3914, |
|
"eval_rougeLsum": 15.3748, |
|
"eval_runtime": 4.0453, |
|
"eval_samples_per_second": 16.562, |
|
"eval_steps_per_second": 1.483, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 0.7841, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 18.82089552238806, |
|
"eval_loss": 0.6258341073989868, |
|
"eval_rouge1": 16.1307, |
|
"eval_rouge2": 7.6286, |
|
"eval_rougeL": 15.7398, |
|
"eval_rougeLsum": 15.7627, |
|
"eval_runtime": 4.0536, |
|
"eval_samples_per_second": 16.529, |
|
"eval_steps_per_second": 1.48, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 18.73134328358209, |
|
"eval_loss": 0.6200478672981262, |
|
"eval_rouge1": 15.9488, |
|
"eval_rouge2": 7.4447, |
|
"eval_rougeL": 15.5654, |
|
"eval_rougeLsum": 15.583, |
|
"eval_runtime": 4.042, |
|
"eval_samples_per_second": 16.576, |
|
"eval_steps_per_second": 1.484, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 4.044117647058824e-06, |
|
"loss": 0.7599, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 18.73134328358209, |
|
"eval_loss": 0.6162260174751282, |
|
"eval_rouge1": 15.9418, |
|
"eval_rouge2": 7.4447, |
|
"eval_rougeL": 15.5655, |
|
"eval_rougeLsum": 15.5835, |
|
"eval_runtime": 4.0403, |
|
"eval_samples_per_second": 16.583, |
|
"eval_steps_per_second": 1.485, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2720, |
|
"total_flos": 1177555285370880.0, |
|
"train_loss": 0.9648406533633962, |
|
"train_runtime": 1843.1414, |
|
"train_samples_per_second": 17.693, |
|
"train_steps_per_second": 1.476 |
|
} |
|
], |
|
"max_steps": 2720, |
|
"num_train_epochs": 10, |
|
"total_flos": 1177555285370880.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|