|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.8761073308512004, |
|
"global_step": 5600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 3.641, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 1.9178, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 1.7533, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.9066718816757202, |
|
"eval_rouge2_fmeasure": 0.1762, |
|
"eval_rouge2_precision": 0.1696, |
|
"eval_rouge2_recall": 0.2103, |
|
"eval_runtime": 1724.6841, |
|
"eval_samples_per_second": 0.252, |
|
"eval_steps_per_second": 0.252, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 1.6872, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.6179, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 1.6207, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 1.9220949411392212, |
|
"eval_rouge2_fmeasure": 0.1758, |
|
"eval_rouge2_precision": 0.1798, |
|
"eval_rouge2_recall": 0.1946, |
|
"eval_runtime": 1498.1119, |
|
"eval_samples_per_second": 0.29, |
|
"eval_steps_per_second": 0.29, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.917069799585349e-05, |
|
"loss": 1.6178, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.806496199032481e-05, |
|
"loss": 1.5584, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.695922598479613e-05, |
|
"loss": 1.335, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.585348997926745e-05, |
|
"loss": 1.3719, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 1.7529886960983276, |
|
"eval_rouge2_fmeasure": 0.2175, |
|
"eval_rouge2_precision": 0.2233, |
|
"eval_rouge2_recall": 0.2326, |
|
"eval_runtime": 1473.4895, |
|
"eval_samples_per_second": 0.295, |
|
"eval_steps_per_second": 0.295, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.4747753973738772e-05, |
|
"loss": 1.3211, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.3642017968210091e-05, |
|
"loss": 1.3276, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.2536281962681412e-05, |
|
"loss": 1.2702, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 1.668319821357727, |
|
"eval_rouge2_fmeasure": 0.2329, |
|
"eval_rouge2_precision": 0.2476, |
|
"eval_rouge2_recall": 0.2394, |
|
"eval_runtime": 1458.0732, |
|
"eval_samples_per_second": 0.298, |
|
"eval_steps_per_second": 0.298, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.143054595715273e-05, |
|
"loss": 1.2236, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.0324809951624052e-05, |
|
"loss": 1.2533, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.21907394609537e-06, |
|
"loss": 1.2119, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 1.6909013986587524, |
|
"eval_rouge2_fmeasure": 0.2427, |
|
"eval_rouge2_precision": 0.2552, |
|
"eval_rouge2_recall": 0.2507, |
|
"eval_runtime": 1496.8818, |
|
"eval_samples_per_second": 0.29, |
|
"eval_steps_per_second": 0.29, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 8.11333794056669e-06, |
|
"loss": 1.0075, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.00760193503801e-06, |
|
"loss": 0.9473, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 5.901865929509331e-06, |
|
"loss": 0.9265, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.796129923980651e-06, |
|
"loss": 0.9583, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 1.6477222442626953, |
|
"eval_rouge2_fmeasure": 0.253, |
|
"eval_rouge2_precision": 0.2591, |
|
"eval_rouge2_recall": 0.2655, |
|
"eval_runtime": 1466.7648, |
|
"eval_samples_per_second": 0.296, |
|
"eval_steps_per_second": 0.296, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.69039391845197e-06, |
|
"loss": 0.8958, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.5846579129232894e-06, |
|
"loss": 0.9714, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.4789219073946095e-06, |
|
"loss": 0.8989, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_loss": 1.6045594215393066, |
|
"eval_rouge2_fmeasure": 0.264, |
|
"eval_rouge2_precision": 0.2695, |
|
"eval_rouge2_recall": 0.2781, |
|
"eval_runtime": 1496.2313, |
|
"eval_samples_per_second": 0.29, |
|
"eval_steps_per_second": 0.29, |
|
"step": 5600 |
|
} |
|
], |
|
"max_steps": 5841, |
|
"num_train_epochs": 3, |
|
"total_flos": 5.3748207477325824e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|