|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.999156296140055, |
|
"eval_steps": 500, |
|
"global_step": 9480, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8955696202531647e-05, |
|
"loss": 2.9241, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7900843881856543e-05, |
|
"loss": 2.7783, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.6845991561181435e-05, |
|
"loss": 2.7342, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.579113924050633e-05, |
|
"loss": 2.6968, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8701, |
|
"eval_gen_len": 19.4437588989084, |
|
"eval_loss": 2.538452386856079, |
|
"eval_precision": 0.8766, |
|
"eval_recall": 0.8641, |
|
"eval_rouge1": 0.2634, |
|
"eval_rouge2": 0.0907, |
|
"eval_rougeL": 0.218, |
|
"eval_rougeLsum": 0.2182, |
|
"eval_runtime": 339.9314, |
|
"eval_samples_per_second": 6.198, |
|
"eval_steps_per_second": 1.55, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.4736286919831224e-05, |
|
"loss": 2.6435, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.368354430379747e-05, |
|
"loss": 2.4844, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.2628691983122363e-05, |
|
"loss": 2.4626, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.1573839662447259e-05, |
|
"loss": 2.4676, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.0523206751054854e-05, |
|
"loss": 2.4746, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8712, |
|
"eval_gen_len": 19.25723777883246, |
|
"eval_loss": 2.507711410522461, |
|
"eval_precision": 0.8774, |
|
"eval_recall": 0.8655, |
|
"eval_rouge1": 0.273, |
|
"eval_rouge2": 0.0941, |
|
"eval_rougeL": 0.2238, |
|
"eval_rougeLsum": 0.2239, |
|
"eval_runtime": 335.7013, |
|
"eval_samples_per_second": 6.276, |
|
"eval_steps_per_second": 1.57, |
|
"step": 4741 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.468354430379748e-06, |
|
"loss": 2.3813, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 8.415611814345991e-06, |
|
"loss": 2.3173, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.360759493670887e-06, |
|
"loss": 2.3064, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 6.305907172995781e-06, |
|
"loss": 2.3008, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.251054852320675e-06, |
|
"loss": 2.3066, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.8696, |
|
"eval_gen_len": 19.307071665875654, |
|
"eval_loss": 2.501197099685669, |
|
"eval_precision": 0.8756, |
|
"eval_recall": 0.864, |
|
"eval_rouge1": 0.2671, |
|
"eval_rouge2": 0.0936, |
|
"eval_rougeL": 0.221, |
|
"eval_rougeLsum": 0.2211, |
|
"eval_runtime": 336.0052, |
|
"eval_samples_per_second": 6.271, |
|
"eval_steps_per_second": 1.568, |
|
"step": 7111 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 4.19620253164557e-06, |
|
"loss": 2.2216, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.141350210970465e-06, |
|
"loss": 2.2071, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.0864978902953587e-06, |
|
"loss": 2.1852, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.0316455696202532e-06, |
|
"loss": 2.2041, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.8705, |
|
"eval_gen_len": 19.35548172757475, |
|
"eval_loss": 2.503497838973999, |
|
"eval_precision": 0.8768, |
|
"eval_recall": 0.8648, |
|
"eval_rouge1": 0.2709, |
|
"eval_rouge2": 0.0948, |
|
"eval_rougeL": 0.2244, |
|
"eval_rougeLsum": 0.2244, |
|
"eval_runtime": 336.6782, |
|
"eval_samples_per_second": 6.258, |
|
"eval_steps_per_second": 1.565, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 9480, |
|
"total_flos": 2.372598264346706e+17, |
|
"train_loss": 2.436967770862177, |
|
"train_runtime": 10313.084, |
|
"train_samples_per_second": 14.709, |
|
"train_steps_per_second": 0.919 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 2.372598264346706e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|