|
{ |
|
"best_metric": 0.9336426914153132, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1311", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 4370, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9984720788100354, |
|
"eval_f1": 0.9122645842903078, |
|
"eval_loss": 0.004654619377106428, |
|
"eval_precision": 0.8994565217391305, |
|
"eval_recall": 0.9254426840633737, |
|
"eval_runtime": 13.3719, |
|
"eval_samples_per_second": 519.448, |
|
"eval_steps_per_second": 64.987, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.1441647597254005, |
|
"grad_norm": 0.35407835245132446, |
|
"learning_rate": 4.4279176201373e-05, |
|
"loss": 0.0144, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9984511483827756, |
|
"eval_f1": 0.9131627056672761, |
|
"eval_loss": 0.005305842496454716, |
|
"eval_precision": 0.8959641255605382, |
|
"eval_recall": 0.9310344827586207, |
|
"eval_runtime": 13.2738, |
|
"eval_samples_per_second": 523.286, |
|
"eval_steps_per_second": 65.467, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.288329519450801, |
|
"grad_norm": 0.008026196621358395, |
|
"learning_rate": 3.8558352402745995e-05, |
|
"loss": 0.0038, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9987999888371054, |
|
"eval_f1": 0.9336426914153132, |
|
"eval_loss": 0.004605981521308422, |
|
"eval_precision": 0.9297597042513863, |
|
"eval_recall": 0.9375582479030755, |
|
"eval_runtime": 13.2443, |
|
"eval_samples_per_second": 524.45, |
|
"eval_steps_per_second": 65.613, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 3.4324942791762014, |
|
"grad_norm": 0.13839516043663025, |
|
"learning_rate": 3.2837528604119e-05, |
|
"loss": 0.0022, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9985627773281612, |
|
"eval_f1": 0.9223616922361693, |
|
"eval_loss": 0.005482749082148075, |
|
"eval_precision": 0.9202226345083488, |
|
"eval_recall": 0.9245107176141659, |
|
"eval_runtime": 13.4564, |
|
"eval_samples_per_second": 516.186, |
|
"eval_steps_per_second": 64.579, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 4.576659038901602, |
|
"grad_norm": 0.01504958514124155, |
|
"learning_rate": 2.7116704805491993e-05, |
|
"loss": 0.0019, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9985627773281612, |
|
"eval_f1": 0.9231477220432582, |
|
"eval_loss": 0.005268561653792858, |
|
"eval_precision": 0.9118181818181819, |
|
"eval_recall": 0.934762348555452, |
|
"eval_runtime": 13.2814, |
|
"eval_samples_per_second": 522.986, |
|
"eval_steps_per_second": 65.43, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 5.720823798627002, |
|
"grad_norm": 0.11815565079450607, |
|
"learning_rate": 2.139588100686499e-05, |
|
"loss": 0.0014, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.998618591800854, |
|
"eval_f1": 0.92243381328379, |
|
"eval_loss": 0.005419280380010605, |
|
"eval_precision": 0.9194444444444444, |
|
"eval_recall": 0.9254426840633737, |
|
"eval_runtime": 13.1103, |
|
"eval_samples_per_second": 529.811, |
|
"eval_steps_per_second": 66.284, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 6.864988558352403, |
|
"grad_norm": 0.012974879704415798, |
|
"learning_rate": 1.5675057208237986e-05, |
|
"loss": 0.0009, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9986255686099406, |
|
"eval_f1": 0.9289055191768008, |
|
"eval_loss": 0.007274709176272154, |
|
"eval_precision": 0.9323943661971831, |
|
"eval_recall": 0.9254426840633737, |
|
"eval_runtime": 13.1693, |
|
"eval_samples_per_second": 527.439, |
|
"eval_steps_per_second": 65.987, |
|
"step": 3059 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9986604526553735, |
|
"eval_f1": 0.9297752808988765, |
|
"eval_loss": 0.006545887794345617, |
|
"eval_precision": 0.9341486359360301, |
|
"eval_recall": 0.9254426840633737, |
|
"eval_runtime": 13.1956, |
|
"eval_samples_per_second": 526.386, |
|
"eval_steps_per_second": 65.855, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 8.009153318077804, |
|
"grad_norm": 0.0015736627392470837, |
|
"learning_rate": 9.954233409610985e-06, |
|
"loss": 0.0005, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9986674294644602, |
|
"eval_f1": 0.930905695611578, |
|
"eval_loss": 0.006908744107931852, |
|
"eval_precision": 0.9326473339569691, |
|
"eval_recall": 0.9291705498602051, |
|
"eval_runtime": 13.5052, |
|
"eval_samples_per_second": 514.321, |
|
"eval_steps_per_second": 64.346, |
|
"step": 3933 |
|
}, |
|
{ |
|
"epoch": 9.153318077803204, |
|
"grad_norm": 0.0005491800257004797, |
|
"learning_rate": 4.233409610983982e-06, |
|
"loss": 0.0004, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9986534758462869, |
|
"eval_f1": 0.9270106927010694, |
|
"eval_loss": 0.00707679707556963, |
|
"eval_precision": 0.924860853432282, |
|
"eval_recall": 0.9291705498602051, |
|
"eval_runtime": 13.4189, |
|
"eval_samples_per_second": 517.629, |
|
"eval_steps_per_second": 64.76, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 4370, |
|
"total_flos": 1.1151464037050934e+16, |
|
"train_loss": 0.002938754050150616, |
|
"train_runtime": 1039.0289, |
|
"train_samples_per_second": 269.165, |
|
"train_steps_per_second": 4.206 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4370, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1151464037050934e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|