{ "best_metric": 0.9336426914153132, "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1311", "epoch": 10.0, "eval_steps": 500, "global_step": 4370, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.9984720788100354, "eval_f1": 0.9122645842903078, "eval_loss": 0.004654619377106428, "eval_precision": 0.8994565217391305, "eval_recall": 0.9254426840633737, "eval_runtime": 13.3719, "eval_samples_per_second": 519.448, "eval_steps_per_second": 64.987, "step": 437 }, { "epoch": 1.1441647597254005, "grad_norm": 0.35407835245132446, "learning_rate": 4.4279176201373e-05, "loss": 0.0144, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.9984511483827756, "eval_f1": 0.9131627056672761, "eval_loss": 0.005305842496454716, "eval_precision": 0.8959641255605382, "eval_recall": 0.9310344827586207, "eval_runtime": 13.2738, "eval_samples_per_second": 523.286, "eval_steps_per_second": 65.467, "step": 874 }, { "epoch": 2.288329519450801, "grad_norm": 0.008026196621358395, "learning_rate": 3.8558352402745995e-05, "loss": 0.0038, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.9987999888371054, "eval_f1": 0.9336426914153132, "eval_loss": 0.004605981521308422, "eval_precision": 0.9297597042513863, "eval_recall": 0.9375582479030755, "eval_runtime": 13.2443, "eval_samples_per_second": 524.45, "eval_steps_per_second": 65.613, "step": 1311 }, { "epoch": 3.4324942791762014, "grad_norm": 0.13839516043663025, "learning_rate": 3.2837528604119e-05, "loss": 0.0022, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.9985627773281612, "eval_f1": 0.9223616922361693, "eval_loss": 0.005482749082148075, "eval_precision": 0.9202226345083488, "eval_recall": 0.9245107176141659, "eval_runtime": 13.4564, "eval_samples_per_second": 516.186, "eval_steps_per_second": 64.579, "step": 1748 }, { "epoch": 4.576659038901602, "grad_norm": 0.01504958514124155, "learning_rate": 2.7116704805491993e-05, "loss": 0.0019, "step": 2000 }, { "epoch": 5.0, "eval_accuracy": 0.9985627773281612, "eval_f1": 0.9231477220432582, "eval_loss": 0.005268561653792858, "eval_precision": 0.9118181818181819, "eval_recall": 0.934762348555452, "eval_runtime": 13.2814, "eval_samples_per_second": 522.986, "eval_steps_per_second": 65.43, "step": 2185 }, { "epoch": 5.720823798627002, "grad_norm": 0.11815565079450607, "learning_rate": 2.139588100686499e-05, "loss": 0.0014, "step": 2500 }, { "epoch": 6.0, "eval_accuracy": 0.998618591800854, "eval_f1": 0.92243381328379, "eval_loss": 0.005419280380010605, "eval_precision": 0.9194444444444444, "eval_recall": 0.9254426840633737, "eval_runtime": 13.1103, "eval_samples_per_second": 529.811, "eval_steps_per_second": 66.284, "step": 2622 }, { "epoch": 6.864988558352403, "grad_norm": 0.012974879704415798, "learning_rate": 1.5675057208237986e-05, "loss": 0.0009, "step": 3000 }, { "epoch": 7.0, "eval_accuracy": 0.9986255686099406, "eval_f1": 0.9289055191768008, "eval_loss": 0.007274709176272154, "eval_precision": 0.9323943661971831, "eval_recall": 0.9254426840633737, "eval_runtime": 13.1693, "eval_samples_per_second": 527.439, "eval_steps_per_second": 65.987, "step": 3059 }, { "epoch": 8.0, "eval_accuracy": 0.9986604526553735, "eval_f1": 0.9297752808988765, "eval_loss": 0.006545887794345617, "eval_precision": 0.9341486359360301, "eval_recall": 0.9254426840633737, "eval_runtime": 13.1956, "eval_samples_per_second": 526.386, "eval_steps_per_second": 65.855, "step": 3496 }, { "epoch": 8.009153318077804, "grad_norm": 0.0015736627392470837, "learning_rate": 9.954233409610985e-06, "loss": 0.0005, "step": 3500 }, { "epoch": 9.0, "eval_accuracy": 0.9986674294644602, "eval_f1": 0.930905695611578, "eval_loss": 0.006908744107931852, "eval_precision": 0.9326473339569691, "eval_recall": 0.9291705498602051, "eval_runtime": 13.5052, "eval_samples_per_second": 514.321, "eval_steps_per_second": 64.346, "step": 3933 }, { "epoch": 9.153318077803204, "grad_norm": 0.0005491800257004797, "learning_rate": 4.233409610983982e-06, "loss": 0.0004, "step": 4000 }, { "epoch": 10.0, "eval_accuracy": 0.9986534758462869, "eval_f1": 0.9270106927010694, "eval_loss": 0.00707679707556963, "eval_precision": 0.924860853432282, "eval_recall": 0.9291705498602051, "eval_runtime": 13.4189, "eval_samples_per_second": 517.629, "eval_steps_per_second": 64.76, "step": 4370 }, { "epoch": 10.0, "step": 4370, "total_flos": 1.1151464037050934e+16, "train_loss": 0.002938754050150616, "train_runtime": 1039.0289, "train_samples_per_second": 269.165, "train_steps_per_second": 4.206 } ], "logging_steps": 500, "max_steps": 4370, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1151464037050934e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }