|
{ |
|
"best_metric": 0.9309701492537313, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1972", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 4930, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9986744062735468, |
|
"eval_f1": 0.9266697804764128, |
|
"eval_loss": 0.004987192340195179, |
|
"eval_precision": 0.9288389513108615, |
|
"eval_recall": 0.9245107176141659, |
|
"eval_runtime": 13.1382, |
|
"eval_samples_per_second": 528.689, |
|
"eval_steps_per_second": 66.143, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.0141987829614605, |
|
"grad_norm": 0.12838158011436462, |
|
"learning_rate": 4.4929006085192696e-05, |
|
"loss": 0.018, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.998444171573689, |
|
"eval_f1": 0.9146567717996289, |
|
"eval_loss": 0.0057116178795695305, |
|
"eval_precision": 0.9104339796860572, |
|
"eval_recall": 0.918918918918919, |
|
"eval_runtime": 13.1923, |
|
"eval_samples_per_second": 526.518, |
|
"eval_steps_per_second": 65.872, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.028397565922921, |
|
"grad_norm": 0.014988560229539871, |
|
"learning_rate": 3.98580121703854e-05, |
|
"loss": 0.0044, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9984930092372952, |
|
"eval_f1": 0.9260480452190296, |
|
"eval_loss": 0.00787710677832365, |
|
"eval_precision": 0.9361904761904762, |
|
"eval_recall": 0.9161230195712954, |
|
"eval_runtime": 13.2289, |
|
"eval_samples_per_second": 525.064, |
|
"eval_steps_per_second": 65.69, |
|
"step": 1479 |
|
}, |
|
{ |
|
"epoch": 3.0425963488843815, |
|
"grad_norm": 0.24877521395683289, |
|
"learning_rate": 3.47870182555781e-05, |
|
"loss": 0.0023, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9986953367008066, |
|
"eval_f1": 0.9309701492537313, |
|
"eval_loss": 0.005731322802603245, |
|
"eval_precision": 0.9318394024276377, |
|
"eval_recall": 0.9301025163094129, |
|
"eval_runtime": 13.4618, |
|
"eval_samples_per_second": 515.978, |
|
"eval_steps_per_second": 64.553, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 4.056795131845842, |
|
"grad_norm": 0.003771956777200103, |
|
"learning_rate": 2.9716024340770794e-05, |
|
"loss": 0.0014, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9985837077554209, |
|
"eval_f1": 0.9213587715216379, |
|
"eval_loss": 0.007000353187322617, |
|
"eval_precision": 0.9200743494423792, |
|
"eval_recall": 0.9226467847157502, |
|
"eval_runtime": 13.1965, |
|
"eval_samples_per_second": 526.351, |
|
"eval_steps_per_second": 65.851, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 5.070993914807302, |
|
"grad_norm": 0.006697970442473888, |
|
"learning_rate": 2.4645030425963488e-05, |
|
"loss": 0.0008, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9985209164736416, |
|
"eval_f1": 0.9185938945420906, |
|
"eval_loss": 0.008161710575222969, |
|
"eval_precision": 0.9118457300275482, |
|
"eval_recall": 0.9254426840633737, |
|
"eval_runtime": 13.1313, |
|
"eval_samples_per_second": 528.965, |
|
"eval_steps_per_second": 66.178, |
|
"step": 2958 |
|
}, |
|
{ |
|
"epoch": 6.085192697768763, |
|
"grad_norm": 0.0909859761595726, |
|
"learning_rate": 1.957403651115619e-05, |
|
"loss": 0.0006, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.998618591800854, |
|
"eval_f1": 0.9281767955801105, |
|
"eval_loss": 0.00739107932895422, |
|
"eval_precision": 0.9171974522292994, |
|
"eval_recall": 0.9394221808014911, |
|
"eval_runtime": 13.1688, |
|
"eval_samples_per_second": 527.459, |
|
"eval_steps_per_second": 65.989, |
|
"step": 3451 |
|
}, |
|
{ |
|
"epoch": 7.099391480730223, |
|
"grad_norm": 0.011905203573405743, |
|
"learning_rate": 1.4503042596348884e-05, |
|
"loss": 0.0003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9985069628554685, |
|
"eval_f1": 0.9232201023731968, |
|
"eval_loss": 0.008491172455251217, |
|
"eval_precision": 0.9219330855018587, |
|
"eval_recall": 0.9245107176141659, |
|
"eval_runtime": 13.2155, |
|
"eval_samples_per_second": 525.595, |
|
"eval_steps_per_second": 65.756, |
|
"step": 3944 |
|
}, |
|
{ |
|
"epoch": 8.113590263691684, |
|
"grad_norm": 0.00037644465919584036, |
|
"learning_rate": 9.432048681541583e-06, |
|
"loss": 0.0003, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9985348700918149, |
|
"eval_f1": 0.9233610341643582, |
|
"eval_loss": 0.008594635874032974, |
|
"eval_precision": 0.9149130832570905, |
|
"eval_recall": 0.9319664492078286, |
|
"eval_runtime": 13.5474, |
|
"eval_samples_per_second": 512.72, |
|
"eval_steps_per_second": 64.145, |
|
"step": 4437 |
|
}, |
|
{ |
|
"epoch": 9.127789046653144, |
|
"grad_norm": 0.053812168538570404, |
|
"learning_rate": 4.36105476673428e-06, |
|
"loss": 0.0002, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9985418469009014, |
|
"eval_f1": 0.9231481481481483, |
|
"eval_loss": 0.008874327875673771, |
|
"eval_precision": 0.9172033118675254, |
|
"eval_recall": 0.9291705498602051, |
|
"eval_runtime": 13.6739, |
|
"eval_samples_per_second": 507.975, |
|
"eval_steps_per_second": 63.552, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 4930, |
|
"total_flos": 1.3595017549497408e+16, |
|
"train_loss": 0.0028862289850114567, |
|
"train_runtime": 1222.2462, |
|
"train_samples_per_second": 258.017, |
|
"train_steps_per_second": 4.034 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4930, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3595017549497408e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|