|
{ |
|
"best_metric": 0.45455732567249935, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2705", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 5410, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9242144177449169, |
|
"grad_norm": 1.4528056383132935, |
|
"learning_rate": 4.537892791127542e-05, |
|
"loss": 0.3191, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8443022505389485, |
|
"eval_f1": 0.407486125870823, |
|
"eval_loss": 0.47723615169525146, |
|
"eval_precision": 0.27250473783954515, |
|
"eval_recall": 0.8074403369209172, |
|
"eval_runtime": 14.3908, |
|
"eval_samples_per_second": 473.218, |
|
"eval_steps_per_second": 59.204, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.8484288354898335, |
|
"grad_norm": 1.5587466955184937, |
|
"learning_rate": 4.075785582255083e-05, |
|
"loss": 0.1619, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8552871874442172, |
|
"eval_f1": 0.43975123088883133, |
|
"eval_loss": 0.4583655595779419, |
|
"eval_precision": 0.30406737143881024, |
|
"eval_recall": 0.7941038839494619, |
|
"eval_runtime": 14.212, |
|
"eval_samples_per_second": 479.173, |
|
"eval_steps_per_second": 59.949, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.7726432532347505, |
|
"grad_norm": 0.9616082310676575, |
|
"learning_rate": 3.613678373382625e-05, |
|
"loss": 0.11, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8435470361267112, |
|
"eval_f1": 0.4338006724608259, |
|
"eval_loss": 0.6447410583496094, |
|
"eval_precision": 0.29758899817216466, |
|
"eval_recall": 0.7999532054281703, |
|
"eval_runtime": 14.3233, |
|
"eval_samples_per_second": 475.451, |
|
"eval_steps_per_second": 59.484, |
|
"step": 1623 |
|
}, |
|
{ |
|
"epoch": 3.6968576709796674, |
|
"grad_norm": 1.4018645286560059, |
|
"learning_rate": 3.1515711645101665e-05, |
|
"loss": 0.0764, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8398602166778805, |
|
"eval_f1": 0.42338430558177587, |
|
"eval_loss": 0.7413247227668762, |
|
"eval_precision": 0.2895756219333735, |
|
"eval_recall": 0.7870846981750117, |
|
"eval_runtime": 14.5396, |
|
"eval_samples_per_second": 468.375, |
|
"eval_steps_per_second": 58.598, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 4.621072088724584, |
|
"grad_norm": 1.0904265642166138, |
|
"learning_rate": 2.6894639556377083e-05, |
|
"loss": 0.0567, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8564886649182308, |
|
"eval_f1": 0.45455732567249935, |
|
"eval_loss": 0.7005925178527832, |
|
"eval_precision": 0.3152508603513856, |
|
"eval_recall": 0.8144595226953674, |
|
"eval_runtime": 14.2723, |
|
"eval_samples_per_second": 477.148, |
|
"eval_steps_per_second": 59.696, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 5.545286506469501, |
|
"grad_norm": 1.240962028503418, |
|
"learning_rate": 2.2273567467652497e-05, |
|
"loss": 0.0428, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8504057561069384, |
|
"eval_f1": 0.44700636942675154, |
|
"eval_loss": 0.8111857175827026, |
|
"eval_precision": 0.30710659898477155, |
|
"eval_recall": 0.8210107627515209, |
|
"eval_runtime": 14.3991, |
|
"eval_samples_per_second": 472.946, |
|
"eval_steps_per_second": 59.17, |
|
"step": 3246 |
|
}, |
|
{ |
|
"epoch": 6.469500924214418, |
|
"grad_norm": 0.44737720489501953, |
|
"learning_rate": 1.7652495378927914e-05, |
|
"loss": 0.0332, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8532961676301372, |
|
"eval_f1": 0.4493518337567586, |
|
"eval_loss": 0.904643714427948, |
|
"eval_precision": 0.3113658932924077, |
|
"eval_recall": 0.8069723912026205, |
|
"eval_runtime": 14.3036, |
|
"eval_samples_per_second": 476.105, |
|
"eval_steps_per_second": 59.566, |
|
"step": 3787 |
|
}, |
|
{ |
|
"epoch": 7.393715341959335, |
|
"grad_norm": 0.7116318941116333, |
|
"learning_rate": 1.3031423290203328e-05, |
|
"loss": 0.0257, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8481538440413583, |
|
"eval_f1": 0.44435897435897437, |
|
"eval_loss": 0.9722912907600403, |
|
"eval_precision": 0.30602154335158044, |
|
"eval_recall": 0.8109499298081423, |
|
"eval_runtime": 14.4222, |
|
"eval_samples_per_second": 472.19, |
|
"eval_steps_per_second": 59.076, |
|
"step": 4328 |
|
}, |
|
{ |
|
"epoch": 8.317929759704251, |
|
"grad_norm": 0.9520462155342102, |
|
"learning_rate": 8.410351201478742e-06, |
|
"loss": 0.022, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.850186057368833, |
|
"eval_f1": 0.44668737060041414, |
|
"eval_loss": 1.002764105796814, |
|
"eval_precision": 0.30871042747272404, |
|
"eval_recall": 0.8076743097800655, |
|
"eval_runtime": 14.2485, |
|
"eval_samples_per_second": 477.944, |
|
"eval_steps_per_second": 59.796, |
|
"step": 4869 |
|
}, |
|
{ |
|
"epoch": 9.242144177449168, |
|
"grad_norm": 0.6707109212875366, |
|
"learning_rate": 3.789279112754159e-06, |
|
"loss": 0.0181, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8533304955579661, |
|
"eval_f1": 0.45038613797131544, |
|
"eval_loss": 1.0022608041763306, |
|
"eval_precision": 0.31162999550965426, |
|
"eval_recall": 0.8118858212447356, |
|
"eval_runtime": 14.4559, |
|
"eval_samples_per_second": 471.089, |
|
"eval_steps_per_second": 58.938, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 5410, |
|
"total_flos": 1.7176580067661056e+16, |
|
"train_loss": 0.0812657987344287, |
|
"train_runtime": 1549.44, |
|
"train_samples_per_second": 223.332, |
|
"train_steps_per_second": 3.492 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5410, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7176580067661056e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|