Rodrigo1771's picture
Training in progress, epoch 1
dd16367 verified
raw
history blame
6.49 kB
{
"best_metric": 0.45455732567249935,
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2705",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 5410,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9242144177449169,
"grad_norm": 1.4528056383132935,
"learning_rate": 4.537892791127542e-05,
"loss": 0.3191,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.8443022505389485,
"eval_f1": 0.407486125870823,
"eval_loss": 0.47723615169525146,
"eval_precision": 0.27250473783954515,
"eval_recall": 0.8074403369209172,
"eval_runtime": 14.3908,
"eval_samples_per_second": 473.218,
"eval_steps_per_second": 59.204,
"step": 541
},
{
"epoch": 1.8484288354898335,
"grad_norm": 1.5587466955184937,
"learning_rate": 4.075785582255083e-05,
"loss": 0.1619,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.8552871874442172,
"eval_f1": 0.43975123088883133,
"eval_loss": 0.4583655595779419,
"eval_precision": 0.30406737143881024,
"eval_recall": 0.7941038839494619,
"eval_runtime": 14.212,
"eval_samples_per_second": 479.173,
"eval_steps_per_second": 59.949,
"step": 1082
},
{
"epoch": 2.7726432532347505,
"grad_norm": 0.9616082310676575,
"learning_rate": 3.613678373382625e-05,
"loss": 0.11,
"step": 1500
},
{
"epoch": 3.0,
"eval_accuracy": 0.8435470361267112,
"eval_f1": 0.4338006724608259,
"eval_loss": 0.6447410583496094,
"eval_precision": 0.29758899817216466,
"eval_recall": 0.7999532054281703,
"eval_runtime": 14.3233,
"eval_samples_per_second": 475.451,
"eval_steps_per_second": 59.484,
"step": 1623
},
{
"epoch": 3.6968576709796674,
"grad_norm": 1.4018645286560059,
"learning_rate": 3.1515711645101665e-05,
"loss": 0.0764,
"step": 2000
},
{
"epoch": 4.0,
"eval_accuracy": 0.8398602166778805,
"eval_f1": 0.42338430558177587,
"eval_loss": 0.7413247227668762,
"eval_precision": 0.2895756219333735,
"eval_recall": 0.7870846981750117,
"eval_runtime": 14.5396,
"eval_samples_per_second": 468.375,
"eval_steps_per_second": 58.598,
"step": 2164
},
{
"epoch": 4.621072088724584,
"grad_norm": 1.0904265642166138,
"learning_rate": 2.6894639556377083e-05,
"loss": 0.0567,
"step": 2500
},
{
"epoch": 5.0,
"eval_accuracy": 0.8564886649182308,
"eval_f1": 0.45455732567249935,
"eval_loss": 0.7005925178527832,
"eval_precision": 0.3152508603513856,
"eval_recall": 0.8144595226953674,
"eval_runtime": 14.2723,
"eval_samples_per_second": 477.148,
"eval_steps_per_second": 59.696,
"step": 2705
},
{
"epoch": 5.545286506469501,
"grad_norm": 1.240962028503418,
"learning_rate": 2.2273567467652497e-05,
"loss": 0.0428,
"step": 3000
},
{
"epoch": 6.0,
"eval_accuracy": 0.8504057561069384,
"eval_f1": 0.44700636942675154,
"eval_loss": 0.8111857175827026,
"eval_precision": 0.30710659898477155,
"eval_recall": 0.8210107627515209,
"eval_runtime": 14.3991,
"eval_samples_per_second": 472.946,
"eval_steps_per_second": 59.17,
"step": 3246
},
{
"epoch": 6.469500924214418,
"grad_norm": 0.44737720489501953,
"learning_rate": 1.7652495378927914e-05,
"loss": 0.0332,
"step": 3500
},
{
"epoch": 7.0,
"eval_accuracy": 0.8532961676301372,
"eval_f1": 0.4493518337567586,
"eval_loss": 0.904643714427948,
"eval_precision": 0.3113658932924077,
"eval_recall": 0.8069723912026205,
"eval_runtime": 14.3036,
"eval_samples_per_second": 476.105,
"eval_steps_per_second": 59.566,
"step": 3787
},
{
"epoch": 7.393715341959335,
"grad_norm": 0.7116318941116333,
"learning_rate": 1.3031423290203328e-05,
"loss": 0.0257,
"step": 4000
},
{
"epoch": 8.0,
"eval_accuracy": 0.8481538440413583,
"eval_f1": 0.44435897435897437,
"eval_loss": 0.9722912907600403,
"eval_precision": 0.30602154335158044,
"eval_recall": 0.8109499298081423,
"eval_runtime": 14.4222,
"eval_samples_per_second": 472.19,
"eval_steps_per_second": 59.076,
"step": 4328
},
{
"epoch": 8.317929759704251,
"grad_norm": 0.9520462155342102,
"learning_rate": 8.410351201478742e-06,
"loss": 0.022,
"step": 4500
},
{
"epoch": 9.0,
"eval_accuracy": 0.850186057368833,
"eval_f1": 0.44668737060041414,
"eval_loss": 1.002764105796814,
"eval_precision": 0.30871042747272404,
"eval_recall": 0.8076743097800655,
"eval_runtime": 14.2485,
"eval_samples_per_second": 477.944,
"eval_steps_per_second": 59.796,
"step": 4869
},
{
"epoch": 9.242144177449168,
"grad_norm": 0.6707109212875366,
"learning_rate": 3.789279112754159e-06,
"loss": 0.0181,
"step": 5000
},
{
"epoch": 10.0,
"eval_accuracy": 0.8533304955579661,
"eval_f1": 0.45038613797131544,
"eval_loss": 1.0022608041763306,
"eval_precision": 0.31162999550965426,
"eval_recall": 0.8118858212447356,
"eval_runtime": 14.4559,
"eval_samples_per_second": 471.089,
"eval_steps_per_second": 58.938,
"step": 5410
},
{
"epoch": 10.0,
"step": 5410,
"total_flos": 1.7176580067661056e+16,
"train_loss": 0.0812657987344287,
"train_runtime": 1549.44,
"train_samples_per_second": 223.332,
"train_steps_per_second": 3.492
}
],
"logging_steps": 500,
"max_steps": 5410,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.7176580067661056e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}