bsc-bio-ehr-es-distemist-ner / trainer_state.json
Rodrigo1771's picture
End of training
988fd5d verified
raw
history blame
6.25 kB
{
"best_metric": 0.8015293708724366,
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3404",
"epoch": 9.988249118683902,
"eval_steps": 500,
"global_step": 4250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9988249118683902,
"eval_accuracy": 0.9733340656624604,
"eval_f1": 0.7536426810132258,
"eval_loss": 0.07383445650339127,
"eval_precision": 0.7233218588640276,
"eval_recall": 0.786616752456715,
"eval_runtime": 14.6851,
"eval_samples_per_second": 463.736,
"eval_steps_per_second": 58.018,
"step": 425
},
{
"epoch": 1.1750881316098707,
"grad_norm": 1.1428656578063965,
"learning_rate": 4.411764705882353e-05,
"loss": 0.0996,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.9743364411550661,
"eval_f1": 0.7698492462311558,
"eval_loss": 0.07865303754806519,
"eval_precision": 0.7363811151463363,
"eval_recall": 0.8065044454843239,
"eval_runtime": 14.4143,
"eval_samples_per_second": 472.448,
"eval_steps_per_second": 59.108,
"step": 851
},
{
"epoch": 2.3501762632197414,
"grad_norm": 1.476723074913025,
"learning_rate": 3.8235294117647055e-05,
"loss": 0.0458,
"step": 1000
},
{
"epoch": 2.99882491186839,
"eval_accuracy": 0.9759155258351985,
"eval_f1": 0.7928563303378454,
"eval_loss": 0.07876282930374146,
"eval_precision": 0.7715297764002657,
"eval_recall": 0.8153954141319607,
"eval_runtime": 14.6794,
"eval_samples_per_second": 463.917,
"eval_steps_per_second": 58.041,
"step": 1276
},
{
"epoch": 3.525264394829612,
"grad_norm": 0.4094911515712738,
"learning_rate": 3.235294117647059e-05,
"loss": 0.0279,
"step": 1500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9757026926826589,
"eval_f1": 0.7929102344196683,
"eval_loss": 0.09221930056810379,
"eval_precision": 0.775441735629613,
"eval_recall": 0.8111839026672906,
"eval_runtime": 14.825,
"eval_samples_per_second": 459.361,
"eval_steps_per_second": 57.471,
"step": 1702
},
{
"epoch": 4.700352526439483,
"grad_norm": 0.49960169196128845,
"learning_rate": 2.647058823529412e-05,
"loss": 0.0169,
"step": 2000
},
{
"epoch": 4.9988249118683905,
"eval_accuracy": 0.974439424938553,
"eval_f1": 0.7863421230561191,
"eval_loss": 0.09940312057733536,
"eval_precision": 0.7584782608695653,
"eval_recall": 0.816331305568554,
"eval_runtime": 14.3671,
"eval_samples_per_second": 473.999,
"eval_steps_per_second": 59.302,
"step": 2127
},
{
"epoch": 5.875440658049354,
"grad_norm": 0.6507154703140259,
"learning_rate": 2.058823529411765e-05,
"loss": 0.0114,
"step": 2500
},
{
"epoch": 6.0,
"eval_accuracy": 0.9764579070948961,
"eval_f1": 0.7909059593523942,
"eval_loss": 0.10803968459367752,
"eval_precision": 0.7765501691093574,
"eval_recall": 0.8058025269068788,
"eval_runtime": 14.5335,
"eval_samples_per_second": 468.572,
"eval_steps_per_second": 58.623,
"step": 2553
},
{
"epoch": 6.9988249118683905,
"eval_accuracy": 0.975997912861988,
"eval_f1": 0.7943099690260411,
"eval_loss": 0.11656877398490906,
"eval_precision": 0.7792032410533424,
"eval_recall": 0.810014038371549,
"eval_runtime": 14.6842,
"eval_samples_per_second": 463.764,
"eval_steps_per_second": 58.022,
"step": 2978
},
{
"epoch": 7.050528789659224,
"grad_norm": 0.15811629593372345,
"learning_rate": 1.4705882352941177e-05,
"loss": 0.0079,
"step": 3000
},
{
"epoch": 8.0,
"eval_accuracy": 0.9767668584453568,
"eval_f1": 0.8015293708724366,
"eval_loss": 0.1294233798980713,
"eval_precision": 0.7938948817994033,
"eval_recall": 0.8093121197941039,
"eval_runtime": 14.774,
"eval_samples_per_second": 460.946,
"eval_steps_per_second": 57.669,
"step": 3404
},
{
"epoch": 8.225616921269095,
"grad_norm": 0.11404519528150558,
"learning_rate": 8.823529411764707e-06,
"loss": 0.0053,
"step": 3500
},
{
"epoch": 8.99882491186839,
"eval_accuracy": 0.9766158155629093,
"eval_f1": 0.7988929889298894,
"eval_loss": 0.13398884236812592,
"eval_precision": 0.787630741246021,
"eval_recall": 0.8104819840898456,
"eval_runtime": 14.4437,
"eval_samples_per_second": 471.487,
"eval_steps_per_second": 58.988,
"step": 3829
},
{
"epoch": 9.400705052878966,
"grad_norm": 0.5636719465255737,
"learning_rate": 2.9411764705882355e-06,
"loss": 0.0038,
"step": 4000
},
{
"epoch": 9.988249118683902,
"eval_accuracy": 0.9766776058330014,
"eval_f1": 0.7988459319099828,
"eval_loss": 0.13674204051494598,
"eval_precision": 0.7882031427920747,
"eval_recall": 0.8097800655124006,
"eval_runtime": 14.8828,
"eval_samples_per_second": 457.575,
"eval_steps_per_second": 57.247,
"step": 4250
},
{
"epoch": 9.988249118683902,
"step": 4250,
"total_flos": 1.2649124434987926e+16,
"train_loss": 0.02590363489880281,
"train_runtime": 1203.9865,
"train_samples_per_second": 226.157,
"train_steps_per_second": 3.53
}
],
"logging_steps": 500,
"max_steps": 4250,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2649124434987926e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}