longformer-spans / checkpoint-810 /trainer_state.json
Theoreticallyhugo's picture
Training in progress, epoch 10, checkpoint
d091d37 verified
{
"best_metric": 0.2252955436706543,
"best_model_checkpoint": "longformer-spans/checkpoint-162",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 810,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_B": {
"f1-score": 0.816,
"precision": 0.7461594732991953,
"recall": 0.9002647837599294,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.9321376763813793,
"precision": 0.9024103768767235,
"recall": 0.9638902525500463,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8560784528570645,
"precision": 0.931782945736434,
"recall": 0.7917511147142278,
"support": 9868.0
},
"eval_accuracy": 0.9035249198881844,
"eval_loss": 0.2619660496711731,
"eval_macro avg": {
"f1-score": 0.8680720430794812,
"precision": 0.860117598637451,
"recall": 0.8853020503414012,
"support": 29334.0
},
"eval_runtime": 5.5032,
"eval_samples_per_second": 14.537,
"eval_steps_per_second": 1.817,
"eval_weighted avg": {
"f1-score": 0.9020655278480035,
"precision": 0.9062562975065145,
"recall": 0.9035249198881844,
"support": 29334.0
},
"step": 81
},
{
"epoch": 2.0,
"eval_B": {
"f1-score": 0.8512256973795435,
"precision": 0.8167072181670721,
"recall": 0.8887908208296558,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.9416781438711729,
"precision": 0.9152551099212274,
"recall": 0.9696721758577429,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8778173190984578,
"precision": 0.9380041484212952,
"recall": 0.8248885285772193,
"support": 9868.0
},
"eval_accuracy": 0.9178427763005387,
"eval_loss": 0.2252955436706543,
"eval_macro avg": {
"f1-score": 0.8902403867830581,
"precision": 0.8899888255031981,
"recall": 0.8944505084215394,
"support": 29334.0
},
"eval_runtime": 5.4972,
"eval_samples_per_second": 14.553,
"eval_steps_per_second": 1.819,
"eval_weighted avg": {
"f1-score": 0.9167016237671239,
"precision": 0.9191015935430046,
"recall": 0.9178427763005387,
"support": 29334.0
},
"step": 162
},
{
"epoch": 3.0,
"eval_B": {
"f1-score": 0.8531120331950207,
"precision": 0.8050117462803446,
"recall": 0.9073256840247131,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.9469915571230095,
"precision": 0.9280963603037444,
"recall": 0.9666721213112965,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8903876792352629,
"precision": 0.9353938852934612,
"recall": 0.8495135792460479,
"support": 9868.0
},
"eval_accuracy": 0.9249676143724006,
"eval_loss": 0.22786012291908264,
"eval_macro avg": {
"f1-score": 0.8968304231844311,
"precision": 0.8895006639591835,
"recall": 0.9078371281940192,
"support": 29334.0
},
"eval_runtime": 5.5036,
"eval_samples_per_second": 14.536,
"eval_steps_per_second": 1.817,
"eval_weighted avg": {
"f1-score": 0.9243239165827936,
"precision": 0.9257972230878861,
"recall": 0.9249676143724006,
"support": 29334.0
},
"step": 243
},
{
"epoch": 4.0,
"eval_B": {
"f1-score": 0.8567807351077312,
"precision": 0.8217179902755267,
"recall": 0.8949691085613416,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.9472597903427299,
"precision": 0.9432635621180161,
"recall": 0.9512900234549719,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8979927100980543,
"precision": 0.9099989595255437,
"recall": 0.8862991487636805,
"support": 9868.0
},
"eval_accuracy": 0.9272516533715143,
"eval_loss": 0.23897655308246613,
"eval_macro avg": {
"f1-score": 0.9006777451828384,
"precision": 0.8916601706396955,
"recall": 0.910852760259998,
"support": 29334.0
},
"eval_runtime": 5.4904,
"eval_samples_per_second": 14.571,
"eval_steps_per_second": 1.821,
"eval_weighted avg": {
"f1-score": 0.9271915992526735,
"precision": 0.9273787107073643,
"recall": 0.9272516533715143,
"support": 29334.0
},
"step": 324
},
{
"epoch": 5.0,
"eval_B": {
"f1-score": 0.8624407072013798,
"precision": 0.8431703204047217,
"recall": 0.8826125330979699,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.948190701170407,
"precision": 0.9335059992600032,
"recall": 0.9633447880870561,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8955333298423835,
"precision": 0.9265359193845487,
"recall": 0.8665383056343737,
"support": 9868.0
},
"eval_accuracy": 0.9276607349832958,
"eval_loss": 0.25390708446502686,
"eval_macro avg": {
"f1-score": 0.9020549127380568,
"precision": 0.9010707463497579,
"recall": 0.9041652089397999,
"support": 29334.0
},
"eval_runtime": 5.4919,
"eval_samples_per_second": 14.567,
"eval_steps_per_second": 1.821,
"eval_weighted avg": {
"f1-score": 0.9271646670996412,
"precision": 0.9276721180179627,
"recall": 0.9276607349832958,
"support": 29334.0
},
"step": 405
},
{
"epoch": 6.0,
"eval_B": {
"f1-score": 0.8601036269430052,
"precision": 0.841927303465765,
"recall": 0.8790820829655781,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.9474285714285714,
"precision": 0.9452679589509693,
"recall": 0.9495990836197021,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8983777165595348,
"precision": 0.9045613314156564,
"recall": 0.8922780705310093,
"support": 9868.0
},
"eval_accuracy": 0.9275925547146656,
"eval_loss": 0.29299432039260864,
"eval_macro avg": {
"f1-score": 0.9019699716437038,
"precision": 0.8972521979441302,
"recall": 0.9069864123720964,
"support": 29334.0
},
"eval_runtime": 5.4971,
"eval_samples_per_second": 14.553,
"eval_steps_per_second": 1.819,
"eval_weighted avg": {
"f1-score": 0.9275549436263691,
"precision": 0.9275827485063247,
"recall": 0.9275925547146656,
"support": 29334.0
},
"step": 486
},
{
"epoch": 6.17,
"grad_norm": 2.853372573852539,
"learning_rate": 1.3827160493827162e-05,
"loss": 0.1621,
"step": 500
},
{
"epoch": 7.0,
"eval_B": {
"f1-score": 0.8665526090675792,
"precision": 0.8406639004149378,
"recall": 0.8940864960282436,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.9492722371967655,
"precision": 0.9382959450098577,
"recall": 0.9605083728795069,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8984919396775871,
"precision": 0.9227729117709891,
"recall": 0.8754560194568302,
"support": 9868.0
},
"eval_accuracy": 0.9293311515647371,
"eval_loss": 0.3148973286151886,
"eval_macro avg": {
"f1-score": 0.9047722619806439,
"precision": 0.9005775857319281,
"recall": 0.9100169627881934,
"support": 29334.0
},
"eval_runtime": 5.4821,
"eval_samples_per_second": 14.593,
"eval_steps_per_second": 1.824,
"eval_weighted avg": {
"f1-score": 0.9289946986889036,
"precision": 0.9293030221719495,
"recall": 0.9293311515647371,
"support": 29334.0
},
"step": 567
},
{
"epoch": 8.0,
"eval_B": {
"f1-score": 0.8624629707998307,
"precision": 0.8284552845528456,
"recall": 0.8993821712268314,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.9467755410030451,
"precision": 0.9356556940449557,
"recall": 0.9581628756886489,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8934263985831857,
"precision": 0.9191854233654877,
"recall": 0.8690717470612079,
"support": 9868.0
},
"eval_accuracy": 0.9259221381332242,
"eval_loss": 0.34766319394111633,
"eval_macro avg": {
"f1-score": 0.9008883034620205,
"precision": 0.8944321339877629,
"recall": 0.9088722646588961,
"support": 29334.0
},
"eval_runtime": 5.5202,
"eval_samples_per_second": 14.492,
"eval_steps_per_second": 1.812,
"eval_weighted avg": {
"f1-score": 0.9255723133682386,
"precision": 0.9259745494680297,
"recall": 0.9259221381332242,
"support": 29334.0
},
"step": 648
},
{
"epoch": 9.0,
"eval_B": {
"f1-score": 0.8654012079378774,
"precision": 0.8464135021097047,
"recall": 0.8852603706972639,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.9474813007694164,
"precision": 0.9316216786166175,
"recall": 0.9638902525500463,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8933802299333298,
"precision": 0.9268053588933667,
"recall": 0.8622821240372922,
"support": 9868.0
},
"eval_accuracy": 0.9266721210881571,
"eval_loss": 0.3807723820209503,
"eval_macro avg": {
"f1-score": 0.9020875795468745,
"precision": 0.9016135132065629,
"recall": 0.9038109157615342,
"support": 29334.0
},
"eval_runtime": 5.532,
"eval_samples_per_second": 14.461,
"eval_steps_per_second": 1.808,
"eval_weighted avg": {
"f1-score": 0.9261113508073029,
"precision": 0.9267103706800465,
"recall": 0.9266721210881571,
"support": 29334.0
},
"step": 729
},
{
"epoch": 10.0,
"eval_B": {
"f1-score": 0.8593548387096774,
"precision": 0.8380872483221476,
"recall": 0.881729920564872,
"support": 1133.0
},
"eval_I": {
"f1-score": 0.9447995351539802,
"precision": 0.9158687080751703,
"recall": 0.9756177385043364,
"support": 18333.0
},
"eval_O": {
"f1-score": 0.8826362209837131,
"precision": 0.9469406710786021,
"recall": 0.8265099310903932,
"support": 9868.0
},
"eval_accuracy": 0.9218313220154087,
"eval_loss": 0.46634167432785034,
"eval_macro avg": {
"f1-score": 0.8955968649491236,
"precision": 0.9002988758253068,
"recall": 0.8946191967198672,
"support": 29334.0
},
"eval_runtime": 5.5106,
"eval_samples_per_second": 14.517,
"eval_steps_per_second": 1.815,
"eval_weighted avg": {
"f1-score": 0.9205874800198834,
"precision": 0.9233171207368492,
"recall": 0.9218313220154087,
"support": 29334.0
},
"step": 810
}
],
"logging_steps": 500,
"max_steps": 1620,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1437866527356000.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}