longformer-spans / checkpoint-492 /trainer_state.json
Theoreticallyhugo's picture
Training in progress, epoch 12, checkpoint
ac15db7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 12.0,
"eval_steps": 500,
"global_step": 492,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_B": {
"f1-score": 0.5721362229102167,
"precision": 0.8076923076923077,
"recall": 0.4429530201342282,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.927966217883682,
"precision": 0.8850358282336942,
"recall": 0.9752737752161383,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.864171621779177,
"precision": 0.9349142280524723,
"recall": 0.8033817472360719,
"support": 9226.0
},
"eval_accuracy": 0.8977515478474963,
"eval_loss": 0.29473039507865906,
"eval_macro avg": {
"f1-score": 0.7880913541910252,
"precision": 0.8758807879928246,
"recall": 0.7405361808621462,
"support": 27619.0
},
"eval_runtime": 1.3384,
"eval_samples_per_second": 59.772,
"eval_steps_per_second": 7.472,
"eval_weighted avg": {
"f1-score": 0.8932184128068332,
"precision": 0.8987766886849552,
"recall": 0.8977515478474963,
"support": 27619.0
},
"step": 41
},
{
"epoch": 2.0,
"eval_B": {
"f1-score": 0.8396183552930486,
"precision": 0.7979274611398963,
"recall": 0.8859060402684564,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9529475085691623,
"precision": 0.9369951534733441,
"recall": 0.9694524495677234,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9060667568786648,
"precision": 0.9441833137485312,
"recall": 0.8709083026230219,
"support": 9226.0
},
"eval_accuracy": 0.9333791954813715,
"eval_loss": 0.19544227421283722,
"eval_macro avg": {
"f1-score": 0.8995442069136251,
"precision": 0.8930353094539237,
"recall": 0.9087555974864006,
"support": 27619.0
},
"eval_runtime": 1.3427,
"eval_samples_per_second": 59.582,
"eval_steps_per_second": 7.448,
"eval_weighted avg": {
"f1-score": 0.933007462877301,
"precision": 0.9341445927577168,
"recall": 0.9333791954813715,
"support": 27619.0
},
"step": 82
},
{
"epoch": 3.0,
"eval_B": {
"f1-score": 0.8647365923113433,
"precision": 0.856203007518797,
"recall": 0.8734419942473634,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9557367501456028,
"precision": 0.9658622719246616,
"recall": 0.945821325648415,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9184183917833005,
"precision": 0.9021432305279665,
"recall": 0.9352915673097767,
"support": 9226.0
},
"eval_accuracy": 0.9395705854665267,
"eval_loss": 0.17375266551971436,
"eval_macro avg": {
"f1-score": 0.9129639114134155,
"precision": 0.9080695033238083,
"recall": 0.9181849624018517,
"support": 27619.0
},
"eval_runtime": 1.344,
"eval_samples_per_second": 59.525,
"eval_steps_per_second": 7.441,
"eval_weighted avg": {
"f1-score": 0.9398342070096555,
"precision": 0.940436062116152,
"recall": 0.9395705854665267,
"support": 27619.0
},
"step": 123
},
{
"epoch": 4.0,
"eval_B": {
"f1-score": 0.862493839329719,
"precision": 0.8874239350912779,
"recall": 0.8389261744966443,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9584169773444221,
"precision": 0.9537671232876712,
"recall": 0.9631123919308358,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9205518294345384,
"precision": 0.926259190167892,
"recall": 0.9149143724257534,
"support": 9226.0
},
"eval_accuracy": 0.9423223143488179,
"eval_loss": 0.17503595352172852,
"eval_macro avg": {
"f1-score": 0.9138208820362266,
"precision": 0.9224834161822804,
"recall": 0.9056509796177444,
"support": 27619.0
},
"eval_runtime": 1.3496,
"eval_samples_per_second": 59.277,
"eval_steps_per_second": 7.41,
"eval_weighted avg": {
"f1-score": 0.9421458709478862,
"precision": 0.9420728499160095,
"recall": 0.9423223143488179,
"support": 27619.0
},
"step": 164
},
{
"epoch": 5.0,
"eval_B": {
"f1-score": 0.8739573679332716,
"precision": 0.8457399103139014,
"recall": 0.9041227229146692,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.954658525554048,
"precision": 0.9367580161988239,
"recall": 0.9732564841498559,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9086082241301401,
"precision": 0.948690728945506,
"recall": 0.8717754172989378,
"support": 9226.0
},
"eval_accuracy": 0.9367464426662805,
"eval_loss": 0.20350226759910583,
"eval_macro avg": {
"f1-score": 0.9124080392058199,
"precision": 0.9103962184860771,
"recall": 0.916384874787821,
"support": 27619.0
},
"eval_runtime": 1.3385,
"eval_samples_per_second": 59.767,
"eval_steps_per_second": 7.471,
"eval_weighted avg": {
"f1-score": 0.9362280469583188,
"precision": 0.9373068891979518,
"recall": 0.9367464426662805,
"support": 27619.0
},
"step": 205
},
{
"epoch": 6.0,
"eval_B": {
"f1-score": 0.8744710860366715,
"precision": 0.8579335793357934,
"recall": 0.8916586768935763,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9548456588905582,
"precision": 0.9394277427631212,
"recall": 0.970778097982709,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9091520861372813,
"precision": 0.941900999302812,
"recall": 0.8786039453717754,
"support": 9226.0
},
"eval_accuracy": 0.9369998913791231,
"eval_loss": 0.18961849808692932,
"eval_macro avg": {
"f1-score": 0.9128229436881702,
"precision": 0.9130874404672422,
"recall": 0.9136802400826869,
"support": 27619.0
},
"eval_runtime": 1.3446,
"eval_samples_per_second": 59.497,
"eval_steps_per_second": 7.437,
"eval_weighted avg": {
"f1-score": 0.9365466769683909,
"precision": 0.9371763887090455,
"recall": 0.9369998913791231,
"support": 27619.0
},
"step": 246
},
{
"epoch": 7.0,
"eval_B": {
"f1-score": 0.8734770384254921,
"precision": 0.854262144821265,
"recall": 0.8935762224352828,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9571367703451216,
"precision": 0.9436012321478577,
"recall": 0.9710662824207493,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9144644952231968,
"precision": 0.9436181252161882,
"recall": 0.8870583134619553,
"support": 9226.0
},
"eval_accuracy": 0.9400774828922119,
"eval_loss": 0.19739200174808502,
"eval_macro avg": {
"f1-score": 0.9150261013312702,
"precision": 0.9138271673951035,
"recall": 0.9172336061059957,
"support": 27619.0
},
"eval_runtime": 1.3403,
"eval_samples_per_second": 59.69,
"eval_steps_per_second": 7.461,
"eval_weighted avg": {
"f1-score": 0.9397229787282255,
"precision": 0.9402330865729558,
"recall": 0.9400774828922119,
"support": 27619.0
},
"step": 287
},
{
"epoch": 8.0,
"eval_B": {
"f1-score": 0.875,
"precision": 0.851952770208901,
"recall": 0.8993288590604027,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9546683185043361,
"precision": 0.9332269074094462,
"recall": 0.9771181556195966,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.90670155876664,
"precision": 0.9541427203065134,
"recall": 0.8637546065467158,
"support": 9226.0
},
"eval_accuracy": 0.9363119591585503,
"eval_loss": 0.2391989678144455,
"eval_macro avg": {
"f1-score": 0.9121232924236587,
"precision": 0.9131074659749535,
"recall": 0.913400540408905,
"support": 27619.0
},
"eval_runtime": 1.3469,
"eval_samples_per_second": 59.396,
"eval_steps_per_second": 7.424,
"eval_weighted avg": {
"f1-score": 0.9356366598077863,
"precision": 0.937144513575063,
"recall": 0.9363119591585503,
"support": 27619.0
},
"step": 328
},
{
"epoch": 9.0,
"eval_B": {
"f1-score": 0.8698850574712644,
"precision": 0.8356890459363958,
"recall": 0.9069990412272292,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9540004482294935,
"precision": 0.9281042189033032,
"recall": 0.9813832853025937,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9027465883572292,
"precision": 0.9629038201695124,
"recall": 0.8496639930630826,
"support": 9226.0
},
"eval_accuracy": 0.9345740251276295,
"eval_loss": 0.25876709818840027,
"eval_macro avg": {
"f1-score": 0.9088773646859957,
"precision": 0.9088990283364038,
"recall": 0.9126821065309684,
"support": 27619.0
},
"eval_runtime": 1.3397,
"eval_samples_per_second": 59.713,
"eval_steps_per_second": 7.464,
"eval_weighted avg": {
"f1-score": 0.9337028102359982,
"precision": 0.9362389122621345,
"recall": 0.9345740251276295,
"support": 27619.0
},
"step": 369
},
{
"epoch": 10.0,
"eval_B": {
"f1-score": 0.8675496688741721,
"precision": 0.8562091503267973,
"recall": 0.8791946308724832,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9541242937853106,
"precision": 0.9356232686980609,
"recall": 0.973371757925072,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9069058903182126,
"precision": 0.9457519416333255,
"recall": 0.8711250812920008,
"support": 9226.0
},
"eval_accuracy": 0.935660233896955,
"eval_loss": 0.2737439274787903,
"eval_macro avg": {
"f1-score": 0.9095266176592318,
"precision": 0.9125281202193946,
"recall": 0.9078971566965187,
"support": 27619.0
},
"eval_runtime": 1.3436,
"eval_samples_per_second": 59.54,
"eval_steps_per_second": 7.443,
"eval_weighted avg": {
"f1-score": 0.9350818112852286,
"precision": 0.9360077218295835,
"recall": 0.935660233896955,
"support": 27619.0
},
"step": 410
},
{
"epoch": 11.0,
"eval_B": {
"f1-score": 0.8654028436018957,
"precision": 0.8556701030927835,
"recall": 0.87535953978907,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9553462854557281,
"precision": 0.9378157792460163,
"recall": 0.9735446685878962,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9094388473011763,
"precision": 0.9459079733052336,
"recall": 0.8756774333405593,
"support": 9226.0
},
"eval_accuracy": 0.9371447192150332,
"eval_loss": 0.27220794558525085,
"eval_macro avg": {
"f1-score": 0.9100626587862667,
"precision": 0.9131312852146779,
"recall": 0.9081938805725085,
"support": 27619.0
},
"eval_runtime": 1.3435,
"eval_samples_per_second": 59.547,
"eval_steps_per_second": 7.443,
"eval_weighted avg": {
"f1-score": 0.9366145053671137,
"precision": 0.937416801808836,
"recall": 0.9371447192150332,
"support": 27619.0
},
"step": 451
},
{
"epoch": 12.0,
"eval_B": {
"f1-score": 0.8710900473933649,
"precision": 0.8612933458294283,
"recall": 0.8811121764141898,
"support": 1043.0
},
"eval_I": {
"f1-score": 0.9563688940549427,
"precision": 0.9410812921943871,
"recall": 0.9721613832853025,
"support": 17350.0
},
"eval_O": {
"f1-score": 0.9124614953794455,
"precision": 0.9440259589755475,
"recall": 0.8829395187513549,
"support": 9226.0
},
"eval_accuracy": 0.9389188602049314,
"eval_loss": 0.27493152022361755,
"eval_macro avg": {
"f1-score": 0.9133068122759177,
"precision": 0.9154668656664544,
"recall": 0.9120710261502823,
"support": 27619.0
},
"eval_runtime": 1.3368,
"eval_samples_per_second": 59.844,
"eval_steps_per_second": 7.48,
"eval_weighted avg": {
"f1-score": 0.938481371072642,
"precision": 0.9390518439038746,
"recall": 0.9389188602049314,
"support": 27619.0
},
"step": 492
}
],
"logging_steps": 500,
"max_steps": 656,
"num_input_tokens_seen": 0,
"num_train_epochs": 16,
"save_steps": 500,
"total_flos": 1725439832827200.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}