bde-pos-bert-cased-base / trainer_state.json
batterydata's picture
pos
000ab3d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 18.0,
"global_step": 14688,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.61,
"learning_rate": 1.9387254901960785e-05,
"loss": 0.4753,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.9720334966029388,
"eval_f1": 0.9606774786290644,
"eval_loss": 0.10676991939544678,
"eval_precision": 0.9602632083306715,
"eval_recall": 0.9610921065251447,
"eval_runtime": 4.3302,
"eval_samples_per_second": 335.092,
"eval_steps_per_second": 21.015,
"step": 816
},
{
"epoch": 1.23,
"learning_rate": 1.877450980392157e-05,
"loss": 0.0981,
"step": 1000
},
{
"epoch": 1.84,
"learning_rate": 1.8161764705882355e-05,
"loss": 0.0753,
"step": 1500
},
{
"epoch": 2.0,
"eval_accuracy": 0.9730605150892716,
"eval_f1": 0.9629144363665415,
"eval_loss": 0.10854744911193848,
"eval_precision": 0.9624992014310356,
"eval_recall": 0.9633300297324083,
"eval_runtime": 4.0978,
"eval_samples_per_second": 354.094,
"eval_steps_per_second": 22.207,
"step": 1632
},
{
"epoch": 2.45,
"learning_rate": 1.7549019607843138e-05,
"loss": 0.0535,
"step": 2000
},
{
"epoch": 3.0,
"eval_accuracy": 0.9732580186443356,
"eval_f1": 0.9634966531942426,
"eval_loss": 0.10905417799949646,
"eval_precision": 0.9628967367009388,
"eval_recall": 0.9640973176891844,
"eval_runtime": 4.1017,
"eval_samples_per_second": 353.751,
"eval_steps_per_second": 22.186,
"step": 2448
},
{
"epoch": 3.06,
"learning_rate": 1.693627450980392e-05,
"loss": 0.046,
"step": 2500
},
{
"epoch": 3.68,
"learning_rate": 1.6323529411764708e-05,
"loss": 0.0306,
"step": 3000
},
{
"epoch": 4.0,
"eval_accuracy": 0.9720927476694581,
"eval_f1": 0.9631510853418946,
"eval_loss": 0.1200980469584465,
"eval_precision": 0.9623978549540347,
"eval_recall": 0.9639054956999904,
"eval_runtime": 4.0941,
"eval_samples_per_second": 354.409,
"eval_steps_per_second": 22.227,
"step": 3264
},
{
"epoch": 4.29,
"learning_rate": 1.571078431372549e-05,
"loss": 0.0271,
"step": 3500
},
{
"epoch": 4.9,
"learning_rate": 1.5098039215686276e-05,
"loss": 0.0222,
"step": 4000
},
{
"epoch": 5.0,
"eval_accuracy": 0.9717372412703429,
"eval_f1": 0.9625902498242924,
"eval_loss": 0.1297336369752884,
"eval_precision": 0.9618834796488428,
"eval_recall": 0.963298059400876,
"eval_runtime": 3.1221,
"eval_samples_per_second": 464.754,
"eval_steps_per_second": 29.147,
"step": 4080
},
{
"epoch": 5.51,
"learning_rate": 1.448529411764706e-05,
"loss": 0.0165,
"step": 4500
},
{
"epoch": 6.0,
"eval_accuracy": 0.9724285037130669,
"eval_f1": 0.9634232047022744,
"eval_loss": 0.1293230503797531,
"eval_precision": 0.9626544096523988,
"eval_recall": 0.9641932286837814,
"eval_runtime": 4.1611,
"eval_samples_per_second": 348.706,
"eval_steps_per_second": 21.869,
"step": 4896
},
{
"epoch": 6.13,
"learning_rate": 1.3872549019607844e-05,
"loss": 0.0157,
"step": 5000
},
{
"epoch": 6.74,
"learning_rate": 1.3259803921568627e-05,
"loss": 0.0106,
"step": 5500
},
{
"epoch": 7.0,
"eval_accuracy": 0.9736727761099699,
"eval_f1": 0.9648310228071295,
"eval_loss": 0.14171898365020752,
"eval_precision": 0.9639996170172023,
"eval_recall": 0.965663863934269,
"eval_runtime": 3.07,
"eval_samples_per_second": 472.638,
"eval_steps_per_second": 29.642,
"step": 5712
},
{
"epoch": 7.35,
"learning_rate": 1.2647058823529412e-05,
"loss": 0.0099,
"step": 6000
},
{
"epoch": 7.97,
"learning_rate": 1.2034313725490197e-05,
"loss": 0.0084,
"step": 6500
},
{
"epoch": 8.0,
"eval_accuracy": 0.9737912782430084,
"eval_f1": 0.9657231348883248,
"eval_loss": 0.15885183215141296,
"eval_precision": 0.9645066649658779,
"eval_recall": 0.9669426771955625,
"eval_runtime": 3.0514,
"eval_samples_per_second": 475.516,
"eval_steps_per_second": 29.822,
"step": 6528
},
{
"epoch": 8.58,
"learning_rate": 1.142156862745098e-05,
"loss": 0.007,
"step": 7000
},
{
"epoch": 9.0,
"eval_accuracy": 0.9737320271764892,
"eval_f1": 0.9660194174757282,
"eval_loss": 0.16605724394321442,
"eval_precision": 0.9650023927261127,
"eval_recall": 0.9670385881901595,
"eval_runtime": 4.1246,
"eval_samples_per_second": 351.79,
"eval_steps_per_second": 22.063,
"step": 7344
},
{
"epoch": 9.19,
"learning_rate": 1.0808823529411765e-05,
"loss": 0.0066,
"step": 7500
},
{
"epoch": 9.8,
"learning_rate": 1.0196078431372549e-05,
"loss": 0.0051,
"step": 8000
},
{
"epoch": 10.0,
"eval_accuracy": 0.9731592668668037,
"eval_f1": 0.964738573573094,
"eval_loss": 0.17197225987911224,
"eval_precision": 0.9638150547241456,
"eval_recall": 0.965663863934269,
"eval_runtime": 3.0554,
"eval_samples_per_second": 474.895,
"eval_steps_per_second": 29.783,
"step": 8160
},
{
"epoch": 10.42,
"learning_rate": 9.583333333333335e-06,
"loss": 0.0046,
"step": 8500
},
{
"epoch": 11.0,
"eval_accuracy": 0.9732382682888292,
"eval_f1": 0.9645942795087595,
"eval_loss": 0.1706034392118454,
"eval_precision": 0.9636862594932669,
"eval_recall": 0.9655040122766073,
"eval_runtime": 4.2231,
"eval_samples_per_second": 343.586,
"eval_steps_per_second": 21.548,
"step": 8976
},
{
"epoch": 11.03,
"learning_rate": 8.970588235294119e-06,
"loss": 0.0051,
"step": 9000
},
{
"epoch": 11.64,
"learning_rate": 8.357843137254903e-06,
"loss": 0.0032,
"step": 9500
},
{
"epoch": 12.0,
"eval_accuracy": 0.973475272554906,
"eval_f1": 0.9657750934713834,
"eval_loss": 0.1721695363521576,
"eval_precision": 0.9653432139777046,
"eval_recall": 0.9662073595703188,
"eval_runtime": 4.1749,
"eval_samples_per_second": 347.551,
"eval_steps_per_second": 21.797,
"step": 9792
},
{
"epoch": 12.25,
"learning_rate": 7.745098039215687e-06,
"loss": 0.0037,
"step": 10000
},
{
"epoch": 12.87,
"learning_rate": 7.132352941176472e-06,
"loss": 0.0031,
"step": 10500
},
{
"epoch": 13.0,
"eval_accuracy": 0.9734357718438932,
"eval_f1": 0.9649974450689831,
"eval_loss": 0.18985576927661896,
"eval_precision": 0.9639814962513957,
"eval_recall": 0.9660155375811247,
"eval_runtime": 3.0596,
"eval_samples_per_second": 474.243,
"eval_steps_per_second": 29.742,
"step": 10608
},
{
"epoch": 13.48,
"learning_rate": 6.519607843137256e-06,
"loss": 0.0028,
"step": 11000
},
{
"epoch": 14.0,
"eval_accuracy": 0.9733962711328804,
"eval_f1": 0.9652715654952077,
"eval_loss": 0.1955101639032364,
"eval_precision": 0.9646243734235816,
"eval_recall": 0.9659196265865277,
"eval_runtime": 3.05,
"eval_samples_per_second": 475.739,
"eval_steps_per_second": 29.836,
"step": 11424
},
{
"epoch": 14.09,
"learning_rate": 5.90686274509804e-06,
"loss": 0.0028,
"step": 11500
},
{
"epoch": 14.71,
"learning_rate": 5.294117647058824e-06,
"loss": 0.0026,
"step": 12000
},
{
"epoch": 15.0,
"eval_accuracy": 0.9738110285985148,
"eval_f1": 0.9658246656760773,
"eval_loss": 0.19060379266738892,
"eval_precision": 0.9653465346534653,
"eval_recall": 0.9663032705649157,
"eval_runtime": 4.1019,
"eval_samples_per_second": 353.738,
"eval_steps_per_second": 22.185,
"step": 12240
},
{
"epoch": 15.32,
"learning_rate": 4.681372549019608e-06,
"loss": 0.0019,
"step": 12500
},
{
"epoch": 15.93,
"learning_rate": 4.068627450980392e-06,
"loss": 0.0021,
"step": 13000
},
{
"epoch": 16.0,
"eval_accuracy": 0.973969031442566,
"eval_f1": 0.9657132819112408,
"eval_loss": 0.2030537873506546,
"eval_precision": 0.964773452456924,
"eval_recall": 0.9666549442117714,
"eval_runtime": 4.0654,
"eval_samples_per_second": 356.916,
"eval_steps_per_second": 22.384,
"step": 13056
},
{
"epoch": 16.54,
"learning_rate": 3.4558823529411766e-06,
"loss": 0.0017,
"step": 13500
},
{
"epoch": 17.0,
"eval_accuracy": 0.9744035392637067,
"eval_f1": 0.9668322128359497,
"eval_loss": 0.1976209431886673,
"eval_precision": 0.9658605066683683,
"eval_recall": 0.9678058761469357,
"eval_runtime": 4.0709,
"eval_samples_per_second": 356.434,
"eval_steps_per_second": 22.354,
"step": 13872
},
{
"epoch": 17.16,
"learning_rate": 2.843137254901961e-06,
"loss": 0.0015,
"step": 14000
},
{
"epoch": 17.77,
"learning_rate": 2.2303921568627456e-06,
"loss": 0.0015,
"step": 14500
},
{
"epoch": 18.0,
"eval_accuracy": 0.9746997945963027,
"eval_f1": 0.9669350201265093,
"eval_loss": 0.19767752289772034,
"eval_precision": 0.9662250598563448,
"eval_recall": 0.967646024489274,
"eval_runtime": 4.1567,
"eval_samples_per_second": 349.074,
"eval_steps_per_second": 21.892,
"step": 14688
}
],
"max_steps": 16320,
"num_train_epochs": 20,
"total_flos": 6.14323669958656e+16,
"trial_name": null,
"trial_params": null
}