bert-large-cased-scmedium / trainer_state.json
ZongqianLi's picture
Upload 130 files
501dc72
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"global_step": 3180,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.3832186408159307e-05,
"loss": 2.8049,
"step": 106
},
{
"epoch": 1.0,
"eval_accuracy": 0.5764944934955999,
"eval_loss": 2.5287580490112305,
"eval_runtime": 22.0895,
"eval_samples_per_second": 80.128,
"eval_steps_per_second": 0.272,
"step": 106
},
{
"epoch": 2.0,
"learning_rate": 1.5888124272106204e-05,
"loss": 2.4814,
"step": 212
},
{
"epoch": 2.0,
"eval_accuracy": 0.5855989573905803,
"eval_loss": 2.429943561553955,
"eval_runtime": 20.8189,
"eval_samples_per_second": 85.019,
"eval_steps_per_second": 0.288,
"step": 212
},
{
"epoch": 3.0,
"learning_rate": 1.7090770826327895e-05,
"loss": 2.3778,
"step": 318
},
{
"epoch": 3.0,
"eval_accuracy": 0.5945957840073934,
"eval_loss": 2.348792314529419,
"eval_runtime": 20.8031,
"eval_samples_per_second": 85.084,
"eval_steps_per_second": 0.288,
"step": 318
},
{
"epoch": 4.0,
"learning_rate": 1.7944062136053104e-05,
"loss": 2.307,
"step": 424
},
{
"epoch": 4.0,
"eval_accuracy": 0.6146158043850404,
"eval_loss": 2.1809699535369873,
"eval_runtime": 20.6813,
"eval_samples_per_second": 85.585,
"eval_steps_per_second": 0.29,
"step": 424
},
{
"epoch": 5.0,
"learning_rate": 1.860592629580032e-05,
"loss": 2.0807,
"step": 530
},
{
"epoch": 5.0,
"eval_accuracy": 0.7058374199856394,
"eval_loss": 1.4772239923477173,
"eval_runtime": 20.7293,
"eval_samples_per_second": 85.386,
"eval_steps_per_second": 0.289,
"step": 530
},
{
"epoch": 6.0,
"learning_rate": 1.9146708690274792e-05,
"loss": 1.6063,
"step": 636
},
{
"epoch": 6.0,
"eval_accuracy": 0.7498181077384244,
"eval_loss": 1.183250069618225,
"eval_runtime": 20.8379,
"eval_samples_per_second": 84.941,
"eval_steps_per_second": 0.288,
"step": 636
},
{
"epoch": 7.0,
"learning_rate": 1.9603933689955228e-05,
"loss": 1.3122,
"step": 742
},
{
"epoch": 7.0,
"eval_accuracy": 0.7694247060500784,
"eval_loss": 1.0655418634414673,
"eval_runtime": 20.7257,
"eval_samples_per_second": 85.401,
"eval_steps_per_second": 0.289,
"step": 742
},
{
"epoch": 8.0,
"learning_rate": 2e-05,
"loss": 1.1911,
"step": 848
},
{
"epoch": 8.0,
"eval_accuracy": 0.7711616016924874,
"eval_loss": 1.059058427810669,
"eval_runtime": 20.8383,
"eval_samples_per_second": 84.94,
"eval_steps_per_second": 0.288,
"step": 848
},
{
"epoch": 9.0,
"learning_rate": 2e-05,
"loss": 1.1273,
"step": 954
},
{
"epoch": 9.0,
"eval_accuracy": 0.7756633909375628,
"eval_loss": 1.020793080329895,
"eval_runtime": 20.7787,
"eval_samples_per_second": 85.183,
"eval_steps_per_second": 0.289,
"step": 954
},
{
"epoch": 10.0,
"learning_rate": 2e-05,
"loss": 1.0954,
"step": 1060
},
{
"epoch": 10.0,
"eval_accuracy": 0.7866386428464195,
"eval_loss": 0.9602928161621094,
"eval_runtime": 20.6664,
"eval_samples_per_second": 85.646,
"eval_steps_per_second": 0.29,
"step": 1060
},
{
"epoch": 11.0,
"learning_rate": 2e-05,
"loss": 1.0565,
"step": 1166
},
{
"epoch": 11.0,
"eval_accuracy": 0.7900944085581504,
"eval_loss": 0.9395522475242615,
"eval_runtime": 20.7654,
"eval_samples_per_second": 85.238,
"eval_steps_per_second": 0.289,
"step": 1166
},
{
"epoch": 12.0,
"learning_rate": 2e-05,
"loss": 1.0351,
"step": 1272
},
{
"epoch": 12.0,
"eval_accuracy": 0.7892073832790445,
"eval_loss": 0.9436615705490112,
"eval_runtime": 20.1273,
"eval_samples_per_second": 87.94,
"eval_steps_per_second": 0.298,
"step": 1272
},
{
"epoch": 13.0,
"learning_rate": 2e-05,
"loss": 1.0136,
"step": 1378
},
{
"epoch": 13.0,
"eval_accuracy": 0.7930297660705511,
"eval_loss": 0.9114692211151123,
"eval_runtime": 20.7577,
"eval_samples_per_second": 85.269,
"eval_steps_per_second": 0.289,
"step": 1378
},
{
"epoch": 14.0,
"learning_rate": 2e-05,
"loss": 0.996,
"step": 1484
},
{
"epoch": 14.0,
"eval_accuracy": 0.7986084994358782,
"eval_loss": 0.9008192420005798,
"eval_runtime": 20.8068,
"eval_samples_per_second": 85.068,
"eval_steps_per_second": 0.288,
"step": 1484
},
{
"epoch": 15.0,
"learning_rate": 2e-05,
"loss": 0.985,
"step": 1590
},
{
"epoch": 15.0,
"eval_accuracy": 0.8006165209970277,
"eval_loss": 0.8874076008796692,
"eval_runtime": 20.7048,
"eval_samples_per_second": 85.487,
"eval_steps_per_second": 0.29,
"step": 1590
},
{
"epoch": 16.0,
"learning_rate": 2e-05,
"loss": 0.9654,
"step": 1696
},
{
"epoch": 16.0,
"eval_accuracy": 0.8016079609686441,
"eval_loss": 0.8684276938438416,
"eval_runtime": 20.7798,
"eval_samples_per_second": 85.179,
"eval_steps_per_second": 0.289,
"step": 1696
},
{
"epoch": 17.0,
"learning_rate": 2e-05,
"loss": 0.9529,
"step": 1802
},
{
"epoch": 17.0,
"eval_accuracy": 0.7997229383352594,
"eval_loss": 0.8870733976364136,
"eval_runtime": 20.7541,
"eval_samples_per_second": 85.284,
"eval_steps_per_second": 0.289,
"step": 1802
},
{
"epoch": 18.0,
"learning_rate": 2e-05,
"loss": 0.9442,
"step": 1908
},
{
"epoch": 18.0,
"eval_accuracy": 0.8027364602986559,
"eval_loss": 0.8661928772926331,
"eval_runtime": 20.7884,
"eval_samples_per_second": 85.144,
"eval_steps_per_second": 0.289,
"step": 1908
},
{
"epoch": 19.0,
"learning_rate": 2e-05,
"loss": 0.9327,
"step": 2014
},
{
"epoch": 19.0,
"eval_accuracy": 0.8069067033935976,
"eval_loss": 0.8454752564430237,
"eval_runtime": 20.7342,
"eval_samples_per_second": 85.366,
"eval_steps_per_second": 0.289,
"step": 2014
},
{
"epoch": 20.0,
"learning_rate": 2e-05,
"loss": 0.9204,
"step": 2120
},
{
"epoch": 20.0,
"eval_accuracy": 0.8092386081748497,
"eval_loss": 0.8449112772941589,
"eval_runtime": 20.6952,
"eval_samples_per_second": 85.527,
"eval_steps_per_second": 0.29,
"step": 2120
},
{
"epoch": 21.0,
"learning_rate": 2e-05,
"loss": 0.9097,
"step": 2226
},
{
"epoch": 21.0,
"eval_accuracy": 0.809981731823164,
"eval_loss": 0.8322621583938599,
"eval_runtime": 20.7947,
"eval_samples_per_second": 85.118,
"eval_steps_per_second": 0.289,
"step": 2226
},
{
"epoch": 22.0,
"learning_rate": 2e-05,
"loss": 0.9024,
"step": 2332
},
{
"epoch": 22.0,
"eval_accuracy": 0.8133925596890617,
"eval_loss": 0.8194364905357361,
"eval_runtime": 20.8718,
"eval_samples_per_second": 84.803,
"eval_steps_per_second": 0.287,
"step": 2332
},
{
"epoch": 23.0,
"learning_rate": 2e-05,
"loss": 0.895,
"step": 2438
},
{
"epoch": 23.0,
"eval_accuracy": 0.8129690134121053,
"eval_loss": 0.8122667670249939,
"eval_runtime": 20.8197,
"eval_samples_per_second": 85.016,
"eval_steps_per_second": 0.288,
"step": 2438
},
{
"epoch": 24.0,
"learning_rate": 2e-05,
"loss": 0.891,
"step": 2544
},
{
"epoch": 24.0,
"eval_accuracy": 0.8167250838730983,
"eval_loss": 0.7972639799118042,
"eval_runtime": 20.743,
"eval_samples_per_second": 85.33,
"eval_steps_per_second": 0.289,
"step": 2544
},
{
"epoch": 25.0,
"learning_rate": 2e-05,
"loss": 0.8784,
"step": 2650
},
{
"epoch": 25.0,
"eval_accuracy": 0.8133820647746538,
"eval_loss": 0.8083846569061279,
"eval_runtime": 20.8823,
"eval_samples_per_second": 84.761,
"eval_steps_per_second": 0.287,
"step": 2650
},
{
"epoch": 26.0,
"learning_rate": 2e-05,
"loss": 0.8771,
"step": 2756
},
{
"epoch": 26.0,
"eval_accuracy": 0.8162191337201458,
"eval_loss": 0.8039098381996155,
"eval_runtime": 20.8754,
"eval_samples_per_second": 84.789,
"eval_steps_per_second": 0.287,
"step": 2756
},
{
"epoch": 27.0,
"learning_rate": 2e-05,
"loss": 0.8713,
"step": 2862
},
{
"epoch": 27.0,
"eval_accuracy": 0.8169723253335294,
"eval_loss": 0.7862613797187805,
"eval_runtime": 20.8372,
"eval_samples_per_second": 84.944,
"eval_steps_per_second": 0.288,
"step": 2862
},
{
"epoch": 28.0,
"learning_rate": 2e-05,
"loss": 0.8707,
"step": 2968
},
{
"epoch": 28.0,
"eval_accuracy": 0.8173417825447652,
"eval_loss": 0.7960302829742432,
"eval_runtime": 20.7882,
"eval_samples_per_second": 85.145,
"eval_steps_per_second": 0.289,
"step": 2968
},
{
"epoch": 29.0,
"learning_rate": 2e-05,
"loss": 0.8571,
"step": 3074
},
{
"epoch": 29.0,
"eval_accuracy": 0.8181885887496183,
"eval_loss": 0.7875123023986816,
"eval_runtime": 20.7114,
"eval_samples_per_second": 85.46,
"eval_steps_per_second": 0.29,
"step": 3074
},
{
"epoch": 30.0,
"learning_rate": 2e-05,
"loss": 0.8593,
"step": 3180
},
{
"epoch": 30.0,
"eval_accuracy": 0.8199450484519026,
"eval_loss": 0.7765340805053711,
"eval_runtime": 20.9094,
"eval_samples_per_second": 84.651,
"eval_steps_per_second": 0.287,
"step": 3180
}
],
"max_steps": 4240,
"num_train_epochs": 40,
"total_flos": 1006048479543296.0,
"trial_name": null,
"trial_params": null
}