bert-base-uncased-scsmall / trainer_state.json
ZongqianLi's picture
Upload 130 files
74777fd
raw
history blame
13.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 37.0,
"global_step": 999,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.2262943855309169e-05,
"loss": 2.3623,
"step": 27
},
{
"epoch": 1.0,
"eval_accuracy": 0.638755980861244,
"eval_loss": 2.000427007675171,
"eval_runtime": 13.6799,
"eval_samples_per_second": 32.456,
"eval_steps_per_second": 0.146,
"step": 27
},
{
"epoch": 2.0,
"learning_rate": 1.4841962570206113e-05,
"loss": 2.0119,
"step": 54
},
{
"epoch": 2.0,
"eval_accuracy": 0.6597628650271401,
"eval_loss": 1.8219548463821411,
"eval_runtime": 13.3645,
"eval_samples_per_second": 33.222,
"eval_steps_per_second": 0.15,
"step": 54
},
{
"epoch": 3.0,
"learning_rate": 1.6350591807078892e-05,
"loss": 1.8529,
"step": 81
},
{
"epoch": 3.0,
"eval_accuracy": 0.6718626849986864,
"eval_loss": 1.7023260593414307,
"eval_runtime": 13.2825,
"eval_samples_per_second": 33.427,
"eval_steps_per_second": 0.151,
"step": 81
},
{
"epoch": 4.0,
"learning_rate": 1.7420981285103056e-05,
"loss": 1.7397,
"step": 108
},
{
"epoch": 4.0,
"eval_accuracy": 0.6914172365811054,
"eval_loss": 1.5797321796417236,
"eval_runtime": 13.2578,
"eval_samples_per_second": 33.49,
"eval_steps_per_second": 0.151,
"step": 108
},
{
"epoch": 5.0,
"learning_rate": 1.825123986666868e-05,
"loss": 1.6316,
"step": 135
},
{
"epoch": 5.0,
"eval_accuracy": 0.704878691211332,
"eval_loss": 1.4879175424575806,
"eval_runtime": 13.2974,
"eval_samples_per_second": 33.39,
"eval_steps_per_second": 0.15,
"step": 135
},
{
"epoch": 6.0,
"learning_rate": 1.892961052197583e-05,
"loss": 1.5906,
"step": 162
},
{
"epoch": 6.0,
"eval_accuracy": 0.7069794721407625,
"eval_loss": 1.4811629056930542,
"eval_runtime": 13.1968,
"eval_samples_per_second": 33.644,
"eval_steps_per_second": 0.152,
"step": 162
},
{
"epoch": 7.0,
"learning_rate": 1.9503164738653782e-05,
"loss": 1.5414,
"step": 189
},
{
"epoch": 7.0,
"eval_accuracy": 0.7172597434035343,
"eval_loss": 1.424712061882019,
"eval_runtime": 13.0856,
"eval_samples_per_second": 33.931,
"eval_steps_per_second": 0.153,
"step": 189
},
{
"epoch": 8.0,
"learning_rate": 1.9999999999999998e-05,
"loss": 1.4984,
"step": 216
},
{
"epoch": 8.0,
"eval_accuracy": 0.726310423413698,
"eval_loss": 1.3407562971115112,
"eval_runtime": 13.269,
"eval_samples_per_second": 33.462,
"eval_steps_per_second": 0.151,
"step": 216
},
{
"epoch": 9.0,
"learning_rate": 2e-05,
"loss": 1.4759,
"step": 243
},
{
"epoch": 9.0,
"eval_accuracy": 0.714683309437463,
"eval_loss": 1.409363865852356,
"eval_runtime": 13.377,
"eval_samples_per_second": 33.191,
"eval_steps_per_second": 0.15,
"step": 243
},
{
"epoch": 10.0,
"learning_rate": 2e-05,
"loss": 1.4544,
"step": 270
},
{
"epoch": 10.0,
"eval_accuracy": 0.7280686239830209,
"eval_loss": 1.338972806930542,
"eval_runtime": 13.2791,
"eval_samples_per_second": 33.436,
"eval_steps_per_second": 0.151,
"step": 270
},
{
"epoch": 11.0,
"learning_rate": 2e-05,
"loss": 1.4112,
"step": 297
},
{
"epoch": 11.0,
"eval_accuracy": 0.7280365296803653,
"eval_loss": 1.3258930444717407,
"eval_runtime": 13.3211,
"eval_samples_per_second": 33.331,
"eval_steps_per_second": 0.15,
"step": 297
},
{
"epoch": 12.0,
"learning_rate": 2e-05,
"loss": 1.3741,
"step": 324
},
{
"epoch": 12.0,
"eval_accuracy": 0.7261693058966325,
"eval_loss": 1.3296260833740234,
"eval_runtime": 13.335,
"eval_samples_per_second": 33.296,
"eval_steps_per_second": 0.15,
"step": 324
},
{
"epoch": 13.0,
"learning_rate": 2e-05,
"loss": 1.363,
"step": 351
},
{
"epoch": 13.0,
"eval_accuracy": 0.737372488408037,
"eval_loss": 1.2715643644332886,
"eval_runtime": 13.2444,
"eval_samples_per_second": 33.524,
"eval_steps_per_second": 0.151,
"step": 351
},
{
"epoch": 14.0,
"learning_rate": 2e-05,
"loss": 1.3399,
"step": 378
},
{
"epoch": 14.0,
"eval_accuracy": 0.7395378457059679,
"eval_loss": 1.2554606199264526,
"eval_runtime": 13.3293,
"eval_samples_per_second": 33.31,
"eval_steps_per_second": 0.15,
"step": 378
},
{
"epoch": 15.0,
"learning_rate": 2e-05,
"loss": 1.3276,
"step": 405
},
{
"epoch": 15.0,
"eval_accuracy": 0.7437385280359997,
"eval_loss": 1.1987595558166504,
"eval_runtime": 13.2943,
"eval_samples_per_second": 33.398,
"eval_steps_per_second": 0.15,
"step": 405
},
{
"epoch": 16.0,
"learning_rate": 2e-05,
"loss": 1.314,
"step": 432
},
{
"epoch": 16.0,
"eval_accuracy": 0.7433329382481925,
"eval_loss": 1.2292665243148804,
"eval_runtime": 13.3081,
"eval_samples_per_second": 33.363,
"eval_steps_per_second": 0.15,
"step": 432
},
{
"epoch": 17.0,
"learning_rate": 2e-05,
"loss": 1.3116,
"step": 459
},
{
"epoch": 17.0,
"eval_accuracy": 0.7434126298865632,
"eval_loss": 1.2456351518630981,
"eval_runtime": 13.0321,
"eval_samples_per_second": 34.07,
"eval_steps_per_second": 0.153,
"step": 459
},
{
"epoch": 18.0,
"learning_rate": 2e-05,
"loss": 1.2997,
"step": 486
},
{
"epoch": 18.0,
"eval_accuracy": 0.740932796900611,
"eval_loss": 1.2431975603103638,
"eval_runtime": 13.2685,
"eval_samples_per_second": 33.463,
"eval_steps_per_second": 0.151,
"step": 486
},
{
"epoch": 19.0,
"learning_rate": 2e-05,
"loss": 1.2975,
"step": 513
},
{
"epoch": 19.0,
"eval_accuracy": 0.7378796103191155,
"eval_loss": 1.2589606046676636,
"eval_runtime": 13.2957,
"eval_samples_per_second": 33.394,
"eval_steps_per_second": 0.15,
"step": 513
},
{
"epoch": 20.0,
"learning_rate": 2e-05,
"loss": 1.274,
"step": 540
},
{
"epoch": 20.0,
"eval_accuracy": 0.7467216737881244,
"eval_loss": 1.2022136449813843,
"eval_runtime": 13.2148,
"eval_samples_per_second": 33.599,
"eval_steps_per_second": 0.151,
"step": 540
},
{
"epoch": 21.0,
"learning_rate": 2e-05,
"loss": 1.2447,
"step": 567
},
{
"epoch": 21.0,
"eval_accuracy": 0.7521932389753188,
"eval_loss": 1.1791794300079346,
"eval_runtime": 13.3942,
"eval_samples_per_second": 33.149,
"eval_steps_per_second": 0.149,
"step": 567
},
{
"epoch": 22.0,
"learning_rate": 2e-05,
"loss": 1.2338,
"step": 594
},
{
"epoch": 22.0,
"eval_accuracy": 0.749752006492921,
"eval_loss": 1.1663883924484253,
"eval_runtime": 13.2721,
"eval_samples_per_second": 33.454,
"eval_steps_per_second": 0.151,
"step": 594
},
{
"epoch": 23.0,
"learning_rate": 2e-05,
"loss": 1.2129,
"step": 621
},
{
"epoch": 23.0,
"eval_accuracy": 0.757722556143656,
"eval_loss": 1.1311209201812744,
"eval_runtime": 13.3266,
"eval_samples_per_second": 33.317,
"eval_steps_per_second": 0.15,
"step": 621
},
{
"epoch": 24.0,
"learning_rate": 2e-05,
"loss": 1.2233,
"step": 648
},
{
"epoch": 24.0,
"eval_accuracy": 0.7568863020946808,
"eval_loss": 1.140002727508545,
"eval_runtime": 13.2772,
"eval_samples_per_second": 33.441,
"eval_steps_per_second": 0.151,
"step": 648
},
{
"epoch": 25.0,
"learning_rate": 2e-05,
"loss": 1.2072,
"step": 675
},
{
"epoch": 25.0,
"eval_accuracy": 0.7496811963830281,
"eval_loss": 1.1746466159820557,
"eval_runtime": 13.3397,
"eval_samples_per_second": 33.284,
"eval_steps_per_second": 0.15,
"step": 675
},
{
"epoch": 26.0,
"learning_rate": 2e-05,
"loss": 1.2086,
"step": 702
},
{
"epoch": 26.0,
"eval_accuracy": 0.7594424064563463,
"eval_loss": 1.1140285730361938,
"eval_runtime": 13.2221,
"eval_samples_per_second": 33.58,
"eval_steps_per_second": 0.151,
"step": 702
},
{
"epoch": 27.0,
"learning_rate": 2e-05,
"loss": 1.1993,
"step": 729
},
{
"epoch": 27.0,
"eval_accuracy": 0.746650287118247,
"eval_loss": 1.1945430040359497,
"eval_runtime": 13.2631,
"eval_samples_per_second": 33.476,
"eval_steps_per_second": 0.151,
"step": 729
},
{
"epoch": 28.0,
"learning_rate": 2e-05,
"loss": 1.1866,
"step": 756
},
{
"epoch": 28.0,
"eval_accuracy": 0.7611254579304546,
"eval_loss": 1.116799235343933,
"eval_runtime": 13.2563,
"eval_samples_per_second": 33.494,
"eval_steps_per_second": 0.151,
"step": 756
},
{
"epoch": 29.0,
"learning_rate": 2e-05,
"loss": 1.1658,
"step": 783
},
{
"epoch": 29.0,
"eval_accuracy": 0.7609447828200508,
"eval_loss": 1.0980545282363892,
"eval_runtime": 13.2601,
"eval_samples_per_second": 33.484,
"eval_steps_per_second": 0.151,
"step": 783
},
{
"epoch": 30.0,
"learning_rate": 2e-05,
"loss": 1.1682,
"step": 810
},
{
"epoch": 30.0,
"eval_accuracy": 0.7663264704147248,
"eval_loss": 1.1021370887756348,
"eval_runtime": 13.3382,
"eval_samples_per_second": 33.288,
"eval_steps_per_second": 0.15,
"step": 810
},
{
"epoch": 31.0,
"learning_rate": 2e-05,
"loss": 1.1598,
"step": 837
},
{
"epoch": 31.0,
"eval_accuracy": 0.7555162515584679,
"eval_loss": 1.1446928977966309,
"eval_runtime": 13.2762,
"eval_samples_per_second": 33.443,
"eval_steps_per_second": 0.151,
"step": 837
},
{
"epoch": 32.0,
"learning_rate": 2e-05,
"loss": 1.1586,
"step": 864
},
{
"epoch": 32.0,
"eval_accuracy": 0.7645990292129478,
"eval_loss": 1.1078757047653198,
"eval_runtime": 13.2153,
"eval_samples_per_second": 33.597,
"eval_steps_per_second": 0.151,
"step": 864
},
{
"epoch": 33.0,
"learning_rate": 2e-05,
"loss": 1.1457,
"step": 891
},
{
"epoch": 33.0,
"eval_accuracy": 0.7622519126288946,
"eval_loss": 1.114358901977539,
"eval_runtime": 13.3206,
"eval_samples_per_second": 33.332,
"eval_steps_per_second": 0.15,
"step": 891
},
{
"epoch": 34.0,
"learning_rate": 2e-05,
"loss": 1.1456,
"step": 918
},
{
"epoch": 34.0,
"eval_accuracy": 0.7681910865132537,
"eval_loss": 1.0843584537506104,
"eval_runtime": 13.3243,
"eval_samples_per_second": 33.323,
"eval_steps_per_second": 0.15,
"step": 918
},
{
"epoch": 35.0,
"learning_rate": 2e-05,
"loss": 1.1338,
"step": 945
},
{
"epoch": 35.0,
"eval_accuracy": 0.7738229401452542,
"eval_loss": 1.075467586517334,
"eval_runtime": 13.2446,
"eval_samples_per_second": 33.523,
"eval_steps_per_second": 0.151,
"step": 945
},
{
"epoch": 36.0,
"learning_rate": 2e-05,
"loss": 1.1335,
"step": 972
},
{
"epoch": 36.0,
"eval_accuracy": 0.7666764275256223,
"eval_loss": 1.0833895206451416,
"eval_runtime": 13.2849,
"eval_samples_per_second": 33.421,
"eval_steps_per_second": 0.151,
"step": 972
},
{
"epoch": 37.0,
"learning_rate": 2e-05,
"loss": 1.1268,
"step": 999
},
{
"epoch": 37.0,
"eval_accuracy": 0.7761633253632518,
"eval_loss": 1.0331711769104004,
"eval_runtime": 13.3282,
"eval_samples_per_second": 33.313,
"eval_steps_per_second": 0.15,
"step": 999
}
],
"max_steps": 1080,
"num_train_epochs": 40,
"total_flos": 118496962805760.0,
"trial_name": null,
"trial_params": null
}