bert-base-uncased-sclarge / trainer_state.json
ZongqianLi's picture
Upload 130 files
8766291
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 35.0,
"global_step": 33600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.535132032339264e-05,
"loss": 1.7652,
"step": 960
},
{
"epoch": 1.0,
"eval_accuracy": 0.66545764302116,
"eval_loss": 1.7168171405792236,
"eval_runtime": 51.9578,
"eval_samples_per_second": 146.35,
"eval_steps_per_second": 0.924,
"step": 960
},
{
"epoch": 2.0,
"learning_rate": 1.6900880215595094e-05,
"loss": 1.5201,
"step": 1920
},
{
"epoch": 2.0,
"eval_accuracy": 0.6816024828889072,
"eval_loss": 1.6037945747375488,
"eval_runtime": 51.6258,
"eval_samples_per_second": 147.291,
"eval_steps_per_second": 0.93,
"step": 1920
},
{
"epoch": 3.0,
"learning_rate": 1.7807314645155048e-05,
"loss": 1.4366,
"step": 2880
},
{
"epoch": 3.0,
"eval_accuracy": 0.6859236059235067,
"eval_loss": 1.5763635635375977,
"eval_runtime": 52.5239,
"eval_samples_per_second": 144.772,
"eval_steps_per_second": 0.914,
"step": 2880
},
{
"epoch": 4.0,
"learning_rate": 1.8450440107797548e-05,
"loss": 1.3831,
"step": 3840
},
{
"epoch": 4.0,
"eval_accuracy": 0.6914059105006866,
"eval_loss": 1.5295616388320923,
"eval_runtime": 51.6883,
"eval_samples_per_second": 147.113,
"eval_steps_per_second": 0.929,
"step": 3840
},
{
"epoch": 5.0,
"learning_rate": 1.894928697180815e-05,
"loss": 1.3447,
"step": 4800
},
{
"epoch": 5.0,
"eval_accuracy": 0.6934798357287497,
"eval_loss": 1.5127382278442383,
"eval_runtime": 51.7137,
"eval_samples_per_second": 147.04,
"eval_steps_per_second": 0.928,
"step": 4800
},
{
"epoch": 6.0,
"learning_rate": 1.93568745373575e-05,
"loss": 1.314,
"step": 5760
},
{
"epoch": 6.0,
"eval_accuracy": 0.6982260680110423,
"eval_loss": 1.4813944101333618,
"eval_runtime": 51.6818,
"eval_samples_per_second": 147.131,
"eval_steps_per_second": 0.929,
"step": 5760
},
{
"epoch": 7.0,
"learning_rate": 1.9701484913790247e-05,
"loss": 1.29,
"step": 6720
},
{
"epoch": 7.0,
"eval_accuracy": 0.6991250528199647,
"eval_loss": 1.4718950986862183,
"eval_runtime": 52.9344,
"eval_samples_per_second": 143.649,
"eval_steps_per_second": 0.907,
"step": 6720
},
{
"epoch": 8.0,
"learning_rate": 2e-05,
"loss": 1.2669,
"step": 7680
},
{
"epoch": 8.0,
"eval_accuracy": 0.7032825019384854,
"eval_loss": 1.449450135231018,
"eval_runtime": 50.9613,
"eval_samples_per_second": 149.211,
"eval_steps_per_second": 0.942,
"step": 7680
},
{
"epoch": 9.0,
"learning_rate": 2e-05,
"loss": 1.2461,
"step": 8640
},
{
"epoch": 9.0,
"eval_accuracy": 0.7032601118345916,
"eval_loss": 1.4466781616210938,
"eval_runtime": 50.8922,
"eval_samples_per_second": 149.414,
"eval_steps_per_second": 0.943,
"step": 8640
},
{
"epoch": 10.0,
"learning_rate": 2e-05,
"loss": 1.2323,
"step": 9600
},
{
"epoch": 10.0,
"eval_accuracy": 0.7045017515092793,
"eval_loss": 1.4342981576919556,
"eval_runtime": 51.2867,
"eval_samples_per_second": 148.264,
"eval_steps_per_second": 0.936,
"step": 9600
},
{
"epoch": 11.0,
"learning_rate": 2e-05,
"loss": 1.2191,
"step": 10560
},
{
"epoch": 11.0,
"eval_accuracy": 0.7057310891893632,
"eval_loss": 1.4175918102264404,
"eval_runtime": 51.4742,
"eval_samples_per_second": 147.725,
"eval_steps_per_second": 0.933,
"step": 10560
},
{
"epoch": 12.0,
"learning_rate": 2e-05,
"loss": 1.2046,
"step": 11520
},
{
"epoch": 12.0,
"eval_accuracy": 0.7111463874637353,
"eval_loss": 1.3918827772140503,
"eval_runtime": 51.2424,
"eval_samples_per_second": 148.393,
"eval_steps_per_second": 0.937,
"step": 11520
},
{
"epoch": 13.0,
"learning_rate": 2e-05,
"loss": 1.1944,
"step": 12480
},
{
"epoch": 13.0,
"eval_accuracy": 0.7096209003462108,
"eval_loss": 1.397229790687561,
"eval_runtime": 51.3927,
"eval_samples_per_second": 147.959,
"eval_steps_per_second": 0.934,
"step": 12480
},
{
"epoch": 14.0,
"learning_rate": 2e-05,
"loss": 1.1821,
"step": 13440
},
{
"epoch": 14.0,
"eval_accuracy": 0.7115298351243047,
"eval_loss": 1.386526107788086,
"eval_runtime": 51.5062,
"eval_samples_per_second": 147.633,
"eval_steps_per_second": 0.932,
"step": 13440
},
{
"epoch": 15.0,
"learning_rate": 2e-05,
"loss": 1.1716,
"step": 14400
},
{
"epoch": 15.0,
"eval_accuracy": 0.7124512834797834,
"eval_loss": 1.383684515953064,
"eval_runtime": 51.5334,
"eval_samples_per_second": 147.555,
"eval_steps_per_second": 0.931,
"step": 14400
},
{
"epoch": 16.0,
"learning_rate": 2e-05,
"loss": 1.1627,
"step": 15360
},
{
"epoch": 16.0,
"eval_accuracy": 0.7130167108614363,
"eval_loss": 1.3739854097366333,
"eval_runtime": 51.1674,
"eval_samples_per_second": 148.61,
"eval_steps_per_second": 0.938,
"step": 15360
},
{
"epoch": 17.0,
"learning_rate": 2e-05,
"loss": 1.1535,
"step": 16320
},
{
"epoch": 17.0,
"eval_accuracy": 0.7154258294693137,
"eval_loss": 1.3581925630569458,
"eval_runtime": 50.9905,
"eval_samples_per_second": 149.126,
"eval_steps_per_second": 0.941,
"step": 16320
},
{
"epoch": 18.0,
"learning_rate": 2e-05,
"loss": 1.1459,
"step": 17280
},
{
"epoch": 18.0,
"eval_accuracy": 0.7157034442498541,
"eval_loss": 1.3612279891967773,
"eval_runtime": 51.0142,
"eval_samples_per_second": 149.057,
"eval_steps_per_second": 0.941,
"step": 17280
},
{
"epoch": 19.0,
"learning_rate": 2e-05,
"loss": 1.1381,
"step": 18240
},
{
"epoch": 19.0,
"eval_accuracy": 0.7173944953215458,
"eval_loss": 1.3571031093597412,
"eval_runtime": 51.233,
"eval_samples_per_second": 148.42,
"eval_steps_per_second": 0.937,
"step": 18240
},
{
"epoch": 20.0,
"learning_rate": 2e-05,
"loss": 1.1314,
"step": 19200
},
{
"epoch": 20.0,
"eval_accuracy": 0.7164889608500034,
"eval_loss": 1.3498369455337524,
"eval_runtime": 51.475,
"eval_samples_per_second": 147.722,
"eval_steps_per_second": 0.932,
"step": 19200
},
{
"epoch": 21.0,
"learning_rate": 2e-05,
"loss": 1.1216,
"step": 20160
},
{
"epoch": 21.0,
"eval_accuracy": 0.7190562640400042,
"eval_loss": 1.3336502313613892,
"eval_runtime": 51.2931,
"eval_samples_per_second": 148.246,
"eval_steps_per_second": 0.936,
"step": 20160
},
{
"epoch": 22.0,
"learning_rate": 2e-05,
"loss": 1.1187,
"step": 21120
},
{
"epoch": 22.0,
"eval_accuracy": 0.7189180295993838,
"eval_loss": 1.3376628160476685,
"eval_runtime": 51.22,
"eval_samples_per_second": 148.458,
"eval_steps_per_second": 0.937,
"step": 21120
},
{
"epoch": 23.0,
"learning_rate": 2e-05,
"loss": 1.1128,
"step": 22080
},
{
"epoch": 23.0,
"eval_accuracy": 0.7185502647071909,
"eval_loss": 1.3377180099487305,
"eval_runtime": 50.9033,
"eval_samples_per_second": 149.381,
"eval_steps_per_second": 0.943,
"step": 22080
},
{
"epoch": 24.0,
"learning_rate": 2e-05,
"loss": 1.1052,
"step": 23040
},
{
"epoch": 24.0,
"eval_accuracy": 0.7208811149658572,
"eval_loss": 1.3223472833633423,
"eval_runtime": 51.1878,
"eval_samples_per_second": 148.551,
"eval_steps_per_second": 0.938,
"step": 23040
},
{
"epoch": 25.0,
"learning_rate": 2e-05,
"loss": 1.0996,
"step": 24000
},
{
"epoch": 25.0,
"eval_accuracy": 0.7216902223097578,
"eval_loss": 1.3264613151550293,
"eval_runtime": 50.8905,
"eval_samples_per_second": 149.419,
"eval_steps_per_second": 0.943,
"step": 24000
},
{
"epoch": 26.0,
"learning_rate": 2e-05,
"loss": 1.0961,
"step": 24960
},
{
"epoch": 26.0,
"eval_accuracy": 0.7211948506436695,
"eval_loss": 1.320527195930481,
"eval_runtime": 50.9638,
"eval_samples_per_second": 149.204,
"eval_steps_per_second": 0.942,
"step": 24960
},
{
"epoch": 27.0,
"learning_rate": 2e-05,
"loss": 1.0902,
"step": 25920
},
{
"epoch": 27.0,
"eval_accuracy": 0.7213838036019521,
"eval_loss": 1.321337342262268,
"eval_runtime": 52.1531,
"eval_samples_per_second": 145.802,
"eval_steps_per_second": 0.92,
"step": 25920
},
{
"epoch": 28.0,
"learning_rate": 2e-05,
"loss": 1.0835,
"step": 26880
},
{
"epoch": 28.0,
"eval_accuracy": 0.7239870388684853,
"eval_loss": 1.3021934032440186,
"eval_runtime": 51.6703,
"eval_samples_per_second": 147.164,
"eval_steps_per_second": 0.929,
"step": 26880
},
{
"epoch": 29.0,
"learning_rate": 2e-05,
"loss": 1.0796,
"step": 27840
},
{
"epoch": 29.0,
"eval_accuracy": 0.7225473777214267,
"eval_loss": 1.3106894493103027,
"eval_runtime": 51.4692,
"eval_samples_per_second": 147.739,
"eval_steps_per_second": 0.933,
"step": 27840
},
{
"epoch": 30.0,
"learning_rate": 2e-05,
"loss": 1.076,
"step": 28800
},
{
"epoch": 30.0,
"eval_accuracy": 0.7246294369637408,
"eval_loss": 1.3005998134613037,
"eval_runtime": 51.1849,
"eval_samples_per_second": 148.56,
"eval_steps_per_second": 0.938,
"step": 28800
},
{
"epoch": 31.0,
"learning_rate": 2e-05,
"loss": 1.0713,
"step": 29760
},
{
"epoch": 31.0,
"eval_accuracy": 0.723824419552894,
"eval_loss": 1.3021259307861328,
"eval_runtime": 51.1204,
"eval_samples_per_second": 148.747,
"eval_steps_per_second": 0.939,
"step": 29760
},
{
"epoch": 32.0,
"learning_rate": 2e-05,
"loss": 1.0679,
"step": 30720
},
{
"epoch": 32.0,
"eval_accuracy": 0.7239979316606835,
"eval_loss": 1.3063867092132568,
"eval_runtime": 50.892,
"eval_samples_per_second": 149.415,
"eval_steps_per_second": 0.943,
"step": 30720
},
{
"epoch": 33.0,
"learning_rate": 2e-05,
"loss": 1.0638,
"step": 31680
},
{
"epoch": 33.0,
"eval_accuracy": 0.7261307848236347,
"eval_loss": 1.2864927053451538,
"eval_runtime": 50.9275,
"eval_samples_per_second": 149.31,
"eval_steps_per_second": 0.943,
"step": 31680
},
{
"epoch": 34.0,
"learning_rate": 2e-05,
"loss": 1.058,
"step": 32640
},
{
"epoch": 34.0,
"eval_accuracy": 0.7236361200229268,
"eval_loss": 1.3007187843322754,
"eval_runtime": 51.9944,
"eval_samples_per_second": 146.246,
"eval_steps_per_second": 0.923,
"step": 32640
},
{
"epoch": 35.0,
"learning_rate": 2e-05,
"loss": 1.0548,
"step": 33600
},
{
"epoch": 35.0,
"eval_accuracy": 0.7257316118449666,
"eval_loss": 1.2933671474456787,
"eval_runtime": 51.2859,
"eval_samples_per_second": 148.267,
"eval_steps_per_second": 0.936,
"step": 33600
}
],
"max_steps": 38400,
"num_train_epochs": 40,
"total_flos": 2041797358387200.0,
"trial_name": null,
"trial_params": null
}