roberta-base-mnli_CollSgE / trainer_state.json
liuyanchen1015's picture
Training in progress, step 2000
3f4b0bd
{
"best_metric": 0.8503582395087002,
"best_model_checkpoint": "./outputs/finetuning/mnli_CollSgE/checkpoint-48000",
"epoch": 5.0,
"global_step": 59960,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17,
"learning_rate": 1.933288859239493e-05,
"loss": 0.4123,
"step": 2000
},
{
"epoch": 0.17,
"eval_acc": 0.8331627430910952,
"eval_loss": 0.46929866075515747,
"eval_runtime": 16.6562,
"eval_samples_per_second": 586.569,
"eval_steps_per_second": 18.372,
"step": 2000
},
{
"epoch": 0.33,
"learning_rate": 1.866577718478986e-05,
"loss": 0.4028,
"step": 4000
},
{
"epoch": 0.33,
"eval_acc": 0.8337768679631525,
"eval_loss": 0.46244016289711,
"eval_runtime": 16.6313,
"eval_samples_per_second": 587.445,
"eval_steps_per_second": 18.399,
"step": 4000
},
{
"epoch": 0.5,
"learning_rate": 1.7998665777184793e-05,
"loss": 0.3888,
"step": 6000
},
{
"epoch": 0.5,
"eval_acc": 0.8374616171954964,
"eval_loss": 0.45001623034477234,
"eval_runtime": 16.6286,
"eval_samples_per_second": 587.542,
"eval_steps_per_second": 18.402,
"step": 6000
},
{
"epoch": 0.67,
"learning_rate": 1.7331554369579722e-05,
"loss": 0.3841,
"step": 8000
},
{
"epoch": 0.67,
"eval_acc": 0.8415557830092119,
"eval_loss": 0.4280681312084198,
"eval_runtime": 16.6312,
"eval_samples_per_second": 587.451,
"eval_steps_per_second": 18.399,
"step": 8000
},
{
"epoch": 0.83,
"learning_rate": 1.666444296197465e-05,
"loss": 0.3783,
"step": 10000
},
{
"epoch": 0.83,
"eval_acc": 0.8365404298874104,
"eval_loss": 0.4433709681034088,
"eval_runtime": 16.6487,
"eval_samples_per_second": 586.832,
"eval_steps_per_second": 18.38,
"step": 10000
},
{
"epoch": 1.0,
"learning_rate": 1.599733155436958e-05,
"loss": 0.3759,
"step": 12000
},
{
"epoch": 1.0,
"eval_acc": 0.8417604912998976,
"eval_loss": 0.4399653375148773,
"eval_runtime": 16.6479,
"eval_samples_per_second": 586.862,
"eval_steps_per_second": 18.381,
"step": 12000
},
{
"epoch": 1.17,
"learning_rate": 1.533022014676451e-05,
"loss": 0.2721,
"step": 14000
},
{
"epoch": 1.17,
"eval_acc": 0.8426816786079836,
"eval_loss": 0.5021729469299316,
"eval_runtime": 16.6186,
"eval_samples_per_second": 587.897,
"eval_steps_per_second": 18.413,
"step": 14000
},
{
"epoch": 1.33,
"learning_rate": 1.4663108739159441e-05,
"loss": 0.2736,
"step": 16000
},
{
"epoch": 1.33,
"eval_acc": 0.8430910951893552,
"eval_loss": 0.5251772403717041,
"eval_runtime": 16.6517,
"eval_samples_per_second": 586.728,
"eval_steps_per_second": 18.377,
"step": 16000
},
{
"epoch": 1.5,
"learning_rate": 1.3995997331554372e-05,
"loss": 0.2821,
"step": 18000
},
{
"epoch": 1.5,
"eval_acc": 0.8409416581371546,
"eval_loss": 0.48868492245674133,
"eval_runtime": 16.6338,
"eval_samples_per_second": 587.358,
"eval_steps_per_second": 18.396,
"step": 18000
},
{
"epoch": 1.67,
"learning_rate": 1.33288859239493e-05,
"loss": 0.2802,
"step": 20000
},
{
"epoch": 1.67,
"eval_acc": 0.8457523029682702,
"eval_loss": 0.4757882356643677,
"eval_runtime": 16.6205,
"eval_samples_per_second": 587.829,
"eval_steps_per_second": 18.411,
"step": 20000
},
{
"epoch": 1.83,
"learning_rate": 1.2661774516344229e-05,
"loss": 0.2794,
"step": 22000
},
{
"epoch": 1.83,
"eval_acc": 0.8457523029682702,
"eval_loss": 0.4611198604106903,
"eval_runtime": 16.6377,
"eval_samples_per_second": 587.22,
"eval_steps_per_second": 18.392,
"step": 22000
},
{
"epoch": 2.0,
"learning_rate": 1.199466310873916e-05,
"loss": 0.2797,
"step": 24000
},
{
"epoch": 2.0,
"eval_acc": 0.8456499488229273,
"eval_loss": 0.49363580346107483,
"eval_runtime": 16.6189,
"eval_samples_per_second": 587.883,
"eval_steps_per_second": 18.413,
"step": 24000
},
{
"epoch": 2.17,
"learning_rate": 1.132755170113409e-05,
"loss": 0.1915,
"step": 26000
},
{
"epoch": 2.17,
"eval_acc": 0.8461617195496418,
"eval_loss": 0.5545384883880615,
"eval_runtime": 16.6314,
"eval_samples_per_second": 587.443,
"eval_steps_per_second": 18.399,
"step": 26000
},
{
"epoch": 2.33,
"learning_rate": 1.066044029352902e-05,
"loss": 0.1946,
"step": 28000
},
{
"epoch": 2.33,
"eval_acc": 0.8443193449334698,
"eval_loss": 0.5731399059295654,
"eval_runtime": 16.6331,
"eval_samples_per_second": 587.383,
"eval_steps_per_second": 18.397,
"step": 28000
},
{
"epoch": 2.5,
"learning_rate": 9.99332888592395e-06,
"loss": 0.2007,
"step": 30000
},
{
"epoch": 2.5,
"eval_acc": 0.8427840327533265,
"eval_loss": 0.5507048964500427,
"eval_runtime": 16.6762,
"eval_samples_per_second": 585.864,
"eval_steps_per_second": 18.349,
"step": 30000
},
{
"epoch": 2.67,
"learning_rate": 9.326217478318879e-06,
"loss": 0.2008,
"step": 32000
},
{
"epoch": 2.67,
"eval_acc": 0.8454452405322416,
"eval_loss": 0.5498657822608948,
"eval_runtime": 16.6352,
"eval_samples_per_second": 587.307,
"eval_steps_per_second": 18.395,
"step": 32000
},
{
"epoch": 2.84,
"learning_rate": 8.65910607071381e-06,
"loss": 0.1971,
"step": 34000
},
{
"epoch": 2.84,
"eval_acc": 0.8483111566018424,
"eval_loss": 0.5273796916007996,
"eval_runtime": 16.6214,
"eval_samples_per_second": 587.797,
"eval_steps_per_second": 18.41,
"step": 34000
},
{
"epoch": 3.0,
"learning_rate": 7.99199466310874e-06,
"loss": 0.2054,
"step": 36000
},
{
"epoch": 3.0,
"eval_acc": 0.8475946775844422,
"eval_loss": 0.5454184412956238,
"eval_runtime": 16.6713,
"eval_samples_per_second": 586.039,
"eval_steps_per_second": 18.355,
"step": 36000
},
{
"epoch": 3.17,
"learning_rate": 7.324883255503669e-06,
"loss": 0.1436,
"step": 38000
},
{
"epoch": 3.17,
"eval_acc": 0.844216990788127,
"eval_loss": 0.6787278652191162,
"eval_runtime": 16.636,
"eval_samples_per_second": 587.281,
"eval_steps_per_second": 18.394,
"step": 38000
},
{
"epoch": 3.34,
"learning_rate": 6.6577718478985995e-06,
"loss": 0.1426,
"step": 40000
},
{
"epoch": 3.34,
"eval_acc": 0.8420675537359263,
"eval_loss": 0.6932513117790222,
"eval_runtime": 16.6446,
"eval_samples_per_second": 586.978,
"eval_steps_per_second": 18.384,
"step": 40000
},
{
"epoch": 3.5,
"learning_rate": 5.99066044029353e-06,
"loss": 0.1463,
"step": 42000
},
{
"epoch": 3.5,
"eval_acc": 0.8455475946775844,
"eval_loss": 0.65469890832901,
"eval_runtime": 16.6534,
"eval_samples_per_second": 586.666,
"eval_steps_per_second": 18.375,
"step": 42000
},
{
"epoch": 3.67,
"learning_rate": 5.32354903268846e-06,
"loss": 0.1447,
"step": 44000
},
{
"epoch": 3.67,
"eval_acc": 0.8438075742067553,
"eval_loss": 0.646902859210968,
"eval_runtime": 16.6236,
"eval_samples_per_second": 587.717,
"eval_steps_per_second": 18.408,
"step": 44000
},
{
"epoch": 3.84,
"learning_rate": 4.656437625083389e-06,
"loss": 0.1445,
"step": 46000
},
{
"epoch": 3.84,
"eval_acc": 0.8471852610030707,
"eval_loss": 0.6626366376876831,
"eval_runtime": 16.6568,
"eval_samples_per_second": 586.548,
"eval_steps_per_second": 18.371,
"step": 46000
},
{
"epoch": 4.0,
"learning_rate": 3.989326217478319e-06,
"loss": 0.1457,
"step": 48000
},
{
"epoch": 4.0,
"eval_acc": 0.8503582395087002,
"eval_loss": 0.6494254469871521,
"eval_runtime": 16.658,
"eval_samples_per_second": 586.503,
"eval_steps_per_second": 18.37,
"step": 48000
},
{
"epoch": 4.17,
"learning_rate": 3.3222148098732494e-06,
"loss": 0.1133,
"step": 50000
},
{
"epoch": 4.17,
"eval_acc": 0.8458546571136131,
"eval_loss": 0.7663929462432861,
"eval_runtime": 16.6347,
"eval_samples_per_second": 587.326,
"eval_steps_per_second": 18.395,
"step": 50000
},
{
"epoch": 4.34,
"learning_rate": 2.6551034022681787e-06,
"loss": 0.1138,
"step": 52000
},
{
"epoch": 4.34,
"eval_acc": 0.8452405322415558,
"eval_loss": 0.7856881022453308,
"eval_runtime": 16.65,
"eval_samples_per_second": 586.787,
"eval_steps_per_second": 18.378,
"step": 52000
},
{
"epoch": 4.5,
"learning_rate": 1.987991994663109e-06,
"loss": 0.1154,
"step": 54000
},
{
"epoch": 4.5,
"eval_acc": 0.848618219037871,
"eval_loss": 0.7623312473297119,
"eval_runtime": 16.6261,
"eval_samples_per_second": 587.63,
"eval_steps_per_second": 18.405,
"step": 54000
},
{
"epoch": 4.67,
"learning_rate": 1.3208805870580388e-06,
"loss": 0.1102,
"step": 56000
},
{
"epoch": 4.67,
"eval_acc": 0.845957011258956,
"eval_loss": 0.7739743590354919,
"eval_runtime": 16.636,
"eval_samples_per_second": 587.28,
"eval_steps_per_second": 18.394,
"step": 56000
},
{
"epoch": 4.84,
"learning_rate": 6.537691794529687e-07,
"loss": 0.1143,
"step": 58000
},
{
"epoch": 4.84,
"eval_acc": 0.8445240532241556,
"eval_loss": 0.7610092759132385,
"eval_runtime": 16.6493,
"eval_samples_per_second": 586.812,
"eval_steps_per_second": 18.379,
"step": 58000
},
{
"epoch": 5.0,
"step": 59960,
"total_flos": 1.2620432416386816e+17,
"train_loss": 0.22490199587200385,
"train_runtime": 11405.3413,
"train_samples_per_second": 168.222,
"train_steps_per_second": 5.257
}
],
"max_steps": 59960,
"num_train_epochs": 5,
"total_flos": 1.2620432416386816e+17,
"trial_name": null,
"trial_params": null
}