sentiment-pt-pl30-4 / trainer_state.json
apwic's picture
End of training
7c83ae7 verified
raw
history blame contribute delete
No virus
11 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 3.639183521270752,
"learning_rate": 4.75e-05,
"loss": 0.5413,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7243107769423559,
"eval_f1": 0.6445578231292517,
"eval_loss": 0.5006802678108215,
"eval_precision": 0.6593400801180687,
"eval_recall": 0.6374340789234406,
"eval_runtime": 1.8003,
"eval_samples_per_second": 221.628,
"eval_steps_per_second": 27.773,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 3.4983344078063965,
"learning_rate": 4.5e-05,
"loss": 0.4584,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.8295739348370927,
"eval_f1": 0.7760942760942761,
"eval_loss": 0.3855762183666229,
"eval_precision": 0.8122789566755084,
"eval_recall": 0.7569103473358793,
"eval_runtime": 1.8373,
"eval_samples_per_second": 217.166,
"eval_steps_per_second": 27.214,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 5.97755765914917,
"learning_rate": 4.25e-05,
"loss": 0.3559,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8571428571428571,
"eval_f1": 0.8079089841803424,
"eval_loss": 0.3407064378261566,
"eval_precision": 0.8638322884012539,
"eval_recall": 0.7814148026913984,
"eval_runtime": 1.8397,
"eval_samples_per_second": 216.877,
"eval_steps_per_second": 27.178,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.0363476276397705,
"learning_rate": 4e-05,
"loss": 0.2961,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8419946387230413,
"eval_loss": 0.3088829517364502,
"eval_precision": 0.8437691365584814,
"eval_recall": 0.8402891434806329,
"eval_runtime": 1.8466,
"eval_samples_per_second": 216.069,
"eval_steps_per_second": 27.076,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 2.3318538665771484,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.276,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8621553884711779,
"eval_f1": 0.8365204824303285,
"eval_loss": 0.29173794388771057,
"eval_precision": 0.8313636363636363,
"eval_recall": 0.8424713584288053,
"eval_runtime": 1.8414,
"eval_samples_per_second": 216.677,
"eval_steps_per_second": 27.153,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.3067033290863037,
"learning_rate": 3.5e-05,
"loss": 0.2555,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.842789598108747,
"eval_loss": 0.29054704308509827,
"eval_precision": 0.842789598108747,
"eval_recall": 0.842789598108747,
"eval_runtime": 1.847,
"eval_samples_per_second": 216.025,
"eval_steps_per_second": 27.071,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 1.5437530279159546,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2427,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.84402249790578,
"eval_loss": 0.30313801765441895,
"eval_precision": 0.8669909824394875,
"eval_recall": 0.8281051100200036,
"eval_runtime": 1.8479,
"eval_samples_per_second": 215.922,
"eval_steps_per_second": 27.058,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 3.033709764480591,
"learning_rate": 3e-05,
"loss": 0.2219,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8522278069611882,
"eval_loss": 0.2907596528530121,
"eval_precision": 0.8513631702756499,
"eval_recall": 0.8531096563011457,
"eval_runtime": 1.8468,
"eval_samples_per_second": 216.053,
"eval_steps_per_second": 27.074,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 2.670888900756836,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2158,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8847117794486216,
"eval_f1": 0.8539996181748759,
"eval_loss": 0.3083769381046295,
"eval_precision": 0.8759595959595959,
"eval_recall": 0.8384251682124022,
"eval_runtime": 1.856,
"eval_samples_per_second": 214.979,
"eval_steps_per_second": 26.94,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 3.6058239936828613,
"learning_rate": 2.5e-05,
"loss": 0.2,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8517301860990547,
"eval_loss": 0.29381993412971497,
"eval_precision": 0.8456788321167883,
"eval_recall": 0.8588379705400981,
"eval_runtime": 1.8468,
"eval_samples_per_second": 216.045,
"eval_steps_per_second": 27.073,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 3.181007146835327,
"learning_rate": 2.25e-05,
"loss": 0.1885,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8514869535493182,
"eval_loss": 0.2976568639278412,
"eval_precision": 0.8523821128305106,
"eval_recall": 0.8506092016730314,
"eval_runtime": 1.8451,
"eval_samples_per_second": 216.253,
"eval_steps_per_second": 27.099,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 3.1322133541107178,
"learning_rate": 2e-05,
"loss": 0.183,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8847117794486216,
"eval_f1": 0.8556621579112929,
"eval_loss": 0.30698344111442566,
"eval_precision": 0.871654421411703,
"eval_recall": 0.8434260774686306,
"eval_runtime": 1.8533,
"eval_samples_per_second": 215.295,
"eval_steps_per_second": 26.979,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.6321003437042236,
"learning_rate": 1.75e-05,
"loss": 0.1752,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8796992481203008,
"eval_f1": 0.8569892473118279,
"eval_loss": 0.29585033655166626,
"eval_precision": 0.8522004241781549,
"eval_recall": 0.8623840698308783,
"eval_runtime": 1.846,
"eval_samples_per_second": 216.146,
"eval_steps_per_second": 27.086,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 3.546229600906372,
"learning_rate": 1.5e-05,
"loss": 0.1558,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8530841286673736,
"eval_loss": 0.3040328025817871,
"eval_precision": 0.8446597760551249,
"eval_recall": 0.8638388797963266,
"eval_runtime": 1.8508,
"eval_samples_per_second": 215.586,
"eval_steps_per_second": 27.016,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 0.5455009937286377,
"learning_rate": 1.25e-05,
"loss": 0.1538,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8721804511278195,
"eval_f1": 0.8484099018899409,
"eval_loss": 0.30823931097984314,
"eval_precision": 0.8430645161290322,
"eval_recall": 0.8545644662665939,
"eval_runtime": 1.848,
"eval_samples_per_second": 215.904,
"eval_steps_per_second": 27.056,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 2.4319658279418945,
"learning_rate": 1e-05,
"loss": 0.152,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8483536940081443,
"eval_loss": 0.30997762084007263,
"eval_precision": 0.8575792287132493,
"eval_recall": 0.8406073831605747,
"eval_runtime": 1.8455,
"eval_samples_per_second": 216.202,
"eval_steps_per_second": 27.093,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 3.420119285583496,
"learning_rate": 7.5e-06,
"loss": 0.1436,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8510304659498208,
"eval_loss": 0.31050172448158264,
"eval_precision": 0.8463237893248498,
"eval_recall": 0.8563375159119839,
"eval_runtime": 1.8478,
"eval_samples_per_second": 215.931,
"eval_steps_per_second": 27.059,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 0.7008257508277893,
"learning_rate": 5e-06,
"loss": 0.1426,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8503151260504201,
"eval_loss": 0.3118613362312317,
"eval_precision": 0.8470628455912955,
"eval_recall": 0.8538370612838698,
"eval_runtime": 1.8481,
"eval_samples_per_second": 215.903,
"eval_steps_per_second": 27.056,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 2.8680572509765625,
"learning_rate": 2.5e-06,
"loss": 0.1398,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8796992481203008,
"eval_f1": 0.8569892473118279,
"eval_loss": 0.316354900598526,
"eval_precision": 0.8522004241781549,
"eval_recall": 0.8623840698308783,
"eval_runtime": 1.8465,
"eval_samples_per_second": 216.08,
"eval_steps_per_second": 27.078,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 3.442784547805786,
"learning_rate": 0.0,
"loss": 0.14,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8796992481203008,
"eval_f1": 0.8563025210084034,
"eval_loss": 0.31680676341056824,
"eval_precision": 0.8529936381473334,
"eval_recall": 0.8598836152027641,
"eval_runtime": 1.8511,
"eval_samples_per_second": 215.549,
"eval_steps_per_second": 27.011,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 8460375995160000.0,
"train_loss": 0.2318932650519199,
"train_runtime": 643.2373,
"train_samples_per_second": 113.333,
"train_steps_per_second": 3.793
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8460375995160000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}