sa_BERT_48_qqp / trainer_state.json
gokuls's picture
End of training
99627b7
{
"best_metric": 0.34253132343292236,
"best_model_checkpoint": "sa_BERT_48_qqp/checkpoint-11373",
"epoch": 8.0,
"global_step": 30328,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 3.9200000000000004e-05,
"loss": 0.4679,
"step": 3791
},
{
"epoch": 1.0,
"eval_accuracy": 0.8222359633935197,
"eval_combined_score": 0.796364668588026,
"eval_f1": 0.7704933737825324,
"eval_loss": 0.3794594705104828,
"eval_runtime": 85.3071,
"eval_samples_per_second": 473.935,
"eval_steps_per_second": 4.947,
"step": 3791
},
{
"epoch": 2.0,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.3469,
"step": 7582
},
{
"epoch": 2.0,
"eval_accuracy": 0.8446697996537225,
"eval_combined_score": 0.8204728353358471,
"eval_f1": 0.7962758710179718,
"eval_loss": 0.35802027583122253,
"eval_runtime": 85.9214,
"eval_samples_per_second": 470.546,
"eval_steps_per_second": 4.911,
"step": 7582
},
{
"epoch": 3.0,
"learning_rate": 3.76e-05,
"loss": 0.2868,
"step": 11373
},
{
"epoch": 3.0,
"eval_accuracy": 0.8510017313875835,
"eval_combined_score": 0.8253212608245042,
"eval_f1": 0.799640790261425,
"eval_loss": 0.34253132343292236,
"eval_runtime": 85.8338,
"eval_samples_per_second": 471.026,
"eval_steps_per_second": 4.916,
"step": 11373
},
{
"epoch": 4.0,
"learning_rate": 3.680000000000001e-05,
"loss": 0.2372,
"step": 15164
},
{
"epoch": 4.0,
"eval_accuracy": 0.8560969577046748,
"eval_combined_score": 0.8354877050200858,
"eval_f1": 0.8148784523354969,
"eval_loss": 0.37059277296066284,
"eval_runtime": 85.9472,
"eval_samples_per_second": 470.405,
"eval_steps_per_second": 4.91,
"step": 15164
},
{
"epoch": 5.0,
"learning_rate": 3.6e-05,
"loss": 0.1938,
"step": 18955
},
{
"epoch": 5.0,
"eval_accuracy": 0.8624783576552065,
"eval_combined_score": 0.8410731175925228,
"eval_f1": 0.819667877529839,
"eval_loss": 0.36789774894714355,
"eval_runtime": 85.3085,
"eval_samples_per_second": 473.927,
"eval_steps_per_second": 4.947,
"step": 18955
},
{
"epoch": 6.0,
"learning_rate": 3.52e-05,
"loss": 0.1567,
"step": 22746
},
{
"epoch": 6.0,
"eval_accuracy": 0.8639376700469948,
"eval_combined_score": 0.8426697862913135,
"eval_f1": 0.8214019025356321,
"eval_loss": 0.42459815740585327,
"eval_runtime": 85.3092,
"eval_samples_per_second": 473.923,
"eval_steps_per_second": 4.947,
"step": 22746
},
{
"epoch": 7.0,
"learning_rate": 3.44e-05,
"loss": 0.1294,
"step": 26537
},
{
"epoch": 7.0,
"eval_accuracy": 0.8585456344298789,
"eval_combined_score": 0.8387393361321469,
"eval_f1": 0.8189330378344151,
"eval_loss": 0.40472903847694397,
"eval_runtime": 85.2488,
"eval_samples_per_second": 474.259,
"eval_steps_per_second": 4.95,
"step": 26537
},
{
"epoch": 8.0,
"learning_rate": 3.3600000000000004e-05,
"loss": 0.1059,
"step": 30328
},
{
"epoch": 8.0,
"eval_accuracy": 0.8579025476131585,
"eval_combined_score": 0.8380177209875599,
"eval_f1": 0.8181328943619613,
"eval_loss": 0.5063081383705139,
"eval_runtime": 85.299,
"eval_samples_per_second": 473.98,
"eval_steps_per_second": 4.947,
"step": 30328
},
{
"epoch": 8.0,
"step": 30328,
"total_flos": 3.8292763616005325e+17,
"train_loss": 0.24057756895378754,
"train_runtime": 19188.7559,
"train_samples_per_second": 948.071,
"train_steps_per_second": 9.878
}
],
"max_steps": 189550,
"num_train_epochs": 50,
"total_flos": 3.8292763616005325e+17,
"trial_name": null,
"trial_params": null
}