|
{ |
|
"best_metric": 0.34253132343292236, |
|
"best_model_checkpoint": "sa_BERT_48_qqp/checkpoint-11373", |
|
"epoch": 8.0, |
|
"global_step": 30328, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 0.4679, |
|
"step": 3791 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8222359633935197, |
|
"eval_combined_score": 0.796364668588026, |
|
"eval_f1": 0.7704933737825324, |
|
"eval_loss": 0.3794594705104828, |
|
"eval_runtime": 85.3071, |
|
"eval_samples_per_second": 473.935, |
|
"eval_steps_per_second": 4.947, |
|
"step": 3791 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.8400000000000005e-05, |
|
"loss": 0.3469, |
|
"step": 7582 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8446697996537225, |
|
"eval_combined_score": 0.8204728353358471, |
|
"eval_f1": 0.7962758710179718, |
|
"eval_loss": 0.35802027583122253, |
|
"eval_runtime": 85.9214, |
|
"eval_samples_per_second": 470.546, |
|
"eval_steps_per_second": 4.911, |
|
"step": 7582 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.76e-05, |
|
"loss": 0.2868, |
|
"step": 11373 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8510017313875835, |
|
"eval_combined_score": 0.8253212608245042, |
|
"eval_f1": 0.799640790261425, |
|
"eval_loss": 0.34253132343292236, |
|
"eval_runtime": 85.8338, |
|
"eval_samples_per_second": 471.026, |
|
"eval_steps_per_second": 4.916, |
|
"step": 11373 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.680000000000001e-05, |
|
"loss": 0.2372, |
|
"step": 15164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8560969577046748, |
|
"eval_combined_score": 0.8354877050200858, |
|
"eval_f1": 0.8148784523354969, |
|
"eval_loss": 0.37059277296066284, |
|
"eval_runtime": 85.9472, |
|
"eval_samples_per_second": 470.405, |
|
"eval_steps_per_second": 4.91, |
|
"step": 15164 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1938, |
|
"step": 18955 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8624783576552065, |
|
"eval_combined_score": 0.8410731175925228, |
|
"eval_f1": 0.819667877529839, |
|
"eval_loss": 0.36789774894714355, |
|
"eval_runtime": 85.3085, |
|
"eval_samples_per_second": 473.927, |
|
"eval_steps_per_second": 4.947, |
|
"step": 18955 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.52e-05, |
|
"loss": 0.1567, |
|
"step": 22746 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8639376700469948, |
|
"eval_combined_score": 0.8426697862913135, |
|
"eval_f1": 0.8214019025356321, |
|
"eval_loss": 0.42459815740585327, |
|
"eval_runtime": 85.3092, |
|
"eval_samples_per_second": 473.923, |
|
"eval_steps_per_second": 4.947, |
|
"step": 22746 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.44e-05, |
|
"loss": 0.1294, |
|
"step": 26537 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8585456344298789, |
|
"eval_combined_score": 0.8387393361321469, |
|
"eval_f1": 0.8189330378344151, |
|
"eval_loss": 0.40472903847694397, |
|
"eval_runtime": 85.2488, |
|
"eval_samples_per_second": 474.259, |
|
"eval_steps_per_second": 4.95, |
|
"step": 26537 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.3600000000000004e-05, |
|
"loss": 0.1059, |
|
"step": 30328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8579025476131585, |
|
"eval_combined_score": 0.8380177209875599, |
|
"eval_f1": 0.8181328943619613, |
|
"eval_loss": 0.5063081383705139, |
|
"eval_runtime": 85.299, |
|
"eval_samples_per_second": 473.98, |
|
"eval_steps_per_second": 4.947, |
|
"step": 30328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 30328, |
|
"total_flos": 3.8292763616005325e+17, |
|
"train_loss": 0.24057756895378754, |
|
"train_runtime": 19188.7559, |
|
"train_samples_per_second": 948.071, |
|
"train_steps_per_second": 9.878 |
|
} |
|
], |
|
"max_steps": 189550, |
|
"num_train_epochs": 50, |
|
"total_flos": 3.8292763616005325e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|