add_BERT_no_pretrain_qqp / trainer_state.json
gokuls's picture
End of training
8d0b2d9
{
"best_metric": 0.5939152240753174,
"best_model_checkpoint": "add_BERT_no_pretrain_qqp/checkpoint-11372",
"epoch": 9.0,
"global_step": 25587,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 3.9200000000000004e-05,
"loss": 0.657,
"step": 2843
},
{
"epoch": 1.0,
"eval_accuracy": 0.6490477368290873,
"eval_combined_score": 0.4049047757310398,
"eval_f1": 0.16076181463299224,
"eval_loss": 0.6437851786613464,
"eval_runtime": 75.6371,
"eval_samples_per_second": 534.526,
"eval_steps_per_second": 4.178,
"step": 2843
},
{
"epoch": 2.0,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.6273,
"step": 5686
},
{
"epoch": 2.0,
"eval_accuracy": 0.6442987880286916,
"eval_combined_score": 0.4180752158744188,
"eval_f1": 0.1918516437201461,
"eval_loss": 0.6302036643028259,
"eval_runtime": 75.8685,
"eval_samples_per_second": 532.896,
"eval_steps_per_second": 4.165,
"step": 5686
},
{
"epoch": 3.0,
"learning_rate": 3.76e-05,
"loss": 0.6273,
"step": 8529
},
{
"epoch": 3.0,
"eval_accuracy": 0.6526589166460549,
"eval_combined_score": 0.5064287794766108,
"eval_f1": 0.36019864230716664,
"eval_loss": 0.6264548897743225,
"eval_runtime": 75.7943,
"eval_samples_per_second": 533.417,
"eval_steps_per_second": 4.169,
"step": 8529
},
{
"epoch": 4.0,
"learning_rate": 3.680000000000001e-05,
"loss": 0.6093,
"step": 11372
},
{
"epoch": 4.0,
"eval_accuracy": 0.6823893148651992,
"eval_combined_score": 0.5764208523272344,
"eval_f1": 0.4704523897892697,
"eval_loss": 0.5939152240753174,
"eval_runtime": 75.7663,
"eval_samples_per_second": 533.615,
"eval_steps_per_second": 4.171,
"step": 11372
},
{
"epoch": 5.0,
"learning_rate": 3.6e-05,
"loss": 0.5932,
"step": 14215
},
{
"epoch": 5.0,
"eval_accuracy": 0.680187979223349,
"eval_combined_score": 0.5486151798731713,
"eval_f1": 0.41704238052299364,
"eval_loss": 0.5961658358573914,
"eval_runtime": 75.7813,
"eval_samples_per_second": 533.509,
"eval_steps_per_second": 4.17,
"step": 14215
},
{
"epoch": 6.0,
"learning_rate": 3.52e-05,
"loss": 0.599,
"step": 17058
},
{
"epoch": 6.0,
"eval_accuracy": 0.675735839722978,
"eval_combined_score": 0.5776042813321474,
"eval_f1": 0.4794727229413166,
"eval_loss": 0.5981380343437195,
"eval_runtime": 75.8673,
"eval_samples_per_second": 532.904,
"eval_steps_per_second": 4.165,
"step": 17058
},
{
"epoch": 7.0,
"learning_rate": 3.44e-05,
"loss": 0.6063,
"step": 19901
},
{
"epoch": 7.0,
"eval_accuracy": 0.6318327974276527,
"eval_combined_score": 0.3159163987138264,
"eval_f1": 0.0,
"eval_loss": 0.6510685086250305,
"eval_runtime": 75.7196,
"eval_samples_per_second": 533.944,
"eval_steps_per_second": 4.173,
"step": 19901
},
{
"epoch": 8.0,
"learning_rate": 3.3600000000000004e-05,
"loss": 0.6264,
"step": 22744
},
{
"epoch": 8.0,
"eval_accuracy": 0.6531783329210982,
"eval_combined_score": 0.4303083615769859,
"eval_f1": 0.2074383902328736,
"eval_loss": 0.6261195540428162,
"eval_runtime": 75.7764,
"eval_samples_per_second": 533.543,
"eval_steps_per_second": 4.17,
"step": 22744
},
{
"epoch": 9.0,
"learning_rate": 3.28e-05,
"loss": 0.6348,
"step": 25587
},
{
"epoch": 9.0,
"eval_accuracy": 0.6318327974276527,
"eval_combined_score": 0.3159163987138264,
"eval_f1": 0.0,
"eval_loss": 0.6774410605430603,
"eval_runtime": 75.9165,
"eval_samples_per_second": 532.559,
"eval_steps_per_second": 4.162,
"step": 25587
},
{
"epoch": 9.0,
"step": 25587,
"total_flos": 4.676933727069143e+17,
"train_loss": 0.6200644320571725,
"train_runtime": 19316.7897,
"train_samples_per_second": 941.787,
"train_steps_per_second": 7.359
}
],
"max_steps": 142150,
"num_train_epochs": 50,
"total_flos": 4.676933727069143e+17,
"trial_name": null,
"trial_params": null
}