sa_BERT_no_pretrain_cola / trainer_state.json
gokuls's picture
End of training
3e544bc
{
"best_metric": 0.6180034875869751,
"best_model_checkpoint": "sa_BERT_no_pretrain_cola/checkpoint-335",
"epoch": 10.0,
"global_step": 670,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0.0004902985074626866,
"loss": 0.8826,
"step": 67
},
{
"epoch": 1.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.662401020526886,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.0483,
"eval_samples_per_second": 509.196,
"eval_steps_per_second": 4.394,
"step": 67
},
{
"epoch": 2.0,
"learning_rate": 0.00048029850746268655,
"loss": 0.616,
"step": 134
},
{
"epoch": 2.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.6358437538146973,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.0437,
"eval_samples_per_second": 510.344,
"eval_steps_per_second": 4.404,
"step": 134
},
{
"epoch": 3.0,
"learning_rate": 0.0004702985074626866,
"loss": 0.6134,
"step": 201
},
{
"epoch": 3.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.6195451021194458,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.0501,
"eval_samples_per_second": 508.758,
"eval_steps_per_second": 4.39,
"step": 201
},
{
"epoch": 4.0,
"learning_rate": 0.0004602985074626866,
"loss": 0.6139,
"step": 268
},
{
"epoch": 4.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.6284816265106201,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.0535,
"eval_samples_per_second": 507.913,
"eval_steps_per_second": 4.383,
"step": 268
},
{
"epoch": 5.0,
"learning_rate": 0.0004502985074626866,
"loss": 0.6117,
"step": 335
},
{
"epoch": 5.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.6180034875869751,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.044,
"eval_samples_per_second": 510.27,
"eval_steps_per_second": 4.403,
"step": 335
},
{
"epoch": 6.0,
"learning_rate": 0.00044029850746268656,
"loss": 0.6099,
"step": 402
},
{
"epoch": 6.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.618323028087616,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.045,
"eval_samples_per_second": 510.017,
"eval_steps_per_second": 4.401,
"step": 402
},
{
"epoch": 7.0,
"learning_rate": 0.0004302985074626866,
"loss": 0.6113,
"step": 469
},
{
"epoch": 7.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.6232409477233887,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.0445,
"eval_samples_per_second": 510.158,
"eval_steps_per_second": 4.402,
"step": 469
},
{
"epoch": 8.0,
"learning_rate": 0.00042029850746268656,
"loss": 0.6135,
"step": 536
},
{
"epoch": 8.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.6182034015655518,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.0437,
"eval_samples_per_second": 510.344,
"eval_steps_per_second": 4.404,
"step": 536
},
{
"epoch": 9.0,
"learning_rate": 0.0004102985074626866,
"loss": 0.6094,
"step": 603
},
{
"epoch": 9.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.6220687031745911,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.0453,
"eval_samples_per_second": 509.941,
"eval_steps_per_second": 4.4,
"step": 603
},
{
"epoch": 10.0,
"learning_rate": 0.0004002985074626866,
"loss": 0.6096,
"step": 670
},
{
"epoch": 10.0,
"eval_accuracy": 0.6912751793861389,
"eval_loss": 0.6310281157493591,
"eval_matthews_correlation": 0.0,
"eval_runtime": 2.0508,
"eval_samples_per_second": 508.571,
"eval_steps_per_second": 4.388,
"step": 670
},
{
"epoch": 10.0,
"step": 670,
"total_flos": 1.311463297777664e+16,
"train_loss": 0.6391209616589902,
"train_runtime": 614.413,
"train_samples_per_second": 695.867,
"train_steps_per_second": 5.452
}
],
"max_steps": 3350,
"num_train_epochs": 50,
"total_flos": 1.311463297777664e+16,
"trial_name": null,
"trial_params": null
}