hBERTv1_mnli / trainer_state.json
gokuls's picture
End of training
2e11768
{
"best_metric": 1.098163366317749,
"best_model_checkpoint": "hBERTv1_mnli/checkpoint-19942",
"epoch": 18.0,
"global_step": 27612,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 4.9e-05,
"loss": 1.1001,
"step": 1534
},
{
"epoch": 1.0,
"eval_accuracy": 0.31818644931227713,
"eval_loss": 1.0993714332580566,
"eval_runtime": 12.6071,
"eval_samples_per_second": 778.528,
"eval_steps_per_second": 3.093,
"step": 1534
},
{
"epoch": 2.0,
"learning_rate": 4.8e-05,
"loss": 1.0988,
"step": 3068
},
{
"epoch": 2.0,
"eval_accuracy": 0.31818644931227713,
"eval_loss": 1.0989975929260254,
"eval_runtime": 12.5819,
"eval_samples_per_second": 780.088,
"eval_steps_per_second": 3.1,
"step": 3068
},
{
"epoch": 3.0,
"learning_rate": 4.7e-05,
"loss": 1.0987,
"step": 4602
},
{
"epoch": 3.0,
"eval_accuracy": 0.3273560876209883,
"eval_loss": 1.099189043045044,
"eval_runtime": 12.6822,
"eval_samples_per_second": 773.921,
"eval_steps_per_second": 3.075,
"step": 4602
},
{
"epoch": 4.0,
"learning_rate": 4.600000000000001e-05,
"loss": 1.0987,
"step": 6136
},
{
"epoch": 4.0,
"eval_accuracy": 0.3273560876209883,
"eval_loss": 1.0986238718032837,
"eval_runtime": 12.5133,
"eval_samples_per_second": 784.368,
"eval_steps_per_second": 3.117,
"step": 6136
},
{
"epoch": 5.0,
"learning_rate": 4.50006518904824e-05,
"loss": 1.0987,
"step": 7670
},
{
"epoch": 5.0,
"eval_accuracy": 0.3544574630667346,
"eval_loss": 1.0984646081924438,
"eval_runtime": 12.6008,
"eval_samples_per_second": 778.92,
"eval_steps_per_second": 3.095,
"step": 7670
},
{
"epoch": 6.0,
"learning_rate": 4.40013037809648e-05,
"loss": 1.0986,
"step": 9204
},
{
"epoch": 6.0,
"eval_accuracy": 0.3273560876209883,
"eval_loss": 1.0987476110458374,
"eval_runtime": 12.4519,
"eval_samples_per_second": 788.232,
"eval_steps_per_second": 3.132,
"step": 9204
},
{
"epoch": 7.0,
"learning_rate": 4.3003259452411996e-05,
"loss": 1.105,
"step": 10738
},
{
"epoch": 7.0,
"eval_accuracy": 0.3273560876209883,
"eval_loss": 1.0986319780349731,
"eval_runtime": 12.6436,
"eval_samples_per_second": 776.285,
"eval_steps_per_second": 3.085,
"step": 10738
},
{
"epoch": 8.0,
"learning_rate": 4.2003259452412e-05,
"loss": 1.1045,
"step": 12272
},
{
"epoch": 8.0,
"eval_accuracy": 0.31818644931227713,
"eval_loss": 1.0985896587371826,
"eval_runtime": 12.6272,
"eval_samples_per_second": 777.291,
"eval_steps_per_second": 3.089,
"step": 12272
},
{
"epoch": 9.0,
"learning_rate": 4.1003259452411995e-05,
"loss": 1.0988,
"step": 13806
},
{
"epoch": 9.0,
"eval_accuracy": 0.3273560876209883,
"eval_loss": 1.0983130931854248,
"eval_runtime": 12.4879,
"eval_samples_per_second": 785.962,
"eval_steps_per_second": 3.123,
"step": 13806
},
{
"epoch": 10.0,
"learning_rate": 4.0003259452412e-05,
"loss": 1.0987,
"step": 15340
},
{
"epoch": 10.0,
"eval_accuracy": 0.31818644931227713,
"eval_loss": 1.098677158355713,
"eval_runtime": 12.4301,
"eval_samples_per_second": 789.616,
"eval_steps_per_second": 3.138,
"step": 15340
},
{
"epoch": 11.0,
"learning_rate": 3.9003259452411994e-05,
"loss": 1.0987,
"step": 16874
},
{
"epoch": 11.0,
"eval_accuracy": 0.31818644931227713,
"eval_loss": 1.0991103649139404,
"eval_runtime": 12.5586,
"eval_samples_per_second": 781.533,
"eval_steps_per_second": 3.105,
"step": 16874
},
{
"epoch": 12.0,
"learning_rate": 3.8003911342894394e-05,
"loss": 1.0986,
"step": 18408
},
{
"epoch": 12.0,
"eval_accuracy": 0.3544574630667346,
"eval_loss": 1.0986063480377197,
"eval_runtime": 12.5872,
"eval_samples_per_second": 779.762,
"eval_steps_per_second": 3.098,
"step": 18408
},
{
"epoch": 13.0,
"learning_rate": 3.700391134289439e-05,
"loss": 1.0986,
"step": 19942
},
{
"epoch": 13.0,
"eval_accuracy": 0.3544574630667346,
"eval_loss": 1.098163366317749,
"eval_runtime": 12.4723,
"eval_samples_per_second": 786.944,
"eval_steps_per_second": 3.127,
"step": 19942
},
{
"epoch": 14.0,
"learning_rate": 3.600456323337679e-05,
"loss": 1.0986,
"step": 21476
},
{
"epoch": 14.0,
"eval_accuracy": 0.3544574630667346,
"eval_loss": 1.0988693237304688,
"eval_runtime": 12.5684,
"eval_samples_per_second": 780.924,
"eval_steps_per_second": 3.103,
"step": 21476
},
{
"epoch": 15.0,
"learning_rate": 3.500521512385919e-05,
"loss": 1.0986,
"step": 23010
},
{
"epoch": 15.0,
"eval_accuracy": 0.31818644931227713,
"eval_loss": 1.098739504814148,
"eval_runtime": 12.6125,
"eval_samples_per_second": 778.196,
"eval_steps_per_second": 3.092,
"step": 23010
},
{
"epoch": 16.0,
"learning_rate": 3.4005215123859194e-05,
"loss": 1.0986,
"step": 24544
},
{
"epoch": 16.0,
"eval_accuracy": 0.3544574630667346,
"eval_loss": 1.0985974073410034,
"eval_runtime": 12.4698,
"eval_samples_per_second": 787.103,
"eval_steps_per_second": 3.128,
"step": 24544
},
{
"epoch": 17.0,
"learning_rate": 3.300586701434159e-05,
"loss": 1.0986,
"step": 26078
},
{
"epoch": 17.0,
"eval_accuracy": 0.3544574630667346,
"eval_loss": 1.098615288734436,
"eval_runtime": 12.5264,
"eval_samples_per_second": 783.545,
"eval_steps_per_second": 3.113,
"step": 26078
},
{
"epoch": 18.0,
"learning_rate": 3.200586701434159e-05,
"loss": 1.0986,
"step": 27612
},
{
"epoch": 18.0,
"eval_accuracy": 0.31818644931227713,
"eval_loss": 1.0982955694198608,
"eval_runtime": 12.8565,
"eval_samples_per_second": 763.425,
"eval_steps_per_second": 3.033,
"step": 27612
},
{
"epoch": 18.0,
"step": 27612,
"total_flos": 8.928550500811407e+17,
"train_loss": 1.0994189529164742,
"train_runtime": 25959.7521,
"train_samples_per_second": 756.367,
"train_steps_per_second": 2.955
}
],
"max_steps": 76700,
"num_train_epochs": 50,
"total_flos": 8.928550500811407e+17,
"trial_name": null,
"trial_params": null
}