hBERTv1_no_pretrain_qnli / trainer_state.json
gokuls's picture
End of training
4cacfd0
{
"best_metric": 0.6930651664733887,
"best_model_checkpoint": "hBERTv1_no_pretrain_qnli/checkpoint-9009",
"epoch": 16.0,
"global_step": 13104,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0.00049002442002442,
"loss": 0.715,
"step": 819
},
{
"epoch": 1.0,
"eval_accuracy": 0.4946000366099213,
"eval_loss": 0.6931389570236206,
"eval_runtime": 9.5675,
"eval_samples_per_second": 570.995,
"eval_steps_per_second": 4.494,
"step": 819
},
{
"epoch": 2.0,
"learning_rate": 0.00048002442002442004,
"loss": 0.6932,
"step": 1638
},
{
"epoch": 2.0,
"eval_accuracy": 0.4946000366099213,
"eval_loss": 0.6931495070457458,
"eval_runtime": 9.5694,
"eval_samples_per_second": 570.88,
"eval_steps_per_second": 4.493,
"step": 1638
},
{
"epoch": 3.0,
"learning_rate": 0.00047002442002442,
"loss": 0.6936,
"step": 2457
},
{
"epoch": 3.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.6931167840957642,
"eval_runtime": 9.5644,
"eval_samples_per_second": 571.18,
"eval_steps_per_second": 4.496,
"step": 2457
},
{
"epoch": 4.0,
"learning_rate": 0.00046002442002442004,
"loss": 0.6932,
"step": 3276
},
{
"epoch": 4.0,
"eval_accuracy": 0.4946000366099213,
"eval_loss": 0.6931969523429871,
"eval_runtime": 9.5713,
"eval_samples_per_second": 570.77,
"eval_steps_per_second": 4.493,
"step": 3276
},
{
"epoch": 5.0,
"learning_rate": 0.00045002442002442,
"loss": 0.6932,
"step": 4095
},
{
"epoch": 5.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.6933488249778748,
"eval_runtime": 9.573,
"eval_samples_per_second": 570.669,
"eval_steps_per_second": 4.492,
"step": 4095
},
{
"epoch": 6.0,
"learning_rate": 0.00044002442002442004,
"loss": 0.6932,
"step": 4914
},
{
"epoch": 6.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.6930915117263794,
"eval_runtime": 9.5567,
"eval_samples_per_second": 571.639,
"eval_steps_per_second": 4.499,
"step": 4914
},
{
"epoch": 7.0,
"learning_rate": 0.00043002442002442007,
"loss": 0.6932,
"step": 5733
},
{
"epoch": 7.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.6931073069572449,
"eval_runtime": 9.5808,
"eval_samples_per_second": 570.202,
"eval_steps_per_second": 4.488,
"step": 5733
},
{
"epoch": 8.0,
"learning_rate": 0.00042002442002442005,
"loss": 0.6932,
"step": 6552
},
{
"epoch": 8.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.6930862069129944,
"eval_runtime": 9.5904,
"eval_samples_per_second": 569.63,
"eval_steps_per_second": 4.484,
"step": 6552
},
{
"epoch": 9.0,
"learning_rate": 0.00041003663003663003,
"loss": 0.6935,
"step": 7371
},
{
"epoch": 9.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.693478524684906,
"eval_runtime": 9.5344,
"eval_samples_per_second": 572.981,
"eval_steps_per_second": 4.51,
"step": 7371
},
{
"epoch": 10.0,
"learning_rate": 0.00040003663003663006,
"loss": 0.6932,
"step": 8190
},
{
"epoch": 10.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.6930915117263794,
"eval_runtime": 9.5321,
"eval_samples_per_second": 573.116,
"eval_steps_per_second": 4.511,
"step": 8190
},
{
"epoch": 11.0,
"learning_rate": 0.00039003663003663004,
"loss": 0.6932,
"step": 9009
},
{
"epoch": 11.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.6930651664733887,
"eval_runtime": 9.5448,
"eval_samples_per_second": 572.352,
"eval_steps_per_second": 4.505,
"step": 9009
},
{
"epoch": 12.0,
"learning_rate": 0.00038004884004884,
"loss": 0.6932,
"step": 9828
},
{
"epoch": 12.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.6931073069572449,
"eval_runtime": 9.5697,
"eval_samples_per_second": 570.866,
"eval_steps_per_second": 4.493,
"step": 9828
},
{
"epoch": 13.0,
"learning_rate": 0.00037004884004884005,
"loss": 0.6932,
"step": 10647
},
{
"epoch": 13.0,
"eval_accuracy": 0.5053999633900788,
"eval_loss": 0.6931073069572449,
"eval_runtime": 9.5909,
"eval_samples_per_second": 569.603,
"eval_steps_per_second": 4.483,
"step": 10647
},
{
"epoch": 14.0,
"learning_rate": 0.00036004884004884,
"loss": 0.6932,
"step": 11466
},
{
"epoch": 14.0,
"eval_accuracy": 0.4946000366099213,
"eval_loss": 0.6931231617927551,
"eval_runtime": 9.5446,
"eval_samples_per_second": 572.363,
"eval_steps_per_second": 4.505,
"step": 11466
},
{
"epoch": 15.0,
"learning_rate": 0.00035004884004884005,
"loss": 0.6932,
"step": 12285
},
{
"epoch": 15.0,
"eval_accuracy": 0.4946000366099213,
"eval_loss": 0.693396270275116,
"eval_runtime": 9.5573,
"eval_samples_per_second": 571.603,
"eval_steps_per_second": 4.499,
"step": 12285
},
{
"epoch": 16.0,
"learning_rate": 0.0003400488400488401,
"loss": 0.6932,
"step": 13104
},
{
"epoch": 16.0,
"eval_accuracy": 0.4946000366099213,
"eval_loss": 0.6931284070014954,
"eval_runtime": 9.5555,
"eval_samples_per_second": 571.71,
"eval_steps_per_second": 4.5,
"step": 13104
},
{
"epoch": 16.0,
"step": 13104,
"total_flos": 2.481936015514665e+17,
"train_loss": 0.694585192334521,
"train_runtime": 8903.1517,
"train_samples_per_second": 588.236,
"train_steps_per_second": 4.599
}
],
"max_steps": 40950,
"num_train_epochs": 50,
"total_flos": 2.481936015514665e+17,
"trial_name": null,
"trial_params": null
}