|
{ |
|
"best_metric": 1.0983835458755493, |
|
"best_model_checkpoint": "hBERTv2_new_no_pretrain_mnli/checkpoint-18408", |
|
"epoch": 11.0, |
|
"global_step": 33748, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.000490009778357236, |
|
"loss": 1.108, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0985444784164429, |
|
"eval_runtime": 16.4465, |
|
"eval_samples_per_second": 596.783, |
|
"eval_steps_per_second": 4.682, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00048000977835723597, |
|
"loss": 1.0988, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.0985891819000244, |
|
"eval_runtime": 16.4454, |
|
"eval_samples_per_second": 596.825, |
|
"eval_steps_per_second": 4.682, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.000470009778357236, |
|
"loss": 1.0986, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.0984275341033936, |
|
"eval_runtime": 16.4493, |
|
"eval_samples_per_second": 596.681, |
|
"eval_steps_per_second": 4.681, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.000460013037809648, |
|
"loss": 1.0986, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0988556146621704, |
|
"eval_runtime": 16.4416, |
|
"eval_samples_per_second": 596.962, |
|
"eval_steps_per_second": 4.683, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00045001629726206, |
|
"loss": 1.0986, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0984472036361694, |
|
"eval_runtime": 16.4363, |
|
"eval_samples_per_second": 597.153, |
|
"eval_steps_per_second": 4.685, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.000440019556714472, |
|
"loss": 1.0986, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.0983835458755493, |
|
"eval_runtime": 16.345, |
|
"eval_samples_per_second": 600.488, |
|
"eval_steps_per_second": 4.711, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.00043002607561929595, |
|
"loss": 1.0986, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.0984454154968262, |
|
"eval_runtime": 16.3565, |
|
"eval_samples_per_second": 600.068, |
|
"eval_steps_per_second": 4.708, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.000420029335071708, |
|
"loss": 1.0986, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0985265970230103, |
|
"eval_runtime": 16.438, |
|
"eval_samples_per_second": 597.092, |
|
"eval_steps_per_second": 4.684, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0004100358539765319, |
|
"loss": 1.0986, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0984556674957275, |
|
"eval_runtime": 16.4147, |
|
"eval_samples_per_second": 597.938, |
|
"eval_steps_per_second": 4.691, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.00040003911342894395, |
|
"loss": 1.0986, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.098721981048584, |
|
"eval_runtime": 16.3968, |
|
"eval_samples_per_second": 598.593, |
|
"eval_steps_per_second": 4.696, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.0003900456323337679, |
|
"loss": 1.0986, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.098907232284546, |
|
"eval_runtime": 16.4429, |
|
"eval_samples_per_second": 596.916, |
|
"eval_steps_per_second": 4.683, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 33748, |
|
"total_flos": 6.321496332708086e+17, |
|
"train_loss": 1.099496340234514, |
|
"train_runtime": 21144.6986, |
|
"train_samples_per_second": 928.606, |
|
"train_steps_per_second": 7.255 |
|
} |
|
], |
|
"max_steps": 153400, |
|
"num_train_epochs": 50, |
|
"total_flos": 6.321496332708086e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|