|
{ |
|
"best_metric": 0.6971034407615662, |
|
"best_model_checkpoint": "hBERTv2_new_pretrain_w_init_48_rte/checkpoint-40", |
|
"epoch": 7.0, |
|
"global_step": 140, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 0.755, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.7425682544708252, |
|
"eval_runtime": 0.4916, |
|
"eval_samples_per_second": 563.487, |
|
"eval_steps_per_second": 6.103, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.8400000000000005e-05, |
|
"loss": 0.7127, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.48736462093862815, |
|
"eval_loss": 0.6971034407615662, |
|
"eval_runtime": 0.501, |
|
"eval_samples_per_second": 552.935, |
|
"eval_steps_per_second": 5.988, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.76e-05, |
|
"loss": 0.7149, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.7048454284667969, |
|
"eval_runtime": 0.4961, |
|
"eval_samples_per_second": 558.334, |
|
"eval_steps_per_second": 6.047, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.680000000000001e-05, |
|
"loss": 0.6699, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.49458483754512633, |
|
"eval_loss": 0.7651455402374268, |
|
"eval_runtime": 0.4953, |
|
"eval_samples_per_second": 559.23, |
|
"eval_steps_per_second": 6.057, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.6432, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.48014440433212996, |
|
"eval_loss": 0.702499270439148, |
|
"eval_runtime": 0.4961, |
|
"eval_samples_per_second": 558.37, |
|
"eval_steps_per_second": 6.047, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.52e-05, |
|
"loss": 0.6008, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.7388982176780701, |
|
"eval_runtime": 0.5048, |
|
"eval_samples_per_second": 548.748, |
|
"eval_steps_per_second": 5.943, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.44e-05, |
|
"loss": 0.5142, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.9628210067749023, |
|
"eval_runtime": 0.4987, |
|
"eval_samples_per_second": 555.482, |
|
"eval_steps_per_second": 6.016, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 140, |
|
"total_flos": 2550691654533120.0, |
|
"train_loss": 0.6586811883108956, |
|
"train_runtime": 121.339, |
|
"train_samples_per_second": 1026.051, |
|
"train_steps_per_second": 8.241 |
|
} |
|
], |
|
"max_steps": 1000, |
|
"num_train_epochs": 50, |
|
"total_flos": 2550691654533120.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|