|
{ |
|
"best_metric": 0.618320643901825, |
|
"best_model_checkpoint": "hBERTv1_data_aug_cola/checkpoint-1670", |
|
"epoch": 7.0, |
|
"global_step": 5845, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6084, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.6251746416091919, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.4137, |
|
"eval_samples_per_second": 737.758, |
|
"eval_steps_per_second": 3.537, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.6066, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.618320643901825, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.4125, |
|
"eval_samples_per_second": 738.423, |
|
"eval_steps_per_second": 3.54, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.6065, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.6184751391410828, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.4126, |
|
"eval_samples_per_second": 738.355, |
|
"eval_steps_per_second": 3.54, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.6062, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.6219027042388916, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.414, |
|
"eval_samples_per_second": 737.6, |
|
"eval_steps_per_second": 3.536, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6061, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.6204879879951477, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.4116, |
|
"eval_samples_per_second": 738.88, |
|
"eval_steps_per_second": 3.542, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.400119760479042e-05, |
|
"loss": 0.6066, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.6183536648750305, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.4244, |
|
"eval_samples_per_second": 732.218, |
|
"eval_steps_per_second": 3.51, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3001197604790424e-05, |
|
"loss": 0.6061, |
|
"step": 5845 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.6187195181846619, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 1.4085, |
|
"eval_samples_per_second": 740.513, |
|
"eval_steps_per_second": 3.55, |
|
"step": 5845 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 5845, |
|
"total_flos": 1.8886821472423117e+17, |
|
"train_loss": 0.6066560117070546, |
|
"train_runtime": 5653.074, |
|
"train_samples_per_second": 1889.317, |
|
"train_steps_per_second": 7.385 |
|
} |
|
], |
|
"max_steps": 41750, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.8886821472423117e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|