|
{ |
|
"best_metric": 1.1755008697509766, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_mnli/checkpoint-30680", |
|
"epoch": 15.0, |
|
"global_step": 46020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 1.6232, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5504839531329597, |
|
"eval_loss": 1.3869810104370117, |
|
"eval_runtime": 22.8066, |
|
"eval_samples_per_second": 430.358, |
|
"eval_steps_per_second": 3.376, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.4341, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5833927661742231, |
|
"eval_loss": 1.3186262845993042, |
|
"eval_runtime": 22.7427, |
|
"eval_samples_per_second": 431.567, |
|
"eval_steps_per_second": 3.386, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.3724, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5942944472745797, |
|
"eval_loss": 1.2818658351898193, |
|
"eval_runtime": 22.8582, |
|
"eval_samples_per_second": 429.386, |
|
"eval_steps_per_second": 3.369, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.3249, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5981660723382578, |
|
"eval_loss": 1.270175814628601, |
|
"eval_runtime": 22.8992, |
|
"eval_samples_per_second": 428.618, |
|
"eval_steps_per_second": 3.363, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.2788, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6030565461029037, |
|
"eval_loss": 1.235905408859253, |
|
"eval_runtime": 22.929, |
|
"eval_samples_per_second": 428.06, |
|
"eval_steps_per_second": 3.358, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 1.2302, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6192562404482934, |
|
"eval_loss": 1.200829029083252, |
|
"eval_runtime": 22.985, |
|
"eval_samples_per_second": 427.017, |
|
"eval_steps_per_second": 3.35, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 1.1842, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6222109016811004, |
|
"eval_loss": 1.1990573406219482, |
|
"eval_runtime": 22.7719, |
|
"eval_samples_per_second": 431.015, |
|
"eval_steps_per_second": 3.381, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 1.1441, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6201732042791646, |
|
"eval_loss": 1.1838924884796143, |
|
"eval_runtime": 22.9011, |
|
"eval_samples_per_second": 428.582, |
|
"eval_steps_per_second": 3.362, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 1.1057, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.624350483953133, |
|
"eval_loss": 1.1861381530761719, |
|
"eval_runtime": 22.9564, |
|
"eval_samples_per_second": 427.55, |
|
"eval_steps_per_second": 3.354, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 1.0715, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6249617931737137, |
|
"eval_loss": 1.1755008697509766, |
|
"eval_runtime": 22.9046, |
|
"eval_samples_per_second": 428.516, |
|
"eval_steps_per_second": 3.362, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 1.0386, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6312786551197147, |
|
"eval_loss": 1.1971582174301147, |
|
"eval_runtime": 23.1346, |
|
"eval_samples_per_second": 424.256, |
|
"eval_steps_per_second": 3.328, |
|
"step": 33748 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 1.0066, |
|
"step": 36816 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6276107997962302, |
|
"eval_loss": 1.2148597240447998, |
|
"eval_runtime": 22.894, |
|
"eval_samples_per_second": 428.714, |
|
"eval_steps_per_second": 3.363, |
|
"step": 36816 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.9767, |
|
"step": 39884 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6192562404482934, |
|
"eval_loss": 1.2187175750732422, |
|
"eval_runtime": 22.8537, |
|
"eval_samples_per_second": 429.471, |
|
"eval_steps_per_second": 3.369, |
|
"step": 39884 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.9482, |
|
"step": 42952 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6226184411614876, |
|
"eval_loss": 1.2003837823867798, |
|
"eval_runtime": 23.0327, |
|
"eval_samples_per_second": 426.133, |
|
"eval_steps_per_second": 3.343, |
|
"step": 42952 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.921, |
|
"step": 46020 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6193581253183902, |
|
"eval_loss": 1.2092907428741455, |
|
"eval_runtime": 22.9952, |
|
"eval_samples_per_second": 426.829, |
|
"eval_steps_per_second": 3.349, |
|
"step": 46020 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 46020, |
|
"total_flos": 1.8469785301942272e+17, |
|
"train_loss": 1.177346492735835, |
|
"train_runtime": 35549.2544, |
|
"train_samples_per_second": 552.335, |
|
"train_steps_per_second": 4.315 |
|
} |
|
], |
|
"max_steps": 153400, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.8469785301942272e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|