|
{ |
|
"best_metric": 0.6618520021438599, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_qqp_256/checkpoint-31273", |
|
"epoch": 16.0, |
|
"global_step": 45488, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.9454, |
|
"step": 2843 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.755552807321296, |
|
"eval_combined_score": 0.7059041209574377, |
|
"eval_f1": 0.6562554345935794, |
|
"eval_loss": 0.8257145881652832, |
|
"eval_runtime": 72.5003, |
|
"eval_samples_per_second": 557.653, |
|
"eval_steps_per_second": 4.359, |
|
"step": 2843 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.8165, |
|
"step": 5686 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7589661142715806, |
|
"eval_combined_score": 0.6819519426913909, |
|
"eval_f1": 0.6049377711112012, |
|
"eval_loss": 0.7681939005851746, |
|
"eval_runtime": 72.4829, |
|
"eval_samples_per_second": 557.787, |
|
"eval_steps_per_second": 4.36, |
|
"step": 5686 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.7741, |
|
"step": 8529 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7638139995053178, |
|
"eval_combined_score": 0.6920433977325372, |
|
"eval_f1": 0.6202727959597566, |
|
"eval_loss": 0.7514052987098694, |
|
"eval_runtime": 72.7836, |
|
"eval_samples_per_second": 555.482, |
|
"eval_steps_per_second": 4.342, |
|
"step": 8529 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.7325, |
|
"step": 11372 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7674746475389562, |
|
"eval_combined_score": 0.698115556996189, |
|
"eval_f1": 0.6287564664534218, |
|
"eval_loss": 0.7353646755218506, |
|
"eval_runtime": 72.4082, |
|
"eval_samples_per_second": 558.362, |
|
"eval_steps_per_second": 4.364, |
|
"step": 11372 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6849, |
|
"step": 14215 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7785060598565422, |
|
"eval_combined_score": 0.7301661583702679, |
|
"eval_f1": 0.6818262568839936, |
|
"eval_loss": 0.7063168883323669, |
|
"eval_runtime": 72.761, |
|
"eval_samples_per_second": 555.655, |
|
"eval_steps_per_second": 4.343, |
|
"step": 14215 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.6399, |
|
"step": 17058 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.78276032649023, |
|
"eval_combined_score": 0.7351824040418427, |
|
"eval_f1": 0.6876044815934554, |
|
"eval_loss": 0.6906189918518066, |
|
"eval_runtime": 72.4222, |
|
"eval_samples_per_second": 558.254, |
|
"eval_steps_per_second": 4.363, |
|
"step": 17058 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.6005, |
|
"step": 19901 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7867919861488993, |
|
"eval_combined_score": 0.7430436592992455, |
|
"eval_f1": 0.6992953324495919, |
|
"eval_loss": 0.677066445350647, |
|
"eval_runtime": 72.5064, |
|
"eval_samples_per_second": 557.606, |
|
"eval_steps_per_second": 4.358, |
|
"step": 19901 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.5666, |
|
"step": 22744 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7897106109324759, |
|
"eval_combined_score": 0.7517336301581778, |
|
"eval_f1": 0.71375664938388, |
|
"eval_loss": 0.6808561086654663, |
|
"eval_runtime": 72.3704, |
|
"eval_samples_per_second": 558.654, |
|
"eval_steps_per_second": 4.366, |
|
"step": 22744 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.5365, |
|
"step": 25587 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7885975760573831, |
|
"eval_combined_score": 0.7403265307777458, |
|
"eval_f1": 0.6920554854981085, |
|
"eval_loss": 0.6807268857955933, |
|
"eval_runtime": 72.5072, |
|
"eval_samples_per_second": 557.6, |
|
"eval_steps_per_second": 4.358, |
|
"step": 25587 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5097, |
|
"step": 28430 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7872619342072719, |
|
"eval_combined_score": 0.7566418293977779, |
|
"eval_f1": 0.726021724588284, |
|
"eval_loss": 0.6826764345169067, |
|
"eval_runtime": 72.5186, |
|
"eval_samples_per_second": 557.512, |
|
"eval_steps_per_second": 4.358, |
|
"step": 28430 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.4856, |
|
"step": 31273 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.794855305466238, |
|
"eval_combined_score": 0.7586298751040943, |
|
"eval_f1": 0.7224044447419508, |
|
"eval_loss": 0.6618520021438599, |
|
"eval_runtime": 72.4772, |
|
"eval_samples_per_second": 557.83, |
|
"eval_steps_per_second": 4.36, |
|
"step": 31273 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.4653, |
|
"step": 34116 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7947563690328964, |
|
"eval_combined_score": 0.7572282047839796, |
|
"eval_f1": 0.7197000405350629, |
|
"eval_loss": 0.7001789212226868, |
|
"eval_runtime": 72.5909, |
|
"eval_samples_per_second": 556.957, |
|
"eval_steps_per_second": 4.353, |
|
"step": 34116 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.4438, |
|
"step": 36959 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7965372248330448, |
|
"eval_combined_score": 0.7584372396008563, |
|
"eval_f1": 0.7203372543686679, |
|
"eval_loss": 0.689961850643158, |
|
"eval_runtime": 72.5363, |
|
"eval_samples_per_second": 557.376, |
|
"eval_steps_per_second": 4.356, |
|
"step": 36959 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.4255, |
|
"step": 39802 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7980707395498392, |
|
"eval_combined_score": 0.7632222209859922, |
|
"eval_f1": 0.7283737024221453, |
|
"eval_loss": 0.6847425699234009, |
|
"eval_runtime": 72.6156, |
|
"eval_samples_per_second": 556.767, |
|
"eval_steps_per_second": 4.352, |
|
"step": 39802 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4072, |
|
"step": 42645 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7917388078159783, |
|
"eval_combined_score": 0.7626668474313707, |
|
"eval_f1": 0.7335948870467632, |
|
"eval_loss": 0.6892824769020081, |
|
"eval_runtime": 72.7761, |
|
"eval_samples_per_second": 555.539, |
|
"eval_steps_per_second": 4.342, |
|
"step": 42645 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.391, |
|
"step": 45488 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7957209992579768, |
|
"eval_combined_score": 0.7628521627538454, |
|
"eval_f1": 0.729983326249714, |
|
"eval_loss": 0.7136414647102356, |
|
"eval_runtime": 72.54, |
|
"eval_samples_per_second": 557.348, |
|
"eval_steps_per_second": 4.356, |
|
"step": 45488 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 45488, |
|
"total_flos": 1.5334594896828826e+17, |
|
"train_loss": 0.5890673631687546, |
|
"train_runtime": 25425.7527, |
|
"train_samples_per_second": 715.507, |
|
"train_steps_per_second": 5.591 |
|
} |
|
], |
|
"max_steps": 142150, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.5334594896828826e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|