|
{ |
|
"best_metric": 0.5534398555755615, |
|
"best_model_checkpoint": "mobilebert_add_GLUE_Experiment_logit_kd_mrpc_128/checkpoint-174", |
|
"epoch": 11.0, |
|
"global_step": 319, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6399, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.55616295337677, |
|
"eval_runtime": 0.6917, |
|
"eval_samples_per_second": 589.862, |
|
"eval_steps_per_second": 5.783, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.6101, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5559439659118652, |
|
"eval_runtime": 0.696, |
|
"eval_samples_per_second": 586.243, |
|
"eval_steps_per_second": 5.747, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.6111, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5557237863540649, |
|
"eval_runtime": 0.6873, |
|
"eval_samples_per_second": 593.59, |
|
"eval_steps_per_second": 5.82, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.6104, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5572218298912048, |
|
"eval_runtime": 0.6905, |
|
"eval_samples_per_second": 590.849, |
|
"eval_steps_per_second": 5.793, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6086, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5549847483634949, |
|
"eval_runtime": 0.6877, |
|
"eval_samples_per_second": 593.317, |
|
"eval_steps_per_second": 5.817, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.6058, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5534398555755615, |
|
"eval_runtime": 0.6886, |
|
"eval_samples_per_second": 592.53, |
|
"eval_steps_per_second": 5.809, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.6036, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5745493769645691, |
|
"eval_runtime": 0.6903, |
|
"eval_samples_per_second": 591.038, |
|
"eval_steps_per_second": 5.794, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.5969, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5594767928123474, |
|
"eval_runtime": 0.6934, |
|
"eval_samples_per_second": 588.422, |
|
"eval_steps_per_second": 5.769, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.5735, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5699104070663452, |
|
"eval_runtime": 0.6946, |
|
"eval_samples_per_second": 587.423, |
|
"eval_steps_per_second": 5.759, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5597, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5608029961585999, |
|
"eval_runtime": 0.6978, |
|
"eval_samples_per_second": 584.717, |
|
"eval_steps_per_second": 5.733, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.5456, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.5714082717895508, |
|
"eval_runtime": 0.7001, |
|
"eval_samples_per_second": 582.765, |
|
"eval_steps_per_second": 5.713, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 319, |
|
"total_flos": 963213206224896.0, |
|
"train_loss": 0.5968392590370298, |
|
"train_runtime": 234.3942, |
|
"train_samples_per_second": 782.443, |
|
"train_steps_per_second": 6.186 |
|
} |
|
], |
|
"max_steps": 1450, |
|
"num_train_epochs": 50, |
|
"total_flos": 963213206224896.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|