|
{ |
|
"best_metric": 0.611058235168457, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_mrpc_256/checkpoint-232", |
|
"epoch": 13.0, |
|
"global_step": 377, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6431, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.626089870929718, |
|
"eval_runtime": 0.9456, |
|
"eval_samples_per_second": 431.492, |
|
"eval_steps_per_second": 4.23, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.6296, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6234638094902039, |
|
"eval_runtime": 0.9411, |
|
"eval_samples_per_second": 433.528, |
|
"eval_steps_per_second": 4.25, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.6306, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6237208247184753, |
|
"eval_runtime": 0.937, |
|
"eval_samples_per_second": 435.41, |
|
"eval_steps_per_second": 4.269, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.6297, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6237556338310242, |
|
"eval_runtime": 0.9403, |
|
"eval_samples_per_second": 433.895, |
|
"eval_steps_per_second": 4.254, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6276, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6206657290458679, |
|
"eval_runtime": 0.9389, |
|
"eval_samples_per_second": 434.555, |
|
"eval_steps_per_second": 4.26, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.6197, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6212654709815979, |
|
"eval_runtime": 0.9398, |
|
"eval_samples_per_second": 434.126, |
|
"eval_steps_per_second": 4.256, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.6065, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6911764705882353, |
|
"eval_combined_score": 0.7477621483375959, |
|
"eval_f1": 0.8043478260869565, |
|
"eval_loss": 0.6284083724021912, |
|
"eval_runtime": 0.9178, |
|
"eval_samples_per_second": 444.537, |
|
"eval_steps_per_second": 4.358, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.5258, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6911764705882353, |
|
"eval_combined_score": 0.7429823721019352, |
|
"eval_f1": 0.7947882736156351, |
|
"eval_loss": 0.611058235168457, |
|
"eval_runtime": 0.9172, |
|
"eval_samples_per_second": 444.815, |
|
"eval_steps_per_second": 4.361, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.4596, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7034313725490197, |
|
"eval_combined_score": 0.7542921758075211, |
|
"eval_f1": 0.8051529790660225, |
|
"eval_loss": 0.6506468057632446, |
|
"eval_runtime": 0.9479, |
|
"eval_samples_per_second": 430.438, |
|
"eval_steps_per_second": 4.22, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3953, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7034313725490197, |
|
"eval_combined_score": 0.7482968828557064, |
|
"eval_f1": 0.7931623931623932, |
|
"eval_loss": 0.7271472811698914, |
|
"eval_runtime": 0.9357, |
|
"eval_samples_per_second": 436.055, |
|
"eval_steps_per_second": 4.275, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.3426, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6740196078431373, |
|
"eval_combined_score": 0.7140892863614947, |
|
"eval_f1": 0.7541589648798522, |
|
"eval_loss": 0.9509055018424988, |
|
"eval_runtime": 0.944, |
|
"eval_samples_per_second": 432.196, |
|
"eval_steps_per_second": 4.237, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.2821, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6862745098039216, |
|
"eval_combined_score": 0.7335482138060704, |
|
"eval_f1": 0.7808219178082192, |
|
"eval_loss": 1.0021162033081055, |
|
"eval_runtime": 0.9425, |
|
"eval_samples_per_second": 432.902, |
|
"eval_steps_per_second": 4.244, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.2177, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6691176470588235, |
|
"eval_combined_score": 0.7183798218082413, |
|
"eval_f1": 0.7676419965576592, |
|
"eval_loss": 1.0359125137329102, |
|
"eval_runtime": 0.9408, |
|
"eval_samples_per_second": 433.683, |
|
"eval_steps_per_second": 4.252, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"step": 377, |
|
"total_flos": 1256051433275392.0, |
|
"train_loss": 0.5084376322495843, |
|
"train_runtime": 493.204, |
|
"train_samples_per_second": 371.854, |
|
"train_steps_per_second": 2.94 |
|
} |
|
], |
|
"max_steps": 1450, |
|
"num_train_epochs": 50, |
|
"total_flos": 1256051433275392.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|