{ "best_metric": 0.6102265119552612, "best_model_checkpoint": "mobilebert_add_GLUE_Experiment_cola_256/checkpoint-737", "epoch": 16.0, "global_step": 1072, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.6129, "step": 67 }, { "epoch": 1.0, "eval_loss": 0.6179760098457336, "eval_matthews_correlation": 0.0, "eval_runtime": 1.9044, "eval_samples_per_second": 547.668, "eval_steps_per_second": 4.726, "step": 67 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.6078, "step": 134 }, { "epoch": 2.0, "eval_loss": 0.6178193688392639, "eval_matthews_correlation": 0.0, "eval_runtime": 1.9385, "eval_samples_per_second": 538.041, "eval_steps_per_second": 4.643, "step": 134 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.6073, "step": 201 }, { "epoch": 3.0, "eval_loss": 0.6178669333457947, "eval_matthews_correlation": 0.0, "eval_runtime": 1.9618, "eval_samples_per_second": 531.659, "eval_steps_per_second": 4.588, "step": 201 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.6067, "step": 268 }, { "epoch": 4.0, "eval_loss": 0.6166986227035522, "eval_matthews_correlation": 0.0, "eval_runtime": 1.9261, "eval_samples_per_second": 541.5, "eval_steps_per_second": 4.673, "step": 268 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.6059, "step": 335 }, { "epoch": 5.0, "eval_loss": 0.6167794466018677, "eval_matthews_correlation": 0.0, "eval_runtime": 1.882, "eval_samples_per_second": 554.203, "eval_steps_per_second": 4.782, "step": 335 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.5998, "step": 402 }, { "epoch": 6.0, "eval_loss": 0.6115455627441406, "eval_matthews_correlation": 0.0, "eval_runtime": 1.8963, "eval_samples_per_second": 550.027, "eval_steps_per_second": 4.746, "step": 402 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.5917, "step": 469 }, { "epoch": 7.0, "eval_loss": 0.6122425198554993, "eval_matthews_correlation": 0.0, "eval_runtime": 1.9219, "eval_samples_per_second": 542.687, "eval_steps_per_second": 4.683, "step": 469 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.5849, "step": 536 }, { "epoch": 8.0, "eval_loss": 0.6126018166542053, "eval_matthews_correlation": 0.0, "eval_runtime": 1.9382, "eval_samples_per_second": 538.131, "eval_steps_per_second": 4.644, "step": 536 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.5796, "step": 603 }, { "epoch": 9.0, "eval_loss": 0.6276524066925049, "eval_matthews_correlation": 0.0, "eval_runtime": 1.8651, "eval_samples_per_second": 559.208, "eval_steps_per_second": 4.825, "step": 603 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.5759, "step": 670 }, { "epoch": 10.0, "eval_loss": 0.6138085722923279, "eval_matthews_correlation": 0.00286100001416597, "eval_runtime": 1.8774, "eval_samples_per_second": 555.557, "eval_steps_per_second": 4.794, "step": 670 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.5733, "step": 737 }, { "epoch": 11.0, "eval_loss": 0.6102265119552612, "eval_matthews_correlation": 0.01845565733408863, "eval_runtime": 1.925, "eval_samples_per_second": 541.806, "eval_steps_per_second": 4.675, "step": 737 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.5716, "step": 804 }, { "epoch": 12.0, "eval_loss": 0.6143413782119751, "eval_matthews_correlation": 0.025208083291660098, "eval_runtime": 1.8542, "eval_samples_per_second": 562.494, "eval_steps_per_second": 4.854, "step": 804 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.5667, "step": 871 }, { "epoch": 13.0, "eval_loss": 0.6347153782844543, "eval_matthews_correlation": 0.03482284441916008, "eval_runtime": 1.8966, "eval_samples_per_second": 549.934, "eval_steps_per_second": 4.745, "step": 871 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.5662, "step": 938 }, { "epoch": 14.0, "eval_loss": 0.6314128637313843, "eval_matthews_correlation": 0.03846275142815186, "eval_runtime": 1.858, "eval_samples_per_second": 561.368, "eval_steps_per_second": 4.844, "step": 938 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.5631, "step": 1005 }, { "epoch": 15.0, "eval_loss": 0.6130307912826538, "eval_matthews_correlation": 0.017448205413933698, "eval_runtime": 1.8802, "eval_samples_per_second": 554.731, "eval_steps_per_second": 4.787, "step": 1005 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 0.5628, "step": 1072 }, { "epoch": 16.0, "eval_loss": 0.6218040585517883, "eval_matthews_correlation": 0.03482284441916008, "eval_runtime": 1.8684, "eval_samples_per_second": 558.219, "eval_steps_per_second": 4.817, "step": 1072 }, { "epoch": 16.0, "step": 1072, "total_flos": 3526461549969408.0, "train_loss": 0.5860174342767516, "train_runtime": 908.8925, "train_samples_per_second": 470.408, "train_steps_per_second": 3.686 } ], "max_steps": 3350, "num_train_epochs": 50, "total_flos": 3526461549969408.0, "trial_name": null, "trial_params": null }