{ "best_metric": 0.6618520021438599, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_qqp_256/checkpoint-31273", "epoch": 16.0, "global_step": 45488, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.9454, "step": 2843 }, { "epoch": 1.0, "eval_accuracy": 0.755552807321296, "eval_combined_score": 0.7059041209574377, "eval_f1": 0.6562554345935794, "eval_loss": 0.8257145881652832, "eval_runtime": 72.5003, "eval_samples_per_second": 557.653, "eval_steps_per_second": 4.359, "step": 2843 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.8165, "step": 5686 }, { "epoch": 2.0, "eval_accuracy": 0.7589661142715806, "eval_combined_score": 0.6819519426913909, "eval_f1": 0.6049377711112012, "eval_loss": 0.7681939005851746, "eval_runtime": 72.4829, "eval_samples_per_second": 557.787, "eval_steps_per_second": 4.36, "step": 5686 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.7741, "step": 8529 }, { "epoch": 3.0, "eval_accuracy": 0.7638139995053178, "eval_combined_score": 0.6920433977325372, "eval_f1": 0.6202727959597566, "eval_loss": 0.7514052987098694, "eval_runtime": 72.7836, "eval_samples_per_second": 555.482, "eval_steps_per_second": 4.342, "step": 8529 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.7325, "step": 11372 }, { "epoch": 4.0, "eval_accuracy": 0.7674746475389562, "eval_combined_score": 0.698115556996189, "eval_f1": 0.6287564664534218, "eval_loss": 0.7353646755218506, "eval_runtime": 72.4082, "eval_samples_per_second": 558.362, "eval_steps_per_second": 4.364, "step": 11372 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.6849, "step": 14215 }, { "epoch": 5.0, "eval_accuracy": 0.7785060598565422, "eval_combined_score": 0.7301661583702679, "eval_f1": 0.6818262568839936, "eval_loss": 0.7063168883323669, "eval_runtime": 72.761, "eval_samples_per_second": 555.655, "eval_steps_per_second": 4.343, "step": 14215 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.6399, "step": 17058 }, { "epoch": 6.0, "eval_accuracy": 0.78276032649023, "eval_combined_score": 0.7351824040418427, "eval_f1": 0.6876044815934554, "eval_loss": 0.6906189918518066, "eval_runtime": 72.4222, "eval_samples_per_second": 558.254, "eval_steps_per_second": 4.363, "step": 17058 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.6005, "step": 19901 }, { "epoch": 7.0, "eval_accuracy": 0.7867919861488993, "eval_combined_score": 0.7430436592992455, "eval_f1": 0.6992953324495919, "eval_loss": 0.677066445350647, "eval_runtime": 72.5064, "eval_samples_per_second": 557.606, "eval_steps_per_second": 4.358, "step": 19901 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.5666, "step": 22744 }, { "epoch": 8.0, "eval_accuracy": 0.7897106109324759, "eval_combined_score": 0.7517336301581778, "eval_f1": 0.71375664938388, "eval_loss": 0.6808561086654663, "eval_runtime": 72.3704, "eval_samples_per_second": 558.654, "eval_steps_per_second": 4.366, "step": 22744 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.5365, "step": 25587 }, { "epoch": 9.0, "eval_accuracy": 0.7885975760573831, "eval_combined_score": 0.7403265307777458, "eval_f1": 0.6920554854981085, "eval_loss": 0.6807268857955933, "eval_runtime": 72.5072, "eval_samples_per_second": 557.6, "eval_steps_per_second": 4.358, "step": 25587 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.5097, "step": 28430 }, { "epoch": 10.0, "eval_accuracy": 0.7872619342072719, "eval_combined_score": 0.7566418293977779, "eval_f1": 0.726021724588284, "eval_loss": 0.6826764345169067, "eval_runtime": 72.5186, "eval_samples_per_second": 557.512, "eval_steps_per_second": 4.358, "step": 28430 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.4856, "step": 31273 }, { "epoch": 11.0, "eval_accuracy": 0.794855305466238, "eval_combined_score": 0.7586298751040943, "eval_f1": 0.7224044447419508, "eval_loss": 0.6618520021438599, "eval_runtime": 72.4772, "eval_samples_per_second": 557.83, "eval_steps_per_second": 4.36, "step": 31273 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.4653, "step": 34116 }, { "epoch": 12.0, "eval_accuracy": 0.7947563690328964, "eval_combined_score": 0.7572282047839796, "eval_f1": 0.7197000405350629, "eval_loss": 0.7001789212226868, "eval_runtime": 72.5909, "eval_samples_per_second": 556.957, "eval_steps_per_second": 4.353, "step": 34116 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.4438, "step": 36959 }, { "epoch": 13.0, "eval_accuracy": 0.7965372248330448, "eval_combined_score": 0.7584372396008563, "eval_f1": 0.7203372543686679, "eval_loss": 0.689961850643158, "eval_runtime": 72.5363, "eval_samples_per_second": 557.376, "eval_steps_per_second": 4.356, "step": 36959 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.4255, "step": 39802 }, { "epoch": 14.0, "eval_accuracy": 0.7980707395498392, "eval_combined_score": 0.7632222209859922, "eval_f1": 0.7283737024221453, "eval_loss": 0.6847425699234009, "eval_runtime": 72.6156, "eval_samples_per_second": 556.767, "eval_steps_per_second": 4.352, "step": 39802 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.4072, "step": 42645 }, { "epoch": 15.0, "eval_accuracy": 0.7917388078159783, "eval_combined_score": 0.7626668474313707, "eval_f1": 0.7335948870467632, "eval_loss": 0.6892824769020081, "eval_runtime": 72.7761, "eval_samples_per_second": 555.539, "eval_steps_per_second": 4.342, "step": 42645 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 0.391, "step": 45488 }, { "epoch": 16.0, "eval_accuracy": 0.7957209992579768, "eval_combined_score": 0.7628521627538454, "eval_f1": 0.729983326249714, "eval_loss": 0.7136414647102356, "eval_runtime": 72.54, "eval_samples_per_second": 557.348, "eval_steps_per_second": 4.356, "step": 45488 }, { "epoch": 16.0, "step": 45488, "total_flos": 1.5334594896828826e+17, "train_loss": 0.5890673631687546, "train_runtime": 25425.7527, "train_samples_per_second": 715.507, "train_steps_per_second": 5.591 } ], "max_steps": 142150, "num_train_epochs": 50, "total_flos": 1.5334594896828826e+17, "trial_name": null, "trial_params": null }