{ "best_metric": 0.5534398555755615, "best_model_checkpoint": "mobilebert_add_GLUE_Experiment_logit_kd_mrpc_128/checkpoint-174", "epoch": 11.0, "global_step": 319, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.6399, "step": 29 }, { "epoch": 1.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.55616295337677, "eval_runtime": 0.6917, "eval_samples_per_second": 589.862, "eval_steps_per_second": 5.783, "step": 29 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.6101, "step": 58 }, { "epoch": 2.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5559439659118652, "eval_runtime": 0.696, "eval_samples_per_second": 586.243, "eval_steps_per_second": 5.747, "step": 58 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.6111, "step": 87 }, { "epoch": 3.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5557237863540649, "eval_runtime": 0.6873, "eval_samples_per_second": 593.59, "eval_steps_per_second": 5.82, "step": 87 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.6104, "step": 116 }, { "epoch": 4.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5572218298912048, "eval_runtime": 0.6905, "eval_samples_per_second": 590.849, "eval_steps_per_second": 5.793, "step": 116 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.6086, "step": 145 }, { "epoch": 5.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5549847483634949, "eval_runtime": 0.6877, "eval_samples_per_second": 593.317, "eval_steps_per_second": 5.817, "step": 145 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.6058, "step": 174 }, { "epoch": 6.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5534398555755615, "eval_runtime": 0.6886, "eval_samples_per_second": 592.53, "eval_steps_per_second": 5.809, "step": 174 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.6036, "step": 203 }, { "epoch": 7.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5745493769645691, "eval_runtime": 0.6903, "eval_samples_per_second": 591.038, "eval_steps_per_second": 5.794, "step": 203 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.5969, "step": 232 }, { "epoch": 8.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5594767928123474, "eval_runtime": 0.6934, "eval_samples_per_second": 588.422, "eval_steps_per_second": 5.769, "step": 232 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.5735, "step": 261 }, { "epoch": 9.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5699104070663452, "eval_runtime": 0.6946, "eval_samples_per_second": 587.423, "eval_steps_per_second": 5.759, "step": 261 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.5597, "step": 290 }, { "epoch": 10.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5608029961585999, "eval_runtime": 0.6978, "eval_samples_per_second": 584.717, "eval_steps_per_second": 5.733, "step": 290 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.5456, "step": 319 }, { "epoch": 11.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.5714082717895508, "eval_runtime": 0.7001, "eval_samples_per_second": 582.765, "eval_steps_per_second": 5.713, "step": 319 }, { "epoch": 11.0, "step": 319, "total_flos": 963213206224896.0, "train_loss": 0.5968392590370298, "train_runtime": 234.3942, "train_samples_per_second": 782.443, "train_steps_per_second": 6.186 } ], "max_steps": 1450, "num_train_epochs": 50, "total_flos": 963213206224896.0, "trial_name": null, "trial_params": null }