{ "best_metric": 0.34341979026794434, "best_model_checkpoint": "distilbert_add_GLUE_Experiment_logit_kd_wnli_384/checkpoint-12", "epoch": 9.0, "global_step": 27, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.368, "step": 3 }, { "epoch": 1.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.34810706973075867, "eval_runtime": 0.0811, "eval_samples_per_second": 875.836, "eval_steps_per_second": 12.336, "step": 3 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.3551, "step": 6 }, { "epoch": 2.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.3499087989330292, "eval_runtime": 0.0812, "eval_samples_per_second": 874.907, "eval_steps_per_second": 12.323, "step": 6 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.3472, "step": 9 }, { "epoch": 3.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.3441191613674164, "eval_runtime": 0.081, "eval_samples_per_second": 876.571, "eval_steps_per_second": 12.346, "step": 9 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.3518, "step": 12 }, { "epoch": 4.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.34341979026794434, "eval_runtime": 0.0813, "eval_samples_per_second": 873.198, "eval_steps_per_second": 12.299, "step": 12 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.3492, "step": 15 }, { "epoch": 5.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.3494105637073517, "eval_runtime": 0.0808, "eval_samples_per_second": 879.216, "eval_steps_per_second": 12.383, "step": 15 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.3495, "step": 18 }, { "epoch": 6.0, "eval_accuracy": 0.43661971830985913, "eval_loss": 0.34805938601493835, "eval_runtime": 0.0809, "eval_samples_per_second": 877.583, "eval_steps_per_second": 12.36, "step": 18 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.3495, "step": 21 }, { "epoch": 7.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.3439640700817108, "eval_runtime": 0.0826, "eval_samples_per_second": 859.873, "eval_steps_per_second": 12.111, "step": 21 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.3463, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.34368786215782166, "eval_runtime": 0.0814, "eval_samples_per_second": 872.625, "eval_steps_per_second": 12.29, "step": 24 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.349, "step": 27 }, { "epoch": 9.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.34437814354896545, "eval_runtime": 0.0806, "eval_samples_per_second": 880.755, "eval_steps_per_second": 12.405, "step": 27 }, { "epoch": 9.0, "step": 27, "total_flos": 149440281182208.0, "train_loss": 0.35173843966590035, "train_runtime": 21.0034, "train_samples_per_second": 1511.658, "train_steps_per_second": 7.142 } ], "max_steps": 150, "num_train_epochs": 50, "total_flos": 149440281182208.0, "trial_name": null, "trial_params": null }