{ "best_metric": 0.38646939396858215, "best_model_checkpoint": "distilbert_sa_GLUE_Experiment_logit_kd_qnli/checkpoint-820", "epoch": 7.0, "global_step": 2870, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.4069, "step": 410 }, { "epoch": 1.0, "eval_accuracy": 0.5680029287937031, "eval_loss": 0.3913678824901581, "eval_runtime": 8.451, "eval_samples_per_second": 646.432, "eval_steps_per_second": 2.603, "step": 410 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.3877, "step": 820 }, { "epoch": 2.0, "eval_accuracy": 0.5905180303862346, "eval_loss": 0.38646939396858215, "eval_runtime": 8.4822, "eval_samples_per_second": 644.053, "eval_steps_per_second": 2.594, "step": 820 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.3741, "step": 1230 }, { "epoch": 3.0, "eval_accuracy": 0.5971078162181951, "eval_loss": 0.39170926809310913, "eval_runtime": 8.5451, "eval_samples_per_second": 639.31, "eval_steps_per_second": 2.575, "step": 1230 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.3604, "step": 1640 }, { "epoch": 4.0, "eval_accuracy": 0.5928976752699981, "eval_loss": 0.38925468921661377, "eval_runtime": 8.5294, "eval_samples_per_second": 640.488, "eval_steps_per_second": 2.579, "step": 1640 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.3432, "step": 2050 }, { "epoch": 5.0, "eval_accuracy": 0.5921654768442248, "eval_loss": 0.39078015089035034, "eval_runtime": 8.4767, "eval_samples_per_second": 644.474, "eval_steps_per_second": 2.595, "step": 2050 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.3194, "step": 2460 }, { "epoch": 6.0, "eval_accuracy": 0.5861248398315944, "eval_loss": 0.4250967800617218, "eval_runtime": 8.8358, "eval_samples_per_second": 618.277, "eval_steps_per_second": 2.49, "step": 2460 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.2938, "step": 2870 }, { "epoch": 7.0, "eval_accuracy": 0.5939959729086582, "eval_loss": 0.4414215385913849, "eval_runtime": 8.4799, "eval_samples_per_second": 644.23, "eval_steps_per_second": 2.594, "step": 2870 }, { "epoch": 7.0, "step": 2870, "total_flos": 4.85626137846743e+16, "train_loss": 0.3550731512727638, "train_runtime": 1910.5551, "train_samples_per_second": 2741.167, "train_steps_per_second": 10.73 } ], "max_steps": 20500, "num_train_epochs": 50, "total_flos": 4.85626137846743e+16, "trial_name": null, "trial_params": null }