{ "best_metric": 0.9573363661766052, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_qnli/checkpoint-1638", "epoch": 7.0, "global_step": 5733, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 1.0984, "step": 819 }, { "epoch": 1.0, "eval_accuracy": 0.6220025626944902, "eval_loss": 0.9626317620277405, "eval_runtime": 12.6845, "eval_samples_per_second": 430.682, "eval_steps_per_second": 3.39, "step": 819 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 1.0171, "step": 1638 }, { "epoch": 2.0, "eval_accuracy": 0.615595826468973, "eval_loss": 0.9573363661766052, "eval_runtime": 12.7142, "eval_samples_per_second": 429.676, "eval_steps_per_second": 3.382, "step": 1638 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.9717, "step": 2457 }, { "epoch": 3.0, "eval_accuracy": 0.6104704374885594, "eval_loss": 0.9651486277580261, "eval_runtime": 12.7327, "eval_samples_per_second": 429.053, "eval_steps_per_second": 3.377, "step": 2457 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.9377, "step": 3276 }, { "epoch": 4.0, "eval_accuracy": 0.6024162548050521, "eval_loss": 0.9713066220283508, "eval_runtime": 12.6949, "eval_samples_per_second": 430.33, "eval_steps_per_second": 3.387, "step": 3276 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.9132, "step": 4095 }, { "epoch": 5.0, "eval_accuracy": 0.5987552626761853, "eval_loss": 0.9811589121818542, "eval_runtime": 12.741, "eval_samples_per_second": 428.772, "eval_steps_per_second": 3.375, "step": 4095 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.89, "step": 4914 }, { "epoch": 6.0, "eval_accuracy": 0.5982061138568552, "eval_loss": 1.0107508897781372, "eval_runtime": 12.8729, "eval_samples_per_second": 424.379, "eval_steps_per_second": 3.34, "step": 4914 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.8683, "step": 5733 }, { "epoch": 7.0, "eval_accuracy": 0.5914332784184514, "eval_loss": 1.0290495157241821, "eval_runtime": 12.6935, "eval_samples_per_second": 430.378, "eval_steps_per_second": 3.388, "step": 5733 }, { "epoch": 7.0, "step": 5733, "total_flos": 2.2988974586855424e+16, "train_loss": 0.9566188341823108, "train_runtime": 4545.7624, "train_samples_per_second": 1152.095, "train_steps_per_second": 9.008 } ], "max_steps": 40950, "num_train_epochs": 50, "total_flos": 2.2988974586855424e+16, "trial_name": null, "trial_params": null }