{ "best_metric": 0.39777371287345886, "best_model_checkpoint": "distilbert_add_GLUE_Experiment_logit_kd_qnli/checkpoint-820", "epoch": 7.0, "global_step": 2870, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.4154, "step": 410 }, { "epoch": 1.0, "eval_accuracy": 0.5778876075416438, "eval_loss": 0.39863064885139465, "eval_runtime": 7.79, "eval_samples_per_second": 701.281, "eval_steps_per_second": 2.824, "step": 410 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.3986, "step": 820 }, { "epoch": 2.0, "eval_accuracy": 0.5883214351089145, "eval_loss": 0.39777371287345886, "eval_runtime": 7.6923, "eval_samples_per_second": 710.188, "eval_steps_per_second": 2.86, "step": 820 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.3909, "step": 1230 }, { "epoch": 3.0, "eval_accuracy": 0.5886875343218012, "eval_loss": 0.39901256561279297, "eval_runtime": 7.6905, "eval_samples_per_second": 710.361, "eval_steps_per_second": 2.861, "step": 1230 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.384, "step": 1640 }, { "epoch": 4.0, "eval_accuracy": 0.5912502288120081, "eval_loss": 0.39876481890678406, "eval_runtime": 7.7111, "eval_samples_per_second": 708.461, "eval_steps_per_second": 2.853, "step": 1640 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.3761, "step": 2050 }, { "epoch": 5.0, "eval_accuracy": 0.5899688815669046, "eval_loss": 0.400068461894989, "eval_runtime": 7.7122, "eval_samples_per_second": 708.359, "eval_steps_per_second": 2.853, "step": 2050 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.3634, "step": 2460 }, { "epoch": 6.0, "eval_accuracy": 0.6121178839465495, "eval_loss": 0.40260177850723267, "eval_runtime": 7.7119, "eval_samples_per_second": 708.388, "eval_steps_per_second": 2.853, "step": 2460 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.3413, "step": 2870 }, { "epoch": 7.0, "eval_accuracy": 0.6174263225334066, "eval_loss": 0.40683838725090027, "eval_runtime": 7.7369, "eval_samples_per_second": 706.101, "eval_steps_per_second": 2.844, "step": 2870 }, { "epoch": 7.0, "step": 2870, "total_flos": 4.469658863062221e+16, "train_loss": 0.3814040659196701, "train_runtime": 1634.9313, "train_samples_per_second": 3203.284, "train_steps_per_second": 12.539 } ], "max_steps": 20500, "num_train_epochs": 50, "total_flos": 4.469658863062221e+16, "trial_name": null, "trial_params": null }