{ "best_metric": 1.1755008697509766, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_mnli/checkpoint-30680", "epoch": 15.0, "global_step": 46020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 1.6232, "step": 3068 }, { "epoch": 1.0, "eval_accuracy": 0.5504839531329597, "eval_loss": 1.3869810104370117, "eval_runtime": 22.8066, "eval_samples_per_second": 430.358, "eval_steps_per_second": 3.376, "step": 3068 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 1.4341, "step": 6136 }, { "epoch": 2.0, "eval_accuracy": 0.5833927661742231, "eval_loss": 1.3186262845993042, "eval_runtime": 22.7427, "eval_samples_per_second": 431.567, "eval_steps_per_second": 3.386, "step": 6136 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 1.3724, "step": 9204 }, { "epoch": 3.0, "eval_accuracy": 0.5942944472745797, "eval_loss": 1.2818658351898193, "eval_runtime": 22.8582, "eval_samples_per_second": 429.386, "eval_steps_per_second": 3.369, "step": 9204 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 1.3249, "step": 12272 }, { "epoch": 4.0, "eval_accuracy": 0.5981660723382578, "eval_loss": 1.270175814628601, "eval_runtime": 22.8992, "eval_samples_per_second": 428.618, "eval_steps_per_second": 3.363, "step": 12272 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 1.2788, "step": 15340 }, { "epoch": 5.0, "eval_accuracy": 0.6030565461029037, "eval_loss": 1.235905408859253, "eval_runtime": 22.929, "eval_samples_per_second": 428.06, "eval_steps_per_second": 3.358, "step": 15340 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 1.2302, "step": 18408 }, { "epoch": 6.0, "eval_accuracy": 0.6192562404482934, "eval_loss": 1.200829029083252, "eval_runtime": 22.985, "eval_samples_per_second": 427.017, "eval_steps_per_second": 3.35, "step": 18408 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 1.1842, "step": 21476 }, { "epoch": 7.0, "eval_accuracy": 0.6222109016811004, "eval_loss": 1.1990573406219482, "eval_runtime": 22.7719, "eval_samples_per_second": 431.015, "eval_steps_per_second": 3.381, "step": 21476 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 1.1441, "step": 24544 }, { "epoch": 8.0, "eval_accuracy": 0.6201732042791646, "eval_loss": 1.1838924884796143, "eval_runtime": 22.9011, "eval_samples_per_second": 428.582, "eval_steps_per_second": 3.362, "step": 24544 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 1.1057, "step": 27612 }, { "epoch": 9.0, "eval_accuracy": 0.624350483953133, "eval_loss": 1.1861381530761719, "eval_runtime": 22.9564, "eval_samples_per_second": 427.55, "eval_steps_per_second": 3.354, "step": 27612 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 1.0715, "step": 30680 }, { "epoch": 10.0, "eval_accuracy": 0.6249617931737137, "eval_loss": 1.1755008697509766, "eval_runtime": 22.9046, "eval_samples_per_second": 428.516, "eval_steps_per_second": 3.362, "step": 30680 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 1.0386, "step": 33748 }, { "epoch": 11.0, "eval_accuracy": 0.6312786551197147, "eval_loss": 1.1971582174301147, "eval_runtime": 23.1346, "eval_samples_per_second": 424.256, "eval_steps_per_second": 3.328, "step": 33748 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 1.0066, "step": 36816 }, { "epoch": 12.0, "eval_accuracy": 0.6276107997962302, "eval_loss": 1.2148597240447998, "eval_runtime": 22.894, "eval_samples_per_second": 428.714, "eval_steps_per_second": 3.363, "step": 36816 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.9767, "step": 39884 }, { "epoch": 13.0, "eval_accuracy": 0.6192562404482934, "eval_loss": 1.2187175750732422, "eval_runtime": 22.8537, "eval_samples_per_second": 429.471, "eval_steps_per_second": 3.369, "step": 39884 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.9482, "step": 42952 }, { "epoch": 14.0, "eval_accuracy": 0.6226184411614876, "eval_loss": 1.2003837823867798, "eval_runtime": 23.0327, "eval_samples_per_second": 426.133, "eval_steps_per_second": 3.343, "step": 42952 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.921, "step": 46020 }, { "epoch": 15.0, "eval_accuracy": 0.6193581253183902, "eval_loss": 1.2092907428741455, "eval_runtime": 22.9952, "eval_samples_per_second": 426.829, "eval_steps_per_second": 3.349, "step": 46020 }, { "epoch": 15.0, "step": 46020, "total_flos": 1.8469785301942272e+17, "train_loss": 1.177346492735835, "train_runtime": 35549.2544, "train_samples_per_second": 552.335, "train_steps_per_second": 4.315 } ], "max_steps": 153400, "num_train_epochs": 50, "total_flos": 1.8469785301942272e+17, "trial_name": null, "trial_params": null }