{ "best_metric": 0.388401061296463, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_pretrain_rte/checkpoint-60", "epoch": 8.0, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.4107, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.5126353790613718, "eval_loss": 0.3951400816440582, "eval_runtime": 0.5432, "eval_samples_per_second": 509.897, "eval_steps_per_second": 5.522, "step": 20 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.3757, "step": 40 }, { "epoch": 2.0, "eval_accuracy": 0.4981949458483754, "eval_loss": 0.3914218842983246, "eval_runtime": 0.5449, "eval_samples_per_second": 508.396, "eval_steps_per_second": 5.506, "step": 40 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.347, "step": 60 }, { "epoch": 3.0, "eval_accuracy": 0.5451263537906137, "eval_loss": 0.388401061296463, "eval_runtime": 0.5432, "eval_samples_per_second": 509.919, "eval_steps_per_second": 5.523, "step": 60 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.3072, "step": 80 }, { "epoch": 4.0, "eval_accuracy": 0.5126353790613718, "eval_loss": 0.402240514755249, "eval_runtime": 0.5439, "eval_samples_per_second": 509.256, "eval_steps_per_second": 5.515, "step": 80 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.2762, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.41157519817352295, "eval_runtime": 0.542, "eval_samples_per_second": 511.088, "eval_steps_per_second": 5.535, "step": 100 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.2457, "step": 120 }, { "epoch": 6.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.40726301074028015, "eval_runtime": 0.5446, "eval_samples_per_second": 508.611, "eval_steps_per_second": 5.508, "step": 120 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.2215, "step": 140 }, { "epoch": 7.0, "eval_accuracy": 0.5487364620938628, "eval_loss": 0.4115126132965088, "eval_runtime": 0.5433, "eval_samples_per_second": 509.804, "eval_steps_per_second": 5.521, "step": 140 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.2059, "step": 160 }, { "epoch": 8.0, "eval_accuracy": 0.5342960288808665, "eval_loss": 0.4230565130710602, "eval_runtime": 0.5423, "eval_samples_per_second": 510.767, "eval_steps_per_second": 5.532, "step": 160 }, { "epoch": 8.0, "step": 160, "total_flos": 632613383438336.0, "train_loss": 0.29872527420520784, "train_runtime": 143.5614, "train_samples_per_second": 867.225, "train_steps_per_second": 6.966 } ], "max_steps": 1000, "num_train_epochs": 50, "total_flos": 632613383438336.0, "trial_name": null, "trial_params": null }