{ "best_metric": 0.7282186150550842, "best_model_checkpoint": "mobilebert_add_GLUE_Experiment_logit_kd_sst2_128/checkpoint-2635", "epoch": 10.0, "global_step": 5270, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 1.5487, "step": 527 }, { "epoch": 1.0, "eval_accuracy": 0.5779816513761468, "eval_loss": 1.3928688764572144, "eval_runtime": 1.4162, "eval_samples_per_second": 615.727, "eval_steps_per_second": 4.943, "step": 527 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 1.3629, "step": 1054 }, { "epoch": 2.0, "eval_accuracy": 0.5504587155963303, "eval_loss": 1.4979432821273804, "eval_runtime": 1.4117, "eval_samples_per_second": 617.704, "eval_steps_per_second": 4.959, "step": 1054 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 1.1397, "step": 1581 }, { "epoch": 3.0, "eval_accuracy": 0.6754587155963303, "eval_loss": 1.3926728963851929, "eval_runtime": 1.4106, "eval_samples_per_second": 618.195, "eval_steps_per_second": 4.963, "step": 1581 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.5649, "step": 2108 }, { "epoch": 4.0, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.7289367318153381, "eval_runtime": 1.4117, "eval_samples_per_second": 617.688, "eval_steps_per_second": 4.959, "step": 2108 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.4112, "step": 2635 }, { "epoch": 5.0, "eval_accuracy": 0.8073394495412844, "eval_loss": 0.7282186150550842, "eval_runtime": 1.5054, "eval_samples_per_second": 579.245, "eval_steps_per_second": 4.65, "step": 2635 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.3462, "step": 3162 }, { "epoch": 6.0, "eval_accuracy": 0.805045871559633, "eval_loss": 0.7653937339782715, "eval_runtime": 1.4197, "eval_samples_per_second": 614.218, "eval_steps_per_second": 4.931, "step": 3162 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.3069, "step": 3689 }, { "epoch": 7.0, "eval_accuracy": 0.7970183486238532, "eval_loss": 0.8302631974220276, "eval_runtime": 1.4135, "eval_samples_per_second": 616.903, "eval_steps_per_second": 4.952, "step": 3689 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.2833, "step": 4216 }, { "epoch": 8.0, "eval_accuracy": 0.7924311926605505, "eval_loss": 0.880594789981842, "eval_runtime": 1.4122, "eval_samples_per_second": 617.467, "eval_steps_per_second": 4.957, "step": 4216 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.2662, "step": 4743 }, { "epoch": 9.0, "eval_accuracy": 0.7958715596330275, "eval_loss": 0.9296879768371582, "eval_runtime": 1.4088, "eval_samples_per_second": 618.966, "eval_steps_per_second": 4.969, "step": 4743 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.2521, "step": 5270 }, { "epoch": 10.0, "eval_accuracy": 0.7717889908256881, "eval_loss": 1.097933292388916, "eval_runtime": 1.4221, "eval_samples_per_second": 613.19, "eval_steps_per_second": 4.922, "step": 5270 }, { "epoch": 10.0, "step": 5270, "total_flos": 1.607798324461568e+16, "train_loss": 0.6482070778081268, "train_runtime": 2637.7784, "train_samples_per_second": 1276.624, "train_steps_per_second": 9.989 } ], "max_steps": 26350, "num_train_epochs": 50, "total_flos": 1.607798324461568e+16, "trial_name": null, "trial_params": null }