{ "best_metric": 0.863342821598053, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_mnli_256/checkpoint-27612", "epoch": 14.0, "global_step": 42952, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 1.0008, "step": 3068 }, { "epoch": 1.0, "eval_accuracy": 0.5404992358634743, "eval_loss": 0.9489795565605164, "eval_runtime": 22.1002, "eval_samples_per_second": 444.113, "eval_steps_per_second": 3.484, "step": 3068 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.9205, "step": 6136 }, { "epoch": 2.0, "eval_accuracy": 0.5674987264391238, "eval_loss": 0.9166129231452942, "eval_runtime": 22.1405, "eval_samples_per_second": 443.305, "eval_steps_per_second": 3.478, "step": 6136 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.8928, "step": 9204 }, { "epoch": 3.0, "eval_accuracy": 0.578604177279674, "eval_loss": 0.902202844619751, "eval_runtime": 22.2149, "eval_samples_per_second": 441.82, "eval_steps_per_second": 3.466, "step": 9204 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.872, "step": 12272 }, { "epoch": 4.0, "eval_accuracy": 0.5967396841569027, "eval_loss": 0.8842912316322327, "eval_runtime": 22.1304, "eval_samples_per_second": 443.507, "eval_steps_per_second": 3.479, "step": 12272 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.8531, "step": 15340 }, { "epoch": 5.0, "eval_accuracy": 0.5959246051961283, "eval_loss": 0.8806653618812561, "eval_runtime": 22.1032, "eval_samples_per_second": 444.053, "eval_steps_per_second": 3.484, "step": 15340 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.8359, "step": 18408 }, { "epoch": 6.0, "eval_accuracy": 0.5998981151299032, "eval_loss": 0.8763103485107422, "eval_runtime": 22.114, "eval_samples_per_second": 443.837, "eval_steps_per_second": 3.482, "step": 18408 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.8197, "step": 21476 }, { "epoch": 7.0, "eval_accuracy": 0.6009169638308711, "eval_loss": 0.8814870119094849, "eval_runtime": 22.1114, "eval_samples_per_second": 443.889, "eval_steps_per_second": 3.482, "step": 21476 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.8028, "step": 24544 }, { "epoch": 8.0, "eval_accuracy": 0.5933774834437087, "eval_loss": 0.9012145400047302, "eval_runtime": 22.1167, "eval_samples_per_second": 443.782, "eval_steps_per_second": 3.482, "step": 24544 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.786, "step": 27612 }, { "epoch": 9.0, "eval_accuracy": 0.6190524707080999, "eval_loss": 0.863342821598053, "eval_runtime": 22.1033, "eval_samples_per_second": 444.052, "eval_steps_per_second": 3.484, "step": 27612 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.769, "step": 30680 }, { "epoch": 10.0, "eval_accuracy": 0.6097809475292919, "eval_loss": 0.8733872771263123, "eval_runtime": 22.175, "eval_samples_per_second": 442.616, "eval_steps_per_second": 3.472, "step": 30680 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.752, "step": 33748 }, { "epoch": 11.0, "eval_accuracy": 0.6220071319409067, "eval_loss": 0.8681850433349609, "eval_runtime": 22.0877, "eval_samples_per_second": 444.365, "eval_steps_per_second": 3.486, "step": 33748 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.736, "step": 36816 }, { "epoch": 12.0, "eval_accuracy": 0.617524197656648, "eval_loss": 0.8740953803062439, "eval_runtime": 22.1771, "eval_samples_per_second": 442.573, "eval_steps_per_second": 3.472, "step": 36816 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.7204, "step": 39884 }, { "epoch": 13.0, "eval_accuracy": 0.6047885888945491, "eval_loss": 0.8993946313858032, "eval_runtime": 22.1027, "eval_samples_per_second": 444.063, "eval_steps_per_second": 3.484, "step": 39884 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.7038, "step": 42952 }, { "epoch": 14.0, "eval_accuracy": 0.6079470198675496, "eval_loss": 0.8940238356590271, "eval_runtime": 22.1071, "eval_samples_per_second": 443.976, "eval_steps_per_second": 3.483, "step": 42952 }, { "epoch": 14.0, "step": 42952, "total_flos": 1.4482128069931827e+17, "train_loss": 0.8189185247849343, "train_runtime": 37903.3591, "train_samples_per_second": 518.031, "train_steps_per_second": 4.047 } ], "max_steps": 153400, "num_train_epochs": 50, "total_flos": 1.4482128069931827e+17, "trial_name": null, "trial_params": null }