{ "best_metric": 5.843591721621522e-10, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_data_aug_mrpc/checkpoint-35262", "epoch": 23.0, "global_step": 45057, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.1838, "step": 1959 }, { "epoch": 1.0, "eval_accuracy": 0.9950980392156863, "eval_combined_score": 0.995750458456764, "eval_f1": 0.9964028776978416, "eval_loss": 0.013783634640276432, "eval_runtime": 0.4532, "eval_samples_per_second": 900.209, "eval_steps_per_second": 8.826, "step": 1959 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.0406, "step": 3918 }, { "epoch": 2.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 0.005524557549506426, "eval_runtime": 0.4517, "eval_samples_per_second": 903.294, "eval_steps_per_second": 8.856, "step": 3918 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.0267, "step": 5877 }, { "epoch": 3.0, "eval_accuracy": 0.9975490196078431, "eval_combined_score": 0.997876843735699, "eval_f1": 0.9982046678635548, "eval_loss": 0.012892701663076878, "eval_runtime": 0.4563, "eval_samples_per_second": 894.22, "eval_steps_per_second": 8.767, "step": 5877 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.0151, "step": 7836 }, { "epoch": 4.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 0.00039428556920029223, "eval_runtime": 0.4545, "eval_samples_per_second": 897.643, "eval_steps_per_second": 8.8, "step": 7836 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.0108, "step": 9795 }, { "epoch": 5.0, "eval_accuracy": 0.9975490196078431, "eval_combined_score": 0.997876843735699, "eval_f1": 0.9982046678635548, "eval_loss": 0.010444208979606628, "eval_runtime": 0.4519, "eval_samples_per_second": 902.771, "eval_steps_per_second": 8.851, "step": 9795 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.0075, "step": 11754 }, { "epoch": 6.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 1.37044798975694e-05, "eval_runtime": 0.4818, "eval_samples_per_second": 846.87, "eval_steps_per_second": 8.303, "step": 11754 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.0059, "step": 13713 }, { "epoch": 7.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 0.0005156163824722171, "eval_runtime": 0.4515, "eval_samples_per_second": 903.731, "eval_steps_per_second": 8.86, "step": 13713 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.0047, "step": 15672 }, { "epoch": 8.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 1.4846425074210856e-05, "eval_runtime": 0.4532, "eval_samples_per_second": 900.352, "eval_steps_per_second": 8.827, "step": 15672 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.0033, "step": 17631 }, { "epoch": 9.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 9.473485988564789e-05, "eval_runtime": 0.4507, "eval_samples_per_second": 905.29, "eval_steps_per_second": 8.875, "step": 17631 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.0031, "step": 19590 }, { "epoch": 10.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 3.4562839346108376e-07, "eval_runtime": 0.4522, "eval_samples_per_second": 902.352, "eval_steps_per_second": 8.847, "step": 19590 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.0025, "step": 21549 }, { "epoch": 11.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 1.871667905106733e-06, "eval_runtime": 0.4541, "eval_samples_per_second": 898.423, "eval_steps_per_second": 8.808, "step": 21549 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.0019, "step": 23508 }, { "epoch": 12.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 4.236588679873421e-08, "eval_runtime": 0.4549, "eval_samples_per_second": 896.923, "eval_steps_per_second": 8.793, "step": 23508 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.0019, "step": 25467 }, { "epoch": 13.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 1.6456193634439842e-06, "eval_runtime": 0.4519, "eval_samples_per_second": 902.776, "eval_steps_per_second": 8.851, "step": 25467 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.0014, "step": 27426 }, { "epoch": 14.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 1.5865224156641489e-07, "eval_runtime": 0.4547, "eval_samples_per_second": 897.244, "eval_steps_per_second": 8.797, "step": 27426 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.001, "step": 29385 }, { "epoch": 15.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 6.775987912988057e-06, "eval_runtime": 0.4518, "eval_samples_per_second": 903.035, "eval_steps_per_second": 8.853, "step": 29385 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 0.001, "step": 31344 }, { "epoch": 16.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 4.674864584330862e-08, "eval_runtime": 0.4524, "eval_samples_per_second": 901.759, "eval_steps_per_second": 8.841, "step": 31344 }, { "epoch": 17.0, "learning_rate": 3.3e-05, "loss": 0.0009, "step": 33303 }, { "epoch": 17.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 3.7983216572001766e-08, "eval_runtime": 0.4506, "eval_samples_per_second": 905.494, "eval_steps_per_second": 8.877, "step": 33303 }, { "epoch": 18.0, "learning_rate": 3.2000000000000005e-05, "loss": 0.0009, "step": 35262 }, { "epoch": 18.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 5.843591721621522e-10, "eval_runtime": 0.4512, "eval_samples_per_second": 904.186, "eval_steps_per_second": 8.865, "step": 35262 }, { "epoch": 19.0, "learning_rate": 3.1e-05, "loss": 0.0006, "step": 37221 }, { "epoch": 19.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 2.1329100263756118e-08, "eval_runtime": 0.4524, "eval_samples_per_second": 901.947, "eval_steps_per_second": 8.843, "step": 37221 }, { "epoch": 20.0, "learning_rate": 3e-05, "loss": 0.0006, "step": 39180 }, { "epoch": 20.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 1.9020757235921337e-07, "eval_runtime": 0.4625, "eval_samples_per_second": 882.103, "eval_steps_per_second": 8.648, "step": 39180 }, { "epoch": 21.0, "learning_rate": 2.9e-05, "loss": 0.0003, "step": 41139 }, { "epoch": 21.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 6.7201266773508905e-09, "eval_runtime": 0.455, "eval_samples_per_second": 896.759, "eval_steps_per_second": 8.792, "step": 41139 }, { "epoch": 22.0, "learning_rate": 2.8000000000000003e-05, "loss": 0.0003, "step": 43098 }, { "epoch": 22.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 1.8991272554558236e-07, "eval_runtime": 0.4554, "eval_samples_per_second": 895.928, "eval_steps_per_second": 8.784, "step": 43098 }, { "epoch": 23.0, "learning_rate": 2.7000000000000002e-05, "loss": 0.0005, "step": 45057 }, { "epoch": 23.0, "eval_accuracy": 1.0, "eval_combined_score": 1.0, "eval_f1": 1.0, "eval_loss": 2.0744714390730223e-08, "eval_runtime": 0.4514, "eval_samples_per_second": 903.797, "eval_steps_per_second": 8.861, "step": 45057 }, { "epoch": 23.0, "step": 45057, "total_flos": 1.8081776184747622e+17, "train_loss": 0.013702845788788859, "train_runtime": 21120.7767, "train_samples_per_second": 593.577, "train_steps_per_second": 4.638 } ], "max_steps": 97950, "num_train_epochs": 50, "total_flos": 1.8081776184747622e+17, "trial_name": null, "trial_params": null }