{ "best_metric": 0.8634595701125896, "best_model_checkpoint": "./outputs/finetuning/mnli_AppE/checkpoint-26000", "epoch": 5.0, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 1.9333333333333333e-05, "loss": 0.3562, "step": 2000 }, { "epoch": 0.17, "eval_acc": 0.8524053224155578, "eval_loss": 0.41664522886276245, "eval_runtime": 16.7668, "eval_samples_per_second": 582.7, "eval_steps_per_second": 18.25, "step": 2000 }, { "epoch": 0.33, "learning_rate": 1.866666666666667e-05, "loss": 0.3414, "step": 4000 }, { "epoch": 0.33, "eval_acc": 0.8475946775844422, "eval_loss": 0.43986669182777405, "eval_runtime": 16.7213, "eval_samples_per_second": 584.285, "eval_steps_per_second": 18.3, "step": 4000 }, { "epoch": 0.5, "learning_rate": 1.8e-05, "loss": 0.3308, "step": 6000 }, { "epoch": 0.5, "eval_acc": 0.8560900716479017, "eval_loss": 0.395113080739975, "eval_runtime": 16.7306, "eval_samples_per_second": 583.96, "eval_steps_per_second": 18.29, "step": 6000 }, { "epoch": 0.67, "learning_rate": 1.7333333333333336e-05, "loss": 0.3306, "step": 8000 }, { "epoch": 0.67, "eval_acc": 0.8488229273285568, "eval_loss": 0.41524460911750793, "eval_runtime": 16.7003, "eval_samples_per_second": 585.021, "eval_steps_per_second": 18.323, "step": 8000 }, { "epoch": 0.83, "learning_rate": 1.6666666666666667e-05, "loss": 0.3295, "step": 10000 }, { "epoch": 0.83, "eval_acc": 0.8575230296827021, "eval_loss": 0.39166104793548584, "eval_runtime": 16.6831, "eval_samples_per_second": 585.622, "eval_steps_per_second": 18.342, "step": 10000 }, { "epoch": 1.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.3288, "step": 12000 }, { "epoch": 1.0, "eval_acc": 0.8581371545547595, "eval_loss": 0.40951114892959595, "eval_runtime": 16.6934, "eval_samples_per_second": 585.263, "eval_steps_per_second": 18.331, "step": 12000 }, { "epoch": 1.17, "learning_rate": 1.5333333333333334e-05, "loss": 0.229, "step": 14000 }, { "epoch": 1.17, "eval_acc": 0.8611054247697032, "eval_loss": 0.4420296251773834, "eval_runtime": 16.7189, "eval_samples_per_second": 584.367, "eval_steps_per_second": 18.303, "step": 14000 }, { "epoch": 1.33, "learning_rate": 1.4666666666666666e-05, "loss": 0.2307, "step": 16000 }, { "epoch": 1.33, "eval_acc": 0.8560900716479017, "eval_loss": 0.4876081645488739, "eval_runtime": 16.6966, "eval_samples_per_second": 585.148, "eval_steps_per_second": 18.327, "step": 16000 }, { "epoch": 1.5, "learning_rate": 1.4e-05, "loss": 0.2334, "step": 18000 }, { "epoch": 1.5, "eval_acc": 0.857727737973388, "eval_loss": 0.449561208486557, "eval_runtime": 16.7074, "eval_samples_per_second": 584.771, "eval_steps_per_second": 18.315, "step": 18000 }, { "epoch": 1.67, "learning_rate": 1.3333333333333333e-05, "loss": 0.2392, "step": 20000 }, { "epoch": 1.67, "eval_acc": 0.8607983623336745, "eval_loss": 0.4188033640384674, "eval_runtime": 16.694, "eval_samples_per_second": 585.239, "eval_steps_per_second": 18.33, "step": 20000 }, { "epoch": 1.83, "learning_rate": 1.2666666666666667e-05, "loss": 0.233, "step": 22000 }, { "epoch": 1.83, "eval_acc": 0.8578300921187308, "eval_loss": 0.44934678077697754, "eval_runtime": 16.6959, "eval_samples_per_second": 585.174, "eval_steps_per_second": 18.328, "step": 22000 }, { "epoch": 2.0, "learning_rate": 1.2e-05, "loss": 0.2343, "step": 24000 }, { "epoch": 2.0, "eval_acc": 0.8602865916069601, "eval_loss": 0.42783743143081665, "eval_runtime": 16.7147, "eval_samples_per_second": 584.517, "eval_steps_per_second": 18.307, "step": 24000 }, { "epoch": 2.17, "learning_rate": 1.1333333333333334e-05, "loss": 0.163, "step": 26000 }, { "epoch": 2.17, "eval_acc": 0.8634595701125896, "eval_loss": 0.5700035095214844, "eval_runtime": 16.7469, "eval_samples_per_second": 583.393, "eval_steps_per_second": 18.272, "step": 26000 }, { "epoch": 2.33, "learning_rate": 1.0666666666666667e-05, "loss": 0.1657, "step": 28000 }, { "epoch": 2.33, "eval_acc": 0.8560900716479017, "eval_loss": 0.5322555303573608, "eval_runtime": 16.7051, "eval_samples_per_second": 584.851, "eval_steps_per_second": 18.318, "step": 28000 }, { "epoch": 2.5, "learning_rate": 1e-05, "loss": 0.1652, "step": 30000 }, { "epoch": 2.5, "eval_acc": 0.8588536335721597, "eval_loss": 0.5046815872192383, "eval_runtime": 16.7454, "eval_samples_per_second": 583.445, "eval_steps_per_second": 18.274, "step": 30000 }, { "epoch": 2.67, "learning_rate": 9.333333333333334e-06, "loss": 0.1654, "step": 32000 }, { "epoch": 2.67, "eval_acc": 0.8589559877175026, "eval_loss": 0.5081538558006287, "eval_runtime": 16.6963, "eval_samples_per_second": 585.158, "eval_steps_per_second": 18.327, "step": 32000 }, { "epoch": 2.83, "learning_rate": 8.666666666666668e-06, "loss": 0.1659, "step": 34000 }, { "epoch": 2.83, "eval_acc": 0.8630501535312181, "eval_loss": 0.5075551271438599, "eval_runtime": 16.7114, "eval_samples_per_second": 584.632, "eval_steps_per_second": 18.311, "step": 34000 }, { "epoch": 3.0, "learning_rate": 8.000000000000001e-06, "loss": 0.1654, "step": 36000 }, { "epoch": 3.0, "eval_acc": 0.8612077789150461, "eval_loss": 0.5035998821258545, "eval_runtime": 16.7468, "eval_samples_per_second": 583.394, "eval_steps_per_second": 18.272, "step": 36000 }, { "epoch": 3.17, "learning_rate": 7.333333333333333e-06, "loss": 0.1195, "step": 38000 }, { "epoch": 3.17, "eval_acc": 0.8599795291709315, "eval_loss": 0.6220871806144714, "eval_runtime": 16.6963, "eval_samples_per_second": 585.161, "eval_steps_per_second": 18.327, "step": 38000 }, { "epoch": 3.33, "learning_rate": 6.666666666666667e-06, "loss": 0.1194, "step": 40000 }, { "epoch": 3.33, "eval_acc": 0.8575230296827021, "eval_loss": 0.6466721892356873, "eval_runtime": 16.6726, "eval_samples_per_second": 585.991, "eval_steps_per_second": 18.353, "step": 40000 }, { "epoch": 3.5, "learning_rate": 6e-06, "loss": 0.1205, "step": 42000 }, { "epoch": 3.5, "eval_acc": 0.8578300921187308, "eval_loss": 0.6742368340492249, "eval_runtime": 16.6999, "eval_samples_per_second": 585.034, "eval_steps_per_second": 18.323, "step": 42000 }, { "epoch": 3.67, "learning_rate": 5.333333333333334e-06, "loss": 0.1209, "step": 44000 }, { "epoch": 3.67, "eval_acc": 0.8580348004094166, "eval_loss": 0.6536660194396973, "eval_runtime": 16.7907, "eval_samples_per_second": 581.87, "eval_steps_per_second": 18.224, "step": 44000 }, { "epoch": 3.83, "learning_rate": 4.666666666666667e-06, "loss": 0.1234, "step": 46000 }, { "epoch": 3.83, "eval_acc": 0.8580348004094166, "eval_loss": 0.6385037899017334, "eval_runtime": 16.6773, "eval_samples_per_second": 585.826, "eval_steps_per_second": 18.348, "step": 46000 }, { "epoch": 4.0, "learning_rate": 4.000000000000001e-06, "loss": 0.122, "step": 48000 }, { "epoch": 4.0, "eval_acc": 0.8628454452405322, "eval_loss": 0.6103753447532654, "eval_runtime": 16.681, "eval_samples_per_second": 585.697, "eval_steps_per_second": 18.344, "step": 48000 }, { "epoch": 4.17, "learning_rate": 3.3333333333333333e-06, "loss": 0.0937, "step": 50000 }, { "epoch": 4.17, "eval_acc": 0.8602865916069601, "eval_loss": 0.7260550260543823, "eval_runtime": 16.6866, "eval_samples_per_second": 585.501, "eval_steps_per_second": 18.338, "step": 50000 }, { "epoch": 4.33, "learning_rate": 2.666666666666667e-06, "loss": 0.0941, "step": 52000 }, { "epoch": 4.33, "eval_acc": 0.8606960081883316, "eval_loss": 0.7633941769599915, "eval_runtime": 16.696, "eval_samples_per_second": 585.17, "eval_steps_per_second": 18.328, "step": 52000 }, { "epoch": 4.5, "learning_rate": 2.0000000000000003e-06, "loss": 0.0926, "step": 54000 }, { "epoch": 4.5, "eval_acc": 0.8617195496417605, "eval_loss": 0.7736791968345642, "eval_runtime": 16.6764, "eval_samples_per_second": 585.859, "eval_steps_per_second": 18.349, "step": 54000 }, { "epoch": 4.67, "learning_rate": 1.3333333333333334e-06, "loss": 0.093, "step": 56000 }, { "epoch": 4.67, "eval_acc": 0.8632548618219038, "eval_loss": 0.757666289806366, "eval_runtime": 16.6803, "eval_samples_per_second": 585.722, "eval_steps_per_second": 18.345, "step": 56000 }, { "epoch": 4.83, "learning_rate": 6.666666666666667e-07, "loss": 0.0918, "step": 58000 }, { "epoch": 4.83, "eval_acc": 0.8630501535312181, "eval_loss": 0.7534608840942383, "eval_runtime": 16.6933, "eval_samples_per_second": 585.263, "eval_steps_per_second": 18.331, "step": 58000 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 0.0935, "step": 60000 }, { "epoch": 5.0, "eval_acc": 0.8622313203684749, "eval_loss": 0.743323802947998, "eval_runtime": 16.7307, "eval_samples_per_second": 583.956, "eval_steps_per_second": 18.29, "step": 60000 }, { "epoch": 5.0, "step": 60000, "total_flos": 1.2628556043159168e+17, "train_loss": 0.18972002029418944, "train_runtime": 11504.4253, "train_samples_per_second": 166.881, "train_steps_per_second": 5.215 } ], "max_steps": 60000, "num_train_epochs": 5, "total_flos": 1.2628556043159168e+17, "trial_name": null, "trial_params": null }