{ "best_metric": 0.8406790060333368, "best_model_checkpoint": "./outputs/finetuning/mnli_MULTI/checkpoint-26000", "epoch": 5.0, "global_step": 60065, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 1.933405477399484e-05, "loss": 0.4798, "step": 2000 }, { "epoch": 0.17, "eval_acc": 0.8211473565804275, "eval_loss": 0.4810026288032532, "eval_runtime": 16.7598, "eval_samples_per_second": 583.48, "eval_steps_per_second": 18.258, "step": 2000 }, { "epoch": 0.33, "learning_rate": 1.866810954798968e-05, "loss": 0.4494, "step": 4000 }, { "epoch": 0.33, "eval_acc": 0.8267716535433071, "eval_loss": 0.46056851744651794, "eval_runtime": 16.6747, "eval_samples_per_second": 586.457, "eval_steps_per_second": 18.351, "step": 4000 }, { "epoch": 0.5, "learning_rate": 1.8002164321984518e-05, "loss": 0.431, "step": 6000 }, { "epoch": 0.5, "eval_acc": 0.8301462317210349, "eval_loss": 0.4622686803340912, "eval_runtime": 16.6907, "eval_samples_per_second": 585.896, "eval_steps_per_second": 18.334, "step": 6000 }, { "epoch": 0.67, "learning_rate": 1.7336219095979357e-05, "loss": 0.4371, "step": 8000 }, { "epoch": 0.67, "eval_acc": 0.8297371919419163, "eval_loss": 0.44371065497398376, "eval_runtime": 16.7034, "eval_samples_per_second": 585.451, "eval_steps_per_second": 18.32, "step": 8000 }, { "epoch": 0.83, "learning_rate": 1.6670273869974196e-05, "loss": 0.4297, "step": 10000 }, { "epoch": 0.83, "eval_acc": 0.8312710911136107, "eval_loss": 0.45482972264289856, "eval_runtime": 16.6876, "eval_samples_per_second": 586.004, "eval_steps_per_second": 18.337, "step": 10000 }, { "epoch": 1.0, "learning_rate": 1.6004328643969035e-05, "loss": 0.4214, "step": 12000 }, { "epoch": 1.0, "eval_acc": 0.8337253297883219, "eval_loss": 0.4565775692462921, "eval_runtime": 16.6965, "eval_samples_per_second": 585.692, "eval_steps_per_second": 18.327, "step": 12000 }, { "epoch": 1.17, "learning_rate": 1.5338383417963873e-05, "loss": 0.3123, "step": 14000 }, { "epoch": 1.17, "eval_acc": 0.8322936905614071, "eval_loss": 0.48932746052742004, "eval_runtime": 16.699, "eval_samples_per_second": 585.603, "eval_steps_per_second": 18.324, "step": 14000 }, { "epoch": 1.33, "learning_rate": 1.4672438191958714e-05, "loss": 0.3158, "step": 16000 }, { "epoch": 1.33, "eval_acc": 0.8342366295122201, "eval_loss": 0.4861135184764862, "eval_runtime": 16.699, "eval_samples_per_second": 585.602, "eval_steps_per_second": 18.324, "step": 16000 }, { "epoch": 1.5, "learning_rate": 1.4006492965953551e-05, "loss": 0.324, "step": 18000 }, { "epoch": 1.5, "eval_acc": 0.8307597913897127, "eval_loss": 0.4812241792678833, "eval_runtime": 16.6905, "eval_samples_per_second": 585.902, "eval_steps_per_second": 18.334, "step": 18000 }, { "epoch": 1.66, "learning_rate": 1.3340547739948392e-05, "loss": 0.3161, "step": 20000 }, { "epoch": 1.66, "eval_acc": 0.8364863482973719, "eval_loss": 0.4630277454853058, "eval_runtime": 16.702, "eval_samples_per_second": 585.498, "eval_steps_per_second": 18.321, "step": 20000 }, { "epoch": 1.83, "learning_rate": 1.2674602513943229e-05, "loss": 0.32, "step": 22000 }, { "epoch": 1.83, "eval_acc": 0.8365886082421515, "eval_loss": 0.46297991275787354, "eval_runtime": 16.6817, "eval_samples_per_second": 586.213, "eval_steps_per_second": 18.343, "step": 22000 }, { "epoch": 2.0, "learning_rate": 1.2008657287938067e-05, "loss": 0.3195, "step": 24000 }, { "epoch": 2.0, "eval_acc": 0.8354637488495756, "eval_loss": 0.4681137800216675, "eval_runtime": 16.7123, "eval_samples_per_second": 585.138, "eval_steps_per_second": 18.31, "step": 24000 }, { "epoch": 2.16, "learning_rate": 1.1342712061932908e-05, "loss": 0.2274, "step": 26000 }, { "epoch": 2.16, "eval_acc": 0.8406790060333368, "eval_loss": 0.534744143486023, "eval_runtime": 16.6972, "eval_samples_per_second": 585.668, "eval_steps_per_second": 18.326, "step": 26000 }, { "epoch": 2.33, "learning_rate": 1.0676766835927745e-05, "loss": 0.2311, "step": 28000 }, { "epoch": 2.33, "eval_acc": 0.830964311279272, "eval_loss": 0.5649741291999817, "eval_runtime": 16.7014, "eval_samples_per_second": 585.518, "eval_steps_per_second": 18.322, "step": 28000 }, { "epoch": 2.5, "learning_rate": 1.0010821609922586e-05, "loss": 0.2293, "step": 30000 }, { "epoch": 2.5, "eval_acc": 0.8354637488495756, "eval_loss": 0.5407743453979492, "eval_runtime": 16.7112, "eval_samples_per_second": 585.175, "eval_steps_per_second": 18.311, "step": 30000 }, { "epoch": 2.66, "learning_rate": 9.344876383917424e-06, "loss": 0.2296, "step": 32000 }, { "epoch": 2.66, "eval_acc": 0.8374066878003886, "eval_loss": 0.5207422971725464, "eval_runtime": 16.6974, "eval_samples_per_second": 585.661, "eval_steps_per_second": 18.326, "step": 32000 }, { "epoch": 2.83, "learning_rate": 8.678931157912263e-06, "loss": 0.2274, "step": 34000 }, { "epoch": 2.83, "eval_acc": 0.8352592289600164, "eval_loss": 0.5696293115615845, "eval_runtime": 16.683, "eval_samples_per_second": 586.165, "eval_steps_per_second": 18.342, "step": 34000 }, { "epoch": 3.0, "learning_rate": 8.012985931907102e-06, "loss": 0.23, "step": 36000 }, { "epoch": 3.0, "eval_acc": 0.8365886082421515, "eval_loss": 0.5331636071205139, "eval_runtime": 16.7244, "eval_samples_per_second": 584.714, "eval_steps_per_second": 18.297, "step": 36000 }, { "epoch": 3.16, "learning_rate": 7.34704070590194e-06, "loss": 0.1686, "step": 38000 }, { "epoch": 3.16, "eval_acc": 0.8343388894569997, "eval_loss": 0.6275357007980347, "eval_runtime": 16.6784, "eval_samples_per_second": 586.326, "eval_steps_per_second": 18.347, "step": 38000 }, { "epoch": 3.33, "learning_rate": 6.681095479896779e-06, "loss": 0.1632, "step": 40000 }, { "epoch": 3.33, "eval_acc": 0.8348501891808978, "eval_loss": 0.6457108855247498, "eval_runtime": 16.7007, "eval_samples_per_second": 585.544, "eval_steps_per_second": 18.323, "step": 40000 }, { "epoch": 3.5, "learning_rate": 6.0151502538916185e-06, "loss": 0.1686, "step": 42000 }, { "epoch": 3.5, "eval_acc": 0.8338275897331016, "eval_loss": 0.5964699983596802, "eval_runtime": 16.6934, "eval_samples_per_second": 585.8, "eval_steps_per_second": 18.331, "step": 42000 }, { "epoch": 3.66, "learning_rate": 5.349205027886457e-06, "loss": 0.1634, "step": 44000 }, { "epoch": 3.66, "eval_acc": 0.8342366295122201, "eval_loss": 0.6272006034851074, "eval_runtime": 16.6672, "eval_samples_per_second": 586.722, "eval_steps_per_second": 18.359, "step": 44000 }, { "epoch": 3.83, "learning_rate": 4.683259801881296e-06, "loss": 0.1656, "step": 46000 }, { "epoch": 3.83, "eval_acc": 0.8311688311688312, "eval_loss": 0.6541053652763367, "eval_runtime": 16.6856, "eval_samples_per_second": 586.076, "eval_steps_per_second": 18.339, "step": 46000 }, { "epoch": 4.0, "learning_rate": 4.017314575876134e-06, "loss": 0.162, "step": 48000 }, { "epoch": 4.0, "eval_acc": 0.8316801308927293, "eval_loss": 0.6408036947250366, "eval_runtime": 16.6809, "eval_samples_per_second": 586.239, "eval_steps_per_second": 18.344, "step": 48000 }, { "epoch": 4.16, "learning_rate": 3.3513693498709734e-06, "loss": 0.1288, "step": 50000 }, { "epoch": 4.16, "eval_acc": 0.8348501891808978, "eval_loss": 0.7236860990524292, "eval_runtime": 16.7011, "eval_samples_per_second": 585.529, "eval_steps_per_second": 18.322, "step": 50000 }, { "epoch": 4.33, "learning_rate": 2.6854241238658126e-06, "loss": 0.1275, "step": 52000 }, { "epoch": 4.33, "eval_acc": 0.8295326720523571, "eval_loss": 0.7558159828186035, "eval_runtime": 16.7056, "eval_samples_per_second": 585.372, "eval_steps_per_second": 18.317, "step": 52000 }, { "epoch": 4.5, "learning_rate": 2.019478897860651e-06, "loss": 0.1291, "step": 54000 }, { "epoch": 4.5, "eval_acc": 0.8305552715001534, "eval_loss": 0.7729807496070862, "eval_runtime": 16.6766, "eval_samples_per_second": 586.391, "eval_steps_per_second": 18.349, "step": 54000 }, { "epoch": 4.66, "learning_rate": 1.35353367185549e-06, "loss": 0.1261, "step": 56000 }, { "epoch": 4.66, "eval_acc": 0.8300439717762552, "eval_loss": 0.7523751258850098, "eval_runtime": 16.688, "eval_samples_per_second": 585.99, "eval_steps_per_second": 18.337, "step": 56000 }, { "epoch": 4.83, "learning_rate": 6.875884458503289e-07, "loss": 0.1272, "step": 58000 }, { "epoch": 4.83, "eval_acc": 0.8316801308927293, "eval_loss": 0.7572413682937622, "eval_runtime": 16.7304, "eval_samples_per_second": 584.506, "eval_steps_per_second": 18.29, "step": 58000 }, { "epoch": 4.99, "learning_rate": 2.164321984516774e-08, "loss": 0.1242, "step": 60000 }, { "epoch": 4.99, "eval_acc": 0.830964311279272, "eval_loss": 0.7606698870658875, "eval_runtime": 16.6838, "eval_samples_per_second": 586.137, "eval_steps_per_second": 18.341, "step": 60000 }, { "epoch": 5.0, "step": 60065, "total_flos": 1.2642205051703808e+17, "train_loss": 0.2560187041367518, "train_runtime": 11508.9997, "train_samples_per_second": 166.995, "train_steps_per_second": 5.219 } ], "max_steps": 60065, "num_train_epochs": 5, "total_flos": 1.2642205051703808e+17, "trial_name": null, "trial_params": null }