{
  "best_metric": 0.8406790060333368,
  "best_model_checkpoint": "./outputs/finetuning/mnli_MULTI/checkpoint-26000",
  "epoch": 5.0,
  "global_step": 60065,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17,
      "learning_rate": 1.933405477399484e-05,
      "loss": 0.4798,
      "step": 2000
    },
    {
      "epoch": 0.17,
      "eval_acc": 0.8211473565804275,
      "eval_loss": 0.4810026288032532,
      "eval_runtime": 16.7598,
      "eval_samples_per_second": 583.48,
      "eval_steps_per_second": 18.258,
      "step": 2000
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.866810954798968e-05,
      "loss": 0.4494,
      "step": 4000
    },
    {
      "epoch": 0.33,
      "eval_acc": 0.8267716535433071,
      "eval_loss": 0.46056851744651794,
      "eval_runtime": 16.6747,
      "eval_samples_per_second": 586.457,
      "eval_steps_per_second": 18.351,
      "step": 4000
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.8002164321984518e-05,
      "loss": 0.431,
      "step": 6000
    },
    {
      "epoch": 0.5,
      "eval_acc": 0.8301462317210349,
      "eval_loss": 0.4622686803340912,
      "eval_runtime": 16.6907,
      "eval_samples_per_second": 585.896,
      "eval_steps_per_second": 18.334,
      "step": 6000
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.7336219095979357e-05,
      "loss": 0.4371,
      "step": 8000
    },
    {
      "epoch": 0.67,
      "eval_acc": 0.8297371919419163,
      "eval_loss": 0.44371065497398376,
      "eval_runtime": 16.7034,
      "eval_samples_per_second": 585.451,
      "eval_steps_per_second": 18.32,
      "step": 8000
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6670273869974196e-05,
      "loss": 0.4297,
      "step": 10000
    },
    {
      "epoch": 0.83,
      "eval_acc": 0.8312710911136107,
      "eval_loss": 0.45482972264289856,
      "eval_runtime": 16.6876,
      "eval_samples_per_second": 586.004,
      "eval_steps_per_second": 18.337,
      "step": 10000
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6004328643969035e-05,
      "loss": 0.4214,
      "step": 12000
    },
    {
      "epoch": 1.0,
      "eval_acc": 0.8337253297883219,
      "eval_loss": 0.4565775692462921,
      "eval_runtime": 16.6965,
      "eval_samples_per_second": 585.692,
      "eval_steps_per_second": 18.327,
      "step": 12000
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.5338383417963873e-05,
      "loss": 0.3123,
      "step": 14000
    },
    {
      "epoch": 1.17,
      "eval_acc": 0.8322936905614071,
      "eval_loss": 0.48932746052742004,
      "eval_runtime": 16.699,
      "eval_samples_per_second": 585.603,
      "eval_steps_per_second": 18.324,
      "step": 14000
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.4672438191958714e-05,
      "loss": 0.3158,
      "step": 16000
    },
    {
      "epoch": 1.33,
      "eval_acc": 0.8342366295122201,
      "eval_loss": 0.4861135184764862,
      "eval_runtime": 16.699,
      "eval_samples_per_second": 585.602,
      "eval_steps_per_second": 18.324,
      "step": 16000
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.4006492965953551e-05,
      "loss": 0.324,
      "step": 18000
    },
    {
      "epoch": 1.5,
      "eval_acc": 0.8307597913897127,
      "eval_loss": 0.4812241792678833,
      "eval_runtime": 16.6905,
      "eval_samples_per_second": 585.902,
      "eval_steps_per_second": 18.334,
      "step": 18000
    },
    {
      "epoch": 1.66,
      "learning_rate": 1.3340547739948392e-05,
      "loss": 0.3161,
      "step": 20000
    },
    {
      "epoch": 1.66,
      "eval_acc": 0.8364863482973719,
      "eval_loss": 0.4630277454853058,
      "eval_runtime": 16.702,
      "eval_samples_per_second": 585.498,
      "eval_steps_per_second": 18.321,
      "step": 20000
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.2674602513943229e-05,
      "loss": 0.32,
      "step": 22000
    },
    {
      "epoch": 1.83,
      "eval_acc": 0.8365886082421515,
      "eval_loss": 0.46297991275787354,
      "eval_runtime": 16.6817,
      "eval_samples_per_second": 586.213,
      "eval_steps_per_second": 18.343,
      "step": 22000
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.2008657287938067e-05,
      "loss": 0.3195,
      "step": 24000
    },
    {
      "epoch": 2.0,
      "eval_acc": 0.8354637488495756,
      "eval_loss": 0.4681137800216675,
      "eval_runtime": 16.7123,
      "eval_samples_per_second": 585.138,
      "eval_steps_per_second": 18.31,
      "step": 24000
    },
    {
      "epoch": 2.16,
      "learning_rate": 1.1342712061932908e-05,
      "loss": 0.2274,
      "step": 26000
    },
    {
      "epoch": 2.16,
      "eval_acc": 0.8406790060333368,
      "eval_loss": 0.534744143486023,
      "eval_runtime": 16.6972,
      "eval_samples_per_second": 585.668,
      "eval_steps_per_second": 18.326,
      "step": 26000
    },
    {
      "epoch": 2.33,
      "learning_rate": 1.0676766835927745e-05,
      "loss": 0.2311,
      "step": 28000
    },
    {
      "epoch": 2.33,
      "eval_acc": 0.830964311279272,
      "eval_loss": 0.5649741291999817,
      "eval_runtime": 16.7014,
      "eval_samples_per_second": 585.518,
      "eval_steps_per_second": 18.322,
      "step": 28000
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.0010821609922586e-05,
      "loss": 0.2293,
      "step": 30000
    },
    {
      "epoch": 2.5,
      "eval_acc": 0.8354637488495756,
      "eval_loss": 0.5407743453979492,
      "eval_runtime": 16.7112,
      "eval_samples_per_second": 585.175,
      "eval_steps_per_second": 18.311,
      "step": 30000
    },
    {
      "epoch": 2.66,
      "learning_rate": 9.344876383917424e-06,
      "loss": 0.2296,
      "step": 32000
    },
    {
      "epoch": 2.66,
      "eval_acc": 0.8374066878003886,
      "eval_loss": 0.5207422971725464,
      "eval_runtime": 16.6974,
      "eval_samples_per_second": 585.661,
      "eval_steps_per_second": 18.326,
      "step": 32000
    },
    {
      "epoch": 2.83,
      "learning_rate": 8.678931157912263e-06,
      "loss": 0.2274,
      "step": 34000
    },
    {
      "epoch": 2.83,
      "eval_acc": 0.8352592289600164,
      "eval_loss": 0.5696293115615845,
      "eval_runtime": 16.683,
      "eval_samples_per_second": 586.165,
      "eval_steps_per_second": 18.342,
      "step": 34000
    },
    {
      "epoch": 3.0,
      "learning_rate": 8.012985931907102e-06,
      "loss": 0.23,
      "step": 36000
    },
    {
      "epoch": 3.0,
      "eval_acc": 0.8365886082421515,
      "eval_loss": 0.5331636071205139,
      "eval_runtime": 16.7244,
      "eval_samples_per_second": 584.714,
      "eval_steps_per_second": 18.297,
      "step": 36000
    },
    {
      "epoch": 3.16,
      "learning_rate": 7.34704070590194e-06,
      "loss": 0.1686,
      "step": 38000
    },
    {
      "epoch": 3.16,
      "eval_acc": 0.8343388894569997,
      "eval_loss": 0.6275357007980347,
      "eval_runtime": 16.6784,
      "eval_samples_per_second": 586.326,
      "eval_steps_per_second": 18.347,
      "step": 38000
    },
    {
      "epoch": 3.33,
      "learning_rate": 6.681095479896779e-06,
      "loss": 0.1632,
      "step": 40000
    },
    {
      "epoch": 3.33,
      "eval_acc": 0.8348501891808978,
      "eval_loss": 0.6457108855247498,
      "eval_runtime": 16.7007,
      "eval_samples_per_second": 585.544,
      "eval_steps_per_second": 18.323,
      "step": 40000
    },
    {
      "epoch": 3.5,
      "learning_rate": 6.0151502538916185e-06,
      "loss": 0.1686,
      "step": 42000
    },
    {
      "epoch": 3.5,
      "eval_acc": 0.8338275897331016,
      "eval_loss": 0.5964699983596802,
      "eval_runtime": 16.6934,
      "eval_samples_per_second": 585.8,
      "eval_steps_per_second": 18.331,
      "step": 42000
    },
    {
      "epoch": 3.66,
      "learning_rate": 5.349205027886457e-06,
      "loss": 0.1634,
      "step": 44000
    },
    {
      "epoch": 3.66,
      "eval_acc": 0.8342366295122201,
      "eval_loss": 0.6272006034851074,
      "eval_runtime": 16.6672,
      "eval_samples_per_second": 586.722,
      "eval_steps_per_second": 18.359,
      "step": 44000
    },
    {
      "epoch": 3.83,
      "learning_rate": 4.683259801881296e-06,
      "loss": 0.1656,
      "step": 46000
    },
    {
      "epoch": 3.83,
      "eval_acc": 0.8311688311688312,
      "eval_loss": 0.6541053652763367,
      "eval_runtime": 16.6856,
      "eval_samples_per_second": 586.076,
      "eval_steps_per_second": 18.339,
      "step": 46000
    },
    {
      "epoch": 4.0,
      "learning_rate": 4.017314575876134e-06,
      "loss": 0.162,
      "step": 48000
    },
    {
      "epoch": 4.0,
      "eval_acc": 0.8316801308927293,
      "eval_loss": 0.6408036947250366,
      "eval_runtime": 16.6809,
      "eval_samples_per_second": 586.239,
      "eval_steps_per_second": 18.344,
      "step": 48000
    },
    {
      "epoch": 4.16,
      "learning_rate": 3.3513693498709734e-06,
      "loss": 0.1288,
      "step": 50000
    },
    {
      "epoch": 4.16,
      "eval_acc": 0.8348501891808978,
      "eval_loss": 0.7236860990524292,
      "eval_runtime": 16.7011,
      "eval_samples_per_second": 585.529,
      "eval_steps_per_second": 18.322,
      "step": 50000
    },
    {
      "epoch": 4.33,
      "learning_rate": 2.6854241238658126e-06,
      "loss": 0.1275,
      "step": 52000
    },
    {
      "epoch": 4.33,
      "eval_acc": 0.8295326720523571,
      "eval_loss": 0.7558159828186035,
      "eval_runtime": 16.7056,
      "eval_samples_per_second": 585.372,
      "eval_steps_per_second": 18.317,
      "step": 52000
    },
    {
      "epoch": 4.5,
      "learning_rate": 2.019478897860651e-06,
      "loss": 0.1291,
      "step": 54000
    },
    {
      "epoch": 4.5,
      "eval_acc": 0.8305552715001534,
      "eval_loss": 0.7729807496070862,
      "eval_runtime": 16.6766,
      "eval_samples_per_second": 586.391,
      "eval_steps_per_second": 18.349,
      "step": 54000
    },
    {
      "epoch": 4.66,
      "learning_rate": 1.35353367185549e-06,
      "loss": 0.1261,
      "step": 56000
    },
    {
      "epoch": 4.66,
      "eval_acc": 0.8300439717762552,
      "eval_loss": 0.7523751258850098,
      "eval_runtime": 16.688,
      "eval_samples_per_second": 585.99,
      "eval_steps_per_second": 18.337,
      "step": 56000
    },
    {
      "epoch": 4.83,
      "learning_rate": 6.875884458503289e-07,
      "loss": 0.1272,
      "step": 58000
    },
    {
      "epoch": 4.83,
      "eval_acc": 0.8316801308927293,
      "eval_loss": 0.7572413682937622,
      "eval_runtime": 16.7304,
      "eval_samples_per_second": 584.506,
      "eval_steps_per_second": 18.29,
      "step": 58000
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.164321984516774e-08,
      "loss": 0.1242,
      "step": 60000
    },
    {
      "epoch": 4.99,
      "eval_acc": 0.830964311279272,
      "eval_loss": 0.7606698870658875,
      "eval_runtime": 16.6838,
      "eval_samples_per_second": 586.137,
      "eval_steps_per_second": 18.341,
      "step": 60000
    },
    {
      "epoch": 5.0,
      "step": 60065,
      "total_flos": 1.2642205051703808e+17,
      "train_loss": 0.2560187041367518,
      "train_runtime": 11508.9997,
      "train_samples_per_second": 166.995,
      "train_steps_per_second": 5.219
    }
  ],
  "max_steps": 60065,
  "num_train_epochs": 5,
  "total_flos": 1.2642205051703808e+17,
  "trial_name": null,
  "trial_params": null
}