{
  "best_metric": 0.87021494370522,
  "best_model_checkpoint": "./outputs/finetuning/mnli_ChcE/checkpoint-20000",
  "epoch": 5.0,
  "global_step": 59960,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17,
      "learning_rate": 1.933288859239493e-05,
      "loss": 0.3102,
      "step": 2000
    },
    {
      "epoch": 0.17,
      "eval_acc": 0.8544524053224155,
      "eval_loss": 0.41351282596588135,
      "eval_runtime": 16.7417,
      "eval_samples_per_second": 583.571,
      "eval_steps_per_second": 18.278,
      "step": 2000
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.866577718478986e-05,
      "loss": 0.3046,
      "step": 4000
    },
    {
      "epoch": 0.33,
      "eval_acc": 0.8644831115660184,
      "eval_loss": 0.40244850516319275,
      "eval_runtime": 16.6646,
      "eval_samples_per_second": 586.274,
      "eval_steps_per_second": 18.362,
      "step": 4000
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.7998665777184793e-05,
      "loss": 0.3038,
      "step": 6000
    },
    {
      "epoch": 0.5,
      "eval_acc": 0.8668372569089048,
      "eval_loss": 0.39355912804603577,
      "eval_runtime": 16.6429,
      "eval_samples_per_second": 587.039,
      "eval_steps_per_second": 18.386,
      "step": 6000
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.7331554369579722e-05,
      "loss": 0.3012,
      "step": 8000
    },
    {
      "epoch": 0.67,
      "eval_acc": 0.8625383828045036,
      "eval_loss": 0.40068650245666504,
      "eval_runtime": 16.6669,
      "eval_samples_per_second": 586.192,
      "eval_steps_per_second": 18.36,
      "step": 8000
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.666444296197465e-05,
      "loss": 0.2979,
      "step": 10000
    },
    {
      "epoch": 0.83,
      "eval_acc": 0.8620266120777892,
      "eval_loss": 0.42349421977996826,
      "eval_runtime": 16.6784,
      "eval_samples_per_second": 585.787,
      "eval_steps_per_second": 18.347,
      "step": 10000
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.599733155436958e-05,
      "loss": 0.2997,
      "step": 12000
    },
    {
      "epoch": 1.0,
      "eval_acc": 0.8643807574206755,
      "eval_loss": 0.403084933757782,
      "eval_runtime": 16.6652,
      "eval_samples_per_second": 586.251,
      "eval_steps_per_second": 18.362,
      "step": 12000
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.533022014676451e-05,
      "loss": 0.2099,
      "step": 14000
    },
    {
      "epoch": 1.17,
      "eval_acc": 0.8632548618219038,
      "eval_loss": 0.43926700949668884,
      "eval_runtime": 16.6492,
      "eval_samples_per_second": 586.816,
      "eval_steps_per_second": 18.379,
      "step": 14000
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.4663108739159441e-05,
      "loss": 0.2114,
      "step": 16000
    },
    {
      "epoch": 1.33,
      "eval_acc": 0.8628454452405322,
      "eval_loss": 0.46620267629623413,
      "eval_runtime": 16.6551,
      "eval_samples_per_second": 586.607,
      "eval_steps_per_second": 18.373,
      "step": 16000
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.3995997331554372e-05,
      "loss": 0.2147,
      "step": 18000
    },
    {
      "epoch": 1.5,
      "eval_acc": 0.864790174002047,
      "eval_loss": 0.4331408143043518,
      "eval_runtime": 16.6453,
      "eval_samples_per_second": 586.954,
      "eval_steps_per_second": 18.384,
      "step": 18000
    },
    {
      "epoch": 1.67,
      "learning_rate": 1.33288859239493e-05,
      "loss": 0.2122,
      "step": 20000
    },
    {
      "epoch": 1.67,
      "eval_acc": 0.87021494370522,
      "eval_loss": 0.4166066646575928,
      "eval_runtime": 16.67,
      "eval_samples_per_second": 586.082,
      "eval_steps_per_second": 18.356,
      "step": 20000
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.2661774516344229e-05,
      "loss": 0.2156,
      "step": 22000
    },
    {
      "epoch": 1.83,
      "eval_acc": 0.8632548618219038,
      "eval_loss": 0.4462782144546509,
      "eval_runtime": 16.6855,
      "eval_samples_per_second": 585.539,
      "eval_steps_per_second": 18.339,
      "step": 22000
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.199466310873916e-05,
      "loss": 0.2117,
      "step": 24000
    },
    {
      "epoch": 2.0,
      "eval_acc": 0.8679631525076765,
      "eval_loss": 0.46366986632347107,
      "eval_runtime": 16.6496,
      "eval_samples_per_second": 586.801,
      "eval_steps_per_second": 18.379,
      "step": 24000
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.132755170113409e-05,
      "loss": 0.1469,
      "step": 26000
    },
    {
      "epoch": 2.17,
      "eval_acc": 0.8680655066530194,
      "eval_loss": 0.5210850834846497,
      "eval_runtime": 16.6494,
      "eval_samples_per_second": 586.808,
      "eval_steps_per_second": 18.379,
      "step": 26000
    },
    {
      "epoch": 2.33,
      "learning_rate": 1.066044029352902e-05,
      "loss": 0.1526,
      "step": 28000
    },
    {
      "epoch": 2.33,
      "eval_acc": 0.8620266120777892,
      "eval_loss": 0.5206254720687866,
      "eval_runtime": 16.6442,
      "eval_samples_per_second": 586.99,
      "eval_steps_per_second": 18.385,
      "step": 28000
    },
    {
      "epoch": 2.5,
      "learning_rate": 9.99332888592395e-06,
      "loss": 0.1494,
      "step": 30000
    },
    {
      "epoch": 2.5,
      "eval_acc": 0.8664278403275333,
      "eval_loss": 0.5167897939682007,
      "eval_runtime": 16.6489,
      "eval_samples_per_second": 586.825,
      "eval_steps_per_second": 18.38,
      "step": 30000
    },
    {
      "epoch": 2.67,
      "learning_rate": 9.326217478318879e-06,
      "loss": 0.1519,
      "step": 32000
    },
    {
      "epoch": 2.67,
      "eval_acc": 0.8700102354145343,
      "eval_loss": 0.48301637172698975,
      "eval_runtime": 16.6933,
      "eval_samples_per_second": 585.265,
      "eval_steps_per_second": 18.331,
      "step": 32000
    },
    {
      "epoch": 2.84,
      "learning_rate": 8.65910607071381e-06,
      "loss": 0.152,
      "step": 34000
    },
    {
      "epoch": 2.84,
      "eval_acc": 0.8635619242579324,
      "eval_loss": 0.5464909672737122,
      "eval_runtime": 16.6533,
      "eval_samples_per_second": 586.671,
      "eval_steps_per_second": 18.375,
      "step": 34000
    },
    {
      "epoch": 3.0,
      "learning_rate": 7.99199466310874e-06,
      "loss": 0.1498,
      "step": 36000
    },
    {
      "epoch": 3.0,
      "eval_acc": 0.8679631525076765,
      "eval_loss": 0.5550450682640076,
      "eval_runtime": 16.6712,
      "eval_samples_per_second": 586.039,
      "eval_steps_per_second": 18.355,
      "step": 36000
    },
    {
      "epoch": 3.17,
      "learning_rate": 7.324883255503669e-06,
      "loss": 0.1131,
      "step": 38000
    },
    {
      "epoch": 3.17,
      "eval_acc": 0.8601842374616172,
      "eval_loss": 0.6764107346534729,
      "eval_runtime": 16.6576,
      "eval_samples_per_second": 586.52,
      "eval_steps_per_second": 18.37,
      "step": 38000
    },
    {
      "epoch": 3.34,
      "learning_rate": 6.6577718478985995e-06,
      "loss": 0.1135,
      "step": 40000
    },
    {
      "epoch": 3.34,
      "eval_acc": 0.865711361310133,
      "eval_loss": 0.6199905276298523,
      "eval_runtime": 16.6534,
      "eval_samples_per_second": 586.668,
      "eval_steps_per_second": 18.375,
      "step": 40000
    },
    {
      "epoch": 3.5,
      "learning_rate": 5.99066044029353e-06,
      "loss": 0.1175,
      "step": 42000
    },
    {
      "epoch": 3.5,
      "eval_acc": 0.8671443193449335,
      "eval_loss": 0.588898241519928,
      "eval_runtime": 16.664,
      "eval_samples_per_second": 586.293,
      "eval_steps_per_second": 18.363,
      "step": 42000
    },
    {
      "epoch": 3.67,
      "learning_rate": 5.32354903268846e-06,
      "loss": 0.1156,
      "step": 44000
    },
    {
      "epoch": 3.67,
      "eval_acc": 0.8663254861821904,
      "eval_loss": 0.6299933195114136,
      "eval_runtime": 16.6491,
      "eval_samples_per_second": 586.818,
      "eval_steps_per_second": 18.379,
      "step": 44000
    },
    {
      "epoch": 3.84,
      "learning_rate": 4.656437625083389e-06,
      "loss": 0.1104,
      "step": 46000
    },
    {
      "epoch": 3.84,
      "eval_acc": 0.8689866939611054,
      "eval_loss": 0.6044776439666748,
      "eval_runtime": 16.6309,
      "eval_samples_per_second": 587.46,
      "eval_steps_per_second": 18.399,
      "step": 46000
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.989326217478319e-06,
      "loss": 0.1111,
      "step": 48000
    },
    {
      "epoch": 4.0,
      "eval_acc": 0.869396110542477,
      "eval_loss": 0.6412716507911682,
      "eval_runtime": 16.6717,
      "eval_samples_per_second": 586.023,
      "eval_steps_per_second": 18.354,
      "step": 48000
    },
    {
      "epoch": 4.17,
      "learning_rate": 3.3222148098732494e-06,
      "loss": 0.086,
      "step": 50000
    },
    {
      "epoch": 4.17,
      "eval_acc": 0.8658137154554759,
      "eval_loss": 0.7271037697792053,
      "eval_runtime": 16.6567,
      "eval_samples_per_second": 586.55,
      "eval_steps_per_second": 18.371,
      "step": 50000
    },
    {
      "epoch": 4.34,
      "learning_rate": 2.6551034022681787e-06,
      "loss": 0.0895,
      "step": 52000
    },
    {
      "epoch": 4.34,
      "eval_acc": 0.8682702149437053,
      "eval_loss": 0.7273563146591187,
      "eval_runtime": 16.678,
      "eval_samples_per_second": 585.801,
      "eval_steps_per_second": 18.348,
      "step": 52000
    },
    {
      "epoch": 4.5,
      "learning_rate": 1.987991994663109e-06,
      "loss": 0.0867,
      "step": 54000
    },
    {
      "epoch": 4.5,
      "eval_acc": 0.8658137154554759,
      "eval_loss": 0.7226472496986389,
      "eval_runtime": 16.6651,
      "eval_samples_per_second": 586.253,
      "eval_steps_per_second": 18.362,
      "step": 54000
    },
    {
      "epoch": 4.67,
      "learning_rate": 1.3208805870580388e-06,
      "loss": 0.0886,
      "step": 56000
    },
    {
      "epoch": 4.67,
      "eval_acc": 0.8690890481064483,
      "eval_loss": 0.7181665897369385,
      "eval_runtime": 16.6571,
      "eval_samples_per_second": 586.536,
      "eval_steps_per_second": 18.371,
      "step": 56000
    },
    {
      "epoch": 4.84,
      "learning_rate": 6.537691794529687e-07,
      "loss": 0.0849,
      "step": 58000
    },
    {
      "epoch": 4.84,
      "eval_acc": 0.8698055271238485,
      "eval_loss": 0.7094203233718872,
      "eval_runtime": 16.6693,
      "eval_samples_per_second": 586.108,
      "eval_steps_per_second": 18.357,
      "step": 58000
    },
    {
      "epoch": 5.0,
      "step": 59960,
      "total_flos": 1.2620695529804544e+17,
      "train_loss": 0.17320066710326415,
      "train_runtime": 11406.3483,
      "train_samples_per_second": 168.211,
      "train_steps_per_second": 5.257
    }
  ],
  "max_steps": 59960,
  "num_train_epochs": 5,
  "total_flos": 1.2620695529804544e+17,
  "trial_name": null,
  "trial_params": null
}