|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.3687332570552826, |
|
"eval_runtime": 4.3048, |
|
"eval_samples_per_second": 64.346, |
|
"eval_steps_per_second": 8.13, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4729241877256318, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.732905982905983e-05, |
|
"loss": 0.3745, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.3523375689983368, |
|
"eval_runtime": 4.3685, |
|
"eval_samples_per_second": 63.408, |
|
"eval_steps_per_second": 8.012, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.35980018973350525, |
|
"eval_runtime": 4.353, |
|
"eval_samples_per_second": 63.634, |
|
"eval_steps_per_second": 8.04, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 9.465811965811966e-05, |
|
"loss": 0.3798, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34790441393852234, |
|
"eval_runtime": 4.3588, |
|
"eval_samples_per_second": 63.55, |
|
"eval_steps_per_second": 8.03, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 9.198717948717949e-05, |
|
"loss": 0.3752, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.35927093029022217, |
|
"eval_runtime": 4.3511, |
|
"eval_samples_per_second": 63.662, |
|
"eval_steps_per_second": 8.044, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3504946231842041, |
|
"eval_runtime": 4.3363, |
|
"eval_samples_per_second": 63.879, |
|
"eval_steps_per_second": 8.071, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 8.931623931623932e-05, |
|
"loss": 0.373, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3479885756969452, |
|
"eval_runtime": 4.3405, |
|
"eval_samples_per_second": 63.818, |
|
"eval_steps_per_second": 8.064, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.35928189754486084, |
|
"eval_runtime": 4.3427, |
|
"eval_samples_per_second": 63.785, |
|
"eval_steps_per_second": 8.059, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 8.664529914529916e-05, |
|
"loss": 0.3724, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3490194082260132, |
|
"eval_runtime": 4.3424, |
|
"eval_samples_per_second": 63.79, |
|
"eval_steps_per_second": 8.06, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 8.397435897435898e-05, |
|
"loss": 0.3669, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34893110394477844, |
|
"eval_runtime": 4.3468, |
|
"eval_samples_per_second": 63.725, |
|
"eval_steps_per_second": 8.052, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3486764132976532, |
|
"eval_runtime": 4.3467, |
|
"eval_samples_per_second": 63.727, |
|
"eval_steps_per_second": 8.052, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 8.13034188034188e-05, |
|
"loss": 0.3681, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.3587585389614105, |
|
"eval_runtime": 4.3467, |
|
"eval_samples_per_second": 63.726, |
|
"eval_steps_per_second": 8.052, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 7.863247863247864e-05, |
|
"loss": 0.3636, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3518940210342407, |
|
"eval_runtime": 4.3497, |
|
"eval_samples_per_second": 63.682, |
|
"eval_steps_per_second": 8.046, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.35109788179397583, |
|
"eval_runtime": 4.3616, |
|
"eval_samples_per_second": 63.509, |
|
"eval_steps_per_second": 8.025, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 7.596153846153846e-05, |
|
"loss": 0.3629, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3510183095932007, |
|
"eval_runtime": 4.3453, |
|
"eval_samples_per_second": 63.746, |
|
"eval_steps_per_second": 8.055, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3477706015110016, |
|
"eval_runtime": 4.3391, |
|
"eval_samples_per_second": 63.838, |
|
"eval_steps_per_second": 8.066, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 7.32905982905983e-05, |
|
"loss": 0.3591, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3501521646976471, |
|
"eval_runtime": 4.3406, |
|
"eval_samples_per_second": 63.815, |
|
"eval_steps_per_second": 8.063, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 7.061965811965813e-05, |
|
"loss": 0.3564, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3480573892593384, |
|
"eval_runtime": 4.3405, |
|
"eval_samples_per_second": 63.817, |
|
"eval_steps_per_second": 8.064, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3510577082633972, |
|
"eval_runtime": 4.3404, |
|
"eval_samples_per_second": 63.819, |
|
"eval_steps_per_second": 8.064, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 6.794871794871795e-05, |
|
"loss": 0.3573, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.35122740268707275, |
|
"eval_runtime": 4.3372, |
|
"eval_samples_per_second": 63.866, |
|
"eval_steps_per_second": 8.07, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 6.527777777777778e-05, |
|
"loss": 0.3574, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34808871150016785, |
|
"eval_runtime": 4.3389, |
|
"eval_samples_per_second": 63.841, |
|
"eval_steps_per_second": 8.067, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.348792165517807, |
|
"eval_runtime": 4.3427, |
|
"eval_samples_per_second": 63.785, |
|
"eval_steps_per_second": 8.059, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 6.260683760683761e-05, |
|
"loss": 0.3566, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.35162776708602905, |
|
"eval_runtime": 4.3371, |
|
"eval_samples_per_second": 63.867, |
|
"eval_steps_per_second": 8.07, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3483467102050781, |
|
"eval_runtime": 4.3395, |
|
"eval_samples_per_second": 63.832, |
|
"eval_steps_per_second": 8.065, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 5.9935897435897434e-05, |
|
"loss": 0.3571, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3478488028049469, |
|
"eval_runtime": 4.3385, |
|
"eval_samples_per_second": 63.847, |
|
"eval_steps_per_second": 8.067, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 5.726495726495726e-05, |
|
"loss": 0.3562, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3478386104106903, |
|
"eval_runtime": 4.337, |
|
"eval_samples_per_second": 63.869, |
|
"eval_steps_per_second": 8.07, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.353412389755249, |
|
"eval_runtime": 4.3381, |
|
"eval_samples_per_second": 63.853, |
|
"eval_steps_per_second": 8.068, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 5.459401709401709e-05, |
|
"loss": 0.356, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34818387031555176, |
|
"eval_runtime": 4.3382, |
|
"eval_samples_per_second": 63.852, |
|
"eval_steps_per_second": 8.068, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 5.192307692307693e-05, |
|
"loss": 0.3564, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3479267954826355, |
|
"eval_runtime": 4.3443, |
|
"eval_samples_per_second": 63.762, |
|
"eval_steps_per_second": 8.057, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3506244421005249, |
|
"eval_runtime": 4.345, |
|
"eval_samples_per_second": 63.752, |
|
"eval_steps_per_second": 8.055, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 4.925213675213676e-05, |
|
"loss": 0.3566, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3481433689594269, |
|
"eval_runtime": 4.3417, |
|
"eval_samples_per_second": 63.799, |
|
"eval_steps_per_second": 8.061, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34804922342300415, |
|
"eval_runtime": 4.3401, |
|
"eval_samples_per_second": 63.824, |
|
"eval_steps_per_second": 8.064, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 4.6581196581196586e-05, |
|
"loss": 0.3552, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3479154706001282, |
|
"eval_runtime": 4.3362, |
|
"eval_samples_per_second": 63.881, |
|
"eval_steps_per_second": 8.072, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 4.3910256410256415e-05, |
|
"loss": 0.3558, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34827256202697754, |
|
"eval_runtime": 4.3393, |
|
"eval_samples_per_second": 63.835, |
|
"eval_steps_per_second": 8.066, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3481837213039398, |
|
"eval_runtime": 4.3385, |
|
"eval_samples_per_second": 63.847, |
|
"eval_steps_per_second": 8.067, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 4.123931623931624e-05, |
|
"loss": 0.3553, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3493901193141937, |
|
"eval_runtime": 4.3365, |
|
"eval_samples_per_second": 63.876, |
|
"eval_steps_per_second": 8.071, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 3.856837606837607e-05, |
|
"loss": 0.3546, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3477574288845062, |
|
"eval_runtime": 4.333, |
|
"eval_samples_per_second": 63.928, |
|
"eval_steps_per_second": 8.078, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3490616977214813, |
|
"eval_runtime": 4.3368, |
|
"eval_samples_per_second": 63.872, |
|
"eval_steps_per_second": 8.071, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 3.58974358974359e-05, |
|
"loss": 0.3558, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3478951156139374, |
|
"eval_runtime": 4.3361, |
|
"eval_samples_per_second": 63.882, |
|
"eval_steps_per_second": 8.072, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34864434599876404, |
|
"eval_runtime": 4.338, |
|
"eval_samples_per_second": 63.855, |
|
"eval_steps_per_second": 8.068, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 3.3226495726495725e-05, |
|
"loss": 0.3558, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34804537892341614, |
|
"eval_runtime": 4.3347, |
|
"eval_samples_per_second": 63.903, |
|
"eval_steps_per_second": 8.074, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.3551, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3495274782180786, |
|
"eval_runtime": 4.3376, |
|
"eval_samples_per_second": 63.86, |
|
"eval_steps_per_second": 8.069, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3478722870349884, |
|
"eval_runtime": 4.3353, |
|
"eval_samples_per_second": 63.894, |
|
"eval_steps_per_second": 8.073, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 2.7884615384615386e-05, |
|
"loss": 0.3563, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3480492830276489, |
|
"eval_runtime": 4.3398, |
|
"eval_samples_per_second": 63.828, |
|
"eval_steps_per_second": 8.065, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 2.5213675213675215e-05, |
|
"loss": 0.3549, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3502560257911682, |
|
"eval_runtime": 4.342, |
|
"eval_samples_per_second": 63.795, |
|
"eval_steps_per_second": 8.061, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34895843267440796, |
|
"eval_runtime": 4.3397, |
|
"eval_samples_per_second": 63.83, |
|
"eval_steps_per_second": 8.065, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 2.2542735042735044e-05, |
|
"loss": 0.355, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34932848811149597, |
|
"eval_runtime": 4.3405, |
|
"eval_samples_per_second": 63.817, |
|
"eval_steps_per_second": 8.064, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3478783965110779, |
|
"eval_runtime": 4.3362, |
|
"eval_samples_per_second": 63.882, |
|
"eval_steps_per_second": 8.072, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 1.987179487179487e-05, |
|
"loss": 0.3551, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.348417192697525, |
|
"eval_runtime": 4.3401, |
|
"eval_samples_per_second": 63.824, |
|
"eval_steps_per_second": 8.064, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 1.7200854700854702e-05, |
|
"loss": 0.3558, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3478577435016632, |
|
"eval_runtime": 4.3354, |
|
"eval_samples_per_second": 63.893, |
|
"eval_steps_per_second": 8.073, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3480057716369629, |
|
"eval_runtime": 4.3382, |
|
"eval_samples_per_second": 63.852, |
|
"eval_steps_per_second": 8.068, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 1.4529914529914531e-05, |
|
"loss": 0.3542, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34878429770469666, |
|
"eval_runtime": 4.338, |
|
"eval_samples_per_second": 63.855, |
|
"eval_steps_per_second": 8.068, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 1.1858974358974359e-05, |
|
"loss": 0.3553, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3482593595981598, |
|
"eval_runtime": 4.3388, |
|
"eval_samples_per_second": 63.842, |
|
"eval_steps_per_second": 8.067, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3484840393066406, |
|
"eval_runtime": 4.3395, |
|
"eval_samples_per_second": 63.833, |
|
"eval_steps_per_second": 8.065, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 9.18803418803419e-06, |
|
"loss": 0.3544, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3480793237686157, |
|
"eval_runtime": 4.3369, |
|
"eval_samples_per_second": 63.87, |
|
"eval_steps_per_second": 8.07, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34800031781196594, |
|
"eval_runtime": 4.3409, |
|
"eval_samples_per_second": 63.812, |
|
"eval_steps_per_second": 8.063, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 6.517094017094018e-06, |
|
"loss": 0.3549, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.3482745885848999, |
|
"eval_runtime": 4.3469, |
|
"eval_samples_per_second": 63.723, |
|
"eval_steps_per_second": 8.052, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 0.3544, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34807997941970825, |
|
"eval_runtime": 4.3307, |
|
"eval_samples_per_second": 63.962, |
|
"eval_steps_per_second": 8.082, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34806886315345764, |
|
"eval_runtime": 4.3268, |
|
"eval_samples_per_second": 64.02, |
|
"eval_steps_per_second": 8.089, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 1.1752136752136752e-06, |
|
"loss": 0.3537, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.34800583124160767, |
|
"eval_runtime": 4.33, |
|
"eval_samples_per_second": 63.973, |
|
"eval_steps_per_second": 8.083, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 7.25686341470208e+16, |
|
"train_loss": 0.35934105734539845, |
|
"train_runtime": 4012.3556, |
|
"train_samples_per_second": 37.235, |
|
"train_steps_per_second": 4.666 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 7.25686341470208e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|