|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 23.58551788330078, |
|
"eval_runtime": 6.7563, |
|
"eval_samples_per_second": 40.999, |
|
"eval_steps_per_second": 5.18, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.009732905982905982, |
|
"loss": 27.3295, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 15.767162322998047, |
|
"eval_runtime": 6.7306, |
|
"eval_samples_per_second": 41.155, |
|
"eval_steps_per_second": 5.2, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 14.181631088256836, |
|
"eval_runtime": 6.7376, |
|
"eval_samples_per_second": 41.113, |
|
"eval_steps_per_second": 5.195, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.009465811965811966, |
|
"loss": 19.6736, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.581140518188477, |
|
"eval_runtime": 6.8422, |
|
"eval_samples_per_second": 40.484, |
|
"eval_steps_per_second": 5.115, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.009198717948717948, |
|
"loss": 18.8481, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.385133743286133, |
|
"eval_runtime": 6.799, |
|
"eval_samples_per_second": 40.741, |
|
"eval_steps_per_second": 5.148, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.019899368286133, |
|
"eval_runtime": 6.7829, |
|
"eval_samples_per_second": 40.838, |
|
"eval_steps_per_second": 5.16, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.008931623931623932, |
|
"loss": 18.5899, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.48375451263537905, |
|
"eval_loss": 12.949692726135254, |
|
"eval_runtime": 6.8051, |
|
"eval_samples_per_second": 40.705, |
|
"eval_steps_per_second": 5.143, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.996054649353027, |
|
"eval_runtime": 6.7778, |
|
"eval_samples_per_second": 40.869, |
|
"eval_steps_per_second": 5.164, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.008664529914529914, |
|
"loss": 18.473, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.827549934387207, |
|
"eval_runtime": 6.7783, |
|
"eval_samples_per_second": 40.866, |
|
"eval_steps_per_second": 5.164, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.008397435897435898, |
|
"loss": 18.3073, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.699183464050293, |
|
"eval_runtime": 6.8687, |
|
"eval_samples_per_second": 40.328, |
|
"eval_steps_per_second": 5.096, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.515963554382324, |
|
"eval_runtime": 6.7643, |
|
"eval_samples_per_second": 40.95, |
|
"eval_steps_per_second": 5.174, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.00813034188034188, |
|
"loss": 18.2739, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 12.67311954498291, |
|
"eval_runtime": 6.8268, |
|
"eval_samples_per_second": 40.575, |
|
"eval_steps_per_second": 5.127, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.007863247863247862, |
|
"loss": 18.1236, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.606614112854004, |
|
"eval_runtime": 6.7237, |
|
"eval_samples_per_second": 41.198, |
|
"eval_steps_per_second": 5.205, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.580245971679688, |
|
"eval_runtime": 6.7327, |
|
"eval_samples_per_second": 41.143, |
|
"eval_steps_per_second": 5.199, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.007596153846153846, |
|
"loss": 18.1096, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.644716262817383, |
|
"eval_runtime": 6.8476, |
|
"eval_samples_per_second": 40.452, |
|
"eval_steps_per_second": 5.111, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.309383392333984, |
|
"eval_runtime": 6.7717, |
|
"eval_samples_per_second": 40.906, |
|
"eval_steps_per_second": 5.169, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.007329059829059828, |
|
"loss": 18.1134, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.096991539001465, |
|
"eval_runtime": 6.9132, |
|
"eval_samples_per_second": 40.068, |
|
"eval_steps_per_second": 5.063, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.007061965811965812, |
|
"loss": 18.1098, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.729278564453125, |
|
"eval_runtime": 6.7858, |
|
"eval_samples_per_second": 40.821, |
|
"eval_steps_per_second": 5.158, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.616609573364258, |
|
"eval_runtime": 6.7613, |
|
"eval_samples_per_second": 40.968, |
|
"eval_steps_per_second": 5.177, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.006794871794871795, |
|
"loss": 18.0277, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.560620307922363, |
|
"eval_runtime": 6.7972, |
|
"eval_samples_per_second": 40.752, |
|
"eval_steps_per_second": 5.149, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.006527777777777778, |
|
"loss": 18.0289, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.532211303710938, |
|
"eval_runtime": 6.7373, |
|
"eval_samples_per_second": 41.114, |
|
"eval_steps_per_second": 5.195, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.734131813049316, |
|
"eval_runtime": 6.7144, |
|
"eval_samples_per_second": 41.255, |
|
"eval_steps_per_second": 5.213, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.006260683760683761, |
|
"loss": 18.0223, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.549654960632324, |
|
"eval_runtime": 6.7477, |
|
"eval_samples_per_second": 41.051, |
|
"eval_steps_per_second": 5.187, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.419885635375977, |
|
"eval_runtime": 6.7465, |
|
"eval_samples_per_second": 41.058, |
|
"eval_steps_per_second": 5.188, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.005993589743589743, |
|
"loss": 17.9317, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.786826133728027, |
|
"eval_runtime": 6.8039, |
|
"eval_samples_per_second": 40.712, |
|
"eval_steps_per_second": 5.144, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.005726495726495726, |
|
"loss": 17.9107, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.329524040222168, |
|
"eval_runtime": 6.7093, |
|
"eval_samples_per_second": 41.286, |
|
"eval_steps_per_second": 5.217, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.603827476501465, |
|
"eval_runtime": 6.715, |
|
"eval_samples_per_second": 41.251, |
|
"eval_steps_per_second": 5.212, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.005459401709401709, |
|
"loss": 17.8944, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.332864761352539, |
|
"eval_runtime": 6.7174, |
|
"eval_samples_per_second": 41.236, |
|
"eval_steps_per_second": 5.21, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.005192307692307693, |
|
"loss": 17.8667, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.303411483764648, |
|
"eval_runtime": 6.7285, |
|
"eval_samples_per_second": 41.168, |
|
"eval_steps_per_second": 5.202, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.460461616516113, |
|
"eval_runtime": 6.8018, |
|
"eval_samples_per_second": 40.725, |
|
"eval_steps_per_second": 5.146, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.004925213675213675, |
|
"loss": 17.8228, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.51104736328125, |
|
"eval_runtime": 6.7123, |
|
"eval_samples_per_second": 41.268, |
|
"eval_steps_per_second": 5.214, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.422727584838867, |
|
"eval_runtime": 6.7496, |
|
"eval_samples_per_second": 41.039, |
|
"eval_steps_per_second": 5.185, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.004658119658119658, |
|
"loss": 17.8006, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.297224044799805, |
|
"eval_runtime": 6.7368, |
|
"eval_samples_per_second": 41.118, |
|
"eval_steps_per_second": 5.195, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.004391025641025641, |
|
"loss": 17.76, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.301063537597656, |
|
"eval_runtime": 6.7413, |
|
"eval_samples_per_second": 41.09, |
|
"eval_steps_per_second": 5.192, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.217933654785156, |
|
"eval_runtime": 6.8901, |
|
"eval_samples_per_second": 40.203, |
|
"eval_steps_per_second": 5.08, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.004123931623931624, |
|
"loss": 17.7564, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.238066673278809, |
|
"eval_runtime": 6.8926, |
|
"eval_samples_per_second": 40.188, |
|
"eval_steps_per_second": 5.078, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.003856837606837607, |
|
"loss": 17.7084, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.874678611755371, |
|
"eval_runtime": 6.8036, |
|
"eval_samples_per_second": 40.714, |
|
"eval_steps_per_second": 5.144, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.194494247436523, |
|
"eval_runtime": 6.9457, |
|
"eval_samples_per_second": 39.881, |
|
"eval_steps_per_second": 5.039, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0035897435897435897, |
|
"loss": 17.7035, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.21796703338623, |
|
"eval_runtime": 6.8901, |
|
"eval_samples_per_second": 40.202, |
|
"eval_steps_per_second": 5.08, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.282953262329102, |
|
"eval_runtime": 6.7911, |
|
"eval_samples_per_second": 40.788, |
|
"eval_steps_per_second": 5.154, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0033226495726495727, |
|
"loss": 17.6668, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.4693140794223827, |
|
"eval_loss": 12.185731887817383, |
|
"eval_runtime": 6.8018, |
|
"eval_samples_per_second": 40.724, |
|
"eval_steps_per_second": 5.146, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0030555555555555557, |
|
"loss": 17.6396, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 12.2239408493042, |
|
"eval_runtime": 6.8368, |
|
"eval_samples_per_second": 40.516, |
|
"eval_steps_per_second": 5.119, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.158435821533203, |
|
"eval_runtime": 6.7914, |
|
"eval_samples_per_second": 40.787, |
|
"eval_steps_per_second": 5.154, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0027884615384615387, |
|
"loss": 17.6452, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.318547248840332, |
|
"eval_runtime": 6.7753, |
|
"eval_samples_per_second": 40.884, |
|
"eval_steps_per_second": 5.166, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0025213675213675213, |
|
"loss": 17.6074, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.24209213256836, |
|
"eval_runtime": 6.8413, |
|
"eval_samples_per_second": 40.489, |
|
"eval_steps_per_second": 5.116, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.191246032714844, |
|
"eval_runtime": 6.7445, |
|
"eval_samples_per_second": 41.071, |
|
"eval_steps_per_second": 5.189, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.0022542735042735042, |
|
"loss": 17.6167, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.202165603637695, |
|
"eval_runtime": 6.7512, |
|
"eval_samples_per_second": 41.03, |
|
"eval_steps_per_second": 5.184, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.13256549835205, |
|
"eval_runtime": 6.7893, |
|
"eval_samples_per_second": 40.799, |
|
"eval_steps_per_second": 5.155, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0019871794871794872, |
|
"loss": 17.5782, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.155043601989746, |
|
"eval_runtime": 6.8327, |
|
"eval_samples_per_second": 40.54, |
|
"eval_steps_per_second": 5.122, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.0017200854700854702, |
|
"loss": 17.562, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.225010871887207, |
|
"eval_runtime": 6.7916, |
|
"eval_samples_per_second": 40.786, |
|
"eval_steps_per_second": 5.153, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.119040489196777, |
|
"eval_runtime": 6.8127, |
|
"eval_samples_per_second": 40.659, |
|
"eval_steps_per_second": 5.137, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.0014529914529914532, |
|
"loss": 17.5409, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.150501251220703, |
|
"eval_runtime": 6.7591, |
|
"eval_samples_per_second": 40.982, |
|
"eval_steps_per_second": 5.178, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.001185897435897436, |
|
"loss": 17.5211, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.104633331298828, |
|
"eval_runtime": 6.8314, |
|
"eval_samples_per_second": 40.548, |
|
"eval_steps_per_second": 5.123, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.113192558288574, |
|
"eval_runtime": 6.8331, |
|
"eval_samples_per_second": 40.538, |
|
"eval_steps_per_second": 5.122, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.0009188034188034189, |
|
"loss": 17.5043, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.11585807800293, |
|
"eval_runtime": 6.8139, |
|
"eval_samples_per_second": 40.652, |
|
"eval_steps_per_second": 5.137, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.108481407165527, |
|
"eval_runtime": 6.7999, |
|
"eval_samples_per_second": 40.736, |
|
"eval_steps_per_second": 5.147, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.0006517094017094017, |
|
"loss": 17.4952, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.102400779724121, |
|
"eval_runtime": 6.9307, |
|
"eval_samples_per_second": 39.967, |
|
"eval_steps_per_second": 5.05, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.00038461538461538467, |
|
"loss": 17.4731, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.095499992370605, |
|
"eval_runtime": 6.771, |
|
"eval_samples_per_second": 40.91, |
|
"eval_steps_per_second": 5.169, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.09808349609375, |
|
"eval_runtime": 6.7995, |
|
"eval_samples_per_second": 40.738, |
|
"eval_steps_per_second": 5.147, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 0.00011752136752136752, |
|
"loss": 17.4654, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.09249496459961, |
|
"eval_runtime": 3.2881, |
|
"eval_samples_per_second": 84.244, |
|
"eval_steps_per_second": 10.645, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 18.167257181803386, |
|
"train_runtime": 6211.4613, |
|
"train_samples_per_second": 24.052, |
|
"train_steps_per_second": 3.014 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|