|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 9360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4693140794223827, |
|
"eval_loss": 0.554839015007019, |
|
"eval_runtime": 4.3616, |
|
"eval_samples_per_second": 63.509, |
|
"eval_steps_per_second": 8.025, |
|
"step": 156 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4693140794223827, |
|
"epoch": 1.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.48375451263537905, |
|
"eval_loss": 0.5564807653427124, |
|
"eval_runtime": 4.4769, |
|
"eval_samples_per_second": 61.873, |
|
"eval_steps_per_second": 7.818, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.48375451263537905, |
|
"epoch": 2.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.5531193017959595, |
|
"eval_runtime": 4.5245, |
|
"eval_samples_per_second": 61.222, |
|
"eval_steps_per_second": 7.736, |
|
"step": 468 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.48375451263537905, |
|
"epoch": 3.0, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0028397435897435895, |
|
"loss": 0.6259, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.5810030698776245, |
|
"eval_runtime": 4.5312, |
|
"eval_samples_per_second": 61.131, |
|
"eval_steps_per_second": 7.724, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.48375451263537905, |
|
"epoch": 4.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5595667870036101, |
|
"eval_loss": 0.6009803414344788, |
|
"eval_runtime": 4.5226, |
|
"eval_samples_per_second": 61.249, |
|
"eval_steps_per_second": 7.739, |
|
"step": 780 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 5.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.49691498279571533, |
|
"eval_runtime": 4.5117, |
|
"eval_samples_per_second": 61.396, |
|
"eval_steps_per_second": 7.758, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.6462093862815884, |
|
"epoch": 6.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00267948717948718, |
|
"loss": 0.5907, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5487364620938628, |
|
"eval_loss": 0.7981559634208679, |
|
"eval_runtime": 4.518, |
|
"eval_samples_per_second": 61.31, |
|
"eval_steps_per_second": 7.747, |
|
"step": 1092 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.6462093862815884, |
|
"epoch": 7.0, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.631768953068592, |
|
"eval_loss": 0.4882575571537018, |
|
"eval_runtime": 4.5152, |
|
"eval_samples_per_second": 61.349, |
|
"eval_steps_per_second": 7.752, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.6462093862815884, |
|
"epoch": 8.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.4714406132698059, |
|
"eval_runtime": 4.5186, |
|
"eval_samples_per_second": 61.302, |
|
"eval_steps_per_second": 7.746, |
|
"step": 1404 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 9.0, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0025192307692307693, |
|
"loss": 0.5602, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.555956678700361, |
|
"eval_loss": 0.9236112236976624, |
|
"eval_runtime": 4.512, |
|
"eval_samples_per_second": 61.391, |
|
"eval_steps_per_second": 7.757, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 10.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.49723225831985474, |
|
"eval_runtime": 4.5165, |
|
"eval_samples_per_second": 61.331, |
|
"eval_steps_per_second": 7.749, |
|
"step": 1716 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 11.0, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.5116312503814697, |
|
"eval_runtime": 4.5179, |
|
"eval_samples_per_second": 61.311, |
|
"eval_steps_per_second": 7.747, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 12.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.002358974358974359, |
|
"loss": 0.5015, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.4912594258785248, |
|
"eval_runtime": 4.519, |
|
"eval_samples_per_second": 61.296, |
|
"eval_steps_per_second": 7.745, |
|
"step": 2028 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 13.0, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.4682758152484894, |
|
"eval_runtime": 4.5179, |
|
"eval_samples_per_second": 61.311, |
|
"eval_steps_per_second": 7.747, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 14.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.5264927744865417, |
|
"eval_runtime": 4.5188, |
|
"eval_samples_per_second": 61.3, |
|
"eval_steps_per_second": 7.745, |
|
"step": 2340 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 15.0, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.4616451859474182, |
|
"eval_runtime": 4.5164, |
|
"eval_samples_per_second": 61.332, |
|
"eval_steps_per_second": 7.749, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 16.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0021987179487179486, |
|
"loss": 0.4782, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6678700361010831, |
|
"eval_loss": 0.5787555575370789, |
|
"eval_runtime": 4.5274, |
|
"eval_samples_per_second": 61.183, |
|
"eval_steps_per_second": 7.731, |
|
"step": 2652 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 17.0, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.44711267948150635, |
|
"eval_runtime": 4.5354, |
|
"eval_samples_per_second": 61.076, |
|
"eval_steps_per_second": 7.717, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 18.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.458781361579895, |
|
"eval_runtime": 4.5233, |
|
"eval_samples_per_second": 61.239, |
|
"eval_steps_per_second": 7.738, |
|
"step": 2964 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 19.0, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0020384615384615385, |
|
"loss": 0.4628, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6425992779783394, |
|
"eval_loss": 0.6477251648902893, |
|
"eval_runtime": 4.5142, |
|
"eval_samples_per_second": 61.362, |
|
"eval_steps_per_second": 7.753, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 20.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.5305241942405701, |
|
"eval_runtime": 4.5156, |
|
"eval_samples_per_second": 61.343, |
|
"eval_steps_per_second": 7.751, |
|
"step": 3276 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 21.0, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.4549444615840912, |
|
"eval_runtime": 4.5147, |
|
"eval_samples_per_second": 61.355, |
|
"eval_steps_per_second": 7.752, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 22.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0018782051282051281, |
|
"loss": 0.4248, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.510131299495697, |
|
"eval_runtime": 4.5083, |
|
"eval_samples_per_second": 61.443, |
|
"eval_steps_per_second": 7.764, |
|
"step": 3588 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 23.0, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.47630512714385986, |
|
"eval_runtime": 4.5103, |
|
"eval_samples_per_second": 61.415, |
|
"eval_steps_per_second": 7.76, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 24.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.580871045589447, |
|
"eval_runtime": 4.5211, |
|
"eval_samples_per_second": 61.269, |
|
"eval_steps_per_second": 7.742, |
|
"step": 3900 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 25.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0017179487179487178, |
|
"loss": 0.4067, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.44606083631515503, |
|
"eval_runtime": 4.512, |
|
"eval_samples_per_second": 61.392, |
|
"eval_steps_per_second": 7.757, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 26.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.44597113132476807, |
|
"eval_runtime": 4.5146, |
|
"eval_samples_per_second": 61.356, |
|
"eval_steps_per_second": 7.753, |
|
"step": 4212 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 27.0, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.4453907907009125, |
|
"eval_runtime": 4.5068, |
|
"eval_samples_per_second": 61.462, |
|
"eval_steps_per_second": 7.766, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 28.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0015576923076923079, |
|
"loss": 0.3941, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.4664019048213959, |
|
"eval_runtime": 4.5147, |
|
"eval_samples_per_second": 61.355, |
|
"eval_steps_per_second": 7.752, |
|
"step": 4524 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 29.0, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.5038701295852661, |
|
"eval_runtime": 4.5095, |
|
"eval_samples_per_second": 61.426, |
|
"eval_steps_per_second": 7.761, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 30.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.45480838418006897, |
|
"eval_runtime": 4.5065, |
|
"eval_samples_per_second": 61.467, |
|
"eval_steps_per_second": 7.767, |
|
"step": 4836 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 31.0, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.448424369096756, |
|
"eval_runtime": 4.5029, |
|
"eval_samples_per_second": 61.516, |
|
"eval_steps_per_second": 7.773, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 32.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0013974358974358976, |
|
"loss": 0.3749, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.49240007996559143, |
|
"eval_runtime": 4.5076, |
|
"eval_samples_per_second": 61.452, |
|
"eval_steps_per_second": 7.765, |
|
"step": 5148 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 33.0, |
|
"step": 5148 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.45687469840049744, |
|
"eval_runtime": 4.5091, |
|
"eval_samples_per_second": 61.432, |
|
"eval_steps_per_second": 7.762, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 34.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7617328519855595, |
|
"eval_loss": 0.4603608548641205, |
|
"eval_runtime": 4.5099, |
|
"eval_samples_per_second": 61.42, |
|
"eval_steps_per_second": 7.761, |
|
"step": 5460 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 35.0, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0012371794871794872, |
|
"loss": 0.3586, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7653429602888087, |
|
"eval_loss": 0.4447592794895172, |
|
"eval_runtime": 4.5158, |
|
"eval_samples_per_second": 61.34, |
|
"eval_steps_per_second": 7.751, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 36.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.47678714990615845, |
|
"eval_runtime": 4.5221, |
|
"eval_samples_per_second": 61.254, |
|
"eval_steps_per_second": 7.74, |
|
"step": 5772 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 37.0, |
|
"step": 5772 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.5052057504653931, |
|
"eval_runtime": 4.5169, |
|
"eval_samples_per_second": 61.325, |
|
"eval_steps_per_second": 7.749, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 38.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0010769230769230769, |
|
"loss": 0.3521, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.5166675448417664, |
|
"eval_runtime": 4.5231, |
|
"eval_samples_per_second": 61.241, |
|
"eval_steps_per_second": 7.738, |
|
"step": 6084 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 39.0, |
|
"step": 6084 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.4424862563610077, |
|
"eval_runtime": 4.5215, |
|
"eval_samples_per_second": 61.263, |
|
"eval_steps_per_second": 7.741, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 40.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.47295689582824707, |
|
"eval_runtime": 4.5231, |
|
"eval_samples_per_second": 61.241, |
|
"eval_steps_per_second": 7.738, |
|
"step": 6396 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 41.0, |
|
"step": 6396 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0009166666666666668, |
|
"loss": 0.3407, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.4623565077781677, |
|
"eval_runtime": 4.5243, |
|
"eval_samples_per_second": 61.225, |
|
"eval_steps_per_second": 7.736, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 42.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.4847128987312317, |
|
"eval_runtime": 4.5246, |
|
"eval_samples_per_second": 61.221, |
|
"eval_steps_per_second": 7.736, |
|
"step": 6708 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 43.0, |
|
"step": 6708 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.5370722413063049, |
|
"eval_runtime": 4.5236, |
|
"eval_samples_per_second": 61.234, |
|
"eval_steps_per_second": 7.737, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 44.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0007564102564102564, |
|
"loss": 0.3329, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.48414450883865356, |
|
"eval_runtime": 4.531, |
|
"eval_samples_per_second": 61.134, |
|
"eval_steps_per_second": 7.725, |
|
"step": 7020 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 45.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.4815019369125366, |
|
"eval_runtime": 4.5295, |
|
"eval_samples_per_second": 61.154, |
|
"eval_steps_per_second": 7.727, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 46.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.4678334891796112, |
|
"eval_runtime": 4.5262, |
|
"eval_samples_per_second": 61.199, |
|
"eval_steps_per_second": 7.733, |
|
"step": 7332 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 47.0, |
|
"step": 7332 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.49180135130882263, |
|
"eval_runtime": 4.527, |
|
"eval_samples_per_second": 61.188, |
|
"eval_steps_per_second": 7.731, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 48.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0005961538461538461, |
|
"loss": 0.3235, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7581227436823105, |
|
"eval_loss": 0.459226131439209, |
|
"eval_runtime": 4.528, |
|
"eval_samples_per_second": 61.175, |
|
"eval_steps_per_second": 7.73, |
|
"step": 7644 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 49.0, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.5004814267158508, |
|
"eval_runtime": 4.529, |
|
"eval_samples_per_second": 61.162, |
|
"eval_steps_per_second": 7.728, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 50.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.4776909351348877, |
|
"eval_runtime": 4.5302, |
|
"eval_samples_per_second": 61.145, |
|
"eval_steps_per_second": 7.726, |
|
"step": 7956 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 51.0, |
|
"step": 7956 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00043589743589743596, |
|
"loss": 0.3193, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.45583781599998474, |
|
"eval_runtime": 4.5285, |
|
"eval_samples_per_second": 61.168, |
|
"eval_steps_per_second": 7.729, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 52.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.48702728748321533, |
|
"eval_runtime": 4.5349, |
|
"eval_samples_per_second": 61.082, |
|
"eval_steps_per_second": 7.718, |
|
"step": 8268 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 53.0, |
|
"step": 8268 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.47922226786613464, |
|
"eval_runtime": 4.5301, |
|
"eval_samples_per_second": 61.146, |
|
"eval_steps_per_second": 7.726, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 54.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.0002756410256410257, |
|
"loss": 0.3132, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.46733495593070984, |
|
"eval_runtime": 4.5359, |
|
"eval_samples_per_second": 61.069, |
|
"eval_steps_per_second": 7.716, |
|
"step": 8580 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 55.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.4943315386772156, |
|
"eval_runtime": 4.5312, |
|
"eval_samples_per_second": 61.132, |
|
"eval_steps_per_second": 7.724, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 56.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.49698954820632935, |
|
"eval_runtime": 4.5298, |
|
"eval_samples_per_second": 61.151, |
|
"eval_steps_per_second": 7.727, |
|
"step": 8892 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 57.0, |
|
"step": 8892 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.0001153846153846154, |
|
"loss": 0.311, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.49142444133758545, |
|
"eval_runtime": 4.5346, |
|
"eval_samples_per_second": 61.086, |
|
"eval_steps_per_second": 7.718, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 58.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.4886566698551178, |
|
"eval_runtime": 4.5139, |
|
"eval_samples_per_second": 61.367, |
|
"eval_steps_per_second": 7.754, |
|
"step": 9204 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 59.0, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.48362648487091064, |
|
"eval_runtime": 4.5024, |
|
"eval_samples_per_second": 61.522, |
|
"eval_steps_per_second": 7.774, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 60.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 9360, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.41104679596729765, |
|
"train_runtime": 3875.9051, |
|
"train_samples_per_second": 38.546, |
|
"train_steps_per_second": 2.415 |
|
} |
|
], |
|
"max_steps": 9360, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|