|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 9360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.4740632474422455, |
|
"eval_runtime": 9.255, |
|
"eval_samples_per_second": 29.93, |
|
"eval_steps_per_second": 3.782, |
|
"step": 156 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 1.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5090252707581228, |
|
"eval_loss": 0.3849472999572754, |
|
"eval_runtime": 9.2931, |
|
"eval_samples_per_second": 29.807, |
|
"eval_steps_per_second": 3.766, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 2.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.434469997882843, |
|
"eval_runtime": 9.1899, |
|
"eval_samples_per_second": 30.142, |
|
"eval_steps_per_second": 3.809, |
|
"step": 468 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 3.0, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0037863247863247863, |
|
"loss": 0.5496, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5234657039711191, |
|
"eval_loss": 0.4748525321483612, |
|
"eval_runtime": 9.2109, |
|
"eval_samples_per_second": 30.073, |
|
"eval_steps_per_second": 3.8, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 4.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.41381731629371643, |
|
"eval_runtime": 9.2454, |
|
"eval_samples_per_second": 29.961, |
|
"eval_steps_per_second": 3.786, |
|
"step": 780 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 5.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5631768953068592, |
|
"eval_loss": 0.3598599135875702, |
|
"eval_runtime": 3.6132, |
|
"eval_samples_per_second": 76.664, |
|
"eval_steps_per_second": 9.687, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5631768953068592, |
|
"epoch": 6.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.003572649572649573, |
|
"loss": 0.4365, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5631768953068592, |
|
"eval_loss": 0.395423024892807, |
|
"eval_runtime": 9.3371, |
|
"eval_samples_per_second": 29.666, |
|
"eval_steps_per_second": 3.748, |
|
"step": 1092 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5631768953068592, |
|
"epoch": 7.0, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5018050541516246, |
|
"eval_loss": 0.34552210569381714, |
|
"eval_runtime": 9.3479, |
|
"eval_samples_per_second": 29.632, |
|
"eval_steps_per_second": 3.744, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5631768953068592, |
|
"epoch": 8.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5776173285198556, |
|
"eval_loss": 0.39849764108657837, |
|
"eval_runtime": 9.338, |
|
"eval_samples_per_second": 29.664, |
|
"eval_steps_per_second": 3.748, |
|
"step": 1404 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.5776173285198556, |
|
"epoch": 9.0, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.003358974358974359, |
|
"loss": 0.4109, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5992779783393501, |
|
"eval_loss": 0.3827930986881256, |
|
"eval_runtime": 9.2193, |
|
"eval_samples_per_second": 30.046, |
|
"eval_steps_per_second": 3.796, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.5992779783393501, |
|
"epoch": 10.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.4339348375797272, |
|
"eval_runtime": 9.1599, |
|
"eval_samples_per_second": 30.24, |
|
"eval_steps_per_second": 3.821, |
|
"step": 1716 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.5992779783393501, |
|
"epoch": 11.0, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.34316927194595337, |
|
"eval_runtime": 8.9867, |
|
"eval_samples_per_second": 30.823, |
|
"eval_steps_per_second": 3.895, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.5992779783393501, |
|
"epoch": 12.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.0031452991452991454, |
|
"loss": 0.3611, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6137184115523465, |
|
"eval_loss": 0.33951735496520996, |
|
"eval_runtime": 9.3311, |
|
"eval_samples_per_second": 29.686, |
|
"eval_steps_per_second": 3.751, |
|
"step": 2028 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6137184115523465, |
|
"epoch": 13.0, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.3404270112514496, |
|
"eval_runtime": 9.3325, |
|
"eval_samples_per_second": 29.681, |
|
"eval_steps_per_second": 3.75, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 14.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.3395780026912689, |
|
"eval_runtime": 9.3018, |
|
"eval_samples_per_second": 29.779, |
|
"eval_steps_per_second": 3.763, |
|
"step": 2340 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 15.0, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6353790613718412, |
|
"eval_loss": 0.3856579065322876, |
|
"eval_runtime": 9.3146, |
|
"eval_samples_per_second": 29.738, |
|
"eval_steps_per_second": 3.758, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 16.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0029316239316239316, |
|
"loss": 0.3456, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.348003625869751, |
|
"eval_runtime": 9.3161, |
|
"eval_samples_per_second": 29.734, |
|
"eval_steps_per_second": 3.757, |
|
"step": 2652 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6895306859205776, |
|
"epoch": 17.0, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.33484789729118347, |
|
"eval_runtime": 9.253, |
|
"eval_samples_per_second": 29.936, |
|
"eval_steps_per_second": 3.783, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.703971119133574, |
|
"epoch": 18.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6425992779783394, |
|
"eval_loss": 0.33233121037483215, |
|
"eval_runtime": 9.3192, |
|
"eval_samples_per_second": 29.724, |
|
"eval_steps_per_second": 3.756, |
|
"step": 2964 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.703971119133574, |
|
"epoch": 19.0, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0027179487179487182, |
|
"loss": 0.3391, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.359100878238678, |
|
"eval_runtime": 9.2605, |
|
"eval_samples_per_second": 29.912, |
|
"eval_steps_per_second": 3.78, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.703971119133574, |
|
"epoch": 20.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.33779582381248474, |
|
"eval_runtime": 9.5614, |
|
"eval_samples_per_second": 28.971, |
|
"eval_steps_per_second": 3.661, |
|
"step": 3276 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 21.0, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.3452521562576294, |
|
"eval_runtime": 9.6177, |
|
"eval_samples_per_second": 28.801, |
|
"eval_steps_per_second": 3.639, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 22.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0025042735042735045, |
|
"loss": 0.3319, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6678700361010831, |
|
"eval_loss": 0.34050798416137695, |
|
"eval_runtime": 9.2679, |
|
"eval_samples_per_second": 29.888, |
|
"eval_steps_per_second": 3.776, |
|
"step": 3588 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 23.0, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.34506645798683167, |
|
"eval_runtime": 9.2745, |
|
"eval_samples_per_second": 29.867, |
|
"eval_steps_per_second": 3.774, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 24.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.36652639508247375, |
|
"eval_runtime": 9.4067, |
|
"eval_samples_per_second": 29.447, |
|
"eval_steps_per_second": 3.721, |
|
"step": 3900 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 25.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0022905982905982907, |
|
"loss": 0.3274, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.32902663946151733, |
|
"eval_runtime": 9.2506, |
|
"eval_samples_per_second": 29.944, |
|
"eval_steps_per_second": 3.784, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 26.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3251630365848541, |
|
"eval_runtime": 9.2957, |
|
"eval_samples_per_second": 29.799, |
|
"eval_steps_per_second": 3.765, |
|
"step": 4212 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 27.0, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.3264797031879425, |
|
"eval_runtime": 9.1811, |
|
"eval_samples_per_second": 30.171, |
|
"eval_steps_per_second": 3.812, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 28.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0020769230769230773, |
|
"loss": 0.3214, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.32835328578948975, |
|
"eval_runtime": 9.1497, |
|
"eval_samples_per_second": 30.274, |
|
"eval_steps_per_second": 3.825, |
|
"step": 4524 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 29.0, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.3290432393550873, |
|
"eval_runtime": 8.9091, |
|
"eval_samples_per_second": 31.092, |
|
"eval_steps_per_second": 3.929, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 30.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3327571749687195, |
|
"eval_runtime": 8.8998, |
|
"eval_samples_per_second": 31.124, |
|
"eval_steps_per_second": 3.933, |
|
"step": 4836 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 31.0, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3268325626850128, |
|
"eval_runtime": 8.9031, |
|
"eval_samples_per_second": 31.113, |
|
"eval_steps_per_second": 3.931, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 32.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0018632478632478633, |
|
"loss": 0.3167, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3372153341770172, |
|
"eval_runtime": 8.8917, |
|
"eval_samples_per_second": 31.153, |
|
"eval_steps_per_second": 3.936, |
|
"step": 5148 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 33.0, |
|
"step": 5148 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.32633286714553833, |
|
"eval_runtime": 8.8844, |
|
"eval_samples_per_second": 31.178, |
|
"eval_steps_per_second": 3.939, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 34.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.32305726408958435, |
|
"eval_runtime": 8.9015, |
|
"eval_samples_per_second": 31.118, |
|
"eval_steps_per_second": 3.932, |
|
"step": 5460 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 35.0, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0016495726495726495, |
|
"loss": 0.312, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.325504332780838, |
|
"eval_runtime": 8.9164, |
|
"eval_samples_per_second": 31.066, |
|
"eval_steps_per_second": 3.925, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 36.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.33250829577445984, |
|
"eval_runtime": 8.8964, |
|
"eval_samples_per_second": 31.136, |
|
"eval_steps_per_second": 3.934, |
|
"step": 5772 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 37.0, |
|
"step": 5772 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3350552022457123, |
|
"eval_runtime": 8.8999, |
|
"eval_samples_per_second": 31.124, |
|
"eval_steps_per_second": 3.933, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 38.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.001435897435897436, |
|
"loss": 0.3083, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3361571431159973, |
|
"eval_runtime": 8.8805, |
|
"eval_samples_per_second": 31.192, |
|
"eval_steps_per_second": 3.941, |
|
"step": 6084 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 39.0, |
|
"step": 6084 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.33255627751350403, |
|
"eval_runtime": 8.896, |
|
"eval_samples_per_second": 31.138, |
|
"eval_steps_per_second": 3.934, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 40.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3366214632987976, |
|
"eval_runtime": 8.8975, |
|
"eval_samples_per_second": 31.132, |
|
"eval_steps_per_second": 3.934, |
|
"step": 6396 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 41.0, |
|
"step": 6396 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0012222222222222224, |
|
"loss": 0.3081, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3264662027359009, |
|
"eval_runtime": 8.9131, |
|
"eval_samples_per_second": 31.078, |
|
"eval_steps_per_second": 3.927, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 42.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3351312577724457, |
|
"eval_runtime": 8.9174, |
|
"eval_samples_per_second": 31.063, |
|
"eval_steps_per_second": 3.925, |
|
"step": 6708 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 43.0, |
|
"step": 6708 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.3383844196796417, |
|
"eval_runtime": 8.9057, |
|
"eval_samples_per_second": 31.104, |
|
"eval_steps_per_second": 3.93, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 44.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0010085470085470086, |
|
"loss": 0.3032, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.32982781529426575, |
|
"eval_runtime": 8.8961, |
|
"eval_samples_per_second": 31.137, |
|
"eval_steps_per_second": 3.934, |
|
"step": 7020 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 45.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.3309342563152313, |
|
"eval_runtime": 8.8931, |
|
"eval_samples_per_second": 31.148, |
|
"eval_steps_per_second": 3.936, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 46.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3318590819835663, |
|
"eval_runtime": 8.8948, |
|
"eval_samples_per_second": 31.142, |
|
"eval_steps_per_second": 3.935, |
|
"step": 7332 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 47.0, |
|
"step": 7332 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.34517648816108704, |
|
"eval_runtime": 8.9043, |
|
"eval_samples_per_second": 31.108, |
|
"eval_steps_per_second": 3.931, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 48.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0007948717948717948, |
|
"loss": 0.2998, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3364735543727875, |
|
"eval_runtime": 8.9076, |
|
"eval_samples_per_second": 31.097, |
|
"eval_steps_per_second": 3.929, |
|
"step": 7644 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 49.0, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3290213346481323, |
|
"eval_runtime": 8.8977, |
|
"eval_samples_per_second": 31.132, |
|
"eval_steps_per_second": 3.934, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 50.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.3251466155052185, |
|
"eval_runtime": 8.9086, |
|
"eval_samples_per_second": 31.093, |
|
"eval_steps_per_second": 3.929, |
|
"step": 7956 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 51.0, |
|
"step": 7956 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.0005811965811965813, |
|
"loss": 0.2989, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.3253740072250366, |
|
"eval_runtime": 8.8806, |
|
"eval_samples_per_second": 31.192, |
|
"eval_steps_per_second": 3.941, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 52.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3372468054294586, |
|
"eval_runtime": 8.9258, |
|
"eval_samples_per_second": 31.034, |
|
"eval_steps_per_second": 3.921, |
|
"step": 8268 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 53.0, |
|
"step": 8268 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.340105801820755, |
|
"eval_runtime": 8.9051, |
|
"eval_samples_per_second": 31.106, |
|
"eval_steps_per_second": 3.93, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 54.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.00036752136752136755, |
|
"loss": 0.2951, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.33145537972450256, |
|
"eval_runtime": 8.91, |
|
"eval_samples_per_second": 31.089, |
|
"eval_steps_per_second": 3.928, |
|
"step": 8580 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 55.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.33449816703796387, |
|
"eval_runtime": 8.899, |
|
"eval_samples_per_second": 31.127, |
|
"eval_steps_per_second": 3.933, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 56.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.33013680577278137, |
|
"eval_runtime": 8.9036, |
|
"eval_samples_per_second": 31.111, |
|
"eval_steps_per_second": 3.931, |
|
"step": 8892 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 57.0, |
|
"step": 8892 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.00015384615384615385, |
|
"loss": 0.2945, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3321804106235504, |
|
"eval_runtime": 9.3599, |
|
"eval_samples_per_second": 29.594, |
|
"eval_steps_per_second": 3.739, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 58.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.3304736912250519, |
|
"eval_runtime": 9.3479, |
|
"eval_samples_per_second": 29.632, |
|
"eval_steps_per_second": 3.744, |
|
"step": 9204 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 59.0, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3315790295600891, |
|
"eval_runtime": 9.3813, |
|
"eval_samples_per_second": 29.527, |
|
"eval_steps_per_second": 3.731, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7509025270758123, |
|
"epoch": 60.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 9360, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.34035047710451305, |
|
"train_runtime": 7833.6103, |
|
"train_samples_per_second": 19.072, |
|
"train_steps_per_second": 1.195 |
|
} |
|
], |
|
"max_steps": 9360, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|