|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.3733972907066345, |
|
"eval_runtime": 4.2135, |
|
"eval_samples_per_second": 65.74, |
|
"eval_steps_per_second": 8.307, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0009732905982905983, |
|
"loss": 0.4216, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.38019850850105286, |
|
"eval_runtime": 4.2887, |
|
"eval_samples_per_second": 64.589, |
|
"eval_steps_per_second": 8.161, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.4298796057701111, |
|
"eval_runtime": 4.2965, |
|
"eval_samples_per_second": 64.471, |
|
"eval_steps_per_second": 8.146, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0009465811965811966, |
|
"loss": 0.3883, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5451263537906137, |
|
"eval_loss": 0.34903544187545776, |
|
"eval_runtime": 4.308, |
|
"eval_samples_per_second": 64.299, |
|
"eval_steps_per_second": 8.124, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5451263537906137, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0009198717948717949, |
|
"loss": 0.3918, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5884476534296029, |
|
"eval_loss": 0.3460931181907654, |
|
"eval_runtime": 4.301, |
|
"eval_samples_per_second": 64.404, |
|
"eval_steps_per_second": 8.138, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5523465703971119, |
|
"eval_loss": 0.3598935902118683, |
|
"eval_runtime": 4.2993, |
|
"eval_samples_per_second": 64.429, |
|
"eval_steps_per_second": 8.141, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.0008931623931623932, |
|
"loss": 0.3764, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5451263537906137, |
|
"eval_loss": 0.356489896774292, |
|
"eval_runtime": 4.3043, |
|
"eval_samples_per_second": 64.355, |
|
"eval_steps_per_second": 8.131, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5018050541516246, |
|
"eval_loss": 0.3611055910587311, |
|
"eval_runtime": 4.3049, |
|
"eval_samples_per_second": 64.345, |
|
"eval_steps_per_second": 8.13, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.0008664529914529915, |
|
"loss": 0.3794, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5415162454873647, |
|
"eval_loss": 0.40397846698760986, |
|
"eval_runtime": 4.3052, |
|
"eval_samples_per_second": 64.341, |
|
"eval_steps_per_second": 8.13, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0008397435897435898, |
|
"loss": 0.3778, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.3622380495071411, |
|
"eval_runtime": 4.3031, |
|
"eval_samples_per_second": 64.372, |
|
"eval_steps_per_second": 8.134, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.4693140794223827, |
|
"eval_loss": 0.4954076111316681, |
|
"eval_runtime": 4.3057, |
|
"eval_samples_per_second": 64.333, |
|
"eval_steps_per_second": 8.129, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.0008130341880341881, |
|
"loss": 0.3813, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.3602141737937927, |
|
"eval_runtime": 4.3078, |
|
"eval_samples_per_second": 64.302, |
|
"eval_steps_per_second": 8.125, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.0007863247863247863, |
|
"loss": 0.3718, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5415162454873647, |
|
"eval_loss": 0.345300555229187, |
|
"eval_runtime": 4.2997, |
|
"eval_samples_per_second": 64.422, |
|
"eval_steps_per_second": 8.14, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.36396491527557373, |
|
"eval_runtime": 4.3002, |
|
"eval_samples_per_second": 64.415, |
|
"eval_steps_per_second": 8.139, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.0007596153846153846, |
|
"loss": 0.3701, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.48375451263537905, |
|
"eval_loss": 0.358859658241272, |
|
"eval_runtime": 4.3059, |
|
"eval_samples_per_second": 64.33, |
|
"eval_steps_per_second": 8.128, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5631768953068592, |
|
"eval_loss": 0.36997947096824646, |
|
"eval_runtime": 4.3027, |
|
"eval_samples_per_second": 64.378, |
|
"eval_steps_per_second": 8.134, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0007329059829059829, |
|
"loss": 0.371, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.4146815538406372, |
|
"eval_runtime": 4.3009, |
|
"eval_samples_per_second": 64.405, |
|
"eval_steps_per_second": 8.138, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.0007061965811965813, |
|
"loss": 0.3644, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5740072202166066, |
|
"eval_loss": 0.3504742681980133, |
|
"eval_runtime": 4.3002, |
|
"eval_samples_per_second": 64.416, |
|
"eval_steps_per_second": 8.139, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.48736462093862815, |
|
"eval_loss": 0.37356558442115784, |
|
"eval_runtime": 4.2986, |
|
"eval_samples_per_second": 64.439, |
|
"eval_steps_per_second": 8.142, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0006794871794871796, |
|
"loss": 0.3667, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5703971119133574, |
|
"eval_loss": 0.3636634945869446, |
|
"eval_runtime": 4.2999, |
|
"eval_samples_per_second": 64.42, |
|
"eval_steps_per_second": 8.14, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.0006527777777777778, |
|
"loss": 0.3629, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6209386281588448, |
|
"eval_loss": 0.3412002921104431, |
|
"eval_runtime": 4.2976, |
|
"eval_samples_per_second": 64.455, |
|
"eval_steps_per_second": 8.144, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.6209386281588448, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.34508734941482544, |
|
"eval_runtime": 4.3013, |
|
"eval_samples_per_second": 64.399, |
|
"eval_steps_per_second": 8.137, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 21, |
|
"best_eval_accuracy": 0.628158844765343, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0006260683760683761, |
|
"loss": 0.3574, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6064981949458483, |
|
"eval_loss": 0.3625960350036621, |
|
"eval_runtime": 4.301, |
|
"eval_samples_per_second": 64.404, |
|
"eval_steps_per_second": 8.138, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 21, |
|
"best_eval_accuracy": 0.628158844765343, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.48736462093862815, |
|
"eval_loss": 0.3732198178768158, |
|
"eval_runtime": 4.2992, |
|
"eval_samples_per_second": 64.431, |
|
"eval_steps_per_second": 8.141, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 21, |
|
"best_eval_accuracy": 0.628158844765343, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.0005993589743589744, |
|
"loss": 0.3565, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6173285198555957, |
|
"eval_loss": 0.342726469039917, |
|
"eval_runtime": 4.3024, |
|
"eval_samples_per_second": 64.382, |
|
"eval_steps_per_second": 8.135, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 21, |
|
"best_eval_accuracy": 0.628158844765343, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0005726495726495727, |
|
"loss": 0.3525, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5812274368231047, |
|
"eval_loss": 0.38550591468811035, |
|
"eval_runtime": 4.3005, |
|
"eval_samples_per_second": 64.41, |
|
"eval_steps_per_second": 8.139, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 21, |
|
"best_eval_accuracy": 0.628158844765343, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.3384442925453186, |
|
"eval_runtime": 4.2994, |
|
"eval_samples_per_second": 64.428, |
|
"eval_steps_per_second": 8.141, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.000545940170940171, |
|
"loss": 0.3523, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.3408096730709076, |
|
"eval_runtime": 4.3005, |
|
"eval_samples_per_second": 64.412, |
|
"eval_steps_per_second": 8.139, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0005192307692307693, |
|
"loss": 0.3505, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6101083032490975, |
|
"eval_loss": 0.354794442653656, |
|
"eval_runtime": 4.3016, |
|
"eval_samples_per_second": 64.395, |
|
"eval_steps_per_second": 8.137, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.592057761732852, |
|
"eval_loss": 0.3860756754875183, |
|
"eval_runtime": 4.2988, |
|
"eval_samples_per_second": 64.436, |
|
"eval_steps_per_second": 8.142, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0004925213675213676, |
|
"loss": 0.3509, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5992779783393501, |
|
"eval_loss": 0.370971143245697, |
|
"eval_runtime": 4.2976, |
|
"eval_samples_per_second": 64.454, |
|
"eval_steps_per_second": 8.144, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5992779783393501, |
|
"eval_loss": 0.38974320888519287, |
|
"eval_runtime": 4.303, |
|
"eval_samples_per_second": 64.374, |
|
"eval_steps_per_second": 8.134, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.00046581196581196583, |
|
"loss": 0.3494, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6353790613718412, |
|
"eval_loss": 0.3535420596599579, |
|
"eval_runtime": 4.299, |
|
"eval_samples_per_second": 64.433, |
|
"eval_steps_per_second": 8.141, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0004391025641025641, |
|
"loss": 0.3459, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.33893653750419617, |
|
"eval_runtime": 4.3011, |
|
"eval_samples_per_second": 64.401, |
|
"eval_steps_per_second": 8.137, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6209386281588448, |
|
"eval_loss": 0.3396986126899719, |
|
"eval_runtime": 4.2983, |
|
"eval_samples_per_second": 64.445, |
|
"eval_steps_per_second": 8.143, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0004123931623931624, |
|
"loss": 0.3429, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6101083032490975, |
|
"eval_loss": 0.34503623843193054, |
|
"eval_runtime": 4.2992, |
|
"eval_samples_per_second": 64.431, |
|
"eval_steps_per_second": 8.141, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0003856837606837607, |
|
"loss": 0.3432, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6064981949458483, |
|
"eval_loss": 0.392516553401947, |
|
"eval_runtime": 4.2992, |
|
"eval_samples_per_second": 64.431, |
|
"eval_steps_per_second": 8.141, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.32944434881210327, |
|
"eval_runtime": 4.2991, |
|
"eval_samples_per_second": 64.432, |
|
"eval_steps_per_second": 8.141, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.000358974358974359, |
|
"loss": 0.341, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.34416595101356506, |
|
"eval_runtime": 4.2999, |
|
"eval_samples_per_second": 64.419, |
|
"eval_steps_per_second": 8.14, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.342062771320343, |
|
"eval_runtime": 4.3022, |
|
"eval_samples_per_second": 64.386, |
|
"eval_steps_per_second": 8.135, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.00033226495726495727, |
|
"loss": 0.3392, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.33708664774894714, |
|
"eval_runtime": 4.2992, |
|
"eval_samples_per_second": 64.431, |
|
"eval_steps_per_second": 8.141, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0003055555555555556, |
|
"loss": 0.3392, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.33264684677124023, |
|
"eval_runtime": 4.3016, |
|
"eval_samples_per_second": 64.395, |
|
"eval_steps_per_second": 8.136, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.3713812530040741, |
|
"eval_runtime": 4.2963, |
|
"eval_samples_per_second": 64.474, |
|
"eval_steps_per_second": 8.146, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0002788461538461539, |
|
"loss": 0.337, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.3535134494304657, |
|
"eval_runtime": 4.2972, |
|
"eval_samples_per_second": 64.461, |
|
"eval_steps_per_second": 8.145, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.00025213675213675216, |
|
"loss": 0.3352, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.35477742552757263, |
|
"eval_runtime": 4.2996, |
|
"eval_samples_per_second": 64.424, |
|
"eval_steps_per_second": 8.14, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.33611994981765747, |
|
"eval_runtime": 4.2993, |
|
"eval_samples_per_second": 64.429, |
|
"eval_steps_per_second": 8.141, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.00022542735042735044, |
|
"loss": 0.3335, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.6859205776173285, |
|
"eval_loss": 0.33294087648391724, |
|
"eval_runtime": 4.3015, |
|
"eval_samples_per_second": 64.396, |
|
"eval_steps_per_second": 8.137, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.3423037827014923, |
|
"eval_runtime": 4.297, |
|
"eval_samples_per_second": 64.464, |
|
"eval_steps_per_second": 8.145, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0001987179487179487, |
|
"loss": 0.3329, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.33556535840034485, |
|
"eval_runtime": 4.2948, |
|
"eval_samples_per_second": 64.497, |
|
"eval_steps_per_second": 8.149, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.00017200854700854702, |
|
"loss": 0.3308, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.6642599277978339, |
|
"eval_loss": 0.33979371190071106, |
|
"eval_runtime": 4.2975, |
|
"eval_samples_per_second": 64.456, |
|
"eval_steps_per_second": 8.144, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.6678700361010831, |
|
"eval_loss": 0.3374033272266388, |
|
"eval_runtime": 4.2988, |
|
"eval_samples_per_second": 64.437, |
|
"eval_steps_per_second": 8.142, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00014529914529914532, |
|
"loss": 0.3291, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.33146607875823975, |
|
"eval_runtime": 4.3011, |
|
"eval_samples_per_second": 64.402, |
|
"eval_steps_per_second": 8.137, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.00011858974358974358, |
|
"loss": 0.3284, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.631768953068592, |
|
"eval_loss": 0.36504465341567993, |
|
"eval_runtime": 4.3007, |
|
"eval_samples_per_second": 64.409, |
|
"eval_steps_per_second": 8.138, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.35373085737228394, |
|
"eval_runtime": 4.2986, |
|
"eval_samples_per_second": 64.44, |
|
"eval_steps_per_second": 8.142, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 9.188034188034189e-05, |
|
"loss": 0.3257, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.6425992779783394, |
|
"eval_loss": 0.34801429510116577, |
|
"eval_runtime": 4.3013, |
|
"eval_samples_per_second": 64.399, |
|
"eval_steps_per_second": 8.137, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.34239715337753296, |
|
"eval_runtime": 4.2962, |
|
"eval_samples_per_second": 64.476, |
|
"eval_steps_per_second": 8.147, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 6.517094017094018e-05, |
|
"loss": 0.3274, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.6678700361010831, |
|
"eval_loss": 0.3412990868091583, |
|
"eval_runtime": 4.2933, |
|
"eval_samples_per_second": 64.519, |
|
"eval_steps_per_second": 8.152, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.3265, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.34415990114212036, |
|
"eval_runtime": 4.3058, |
|
"eval_samples_per_second": 64.332, |
|
"eval_steps_per_second": 8.129, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.34172776341438293, |
|
"eval_runtime": 4.2978, |
|
"eval_samples_per_second": 64.452, |
|
"eval_steps_per_second": 8.144, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 1.1752136752136752e-05, |
|
"loss": 0.326, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.3407086431980133, |
|
"eval_runtime": 4.2954, |
|
"eval_samples_per_second": 64.488, |
|
"eval_steps_per_second": 8.148, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.3531108416043795, |
|
"train_runtime": 3953.5033, |
|
"train_samples_per_second": 37.789, |
|
"train_steps_per_second": 4.735 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|