|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.15293638408184052, |
|
"eval_runtime": 4.3856, |
|
"eval_samples_per_second": 63.161, |
|
"eval_steps_per_second": 7.981, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4729241877256318, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.004866452991452991, |
|
"loss": 0.2201, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.10219389200210571, |
|
"eval_runtime": 4.4747, |
|
"eval_samples_per_second": 61.904, |
|
"eval_steps_per_second": 7.822, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.26191186904907227, |
|
"eval_runtime": 4.4885, |
|
"eval_samples_per_second": 61.714, |
|
"eval_steps_per_second": 7.798, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.004732905982905983, |
|
"loss": 0.1563, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.51985559566787, |
|
"eval_loss": 0.07380817085504532, |
|
"eval_runtime": 4.4993, |
|
"eval_samples_per_second": 61.565, |
|
"eval_steps_per_second": 7.779, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.004599358974358974, |
|
"loss": 0.0889, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4981949458483754, |
|
"eval_loss": 0.07089700549840927, |
|
"eval_runtime": 4.4954, |
|
"eval_samples_per_second": 61.619, |
|
"eval_steps_per_second": 7.786, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07577553391456604, |
|
"eval_runtime": 4.4933, |
|
"eval_samples_per_second": 61.647, |
|
"eval_steps_per_second": 7.789, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.004465811965811966, |
|
"loss": 0.0808, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07322199642658234, |
|
"eval_runtime": 4.4858, |
|
"eval_samples_per_second": 61.751, |
|
"eval_steps_per_second": 7.802, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5595667870036101, |
|
"eval_loss": 0.07163483649492264, |
|
"eval_runtime": 4.4829, |
|
"eval_samples_per_second": 61.79, |
|
"eval_steps_per_second": 7.807, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.004332264957264957, |
|
"loss": 0.0802, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.07071372121572495, |
|
"eval_runtime": 4.4841, |
|
"eval_samples_per_second": 61.774, |
|
"eval_steps_per_second": 7.805, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.004198717948717949, |
|
"loss": 0.0819, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07121473550796509, |
|
"eval_runtime": 4.4844, |
|
"eval_samples_per_second": 61.77, |
|
"eval_steps_per_second": 7.805, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.07059402763843536, |
|
"eval_runtime": 4.4875, |
|
"eval_samples_per_second": 61.727, |
|
"eval_steps_per_second": 7.799, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.00406517094017094, |
|
"loss": 0.0818, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4620938628158845, |
|
"eval_loss": 0.07027973979711533, |
|
"eval_runtime": 4.4847, |
|
"eval_samples_per_second": 61.766, |
|
"eval_steps_per_second": 7.804, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.003931623931623931, |
|
"loss": 0.08, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07372691482305527, |
|
"eval_runtime": 4.4841, |
|
"eval_samples_per_second": 61.774, |
|
"eval_steps_per_second": 7.805, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.07121168822050095, |
|
"eval_runtime": 4.4841, |
|
"eval_samples_per_second": 61.774, |
|
"eval_steps_per_second": 7.805, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.003798076923076923, |
|
"loss": 0.0803, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07377327233552933, |
|
"eval_runtime": 4.4908, |
|
"eval_samples_per_second": 61.682, |
|
"eval_steps_per_second": 7.794, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07084672898054123, |
|
"eval_runtime": 4.4838, |
|
"eval_samples_per_second": 61.778, |
|
"eval_steps_per_second": 7.806, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.003664529914529914, |
|
"loss": 0.0807, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5487364620938628, |
|
"eval_loss": 0.0708812028169632, |
|
"eval_runtime": 4.485, |
|
"eval_samples_per_second": 61.761, |
|
"eval_steps_per_second": 7.804, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.003530982905982906, |
|
"loss": 0.082, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5523465703971119, |
|
"eval_loss": 0.0720214918255806, |
|
"eval_runtime": 4.4868, |
|
"eval_samples_per_second": 61.736, |
|
"eval_steps_per_second": 7.801, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07122407853603363, |
|
"eval_runtime": 4.49, |
|
"eval_samples_per_second": 61.693, |
|
"eval_steps_per_second": 7.795, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0033974358974358976, |
|
"loss": 0.0806, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5090252707581228, |
|
"eval_loss": 0.07032328844070435, |
|
"eval_runtime": 4.4893, |
|
"eval_samples_per_second": 61.703, |
|
"eval_steps_per_second": 7.796, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.003263888888888889, |
|
"loss": 0.0801, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07104188203811646, |
|
"eval_runtime": 4.488, |
|
"eval_samples_per_second": 61.72, |
|
"eval_steps_per_second": 7.799, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.48736462093862815, |
|
"eval_loss": 0.07005982100963593, |
|
"eval_runtime": 4.4962, |
|
"eval_samples_per_second": 61.608, |
|
"eval_steps_per_second": 7.784, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0031303418803418806, |
|
"loss": 0.0798, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.48736462093862815, |
|
"eval_loss": 0.07032880932092667, |
|
"eval_runtime": 4.4887, |
|
"eval_samples_per_second": 61.711, |
|
"eval_steps_per_second": 7.797, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.07051917910575867, |
|
"eval_runtime": 4.4932, |
|
"eval_samples_per_second": 61.649, |
|
"eval_steps_per_second": 7.79, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.0029967948717948716, |
|
"loss": 0.0854, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5523465703971119, |
|
"eval_loss": 0.07042864710092545, |
|
"eval_runtime": 4.4874, |
|
"eval_samples_per_second": 61.728, |
|
"eval_steps_per_second": 7.8, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.002863247863247863, |
|
"loss": 0.0793, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.49097472924187724, |
|
"eval_loss": 0.07020144909620285, |
|
"eval_runtime": 4.4906, |
|
"eval_samples_per_second": 61.684, |
|
"eval_steps_per_second": 7.794, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07208646833896637, |
|
"eval_runtime": 4.4856, |
|
"eval_samples_per_second": 61.753, |
|
"eval_steps_per_second": 7.803, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.0027297008547008546, |
|
"loss": 0.0792, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07201223820447922, |
|
"eval_runtime": 4.4887, |
|
"eval_samples_per_second": 61.711, |
|
"eval_steps_per_second": 7.797, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0025961538461538466, |
|
"loss": 0.0794, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.0713350772857666, |
|
"eval_runtime": 4.4877, |
|
"eval_samples_per_second": 61.724, |
|
"eval_steps_per_second": 7.799, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5631768953068592, |
|
"eval_loss": 0.07010631263256073, |
|
"eval_runtime": 4.4831, |
|
"eval_samples_per_second": 61.787, |
|
"eval_steps_per_second": 7.807, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 29, |
|
"best_eval_accuracy": 0.5631768953068592, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0024626068376068376, |
|
"loss": 0.0785, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6101083032490975, |
|
"eval_loss": 0.07099565863609314, |
|
"eval_runtime": 4.4893, |
|
"eval_samples_per_second": 61.703, |
|
"eval_steps_per_second": 7.796, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.6101083032490975, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.48014440433212996, |
|
"eval_loss": 0.07032310217618942, |
|
"eval_runtime": 4.4879, |
|
"eval_samples_per_second": 61.721, |
|
"eval_steps_per_second": 7.799, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.6101083032490975, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.002329059829059829, |
|
"loss": 0.0786, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07281779497861862, |
|
"eval_runtime": 4.4847, |
|
"eval_samples_per_second": 61.766, |
|
"eval_steps_per_second": 7.804, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.6101083032490975, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0021955128205128206, |
|
"loss": 0.0791, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.5054151624548736, |
|
"eval_loss": 0.07028384506702423, |
|
"eval_runtime": 4.4824, |
|
"eval_samples_per_second": 61.798, |
|
"eval_steps_per_second": 7.808, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.6101083032490975, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6173285198555957, |
|
"eval_loss": 0.07163991034030914, |
|
"eval_runtime": 4.4867, |
|
"eval_samples_per_second": 61.738, |
|
"eval_steps_per_second": 7.801, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.002061965811965812, |
|
"loss": 0.0789, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.07081528007984161, |
|
"eval_runtime": 4.4866, |
|
"eval_samples_per_second": 61.739, |
|
"eval_steps_per_second": 7.801, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0019284188034188036, |
|
"loss": 0.0786, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07695722579956055, |
|
"eval_runtime": 4.4827, |
|
"eval_samples_per_second": 61.793, |
|
"eval_steps_per_second": 7.808, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.07178593426942825, |
|
"eval_runtime": 4.4814, |
|
"eval_samples_per_second": 61.811, |
|
"eval_steps_per_second": 7.81, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0017948717948717949, |
|
"loss": 0.0784, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.48375451263537905, |
|
"eval_loss": 0.06998021900653839, |
|
"eval_runtime": 4.4856, |
|
"eval_samples_per_second": 61.753, |
|
"eval_steps_per_second": 7.803, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5234657039711191, |
|
"eval_loss": 0.06990227848291397, |
|
"eval_runtime": 4.4914, |
|
"eval_samples_per_second": 61.673, |
|
"eval_steps_per_second": 7.793, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0016613247863247864, |
|
"loss": 0.0775, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6137184115523465, |
|
"eval_loss": 0.06979037076234818, |
|
"eval_runtime": 4.4875, |
|
"eval_samples_per_second": 61.727, |
|
"eval_steps_per_second": 7.799, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0015277777777777779, |
|
"loss": 0.0779, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.51985559566787, |
|
"eval_loss": 0.06967325508594513, |
|
"eval_runtime": 4.4905, |
|
"eval_samples_per_second": 61.686, |
|
"eval_steps_per_second": 7.794, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.06980358064174652, |
|
"eval_runtime": 4.4879, |
|
"eval_samples_per_second": 61.721, |
|
"eval_steps_per_second": 7.799, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0013942307692307694, |
|
"loss": 0.0777, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5848375451263538, |
|
"eval_loss": 0.06972871720790863, |
|
"eval_runtime": 4.5198, |
|
"eval_samples_per_second": 61.286, |
|
"eval_steps_per_second": 7.744, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0012606837606837606, |
|
"loss": 0.0776, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6425992779783394, |
|
"eval_loss": 0.06987190246582031, |
|
"eval_runtime": 4.501, |
|
"eval_samples_per_second": 61.542, |
|
"eval_steps_per_second": 7.776, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.6028880866425993, |
|
"eval_loss": 0.06966899335384369, |
|
"eval_runtime": 4.4681, |
|
"eval_samples_per_second": 61.995, |
|
"eval_steps_per_second": 7.833, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.0011271367521367521, |
|
"loss": 0.0769, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.48736462093862815, |
|
"eval_loss": 0.07046782970428467, |
|
"eval_runtime": 4.4607, |
|
"eval_samples_per_second": 62.098, |
|
"eval_steps_per_second": 7.846, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.6209386281588448, |
|
"eval_loss": 0.06947591155767441, |
|
"eval_runtime": 4.4615, |
|
"eval_samples_per_second": 62.087, |
|
"eval_steps_per_second": 7.845, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0009935897435897436, |
|
"loss": 0.077, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.5667870036101083, |
|
"eval_loss": 0.06946070492267609, |
|
"eval_runtime": 4.4574, |
|
"eval_samples_per_second": 62.144, |
|
"eval_steps_per_second": 7.852, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.0008600427350427351, |
|
"loss": 0.077, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5018050541516246, |
|
"eval_loss": 0.06961851567029953, |
|
"eval_runtime": 4.4447, |
|
"eval_samples_per_second": 62.321, |
|
"eval_steps_per_second": 7.874, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.49458483754512633, |
|
"eval_loss": 0.07000398635864258, |
|
"eval_runtime": 4.4397, |
|
"eval_samples_per_second": 62.391, |
|
"eval_steps_per_second": 7.883, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.0007264957264957266, |
|
"loss": 0.0774, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.4981949458483754, |
|
"eval_loss": 0.0700514167547226, |
|
"eval_runtime": 4.4359, |
|
"eval_samples_per_second": 62.445, |
|
"eval_steps_per_second": 7.89, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.000592948717948718, |
|
"loss": 0.0767, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.5812274368231047, |
|
"eval_loss": 0.06938585638999939, |
|
"eval_runtime": 4.4418, |
|
"eval_samples_per_second": 62.363, |
|
"eval_steps_per_second": 7.88, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.07005595415830612, |
|
"eval_runtime": 4.4342, |
|
"eval_samples_per_second": 62.469, |
|
"eval_steps_per_second": 7.893, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.00045940170940170943, |
|
"loss": 0.0761, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.48736462093862815, |
|
"eval_loss": 0.07062944769859314, |
|
"eval_runtime": 4.4382, |
|
"eval_samples_per_second": 62.412, |
|
"eval_steps_per_second": 7.886, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 42, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.06952185928821564, |
|
"eval_runtime": 4.4379, |
|
"eval_samples_per_second": 62.417, |
|
"eval_steps_per_second": 7.887, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 55, |
|
"best_eval_accuracy": 0.6787003610108303, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.00032585470085470087, |
|
"loss": 0.0762, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.6028880866425993, |
|
"eval_loss": 0.0693010687828064, |
|
"eval_runtime": 4.4361, |
|
"eval_samples_per_second": 62.442, |
|
"eval_steps_per_second": 7.89, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 55, |
|
"best_eval_accuracy": 0.6787003610108303, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.00019230769230769233, |
|
"loss": 0.0763, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.51985559566787, |
|
"eval_loss": 0.06962151825428009, |
|
"eval_runtime": 4.4368, |
|
"eval_samples_per_second": 62.432, |
|
"eval_steps_per_second": 7.889, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 55, |
|
"best_eval_accuracy": 0.6787003610108303, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.5740072202166066, |
|
"eval_loss": 0.06931070238351822, |
|
"eval_runtime": 4.4366, |
|
"eval_samples_per_second": 62.435, |
|
"eval_steps_per_second": 7.889, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 55, |
|
"best_eval_accuracy": 0.6787003610108303, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 5.876068376068376e-05, |
|
"loss": 0.0763, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.5812274368231047, |
|
"eval_loss": 0.06930926442146301, |
|
"eval_runtime": 4.4417, |
|
"eval_samples_per_second": 62.364, |
|
"eval_steps_per_second": 7.88, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 55, |
|
"best_eval_accuracy": 0.6787003610108303, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.08502325043719039, |
|
"train_runtime": 4069.7527, |
|
"train_samples_per_second": 36.71, |
|
"train_steps_per_second": 4.6 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|