|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5234657039711191, |
|
"eval_loss": 0.5883553624153137, |
|
"eval_runtime": 8.7854, |
|
"eval_samples_per_second": 31.53, |
|
"eval_steps_per_second": 3.984, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5234657039711191, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.004866452991452991, |
|
"loss": 0.6001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.4145338237285614, |
|
"eval_runtime": 8.8107, |
|
"eval_samples_per_second": 31.439, |
|
"eval_steps_per_second": 3.972, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5234657039711191, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.6337469816207886, |
|
"eval_runtime": 8.7248, |
|
"eval_samples_per_second": 31.748, |
|
"eval_steps_per_second": 4.012, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5234657039711191, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.004732905982905983, |
|
"loss": 0.5343, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.48375451263537905, |
|
"eval_loss": 0.3934288024902344, |
|
"eval_runtime": 8.7605, |
|
"eval_samples_per_second": 31.619, |
|
"eval_steps_per_second": 3.995, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5234657039711191, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.004599358974358974, |
|
"loss": 0.5255, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.566152811050415, |
|
"eval_runtime": 8.8523, |
|
"eval_samples_per_second": 31.291, |
|
"eval_steps_per_second": 3.954, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5234657039711191, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.5157924890518188, |
|
"eval_runtime": 8.9667, |
|
"eval_samples_per_second": 30.892, |
|
"eval_steps_per_second": 3.903, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.004465811965811966, |
|
"loss": 0.504, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.3480044901371002, |
|
"eval_runtime": 8.8419, |
|
"eval_samples_per_second": 31.328, |
|
"eval_steps_per_second": 3.958, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.38455167412757874, |
|
"eval_runtime": 8.9247, |
|
"eval_samples_per_second": 31.037, |
|
"eval_steps_per_second": 3.922, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.004332264957264957, |
|
"loss": 0.4941, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.5111421942710876, |
|
"eval_runtime": 9.2356, |
|
"eval_samples_per_second": 29.993, |
|
"eval_steps_per_second": 3.79, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.004198717948717949, |
|
"loss": 0.5022, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.4620998203754425, |
|
"eval_runtime": 8.9286, |
|
"eval_samples_per_second": 31.024, |
|
"eval_steps_per_second": 3.92, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6425992779783394, |
|
"eval_loss": 0.3418288826942444, |
|
"eval_runtime": 8.7679, |
|
"eval_samples_per_second": 31.592, |
|
"eval_steps_per_second": 3.992, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.6425992779783394, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.00406517094017094, |
|
"loss": 0.453, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5631768953068592, |
|
"eval_loss": 0.3651691675186157, |
|
"eval_runtime": 8.7849, |
|
"eval_samples_per_second": 31.532, |
|
"eval_steps_per_second": 3.984, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.6425992779783394, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.003931623931623931, |
|
"loss": 0.3879, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5595667870036101, |
|
"eval_loss": 0.3450925946235657, |
|
"eval_runtime": 8.7727, |
|
"eval_samples_per_second": 31.575, |
|
"eval_steps_per_second": 3.99, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.6425992779783394, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6425992779783394, |
|
"eval_loss": 0.33118629455566406, |
|
"eval_runtime": 8.7741, |
|
"eval_samples_per_second": 31.57, |
|
"eval_steps_per_second": 3.989, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.6425992779783394, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.003798076923076923, |
|
"loss": 0.3698, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.3599488437175751, |
|
"eval_runtime": 8.7843, |
|
"eval_samples_per_second": 31.533, |
|
"eval_steps_per_second": 3.984, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6462093862815884, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5992779783393501, |
|
"eval_loss": 0.39469993114471436, |
|
"eval_runtime": 8.7768, |
|
"eval_samples_per_second": 31.561, |
|
"eval_steps_per_second": 3.988, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6462093862815884, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.003664529914529914, |
|
"loss": 0.3705, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6173285198555957, |
|
"eval_loss": 0.38333019614219666, |
|
"eval_runtime": 8.7807, |
|
"eval_samples_per_second": 31.547, |
|
"eval_steps_per_second": 3.986, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6462093862815884, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.003530982905982906, |
|
"loss": 0.3598, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.33536940813064575, |
|
"eval_runtime": 8.765, |
|
"eval_samples_per_second": 31.603, |
|
"eval_steps_per_second": 3.993, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6462093862815884, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.33954861760139465, |
|
"eval_runtime": 8.7463, |
|
"eval_samples_per_second": 31.67, |
|
"eval_steps_per_second": 4.002, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0033974358974358976, |
|
"loss": 0.3631, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.36644089221954346, |
|
"eval_runtime": 8.72, |
|
"eval_samples_per_second": 31.766, |
|
"eval_steps_per_second": 4.014, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.6714801444043321, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.003263888888888889, |
|
"loss": 0.3515, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.34204551577568054, |
|
"eval_runtime": 8.8321, |
|
"eval_samples_per_second": 31.363, |
|
"eval_steps_per_second": 3.963, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.6787003610108303, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6137184115523465, |
|
"eval_loss": 0.34830158948898315, |
|
"eval_runtime": 8.8779, |
|
"eval_samples_per_second": 31.201, |
|
"eval_steps_per_second": 3.942, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.6787003610108303, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0031303418803418806, |
|
"loss": 0.3486, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.3820384442806244, |
|
"eval_runtime": 8.8616, |
|
"eval_samples_per_second": 31.258, |
|
"eval_steps_per_second": 3.95, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.6787003610108303, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.32402852177619934, |
|
"eval_runtime": 8.8647, |
|
"eval_samples_per_second": 31.248, |
|
"eval_steps_per_second": 3.948, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 23, |
|
"best_eval_accuracy": 0.7003610108303249, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.0029967948717948716, |
|
"loss": 0.3437, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3299575448036194, |
|
"eval_runtime": 8.8718, |
|
"eval_samples_per_second": 31.223, |
|
"eval_steps_per_second": 3.945, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.002863247863247863, |
|
"loss": 0.3389, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.3404531478881836, |
|
"eval_runtime": 8.8761, |
|
"eval_samples_per_second": 31.208, |
|
"eval_steps_per_second": 3.943, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.32906994223594666, |
|
"eval_runtime": 8.8771, |
|
"eval_samples_per_second": 31.204, |
|
"eval_steps_per_second": 3.943, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.0027297008547008546, |
|
"loss": 0.3363, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.333825945854187, |
|
"eval_runtime": 8.8635, |
|
"eval_samples_per_second": 31.252, |
|
"eval_steps_per_second": 3.949, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0025961538461538466, |
|
"loss": 0.3381, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.33664965629577637, |
|
"eval_runtime": 9.0918, |
|
"eval_samples_per_second": 30.467, |
|
"eval_steps_per_second": 3.85, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6606498194945848, |
|
"eval_loss": 0.3830634355545044, |
|
"eval_runtime": 10.2562, |
|
"eval_samples_per_second": 27.008, |
|
"eval_steps_per_second": 3.413, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0024626068376068376, |
|
"loss": 0.3302, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3299681842327118, |
|
"eval_runtime": 10.119, |
|
"eval_samples_per_second": 27.374, |
|
"eval_steps_per_second": 3.459, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3223809599876404, |
|
"eval_runtime": 10.1698, |
|
"eval_samples_per_second": 27.238, |
|
"eval_steps_per_second": 3.442, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.002329059829059829, |
|
"loss": 0.33, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.3332250118255615, |
|
"eval_runtime": 10.1039, |
|
"eval_samples_per_second": 27.415, |
|
"eval_steps_per_second": 3.464, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0021955128205128206, |
|
"loss": 0.3271, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3412158787250519, |
|
"eval_runtime": 10.1195, |
|
"eval_samples_per_second": 27.373, |
|
"eval_steps_per_second": 3.459, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.3197210431098938, |
|
"eval_runtime": 10.0745, |
|
"eval_samples_per_second": 27.495, |
|
"eval_steps_per_second": 3.474, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.002061965811965812, |
|
"loss": 0.3266, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3236064612865448, |
|
"eval_runtime": 10.0538, |
|
"eval_samples_per_second": 27.552, |
|
"eval_steps_per_second": 3.481, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0019284188034188036, |
|
"loss": 0.3248, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.3621442914009094, |
|
"eval_runtime": 10.1983, |
|
"eval_samples_per_second": 27.161, |
|
"eval_steps_per_second": 3.432, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.33299773931503296, |
|
"eval_runtime": 10.136, |
|
"eval_samples_per_second": 27.328, |
|
"eval_steps_per_second": 3.453, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0017948717948717949, |
|
"loss": 0.3223, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6823104693140795, |
|
"eval_loss": 0.3635832667350769, |
|
"eval_runtime": 10.2689, |
|
"eval_samples_per_second": 26.975, |
|
"eval_steps_per_second": 3.408, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.3298203647136688, |
|
"eval_runtime": 10.1829, |
|
"eval_samples_per_second": 27.202, |
|
"eval_steps_per_second": 3.437, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0016613247863247864, |
|
"loss": 0.3205, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.32243612408638, |
|
"eval_runtime": 10.719, |
|
"eval_samples_per_second": 25.842, |
|
"eval_steps_per_second": 3.265, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0015277777777777779, |
|
"loss": 0.3177, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3287702202796936, |
|
"eval_runtime": 10.6219, |
|
"eval_samples_per_second": 26.078, |
|
"eval_steps_per_second": 3.295, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.6823104693140795, |
|
"eval_loss": 0.3464113175868988, |
|
"eval_runtime": 9.1644, |
|
"eval_samples_per_second": 30.226, |
|
"eval_steps_per_second": 3.819, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0013942307692307694, |
|
"loss": 0.3167, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.35672882199287415, |
|
"eval_runtime": 9.0166, |
|
"eval_samples_per_second": 30.721, |
|
"eval_steps_per_second": 3.882, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0012606837606837606, |
|
"loss": 0.3159, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.355069100856781, |
|
"eval_runtime": 9.0565, |
|
"eval_samples_per_second": 30.586, |
|
"eval_steps_per_second": 3.865, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.33131033182144165, |
|
"eval_runtime": 9.1597, |
|
"eval_samples_per_second": 30.241, |
|
"eval_steps_per_second": 3.821, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 33, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.0011271367521367521, |
|
"loss": 0.3131, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3233274519443512, |
|
"eval_runtime": 9.0766, |
|
"eval_samples_per_second": 30.518, |
|
"eval_steps_per_second": 3.856, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.35078391432762146, |
|
"eval_runtime": 9.0843, |
|
"eval_samples_per_second": 30.492, |
|
"eval_steps_per_second": 3.853, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0009935897435897436, |
|
"loss": 0.3118, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.34203651547431946, |
|
"eval_runtime": 9.1363, |
|
"eval_samples_per_second": 30.319, |
|
"eval_steps_per_second": 3.831, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.0008600427350427351, |
|
"loss": 0.3088, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.34102147817611694, |
|
"eval_runtime": 9.0781, |
|
"eval_samples_per_second": 30.513, |
|
"eval_steps_per_second": 3.855, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3420613706111908, |
|
"eval_runtime": 8.9032, |
|
"eval_samples_per_second": 31.113, |
|
"eval_steps_per_second": 3.931, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.0007264957264957266, |
|
"loss": 0.3082, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3411448895931244, |
|
"eval_runtime": 8.9332, |
|
"eval_samples_per_second": 31.008, |
|
"eval_steps_per_second": 3.918, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.000592948717948718, |
|
"loss": 0.3068, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.6823104693140795, |
|
"eval_loss": 0.3616185486316681, |
|
"eval_runtime": 8.921, |
|
"eval_samples_per_second": 31.05, |
|
"eval_steps_per_second": 3.923, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.3554804027080536, |
|
"eval_runtime": 8.9621, |
|
"eval_samples_per_second": 30.908, |
|
"eval_steps_per_second": 3.905, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.00045940170940170943, |
|
"loss": 0.3031, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.34177201986312866, |
|
"eval_runtime": 9.1926, |
|
"eval_samples_per_second": 30.133, |
|
"eval_steps_per_second": 3.807, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.6859205776173285, |
|
"eval_loss": 0.34597525000572205, |
|
"eval_runtime": 9.1395, |
|
"eval_samples_per_second": 30.308, |
|
"eval_steps_per_second": 3.83, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.00032585470085470087, |
|
"loss": 0.3039, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.335258424282074, |
|
"eval_runtime": 9.0559, |
|
"eval_samples_per_second": 30.588, |
|
"eval_steps_per_second": 3.865, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.00019230769230769233, |
|
"loss": 0.3025, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.34501808881759644, |
|
"eval_runtime": 3.4803, |
|
"eval_samples_per_second": 79.591, |
|
"eval_steps_per_second": 10.057, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.34273090958595276, |
|
"eval_runtime": 8.911, |
|
"eval_samples_per_second": 31.085, |
|
"eval_steps_per_second": 3.928, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 5.876068376068376e-05, |
|
"loss": 0.3034, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.3430791199207306, |
|
"eval_runtime": 8.959, |
|
"eval_samples_per_second": 30.919, |
|
"eval_steps_per_second": 3.907, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 46, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.36451038132365954, |
|
"train_runtime": 8268.8296, |
|
"train_samples_per_second": 18.068, |
|
"train_steps_per_second": 2.264 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|