|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.51985559566787, |
|
"eval_loss": 0.5319512486457825, |
|
"eval_runtime": 9.1619, |
|
"eval_samples_per_second": 30.234, |
|
"eval_steps_per_second": 3.82, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.51985559566787, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.004866452991452991, |
|
"loss": 0.6084, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.505980372428894, |
|
"eval_runtime": 8.9739, |
|
"eval_samples_per_second": 30.867, |
|
"eval_steps_per_second": 3.9, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.476485937833786, |
|
"eval_runtime": 9.1085, |
|
"eval_samples_per_second": 30.411, |
|
"eval_steps_per_second": 3.843, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.004732905982905983, |
|
"loss": 0.4786, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.38619130849838257, |
|
"eval_runtime": 9.0544, |
|
"eval_samples_per_second": 30.593, |
|
"eval_steps_per_second": 3.866, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.004599358974358974, |
|
"loss": 0.5253, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.5090705156326294, |
|
"eval_runtime": 9.0148, |
|
"eval_samples_per_second": 30.727, |
|
"eval_steps_per_second": 3.882, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4981949458483754, |
|
"eval_loss": 0.376774400472641, |
|
"eval_runtime": 9.0141, |
|
"eval_samples_per_second": 30.73, |
|
"eval_steps_per_second": 3.883, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.004465811965811966, |
|
"loss": 0.5144, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.44060468673706055, |
|
"eval_runtime": 9.0181, |
|
"eval_samples_per_second": 30.716, |
|
"eval_steps_per_second": 3.881, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.631768953068592, |
|
"eval_loss": 0.3461342751979828, |
|
"eval_runtime": 8.9803, |
|
"eval_samples_per_second": 30.845, |
|
"eval_steps_per_second": 3.897, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.631768953068592, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.004332264957264957, |
|
"loss": 0.4407, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.34802812337875366, |
|
"eval_runtime": 8.976, |
|
"eval_samples_per_second": 30.86, |
|
"eval_steps_per_second": 3.899, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.004198717948717949, |
|
"loss": 0.4002, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6642599277978339, |
|
"eval_loss": 0.362854540348053, |
|
"eval_runtime": 9.0085, |
|
"eval_samples_per_second": 30.749, |
|
"eval_steps_per_second": 3.885, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.6642599277978339, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.555956678700361, |
|
"eval_loss": 0.39493829011917114, |
|
"eval_runtime": 9.036, |
|
"eval_samples_per_second": 30.655, |
|
"eval_steps_per_second": 3.873, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.6642599277978339, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.00406517094017094, |
|
"loss": 0.3576, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.336588591337204, |
|
"eval_runtime": 9.0067, |
|
"eval_samples_per_second": 30.755, |
|
"eval_steps_per_second": 3.886, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.003931623931623931, |
|
"loss": 0.346, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3301502466201782, |
|
"eval_runtime": 9.0221, |
|
"eval_samples_per_second": 30.702, |
|
"eval_steps_per_second": 3.879, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.3293074369430542, |
|
"eval_runtime": 9.0321, |
|
"eval_samples_per_second": 30.668, |
|
"eval_steps_per_second": 3.875, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.003798076923076923, |
|
"loss": 0.337, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.33005112409591675, |
|
"eval_runtime": 9.0187, |
|
"eval_samples_per_second": 30.714, |
|
"eval_steps_per_second": 3.881, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.33984842896461487, |
|
"eval_runtime": 9.0474, |
|
"eval_samples_per_second": 30.616, |
|
"eval_steps_per_second": 3.869, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.003664529914529914, |
|
"loss": 0.3323, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.35551536083221436, |
|
"eval_runtime": 9.0226, |
|
"eval_samples_per_second": 30.701, |
|
"eval_steps_per_second": 3.879, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.003530982905982906, |
|
"loss": 0.3245, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.32570627331733704, |
|
"eval_runtime": 9.0171, |
|
"eval_samples_per_second": 30.719, |
|
"eval_steps_per_second": 3.882, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3257431089878082, |
|
"eval_runtime": 9.0276, |
|
"eval_samples_per_second": 30.684, |
|
"eval_steps_per_second": 3.877, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0033974358974358976, |
|
"loss": 0.3243, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3507075309753418, |
|
"eval_runtime": 8.9982, |
|
"eval_samples_per_second": 30.784, |
|
"eval_steps_per_second": 3.89, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.003263888888888889, |
|
"loss": 0.3144, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.40468713641166687, |
|
"eval_runtime": 9.0484, |
|
"eval_samples_per_second": 30.613, |
|
"eval_steps_per_second": 3.868, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.36198413372039795, |
|
"eval_runtime": 9.0445, |
|
"eval_samples_per_second": 30.626, |
|
"eval_steps_per_second": 3.87, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0031303418803418806, |
|
"loss": 0.3135, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3739970028400421, |
|
"eval_runtime": 9.0931, |
|
"eval_samples_per_second": 30.463, |
|
"eval_steps_per_second": 3.849, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.33150357007980347, |
|
"eval_runtime": 9.0913, |
|
"eval_samples_per_second": 30.469, |
|
"eval_steps_per_second": 3.85, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 23, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.0029967948717948716, |
|
"loss": 0.3063, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.3291415572166443, |
|
"eval_runtime": 9.0712, |
|
"eval_samples_per_second": 30.536, |
|
"eval_steps_per_second": 3.858, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 23, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.002863247863247863, |
|
"loss": 0.2986, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.36258506774902344, |
|
"eval_runtime": 9.1485, |
|
"eval_samples_per_second": 30.278, |
|
"eval_steps_per_second": 3.826, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 23, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.3281010687351227, |
|
"eval_runtime": 9.0555, |
|
"eval_samples_per_second": 30.589, |
|
"eval_steps_per_second": 3.865, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 23, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.0027297008547008546, |
|
"loss": 0.2956, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.33756041526794434, |
|
"eval_runtime": 9.1719, |
|
"eval_samples_per_second": 30.201, |
|
"eval_steps_per_second": 3.816, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 23, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0025961538461538466, |
|
"loss": 0.2927, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.3309880197048187, |
|
"eval_runtime": 9.0985, |
|
"eval_samples_per_second": 30.445, |
|
"eval_steps_per_second": 3.847, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.34706664085388184, |
|
"eval_runtime": 8.8071, |
|
"eval_samples_per_second": 31.452, |
|
"eval_steps_per_second": 3.974, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7545126353790613, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0024626068376068376, |
|
"loss": 0.2853, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7581227436823105, |
|
"eval_loss": 0.3204677700996399, |
|
"eval_runtime": 9.1308, |
|
"eval_samples_per_second": 30.337, |
|
"eval_steps_per_second": 3.833, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.32714536786079407, |
|
"eval_runtime": 9.0779, |
|
"eval_samples_per_second": 30.514, |
|
"eval_steps_per_second": 3.855, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.002329059829059829, |
|
"loss": 0.2861, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.3423185646533966, |
|
"eval_runtime": 9.1078, |
|
"eval_samples_per_second": 30.414, |
|
"eval_steps_per_second": 3.843, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0021955128205128206, |
|
"loss": 0.2782, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.3327659070491791, |
|
"eval_runtime": 9.1866, |
|
"eval_samples_per_second": 30.153, |
|
"eval_steps_per_second": 3.81, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7617328519855595, |
|
"eval_loss": 0.32888853549957275, |
|
"eval_runtime": 9.2412, |
|
"eval_samples_per_second": 29.974, |
|
"eval_steps_per_second": 3.787, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.002061965811965812, |
|
"loss": 0.2756, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7581227436823105, |
|
"eval_loss": 0.330919086933136, |
|
"eval_runtime": 9.2586, |
|
"eval_samples_per_second": 29.918, |
|
"eval_steps_per_second": 3.78, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0019284188034188036, |
|
"loss": 0.2758, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.37405356764793396, |
|
"eval_runtime": 9.2, |
|
"eval_samples_per_second": 30.109, |
|
"eval_steps_per_second": 3.804, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.33260834217071533, |
|
"eval_runtime": 4.3573, |
|
"eval_samples_per_second": 63.571, |
|
"eval_steps_per_second": 8.032, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0017948717948717949, |
|
"loss": 0.2714, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.36113840341567993, |
|
"eval_runtime": 10.6137, |
|
"eval_samples_per_second": 26.098, |
|
"eval_steps_per_second": 3.298, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.33517616987228394, |
|
"eval_runtime": 10.5938, |
|
"eval_samples_per_second": 26.147, |
|
"eval_steps_per_second": 3.304, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0016613247863247864, |
|
"loss": 0.2687, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.34048405289649963, |
|
"eval_runtime": 10.5168, |
|
"eval_samples_per_second": 26.339, |
|
"eval_steps_per_second": 3.328, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0015277777777777779, |
|
"loss": 0.2685, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.34080803394317627, |
|
"eval_runtime": 10.8178, |
|
"eval_samples_per_second": 25.606, |
|
"eval_steps_per_second": 3.235, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.34136372804641724, |
|
"eval_runtime": 10.56, |
|
"eval_samples_per_second": 26.231, |
|
"eval_steps_per_second": 3.314, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0013942307692307694, |
|
"loss": 0.2649, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.3369286060333252, |
|
"eval_runtime": 10.5795, |
|
"eval_samples_per_second": 26.183, |
|
"eval_steps_per_second": 3.308, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0012606837606837606, |
|
"loss": 0.2615, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.33710670471191406, |
|
"eval_runtime": 10.8841, |
|
"eval_samples_per_second": 25.45, |
|
"eval_steps_per_second": 3.216, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.34283646941185, |
|
"eval_runtime": 10.5807, |
|
"eval_samples_per_second": 26.18, |
|
"eval_steps_per_second": 3.308, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.0011271367521367521, |
|
"loss": 0.2602, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.32864174246788025, |
|
"eval_runtime": 10.9225, |
|
"eval_samples_per_second": 25.361, |
|
"eval_steps_per_second": 3.204, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7581227436823105, |
|
"eval_loss": 0.33157122135162354, |
|
"eval_runtime": 10.8742, |
|
"eval_samples_per_second": 25.473, |
|
"eval_steps_per_second": 3.219, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0009935897435897436, |
|
"loss": 0.2595, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.3400510847568512, |
|
"eval_runtime": 9.388, |
|
"eval_samples_per_second": 29.506, |
|
"eval_steps_per_second": 3.728, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 34, |
|
"best_eval_accuracy": 0.7617328519855595, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.0008600427350427351, |
|
"loss": 0.2551, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7653429602888087, |
|
"eval_loss": 0.3362467288970947, |
|
"eval_runtime": 9.2771, |
|
"eval_samples_per_second": 29.859, |
|
"eval_steps_per_second": 3.773, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7653429602888087, |
|
"eval_loss": 0.34337136149406433, |
|
"eval_runtime": 9.1926, |
|
"eval_samples_per_second": 30.133, |
|
"eval_steps_per_second": 3.807, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.7653429602888087, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.0007264957264957266, |
|
"loss": 0.2574, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7725631768953068, |
|
"eval_loss": 0.33018356561660767, |
|
"eval_runtime": 9.1431, |
|
"eval_samples_per_second": 30.296, |
|
"eval_steps_per_second": 3.828, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 51, |
|
"best_eval_accuracy": 0.7725631768953068, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.000592948717948718, |
|
"loss": 0.2515, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.3463674485683441, |
|
"eval_runtime": 9.139, |
|
"eval_samples_per_second": 30.31, |
|
"eval_steps_per_second": 3.83, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 51, |
|
"best_eval_accuracy": 0.7725631768953068, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7689530685920578, |
|
"eval_loss": 0.3337368667125702, |
|
"eval_runtime": 9.3533, |
|
"eval_samples_per_second": 29.615, |
|
"eval_steps_per_second": 3.742, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 51, |
|
"best_eval_accuracy": 0.7725631768953068, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.00045940170940170943, |
|
"loss": 0.252, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7689530685920578, |
|
"eval_loss": 0.33643844723701477, |
|
"eval_runtime": 9.1243, |
|
"eval_samples_per_second": 30.359, |
|
"eval_steps_per_second": 3.836, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 51, |
|
"best_eval_accuracy": 0.7725631768953068, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7509025270758123, |
|
"eval_loss": 0.3417775630950928, |
|
"eval_runtime": 8.8741, |
|
"eval_samples_per_second": 31.214, |
|
"eval_steps_per_second": 3.944, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 51, |
|
"best_eval_accuracy": 0.7725631768953068, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.00032585470085470087, |
|
"loss": 0.2497, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7581227436823105, |
|
"eval_loss": 0.340709924697876, |
|
"eval_runtime": 8.9435, |
|
"eval_samples_per_second": 30.972, |
|
"eval_steps_per_second": 3.913, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 51, |
|
"best_eval_accuracy": 0.7725631768953068, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.00019230769230769233, |
|
"loss": 0.2503, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7545126353790613, |
|
"eval_loss": 0.3418755829334259, |
|
"eval_runtime": 8.9319, |
|
"eval_samples_per_second": 31.012, |
|
"eval_steps_per_second": 3.919, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 51, |
|
"best_eval_accuracy": 0.7725631768953068, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.776173285198556, |
|
"eval_loss": 0.33760103583335876, |
|
"eval_runtime": 9.2259, |
|
"eval_samples_per_second": 30.024, |
|
"eval_steps_per_second": 3.794, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 58, |
|
"best_eval_accuracy": 0.776173285198556, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 5.876068376068376e-05, |
|
"loss": 0.2504, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7725631768953068, |
|
"eval_loss": 0.3379180431365967, |
|
"eval_runtime": 9.2108, |
|
"eval_samples_per_second": 30.073, |
|
"eval_steps_per_second": 3.8, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 58, |
|
"best_eval_accuracy": 0.776173285198556, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.3189461204740736, |
|
"train_runtime": 8512.0872, |
|
"train_samples_per_second": 17.552, |
|
"train_steps_per_second": 2.199 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|