|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.5077166557312012, |
|
"eval_runtime": 4.2532, |
|
"eval_samples_per_second": 65.127, |
|
"eval_steps_per_second": 8.229, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0019465811965811966, |
|
"loss": 0.4439, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.48736462093862815, |
|
"eval_loss": 0.3970734775066376, |
|
"eval_runtime": 4.3287, |
|
"eval_samples_per_second": 63.992, |
|
"eval_steps_per_second": 8.086, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.35738521814346313, |
|
"eval_runtime": 4.3459, |
|
"eval_samples_per_second": 63.739, |
|
"eval_steps_per_second": 8.054, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0018931623931623931, |
|
"loss": 0.4231, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5776173285198556, |
|
"eval_loss": 0.362491250038147, |
|
"eval_runtime": 4.3598, |
|
"eval_samples_per_second": 63.535, |
|
"eval_steps_per_second": 8.028, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5776173285198556, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0018397435897435897, |
|
"loss": 0.4071, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.49368584156036377, |
|
"eval_runtime": 4.3603, |
|
"eval_samples_per_second": 63.527, |
|
"eval_steps_per_second": 8.027, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5776173285198556, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5667870036101083, |
|
"eval_loss": 0.3738144636154175, |
|
"eval_runtime": 4.3477, |
|
"eval_samples_per_second": 63.712, |
|
"eval_steps_per_second": 8.05, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5776173285198556, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.0017863247863247865, |
|
"loss": 0.3956, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.4080715775489807, |
|
"eval_runtime": 4.3468, |
|
"eval_samples_per_second": 63.725, |
|
"eval_steps_per_second": 8.052, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5776173285198556, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6209386281588448, |
|
"eval_loss": 0.3385550379753113, |
|
"eval_runtime": 4.3506, |
|
"eval_samples_per_second": 63.67, |
|
"eval_steps_per_second": 8.045, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.6209386281588448, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.001732905982905983, |
|
"loss": 0.3905, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.4146560728549957, |
|
"eval_runtime": 4.3523, |
|
"eval_samples_per_second": 63.644, |
|
"eval_steps_per_second": 8.042, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.6209386281588448, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0016794871794871796, |
|
"loss": 0.3888, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6353790613718412, |
|
"eval_loss": 0.33526456356048584, |
|
"eval_runtime": 4.353, |
|
"eval_samples_per_second": 63.634, |
|
"eval_steps_per_second": 8.04, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.6353790613718412, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.3539533317089081, |
|
"eval_runtime": 4.3528, |
|
"eval_samples_per_second": 63.637, |
|
"eval_steps_per_second": 8.041, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.6353790613718412, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.0016260683760683761, |
|
"loss": 0.3992, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5848375451263538, |
|
"eval_loss": 0.3453006446361542, |
|
"eval_runtime": 4.3589, |
|
"eval_samples_per_second": 63.547, |
|
"eval_steps_per_second": 8.029, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.6353790613718412, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.0015726495726495727, |
|
"loss": 0.372, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.32649099826812744, |
|
"eval_runtime": 4.3537, |
|
"eval_samples_per_second": 63.623, |
|
"eval_steps_per_second": 8.039, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6895306859205776, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6425992779783394, |
|
"eval_loss": 0.35752376914024353, |
|
"eval_runtime": 4.3547, |
|
"eval_samples_per_second": 63.609, |
|
"eval_steps_per_second": 8.037, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6895306859205776, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.0015192307692307692, |
|
"loss": 0.3643, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.3304148018360138, |
|
"eval_runtime": 4.3605, |
|
"eval_samples_per_second": 63.525, |
|
"eval_steps_per_second": 8.027, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6895306859205776, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.3632947504520416, |
|
"eval_runtime": 4.3555, |
|
"eval_samples_per_second": 63.598, |
|
"eval_steps_per_second": 8.036, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6895306859205776, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0014658119658119658, |
|
"loss": 0.3666, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.5230220556259155, |
|
"eval_runtime": 4.3594, |
|
"eval_samples_per_second": 63.54, |
|
"eval_steps_per_second": 8.029, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6895306859205776, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.0014123931623931626, |
|
"loss": 0.3517, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.33842334151268005, |
|
"eval_runtime": 4.3558, |
|
"eval_samples_per_second": 63.593, |
|
"eval_steps_per_second": 8.035, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6895306859205776, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6823104693140795, |
|
"eval_loss": 0.32931071519851685, |
|
"eval_runtime": 4.3567, |
|
"eval_samples_per_second": 63.58, |
|
"eval_steps_per_second": 8.034, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6895306859205776, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0013589743589743591, |
|
"loss": 0.3519, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6823104693140795, |
|
"eval_loss": 0.36133021116256714, |
|
"eval_runtime": 4.354, |
|
"eval_samples_per_second": 63.62, |
|
"eval_steps_per_second": 8.039, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6895306859205776, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.0013055555555555557, |
|
"loss": 0.338, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3241924047470093, |
|
"eval_runtime": 4.3569, |
|
"eval_samples_per_second": 63.577, |
|
"eval_steps_per_second": 8.033, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.3399159610271454, |
|
"eval_runtime": 4.3574, |
|
"eval_samples_per_second": 63.57, |
|
"eval_steps_per_second": 8.032, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0012521367521367522, |
|
"loss": 0.3316, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.33920276165008545, |
|
"eval_runtime": 4.3578, |
|
"eval_samples_per_second": 63.565, |
|
"eval_steps_per_second": 8.032, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.33432814478874207, |
|
"eval_runtime": 4.3588, |
|
"eval_samples_per_second": 63.55, |
|
"eval_steps_per_second": 8.03, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.0011987179487179488, |
|
"loss": 0.3266, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.3467292785644531, |
|
"eval_runtime": 4.3605, |
|
"eval_samples_per_second": 63.525, |
|
"eval_steps_per_second": 8.027, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0011452991452991453, |
|
"loss": 0.3213, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3418598473072052, |
|
"eval_runtime": 4.3581, |
|
"eval_samples_per_second": 63.559, |
|
"eval_steps_per_second": 8.031, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.3190039396286011, |
|
"eval_runtime": 4.3552, |
|
"eval_samples_per_second": 63.601, |
|
"eval_steps_per_second": 8.036, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.001091880341880342, |
|
"loss": 0.3177, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.32053741812705994, |
|
"eval_runtime": 4.3604, |
|
"eval_samples_per_second": 63.526, |
|
"eval_steps_per_second": 8.027, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0010384615384615387, |
|
"loss": 0.3187, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.3302537798881531, |
|
"eval_runtime": 4.3616, |
|
"eval_samples_per_second": 63.509, |
|
"eval_steps_per_second": 8.025, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.32684004306793213, |
|
"eval_runtime": 4.3594, |
|
"eval_samples_per_second": 63.541, |
|
"eval_steps_per_second": 8.029, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0009850427350427352, |
|
"loss": 0.3162, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3273889720439911, |
|
"eval_runtime": 4.3611, |
|
"eval_samples_per_second": 63.516, |
|
"eval_steps_per_second": 8.026, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.33114469051361084, |
|
"eval_runtime": 4.3586, |
|
"eval_samples_per_second": 63.552, |
|
"eval_steps_per_second": 8.03, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0009316239316239317, |
|
"loss": 0.3132, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.345443457365036, |
|
"eval_runtime": 4.3573, |
|
"eval_samples_per_second": 63.572, |
|
"eval_steps_per_second": 8.033, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0008782051282051282, |
|
"loss": 0.3087, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.32495415210723877, |
|
"eval_runtime": 4.3587, |
|
"eval_samples_per_second": 63.551, |
|
"eval_steps_per_second": 8.03, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.3265839219093323, |
|
"eval_runtime": 4.3556, |
|
"eval_samples_per_second": 63.597, |
|
"eval_steps_per_second": 8.036, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0008247863247863248, |
|
"loss": 0.3076, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3346775770187378, |
|
"eval_runtime": 4.3524, |
|
"eval_samples_per_second": 63.643, |
|
"eval_steps_per_second": 8.042, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0007713675213675214, |
|
"loss": 0.3071, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.3307958245277405, |
|
"eval_runtime": 4.3598, |
|
"eval_samples_per_second": 63.535, |
|
"eval_steps_per_second": 8.028, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3272268772125244, |
|
"eval_runtime": 4.3546, |
|
"eval_samples_per_second": 63.611, |
|
"eval_steps_per_second": 8.038, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.000717948717948718, |
|
"loss": 0.3061, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3301219046115875, |
|
"eval_runtime": 4.3561, |
|
"eval_samples_per_second": 63.589, |
|
"eval_steps_per_second": 8.035, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.32260364294052124, |
|
"eval_runtime": 4.356, |
|
"eval_samples_per_second": 63.59, |
|
"eval_steps_per_second": 8.035, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7292418772563177, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0006645299145299145, |
|
"loss": 0.3006, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.32847943902015686, |
|
"eval_runtime": 4.3592, |
|
"eval_samples_per_second": 63.544, |
|
"eval_steps_per_second": 8.029, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0006111111111111112, |
|
"loss": 0.3016, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3225715160369873, |
|
"eval_runtime": 4.3622, |
|
"eval_samples_per_second": 63.5, |
|
"eval_steps_per_second": 8.023, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3290694057941437, |
|
"eval_runtime": 4.3571, |
|
"eval_samples_per_second": 63.575, |
|
"eval_steps_per_second": 8.033, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0005576923076923078, |
|
"loss": 0.2984, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.33770960569381714, |
|
"eval_runtime": 4.3531, |
|
"eval_samples_per_second": 63.633, |
|
"eval_steps_per_second": 8.04, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0005042735042735043, |
|
"loss": 0.2976, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3326423168182373, |
|
"eval_runtime": 4.3579, |
|
"eval_samples_per_second": 63.563, |
|
"eval_steps_per_second": 8.031, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3340679407119751, |
|
"eval_runtime": 4.3576, |
|
"eval_samples_per_second": 63.568, |
|
"eval_steps_per_second": 8.032, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.00045085470085470087, |
|
"loss": 0.2967, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.3186676800251007, |
|
"eval_runtime": 4.3574, |
|
"eval_samples_per_second": 63.57, |
|
"eval_steps_per_second": 8.032, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3322432041168213, |
|
"eval_runtime": 4.3552, |
|
"eval_samples_per_second": 63.602, |
|
"eval_steps_per_second": 8.036, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0003974358974358974, |
|
"loss": 0.2953, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.32692331075668335, |
|
"eval_runtime": 4.3589, |
|
"eval_samples_per_second": 63.548, |
|
"eval_steps_per_second": 8.03, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.00034401709401709403, |
|
"loss": 0.2911, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3256283104419708, |
|
"eval_runtime": 4.3551, |
|
"eval_samples_per_second": 63.604, |
|
"eval_steps_per_second": 8.037, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3252301812171936, |
|
"eval_runtime": 4.3543, |
|
"eval_samples_per_second": 63.615, |
|
"eval_steps_per_second": 8.038, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00029059829059829064, |
|
"loss": 0.2929, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.32512250542640686, |
|
"eval_runtime": 4.3638, |
|
"eval_samples_per_second": 63.476, |
|
"eval_steps_per_second": 8.02, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.00023717948717948717, |
|
"loss": 0.2904, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.32575854659080505, |
|
"eval_runtime": 4.3574, |
|
"eval_samples_per_second": 63.57, |
|
"eval_steps_per_second": 8.032, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3357574939727783, |
|
"eval_runtime": 4.3583, |
|
"eval_samples_per_second": 63.556, |
|
"eval_steps_per_second": 8.031, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.00018376068376068378, |
|
"loss": 0.2895, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.32194650173187256, |
|
"eval_runtime": 4.3566, |
|
"eval_samples_per_second": 63.581, |
|
"eval_steps_per_second": 8.034, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.3322417438030243, |
|
"eval_runtime": 4.356, |
|
"eval_samples_per_second": 63.591, |
|
"eval_steps_per_second": 8.035, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.00013034188034188036, |
|
"loss": 0.2887, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.325890451669693, |
|
"eval_runtime": 4.3537, |
|
"eval_samples_per_second": 63.624, |
|
"eval_steps_per_second": 8.039, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 7.692307692307693e-05, |
|
"loss": 0.2883, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.32602423429489136, |
|
"eval_runtime": 4.3569, |
|
"eval_samples_per_second": 63.578, |
|
"eval_steps_per_second": 8.033, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3276124596595764, |
|
"eval_runtime": 4.3463, |
|
"eval_samples_per_second": 63.732, |
|
"eval_steps_per_second": 8.053, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 0.2874, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.3288937509059906, |
|
"eval_runtime": 4.351, |
|
"eval_samples_per_second": 63.664, |
|
"eval_steps_per_second": 8.044, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 40, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.33155403911557974, |
|
"train_runtime": 3997.9333, |
|
"train_samples_per_second": 37.369, |
|
"train_steps_per_second": 4.682 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|