|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.516245487364621, |
|
"eval_loss": 0.37735894322395325, |
|
"eval_runtime": 4.3147, |
|
"eval_samples_per_second": 64.199, |
|
"eval_steps_per_second": 8.112, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.516245487364621, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.003893162393162393, |
|
"loss": 0.5343, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5018050541516246, |
|
"eval_loss": 0.35060495138168335, |
|
"eval_runtime": 4.3354, |
|
"eval_samples_per_second": 63.893, |
|
"eval_steps_per_second": 8.073, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.516245487364621, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.45752400159835815, |
|
"eval_runtime": 4.3375, |
|
"eval_samples_per_second": 63.862, |
|
"eval_steps_per_second": 8.069, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.516245487364621, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0037863247863247863, |
|
"loss": 0.4659, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.3758547008037567, |
|
"eval_runtime": 4.3393, |
|
"eval_samples_per_second": 63.836, |
|
"eval_steps_per_second": 8.066, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0036794871794871794, |
|
"loss": 0.4691, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5812274368231047, |
|
"eval_loss": 0.3500436246395111, |
|
"eval_runtime": 4.3557, |
|
"eval_samples_per_second": 63.595, |
|
"eval_steps_per_second": 8.035, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.5812274368231047, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5992779783393501, |
|
"eval_loss": 0.345672607421875, |
|
"eval_runtime": 4.342, |
|
"eval_samples_per_second": 63.796, |
|
"eval_steps_per_second": 8.061, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5992779783393501, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.003572649572649573, |
|
"loss": 0.4442, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6101083032490975, |
|
"eval_loss": 0.34997570514678955, |
|
"eval_runtime": 4.338, |
|
"eval_samples_per_second": 63.854, |
|
"eval_steps_per_second": 8.068, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.6101083032490975, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6173285198555957, |
|
"eval_loss": 0.3403359055519104, |
|
"eval_runtime": 4.3408, |
|
"eval_samples_per_second": 63.814, |
|
"eval_steps_per_second": 8.063, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.003465811965811966, |
|
"loss": 0.4366, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5776173285198556, |
|
"eval_loss": 0.38398295640945435, |
|
"eval_runtime": 4.3382, |
|
"eval_samples_per_second": 63.852, |
|
"eval_steps_per_second": 8.068, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.003358974358974359, |
|
"loss": 0.4097, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5487364620938628, |
|
"eval_loss": 0.43909069895744324, |
|
"eval_runtime": 4.3443, |
|
"eval_samples_per_second": 63.762, |
|
"eval_steps_per_second": 8.057, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6028880866425993, |
|
"eval_loss": 0.35843461751937866, |
|
"eval_runtime": 4.3422, |
|
"eval_samples_per_second": 63.793, |
|
"eval_steps_per_second": 8.06, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.0032521367521367523, |
|
"loss": 0.3922, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.33557796478271484, |
|
"eval_runtime": 4.3425, |
|
"eval_samples_per_second": 63.788, |
|
"eval_steps_per_second": 8.06, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.0031452991452991454, |
|
"loss": 0.3564, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.32750093936920166, |
|
"eval_runtime": 4.3335, |
|
"eval_samples_per_second": 63.921, |
|
"eval_steps_per_second": 8.077, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.32828277349472046, |
|
"eval_runtime": 4.3334, |
|
"eval_samples_per_second": 63.922, |
|
"eval_steps_per_second": 8.077, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.0030384615384615385, |
|
"loss": 0.3343, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.3377060890197754, |
|
"eval_runtime": 4.3243, |
|
"eval_samples_per_second": 64.056, |
|
"eval_steps_per_second": 8.094, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.35497620701789856, |
|
"eval_runtime": 4.328, |
|
"eval_samples_per_second": 64.002, |
|
"eval_steps_per_second": 8.087, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0029316239316239316, |
|
"loss": 0.335, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.3370417058467865, |
|
"eval_runtime": 4.3279, |
|
"eval_samples_per_second": 64.003, |
|
"eval_steps_per_second": 8.087, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.002824786324786325, |
|
"loss": 0.3233, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.32560163736343384, |
|
"eval_runtime": 4.3346, |
|
"eval_samples_per_second": 63.904, |
|
"eval_steps_per_second": 8.075, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.31737250089645386, |
|
"eval_runtime": 4.3417, |
|
"eval_samples_per_second": 63.8, |
|
"eval_steps_per_second": 8.061, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0027179487179487182, |
|
"loss": 0.3232, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6642599277978339, |
|
"eval_loss": 0.34402281045913696, |
|
"eval_runtime": 4.3368, |
|
"eval_samples_per_second": 63.872, |
|
"eval_steps_per_second": 8.07, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.0026111111111111114, |
|
"loss": 0.3102, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.3374693691730499, |
|
"eval_runtime": 4.3363, |
|
"eval_samples_per_second": 63.879, |
|
"eval_steps_per_second": 8.071, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.3433131277561188, |
|
"eval_runtime": 4.3244, |
|
"eval_samples_per_second": 64.055, |
|
"eval_steps_per_second": 8.094, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0025042735042735045, |
|
"loss": 0.3064, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.36901581287384033, |
|
"eval_runtime": 4.3278, |
|
"eval_samples_per_second": 64.005, |
|
"eval_steps_per_second": 8.087, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.3393624722957611, |
|
"eval_runtime": 4.332, |
|
"eval_samples_per_second": 63.943, |
|
"eval_steps_per_second": 8.079, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7111913357400722, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.0023974358974358976, |
|
"loss": 0.3004, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3376837372779846, |
|
"eval_runtime": 4.3275, |
|
"eval_samples_per_second": 64.009, |
|
"eval_steps_per_second": 8.088, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0022905982905982907, |
|
"loss": 0.2962, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.34352612495422363, |
|
"eval_runtime": 4.336, |
|
"eval_samples_per_second": 63.884, |
|
"eval_steps_per_second": 8.072, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7256317689530686, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.31822633743286133, |
|
"eval_runtime": 4.336, |
|
"eval_samples_per_second": 63.884, |
|
"eval_steps_per_second": 8.072, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.002183760683760684, |
|
"loss": 0.2937, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.3305753767490387, |
|
"eval_runtime": 4.3337, |
|
"eval_samples_per_second": 63.918, |
|
"eval_steps_per_second": 8.076, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0020769230769230773, |
|
"loss": 0.2905, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3361673057079315, |
|
"eval_runtime": 4.3378, |
|
"eval_samples_per_second": 63.857, |
|
"eval_steps_per_second": 8.069, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.36750420928001404, |
|
"eval_runtime": 4.3383, |
|
"eval_samples_per_second": 63.85, |
|
"eval_steps_per_second": 8.068, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0019700854700854704, |
|
"loss": 0.2865, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.34060072898864746, |
|
"eval_runtime": 4.3406, |
|
"eval_samples_per_second": 63.816, |
|
"eval_steps_per_second": 8.063, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.334277480840683, |
|
"eval_runtime": 4.3332, |
|
"eval_samples_per_second": 63.924, |
|
"eval_steps_per_second": 8.077, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0018632478632478633, |
|
"loss": 0.2812, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6859205776173285, |
|
"eval_loss": 0.34722989797592163, |
|
"eval_runtime": 4.3283, |
|
"eval_samples_per_second": 63.998, |
|
"eval_steps_per_second": 8.086, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0017564102564102564, |
|
"loss": 0.2727, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.337159126996994, |
|
"eval_runtime": 4.3326, |
|
"eval_samples_per_second": 63.933, |
|
"eval_steps_per_second": 8.078, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.35748186707496643, |
|
"eval_runtime": 4.3343, |
|
"eval_samples_per_second": 63.909, |
|
"eval_steps_per_second": 8.075, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0016495726495726495, |
|
"loss": 0.2735, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.3299775719642639, |
|
"eval_runtime": 4.3421, |
|
"eval_samples_per_second": 63.794, |
|
"eval_steps_per_second": 8.061, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0015427350427350429, |
|
"loss": 0.2701, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.3585418164730072, |
|
"eval_runtime": 4.3353, |
|
"eval_samples_per_second": 63.894, |
|
"eval_steps_per_second": 8.073, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3422161340713501, |
|
"eval_runtime": 4.3273, |
|
"eval_samples_per_second": 64.012, |
|
"eval_steps_per_second": 8.088, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.001435897435897436, |
|
"loss": 0.2688, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.3579197824001312, |
|
"eval_runtime": 4.3267, |
|
"eval_samples_per_second": 64.021, |
|
"eval_steps_per_second": 8.089, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.33261609077453613, |
|
"eval_runtime": 4.3229, |
|
"eval_samples_per_second": 64.078, |
|
"eval_steps_per_second": 8.097, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.001329059829059829, |
|
"loss": 0.2644, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3463744819164276, |
|
"eval_runtime": 4.3279, |
|
"eval_samples_per_second": 64.003, |
|
"eval_steps_per_second": 8.087, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0012222222222222224, |
|
"loss": 0.2637, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.35785871744155884, |
|
"eval_runtime": 4.3221, |
|
"eval_samples_per_second": 64.089, |
|
"eval_steps_per_second": 8.098, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3489207327365875, |
|
"eval_runtime": 4.3156, |
|
"eval_samples_per_second": 64.186, |
|
"eval_steps_per_second": 8.11, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0011153846153846155, |
|
"loss": 0.26, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.3438562750816345, |
|
"eval_runtime": 4.3177, |
|
"eval_samples_per_second": 64.154, |
|
"eval_steps_per_second": 8.106, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0010085470085470086, |
|
"loss": 0.2582, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.3585074543952942, |
|
"eval_runtime": 4.3123, |
|
"eval_samples_per_second": 64.235, |
|
"eval_steps_per_second": 8.116, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.35347476601600647, |
|
"eval_runtime": 4.3136, |
|
"eval_samples_per_second": 64.215, |
|
"eval_steps_per_second": 8.114, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.0009017094017094017, |
|
"loss": 0.2533, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.34402725100517273, |
|
"eval_runtime": 4.315, |
|
"eval_samples_per_second": 64.194, |
|
"eval_steps_per_second": 8.111, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3505990207195282, |
|
"eval_runtime": 4.3143, |
|
"eval_samples_per_second": 64.206, |
|
"eval_steps_per_second": 8.113, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0007948717948717948, |
|
"loss": 0.2535, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.35185110569000244, |
|
"eval_runtime": 4.3151, |
|
"eval_samples_per_second": 64.193, |
|
"eval_steps_per_second": 8.111, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.0006880341880341881, |
|
"loss": 0.2498, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.3456619083881378, |
|
"eval_runtime": 4.3131, |
|
"eval_samples_per_second": 64.223, |
|
"eval_steps_per_second": 8.115, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.34943750500679016, |
|
"eval_runtime": 4.314, |
|
"eval_samples_per_second": 64.21, |
|
"eval_steps_per_second": 8.113, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.0005811965811965813, |
|
"loss": 0.2504, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3430747985839844, |
|
"eval_runtime": 4.3163, |
|
"eval_samples_per_second": 64.176, |
|
"eval_steps_per_second": 8.109, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.00047435897435897434, |
|
"loss": 0.2499, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.34503889083862305, |
|
"eval_runtime": 4.3129, |
|
"eval_samples_per_second": 64.226, |
|
"eval_steps_per_second": 8.115, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.3484850227832794, |
|
"eval_runtime": 4.3131, |
|
"eval_samples_per_second": 64.223, |
|
"eval_steps_per_second": 8.115, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.00036752136752136755, |
|
"loss": 0.2488, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.3436543345451355, |
|
"eval_runtime": 4.3132, |
|
"eval_samples_per_second": 64.222, |
|
"eval_steps_per_second": 8.115, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.3464801013469696, |
|
"eval_runtime": 4.3155, |
|
"eval_samples_per_second": 64.187, |
|
"eval_steps_per_second": 8.11, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.0002606837606837607, |
|
"loss": 0.2479, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.34788957238197327, |
|
"eval_runtime": 4.3164, |
|
"eval_samples_per_second": 64.173, |
|
"eval_steps_per_second": 8.109, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.00015384615384615385, |
|
"loss": 0.247, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.34465810656547546, |
|
"eval_runtime": 4.3158, |
|
"eval_samples_per_second": 64.183, |
|
"eval_steps_per_second": 8.11, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.3520893454551697, |
|
"eval_runtime": 4.3122, |
|
"eval_samples_per_second": 64.236, |
|
"eval_steps_per_second": 8.116, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 4.700854700854701e-05, |
|
"loss": 0.2468, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.3493305444717407, |
|
"eval_runtime": 4.3274, |
|
"eval_samples_per_second": 64.011, |
|
"eval_steps_per_second": 8.088, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.3118535759102585, |
|
"train_runtime": 3964.0197, |
|
"train_samples_per_second": 37.689, |
|
"train_steps_per_second": 4.722 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|