|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5234657039711191, |
|
"eval_loss": 0.5125718116760254, |
|
"eval_runtime": 8.8736, |
|
"eval_samples_per_second": 31.216, |
|
"eval_steps_per_second": 3.944, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5234657039711191, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.002919871794871795, |
|
"loss": 0.5126, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.38242238759994507, |
|
"eval_runtime": 8.7725, |
|
"eval_samples_per_second": 31.576, |
|
"eval_steps_per_second": 3.99, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5234657039711191, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.49097472924187724, |
|
"eval_loss": 0.3692016005516052, |
|
"eval_runtime": 8.7845, |
|
"eval_samples_per_second": 31.533, |
|
"eval_steps_per_second": 3.984, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5234657039711191, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0028397435897435895, |
|
"loss": 0.4613, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.3940806984901428, |
|
"eval_runtime": 8.8274, |
|
"eval_samples_per_second": 31.38, |
|
"eval_steps_per_second": 3.965, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0027596153846153847, |
|
"loss": 0.446, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.6773150563240051, |
|
"eval_runtime": 8.9686, |
|
"eval_samples_per_second": 30.885, |
|
"eval_steps_per_second": 3.902, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.5515874028205872, |
|
"eval_runtime": 9.1762, |
|
"eval_samples_per_second": 30.187, |
|
"eval_steps_per_second": 3.814, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00267948717948718, |
|
"loss": 0.4477, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.51985559566787, |
|
"eval_loss": 0.3517453372478485, |
|
"eval_runtime": 9.1384, |
|
"eval_samples_per_second": 30.312, |
|
"eval_steps_per_second": 3.83, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.49097472924187724, |
|
"eval_loss": 0.37720802426338196, |
|
"eval_runtime": 9.0573, |
|
"eval_samples_per_second": 30.583, |
|
"eval_steps_per_second": 3.864, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.0025993589743589745, |
|
"loss": 0.4263, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.48375451263537905, |
|
"eval_loss": 0.36901482939720154, |
|
"eval_runtime": 9.0499, |
|
"eval_samples_per_second": 30.608, |
|
"eval_steps_per_second": 3.867, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0025192307692307693, |
|
"loss": 0.4397, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.48375451263537905, |
|
"eval_loss": 0.35119369626045227, |
|
"eval_runtime": 9.0328, |
|
"eval_samples_per_second": 30.666, |
|
"eval_steps_per_second": 3.875, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5342960288808665, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.47159671783447266, |
|
"eval_runtime": 9.0616, |
|
"eval_samples_per_second": 30.569, |
|
"eval_steps_per_second": 3.862, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.5379061371841155, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.002439102564102564, |
|
"loss": 0.4425, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.36053842306137085, |
|
"eval_runtime": 9.0613, |
|
"eval_samples_per_second": 30.57, |
|
"eval_steps_per_second": 3.863, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.6570397111913358, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.002358974358974359, |
|
"loss": 0.4269, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.3570541739463806, |
|
"eval_runtime": 9.0666, |
|
"eval_samples_per_second": 30.552, |
|
"eval_steps_per_second": 3.86, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.6570397111913358, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.48375451263537905, |
|
"eval_loss": 0.3544716238975525, |
|
"eval_runtime": 9.1496, |
|
"eval_samples_per_second": 30.274, |
|
"eval_steps_per_second": 3.825, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.6570397111913358, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.002278846153846154, |
|
"loss": 0.3975, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.37444454431533813, |
|
"eval_runtime": 9.0245, |
|
"eval_samples_per_second": 30.694, |
|
"eval_steps_per_second": 3.878, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.6570397111913358, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6606498194945848, |
|
"eval_loss": 0.35777392983436584, |
|
"eval_runtime": 9.0439, |
|
"eval_samples_per_second": 30.628, |
|
"eval_steps_per_second": 3.87, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.6606498194945848, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0021987179487179486, |
|
"loss": 0.3906, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.37044015526771545, |
|
"eval_runtime": 9.0352, |
|
"eval_samples_per_second": 30.658, |
|
"eval_steps_per_second": 3.874, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.0021185897435897437, |
|
"loss": 0.3633, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6064981949458483, |
|
"eval_loss": 0.3355711102485657, |
|
"eval_runtime": 9.076, |
|
"eval_samples_per_second": 30.52, |
|
"eval_steps_per_second": 3.856, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6064981949458483, |
|
"eval_loss": 0.3397271931171417, |
|
"eval_runtime": 9.0579, |
|
"eval_samples_per_second": 30.581, |
|
"eval_steps_per_second": 3.864, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0020384615384615385, |
|
"loss": 0.3604, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.3809119760990143, |
|
"eval_runtime": 9.0061, |
|
"eval_samples_per_second": 30.757, |
|
"eval_steps_per_second": 3.886, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.0019583333333333336, |
|
"loss": 0.3565, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.33565953373908997, |
|
"eval_runtime": 9.0589, |
|
"eval_samples_per_second": 30.578, |
|
"eval_steps_per_second": 3.864, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6209386281588448, |
|
"eval_loss": 0.38026997447013855, |
|
"eval_runtime": 9.0356, |
|
"eval_samples_per_second": 30.656, |
|
"eval_steps_per_second": 3.874, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0018782051282051281, |
|
"loss": 0.3533, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.37539029121398926, |
|
"eval_runtime": 9.0595, |
|
"eval_samples_per_second": 30.576, |
|
"eval_steps_per_second": 3.863, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6353790613718412, |
|
"eval_loss": 0.3304370641708374, |
|
"eval_runtime": 9.2561, |
|
"eval_samples_per_second": 29.926, |
|
"eval_steps_per_second": 3.781, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.001798076923076923, |
|
"loss": 0.3462, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.36999332904815674, |
|
"eval_runtime": 9.2621, |
|
"eval_samples_per_second": 29.907, |
|
"eval_steps_per_second": 3.779, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0017179487179487178, |
|
"loss": 0.3432, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.333748996257782, |
|
"eval_runtime": 9.203, |
|
"eval_samples_per_second": 30.099, |
|
"eval_steps_per_second": 3.803, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.3289393484592438, |
|
"eval_runtime": 9.2259, |
|
"eval_samples_per_second": 30.024, |
|
"eval_steps_per_second": 3.794, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.0016378205128205127, |
|
"loss": 0.3409, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3340049684047699, |
|
"eval_runtime": 9.2073, |
|
"eval_samples_per_second": 30.085, |
|
"eval_steps_per_second": 3.801, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0015576923076923079, |
|
"loss": 0.3381, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3466947674751282, |
|
"eval_runtime": 9.2066, |
|
"eval_samples_per_second": 30.087, |
|
"eval_steps_per_second": 3.802, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6823104693140795, |
|
"eval_loss": 0.3859948515892029, |
|
"eval_runtime": 9.1838, |
|
"eval_samples_per_second": 30.162, |
|
"eval_steps_per_second": 3.811, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0014775641025641026, |
|
"loss": 0.337, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.3795461356639862, |
|
"eval_runtime": 9.2077, |
|
"eval_samples_per_second": 30.083, |
|
"eval_steps_per_second": 3.801, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.3755357563495636, |
|
"eval_runtime": 9.142, |
|
"eval_samples_per_second": 30.3, |
|
"eval_steps_per_second": 3.828, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0013974358974358976, |
|
"loss": 0.334, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.3529473543167114, |
|
"eval_runtime": 9.0973, |
|
"eval_samples_per_second": 30.449, |
|
"eval_steps_per_second": 3.847, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0013173076923076923, |
|
"loss": 0.3321, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.33890479803085327, |
|
"eval_runtime": 9.1599, |
|
"eval_samples_per_second": 30.24, |
|
"eval_steps_per_second": 3.821, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3260202407836914, |
|
"eval_runtime": 9.3959, |
|
"eval_samples_per_second": 29.481, |
|
"eval_steps_per_second": 3.725, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 28, |
|
"best_eval_accuracy": 0.7220216606498195, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0012371794871794872, |
|
"loss": 0.3315, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.35185953974723816, |
|
"eval_runtime": 9.0108, |
|
"eval_samples_per_second": 30.741, |
|
"eval_steps_per_second": 3.884, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0011570512820512822, |
|
"loss": 0.3317, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.3741394877433777, |
|
"eval_runtime": 8.9889, |
|
"eval_samples_per_second": 30.816, |
|
"eval_steps_per_second": 3.894, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.33644017577171326, |
|
"eval_runtime": 9.1679, |
|
"eval_samples_per_second": 30.214, |
|
"eval_steps_per_second": 3.818, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0010769230769230769, |
|
"loss": 0.325, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.34382882714271545, |
|
"eval_runtime": 9.0452, |
|
"eval_samples_per_second": 30.624, |
|
"eval_steps_per_second": 3.869, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.3461564779281616, |
|
"eval_runtime": 8.9087, |
|
"eval_samples_per_second": 31.093, |
|
"eval_steps_per_second": 3.929, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0009967948717948718, |
|
"loss": 0.3282, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.33443841338157654, |
|
"eval_runtime": 9.0016, |
|
"eval_samples_per_second": 30.772, |
|
"eval_steps_per_second": 3.888, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0009166666666666668, |
|
"loss": 0.3251, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.328012079000473, |
|
"eval_runtime": 8.8448, |
|
"eval_samples_per_second": 31.318, |
|
"eval_steps_per_second": 3.957, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.35438141226768494, |
|
"eval_runtime": 8.8382, |
|
"eval_samples_per_second": 31.341, |
|
"eval_steps_per_second": 3.96, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0008365384615384616, |
|
"loss": 0.3223, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.34875577688217163, |
|
"eval_runtime": 8.7814, |
|
"eval_samples_per_second": 31.544, |
|
"eval_steps_per_second": 3.986, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0007564102564102564, |
|
"loss": 0.3215, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3436548113822937, |
|
"eval_runtime": 8.7944, |
|
"eval_samples_per_second": 31.497, |
|
"eval_steps_per_second": 3.98, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.34295418858528137, |
|
"eval_runtime": 8.7788, |
|
"eval_samples_per_second": 31.553, |
|
"eval_steps_per_second": 3.987, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.0006762820512820514, |
|
"loss": 0.3205, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.33936554193496704, |
|
"eval_runtime": 8.8644, |
|
"eval_samples_per_second": 31.249, |
|
"eval_steps_per_second": 3.948, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.3676346242427826, |
|
"eval_runtime": 8.8385, |
|
"eval_samples_per_second": 31.34, |
|
"eval_steps_per_second": 3.96, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7328519855595668, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0005961538461538461, |
|
"loss": 0.3163, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3486822545528412, |
|
"eval_runtime": 8.8462, |
|
"eval_samples_per_second": 31.313, |
|
"eval_steps_per_second": 3.956, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.000516025641025641, |
|
"loss": 0.3154, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.33869412541389465, |
|
"eval_runtime": 8.9668, |
|
"eval_samples_per_second": 30.892, |
|
"eval_steps_per_second": 3.903, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.3448237478733063, |
|
"eval_runtime": 8.8551, |
|
"eval_samples_per_second": 31.281, |
|
"eval_steps_per_second": 3.953, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00043589743589743596, |
|
"loss": 0.3164, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.33612075448036194, |
|
"eval_runtime": 8.8349, |
|
"eval_samples_per_second": 31.353, |
|
"eval_steps_per_second": 3.962, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.00035576923076923074, |
|
"loss": 0.3153, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.367554634809494, |
|
"eval_runtime": 8.8354, |
|
"eval_samples_per_second": 31.351, |
|
"eval_steps_per_second": 3.961, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.3463309407234192, |
|
"eval_runtime": 8.8416, |
|
"eval_samples_per_second": 31.329, |
|
"eval_steps_per_second": 3.959, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.0002756410256410257, |
|
"loss": 0.3145, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.34912317991256714, |
|
"eval_runtime": 8.8368, |
|
"eval_samples_per_second": 31.346, |
|
"eval_steps_per_second": 3.961, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.3599321246147156, |
|
"eval_runtime": 8.8319, |
|
"eval_samples_per_second": 31.363, |
|
"eval_steps_per_second": 3.963, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.00019551282051282054, |
|
"loss": 0.3151, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3457310199737549, |
|
"eval_runtime": 8.9008, |
|
"eval_samples_per_second": 31.121, |
|
"eval_steps_per_second": 3.932, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.0001153846153846154, |
|
"loss": 0.3103, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.34887006878852844, |
|
"eval_runtime": 8.8994, |
|
"eval_samples_per_second": 31.126, |
|
"eval_steps_per_second": 3.933, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.348056823015213, |
|
"eval_runtime": 8.9031, |
|
"eval_samples_per_second": 31.113, |
|
"eval_steps_per_second": 3.931, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 3.5256410256410254e-05, |
|
"loss": 0.314, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.35035568475723267, |
|
"eval_runtime": 8.8809, |
|
"eval_samples_per_second": 31.191, |
|
"eval_steps_per_second": 3.941, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 48, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.3594475073692126, |
|
"train_runtime": 8160.6004, |
|
"train_samples_per_second": 18.307, |
|
"train_steps_per_second": 2.294 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|