|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.6182658672332764, |
|
"eval_runtime": 2.964, |
|
"eval_samples_per_second": 33.738, |
|
"eval_steps_per_second": 4.386, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.53, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.41887640953063965, |
|
"eval_runtime": 3.0102, |
|
"eval_samples_per_second": 33.22, |
|
"eval_steps_per_second": 4.319, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.43512362241744995, |
|
"eval_runtime": 3.0581, |
|
"eval_samples_per_second": 32.7, |
|
"eval_steps_per_second": 4.251, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.41805171966552734, |
|
"eval_runtime": 3.0736, |
|
"eval_samples_per_second": 32.536, |
|
"eval_steps_per_second": 4.23, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.41053706407546997, |
|
"eval_runtime": 3.0847, |
|
"eval_samples_per_second": 32.418, |
|
"eval_steps_per_second": 4.214, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41397789120674133, |
|
"eval_runtime": 3.0887, |
|
"eval_samples_per_second": 32.376, |
|
"eval_steps_per_second": 4.209, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.63, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.40520820021629333, |
|
"eval_runtime": 3.0892, |
|
"eval_samples_per_second": 32.371, |
|
"eval_steps_per_second": 4.208, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.43216773867607117, |
|
"eval_runtime": 3.0856, |
|
"eval_samples_per_second": 32.408, |
|
"eval_steps_per_second": 4.213, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.4364350438117981, |
|
"eval_runtime": 3.0898, |
|
"eval_samples_per_second": 32.365, |
|
"eval_steps_per_second": 4.207, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.42465832829475403, |
|
"eval_runtime": 3.0783, |
|
"eval_samples_per_second": 32.485, |
|
"eval_steps_per_second": 4.223, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.42610007524490356, |
|
"eval_runtime": 3.0769, |
|
"eval_samples_per_second": 32.5, |
|
"eval_steps_per_second": 4.225, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.41756904125213623, |
|
"eval_runtime": 3.0756, |
|
"eval_samples_per_second": 32.514, |
|
"eval_steps_per_second": 4.227, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.4107968211174011, |
|
"eval_runtime": 3.0727, |
|
"eval_samples_per_second": 32.544, |
|
"eval_steps_per_second": 4.231, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.43049681186676025, |
|
"eval_runtime": 3.0737, |
|
"eval_samples_per_second": 32.534, |
|
"eval_steps_per_second": 4.229, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.40635767579078674, |
|
"eval_runtime": 3.0804, |
|
"eval_samples_per_second": 32.463, |
|
"eval_steps_per_second": 4.22, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.40317821502685547, |
|
"eval_runtime": 3.0757, |
|
"eval_samples_per_second": 32.513, |
|
"eval_steps_per_second": 4.227, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.40984559059143066, |
|
"eval_runtime": 3.0757, |
|
"eval_samples_per_second": 32.513, |
|
"eval_steps_per_second": 4.227, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.41323602199554443, |
|
"eval_runtime": 3.0748, |
|
"eval_samples_per_second": 32.522, |
|
"eval_steps_per_second": 4.228, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.3924804627895355, |
|
"eval_runtime": 3.0886, |
|
"eval_samples_per_second": 32.377, |
|
"eval_steps_per_second": 4.209, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.015, |
|
"loss": 0.7171, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.3957255482673645, |
|
"eval_runtime": 3.0914, |
|
"eval_samples_per_second": 32.348, |
|
"eval_steps_per_second": 4.205, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.42917293310165405, |
|
"eval_runtime": 3.0689, |
|
"eval_samples_per_second": 32.585, |
|
"eval_steps_per_second": 4.236, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.40250298380851746, |
|
"eval_runtime": 3.0667, |
|
"eval_samples_per_second": 32.608, |
|
"eval_steps_per_second": 4.239, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.3997068703174591, |
|
"eval_runtime": 3.0617, |
|
"eval_samples_per_second": 32.662, |
|
"eval_steps_per_second": 4.246, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.4115046560764313, |
|
"eval_runtime": 3.0625, |
|
"eval_samples_per_second": 32.653, |
|
"eval_steps_per_second": 4.245, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.40437012910842896, |
|
"eval_runtime": 3.062, |
|
"eval_samples_per_second": 32.659, |
|
"eval_steps_per_second": 4.246, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.409763902425766, |
|
"eval_runtime": 3.0623, |
|
"eval_samples_per_second": 32.655, |
|
"eval_steps_per_second": 4.245, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.40510663390159607, |
|
"eval_runtime": 3.0617, |
|
"eval_samples_per_second": 32.661, |
|
"eval_steps_per_second": 4.246, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.42438602447509766, |
|
"eval_runtime": 3.0608, |
|
"eval_samples_per_second": 32.671, |
|
"eval_steps_per_second": 4.247, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4031755328178406, |
|
"eval_runtime": 3.0609, |
|
"eval_samples_per_second": 32.67, |
|
"eval_steps_per_second": 4.247, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.413577675819397, |
|
"eval_runtime": 3.0625, |
|
"eval_samples_per_second": 32.653, |
|
"eval_steps_per_second": 4.245, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.3992563486099243, |
|
"eval_runtime": 3.064, |
|
"eval_samples_per_second": 32.637, |
|
"eval_steps_per_second": 4.243, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.417043000459671, |
|
"eval_runtime": 3.0628, |
|
"eval_samples_per_second": 32.65, |
|
"eval_steps_per_second": 4.245, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.40380868315696716, |
|
"eval_runtime": 3.0665, |
|
"eval_samples_per_second": 32.611, |
|
"eval_steps_per_second": 4.239, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.42510226368904114, |
|
"eval_runtime": 3.0667, |
|
"eval_samples_per_second": 32.608, |
|
"eval_steps_per_second": 4.239, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4078834056854248, |
|
"eval_runtime": 3.0638, |
|
"eval_samples_per_second": 32.639, |
|
"eval_steps_per_second": 4.243, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4119352102279663, |
|
"eval_runtime": 3.0725, |
|
"eval_samples_per_second": 32.547, |
|
"eval_steps_per_second": 4.231, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4074689745903015, |
|
"eval_runtime": 3.0606, |
|
"eval_samples_per_second": 32.674, |
|
"eval_steps_per_second": 4.248, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.44055667519569397, |
|
"eval_runtime": 3.0609, |
|
"eval_samples_per_second": 32.67, |
|
"eval_steps_per_second": 4.247, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4081181287765503, |
|
"eval_runtime": 3.061, |
|
"eval_samples_per_second": 32.669, |
|
"eval_steps_per_second": 4.247, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.01, |
|
"loss": 0.4731, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4190601110458374, |
|
"eval_runtime": 3.0627, |
|
"eval_samples_per_second": 32.651, |
|
"eval_steps_per_second": 4.245, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4217339754104614, |
|
"eval_runtime": 3.0615, |
|
"eval_samples_per_second": 32.663, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.39827075600624084, |
|
"eval_runtime": 3.0673, |
|
"eval_samples_per_second": 32.602, |
|
"eval_steps_per_second": 4.238, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.40923720598220825, |
|
"eval_runtime": 3.0598, |
|
"eval_samples_per_second": 32.682, |
|
"eval_steps_per_second": 4.249, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.42479878664016724, |
|
"eval_runtime": 3.0596, |
|
"eval_samples_per_second": 32.684, |
|
"eval_steps_per_second": 4.249, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4218236804008484, |
|
"eval_runtime": 3.059, |
|
"eval_samples_per_second": 32.691, |
|
"eval_steps_per_second": 4.25, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.4371417164802551, |
|
"eval_runtime": 3.0613, |
|
"eval_samples_per_second": 32.666, |
|
"eval_steps_per_second": 4.247, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.4098566472530365, |
|
"eval_runtime": 3.0607, |
|
"eval_samples_per_second": 32.672, |
|
"eval_steps_per_second": 4.247, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.4299997091293335, |
|
"eval_runtime": 3.0624, |
|
"eval_samples_per_second": 32.654, |
|
"eval_steps_per_second": 4.245, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4093553125858307, |
|
"eval_runtime": 3.0616, |
|
"eval_samples_per_second": 32.662, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4205920100212097, |
|
"eval_runtime": 3.062, |
|
"eval_samples_per_second": 32.658, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4240824580192566, |
|
"eval_runtime": 3.0617, |
|
"eval_samples_per_second": 32.662, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4252602756023407, |
|
"eval_runtime": 3.0627, |
|
"eval_samples_per_second": 32.651, |
|
"eval_steps_per_second": 4.245, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4116606116294861, |
|
"eval_runtime": 3.0605, |
|
"eval_samples_per_second": 32.674, |
|
"eval_steps_per_second": 4.248, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.417370080947876, |
|
"eval_runtime": 3.0649, |
|
"eval_samples_per_second": 32.628, |
|
"eval_steps_per_second": 4.242, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.41312336921691895, |
|
"eval_runtime": 3.0627, |
|
"eval_samples_per_second": 32.651, |
|
"eval_steps_per_second": 4.245, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.42308780550956726, |
|
"eval_runtime": 3.0629, |
|
"eval_samples_per_second": 32.648, |
|
"eval_steps_per_second": 4.244, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.4059382379055023, |
|
"eval_runtime": 3.0606, |
|
"eval_samples_per_second": 32.674, |
|
"eval_steps_per_second": 4.248, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4168393313884735, |
|
"eval_runtime": 3.0634, |
|
"eval_samples_per_second": 32.643, |
|
"eval_steps_per_second": 4.244, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.42363443970680237, |
|
"eval_runtime": 3.0615, |
|
"eval_samples_per_second": 32.664, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.005, |
|
"loss": 0.4204, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.400055855512619, |
|
"eval_runtime": 3.0593, |
|
"eval_samples_per_second": 32.687, |
|
"eval_steps_per_second": 4.249, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.41580745577812195, |
|
"eval_runtime": 3.0632, |
|
"eval_samples_per_second": 32.645, |
|
"eval_steps_per_second": 4.244, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.43029120564460754, |
|
"eval_runtime": 3.0605, |
|
"eval_samples_per_second": 32.675, |
|
"eval_steps_per_second": 4.248, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.41548973321914673, |
|
"eval_runtime": 3.0606, |
|
"eval_samples_per_second": 32.673, |
|
"eval_steps_per_second": 4.247, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.41950705647468567, |
|
"eval_runtime": 3.0588, |
|
"eval_samples_per_second": 32.692, |
|
"eval_steps_per_second": 4.25, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.43146055936813354, |
|
"eval_runtime": 3.0608, |
|
"eval_samples_per_second": 32.671, |
|
"eval_steps_per_second": 4.247, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4239749610424042, |
|
"eval_runtime": 3.0598, |
|
"eval_samples_per_second": 32.682, |
|
"eval_steps_per_second": 4.249, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4191063344478607, |
|
"eval_runtime": 3.0607, |
|
"eval_samples_per_second": 32.672, |
|
"eval_steps_per_second": 4.247, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.42141807079315186, |
|
"eval_runtime": 3.0592, |
|
"eval_samples_per_second": 32.688, |
|
"eval_steps_per_second": 4.249, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.41697102785110474, |
|
"eval_runtime": 3.0626, |
|
"eval_samples_per_second": 32.652, |
|
"eval_steps_per_second": 4.245, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.41583549976348877, |
|
"eval_runtime": 3.0593, |
|
"eval_samples_per_second": 32.687, |
|
"eval_steps_per_second": 4.249, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.42303329706192017, |
|
"eval_runtime": 3.0591, |
|
"eval_samples_per_second": 32.69, |
|
"eval_steps_per_second": 4.25, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.41061896085739136, |
|
"eval_runtime": 3.0576, |
|
"eval_samples_per_second": 32.706, |
|
"eval_steps_per_second": 4.252, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.42553210258483887, |
|
"eval_runtime": 3.056, |
|
"eval_samples_per_second": 32.723, |
|
"eval_steps_per_second": 4.254, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4223473072052002, |
|
"eval_runtime": 3.057, |
|
"eval_samples_per_second": 32.712, |
|
"eval_steps_per_second": 4.253, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.41237103939056396, |
|
"eval_runtime": 3.0557, |
|
"eval_samples_per_second": 32.725, |
|
"eval_steps_per_second": 4.254, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.4114343225955963, |
|
"eval_runtime": 3.0557, |
|
"eval_samples_per_second": 32.726, |
|
"eval_steps_per_second": 4.254, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4114573001861572, |
|
"eval_runtime": 3.0539, |
|
"eval_samples_per_second": 32.745, |
|
"eval_steps_per_second": 4.257, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4135931432247162, |
|
"eval_runtime": 3.0553, |
|
"eval_samples_per_second": 32.73, |
|
"eval_steps_per_second": 4.255, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.41504940390586853, |
|
"eval_runtime": 3.0541, |
|
"eval_samples_per_second": 32.743, |
|
"eval_steps_per_second": 4.257, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.3939, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.41364365816116333, |
|
"eval_runtime": 3.0573, |
|
"eval_samples_per_second": 32.709, |
|
"eval_steps_per_second": 4.252, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 37, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.5011474990844726, |
|
"train_runtime": 1641.5222, |
|
"train_samples_per_second": 19.494, |
|
"train_steps_per_second": 1.218 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|