|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.5651425123214722, |
|
"eval_runtime": 2.9028, |
|
"eval_samples_per_second": 34.45, |
|
"eval_steps_per_second": 4.478, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.49, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.5092301368713379, |
|
"eval_runtime": 2.9462, |
|
"eval_samples_per_second": 33.943, |
|
"eval_steps_per_second": 4.413, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.6, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5062299370765686, |
|
"eval_runtime": 2.9899, |
|
"eval_samples_per_second": 33.446, |
|
"eval_steps_per_second": 4.348, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.63, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4843701422214508, |
|
"eval_runtime": 3.0147, |
|
"eval_samples_per_second": 33.171, |
|
"eval_steps_per_second": 4.312, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.441400945186615, |
|
"eval_runtime": 3.0319, |
|
"eval_samples_per_second": 32.982, |
|
"eval_steps_per_second": 4.288, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.4265769124031067, |
|
"eval_runtime": 3.0555, |
|
"eval_samples_per_second": 32.728, |
|
"eval_steps_per_second": 4.255, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.4179702401161194, |
|
"eval_runtime": 3.0661, |
|
"eval_samples_per_second": 32.615, |
|
"eval_steps_per_second": 4.24, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.46249979734420776, |
|
"eval_runtime": 3.0777, |
|
"eval_samples_per_second": 32.492, |
|
"eval_steps_per_second": 4.224, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.41327133774757385, |
|
"eval_runtime": 3.0835, |
|
"eval_samples_per_second": 32.43, |
|
"eval_steps_per_second": 4.216, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.42823413014411926, |
|
"eval_runtime": 3.0882, |
|
"eval_samples_per_second": 32.382, |
|
"eval_steps_per_second": 4.21, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.41956833004951477, |
|
"eval_runtime": 3.0926, |
|
"eval_samples_per_second": 32.335, |
|
"eval_steps_per_second": 4.204, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.40657806396484375, |
|
"eval_runtime": 3.092, |
|
"eval_samples_per_second": 32.342, |
|
"eval_steps_per_second": 4.204, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.40092143416404724, |
|
"eval_runtime": 3.0926, |
|
"eval_samples_per_second": 32.336, |
|
"eval_steps_per_second": 4.204, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.39529332518577576, |
|
"eval_runtime": 3.0883, |
|
"eval_samples_per_second": 32.381, |
|
"eval_steps_per_second": 4.209, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.39530593156814575, |
|
"eval_runtime": 3.0882, |
|
"eval_samples_per_second": 32.382, |
|
"eval_steps_per_second": 4.21, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4115433990955353, |
|
"eval_runtime": 3.085, |
|
"eval_samples_per_second": 32.415, |
|
"eval_steps_per_second": 4.214, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.38952964544296265, |
|
"eval_runtime": 3.0886, |
|
"eval_samples_per_second": 32.377, |
|
"eval_steps_per_second": 4.209, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.427374929189682, |
|
"eval_runtime": 3.0847, |
|
"eval_samples_per_second": 32.418, |
|
"eval_steps_per_second": 4.214, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.39974284172058105, |
|
"eval_runtime": 3.085, |
|
"eval_samples_per_second": 32.415, |
|
"eval_steps_per_second": 4.214, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0075, |
|
"loss": 0.6183, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.3965441584587097, |
|
"eval_runtime": 3.085, |
|
"eval_samples_per_second": 32.415, |
|
"eval_steps_per_second": 4.214, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.43516531586647034, |
|
"eval_runtime": 3.0853, |
|
"eval_samples_per_second": 32.412, |
|
"eval_steps_per_second": 4.214, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.68, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.4253265857696533, |
|
"eval_runtime": 3.0836, |
|
"eval_samples_per_second": 32.43, |
|
"eval_steps_per_second": 4.216, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 21, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.3890969753265381, |
|
"eval_runtime": 3.0862, |
|
"eval_samples_per_second": 32.402, |
|
"eval_steps_per_second": 4.212, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 21, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.4323919415473938, |
|
"eval_runtime": 3.0876, |
|
"eval_samples_per_second": 32.387, |
|
"eval_steps_per_second": 4.21, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 21, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.4395662248134613, |
|
"eval_runtime": 3.0865, |
|
"eval_samples_per_second": 32.399, |
|
"eval_steps_per_second": 4.212, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.43163925409317017, |
|
"eval_runtime": 3.0856, |
|
"eval_samples_per_second": 32.408, |
|
"eval_steps_per_second": 4.213, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.39514267444610596, |
|
"eval_runtime": 3.0856, |
|
"eval_samples_per_second": 32.408, |
|
"eval_steps_per_second": 4.213, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4021581709384918, |
|
"eval_runtime": 3.0862, |
|
"eval_samples_per_second": 32.402, |
|
"eval_steps_per_second": 4.212, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.42091983556747437, |
|
"eval_runtime": 3.0882, |
|
"eval_samples_per_second": 32.382, |
|
"eval_steps_per_second": 4.21, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.4499932527542114, |
|
"eval_runtime": 3.0879, |
|
"eval_samples_per_second": 32.385, |
|
"eval_steps_per_second": 4.21, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4072466194629669, |
|
"eval_runtime": 3.0878, |
|
"eval_samples_per_second": 32.385, |
|
"eval_steps_per_second": 4.21, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.40178433060646057, |
|
"eval_runtime": 3.0877, |
|
"eval_samples_per_second": 32.387, |
|
"eval_steps_per_second": 4.21, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.41905662417411804, |
|
"eval_runtime": 3.089, |
|
"eval_samples_per_second": 32.373, |
|
"eval_steps_per_second": 4.208, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.3970845937728882, |
|
"eval_runtime": 3.0867, |
|
"eval_samples_per_second": 32.397, |
|
"eval_steps_per_second": 4.212, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.39993271231651306, |
|
"eval_runtime": 3.088, |
|
"eval_samples_per_second": 32.383, |
|
"eval_steps_per_second": 4.21, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.40247154235839844, |
|
"eval_runtime": 3.0865, |
|
"eval_samples_per_second": 32.399, |
|
"eval_steps_per_second": 4.212, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4090607762336731, |
|
"eval_runtime": 3.0867, |
|
"eval_samples_per_second": 32.397, |
|
"eval_steps_per_second": 4.212, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4060271084308624, |
|
"eval_runtime": 3.0865, |
|
"eval_samples_per_second": 32.399, |
|
"eval_steps_per_second": 4.212, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4415541887283325, |
|
"eval_runtime": 3.089, |
|
"eval_samples_per_second": 32.373, |
|
"eval_steps_per_second": 4.208, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.005, |
|
"loss": 0.4716, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.404102623462677, |
|
"eval_runtime": 3.0888, |
|
"eval_samples_per_second": 32.375, |
|
"eval_steps_per_second": 4.209, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4100326895713806, |
|
"eval_runtime": 3.0875, |
|
"eval_samples_per_second": 32.389, |
|
"eval_steps_per_second": 4.211, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.40424102544784546, |
|
"eval_runtime": 3.0901, |
|
"eval_samples_per_second": 32.362, |
|
"eval_steps_per_second": 4.207, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.3743560314178467, |
|
"eval_runtime": 3.0886, |
|
"eval_samples_per_second": 32.377, |
|
"eval_steps_per_second": 4.209, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.3826563358306885, |
|
"eval_runtime": 3.089, |
|
"eval_samples_per_second": 32.373, |
|
"eval_steps_per_second": 4.209, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.39410629868507385, |
|
"eval_runtime": 3.0915, |
|
"eval_samples_per_second": 32.346, |
|
"eval_steps_per_second": 4.205, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.430500864982605, |
|
"eval_runtime": 3.0889, |
|
"eval_samples_per_second": 32.373, |
|
"eval_steps_per_second": 4.209, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.40077340602874756, |
|
"eval_runtime": 3.0873, |
|
"eval_samples_per_second": 32.39, |
|
"eval_steps_per_second": 4.211, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.40273621678352356, |
|
"eval_runtime": 3.0892, |
|
"eval_samples_per_second": 32.371, |
|
"eval_steps_per_second": 4.208, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4023691415786743, |
|
"eval_runtime": 3.0893, |
|
"eval_samples_per_second": 32.369, |
|
"eval_steps_per_second": 4.208, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3937583267688751, |
|
"eval_runtime": 3.0877, |
|
"eval_samples_per_second": 32.387, |
|
"eval_steps_per_second": 4.21, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.38425591588020325, |
|
"eval_runtime": 3.0873, |
|
"eval_samples_per_second": 32.391, |
|
"eval_steps_per_second": 4.211, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.3911306858062744, |
|
"eval_runtime": 3.0857, |
|
"eval_samples_per_second": 32.407, |
|
"eval_steps_per_second": 4.213, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.3854801654815674, |
|
"eval_runtime": 3.0878, |
|
"eval_samples_per_second": 32.385, |
|
"eval_steps_per_second": 4.21, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3934061527252197, |
|
"eval_runtime": 3.0865, |
|
"eval_samples_per_second": 32.399, |
|
"eval_steps_per_second": 4.212, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.40291014313697815, |
|
"eval_runtime": 3.0864, |
|
"eval_samples_per_second": 32.401, |
|
"eval_steps_per_second": 4.212, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.38784506916999817, |
|
"eval_runtime": 3.0835, |
|
"eval_samples_per_second": 32.43, |
|
"eval_steps_per_second": 4.216, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3839341104030609, |
|
"eval_runtime": 3.0855, |
|
"eval_samples_per_second": 32.41, |
|
"eval_steps_per_second": 4.213, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.3942648768424988, |
|
"eval_runtime": 3.0854, |
|
"eval_samples_per_second": 32.411, |
|
"eval_steps_per_second": 4.213, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.3984474539756775, |
|
"eval_runtime": 3.0848, |
|
"eval_samples_per_second": 32.417, |
|
"eval_steps_per_second": 4.214, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.0025, |
|
"loss": 0.4121, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4064193069934845, |
|
"eval_runtime": 3.0855, |
|
"eval_samples_per_second": 32.41, |
|
"eval_steps_per_second": 4.213, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3871249258518219, |
|
"eval_runtime": 3.0854, |
|
"eval_samples_per_second": 32.411, |
|
"eval_steps_per_second": 4.213, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.41410523653030396, |
|
"eval_runtime": 3.0839, |
|
"eval_samples_per_second": 32.427, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.38500529527664185, |
|
"eval_runtime": 3.0874, |
|
"eval_samples_per_second": 32.39, |
|
"eval_steps_per_second": 4.211, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.3933054804801941, |
|
"eval_runtime": 3.0858, |
|
"eval_samples_per_second": 32.406, |
|
"eval_steps_per_second": 4.213, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.40550005435943604, |
|
"eval_runtime": 3.0847, |
|
"eval_samples_per_second": 32.418, |
|
"eval_steps_per_second": 4.214, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.38515716791152954, |
|
"eval_runtime": 3.084, |
|
"eval_samples_per_second": 32.425, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.39516574144363403, |
|
"eval_runtime": 3.0876, |
|
"eval_samples_per_second": 32.387, |
|
"eval_steps_per_second": 4.21, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.38739094138145447, |
|
"eval_runtime": 3.0845, |
|
"eval_samples_per_second": 32.42, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3999227285385132, |
|
"eval_runtime": 3.0852, |
|
"eval_samples_per_second": 32.413, |
|
"eval_steps_per_second": 4.214, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.39561355113983154, |
|
"eval_runtime": 3.0845, |
|
"eval_samples_per_second": 32.42, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.39178183674812317, |
|
"eval_runtime": 3.0849, |
|
"eval_samples_per_second": 32.416, |
|
"eval_steps_per_second": 4.214, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.38587287068367004, |
|
"eval_runtime": 3.0854, |
|
"eval_samples_per_second": 32.411, |
|
"eval_steps_per_second": 4.213, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.39257708191871643, |
|
"eval_runtime": 3.0824, |
|
"eval_samples_per_second": 32.443, |
|
"eval_steps_per_second": 4.218, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.38968443870544434, |
|
"eval_runtime": 3.084, |
|
"eval_samples_per_second": 32.425, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3859497010707855, |
|
"eval_runtime": 3.0819, |
|
"eval_samples_per_second": 32.447, |
|
"eval_steps_per_second": 4.218, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.38490238785743713, |
|
"eval_runtime": 3.083, |
|
"eval_samples_per_second": 32.436, |
|
"eval_steps_per_second": 4.217, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3855893313884735, |
|
"eval_runtime": 3.0824, |
|
"eval_samples_per_second": 32.442, |
|
"eval_steps_per_second": 4.217, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3901687562465668, |
|
"eval_runtime": 3.0824, |
|
"eval_samples_per_second": 32.442, |
|
"eval_steps_per_second": 4.217, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3903580904006958, |
|
"eval_runtime": 3.0842, |
|
"eval_samples_per_second": 32.423, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.3881, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.3895449936389923, |
|
"eval_runtime": 3.0918, |
|
"eval_samples_per_second": 32.344, |
|
"eval_steps_per_second": 4.205, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.47253459167480466, |
|
"train_runtime": 1645.8876, |
|
"train_samples_per_second": 19.442, |
|
"train_steps_per_second": 1.215 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|