|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 0.46248623728752136, |
|
"eval_runtime": 2.8961, |
|
"eval_samples_per_second": 34.53, |
|
"eval_steps_per_second": 4.489, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.45, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.48594656586647034, |
|
"eval_runtime": 2.9335, |
|
"eval_samples_per_second": 34.089, |
|
"eval_steps_per_second": 4.432, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.4227062165737152, |
|
"eval_runtime": 2.9668, |
|
"eval_samples_per_second": 33.707, |
|
"eval_steps_per_second": 4.382, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.4247153103351593, |
|
"eval_runtime": 2.9923, |
|
"eval_samples_per_second": 33.419, |
|
"eval_steps_per_second": 4.344, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.44806617498397827, |
|
"eval_runtime": 3.0015, |
|
"eval_samples_per_second": 33.317, |
|
"eval_steps_per_second": 4.331, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.4309893846511841, |
|
"eval_runtime": 3.023, |
|
"eval_samples_per_second": 33.079, |
|
"eval_steps_per_second": 4.3, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.47, |
|
"eval_loss": 0.4267328679561615, |
|
"eval_runtime": 3.0288, |
|
"eval_samples_per_second": 33.017, |
|
"eval_steps_per_second": 4.292, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.42460113763809204, |
|
"eval_runtime": 3.0312, |
|
"eval_samples_per_second": 32.99, |
|
"eval_steps_per_second": 4.289, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.426717609167099, |
|
"eval_runtime": 3.0248, |
|
"eval_samples_per_second": 33.06, |
|
"eval_steps_per_second": 4.298, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.4260082542896271, |
|
"eval_runtime": 3.0266, |
|
"eval_samples_per_second": 33.04, |
|
"eval_steps_per_second": 4.295, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.42262589931488037, |
|
"eval_runtime": 3.027, |
|
"eval_samples_per_second": 33.037, |
|
"eval_steps_per_second": 4.295, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.427063524723053, |
|
"eval_runtime": 3.0293, |
|
"eval_samples_per_second": 33.011, |
|
"eval_steps_per_second": 4.291, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.42662951350212097, |
|
"eval_runtime": 3.0315, |
|
"eval_samples_per_second": 32.987, |
|
"eval_steps_per_second": 4.288, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.42443209886550903, |
|
"eval_runtime": 3.0363, |
|
"eval_samples_per_second": 32.935, |
|
"eval_steps_per_second": 4.282, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.42530936002731323, |
|
"eval_runtime": 3.0344, |
|
"eval_samples_per_second": 32.956, |
|
"eval_steps_per_second": 4.284, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.4255802631378174, |
|
"eval_runtime": 3.0396, |
|
"eval_samples_per_second": 32.9, |
|
"eval_steps_per_second": 4.277, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.4265216588973999, |
|
"eval_runtime": 3.0416, |
|
"eval_samples_per_second": 32.877, |
|
"eval_steps_per_second": 4.274, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.42, |
|
"eval_loss": 0.42613503336906433, |
|
"eval_runtime": 3.0532, |
|
"eval_samples_per_second": 32.753, |
|
"eval_steps_per_second": 4.258, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.46, |
|
"eval_loss": 0.4262290298938751, |
|
"eval_runtime": 3.0418, |
|
"eval_samples_per_second": 32.875, |
|
"eval_steps_per_second": 4.274, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.037500000000000006, |
|
"loss": 1.4009, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.47, |
|
"eval_loss": 0.4260247051715851, |
|
"eval_runtime": 3.0426, |
|
"eval_samples_per_second": 32.867, |
|
"eval_steps_per_second": 4.273, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.42, |
|
"eval_loss": 0.4285390377044678, |
|
"eval_runtime": 3.0444, |
|
"eval_samples_per_second": 32.848, |
|
"eval_steps_per_second": 4.27, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.4259931147098541, |
|
"eval_runtime": 3.0428, |
|
"eval_samples_per_second": 32.865, |
|
"eval_steps_per_second": 4.272, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.54, |
|
"eval_loss": 0.42451244592666626, |
|
"eval_runtime": 3.0442, |
|
"eval_samples_per_second": 32.849, |
|
"eval_steps_per_second": 4.27, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.54, |
|
"eval_loss": 0.42513129115104675, |
|
"eval_runtime": 3.0467, |
|
"eval_samples_per_second": 32.823, |
|
"eval_steps_per_second": 4.267, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.46, |
|
"eval_loss": 0.42705675959587097, |
|
"eval_runtime": 3.0463, |
|
"eval_samples_per_second": 32.827, |
|
"eval_steps_per_second": 4.268, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.46, |
|
"eval_loss": 0.4261193871498108, |
|
"eval_runtime": 3.0479, |
|
"eval_samples_per_second": 32.81, |
|
"eval_steps_per_second": 4.265, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.42566367983818054, |
|
"eval_runtime": 3.0456, |
|
"eval_samples_per_second": 32.834, |
|
"eval_steps_per_second": 4.268, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.42552897334098816, |
|
"eval_runtime": 3.0459, |
|
"eval_samples_per_second": 32.831, |
|
"eval_steps_per_second": 4.268, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.42543452978134155, |
|
"eval_runtime": 3.0492, |
|
"eval_samples_per_second": 32.796, |
|
"eval_steps_per_second": 4.263, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.4259891211986542, |
|
"eval_runtime": 3.0481, |
|
"eval_samples_per_second": 32.808, |
|
"eval_steps_per_second": 4.265, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.4255729913711548, |
|
"eval_runtime": 3.0503, |
|
"eval_samples_per_second": 32.784, |
|
"eval_steps_per_second": 4.262, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.4256853759288788, |
|
"eval_runtime": 3.0494, |
|
"eval_samples_per_second": 32.793, |
|
"eval_steps_per_second": 4.263, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.42549124360084534, |
|
"eval_runtime": 3.0483, |
|
"eval_samples_per_second": 32.806, |
|
"eval_steps_per_second": 4.265, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.54, |
|
"eval_loss": 0.4255514144897461, |
|
"eval_runtime": 3.0467, |
|
"eval_samples_per_second": 32.823, |
|
"eval_steps_per_second": 4.267, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.4261658191680908, |
|
"eval_runtime": 3.0508, |
|
"eval_samples_per_second": 32.778, |
|
"eval_steps_per_second": 4.261, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.4256916344165802, |
|
"eval_runtime": 3.0492, |
|
"eval_samples_per_second": 32.795, |
|
"eval_steps_per_second": 4.263, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.42672908306121826, |
|
"eval_runtime": 3.0502, |
|
"eval_samples_per_second": 32.785, |
|
"eval_steps_per_second": 4.262, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.48, |
|
"eval_loss": 0.4258723556995392, |
|
"eval_runtime": 3.0474, |
|
"eval_samples_per_second": 32.815, |
|
"eval_steps_per_second": 4.266, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.4254632890224457, |
|
"eval_runtime": 3.0495, |
|
"eval_samples_per_second": 32.792, |
|
"eval_steps_per_second": 4.263, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.025, |
|
"loss": 0.9833, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.42543506622314453, |
|
"eval_runtime": 3.0505, |
|
"eval_samples_per_second": 32.782, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.4257011413574219, |
|
"eval_runtime": 3.0503, |
|
"eval_samples_per_second": 32.783, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.42543143033981323, |
|
"eval_runtime": 3.0485, |
|
"eval_samples_per_second": 32.803, |
|
"eval_steps_per_second": 4.264, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.48, |
|
"eval_loss": 0.42607200145721436, |
|
"eval_runtime": 3.0551, |
|
"eval_samples_per_second": 32.732, |
|
"eval_steps_per_second": 4.255, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.4259500503540039, |
|
"eval_runtime": 3.0508, |
|
"eval_samples_per_second": 32.779, |
|
"eval_steps_per_second": 4.261, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.42568397521972656, |
|
"eval_runtime": 3.0487, |
|
"eval_samples_per_second": 32.801, |
|
"eval_steps_per_second": 4.264, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.4254191517829895, |
|
"eval_runtime": 3.0504, |
|
"eval_samples_per_second": 32.782, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.42553916573524475, |
|
"eval_runtime": 3.0517, |
|
"eval_samples_per_second": 32.769, |
|
"eval_steps_per_second": 4.26, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.48, |
|
"eval_loss": 0.42568346858024597, |
|
"eval_runtime": 3.05, |
|
"eval_samples_per_second": 32.787, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.42611098289489746, |
|
"eval_runtime": 3.0501, |
|
"eval_samples_per_second": 32.786, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.42508599162101746, |
|
"eval_runtime": 3.0503, |
|
"eval_samples_per_second": 32.784, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.47, |
|
"eval_loss": 0.4258117377758026, |
|
"eval_runtime": 3.0511, |
|
"eval_samples_per_second": 32.775, |
|
"eval_steps_per_second": 4.261, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.42553529143333435, |
|
"eval_runtime": 3.0496, |
|
"eval_samples_per_second": 32.792, |
|
"eval_steps_per_second": 4.263, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.42570650577545166, |
|
"eval_runtime": 3.0501, |
|
"eval_samples_per_second": 32.786, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.4255848228931427, |
|
"eval_runtime": 3.0507, |
|
"eval_samples_per_second": 32.78, |
|
"eval_steps_per_second": 4.261, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.4257044196128845, |
|
"eval_runtime": 3.0575, |
|
"eval_samples_per_second": 32.706, |
|
"eval_steps_per_second": 4.252, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.4256804585456848, |
|
"eval_runtime": 3.0497, |
|
"eval_samples_per_second": 32.79, |
|
"eval_steps_per_second": 4.263, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.4257115423679352, |
|
"eval_runtime": 3.0519, |
|
"eval_samples_per_second": 32.766, |
|
"eval_steps_per_second": 4.26, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.4257067143917084, |
|
"eval_runtime": 3.0511, |
|
"eval_samples_per_second": 32.775, |
|
"eval_steps_per_second": 4.261, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.42552703619003296, |
|
"eval_runtime": 3.052, |
|
"eval_samples_per_second": 32.765, |
|
"eval_steps_per_second": 4.259, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.0125, |
|
"loss": 0.7428, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.46, |
|
"eval_loss": 0.42590823769569397, |
|
"eval_runtime": 3.0534, |
|
"eval_samples_per_second": 32.75, |
|
"eval_steps_per_second": 4.258, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.4257360100746155, |
|
"eval_runtime": 3.0514, |
|
"eval_samples_per_second": 32.772, |
|
"eval_steps_per_second": 4.26, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.4255290627479553, |
|
"eval_runtime": 3.0557, |
|
"eval_samples_per_second": 32.725, |
|
"eval_steps_per_second": 4.254, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.42556220293045044, |
|
"eval_runtime": 3.05, |
|
"eval_samples_per_second": 32.787, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.4258395731449127, |
|
"eval_runtime": 3.0496, |
|
"eval_samples_per_second": 32.791, |
|
"eval_steps_per_second": 4.263, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.4258385896682739, |
|
"eval_runtime": 3.0514, |
|
"eval_samples_per_second": 32.771, |
|
"eval_steps_per_second": 4.26, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.4258986711502075, |
|
"eval_runtime": 3.0504, |
|
"eval_samples_per_second": 32.782, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.38, |
|
"eval_loss": 0.4259600341320038, |
|
"eval_runtime": 3.0516, |
|
"eval_samples_per_second": 32.77, |
|
"eval_steps_per_second": 4.26, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.4256523549556732, |
|
"eval_runtime": 3.0526, |
|
"eval_samples_per_second": 32.759, |
|
"eval_steps_per_second": 4.259, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.35, |
|
"eval_loss": 0.42585432529449463, |
|
"eval_runtime": 3.0509, |
|
"eval_samples_per_second": 32.777, |
|
"eval_steps_per_second": 4.261, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.38, |
|
"eval_loss": 0.4259151518344879, |
|
"eval_runtime": 3.0508, |
|
"eval_samples_per_second": 32.779, |
|
"eval_steps_per_second": 4.261, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.4259404242038727, |
|
"eval_runtime": 3.052, |
|
"eval_samples_per_second": 32.766, |
|
"eval_steps_per_second": 4.26, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.42599546909332275, |
|
"eval_runtime": 3.0519, |
|
"eval_samples_per_second": 32.766, |
|
"eval_steps_per_second": 4.26, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 0.42574265599250793, |
|
"eval_runtime": 3.0522, |
|
"eval_samples_per_second": 32.764, |
|
"eval_steps_per_second": 4.259, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.42, |
|
"eval_loss": 0.4257790446281433, |
|
"eval_runtime": 3.0506, |
|
"eval_samples_per_second": 32.781, |
|
"eval_steps_per_second": 4.261, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.4257981479167938, |
|
"eval_runtime": 3.0529, |
|
"eval_samples_per_second": 32.755, |
|
"eval_steps_per_second": 4.258, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.4257836639881134, |
|
"eval_runtime": 3.0536, |
|
"eval_samples_per_second": 32.748, |
|
"eval_steps_per_second": 4.257, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 0.42581596970558167, |
|
"eval_runtime": 3.0529, |
|
"eval_samples_per_second": 32.756, |
|
"eval_steps_per_second": 4.258, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.4258066415786743, |
|
"eval_runtime": 3.0527, |
|
"eval_samples_per_second": 32.758, |
|
"eval_steps_per_second": 4.258, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.4258076846599579, |
|
"eval_runtime": 3.0537, |
|
"eval_samples_per_second": 32.747, |
|
"eval_steps_per_second": 4.257, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.6138, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.4258350431919098, |
|
"eval_runtime": 3.0537, |
|
"eval_samples_per_second": 32.747, |
|
"eval_steps_per_second": 4.257, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.9352073516845704, |
|
"train_runtime": 1633.4708, |
|
"train_samples_per_second": 19.59, |
|
"train_steps_per_second": 1.224 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|