|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.6199389696121216, |
|
"eval_runtime": 2.9463, |
|
"eval_samples_per_second": 33.94, |
|
"eval_steps_per_second": 4.412, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.664335310459137, |
|
"eval_runtime": 2.9957, |
|
"eval_samples_per_second": 33.381, |
|
"eval_steps_per_second": 4.34, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.59, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.54, |
|
"eval_loss": 0.5066738724708557, |
|
"eval_runtime": 3.0312, |
|
"eval_samples_per_second": 32.99, |
|
"eval_steps_per_second": 4.289, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.59, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.4272070825099945, |
|
"eval_runtime": 3.0601, |
|
"eval_samples_per_second": 32.679, |
|
"eval_steps_per_second": 4.248, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.63, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.4341088831424713, |
|
"eval_runtime": 3.0787, |
|
"eval_samples_per_second": 32.482, |
|
"eval_steps_per_second": 4.223, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.63, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.4487706124782562, |
|
"eval_runtime": 3.0875, |
|
"eval_samples_per_second": 32.388, |
|
"eval_steps_per_second": 4.21, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.63, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.4091510474681854, |
|
"eval_runtime": 3.0921, |
|
"eval_samples_per_second": 32.34, |
|
"eval_steps_per_second": 4.204, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.4563831686973572, |
|
"eval_runtime": 3.0938, |
|
"eval_samples_per_second": 32.322, |
|
"eval_steps_per_second": 4.202, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.43667757511138916, |
|
"eval_runtime": 3.0893, |
|
"eval_samples_per_second": 32.37, |
|
"eval_steps_per_second": 4.208, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.4343050420284271, |
|
"eval_runtime": 3.0861, |
|
"eval_samples_per_second": 32.403, |
|
"eval_steps_per_second": 4.212, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.41208839416503906, |
|
"eval_runtime": 3.0842, |
|
"eval_samples_per_second": 32.423, |
|
"eval_steps_per_second": 4.215, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4299950897693634, |
|
"eval_runtime": 3.0824, |
|
"eval_samples_per_second": 32.442, |
|
"eval_steps_per_second": 4.217, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.423895925283432, |
|
"eval_runtime": 3.0828, |
|
"eval_samples_per_second": 32.438, |
|
"eval_steps_per_second": 4.217, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.4147571325302124, |
|
"eval_runtime": 3.0847, |
|
"eval_samples_per_second": 32.418, |
|
"eval_steps_per_second": 4.214, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.43110018968582153, |
|
"eval_runtime": 3.0818, |
|
"eval_samples_per_second": 32.449, |
|
"eval_steps_per_second": 4.218, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.41430824995040894, |
|
"eval_runtime": 3.0802, |
|
"eval_samples_per_second": 32.465, |
|
"eval_steps_per_second": 4.22, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.4166063964366913, |
|
"eval_runtime": 3.0812, |
|
"eval_samples_per_second": 32.455, |
|
"eval_steps_per_second": 4.219, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41197219491004944, |
|
"eval_runtime": 3.0825, |
|
"eval_samples_per_second": 32.441, |
|
"eval_steps_per_second": 4.217, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.4121343493461609, |
|
"eval_runtime": 3.0809, |
|
"eval_samples_per_second": 32.458, |
|
"eval_steps_per_second": 4.22, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0075, |
|
"loss": 0.6423, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.40662243962287903, |
|
"eval_runtime": 3.0833, |
|
"eval_samples_per_second": 32.433, |
|
"eval_steps_per_second": 4.216, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.404727965593338, |
|
"eval_runtime": 3.0808, |
|
"eval_samples_per_second": 32.459, |
|
"eval_steps_per_second": 4.22, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.4215034246444702, |
|
"eval_runtime": 3.0822, |
|
"eval_samples_per_second": 32.444, |
|
"eval_steps_per_second": 4.218, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.4074056148529053, |
|
"eval_runtime": 3.0831, |
|
"eval_samples_per_second": 32.435, |
|
"eval_steps_per_second": 4.217, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.40676093101501465, |
|
"eval_runtime": 3.0829, |
|
"eval_samples_per_second": 32.437, |
|
"eval_steps_per_second": 4.217, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.4190574586391449, |
|
"eval_runtime": 3.0842, |
|
"eval_samples_per_second": 32.423, |
|
"eval_steps_per_second": 4.215, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.4035128355026245, |
|
"eval_runtime": 3.0849, |
|
"eval_samples_per_second": 32.416, |
|
"eval_steps_per_second": 4.214, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.4227715730667114, |
|
"eval_runtime": 3.0831, |
|
"eval_samples_per_second": 32.435, |
|
"eval_steps_per_second": 4.216, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4241797626018524, |
|
"eval_runtime": 3.082, |
|
"eval_samples_per_second": 32.446, |
|
"eval_steps_per_second": 4.218, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.42384228110313416, |
|
"eval_runtime": 3.0835, |
|
"eval_samples_per_second": 32.431, |
|
"eval_steps_per_second": 4.216, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.47883549332618713, |
|
"eval_runtime": 3.086, |
|
"eval_samples_per_second": 32.405, |
|
"eval_steps_per_second": 4.213, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.42142295837402344, |
|
"eval_runtime": 3.0841, |
|
"eval_samples_per_second": 32.424, |
|
"eval_steps_per_second": 4.215, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.42832931876182556, |
|
"eval_runtime": 3.0875, |
|
"eval_samples_per_second": 32.388, |
|
"eval_steps_per_second": 4.211, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4221520721912384, |
|
"eval_runtime": 3.0864, |
|
"eval_samples_per_second": 32.4, |
|
"eval_steps_per_second": 4.212, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4233008623123169, |
|
"eval_runtime": 3.086, |
|
"eval_samples_per_second": 32.404, |
|
"eval_steps_per_second": 4.213, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4400712549686432, |
|
"eval_runtime": 3.0875, |
|
"eval_samples_per_second": 32.388, |
|
"eval_steps_per_second": 4.21, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4583715796470642, |
|
"eval_runtime": 3.0883, |
|
"eval_samples_per_second": 32.38, |
|
"eval_steps_per_second": 4.209, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.43615418672561646, |
|
"eval_runtime": 3.0876, |
|
"eval_samples_per_second": 32.388, |
|
"eval_steps_per_second": 4.21, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.3988666534423828, |
|
"eval_runtime": 3.0861, |
|
"eval_samples_per_second": 32.403, |
|
"eval_steps_per_second": 4.212, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4379017651081085, |
|
"eval_runtime": 3.0871, |
|
"eval_samples_per_second": 32.393, |
|
"eval_steps_per_second": 4.211, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.005, |
|
"loss": 0.5234, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.40936079621315, |
|
"eval_runtime": 3.0882, |
|
"eval_samples_per_second": 32.382, |
|
"eval_steps_per_second": 4.21, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4683462381362915, |
|
"eval_runtime": 3.0846, |
|
"eval_samples_per_second": 32.419, |
|
"eval_steps_per_second": 4.214, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.43595993518829346, |
|
"eval_runtime": 3.0964, |
|
"eval_samples_per_second": 32.296, |
|
"eval_steps_per_second": 4.198, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.43824779987335205, |
|
"eval_runtime": 3.0945, |
|
"eval_samples_per_second": 32.315, |
|
"eval_steps_per_second": 4.201, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4057212769985199, |
|
"eval_runtime": 3.0836, |
|
"eval_samples_per_second": 32.43, |
|
"eval_steps_per_second": 4.216, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.42997491359710693, |
|
"eval_runtime": 3.0845, |
|
"eval_samples_per_second": 32.42, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.42525675892829895, |
|
"eval_runtime": 3.0823, |
|
"eval_samples_per_second": 32.443, |
|
"eval_steps_per_second": 4.218, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.43460795283317566, |
|
"eval_runtime": 3.0847, |
|
"eval_samples_per_second": 32.418, |
|
"eval_steps_per_second": 4.214, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4166887402534485, |
|
"eval_runtime": 3.0845, |
|
"eval_samples_per_second": 32.42, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.4572359025478363, |
|
"eval_runtime": 3.0805, |
|
"eval_samples_per_second": 32.462, |
|
"eval_steps_per_second": 4.22, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4412834644317627, |
|
"eval_runtime": 3.081, |
|
"eval_samples_per_second": 32.457, |
|
"eval_steps_per_second": 4.219, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4160061776638031, |
|
"eval_runtime": 3.0802, |
|
"eval_samples_per_second": 32.465, |
|
"eval_steps_per_second": 4.22, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.40441954135894775, |
|
"eval_runtime": 3.0786, |
|
"eval_samples_per_second": 32.482, |
|
"eval_steps_per_second": 4.223, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.42456939816474915, |
|
"eval_runtime": 3.0814, |
|
"eval_samples_per_second": 32.453, |
|
"eval_steps_per_second": 4.219, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.4075394570827484, |
|
"eval_runtime": 3.0794, |
|
"eval_samples_per_second": 32.474, |
|
"eval_steps_per_second": 4.222, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4202047288417816, |
|
"eval_runtime": 3.0797, |
|
"eval_samples_per_second": 32.471, |
|
"eval_steps_per_second": 4.221, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4381832480430603, |
|
"eval_runtime": 3.0788, |
|
"eval_samples_per_second": 32.481, |
|
"eval_steps_per_second": 4.222, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4282267391681671, |
|
"eval_runtime": 3.078, |
|
"eval_samples_per_second": 32.489, |
|
"eval_steps_per_second": 4.224, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.41453811526298523, |
|
"eval_runtime": 3.0841, |
|
"eval_samples_per_second": 32.424, |
|
"eval_steps_per_second": 4.215, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.42016276717185974, |
|
"eval_runtime": 3.0831, |
|
"eval_samples_per_second": 32.435, |
|
"eval_steps_per_second": 4.217, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.0025, |
|
"loss": 0.4334, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.42332953214645386, |
|
"eval_runtime": 3.0858, |
|
"eval_samples_per_second": 32.406, |
|
"eval_steps_per_second": 4.213, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.42850321531295776, |
|
"eval_runtime": 3.0831, |
|
"eval_samples_per_second": 32.435, |
|
"eval_steps_per_second": 4.217, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.42719802260398865, |
|
"eval_runtime": 3.0793, |
|
"eval_samples_per_second": 32.475, |
|
"eval_steps_per_second": 4.222, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4232694208621979, |
|
"eval_runtime": 3.0794, |
|
"eval_samples_per_second": 32.474, |
|
"eval_steps_per_second": 4.222, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4338792860507965, |
|
"eval_runtime": 3.0814, |
|
"eval_samples_per_second": 32.453, |
|
"eval_steps_per_second": 4.219, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.41708603501319885, |
|
"eval_runtime": 3.0817, |
|
"eval_samples_per_second": 32.449, |
|
"eval_steps_per_second": 4.218, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.40945830941200256, |
|
"eval_runtime": 3.0809, |
|
"eval_samples_per_second": 32.458, |
|
"eval_steps_per_second": 4.219, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4198063910007477, |
|
"eval_runtime": 3.0829, |
|
"eval_samples_per_second": 32.436, |
|
"eval_steps_per_second": 4.217, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.41699719429016113, |
|
"eval_runtime": 3.0779, |
|
"eval_samples_per_second": 32.49, |
|
"eval_steps_per_second": 4.224, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.42641326785087585, |
|
"eval_runtime": 3.086, |
|
"eval_samples_per_second": 32.405, |
|
"eval_steps_per_second": 4.213, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.43629851937294006, |
|
"eval_runtime": 3.0831, |
|
"eval_samples_per_second": 32.435, |
|
"eval_steps_per_second": 4.217, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.42060795426368713, |
|
"eval_runtime": 3.0701, |
|
"eval_samples_per_second": 32.572, |
|
"eval_steps_per_second": 4.234, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4196779727935791, |
|
"eval_runtime": 3.0659, |
|
"eval_samples_per_second": 32.617, |
|
"eval_steps_per_second": 4.24, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.43019339442253113, |
|
"eval_runtime": 3.067, |
|
"eval_samples_per_second": 32.605, |
|
"eval_steps_per_second": 4.239, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4256959855556488, |
|
"eval_runtime": 3.0648, |
|
"eval_samples_per_second": 32.629, |
|
"eval_steps_per_second": 4.242, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.41873273253440857, |
|
"eval_runtime": 3.0621, |
|
"eval_samples_per_second": 32.657, |
|
"eval_steps_per_second": 4.245, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.42521288990974426, |
|
"eval_runtime": 3.0634, |
|
"eval_samples_per_second": 32.644, |
|
"eval_steps_per_second": 4.244, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4271906316280365, |
|
"eval_runtime": 3.0628, |
|
"eval_samples_per_second": 32.65, |
|
"eval_steps_per_second": 4.245, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.4203490912914276, |
|
"eval_runtime": 3.0621, |
|
"eval_samples_per_second": 32.658, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.41599762439727783, |
|
"eval_runtime": 3.0609, |
|
"eval_samples_per_second": 32.67, |
|
"eval_steps_per_second": 4.247, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.4063, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.4164546728134155, |
|
"eval_runtime": 3.0632, |
|
"eval_samples_per_second": 32.646, |
|
"eval_steps_per_second": 4.244, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 39, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.5013418884277344, |
|
"train_runtime": 1645.3455, |
|
"train_samples_per_second": 19.449, |
|
"train_steps_per_second": 1.216 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|