|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.8357499837875366, |
|
"eval_runtime": 2.905, |
|
"eval_samples_per_second": 34.423, |
|
"eval_steps_per_second": 4.475, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.6364213228225708, |
|
"eval_runtime": 2.9225, |
|
"eval_samples_per_second": 34.218, |
|
"eval_steps_per_second": 4.448, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.7512836456298828, |
|
"eval_runtime": 2.9503, |
|
"eval_samples_per_second": 33.895, |
|
"eval_steps_per_second": 4.406, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.5949835181236267, |
|
"eval_runtime": 2.9767, |
|
"eval_samples_per_second": 33.594, |
|
"eval_steps_per_second": 4.367, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.6111248135566711, |
|
"eval_runtime": 3.0094, |
|
"eval_samples_per_second": 33.23, |
|
"eval_steps_per_second": 4.32, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.7314449548721313, |
|
"eval_runtime": 3.0154, |
|
"eval_samples_per_second": 33.163, |
|
"eval_steps_per_second": 4.311, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.6188446283340454, |
|
"eval_runtime": 3.0184, |
|
"eval_samples_per_second": 33.13, |
|
"eval_steps_per_second": 4.307, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 1.202831745147705, |
|
"eval_runtime": 3.0198, |
|
"eval_samples_per_second": 33.114, |
|
"eval_steps_per_second": 4.305, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.630307674407959, |
|
"eval_runtime": 3.0221, |
|
"eval_samples_per_second": 33.09, |
|
"eval_steps_per_second": 4.302, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.8704754114151001, |
|
"eval_runtime": 3.0231, |
|
"eval_samples_per_second": 33.078, |
|
"eval_steps_per_second": 4.3, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5481266379356384, |
|
"eval_runtime": 3.0256, |
|
"eval_samples_per_second": 33.052, |
|
"eval_steps_per_second": 4.297, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8700436949729919, |
|
"eval_runtime": 3.0239, |
|
"eval_samples_per_second": 33.07, |
|
"eval_steps_per_second": 4.299, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.7616304159164429, |
|
"eval_runtime": 3.027, |
|
"eval_samples_per_second": 33.035, |
|
"eval_steps_per_second": 4.295, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.7384722232818604, |
|
"eval_runtime": 3.0286, |
|
"eval_samples_per_second": 33.019, |
|
"eval_steps_per_second": 4.292, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.8501294851303101, |
|
"eval_runtime": 3.0347, |
|
"eval_samples_per_second": 32.952, |
|
"eval_steps_per_second": 4.284, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.49, |
|
"eval_loss": 0.695446252822876, |
|
"eval_runtime": 3.0254, |
|
"eval_samples_per_second": 33.053, |
|
"eval_steps_per_second": 4.297, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.6255203485488892, |
|
"eval_runtime": 3.021, |
|
"eval_samples_per_second": 33.102, |
|
"eval_steps_per_second": 4.303, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.38, |
|
"eval_loss": 0.6264253854751587, |
|
"eval_runtime": 3.0191, |
|
"eval_samples_per_second": 33.123, |
|
"eval_steps_per_second": 4.306, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.42, |
|
"eval_loss": 0.6275391578674316, |
|
"eval_runtime": 3.0206, |
|
"eval_samples_per_second": 33.106, |
|
"eval_steps_per_second": 4.304, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.037500000000000006, |
|
"loss": 1.5048, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.6259335279464722, |
|
"eval_runtime": 3.0208, |
|
"eval_samples_per_second": 33.103, |
|
"eval_steps_per_second": 4.303, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.42, |
|
"eval_loss": 0.6269640326499939, |
|
"eval_runtime": 3.0178, |
|
"eval_samples_per_second": 33.137, |
|
"eval_steps_per_second": 4.308, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.42, |
|
"eval_loss": 0.6274638772010803, |
|
"eval_runtime": 3.0203, |
|
"eval_samples_per_second": 33.11, |
|
"eval_steps_per_second": 4.304, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.6249264478683472, |
|
"eval_runtime": 3.0198, |
|
"eval_samples_per_second": 33.115, |
|
"eval_steps_per_second": 4.305, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.6269152164459229, |
|
"eval_runtime": 3.0189, |
|
"eval_samples_per_second": 33.124, |
|
"eval_steps_per_second": 4.306, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.6253538727760315, |
|
"eval_runtime": 3.0186, |
|
"eval_samples_per_second": 33.128, |
|
"eval_steps_per_second": 4.307, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 0.6265491247177124, |
|
"eval_runtime": 3.0189, |
|
"eval_samples_per_second": 33.124, |
|
"eval_steps_per_second": 4.306, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.6261975169181824, |
|
"eval_runtime": 3.0184, |
|
"eval_samples_per_second": 33.13, |
|
"eval_steps_per_second": 4.307, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.54, |
|
"eval_loss": 0.6247080564498901, |
|
"eval_runtime": 3.0202, |
|
"eval_samples_per_second": 33.11, |
|
"eval_steps_per_second": 4.304, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.6241124272346497, |
|
"eval_runtime": 3.0185, |
|
"eval_samples_per_second": 33.129, |
|
"eval_steps_per_second": 4.307, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.6246690154075623, |
|
"eval_runtime": 3.0184, |
|
"eval_samples_per_second": 33.13, |
|
"eval_steps_per_second": 4.307, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.6262466907501221, |
|
"eval_runtime": 3.0193, |
|
"eval_samples_per_second": 33.121, |
|
"eval_steps_per_second": 4.306, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.6260530352592468, |
|
"eval_runtime": 3.0186, |
|
"eval_samples_per_second": 33.128, |
|
"eval_steps_per_second": 4.307, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.6261406540870667, |
|
"eval_runtime": 3.0185, |
|
"eval_samples_per_second": 33.129, |
|
"eval_steps_per_second": 4.307, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.6263586282730103, |
|
"eval_runtime": 3.0169, |
|
"eval_samples_per_second": 33.147, |
|
"eval_steps_per_second": 4.309, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.6266360282897949, |
|
"eval_runtime": 3.0196, |
|
"eval_samples_per_second": 33.117, |
|
"eval_steps_per_second": 4.305, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.626537561416626, |
|
"eval_runtime": 3.0175, |
|
"eval_samples_per_second": 33.141, |
|
"eval_steps_per_second": 4.308, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.47, |
|
"eval_loss": 0.6262317895889282, |
|
"eval_runtime": 3.0187, |
|
"eval_samples_per_second": 33.127, |
|
"eval_steps_per_second": 4.306, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.48, |
|
"eval_loss": 0.6264010667800903, |
|
"eval_runtime": 3.0289, |
|
"eval_samples_per_second": 33.016, |
|
"eval_steps_per_second": 4.292, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.6263816356658936, |
|
"eval_runtime": 3.0275, |
|
"eval_samples_per_second": 33.031, |
|
"eval_steps_per_second": 4.294, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.025, |
|
"loss": 1.2203, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.6261613965034485, |
|
"eval_runtime": 3.0186, |
|
"eval_samples_per_second": 33.128, |
|
"eval_steps_per_second": 4.307, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.6262817978858948, |
|
"eval_runtime": 3.0154, |
|
"eval_samples_per_second": 33.163, |
|
"eval_steps_per_second": 4.311, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.6261902451515198, |
|
"eval_runtime": 3.0169, |
|
"eval_samples_per_second": 33.147, |
|
"eval_steps_per_second": 4.309, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.38, |
|
"eval_loss": 0.6264880895614624, |
|
"eval_runtime": 3.0169, |
|
"eval_samples_per_second": 33.147, |
|
"eval_steps_per_second": 4.309, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.6262249946594238, |
|
"eval_runtime": 3.0162, |
|
"eval_samples_per_second": 33.154, |
|
"eval_steps_per_second": 4.31, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6261861324310303, |
|
"eval_runtime": 3.0154, |
|
"eval_samples_per_second": 33.163, |
|
"eval_steps_per_second": 4.311, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.6262775659561157, |
|
"eval_runtime": 3.0142, |
|
"eval_samples_per_second": 33.177, |
|
"eval_steps_per_second": 4.313, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.6262478828430176, |
|
"eval_runtime": 3.014, |
|
"eval_samples_per_second": 33.178, |
|
"eval_steps_per_second": 4.313, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.6262679100036621, |
|
"eval_runtime": 3.0142, |
|
"eval_samples_per_second": 33.176, |
|
"eval_steps_per_second": 4.313, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.39, |
|
"eval_loss": 0.6264752149581909, |
|
"eval_runtime": 3.014, |
|
"eval_samples_per_second": 33.178, |
|
"eval_steps_per_second": 4.313, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.6261715292930603, |
|
"eval_runtime": 3.0114, |
|
"eval_samples_per_second": 33.207, |
|
"eval_steps_per_second": 4.317, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.626240611076355, |
|
"eval_runtime": 3.0123, |
|
"eval_samples_per_second": 33.197, |
|
"eval_steps_per_second": 4.316, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.6261351108551025, |
|
"eval_runtime": 3.0136, |
|
"eval_samples_per_second": 33.183, |
|
"eval_steps_per_second": 4.314, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.6261637210845947, |
|
"eval_runtime": 3.0144, |
|
"eval_samples_per_second": 33.174, |
|
"eval_steps_per_second": 4.313, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.6261104345321655, |
|
"eval_runtime": 3.014, |
|
"eval_samples_per_second": 33.179, |
|
"eval_steps_per_second": 4.313, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.6260131597518921, |
|
"eval_runtime": 3.0129, |
|
"eval_samples_per_second": 33.19, |
|
"eval_steps_per_second": 4.315, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6261314749717712, |
|
"eval_runtime": 3.0129, |
|
"eval_samples_per_second": 33.191, |
|
"eval_steps_per_second": 4.315, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.6263273358345032, |
|
"eval_runtime": 3.0142, |
|
"eval_samples_per_second": 33.176, |
|
"eval_steps_per_second": 4.313, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.6263743042945862, |
|
"eval_runtime": 3.0121, |
|
"eval_samples_per_second": 33.199, |
|
"eval_steps_per_second": 4.316, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 0.6262876391410828, |
|
"eval_runtime": 3.0151, |
|
"eval_samples_per_second": 33.167, |
|
"eval_steps_per_second": 4.312, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.0125, |
|
"loss": 0.9516, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.54, |
|
"eval_loss": 0.6262710690498352, |
|
"eval_runtime": 3.0132, |
|
"eval_samples_per_second": 33.187, |
|
"eval_steps_per_second": 4.314, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.47, |
|
"eval_loss": 0.6263102889060974, |
|
"eval_runtime": 3.0116, |
|
"eval_samples_per_second": 33.205, |
|
"eval_steps_per_second": 4.317, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.6261453628540039, |
|
"eval_runtime": 3.0117, |
|
"eval_samples_per_second": 33.203, |
|
"eval_steps_per_second": 4.316, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.6262593865394592, |
|
"eval_runtime": 3.0153, |
|
"eval_samples_per_second": 33.164, |
|
"eval_steps_per_second": 4.311, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.6261275410652161, |
|
"eval_runtime": 3.019, |
|
"eval_samples_per_second": 33.123, |
|
"eval_steps_per_second": 4.306, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.6263031959533691, |
|
"eval_runtime": 3.0189, |
|
"eval_samples_per_second": 33.124, |
|
"eval_steps_per_second": 4.306, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.39, |
|
"eval_loss": 0.6264517307281494, |
|
"eval_runtime": 3.0111, |
|
"eval_samples_per_second": 33.21, |
|
"eval_steps_per_second": 4.317, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.6262269020080566, |
|
"eval_runtime": 3.0148, |
|
"eval_samples_per_second": 33.17, |
|
"eval_steps_per_second": 4.312, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.38, |
|
"eval_loss": 0.6263662576675415, |
|
"eval_runtime": 3.0138, |
|
"eval_samples_per_second": 33.18, |
|
"eval_steps_per_second": 4.313, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.6261860132217407, |
|
"eval_runtime": 3.0135, |
|
"eval_samples_per_second": 33.184, |
|
"eval_steps_per_second": 4.314, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.6262713074684143, |
|
"eval_runtime": 3.013, |
|
"eval_samples_per_second": 33.189, |
|
"eval_steps_per_second": 4.315, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.6261432766914368, |
|
"eval_runtime": 3.0111, |
|
"eval_samples_per_second": 33.21, |
|
"eval_steps_per_second": 4.317, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.6263407468795776, |
|
"eval_runtime": 3.0159, |
|
"eval_samples_per_second": 33.158, |
|
"eval_steps_per_second": 4.311, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.6262317895889282, |
|
"eval_runtime": 3.0131, |
|
"eval_samples_per_second": 33.189, |
|
"eval_steps_per_second": 4.315, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.48, |
|
"eval_loss": 0.6263194680213928, |
|
"eval_runtime": 3.0137, |
|
"eval_samples_per_second": 33.182, |
|
"eval_steps_per_second": 4.314, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.6262105703353882, |
|
"eval_runtime": 3.0116, |
|
"eval_samples_per_second": 33.204, |
|
"eval_steps_per_second": 4.317, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.6263117790222168, |
|
"eval_runtime": 3.0104, |
|
"eval_samples_per_second": 33.218, |
|
"eval_steps_per_second": 4.318, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.6263181567192078, |
|
"eval_runtime": 3.0116, |
|
"eval_samples_per_second": 33.205, |
|
"eval_steps_per_second": 4.317, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 0.6262911558151245, |
|
"eval_runtime": 3.0137, |
|
"eval_samples_per_second": 33.182, |
|
"eval_steps_per_second": 4.314, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.42, |
|
"eval_loss": 0.6263221502304077, |
|
"eval_runtime": 3.0254, |
|
"eval_samples_per_second": 33.053, |
|
"eval_steps_per_second": 4.297, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.7734, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.6263124346733093, |
|
"eval_runtime": 3.0236, |
|
"eval_samples_per_second": 33.074, |
|
"eval_steps_per_second": 4.3, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 1.112525405883789, |
|
"train_runtime": 1617.2068, |
|
"train_samples_per_second": 19.787, |
|
"train_steps_per_second": 1.237 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|