|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6010557413101196, |
|
"eval_runtime": 2.8952, |
|
"eval_samples_per_second": 34.539, |
|
"eval_steps_per_second": 4.49, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5991448163986206, |
|
"eval_runtime": 2.9197, |
|
"eval_samples_per_second": 34.25, |
|
"eval_steps_per_second": 4.453, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5983209609985352, |
|
"eval_runtime": 2.9581, |
|
"eval_samples_per_second": 33.806, |
|
"eval_steps_per_second": 4.395, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6062744855880737, |
|
"eval_runtime": 2.9913, |
|
"eval_samples_per_second": 33.43, |
|
"eval_steps_per_second": 4.346, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5972874164581299, |
|
"eval_runtime": 3.0135, |
|
"eval_samples_per_second": 33.184, |
|
"eval_steps_per_second": 4.314, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6048508286476135, |
|
"eval_runtime": 3.0172, |
|
"eval_samples_per_second": 33.143, |
|
"eval_steps_per_second": 4.309, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6030882596969604, |
|
"eval_runtime": 3.0217, |
|
"eval_samples_per_second": 33.094, |
|
"eval_steps_per_second": 4.302, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6000898480415344, |
|
"eval_runtime": 3.0239, |
|
"eval_samples_per_second": 33.07, |
|
"eval_steps_per_second": 4.299, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5968500971794128, |
|
"eval_runtime": 3.0268, |
|
"eval_samples_per_second": 33.038, |
|
"eval_steps_per_second": 4.295, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6007280945777893, |
|
"eval_runtime": 3.0282, |
|
"eval_samples_per_second": 33.023, |
|
"eval_steps_per_second": 4.293, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6015586853027344, |
|
"eval_runtime": 3.027, |
|
"eval_samples_per_second": 33.036, |
|
"eval_steps_per_second": 4.295, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5992419123649597, |
|
"eval_runtime": 3.0339, |
|
"eval_samples_per_second": 32.961, |
|
"eval_steps_per_second": 4.285, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5967671275138855, |
|
"eval_runtime": 3.0311, |
|
"eval_samples_per_second": 32.992, |
|
"eval_steps_per_second": 4.289, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.596794605255127, |
|
"eval_runtime": 3.0286, |
|
"eval_samples_per_second": 33.018, |
|
"eval_steps_per_second": 4.292, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.600039541721344, |
|
"eval_runtime": 3.0308, |
|
"eval_samples_per_second": 32.995, |
|
"eval_steps_per_second": 4.289, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6000267267227173, |
|
"eval_runtime": 3.0298, |
|
"eval_samples_per_second": 33.006, |
|
"eval_steps_per_second": 4.291, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5882958769798279, |
|
"eval_runtime": 3.029, |
|
"eval_samples_per_second": 33.015, |
|
"eval_steps_per_second": 4.292, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5920267701148987, |
|
"eval_runtime": 3.0314, |
|
"eval_samples_per_second": 32.988, |
|
"eval_steps_per_second": 4.288, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.6034604907035828, |
|
"eval_runtime": 3.0306, |
|
"eval_samples_per_second": 32.997, |
|
"eval_steps_per_second": 4.29, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00075, |
|
"loss": 0.6519, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6074941158294678, |
|
"eval_runtime": 3.0321, |
|
"eval_samples_per_second": 32.981, |
|
"eval_steps_per_second": 4.287, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5919292569160461, |
|
"eval_runtime": 3.03, |
|
"eval_samples_per_second": 33.003, |
|
"eval_steps_per_second": 4.29, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5951123237609863, |
|
"eval_runtime": 3.0298, |
|
"eval_samples_per_second": 33.006, |
|
"eval_steps_per_second": 4.291, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.603710412979126, |
|
"eval_runtime": 3.0294, |
|
"eval_samples_per_second": 33.009, |
|
"eval_steps_per_second": 4.291, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.6057878732681274, |
|
"eval_runtime": 3.0274, |
|
"eval_samples_per_second": 33.032, |
|
"eval_steps_per_second": 4.294, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5943801999092102, |
|
"eval_runtime": 3.0273, |
|
"eval_samples_per_second": 33.033, |
|
"eval_steps_per_second": 4.294, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5938175320625305, |
|
"eval_runtime": 3.0318, |
|
"eval_samples_per_second": 32.983, |
|
"eval_steps_per_second": 4.288, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5908843278884888, |
|
"eval_runtime": 3.0311, |
|
"eval_samples_per_second": 32.991, |
|
"eval_steps_per_second": 4.289, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5914073586463928, |
|
"eval_runtime": 3.0408, |
|
"eval_samples_per_second": 32.886, |
|
"eval_steps_per_second": 4.275, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5901620388031006, |
|
"eval_runtime": 3.0355, |
|
"eval_samples_per_second": 32.943, |
|
"eval_steps_per_second": 4.283, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5905589461326599, |
|
"eval_runtime": 3.0236, |
|
"eval_samples_per_second": 33.073, |
|
"eval_steps_per_second": 4.3, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5935985445976257, |
|
"eval_runtime": 3.023, |
|
"eval_samples_per_second": 33.08, |
|
"eval_steps_per_second": 4.3, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5959981679916382, |
|
"eval_runtime": 3.0241, |
|
"eval_samples_per_second": 33.068, |
|
"eval_steps_per_second": 4.299, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.595251739025116, |
|
"eval_runtime": 3.0223, |
|
"eval_samples_per_second": 33.088, |
|
"eval_steps_per_second": 4.301, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.597002387046814, |
|
"eval_runtime": 3.0261, |
|
"eval_samples_per_second": 33.046, |
|
"eval_steps_per_second": 4.296, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.593684732913971, |
|
"eval_runtime": 3.0258, |
|
"eval_samples_per_second": 33.049, |
|
"eval_steps_per_second": 4.296, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5954169034957886, |
|
"eval_runtime": 3.0248, |
|
"eval_samples_per_second": 33.06, |
|
"eval_steps_per_second": 4.298, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5992526412010193, |
|
"eval_runtime": 3.0254, |
|
"eval_samples_per_second": 33.053, |
|
"eval_steps_per_second": 4.297, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5905333161354065, |
|
"eval_runtime": 3.0228, |
|
"eval_samples_per_second": 33.082, |
|
"eval_steps_per_second": 4.301, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5898448824882507, |
|
"eval_runtime": 3.0254, |
|
"eval_samples_per_second": 33.053, |
|
"eval_steps_per_second": 4.297, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.6395, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5947265028953552, |
|
"eval_runtime": 3.0242, |
|
"eval_samples_per_second": 33.067, |
|
"eval_steps_per_second": 4.299, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5965713262557983, |
|
"eval_runtime": 3.0253, |
|
"eval_samples_per_second": 33.054, |
|
"eval_steps_per_second": 4.297, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.595311164855957, |
|
"eval_runtime": 3.0266, |
|
"eval_samples_per_second": 33.04, |
|
"eval_steps_per_second": 4.295, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.59684818983078, |
|
"eval_runtime": 3.0241, |
|
"eval_samples_per_second": 33.067, |
|
"eval_steps_per_second": 4.299, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5934141874313354, |
|
"eval_runtime": 3.0236, |
|
"eval_samples_per_second": 33.074, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.59480881690979, |
|
"eval_runtime": 3.0258, |
|
"eval_samples_per_second": 33.049, |
|
"eval_steps_per_second": 4.296, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5958330035209656, |
|
"eval_runtime": 3.0235, |
|
"eval_samples_per_second": 33.074, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5928426384925842, |
|
"eval_runtime": 3.0224, |
|
"eval_samples_per_second": 33.086, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5922430157661438, |
|
"eval_runtime": 3.0243, |
|
"eval_samples_per_second": 33.065, |
|
"eval_steps_per_second": 4.298, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5928506255149841, |
|
"eval_runtime": 3.0246, |
|
"eval_samples_per_second": 33.063, |
|
"eval_steps_per_second": 4.298, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5967039465904236, |
|
"eval_runtime": 3.0208, |
|
"eval_samples_per_second": 33.103, |
|
"eval_steps_per_second": 4.303, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5907678008079529, |
|
"eval_runtime": 3.0228, |
|
"eval_samples_per_second": 33.082, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5929586887359619, |
|
"eval_runtime": 3.0225, |
|
"eval_samples_per_second": 33.085, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.590965986251831, |
|
"eval_runtime": 3.0223, |
|
"eval_samples_per_second": 33.088, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5930989980697632, |
|
"eval_runtime": 3.0227, |
|
"eval_samples_per_second": 33.083, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5900382399559021, |
|
"eval_runtime": 3.022, |
|
"eval_samples_per_second": 33.091, |
|
"eval_steps_per_second": 4.302, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5924661159515381, |
|
"eval_runtime": 3.0221, |
|
"eval_samples_per_second": 33.09, |
|
"eval_steps_per_second": 4.302, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5937650203704834, |
|
"eval_runtime": 3.0236, |
|
"eval_samples_per_second": 33.073, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5963329076766968, |
|
"eval_runtime": 3.0249, |
|
"eval_samples_per_second": 33.059, |
|
"eval_steps_per_second": 4.298, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5954621434211731, |
|
"eval_runtime": 3.0247, |
|
"eval_samples_per_second": 33.061, |
|
"eval_steps_per_second": 4.298, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00025, |
|
"loss": 0.6331, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5935218930244446, |
|
"eval_runtime": 3.0258, |
|
"eval_samples_per_second": 33.049, |
|
"eval_steps_per_second": 4.296, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5937374830245972, |
|
"eval_runtime": 3.0237, |
|
"eval_samples_per_second": 33.072, |
|
"eval_steps_per_second": 4.299, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5923505425453186, |
|
"eval_runtime": 3.0211, |
|
"eval_samples_per_second": 33.101, |
|
"eval_steps_per_second": 4.303, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5909485816955566, |
|
"eval_runtime": 3.0219, |
|
"eval_samples_per_second": 33.092, |
|
"eval_steps_per_second": 4.302, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.589134931564331, |
|
"eval_runtime": 3.026, |
|
"eval_samples_per_second": 33.047, |
|
"eval_steps_per_second": 4.296, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5880649089813232, |
|
"eval_runtime": 3.0289, |
|
"eval_samples_per_second": 33.015, |
|
"eval_steps_per_second": 4.292, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5884314775466919, |
|
"eval_runtime": 3.0234, |
|
"eval_samples_per_second": 33.075, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5893000960350037, |
|
"eval_runtime": 3.0241, |
|
"eval_samples_per_second": 33.067, |
|
"eval_steps_per_second": 4.299, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5899676084518433, |
|
"eval_runtime": 3.028, |
|
"eval_samples_per_second": 33.025, |
|
"eval_steps_per_second": 4.293, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5908388495445251, |
|
"eval_runtime": 3.0265, |
|
"eval_samples_per_second": 33.041, |
|
"eval_steps_per_second": 4.295, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5911998748779297, |
|
"eval_runtime": 3.0244, |
|
"eval_samples_per_second": 33.065, |
|
"eval_steps_per_second": 4.298, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5914051532745361, |
|
"eval_runtime": 3.0277, |
|
"eval_samples_per_second": 33.029, |
|
"eval_steps_per_second": 4.294, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5901281237602234, |
|
"eval_runtime": 3.0271, |
|
"eval_samples_per_second": 33.035, |
|
"eval_steps_per_second": 4.294, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5898309350013733, |
|
"eval_runtime": 3.0251, |
|
"eval_samples_per_second": 33.057, |
|
"eval_steps_per_second": 4.297, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.589576005935669, |
|
"eval_runtime": 3.0338, |
|
"eval_samples_per_second": 32.961, |
|
"eval_steps_per_second": 4.285, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5904554128646851, |
|
"eval_runtime": 3.0252, |
|
"eval_samples_per_second": 33.055, |
|
"eval_steps_per_second": 4.297, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5900978446006775, |
|
"eval_runtime": 3.0257, |
|
"eval_samples_per_second": 33.05, |
|
"eval_steps_per_second": 4.296, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5900523662567139, |
|
"eval_runtime": 3.0269, |
|
"eval_samples_per_second": 33.037, |
|
"eval_steps_per_second": 4.295, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5899617671966553, |
|
"eval_runtime": 3.0271, |
|
"eval_samples_per_second": 33.034, |
|
"eval_steps_per_second": 4.294, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5899820327758789, |
|
"eval_runtime": 3.0253, |
|
"eval_samples_per_second": 33.055, |
|
"eval_steps_per_second": 4.297, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.6276, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5899689793586731, |
|
"eval_runtime": 3.0275, |
|
"eval_samples_per_second": 33.03, |
|
"eval_steps_per_second": 4.294, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.6380199432373047, |
|
"train_runtime": 1620.376, |
|
"train_samples_per_second": 19.749, |
|
"train_steps_per_second": 1.234 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|