|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.41458651423454285, |
|
"eval_runtime": 2.9544, |
|
"eval_samples_per_second": 33.848, |
|
"eval_steps_per_second": 4.4, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4116404056549072, |
|
"eval_runtime": 2.9946, |
|
"eval_samples_per_second": 33.394, |
|
"eval_steps_per_second": 4.341, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.41392984986305237, |
|
"eval_runtime": 3.022, |
|
"eval_samples_per_second": 33.091, |
|
"eval_steps_per_second": 4.302, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.416966050863266, |
|
"eval_runtime": 3.0401, |
|
"eval_samples_per_second": 32.893, |
|
"eval_steps_per_second": 4.276, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.4182185232639313, |
|
"eval_runtime": 3.0632, |
|
"eval_samples_per_second": 32.646, |
|
"eval_steps_per_second": 4.244, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.42081978917121887, |
|
"eval_runtime": 3.0619, |
|
"eval_samples_per_second": 32.66, |
|
"eval_steps_per_second": 4.246, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.41149312257766724, |
|
"eval_runtime": 3.069, |
|
"eval_samples_per_second": 32.584, |
|
"eval_steps_per_second": 4.236, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.41568344831466675, |
|
"eval_runtime": 3.0682, |
|
"eval_samples_per_second": 32.592, |
|
"eval_steps_per_second": 4.237, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4229449927806854, |
|
"eval_runtime": 3.0693, |
|
"eval_samples_per_second": 32.581, |
|
"eval_steps_per_second": 4.235, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.4205465018749237, |
|
"eval_runtime": 3.0704, |
|
"eval_samples_per_second": 32.569, |
|
"eval_steps_per_second": 4.234, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.41778337955474854, |
|
"eval_runtime": 3.0755, |
|
"eval_samples_per_second": 32.515, |
|
"eval_steps_per_second": 4.227, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.41307488083839417, |
|
"eval_runtime": 3.0694, |
|
"eval_samples_per_second": 32.58, |
|
"eval_steps_per_second": 4.235, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.4146430492401123, |
|
"eval_runtime": 3.0703, |
|
"eval_samples_per_second": 32.57, |
|
"eval_steps_per_second": 4.234, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.42016124725341797, |
|
"eval_runtime": 3.0704, |
|
"eval_samples_per_second": 32.569, |
|
"eval_steps_per_second": 4.234, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.4330647587776184, |
|
"eval_runtime": 3.0703, |
|
"eval_samples_per_second": 32.57, |
|
"eval_steps_per_second": 4.234, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4120154082775116, |
|
"eval_runtime": 3.0708, |
|
"eval_samples_per_second": 32.564, |
|
"eval_steps_per_second": 4.233, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.4144454896450043, |
|
"eval_runtime": 3.0689, |
|
"eval_samples_per_second": 32.585, |
|
"eval_steps_per_second": 4.236, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4182257354259491, |
|
"eval_runtime": 3.066, |
|
"eval_samples_per_second": 32.616, |
|
"eval_steps_per_second": 4.24, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.41842299699783325, |
|
"eval_runtime": 3.0648, |
|
"eval_samples_per_second": 32.629, |
|
"eval_steps_per_second": 4.242, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00075, |
|
"loss": 0.5392, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.41611525416374207, |
|
"eval_runtime": 3.0695, |
|
"eval_samples_per_second": 32.578, |
|
"eval_steps_per_second": 4.235, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.41845789551734924, |
|
"eval_runtime": 3.0612, |
|
"eval_samples_per_second": 32.667, |
|
"eval_steps_per_second": 4.247, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.4186709225177765, |
|
"eval_runtime": 3.0611, |
|
"eval_samples_per_second": 32.668, |
|
"eval_steps_per_second": 4.247, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.4185837209224701, |
|
"eval_runtime": 3.0617, |
|
"eval_samples_per_second": 32.662, |
|
"eval_steps_per_second": 4.246, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.41592276096343994, |
|
"eval_runtime": 3.0573, |
|
"eval_samples_per_second": 32.709, |
|
"eval_steps_per_second": 4.252, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4151555895805359, |
|
"eval_runtime": 3.0541, |
|
"eval_samples_per_second": 32.743, |
|
"eval_steps_per_second": 4.257, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.41507554054260254, |
|
"eval_runtime": 3.0542, |
|
"eval_samples_per_second": 32.742, |
|
"eval_steps_per_second": 4.256, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41364148259162903, |
|
"eval_runtime": 3.0522, |
|
"eval_samples_per_second": 32.763, |
|
"eval_steps_per_second": 4.259, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.41904377937316895, |
|
"eval_runtime": 3.0528, |
|
"eval_samples_per_second": 32.757, |
|
"eval_steps_per_second": 4.258, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.4224679470062256, |
|
"eval_runtime": 3.0526, |
|
"eval_samples_per_second": 32.759, |
|
"eval_steps_per_second": 4.259, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.42088595032691956, |
|
"eval_runtime": 3.0523, |
|
"eval_samples_per_second": 32.762, |
|
"eval_steps_per_second": 4.259, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.416681706905365, |
|
"eval_runtime": 3.0478, |
|
"eval_samples_per_second": 32.81, |
|
"eval_steps_per_second": 4.265, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.4152999818325043, |
|
"eval_runtime": 3.049, |
|
"eval_samples_per_second": 32.797, |
|
"eval_steps_per_second": 4.264, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.4235653281211853, |
|
"eval_runtime": 3.0473, |
|
"eval_samples_per_second": 32.816, |
|
"eval_steps_per_second": 4.266, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.41910892724990845, |
|
"eval_runtime": 3.0486, |
|
"eval_samples_per_second": 32.801, |
|
"eval_steps_per_second": 4.264, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.41600170731544495, |
|
"eval_runtime": 3.0482, |
|
"eval_samples_per_second": 32.806, |
|
"eval_steps_per_second": 4.265, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.41634392738342285, |
|
"eval_runtime": 3.0496, |
|
"eval_samples_per_second": 32.791, |
|
"eval_steps_per_second": 4.263, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.419331818819046, |
|
"eval_runtime": 3.0513, |
|
"eval_samples_per_second": 32.773, |
|
"eval_steps_per_second": 4.261, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.4208333194255829, |
|
"eval_runtime": 3.0517, |
|
"eval_samples_per_second": 32.768, |
|
"eval_steps_per_second": 4.26, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.416349321603775, |
|
"eval_runtime": 3.0475, |
|
"eval_samples_per_second": 32.814, |
|
"eval_steps_per_second": 4.266, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.5359, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.4158593416213989, |
|
"eval_runtime": 3.0471, |
|
"eval_samples_per_second": 32.818, |
|
"eval_steps_per_second": 4.266, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.41456928849220276, |
|
"eval_runtime": 3.0494, |
|
"eval_samples_per_second": 32.793, |
|
"eval_steps_per_second": 4.263, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.41578975319862366, |
|
"eval_runtime": 3.0512, |
|
"eval_samples_per_second": 32.774, |
|
"eval_steps_per_second": 4.261, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.42108017206192017, |
|
"eval_runtime": 3.0504, |
|
"eval_samples_per_second": 32.783, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.4202888607978821, |
|
"eval_runtime": 3.049, |
|
"eval_samples_per_second": 32.797, |
|
"eval_steps_per_second": 4.264, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.42174988985061646, |
|
"eval_runtime": 3.0504, |
|
"eval_samples_per_second": 32.782, |
|
"eval_steps_per_second": 4.262, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.41829144954681396, |
|
"eval_runtime": 3.0521, |
|
"eval_samples_per_second": 32.764, |
|
"eval_steps_per_second": 4.259, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.413789838552475, |
|
"eval_runtime": 3.0518, |
|
"eval_samples_per_second": 32.767, |
|
"eval_steps_per_second": 4.26, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.4123985171318054, |
|
"eval_runtime": 3.0523, |
|
"eval_samples_per_second": 32.763, |
|
"eval_steps_per_second": 4.259, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41400572657585144, |
|
"eval_runtime": 3.0522, |
|
"eval_samples_per_second": 32.763, |
|
"eval_steps_per_second": 4.259, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4117923378944397, |
|
"eval_runtime": 3.0586, |
|
"eval_samples_per_second": 32.694, |
|
"eval_steps_per_second": 4.25, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.4136669933795929, |
|
"eval_runtime": 3.062, |
|
"eval_samples_per_second": 32.658, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.4112585783004761, |
|
"eval_runtime": 3.0617, |
|
"eval_samples_per_second": 32.662, |
|
"eval_steps_per_second": 4.246, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.411198228597641, |
|
"eval_runtime": 3.0569, |
|
"eval_samples_per_second": 32.713, |
|
"eval_steps_per_second": 4.253, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41399067640304565, |
|
"eval_runtime": 3.0552, |
|
"eval_samples_per_second": 32.731, |
|
"eval_steps_per_second": 4.255, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4128565490245819, |
|
"eval_runtime": 3.0536, |
|
"eval_samples_per_second": 32.748, |
|
"eval_steps_per_second": 4.257, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.41508427262306213, |
|
"eval_runtime": 3.0516, |
|
"eval_samples_per_second": 32.77, |
|
"eval_steps_per_second": 4.26, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.4154996871948242, |
|
"eval_runtime": 3.0553, |
|
"eval_samples_per_second": 32.73, |
|
"eval_steps_per_second": 4.255, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41395488381385803, |
|
"eval_runtime": 3.0559, |
|
"eval_samples_per_second": 32.724, |
|
"eval_steps_per_second": 4.254, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.414520800113678, |
|
"eval_runtime": 3.0565, |
|
"eval_samples_per_second": 32.717, |
|
"eval_steps_per_second": 4.253, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00025, |
|
"loss": 0.5347, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41578900814056396, |
|
"eval_runtime": 3.0637, |
|
"eval_samples_per_second": 32.641, |
|
"eval_steps_per_second": 4.243, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.41476428508758545, |
|
"eval_runtime": 3.0578, |
|
"eval_samples_per_second": 32.703, |
|
"eval_steps_per_second": 4.251, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.41465869545936584, |
|
"eval_runtime": 3.0637, |
|
"eval_samples_per_second": 32.64, |
|
"eval_steps_per_second": 4.243, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.41527068614959717, |
|
"eval_runtime": 3.0607, |
|
"eval_samples_per_second": 32.672, |
|
"eval_steps_per_second": 4.247, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41556769609451294, |
|
"eval_runtime": 3.0628, |
|
"eval_samples_per_second": 32.65, |
|
"eval_steps_per_second": 4.244, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4151536226272583, |
|
"eval_runtime": 3.0628, |
|
"eval_samples_per_second": 32.65, |
|
"eval_steps_per_second": 4.245, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4146164655685425, |
|
"eval_runtime": 3.0648, |
|
"eval_samples_per_second": 32.629, |
|
"eval_steps_per_second": 4.242, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4151190221309662, |
|
"eval_runtime": 3.0643, |
|
"eval_samples_per_second": 32.634, |
|
"eval_steps_per_second": 4.242, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.41448092460632324, |
|
"eval_runtime": 3.0668, |
|
"eval_samples_per_second": 32.607, |
|
"eval_steps_per_second": 4.239, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.41525405645370483, |
|
"eval_runtime": 3.0657, |
|
"eval_samples_per_second": 32.618, |
|
"eval_steps_per_second": 4.24, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4146501123905182, |
|
"eval_runtime": 3.0643, |
|
"eval_samples_per_second": 32.633, |
|
"eval_steps_per_second": 4.242, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4146224558353424, |
|
"eval_runtime": 3.0657, |
|
"eval_samples_per_second": 32.619, |
|
"eval_steps_per_second": 4.24, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.41336506605148315, |
|
"eval_runtime": 3.068, |
|
"eval_samples_per_second": 32.595, |
|
"eval_steps_per_second": 4.237, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41397038102149963, |
|
"eval_runtime": 3.0653, |
|
"eval_samples_per_second": 32.624, |
|
"eval_steps_per_second": 4.241, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.4141198694705963, |
|
"eval_runtime": 3.0676, |
|
"eval_samples_per_second": 32.599, |
|
"eval_steps_per_second": 4.238, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.41505852341651917, |
|
"eval_runtime": 3.0673, |
|
"eval_samples_per_second": 32.602, |
|
"eval_steps_per_second": 4.238, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.4149923622608185, |
|
"eval_runtime": 3.0651, |
|
"eval_samples_per_second": 32.625, |
|
"eval_steps_per_second": 4.241, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.41483086347579956, |
|
"eval_runtime": 3.0669, |
|
"eval_samples_per_second": 32.606, |
|
"eval_steps_per_second": 4.239, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.41491347551345825, |
|
"eval_runtime": 3.069, |
|
"eval_samples_per_second": 32.584, |
|
"eval_steps_per_second": 4.236, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.4150215983390808, |
|
"eval_runtime": 3.0645, |
|
"eval_samples_per_second": 32.632, |
|
"eval_steps_per_second": 4.242, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.5285, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.4149799048900604, |
|
"eval_runtime": 3.0687, |
|
"eval_samples_per_second": 32.587, |
|
"eval_steps_per_second": 4.236, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.5345863189697265, |
|
"train_runtime": 1637.4299, |
|
"train_samples_per_second": 19.543, |
|
"train_steps_per_second": 1.221 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|