|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.2910219132900238, |
|
"eval_runtime": 2.9634, |
|
"eval_samples_per_second": 33.745, |
|
"eval_steps_per_second": 4.387, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2910943627357483, |
|
"eval_runtime": 3.0131, |
|
"eval_samples_per_second": 33.188, |
|
"eval_steps_per_second": 4.314, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.2874543070793152, |
|
"eval_runtime": 3.065, |
|
"eval_samples_per_second": 32.626, |
|
"eval_steps_per_second": 4.241, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.2908875346183777, |
|
"eval_runtime": 3.0986, |
|
"eval_samples_per_second": 32.272, |
|
"eval_steps_per_second": 4.195, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.2935199439525604, |
|
"eval_runtime": 3.1198, |
|
"eval_samples_per_second": 32.053, |
|
"eval_steps_per_second": 4.167, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.29770228266716003, |
|
"eval_runtime": 3.1385, |
|
"eval_samples_per_second": 31.863, |
|
"eval_steps_per_second": 4.142, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.285401850938797, |
|
"eval_runtime": 3.1456, |
|
"eval_samples_per_second": 31.79, |
|
"eval_steps_per_second": 4.133, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.29004549980163574, |
|
"eval_runtime": 3.1554, |
|
"eval_samples_per_second": 31.692, |
|
"eval_steps_per_second": 4.12, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.29845982789993286, |
|
"eval_runtime": 3.157, |
|
"eval_samples_per_second": 31.676, |
|
"eval_steps_per_second": 4.118, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.29055431485176086, |
|
"eval_runtime": 3.1623, |
|
"eval_samples_per_second": 31.623, |
|
"eval_steps_per_second": 4.111, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2978856861591339, |
|
"eval_runtime": 3.1644, |
|
"eval_samples_per_second": 31.601, |
|
"eval_steps_per_second": 4.108, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.28910714387893677, |
|
"eval_runtime": 3.1651, |
|
"eval_samples_per_second": 31.594, |
|
"eval_steps_per_second": 4.107, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.28845351934432983, |
|
"eval_runtime": 3.1653, |
|
"eval_samples_per_second": 31.593, |
|
"eval_steps_per_second": 4.107, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2904435992240906, |
|
"eval_runtime": 3.1679, |
|
"eval_samples_per_second": 31.567, |
|
"eval_steps_per_second": 4.104, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.30564746260643005, |
|
"eval_runtime": 3.1686, |
|
"eval_samples_per_second": 31.56, |
|
"eval_steps_per_second": 4.103, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.286005437374115, |
|
"eval_runtime": 3.1691, |
|
"eval_samples_per_second": 31.555, |
|
"eval_steps_per_second": 4.102, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.2886832356452942, |
|
"eval_runtime": 3.1705, |
|
"eval_samples_per_second": 31.541, |
|
"eval_steps_per_second": 4.1, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.296792596578598, |
|
"eval_runtime": 3.1687, |
|
"eval_samples_per_second": 31.559, |
|
"eval_steps_per_second": 4.103, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.29268431663513184, |
|
"eval_runtime": 3.172, |
|
"eval_samples_per_second": 31.526, |
|
"eval_steps_per_second": 4.098, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00075, |
|
"loss": 0.4646, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2887031137943268, |
|
"eval_runtime": 3.1725, |
|
"eval_samples_per_second": 31.52, |
|
"eval_steps_per_second": 4.098, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.2917368412017822, |
|
"eval_runtime": 3.1728, |
|
"eval_samples_per_second": 31.518, |
|
"eval_steps_per_second": 4.097, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.29400041699409485, |
|
"eval_runtime": 3.1736, |
|
"eval_samples_per_second": 31.509, |
|
"eval_steps_per_second": 4.096, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.2913835346698761, |
|
"eval_runtime": 3.1725, |
|
"eval_samples_per_second": 31.521, |
|
"eval_steps_per_second": 4.098, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.28750455379486084, |
|
"eval_runtime": 3.1724, |
|
"eval_samples_per_second": 31.522, |
|
"eval_steps_per_second": 4.098, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2928391695022583, |
|
"eval_runtime": 3.174, |
|
"eval_samples_per_second": 31.506, |
|
"eval_steps_per_second": 4.096, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.2887367010116577, |
|
"eval_runtime": 3.1711, |
|
"eval_samples_per_second": 31.535, |
|
"eval_steps_per_second": 4.1, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.28706756234169006, |
|
"eval_runtime": 3.1728, |
|
"eval_samples_per_second": 31.517, |
|
"eval_steps_per_second": 4.097, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.29247915744781494, |
|
"eval_runtime": 3.1743, |
|
"eval_samples_per_second": 31.503, |
|
"eval_steps_per_second": 4.095, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.2962961494922638, |
|
"eval_runtime": 3.1784, |
|
"eval_samples_per_second": 31.462, |
|
"eval_steps_per_second": 4.09, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.29220831394195557, |
|
"eval_runtime": 3.1748, |
|
"eval_samples_per_second": 31.498, |
|
"eval_steps_per_second": 4.095, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.29017964005470276, |
|
"eval_runtime": 3.1789, |
|
"eval_samples_per_second": 31.458, |
|
"eval_steps_per_second": 4.089, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.28849852085113525, |
|
"eval_runtime": 3.174, |
|
"eval_samples_per_second": 31.506, |
|
"eval_steps_per_second": 4.096, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.29404839873313904, |
|
"eval_runtime": 3.1757, |
|
"eval_samples_per_second": 31.489, |
|
"eval_steps_per_second": 4.094, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.2911713719367981, |
|
"eval_runtime": 3.1765, |
|
"eval_samples_per_second": 31.481, |
|
"eval_steps_per_second": 4.092, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2879158556461334, |
|
"eval_runtime": 3.1778, |
|
"eval_samples_per_second": 31.468, |
|
"eval_steps_per_second": 4.091, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2879682183265686, |
|
"eval_runtime": 3.1764, |
|
"eval_samples_per_second": 31.482, |
|
"eval_steps_per_second": 4.093, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.47, |
|
"eval_loss": 0.2944921553134918, |
|
"eval_runtime": 3.1777, |
|
"eval_samples_per_second": 31.47, |
|
"eval_steps_per_second": 4.091, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.2917746603488922, |
|
"eval_runtime": 3.1773, |
|
"eval_samples_per_second": 31.474, |
|
"eval_steps_per_second": 4.092, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.2887328267097473, |
|
"eval_runtime": 3.1751, |
|
"eval_samples_per_second": 31.495, |
|
"eval_steps_per_second": 4.094, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.4656, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2873851954936981, |
|
"eval_runtime": 3.1756, |
|
"eval_samples_per_second": 31.49, |
|
"eval_steps_per_second": 4.094, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.28980785608291626, |
|
"eval_runtime": 3.1798, |
|
"eval_samples_per_second": 31.448, |
|
"eval_steps_per_second": 4.088, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2896897494792938, |
|
"eval_runtime": 3.1763, |
|
"eval_samples_per_second": 31.483, |
|
"eval_steps_per_second": 4.093, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.29236462712287903, |
|
"eval_runtime": 3.1769, |
|
"eval_samples_per_second": 31.477, |
|
"eval_steps_per_second": 4.092, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.2897891700267792, |
|
"eval_runtime": 3.177, |
|
"eval_samples_per_second": 31.477, |
|
"eval_steps_per_second": 4.092, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.292123407125473, |
|
"eval_runtime": 3.1798, |
|
"eval_samples_per_second": 31.449, |
|
"eval_steps_per_second": 4.088, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.2894897758960724, |
|
"eval_runtime": 3.1767, |
|
"eval_samples_per_second": 31.479, |
|
"eval_steps_per_second": 4.092, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2862450182437897, |
|
"eval_runtime": 3.1754, |
|
"eval_samples_per_second": 31.492, |
|
"eval_steps_per_second": 4.094, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.28694215416908264, |
|
"eval_runtime": 3.1812, |
|
"eval_samples_per_second": 31.435, |
|
"eval_steps_per_second": 4.086, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.28551337122917175, |
|
"eval_runtime": 3.1793, |
|
"eval_samples_per_second": 31.453, |
|
"eval_steps_per_second": 4.089, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2858518958091736, |
|
"eval_runtime": 3.1778, |
|
"eval_samples_per_second": 31.468, |
|
"eval_steps_per_second": 4.091, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.28994351625442505, |
|
"eval_runtime": 3.1813, |
|
"eval_samples_per_second": 31.434, |
|
"eval_steps_per_second": 4.086, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.285138875246048, |
|
"eval_runtime": 3.1778, |
|
"eval_samples_per_second": 31.468, |
|
"eval_steps_per_second": 4.091, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.2851716876029968, |
|
"eval_runtime": 3.1789, |
|
"eval_samples_per_second": 31.457, |
|
"eval_steps_per_second": 4.089, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.2886792719364166, |
|
"eval_runtime": 3.1798, |
|
"eval_samples_per_second": 31.449, |
|
"eval_steps_per_second": 4.088, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2869800329208374, |
|
"eval_runtime": 3.1792, |
|
"eval_samples_per_second": 31.455, |
|
"eval_steps_per_second": 4.089, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.289546400308609, |
|
"eval_runtime": 3.1807, |
|
"eval_samples_per_second": 31.439, |
|
"eval_steps_per_second": 4.087, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.2893470823764801, |
|
"eval_runtime": 3.1797, |
|
"eval_samples_per_second": 31.449, |
|
"eval_steps_per_second": 4.088, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2891067862510681, |
|
"eval_runtime": 3.1796, |
|
"eval_samples_per_second": 31.45, |
|
"eval_steps_per_second": 4.089, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.28904733061790466, |
|
"eval_runtime": 3.1792, |
|
"eval_samples_per_second": 31.455, |
|
"eval_steps_per_second": 4.089, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00025, |
|
"loss": 0.4637, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.28896650671958923, |
|
"eval_runtime": 3.179, |
|
"eval_samples_per_second": 31.457, |
|
"eval_steps_per_second": 4.089, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2883150577545166, |
|
"eval_runtime": 3.1809, |
|
"eval_samples_per_second": 31.438, |
|
"eval_steps_per_second": 4.087, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.2882067561149597, |
|
"eval_runtime": 3.1797, |
|
"eval_samples_per_second": 31.449, |
|
"eval_steps_per_second": 4.088, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2883051633834839, |
|
"eval_runtime": 3.1845, |
|
"eval_samples_per_second": 31.402, |
|
"eval_steps_per_second": 4.082, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.28842997550964355, |
|
"eval_runtime": 3.1787, |
|
"eval_samples_per_second": 31.459, |
|
"eval_steps_per_second": 4.09, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2875995337963104, |
|
"eval_runtime": 3.1796, |
|
"eval_samples_per_second": 31.45, |
|
"eval_steps_per_second": 4.089, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.28709518909454346, |
|
"eval_runtime": 3.1808, |
|
"eval_samples_per_second": 31.438, |
|
"eval_steps_per_second": 4.087, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.2878973186016083, |
|
"eval_runtime": 3.1941, |
|
"eval_samples_per_second": 31.308, |
|
"eval_steps_per_second": 4.07, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.28794804215431213, |
|
"eval_runtime": 3.1917, |
|
"eval_samples_per_second": 31.332, |
|
"eval_steps_per_second": 4.073, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.287733256816864, |
|
"eval_runtime": 3.1855, |
|
"eval_samples_per_second": 31.392, |
|
"eval_steps_per_second": 4.081, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.2870822250843048, |
|
"eval_runtime": 3.1924, |
|
"eval_samples_per_second": 31.325, |
|
"eval_steps_per_second": 4.072, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.2874806523323059, |
|
"eval_runtime": 3.1929, |
|
"eval_samples_per_second": 31.319, |
|
"eval_steps_per_second": 4.072, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2870163321495056, |
|
"eval_runtime": 3.1935, |
|
"eval_samples_per_second": 31.314, |
|
"eval_steps_per_second": 4.071, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2874850928783417, |
|
"eval_runtime": 3.1878, |
|
"eval_samples_per_second": 31.369, |
|
"eval_steps_per_second": 4.078, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2879111170768738, |
|
"eval_runtime": 3.1939, |
|
"eval_samples_per_second": 31.31, |
|
"eval_steps_per_second": 4.07, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2886711359024048, |
|
"eval_runtime": 3.1977, |
|
"eval_samples_per_second": 31.272, |
|
"eval_steps_per_second": 4.065, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.28828713297843933, |
|
"eval_runtime": 3.1974, |
|
"eval_samples_per_second": 31.275, |
|
"eval_steps_per_second": 4.066, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.28815144300460815, |
|
"eval_runtime": 3.1962, |
|
"eval_samples_per_second": 31.287, |
|
"eval_steps_per_second": 4.067, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.28827354311943054, |
|
"eval_runtime": 3.197, |
|
"eval_samples_per_second": 31.279, |
|
"eval_steps_per_second": 4.066, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.2883530855178833, |
|
"eval_runtime": 3.1937, |
|
"eval_samples_per_second": 31.311, |
|
"eval_steps_per_second": 4.07, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.4587, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.28832393884658813, |
|
"eval_runtime": 3.1941, |
|
"eval_samples_per_second": 31.308, |
|
"eval_steps_per_second": 4.07, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.46315200805664064, |
|
"train_runtime": 1695.4283, |
|
"train_samples_per_second": 18.874, |
|
"train_steps_per_second": 1.18 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|