|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.29521188139915466, |
|
"eval_runtime": 2.9613, |
|
"eval_samples_per_second": 33.769, |
|
"eval_steps_per_second": 4.39, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.28949299454689026, |
|
"eval_runtime": 3.029, |
|
"eval_samples_per_second": 33.015, |
|
"eval_steps_per_second": 4.292, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.29219087958335876, |
|
"eval_runtime": 3.0806, |
|
"eval_samples_per_second": 32.461, |
|
"eval_steps_per_second": 4.22, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.29383212327957153, |
|
"eval_runtime": 3.1144, |
|
"eval_samples_per_second": 32.109, |
|
"eval_steps_per_second": 4.174, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.28848135471343994, |
|
"eval_runtime": 3.1511, |
|
"eval_samples_per_second": 31.735, |
|
"eval_steps_per_second": 4.126, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.48, |
|
"eval_loss": 0.2944973409175873, |
|
"eval_runtime": 3.1574, |
|
"eval_samples_per_second": 31.672, |
|
"eval_steps_per_second": 4.117, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.2860355079174042, |
|
"eval_runtime": 3.1678, |
|
"eval_samples_per_second": 31.568, |
|
"eval_steps_per_second": 4.104, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.2887631356716156, |
|
"eval_runtime": 3.1726, |
|
"eval_samples_per_second": 31.52, |
|
"eval_steps_per_second": 4.098, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.28938525915145874, |
|
"eval_runtime": 3.1696, |
|
"eval_samples_per_second": 31.55, |
|
"eval_steps_per_second": 4.101, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.29034924507141113, |
|
"eval_runtime": 3.1725, |
|
"eval_samples_per_second": 31.521, |
|
"eval_steps_per_second": 4.098, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.286794513463974, |
|
"eval_runtime": 3.1738, |
|
"eval_samples_per_second": 31.508, |
|
"eval_steps_per_second": 4.096, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.287968248128891, |
|
"eval_runtime": 3.1761, |
|
"eval_samples_per_second": 31.485, |
|
"eval_steps_per_second": 4.093, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.54, |
|
"eval_loss": 0.2946690618991852, |
|
"eval_runtime": 3.1756, |
|
"eval_samples_per_second": 31.49, |
|
"eval_steps_per_second": 4.094, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.29572564363479614, |
|
"eval_runtime": 3.1786, |
|
"eval_samples_per_second": 31.46, |
|
"eval_steps_per_second": 4.09, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.2876897156238556, |
|
"eval_runtime": 3.1783, |
|
"eval_samples_per_second": 31.463, |
|
"eval_steps_per_second": 4.09, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.28647202253341675, |
|
"eval_runtime": 3.1833, |
|
"eval_samples_per_second": 31.414, |
|
"eval_steps_per_second": 4.084, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.68, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.28502991795539856, |
|
"eval_runtime": 3.1839, |
|
"eval_samples_per_second": 31.408, |
|
"eval_steps_per_second": 4.083, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.28460079431533813, |
|
"eval_runtime": 3.1831, |
|
"eval_samples_per_second": 31.416, |
|
"eval_steps_per_second": 4.084, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.29105326533317566, |
|
"eval_runtime": 3.1856, |
|
"eval_samples_per_second": 31.391, |
|
"eval_steps_per_second": 4.081, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00075, |
|
"loss": 0.4684, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2960752844810486, |
|
"eval_runtime": 3.1862, |
|
"eval_samples_per_second": 31.386, |
|
"eval_steps_per_second": 4.08, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.28715649247169495, |
|
"eval_runtime": 3.1889, |
|
"eval_samples_per_second": 31.359, |
|
"eval_steps_per_second": 4.077, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2879977822303772, |
|
"eval_runtime": 3.1931, |
|
"eval_samples_per_second": 31.317, |
|
"eval_steps_per_second": 4.071, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.2950732111930847, |
|
"eval_runtime": 3.1897, |
|
"eval_samples_per_second": 31.351, |
|
"eval_steps_per_second": 4.076, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2896903157234192, |
|
"eval_runtime": 3.1901, |
|
"eval_samples_per_second": 31.347, |
|
"eval_steps_per_second": 4.075, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2884129285812378, |
|
"eval_runtime": 3.1902, |
|
"eval_samples_per_second": 31.346, |
|
"eval_steps_per_second": 4.075, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2894528806209564, |
|
"eval_runtime": 3.1904, |
|
"eval_samples_per_second": 31.344, |
|
"eval_steps_per_second": 4.075, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.2871526777744293, |
|
"eval_runtime": 3.1931, |
|
"eval_samples_per_second": 31.317, |
|
"eval_steps_per_second": 4.071, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28898343443870544, |
|
"eval_runtime": 3.1862, |
|
"eval_samples_per_second": 31.385, |
|
"eval_steps_per_second": 4.08, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.2887178659439087, |
|
"eval_runtime": 3.1838, |
|
"eval_samples_per_second": 31.409, |
|
"eval_steps_per_second": 4.083, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2886087894439697, |
|
"eval_runtime": 3.1839, |
|
"eval_samples_per_second": 31.408, |
|
"eval_steps_per_second": 4.083, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.2875303328037262, |
|
"eval_runtime": 3.1843, |
|
"eval_samples_per_second": 31.404, |
|
"eval_steps_per_second": 4.082, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.2882201075553894, |
|
"eval_runtime": 3.1843, |
|
"eval_samples_per_second": 31.404, |
|
"eval_steps_per_second": 4.083, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.2886188328266144, |
|
"eval_runtime": 3.1833, |
|
"eval_samples_per_second": 31.414, |
|
"eval_steps_per_second": 4.084, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2970466911792755, |
|
"eval_runtime": 3.1842, |
|
"eval_samples_per_second": 31.405, |
|
"eval_steps_per_second": 4.083, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.287495493888855, |
|
"eval_runtime": 3.1852, |
|
"eval_samples_per_second": 31.396, |
|
"eval_steps_per_second": 4.081, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2887771725654602, |
|
"eval_runtime": 3.1879, |
|
"eval_samples_per_second": 31.368, |
|
"eval_steps_per_second": 4.078, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2868404686450958, |
|
"eval_runtime": 3.1879, |
|
"eval_samples_per_second": 31.369, |
|
"eval_steps_per_second": 4.078, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28633153438568115, |
|
"eval_runtime": 3.1886, |
|
"eval_samples_per_second": 31.361, |
|
"eval_steps_per_second": 4.077, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2910645008087158, |
|
"eval_runtime": 3.1871, |
|
"eval_samples_per_second": 31.376, |
|
"eval_steps_per_second": 4.079, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.4634, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2866987884044647, |
|
"eval_runtime": 3.1867, |
|
"eval_samples_per_second": 31.38, |
|
"eval_steps_per_second": 4.079, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.54, |
|
"eval_loss": 0.29356110095977783, |
|
"eval_runtime": 3.1874, |
|
"eval_samples_per_second": 31.374, |
|
"eval_steps_per_second": 4.079, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.29651960730552673, |
|
"eval_runtime": 3.1917, |
|
"eval_samples_per_second": 31.331, |
|
"eval_steps_per_second": 4.073, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.2872467339038849, |
|
"eval_runtime": 3.1905, |
|
"eval_samples_per_second": 31.344, |
|
"eval_steps_per_second": 4.075, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.2862147092819214, |
|
"eval_runtime": 3.1876, |
|
"eval_samples_per_second": 31.372, |
|
"eval_steps_per_second": 4.078, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.28714072704315186, |
|
"eval_runtime": 3.1836, |
|
"eval_samples_per_second": 31.411, |
|
"eval_steps_per_second": 4.083, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2913626432418823, |
|
"eval_runtime": 3.1841, |
|
"eval_samples_per_second": 31.406, |
|
"eval_steps_per_second": 4.083, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.29249587655067444, |
|
"eval_runtime": 3.1869, |
|
"eval_samples_per_second": 31.378, |
|
"eval_steps_per_second": 4.079, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28830277919769287, |
|
"eval_runtime": 3.1859, |
|
"eval_samples_per_second": 31.388, |
|
"eval_steps_per_second": 4.08, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.28956958651542664, |
|
"eval_runtime": 3.1845, |
|
"eval_samples_per_second": 31.402, |
|
"eval_steps_per_second": 4.082, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28662189841270447, |
|
"eval_runtime": 3.1871, |
|
"eval_samples_per_second": 31.377, |
|
"eval_steps_per_second": 4.079, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28566181659698486, |
|
"eval_runtime": 3.1876, |
|
"eval_samples_per_second": 31.371, |
|
"eval_steps_per_second": 4.078, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28922975063323975, |
|
"eval_runtime": 3.186, |
|
"eval_samples_per_second": 31.387, |
|
"eval_steps_per_second": 4.08, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.2861008942127228, |
|
"eval_runtime": 3.1847, |
|
"eval_samples_per_second": 31.401, |
|
"eval_steps_per_second": 4.082, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2861438989639282, |
|
"eval_runtime": 3.1893, |
|
"eval_samples_per_second": 31.355, |
|
"eval_steps_per_second": 4.076, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.2871514558792114, |
|
"eval_runtime": 3.1842, |
|
"eval_samples_per_second": 31.405, |
|
"eval_steps_per_second": 4.083, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2861255705356598, |
|
"eval_runtime": 3.1867, |
|
"eval_samples_per_second": 31.381, |
|
"eval_steps_per_second": 4.08, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.2864649295806885, |
|
"eval_runtime": 3.1885, |
|
"eval_samples_per_second": 31.363, |
|
"eval_steps_per_second": 4.077, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2879580557346344, |
|
"eval_runtime": 3.1877, |
|
"eval_samples_per_second": 31.371, |
|
"eval_steps_per_second": 4.078, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2898498773574829, |
|
"eval_runtime": 3.1921, |
|
"eval_samples_per_second": 31.327, |
|
"eval_steps_per_second": 4.073, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00025, |
|
"loss": 0.4583, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2899610102176666, |
|
"eval_runtime": 3.1902, |
|
"eval_samples_per_second": 31.346, |
|
"eval_steps_per_second": 4.075, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2895892858505249, |
|
"eval_runtime": 3.1858, |
|
"eval_samples_per_second": 31.389, |
|
"eval_steps_per_second": 4.081, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.28861695528030396, |
|
"eval_runtime": 3.1896, |
|
"eval_samples_per_second": 31.352, |
|
"eval_steps_per_second": 4.076, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.28878867626190186, |
|
"eval_runtime": 3.1872, |
|
"eval_samples_per_second": 31.375, |
|
"eval_steps_per_second": 4.079, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2890695333480835, |
|
"eval_runtime": 3.1911, |
|
"eval_samples_per_second": 31.337, |
|
"eval_steps_per_second": 4.074, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.28738299012184143, |
|
"eval_runtime": 3.1931, |
|
"eval_samples_per_second": 31.317, |
|
"eval_steps_per_second": 4.071, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.28749388456344604, |
|
"eval_runtime": 3.1907, |
|
"eval_samples_per_second": 31.341, |
|
"eval_steps_per_second": 4.074, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.2881923019886017, |
|
"eval_runtime": 3.1995, |
|
"eval_samples_per_second": 31.255, |
|
"eval_steps_per_second": 4.063, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.2863309979438782, |
|
"eval_runtime": 3.1929, |
|
"eval_samples_per_second": 31.32, |
|
"eval_steps_per_second": 4.072, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2866677939891815, |
|
"eval_runtime": 3.1936, |
|
"eval_samples_per_second": 31.313, |
|
"eval_steps_per_second": 4.071, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28654342889785767, |
|
"eval_runtime": 3.1984, |
|
"eval_samples_per_second": 31.266, |
|
"eval_steps_per_second": 4.065, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2863212823867798, |
|
"eval_runtime": 3.2026, |
|
"eval_samples_per_second": 31.225, |
|
"eval_steps_per_second": 4.059, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2862430512905121, |
|
"eval_runtime": 3.2028, |
|
"eval_samples_per_second": 31.223, |
|
"eval_steps_per_second": 4.059, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28635847568511963, |
|
"eval_runtime": 3.2023, |
|
"eval_samples_per_second": 31.227, |
|
"eval_steps_per_second": 4.06, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.28623470664024353, |
|
"eval_runtime": 3.2087, |
|
"eval_samples_per_second": 31.166, |
|
"eval_steps_per_second": 4.052, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.2866401672363281, |
|
"eval_runtime": 3.2061, |
|
"eval_samples_per_second": 31.19, |
|
"eval_steps_per_second": 4.055, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.28675001859664917, |
|
"eval_runtime": 3.2056, |
|
"eval_samples_per_second": 31.195, |
|
"eval_steps_per_second": 4.055, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2866291403770447, |
|
"eval_runtime": 3.2051, |
|
"eval_samples_per_second": 31.201, |
|
"eval_steps_per_second": 4.056, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.2866869568824768, |
|
"eval_runtime": 3.2049, |
|
"eval_samples_per_second": 31.202, |
|
"eval_steps_per_second": 4.056, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.28672027587890625, |
|
"eval_runtime": 3.2024, |
|
"eval_samples_per_second": 31.227, |
|
"eval_steps_per_second": 4.059, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.4597, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.2866990268230438, |
|
"eval_runtime": 3.202, |
|
"eval_samples_per_second": 31.23, |
|
"eval_steps_per_second": 4.06, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.4624418716430664, |
|
"train_runtime": 1699.8447, |
|
"train_samples_per_second": 18.825, |
|
"train_steps_per_second": 1.177 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|