|
{ |
|
"best_metric": 0.3843018710613251, |
|
"best_model_checkpoint": "yoga_pose_image_classification/checkpoint-3800", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 3800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.1212624584717608, |
|
"eval_loss": 2.18632435798645, |
|
"eval_runtime": 9.4012, |
|
"eval_samples_per_second": 64.034, |
|
"eval_steps_per_second": 2.021, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.2009966777408638, |
|
"eval_loss": 2.154275417327881, |
|
"eval_runtime": 9.4031, |
|
"eval_samples_per_second": 64.021, |
|
"eval_steps_per_second": 2.021, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3239202657807309, |
|
"eval_loss": 2.1111578941345215, |
|
"eval_runtime": 9.3981, |
|
"eval_samples_per_second": 64.056, |
|
"eval_steps_per_second": 2.022, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4833887043189369, |
|
"eval_loss": 2.052860736846924, |
|
"eval_runtime": 9.3515, |
|
"eval_samples_per_second": 64.374, |
|
"eval_steps_per_second": 2.032, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5863787375415282, |
|
"eval_loss": 1.9836093187332153, |
|
"eval_runtime": 9.2802, |
|
"eval_samples_per_second": 64.869, |
|
"eval_steps_per_second": 2.047, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6677740863787376, |
|
"eval_loss": 1.9060654640197754, |
|
"eval_runtime": 9.3038, |
|
"eval_samples_per_second": 64.704, |
|
"eval_steps_per_second": 2.042, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7126245847176079, |
|
"eval_loss": 1.8243703842163086, |
|
"eval_runtime": 9.2471, |
|
"eval_samples_per_second": 65.102, |
|
"eval_steps_per_second": 2.055, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7691029900332226, |
|
"eval_loss": 1.738010287284851, |
|
"eval_runtime": 9.2752, |
|
"eval_samples_per_second": 64.905, |
|
"eval_steps_per_second": 2.048, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7956810631229236, |
|
"eval_loss": 1.6476677656173706, |
|
"eval_runtime": 9.1847, |
|
"eval_samples_per_second": 65.544, |
|
"eval_steps_per_second": 2.069, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8073089700996677, |
|
"eval_loss": 1.5577423572540283, |
|
"eval_runtime": 9.1847, |
|
"eval_samples_per_second": 65.544, |
|
"eval_steps_per_second": 2.069, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8222591362126246, |
|
"eval_loss": 1.4709256887435913, |
|
"eval_runtime": 8.9043, |
|
"eval_samples_per_second": 67.608, |
|
"eval_steps_per_second": 2.134, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8338870431893688, |
|
"eval_loss": 1.3882230520248413, |
|
"eval_runtime": 9.1638, |
|
"eval_samples_per_second": 65.693, |
|
"eval_steps_per_second": 2.073, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.845514950166113, |
|
"eval_loss": 1.314037799835205, |
|
"eval_runtime": 9.4856, |
|
"eval_samples_per_second": 63.465, |
|
"eval_steps_per_second": 2.003, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 3.5199999999999998e-06, |
|
"loss": 1.7637, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8554817275747508, |
|
"eval_loss": 1.2419167757034302, |
|
"eval_runtime": 9.2365, |
|
"eval_samples_per_second": 65.176, |
|
"eval_steps_per_second": 2.057, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8621262458471761, |
|
"eval_loss": 1.175502896308899, |
|
"eval_runtime": 9.1995, |
|
"eval_samples_per_second": 65.439, |
|
"eval_steps_per_second": 2.065, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.867109634551495, |
|
"eval_loss": 1.1211938858032227, |
|
"eval_runtime": 9.2041, |
|
"eval_samples_per_second": 65.406, |
|
"eval_steps_per_second": 2.064, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8704318936877077, |
|
"eval_loss": 1.0678402185440063, |
|
"eval_runtime": 9.1819, |
|
"eval_samples_per_second": 65.564, |
|
"eval_steps_per_second": 2.069, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8770764119601329, |
|
"eval_loss": 1.0240187644958496, |
|
"eval_runtime": 8.9907, |
|
"eval_samples_per_second": 66.958, |
|
"eval_steps_per_second": 2.113, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8803986710963455, |
|
"eval_loss": 0.9842038750648499, |
|
"eval_runtime": 9.2217, |
|
"eval_samples_per_second": 65.281, |
|
"eval_steps_per_second": 2.06, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8870431893687708, |
|
"eval_loss": 0.9439226984977722, |
|
"eval_runtime": 9.1617, |
|
"eval_samples_per_second": 65.708, |
|
"eval_steps_per_second": 2.074, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8920265780730897, |
|
"eval_loss": 0.9107761383056641, |
|
"eval_runtime": 9.3007, |
|
"eval_samples_per_second": 64.726, |
|
"eval_steps_per_second": 2.043, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.893687707641196, |
|
"eval_loss": 0.8776938319206238, |
|
"eval_runtime": 8.8098, |
|
"eval_samples_per_second": 68.333, |
|
"eval_steps_per_second": 2.157, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.893687707641196, |
|
"eval_loss": 0.8496754765510559, |
|
"eval_runtime": 9.052, |
|
"eval_samples_per_second": 66.504, |
|
"eval_steps_per_second": 2.099, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8986710963455149, |
|
"eval_loss": 0.8216127753257751, |
|
"eval_runtime": 9.1831, |
|
"eval_samples_per_second": 65.555, |
|
"eval_steps_per_second": 2.069, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9053156146179402, |
|
"eval_loss": 0.7941116094589233, |
|
"eval_runtime": 8.8187, |
|
"eval_samples_per_second": 68.264, |
|
"eval_steps_per_second": 2.155, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9086378737541528, |
|
"eval_loss": 0.7701788544654846, |
|
"eval_runtime": 9.255, |
|
"eval_samples_per_second": 65.046, |
|
"eval_steps_per_second": 2.053, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 2.9866666666666667e-06, |
|
"loss": 0.8811, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.7498743534088135, |
|
"eval_runtime": 8.9384, |
|
"eval_samples_per_second": 67.35, |
|
"eval_steps_per_second": 2.126, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9102990033222591, |
|
"eval_loss": 0.7278289794921875, |
|
"eval_runtime": 9.2157, |
|
"eval_samples_per_second": 65.324, |
|
"eval_steps_per_second": 2.062, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.9119601328903655, |
|
"eval_loss": 0.7089008092880249, |
|
"eval_runtime": 9.5658, |
|
"eval_samples_per_second": 62.933, |
|
"eval_steps_per_second": 1.986, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9186046511627907, |
|
"eval_loss": 0.6902615427970886, |
|
"eval_runtime": 9.276, |
|
"eval_samples_per_second": 64.899, |
|
"eval_steps_per_second": 2.048, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.920265780730897, |
|
"eval_loss": 0.6722272038459778, |
|
"eval_runtime": 9.2907, |
|
"eval_samples_per_second": 64.796, |
|
"eval_steps_per_second": 2.045, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9169435215946844, |
|
"eval_loss": 0.6599347591400146, |
|
"eval_runtime": 9.146, |
|
"eval_samples_per_second": 65.821, |
|
"eval_steps_per_second": 2.077, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.9169435215946844, |
|
"eval_loss": 0.6435149312019348, |
|
"eval_runtime": 9.2827, |
|
"eval_samples_per_second": 64.852, |
|
"eval_steps_per_second": 2.047, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9186046511627907, |
|
"eval_loss": 0.6296250820159912, |
|
"eval_runtime": 8.9518, |
|
"eval_samples_per_second": 67.249, |
|
"eval_steps_per_second": 2.122, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.920265780730897, |
|
"eval_loss": 0.6152604222297668, |
|
"eval_runtime": 9.2036, |
|
"eval_samples_per_second": 65.409, |
|
"eval_steps_per_second": 2.064, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9219269102990033, |
|
"eval_loss": 0.6024987101554871, |
|
"eval_runtime": 9.1807, |
|
"eval_samples_per_second": 65.573, |
|
"eval_steps_per_second": 2.07, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.9235880398671097, |
|
"eval_loss": 0.5926721692085266, |
|
"eval_runtime": 9.3058, |
|
"eval_samples_per_second": 64.691, |
|
"eval_steps_per_second": 2.042, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.5806441903114319, |
|
"eval_runtime": 9.2355, |
|
"eval_samples_per_second": 65.183, |
|
"eval_steps_per_second": 2.057, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.925249169435216, |
|
"eval_loss": 0.5694455504417419, |
|
"eval_runtime": 9.1082, |
|
"eval_samples_per_second": 66.094, |
|
"eval_steps_per_second": 2.086, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"learning_rate": 2.453333333333333e-06, |
|
"loss": 0.5341, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.9235880398671097, |
|
"eval_loss": 0.5654778480529785, |
|
"eval_runtime": 9.1414, |
|
"eval_samples_per_second": 65.854, |
|
"eval_steps_per_second": 2.078, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.5506969094276428, |
|
"eval_runtime": 9.2079, |
|
"eval_samples_per_second": 65.379, |
|
"eval_steps_per_second": 2.063, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.543114185333252, |
|
"eval_runtime": 9.2502, |
|
"eval_samples_per_second": 65.08, |
|
"eval_steps_per_second": 2.054, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.5349024534225464, |
|
"eval_runtime": 9.0081, |
|
"eval_samples_per_second": 66.829, |
|
"eval_steps_per_second": 2.109, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.5262035727500916, |
|
"eval_runtime": 9.1978, |
|
"eval_samples_per_second": 65.45, |
|
"eval_steps_per_second": 2.066, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.9269102990033222, |
|
"eval_loss": 0.5220299363136292, |
|
"eval_runtime": 9.148, |
|
"eval_samples_per_second": 65.807, |
|
"eval_steps_per_second": 2.077, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.9269102990033222, |
|
"eval_loss": 0.5145980715751648, |
|
"eval_runtime": 9.2485, |
|
"eval_samples_per_second": 65.092, |
|
"eval_steps_per_second": 2.054, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.5084472894668579, |
|
"eval_runtime": 8.9428, |
|
"eval_samples_per_second": 67.317, |
|
"eval_steps_per_second": 2.125, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.5007317066192627, |
|
"eval_runtime": 9.1094, |
|
"eval_samples_per_second": 66.086, |
|
"eval_steps_per_second": 2.086, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.4938865602016449, |
|
"eval_runtime": 9.3331, |
|
"eval_samples_per_second": 64.501, |
|
"eval_steps_per_second": 2.036, |
|
"step": 1862 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.48970168828964233, |
|
"eval_runtime": 9.2175, |
|
"eval_samples_per_second": 65.31, |
|
"eval_steps_per_second": 2.061, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4844241738319397, |
|
"eval_runtime": 9.3402, |
|
"eval_samples_per_second": 64.453, |
|
"eval_steps_per_second": 2.034, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4789960980415344, |
|
"eval_runtime": 9.2399, |
|
"eval_samples_per_second": 65.152, |
|
"eval_steps_per_second": 2.056, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 52.63, |
|
"learning_rate": 1.92e-06, |
|
"loss": 0.3822, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4740654230117798, |
|
"eval_runtime": 9.2394, |
|
"eval_samples_per_second": 65.155, |
|
"eval_steps_per_second": 2.056, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.4689539968967438, |
|
"eval_runtime": 9.192, |
|
"eval_samples_per_second": 65.491, |
|
"eval_steps_per_second": 2.067, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4651150107383728, |
|
"eval_runtime": 9.0767, |
|
"eval_samples_per_second": 66.324, |
|
"eval_steps_per_second": 2.093, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.4605846703052521, |
|
"eval_runtime": 9.2523, |
|
"eval_samples_per_second": 65.065, |
|
"eval_steps_per_second": 2.054, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4576462507247925, |
|
"eval_runtime": 9.1536, |
|
"eval_samples_per_second": 65.767, |
|
"eval_steps_per_second": 2.076, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.45208579301834106, |
|
"eval_runtime": 9.1727, |
|
"eval_samples_per_second": 65.63, |
|
"eval_steps_per_second": 2.071, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.4491443336009979, |
|
"eval_runtime": 9.1974, |
|
"eval_samples_per_second": 65.453, |
|
"eval_steps_per_second": 2.066, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.44608035683631897, |
|
"eval_runtime": 9.363, |
|
"eval_samples_per_second": 64.295, |
|
"eval_steps_per_second": 2.029, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.4429391622543335, |
|
"eval_runtime": 9.2675, |
|
"eval_samples_per_second": 64.959, |
|
"eval_steps_per_second": 2.05, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.4398341476917267, |
|
"eval_runtime": 9.2027, |
|
"eval_samples_per_second": 65.416, |
|
"eval_steps_per_second": 2.065, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.43718549609184265, |
|
"eval_runtime": 9.0863, |
|
"eval_samples_per_second": 66.254, |
|
"eval_steps_per_second": 2.091, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.435174822807312, |
|
"eval_runtime": 9.1649, |
|
"eval_samples_per_second": 65.685, |
|
"eval_steps_per_second": 2.073, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4314589500427246, |
|
"eval_runtime": 9.0429, |
|
"eval_samples_per_second": 66.572, |
|
"eval_steps_per_second": 2.101, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 65.79, |
|
"learning_rate": 1.3866666666666666e-06, |
|
"loss": 0.3089, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4279780983924866, |
|
"eval_runtime": 8.8928, |
|
"eval_samples_per_second": 67.695, |
|
"eval_steps_per_second": 2.137, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4262917935848236, |
|
"eval_runtime": 9.0012, |
|
"eval_samples_per_second": 66.88, |
|
"eval_steps_per_second": 2.111, |
|
"step": 2546 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.42236465215682983, |
|
"eval_runtime": 8.9695, |
|
"eval_samples_per_second": 67.116, |
|
"eval_steps_per_second": 2.118, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.420457124710083, |
|
"eval_runtime": 9.2333, |
|
"eval_samples_per_second": 65.199, |
|
"eval_steps_per_second": 2.058, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4179752767086029, |
|
"eval_runtime": 9.1119, |
|
"eval_samples_per_second": 66.067, |
|
"eval_steps_per_second": 2.085, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.41662338376045227, |
|
"eval_runtime": 9.2498, |
|
"eval_samples_per_second": 65.082, |
|
"eval_steps_per_second": 2.054, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4137563109397888, |
|
"eval_runtime": 9.0837, |
|
"eval_samples_per_second": 66.273, |
|
"eval_steps_per_second": 2.092, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.4119124710559845, |
|
"eval_runtime": 9.123, |
|
"eval_samples_per_second": 65.987, |
|
"eval_steps_per_second": 2.083, |
|
"step": 2774 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.9352159468438538, |
|
"eval_loss": 0.4097132682800293, |
|
"eval_runtime": 9.2672, |
|
"eval_samples_per_second": 64.96, |
|
"eval_steps_per_second": 2.05, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.9335548172757475, |
|
"eval_loss": 0.40696021914482117, |
|
"eval_runtime": 8.9895, |
|
"eval_samples_per_second": 66.967, |
|
"eval_steps_per_second": 2.114, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.9318936877076412, |
|
"eval_loss": 0.40516966581344604, |
|
"eval_runtime": 9.1036, |
|
"eval_samples_per_second": 66.128, |
|
"eval_steps_per_second": 2.087, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.9335548172757475, |
|
"eval_loss": 0.40282949805259705, |
|
"eval_runtime": 9.1585, |
|
"eval_samples_per_second": 65.731, |
|
"eval_steps_per_second": 2.075, |
|
"step": 2926 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.9335548172757475, |
|
"eval_loss": 0.4002813994884491, |
|
"eval_runtime": 9.2285, |
|
"eval_samples_per_second": 65.233, |
|
"eval_steps_per_second": 2.059, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"learning_rate": 8.533333333333334e-07, |
|
"loss": 0.2708, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.9352159468438538, |
|
"eval_loss": 0.3989640772342682, |
|
"eval_runtime": 9.2555, |
|
"eval_samples_per_second": 65.042, |
|
"eval_steps_per_second": 2.053, |
|
"step": 3002 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.3976523280143738, |
|
"eval_runtime": 9.2496, |
|
"eval_samples_per_second": 65.084, |
|
"eval_steps_per_second": 2.054, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.9335548172757475, |
|
"eval_loss": 0.39560216665267944, |
|
"eval_runtime": 9.3445, |
|
"eval_samples_per_second": 64.423, |
|
"eval_steps_per_second": 2.033, |
|
"step": 3078 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.39483192563056946, |
|
"eval_runtime": 9.0825, |
|
"eval_samples_per_second": 66.282, |
|
"eval_steps_per_second": 2.092, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.3940925896167755, |
|
"eval_runtime": 8.9097, |
|
"eval_samples_per_second": 67.567, |
|
"eval_steps_per_second": 2.133, |
|
"step": 3154 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.3927687406539917, |
|
"eval_runtime": 9.2565, |
|
"eval_samples_per_second": 65.035, |
|
"eval_steps_per_second": 2.053, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.39090487360954285, |
|
"eval_runtime": 9.3794, |
|
"eval_samples_per_second": 64.184, |
|
"eval_steps_per_second": 2.026, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.3901437222957611, |
|
"eval_runtime": 9.1195, |
|
"eval_samples_per_second": 66.013, |
|
"eval_steps_per_second": 2.083, |
|
"step": 3268 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.3894195556640625, |
|
"eval_runtime": 9.1199, |
|
"eval_samples_per_second": 66.01, |
|
"eval_steps_per_second": 2.083, |
|
"step": 3306 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.38919058442115784, |
|
"eval_runtime": 9.2628, |
|
"eval_samples_per_second": 64.991, |
|
"eval_steps_per_second": 2.051, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.388712078332901, |
|
"eval_runtime": 9.1513, |
|
"eval_samples_per_second": 65.783, |
|
"eval_steps_per_second": 2.076, |
|
"step": 3382 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.3878062665462494, |
|
"eval_runtime": 8.9396, |
|
"eval_samples_per_second": 67.341, |
|
"eval_steps_per_second": 2.125, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.38750186562538147, |
|
"eval_runtime": 9.2575, |
|
"eval_samples_per_second": 65.028, |
|
"eval_steps_per_second": 2.052, |
|
"step": 3458 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.9385382059800664, |
|
"eval_loss": 0.38650283217430115, |
|
"eval_runtime": 9.1672, |
|
"eval_samples_per_second": 65.669, |
|
"eval_steps_per_second": 2.073, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 92.11, |
|
"learning_rate": 3.2e-07, |
|
"loss": 0.2496, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.9368770764119602, |
|
"eval_loss": 0.3866115212440491, |
|
"eval_runtime": 9.0461, |
|
"eval_samples_per_second": 66.548, |
|
"eval_steps_per_second": 2.1, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.9385382059800664, |
|
"eval_loss": 0.3854145407676697, |
|
"eval_runtime": 9.209, |
|
"eval_samples_per_second": 65.371, |
|
"eval_steps_per_second": 2.063, |
|
"step": 3572 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.9385382059800664, |
|
"eval_loss": 0.38511377573013306, |
|
"eval_runtime": 9.1643, |
|
"eval_samples_per_second": 65.69, |
|
"eval_steps_per_second": 2.073, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.9385382059800664, |
|
"eval_loss": 0.38524767756462097, |
|
"eval_runtime": 9.2315, |
|
"eval_samples_per_second": 65.211, |
|
"eval_steps_per_second": 2.058, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.9385382059800664, |
|
"eval_loss": 0.38468137383461, |
|
"eval_runtime": 9.3077, |
|
"eval_samples_per_second": 64.678, |
|
"eval_steps_per_second": 2.041, |
|
"step": 3686 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.9385382059800664, |
|
"eval_loss": 0.38459545373916626, |
|
"eval_runtime": 9.2928, |
|
"eval_samples_per_second": 64.781, |
|
"eval_steps_per_second": 2.045, |
|
"step": 3724 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.9385382059800664, |
|
"eval_loss": 0.38430511951446533, |
|
"eval_runtime": 9.1498, |
|
"eval_samples_per_second": 65.794, |
|
"eval_steps_per_second": 2.077, |
|
"step": 3762 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9385382059800664, |
|
"eval_loss": 0.3843018710613251, |
|
"eval_runtime": 9.1442, |
|
"eval_samples_per_second": 65.834, |
|
"eval_steps_per_second": 2.078, |
|
"step": 3800 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3800, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 1.8630243068851814e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|