|
{ |
|
"best_metric": 0.9142857142857143, |
|
"best_model_checkpoint": "Cvt-finetuned-thyroid/checkpoint-18", |
|
"epoch": 149.88888888888889, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.6600533127784729, |
|
"eval_runtime": 0.4026, |
|
"eval_samples_per_second": 173.882, |
|
"eval_steps_per_second": 7.452, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6563708186149597, |
|
"eval_runtime": 0.3922, |
|
"eval_samples_per_second": 178.495, |
|
"eval_steps_per_second": 7.65, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.7428571428571429, |
|
"eval_loss": 0.6498632431030273, |
|
"eval_runtime": 0.3916, |
|
"eval_samples_per_second": 178.774, |
|
"eval_steps_per_second": 7.662, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_accuracy": 0.7714285714285715, |
|
"eval_loss": 0.6404139399528503, |
|
"eval_runtime": 0.3913, |
|
"eval_samples_per_second": 178.878, |
|
"eval_steps_per_second": 7.666, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.62859708070755, |
|
"eval_runtime": 0.3923, |
|
"eval_samples_per_second": 178.448, |
|
"eval_steps_per_second": 7.648, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_accuracy": 0.8142857142857143, |
|
"eval_loss": 0.6146815419197083, |
|
"eval_runtime": 0.3861, |
|
"eval_samples_per_second": 181.308, |
|
"eval_steps_per_second": 7.77, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 0.5986873507499695, |
|
"eval_runtime": 0.3853, |
|
"eval_samples_per_second": 181.687, |
|
"eval_steps_per_second": 7.787, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.5808569192886353, |
|
"eval_runtime": 0.391, |
|
"eval_samples_per_second": 179.013, |
|
"eval_steps_per_second": 7.672, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.5618674755096436, |
|
"eval_runtime": 0.3861, |
|
"eval_samples_per_second": 181.318, |
|
"eval_steps_per_second": 7.771, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.5425492525100708, |
|
"eval_runtime": 0.3895, |
|
"eval_samples_per_second": 179.718, |
|
"eval_steps_per_second": 7.702, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.5230604410171509, |
|
"eval_runtime": 0.3908, |
|
"eval_samples_per_second": 179.109, |
|
"eval_steps_per_second": 7.676, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 11.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.5035112500190735, |
|
"eval_runtime": 0.3841, |
|
"eval_samples_per_second": 182.262, |
|
"eval_steps_per_second": 7.811, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.6786, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.4828525483608246, |
|
"eval_runtime": 0.3958, |
|
"eval_samples_per_second": 176.835, |
|
"eval_steps_per_second": 7.579, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.4621497690677643, |
|
"eval_runtime": 0.3893, |
|
"eval_samples_per_second": 179.815, |
|
"eval_steps_per_second": 7.706, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.44225531816482544, |
|
"eval_runtime": 0.3936, |
|
"eval_samples_per_second": 177.866, |
|
"eval_steps_per_second": 7.623, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.4237878918647766, |
|
"eval_runtime": 0.3885, |
|
"eval_samples_per_second": 180.201, |
|
"eval_steps_per_second": 7.723, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.4087783396244049, |
|
"eval_runtime": 0.3869, |
|
"eval_samples_per_second": 180.93, |
|
"eval_steps_per_second": 7.754, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.39638686180114746, |
|
"eval_runtime": 0.3919, |
|
"eval_samples_per_second": 178.599, |
|
"eval_steps_per_second": 7.654, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3871336281299591, |
|
"eval_runtime": 0.3895, |
|
"eval_samples_per_second": 179.725, |
|
"eval_steps_per_second": 7.703, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 19.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.37913817167282104, |
|
"eval_runtime": 0.3975, |
|
"eval_samples_per_second": 176.113, |
|
"eval_steps_per_second": 7.548, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 20.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.37276408076286316, |
|
"eval_runtime": 0.3884, |
|
"eval_samples_per_second": 180.205, |
|
"eval_steps_per_second": 7.723, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.36989572644233704, |
|
"eval_runtime": 0.3857, |
|
"eval_samples_per_second": 181.502, |
|
"eval_steps_per_second": 7.779, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 22.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.36899128556251526, |
|
"eval_runtime": 0.3889, |
|
"eval_samples_per_second": 180.008, |
|
"eval_steps_per_second": 7.715, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 23.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.36841970682144165, |
|
"eval_runtime": 0.3867, |
|
"eval_samples_per_second": 181.0, |
|
"eval_steps_per_second": 7.757, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 9.25925925925926e-06, |
|
"loss": 0.544, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.36657989025115967, |
|
"eval_runtime": 0.4003, |
|
"eval_samples_per_second": 174.876, |
|
"eval_steps_per_second": 7.495, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 25.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.36433330178260803, |
|
"eval_runtime": 0.386, |
|
"eval_samples_per_second": 181.342, |
|
"eval_steps_per_second": 7.772, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 26.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3611242175102234, |
|
"eval_runtime": 0.3881, |
|
"eval_samples_per_second": 180.356, |
|
"eval_steps_per_second": 7.73, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.356433242559433, |
|
"eval_runtime": 0.3885, |
|
"eval_samples_per_second": 180.192, |
|
"eval_steps_per_second": 7.723, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 28.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3520072400569916, |
|
"eval_runtime": 0.3869, |
|
"eval_samples_per_second": 180.937, |
|
"eval_steps_per_second": 7.754, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 29.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3477487862110138, |
|
"eval_runtime": 0.39, |
|
"eval_samples_per_second": 179.492, |
|
"eval_steps_per_second": 7.693, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 30.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3445895314216614, |
|
"eval_runtime": 0.4111, |
|
"eval_samples_per_second": 170.276, |
|
"eval_steps_per_second": 7.298, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 31.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3390456736087799, |
|
"eval_runtime": 0.3852, |
|
"eval_samples_per_second": 181.703, |
|
"eval_steps_per_second": 7.787, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 32.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3347129821777344, |
|
"eval_runtime": 0.4506, |
|
"eval_samples_per_second": 155.353, |
|
"eval_steps_per_second": 6.658, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 33.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.33145877718925476, |
|
"eval_runtime": 0.394, |
|
"eval_samples_per_second": 177.648, |
|
"eval_steps_per_second": 7.613, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 34.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3286542296409607, |
|
"eval_runtime": 0.4325, |
|
"eval_samples_per_second": 161.841, |
|
"eval_steps_per_second": 6.936, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 35.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3254566192626953, |
|
"eval_runtime": 0.3901, |
|
"eval_samples_per_second": 179.436, |
|
"eval_steps_per_second": 7.69, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 36.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3226519525051117, |
|
"eval_runtime": 0.3957, |
|
"eval_samples_per_second": 176.889, |
|
"eval_steps_per_second": 7.581, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 37.44, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.4904, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 37.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.32086578011512756, |
|
"eval_runtime": 0.3917, |
|
"eval_samples_per_second": 178.712, |
|
"eval_steps_per_second": 7.659, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 38.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.32166367769241333, |
|
"eval_runtime": 0.3879, |
|
"eval_samples_per_second": 180.476, |
|
"eval_steps_per_second": 7.735, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 39.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3228186070919037, |
|
"eval_runtime": 0.3945, |
|
"eval_samples_per_second": 177.444, |
|
"eval_steps_per_second": 7.605, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 40.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.32248714566230774, |
|
"eval_runtime": 0.3925, |
|
"eval_samples_per_second": 178.361, |
|
"eval_steps_per_second": 7.644, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 41.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.32159554958343506, |
|
"eval_runtime": 0.3914, |
|
"eval_samples_per_second": 178.848, |
|
"eval_steps_per_second": 7.665, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 42.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.31996870040893555, |
|
"eval_runtime": 0.3902, |
|
"eval_samples_per_second": 179.374, |
|
"eval_steps_per_second": 7.687, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 43.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.31838318705558777, |
|
"eval_runtime": 0.3951, |
|
"eval_samples_per_second": 177.183, |
|
"eval_steps_per_second": 7.594, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 44.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3160267174243927, |
|
"eval_runtime": 0.3892, |
|
"eval_samples_per_second": 179.849, |
|
"eval_steps_per_second": 7.708, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 45.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.31339019536972046, |
|
"eval_runtime": 0.3914, |
|
"eval_samples_per_second": 178.826, |
|
"eval_steps_per_second": 7.664, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 46.89, |
|
"eval_accuracy": 0.9142857142857143, |
|
"eval_loss": 0.3097696900367737, |
|
"eval_runtime": 0.3909, |
|
"eval_samples_per_second": 179.096, |
|
"eval_steps_per_second": 7.676, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 47.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.3083226680755615, |
|
"eval_runtime": 0.3975, |
|
"eval_samples_per_second": 176.094, |
|
"eval_steps_per_second": 7.547, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 48.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.306194543838501, |
|
"eval_runtime": 0.39, |
|
"eval_samples_per_second": 179.464, |
|
"eval_steps_per_second": 7.691, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 49.89, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.4468, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 49.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.3034059703350067, |
|
"eval_runtime": 0.3988, |
|
"eval_samples_per_second": 175.515, |
|
"eval_steps_per_second": 7.522, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 50.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.3030487298965454, |
|
"eval_runtime": 0.3912, |
|
"eval_samples_per_second": 178.919, |
|
"eval_steps_per_second": 7.668, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.3037838339805603, |
|
"eval_runtime": 0.4036, |
|
"eval_samples_per_second": 173.422, |
|
"eval_steps_per_second": 7.432, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 52.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.30441269278526306, |
|
"eval_runtime": 0.3905, |
|
"eval_samples_per_second": 179.272, |
|
"eval_steps_per_second": 7.683, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 53.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.30462566018104553, |
|
"eval_runtime": 0.4004, |
|
"eval_samples_per_second": 174.832, |
|
"eval_steps_per_second": 7.493, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 54.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.30405017733573914, |
|
"eval_runtime": 0.3926, |
|
"eval_samples_per_second": 178.307, |
|
"eval_steps_per_second": 7.642, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 55.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.3027400076389313, |
|
"eval_runtime": 0.3921, |
|
"eval_samples_per_second": 178.52, |
|
"eval_steps_per_second": 7.651, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 56.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.3015061914920807, |
|
"eval_runtime": 0.3917, |
|
"eval_samples_per_second": 178.717, |
|
"eval_steps_per_second": 7.659, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 57.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.298301637172699, |
|
"eval_runtime": 0.3901, |
|
"eval_samples_per_second": 179.444, |
|
"eval_steps_per_second": 7.69, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 58.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2965472638607025, |
|
"eval_runtime": 0.3941, |
|
"eval_samples_per_second": 177.631, |
|
"eval_steps_per_second": 7.613, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 59.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2955659329891205, |
|
"eval_runtime": 0.3929, |
|
"eval_samples_per_second": 178.161, |
|
"eval_steps_per_second": 7.635, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 60.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2953043282032013, |
|
"eval_runtime": 0.3961, |
|
"eval_samples_per_second": 176.718, |
|
"eval_steps_per_second": 7.574, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 61.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2960428297519684, |
|
"eval_runtime": 0.3894, |
|
"eval_samples_per_second": 179.751, |
|
"eval_steps_per_second": 7.704, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 62.44, |
|
"learning_rate": 6.481481481481482e-06, |
|
"loss": 0.411, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 62.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.29606738686561584, |
|
"eval_runtime": 0.3907, |
|
"eval_samples_per_second": 179.177, |
|
"eval_steps_per_second": 7.679, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 63.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.29615068435668945, |
|
"eval_runtime": 0.3957, |
|
"eval_samples_per_second": 176.884, |
|
"eval_steps_per_second": 7.581, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 64.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.29546239972114563, |
|
"eval_runtime": 0.4028, |
|
"eval_samples_per_second": 173.791, |
|
"eval_steps_per_second": 7.448, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 65.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.29375123977661133, |
|
"eval_runtime": 0.3984, |
|
"eval_samples_per_second": 175.721, |
|
"eval_steps_per_second": 7.531, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 66.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.293165922164917, |
|
"eval_runtime": 0.3896, |
|
"eval_samples_per_second": 179.657, |
|
"eval_steps_per_second": 7.7, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 67.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2927437126636505, |
|
"eval_runtime": 0.3965, |
|
"eval_samples_per_second": 176.563, |
|
"eval_steps_per_second": 7.567, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 68.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2911137044429779, |
|
"eval_runtime": 0.3982, |
|
"eval_samples_per_second": 175.784, |
|
"eval_steps_per_second": 7.534, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 69.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.29145926237106323, |
|
"eval_runtime": 0.3948, |
|
"eval_samples_per_second": 177.308, |
|
"eval_steps_per_second": 7.599, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 70.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.291074275970459, |
|
"eval_runtime": 0.3933, |
|
"eval_samples_per_second": 177.964, |
|
"eval_steps_per_second": 7.627, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 71.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2896248400211334, |
|
"eval_runtime": 0.3962, |
|
"eval_samples_per_second": 176.685, |
|
"eval_steps_per_second": 7.572, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 72.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2880937457084656, |
|
"eval_runtime": 0.3944, |
|
"eval_samples_per_second": 177.463, |
|
"eval_steps_per_second": 7.606, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 73.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.28765279054641724, |
|
"eval_runtime": 0.3966, |
|
"eval_samples_per_second": 176.488, |
|
"eval_steps_per_second": 7.564, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 74.89, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.3705, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 74.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2876226603984833, |
|
"eval_runtime": 0.3888, |
|
"eval_samples_per_second": 180.054, |
|
"eval_steps_per_second": 7.717, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 75.89, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.28748172521591187, |
|
"eval_runtime": 0.411, |
|
"eval_samples_per_second": 170.337, |
|
"eval_steps_per_second": 7.3, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 76.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2876095473766327, |
|
"eval_runtime": 0.4294, |
|
"eval_samples_per_second": 163.0, |
|
"eval_steps_per_second": 6.986, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 77.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.28670457005500793, |
|
"eval_runtime": 0.3949, |
|
"eval_samples_per_second": 177.274, |
|
"eval_steps_per_second": 7.597, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 78.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.28570595383644104, |
|
"eval_runtime": 0.3939, |
|
"eval_samples_per_second": 177.69, |
|
"eval_steps_per_second": 7.615, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 79.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.28557589650154114, |
|
"eval_runtime": 0.3949, |
|
"eval_samples_per_second": 177.268, |
|
"eval_steps_per_second": 7.597, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 80.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2853609621524811, |
|
"eval_runtime": 0.3898, |
|
"eval_samples_per_second": 179.59, |
|
"eval_steps_per_second": 7.697, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 81.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.2859923243522644, |
|
"eval_runtime": 0.3917, |
|
"eval_samples_per_second": 178.729, |
|
"eval_steps_per_second": 7.66, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 82.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.2853332459926605, |
|
"eval_runtime": 0.3898, |
|
"eval_samples_per_second": 179.566, |
|
"eval_steps_per_second": 7.696, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 83.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.28681549429893494, |
|
"eval_runtime": 0.3903, |
|
"eval_samples_per_second": 179.364, |
|
"eval_steps_per_second": 7.687, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 84.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.2884899079799652, |
|
"eval_runtime": 0.4004, |
|
"eval_samples_per_second": 174.808, |
|
"eval_steps_per_second": 7.492, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 85.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.2893112599849701, |
|
"eval_runtime": 0.3932, |
|
"eval_samples_per_second": 178.006, |
|
"eval_steps_per_second": 7.629, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 86.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.29076695442199707, |
|
"eval_runtime": 0.4006, |
|
"eval_samples_per_second": 174.727, |
|
"eval_steps_per_second": 7.488, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 87.44, |
|
"learning_rate": 4.62962962962963e-06, |
|
"loss": 0.3442, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 87.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.29055681824684143, |
|
"eval_runtime": 0.3948, |
|
"eval_samples_per_second": 177.302, |
|
"eval_steps_per_second": 7.599, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 88.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.29001426696777344, |
|
"eval_runtime": 0.4034, |
|
"eval_samples_per_second": 173.528, |
|
"eval_steps_per_second": 7.437, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 89.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.287276953458786, |
|
"eval_runtime": 0.3919, |
|
"eval_samples_per_second": 178.62, |
|
"eval_steps_per_second": 7.655, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 90.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.28426897525787354, |
|
"eval_runtime": 0.4018, |
|
"eval_samples_per_second": 174.231, |
|
"eval_steps_per_second": 7.467, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 91.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.28223827481269836, |
|
"eval_runtime": 0.3886, |
|
"eval_samples_per_second": 180.111, |
|
"eval_steps_per_second": 7.719, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 92.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.28137001395225525, |
|
"eval_runtime": 0.3867, |
|
"eval_samples_per_second": 181.016, |
|
"eval_steps_per_second": 7.758, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 93.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.2821663022041321, |
|
"eval_runtime": 0.395, |
|
"eval_samples_per_second": 177.23, |
|
"eval_steps_per_second": 7.596, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 94.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.2825068235397339, |
|
"eval_runtime": 0.3994, |
|
"eval_samples_per_second": 175.263, |
|
"eval_steps_per_second": 7.511, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 95.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2831081449985504, |
|
"eval_runtime": 0.3997, |
|
"eval_samples_per_second": 175.112, |
|
"eval_steps_per_second": 7.505, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 96.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2835954427719116, |
|
"eval_runtime": 0.3968, |
|
"eval_samples_per_second": 176.408, |
|
"eval_steps_per_second": 7.56, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 97.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.28326863050460815, |
|
"eval_runtime": 0.3965, |
|
"eval_samples_per_second": 176.561, |
|
"eval_steps_per_second": 7.567, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 98.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.281973272562027, |
|
"eval_runtime": 0.3973, |
|
"eval_samples_per_second": 176.206, |
|
"eval_steps_per_second": 7.552, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 99.89, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.3159, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 99.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.28054317831993103, |
|
"eval_runtime": 0.3966, |
|
"eval_samples_per_second": 176.5, |
|
"eval_steps_per_second": 7.564, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.27876371145248413, |
|
"eval_runtime": 0.393, |
|
"eval_samples_per_second": 178.117, |
|
"eval_steps_per_second": 7.634, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 101.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.27795442938804626, |
|
"eval_runtime": 0.388, |
|
"eval_samples_per_second": 180.426, |
|
"eval_steps_per_second": 7.733, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 102.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.27797621488571167, |
|
"eval_runtime": 0.3898, |
|
"eval_samples_per_second": 179.574, |
|
"eval_steps_per_second": 7.696, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 103.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.27778205275535583, |
|
"eval_runtime": 0.3946, |
|
"eval_samples_per_second": 177.402, |
|
"eval_steps_per_second": 7.603, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 104.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.2783416509628296, |
|
"eval_runtime": 0.3956, |
|
"eval_samples_per_second": 176.929, |
|
"eval_steps_per_second": 7.583, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 105.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2788624167442322, |
|
"eval_runtime": 0.3965, |
|
"eval_samples_per_second": 176.537, |
|
"eval_steps_per_second": 7.566, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 106.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27885138988494873, |
|
"eval_runtime": 0.401, |
|
"eval_samples_per_second": 174.562, |
|
"eval_steps_per_second": 7.481, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 107.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2788878381252289, |
|
"eval_runtime": 0.3934, |
|
"eval_samples_per_second": 177.953, |
|
"eval_steps_per_second": 7.627, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 108.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27941665053367615, |
|
"eval_runtime": 0.3952, |
|
"eval_samples_per_second": 177.139, |
|
"eval_steps_per_second": 7.592, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 109.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2792874872684479, |
|
"eval_runtime": 0.3992, |
|
"eval_samples_per_second": 175.358, |
|
"eval_steps_per_second": 7.515, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 110.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27895817160606384, |
|
"eval_runtime": 0.393, |
|
"eval_samples_per_second": 178.121, |
|
"eval_steps_per_second": 7.634, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 111.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2787097692489624, |
|
"eval_runtime": 0.3941, |
|
"eval_samples_per_second": 177.636, |
|
"eval_steps_per_second": 7.613, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 112.44, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.2982, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 112.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2774018943309784, |
|
"eval_runtime": 0.3953, |
|
"eval_samples_per_second": 177.084, |
|
"eval_steps_per_second": 7.589, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 113.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2765597105026245, |
|
"eval_runtime": 0.389, |
|
"eval_samples_per_second": 179.928, |
|
"eval_steps_per_second": 7.711, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 114.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27541905641555786, |
|
"eval_runtime": 0.3931, |
|
"eval_samples_per_second": 178.054, |
|
"eval_steps_per_second": 7.631, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 115.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.27446889877319336, |
|
"eval_runtime": 0.3931, |
|
"eval_samples_per_second": 178.076, |
|
"eval_steps_per_second": 7.632, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 116.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.2735791802406311, |
|
"eval_runtime": 0.3922, |
|
"eval_samples_per_second": 178.501, |
|
"eval_steps_per_second": 7.65, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 117.89, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.27332428097724915, |
|
"eval_runtime": 0.3911, |
|
"eval_samples_per_second": 178.974, |
|
"eval_steps_per_second": 7.67, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 118.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2734207212924957, |
|
"eval_runtime": 0.396, |
|
"eval_samples_per_second": 176.768, |
|
"eval_steps_per_second": 7.576, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 119.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27379804849624634, |
|
"eval_runtime": 0.3939, |
|
"eval_samples_per_second": 177.731, |
|
"eval_steps_per_second": 7.617, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 120.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2744147777557373, |
|
"eval_runtime": 0.3947, |
|
"eval_samples_per_second": 177.362, |
|
"eval_steps_per_second": 7.601, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 121.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2748957574367523, |
|
"eval_runtime": 0.4048, |
|
"eval_samples_per_second": 172.911, |
|
"eval_steps_per_second": 7.41, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 122.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27517834305763245, |
|
"eval_runtime": 0.398, |
|
"eval_samples_per_second": 175.867, |
|
"eval_steps_per_second": 7.537, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 123.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2754015624523163, |
|
"eval_runtime": 0.394, |
|
"eval_samples_per_second": 177.676, |
|
"eval_steps_per_second": 7.615, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 124.89, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.2757, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 124.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27559223771095276, |
|
"eval_runtime": 0.3999, |
|
"eval_samples_per_second": 175.064, |
|
"eval_steps_per_second": 7.503, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 125.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27577343583106995, |
|
"eval_runtime": 0.3951, |
|
"eval_samples_per_second": 177.152, |
|
"eval_steps_per_second": 7.592, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 126.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2756122648715973, |
|
"eval_runtime": 0.3965, |
|
"eval_samples_per_second": 176.526, |
|
"eval_steps_per_second": 7.565, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 127.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2754615247249603, |
|
"eval_runtime": 0.3971, |
|
"eval_samples_per_second": 176.293, |
|
"eval_steps_per_second": 7.555, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 128.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27490970492362976, |
|
"eval_runtime": 0.3981, |
|
"eval_samples_per_second": 175.828, |
|
"eval_steps_per_second": 7.535, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 129.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27434810996055603, |
|
"eval_runtime": 0.3988, |
|
"eval_samples_per_second": 175.52, |
|
"eval_steps_per_second": 7.522, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 130.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27393919229507446, |
|
"eval_runtime": 0.3964, |
|
"eval_samples_per_second": 176.607, |
|
"eval_steps_per_second": 7.569, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 131.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2735598385334015, |
|
"eval_runtime": 0.3963, |
|
"eval_samples_per_second": 176.647, |
|
"eval_steps_per_second": 7.571, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 132.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27345114946365356, |
|
"eval_runtime": 0.3943, |
|
"eval_samples_per_second": 177.525, |
|
"eval_steps_per_second": 7.608, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 133.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27316632866859436, |
|
"eval_runtime": 0.3968, |
|
"eval_samples_per_second": 176.395, |
|
"eval_steps_per_second": 7.56, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 134.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2731429636478424, |
|
"eval_runtime": 0.3964, |
|
"eval_samples_per_second": 176.608, |
|
"eval_steps_per_second": 7.569, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 135.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2727990746498108, |
|
"eval_runtime": 0.3965, |
|
"eval_samples_per_second": 176.545, |
|
"eval_steps_per_second": 7.566, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 136.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2725311815738678, |
|
"eval_runtime": 0.3934, |
|
"eval_samples_per_second": 177.937, |
|
"eval_steps_per_second": 7.626, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 137.44, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.2724, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 137.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27232787013053894, |
|
"eval_runtime": 0.397, |
|
"eval_samples_per_second": 176.334, |
|
"eval_steps_per_second": 7.557, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 138.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2722608149051666, |
|
"eval_runtime": 0.3984, |
|
"eval_samples_per_second": 175.719, |
|
"eval_steps_per_second": 7.531, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 139.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2721470594406128, |
|
"eval_runtime": 0.3948, |
|
"eval_samples_per_second": 177.289, |
|
"eval_steps_per_second": 7.598, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 140.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2720603346824646, |
|
"eval_runtime": 0.4008, |
|
"eval_samples_per_second": 174.65, |
|
"eval_steps_per_second": 7.485, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 141.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27187877893447876, |
|
"eval_runtime": 0.3937, |
|
"eval_samples_per_second": 177.785, |
|
"eval_steps_per_second": 7.619, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 142.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27186280488967896, |
|
"eval_runtime": 0.3942, |
|
"eval_samples_per_second": 177.564, |
|
"eval_steps_per_second": 7.61, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 143.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27189135551452637, |
|
"eval_runtime": 0.3939, |
|
"eval_samples_per_second": 177.73, |
|
"eval_steps_per_second": 7.617, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 144.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27191561460494995, |
|
"eval_runtime": 0.4045, |
|
"eval_samples_per_second": 173.067, |
|
"eval_steps_per_second": 7.417, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 145.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2719388008117676, |
|
"eval_runtime": 0.3958, |
|
"eval_samples_per_second": 176.858, |
|
"eval_steps_per_second": 7.58, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 146.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.2719106674194336, |
|
"eval_runtime": 0.3928, |
|
"eval_samples_per_second": 178.226, |
|
"eval_steps_per_second": 7.638, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 147.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.271912157535553, |
|
"eval_runtime": 0.3948, |
|
"eval_samples_per_second": 177.319, |
|
"eval_steps_per_second": 7.599, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 148.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.271916002035141, |
|
"eval_runtime": 0.3979, |
|
"eval_samples_per_second": 175.916, |
|
"eval_steps_per_second": 7.539, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 149.89, |
|
"learning_rate": 0.0, |
|
"loss": 0.2614, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 149.89, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.27192050218582153, |
|
"eval_runtime": 0.3918, |
|
"eval_samples_per_second": 178.659, |
|
"eval_steps_per_second": 7.657, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 149.89, |
|
"step": 300, |
|
"total_flos": 1.0435256966870508e+18, |
|
"train_loss": 0.39242326736450195, |
|
"train_runtime": 677.2441, |
|
"train_samples_per_second": 61.352, |
|
"train_steps_per_second": 0.443 |
|
} |
|
], |
|
"max_steps": 300, |
|
"num_train_epochs": 150, |
|
"total_flos": 1.0435256966870508e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|