{
  "best_metric": 0.7611846765843823,
  "best_model_checkpoint": "./finetuned/wikitext103_roberta-base_v2/checkpoint-123000",
  "epoch": 20.0,
  "global_step": 147800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 4.983085250338295e-05,
      "loss": 1.4212,
      "step": 500
    },
    {
      "epoch": 0.07,
      "eval_accuracy": 0.7235698186111409,
      "eval_loss": 1.3007760047912598,
      "eval_runtime": 2.6682,
      "eval_samples_per_second": 185.892,
      "eval_steps_per_second": 5.997,
      "step": 500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.96617050067659e-05,
      "loss": 1.3933,
      "step": 1000
    },
    {
      "epoch": 0.14,
      "eval_accuracy": 0.7226983316766515,
      "eval_loss": 1.2827116250991821,
      "eval_runtime": 2.1995,
      "eval_samples_per_second": 225.505,
      "eval_steps_per_second": 7.274,
      "step": 1000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.949255751014885e-05,
      "loss": 1.3917,
      "step": 1500
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.7266257137444863,
      "eval_loss": 1.2815688848495483,
      "eval_runtime": 2.1563,
      "eval_samples_per_second": 230.027,
      "eval_steps_per_second": 7.42,
      "step": 1500
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.93234100135318e-05,
      "loss": 1.3824,
      "step": 2000
    },
    {
      "epoch": 0.27,
      "eval_accuracy": 0.7251124131353045,
      "eval_loss": 1.294681191444397,
      "eval_runtime": 2.1464,
      "eval_samples_per_second": 231.089,
      "eval_steps_per_second": 7.454,
      "step": 2000
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.915426251691475e-05,
      "loss": 1.3835,
      "step": 2500
    },
    {
      "epoch": 0.34,
      "eval_accuracy": 0.7289371440736602,
      "eval_loss": 1.2555147409439087,
      "eval_runtime": 2.2812,
      "eval_samples_per_second": 217.427,
      "eval_steps_per_second": 7.014,
      "step": 2500
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.89851150202977e-05,
      "loss": 1.3758,
      "step": 3000
    },
    {
      "epoch": 0.41,
      "eval_accuracy": 0.7279413775189347,
      "eval_loss": 1.2611732482910156,
      "eval_runtime": 2.2556,
      "eval_samples_per_second": 219.898,
      "eval_steps_per_second": 7.093,
      "step": 3000
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.881596752368065e-05,
      "loss": 1.3745,
      "step": 3500
    },
    {
      "epoch": 0.47,
      "eval_accuracy": 0.7244728228792188,
      "eval_loss": 1.279096007347107,
      "eval_runtime": 1.9103,
      "eval_samples_per_second": 259.647,
      "eval_steps_per_second": 8.376,
      "step": 3500
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.86468200270636e-05,
      "loss": 1.3761,
      "step": 4000
    },
    {
      "epoch": 0.54,
      "eval_accuracy": 0.7286496152595643,
      "eval_loss": 1.2621806859970093,
      "eval_runtime": 2.2565,
      "eval_samples_per_second": 219.811,
      "eval_steps_per_second": 7.091,
      "step": 4000
    },
    {
      "epoch": 0.61,
      "learning_rate": 4.847767253044655e-05,
      "loss": 1.3735,
      "step": 4500
    },
    {
      "epoch": 0.61,
      "eval_accuracy": 0.7359971845474972,
      "eval_loss": 1.231848955154419,
      "eval_runtime": 2.2526,
      "eval_samples_per_second": 220.188,
      "eval_steps_per_second": 7.103,
      "step": 4500
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.83085250338295e-05,
      "loss": 1.3717,
      "step": 5000
    },
    {
      "epoch": 0.68,
      "eval_accuracy": 0.7259761388286334,
      "eval_loss": 1.2777374982833862,
      "eval_runtime": 2.343,
      "eval_samples_per_second": 211.693,
      "eval_steps_per_second": 6.829,
      "step": 5000
    },
    {
      "epoch": 0.74,
      "learning_rate": 4.813937753721245e-05,
      "loss": 1.3675,
      "step": 5500
    },
    {
      "epoch": 0.74,
      "eval_accuracy": 0.7309145880574452,
      "eval_loss": 1.2589675188064575,
      "eval_runtime": 2.0349,
      "eval_samples_per_second": 243.741,
      "eval_steps_per_second": 7.863,
      "step": 5500
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.79702300405954e-05,
      "loss": 1.3585,
      "step": 6000
    },
    {
      "epoch": 0.81,
      "eval_accuracy": 0.7253910822602958,
      "eval_loss": 1.2838590145111084,
      "eval_runtime": 2.2225,
      "eval_samples_per_second": 223.175,
      "eval_steps_per_second": 7.199,
      "step": 6000
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.780108254397835e-05,
      "loss": 1.3579,
      "step": 6500
    },
    {
      "epoch": 0.88,
      "eval_accuracy": 0.7347076623797687,
      "eval_loss": 1.2341055870056152,
      "eval_runtime": 1.8958,
      "eval_samples_per_second": 261.625,
      "eval_steps_per_second": 8.44,
      "step": 6500
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.76319350473613e-05,
      "loss": 1.3588,
      "step": 7000
    },
    {
      "epoch": 0.95,
      "eval_accuracy": 0.7326682357975821,
      "eval_loss": 1.2412930727005005,
      "eval_runtime": 2.1422,
      "eval_samples_per_second": 231.54,
      "eval_steps_per_second": 7.469,
      "step": 7000
    },
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.746278755074425e-05, |
|
"loss": 1.351, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.7317281968967362, |
|
"eval_loss": 1.2459222078323364, |
|
"eval_runtime": 2.2341, |
|
"eval_samples_per_second": 222.016, |
|
"eval_steps_per_second": 7.162, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.72936400541272e-05, |
|
"loss": 1.3394, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_accuracy": 0.7314233839745815, |
|
"eval_loss": 1.242180347442627, |
|
"eval_runtime": 2.2469, |
|
"eval_samples_per_second": 220.751, |
|
"eval_steps_per_second": 7.121, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.712449255751015e-05, |
|
"loss": 1.3429, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.734901599848407, |
|
"eval_loss": 1.2285393476486206, |
|
"eval_runtime": 2.1498, |
|
"eval_samples_per_second": 230.723, |
|
"eval_steps_per_second": 7.443, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.69553450608931e-05, |
|
"loss": 1.3393, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.7324128503075872, |
|
"eval_loss": 1.2404521703720093, |
|
"eval_runtime": 2.2579, |
|
"eval_samples_per_second": 219.671, |
|
"eval_steps_per_second": 7.086, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.678619756427605e-05, |
|
"loss": 1.3421, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.735434836099188, |
|
"eval_loss": 1.2255122661590576, |
|
"eval_runtime": 2.2664, |
|
"eval_samples_per_second": 218.847, |
|
"eval_steps_per_second": 7.06, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.6617050067659e-05, |
|
"loss": 1.3426, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.7333513221802482, |
|
"eval_loss": 1.2296382188796997, |
|
"eval_runtime": 2.1344, |
|
"eval_samples_per_second": 232.385, |
|
"eval_steps_per_second": 7.496, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.644790257104195e-05, |
|
"loss": 1.3326, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy": 0.7351480394040008, |
|
"eval_loss": 1.2158225774765015, |
|
"eval_runtime": 2.1342, |
|
"eval_samples_per_second": 232.406, |
|
"eval_steps_per_second": 7.497, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.62787550744249e-05, |
|
"loss": 1.3355, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_accuracy": 0.7364017876607805, |
|
"eval_loss": 1.2255741357803345, |
|
"eval_runtime": 2.1391, |
|
"eval_samples_per_second": 231.876, |
|
"eval_steps_per_second": 7.48, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.610960757780785e-05, |
|
"loss": 1.3324, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.7355829363706523, |
|
"eval_loss": 1.2208420038223267, |
|
"eval_runtime": 2.251, |
|
"eval_samples_per_second": 220.348, |
|
"eval_steps_per_second": 7.108, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.59404600811908e-05, |
|
"loss": 1.3331, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_accuracy": 0.7347190272757148, |
|
"eval_loss": 1.2230000495910645, |
|
"eval_runtime": 2.1329, |
|
"eval_samples_per_second": 232.552, |
|
"eval_steps_per_second": 7.502, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.577131258457375e-05, |
|
"loss": 1.3326, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.7316047842477829, |
|
"eval_loss": 1.250501275062561, |
|
"eval_runtime": 2.2566, |
|
"eval_samples_per_second": 219.795, |
|
"eval_steps_per_second": 7.09, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.56021650879567e-05, |
|
"loss": 1.3339, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_accuracy": 0.7321860715246034, |
|
"eval_loss": 1.2471247911453247, |
|
"eval_runtime": 1.8846, |
|
"eval_samples_per_second": 263.185, |
|
"eval_steps_per_second": 8.49, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.543301759133965e-05, |
|
"loss": 1.3286, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.7358603599923753, |
|
"eval_loss": 1.218480110168457, |
|
"eval_runtime": 2.183, |
|
"eval_samples_per_second": 227.211, |
|
"eval_steps_per_second": 7.329, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.52638700947226e-05, |
|
"loss": 1.3314, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_accuracy": 0.7363198956152989, |
|
"eval_loss": 1.2333292961120605, |
|
"eval_runtime": 1.922, |
|
"eval_samples_per_second": 258.068, |
|
"eval_steps_per_second": 8.325, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.509472259810555e-05, |
|
"loss": 1.325, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_accuracy": 0.7320171162387606, |
|
"eval_loss": 1.2384274005889893, |
|
"eval_runtime": 2.1484, |
|
"eval_samples_per_second": 230.871, |
|
"eval_steps_per_second": 7.447, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.49255751014885e-05, |
|
"loss": 1.3251, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.7332704232946886, |
|
"eval_loss": 1.2141916751861572, |
|
"eval_runtime": 2.1498, |
|
"eval_samples_per_second": 230.723, |
|
"eval_steps_per_second": 7.443, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.475642760487145e-05, |
|
"loss": 1.3136, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.7346380072100398, |
|
"eval_loss": 1.2162067890167236, |
|
"eval_runtime": 2.2669, |
|
"eval_samples_per_second": 218.8, |
|
"eval_steps_per_second": 7.058, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 4.45872801082544e-05, |
|
"loss": 1.3202, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_accuracy": 0.7368549767669357, |
|
"eval_loss": 1.220727801322937, |
|
"eval_runtime": 2.1251, |
|
"eval_samples_per_second": 233.399, |
|
"eval_steps_per_second": 7.529, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 4.441813261163735e-05, |
|
"loss": 1.3168, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_accuracy": 0.7391136589130195, |
|
"eval_loss": 1.1931146383285522, |
|
"eval_runtime": 2.2628, |
|
"eval_samples_per_second": 219.196, |
|
"eval_steps_per_second": 7.071, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.42489851150203e-05, |
|
"loss": 1.3134, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_accuracy": 0.7398447820343461, |
|
"eval_loss": 1.1856846809387207, |
|
"eval_runtime": 2.1315, |
|
"eval_samples_per_second": 232.703, |
|
"eval_steps_per_second": 7.507, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.407983761840325e-05, |
|
"loss": 1.3085, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_accuracy": 0.7383094012462748, |
|
"eval_loss": 1.2111510038375854, |
|
"eval_runtime": 2.2389, |
|
"eval_samples_per_second": 221.538, |
|
"eval_steps_per_second": 7.146, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.39106901217862e-05, |
|
"loss": 1.3165, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_accuracy": 0.736477152685609, |
|
"eval_loss": 1.2284483909606934, |
|
"eval_runtime": 2.2655, |
|
"eval_samples_per_second": 218.936, |
|
"eval_steps_per_second": 7.062, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.374154262516915e-05, |
|
"loss": 1.3144, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.7387957989256795, |
|
"eval_loss": 1.2013208866119385, |
|
"eval_runtime": 2.1477, |
|
"eval_samples_per_second": 230.94, |
|
"eval_steps_per_second": 7.45, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.35723951285521e-05, |
|
"loss": 1.319, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.7355637897925513, |
|
"eval_loss": 1.217348337173462, |
|
"eval_runtime": 1.8976, |
|
"eval_samples_per_second": 261.38, |
|
"eval_steps_per_second": 8.432, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 4.340324763193505e-05, |
|
"loss": 1.3147, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_accuracy": 0.7403712864559268, |
|
"eval_loss": 1.1786144971847534, |
|
"eval_runtime": 2.1417, |
|
"eval_samples_per_second": 231.588, |
|
"eval_steps_per_second": 7.471, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.3234100135318e-05, |
|
"loss": 1.311, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_accuracy": 0.7372879017795558, |
|
"eval_loss": 1.2008836269378662, |
|
"eval_runtime": 2.2409, |
|
"eval_samples_per_second": 221.338, |
|
"eval_steps_per_second": 7.14, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.306495263870095e-05, |
|
"loss": 1.3131, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.7366438077684113, |
|
"eval_loss": 1.1992290019989014, |
|
"eval_runtime": 2.3077, |
|
"eval_samples_per_second": 214.937, |
|
"eval_steps_per_second": 6.933, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.28958051420839e-05, |
|
"loss": 1.3036, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_accuracy": 0.7369976679863333, |
|
"eval_loss": 1.2166584730148315, |
|
"eval_runtime": 2.1243, |
|
"eval_samples_per_second": 233.491, |
|
"eval_steps_per_second": 7.532, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.272665764546685e-05, |
|
"loss": 1.3122, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.7378714413413875, |
|
"eval_loss": 1.2138844728469849, |
|
"eval_runtime": 2.155, |
|
"eval_samples_per_second": 230.165, |
|
"eval_steps_per_second": 7.425, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.25575101488498e-05, |
|
"loss": 1.3091, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_accuracy": 0.7364524804942348, |
|
"eval_loss": 1.2197295427322388, |
|
"eval_runtime": 2.0278, |
|
"eval_samples_per_second": 244.602, |
|
"eval_steps_per_second": 7.89, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.238836265223275e-05, |
|
"loss": 1.304, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_accuracy": 0.7371755128447044, |
|
"eval_loss": 1.186427354812622, |
|
"eval_runtime": 2.0462, |
|
"eval_samples_per_second": 242.4, |
|
"eval_steps_per_second": 7.819, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 4.22192151556157e-05, |
|
"loss": 1.3015, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_accuracy": 0.7355039424985249, |
|
"eval_loss": 1.2046276330947876, |
|
"eval_runtime": 2.0572, |
|
"eval_samples_per_second": 241.108, |
|
"eval_steps_per_second": 7.778, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 4.205006765899865e-05, |
|
"loss": 1.2916, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_accuracy": 0.7344874591057797, |
|
"eval_loss": 1.2312067747116089, |
|
"eval_runtime": 2.3523, |
|
"eval_samples_per_second": 210.856, |
|
"eval_steps_per_second": 6.802, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.18809201623816e-05, |
|
"loss": 1.2966, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 0.7372955288985823, |
|
"eval_loss": 1.2116466760635376, |
|
"eval_runtime": 2.306, |
|
"eval_samples_per_second": 215.094, |
|
"eval_steps_per_second": 6.939, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.171177266576455e-05, |
|
"loss": 1.2991, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_accuracy": 0.737794624029042, |
|
"eval_loss": 1.2262712717056274, |
|
"eval_runtime": 2.2208, |
|
"eval_samples_per_second": 223.344, |
|
"eval_steps_per_second": 7.205, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 4.15426251691475e-05, |
|
"loss": 1.3003, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_accuracy": 0.741288193792419, |
|
"eval_loss": 1.184373378753662, |
|
"eval_runtime": 2.336, |
|
"eval_samples_per_second": 212.325, |
|
"eval_steps_per_second": 6.849, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.137347767253045e-05, |
|
"loss": 1.2942, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.7368591999133871, |
|
"eval_loss": 1.195932149887085, |
|
"eval_runtime": 2.1558, |
|
"eval_samples_per_second": 230.073, |
|
"eval_steps_per_second": 7.422, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 4.12043301759134e-05, |
|
"loss": 1.2988, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_accuracy": 0.7381074306659838, |
|
"eval_loss": 1.2017642259597778, |
|
"eval_runtime": 2.1521, |
|
"eval_samples_per_second": 230.473, |
|
"eval_steps_per_second": 7.435, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 4.103518267929635e-05, |
|
"loss": 1.2936, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_accuracy": 0.7388343788536808, |
|
"eval_loss": 1.1992815732955933, |
|
"eval_runtime": 2.3209, |
|
"eval_samples_per_second": 213.713, |
|
"eval_steps_per_second": 6.894, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.08660351826793e-05, |
|
"loss": 1.2937, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_accuracy": 0.7358311660164716, |
|
"eval_loss": 1.2154779434204102, |
|
"eval_runtime": 2.1442, |
|
"eval_samples_per_second": 231.319, |
|
"eval_steps_per_second": 7.462, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 4.069688768606225e-05, |
|
"loss": 1.3021, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_accuracy": 0.7395591959907313, |
|
"eval_loss": 1.1794347763061523, |
|
"eval_runtime": 2.2631, |
|
"eval_samples_per_second": 219.166, |
|
"eval_steps_per_second": 7.07, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 4.05277401894452e-05, |
|
"loss": 1.2937, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.7401357600670687, |
|
"eval_loss": 1.1982717514038086, |
|
"eval_runtime": 2.0447, |
|
"eval_samples_per_second": 242.582, |
|
"eval_steps_per_second": 7.825, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 4.035859269282815e-05, |
|
"loss": 1.291, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"eval_accuracy": 0.7448072021259288, |
|
"eval_loss": 1.1694941520690918, |
|
"eval_runtime": 2.1453, |
|
"eval_samples_per_second": 231.207, |
|
"eval_steps_per_second": 7.458, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 4.01894451962111e-05, |
|
"loss": 1.2932, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_accuracy": 0.7410137752905726, |
|
"eval_loss": 1.1980637311935425, |
|
"eval_runtime": 2.2686, |
|
"eval_samples_per_second": 218.634, |
|
"eval_steps_per_second": 7.053, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.002029769959405e-05, |
|
"loss": 1.2938, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.7382663617554176, |
|
"eval_loss": 1.1999621391296387, |
|
"eval_runtime": 2.1418, |
|
"eval_samples_per_second": 231.579, |
|
"eval_steps_per_second": 7.47, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.9851150202977e-05, |
|
"loss": 1.2789, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_accuracy": 0.7402127426252879, |
|
"eval_loss": 1.1918007135391235, |
|
"eval_runtime": 2.3184, |
|
"eval_samples_per_second": 213.944, |
|
"eval_steps_per_second": 6.901, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.968200270635995e-05, |
|
"loss": 1.2806, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_accuracy": 0.7368392751519062, |
|
"eval_loss": 1.2065249681472778, |
|
"eval_runtime": 2.1671, |
|
"eval_samples_per_second": 228.872, |
|
"eval_steps_per_second": 7.383, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.95128552097429e-05, |
|
"loss": 1.2799, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_accuracy": 0.7374173525839968, |
|
"eval_loss": 1.2035958766937256, |
|
"eval_runtime": 2.0293, |
|
"eval_samples_per_second": 244.417, |
|
"eval_steps_per_second": 7.884, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 3.934370771312585e-05, |
|
"loss": 1.2851, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_accuracy": 0.7374529736652525, |
|
"eval_loss": 1.2056316137313843, |
|
"eval_runtime": 2.2747, |
|
"eval_samples_per_second": 218.047, |
|
"eval_steps_per_second": 7.034, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 3.91745602165088e-05, |
|
"loss": 1.2789, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_accuracy": 0.7414960437229791, |
|
"eval_loss": 1.185698390007019, |
|
"eval_runtime": 2.1279, |
|
"eval_samples_per_second": 233.09, |
|
"eval_steps_per_second": 7.519, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 3.900541271989175e-05, |
|
"loss": 1.2847, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_accuracy": 0.7375549926676443, |
|
"eval_loss": 1.1947497129440308, |
|
"eval_runtime": 2.2844, |
|
"eval_samples_per_second": 217.128, |
|
"eval_steps_per_second": 7.004, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 3.88362652232747e-05, |
|
"loss": 1.2843, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_accuracy": 0.7398512049167071, |
|
"eval_loss": 1.1868607997894287, |
|
"eval_runtime": 1.8928, |
|
"eval_samples_per_second": 262.041, |
|
"eval_steps_per_second": 8.453, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 3.866711772665765e-05, |
|
"loss": 1.2822, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"eval_accuracy": 0.738583059254866, |
|
"eval_loss": 1.1962590217590332, |
|
"eval_runtime": 2.3042, |
|
"eval_samples_per_second": 215.256, |
|
"eval_steps_per_second": 6.944, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3.84979702300406e-05, |
|
"loss": 1.2755, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_accuracy": 0.7423808354478731, |
|
"eval_loss": 1.189677357673645, |
|
"eval_runtime": 2.1343, |
|
"eval_samples_per_second": 232.399, |
|
"eval_steps_per_second": 7.497, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 3.832882273342355e-05, |
|
"loss": 1.283, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_accuracy": 0.7438030006523157, |
|
"eval_loss": 1.1673452854156494, |
|
"eval_runtime": 2.1405, |
|
"eval_samples_per_second": 231.723, |
|
"eval_steps_per_second": 7.475, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3.81596752368065e-05, |
|
"loss": 1.2765, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_accuracy": 0.7418567866813223, |
|
"eval_loss": 1.1855015754699707, |
|
"eval_runtime": 2.2371, |
|
"eval_samples_per_second": 221.718, |
|
"eval_steps_per_second": 7.152, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 3.799052774018945e-05, |
|
"loss": 1.2762, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.7412275877241228, |
|
"eval_loss": 1.1773431301116943, |
|
"eval_runtime": 2.1867, |
|
"eval_samples_per_second": 226.824, |
|
"eval_steps_per_second": 7.317, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 3.7821380243572397e-05, |
|
"loss": 1.2776, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"eval_accuracy": 0.740787246819894, |
|
"eval_loss": 1.1897586584091187, |
|
"eval_runtime": 1.9025, |
|
"eval_samples_per_second": 260.712, |
|
"eval_steps_per_second": 8.41, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 3.7652232746955347e-05, |
|
"loss": 1.2847, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_accuracy": 0.7437667084947351, |
|
"eval_loss": 1.1624772548675537, |
|
"eval_runtime": 1.9202, |
|
"eval_samples_per_second": 258.302, |
|
"eval_steps_per_second": 8.332, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 3.7483085250338296e-05, |
|
"loss": 1.2732, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_accuracy": 0.7396705597179374, |
|
"eval_loss": 1.194719672203064, |
|
"eval_runtime": 2.2563, |
|
"eval_samples_per_second": 219.831, |
|
"eval_steps_per_second": 7.091, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 3.7313937753721246e-05, |
|
"loss": 1.2667, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_accuracy": 0.7384741591468417, |
|
"eval_loss": 1.2097489833831787, |
|
"eval_runtime": 2.0162, |
|
"eval_samples_per_second": 246.011, |
|
"eval_steps_per_second": 7.936, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 3.7144790257104196e-05, |
|
"loss": 1.2678, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"eval_accuracy": 0.7397711324624852, |
|
"eval_loss": 1.187340497970581, |
|
"eval_runtime": 1.9242, |
|
"eval_samples_per_second": 257.772, |
|
"eval_steps_per_second": 8.315, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 3.6975642760487146e-05, |
|
"loss": 1.2681, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"eval_accuracy": 0.7467894879436467, |
|
"eval_loss": 1.1681954860687256, |
|
"eval_runtime": 2.1385, |
|
"eval_samples_per_second": 231.938, |
|
"eval_steps_per_second": 7.482, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 3.6806495263870096e-05, |
|
"loss": 1.2699, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_accuracy": 0.745684382221014, |
|
"eval_loss": 1.1739610433578491, |
|
"eval_runtime": 1.9046, |
|
"eval_samples_per_second": 260.416, |
|
"eval_steps_per_second": 8.401, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 3.6637347767253046e-05, |
|
"loss": 1.2675, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_accuracy": 0.7378905091781449, |
|
"eval_loss": 1.212327003479004, |
|
"eval_runtime": 2.151, |
|
"eval_samples_per_second": 230.592, |
|
"eval_steps_per_second": 7.438, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 3.6468200270635996e-05, |
|
"loss": 1.2604, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_accuracy": 0.7395626782561456, |
|
"eval_loss": 1.195254921913147, |
|
"eval_runtime": 2.1404, |
|
"eval_samples_per_second": 231.731, |
|
"eval_steps_per_second": 7.475, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 3.6299052774018946e-05, |
|
"loss": 1.2688, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_accuracy": 0.7397589090237662, |
|
"eval_loss": 1.1849150657653809, |
|
"eval_runtime": 2.1374, |
|
"eval_samples_per_second": 232.054, |
|
"eval_steps_per_second": 7.486, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 3.6129905277401896e-05, |
|
"loss": 1.2698, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_accuracy": 0.7413877684508885, |
|
"eval_loss": 1.1708790063858032, |
|
"eval_runtime": 2.1318, |
|
"eval_samples_per_second": 232.668, |
|
"eval_steps_per_second": 7.505, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 3.5960757780784846e-05, |
|
"loss": 1.2689, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"eval_accuracy": 0.7438135277526475, |
|
"eval_loss": 1.1763643026351929, |
|
"eval_runtime": 1.9258, |
|
"eval_samples_per_second": 257.551, |
|
"eval_steps_per_second": 8.308, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 3.5791610284167796e-05, |
|
"loss": 1.269, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_accuracy": 0.7409149325968664, |
|
"eval_loss": 1.1824229955673218, |
|
"eval_runtime": 2.2453, |
|
"eval_samples_per_second": 220.905, |
|
"eval_steps_per_second": 7.126, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 3.5622462787550746e-05, |
|
"loss": 1.2715, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_accuracy": 0.7408733194884687, |
|
"eval_loss": 1.178514003753662, |
|
"eval_runtime": 2.0475, |
|
"eval_samples_per_second": 242.248, |
|
"eval_steps_per_second": 7.814, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 3.5453315290933695e-05, |
|
"loss": 1.2628, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_accuracy": 0.7433914472797822, |
|
"eval_loss": 1.173943281173706, |
|
"eval_runtime": 2.1375, |
|
"eval_samples_per_second": 232.048, |
|
"eval_steps_per_second": 7.485, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 3.5284167794316645e-05, |
|
"loss": 1.2617, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_accuracy": 0.7406168909338969, |
|
"eval_loss": 1.1814693212509155, |
|
"eval_runtime": 2.1357, |
|
"eval_samples_per_second": 232.237, |
|
"eval_steps_per_second": 7.492, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 3.5115020297699595e-05, |
|
"loss": 1.2565, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_accuracy": 0.7414824236191919, |
|
"eval_loss": 1.1885017156600952, |
|
"eval_runtime": 2.2461, |
|
"eval_samples_per_second": 220.826, |
|
"eval_steps_per_second": 7.123, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 3.4945872801082545e-05, |
|
"loss": 1.2639, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_accuracy": 0.741952133873027, |
|
"eval_loss": 1.1781718730926514, |
|
"eval_runtime": 2.0174, |
|
"eval_samples_per_second": 245.859, |
|
"eval_steps_per_second": 7.931, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 3.4776725304465495e-05, |
|
"loss": 1.2557, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"eval_accuracy": 0.7382356866408648, |
|
"eval_loss": 1.2061494588851929, |
|
"eval_runtime": 2.2612, |
|
"eval_samples_per_second": 219.356, |
|
"eval_steps_per_second": 7.076, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 3.4607577807848445e-05, |
|
"loss": 1.2503, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_accuracy": 0.739681675962454, |
|
"eval_loss": 1.1741236448287964, |
|
"eval_runtime": 2.1411, |
|
"eval_samples_per_second": 231.661, |
|
"eval_steps_per_second": 7.473, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 3.4438430311231395e-05, |
|
"loss": 1.2514, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"eval_accuracy": 0.7435828154552824, |
|
"eval_loss": 1.167312741279602, |
|
"eval_runtime": 2.0431, |
|
"eval_samples_per_second": 242.763, |
|
"eval_steps_per_second": 7.831, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 3.4269282814614345e-05, |
|
"loss": 1.254, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_accuracy": 0.7399956502827316, |
|
"eval_loss": 1.1828943490982056, |
|
"eval_runtime": 2.2651, |
|
"eval_samples_per_second": 218.976, |
|
"eval_steps_per_second": 7.064, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 3.4100135317997295e-05, |
|
"loss": 1.2583, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_accuracy": 0.7390757539268417, |
|
"eval_loss": 1.1776684522628784, |
|
"eval_runtime": 2.0336, |
|
"eval_samples_per_second": 243.904, |
|
"eval_steps_per_second": 7.868, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 3.3930987821380245e-05, |
|
"loss": 1.2518, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_accuracy": 0.7411625020238545, |
|
"eval_loss": 1.1892728805541992, |
|
"eval_runtime": 2.2474, |
|
"eval_samples_per_second": 220.698, |
|
"eval_steps_per_second": 7.119, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 3.3761840324763195e-05, |
|
"loss": 1.2519, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_accuracy": 0.7410831524506257, |
|
"eval_loss": 1.1775306463241577, |
|
"eval_runtime": 2.127, |
|
"eval_samples_per_second": 233.19, |
|
"eval_steps_per_second": 7.522, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 3.3592692828146145e-05, |
|
"loss": 1.2477, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_accuracy": 0.7451821862348178, |
|
"eval_loss": 1.1809273958206177, |
|
"eval_runtime": 1.902, |
|
"eval_samples_per_second": 260.776, |
|
"eval_steps_per_second": 8.412, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 3.3423545331529095e-05, |
|
"loss": 1.2546, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"eval_accuracy": 0.7455485978763953, |
|
"eval_loss": 1.1651870012283325, |
|
"eval_runtime": 2.1247, |
|
"eval_samples_per_second": 233.443, |
|
"eval_steps_per_second": 7.53, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 3.3254397834912044e-05, |
|
"loss": 1.2564, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_accuracy": 0.7435488746599247, |
|
"eval_loss": 1.1729925870895386, |
|
"eval_runtime": 2.2521, |
|
"eval_samples_per_second": 220.235, |
|
"eval_steps_per_second": 7.104, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 3.3085250338294994e-05, |
|
"loss": 1.254, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_accuracy": 0.7427022407392571, |
|
"eval_loss": 1.1740801334381104, |
|
"eval_runtime": 2.2515, |
|
"eval_samples_per_second": 220.294, |
|
"eval_steps_per_second": 7.106, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 3.2916102841677944e-05, |
|
"loss": 1.2495, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"eval_accuracy": 0.7475704632944787, |
|
"eval_loss": 1.1539645195007324, |
|
"eval_runtime": 2.1379, |
|
"eval_samples_per_second": 231.999, |
|
"eval_steps_per_second": 7.484, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 3.2746955345060894e-05, |
|
"loss": 1.2502, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"eval_accuracy": 0.7488099797559774, |
|
"eval_loss": 1.145354151725769, |
|
"eval_runtime": 2.0467, |
|
"eval_samples_per_second": 242.344, |
|
"eval_steps_per_second": 7.818, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 3.2577807848443844e-05, |
|
"loss": 1.2527, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.7429261278858414, |
|
"eval_loss": 1.1704862117767334, |
|
"eval_runtime": 1.8944, |
|
"eval_samples_per_second": 261.83, |
|
"eval_steps_per_second": 8.446, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 3.2408660351826794e-05, |
|
"loss": 1.2418, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"eval_accuracy": 0.7441042170292774, |
|
"eval_loss": 1.1714463233947754, |
|
"eval_runtime": 2.2491, |
|
"eval_samples_per_second": 220.532, |
|
"eval_steps_per_second": 7.114, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 3.2239512855209744e-05, |
|
"loss": 1.2386, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_accuracy": 0.74550079317324, |
|
"eval_loss": 1.1619137525558472, |
|
"eval_runtime": 2.2788, |
|
"eval_samples_per_second": 217.662, |
|
"eval_steps_per_second": 7.021, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 3.2070365358592694e-05, |
|
"loss": 1.2407, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"eval_accuracy": 0.7428433966802983, |
|
"eval_loss": 1.1702818870544434, |
|
"eval_runtime": 2.2482, |
|
"eval_samples_per_second": 220.624, |
|
"eval_steps_per_second": 7.117, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 3.1901217861975644e-05, |
|
"loss": 1.2429, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_accuracy": 0.7437382207533255, |
|
"eval_loss": 1.1596566438674927, |
|
"eval_runtime": 2.0269, |
|
"eval_samples_per_second": 244.711, |
|
"eval_steps_per_second": 7.894, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 3.1732070365358594e-05, |
|
"loss": 1.2398, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_accuracy": 0.7411157814291173, |
|
"eval_loss": 1.1802175045013428, |
|
"eval_runtime": 1.903, |
|
"eval_samples_per_second": 260.643, |
|
"eval_steps_per_second": 8.408, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 3.1562922868741544e-05, |
|
"loss": 1.2507, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"eval_accuracy": 0.7465291873021028, |
|
"eval_loss": 1.153898000717163, |
|
"eval_runtime": 2.134, |
|
"eval_samples_per_second": 232.429, |
|
"eval_steps_per_second": 7.498, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 3.1393775372124494e-05, |
|
"loss": 1.2369, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_accuracy": 0.7421205732433082, |
|
"eval_loss": 1.1711477041244507, |
|
"eval_runtime": 2.2417, |
|
"eval_samples_per_second": 221.263, |
|
"eval_steps_per_second": 7.138, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 3.1224627875507443e-05, |
|
"loss": 1.2463, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_accuracy": 0.7408580787198625, |
|
"eval_loss": 1.1848827600479126, |
|
"eval_runtime": 2.2658, |
|
"eval_samples_per_second": 218.909, |
|
"eval_steps_per_second": 7.062, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 3.1055480378890393e-05, |
|
"loss": 1.2389, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_accuracy": 0.7447417175239756, |
|
"eval_loss": 1.172045111656189, |
|
"eval_runtime": 2.1226, |
|
"eval_samples_per_second": 233.68, |
|
"eval_steps_per_second": 7.538, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 3.088633288227334e-05, |
|
"loss": 1.2395, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_accuracy": 0.7455846610856063, |
|
"eval_loss": 1.1613755226135254, |
|
"eval_runtime": 2.2492, |
|
"eval_samples_per_second": 220.523, |
|
"eval_steps_per_second": 7.114, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 3.071718538565629e-05, |
|
"loss": 1.2429, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_accuracy": 0.7459984960790633, |
|
"eval_loss": 1.1604408025741577, |
|
"eval_runtime": 2.2523, |
|
"eval_samples_per_second": 220.221, |
|
"eval_steps_per_second": 7.104, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 3.054803788903924e-05, |
|
"loss": 1.2384, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_accuracy": 0.7408438637823945, |
|
"eval_loss": 1.1852344274520874, |
|
"eval_runtime": 2.2645, |
|
"eval_samples_per_second": 219.035, |
|
"eval_steps_per_second": 7.066, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 3.0378890392422193e-05, |
|
"loss": 1.2419, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_accuracy": 0.7460735114607351, |
|
"eval_loss": 1.1592859029769897, |
|
"eval_runtime": 2.2667, |
|
"eval_samples_per_second": 218.824, |
|
"eval_steps_per_second": 7.059, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 3.0209742895805143e-05, |
|
"loss": 1.2381, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"eval_accuracy": 0.7454180674547229, |
|
"eval_loss": 1.161791205406189, |
|
"eval_runtime": 2.2508, |
|
"eval_samples_per_second": 220.362, |
|
"eval_steps_per_second": 7.108, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 3.0040595399188093e-05, |
|
"loss": 1.2384, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_accuracy": 0.7445992935958163, |
|
"eval_loss": 1.1550912857055664, |
|
"eval_runtime": 2.2277, |
|
"eval_samples_per_second": 222.654, |
|
"eval_steps_per_second": 7.182, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 2.9871447902571043e-05, |
|
"loss": 1.2314, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_accuracy": 0.7451252345598434, |
|
"eval_loss": 1.1473671197891235, |
|
"eval_runtime": 2.0323, |
|
"eval_samples_per_second": 244.059, |
|
"eval_steps_per_second": 7.873, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 2.9702300405953993e-05, |
|
"loss": 1.2277, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_accuracy": 0.7435493080290383, |
|
"eval_loss": 1.1636135578155518, |
|
"eval_runtime": 2.2565, |
|
"eval_samples_per_second": 219.81, |
|
"eval_steps_per_second": 7.091, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 2.9533152909336943e-05, |
|
"loss": 1.23, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"eval_accuracy": 0.7482466354355656, |
|
"eval_loss": 1.1545356512069702, |
|
"eval_runtime": 2.1398, |
|
"eval_samples_per_second": 231.799, |
|
"eval_steps_per_second": 7.477, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 2.9364005412719893e-05, |
|
"loss": 1.2292, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_accuracy": 0.7456762809270702, |
|
"eval_loss": 1.169358730316162, |
|
"eval_runtime": 2.2505, |
|
"eval_samples_per_second": 220.392, |
|
"eval_steps_per_second": 7.109, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 2.9194857916102843e-05, |
|
"loss": 1.2337, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_accuracy": 0.7437165882071332, |
|
"eval_loss": 1.1681973934173584, |
|
"eval_runtime": 2.2595, |
|
"eval_samples_per_second": 219.518, |
|
"eval_steps_per_second": 7.081, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 2.9025710419485792e-05, |
|
"loss": 1.2274, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_accuracy": 0.7484281932495036, |
|
"eval_loss": 1.1518677473068237, |
|
"eval_runtime": 2.0234, |
|
"eval_samples_per_second": 245.13, |
|
"eval_steps_per_second": 7.907, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 2.885656292286874e-05, |
|
"loss": 1.232, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_accuracy": 0.7435426377844804, |
|
"eval_loss": 1.1693381071090698, |
|
"eval_runtime": 2.153, |
|
"eval_samples_per_second": 230.381, |
|
"eval_steps_per_second": 7.432, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 2.868741542625169e-05, |
|
"loss": 1.2315, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"eval_accuracy": 0.7434497229246247, |
|
"eval_loss": 1.1637970209121704, |
|
"eval_runtime": 2.1389, |
|
"eval_samples_per_second": 231.9, |
|
"eval_steps_per_second": 7.481, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 2.851826792963464e-05, |
|
"loss": 1.2293, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_accuracy": 0.746056909476852, |
|
"eval_loss": 1.1639689207077026, |
|
"eval_runtime": 1.9056, |
|
"eval_samples_per_second": 260.282, |
|
"eval_steps_per_second": 8.396, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 2.8349120433017595e-05, |
|
"loss": 1.2287, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"eval_accuracy": 0.7519274622651754, |
|
"eval_loss": 1.146359920501709, |
|
"eval_runtime": 2.1418, |
|
"eval_samples_per_second": 231.582, |
|
"eval_steps_per_second": 7.47, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 2.8179972936400545e-05, |
|
"loss": 1.2283, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"eval_accuracy": 0.7480988335904306, |
|
"eval_loss": 1.1439129114151, |
|
"eval_runtime": 2.1515, |
|
"eval_samples_per_second": 230.532, |
|
"eval_steps_per_second": 7.437, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 2.8010825439783495e-05, |
|
"loss": 1.2279, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_accuracy": 0.7476770091832853, |
|
"eval_loss": 1.1496102809906006, |
|
"eval_runtime": 2.1443, |
|
"eval_samples_per_second": 231.311, |
|
"eval_steps_per_second": 7.462, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 2.7841677943166445e-05, |
|
"loss": 1.2276, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_accuracy": 0.7448800151502855, |
|
"eval_loss": 1.1544512510299683, |
|
"eval_runtime": 2.1443, |
|
"eval_samples_per_second": 231.308, |
|
"eval_steps_per_second": 7.462, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 2.7672530446549395e-05, |
|
"loss": 1.2301, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_accuracy": 0.7486796972831709, |
|
"eval_loss": 1.131188154220581, |
|
"eval_runtime": 2.2612, |
|
"eval_samples_per_second": 219.354, |
|
"eval_steps_per_second": 7.076, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.7503382949932345e-05, |
|
"loss": 1.2248, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7464872620949183, |
|
"eval_loss": 1.1444239616394043, |
|
"eval_runtime": 1.9117, |
|
"eval_samples_per_second": 259.451, |
|
"eval_steps_per_second": 8.369, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 2.7334235453315295e-05, |
|
"loss": 1.2266, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"eval_accuracy": 0.7430061513773736, |
|
"eval_loss": 1.1525160074234009, |
|
"eval_runtime": 2.256, |
|
"eval_samples_per_second": 219.857, |
|
"eval_steps_per_second": 7.092, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 2.716508795669824e-05, |
|
"loss": 1.2198, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_accuracy": 0.7462388784038825, |
|
"eval_loss": 1.1551423072814941, |
|
"eval_runtime": 2.2513, |
|
"eval_samples_per_second": 220.317, |
|
"eval_steps_per_second": 7.107, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 2.699594046008119e-05, |
|
"loss": 1.219, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_accuracy": 0.7479334406870639, |
|
"eval_loss": 1.143385887145996, |
|
"eval_runtime": 2.2558, |
|
"eval_samples_per_second": 219.879, |
|
"eval_steps_per_second": 7.093, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 2.682679296346414e-05, |
|
"loss": 1.2212, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"eval_accuracy": 0.7415982885151786, |
|
"eval_loss": 1.1707236766815186, |
|
"eval_runtime": 2.2452, |
|
"eval_samples_per_second": 220.92, |
|
"eval_steps_per_second": 7.126, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 2.665764546684709e-05, |
|
"loss": 1.2265, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"eval_accuracy": 0.7421521035598706, |
|
"eval_loss": 1.1743712425231934, |
|
"eval_runtime": 2.1289, |
|
"eval_samples_per_second": 232.985, |
|
"eval_steps_per_second": 7.516, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 2.648849797023004e-05, |
|
"loss": 1.2216, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_accuracy": 0.7392663666010835, |
|
"eval_loss": 1.1817814111709595, |
|
"eval_runtime": 2.1373, |
|
"eval_samples_per_second": 232.069, |
|
"eval_steps_per_second": 7.486, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 2.631935047361299e-05, |
|
"loss": 1.2226, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_accuracy": 0.7454341644794401, |
|
"eval_loss": 1.1662167310714722, |
|
"eval_runtime": 2.1776, |
|
"eval_samples_per_second": 227.77, |
|
"eval_steps_per_second": 7.347, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 2.615020297699594e-05, |
|
"loss": 1.2224, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"eval_accuracy": 0.7460155894249055, |
|
"eval_loss": 1.1345940828323364, |
|
"eval_runtime": 2.131, |
|
"eval_samples_per_second": 232.753, |
|
"eval_steps_per_second": 7.508, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 2.598105548037889e-05, |
|
"loss": 1.2186, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"eval_accuracy": 0.7462514417531718, |
|
"eval_loss": 1.153380036354065, |
|
"eval_runtime": 2.3136, |
|
"eval_samples_per_second": 214.386, |
|
"eval_steps_per_second": 6.916, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 2.581190798376184e-05, |
|
"loss": 1.2179, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"eval_accuracy": 0.7477791705270042, |
|
"eval_loss": 1.1399047374725342, |
|
"eval_runtime": 2.2709, |
|
"eval_samples_per_second": 218.413, |
|
"eval_steps_per_second": 7.046, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 2.564276048714479e-05, |
|
"loss": 1.2177, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"eval_accuracy": 0.7441804462995666, |
|
"eval_loss": 1.1545348167419434, |
|
"eval_runtime": 2.2419, |
|
"eval_samples_per_second": 221.237, |
|
"eval_steps_per_second": 7.137, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 2.547361299052774e-05, |
|
"loss": 1.2154, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"eval_accuracy": 0.7426768214742224, |
|
"eval_loss": 1.171052098274231, |
|
"eval_runtime": 2.1267, |
|
"eval_samples_per_second": 233.224, |
|
"eval_steps_per_second": 7.523, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 2.530446549391069e-05, |
|
"loss": 1.2179, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_accuracy": 0.7514139509830325, |
|
"eval_loss": 1.1348686218261719, |
|
"eval_runtime": 1.8985, |
|
"eval_samples_per_second": 261.263, |
|
"eval_steps_per_second": 8.428, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 2.513531799729364e-05, |
|
"loss": 1.2184, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.749460868615729, |
|
"eval_loss": 1.1427435874938965, |
|
"eval_runtime": 2.1257, |
|
"eval_samples_per_second": 233.336, |
|
"eval_steps_per_second": 7.527, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 2.496617050067659e-05, |
|
"loss": 1.2193, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"eval_accuracy": 0.7494911077780159, |
|
"eval_loss": 1.1222712993621826, |
|
"eval_runtime": 2.0347, |
|
"eval_samples_per_second": 243.77, |
|
"eval_steps_per_second": 7.864, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 2.479702300405954e-05, |
|
"loss": 1.2063, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"eval_accuracy": 0.7488264163021444, |
|
"eval_loss": 1.1357399225234985, |
|
"eval_runtime": 1.9046, |
|
"eval_samples_per_second": 260.423, |
|
"eval_steps_per_second": 8.401, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 2.462787550744249e-05, |
|
"loss": 1.2025, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"eval_accuracy": 0.7486311066000695, |
|
"eval_loss": 1.1476197242736816, |
|
"eval_runtime": 2.1786, |
|
"eval_samples_per_second": 227.67, |
|
"eval_steps_per_second": 7.344, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 2.445872801082544e-05, |
|
"loss": 1.2097, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"eval_accuracy": 0.7492516383053316, |
|
"eval_loss": 1.1382330656051636, |
|
"eval_runtime": 2.1295, |
|
"eval_samples_per_second": 232.922, |
|
"eval_steps_per_second": 7.514, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 2.428958051420839e-05, |
|
"loss": 1.2106, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"eval_accuracy": 0.7500204253928484, |
|
"eval_loss": 1.1413904428482056, |
|
"eval_runtime": 2.1753, |
|
"eval_samples_per_second": 228.019, |
|
"eval_steps_per_second": 7.355, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 2.412043301759134e-05, |
|
"loss": 1.2146, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"eval_accuracy": 0.7533006412674462, |
|
"eval_loss": 1.113772988319397, |
|
"eval_runtime": 2.136, |
|
"eval_samples_per_second": 232.208, |
|
"eval_steps_per_second": 7.491, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 2.395128552097429e-05, |
|
"loss": 1.2129, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"eval_accuracy": 0.7477787948952668, |
|
"eval_loss": 1.1447216272354126, |
|
"eval_runtime": 2.2641, |
|
"eval_samples_per_second": 219.075, |
|
"eval_steps_per_second": 7.067, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 2.378213802435724e-05, |
|
"loss": 1.2078, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"eval_accuracy": 0.7508962988920937, |
|
"eval_loss": 1.155730128288269, |
|
"eval_runtime": 2.1359, |
|
"eval_samples_per_second": 232.221, |
|
"eval_steps_per_second": 7.491, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 2.3612990527740193e-05, |
|
"loss": 1.204, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"eval_accuracy": 0.7537665293735096, |
|
"eval_loss": 1.1243318319320679, |
|
"eval_runtime": 2.2543, |
|
"eval_samples_per_second": 220.027, |
|
"eval_steps_per_second": 7.098, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 2.3443843031123143e-05, |
|
"loss": 1.2101, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"eval_accuracy": 0.7507114399544679, |
|
"eval_loss": 1.1352229118347168, |
|
"eval_runtime": 2.1504, |
|
"eval_samples_per_second": 230.651, |
|
"eval_steps_per_second": 7.44, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 2.327469553450609e-05, |
|
"loss": 1.207, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"eval_accuracy": 0.7526499865482916, |
|
"eval_loss": 1.1365910768508911, |
|
"eval_runtime": 2.2554, |
|
"eval_samples_per_second": 219.92, |
|
"eval_steps_per_second": 7.094, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 2.310554803788904e-05, |
|
"loss": 1.2067, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"eval_accuracy": 0.7482271408617169, |
|
"eval_loss": 1.145031213760376, |
|
"eval_runtime": 2.1262, |
|
"eval_samples_per_second": 233.276, |
|
"eval_steps_per_second": 7.525, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 2.293640054127199e-05, |
|
"loss": 1.1997, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"eval_accuracy": 0.7503758591065293, |
|
"eval_loss": 1.1333723068237305, |
|
"eval_runtime": 2.1437, |
|
"eval_samples_per_second": 231.381, |
|
"eval_steps_per_second": 7.464, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 2.276725304465494e-05, |
|
"loss": 1.2114, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"eval_accuracy": 0.7523950883821346, |
|
"eval_loss": 1.13480544090271, |
|
"eval_runtime": 2.256, |
|
"eval_samples_per_second": 219.855, |
|
"eval_steps_per_second": 7.092, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 2.259810554803789e-05, |
|
"loss": 1.2087, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_accuracy": 0.7507978579542381, |
|
"eval_loss": 1.1221325397491455, |
|
"eval_runtime": 2.149, |
|
"eval_samples_per_second": 230.81, |
|
"eval_steps_per_second": 7.445, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 2.242895805142084e-05, |
|
"loss": 1.2065, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"eval_accuracy": 0.7486237532021583, |
|
"eval_loss": 1.130583643913269, |
|
"eval_runtime": 2.2591, |
|
"eval_samples_per_second": 219.556, |
|
"eval_steps_per_second": 7.082, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 2.225981055480379e-05, |
|
"loss": 1.1985, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"eval_accuracy": 0.7470671686582637, |
|
"eval_loss": 1.1648321151733398, |
|
"eval_runtime": 2.2577, |
|
"eval_samples_per_second": 219.693, |
|
"eval_steps_per_second": 7.087, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 2.209066305818674e-05, |
|
"loss": 1.205, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"eval_accuracy": 0.7526795068095737, |
|
"eval_loss": 1.1088367700576782, |
|
"eval_runtime": 2.1263, |
|
"eval_samples_per_second": 233.267, |
|
"eval_steps_per_second": 7.525, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 2.192151556156969e-05, |
|
"loss": 1.2026, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"eval_accuracy": 0.7512794548290868, |
|
"eval_loss": 1.1253347396850586, |
|
"eval_runtime": 2.1489, |
|
"eval_samples_per_second": 230.816, |
|
"eval_steps_per_second": 7.446, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 2.175236806495264e-05, |
|
"loss": 1.2, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"eval_accuracy": 0.7473735779217244, |
|
"eval_loss": 1.1330283880233765, |
|
"eval_runtime": 2.2554, |
|
"eval_samples_per_second": 219.915, |
|
"eval_steps_per_second": 7.094, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 2.1583220568335592e-05, |
|
"loss": 1.1997, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"eval_accuracy": 0.7493864048660762, |
|
"eval_loss": 1.1423763036727905, |
|
"eval_runtime": 2.2628, |
|
"eval_samples_per_second": 219.193, |
|
"eval_steps_per_second": 7.071, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 2.1414073071718542e-05, |
|
"loss": 1.1989, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"eval_accuracy": 0.7477665276950566, |
|
"eval_loss": 1.1288686990737915, |
|
"eval_runtime": 2.2744, |
|
"eval_samples_per_second": 218.08, |
|
"eval_steps_per_second": 7.035, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 2.124492557510149e-05, |
|
"loss": 1.1956, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"eval_accuracy": 0.75250470912615, |
|
"eval_loss": 1.1163060665130615, |
|
"eval_runtime": 2.1424, |
|
"eval_samples_per_second": 231.521, |
|
"eval_steps_per_second": 7.468, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"learning_rate": 2.107577807848444e-05, |
|
"loss": 1.1997, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"eval_accuracy": 0.7502406674510643, |
|
"eval_loss": 1.135400414466858, |
|
"eval_runtime": 2.132, |
|
"eval_samples_per_second": 232.649, |
|
"eval_steps_per_second": 7.505, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 2.090663058186739e-05, |
|
"loss": 1.2011, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"eval_accuracy": 0.7487909354704988, |
|
"eval_loss": 1.137099027633667, |
|
"eval_runtime": 2.022, |
|
"eval_samples_per_second": 245.301, |
|
"eval_steps_per_second": 7.913, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 2.073748308525034e-05, |
|
"loss": 1.1998, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"eval_accuracy": 0.7525347250536846, |
|
"eval_loss": 1.1276001930236816, |
|
"eval_runtime": 2.1283, |
|
"eval_samples_per_second": 233.052, |
|
"eval_steps_per_second": 7.518, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 11.77, |
|
"learning_rate": 2.056833558863329e-05, |
|
"loss": 1.1957, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 11.77, |
|
"eval_accuracy": 0.7557962751805397, |
|
"eval_loss": 1.1078341007232666, |
|
"eval_runtime": 2.1602, |
|
"eval_samples_per_second": 229.608, |
|
"eval_steps_per_second": 7.407, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 2.039918809201624e-05, |
|
"loss": 1.2027, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"eval_accuracy": 0.745357875418331, |
|
"eval_loss": 1.1625709533691406, |
|
"eval_runtime": 2.1381, |
|
"eval_samples_per_second": 231.987, |
|
"eval_steps_per_second": 7.483, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 2.0230040595399188e-05, |
|
"loss": 1.2013, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"eval_accuracy": 0.7526884647845145, |
|
"eval_loss": 1.1228464841842651, |
|
"eval_runtime": 1.9029, |
|
"eval_samples_per_second": 260.656, |
|
"eval_steps_per_second": 8.408, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 2.0060893098782138e-05, |
|
"loss": 1.1944, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_accuracy": 0.7478242411377627, |
|
"eval_loss": 1.1413049697875977, |
|
"eval_runtime": 2.1489, |
|
"eval_samples_per_second": 230.819, |
|
"eval_steps_per_second": 7.446, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"learning_rate": 1.9891745602165088e-05, |
|
"loss": 1.1946, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"eval_accuracy": 0.7513838877841672, |
|
"eval_loss": 1.124992847442627, |
|
"eval_runtime": 2.2528, |
|
"eval_samples_per_second": 220.169, |
|
"eval_steps_per_second": 7.102, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 1.972259810554804e-05, |
|
"loss": 1.196, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"eval_accuracy": 0.7467797423793904, |
|
"eval_loss": 1.1447776556015015, |
|
"eval_runtime": 2.243, |
|
"eval_samples_per_second": 221.135, |
|
"eval_steps_per_second": 7.133, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 1.955345060893099e-05, |
|
"loss": 1.1893, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"eval_accuracy": 0.7478244470188862, |
|
"eval_loss": 1.1357169151306152, |
|
"eval_runtime": 2.1342, |
|
"eval_samples_per_second": 232.401, |
|
"eval_steps_per_second": 7.497, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 1.938430311231394e-05, |
|
"loss": 1.1865, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"eval_accuracy": 0.7525039957378796, |
|
"eval_loss": 1.120892882347107, |
|
"eval_runtime": 2.1457, |
|
"eval_samples_per_second": 231.161, |
|
"eval_steps_per_second": 7.457, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 1.9215155615696888e-05, |
|
"loss": 1.1921, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"eval_accuracy": 0.7517412799431865, |
|
"eval_loss": 1.1200112104415894, |
|
"eval_runtime": 2.2546, |
|
"eval_samples_per_second": 219.99, |
|
"eval_steps_per_second": 7.096, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"learning_rate": 1.9046008119079838e-05, |
|
"loss": 1.1928, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"eval_accuracy": 0.751185221513814, |
|
"eval_loss": 1.1144980192184448, |
|
"eval_runtime": 1.9441, |
|
"eval_samples_per_second": 255.124, |
|
"eval_steps_per_second": 8.23, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 1.8876860622462788e-05, |
|
"loss": 1.1904, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"eval_accuracy": 0.754587343566813, |
|
"eval_loss": 1.1108394861221313, |
|
"eval_runtime": 2.252, |
|
"eval_samples_per_second": 220.248, |
|
"eval_steps_per_second": 7.105, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 1.8707713125845738e-05, |
|
"loss": 1.1955, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"eval_accuracy": 0.7540812503345287, |
|
"eval_loss": 1.106156826019287, |
|
"eval_runtime": 2.2623, |
|
"eval_samples_per_second": 219.243, |
|
"eval_steps_per_second": 7.072, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"learning_rate": 1.8538565629228687e-05, |
|
"loss": 1.1898, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"eval_accuracy": 0.7519862396592678, |
|
"eval_loss": 1.126400351524353, |
|
"eval_runtime": 2.1347, |
|
"eval_samples_per_second": 232.346, |
|
"eval_steps_per_second": 7.495, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 1.8369418132611637e-05, |
|
"loss": 1.1917, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"eval_accuracy": 0.7535633076368476, |
|
"eval_loss": 1.112923502922058, |
|
"eval_runtime": 2.2569, |
|
"eval_samples_per_second": 219.772, |
|
"eval_steps_per_second": 7.089, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 1.8200270635994587e-05, |
|
"loss": 1.1895, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"eval_accuracy": 0.7494371965607963, |
|
"eval_loss": 1.1288461685180664, |
|
"eval_runtime": 2.1453, |
|
"eval_samples_per_second": 231.202, |
|
"eval_steps_per_second": 7.458, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 1.8031123139377537e-05, |
|
"loss": 1.1966, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"eval_accuracy": 0.7474297006435763, |
|
"eval_loss": 1.1435807943344116, |
|
"eval_runtime": 2.2544, |
|
"eval_samples_per_second": 220.014, |
|
"eval_steps_per_second": 7.097, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 1.7861975642760487e-05, |
|
"loss": 1.1887, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"eval_accuracy": 0.7530491066652402, |
|
"eval_loss": 1.1220248937606812, |
|
"eval_runtime": 2.0377, |
|
"eval_samples_per_second": 243.414, |
|
"eval_steps_per_second": 7.852, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 1.769282814614344e-05, |
|
"loss": 1.1856, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"eval_accuracy": 0.7499591391991283, |
|
"eval_loss": 1.1441563367843628, |
|
"eval_runtime": 2.2428, |
|
"eval_samples_per_second": 221.15, |
|
"eval_steps_per_second": 7.134, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 1.752368064952639e-05, |
|
"loss": 1.1934, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.7487198734618374, |
|
"eval_loss": 1.134777545928955, |
|
"eval_runtime": 2.1446, |
|
"eval_samples_per_second": 231.279, |
|
"eval_steps_per_second": 7.461, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 1.7354533152909337e-05, |
|
"loss": 1.1848, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"eval_accuracy": 0.7521449252264457, |
|
"eval_loss": 1.1171698570251465, |
|
"eval_runtime": 2.2564, |
|
"eval_samples_per_second": 219.819, |
|
"eval_steps_per_second": 7.091, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 1.7185385656292287e-05, |
|
"loss": 1.1821, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"eval_accuracy": 0.7566415837311541, |
|
"eval_loss": 1.1042215824127197, |
|
"eval_runtime": 2.1261, |
|
"eval_samples_per_second": 233.296, |
|
"eval_steps_per_second": 7.526, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 1.7016238159675237e-05, |
|
"loss": 1.1817, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"eval_accuracy": 0.7495432072227, |
|
"eval_loss": 1.1272791624069214, |
|
"eval_runtime": 2.029, |
|
"eval_samples_per_second": 244.45, |
|
"eval_steps_per_second": 7.885, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 1.6847090663058187e-05, |
|
"loss": 1.1773, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"eval_accuracy": 0.7539743031358885, |
|
"eval_loss": 1.0957542657852173, |
|
"eval_runtime": 2.193, |
|
"eval_samples_per_second": 226.174, |
|
"eval_steps_per_second": 7.296, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 1.6677943166441137e-05, |
|
"loss": 1.1774, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"eval_accuracy": 0.7510550791645386, |
|
"eval_loss": 1.1139615774154663, |
|
"eval_runtime": 2.0354, |
|
"eval_samples_per_second": 243.69, |
|
"eval_steps_per_second": 7.861, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 1.6508795669824086e-05, |
|
"loss": 1.1841, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"eval_accuracy": 0.7535410764872521, |
|
"eval_loss": 1.1085665225982666, |
|
"eval_runtime": 2.2775, |
|
"eval_samples_per_second": 217.784, |
|
"eval_steps_per_second": 7.025, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 1.6339648173207036e-05, |
|
"loss": 1.1825, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"eval_accuracy": 0.7575840393550151, |
|
"eval_loss": 1.0903350114822388, |
|
"eval_runtime": 2.1352, |
|
"eval_samples_per_second": 232.293, |
|
"eval_steps_per_second": 7.493, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 1.6170500676589986e-05, |
|
"loss": 1.1845, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"eval_accuracy": 0.7486053092575125, |
|
"eval_loss": 1.129094123840332, |
|
"eval_runtime": 2.2564, |
|
"eval_samples_per_second": 219.823, |
|
"eval_steps_per_second": 7.091, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 1.6001353179972936e-05, |
|
"loss": 1.1853, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_accuracy": 0.7485774103500107, |
|
"eval_loss": 1.1317797899246216, |
|
"eval_runtime": 2.2139, |
|
"eval_samples_per_second": 224.036, |
|
"eval_steps_per_second": 7.227, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 1.5832205683355886e-05, |
|
"loss": 1.1761, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"eval_accuracy": 0.7552630190471166, |
|
"eval_loss": 1.1218476295471191, |
|
"eval_runtime": 2.2116, |
|
"eval_samples_per_second": 224.271, |
|
"eval_steps_per_second": 7.235, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 1.566305818673884e-05, |
|
"loss": 1.1825, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"eval_accuracy": 0.7484677617063006, |
|
"eval_loss": 1.130650520324707, |
|
"eval_runtime": 2.1348, |
|
"eval_samples_per_second": 232.339, |
|
"eval_steps_per_second": 7.495, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 1.549391069012179e-05, |
|
"loss": 1.1849, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"eval_accuracy": 0.7503921250473254, |
|
"eval_loss": 1.1273096799850464, |
|
"eval_runtime": 2.1346, |
|
"eval_samples_per_second": 232.367, |
|
"eval_steps_per_second": 7.496, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 1.5324763193504736e-05, |
|
"loss": 1.1792, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"eval_accuracy": 0.7496725963112518, |
|
"eval_loss": 1.1290724277496338, |
|
"eval_runtime": 2.0388, |
|
"eval_samples_per_second": 243.277, |
|
"eval_steps_per_second": 7.848, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 1.5155615696887688e-05, |
|
"loss": 1.1852, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.7521213264014223, |
|
"eval_loss": 1.1133606433868408, |
|
"eval_runtime": 2.1407, |
|
"eval_samples_per_second": 231.702, |
|
"eval_steps_per_second": 7.474, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 1.4986468200270637e-05, |
|
"loss": 1.1745, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"eval_accuracy": 0.7510633656887338, |
|
"eval_loss": 1.1251685619354248, |
|
"eval_runtime": 2.2562, |
|
"eval_samples_per_second": 219.835, |
|
"eval_steps_per_second": 7.091, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 1.4817320703653587e-05, |
|
"loss": 1.1746, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"eval_accuracy": 0.7508518468038707, |
|
"eval_loss": 1.114823579788208, |
|
"eval_runtime": 2.1373, |
|
"eval_samples_per_second": 232.068, |
|
"eval_steps_per_second": 7.486, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 14.14, |
|
"learning_rate": 1.4648173207036536e-05, |
|
"loss": 1.1765, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 14.14, |
|
"eval_accuracy": 0.7499047204224969, |
|
"eval_loss": 1.120153784751892, |
|
"eval_runtime": 1.9047, |
|
"eval_samples_per_second": 260.406, |
|
"eval_steps_per_second": 8.4, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 1.4479025710419486e-05, |
|
"loss": 1.1762, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"eval_accuracy": 0.7526651867686152, |
|
"eval_loss": 1.11342453956604, |
|
"eval_runtime": 2.1108, |
|
"eval_samples_per_second": 234.982, |
|
"eval_steps_per_second": 7.58, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 1.4309878213802435e-05, |
|
"loss": 1.1752, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"eval_accuracy": 0.7550594107753242, |
|
"eval_loss": 1.1170574426651, |
|
"eval_runtime": 2.0286, |
|
"eval_samples_per_second": 244.505, |
|
"eval_steps_per_second": 7.887, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 14.34, |
|
"learning_rate": 1.4140730717185385e-05, |
|
"loss": 1.176, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 14.34, |
|
"eval_accuracy": 0.7526875882289065, |
|
"eval_loss": 1.1155229806900024, |
|
"eval_runtime": 2.2532, |
|
"eval_samples_per_second": 220.127, |
|
"eval_steps_per_second": 7.101, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 1.3971583220568335e-05, |
|
"loss": 1.1732, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"eval_accuracy": 0.7481485413956945, |
|
"eval_loss": 1.133280873298645, |
|
"eval_runtime": 2.1464, |
|
"eval_samples_per_second": 231.081, |
|
"eval_steps_per_second": 7.454, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 1.3802435723951287e-05, |
|
"loss": 1.1753, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"eval_accuracy": 0.7574028502663674, |
|
"eval_loss": 1.0981875658035278, |
|
"eval_runtime": 2.0246, |
|
"eval_samples_per_second": 244.982, |
|
"eval_steps_per_second": 7.903, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 1.3633288227334237e-05, |
|
"loss": 1.1713, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_accuracy": 0.749116988864623, |
|
"eval_loss": 1.1342977285385132, |
|
"eval_runtime": 2.1397, |
|
"eval_samples_per_second": 231.806, |
|
"eval_steps_per_second": 7.478, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 1.3464140730717187e-05, |
|
"loss": 1.1692, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"eval_accuracy": 0.7548563905532121, |
|
"eval_loss": 1.1020859479904175, |
|
"eval_runtime": 2.038, |
|
"eval_samples_per_second": 243.371, |
|
"eval_steps_per_second": 7.851, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 1.3294993234100137e-05, |
|
"loss": 1.17, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"eval_accuracy": 0.7503776331328444, |
|
"eval_loss": 1.110732078552246, |
|
"eval_runtime": 2.2439, |
|
"eval_samples_per_second": 221.039, |
|
"eval_steps_per_second": 7.13, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 1.3125845737483087e-05, |
|
"loss": 1.1699, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"eval_accuracy": 0.7505413012882971, |
|
"eval_loss": 1.1227320432662964, |
|
"eval_runtime": 2.1354, |
|
"eval_samples_per_second": 232.28, |
|
"eval_steps_per_second": 7.493, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 1.2956698240866036e-05, |
|
"loss": 1.1763, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"eval_accuracy": 0.7523848348960457, |
|
"eval_loss": 1.1152479648590088, |
|
"eval_runtime": 2.2475, |
|
"eval_samples_per_second": 220.693, |
|
"eval_steps_per_second": 7.119, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 1.2787550744248986e-05, |
|
"loss": 1.1729, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"eval_accuracy": 0.7563491422261722, |
|
"eval_loss": 1.0939308404922485, |
|
"eval_runtime": 2.0291, |
|
"eval_samples_per_second": 244.44, |
|
"eval_steps_per_second": 7.885, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 1.2618403247631935e-05, |
|
"loss": 1.1731, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"eval_accuracy": 0.7446182644738601, |
|
"eval_loss": 1.153084397315979, |
|
"eval_runtime": 2.1368, |
|
"eval_samples_per_second": 232.12, |
|
"eval_steps_per_second": 7.488, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 1.2449255751014885e-05, |
|
"loss": 1.1744, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"eval_accuracy": 0.748938913662494, |
|
"eval_loss": 1.1451458930969238, |
|
"eval_runtime": 2.2627, |
|
"eval_samples_per_second": 219.205, |
|
"eval_steps_per_second": 7.071, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 1.2280108254397836e-05, |
|
"loss": 1.169, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"eval_accuracy": 0.7527127355796688, |
|
"eval_loss": 1.1211124658584595, |
|
"eval_runtime": 2.254, |
|
"eval_samples_per_second": 220.058, |
|
"eval_steps_per_second": 7.099, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 1.2110960757780786e-05, |
|
"loss": 1.1644, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"eval_accuracy": 0.7553240179845462, |
|
"eval_loss": 1.1134895086288452, |
|
"eval_runtime": 2.1385, |
|
"eval_samples_per_second": 231.943, |
|
"eval_steps_per_second": 7.482, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 1.1941813261163736e-05, |
|
"loss": 1.1726, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"eval_accuracy": 0.7551064057320073, |
|
"eval_loss": 1.0903879404067993, |
|
"eval_runtime": 2.141, |
|
"eval_samples_per_second": 231.664, |
|
"eval_steps_per_second": 7.473, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 1.1772665764546684e-05, |
|
"loss": 1.1653, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"eval_accuracy": 0.7585871152701898, |
|
"eval_loss": 1.0806618928909302, |
|
"eval_runtime": 2.2591, |
|
"eval_samples_per_second": 219.56, |
|
"eval_steps_per_second": 7.083, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 1.1603518267929634e-05, |
|
"loss": 1.1651, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"eval_accuracy": 0.7487410264652309, |
|
"eval_loss": 1.1385972499847412, |
|
"eval_runtime": 2.0137, |
|
"eval_samples_per_second": 246.318, |
|
"eval_steps_per_second": 7.946, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 1.1434370771312584e-05, |
|
"loss": 1.1663, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"eval_accuracy": 0.7531269501044577, |
|
"eval_loss": 1.1114603281021118, |
|
"eval_runtime": 2.038, |
|
"eval_samples_per_second": 243.378, |
|
"eval_steps_per_second": 7.851, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 1.1265223274695536e-05, |
|
"loss": 1.1635, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"eval_accuracy": 0.7503972421965474, |
|
"eval_loss": 1.1271893978118896, |
|
"eval_runtime": 2.2587, |
|
"eval_samples_per_second": 219.592, |
|
"eval_steps_per_second": 7.084, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 1.1096075778078486e-05, |
|
"loss": 1.1646, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"eval_accuracy": 0.7541348344725908, |
|
"eval_loss": 1.0982328653335571, |
|
"eval_runtime": 2.0235, |
|
"eval_samples_per_second": 245.115, |
|
"eval_steps_per_second": 7.907, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 1.0926928281461436e-05, |
|
"loss": 1.1639, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"eval_accuracy": 0.7544710600476913, |
|
"eval_loss": 1.1104248762130737, |
|
"eval_runtime": 2.0141, |
|
"eval_samples_per_second": 246.267, |
|
"eval_steps_per_second": 7.944, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 1.0757780784844384e-05, |
|
"loss": 1.1598, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"eval_accuracy": 0.7492906747372119, |
|
"eval_loss": 1.1334669589996338, |
|
"eval_runtime": 2.1233, |
|
"eval_samples_per_second": 233.593, |
|
"eval_steps_per_second": 7.535, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 1.0588633288227334e-05, |
|
"loss": 1.1612, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"eval_accuracy": 0.7535777086433112, |
|
"eval_loss": 1.1088109016418457, |
|
"eval_runtime": 2.1482, |
|
"eval_samples_per_second": 230.894, |
|
"eval_steps_per_second": 7.448, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 1.0419485791610285e-05, |
|
"loss": 1.159, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"eval_accuracy": 0.755389401298914, |
|
"eval_loss": 1.0895658731460571, |
|
"eval_runtime": 1.8998, |
|
"eval_samples_per_second": 261.073, |
|
"eval_steps_per_second": 8.422, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 1.0250338294993235e-05, |
|
"loss": 1.1686, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"eval_accuracy": 0.7521880806829505, |
|
"eval_loss": 1.1212115287780762, |
|
"eval_runtime": 2.1408, |
|
"eval_samples_per_second": 231.688, |
|
"eval_steps_per_second": 7.474, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 1.0081190798376185e-05, |
|
"loss": 1.158, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"eval_accuracy": 0.7528032891926527, |
|
"eval_loss": 1.1104135513305664, |
|
"eval_runtime": 2.2497, |
|
"eval_samples_per_second": 220.471, |
|
"eval_steps_per_second": 7.112, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 9.912043301759135e-06, |
|
"loss": 1.1633, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"eval_accuracy": 0.7537655533726261, |
|
"eval_loss": 1.097953200340271, |
|
"eval_runtime": 2.0401, |
|
"eval_samples_per_second": 243.13, |
|
"eval_steps_per_second": 7.843, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 9.742895805142083e-06, |
|
"loss": 1.1622, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"eval_accuracy": 0.750889583782618, |
|
"eval_loss": 1.1274609565734863, |
|
"eval_runtime": 2.0243, |
|
"eval_samples_per_second": 245.026, |
|
"eval_steps_per_second": 7.904, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 9.573748308525033e-06, |
|
"loss": 1.1625, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"eval_accuracy": 0.754607674067687, |
|
"eval_loss": 1.1065136194229126, |
|
"eval_runtime": 1.9032, |
|
"eval_samples_per_second": 260.616, |
|
"eval_steps_per_second": 8.407, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"learning_rate": 9.404600811907985e-06, |
|
"loss": 1.1582, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"eval_accuracy": 0.7515266766659524, |
|
"eval_loss": 1.1181069612503052, |
|
"eval_runtime": 2.1333, |
|
"eval_samples_per_second": 232.507, |
|
"eval_steps_per_second": 7.5, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 9.235453315290935e-06, |
|
"loss": 1.1568, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"eval_accuracy": 0.7558363160425237, |
|
"eval_loss": 1.1019920110702515, |
|
"eval_runtime": 2.1312, |
|
"eval_samples_per_second": 232.728, |
|
"eval_steps_per_second": 7.507, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"learning_rate": 9.066305818673885e-06, |
|
"loss": 1.1573, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"eval_accuracy": 0.7532534995625547, |
|
"eval_loss": 1.115644097328186, |
|
"eval_runtime": 2.1433, |
|
"eval_samples_per_second": 231.416, |
|
"eval_steps_per_second": 7.465, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 8.897158322056835e-06, |
|
"loss": 1.1549, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"eval_accuracy": 0.7508123310487945, |
|
"eval_loss": 1.1205765008926392, |
|
"eval_runtime": 2.2601, |
|
"eval_samples_per_second": 219.456, |
|
"eval_steps_per_second": 7.079, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 8.728010825439783e-06, |
|
"loss": 1.1592, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"eval_accuracy": 0.7542723559759243, |
|
"eval_loss": 1.0985246896743774, |
|
"eval_runtime": 2.2649, |
|
"eval_samples_per_second": 218.998, |
|
"eval_steps_per_second": 7.064, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 8.558863328822733e-06, |
|
"loss": 1.1584, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"eval_accuracy": 0.7531888104231674, |
|
"eval_loss": 1.1170583963394165, |
|
"eval_runtime": 2.028, |
|
"eval_samples_per_second": 244.575, |
|
"eval_steps_per_second": 7.89, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 8.389715832205684e-06, |
|
"loss": 1.1589, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"eval_accuracy": 0.7611846765843823, |
|
"eval_loss": 1.0686120986938477, |
|
"eval_runtime": 2.0269, |
|
"eval_samples_per_second": 244.714, |
|
"eval_steps_per_second": 7.894, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 8.220568335588634e-06, |
|
"loss": 1.1566, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"eval_accuracy": 0.7563581433672069, |
|
"eval_loss": 1.094774603843689, |
|
"eval_runtime": 2.1272, |
|
"eval_samples_per_second": 233.175, |
|
"eval_steps_per_second": 7.522, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"learning_rate": 8.051420838971584e-06, |
|
"loss": 1.157, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"eval_accuracy": 0.7568443220476267, |
|
"eval_loss": 1.0895816087722778, |
|
"eval_runtime": 1.8979, |
|
"eval_samples_per_second": 261.337, |
|
"eval_steps_per_second": 8.43, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 7.882273342354534e-06, |
|
"loss": 1.1598, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"eval_accuracy": 0.7582212358242888, |
|
"eval_loss": 1.086458683013916, |
|
"eval_runtime": 2.2441, |
|
"eval_samples_per_second": 221.026, |
|
"eval_steps_per_second": 7.13, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 7.713125845737482e-06, |
|
"loss": 1.1567, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"eval_accuracy": 0.7565769744554401, |
|
"eval_loss": 1.1091084480285645, |
|
"eval_runtime": 2.2461, |
|
"eval_samples_per_second": 220.83, |
|
"eval_steps_per_second": 7.124, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 7.543978349120433e-06, |
|
"loss": 1.1643, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_accuracy": 0.7521943363306939, |
|
"eval_loss": 1.1232304573059082, |
|
"eval_runtime": 1.9345, |
|
"eval_samples_per_second": 256.4, |
|
"eval_steps_per_second": 8.271, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 7.374830852503384e-06, |
|
"loss": 1.1536, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"eval_accuracy": 0.7583081570996979, |
|
"eval_loss": 1.0930777788162231, |
|
"eval_runtime": 2.1324, |
|
"eval_samples_per_second": 232.599, |
|
"eval_steps_per_second": 7.503, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"learning_rate": 7.205683355886334e-06, |
|
"loss": 1.1486, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"eval_accuracy": 0.7540195062318956, |
|
"eval_loss": 1.1099752187728882, |
|
"eval_runtime": 1.917, |
|
"eval_samples_per_second": 258.744, |
|
"eval_steps_per_second": 8.347, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"learning_rate": 7.036535859269283e-06, |
|
"loss": 1.1551, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"eval_accuracy": 0.7537926501999014, |
|
"eval_loss": 1.1018755435943604, |
|
"eval_runtime": 2.2805, |
|
"eval_samples_per_second": 217.494, |
|
"eval_steps_per_second": 7.016, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 6.867388362652233e-06, |
|
"loss": 1.1491, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"eval_accuracy": 0.7546221700303138, |
|
"eval_loss": 1.096489667892456, |
|
"eval_runtime": 2.0207, |
|
"eval_samples_per_second": 245.455, |
|
"eval_steps_per_second": 7.918, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 6.698240866035183e-06, |
|
"loss": 1.152, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"eval_accuracy": 0.7590838783208054, |
|
"eval_loss": 1.0724998712539673, |
|
"eval_runtime": 2.2748, |
|
"eval_samples_per_second": 218.043, |
|
"eval_steps_per_second": 7.034, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 6.5290933694181334e-06, |
|
"loss": 1.1521, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"eval_accuracy": 0.7526631431935811, |
|
"eval_loss": 1.1246150732040405, |
|
"eval_runtime": 2.2564, |
|
"eval_samples_per_second": 219.819, |
|
"eval_steps_per_second": 7.091, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 17.46, |
|
"learning_rate": 6.359945872801083e-06, |
|
"loss": 1.1518, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 17.46, |
|
"eval_accuracy": 0.7570206230802984, |
|
"eval_loss": 1.1025118827819824, |
|
"eval_runtime": 2.2644, |
|
"eval_samples_per_second": 219.041, |
|
"eval_steps_per_second": 7.066, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 6.190798376184033e-06, |
|
"loss": 1.1525, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"eval_accuracy": 0.7553470100392842, |
|
"eval_loss": 1.1027612686157227, |
|
"eval_runtime": 2.2829, |
|
"eval_samples_per_second": 217.27, |
|
"eval_steps_per_second": 7.009, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"learning_rate": 6.021650879566982e-06, |
|
"loss": 1.1509, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"eval_accuracy": 0.753968902322795, |
|
"eval_loss": 1.1140735149383545, |
|
"eval_runtime": 2.0112, |
|
"eval_samples_per_second": 246.621, |
|
"eval_steps_per_second": 7.956, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 17.66, |
|
"learning_rate": 5.852503382949932e-06, |
|
"loss": 1.1522, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 17.66, |
|
"eval_accuracy": 0.7523416805483493, |
|
"eval_loss": 1.1235767602920532, |
|
"eval_runtime": 2.0162, |
|
"eval_samples_per_second": 246.006, |
|
"eval_steps_per_second": 7.936, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 5.683355886332883e-06, |
|
"loss": 1.1488, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"eval_accuracy": 0.7589817903428665, |
|
"eval_loss": 1.0937731266021729, |
|
"eval_runtime": 2.1303, |
|
"eval_samples_per_second": 232.835, |
|
"eval_steps_per_second": 7.511, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"learning_rate": 5.514208389715832e-06, |
|
"loss": 1.1477, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"eval_accuracy": 0.7519756032882524, |
|
"eval_loss": 1.1069520711898804, |
|
"eval_runtime": 2.1341, |
|
"eval_samples_per_second": 232.421, |
|
"eval_steps_per_second": 7.497, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 5.345060893098782e-06, |
|
"loss": 1.1498, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"eval_accuracy": 0.7560714094247574, |
|
"eval_loss": 1.0885875225067139, |
|
"eval_runtime": 2.2647, |
|
"eval_samples_per_second": 219.014, |
|
"eval_steps_per_second": 7.065, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 5.175913396481733e-06, |
|
"loss": 1.1489, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"eval_accuracy": 0.75788641382883, |
|
"eval_loss": 1.0874009132385254, |
|
"eval_runtime": 2.1397, |
|
"eval_samples_per_second": 231.808, |
|
"eval_steps_per_second": 7.478, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 5.006765899864682e-06, |
|
"loss": 1.1462, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7556547699093623, |
|
"eval_loss": 1.1015816926956177, |
|
"eval_runtime": 2.2706, |
|
"eval_samples_per_second": 218.448, |
|
"eval_steps_per_second": 7.047, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 4.837618403247632e-06, |
|
"loss": 1.1448, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"eval_accuracy": 0.7546062508530094, |
|
"eval_loss": 1.0937751531600952, |
|
"eval_runtime": 2.022, |
|
"eval_samples_per_second": 245.305, |
|
"eval_steps_per_second": 7.913, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 4.6684709066305826e-06, |
|
"loss": 1.1425, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"eval_accuracy": 0.7552112751822265, |
|
"eval_loss": 1.0958871841430664, |
|
"eval_runtime": 2.2598, |
|
"eval_samples_per_second": 219.486, |
|
"eval_steps_per_second": 7.08, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"learning_rate": 4.499323410013532e-06, |
|
"loss": 1.1414, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"eval_accuracy": 0.7558802565930149, |
|
"eval_loss": 1.0867284536361694, |
|
"eval_runtime": 2.029, |
|
"eval_samples_per_second": 244.456, |
|
"eval_steps_per_second": 7.886, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 18.27, |
|
"learning_rate": 4.330175913396482e-06, |
|
"loss": 1.1453, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 18.27, |
|
"eval_accuracy": 0.7591597591597592, |
|
"eval_loss": 1.0756407976150513, |
|
"eval_runtime": 2.1403, |
|
"eval_samples_per_second": 231.744, |
|
"eval_steps_per_second": 7.476, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 4.161028416779432e-06, |
|
"loss": 1.1448, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"eval_accuracy": 0.7545405695862439, |
|
"eval_loss": 1.0937347412109375, |
|
"eval_runtime": 2.2479, |
|
"eval_samples_per_second": 220.651, |
|
"eval_steps_per_second": 7.118, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 3.991880920162381e-06, |
|
"loss": 1.1471, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_accuracy": 0.7537506745817593, |
|
"eval_loss": 1.1153604984283447, |
|
"eval_runtime": 2.2669, |
|
"eval_samples_per_second": 218.8, |
|
"eval_steps_per_second": 7.058, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 18.47, |
|
"learning_rate": 3.822733423545332e-06, |
|
"loss": 1.1484, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 18.47, |
|
"eval_accuracy": 0.7537701926689208, |
|
"eval_loss": 1.1114356517791748, |
|
"eval_runtime": 2.0201, |
|
"eval_samples_per_second": 245.53, |
|
"eval_steps_per_second": 7.92, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 18.54, |
|
"learning_rate": 3.6535859269282817e-06, |
|
"loss": 1.1463, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 18.54, |
|
"eval_accuracy": 0.7513940144923632, |
|
"eval_loss": 1.1001887321472168, |
|
"eval_runtime": 2.1485, |
|
"eval_samples_per_second": 230.858, |
|
"eval_steps_per_second": 7.447, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 3.4844384303112316e-06, |
|
"loss": 1.1512, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"eval_accuracy": 0.7586606950140298, |
|
"eval_loss": 1.0663777589797974, |
|
"eval_runtime": 2.1796, |
|
"eval_samples_per_second": 227.562, |
|
"eval_steps_per_second": 7.341, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 3.315290933694182e-06, |
|
"loss": 1.1464, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"eval_accuracy": 0.7583911006384086, |
|
"eval_loss": 1.0735660791397095, |
|
"eval_runtime": 2.251, |
|
"eval_samples_per_second": 220.348, |
|
"eval_steps_per_second": 7.108, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 3.1461434370771314e-06, |
|
"loss": 1.1457, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"eval_accuracy": 0.7604149648750205, |
|
"eval_loss": 1.080166220664978, |
|
"eval_runtime": 2.1301, |
|
"eval_samples_per_second": 232.857, |
|
"eval_steps_per_second": 7.512, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 2.9769959404600813e-06, |
|
"loss": 1.1464, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"eval_accuracy": 0.75420555676145, |
|
"eval_loss": 1.1091315746307373, |
|
"eval_runtime": 2.1281, |
|
"eval_samples_per_second": 233.067, |
|
"eval_steps_per_second": 7.518, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 18.88, |
|
"learning_rate": 2.8078484438430312e-06, |
|
"loss": 1.1415, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 18.88, |
|
"eval_accuracy": 0.7594658329138073, |
|
"eval_loss": 1.0856248140335083, |
|
"eval_runtime": 2.2679, |
|
"eval_samples_per_second": 218.701, |
|
"eval_steps_per_second": 7.055, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 2.638700947225981e-06, |
|
"loss": 1.149, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"eval_accuracy": 0.7557433607017732, |
|
"eval_loss": 1.0958749055862427, |
|
"eval_runtime": 2.1355, |
|
"eval_samples_per_second": 232.263, |
|
"eval_steps_per_second": 7.492, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 2.469553450608931e-06, |
|
"loss": 1.1445, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"eval_accuracy": 0.7600160578081092, |
|
"eval_loss": 1.0713545083999634, |
|
"eval_runtime": 2.0458, |
|
"eval_samples_per_second": 242.449, |
|
"eval_steps_per_second": 7.821, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"learning_rate": 2.300405953991881e-06, |
|
"loss": 1.1378, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"eval_accuracy": 0.7528535980148884, |
|
"eval_loss": 1.1179081201553345, |
|
"eval_runtime": 2.239, |
|
"eval_samples_per_second": 221.527, |
|
"eval_steps_per_second": 7.146, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 2.131258457374831e-06, |
|
"loss": 1.143, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"eval_accuracy": 0.7608561044555122, |
|
"eval_loss": 1.085029125213623, |
|
"eval_runtime": 2.2698, |
|
"eval_samples_per_second": 218.525, |
|
"eval_steps_per_second": 7.049, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"learning_rate": 1.962110960757781e-06, |
|
"loss": 1.1412, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"eval_accuracy": 0.7571760842796552, |
|
"eval_loss": 1.1089389324188232, |
|
"eval_runtime": 2.2591, |
|
"eval_samples_per_second": 219.56, |
|
"eval_steps_per_second": 7.083, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 19.28, |
|
"learning_rate": 1.7929634641407306e-06, |
|
"loss": 1.1393, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 19.28, |
|
"eval_accuracy": 0.7580414678206476, |
|
"eval_loss": 1.095458984375, |
|
"eval_runtime": 1.8948, |
|
"eval_samples_per_second": 261.767, |
|
"eval_steps_per_second": 8.444, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 1.6238159675236807e-06, |
|
"loss": 1.1492, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"eval_accuracy": 0.755947708880288, |
|
"eval_loss": 1.0982964038848877, |
|
"eval_runtime": 2.019, |
|
"eval_samples_per_second": 245.66, |
|
"eval_steps_per_second": 7.925, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 19.42, |
|
"learning_rate": 1.4546684709066306e-06, |
|
"loss": 1.1455, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 19.42, |
|
"eval_accuracy": 0.7540966020328801, |
|
"eval_loss": 1.12480628490448, |
|
"eval_runtime": 1.9105, |
|
"eval_samples_per_second": 259.614, |
|
"eval_steps_per_second": 8.375, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 1.2855209742895805e-06, |
|
"loss": 1.1442, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"eval_accuracy": 0.7567218409366169, |
|
"eval_loss": 1.1033666133880615, |
|
"eval_runtime": 2.1366, |
|
"eval_samples_per_second": 232.142, |
|
"eval_steps_per_second": 7.488, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 19.55, |
|
"learning_rate": 1.1163734776725304e-06, |
|
"loss": 1.1385, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 19.55, |
|
"eval_accuracy": 0.7598665473187404, |
|
"eval_loss": 1.0718320608139038, |
|
"eval_runtime": 2.1421, |
|
"eval_samples_per_second": 231.551, |
|
"eval_steps_per_second": 7.469, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 19.62, |
|
"learning_rate": 9.472259810554805e-07, |
|
"loss": 1.1393, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 19.62, |
|
"eval_accuracy": 0.7511771590321439, |
|
"eval_loss": 1.1188093423843384, |
|
"eval_runtime": 1.937, |
|
"eval_samples_per_second": 256.065, |
|
"eval_steps_per_second": 8.26, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"learning_rate": 7.780784844384303e-07, |
|
"loss": 1.1408, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"eval_accuracy": 0.7571148718506829, |
|
"eval_loss": 1.096737027168274, |
|
"eval_runtime": 2.128, |
|
"eval_samples_per_second": 233.085, |
|
"eval_steps_per_second": 7.519, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"learning_rate": 6.089309878213802e-07, |
|
"loss": 1.1443, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"eval_accuracy": 0.7525236340330075, |
|
"eval_loss": 1.115225911140442, |
|
"eval_runtime": 2.0196, |
|
"eval_samples_per_second": 245.588, |
|
"eval_steps_per_second": 7.922, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 19.82, |
|
"learning_rate": 4.397834912043302e-07, |
|
"loss": 1.1495, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 19.82, |
|
"eval_accuracy": 0.7534898820473974, |
|
"eval_loss": 1.1063731908798218, |
|
"eval_runtime": 2.0319, |
|
"eval_samples_per_second": 244.111, |
|
"eval_steps_per_second": 7.875, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 19.89, |
|
"learning_rate": 2.7063599458728015e-07, |
|
"loss": 1.1397, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 19.89, |
|
"eval_accuracy": 0.7602626366768863, |
|
"eval_loss": 1.0799843072891235, |
|
"eval_runtime": 2.3196, |
|
"eval_samples_per_second": 213.833, |
|
"eval_steps_per_second": 6.898, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 19.96, |
|
"learning_rate": 1.0148849797023004e-07, |
|
"loss": 1.1399, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 19.96, |
|
"eval_accuracy": 0.7566619534479008, |
|
"eval_loss": 1.0812491178512573, |
|
"eval_runtime": 2.2409, |
|
"eval_samples_per_second": 221.343, |
|
"eval_steps_per_second": 7.14, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 147800, |
|
"total_flos": 1.2450139383539958e+18, |
|
"train_loss": 1.2263236557646922, |
|
"train_runtime": 47907.234, |
|
"train_samples_per_second": 98.713, |
|
"train_steps_per_second": 3.085 |
|
} |
|
], |
|
"max_steps": 147800, |
|
"num_train_epochs": 20, |
|
"total_flos": 1.2450139383539958e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|