{ "best_metric": 0.7611846765843823, "best_model_checkpoint": "./finetuned/wikitext103_roberta-base_v2/checkpoint-123000", "epoch": 20.0, "global_step": 147800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 4.983085250338295e-05, "loss": 1.4212, "step": 500 }, { "epoch": 0.07, "eval_accuracy": 0.7235698186111409, "eval_loss": 1.3007760047912598, "eval_runtime": 2.6682, "eval_samples_per_second": 185.892, "eval_steps_per_second": 5.997, "step": 500 }, { "epoch": 0.14, "learning_rate": 4.96617050067659e-05, "loss": 1.3933, "step": 1000 }, { "epoch": 0.14, "eval_accuracy": 0.7226983316766515, "eval_loss": 1.2827116250991821, "eval_runtime": 2.1995, "eval_samples_per_second": 225.505, "eval_steps_per_second": 7.274, "step": 1000 }, { "epoch": 0.2, "learning_rate": 4.949255751014885e-05, "loss": 1.3917, "step": 1500 }, { "epoch": 0.2, "eval_accuracy": 0.7266257137444863, "eval_loss": 1.2815688848495483, "eval_runtime": 2.1563, "eval_samples_per_second": 230.027, "eval_steps_per_second": 7.42, "step": 1500 }, { "epoch": 0.27, "learning_rate": 4.93234100135318e-05, "loss": 1.3824, "step": 2000 }, { "epoch": 0.27, "eval_accuracy": 0.7251124131353045, "eval_loss": 1.294681191444397, "eval_runtime": 2.1464, "eval_samples_per_second": 231.089, "eval_steps_per_second": 7.454, "step": 2000 }, { "epoch": 0.34, "learning_rate": 4.915426251691475e-05, "loss": 1.3835, "step": 2500 }, { "epoch": 0.34, "eval_accuracy": 0.7289371440736602, "eval_loss": 1.2555147409439087, "eval_runtime": 2.2812, "eval_samples_per_second": 217.427, "eval_steps_per_second": 7.014, "step": 2500 }, { "epoch": 0.41, "learning_rate": 4.89851150202977e-05, "loss": 1.3758, "step": 3000 }, { "epoch": 0.41, "eval_accuracy": 0.7279413775189347, "eval_loss": 1.2611732482910156, "eval_runtime": 2.2556, "eval_samples_per_second": 219.898, "eval_steps_per_second": 7.093, "step": 3000 }, { "epoch": 0.47, "learning_rate": 4.881596752368065e-05, "loss": 1.3745, "step": 3500 }, { "epoch": 0.47, "eval_accuracy": 0.7244728228792188, "eval_loss": 1.279096007347107, "eval_runtime": 1.9103, "eval_samples_per_second": 259.647, "eval_steps_per_second": 8.376, "step": 3500 }, { "epoch": 0.54, "learning_rate": 4.86468200270636e-05, "loss": 1.3761, "step": 4000 }, { "epoch": 0.54, "eval_accuracy": 0.7286496152595643, "eval_loss": 1.2621806859970093, "eval_runtime": 2.2565, "eval_samples_per_second": 219.811, "eval_steps_per_second": 7.091, "step": 4000 }, { "epoch": 0.61, "learning_rate": 4.847767253044655e-05, "loss": 1.3735, "step": 4500 }, { "epoch": 0.61, "eval_accuracy": 0.7359971845474972, "eval_loss": 1.231848955154419, "eval_runtime": 2.2526, "eval_samples_per_second": 220.188, "eval_steps_per_second": 7.103, "step": 4500 }, { "epoch": 0.68, "learning_rate": 4.83085250338295e-05, "loss": 1.3717, "step": 5000 }, { "epoch": 0.68, "eval_accuracy": 0.7259761388286334, "eval_loss": 1.2777374982833862, "eval_runtime": 2.343, "eval_samples_per_second": 211.693, "eval_steps_per_second": 6.829, "step": 5000 }, { "epoch": 0.74, "learning_rate": 4.813937753721245e-05, "loss": 1.3675, "step": 5500 }, { "epoch": 0.74, "eval_accuracy": 0.7309145880574452, "eval_loss": 1.2589675188064575, "eval_runtime": 2.0349, "eval_samples_per_second": 243.741, "eval_steps_per_second": 7.863, "step": 5500 }, { "epoch": 0.81, "learning_rate": 4.79702300405954e-05, "loss": 1.3585, "step": 6000 }, { "epoch": 0.81, "eval_accuracy": 0.7253910822602958, "eval_loss": 1.2838590145111084, "eval_runtime": 2.2225, "eval_samples_per_second": 223.175, "eval_steps_per_second": 7.199, "step": 6000 }, { "epoch": 0.88, "learning_rate": 4.780108254397835e-05, "loss": 1.3579, "step": 6500 }, { "epoch": 0.88, "eval_accuracy": 0.7347076623797687, "eval_loss": 1.2341055870056152, "eval_runtime": 1.8958, "eval_samples_per_second": 261.625, "eval_steps_per_second": 8.44, "step": 6500 }, { "epoch": 0.95, "learning_rate": 4.76319350473613e-05, "loss": 1.3588, "step": 7000 }, { "epoch": 0.95, "eval_accuracy": 0.7326682357975821, "eval_loss": 1.2412930727005005, "eval_runtime": 2.1422, "eval_samples_per_second": 231.54, "eval_steps_per_second": 7.469, "step": 7000 }, { "epoch": 1.01, "learning_rate": 4.746278755074425e-05, "loss": 1.351, "step": 7500 }, { "epoch": 1.01, "eval_accuracy": 0.7317281968967362, "eval_loss": 1.2459222078323364, "eval_runtime": 2.2341, "eval_samples_per_second": 222.016, "eval_steps_per_second": 7.162, "step": 7500 }, { "epoch": 1.08, "learning_rate": 4.72936400541272e-05, "loss": 1.3394, "step": 8000 }, { "epoch": 1.08, "eval_accuracy": 0.7314233839745815, "eval_loss": 1.242180347442627, "eval_runtime": 2.2469, "eval_samples_per_second": 220.751, "eval_steps_per_second": 7.121, "step": 8000 }, { "epoch": 1.15, "learning_rate": 4.712449255751015e-05, "loss": 1.3429, "step": 8500 }, { "epoch": 1.15, "eval_accuracy": 0.734901599848407, "eval_loss": 1.2285393476486206, "eval_runtime": 2.1498, "eval_samples_per_second": 230.723, "eval_steps_per_second": 7.443, "step": 8500 }, { "epoch": 1.22, "learning_rate": 4.69553450608931e-05, "loss": 1.3393, "step": 9000 }, { "epoch": 1.22, "eval_accuracy": 0.7324128503075872, "eval_loss": 1.2404521703720093, "eval_runtime": 2.2579, "eval_samples_per_second": 219.671, "eval_steps_per_second": 7.086, "step": 9000 }, { "epoch": 1.29, "learning_rate": 4.678619756427605e-05, "loss": 1.3421, "step": 9500 }, { "epoch": 1.29, "eval_accuracy": 0.735434836099188, "eval_loss": 1.2255122661590576, "eval_runtime": 2.2664, "eval_samples_per_second": 218.847, "eval_steps_per_second": 7.06, "step": 9500 }, { "epoch": 1.35, "learning_rate": 4.6617050067659e-05, "loss": 1.3426, "step": 10000 }, { "epoch": 1.35, "eval_accuracy": 0.7333513221802482, "eval_loss": 1.2296382188796997, "eval_runtime": 2.1344, "eval_samples_per_second": 232.385, "eval_steps_per_second": 7.496, "step": 10000 }, { "epoch": 1.42, "learning_rate": 4.644790257104195e-05, "loss": 1.3326, "step": 10500 }, { "epoch": 1.42, "eval_accuracy": 0.7351480394040008, "eval_loss": 1.2158225774765015, "eval_runtime": 2.1342, "eval_samples_per_second": 232.406, "eval_steps_per_second": 7.497, "step": 10500 }, { "epoch": 1.49, "learning_rate": 4.62787550744249e-05, "loss": 1.3355, "step": 11000 }, { "epoch": 1.49, "eval_accuracy": 0.7364017876607805, "eval_loss": 1.2255741357803345, "eval_runtime": 2.1391, "eval_samples_per_second": 231.876, "eval_steps_per_second": 7.48, "step": 11000 }, { "epoch": 1.56, "learning_rate": 4.610960757780785e-05, "loss": 1.3324, "step": 11500 }, { "epoch": 1.56, "eval_accuracy": 0.7355829363706523, "eval_loss": 1.2208420038223267, "eval_runtime": 2.251, "eval_samples_per_second": 220.348, "eval_steps_per_second": 7.108, "step": 11500 }, { "epoch": 1.62, "learning_rate": 4.59404600811908e-05, "loss": 1.3331, "step": 12000 }, { "epoch": 1.62, "eval_accuracy": 0.7347190272757148, "eval_loss": 1.2230000495910645, "eval_runtime": 2.1329, "eval_samples_per_second": 232.552, "eval_steps_per_second": 7.502, "step": 12000 }, { "epoch": 1.69, "learning_rate": 4.577131258457375e-05, "loss": 1.3326, "step": 12500 }, { "epoch": 1.69, "eval_accuracy": 0.7316047842477829, "eval_loss": 1.250501275062561, "eval_runtime": 2.2566, "eval_samples_per_second": 219.795, "eval_steps_per_second": 7.09, "step": 12500 }, { "epoch": 1.76, "learning_rate": 4.56021650879567e-05, "loss": 1.3339, "step": 13000 }, { "epoch": 1.76, "eval_accuracy": 0.7321860715246034, "eval_loss": 1.2471247911453247, "eval_runtime": 1.8846, "eval_samples_per_second": 263.185, "eval_steps_per_second": 8.49, "step": 13000 }, { "epoch": 1.83, "learning_rate": 4.543301759133965e-05, "loss": 1.3286, "step": 13500 }, { "epoch": 1.83, "eval_accuracy": 0.7358603599923753, "eval_loss": 1.218480110168457, "eval_runtime": 2.183, "eval_samples_per_second": 227.211, "eval_steps_per_second": 7.329, "step": 13500 }, { "epoch": 1.89, "learning_rate": 4.52638700947226e-05, "loss": 1.3314, "step": 14000 }, { "epoch": 1.89, "eval_accuracy": 0.7363198956152989, "eval_loss": 1.2333292961120605, "eval_runtime": 1.922, "eval_samples_per_second": 258.068, "eval_steps_per_second": 8.325, "step": 14000 }, { "epoch": 1.96, "learning_rate": 4.509472259810555e-05, "loss": 1.325, "step": 14500 }, { "epoch": 1.96, "eval_accuracy": 0.7320171162387606, "eval_loss": 1.2384274005889893, "eval_runtime": 2.1484, "eval_samples_per_second": 230.871, "eval_steps_per_second": 7.447, "step": 14500 }, { "epoch": 2.03, "learning_rate": 4.49255751014885e-05, "loss": 1.3251, "step": 15000 }, { "epoch": 2.03, "eval_accuracy": 0.7332704232946886, "eval_loss": 1.2141916751861572, "eval_runtime": 2.1498, "eval_samples_per_second": 230.723, "eval_steps_per_second": 7.443, "step": 15000 }, { "epoch": 2.1, "learning_rate": 4.475642760487145e-05, "loss": 1.3136, "step": 15500 }, { "epoch": 2.1, "eval_accuracy": 0.7346380072100398, "eval_loss": 1.2162067890167236, "eval_runtime": 2.2669, "eval_samples_per_second": 218.8, "eval_steps_per_second": 7.058, "step": 15500 }, { "epoch": 2.17, "learning_rate": 4.45872801082544e-05, "loss": 1.3202, "step": 16000 }, { "epoch": 2.17, "eval_accuracy": 0.7368549767669357, "eval_loss": 1.220727801322937, "eval_runtime": 2.1251, "eval_samples_per_second": 233.399, "eval_steps_per_second": 7.529, "step": 16000 }, { "epoch": 2.23, "learning_rate": 4.441813261163735e-05, "loss": 1.3168, "step": 16500 }, { "epoch": 2.23, "eval_accuracy": 0.7391136589130195, "eval_loss": 1.1931146383285522, "eval_runtime": 2.2628, "eval_samples_per_second": 219.196, "eval_steps_per_second": 7.071, "step": 16500 }, { "epoch": 2.3, "learning_rate": 4.42489851150203e-05, "loss": 1.3134, "step": 17000 }, { "epoch": 2.3, "eval_accuracy": 0.7398447820343461, "eval_loss": 1.1856846809387207, "eval_runtime": 2.1315, "eval_samples_per_second": 232.703, "eval_steps_per_second": 7.507, "step": 17000 }, { "epoch": 2.37, "learning_rate": 4.407983761840325e-05, "loss": 1.3085, "step": 17500 }, { "epoch": 2.37, "eval_accuracy": 0.7383094012462748, "eval_loss": 1.2111510038375854, "eval_runtime": 2.2389, "eval_samples_per_second": 221.538, "eval_steps_per_second": 7.146, "step": 17500 }, { "epoch": 2.44, "learning_rate": 4.39106901217862e-05, "loss": 1.3165, "step": 18000 }, { "epoch": 2.44, "eval_accuracy": 0.736477152685609, "eval_loss": 1.2284483909606934, "eval_runtime": 2.2655, "eval_samples_per_second": 218.936, "eval_steps_per_second": 7.062, "step": 18000 }, { "epoch": 2.5, "learning_rate": 4.374154262516915e-05, "loss": 1.3144, "step": 18500 }, { "epoch": 2.5, "eval_accuracy": 0.7387957989256795, "eval_loss": 1.2013208866119385, "eval_runtime": 2.1477, "eval_samples_per_second": 230.94, "eval_steps_per_second": 7.45, "step": 18500 }, { "epoch": 2.57, "learning_rate": 4.35723951285521e-05, "loss": 1.319, "step": 19000 }, { "epoch": 2.57, "eval_accuracy": 0.7355637897925513, "eval_loss": 1.217348337173462, "eval_runtime": 1.8976, "eval_samples_per_second": 261.38, "eval_steps_per_second": 8.432, "step": 19000 }, { "epoch": 2.64, "learning_rate": 4.340324763193505e-05, "loss": 1.3147, "step": 19500 }, { "epoch": 2.64, "eval_accuracy": 0.7403712864559268, "eval_loss": 1.1786144971847534, "eval_runtime": 2.1417, "eval_samples_per_second": 231.588, "eval_steps_per_second": 7.471, "step": 19500 }, { "epoch": 2.71, "learning_rate": 4.3234100135318e-05, "loss": 1.311, "step": 20000 }, { "epoch": 2.71, "eval_accuracy": 0.7372879017795558, "eval_loss": 1.2008836269378662, "eval_runtime": 2.2409, "eval_samples_per_second": 221.338, "eval_steps_per_second": 7.14, "step": 20000 }, { "epoch": 2.77, "learning_rate": 4.306495263870095e-05, "loss": 1.3131, "step": 20500 }, { "epoch": 2.77, "eval_accuracy": 0.7366438077684113, "eval_loss": 1.1992290019989014, "eval_runtime": 2.3077, "eval_samples_per_second": 214.937, "eval_steps_per_second": 6.933, "step": 20500 }, { "epoch": 2.84, "learning_rate": 4.28958051420839e-05, "loss": 1.3036, "step": 21000 }, { "epoch": 2.84, "eval_accuracy": 0.7369976679863333, "eval_loss": 1.2166584730148315, "eval_runtime": 2.1243, "eval_samples_per_second": 233.491, "eval_steps_per_second": 7.532, "step": 21000 }, { "epoch": 2.91, "learning_rate": 4.272665764546685e-05, "loss": 1.3122, "step": 21500 }, { "epoch": 2.91, "eval_accuracy": 0.7378714413413875, "eval_loss": 1.2138844728469849, "eval_runtime": 2.155, "eval_samples_per_second": 230.165, "eval_steps_per_second": 7.425, "step": 21500 }, { "epoch": 2.98, "learning_rate": 4.25575101488498e-05, "loss": 1.3091, "step": 22000 }, { "epoch": 2.98, "eval_accuracy": 0.7364524804942348, "eval_loss": 1.2197295427322388, "eval_runtime": 2.0278, "eval_samples_per_second": 244.602, "eval_steps_per_second": 7.89, "step": 22000 }, { "epoch": 3.04, "learning_rate": 4.238836265223275e-05, "loss": 1.304, "step": 22500 }, { "epoch": 3.04, "eval_accuracy": 0.7371755128447044, "eval_loss": 1.186427354812622, "eval_runtime": 2.0462, "eval_samples_per_second": 242.4, "eval_steps_per_second": 7.819, "step": 22500 }, { "epoch": 3.11, "learning_rate": 4.22192151556157e-05, "loss": 1.3015, "step": 23000 }, { "epoch": 3.11, "eval_accuracy": 0.7355039424985249, "eval_loss": 1.2046276330947876, "eval_runtime": 2.0572, "eval_samples_per_second": 241.108, "eval_steps_per_second": 7.778, "step": 23000 }, { "epoch": 3.18, "learning_rate": 4.205006765899865e-05, "loss": 1.2916, "step": 23500 }, { "epoch": 3.18, "eval_accuracy": 0.7344874591057797, "eval_loss": 1.2312067747116089, "eval_runtime": 2.3523, "eval_samples_per_second": 210.856, "eval_steps_per_second": 6.802, "step": 23500 }, { "epoch": 3.25, "learning_rate": 4.18809201623816e-05, "loss": 1.2966, "step": 24000 }, { "epoch": 3.25, "eval_accuracy": 0.7372955288985823, "eval_loss": 1.2116466760635376, "eval_runtime": 2.306, "eval_samples_per_second": 215.094, "eval_steps_per_second": 6.939, "step": 24000 }, { "epoch": 3.32, "learning_rate": 4.171177266576455e-05, "loss": 1.2991, "step": 24500 }, { "epoch": 3.32, "eval_accuracy": 0.737794624029042, "eval_loss": 1.2262712717056274, "eval_runtime": 2.2208, "eval_samples_per_second": 223.344, "eval_steps_per_second": 7.205, "step": 24500 }, { "epoch": 3.38, "learning_rate": 4.15426251691475e-05, "loss": 1.3003, "step": 25000 }, { "epoch": 3.38, "eval_accuracy": 0.741288193792419, "eval_loss": 1.184373378753662, "eval_runtime": 2.336, "eval_samples_per_second": 212.325, "eval_steps_per_second": 6.849, "step": 25000 }, { "epoch": 3.45, "learning_rate": 4.137347767253045e-05, "loss": 1.2942, "step": 25500 }, { "epoch": 3.45, "eval_accuracy": 0.7368591999133871, "eval_loss": 1.195932149887085, "eval_runtime": 2.1558, "eval_samples_per_second": 230.073, "eval_steps_per_second": 7.422, "step": 25500 }, { "epoch": 3.52, "learning_rate": 4.12043301759134e-05, "loss": 1.2988, "step": 26000 }, { "epoch": 3.52, "eval_accuracy": 0.7381074306659838, "eval_loss": 1.2017642259597778, "eval_runtime": 2.1521, "eval_samples_per_second": 230.473, "eval_steps_per_second": 7.435, "step": 26000 }, { "epoch": 3.59, "learning_rate": 4.103518267929635e-05, "loss": 1.2936, "step": 26500 }, { "epoch": 3.59, "eval_accuracy": 0.7388343788536808, "eval_loss": 1.1992815732955933, "eval_runtime": 2.3209, "eval_samples_per_second": 213.713, "eval_steps_per_second": 6.894, "step": 26500 }, { "epoch": 3.65, "learning_rate": 4.08660351826793e-05, "loss": 1.2937, "step": 27000 }, { "epoch": 3.65, "eval_accuracy": 0.7358311660164716, "eval_loss": 1.2154779434204102, "eval_runtime": 2.1442, "eval_samples_per_second": 231.319, "eval_steps_per_second": 7.462, "step": 27000 }, { "epoch": 3.72, "learning_rate": 4.069688768606225e-05, "loss": 1.3021, "step": 27500 }, { "epoch": 3.72, "eval_accuracy": 0.7395591959907313, "eval_loss": 1.1794347763061523, "eval_runtime": 2.2631, "eval_samples_per_second": 219.166, "eval_steps_per_second": 7.07, "step": 27500 }, { "epoch": 3.79, "learning_rate": 4.05277401894452e-05, "loss": 1.2937, "step": 28000 }, { "epoch": 3.79, "eval_accuracy": 0.7401357600670687, "eval_loss": 1.1982717514038086, "eval_runtime": 2.0447, "eval_samples_per_second": 242.582, "eval_steps_per_second": 7.825, "step": 28000 }, { "epoch": 3.86, "learning_rate": 4.035859269282815e-05, "loss": 1.291, "step": 28500 }, { "epoch": 3.86, "eval_accuracy": 0.7448072021259288, "eval_loss": 1.1694941520690918, "eval_runtime": 2.1453, "eval_samples_per_second": 231.207, "eval_steps_per_second": 7.458, "step": 28500 }, { "epoch": 3.92, "learning_rate": 4.01894451962111e-05, "loss": 1.2932, "step": 29000 }, { "epoch": 3.92, "eval_accuracy": 0.7410137752905726, "eval_loss": 1.1980637311935425, "eval_runtime": 2.2686, "eval_samples_per_second": 218.634, "eval_steps_per_second": 7.053, "step": 29000 }, { "epoch": 3.99, "learning_rate": 4.002029769959405e-05, "loss": 1.2938, "step": 29500 }, { "epoch": 3.99, "eval_accuracy": 0.7382663617554176, "eval_loss": 1.1999621391296387, "eval_runtime": 2.1418, "eval_samples_per_second": 231.579, "eval_steps_per_second": 7.47, "step": 29500 }, { "epoch": 4.06, "learning_rate": 3.9851150202977e-05, "loss": 1.2789, "step": 30000 }, { "epoch": 4.06, "eval_accuracy": 0.7402127426252879, "eval_loss": 1.1918007135391235, "eval_runtime": 2.3184, "eval_samples_per_second": 213.944, "eval_steps_per_second": 6.901, "step": 30000 }, { "epoch": 4.13, "learning_rate": 3.968200270635995e-05, "loss": 1.2806, "step": 30500 }, { "epoch": 4.13, "eval_accuracy": 0.7368392751519062, "eval_loss": 1.2065249681472778, "eval_runtime": 2.1671, "eval_samples_per_second": 228.872, "eval_steps_per_second": 7.383, "step": 30500 }, { "epoch": 4.19, "learning_rate": 3.95128552097429e-05, "loss": 1.2799, "step": 31000 }, { "epoch": 4.19, "eval_accuracy": 0.7374173525839968, "eval_loss": 1.2035958766937256, "eval_runtime": 2.0293, "eval_samples_per_second": 244.417, "eval_steps_per_second": 7.884, "step": 31000 }, { "epoch": 4.26, "learning_rate": 3.934370771312585e-05, "loss": 1.2851, "step": 31500 }, { "epoch": 4.26, "eval_accuracy": 0.7374529736652525, "eval_loss": 1.2056316137313843, "eval_runtime": 2.2747, "eval_samples_per_second": 218.047, "eval_steps_per_second": 7.034, "step": 31500 }, { "epoch": 4.33, "learning_rate": 3.91745602165088e-05, "loss": 1.2789, "step": 32000 }, { "epoch": 4.33, "eval_accuracy": 0.7414960437229791, "eval_loss": 1.185698390007019, "eval_runtime": 2.1279, "eval_samples_per_second": 233.09, "eval_steps_per_second": 7.519, "step": 32000 }, { "epoch": 4.4, "learning_rate": 3.900541271989175e-05, "loss": 1.2847, "step": 32500 }, { "epoch": 4.4, "eval_accuracy": 0.7375549926676443, "eval_loss": 1.1947497129440308, "eval_runtime": 2.2844, "eval_samples_per_second": 217.128, "eval_steps_per_second": 7.004, "step": 32500 }, { "epoch": 4.47, "learning_rate": 3.88362652232747e-05, "loss": 1.2843, "step": 33000 }, { "epoch": 4.47, "eval_accuracy": 0.7398512049167071, "eval_loss": 1.1868607997894287, "eval_runtime": 1.8928, "eval_samples_per_second": 262.041, "eval_steps_per_second": 8.453, "step": 33000 }, { "epoch": 4.53, "learning_rate": 3.866711772665765e-05, "loss": 1.2822, "step": 33500 }, { "epoch": 4.53, "eval_accuracy": 0.738583059254866, "eval_loss": 1.1962590217590332, "eval_runtime": 2.3042, "eval_samples_per_second": 215.256, "eval_steps_per_second": 6.944, "step": 33500 }, { "epoch": 4.6, "learning_rate": 3.84979702300406e-05, "loss": 1.2755, "step": 34000 }, { "epoch": 4.6, "eval_accuracy": 0.7423808354478731, "eval_loss": 1.189677357673645, "eval_runtime": 2.1343, "eval_samples_per_second": 232.399, "eval_steps_per_second": 7.497, "step": 34000 }, { "epoch": 4.67, "learning_rate": 3.832882273342355e-05, "loss": 1.283, "step": 34500 }, { "epoch": 4.67, "eval_accuracy": 0.7438030006523157, "eval_loss": 1.1673452854156494, "eval_runtime": 2.1405, "eval_samples_per_second": 231.723, "eval_steps_per_second": 7.475, "step": 34500 }, { "epoch": 4.74, "learning_rate": 3.81596752368065e-05, "loss": 1.2765, "step": 35000 }, { "epoch": 4.74, "eval_accuracy": 0.7418567866813223, "eval_loss": 1.1855015754699707, "eval_runtime": 2.2371, "eval_samples_per_second": 221.718, "eval_steps_per_second": 7.152, "step": 35000 }, { "epoch": 4.8, "learning_rate": 3.799052774018945e-05, "loss": 1.2762, "step": 35500 }, { "epoch": 4.8, "eval_accuracy": 0.7412275877241228, "eval_loss": 1.1773431301116943, "eval_runtime": 2.1867, "eval_samples_per_second": 226.824, "eval_steps_per_second": 7.317, "step": 35500 }, { "epoch": 4.87, "learning_rate": 3.7821380243572397e-05, "loss": 1.2776, "step": 36000 }, { "epoch": 4.87, "eval_accuracy": 0.740787246819894, "eval_loss": 1.1897586584091187, "eval_runtime": 1.9025, "eval_samples_per_second": 260.712, "eval_steps_per_second": 8.41, "step": 36000 }, { "epoch": 4.94, "learning_rate": 3.7652232746955347e-05, "loss": 1.2847, "step": 36500 }, { "epoch": 4.94, "eval_accuracy": 0.7437667084947351, "eval_loss": 1.1624772548675537, "eval_runtime": 1.9202, "eval_samples_per_second": 258.302, "eval_steps_per_second": 8.332, "step": 36500 }, { "epoch": 5.01, "learning_rate": 3.7483085250338296e-05, "loss": 1.2732, "step": 37000 }, { "epoch": 5.01, "eval_accuracy": 0.7396705597179374, "eval_loss": 1.194719672203064, "eval_runtime": 2.2563, "eval_samples_per_second": 219.831, "eval_steps_per_second": 7.091, "step": 37000 }, { "epoch": 5.07, "learning_rate": 3.7313937753721246e-05, "loss": 1.2667, "step": 37500 }, { "epoch": 5.07, "eval_accuracy": 0.7384741591468417, "eval_loss": 1.2097489833831787, "eval_runtime": 2.0162, "eval_samples_per_second": 246.011, "eval_steps_per_second": 7.936, "step": 37500 }, { "epoch": 5.14, "learning_rate": 3.7144790257104196e-05, "loss": 1.2678, "step": 38000 }, { "epoch": 5.14, "eval_accuracy": 0.7397711324624852, "eval_loss": 1.187340497970581, "eval_runtime": 1.9242, "eval_samples_per_second": 257.772, "eval_steps_per_second": 8.315, "step": 38000 }, { "epoch": 5.21, "learning_rate": 3.6975642760487146e-05, "loss": 1.2681, "step": 38500 }, { "epoch": 5.21, "eval_accuracy": 0.7467894879436467, "eval_loss": 1.1681954860687256, "eval_runtime": 2.1385, "eval_samples_per_second": 231.938, "eval_steps_per_second": 7.482, "step": 38500 }, { "epoch": 5.28, "learning_rate": 3.6806495263870096e-05, "loss": 1.2699, "step": 39000 }, { "epoch": 5.28, "eval_accuracy": 0.745684382221014, "eval_loss": 1.1739610433578491, "eval_runtime": 1.9046, "eval_samples_per_second": 260.416, "eval_steps_per_second": 8.401, "step": 39000 }, { "epoch": 5.35, "learning_rate": 3.6637347767253046e-05, "loss": 1.2675, "step": 39500 }, { "epoch": 5.35, "eval_accuracy": 0.7378905091781449, "eval_loss": 1.212327003479004, "eval_runtime": 2.151, "eval_samples_per_second": 230.592, "eval_steps_per_second": 7.438, "step": 39500 }, { "epoch": 5.41, "learning_rate": 3.6468200270635996e-05, "loss": 1.2604, "step": 40000 }, { "epoch": 5.41, "eval_accuracy": 0.7395626782561456, "eval_loss": 1.195254921913147, "eval_runtime": 2.1404, "eval_samples_per_second": 231.731, "eval_steps_per_second": 7.475, "step": 40000 }, { "epoch": 5.48, "learning_rate": 3.6299052774018946e-05, "loss": 1.2688, "step": 40500 }, { "epoch": 5.48, "eval_accuracy": 0.7397589090237662, "eval_loss": 1.1849150657653809, "eval_runtime": 2.1374, "eval_samples_per_second": 232.054, "eval_steps_per_second": 7.486, "step": 40500 }, { "epoch": 5.55, "learning_rate": 3.6129905277401896e-05, "loss": 1.2698, "step": 41000 }, { "epoch": 5.55, "eval_accuracy": 0.7413877684508885, "eval_loss": 1.1708790063858032, "eval_runtime": 2.1318, "eval_samples_per_second": 232.668, "eval_steps_per_second": 7.505, "step": 41000 }, { "epoch": 5.62, "learning_rate": 3.5960757780784846e-05, "loss": 1.2689, "step": 41500 }, { "epoch": 5.62, "eval_accuracy": 0.7438135277526475, "eval_loss": 1.1763643026351929, "eval_runtime": 1.9258, "eval_samples_per_second": 257.551, "eval_steps_per_second": 8.308, "step": 41500 }, { "epoch": 5.68, "learning_rate": 3.5791610284167796e-05, "loss": 1.269, "step": 42000 }, { "epoch": 5.68, "eval_accuracy": 0.7409149325968664, "eval_loss": 1.1824229955673218, "eval_runtime": 2.2453, "eval_samples_per_second": 220.905, "eval_steps_per_second": 7.126, "step": 42000 }, { "epoch": 5.75, "learning_rate": 3.5622462787550746e-05, "loss": 1.2715, "step": 42500 }, { "epoch": 5.75, "eval_accuracy": 0.7408733194884687, "eval_loss": 1.178514003753662, "eval_runtime": 2.0475, "eval_samples_per_second": 242.248, "eval_steps_per_second": 7.814, "step": 42500 }, { "epoch": 5.82, "learning_rate": 3.5453315290933695e-05, "loss": 1.2628, "step": 43000 }, { "epoch": 5.82, "eval_accuracy": 0.7433914472797822, "eval_loss": 1.173943281173706, "eval_runtime": 2.1375, "eval_samples_per_second": 232.048, "eval_steps_per_second": 7.485, "step": 43000 }, { "epoch": 5.89, "learning_rate": 3.5284167794316645e-05, "loss": 1.2617, "step": 43500 }, { "epoch": 5.89, "eval_accuracy": 0.7406168909338969, "eval_loss": 1.1814693212509155, "eval_runtime": 2.1357, "eval_samples_per_second": 232.237, "eval_steps_per_second": 7.492, "step": 43500 }, { "epoch": 5.95, "learning_rate": 3.5115020297699595e-05, "loss": 1.2565, "step": 44000 }, { "epoch": 5.95, "eval_accuracy": 0.7414824236191919, "eval_loss": 1.1885017156600952, "eval_runtime": 2.2461, "eval_samples_per_second": 220.826, "eval_steps_per_second": 7.123, "step": 44000 }, { "epoch": 6.02, "learning_rate": 3.4945872801082545e-05, "loss": 1.2639, "step": 44500 }, { "epoch": 6.02, "eval_accuracy": 0.741952133873027, "eval_loss": 1.1781718730926514, "eval_runtime": 2.0174, "eval_samples_per_second": 245.859, "eval_steps_per_second": 7.931, "step": 44500 }, { "epoch": 6.09, "learning_rate": 3.4776725304465495e-05, "loss": 1.2557, "step": 45000 }, { "epoch": 6.09, "eval_accuracy": 0.7382356866408648, "eval_loss": 1.2061494588851929, "eval_runtime": 2.2612, "eval_samples_per_second": 219.356, "eval_steps_per_second": 7.076, "step": 45000 }, { "epoch": 6.16, "learning_rate": 3.4607577807848445e-05, "loss": 1.2503, "step": 45500 }, { "epoch": 6.16, "eval_accuracy": 0.739681675962454, "eval_loss": 1.1741236448287964, "eval_runtime": 2.1411, "eval_samples_per_second": 231.661, "eval_steps_per_second": 7.473, "step": 45500 }, { "epoch": 6.22, "learning_rate": 3.4438430311231395e-05, "loss": 1.2514, "step": 46000 }, { "epoch": 6.22, "eval_accuracy": 0.7435828154552824, "eval_loss": 1.167312741279602, "eval_runtime": 2.0431, "eval_samples_per_second": 242.763, "eval_steps_per_second": 7.831, "step": 46000 }, { "epoch": 6.29, "learning_rate": 3.4269282814614345e-05, "loss": 1.254, "step": 46500 }, { "epoch": 6.29, "eval_accuracy": 0.7399956502827316, "eval_loss": 1.1828943490982056, "eval_runtime": 2.2651, "eval_samples_per_second": 218.976, "eval_steps_per_second": 7.064, "step": 46500 }, { "epoch": 6.36, "learning_rate": 3.4100135317997295e-05, "loss": 1.2583, "step": 47000 }, { "epoch": 6.36, "eval_accuracy": 0.7390757539268417, "eval_loss": 1.1776684522628784, "eval_runtime": 2.0336, "eval_samples_per_second": 243.904, "eval_steps_per_second": 7.868, "step": 47000 }, { "epoch": 6.43, "learning_rate": 3.3930987821380245e-05, "loss": 1.2518, "step": 47500 }, { "epoch": 6.43, "eval_accuracy": 0.7411625020238545, "eval_loss": 1.1892728805541992, "eval_runtime": 2.2474, "eval_samples_per_second": 220.698, "eval_steps_per_second": 7.119, "step": 47500 }, { "epoch": 6.5, "learning_rate": 3.3761840324763195e-05, "loss": 1.2519, "step": 48000 }, { "epoch": 6.5, "eval_accuracy": 0.7410831524506257, "eval_loss": 1.1775306463241577, "eval_runtime": 2.127, "eval_samples_per_second": 233.19, "eval_steps_per_second": 7.522, "step": 48000 }, { "epoch": 6.56, "learning_rate": 3.3592692828146145e-05, "loss": 1.2477, "step": 48500 }, { "epoch": 6.56, "eval_accuracy": 0.7451821862348178, "eval_loss": 1.1809273958206177, "eval_runtime": 1.902, "eval_samples_per_second": 260.776, "eval_steps_per_second": 8.412, "step": 48500 }, { "epoch": 6.63, "learning_rate": 3.3423545331529095e-05, "loss": 1.2546, "step": 49000 }, { "epoch": 6.63, "eval_accuracy": 0.7455485978763953, "eval_loss": 1.1651870012283325, "eval_runtime": 2.1247, "eval_samples_per_second": 233.443, "eval_steps_per_second": 7.53, "step": 49000 }, { "epoch": 6.7, "learning_rate": 3.3254397834912044e-05, "loss": 1.2564, "step": 49500 }, { "epoch": 6.7, "eval_accuracy": 0.7435488746599247, "eval_loss": 1.1729925870895386, "eval_runtime": 2.2521, "eval_samples_per_second": 220.235, "eval_steps_per_second": 7.104, "step": 49500 }, { "epoch": 6.77, "learning_rate": 3.3085250338294994e-05, "loss": 1.254, "step": 50000 }, { "epoch": 6.77, "eval_accuracy": 0.7427022407392571, "eval_loss": 1.1740801334381104, "eval_runtime": 2.2515, "eval_samples_per_second": 220.294, "eval_steps_per_second": 7.106, "step": 50000 }, { "epoch": 6.83, "learning_rate": 3.2916102841677944e-05, "loss": 1.2495, "step": 50500 }, { "epoch": 6.83, "eval_accuracy": 0.7475704632944787, "eval_loss": 1.1539645195007324, "eval_runtime": 2.1379, "eval_samples_per_second": 231.999, "eval_steps_per_second": 7.484, "step": 50500 }, { "epoch": 6.9, "learning_rate": 3.2746955345060894e-05, "loss": 1.2502, "step": 51000 }, { "epoch": 6.9, "eval_accuracy": 0.7488099797559774, "eval_loss": 1.145354151725769, "eval_runtime": 2.0467, "eval_samples_per_second": 242.344, "eval_steps_per_second": 7.818, "step": 51000 }, { "epoch": 6.97, "learning_rate": 3.2577807848443844e-05, "loss": 1.2527, "step": 51500 }, { "epoch": 6.97, "eval_accuracy": 0.7429261278858414, "eval_loss": 1.1704862117767334, "eval_runtime": 1.8944, "eval_samples_per_second": 261.83, "eval_steps_per_second": 8.446, "step": 51500 }, { "epoch": 7.04, "learning_rate": 3.2408660351826794e-05, "loss": 1.2418, "step": 52000 }, { "epoch": 7.04, "eval_accuracy": 0.7441042170292774, "eval_loss": 1.1714463233947754, "eval_runtime": 2.2491, "eval_samples_per_second": 220.532, "eval_steps_per_second": 7.114, "step": 52000 }, { "epoch": 7.1, "learning_rate": 3.2239512855209744e-05, "loss": 1.2386, "step": 52500 }, { "epoch": 7.1, "eval_accuracy": 0.74550079317324, "eval_loss": 1.1619137525558472, "eval_runtime": 2.2788, "eval_samples_per_second": 217.662, "eval_steps_per_second": 7.021, "step": 52500 }, { "epoch": 7.17, "learning_rate": 3.2070365358592694e-05, "loss": 1.2407, "step": 53000 }, { "epoch": 7.17, "eval_accuracy": 0.7428433966802983, "eval_loss": 1.1702818870544434, "eval_runtime": 2.2482, "eval_samples_per_second": 220.624, "eval_steps_per_second": 7.117, "step": 53000 }, { "epoch": 7.24, "learning_rate": 3.1901217861975644e-05, "loss": 1.2429, "step": 53500 }, { "epoch": 7.24, "eval_accuracy": 0.7437382207533255, "eval_loss": 1.1596566438674927, "eval_runtime": 2.0269, "eval_samples_per_second": 244.711, "eval_steps_per_second": 7.894, "step": 53500 }, { "epoch": 7.31, "learning_rate": 3.1732070365358594e-05, "loss": 1.2398, "step": 54000 }, { "epoch": 7.31, "eval_accuracy": 0.7411157814291173, "eval_loss": 1.1802175045013428, "eval_runtime": 1.903, "eval_samples_per_second": 260.643, "eval_steps_per_second": 8.408, "step": 54000 }, { "epoch": 7.37, "learning_rate": 3.1562922868741544e-05, "loss": 1.2507, "step": 54500 }, { "epoch": 7.37, "eval_accuracy": 0.7465291873021028, "eval_loss": 1.153898000717163, "eval_runtime": 2.134, "eval_samples_per_second": 232.429, "eval_steps_per_second": 7.498, "step": 54500 }, { "epoch": 7.44, "learning_rate": 3.1393775372124494e-05, "loss": 1.2369, "step": 55000 }, { "epoch": 7.44, "eval_accuracy": 0.7421205732433082, "eval_loss": 1.1711477041244507, "eval_runtime": 2.2417, "eval_samples_per_second": 221.263, "eval_steps_per_second": 7.138, "step": 55000 }, { "epoch": 7.51, "learning_rate": 3.1224627875507443e-05, "loss": 1.2463, "step": 55500 }, { "epoch": 7.51, "eval_accuracy": 0.7408580787198625, "eval_loss": 1.1848827600479126, "eval_runtime": 2.2658, "eval_samples_per_second": 218.909, "eval_steps_per_second": 7.062, "step": 55500 }, { "epoch": 7.58, "learning_rate": 3.1055480378890393e-05, "loss": 1.2389, "step": 56000 }, { "epoch": 7.58, "eval_accuracy": 0.7447417175239756, "eval_loss": 1.172045111656189, "eval_runtime": 2.1226, "eval_samples_per_second": 233.68, "eval_steps_per_second": 7.538, "step": 56000 }, { "epoch": 7.65, "learning_rate": 3.088633288227334e-05, "loss": 1.2395, "step": 56500 }, { "epoch": 7.65, "eval_accuracy": 0.7455846610856063, "eval_loss": 1.1613755226135254, "eval_runtime": 2.2492, "eval_samples_per_second": 220.523, "eval_steps_per_second": 7.114, "step": 56500 }, { "epoch": 7.71, "learning_rate": 3.071718538565629e-05, "loss": 1.2429, "step": 57000 }, { "epoch": 7.71, "eval_accuracy": 0.7459984960790633, "eval_loss": 1.1604408025741577, "eval_runtime": 2.2523, "eval_samples_per_second": 220.221, "eval_steps_per_second": 7.104, "step": 57000 }, { "epoch": 7.78, "learning_rate": 3.054803788903924e-05, "loss": 1.2384, "step": 57500 }, { "epoch": 7.78, "eval_accuracy": 0.7408438637823945, "eval_loss": 1.1852344274520874, "eval_runtime": 2.2645, "eval_samples_per_second": 219.035, "eval_steps_per_second": 7.066, "step": 57500 }, { "epoch": 7.85, "learning_rate": 3.0378890392422193e-05, "loss": 1.2419, "step": 58000 }, { "epoch": 7.85, "eval_accuracy": 0.7460735114607351, "eval_loss": 1.1592859029769897, "eval_runtime": 2.2667, "eval_samples_per_second": 218.824, "eval_steps_per_second": 7.059, "step": 58000 }, { "epoch": 7.92, "learning_rate": 3.0209742895805143e-05, "loss": 1.2381, "step": 58500 }, { "epoch": 7.92, "eval_accuracy": 0.7454180674547229, "eval_loss": 1.161791205406189, "eval_runtime": 2.2508, "eval_samples_per_second": 220.362, "eval_steps_per_second": 7.108, "step": 58500 }, { "epoch": 7.98, "learning_rate": 3.0040595399188093e-05, "loss": 1.2384, "step": 59000 }, { "epoch": 7.98, "eval_accuracy": 0.7445992935958163, "eval_loss": 1.1550912857055664, "eval_runtime": 2.2277, "eval_samples_per_second": 222.654, "eval_steps_per_second": 7.182, "step": 59000 }, { "epoch": 8.05, "learning_rate": 2.9871447902571043e-05, "loss": 1.2314, "step": 59500 }, { "epoch": 8.05, "eval_accuracy": 0.7451252345598434, "eval_loss": 1.1473671197891235, "eval_runtime": 2.0323, "eval_samples_per_second": 244.059, "eval_steps_per_second": 7.873, "step": 59500 }, { "epoch": 8.12, "learning_rate": 2.9702300405953993e-05, "loss": 1.2277, "step": 60000 }, { "epoch": 8.12, "eval_accuracy": 0.7435493080290383, "eval_loss": 1.1636135578155518, "eval_runtime": 2.2565, "eval_samples_per_second": 219.81, "eval_steps_per_second": 7.091, "step": 60000 }, { "epoch": 8.19, "learning_rate": 2.9533152909336943e-05, "loss": 1.23, "step": 60500 }, { "epoch": 8.19, "eval_accuracy": 0.7482466354355656, "eval_loss": 1.1545356512069702, "eval_runtime": 2.1398, "eval_samples_per_second": 231.799, "eval_steps_per_second": 7.477, "step": 60500 }, { "epoch": 8.25, "learning_rate": 2.9364005412719893e-05, "loss": 1.2292, "step": 61000 }, { "epoch": 8.25, "eval_accuracy": 0.7456762809270702, "eval_loss": 1.169358730316162, "eval_runtime": 2.2505, "eval_samples_per_second": 220.392, "eval_steps_per_second": 7.109, "step": 61000 }, { "epoch": 8.32, "learning_rate": 2.9194857916102843e-05, "loss": 1.2337, "step": 61500 }, { "epoch": 8.32, "eval_accuracy": 0.7437165882071332, "eval_loss": 1.1681973934173584, "eval_runtime": 2.2595, "eval_samples_per_second": 219.518, "eval_steps_per_second": 7.081, "step": 61500 }, { "epoch": 8.39, "learning_rate": 2.9025710419485792e-05, "loss": 1.2274, "step": 62000 }, { "epoch": 8.39, "eval_accuracy": 0.7484281932495036, "eval_loss": 1.1518677473068237, "eval_runtime": 2.0234, "eval_samples_per_second": 245.13, "eval_steps_per_second": 7.907, "step": 62000 }, { "epoch": 8.46, "learning_rate": 2.885656292286874e-05, "loss": 1.232, "step": 62500 }, { "epoch": 8.46, "eval_accuracy": 0.7435426377844804, "eval_loss": 1.1693381071090698, "eval_runtime": 2.153, "eval_samples_per_second": 230.381, "eval_steps_per_second": 7.432, "step": 62500 }, { "epoch": 8.53, "learning_rate": 2.868741542625169e-05, "loss": 1.2315, "step": 63000 }, { "epoch": 8.53, "eval_accuracy": 0.7434497229246247, "eval_loss": 1.1637970209121704, "eval_runtime": 2.1389, "eval_samples_per_second": 231.9, "eval_steps_per_second": 7.481, "step": 63000 }, { "epoch": 8.59, "learning_rate": 2.851826792963464e-05, "loss": 1.2293, "step": 63500 }, { "epoch": 8.59, "eval_accuracy": 0.746056909476852, "eval_loss": 1.1639689207077026, "eval_runtime": 1.9056, "eval_samples_per_second": 260.282, "eval_steps_per_second": 8.396, "step": 63500 }, { "epoch": 8.66, "learning_rate": 2.8349120433017595e-05, "loss": 1.2287, "step": 64000 }, { "epoch": 8.66, "eval_accuracy": 0.7519274622651754, "eval_loss": 1.146359920501709, "eval_runtime": 2.1418, "eval_samples_per_second": 231.582, "eval_steps_per_second": 7.47, "step": 64000 }, { "epoch": 8.73, "learning_rate": 2.8179972936400545e-05, "loss": 1.2283, "step": 64500 }, { "epoch": 8.73, "eval_accuracy": 0.7480988335904306, "eval_loss": 1.1439129114151, "eval_runtime": 2.1515, "eval_samples_per_second": 230.532, "eval_steps_per_second": 7.437, "step": 64500 }, { "epoch": 8.8, "learning_rate": 2.8010825439783495e-05, "loss": 1.2279, "step": 65000 }, { "epoch": 8.8, "eval_accuracy": 0.7476770091832853, "eval_loss": 1.1496102809906006, "eval_runtime": 2.1443, "eval_samples_per_second": 231.311, "eval_steps_per_second": 7.462, "step": 65000 }, { "epoch": 8.86, "learning_rate": 2.7841677943166445e-05, "loss": 1.2276, "step": 65500 }, { "epoch": 8.86, "eval_accuracy": 0.7448800151502855, "eval_loss": 1.1544512510299683, "eval_runtime": 2.1443, "eval_samples_per_second": 231.308, "eval_steps_per_second": 7.462, "step": 65500 }, { "epoch": 8.93, "learning_rate": 2.7672530446549395e-05, "loss": 1.2301, "step": 66000 }, { "epoch": 8.93, "eval_accuracy": 0.7486796972831709, "eval_loss": 1.131188154220581, "eval_runtime": 2.2612, "eval_samples_per_second": 219.354, "eval_steps_per_second": 7.076, "step": 66000 }, { "epoch": 9.0, "learning_rate": 2.7503382949932345e-05, "loss": 1.2248, "step": 66500 }, { "epoch": 9.0, "eval_accuracy": 0.7464872620949183, "eval_loss": 1.1444239616394043, "eval_runtime": 1.9117, "eval_samples_per_second": 259.451, "eval_steps_per_second": 8.369, "step": 66500 }, { "epoch": 9.07, "learning_rate": 2.7334235453315295e-05, "loss": 1.2266, "step": 67000 }, { "epoch": 9.07, "eval_accuracy": 0.7430061513773736, "eval_loss": 1.1525160074234009, "eval_runtime": 2.256, "eval_samples_per_second": 219.857, "eval_steps_per_second": 7.092, "step": 67000 }, { "epoch": 9.13, "learning_rate": 2.716508795669824e-05, "loss": 1.2198, "step": 67500 }, { "epoch": 9.13, "eval_accuracy": 0.7462388784038825, "eval_loss": 1.1551423072814941, "eval_runtime": 2.2513, "eval_samples_per_second": 220.317, "eval_steps_per_second": 7.107, "step": 67500 }, { "epoch": 9.2, "learning_rate": 2.699594046008119e-05, "loss": 1.219, "step": 68000 }, { "epoch": 9.2, "eval_accuracy": 0.7479334406870639, "eval_loss": 1.143385887145996, "eval_runtime": 2.2558, "eval_samples_per_second": 219.879, "eval_steps_per_second": 7.093, "step": 68000 }, { "epoch": 9.27, "learning_rate": 2.682679296346414e-05, "loss": 1.2212, "step": 68500 }, { "epoch": 9.27, "eval_accuracy": 0.7415982885151786, "eval_loss": 1.1707236766815186, "eval_runtime": 2.2452, "eval_samples_per_second": 220.92, "eval_steps_per_second": 7.126, "step": 68500 }, { "epoch": 9.34, "learning_rate": 2.665764546684709e-05, "loss": 1.2265, "step": 69000 }, { "epoch": 9.34, "eval_accuracy": 0.7421521035598706, "eval_loss": 1.1743712425231934, "eval_runtime": 2.1289, "eval_samples_per_second": 232.985, "eval_steps_per_second": 7.516, "step": 69000 }, { "epoch": 9.4, "learning_rate": 2.648849797023004e-05, "loss": 1.2216, "step": 69500 }, { "epoch": 9.4, "eval_accuracy": 0.7392663666010835, "eval_loss": 1.1817814111709595, "eval_runtime": 2.1373, "eval_samples_per_second": 232.069, "eval_steps_per_second": 7.486, "step": 69500 }, { "epoch": 9.47, "learning_rate": 2.631935047361299e-05, "loss": 1.2226, "step": 70000 }, { "epoch": 9.47, "eval_accuracy": 0.7454341644794401, "eval_loss": 1.1662167310714722, "eval_runtime": 2.1776, "eval_samples_per_second": 227.77, "eval_steps_per_second": 7.347, "step": 70000 }, { "epoch": 9.54, "learning_rate": 2.615020297699594e-05, "loss": 1.2224, "step": 70500 }, { "epoch": 9.54, "eval_accuracy": 0.7460155894249055, "eval_loss": 1.1345940828323364, "eval_runtime": 2.131, "eval_samples_per_second": 232.753, "eval_steps_per_second": 7.508, "step": 70500 }, { "epoch": 9.61, "learning_rate": 2.598105548037889e-05, "loss": 1.2186, "step": 71000 }, { "epoch": 9.61, "eval_accuracy": 0.7462514417531718, "eval_loss": 1.153380036354065, "eval_runtime": 2.3136, "eval_samples_per_second": 214.386, "eval_steps_per_second": 6.916, "step": 71000 }, { "epoch": 9.68, "learning_rate": 2.581190798376184e-05, "loss": 1.2179, "step": 71500 }, { "epoch": 9.68, "eval_accuracy": 0.7477791705270042, "eval_loss": 1.1399047374725342, "eval_runtime": 2.2709, "eval_samples_per_second": 218.413, "eval_steps_per_second": 7.046, "step": 71500 }, { "epoch": 9.74, "learning_rate": 2.564276048714479e-05, "loss": 1.2177, "step": 72000 }, { "epoch": 9.74, "eval_accuracy": 0.7441804462995666, "eval_loss": 1.1545348167419434, "eval_runtime": 2.2419, "eval_samples_per_second": 221.237, "eval_steps_per_second": 7.137, "step": 72000 }, { "epoch": 9.81, "learning_rate": 2.547361299052774e-05, "loss": 1.2154, "step": 72500 }, { "epoch": 9.81, "eval_accuracy": 0.7426768214742224, "eval_loss": 1.171052098274231, "eval_runtime": 2.1267, "eval_samples_per_second": 233.224, "eval_steps_per_second": 7.523, "step": 72500 }, { "epoch": 9.88, "learning_rate": 2.530446549391069e-05, "loss": 1.2179, "step": 73000 }, { "epoch": 9.88, "eval_accuracy": 0.7514139509830325, "eval_loss": 1.1348686218261719, "eval_runtime": 1.8985, "eval_samples_per_second": 261.263, "eval_steps_per_second": 8.428, "step": 73000 }, { "epoch": 9.95, "learning_rate": 2.513531799729364e-05, "loss": 1.2184, "step": 73500 }, { "epoch": 9.95, "eval_accuracy": 0.749460868615729, "eval_loss": 1.1427435874938965, "eval_runtime": 2.1257, "eval_samples_per_second": 233.336, "eval_steps_per_second": 7.527, "step": 73500 }, { "epoch": 10.01, "learning_rate": 2.496617050067659e-05, "loss": 1.2193, "step": 74000 }, { "epoch": 10.01, "eval_accuracy": 0.7494911077780159, "eval_loss": 1.1222712993621826, "eval_runtime": 2.0347, "eval_samples_per_second": 243.77, "eval_steps_per_second": 7.864, "step": 74000 }, { "epoch": 10.08, "learning_rate": 2.479702300405954e-05, "loss": 1.2063, "step": 74500 }, { "epoch": 10.08, "eval_accuracy": 0.7488264163021444, "eval_loss": 1.1357399225234985, "eval_runtime": 1.9046, "eval_samples_per_second": 260.423, "eval_steps_per_second": 8.401, "step": 74500 }, { "epoch": 10.15, "learning_rate": 2.462787550744249e-05, "loss": 1.2025, "step": 75000 }, { "epoch": 10.15, "eval_accuracy": 0.7486311066000695, "eval_loss": 1.1476197242736816, "eval_runtime": 2.1786, "eval_samples_per_second": 227.67, "eval_steps_per_second": 7.344, "step": 75000 }, { "epoch": 10.22, "learning_rate": 2.445872801082544e-05, "loss": 1.2097, "step": 75500 }, { "epoch": 10.22, "eval_accuracy": 0.7492516383053316, "eval_loss": 1.1382330656051636, "eval_runtime": 2.1295, "eval_samples_per_second": 232.922, "eval_steps_per_second": 7.514, "step": 75500 }, { "epoch": 10.28, "learning_rate": 2.428958051420839e-05, "loss": 1.2106, "step": 76000 }, { "epoch": 10.28, "eval_accuracy": 0.7500204253928484, "eval_loss": 1.1413904428482056, "eval_runtime": 2.1753, "eval_samples_per_second": 228.019, "eval_steps_per_second": 7.355, "step": 76000 }, { "epoch": 10.35, "learning_rate": 2.412043301759134e-05, "loss": 1.2146, "step": 76500 }, { "epoch": 10.35, "eval_accuracy": 0.7533006412674462, "eval_loss": 1.113772988319397, "eval_runtime": 2.136, "eval_samples_per_second": 232.208, "eval_steps_per_second": 7.491, "step": 76500 }, { "epoch": 10.42, "learning_rate": 2.395128552097429e-05, "loss": 1.2129, "step": 77000 }, { "epoch": 10.42, "eval_accuracy": 0.7477787948952668, "eval_loss": 1.1447216272354126, "eval_runtime": 2.2641, "eval_samples_per_second": 219.075, "eval_steps_per_second": 7.067, "step": 77000 }, { "epoch": 10.49, "learning_rate": 2.378213802435724e-05, "loss": 1.2078, "step": 77500 }, { "epoch": 10.49, "eval_accuracy": 0.7508962988920937, "eval_loss": 1.155730128288269, "eval_runtime": 2.1359, "eval_samples_per_second": 232.221, "eval_steps_per_second": 7.491, "step": 77500 }, { "epoch": 10.55, "learning_rate": 2.3612990527740193e-05, "loss": 1.204, "step": 78000 }, { "epoch": 10.55, "eval_accuracy": 0.7537665293735096, "eval_loss": 1.1243318319320679, "eval_runtime": 2.2543, "eval_samples_per_second": 220.027, "eval_steps_per_second": 7.098, "step": 78000 }, { "epoch": 10.62, "learning_rate": 2.3443843031123143e-05, "loss": 1.2101, "step": 78500 }, { "epoch": 10.62, "eval_accuracy": 0.7507114399544679, "eval_loss": 1.1352229118347168, "eval_runtime": 2.1504, "eval_samples_per_second": 230.651, "eval_steps_per_second": 7.44, "step": 78500 }, { "epoch": 10.69, "learning_rate": 2.327469553450609e-05, "loss": 1.207, "step": 79000 }, { "epoch": 10.69, "eval_accuracy": 0.7526499865482916, "eval_loss": 1.1365910768508911, "eval_runtime": 2.2554, "eval_samples_per_second": 219.92, "eval_steps_per_second": 7.094, "step": 79000 }, { "epoch": 10.76, "learning_rate": 2.310554803788904e-05, "loss": 1.2067, "step": 79500 }, { "epoch": 10.76, "eval_accuracy": 0.7482271408617169, "eval_loss": 1.145031213760376, "eval_runtime": 2.1262, "eval_samples_per_second": 233.276, "eval_steps_per_second": 7.525, "step": 79500 }, { "epoch": 10.83, "learning_rate": 2.293640054127199e-05, "loss": 1.1997, "step": 80000 }, { "epoch": 10.83, "eval_accuracy": 0.7503758591065293, "eval_loss": 1.1333723068237305, "eval_runtime": 2.1437, "eval_samples_per_second": 231.381, "eval_steps_per_second": 7.464, "step": 80000 }, { "epoch": 10.89, "learning_rate": 2.276725304465494e-05, "loss": 1.2114, "step": 80500 }, { "epoch": 10.89, "eval_accuracy": 0.7523950883821346, "eval_loss": 1.13480544090271, "eval_runtime": 2.256, "eval_samples_per_second": 219.855, "eval_steps_per_second": 7.092, "step": 80500 }, { "epoch": 10.96, "learning_rate": 2.259810554803789e-05, "loss": 1.2087, "step": 81000 }, { "epoch": 10.96, "eval_accuracy": 0.7507978579542381, "eval_loss": 1.1221325397491455, "eval_runtime": 2.149, "eval_samples_per_second": 230.81, "eval_steps_per_second": 7.445, "step": 81000 }, { "epoch": 11.03, "learning_rate": 2.242895805142084e-05, "loss": 1.2065, "step": 81500 }, { "epoch": 11.03, "eval_accuracy": 0.7486237532021583, "eval_loss": 1.130583643913269, "eval_runtime": 2.2591, "eval_samples_per_second": 219.556, "eval_steps_per_second": 7.082, "step": 81500 }, { "epoch": 11.1, "learning_rate": 2.225981055480379e-05, "loss": 1.1985, "step": 82000 }, { "epoch": 11.1, "eval_accuracy": 0.7470671686582637, "eval_loss": 1.1648321151733398, "eval_runtime": 2.2577, "eval_samples_per_second": 219.693, "eval_steps_per_second": 7.087, "step": 82000 }, { "epoch": 11.16, "learning_rate": 2.209066305818674e-05, "loss": 1.205, "step": 82500 }, { "epoch": 11.16, "eval_accuracy": 0.7526795068095737, "eval_loss": 1.1088367700576782, "eval_runtime": 2.1263, "eval_samples_per_second": 233.267, "eval_steps_per_second": 7.525, "step": 82500 }, { "epoch": 11.23, "learning_rate": 2.192151556156969e-05, "loss": 1.2026, "step": 83000 }, { "epoch": 11.23, "eval_accuracy": 0.7512794548290868, "eval_loss": 1.1253347396850586, "eval_runtime": 2.1489, "eval_samples_per_second": 230.816, "eval_steps_per_second": 7.446, "step": 83000 }, { "epoch": 11.3, "learning_rate": 2.175236806495264e-05, "loss": 1.2, "step": 83500 }, { "epoch": 11.3, "eval_accuracy": 0.7473735779217244, "eval_loss": 1.1330283880233765, "eval_runtime": 2.2554, "eval_samples_per_second": 219.915, "eval_steps_per_second": 7.094, "step": 83500 }, { "epoch": 11.37, "learning_rate": 2.1583220568335592e-05, "loss": 1.1997, "step": 84000 }, { "epoch": 11.37, "eval_accuracy": 0.7493864048660762, "eval_loss": 1.1423763036727905, "eval_runtime": 2.2628, "eval_samples_per_second": 219.193, "eval_steps_per_second": 7.071, "step": 84000 }, { "epoch": 11.43, "learning_rate": 2.1414073071718542e-05, "loss": 1.1989, "step": 84500 }, { "epoch": 11.43, "eval_accuracy": 0.7477665276950566, "eval_loss": 1.1288686990737915, "eval_runtime": 2.2744, "eval_samples_per_second": 218.08, "eval_steps_per_second": 7.035, "step": 84500 }, { "epoch": 11.5, "learning_rate": 2.124492557510149e-05, "loss": 1.1956, "step": 85000 }, { "epoch": 11.5, "eval_accuracy": 0.75250470912615, "eval_loss": 1.1163060665130615, "eval_runtime": 2.1424, "eval_samples_per_second": 231.521, "eval_steps_per_second": 7.468, "step": 85000 }, { "epoch": 11.57, "learning_rate": 2.107577807848444e-05, "loss": 1.1997, "step": 85500 }, { "epoch": 11.57, "eval_accuracy": 0.7502406674510643, "eval_loss": 1.135400414466858, "eval_runtime": 2.132, "eval_samples_per_second": 232.649, "eval_steps_per_second": 7.505, "step": 85500 }, { "epoch": 11.64, "learning_rate": 2.090663058186739e-05, "loss": 1.2011, "step": 86000 }, { "epoch": 11.64, "eval_accuracy": 0.7487909354704988, "eval_loss": 1.137099027633667, "eval_runtime": 2.022, "eval_samples_per_second": 245.301, "eval_steps_per_second": 7.913, "step": 86000 }, { "epoch": 11.71, "learning_rate": 2.073748308525034e-05, "loss": 1.1998, "step": 86500 }, { "epoch": 11.71, "eval_accuracy": 0.7525347250536846, "eval_loss": 1.1276001930236816, "eval_runtime": 2.1283, "eval_samples_per_second": 233.052, "eval_steps_per_second": 7.518, "step": 86500 }, { "epoch": 11.77, "learning_rate": 2.056833558863329e-05, "loss": 1.1957, "step": 87000 }, { "epoch": 11.77, "eval_accuracy": 0.7557962751805397, "eval_loss": 1.1078341007232666, "eval_runtime": 2.1602, "eval_samples_per_second": 229.608, "eval_steps_per_second": 7.407, "step": 87000 }, { "epoch": 11.84, "learning_rate": 2.039918809201624e-05, "loss": 1.2027, "step": 87500 }, { "epoch": 11.84, "eval_accuracy": 0.745357875418331, "eval_loss": 1.1625709533691406, "eval_runtime": 2.1381, "eval_samples_per_second": 231.987, "eval_steps_per_second": 7.483, "step": 87500 }, { "epoch": 11.91, "learning_rate": 2.0230040595399188e-05, "loss": 1.2013, "step": 88000 }, { "epoch": 11.91, "eval_accuracy": 0.7526884647845145, "eval_loss": 1.1228464841842651, "eval_runtime": 1.9029, "eval_samples_per_second": 260.656, "eval_steps_per_second": 8.408, "step": 88000 }, { "epoch": 11.98, "learning_rate": 2.0060893098782138e-05, "loss": 1.1944, "step": 88500 }, { "epoch": 11.98, "eval_accuracy": 0.7478242411377627, "eval_loss": 1.1413049697875977, "eval_runtime": 2.1489, "eval_samples_per_second": 230.819, "eval_steps_per_second": 7.446, "step": 88500 }, { "epoch": 12.04, "learning_rate": 1.9891745602165088e-05, "loss": 1.1946, "step": 89000 }, { "epoch": 12.04, "eval_accuracy": 0.7513838877841672, "eval_loss": 1.124992847442627, "eval_runtime": 2.2528, "eval_samples_per_second": 220.169, "eval_steps_per_second": 7.102, "step": 89000 }, { "epoch": 12.11, "learning_rate": 1.972259810554804e-05, "loss": 1.196, "step": 89500 }, { "epoch": 12.11, "eval_accuracy": 0.7467797423793904, "eval_loss": 1.1447776556015015, "eval_runtime": 2.243, "eval_samples_per_second": 221.135, "eval_steps_per_second": 7.133, "step": 89500 }, { "epoch": 12.18, "learning_rate": 1.955345060893099e-05, "loss": 1.1893, "step": 90000 }, { "epoch": 12.18, "eval_accuracy": 0.7478244470188862, "eval_loss": 1.1357169151306152, "eval_runtime": 2.1342, "eval_samples_per_second": 232.401, "eval_steps_per_second": 7.497, "step": 90000 }, { "epoch": 12.25, "learning_rate": 1.938430311231394e-05, "loss": 1.1865, "step": 90500 }, { "epoch": 12.25, "eval_accuracy": 0.7525039957378796, "eval_loss": 1.120892882347107, "eval_runtime": 2.1457, "eval_samples_per_second": 231.161, "eval_steps_per_second": 7.457, "step": 90500 }, { "epoch": 12.31, "learning_rate": 1.9215155615696888e-05, "loss": 1.1921, "step": 91000 }, { "epoch": 12.31, "eval_accuracy": 0.7517412799431865, "eval_loss": 1.1200112104415894, "eval_runtime": 2.2546, "eval_samples_per_second": 219.99, "eval_steps_per_second": 7.096, "step": 91000 }, { "epoch": 12.38, "learning_rate": 1.9046008119079838e-05, "loss": 1.1928, "step": 91500 }, { "epoch": 12.38, "eval_accuracy": 0.751185221513814, "eval_loss": 1.1144980192184448, "eval_runtime": 1.9441, "eval_samples_per_second": 255.124, "eval_steps_per_second": 8.23, "step": 91500 }, { "epoch": 12.45, "learning_rate": 1.8876860622462788e-05, "loss": 1.1904, "step": 92000 }, { "epoch": 12.45, "eval_accuracy": 0.754587343566813, "eval_loss": 1.1108394861221313, "eval_runtime": 2.252, "eval_samples_per_second": 220.248, "eval_steps_per_second": 7.105, "step": 92000 }, { "epoch": 12.52, "learning_rate": 1.8707713125845738e-05, "loss": 1.1955, "step": 92500 }, { "epoch": 12.52, "eval_accuracy": 0.7540812503345287, "eval_loss": 1.106156826019287, "eval_runtime": 2.2623, "eval_samples_per_second": 219.243, "eval_steps_per_second": 7.072, "step": 92500 }, { "epoch": 12.58, "learning_rate": 1.8538565629228687e-05, "loss": 1.1898, "step": 93000 }, { "epoch": 12.58, "eval_accuracy": 0.7519862396592678, "eval_loss": 1.126400351524353, "eval_runtime": 2.1347, "eval_samples_per_second": 232.346, "eval_steps_per_second": 7.495, "step": 93000 }, { "epoch": 12.65, "learning_rate": 1.8369418132611637e-05, "loss": 1.1917, "step": 93500 }, { "epoch": 12.65, "eval_accuracy": 0.7535633076368476, "eval_loss": 1.112923502922058, "eval_runtime": 2.2569, "eval_samples_per_second": 219.772, "eval_steps_per_second": 7.089, "step": 93500 }, { "epoch": 12.72, "learning_rate": 1.8200270635994587e-05, "loss": 1.1895, "step": 94000 }, { "epoch": 12.72, "eval_accuracy": 0.7494371965607963, "eval_loss": 1.1288461685180664, "eval_runtime": 2.1453, "eval_samples_per_second": 231.202, "eval_steps_per_second": 7.458, "step": 94000 }, { "epoch": 12.79, "learning_rate": 1.8031123139377537e-05, "loss": 1.1966, "step": 94500 }, { "epoch": 12.79, "eval_accuracy": 0.7474297006435763, "eval_loss": 1.1435807943344116, "eval_runtime": 2.2544, "eval_samples_per_second": 220.014, "eval_steps_per_second": 7.097, "step": 94500 }, { "epoch": 12.86, "learning_rate": 1.7861975642760487e-05, "loss": 1.1887, "step": 95000 }, { "epoch": 12.86, "eval_accuracy": 0.7530491066652402, "eval_loss": 1.1220248937606812, "eval_runtime": 2.0377, "eval_samples_per_second": 243.414, "eval_steps_per_second": 7.852, "step": 95000 }, { "epoch": 12.92, "learning_rate": 1.769282814614344e-05, "loss": 1.1856, "step": 95500 }, { "epoch": 12.92, "eval_accuracy": 0.7499591391991283, "eval_loss": 1.1441563367843628, "eval_runtime": 2.2428, "eval_samples_per_second": 221.15, "eval_steps_per_second": 7.134, "step": 95500 }, { "epoch": 12.99, "learning_rate": 1.752368064952639e-05, "loss": 1.1934, "step": 96000 }, { "epoch": 12.99, "eval_accuracy": 0.7487198734618374, "eval_loss": 1.134777545928955, "eval_runtime": 2.1446, "eval_samples_per_second": 231.279, "eval_steps_per_second": 7.461, "step": 96000 }, { "epoch": 13.06, "learning_rate": 1.7354533152909337e-05, "loss": 1.1848, "step": 96500 }, { "epoch": 13.06, "eval_accuracy": 0.7521449252264457, "eval_loss": 1.1171698570251465, "eval_runtime": 2.2564, "eval_samples_per_second": 219.819, "eval_steps_per_second": 7.091, "step": 96500 }, { "epoch": 13.13, "learning_rate": 1.7185385656292287e-05, "loss": 1.1821, "step": 97000 }, { "epoch": 13.13, "eval_accuracy": 0.7566415837311541, "eval_loss": 1.1042215824127197, "eval_runtime": 2.1261, "eval_samples_per_second": 233.296, "eval_steps_per_second": 7.526, "step": 97000 }, { "epoch": 13.19, "learning_rate": 1.7016238159675237e-05, "loss": 1.1817, "step": 97500 }, { "epoch": 13.19, "eval_accuracy": 0.7495432072227, "eval_loss": 1.1272791624069214, "eval_runtime": 2.029, "eval_samples_per_second": 244.45, "eval_steps_per_second": 7.885, "step": 97500 }, { "epoch": 13.26, "learning_rate": 1.6847090663058187e-05, "loss": 1.1773, "step": 98000 }, { "epoch": 13.26, "eval_accuracy": 0.7539743031358885, "eval_loss": 1.0957542657852173, "eval_runtime": 2.193, "eval_samples_per_second": 226.174, "eval_steps_per_second": 7.296, "step": 98000 }, { "epoch": 13.33, "learning_rate": 1.6677943166441137e-05, "loss": 1.1774, "step": 98500 }, { "epoch": 13.33, "eval_accuracy": 0.7510550791645386, "eval_loss": 1.1139615774154663, "eval_runtime": 2.0354, "eval_samples_per_second": 243.69, "eval_steps_per_second": 7.861, "step": 98500 }, { "epoch": 13.4, "learning_rate": 1.6508795669824086e-05, "loss": 1.1841, "step": 99000 }, { "epoch": 13.4, "eval_accuracy": 0.7535410764872521, "eval_loss": 1.1085665225982666, "eval_runtime": 2.2775, "eval_samples_per_second": 217.784, "eval_steps_per_second": 7.025, "step": 99000 }, { "epoch": 13.46, "learning_rate": 1.6339648173207036e-05, "loss": 1.1825, "step": 99500 }, { "epoch": 13.46, "eval_accuracy": 0.7575840393550151, "eval_loss": 1.0903350114822388, "eval_runtime": 2.1352, "eval_samples_per_second": 232.293, "eval_steps_per_second": 7.493, "step": 99500 }, { "epoch": 13.53, "learning_rate": 1.6170500676589986e-05, "loss": 1.1845, "step": 100000 }, { "epoch": 13.53, "eval_accuracy": 0.7486053092575125, "eval_loss": 1.129094123840332, "eval_runtime": 2.2564, "eval_samples_per_second": 219.823, "eval_steps_per_second": 7.091, "step": 100000 }, { "epoch": 13.6, "learning_rate": 1.6001353179972936e-05, "loss": 1.1853, "step": 100500 }, { "epoch": 13.6, "eval_accuracy": 0.7485774103500107, "eval_loss": 1.1317797899246216, "eval_runtime": 2.2139, "eval_samples_per_second": 224.036, "eval_steps_per_second": 7.227, "step": 100500 }, { "epoch": 13.67, "learning_rate": 1.5832205683355886e-05, "loss": 1.1761, "step": 101000 }, { "epoch": 13.67, "eval_accuracy": 0.7552630190471166, "eval_loss": 1.1218476295471191, "eval_runtime": 2.2116, "eval_samples_per_second": 224.271, "eval_steps_per_second": 7.235, "step": 101000 }, { "epoch": 13.73, "learning_rate": 1.566305818673884e-05, "loss": 1.1825, "step": 101500 }, { "epoch": 13.73, "eval_accuracy": 0.7484677617063006, "eval_loss": 1.130650520324707, "eval_runtime": 2.1348, "eval_samples_per_second": 232.339, "eval_steps_per_second": 7.495, "step": 101500 }, { "epoch": 13.8, "learning_rate": 1.549391069012179e-05, "loss": 1.1849, "step": 102000 }, { "epoch": 13.8, "eval_accuracy": 0.7503921250473254, "eval_loss": 1.1273096799850464, "eval_runtime": 2.1346, "eval_samples_per_second": 232.367, "eval_steps_per_second": 7.496, "step": 102000 }, { "epoch": 13.87, "learning_rate": 1.5324763193504736e-05, "loss": 1.1792, "step": 102500 }, { "epoch": 13.87, "eval_accuracy": 0.7496725963112518, "eval_loss": 1.1290724277496338, "eval_runtime": 2.0388, "eval_samples_per_second": 243.277, "eval_steps_per_second": 7.848, "step": 102500 }, { "epoch": 13.94, "learning_rate": 1.5155615696887688e-05, "loss": 1.1852, "step": 103000 }, { "epoch": 13.94, "eval_accuracy": 0.7521213264014223, "eval_loss": 1.1133606433868408, "eval_runtime": 2.1407, "eval_samples_per_second": 231.702, "eval_steps_per_second": 7.474, "step": 103000 }, { "epoch": 14.01, "learning_rate": 1.4986468200270637e-05, "loss": 1.1745, "step": 103500 }, { "epoch": 14.01, "eval_accuracy": 0.7510633656887338, "eval_loss": 1.1251685619354248, "eval_runtime": 2.2562, "eval_samples_per_second": 219.835, "eval_steps_per_second": 7.091, "step": 103500 }, { "epoch": 14.07, "learning_rate": 1.4817320703653587e-05, "loss": 1.1746, "step": 104000 }, { "epoch": 14.07, "eval_accuracy": 0.7508518468038707, "eval_loss": 1.114823579788208, "eval_runtime": 2.1373, "eval_samples_per_second": 232.068, "eval_steps_per_second": 7.486, "step": 104000 }, { "epoch": 14.14, "learning_rate": 1.4648173207036536e-05, "loss": 1.1765, "step": 104500 }, { "epoch": 14.14, "eval_accuracy": 0.7499047204224969, "eval_loss": 1.120153784751892, "eval_runtime": 1.9047, "eval_samples_per_second": 260.406, "eval_steps_per_second": 8.4, "step": 104500 }, { "epoch": 14.21, "learning_rate": 1.4479025710419486e-05, "loss": 1.1762, "step": 105000 }, { "epoch": 14.21, "eval_accuracy": 0.7526651867686152, "eval_loss": 1.11342453956604, "eval_runtime": 2.1108, "eval_samples_per_second": 234.982, "eval_steps_per_second": 7.58, "step": 105000 }, { "epoch": 14.28, "learning_rate": 1.4309878213802435e-05, "loss": 1.1752, "step": 105500 }, { "epoch": 14.28, "eval_accuracy": 0.7550594107753242, "eval_loss": 1.1170574426651, "eval_runtime": 2.0286, "eval_samples_per_second": 244.505, "eval_steps_per_second": 7.887, "step": 105500 }, { "epoch": 14.34, "learning_rate": 1.4140730717185385e-05, "loss": 1.176, "step": 106000 }, { "epoch": 14.34, "eval_accuracy": 0.7526875882289065, "eval_loss": 1.1155229806900024, "eval_runtime": 2.2532, "eval_samples_per_second": 220.127, "eval_steps_per_second": 7.101, "step": 106000 }, { "epoch": 14.41, "learning_rate": 1.3971583220568335e-05, "loss": 1.1732, "step": 106500 }, { "epoch": 14.41, "eval_accuracy": 0.7481485413956945, "eval_loss": 1.133280873298645, "eval_runtime": 2.1464, "eval_samples_per_second": 231.081, "eval_steps_per_second": 7.454, "step": 106500 }, { "epoch": 14.48, "learning_rate": 1.3802435723951287e-05, "loss": 1.1753, "step": 107000 }, { "epoch": 14.48, "eval_accuracy": 0.7574028502663674, "eval_loss": 1.0981875658035278, "eval_runtime": 2.0246, "eval_samples_per_second": 244.982, "eval_steps_per_second": 7.903, "step": 107000 }, { "epoch": 14.55, "learning_rate": 1.3633288227334237e-05, "loss": 1.1713, "step": 107500 }, { "epoch": 14.55, "eval_accuracy": 0.749116988864623, "eval_loss": 1.1342977285385132, "eval_runtime": 2.1397, "eval_samples_per_second": 231.806, "eval_steps_per_second": 7.478, "step": 107500 }, { "epoch": 14.61, "learning_rate": 1.3464140730717187e-05, "loss": 1.1692, "step": 108000 }, { "epoch": 14.61, "eval_accuracy": 0.7548563905532121, "eval_loss": 1.1020859479904175, "eval_runtime": 2.038, "eval_samples_per_second": 243.371, "eval_steps_per_second": 7.851, "step": 108000 }, { "epoch": 14.68, "learning_rate": 1.3294993234100137e-05, "loss": 1.17, "step": 108500 }, { "epoch": 14.68, "eval_accuracy": 0.7503776331328444, "eval_loss": 1.110732078552246, "eval_runtime": 2.2439, "eval_samples_per_second": 221.039, "eval_steps_per_second": 7.13, "step": 108500 }, { "epoch": 14.75, "learning_rate": 1.3125845737483087e-05, "loss": 1.1699, "step": 109000 }, { "epoch": 14.75, "eval_accuracy": 0.7505413012882971, "eval_loss": 1.1227320432662964, "eval_runtime": 2.1354, "eval_samples_per_second": 232.28, "eval_steps_per_second": 7.493, "step": 109000 }, { "epoch": 14.82, "learning_rate": 1.2956698240866036e-05, "loss": 1.1763, "step": 109500 }, { "epoch": 14.82, "eval_accuracy": 0.7523848348960457, "eval_loss": 1.1152479648590088, "eval_runtime": 2.2475, "eval_samples_per_second": 220.693, "eval_steps_per_second": 7.119, "step": 109500 }, { "epoch": 14.88, "learning_rate": 1.2787550744248986e-05, "loss": 1.1729, "step": 110000 }, { "epoch": 14.88, "eval_accuracy": 0.7563491422261722, "eval_loss": 1.0939308404922485, "eval_runtime": 2.0291, "eval_samples_per_second": 244.44, "eval_steps_per_second": 7.885, "step": 110000 }, { "epoch": 14.95, "learning_rate": 1.2618403247631935e-05, "loss": 1.1731, "step": 110500 }, { "epoch": 14.95, "eval_accuracy": 0.7446182644738601, "eval_loss": 1.153084397315979, "eval_runtime": 2.1368, "eval_samples_per_second": 232.12, "eval_steps_per_second": 7.488, "step": 110500 }, { "epoch": 15.02, "learning_rate": 1.2449255751014885e-05, "loss": 1.1744, "step": 111000 }, { "epoch": 15.02, "eval_accuracy": 0.748938913662494, "eval_loss": 1.1451458930969238, "eval_runtime": 2.2627, "eval_samples_per_second": 219.205, "eval_steps_per_second": 7.071, "step": 111000 }, { "epoch": 15.09, "learning_rate": 1.2280108254397836e-05, "loss": 1.169, "step": 111500 }, { "epoch": 15.09, "eval_accuracy": 0.7527127355796688, "eval_loss": 1.1211124658584595, "eval_runtime": 2.254, "eval_samples_per_second": 220.058, "eval_steps_per_second": 7.099, "step": 111500 }, { "epoch": 15.16, "learning_rate": 1.2110960757780786e-05, "loss": 1.1644, "step": 112000 }, { "epoch": 15.16, "eval_accuracy": 0.7553240179845462, "eval_loss": 1.1134895086288452, "eval_runtime": 2.1385, "eval_samples_per_second": 231.943, "eval_steps_per_second": 7.482, "step": 112000 }, { "epoch": 15.22, "learning_rate": 1.1941813261163736e-05, "loss": 1.1726, "step": 112500 }, { "epoch": 15.22, "eval_accuracy": 0.7551064057320073, "eval_loss": 1.0903879404067993, "eval_runtime": 2.141, "eval_samples_per_second": 231.664, "eval_steps_per_second": 7.473, "step": 112500 }, { "epoch": 15.29, "learning_rate": 1.1772665764546684e-05, "loss": 1.1653, "step": 113000 }, { "epoch": 15.29, "eval_accuracy": 0.7585871152701898, "eval_loss": 1.0806618928909302, "eval_runtime": 2.2591, "eval_samples_per_second": 219.56, "eval_steps_per_second": 7.083, "step": 113000 }, { "epoch": 15.36, "learning_rate": 1.1603518267929634e-05, "loss": 1.1651, "step": 113500 }, { "epoch": 15.36, "eval_accuracy": 0.7487410264652309, "eval_loss": 1.1385972499847412, "eval_runtime": 2.0137, "eval_samples_per_second": 246.318, "eval_steps_per_second": 7.946, "step": 113500 }, { "epoch": 15.43, "learning_rate": 1.1434370771312584e-05, "loss": 1.1663, "step": 114000 }, { "epoch": 15.43, "eval_accuracy": 0.7531269501044577, "eval_loss": 1.1114603281021118, "eval_runtime": 2.038, "eval_samples_per_second": 243.378, "eval_steps_per_second": 7.851, "step": 114000 }, { "epoch": 15.49, "learning_rate": 1.1265223274695536e-05, "loss": 1.1635, "step": 114500 }, { "epoch": 15.49, "eval_accuracy": 0.7503972421965474, "eval_loss": 1.1271893978118896, "eval_runtime": 2.2587, "eval_samples_per_second": 219.592, "eval_steps_per_second": 7.084, "step": 114500 }, { "epoch": 15.56, "learning_rate": 1.1096075778078486e-05, "loss": 1.1646, "step": 115000 }, { "epoch": 15.56, "eval_accuracy": 0.7541348344725908, "eval_loss": 1.0982328653335571, "eval_runtime": 2.0235, "eval_samples_per_second": 245.115, "eval_steps_per_second": 7.907, "step": 115000 }, { "epoch": 15.63, "learning_rate": 1.0926928281461436e-05, "loss": 1.1639, "step": 115500 }, { "epoch": 15.63, "eval_accuracy": 0.7544710600476913, "eval_loss": 1.1104248762130737, "eval_runtime": 2.0141, "eval_samples_per_second": 246.267, "eval_steps_per_second": 7.944, "step": 115500 }, { "epoch": 15.7, "learning_rate": 1.0757780784844384e-05, "loss": 1.1598, "step": 116000 }, { "epoch": 15.7, "eval_accuracy": 0.7492906747372119, "eval_loss": 1.1334669589996338, "eval_runtime": 2.1233, "eval_samples_per_second": 233.593, "eval_steps_per_second": 7.535, "step": 116000 }, { "epoch": 15.76, "learning_rate": 1.0588633288227334e-05, "loss": 1.1612, "step": 116500 }, { "epoch": 15.76, "eval_accuracy": 0.7535777086433112, "eval_loss": 1.1088109016418457, "eval_runtime": 2.1482, "eval_samples_per_second": 230.894, "eval_steps_per_second": 7.448, "step": 116500 }, { "epoch": 15.83, "learning_rate": 1.0419485791610285e-05, "loss": 1.159, "step": 117000 }, { "epoch": 15.83, "eval_accuracy": 0.755389401298914, "eval_loss": 1.0895658731460571, "eval_runtime": 1.8998, "eval_samples_per_second": 261.073, "eval_steps_per_second": 8.422, "step": 117000 }, { "epoch": 15.9, "learning_rate": 1.0250338294993235e-05, "loss": 1.1686, "step": 117500 }, { "epoch": 15.9, "eval_accuracy": 0.7521880806829505, "eval_loss": 1.1212115287780762, "eval_runtime": 2.1408, "eval_samples_per_second": 231.688, "eval_steps_per_second": 7.474, "step": 117500 }, { "epoch": 15.97, "learning_rate": 1.0081190798376185e-05, "loss": 1.158, "step": 118000 }, { "epoch": 15.97, "eval_accuracy": 0.7528032891926527, "eval_loss": 1.1104135513305664, "eval_runtime": 2.2497, "eval_samples_per_second": 220.471, "eval_steps_per_second": 7.112, "step": 118000 }, { "epoch": 16.04, "learning_rate": 9.912043301759135e-06, "loss": 1.1633, "step": 118500 }, { "epoch": 16.04, "eval_accuracy": 0.7537655533726261, "eval_loss": 1.097953200340271, "eval_runtime": 2.0401, "eval_samples_per_second": 243.13, "eval_steps_per_second": 7.843, "step": 118500 }, { "epoch": 16.1, "learning_rate": 9.742895805142083e-06, "loss": 1.1622, "step": 119000 }, { "epoch": 16.1, "eval_accuracy": 0.750889583782618, "eval_loss": 1.1274609565734863, "eval_runtime": 2.0243, "eval_samples_per_second": 245.026, "eval_steps_per_second": 7.904, "step": 119000 }, { "epoch": 16.17, "learning_rate": 9.573748308525033e-06, "loss": 1.1625, "step": 119500 }, { "epoch": 16.17, "eval_accuracy": 0.754607674067687, "eval_loss": 1.1065136194229126, "eval_runtime": 1.9032, "eval_samples_per_second": 260.616, "eval_steps_per_second": 8.407, "step": 119500 }, { "epoch": 16.24, "learning_rate": 9.404600811907985e-06, "loss": 1.1582, "step": 120000 }, { "epoch": 16.24, "eval_accuracy": 0.7515266766659524, "eval_loss": 1.1181069612503052, "eval_runtime": 2.1333, "eval_samples_per_second": 232.507, "eval_steps_per_second": 7.5, "step": 120000 }, { "epoch": 16.31, "learning_rate": 9.235453315290935e-06, "loss": 1.1568, "step": 120500 }, { "epoch": 16.31, "eval_accuracy": 0.7558363160425237, "eval_loss": 1.1019920110702515, "eval_runtime": 2.1312, "eval_samples_per_second": 232.728, "eval_steps_per_second": 7.507, "step": 120500 }, { "epoch": 16.37, "learning_rate": 9.066305818673885e-06, "loss": 1.1573, "step": 121000 }, { "epoch": 16.37, "eval_accuracy": 0.7532534995625547, "eval_loss": 1.115644097328186, "eval_runtime": 2.1433, "eval_samples_per_second": 231.416, "eval_steps_per_second": 7.465, "step": 121000 }, { "epoch": 16.44, "learning_rate": 8.897158322056835e-06, "loss": 1.1549, "step": 121500 }, { "epoch": 16.44, "eval_accuracy": 0.7508123310487945, "eval_loss": 1.1205765008926392, "eval_runtime": 2.2601, "eval_samples_per_second": 219.456, "eval_steps_per_second": 7.079, "step": 121500 }, { "epoch": 16.51, "learning_rate": 8.728010825439783e-06, "loss": 1.1592, "step": 122000 }, { "epoch": 16.51, "eval_accuracy": 0.7542723559759243, "eval_loss": 1.0985246896743774, "eval_runtime": 2.2649, "eval_samples_per_second": 218.998, "eval_steps_per_second": 7.064, "step": 122000 }, { "epoch": 16.58, "learning_rate": 8.558863328822733e-06, "loss": 1.1584, "step": 122500 }, { "epoch": 16.58, "eval_accuracy": 0.7531888104231674, "eval_loss": 1.1170583963394165, "eval_runtime": 2.028, "eval_samples_per_second": 244.575, "eval_steps_per_second": 7.89, "step": 122500 }, { "epoch": 16.64, "learning_rate": 8.389715832205684e-06, "loss": 1.1589, "step": 123000 }, { "epoch": 16.64, "eval_accuracy": 0.7611846765843823, "eval_loss": 1.0686120986938477, "eval_runtime": 2.0269, "eval_samples_per_second": 244.714, "eval_steps_per_second": 7.894, "step": 123000 }, { "epoch": 16.71, "learning_rate": 8.220568335588634e-06, "loss": 1.1566, "step": 123500 }, { "epoch": 16.71, "eval_accuracy": 0.7563581433672069, "eval_loss": 1.094774603843689, "eval_runtime": 2.1272, "eval_samples_per_second": 233.175, "eval_steps_per_second": 7.522, "step": 123500 }, { "epoch": 16.78, "learning_rate": 8.051420838971584e-06, "loss": 1.157, "step": 124000 }, { "epoch": 16.78, "eval_accuracy": 0.7568443220476267, "eval_loss": 1.0895816087722778, "eval_runtime": 1.8979, "eval_samples_per_second": 261.337, "eval_steps_per_second": 8.43, "step": 124000 }, { "epoch": 16.85, "learning_rate": 7.882273342354534e-06, "loss": 1.1598, "step": 124500 }, { "epoch": 16.85, "eval_accuracy": 0.7582212358242888, "eval_loss": 1.086458683013916, "eval_runtime": 2.2441, "eval_samples_per_second": 221.026, "eval_steps_per_second": 7.13, "step": 124500 }, { "epoch": 16.91, "learning_rate": 7.713125845737482e-06, "loss": 1.1567, "step": 125000 }, { "epoch": 16.91, "eval_accuracy": 0.7565769744554401, "eval_loss": 1.1091084480285645, "eval_runtime": 2.2461, "eval_samples_per_second": 220.83, "eval_steps_per_second": 7.124, "step": 125000 }, { "epoch": 16.98, "learning_rate": 7.543978349120433e-06, "loss": 1.1643, "step": 125500 }, { "epoch": 16.98, "eval_accuracy": 0.7521943363306939, "eval_loss": 1.1232304573059082, "eval_runtime": 1.9345, "eval_samples_per_second": 256.4, "eval_steps_per_second": 8.271, "step": 125500 }, { "epoch": 17.05, "learning_rate": 7.374830852503384e-06, "loss": 1.1536, "step": 126000 }, { "epoch": 17.05, "eval_accuracy": 0.7583081570996979, "eval_loss": 1.0930777788162231, "eval_runtime": 2.1324, "eval_samples_per_second": 232.599, "eval_steps_per_second": 7.503, "step": 126000 }, { "epoch": 17.12, "learning_rate": 7.205683355886334e-06, "loss": 1.1486, "step": 126500 }, { "epoch": 17.12, "eval_accuracy": 0.7540195062318956, "eval_loss": 1.1099752187728882, "eval_runtime": 1.917, "eval_samples_per_second": 258.744, "eval_steps_per_second": 8.347, "step": 126500 }, { "epoch": 17.19, "learning_rate": 7.036535859269283e-06, "loss": 1.1551, "step": 127000 }, { "epoch": 17.19, "eval_accuracy": 0.7537926501999014, "eval_loss": 1.1018755435943604, "eval_runtime": 2.2805, "eval_samples_per_second": 217.494, "eval_steps_per_second": 7.016, "step": 127000 }, { "epoch": 17.25, "learning_rate": 6.867388362652233e-06, "loss": 1.1491, "step": 127500 }, { "epoch": 17.25, "eval_accuracy": 0.7546221700303138, "eval_loss": 1.096489667892456, "eval_runtime": 2.0207, "eval_samples_per_second": 245.455, "eval_steps_per_second": 7.918, "step": 127500 }, { "epoch": 17.32, "learning_rate": 6.698240866035183e-06, "loss": 1.152, "step": 128000 }, { "epoch": 17.32, "eval_accuracy": 0.7590838783208054, "eval_loss": 1.0724998712539673, "eval_runtime": 2.2748, "eval_samples_per_second": 218.043, "eval_steps_per_second": 7.034, "step": 128000 }, { "epoch": 17.39, "learning_rate": 6.5290933694181334e-06, "loss": 1.1521, "step": 128500 }, { "epoch": 17.39, "eval_accuracy": 0.7526631431935811, "eval_loss": 1.1246150732040405, "eval_runtime": 2.2564, "eval_samples_per_second": 219.819, "eval_steps_per_second": 7.091, "step": 128500 }, { "epoch": 17.46, "learning_rate": 6.359945872801083e-06, "loss": 1.1518, "step": 129000 }, { "epoch": 17.46, "eval_accuracy": 0.7570206230802984, "eval_loss": 1.1025118827819824, "eval_runtime": 2.2644, "eval_samples_per_second": 219.041, "eval_steps_per_second": 7.066, "step": 129000 }, { "epoch": 17.52, "learning_rate": 6.190798376184033e-06, "loss": 1.1525, "step": 129500 }, { "epoch": 17.52, "eval_accuracy": 0.7553470100392842, "eval_loss": 1.1027612686157227, "eval_runtime": 2.2829, "eval_samples_per_second": 217.27, "eval_steps_per_second": 7.009, "step": 129500 }, { "epoch": 17.59, "learning_rate": 6.021650879566982e-06, "loss": 1.1509, "step": 130000 }, { "epoch": 17.59, "eval_accuracy": 0.753968902322795, "eval_loss": 1.1140735149383545, "eval_runtime": 2.0112, "eval_samples_per_second": 246.621, "eval_steps_per_second": 7.956, "step": 130000 }, { "epoch": 17.66, "learning_rate": 5.852503382949932e-06, "loss": 1.1522, "step": 130500 }, { "epoch": 17.66, "eval_accuracy": 0.7523416805483493, "eval_loss": 1.1235767602920532, "eval_runtime": 2.0162, "eval_samples_per_second": 246.006, "eval_steps_per_second": 7.936, "step": 130500 }, { "epoch": 17.73, "learning_rate": 5.683355886332883e-06, "loss": 1.1488, "step": 131000 }, { "epoch": 17.73, "eval_accuracy": 0.7589817903428665, "eval_loss": 1.0937731266021729, "eval_runtime": 2.1303, "eval_samples_per_second": 232.835, "eval_steps_per_second": 7.511, "step": 131000 }, { "epoch": 17.79, "learning_rate": 5.514208389715832e-06, "loss": 1.1477, "step": 131500 }, { "epoch": 17.79, "eval_accuracy": 0.7519756032882524, "eval_loss": 1.1069520711898804, "eval_runtime": 2.1341, "eval_samples_per_second": 232.421, "eval_steps_per_second": 7.497, "step": 131500 }, { "epoch": 17.86, "learning_rate": 5.345060893098782e-06, "loss": 1.1498, "step": 132000 }, { "epoch": 17.86, "eval_accuracy": 0.7560714094247574, "eval_loss": 1.0885875225067139, "eval_runtime": 2.2647, "eval_samples_per_second": 219.014, "eval_steps_per_second": 7.065, "step": 132000 }, { "epoch": 17.93, "learning_rate": 5.175913396481733e-06, "loss": 1.1489, "step": 132500 }, { "epoch": 17.93, "eval_accuracy": 0.75788641382883, "eval_loss": 1.0874009132385254, "eval_runtime": 2.1397, "eval_samples_per_second": 231.808, "eval_steps_per_second": 7.478, "step": 132500 }, { "epoch": 18.0, "learning_rate": 5.006765899864682e-06, "loss": 1.1462, "step": 133000 }, { "epoch": 18.0, "eval_accuracy": 0.7556547699093623, "eval_loss": 1.1015816926956177, "eval_runtime": 2.2706, "eval_samples_per_second": 218.448, "eval_steps_per_second": 7.047, "step": 133000 }, { "epoch": 18.06, "learning_rate": 4.837618403247632e-06, "loss": 1.1448, "step": 133500 }, { "epoch": 18.06, "eval_accuracy": 0.7546062508530094, "eval_loss": 1.0937751531600952, "eval_runtime": 2.022, "eval_samples_per_second": 245.305, "eval_steps_per_second": 7.913, "step": 133500 }, { "epoch": 18.13, "learning_rate": 4.6684709066305826e-06, "loss": 1.1425, "step": 134000 }, { "epoch": 18.13, "eval_accuracy": 0.7552112751822265, "eval_loss": 1.0958871841430664, "eval_runtime": 2.2598, "eval_samples_per_second": 219.486, "eval_steps_per_second": 7.08, "step": 134000 }, { "epoch": 18.2, "learning_rate": 4.499323410013532e-06, "loss": 1.1414, "step": 134500 }, { "epoch": 18.2, "eval_accuracy": 0.7558802565930149, "eval_loss": 1.0867284536361694, "eval_runtime": 2.029, "eval_samples_per_second": 244.456, "eval_steps_per_second": 7.886, "step": 134500 }, { "epoch": 18.27, "learning_rate": 4.330175913396482e-06, "loss": 1.1453, "step": 135000 }, { "epoch": 18.27, "eval_accuracy": 0.7591597591597592, "eval_loss": 1.0756407976150513, "eval_runtime": 2.1403, "eval_samples_per_second": 231.744, "eval_steps_per_second": 7.476, "step": 135000 }, { "epoch": 18.34, "learning_rate": 4.161028416779432e-06, "loss": 1.1448, "step": 135500 }, { "epoch": 18.34, "eval_accuracy": 0.7545405695862439, "eval_loss": 1.0937347412109375, "eval_runtime": 2.2479, "eval_samples_per_second": 220.651, "eval_steps_per_second": 7.118, "step": 135500 }, { "epoch": 18.4, "learning_rate": 3.991880920162381e-06, "loss": 1.1471, "step": 136000 }, { "epoch": 18.4, "eval_accuracy": 0.7537506745817593, "eval_loss": 1.1153604984283447, "eval_runtime": 2.2669, "eval_samples_per_second": 218.8, "eval_steps_per_second": 7.058, "step": 136000 }, { "epoch": 18.47, "learning_rate": 3.822733423545332e-06, "loss": 1.1484, "step": 136500 }, { "epoch": 18.47, "eval_accuracy": 0.7537701926689208, "eval_loss": 1.1114356517791748, "eval_runtime": 2.0201, "eval_samples_per_second": 245.53, "eval_steps_per_second": 7.92, "step": 136500 }, { "epoch": 18.54, "learning_rate": 3.6535859269282817e-06, "loss": 1.1463, "step": 137000 }, { "epoch": 18.54, "eval_accuracy": 0.7513940144923632, "eval_loss": 1.1001887321472168, "eval_runtime": 2.1485, "eval_samples_per_second": 230.858, "eval_steps_per_second": 7.447, "step": 137000 }, { "epoch": 18.61, "learning_rate": 3.4844384303112316e-06, "loss": 1.1512, "step": 137500 }, { "epoch": 18.61, "eval_accuracy": 0.7586606950140298, "eval_loss": 1.0663777589797974, "eval_runtime": 2.1796, "eval_samples_per_second": 227.562, "eval_steps_per_second": 7.341, "step": 137500 }, { "epoch": 18.67, "learning_rate": 3.315290933694182e-06, "loss": 1.1464, "step": 138000 }, { "epoch": 18.67, "eval_accuracy": 0.7583911006384086, "eval_loss": 1.0735660791397095, "eval_runtime": 2.251, "eval_samples_per_second": 220.348, "eval_steps_per_second": 7.108, "step": 138000 }, { "epoch": 18.74, "learning_rate": 3.1461434370771314e-06, "loss": 1.1457, "step": 138500 }, { "epoch": 18.74, "eval_accuracy": 0.7604149648750205, "eval_loss": 1.080166220664978, "eval_runtime": 2.1301, "eval_samples_per_second": 232.857, "eval_steps_per_second": 7.512, "step": 138500 }, { "epoch": 18.81, "learning_rate": 2.9769959404600813e-06, "loss": 1.1464, "step": 139000 }, { "epoch": 18.81, "eval_accuracy": 0.75420555676145, "eval_loss": 1.1091315746307373, "eval_runtime": 2.1281, "eval_samples_per_second": 233.067, "eval_steps_per_second": 7.518, "step": 139000 }, { "epoch": 18.88, "learning_rate": 2.8078484438430312e-06, "loss": 1.1415, "step": 139500 }, { "epoch": 18.88, "eval_accuracy": 0.7594658329138073, "eval_loss": 1.0856248140335083, "eval_runtime": 2.2679, "eval_samples_per_second": 218.701, "eval_steps_per_second": 7.055, "step": 139500 }, { "epoch": 18.94, "learning_rate": 2.638700947225981e-06, "loss": 1.149, "step": 140000 }, { "epoch": 18.94, "eval_accuracy": 0.7557433607017732, "eval_loss": 1.0958749055862427, "eval_runtime": 2.1355, "eval_samples_per_second": 232.263, "eval_steps_per_second": 7.492, "step": 140000 }, { "epoch": 19.01, "learning_rate": 2.469553450608931e-06, "loss": 1.1445, "step": 140500 }, { "epoch": 19.01, "eval_accuracy": 0.7600160578081092, "eval_loss": 1.0713545083999634, "eval_runtime": 2.0458, "eval_samples_per_second": 242.449, "eval_steps_per_second": 7.821, "step": 140500 }, { "epoch": 19.08, "learning_rate": 2.300405953991881e-06, "loss": 1.1378, "step": 141000 }, { "epoch": 19.08, "eval_accuracy": 0.7528535980148884, "eval_loss": 1.1179081201553345, "eval_runtime": 2.239, "eval_samples_per_second": 221.527, "eval_steps_per_second": 7.146, "step": 141000 }, { "epoch": 19.15, "learning_rate": 2.131258457374831e-06, "loss": 1.143, "step": 141500 }, { "epoch": 19.15, "eval_accuracy": 0.7608561044555122, "eval_loss": 1.085029125213623, "eval_runtime": 2.2698, "eval_samples_per_second": 218.525, "eval_steps_per_second": 7.049, "step": 141500 }, { "epoch": 19.22, "learning_rate": 1.962110960757781e-06, "loss": 1.1412, "step": 142000 }, { "epoch": 19.22, "eval_accuracy": 0.7571760842796552, "eval_loss": 1.1089389324188232, "eval_runtime": 2.2591, "eval_samples_per_second": 219.56, "eval_steps_per_second": 7.083, "step": 142000 }, { "epoch": 19.28, "learning_rate": 1.7929634641407306e-06, "loss": 1.1393, "step": 142500 }, { "epoch": 19.28, "eval_accuracy": 0.7580414678206476, "eval_loss": 1.095458984375, "eval_runtime": 1.8948, "eval_samples_per_second": 261.767, "eval_steps_per_second": 8.444, "step": 142500 }, { "epoch": 19.35, "learning_rate": 1.6238159675236807e-06, "loss": 1.1492, "step": 143000 }, { "epoch": 19.35, "eval_accuracy": 0.755947708880288, "eval_loss": 1.0982964038848877, "eval_runtime": 2.019, "eval_samples_per_second": 245.66, "eval_steps_per_second": 7.925, "step": 143000 }, { "epoch": 19.42, "learning_rate": 1.4546684709066306e-06, "loss": 1.1455, "step": 143500 }, { "epoch": 19.42, "eval_accuracy": 0.7540966020328801, "eval_loss": 1.12480628490448, "eval_runtime": 1.9105, "eval_samples_per_second": 259.614, "eval_steps_per_second": 8.375, "step": 143500 }, { "epoch": 19.49, "learning_rate": 1.2855209742895805e-06, "loss": 1.1442, "step": 144000 }, { "epoch": 19.49, "eval_accuracy": 0.7567218409366169, "eval_loss": 1.1033666133880615, "eval_runtime": 2.1366, "eval_samples_per_second": 232.142, "eval_steps_per_second": 7.488, "step": 144000 }, { "epoch": 19.55, "learning_rate": 1.1163734776725304e-06, "loss": 1.1385, "step": 144500 }, { "epoch": 19.55, "eval_accuracy": 0.7598665473187404, "eval_loss": 1.0718320608139038, "eval_runtime": 2.1421, "eval_samples_per_second": 231.551, "eval_steps_per_second": 7.469, "step": 144500 }, { "epoch": 19.62, "learning_rate": 9.472259810554805e-07, "loss": 1.1393, "step": 145000 }, { "epoch": 19.62, "eval_accuracy": 0.7511771590321439, "eval_loss": 1.1188093423843384, "eval_runtime": 1.937, "eval_samples_per_second": 256.065, "eval_steps_per_second": 8.26, "step": 145000 }, { "epoch": 19.69, "learning_rate": 7.780784844384303e-07, "loss": 1.1408, "step": 145500 }, { "epoch": 19.69, "eval_accuracy": 0.7571148718506829, "eval_loss": 1.096737027168274, "eval_runtime": 2.128, "eval_samples_per_second": 233.085, "eval_steps_per_second": 7.519, "step": 145500 }, { "epoch": 19.76, "learning_rate": 6.089309878213802e-07, "loss": 1.1443, "step": 146000 }, { "epoch": 19.76, "eval_accuracy": 0.7525236340330075, "eval_loss": 1.115225911140442, "eval_runtime": 2.0196, "eval_samples_per_second": 245.588, "eval_steps_per_second": 7.922, "step": 146000 }, { "epoch": 19.82, "learning_rate": 4.397834912043302e-07, "loss": 1.1495, "step": 146500 }, { "epoch": 19.82, "eval_accuracy": 0.7534898820473974, "eval_loss": 1.1063731908798218, "eval_runtime": 2.0319, "eval_samples_per_second": 244.111, "eval_steps_per_second": 7.875, "step": 146500 }, { "epoch": 19.89, "learning_rate": 2.7063599458728015e-07, "loss": 1.1397, "step": 147000 }, { "epoch": 19.89, "eval_accuracy": 0.7602626366768863, "eval_loss": 1.0799843072891235, "eval_runtime": 2.3196, "eval_samples_per_second": 213.833, "eval_steps_per_second": 6.898, "step": 147000 }, { "epoch": 19.96, "learning_rate": 1.0148849797023004e-07, "loss": 1.1399, "step": 147500 }, { "epoch": 19.96, "eval_accuracy": 0.7566619534479008, "eval_loss": 1.0812491178512573, "eval_runtime": 2.2409, "eval_samples_per_second": 221.343, "eval_steps_per_second": 7.14, "step": 147500 }, { "epoch": 20.0, "step": 147800, "total_flos": 1.2450139383539958e+18, "train_loss": 1.2263236557646922, "train_runtime": 47907.234, "train_samples_per_second": 98.713, "train_steps_per_second": 3.085 } ], "max_steps": 147800, "num_train_epochs": 20, "total_flos": 1.2450139383539958e+18, "trial_name": null, "trial_params": null }