wikitext103_roberta-base / trainer_state.json
liuyanchen1015's picture
End of training
8670e2c
{
"best_metric": 0.7611846765843823,
"best_model_checkpoint": "./finetuned/wikitext103_roberta-base_v2/checkpoint-123000",
"epoch": 20.0,
"global_step": 147800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 4.983085250338295e-05,
"loss": 1.4212,
"step": 500
},
{
"epoch": 0.07,
"eval_accuracy": 0.7235698186111409,
"eval_loss": 1.3007760047912598,
"eval_runtime": 2.6682,
"eval_samples_per_second": 185.892,
"eval_steps_per_second": 5.997,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 4.96617050067659e-05,
"loss": 1.3933,
"step": 1000
},
{
"epoch": 0.14,
"eval_accuracy": 0.7226983316766515,
"eval_loss": 1.2827116250991821,
"eval_runtime": 2.1995,
"eval_samples_per_second": 225.505,
"eval_steps_per_second": 7.274,
"step": 1000
},
{
"epoch": 0.2,
"learning_rate": 4.949255751014885e-05,
"loss": 1.3917,
"step": 1500
},
{
"epoch": 0.2,
"eval_accuracy": 0.7266257137444863,
"eval_loss": 1.2815688848495483,
"eval_runtime": 2.1563,
"eval_samples_per_second": 230.027,
"eval_steps_per_second": 7.42,
"step": 1500
},
{
"epoch": 0.27,
"learning_rate": 4.93234100135318e-05,
"loss": 1.3824,
"step": 2000
},
{
"epoch": 0.27,
"eval_accuracy": 0.7251124131353045,
"eval_loss": 1.294681191444397,
"eval_runtime": 2.1464,
"eval_samples_per_second": 231.089,
"eval_steps_per_second": 7.454,
"step": 2000
},
{
"epoch": 0.34,
"learning_rate": 4.915426251691475e-05,
"loss": 1.3835,
"step": 2500
},
{
"epoch": 0.34,
"eval_accuracy": 0.7289371440736602,
"eval_loss": 1.2555147409439087,
"eval_runtime": 2.2812,
"eval_samples_per_second": 217.427,
"eval_steps_per_second": 7.014,
"step": 2500
},
{
"epoch": 0.41,
"learning_rate": 4.89851150202977e-05,
"loss": 1.3758,
"step": 3000
},
{
"epoch": 0.41,
"eval_accuracy": 0.7279413775189347,
"eval_loss": 1.2611732482910156,
"eval_runtime": 2.2556,
"eval_samples_per_second": 219.898,
"eval_steps_per_second": 7.093,
"step": 3000
},
{
"epoch": 0.47,
"learning_rate": 4.881596752368065e-05,
"loss": 1.3745,
"step": 3500
},
{
"epoch": 0.47,
"eval_accuracy": 0.7244728228792188,
"eval_loss": 1.279096007347107,
"eval_runtime": 1.9103,
"eval_samples_per_second": 259.647,
"eval_steps_per_second": 8.376,
"step": 3500
},
{
"epoch": 0.54,
"learning_rate": 4.86468200270636e-05,
"loss": 1.3761,
"step": 4000
},
{
"epoch": 0.54,
"eval_accuracy": 0.7286496152595643,
"eval_loss": 1.2621806859970093,
"eval_runtime": 2.2565,
"eval_samples_per_second": 219.811,
"eval_steps_per_second": 7.091,
"step": 4000
},
{
"epoch": 0.61,
"learning_rate": 4.847767253044655e-05,
"loss": 1.3735,
"step": 4500
},
{
"epoch": 0.61,
"eval_accuracy": 0.7359971845474972,
"eval_loss": 1.231848955154419,
"eval_runtime": 2.2526,
"eval_samples_per_second": 220.188,
"eval_steps_per_second": 7.103,
"step": 4500
},
{
"epoch": 0.68,
"learning_rate": 4.83085250338295e-05,
"loss": 1.3717,
"step": 5000
},
{
"epoch": 0.68,
"eval_accuracy": 0.7259761388286334,
"eval_loss": 1.2777374982833862,
"eval_runtime": 2.343,
"eval_samples_per_second": 211.693,
"eval_steps_per_second": 6.829,
"step": 5000
},
{
"epoch": 0.74,
"learning_rate": 4.813937753721245e-05,
"loss": 1.3675,
"step": 5500
},
{
"epoch": 0.74,
"eval_accuracy": 0.7309145880574452,
"eval_loss": 1.2589675188064575,
"eval_runtime": 2.0349,
"eval_samples_per_second": 243.741,
"eval_steps_per_second": 7.863,
"step": 5500
},
{
"epoch": 0.81,
"learning_rate": 4.79702300405954e-05,
"loss": 1.3585,
"step": 6000
},
{
"epoch": 0.81,
"eval_accuracy": 0.7253910822602958,
"eval_loss": 1.2838590145111084,
"eval_runtime": 2.2225,
"eval_samples_per_second": 223.175,
"eval_steps_per_second": 7.199,
"step": 6000
},
{
"epoch": 0.88,
"learning_rate": 4.780108254397835e-05,
"loss": 1.3579,
"step": 6500
},
{
"epoch": 0.88,
"eval_accuracy": 0.7347076623797687,
"eval_loss": 1.2341055870056152,
"eval_runtime": 1.8958,
"eval_samples_per_second": 261.625,
"eval_steps_per_second": 8.44,
"step": 6500
},
{
"epoch": 0.95,
"learning_rate": 4.76319350473613e-05,
"loss": 1.3588,
"step": 7000
},
{
"epoch": 0.95,
"eval_accuracy": 0.7326682357975821,
"eval_loss": 1.2412930727005005,
"eval_runtime": 2.1422,
"eval_samples_per_second": 231.54,
"eval_steps_per_second": 7.469,
"step": 7000
},
{
"epoch": 1.01,
"learning_rate": 4.746278755074425e-05,
"loss": 1.351,
"step": 7500
},
{
"epoch": 1.01,
"eval_accuracy": 0.7317281968967362,
"eval_loss": 1.2459222078323364,
"eval_runtime": 2.2341,
"eval_samples_per_second": 222.016,
"eval_steps_per_second": 7.162,
"step": 7500
},
{
"epoch": 1.08,
"learning_rate": 4.72936400541272e-05,
"loss": 1.3394,
"step": 8000
},
{
"epoch": 1.08,
"eval_accuracy": 0.7314233839745815,
"eval_loss": 1.242180347442627,
"eval_runtime": 2.2469,
"eval_samples_per_second": 220.751,
"eval_steps_per_second": 7.121,
"step": 8000
},
{
"epoch": 1.15,
"learning_rate": 4.712449255751015e-05,
"loss": 1.3429,
"step": 8500
},
{
"epoch": 1.15,
"eval_accuracy": 0.734901599848407,
"eval_loss": 1.2285393476486206,
"eval_runtime": 2.1498,
"eval_samples_per_second": 230.723,
"eval_steps_per_second": 7.443,
"step": 8500
},
{
"epoch": 1.22,
"learning_rate": 4.69553450608931e-05,
"loss": 1.3393,
"step": 9000
},
{
"epoch": 1.22,
"eval_accuracy": 0.7324128503075872,
"eval_loss": 1.2404521703720093,
"eval_runtime": 2.2579,
"eval_samples_per_second": 219.671,
"eval_steps_per_second": 7.086,
"step": 9000
},
{
"epoch": 1.29,
"learning_rate": 4.678619756427605e-05,
"loss": 1.3421,
"step": 9500
},
{
"epoch": 1.29,
"eval_accuracy": 0.735434836099188,
"eval_loss": 1.2255122661590576,
"eval_runtime": 2.2664,
"eval_samples_per_second": 218.847,
"eval_steps_per_second": 7.06,
"step": 9500
},
{
"epoch": 1.35,
"learning_rate": 4.6617050067659e-05,
"loss": 1.3426,
"step": 10000
},
{
"epoch": 1.35,
"eval_accuracy": 0.7333513221802482,
"eval_loss": 1.2296382188796997,
"eval_runtime": 2.1344,
"eval_samples_per_second": 232.385,
"eval_steps_per_second": 7.496,
"step": 10000
},
{
"epoch": 1.42,
"learning_rate": 4.644790257104195e-05,
"loss": 1.3326,
"step": 10500
},
{
"epoch": 1.42,
"eval_accuracy": 0.7351480394040008,
"eval_loss": 1.2158225774765015,
"eval_runtime": 2.1342,
"eval_samples_per_second": 232.406,
"eval_steps_per_second": 7.497,
"step": 10500
},
{
"epoch": 1.49,
"learning_rate": 4.62787550744249e-05,
"loss": 1.3355,
"step": 11000
},
{
"epoch": 1.49,
"eval_accuracy": 0.7364017876607805,
"eval_loss": 1.2255741357803345,
"eval_runtime": 2.1391,
"eval_samples_per_second": 231.876,
"eval_steps_per_second": 7.48,
"step": 11000
},
{
"epoch": 1.56,
"learning_rate": 4.610960757780785e-05,
"loss": 1.3324,
"step": 11500
},
{
"epoch": 1.56,
"eval_accuracy": 0.7355829363706523,
"eval_loss": 1.2208420038223267,
"eval_runtime": 2.251,
"eval_samples_per_second": 220.348,
"eval_steps_per_second": 7.108,
"step": 11500
},
{
"epoch": 1.62,
"learning_rate": 4.59404600811908e-05,
"loss": 1.3331,
"step": 12000
},
{
"epoch": 1.62,
"eval_accuracy": 0.7347190272757148,
"eval_loss": 1.2230000495910645,
"eval_runtime": 2.1329,
"eval_samples_per_second": 232.552,
"eval_steps_per_second": 7.502,
"step": 12000
},
{
"epoch": 1.69,
"learning_rate": 4.577131258457375e-05,
"loss": 1.3326,
"step": 12500
},
{
"epoch": 1.69,
"eval_accuracy": 0.7316047842477829,
"eval_loss": 1.250501275062561,
"eval_runtime": 2.2566,
"eval_samples_per_second": 219.795,
"eval_steps_per_second": 7.09,
"step": 12500
},
{
"epoch": 1.76,
"learning_rate": 4.56021650879567e-05,
"loss": 1.3339,
"step": 13000
},
{
"epoch": 1.76,
"eval_accuracy": 0.7321860715246034,
"eval_loss": 1.2471247911453247,
"eval_runtime": 1.8846,
"eval_samples_per_second": 263.185,
"eval_steps_per_second": 8.49,
"step": 13000
},
{
"epoch": 1.83,
"learning_rate": 4.543301759133965e-05,
"loss": 1.3286,
"step": 13500
},
{
"epoch": 1.83,
"eval_accuracy": 0.7358603599923753,
"eval_loss": 1.218480110168457,
"eval_runtime": 2.183,
"eval_samples_per_second": 227.211,
"eval_steps_per_second": 7.329,
"step": 13500
},
{
"epoch": 1.89,
"learning_rate": 4.52638700947226e-05,
"loss": 1.3314,
"step": 14000
},
{
"epoch": 1.89,
"eval_accuracy": 0.7363198956152989,
"eval_loss": 1.2333292961120605,
"eval_runtime": 1.922,
"eval_samples_per_second": 258.068,
"eval_steps_per_second": 8.325,
"step": 14000
},
{
"epoch": 1.96,
"learning_rate": 4.509472259810555e-05,
"loss": 1.325,
"step": 14500
},
{
"epoch": 1.96,
"eval_accuracy": 0.7320171162387606,
"eval_loss": 1.2384274005889893,
"eval_runtime": 2.1484,
"eval_samples_per_second": 230.871,
"eval_steps_per_second": 7.447,
"step": 14500
},
{
"epoch": 2.03,
"learning_rate": 4.49255751014885e-05,
"loss": 1.3251,
"step": 15000
},
{
"epoch": 2.03,
"eval_accuracy": 0.7332704232946886,
"eval_loss": 1.2141916751861572,
"eval_runtime": 2.1498,
"eval_samples_per_second": 230.723,
"eval_steps_per_second": 7.443,
"step": 15000
},
{
"epoch": 2.1,
"learning_rate": 4.475642760487145e-05,
"loss": 1.3136,
"step": 15500
},
{
"epoch": 2.1,
"eval_accuracy": 0.7346380072100398,
"eval_loss": 1.2162067890167236,
"eval_runtime": 2.2669,
"eval_samples_per_second": 218.8,
"eval_steps_per_second": 7.058,
"step": 15500
},
{
"epoch": 2.17,
"learning_rate": 4.45872801082544e-05,
"loss": 1.3202,
"step": 16000
},
{
"epoch": 2.17,
"eval_accuracy": 0.7368549767669357,
"eval_loss": 1.220727801322937,
"eval_runtime": 2.1251,
"eval_samples_per_second": 233.399,
"eval_steps_per_second": 7.529,
"step": 16000
},
{
"epoch": 2.23,
"learning_rate": 4.441813261163735e-05,
"loss": 1.3168,
"step": 16500
},
{
"epoch": 2.23,
"eval_accuracy": 0.7391136589130195,
"eval_loss": 1.1931146383285522,
"eval_runtime": 2.2628,
"eval_samples_per_second": 219.196,
"eval_steps_per_second": 7.071,
"step": 16500
},
{
"epoch": 2.3,
"learning_rate": 4.42489851150203e-05,
"loss": 1.3134,
"step": 17000
},
{
"epoch": 2.3,
"eval_accuracy": 0.7398447820343461,
"eval_loss": 1.1856846809387207,
"eval_runtime": 2.1315,
"eval_samples_per_second": 232.703,
"eval_steps_per_second": 7.507,
"step": 17000
},
{
"epoch": 2.37,
"learning_rate": 4.407983761840325e-05,
"loss": 1.3085,
"step": 17500
},
{
"epoch": 2.37,
"eval_accuracy": 0.7383094012462748,
"eval_loss": 1.2111510038375854,
"eval_runtime": 2.2389,
"eval_samples_per_second": 221.538,
"eval_steps_per_second": 7.146,
"step": 17500
},
{
"epoch": 2.44,
"learning_rate": 4.39106901217862e-05,
"loss": 1.3165,
"step": 18000
},
{
"epoch": 2.44,
"eval_accuracy": 0.736477152685609,
"eval_loss": 1.2284483909606934,
"eval_runtime": 2.2655,
"eval_samples_per_second": 218.936,
"eval_steps_per_second": 7.062,
"step": 18000
},
{
"epoch": 2.5,
"learning_rate": 4.374154262516915e-05,
"loss": 1.3144,
"step": 18500
},
{
"epoch": 2.5,
"eval_accuracy": 0.7387957989256795,
"eval_loss": 1.2013208866119385,
"eval_runtime": 2.1477,
"eval_samples_per_second": 230.94,
"eval_steps_per_second": 7.45,
"step": 18500
},
{
"epoch": 2.57,
"learning_rate": 4.35723951285521e-05,
"loss": 1.319,
"step": 19000
},
{
"epoch": 2.57,
"eval_accuracy": 0.7355637897925513,
"eval_loss": 1.217348337173462,
"eval_runtime": 1.8976,
"eval_samples_per_second": 261.38,
"eval_steps_per_second": 8.432,
"step": 19000
},
{
"epoch": 2.64,
"learning_rate": 4.340324763193505e-05,
"loss": 1.3147,
"step": 19500
},
{
"epoch": 2.64,
"eval_accuracy": 0.7403712864559268,
"eval_loss": 1.1786144971847534,
"eval_runtime": 2.1417,
"eval_samples_per_second": 231.588,
"eval_steps_per_second": 7.471,
"step": 19500
},
{
"epoch": 2.71,
"learning_rate": 4.3234100135318e-05,
"loss": 1.311,
"step": 20000
},
{
"epoch": 2.71,
"eval_accuracy": 0.7372879017795558,
"eval_loss": 1.2008836269378662,
"eval_runtime": 2.2409,
"eval_samples_per_second": 221.338,
"eval_steps_per_second": 7.14,
"step": 20000
},
{
"epoch": 2.77,
"learning_rate": 4.306495263870095e-05,
"loss": 1.3131,
"step": 20500
},
{
"epoch": 2.77,
"eval_accuracy": 0.7366438077684113,
"eval_loss": 1.1992290019989014,
"eval_runtime": 2.3077,
"eval_samples_per_second": 214.937,
"eval_steps_per_second": 6.933,
"step": 20500
},
{
"epoch": 2.84,
"learning_rate": 4.28958051420839e-05,
"loss": 1.3036,
"step": 21000
},
{
"epoch": 2.84,
"eval_accuracy": 0.7369976679863333,
"eval_loss": 1.2166584730148315,
"eval_runtime": 2.1243,
"eval_samples_per_second": 233.491,
"eval_steps_per_second": 7.532,
"step": 21000
},
{
"epoch": 2.91,
"learning_rate": 4.272665764546685e-05,
"loss": 1.3122,
"step": 21500
},
{
"epoch": 2.91,
"eval_accuracy": 0.7378714413413875,
"eval_loss": 1.2138844728469849,
"eval_runtime": 2.155,
"eval_samples_per_second": 230.165,
"eval_steps_per_second": 7.425,
"step": 21500
},
{
"epoch": 2.98,
"learning_rate": 4.25575101488498e-05,
"loss": 1.3091,
"step": 22000
},
{
"epoch": 2.98,
"eval_accuracy": 0.7364524804942348,
"eval_loss": 1.2197295427322388,
"eval_runtime": 2.0278,
"eval_samples_per_second": 244.602,
"eval_steps_per_second": 7.89,
"step": 22000
},
{
"epoch": 3.04,
"learning_rate": 4.238836265223275e-05,
"loss": 1.304,
"step": 22500
},
{
"epoch": 3.04,
"eval_accuracy": 0.7371755128447044,
"eval_loss": 1.186427354812622,
"eval_runtime": 2.0462,
"eval_samples_per_second": 242.4,
"eval_steps_per_second": 7.819,
"step": 22500
},
{
"epoch": 3.11,
"learning_rate": 4.22192151556157e-05,
"loss": 1.3015,
"step": 23000
},
{
"epoch": 3.11,
"eval_accuracy": 0.7355039424985249,
"eval_loss": 1.2046276330947876,
"eval_runtime": 2.0572,
"eval_samples_per_second": 241.108,
"eval_steps_per_second": 7.778,
"step": 23000
},
{
"epoch": 3.18,
"learning_rate": 4.205006765899865e-05,
"loss": 1.2916,
"step": 23500
},
{
"epoch": 3.18,
"eval_accuracy": 0.7344874591057797,
"eval_loss": 1.2312067747116089,
"eval_runtime": 2.3523,
"eval_samples_per_second": 210.856,
"eval_steps_per_second": 6.802,
"step": 23500
},
{
"epoch": 3.25,
"learning_rate": 4.18809201623816e-05,
"loss": 1.2966,
"step": 24000
},
{
"epoch": 3.25,
"eval_accuracy": 0.7372955288985823,
"eval_loss": 1.2116466760635376,
"eval_runtime": 2.306,
"eval_samples_per_second": 215.094,
"eval_steps_per_second": 6.939,
"step": 24000
},
{
"epoch": 3.32,
"learning_rate": 4.171177266576455e-05,
"loss": 1.2991,
"step": 24500
},
{
"epoch": 3.32,
"eval_accuracy": 0.737794624029042,
"eval_loss": 1.2262712717056274,
"eval_runtime": 2.2208,
"eval_samples_per_second": 223.344,
"eval_steps_per_second": 7.205,
"step": 24500
},
{
"epoch": 3.38,
"learning_rate": 4.15426251691475e-05,
"loss": 1.3003,
"step": 25000
},
{
"epoch": 3.38,
"eval_accuracy": 0.741288193792419,
"eval_loss": 1.184373378753662,
"eval_runtime": 2.336,
"eval_samples_per_second": 212.325,
"eval_steps_per_second": 6.849,
"step": 25000
},
{
"epoch": 3.45,
"learning_rate": 4.137347767253045e-05,
"loss": 1.2942,
"step": 25500
},
{
"epoch": 3.45,
"eval_accuracy": 0.7368591999133871,
"eval_loss": 1.195932149887085,
"eval_runtime": 2.1558,
"eval_samples_per_second": 230.073,
"eval_steps_per_second": 7.422,
"step": 25500
},
{
"epoch": 3.52,
"learning_rate": 4.12043301759134e-05,
"loss": 1.2988,
"step": 26000
},
{
"epoch": 3.52,
"eval_accuracy": 0.7381074306659838,
"eval_loss": 1.2017642259597778,
"eval_runtime": 2.1521,
"eval_samples_per_second": 230.473,
"eval_steps_per_second": 7.435,
"step": 26000
},
{
"epoch": 3.59,
"learning_rate": 4.103518267929635e-05,
"loss": 1.2936,
"step": 26500
},
{
"epoch": 3.59,
"eval_accuracy": 0.7388343788536808,
"eval_loss": 1.1992815732955933,
"eval_runtime": 2.3209,
"eval_samples_per_second": 213.713,
"eval_steps_per_second": 6.894,
"step": 26500
},
{
"epoch": 3.65,
"learning_rate": 4.08660351826793e-05,
"loss": 1.2937,
"step": 27000
},
{
"epoch": 3.65,
"eval_accuracy": 0.7358311660164716,
"eval_loss": 1.2154779434204102,
"eval_runtime": 2.1442,
"eval_samples_per_second": 231.319,
"eval_steps_per_second": 7.462,
"step": 27000
},
{
"epoch": 3.72,
"learning_rate": 4.069688768606225e-05,
"loss": 1.3021,
"step": 27500
},
{
"epoch": 3.72,
"eval_accuracy": 0.7395591959907313,
"eval_loss": 1.1794347763061523,
"eval_runtime": 2.2631,
"eval_samples_per_second": 219.166,
"eval_steps_per_second": 7.07,
"step": 27500
},
{
"epoch": 3.79,
"learning_rate": 4.05277401894452e-05,
"loss": 1.2937,
"step": 28000
},
{
"epoch": 3.79,
"eval_accuracy": 0.7401357600670687,
"eval_loss": 1.1982717514038086,
"eval_runtime": 2.0447,
"eval_samples_per_second": 242.582,
"eval_steps_per_second": 7.825,
"step": 28000
},
{
"epoch": 3.86,
"learning_rate": 4.035859269282815e-05,
"loss": 1.291,
"step": 28500
},
{
"epoch": 3.86,
"eval_accuracy": 0.7448072021259288,
"eval_loss": 1.1694941520690918,
"eval_runtime": 2.1453,
"eval_samples_per_second": 231.207,
"eval_steps_per_second": 7.458,
"step": 28500
},
{
"epoch": 3.92,
"learning_rate": 4.01894451962111e-05,
"loss": 1.2932,
"step": 29000
},
{
"epoch": 3.92,
"eval_accuracy": 0.7410137752905726,
"eval_loss": 1.1980637311935425,
"eval_runtime": 2.2686,
"eval_samples_per_second": 218.634,
"eval_steps_per_second": 7.053,
"step": 29000
},
{
"epoch": 3.99,
"learning_rate": 4.002029769959405e-05,
"loss": 1.2938,
"step": 29500
},
{
"epoch": 3.99,
"eval_accuracy": 0.7382663617554176,
"eval_loss": 1.1999621391296387,
"eval_runtime": 2.1418,
"eval_samples_per_second": 231.579,
"eval_steps_per_second": 7.47,
"step": 29500
},
{
"epoch": 4.06,
"learning_rate": 3.9851150202977e-05,
"loss": 1.2789,
"step": 30000
},
{
"epoch": 4.06,
"eval_accuracy": 0.7402127426252879,
"eval_loss": 1.1918007135391235,
"eval_runtime": 2.3184,
"eval_samples_per_second": 213.944,
"eval_steps_per_second": 6.901,
"step": 30000
},
{
"epoch": 4.13,
"learning_rate": 3.968200270635995e-05,
"loss": 1.2806,
"step": 30500
},
{
"epoch": 4.13,
"eval_accuracy": 0.7368392751519062,
"eval_loss": 1.2065249681472778,
"eval_runtime": 2.1671,
"eval_samples_per_second": 228.872,
"eval_steps_per_second": 7.383,
"step": 30500
},
{
"epoch": 4.19,
"learning_rate": 3.95128552097429e-05,
"loss": 1.2799,
"step": 31000
},
{
"epoch": 4.19,
"eval_accuracy": 0.7374173525839968,
"eval_loss": 1.2035958766937256,
"eval_runtime": 2.0293,
"eval_samples_per_second": 244.417,
"eval_steps_per_second": 7.884,
"step": 31000
},
{
"epoch": 4.26,
"learning_rate": 3.934370771312585e-05,
"loss": 1.2851,
"step": 31500
},
{
"epoch": 4.26,
"eval_accuracy": 0.7374529736652525,
"eval_loss": 1.2056316137313843,
"eval_runtime": 2.2747,
"eval_samples_per_second": 218.047,
"eval_steps_per_second": 7.034,
"step": 31500
},
{
"epoch": 4.33,
"learning_rate": 3.91745602165088e-05,
"loss": 1.2789,
"step": 32000
},
{
"epoch": 4.33,
"eval_accuracy": 0.7414960437229791,
"eval_loss": 1.185698390007019,
"eval_runtime": 2.1279,
"eval_samples_per_second": 233.09,
"eval_steps_per_second": 7.519,
"step": 32000
},
{
"epoch": 4.4,
"learning_rate": 3.900541271989175e-05,
"loss": 1.2847,
"step": 32500
},
{
"epoch": 4.4,
"eval_accuracy": 0.7375549926676443,
"eval_loss": 1.1947497129440308,
"eval_runtime": 2.2844,
"eval_samples_per_second": 217.128,
"eval_steps_per_second": 7.004,
"step": 32500
},
{
"epoch": 4.47,
"learning_rate": 3.88362652232747e-05,
"loss": 1.2843,
"step": 33000
},
{
"epoch": 4.47,
"eval_accuracy": 0.7398512049167071,
"eval_loss": 1.1868607997894287,
"eval_runtime": 1.8928,
"eval_samples_per_second": 262.041,
"eval_steps_per_second": 8.453,
"step": 33000
},
{
"epoch": 4.53,
"learning_rate": 3.866711772665765e-05,
"loss": 1.2822,
"step": 33500
},
{
"epoch": 4.53,
"eval_accuracy": 0.738583059254866,
"eval_loss": 1.1962590217590332,
"eval_runtime": 2.3042,
"eval_samples_per_second": 215.256,
"eval_steps_per_second": 6.944,
"step": 33500
},
{
"epoch": 4.6,
"learning_rate": 3.84979702300406e-05,
"loss": 1.2755,
"step": 34000
},
{
"epoch": 4.6,
"eval_accuracy": 0.7423808354478731,
"eval_loss": 1.189677357673645,
"eval_runtime": 2.1343,
"eval_samples_per_second": 232.399,
"eval_steps_per_second": 7.497,
"step": 34000
},
{
"epoch": 4.67,
"learning_rate": 3.832882273342355e-05,
"loss": 1.283,
"step": 34500
},
{
"epoch": 4.67,
"eval_accuracy": 0.7438030006523157,
"eval_loss": 1.1673452854156494,
"eval_runtime": 2.1405,
"eval_samples_per_second": 231.723,
"eval_steps_per_second": 7.475,
"step": 34500
},
{
"epoch": 4.74,
"learning_rate": 3.81596752368065e-05,
"loss": 1.2765,
"step": 35000
},
{
"epoch": 4.74,
"eval_accuracy": 0.7418567866813223,
"eval_loss": 1.1855015754699707,
"eval_runtime": 2.2371,
"eval_samples_per_second": 221.718,
"eval_steps_per_second": 7.152,
"step": 35000
},
{
"epoch": 4.8,
"learning_rate": 3.799052774018945e-05,
"loss": 1.2762,
"step": 35500
},
{
"epoch": 4.8,
"eval_accuracy": 0.7412275877241228,
"eval_loss": 1.1773431301116943,
"eval_runtime": 2.1867,
"eval_samples_per_second": 226.824,
"eval_steps_per_second": 7.317,
"step": 35500
},
{
"epoch": 4.87,
"learning_rate": 3.7821380243572397e-05,
"loss": 1.2776,
"step": 36000
},
{
"epoch": 4.87,
"eval_accuracy": 0.740787246819894,
"eval_loss": 1.1897586584091187,
"eval_runtime": 1.9025,
"eval_samples_per_second": 260.712,
"eval_steps_per_second": 8.41,
"step": 36000
},
{
"epoch": 4.94,
"learning_rate": 3.7652232746955347e-05,
"loss": 1.2847,
"step": 36500
},
{
"epoch": 4.94,
"eval_accuracy": 0.7437667084947351,
"eval_loss": 1.1624772548675537,
"eval_runtime": 1.9202,
"eval_samples_per_second": 258.302,
"eval_steps_per_second": 8.332,
"step": 36500
},
{
"epoch": 5.01,
"learning_rate": 3.7483085250338296e-05,
"loss": 1.2732,
"step": 37000
},
{
"epoch": 5.01,
"eval_accuracy": 0.7396705597179374,
"eval_loss": 1.194719672203064,
"eval_runtime": 2.2563,
"eval_samples_per_second": 219.831,
"eval_steps_per_second": 7.091,
"step": 37000
},
{
"epoch": 5.07,
"learning_rate": 3.7313937753721246e-05,
"loss": 1.2667,
"step": 37500
},
{
"epoch": 5.07,
"eval_accuracy": 0.7384741591468417,
"eval_loss": 1.2097489833831787,
"eval_runtime": 2.0162,
"eval_samples_per_second": 246.011,
"eval_steps_per_second": 7.936,
"step": 37500
},
{
"epoch": 5.14,
"learning_rate": 3.7144790257104196e-05,
"loss": 1.2678,
"step": 38000
},
{
"epoch": 5.14,
"eval_accuracy": 0.7397711324624852,
"eval_loss": 1.187340497970581,
"eval_runtime": 1.9242,
"eval_samples_per_second": 257.772,
"eval_steps_per_second": 8.315,
"step": 38000
},
{
"epoch": 5.21,
"learning_rate": 3.6975642760487146e-05,
"loss": 1.2681,
"step": 38500
},
{
"epoch": 5.21,
"eval_accuracy": 0.7467894879436467,
"eval_loss": 1.1681954860687256,
"eval_runtime": 2.1385,
"eval_samples_per_second": 231.938,
"eval_steps_per_second": 7.482,
"step": 38500
},
{
"epoch": 5.28,
"learning_rate": 3.6806495263870096e-05,
"loss": 1.2699,
"step": 39000
},
{
"epoch": 5.28,
"eval_accuracy": 0.745684382221014,
"eval_loss": 1.1739610433578491,
"eval_runtime": 1.9046,
"eval_samples_per_second": 260.416,
"eval_steps_per_second": 8.401,
"step": 39000
},
{
"epoch": 5.35,
"learning_rate": 3.6637347767253046e-05,
"loss": 1.2675,
"step": 39500
},
{
"epoch": 5.35,
"eval_accuracy": 0.7378905091781449,
"eval_loss": 1.212327003479004,
"eval_runtime": 2.151,
"eval_samples_per_second": 230.592,
"eval_steps_per_second": 7.438,
"step": 39500
},
{
"epoch": 5.41,
"learning_rate": 3.6468200270635996e-05,
"loss": 1.2604,
"step": 40000
},
{
"epoch": 5.41,
"eval_accuracy": 0.7395626782561456,
"eval_loss": 1.195254921913147,
"eval_runtime": 2.1404,
"eval_samples_per_second": 231.731,
"eval_steps_per_second": 7.475,
"step": 40000
},
{
"epoch": 5.48,
"learning_rate": 3.6299052774018946e-05,
"loss": 1.2688,
"step": 40500
},
{
"epoch": 5.48,
"eval_accuracy": 0.7397589090237662,
"eval_loss": 1.1849150657653809,
"eval_runtime": 2.1374,
"eval_samples_per_second": 232.054,
"eval_steps_per_second": 7.486,
"step": 40500
},
{
"epoch": 5.55,
"learning_rate": 3.6129905277401896e-05,
"loss": 1.2698,
"step": 41000
},
{
"epoch": 5.55,
"eval_accuracy": 0.7413877684508885,
"eval_loss": 1.1708790063858032,
"eval_runtime": 2.1318,
"eval_samples_per_second": 232.668,
"eval_steps_per_second": 7.505,
"step": 41000
},
{
"epoch": 5.62,
"learning_rate": 3.5960757780784846e-05,
"loss": 1.2689,
"step": 41500
},
{
"epoch": 5.62,
"eval_accuracy": 0.7438135277526475,
"eval_loss": 1.1763643026351929,
"eval_runtime": 1.9258,
"eval_samples_per_second": 257.551,
"eval_steps_per_second": 8.308,
"step": 41500
},
{
"epoch": 5.68,
"learning_rate": 3.5791610284167796e-05,
"loss": 1.269,
"step": 42000
},
{
"epoch": 5.68,
"eval_accuracy": 0.7409149325968664,
"eval_loss": 1.1824229955673218,
"eval_runtime": 2.2453,
"eval_samples_per_second": 220.905,
"eval_steps_per_second": 7.126,
"step": 42000
},
{
"epoch": 5.75,
"learning_rate": 3.5622462787550746e-05,
"loss": 1.2715,
"step": 42500
},
{
"epoch": 5.75,
"eval_accuracy": 0.7408733194884687,
"eval_loss": 1.178514003753662,
"eval_runtime": 2.0475,
"eval_samples_per_second": 242.248,
"eval_steps_per_second": 7.814,
"step": 42500
},
{
"epoch": 5.82,
"learning_rate": 3.5453315290933695e-05,
"loss": 1.2628,
"step": 43000
},
{
"epoch": 5.82,
"eval_accuracy": 0.7433914472797822,
"eval_loss": 1.173943281173706,
"eval_runtime": 2.1375,
"eval_samples_per_second": 232.048,
"eval_steps_per_second": 7.485,
"step": 43000
},
{
"epoch": 5.89,
"learning_rate": 3.5284167794316645e-05,
"loss": 1.2617,
"step": 43500
},
{
"epoch": 5.89,
"eval_accuracy": 0.7406168909338969,
"eval_loss": 1.1814693212509155,
"eval_runtime": 2.1357,
"eval_samples_per_second": 232.237,
"eval_steps_per_second": 7.492,
"step": 43500
},
{
"epoch": 5.95,
"learning_rate": 3.5115020297699595e-05,
"loss": 1.2565,
"step": 44000
},
{
"epoch": 5.95,
"eval_accuracy": 0.7414824236191919,
"eval_loss": 1.1885017156600952,
"eval_runtime": 2.2461,
"eval_samples_per_second": 220.826,
"eval_steps_per_second": 7.123,
"step": 44000
},
{
"epoch": 6.02,
"learning_rate": 3.4945872801082545e-05,
"loss": 1.2639,
"step": 44500
},
{
"epoch": 6.02,
"eval_accuracy": 0.741952133873027,
"eval_loss": 1.1781718730926514,
"eval_runtime": 2.0174,
"eval_samples_per_second": 245.859,
"eval_steps_per_second": 7.931,
"step": 44500
},
{
"epoch": 6.09,
"learning_rate": 3.4776725304465495e-05,
"loss": 1.2557,
"step": 45000
},
{
"epoch": 6.09,
"eval_accuracy": 0.7382356866408648,
"eval_loss": 1.2061494588851929,
"eval_runtime": 2.2612,
"eval_samples_per_second": 219.356,
"eval_steps_per_second": 7.076,
"step": 45000
},
{
"epoch": 6.16,
"learning_rate": 3.4607577807848445e-05,
"loss": 1.2503,
"step": 45500
},
{
"epoch": 6.16,
"eval_accuracy": 0.739681675962454,
"eval_loss": 1.1741236448287964,
"eval_runtime": 2.1411,
"eval_samples_per_second": 231.661,
"eval_steps_per_second": 7.473,
"step": 45500
},
{
"epoch": 6.22,
"learning_rate": 3.4438430311231395e-05,
"loss": 1.2514,
"step": 46000
},
{
"epoch": 6.22,
"eval_accuracy": 0.7435828154552824,
"eval_loss": 1.167312741279602,
"eval_runtime": 2.0431,
"eval_samples_per_second": 242.763,
"eval_steps_per_second": 7.831,
"step": 46000
},
{
"epoch": 6.29,
"learning_rate": 3.4269282814614345e-05,
"loss": 1.254,
"step": 46500
},
{
"epoch": 6.29,
"eval_accuracy": 0.7399956502827316,
"eval_loss": 1.1828943490982056,
"eval_runtime": 2.2651,
"eval_samples_per_second": 218.976,
"eval_steps_per_second": 7.064,
"step": 46500
},
{
"epoch": 6.36,
"learning_rate": 3.4100135317997295e-05,
"loss": 1.2583,
"step": 47000
},
{
"epoch": 6.36,
"eval_accuracy": 0.7390757539268417,
"eval_loss": 1.1776684522628784,
"eval_runtime": 2.0336,
"eval_samples_per_second": 243.904,
"eval_steps_per_second": 7.868,
"step": 47000
},
{
"epoch": 6.43,
"learning_rate": 3.3930987821380245e-05,
"loss": 1.2518,
"step": 47500
},
{
"epoch": 6.43,
"eval_accuracy": 0.7411625020238545,
"eval_loss": 1.1892728805541992,
"eval_runtime": 2.2474,
"eval_samples_per_second": 220.698,
"eval_steps_per_second": 7.119,
"step": 47500
},
{
"epoch": 6.5,
"learning_rate": 3.3761840324763195e-05,
"loss": 1.2519,
"step": 48000
},
{
"epoch": 6.5,
"eval_accuracy": 0.7410831524506257,
"eval_loss": 1.1775306463241577,
"eval_runtime": 2.127,
"eval_samples_per_second": 233.19,
"eval_steps_per_second": 7.522,
"step": 48000
},
{
"epoch": 6.56,
"learning_rate": 3.3592692828146145e-05,
"loss": 1.2477,
"step": 48500
},
{
"epoch": 6.56,
"eval_accuracy": 0.7451821862348178,
"eval_loss": 1.1809273958206177,
"eval_runtime": 1.902,
"eval_samples_per_second": 260.776,
"eval_steps_per_second": 8.412,
"step": 48500
},
{
"epoch": 6.63,
"learning_rate": 3.3423545331529095e-05,
"loss": 1.2546,
"step": 49000
},
{
"epoch": 6.63,
"eval_accuracy": 0.7455485978763953,
"eval_loss": 1.1651870012283325,
"eval_runtime": 2.1247,
"eval_samples_per_second": 233.443,
"eval_steps_per_second": 7.53,
"step": 49000
},
{
"epoch": 6.7,
"learning_rate": 3.3254397834912044e-05,
"loss": 1.2564,
"step": 49500
},
{
"epoch": 6.7,
"eval_accuracy": 0.7435488746599247,
"eval_loss": 1.1729925870895386,
"eval_runtime": 2.2521,
"eval_samples_per_second": 220.235,
"eval_steps_per_second": 7.104,
"step": 49500
},
{
"epoch": 6.77,
"learning_rate": 3.3085250338294994e-05,
"loss": 1.254,
"step": 50000
},
{
"epoch": 6.77,
"eval_accuracy": 0.7427022407392571,
"eval_loss": 1.1740801334381104,
"eval_runtime": 2.2515,
"eval_samples_per_second": 220.294,
"eval_steps_per_second": 7.106,
"step": 50000
},
{
"epoch": 6.83,
"learning_rate": 3.2916102841677944e-05,
"loss": 1.2495,
"step": 50500
},
{
"epoch": 6.83,
"eval_accuracy": 0.7475704632944787,
"eval_loss": 1.1539645195007324,
"eval_runtime": 2.1379,
"eval_samples_per_second": 231.999,
"eval_steps_per_second": 7.484,
"step": 50500
},
{
"epoch": 6.9,
"learning_rate": 3.2746955345060894e-05,
"loss": 1.2502,
"step": 51000
},
{
"epoch": 6.9,
"eval_accuracy": 0.7488099797559774,
"eval_loss": 1.145354151725769,
"eval_runtime": 2.0467,
"eval_samples_per_second": 242.344,
"eval_steps_per_second": 7.818,
"step": 51000
},
{
"epoch": 6.97,
"learning_rate": 3.2577807848443844e-05,
"loss": 1.2527,
"step": 51500
},
{
"epoch": 6.97,
"eval_accuracy": 0.7429261278858414,
"eval_loss": 1.1704862117767334,
"eval_runtime": 1.8944,
"eval_samples_per_second": 261.83,
"eval_steps_per_second": 8.446,
"step": 51500
},
{
"epoch": 7.04,
"learning_rate": 3.2408660351826794e-05,
"loss": 1.2418,
"step": 52000
},
{
"epoch": 7.04,
"eval_accuracy": 0.7441042170292774,
"eval_loss": 1.1714463233947754,
"eval_runtime": 2.2491,
"eval_samples_per_second": 220.532,
"eval_steps_per_second": 7.114,
"step": 52000
},
{
"epoch": 7.1,
"learning_rate": 3.2239512855209744e-05,
"loss": 1.2386,
"step": 52500
},
{
"epoch": 7.1,
"eval_accuracy": 0.74550079317324,
"eval_loss": 1.1619137525558472,
"eval_runtime": 2.2788,
"eval_samples_per_second": 217.662,
"eval_steps_per_second": 7.021,
"step": 52500
},
{
"epoch": 7.17,
"learning_rate": 3.2070365358592694e-05,
"loss": 1.2407,
"step": 53000
},
{
"epoch": 7.17,
"eval_accuracy": 0.7428433966802983,
"eval_loss": 1.1702818870544434,
"eval_runtime": 2.2482,
"eval_samples_per_second": 220.624,
"eval_steps_per_second": 7.117,
"step": 53000
},
{
"epoch": 7.24,
"learning_rate": 3.1901217861975644e-05,
"loss": 1.2429,
"step": 53500
},
{
"epoch": 7.24,
"eval_accuracy": 0.7437382207533255,
"eval_loss": 1.1596566438674927,
"eval_runtime": 2.0269,
"eval_samples_per_second": 244.711,
"eval_steps_per_second": 7.894,
"step": 53500
},
{
"epoch": 7.31,
"learning_rate": 3.1732070365358594e-05,
"loss": 1.2398,
"step": 54000
},
{
"epoch": 7.31,
"eval_accuracy": 0.7411157814291173,
"eval_loss": 1.1802175045013428,
"eval_runtime": 1.903,
"eval_samples_per_second": 260.643,
"eval_steps_per_second": 8.408,
"step": 54000
},
{
"epoch": 7.37,
"learning_rate": 3.1562922868741544e-05,
"loss": 1.2507,
"step": 54500
},
{
"epoch": 7.37,
"eval_accuracy": 0.7465291873021028,
"eval_loss": 1.153898000717163,
"eval_runtime": 2.134,
"eval_samples_per_second": 232.429,
"eval_steps_per_second": 7.498,
"step": 54500
},
{
"epoch": 7.44,
"learning_rate": 3.1393775372124494e-05,
"loss": 1.2369,
"step": 55000
},
{
"epoch": 7.44,
"eval_accuracy": 0.7421205732433082,
"eval_loss": 1.1711477041244507,
"eval_runtime": 2.2417,
"eval_samples_per_second": 221.263,
"eval_steps_per_second": 7.138,
"step": 55000
},
{
"epoch": 7.51,
"learning_rate": 3.1224627875507443e-05,
"loss": 1.2463,
"step": 55500
},
{
"epoch": 7.51,
"eval_accuracy": 0.7408580787198625,
"eval_loss": 1.1848827600479126,
"eval_runtime": 2.2658,
"eval_samples_per_second": 218.909,
"eval_steps_per_second": 7.062,
"step": 55500
},
{
"epoch": 7.58,
"learning_rate": 3.1055480378890393e-05,
"loss": 1.2389,
"step": 56000
},
{
"epoch": 7.58,
"eval_accuracy": 0.7447417175239756,
"eval_loss": 1.172045111656189,
"eval_runtime": 2.1226,
"eval_samples_per_second": 233.68,
"eval_steps_per_second": 7.538,
"step": 56000
},
{
"epoch": 7.65,
"learning_rate": 3.088633288227334e-05,
"loss": 1.2395,
"step": 56500
},
{
"epoch": 7.65,
"eval_accuracy": 0.7455846610856063,
"eval_loss": 1.1613755226135254,
"eval_runtime": 2.2492,
"eval_samples_per_second": 220.523,
"eval_steps_per_second": 7.114,
"step": 56500
},
{
"epoch": 7.71,
"learning_rate": 3.071718538565629e-05,
"loss": 1.2429,
"step": 57000
},
{
"epoch": 7.71,
"eval_accuracy": 0.7459984960790633,
"eval_loss": 1.1604408025741577,
"eval_runtime": 2.2523,
"eval_samples_per_second": 220.221,
"eval_steps_per_second": 7.104,
"step": 57000
},
{
"epoch": 7.78,
"learning_rate": 3.054803788903924e-05,
"loss": 1.2384,
"step": 57500
},
{
"epoch": 7.78,
"eval_accuracy": 0.7408438637823945,
"eval_loss": 1.1852344274520874,
"eval_runtime": 2.2645,
"eval_samples_per_second": 219.035,
"eval_steps_per_second": 7.066,
"step": 57500
},
{
"epoch": 7.85,
"learning_rate": 3.0378890392422193e-05,
"loss": 1.2419,
"step": 58000
},
{
"epoch": 7.85,
"eval_accuracy": 0.7460735114607351,
"eval_loss": 1.1592859029769897,
"eval_runtime": 2.2667,
"eval_samples_per_second": 218.824,
"eval_steps_per_second": 7.059,
"step": 58000
},
{
"epoch": 7.92,
"learning_rate": 3.0209742895805143e-05,
"loss": 1.2381,
"step": 58500
},
{
"epoch": 7.92,
"eval_accuracy": 0.7454180674547229,
"eval_loss": 1.161791205406189,
"eval_runtime": 2.2508,
"eval_samples_per_second": 220.362,
"eval_steps_per_second": 7.108,
"step": 58500
},
{
"epoch": 7.98,
"learning_rate": 3.0040595399188093e-05,
"loss": 1.2384,
"step": 59000
},
{
"epoch": 7.98,
"eval_accuracy": 0.7445992935958163,
"eval_loss": 1.1550912857055664,
"eval_runtime": 2.2277,
"eval_samples_per_second": 222.654,
"eval_steps_per_second": 7.182,
"step": 59000
},
{
"epoch": 8.05,
"learning_rate": 2.9871447902571043e-05,
"loss": 1.2314,
"step": 59500
},
{
"epoch": 8.05,
"eval_accuracy": 0.7451252345598434,
"eval_loss": 1.1473671197891235,
"eval_runtime": 2.0323,
"eval_samples_per_second": 244.059,
"eval_steps_per_second": 7.873,
"step": 59500
},
{
"epoch": 8.12,
"learning_rate": 2.9702300405953993e-05,
"loss": 1.2277,
"step": 60000
},
{
"epoch": 8.12,
"eval_accuracy": 0.7435493080290383,
"eval_loss": 1.1636135578155518,
"eval_runtime": 2.2565,
"eval_samples_per_second": 219.81,
"eval_steps_per_second": 7.091,
"step": 60000
},
{
"epoch": 8.19,
"learning_rate": 2.9533152909336943e-05,
"loss": 1.23,
"step": 60500
},
{
"epoch": 8.19,
"eval_accuracy": 0.7482466354355656,
"eval_loss": 1.1545356512069702,
"eval_runtime": 2.1398,
"eval_samples_per_second": 231.799,
"eval_steps_per_second": 7.477,
"step": 60500
},
{
"epoch": 8.25,
"learning_rate": 2.9364005412719893e-05,
"loss": 1.2292,
"step": 61000
},
{
"epoch": 8.25,
"eval_accuracy": 0.7456762809270702,
"eval_loss": 1.169358730316162,
"eval_runtime": 2.2505,
"eval_samples_per_second": 220.392,
"eval_steps_per_second": 7.109,
"step": 61000
},
{
"epoch": 8.32,
"learning_rate": 2.9194857916102843e-05,
"loss": 1.2337,
"step": 61500
},
{
"epoch": 8.32,
"eval_accuracy": 0.7437165882071332,
"eval_loss": 1.1681973934173584,
"eval_runtime": 2.2595,
"eval_samples_per_second": 219.518,
"eval_steps_per_second": 7.081,
"step": 61500
},
{
"epoch": 8.39,
"learning_rate": 2.9025710419485792e-05,
"loss": 1.2274,
"step": 62000
},
{
"epoch": 8.39,
"eval_accuracy": 0.7484281932495036,
"eval_loss": 1.1518677473068237,
"eval_runtime": 2.0234,
"eval_samples_per_second": 245.13,
"eval_steps_per_second": 7.907,
"step": 62000
},
{
"epoch": 8.46,
"learning_rate": 2.885656292286874e-05,
"loss": 1.232,
"step": 62500
},
{
"epoch": 8.46,
"eval_accuracy": 0.7435426377844804,
"eval_loss": 1.1693381071090698,
"eval_runtime": 2.153,
"eval_samples_per_second": 230.381,
"eval_steps_per_second": 7.432,
"step": 62500
},
{
"epoch": 8.53,
"learning_rate": 2.868741542625169e-05,
"loss": 1.2315,
"step": 63000
},
{
"epoch": 8.53,
"eval_accuracy": 0.7434497229246247,
"eval_loss": 1.1637970209121704,
"eval_runtime": 2.1389,
"eval_samples_per_second": 231.9,
"eval_steps_per_second": 7.481,
"step": 63000
},
{
"epoch": 8.59,
"learning_rate": 2.851826792963464e-05,
"loss": 1.2293,
"step": 63500
},
{
"epoch": 8.59,
"eval_accuracy": 0.746056909476852,
"eval_loss": 1.1639689207077026,
"eval_runtime": 1.9056,
"eval_samples_per_second": 260.282,
"eval_steps_per_second": 8.396,
"step": 63500
},
{
"epoch": 8.66,
"learning_rate": 2.8349120433017595e-05,
"loss": 1.2287,
"step": 64000
},
{
"epoch": 8.66,
"eval_accuracy": 0.7519274622651754,
"eval_loss": 1.146359920501709,
"eval_runtime": 2.1418,
"eval_samples_per_second": 231.582,
"eval_steps_per_second": 7.47,
"step": 64000
},
{
"epoch": 8.73,
"learning_rate": 2.8179972936400545e-05,
"loss": 1.2283,
"step": 64500
},
{
"epoch": 8.73,
"eval_accuracy": 0.7480988335904306,
"eval_loss": 1.1439129114151,
"eval_runtime": 2.1515,
"eval_samples_per_second": 230.532,
"eval_steps_per_second": 7.437,
"step": 64500
},
{
"epoch": 8.8,
"learning_rate": 2.8010825439783495e-05,
"loss": 1.2279,
"step": 65000
},
{
"epoch": 8.8,
"eval_accuracy": 0.7476770091832853,
"eval_loss": 1.1496102809906006,
"eval_runtime": 2.1443,
"eval_samples_per_second": 231.311,
"eval_steps_per_second": 7.462,
"step": 65000
},
{
"epoch": 8.86,
"learning_rate": 2.7841677943166445e-05,
"loss": 1.2276,
"step": 65500
},
{
"epoch": 8.86,
"eval_accuracy": 0.7448800151502855,
"eval_loss": 1.1544512510299683,
"eval_runtime": 2.1443,
"eval_samples_per_second": 231.308,
"eval_steps_per_second": 7.462,
"step": 65500
},
{
"epoch": 8.93,
"learning_rate": 2.7672530446549395e-05,
"loss": 1.2301,
"step": 66000
},
{
"epoch": 8.93,
"eval_accuracy": 0.7486796972831709,
"eval_loss": 1.131188154220581,
"eval_runtime": 2.2612,
"eval_samples_per_second": 219.354,
"eval_steps_per_second": 7.076,
"step": 66000
},
{
"epoch": 9.0,
"learning_rate": 2.7503382949932345e-05,
"loss": 1.2248,
"step": 66500
},
{
"epoch": 9.0,
"eval_accuracy": 0.7464872620949183,
"eval_loss": 1.1444239616394043,
"eval_runtime": 1.9117,
"eval_samples_per_second": 259.451,
"eval_steps_per_second": 8.369,
"step": 66500
},
{
"epoch": 9.07,
"learning_rate": 2.7334235453315295e-05,
"loss": 1.2266,
"step": 67000
},
{
"epoch": 9.07,
"eval_accuracy": 0.7430061513773736,
"eval_loss": 1.1525160074234009,
"eval_runtime": 2.256,
"eval_samples_per_second": 219.857,
"eval_steps_per_second": 7.092,
"step": 67000
},
{
"epoch": 9.13,
"learning_rate": 2.716508795669824e-05,
"loss": 1.2198,
"step": 67500
},
{
"epoch": 9.13,
"eval_accuracy": 0.7462388784038825,
"eval_loss": 1.1551423072814941,
"eval_runtime": 2.2513,
"eval_samples_per_second": 220.317,
"eval_steps_per_second": 7.107,
"step": 67500
},
{
"epoch": 9.2,
"learning_rate": 2.699594046008119e-05,
"loss": 1.219,
"step": 68000
},
{
"epoch": 9.2,
"eval_accuracy": 0.7479334406870639,
"eval_loss": 1.143385887145996,
"eval_runtime": 2.2558,
"eval_samples_per_second": 219.879,
"eval_steps_per_second": 7.093,
"step": 68000
},
{
"epoch": 9.27,
"learning_rate": 2.682679296346414e-05,
"loss": 1.2212,
"step": 68500
},
{
"epoch": 9.27,
"eval_accuracy": 0.7415982885151786,
"eval_loss": 1.1707236766815186,
"eval_runtime": 2.2452,
"eval_samples_per_second": 220.92,
"eval_steps_per_second": 7.126,
"step": 68500
},
{
"epoch": 9.34,
"learning_rate": 2.665764546684709e-05,
"loss": 1.2265,
"step": 69000
},
{
"epoch": 9.34,
"eval_accuracy": 0.7421521035598706,
"eval_loss": 1.1743712425231934,
"eval_runtime": 2.1289,
"eval_samples_per_second": 232.985,
"eval_steps_per_second": 7.516,
"step": 69000
},
{
"epoch": 9.4,
"learning_rate": 2.648849797023004e-05,
"loss": 1.2216,
"step": 69500
},
{
"epoch": 9.4,
"eval_accuracy": 0.7392663666010835,
"eval_loss": 1.1817814111709595,
"eval_runtime": 2.1373,
"eval_samples_per_second": 232.069,
"eval_steps_per_second": 7.486,
"step": 69500
},
{
"epoch": 9.47,
"learning_rate": 2.631935047361299e-05,
"loss": 1.2226,
"step": 70000
},
{
"epoch": 9.47,
"eval_accuracy": 0.7454341644794401,
"eval_loss": 1.1662167310714722,
"eval_runtime": 2.1776,
"eval_samples_per_second": 227.77,
"eval_steps_per_second": 7.347,
"step": 70000
},
{
"epoch": 9.54,
"learning_rate": 2.615020297699594e-05,
"loss": 1.2224,
"step": 70500
},
{
"epoch": 9.54,
"eval_accuracy": 0.7460155894249055,
"eval_loss": 1.1345940828323364,
"eval_runtime": 2.131,
"eval_samples_per_second": 232.753,
"eval_steps_per_second": 7.508,
"step": 70500
},
{
"epoch": 9.61,
"learning_rate": 2.598105548037889e-05,
"loss": 1.2186,
"step": 71000
},
{
"epoch": 9.61,
"eval_accuracy": 0.7462514417531718,
"eval_loss": 1.153380036354065,
"eval_runtime": 2.3136,
"eval_samples_per_second": 214.386,
"eval_steps_per_second": 6.916,
"step": 71000
},
{
"epoch": 9.68,
"learning_rate": 2.581190798376184e-05,
"loss": 1.2179,
"step": 71500
},
{
"epoch": 9.68,
"eval_accuracy": 0.7477791705270042,
"eval_loss": 1.1399047374725342,
"eval_runtime": 2.2709,
"eval_samples_per_second": 218.413,
"eval_steps_per_second": 7.046,
"step": 71500
},
{
"epoch": 9.74,
"learning_rate": 2.564276048714479e-05,
"loss": 1.2177,
"step": 72000
},
{
"epoch": 9.74,
"eval_accuracy": 0.7441804462995666,
"eval_loss": 1.1545348167419434,
"eval_runtime": 2.2419,
"eval_samples_per_second": 221.237,
"eval_steps_per_second": 7.137,
"step": 72000
},
{
"epoch": 9.81,
"learning_rate": 2.547361299052774e-05,
"loss": 1.2154,
"step": 72500
},
{
"epoch": 9.81,
"eval_accuracy": 0.7426768214742224,
"eval_loss": 1.171052098274231,
"eval_runtime": 2.1267,
"eval_samples_per_second": 233.224,
"eval_steps_per_second": 7.523,
"step": 72500
},
{
"epoch": 9.88,
"learning_rate": 2.530446549391069e-05,
"loss": 1.2179,
"step": 73000
},
{
"epoch": 9.88,
"eval_accuracy": 0.7514139509830325,
"eval_loss": 1.1348686218261719,
"eval_runtime": 1.8985,
"eval_samples_per_second": 261.263,
"eval_steps_per_second": 8.428,
"step": 73000
},
{
"epoch": 9.95,
"learning_rate": 2.513531799729364e-05,
"loss": 1.2184,
"step": 73500
},
{
"epoch": 9.95,
"eval_accuracy": 0.749460868615729,
"eval_loss": 1.1427435874938965,
"eval_runtime": 2.1257,
"eval_samples_per_second": 233.336,
"eval_steps_per_second": 7.527,
"step": 73500
},
{
"epoch": 10.01,
"learning_rate": 2.496617050067659e-05,
"loss": 1.2193,
"step": 74000
},
{
"epoch": 10.01,
"eval_accuracy": 0.7494911077780159,
"eval_loss": 1.1222712993621826,
"eval_runtime": 2.0347,
"eval_samples_per_second": 243.77,
"eval_steps_per_second": 7.864,
"step": 74000
},
{
"epoch": 10.08,
"learning_rate": 2.479702300405954e-05,
"loss": 1.2063,
"step": 74500
},
{
"epoch": 10.08,
"eval_accuracy": 0.7488264163021444,
"eval_loss": 1.1357399225234985,
"eval_runtime": 1.9046,
"eval_samples_per_second": 260.423,
"eval_steps_per_second": 8.401,
"step": 74500
},
{
"epoch": 10.15,
"learning_rate": 2.462787550744249e-05,
"loss": 1.2025,
"step": 75000
},
{
"epoch": 10.15,
"eval_accuracy": 0.7486311066000695,
"eval_loss": 1.1476197242736816,
"eval_runtime": 2.1786,
"eval_samples_per_second": 227.67,
"eval_steps_per_second": 7.344,
"step": 75000
},
{
"epoch": 10.22,
"learning_rate": 2.445872801082544e-05,
"loss": 1.2097,
"step": 75500
},
{
"epoch": 10.22,
"eval_accuracy": 0.7492516383053316,
"eval_loss": 1.1382330656051636,
"eval_runtime": 2.1295,
"eval_samples_per_second": 232.922,
"eval_steps_per_second": 7.514,
"step": 75500
},
{
"epoch": 10.28,
"learning_rate": 2.428958051420839e-05,
"loss": 1.2106,
"step": 76000
},
{
"epoch": 10.28,
"eval_accuracy": 0.7500204253928484,
"eval_loss": 1.1413904428482056,
"eval_runtime": 2.1753,
"eval_samples_per_second": 228.019,
"eval_steps_per_second": 7.355,
"step": 76000
},
{
"epoch": 10.35,
"learning_rate": 2.412043301759134e-05,
"loss": 1.2146,
"step": 76500
},
{
"epoch": 10.35,
"eval_accuracy": 0.7533006412674462,
"eval_loss": 1.113772988319397,
"eval_runtime": 2.136,
"eval_samples_per_second": 232.208,
"eval_steps_per_second": 7.491,
"step": 76500
},
{
"epoch": 10.42,
"learning_rate": 2.395128552097429e-05,
"loss": 1.2129,
"step": 77000
},
{
"epoch": 10.42,
"eval_accuracy": 0.7477787948952668,
"eval_loss": 1.1447216272354126,
"eval_runtime": 2.2641,
"eval_samples_per_second": 219.075,
"eval_steps_per_second": 7.067,
"step": 77000
},
{
"epoch": 10.49,
"learning_rate": 2.378213802435724e-05,
"loss": 1.2078,
"step": 77500
},
{
"epoch": 10.49,
"eval_accuracy": 0.7508962988920937,
"eval_loss": 1.155730128288269,
"eval_runtime": 2.1359,
"eval_samples_per_second": 232.221,
"eval_steps_per_second": 7.491,
"step": 77500
},
{
"epoch": 10.55,
"learning_rate": 2.3612990527740193e-05,
"loss": 1.204,
"step": 78000
},
{
"epoch": 10.55,
"eval_accuracy": 0.7537665293735096,
"eval_loss": 1.1243318319320679,
"eval_runtime": 2.2543,
"eval_samples_per_second": 220.027,
"eval_steps_per_second": 7.098,
"step": 78000
},
{
"epoch": 10.62,
"learning_rate": 2.3443843031123143e-05,
"loss": 1.2101,
"step": 78500
},
{
"epoch": 10.62,
"eval_accuracy": 0.7507114399544679,
"eval_loss": 1.1352229118347168,
"eval_runtime": 2.1504,
"eval_samples_per_second": 230.651,
"eval_steps_per_second": 7.44,
"step": 78500
},
{
"epoch": 10.69,
"learning_rate": 2.327469553450609e-05,
"loss": 1.207,
"step": 79000
},
{
"epoch": 10.69,
"eval_accuracy": 0.7526499865482916,
"eval_loss": 1.1365910768508911,
"eval_runtime": 2.2554,
"eval_samples_per_second": 219.92,
"eval_steps_per_second": 7.094,
"step": 79000
},
{
"epoch": 10.76,
"learning_rate": 2.310554803788904e-05,
"loss": 1.2067,
"step": 79500
},
{
"epoch": 10.76,
"eval_accuracy": 0.7482271408617169,
"eval_loss": 1.145031213760376,
"eval_runtime": 2.1262,
"eval_samples_per_second": 233.276,
"eval_steps_per_second": 7.525,
"step": 79500
},
{
"epoch": 10.83,
"learning_rate": 2.293640054127199e-05,
"loss": 1.1997,
"step": 80000
},
{
"epoch": 10.83,
"eval_accuracy": 0.7503758591065293,
"eval_loss": 1.1333723068237305,
"eval_runtime": 2.1437,
"eval_samples_per_second": 231.381,
"eval_steps_per_second": 7.464,
"step": 80000
},
{
"epoch": 10.89,
"learning_rate": 2.276725304465494e-05,
"loss": 1.2114,
"step": 80500
},
{
"epoch": 10.89,
"eval_accuracy": 0.7523950883821346,
"eval_loss": 1.13480544090271,
"eval_runtime": 2.256,
"eval_samples_per_second": 219.855,
"eval_steps_per_second": 7.092,
"step": 80500
},
{
"epoch": 10.96,
"learning_rate": 2.259810554803789e-05,
"loss": 1.2087,
"step": 81000
},
{
"epoch": 10.96,
"eval_accuracy": 0.7507978579542381,
"eval_loss": 1.1221325397491455,
"eval_runtime": 2.149,
"eval_samples_per_second": 230.81,
"eval_steps_per_second": 7.445,
"step": 81000
},
{
"epoch": 11.03,
"learning_rate": 2.242895805142084e-05,
"loss": 1.2065,
"step": 81500
},
{
"epoch": 11.03,
"eval_accuracy": 0.7486237532021583,
"eval_loss": 1.130583643913269,
"eval_runtime": 2.2591,
"eval_samples_per_second": 219.556,
"eval_steps_per_second": 7.082,
"step": 81500
},
{
"epoch": 11.1,
"learning_rate": 2.225981055480379e-05,
"loss": 1.1985,
"step": 82000
},
{
"epoch": 11.1,
"eval_accuracy": 0.7470671686582637,
"eval_loss": 1.1648321151733398,
"eval_runtime": 2.2577,
"eval_samples_per_second": 219.693,
"eval_steps_per_second": 7.087,
"step": 82000
},
{
"epoch": 11.16,
"learning_rate": 2.209066305818674e-05,
"loss": 1.205,
"step": 82500
},
{
"epoch": 11.16,
"eval_accuracy": 0.7526795068095737,
"eval_loss": 1.1088367700576782,
"eval_runtime": 2.1263,
"eval_samples_per_second": 233.267,
"eval_steps_per_second": 7.525,
"step": 82500
},
{
"epoch": 11.23,
"learning_rate": 2.192151556156969e-05,
"loss": 1.2026,
"step": 83000
},
{
"epoch": 11.23,
"eval_accuracy": 0.7512794548290868,
"eval_loss": 1.1253347396850586,
"eval_runtime": 2.1489,
"eval_samples_per_second": 230.816,
"eval_steps_per_second": 7.446,
"step": 83000
},
{
"epoch": 11.3,
"learning_rate": 2.175236806495264e-05,
"loss": 1.2,
"step": 83500
},
{
"epoch": 11.3,
"eval_accuracy": 0.7473735779217244,
"eval_loss": 1.1330283880233765,
"eval_runtime": 2.2554,
"eval_samples_per_second": 219.915,
"eval_steps_per_second": 7.094,
"step": 83500
},
{
"epoch": 11.37,
"learning_rate": 2.1583220568335592e-05,
"loss": 1.1997,
"step": 84000
},
{
"epoch": 11.37,
"eval_accuracy": 0.7493864048660762,
"eval_loss": 1.1423763036727905,
"eval_runtime": 2.2628,
"eval_samples_per_second": 219.193,
"eval_steps_per_second": 7.071,
"step": 84000
},
{
"epoch": 11.43,
"learning_rate": 2.1414073071718542e-05,
"loss": 1.1989,
"step": 84500
},
{
"epoch": 11.43,
"eval_accuracy": 0.7477665276950566,
"eval_loss": 1.1288686990737915,
"eval_runtime": 2.2744,
"eval_samples_per_second": 218.08,
"eval_steps_per_second": 7.035,
"step": 84500
},
{
"epoch": 11.5,
"learning_rate": 2.124492557510149e-05,
"loss": 1.1956,
"step": 85000
},
{
"epoch": 11.5,
"eval_accuracy": 0.75250470912615,
"eval_loss": 1.1163060665130615,
"eval_runtime": 2.1424,
"eval_samples_per_second": 231.521,
"eval_steps_per_second": 7.468,
"step": 85000
},
{
"epoch": 11.57,
"learning_rate": 2.107577807848444e-05,
"loss": 1.1997,
"step": 85500
},
{
"epoch": 11.57,
"eval_accuracy": 0.7502406674510643,
"eval_loss": 1.135400414466858,
"eval_runtime": 2.132,
"eval_samples_per_second": 232.649,
"eval_steps_per_second": 7.505,
"step": 85500
},
{
"epoch": 11.64,
"learning_rate": 2.090663058186739e-05,
"loss": 1.2011,
"step": 86000
},
{
"epoch": 11.64,
"eval_accuracy": 0.7487909354704988,
"eval_loss": 1.137099027633667,
"eval_runtime": 2.022,
"eval_samples_per_second": 245.301,
"eval_steps_per_second": 7.913,
"step": 86000
},
{
"epoch": 11.71,
"learning_rate": 2.073748308525034e-05,
"loss": 1.1998,
"step": 86500
},
{
"epoch": 11.71,
"eval_accuracy": 0.7525347250536846,
"eval_loss": 1.1276001930236816,
"eval_runtime": 2.1283,
"eval_samples_per_second": 233.052,
"eval_steps_per_second": 7.518,
"step": 86500
},
{
"epoch": 11.77,
"learning_rate": 2.056833558863329e-05,
"loss": 1.1957,
"step": 87000
},
{
"epoch": 11.77,
"eval_accuracy": 0.7557962751805397,
"eval_loss": 1.1078341007232666,
"eval_runtime": 2.1602,
"eval_samples_per_second": 229.608,
"eval_steps_per_second": 7.407,
"step": 87000
},
{
"epoch": 11.84,
"learning_rate": 2.039918809201624e-05,
"loss": 1.2027,
"step": 87500
},
{
"epoch": 11.84,
"eval_accuracy": 0.745357875418331,
"eval_loss": 1.1625709533691406,
"eval_runtime": 2.1381,
"eval_samples_per_second": 231.987,
"eval_steps_per_second": 7.483,
"step": 87500
},
{
"epoch": 11.91,
"learning_rate": 2.0230040595399188e-05,
"loss": 1.2013,
"step": 88000
},
{
"epoch": 11.91,
"eval_accuracy": 0.7526884647845145,
"eval_loss": 1.1228464841842651,
"eval_runtime": 1.9029,
"eval_samples_per_second": 260.656,
"eval_steps_per_second": 8.408,
"step": 88000
},
{
"epoch": 11.98,
"learning_rate": 2.0060893098782138e-05,
"loss": 1.1944,
"step": 88500
},
{
"epoch": 11.98,
"eval_accuracy": 0.7478242411377627,
"eval_loss": 1.1413049697875977,
"eval_runtime": 2.1489,
"eval_samples_per_second": 230.819,
"eval_steps_per_second": 7.446,
"step": 88500
},
{
"epoch": 12.04,
"learning_rate": 1.9891745602165088e-05,
"loss": 1.1946,
"step": 89000
},
{
"epoch": 12.04,
"eval_accuracy": 0.7513838877841672,
"eval_loss": 1.124992847442627,
"eval_runtime": 2.2528,
"eval_samples_per_second": 220.169,
"eval_steps_per_second": 7.102,
"step": 89000
},
{
"epoch": 12.11,
"learning_rate": 1.972259810554804e-05,
"loss": 1.196,
"step": 89500
},
{
"epoch": 12.11,
"eval_accuracy": 0.7467797423793904,
"eval_loss": 1.1447776556015015,
"eval_runtime": 2.243,
"eval_samples_per_second": 221.135,
"eval_steps_per_second": 7.133,
"step": 89500
},
{
"epoch": 12.18,
"learning_rate": 1.955345060893099e-05,
"loss": 1.1893,
"step": 90000
},
{
"epoch": 12.18,
"eval_accuracy": 0.7478244470188862,
"eval_loss": 1.1357169151306152,
"eval_runtime": 2.1342,
"eval_samples_per_second": 232.401,
"eval_steps_per_second": 7.497,
"step": 90000
},
{
"epoch": 12.25,
"learning_rate": 1.938430311231394e-05,
"loss": 1.1865,
"step": 90500
},
{
"epoch": 12.25,
"eval_accuracy": 0.7525039957378796,
"eval_loss": 1.120892882347107,
"eval_runtime": 2.1457,
"eval_samples_per_second": 231.161,
"eval_steps_per_second": 7.457,
"step": 90500
},
{
"epoch": 12.31,
"learning_rate": 1.9215155615696888e-05,
"loss": 1.1921,
"step": 91000
},
{
"epoch": 12.31,
"eval_accuracy": 0.7517412799431865,
"eval_loss": 1.1200112104415894,
"eval_runtime": 2.2546,
"eval_samples_per_second": 219.99,
"eval_steps_per_second": 7.096,
"step": 91000
},
{
"epoch": 12.38,
"learning_rate": 1.9046008119079838e-05,
"loss": 1.1928,
"step": 91500
},
{
"epoch": 12.38,
"eval_accuracy": 0.751185221513814,
"eval_loss": 1.1144980192184448,
"eval_runtime": 1.9441,
"eval_samples_per_second": 255.124,
"eval_steps_per_second": 8.23,
"step": 91500
},
{
"epoch": 12.45,
"learning_rate": 1.8876860622462788e-05,
"loss": 1.1904,
"step": 92000
},
{
"epoch": 12.45,
"eval_accuracy": 0.754587343566813,
"eval_loss": 1.1108394861221313,
"eval_runtime": 2.252,
"eval_samples_per_second": 220.248,
"eval_steps_per_second": 7.105,
"step": 92000
},
{
"epoch": 12.52,
"learning_rate": 1.8707713125845738e-05,
"loss": 1.1955,
"step": 92500
},
{
"epoch": 12.52,
"eval_accuracy": 0.7540812503345287,
"eval_loss": 1.106156826019287,
"eval_runtime": 2.2623,
"eval_samples_per_second": 219.243,
"eval_steps_per_second": 7.072,
"step": 92500
},
{
"epoch": 12.58,
"learning_rate": 1.8538565629228687e-05,
"loss": 1.1898,
"step": 93000
},
{
"epoch": 12.58,
"eval_accuracy": 0.7519862396592678,
"eval_loss": 1.126400351524353,
"eval_runtime": 2.1347,
"eval_samples_per_second": 232.346,
"eval_steps_per_second": 7.495,
"step": 93000
},
{
"epoch": 12.65,
"learning_rate": 1.8369418132611637e-05,
"loss": 1.1917,
"step": 93500
},
{
"epoch": 12.65,
"eval_accuracy": 0.7535633076368476,
"eval_loss": 1.112923502922058,
"eval_runtime": 2.2569,
"eval_samples_per_second": 219.772,
"eval_steps_per_second": 7.089,
"step": 93500
},
{
"epoch": 12.72,
"learning_rate": 1.8200270635994587e-05,
"loss": 1.1895,
"step": 94000
},
{
"epoch": 12.72,
"eval_accuracy": 0.7494371965607963,
"eval_loss": 1.1288461685180664,
"eval_runtime": 2.1453,
"eval_samples_per_second": 231.202,
"eval_steps_per_second": 7.458,
"step": 94000
},
{
"epoch": 12.79,
"learning_rate": 1.8031123139377537e-05,
"loss": 1.1966,
"step": 94500
},
{
"epoch": 12.79,
"eval_accuracy": 0.7474297006435763,
"eval_loss": 1.1435807943344116,
"eval_runtime": 2.2544,
"eval_samples_per_second": 220.014,
"eval_steps_per_second": 7.097,
"step": 94500
},
{
"epoch": 12.86,
"learning_rate": 1.7861975642760487e-05,
"loss": 1.1887,
"step": 95000
},
{
"epoch": 12.86,
"eval_accuracy": 0.7530491066652402,
"eval_loss": 1.1220248937606812,
"eval_runtime": 2.0377,
"eval_samples_per_second": 243.414,
"eval_steps_per_second": 7.852,
"step": 95000
},
{
"epoch": 12.92,
"learning_rate": 1.769282814614344e-05,
"loss": 1.1856,
"step": 95500
},
{
"epoch": 12.92,
"eval_accuracy": 0.7499591391991283,
"eval_loss": 1.1441563367843628,
"eval_runtime": 2.2428,
"eval_samples_per_second": 221.15,
"eval_steps_per_second": 7.134,
"step": 95500
},
{
"epoch": 12.99,
"learning_rate": 1.752368064952639e-05,
"loss": 1.1934,
"step": 96000
},
{
"epoch": 12.99,
"eval_accuracy": 0.7487198734618374,
"eval_loss": 1.134777545928955,
"eval_runtime": 2.1446,
"eval_samples_per_second": 231.279,
"eval_steps_per_second": 7.461,
"step": 96000
},
{
"epoch": 13.06,
"learning_rate": 1.7354533152909337e-05,
"loss": 1.1848,
"step": 96500
},
{
"epoch": 13.06,
"eval_accuracy": 0.7521449252264457,
"eval_loss": 1.1171698570251465,
"eval_runtime": 2.2564,
"eval_samples_per_second": 219.819,
"eval_steps_per_second": 7.091,
"step": 96500
},
{
"epoch": 13.13,
"learning_rate": 1.7185385656292287e-05,
"loss": 1.1821,
"step": 97000
},
{
"epoch": 13.13,
"eval_accuracy": 0.7566415837311541,
"eval_loss": 1.1042215824127197,
"eval_runtime": 2.1261,
"eval_samples_per_second": 233.296,
"eval_steps_per_second": 7.526,
"step": 97000
},
{
"epoch": 13.19,
"learning_rate": 1.7016238159675237e-05,
"loss": 1.1817,
"step": 97500
},
{
"epoch": 13.19,
"eval_accuracy": 0.7495432072227,
"eval_loss": 1.1272791624069214,
"eval_runtime": 2.029,
"eval_samples_per_second": 244.45,
"eval_steps_per_second": 7.885,
"step": 97500
},
{
"epoch": 13.26,
"learning_rate": 1.6847090663058187e-05,
"loss": 1.1773,
"step": 98000
},
{
"epoch": 13.26,
"eval_accuracy": 0.7539743031358885,
"eval_loss": 1.0957542657852173,
"eval_runtime": 2.193,
"eval_samples_per_second": 226.174,
"eval_steps_per_second": 7.296,
"step": 98000
},
{
"epoch": 13.33,
"learning_rate": 1.6677943166441137e-05,
"loss": 1.1774,
"step": 98500
},
{
"epoch": 13.33,
"eval_accuracy": 0.7510550791645386,
"eval_loss": 1.1139615774154663,
"eval_runtime": 2.0354,
"eval_samples_per_second": 243.69,
"eval_steps_per_second": 7.861,
"step": 98500
},
{
"epoch": 13.4,
"learning_rate": 1.6508795669824086e-05,
"loss": 1.1841,
"step": 99000
},
{
"epoch": 13.4,
"eval_accuracy": 0.7535410764872521,
"eval_loss": 1.1085665225982666,
"eval_runtime": 2.2775,
"eval_samples_per_second": 217.784,
"eval_steps_per_second": 7.025,
"step": 99000
},
{
"epoch": 13.46,
"learning_rate": 1.6339648173207036e-05,
"loss": 1.1825,
"step": 99500
},
{
"epoch": 13.46,
"eval_accuracy": 0.7575840393550151,
"eval_loss": 1.0903350114822388,
"eval_runtime": 2.1352,
"eval_samples_per_second": 232.293,
"eval_steps_per_second": 7.493,
"step": 99500
},
{
"epoch": 13.53,
"learning_rate": 1.6170500676589986e-05,
"loss": 1.1845,
"step": 100000
},
{
"epoch": 13.53,
"eval_accuracy": 0.7486053092575125,
"eval_loss": 1.129094123840332,
"eval_runtime": 2.2564,
"eval_samples_per_second": 219.823,
"eval_steps_per_second": 7.091,
"step": 100000
},
{
"epoch": 13.6,
"learning_rate": 1.6001353179972936e-05,
"loss": 1.1853,
"step": 100500
},
{
"epoch": 13.6,
"eval_accuracy": 0.7485774103500107,
"eval_loss": 1.1317797899246216,
"eval_runtime": 2.2139,
"eval_samples_per_second": 224.036,
"eval_steps_per_second": 7.227,
"step": 100500
},
{
"epoch": 13.67,
"learning_rate": 1.5832205683355886e-05,
"loss": 1.1761,
"step": 101000
},
{
"epoch": 13.67,
"eval_accuracy": 0.7552630190471166,
"eval_loss": 1.1218476295471191,
"eval_runtime": 2.2116,
"eval_samples_per_second": 224.271,
"eval_steps_per_second": 7.235,
"step": 101000
},
{
"epoch": 13.73,
"learning_rate": 1.566305818673884e-05,
"loss": 1.1825,
"step": 101500
},
{
"epoch": 13.73,
"eval_accuracy": 0.7484677617063006,
"eval_loss": 1.130650520324707,
"eval_runtime": 2.1348,
"eval_samples_per_second": 232.339,
"eval_steps_per_second": 7.495,
"step": 101500
},
{
"epoch": 13.8,
"learning_rate": 1.549391069012179e-05,
"loss": 1.1849,
"step": 102000
},
{
"epoch": 13.8,
"eval_accuracy": 0.7503921250473254,
"eval_loss": 1.1273096799850464,
"eval_runtime": 2.1346,
"eval_samples_per_second": 232.367,
"eval_steps_per_second": 7.496,
"step": 102000
},
{
"epoch": 13.87,
"learning_rate": 1.5324763193504736e-05,
"loss": 1.1792,
"step": 102500
},
{
"epoch": 13.87,
"eval_accuracy": 0.7496725963112518,
"eval_loss": 1.1290724277496338,
"eval_runtime": 2.0388,
"eval_samples_per_second": 243.277,
"eval_steps_per_second": 7.848,
"step": 102500
},
{
"epoch": 13.94,
"learning_rate": 1.5155615696887688e-05,
"loss": 1.1852,
"step": 103000
},
{
"epoch": 13.94,
"eval_accuracy": 0.7521213264014223,
"eval_loss": 1.1133606433868408,
"eval_runtime": 2.1407,
"eval_samples_per_second": 231.702,
"eval_steps_per_second": 7.474,
"step": 103000
},
{
"epoch": 14.01,
"learning_rate": 1.4986468200270637e-05,
"loss": 1.1745,
"step": 103500
},
{
"epoch": 14.01,
"eval_accuracy": 0.7510633656887338,
"eval_loss": 1.1251685619354248,
"eval_runtime": 2.2562,
"eval_samples_per_second": 219.835,
"eval_steps_per_second": 7.091,
"step": 103500
},
{
"epoch": 14.07,
"learning_rate": 1.4817320703653587e-05,
"loss": 1.1746,
"step": 104000
},
{
"epoch": 14.07,
"eval_accuracy": 0.7508518468038707,
"eval_loss": 1.114823579788208,
"eval_runtime": 2.1373,
"eval_samples_per_second": 232.068,
"eval_steps_per_second": 7.486,
"step": 104000
},
{
"epoch": 14.14,
"learning_rate": 1.4648173207036536e-05,
"loss": 1.1765,
"step": 104500
},
{
"epoch": 14.14,
"eval_accuracy": 0.7499047204224969,
"eval_loss": 1.120153784751892,
"eval_runtime": 1.9047,
"eval_samples_per_second": 260.406,
"eval_steps_per_second": 8.4,
"step": 104500
},
{
"epoch": 14.21,
"learning_rate": 1.4479025710419486e-05,
"loss": 1.1762,
"step": 105000
},
{
"epoch": 14.21,
"eval_accuracy": 0.7526651867686152,
"eval_loss": 1.11342453956604,
"eval_runtime": 2.1108,
"eval_samples_per_second": 234.982,
"eval_steps_per_second": 7.58,
"step": 105000
},
{
"epoch": 14.28,
"learning_rate": 1.4309878213802435e-05,
"loss": 1.1752,
"step": 105500
},
{
"epoch": 14.28,
"eval_accuracy": 0.7550594107753242,
"eval_loss": 1.1170574426651,
"eval_runtime": 2.0286,
"eval_samples_per_second": 244.505,
"eval_steps_per_second": 7.887,
"step": 105500
},
{
"epoch": 14.34,
"learning_rate": 1.4140730717185385e-05,
"loss": 1.176,
"step": 106000
},
{
"epoch": 14.34,
"eval_accuracy": 0.7526875882289065,
"eval_loss": 1.1155229806900024,
"eval_runtime": 2.2532,
"eval_samples_per_second": 220.127,
"eval_steps_per_second": 7.101,
"step": 106000
},
{
"epoch": 14.41,
"learning_rate": 1.3971583220568335e-05,
"loss": 1.1732,
"step": 106500
},
{
"epoch": 14.41,
"eval_accuracy": 0.7481485413956945,
"eval_loss": 1.133280873298645,
"eval_runtime": 2.1464,
"eval_samples_per_second": 231.081,
"eval_steps_per_second": 7.454,
"step": 106500
},
{
"epoch": 14.48,
"learning_rate": 1.3802435723951287e-05,
"loss": 1.1753,
"step": 107000
},
{
"epoch": 14.48,
"eval_accuracy": 0.7574028502663674,
"eval_loss": 1.0981875658035278,
"eval_runtime": 2.0246,
"eval_samples_per_second": 244.982,
"eval_steps_per_second": 7.903,
"step": 107000
},
{
"epoch": 14.55,
"learning_rate": 1.3633288227334237e-05,
"loss": 1.1713,
"step": 107500
},
{
"epoch": 14.55,
"eval_accuracy": 0.749116988864623,
"eval_loss": 1.1342977285385132,
"eval_runtime": 2.1397,
"eval_samples_per_second": 231.806,
"eval_steps_per_second": 7.478,
"step": 107500
},
{
"epoch": 14.61,
"learning_rate": 1.3464140730717187e-05,
"loss": 1.1692,
"step": 108000
},
{
"epoch": 14.61,
"eval_accuracy": 0.7548563905532121,
"eval_loss": 1.1020859479904175,
"eval_runtime": 2.038,
"eval_samples_per_second": 243.371,
"eval_steps_per_second": 7.851,
"step": 108000
},
{
"epoch": 14.68,
"learning_rate": 1.3294993234100137e-05,
"loss": 1.17,
"step": 108500
},
{
"epoch": 14.68,
"eval_accuracy": 0.7503776331328444,
"eval_loss": 1.110732078552246,
"eval_runtime": 2.2439,
"eval_samples_per_second": 221.039,
"eval_steps_per_second": 7.13,
"step": 108500
},
{
"epoch": 14.75,
"learning_rate": 1.3125845737483087e-05,
"loss": 1.1699,
"step": 109000
},
{
"epoch": 14.75,
"eval_accuracy": 0.7505413012882971,
"eval_loss": 1.1227320432662964,
"eval_runtime": 2.1354,
"eval_samples_per_second": 232.28,
"eval_steps_per_second": 7.493,
"step": 109000
},
{
"epoch": 14.82,
"learning_rate": 1.2956698240866036e-05,
"loss": 1.1763,
"step": 109500
},
{
"epoch": 14.82,
"eval_accuracy": 0.7523848348960457,
"eval_loss": 1.1152479648590088,
"eval_runtime": 2.2475,
"eval_samples_per_second": 220.693,
"eval_steps_per_second": 7.119,
"step": 109500
},
{
"epoch": 14.88,
"learning_rate": 1.2787550744248986e-05,
"loss": 1.1729,
"step": 110000
},
{
"epoch": 14.88,
"eval_accuracy": 0.7563491422261722,
"eval_loss": 1.0939308404922485,
"eval_runtime": 2.0291,
"eval_samples_per_second": 244.44,
"eval_steps_per_second": 7.885,
"step": 110000
},
{
"epoch": 14.95,
"learning_rate": 1.2618403247631935e-05,
"loss": 1.1731,
"step": 110500
},
{
"epoch": 14.95,
"eval_accuracy": 0.7446182644738601,
"eval_loss": 1.153084397315979,
"eval_runtime": 2.1368,
"eval_samples_per_second": 232.12,
"eval_steps_per_second": 7.488,
"step": 110500
},
{
"epoch": 15.02,
"learning_rate": 1.2449255751014885e-05,
"loss": 1.1744,
"step": 111000
},
{
"epoch": 15.02,
"eval_accuracy": 0.748938913662494,
"eval_loss": 1.1451458930969238,
"eval_runtime": 2.2627,
"eval_samples_per_second": 219.205,
"eval_steps_per_second": 7.071,
"step": 111000
},
{
"epoch": 15.09,
"learning_rate": 1.2280108254397836e-05,
"loss": 1.169,
"step": 111500
},
{
"epoch": 15.09,
"eval_accuracy": 0.7527127355796688,
"eval_loss": 1.1211124658584595,
"eval_runtime": 2.254,
"eval_samples_per_second": 220.058,
"eval_steps_per_second": 7.099,
"step": 111500
},
{
"epoch": 15.16,
"learning_rate": 1.2110960757780786e-05,
"loss": 1.1644,
"step": 112000
},
{
"epoch": 15.16,
"eval_accuracy": 0.7553240179845462,
"eval_loss": 1.1134895086288452,
"eval_runtime": 2.1385,
"eval_samples_per_second": 231.943,
"eval_steps_per_second": 7.482,
"step": 112000
},
{
"epoch": 15.22,
"learning_rate": 1.1941813261163736e-05,
"loss": 1.1726,
"step": 112500
},
{
"epoch": 15.22,
"eval_accuracy": 0.7551064057320073,
"eval_loss": 1.0903879404067993,
"eval_runtime": 2.141,
"eval_samples_per_second": 231.664,
"eval_steps_per_second": 7.473,
"step": 112500
},
{
"epoch": 15.29,
"learning_rate": 1.1772665764546684e-05,
"loss": 1.1653,
"step": 113000
},
{
"epoch": 15.29,
"eval_accuracy": 0.7585871152701898,
"eval_loss": 1.0806618928909302,
"eval_runtime": 2.2591,
"eval_samples_per_second": 219.56,
"eval_steps_per_second": 7.083,
"step": 113000
},
{
"epoch": 15.36,
"learning_rate": 1.1603518267929634e-05,
"loss": 1.1651,
"step": 113500
},
{
"epoch": 15.36,
"eval_accuracy": 0.7487410264652309,
"eval_loss": 1.1385972499847412,
"eval_runtime": 2.0137,
"eval_samples_per_second": 246.318,
"eval_steps_per_second": 7.946,
"step": 113500
},
{
"epoch": 15.43,
"learning_rate": 1.1434370771312584e-05,
"loss": 1.1663,
"step": 114000
},
{
"epoch": 15.43,
"eval_accuracy": 0.7531269501044577,
"eval_loss": 1.1114603281021118,
"eval_runtime": 2.038,
"eval_samples_per_second": 243.378,
"eval_steps_per_second": 7.851,
"step": 114000
},
{
"epoch": 15.49,
"learning_rate": 1.1265223274695536e-05,
"loss": 1.1635,
"step": 114500
},
{
"epoch": 15.49,
"eval_accuracy": 0.7503972421965474,
"eval_loss": 1.1271893978118896,
"eval_runtime": 2.2587,
"eval_samples_per_second": 219.592,
"eval_steps_per_second": 7.084,
"step": 114500
},
{
"epoch": 15.56,
"learning_rate": 1.1096075778078486e-05,
"loss": 1.1646,
"step": 115000
},
{
"epoch": 15.56,
"eval_accuracy": 0.7541348344725908,
"eval_loss": 1.0982328653335571,
"eval_runtime": 2.0235,
"eval_samples_per_second": 245.115,
"eval_steps_per_second": 7.907,
"step": 115000
},
{
"epoch": 15.63,
"learning_rate": 1.0926928281461436e-05,
"loss": 1.1639,
"step": 115500
},
{
"epoch": 15.63,
"eval_accuracy": 0.7544710600476913,
"eval_loss": 1.1104248762130737,
"eval_runtime": 2.0141,
"eval_samples_per_second": 246.267,
"eval_steps_per_second": 7.944,
"step": 115500
},
{
"epoch": 15.7,
"learning_rate": 1.0757780784844384e-05,
"loss": 1.1598,
"step": 116000
},
{
"epoch": 15.7,
"eval_accuracy": 0.7492906747372119,
"eval_loss": 1.1334669589996338,
"eval_runtime": 2.1233,
"eval_samples_per_second": 233.593,
"eval_steps_per_second": 7.535,
"step": 116000
},
{
"epoch": 15.76,
"learning_rate": 1.0588633288227334e-05,
"loss": 1.1612,
"step": 116500
},
{
"epoch": 15.76,
"eval_accuracy": 0.7535777086433112,
"eval_loss": 1.1088109016418457,
"eval_runtime": 2.1482,
"eval_samples_per_second": 230.894,
"eval_steps_per_second": 7.448,
"step": 116500
},
{
"epoch": 15.83,
"learning_rate": 1.0419485791610285e-05,
"loss": 1.159,
"step": 117000
},
{
"epoch": 15.83,
"eval_accuracy": 0.755389401298914,
"eval_loss": 1.0895658731460571,
"eval_runtime": 1.8998,
"eval_samples_per_second": 261.073,
"eval_steps_per_second": 8.422,
"step": 117000
},
{
"epoch": 15.9,
"learning_rate": 1.0250338294993235e-05,
"loss": 1.1686,
"step": 117500
},
{
"epoch": 15.9,
"eval_accuracy": 0.7521880806829505,
"eval_loss": 1.1212115287780762,
"eval_runtime": 2.1408,
"eval_samples_per_second": 231.688,
"eval_steps_per_second": 7.474,
"step": 117500
},
{
"epoch": 15.97,
"learning_rate": 1.0081190798376185e-05,
"loss": 1.158,
"step": 118000
},
{
"epoch": 15.97,
"eval_accuracy": 0.7528032891926527,
"eval_loss": 1.1104135513305664,
"eval_runtime": 2.2497,
"eval_samples_per_second": 220.471,
"eval_steps_per_second": 7.112,
"step": 118000
},
{
"epoch": 16.04,
"learning_rate": 9.912043301759135e-06,
"loss": 1.1633,
"step": 118500
},
{
"epoch": 16.04,
"eval_accuracy": 0.7537655533726261,
"eval_loss": 1.097953200340271,
"eval_runtime": 2.0401,
"eval_samples_per_second": 243.13,
"eval_steps_per_second": 7.843,
"step": 118500
},
{
"epoch": 16.1,
"learning_rate": 9.742895805142083e-06,
"loss": 1.1622,
"step": 119000
},
{
"epoch": 16.1,
"eval_accuracy": 0.750889583782618,
"eval_loss": 1.1274609565734863,
"eval_runtime": 2.0243,
"eval_samples_per_second": 245.026,
"eval_steps_per_second": 7.904,
"step": 119000
},
{
"epoch": 16.17,
"learning_rate": 9.573748308525033e-06,
"loss": 1.1625,
"step": 119500
},
{
"epoch": 16.17,
"eval_accuracy": 0.754607674067687,
"eval_loss": 1.1065136194229126,
"eval_runtime": 1.9032,
"eval_samples_per_second": 260.616,
"eval_steps_per_second": 8.407,
"step": 119500
},
{
"epoch": 16.24,
"learning_rate": 9.404600811907985e-06,
"loss": 1.1582,
"step": 120000
},
{
"epoch": 16.24,
"eval_accuracy": 0.7515266766659524,
"eval_loss": 1.1181069612503052,
"eval_runtime": 2.1333,
"eval_samples_per_second": 232.507,
"eval_steps_per_second": 7.5,
"step": 120000
},
{
"epoch": 16.31,
"learning_rate": 9.235453315290935e-06,
"loss": 1.1568,
"step": 120500
},
{
"epoch": 16.31,
"eval_accuracy": 0.7558363160425237,
"eval_loss": 1.1019920110702515,
"eval_runtime": 2.1312,
"eval_samples_per_second": 232.728,
"eval_steps_per_second": 7.507,
"step": 120500
},
{
"epoch": 16.37,
"learning_rate": 9.066305818673885e-06,
"loss": 1.1573,
"step": 121000
},
{
"epoch": 16.37,
"eval_accuracy": 0.7532534995625547,
"eval_loss": 1.115644097328186,
"eval_runtime": 2.1433,
"eval_samples_per_second": 231.416,
"eval_steps_per_second": 7.465,
"step": 121000
},
{
"epoch": 16.44,
"learning_rate": 8.897158322056835e-06,
"loss": 1.1549,
"step": 121500
},
{
"epoch": 16.44,
"eval_accuracy": 0.7508123310487945,
"eval_loss": 1.1205765008926392,
"eval_runtime": 2.2601,
"eval_samples_per_second": 219.456,
"eval_steps_per_second": 7.079,
"step": 121500
},
{
"epoch": 16.51,
"learning_rate": 8.728010825439783e-06,
"loss": 1.1592,
"step": 122000
},
{
"epoch": 16.51,
"eval_accuracy": 0.7542723559759243,
"eval_loss": 1.0985246896743774,
"eval_runtime": 2.2649,
"eval_samples_per_second": 218.998,
"eval_steps_per_second": 7.064,
"step": 122000
},
{
"epoch": 16.58,
"learning_rate": 8.558863328822733e-06,
"loss": 1.1584,
"step": 122500
},
{
"epoch": 16.58,
"eval_accuracy": 0.7531888104231674,
"eval_loss": 1.1170583963394165,
"eval_runtime": 2.028,
"eval_samples_per_second": 244.575,
"eval_steps_per_second": 7.89,
"step": 122500
},
{
"epoch": 16.64,
"learning_rate": 8.389715832205684e-06,
"loss": 1.1589,
"step": 123000
},
{
"epoch": 16.64,
"eval_accuracy": 0.7611846765843823,
"eval_loss": 1.0686120986938477,
"eval_runtime": 2.0269,
"eval_samples_per_second": 244.714,
"eval_steps_per_second": 7.894,
"step": 123000
},
{
"epoch": 16.71,
"learning_rate": 8.220568335588634e-06,
"loss": 1.1566,
"step": 123500
},
{
"epoch": 16.71,
"eval_accuracy": 0.7563581433672069,
"eval_loss": 1.094774603843689,
"eval_runtime": 2.1272,
"eval_samples_per_second": 233.175,
"eval_steps_per_second": 7.522,
"step": 123500
},
{
"epoch": 16.78,
"learning_rate": 8.051420838971584e-06,
"loss": 1.157,
"step": 124000
},
{
"epoch": 16.78,
"eval_accuracy": 0.7568443220476267,
"eval_loss": 1.0895816087722778,
"eval_runtime": 1.8979,
"eval_samples_per_second": 261.337,
"eval_steps_per_second": 8.43,
"step": 124000
},
{
"epoch": 16.85,
"learning_rate": 7.882273342354534e-06,
"loss": 1.1598,
"step": 124500
},
{
"epoch": 16.85,
"eval_accuracy": 0.7582212358242888,
"eval_loss": 1.086458683013916,
"eval_runtime": 2.2441,
"eval_samples_per_second": 221.026,
"eval_steps_per_second": 7.13,
"step": 124500
},
{
"epoch": 16.91,
"learning_rate": 7.713125845737482e-06,
"loss": 1.1567,
"step": 125000
},
{
"epoch": 16.91,
"eval_accuracy": 0.7565769744554401,
"eval_loss": 1.1091084480285645,
"eval_runtime": 2.2461,
"eval_samples_per_second": 220.83,
"eval_steps_per_second": 7.124,
"step": 125000
},
{
"epoch": 16.98,
"learning_rate": 7.543978349120433e-06,
"loss": 1.1643,
"step": 125500
},
{
"epoch": 16.98,
"eval_accuracy": 0.7521943363306939,
"eval_loss": 1.1232304573059082,
"eval_runtime": 1.9345,
"eval_samples_per_second": 256.4,
"eval_steps_per_second": 8.271,
"step": 125500
},
{
"epoch": 17.05,
"learning_rate": 7.374830852503384e-06,
"loss": 1.1536,
"step": 126000
},
{
"epoch": 17.05,
"eval_accuracy": 0.7583081570996979,
"eval_loss": 1.0930777788162231,
"eval_runtime": 2.1324,
"eval_samples_per_second": 232.599,
"eval_steps_per_second": 7.503,
"step": 126000
},
{
"epoch": 17.12,
"learning_rate": 7.205683355886334e-06,
"loss": 1.1486,
"step": 126500
},
{
"epoch": 17.12,
"eval_accuracy": 0.7540195062318956,
"eval_loss": 1.1099752187728882,
"eval_runtime": 1.917,
"eval_samples_per_second": 258.744,
"eval_steps_per_second": 8.347,
"step": 126500
},
{
"epoch": 17.19,
"learning_rate": 7.036535859269283e-06,
"loss": 1.1551,
"step": 127000
},
{
"epoch": 17.19,
"eval_accuracy": 0.7537926501999014,
"eval_loss": 1.1018755435943604,
"eval_runtime": 2.2805,
"eval_samples_per_second": 217.494,
"eval_steps_per_second": 7.016,
"step": 127000
},
{
"epoch": 17.25,
"learning_rate": 6.867388362652233e-06,
"loss": 1.1491,
"step": 127500
},
{
"epoch": 17.25,
"eval_accuracy": 0.7546221700303138,
"eval_loss": 1.096489667892456,
"eval_runtime": 2.0207,
"eval_samples_per_second": 245.455,
"eval_steps_per_second": 7.918,
"step": 127500
},
{
"epoch": 17.32,
"learning_rate": 6.698240866035183e-06,
"loss": 1.152,
"step": 128000
},
{
"epoch": 17.32,
"eval_accuracy": 0.7590838783208054,
"eval_loss": 1.0724998712539673,
"eval_runtime": 2.2748,
"eval_samples_per_second": 218.043,
"eval_steps_per_second": 7.034,
"step": 128000
},
{
"epoch": 17.39,
"learning_rate": 6.5290933694181334e-06,
"loss": 1.1521,
"step": 128500
},
{
"epoch": 17.39,
"eval_accuracy": 0.7526631431935811,
"eval_loss": 1.1246150732040405,
"eval_runtime": 2.2564,
"eval_samples_per_second": 219.819,
"eval_steps_per_second": 7.091,
"step": 128500
},
{
"epoch": 17.46,
"learning_rate": 6.359945872801083e-06,
"loss": 1.1518,
"step": 129000
},
{
"epoch": 17.46,
"eval_accuracy": 0.7570206230802984,
"eval_loss": 1.1025118827819824,
"eval_runtime": 2.2644,
"eval_samples_per_second": 219.041,
"eval_steps_per_second": 7.066,
"step": 129000
},
{
"epoch": 17.52,
"learning_rate": 6.190798376184033e-06,
"loss": 1.1525,
"step": 129500
},
{
"epoch": 17.52,
"eval_accuracy": 0.7553470100392842,
"eval_loss": 1.1027612686157227,
"eval_runtime": 2.2829,
"eval_samples_per_second": 217.27,
"eval_steps_per_second": 7.009,
"step": 129500
},
{
"epoch": 17.59,
"learning_rate": 6.021650879566982e-06,
"loss": 1.1509,
"step": 130000
},
{
"epoch": 17.59,
"eval_accuracy": 0.753968902322795,
"eval_loss": 1.1140735149383545,
"eval_runtime": 2.0112,
"eval_samples_per_second": 246.621,
"eval_steps_per_second": 7.956,
"step": 130000
},
{
"epoch": 17.66,
"learning_rate": 5.852503382949932e-06,
"loss": 1.1522,
"step": 130500
},
{
"epoch": 17.66,
"eval_accuracy": 0.7523416805483493,
"eval_loss": 1.1235767602920532,
"eval_runtime": 2.0162,
"eval_samples_per_second": 246.006,
"eval_steps_per_second": 7.936,
"step": 130500
},
{
"epoch": 17.73,
"learning_rate": 5.683355886332883e-06,
"loss": 1.1488,
"step": 131000
},
{
"epoch": 17.73,
"eval_accuracy": 0.7589817903428665,
"eval_loss": 1.0937731266021729,
"eval_runtime": 2.1303,
"eval_samples_per_second": 232.835,
"eval_steps_per_second": 7.511,
"step": 131000
},
{
"epoch": 17.79,
"learning_rate": 5.514208389715832e-06,
"loss": 1.1477,
"step": 131500
},
{
"epoch": 17.79,
"eval_accuracy": 0.7519756032882524,
"eval_loss": 1.1069520711898804,
"eval_runtime": 2.1341,
"eval_samples_per_second": 232.421,
"eval_steps_per_second": 7.497,
"step": 131500
},
{
"epoch": 17.86,
"learning_rate": 5.345060893098782e-06,
"loss": 1.1498,
"step": 132000
},
{
"epoch": 17.86,
"eval_accuracy": 0.7560714094247574,
"eval_loss": 1.0885875225067139,
"eval_runtime": 2.2647,
"eval_samples_per_second": 219.014,
"eval_steps_per_second": 7.065,
"step": 132000
},
{
"epoch": 17.93,
"learning_rate": 5.175913396481733e-06,
"loss": 1.1489,
"step": 132500
},
{
"epoch": 17.93,
"eval_accuracy": 0.75788641382883,
"eval_loss": 1.0874009132385254,
"eval_runtime": 2.1397,
"eval_samples_per_second": 231.808,
"eval_steps_per_second": 7.478,
"step": 132500
},
{
"epoch": 18.0,
"learning_rate": 5.006765899864682e-06,
"loss": 1.1462,
"step": 133000
},
{
"epoch": 18.0,
"eval_accuracy": 0.7556547699093623,
"eval_loss": 1.1015816926956177,
"eval_runtime": 2.2706,
"eval_samples_per_second": 218.448,
"eval_steps_per_second": 7.047,
"step": 133000
},
{
"epoch": 18.06,
"learning_rate": 4.837618403247632e-06,
"loss": 1.1448,
"step": 133500
},
{
"epoch": 18.06,
"eval_accuracy": 0.7546062508530094,
"eval_loss": 1.0937751531600952,
"eval_runtime": 2.022,
"eval_samples_per_second": 245.305,
"eval_steps_per_second": 7.913,
"step": 133500
},
{
"epoch": 18.13,
"learning_rate": 4.6684709066305826e-06,
"loss": 1.1425,
"step": 134000
},
{
"epoch": 18.13,
"eval_accuracy": 0.7552112751822265,
"eval_loss": 1.0958871841430664,
"eval_runtime": 2.2598,
"eval_samples_per_second": 219.486,
"eval_steps_per_second": 7.08,
"step": 134000
},
{
"epoch": 18.2,
"learning_rate": 4.499323410013532e-06,
"loss": 1.1414,
"step": 134500
},
{
"epoch": 18.2,
"eval_accuracy": 0.7558802565930149,
"eval_loss": 1.0867284536361694,
"eval_runtime": 2.029,
"eval_samples_per_second": 244.456,
"eval_steps_per_second": 7.886,
"step": 134500
},
{
"epoch": 18.27,
"learning_rate": 4.330175913396482e-06,
"loss": 1.1453,
"step": 135000
},
{
"epoch": 18.27,
"eval_accuracy": 0.7591597591597592,
"eval_loss": 1.0756407976150513,
"eval_runtime": 2.1403,
"eval_samples_per_second": 231.744,
"eval_steps_per_second": 7.476,
"step": 135000
},
{
"epoch": 18.34,
"learning_rate": 4.161028416779432e-06,
"loss": 1.1448,
"step": 135500
},
{
"epoch": 18.34,
"eval_accuracy": 0.7545405695862439,
"eval_loss": 1.0937347412109375,
"eval_runtime": 2.2479,
"eval_samples_per_second": 220.651,
"eval_steps_per_second": 7.118,
"step": 135500
},
{
"epoch": 18.4,
"learning_rate": 3.991880920162381e-06,
"loss": 1.1471,
"step": 136000
},
{
"epoch": 18.4,
"eval_accuracy": 0.7537506745817593,
"eval_loss": 1.1153604984283447,
"eval_runtime": 2.2669,
"eval_samples_per_second": 218.8,
"eval_steps_per_second": 7.058,
"step": 136000
},
{
"epoch": 18.47,
"learning_rate": 3.822733423545332e-06,
"loss": 1.1484,
"step": 136500
},
{
"epoch": 18.47,
"eval_accuracy": 0.7537701926689208,
"eval_loss": 1.1114356517791748,
"eval_runtime": 2.0201,
"eval_samples_per_second": 245.53,
"eval_steps_per_second": 7.92,
"step": 136500
},
{
"epoch": 18.54,
"learning_rate": 3.6535859269282817e-06,
"loss": 1.1463,
"step": 137000
},
{
"epoch": 18.54,
"eval_accuracy": 0.7513940144923632,
"eval_loss": 1.1001887321472168,
"eval_runtime": 2.1485,
"eval_samples_per_second": 230.858,
"eval_steps_per_second": 7.447,
"step": 137000
},
{
"epoch": 18.61,
"learning_rate": 3.4844384303112316e-06,
"loss": 1.1512,
"step": 137500
},
{
"epoch": 18.61,
"eval_accuracy": 0.7586606950140298,
"eval_loss": 1.0663777589797974,
"eval_runtime": 2.1796,
"eval_samples_per_second": 227.562,
"eval_steps_per_second": 7.341,
"step": 137500
},
{
"epoch": 18.67,
"learning_rate": 3.315290933694182e-06,
"loss": 1.1464,
"step": 138000
},
{
"epoch": 18.67,
"eval_accuracy": 0.7583911006384086,
"eval_loss": 1.0735660791397095,
"eval_runtime": 2.251,
"eval_samples_per_second": 220.348,
"eval_steps_per_second": 7.108,
"step": 138000
},
{
"epoch": 18.74,
"learning_rate": 3.1461434370771314e-06,
"loss": 1.1457,
"step": 138500
},
{
"epoch": 18.74,
"eval_accuracy": 0.7604149648750205,
"eval_loss": 1.080166220664978,
"eval_runtime": 2.1301,
"eval_samples_per_second": 232.857,
"eval_steps_per_second": 7.512,
"step": 138500
},
{
"epoch": 18.81,
"learning_rate": 2.9769959404600813e-06,
"loss": 1.1464,
"step": 139000
},
{
"epoch": 18.81,
"eval_accuracy": 0.75420555676145,
"eval_loss": 1.1091315746307373,
"eval_runtime": 2.1281,
"eval_samples_per_second": 233.067,
"eval_steps_per_second": 7.518,
"step": 139000
},
{
"epoch": 18.88,
"learning_rate": 2.8078484438430312e-06,
"loss": 1.1415,
"step": 139500
},
{
"epoch": 18.88,
"eval_accuracy": 0.7594658329138073,
"eval_loss": 1.0856248140335083,
"eval_runtime": 2.2679,
"eval_samples_per_second": 218.701,
"eval_steps_per_second": 7.055,
"step": 139500
},
{
"epoch": 18.94,
"learning_rate": 2.638700947225981e-06,
"loss": 1.149,
"step": 140000
},
{
"epoch": 18.94,
"eval_accuracy": 0.7557433607017732,
"eval_loss": 1.0958749055862427,
"eval_runtime": 2.1355,
"eval_samples_per_second": 232.263,
"eval_steps_per_second": 7.492,
"step": 140000
},
{
"epoch": 19.01,
"learning_rate": 2.469553450608931e-06,
"loss": 1.1445,
"step": 140500
},
{
"epoch": 19.01,
"eval_accuracy": 0.7600160578081092,
"eval_loss": 1.0713545083999634,
"eval_runtime": 2.0458,
"eval_samples_per_second": 242.449,
"eval_steps_per_second": 7.821,
"step": 140500
},
{
"epoch": 19.08,
"learning_rate": 2.300405953991881e-06,
"loss": 1.1378,
"step": 141000
},
{
"epoch": 19.08,
"eval_accuracy": 0.7528535980148884,
"eval_loss": 1.1179081201553345,
"eval_runtime": 2.239,
"eval_samples_per_second": 221.527,
"eval_steps_per_second": 7.146,
"step": 141000
},
{
"epoch": 19.15,
"learning_rate": 2.131258457374831e-06,
"loss": 1.143,
"step": 141500
},
{
"epoch": 19.15,
"eval_accuracy": 0.7608561044555122,
"eval_loss": 1.085029125213623,
"eval_runtime": 2.2698,
"eval_samples_per_second": 218.525,
"eval_steps_per_second": 7.049,
"step": 141500
},
{
"epoch": 19.22,
"learning_rate": 1.962110960757781e-06,
"loss": 1.1412,
"step": 142000
},
{
"epoch": 19.22,
"eval_accuracy": 0.7571760842796552,
"eval_loss": 1.1089389324188232,
"eval_runtime": 2.2591,
"eval_samples_per_second": 219.56,
"eval_steps_per_second": 7.083,
"step": 142000
},
{
"epoch": 19.28,
"learning_rate": 1.7929634641407306e-06,
"loss": 1.1393,
"step": 142500
},
{
"epoch": 19.28,
"eval_accuracy": 0.7580414678206476,
"eval_loss": 1.095458984375,
"eval_runtime": 1.8948,
"eval_samples_per_second": 261.767,
"eval_steps_per_second": 8.444,
"step": 142500
},
{
"epoch": 19.35,
"learning_rate": 1.6238159675236807e-06,
"loss": 1.1492,
"step": 143000
},
{
"epoch": 19.35,
"eval_accuracy": 0.755947708880288,
"eval_loss": 1.0982964038848877,
"eval_runtime": 2.019,
"eval_samples_per_second": 245.66,
"eval_steps_per_second": 7.925,
"step": 143000
},
{
"epoch": 19.42,
"learning_rate": 1.4546684709066306e-06,
"loss": 1.1455,
"step": 143500
},
{
"epoch": 19.42,
"eval_accuracy": 0.7540966020328801,
"eval_loss": 1.12480628490448,
"eval_runtime": 1.9105,
"eval_samples_per_second": 259.614,
"eval_steps_per_second": 8.375,
"step": 143500
},
{
"epoch": 19.49,
"learning_rate": 1.2855209742895805e-06,
"loss": 1.1442,
"step": 144000
},
{
"epoch": 19.49,
"eval_accuracy": 0.7567218409366169,
"eval_loss": 1.1033666133880615,
"eval_runtime": 2.1366,
"eval_samples_per_second": 232.142,
"eval_steps_per_second": 7.488,
"step": 144000
},
{
"epoch": 19.55,
"learning_rate": 1.1163734776725304e-06,
"loss": 1.1385,
"step": 144500
},
{
"epoch": 19.55,
"eval_accuracy": 0.7598665473187404,
"eval_loss": 1.0718320608139038,
"eval_runtime": 2.1421,
"eval_samples_per_second": 231.551,
"eval_steps_per_second": 7.469,
"step": 144500
},
{
"epoch": 19.62,
"learning_rate": 9.472259810554805e-07,
"loss": 1.1393,
"step": 145000
},
{
"epoch": 19.62,
"eval_accuracy": 0.7511771590321439,
"eval_loss": 1.1188093423843384,
"eval_runtime": 1.937,
"eval_samples_per_second": 256.065,
"eval_steps_per_second": 8.26,
"step": 145000
},
{
"epoch": 19.69,
"learning_rate": 7.780784844384303e-07,
"loss": 1.1408,
"step": 145500
},
{
"epoch": 19.69,
"eval_accuracy": 0.7571148718506829,
"eval_loss": 1.096737027168274,
"eval_runtime": 2.128,
"eval_samples_per_second": 233.085,
"eval_steps_per_second": 7.519,
"step": 145500
},
{
"epoch": 19.76,
"learning_rate": 6.089309878213802e-07,
"loss": 1.1443,
"step": 146000
},
{
"epoch": 19.76,
"eval_accuracy": 0.7525236340330075,
"eval_loss": 1.115225911140442,
"eval_runtime": 2.0196,
"eval_samples_per_second": 245.588,
"eval_steps_per_second": 7.922,
"step": 146000
},
{
"epoch": 19.82,
"learning_rate": 4.397834912043302e-07,
"loss": 1.1495,
"step": 146500
},
{
"epoch": 19.82,
"eval_accuracy": 0.7534898820473974,
"eval_loss": 1.1063731908798218,
"eval_runtime": 2.0319,
"eval_samples_per_second": 244.111,
"eval_steps_per_second": 7.875,
"step": 146500
},
{
"epoch": 19.89,
"learning_rate": 2.7063599458728015e-07,
"loss": 1.1397,
"step": 147000
},
{
"epoch": 19.89,
"eval_accuracy": 0.7602626366768863,
"eval_loss": 1.0799843072891235,
"eval_runtime": 2.3196,
"eval_samples_per_second": 213.833,
"eval_steps_per_second": 6.898,
"step": 147000
},
{
"epoch": 19.96,
"learning_rate": 1.0148849797023004e-07,
"loss": 1.1399,
"step": 147500
},
{
"epoch": 19.96,
"eval_accuracy": 0.7566619534479008,
"eval_loss": 1.0812491178512573,
"eval_runtime": 2.2409,
"eval_samples_per_second": 221.343,
"eval_steps_per_second": 7.14,
"step": 147500
},
{
"epoch": 20.0,
"step": 147800,
"total_flos": 1.2450139383539958e+18,
"train_loss": 1.2263236557646922,
"train_runtime": 47907.234,
"train_samples_per_second": 98.713,
"train_steps_per_second": 3.085
}
],
"max_steps": 147800,
"num_train_epochs": 20,
"total_flos": 1.2450139383539958e+18,
"trial_name": null,
"trial_params": null
}