{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.079355858472596, "global_step": 900000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.2222222222222223e-05, "loss": 10.6946, "step": 2000 }, { "epoch": 0.03, "learning_rate": 4.4444444444444447e-05, "loss": 10.5675, "step": 4000 }, { "epoch": 0.04, "learning_rate": 6.666666666666667e-05, "loss": 10.4211, "step": 6000 }, { "epoch": 0.05, "learning_rate": 8.888888888888889e-05, "loss": 10.2041, "step": 8000 }, { "epoch": 0.07, "learning_rate": 9.988776655443322e-05, "loss": 9.8916, "step": 10000 }, { "epoch": 0.08, "learning_rate": 9.966329966329967e-05, "loss": 9.4948, "step": 12000 }, { "epoch": 0.09, "learning_rate": 9.943883277216612e-05, "loss": 9.0658, "step": 14000 }, { "epoch": 0.11, "learning_rate": 9.921436588103256e-05, "loss": 8.6284, "step": 16000 }, { "epoch": 0.12, "learning_rate": 9.8989898989899e-05, "loss": 8.1946, "step": 18000 }, { "epoch": 0.14, "learning_rate": 9.876543209876543e-05, "loss": 7.801, "step": 20000 }, { "epoch": 0.15, "learning_rate": 9.854096520763188e-05, "loss": 7.4541, "step": 22000 }, { "epoch": 0.16, "learning_rate": 9.831649831649832e-05, "loss": 7.283, "step": 24000 }, { "epoch": 0.18, "learning_rate": 9.809203142536476e-05, "loss": 7.2067, "step": 26000 }, { "epoch": 0.19, "learning_rate": 9.78675645342312e-05, "loss": 7.1146, "step": 28000 }, { "epoch": 0.2, "learning_rate": 9.764309764309764e-05, "loss": 7.0541, "step": 30000 }, { "epoch": 0.22, "learning_rate": 9.74186307519641e-05, "loss": 6.9996, "step": 32000 }, { "epoch": 0.23, "learning_rate": 9.719416386083054e-05, "loss": 6.967, "step": 34000 }, { "epoch": 0.24, "learning_rate": 9.696969696969698e-05, "loss": 6.9272, "step": 36000 }, { "epoch": 0.26, "learning_rate": 9.674523007856342e-05, "loss": 6.9136, "step": 38000 }, { "epoch": 0.27, "learning_rate": 9.652076318742987e-05, "loss": 6.8708, "step": 40000 }, { "epoch": 0.28, "learning_rate": 9.62962962962963e-05, "loss": 6.8544, "step": 42000 }, { "epoch": 0.3, "learning_rate": 9.607182940516274e-05, "loss": 6.8332, "step": 44000 }, { "epoch": 0.31, "learning_rate": 9.584736251402918e-05, "loss": 6.8299, "step": 46000 }, { "epoch": 0.32, "learning_rate": 9.562289562289563e-05, "loss": 6.806, "step": 48000 }, { "epoch": 0.34, "learning_rate": 9.539842873176207e-05, "loss": 6.7812, "step": 50000 }, { "epoch": 0.35, "learning_rate": 9.517396184062851e-05, "loss": 6.7821, "step": 52000 }, { "epoch": 0.36, "learning_rate": 9.494949494949495e-05, "loss": 6.756, "step": 54000 }, { "epoch": 0.38, "learning_rate": 9.472502805836139e-05, "loss": 6.7547, "step": 56000 }, { "epoch": 0.39, "learning_rate": 9.450056116722785e-05, "loss": 6.7199, "step": 58000 }, { "epoch": 0.41, "learning_rate": 9.427609427609429e-05, "loss": 6.6967, "step": 60000 }, { "epoch": 0.42, "learning_rate": 9.405162738496073e-05, "loss": 6.6726, "step": 62000 }, { "epoch": 0.43, "learning_rate": 9.382716049382717e-05, "loss": 6.6519, "step": 64000 }, { "epoch": 0.45, "learning_rate": 9.36026936026936e-05, "loss": 6.6244, "step": 66000 }, { "epoch": 0.46, "learning_rate": 9.337822671156005e-05, "loss": 6.5836, "step": 68000 }, { "epoch": 0.47, "learning_rate": 9.315375982042649e-05, "loss": 6.5503, "step": 70000 }, { "epoch": 0.49, "learning_rate": 9.292929292929293e-05, "loss": 6.5117, "step": 72000 }, { "epoch": 0.5, "learning_rate": 9.270482603815938e-05, "loss": 6.4852, "step": 74000 }, { "epoch": 0.51, "learning_rate": 9.248035914702582e-05, "loss": 6.4517, "step": 76000 }, { "epoch": 0.53, "learning_rate": 9.225589225589226e-05, "loss": 6.405, "step": 78000 }, { "epoch": 0.54, "learning_rate": 9.20314253647587e-05, "loss": 6.3787, "step": 80000 }, { "epoch": 0.55, "learning_rate": 9.180695847362514e-05, "loss": 6.3171, "step": 82000 }, { "epoch": 0.57, "learning_rate": 9.15824915824916e-05, "loss": 6.2806, "step": 84000 }, { "epoch": 0.58, "learning_rate": 9.135802469135802e-05, "loss": 6.3758, "step": 86000 }, { "epoch": 0.59, "learning_rate": 9.113355780022446e-05, "loss": 6.3753, "step": 88000 }, { "epoch": 0.61, "learning_rate": 9.090909090909092e-05, "loss": 6.2428, "step": 90000 }, { "epoch": 0.62, "learning_rate": 9.068462401795736e-05, "loss": 5.9605, "step": 92000 }, { "epoch": 0.63, "learning_rate": 9.04601571268238e-05, "loss": 5.774, "step": 94000 }, { "epoch": 0.65, "learning_rate": 9.023569023569024e-05, "loss": 5.8005, "step": 96000 }, { "epoch": 0.66, "learning_rate": 9.001122334455668e-05, "loss": 5.8735, "step": 98000 }, { "epoch": 0.68, "learning_rate": 8.978675645342313e-05, "loss": 5.8452, "step": 100000 }, { "epoch": 0.69, "learning_rate": 8.956228956228957e-05, "loss": 5.7882, "step": 102000 }, { "epoch": 0.7, "learning_rate": 8.933782267115601e-05, "loss": 6.0639, "step": 104000 }, { "epoch": 0.72, "learning_rate": 8.911335578002245e-05, "loss": 6.0042, "step": 106000 }, { "epoch": 0.73, "learning_rate": 8.888888888888889e-05, "loss": 5.9511, "step": 108000 }, { "epoch": 0.74, "learning_rate": 8.866442199775533e-05, "loss": 5.8909, "step": 110000 }, { "epoch": 0.76, "learning_rate": 8.843995510662177e-05, "loss": 5.8925, "step": 112000 }, { "epoch": 0.77, "learning_rate": 8.821548821548821e-05, "loss": 5.9042, "step": 114000 }, { "epoch": 0.78, "learning_rate": 8.799102132435467e-05, "loss": 5.9797, "step": 116000 }, { "epoch": 0.8, "learning_rate": 8.776655443322111e-05, "loss": 6.1174, "step": 118000 }, { "epoch": 0.81, "learning_rate": 8.754208754208755e-05, "loss": 6.2284, "step": 120000 }, { "epoch": 0.82, "learning_rate": 8.731762065095399e-05, "loss": 6.1204, "step": 122000 }, { "epoch": 0.84, "learning_rate": 8.709315375982043e-05, "loss": 5.6589, "step": 124000 }, { "epoch": 0.85, "learning_rate": 8.686868686868688e-05, "loss": 5.7295, "step": 126000 }, { "epoch": 0.86, "learning_rate": 8.664421997755332e-05, "loss": 5.7535, "step": 128000 }, { "epoch": 0.88, "learning_rate": 8.641975308641975e-05, "loss": 5.6045, "step": 130000 }, { "epoch": 0.89, "learning_rate": 8.619528619528619e-05, "loss": 5.6091, "step": 132000 }, { "epoch": 0.91, "learning_rate": 8.597081930415264e-05, "loss": 5.5933, "step": 134000 }, { "epoch": 0.92, "learning_rate": 8.574635241301908e-05, "loss": 5.6344, "step": 136000 }, { "epoch": 0.93, "learning_rate": 8.552188552188552e-05, "loss": 5.6063, "step": 138000 }, { "epoch": 0.95, "learning_rate": 8.529741863075196e-05, "loss": 5.4933, "step": 140000 }, { "epoch": 0.96, "learning_rate": 8.507295173961842e-05, "loss": 5.5386, "step": 142000 }, { "epoch": 0.97, "learning_rate": 8.484848484848486e-05, "loss": 5.5085, "step": 144000 }, { "epoch": 0.99, "learning_rate": 8.46240179573513e-05, "loss": 5.4098, "step": 146000 }, { "epoch": 1.0, "learning_rate": 8.439955106621774e-05, "loss": 5.0913, "step": 148000 }, { "epoch": 1.01, "learning_rate": 8.417508417508418e-05, "loss": 5.6115, "step": 150000 }, { "epoch": 1.03, "learning_rate": 8.395061728395062e-05, "loss": 5.5311, "step": 152000 }, { "epoch": 1.04, "learning_rate": 8.372615039281706e-05, "loss": 5.4583, "step": 154000 }, { "epoch": 1.05, "learning_rate": 8.35016835016835e-05, "loss": 5.4188, "step": 156000 }, { "epoch": 1.07, "learning_rate": 8.327721661054994e-05, "loss": 5.3814, "step": 158000 }, { "epoch": 1.08, "learning_rate": 8.30527497194164e-05, "loss": 5.3292, "step": 160000 }, { "epoch": 1.09, "learning_rate": 8.282828282828283e-05, "loss": 5.291, "step": 162000 }, { "epoch": 1.11, "learning_rate": 8.260381593714927e-05, "loss": 5.2601, "step": 164000 }, { "epoch": 1.12, "learning_rate": 8.237934904601572e-05, "loss": 5.2253, "step": 166000 }, { "epoch": 1.13, "learning_rate": 8.215488215488217e-05, "loss": 5.2076, "step": 168000 }, { "epoch": 1.15, "learning_rate": 8.193041526374861e-05, "loss": 5.1529, "step": 170000 }, { "epoch": 1.16, "learning_rate": 8.170594837261504e-05, "loss": 5.1493, "step": 172000 }, { "epoch": 1.18, "learning_rate": 8.148148148148148e-05, "loss": 5.1449, "step": 174000 }, { "epoch": 1.19, "learning_rate": 8.125701459034793e-05, "loss": 5.0768, "step": 176000 }, { "epoch": 1.2, "learning_rate": 8.103254769921437e-05, "loss": 5.0558, "step": 178000 }, { "epoch": 1.22, "learning_rate": 8.080808080808081e-05, "loss": 5.0232, "step": 180000 }, { "epoch": 1.23, "learning_rate": 8.058361391694725e-05, "loss": 5.0037, "step": 182000 }, { "epoch": 1.24, "learning_rate": 8.035914702581369e-05, "loss": 4.9835, "step": 184000 }, { "epoch": 1.26, "learning_rate": 8.013468013468015e-05, "loss": 4.9832, "step": 186000 }, { "epoch": 1.27, "learning_rate": 7.991021324354659e-05, "loss": 4.9235, "step": 188000 }, { "epoch": 1.28, "learning_rate": 7.968574635241303e-05, "loss": 4.9121, "step": 190000 }, { "epoch": 1.3, "learning_rate": 7.946127946127947e-05, "loss": 4.8973, "step": 192000 }, { "epoch": 1.31, "learning_rate": 7.92368125701459e-05, "loss": 4.8992, "step": 194000 }, { "epoch": 1.32, "learning_rate": 7.901234567901235e-05, "loss": 4.8595, "step": 196000 }, { "epoch": 1.34, "learning_rate": 7.878787878787879e-05, "loss": 4.84, "step": 198000 }, { "epoch": 1.35, "learning_rate": 7.856341189674523e-05, "loss": 4.8424, "step": 200000 }, { "epoch": 1.36, "learning_rate": 7.833894500561168e-05, "loss": 4.8017, "step": 202000 }, { "epoch": 1.38, "learning_rate": 7.811447811447812e-05, "loss": 4.8083, "step": 204000 }, { "epoch": 1.39, "learning_rate": 7.789001122334456e-05, "loss": 4.7732, "step": 206000 }, { "epoch": 1.41, "learning_rate": 7.7665544332211e-05, "loss": 4.7528, "step": 208000 }, { "epoch": 1.42, "learning_rate": 7.744107744107744e-05, "loss": 4.7499, "step": 210000 }, { "epoch": 1.43, "learning_rate": 7.72166105499439e-05, "loss": 4.7275, "step": 212000 }, { "epoch": 1.45, "learning_rate": 7.699214365881034e-05, "loss": 4.7118, "step": 214000 }, { "epoch": 1.46, "learning_rate": 7.676767676767676e-05, "loss": 4.6839, "step": 216000 }, { "epoch": 1.47, "learning_rate": 7.65432098765432e-05, "loss": 4.6654, "step": 218000 }, { "epoch": 1.49, "learning_rate": 7.631874298540966e-05, "loss": 4.6509, "step": 220000 }, { "epoch": 1.5, "learning_rate": 7.60942760942761e-05, "loss": 4.6436, "step": 222000 }, { "epoch": 1.51, "learning_rate": 7.586980920314254e-05, "loss": 4.6321, "step": 224000 }, { "epoch": 1.53, "learning_rate": 7.564534231200898e-05, "loss": 4.6257, "step": 226000 }, { "epoch": 1.54, "learning_rate": 7.542087542087543e-05, "loss": 4.6259, "step": 228000 }, { "epoch": 1.55, "learning_rate": 7.519640852974187e-05, "loss": 4.5876, "step": 230000 }, { "epoch": 1.57, "learning_rate": 7.497194163860831e-05, "loss": 4.5918, "step": 232000 }, { "epoch": 1.58, "learning_rate": 7.474747474747475e-05, "loss": 4.7349, "step": 234000 }, { "epoch": 1.59, "learning_rate": 7.452300785634119e-05, "loss": 4.7906, "step": 236000 }, { "epoch": 1.61, "learning_rate": 7.429854096520763e-05, "loss": 4.5217, "step": 238000 }, { "epoch": 1.62, "learning_rate": 7.407407407407407e-05, "loss": 4.2357, "step": 240000 }, { "epoch": 1.63, "learning_rate": 7.384960718294051e-05, "loss": 4.1207, "step": 242000 }, { "epoch": 1.65, "learning_rate": 7.362514029180695e-05, "loss": 4.1877, "step": 244000 }, { "epoch": 1.66, "learning_rate": 7.340067340067341e-05, "loss": 4.4264, "step": 246000 }, { "epoch": 1.68, "learning_rate": 7.317620650953985e-05, "loss": 4.3763, "step": 248000 }, { "epoch": 1.69, "learning_rate": 7.295173961840629e-05, "loss": 4.3764, "step": 250000 }, { "epoch": 1.7, "learning_rate": 7.272727272727273e-05, "loss": 4.6481, "step": 252000 }, { "epoch": 1.72, "learning_rate": 7.250280583613918e-05, "loss": 4.6296, "step": 254000 }, { "epoch": 1.73, "learning_rate": 7.227833894500562e-05, "loss": 4.6029, "step": 256000 }, { "epoch": 1.74, "learning_rate": 7.205387205387206e-05, "loss": 4.5606, "step": 258000 }, { "epoch": 1.76, "learning_rate": 7.182940516273849e-05, "loss": 4.6015, "step": 260000 }, { "epoch": 1.77, "learning_rate": 7.160493827160494e-05, "loss": 4.6519, "step": 262000 }, { "epoch": 1.78, "learning_rate": 7.138047138047138e-05, "loss": 4.7609, "step": 264000 }, { "epoch": 1.8, "learning_rate": 7.115600448933782e-05, "loss": 4.9615, "step": 266000 }, { "epoch": 1.81, "learning_rate": 7.093153759820426e-05, "loss": 5.1392, "step": 268000 }, { "epoch": 1.82, "learning_rate": 7.07070707070707e-05, "loss": 5.0403, "step": 270000 }, { "epoch": 1.84, "learning_rate": 7.048260381593716e-05, "loss": 4.3987, "step": 272000 }, { "epoch": 1.85, "learning_rate": 7.02581369248036e-05, "loss": 4.6271, "step": 274000 }, { "epoch": 1.86, "learning_rate": 7.003367003367004e-05, "loss": 4.6795, "step": 276000 }, { "epoch": 1.88, "learning_rate": 6.980920314253648e-05, "loss": 4.5634, "step": 278000 }, { "epoch": 1.89, "learning_rate": 6.958473625140293e-05, "loss": 4.5767, "step": 280000 }, { "epoch": 1.9, "learning_rate": 6.936026936026936e-05, "loss": 4.5943, "step": 282000 }, { "epoch": 1.92, "learning_rate": 6.91358024691358e-05, "loss": 4.6575, "step": 284000 }, { "epoch": 1.93, "learning_rate": 6.891133557800224e-05, "loss": 4.6512, "step": 286000 }, { "epoch": 1.95, "learning_rate": 6.86868686868687e-05, "loss": 4.5433, "step": 288000 }, { "epoch": 1.96, "learning_rate": 6.846240179573513e-05, "loss": 4.6108, "step": 290000 }, { "epoch": 1.97, "learning_rate": 6.823793490460157e-05, "loss": 4.6175, "step": 292000 }, { "epoch": 1.99, "learning_rate": 6.801346801346801e-05, "loss": 4.5076, "step": 294000 }, { "epoch": 2.0, "learning_rate": 6.778900112233445e-05, "loss": 4.131, "step": 296000 }, { "epoch": 2.01, "learning_rate": 6.756453423120091e-05, "loss": 4.4705, "step": 298000 }, { "epoch": 2.03, "learning_rate": 6.734006734006735e-05, "loss": 4.4578, "step": 300000 }, { "epoch": 2.04, "learning_rate": 6.711560044893379e-05, "loss": 4.4142, "step": 302000 }, { "epoch": 2.05, "learning_rate": 6.689113355780023e-05, "loss": 4.3968, "step": 304000 }, { "epoch": 2.07, "learning_rate": 6.666666666666667e-05, "loss": 4.3893, "step": 306000 }, { "epoch": 2.08, "learning_rate": 6.644219977553311e-05, "loss": 4.3628, "step": 308000 }, { "epoch": 2.09, "learning_rate": 6.621773288439955e-05, "loss": 4.3452, "step": 310000 }, { "epoch": 2.11, "learning_rate": 6.599326599326599e-05, "loss": 4.3391, "step": 312000 }, { "epoch": 2.12, "learning_rate": 6.576879910213244e-05, "loss": 4.3232, "step": 314000 }, { "epoch": 2.13, "learning_rate": 6.554433221099888e-05, "loss": 4.3286, "step": 316000 }, { "epoch": 2.15, "learning_rate": 6.531986531986533e-05, "loss": 4.2963, "step": 318000 }, { "epoch": 2.16, "learning_rate": 6.509539842873177e-05, "loss": 4.3098, "step": 320000 }, { "epoch": 2.18, "learning_rate": 6.48709315375982e-05, "loss": 4.3239, "step": 322000 }, { "epoch": 2.19, "learning_rate": 6.464646464646466e-05, "loss": 4.2606, "step": 324000 }, { "epoch": 2.2, "learning_rate": 6.442199775533109e-05, "loss": 4.2617, "step": 326000 }, { "epoch": 2.22, "learning_rate": 6.419753086419753e-05, "loss": 4.2429, "step": 328000 }, { "epoch": 2.23, "learning_rate": 6.397306397306398e-05, "loss": 4.2412, "step": 330000 }, { "epoch": 2.24, "learning_rate": 6.374859708193042e-05, "loss": 4.2415, "step": 332000 }, { "epoch": 2.26, "learning_rate": 6.352413019079686e-05, "loss": 4.2517, "step": 334000 }, { "epoch": 2.27, "learning_rate": 6.32996632996633e-05, "loss": 4.2051, "step": 336000 }, { "epoch": 2.28, "learning_rate": 6.307519640852974e-05, "loss": 4.2003, "step": 338000 }, { "epoch": 2.3, "learning_rate": 6.28507295173962e-05, "loss": 4.2039, "step": 340000 }, { "epoch": 2.31, "learning_rate": 6.262626262626264e-05, "loss": 4.2197, "step": 342000 }, { "epoch": 2.32, "learning_rate": 6.240179573512908e-05, "loss": 4.1898, "step": 344000 }, { "epoch": 2.34, "learning_rate": 6.21773288439955e-05, "loss": 4.1859, "step": 346000 }, { "epoch": 2.35, "learning_rate": 6.195286195286196e-05, "loss": 4.2003, "step": 348000 }, { "epoch": 2.36, "learning_rate": 6.17283950617284e-05, "loss": 4.1667, "step": 350000 }, { "epoch": 2.38, "learning_rate": 6.150392817059484e-05, "loss": 4.1896, "step": 352000 }, { "epoch": 2.39, "learning_rate": 6.127946127946128e-05, "loss": 4.1576, "step": 354000 }, { "epoch": 2.4, "learning_rate": 6.105499438832773e-05, "loss": 4.1515, "step": 356000 }, { "epoch": 2.42, "learning_rate": 6.0830527497194164e-05, "loss": 4.1666, "step": 358000 }, { "epoch": 2.43, "learning_rate": 6.060606060606061e-05, "loss": 4.1459, "step": 360000 }, { "epoch": 2.45, "learning_rate": 6.038159371492705e-05, "loss": 4.1438, "step": 362000 }, { "epoch": 2.46, "learning_rate": 6.01571268237935e-05, "loss": 4.1272, "step": 364000 }, { "epoch": 2.47, "learning_rate": 5.993265993265994e-05, "loss": 4.1105, "step": 366000 }, { "epoch": 2.49, "learning_rate": 5.970819304152637e-05, "loss": 4.1111, "step": 368000 }, { "epoch": 2.5, "learning_rate": 5.948372615039281e-05, "loss": 4.1062, "step": 370000 }, { "epoch": 2.51, "learning_rate": 5.925925925925926e-05, "loss": 4.1072, "step": 372000 }, { "epoch": 2.53, "learning_rate": 5.90347923681257e-05, "loss": 4.1112, "step": 374000 }, { "epoch": 2.54, "learning_rate": 5.881032547699215e-05, "loss": 4.1251, "step": 376000 }, { "epoch": 2.55, "learning_rate": 5.858585858585859e-05, "loss": 4.0813, "step": 378000 }, { "epoch": 2.57, "learning_rate": 5.8361391694725034e-05, "loss": 4.0908, "step": 380000 }, { "epoch": 2.58, "learning_rate": 5.8136924803591475e-05, "loss": 4.2286, "step": 382000 }, { "epoch": 2.59, "learning_rate": 5.7912457912457915e-05, "loss": 4.3118, "step": 384000 }, { "epoch": 2.61, "learning_rate": 5.768799102132436e-05, "loss": 4.0238, "step": 386000 }, { "epoch": 2.62, "learning_rate": 5.74635241301908e-05, "loss": 3.7601, "step": 388000 }, { "epoch": 2.63, "learning_rate": 5.7239057239057236e-05, "loss": 3.6653, "step": 390000 }, { "epoch": 2.65, "learning_rate": 5.701459034792368e-05, "loss": 3.7115, "step": 392000 }, { "epoch": 2.66, "learning_rate": 5.679012345679012e-05, "loss": 4.0332, "step": 394000 }, { "epoch": 2.67, "learning_rate": 5.6565656565656563e-05, "loss": 3.9273, "step": 396000 }, { "epoch": 2.69, "learning_rate": 5.634118967452301e-05, "loss": 3.9359, "step": 398000 }, { "epoch": 2.7, "learning_rate": 5.611672278338945e-05, "loss": 4.2338, "step": 400000 }, { "epoch": 2.72, "learning_rate": 5.58922558922559e-05, "loss": 4.2178, "step": 402000 }, { "epoch": 2.73, "learning_rate": 5.566778900112234e-05, "loss": 4.2029, "step": 404000 }, { "epoch": 2.74, "learning_rate": 5.5443322109988785e-05, "loss": 4.1545, "step": 406000 }, { "epoch": 2.76, "learning_rate": 5.5218855218855225e-05, "loss": 4.2009, "step": 408000 }, { "epoch": 2.77, "learning_rate": 5.4994388327721666e-05, "loss": 4.2519, "step": 410000 }, { "epoch": 2.78, "learning_rate": 5.47699214365881e-05, "loss": 4.3561, "step": 412000 }, { "epoch": 2.8, "learning_rate": 5.4545454545454546e-05, "loss": 4.5544, "step": 414000 }, { "epoch": 2.81, "learning_rate": 5.4320987654320986e-05, "loss": 4.7515, "step": 416000 }, { "epoch": 2.82, "learning_rate": 5.4096520763187434e-05, "loss": 4.6801, "step": 418000 }, { "epoch": 2.84, "learning_rate": 5.3872053872053874e-05, "loss": 3.9784, "step": 420000 }, { "epoch": 2.85, "learning_rate": 5.3647586980920314e-05, "loss": 4.2571, "step": 422000 }, { "epoch": 2.86, "learning_rate": 5.342312008978676e-05, "loss": 4.3184, "step": 424000 }, { "epoch": 2.88, "learning_rate": 5.31986531986532e-05, "loss": 4.2075, "step": 426000 }, { "epoch": 2.89, "learning_rate": 5.297418630751965e-05, "loss": 4.2215, "step": 428000 }, { "epoch": 2.9, "learning_rate": 5.274971941638609e-05, "loss": 4.2475, "step": 430000 }, { "epoch": 2.92, "learning_rate": 5.2525252525252536e-05, "loss": 4.3064, "step": 432000 }, { "epoch": 2.93, "learning_rate": 5.230078563411896e-05, "loss": 4.3034, "step": 434000 }, { "epoch": 2.95, "learning_rate": 5.207631874298541e-05, "loss": 4.2119, "step": 436000 }, { "epoch": 2.96, "learning_rate": 5.185185185185185e-05, "loss": 4.2714, "step": 438000 }, { "epoch": 2.97, "learning_rate": 5.16273849607183e-05, "loss": 4.2878, "step": 440000 }, { "epoch": 2.99, "learning_rate": 5.140291806958474e-05, "loss": 4.1744, "step": 442000 }, { "epoch": 3.0, "learning_rate": 5.1178451178451184e-05, "loss": 3.8192, "step": 444000 }, { "epoch": 3.01, "learning_rate": 5.0953984287317624e-05, "loss": 4.0513, "step": 446000 }, { "epoch": 3.03, "learning_rate": 5.0729517396184065e-05, "loss": 4.0666, "step": 448000 }, { "epoch": 3.04, "learning_rate": 5.050505050505051e-05, "loss": 4.0297, "step": 450000 }, { "epoch": 3.05, "learning_rate": 5.028058361391695e-05, "loss": 4.019, "step": 452000 }, { "epoch": 3.07, "learning_rate": 5.00561167227834e-05, "loss": 4.0183, "step": 454000 }, { "epoch": 3.08, "learning_rate": 4.983164983164983e-05, "loss": 3.9988, "step": 456000 }, { "epoch": 3.09, "learning_rate": 4.960718294051628e-05, "loss": 3.9882, "step": 458000 }, { "epoch": 3.11, "learning_rate": 4.938271604938271e-05, "loss": 3.9879, "step": 460000 }, { "epoch": 3.12, "learning_rate": 4.915824915824916e-05, "loss": 3.9752, "step": 462000 }, { "epoch": 3.13, "learning_rate": 4.89337822671156e-05, "loss": 3.9901, "step": 464000 }, { "epoch": 3.15, "learning_rate": 4.870931537598205e-05, "loss": 3.9586, "step": 466000 }, { "epoch": 3.16, "learning_rate": 4.848484848484849e-05, "loss": 3.9711, "step": 468000 }, { "epoch": 3.17, "learning_rate": 4.8260381593714935e-05, "loss": 4.0005, "step": 470000 }, { "epoch": 3.19, "learning_rate": 4.803591470258137e-05, "loss": 3.9334, "step": 472000 }, { "epoch": 3.2, "learning_rate": 4.7811447811447815e-05, "loss": 3.9461, "step": 474000 }, { "epoch": 3.22, "learning_rate": 4.7586980920314256e-05, "loss": 3.9312, "step": 476000 }, { "epoch": 3.23, "learning_rate": 4.7362514029180696e-05, "loss": 3.928, "step": 478000 }, { "epoch": 3.24, "learning_rate": 4.713804713804714e-05, "loss": 3.9341, "step": 480000 }, { "epoch": 3.26, "learning_rate": 4.691358024691358e-05, "loss": 3.949, "step": 482000 }, { "epoch": 3.27, "learning_rate": 4.6689113355780024e-05, "loss": 3.9045, "step": 484000 }, { "epoch": 3.28, "learning_rate": 4.6464646464646464e-05, "loss": 3.9063, "step": 486000 }, { "epoch": 3.3, "learning_rate": 4.624017957351291e-05, "loss": 3.9079, "step": 488000 }, { "epoch": 3.31, "learning_rate": 4.601571268237935e-05, "loss": 3.9365, "step": 490000 }, { "epoch": 3.32, "learning_rate": 4.57912457912458e-05, "loss": 3.908, "step": 492000 }, { "epoch": 3.34, "learning_rate": 4.556677890011223e-05, "loss": 3.9043, "step": 494000 }, { "epoch": 3.35, "learning_rate": 4.534231200897868e-05, "loss": 3.9217, "step": 496000 }, { "epoch": 3.36, "learning_rate": 4.511784511784512e-05, "loss": 3.8926, "step": 498000 }, { "epoch": 3.38, "learning_rate": 4.4893378226711566e-05, "loss": 3.9196, "step": 500000 }, { "epoch": 3.39, "learning_rate": 4.4668911335578006e-05, "loss": 3.8883, "step": 502000 }, { "epoch": 3.4, "learning_rate": 4.4444444444444447e-05, "loss": 3.8888, "step": 504000 }, { "epoch": 3.42, "learning_rate": 4.421997755331089e-05, "loss": 3.9072, "step": 506000 }, { "epoch": 3.43, "learning_rate": 4.3995510662177334e-05, "loss": 3.8872, "step": 508000 }, { "epoch": 3.44, "learning_rate": 4.3771043771043774e-05, "loss": 3.8941, "step": 510000 }, { "epoch": 3.46, "learning_rate": 4.3546576879910214e-05, "loss": 3.8768, "step": 512000 }, { "epoch": 3.47, "learning_rate": 4.332210998877666e-05, "loss": 3.8667, "step": 514000 }, { "epoch": 3.49, "learning_rate": 4.3097643097643095e-05, "loss": 3.867, "step": 516000 }, { "epoch": 3.5, "learning_rate": 4.287317620650954e-05, "loss": 3.8665, "step": 518000 }, { "epoch": 3.51, "learning_rate": 4.264870931537598e-05, "loss": 3.8677, "step": 520000 }, { "epoch": 3.53, "learning_rate": 4.242424242424243e-05, "loss": 3.8752, "step": 522000 }, { "epoch": 3.54, "learning_rate": 4.219977553310887e-05, "loss": 3.888, "step": 524000 }, { "epoch": 3.55, "learning_rate": 4.197530864197531e-05, "loss": 3.849, "step": 526000 }, { "epoch": 3.57, "learning_rate": 4.175084175084175e-05, "loss": 3.8591, "step": 528000 }, { "epoch": 3.58, "learning_rate": 4.15263748597082e-05, "loss": 3.9935, "step": 530000 }, { "epoch": 3.59, "learning_rate": 4.130190796857464e-05, "loss": 4.1179, "step": 532000 }, { "epoch": 3.61, "learning_rate": 4.1077441077441085e-05, "loss": 3.8085, "step": 534000 }, { "epoch": 3.62, "learning_rate": 4.085297418630752e-05, "loss": 3.5572, "step": 536000 }, { "epoch": 3.63, "learning_rate": 4.0628507295173965e-05, "loss": 3.4772, "step": 538000 }, { "epoch": 3.65, "learning_rate": 4.0404040404040405e-05, "loss": 3.4955, "step": 540000 }, { "epoch": 3.66, "learning_rate": 4.0179573512906846e-05, "loss": 3.8805, "step": 542000 }, { "epoch": 3.67, "learning_rate": 3.995510662177329e-05, "loss": 3.7327, "step": 544000 }, { "epoch": 3.69, "learning_rate": 3.973063973063973e-05, "loss": 3.7417, "step": 546000 }, { "epoch": 3.7, "learning_rate": 3.950617283950617e-05, "loss": 4.0379, "step": 548000 }, { "epoch": 3.72, "learning_rate": 3.9281705948372613e-05, "loss": 4.0257, "step": 550000 }, { "epoch": 3.73, "learning_rate": 3.905723905723906e-05, "loss": 4.0116, "step": 552000 }, { "epoch": 3.74, "learning_rate": 3.88327721661055e-05, "loss": 3.9664, "step": 554000 }, { "epoch": 3.76, "learning_rate": 3.860830527497195e-05, "loss": 4.0149, "step": 556000 }, { "epoch": 3.77, "learning_rate": 3.838383838383838e-05, "loss": 4.0601, "step": 558000 }, { "epoch": 3.78, "learning_rate": 3.815937149270483e-05, "loss": 4.1639, "step": 560000 }, { "epoch": 3.8, "learning_rate": 3.793490460157127e-05, "loss": 4.3604, "step": 562000 }, { "epoch": 3.81, "learning_rate": 3.7710437710437716e-05, "loss": 4.5598, "step": 564000 }, { "epoch": 3.82, "learning_rate": 3.7485970819304156e-05, "loss": 4.5141, "step": 566000 }, { "epoch": 3.84, "learning_rate": 3.7261503928170596e-05, "loss": 3.8061, "step": 568000 }, { "epoch": 3.85, "learning_rate": 3.7037037037037037e-05, "loss": 4.0652, "step": 570000 }, { "epoch": 3.86, "learning_rate": 3.681257014590348e-05, "loss": 4.1378, "step": 572000 }, { "epoch": 3.88, "learning_rate": 3.6588103254769924e-05, "loss": 4.0305, "step": 574000 }, { "epoch": 3.89, "learning_rate": 3.6363636363636364e-05, "loss": 4.0567, "step": 576000 }, { "epoch": 3.9, "learning_rate": 3.613916947250281e-05, "loss": 4.0735, "step": 578000 }, { "epoch": 3.92, "learning_rate": 3.5914702581369245e-05, "loss": 4.1457, "step": 580000 }, { "epoch": 3.93, "learning_rate": 3.569023569023569e-05, "loss": 4.1228, "step": 582000 }, { "epoch": 3.94, "learning_rate": 3.546576879910213e-05, "loss": 4.061, "step": 584000 }, { "epoch": 3.96, "learning_rate": 3.524130190796858e-05, "loss": 4.1077, "step": 586000 }, { "epoch": 3.97, "learning_rate": 3.501683501683502e-05, "loss": 4.1329, "step": 588000 }, { "epoch": 3.99, "learning_rate": 3.4792368125701466e-05, "loss": 4.0207, "step": 590000 }, { "epoch": 4.0, "learning_rate": 3.45679012345679e-05, "loss": 3.6761, "step": 592000 }, { "epoch": 4.01, "learning_rate": 3.434343434343435e-05, "loss": 3.8304, "step": 594000 }, { "epoch": 4.03, "learning_rate": 3.411896745230079e-05, "loss": 3.8686, "step": 596000 }, { "epoch": 4.04, "learning_rate": 3.389450056116723e-05, "loss": 3.8363, "step": 598000 }, { "epoch": 4.05, "learning_rate": 3.3670033670033675e-05, "loss": 3.8302, "step": 600000 }, { "epoch": 4.07, "learning_rate": 3.3445566778900115e-05, "loss": 3.8316, "step": 602000 }, { "epoch": 4.08, "learning_rate": 3.3221099887766555e-05, "loss": 3.8158, "step": 604000 }, { "epoch": 4.09, "learning_rate": 3.2996632996632995e-05, "loss": 3.8058, "step": 606000 }, { "epoch": 4.11, "learning_rate": 3.277216610549944e-05, "loss": 3.8102, "step": 608000 }, { "epoch": 4.12, "learning_rate": 3.254769921436588e-05, "loss": 3.8001, "step": 610000 }, { "epoch": 4.13, "learning_rate": 3.232323232323233e-05, "loss": 3.8153, "step": 612000 }, { "epoch": 4.15, "learning_rate": 3.209876543209876e-05, "loss": 3.7888, "step": 614000 }, { "epoch": 4.16, "learning_rate": 3.187429854096521e-05, "loss": 3.8019, "step": 616000 }, { "epoch": 4.17, "learning_rate": 3.164983164983165e-05, "loss": 3.8379, "step": 618000 }, { "epoch": 4.19, "learning_rate": 3.14253647586981e-05, "loss": 3.7686, "step": 620000 }, { "epoch": 4.2, "learning_rate": 3.120089786756454e-05, "loss": 3.7825, "step": 622000 }, { "epoch": 4.22, "learning_rate": 3.097643097643098e-05, "loss": 3.7695, "step": 624000 }, { "epoch": 4.23, "learning_rate": 3.075196408529742e-05, "loss": 3.7713, "step": 626000 }, { "epoch": 4.24, "learning_rate": 3.0527497194163865e-05, "loss": 3.7789, "step": 628000 }, { "epoch": 4.26, "learning_rate": 3.0303030303030306e-05, "loss": 3.7927, "step": 630000 }, { "epoch": 4.27, "learning_rate": 3.007856341189675e-05, "loss": 3.7568, "step": 632000 }, { "epoch": 4.28, "learning_rate": 2.9854096520763186e-05, "loss": 3.7555, "step": 634000 }, { "epoch": 4.3, "learning_rate": 2.962962962962963e-05, "loss": 3.761, "step": 636000 }, { "epoch": 4.31, "learning_rate": 2.9405162738496074e-05, "loss": 3.7894, "step": 638000 }, { "epoch": 4.32, "learning_rate": 2.9180695847362517e-05, "loss": 3.7595, "step": 640000 }, { "epoch": 4.34, "learning_rate": 2.8956228956228958e-05, "loss": 3.763, "step": 642000 }, { "epoch": 4.35, "learning_rate": 2.87317620650954e-05, "loss": 3.7774, "step": 644000 }, { "epoch": 4.36, "learning_rate": 2.850729517396184e-05, "loss": 3.7551, "step": 646000 }, { "epoch": 4.38, "learning_rate": 2.8282828282828282e-05, "loss": 3.7829, "step": 648000 }, { "epoch": 4.39, "learning_rate": 2.8058361391694725e-05, "loss": 3.7527, "step": 650000 }, { "epoch": 4.4, "learning_rate": 2.783389450056117e-05, "loss": 3.753, "step": 652000 }, { "epoch": 4.42, "learning_rate": 2.7609427609427613e-05, "loss": 3.7746, "step": 654000 }, { "epoch": 4.43, "learning_rate": 2.738496071829405e-05, "loss": 3.7543, "step": 656000 }, { "epoch": 4.44, "learning_rate": 2.7160493827160493e-05, "loss": 3.7604, "step": 658000 }, { "epoch": 4.46, "learning_rate": 2.6936026936026937e-05, "loss": 3.7473, "step": 660000 }, { "epoch": 4.47, "learning_rate": 2.671156004489338e-05, "loss": 3.7377, "step": 662000 }, { "epoch": 4.49, "learning_rate": 2.6487093153759824e-05, "loss": 3.7398, "step": 664000 }, { "epoch": 4.5, "learning_rate": 2.6262626262626268e-05, "loss": 3.7432, "step": 666000 }, { "epoch": 4.51, "learning_rate": 2.6038159371492705e-05, "loss": 3.7484, "step": 668000 }, { "epoch": 4.53, "learning_rate": 2.581369248035915e-05, "loss": 3.7542, "step": 670000 }, { "epoch": 4.54, "learning_rate": 2.5589225589225592e-05, "loss": 3.7721, "step": 672000 }, { "epoch": 4.55, "learning_rate": 2.5364758698092032e-05, "loss": 3.7291, "step": 674000 }, { "epoch": 4.57, "learning_rate": 2.5140291806958476e-05, "loss": 3.7455, "step": 676000 }, { "epoch": 4.58, "learning_rate": 2.4915824915824916e-05, "loss": 3.8808, "step": 678000 }, { "epoch": 4.59, "learning_rate": 2.4691358024691357e-05, "loss": 4.0555, "step": 680000 }, { "epoch": 4.61, "learning_rate": 2.44668911335578e-05, "loss": 3.7132, "step": 682000 }, { "epoch": 4.62, "learning_rate": 2.4242424242424244e-05, "loss": 3.4602, "step": 684000 }, { "epoch": 4.63, "learning_rate": 2.4017957351290684e-05, "loss": 3.4057, "step": 686000 }, { "epoch": 4.65, "learning_rate": 2.3793490460157128e-05, "loss": 3.397, "step": 688000 }, { "epoch": 4.66, "learning_rate": 2.356902356902357e-05, "loss": 3.8375, "step": 690000 }, { "epoch": 4.67, "learning_rate": 2.3344556677890012e-05, "loss": 3.6615, "step": 692000 }, { "epoch": 4.69, "learning_rate": 2.3120089786756455e-05, "loss": 3.6624, "step": 694000 }, { "epoch": 4.7, "learning_rate": 2.28956228956229e-05, "loss": 3.9217, "step": 696000 }, { "epoch": 4.71, "learning_rate": 2.267115600448934e-05, "loss": 3.9308, "step": 698000 }, { "epoch": 4.73, "learning_rate": 2.2446689113355783e-05, "loss": 3.9061, "step": 700000 }, { "epoch": 4.74, "learning_rate": 2.2222222222222223e-05, "loss": 3.87, "step": 702000 }, { "epoch": 4.76, "learning_rate": 2.1997755331088667e-05, "loss": 3.919, "step": 704000 }, { "epoch": 4.77, "learning_rate": 2.1773288439955107e-05, "loss": 3.9621, "step": 706000 }, { "epoch": 4.78, "learning_rate": 2.1548821548821547e-05, "loss": 4.0667, "step": 708000 }, { "epoch": 4.8, "learning_rate": 2.132435465768799e-05, "loss": 4.2579, "step": 710000 }, { "epoch": 4.81, "learning_rate": 2.1099887766554435e-05, "loss": 4.4613, "step": 712000 }, { "epoch": 4.82, "learning_rate": 2.0875420875420875e-05, "loss": 4.4471, "step": 714000 }, { "epoch": 4.84, "learning_rate": 2.065095398428732e-05, "loss": 3.7612, "step": 716000 }, { "epoch": 4.85, "learning_rate": 2.042648709315376e-05, "loss": 3.9657, "step": 718000 }, { "epoch": 4.86, "learning_rate": 2.0202020202020203e-05, "loss": 4.0575, "step": 720000 }, { "epoch": 4.88, "learning_rate": 1.9977553310886646e-05, "loss": 3.9554, "step": 722000 }, { "epoch": 4.89, "learning_rate": 1.9753086419753087e-05, "loss": 3.9786, "step": 724000 }, { "epoch": 4.9, "learning_rate": 1.952861952861953e-05, "loss": 3.9809, "step": 726000 }, { "epoch": 4.92, "learning_rate": 1.9304152637485974e-05, "loss": 4.0793, "step": 728000 }, { "epoch": 4.93, "learning_rate": 1.9079685746352414e-05, "loss": 4.0373, "step": 730000 }, { "epoch": 4.94, "learning_rate": 1.8855218855218858e-05, "loss": 4.0029, "step": 732000 }, { "epoch": 4.96, "learning_rate": 1.8630751964085298e-05, "loss": 4.0193, "step": 734000 }, { "epoch": 4.97, "learning_rate": 1.840628507295174e-05, "loss": 4.0719, "step": 736000 }, { "epoch": 4.99, "learning_rate": 1.8181818181818182e-05, "loss": 3.9453, "step": 738000 }, { "epoch": 5.0, "learning_rate": 1.7957351290684622e-05, "loss": 3.6204, "step": 740000 }, { "epoch": 5.01, "learning_rate": 1.7732884399551066e-05, "loss": 3.6973, "step": 742000 }, { "epoch": 5.03, "learning_rate": 1.750841750841751e-05, "loss": 3.7598, "step": 744000 }, { "epoch": 5.04, "learning_rate": 1.728395061728395e-05, "loss": 3.7362, "step": 746000 }, { "epoch": 5.05, "learning_rate": 1.7059483726150394e-05, "loss": 3.7313, "step": 748000 }, { "epoch": 5.07, "learning_rate": 1.6835016835016837e-05, "loss": 3.7332, "step": 750000 }, { "epoch": 5.08, "learning_rate": 1.6610549943883278e-05, "loss": 3.722, "step": 752000 }, { "epoch": 5.09, "learning_rate": 1.638608305274972e-05, "loss": 3.713, "step": 754000 }, { "epoch": 5.11, "learning_rate": 1.6161616161616165e-05, "loss": 3.72, "step": 756000 }, { "epoch": 5.12, "learning_rate": 1.5937149270482605e-05, "loss": 3.7103, "step": 758000 }, { "epoch": 5.13, "learning_rate": 1.571268237934905e-05, "loss": 3.7315, "step": 760000 }, { "epoch": 5.15, "learning_rate": 1.548821548821549e-05, "loss": 3.7024, "step": 762000 }, { "epoch": 5.16, "learning_rate": 1.5263748597081933e-05, "loss": 3.7156, "step": 764000 }, { "epoch": 5.17, "learning_rate": 1.5039281705948375e-05, "loss": 3.7549, "step": 766000 }, { "epoch": 5.19, "learning_rate": 1.4814814814814815e-05, "loss": 3.6887, "step": 768000 }, { "epoch": 5.2, "learning_rate": 1.4590347923681259e-05, "loss": 3.7033, "step": 770000 }, { "epoch": 5.21, "learning_rate": 1.43658810325477e-05, "loss": 3.6917, "step": 772000 }, { "epoch": 5.23, "learning_rate": 1.4141414141414141e-05, "loss": 3.6936, "step": 774000 }, { "epoch": 5.24, "learning_rate": 1.3916947250280585e-05, "loss": 3.7019, "step": 776000 }, { "epoch": 5.26, "learning_rate": 1.3692480359147025e-05, "loss": 3.7172, "step": 778000 }, { "epoch": 5.27, "learning_rate": 1.3468013468013468e-05, "loss": 3.6812, "step": 780000 }, { "epoch": 5.28, "learning_rate": 1.3243546576879912e-05, "loss": 3.6841, "step": 782000 }, { "epoch": 5.3, "learning_rate": 1.3019079685746352e-05, "loss": 3.6837, "step": 784000 }, { "epoch": 5.31, "learning_rate": 1.2794612794612796e-05, "loss": 3.7225, "step": 786000 }, { "epoch": 5.32, "learning_rate": 1.2570145903479238e-05, "loss": 3.6897, "step": 788000 }, { "epoch": 5.34, "learning_rate": 1.2345679012345678e-05, "loss": 3.6941, "step": 790000 }, { "epoch": 5.35, "learning_rate": 1.2121212121212122e-05, "loss": 3.7125, "step": 792000 }, { "epoch": 5.36, "learning_rate": 1.1896745230078564e-05, "loss": 3.688, "step": 794000 }, { "epoch": 5.38, "learning_rate": 1.1672278338945006e-05, "loss": 3.7184, "step": 796000 }, { "epoch": 5.39, "learning_rate": 1.144781144781145e-05, "loss": 3.6901, "step": 798000 }, { "epoch": 5.4, "learning_rate": 1.1223344556677892e-05, "loss": 3.6886, "step": 800000 }, { "epoch": 5.42, "learning_rate": 1.0998877665544333e-05, "loss": 3.7129, "step": 802000 }, { "epoch": 5.43, "learning_rate": 1.0774410774410774e-05, "loss": 3.6953, "step": 804000 }, { "epoch": 5.44, "learning_rate": 1.0549943883277217e-05, "loss": 3.7016, "step": 806000 }, { "epoch": 5.46, "learning_rate": 1.032547699214366e-05, "loss": 3.69, "step": 808000 }, { "epoch": 5.47, "learning_rate": 1.0101010101010101e-05, "loss": 3.6834, "step": 810000 }, { "epoch": 5.48, "learning_rate": 9.876543209876543e-06, "loss": 3.6809, "step": 812000 }, { "epoch": 5.5, "learning_rate": 9.652076318742987e-06, "loss": 3.6861, "step": 814000 }, { "epoch": 5.51, "learning_rate": 9.427609427609429e-06, "loss": 3.6977, "step": 816000 }, { "epoch": 5.53, "learning_rate": 9.20314253647587e-06, "loss": 3.7002, "step": 818000 }, { "epoch": 5.54, "learning_rate": 8.978675645342311e-06, "loss": 3.7243, "step": 820000 }, { "epoch": 5.55, "learning_rate": 8.754208754208755e-06, "loss": 3.6768, "step": 822000 }, { "epoch": 5.57, "learning_rate": 8.529741863075197e-06, "loss": 3.6947, "step": 824000 }, { "epoch": 5.58, "learning_rate": 8.305274971941639e-06, "loss": 3.8538, "step": 826000 }, { "epoch": 5.59, "learning_rate": 8.080808080808082e-06, "loss": 4.0943, "step": 828000 }, { "epoch": 5.61, "learning_rate": 7.856341189674524e-06, "loss": 3.7048, "step": 830000 }, { "epoch": 5.62, "learning_rate": 7.631874298540966e-06, "loss": 3.4471, "step": 832000 }, { "epoch": 5.63, "learning_rate": 7.4074074074074075e-06, "loss": 3.4131, "step": 834000 }, { "epoch": 5.65, "learning_rate": 7.18294051627385e-06, "loss": 3.3965, "step": 836000 }, { "epoch": 5.66, "learning_rate": 6.958473625140292e-06, "loss": 3.9014, "step": 838000 }, { "epoch": 5.67, "learning_rate": 6.734006734006734e-06, "loss": 3.6888, "step": 840000 }, { "epoch": 5.69, "learning_rate": 6.509539842873176e-06, "loss": 3.6862, "step": 842000 }, { "epoch": 5.7, "learning_rate": 6.285072951739619e-06, "loss": 3.8646, "step": 844000 }, { "epoch": 5.71, "learning_rate": 6.060606060606061e-06, "loss": 3.8818, "step": 846000 }, { "epoch": 5.73, "learning_rate": 5.836139169472503e-06, "loss": 3.8666, "step": 848000 }, { "epoch": 5.74, "learning_rate": 5.611672278338946e-06, "loss": 3.8303, "step": 850000 }, { "epoch": 5.76, "learning_rate": 5.387205387205387e-06, "loss": 3.8764, "step": 852000 }, { "epoch": 5.77, "learning_rate": 5.16273849607183e-06, "loss": 3.9326, "step": 854000 }, { "epoch": 5.78, "learning_rate": 4.938271604938272e-06, "loss": 4.0327, "step": 856000 }, { "epoch": 5.8, "learning_rate": 4.7138047138047145e-06, "loss": 4.2255, "step": 858000 }, { "epoch": 5.81, "learning_rate": 4.489337822671156e-06, "loss": 4.4367, "step": 860000 }, { "epoch": 5.82, "learning_rate": 4.264870931537598e-06, "loss": 4.4619, "step": 862000 }, { "epoch": 5.84, "learning_rate": 4.040404040404041e-06, "loss": 3.8376, "step": 864000 }, { "epoch": 5.85, "learning_rate": 3.815937149270483e-06, "loss": 3.9389, "step": 866000 }, { "epoch": 5.86, "learning_rate": 3.591470258136925e-06, "loss": 4.051, "step": 868000 }, { "epoch": 5.88, "learning_rate": 3.367003367003367e-06, "loss": 3.9391, "step": 870000 }, { "epoch": 5.89, "learning_rate": 3.1425364758698095e-06, "loss": 3.969, "step": 872000 }, { "epoch": 5.9, "learning_rate": 2.9180695847362515e-06, "loss": 3.9726, "step": 874000 }, { "epoch": 5.92, "learning_rate": 2.6936026936026934e-06, "loss": 4.081, "step": 876000 }, { "epoch": 5.93, "learning_rate": 2.469135802469136e-06, "loss": 4.0299, "step": 878000 }, { "epoch": 5.94, "learning_rate": 2.244668911335578e-06, "loss": 4.0083, "step": 880000 }, { "epoch": 5.96, "learning_rate": 2.0202020202020206e-06, "loss": 4.0233, "step": 882000 }, { "epoch": 5.97, "learning_rate": 1.7957351290684626e-06, "loss": 4.0916, "step": 884000 }, { "epoch": 5.98, "learning_rate": 1.5712682379349048e-06, "loss": 3.9541, "step": 886000 }, { "epoch": 6.0, "learning_rate": 1.3468013468013467e-06, "loss": 3.6427, "step": 888000 }, { "epoch": 6.01, "learning_rate": 1.122334455667789e-06, "loss": 3.6296, "step": 890000 }, { "epoch": 6.03, "learning_rate": 8.978675645342313e-07, "loss": 3.7085, "step": 892000 }, { "epoch": 6.04, "learning_rate": 6.734006734006734e-07, "loss": 3.6918, "step": 894000 }, { "epoch": 6.05, "learning_rate": 4.4893378226711564e-07, "loss": 3.6907, "step": 896000 }, { "epoch": 6.07, "learning_rate": 2.2446689113355782e-07, "loss": 3.6985, "step": 898000 }, { "epoch": 6.08, "learning_rate": 0.0, "loss": 3.6945, "step": 900000 } ], "max_steps": 900000, "num_train_epochs": 7, "total_flos": 1.894127697690476e+18, "trial_name": null, "trial_params": null }