marian-german-grammar / trainer_state.json
flozi00's picture
mire training
a180bb1
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.054384640693646,
"global_step": 519500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.997580754417543e-05,
"loss": 0.1399,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.995161508835085e-05,
"loss": 0.1465,
"step": 1000
},
{
"epoch": 0.03,
"learning_rate": 4.992742263252628e-05,
"loss": 0.1547,
"step": 1500
},
{
"epoch": 0.04,
"learning_rate": 4.99032301767017e-05,
"loss": 0.155,
"step": 2000
},
{
"epoch": 0.05,
"learning_rate": 4.987903772087712e-05,
"loss": 0.1567,
"step": 2500
},
{
"epoch": 0.06,
"learning_rate": 4.9854845265052546e-05,
"loss": 0.1567,
"step": 3000
},
{
"epoch": 0.07,
"learning_rate": 4.9830652809227975e-05,
"loss": 0.1626,
"step": 3500
},
{
"epoch": 0.08,
"learning_rate": 4.980646035340339e-05,
"loss": 0.1574,
"step": 4000
},
{
"epoch": 0.09,
"learning_rate": 4.978226789757882e-05,
"loss": 0.1575,
"step": 4500
},
{
"epoch": 0.1,
"learning_rate": 4.975807544175425e-05,
"loss": 0.1577,
"step": 5000
},
{
"epoch": 0.11,
"learning_rate": 4.9733882985929667e-05,
"loss": 0.1628,
"step": 5500
},
{
"epoch": 0.12,
"learning_rate": 4.9709690530105096e-05,
"loss": 0.1668,
"step": 6000
},
{
"epoch": 0.13,
"learning_rate": 4.968549807428052e-05,
"loss": 0.1673,
"step": 6500
},
{
"epoch": 0.14,
"learning_rate": 4.966130561845594e-05,
"loss": 0.1675,
"step": 7000
},
{
"epoch": 0.15,
"learning_rate": 4.9637113162631364e-05,
"loss": 0.1664,
"step": 7500
},
{
"epoch": 0.15,
"learning_rate": 4.9612920706806794e-05,
"loss": 0.1645,
"step": 8000
},
{
"epoch": 0.16,
"learning_rate": 4.958872825098222e-05,
"loss": 0.1645,
"step": 8500
},
{
"epoch": 0.17,
"learning_rate": 4.956453579515764e-05,
"loss": 0.1635,
"step": 9000
},
{
"epoch": 0.18,
"learning_rate": 4.954034333933307e-05,
"loss": 0.1711,
"step": 9500
},
{
"epoch": 0.19,
"learning_rate": 4.951615088350849e-05,
"loss": 0.1686,
"step": 10000
},
{
"epoch": 0.2,
"learning_rate": 4.9491958427683915e-05,
"loss": 0.1659,
"step": 10500
},
{
"epoch": 0.21,
"learning_rate": 4.946776597185934e-05,
"loss": 0.1645,
"step": 11000
},
{
"epoch": 0.22,
"learning_rate": 4.944357351603476e-05,
"loss": 0.1689,
"step": 11500
},
{
"epoch": 0.23,
"learning_rate": 4.941938106021018e-05,
"loss": 0.1704,
"step": 12000
},
{
"epoch": 0.24,
"learning_rate": 4.939518860438561e-05,
"loss": 0.1669,
"step": 12500
},
{
"epoch": 0.25,
"learning_rate": 4.9370996148561035e-05,
"loss": 0.1655,
"step": 13000
},
{
"epoch": 0.26,
"learning_rate": 4.934680369273646e-05,
"loss": 0.1716,
"step": 13500
},
{
"epoch": 0.27,
"learning_rate": 4.932261123691189e-05,
"loss": 0.1715,
"step": 14000
},
{
"epoch": 0.28,
"learning_rate": 4.929841878108731e-05,
"loss": 0.1666,
"step": 14500
},
{
"epoch": 0.29,
"learning_rate": 4.927422632526273e-05,
"loss": 0.1694,
"step": 15000
},
{
"epoch": 0.3,
"learning_rate": 4.9250033869438156e-05,
"loss": 0.1847,
"step": 15500
},
{
"epoch": 0.31,
"learning_rate": 4.9225841413613585e-05,
"loss": 0.1701,
"step": 16000
},
{
"epoch": 0.32,
"learning_rate": 4.9201648957789e-05,
"loss": 0.1718,
"step": 16500
},
{
"epoch": 0.33,
"learning_rate": 4.917745650196443e-05,
"loss": 0.1669,
"step": 17000
},
{
"epoch": 0.34,
"learning_rate": 4.9153264046139854e-05,
"loss": 0.1769,
"step": 17500
},
{
"epoch": 0.35,
"learning_rate": 4.9129071590315277e-05,
"loss": 0.1697,
"step": 18000
},
{
"epoch": 0.36,
"learning_rate": 4.9104879134490706e-05,
"loss": 0.1671,
"step": 18500
},
{
"epoch": 0.37,
"learning_rate": 4.908068667866613e-05,
"loss": 0.1701,
"step": 19000
},
{
"epoch": 0.38,
"learning_rate": 4.905649422284155e-05,
"loss": 0.1733,
"step": 19500
},
{
"epoch": 0.39,
"learning_rate": 4.9032301767016974e-05,
"loss": 0.1682,
"step": 20000
},
{
"epoch": 0.4,
"learning_rate": 4.9008109311192404e-05,
"loss": 0.1811,
"step": 20500
},
{
"epoch": 0.41,
"learning_rate": 4.898391685536782e-05,
"loss": 0.173,
"step": 21000
},
{
"epoch": 0.42,
"learning_rate": 4.895972439954325e-05,
"loss": 0.1722,
"step": 21500
},
{
"epoch": 0.43,
"learning_rate": 4.893553194371867e-05,
"loss": 0.1764,
"step": 22000
},
{
"epoch": 0.44,
"learning_rate": 4.8911339487894095e-05,
"loss": 0.1714,
"step": 22500
},
{
"epoch": 0.45,
"learning_rate": 4.888714703206952e-05,
"loss": 0.1779,
"step": 23000
},
{
"epoch": 0.45,
"learning_rate": 4.886295457624495e-05,
"loss": 0.1721,
"step": 23500
},
{
"epoch": 0.46,
"learning_rate": 4.883876212042037e-05,
"loss": 0.1804,
"step": 24000
},
{
"epoch": 0.47,
"learning_rate": 4.881456966459579e-05,
"loss": 0.1723,
"step": 24500
},
{
"epoch": 0.48,
"learning_rate": 4.879037720877122e-05,
"loss": 0.1746,
"step": 25000
},
{
"epoch": 0.49,
"learning_rate": 4.8766184752946645e-05,
"loss": 0.1821,
"step": 25500
},
{
"epoch": 0.5,
"learning_rate": 4.874199229712207e-05,
"loss": 0.1685,
"step": 26000
},
{
"epoch": 0.51,
"learning_rate": 4.871779984129749e-05,
"loss": 0.1711,
"step": 26500
},
{
"epoch": 0.52,
"learning_rate": 4.8693607385472914e-05,
"loss": 0.1775,
"step": 27000
},
{
"epoch": 0.53,
"learning_rate": 4.8669414929648336e-05,
"loss": 0.1761,
"step": 27500
},
{
"epoch": 0.54,
"learning_rate": 4.8645222473823766e-05,
"loss": 0.1781,
"step": 28000
},
{
"epoch": 0.55,
"learning_rate": 4.862103001799919e-05,
"loss": 0.1743,
"step": 28500
},
{
"epoch": 0.56,
"learning_rate": 4.859683756217461e-05,
"loss": 0.1774,
"step": 29000
},
{
"epoch": 0.57,
"learning_rate": 4.857264510635004e-05,
"loss": 0.1718,
"step": 29500
},
{
"epoch": 0.58,
"learning_rate": 4.8548452650525464e-05,
"loss": 0.1719,
"step": 30000
},
{
"epoch": 0.59,
"learning_rate": 4.852426019470089e-05,
"loss": 0.1714,
"step": 30500
},
{
"epoch": 0.6,
"learning_rate": 4.850006773887631e-05,
"loss": 0.1789,
"step": 31000
},
{
"epoch": 0.61,
"learning_rate": 4.847587528305174e-05,
"loss": 0.1821,
"step": 31500
},
{
"epoch": 0.62,
"learning_rate": 4.8451682827227155e-05,
"loss": 0.1829,
"step": 32000
},
{
"epoch": 0.63,
"learning_rate": 4.8427490371402585e-05,
"loss": 0.1741,
"step": 32500
},
{
"epoch": 0.64,
"learning_rate": 4.8403297915578014e-05,
"loss": 0.1795,
"step": 33000
},
{
"epoch": 0.65,
"learning_rate": 4.837910545975343e-05,
"loss": 0.1756,
"step": 33500
},
{
"epoch": 0.66,
"learning_rate": 4.835491300392886e-05,
"loss": 0.1811,
"step": 34000
},
{
"epoch": 0.67,
"learning_rate": 4.833072054810428e-05,
"loss": 0.1798,
"step": 34500
},
{
"epoch": 0.68,
"learning_rate": 4.8306528092279705e-05,
"loss": 0.1779,
"step": 35000
},
{
"epoch": 0.69,
"learning_rate": 4.828233563645513e-05,
"loss": 0.1859,
"step": 35500
},
{
"epoch": 0.7,
"learning_rate": 4.825814318063056e-05,
"loss": 0.1852,
"step": 36000
},
{
"epoch": 0.71,
"learning_rate": 4.8233950724805974e-05,
"loss": 0.1818,
"step": 36500
},
{
"epoch": 0.72,
"learning_rate": 4.82097582689814e-05,
"loss": 0.1872,
"step": 37000
},
{
"epoch": 0.73,
"learning_rate": 4.818556581315683e-05,
"loss": 0.1843,
"step": 37500
},
{
"epoch": 0.74,
"learning_rate": 4.816137335733225e-05,
"loss": 0.1868,
"step": 38000
},
{
"epoch": 0.75,
"learning_rate": 4.813718090150768e-05,
"loss": 0.1818,
"step": 38500
},
{
"epoch": 0.75,
"learning_rate": 4.81129884456831e-05,
"loss": 0.1811,
"step": 39000
},
{
"epoch": 0.76,
"learning_rate": 4.8088795989858524e-05,
"loss": 0.1827,
"step": 39500
},
{
"epoch": 0.77,
"learning_rate": 4.8064603534033947e-05,
"loss": 0.1753,
"step": 40000
},
{
"epoch": 0.78,
"learning_rate": 4.8040411078209376e-05,
"loss": 0.1898,
"step": 40500
},
{
"epoch": 0.79,
"learning_rate": 4.801621862238479e-05,
"loss": 0.1838,
"step": 41000
},
{
"epoch": 0.8,
"learning_rate": 4.799202616656022e-05,
"loss": 0.1839,
"step": 41500
},
{
"epoch": 0.81,
"learning_rate": 4.796783371073565e-05,
"loss": 0.1877,
"step": 42000
},
{
"epoch": 0.82,
"learning_rate": 4.794364125491107e-05,
"loss": 0.179,
"step": 42500
},
{
"epoch": 0.83,
"learning_rate": 4.79194487990865e-05,
"loss": 0.1913,
"step": 43000
},
{
"epoch": 0.84,
"learning_rate": 4.789525634326192e-05,
"loss": 0.1826,
"step": 43500
},
{
"epoch": 0.85,
"learning_rate": 4.787106388743734e-05,
"loss": 0.1794,
"step": 44000
},
{
"epoch": 0.86,
"learning_rate": 4.7846871431612765e-05,
"loss": 0.1823,
"step": 44500
},
{
"epoch": 0.87,
"learning_rate": 4.7822678975788195e-05,
"loss": 0.1836,
"step": 45000
},
{
"epoch": 0.88,
"learning_rate": 4.779848651996362e-05,
"loss": 0.1889,
"step": 45500
},
{
"epoch": 0.89,
"learning_rate": 4.777429406413904e-05,
"loss": 0.1899,
"step": 46000
},
{
"epoch": 0.9,
"learning_rate": 4.775010160831447e-05,
"loss": 0.1842,
"step": 46500
},
{
"epoch": 0.91,
"learning_rate": 4.772590915248989e-05,
"loss": 0.1926,
"step": 47000
},
{
"epoch": 0.92,
"learning_rate": 4.7701716696665315e-05,
"loss": 0.1912,
"step": 47500
},
{
"epoch": 0.93,
"learning_rate": 4.767752424084074e-05,
"loss": 0.1884,
"step": 48000
},
{
"epoch": 0.94,
"learning_rate": 4.765333178501616e-05,
"loss": 0.1908,
"step": 48500
},
{
"epoch": 0.95,
"learning_rate": 4.7629139329191584e-05,
"loss": 0.1881,
"step": 49000
},
{
"epoch": 0.96,
"learning_rate": 4.760494687336701e-05,
"loss": 0.1895,
"step": 49500
},
{
"epoch": 0.97,
"learning_rate": 4.7580754417542436e-05,
"loss": 0.1925,
"step": 50000
},
{
"epoch": 0.98,
"learning_rate": 4.755656196171786e-05,
"loss": 0.1952,
"step": 50500
},
{
"epoch": 0.99,
"learning_rate": 4.753236950589329e-05,
"loss": 0.1878,
"step": 51000
},
{
"epoch": 1.0,
"learning_rate": 4.750817705006871e-05,
"loss": 0.1948,
"step": 51500
},
{
"epoch": 1.01,
"learning_rate": 4.7483984594244134e-05,
"loss": 0.1723,
"step": 52000
},
{
"epoch": 1.02,
"learning_rate": 4.745979213841956e-05,
"loss": 0.1655,
"step": 52500
},
{
"epoch": 1.03,
"learning_rate": 4.7435599682594986e-05,
"loss": 0.169,
"step": 53000
},
{
"epoch": 1.04,
"learning_rate": 4.74114072267704e-05,
"loss": 0.1697,
"step": 53500
},
{
"epoch": 1.05,
"learning_rate": 4.738721477094583e-05,
"loss": 0.1663,
"step": 54000
},
{
"epoch": 1.05,
"learning_rate": 4.7363022315121255e-05,
"loss": 0.1676,
"step": 54500
},
{
"epoch": 1.06,
"learning_rate": 4.733882985929668e-05,
"loss": 0.1648,
"step": 55000
},
{
"epoch": 1.07,
"learning_rate": 4.731463740347211e-05,
"loss": 0.1709,
"step": 55500
},
{
"epoch": 1.08,
"learning_rate": 4.729044494764753e-05,
"loss": 0.166,
"step": 56000
},
{
"epoch": 1.09,
"learning_rate": 4.726625249182295e-05,
"loss": 0.1639,
"step": 56500
},
{
"epoch": 1.1,
"learning_rate": 4.7242060035998375e-05,
"loss": 0.1695,
"step": 57000
},
{
"epoch": 1.11,
"learning_rate": 4.7217867580173805e-05,
"loss": 0.1645,
"step": 57500
},
{
"epoch": 1.12,
"learning_rate": 4.719367512434922e-05,
"loss": 0.1642,
"step": 58000
},
{
"epoch": 1.13,
"learning_rate": 4.716948266852465e-05,
"loss": 0.1725,
"step": 58500
},
{
"epoch": 1.14,
"learning_rate": 4.714529021270007e-05,
"loss": 0.1694,
"step": 59000
},
{
"epoch": 1.15,
"learning_rate": 4.7121097756875496e-05,
"loss": 0.1683,
"step": 59500
},
{
"epoch": 1.16,
"learning_rate": 4.709690530105092e-05,
"loss": 0.1662,
"step": 60000
},
{
"epoch": 1.17,
"learning_rate": 4.707271284522635e-05,
"loss": 0.1681,
"step": 60500
},
{
"epoch": 1.18,
"learning_rate": 4.704852038940177e-05,
"loss": 0.17,
"step": 61000
},
{
"epoch": 1.19,
"learning_rate": 4.7024327933577194e-05,
"loss": 0.1724,
"step": 61500
},
{
"epoch": 1.2,
"learning_rate": 4.700013547775262e-05,
"loss": 0.1732,
"step": 62000
},
{
"epoch": 1.21,
"learning_rate": 4.6975943021928046e-05,
"loss": 0.1721,
"step": 62500
},
{
"epoch": 1.22,
"learning_rate": 4.695175056610347e-05,
"loss": 0.174,
"step": 63000
},
{
"epoch": 1.23,
"learning_rate": 4.692755811027889e-05,
"loss": 0.1742,
"step": 63500
},
{
"epoch": 1.24,
"learning_rate": 4.6903365654454314e-05,
"loss": 0.1776,
"step": 64000
},
{
"epoch": 1.25,
"learning_rate": 4.687917319862974e-05,
"loss": 0.1715,
"step": 64500
},
{
"epoch": 1.26,
"learning_rate": 4.685498074280517e-05,
"loss": 0.1759,
"step": 65000
},
{
"epoch": 1.27,
"learning_rate": 4.683078828698059e-05,
"loss": 0.1774,
"step": 65500
},
{
"epoch": 1.28,
"learning_rate": 4.680659583115601e-05,
"loss": 0.1761,
"step": 66000
},
{
"epoch": 1.29,
"learning_rate": 4.678240337533144e-05,
"loss": 0.1721,
"step": 66500
},
{
"epoch": 1.3,
"learning_rate": 4.6758210919506865e-05,
"loss": 0.1768,
"step": 67000
},
{
"epoch": 1.31,
"learning_rate": 4.673401846368229e-05,
"loss": 0.1721,
"step": 67500
},
{
"epoch": 1.32,
"learning_rate": 4.670982600785771e-05,
"loss": 0.1757,
"step": 68000
},
{
"epoch": 1.33,
"learning_rate": 4.668563355203314e-05,
"loss": 0.1699,
"step": 68500
},
{
"epoch": 1.34,
"learning_rate": 4.6661441096208556e-05,
"loss": 0.1673,
"step": 69000
},
{
"epoch": 1.35,
"learning_rate": 4.6637248640383985e-05,
"loss": 0.1781,
"step": 69500
},
{
"epoch": 1.35,
"learning_rate": 4.6613056184559415e-05,
"loss": 0.1748,
"step": 70000
},
{
"epoch": 1.36,
"learning_rate": 4.658886372873483e-05,
"loss": 0.1774,
"step": 70500
},
{
"epoch": 1.37,
"learning_rate": 4.656467127291026e-05,
"loss": 0.1726,
"step": 71000
},
{
"epoch": 1.38,
"learning_rate": 4.654047881708568e-05,
"loss": 0.1737,
"step": 71500
},
{
"epoch": 1.39,
"learning_rate": 4.6516286361261106e-05,
"loss": 0.1727,
"step": 72000
},
{
"epoch": 1.4,
"learning_rate": 4.649209390543653e-05,
"loss": 0.1713,
"step": 72500
},
{
"epoch": 1.41,
"learning_rate": 4.646790144961196e-05,
"loss": 0.1747,
"step": 73000
},
{
"epoch": 1.42,
"learning_rate": 4.6443708993787374e-05,
"loss": 0.1819,
"step": 73500
},
{
"epoch": 1.43,
"learning_rate": 4.6419516537962804e-05,
"loss": 0.173,
"step": 74000
},
{
"epoch": 1.44,
"learning_rate": 4.6395324082138233e-05,
"loss": 0.1705,
"step": 74500
},
{
"epoch": 1.45,
"learning_rate": 4.637113162631365e-05,
"loss": 0.1854,
"step": 75000
},
{
"epoch": 1.46,
"learning_rate": 4.634693917048908e-05,
"loss": 0.1703,
"step": 75500
},
{
"epoch": 1.47,
"learning_rate": 4.63227467146645e-05,
"loss": 0.177,
"step": 76000
},
{
"epoch": 1.48,
"learning_rate": 4.6298554258839925e-05,
"loss": 0.1746,
"step": 76500
},
{
"epoch": 1.49,
"learning_rate": 4.627436180301535e-05,
"loss": 0.1756,
"step": 77000
},
{
"epoch": 1.5,
"learning_rate": 4.625016934719078e-05,
"loss": 0.1747,
"step": 77500
},
{
"epoch": 1.51,
"learning_rate": 4.622597689136619e-05,
"loss": 0.1743,
"step": 78000
},
{
"epoch": 1.52,
"learning_rate": 4.620178443554162e-05,
"loss": 0.1801,
"step": 78500
},
{
"epoch": 1.53,
"learning_rate": 4.617759197971705e-05,
"loss": 0.1744,
"step": 79000
},
{
"epoch": 1.54,
"learning_rate": 4.615339952389247e-05,
"loss": 0.1702,
"step": 79500
},
{
"epoch": 1.55,
"learning_rate": 4.61292070680679e-05,
"loss": 0.1793,
"step": 80000
},
{
"epoch": 1.56,
"learning_rate": 4.610501461224332e-05,
"loss": 0.1755,
"step": 80500
},
{
"epoch": 1.57,
"learning_rate": 4.608082215641874e-05,
"loss": 0.1706,
"step": 81000
},
{
"epoch": 1.58,
"learning_rate": 4.6056629700594166e-05,
"loss": 0.182,
"step": 81500
},
{
"epoch": 1.59,
"learning_rate": 4.6032437244769595e-05,
"loss": 0.1772,
"step": 82000
},
{
"epoch": 1.6,
"learning_rate": 4.600824478894502e-05,
"loss": 0.1784,
"step": 82500
},
{
"epoch": 1.61,
"learning_rate": 4.598405233312044e-05,
"loss": 0.1723,
"step": 83000
},
{
"epoch": 1.62,
"learning_rate": 4.595985987729587e-05,
"loss": 0.1732,
"step": 83500
},
{
"epoch": 1.63,
"learning_rate": 4.593566742147129e-05,
"loss": 0.1788,
"step": 84000
},
{
"epoch": 1.64,
"learning_rate": 4.5911474965646716e-05,
"loss": 0.1763,
"step": 84500
},
{
"epoch": 1.65,
"learning_rate": 4.588728250982214e-05,
"loss": 0.1753,
"step": 85000
},
{
"epoch": 1.65,
"learning_rate": 4.586309005399756e-05,
"loss": 0.1745,
"step": 85500
},
{
"epoch": 1.66,
"learning_rate": 4.5838897598172984e-05,
"loss": 0.1769,
"step": 86000
},
{
"epoch": 1.67,
"learning_rate": 4.5814705142348414e-05,
"loss": 0.1824,
"step": 86500
},
{
"epoch": 1.68,
"learning_rate": 4.579051268652384e-05,
"loss": 0.1823,
"step": 87000
},
{
"epoch": 1.69,
"learning_rate": 4.576632023069926e-05,
"loss": 0.1783,
"step": 87500
},
{
"epoch": 1.7,
"learning_rate": 4.574212777487469e-05,
"loss": 0.1816,
"step": 88000
},
{
"epoch": 1.71,
"learning_rate": 4.571793531905011e-05,
"loss": 0.1793,
"step": 88500
},
{
"epoch": 1.72,
"learning_rate": 4.5693742863225535e-05,
"loss": 0.17,
"step": 89000
},
{
"epoch": 1.73,
"learning_rate": 4.566955040740096e-05,
"loss": 0.1739,
"step": 89500
},
{
"epoch": 1.74,
"learning_rate": 4.564535795157639e-05,
"loss": 0.1804,
"step": 90000
},
{
"epoch": 1.75,
"learning_rate": 4.56211654957518e-05,
"loss": 0.1741,
"step": 90500
},
{
"epoch": 1.76,
"learning_rate": 4.559697303992723e-05,
"loss": 0.1761,
"step": 91000
},
{
"epoch": 1.77,
"learning_rate": 4.5572780584102655e-05,
"loss": 0.1753,
"step": 91500
},
{
"epoch": 1.78,
"learning_rate": 4.554858812827808e-05,
"loss": 0.1808,
"step": 92000
},
{
"epoch": 1.79,
"learning_rate": 4.552439567245351e-05,
"loss": 0.177,
"step": 92500
},
{
"epoch": 1.8,
"learning_rate": 4.550020321662893e-05,
"loss": 0.1824,
"step": 93000
},
{
"epoch": 1.81,
"learning_rate": 4.547601076080435e-05,
"loss": 0.1763,
"step": 93500
},
{
"epoch": 1.82,
"learning_rate": 4.5451818304979776e-05,
"loss": 0.1767,
"step": 94000
},
{
"epoch": 1.83,
"learning_rate": 4.5427625849155206e-05,
"loss": 0.1786,
"step": 94500
},
{
"epoch": 1.84,
"learning_rate": 4.540343339333062e-05,
"loss": 0.1788,
"step": 95000
},
{
"epoch": 1.85,
"learning_rate": 4.537924093750605e-05,
"loss": 0.1771,
"step": 95500
},
{
"epoch": 1.86,
"learning_rate": 4.5355048481681474e-05,
"loss": 0.1879,
"step": 96000
},
{
"epoch": 1.87,
"learning_rate": 4.53308560258569e-05,
"loss": 0.1821,
"step": 96500
},
{
"epoch": 1.88,
"learning_rate": 4.530666357003232e-05,
"loss": 0.1807,
"step": 97000
},
{
"epoch": 1.89,
"learning_rate": 4.528247111420775e-05,
"loss": 0.1806,
"step": 97500
},
{
"epoch": 1.9,
"learning_rate": 4.525827865838317e-05,
"loss": 0.1757,
"step": 98000
},
{
"epoch": 1.91,
"learning_rate": 4.5234086202558595e-05,
"loss": 0.1806,
"step": 98500
},
{
"epoch": 1.92,
"learning_rate": 4.5209893746734024e-05,
"loss": 0.1783,
"step": 99000
},
{
"epoch": 1.93,
"learning_rate": 4.518570129090945e-05,
"loss": 0.1872,
"step": 99500
},
{
"epoch": 1.94,
"learning_rate": 4.516150883508487e-05,
"loss": 0.1819,
"step": 100000
},
{
"epoch": 1.95,
"learning_rate": 4.513731637926029e-05,
"loss": 0.1843,
"step": 100500
},
{
"epoch": 1.95,
"learning_rate": 4.5113123923435715e-05,
"loss": 0.1806,
"step": 101000
},
{
"epoch": 1.96,
"learning_rate": 4.508893146761114e-05,
"loss": 0.18,
"step": 101500
},
{
"epoch": 1.97,
"learning_rate": 4.506473901178657e-05,
"loss": 0.1853,
"step": 102000
},
{
"epoch": 1.98,
"learning_rate": 4.504054655596199e-05,
"loss": 0.1769,
"step": 102500
},
{
"epoch": 1.99,
"learning_rate": 4.501635410013741e-05,
"loss": 0.1763,
"step": 103000
},
{
"epoch": 2.0,
"learning_rate": 4.499216164431284e-05,
"loss": 0.1759,
"step": 103500
},
{
"epoch": 2.01,
"learning_rate": 4.4967969188488265e-05,
"loss": 0.1615,
"step": 104000
},
{
"epoch": 2.02,
"learning_rate": 4.494377673266369e-05,
"loss": 0.1623,
"step": 104500
},
{
"epoch": 2.03,
"learning_rate": 4.491958427683911e-05,
"loss": 0.1619,
"step": 105000
},
{
"epoch": 2.04,
"learning_rate": 4.489539182101454e-05,
"loss": 0.1607,
"step": 105500
},
{
"epoch": 2.05,
"learning_rate": 4.4871199365189957e-05,
"loss": 0.1621,
"step": 106000
},
{
"epoch": 2.06,
"learning_rate": 4.4847006909365386e-05,
"loss": 0.1633,
"step": 106500
},
{
"epoch": 2.07,
"learning_rate": 4.4822814453540816e-05,
"loss": 0.1585,
"step": 107000
},
{
"epoch": 2.08,
"learning_rate": 4.479862199771623e-05,
"loss": 0.1564,
"step": 107500
},
{
"epoch": 2.09,
"learning_rate": 4.477442954189166e-05,
"loss": 0.1591,
"step": 108000
},
{
"epoch": 2.1,
"learning_rate": 4.4750237086067084e-05,
"loss": 0.1605,
"step": 108500
},
{
"epoch": 2.11,
"learning_rate": 4.472604463024251e-05,
"loss": 0.158,
"step": 109000
},
{
"epoch": 2.12,
"learning_rate": 4.470185217441793e-05,
"loss": 0.1594,
"step": 109500
},
{
"epoch": 2.13,
"learning_rate": 4.467765971859336e-05,
"loss": 0.1655,
"step": 110000
},
{
"epoch": 2.14,
"learning_rate": 4.4653467262768775e-05,
"loss": 0.1577,
"step": 110500
},
{
"epoch": 2.15,
"learning_rate": 4.4629274806944205e-05,
"loss": 0.1607,
"step": 111000
},
{
"epoch": 2.16,
"learning_rate": 4.4605082351119634e-05,
"loss": 0.1591,
"step": 111500
},
{
"epoch": 2.17,
"learning_rate": 4.458088989529505e-05,
"loss": 0.1606,
"step": 112000
},
{
"epoch": 2.18,
"learning_rate": 4.455669743947048e-05,
"loss": 0.1673,
"step": 112500
},
{
"epoch": 2.19,
"learning_rate": 4.45325049836459e-05,
"loss": 0.1679,
"step": 113000
},
{
"epoch": 2.2,
"learning_rate": 4.4508312527821325e-05,
"loss": 0.1615,
"step": 113500
},
{
"epoch": 2.21,
"learning_rate": 4.448412007199675e-05,
"loss": 0.1635,
"step": 114000
},
{
"epoch": 2.22,
"learning_rate": 4.445992761617218e-05,
"loss": 0.1678,
"step": 114500
},
{
"epoch": 2.23,
"learning_rate": 4.4435735160347594e-05,
"loss": 0.1693,
"step": 115000
},
{
"epoch": 2.24,
"learning_rate": 4.441154270452302e-05,
"loss": 0.1673,
"step": 115500
},
{
"epoch": 2.25,
"learning_rate": 4.438735024869845e-05,
"loss": 0.1615,
"step": 116000
},
{
"epoch": 2.25,
"learning_rate": 4.436315779287387e-05,
"loss": 0.165,
"step": 116500
},
{
"epoch": 2.26,
"learning_rate": 4.43389653370493e-05,
"loss": 0.1617,
"step": 117000
},
{
"epoch": 2.27,
"learning_rate": 4.431477288122472e-05,
"loss": 0.1662,
"step": 117500
},
{
"epoch": 2.28,
"learning_rate": 4.4290580425400144e-05,
"loss": 0.1651,
"step": 118000
},
{
"epoch": 2.29,
"learning_rate": 4.426638796957557e-05,
"loss": 0.1674,
"step": 118500
},
{
"epoch": 2.3,
"learning_rate": 4.4242195513750996e-05,
"loss": 0.1644,
"step": 119000
},
{
"epoch": 2.31,
"learning_rate": 4.421800305792642e-05,
"loss": 0.1677,
"step": 119500
},
{
"epoch": 2.32,
"learning_rate": 4.419381060210184e-05,
"loss": 0.1608,
"step": 120000
},
{
"epoch": 2.33,
"learning_rate": 4.416961814627727e-05,
"loss": 0.1644,
"step": 120500
},
{
"epoch": 2.34,
"learning_rate": 4.4145425690452694e-05,
"loss": 0.1705,
"step": 121000
},
{
"epoch": 2.35,
"learning_rate": 4.412123323462812e-05,
"loss": 0.1677,
"step": 121500
},
{
"epoch": 2.36,
"learning_rate": 4.409704077880354e-05,
"loss": 0.1627,
"step": 122000
},
{
"epoch": 2.37,
"learning_rate": 4.407284832297896e-05,
"loss": 0.1669,
"step": 122500
},
{
"epoch": 2.38,
"learning_rate": 4.4048655867154385e-05,
"loss": 0.1687,
"step": 123000
},
{
"epoch": 2.39,
"learning_rate": 4.4024463411329815e-05,
"loss": 0.1665,
"step": 123500
},
{
"epoch": 2.4,
"learning_rate": 4.400027095550524e-05,
"loss": 0.167,
"step": 124000
},
{
"epoch": 2.41,
"learning_rate": 4.397607849968066e-05,
"loss": 0.1592,
"step": 124500
},
{
"epoch": 2.42,
"learning_rate": 4.395188604385609e-05,
"loss": 0.1683,
"step": 125000
},
{
"epoch": 2.43,
"learning_rate": 4.392769358803151e-05,
"loss": 0.1677,
"step": 125500
},
{
"epoch": 2.44,
"learning_rate": 4.3903501132206935e-05,
"loss": 0.1601,
"step": 126000
},
{
"epoch": 2.45,
"learning_rate": 4.387930867638236e-05,
"loss": 0.1695,
"step": 126500
},
{
"epoch": 2.46,
"learning_rate": 4.385511622055779e-05,
"loss": 0.1676,
"step": 127000
},
{
"epoch": 2.47,
"learning_rate": 4.3830923764733204e-05,
"loss": 0.173,
"step": 127500
},
{
"epoch": 2.48,
"learning_rate": 4.380673130890863e-05,
"loss": 0.166,
"step": 128000
},
{
"epoch": 2.49,
"learning_rate": 4.3782538853084056e-05,
"loss": 0.1673,
"step": 128500
},
{
"epoch": 2.5,
"learning_rate": 4.375834639725948e-05,
"loss": 0.1662,
"step": 129000
},
{
"epoch": 2.51,
"learning_rate": 4.373415394143491e-05,
"loss": 0.18,
"step": 129500
},
{
"epoch": 2.52,
"learning_rate": 4.370996148561033e-05,
"loss": 0.1663,
"step": 130000
},
{
"epoch": 2.53,
"learning_rate": 4.3685769029785754e-05,
"loss": 0.1716,
"step": 130500
},
{
"epoch": 2.54,
"learning_rate": 4.366157657396118e-05,
"loss": 0.1653,
"step": 131000
},
{
"epoch": 2.55,
"learning_rate": 4.3637384118136606e-05,
"loss": 0.1742,
"step": 131500
},
{
"epoch": 2.55,
"learning_rate": 4.361319166231202e-05,
"loss": 0.1746,
"step": 132000
},
{
"epoch": 2.56,
"learning_rate": 4.358899920648745e-05,
"loss": 0.165,
"step": 132500
},
{
"epoch": 2.57,
"learning_rate": 4.3564806750662875e-05,
"loss": 0.1677,
"step": 133000
},
{
"epoch": 2.58,
"learning_rate": 4.35406142948383e-05,
"loss": 0.1766,
"step": 133500
},
{
"epoch": 2.59,
"learning_rate": 4.351642183901372e-05,
"loss": 0.1727,
"step": 134000
},
{
"epoch": 2.6,
"learning_rate": 4.349222938318915e-05,
"loss": 0.1733,
"step": 134500
},
{
"epoch": 2.61,
"learning_rate": 4.346803692736457e-05,
"loss": 0.176,
"step": 135000
},
{
"epoch": 2.62,
"learning_rate": 4.3443844471539995e-05,
"loss": 0.1719,
"step": 135500
},
{
"epoch": 2.63,
"learning_rate": 4.3419652015715425e-05,
"loss": 0.1679,
"step": 136000
},
{
"epoch": 2.64,
"learning_rate": 4.339545955989085e-05,
"loss": 0.1727,
"step": 136500
},
{
"epoch": 2.65,
"learning_rate": 4.337126710406627e-05,
"loss": 0.171,
"step": 137000
},
{
"epoch": 2.66,
"learning_rate": 4.334707464824169e-05,
"loss": 0.1736,
"step": 137500
},
{
"epoch": 2.67,
"learning_rate": 4.3322882192417116e-05,
"loss": 0.1716,
"step": 138000
},
{
"epoch": 2.68,
"learning_rate": 4.329868973659254e-05,
"loss": 0.1793,
"step": 138500
},
{
"epoch": 2.69,
"learning_rate": 4.327449728076797e-05,
"loss": 0.1714,
"step": 139000
},
{
"epoch": 2.7,
"learning_rate": 4.325030482494339e-05,
"loss": 0.1735,
"step": 139500
},
{
"epoch": 2.71,
"learning_rate": 4.3226112369118814e-05,
"loss": 0.1777,
"step": 140000
},
{
"epoch": 2.72,
"learning_rate": 4.320191991329424e-05,
"loss": 0.1658,
"step": 140500
},
{
"epoch": 2.73,
"learning_rate": 4.3177727457469666e-05,
"loss": 0.1743,
"step": 141000
},
{
"epoch": 2.74,
"learning_rate": 4.315353500164509e-05,
"loss": 0.1747,
"step": 141500
},
{
"epoch": 2.75,
"learning_rate": 4.312934254582051e-05,
"loss": 0.1657,
"step": 142000
},
{
"epoch": 2.76,
"learning_rate": 4.310515008999594e-05,
"loss": 0.1714,
"step": 142500
},
{
"epoch": 2.77,
"learning_rate": 4.308095763417136e-05,
"loss": 0.1767,
"step": 143000
},
{
"epoch": 2.78,
"learning_rate": 4.305676517834679e-05,
"loss": 0.1735,
"step": 143500
},
{
"epoch": 2.79,
"learning_rate": 4.3032572722522216e-05,
"loss": 0.1724,
"step": 144000
},
{
"epoch": 2.8,
"learning_rate": 4.300838026669763e-05,
"loss": 0.1743,
"step": 144500
},
{
"epoch": 2.81,
"learning_rate": 4.298418781087306e-05,
"loss": 0.1682,
"step": 145000
},
{
"epoch": 2.82,
"learning_rate": 4.2959995355048485e-05,
"loss": 0.1756,
"step": 145500
},
{
"epoch": 2.83,
"learning_rate": 4.293580289922391e-05,
"loss": 0.1694,
"step": 146000
},
{
"epoch": 2.84,
"learning_rate": 4.291161044339933e-05,
"loss": 0.1737,
"step": 146500
},
{
"epoch": 2.85,
"learning_rate": 4.288741798757476e-05,
"loss": 0.1705,
"step": 147000
},
{
"epoch": 2.85,
"learning_rate": 4.2863225531750176e-05,
"loss": 0.1715,
"step": 147500
},
{
"epoch": 2.86,
"learning_rate": 4.2839033075925605e-05,
"loss": 0.1741,
"step": 148000
},
{
"epoch": 2.87,
"learning_rate": 4.2814840620101035e-05,
"loss": 0.1819,
"step": 148500
},
{
"epoch": 2.88,
"learning_rate": 4.279064816427645e-05,
"loss": 0.1733,
"step": 149000
},
{
"epoch": 2.89,
"learning_rate": 4.276645570845188e-05,
"loss": 0.1727,
"step": 149500
},
{
"epoch": 2.9,
"learning_rate": 4.27422632526273e-05,
"loss": 0.1773,
"step": 150000
},
{
"epoch": 2.91,
"learning_rate": 4.2718070796802726e-05,
"loss": 0.1719,
"step": 150500
},
{
"epoch": 2.92,
"learning_rate": 4.269387834097815e-05,
"loss": 0.1752,
"step": 151000
},
{
"epoch": 2.93,
"learning_rate": 4.266968588515358e-05,
"loss": 0.1757,
"step": 151500
},
{
"epoch": 2.94,
"learning_rate": 4.2645493429328994e-05,
"loss": 0.1698,
"step": 152000
},
{
"epoch": 2.95,
"learning_rate": 4.2621300973504424e-05,
"loss": 0.1725,
"step": 152500
},
{
"epoch": 2.96,
"learning_rate": 4.2597108517679853e-05,
"loss": 0.1782,
"step": 153000
},
{
"epoch": 2.97,
"learning_rate": 4.257291606185527e-05,
"loss": 0.1814,
"step": 153500
},
{
"epoch": 2.98,
"learning_rate": 4.25487236060307e-05,
"loss": 0.1724,
"step": 154000
},
{
"epoch": 2.99,
"learning_rate": 4.252453115020612e-05,
"loss": 0.1748,
"step": 154500
},
{
"epoch": 3.0,
"learning_rate": 4.2500338694381545e-05,
"loss": 0.1765,
"step": 155000
},
{
"epoch": 3.01,
"learning_rate": 4.247614623855697e-05,
"loss": 0.1556,
"step": 155500
},
{
"epoch": 3.02,
"learning_rate": 4.24519537827324e-05,
"loss": 0.1534,
"step": 156000
},
{
"epoch": 3.03,
"learning_rate": 4.242776132690782e-05,
"loss": 0.1502,
"step": 156500
},
{
"epoch": 3.04,
"learning_rate": 4.240356887108324e-05,
"loss": 0.1587,
"step": 157000
},
{
"epoch": 3.05,
"learning_rate": 4.237937641525867e-05,
"loss": 0.1542,
"step": 157500
},
{
"epoch": 3.06,
"learning_rate": 4.2355183959434095e-05,
"loss": 0.1528,
"step": 158000
},
{
"epoch": 3.07,
"learning_rate": 4.233099150360952e-05,
"loss": 0.1499,
"step": 158500
},
{
"epoch": 3.08,
"learning_rate": 4.230679904778494e-05,
"loss": 0.1556,
"step": 159000
},
{
"epoch": 3.09,
"learning_rate": 4.228260659196036e-05,
"loss": 0.1579,
"step": 159500
},
{
"epoch": 3.1,
"learning_rate": 4.2258414136135786e-05,
"loss": 0.1546,
"step": 160000
},
{
"epoch": 3.11,
"learning_rate": 4.2234221680311215e-05,
"loss": 0.1526,
"step": 160500
},
{
"epoch": 3.12,
"learning_rate": 4.221002922448664e-05,
"loss": 0.1582,
"step": 161000
},
{
"epoch": 3.13,
"learning_rate": 4.218583676866206e-05,
"loss": 0.1573,
"step": 161500
},
{
"epoch": 3.14,
"learning_rate": 4.216164431283749e-05,
"loss": 0.1569,
"step": 162000
},
{
"epoch": 3.15,
"learning_rate": 4.213745185701291e-05,
"loss": 0.1549,
"step": 162500
},
{
"epoch": 3.15,
"learning_rate": 4.2113259401188336e-05,
"loss": 0.1525,
"step": 163000
},
{
"epoch": 3.16,
"learning_rate": 4.208906694536376e-05,
"loss": 0.1599,
"step": 163500
},
{
"epoch": 3.17,
"learning_rate": 4.206487448953919e-05,
"loss": 0.1561,
"step": 164000
},
{
"epoch": 3.18,
"learning_rate": 4.2040682033714604e-05,
"loss": 0.1658,
"step": 164500
},
{
"epoch": 3.19,
"learning_rate": 4.2016489577890034e-05,
"loss": 0.1543,
"step": 165000
},
{
"epoch": 3.2,
"learning_rate": 4.199229712206546e-05,
"loss": 0.1587,
"step": 165500
},
{
"epoch": 3.21,
"learning_rate": 4.196810466624088e-05,
"loss": 0.1553,
"step": 166000
},
{
"epoch": 3.22,
"learning_rate": 4.194391221041631e-05,
"loss": 0.1604,
"step": 166500
},
{
"epoch": 3.23,
"learning_rate": 4.191971975459173e-05,
"loss": 0.1586,
"step": 167000
},
{
"epoch": 3.24,
"learning_rate": 4.1895527298767155e-05,
"loss": 0.1585,
"step": 167500
},
{
"epoch": 3.25,
"learning_rate": 4.187133484294258e-05,
"loss": 0.1636,
"step": 168000
},
{
"epoch": 3.26,
"learning_rate": 4.184714238711801e-05,
"loss": 0.1595,
"step": 168500
},
{
"epoch": 3.27,
"learning_rate": 4.182294993129342e-05,
"loss": 0.1588,
"step": 169000
},
{
"epoch": 3.28,
"learning_rate": 4.179875747546885e-05,
"loss": 0.1594,
"step": 169500
},
{
"epoch": 3.29,
"learning_rate": 4.1774565019644275e-05,
"loss": 0.1593,
"step": 170000
},
{
"epoch": 3.3,
"learning_rate": 4.17503725638197e-05,
"loss": 0.1586,
"step": 170500
},
{
"epoch": 3.31,
"learning_rate": 4.172618010799513e-05,
"loss": 0.17,
"step": 171000
},
{
"epoch": 3.32,
"learning_rate": 4.170198765217055e-05,
"loss": 0.1572,
"step": 171500
},
{
"epoch": 3.33,
"learning_rate": 4.167779519634597e-05,
"loss": 0.1577,
"step": 172000
},
{
"epoch": 3.34,
"learning_rate": 4.1653602740521396e-05,
"loss": 0.1635,
"step": 172500
},
{
"epoch": 3.35,
"learning_rate": 4.1629410284696826e-05,
"loss": 0.1587,
"step": 173000
},
{
"epoch": 3.36,
"learning_rate": 4.160521782887224e-05,
"loss": 0.1588,
"step": 173500
},
{
"epoch": 3.37,
"learning_rate": 4.158102537304767e-05,
"loss": 0.1621,
"step": 174000
},
{
"epoch": 3.38,
"learning_rate": 4.1556832917223094e-05,
"loss": 0.1647,
"step": 174500
},
{
"epoch": 3.39,
"learning_rate": 4.153264046139852e-05,
"loss": 0.1601,
"step": 175000
},
{
"epoch": 3.4,
"learning_rate": 4.150844800557394e-05,
"loss": 0.1608,
"step": 175500
},
{
"epoch": 3.41,
"learning_rate": 4.148425554974937e-05,
"loss": 0.1619,
"step": 176000
},
{
"epoch": 3.42,
"learning_rate": 4.146006309392479e-05,
"loss": 0.1647,
"step": 176500
},
{
"epoch": 3.43,
"learning_rate": 4.1435870638100215e-05,
"loss": 0.1584,
"step": 177000
},
{
"epoch": 3.44,
"learning_rate": 4.1411678182275644e-05,
"loss": 0.1649,
"step": 177500
},
{
"epoch": 3.45,
"learning_rate": 4.138748572645107e-05,
"loss": 0.1637,
"step": 178000
},
{
"epoch": 3.45,
"learning_rate": 4.136329327062649e-05,
"loss": 0.1607,
"step": 178500
},
{
"epoch": 3.46,
"learning_rate": 4.133910081480191e-05,
"loss": 0.1636,
"step": 179000
},
{
"epoch": 3.47,
"learning_rate": 4.131490835897734e-05,
"loss": 0.1638,
"step": 179500
},
{
"epoch": 3.48,
"learning_rate": 4.129071590315276e-05,
"loss": 0.1631,
"step": 180000
},
{
"epoch": 3.49,
"learning_rate": 4.126652344732819e-05,
"loss": 0.162,
"step": 180500
},
{
"epoch": 3.5,
"learning_rate": 4.124233099150362e-05,
"loss": 0.1601,
"step": 181000
},
{
"epoch": 3.51,
"learning_rate": 4.121813853567903e-05,
"loss": 0.1674,
"step": 181500
},
{
"epoch": 3.52,
"learning_rate": 4.119394607985446e-05,
"loss": 0.169,
"step": 182000
},
{
"epoch": 3.53,
"learning_rate": 4.1169753624029885e-05,
"loss": 0.1645,
"step": 182500
},
{
"epoch": 3.54,
"learning_rate": 4.114556116820531e-05,
"loss": 0.162,
"step": 183000
},
{
"epoch": 3.55,
"learning_rate": 4.112136871238073e-05,
"loss": 0.163,
"step": 183500
},
{
"epoch": 3.56,
"learning_rate": 4.109717625655616e-05,
"loss": 0.1693,
"step": 184000
},
{
"epoch": 3.57,
"learning_rate": 4.1072983800731577e-05,
"loss": 0.1712,
"step": 184500
},
{
"epoch": 3.58,
"learning_rate": 4.1048791344907006e-05,
"loss": 0.1655,
"step": 185000
},
{
"epoch": 3.59,
"learning_rate": 4.1024598889082436e-05,
"loss": 0.1625,
"step": 185500
},
{
"epoch": 3.6,
"learning_rate": 4.100040643325785e-05,
"loss": 0.1647,
"step": 186000
},
{
"epoch": 3.61,
"learning_rate": 4.097621397743328e-05,
"loss": 0.1638,
"step": 186500
},
{
"epoch": 3.62,
"learning_rate": 4.0952021521608704e-05,
"loss": 0.1648,
"step": 187000
},
{
"epoch": 3.63,
"learning_rate": 4.092782906578413e-05,
"loss": 0.1667,
"step": 187500
},
{
"epoch": 3.64,
"learning_rate": 4.090363660995955e-05,
"loss": 0.1726,
"step": 188000
},
{
"epoch": 3.65,
"learning_rate": 4.087944415413498e-05,
"loss": 0.1667,
"step": 188500
},
{
"epoch": 3.66,
"learning_rate": 4.0855251698310395e-05,
"loss": 0.1652,
"step": 189000
},
{
"epoch": 3.67,
"learning_rate": 4.0831059242485825e-05,
"loss": 0.1733,
"step": 189500
},
{
"epoch": 3.68,
"learning_rate": 4.0806866786661254e-05,
"loss": 0.1652,
"step": 190000
},
{
"epoch": 3.69,
"learning_rate": 4.078267433083667e-05,
"loss": 0.1656,
"step": 190500
},
{
"epoch": 3.7,
"learning_rate": 4.07584818750121e-05,
"loss": 0.1589,
"step": 191000
},
{
"epoch": 3.71,
"learning_rate": 4.073428941918752e-05,
"loss": 0.1645,
"step": 191500
},
{
"epoch": 3.72,
"learning_rate": 4.0710096963362945e-05,
"loss": 0.1673,
"step": 192000
},
{
"epoch": 3.73,
"learning_rate": 4.068590450753837e-05,
"loss": 0.1709,
"step": 192500
},
{
"epoch": 3.74,
"learning_rate": 4.06617120517138e-05,
"loss": 0.1664,
"step": 193000
},
{
"epoch": 3.74,
"learning_rate": 4.063751959588922e-05,
"loss": 0.1767,
"step": 193500
},
{
"epoch": 3.75,
"learning_rate": 4.061332714006464e-05,
"loss": 0.1683,
"step": 194000
},
{
"epoch": 3.76,
"learning_rate": 4.058913468424007e-05,
"loss": 0.1654,
"step": 194500
},
{
"epoch": 3.77,
"learning_rate": 4.0564942228415496e-05,
"loss": 0.1665,
"step": 195000
},
{
"epoch": 3.78,
"learning_rate": 4.054074977259092e-05,
"loss": 0.1722,
"step": 195500
},
{
"epoch": 3.79,
"learning_rate": 4.051655731676634e-05,
"loss": 0.1644,
"step": 196000
},
{
"epoch": 3.8,
"learning_rate": 4.0492364860941764e-05,
"loss": 0.1733,
"step": 196500
},
{
"epoch": 3.81,
"learning_rate": 4.046817240511719e-05,
"loss": 0.1691,
"step": 197000
},
{
"epoch": 3.82,
"learning_rate": 4.0443979949292616e-05,
"loss": 0.1672,
"step": 197500
},
{
"epoch": 3.83,
"learning_rate": 4.041978749346804e-05,
"loss": 0.1652,
"step": 198000
},
{
"epoch": 3.84,
"learning_rate": 4.039559503764346e-05,
"loss": 0.1765,
"step": 198500
},
{
"epoch": 3.85,
"learning_rate": 4.037140258181889e-05,
"loss": 0.1713,
"step": 199000
},
{
"epoch": 3.86,
"learning_rate": 4.0347210125994314e-05,
"loss": 0.1671,
"step": 199500
},
{
"epoch": 3.87,
"learning_rate": 4.032301767016974e-05,
"loss": 0.1671,
"step": 200000
},
{
"epoch": 3.88,
"learning_rate": 4.029882521434516e-05,
"loss": 0.1718,
"step": 200500
},
{
"epoch": 3.89,
"learning_rate": 4.027463275852059e-05,
"loss": 0.1657,
"step": 201000
},
{
"epoch": 3.9,
"learning_rate": 4.0250440302696005e-05,
"loss": 0.17,
"step": 201500
},
{
"epoch": 3.91,
"learning_rate": 4.0226247846871435e-05,
"loss": 0.1697,
"step": 202000
},
{
"epoch": 3.92,
"learning_rate": 4.020205539104686e-05,
"loss": 0.1677,
"step": 202500
},
{
"epoch": 3.93,
"learning_rate": 4.017786293522228e-05,
"loss": 0.1668,
"step": 203000
},
{
"epoch": 3.94,
"learning_rate": 4.015367047939771e-05,
"loss": 0.1673,
"step": 203500
},
{
"epoch": 3.95,
"learning_rate": 4.012947802357313e-05,
"loss": 0.1684,
"step": 204000
},
{
"epoch": 3.96,
"learning_rate": 4.0105285567748555e-05,
"loss": 0.166,
"step": 204500
},
{
"epoch": 3.97,
"learning_rate": 4.008109311192398e-05,
"loss": 0.1686,
"step": 205000
},
{
"epoch": 3.98,
"learning_rate": 4.005690065609941e-05,
"loss": 0.1794,
"step": 205500
},
{
"epoch": 3.99,
"learning_rate": 4.0032708200274824e-05,
"loss": 0.1668,
"step": 206000
},
{
"epoch": 4.0,
"learning_rate": 4.000851574445025e-05,
"loss": 0.1724,
"step": 206500
},
{
"epoch": 4.01,
"learning_rate": 3.9984323288625676e-05,
"loss": 0.1572,
"step": 207000
},
{
"epoch": 4.02,
"learning_rate": 3.99601308328011e-05,
"loss": 0.1462,
"step": 207500
},
{
"epoch": 4.03,
"learning_rate": 3.993593837697653e-05,
"loss": 0.1494,
"step": 208000
},
{
"epoch": 4.04,
"learning_rate": 3.991174592115195e-05,
"loss": 0.1492,
"step": 208500
},
{
"epoch": 4.04,
"learning_rate": 3.9887553465327374e-05,
"loss": 0.1487,
"step": 209000
},
{
"epoch": 4.05,
"learning_rate": 3.98633610095028e-05,
"loss": 0.1476,
"step": 209500
},
{
"epoch": 4.06,
"learning_rate": 3.9839168553678226e-05,
"loss": 0.1448,
"step": 210000
},
{
"epoch": 4.07,
"learning_rate": 3.981497609785364e-05,
"loss": 0.151,
"step": 210500
},
{
"epoch": 4.08,
"learning_rate": 3.979078364202907e-05,
"loss": 0.1563,
"step": 211000
},
{
"epoch": 4.09,
"learning_rate": 3.9766591186204495e-05,
"loss": 0.1458,
"step": 211500
},
{
"epoch": 4.1,
"learning_rate": 3.974239873037992e-05,
"loss": 0.1449,
"step": 212000
},
{
"epoch": 4.11,
"learning_rate": 3.971820627455534e-05,
"loss": 0.1515,
"step": 212500
},
{
"epoch": 4.12,
"learning_rate": 3.969401381873077e-05,
"loss": 0.1518,
"step": 213000
},
{
"epoch": 4.13,
"learning_rate": 3.966982136290619e-05,
"loss": 0.1467,
"step": 213500
},
{
"epoch": 4.14,
"learning_rate": 3.9645628907081615e-05,
"loss": 0.1502,
"step": 214000
},
{
"epoch": 4.15,
"learning_rate": 3.9621436451257045e-05,
"loss": 0.1525,
"step": 214500
},
{
"epoch": 4.16,
"learning_rate": 3.959724399543247e-05,
"loss": 0.1475,
"step": 215000
},
{
"epoch": 4.17,
"learning_rate": 3.957305153960789e-05,
"loss": 0.1505,
"step": 215500
},
{
"epoch": 4.18,
"learning_rate": 3.954885908378331e-05,
"loss": 0.1523,
"step": 216000
},
{
"epoch": 4.19,
"learning_rate": 3.952466662795874e-05,
"loss": 0.1531,
"step": 216500
},
{
"epoch": 4.2,
"learning_rate": 3.950047417213416e-05,
"loss": 0.1517,
"step": 217000
},
{
"epoch": 4.21,
"learning_rate": 3.947628171630959e-05,
"loss": 0.1521,
"step": 217500
},
{
"epoch": 4.22,
"learning_rate": 3.945208926048502e-05,
"loss": 0.1524,
"step": 218000
},
{
"epoch": 4.23,
"learning_rate": 3.9427896804660434e-05,
"loss": 0.1559,
"step": 218500
},
{
"epoch": 4.24,
"learning_rate": 3.9403704348835863e-05,
"loss": 0.1497,
"step": 219000
},
{
"epoch": 4.25,
"learning_rate": 3.9379511893011286e-05,
"loss": 0.1474,
"step": 219500
},
{
"epoch": 4.26,
"learning_rate": 3.935531943718671e-05,
"loss": 0.1528,
"step": 220000
},
{
"epoch": 4.27,
"learning_rate": 3.933112698136213e-05,
"loss": 0.1513,
"step": 220500
},
{
"epoch": 4.28,
"learning_rate": 3.930693452553756e-05,
"loss": 0.15,
"step": 221000
},
{
"epoch": 4.29,
"learning_rate": 3.928274206971298e-05,
"loss": 0.1506,
"step": 221500
},
{
"epoch": 4.3,
"learning_rate": 3.925854961388841e-05,
"loss": 0.1519,
"step": 222000
},
{
"epoch": 4.31,
"learning_rate": 3.9234357158063836e-05,
"loss": 0.157,
"step": 222500
},
{
"epoch": 4.32,
"learning_rate": 3.921016470223925e-05,
"loss": 0.1531,
"step": 223000
},
{
"epoch": 4.33,
"learning_rate": 3.918597224641468e-05,
"loss": 0.1478,
"step": 223500
},
{
"epoch": 4.34,
"learning_rate": 3.9161779790590105e-05,
"loss": 0.1534,
"step": 224000
},
{
"epoch": 4.34,
"learning_rate": 3.913758733476553e-05,
"loss": 0.1537,
"step": 224500
},
{
"epoch": 4.35,
"learning_rate": 3.911339487894095e-05,
"loss": 0.1508,
"step": 225000
},
{
"epoch": 4.36,
"learning_rate": 3.908920242311638e-05,
"loss": 0.1577,
"step": 225500
},
{
"epoch": 4.37,
"learning_rate": 3.9065009967291796e-05,
"loss": 0.1556,
"step": 226000
},
{
"epoch": 4.38,
"learning_rate": 3.9040817511467225e-05,
"loss": 0.1563,
"step": 226500
},
{
"epoch": 4.39,
"learning_rate": 3.9016625055642655e-05,
"loss": 0.1621,
"step": 227000
},
{
"epoch": 4.4,
"learning_rate": 3.899243259981807e-05,
"loss": 0.1599,
"step": 227500
},
{
"epoch": 4.41,
"learning_rate": 3.89682401439935e-05,
"loss": 0.1498,
"step": 228000
},
{
"epoch": 4.42,
"learning_rate": 3.894404768816892e-05,
"loss": 0.1539,
"step": 228500
},
{
"epoch": 4.43,
"learning_rate": 3.8919855232344346e-05,
"loss": 0.1526,
"step": 229000
},
{
"epoch": 4.44,
"learning_rate": 3.889566277651977e-05,
"loss": 0.155,
"step": 229500
},
{
"epoch": 4.45,
"learning_rate": 3.88714703206952e-05,
"loss": 0.1577,
"step": 230000
},
{
"epoch": 4.46,
"learning_rate": 3.884727786487062e-05,
"loss": 0.155,
"step": 230500
},
{
"epoch": 4.47,
"learning_rate": 3.8823085409046044e-05,
"loss": 0.1531,
"step": 231000
},
{
"epoch": 4.48,
"learning_rate": 3.8798892953221474e-05,
"loss": 0.154,
"step": 231500
},
{
"epoch": 4.49,
"learning_rate": 3.8774700497396896e-05,
"loss": 0.1561,
"step": 232000
},
{
"epoch": 4.5,
"learning_rate": 3.875050804157232e-05,
"loss": 0.1623,
"step": 232500
},
{
"epoch": 4.51,
"learning_rate": 3.872631558574774e-05,
"loss": 0.1529,
"step": 233000
},
{
"epoch": 4.52,
"learning_rate": 3.8702123129923165e-05,
"loss": 0.1575,
"step": 233500
},
{
"epoch": 4.53,
"learning_rate": 3.867793067409859e-05,
"loss": 0.1557,
"step": 234000
},
{
"epoch": 4.54,
"learning_rate": 3.865373821827402e-05,
"loss": 0.1604,
"step": 234500
},
{
"epoch": 4.55,
"learning_rate": 3.862954576244944e-05,
"loss": 0.1579,
"step": 235000
},
{
"epoch": 4.56,
"learning_rate": 3.860535330662486e-05,
"loss": 0.1578,
"step": 235500
},
{
"epoch": 4.57,
"learning_rate": 3.858116085080029e-05,
"loss": 0.1617,
"step": 236000
},
{
"epoch": 4.58,
"learning_rate": 3.8556968394975715e-05,
"loss": 0.1587,
"step": 236500
},
{
"epoch": 4.59,
"learning_rate": 3.853277593915114e-05,
"loss": 0.1545,
"step": 237000
},
{
"epoch": 4.6,
"learning_rate": 3.850858348332656e-05,
"loss": 0.1576,
"step": 237500
},
{
"epoch": 4.61,
"learning_rate": 3.848439102750199e-05,
"loss": 0.1592,
"step": 238000
},
{
"epoch": 4.62,
"learning_rate": 3.8460198571677406e-05,
"loss": 0.1527,
"step": 238500
},
{
"epoch": 4.63,
"learning_rate": 3.8436006115852836e-05,
"loss": 0.1585,
"step": 239000
},
{
"epoch": 4.64,
"learning_rate": 3.841181366002826e-05,
"loss": 0.1576,
"step": 239500
},
{
"epoch": 4.64,
"learning_rate": 3.838762120420368e-05,
"loss": 0.1596,
"step": 240000
},
{
"epoch": 4.65,
"learning_rate": 3.836342874837911e-05,
"loss": 0.1625,
"step": 240500
},
{
"epoch": 4.66,
"learning_rate": 3.8339236292554533e-05,
"loss": 0.1515,
"step": 241000
},
{
"epoch": 4.67,
"learning_rate": 3.8315043836729956e-05,
"loss": 0.1563,
"step": 241500
},
{
"epoch": 4.68,
"learning_rate": 3.829085138090538e-05,
"loss": 0.1595,
"step": 242000
},
{
"epoch": 4.69,
"learning_rate": 3.826665892508081e-05,
"loss": 0.1577,
"step": 242500
},
{
"epoch": 4.7,
"learning_rate": 3.8242466469256225e-05,
"loss": 0.164,
"step": 243000
},
{
"epoch": 4.71,
"learning_rate": 3.8218274013431654e-05,
"loss": 0.1569,
"step": 243500
},
{
"epoch": 4.72,
"learning_rate": 3.819408155760708e-05,
"loss": 0.1557,
"step": 244000
},
{
"epoch": 4.73,
"learning_rate": 3.81698891017825e-05,
"loss": 0.1568,
"step": 244500
},
{
"epoch": 4.74,
"learning_rate": 3.814569664595793e-05,
"loss": 0.1609,
"step": 245000
},
{
"epoch": 4.75,
"learning_rate": 3.812150419013335e-05,
"loss": 0.1547,
"step": 245500
},
{
"epoch": 4.76,
"learning_rate": 3.8097311734308775e-05,
"loss": 0.1564,
"step": 246000
},
{
"epoch": 4.77,
"learning_rate": 3.80731192784842e-05,
"loss": 0.1586,
"step": 246500
},
{
"epoch": 4.78,
"learning_rate": 3.804892682265963e-05,
"loss": 0.1619,
"step": 247000
},
{
"epoch": 4.79,
"learning_rate": 3.802473436683504e-05,
"loss": 0.157,
"step": 247500
},
{
"epoch": 4.8,
"learning_rate": 3.800054191101047e-05,
"loss": 0.1602,
"step": 248000
},
{
"epoch": 4.81,
"learning_rate": 3.7976349455185895e-05,
"loss": 0.1581,
"step": 248500
},
{
"epoch": 4.82,
"learning_rate": 3.795215699936132e-05,
"loss": 0.157,
"step": 249000
},
{
"epoch": 4.83,
"learning_rate": 3.792796454353674e-05,
"loss": 0.1617,
"step": 249500
},
{
"epoch": 4.84,
"learning_rate": 3.790377208771217e-05,
"loss": 0.1536,
"step": 250000
},
{
"epoch": 4.85,
"learning_rate": 3.787957963188759e-05,
"loss": 0.1571,
"step": 250500
},
{
"epoch": 4.86,
"learning_rate": 3.7855387176063016e-05,
"loss": 0.1583,
"step": 251000
},
{
"epoch": 4.87,
"learning_rate": 3.7831194720238446e-05,
"loss": 0.1617,
"step": 251500
},
{
"epoch": 4.88,
"learning_rate": 3.780700226441387e-05,
"loss": 0.1568,
"step": 252000
},
{
"epoch": 4.89,
"learning_rate": 3.778280980858929e-05,
"loss": 0.1618,
"step": 252500
},
{
"epoch": 4.9,
"learning_rate": 3.7758617352764714e-05,
"loss": 0.1592,
"step": 253000
},
{
"epoch": 4.91,
"learning_rate": 3.7734424896940143e-05,
"loss": 0.1625,
"step": 253500
},
{
"epoch": 4.92,
"learning_rate": 3.771023244111556e-05,
"loss": 0.1586,
"step": 254000
},
{
"epoch": 4.93,
"learning_rate": 3.768603998529099e-05,
"loss": 0.1543,
"step": 254500
},
{
"epoch": 4.94,
"learning_rate": 3.766184752946642e-05,
"loss": 0.1593,
"step": 255000
},
{
"epoch": 4.94,
"learning_rate": 3.7637655073641835e-05,
"loss": 0.1634,
"step": 255500
},
{
"epoch": 4.95,
"learning_rate": 3.7613462617817264e-05,
"loss": 0.161,
"step": 256000
},
{
"epoch": 4.96,
"learning_rate": 3.758927016199269e-05,
"loss": 0.158,
"step": 256500
},
{
"epoch": 4.97,
"learning_rate": 3.756507770616811e-05,
"loss": 0.1686,
"step": 257000
},
{
"epoch": 4.98,
"learning_rate": 3.754088525034353e-05,
"loss": 0.1619,
"step": 257500
},
{
"epoch": 4.99,
"learning_rate": 3.751669279451896e-05,
"loss": 0.1644,
"step": 258000
},
{
"epoch": 5.0,
"learning_rate": 3.749250033869438e-05,
"loss": 0.1524,
"step": 258500
},
{
"epoch": 5.01,
"learning_rate": 3.746830788286981e-05,
"loss": 0.1413,
"step": 259000
},
{
"epoch": 5.02,
"learning_rate": 3.744411542704524e-05,
"loss": 0.1377,
"step": 259500
},
{
"epoch": 5.03,
"learning_rate": 3.741992297122065e-05,
"loss": 0.1374,
"step": 260000
},
{
"epoch": 5.04,
"learning_rate": 3.739573051539608e-05,
"loss": 0.1391,
"step": 260500
},
{
"epoch": 5.05,
"learning_rate": 3.7371538059571505e-05,
"loss": 0.1401,
"step": 261000
},
{
"epoch": 5.06,
"learning_rate": 3.734734560374693e-05,
"loss": 0.1396,
"step": 261500
},
{
"epoch": 5.07,
"learning_rate": 3.732315314792235e-05,
"loss": 0.1397,
"step": 262000
},
{
"epoch": 5.08,
"learning_rate": 3.729896069209778e-05,
"loss": 0.1373,
"step": 262500
},
{
"epoch": 5.09,
"learning_rate": 3.7274768236273197e-05,
"loss": 0.1383,
"step": 263000
},
{
"epoch": 5.1,
"learning_rate": 3.7250575780448626e-05,
"loss": 0.1409,
"step": 263500
},
{
"epoch": 5.11,
"learning_rate": 3.7226383324624056e-05,
"loss": 0.1364,
"step": 264000
},
{
"epoch": 5.12,
"learning_rate": 3.720219086879947e-05,
"loss": 0.1438,
"step": 264500
},
{
"epoch": 5.13,
"learning_rate": 3.71779984129749e-05,
"loss": 0.14,
"step": 265000
},
{
"epoch": 5.14,
"learning_rate": 3.7153805957150324e-05,
"loss": 0.1433,
"step": 265500
},
{
"epoch": 5.15,
"learning_rate": 3.712961350132575e-05,
"loss": 0.1447,
"step": 266000
},
{
"epoch": 5.16,
"learning_rate": 3.710542104550117e-05,
"loss": 0.1394,
"step": 266500
},
{
"epoch": 5.17,
"learning_rate": 3.70812285896766e-05,
"loss": 0.142,
"step": 267000
},
{
"epoch": 5.18,
"learning_rate": 3.705703613385202e-05,
"loss": 0.1424,
"step": 267500
},
{
"epoch": 5.19,
"learning_rate": 3.7032843678027445e-05,
"loss": 0.1397,
"step": 268000
},
{
"epoch": 5.2,
"learning_rate": 3.7008651222202874e-05,
"loss": 0.143,
"step": 268500
},
{
"epoch": 5.21,
"learning_rate": 3.69844587663783e-05,
"loss": 0.1421,
"step": 269000
},
{
"epoch": 5.22,
"learning_rate": 3.696026631055372e-05,
"loss": 0.1468,
"step": 269500
},
{
"epoch": 5.23,
"learning_rate": 3.693607385472914e-05,
"loss": 0.1389,
"step": 270000
},
{
"epoch": 5.24,
"learning_rate": 3.6911881398904565e-05,
"loss": 0.1466,
"step": 270500
},
{
"epoch": 5.24,
"learning_rate": 3.688768894307999e-05,
"loss": 0.1404,
"step": 271000
},
{
"epoch": 5.25,
"learning_rate": 3.686349648725542e-05,
"loss": 0.1396,
"step": 271500
},
{
"epoch": 5.26,
"learning_rate": 3.683930403143084e-05,
"loss": 0.147,
"step": 272000
},
{
"epoch": 5.27,
"learning_rate": 3.681511157560626e-05,
"loss": 0.145,
"step": 272500
},
{
"epoch": 5.28,
"learning_rate": 3.679091911978169e-05,
"loss": 0.1448,
"step": 273000
},
{
"epoch": 5.29,
"learning_rate": 3.6766726663957116e-05,
"loss": 0.1402,
"step": 273500
},
{
"epoch": 5.3,
"learning_rate": 3.674253420813254e-05,
"loss": 0.1424,
"step": 274000
},
{
"epoch": 5.31,
"learning_rate": 3.671834175230796e-05,
"loss": 0.1394,
"step": 274500
},
{
"epoch": 5.32,
"learning_rate": 3.669414929648339e-05,
"loss": 0.1519,
"step": 275000
},
{
"epoch": 5.33,
"learning_rate": 3.666995684065881e-05,
"loss": 0.1468,
"step": 275500
},
{
"epoch": 5.34,
"learning_rate": 3.6645764384834236e-05,
"loss": 0.1451,
"step": 276000
},
{
"epoch": 5.35,
"learning_rate": 3.662157192900966e-05,
"loss": 0.1459,
"step": 276500
},
{
"epoch": 5.36,
"learning_rate": 3.659737947318508e-05,
"loss": 0.1435,
"step": 277000
},
{
"epoch": 5.37,
"learning_rate": 3.657318701736051e-05,
"loss": 0.1444,
"step": 277500
},
{
"epoch": 5.38,
"learning_rate": 3.6548994561535934e-05,
"loss": 0.1458,
"step": 278000
},
{
"epoch": 5.39,
"learning_rate": 3.652480210571136e-05,
"loss": 0.1475,
"step": 278500
},
{
"epoch": 5.4,
"learning_rate": 3.650060964988678e-05,
"loss": 0.1436,
"step": 279000
},
{
"epoch": 5.41,
"learning_rate": 3.647641719406221e-05,
"loss": 0.1438,
"step": 279500
},
{
"epoch": 5.42,
"learning_rate": 3.6452224738237625e-05,
"loss": 0.1398,
"step": 280000
},
{
"epoch": 5.43,
"learning_rate": 3.6428032282413055e-05,
"loss": 0.1516,
"step": 280500
},
{
"epoch": 5.44,
"learning_rate": 3.640383982658848e-05,
"loss": 0.1435,
"step": 281000
},
{
"epoch": 5.45,
"learning_rate": 3.63796473707639e-05,
"loss": 0.139,
"step": 281500
},
{
"epoch": 5.46,
"learning_rate": 3.635545491493933e-05,
"loss": 0.1414,
"step": 282000
},
{
"epoch": 5.47,
"learning_rate": 3.633126245911475e-05,
"loss": 0.1531,
"step": 282500
},
{
"epoch": 5.48,
"learning_rate": 3.6307070003290175e-05,
"loss": 0.1475,
"step": 283000
},
{
"epoch": 5.49,
"learning_rate": 3.62828775474656e-05,
"loss": 0.142,
"step": 283500
},
{
"epoch": 5.5,
"learning_rate": 3.625868509164103e-05,
"loss": 0.1471,
"step": 284000
},
{
"epoch": 5.51,
"learning_rate": 3.6234492635816444e-05,
"loss": 0.1533,
"step": 284500
},
{
"epoch": 5.52,
"learning_rate": 3.621030017999187e-05,
"loss": 0.1454,
"step": 285000
},
{
"epoch": 5.53,
"learning_rate": 3.6186107724167296e-05,
"loss": 0.1439,
"step": 285500
},
{
"epoch": 5.54,
"learning_rate": 3.616191526834272e-05,
"loss": 0.1517,
"step": 286000
},
{
"epoch": 5.54,
"learning_rate": 3.613772281251815e-05,
"loss": 0.1435,
"step": 286500
},
{
"epoch": 5.55,
"learning_rate": 3.611353035669357e-05,
"loss": 0.1482,
"step": 287000
},
{
"epoch": 5.56,
"learning_rate": 3.6089337900868994e-05,
"loss": 0.1534,
"step": 287500
},
{
"epoch": 5.57,
"learning_rate": 3.606514544504442e-05,
"loss": 0.1401,
"step": 288000
},
{
"epoch": 5.58,
"learning_rate": 3.6040952989219846e-05,
"loss": 0.1483,
"step": 288500
},
{
"epoch": 5.59,
"learning_rate": 3.601676053339527e-05,
"loss": 0.148,
"step": 289000
},
{
"epoch": 5.6,
"learning_rate": 3.599256807757069e-05,
"loss": 0.1434,
"step": 289500
},
{
"epoch": 5.61,
"learning_rate": 3.5968375621746115e-05,
"loss": 0.1488,
"step": 290000
},
{
"epoch": 5.62,
"learning_rate": 3.5944183165921544e-05,
"loss": 0.1559,
"step": 290500
},
{
"epoch": 5.63,
"learning_rate": 3.591999071009696e-05,
"loss": 0.1464,
"step": 291000
},
{
"epoch": 5.64,
"learning_rate": 3.589579825427239e-05,
"loss": 0.1469,
"step": 291500
},
{
"epoch": 5.65,
"learning_rate": 3.587160579844782e-05,
"loss": 0.1489,
"step": 292000
},
{
"epoch": 5.66,
"learning_rate": 3.5847413342623235e-05,
"loss": 0.1483,
"step": 292500
},
{
"epoch": 5.67,
"learning_rate": 3.5823220886798665e-05,
"loss": 0.1487,
"step": 293000
},
{
"epoch": 5.68,
"learning_rate": 3.579902843097409e-05,
"loss": 0.1482,
"step": 293500
},
{
"epoch": 5.69,
"learning_rate": 3.577483597514951e-05,
"loss": 0.1439,
"step": 294000
},
{
"epoch": 5.7,
"learning_rate": 3.575064351932493e-05,
"loss": 0.1496,
"step": 294500
},
{
"epoch": 5.71,
"learning_rate": 3.572645106350036e-05,
"loss": 0.1488,
"step": 295000
},
{
"epoch": 5.72,
"learning_rate": 3.570225860767578e-05,
"loss": 0.1461,
"step": 295500
},
{
"epoch": 5.73,
"learning_rate": 3.567806615185121e-05,
"loss": 0.1486,
"step": 296000
},
{
"epoch": 5.74,
"learning_rate": 3.565387369602664e-05,
"loss": 0.1465,
"step": 296500
},
{
"epoch": 5.75,
"learning_rate": 3.5629681240202054e-05,
"loss": 0.1468,
"step": 297000
},
{
"epoch": 5.76,
"learning_rate": 3.5605488784377483e-05,
"loss": 0.1447,
"step": 297500
},
{
"epoch": 5.77,
"learning_rate": 3.5581296328552906e-05,
"loss": 0.1523,
"step": 298000
},
{
"epoch": 5.78,
"learning_rate": 3.555710387272833e-05,
"loss": 0.1412,
"step": 298500
},
{
"epoch": 5.79,
"learning_rate": 3.553291141690375e-05,
"loss": 0.1492,
"step": 299000
},
{
"epoch": 5.8,
"learning_rate": 3.550871896107918e-05,
"loss": 0.1455,
"step": 299500
},
{
"epoch": 5.81,
"learning_rate": 3.54845265052546e-05,
"loss": 0.1492,
"step": 300000
},
{
"epoch": 5.82,
"learning_rate": 3.546033404943003e-05,
"loss": 0.1491,
"step": 300500
},
{
"epoch": 5.83,
"learning_rate": 3.5436141593605456e-05,
"loss": 0.1473,
"step": 301000
},
{
"epoch": 5.84,
"learning_rate": 3.541194913778087e-05,
"loss": 0.1518,
"step": 301500
},
{
"epoch": 5.84,
"learning_rate": 3.53877566819563e-05,
"loss": 0.148,
"step": 302000
},
{
"epoch": 5.85,
"learning_rate": 3.5363564226131725e-05,
"loss": 0.152,
"step": 302500
},
{
"epoch": 5.86,
"learning_rate": 3.533937177030715e-05,
"loss": 0.146,
"step": 303000
},
{
"epoch": 5.87,
"learning_rate": 3.531517931448257e-05,
"loss": 0.1506,
"step": 303500
},
{
"epoch": 5.88,
"learning_rate": 3.5290986858658e-05,
"loss": 0.1493,
"step": 304000
},
{
"epoch": 5.89,
"learning_rate": 3.526679440283342e-05,
"loss": 0.1466,
"step": 304500
},
{
"epoch": 5.9,
"learning_rate": 3.5242601947008845e-05,
"loss": 0.1505,
"step": 305000
},
{
"epoch": 5.91,
"learning_rate": 3.5218409491184275e-05,
"loss": 0.1442,
"step": 305500
},
{
"epoch": 5.92,
"learning_rate": 3.51942170353597e-05,
"loss": 0.1471,
"step": 306000
},
{
"epoch": 5.93,
"learning_rate": 3.517002457953512e-05,
"loss": 0.1499,
"step": 306500
},
{
"epoch": 5.94,
"learning_rate": 3.514583212371054e-05,
"loss": 0.1554,
"step": 307000
},
{
"epoch": 5.95,
"learning_rate": 3.5121639667885966e-05,
"loss": 0.1492,
"step": 307500
},
{
"epoch": 5.96,
"learning_rate": 3.509744721206139e-05,
"loss": 0.1485,
"step": 308000
},
{
"epoch": 5.97,
"learning_rate": 3.507325475623682e-05,
"loss": 0.1482,
"step": 308500
},
{
"epoch": 5.98,
"learning_rate": 3.504906230041224e-05,
"loss": 0.155,
"step": 309000
},
{
"epoch": 5.99,
"learning_rate": 3.5024869844587664e-05,
"loss": 0.1518,
"step": 309500
},
{
"epoch": 6.0,
"learning_rate": 3.5000677388763094e-05,
"loss": 0.1468,
"step": 310000
},
{
"epoch": 6.01,
"learning_rate": 3.4976484932938516e-05,
"loss": 0.128,
"step": 310500
},
{
"epoch": 6.02,
"learning_rate": 3.495229247711394e-05,
"loss": 0.1322,
"step": 311000
},
{
"epoch": 6.03,
"learning_rate": 3.492810002128936e-05,
"loss": 0.1307,
"step": 311500
},
{
"epoch": 6.04,
"learning_rate": 3.490390756546479e-05,
"loss": 0.1338,
"step": 312000
},
{
"epoch": 6.05,
"learning_rate": 3.487971510964021e-05,
"loss": 0.1302,
"step": 312500
},
{
"epoch": 6.06,
"learning_rate": 3.485552265381564e-05,
"loss": 0.1267,
"step": 313000
},
{
"epoch": 6.07,
"learning_rate": 3.483133019799106e-05,
"loss": 0.1282,
"step": 313500
},
{
"epoch": 6.08,
"learning_rate": 3.480713774216648e-05,
"loss": 0.13,
"step": 314000
},
{
"epoch": 6.09,
"learning_rate": 3.478294528634191e-05,
"loss": 0.1292,
"step": 314500
},
{
"epoch": 6.1,
"learning_rate": 3.4758752830517335e-05,
"loss": 0.1313,
"step": 315000
},
{
"epoch": 6.11,
"learning_rate": 3.473456037469276e-05,
"loss": 0.1324,
"step": 315500
},
{
"epoch": 6.12,
"learning_rate": 3.471036791886818e-05,
"loss": 0.1348,
"step": 316000
},
{
"epoch": 6.13,
"learning_rate": 3.468617546304361e-05,
"loss": 0.1373,
"step": 316500
},
{
"epoch": 6.14,
"learning_rate": 3.4661983007219026e-05,
"loss": 0.132,
"step": 317000
},
{
"epoch": 6.14,
"learning_rate": 3.4637790551394456e-05,
"loss": 0.1331,
"step": 317500
},
{
"epoch": 6.15,
"learning_rate": 3.461359809556988e-05,
"loss": 0.1308,
"step": 318000
},
{
"epoch": 6.16,
"learning_rate": 3.45894056397453e-05,
"loss": 0.1381,
"step": 318500
},
{
"epoch": 6.17,
"learning_rate": 3.456521318392073e-05,
"loss": 0.1356,
"step": 319000
},
{
"epoch": 6.18,
"learning_rate": 3.4541020728096153e-05,
"loss": 0.1318,
"step": 319500
},
{
"epoch": 6.19,
"learning_rate": 3.4516828272271576e-05,
"loss": 0.1304,
"step": 320000
},
{
"epoch": 6.2,
"learning_rate": 3.4492635816447e-05,
"loss": 0.1341,
"step": 320500
},
{
"epoch": 6.21,
"learning_rate": 3.446844336062243e-05,
"loss": 0.1293,
"step": 321000
},
{
"epoch": 6.22,
"learning_rate": 3.4444250904797845e-05,
"loss": 0.1369,
"step": 321500
},
{
"epoch": 6.23,
"learning_rate": 3.4420058448973274e-05,
"loss": 0.1281,
"step": 322000
},
{
"epoch": 6.24,
"learning_rate": 3.43958659931487e-05,
"loss": 0.1357,
"step": 322500
},
{
"epoch": 6.25,
"learning_rate": 3.437167353732412e-05,
"loss": 0.1332,
"step": 323000
},
{
"epoch": 6.26,
"learning_rate": 3.434748108149955e-05,
"loss": 0.1358,
"step": 323500
},
{
"epoch": 6.27,
"learning_rate": 3.432328862567497e-05,
"loss": 0.137,
"step": 324000
},
{
"epoch": 6.28,
"learning_rate": 3.4299096169850395e-05,
"loss": 0.1329,
"step": 324500
},
{
"epoch": 6.29,
"learning_rate": 3.427490371402582e-05,
"loss": 0.1282,
"step": 325000
},
{
"epoch": 6.3,
"learning_rate": 3.425071125820125e-05,
"loss": 0.1353,
"step": 325500
},
{
"epoch": 6.31,
"learning_rate": 3.422651880237667e-05,
"loss": 0.1387,
"step": 326000
},
{
"epoch": 6.32,
"learning_rate": 3.420232634655209e-05,
"loss": 0.132,
"step": 326500
},
{
"epoch": 6.33,
"learning_rate": 3.4178133890727515e-05,
"loss": 0.1383,
"step": 327000
},
{
"epoch": 6.34,
"learning_rate": 3.4153941434902945e-05,
"loss": 0.1333,
"step": 327500
},
{
"epoch": 6.35,
"learning_rate": 3.412974897907836e-05,
"loss": 0.1329,
"step": 328000
},
{
"epoch": 6.36,
"learning_rate": 3.410555652325379e-05,
"loss": 0.132,
"step": 328500
},
{
"epoch": 6.37,
"learning_rate": 3.408136406742922e-05,
"loss": 0.1436,
"step": 329000
},
{
"epoch": 6.38,
"learning_rate": 3.4057171611604636e-05,
"loss": 0.1334,
"step": 329500
},
{
"epoch": 6.39,
"learning_rate": 3.4032979155780066e-05,
"loss": 0.1313,
"step": 330000
},
{
"epoch": 6.4,
"learning_rate": 3.400878669995549e-05,
"loss": 0.1345,
"step": 330500
},
{
"epoch": 6.41,
"learning_rate": 3.398459424413091e-05,
"loss": 0.1351,
"step": 331000
},
{
"epoch": 6.42,
"learning_rate": 3.3960401788306334e-05,
"loss": 0.1397,
"step": 331500
},
{
"epoch": 6.43,
"learning_rate": 3.3936209332481764e-05,
"loss": 0.1345,
"step": 332000
},
{
"epoch": 6.44,
"learning_rate": 3.391201687665718e-05,
"loss": 0.1302,
"step": 332500
},
{
"epoch": 6.44,
"learning_rate": 3.388782442083261e-05,
"loss": 0.1346,
"step": 333000
},
{
"epoch": 6.45,
"learning_rate": 3.386363196500804e-05,
"loss": 0.1433,
"step": 333500
},
{
"epoch": 6.46,
"learning_rate": 3.3839439509183455e-05,
"loss": 0.1377,
"step": 334000
},
{
"epoch": 6.47,
"learning_rate": 3.3815247053358884e-05,
"loss": 0.1413,
"step": 334500
},
{
"epoch": 6.48,
"learning_rate": 3.379105459753431e-05,
"loss": 0.1392,
"step": 335000
},
{
"epoch": 6.49,
"learning_rate": 3.376686214170973e-05,
"loss": 0.1366,
"step": 335500
},
{
"epoch": 6.5,
"learning_rate": 3.374266968588515e-05,
"loss": 0.1388,
"step": 336000
},
{
"epoch": 6.51,
"learning_rate": 3.371847723006058e-05,
"loss": 0.1406,
"step": 336500
},
{
"epoch": 6.52,
"learning_rate": 3.3694284774236e-05,
"loss": 0.1418,
"step": 337000
},
{
"epoch": 6.53,
"learning_rate": 3.367009231841143e-05,
"loss": 0.141,
"step": 337500
},
{
"epoch": 6.54,
"learning_rate": 3.364589986258686e-05,
"loss": 0.1373,
"step": 338000
},
{
"epoch": 6.55,
"learning_rate": 3.362170740676227e-05,
"loss": 0.1387,
"step": 338500
},
{
"epoch": 6.56,
"learning_rate": 3.35975149509377e-05,
"loss": 0.1338,
"step": 339000
},
{
"epoch": 6.57,
"learning_rate": 3.3573322495113126e-05,
"loss": 0.143,
"step": 339500
},
{
"epoch": 6.58,
"learning_rate": 3.354913003928855e-05,
"loss": 0.1376,
"step": 340000
},
{
"epoch": 6.59,
"learning_rate": 3.352493758346397e-05,
"loss": 0.1319,
"step": 340500
},
{
"epoch": 6.6,
"learning_rate": 3.35007451276394e-05,
"loss": 0.1421,
"step": 341000
},
{
"epoch": 6.61,
"learning_rate": 3.3476552671814823e-05,
"loss": 0.1361,
"step": 341500
},
{
"epoch": 6.62,
"learning_rate": 3.3452360215990246e-05,
"loss": 0.1344,
"step": 342000
},
{
"epoch": 6.63,
"learning_rate": 3.3428167760165676e-05,
"loss": 0.1434,
"step": 342500
},
{
"epoch": 6.64,
"learning_rate": 3.34039753043411e-05,
"loss": 0.1374,
"step": 343000
},
{
"epoch": 6.65,
"learning_rate": 3.337978284851652e-05,
"loss": 0.1368,
"step": 343500
},
{
"epoch": 6.66,
"learning_rate": 3.3355590392691944e-05,
"loss": 0.1414,
"step": 344000
},
{
"epoch": 6.67,
"learning_rate": 3.333139793686737e-05,
"loss": 0.1325,
"step": 344500
},
{
"epoch": 6.68,
"learning_rate": 3.330720548104279e-05,
"loss": 0.1376,
"step": 345000
},
{
"epoch": 6.69,
"learning_rate": 3.328301302521822e-05,
"loss": 0.1418,
"step": 345500
},
{
"epoch": 6.7,
"learning_rate": 3.325882056939364e-05,
"loss": 0.1413,
"step": 346000
},
{
"epoch": 6.71,
"learning_rate": 3.3234628113569065e-05,
"loss": 0.136,
"step": 346500
},
{
"epoch": 6.72,
"learning_rate": 3.3210435657744494e-05,
"loss": 0.1462,
"step": 347000
},
{
"epoch": 6.73,
"learning_rate": 3.318624320191992e-05,
"loss": 0.1386,
"step": 347500
},
{
"epoch": 6.74,
"learning_rate": 3.316205074609534e-05,
"loss": 0.1371,
"step": 348000
},
{
"epoch": 6.74,
"learning_rate": 3.313785829027076e-05,
"loss": 0.1418,
"step": 348500
},
{
"epoch": 6.75,
"learning_rate": 3.311366583444619e-05,
"loss": 0.1434,
"step": 349000
},
{
"epoch": 6.76,
"learning_rate": 3.308947337862161e-05,
"loss": 0.1388,
"step": 349500
},
{
"epoch": 6.77,
"learning_rate": 3.306528092279704e-05,
"loss": 0.137,
"step": 350000
},
{
"epoch": 6.78,
"learning_rate": 3.304108846697246e-05,
"loss": 0.1445,
"step": 350500
},
{
"epoch": 6.79,
"learning_rate": 3.301689601114788e-05,
"loss": 0.1358,
"step": 351000
},
{
"epoch": 6.8,
"learning_rate": 3.299270355532331e-05,
"loss": 0.1411,
"step": 351500
},
{
"epoch": 6.81,
"learning_rate": 3.2968511099498736e-05,
"loss": 0.1406,
"step": 352000
},
{
"epoch": 6.82,
"learning_rate": 3.294431864367416e-05,
"loss": 0.1407,
"step": 352500
},
{
"epoch": 6.83,
"learning_rate": 3.292012618784958e-05,
"loss": 0.1414,
"step": 353000
},
{
"epoch": 6.84,
"learning_rate": 3.289593373202501e-05,
"loss": 0.1371,
"step": 353500
},
{
"epoch": 6.85,
"learning_rate": 3.287174127620043e-05,
"loss": 0.1368,
"step": 354000
},
{
"epoch": 6.86,
"learning_rate": 3.2847548820375856e-05,
"loss": 0.1376,
"step": 354500
},
{
"epoch": 6.87,
"learning_rate": 3.282335636455128e-05,
"loss": 0.1362,
"step": 355000
},
{
"epoch": 6.88,
"learning_rate": 3.27991639087267e-05,
"loss": 0.1387,
"step": 355500
},
{
"epoch": 6.89,
"learning_rate": 3.277497145290213e-05,
"loss": 0.1394,
"step": 356000
},
{
"epoch": 6.9,
"learning_rate": 3.2750778997077554e-05,
"loss": 0.1388,
"step": 356500
},
{
"epoch": 6.91,
"learning_rate": 3.272658654125298e-05,
"loss": 0.1403,
"step": 357000
},
{
"epoch": 6.92,
"learning_rate": 3.27023940854284e-05,
"loss": 0.1451,
"step": 357500
},
{
"epoch": 6.93,
"learning_rate": 3.267820162960383e-05,
"loss": 0.1356,
"step": 358000
},
{
"epoch": 6.94,
"learning_rate": 3.2654009173779245e-05,
"loss": 0.1428,
"step": 358500
},
{
"epoch": 6.95,
"learning_rate": 3.2629816717954675e-05,
"loss": 0.1369,
"step": 359000
},
{
"epoch": 6.96,
"learning_rate": 3.26056242621301e-05,
"loss": 0.1399,
"step": 359500
},
{
"epoch": 6.97,
"learning_rate": 3.258143180630552e-05,
"loss": 0.1389,
"step": 360000
},
{
"epoch": 6.98,
"learning_rate": 3.255723935048095e-05,
"loss": 0.1415,
"step": 360500
},
{
"epoch": 6.99,
"learning_rate": 3.253304689465637e-05,
"loss": 0.1425,
"step": 361000
},
{
"epoch": 7.0,
"learning_rate": 3.2508854438831796e-05,
"loss": 0.1375,
"step": 361500
},
{
"epoch": 7.01,
"learning_rate": 3.248466198300722e-05,
"loss": 0.1283,
"step": 362000
},
{
"epoch": 7.02,
"learning_rate": 3.246046952718265e-05,
"loss": 0.1209,
"step": 362500
},
{
"epoch": 7.03,
"learning_rate": 3.243627707135807e-05,
"loss": 0.1212,
"step": 363000
},
{
"epoch": 7.04,
"learning_rate": 3.2412084615533493e-05,
"loss": 0.1216,
"step": 363500
},
{
"epoch": 7.04,
"learning_rate": 3.2387892159708916e-05,
"loss": 0.1225,
"step": 364000
},
{
"epoch": 7.05,
"learning_rate": 3.2363699703884346e-05,
"loss": 0.1219,
"step": 364500
},
{
"epoch": 7.06,
"learning_rate": 3.233950724805976e-05,
"loss": 0.1256,
"step": 365000
},
{
"epoch": 7.07,
"learning_rate": 3.231531479223519e-05,
"loss": 0.1175,
"step": 365500
},
{
"epoch": 7.08,
"learning_rate": 3.229112233641062e-05,
"loss": 0.1219,
"step": 366000
},
{
"epoch": 7.09,
"learning_rate": 3.226692988058604e-05,
"loss": 0.1242,
"step": 366500
},
{
"epoch": 7.1,
"learning_rate": 3.2242737424761466e-05,
"loss": 0.1183,
"step": 367000
},
{
"epoch": 7.11,
"learning_rate": 3.221854496893689e-05,
"loss": 0.1207,
"step": 367500
},
{
"epoch": 7.12,
"learning_rate": 3.219435251311231e-05,
"loss": 0.1267,
"step": 368000
},
{
"epoch": 7.13,
"learning_rate": 3.2170160057287735e-05,
"loss": 0.121,
"step": 368500
},
{
"epoch": 7.14,
"learning_rate": 3.2145967601463164e-05,
"loss": 0.1225,
"step": 369000
},
{
"epoch": 7.15,
"learning_rate": 3.212177514563858e-05,
"loss": 0.1195,
"step": 369500
},
{
"epoch": 7.16,
"learning_rate": 3.209758268981401e-05,
"loss": 0.1221,
"step": 370000
},
{
"epoch": 7.17,
"learning_rate": 3.207339023398944e-05,
"loss": 0.1239,
"step": 370500
},
{
"epoch": 7.18,
"learning_rate": 3.2049197778164855e-05,
"loss": 0.1315,
"step": 371000
},
{
"epoch": 7.19,
"learning_rate": 3.2025005322340285e-05,
"loss": 0.1263,
"step": 371500
},
{
"epoch": 7.2,
"learning_rate": 3.200081286651571e-05,
"loss": 0.1279,
"step": 372000
},
{
"epoch": 7.21,
"learning_rate": 3.197662041069113e-05,
"loss": 0.1259,
"step": 372500
},
{
"epoch": 7.22,
"learning_rate": 3.195242795486655e-05,
"loss": 0.1277,
"step": 373000
},
{
"epoch": 7.23,
"learning_rate": 3.192823549904198e-05,
"loss": 0.1264,
"step": 373500
},
{
"epoch": 7.24,
"learning_rate": 3.19040430432174e-05,
"loss": 0.1254,
"step": 374000
},
{
"epoch": 7.25,
"learning_rate": 3.187985058739283e-05,
"loss": 0.1254,
"step": 374500
},
{
"epoch": 7.26,
"learning_rate": 3.185565813156826e-05,
"loss": 0.131,
"step": 375000
},
{
"epoch": 7.27,
"learning_rate": 3.1831465675743674e-05,
"loss": 0.1324,
"step": 375500
},
{
"epoch": 7.28,
"learning_rate": 3.1807273219919103e-05,
"loss": 0.1297,
"step": 376000
},
{
"epoch": 7.29,
"learning_rate": 3.1783080764094526e-05,
"loss": 0.1305,
"step": 376500
},
{
"epoch": 7.3,
"learning_rate": 3.175888830826995e-05,
"loss": 0.1323,
"step": 377000
},
{
"epoch": 7.31,
"learning_rate": 3.173469585244537e-05,
"loss": 0.1228,
"step": 377500
},
{
"epoch": 7.32,
"learning_rate": 3.17105033966208e-05,
"loss": 0.1251,
"step": 378000
},
{
"epoch": 7.33,
"learning_rate": 3.1686310940796224e-05,
"loss": 0.1238,
"step": 378500
},
{
"epoch": 7.34,
"learning_rate": 3.166211848497165e-05,
"loss": 0.1213,
"step": 379000
},
{
"epoch": 7.34,
"learning_rate": 3.1637926029147076e-05,
"loss": 0.123,
"step": 379500
},
{
"epoch": 7.35,
"learning_rate": 3.16137335733225e-05,
"loss": 0.1247,
"step": 380000
},
{
"epoch": 7.36,
"learning_rate": 3.158954111749792e-05,
"loss": 0.1253,
"step": 380500
},
{
"epoch": 7.37,
"learning_rate": 3.1565348661673345e-05,
"loss": 0.1241,
"step": 381000
},
{
"epoch": 7.38,
"learning_rate": 3.154115620584877e-05,
"loss": 0.1264,
"step": 381500
},
{
"epoch": 7.39,
"learning_rate": 3.151696375002419e-05,
"loss": 0.1244,
"step": 382000
},
{
"epoch": 7.4,
"learning_rate": 3.149277129419962e-05,
"loss": 0.1274,
"step": 382500
},
{
"epoch": 7.41,
"learning_rate": 3.146857883837504e-05,
"loss": 0.1243,
"step": 383000
},
{
"epoch": 7.42,
"learning_rate": 3.1444386382550465e-05,
"loss": 0.1307,
"step": 383500
},
{
"epoch": 7.43,
"learning_rate": 3.1420193926725895e-05,
"loss": 0.1269,
"step": 384000
},
{
"epoch": 7.44,
"learning_rate": 3.139600147090132e-05,
"loss": 0.127,
"step": 384500
},
{
"epoch": 7.45,
"learning_rate": 3.137180901507674e-05,
"loss": 0.1299,
"step": 385000
},
{
"epoch": 7.46,
"learning_rate": 3.134761655925216e-05,
"loss": 0.1302,
"step": 385500
},
{
"epoch": 7.47,
"learning_rate": 3.132342410342759e-05,
"loss": 0.1277,
"step": 386000
},
{
"epoch": 7.48,
"learning_rate": 3.129923164760301e-05,
"loss": 0.1265,
"step": 386500
},
{
"epoch": 7.49,
"learning_rate": 3.127503919177844e-05,
"loss": 0.124,
"step": 387000
},
{
"epoch": 7.5,
"learning_rate": 3.125084673595386e-05,
"loss": 0.1252,
"step": 387500
},
{
"epoch": 7.51,
"learning_rate": 3.1226654280129284e-05,
"loss": 0.1265,
"step": 388000
},
{
"epoch": 7.52,
"learning_rate": 3.1202461824304714e-05,
"loss": 0.1297,
"step": 388500
},
{
"epoch": 7.53,
"learning_rate": 3.1178269368480136e-05,
"loss": 0.1292,
"step": 389000
},
{
"epoch": 7.54,
"learning_rate": 3.115407691265556e-05,
"loss": 0.1294,
"step": 389500
},
{
"epoch": 7.55,
"learning_rate": 3.112988445683098e-05,
"loss": 0.1304,
"step": 390000
},
{
"epoch": 7.56,
"learning_rate": 3.110569200100641e-05,
"loss": 0.1259,
"step": 390500
},
{
"epoch": 7.57,
"learning_rate": 3.108149954518183e-05,
"loss": 0.1317,
"step": 391000
},
{
"epoch": 7.58,
"learning_rate": 3.105730708935726e-05,
"loss": 0.1262,
"step": 391500
},
{
"epoch": 7.59,
"learning_rate": 3.103311463353268e-05,
"loss": 0.1263,
"step": 392000
},
{
"epoch": 7.6,
"learning_rate": 3.10089221777081e-05,
"loss": 0.1315,
"step": 392500
},
{
"epoch": 7.61,
"learning_rate": 3.098472972188353e-05,
"loss": 0.133,
"step": 393000
},
{
"epoch": 7.62,
"learning_rate": 3.0960537266058955e-05,
"loss": 0.1291,
"step": 393500
},
{
"epoch": 7.63,
"learning_rate": 3.093634481023438e-05,
"loss": 0.132,
"step": 394000
},
{
"epoch": 7.64,
"learning_rate": 3.09121523544098e-05,
"loss": 0.1285,
"step": 394500
},
{
"epoch": 7.64,
"learning_rate": 3.088795989858523e-05,
"loss": 0.1296,
"step": 395000
},
{
"epoch": 7.65,
"learning_rate": 3.0863767442760646e-05,
"loss": 0.13,
"step": 395500
},
{
"epoch": 7.66,
"learning_rate": 3.0839574986936076e-05,
"loss": 0.1292,
"step": 396000
},
{
"epoch": 7.67,
"learning_rate": 3.08153825311115e-05,
"loss": 0.1283,
"step": 396500
},
{
"epoch": 7.68,
"learning_rate": 3.079119007528692e-05,
"loss": 0.1335,
"step": 397000
},
{
"epoch": 7.69,
"learning_rate": 3.076699761946235e-05,
"loss": 0.1291,
"step": 397500
},
{
"epoch": 7.7,
"learning_rate": 3.0742805163637773e-05,
"loss": 0.1302,
"step": 398000
},
{
"epoch": 7.71,
"learning_rate": 3.0718612707813196e-05,
"loss": 0.1274,
"step": 398500
},
{
"epoch": 7.72,
"learning_rate": 3.069442025198862e-05,
"loss": 0.1287,
"step": 399000
},
{
"epoch": 7.73,
"learning_rate": 3.067022779616405e-05,
"loss": 0.1348,
"step": 399500
},
{
"epoch": 7.74,
"learning_rate": 3.064603534033947e-05,
"loss": 0.1291,
"step": 400000
},
{
"epoch": 7.75,
"learning_rate": 3.0621842884514894e-05,
"loss": 0.1336,
"step": 400500
},
{
"epoch": 7.76,
"learning_rate": 3.059765042869032e-05,
"loss": 0.1316,
"step": 401000
},
{
"epoch": 7.77,
"learning_rate": 3.0573457972865746e-05,
"loss": 0.1268,
"step": 401500
},
{
"epoch": 7.78,
"learning_rate": 3.054926551704116e-05,
"loss": 0.1274,
"step": 402000
},
{
"epoch": 7.79,
"learning_rate": 3.052507306121659e-05,
"loss": 0.1319,
"step": 402500
},
{
"epoch": 7.8,
"learning_rate": 3.0500880605392018e-05,
"loss": 0.1353,
"step": 403000
},
{
"epoch": 7.81,
"learning_rate": 3.047668814956744e-05,
"loss": 0.1292,
"step": 403500
},
{
"epoch": 7.82,
"learning_rate": 3.0452495693742867e-05,
"loss": 0.1303,
"step": 404000
},
{
"epoch": 7.83,
"learning_rate": 3.0428303237918287e-05,
"loss": 0.1325,
"step": 404500
},
{
"epoch": 7.84,
"learning_rate": 3.0404110782093713e-05,
"loss": 0.1344,
"step": 405000
},
{
"epoch": 7.85,
"learning_rate": 3.0379918326269135e-05,
"loss": 0.1307,
"step": 405500
},
{
"epoch": 7.86,
"learning_rate": 3.035572587044456e-05,
"loss": 0.1291,
"step": 406000
},
{
"epoch": 7.87,
"learning_rate": 3.0331533414619984e-05,
"loss": 0.1335,
"step": 406500
},
{
"epoch": 7.88,
"learning_rate": 3.030734095879541e-05,
"loss": 0.1308,
"step": 407000
},
{
"epoch": 7.89,
"learning_rate": 3.0283148502970837e-05,
"loss": 0.1376,
"step": 407500
},
{
"epoch": 7.9,
"learning_rate": 3.025895604714626e-05,
"loss": 0.1319,
"step": 408000
},
{
"epoch": 7.91,
"learning_rate": 3.0234763591321686e-05,
"loss": 0.1338,
"step": 408500
},
{
"epoch": 7.92,
"learning_rate": 3.0210571135497105e-05,
"loss": 0.1336,
"step": 409000
},
{
"epoch": 7.93,
"learning_rate": 3.0186378679672535e-05,
"loss": 0.1276,
"step": 409500
},
{
"epoch": 7.94,
"learning_rate": 3.0162186223847954e-05,
"loss": 0.1294,
"step": 410000
},
{
"epoch": 7.94,
"learning_rate": 3.013799376802338e-05,
"loss": 0.1343,
"step": 410500
},
{
"epoch": 7.95,
"learning_rate": 3.0113801312198803e-05,
"loss": 0.1279,
"step": 411000
},
{
"epoch": 7.96,
"learning_rate": 3.008960885637423e-05,
"loss": 0.13,
"step": 411500
},
{
"epoch": 7.97,
"learning_rate": 3.0065416400549655e-05,
"loss": 0.1281,
"step": 412000
},
{
"epoch": 7.98,
"learning_rate": 3.0041223944725078e-05,
"loss": 0.1324,
"step": 412500
},
{
"epoch": 7.99,
"learning_rate": 3.0017031488900504e-05,
"loss": 0.1305,
"step": 413000
},
{
"epoch": 8.0,
"learning_rate": 2.9992839033075927e-05,
"loss": 0.1256,
"step": 413500
},
{
"epoch": 8.01,
"learning_rate": 2.9968646577251353e-05,
"loss": 0.1132,
"step": 414000
},
{
"epoch": 8.02,
"learning_rate": 2.9944454121426773e-05,
"loss": 0.1114,
"step": 414500
},
{
"epoch": 8.03,
"learning_rate": 2.9920261665602202e-05,
"loss": 0.1121,
"step": 415000
},
{
"epoch": 8.04,
"learning_rate": 2.989606920977762e-05,
"loss": 0.1161,
"step": 415500
},
{
"epoch": 8.05,
"learning_rate": 2.9871876753953048e-05,
"loss": 0.1144,
"step": 416000
},
{
"epoch": 8.06,
"learning_rate": 2.9847684298128474e-05,
"loss": 0.12,
"step": 416500
},
{
"epoch": 8.07,
"learning_rate": 2.9823491842303897e-05,
"loss": 0.1184,
"step": 417000
},
{
"epoch": 8.08,
"learning_rate": 2.9799299386479323e-05,
"loss": 0.1157,
"step": 417500
},
{
"epoch": 8.09,
"learning_rate": 2.9775106930654746e-05,
"loss": 0.1172,
"step": 418000
},
{
"epoch": 8.1,
"learning_rate": 2.9750914474830172e-05,
"loss": 0.1189,
"step": 418500
},
{
"epoch": 8.11,
"learning_rate": 2.9726722019005595e-05,
"loss": 0.1153,
"step": 419000
},
{
"epoch": 8.12,
"learning_rate": 2.970252956318102e-05,
"loss": 0.1142,
"step": 419500
},
{
"epoch": 8.13,
"learning_rate": 2.967833710735644e-05,
"loss": 0.12,
"step": 420000
},
{
"epoch": 8.14,
"learning_rate": 2.9654144651531866e-05,
"loss": 0.1131,
"step": 420500
},
{
"epoch": 8.15,
"learning_rate": 2.9629952195707296e-05,
"loss": 0.1155,
"step": 421000
},
{
"epoch": 8.16,
"learning_rate": 2.9605759739882715e-05,
"loss": 0.1188,
"step": 421500
},
{
"epoch": 8.17,
"learning_rate": 2.958156728405814e-05,
"loss": 0.1212,
"step": 422000
},
{
"epoch": 8.18,
"learning_rate": 2.9557374828233564e-05,
"loss": 0.1119,
"step": 422500
},
{
"epoch": 8.19,
"learning_rate": 2.953318237240899e-05,
"loss": 0.1141,
"step": 423000
},
{
"epoch": 8.2,
"learning_rate": 2.9508989916584413e-05,
"loss": 0.1145,
"step": 423500
},
{
"epoch": 8.21,
"learning_rate": 2.948479746075984e-05,
"loss": 0.1189,
"step": 424000
},
{
"epoch": 8.22,
"learning_rate": 2.946060500493526e-05,
"loss": 0.1207,
"step": 424500
},
{
"epoch": 8.23,
"learning_rate": 2.9436412549110688e-05,
"loss": 0.118,
"step": 425000
},
{
"epoch": 8.24,
"learning_rate": 2.9412220093286114e-05,
"loss": 0.1159,
"step": 425500
},
{
"epoch": 8.24,
"learning_rate": 2.9388027637461534e-05,
"loss": 0.1174,
"step": 426000
},
{
"epoch": 8.25,
"learning_rate": 2.9363835181636963e-05,
"loss": 0.117,
"step": 426500
},
{
"epoch": 8.26,
"learning_rate": 2.9339642725812383e-05,
"loss": 0.1169,
"step": 427000
},
{
"epoch": 8.27,
"learning_rate": 2.931545026998781e-05,
"loss": 0.1164,
"step": 427500
},
{
"epoch": 8.28,
"learning_rate": 2.929125781416323e-05,
"loss": 0.1158,
"step": 428000
},
{
"epoch": 8.29,
"learning_rate": 2.9267065358338658e-05,
"loss": 0.1138,
"step": 428500
},
{
"epoch": 8.3,
"learning_rate": 2.924287290251408e-05,
"loss": 0.1168,
"step": 429000
},
{
"epoch": 8.31,
"learning_rate": 2.9218680446689507e-05,
"loss": 0.1165,
"step": 429500
},
{
"epoch": 8.32,
"learning_rate": 2.9194487990864933e-05,
"loss": 0.1184,
"step": 430000
},
{
"epoch": 8.33,
"learning_rate": 2.9170295535040352e-05,
"loss": 0.12,
"step": 430500
},
{
"epoch": 8.34,
"learning_rate": 2.9146103079215782e-05,
"loss": 0.1184,
"step": 431000
},
{
"epoch": 8.35,
"learning_rate": 2.91219106233912e-05,
"loss": 0.1212,
"step": 431500
},
{
"epoch": 8.36,
"learning_rate": 2.9097718167566627e-05,
"loss": 0.1198,
"step": 432000
},
{
"epoch": 8.37,
"learning_rate": 2.907352571174205e-05,
"loss": 0.116,
"step": 432500
},
{
"epoch": 8.38,
"learning_rate": 2.9049333255917476e-05,
"loss": 0.1176,
"step": 433000
},
{
"epoch": 8.39,
"learning_rate": 2.90251408000929e-05,
"loss": 0.1159,
"step": 433500
},
{
"epoch": 8.4,
"learning_rate": 2.9000948344268325e-05,
"loss": 0.1193,
"step": 434000
},
{
"epoch": 8.41,
"learning_rate": 2.897675588844375e-05,
"loss": 0.1195,
"step": 434500
},
{
"epoch": 8.42,
"learning_rate": 2.8952563432619174e-05,
"loss": 0.1193,
"step": 435000
},
{
"epoch": 8.43,
"learning_rate": 2.89283709767946e-05,
"loss": 0.1208,
"step": 435500
},
{
"epoch": 8.44,
"learning_rate": 2.890417852097002e-05,
"loss": 0.1218,
"step": 436000
},
{
"epoch": 8.45,
"learning_rate": 2.887998606514545e-05,
"loss": 0.1199,
"step": 436500
},
{
"epoch": 8.46,
"learning_rate": 2.885579360932087e-05,
"loss": 0.1258,
"step": 437000
},
{
"epoch": 8.47,
"learning_rate": 2.8831601153496295e-05,
"loss": 0.1168,
"step": 437500
},
{
"epoch": 8.48,
"learning_rate": 2.8807408697671718e-05,
"loss": 0.1175,
"step": 438000
},
{
"epoch": 8.49,
"learning_rate": 2.8783216241847144e-05,
"loss": 0.1196,
"step": 438500
},
{
"epoch": 8.5,
"learning_rate": 2.875902378602257e-05,
"loss": 0.1223,
"step": 439000
},
{
"epoch": 8.51,
"learning_rate": 2.8734831330197993e-05,
"loss": 0.1183,
"step": 439500
},
{
"epoch": 8.52,
"learning_rate": 2.871063887437342e-05,
"loss": 0.1184,
"step": 440000
},
{
"epoch": 8.53,
"learning_rate": 2.8686446418548842e-05,
"loss": 0.1238,
"step": 440500
},
{
"epoch": 8.54,
"learning_rate": 2.8662253962724268e-05,
"loss": 0.1181,
"step": 441000
},
{
"epoch": 8.54,
"learning_rate": 2.8638061506899687e-05,
"loss": 0.1193,
"step": 441500
},
{
"epoch": 8.55,
"learning_rate": 2.8613869051075113e-05,
"loss": 0.1213,
"step": 442000
},
{
"epoch": 8.56,
"learning_rate": 2.8589676595250536e-05,
"loss": 0.1239,
"step": 442500
},
{
"epoch": 8.57,
"learning_rate": 2.8565484139425962e-05,
"loss": 0.1176,
"step": 443000
},
{
"epoch": 8.58,
"learning_rate": 2.8541291683601385e-05,
"loss": 0.1174,
"step": 443500
},
{
"epoch": 8.59,
"learning_rate": 2.851709922777681e-05,
"loss": 0.1238,
"step": 444000
},
{
"epoch": 8.6,
"learning_rate": 2.8492906771952237e-05,
"loss": 0.1193,
"step": 444500
},
{
"epoch": 8.61,
"learning_rate": 2.846871431612766e-05,
"loss": 0.1232,
"step": 445000
},
{
"epoch": 8.62,
"learning_rate": 2.8444521860303086e-05,
"loss": 0.119,
"step": 445500
},
{
"epoch": 8.63,
"learning_rate": 2.8420329404478506e-05,
"loss": 0.122,
"step": 446000
},
{
"epoch": 8.64,
"learning_rate": 2.8396136948653935e-05,
"loss": 0.1189,
"step": 446500
},
{
"epoch": 8.65,
"learning_rate": 2.8371944492829355e-05,
"loss": 0.1238,
"step": 447000
},
{
"epoch": 8.66,
"learning_rate": 2.834775203700478e-05,
"loss": 0.1229,
"step": 447500
},
{
"epoch": 8.67,
"learning_rate": 2.8323559581180204e-05,
"loss": 0.1227,
"step": 448000
},
{
"epoch": 8.68,
"learning_rate": 2.829936712535563e-05,
"loss": 0.1209,
"step": 448500
},
{
"epoch": 8.69,
"learning_rate": 2.8275174669531056e-05,
"loss": 0.1174,
"step": 449000
},
{
"epoch": 8.7,
"learning_rate": 2.825098221370648e-05,
"loss": 0.1192,
"step": 449500
},
{
"epoch": 8.71,
"learning_rate": 2.8226789757881905e-05,
"loss": 0.119,
"step": 450000
},
{
"epoch": 8.72,
"learning_rate": 2.8202597302057328e-05,
"loss": 0.1256,
"step": 450500
},
{
"epoch": 8.73,
"learning_rate": 2.8178404846232754e-05,
"loss": 0.1287,
"step": 451000
},
{
"epoch": 8.74,
"learning_rate": 2.8154212390408173e-05,
"loss": 0.1241,
"step": 451500
},
{
"epoch": 8.75,
"learning_rate": 2.8130019934583603e-05,
"loss": 0.1215,
"step": 452000
},
{
"epoch": 8.76,
"learning_rate": 2.8105827478759022e-05,
"loss": 0.1228,
"step": 452500
},
{
"epoch": 8.77,
"learning_rate": 2.808163502293445e-05,
"loss": 0.1245,
"step": 453000
},
{
"epoch": 8.78,
"learning_rate": 2.8057442567109875e-05,
"loss": 0.1215,
"step": 453500
},
{
"epoch": 8.79,
"learning_rate": 2.8033250111285297e-05,
"loss": 0.1212,
"step": 454000
},
{
"epoch": 8.8,
"learning_rate": 2.8009057655460724e-05,
"loss": 0.1233,
"step": 454500
},
{
"epoch": 8.81,
"learning_rate": 2.7984865199636146e-05,
"loss": 0.1294,
"step": 455000
},
{
"epoch": 8.82,
"learning_rate": 2.7960672743811572e-05,
"loss": 0.1231,
"step": 455500
},
{
"epoch": 8.83,
"learning_rate": 2.7936480287986995e-05,
"loss": 0.122,
"step": 456000
},
{
"epoch": 8.84,
"learning_rate": 2.791228783216242e-05,
"loss": 0.1205,
"step": 456500
},
{
"epoch": 8.84,
"learning_rate": 2.788809537633784e-05,
"loss": 0.1232,
"step": 457000
},
{
"epoch": 8.85,
"learning_rate": 2.7863902920513267e-05,
"loss": 0.1218,
"step": 457500
},
{
"epoch": 8.86,
"learning_rate": 2.7839710464688697e-05,
"loss": 0.1232,
"step": 458000
},
{
"epoch": 8.87,
"learning_rate": 2.7815518008864116e-05,
"loss": 0.1219,
"step": 458500
},
{
"epoch": 8.88,
"learning_rate": 2.7791325553039542e-05,
"loss": 0.1186,
"step": 459000
},
{
"epoch": 8.89,
"learning_rate": 2.7767133097214965e-05,
"loss": 0.1259,
"step": 459500
},
{
"epoch": 8.9,
"learning_rate": 2.774294064139039e-05,
"loss": 0.1208,
"step": 460000
},
{
"epoch": 8.91,
"learning_rate": 2.7718748185565814e-05,
"loss": 0.1279,
"step": 460500
},
{
"epoch": 8.92,
"learning_rate": 2.769455572974124e-05,
"loss": 0.1222,
"step": 461000
},
{
"epoch": 8.93,
"learning_rate": 2.767036327391666e-05,
"loss": 0.1219,
"step": 461500
},
{
"epoch": 8.94,
"learning_rate": 2.764617081809209e-05,
"loss": 0.125,
"step": 462000
},
{
"epoch": 8.95,
"learning_rate": 2.7621978362267515e-05,
"loss": 0.1249,
"step": 462500
},
{
"epoch": 8.96,
"learning_rate": 2.7597785906442934e-05,
"loss": 0.1214,
"step": 463000
},
{
"epoch": 8.97,
"learning_rate": 2.7573593450618364e-05,
"loss": 0.125,
"step": 463500
},
{
"epoch": 8.98,
"learning_rate": 2.7549400994793783e-05,
"loss": 0.1227,
"step": 464000
},
{
"epoch": 8.99,
"learning_rate": 2.752520853896921e-05,
"loss": 0.1219,
"step": 464500
},
{
"epoch": 9.0,
"learning_rate": 2.7501016083144632e-05,
"loss": 0.1196,
"step": 465000
},
{
"epoch": 9.01,
"learning_rate": 2.747682362732006e-05,
"loss": 0.1074,
"step": 465500
},
{
"epoch": 9.02,
"learning_rate": 2.745263117149548e-05,
"loss": 0.1025,
"step": 466000
},
{
"epoch": 9.03,
"learning_rate": 2.7428438715670907e-05,
"loss": 0.1067,
"step": 466500
},
{
"epoch": 9.04,
"learning_rate": 2.7404246259846334e-05,
"loss": 0.1099,
"step": 467000
},
{
"epoch": 9.05,
"learning_rate": 2.7380053804021753e-05,
"loss": 0.104,
"step": 467500
},
{
"epoch": 9.06,
"learning_rate": 2.7355861348197183e-05,
"loss": 0.1107,
"step": 468000
},
{
"epoch": 9.07,
"learning_rate": 2.7331668892372602e-05,
"loss": 0.1044,
"step": 468500
},
{
"epoch": 9.08,
"learning_rate": 2.7307476436548028e-05,
"loss": 0.1037,
"step": 469000
},
{
"epoch": 9.09,
"learning_rate": 2.728328398072345e-05,
"loss": 0.1077,
"step": 469500
},
{
"epoch": 9.1,
"learning_rate": 2.7259091524898877e-05,
"loss": 0.1056,
"step": 470000
},
{
"epoch": 9.11,
"learning_rate": 2.72348990690743e-05,
"loss": 0.1046,
"step": 470500
},
{
"epoch": 9.12,
"learning_rate": 2.7210706613249726e-05,
"loss": 0.1112,
"step": 471000
},
{
"epoch": 9.13,
"learning_rate": 2.7186514157425152e-05,
"loss": 0.1064,
"step": 471500
},
{
"epoch": 9.14,
"learning_rate": 2.7162321701600575e-05,
"loss": 0.1095,
"step": 472000
},
{
"epoch": 9.14,
"learning_rate": 2.7138129245776e-05,
"loss": 0.1057,
"step": 472500
},
{
"epoch": 9.15,
"learning_rate": 2.711393678995142e-05,
"loss": 0.1084,
"step": 473000
},
{
"epoch": 9.16,
"learning_rate": 2.708974433412685e-05,
"loss": 0.1099,
"step": 473500
},
{
"epoch": 9.17,
"learning_rate": 2.706555187830227e-05,
"loss": 0.1099,
"step": 474000
},
{
"epoch": 9.18,
"learning_rate": 2.7041359422477696e-05,
"loss": 0.1124,
"step": 474500
},
{
"epoch": 9.19,
"learning_rate": 2.701716696665312e-05,
"loss": 0.1066,
"step": 475000
},
{
"epoch": 9.2,
"learning_rate": 2.6992974510828545e-05,
"loss": 0.1039,
"step": 475500
},
{
"epoch": 9.21,
"learning_rate": 2.696878205500397e-05,
"loss": 0.1093,
"step": 476000
},
{
"epoch": 9.22,
"learning_rate": 2.6944589599179394e-05,
"loss": 0.1094,
"step": 476500
},
{
"epoch": 9.23,
"learning_rate": 2.692039714335482e-05,
"loss": 0.1115,
"step": 477000
},
{
"epoch": 9.24,
"learning_rate": 2.6896204687530242e-05,
"loss": 0.1113,
"step": 477500
},
{
"epoch": 9.25,
"learning_rate": 2.687201223170567e-05,
"loss": 0.1144,
"step": 478000
},
{
"epoch": 9.26,
"learning_rate": 2.6847819775881088e-05,
"loss": 0.1085,
"step": 478500
},
{
"epoch": 9.27,
"learning_rate": 2.6823627320056514e-05,
"loss": 0.1119,
"step": 479000
},
{
"epoch": 9.28,
"learning_rate": 2.6799434864231937e-05,
"loss": 0.1105,
"step": 479500
},
{
"epoch": 9.29,
"learning_rate": 2.6775242408407363e-05,
"loss": 0.1086,
"step": 480000
},
{
"epoch": 9.3,
"learning_rate": 2.6751049952582786e-05,
"loss": 0.1163,
"step": 480500
},
{
"epoch": 9.31,
"learning_rate": 2.6726857496758212e-05,
"loss": 0.1153,
"step": 481000
},
{
"epoch": 9.32,
"learning_rate": 2.6702665040933638e-05,
"loss": 0.1105,
"step": 481500
},
{
"epoch": 9.33,
"learning_rate": 2.667847258510906e-05,
"loss": 0.1127,
"step": 482000
},
{
"epoch": 9.34,
"learning_rate": 2.6654280129284487e-05,
"loss": 0.1128,
"step": 482500
},
{
"epoch": 9.35,
"learning_rate": 2.6630087673459907e-05,
"loss": 0.1088,
"step": 483000
},
{
"epoch": 9.36,
"learning_rate": 2.6605895217635336e-05,
"loss": 0.11,
"step": 483500
},
{
"epoch": 9.37,
"learning_rate": 2.6581702761810756e-05,
"loss": 0.1125,
"step": 484000
},
{
"epoch": 9.38,
"learning_rate": 2.655751030598618e-05,
"loss": 0.1135,
"step": 484500
},
{
"epoch": 9.39,
"learning_rate": 2.6533317850161604e-05,
"loss": 0.1123,
"step": 485000
},
{
"epoch": 9.4,
"learning_rate": 2.650912539433703e-05,
"loss": 0.1137,
"step": 485500
},
{
"epoch": 9.41,
"learning_rate": 2.6484932938512457e-05,
"loss": 0.1127,
"step": 486000
},
{
"epoch": 9.42,
"learning_rate": 2.646074048268788e-05,
"loss": 0.1118,
"step": 486500
},
{
"epoch": 9.43,
"learning_rate": 2.6436548026863306e-05,
"loss": 0.1125,
"step": 487000
},
{
"epoch": 9.44,
"learning_rate": 2.641235557103873e-05,
"loss": 0.1143,
"step": 487500
},
{
"epoch": 9.44,
"learning_rate": 2.6388163115214155e-05,
"loss": 0.1141,
"step": 488000
},
{
"epoch": 9.45,
"learning_rate": 2.6363970659389574e-05,
"loss": 0.1135,
"step": 488500
},
{
"epoch": 9.46,
"learning_rate": 2.6339778203565004e-05,
"loss": 0.1169,
"step": 489000
},
{
"epoch": 9.47,
"learning_rate": 2.6315585747740423e-05,
"loss": 0.1104,
"step": 489500
},
{
"epoch": 9.48,
"learning_rate": 2.629139329191585e-05,
"loss": 0.1134,
"step": 490000
},
{
"epoch": 9.49,
"learning_rate": 2.6267200836091275e-05,
"loss": 0.11,
"step": 490500
},
{
"epoch": 9.5,
"learning_rate": 2.6243008380266698e-05,
"loss": 0.1187,
"step": 491000
},
{
"epoch": 9.51,
"learning_rate": 2.6218815924442124e-05,
"loss": 0.1141,
"step": 491500
},
{
"epoch": 9.52,
"learning_rate": 2.6194623468617547e-05,
"loss": 0.1096,
"step": 492000
},
{
"epoch": 9.53,
"learning_rate": 2.6170431012792973e-05,
"loss": 0.1098,
"step": 492500
},
{
"epoch": 9.54,
"learning_rate": 2.6146238556968396e-05,
"loss": 0.1131,
"step": 493000
},
{
"epoch": 9.55,
"learning_rate": 2.6122046101143822e-05,
"loss": 0.1156,
"step": 493500
},
{
"epoch": 9.56,
"learning_rate": 2.609785364531924e-05,
"loss": 0.1134,
"step": 494000
},
{
"epoch": 9.57,
"learning_rate": 2.6073661189494668e-05,
"loss": 0.1115,
"step": 494500
},
{
"epoch": 9.58,
"learning_rate": 2.6049468733670097e-05,
"loss": 0.1116,
"step": 495000
},
{
"epoch": 9.59,
"learning_rate": 2.6025276277845517e-05,
"loss": 0.1135,
"step": 495500
},
{
"epoch": 9.6,
"learning_rate": 2.6001083822020943e-05,
"loss": 0.1128,
"step": 496000
},
{
"epoch": 9.61,
"learning_rate": 2.5976891366196366e-05,
"loss": 0.1132,
"step": 496500
},
{
"epoch": 9.62,
"learning_rate": 2.5952698910371792e-05,
"loss": 0.1171,
"step": 497000
},
{
"epoch": 9.63,
"learning_rate": 2.5928506454547215e-05,
"loss": 0.1113,
"step": 497500
},
{
"epoch": 9.64,
"learning_rate": 2.590431399872264e-05,
"loss": 0.1164,
"step": 498000
},
{
"epoch": 9.65,
"learning_rate": 2.588012154289806e-05,
"loss": 0.1136,
"step": 498500
},
{
"epoch": 9.66,
"learning_rate": 2.585592908707349e-05,
"loss": 0.1125,
"step": 499000
},
{
"epoch": 9.67,
"learning_rate": 2.5831736631248916e-05,
"loss": 0.1121,
"step": 499500
},
{
"epoch": 9.68,
"learning_rate": 2.5807544175424335e-05,
"loss": 0.1124,
"step": 500000
},
{
"epoch": 9.69,
"learning_rate": 2.5783351719599765e-05,
"loss": 0.1132,
"step": 500500
},
{
"epoch": 9.7,
"learning_rate": 2.5759159263775184e-05,
"loss": 0.1167,
"step": 501000
},
{
"epoch": 9.71,
"learning_rate": 2.573496680795061e-05,
"loss": 0.1157,
"step": 501500
},
{
"epoch": 9.72,
"learning_rate": 2.5710774352126033e-05,
"loss": 0.1213,
"step": 502000
},
{
"epoch": 9.73,
"learning_rate": 2.568658189630146e-05,
"loss": 0.1146,
"step": 502500
},
{
"epoch": 9.74,
"learning_rate": 2.5662389440476882e-05,
"loss": 0.1135,
"step": 503000
},
{
"epoch": 9.74,
"learning_rate": 2.5638196984652308e-05,
"loss": 0.1131,
"step": 503500
},
{
"epoch": 9.75,
"learning_rate": 2.5614004528827734e-05,
"loss": 0.1148,
"step": 504000
},
{
"epoch": 9.76,
"learning_rate": 2.5589812073003154e-05,
"loss": 0.1121,
"step": 504500
},
{
"epoch": 9.77,
"learning_rate": 2.5565619617178583e-05,
"loss": 0.1166,
"step": 505000
},
{
"epoch": 9.78,
"learning_rate": 2.5541427161354003e-05,
"loss": 0.1128,
"step": 505500
},
{
"epoch": 9.79,
"learning_rate": 2.551723470552943e-05,
"loss": 0.1153,
"step": 506000
},
{
"epoch": 9.8,
"learning_rate": 2.549304224970485e-05,
"loss": 0.1132,
"step": 506500
},
{
"epoch": 9.81,
"learning_rate": 2.5468849793880278e-05,
"loss": 0.1137,
"step": 507000
},
{
"epoch": 9.82,
"learning_rate": 2.54446573380557e-05,
"loss": 0.1104,
"step": 507500
},
{
"epoch": 9.83,
"learning_rate": 2.5420464882231127e-05,
"loss": 0.1224,
"step": 508000
},
{
"epoch": 9.84,
"learning_rate": 2.5396272426406553e-05,
"loss": 0.1156,
"step": 508500
},
{
"epoch": 9.85,
"learning_rate": 2.5372079970581976e-05,
"loss": 0.116,
"step": 509000
},
{
"epoch": 9.86,
"learning_rate": 2.5347887514757402e-05,
"loss": 0.1142,
"step": 509500
},
{
"epoch": 9.87,
"learning_rate": 2.532369505893282e-05,
"loss": 0.1128,
"step": 510000
},
{
"epoch": 9.88,
"learning_rate": 2.529950260310825e-05,
"loss": 0.1187,
"step": 510500
},
{
"epoch": 9.89,
"learning_rate": 2.527531014728367e-05,
"loss": 0.1133,
"step": 511000
},
{
"epoch": 9.9,
"learning_rate": 2.5251117691459096e-05,
"loss": 0.116,
"step": 511500
},
{
"epoch": 9.91,
"learning_rate": 2.522692523563452e-05,
"loss": 0.1086,
"step": 512000
},
{
"epoch": 9.92,
"learning_rate": 2.5202732779809945e-05,
"loss": 0.1195,
"step": 512500
},
{
"epoch": 9.93,
"learning_rate": 2.517854032398537e-05,
"loss": 0.116,
"step": 513000
},
{
"epoch": 9.94,
"learning_rate": 2.5154347868160794e-05,
"loss": 0.1148,
"step": 513500
},
{
"epoch": 9.95,
"learning_rate": 2.513015541233622e-05,
"loss": 0.1144,
"step": 514000
},
{
"epoch": 9.96,
"learning_rate": 2.5105962956511643e-05,
"loss": 0.1146,
"step": 514500
},
{
"epoch": 9.97,
"learning_rate": 2.508177050068707e-05,
"loss": 0.1135,
"step": 515000
},
{
"epoch": 9.98,
"learning_rate": 2.505757804486249e-05,
"loss": 0.1154,
"step": 515500
},
{
"epoch": 9.99,
"learning_rate": 2.5033385589037915e-05,
"loss": 0.1119,
"step": 516000
},
{
"epoch": 10.0,
"learning_rate": 2.5009193133213338e-05,
"loss": 0.1152,
"step": 516500
},
{
"epoch": 10.01,
"learning_rate": 2.4985000677388764e-05,
"loss": 0.1053,
"step": 517000
},
{
"epoch": 10.02,
"learning_rate": 2.496080822156419e-05,
"loss": 0.1002,
"step": 517500
},
{
"epoch": 10.03,
"learning_rate": 2.4936615765739613e-05,
"loss": 0.1003,
"step": 518000
},
{
"epoch": 10.04,
"learning_rate": 2.4912423309915036e-05,
"loss": 0.0994,
"step": 518500
},
{
"epoch": 10.04,
"learning_rate": 2.4888230854090462e-05,
"loss": 0.1001,
"step": 519000
},
{
"epoch": 10.05,
"learning_rate": 2.4864038398265885e-05,
"loss": 0.1028,
"step": 519500
}
],
"max_steps": 1033380,
"num_train_epochs": 20,
"total_flos": 9.248377705093478e+17,
"trial_name": null,
"trial_params": null
}