|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.054384640693646, |
|
"global_step": 519500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.997580754417543e-05, |
|
"loss": 0.1399, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.995161508835085e-05, |
|
"loss": 0.1465, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.992742263252628e-05, |
|
"loss": 0.1547, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.99032301767017e-05, |
|
"loss": 0.155, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.987903772087712e-05, |
|
"loss": 0.1567, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9854845265052546e-05, |
|
"loss": 0.1567, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.9830652809227975e-05, |
|
"loss": 0.1626, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.980646035340339e-05, |
|
"loss": 0.1574, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.978226789757882e-05, |
|
"loss": 0.1575, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.975807544175425e-05, |
|
"loss": 0.1577, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9733882985929667e-05, |
|
"loss": 0.1628, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9709690530105096e-05, |
|
"loss": 0.1668, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.968549807428052e-05, |
|
"loss": 0.1673, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.966130561845594e-05, |
|
"loss": 0.1675, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9637113162631364e-05, |
|
"loss": 0.1664, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9612920706806794e-05, |
|
"loss": 0.1645, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.958872825098222e-05, |
|
"loss": 0.1645, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.956453579515764e-05, |
|
"loss": 0.1635, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.954034333933307e-05, |
|
"loss": 0.1711, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.951615088350849e-05, |
|
"loss": 0.1686, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9491958427683915e-05, |
|
"loss": 0.1659, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.946776597185934e-05, |
|
"loss": 0.1645, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.944357351603476e-05, |
|
"loss": 0.1689, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.941938106021018e-05, |
|
"loss": 0.1704, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.939518860438561e-05, |
|
"loss": 0.1669, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9370996148561035e-05, |
|
"loss": 0.1655, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.934680369273646e-05, |
|
"loss": 0.1716, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.932261123691189e-05, |
|
"loss": 0.1715, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.929841878108731e-05, |
|
"loss": 0.1666, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.927422632526273e-05, |
|
"loss": 0.1694, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9250033869438156e-05, |
|
"loss": 0.1847, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9225841413613585e-05, |
|
"loss": 0.1701, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9201648957789e-05, |
|
"loss": 0.1718, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.917745650196443e-05, |
|
"loss": 0.1669, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9153264046139854e-05, |
|
"loss": 0.1769, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9129071590315277e-05, |
|
"loss": 0.1697, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9104879134490706e-05, |
|
"loss": 0.1671, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.908068667866613e-05, |
|
"loss": 0.1701, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.905649422284155e-05, |
|
"loss": 0.1733, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.9032301767016974e-05, |
|
"loss": 0.1682, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.9008109311192404e-05, |
|
"loss": 0.1811, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.898391685536782e-05, |
|
"loss": 0.173, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.895972439954325e-05, |
|
"loss": 0.1722, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.893553194371867e-05, |
|
"loss": 0.1764, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.8911339487894095e-05, |
|
"loss": 0.1714, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.888714703206952e-05, |
|
"loss": 0.1779, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.886295457624495e-05, |
|
"loss": 0.1721, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.883876212042037e-05, |
|
"loss": 0.1804, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.881456966459579e-05, |
|
"loss": 0.1723, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.879037720877122e-05, |
|
"loss": 0.1746, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8766184752946645e-05, |
|
"loss": 0.1821, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.874199229712207e-05, |
|
"loss": 0.1685, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.871779984129749e-05, |
|
"loss": 0.1711, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.8693607385472914e-05, |
|
"loss": 0.1775, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.8669414929648336e-05, |
|
"loss": 0.1761, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.8645222473823766e-05, |
|
"loss": 0.1781, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.862103001799919e-05, |
|
"loss": 0.1743, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.859683756217461e-05, |
|
"loss": 0.1774, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.857264510635004e-05, |
|
"loss": 0.1718, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.8548452650525464e-05, |
|
"loss": 0.1719, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.852426019470089e-05, |
|
"loss": 0.1714, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.850006773887631e-05, |
|
"loss": 0.1789, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.847587528305174e-05, |
|
"loss": 0.1821, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.8451682827227155e-05, |
|
"loss": 0.1829, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.8427490371402585e-05, |
|
"loss": 0.1741, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.8403297915578014e-05, |
|
"loss": 0.1795, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.837910545975343e-05, |
|
"loss": 0.1756, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.835491300392886e-05, |
|
"loss": 0.1811, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.833072054810428e-05, |
|
"loss": 0.1798, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.8306528092279705e-05, |
|
"loss": 0.1779, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.828233563645513e-05, |
|
"loss": 0.1859, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.825814318063056e-05, |
|
"loss": 0.1852, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.8233950724805974e-05, |
|
"loss": 0.1818, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.82097582689814e-05, |
|
"loss": 0.1872, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.818556581315683e-05, |
|
"loss": 0.1843, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.816137335733225e-05, |
|
"loss": 0.1868, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.813718090150768e-05, |
|
"loss": 0.1818, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.81129884456831e-05, |
|
"loss": 0.1811, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.8088795989858524e-05, |
|
"loss": 0.1827, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.8064603534033947e-05, |
|
"loss": 0.1753, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.8040411078209376e-05, |
|
"loss": 0.1898, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.801621862238479e-05, |
|
"loss": 0.1838, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.799202616656022e-05, |
|
"loss": 0.1839, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.796783371073565e-05, |
|
"loss": 0.1877, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.794364125491107e-05, |
|
"loss": 0.179, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.79194487990865e-05, |
|
"loss": 0.1913, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.789525634326192e-05, |
|
"loss": 0.1826, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.787106388743734e-05, |
|
"loss": 0.1794, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.7846871431612765e-05, |
|
"loss": 0.1823, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.7822678975788195e-05, |
|
"loss": 0.1836, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.779848651996362e-05, |
|
"loss": 0.1889, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.777429406413904e-05, |
|
"loss": 0.1899, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.775010160831447e-05, |
|
"loss": 0.1842, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.772590915248989e-05, |
|
"loss": 0.1926, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.7701716696665315e-05, |
|
"loss": 0.1912, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.767752424084074e-05, |
|
"loss": 0.1884, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.765333178501616e-05, |
|
"loss": 0.1908, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.7629139329191584e-05, |
|
"loss": 0.1881, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.760494687336701e-05, |
|
"loss": 0.1895, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.7580754417542436e-05, |
|
"loss": 0.1925, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.755656196171786e-05, |
|
"loss": 0.1952, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.753236950589329e-05, |
|
"loss": 0.1878, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.750817705006871e-05, |
|
"loss": 0.1948, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.7483984594244134e-05, |
|
"loss": 0.1723, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.745979213841956e-05, |
|
"loss": 0.1655, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.7435599682594986e-05, |
|
"loss": 0.169, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.74114072267704e-05, |
|
"loss": 0.1697, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.738721477094583e-05, |
|
"loss": 0.1663, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.7363022315121255e-05, |
|
"loss": 0.1676, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.733882985929668e-05, |
|
"loss": 0.1648, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.731463740347211e-05, |
|
"loss": 0.1709, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.729044494764753e-05, |
|
"loss": 0.166, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.726625249182295e-05, |
|
"loss": 0.1639, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.7242060035998375e-05, |
|
"loss": 0.1695, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.7217867580173805e-05, |
|
"loss": 0.1645, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.719367512434922e-05, |
|
"loss": 0.1642, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.716948266852465e-05, |
|
"loss": 0.1725, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.714529021270007e-05, |
|
"loss": 0.1694, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.7121097756875496e-05, |
|
"loss": 0.1683, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.709690530105092e-05, |
|
"loss": 0.1662, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.707271284522635e-05, |
|
"loss": 0.1681, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.704852038940177e-05, |
|
"loss": 0.17, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.7024327933577194e-05, |
|
"loss": 0.1724, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.700013547775262e-05, |
|
"loss": 0.1732, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.6975943021928046e-05, |
|
"loss": 0.1721, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.695175056610347e-05, |
|
"loss": 0.174, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.692755811027889e-05, |
|
"loss": 0.1742, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.6903365654454314e-05, |
|
"loss": 0.1776, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.687917319862974e-05, |
|
"loss": 0.1715, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.685498074280517e-05, |
|
"loss": 0.1759, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.683078828698059e-05, |
|
"loss": 0.1774, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.680659583115601e-05, |
|
"loss": 0.1761, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.678240337533144e-05, |
|
"loss": 0.1721, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.6758210919506865e-05, |
|
"loss": 0.1768, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.673401846368229e-05, |
|
"loss": 0.1721, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.670982600785771e-05, |
|
"loss": 0.1757, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.668563355203314e-05, |
|
"loss": 0.1699, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.6661441096208556e-05, |
|
"loss": 0.1673, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.6637248640383985e-05, |
|
"loss": 0.1781, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.6613056184559415e-05, |
|
"loss": 0.1748, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.658886372873483e-05, |
|
"loss": 0.1774, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.656467127291026e-05, |
|
"loss": 0.1726, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.654047881708568e-05, |
|
"loss": 0.1737, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.6516286361261106e-05, |
|
"loss": 0.1727, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.649209390543653e-05, |
|
"loss": 0.1713, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.646790144961196e-05, |
|
"loss": 0.1747, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.6443708993787374e-05, |
|
"loss": 0.1819, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.6419516537962804e-05, |
|
"loss": 0.173, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.6395324082138233e-05, |
|
"loss": 0.1705, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.637113162631365e-05, |
|
"loss": 0.1854, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.634693917048908e-05, |
|
"loss": 0.1703, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.63227467146645e-05, |
|
"loss": 0.177, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.6298554258839925e-05, |
|
"loss": 0.1746, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.627436180301535e-05, |
|
"loss": 0.1756, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.625016934719078e-05, |
|
"loss": 0.1747, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.622597689136619e-05, |
|
"loss": 0.1743, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.620178443554162e-05, |
|
"loss": 0.1801, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.617759197971705e-05, |
|
"loss": 0.1744, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.615339952389247e-05, |
|
"loss": 0.1702, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.61292070680679e-05, |
|
"loss": 0.1793, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.610501461224332e-05, |
|
"loss": 0.1755, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.608082215641874e-05, |
|
"loss": 0.1706, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.6056629700594166e-05, |
|
"loss": 0.182, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.6032437244769595e-05, |
|
"loss": 0.1772, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.600824478894502e-05, |
|
"loss": 0.1784, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.598405233312044e-05, |
|
"loss": 0.1723, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.595985987729587e-05, |
|
"loss": 0.1732, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.593566742147129e-05, |
|
"loss": 0.1788, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.5911474965646716e-05, |
|
"loss": 0.1763, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.588728250982214e-05, |
|
"loss": 0.1753, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.586309005399756e-05, |
|
"loss": 0.1745, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.5838897598172984e-05, |
|
"loss": 0.1769, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.5814705142348414e-05, |
|
"loss": 0.1824, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.579051268652384e-05, |
|
"loss": 0.1823, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.576632023069926e-05, |
|
"loss": 0.1783, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.574212777487469e-05, |
|
"loss": 0.1816, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.571793531905011e-05, |
|
"loss": 0.1793, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.5693742863225535e-05, |
|
"loss": 0.17, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.566955040740096e-05, |
|
"loss": 0.1739, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.564535795157639e-05, |
|
"loss": 0.1804, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.56211654957518e-05, |
|
"loss": 0.1741, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.559697303992723e-05, |
|
"loss": 0.1761, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.5572780584102655e-05, |
|
"loss": 0.1753, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.554858812827808e-05, |
|
"loss": 0.1808, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.552439567245351e-05, |
|
"loss": 0.177, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.550020321662893e-05, |
|
"loss": 0.1824, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.547601076080435e-05, |
|
"loss": 0.1763, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.5451818304979776e-05, |
|
"loss": 0.1767, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.5427625849155206e-05, |
|
"loss": 0.1786, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.540343339333062e-05, |
|
"loss": 0.1788, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.537924093750605e-05, |
|
"loss": 0.1771, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.5355048481681474e-05, |
|
"loss": 0.1879, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 4.53308560258569e-05, |
|
"loss": 0.1821, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.530666357003232e-05, |
|
"loss": 0.1807, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.528247111420775e-05, |
|
"loss": 0.1806, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.525827865838317e-05, |
|
"loss": 0.1757, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 4.5234086202558595e-05, |
|
"loss": 0.1806, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.5209893746734024e-05, |
|
"loss": 0.1783, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 4.518570129090945e-05, |
|
"loss": 0.1872, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.516150883508487e-05, |
|
"loss": 0.1819, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.513731637926029e-05, |
|
"loss": 0.1843, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.5113123923435715e-05, |
|
"loss": 0.1806, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.508893146761114e-05, |
|
"loss": 0.18, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.506473901178657e-05, |
|
"loss": 0.1853, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.504054655596199e-05, |
|
"loss": 0.1769, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 4.501635410013741e-05, |
|
"loss": 0.1763, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.499216164431284e-05, |
|
"loss": 0.1759, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.4967969188488265e-05, |
|
"loss": 0.1615, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.494377673266369e-05, |
|
"loss": 0.1623, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.491958427683911e-05, |
|
"loss": 0.1619, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.489539182101454e-05, |
|
"loss": 0.1607, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.4871199365189957e-05, |
|
"loss": 0.1621, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.4847006909365386e-05, |
|
"loss": 0.1633, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.4822814453540816e-05, |
|
"loss": 0.1585, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.479862199771623e-05, |
|
"loss": 0.1564, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.477442954189166e-05, |
|
"loss": 0.1591, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.4750237086067084e-05, |
|
"loss": 0.1605, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.472604463024251e-05, |
|
"loss": 0.158, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.470185217441793e-05, |
|
"loss": 0.1594, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.467765971859336e-05, |
|
"loss": 0.1655, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.4653467262768775e-05, |
|
"loss": 0.1577, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.4629274806944205e-05, |
|
"loss": 0.1607, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.4605082351119634e-05, |
|
"loss": 0.1591, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 4.458088989529505e-05, |
|
"loss": 0.1606, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.455669743947048e-05, |
|
"loss": 0.1673, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 4.45325049836459e-05, |
|
"loss": 0.1679, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.4508312527821325e-05, |
|
"loss": 0.1615, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 4.448412007199675e-05, |
|
"loss": 0.1635, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 4.445992761617218e-05, |
|
"loss": 0.1678, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 4.4435735160347594e-05, |
|
"loss": 0.1693, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 4.441154270452302e-05, |
|
"loss": 0.1673, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.438735024869845e-05, |
|
"loss": 0.1615, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.436315779287387e-05, |
|
"loss": 0.165, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.43389653370493e-05, |
|
"loss": 0.1617, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.431477288122472e-05, |
|
"loss": 0.1662, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.4290580425400144e-05, |
|
"loss": 0.1651, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.426638796957557e-05, |
|
"loss": 0.1674, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.4242195513750996e-05, |
|
"loss": 0.1644, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.421800305792642e-05, |
|
"loss": 0.1677, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.419381060210184e-05, |
|
"loss": 0.1608, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.416961814627727e-05, |
|
"loss": 0.1644, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.4145425690452694e-05, |
|
"loss": 0.1705, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.412123323462812e-05, |
|
"loss": 0.1677, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.409704077880354e-05, |
|
"loss": 0.1627, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.407284832297896e-05, |
|
"loss": 0.1669, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.4048655867154385e-05, |
|
"loss": 0.1687, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.4024463411329815e-05, |
|
"loss": 0.1665, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.400027095550524e-05, |
|
"loss": 0.167, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.397607849968066e-05, |
|
"loss": 0.1592, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.395188604385609e-05, |
|
"loss": 0.1683, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.392769358803151e-05, |
|
"loss": 0.1677, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.3903501132206935e-05, |
|
"loss": 0.1601, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.387930867638236e-05, |
|
"loss": 0.1695, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.385511622055779e-05, |
|
"loss": 0.1676, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.3830923764733204e-05, |
|
"loss": 0.173, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 4.380673130890863e-05, |
|
"loss": 0.166, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 4.3782538853084056e-05, |
|
"loss": 0.1673, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.375834639725948e-05, |
|
"loss": 0.1662, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.373415394143491e-05, |
|
"loss": 0.18, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.370996148561033e-05, |
|
"loss": 0.1663, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.3685769029785754e-05, |
|
"loss": 0.1716, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.366157657396118e-05, |
|
"loss": 0.1653, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.3637384118136606e-05, |
|
"loss": 0.1742, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.361319166231202e-05, |
|
"loss": 0.1746, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.358899920648745e-05, |
|
"loss": 0.165, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.3564806750662875e-05, |
|
"loss": 0.1677, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.35406142948383e-05, |
|
"loss": 0.1766, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.351642183901372e-05, |
|
"loss": 0.1727, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.349222938318915e-05, |
|
"loss": 0.1733, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.346803692736457e-05, |
|
"loss": 0.176, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 4.3443844471539995e-05, |
|
"loss": 0.1719, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 4.3419652015715425e-05, |
|
"loss": 0.1679, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 4.339545955989085e-05, |
|
"loss": 0.1727, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 4.337126710406627e-05, |
|
"loss": 0.171, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 4.334707464824169e-05, |
|
"loss": 0.1736, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 4.3322882192417116e-05, |
|
"loss": 0.1716, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 4.329868973659254e-05, |
|
"loss": 0.1793, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 4.327449728076797e-05, |
|
"loss": 0.1714, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.325030482494339e-05, |
|
"loss": 0.1735, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.3226112369118814e-05, |
|
"loss": 0.1777, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.320191991329424e-05, |
|
"loss": 0.1658, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.3177727457469666e-05, |
|
"loss": 0.1743, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.315353500164509e-05, |
|
"loss": 0.1747, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.312934254582051e-05, |
|
"loss": 0.1657, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.310515008999594e-05, |
|
"loss": 0.1714, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.308095763417136e-05, |
|
"loss": 0.1767, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.305676517834679e-05, |
|
"loss": 0.1735, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.3032572722522216e-05, |
|
"loss": 0.1724, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.300838026669763e-05, |
|
"loss": 0.1743, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.298418781087306e-05, |
|
"loss": 0.1682, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.2959995355048485e-05, |
|
"loss": 0.1756, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.293580289922391e-05, |
|
"loss": 0.1694, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.291161044339933e-05, |
|
"loss": 0.1737, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.288741798757476e-05, |
|
"loss": 0.1705, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.2863225531750176e-05, |
|
"loss": 0.1715, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.2839033075925605e-05, |
|
"loss": 0.1741, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.2814840620101035e-05, |
|
"loss": 0.1819, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.279064816427645e-05, |
|
"loss": 0.1733, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 4.276645570845188e-05, |
|
"loss": 0.1727, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 4.27422632526273e-05, |
|
"loss": 0.1773, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.2718070796802726e-05, |
|
"loss": 0.1719, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 4.269387834097815e-05, |
|
"loss": 0.1752, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.266968588515358e-05, |
|
"loss": 0.1757, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.2645493429328994e-05, |
|
"loss": 0.1698, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.2621300973504424e-05, |
|
"loss": 0.1725, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.2597108517679853e-05, |
|
"loss": 0.1782, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.257291606185527e-05, |
|
"loss": 0.1814, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.25487236060307e-05, |
|
"loss": 0.1724, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.252453115020612e-05, |
|
"loss": 0.1748, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.2500338694381545e-05, |
|
"loss": 0.1765, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.247614623855697e-05, |
|
"loss": 0.1556, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.24519537827324e-05, |
|
"loss": 0.1534, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.242776132690782e-05, |
|
"loss": 0.1502, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.240356887108324e-05, |
|
"loss": 0.1587, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 4.237937641525867e-05, |
|
"loss": 0.1542, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 4.2355183959434095e-05, |
|
"loss": 0.1528, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 4.233099150360952e-05, |
|
"loss": 0.1499, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 4.230679904778494e-05, |
|
"loss": 0.1556, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 4.228260659196036e-05, |
|
"loss": 0.1579, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 4.2258414136135786e-05, |
|
"loss": 0.1546, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 4.2234221680311215e-05, |
|
"loss": 0.1526, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 4.221002922448664e-05, |
|
"loss": 0.1582, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 4.218583676866206e-05, |
|
"loss": 0.1573, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 4.216164431283749e-05, |
|
"loss": 0.1569, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.213745185701291e-05, |
|
"loss": 0.1549, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.2113259401188336e-05, |
|
"loss": 0.1525, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 4.208906694536376e-05, |
|
"loss": 0.1599, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 4.206487448953919e-05, |
|
"loss": 0.1561, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 4.2040682033714604e-05, |
|
"loss": 0.1658, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 4.2016489577890034e-05, |
|
"loss": 0.1543, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 4.199229712206546e-05, |
|
"loss": 0.1587, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 4.196810466624088e-05, |
|
"loss": 0.1553, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 4.194391221041631e-05, |
|
"loss": 0.1604, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 4.191971975459173e-05, |
|
"loss": 0.1586, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 4.1895527298767155e-05, |
|
"loss": 0.1585, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.187133484294258e-05, |
|
"loss": 0.1636, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 4.184714238711801e-05, |
|
"loss": 0.1595, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 4.182294993129342e-05, |
|
"loss": 0.1588, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 4.179875747546885e-05, |
|
"loss": 0.1594, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 4.1774565019644275e-05, |
|
"loss": 0.1593, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.17503725638197e-05, |
|
"loss": 0.1586, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 4.172618010799513e-05, |
|
"loss": 0.17, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.170198765217055e-05, |
|
"loss": 0.1572, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 4.167779519634597e-05, |
|
"loss": 0.1577, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 4.1653602740521396e-05, |
|
"loss": 0.1635, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 4.1629410284696826e-05, |
|
"loss": 0.1587, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 4.160521782887224e-05, |
|
"loss": 0.1588, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 4.158102537304767e-05, |
|
"loss": 0.1621, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 4.1556832917223094e-05, |
|
"loss": 0.1647, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 4.153264046139852e-05, |
|
"loss": 0.1601, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.150844800557394e-05, |
|
"loss": 0.1608, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 4.148425554974937e-05, |
|
"loss": 0.1619, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 4.146006309392479e-05, |
|
"loss": 0.1647, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 4.1435870638100215e-05, |
|
"loss": 0.1584, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 4.1411678182275644e-05, |
|
"loss": 0.1649, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.138748572645107e-05, |
|
"loss": 0.1637, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.136329327062649e-05, |
|
"loss": 0.1607, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 4.133910081480191e-05, |
|
"loss": 0.1636, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 4.131490835897734e-05, |
|
"loss": 0.1638, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.129071590315276e-05, |
|
"loss": 0.1631, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 4.126652344732819e-05, |
|
"loss": 0.162, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 4.124233099150362e-05, |
|
"loss": 0.1601, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 4.121813853567903e-05, |
|
"loss": 0.1674, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 4.119394607985446e-05, |
|
"loss": 0.169, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 4.1169753624029885e-05, |
|
"loss": 0.1645, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 4.114556116820531e-05, |
|
"loss": 0.162, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 4.112136871238073e-05, |
|
"loss": 0.163, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 4.109717625655616e-05, |
|
"loss": 0.1693, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 4.1072983800731577e-05, |
|
"loss": 0.1712, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.1048791344907006e-05, |
|
"loss": 0.1655, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 4.1024598889082436e-05, |
|
"loss": 0.1625, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 4.100040643325785e-05, |
|
"loss": 0.1647, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 4.097621397743328e-05, |
|
"loss": 0.1638, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.0952021521608704e-05, |
|
"loss": 0.1648, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 4.092782906578413e-05, |
|
"loss": 0.1667, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 4.090363660995955e-05, |
|
"loss": 0.1726, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.087944415413498e-05, |
|
"loss": 0.1667, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 4.0855251698310395e-05, |
|
"loss": 0.1652, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 4.0831059242485825e-05, |
|
"loss": 0.1733, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 4.0806866786661254e-05, |
|
"loss": 0.1652, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 4.078267433083667e-05, |
|
"loss": 0.1656, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 4.07584818750121e-05, |
|
"loss": 0.1589, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 4.073428941918752e-05, |
|
"loss": 0.1645, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 4.0710096963362945e-05, |
|
"loss": 0.1673, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 4.068590450753837e-05, |
|
"loss": 0.1709, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 4.06617120517138e-05, |
|
"loss": 0.1664, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 4.063751959588922e-05, |
|
"loss": 0.1767, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 4.061332714006464e-05, |
|
"loss": 0.1683, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 4.058913468424007e-05, |
|
"loss": 0.1654, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 4.0564942228415496e-05, |
|
"loss": 0.1665, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 4.054074977259092e-05, |
|
"loss": 0.1722, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 4.051655731676634e-05, |
|
"loss": 0.1644, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.0492364860941764e-05, |
|
"loss": 0.1733, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 4.046817240511719e-05, |
|
"loss": 0.1691, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 4.0443979949292616e-05, |
|
"loss": 0.1672, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.041978749346804e-05, |
|
"loss": 0.1652, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 4.039559503764346e-05, |
|
"loss": 0.1765, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 4.037140258181889e-05, |
|
"loss": 0.1713, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 4.0347210125994314e-05, |
|
"loss": 0.1671, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.032301767016974e-05, |
|
"loss": 0.1671, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.029882521434516e-05, |
|
"loss": 0.1718, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 4.027463275852059e-05, |
|
"loss": 0.1657, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.0250440302696005e-05, |
|
"loss": 0.17, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 4.0226247846871435e-05, |
|
"loss": 0.1697, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 4.020205539104686e-05, |
|
"loss": 0.1677, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 4.017786293522228e-05, |
|
"loss": 0.1668, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.015367047939771e-05, |
|
"loss": 0.1673, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4.012947802357313e-05, |
|
"loss": 0.1684, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 4.0105285567748555e-05, |
|
"loss": 0.166, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 4.008109311192398e-05, |
|
"loss": 0.1686, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.005690065609941e-05, |
|
"loss": 0.1794, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.0032708200274824e-05, |
|
"loss": 0.1668, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.000851574445025e-05, |
|
"loss": 0.1724, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.9984323288625676e-05, |
|
"loss": 0.1572, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.99601308328011e-05, |
|
"loss": 0.1462, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.993593837697653e-05, |
|
"loss": 0.1494, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.991174592115195e-05, |
|
"loss": 0.1492, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.9887553465327374e-05, |
|
"loss": 0.1487, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.98633610095028e-05, |
|
"loss": 0.1476, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.9839168553678226e-05, |
|
"loss": 0.1448, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 3.981497609785364e-05, |
|
"loss": 0.151, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.979078364202907e-05, |
|
"loss": 0.1563, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.9766591186204495e-05, |
|
"loss": 0.1458, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.974239873037992e-05, |
|
"loss": 0.1449, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.971820627455534e-05, |
|
"loss": 0.1515, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.969401381873077e-05, |
|
"loss": 0.1518, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.966982136290619e-05, |
|
"loss": 0.1467, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.9645628907081615e-05, |
|
"loss": 0.1502, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.9621436451257045e-05, |
|
"loss": 0.1525, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 3.959724399543247e-05, |
|
"loss": 0.1475, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.957305153960789e-05, |
|
"loss": 0.1505, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.954885908378331e-05, |
|
"loss": 0.1523, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.952466662795874e-05, |
|
"loss": 0.1531, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.950047417213416e-05, |
|
"loss": 0.1517, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3.947628171630959e-05, |
|
"loss": 0.1521, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.945208926048502e-05, |
|
"loss": 0.1524, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.9427896804660434e-05, |
|
"loss": 0.1559, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 3.9403704348835863e-05, |
|
"loss": 0.1497, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 3.9379511893011286e-05, |
|
"loss": 0.1474, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 3.935531943718671e-05, |
|
"loss": 0.1528, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 3.933112698136213e-05, |
|
"loss": 0.1513, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 3.930693452553756e-05, |
|
"loss": 0.15, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 3.928274206971298e-05, |
|
"loss": 0.1506, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 3.925854961388841e-05, |
|
"loss": 0.1519, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 3.9234357158063836e-05, |
|
"loss": 0.157, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 3.921016470223925e-05, |
|
"loss": 0.1531, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 3.918597224641468e-05, |
|
"loss": 0.1478, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 3.9161779790590105e-05, |
|
"loss": 0.1534, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 3.913758733476553e-05, |
|
"loss": 0.1537, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 3.911339487894095e-05, |
|
"loss": 0.1508, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 3.908920242311638e-05, |
|
"loss": 0.1577, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 3.9065009967291796e-05, |
|
"loss": 0.1556, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 3.9040817511467225e-05, |
|
"loss": 0.1563, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 3.9016625055642655e-05, |
|
"loss": 0.1621, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 3.899243259981807e-05, |
|
"loss": 0.1599, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 3.89682401439935e-05, |
|
"loss": 0.1498, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 3.894404768816892e-05, |
|
"loss": 0.1539, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 3.8919855232344346e-05, |
|
"loss": 0.1526, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 3.889566277651977e-05, |
|
"loss": 0.155, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 3.88714703206952e-05, |
|
"loss": 0.1577, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 3.884727786487062e-05, |
|
"loss": 0.155, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 3.8823085409046044e-05, |
|
"loss": 0.1531, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 3.8798892953221474e-05, |
|
"loss": 0.154, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 3.8774700497396896e-05, |
|
"loss": 0.1561, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 3.875050804157232e-05, |
|
"loss": 0.1623, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 3.872631558574774e-05, |
|
"loss": 0.1529, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 3.8702123129923165e-05, |
|
"loss": 0.1575, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 3.867793067409859e-05, |
|
"loss": 0.1557, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 3.865373821827402e-05, |
|
"loss": 0.1604, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 3.862954576244944e-05, |
|
"loss": 0.1579, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 3.860535330662486e-05, |
|
"loss": 0.1578, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 3.858116085080029e-05, |
|
"loss": 0.1617, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 3.8556968394975715e-05, |
|
"loss": 0.1587, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 3.853277593915114e-05, |
|
"loss": 0.1545, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3.850858348332656e-05, |
|
"loss": 0.1576, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 3.848439102750199e-05, |
|
"loss": 0.1592, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 3.8460198571677406e-05, |
|
"loss": 0.1527, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 3.8436006115852836e-05, |
|
"loss": 0.1585, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 3.841181366002826e-05, |
|
"loss": 0.1576, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 3.838762120420368e-05, |
|
"loss": 0.1596, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 3.836342874837911e-05, |
|
"loss": 0.1625, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 3.8339236292554533e-05, |
|
"loss": 0.1515, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 3.8315043836729956e-05, |
|
"loss": 0.1563, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 3.829085138090538e-05, |
|
"loss": 0.1595, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 3.826665892508081e-05, |
|
"loss": 0.1577, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 3.8242466469256225e-05, |
|
"loss": 0.164, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 3.8218274013431654e-05, |
|
"loss": 0.1569, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.819408155760708e-05, |
|
"loss": 0.1557, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 3.81698891017825e-05, |
|
"loss": 0.1568, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3.814569664595793e-05, |
|
"loss": 0.1609, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 3.812150419013335e-05, |
|
"loss": 0.1547, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 3.8097311734308775e-05, |
|
"loss": 0.1564, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 3.80731192784842e-05, |
|
"loss": 0.1586, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 3.804892682265963e-05, |
|
"loss": 0.1619, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 3.802473436683504e-05, |
|
"loss": 0.157, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 3.800054191101047e-05, |
|
"loss": 0.1602, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.7976349455185895e-05, |
|
"loss": 0.1581, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 3.795215699936132e-05, |
|
"loss": 0.157, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 3.792796454353674e-05, |
|
"loss": 0.1617, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.790377208771217e-05, |
|
"loss": 0.1536, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.787957963188759e-05, |
|
"loss": 0.1571, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 3.7855387176063016e-05, |
|
"loss": 0.1583, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 3.7831194720238446e-05, |
|
"loss": 0.1617, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 3.780700226441387e-05, |
|
"loss": 0.1568, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 3.778280980858929e-05, |
|
"loss": 0.1618, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 3.7758617352764714e-05, |
|
"loss": 0.1592, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.7734424896940143e-05, |
|
"loss": 0.1625, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.771023244111556e-05, |
|
"loss": 0.1586, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 3.768603998529099e-05, |
|
"loss": 0.1543, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 3.766184752946642e-05, |
|
"loss": 0.1593, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 3.7637655073641835e-05, |
|
"loss": 0.1634, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 3.7613462617817264e-05, |
|
"loss": 0.161, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 3.758927016199269e-05, |
|
"loss": 0.158, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 3.756507770616811e-05, |
|
"loss": 0.1686, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 3.754088525034353e-05, |
|
"loss": 0.1619, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 3.751669279451896e-05, |
|
"loss": 0.1644, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.749250033869438e-05, |
|
"loss": 0.1524, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 3.746830788286981e-05, |
|
"loss": 0.1413, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 3.744411542704524e-05, |
|
"loss": 0.1377, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 3.741992297122065e-05, |
|
"loss": 0.1374, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 3.739573051539608e-05, |
|
"loss": 0.1391, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 3.7371538059571505e-05, |
|
"loss": 0.1401, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 3.734734560374693e-05, |
|
"loss": 0.1396, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 3.732315314792235e-05, |
|
"loss": 0.1397, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 3.729896069209778e-05, |
|
"loss": 0.1373, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 3.7274768236273197e-05, |
|
"loss": 0.1383, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 3.7250575780448626e-05, |
|
"loss": 0.1409, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 3.7226383324624056e-05, |
|
"loss": 0.1364, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 3.720219086879947e-05, |
|
"loss": 0.1438, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 3.71779984129749e-05, |
|
"loss": 0.14, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 3.7153805957150324e-05, |
|
"loss": 0.1433, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 3.712961350132575e-05, |
|
"loss": 0.1447, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 3.710542104550117e-05, |
|
"loss": 0.1394, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 3.70812285896766e-05, |
|
"loss": 0.142, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 3.705703613385202e-05, |
|
"loss": 0.1424, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 3.7032843678027445e-05, |
|
"loss": 0.1397, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 3.7008651222202874e-05, |
|
"loss": 0.143, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 3.69844587663783e-05, |
|
"loss": 0.1421, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 3.696026631055372e-05, |
|
"loss": 0.1468, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 3.693607385472914e-05, |
|
"loss": 0.1389, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 3.6911881398904565e-05, |
|
"loss": 0.1466, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 3.688768894307999e-05, |
|
"loss": 0.1404, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 3.686349648725542e-05, |
|
"loss": 0.1396, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 3.683930403143084e-05, |
|
"loss": 0.147, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 3.681511157560626e-05, |
|
"loss": 0.145, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 3.679091911978169e-05, |
|
"loss": 0.1448, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 3.6766726663957116e-05, |
|
"loss": 0.1402, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 3.674253420813254e-05, |
|
"loss": 0.1424, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 3.671834175230796e-05, |
|
"loss": 0.1394, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 3.669414929648339e-05, |
|
"loss": 0.1519, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 3.666995684065881e-05, |
|
"loss": 0.1468, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 3.6645764384834236e-05, |
|
"loss": 0.1451, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 3.662157192900966e-05, |
|
"loss": 0.1459, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 3.659737947318508e-05, |
|
"loss": 0.1435, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 3.657318701736051e-05, |
|
"loss": 0.1444, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 3.6548994561535934e-05, |
|
"loss": 0.1458, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 3.652480210571136e-05, |
|
"loss": 0.1475, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 3.650060964988678e-05, |
|
"loss": 0.1436, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 3.647641719406221e-05, |
|
"loss": 0.1438, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 3.6452224738237625e-05, |
|
"loss": 0.1398, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 3.6428032282413055e-05, |
|
"loss": 0.1516, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 3.640383982658848e-05, |
|
"loss": 0.1435, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 3.63796473707639e-05, |
|
"loss": 0.139, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 3.635545491493933e-05, |
|
"loss": 0.1414, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 3.633126245911475e-05, |
|
"loss": 0.1531, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 3.6307070003290175e-05, |
|
"loss": 0.1475, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 3.62828775474656e-05, |
|
"loss": 0.142, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 3.625868509164103e-05, |
|
"loss": 0.1471, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 3.6234492635816444e-05, |
|
"loss": 0.1533, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 3.621030017999187e-05, |
|
"loss": 0.1454, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 3.6186107724167296e-05, |
|
"loss": 0.1439, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 3.616191526834272e-05, |
|
"loss": 0.1517, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 3.613772281251815e-05, |
|
"loss": 0.1435, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 3.611353035669357e-05, |
|
"loss": 0.1482, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 3.6089337900868994e-05, |
|
"loss": 0.1534, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 3.606514544504442e-05, |
|
"loss": 0.1401, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 3.6040952989219846e-05, |
|
"loss": 0.1483, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 3.601676053339527e-05, |
|
"loss": 0.148, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 3.599256807757069e-05, |
|
"loss": 0.1434, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 3.5968375621746115e-05, |
|
"loss": 0.1488, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 3.5944183165921544e-05, |
|
"loss": 0.1559, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 3.591999071009696e-05, |
|
"loss": 0.1464, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 3.589579825427239e-05, |
|
"loss": 0.1469, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 3.587160579844782e-05, |
|
"loss": 0.1489, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 3.5847413342623235e-05, |
|
"loss": 0.1483, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 3.5823220886798665e-05, |
|
"loss": 0.1487, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 3.579902843097409e-05, |
|
"loss": 0.1482, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 3.577483597514951e-05, |
|
"loss": 0.1439, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 3.575064351932493e-05, |
|
"loss": 0.1496, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 3.572645106350036e-05, |
|
"loss": 0.1488, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 3.570225860767578e-05, |
|
"loss": 0.1461, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 3.567806615185121e-05, |
|
"loss": 0.1486, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 3.565387369602664e-05, |
|
"loss": 0.1465, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 3.5629681240202054e-05, |
|
"loss": 0.1468, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 3.5605488784377483e-05, |
|
"loss": 0.1447, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 3.5581296328552906e-05, |
|
"loss": 0.1523, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 3.555710387272833e-05, |
|
"loss": 0.1412, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 3.553291141690375e-05, |
|
"loss": 0.1492, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 3.550871896107918e-05, |
|
"loss": 0.1455, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 3.54845265052546e-05, |
|
"loss": 0.1492, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 3.546033404943003e-05, |
|
"loss": 0.1491, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 3.5436141593605456e-05, |
|
"loss": 0.1473, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 3.541194913778087e-05, |
|
"loss": 0.1518, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 3.53877566819563e-05, |
|
"loss": 0.148, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 3.5363564226131725e-05, |
|
"loss": 0.152, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 3.533937177030715e-05, |
|
"loss": 0.146, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 3.531517931448257e-05, |
|
"loss": 0.1506, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 3.5290986858658e-05, |
|
"loss": 0.1493, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 3.526679440283342e-05, |
|
"loss": 0.1466, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 3.5242601947008845e-05, |
|
"loss": 0.1505, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 3.5218409491184275e-05, |
|
"loss": 0.1442, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 3.51942170353597e-05, |
|
"loss": 0.1471, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 3.517002457953512e-05, |
|
"loss": 0.1499, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 3.514583212371054e-05, |
|
"loss": 0.1554, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 3.5121639667885966e-05, |
|
"loss": 0.1492, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 3.509744721206139e-05, |
|
"loss": 0.1485, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 3.507325475623682e-05, |
|
"loss": 0.1482, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 3.504906230041224e-05, |
|
"loss": 0.155, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 3.5024869844587664e-05, |
|
"loss": 0.1518, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.5000677388763094e-05, |
|
"loss": 0.1468, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 3.4976484932938516e-05, |
|
"loss": 0.128, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 3.495229247711394e-05, |
|
"loss": 0.1322, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.492810002128936e-05, |
|
"loss": 0.1307, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 3.490390756546479e-05, |
|
"loss": 0.1338, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 3.487971510964021e-05, |
|
"loss": 0.1302, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 3.485552265381564e-05, |
|
"loss": 0.1267, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 3.483133019799106e-05, |
|
"loss": 0.1282, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 3.480713774216648e-05, |
|
"loss": 0.13, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 3.478294528634191e-05, |
|
"loss": 0.1292, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 3.4758752830517335e-05, |
|
"loss": 0.1313, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 3.473456037469276e-05, |
|
"loss": 0.1324, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 3.471036791886818e-05, |
|
"loss": 0.1348, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 3.468617546304361e-05, |
|
"loss": 0.1373, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 3.4661983007219026e-05, |
|
"loss": 0.132, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 3.4637790551394456e-05, |
|
"loss": 0.1331, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 3.461359809556988e-05, |
|
"loss": 0.1308, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 3.45894056397453e-05, |
|
"loss": 0.1381, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 3.456521318392073e-05, |
|
"loss": 0.1356, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 3.4541020728096153e-05, |
|
"loss": 0.1318, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 3.4516828272271576e-05, |
|
"loss": 0.1304, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 3.4492635816447e-05, |
|
"loss": 0.1341, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 3.446844336062243e-05, |
|
"loss": 0.1293, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 3.4444250904797845e-05, |
|
"loss": 0.1369, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 3.4420058448973274e-05, |
|
"loss": 0.1281, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 3.43958659931487e-05, |
|
"loss": 0.1357, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 3.437167353732412e-05, |
|
"loss": 0.1332, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 3.434748108149955e-05, |
|
"loss": 0.1358, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 3.432328862567497e-05, |
|
"loss": 0.137, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 3.4299096169850395e-05, |
|
"loss": 0.1329, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 3.427490371402582e-05, |
|
"loss": 0.1282, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 3.425071125820125e-05, |
|
"loss": 0.1353, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 3.422651880237667e-05, |
|
"loss": 0.1387, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 3.420232634655209e-05, |
|
"loss": 0.132, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 3.4178133890727515e-05, |
|
"loss": 0.1383, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 3.4153941434902945e-05, |
|
"loss": 0.1333, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 3.412974897907836e-05, |
|
"loss": 0.1329, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 3.410555652325379e-05, |
|
"loss": 0.132, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 3.408136406742922e-05, |
|
"loss": 0.1436, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 3.4057171611604636e-05, |
|
"loss": 0.1334, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 3.4032979155780066e-05, |
|
"loss": 0.1313, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3.400878669995549e-05, |
|
"loss": 0.1345, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 3.398459424413091e-05, |
|
"loss": 0.1351, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 3.3960401788306334e-05, |
|
"loss": 0.1397, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 3.3936209332481764e-05, |
|
"loss": 0.1345, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 3.391201687665718e-05, |
|
"loss": 0.1302, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 3.388782442083261e-05, |
|
"loss": 0.1346, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 3.386363196500804e-05, |
|
"loss": 0.1433, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 3.3839439509183455e-05, |
|
"loss": 0.1377, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 3.3815247053358884e-05, |
|
"loss": 0.1413, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 3.379105459753431e-05, |
|
"loss": 0.1392, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 3.376686214170973e-05, |
|
"loss": 0.1366, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 3.374266968588515e-05, |
|
"loss": 0.1388, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 3.371847723006058e-05, |
|
"loss": 0.1406, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 3.3694284774236e-05, |
|
"loss": 0.1418, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 3.367009231841143e-05, |
|
"loss": 0.141, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 3.364589986258686e-05, |
|
"loss": 0.1373, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 3.362170740676227e-05, |
|
"loss": 0.1387, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 3.35975149509377e-05, |
|
"loss": 0.1338, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 3.3573322495113126e-05, |
|
"loss": 0.143, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 3.354913003928855e-05, |
|
"loss": 0.1376, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 3.352493758346397e-05, |
|
"loss": 0.1319, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3.35007451276394e-05, |
|
"loss": 0.1421, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 3.3476552671814823e-05, |
|
"loss": 0.1361, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 3.3452360215990246e-05, |
|
"loss": 0.1344, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 3.3428167760165676e-05, |
|
"loss": 0.1434, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 3.34039753043411e-05, |
|
"loss": 0.1374, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 3.337978284851652e-05, |
|
"loss": 0.1368, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 3.3355590392691944e-05, |
|
"loss": 0.1414, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 3.333139793686737e-05, |
|
"loss": 0.1325, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 3.330720548104279e-05, |
|
"loss": 0.1376, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 3.328301302521822e-05, |
|
"loss": 0.1418, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 3.325882056939364e-05, |
|
"loss": 0.1413, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 3.3234628113569065e-05, |
|
"loss": 0.136, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 3.3210435657744494e-05, |
|
"loss": 0.1462, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 3.318624320191992e-05, |
|
"loss": 0.1386, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 3.316205074609534e-05, |
|
"loss": 0.1371, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 3.313785829027076e-05, |
|
"loss": 0.1418, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 3.311366583444619e-05, |
|
"loss": 0.1434, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 3.308947337862161e-05, |
|
"loss": 0.1388, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 3.306528092279704e-05, |
|
"loss": 0.137, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 3.304108846697246e-05, |
|
"loss": 0.1445, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 3.301689601114788e-05, |
|
"loss": 0.1358, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 3.299270355532331e-05, |
|
"loss": 0.1411, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 3.2968511099498736e-05, |
|
"loss": 0.1406, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 3.294431864367416e-05, |
|
"loss": 0.1407, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 3.292012618784958e-05, |
|
"loss": 0.1414, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 3.289593373202501e-05, |
|
"loss": 0.1371, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 3.287174127620043e-05, |
|
"loss": 0.1368, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 3.2847548820375856e-05, |
|
"loss": 0.1376, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 3.282335636455128e-05, |
|
"loss": 0.1362, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 3.27991639087267e-05, |
|
"loss": 0.1387, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 3.277497145290213e-05, |
|
"loss": 0.1394, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 3.2750778997077554e-05, |
|
"loss": 0.1388, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 3.272658654125298e-05, |
|
"loss": 0.1403, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 3.27023940854284e-05, |
|
"loss": 0.1451, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 3.267820162960383e-05, |
|
"loss": 0.1356, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 3.2654009173779245e-05, |
|
"loss": 0.1428, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 3.2629816717954675e-05, |
|
"loss": 0.1369, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 3.26056242621301e-05, |
|
"loss": 0.1399, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 3.258143180630552e-05, |
|
"loss": 0.1389, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 3.255723935048095e-05, |
|
"loss": 0.1415, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 3.253304689465637e-05, |
|
"loss": 0.1425, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.2508854438831796e-05, |
|
"loss": 0.1375, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.248466198300722e-05, |
|
"loss": 0.1283, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.246046952718265e-05, |
|
"loss": 0.1209, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.243627707135807e-05, |
|
"loss": 0.1212, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 3.2412084615533493e-05, |
|
"loss": 0.1216, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 3.2387892159708916e-05, |
|
"loss": 0.1225, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 3.2363699703884346e-05, |
|
"loss": 0.1219, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 3.233950724805976e-05, |
|
"loss": 0.1256, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 3.231531479223519e-05, |
|
"loss": 0.1175, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 3.229112233641062e-05, |
|
"loss": 0.1219, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 3.226692988058604e-05, |
|
"loss": 0.1242, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 3.2242737424761466e-05, |
|
"loss": 0.1183, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 3.221854496893689e-05, |
|
"loss": 0.1207, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 3.219435251311231e-05, |
|
"loss": 0.1267, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 3.2170160057287735e-05, |
|
"loss": 0.121, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 3.2145967601463164e-05, |
|
"loss": 0.1225, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 3.212177514563858e-05, |
|
"loss": 0.1195, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 3.209758268981401e-05, |
|
"loss": 0.1221, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 3.207339023398944e-05, |
|
"loss": 0.1239, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 3.2049197778164855e-05, |
|
"loss": 0.1315, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 3.2025005322340285e-05, |
|
"loss": 0.1263, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 3.200081286651571e-05, |
|
"loss": 0.1279, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 3.197662041069113e-05, |
|
"loss": 0.1259, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 3.195242795486655e-05, |
|
"loss": 0.1277, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 3.192823549904198e-05, |
|
"loss": 0.1264, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 3.19040430432174e-05, |
|
"loss": 0.1254, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 3.187985058739283e-05, |
|
"loss": 0.1254, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 3.185565813156826e-05, |
|
"loss": 0.131, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 3.1831465675743674e-05, |
|
"loss": 0.1324, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 3.1807273219919103e-05, |
|
"loss": 0.1297, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 3.1783080764094526e-05, |
|
"loss": 0.1305, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 3.175888830826995e-05, |
|
"loss": 0.1323, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 3.173469585244537e-05, |
|
"loss": 0.1228, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 3.17105033966208e-05, |
|
"loss": 0.1251, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 3.1686310940796224e-05, |
|
"loss": 0.1238, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 3.166211848497165e-05, |
|
"loss": 0.1213, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 3.1637926029147076e-05, |
|
"loss": 0.123, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 3.16137335733225e-05, |
|
"loss": 0.1247, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 3.158954111749792e-05, |
|
"loss": 0.1253, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 3.1565348661673345e-05, |
|
"loss": 0.1241, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 3.154115620584877e-05, |
|
"loss": 0.1264, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 3.151696375002419e-05, |
|
"loss": 0.1244, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 3.149277129419962e-05, |
|
"loss": 0.1274, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 3.146857883837504e-05, |
|
"loss": 0.1243, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 3.1444386382550465e-05, |
|
"loss": 0.1307, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 3.1420193926725895e-05, |
|
"loss": 0.1269, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 3.139600147090132e-05, |
|
"loss": 0.127, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 3.137180901507674e-05, |
|
"loss": 0.1299, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 3.134761655925216e-05, |
|
"loss": 0.1302, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 3.132342410342759e-05, |
|
"loss": 0.1277, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 3.129923164760301e-05, |
|
"loss": 0.1265, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 3.127503919177844e-05, |
|
"loss": 0.124, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 3.125084673595386e-05, |
|
"loss": 0.1252, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 3.1226654280129284e-05, |
|
"loss": 0.1265, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 3.1202461824304714e-05, |
|
"loss": 0.1297, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 3.1178269368480136e-05, |
|
"loss": 0.1292, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 3.115407691265556e-05, |
|
"loss": 0.1294, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 3.112988445683098e-05, |
|
"loss": 0.1304, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 3.110569200100641e-05, |
|
"loss": 0.1259, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 3.108149954518183e-05, |
|
"loss": 0.1317, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 3.105730708935726e-05, |
|
"loss": 0.1262, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 3.103311463353268e-05, |
|
"loss": 0.1263, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 3.10089221777081e-05, |
|
"loss": 0.1315, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 3.098472972188353e-05, |
|
"loss": 0.133, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 3.0960537266058955e-05, |
|
"loss": 0.1291, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 3.093634481023438e-05, |
|
"loss": 0.132, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 3.09121523544098e-05, |
|
"loss": 0.1285, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 3.088795989858523e-05, |
|
"loss": 0.1296, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 3.0863767442760646e-05, |
|
"loss": 0.13, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 3.0839574986936076e-05, |
|
"loss": 0.1292, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 3.08153825311115e-05, |
|
"loss": 0.1283, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 3.079119007528692e-05, |
|
"loss": 0.1335, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 3.076699761946235e-05, |
|
"loss": 0.1291, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 3.0742805163637773e-05, |
|
"loss": 0.1302, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 3.0718612707813196e-05, |
|
"loss": 0.1274, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 3.069442025198862e-05, |
|
"loss": 0.1287, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 3.067022779616405e-05, |
|
"loss": 0.1348, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 3.064603534033947e-05, |
|
"loss": 0.1291, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 3.0621842884514894e-05, |
|
"loss": 0.1336, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 3.059765042869032e-05, |
|
"loss": 0.1316, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 3.0573457972865746e-05, |
|
"loss": 0.1268, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 3.054926551704116e-05, |
|
"loss": 0.1274, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 3.052507306121659e-05, |
|
"loss": 0.1319, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 3.0500880605392018e-05, |
|
"loss": 0.1353, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 3.047668814956744e-05, |
|
"loss": 0.1292, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 3.0452495693742867e-05, |
|
"loss": 0.1303, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 3.0428303237918287e-05, |
|
"loss": 0.1325, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 3.0404110782093713e-05, |
|
"loss": 0.1344, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 3.0379918326269135e-05, |
|
"loss": 0.1307, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 3.035572587044456e-05, |
|
"loss": 0.1291, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 3.0331533414619984e-05, |
|
"loss": 0.1335, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 3.030734095879541e-05, |
|
"loss": 0.1308, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 3.0283148502970837e-05, |
|
"loss": 0.1376, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 3.025895604714626e-05, |
|
"loss": 0.1319, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 3.0234763591321686e-05, |
|
"loss": 0.1338, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 3.0210571135497105e-05, |
|
"loss": 0.1336, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 3.0186378679672535e-05, |
|
"loss": 0.1276, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 3.0162186223847954e-05, |
|
"loss": 0.1294, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 3.013799376802338e-05, |
|
"loss": 0.1343, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 3.0113801312198803e-05, |
|
"loss": 0.1279, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 3.008960885637423e-05, |
|
"loss": 0.13, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 3.0065416400549655e-05, |
|
"loss": 0.1281, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 3.0041223944725078e-05, |
|
"loss": 0.1324, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 3.0017031488900504e-05, |
|
"loss": 0.1305, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2.9992839033075927e-05, |
|
"loss": 0.1256, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 2.9968646577251353e-05, |
|
"loss": 0.1132, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 2.9944454121426773e-05, |
|
"loss": 0.1114, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 2.9920261665602202e-05, |
|
"loss": 0.1121, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 2.989606920977762e-05, |
|
"loss": 0.1161, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 2.9871876753953048e-05, |
|
"loss": 0.1144, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 2.9847684298128474e-05, |
|
"loss": 0.12, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 2.9823491842303897e-05, |
|
"loss": 0.1184, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 2.9799299386479323e-05, |
|
"loss": 0.1157, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 2.9775106930654746e-05, |
|
"loss": 0.1172, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 2.9750914474830172e-05, |
|
"loss": 0.1189, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 2.9726722019005595e-05, |
|
"loss": 0.1153, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 2.970252956318102e-05, |
|
"loss": 0.1142, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 2.967833710735644e-05, |
|
"loss": 0.12, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 2.9654144651531866e-05, |
|
"loss": 0.1131, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 2.9629952195707296e-05, |
|
"loss": 0.1155, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 2.9605759739882715e-05, |
|
"loss": 0.1188, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 2.958156728405814e-05, |
|
"loss": 0.1212, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 2.9557374828233564e-05, |
|
"loss": 0.1119, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 2.953318237240899e-05, |
|
"loss": 0.1141, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 2.9508989916584413e-05, |
|
"loss": 0.1145, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 2.948479746075984e-05, |
|
"loss": 0.1189, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 2.946060500493526e-05, |
|
"loss": 0.1207, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 2.9436412549110688e-05, |
|
"loss": 0.118, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 2.9412220093286114e-05, |
|
"loss": 0.1159, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 2.9388027637461534e-05, |
|
"loss": 0.1174, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 2.9363835181636963e-05, |
|
"loss": 0.117, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 2.9339642725812383e-05, |
|
"loss": 0.1169, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 2.931545026998781e-05, |
|
"loss": 0.1164, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 2.929125781416323e-05, |
|
"loss": 0.1158, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 2.9267065358338658e-05, |
|
"loss": 0.1138, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 2.924287290251408e-05, |
|
"loss": 0.1168, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 2.9218680446689507e-05, |
|
"loss": 0.1165, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 2.9194487990864933e-05, |
|
"loss": 0.1184, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 2.9170295535040352e-05, |
|
"loss": 0.12, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 2.9146103079215782e-05, |
|
"loss": 0.1184, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 2.91219106233912e-05, |
|
"loss": 0.1212, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 2.9097718167566627e-05, |
|
"loss": 0.1198, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 2.907352571174205e-05, |
|
"loss": 0.116, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 2.9049333255917476e-05, |
|
"loss": 0.1176, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 2.90251408000929e-05, |
|
"loss": 0.1159, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 2.9000948344268325e-05, |
|
"loss": 0.1193, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 2.897675588844375e-05, |
|
"loss": 0.1195, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 2.8952563432619174e-05, |
|
"loss": 0.1193, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 2.89283709767946e-05, |
|
"loss": 0.1208, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 2.890417852097002e-05, |
|
"loss": 0.1218, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 2.887998606514545e-05, |
|
"loss": 0.1199, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 2.885579360932087e-05, |
|
"loss": 0.1258, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 2.8831601153496295e-05, |
|
"loss": 0.1168, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 2.8807408697671718e-05, |
|
"loss": 0.1175, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 2.8783216241847144e-05, |
|
"loss": 0.1196, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 2.875902378602257e-05, |
|
"loss": 0.1223, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 2.8734831330197993e-05, |
|
"loss": 0.1183, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 2.871063887437342e-05, |
|
"loss": 0.1184, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 2.8686446418548842e-05, |
|
"loss": 0.1238, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 2.8662253962724268e-05, |
|
"loss": 0.1181, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 2.8638061506899687e-05, |
|
"loss": 0.1193, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 2.8613869051075113e-05, |
|
"loss": 0.1213, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 2.8589676595250536e-05, |
|
"loss": 0.1239, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 2.8565484139425962e-05, |
|
"loss": 0.1176, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 2.8541291683601385e-05, |
|
"loss": 0.1174, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 2.851709922777681e-05, |
|
"loss": 0.1238, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 2.8492906771952237e-05, |
|
"loss": 0.1193, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 2.846871431612766e-05, |
|
"loss": 0.1232, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 2.8444521860303086e-05, |
|
"loss": 0.119, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 2.8420329404478506e-05, |
|
"loss": 0.122, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 2.8396136948653935e-05, |
|
"loss": 0.1189, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 2.8371944492829355e-05, |
|
"loss": 0.1238, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 2.834775203700478e-05, |
|
"loss": 0.1229, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 2.8323559581180204e-05, |
|
"loss": 0.1227, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 2.829936712535563e-05, |
|
"loss": 0.1209, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 2.8275174669531056e-05, |
|
"loss": 0.1174, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 2.825098221370648e-05, |
|
"loss": 0.1192, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 2.8226789757881905e-05, |
|
"loss": 0.119, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 2.8202597302057328e-05, |
|
"loss": 0.1256, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 2.8178404846232754e-05, |
|
"loss": 0.1287, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 2.8154212390408173e-05, |
|
"loss": 0.1241, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 2.8130019934583603e-05, |
|
"loss": 0.1215, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 2.8105827478759022e-05, |
|
"loss": 0.1228, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 2.808163502293445e-05, |
|
"loss": 0.1245, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 2.8057442567109875e-05, |
|
"loss": 0.1215, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 2.8033250111285297e-05, |
|
"loss": 0.1212, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 2.8009057655460724e-05, |
|
"loss": 0.1233, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 2.7984865199636146e-05, |
|
"loss": 0.1294, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 2.7960672743811572e-05, |
|
"loss": 0.1231, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 2.7936480287986995e-05, |
|
"loss": 0.122, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 2.791228783216242e-05, |
|
"loss": 0.1205, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 2.788809537633784e-05, |
|
"loss": 0.1232, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 2.7863902920513267e-05, |
|
"loss": 0.1218, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 2.7839710464688697e-05, |
|
"loss": 0.1232, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 2.7815518008864116e-05, |
|
"loss": 0.1219, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 2.7791325553039542e-05, |
|
"loss": 0.1186, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 2.7767133097214965e-05, |
|
"loss": 0.1259, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 2.774294064139039e-05, |
|
"loss": 0.1208, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 2.7718748185565814e-05, |
|
"loss": 0.1279, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 2.769455572974124e-05, |
|
"loss": 0.1222, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 2.767036327391666e-05, |
|
"loss": 0.1219, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 2.764617081809209e-05, |
|
"loss": 0.125, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 2.7621978362267515e-05, |
|
"loss": 0.1249, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 2.7597785906442934e-05, |
|
"loss": 0.1214, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 2.7573593450618364e-05, |
|
"loss": 0.125, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 2.7549400994793783e-05, |
|
"loss": 0.1227, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 2.752520853896921e-05, |
|
"loss": 0.1219, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.7501016083144632e-05, |
|
"loss": 0.1196, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 2.747682362732006e-05, |
|
"loss": 0.1074, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 2.745263117149548e-05, |
|
"loss": 0.1025, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 2.7428438715670907e-05, |
|
"loss": 0.1067, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 2.7404246259846334e-05, |
|
"loss": 0.1099, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.7380053804021753e-05, |
|
"loss": 0.104, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 2.7355861348197183e-05, |
|
"loss": 0.1107, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 2.7331668892372602e-05, |
|
"loss": 0.1044, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 2.7307476436548028e-05, |
|
"loss": 0.1037, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 2.728328398072345e-05, |
|
"loss": 0.1077, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 2.7259091524898877e-05, |
|
"loss": 0.1056, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 2.72348990690743e-05, |
|
"loss": 0.1046, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 2.7210706613249726e-05, |
|
"loss": 0.1112, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 2.7186514157425152e-05, |
|
"loss": 0.1064, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 2.7162321701600575e-05, |
|
"loss": 0.1095, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 2.7138129245776e-05, |
|
"loss": 0.1057, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 2.711393678995142e-05, |
|
"loss": 0.1084, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 2.708974433412685e-05, |
|
"loss": 0.1099, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 2.706555187830227e-05, |
|
"loss": 0.1099, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 2.7041359422477696e-05, |
|
"loss": 0.1124, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 2.701716696665312e-05, |
|
"loss": 0.1066, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 2.6992974510828545e-05, |
|
"loss": 0.1039, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 2.696878205500397e-05, |
|
"loss": 0.1093, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 2.6944589599179394e-05, |
|
"loss": 0.1094, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 2.692039714335482e-05, |
|
"loss": 0.1115, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 2.6896204687530242e-05, |
|
"loss": 0.1113, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 2.687201223170567e-05, |
|
"loss": 0.1144, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 2.6847819775881088e-05, |
|
"loss": 0.1085, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 2.6823627320056514e-05, |
|
"loss": 0.1119, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 2.6799434864231937e-05, |
|
"loss": 0.1105, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 2.6775242408407363e-05, |
|
"loss": 0.1086, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 2.6751049952582786e-05, |
|
"loss": 0.1163, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 2.6726857496758212e-05, |
|
"loss": 0.1153, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 2.6702665040933638e-05, |
|
"loss": 0.1105, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 2.667847258510906e-05, |
|
"loss": 0.1127, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 2.6654280129284487e-05, |
|
"loss": 0.1128, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 2.6630087673459907e-05, |
|
"loss": 0.1088, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 2.6605895217635336e-05, |
|
"loss": 0.11, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 2.6581702761810756e-05, |
|
"loss": 0.1125, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 2.655751030598618e-05, |
|
"loss": 0.1135, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 2.6533317850161604e-05, |
|
"loss": 0.1123, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 2.650912539433703e-05, |
|
"loss": 0.1137, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 2.6484932938512457e-05, |
|
"loss": 0.1127, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 2.646074048268788e-05, |
|
"loss": 0.1118, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 2.6436548026863306e-05, |
|
"loss": 0.1125, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 2.641235557103873e-05, |
|
"loss": 0.1143, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 2.6388163115214155e-05, |
|
"loss": 0.1141, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 2.6363970659389574e-05, |
|
"loss": 0.1135, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 2.6339778203565004e-05, |
|
"loss": 0.1169, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 2.6315585747740423e-05, |
|
"loss": 0.1104, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 2.629139329191585e-05, |
|
"loss": 0.1134, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 2.6267200836091275e-05, |
|
"loss": 0.11, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 2.6243008380266698e-05, |
|
"loss": 0.1187, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 2.6218815924442124e-05, |
|
"loss": 0.1141, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.6194623468617547e-05, |
|
"loss": 0.1096, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 2.6170431012792973e-05, |
|
"loss": 0.1098, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 2.6146238556968396e-05, |
|
"loss": 0.1131, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 2.6122046101143822e-05, |
|
"loss": 0.1156, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.609785364531924e-05, |
|
"loss": 0.1134, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.6073661189494668e-05, |
|
"loss": 0.1115, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 2.6049468733670097e-05, |
|
"loss": 0.1116, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 2.6025276277845517e-05, |
|
"loss": 0.1135, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 2.6001083822020943e-05, |
|
"loss": 0.1128, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 2.5976891366196366e-05, |
|
"loss": 0.1132, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 2.5952698910371792e-05, |
|
"loss": 0.1171, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 2.5928506454547215e-05, |
|
"loss": 0.1113, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 2.590431399872264e-05, |
|
"loss": 0.1164, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 2.588012154289806e-05, |
|
"loss": 0.1136, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 2.585592908707349e-05, |
|
"loss": 0.1125, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 2.5831736631248916e-05, |
|
"loss": 0.1121, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 2.5807544175424335e-05, |
|
"loss": 0.1124, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 2.5783351719599765e-05, |
|
"loss": 0.1132, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 2.5759159263775184e-05, |
|
"loss": 0.1167, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 2.573496680795061e-05, |
|
"loss": 0.1157, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 2.5710774352126033e-05, |
|
"loss": 0.1213, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 2.568658189630146e-05, |
|
"loss": 0.1146, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 2.5662389440476882e-05, |
|
"loss": 0.1135, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 2.5638196984652308e-05, |
|
"loss": 0.1131, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 2.5614004528827734e-05, |
|
"loss": 0.1148, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 2.5589812073003154e-05, |
|
"loss": 0.1121, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 2.5565619617178583e-05, |
|
"loss": 0.1166, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 2.5541427161354003e-05, |
|
"loss": 0.1128, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 2.551723470552943e-05, |
|
"loss": 0.1153, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 2.549304224970485e-05, |
|
"loss": 0.1132, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 2.5468849793880278e-05, |
|
"loss": 0.1137, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 2.54446573380557e-05, |
|
"loss": 0.1104, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 2.5420464882231127e-05, |
|
"loss": 0.1224, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 2.5396272426406553e-05, |
|
"loss": 0.1156, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 2.5372079970581976e-05, |
|
"loss": 0.116, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 2.5347887514757402e-05, |
|
"loss": 0.1142, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 2.532369505893282e-05, |
|
"loss": 0.1128, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 2.529950260310825e-05, |
|
"loss": 0.1187, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 2.527531014728367e-05, |
|
"loss": 0.1133, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 2.5251117691459096e-05, |
|
"loss": 0.116, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 2.522692523563452e-05, |
|
"loss": 0.1086, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 2.5202732779809945e-05, |
|
"loss": 0.1195, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 2.517854032398537e-05, |
|
"loss": 0.116, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 2.5154347868160794e-05, |
|
"loss": 0.1148, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 2.513015541233622e-05, |
|
"loss": 0.1144, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 2.5105962956511643e-05, |
|
"loss": 0.1146, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 2.508177050068707e-05, |
|
"loss": 0.1135, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 2.505757804486249e-05, |
|
"loss": 0.1154, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 2.5033385589037915e-05, |
|
"loss": 0.1119, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.5009193133213338e-05, |
|
"loss": 0.1152, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 2.4985000677388764e-05, |
|
"loss": 0.1053, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 2.496080822156419e-05, |
|
"loss": 0.1002, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 2.4936615765739613e-05, |
|
"loss": 0.1003, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 2.4912423309915036e-05, |
|
"loss": 0.0994, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 2.4888230854090462e-05, |
|
"loss": 0.1001, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 2.4864038398265885e-05, |
|
"loss": 0.1028, |
|
"step": 519500 |
|
} |
|
], |
|
"max_steps": 1033380, |
|
"num_train_epochs": 20, |
|
"total_flos": 9.248377705093478e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|