{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1936428262798855, "global_step": 2866000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999888066436196e-05, "loss": 0.756, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.999775457217078e-05, "loss": 0.5828, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.999662847997959e-05, "loss": 0.5156, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.9995502387788405e-05, "loss": 0.4877, "step": 2000 }, { "epoch": 0.0, "learning_rate": 4.99943785477816e-05, "loss": 0.4557, "step": 2500 }, { "epoch": 0.0, "learning_rate": 4.9993252455590416e-05, "loss": 0.4476, "step": 3000 }, { "epoch": 0.0, "learning_rate": 4.9992126363399235e-05, "loss": 0.4251, "step": 3500 }, { "epoch": 0.0, "learning_rate": 4.999100027120805e-05, "loss": 0.4121, "step": 4000 }, { "epoch": 0.0, "learning_rate": 4.998987417901686e-05, "loss": 0.3991, "step": 4500 }, { "epoch": 0.0, "learning_rate": 4.998874808682568e-05, "loss": 0.3821, "step": 5000 }, { "epoch": 0.0, "learning_rate": 4.998762199463449e-05, "loss": 0.3764, "step": 5500 }, { "epoch": 0.0, "learning_rate": 4.99864959024433e-05, "loss": 0.3668, "step": 6000 }, { "epoch": 0.0, "learning_rate": 4.998536981025212e-05, "loss": 0.3571, "step": 6500 }, { "epoch": 0.0, "learning_rate": 4.998424371806093e-05, "loss": 0.3435, "step": 7000 }, { "epoch": 0.0, "learning_rate": 4.9983117625869744e-05, "loss": 0.3335, "step": 7500 }, { "epoch": 0.0, "learning_rate": 4.9981991533678555e-05, "loss": 0.3324, "step": 8000 }, { "epoch": 0.0, "learning_rate": 4.9980867693671755e-05, "loss": 0.3264, "step": 8500 }, { "epoch": 0.0, "learning_rate": 4.997974160148057e-05, "loss": 0.3245, "step": 9000 }, { "epoch": 0.0, "learning_rate": 4.9978615509289385e-05, "loss": 0.3185, "step": 9500 }, { "epoch": 0.0, "learning_rate": 4.9977491669282584e-05, "loss": 0.3185, "step": 10000 }, { "epoch": 0.0, "learning_rate": 4.9976365577091396e-05, "loss": 0.309, "step": 10500 }, { "epoch": 0.0, "learning_rate": 4.997523948490021e-05, "loss": 0.3016, "step": 11000 }, { "epoch": 0.0, "learning_rate": 4.997411339270903e-05, "loss": 0.295, "step": 11500 }, { "epoch": 0.0, "learning_rate": 4.997298730051784e-05, "loss": 0.3018, "step": 12000 }, { "epoch": 0.0, "learning_rate": 4.997186120832665e-05, "loss": 0.2869, "step": 12500 }, { "epoch": 0.0, "learning_rate": 4.997073511613546e-05, "loss": 0.2833, "step": 13000 }, { "epoch": 0.0, "learning_rate": 4.996960902394428e-05, "loss": 0.2818, "step": 13500 }, { "epoch": 0.0, "learning_rate": 4.99684829317531e-05, "loss": 0.2766, "step": 14000 }, { "epoch": 0.0, "learning_rate": 4.996735909174629e-05, "loss": 0.274, "step": 14500 }, { "epoch": 0.0, "learning_rate": 4.9966232999555104e-05, "loss": 0.2733, "step": 15000 }, { "epoch": 0.0, "learning_rate": 4.9965106907363916e-05, "loss": 0.2751, "step": 15500 }, { "epoch": 0.0, "learning_rate": 4.9963980815172735e-05, "loss": 0.2638, "step": 16000 }, { "epoch": 0.0, "learning_rate": 4.996285472298155e-05, "loss": 0.2693, "step": 16500 }, { "epoch": 0.0, "learning_rate": 4.996172863079036e-05, "loss": 0.2517, "step": 17000 }, { "epoch": 0.0, "learning_rate": 4.996060253859918e-05, "loss": 0.262, "step": 17500 }, { "epoch": 0.0, "learning_rate": 4.9959476446407996e-05, "loss": 0.2536, "step": 18000 }, { "epoch": 0.0, "learning_rate": 4.995835260640119e-05, "loss": 0.2653, "step": 18500 }, { "epoch": 0.0, "learning_rate": 4.995722651421001e-05, "loss": 0.2528, "step": 19000 }, { "epoch": 0.0, "learning_rate": 4.995610042201881e-05, "loss": 0.2528, "step": 19500 }, { "epoch": 0.0, "learning_rate": 4.995497432982763e-05, "loss": 0.247, "step": 20000 }, { "epoch": 0.0, "learning_rate": 4.995384823763645e-05, "loss": 0.2461, "step": 20500 }, { "epoch": 0.0, "learning_rate": 4.9952722145445254e-05, "loss": 0.2325, "step": 21000 }, { "epoch": 0.0, "learning_rate": 4.995159830543846e-05, "loss": 0.2369, "step": 21500 }, { "epoch": 0.0, "learning_rate": 4.9950472213247265e-05, "loss": 0.2393, "step": 22000 }, { "epoch": 0.0, "learning_rate": 4.9949346121056084e-05, "loss": 0.2362, "step": 22500 }, { "epoch": 0.0, "learning_rate": 4.99482200288649e-05, "loss": 0.2362, "step": 23000 }, { "epoch": 0.0, "learning_rate": 4.994709393667371e-05, "loss": 0.2281, "step": 23500 }, { "epoch": 0.0, "learning_rate": 4.9945967844482527e-05, "loss": 0.2396, "step": 24000 }, { "epoch": 0.0, "learning_rate": 4.9944841752291345e-05, "loss": 0.2237, "step": 24500 }, { "epoch": 0.0, "learning_rate": 4.994371566010016e-05, "loss": 0.2291, "step": 25000 }, { "epoch": 0.0, "learning_rate": 4.9942591820093356e-05, "loss": 0.2338, "step": 25500 }, { "epoch": 0.0, "learning_rate": 4.994146798008655e-05, "loss": 0.2216, "step": 26000 }, { "epoch": 0.0, "learning_rate": 4.994034188789537e-05, "loss": 0.222, "step": 26500 }, { "epoch": 0.0, "learning_rate": 4.993921579570417e-05, "loss": 0.2247, "step": 27000 }, { "epoch": 0.0, "learning_rate": 4.993808970351299e-05, "loss": 0.2213, "step": 27500 }, { "epoch": 0.0, "learning_rate": 4.993696361132181e-05, "loss": 0.2163, "step": 28000 }, { "epoch": 0.0, "learning_rate": 4.9935837519130615e-05, "loss": 0.2132, "step": 28500 }, { "epoch": 0.0, "learning_rate": 4.9934711426939434e-05, "loss": 0.2107, "step": 29000 }, { "epoch": 0.0, "learning_rate": 4.9933587586932626e-05, "loss": 0.211, "step": 29500 }, { "epoch": 0.0, "learning_rate": 4.9932461494741445e-05, "loss": 0.2079, "step": 30000 }, { "epoch": 0.0, "learning_rate": 4.9931335402550263e-05, "loss": 0.2124, "step": 30500 }, { "epoch": 0.0, "learning_rate": 4.993020931035907e-05, "loss": 0.2118, "step": 31000 }, { "epoch": 0.0, "learning_rate": 4.992908321816789e-05, "loss": 0.2063, "step": 31500 }, { "epoch": 0.0, "learning_rate": 4.9927957125976706e-05, "loss": 0.2098, "step": 32000 }, { "epoch": 0.0, "learning_rate": 4.992683103378552e-05, "loss": 0.2053, "step": 32500 }, { "epoch": 0.0, "learning_rate": 4.992570719377872e-05, "loss": 0.2061, "step": 33000 }, { "epoch": 0.0, "learning_rate": 4.992458110158752e-05, "loss": 0.2012, "step": 33500 }, { "epoch": 0.0, "learning_rate": 4.992345500939634e-05, "loss": 0.2011, "step": 34000 }, { "epoch": 0.0, "learning_rate": 4.992232891720516e-05, "loss": 0.2021, "step": 34500 }, { "epoch": 0.0, "learning_rate": 4.992120282501397e-05, "loss": 0.1937, "step": 35000 }, { "epoch": 0.0, "learning_rate": 4.992007673282278e-05, "loss": 0.1961, "step": 35500 }, { "epoch": 0.0, "learning_rate": 4.9918950640631595e-05, "loss": 0.1933, "step": 36000 }, { "epoch": 0.0, "learning_rate": 4.9917824548440414e-05, "loss": 0.1988, "step": 36500 }, { "epoch": 0.0, "learning_rate": 4.991670070843361e-05, "loss": 0.2005, "step": 37000 }, { "epoch": 0.01, "learning_rate": 4.9915574616242425e-05, "loss": 0.1934, "step": 37500 }, { "epoch": 0.01, "learning_rate": 4.991444852405124e-05, "loss": 0.1951, "step": 38000 }, { "epoch": 0.01, "learning_rate": 4.991332243186005e-05, "loss": 0.1977, "step": 38500 }, { "epoch": 0.01, "learning_rate": 4.991219633966887e-05, "loss": 0.1907, "step": 39000 }, { "epoch": 0.01, "learning_rate": 4.991107024747768e-05, "loss": 0.1908, "step": 39500 }, { "epoch": 0.01, "learning_rate": 4.990994640747088e-05, "loss": 0.1869, "step": 40000 }, { "epoch": 0.01, "learning_rate": 4.990882031527969e-05, "loss": 0.1915, "step": 40500 }, { "epoch": 0.01, "learning_rate": 4.99076942230885e-05, "loss": 0.1879, "step": 41000 }, { "epoch": 0.01, "learning_rate": 4.990656813089732e-05, "loss": 0.1932, "step": 41500 }, { "epoch": 0.01, "learning_rate": 4.990544429089051e-05, "loss": 0.1841, "step": 42000 }, { "epoch": 0.01, "learning_rate": 4.990431819869933e-05, "loss": 0.1907, "step": 42500 }, { "epoch": 0.01, "learning_rate": 4.9903192106508144e-05, "loss": 0.1824, "step": 43000 }, { "epoch": 0.01, "learning_rate": 4.9902066014316956e-05, "loss": 0.1859, "step": 43500 }, { "epoch": 0.01, "learning_rate": 4.9900942174310155e-05, "loss": 0.1803, "step": 44000 }, { "epoch": 0.01, "learning_rate": 4.989981608211897e-05, "loss": 0.182, "step": 44500 }, { "epoch": 0.01, "learning_rate": 4.9898689989927785e-05, "loss": 0.1848, "step": 45000 }, { "epoch": 0.01, "learning_rate": 4.98975638977366e-05, "loss": 0.1904, "step": 45500 }, { "epoch": 0.01, "learning_rate": 4.989644005772979e-05, "loss": 0.1777, "step": 46000 }, { "epoch": 0.01, "learning_rate": 4.989531396553861e-05, "loss": 0.1794, "step": 46500 }, { "epoch": 0.01, "learning_rate": 4.989418787334742e-05, "loss": 0.1775, "step": 47000 }, { "epoch": 0.01, "learning_rate": 4.989306178115624e-05, "loss": 0.1762, "step": 47500 }, { "epoch": 0.01, "learning_rate": 4.989193568896505e-05, "loss": 0.1771, "step": 48000 }, { "epoch": 0.01, "learning_rate": 4.989081184895824e-05, "loss": 0.1713, "step": 48500 }, { "epoch": 0.01, "learning_rate": 4.988968575676706e-05, "loss": 0.1772, "step": 49000 }, { "epoch": 0.01, "learning_rate": 4.9888559664575874e-05, "loss": 0.1768, "step": 49500 }, { "epoch": 0.01, "learning_rate": 4.988743357238469e-05, "loss": 0.1814, "step": 50000 }, { "epoch": 0.01, "learning_rate": 4.9886307480193504e-05, "loss": 0.1718, "step": 50500 }, { "epoch": 0.01, "learning_rate": 4.98851836401867e-05, "loss": 0.1728, "step": 51000 }, { "epoch": 0.01, "learning_rate": 4.9884057547995515e-05, "loss": 0.1694, "step": 51500 }, { "epoch": 0.01, "learning_rate": 4.988293145580433e-05, "loss": 0.174, "step": 52000 }, { "epoch": 0.01, "learning_rate": 4.9881805363613146e-05, "loss": 0.1688, "step": 52500 }, { "epoch": 0.01, "learning_rate": 4.988067927142196e-05, "loss": 0.1727, "step": 53000 }, { "epoch": 0.01, "learning_rate": 4.987955543141515e-05, "loss": 0.1694, "step": 53500 }, { "epoch": 0.01, "learning_rate": 4.987842933922397e-05, "loss": 0.1713, "step": 54000 }, { "epoch": 0.01, "learning_rate": 4.987730324703278e-05, "loss": 0.1649, "step": 54500 }, { "epoch": 0.01, "learning_rate": 4.98761771548416e-05, "loss": 0.1676, "step": 55000 }, { "epoch": 0.01, "learning_rate": 4.98750533148348e-05, "loss": 0.168, "step": 55500 }, { "epoch": 0.01, "learning_rate": 4.987392722264361e-05, "loss": 0.1678, "step": 56000 }, { "epoch": 0.01, "learning_rate": 4.987280113045242e-05, "loss": 0.1689, "step": 56500 }, { "epoch": 0.01, "learning_rate": 4.9871675038261234e-05, "loss": 0.1677, "step": 57000 }, { "epoch": 0.01, "learning_rate": 4.987054894607005e-05, "loss": 0.1668, "step": 57500 }, { "epoch": 0.01, "learning_rate": 4.986942510606325e-05, "loss": 0.163, "step": 58000 }, { "epoch": 0.01, "learning_rate": 4.9868299013872064e-05, "loss": 0.1633, "step": 58500 }, { "epoch": 0.01, "learning_rate": 4.9867172921680876e-05, "loss": 0.1689, "step": 59000 }, { "epoch": 0.01, "learning_rate": 4.986604682948969e-05, "loss": 0.17, "step": 59500 }, { "epoch": 0.01, "learning_rate": 4.986492298948289e-05, "loss": 0.1669, "step": 60000 }, { "epoch": 0.01, "learning_rate": 4.9863796897291706e-05, "loss": 0.1659, "step": 60500 }, { "epoch": 0.01, "learning_rate": 4.986267080510052e-05, "loss": 0.1655, "step": 61000 }, { "epoch": 0.01, "learning_rate": 4.986154471290933e-05, "loss": 0.1613, "step": 61500 }, { "epoch": 0.01, "learning_rate": 4.986042087290252e-05, "loss": 0.1617, "step": 62000 }, { "epoch": 0.01, "learning_rate": 4.985929478071134e-05, "loss": 0.166, "step": 62500 }, { "epoch": 0.01, "learning_rate": 4.985816868852016e-05, "loss": 0.161, "step": 63000 }, { "epoch": 0.01, "learning_rate": 4.985704259632897e-05, "loss": 0.1597, "step": 63500 }, { "epoch": 0.01, "learning_rate": 4.985591875632217e-05, "loss": 0.1653, "step": 64000 }, { "epoch": 0.01, "learning_rate": 4.9854792664130975e-05, "loss": 0.1587, "step": 64500 }, { "epoch": 0.01, "learning_rate": 4.9853666571939794e-05, "loss": 0.1522, "step": 65000 }, { "epoch": 0.01, "learning_rate": 4.9852542731932987e-05, "loss": 0.1535, "step": 65500 }, { "epoch": 0.01, "learning_rate": 4.9851416639741805e-05, "loss": 0.1497, "step": 66000 }, { "epoch": 0.01, "learning_rate": 4.9850290547550624e-05, "loss": 0.1567, "step": 66500 }, { "epoch": 0.01, "learning_rate": 4.984916445535943e-05, "loss": 0.1633, "step": 67000 }, { "epoch": 0.01, "learning_rate": 4.984803836316825e-05, "loss": 0.1541, "step": 67500 }, { "epoch": 0.01, "learning_rate": 4.9846912270977066e-05, "loss": 0.16, "step": 68000 }, { "epoch": 0.01, "learning_rate": 4.984578617878588e-05, "loss": 0.1508, "step": 68500 }, { "epoch": 0.01, "learning_rate": 4.984466008659469e-05, "loss": 0.1543, "step": 69000 }, { "epoch": 0.01, "learning_rate": 4.98435339944035e-05, "loss": 0.1525, "step": 69500 }, { "epoch": 0.01, "learning_rate": 4.98424101543967e-05, "loss": 0.1541, "step": 70000 }, { "epoch": 0.01, "learning_rate": 4.984128406220552e-05, "loss": 0.1503, "step": 70500 }, { "epoch": 0.01, "learning_rate": 4.984015797001433e-05, "loss": 0.1582, "step": 71000 }, { "epoch": 0.01, "learning_rate": 4.9839031877823144e-05, "loss": 0.1554, "step": 71500 }, { "epoch": 0.01, "learning_rate": 4.9837908037816336e-05, "loss": 0.1475, "step": 72000 }, { "epoch": 0.01, "learning_rate": 4.9836781945625155e-05, "loss": 0.1563, "step": 72500 }, { "epoch": 0.01, "learning_rate": 4.983565585343397e-05, "loss": 0.1549, "step": 73000 }, { "epoch": 0.01, "learning_rate": 4.9834529761242785e-05, "loss": 0.1493, "step": 73500 }, { "epoch": 0.01, "learning_rate": 4.98334036690516e-05, "loss": 0.1475, "step": 74000 }, { "epoch": 0.01, "learning_rate": 4.983227982904479e-05, "loss": 0.1452, "step": 74500 }, { "epoch": 0.01, "learning_rate": 4.983115373685361e-05, "loss": 0.1512, "step": 75000 }, { "epoch": 0.01, "learning_rate": 4.98300298968468e-05, "loss": 0.1502, "step": 75500 }, { "epoch": 0.01, "learning_rate": 4.982890380465562e-05, "loss": 0.145, "step": 76000 }, { "epoch": 0.01, "learning_rate": 4.982777771246444e-05, "loss": 0.15, "step": 76500 }, { "epoch": 0.01, "learning_rate": 4.982665162027324e-05, "loss": 0.147, "step": 77000 }, { "epoch": 0.01, "learning_rate": 4.982552552808206e-05, "loss": 0.1498, "step": 77500 }, { "epoch": 0.01, "learning_rate": 4.982439943589088e-05, "loss": 0.1559, "step": 78000 }, { "epoch": 0.01, "learning_rate": 4.982327334369969e-05, "loss": 0.1489, "step": 78500 }, { "epoch": 0.01, "learning_rate": 4.9822147251508504e-05, "loss": 0.1481, "step": 79000 }, { "epoch": 0.01, "learning_rate": 4.9821021159317316e-05, "loss": 0.1487, "step": 79500 }, { "epoch": 0.01, "learning_rate": 4.9819895067126135e-05, "loss": 0.1435, "step": 80000 }, { "epoch": 0.01, "learning_rate": 4.981877347930371e-05, "loss": 0.1465, "step": 80500 }, { "epoch": 0.01, "learning_rate": 4.9817647387112526e-05, "loss": 0.1434, "step": 81000 }, { "epoch": 0.01, "learning_rate": 4.9816521294921345e-05, "loss": 0.1485, "step": 81500 }, { "epoch": 0.01, "learning_rate": 4.981539520273015e-05, "loss": 0.1507, "step": 82000 }, { "epoch": 0.01, "learning_rate": 4.981426911053897e-05, "loss": 0.1449, "step": 82500 }, { "epoch": 0.01, "learning_rate": 4.981314301834779e-05, "loss": 0.1412, "step": 83000 }, { "epoch": 0.01, "learning_rate": 4.98120169261566e-05, "loss": 0.1378, "step": 83500 }, { "epoch": 0.01, "learning_rate": 4.981089083396541e-05, "loss": 0.1412, "step": 84000 }, { "epoch": 0.01, "learning_rate": 4.9809766993958604e-05, "loss": 0.1419, "step": 84500 }, { "epoch": 0.01, "learning_rate": 4.980864090176742e-05, "loss": 0.1461, "step": 85000 }, { "epoch": 0.01, "learning_rate": 4.980751480957624e-05, "loss": 0.1408, "step": 85500 }, { "epoch": 0.01, "learning_rate": 4.980638871738505e-05, "loss": 0.1394, "step": 86000 }, { "epoch": 0.01, "learning_rate": 4.9805262625193865e-05, "loss": 0.1417, "step": 86500 }, { "epoch": 0.01, "learning_rate": 4.980413878518706e-05, "loss": 0.137, "step": 87000 }, { "epoch": 0.01, "learning_rate": 4.9803012692995876e-05, "loss": 0.1351, "step": 87500 }, { "epoch": 0.01, "learning_rate": 4.9801886600804695e-05, "loss": 0.1411, "step": 88000 }, { "epoch": 0.01, "learning_rate": 4.9800760508613506e-05, "loss": 0.1397, "step": 88500 }, { "epoch": 0.01, "learning_rate": 4.9799636668606706e-05, "loss": 0.1346, "step": 89000 }, { "epoch": 0.01, "learning_rate": 4.979851057641551e-05, "loss": 0.1451, "step": 89500 }, { "epoch": 0.01, "learning_rate": 4.979738448422433e-05, "loss": 0.1356, "step": 90000 }, { "epoch": 0.01, "learning_rate": 4.979625839203315e-05, "loss": 0.1439, "step": 90500 }, { "epoch": 0.01, "learning_rate": 4.979513229984196e-05, "loss": 0.1408, "step": 91000 }, { "epoch": 0.01, "learning_rate": 4.979400845983516e-05, "loss": 0.134, "step": 91500 }, { "epoch": 0.01, "learning_rate": 4.9792882367643964e-05, "loss": 0.1357, "step": 92000 }, { "epoch": 0.01, "learning_rate": 4.979175627545278e-05, "loss": 0.1372, "step": 92500 }, { "epoch": 0.01, "learning_rate": 4.97906301832616e-05, "loss": 0.1371, "step": 93000 }, { "epoch": 0.01, "learning_rate": 4.9789506343254794e-05, "loss": 0.1348, "step": 93500 }, { "epoch": 0.01, "learning_rate": 4.978838025106361e-05, "loss": 0.1381, "step": 94000 }, { "epoch": 0.01, "learning_rate": 4.978725415887242e-05, "loss": 0.1383, "step": 94500 }, { "epoch": 0.01, "learning_rate": 4.9786128066681236e-05, "loss": 0.1402, "step": 95000 }, { "epoch": 0.01, "learning_rate": 4.9785001974490055e-05, "loss": 0.1374, "step": 95500 }, { "epoch": 0.01, "learning_rate": 4.978387813448325e-05, "loss": 0.1407, "step": 96000 }, { "epoch": 0.01, "learning_rate": 4.9782752042292066e-05, "loss": 0.1372, "step": 96500 }, { "epoch": 0.01, "learning_rate": 4.978162595010087e-05, "loss": 0.1345, "step": 97000 }, { "epoch": 0.01, "learning_rate": 4.978049985790969e-05, "loss": 0.1376, "step": 97500 }, { "epoch": 0.01, "learning_rate": 4.977937601790288e-05, "loss": 0.1299, "step": 98000 }, { "epoch": 0.01, "learning_rate": 4.97782499257117e-05, "loss": 0.1379, "step": 98500 }, { "epoch": 0.01, "learning_rate": 4.977712383352052e-05, "loss": 0.1348, "step": 99000 }, { "epoch": 0.01, "learning_rate": 4.9775997741329325e-05, "loss": 0.1281, "step": 99500 }, { "epoch": 0.01, "learning_rate": 4.9774873901322524e-05, "loss": 0.1385, "step": 100000 }, { "epoch": 0.01, "learning_rate": 4.9773747809131336e-05, "loss": 0.1279, "step": 100500 }, { "epoch": 0.01, "learning_rate": 4.9772621716940155e-05, "loss": 0.1397, "step": 101000 }, { "epoch": 0.01, "learning_rate": 4.977149562474897e-05, "loss": 0.1354, "step": 101500 }, { "epoch": 0.01, "learning_rate": 4.977036953255778e-05, "loss": 0.1332, "step": 102000 }, { "epoch": 0.01, "learning_rate": 4.976924569255098e-05, "loss": 0.1341, "step": 102500 }, { "epoch": 0.01, "learning_rate": 4.976811960035979e-05, "loss": 0.1332, "step": 103000 }, { "epoch": 0.01, "learning_rate": 4.976699350816861e-05, "loss": 0.1301, "step": 103500 }, { "epoch": 0.01, "learning_rate": 4.976586741597743e-05, "loss": 0.1277, "step": 104000 }, { "epoch": 0.01, "learning_rate": 4.976474357597062e-05, "loss": 0.1254, "step": 104500 }, { "epoch": 0.01, "learning_rate": 4.976361748377943e-05, "loss": 0.1286, "step": 105000 }, { "epoch": 0.01, "learning_rate": 4.976249139158824e-05, "loss": 0.1289, "step": 105500 }, { "epoch": 0.01, "learning_rate": 4.976136529939706e-05, "loss": 0.1234, "step": 106000 }, { "epoch": 0.01, "learning_rate": 4.976023920720588e-05, "loss": 0.1245, "step": 106500 }, { "epoch": 0.01, "learning_rate": 4.975911536719907e-05, "loss": 0.1297, "step": 107000 }, { "epoch": 0.01, "learning_rate": 4.9757989275007885e-05, "loss": 0.1299, "step": 107500 }, { "epoch": 0.01, "learning_rate": 4.9756863182816697e-05, "loss": 0.135, "step": 108000 }, { "epoch": 0.01, "learning_rate": 4.9755737090625515e-05, "loss": 0.1283, "step": 108500 }, { "epoch": 0.01, "learning_rate": 4.975461325061871e-05, "loss": 0.1271, "step": 109000 }, { "epoch": 0.01, "learning_rate": 4.9753487158427526e-05, "loss": 0.1352, "step": 109500 }, { "epoch": 0.01, "learning_rate": 4.975236106623634e-05, "loss": 0.1266, "step": 110000 }, { "epoch": 0.01, "learning_rate": 4.975123497404515e-05, "loss": 0.1286, "step": 110500 }, { "epoch": 0.01, "learning_rate": 4.975011113403835e-05, "loss": 0.1268, "step": 111000 }, { "epoch": 0.02, "learning_rate": 4.974898504184716e-05, "loss": 0.1233, "step": 111500 }, { "epoch": 0.02, "learning_rate": 4.974785894965598e-05, "loss": 0.1324, "step": 112000 }, { "epoch": 0.02, "learning_rate": 4.974673285746479e-05, "loss": 0.1278, "step": 112500 }, { "epoch": 0.02, "learning_rate": 4.974560901745799e-05, "loss": 0.1288, "step": 113000 }, { "epoch": 0.02, "learning_rate": 4.97444829252668e-05, "loss": 0.1255, "step": 113500 }, { "epoch": 0.02, "learning_rate": 4.9743356833075615e-05, "loss": 0.1255, "step": 114000 }, { "epoch": 0.02, "learning_rate": 4.974223074088443e-05, "loss": 0.1252, "step": 114500 }, { "epoch": 0.02, "learning_rate": 4.9741104648693245e-05, "loss": 0.1272, "step": 115000 }, { "epoch": 0.02, "learning_rate": 4.973997855650206e-05, "loss": 0.1258, "step": 115500 }, { "epoch": 0.02, "learning_rate": 4.9738852464310876e-05, "loss": 0.1279, "step": 116000 }, { "epoch": 0.02, "learning_rate": 4.9737726372119694e-05, "loss": 0.127, "step": 116500 }, { "epoch": 0.02, "learning_rate": 4.973660253211289e-05, "loss": 0.1257, "step": 117000 }, { "epoch": 0.02, "learning_rate": 4.97354764399217e-05, "loss": 0.1282, "step": 117500 }, { "epoch": 0.02, "learning_rate": 4.973435034773051e-05, "loss": 0.1248, "step": 118000 }, { "epoch": 0.02, "learning_rate": 4.973322425553933e-05, "loss": 0.1267, "step": 118500 }, { "epoch": 0.02, "learning_rate": 4.973210041553252e-05, "loss": 0.1261, "step": 119000 }, { "epoch": 0.02, "learning_rate": 4.973097432334134e-05, "loss": 0.1304, "step": 119500 }, { "epoch": 0.02, "learning_rate": 4.972984823115015e-05, "loss": 0.1219, "step": 120000 }, { "epoch": 0.02, "learning_rate": 4.9728722138958964e-05, "loss": 0.1274, "step": 120500 }, { "epoch": 0.02, "learning_rate": 4.972759604676778e-05, "loss": 0.1211, "step": 121000 }, { "epoch": 0.02, "learning_rate": 4.9726472206760975e-05, "loss": 0.127, "step": 121500 }, { "epoch": 0.02, "learning_rate": 4.9725346114569794e-05, "loss": 0.1215, "step": 122000 }, { "epoch": 0.02, "learning_rate": 4.972422002237861e-05, "loss": 0.1235, "step": 122500 }, { "epoch": 0.02, "learning_rate": 4.972309393018742e-05, "loss": 0.1211, "step": 123000 }, { "epoch": 0.02, "learning_rate": 4.972197009018062e-05, "loss": 0.122, "step": 123500 }, { "epoch": 0.02, "learning_rate": 4.972084399798943e-05, "loss": 0.1221, "step": 124000 }, { "epoch": 0.02, "learning_rate": 4.971971790579825e-05, "loss": 0.1176, "step": 124500 }, { "epoch": 0.02, "learning_rate": 4.9718591813607066e-05, "loss": 0.1217, "step": 125000 }, { "epoch": 0.02, "learning_rate": 4.971746797360026e-05, "loss": 0.12, "step": 125500 }, { "epoch": 0.02, "learning_rate": 4.971634188140907e-05, "loss": 0.1191, "step": 126000 }, { "epoch": 0.02, "learning_rate": 4.971521578921788e-05, "loss": 0.1198, "step": 126500 }, { "epoch": 0.02, "learning_rate": 4.97140896970267e-05, "loss": 0.1233, "step": 127000 }, { "epoch": 0.02, "learning_rate": 4.971296360483552e-05, "loss": 0.1184, "step": 127500 }, { "epoch": 0.02, "learning_rate": 4.971183976482871e-05, "loss": 0.1247, "step": 128000 }, { "epoch": 0.02, "learning_rate": 4.9710713672637524e-05, "loss": 0.1266, "step": 128500 }, { "epoch": 0.02, "learning_rate": 4.9709587580446336e-05, "loss": 0.1163, "step": 129000 }, { "epoch": 0.02, "learning_rate": 4.9708461488255155e-05, "loss": 0.1223, "step": 129500 }, { "epoch": 0.02, "learning_rate": 4.970733764824835e-05, "loss": 0.1207, "step": 130000 }, { "epoch": 0.02, "learning_rate": 4.9706211556057166e-05, "loss": 0.1265, "step": 130500 }, { "epoch": 0.02, "learning_rate": 4.970508546386598e-05, "loss": 0.1213, "step": 131000 }, { "epoch": 0.02, "learning_rate": 4.970395937167479e-05, "loss": 0.1197, "step": 131500 }, { "epoch": 0.02, "learning_rate": 4.970283553166799e-05, "loss": 0.1187, "step": 132000 }, { "epoch": 0.02, "learning_rate": 4.97017094394768e-05, "loss": 0.1182, "step": 132500 }, { "epoch": 0.02, "learning_rate": 4.970058334728562e-05, "loss": 0.1264, "step": 133000 }, { "epoch": 0.02, "learning_rate": 4.969945725509443e-05, "loss": 0.1222, "step": 133500 }, { "epoch": 0.02, "learning_rate": 4.969833116290324e-05, "loss": 0.1199, "step": 134000 }, { "epoch": 0.02, "learning_rate": 4.969720732289644e-05, "loss": 0.1249, "step": 134500 }, { "epoch": 0.02, "learning_rate": 4.9696081230705254e-05, "loss": 0.1169, "step": 135000 }, { "epoch": 0.02, "learning_rate": 4.969495513851407e-05, "loss": 0.1224, "step": 135500 }, { "epoch": 0.02, "learning_rate": 4.9693829046322885e-05, "loss": 0.1155, "step": 136000 }, { "epoch": 0.02, "learning_rate": 4.9692707458500464e-05, "loss": 0.113, "step": 136500 }, { "epoch": 0.02, "learning_rate": 4.969158136630928e-05, "loss": 0.1205, "step": 137000 }, { "epoch": 0.02, "learning_rate": 4.9690455274118095e-05, "loss": 0.1172, "step": 137500 }, { "epoch": 0.02, "learning_rate": 4.968932918192691e-05, "loss": 0.1163, "step": 138000 }, { "epoch": 0.02, "learning_rate": 4.9688203089735725e-05, "loss": 0.1188, "step": 138500 }, { "epoch": 0.02, "learning_rate": 4.968707699754454e-05, "loss": 0.1249, "step": 139000 }, { "epoch": 0.02, "learning_rate": 4.968595090535335e-05, "loss": 0.1135, "step": 139500 }, { "epoch": 0.02, "learning_rate": 4.968482481316216e-05, "loss": 0.1178, "step": 140000 }, { "epoch": 0.02, "learning_rate": 4.968370097315536e-05, "loss": 0.1206, "step": 140500 }, { "epoch": 0.02, "learning_rate": 4.968257488096418e-05, "loss": 0.1149, "step": 141000 }, { "epoch": 0.02, "learning_rate": 4.968144878877299e-05, "loss": 0.1184, "step": 141500 }, { "epoch": 0.02, "learning_rate": 4.96803226965818e-05, "loss": 0.1203, "step": 142000 }, { "epoch": 0.02, "learning_rate": 4.9679198856575e-05, "loss": 0.1138, "step": 142500 }, { "epoch": 0.02, "learning_rate": 4.9678072764383814e-05, "loss": 0.1191, "step": 143000 }, { "epoch": 0.02, "learning_rate": 4.967694667219263e-05, "loss": 0.1145, "step": 143500 }, { "epoch": 0.02, "learning_rate": 4.9675820580001444e-05, "loss": 0.1159, "step": 144000 }, { "epoch": 0.02, "learning_rate": 4.9674694487810256e-05, "loss": 0.1154, "step": 144500 }, { "epoch": 0.02, "learning_rate": 4.9673570647803455e-05, "loss": 0.1128, "step": 145000 }, { "epoch": 0.02, "learning_rate": 4.967244455561227e-05, "loss": 0.113, "step": 145500 }, { "epoch": 0.02, "learning_rate": 4.9671318463421086e-05, "loss": 0.1142, "step": 146000 }, { "epoch": 0.02, "learning_rate": 4.96701923712299e-05, "loss": 0.1136, "step": 146500 }, { "epoch": 0.02, "learning_rate": 4.96690685312231e-05, "loss": 0.1136, "step": 147000 }, { "epoch": 0.02, "learning_rate": 4.966794243903191e-05, "loss": 0.1108, "step": 147500 }, { "epoch": 0.02, "learning_rate": 4.966681634684072e-05, "loss": 0.1148, "step": 148000 }, { "epoch": 0.02, "learning_rate": 4.966569025464954e-05, "loss": 0.1134, "step": 148500 }, { "epoch": 0.02, "learning_rate": 4.966456641464273e-05, "loss": 0.1136, "step": 149000 }, { "epoch": 0.02, "learning_rate": 4.966344032245155e-05, "loss": 0.1123, "step": 149500 }, { "epoch": 0.02, "learning_rate": 4.966231423026036e-05, "loss": 0.1148, "step": 150000 }, { "epoch": 0.02, "learning_rate": 4.9661188138069174e-05, "loss": 0.1085, "step": 150500 }, { "epoch": 0.02, "learning_rate": 4.966006204587799e-05, "loss": 0.1076, "step": 151000 }, { "epoch": 0.02, "learning_rate": 4.9658938205871186e-05, "loss": 0.1104, "step": 151500 }, { "epoch": 0.02, "learning_rate": 4.9657812113680004e-05, "loss": 0.1154, "step": 152000 }, { "epoch": 0.02, "learning_rate": 4.9656686021488816e-05, "loss": 0.1123, "step": 152500 }, { "epoch": 0.02, "learning_rate": 4.965555992929763e-05, "loss": 0.1098, "step": 153000 }, { "epoch": 0.02, "learning_rate": 4.965443608929082e-05, "loss": 0.1103, "step": 153500 }, { "epoch": 0.02, "learning_rate": 4.965330999709964e-05, "loss": 0.114, "step": 154000 }, { "epoch": 0.02, "learning_rate": 4.965218390490846e-05, "loss": 0.1114, "step": 154500 }, { "epoch": 0.02, "learning_rate": 4.965105781271727e-05, "loss": 0.1137, "step": 155000 }, { "epoch": 0.02, "learning_rate": 4.964993172052608e-05, "loss": 0.1119, "step": 155500 }, { "epoch": 0.02, "learning_rate": 4.9648807880519274e-05, "loss": 0.1185, "step": 156000 }, { "epoch": 0.02, "learning_rate": 4.964768178832809e-05, "loss": 0.1133, "step": 156500 }, { "epoch": 0.02, "learning_rate": 4.964655569613691e-05, "loss": 0.1103, "step": 157000 }, { "epoch": 0.02, "learning_rate": 4.964542960394572e-05, "loss": 0.1108, "step": 157500 }, { "epoch": 0.02, "learning_rate": 4.9644303511754535e-05, "loss": 0.1138, "step": 158000 }, { "epoch": 0.02, "learning_rate": 4.964317967174773e-05, "loss": 0.1143, "step": 158500 }, { "epoch": 0.02, "learning_rate": 4.9642053579556546e-05, "loss": 0.109, "step": 159000 }, { "epoch": 0.02, "learning_rate": 4.9640927487365365e-05, "loss": 0.1112, "step": 159500 }, { "epoch": 0.02, "learning_rate": 4.963980139517418e-05, "loss": 0.113, "step": 160000 }, { "epoch": 0.02, "learning_rate": 4.9638677555167376e-05, "loss": 0.1123, "step": 160500 }, { "epoch": 0.02, "learning_rate": 4.963755146297618e-05, "loss": 0.1113, "step": 161000 }, { "epoch": 0.02, "learning_rate": 4.9636425370785e-05, "loss": 0.1092, "step": 161500 }, { "epoch": 0.02, "learning_rate": 4.963529927859382e-05, "loss": 0.1088, "step": 162000 }, { "epoch": 0.02, "learning_rate": 4.963417318640263e-05, "loss": 0.1072, "step": 162500 }, { "epoch": 0.02, "learning_rate": 4.963304934639583e-05, "loss": 0.1099, "step": 163000 }, { "epoch": 0.02, "learning_rate": 4.9631923254204634e-05, "loss": 0.1141, "step": 163500 }, { "epoch": 0.02, "learning_rate": 4.963079716201345e-05, "loss": 0.1093, "step": 164000 }, { "epoch": 0.02, "learning_rate": 4.962967106982227e-05, "loss": 0.11, "step": 164500 }, { "epoch": 0.02, "learning_rate": 4.9628547229815464e-05, "loss": 0.1072, "step": 165000 }, { "epoch": 0.02, "learning_rate": 4.962742113762428e-05, "loss": 0.1084, "step": 165500 }, { "epoch": 0.02, "learning_rate": 4.962629504543309e-05, "loss": 0.107, "step": 166000 }, { "epoch": 0.01, "learning_rate": 4.9812584463958124e-05, "loss": 0.1021, "step": 166500 }, { "epoch": 0.01, "learning_rate": 4.9812023670009024e-05, "loss": 0.1008, "step": 167000 }, { "epoch": 0.01, "learning_rate": 4.981146062387539e-05, "loss": 0.1017, "step": 167500 }, { "epoch": 0.01, "learning_rate": 4.981089757774175e-05, "loss": 0.1029, "step": 168000 }, { "epoch": 0.01, "learning_rate": 4.9810335657700384e-05, "loss": 0.1074, "step": 168500 }, { "epoch": 0.01, "learning_rate": 4.9809772611566755e-05, "loss": 0.1038, "step": 169000 }, { "epoch": 0.01, "learning_rate": 4.980920956543311e-05, "loss": 0.1041, "step": 169500 }, { "epoch": 0.01, "learning_rate": 4.9808646519299476e-05, "loss": 0.1053, "step": 170000 }, { "epoch": 0.01, "learning_rate": 4.980808347316585e-05, "loss": 0.1041, "step": 170500 }, { "epoch": 0.01, "learning_rate": 4.9807520427032204e-05, "loss": 0.1048, "step": 171000 }, { "epoch": 0.01, "learning_rate": 4.9806957380898575e-05, "loss": 0.1035, "step": 171500 }, { "epoch": 0.01, "learning_rate": 4.980639433476494e-05, "loss": 0.1062, "step": 172000 }, { "epoch": 0.01, "learning_rate": 4.9805831288631296e-05, "loss": 0.1051, "step": 172500 }, { "epoch": 0.01, "learning_rate": 4.980526824249767e-05, "loss": 0.1049, "step": 173000 }, { "epoch": 0.01, "learning_rate": 4.98047063224563e-05, "loss": 0.1005, "step": 173500 }, { "epoch": 0.01, "learning_rate": 4.980414327632266e-05, "loss": 0.1039, "step": 174000 }, { "epoch": 0.01, "learning_rate": 4.980358023018903e-05, "loss": 0.0972, "step": 174500 }, { "epoch": 0.01, "learning_rate": 4.980301718405539e-05, "loss": 0.1027, "step": 175000 }, { "epoch": 0.01, "learning_rate": 4.9802455264014023e-05, "loss": 0.1074, "step": 175500 }, { "epoch": 0.01, "learning_rate": 4.980189221788039e-05, "loss": 0.1062, "step": 176000 }, { "epoch": 0.01, "learning_rate": 4.980132917174675e-05, "loss": 0.1061, "step": 176500 }, { "epoch": 0.01, "learning_rate": 4.9800766125613115e-05, "loss": 0.1028, "step": 177000 }, { "epoch": 0.01, "learning_rate": 4.980020307947948e-05, "loss": 0.0959, "step": 177500 }, { "epoch": 0.01, "learning_rate": 4.9799640033345843e-05, "loss": 0.1026, "step": 178000 }, { "epoch": 0.01, "learning_rate": 4.9799076987212214e-05, "loss": 0.1132, "step": 178500 }, { "epoch": 0.01, "learning_rate": 4.979851394107858e-05, "loss": 0.1053, "step": 179000 }, { "epoch": 0.01, "learning_rate": 4.979795202103721e-05, "loss": 0.0977, "step": 179500 }, { "epoch": 0.01, "learning_rate": 4.9797388974903574e-05, "loss": 0.1005, "step": 180000 }, { "epoch": 0.01, "learning_rate": 4.979682592876994e-05, "loss": 0.107, "step": 180500 }, { "epoch": 0.01, "learning_rate": 4.97962628826363e-05, "loss": 0.1097, "step": 181000 }, { "epoch": 0.01, "learning_rate": 4.9795699836502666e-05, "loss": 0.1038, "step": 181500 }, { "epoch": 0.01, "learning_rate": 4.97951379164613e-05, "loss": 0.1107, "step": 182000 }, { "epoch": 0.01, "learning_rate": 4.979457487032766e-05, "loss": 0.0993, "step": 182500 }, { "epoch": 0.01, "learning_rate": 4.979401182419403e-05, "loss": 0.1011, "step": 183000 }, { "epoch": 0.01, "learning_rate": 4.979344877806039e-05, "loss": 0.1045, "step": 183500 }, { "epoch": 0.01, "learning_rate": 4.979288573192676e-05, "loss": 0.101, "step": 184000 }, { "epoch": 0.01, "learning_rate": 4.979232268579312e-05, "loss": 0.1055, "step": 184500 }, { "epoch": 0.01, "learning_rate": 4.979175963965948e-05, "loss": 0.0973, "step": 185000 }, { "epoch": 0.01, "learning_rate": 4.9791197719618115e-05, "loss": 0.1015, "step": 185500 }, { "epoch": 0.01, "learning_rate": 4.979063467348448e-05, "loss": 0.1022, "step": 186000 }, { "epoch": 0.01, "learning_rate": 4.979007162735085e-05, "loss": 0.1039, "step": 186500 }, { "epoch": 0.01, "learning_rate": 4.978950858121721e-05, "loss": 0.0983, "step": 187000 }, { "epoch": 0.01, "learning_rate": 4.9788946661175846e-05, "loss": 0.1058, "step": 187500 }, { "epoch": 0.01, "learning_rate": 4.97883836150422e-05, "loss": 0.1093, "step": 188000 }, { "epoch": 0.01, "learning_rate": 4.9787820568908574e-05, "loss": 0.1108, "step": 188500 }, { "epoch": 0.01, "learning_rate": 4.978725752277494e-05, "loss": 0.1044, "step": 189000 }, { "epoch": 0.01, "learning_rate": 4.978669560273357e-05, "loss": 0.1038, "step": 189500 }, { "epoch": 0.01, "learning_rate": 4.9786132556599934e-05, "loss": 0.1052, "step": 190000 }, { "epoch": 0.01, "learning_rate": 4.97855695104663e-05, "loss": 0.1046, "step": 190500 }, { "epoch": 0.01, "learning_rate": 4.978500646433266e-05, "loss": 0.1019, "step": 191000 }, { "epoch": 0.01, "learning_rate": 4.9784444544291294e-05, "loss": 0.1085, "step": 191500 }, { "epoch": 0.01, "learning_rate": 4.978388149815766e-05, "loss": 0.101, "step": 192000 }, { "epoch": 0.01, "learning_rate": 4.978331845202402e-05, "loss": 0.1065, "step": 192500 }, { "epoch": 0.01, "learning_rate": 4.978275540589039e-05, "loss": 0.1007, "step": 193000 }, { "epoch": 0.01, "learning_rate": 4.978219235975675e-05, "loss": 0.096, "step": 193500 }, { "epoch": 0.01, "learning_rate": 4.978163043971538e-05, "loss": 0.1045, "step": 194000 }, { "epoch": 0.01, "learning_rate": 4.9781067393581746e-05, "loss": 0.1021, "step": 194500 }, { "epoch": 0.01, "learning_rate": 4.978050434744812e-05, "loss": 0.1035, "step": 195000 }, { "epoch": 0.01, "learning_rate": 4.977994130131448e-05, "loss": 0.1033, "step": 195500 }, { "epoch": 0.01, "learning_rate": 4.9779378255180845e-05, "loss": 0.0989, "step": 196000 }, { "epoch": 0.01, "learning_rate": 4.977881633513948e-05, "loss": 0.1041, "step": 196500 }, { "epoch": 0.01, "learning_rate": 4.977825328900584e-05, "loss": 0.1039, "step": 197000 }, { "epoch": 0.01, "learning_rate": 4.9777690242872205e-05, "loss": 0.1055, "step": 197500 }, { "epoch": 0.01, "learning_rate": 4.977712719673857e-05, "loss": 0.1029, "step": 198000 }, { "epoch": 0.01, "learning_rate": 4.977656415060493e-05, "loss": 0.1043, "step": 198500 }, { "epoch": 0.01, "learning_rate": 4.9776002230563566e-05, "loss": 0.092, "step": 199000 }, { "epoch": 0.01, "learning_rate": 4.977543918442993e-05, "loss": 0.1094, "step": 199500 }, { "epoch": 0.01, "learning_rate": 4.9774876138296294e-05, "loss": 0.1008, "step": 200000 }, { "epoch": 0.01, "learning_rate": 4.9774313092162664e-05, "loss": 0.0975, "step": 200500 }, { "epoch": 0.01, "learning_rate": 4.977375004602902e-05, "loss": 0.0955, "step": 201000 }, { "epoch": 0.01, "learning_rate": 4.977318812598766e-05, "loss": 0.1019, "step": 201500 }, { "epoch": 0.01, "learning_rate": 4.977262507985402e-05, "loss": 0.1062, "step": 202000 }, { "epoch": 0.01, "learning_rate": 4.977206203372038e-05, "loss": 0.1023, "step": 202500 }, { "epoch": 0.01, "learning_rate": 4.977149898758675e-05, "loss": 0.1049, "step": 203000 }, { "epoch": 0.01, "learning_rate": 4.977093594145311e-05, "loss": 0.1035, "step": 203500 }, { "epoch": 0.01, "learning_rate": 4.977037402141175e-05, "loss": 0.1077, "step": 204000 }, { "epoch": 0.01, "learning_rate": 4.9769810975278106e-05, "loss": 0.1046, "step": 204500 }, { "epoch": 0.01, "learning_rate": 4.976924792914448e-05, "loss": 0.1022, "step": 205000 }, { "epoch": 0.01, "learning_rate": 4.976868488301084e-05, "loss": 0.1053, "step": 205500 }, { "epoch": 0.01, "learning_rate": 4.9768121836877205e-05, "loss": 0.1011, "step": 206000 }, { "epoch": 0.01, "learning_rate": 4.976755991683584e-05, "loss": 0.098, "step": 206500 }, { "epoch": 0.01, "learning_rate": 4.97669968707022e-05, "loss": 0.1067, "step": 207000 }, { "epoch": 0.01, "learning_rate": 4.9766433824568565e-05, "loss": 0.1013, "step": 207500 }, { "epoch": 0.01, "learning_rate": 4.976587077843493e-05, "loss": 0.0973, "step": 208000 }, { "epoch": 0.01, "learning_rate": 4.976530885839356e-05, "loss": 0.0958, "step": 208500 }, { "epoch": 0.01, "learning_rate": 4.9764745812259925e-05, "loss": 0.0996, "step": 209000 }, { "epoch": 0.01, "learning_rate": 4.9764182766126296e-05, "loss": 0.1039, "step": 209500 }, { "epoch": 0.01, "learning_rate": 4.976361971999265e-05, "loss": 0.0908, "step": 210000 }, { "epoch": 0.01, "learning_rate": 4.976305779995129e-05, "loss": 0.1, "step": 210500 }, { "epoch": 0.01, "learning_rate": 4.976249475381765e-05, "loss": 0.098, "step": 211000 }, { "epoch": 0.01, "learning_rate": 4.976193170768402e-05, "loss": 0.0942, "step": 211500 }, { "epoch": 0.01, "learning_rate": 4.9761368661550384e-05, "loss": 0.0939, "step": 212000 }, { "epoch": 0.01, "learning_rate": 4.976080561541674e-05, "loss": 0.0996, "step": 212500 }, { "epoch": 0.01, "learning_rate": 4.976024369537538e-05, "loss": 0.0898, "step": 213000 }, { "epoch": 0.01, "learning_rate": 4.9759680649241744e-05, "loss": 0.1021, "step": 213500 }, { "epoch": 0.01, "learning_rate": 4.975911760310811e-05, "loss": 0.0996, "step": 214000 }, { "epoch": 0.01, "learning_rate": 4.975855455697447e-05, "loss": 0.1006, "step": 214500 }, { "epoch": 0.01, "learning_rate": 4.9757992636933104e-05, "loss": 0.1015, "step": 215000 }, { "epoch": 0.01, "learning_rate": 4.975742959079947e-05, "loss": 0.1052, "step": 215500 }, { "epoch": 0.01, "learning_rate": 4.975686654466583e-05, "loss": 0.1045, "step": 216000 }, { "epoch": 0.01, "learning_rate": 4.9756303498532196e-05, "loss": 0.0966, "step": 216500 }, { "epoch": 0.01, "learning_rate": 4.975574157849083e-05, "loss": 0.0985, "step": 217000 }, { "epoch": 0.01, "learning_rate": 4.975517853235719e-05, "loss": 0.0942, "step": 217500 }, { "epoch": 0.01, "learning_rate": 4.9754615486223563e-05, "loss": 0.1024, "step": 218000 }, { "epoch": 0.01, "learning_rate": 4.975405244008992e-05, "loss": 0.0988, "step": 218500 }, { "epoch": 0.01, "learning_rate": 4.975349052004856e-05, "loss": 0.1089, "step": 219000 }, { "epoch": 0.01, "learning_rate": 4.975292747391492e-05, "loss": 0.0981, "step": 219500 }, { "epoch": 0.01, "learning_rate": 4.975236442778128e-05, "loss": 0.0952, "step": 220000 }, { "epoch": 0.01, "learning_rate": 4.975180138164765e-05, "loss": 0.0987, "step": 220500 }, { "epoch": 0.01, "learning_rate": 4.9751239461606284e-05, "loss": 0.0971, "step": 221000 }, { "epoch": 0.01, "learning_rate": 4.975067641547265e-05, "loss": 0.0983, "step": 221500 }, { "epoch": 0.01, "learning_rate": 4.9750113369339005e-05, "loss": 0.0968, "step": 222000 }, { "epoch": 0.02, "learning_rate": 4.9749550323205376e-05, "loss": 0.0971, "step": 222500 }, { "epoch": 0.02, "learning_rate": 4.974898727707174e-05, "loss": 0.0964, "step": 223000 }, { "epoch": 0.02, "learning_rate": 4.974842535703037e-05, "loss": 0.1048, "step": 223500 }, { "epoch": 0.02, "learning_rate": 4.9747862310896736e-05, "loss": 0.1014, "step": 224000 }, { "epoch": 0.02, "learning_rate": 4.97472992647631e-05, "loss": 0.1035, "step": 224500 }, { "epoch": 0.02, "learning_rate": 4.9746736218629464e-05, "loss": 0.0957, "step": 225000 }, { "epoch": 0.02, "learning_rate": 4.9746174298588096e-05, "loss": 0.0987, "step": 225500 }, { "epoch": 0.02, "learning_rate": 4.974561125245446e-05, "loss": 0.1007, "step": 226000 }, { "epoch": 0.02, "learning_rate": 4.9745048206320824e-05, "loss": 0.0976, "step": 226500 }, { "epoch": 0.02, "learning_rate": 4.9744485160187195e-05, "loss": 0.0955, "step": 227000 }, { "epoch": 0.02, "learning_rate": 4.974392211405355e-05, "loss": 0.1, "step": 227500 }, { "epoch": 0.02, "learning_rate": 4.974336019401219e-05, "loss": 0.0951, "step": 228000 }, { "epoch": 0.02, "learning_rate": 4.974279714787855e-05, "loss": 0.1015, "step": 228500 }, { "epoch": 0.02, "learning_rate": 4.974223410174492e-05, "loss": 0.0937, "step": 229000 }, { "epoch": 0.02, "learning_rate": 4.974167105561128e-05, "loss": 0.1, "step": 229500 }, { "epoch": 0.02, "learning_rate": 4.974110800947765e-05, "loss": 0.0985, "step": 230000 }, { "epoch": 0.02, "learning_rate": 4.974054608943628e-05, "loss": 0.0959, "step": 230500 }, { "epoch": 0.02, "learning_rate": 4.9739983043302643e-05, "loss": 0.0998, "step": 231000 }, { "epoch": 0.02, "learning_rate": 4.973941999716901e-05, "loss": 0.0985, "step": 231500 }, { "epoch": 0.02, "learning_rate": 4.973885695103537e-05, "loss": 0.0971, "step": 232000 }, { "epoch": 0.02, "learning_rate": 4.9738295030994004e-05, "loss": 0.0999, "step": 232500 }, { "epoch": 0.02, "learning_rate": 4.973773198486037e-05, "loss": 0.0979, "step": 233000 }, { "epoch": 0.02, "learning_rate": 4.973716893872673e-05, "loss": 0.0955, "step": 233500 }, { "epoch": 0.02, "learning_rate": 4.9736605892593096e-05, "loss": 0.1002, "step": 234000 }, { "epoch": 0.02, "learning_rate": 4.973604397255173e-05, "loss": 0.0988, "step": 234500 }, { "epoch": 0.02, "learning_rate": 4.973548092641809e-05, "loss": 0.0994, "step": 235000 }, { "epoch": 0.02, "learning_rate": 4.973491788028446e-05, "loss": 0.0971, "step": 235500 }, { "epoch": 0.02, "learning_rate": 4.973435483415082e-05, "loss": 0.0956, "step": 236000 }, { "epoch": 0.02, "learning_rate": 4.9733791788017184e-05, "loss": 0.0957, "step": 236500 }, { "epoch": 0.02, "learning_rate": 4.9733228741883555e-05, "loss": 0.0995, "step": 237000 }, { "epoch": 0.02, "learning_rate": 4.973266682184218e-05, "loss": 0.0992, "step": 237500 }, { "epoch": 0.02, "learning_rate": 4.973210377570855e-05, "loss": 0.1011, "step": 238000 }, { "epoch": 0.02, "learning_rate": 4.973154072957491e-05, "loss": 0.1072, "step": 238500 }, { "epoch": 0.02, "learning_rate": 4.973097768344128e-05, "loss": 0.0964, "step": 239000 }, { "epoch": 0.02, "learning_rate": 4.973041463730764e-05, "loss": 0.0967, "step": 239500 }, { "epoch": 0.02, "learning_rate": 4.9729852717266275e-05, "loss": 0.0971, "step": 240000 }, { "epoch": 0.02, "learning_rate": 4.972928967113264e-05, "loss": 0.094, "step": 240500 }, { "epoch": 0.02, "learning_rate": 4.9728726624999e-05, "loss": 0.1028, "step": 241000 }, { "epoch": 0.02, "learning_rate": 4.972816357886537e-05, "loss": 0.094, "step": 241500 }, { "epoch": 0.02, "learning_rate": 4.9727601658824e-05, "loss": 0.0967, "step": 242000 }, { "epoch": 0.02, "learning_rate": 4.972703861269036e-05, "loss": 0.0958, "step": 242500 }, { "epoch": 0.02, "learning_rate": 4.972647556655673e-05, "loss": 0.1031, "step": 243000 }, { "epoch": 0.02, "learning_rate": 4.97259125204231e-05, "loss": 0.0942, "step": 243500 }, { "epoch": 0.02, "learning_rate": 4.9725349474289455e-05, "loss": 0.0969, "step": 244000 }, { "epoch": 0.02, "learning_rate": 4.9724787554248094e-05, "loss": 0.0971, "step": 244500 }, { "epoch": 0.02, "learning_rate": 4.972422450811445e-05, "loss": 0.0991, "step": 245000 }, { "epoch": 0.02, "learning_rate": 4.972366146198082e-05, "loss": 0.0996, "step": 245500 }, { "epoch": 0.02, "learning_rate": 4.9723098415847186e-05, "loss": 0.0941, "step": 246000 }, { "epoch": 0.02, "learning_rate": 4.972253536971354e-05, "loss": 0.0967, "step": 246500 }, { "epoch": 0.02, "learning_rate": 4.972197344967218e-05, "loss": 0.0928, "step": 247000 }, { "epoch": 0.02, "learning_rate": 4.9721410403538546e-05, "loss": 0.0944, "step": 247500 }, { "epoch": 0.02, "learning_rate": 4.972084735740491e-05, "loss": 0.0994, "step": 248000 }, { "epoch": 0.02, "learning_rate": 4.9720284311271274e-05, "loss": 0.0928, "step": 248500 }, { "epoch": 0.02, "learning_rate": 4.971972126513764e-05, "loss": 0.0926, "step": 249000 }, { "epoch": 0.02, "learning_rate": 4.9719158219004e-05, "loss": 0.0896, "step": 249500 }, { "epoch": 0.02, "learning_rate": 4.9718596298962635e-05, "loss": 0.1015, "step": 250000 }, { "epoch": 0.02, "learning_rate": 4.9718033252829e-05, "loss": 0.0936, "step": 250500 }, { "epoch": 0.02, "learning_rate": 4.971747020669536e-05, "loss": 0.0921, "step": 251000 }, { "epoch": 0.02, "learning_rate": 4.9716907160561727e-05, "loss": 0.0958, "step": 251500 }, { "epoch": 0.02, "learning_rate": 4.971634411442809e-05, "loss": 0.0921, "step": 252000 }, { "epoch": 0.02, "learning_rate": 4.971578106829446e-05, "loss": 0.096, "step": 252500 }, { "epoch": 0.02, "learning_rate": 4.971521914825309e-05, "loss": 0.0968, "step": 253000 }, { "epoch": 0.02, "learning_rate": 4.971465610211946e-05, "loss": 0.0942, "step": 253500 }, { "epoch": 0.02, "learning_rate": 4.9714093055985815e-05, "loss": 0.0955, "step": 254000 }, { "epoch": 0.02, "learning_rate": 4.9713530009852185e-05, "loss": 0.0947, "step": 254500 }, { "epoch": 0.02, "learning_rate": 4.971296808981081e-05, "loss": 0.0943, "step": 255000 }, { "epoch": 0.02, "learning_rate": 4.971240504367718e-05, "loss": 0.0973, "step": 255500 }, { "epoch": 0.02, "learning_rate": 4.9711841997543546e-05, "loss": 0.1012, "step": 256000 }, { "epoch": 0.02, "learning_rate": 4.971127895140991e-05, "loss": 0.0994, "step": 256500 }, { "epoch": 0.02, "learning_rate": 4.971071703136854e-05, "loss": 0.0963, "step": 257000 }, { "epoch": 0.02, "learning_rate": 4.9710153985234906e-05, "loss": 0.0927, "step": 257500 }, { "epoch": 0.02, "learning_rate": 4.970959093910127e-05, "loss": 0.0922, "step": 258000 }, { "epoch": 0.02, "learning_rate": 4.9709027892967634e-05, "loss": 0.0976, "step": 258500 }, { "epoch": 0.02, "learning_rate": 4.9708464846834005e-05, "loss": 0.0969, "step": 259000 }, { "epoch": 0.02, "learning_rate": 4.970790292679263e-05, "loss": 0.0997, "step": 259500 }, { "epoch": 0.02, "learning_rate": 4.9707339880659e-05, "loss": 0.0913, "step": 260000 }, { "epoch": 0.02, "learning_rate": 4.970677683452536e-05, "loss": 0.0943, "step": 260500 }, { "epoch": 0.02, "learning_rate": 4.970621378839173e-05, "loss": 0.1012, "step": 261000 }, { "epoch": 0.02, "learning_rate": 4.9705651868350354e-05, "loss": 0.0966, "step": 261500 }, { "epoch": 0.02, "learning_rate": 4.9705088822216725e-05, "loss": 0.0922, "step": 262000 }, { "epoch": 0.02, "learning_rate": 4.970452577608309e-05, "loss": 0.0909, "step": 262500 }, { "epoch": 0.02, "learning_rate": 4.9703962729949446e-05, "loss": 0.0989, "step": 263000 }, { "epoch": 0.02, "learning_rate": 4.970339968381582e-05, "loss": 0.0962, "step": 263500 }, { "epoch": 0.02, "learning_rate": 4.970283776377444e-05, "loss": 0.0897, "step": 264000 }, { "epoch": 0.02, "learning_rate": 4.970227471764081e-05, "loss": 0.0957, "step": 264500 }, { "epoch": 0.02, "learning_rate": 4.970171167150718e-05, "loss": 0.095, "step": 265000 }, { "epoch": 0.02, "learning_rate": 4.970114862537354e-05, "loss": 0.0995, "step": 265500 }, { "epoch": 0.02, "learning_rate": 4.9700586705332173e-05, "loss": 0.0951, "step": 266000 }, { "epoch": 0.02, "learning_rate": 4.970002365919854e-05, "loss": 0.0973, "step": 266500 }, { "epoch": 0.02, "learning_rate": 4.96994606130649e-05, "loss": 0.0951, "step": 267000 }, { "epoch": 0.02, "learning_rate": 4.9698897566931265e-05, "loss": 0.1009, "step": 267500 }, { "epoch": 0.02, "learning_rate": 4.969833452079763e-05, "loss": 0.0906, "step": 268000 }, { "epoch": 0.02, "learning_rate": 4.969777372684853e-05, "loss": 0.0998, "step": 268500 }, { "epoch": 0.02, "learning_rate": 4.9697210680714894e-05, "loss": 0.0965, "step": 269000 }, { "epoch": 0.02, "learning_rate": 4.9696647634581265e-05, "loss": 0.0893, "step": 269500 }, { "epoch": 0.02, "learning_rate": 4.969608458844762e-05, "loss": 0.0938, "step": 270000 }, { "epoch": 0.02, "learning_rate": 4.9695521542313986e-05, "loss": 0.0903, "step": 270500 }, { "epoch": 0.02, "learning_rate": 4.969495849618036e-05, "loss": 0.1004, "step": 271000 }, { "epoch": 0.02, "learning_rate": 4.9694395450046714e-05, "loss": 0.0912, "step": 271500 }, { "epoch": 0.02, "learning_rate": 4.9693832403913085e-05, "loss": 0.0916, "step": 272000 }, { "epoch": 0.02, "learning_rate": 4.969327048387171e-05, "loss": 0.092, "step": 272500 }, { "epoch": 0.02, "learning_rate": 4.969270743773808e-05, "loss": 0.0906, "step": 273000 }, { "epoch": 0.02, "learning_rate": 4.9692144391604445e-05, "loss": 0.1, "step": 273500 }, { "epoch": 0.02, "learning_rate": 4.969158134547081e-05, "loss": 0.0925, "step": 274000 }, { "epoch": 0.02, "learning_rate": 4.969101829933717e-05, "loss": 0.0936, "step": 274500 }, { "epoch": 0.02, "learning_rate": 4.969045525320354e-05, "loss": 0.0891, "step": 275000 }, { "epoch": 0.02, "learning_rate": 4.968989333316217e-05, "loss": 0.0888, "step": 275500 }, { "epoch": 0.02, "learning_rate": 4.968933028702853e-05, "loss": 0.0975, "step": 276000 }, { "epoch": 0.02, "learning_rate": 4.9688767240894904e-05, "loss": 0.0899, "step": 276500 }, { "epoch": 0.02, "learning_rate": 4.968820419476126e-05, "loss": 0.0942, "step": 277000 }, { "epoch": 0.02, "learning_rate": 4.96876422747199e-05, "loss": 0.0971, "step": 277500 }, { "epoch": 0.02, "learning_rate": 4.968707922858626e-05, "loss": 0.1018, "step": 278000 }, { "epoch": 0.02, "learning_rate": 4.968651618245263e-05, "loss": 0.0879, "step": 278500 }, { "epoch": 0.02, "learning_rate": 4.968595313631899e-05, "loss": 0.0951, "step": 279000 }, { "epoch": 0.02, "learning_rate": 4.968539009018535e-05, "loss": 0.094, "step": 279500 }, { "epoch": 0.02, "learning_rate": 4.968482817014399e-05, "loss": 0.095, "step": 280000 }, { "epoch": 0.02, "learning_rate": 4.9684265124010345e-05, "loss": 0.0958, "step": 280500 }, { "epoch": 0.02, "learning_rate": 4.9683702077876716e-05, "loss": 0.0936, "step": 281000 }, { "epoch": 0.02, "learning_rate": 4.968313903174308e-05, "loss": 0.0924, "step": 281500 }, { "epoch": 0.02, "learning_rate": 4.968257711170171e-05, "loss": 0.0921, "step": 282000 }, { "epoch": 0.02, "learning_rate": 4.9682014065568076e-05, "loss": 0.0944, "step": 282500 }, { "epoch": 0.02, "learning_rate": 4.968145101943444e-05, "loss": 0.0944, "step": 283000 }, { "epoch": 0.02, "learning_rate": 4.9680887973300804e-05, "loss": 0.093, "step": 283500 }, { "epoch": 0.02, "learning_rate": 4.968032492716717e-05, "loss": 0.0953, "step": 284000 }, { "epoch": 0.02, "learning_rate": 4.967976188103353e-05, "loss": 0.0863, "step": 284500 }, { "epoch": 0.02, "learning_rate": 4.9679199960992165e-05, "loss": 0.0929, "step": 285000 }, { "epoch": 0.02, "learning_rate": 4.967863691485853e-05, "loss": 0.0928, "step": 285500 }, { "epoch": 0.02, "learning_rate": 4.967807386872489e-05, "loss": 0.0972, "step": 286000 }, { "epoch": 0.02, "learning_rate": 4.967751082259126e-05, "loss": 0.0891, "step": 286500 }, { "epoch": 0.02, "learning_rate": 4.967694890254989e-05, "loss": 0.0922, "step": 287000 }, { "epoch": 0.02, "learning_rate": 4.967638585641626e-05, "loss": 0.0942, "step": 287500 }, { "epoch": 0.02, "learning_rate": 4.967582281028262e-05, "loss": 0.0946, "step": 288000 }, { "epoch": 0.02, "learning_rate": 4.967525976414899e-05, "loss": 0.0949, "step": 288500 }, { "epoch": 0.02, "learning_rate": 4.967469671801535e-05, "loss": 0.092, "step": 289000 }, { "epoch": 0.02, "learning_rate": 4.9674134797973984e-05, "loss": 0.0904, "step": 289500 }, { "epoch": 0.02, "learning_rate": 4.967357175184035e-05, "loss": 0.0883, "step": 290000 }, { "epoch": 0.02, "learning_rate": 4.967300870570671e-05, "loss": 0.0935, "step": 290500 }, { "epoch": 0.02, "learning_rate": 4.9672445659573076e-05, "loss": 0.086, "step": 291000 }, { "epoch": 0.02, "learning_rate": 4.967188373953171e-05, "loss": 0.0923, "step": 291500 }, { "epoch": 0.02, "learning_rate": 4.967132069339807e-05, "loss": 0.0874, "step": 292000 }, { "epoch": 0.02, "learning_rate": 4.9670757647264436e-05, "loss": 0.0908, "step": 292500 }, { "epoch": 0.02, "learning_rate": 4.967019460113081e-05, "loss": 0.0938, "step": 293000 }, { "epoch": 0.02, "learning_rate": 4.96696338071817e-05, "loss": 0.0929, "step": 293500 }, { "epoch": 0.02, "learning_rate": 4.9669070761048064e-05, "loss": 0.0919, "step": 294000 }, { "epoch": 0.02, "learning_rate": 4.966850771491443e-05, "loss": 0.0898, "step": 294500 }, { "epoch": 0.02, "learning_rate": 4.96679446687808e-05, "loss": 0.0888, "step": 295000 }, { "epoch": 0.02, "learning_rate": 4.9667381622647156e-05, "loss": 0.0891, "step": 295500 }, { "epoch": 0.02, "learning_rate": 4.966681857651353e-05, "loss": 0.0913, "step": 296000 }, { "epoch": 0.02, "learning_rate": 4.966625553037989e-05, "loss": 0.0897, "step": 296500 }, { "epoch": 0.02, "learning_rate": 4.966569248424625e-05, "loss": 0.0907, "step": 297000 }, { "epoch": 0.02, "learning_rate": 4.966512943811262e-05, "loss": 0.09, "step": 297500 }, { "epoch": 0.02, "learning_rate": 4.9664567518071245e-05, "loss": 0.0925, "step": 298000 }, { "epoch": 0.02, "learning_rate": 4.9664004471937615e-05, "loss": 0.0912, "step": 298500 }, { "epoch": 0.02, "learning_rate": 4.966344142580398e-05, "loss": 0.087, "step": 299000 }, { "epoch": 0.02, "learning_rate": 4.966287837967034e-05, "loss": 0.0904, "step": 299500 }, { "epoch": 0.02, "learning_rate": 4.9662316459628976e-05, "loss": 0.0902, "step": 300000 }, { "epoch": 0.02, "learning_rate": 4.966175341349534e-05, "loss": 0.0824, "step": 300500 }, { "epoch": 0.02, "learning_rate": 4.9661190367361704e-05, "loss": 0.0923, "step": 301000 }, { "epoch": 0.02, "learning_rate": 4.966062732122807e-05, "loss": 0.0854, "step": 301500 }, { "epoch": 0.02, "learning_rate": 4.966006427509443e-05, "loss": 0.0891, "step": 302000 }, { "epoch": 0.02, "learning_rate": 4.9659502355053064e-05, "loss": 0.0946, "step": 302500 }, { "epoch": 0.02, "learning_rate": 4.965893930891943e-05, "loss": 0.089, "step": 303000 }, { "epoch": 0.02, "learning_rate": 4.965837626278579e-05, "loss": 0.093, "step": 303500 }, { "epoch": 0.02, "learning_rate": 4.965781321665216e-05, "loss": 0.093, "step": 304000 }, { "epoch": 0.02, "learning_rate": 4.965725129661079e-05, "loss": 0.0875, "step": 304500 }, { "epoch": 0.02, "learning_rate": 4.965668825047716e-05, "loss": 0.0925, "step": 305000 }, { "epoch": 0.02, "learning_rate": 4.9656125204343516e-05, "loss": 0.0917, "step": 305500 }, { "epoch": 0.02, "learning_rate": 4.965556215820989e-05, "loss": 0.0849, "step": 306000 }, { "epoch": 0.02, "learning_rate": 4.965500023816851e-05, "loss": 0.0881, "step": 306500 }, { "epoch": 0.02, "learning_rate": 4.965443719203488e-05, "loss": 0.0864, "step": 307000 }, { "epoch": 0.02, "learning_rate": 4.965387414590125e-05, "loss": 0.0898, "step": 307500 }, { "epoch": 0.02, "learning_rate": 4.965331109976761e-05, "loss": 0.0939, "step": 308000 }, { "epoch": 0.02, "learning_rate": 4.9652748053633975e-05, "loss": 0.0914, "step": 308500 }, { "epoch": 0.02, "learning_rate": 4.965218613359261e-05, "loss": 0.0868, "step": 309000 }, { "epoch": 0.02, "learning_rate": 4.965162308745897e-05, "loss": 0.0876, "step": 309500 }, { "epoch": 0.02, "learning_rate": 4.96510611674176e-05, "loss": 0.0933, "step": 310000 }, { "epoch": 0.02, "learning_rate": 4.965049812128397e-05, "loss": 0.0887, "step": 310500 }, { "epoch": 0.02, "learning_rate": 4.964993507515033e-05, "loss": 0.0855, "step": 311000 }, { "epoch": 0.02, "learning_rate": 4.96493720290167e-05, "loss": 0.0935, "step": 311500 }, { "epoch": 0.02, "learning_rate": 4.964880898288306e-05, "loss": 0.0948, "step": 312000 }, { "epoch": 0.02, "learning_rate": 4.964824593674943e-05, "loss": 0.0907, "step": 312500 }, { "epoch": 0.02, "learning_rate": 4.9647682890615794e-05, "loss": 0.0878, "step": 313000 }, { "epoch": 0.02, "learning_rate": 4.964711984448215e-05, "loss": 0.09, "step": 313500 }, { "epoch": 0.02, "learning_rate": 4.964655679834852e-05, "loss": 0.0866, "step": 314000 }, { "epoch": 0.02, "learning_rate": 4.964599487830715e-05, "loss": 0.091, "step": 314500 }, { "epoch": 0.02, "learning_rate": 4.964543183217352e-05, "loss": 0.0927, "step": 315000 }, { "epoch": 0.02, "learning_rate": 4.964486878603988e-05, "loss": 0.0974, "step": 315500 }, { "epoch": 0.02, "learning_rate": 4.9644305739906246e-05, "loss": 0.0896, "step": 316000 }, { "epoch": 0.02, "learning_rate": 4.964374269377261e-05, "loss": 0.0931, "step": 316500 }, { "epoch": 0.02, "learning_rate": 4.9643179647638974e-05, "loss": 0.0902, "step": 317000 }, { "epoch": 0.02, "learning_rate": 4.9642617727597606e-05, "loss": 0.0877, "step": 317500 }, { "epoch": 0.02, "learning_rate": 4.964205468146397e-05, "loss": 0.0875, "step": 318000 }, { "epoch": 0.02, "learning_rate": 4.9641491635330334e-05, "loss": 0.0882, "step": 318500 }, { "epoch": 0.02, "learning_rate": 4.96409285891967e-05, "loss": 0.0859, "step": 319000 }, { "epoch": 0.02, "learning_rate": 4.964036666915533e-05, "loss": 0.0903, "step": 319500 }, { "epoch": 0.02, "learning_rate": 4.9639803623021695e-05, "loss": 0.0933, "step": 320000 }, { "epoch": 0.02, "learning_rate": 4.963924170298033e-05, "loss": 0.0923, "step": 320500 }, { "epoch": 0.02, "learning_rate": 4.963867865684669e-05, "loss": 0.0886, "step": 321000 }, { "epoch": 0.02, "learning_rate": 4.963811561071306e-05, "loss": 0.0889, "step": 321500 }, { "epoch": 0.02, "learning_rate": 4.9637552564579426e-05, "loss": 0.0871, "step": 322000 }, { "epoch": 0.02, "learning_rate": 4.963698951844579e-05, "loss": 0.0844, "step": 322500 }, { "epoch": 0.02, "learning_rate": 4.9636426472312154e-05, "loss": 0.0892, "step": 323000 }, { "epoch": 0.02, "learning_rate": 4.963586342617852e-05, "loss": 0.0859, "step": 323500 }, { "epoch": 0.02, "learning_rate": 4.963530038004488e-05, "loss": 0.087, "step": 324000 }, { "epoch": 0.02, "learning_rate": 4.9634738460003514e-05, "loss": 0.0873, "step": 324500 }, { "epoch": 0.02, "learning_rate": 4.963417541386988e-05, "loss": 0.0847, "step": 325000 }, { "epoch": 0.02, "learning_rate": 4.963361236773624e-05, "loss": 0.0893, "step": 325500 }, { "epoch": 0.02, "learning_rate": 4.963304932160261e-05, "loss": 0.0894, "step": 326000 }, { "epoch": 0.02, "learning_rate": 4.963248627546897e-05, "loss": 0.0898, "step": 326500 }, { "epoch": 0.02, "learning_rate": 4.963192435542761e-05, "loss": 0.0873, "step": 327000 }, { "epoch": 0.02, "learning_rate": 4.9631361309293966e-05, "loss": 0.0923, "step": 327500 }, { "epoch": 0.02, "learning_rate": 4.963079826316033e-05, "loss": 0.0809, "step": 328000 }, { "epoch": 0.02, "learning_rate": 4.96302352170267e-05, "loss": 0.0846, "step": 328500 }, { "epoch": 0.02, "learning_rate": 4.9629673296985326e-05, "loss": 0.0911, "step": 329000 }, { "epoch": 0.02, "learning_rate": 4.96291102508517e-05, "loss": 0.0871, "step": 329500 }, { "epoch": 0.02, "learning_rate": 4.9628547204718054e-05, "loss": 0.0858, "step": 330000 }, { "epoch": 0.02, "learning_rate": 4.9627984158584425e-05, "loss": 0.0905, "step": 330500 }, { "epoch": 0.02, "learning_rate": 4.962742111245079e-05, "loss": 0.0845, "step": 331000 }, { "epoch": 0.02, "learning_rate": 4.962685919240942e-05, "loss": 0.0869, "step": 331500 }, { "epoch": 0.02, "learning_rate": 4.9626296146275785e-05, "loss": 0.0877, "step": 332000 }, { "epoch": 0.02, "learning_rate": 4.962573310014215e-05, "loss": 0.0883, "step": 332500 }, { "epoch": 0.02, "learning_rate": 4.962517005400851e-05, "loss": 0.0929, "step": 333000 }, { "epoch": 0.02, "learning_rate": 4.9624608133967145e-05, "loss": 0.0889, "step": 333500 }, { "epoch": 0.02, "learning_rate": 4.962404508783351e-05, "loss": 0.0851, "step": 334000 }, { "epoch": 0.02, "learning_rate": 4.962348204169987e-05, "loss": 0.091, "step": 334500 }, { "epoch": 0.02, "learning_rate": 4.962291899556624e-05, "loss": 0.0921, "step": 335000 }, { "epoch": 0.02, "learning_rate": 4.962235707552487e-05, "loss": 0.0927, "step": 335500 }, { "epoch": 0.02, "learning_rate": 4.9621794029391234e-05, "loss": 0.1025, "step": 336000 }, { "epoch": 0.02, "learning_rate": 4.96212309832576e-05, "loss": 0.0906, "step": 336500 }, { "epoch": 0.02, "learning_rate": 4.962066793712397e-05, "loss": 0.0907, "step": 337000 }, { "epoch": 0.02, "learning_rate": 4.962010489099033e-05, "loss": 0.0903, "step": 337500 }, { "epoch": 0.02, "learning_rate": 4.961954184485669e-05, "loss": 0.0915, "step": 338000 }, { "epoch": 0.02, "learning_rate": 4.961897879872306e-05, "loss": 0.0897, "step": 338500 }, { "epoch": 0.02, "learning_rate": 4.9618415752589424e-05, "loss": 0.0887, "step": 339000 }, { "epoch": 0.02, "learning_rate": 4.9617853832548057e-05, "loss": 0.0905, "step": 339500 }, { "epoch": 0.02, "learning_rate": 4.961729078641442e-05, "loss": 0.0949, "step": 340000 }, { "epoch": 0.02, "learning_rate": 4.9616727740280785e-05, "loss": 0.088, "step": 340500 }, { "epoch": 0.02, "learning_rate": 4.961616469414715e-05, "loss": 0.0854, "step": 341000 }, { "epoch": 0.02, "learning_rate": 4.961560164801351e-05, "loss": 0.0837, "step": 341500 }, { "epoch": 0.02, "learning_rate": 4.9615038601879877e-05, "loss": 0.089, "step": 342000 }, { "epoch": 0.02, "learning_rate": 4.961447668183851e-05, "loss": 0.0969, "step": 342500 }, { "epoch": 0.02, "learning_rate": 4.961391363570487e-05, "loss": 0.0929, "step": 343000 }, { "epoch": 0.02, "learning_rate": 4.961335058957124e-05, "loss": 0.0889, "step": 343500 }, { "epoch": 0.02, "learning_rate": 4.961278754343761e-05, "loss": 0.0893, "step": 344000 }, { "epoch": 0.02, "learning_rate": 4.961222562339623e-05, "loss": 0.0926, "step": 344500 }, { "epoch": 0.02, "learning_rate": 4.9611662577262604e-05, "loss": 0.0863, "step": 345000 }, { "epoch": 0.02, "learning_rate": 4.961109953112896e-05, "loss": 0.0878, "step": 345500 }, { "epoch": 0.02, "learning_rate": 4.961053648499533e-05, "loss": 0.093, "step": 346000 }, { "epoch": 0.02, "learning_rate": 4.9609973438861696e-05, "loss": 0.0911, "step": 346500 }, { "epoch": 0.02, "learning_rate": 4.960941151882033e-05, "loss": 0.0884, "step": 347000 }, { "epoch": 0.02, "learning_rate": 4.960884847268669e-05, "loss": 0.083, "step": 347500 }, { "epoch": 0.02, "learning_rate": 4.9608285426553056e-05, "loss": 0.0898, "step": 348000 }, { "epoch": 0.02, "learning_rate": 4.960772238041942e-05, "loss": 0.0887, "step": 348500 }, { "epoch": 0.02, "learning_rate": 4.960716046037805e-05, "loss": 0.0846, "step": 349000 }, { "epoch": 0.02, "learning_rate": 4.9606597414244416e-05, "loss": 0.0896, "step": 349500 }, { "epoch": 0.02, "learning_rate": 4.960603436811078e-05, "loss": 0.0873, "step": 350000 }, { "epoch": 0.02, "learning_rate": 4.960547132197715e-05, "loss": 0.0882, "step": 350500 }, { "epoch": 0.02, "learning_rate": 4.960490827584351e-05, "loss": 0.0854, "step": 351000 }, { "epoch": 0.02, "learning_rate": 4.960434635580215e-05, "loss": 0.0905, "step": 351500 }, { "epoch": 0.02, "learning_rate": 4.9603783309668504e-05, "loss": 0.0918, "step": 352000 }, { "epoch": 0.02, "learning_rate": 4.9603220263534875e-05, "loss": 0.085, "step": 352500 }, { "epoch": 0.02, "learning_rate": 4.960265721740124e-05, "loss": 0.0909, "step": 353000 }, { "epoch": 0.02, "learning_rate": 4.960209529735987e-05, "loss": 0.0864, "step": 353500 }, { "epoch": 0.02, "learning_rate": 4.9601532251226235e-05, "loss": 0.0901, "step": 354000 }, { "epoch": 0.02, "learning_rate": 4.960096920509259e-05, "loss": 0.0853, "step": 354500 }, { "epoch": 0.02, "learning_rate": 4.960040615895896e-05, "loss": 0.0838, "step": 355000 }, { "epoch": 0.02, "learning_rate": 4.959984311282533e-05, "loss": 0.0879, "step": 355500 }, { "epoch": 0.02, "learning_rate": 4.959928231887623e-05, "loss": 0.0913, "step": 356000 }, { "epoch": 0.02, "learning_rate": 4.959871927274259e-05, "loss": 0.087, "step": 356500 }, { "epoch": 0.02, "learning_rate": 4.9598156226608956e-05, "loss": 0.0875, "step": 357000 }, { "epoch": 0.02, "learning_rate": 4.959759318047532e-05, "loss": 0.0858, "step": 357500 }, { "epoch": 0.02, "learning_rate": 4.9597030134341684e-05, "loss": 0.0903, "step": 358000 }, { "epoch": 0.02, "learning_rate": 4.959646708820805e-05, "loss": 0.0872, "step": 358500 }, { "epoch": 0.02, "learning_rate": 4.959590404207441e-05, "loss": 0.0914, "step": 359000 }, { "epoch": 0.02, "learning_rate": 4.9595340995940776e-05, "loss": 0.0936, "step": 359500 }, { "epoch": 0.02, "learning_rate": 4.959477794980714e-05, "loss": 0.0896, "step": 360000 }, { "epoch": 0.02, "learning_rate": 4.959421490367351e-05, "loss": 0.0912, "step": 360500 }, { "epoch": 0.02, "learning_rate": 4.9593652983632136e-05, "loss": 0.0863, "step": 361000 }, { "epoch": 0.02, "learning_rate": 4.959308993749851e-05, "loss": 0.0857, "step": 361500 }, { "epoch": 0.02, "learning_rate": 4.9592526891364864e-05, "loss": 0.094, "step": 362000 }, { "epoch": 0.02, "learning_rate": 4.9591963845231235e-05, "loss": 0.0931, "step": 362500 }, { "epoch": 0.02, "learning_rate": 4.959140192518986e-05, "loss": 0.0864, "step": 363000 }, { "epoch": 0.02, "learning_rate": 4.959083887905623e-05, "loss": 0.0909, "step": 363500 }, { "epoch": 0.02, "learning_rate": 4.9590275832922595e-05, "loss": 0.0844, "step": 364000 }, { "epoch": 0.02, "learning_rate": 4.958971278678896e-05, "loss": 0.088, "step": 364500 }, { "epoch": 0.02, "learning_rate": 4.958914974065532e-05, "loss": 0.0902, "step": 365000 }, { "epoch": 0.02, "learning_rate": 4.9588587820613955e-05, "loss": 0.0882, "step": 365500 }, { "epoch": 0.02, "learning_rate": 4.958802477448032e-05, "loss": 0.0842, "step": 366000 }, { "epoch": 0.02, "learning_rate": 4.958746172834668e-05, "loss": 0.0899, "step": 366500 }, { "epoch": 0.02, "learning_rate": 4.9586898682213054e-05, "loss": 0.0898, "step": 367000 }, { "epoch": 0.02, "learning_rate": 4.958633676217168e-05, "loss": 0.0928, "step": 367500 }, { "epoch": 0.02, "learning_rate": 4.958577371603805e-05, "loss": 0.0881, "step": 368000 }, { "epoch": 0.02, "learning_rate": 4.958521066990441e-05, "loss": 0.0871, "step": 368500 }, { "epoch": 0.02, "learning_rate": 4.958464762377078e-05, "loss": 0.0889, "step": 369000 }, { "epoch": 0.02, "learning_rate": 4.958408457763714e-05, "loss": 0.0902, "step": 369500 }, { "epoch": 0.02, "learning_rate": 4.9583522657595774e-05, "loss": 0.0854, "step": 370000 }, { "epoch": 0.03, "learning_rate": 4.958295961146214e-05, "loss": 0.0944, "step": 370500 }, { "epoch": 0.03, "learning_rate": 4.9582396565328495e-05, "loss": 0.0872, "step": 371000 }, { "epoch": 0.03, "learning_rate": 4.9581833519194866e-05, "loss": 0.0888, "step": 371500 }, { "epoch": 0.03, "learning_rate": 4.958127047306123e-05, "loss": 0.0913, "step": 372000 }, { "epoch": 0.03, "learning_rate": 4.9580707426927594e-05, "loss": 0.0835, "step": 372500 }, { "epoch": 0.03, "learning_rate": 4.958014438079396e-05, "loss": 0.0877, "step": 373000 }, { "epoch": 0.03, "learning_rate": 4.957958133466032e-05, "loss": 0.0857, "step": 373500 }, { "epoch": 0.03, "learning_rate": 4.9579019414618954e-05, "loss": 0.0906, "step": 374000 }, { "epoch": 0.03, "learning_rate": 4.957845636848532e-05, "loss": 0.0869, "step": 374500 }, { "epoch": 0.03, "learning_rate": 4.957789332235168e-05, "loss": 0.0835, "step": 375000 }, { "epoch": 0.03, "learning_rate": 4.9577330276218046e-05, "loss": 0.0803, "step": 375500 }, { "epoch": 0.03, "learning_rate": 4.957676723008442e-05, "loss": 0.0826, "step": 376000 }, { "epoch": 0.03, "learning_rate": 4.9576204183950774e-05, "loss": 0.087, "step": 376500 }, { "epoch": 0.03, "learning_rate": 4.957564226390941e-05, "loss": 0.0881, "step": 377000 }, { "epoch": 0.03, "learning_rate": 4.957507921777577e-05, "loss": 0.0872, "step": 377500 }, { "epoch": 0.03, "learning_rate": 4.957451617164214e-05, "loss": 0.0834, "step": 378000 }, { "epoch": 0.03, "learning_rate": 4.9573953125508505e-05, "loss": 0.0905, "step": 378500 }, { "epoch": 0.03, "learning_rate": 4.957339120546714e-05, "loss": 0.0847, "step": 379000 }, { "epoch": 0.03, "learning_rate": 4.95728281593335e-05, "loss": 0.084, "step": 379500 }, { "epoch": 0.03, "learning_rate": 4.9572265113199866e-05, "loss": 0.0903, "step": 380000 }, { "epoch": 0.03, "learning_rate": 4.957170206706623e-05, "loss": 0.0896, "step": 380500 }, { "epoch": 0.03, "learning_rate": 4.9571139020932594e-05, "loss": 0.0842, "step": 381000 }, { "epoch": 0.03, "learning_rate": 4.957057597479896e-05, "loss": 0.0843, "step": 381500 }, { "epoch": 0.03, "learning_rate": 4.957001405475759e-05, "loss": 0.0809, "step": 382000 }, { "epoch": 0.03, "learning_rate": 4.956945100862396e-05, "loss": 0.0897, "step": 382500 }, { "epoch": 0.03, "learning_rate": 4.956888796249032e-05, "loss": 0.0894, "step": 383000 }, { "epoch": 0.03, "learning_rate": 4.956832491635668e-05, "loss": 0.0814, "step": 383500 }, { "epoch": 0.03, "learning_rate": 4.9567762996315314e-05, "loss": 0.0839, "step": 384000 }, { "epoch": 0.03, "learning_rate": 4.956719995018168e-05, "loss": 0.0888, "step": 384500 }, { "epoch": 0.03, "learning_rate": 4.956663690404805e-05, "loss": 0.0816, "step": 385000 }, { "epoch": 0.03, "learning_rate": 4.9566073857914406e-05, "loss": 0.0891, "step": 385500 }, { "epoch": 0.03, "learning_rate": 4.9565511937873045e-05, "loss": 0.0893, "step": 386000 }, { "epoch": 0.03, "learning_rate": 4.95649488917394e-05, "loss": 0.0843, "step": 386500 }, { "epoch": 0.03, "learning_rate": 4.956438584560577e-05, "loss": 0.0833, "step": 387000 }, { "epoch": 0.03, "learning_rate": 4.956382279947214e-05, "loss": 0.0898, "step": 387500 }, { "epoch": 0.03, "learning_rate": 4.95632597533385e-05, "loss": 0.0851, "step": 388000 }, { "epoch": 0.03, "learning_rate": 4.956269783329713e-05, "loss": 0.0845, "step": 388500 }, { "epoch": 0.03, "learning_rate": 4.95621347871635e-05, "loss": 0.089, "step": 389000 }, { "epoch": 0.03, "learning_rate": 4.956157174102986e-05, "loss": 0.084, "step": 389500 }, { "epoch": 0.03, "learning_rate": 4.9561008694896225e-05, "loss": 0.087, "step": 390000 }, { "epoch": 0.03, "learning_rate": 4.956044677485486e-05, "loss": 0.0855, "step": 390500 }, { "epoch": 0.03, "learning_rate": 4.955988372872122e-05, "loss": 0.0878, "step": 391000 }, { "epoch": 0.03, "learning_rate": 4.9559320682587585e-05, "loss": 0.0871, "step": 391500 }, { "epoch": 0.03, "learning_rate": 4.955875763645395e-05, "loss": 0.0881, "step": 392000 }, { "epoch": 0.03, "learning_rate": 4.955819459032032e-05, "loss": 0.086, "step": 392500 }, { "epoch": 0.03, "learning_rate": 4.955763154418668e-05, "loss": 0.088, "step": 393000 }, { "epoch": 0.03, "learning_rate": 4.955706849805304e-05, "loss": 0.0841, "step": 393500 }, { "epoch": 0.03, "learning_rate": 4.9556506578011674e-05, "loss": 0.0893, "step": 394000 }, { "epoch": 0.03, "learning_rate": 4.955594465797031e-05, "loss": 0.0871, "step": 394500 }, { "epoch": 0.03, "learning_rate": 4.955538161183667e-05, "loss": 0.0863, "step": 395000 }, { "epoch": 0.03, "learning_rate": 4.955481856570304e-05, "loss": 0.0831, "step": 395500 }, { "epoch": 0.03, "learning_rate": 4.9554255519569404e-05, "loss": 0.0884, "step": 396000 }, { "epoch": 0.03, "learning_rate": 4.955369247343577e-05, "loss": 0.0782, "step": 396500 }, { "epoch": 0.03, "learning_rate": 4.955312942730213e-05, "loss": 0.0839, "step": 397000 }, { "epoch": 0.03, "learning_rate": 4.9552566381168496e-05, "loss": 0.0899, "step": 397500 }, { "epoch": 0.03, "learning_rate": 4.955200333503486e-05, "loss": 0.0883, "step": 398000 }, { "epoch": 0.03, "learning_rate": 4.955144141499349e-05, "loss": 0.0887, "step": 398500 }, { "epoch": 0.03, "learning_rate": 4.955087836885986e-05, "loss": 0.0832, "step": 399000 }, { "epoch": 0.03, "learning_rate": 4.955031532272622e-05, "loss": 0.0837, "step": 399500 }, { "epoch": 0.03, "learning_rate": 4.9549752276592585e-05, "loss": 0.0862, "step": 400000 }, { "epoch": 0.03, "learning_rate": 4.954919035655122e-05, "loss": 0.0915, "step": 400500 }, { "epoch": 0.03, "learning_rate": 4.954862731041758e-05, "loss": 0.0871, "step": 401000 }, { "epoch": 0.03, "learning_rate": 4.954806426428395e-05, "loss": 0.0839, "step": 401500 }, { "epoch": 0.03, "learning_rate": 4.954750121815031e-05, "loss": 0.0851, "step": 402000 }, { "epoch": 0.03, "learning_rate": 4.954693929810895e-05, "loss": 0.0828, "step": 402500 }, { "epoch": 0.03, "learning_rate": 4.9546376251975305e-05, "loss": 0.0859, "step": 403000 }, { "epoch": 0.03, "learning_rate": 4.9545813205841676e-05, "loss": 0.0851, "step": 403500 }, { "epoch": 0.03, "learning_rate": 4.954525015970804e-05, "loss": 0.0872, "step": 404000 }, { "epoch": 0.03, "learning_rate": 4.954468823966667e-05, "loss": 0.0812, "step": 404500 }, { "epoch": 0.03, "learning_rate": 4.95441263196253e-05, "loss": 0.0838, "step": 405000 }, { "epoch": 0.03, "learning_rate": 4.954356327349167e-05, "loss": 0.0872, "step": 405500 }, { "epoch": 0.03, "learning_rate": 4.954300022735803e-05, "loss": 0.0839, "step": 406000 }, { "epoch": 0.03, "learning_rate": 4.9542437181224396e-05, "loss": 0.0924, "step": 406500 }, { "epoch": 0.03, "learning_rate": 4.954187413509076e-05, "loss": 0.0823, "step": 407000 }, { "epoch": 0.03, "learning_rate": 4.9541311088957124e-05, "loss": 0.0795, "step": 407500 }, { "epoch": 0.03, "learning_rate": 4.954074804282349e-05, "loss": 0.0835, "step": 408000 }, { "epoch": 0.03, "learning_rate": 4.954018499668985e-05, "loss": 0.0913, "step": 408500 }, { "epoch": 0.03, "learning_rate": 4.9539623076648484e-05, "loss": 0.0821, "step": 409000 }, { "epoch": 0.03, "learning_rate": 4.953906003051485e-05, "loss": 0.0822, "step": 409500 }, { "epoch": 0.03, "learning_rate": 4.953849698438122e-05, "loss": 0.0848, "step": 410000 }, { "epoch": 0.03, "learning_rate": 4.9537933938247576e-05, "loss": 0.0855, "step": 410500 }, { "epoch": 0.03, "learning_rate": 4.953737089211394e-05, "loss": 0.0866, "step": 411000 }, { "epoch": 0.03, "learning_rate": 4.953680784598031e-05, "loss": 0.0837, "step": 411500 }, { "epoch": 0.03, "learning_rate": 4.953624592593894e-05, "loss": 0.0783, "step": 412000 }, { "epoch": 0.03, "learning_rate": 4.953568287980531e-05, "loss": 0.0854, "step": 412500 }, { "epoch": 0.03, "learning_rate": 4.953511983367167e-05, "loss": 0.0849, "step": 413000 }, { "epoch": 0.03, "learning_rate": 4.9534556787538035e-05, "loss": 0.0815, "step": 413500 }, { "epoch": 0.03, "learning_rate": 4.953399486749667e-05, "loss": 0.087, "step": 414000 }, { "epoch": 0.03, "learning_rate": 4.953343182136303e-05, "loss": 0.0842, "step": 414500 }, { "epoch": 0.03, "learning_rate": 4.9532868775229396e-05, "loss": 0.0844, "step": 415000 }, { "epoch": 0.03, "learning_rate": 4.953230572909576e-05, "loss": 0.0828, "step": 415500 }, { "epoch": 0.03, "learning_rate": 4.9531742682962124e-05, "loss": 0.0814, "step": 416000 }, { "epoch": 0.03, "learning_rate": 4.9531180762920756e-05, "loss": 0.0836, "step": 416500 }, { "epoch": 0.03, "learning_rate": 4.953061771678712e-05, "loss": 0.0839, "step": 417000 }, { "epoch": 0.03, "learning_rate": 4.9530054670653484e-05, "loss": 0.0861, "step": 417500 }, { "epoch": 0.03, "learning_rate": 4.9529491624519855e-05, "loss": 0.0815, "step": 418000 }, { "epoch": 0.03, "learning_rate": 4.952892857838621e-05, "loss": 0.085, "step": 418500 }, { "epoch": 0.03, "learning_rate": 4.952836553225258e-05, "loss": 0.0815, "step": 419000 }, { "epoch": 0.03, "learning_rate": 4.952780361221121e-05, "loss": 0.0861, "step": 419500 }, { "epoch": 0.03, "learning_rate": 4.952724056607758e-05, "loss": 0.0885, "step": 420000 }, { "epoch": 0.03, "learning_rate": 4.952667751994394e-05, "loss": 0.0846, "step": 420500 }, { "epoch": 0.03, "learning_rate": 4.95261144738103e-05, "loss": 0.0842, "step": 421000 }, { "epoch": 0.03, "learning_rate": 4.952555142767667e-05, "loss": 0.0773, "step": 421500 }, { "epoch": 0.03, "learning_rate": 4.95249895076353e-05, "loss": 0.0821, "step": 422000 }, { "epoch": 0.03, "learning_rate": 4.952442646150167e-05, "loss": 0.0857, "step": 422500 }, { "epoch": 0.03, "learning_rate": 4.952386341536803e-05, "loss": 0.0879, "step": 423000 }, { "epoch": 0.03, "learning_rate": 4.9523300369234395e-05, "loss": 0.0827, "step": 423500 }, { "epoch": 0.03, "learning_rate": 4.952273732310076e-05, "loss": 0.0843, "step": 424000 }, { "epoch": 0.03, "learning_rate": 4.952217540305939e-05, "loss": 0.0787, "step": 424500 }, { "epoch": 0.03, "learning_rate": 4.9521612356925755e-05, "loss": 0.0798, "step": 425000 }, { "epoch": 0.03, "learning_rate": 4.952104931079212e-05, "loss": 0.0787, "step": 425500 }, { "epoch": 0.03, "learning_rate": 4.952048626465849e-05, "loss": 0.0934, "step": 426000 }, { "epoch": 0.03, "learning_rate": 4.951992434461712e-05, "loss": 0.0832, "step": 426500 }, { "epoch": 0.03, "learning_rate": 4.9519361298483486e-05, "loss": 0.0926, "step": 427000 }, { "epoch": 0.03, "learning_rate": 4.951879825234984e-05, "loss": 0.0776, "step": 427500 }, { "epoch": 0.03, "learning_rate": 4.9518235206216214e-05, "loss": 0.0861, "step": 428000 }, { "epoch": 0.03, "learning_rate": 4.951767216008258e-05, "loss": 0.0812, "step": 428500 }, { "epoch": 0.03, "learning_rate": 4.951711024004121e-05, "loss": 0.0814, "step": 429000 }, { "epoch": 0.03, "learning_rate": 4.9516547193907574e-05, "loss": 0.0888, "step": 429500 }, { "epoch": 0.03, "learning_rate": 4.951598414777394e-05, "loss": 0.0859, "step": 430000 }, { "epoch": 0.03, "learning_rate": 4.95154211016403e-05, "loss": 0.0822, "step": 430500 }, { "epoch": 0.03, "learning_rate": 4.9514858055506666e-05, "loss": 0.0845, "step": 431000 }, { "epoch": 0.03, "learning_rate": 4.95142961354653e-05, "loss": 0.08, "step": 431500 }, { "epoch": 0.03, "learning_rate": 4.951373308933166e-05, "loss": 0.0807, "step": 432000 }, { "epoch": 0.03, "learning_rate": 4.9513170043198027e-05, "loss": 0.0846, "step": 432500 }, { "epoch": 0.03, "learning_rate": 4.951260699706439e-05, "loss": 0.0846, "step": 433000 }, { "epoch": 0.03, "learning_rate": 4.951204395093076e-05, "loss": 0.0821, "step": 433500 }, { "epoch": 0.03, "learning_rate": 4.951148090479712e-05, "loss": 0.0828, "step": 434000 }, { "epoch": 0.03, "learning_rate": 4.951091785866348e-05, "loss": 0.0783, "step": 434500 }, { "epoch": 0.03, "learning_rate": 4.9510355938622115e-05, "loss": 0.0792, "step": 435000 }, { "epoch": 0.03, "learning_rate": 4.9509792892488486e-05, "loss": 0.0826, "step": 435500 }, { "epoch": 0.03, "learning_rate": 4.950922984635485e-05, "loss": 0.0776, "step": 436000 }, { "epoch": 0.03, "learning_rate": 4.950866680022121e-05, "loss": 0.0787, "step": 436500 }, { "epoch": 0.03, "learning_rate": 4.9508104880179846e-05, "loss": 0.0844, "step": 437000 }, { "epoch": 0.03, "learning_rate": 4.95075418340462e-05, "loss": 0.0839, "step": 437500 }, { "epoch": 0.03, "learning_rate": 4.9506978787912574e-05, "loss": 0.0855, "step": 438000 }, { "epoch": 0.03, "learning_rate": 4.950641574177894e-05, "loss": 0.0831, "step": 438500 }, { "epoch": 0.03, "learning_rate": 4.95058526956453e-05, "loss": 0.082, "step": 439000 }, { "epoch": 0.03, "learning_rate": 4.9505289649511666e-05, "loss": 0.083, "step": 439500 }, { "epoch": 0.03, "learning_rate": 4.95047277294703e-05, "loss": 0.0824, "step": 440000 }, { "epoch": 0.03, "learning_rate": 4.950416468333666e-05, "loss": 0.081, "step": 440500 }, { "epoch": 0.03, "learning_rate": 4.9503601637203026e-05, "loss": 0.0858, "step": 441000 }, { "epoch": 0.03, "learning_rate": 4.95030385910694e-05, "loss": 0.0784, "step": 441500 }, { "epoch": 0.03, "learning_rate": 4.950247667102802e-05, "loss": 0.0821, "step": 442000 }, { "epoch": 0.03, "learning_rate": 4.950191362489439e-05, "loss": 0.0819, "step": 442500 }, { "epoch": 0.03, "learning_rate": 4.950135057876075e-05, "loss": 0.0819, "step": 443000 }, { "epoch": 0.03, "learning_rate": 4.950078753262712e-05, "loss": 0.0885, "step": 443500 }, { "epoch": 0.03, "learning_rate": 4.9500225612585746e-05, "loss": 0.087, "step": 444000 }, { "epoch": 0.03, "learning_rate": 4.949966256645212e-05, "loss": 0.0809, "step": 444500 }, { "epoch": 0.03, "learning_rate": 4.949909952031848e-05, "loss": 0.0852, "step": 445000 }, { "epoch": 0.03, "learning_rate": 4.9498536474184845e-05, "loss": 0.0781, "step": 445500 }, { "epoch": 0.03, "learning_rate": 4.949797342805121e-05, "loss": 0.0856, "step": 446000 }, { "epoch": 0.03, "learning_rate": 4.949741150800984e-05, "loss": 0.0818, "step": 446500 }, { "epoch": 0.03, "learning_rate": 4.9496848461876205e-05, "loss": 0.0831, "step": 447000 }, { "epoch": 0.03, "learning_rate": 4.949628541574257e-05, "loss": 0.0865, "step": 447500 }, { "epoch": 0.03, "learning_rate": 4.949572236960893e-05, "loss": 0.0783, "step": 448000 }, { "epoch": 0.03, "learning_rate": 4.94951593234753e-05, "loss": 0.0807, "step": 448500 }, { "epoch": 0.03, "learning_rate": 4.949459740343393e-05, "loss": 0.0795, "step": 449000 }, { "epoch": 0.03, "learning_rate": 4.9494034357300293e-05, "loss": 0.0848, "step": 449500 }, { "epoch": 0.03, "learning_rate": 4.9493471311166664e-05, "loss": 0.0791, "step": 450000 }, { "epoch": 0.03, "learning_rate": 4.949290826503302e-05, "loss": 0.0794, "step": 450500 }, { "epoch": 0.03, "learning_rate": 4.949234634499166e-05, "loss": 0.0847, "step": 451000 }, { "epoch": 0.03, "learning_rate": 4.949178329885802e-05, "loss": 0.0834, "step": 451500 }, { "epoch": 0.03, "learning_rate": 4.949122025272439e-05, "loss": 0.0749, "step": 452000 }, { "epoch": 0.03, "learning_rate": 4.949065720659075e-05, "loss": 0.078, "step": 452500 }, { "epoch": 0.03, "learning_rate": 4.9490095286549385e-05, "loss": 0.0826, "step": 453000 }, { "epoch": 0.03, "learning_rate": 4.948953224041575e-05, "loss": 0.0828, "step": 453500 }, { "epoch": 0.03, "learning_rate": 4.9488969194282106e-05, "loss": 0.0805, "step": 454000 }, { "epoch": 0.03, "learning_rate": 4.948840614814848e-05, "loss": 0.0771, "step": 454500 }, { "epoch": 0.03, "learning_rate": 4.948784310201484e-05, "loss": 0.0829, "step": 455000 }, { "epoch": 0.03, "learning_rate": 4.948728118197347e-05, "loss": 0.0819, "step": 455500 }, { "epoch": 0.03, "learning_rate": 4.948671813583984e-05, "loss": 0.078, "step": 456000 }, { "epoch": 0.03, "learning_rate": 4.948615508970621e-05, "loss": 0.0827, "step": 456500 }, { "epoch": 0.03, "learning_rate": 4.9485592043572565e-05, "loss": 0.0769, "step": 457000 }, { "epoch": 0.03, "learning_rate": 4.948502899743893e-05, "loss": 0.0836, "step": 457500 }, { "epoch": 0.03, "learning_rate": 4.948446707739756e-05, "loss": 0.085, "step": 458000 }, { "epoch": 0.03, "learning_rate": 4.9483904031263925e-05, "loss": 0.0839, "step": 458500 }, { "epoch": 0.03, "learning_rate": 4.9483340985130296e-05, "loss": 0.0834, "step": 459000 }, { "epoch": 0.03, "learning_rate": 4.948277793899665e-05, "loss": 0.0803, "step": 459500 }, { "epoch": 0.03, "learning_rate": 4.9482214892863024e-05, "loss": 0.0803, "step": 460000 }, { "epoch": 0.03, "learning_rate": 4.948165297282165e-05, "loss": 0.0826, "step": 460500 }, { "epoch": 0.03, "learning_rate": 4.948108992668802e-05, "loss": 0.083, "step": 461000 }, { "epoch": 0.03, "learning_rate": 4.9480526880554384e-05, "loss": 0.0852, "step": 461500 }, { "epoch": 0.03, "learning_rate": 4.947996383442075e-05, "loss": 0.0856, "step": 462000 }, { "epoch": 0.03, "learning_rate": 4.947940078828711e-05, "loss": 0.0877, "step": 462500 }, { "epoch": 0.03, "learning_rate": 4.9478838868245744e-05, "loss": 0.0792, "step": 463000 }, { "epoch": 0.03, "learning_rate": 4.947827582211211e-05, "loss": 0.085, "step": 463500 }, { "epoch": 0.03, "learning_rate": 4.947771277597847e-05, "loss": 0.0836, "step": 464000 }, { "epoch": 0.03, "learning_rate": 4.9477149729844836e-05, "loss": 0.0816, "step": 464500 }, { "epoch": 0.03, "learning_rate": 4.947658780980347e-05, "loss": 0.0769, "step": 465000 }, { "epoch": 0.03, "learning_rate": 4.947602476366983e-05, "loss": 0.0791, "step": 465500 }, { "epoch": 0.03, "learning_rate": 4.9475461717536196e-05, "loss": 0.0831, "step": 466000 }, { "epoch": 0.03, "learning_rate": 4.947489867140257e-05, "loss": 0.0806, "step": 466500 }, { "epoch": 0.03, "learning_rate": 4.9474335625268924e-05, "loss": 0.0776, "step": 467000 }, { "epoch": 0.03, "learning_rate": 4.947377370522756e-05, "loss": 0.0811, "step": 467500 }, { "epoch": 0.03, "learning_rate": 4.947321065909392e-05, "loss": 0.0794, "step": 468000 }, { "epoch": 0.03, "learning_rate": 4.9472647612960285e-05, "loss": 0.08, "step": 468500 }, { "epoch": 0.03, "learning_rate": 4.9472084566826655e-05, "loss": 0.0796, "step": 469000 }, { "epoch": 0.03, "learning_rate": 4.947152152069301e-05, "loss": 0.085, "step": 469500 }, { "epoch": 0.03, "learning_rate": 4.947095960065165e-05, "loss": 0.081, "step": 470000 }, { "epoch": 0.03, "learning_rate": 4.947039655451801e-05, "loss": 0.0833, "step": 470500 }, { "epoch": 0.03, "learning_rate": 4.946983350838438e-05, "loss": 0.0834, "step": 471000 }, { "epoch": 0.03, "learning_rate": 4.9469270462250744e-05, "loss": 0.0821, "step": 471500 }, { "epoch": 0.03, "learning_rate": 4.9468708542209376e-05, "loss": 0.0759, "step": 472000 }, { "epoch": 0.03, "learning_rate": 4.946814549607574e-05, "loss": 0.0831, "step": 472500 }, { "epoch": 0.03, "learning_rate": 4.9467582449942104e-05, "loss": 0.0752, "step": 473000 }, { "epoch": 0.03, "learning_rate": 4.946701940380847e-05, "loss": 0.0806, "step": 473500 }, { "epoch": 0.03, "learning_rate": 4.946645635767483e-05, "loss": 0.0791, "step": 474000 }, { "epoch": 0.03, "learning_rate": 4.94658933115412e-05, "loss": 0.0797, "step": 474500 }, { "epoch": 0.03, "learning_rate": 4.946533139149983e-05, "loss": 0.0833, "step": 475000 }, { "epoch": 0.03, "learning_rate": 4.94647683453662e-05, "loss": 0.0785, "step": 475500 }, { "epoch": 0.03, "learning_rate": 4.9464205299232556e-05, "loss": 0.0814, "step": 476000 }, { "epoch": 0.03, "learning_rate": 4.946364225309893e-05, "loss": 0.0831, "step": 476500 }, { "epoch": 0.03, "learning_rate": 4.946307920696529e-05, "loss": 0.0828, "step": 477000 }, { "epoch": 0.03, "learning_rate": 4.946251728692392e-05, "loss": 0.0842, "step": 477500 }, { "epoch": 0.03, "learning_rate": 4.946195424079029e-05, "loss": 0.0788, "step": 478000 }, { "epoch": 0.03, "learning_rate": 4.946139119465665e-05, "loss": 0.0785, "step": 478500 }, { "epoch": 0.03, "learning_rate": 4.9460828148523015e-05, "loss": 0.0774, "step": 479000 }, { "epoch": 0.03, "learning_rate": 4.946026622848165e-05, "loss": 0.0768, "step": 479500 }, { "epoch": 0.03, "learning_rate": 4.945970318234801e-05, "loss": 0.0845, "step": 480000 }, { "epoch": 0.03, "learning_rate": 4.9459140136214375e-05, "loss": 0.0827, "step": 480500 }, { "epoch": 0.03, "learning_rate": 4.945857709008074e-05, "loss": 0.0836, "step": 481000 }, { "epoch": 0.03, "learning_rate": 4.94580140439471e-05, "loss": 0.0801, "step": 481500 }, { "epoch": 0.03, "learning_rate": 4.945745099781347e-05, "loss": 0.0787, "step": 482000 }, { "epoch": 0.03, "learning_rate": 4.94568890777721e-05, "loss": 0.077, "step": 482500 }, { "epoch": 0.03, "learning_rate": 4.945632603163847e-05, "loss": 0.0805, "step": 483000 }, { "epoch": 0.03, "learning_rate": 4.945576298550483e-05, "loss": 0.08, "step": 483500 }, { "epoch": 0.03, "learning_rate": 4.945519993937119e-05, "loss": 0.0817, "step": 484000 }, { "epoch": 0.03, "learning_rate": 4.9454638019329824e-05, "loss": 0.0784, "step": 484500 }, { "epoch": 0.03, "learning_rate": 4.945407497319619e-05, "loss": 0.0782, "step": 485000 }, { "epoch": 0.03, "learning_rate": 4.945351192706256e-05, "loss": 0.0793, "step": 485500 }, { "epoch": 0.03, "learning_rate": 4.9452948880928916e-05, "loss": 0.0804, "step": 486000 }, { "epoch": 0.03, "learning_rate": 4.9452385834795286e-05, "loss": 0.0773, "step": 486500 }, { "epoch": 0.03, "learning_rate": 4.945182391475391e-05, "loss": 0.0778, "step": 487000 }, { "epoch": 0.03, "learning_rate": 4.945126086862028e-05, "loss": 0.078, "step": 487500 }, { "epoch": 0.03, "learning_rate": 4.9450697822486647e-05, "loss": 0.0832, "step": 488000 }, { "epoch": 0.03, "learning_rate": 4.945013477635301e-05, "loss": 0.0817, "step": 488500 }, { "epoch": 0.03, "learning_rate": 4.944957285631164e-05, "loss": 0.0811, "step": 489000 }, { "epoch": 0.03, "learning_rate": 4.944900981017801e-05, "loss": 0.0735, "step": 489500 }, { "epoch": 0.03, "learning_rate": 4.944844676404437e-05, "loss": 0.0735, "step": 490000 }, { "epoch": 0.03, "learning_rate": 4.9447883717910735e-05, "loss": 0.0727, "step": 490500 }, { "epoch": 0.03, "learning_rate": 4.944732179786937e-05, "loss": 0.081, "step": 491000 }, { "epoch": 0.03, "learning_rate": 4.944675875173573e-05, "loss": 0.0735, "step": 491500 }, { "epoch": 0.03, "learning_rate": 4.94461957056021e-05, "loss": 0.08, "step": 492000 }, { "epoch": 0.03, "learning_rate": 4.944563265946846e-05, "loss": 0.0773, "step": 492500 }, { "epoch": 0.03, "learning_rate": 4.944506961333483e-05, "loss": 0.0759, "step": 493000 }, { "epoch": 0.03, "learning_rate": 4.9444506567201194e-05, "loss": 0.0834, "step": 493500 }, { "epoch": 0.03, "learning_rate": 4.944394352106755e-05, "loss": 0.0759, "step": 494000 }, { "epoch": 0.03, "learning_rate": 4.944338160102619e-05, "loss": 0.0812, "step": 494500 }, { "epoch": 0.03, "learning_rate": 4.944281855489255e-05, "loss": 0.0808, "step": 495000 }, { "epoch": 0.03, "learning_rate": 4.944225550875892e-05, "loss": 0.0834, "step": 495500 }, { "epoch": 0.03, "learning_rate": 4.944169246262528e-05, "loss": 0.0791, "step": 496000 }, { "epoch": 0.03, "learning_rate": 4.9441129416491646e-05, "loss": 0.0842, "step": 496500 }, { "epoch": 0.03, "learning_rate": 4.944056749645028e-05, "loss": 0.0793, "step": 497000 }, { "epoch": 0.03, "learning_rate": 4.944000445031664e-05, "loss": 0.0787, "step": 497500 }, { "epoch": 0.03, "learning_rate": 4.9439441404183006e-05, "loss": 0.0828, "step": 498000 }, { "epoch": 0.03, "learning_rate": 4.943887948414164e-05, "loss": 0.0816, "step": 498500 }, { "epoch": 0.03, "learning_rate": 4.9438316438008e-05, "loss": 0.0831, "step": 499000 }, { "epoch": 0.03, "learning_rate": 4.9437753391874366e-05, "loss": 0.0814, "step": 499500 }, { "epoch": 0.03, "learning_rate": 4.943719034574073e-05, "loss": 0.0776, "step": 500000 }, { "epoch": 0.03, "learning_rate": 4.9436627299607094e-05, "loss": 0.0812, "step": 500500 }, { "epoch": 0.03, "learning_rate": 4.9436064253473465e-05, "loss": 0.0836, "step": 501000 }, { "epoch": 0.03, "learning_rate": 4.943550120733983e-05, "loss": 0.0811, "step": 501500 }, { "epoch": 0.03, "learning_rate": 4.943493816120619e-05, "loss": 0.0762, "step": 502000 }, { "epoch": 0.03, "learning_rate": 4.943437511507256e-05, "loss": 0.084, "step": 502500 }, { "epoch": 0.03, "learning_rate": 4.943381319503119e-05, "loss": 0.082, "step": 503000 }, { "epoch": 0.03, "learning_rate": 4.943325014889755e-05, "loss": 0.0765, "step": 503500 }, { "epoch": 0.03, "learning_rate": 4.943268710276392e-05, "loss": 0.0734, "step": 504000 }, { "epoch": 0.03, "learning_rate": 4.943212405663028e-05, "loss": 0.0807, "step": 504500 }, { "epoch": 0.03, "learning_rate": 4.9431562136588913e-05, "loss": 0.0801, "step": 505000 }, { "epoch": 0.03, "learning_rate": 4.943099909045528e-05, "loss": 0.0778, "step": 505500 }, { "epoch": 0.03, "learning_rate": 4.943043604432164e-05, "loss": 0.0771, "step": 506000 }, { "epoch": 0.03, "learning_rate": 4.942987299818801e-05, "loss": 0.0798, "step": 506500 }, { "epoch": 0.03, "learning_rate": 4.942931107814664e-05, "loss": 0.0815, "step": 507000 }, { "epoch": 0.03, "learning_rate": 4.942874803201301e-05, "loss": 0.0781, "step": 507500 }, { "epoch": 0.03, "learning_rate": 4.9428184985879366e-05, "loss": 0.0816, "step": 508000 }, { "epoch": 0.03, "learning_rate": 4.942762193974573e-05, "loss": 0.0726, "step": 508500 }, { "epoch": 0.03, "learning_rate": 4.94270588936121e-05, "loss": 0.0795, "step": 509000 }, { "epoch": 0.03, "learning_rate": 4.942649584747846e-05, "loss": 0.0796, "step": 509500 }, { "epoch": 0.03, "learning_rate": 4.9425933927437097e-05, "loss": 0.0742, "step": 510000 }, { "epoch": 0.03, "learning_rate": 4.9425370881303454e-05, "loss": 0.0822, "step": 510500 }, { "epoch": 0.03, "learning_rate": 4.9424807835169825e-05, "loss": 0.075, "step": 511000 }, { "epoch": 0.03, "learning_rate": 4.942424478903619e-05, "loss": 0.0802, "step": 511500 }, { "epoch": 0.03, "learning_rate": 4.942368286899482e-05, "loss": 0.0749, "step": 512000 }, { "epoch": 0.03, "learning_rate": 4.9423119822861185e-05, "loss": 0.0772, "step": 512500 }, { "epoch": 0.03, "learning_rate": 4.942255677672755e-05, "loss": 0.083, "step": 513000 }, { "epoch": 0.03, "learning_rate": 4.942199373059391e-05, "loss": 0.0754, "step": 513500 }, { "epoch": 0.03, "learning_rate": 4.9421431810552545e-05, "loss": 0.0782, "step": 514000 }, { "epoch": 0.03, "learning_rate": 4.942086876441891e-05, "loss": 0.0768, "step": 514500 }, { "epoch": 0.03, "learning_rate": 4.942030571828527e-05, "loss": 0.0791, "step": 515000 }, { "epoch": 0.03, "learning_rate": 4.941974267215164e-05, "loss": 0.0817, "step": 515500 }, { "epoch": 0.03, "learning_rate": 4.9419179626018e-05, "loss": 0.0771, "step": 516000 }, { "epoch": 0.03, "learning_rate": 4.941861770597663e-05, "loss": 0.0779, "step": 516500 }, { "epoch": 0.03, "learning_rate": 4.9418054659843e-05, "loss": 0.0811, "step": 517000 }, { "epoch": 0.03, "learning_rate": 4.941749161370937e-05, "loss": 0.0826, "step": 517500 }, { "epoch": 0.03, "learning_rate": 4.941692856757573e-05, "loss": 0.0814, "step": 518000 }, { "epoch": 0.04, "learning_rate": 4.9416366647534364e-05, "loss": 0.0752, "step": 518500 }, { "epoch": 0.04, "learning_rate": 4.941580360140073e-05, "loss": 0.0706, "step": 519000 }, { "epoch": 0.04, "learning_rate": 4.941524055526709e-05, "loss": 0.0838, "step": 519500 }, { "epoch": 0.04, "learning_rate": 4.9414677509133456e-05, "loss": 0.0782, "step": 520000 }, { "epoch": 0.04, "learning_rate": 4.941411446299982e-05, "loss": 0.0776, "step": 520500 }, { "epoch": 0.04, "learning_rate": 4.941355254295845e-05, "loss": 0.0792, "step": 521000 }, { "epoch": 0.04, "learning_rate": 4.9412989496824816e-05, "loss": 0.0724, "step": 521500 }, { "epoch": 0.04, "learning_rate": 4.941242645069118e-05, "loss": 0.0744, "step": 522000 }, { "epoch": 0.04, "learning_rate": 4.9411863404557544e-05, "loss": 0.0847, "step": 522500 }, { "epoch": 0.04, "learning_rate": 4.9411301484516177e-05, "loss": 0.0799, "step": 523000 }, { "epoch": 0.04, "learning_rate": 4.941073843838254e-05, "loss": 0.0783, "step": 523500 }, { "epoch": 0.04, "learning_rate": 4.941017539224891e-05, "loss": 0.0794, "step": 524000 }, { "epoch": 0.04, "learning_rate": 4.940961234611527e-05, "loss": 0.0775, "step": 524500 }, { "epoch": 0.04, "learning_rate": 4.940904929998163e-05, "loss": 0.0832, "step": 525000 }, { "epoch": 0.04, "learning_rate": 4.9408487379940265e-05, "loss": 0.0772, "step": 525500 }, { "epoch": 0.04, "learning_rate": 4.9407924333806636e-05, "loss": 0.0739, "step": 526000 }, { "epoch": 0.04, "learning_rate": 4.9407361287673e-05, "loss": 0.0786, "step": 526500 }, { "epoch": 0.04, "learning_rate": 4.940679824153936e-05, "loss": 0.0794, "step": 527000 }, { "epoch": 0.04, "learning_rate": 4.940623519540573e-05, "loss": 0.0802, "step": 527500 }, { "epoch": 0.04, "learning_rate": 4.940567327536435e-05, "loss": 0.0774, "step": 528000 }, { "epoch": 0.04, "learning_rate": 4.9405110229230724e-05, "loss": 0.0815, "step": 528500 }, { "epoch": 0.04, "learning_rate": 4.940454718309709e-05, "loss": 0.0843, "step": 529000 }, { "epoch": 0.04, "learning_rate": 4.940398413696345e-05, "loss": 0.0809, "step": 529500 }, { "epoch": 0.04, "learning_rate": 4.9403421090829816e-05, "loss": 0.0741, "step": 530000 }, { "epoch": 0.04, "learning_rate": 4.940285917078845e-05, "loss": 0.0752, "step": 530500 }, { "epoch": 0.04, "learning_rate": 4.940229725074708e-05, "loss": 0.0752, "step": 531000 }, { "epoch": 0.04, "learning_rate": 4.9401734204613444e-05, "loss": 0.0786, "step": 531500 }, { "epoch": 0.04, "learning_rate": 4.940117115847981e-05, "loss": 0.0764, "step": 532000 }, { "epoch": 0.04, "learning_rate": 4.940060811234617e-05, "loss": 0.0784, "step": 532500 }, { "epoch": 0.04, "learning_rate": 4.9400045066212536e-05, "loss": 0.0793, "step": 533000 }, { "epoch": 0.04, "learning_rate": 4.93994820200789e-05, "loss": 0.0799, "step": 533500 }, { "epoch": 0.04, "learning_rate": 4.939891897394527e-05, "loss": 0.0792, "step": 534000 }, { "epoch": 0.04, "learning_rate": 4.9398355927811635e-05, "loss": 0.089, "step": 534500 }, { "epoch": 0.04, "learning_rate": 4.939779400777027e-05, "loss": 0.0719, "step": 535000 }, { "epoch": 0.04, "learning_rate": 4.939723096163663e-05, "loss": 0.0757, "step": 535500 }, { "epoch": 0.04, "learning_rate": 4.9396667915502995e-05, "loss": 0.0779, "step": 536000 }, { "epoch": 0.04, "learning_rate": 4.939610486936936e-05, "loss": 0.0781, "step": 536500 }, { "epoch": 0.04, "learning_rate": 4.939554294932799e-05, "loss": 0.0773, "step": 537000 }, { "epoch": 0.04, "learning_rate": 4.9394979903194355e-05, "loss": 0.079, "step": 537500 }, { "epoch": 0.04, "learning_rate": 4.939441685706072e-05, "loss": 0.0735, "step": 538000 }, { "epoch": 0.04, "learning_rate": 4.939385381092708e-05, "loss": 0.0824, "step": 538500 }, { "epoch": 0.04, "learning_rate": 4.939329076479345e-05, "loss": 0.0763, "step": 539000 }, { "epoch": 0.04, "learning_rate": 4.939272884475208e-05, "loss": 0.0823, "step": 539500 }, { "epoch": 0.04, "learning_rate": 4.9392165798618443e-05, "loss": 0.0756, "step": 540000 }, { "epoch": 0.04, "learning_rate": 4.9391602752484814e-05, "loss": 0.0837, "step": 540500 }, { "epoch": 0.04, "learning_rate": 4.939103970635117e-05, "loss": 0.077, "step": 541000 }, { "epoch": 0.04, "learning_rate": 4.9390476660217535e-05, "loss": 0.0757, "step": 541500 }, { "epoch": 0.04, "learning_rate": 4.938991474017617e-05, "loss": 0.082, "step": 542000 }, { "epoch": 0.04, "learning_rate": 4.938935169404253e-05, "loss": 0.0765, "step": 542500 }, { "epoch": 0.04, "learning_rate": 4.93887886479089e-05, "loss": 0.0748, "step": 543000 }, { "epoch": 0.04, "learning_rate": 4.938822560177526e-05, "loss": 0.0815, "step": 543500 }, { "epoch": 0.04, "learning_rate": 4.938766255564163e-05, "loss": 0.0764, "step": 544000 }, { "epoch": 0.04, "learning_rate": 4.9387100635600256e-05, "loss": 0.0782, "step": 544500 }, { "epoch": 0.04, "learning_rate": 4.938653758946663e-05, "loss": 0.0832, "step": 545000 }, { "epoch": 0.04, "learning_rate": 4.938597454333299e-05, "loss": 0.0765, "step": 545500 }, { "epoch": 0.04, "learning_rate": 4.9385411497199355e-05, "loss": 0.0748, "step": 546000 }, { "epoch": 0.04, "learning_rate": 4.938484845106572e-05, "loss": 0.0769, "step": 546500 }, { "epoch": 0.04, "learning_rate": 4.938428653102435e-05, "loss": 0.0808, "step": 547000 }, { "epoch": 0.04, "learning_rate": 4.9383723484890715e-05, "loss": 0.0773, "step": 547500 }, { "epoch": 0.04, "learning_rate": 4.938316043875708e-05, "loss": 0.0832, "step": 548000 }, { "epoch": 0.04, "learning_rate": 4.938259739262345e-05, "loss": 0.0774, "step": 548500 }, { "epoch": 0.04, "learning_rate": 4.9382035472582075e-05, "loss": 0.0802, "step": 549000 }, { "epoch": 0.04, "learning_rate": 4.9381472426448446e-05, "loss": 0.0692, "step": 549500 }, { "epoch": 0.04, "learning_rate": 4.93809093803148e-05, "loss": 0.0741, "step": 550000 }, { "epoch": 0.04, "learning_rate": 4.9380346334181174e-05, "loss": 0.0715, "step": 550500 }, { "epoch": 0.04, "learning_rate": 4.93797844141398e-05, "loss": 0.0774, "step": 551000 }, { "epoch": 0.04, "learning_rate": 4.937922136800617e-05, "loss": 0.0761, "step": 551500 }, { "epoch": 0.04, "learning_rate": 4.9378658321872534e-05, "loss": 0.0728, "step": 552000 }, { "epoch": 0.04, "learning_rate": 4.93780952757389e-05, "loss": 0.0821, "step": 552500 }, { "epoch": 0.04, "learning_rate": 4.937753335569753e-05, "loss": 0.0749, "step": 553000 }, { "epoch": 0.04, "learning_rate": 4.9376970309563894e-05, "loss": 0.0733, "step": 553500 }, { "epoch": 0.04, "learning_rate": 4.937640726343026e-05, "loss": 0.0725, "step": 554000 }, { "epoch": 0.04, "learning_rate": 4.937584421729662e-05, "loss": 0.0782, "step": 554500 }, { "epoch": 0.04, "learning_rate": 4.9375281171162986e-05, "loss": 0.0751, "step": 555000 }, { "epoch": 0.04, "learning_rate": 4.937471925112162e-05, "loss": 0.0741, "step": 555500 }, { "epoch": 0.04, "learning_rate": 4.937415620498798e-05, "loss": 0.0759, "step": 556000 }, { "epoch": 0.04, "learning_rate": 4.9373593158854346e-05, "loss": 0.0781, "step": 556500 }, { "epoch": 0.04, "learning_rate": 4.937303011272072e-05, "loss": 0.0803, "step": 557000 }, { "epoch": 0.04, "learning_rate": 4.937246819267934e-05, "loss": 0.0765, "step": 557500 }, { "epoch": 0.04, "learning_rate": 4.9371905146545713e-05, "loss": 0.0765, "step": 558000 }, { "epoch": 0.04, "learning_rate": 4.937134210041207e-05, "loss": 0.077, "step": 558500 }, { "epoch": 0.04, "learning_rate": 4.937078018037071e-05, "loss": 0.0752, "step": 559000 }, { "epoch": 0.04, "learning_rate": 4.937021713423707e-05, "loss": 0.0808, "step": 559500 }, { "epoch": 0.04, "learning_rate": 4.936965408810343e-05, "loss": 0.0768, "step": 560000 }, { "epoch": 0.04, "learning_rate": 4.93690910419698e-05, "loss": 0.0789, "step": 560500 }, { "epoch": 0.04, "learning_rate": 4.936852799583616e-05, "loss": 0.0766, "step": 561000 }, { "epoch": 0.04, "learning_rate": 4.936796494970253e-05, "loss": 0.0792, "step": 561500 }, { "epoch": 0.04, "learning_rate": 4.9367401903568894e-05, "loss": 0.0786, "step": 562000 }, { "epoch": 0.04, "learning_rate": 4.936683885743526e-05, "loss": 0.077, "step": 562500 }, { "epoch": 0.04, "learning_rate": 4.936627581130162e-05, "loss": 0.0771, "step": 563000 }, { "epoch": 0.04, "learning_rate": 4.9365713891260254e-05, "loss": 0.071, "step": 563500 }, { "epoch": 0.04, "learning_rate": 4.936515084512662e-05, "loss": 0.0773, "step": 564000 }, { "epoch": 0.04, "learning_rate": 4.936458779899298e-05, "loss": 0.0808, "step": 564500 }, { "epoch": 0.04, "learning_rate": 4.936402475285935e-05, "loss": 0.0767, "step": 565000 }, { "epoch": 0.04, "learning_rate": 4.936346283281798e-05, "loss": 0.0795, "step": 565500 }, { "epoch": 0.04, "learning_rate": 4.936289978668435e-05, "loss": 0.0764, "step": 566000 }, { "epoch": 0.04, "learning_rate": 4.9362336740550706e-05, "loss": 0.0759, "step": 566500 }, { "epoch": 0.04, "learning_rate": 4.936177369441708e-05, "loss": 0.0785, "step": 567000 }, { "epoch": 0.04, "learning_rate": 4.93612117743757e-05, "loss": 0.0787, "step": 567500 }, { "epoch": 0.04, "learning_rate": 4.936064872824207e-05, "loss": 0.0793, "step": 568000 }, { "epoch": 0.04, "learning_rate": 4.936008568210844e-05, "loss": 0.078, "step": 568500 }, { "epoch": 0.04, "learning_rate": 4.9359522635974794e-05, "loss": 0.0713, "step": 569000 }, { "epoch": 0.04, "learning_rate": 4.9358959589841165e-05, "loss": 0.0745, "step": 569500 }, { "epoch": 0.04, "learning_rate": 4.93583976697998e-05, "loss": 0.0767, "step": 570000 }, { "epoch": 0.04, "learning_rate": 4.935783462366616e-05, "loss": 0.0755, "step": 570500 }, { "epoch": 0.04, "learning_rate": 4.9357271577532525e-05, "loss": 0.0809, "step": 571000 }, { "epoch": 0.04, "learning_rate": 4.935670853139889e-05, "loss": 0.0736, "step": 571500 }, { "epoch": 0.04, "learning_rate": 4.935614548526525e-05, "loss": 0.0833, "step": 572000 }, { "epoch": 0.04, "learning_rate": 4.935558243913162e-05, "loss": 0.0732, "step": 572500 }, { "epoch": 0.04, "learning_rate": 4.935502051909025e-05, "loss": 0.0744, "step": 573000 }, { "epoch": 0.04, "learning_rate": 4.935445747295661e-05, "loss": 0.0756, "step": 573500 }, { "epoch": 0.04, "learning_rate": 4.935389442682298e-05, "loss": 0.0759, "step": 574000 }, { "epoch": 0.04, "learning_rate": 4.935333138068934e-05, "loss": 0.0772, "step": 574500 }, { "epoch": 0.04, "learning_rate": 4.935276833455571e-05, "loss": 0.08, "step": 575000 }, { "epoch": 0.04, "learning_rate": 4.935220641451434e-05, "loss": 0.0767, "step": 575500 }, { "epoch": 0.04, "learning_rate": 4.935164336838071e-05, "loss": 0.0799, "step": 576000 }, { "epoch": 0.04, "learning_rate": 4.9351080322247066e-05, "loss": 0.0731, "step": 576500 }, { "epoch": 0.04, "learning_rate": 4.9350517276113436e-05, "loss": 0.0813, "step": 577000 }, { "epoch": 0.04, "learning_rate": 4.93499542299798e-05, "loss": 0.0782, "step": 577500 }, { "epoch": 0.04, "learning_rate": 4.934939230993843e-05, "loss": 0.0765, "step": 578000 }, { "epoch": 0.04, "learning_rate": 4.9348829263804797e-05, "loss": 0.078, "step": 578500 }, { "epoch": 0.04, "learning_rate": 4.934826621767116e-05, "loss": 0.0749, "step": 579000 }, { "epoch": 0.04, "learning_rate": 4.9347703171537525e-05, "loss": 0.0766, "step": 579500 }, { "epoch": 0.04, "learning_rate": 4.934714012540389e-05, "loss": 0.0787, "step": 580000 }, { "epoch": 0.04, "learning_rate": 4.934657820536252e-05, "loss": 0.08, "step": 580500 }, { "epoch": 0.04, "learning_rate": 4.9346015159228885e-05, "loss": 0.0728, "step": 581000 }, { "epoch": 0.04, "learning_rate": 4.9345452113095255e-05, "loss": 0.0807, "step": 581500 }, { "epoch": 0.04, "learning_rate": 4.934488906696161e-05, "loss": 0.077, "step": 582000 }, { "epoch": 0.04, "learning_rate": 4.934432714692025e-05, "loss": 0.0756, "step": 582500 }, { "epoch": 0.04, "learning_rate": 4.934376410078661e-05, "loss": 0.0781, "step": 583000 }, { "epoch": 0.04, "learning_rate": 4.934320105465298e-05, "loss": 0.0743, "step": 583500 }, { "epoch": 0.04, "learning_rate": 4.9342639134611605e-05, "loss": 0.0743, "step": 584000 }, { "epoch": 0.04, "learning_rate": 4.9342076088477976e-05, "loss": 0.0749, "step": 584500 }, { "epoch": 0.04, "learning_rate": 4.934151304234434e-05, "loss": 0.0765, "step": 585000 }, { "epoch": 0.04, "learning_rate": 4.93409499962107e-05, "loss": 0.0741, "step": 585500 }, { "epoch": 0.04, "learning_rate": 4.934038695007707e-05, "loss": 0.0797, "step": 586000 }, { "epoch": 0.04, "learning_rate": 4.933982390394343e-05, "loss": 0.0747, "step": 586500 }, { "epoch": 0.04, "learning_rate": 4.9339260857809796e-05, "loss": 0.0744, "step": 587000 }, { "epoch": 0.04, "learning_rate": 4.933869781167616e-05, "loss": 0.0761, "step": 587500 }, { "epoch": 0.04, "learning_rate": 4.9338134765542524e-05, "loss": 0.0792, "step": 588000 }, { "epoch": 0.04, "learning_rate": 4.9337572845501156e-05, "loss": 0.0776, "step": 588500 }, { "epoch": 0.04, "learning_rate": 4.933700979936752e-05, "loss": 0.0763, "step": 589000 }, { "epoch": 0.04, "learning_rate": 4.9336446753233884e-05, "loss": 0.0763, "step": 589500 }, { "epoch": 0.04, "learning_rate": 4.933588370710025e-05, "loss": 0.0823, "step": 590000 }, { "epoch": 0.04, "learning_rate": 4.933532066096662e-05, "loss": 0.0728, "step": 590500 }, { "epoch": 0.04, "learning_rate": 4.9334758740925244e-05, "loss": 0.0802, "step": 591000 }, { "epoch": 0.04, "learning_rate": 4.9334195694791615e-05, "loss": 0.0775, "step": 591500 }, { "epoch": 0.04, "learning_rate": 4.933363264865797e-05, "loss": 0.0727, "step": 592000 }, { "epoch": 0.04, "learning_rate": 4.933306960252434e-05, "loss": 0.0737, "step": 592500 }, { "epoch": 0.04, "learning_rate": 4.933250768248297e-05, "loss": 0.0784, "step": 593000 }, { "epoch": 0.04, "learning_rate": 4.933194463634934e-05, "loss": 0.0719, "step": 593500 }, { "epoch": 0.04, "learning_rate": 4.93313815902157e-05, "loss": 0.0749, "step": 594000 }, { "epoch": 0.04, "learning_rate": 4.933081854408207e-05, "loss": 0.0774, "step": 594500 }, { "epoch": 0.04, "learning_rate": 4.93302566240407e-05, "loss": 0.0706, "step": 595000 }, { "epoch": 0.04, "learning_rate": 4.9329693577907063e-05, "loss": 0.0748, "step": 595500 }, { "epoch": 0.04, "learning_rate": 4.932913053177343e-05, "loss": 0.0692, "step": 596000 }, { "epoch": 0.04, "learning_rate": 4.932856748563979e-05, "loss": 0.0744, "step": 596500 }, { "epoch": 0.04, "learning_rate": 4.932800443950616e-05, "loss": 0.0719, "step": 597000 }, { "epoch": 0.04, "learning_rate": 4.932744139337252e-05, "loss": 0.0796, "step": 597500 }, { "epoch": 0.04, "learning_rate": 4.932687947333116e-05, "loss": 0.0707, "step": 598000 }, { "epoch": 0.04, "learning_rate": 4.9326316427197516e-05, "loss": 0.0722, "step": 598500 }, { "epoch": 0.04, "learning_rate": 4.932575338106388e-05, "loss": 0.0759, "step": 599000 }, { "epoch": 0.04, "learning_rate": 4.932519033493025e-05, "loss": 0.0703, "step": 599500 }, { "epoch": 0.04, "learning_rate": 4.932462841488888e-05, "loss": 0.0734, "step": 600000 }, { "epoch": 0.04, "learning_rate": 4.9324065368755247e-05, "loss": 0.0743, "step": 600500 }, { "epoch": 0.04, "learning_rate": 4.9323502322621604e-05, "loss": 0.0671, "step": 601000 }, { "epoch": 0.04, "learning_rate": 4.9322939276487975e-05, "loss": 0.0817, "step": 601500 }, { "epoch": 0.04, "learning_rate": 4.93223773564466e-05, "loss": 0.0802, "step": 602000 }, { "epoch": 0.04, "learning_rate": 4.932181431031297e-05, "loss": 0.0766, "step": 602500 }, { "epoch": 0.04, "learning_rate": 4.9321251264179335e-05, "loss": 0.0773, "step": 603000 }, { "epoch": 0.04, "learning_rate": 4.93206882180457e-05, "loss": 0.0725, "step": 603500 }, { "epoch": 0.04, "learning_rate": 4.932012517191206e-05, "loss": 0.0734, "step": 604000 }, { "epoch": 0.04, "learning_rate": 4.9319563251870695e-05, "loss": 0.0747, "step": 604500 }, { "epoch": 0.04, "learning_rate": 4.931900020573706e-05, "loss": 0.0736, "step": 605000 }, { "epoch": 0.04, "learning_rate": 4.931843715960342e-05, "loss": 0.0732, "step": 605500 }, { "epoch": 0.04, "learning_rate": 4.931787411346979e-05, "loss": 0.0779, "step": 606000 }, { "epoch": 0.04, "learning_rate": 4.931731106733615e-05, "loss": 0.0753, "step": 606500 }, { "epoch": 0.04, "learning_rate": 4.931674914729478e-05, "loss": 0.0712, "step": 607000 }, { "epoch": 0.04, "learning_rate": 4.931618610116115e-05, "loss": 0.07, "step": 607500 }, { "epoch": 0.04, "learning_rate": 4.931562305502752e-05, "loss": 0.0747, "step": 608000 }, { "epoch": 0.04, "learning_rate": 4.9315060008893875e-05, "loss": 0.0773, "step": 608500 }, { "epoch": 0.04, "learning_rate": 4.9314498088852514e-05, "loss": 0.073, "step": 609000 }, { "epoch": 0.04, "learning_rate": 4.931393504271887e-05, "loss": 0.0769, "step": 609500 }, { "epoch": 0.04, "learning_rate": 4.931337199658524e-05, "loss": 0.0751, "step": 610000 }, { "epoch": 0.04, "learning_rate": 4.9312808950451606e-05, "loss": 0.0756, "step": 610500 }, { "epoch": 0.04, "learning_rate": 4.931224590431797e-05, "loss": 0.0719, "step": 611000 }, { "epoch": 0.04, "learning_rate": 4.93116839842766e-05, "loss": 0.0773, "step": 611500 }, { "epoch": 0.04, "learning_rate": 4.9311120938142966e-05, "loss": 0.075, "step": 612000 }, { "epoch": 0.04, "learning_rate": 4.931055789200933e-05, "loss": 0.071, "step": 612500 }, { "epoch": 0.04, "learning_rate": 4.9309994845875694e-05, "loss": 0.0734, "step": 613000 }, { "epoch": 0.04, "learning_rate": 4.9309432925834327e-05, "loss": 0.0799, "step": 613500 }, { "epoch": 0.04, "learning_rate": 4.930886987970069e-05, "loss": 0.0751, "step": 614000 }, { "epoch": 0.04, "learning_rate": 4.930830683356706e-05, "loss": 0.0686, "step": 614500 }, { "epoch": 0.04, "learning_rate": 4.930774378743342e-05, "loss": 0.0769, "step": 615000 }, { "epoch": 0.04, "learning_rate": 4.930718074129978e-05, "loss": 0.0773, "step": 615500 }, { "epoch": 0.04, "learning_rate": 4.9306618821258415e-05, "loss": 0.0765, "step": 616000 }, { "epoch": 0.04, "learning_rate": 4.930605577512478e-05, "loss": 0.0733, "step": 616500 }, { "epoch": 0.04, "learning_rate": 4.930549272899115e-05, "loss": 0.0741, "step": 617000 }, { "epoch": 0.04, "learning_rate": 4.930492968285751e-05, "loss": 0.0739, "step": 617500 }, { "epoch": 0.04, "learning_rate": 4.9304367762816146e-05, "loss": 0.0725, "step": 618000 }, { "epoch": 0.04, "learning_rate": 4.93038047166825e-05, "loss": 0.0677, "step": 618500 }, { "epoch": 0.04, "learning_rate": 4.9303241670548874e-05, "loss": 0.0736, "step": 619000 }, { "epoch": 0.04, "learning_rate": 4.930267862441524e-05, "loss": 0.0741, "step": 619500 }, { "epoch": 0.04, "learning_rate": 4.930211670437387e-05, "loss": 0.0717, "step": 620000 }, { "epoch": 0.04, "learning_rate": 4.9301553658240234e-05, "loss": 0.0726, "step": 620500 }, { "epoch": 0.04, "learning_rate": 4.93009906121066e-05, "loss": 0.0739, "step": 621000 }, { "epoch": 0.04, "learning_rate": 4.930042756597296e-05, "loss": 0.0736, "step": 621500 }, { "epoch": 0.04, "learning_rate": 4.9299864519839326e-05, "loss": 0.0738, "step": 622000 }, { "epoch": 0.04, "learning_rate": 4.929930259979796e-05, "loss": 0.0706, "step": 622500 }, { "epoch": 0.04, "learning_rate": 4.929873955366432e-05, "loss": 0.0725, "step": 623000 }, { "epoch": 0.04, "learning_rate": 4.9298176507530686e-05, "loss": 0.0699, "step": 623500 }, { "epoch": 0.04, "learning_rate": 4.929761346139705e-05, "loss": 0.0727, "step": 624000 }, { "epoch": 0.04, "learning_rate": 4.929705154135568e-05, "loss": 0.0732, "step": 624500 }, { "epoch": 0.04, "learning_rate": 4.9296488495222046e-05, "loss": 0.0794, "step": 625000 }, { "epoch": 0.04, "learning_rate": 4.929592544908842e-05, "loss": 0.0727, "step": 625500 }, { "epoch": 0.04, "learning_rate": 4.929536240295478e-05, "loss": 0.0748, "step": 626000 }, { "epoch": 0.04, "learning_rate": 4.9294799356821145e-05, "loss": 0.0693, "step": 626500 }, { "epoch": 0.04, "learning_rate": 4.929423743677977e-05, "loss": 0.0743, "step": 627000 }, { "epoch": 0.04, "learning_rate": 4.929367439064614e-05, "loss": 0.07, "step": 627500 }, { "epoch": 0.04, "learning_rate": 4.9293111344512505e-05, "loss": 0.0777, "step": 628000 }, { "epoch": 0.04, "learning_rate": 4.929254829837887e-05, "loss": 0.0705, "step": 628500 }, { "epoch": 0.04, "learning_rate": 4.929198525224523e-05, "loss": 0.0764, "step": 629000 }, { "epoch": 0.04, "learning_rate": 4.9291424458296134e-05, "loss": 0.0756, "step": 629500 }, { "epoch": 0.04, "learning_rate": 4.92908614121625e-05, "loss": 0.0721, "step": 630000 }, { "epoch": 0.04, "learning_rate": 4.929029836602886e-05, "loss": 0.0731, "step": 630500 }, { "epoch": 0.04, "learning_rate": 4.9289735319895226e-05, "loss": 0.0729, "step": 631000 }, { "epoch": 0.04, "learning_rate": 4.928917227376159e-05, "loss": 0.0732, "step": 631500 }, { "epoch": 0.04, "learning_rate": 4.928860922762796e-05, "loss": 0.072, "step": 632000 }, { "epoch": 0.04, "learning_rate": 4.928804618149432e-05, "loss": 0.0757, "step": 632500 }, { "epoch": 0.04, "learning_rate": 4.928748313536068e-05, "loss": 0.0742, "step": 633000 }, { "epoch": 0.04, "learning_rate": 4.928692008922705e-05, "loss": 0.0737, "step": 633500 }, { "epoch": 0.04, "learning_rate": 4.928635704309341e-05, "loss": 0.0779, "step": 634000 }, { "epoch": 0.04, "learning_rate": 4.928579512305205e-05, "loss": 0.071, "step": 634500 }, { "epoch": 0.04, "learning_rate": 4.9285232076918406e-05, "loss": 0.0733, "step": 635000 }, { "epoch": 0.04, "learning_rate": 4.928466903078478e-05, "loss": 0.0728, "step": 635500 }, { "epoch": 0.04, "learning_rate": 4.928410598465114e-05, "loss": 0.071, "step": 636000 }, { "epoch": 0.04, "learning_rate": 4.9283542938517505e-05, "loss": 0.0693, "step": 636500 }, { "epoch": 0.04, "learning_rate": 4.928298101847614e-05, "loss": 0.0774, "step": 637000 }, { "epoch": 0.04, "learning_rate": 4.92824179723425e-05, "loss": 0.073, "step": 637500 }, { "epoch": 0.04, "learning_rate": 4.9281854926208865e-05, "loss": 0.0737, "step": 638000 }, { "epoch": 0.04, "learning_rate": 4.928129188007523e-05, "loss": 0.0683, "step": 638500 }, { "epoch": 0.04, "learning_rate": 4.928072883394159e-05, "loss": 0.0715, "step": 639000 }, { "epoch": 0.04, "learning_rate": 4.9280166913900225e-05, "loss": 0.0697, "step": 639500 }, { "epoch": 0.04, "learning_rate": 4.927960386776659e-05, "loss": 0.0712, "step": 640000 }, { "epoch": 0.04, "learning_rate": 4.927904082163295e-05, "loss": 0.0688, "step": 640500 }, { "epoch": 0.04, "learning_rate": 4.9278477775499324e-05, "loss": 0.0787, "step": 641000 }, { "epoch": 0.04, "learning_rate": 4.927791585545795e-05, "loss": 0.0714, "step": 641500 }, { "epoch": 0.04, "learning_rate": 4.927735280932432e-05, "loss": 0.0749, "step": 642000 }, { "epoch": 0.04, "learning_rate": 4.9276789763190684e-05, "loss": 0.0751, "step": 642500 }, { "epoch": 0.04, "learning_rate": 4.927622671705704e-05, "loss": 0.0717, "step": 643000 }, { "epoch": 0.04, "learning_rate": 4.927566479701568e-05, "loss": 0.0689, "step": 643500 }, { "epoch": 0.04, "learning_rate": 4.9275101750882044e-05, "loss": 0.0739, "step": 644000 }, { "epoch": 0.04, "learning_rate": 4.927453870474841e-05, "loss": 0.0733, "step": 644500 }, { "epoch": 0.04, "learning_rate": 4.927397565861477e-05, "loss": 0.0696, "step": 645000 }, { "epoch": 0.04, "learning_rate": 4.9273412612481136e-05, "loss": 0.0765, "step": 645500 }, { "epoch": 0.04, "learning_rate": 4.927285069243977e-05, "loss": 0.0727, "step": 646000 }, { "epoch": 0.04, "learning_rate": 4.927228764630613e-05, "loss": 0.0748, "step": 646500 }, { "epoch": 0.04, "learning_rate": 4.9271724600172496e-05, "loss": 0.0717, "step": 647000 }, { "epoch": 0.04, "learning_rate": 4.927116155403886e-05, "loss": 0.0758, "step": 647500 }, { "epoch": 0.04, "learning_rate": 4.9270598507905224e-05, "loss": 0.0696, "step": 648000 }, { "epoch": 0.04, "learning_rate": 4.9270036587863863e-05, "loss": 0.0711, "step": 648500 }, { "epoch": 0.04, "learning_rate": 4.926947466782249e-05, "loss": 0.072, "step": 649000 }, { "epoch": 0.04, "learning_rate": 4.926891162168886e-05, "loss": 0.0708, "step": 649500 }, { "epoch": 0.04, "learning_rate": 4.926834857555522e-05, "loss": 0.0698, "step": 650000 }, { "epoch": 0.04, "learning_rate": 4.926778552942158e-05, "loss": 0.0709, "step": 650500 }, { "epoch": 0.04, "learning_rate": 4.926722248328795e-05, "loss": 0.0701, "step": 651000 }, { "epoch": 0.04, "learning_rate": 4.926665943715431e-05, "loss": 0.0703, "step": 651500 }, { "epoch": 0.04, "learning_rate": 4.926609639102068e-05, "loss": 0.0753, "step": 652000 }, { "epoch": 0.04, "learning_rate": 4.9265533344887044e-05, "loss": 0.0714, "step": 652500 }, { "epoch": 0.04, "learning_rate": 4.926497029875341e-05, "loss": 0.0708, "step": 653000 }, { "epoch": 0.04, "learning_rate": 4.926440837871204e-05, "loss": 0.0758, "step": 653500 }, { "epoch": 0.04, "learning_rate": 4.9263845332578404e-05, "loss": 0.0689, "step": 654000 }, { "epoch": 0.04, "learning_rate": 4.9263283412537036e-05, "loss": 0.0727, "step": 654500 }, { "epoch": 0.04, "learning_rate": 4.92627203664034e-05, "loss": 0.0741, "step": 655000 }, { "epoch": 0.04, "learning_rate": 4.9262157320269764e-05, "loss": 0.0743, "step": 655500 }, { "epoch": 0.04, "learning_rate": 4.926159427413613e-05, "loss": 0.0738, "step": 656000 }, { "epoch": 0.04, "learning_rate": 4.926103122800249e-05, "loss": 0.0747, "step": 656500 }, { "epoch": 0.04, "learning_rate": 4.9260468181868856e-05, "loss": 0.0713, "step": 657000 }, { "epoch": 0.04, "learning_rate": 4.925990513573523e-05, "loss": 0.0724, "step": 657500 }, { "epoch": 0.04, "learning_rate": 4.925934208960159e-05, "loss": 0.0692, "step": 658000 }, { "epoch": 0.04, "learning_rate": 4.925877904346795e-05, "loss": 0.0715, "step": 658500 }, { "epoch": 0.04, "learning_rate": 4.925821599733432e-05, "loss": 0.0735, "step": 659000 }, { "epoch": 0.04, "learning_rate": 4.9257654077292944e-05, "loss": 0.0741, "step": 659500 }, { "epoch": 0.04, "learning_rate": 4.9257091031159315e-05, "loss": 0.0703, "step": 660000 }, { "epoch": 0.04, "learning_rate": 4.925652798502568e-05, "loss": 0.0735, "step": 660500 }, { "epoch": 0.04, "learning_rate": 4.925596493889204e-05, "loss": 0.071, "step": 661000 }, { "epoch": 0.04, "learning_rate": 4.925540189275841e-05, "loss": 0.0721, "step": 661500 }, { "epoch": 0.04, "learning_rate": 4.925483997271704e-05, "loss": 0.0721, "step": 662000 }, { "epoch": 0.04, "learning_rate": 4.92542769265834e-05, "loss": 0.0745, "step": 662500 }, { "epoch": 0.04, "learning_rate": 4.925371388044977e-05, "loss": 0.0729, "step": 663000 }, { "epoch": 0.04, "learning_rate": 4.925315083431613e-05, "loss": 0.0747, "step": 663500 }, { "epoch": 0.04, "learning_rate": 4.9252587788182495e-05, "loss": 0.0761, "step": 664000 }, { "epoch": 0.04, "learning_rate": 4.9252024742048866e-05, "loss": 0.0713, "step": 664500 }, { "epoch": 0.04, "learning_rate": 4.925146282200749e-05, "loss": 0.0713, "step": 665000 }, { "epoch": 0.04, "learning_rate": 4.925089977587386e-05, "loss": 0.0744, "step": 665500 }, { "epoch": 0.04, "learning_rate": 4.925033672974022e-05, "loss": 0.0715, "step": 666000 }, { "epoch": 0.05, "learning_rate": 4.924977368360659e-05, "loss": 0.0669, "step": 666500 }, { "epoch": 0.05, "learning_rate": 4.9249210637472954e-05, "loss": 0.0776, "step": 667000 }, { "epoch": 0.05, "learning_rate": 4.9248648717431586e-05, "loss": 0.0718, "step": 667500 }, { "epoch": 0.05, "learning_rate": 4.924808567129795e-05, "loss": 0.068, "step": 668000 }, { "epoch": 0.05, "learning_rate": 4.924752262516431e-05, "loss": 0.0716, "step": 668500 }, { "epoch": 0.05, "learning_rate": 4.924695957903068e-05, "loss": 0.078, "step": 669000 }, { "epoch": 0.05, "learning_rate": 4.924639765898931e-05, "loss": 0.0767, "step": 669500 }, { "epoch": 0.05, "learning_rate": 4.9245834612855675e-05, "loss": 0.0668, "step": 670000 }, { "epoch": 0.05, "learning_rate": 4.924527156672204e-05, "loss": 0.0723, "step": 670500 }, { "epoch": 0.05, "learning_rate": 4.924470852058841e-05, "loss": 0.0722, "step": 671000 }, { "epoch": 0.05, "learning_rate": 4.9244145474454767e-05, "loss": 0.0732, "step": 671500 }, { "epoch": 0.05, "learning_rate": 4.9243583554413405e-05, "loss": 0.0721, "step": 672000 }, { "epoch": 0.05, "learning_rate": 4.924302050827976e-05, "loss": 0.0732, "step": 672500 }, { "epoch": 0.05, "learning_rate": 4.924245746214613e-05, "loss": 0.0735, "step": 673000 }, { "epoch": 0.05, "learning_rate": 4.92418944160125e-05, "loss": 0.0702, "step": 673500 }, { "epoch": 0.05, "learning_rate": 4.924133249597113e-05, "loss": 0.0751, "step": 674000 }, { "epoch": 0.05, "learning_rate": 4.9240769449837494e-05, "loss": 0.0701, "step": 674500 }, { "epoch": 0.05, "learning_rate": 4.924020640370385e-05, "loss": 0.071, "step": 675000 }, { "epoch": 0.05, "learning_rate": 4.923964335757022e-05, "loss": 0.0696, "step": 675500 }, { "epoch": 0.05, "learning_rate": 4.9239080311436586e-05, "loss": 0.0709, "step": 676000 }, { "epoch": 0.05, "learning_rate": 4.923851726530295e-05, "loss": 0.0726, "step": 676500 }, { "epoch": 0.05, "learning_rate": 4.923795534526158e-05, "loss": 0.0744, "step": 677000 }, { "epoch": 0.05, "learning_rate": 4.9237392299127946e-05, "loss": 0.0741, "step": 677500 }, { "epoch": 0.05, "learning_rate": 4.923682925299431e-05, "loss": 0.0743, "step": 678000 }, { "epoch": 0.05, "learning_rate": 4.9236266206860674e-05, "loss": 0.0718, "step": 678500 }, { "epoch": 0.05, "learning_rate": 4.923570316072704e-05, "loss": 0.0746, "step": 679000 }, { "epoch": 0.05, "learning_rate": 4.923514124068567e-05, "loss": 0.0674, "step": 679500 }, { "epoch": 0.05, "learning_rate": 4.9234578194552034e-05, "loss": 0.0696, "step": 680000 }, { "epoch": 0.05, "learning_rate": 4.92340151484184e-05, "loss": 0.0695, "step": 680500 }, { "epoch": 0.05, "learning_rate": 4.923345210228477e-05, "loss": 0.0739, "step": 681000 }, { "epoch": 0.05, "learning_rate": 4.9232890182243394e-05, "loss": 0.0704, "step": 681500 }, { "epoch": 0.05, "learning_rate": 4.9232327136109765e-05, "loss": 0.0761, "step": 682000 }, { "epoch": 0.05, "learning_rate": 4.923176408997612e-05, "loss": 0.0721, "step": 682500 }, { "epoch": 0.05, "learning_rate": 4.923120104384249e-05, "loss": 0.0711, "step": 683000 }, { "epoch": 0.05, "learning_rate": 4.923063799770886e-05, "loss": 0.0726, "step": 683500 }, { "epoch": 0.05, "learning_rate": 4.9230074951575214e-05, "loss": 0.0716, "step": 684000 }, { "epoch": 0.05, "learning_rate": 4.922951303153385e-05, "loss": 0.071, "step": 684500 }, { "epoch": 0.05, "learning_rate": 4.922894998540021e-05, "loss": 0.0694, "step": 685000 }, { "epoch": 0.05, "learning_rate": 4.922838693926658e-05, "loss": 0.0743, "step": 685500 }, { "epoch": 0.05, "learning_rate": 4.9227823893132945e-05, "loss": 0.0683, "step": 686000 }, { "epoch": 0.05, "learning_rate": 4.922726197309158e-05, "loss": 0.0702, "step": 686500 }, { "epoch": 0.05, "learning_rate": 4.922669892695794e-05, "loss": 0.0696, "step": 687000 }, { "epoch": 0.05, "learning_rate": 4.922613588082431e-05, "loss": 0.0669, "step": 687500 }, { "epoch": 0.05, "learning_rate": 4.922557283469067e-05, "loss": 0.0762, "step": 688000 }, { "epoch": 0.05, "learning_rate": 4.922501091464931e-05, "loss": 0.0684, "step": 688500 }, { "epoch": 0.05, "learning_rate": 4.9224447868515666e-05, "loss": 0.0709, "step": 689000 }, { "epoch": 0.05, "learning_rate": 4.922388482238203e-05, "loss": 0.0705, "step": 689500 }, { "epoch": 0.05, "learning_rate": 4.92233217762484e-05, "loss": 0.0678, "step": 690000 }, { "epoch": 0.05, "learning_rate": 4.922275873011476e-05, "loss": 0.0711, "step": 690500 }, { "epoch": 0.05, "learning_rate": 4.922219568398113e-05, "loss": 0.0705, "step": 691000 }, { "epoch": 0.05, "learning_rate": 4.922163263784749e-05, "loss": 0.0696, "step": 691500 }, { "epoch": 0.05, "learning_rate": 4.9221070717806125e-05, "loss": 0.0733, "step": 692000 }, { "epoch": 0.05, "learning_rate": 4.922050767167249e-05, "loss": 0.0725, "step": 692500 }, { "epoch": 0.05, "learning_rate": 4.921994462553885e-05, "loss": 0.0689, "step": 693000 }, { "epoch": 0.05, "learning_rate": 4.9219381579405217e-05, "loss": 0.0679, "step": 693500 }, { "epoch": 0.05, "learning_rate": 4.921881853327158e-05, "loss": 0.0699, "step": 694000 }, { "epoch": 0.05, "learning_rate": 4.921825661323021e-05, "loss": 0.0742, "step": 694500 }, { "epoch": 0.05, "learning_rate": 4.921769356709658e-05, "loss": 0.0657, "step": 695000 }, { "epoch": 0.05, "learning_rate": 4.921713052096294e-05, "loss": 0.0693, "step": 695500 }, { "epoch": 0.05, "learning_rate": 4.9216567474829305e-05, "loss": 0.0665, "step": 696000 }, { "epoch": 0.05, "learning_rate": 4.9216004428695676e-05, "loss": 0.0729, "step": 696500 }, { "epoch": 0.05, "learning_rate": 4.92154425086543e-05, "loss": 0.071, "step": 697000 }, { "epoch": 0.05, "learning_rate": 4.921487946252067e-05, "loss": 0.0698, "step": 697500 }, { "epoch": 0.05, "learning_rate": 4.921431641638703e-05, "loss": 0.0718, "step": 698000 }, { "epoch": 0.05, "learning_rate": 4.921375337025339e-05, "loss": 0.0713, "step": 698500 }, { "epoch": 0.05, "learning_rate": 4.9213191450212025e-05, "loss": 0.0705, "step": 699000 }, { "epoch": 0.05, "learning_rate": 4.921262840407839e-05, "loss": 0.069, "step": 699500 }, { "epoch": 0.05, "learning_rate": 4.921206535794476e-05, "loss": 0.0721, "step": 700000 }, { "epoch": 0.05, "learning_rate": 4.9211502311811124e-05, "loss": 0.0703, "step": 700500 }, { "epoch": 0.05, "learning_rate": 4.921093926567749e-05, "loss": 0.0677, "step": 701000 }, { "epoch": 0.05, "learning_rate": 4.921037734563612e-05, "loss": 0.0674, "step": 701500 }, { "epoch": 0.05, "learning_rate": 4.9209814299502484e-05, "loss": 0.0765, "step": 702000 }, { "epoch": 0.05, "learning_rate": 4.920925237946111e-05, "loss": 0.0702, "step": 702500 }, { "epoch": 0.05, "learning_rate": 4.920868933332748e-05, "loss": 0.0713, "step": 703000 }, { "epoch": 0.05, "learning_rate": 4.9208126287193844e-05, "loss": 0.0722, "step": 703500 }, { "epoch": 0.05, "learning_rate": 4.920756324106021e-05, "loss": 0.0711, "step": 704000 }, { "epoch": 0.05, "learning_rate": 4.920700019492657e-05, "loss": 0.0661, "step": 704500 }, { "epoch": 0.05, "learning_rate": 4.9206437148792936e-05, "loss": 0.0711, "step": 705000 }, { "epoch": 0.05, "learning_rate": 4.920587410265931e-05, "loss": 0.0705, "step": 705500 }, { "epoch": 0.05, "learning_rate": 4.9205311056525664e-05, "loss": 0.0753, "step": 706000 }, { "epoch": 0.05, "learning_rate": 4.9204748010392035e-05, "loss": 0.0708, "step": 706500 }, { "epoch": 0.05, "learning_rate": 4.920418609035066e-05, "loss": 0.0713, "step": 707000 }, { "epoch": 0.05, "learning_rate": 4.920362304421703e-05, "loss": 0.0704, "step": 707500 }, { "epoch": 0.05, "learning_rate": 4.9203059998083395e-05, "loss": 0.0739, "step": 708000 }, { "epoch": 0.05, "learning_rate": 4.920249695194975e-05, "loss": 0.0727, "step": 708500 }, { "epoch": 0.05, "learning_rate": 4.920193390581612e-05, "loss": 0.071, "step": 709000 }, { "epoch": 0.05, "learning_rate": 4.9201371985774756e-05, "loss": 0.0679, "step": 709500 }, { "epoch": 0.05, "learning_rate": 4.920080893964112e-05, "loss": 0.0641, "step": 710000 }, { "epoch": 0.05, "learning_rate": 4.9200245893507484e-05, "loss": 0.0706, "step": 710500 }, { "epoch": 0.05, "learning_rate": 4.919968284737385e-05, "loss": 0.0653, "step": 711000 }, { "epoch": 0.05, "learning_rate": 4.919911980124021e-05, "loss": 0.0651, "step": 711500 }, { "epoch": 0.05, "learning_rate": 4.9198557881198844e-05, "loss": 0.0728, "step": 712000 }, { "epoch": 0.05, "learning_rate": 4.919799483506521e-05, "loss": 0.0709, "step": 712500 }, { "epoch": 0.05, "learning_rate": 4.919743178893157e-05, "loss": 0.0705, "step": 713000 }, { "epoch": 0.05, "learning_rate": 4.9196868742797936e-05, "loss": 0.0683, "step": 713500 }, { "epoch": 0.05, "learning_rate": 4.9196306822756575e-05, "loss": 0.0747, "step": 714000 }, { "epoch": 0.05, "learning_rate": 4.919574377662293e-05, "loss": 0.0701, "step": 714500 }, { "epoch": 0.05, "learning_rate": 4.9195180730489296e-05, "loss": 0.0718, "step": 715000 }, { "epoch": 0.05, "learning_rate": 4.919461768435567e-05, "loss": 0.0688, "step": 715500 }, { "epoch": 0.05, "learning_rate": 4.919405463822203e-05, "loss": 0.0697, "step": 716000 }, { "epoch": 0.05, "learning_rate": 4.919349271818066e-05, "loss": 0.0731, "step": 716500 }, { "epoch": 0.05, "learning_rate": 4.919292967204703e-05, "loss": 0.0698, "step": 717000 }, { "epoch": 0.05, "learning_rate": 4.919236662591339e-05, "loss": 0.0668, "step": 717500 }, { "epoch": 0.05, "learning_rate": 4.9191803579779755e-05, "loss": 0.071, "step": 718000 }, { "epoch": 0.05, "learning_rate": 4.919124053364612e-05, "loss": 0.0626, "step": 718500 }, { "epoch": 0.05, "learning_rate": 4.919067861360475e-05, "loss": 0.073, "step": 719000 }, { "epoch": 0.05, "learning_rate": 4.9190115567471115e-05, "loss": 0.073, "step": 719500 }, { "epoch": 0.05, "learning_rate": 4.918955252133748e-05, "loss": 0.0679, "step": 720000 }, { "epoch": 0.05, "learning_rate": 4.918898947520384e-05, "loss": 0.071, "step": 720500 }, { "epoch": 0.05, "learning_rate": 4.9188426429070214e-05, "loss": 0.067, "step": 721000 }, { "epoch": 0.05, "learning_rate": 4.918786450902884e-05, "loss": 0.0767, "step": 721500 }, { "epoch": 0.05, "learning_rate": 4.918730146289521e-05, "loss": 0.0675, "step": 722000 }, { "epoch": 0.05, "learning_rate": 4.918673841676157e-05, "loss": 0.0719, "step": 722500 }, { "epoch": 0.05, "learning_rate": 4.918617537062794e-05, "loss": 0.0699, "step": 723000 }, { "epoch": 0.05, "learning_rate": 4.91856123244943e-05, "loss": 0.0703, "step": 723500 }, { "epoch": 0.05, "learning_rate": 4.918504927836066e-05, "loss": 0.0718, "step": 724000 }, { "epoch": 0.05, "learning_rate": 4.91844873583193e-05, "loss": 0.0742, "step": 724500 }, { "epoch": 0.05, "learning_rate": 4.9183924312185655e-05, "loss": 0.0714, "step": 725000 }, { "epoch": 0.05, "learning_rate": 4.9183361266052026e-05, "loss": 0.0676, "step": 725500 }, { "epoch": 0.05, "learning_rate": 4.918279821991839e-05, "loss": 0.0694, "step": 726000 }, { "epoch": 0.05, "learning_rate": 4.918223629987702e-05, "loss": 0.068, "step": 726500 }, { "epoch": 0.05, "learning_rate": 4.9181673253743386e-05, "loss": 0.0688, "step": 727000 }, { "epoch": 0.05, "learning_rate": 4.918111020760975e-05, "loss": 0.0638, "step": 727500 }, { "epoch": 0.05, "learning_rate": 4.9180547161476114e-05, "loss": 0.0699, "step": 728000 }, { "epoch": 0.05, "learning_rate": 4.9179986367527015e-05, "loss": 0.0671, "step": 728500 }, { "epoch": 0.05, "learning_rate": 4.917942332139338e-05, "loss": 0.0673, "step": 729000 }, { "epoch": 0.05, "learning_rate": 4.917886027525974e-05, "loss": 0.0738, "step": 729500 }, { "epoch": 0.05, "learning_rate": 4.917829722912611e-05, "loss": 0.0707, "step": 730000 }, { "epoch": 0.05, "learning_rate": 4.917773418299248e-05, "loss": 0.0683, "step": 730500 }, { "epoch": 0.05, "learning_rate": 4.9177171136858835e-05, "loss": 0.0732, "step": 731000 }, { "epoch": 0.05, "learning_rate": 4.91766080907252e-05, "loss": 0.0716, "step": 731500 }, { "epoch": 0.05, "learning_rate": 4.917604504459157e-05, "loss": 0.0699, "step": 732000 }, { "epoch": 0.05, "learning_rate": 4.9175483124550195e-05, "loss": 0.0675, "step": 732500 }, { "epoch": 0.05, "learning_rate": 4.917492120450883e-05, "loss": 0.0747, "step": 733000 }, { "epoch": 0.05, "learning_rate": 4.917435815837519e-05, "loss": 0.0718, "step": 733500 }, { "epoch": 0.05, "learning_rate": 4.917379511224156e-05, "loss": 0.0724, "step": 734000 }, { "epoch": 0.05, "learning_rate": 4.9173232066107926e-05, "loss": 0.072, "step": 734500 }, { "epoch": 0.05, "learning_rate": 4.917266901997429e-05, "loss": 0.069, "step": 735000 }, { "epoch": 0.05, "learning_rate": 4.9172105973840654e-05, "loss": 0.064, "step": 735500 }, { "epoch": 0.05, "learning_rate": 4.917154292770702e-05, "loss": 0.0683, "step": 736000 }, { "epoch": 0.05, "learning_rate": 4.917097988157338e-05, "loss": 0.0722, "step": 736500 }, { "epoch": 0.05, "learning_rate": 4.9170417961532014e-05, "loss": 0.0704, "step": 737000 }, { "epoch": 0.05, "learning_rate": 4.916985491539838e-05, "loss": 0.067, "step": 737500 }, { "epoch": 0.05, "learning_rate": 4.916929186926474e-05, "loss": 0.0715, "step": 738000 }, { "epoch": 0.05, "learning_rate": 4.916872882313111e-05, "loss": 0.07, "step": 738500 }, { "epoch": 0.05, "learning_rate": 4.916816690308974e-05, "loss": 0.0667, "step": 739000 }, { "epoch": 0.05, "learning_rate": 4.916760385695611e-05, "loss": 0.0686, "step": 739500 }, { "epoch": 0.05, "learning_rate": 4.9167040810822466e-05, "loss": 0.0667, "step": 740000 }, { "epoch": 0.05, "learning_rate": 4.916647776468884e-05, "loss": 0.0702, "step": 740500 }, { "epoch": 0.05, "learning_rate": 4.916591584464746e-05, "loss": 0.0669, "step": 741000 }, { "epoch": 0.05, "learning_rate": 4.9165352798513833e-05, "loss": 0.0696, "step": 741500 }, { "epoch": 0.05, "learning_rate": 4.91647897523802e-05, "loss": 0.0725, "step": 742000 }, { "epoch": 0.05, "learning_rate": 4.9164226706246555e-05, "loss": 0.071, "step": 742500 }, { "epoch": 0.05, "learning_rate": 4.9163663660112925e-05, "loss": 0.0653, "step": 743000 }, { "epoch": 0.05, "learning_rate": 4.916310174007156e-05, "loss": 0.0694, "step": 743500 }, { "epoch": 0.05, "learning_rate": 4.916253869393792e-05, "loss": 0.0762, "step": 744000 }, { "epoch": 0.05, "learning_rate": 4.9161975647804286e-05, "loss": 0.0666, "step": 744500 }, { "epoch": 0.05, "learning_rate": 4.916141260167065e-05, "loss": 0.0667, "step": 745000 }, { "epoch": 0.05, "learning_rate": 4.916085068162928e-05, "loss": 0.0723, "step": 745500 }, { "epoch": 0.05, "learning_rate": 4.9160287635495646e-05, "loss": 0.0669, "step": 746000 }, { "epoch": 0.05, "learning_rate": 4.915972458936201e-05, "loss": 0.0777, "step": 746500 }, { "epoch": 0.05, "learning_rate": 4.9159161543228374e-05, "loss": 0.0764, "step": 747000 }, { "epoch": 0.05, "learning_rate": 4.9158598497094745e-05, "loss": 0.0742, "step": 747500 }, { "epoch": 0.05, "learning_rate": 4.915803657705338e-05, "loss": 0.0723, "step": 748000 }, { "epoch": 0.05, "learning_rate": 4.915747353091974e-05, "loss": 0.0696, "step": 748500 }, { "epoch": 0.05, "learning_rate": 4.91569104847861e-05, "loss": 0.0686, "step": 749000 }, { "epoch": 0.05, "learning_rate": 4.915634743865247e-05, "loss": 0.0659, "step": 749500 }, { "epoch": 0.05, "learning_rate": 4.915578439251883e-05, "loss": 0.0678, "step": 750000 }, { "epoch": 0.05, "learning_rate": 4.9155222472477465e-05, "loss": 0.068, "step": 750500 }, { "epoch": 0.05, "learning_rate": 4.915465942634383e-05, "loss": 0.0699, "step": 751000 }, { "epoch": 0.05, "learning_rate": 4.915409638021019e-05, "loss": 0.0733, "step": 751500 }, { "epoch": 0.05, "learning_rate": 4.915353333407656e-05, "loss": 0.0669, "step": 752000 }, { "epoch": 0.05, "learning_rate": 4.915297141403519e-05, "loss": 0.0721, "step": 752500 }, { "epoch": 0.05, "learning_rate": 4.915240836790155e-05, "loss": 0.067, "step": 753000 }, { "epoch": 0.05, "learning_rate": 4.915184532176792e-05, "loss": 0.0706, "step": 753500 }, { "epoch": 0.05, "learning_rate": 4.915128227563428e-05, "loss": 0.0639, "step": 754000 }, { "epoch": 0.05, "learning_rate": 4.915072035559291e-05, "loss": 0.0641, "step": 754500 }, { "epoch": 0.05, "learning_rate": 4.915015730945928e-05, "loss": 0.068, "step": 755000 }, { "epoch": 0.05, "learning_rate": 4.914959426332564e-05, "loss": 0.0682, "step": 755500 }, { "epoch": 0.05, "learning_rate": 4.914903121719201e-05, "loss": 0.0724, "step": 756000 }, { "epoch": 0.05, "learning_rate": 4.914846929715064e-05, "loss": 0.07, "step": 756500 }, { "epoch": 0.05, "learning_rate": 4.914790625101701e-05, "loss": 0.0688, "step": 757000 }, { "epoch": 0.05, "learning_rate": 4.9147343204883366e-05, "loss": 0.069, "step": 757500 }, { "epoch": 0.05, "learning_rate": 4.9146780158749736e-05, "loss": 0.0706, "step": 758000 }, { "epoch": 0.05, "learning_rate": 4.914621823870836e-05, "loss": 0.0671, "step": 758500 }, { "epoch": 0.05, "learning_rate": 4.914565519257473e-05, "loss": 0.0699, "step": 759000 }, { "epoch": 0.05, "learning_rate": 4.9145092146441097e-05, "loss": 0.0686, "step": 759500 }, { "epoch": 0.05, "learning_rate": 4.9144529100307454e-05, "loss": 0.0711, "step": 760000 }, { "epoch": 0.05, "learning_rate": 4.9143966054173825e-05, "loss": 0.0722, "step": 760500 }, { "epoch": 0.05, "learning_rate": 4.914340413413246e-05, "loss": 0.0762, "step": 761000 }, { "epoch": 0.05, "learning_rate": 4.914284108799882e-05, "loss": 0.0687, "step": 761500 }, { "epoch": 0.05, "learning_rate": 4.9142278041865185e-05, "loss": 0.0663, "step": 762000 }, { "epoch": 0.05, "learning_rate": 4.914171499573155e-05, "loss": 0.0675, "step": 762500 }, { "epoch": 0.05, "learning_rate": 4.914115194959791e-05, "loss": 0.0662, "step": 763000 }, { "epoch": 0.05, "learning_rate": 4.9140590029556545e-05, "loss": 0.0689, "step": 763500 }, { "epoch": 0.05, "learning_rate": 4.914002698342291e-05, "loss": 0.0726, "step": 764000 }, { "epoch": 0.05, "learning_rate": 4.913946393728927e-05, "loss": 0.069, "step": 764500 }, { "epoch": 0.05, "learning_rate": 4.9138900891155644e-05, "loss": 0.0723, "step": 765000 }, { "epoch": 0.05, "learning_rate": 4.9138337845022e-05, "loss": 0.0667, "step": 765500 }, { "epoch": 0.05, "learning_rate": 4.913777479888837e-05, "loss": 0.0742, "step": 766000 }, { "epoch": 0.05, "learning_rate": 4.9137212878847e-05, "loss": 0.0717, "step": 766500 }, { "epoch": 0.05, "learning_rate": 4.913664983271337e-05, "loss": 0.0737, "step": 767000 }, { "epoch": 0.05, "learning_rate": 4.913608678657973e-05, "loss": 0.0642, "step": 767500 }, { "epoch": 0.05, "learning_rate": 4.9135523740446096e-05, "loss": 0.067, "step": 768000 }, { "epoch": 0.05, "learning_rate": 4.913496069431246e-05, "loss": 0.0686, "step": 768500 }, { "epoch": 0.05, "learning_rate": 4.913439877427109e-05, "loss": 0.0719, "step": 769000 }, { "epoch": 0.05, "learning_rate": 4.9133835728137456e-05, "loss": 0.0734, "step": 769500 }, { "epoch": 0.05, "learning_rate": 4.913327268200382e-05, "loss": 0.0707, "step": 770000 }, { "epoch": 0.05, "learning_rate": 4.9132709635870184e-05, "loss": 0.065, "step": 770500 }, { "epoch": 0.05, "learning_rate": 4.913214658973655e-05, "loss": 0.0688, "step": 771000 }, { "epoch": 0.05, "learning_rate": 4.913158466969518e-05, "loss": 0.0637, "step": 771500 }, { "epoch": 0.05, "learning_rate": 4.9131021623561544e-05, "loss": 0.0658, "step": 772000 }, { "epoch": 0.05, "learning_rate": 4.9130458577427915e-05, "loss": 0.0637, "step": 772500 }, { "epoch": 0.05, "learning_rate": 4.912989553129427e-05, "loss": 0.0687, "step": 773000 }, { "epoch": 0.05, "learning_rate": 4.9129332485160636e-05, "loss": 0.0653, "step": 773500 }, { "epoch": 0.05, "learning_rate": 4.912877056511927e-05, "loss": 0.067, "step": 774000 }, { "epoch": 0.05, "learning_rate": 4.912820751898564e-05, "loss": 0.0714, "step": 774500 }, { "epoch": 0.05, "learning_rate": 4.9127644472852e-05, "loss": 0.0668, "step": 775000 }, { "epoch": 0.05, "learning_rate": 4.912708142671836e-05, "loss": 0.0698, "step": 775500 }, { "epoch": 0.05, "learning_rate": 4.912651838058473e-05, "loss": 0.0668, "step": 776000 }, { "epoch": 0.05, "learning_rate": 4.9125955334451095e-05, "loss": 0.0738, "step": 776500 }, { "epoch": 0.05, "learning_rate": 4.912539228831746e-05, "loss": 0.067, "step": 777000 }, { "epoch": 0.05, "learning_rate": 4.912483036827609e-05, "loss": 0.0669, "step": 777500 }, { "epoch": 0.05, "learning_rate": 4.9124267322142455e-05, "loss": 0.0689, "step": 778000 }, { "epoch": 0.05, "learning_rate": 4.912370427600882e-05, "loss": 0.0693, "step": 778500 }, { "epoch": 0.05, "learning_rate": 4.9123141229875183e-05, "loss": 0.0724, "step": 779000 }, { "epoch": 0.05, "learning_rate": 4.9122578183741554e-05, "loss": 0.0719, "step": 779500 }, { "epoch": 0.05, "learning_rate": 4.912201626370018e-05, "loss": 0.0687, "step": 780000 }, { "epoch": 0.05, "learning_rate": 4.912145321756655e-05, "loss": 0.0631, "step": 780500 }, { "epoch": 0.05, "learning_rate": 4.912089017143291e-05, "loss": 0.0655, "step": 781000 }, { "epoch": 0.05, "learning_rate": 4.912032712529928e-05, "loss": 0.0759, "step": 781500 }, { "epoch": 0.05, "learning_rate": 4.911976407916564e-05, "loss": 0.0656, "step": 782000 }, { "epoch": 0.05, "learning_rate": 4.9119201033032e-05, "loss": 0.0731, "step": 782500 }, { "epoch": 0.05, "learning_rate": 4.911863911299064e-05, "loss": 0.0694, "step": 783000 }, { "epoch": 0.05, "learning_rate": 4.9118076066857e-05, "loss": 0.0727, "step": 783500 }, { "epoch": 0.05, "learning_rate": 4.911751302072337e-05, "loss": 0.0668, "step": 784000 }, { "epoch": 0.05, "learning_rate": 4.911694997458973e-05, "loss": 0.0704, "step": 784500 }, { "epoch": 0.05, "learning_rate": 4.911638805454836e-05, "loss": 0.0643, "step": 785000 }, { "epoch": 0.05, "learning_rate": 4.911582500841473e-05, "loss": 0.064, "step": 785500 }, { "epoch": 0.05, "learning_rate": 4.911526196228109e-05, "loss": 0.0703, "step": 786000 }, { "epoch": 0.05, "learning_rate": 4.9114698916147455e-05, "loss": 0.0673, "step": 786500 }, { "epoch": 0.05, "learning_rate": 4.911413699610609e-05, "loss": 0.0665, "step": 787000 }, { "epoch": 0.05, "learning_rate": 4.911357394997245e-05, "loss": 0.0706, "step": 787500 }, { "epoch": 0.05, "learning_rate": 4.911301090383882e-05, "loss": 0.0679, "step": 788000 }, { "epoch": 0.05, "learning_rate": 4.911244785770518e-05, "loss": 0.065, "step": 788500 }, { "epoch": 0.05, "learning_rate": 4.911188593766382e-05, "loss": 0.0648, "step": 789000 }, { "epoch": 0.05, "learning_rate": 4.9111322891530175e-05, "loss": 0.0733, "step": 789500 }, { "epoch": 0.05, "learning_rate": 4.911075984539654e-05, "loss": 0.0645, "step": 790000 }, { "epoch": 0.05, "learning_rate": 4.911019679926291e-05, "loss": 0.0688, "step": 790500 }, { "epoch": 0.05, "learning_rate": 4.910963375312927e-05, "loss": 0.0689, "step": 791000 }, { "epoch": 0.05, "learning_rate": 4.9109071833087906e-05, "loss": 0.0618, "step": 791500 }, { "epoch": 0.05, "learning_rate": 4.9108508786954263e-05, "loss": 0.0651, "step": 792000 }, { "epoch": 0.05, "learning_rate": 4.9107945740820634e-05, "loss": 0.0674, "step": 792500 }, { "epoch": 0.05, "learning_rate": 4.9107382694687e-05, "loss": 0.0731, "step": 793000 }, { "epoch": 0.05, "learning_rate": 4.910682077464563e-05, "loss": 0.064, "step": 793500 }, { "epoch": 0.05, "learning_rate": 4.9106257728511994e-05, "loss": 0.0672, "step": 794000 }, { "epoch": 0.05, "learning_rate": 4.910569468237836e-05, "loss": 0.0661, "step": 794500 }, { "epoch": 0.05, "learning_rate": 4.910513163624472e-05, "loss": 0.0694, "step": 795000 }, { "epoch": 0.05, "learning_rate": 4.9104568590111086e-05, "loss": 0.0669, "step": 795500 }, { "epoch": 0.05, "learning_rate": 4.910400667006972e-05, "loss": 0.0648, "step": 796000 }, { "epoch": 0.05, "learning_rate": 4.910344362393608e-05, "loss": 0.0675, "step": 796500 }, { "epoch": 0.05, "learning_rate": 4.910288057780245e-05, "loss": 0.066, "step": 797000 }, { "epoch": 0.05, "learning_rate": 4.910231753166881e-05, "loss": 0.0731, "step": 797500 }, { "epoch": 0.05, "learning_rate": 4.910175448553518e-05, "loss": 0.0702, "step": 798000 }, { "epoch": 0.05, "learning_rate": 4.910119256549381e-05, "loss": 0.0696, "step": 798500 }, { "epoch": 0.05, "learning_rate": 4.910062951936018e-05, "loss": 0.0674, "step": 799000 }, { "epoch": 0.05, "learning_rate": 4.910006647322654e-05, "loss": 0.0682, "step": 799500 }, { "epoch": 0.05, "learning_rate": 4.9099503427092906e-05, "loss": 0.0706, "step": 800000 }, { "epoch": 0.05, "learning_rate": 4.909894038095927e-05, "loss": 0.0714, "step": 800500 }, { "epoch": 0.05, "learning_rate": 4.90983784609179e-05, "loss": 0.0665, "step": 801000 }, { "epoch": 0.05, "learning_rate": 4.9097815414784266e-05, "loss": 0.0664, "step": 801500 }, { "epoch": 0.05, "learning_rate": 4.909725236865063e-05, "loss": 0.0684, "step": 802000 }, { "epoch": 0.05, "learning_rate": 4.9096689322516994e-05, "loss": 0.0727, "step": 802500 }, { "epoch": 0.05, "learning_rate": 4.9096127402475626e-05, "loss": 0.0698, "step": 803000 }, { "epoch": 0.05, "learning_rate": 4.909556435634199e-05, "loss": 0.0624, "step": 803500 }, { "epoch": 0.05, "learning_rate": 4.9095001310208354e-05, "loss": 0.0719, "step": 804000 }, { "epoch": 0.05, "learning_rate": 4.9094438264074725e-05, "loss": 0.0689, "step": 804500 }, { "epoch": 0.05, "learning_rate": 4.909387521794108e-05, "loss": 0.0689, "step": 805000 }, { "epoch": 0.05, "learning_rate": 4.9093312171807446e-05, "loss": 0.0725, "step": 805500 }, { "epoch": 0.05, "learning_rate": 4.909275025176608e-05, "loss": 0.0723, "step": 806000 }, { "epoch": 0.05, "learning_rate": 4.909218720563244e-05, "loss": 0.0664, "step": 806500 }, { "epoch": 0.05, "learning_rate": 4.909162415949881e-05, "loss": 0.0693, "step": 807000 }, { "epoch": 0.05, "learning_rate": 4.909106111336517e-05, "loss": 0.0681, "step": 807500 }, { "epoch": 0.05, "learning_rate": 4.909049806723154e-05, "loss": 0.0686, "step": 808000 }, { "epoch": 0.05, "learning_rate": 4.9089936147190166e-05, "loss": 0.0651, "step": 808500 }, { "epoch": 0.05, "learning_rate": 4.908937310105654e-05, "loss": 0.0687, "step": 809000 }, { "epoch": 0.05, "learning_rate": 4.90888100549229e-05, "loss": 0.068, "step": 809500 }, { "epoch": 0.05, "learning_rate": 4.9088247008789265e-05, "loss": 0.0684, "step": 810000 }, { "epoch": 0.05, "learning_rate": 4.908768396265563e-05, "loss": 0.0647, "step": 810500 }, { "epoch": 0.05, "learning_rate": 4.908712204261426e-05, "loss": 0.0655, "step": 811000 }, { "epoch": 0.05, "learning_rate": 4.9086558996480625e-05, "loss": 0.0676, "step": 811500 }, { "epoch": 0.05, "learning_rate": 4.908599595034699e-05, "loss": 0.0703, "step": 812000 }, { "epoch": 0.05, "learning_rate": 4.908543290421336e-05, "loss": 0.0731, "step": 812500 }, { "epoch": 0.05, "learning_rate": 4.908486985807972e-05, "loss": 0.068, "step": 813000 }, { "epoch": 0.05, "learning_rate": 4.908430681194609e-05, "loss": 0.0628, "step": 813500 }, { "epoch": 0.05, "learning_rate": 4.908374376581245e-05, "loss": 0.0673, "step": 814000 }, { "epoch": 0.06, "learning_rate": 4.9083181845771084e-05, "loss": 0.0732, "step": 814500 }, { "epoch": 0.06, "learning_rate": 4.908261879963745e-05, "loss": 0.0729, "step": 815000 }, { "epoch": 0.06, "learning_rate": 4.9082055753503805e-05, "loss": 0.0677, "step": 815500 }, { "epoch": 0.06, "learning_rate": 4.9081492707370176e-05, "loss": 0.0614, "step": 816000 }, { "epoch": 0.06, "learning_rate": 4.90809307873288e-05, "loss": 0.0651, "step": 816500 }, { "epoch": 0.06, "learning_rate": 4.908036774119517e-05, "loss": 0.0673, "step": 817000 }, { "epoch": 0.06, "learning_rate": 4.9079804695061536e-05, "loss": 0.0667, "step": 817500 }, { "epoch": 0.06, "learning_rate": 4.90792416489279e-05, "loss": 0.0685, "step": 818000 }, { "epoch": 0.06, "learning_rate": 4.907867972888653e-05, "loss": 0.063, "step": 818500 }, { "epoch": 0.06, "learning_rate": 4.90781166827529e-05, "loss": 0.0662, "step": 819000 }, { "epoch": 0.06, "learning_rate": 4.907755363661926e-05, "loss": 0.0659, "step": 819500 }, { "epoch": 0.06, "learning_rate": 4.9076990590485625e-05, "loss": 0.0642, "step": 820000 }, { "epoch": 0.06, "learning_rate": 4.907642754435199e-05, "loss": 0.0688, "step": 820500 }, { "epoch": 0.06, "learning_rate": 4.907586562431062e-05, "loss": 0.0668, "step": 821000 }, { "epoch": 0.06, "learning_rate": 4.9075302578176985e-05, "loss": 0.0633, "step": 821500 }, { "epoch": 0.06, "learning_rate": 4.907473953204335e-05, "loss": 0.0676, "step": 822000 }, { "epoch": 0.06, "learning_rate": 4.907417648590972e-05, "loss": 0.0708, "step": 822500 }, { "epoch": 0.06, "learning_rate": 4.9073614565868345e-05, "loss": 0.0649, "step": 823000 }, { "epoch": 0.06, "learning_rate": 4.9073051519734716e-05, "loss": 0.0652, "step": 823500 }, { "epoch": 0.06, "learning_rate": 4.907248847360108e-05, "loss": 0.0674, "step": 824000 }, { "epoch": 0.06, "learning_rate": 4.9071925427467444e-05, "loss": 0.069, "step": 824500 }, { "epoch": 0.06, "learning_rate": 4.907136238133381e-05, "loss": 0.0662, "step": 825000 }, { "epoch": 0.06, "learning_rate": 4.907079933520017e-05, "loss": 0.0685, "step": 825500 }, { "epoch": 0.06, "learning_rate": 4.9070237415158804e-05, "loss": 0.0656, "step": 826000 }, { "epoch": 0.06, "learning_rate": 4.906967436902517e-05, "loss": 0.0684, "step": 826500 }, { "epoch": 0.06, "learning_rate": 4.906911132289153e-05, "loss": 0.0674, "step": 827000 }, { "epoch": 0.06, "learning_rate": 4.9068548276757896e-05, "loss": 0.0634, "step": 827500 }, { "epoch": 0.06, "learning_rate": 4.906798635671653e-05, "loss": 0.0646, "step": 828000 }, { "epoch": 0.06, "learning_rate": 4.906742331058289e-05, "loss": 0.0638, "step": 828500 }, { "epoch": 0.06, "learning_rate": 4.906686026444926e-05, "loss": 0.0716, "step": 829000 }, { "epoch": 0.06, "learning_rate": 4.906629721831562e-05, "loss": 0.0702, "step": 829500 }, { "epoch": 0.06, "learning_rate": 4.9065734172181984e-05, "loss": 0.0624, "step": 830000 }, { "epoch": 0.06, "learning_rate": 4.9065172252140616e-05, "loss": 0.0697, "step": 830500 }, { "epoch": 0.06, "learning_rate": 4.906460920600699e-05, "loss": 0.0675, "step": 831000 }, { "epoch": 0.06, "learning_rate": 4.906404615987335e-05, "loss": 0.0714, "step": 831500 }, { "epoch": 0.06, "learning_rate": 4.906348311373971e-05, "loss": 0.0665, "step": 832000 }, { "epoch": 0.06, "learning_rate": 4.906292119369835e-05, "loss": 0.0702, "step": 832500 }, { "epoch": 0.06, "learning_rate": 4.9062358147564705e-05, "loss": 0.0679, "step": 833000 }, { "epoch": 0.06, "learning_rate": 4.9061795101431075e-05, "loss": 0.069, "step": 833500 }, { "epoch": 0.06, "learning_rate": 4.906123205529744e-05, "loss": 0.067, "step": 834000 }, { "epoch": 0.06, "learning_rate": 4.9060669009163803e-05, "loss": 0.066, "step": 834500 }, { "epoch": 0.06, "learning_rate": 4.9060107089122436e-05, "loss": 0.0678, "step": 835000 }, { "epoch": 0.06, "learning_rate": 4.905954516908107e-05, "loss": 0.0665, "step": 835500 }, { "epoch": 0.06, "learning_rate": 4.905898212294743e-05, "loss": 0.0654, "step": 836000 }, { "epoch": 0.06, "learning_rate": 4.9058419076813796e-05, "loss": 0.0709, "step": 836500 }, { "epoch": 0.06, "learning_rate": 4.905785603068016e-05, "loss": 0.0694, "step": 837000 }, { "epoch": 0.06, "learning_rate": 4.9057292984546524e-05, "loss": 0.0666, "step": 837500 }, { "epoch": 0.06, "learning_rate": 4.905672993841289e-05, "loss": 0.0684, "step": 838000 }, { "epoch": 0.06, "learning_rate": 4.905616689227925e-05, "loss": 0.0642, "step": 838500 }, { "epoch": 0.06, "learning_rate": 4.905560384614562e-05, "loss": 0.0654, "step": 839000 }, { "epoch": 0.06, "learning_rate": 4.9055040800011987e-05, "loss": 0.0629, "step": 839500 }, { "epoch": 0.06, "learning_rate": 4.905447887997062e-05, "loss": 0.0695, "step": 840000 }, { "epoch": 0.06, "learning_rate": 4.905391583383698e-05, "loss": 0.069, "step": 840500 }, { "epoch": 0.06, "learning_rate": 4.905335278770335e-05, "loss": 0.0669, "step": 841000 }, { "epoch": 0.06, "learning_rate": 4.905278974156971e-05, "loss": 0.0663, "step": 841500 }, { "epoch": 0.06, "learning_rate": 4.9052226695436075e-05, "loss": 0.0689, "step": 842000 }, { "epoch": 0.06, "learning_rate": 4.905166477539471e-05, "loss": 0.0654, "step": 842500 }, { "epoch": 0.06, "learning_rate": 4.905110172926107e-05, "loss": 0.0674, "step": 843000 }, { "epoch": 0.06, "learning_rate": 4.9050538683127435e-05, "loss": 0.0685, "step": 843500 }, { "epoch": 0.06, "learning_rate": 4.90499756369938e-05, "loss": 0.0666, "step": 844000 }, { "epoch": 0.06, "learning_rate": 4.904941371695243e-05, "loss": 0.0663, "step": 844500 }, { "epoch": 0.06, "learning_rate": 4.9048850670818795e-05, "loss": 0.0659, "step": 845000 }, { "epoch": 0.06, "learning_rate": 4.9048287624685166e-05, "loss": 0.0684, "step": 845500 }, { "epoch": 0.06, "learning_rate": 4.904772457855152e-05, "loss": 0.0664, "step": 846000 }, { "epoch": 0.06, "learning_rate": 4.904716153241789e-05, "loss": 0.0677, "step": 846500 }, { "epoch": 0.06, "learning_rate": 4.904659961237652e-05, "loss": 0.0673, "step": 847000 }, { "epoch": 0.06, "learning_rate": 4.904603656624288e-05, "loss": 0.063, "step": 847500 }, { "epoch": 0.06, "learning_rate": 4.9045473520109254e-05, "loss": 0.0664, "step": 848000 }, { "epoch": 0.06, "learning_rate": 4.904491047397561e-05, "loss": 0.0631, "step": 848500 }, { "epoch": 0.06, "learning_rate": 4.904434742784198e-05, "loss": 0.0631, "step": 849000 }, { "epoch": 0.06, "learning_rate": 4.904378550780061e-05, "loss": 0.0629, "step": 849500 }, { "epoch": 0.06, "learning_rate": 4.904322246166698e-05, "loss": 0.0747, "step": 850000 }, { "epoch": 0.06, "learning_rate": 4.904265941553334e-05, "loss": 0.0636, "step": 850500 }, { "epoch": 0.06, "learning_rate": 4.9042096369399706e-05, "loss": 0.0656, "step": 851000 }, { "epoch": 0.06, "learning_rate": 4.904153332326607e-05, "loss": 0.0677, "step": 851500 }, { "epoch": 0.06, "learning_rate": 4.9040970277132434e-05, "loss": 0.0633, "step": 852000 }, { "epoch": 0.06, "learning_rate": 4.9040408357091067e-05, "loss": 0.0635, "step": 852500 }, { "epoch": 0.06, "learning_rate": 4.90398464370497e-05, "loss": 0.0651, "step": 853000 }, { "epoch": 0.06, "learning_rate": 4.903928339091606e-05, "loss": 0.0688, "step": 853500 }, { "epoch": 0.06, "learning_rate": 4.903872034478243e-05, "loss": 0.0648, "step": 854000 }, { "epoch": 0.06, "learning_rate": 4.903815729864879e-05, "loss": 0.0679, "step": 854500 }, { "epoch": 0.06, "learning_rate": 4.9037594252515155e-05, "loss": 0.0658, "step": 855000 }, { "epoch": 0.06, "learning_rate": 4.9037031206381525e-05, "loss": 0.0626, "step": 855500 }, { "epoch": 0.06, "learning_rate": 4.903646816024789e-05, "loss": 0.0699, "step": 856000 }, { "epoch": 0.06, "learning_rate": 4.903590511411425e-05, "loss": 0.0649, "step": 856500 }, { "epoch": 0.06, "learning_rate": 4.903534206798062e-05, "loss": 0.062, "step": 857000 }, { "epoch": 0.06, "learning_rate": 4.903478014793925e-05, "loss": 0.0636, "step": 857500 }, { "epoch": 0.06, "learning_rate": 4.9034217101805614e-05, "loss": 0.0643, "step": 858000 }, { "epoch": 0.06, "learning_rate": 4.903365405567198e-05, "loss": 0.0671, "step": 858500 }, { "epoch": 0.06, "learning_rate": 4.903309100953834e-05, "loss": 0.0669, "step": 859000 }, { "epoch": 0.06, "learning_rate": 4.9032529089496974e-05, "loss": 0.0652, "step": 859500 }, { "epoch": 0.06, "learning_rate": 4.903196604336334e-05, "loss": 0.0674, "step": 860000 }, { "epoch": 0.06, "learning_rate": 4.90314029972297e-05, "loss": 0.0694, "step": 860500 }, { "epoch": 0.06, "learning_rate": 4.9030839951096066e-05, "loss": 0.0647, "step": 861000 }, { "epoch": 0.06, "learning_rate": 4.903027690496243e-05, "loss": 0.0651, "step": 861500 }, { "epoch": 0.06, "learning_rate": 4.9029713858828794e-05, "loss": 0.0636, "step": 862000 }, { "epoch": 0.06, "learning_rate": 4.9029151938787426e-05, "loss": 0.0673, "step": 862500 }, { "epoch": 0.06, "learning_rate": 4.902858889265379e-05, "loss": 0.0682, "step": 863000 }, { "epoch": 0.06, "learning_rate": 4.902802584652016e-05, "loss": 0.0646, "step": 863500 }, { "epoch": 0.06, "learning_rate": 4.902746280038652e-05, "loss": 0.0647, "step": 864000 }, { "epoch": 0.06, "learning_rate": 4.902690088034516e-05, "loss": 0.0666, "step": 864500 }, { "epoch": 0.06, "learning_rate": 4.9026337834211514e-05, "loss": 0.0677, "step": 865000 }, { "epoch": 0.06, "learning_rate": 4.9025774788077885e-05, "loss": 0.0621, "step": 865500 }, { "epoch": 0.06, "learning_rate": 4.902521174194425e-05, "loss": 0.0676, "step": 866000 }, { "epoch": 0.06, "learning_rate": 4.902464869581061e-05, "loss": 0.0653, "step": 866500 }, { "epoch": 0.06, "learning_rate": 4.9024086775769245e-05, "loss": 0.0653, "step": 867000 }, { "epoch": 0.06, "learning_rate": 4.902352372963561e-05, "loss": 0.0662, "step": 867500 }, { "epoch": 0.06, "learning_rate": 4.902296068350197e-05, "loss": 0.0659, "step": 868000 }, { "epoch": 0.06, "learning_rate": 4.902239763736834e-05, "loss": 0.0638, "step": 868500 }, { "epoch": 0.06, "learning_rate": 4.902183459123471e-05, "loss": 0.0701, "step": 869000 }, { "epoch": 0.06, "learning_rate": 4.9021272671193333e-05, "loss": 0.0668, "step": 869500 }, { "epoch": 0.06, "learning_rate": 4.9020709625059704e-05, "loss": 0.0609, "step": 870000 }, { "epoch": 0.06, "learning_rate": 4.902014657892606e-05, "loss": 0.0645, "step": 870500 }, { "epoch": 0.06, "learning_rate": 4.901958353279243e-05, "loss": 0.0668, "step": 871000 }, { "epoch": 0.06, "learning_rate": 4.901902161275106e-05, "loss": 0.0628, "step": 871500 }, { "epoch": 0.06, "learning_rate": 4.901845856661743e-05, "loss": 0.0593, "step": 872000 }, { "epoch": 0.06, "learning_rate": 4.901789552048379e-05, "loss": 0.0625, "step": 872500 }, { "epoch": 0.06, "learning_rate": 4.901733247435015e-05, "loss": 0.0696, "step": 873000 }, { "epoch": 0.06, "learning_rate": 4.901677055430879e-05, "loss": 0.0631, "step": 873500 }, { "epoch": 0.06, "learning_rate": 4.901620750817515e-05, "loss": 0.0664, "step": 874000 }, { "epoch": 0.06, "learning_rate": 4.901564446204152e-05, "loss": 0.067, "step": 874500 }, { "epoch": 0.06, "learning_rate": 4.901508141590788e-05, "loss": 0.0652, "step": 875000 }, { "epoch": 0.06, "learning_rate": 4.9014518369774245e-05, "loss": 0.0661, "step": 875500 }, { "epoch": 0.06, "learning_rate": 4.901395532364061e-05, "loss": 0.064, "step": 876000 }, { "epoch": 0.06, "learning_rate": 4.901339452969151e-05, "loss": 0.0694, "step": 876500 }, { "epoch": 0.06, "learning_rate": 4.901283148355787e-05, "loss": 0.0663, "step": 877000 }, { "epoch": 0.06, "learning_rate": 4.901226843742424e-05, "loss": 0.0656, "step": 877500 }, { "epoch": 0.06, "learning_rate": 4.90117053912906e-05, "loss": 0.0702, "step": 878000 }, { "epoch": 0.06, "learning_rate": 4.901114234515697e-05, "loss": 0.0667, "step": 878500 }, { "epoch": 0.06, "learning_rate": 4.901057929902333e-05, "loss": 0.0656, "step": 879000 }, { "epoch": 0.06, "learning_rate": 4.901001625288969e-05, "loss": 0.0635, "step": 879500 }, { "epoch": 0.06, "learning_rate": 4.9009453206756064e-05, "loss": 0.0703, "step": 880000 }, { "epoch": 0.06, "learning_rate": 4.900889016062242e-05, "loss": 0.0695, "step": 880500 }, { "epoch": 0.06, "learning_rate": 4.900832824058106e-05, "loss": 0.0658, "step": 881000 }, { "epoch": 0.06, "learning_rate": 4.900776519444742e-05, "loss": 0.0656, "step": 881500 }, { "epoch": 0.06, "learning_rate": 4.900720214831379e-05, "loss": 0.0643, "step": 882000 }, { "epoch": 0.06, "learning_rate": 4.900663910218015e-05, "loss": 0.0665, "step": 882500 }, { "epoch": 0.06, "learning_rate": 4.9006077182138784e-05, "loss": 0.0674, "step": 883000 }, { "epoch": 0.06, "learning_rate": 4.900551413600515e-05, "loss": 0.0658, "step": 883500 }, { "epoch": 0.06, "learning_rate": 4.900495108987151e-05, "loss": 0.062, "step": 884000 }, { "epoch": 0.06, "learning_rate": 4.9004389169830144e-05, "loss": 0.0627, "step": 884500 }, { "epoch": 0.06, "learning_rate": 4.900382612369651e-05, "loss": 0.067, "step": 885000 }, { "epoch": 0.06, "learning_rate": 4.900326307756287e-05, "loss": 0.0653, "step": 885500 }, { "epoch": 0.06, "learning_rate": 4.9002700031429236e-05, "loss": 0.0601, "step": 886000 }, { "epoch": 0.06, "learning_rate": 4.900213698529561e-05, "loss": 0.0651, "step": 886500 }, { "epoch": 0.06, "learning_rate": 4.9001573939161964e-05, "loss": 0.0637, "step": 887000 }, { "epoch": 0.06, "learning_rate": 4.9001010893028335e-05, "loss": 0.0646, "step": 887500 }, { "epoch": 0.06, "learning_rate": 4.90004478468947e-05, "loss": 0.0648, "step": 888000 }, { "epoch": 0.06, "learning_rate": 4.899988592685333e-05, "loss": 0.0667, "step": 888500 }, { "epoch": 0.06, "learning_rate": 4.8999322880719695e-05, "loss": 0.063, "step": 889000 }, { "epoch": 0.06, "learning_rate": 4.899875983458605e-05, "loss": 0.0617, "step": 889500 }, { "epoch": 0.06, "learning_rate": 4.899819678845242e-05, "loss": 0.0682, "step": 890000 }, { "epoch": 0.06, "learning_rate": 4.899763486841105e-05, "loss": 0.0685, "step": 890500 }, { "epoch": 0.06, "learning_rate": 4.899707182227742e-05, "loss": 0.0685, "step": 891000 }, { "epoch": 0.06, "learning_rate": 4.8996508776143784e-05, "loss": 0.0593, "step": 891500 }, { "epoch": 0.06, "learning_rate": 4.899594573001015e-05, "loss": 0.0657, "step": 892000 }, { "epoch": 0.06, "learning_rate": 4.899538268387651e-05, "loss": 0.0631, "step": 892500 }, { "epoch": 0.06, "learning_rate": 4.8994820763835144e-05, "loss": 0.0641, "step": 893000 }, { "epoch": 0.06, "learning_rate": 4.899425771770151e-05, "loss": 0.0611, "step": 893500 }, { "epoch": 0.06, "learning_rate": 4.899369467156787e-05, "loss": 0.0637, "step": 894000 }, { "epoch": 0.06, "learning_rate": 4.8993131625434236e-05, "loss": 0.0687, "step": 894500 }, { "epoch": 0.06, "learning_rate": 4.8992570831485136e-05, "loss": 0.0669, "step": 895000 }, { "epoch": 0.06, "learning_rate": 4.89920077853515e-05, "loss": 0.0612, "step": 895500 }, { "epoch": 0.06, "learning_rate": 4.899144473921787e-05, "loss": 0.0626, "step": 896000 }, { "epoch": 0.06, "learning_rate": 4.899088169308423e-05, "loss": 0.0628, "step": 896500 }, { "epoch": 0.06, "learning_rate": 4.899031864695059e-05, "loss": 0.0682, "step": 897000 }, { "epoch": 0.06, "learning_rate": 4.898975560081696e-05, "loss": 0.0652, "step": 897500 }, { "epoch": 0.06, "learning_rate": 4.898919255468332e-05, "loss": 0.0614, "step": 898000 }, { "epoch": 0.06, "learning_rate": 4.898862950854969e-05, "loss": 0.067, "step": 898500 }, { "epoch": 0.06, "learning_rate": 4.8988066462416055e-05, "loss": 0.0653, "step": 899000 }, { "epoch": 0.06, "learning_rate": 4.898750454237469e-05, "loss": 0.0622, "step": 899500 }, { "epoch": 0.06, "learning_rate": 4.898694262233331e-05, "loss": 0.0665, "step": 900000 }, { "epoch": 0.06, "learning_rate": 4.898637957619968e-05, "loss": 0.0639, "step": 900500 }, { "epoch": 0.06, "learning_rate": 4.898581653006605e-05, "loss": 0.065, "step": 901000 }, { "epoch": 0.06, "learning_rate": 4.898525348393241e-05, "loss": 0.0675, "step": 901500 }, { "epoch": 0.06, "learning_rate": 4.8984690437798775e-05, "loss": 0.0653, "step": 902000 }, { "epoch": 0.06, "learning_rate": 4.898412739166514e-05, "loss": 0.0643, "step": 902500 }, { "epoch": 0.06, "learning_rate": 4.898356434553151e-05, "loss": 0.0702, "step": 903000 }, { "epoch": 0.06, "learning_rate": 4.898300129939787e-05, "loss": 0.0642, "step": 903500 }, { "epoch": 0.06, "learning_rate": 4.8982439379356506e-05, "loss": 0.0672, "step": 904000 }, { "epoch": 0.06, "learning_rate": 4.8981876333222864e-05, "loss": 0.0647, "step": 904500 }, { "epoch": 0.06, "learning_rate": 4.8981313287089234e-05, "loss": 0.0636, "step": 905000 }, { "epoch": 0.06, "learning_rate": 4.89807502409556e-05, "loss": 0.069, "step": 905500 }, { "epoch": 0.06, "learning_rate": 4.898018832091423e-05, "loss": 0.067, "step": 906000 }, { "epoch": 0.06, "learning_rate": 4.8979625274780594e-05, "loss": 0.0717, "step": 906500 }, { "epoch": 0.06, "learning_rate": 4.897906222864695e-05, "loss": 0.0672, "step": 907000 }, { "epoch": 0.06, "learning_rate": 4.897849918251332e-05, "loss": 0.0715, "step": 907500 }, { "epoch": 0.06, "learning_rate": 4.8977936136379686e-05, "loss": 0.0658, "step": 908000 }, { "epoch": 0.06, "learning_rate": 4.897737421633832e-05, "loss": 0.0645, "step": 908500 }, { "epoch": 0.06, "learning_rate": 4.897681117020468e-05, "loss": 0.0644, "step": 909000 }, { "epoch": 0.06, "learning_rate": 4.897624812407105e-05, "loss": 0.0669, "step": 909500 }, { "epoch": 0.06, "learning_rate": 4.897568507793741e-05, "loss": 0.0635, "step": 910000 }, { "epoch": 0.06, "learning_rate": 4.897512315789604e-05, "loss": 0.0641, "step": 910500 }, { "epoch": 0.06, "learning_rate": 4.897456011176241e-05, "loss": 0.069, "step": 911000 }, { "epoch": 0.06, "learning_rate": 4.897399706562877e-05, "loss": 0.0636, "step": 911500 }, { "epoch": 0.06, "learning_rate": 4.8973434019495135e-05, "loss": 0.0658, "step": 912000 }, { "epoch": 0.06, "learning_rate": 4.89728709733615e-05, "loss": 0.0663, "step": 912500 }, { "epoch": 0.06, "learning_rate": 4.897230905332013e-05, "loss": 0.0651, "step": 913000 }, { "epoch": 0.06, "learning_rate": 4.8971746007186495e-05, "loss": 0.0655, "step": 913500 }, { "epoch": 0.06, "learning_rate": 4.8971182961052866e-05, "loss": 0.062, "step": 914000 }, { "epoch": 0.06, "learning_rate": 4.897061991491922e-05, "loss": 0.062, "step": 914500 }, { "epoch": 0.06, "learning_rate": 4.8970056868785594e-05, "loss": 0.0693, "step": 915000 }, { "epoch": 0.06, "learning_rate": 4.896949494874422e-05, "loss": 0.0695, "step": 915500 }, { "epoch": 0.06, "learning_rate": 4.896893190261059e-05, "loss": 0.0607, "step": 916000 }, { "epoch": 0.06, "learning_rate": 4.8968368856476954e-05, "loss": 0.0662, "step": 916500 }, { "epoch": 0.06, "learning_rate": 4.8967806936435586e-05, "loss": 0.0673, "step": 917000 }, { "epoch": 0.06, "learning_rate": 4.896724389030195e-05, "loss": 0.0655, "step": 917500 }, { "epoch": 0.06, "learning_rate": 4.8966680844168314e-05, "loss": 0.0661, "step": 918000 }, { "epoch": 0.06, "learning_rate": 4.896611779803468e-05, "loss": 0.0631, "step": 918500 }, { "epoch": 0.06, "learning_rate": 4.896555475190104e-05, "loss": 0.0611, "step": 919000 }, { "epoch": 0.06, "learning_rate": 4.896499170576741e-05, "loss": 0.0574, "step": 919500 }, { "epoch": 0.06, "learning_rate": 4.896442865963377e-05, "loss": 0.0638, "step": 920000 }, { "epoch": 0.06, "learning_rate": 4.8963865613500134e-05, "loss": 0.07, "step": 920500 }, { "epoch": 0.06, "learning_rate": 4.8963303693458766e-05, "loss": 0.0662, "step": 921000 }, { "epoch": 0.06, "learning_rate": 4.896274064732513e-05, "loss": 0.0625, "step": 921500 }, { "epoch": 0.06, "learning_rate": 4.89621776011915e-05, "loss": 0.0678, "step": 922000 }, { "epoch": 0.06, "learning_rate": 4.896161455505786e-05, "loss": 0.0621, "step": 922500 }, { "epoch": 0.06, "learning_rate": 4.896105150892423e-05, "loss": 0.0678, "step": 923000 }, { "epoch": 0.06, "learning_rate": 4.896048846279059e-05, "loss": 0.0652, "step": 923500 }, { "epoch": 0.06, "learning_rate": 4.895992541665696e-05, "loss": 0.0664, "step": 924000 }, { "epoch": 0.06, "learning_rate": 4.895936349661559e-05, "loss": 0.0608, "step": 924500 }, { "epoch": 0.06, "learning_rate": 4.8958800450481953e-05, "loss": 0.0658, "step": 925000 }, { "epoch": 0.06, "learning_rate": 4.895823740434832e-05, "loss": 0.0678, "step": 925500 }, { "epoch": 0.06, "learning_rate": 4.895767435821468e-05, "loss": 0.0636, "step": 926000 }, { "epoch": 0.06, "learning_rate": 4.8957112438173314e-05, "loss": 0.0627, "step": 926500 }, { "epoch": 0.06, "learning_rate": 4.895654939203968e-05, "loss": 0.0674, "step": 927000 }, { "epoch": 0.06, "learning_rate": 4.895598634590604e-05, "loss": 0.0655, "step": 927500 }, { "epoch": 0.06, "learning_rate": 4.8955424425864674e-05, "loss": 0.0618, "step": 928000 }, { "epoch": 0.06, "learning_rate": 4.895486137973104e-05, "loss": 0.0601, "step": 928500 }, { "epoch": 0.06, "learning_rate": 4.89542983335974e-05, "loss": 0.0648, "step": 929000 }, { "epoch": 0.06, "learning_rate": 4.895373528746377e-05, "loss": 0.0625, "step": 929500 }, { "epoch": 0.06, "learning_rate": 4.895317224133013e-05, "loss": 0.064, "step": 930000 }, { "epoch": 0.06, "learning_rate": 4.8952609195196494e-05, "loss": 0.0711, "step": 930500 }, { "epoch": 0.06, "learning_rate": 4.8952046149062865e-05, "loss": 0.0625, "step": 931000 }, { "epoch": 0.06, "learning_rate": 4.895148310292923e-05, "loss": 0.0689, "step": 931500 }, { "epoch": 0.06, "learning_rate": 4.895092005679559e-05, "loss": 0.0659, "step": 932000 }, { "epoch": 0.06, "learning_rate": 4.8950357010661957e-05, "loss": 0.0704, "step": 932500 }, { "epoch": 0.06, "learning_rate": 4.894979396452832e-05, "loss": 0.0619, "step": 933000 }, { "epoch": 0.06, "learning_rate": 4.894923204448695e-05, "loss": 0.0619, "step": 933500 }, { "epoch": 0.06, "learning_rate": 4.894866899835332e-05, "loss": 0.0655, "step": 934000 }, { "epoch": 0.06, "learning_rate": 4.894810595221968e-05, "loss": 0.063, "step": 934500 }, { "epoch": 0.06, "learning_rate": 4.8947542906086045e-05, "loss": 0.0598, "step": 935000 }, { "epoch": 0.06, "learning_rate": 4.894698098604468e-05, "loss": 0.0593, "step": 935500 }, { "epoch": 0.06, "learning_rate": 4.894641793991104e-05, "loss": 0.063, "step": 936000 }, { "epoch": 0.06, "learning_rate": 4.894585489377741e-05, "loss": 0.0641, "step": 936500 }, { "epoch": 0.06, "learning_rate": 4.894529184764377e-05, "loss": 0.0662, "step": 937000 }, { "epoch": 0.06, "learning_rate": 4.894472880151014e-05, "loss": 0.0669, "step": 937500 }, { "epoch": 0.06, "learning_rate": 4.8944166881468765e-05, "loss": 0.0596, "step": 938000 }, { "epoch": 0.06, "learning_rate": 4.8943603835335136e-05, "loss": 0.0599, "step": 938500 }, { "epoch": 0.06, "learning_rate": 4.89430407892015e-05, "loss": 0.0658, "step": 939000 }, { "epoch": 0.06, "learning_rate": 4.894247774306786e-05, "loss": 0.0597, "step": 939500 }, { "epoch": 0.06, "learning_rate": 4.8941915823026496e-05, "loss": 0.0618, "step": 940000 }, { "epoch": 0.06, "learning_rate": 4.894135277689286e-05, "loss": 0.0621, "step": 940500 }, { "epoch": 0.06, "learning_rate": 4.8940789730759224e-05, "loss": 0.0638, "step": 941000 }, { "epoch": 0.06, "learning_rate": 4.894022668462559e-05, "loss": 0.0645, "step": 941500 }, { "epoch": 0.06, "learning_rate": 4.893966476458422e-05, "loss": 0.0661, "step": 942000 }, { "epoch": 0.06, "learning_rate": 4.8939101718450584e-05, "loss": 0.0699, "step": 942500 }, { "epoch": 0.06, "learning_rate": 4.893853867231695e-05, "loss": 0.063, "step": 943000 }, { "epoch": 0.06, "learning_rate": 4.893797562618331e-05, "loss": 0.0692, "step": 943500 }, { "epoch": 0.06, "learning_rate": 4.8937413706141945e-05, "loss": 0.0634, "step": 944000 }, { "epoch": 0.06, "learning_rate": 4.893685066000831e-05, "loss": 0.0653, "step": 944500 }, { "epoch": 0.06, "learning_rate": 4.893628761387468e-05, "loss": 0.0591, "step": 945000 }, { "epoch": 0.06, "learning_rate": 4.893572456774104e-05, "loss": 0.0593, "step": 945500 }, { "epoch": 0.06, "learning_rate": 4.89351615216074e-05, "loss": 0.0657, "step": 946000 }, { "epoch": 0.06, "learning_rate": 4.893459960156604e-05, "loss": 0.0646, "step": 946500 }, { "epoch": 0.06, "learning_rate": 4.89340365554324e-05, "loss": 0.0614, "step": 947000 }, { "epoch": 0.06, "learning_rate": 4.893347350929877e-05, "loss": 0.06, "step": 947500 }, { "epoch": 0.06, "learning_rate": 4.89329115892574e-05, "loss": 0.0647, "step": 948000 }, { "epoch": 0.06, "learning_rate": 4.8932348543123764e-05, "loss": 0.0616, "step": 948500 }, { "epoch": 0.06, "learning_rate": 4.893178549699013e-05, "loss": 0.067, "step": 949000 }, { "epoch": 0.06, "learning_rate": 4.893122245085649e-05, "loss": 0.0677, "step": 949500 }, { "epoch": 0.06, "learning_rate": 4.8930659404722856e-05, "loss": 0.0615, "step": 950000 }, { "epoch": 0.06, "learning_rate": 4.893009635858922e-05, "loss": 0.0611, "step": 950500 }, { "epoch": 0.06, "learning_rate": 4.8929533312455584e-05, "loss": 0.069, "step": 951000 }, { "epoch": 0.06, "learning_rate": 4.892897026632195e-05, "loss": 0.0637, "step": 951500 }, { "epoch": 0.06, "learning_rate": 4.892840722018832e-05, "loss": 0.0635, "step": 952000 }, { "epoch": 0.06, "learning_rate": 4.8927844174054676e-05, "loss": 0.066, "step": 952500 }, { "epoch": 0.06, "learning_rate": 4.8927282254013315e-05, "loss": 0.0606, "step": 953000 }, { "epoch": 0.06, "learning_rate": 4.892671920787967e-05, "loss": 0.0647, "step": 953500 }, { "epoch": 0.06, "learning_rate": 4.892615616174604e-05, "loss": 0.0651, "step": 954000 }, { "epoch": 0.06, "learning_rate": 4.892559311561241e-05, "loss": 0.0646, "step": 954500 }, { "epoch": 0.06, "learning_rate": 4.8925030069478764e-05, "loss": 0.0654, "step": 955000 }, { "epoch": 0.06, "learning_rate": 4.89244681494374e-05, "loss": 0.0648, "step": 955500 }, { "epoch": 0.06, "learning_rate": 4.892390510330376e-05, "loss": 0.0594, "step": 956000 }, { "epoch": 0.06, "learning_rate": 4.892334205717013e-05, "loss": 0.0637, "step": 956500 }, { "epoch": 0.06, "learning_rate": 4.8922779011036495e-05, "loss": 0.0634, "step": 957000 }, { "epoch": 0.06, "learning_rate": 4.892221709099513e-05, "loss": 0.0612, "step": 957500 }, { "epoch": 0.06, "learning_rate": 4.892165404486149e-05, "loss": 0.0704, "step": 958000 }, { "epoch": 0.06, "learning_rate": 4.8921090998727855e-05, "loss": 0.0667, "step": 958500 }, { "epoch": 0.06, "learning_rate": 4.892052795259422e-05, "loss": 0.0667, "step": 959000 }, { "epoch": 0.06, "learning_rate": 4.891996603255285e-05, "loss": 0.0707, "step": 959500 }, { "epoch": 0.06, "learning_rate": 4.8919402986419215e-05, "loss": 0.0651, "step": 960000 }, { "epoch": 0.06, "learning_rate": 4.891883994028558e-05, "loss": 0.0656, "step": 960500 }, { "epoch": 0.06, "learning_rate": 4.891827689415195e-05, "loss": 0.066, "step": 961000 }, { "epoch": 0.06, "learning_rate": 4.891771384801831e-05, "loss": 0.0607, "step": 961500 }, { "epoch": 0.06, "learning_rate": 4.8917151927976946e-05, "loss": 0.0634, "step": 962000 }, { "epoch": 0.07, "learning_rate": 4.891659000793558e-05, "loss": 0.0672, "step": 962500 }, { "epoch": 0.07, "learning_rate": 4.891602696180194e-05, "loss": 0.0631, "step": 963000 }, { "epoch": 0.07, "learning_rate": 4.89154639156683e-05, "loss": 0.062, "step": 963500 }, { "epoch": 0.07, "learning_rate": 4.891490086953467e-05, "loss": 0.0689, "step": 964000 }, { "epoch": 0.07, "learning_rate": 4.8914338949493296e-05, "loss": 0.0632, "step": 964500 }, { "epoch": 0.07, "learning_rate": 4.891377590335967e-05, "loss": 0.0643, "step": 965000 }, { "epoch": 0.07, "learning_rate": 4.891321285722603e-05, "loss": 0.061, "step": 965500 }, { "epoch": 0.07, "learning_rate": 4.8912649811092395e-05, "loss": 0.0615, "step": 966000 }, { "epoch": 0.07, "learning_rate": 4.891208676495876e-05, "loss": 0.0617, "step": 966500 }, { "epoch": 0.07, "learning_rate": 4.891152371882512e-05, "loss": 0.0593, "step": 967000 }, { "epoch": 0.07, "learning_rate": 4.891096067269149e-05, "loss": 0.0633, "step": 967500 }, { "epoch": 0.07, "learning_rate": 4.891039762655785e-05, "loss": 0.0638, "step": 968000 }, { "epoch": 0.07, "learning_rate": 4.890983458042422e-05, "loss": 0.0593, "step": 968500 }, { "epoch": 0.07, "learning_rate": 4.890927266038285e-05, "loss": 0.0665, "step": 969000 }, { "epoch": 0.07, "learning_rate": 4.890870961424922e-05, "loss": 0.0627, "step": 969500 }, { "epoch": 0.07, "learning_rate": 4.8908146568115575e-05, "loss": 0.0644, "step": 970000 }, { "epoch": 0.07, "learning_rate": 4.8907583521981946e-05, "loss": 0.0588, "step": 970500 }, { "epoch": 0.07, "learning_rate": 4.890702047584831e-05, "loss": 0.0595, "step": 971000 }, { "epoch": 0.07, "learning_rate": 4.890645855580694e-05, "loss": 0.0621, "step": 971500 }, { "epoch": 0.07, "learning_rate": 4.8905895509673306e-05, "loss": 0.0628, "step": 972000 }, { "epoch": 0.07, "learning_rate": 4.890533246353966e-05, "loss": 0.0628, "step": 972500 }, { "epoch": 0.07, "learning_rate": 4.8904769417406034e-05, "loss": 0.068, "step": 973000 }, { "epoch": 0.07, "learning_rate": 4.89042063712724e-05, "loss": 0.0643, "step": 973500 }, { "epoch": 0.07, "learning_rate": 4.890364445123103e-05, "loss": 0.063, "step": 974000 }, { "epoch": 0.07, "learning_rate": 4.8903081405097394e-05, "loss": 0.0657, "step": 974500 }, { "epoch": 0.07, "learning_rate": 4.8902518358963765e-05, "loss": 0.0668, "step": 975000 }, { "epoch": 0.07, "learning_rate": 4.890195531283012e-05, "loss": 0.0689, "step": 975500 }, { "epoch": 0.07, "learning_rate": 4.8901392266696486e-05, "loss": 0.0632, "step": 976000 }, { "epoch": 0.07, "learning_rate": 4.890083034665512e-05, "loss": 0.0596, "step": 976500 }, { "epoch": 0.07, "learning_rate": 4.890026730052148e-05, "loss": 0.0643, "step": 977000 }, { "epoch": 0.07, "learning_rate": 4.889970425438785e-05, "loss": 0.0697, "step": 977500 }, { "epoch": 0.07, "learning_rate": 4.889914120825421e-05, "loss": 0.0621, "step": 978000 }, { "epoch": 0.07, "learning_rate": 4.889857816212058e-05, "loss": 0.059, "step": 978500 }, { "epoch": 0.07, "learning_rate": 4.8898016242079206e-05, "loss": 0.0599, "step": 979000 }, { "epoch": 0.07, "learning_rate": 4.889745319594558e-05, "loss": 0.0621, "step": 979500 }, { "epoch": 0.07, "learning_rate": 4.889689014981194e-05, "loss": 0.0641, "step": 980000 }, { "epoch": 0.07, "learning_rate": 4.8896327103678305e-05, "loss": 0.0648, "step": 980500 }, { "epoch": 0.07, "learning_rate": 4.889576518363694e-05, "loss": 0.0651, "step": 981000 }, { "epoch": 0.07, "learning_rate": 4.88952021375033e-05, "loss": 0.0626, "step": 981500 }, { "epoch": 0.07, "learning_rate": 4.8894639091369665e-05, "loss": 0.0615, "step": 982000 }, { "epoch": 0.07, "learning_rate": 4.889407604523603e-05, "loss": 0.0579, "step": 982500 }, { "epoch": 0.07, "learning_rate": 4.889351299910239e-05, "loss": 0.0627, "step": 983000 }, { "epoch": 0.07, "learning_rate": 4.8892951079061026e-05, "loss": 0.0623, "step": 983500 }, { "epoch": 0.07, "learning_rate": 4.889238803292739e-05, "loss": 0.059, "step": 984000 }, { "epoch": 0.07, "learning_rate": 4.8891824986793754e-05, "loss": 0.0686, "step": 984500 }, { "epoch": 0.07, "learning_rate": 4.8891261940660124e-05, "loss": 0.0636, "step": 985000 }, { "epoch": 0.07, "learning_rate": 4.889069889452648e-05, "loss": 0.0653, "step": 985500 }, { "epoch": 0.07, "learning_rate": 4.8890135848392846e-05, "loss": 0.0646, "step": 986000 }, { "epoch": 0.07, "learning_rate": 4.8889572802259216e-05, "loss": 0.061, "step": 986500 }, { "epoch": 0.07, "learning_rate": 4.888901088221784e-05, "loss": 0.0618, "step": 987000 }, { "epoch": 0.07, "learning_rate": 4.888844783608421e-05, "loss": 0.0628, "step": 987500 }, { "epoch": 0.07, "learning_rate": 4.888788478995057e-05, "loss": 0.0614, "step": 988000 }, { "epoch": 0.07, "learning_rate": 4.888732174381694e-05, "loss": 0.0627, "step": 988500 }, { "epoch": 0.07, "learning_rate": 4.8886759823775566e-05, "loss": 0.0675, "step": 989000 }, { "epoch": 0.07, "learning_rate": 4.888619677764194e-05, "loss": 0.0643, "step": 989500 }, { "epoch": 0.07, "learning_rate": 4.88856337315083e-05, "loss": 0.0655, "step": 990000 }, { "epoch": 0.07, "learning_rate": 4.8885070685374665e-05, "loss": 0.0594, "step": 990500 }, { "epoch": 0.07, "learning_rate": 4.888450763924103e-05, "loss": 0.0583, "step": 991000 }, { "epoch": 0.07, "learning_rate": 4.888394571919966e-05, "loss": 0.0646, "step": 991500 }, { "epoch": 0.07, "learning_rate": 4.8883382673066025e-05, "loss": 0.0625, "step": 992000 }, { "epoch": 0.07, "learning_rate": 4.888281962693239e-05, "loss": 0.0574, "step": 992500 }, { "epoch": 0.07, "learning_rate": 4.888225658079876e-05, "loss": 0.0596, "step": 993000 }, { "epoch": 0.07, "learning_rate": 4.888169353466512e-05, "loss": 0.0658, "step": 993500 }, { "epoch": 0.07, "learning_rate": 4.888113048853149e-05, "loss": 0.0651, "step": 994000 }, { "epoch": 0.07, "learning_rate": 4.888056744239785e-05, "loss": 0.0606, "step": 994500 }, { "epoch": 0.07, "learning_rate": 4.888000439626421e-05, "loss": 0.0657, "step": 995000 }, { "epoch": 0.07, "learning_rate": 4.887944360231511e-05, "loss": 0.063, "step": 995500 }, { "epoch": 0.07, "learning_rate": 4.887888055618148e-05, "loss": 0.0616, "step": 996000 }, { "epoch": 0.07, "learning_rate": 4.8878317510047844e-05, "loss": 0.0635, "step": 996500 }, { "epoch": 0.07, "learning_rate": 4.887775446391421e-05, "loss": 0.0698, "step": 997000 }, { "epoch": 0.07, "learning_rate": 4.887719141778057e-05, "loss": 0.0617, "step": 997500 }, { "epoch": 0.07, "learning_rate": 4.8876628371646936e-05, "loss": 0.0613, "step": 998000 }, { "epoch": 0.07, "learning_rate": 4.887606645160557e-05, "loss": 0.0632, "step": 998500 }, { "epoch": 0.07, "learning_rate": 4.887550340547193e-05, "loss": 0.0596, "step": 999000 }, { "epoch": 0.07, "learning_rate": 4.8874940359338296e-05, "loss": 0.0618, "step": 999500 }, { "epoch": 0.07, "learning_rate": 4.887437731320466e-05, "loss": 0.063, "step": 1000000 }, { "epoch": 0.07, "learning_rate": 4.8873814267071024e-05, "loss": 0.0624, "step": 1000500 }, { "epoch": 0.07, "learning_rate": 4.887325122093739e-05, "loss": 0.0575, "step": 1001000 }, { "epoch": 0.07, "learning_rate": 4.887268930089603e-05, "loss": 0.0663, "step": 1001500 }, { "epoch": 0.07, "learning_rate": 4.8872126254762384e-05, "loss": 0.0649, "step": 1002000 }, { "epoch": 0.07, "learning_rate": 4.887156320862875e-05, "loss": 0.0619, "step": 1002500 }, { "epoch": 0.07, "learning_rate": 4.887100016249512e-05, "loss": 0.0629, "step": 1003000 }, { "epoch": 0.07, "learning_rate": 4.887043711636148e-05, "loss": 0.0625, "step": 1003500 }, { "epoch": 0.07, "learning_rate": 4.8869875196320115e-05, "loss": 0.0612, "step": 1004000 }, { "epoch": 0.07, "learning_rate": 4.886931215018648e-05, "loss": 0.0583, "step": 1004500 }, { "epoch": 0.07, "learning_rate": 4.8868749104052843e-05, "loss": 0.0614, "step": 1005000 }, { "epoch": 0.07, "learning_rate": 4.886818605791921e-05, "loss": 0.0648, "step": 1005500 }, { "epoch": 0.07, "learning_rate": 4.886762301178557e-05, "loss": 0.0692, "step": 1006000 }, { "epoch": 0.07, "learning_rate": 4.8867059965651935e-05, "loss": 0.0601, "step": 1006500 }, { "epoch": 0.07, "learning_rate": 4.886649804561057e-05, "loss": 0.0589, "step": 1007000 }, { "epoch": 0.07, "learning_rate": 4.886593499947693e-05, "loss": 0.0629, "step": 1007500 }, { "epoch": 0.07, "learning_rate": 4.8865371953343296e-05, "loss": 0.061, "step": 1008000 }, { "epoch": 0.07, "learning_rate": 4.8864808907209666e-05, "loss": 0.068, "step": 1008500 }, { "epoch": 0.07, "learning_rate": 4.8864245861076024e-05, "loss": 0.0657, "step": 1009000 }, { "epoch": 0.07, "learning_rate": 4.886368394103466e-05, "loss": 0.059, "step": 1009500 }, { "epoch": 0.07, "learning_rate": 4.886312089490102e-05, "loss": 0.0621, "step": 1010000 }, { "epoch": 0.07, "learning_rate": 4.886255784876739e-05, "loss": 0.0663, "step": 1010500 }, { "epoch": 0.07, "learning_rate": 4.8861994802633755e-05, "loss": 0.0673, "step": 1011000 }, { "epoch": 0.07, "learning_rate": 4.886143175650011e-05, "loss": 0.0607, "step": 1011500 }, { "epoch": 0.07, "learning_rate": 4.886086983645875e-05, "loss": 0.067, "step": 1012000 }, { "epoch": 0.07, "learning_rate": 4.886030679032511e-05, "loss": 0.0636, "step": 1012500 }, { "epoch": 0.07, "learning_rate": 4.885974374419148e-05, "loss": 0.0663, "step": 1013000 }, { "epoch": 0.07, "learning_rate": 4.885918069805784e-05, "loss": 0.0604, "step": 1013500 }, { "epoch": 0.07, "learning_rate": 4.885861765192421e-05, "loss": 0.0636, "step": 1014000 }, { "epoch": 0.07, "learning_rate": 4.885805460579057e-05, "loss": 0.0673, "step": 1014500 }, { "epoch": 0.07, "learning_rate": 4.8857491559656935e-05, "loss": 0.067, "step": 1015000 }, { "epoch": 0.07, "learning_rate": 4.885692963961557e-05, "loss": 0.0628, "step": 1015500 }, { "epoch": 0.07, "learning_rate": 4.885636659348193e-05, "loss": 0.06, "step": 1016000 }, { "epoch": 0.07, "learning_rate": 4.8855803547348295e-05, "loss": 0.0631, "step": 1016500 }, { "epoch": 0.07, "learning_rate": 4.885524050121466e-05, "loss": 0.0628, "step": 1017000 }, { "epoch": 0.07, "learning_rate": 4.885467858117329e-05, "loss": 0.0598, "step": 1017500 }, { "epoch": 0.07, "learning_rate": 4.8854115535039655e-05, "loss": 0.0635, "step": 1018000 }, { "epoch": 0.07, "learning_rate": 4.8853552488906026e-05, "loss": 0.0575, "step": 1018500 }, { "epoch": 0.07, "learning_rate": 4.885298944277239e-05, "loss": 0.0602, "step": 1019000 }, { "epoch": 0.07, "learning_rate": 4.8852426396638754e-05, "loss": 0.0612, "step": 1019500 }, { "epoch": 0.07, "learning_rate": 4.885186335050512e-05, "loss": 0.0659, "step": 1020000 }, { "epoch": 0.07, "learning_rate": 4.885130143046375e-05, "loss": 0.0601, "step": 1020500 }, { "epoch": 0.07, "learning_rate": 4.8850738384330114e-05, "loss": 0.0568, "step": 1021000 }, { "epoch": 0.07, "learning_rate": 4.885017533819648e-05, "loss": 0.0628, "step": 1021500 }, { "epoch": 0.07, "learning_rate": 4.884961229206284e-05, "loss": 0.0606, "step": 1022000 }, { "epoch": 0.07, "learning_rate": 4.8849050372021474e-05, "loss": 0.0609, "step": 1022500 }, { "epoch": 0.07, "learning_rate": 4.884848732588784e-05, "loss": 0.0658, "step": 1023000 }, { "epoch": 0.07, "learning_rate": 4.88479242797542e-05, "loss": 0.0649, "step": 1023500 }, { "epoch": 0.07, "learning_rate": 4.884736123362057e-05, "loss": 0.0589, "step": 1024000 }, { "epoch": 0.07, "learning_rate": 4.88467993135792e-05, "loss": 0.0632, "step": 1024500 }, { "epoch": 0.07, "learning_rate": 4.884623626744557e-05, "loss": 0.0618, "step": 1025000 }, { "epoch": 0.07, "learning_rate": 4.8845673221311927e-05, "loss": 0.0577, "step": 1025500 }, { "epoch": 0.07, "learning_rate": 4.884511017517829e-05, "loss": 0.0618, "step": 1026000 }, { "epoch": 0.07, "learning_rate": 4.884454712904466e-05, "loss": 0.0565, "step": 1026500 }, { "epoch": 0.07, "learning_rate": 4.884398408291102e-05, "loss": 0.061, "step": 1027000 }, { "epoch": 0.07, "learning_rate": 4.884342103677739e-05, "loss": 0.0596, "step": 1027500 }, { "epoch": 0.07, "learning_rate": 4.8842859116736015e-05, "loss": 0.062, "step": 1028000 }, { "epoch": 0.07, "learning_rate": 4.8842296070602386e-05, "loss": 0.0614, "step": 1028500 }, { "epoch": 0.07, "learning_rate": 4.884173302446875e-05, "loss": 0.0605, "step": 1029000 }, { "epoch": 0.07, "learning_rate": 4.8841169978335114e-05, "loss": 0.0659, "step": 1029500 }, { "epoch": 0.07, "learning_rate": 4.884060693220148e-05, "loss": 0.0695, "step": 1030000 }, { "epoch": 0.07, "learning_rate": 4.884004501216011e-05, "loss": 0.0675, "step": 1030500 }, { "epoch": 0.07, "learning_rate": 4.8839481966026474e-05, "loss": 0.062, "step": 1031000 }, { "epoch": 0.07, "learning_rate": 4.883891891989284e-05, "loss": 0.065, "step": 1031500 }, { "epoch": 0.07, "learning_rate": 4.883835587375921e-05, "loss": 0.0645, "step": 1032000 }, { "epoch": 0.07, "learning_rate": 4.8837793953717834e-05, "loss": 0.0601, "step": 1032500 }, { "epoch": 0.07, "learning_rate": 4.8837230907584205e-05, "loss": 0.0573, "step": 1033000 }, { "epoch": 0.07, "learning_rate": 4.883666786145056e-05, "loss": 0.0602, "step": 1033500 }, { "epoch": 0.07, "learning_rate": 4.883610481531693e-05, "loss": 0.0645, "step": 1034000 }, { "epoch": 0.07, "learning_rate": 4.88355417691833e-05, "loss": 0.0575, "step": 1034500 }, { "epoch": 0.07, "learning_rate": 4.883497984914193e-05, "loss": 0.0602, "step": 1035000 }, { "epoch": 0.07, "learning_rate": 4.883441680300829e-05, "loss": 0.0623, "step": 1035500 }, { "epoch": 0.07, "learning_rate": 4.883385375687465e-05, "loss": 0.0592, "step": 1036000 }, { "epoch": 0.07, "learning_rate": 4.883329071074102e-05, "loss": 0.0663, "step": 1036500 }, { "epoch": 0.07, "learning_rate": 4.8832727664607385e-05, "loss": 0.0661, "step": 1037000 }, { "epoch": 0.07, "learning_rate": 4.883216574456602e-05, "loss": 0.0584, "step": 1037500 }, { "epoch": 0.07, "learning_rate": 4.883160269843238e-05, "loss": 0.0637, "step": 1038000 }, { "epoch": 0.07, "learning_rate": 4.8831039652298745e-05, "loss": 0.0617, "step": 1038500 }, { "epoch": 0.07, "learning_rate": 4.883047660616511e-05, "loss": 0.0639, "step": 1039000 }, { "epoch": 0.07, "learning_rate": 4.882991356003147e-05, "loss": 0.0613, "step": 1039500 }, { "epoch": 0.07, "learning_rate": 4.882935051389784e-05, "loss": 0.0599, "step": 1040000 }, { "epoch": 0.07, "learning_rate": 4.882878859385647e-05, "loss": 0.0607, "step": 1040500 }, { "epoch": 0.07, "learning_rate": 4.882822554772283e-05, "loss": 0.0598, "step": 1041000 }, { "epoch": 0.07, "learning_rate": 4.88276625015892e-05, "loss": 0.06, "step": 1041500 }, { "epoch": 0.07, "learning_rate": 4.882709945545557e-05, "loss": 0.0672, "step": 1042000 }, { "epoch": 0.07, "learning_rate": 4.8826536409321925e-05, "loss": 0.0599, "step": 1042500 }, { "epoch": 0.07, "learning_rate": 4.8825974489280564e-05, "loss": 0.0641, "step": 1043000 }, { "epoch": 0.07, "learning_rate": 4.882541144314692e-05, "loss": 0.0596, "step": 1043500 }, { "epoch": 0.07, "learning_rate": 4.882484952310556e-05, "loss": 0.0662, "step": 1044000 }, { "epoch": 0.07, "learning_rate": 4.882428647697192e-05, "loss": 0.0629, "step": 1044500 }, { "epoch": 0.07, "learning_rate": 4.882372343083829e-05, "loss": 0.0616, "step": 1045000 }, { "epoch": 0.07, "learning_rate": 4.882316038470465e-05, "loss": 0.0588, "step": 1045500 }, { "epoch": 0.07, "learning_rate": 4.8822597338571016e-05, "loss": 0.0621, "step": 1046000 }, { "epoch": 0.07, "learning_rate": 4.882203429243738e-05, "loss": 0.063, "step": 1046500 }, { "epoch": 0.07, "learning_rate": 4.8821471246303744e-05, "loss": 0.066, "step": 1047000 }, { "epoch": 0.07, "learning_rate": 4.8820908200170115e-05, "loss": 0.0606, "step": 1047500 }, { "epoch": 0.07, "learning_rate": 4.882034515403647e-05, "loss": 0.0578, "step": 1048000 }, { "epoch": 0.07, "learning_rate": 4.881978323399511e-05, "loss": 0.0638, "step": 1048500 }, { "epoch": 0.07, "learning_rate": 4.881922018786147e-05, "loss": 0.0619, "step": 1049000 }, { "epoch": 0.07, "learning_rate": 4.881865714172783e-05, "loss": 0.0663, "step": 1049500 }, { "epoch": 0.07, "learning_rate": 4.8818094095594203e-05, "loss": 0.0629, "step": 1050000 }, { "epoch": 0.07, "learning_rate": 4.8817532175552836e-05, "loss": 0.0637, "step": 1050500 }, { "epoch": 0.07, "learning_rate": 4.88169691294192e-05, "loss": 0.0657, "step": 1051000 }, { "epoch": 0.07, "learning_rate": 4.881640608328556e-05, "loss": 0.0588, "step": 1051500 }, { "epoch": 0.07, "learning_rate": 4.881584303715193e-05, "loss": 0.0602, "step": 1052000 }, { "epoch": 0.07, "learning_rate": 4.881528111711055e-05, "loss": 0.0625, "step": 1052500 }, { "epoch": 0.07, "learning_rate": 4.8814718070976924e-05, "loss": 0.0624, "step": 1053000 }, { "epoch": 0.07, "learning_rate": 4.881415502484329e-05, "loss": 0.0579, "step": 1053500 }, { "epoch": 0.07, "learning_rate": 4.881359197870965e-05, "loss": 0.0617, "step": 1054000 }, { "epoch": 0.07, "learning_rate": 4.8813028932576016e-05, "loss": 0.0615, "step": 1054500 }, { "epoch": 0.07, "learning_rate": 4.881246701253465e-05, "loss": 0.0577, "step": 1055000 }, { "epoch": 0.07, "learning_rate": 4.881190396640101e-05, "loss": 0.0611, "step": 1055500 }, { "epoch": 0.07, "learning_rate": 4.8811340920267376e-05, "loss": 0.0549, "step": 1056000 }, { "epoch": 0.07, "learning_rate": 4.881077787413374e-05, "loss": 0.0612, "step": 1056500 }, { "epoch": 0.07, "learning_rate": 4.8810214828000104e-05, "loss": 0.0587, "step": 1057000 }, { "epoch": 0.07, "learning_rate": 4.8809652907958736e-05, "loss": 0.0642, "step": 1057500 }, { "epoch": 0.07, "learning_rate": 4.88090898618251e-05, "loss": 0.0618, "step": 1058000 }, { "epoch": 0.07, "learning_rate": 4.880852681569147e-05, "loss": 0.0638, "step": 1058500 }, { "epoch": 0.07, "learning_rate": 4.880796376955783e-05, "loss": 0.0612, "step": 1059000 }, { "epoch": 0.07, "learning_rate": 4.880740184951647e-05, "loss": 0.0576, "step": 1059500 }, { "epoch": 0.07, "learning_rate": 4.8806838803382824e-05, "loss": 0.0595, "step": 1060000 }, { "epoch": 0.07, "learning_rate": 4.8806275757249195e-05, "loss": 0.0635, "step": 1060500 }, { "epoch": 0.07, "learning_rate": 4.880571271111556e-05, "loss": 0.0646, "step": 1061000 }, { "epoch": 0.07, "learning_rate": 4.880514966498192e-05, "loss": 0.0609, "step": 1061500 }, { "epoch": 0.07, "learning_rate": 4.880458661884829e-05, "loss": 0.0639, "step": 1062000 }, { "epoch": 0.07, "learning_rate": 4.880402469880692e-05, "loss": 0.0599, "step": 1062500 }, { "epoch": 0.07, "learning_rate": 4.880346165267328e-05, "loss": 0.0654, "step": 1063000 }, { "epoch": 0.07, "learning_rate": 4.880289860653965e-05, "loss": 0.0564, "step": 1063500 }, { "epoch": 0.07, "learning_rate": 4.880233556040602e-05, "loss": 0.0615, "step": 1064000 }, { "epoch": 0.07, "learning_rate": 4.8801773640364644e-05, "loss": 0.0644, "step": 1064500 }, { "epoch": 0.07, "learning_rate": 4.8801210594231014e-05, "loss": 0.063, "step": 1065000 }, { "epoch": 0.07, "learning_rate": 4.880064754809737e-05, "loss": 0.0638, "step": 1065500 }, { "epoch": 0.07, "learning_rate": 4.8800084501963736e-05, "loss": 0.059, "step": 1066000 }, { "epoch": 0.07, "learning_rate": 4.8799521455830106e-05, "loss": 0.0626, "step": 1066500 }, { "epoch": 0.07, "learning_rate": 4.879895953578874e-05, "loss": 0.0604, "step": 1067000 }, { "epoch": 0.07, "learning_rate": 4.87983964896551e-05, "loss": 0.0672, "step": 1067500 }, { "epoch": 0.07, "learning_rate": 4.879783344352146e-05, "loss": 0.0599, "step": 1068000 }, { "epoch": 0.07, "learning_rate": 4.879727039738783e-05, "loss": 0.0589, "step": 1068500 }, { "epoch": 0.07, "learning_rate": 4.8796707351254195e-05, "loss": 0.0601, "step": 1069000 }, { "epoch": 0.07, "learning_rate": 4.879614430512056e-05, "loss": 0.063, "step": 1069500 }, { "epoch": 0.07, "learning_rate": 4.879558238507919e-05, "loss": 0.0599, "step": 1070000 }, { "epoch": 0.07, "learning_rate": 4.8795019338945555e-05, "loss": 0.0616, "step": 1070500 }, { "epoch": 0.07, "learning_rate": 4.879445629281192e-05, "loss": 0.0624, "step": 1071000 }, { "epoch": 0.07, "learning_rate": 4.879389324667828e-05, "loss": 0.0604, "step": 1071500 }, { "epoch": 0.07, "learning_rate": 4.879333020054465e-05, "loss": 0.0626, "step": 1072000 }, { "epoch": 0.07, "learning_rate": 4.879276715441101e-05, "loss": 0.0624, "step": 1072500 }, { "epoch": 0.07, "learning_rate": 4.879220410827738e-05, "loss": 0.0588, "step": 1073000 }, { "epoch": 0.07, "learning_rate": 4.879164218823601e-05, "loss": 0.0608, "step": 1073500 }, { "epoch": 0.07, "learning_rate": 4.879107914210238e-05, "loss": 0.0599, "step": 1074000 }, { "epoch": 0.07, "learning_rate": 4.8790516095968735e-05, "loss": 0.0654, "step": 1074500 }, { "epoch": 0.07, "learning_rate": 4.87899530498351e-05, "loss": 0.0593, "step": 1075000 }, { "epoch": 0.07, "learning_rate": 4.878939112979373e-05, "loss": 0.0644, "step": 1075500 }, { "epoch": 0.07, "learning_rate": 4.87888280836601e-05, "loss": 0.0614, "step": 1076000 }, { "epoch": 0.07, "learning_rate": 4.8788265037526466e-05, "loss": 0.0616, "step": 1076500 }, { "epoch": 0.07, "learning_rate": 4.878770199139283e-05, "loss": 0.0592, "step": 1077000 }, { "epoch": 0.07, "learning_rate": 4.878714007135146e-05, "loss": 0.0628, "step": 1077500 }, { "epoch": 0.07, "learning_rate": 4.8786577025217826e-05, "loss": 0.0657, "step": 1078000 }, { "epoch": 0.07, "learning_rate": 4.878601397908419e-05, "loss": 0.06, "step": 1078500 }, { "epoch": 0.07, "learning_rate": 4.8785450932950554e-05, "loss": 0.061, "step": 1079000 }, { "epoch": 0.07, "learning_rate": 4.8784889012909186e-05, "loss": 0.0611, "step": 1079500 }, { "epoch": 0.07, "learning_rate": 4.878432596677555e-05, "loss": 0.066, "step": 1080000 }, { "epoch": 0.07, "learning_rate": 4.878376292064192e-05, "loss": 0.0591, "step": 1080500 }, { "epoch": 0.07, "learning_rate": 4.878319987450828e-05, "loss": 0.062, "step": 1081000 }, { "epoch": 0.07, "learning_rate": 4.878263682837464e-05, "loss": 0.0611, "step": 1081500 }, { "epoch": 0.07, "learning_rate": 4.8782074908333275e-05, "loss": 0.0614, "step": 1082000 }, { "epoch": 0.07, "learning_rate": 4.878151186219964e-05, "loss": 0.0612, "step": 1082500 }, { "epoch": 0.07, "learning_rate": 4.878094881606601e-05, "loss": 0.06, "step": 1083000 }, { "epoch": 0.07, "learning_rate": 4.8780385769932367e-05, "loss": 0.064, "step": 1083500 }, { "epoch": 0.07, "learning_rate": 4.877982272379874e-05, "loss": 0.0548, "step": 1084000 }, { "epoch": 0.07, "learning_rate": 4.87792596776651e-05, "loss": 0.0607, "step": 1084500 }, { "epoch": 0.07, "learning_rate": 4.8778696631531465e-05, "loss": 0.0616, "step": 1085000 }, { "epoch": 0.07, "learning_rate": 4.87781347114901e-05, "loss": 0.0603, "step": 1085500 }, { "epoch": 0.07, "learning_rate": 4.877757166535646e-05, "loss": 0.058, "step": 1086000 }, { "epoch": 0.07, "learning_rate": 4.8777008619222825e-05, "loss": 0.0599, "step": 1086500 }, { "epoch": 0.07, "learning_rate": 4.877644557308919e-05, "loss": 0.0607, "step": 1087000 }, { "epoch": 0.07, "learning_rate": 4.8775882526955553e-05, "loss": 0.0584, "step": 1087500 }, { "epoch": 0.07, "learning_rate": 4.8775320606914186e-05, "loss": 0.0623, "step": 1088000 }, { "epoch": 0.07, "learning_rate": 4.877475756078055e-05, "loss": 0.0642, "step": 1088500 }, { "epoch": 0.07, "learning_rate": 4.8774194514646914e-05, "loss": 0.0616, "step": 1089000 }, { "epoch": 0.07, "learning_rate": 4.8773631468513284e-05, "loss": 0.0583, "step": 1089500 }, { "epoch": 0.07, "learning_rate": 4.877306842237965e-05, "loss": 0.0629, "step": 1090000 }, { "epoch": 0.07, "learning_rate": 4.8772505376246006e-05, "loss": 0.0613, "step": 1090500 }, { "epoch": 0.07, "learning_rate": 4.8771942330112376e-05, "loss": 0.0563, "step": 1091000 }, { "epoch": 0.07, "learning_rate": 4.877137928397874e-05, "loss": 0.0603, "step": 1091500 }, { "epoch": 0.07, "learning_rate": 4.877081736393737e-05, "loss": 0.0542, "step": 1092000 }, { "epoch": 0.07, "learning_rate": 4.8770255443896e-05, "loss": 0.0611, "step": 1092500 }, { "epoch": 0.07, "learning_rate": 4.876969239776237e-05, "loss": 0.0616, "step": 1093000 }, { "epoch": 0.07, "learning_rate": 4.876912935162873e-05, "loss": 0.0648, "step": 1093500 }, { "epoch": 0.07, "learning_rate": 4.8768567431587365e-05, "loss": 0.0655, "step": 1094000 }, { "epoch": 0.07, "learning_rate": 4.876800438545373e-05, "loss": 0.0595, "step": 1094500 }, { "epoch": 0.07, "learning_rate": 4.876744133932009e-05, "loss": 0.0616, "step": 1095000 }, { "epoch": 0.07, "learning_rate": 4.876687829318646e-05, "loss": 0.0605, "step": 1095500 }, { "epoch": 0.07, "learning_rate": 4.876631524705282e-05, "loss": 0.0611, "step": 1096000 }, { "epoch": 0.07, "learning_rate": 4.8765752200919185e-05, "loss": 0.061, "step": 1096500 }, { "epoch": 0.07, "learning_rate": 4.876518915478555e-05, "loss": 0.0604, "step": 1097000 }, { "epoch": 0.07, "learning_rate": 4.876462610865192e-05, "loss": 0.0579, "step": 1097500 }, { "epoch": 0.07, "learning_rate": 4.8764064188610545e-05, "loss": 0.0592, "step": 1098000 }, { "epoch": 0.07, "learning_rate": 4.8763501142476916e-05, "loss": 0.0644, "step": 1098500 }, { "epoch": 0.07, "learning_rate": 4.876293809634327e-05, "loss": 0.0605, "step": 1099000 }, { "epoch": 0.07, "learning_rate": 4.8762375050209644e-05, "loss": 0.0617, "step": 1099500 }, { "epoch": 0.07, "learning_rate": 4.876181200407601e-05, "loss": 0.0588, "step": 1100000 }, { "epoch": 0.07, "learning_rate": 4.8761248957942365e-05, "loss": 0.0615, "step": 1100500 }, { "epoch": 0.07, "learning_rate": 4.8760685911808736e-05, "loss": 0.0585, "step": 1101000 }, { "epoch": 0.07, "learning_rate": 4.876012399176736e-05, "loss": 0.0641, "step": 1101500 }, { "epoch": 0.07, "learning_rate": 4.875956094563373e-05, "loss": 0.0588, "step": 1102000 }, { "epoch": 0.07, "learning_rate": 4.8758997899500096e-05, "loss": 0.0593, "step": 1102500 }, { "epoch": 0.07, "learning_rate": 4.875843597945873e-05, "loss": 0.0591, "step": 1103000 }, { "epoch": 0.07, "learning_rate": 4.875787293332509e-05, "loss": 0.0597, "step": 1103500 }, { "epoch": 0.07, "learning_rate": 4.8757309887191456e-05, "loss": 0.0617, "step": 1104000 }, { "epoch": 0.07, "learning_rate": 4.875674684105782e-05, "loss": 0.0613, "step": 1104500 }, { "epoch": 0.07, "learning_rate": 4.8756183794924184e-05, "loss": 0.0616, "step": 1105000 }, { "epoch": 0.07, "learning_rate": 4.8755620748790555e-05, "loss": 0.0607, "step": 1105500 }, { "epoch": 0.07, "learning_rate": 4.875505770265691e-05, "loss": 0.0612, "step": 1106000 }, { "epoch": 0.07, "learning_rate": 4.875449465652328e-05, "loss": 0.0593, "step": 1106500 }, { "epoch": 0.07, "learning_rate": 4.875393161038965e-05, "loss": 0.0594, "step": 1107000 }, { "epoch": 0.07, "learning_rate": 4.875336856425601e-05, "loss": 0.0592, "step": 1107500 }, { "epoch": 0.07, "learning_rate": 4.875280664421464e-05, "loss": 0.0601, "step": 1108000 }, { "epoch": 0.07, "learning_rate": 4.875224359808101e-05, "loss": 0.0633, "step": 1108500 }, { "epoch": 0.07, "learning_rate": 4.875168055194737e-05, "loss": 0.0602, "step": 1109000 }, { "epoch": 0.07, "learning_rate": 4.8751117505813735e-05, "loss": 0.0584, "step": 1109500 }, { "epoch": 0.07, "learning_rate": 4.87505544596801e-05, "loss": 0.0633, "step": 1110000 }, { "epoch": 0.08, "learning_rate": 4.874999253963873e-05, "loss": 0.0636, "step": 1110500 }, { "epoch": 0.08, "learning_rate": 4.8749429493505096e-05, "loss": 0.0612, "step": 1111000 }, { "epoch": 0.08, "learning_rate": 4.874886644737146e-05, "loss": 0.0581, "step": 1111500 }, { "epoch": 0.08, "learning_rate": 4.874830340123783e-05, "loss": 0.0608, "step": 1112000 }, { "epoch": 0.08, "learning_rate": 4.874774035510419e-05, "loss": 0.0609, "step": 1112500 }, { "epoch": 0.08, "learning_rate": 4.8747178435062827e-05, "loss": 0.0617, "step": 1113000 }, { "epoch": 0.08, "learning_rate": 4.8746615388929184e-05, "loss": 0.0581, "step": 1113500 }, { "epoch": 0.08, "learning_rate": 4.874605234279555e-05, "loss": 0.0541, "step": 1114000 }, { "epoch": 0.08, "learning_rate": 4.874548929666192e-05, "loss": 0.0574, "step": 1114500 }, { "epoch": 0.08, "learning_rate": 4.8744926250528276e-05, "loss": 0.0593, "step": 1115000 }, { "epoch": 0.08, "learning_rate": 4.8744363204394647e-05, "loss": 0.0668, "step": 1115500 }, { "epoch": 0.08, "learning_rate": 4.874380128435327e-05, "loss": 0.0619, "step": 1116000 }, { "epoch": 0.08, "learning_rate": 4.874323823821964e-05, "loss": 0.0645, "step": 1116500 }, { "epoch": 0.08, "learning_rate": 4.874267519208601e-05, "loss": 0.059, "step": 1117000 }, { "epoch": 0.08, "learning_rate": 4.874211214595237e-05, "loss": 0.059, "step": 1117500 }, { "epoch": 0.08, "learning_rate": 4.8741549099818735e-05, "loss": 0.0604, "step": 1118000 }, { "epoch": 0.08, "learning_rate": 4.874098717977737e-05, "loss": 0.0666, "step": 1118500 }, { "epoch": 0.08, "learning_rate": 4.874042413364373e-05, "loss": 0.0634, "step": 1119000 }, { "epoch": 0.08, "learning_rate": 4.8739861087510095e-05, "loss": 0.0572, "step": 1119500 }, { "epoch": 0.08, "learning_rate": 4.8739298041376466e-05, "loss": 0.0582, "step": 1120000 }, { "epoch": 0.08, "learning_rate": 4.873873499524282e-05, "loss": 0.0589, "step": 1120500 }, { "epoch": 0.08, "learning_rate": 4.873817307520146e-05, "loss": 0.0585, "step": 1121000 }, { "epoch": 0.08, "learning_rate": 4.873761002906782e-05, "loss": 0.0575, "step": 1121500 }, { "epoch": 0.08, "learning_rate": 4.873704698293419e-05, "loss": 0.0634, "step": 1122000 }, { "epoch": 0.08, "learning_rate": 4.8736483936800554e-05, "loss": 0.0581, "step": 1122500 }, { "epoch": 0.08, "learning_rate": 4.873592089066691e-05, "loss": 0.0616, "step": 1123000 }, { "epoch": 0.08, "learning_rate": 4.873535897062555e-05, "loss": 0.0612, "step": 1123500 }, { "epoch": 0.08, "learning_rate": 4.873479592449191e-05, "loss": 0.0594, "step": 1124000 }, { "epoch": 0.08, "learning_rate": 4.873423287835828e-05, "loss": 0.063, "step": 1124500 }, { "epoch": 0.08, "learning_rate": 4.873366983222464e-05, "loss": 0.0545, "step": 1125000 }, { "epoch": 0.08, "learning_rate": 4.8733106786091006e-05, "loss": 0.0622, "step": 1125500 }, { "epoch": 0.08, "learning_rate": 4.873254486604964e-05, "loss": 0.0611, "step": 1126000 }, { "epoch": 0.08, "learning_rate": 4.8731981819916e-05, "loss": 0.0563, "step": 1126500 }, { "epoch": 0.08, "learning_rate": 4.8731418773782366e-05, "loss": 0.0584, "step": 1127000 }, { "epoch": 0.08, "learning_rate": 4.873085572764873e-05, "loss": 0.0552, "step": 1127500 }, { "epoch": 0.08, "learning_rate": 4.8730292681515094e-05, "loss": 0.0551, "step": 1128000 }, { "epoch": 0.08, "learning_rate": 4.8729730761473726e-05, "loss": 0.0588, "step": 1128500 }, { "epoch": 0.08, "learning_rate": 4.872916771534009e-05, "loss": 0.0621, "step": 1129000 }, { "epoch": 0.08, "learning_rate": 4.8728604669206454e-05, "loss": 0.0612, "step": 1129500 }, { "epoch": 0.08, "learning_rate": 4.8728041623072825e-05, "loss": 0.061, "step": 1130000 }, { "epoch": 0.08, "learning_rate": 4.872747857693918e-05, "loss": 0.0593, "step": 1130500 }, { "epoch": 0.08, "learning_rate": 4.872691665689782e-05, "loss": 0.0569, "step": 1131000 }, { "epoch": 0.08, "learning_rate": 4.872635361076418e-05, "loss": 0.0591, "step": 1131500 }, { "epoch": 0.08, "learning_rate": 4.872579056463055e-05, "loss": 0.0574, "step": 1132000 }, { "epoch": 0.08, "learning_rate": 4.8725227518496913e-05, "loss": 0.0598, "step": 1132500 }, { "epoch": 0.08, "learning_rate": 4.872466447236328e-05, "loss": 0.0639, "step": 1133000 }, { "epoch": 0.08, "learning_rate": 4.872410142622964e-05, "loss": 0.0562, "step": 1133500 }, { "epoch": 0.08, "learning_rate": 4.8723539506188274e-05, "loss": 0.0606, "step": 1134000 }, { "epoch": 0.08, "learning_rate": 4.872297646005464e-05, "loss": 0.0619, "step": 1134500 }, { "epoch": 0.08, "learning_rate": 4.8722413413921e-05, "loss": 0.0578, "step": 1135000 }, { "epoch": 0.08, "learning_rate": 4.872185036778737e-05, "loss": 0.0593, "step": 1135500 }, { "epoch": 0.08, "learning_rate": 4.8721288447746e-05, "loss": 0.0599, "step": 1136000 }, { "epoch": 0.08, "learning_rate": 4.872072540161237e-05, "loss": 0.0568, "step": 1136500 }, { "epoch": 0.08, "learning_rate": 4.8720162355478726e-05, "loss": 0.0584, "step": 1137000 }, { "epoch": 0.08, "learning_rate": 4.871959930934509e-05, "loss": 0.0577, "step": 1137500 }, { "epoch": 0.08, "learning_rate": 4.871903626321146e-05, "loss": 0.0616, "step": 1138000 }, { "epoch": 0.08, "learning_rate": 4.871847434317009e-05, "loss": 0.0614, "step": 1138500 }, { "epoch": 0.08, "learning_rate": 4.871791129703646e-05, "loss": 0.0569, "step": 1139000 }, { "epoch": 0.08, "learning_rate": 4.8717348250902814e-05, "loss": 0.0596, "step": 1139500 }, { "epoch": 0.08, "learning_rate": 4.8716785204769185e-05, "loss": 0.0598, "step": 1140000 }, { "epoch": 0.08, "learning_rate": 4.871622215863555e-05, "loss": 0.0576, "step": 1140500 }, { "epoch": 0.08, "learning_rate": 4.871566023859418e-05, "loss": 0.058, "step": 1141000 }, { "epoch": 0.08, "learning_rate": 4.8715097192460545e-05, "loss": 0.0581, "step": 1141500 }, { "epoch": 0.08, "learning_rate": 4.871453414632691e-05, "loss": 0.0617, "step": 1142000 }, { "epoch": 0.08, "learning_rate": 4.871397110019327e-05, "loss": 0.0552, "step": 1142500 }, { "epoch": 0.08, "learning_rate": 4.871340805405964e-05, "loss": 0.0589, "step": 1143000 }, { "epoch": 0.08, "learning_rate": 4.871284613401827e-05, "loss": 0.058, "step": 1143500 }, { "epoch": 0.08, "learning_rate": 4.871228308788463e-05, "loss": 0.0575, "step": 1144000 }, { "epoch": 0.08, "learning_rate": 4.8711720041751e-05, "loss": 0.0659, "step": 1144500 }, { "epoch": 0.08, "learning_rate": 4.871115699561736e-05, "loss": 0.0616, "step": 1145000 }, { "epoch": 0.08, "learning_rate": 4.871059394948373e-05, "loss": 0.0594, "step": 1145500 }, { "epoch": 0.08, "learning_rate": 4.871003202944236e-05, "loss": 0.0584, "step": 1146000 }, { "epoch": 0.08, "learning_rate": 4.870946898330873e-05, "loss": 0.0599, "step": 1146500 }, { "epoch": 0.08, "learning_rate": 4.870890593717509e-05, "loss": 0.0614, "step": 1147000 }, { "epoch": 0.08, "learning_rate": 4.8708342891041456e-05, "loss": 0.0602, "step": 1147500 }, { "epoch": 0.08, "learning_rate": 4.870778097100009e-05, "loss": 0.0646, "step": 1148000 }, { "epoch": 0.08, "learning_rate": 4.870721792486645e-05, "loss": 0.059, "step": 1148500 }, { "epoch": 0.08, "learning_rate": 4.8706654878732816e-05, "loss": 0.0573, "step": 1149000 }, { "epoch": 0.08, "learning_rate": 4.870609183259918e-05, "loss": 0.0573, "step": 1149500 }, { "epoch": 0.08, "learning_rate": 4.8705528786465544e-05, "loss": 0.0598, "step": 1150000 }, { "epoch": 0.08, "learning_rate": 4.8704966866424177e-05, "loss": 0.0588, "step": 1150500 }, { "epoch": 0.08, "learning_rate": 4.870440382029054e-05, "loss": 0.0598, "step": 1151000 }, { "epoch": 0.08, "learning_rate": 4.8703840774156905e-05, "loss": 0.0616, "step": 1151500 }, { "epoch": 0.08, "learning_rate": 4.8703277728023275e-05, "loss": 0.0569, "step": 1152000 }, { "epoch": 0.08, "learning_rate": 4.87027158079819e-05, "loss": 0.0604, "step": 1152500 }, { "epoch": 0.08, "learning_rate": 4.870215276184827e-05, "loss": 0.0582, "step": 1153000 }, { "epoch": 0.08, "learning_rate": 4.870158971571463e-05, "loss": 0.0618, "step": 1153500 }, { "epoch": 0.08, "learning_rate": 4.870102666958099e-05, "loss": 0.0573, "step": 1154000 }, { "epoch": 0.08, "learning_rate": 4.8700463623447364e-05, "loss": 0.0612, "step": 1154500 }, { "epoch": 0.08, "learning_rate": 4.869990170340599e-05, "loss": 0.0607, "step": 1155000 }, { "epoch": 0.08, "learning_rate": 4.869933865727236e-05, "loss": 0.061, "step": 1155500 }, { "epoch": 0.08, "learning_rate": 4.869877561113872e-05, "loss": 0.0582, "step": 1156000 }, { "epoch": 0.08, "learning_rate": 4.869821256500509e-05, "loss": 0.0592, "step": 1156500 }, { "epoch": 0.08, "learning_rate": 4.869764951887145e-05, "loss": 0.0568, "step": 1157000 }, { "epoch": 0.08, "learning_rate": 4.8697087598830084e-05, "loss": 0.0583, "step": 1157500 }, { "epoch": 0.08, "learning_rate": 4.869652455269645e-05, "loss": 0.0605, "step": 1158000 }, { "epoch": 0.08, "learning_rate": 4.869596150656281e-05, "loss": 0.0562, "step": 1158500 }, { "epoch": 0.08, "learning_rate": 4.8695398460429176e-05, "loss": 0.06, "step": 1159000 }, { "epoch": 0.08, "learning_rate": 4.869483654038781e-05, "loss": 0.0572, "step": 1159500 }, { "epoch": 0.08, "learning_rate": 4.869427349425417e-05, "loss": 0.0605, "step": 1160000 }, { "epoch": 0.08, "learning_rate": 4.8693710448120536e-05, "loss": 0.0579, "step": 1160500 }, { "epoch": 0.08, "learning_rate": 4.86931474019869e-05, "loss": 0.0611, "step": 1161000 }, { "epoch": 0.08, "learning_rate": 4.8692584355853264e-05, "loss": 0.0606, "step": 1161500 }, { "epoch": 0.08, "learning_rate": 4.8692021309719635e-05, "loss": 0.0632, "step": 1162000 }, { "epoch": 0.08, "learning_rate": 4.8691458263586e-05, "loss": 0.0566, "step": 1162500 }, { "epoch": 0.08, "learning_rate": 4.869089634354463e-05, "loss": 0.0571, "step": 1163000 }, { "epoch": 0.08, "learning_rate": 4.8690333297410995e-05, "loss": 0.057, "step": 1163500 }, { "epoch": 0.08, "learning_rate": 4.868977025127735e-05, "loss": 0.0587, "step": 1164000 }, { "epoch": 0.08, "learning_rate": 4.868920720514372e-05, "loss": 0.0634, "step": 1164500 }, { "epoch": 0.08, "learning_rate": 4.868864415901009e-05, "loss": 0.0606, "step": 1165000 }, { "epoch": 0.08, "learning_rate": 4.868808111287645e-05, "loss": 0.0574, "step": 1165500 }, { "epoch": 0.08, "learning_rate": 4.8687518066742815e-05, "loss": 0.0613, "step": 1166000 }, { "epoch": 0.08, "learning_rate": 4.868695614670145e-05, "loss": 0.0555, "step": 1166500 }, { "epoch": 0.08, "learning_rate": 4.868639310056781e-05, "loss": 0.0569, "step": 1167000 }, { "epoch": 0.08, "learning_rate": 4.8685830054434175e-05, "loss": 0.0632, "step": 1167500 }, { "epoch": 0.08, "learning_rate": 4.868526700830054e-05, "loss": 0.0629, "step": 1168000 }, { "epoch": 0.08, "learning_rate": 4.86847039621669e-05, "loss": 0.0612, "step": 1168500 }, { "epoch": 0.08, "learning_rate": 4.8684142042125536e-05, "loss": 0.0625, "step": 1169000 }, { "epoch": 0.08, "learning_rate": 4.86835789959919e-05, "loss": 0.0595, "step": 1169500 }, { "epoch": 0.08, "learning_rate": 4.868301594985827e-05, "loss": 0.0604, "step": 1170000 }, { "epoch": 0.08, "learning_rate": 4.868245290372463e-05, "loss": 0.0577, "step": 1170500 }, { "epoch": 0.08, "learning_rate": 4.8681889857591e-05, "loss": 0.0611, "step": 1171000 }, { "epoch": 0.08, "learning_rate": 4.8681327937549624e-05, "loss": 0.0603, "step": 1171500 }, { "epoch": 0.08, "learning_rate": 4.8680764891415994e-05, "loss": 0.0602, "step": 1172000 }, { "epoch": 0.08, "learning_rate": 4.868020184528236e-05, "loss": 0.0553, "step": 1172500 }, { "epoch": 0.08, "learning_rate": 4.8679638799148716e-05, "loss": 0.0578, "step": 1173000 }, { "epoch": 0.08, "learning_rate": 4.8679076879107355e-05, "loss": 0.0595, "step": 1173500 }, { "epoch": 0.08, "learning_rate": 4.867851383297372e-05, "loss": 0.0621, "step": 1174000 }, { "epoch": 0.08, "learning_rate": 4.867795078684008e-05, "loss": 0.0634, "step": 1174500 }, { "epoch": 0.08, "learning_rate": 4.867738774070645e-05, "loss": 0.0603, "step": 1175000 }, { "epoch": 0.08, "learning_rate": 4.867682469457282e-05, "loss": 0.0589, "step": 1175500 }, { "epoch": 0.08, "learning_rate": 4.867626277453144e-05, "loss": 0.0562, "step": 1176000 }, { "epoch": 0.08, "learning_rate": 4.8675699728397814e-05, "loss": 0.0559, "step": 1176500 }, { "epoch": 0.08, "learning_rate": 4.867513668226417e-05, "loss": 0.0567, "step": 1177000 }, { "epoch": 0.08, "learning_rate": 4.8674573636130535e-05, "loss": 0.0586, "step": 1177500 }, { "epoch": 0.08, "learning_rate": 4.8674010589996906e-05, "loss": 0.0594, "step": 1178000 }, { "epoch": 0.08, "learning_rate": 4.867344754386326e-05, "loss": 0.0568, "step": 1178500 }, { "epoch": 0.08, "learning_rate": 4.86728856238219e-05, "loss": 0.0589, "step": 1179000 }, { "epoch": 0.08, "learning_rate": 4.867232257768826e-05, "loss": 0.0556, "step": 1179500 }, { "epoch": 0.08, "learning_rate": 4.867175953155463e-05, "loss": 0.0562, "step": 1180000 }, { "epoch": 0.08, "learning_rate": 4.8671196485420994e-05, "loss": 0.0605, "step": 1180500 }, { "epoch": 0.08, "learning_rate": 4.867063343928736e-05, "loss": 0.0586, "step": 1181000 }, { "epoch": 0.08, "learning_rate": 4.867007039315372e-05, "loss": 0.0588, "step": 1181500 }, { "epoch": 0.08, "learning_rate": 4.8669507347020086e-05, "loss": 0.0616, "step": 1182000 }, { "epoch": 0.08, "learning_rate": 4.866894430088645e-05, "loss": 0.0574, "step": 1182500 }, { "epoch": 0.08, "learning_rate": 4.866838350693735e-05, "loss": 0.0576, "step": 1183000 }, { "epoch": 0.08, "learning_rate": 4.8667820460803714e-05, "loss": 0.0608, "step": 1183500 }, { "epoch": 0.08, "learning_rate": 4.866725741467008e-05, "loss": 0.0606, "step": 1184000 }, { "epoch": 0.08, "learning_rate": 4.866669436853644e-05, "loss": 0.0564, "step": 1184500 }, { "epoch": 0.08, "learning_rate": 4.8666132448495074e-05, "loss": 0.0604, "step": 1185000 }, { "epoch": 0.08, "learning_rate": 4.866556940236144e-05, "loss": 0.0609, "step": 1185500 }, { "epoch": 0.08, "learning_rate": 4.86650063562278e-05, "loss": 0.06, "step": 1186000 }, { "epoch": 0.08, "learning_rate": 4.866444331009417e-05, "loss": 0.058, "step": 1186500 }, { "epoch": 0.08, "learning_rate": 4.86638813900528e-05, "loss": 0.0596, "step": 1187000 }, { "epoch": 0.08, "learning_rate": 4.866331834391917e-05, "loss": 0.0581, "step": 1187500 }, { "epoch": 0.08, "learning_rate": 4.866275529778553e-05, "loss": 0.0615, "step": 1188000 }, { "epoch": 0.08, "learning_rate": 4.86621922516519e-05, "loss": 0.0614, "step": 1188500 }, { "epoch": 0.08, "learning_rate": 4.866162920551826e-05, "loss": 0.0624, "step": 1189000 }, { "epoch": 0.08, "learning_rate": 4.866106615938462e-05, "loss": 0.0581, "step": 1189500 }, { "epoch": 0.08, "learning_rate": 4.866050311325099e-05, "loss": 0.0585, "step": 1190000 }, { "epoch": 0.08, "learning_rate": 4.8659940067117353e-05, "loss": 0.0619, "step": 1190500 }, { "epoch": 0.08, "learning_rate": 4.865937702098372e-05, "loss": 0.0621, "step": 1191000 }, { "epoch": 0.08, "learning_rate": 4.865881510094235e-05, "loss": 0.0648, "step": 1191500 }, { "epoch": 0.08, "learning_rate": 4.865825205480872e-05, "loss": 0.0594, "step": 1192000 }, { "epoch": 0.08, "learning_rate": 4.865768900867508e-05, "loss": 0.0547, "step": 1192500 }, { "epoch": 0.08, "learning_rate": 4.865712596254144e-05, "loss": 0.0577, "step": 1193000 }, { "epoch": 0.08, "learning_rate": 4.865656291640781e-05, "loss": 0.0554, "step": 1193500 }, { "epoch": 0.08, "learning_rate": 4.865599987027417e-05, "loss": 0.0568, "step": 1194000 }, { "epoch": 0.08, "learning_rate": 4.865543682414054e-05, "loss": 0.0572, "step": 1194500 }, { "epoch": 0.08, "learning_rate": 4.8654874904099166e-05, "loss": 0.0627, "step": 1195000 }, { "epoch": 0.08, "learning_rate": 4.8654311857965537e-05, "loss": 0.0611, "step": 1195500 }, { "epoch": 0.08, "learning_rate": 4.86537488118319e-05, "loss": 0.056, "step": 1196000 }, { "epoch": 0.08, "learning_rate": 4.8653185765698265e-05, "loss": 0.0587, "step": 1196500 }, { "epoch": 0.08, "learning_rate": 4.86526238456569e-05, "loss": 0.0575, "step": 1197000 }, { "epoch": 0.08, "learning_rate": 4.865206192561553e-05, "loss": 0.0612, "step": 1197500 }, { "epoch": 0.08, "learning_rate": 4.865149887948189e-05, "loss": 0.0571, "step": 1198000 }, { "epoch": 0.08, "learning_rate": 4.865093583334826e-05, "loss": 0.0601, "step": 1198500 }, { "epoch": 0.08, "learning_rate": 4.865037278721462e-05, "loss": 0.06, "step": 1199000 }, { "epoch": 0.08, "learning_rate": 4.8649809741080985e-05, "loss": 0.0574, "step": 1199500 }, { "epoch": 0.08, "learning_rate": 4.864924669494735e-05, "loss": 0.0585, "step": 1200000 }, { "epoch": 0.08, "learning_rate": 4.864868364881371e-05, "loss": 0.0593, "step": 1200500 }, { "epoch": 0.08, "learning_rate": 4.8648120602680084e-05, "loss": 0.0626, "step": 1201000 }, { "epoch": 0.08, "learning_rate": 4.864755755654644e-05, "loss": 0.0608, "step": 1201500 }, { "epoch": 0.08, "learning_rate": 4.8646994510412805e-05, "loss": 0.066, "step": 1202000 }, { "epoch": 0.08, "learning_rate": 4.8646431464279176e-05, "loss": 0.0551, "step": 1202500 }, { "epoch": 0.08, "learning_rate": 4.86458695442378e-05, "loss": 0.0543, "step": 1203000 }, { "epoch": 0.08, "learning_rate": 4.864530649810417e-05, "loss": 0.057, "step": 1203500 }, { "epoch": 0.08, "learning_rate": 4.8644743451970536e-05, "loss": 0.0593, "step": 1204000 }, { "epoch": 0.08, "learning_rate": 4.86441804058369e-05, "loss": 0.0575, "step": 1204500 }, { "epoch": 0.08, "learning_rate": 4.8643617359703264e-05, "loss": 0.057, "step": 1205000 }, { "epoch": 0.08, "learning_rate": 4.8643055439661896e-05, "loss": 0.0603, "step": 1205500 }, { "epoch": 0.08, "learning_rate": 4.864249239352826e-05, "loss": 0.0546, "step": 1206000 }, { "epoch": 0.08, "learning_rate": 4.8641929347394624e-05, "loss": 0.0581, "step": 1206500 }, { "epoch": 0.08, "learning_rate": 4.864136630126099e-05, "loss": 0.0606, "step": 1207000 }, { "epoch": 0.08, "learning_rate": 4.864080438121962e-05, "loss": 0.0642, "step": 1207500 }, { "epoch": 0.08, "learning_rate": 4.8640241335085984e-05, "loss": 0.0537, "step": 1208000 }, { "epoch": 0.08, "learning_rate": 4.863967828895235e-05, "loss": 0.0619, "step": 1208500 }, { "epoch": 0.08, "learning_rate": 4.863911524281872e-05, "loss": 0.0624, "step": 1209000 }, { "epoch": 0.08, "learning_rate": 4.8638553322777345e-05, "loss": 0.0612, "step": 1209500 }, { "epoch": 0.08, "learning_rate": 4.8637990276643715e-05, "loss": 0.0624, "step": 1210000 }, { "epoch": 0.08, "learning_rate": 4.863742723051007e-05, "loss": 0.061, "step": 1210500 }, { "epoch": 0.08, "learning_rate": 4.863686418437644e-05, "loss": 0.061, "step": 1211000 }, { "epoch": 0.08, "learning_rate": 4.863630113824281e-05, "loss": 0.0556, "step": 1211500 }, { "epoch": 0.08, "learning_rate": 4.863573921820144e-05, "loss": 0.0583, "step": 1212000 }, { "epoch": 0.08, "learning_rate": 4.8635176172067803e-05, "loss": 0.0579, "step": 1212500 }, { "epoch": 0.08, "learning_rate": 4.863461312593417e-05, "loss": 0.0574, "step": 1213000 }, { "epoch": 0.08, "learning_rate": 4.863405007980053e-05, "loss": 0.0626, "step": 1213500 }, { "epoch": 0.08, "learning_rate": 4.8633488159759164e-05, "loss": 0.0569, "step": 1214000 }, { "epoch": 0.08, "learning_rate": 4.863292511362553e-05, "loss": 0.0597, "step": 1214500 }, { "epoch": 0.08, "learning_rate": 4.863236206749189e-05, "loss": 0.0585, "step": 1215000 }, { "epoch": 0.08, "learning_rate": 4.8631799021358256e-05, "loss": 0.06, "step": 1215500 }, { "epoch": 0.08, "learning_rate": 4.863123710131689e-05, "loss": 0.0615, "step": 1216000 }, { "epoch": 0.08, "learning_rate": 4.863067405518325e-05, "loss": 0.0586, "step": 1216500 }, { "epoch": 0.08, "learning_rate": 4.8630111009049616e-05, "loss": 0.0608, "step": 1217000 }, { "epoch": 0.08, "learning_rate": 4.862954796291599e-05, "loss": 0.0567, "step": 1217500 }, { "epoch": 0.08, "learning_rate": 4.862898604287461e-05, "loss": 0.0591, "step": 1218000 }, { "epoch": 0.08, "learning_rate": 4.862842299674098e-05, "loss": 0.0585, "step": 1218500 }, { "epoch": 0.08, "learning_rate": 4.862785995060734e-05, "loss": 0.0618, "step": 1219000 }, { "epoch": 0.08, "learning_rate": 4.8627296904473704e-05, "loss": 0.0545, "step": 1219500 }, { "epoch": 0.08, "learning_rate": 4.8626734984432336e-05, "loss": 0.0604, "step": 1220000 }, { "epoch": 0.08, "learning_rate": 4.86261719382987e-05, "loss": 0.062, "step": 1220500 }, { "epoch": 0.08, "learning_rate": 4.862560889216507e-05, "loss": 0.0575, "step": 1221000 }, { "epoch": 0.08, "learning_rate": 4.8625045846031435e-05, "loss": 0.0624, "step": 1221500 }, { "epoch": 0.08, "learning_rate": 4.86244827998978e-05, "loss": 0.0622, "step": 1222000 }, { "epoch": 0.08, "learning_rate": 4.862391975376416e-05, "loss": 0.0622, "step": 1222500 }, { "epoch": 0.08, "learning_rate": 4.862335670763053e-05, "loss": 0.0626, "step": 1223000 }, { "epoch": 0.08, "learning_rate": 4.862279366149689e-05, "loss": 0.0571, "step": 1223500 }, { "epoch": 0.08, "learning_rate": 4.862223174145552e-05, "loss": 0.0595, "step": 1224000 }, { "epoch": 0.08, "learning_rate": 4.862166869532189e-05, "loss": 0.0603, "step": 1224500 }, { "epoch": 0.08, "learning_rate": 4.862110677528052e-05, "loss": 0.0602, "step": 1225000 }, { "epoch": 0.08, "learning_rate": 4.8620543729146883e-05, "loss": 0.0574, "step": 1225500 }, { "epoch": 0.08, "learning_rate": 4.861998068301325e-05, "loss": 0.0618, "step": 1226000 }, { "epoch": 0.08, "learning_rate": 4.861941763687962e-05, "loss": 0.0595, "step": 1226500 }, { "epoch": 0.08, "learning_rate": 4.8618854590745975e-05, "loss": 0.0594, "step": 1227000 }, { "epoch": 0.08, "learning_rate": 4.8618291544612346e-05, "loss": 0.0579, "step": 1227500 }, { "epoch": 0.08, "learning_rate": 4.861772849847871e-05, "loss": 0.0583, "step": 1228000 }, { "epoch": 0.08, "learning_rate": 4.861716545234507e-05, "loss": 0.061, "step": 1228500 }, { "epoch": 0.08, "learning_rate": 4.8616603532303706e-05, "loss": 0.0591, "step": 1229000 }, { "epoch": 0.08, "learning_rate": 4.8616040486170064e-05, "loss": 0.0655, "step": 1229500 }, { "epoch": 0.08, "learning_rate": 4.8615477440036434e-05, "loss": 0.058, "step": 1230000 }, { "epoch": 0.08, "learning_rate": 4.861491551999507e-05, "loss": 0.0625, "step": 1230500 }, { "epoch": 0.08, "learning_rate": 4.861435247386143e-05, "loss": 0.0581, "step": 1231000 }, { "epoch": 0.08, "learning_rate": 4.8613789427727795e-05, "loss": 0.0548, "step": 1231500 }, { "epoch": 0.08, "learning_rate": 4.861322638159416e-05, "loss": 0.0561, "step": 1232000 }, { "epoch": 0.08, "learning_rate": 4.861266333546052e-05, "loss": 0.0603, "step": 1232500 }, { "epoch": 0.08, "learning_rate": 4.8612100289326887e-05, "loss": 0.0577, "step": 1233000 }, { "epoch": 0.08, "learning_rate": 4.861153724319326e-05, "loss": 0.0585, "step": 1233500 }, { "epoch": 0.08, "learning_rate": 4.8610974197059615e-05, "loss": 0.0589, "step": 1234000 }, { "epoch": 0.08, "learning_rate": 4.8610412277018254e-05, "loss": 0.0595, "step": 1234500 }, { "epoch": 0.08, "learning_rate": 4.860984923088461e-05, "loss": 0.0592, "step": 1235000 }, { "epoch": 0.08, "learning_rate": 4.860928618475098e-05, "loss": 0.0562, "step": 1235500 }, { "epoch": 0.08, "learning_rate": 4.8608723138617346e-05, "loss": 0.0612, "step": 1236000 }, { "epoch": 0.08, "learning_rate": 4.860816009248371e-05, "loss": 0.0563, "step": 1236500 }, { "epoch": 0.08, "learning_rate": 4.8607597046350074e-05, "loss": 0.0567, "step": 1237000 }, { "epoch": 0.08, "learning_rate": 4.8607035126308706e-05, "loss": 0.0563, "step": 1237500 }, { "epoch": 0.08, "learning_rate": 4.860647208017507e-05, "loss": 0.0606, "step": 1238000 }, { "epoch": 0.08, "learning_rate": 4.8605909034041434e-05, "loss": 0.0593, "step": 1238500 }, { "epoch": 0.08, "learning_rate": 4.86053459879078e-05, "loss": 0.0583, "step": 1239000 }, { "epoch": 0.08, "learning_rate": 4.860478294177416e-05, "loss": 0.06, "step": 1239500 }, { "epoch": 0.08, "learning_rate": 4.8604221021732794e-05, "loss": 0.0532, "step": 1240000 }, { "epoch": 0.08, "learning_rate": 4.860365797559916e-05, "loss": 0.0539, "step": 1240500 }, { "epoch": 0.08, "learning_rate": 4.860309492946553e-05, "loss": 0.0583, "step": 1241000 }, { "epoch": 0.08, "learning_rate": 4.8602531883331886e-05, "loss": 0.0541, "step": 1241500 }, { "epoch": 0.08, "learning_rate": 4.8601969963290525e-05, "loss": 0.055, "step": 1242000 }, { "epoch": 0.08, "learning_rate": 4.860140691715688e-05, "loss": 0.0605, "step": 1242500 }, { "epoch": 0.08, "learning_rate": 4.8600843871023246e-05, "loss": 0.062, "step": 1243000 }, { "epoch": 0.08, "learning_rate": 4.860028082488962e-05, "loss": 0.0571, "step": 1243500 }, { "epoch": 0.08, "learning_rate": 4.859971890484825e-05, "loss": 0.0626, "step": 1244000 }, { "epoch": 0.08, "learning_rate": 4.859915585871461e-05, "loss": 0.0562, "step": 1244500 }, { "epoch": 0.08, "learning_rate": 4.859859281258097e-05, "loss": 0.0568, "step": 1245000 }, { "epoch": 0.08, "learning_rate": 4.859802976644734e-05, "loss": 0.0538, "step": 1245500 }, { "epoch": 0.08, "learning_rate": 4.8597466720313705e-05, "loss": 0.0565, "step": 1246000 }, { "epoch": 0.08, "learning_rate": 4.859690480027234e-05, "loss": 0.0556, "step": 1246500 }, { "epoch": 0.08, "learning_rate": 4.85963417541387e-05, "loss": 0.0595, "step": 1247000 }, { "epoch": 0.08, "learning_rate": 4.8595778708005065e-05, "loss": 0.063, "step": 1247500 }, { "epoch": 0.08, "learning_rate": 4.859521566187143e-05, "loss": 0.0609, "step": 1248000 }, { "epoch": 0.08, "learning_rate": 4.859465261573779e-05, "loss": 0.0553, "step": 1248500 }, { "epoch": 0.08, "learning_rate": 4.8594089569604164e-05, "loss": 0.0605, "step": 1249000 }, { "epoch": 0.08, "learning_rate": 4.859352652347052e-05, "loss": 0.0576, "step": 1249500 }, { "epoch": 0.08, "learning_rate": 4.859296460342916e-05, "loss": 0.0584, "step": 1250000 }, { "epoch": 0.08, "learning_rate": 4.859240155729552e-05, "loss": 0.0576, "step": 1250500 }, { "epoch": 0.08, "learning_rate": 4.859183851116189e-05, "loss": 0.0622, "step": 1251000 }, { "epoch": 0.08, "learning_rate": 4.859127546502825e-05, "loss": 0.0547, "step": 1251500 }, { "epoch": 0.08, "learning_rate": 4.8590713544986885e-05, "loss": 0.0588, "step": 1252000 }, { "epoch": 0.08, "learning_rate": 4.859015049885325e-05, "loss": 0.0583, "step": 1252500 }, { "epoch": 0.08, "learning_rate": 4.858958745271961e-05, "loss": 0.0571, "step": 1253000 }, { "epoch": 0.08, "learning_rate": 4.8589024406585977e-05, "loss": 0.0571, "step": 1253500 }, { "epoch": 0.08, "learning_rate": 4.858846136045234e-05, "loss": 0.0574, "step": 1254000 }, { "epoch": 0.08, "learning_rate": 4.8587898314318704e-05, "loss": 0.0588, "step": 1254500 }, { "epoch": 0.08, "learning_rate": 4.858733639427734e-05, "loss": 0.0621, "step": 1255000 }, { "epoch": 0.08, "learning_rate": 4.85867733481437e-05, "loss": 0.0599, "step": 1255500 }, { "epoch": 0.08, "learning_rate": 4.8586210302010065e-05, "loss": 0.0572, "step": 1256000 }, { "epoch": 0.08, "learning_rate": 4.858564725587643e-05, "loss": 0.0614, "step": 1256500 }, { "epoch": 0.08, "learning_rate": 4.858508420974279e-05, "loss": 0.0567, "step": 1257000 }, { "epoch": 0.08, "learning_rate": 4.858452228970143e-05, "loss": 0.0586, "step": 1257500 }, { "epoch": 0.08, "learning_rate": 4.858395924356779e-05, "loss": 0.056, "step": 1258000 }, { "epoch": 0.09, "learning_rate": 4.858339619743415e-05, "loss": 0.0587, "step": 1258500 }, { "epoch": 0.09, "learning_rate": 4.8582833151300524e-05, "loss": 0.0608, "step": 1259000 }, { "epoch": 0.09, "learning_rate": 4.858227123125915e-05, "loss": 0.058, "step": 1259500 }, { "epoch": 0.09, "learning_rate": 4.858170818512552e-05, "loss": 0.0594, "step": 1260000 }, { "epoch": 0.09, "learning_rate": 4.858114513899188e-05, "loss": 0.0566, "step": 1260500 }, { "epoch": 0.09, "learning_rate": 4.858058209285825e-05, "loss": 0.0598, "step": 1261000 }, { "epoch": 0.09, "learning_rate": 4.858002017281687e-05, "loss": 0.062, "step": 1261500 }, { "epoch": 0.09, "learning_rate": 4.8579457126683244e-05, "loss": 0.0598, "step": 1262000 }, { "epoch": 0.09, "learning_rate": 4.857889408054961e-05, "loss": 0.0572, "step": 1262500 }, { "epoch": 0.09, "learning_rate": 4.857833103441597e-05, "loss": 0.0581, "step": 1263000 }, { "epoch": 0.09, "learning_rate": 4.8577767988282336e-05, "loss": 0.0595, "step": 1263500 }, { "epoch": 0.09, "learning_rate": 4.857720606824097e-05, "loss": 0.0601, "step": 1264000 }, { "epoch": 0.09, "learning_rate": 4.857664302210733e-05, "loss": 0.0575, "step": 1264500 }, { "epoch": 0.09, "learning_rate": 4.8576081102065964e-05, "loss": 0.0569, "step": 1265000 }, { "epoch": 0.09, "learning_rate": 4.857551805593233e-05, "loss": 0.0563, "step": 1265500 }, { "epoch": 0.09, "learning_rate": 4.857495500979869e-05, "loss": 0.0593, "step": 1266000 }, { "epoch": 0.09, "learning_rate": 4.857439196366506e-05, "loss": 0.0555, "step": 1266500 }, { "epoch": 0.09, "learning_rate": 4.857382891753142e-05, "loss": 0.0597, "step": 1267000 }, { "epoch": 0.09, "learning_rate": 4.857326587139779e-05, "loss": 0.053, "step": 1267500 }, { "epoch": 0.09, "learning_rate": 4.8572702825264155e-05, "loss": 0.0542, "step": 1268000 }, { "epoch": 0.09, "learning_rate": 4.857213977913051e-05, "loss": 0.062, "step": 1268500 }, { "epoch": 0.09, "learning_rate": 4.857157785908915e-05, "loss": 0.0573, "step": 1269000 }, { "epoch": 0.09, "learning_rate": 4.857101481295551e-05, "loss": 0.0595, "step": 1269500 }, { "epoch": 0.09, "learning_rate": 4.857045176682188e-05, "loss": 0.061, "step": 1270000 }, { "epoch": 0.09, "learning_rate": 4.8569888720688243e-05, "loss": 0.0576, "step": 1270500 }, { "epoch": 0.09, "learning_rate": 4.856932567455461e-05, "loss": 0.0589, "step": 1271000 }, { "epoch": 0.09, "learning_rate": 4.856876375451324e-05, "loss": 0.0607, "step": 1271500 }, { "epoch": 0.09, "learning_rate": 4.8568200708379604e-05, "loss": 0.0561, "step": 1272000 }, { "epoch": 0.09, "learning_rate": 4.856763766224597e-05, "loss": 0.0515, "step": 1272500 }, { "epoch": 0.09, "learning_rate": 4.856707461611233e-05, "loss": 0.0558, "step": 1273000 }, { "epoch": 0.09, "learning_rate": 4.8566511569978696e-05, "loss": 0.063, "step": 1273500 }, { "epoch": 0.09, "learning_rate": 4.856594964993733e-05, "loss": 0.055, "step": 1274000 }, { "epoch": 0.09, "learning_rate": 4.856538660380369e-05, "loss": 0.0554, "step": 1274500 }, { "epoch": 0.09, "learning_rate": 4.8564823557670056e-05, "loss": 0.0589, "step": 1275000 }, { "epoch": 0.09, "learning_rate": 4.8564260511536427e-05, "loss": 0.0614, "step": 1275500 }, { "epoch": 0.09, "learning_rate": 4.856369859149505e-05, "loss": 0.0605, "step": 1276000 }, { "epoch": 0.09, "learning_rate": 4.856313554536142e-05, "loss": 0.0578, "step": 1276500 }, { "epoch": 0.09, "learning_rate": 4.856257249922778e-05, "loss": 0.0585, "step": 1277000 }, { "epoch": 0.09, "learning_rate": 4.856200945309415e-05, "loss": 0.0552, "step": 1277500 }, { "epoch": 0.09, "learning_rate": 4.8561447533052776e-05, "loss": 0.061, "step": 1278000 }, { "epoch": 0.09, "learning_rate": 4.856088448691915e-05, "loss": 0.0605, "step": 1278500 }, { "epoch": 0.09, "learning_rate": 4.856032144078551e-05, "loss": 0.0564, "step": 1279000 }, { "epoch": 0.09, "learning_rate": 4.8559758394651875e-05, "loss": 0.0543, "step": 1279500 }, { "epoch": 0.09, "learning_rate": 4.855919534851824e-05, "loss": 0.0588, "step": 1280000 }, { "epoch": 0.09, "learning_rate": 4.85586323023846e-05, "loss": 0.0607, "step": 1280500 }, { "epoch": 0.09, "learning_rate": 4.8558070382343235e-05, "loss": 0.0569, "step": 1281000 }, { "epoch": 0.09, "learning_rate": 4.85575073362096e-05, "loss": 0.0581, "step": 1281500 }, { "epoch": 0.09, "learning_rate": 4.855694429007597e-05, "loss": 0.06, "step": 1282000 }, { "epoch": 0.09, "learning_rate": 4.855638124394233e-05, "loss": 0.0532, "step": 1282500 }, { "epoch": 0.09, "learning_rate": 4.8555819323900966e-05, "loss": 0.0564, "step": 1283000 }, { "epoch": 0.09, "learning_rate": 4.8555256277767323e-05, "loss": 0.0593, "step": 1283500 }, { "epoch": 0.09, "learning_rate": 4.8554693231633694e-05, "loss": 0.0628, "step": 1284000 }, { "epoch": 0.09, "learning_rate": 4.855413018550006e-05, "loss": 0.0564, "step": 1284500 }, { "epoch": 0.09, "learning_rate": 4.855356826545869e-05, "loss": 0.0577, "step": 1285000 }, { "epoch": 0.09, "learning_rate": 4.8553005219325054e-05, "loss": 0.0568, "step": 1285500 }, { "epoch": 0.09, "learning_rate": 4.855244217319141e-05, "loss": 0.0552, "step": 1286000 }, { "epoch": 0.09, "learning_rate": 4.855187912705778e-05, "loss": 0.0561, "step": 1286500 }, { "epoch": 0.09, "learning_rate": 4.8551317207016415e-05, "loss": 0.0591, "step": 1287000 }, { "epoch": 0.09, "learning_rate": 4.855075416088278e-05, "loss": 0.0578, "step": 1287500 }, { "epoch": 0.09, "learning_rate": 4.855019111474914e-05, "loss": 0.0584, "step": 1288000 }, { "epoch": 0.09, "learning_rate": 4.8549628068615507e-05, "loss": 0.0537, "step": 1288500 }, { "epoch": 0.09, "learning_rate": 4.854906502248187e-05, "loss": 0.0621, "step": 1289000 }, { "epoch": 0.09, "learning_rate": 4.85485031024405e-05, "loss": 0.0604, "step": 1289500 }, { "epoch": 0.09, "learning_rate": 4.854794005630687e-05, "loss": 0.0576, "step": 1290000 }, { "epoch": 0.09, "learning_rate": 4.854737701017323e-05, "loss": 0.0576, "step": 1290500 }, { "epoch": 0.09, "learning_rate": 4.8546813964039595e-05, "loss": 0.06, "step": 1291000 }, { "epoch": 0.09, "learning_rate": 4.8546252043998234e-05, "loss": 0.0576, "step": 1291500 }, { "epoch": 0.09, "learning_rate": 4.854568899786459e-05, "loss": 0.0555, "step": 1292000 }, { "epoch": 0.09, "learning_rate": 4.8545125951730955e-05, "loss": 0.0614, "step": 1292500 }, { "epoch": 0.09, "learning_rate": 4.8544562905597326e-05, "loss": 0.0549, "step": 1293000 }, { "epoch": 0.09, "learning_rate": 4.854399985946368e-05, "loss": 0.0571, "step": 1293500 }, { "epoch": 0.09, "learning_rate": 4.8543436813330054e-05, "loss": 0.0547, "step": 1294000 }, { "epoch": 0.09, "learning_rate": 4.854287489328868e-05, "loss": 0.0563, "step": 1294500 }, { "epoch": 0.09, "learning_rate": 4.854231297324732e-05, "loss": 0.0601, "step": 1295000 }, { "epoch": 0.09, "learning_rate": 4.8541749927113675e-05, "loss": 0.0582, "step": 1295500 }, { "epoch": 0.09, "learning_rate": 4.8541186880980046e-05, "loss": 0.0578, "step": 1296000 }, { "epoch": 0.09, "learning_rate": 4.854062383484641e-05, "loss": 0.0592, "step": 1296500 }, { "epoch": 0.09, "learning_rate": 4.8540060788712774e-05, "loss": 0.0595, "step": 1297000 }, { "epoch": 0.09, "learning_rate": 4.853949774257914e-05, "loss": 0.0602, "step": 1297500 }, { "epoch": 0.09, "learning_rate": 4.85389346964455e-05, "loss": 0.059, "step": 1298000 }, { "epoch": 0.09, "learning_rate": 4.853837165031187e-05, "loss": 0.0605, "step": 1298500 }, { "epoch": 0.09, "learning_rate": 4.853780860417823e-05, "loss": 0.0542, "step": 1299000 }, { "epoch": 0.09, "learning_rate": 4.853724668413687e-05, "loss": 0.0618, "step": 1299500 }, { "epoch": 0.09, "learning_rate": 4.8536683638003226e-05, "loss": 0.0604, "step": 1300000 }, { "epoch": 0.09, "learning_rate": 4.85361205918696e-05, "loss": 0.0528, "step": 1300500 }, { "epoch": 0.09, "learning_rate": 4.853555754573596e-05, "loss": 0.0567, "step": 1301000 }, { "epoch": 0.09, "learning_rate": 4.853499449960232e-05, "loss": 0.0558, "step": 1301500 }, { "epoch": 0.09, "learning_rate": 4.853443145346869e-05, "loss": 0.0596, "step": 1302000 }, { "epoch": 0.09, "learning_rate": 4.8533869533427315e-05, "loss": 0.0587, "step": 1302500 }, { "epoch": 0.09, "learning_rate": 4.8533306487293685e-05, "loss": 0.061, "step": 1303000 }, { "epoch": 0.09, "learning_rate": 4.853274344116005e-05, "loss": 0.0583, "step": 1303500 }, { "epoch": 0.09, "learning_rate": 4.853218039502641e-05, "loss": 0.0528, "step": 1304000 }, { "epoch": 0.09, "learning_rate": 4.853161734889278e-05, "loss": 0.0543, "step": 1304500 }, { "epoch": 0.09, "learning_rate": 4.853105542885141e-05, "loss": 0.0552, "step": 1305000 }, { "epoch": 0.09, "learning_rate": 4.8530492382717773e-05, "loss": 0.0571, "step": 1305500 }, { "epoch": 0.09, "learning_rate": 4.852992933658414e-05, "loss": 0.0589, "step": 1306000 }, { "epoch": 0.09, "learning_rate": 4.85293662904505e-05, "loss": 0.0585, "step": 1306500 }, { "epoch": 0.09, "learning_rate": 4.8528803244316865e-05, "loss": 0.0586, "step": 1307000 }, { "epoch": 0.09, "learning_rate": 4.8528240198183236e-05, "loss": 0.0586, "step": 1307500 }, { "epoch": 0.09, "learning_rate": 4.852767827814186e-05, "loss": 0.0583, "step": 1308000 }, { "epoch": 0.09, "learning_rate": 4.852711523200823e-05, "loss": 0.0572, "step": 1308500 }, { "epoch": 0.09, "learning_rate": 4.8526552185874596e-05, "loss": 0.0568, "step": 1309000 }, { "epoch": 0.09, "learning_rate": 4.852598913974096e-05, "loss": 0.0632, "step": 1309500 }, { "epoch": 0.09, "learning_rate": 4.8525426093607324e-05, "loss": 0.0547, "step": 1310000 }, { "epoch": 0.09, "learning_rate": 4.852486417356596e-05, "loss": 0.0615, "step": 1310500 }, { "epoch": 0.09, "learning_rate": 4.852430112743232e-05, "loss": 0.0583, "step": 1311000 }, { "epoch": 0.09, "learning_rate": 4.8523738081298685e-05, "loss": 0.0557, "step": 1311500 }, { "epoch": 0.09, "learning_rate": 4.852317503516505e-05, "loss": 0.0618, "step": 1312000 }, { "epoch": 0.09, "learning_rate": 4.852261198903141e-05, "loss": 0.0599, "step": 1312500 }, { "epoch": 0.09, "learning_rate": 4.8522050068990045e-05, "loss": 0.0544, "step": 1313000 }, { "epoch": 0.09, "learning_rate": 4.852148702285641e-05, "loss": 0.0568, "step": 1313500 }, { "epoch": 0.09, "learning_rate": 4.852092397672278e-05, "loss": 0.0614, "step": 1314000 }, { "epoch": 0.09, "learning_rate": 4.852036093058914e-05, "loss": 0.0583, "step": 1314500 }, { "epoch": 0.09, "learning_rate": 4.85197978844555e-05, "loss": 0.0567, "step": 1315000 }, { "epoch": 0.09, "learning_rate": 4.851923596441413e-05, "loss": 0.0593, "step": 1315500 }, { "epoch": 0.09, "learning_rate": 4.85186729182805e-05, "loss": 0.0575, "step": 1316000 }, { "epoch": 0.09, "learning_rate": 4.851810987214687e-05, "loss": 0.0542, "step": 1316500 }, { "epoch": 0.09, "learning_rate": 4.8517546826013225e-05, "loss": 0.0575, "step": 1317000 }, { "epoch": 0.09, "learning_rate": 4.8516984905971864e-05, "loss": 0.0579, "step": 1317500 }, { "epoch": 0.09, "learning_rate": 4.8516422985930496e-05, "loss": 0.0581, "step": 1318000 }, { "epoch": 0.09, "learning_rate": 4.851585993979686e-05, "loss": 0.0537, "step": 1318500 }, { "epoch": 0.09, "learning_rate": 4.851529689366322e-05, "loss": 0.0612, "step": 1319000 }, { "epoch": 0.09, "learning_rate": 4.851473384752959e-05, "loss": 0.0574, "step": 1319500 }, { "epoch": 0.09, "learning_rate": 4.851417080139595e-05, "loss": 0.0568, "step": 1320000 }, { "epoch": 0.09, "learning_rate": 4.8513607755262316e-05, "loss": 0.0567, "step": 1320500 }, { "epoch": 0.09, "learning_rate": 4.851304470912868e-05, "loss": 0.0514, "step": 1321000 }, { "epoch": 0.09, "learning_rate": 4.8512481662995044e-05, "loss": 0.0514, "step": 1321500 }, { "epoch": 0.09, "learning_rate": 4.8511919742953676e-05, "loss": 0.0583, "step": 1322000 }, { "epoch": 0.09, "learning_rate": 4.851135669682004e-05, "loss": 0.0573, "step": 1322500 }, { "epoch": 0.09, "learning_rate": 4.8510793650686404e-05, "loss": 0.0571, "step": 1323000 }, { "epoch": 0.09, "learning_rate": 4.851023173064504e-05, "loss": 0.0616, "step": 1323500 }, { "epoch": 0.09, "learning_rate": 4.85096686845114e-05, "loss": 0.0579, "step": 1324000 }, { "epoch": 0.09, "learning_rate": 4.8509105638377765e-05, "loss": 0.0557, "step": 1324500 }, { "epoch": 0.09, "learning_rate": 4.8508542592244135e-05, "loss": 0.0598, "step": 1325000 }, { "epoch": 0.09, "learning_rate": 4.85079795461105e-05, "loss": 0.0516, "step": 1325500 }, { "epoch": 0.09, "learning_rate": 4.8507416499976857e-05, "loss": 0.0557, "step": 1326000 }, { "epoch": 0.09, "learning_rate": 4.850685345384323e-05, "loss": 0.0516, "step": 1326500 }, { "epoch": 0.09, "learning_rate": 4.850629040770959e-05, "loss": 0.0527, "step": 1327000 }, { "epoch": 0.09, "learning_rate": 4.8505727361575955e-05, "loss": 0.0599, "step": 1327500 }, { "epoch": 0.09, "learning_rate": 4.850516431544232e-05, "loss": 0.0556, "step": 1328000 }, { "epoch": 0.09, "learning_rate": 4.850460126930868e-05, "loss": 0.0625, "step": 1328500 }, { "epoch": 0.09, "learning_rate": 4.8504039349267316e-05, "loss": 0.055, "step": 1329000 }, { "epoch": 0.09, "learning_rate": 4.850347630313368e-05, "loss": 0.0591, "step": 1329500 }, { "epoch": 0.09, "learning_rate": 4.8502913257000044e-05, "loss": 0.0562, "step": 1330000 }, { "epoch": 0.09, "learning_rate": 4.850235021086641e-05, "loss": 0.0525, "step": 1330500 }, { "epoch": 0.09, "learning_rate": 4.850178829082504e-05, "loss": 0.0599, "step": 1331000 }, { "epoch": 0.09, "learning_rate": 4.8501225244691404e-05, "loss": 0.0567, "step": 1331500 }, { "epoch": 0.09, "learning_rate": 4.8500663324650036e-05, "loss": 0.0585, "step": 1332000 }, { "epoch": 0.09, "learning_rate": 4.85001002785164e-05, "loss": 0.0573, "step": 1332500 }, { "epoch": 0.09, "learning_rate": 4.849953723238277e-05, "loss": 0.0527, "step": 1333000 }, { "epoch": 0.09, "learning_rate": 4.849897418624913e-05, "loss": 0.0572, "step": 1333500 }, { "epoch": 0.09, "learning_rate": 4.84984111401155e-05, "loss": 0.0549, "step": 1334000 }, { "epoch": 0.09, "learning_rate": 4.849784809398186e-05, "loss": 0.0594, "step": 1334500 }, { "epoch": 0.09, "learning_rate": 4.849728504784822e-05, "loss": 0.0568, "step": 1335000 }, { "epoch": 0.09, "learning_rate": 4.849672200171459e-05, "loss": 0.0591, "step": 1335500 }, { "epoch": 0.09, "learning_rate": 4.849616008167322e-05, "loss": 0.0598, "step": 1336000 }, { "epoch": 0.09, "learning_rate": 4.849559703553959e-05, "loss": 0.0579, "step": 1336500 }, { "epoch": 0.09, "learning_rate": 4.849503398940595e-05, "loss": 0.0572, "step": 1337000 }, { "epoch": 0.09, "learning_rate": 4.849447094327232e-05, "loss": 0.0573, "step": 1337500 }, { "epoch": 0.09, "learning_rate": 4.849390789713868e-05, "loss": 0.0553, "step": 1338000 }, { "epoch": 0.09, "learning_rate": 4.849334485100504e-05, "loss": 0.0574, "step": 1338500 }, { "epoch": 0.09, "learning_rate": 4.8492782930963675e-05, "loss": 0.0607, "step": 1339000 }, { "epoch": 0.09, "learning_rate": 4.849221988483004e-05, "loss": 0.0552, "step": 1339500 }, { "epoch": 0.09, "learning_rate": 4.849165683869641e-05, "loss": 0.0631, "step": 1340000 }, { "epoch": 0.09, "learning_rate": 4.849109379256277e-05, "loss": 0.0573, "step": 1340500 }, { "epoch": 0.09, "learning_rate": 4.8490531872521406e-05, "loss": 0.0553, "step": 1341000 }, { "epoch": 0.09, "learning_rate": 4.848996882638776e-05, "loss": 0.0589, "step": 1341500 }, { "epoch": 0.09, "learning_rate": 4.8489405780254134e-05, "loss": 0.053, "step": 1342000 }, { "epoch": 0.09, "learning_rate": 4.84888427341205e-05, "loss": 0.0581, "step": 1342500 }, { "epoch": 0.09, "learning_rate": 4.848828081407913e-05, "loss": 0.0587, "step": 1343000 }, { "epoch": 0.09, "learning_rate": 4.8487717767945494e-05, "loss": 0.0525, "step": 1343500 }, { "epoch": 0.09, "learning_rate": 4.848715472181186e-05, "loss": 0.0569, "step": 1344000 }, { "epoch": 0.09, "learning_rate": 4.848659167567822e-05, "loss": 0.0539, "step": 1344500 }, { "epoch": 0.09, "learning_rate": 4.8486028629544586e-05, "loss": 0.0541, "step": 1345000 }, { "epoch": 0.09, "learning_rate": 4.848546670950322e-05, "loss": 0.0585, "step": 1345500 }, { "epoch": 0.09, "learning_rate": 4.848490366336958e-05, "loss": 0.0578, "step": 1346000 }, { "epoch": 0.09, "learning_rate": 4.8484340617235947e-05, "loss": 0.0558, "step": 1346500 }, { "epoch": 0.09, "learning_rate": 4.848377757110231e-05, "loss": 0.0553, "step": 1347000 }, { "epoch": 0.09, "learning_rate": 4.848321452496868e-05, "loss": 0.0555, "step": 1347500 }, { "epoch": 0.09, "learning_rate": 4.848265260492731e-05, "loss": 0.058, "step": 1348000 }, { "epoch": 0.09, "learning_rate": 4.848208955879368e-05, "loss": 0.0549, "step": 1348500 }, { "epoch": 0.09, "learning_rate": 4.8481526512660035e-05, "loss": 0.0579, "step": 1349000 }, { "epoch": 0.09, "learning_rate": 4.8480963466526405e-05, "loss": 0.0554, "step": 1349500 }, { "epoch": 0.09, "learning_rate": 4.848040042039277e-05, "loss": 0.0562, "step": 1350000 }, { "epoch": 0.09, "learning_rate": 4.84798385003514e-05, "loss": 0.0587, "step": 1350500 }, { "epoch": 0.09, "learning_rate": 4.8479275454217766e-05, "loss": 0.0607, "step": 1351000 }, { "epoch": 0.09, "learning_rate": 4.847871240808412e-05, "loss": 0.0578, "step": 1351500 }, { "epoch": 0.09, "learning_rate": 4.8478149361950494e-05, "loss": 0.0577, "step": 1352000 }, { "epoch": 0.09, "learning_rate": 4.8477587441909126e-05, "loss": 0.0567, "step": 1352500 }, { "epoch": 0.09, "learning_rate": 4.847702439577549e-05, "loss": 0.0598, "step": 1353000 }, { "epoch": 0.09, "learning_rate": 4.8476461349641854e-05, "loss": 0.0568, "step": 1353500 }, { "epoch": 0.09, "learning_rate": 4.8475898303508225e-05, "loss": 0.0566, "step": 1354000 }, { "epoch": 0.09, "learning_rate": 4.847533638346685e-05, "loss": 0.0584, "step": 1354500 }, { "epoch": 0.09, "learning_rate": 4.847477333733322e-05, "loss": 0.0593, "step": 1355000 }, { "epoch": 0.09, "learning_rate": 4.847421029119958e-05, "loss": 0.0594, "step": 1355500 }, { "epoch": 0.09, "learning_rate": 4.847364724506594e-05, "loss": 0.0521, "step": 1356000 }, { "epoch": 0.09, "learning_rate": 4.847308419893231e-05, "loss": 0.0575, "step": 1356500 }, { "epoch": 0.09, "learning_rate": 4.8472522278890945e-05, "loss": 0.056, "step": 1357000 }, { "epoch": 0.09, "learning_rate": 4.847195923275731e-05, "loss": 0.0547, "step": 1357500 }, { "epoch": 0.09, "learning_rate": 4.8471396186623666e-05, "loss": 0.0566, "step": 1358000 }, { "epoch": 0.09, "learning_rate": 4.847083314049004e-05, "loss": 0.0555, "step": 1358500 }, { "epoch": 0.09, "learning_rate": 4.84702700943564e-05, "loss": 0.0548, "step": 1359000 }, { "epoch": 0.09, "learning_rate": 4.846970817431503e-05, "loss": 0.0545, "step": 1359500 }, { "epoch": 0.09, "learning_rate": 4.84691451281814e-05, "loss": 0.0577, "step": 1360000 }, { "epoch": 0.09, "learning_rate": 4.846858208204776e-05, "loss": 0.0586, "step": 1360500 }, { "epoch": 0.09, "learning_rate": 4.8468019035914125e-05, "loss": 0.054, "step": 1361000 }, { "epoch": 0.09, "learning_rate": 4.846745711587276e-05, "loss": 0.0555, "step": 1361500 }, { "epoch": 0.09, "learning_rate": 4.846689406973912e-05, "loss": 0.0578, "step": 1362000 }, { "epoch": 0.09, "learning_rate": 4.8466331023605485e-05, "loss": 0.0585, "step": 1362500 }, { "epoch": 0.09, "learning_rate": 4.846576797747185e-05, "loss": 0.0546, "step": 1363000 }, { "epoch": 0.09, "learning_rate": 4.8465204931338213e-05, "loss": 0.0593, "step": 1363500 }, { "epoch": 0.09, "learning_rate": 4.8464643011296846e-05, "loss": 0.0556, "step": 1364000 }, { "epoch": 0.09, "learning_rate": 4.846407996516321e-05, "loss": 0.059, "step": 1364500 }, { "epoch": 0.09, "learning_rate": 4.846351804512184e-05, "loss": 0.0569, "step": 1365000 }, { "epoch": 0.09, "learning_rate": 4.8462954998988206e-05, "loss": 0.0521, "step": 1365500 }, { "epoch": 0.09, "learning_rate": 4.8462391952854577e-05, "loss": 0.0583, "step": 1366000 }, { "epoch": 0.09, "learning_rate": 4.8461828906720934e-05, "loss": 0.0581, "step": 1366500 }, { "epoch": 0.09, "learning_rate": 4.8461265860587305e-05, "loss": 0.059, "step": 1367000 }, { "epoch": 0.09, "learning_rate": 4.846070281445367e-05, "loss": 0.0562, "step": 1367500 }, { "epoch": 0.09, "learning_rate": 4.8460139768320026e-05, "loss": 0.0576, "step": 1368000 }, { "epoch": 0.09, "learning_rate": 4.8459576722186397e-05, "loss": 0.0553, "step": 1368500 }, { "epoch": 0.09, "learning_rate": 4.845901367605276e-05, "loss": 0.0558, "step": 1369000 }, { "epoch": 0.09, "learning_rate": 4.8458450629919125e-05, "loss": 0.0551, "step": 1369500 }, { "epoch": 0.09, "learning_rate": 4.845788870987776e-05, "loss": 0.053, "step": 1370000 }, { "epoch": 0.09, "learning_rate": 4.845732566374413e-05, "loss": 0.0586, "step": 1370500 }, { "epoch": 0.09, "learning_rate": 4.8456762617610485e-05, "loss": 0.0524, "step": 1371000 }, { "epoch": 0.09, "learning_rate": 4.845619957147685e-05, "loss": 0.054, "step": 1371500 }, { "epoch": 0.09, "learning_rate": 4.845563652534322e-05, "loss": 0.0565, "step": 1372000 }, { "epoch": 0.09, "learning_rate": 4.8455074605301845e-05, "loss": 0.0573, "step": 1372500 }, { "epoch": 0.09, "learning_rate": 4.845451268526048e-05, "loss": 0.0539, "step": 1373000 }, { "epoch": 0.09, "learning_rate": 4.845394963912684e-05, "loss": 0.0563, "step": 1373500 }, { "epoch": 0.09, "learning_rate": 4.845338659299321e-05, "loss": 0.063, "step": 1374000 }, { "epoch": 0.09, "learning_rate": 4.845282354685957e-05, "loss": 0.0572, "step": 1374500 }, { "epoch": 0.09, "learning_rate": 4.845226050072594e-05, "loss": 0.0582, "step": 1375000 }, { "epoch": 0.09, "learning_rate": 4.8451697454592304e-05, "loss": 0.0573, "step": 1375500 }, { "epoch": 0.09, "learning_rate": 4.845113440845867e-05, "loss": 0.0533, "step": 1376000 }, { "epoch": 0.09, "learning_rate": 4.845057136232503e-05, "loss": 0.0534, "step": 1376500 }, { "epoch": 0.09, "learning_rate": 4.8450008316191396e-05, "loss": 0.0579, "step": 1377000 }, { "epoch": 0.09, "learning_rate": 4.844944639615003e-05, "loss": 0.0534, "step": 1377500 }, { "epoch": 0.09, "learning_rate": 4.844888335001639e-05, "loss": 0.0562, "step": 1378000 }, { "epoch": 0.09, "learning_rate": 4.8448320303882756e-05, "loss": 0.06, "step": 1378500 }, { "epoch": 0.09, "learning_rate": 4.844775725774912e-05, "loss": 0.0511, "step": 1379000 }, { "epoch": 0.09, "learning_rate": 4.844719421161549e-05, "loss": 0.0539, "step": 1379500 }, { "epoch": 0.09, "learning_rate": 4.8446632291574116e-05, "loss": 0.0556, "step": 1380000 }, { "epoch": 0.09, "learning_rate": 4.844606924544049e-05, "loss": 0.0595, "step": 1380500 }, { "epoch": 0.09, "learning_rate": 4.8445506199306844e-05, "loss": 0.0565, "step": 1381000 }, { "epoch": 0.09, "learning_rate": 4.844494315317321e-05, "loss": 0.0609, "step": 1381500 }, { "epoch": 0.09, "learning_rate": 4.844438010703958e-05, "loss": 0.0554, "step": 1382000 }, { "epoch": 0.09, "learning_rate": 4.8443818186998205e-05, "loss": 0.0603, "step": 1382500 }, { "epoch": 0.09, "learning_rate": 4.8443255140864575e-05, "loss": 0.0549, "step": 1383000 }, { "epoch": 0.09, "learning_rate": 4.844269209473094e-05, "loss": 0.0551, "step": 1383500 }, { "epoch": 0.09, "learning_rate": 4.84421290485973e-05, "loss": 0.059, "step": 1384000 }, { "epoch": 0.09, "learning_rate": 4.8441567128555936e-05, "loss": 0.0548, "step": 1384500 }, { "epoch": 0.09, "learning_rate": 4.84410040824223e-05, "loss": 0.0592, "step": 1385000 }, { "epoch": 0.09, "learning_rate": 4.8440441036288664e-05, "loss": 0.0541, "step": 1385500 }, { "epoch": 0.09, "learning_rate": 4.843987799015503e-05, "loss": 0.0563, "step": 1386000 }, { "epoch": 0.09, "learning_rate": 4.843931607011366e-05, "loss": 0.0583, "step": 1386500 }, { "epoch": 0.09, "learning_rate": 4.8438753023980024e-05, "loss": 0.0514, "step": 1387000 }, { "epoch": 0.09, "learning_rate": 4.843818997784639e-05, "loss": 0.0567, "step": 1387500 }, { "epoch": 0.09, "learning_rate": 4.843762693171275e-05, "loss": 0.0616, "step": 1388000 }, { "epoch": 0.09, "learning_rate": 4.843706388557912e-05, "loss": 0.0606, "step": 1388500 }, { "epoch": 0.09, "learning_rate": 4.843650196553775e-05, "loss": 0.0526, "step": 1389000 }, { "epoch": 0.09, "learning_rate": 4.843593891940412e-05, "loss": 0.057, "step": 1389500 }, { "epoch": 0.09, "learning_rate": 4.8435375873270476e-05, "loss": 0.0547, "step": 1390000 }, { "epoch": 0.09, "learning_rate": 4.843481282713685e-05, "loss": 0.0569, "step": 1390500 }, { "epoch": 0.09, "learning_rate": 4.843425090709547e-05, "loss": 0.0518, "step": 1391000 }, { "epoch": 0.09, "learning_rate": 4.843368786096184e-05, "loss": 0.0569, "step": 1391500 }, { "epoch": 0.09, "learning_rate": 4.843312481482821e-05, "loss": 0.0533, "step": 1392000 }, { "epoch": 0.09, "learning_rate": 4.843256176869457e-05, "loss": 0.0539, "step": 1392500 }, { "epoch": 0.09, "learning_rate": 4.8431998722560935e-05, "loss": 0.0604, "step": 1393000 }, { "epoch": 0.09, "learning_rate": 4.843143680251957e-05, "loss": 0.0547, "step": 1393500 }, { "epoch": 0.09, "learning_rate": 4.843087375638593e-05, "loss": 0.0569, "step": 1394000 }, { "epoch": 0.09, "learning_rate": 4.8430310710252295e-05, "loss": 0.0552, "step": 1394500 }, { "epoch": 0.09, "learning_rate": 4.842974766411866e-05, "loss": 0.058, "step": 1395000 }, { "epoch": 0.09, "learning_rate": 4.842918574407729e-05, "loss": 0.061, "step": 1395500 }, { "epoch": 0.09, "learning_rate": 4.8428622697943655e-05, "loss": 0.0549, "step": 1396000 }, { "epoch": 0.09, "learning_rate": 4.842805965181002e-05, "loss": 0.0585, "step": 1396500 }, { "epoch": 0.09, "learning_rate": 4.842749660567639e-05, "loss": 0.0597, "step": 1397000 }, { "epoch": 0.09, "learning_rate": 4.842693355954275e-05, "loss": 0.058, "step": 1397500 }, { "epoch": 0.09, "learning_rate": 4.842637051340911e-05, "loss": 0.055, "step": 1398000 }, { "epoch": 0.09, "learning_rate": 4.8425808593367743e-05, "loss": 0.0552, "step": 1398500 }, { "epoch": 0.09, "learning_rate": 4.842524554723411e-05, "loss": 0.0548, "step": 1399000 }, { "epoch": 0.09, "learning_rate": 4.842468250110048e-05, "loss": 0.0598, "step": 1399500 }, { "epoch": 0.09, "learning_rate": 4.842411945496684e-05, "loss": 0.0601, "step": 1400000 }, { "epoch": 0.09, "learning_rate": 4.8423557534925474e-05, "loss": 0.0531, "step": 1400500 }, { "epoch": 0.09, "learning_rate": 4.842299448879184e-05, "loss": 0.0564, "step": 1401000 }, { "epoch": 0.09, "learning_rate": 4.84224314426582e-05, "loss": 0.0595, "step": 1401500 }, { "epoch": 0.09, "learning_rate": 4.8421868396524566e-05, "loss": 0.0578, "step": 1402000 }, { "epoch": 0.09, "learning_rate": 4.842130535039093e-05, "loss": 0.058, "step": 1402500 }, { "epoch": 0.09, "learning_rate": 4.8420742304257294e-05, "loss": 0.0566, "step": 1403000 }, { "epoch": 0.09, "learning_rate": 4.842017925812366e-05, "loss": 0.0524, "step": 1403500 }, { "epoch": 0.09, "learning_rate": 4.841961621199003e-05, "loss": 0.0579, "step": 1404000 }, { "epoch": 0.09, "learning_rate": 4.8419053165856386e-05, "loss": 0.054, "step": 1404500 }, { "epoch": 0.09, "learning_rate": 4.841849011972275e-05, "loss": 0.0531, "step": 1405000 }, { "epoch": 0.09, "learning_rate": 4.841792707358912e-05, "loss": 0.0533, "step": 1405500 }, { "epoch": 0.09, "learning_rate": 4.8417365153547753e-05, "loss": 0.0561, "step": 1406000 }, { "epoch": 0.1, "learning_rate": 4.841680210741412e-05, "loss": 0.0611, "step": 1406500 }, { "epoch": 0.1, "learning_rate": 4.8416239061280475e-05, "loss": 0.0535, "step": 1407000 }, { "epoch": 0.1, "learning_rate": 4.8415676015146845e-05, "loss": 0.0548, "step": 1407500 }, { "epoch": 0.1, "learning_rate": 4.841511409510547e-05, "loss": 0.0532, "step": 1408000 }, { "epoch": 0.1, "learning_rate": 4.841455104897184e-05, "loss": 0.0552, "step": 1408500 }, { "epoch": 0.1, "learning_rate": 4.8413988002838206e-05, "loss": 0.0514, "step": 1409000 }, { "epoch": 0.1, "learning_rate": 4.841342495670457e-05, "loss": 0.0599, "step": 1409500 }, { "epoch": 0.1, "learning_rate": 4.84128630366632e-05, "loss": 0.0575, "step": 1410000 }, { "epoch": 0.1, "learning_rate": 4.8412299990529566e-05, "loss": 0.0551, "step": 1410500 }, { "epoch": 0.1, "learning_rate": 4.841173694439593e-05, "loss": 0.0564, "step": 1411000 }, { "epoch": 0.1, "learning_rate": 4.8411173898262294e-05, "loss": 0.0546, "step": 1411500 }, { "epoch": 0.1, "learning_rate": 4.8410610852128665e-05, "loss": 0.0566, "step": 1412000 }, { "epoch": 0.1, "learning_rate": 4.841004780599502e-05, "loss": 0.0586, "step": 1412500 }, { "epoch": 0.1, "learning_rate": 4.840948588595366e-05, "loss": 0.0571, "step": 1413000 }, { "epoch": 0.1, "learning_rate": 4.840892283982002e-05, "loss": 0.0535, "step": 1413500 }, { "epoch": 0.1, "learning_rate": 4.840835979368639e-05, "loss": 0.0533, "step": 1414000 }, { "epoch": 0.1, "learning_rate": 4.840779674755275e-05, "loss": 0.0552, "step": 1414500 }, { "epoch": 0.1, "learning_rate": 4.8407234827511385e-05, "loss": 0.0556, "step": 1415000 }, { "epoch": 0.1, "learning_rate": 4.840667178137775e-05, "loss": 0.0528, "step": 1415500 }, { "epoch": 0.1, "learning_rate": 4.840610873524411e-05, "loss": 0.0523, "step": 1416000 }, { "epoch": 0.1, "learning_rate": 4.840554568911048e-05, "loss": 0.0565, "step": 1416500 }, { "epoch": 0.1, "learning_rate": 4.840498264297684e-05, "loss": 0.0542, "step": 1417000 }, { "epoch": 0.1, "learning_rate": 4.8404419596843205e-05, "loss": 0.0554, "step": 1417500 }, { "epoch": 0.1, "learning_rate": 4.840385655070957e-05, "loss": 0.0551, "step": 1418000 }, { "epoch": 0.1, "learning_rate": 4.84032946306682e-05, "loss": 0.0554, "step": 1418500 }, { "epoch": 0.1, "learning_rate": 4.8402731584534565e-05, "loss": 0.0547, "step": 1419000 }, { "epoch": 0.1, "learning_rate": 4.8402168538400936e-05, "loss": 0.0548, "step": 1419500 }, { "epoch": 0.1, "learning_rate": 4.840160549226729e-05, "loss": 0.0545, "step": 1420000 }, { "epoch": 0.1, "learning_rate": 4.840104357222593e-05, "loss": 0.0589, "step": 1420500 }, { "epoch": 0.1, "learning_rate": 4.840048052609229e-05, "loss": 0.0647, "step": 1421000 }, { "epoch": 0.1, "learning_rate": 4.839991747995865e-05, "loss": 0.0553, "step": 1421500 }, { "epoch": 0.1, "learning_rate": 4.8399354433825024e-05, "loss": 0.0518, "step": 1422000 }, { "epoch": 0.1, "learning_rate": 4.839879138769138e-05, "loss": 0.0568, "step": 1422500 }, { "epoch": 0.1, "learning_rate": 4.839822946765002e-05, "loss": 0.059, "step": 1423000 }, { "epoch": 0.1, "learning_rate": 4.839766642151638e-05, "loss": 0.0593, "step": 1423500 }, { "epoch": 0.1, "learning_rate": 4.839710337538275e-05, "loss": 0.0551, "step": 1424000 }, { "epoch": 0.1, "learning_rate": 4.839654032924911e-05, "loss": 0.054, "step": 1424500 }, { "epoch": 0.1, "learning_rate": 4.8395977283115476e-05, "loss": 0.0553, "step": 1425000 }, { "epoch": 0.1, "learning_rate": 4.839541423698184e-05, "loss": 0.0575, "step": 1425500 }, { "epoch": 0.1, "learning_rate": 4.839485231694047e-05, "loss": 0.0543, "step": 1426000 }, { "epoch": 0.1, "learning_rate": 4.8394289270806837e-05, "loss": 0.0534, "step": 1426500 }, { "epoch": 0.1, "learning_rate": 4.83937262246732e-05, "loss": 0.0568, "step": 1427000 }, { "epoch": 0.1, "learning_rate": 4.839316317853957e-05, "loss": 0.0551, "step": 1427500 }, { "epoch": 0.1, "learning_rate": 4.839260013240593e-05, "loss": 0.058, "step": 1428000 }, { "epoch": 0.1, "learning_rate": 4.83920370862723e-05, "loss": 0.0559, "step": 1428500 }, { "epoch": 0.1, "learning_rate": 4.8391475166230925e-05, "loss": 0.0574, "step": 1429000 }, { "epoch": 0.1, "learning_rate": 4.8390912120097296e-05, "loss": 0.0559, "step": 1429500 }, { "epoch": 0.1, "learning_rate": 4.839034907396366e-05, "loss": 0.0523, "step": 1430000 }, { "epoch": 0.1, "learning_rate": 4.838978602783002e-05, "loss": 0.0548, "step": 1430500 }, { "epoch": 0.1, "learning_rate": 4.8389224107788656e-05, "loss": 0.0538, "step": 1431000 }, { "epoch": 0.1, "learning_rate": 4.838866106165501e-05, "loss": 0.0573, "step": 1431500 }, { "epoch": 0.1, "learning_rate": 4.8388098015521384e-05, "loss": 0.0548, "step": 1432000 }, { "epoch": 0.1, "learning_rate": 4.838753496938775e-05, "loss": 0.0584, "step": 1432500 }, { "epoch": 0.1, "learning_rate": 4.838697304934638e-05, "loss": 0.0535, "step": 1433000 }, { "epoch": 0.1, "learning_rate": 4.8386410003212744e-05, "loss": 0.057, "step": 1433500 }, { "epoch": 0.1, "learning_rate": 4.838584695707911e-05, "loss": 0.0554, "step": 1434000 }, { "epoch": 0.1, "learning_rate": 4.838528391094547e-05, "loss": 0.06, "step": 1434500 }, { "epoch": 0.1, "learning_rate": 4.8384720864811836e-05, "loss": 0.0522, "step": 1435000 }, { "epoch": 0.1, "learning_rate": 4.83841578186782e-05, "loss": 0.0548, "step": 1435500 }, { "epoch": 0.1, "learning_rate": 4.8383594772544564e-05, "loss": 0.0519, "step": 1436000 }, { "epoch": 0.1, "learning_rate": 4.8383031726410935e-05, "loss": 0.0545, "step": 1436500 }, { "epoch": 0.1, "learning_rate": 4.838246980636956e-05, "loss": 0.0545, "step": 1437000 }, { "epoch": 0.1, "learning_rate": 4.838190676023593e-05, "loss": 0.0601, "step": 1437500 }, { "epoch": 0.1, "learning_rate": 4.838134371410229e-05, "loss": 0.0541, "step": 1438000 }, { "epoch": 0.1, "learning_rate": 4.838078066796866e-05, "loss": 0.0543, "step": 1438500 }, { "epoch": 0.1, "learning_rate": 4.8380218747927284e-05, "loss": 0.058, "step": 1439000 }, { "epoch": 0.1, "learning_rate": 4.8379655701793655e-05, "loss": 0.0557, "step": 1439500 }, { "epoch": 0.1, "learning_rate": 4.837909265566002e-05, "loss": 0.0522, "step": 1440000 }, { "epoch": 0.1, "learning_rate": 4.837852960952638e-05, "loss": 0.0597, "step": 1440500 }, { "epoch": 0.1, "learning_rate": 4.837796656339275e-05, "loss": 0.054, "step": 1441000 }, { "epoch": 0.1, "learning_rate": 4.837740351725911e-05, "loss": 0.0553, "step": 1441500 }, { "epoch": 0.1, "learning_rate": 4.837684047112548e-05, "loss": 0.0584, "step": 1442000 }, { "epoch": 0.1, "learning_rate": 4.837627855108411e-05, "loss": 0.0615, "step": 1442500 }, { "epoch": 0.1, "learning_rate": 4.837571550495048e-05, "loss": 0.0526, "step": 1443000 }, { "epoch": 0.1, "learning_rate": 4.8375152458816835e-05, "loss": 0.0512, "step": 1443500 }, { "epoch": 0.1, "learning_rate": 4.83745894126832e-05, "loss": 0.055, "step": 1444000 }, { "epoch": 0.1, "learning_rate": 4.837402636654957e-05, "loss": 0.0559, "step": 1444500 }, { "epoch": 0.1, "learning_rate": 4.8373464446508195e-05, "loss": 0.0532, "step": 1445000 }, { "epoch": 0.1, "learning_rate": 4.8372901400374566e-05, "loss": 0.0554, "step": 1445500 }, { "epoch": 0.1, "learning_rate": 4.8372338354240923e-05, "loss": 0.0598, "step": 1446000 }, { "epoch": 0.1, "learning_rate": 4.8371775308107294e-05, "loss": 0.054, "step": 1446500 }, { "epoch": 0.1, "learning_rate": 4.837121226197366e-05, "loss": 0.0538, "step": 1447000 }, { "epoch": 0.1, "learning_rate": 4.837065034193229e-05, "loss": 0.0557, "step": 1447500 }, { "epoch": 0.1, "learning_rate": 4.8370087295798654e-05, "loss": 0.0549, "step": 1448000 }, { "epoch": 0.1, "learning_rate": 4.836952424966502e-05, "loss": 0.0553, "step": 1448500 }, { "epoch": 0.1, "learning_rate": 4.836896120353138e-05, "loss": 0.0564, "step": 1449000 }, { "epoch": 0.1, "learning_rate": 4.8368398157397746e-05, "loss": 0.0571, "step": 1449500 }, { "epoch": 0.1, "learning_rate": 4.836783511126411e-05, "loss": 0.0553, "step": 1450000 }, { "epoch": 0.1, "learning_rate": 4.8367272065130474e-05, "loss": 0.0552, "step": 1450500 }, { "epoch": 0.1, "learning_rate": 4.836671014508911e-05, "loss": 0.0568, "step": 1451000 }, { "epoch": 0.1, "learning_rate": 4.836614709895547e-05, "loss": 0.0576, "step": 1451500 }, { "epoch": 0.1, "learning_rate": 4.836558405282184e-05, "loss": 0.0538, "step": 1452000 }, { "epoch": 0.1, "learning_rate": 4.8365021006688205e-05, "loss": 0.0555, "step": 1452500 }, { "epoch": 0.1, "learning_rate": 4.836445796055456e-05, "loss": 0.0531, "step": 1453000 }, { "epoch": 0.1, "learning_rate": 4.83638960405132e-05, "loss": 0.0559, "step": 1453500 }, { "epoch": 0.1, "learning_rate": 4.836333299437956e-05, "loss": 0.0554, "step": 1454000 }, { "epoch": 0.1, "learning_rate": 4.836276994824593e-05, "loss": 0.0538, "step": 1454500 }, { "epoch": 0.1, "learning_rate": 4.8362206902112294e-05, "loss": 0.052, "step": 1455000 }, { "epoch": 0.1, "learning_rate": 4.836164385597866e-05, "loss": 0.0601, "step": 1455500 }, { "epoch": 0.1, "learning_rate": 4.836108080984502e-05, "loss": 0.0518, "step": 1456000 }, { "epoch": 0.1, "learning_rate": 4.8360518889803654e-05, "loss": 0.0534, "step": 1456500 }, { "epoch": 0.1, "learning_rate": 4.835995584367002e-05, "loss": 0.056, "step": 1457000 }, { "epoch": 0.1, "learning_rate": 4.835939392362865e-05, "loss": 0.0557, "step": 1457500 }, { "epoch": 0.1, "learning_rate": 4.8358830877495014e-05, "loss": 0.061, "step": 1458000 }, { "epoch": 0.1, "learning_rate": 4.835826783136138e-05, "loss": 0.0577, "step": 1458500 }, { "epoch": 0.1, "learning_rate": 4.835770478522774e-05, "loss": 0.058, "step": 1459000 }, { "epoch": 0.1, "learning_rate": 4.8357141739094106e-05, "loss": 0.0568, "step": 1459500 }, { "epoch": 0.1, "learning_rate": 4.835657869296048e-05, "loss": 0.0597, "step": 1460000 }, { "epoch": 0.1, "learning_rate": 4.8356015646826834e-05, "loss": 0.0539, "step": 1460500 }, { "epoch": 0.1, "learning_rate": 4.8355452600693205e-05, "loss": 0.0609, "step": 1461000 }, { "epoch": 0.1, "learning_rate": 4.835488955455957e-05, "loss": 0.054, "step": 1461500 }, { "epoch": 0.1, "learning_rate": 4.83543276345182e-05, "loss": 0.0538, "step": 1462000 }, { "epoch": 0.1, "learning_rate": 4.8353764588384565e-05, "loss": 0.0537, "step": 1462500 }, { "epoch": 0.1, "learning_rate": 4.835320154225092e-05, "loss": 0.0545, "step": 1463000 }, { "epoch": 0.1, "learning_rate": 4.835263849611729e-05, "loss": 0.0565, "step": 1463500 }, { "epoch": 0.1, "learning_rate": 4.835207544998366e-05, "loss": 0.0545, "step": 1464000 }, { "epoch": 0.1, "learning_rate": 4.835151352994229e-05, "loss": 0.0562, "step": 1464500 }, { "epoch": 0.1, "learning_rate": 4.835095048380865e-05, "loss": 0.0575, "step": 1465000 }, { "epoch": 0.1, "learning_rate": 4.835038743767502e-05, "loss": 0.0551, "step": 1465500 }, { "epoch": 0.1, "learning_rate": 4.834982439154138e-05, "loss": 0.0581, "step": 1466000 }, { "epoch": 0.1, "learning_rate": 4.8349261345407745e-05, "loss": 0.0572, "step": 1466500 }, { "epoch": 0.1, "learning_rate": 4.834869942536638e-05, "loss": 0.0575, "step": 1467000 }, { "epoch": 0.1, "learning_rate": 4.834813637923274e-05, "loss": 0.0613, "step": 1467500 }, { "epoch": 0.1, "learning_rate": 4.834757333309911e-05, "loss": 0.0549, "step": 1468000 }, { "epoch": 0.1, "learning_rate": 4.834701028696547e-05, "loss": 0.0556, "step": 1468500 }, { "epoch": 0.1, "learning_rate": 4.834644836692411e-05, "loss": 0.0561, "step": 1469000 }, { "epoch": 0.1, "learning_rate": 4.8345885320790466e-05, "loss": 0.0556, "step": 1469500 }, { "epoch": 0.1, "learning_rate": 4.8345322274656836e-05, "loss": 0.0553, "step": 1470000 }, { "epoch": 0.1, "learning_rate": 4.83447592285232e-05, "loss": 0.0551, "step": 1470500 }, { "epoch": 0.1, "learning_rate": 4.8344196182389564e-05, "loss": 0.0579, "step": 1471000 }, { "epoch": 0.1, "learning_rate": 4.8343634262348197e-05, "loss": 0.0553, "step": 1471500 }, { "epoch": 0.1, "learning_rate": 4.834307121621456e-05, "loss": 0.0585, "step": 1472000 }, { "epoch": 0.1, "learning_rate": 4.8342508170080925e-05, "loss": 0.056, "step": 1472500 }, { "epoch": 0.1, "learning_rate": 4.834194512394729e-05, "loss": 0.0566, "step": 1473000 }, { "epoch": 0.1, "learning_rate": 4.834138207781365e-05, "loss": 0.0546, "step": 1473500 }, { "epoch": 0.1, "learning_rate": 4.8340819031680017e-05, "loss": 0.0563, "step": 1474000 }, { "epoch": 0.1, "learning_rate": 4.834025598554639e-05, "loss": 0.0553, "step": 1474500 }, { "epoch": 0.1, "learning_rate": 4.833969406550501e-05, "loss": 0.0586, "step": 1475000 }, { "epoch": 0.1, "learning_rate": 4.8339131019371383e-05, "loss": 0.057, "step": 1475500 }, { "epoch": 0.1, "learning_rate": 4.833856797323774e-05, "loss": 0.0506, "step": 1476000 }, { "epoch": 0.1, "learning_rate": 4.833800605319638e-05, "loss": 0.0692, "step": 1476500 }, { "epoch": 0.1, "learning_rate": 4.833744300706274e-05, "loss": 0.0588, "step": 1477000 }, { "epoch": 0.1, "learning_rate": 4.833687996092911e-05, "loss": 0.0569, "step": 1477500 }, { "epoch": 0.1, "learning_rate": 4.833631691479547e-05, "loss": 0.0614, "step": 1478000 }, { "epoch": 0.1, "learning_rate": 4.833575386866183e-05, "loss": 0.0539, "step": 1478500 }, { "epoch": 0.1, "learning_rate": 4.83351908225282e-05, "loss": 0.0544, "step": 1479000 }, { "epoch": 0.1, "learning_rate": 4.8334627776394564e-05, "loss": 0.0534, "step": 1479500 }, { "epoch": 0.1, "learning_rate": 4.833406473026093e-05, "loss": 0.0528, "step": 1480000 }, { "epoch": 0.1, "learning_rate": 4.833350168412729e-05, "loss": 0.056, "step": 1480500 }, { "epoch": 0.1, "learning_rate": 4.8332939764085924e-05, "loss": 0.0555, "step": 1481000 }, { "epoch": 0.1, "learning_rate": 4.833237671795229e-05, "loss": 0.0544, "step": 1481500 }, { "epoch": 0.1, "learning_rate": 4.833181367181865e-05, "loss": 0.059, "step": 1482000 }, { "epoch": 0.1, "learning_rate": 4.833125062568502e-05, "loss": 0.0504, "step": 1482500 }, { "epoch": 0.1, "learning_rate": 4.833068870564365e-05, "loss": 0.049, "step": 1483000 }, { "epoch": 0.1, "learning_rate": 4.833012565951002e-05, "loss": 0.053, "step": 1483500 }, { "epoch": 0.1, "learning_rate": 4.8329562613376376e-05, "loss": 0.0555, "step": 1484000 }, { "epoch": 0.1, "learning_rate": 4.832899956724275e-05, "loss": 0.0578, "step": 1484500 }, { "epoch": 0.1, "learning_rate": 4.832843764720137e-05, "loss": 0.0577, "step": 1485000 }, { "epoch": 0.1, "learning_rate": 4.832787460106774e-05, "loss": 0.0562, "step": 1485500 }, { "epoch": 0.1, "learning_rate": 4.832731155493411e-05, "loss": 0.0575, "step": 1486000 }, { "epoch": 0.1, "learning_rate": 4.832674850880047e-05, "loss": 0.0614, "step": 1486500 }, { "epoch": 0.1, "learning_rate": 4.8326185462666835e-05, "loss": 0.0514, "step": 1487000 }, { "epoch": 0.1, "learning_rate": 4.83256224165332e-05, "loss": 0.0528, "step": 1487500 }, { "epoch": 0.1, "learning_rate": 4.832505937039956e-05, "loss": 0.0578, "step": 1488000 }, { "epoch": 0.1, "learning_rate": 4.832449632426593e-05, "loss": 0.0583, "step": 1488500 }, { "epoch": 0.1, "learning_rate": 4.832393440422456e-05, "loss": 0.05, "step": 1489000 }, { "epoch": 0.1, "learning_rate": 4.832337135809092e-05, "loss": 0.0561, "step": 1489500 }, { "epoch": 0.1, "learning_rate": 4.832280831195729e-05, "loss": 0.0576, "step": 1490000 }, { "epoch": 0.1, "learning_rate": 4.832224526582365e-05, "loss": 0.0575, "step": 1490500 }, { "epoch": 0.1, "learning_rate": 4.832168334578229e-05, "loss": 0.0511, "step": 1491000 }, { "epoch": 0.1, "learning_rate": 4.832112029964865e-05, "loss": 0.0559, "step": 1491500 }, { "epoch": 0.1, "learning_rate": 4.832055725351501e-05, "loss": 0.0523, "step": 1492000 }, { "epoch": 0.1, "learning_rate": 4.831999420738138e-05, "loss": 0.0547, "step": 1492500 }, { "epoch": 0.1, "learning_rate": 4.8319431161247746e-05, "loss": 0.0541, "step": 1493000 }, { "epoch": 0.1, "learning_rate": 4.831886924120638e-05, "loss": 0.057, "step": 1493500 }, { "epoch": 0.1, "learning_rate": 4.8318306195072736e-05, "loss": 0.0513, "step": 1494000 }, { "epoch": 0.1, "learning_rate": 4.8317743148939106e-05, "loss": 0.0529, "step": 1494500 }, { "epoch": 0.1, "learning_rate": 4.831718010280547e-05, "loss": 0.0524, "step": 1495000 }, { "epoch": 0.1, "learning_rate": 4.83166181827641e-05, "loss": 0.0531, "step": 1495500 }, { "epoch": 0.1, "learning_rate": 4.8316055136630467e-05, "loss": 0.0538, "step": 1496000 }, { "epoch": 0.1, "learning_rate": 4.831549209049683e-05, "loss": 0.054, "step": 1496500 }, { "epoch": 0.1, "learning_rate": 4.8314929044363195e-05, "loss": 0.0559, "step": 1497000 }, { "epoch": 0.1, "learning_rate": 4.831436599822956e-05, "loss": 0.0596, "step": 1497500 }, { "epoch": 0.1, "learning_rate": 4.831380295209593e-05, "loss": 0.0551, "step": 1498000 }, { "epoch": 0.1, "learning_rate": 4.8313241032054555e-05, "loss": 0.0568, "step": 1498500 }, { "epoch": 0.1, "learning_rate": 4.8312677985920926e-05, "loss": 0.0516, "step": 1499000 }, { "epoch": 0.1, "learning_rate": 4.831211493978728e-05, "loss": 0.0513, "step": 1499500 }, { "epoch": 0.1, "learning_rate": 4.8311551893653654e-05, "loss": 0.0523, "step": 1500000 }, { "epoch": 0.1, "learning_rate": 4.831098884752002e-05, "loss": 0.0548, "step": 1500500 }, { "epoch": 0.1, "learning_rate": 4.831042692747865e-05, "loss": 0.0595, "step": 1501000 }, { "epoch": 0.1, "learning_rate": 4.8309863881345014e-05, "loss": 0.053, "step": 1501500 }, { "epoch": 0.1, "learning_rate": 4.830930083521137e-05, "loss": 0.0522, "step": 1502000 }, { "epoch": 0.1, "learning_rate": 4.830873778907774e-05, "loss": 0.0553, "step": 1502500 }, { "epoch": 0.1, "learning_rate": 4.8308174742944106e-05, "loss": 0.0495, "step": 1503000 }, { "epoch": 0.1, "learning_rate": 4.830761169681047e-05, "loss": 0.0535, "step": 1503500 }, { "epoch": 0.1, "learning_rate": 4.8307048650676834e-05, "loss": 0.0545, "step": 1504000 }, { "epoch": 0.1, "learning_rate": 4.8306486730635466e-05, "loss": 0.0539, "step": 1504500 }, { "epoch": 0.1, "learning_rate": 4.830592368450183e-05, "loss": 0.0548, "step": 1505000 }, { "epoch": 0.1, "learning_rate": 4.8305360638368194e-05, "loss": 0.0521, "step": 1505500 }, { "epoch": 0.1, "learning_rate": 4.830479759223456e-05, "loss": 0.0534, "step": 1506000 }, { "epoch": 0.1, "learning_rate": 4.830423567219319e-05, "loss": 0.0582, "step": 1506500 }, { "epoch": 0.1, "learning_rate": 4.8303672626059554e-05, "loss": 0.0526, "step": 1507000 }, { "epoch": 0.1, "learning_rate": 4.830310957992592e-05, "loss": 0.0568, "step": 1507500 }, { "epoch": 0.1, "learning_rate": 4.830254653379229e-05, "loss": 0.0544, "step": 1508000 }, { "epoch": 0.1, "learning_rate": 4.8301984613750914e-05, "loss": 0.0542, "step": 1508500 }, { "epoch": 0.1, "learning_rate": 4.8301421567617285e-05, "loss": 0.0541, "step": 1509000 }, { "epoch": 0.1, "learning_rate": 4.830085852148365e-05, "loss": 0.0547, "step": 1509500 }, { "epoch": 0.1, "learning_rate": 4.830029547535001e-05, "loss": 0.0581, "step": 1510000 }, { "epoch": 0.1, "learning_rate": 4.8299733555308645e-05, "loss": 0.0614, "step": 1510500 }, { "epoch": 0.1, "learning_rate": 4.829917050917501e-05, "loss": 0.0559, "step": 1511000 }, { "epoch": 0.1, "learning_rate": 4.829860746304137e-05, "loss": 0.0571, "step": 1511500 }, { "epoch": 0.1, "learning_rate": 4.829804441690774e-05, "loss": 0.0603, "step": 1512000 }, { "epoch": 0.1, "learning_rate": 4.829748249686637e-05, "loss": 0.0523, "step": 1512500 }, { "epoch": 0.1, "learning_rate": 4.8296919450732734e-05, "loss": 0.0527, "step": 1513000 }, { "epoch": 0.1, "learning_rate": 4.82963564045991e-05, "loss": 0.0519, "step": 1513500 }, { "epoch": 0.1, "learning_rate": 4.829579335846546e-05, "loss": 0.0527, "step": 1514000 }, { "epoch": 0.1, "learning_rate": 4.829523031233183e-05, "loss": 0.0564, "step": 1514500 }, { "epoch": 0.1, "learning_rate": 4.829466726619819e-05, "loss": 0.0529, "step": 1515000 }, { "epoch": 0.1, "learning_rate": 4.8294104220064554e-05, "loss": 0.0582, "step": 1515500 }, { "epoch": 0.1, "learning_rate": 4.8293541173930924e-05, "loss": 0.0567, "step": 1516000 }, { "epoch": 0.1, "learning_rate": 4.8292979253889556e-05, "loss": 0.0524, "step": 1516500 }, { "epoch": 0.1, "learning_rate": 4.829241733384818e-05, "loss": 0.0501, "step": 1517000 }, { "epoch": 0.1, "learning_rate": 4.829185428771455e-05, "loss": 0.0545, "step": 1517500 }, { "epoch": 0.1, "learning_rate": 4.829129124158092e-05, "loss": 0.0497, "step": 1518000 }, { "epoch": 0.1, "learning_rate": 4.8290728195447274e-05, "loss": 0.0558, "step": 1518500 }, { "epoch": 0.1, "learning_rate": 4.829016627540591e-05, "loss": 0.0532, "step": 1519000 }, { "epoch": 0.1, "learning_rate": 4.828960322927227e-05, "loss": 0.0573, "step": 1519500 }, { "epoch": 0.1, "learning_rate": 4.828904018313864e-05, "loss": 0.059, "step": 1520000 }, { "epoch": 0.1, "learning_rate": 4.828847826309727e-05, "loss": 0.0573, "step": 1520500 }, { "epoch": 0.1, "learning_rate": 4.828791521696364e-05, "loss": 0.0544, "step": 1521000 }, { "epoch": 0.1, "learning_rate": 4.828735217083e-05, "loss": 0.0529, "step": 1521500 }, { "epoch": 0.1, "learning_rate": 4.8286789124696365e-05, "loss": 0.0511, "step": 1522000 }, { "epoch": 0.1, "learning_rate": 4.828622607856273e-05, "loss": 0.0552, "step": 1522500 }, { "epoch": 0.1, "learning_rate": 4.828566303242909e-05, "loss": 0.0572, "step": 1523000 }, { "epoch": 0.1, "learning_rate": 4.828509998629546e-05, "loss": 0.0527, "step": 1523500 }, { "epoch": 0.1, "learning_rate": 4.828453694016182e-05, "loss": 0.053, "step": 1524000 }, { "epoch": 0.1, "learning_rate": 4.828397502012045e-05, "loss": 0.0515, "step": 1524500 }, { "epoch": 0.1, "learning_rate": 4.828341197398682e-05, "loss": 0.0549, "step": 1525000 }, { "epoch": 0.1, "learning_rate": 4.828284892785319e-05, "loss": 0.0521, "step": 1525500 }, { "epoch": 0.1, "learning_rate": 4.828228588171955e-05, "loss": 0.0552, "step": 1526000 }, { "epoch": 0.1, "learning_rate": 4.8281722835585916e-05, "loss": 0.0578, "step": 1526500 }, { "epoch": 0.1, "learning_rate": 4.828116091554455e-05, "loss": 0.0565, "step": 1527000 }, { "epoch": 0.1, "learning_rate": 4.828059899550318e-05, "loss": 0.0529, "step": 1527500 }, { "epoch": 0.1, "learning_rate": 4.8280035949369544e-05, "loss": 0.0528, "step": 1528000 }, { "epoch": 0.1, "learning_rate": 4.827947290323591e-05, "loss": 0.0567, "step": 1528500 }, { "epoch": 0.1, "learning_rate": 4.827890985710227e-05, "loss": 0.0534, "step": 1529000 }, { "epoch": 0.1, "learning_rate": 4.8278346810968636e-05, "loss": 0.0551, "step": 1529500 }, { "epoch": 0.1, "learning_rate": 4.8277783764835e-05, "loss": 0.0523, "step": 1530000 }, { "epoch": 0.1, "learning_rate": 4.8277220718701364e-05, "loss": 0.0526, "step": 1530500 }, { "epoch": 0.1, "learning_rate": 4.8276657672567735e-05, "loss": 0.0516, "step": 1531000 }, { "epoch": 0.1, "learning_rate": 4.827609575252636e-05, "loss": 0.0582, "step": 1531500 }, { "epoch": 0.1, "learning_rate": 4.827553270639273e-05, "loss": 0.0527, "step": 1532000 }, { "epoch": 0.1, "learning_rate": 4.827496966025909e-05, "loss": 0.0543, "step": 1532500 }, { "epoch": 0.1, "learning_rate": 4.827440661412545e-05, "loss": 0.0527, "step": 1533000 }, { "epoch": 0.1, "learning_rate": 4.8273843567991823e-05, "loss": 0.0542, "step": 1533500 }, { "epoch": 0.1, "learning_rate": 4.827328052185818e-05, "loss": 0.054, "step": 1534000 }, { "epoch": 0.1, "learning_rate": 4.827271747572455e-05, "loss": 0.0557, "step": 1534500 }, { "epoch": 0.1, "learning_rate": 4.827215555568318e-05, "loss": 0.0555, "step": 1535000 }, { "epoch": 0.1, "learning_rate": 4.827159250954955e-05, "loss": 0.0559, "step": 1535500 }, { "epoch": 0.1, "learning_rate": 4.827102946341591e-05, "loss": 0.056, "step": 1536000 }, { "epoch": 0.1, "learning_rate": 4.8270466417282276e-05, "loss": 0.0569, "step": 1536500 }, { "epoch": 0.1, "learning_rate": 4.826990337114864e-05, "loss": 0.0523, "step": 1537000 }, { "epoch": 0.1, "learning_rate": 4.826934145110727e-05, "loss": 0.0534, "step": 1537500 }, { "epoch": 0.1, "learning_rate": 4.8268778404973636e-05, "loss": 0.0553, "step": 1538000 }, { "epoch": 0.1, "learning_rate": 4.826821535884e-05, "loss": 0.0574, "step": 1538500 }, { "epoch": 0.1, "learning_rate": 4.826765231270637e-05, "loss": 0.0556, "step": 1539000 }, { "epoch": 0.1, "learning_rate": 4.826708926657273e-05, "loss": 0.0585, "step": 1539500 }, { "epoch": 0.1, "learning_rate": 4.826652734653137e-05, "loss": 0.0531, "step": 1540000 }, { "epoch": 0.1, "learning_rate": 4.8265964300397724e-05, "loss": 0.0541, "step": 1540500 }, { "epoch": 0.1, "learning_rate": 4.8265401254264095e-05, "loss": 0.056, "step": 1541000 }, { "epoch": 0.1, "learning_rate": 4.826483820813046e-05, "loss": 0.0539, "step": 1541500 }, { "epoch": 0.1, "learning_rate": 4.8264275161996816e-05, "loss": 0.0583, "step": 1542000 }, { "epoch": 0.1, "learning_rate": 4.8263713241955455e-05, "loss": 0.0551, "step": 1542500 }, { "epoch": 0.1, "learning_rate": 4.826315019582182e-05, "loss": 0.0532, "step": 1543000 }, { "epoch": 0.1, "learning_rate": 4.826258714968818e-05, "loss": 0.0541, "step": 1543500 }, { "epoch": 0.1, "learning_rate": 4.826202410355455e-05, "loss": 0.057, "step": 1544000 }, { "epoch": 0.1, "learning_rate": 4.826146218351318e-05, "loss": 0.0545, "step": 1544500 }, { "epoch": 0.1, "learning_rate": 4.826089913737954e-05, "loss": 0.0548, "step": 1545000 }, { "epoch": 0.1, "learning_rate": 4.826033609124591e-05, "loss": 0.058, "step": 1545500 }, { "epoch": 0.1, "learning_rate": 4.825977304511227e-05, "loss": 0.053, "step": 1546000 }, { "epoch": 0.1, "learning_rate": 4.82592111250709e-05, "loss": 0.0569, "step": 1546500 }, { "epoch": 0.1, "learning_rate": 4.825864807893727e-05, "loss": 0.0556, "step": 1547000 }, { "epoch": 0.1, "learning_rate": 4.825808503280364e-05, "loss": 0.0547, "step": 1547500 }, { "epoch": 0.1, "learning_rate": 4.8257523112762264e-05, "loss": 0.0575, "step": 1548000 }, { "epoch": 0.1, "learning_rate": 4.8256960066628634e-05, "loss": 0.0557, "step": 1548500 }, { "epoch": 0.1, "learning_rate": 4.825639702049499e-05, "loss": 0.0503, "step": 1549000 }, { "epoch": 0.1, "learning_rate": 4.8255833974361356e-05, "loss": 0.057, "step": 1549500 }, { "epoch": 0.1, "learning_rate": 4.8255270928227726e-05, "loss": 0.0544, "step": 1550000 }, { "epoch": 0.1, "learning_rate": 4.8254707882094084e-05, "loss": 0.0518, "step": 1550500 }, { "epoch": 0.1, "learning_rate": 4.8254144835960454e-05, "loss": 0.0573, "step": 1551000 }, { "epoch": 0.1, "learning_rate": 4.825358178982682e-05, "loss": 0.0543, "step": 1551500 }, { "epoch": 0.1, "learning_rate": 4.825301874369318e-05, "loss": 0.052, "step": 1552000 }, { "epoch": 0.1, "learning_rate": 4.8252455697559546e-05, "loss": 0.055, "step": 1552500 }, { "epoch": 0.1, "learning_rate": 4.825189377751818e-05, "loss": 0.0515, "step": 1553000 }, { "epoch": 0.1, "learning_rate": 4.825133073138454e-05, "loss": 0.0507, "step": 1553500 }, { "epoch": 0.1, "learning_rate": 4.8250767685250907e-05, "loss": 0.054, "step": 1554000 }, { "epoch": 0.11, "learning_rate": 4.825020463911728e-05, "loss": 0.054, "step": 1554500 }, { "epoch": 0.11, "learning_rate": 4.82496427190759e-05, "loss": 0.0547, "step": 1555000 }, { "epoch": 0.11, "learning_rate": 4.8249079672942274e-05, "loss": 0.0552, "step": 1555500 }, { "epoch": 0.11, "learning_rate": 4.824851662680863e-05, "loss": 0.0556, "step": 1556000 }, { "epoch": 0.11, "learning_rate": 4.8247953580675e-05, "loss": 0.0514, "step": 1556500 }, { "epoch": 0.11, "learning_rate": 4.824739166063363e-05, "loss": 0.052, "step": 1557000 }, { "epoch": 0.11, "learning_rate": 4.82468286145e-05, "loss": 0.0543, "step": 1557500 }, { "epoch": 0.11, "learning_rate": 4.824626556836636e-05, "loss": 0.0554, "step": 1558000 }, { "epoch": 0.11, "learning_rate": 4.824570252223272e-05, "loss": 0.0551, "step": 1558500 }, { "epoch": 0.11, "learning_rate": 4.824514060219136e-05, "loss": 0.0544, "step": 1559000 }, { "epoch": 0.11, "learning_rate": 4.8244577556057715e-05, "loss": 0.0554, "step": 1559500 }, { "epoch": 0.11, "learning_rate": 4.8244014509924086e-05, "loss": 0.0533, "step": 1560000 }, { "epoch": 0.11, "learning_rate": 4.824345146379045e-05, "loss": 0.0561, "step": 1560500 }, { "epoch": 0.11, "learning_rate": 4.8242888417656814e-05, "loss": 0.0484, "step": 1561000 }, { "epoch": 0.11, "learning_rate": 4.824232537152318e-05, "loss": 0.0529, "step": 1561500 }, { "epoch": 0.11, "learning_rate": 4.824176345148181e-05, "loss": 0.0519, "step": 1562000 }, { "epoch": 0.11, "learning_rate": 4.8241200405348174e-05, "loss": 0.0549, "step": 1562500 }, { "epoch": 0.11, "learning_rate": 4.824063735921454e-05, "loss": 0.0515, "step": 1563000 }, { "epoch": 0.11, "learning_rate": 4.82400743130809e-05, "loss": 0.0537, "step": 1563500 }, { "epoch": 0.11, "learning_rate": 4.8239511266947266e-05, "loss": 0.058, "step": 1564000 }, { "epoch": 0.11, "learning_rate": 4.82389493469059e-05, "loss": 0.0544, "step": 1564500 }, { "epoch": 0.11, "learning_rate": 4.823838630077226e-05, "loss": 0.0507, "step": 1565000 }, { "epoch": 0.11, "learning_rate": 4.823782325463863e-05, "loss": 0.0567, "step": 1565500 }, { "epoch": 0.11, "learning_rate": 4.823726020850499e-05, "loss": 0.0545, "step": 1566000 }, { "epoch": 0.11, "learning_rate": 4.823669828846363e-05, "loss": 0.0592, "step": 1566500 }, { "epoch": 0.11, "learning_rate": 4.8236135242329987e-05, "loss": 0.0547, "step": 1567000 }, { "epoch": 0.11, "learning_rate": 4.823557219619636e-05, "loss": 0.0547, "step": 1567500 }, { "epoch": 0.11, "learning_rate": 4.823500915006272e-05, "loss": 0.0581, "step": 1568000 }, { "epoch": 0.11, "learning_rate": 4.8234446103929085e-05, "loss": 0.0561, "step": 1568500 }, { "epoch": 0.11, "learning_rate": 4.823388418388772e-05, "loss": 0.0502, "step": 1569000 }, { "epoch": 0.11, "learning_rate": 4.823332113775408e-05, "loss": 0.0554, "step": 1569500 }, { "epoch": 0.11, "learning_rate": 4.8232758091620445e-05, "loss": 0.0574, "step": 1570000 }, { "epoch": 0.11, "learning_rate": 4.823219504548681e-05, "loss": 0.0544, "step": 1570500 }, { "epoch": 0.11, "learning_rate": 4.823163199935318e-05, "loss": 0.0594, "step": 1571000 }, { "epoch": 0.11, "learning_rate": 4.823106895321954e-05, "loss": 0.0544, "step": 1571500 }, { "epoch": 0.11, "learning_rate": 4.82305059070859e-05, "loss": 0.0541, "step": 1572000 }, { "epoch": 0.11, "learning_rate": 4.822994286095227e-05, "loss": 0.0591, "step": 1572500 }, { "epoch": 0.11, "learning_rate": 4.822938206700317e-05, "loss": 0.0544, "step": 1573000 }, { "epoch": 0.11, "learning_rate": 4.822881902086953e-05, "loss": 0.0519, "step": 1573500 }, { "epoch": 0.11, "learning_rate": 4.82282559747359e-05, "loss": 0.0516, "step": 1574000 }, { "epoch": 0.11, "learning_rate": 4.8227692928602265e-05, "loss": 0.0554, "step": 1574500 }, { "epoch": 0.11, "learning_rate": 4.82271310085609e-05, "loss": 0.0561, "step": 1575000 }, { "epoch": 0.11, "learning_rate": 4.822656796242726e-05, "loss": 0.0536, "step": 1575500 }, { "epoch": 0.11, "learning_rate": 4.822600491629362e-05, "loss": 0.0498, "step": 1576000 }, { "epoch": 0.11, "learning_rate": 4.822544187015999e-05, "loss": 0.0525, "step": 1576500 }, { "epoch": 0.11, "learning_rate": 4.822487882402635e-05, "loss": 0.059, "step": 1577000 }, { "epoch": 0.11, "learning_rate": 4.8224316903984985e-05, "loss": 0.055, "step": 1577500 }, { "epoch": 0.11, "learning_rate": 4.822375385785135e-05, "loss": 0.0524, "step": 1578000 }, { "epoch": 0.11, "learning_rate": 4.822319081171771e-05, "loss": 0.0568, "step": 1578500 }, { "epoch": 0.11, "learning_rate": 4.822262776558408e-05, "loss": 0.0541, "step": 1579000 }, { "epoch": 0.11, "learning_rate": 4.822206584554271e-05, "loss": 0.0548, "step": 1579500 }, { "epoch": 0.11, "learning_rate": 4.822150279940907e-05, "loss": 0.0574, "step": 1580000 }, { "epoch": 0.11, "learning_rate": 4.822093975327544e-05, "loss": 0.0533, "step": 1580500 }, { "epoch": 0.11, "learning_rate": 4.82203767071418e-05, "loss": 0.0548, "step": 1581000 }, { "epoch": 0.11, "learning_rate": 4.821981478710044e-05, "loss": 0.0548, "step": 1581500 }, { "epoch": 0.11, "learning_rate": 4.8219252867059066e-05, "loss": 0.0546, "step": 1582000 }, { "epoch": 0.11, "learning_rate": 4.8218689820925436e-05, "loss": 0.0502, "step": 1582500 }, { "epoch": 0.11, "learning_rate": 4.8218126774791794e-05, "loss": 0.0521, "step": 1583000 }, { "epoch": 0.11, "learning_rate": 4.821756372865816e-05, "loss": 0.0553, "step": 1583500 }, { "epoch": 0.11, "learning_rate": 4.821700068252453e-05, "loss": 0.0523, "step": 1584000 }, { "epoch": 0.11, "learning_rate": 4.8216437636390886e-05, "loss": 0.0562, "step": 1584500 }, { "epoch": 0.11, "learning_rate": 4.8215874590257256e-05, "loss": 0.0554, "step": 1585000 }, { "epoch": 0.11, "learning_rate": 4.821531154412362e-05, "loss": 0.0539, "step": 1585500 }, { "epoch": 0.11, "learning_rate": 4.8214748497989984e-05, "loss": 0.0517, "step": 1586000 }, { "epoch": 0.11, "learning_rate": 4.821418657794862e-05, "loss": 0.0497, "step": 1586500 }, { "epoch": 0.11, "learning_rate": 4.821362353181498e-05, "loss": 0.0529, "step": 1587000 }, { "epoch": 0.11, "learning_rate": 4.8213060485681345e-05, "loss": 0.0535, "step": 1587500 }, { "epoch": 0.11, "learning_rate": 4.821249743954771e-05, "loss": 0.0519, "step": 1588000 }, { "epoch": 0.11, "learning_rate": 4.821193439341408e-05, "loss": 0.0542, "step": 1588500 }, { "epoch": 0.11, "learning_rate": 4.8211371347280437e-05, "loss": 0.0539, "step": 1589000 }, { "epoch": 0.11, "learning_rate": 4.82108083011468e-05, "loss": 0.0536, "step": 1589500 }, { "epoch": 0.11, "learning_rate": 4.821024525501317e-05, "loss": 0.0504, "step": 1590000 }, { "epoch": 0.11, "learning_rate": 4.8209683334971804e-05, "loss": 0.0557, "step": 1590500 }, { "epoch": 0.11, "learning_rate": 4.820912028883817e-05, "loss": 0.0502, "step": 1591000 }, { "epoch": 0.11, "learning_rate": 4.8208557242704525e-05, "loss": 0.0582, "step": 1591500 }, { "epoch": 0.11, "learning_rate": 4.8207994196570896e-05, "loss": 0.0568, "step": 1592000 }, { "epoch": 0.11, "learning_rate": 4.820743227652952e-05, "loss": 0.0527, "step": 1592500 }, { "epoch": 0.11, "learning_rate": 4.820686923039589e-05, "loss": 0.0556, "step": 1593000 }, { "epoch": 0.11, "learning_rate": 4.8206306184262256e-05, "loss": 0.05, "step": 1593500 }, { "epoch": 0.11, "learning_rate": 4.820574313812862e-05, "loss": 0.0558, "step": 1594000 }, { "epoch": 0.11, "learning_rate": 4.8205180091994984e-05, "loss": 0.0502, "step": 1594500 }, { "epoch": 0.11, "learning_rate": 4.820461704586135e-05, "loss": 0.0484, "step": 1595000 }, { "epoch": 0.11, "learning_rate": 4.820405512581998e-05, "loss": 0.0564, "step": 1595500 }, { "epoch": 0.11, "learning_rate": 4.8203492079686344e-05, "loss": 0.0527, "step": 1596000 }, { "epoch": 0.11, "learning_rate": 4.820292903355271e-05, "loss": 0.0523, "step": 1596500 }, { "epoch": 0.11, "learning_rate": 4.820236598741907e-05, "loss": 0.0533, "step": 1597000 }, { "epoch": 0.11, "learning_rate": 4.8201804067377704e-05, "loss": 0.0555, "step": 1597500 }, { "epoch": 0.11, "learning_rate": 4.820124102124407e-05, "loss": 0.0534, "step": 1598000 }, { "epoch": 0.11, "learning_rate": 4.820067797511044e-05, "loss": 0.0534, "step": 1598500 }, { "epoch": 0.11, "learning_rate": 4.8200114928976796e-05, "loss": 0.0521, "step": 1599000 }, { "epoch": 0.11, "learning_rate": 4.819955188284317e-05, "loss": 0.0534, "step": 1599500 }, { "epoch": 0.11, "learning_rate": 4.819898996280179e-05, "loss": 0.0551, "step": 1600000 }, { "epoch": 0.11, "learning_rate": 4.819842691666816e-05, "loss": 0.0532, "step": 1600500 }, { "epoch": 0.11, "learning_rate": 4.819786387053453e-05, "loss": 0.0525, "step": 1601000 }, { "epoch": 0.11, "learning_rate": 4.819730082440089e-05, "loss": 0.0536, "step": 1601500 }, { "epoch": 0.11, "learning_rate": 4.819673890435952e-05, "loss": 0.0554, "step": 1602000 }, { "epoch": 0.11, "learning_rate": 4.819617585822589e-05, "loss": 0.0518, "step": 1602500 }, { "epoch": 0.11, "learning_rate": 4.819561281209225e-05, "loss": 0.0542, "step": 1603000 }, { "epoch": 0.11, "learning_rate": 4.8195049765958615e-05, "loss": 0.0521, "step": 1603500 }, { "epoch": 0.11, "learning_rate": 4.819448784591725e-05, "loss": 0.0511, "step": 1604000 }, { "epoch": 0.11, "learning_rate": 4.819392479978361e-05, "loss": 0.0517, "step": 1604500 }, { "epoch": 0.11, "learning_rate": 4.819336175364998e-05, "loss": 0.0569, "step": 1605000 }, { "epoch": 0.11, "learning_rate": 4.819279870751634e-05, "loss": 0.0498, "step": 1605500 }, { "epoch": 0.11, "learning_rate": 4.8192235661382704e-05, "loss": 0.0645, "step": 1606000 }, { "epoch": 0.11, "learning_rate": 4.8191672615249074e-05, "loss": 0.0583, "step": 1606500 }, { "epoch": 0.11, "learning_rate": 4.81911106952077e-05, "loss": 0.0556, "step": 1607000 }, { "epoch": 0.11, "learning_rate": 4.819054764907407e-05, "loss": 0.0548, "step": 1607500 }, { "epoch": 0.11, "learning_rate": 4.818998460294043e-05, "loss": 0.0521, "step": 1608000 }, { "epoch": 0.11, "learning_rate": 4.81894215568068e-05, "loss": 0.0569, "step": 1608500 }, { "epoch": 0.11, "learning_rate": 4.818885851067316e-05, "loss": 0.0507, "step": 1609000 }, { "epoch": 0.11, "learning_rate": 4.8188295464539526e-05, "loss": 0.0532, "step": 1609500 }, { "epoch": 0.11, "learning_rate": 4.818773354449816e-05, "loss": 0.0545, "step": 1610000 }, { "epoch": 0.11, "learning_rate": 4.818717049836452e-05, "loss": 0.0521, "step": 1610500 }, { "epoch": 0.11, "learning_rate": 4.818660745223089e-05, "loss": 0.0499, "step": 1611000 }, { "epoch": 0.11, "learning_rate": 4.818604440609725e-05, "loss": 0.0541, "step": 1611500 }, { "epoch": 0.11, "learning_rate": 4.8185481359963615e-05, "loss": 0.0524, "step": 1612000 }, { "epoch": 0.11, "learning_rate": 4.818491831382998e-05, "loss": 0.0517, "step": 1612500 }, { "epoch": 0.11, "learning_rate": 4.818435639378861e-05, "loss": 0.0536, "step": 1613000 }, { "epoch": 0.11, "learning_rate": 4.8183793347654975e-05, "loss": 0.0501, "step": 1613500 }, { "epoch": 0.11, "learning_rate": 4.8183230301521346e-05, "loss": 0.0543, "step": 1614000 }, { "epoch": 0.11, "learning_rate": 4.818266725538771e-05, "loss": 0.0542, "step": 1614500 }, { "epoch": 0.11, "learning_rate": 4.818210533534634e-05, "loss": 0.0557, "step": 1615000 }, { "epoch": 0.11, "learning_rate": 4.8181542289212706e-05, "loss": 0.0593, "step": 1615500 }, { "epoch": 0.11, "learning_rate": 4.818097924307906e-05, "loss": 0.0476, "step": 1616000 }, { "epoch": 0.11, "learning_rate": 4.8180417323037695e-05, "loss": 0.0527, "step": 1616500 }, { "epoch": 0.11, "learning_rate": 4.8179854276904066e-05, "loss": 0.0532, "step": 1617000 }, { "epoch": 0.11, "learning_rate": 4.817929123077043e-05, "loss": 0.0514, "step": 1617500 }, { "epoch": 0.11, "learning_rate": 4.8178728184636794e-05, "loss": 0.0534, "step": 1618000 }, { "epoch": 0.11, "learning_rate": 4.817816513850316e-05, "loss": 0.0547, "step": 1618500 }, { "epoch": 0.11, "learning_rate": 4.817760209236952e-05, "loss": 0.0521, "step": 1619000 }, { "epoch": 0.11, "learning_rate": 4.8177039046235886e-05, "loss": 0.0526, "step": 1619500 }, { "epoch": 0.11, "learning_rate": 4.817647600010225e-05, "loss": 0.0509, "step": 1620000 }, { "epoch": 0.11, "learning_rate": 4.8175912953968614e-05, "loss": 0.0547, "step": 1620500 }, { "epoch": 0.11, "learning_rate": 4.8175351033927246e-05, "loss": 0.0516, "step": 1621000 }, { "epoch": 0.11, "learning_rate": 4.817478798779361e-05, "loss": 0.0504, "step": 1621500 }, { "epoch": 0.11, "learning_rate": 4.817422494165998e-05, "loss": 0.0548, "step": 1622000 }, { "epoch": 0.11, "learning_rate": 4.817366189552634e-05, "loss": 0.0542, "step": 1622500 }, { "epoch": 0.11, "learning_rate": 4.817309997548498e-05, "loss": 0.0532, "step": 1623000 }, { "epoch": 0.11, "learning_rate": 4.8172536929351334e-05, "loss": 0.0505, "step": 1623500 }, { "epoch": 0.11, "learning_rate": 4.8171973883217705e-05, "loss": 0.0582, "step": 1624000 }, { "epoch": 0.11, "learning_rate": 4.817141083708407e-05, "loss": 0.0515, "step": 1624500 }, { "epoch": 0.11, "learning_rate": 4.8170847790950426e-05, "loss": 0.0512, "step": 1625000 }, { "epoch": 0.11, "learning_rate": 4.8170285870909065e-05, "loss": 0.0563, "step": 1625500 }, { "epoch": 0.11, "learning_rate": 4.816972282477543e-05, "loss": 0.0498, "step": 1626000 }, { "epoch": 0.11, "learning_rate": 4.8169159778641793e-05, "loss": 0.0566, "step": 1626500 }, { "epoch": 0.11, "learning_rate": 4.816859673250816e-05, "loss": 0.0554, "step": 1627000 }, { "epoch": 0.11, "learning_rate": 4.816803368637452e-05, "loss": 0.053, "step": 1627500 }, { "epoch": 0.11, "learning_rate": 4.8167470640240885e-05, "loss": 0.0514, "step": 1628000 }, { "epoch": 0.11, "learning_rate": 4.816690759410725e-05, "loss": 0.052, "step": 1628500 }, { "epoch": 0.11, "learning_rate": 4.816634567406588e-05, "loss": 0.0556, "step": 1629000 }, { "epoch": 0.11, "learning_rate": 4.8165782627932246e-05, "loss": 0.0592, "step": 1629500 }, { "epoch": 0.11, "learning_rate": 4.8165219581798616e-05, "loss": 0.0552, "step": 1630000 }, { "epoch": 0.11, "learning_rate": 4.8164656535664974e-05, "loss": 0.0484, "step": 1630500 }, { "epoch": 0.11, "learning_rate": 4.816409461562361e-05, "loss": 0.0532, "step": 1631000 }, { "epoch": 0.11, "learning_rate": 4.816353156948997e-05, "loss": 0.0544, "step": 1631500 }, { "epoch": 0.11, "learning_rate": 4.816296964944861e-05, "loss": 0.0545, "step": 1632000 }, { "epoch": 0.11, "learning_rate": 4.8162406603314966e-05, "loss": 0.0543, "step": 1632500 }, { "epoch": 0.11, "learning_rate": 4.816184355718134e-05, "loss": 0.0542, "step": 1633000 }, { "epoch": 0.11, "learning_rate": 4.81612805110477e-05, "loss": 0.0546, "step": 1633500 }, { "epoch": 0.11, "learning_rate": 4.8160717464914065e-05, "loss": 0.0538, "step": 1634000 }, { "epoch": 0.11, "learning_rate": 4.816015441878043e-05, "loss": 0.0539, "step": 1634500 }, { "epoch": 0.11, "learning_rate": 4.815959137264679e-05, "loss": 0.0485, "step": 1635000 }, { "epoch": 0.11, "learning_rate": 4.815902832651316e-05, "loss": 0.057, "step": 1635500 }, { "epoch": 0.11, "learning_rate": 4.815846528037952e-05, "loss": 0.0511, "step": 1636000 }, { "epoch": 0.11, "learning_rate": 4.815790336033815e-05, "loss": 0.0524, "step": 1636500 }, { "epoch": 0.11, "learning_rate": 4.815734031420452e-05, "loss": 0.0523, "step": 1637000 }, { "epoch": 0.11, "learning_rate": 4.815677726807089e-05, "loss": 0.051, "step": 1637500 }, { "epoch": 0.11, "learning_rate": 4.8156214221937245e-05, "loss": 0.0487, "step": 1638000 }, { "epoch": 0.11, "learning_rate": 4.815565117580361e-05, "loss": 0.0536, "step": 1638500 }, { "epoch": 0.11, "learning_rate": 4.815508925576224e-05, "loss": 0.0527, "step": 1639000 }, { "epoch": 0.11, "learning_rate": 4.815452620962861e-05, "loss": 0.0539, "step": 1639500 }, { "epoch": 0.11, "learning_rate": 4.8153963163494976e-05, "loss": 0.056, "step": 1640000 }, { "epoch": 0.11, "learning_rate": 4.815340011736133e-05, "loss": 0.0566, "step": 1640500 }, { "epoch": 0.11, "learning_rate": 4.8152837071227704e-05, "loss": 0.0477, "step": 1641000 }, { "epoch": 0.11, "learning_rate": 4.815227515118633e-05, "loss": 0.0504, "step": 1641500 }, { "epoch": 0.11, "learning_rate": 4.81517121050527e-05, "loss": 0.0562, "step": 1642000 }, { "epoch": 0.11, "learning_rate": 4.8151149058919064e-05, "loss": 0.053, "step": 1642500 }, { "epoch": 0.11, "learning_rate": 4.815058601278543e-05, "loss": 0.0567, "step": 1643000 }, { "epoch": 0.11, "learning_rate": 4.815002296665179e-05, "loss": 0.0521, "step": 1643500 }, { "epoch": 0.11, "learning_rate": 4.8149459920518156e-05, "loss": 0.0546, "step": 1644000 }, { "epoch": 0.11, "learning_rate": 4.814889800047679e-05, "loss": 0.0528, "step": 1644500 }, { "epoch": 0.11, "learning_rate": 4.814833495434315e-05, "loss": 0.0557, "step": 1645000 }, { "epoch": 0.11, "learning_rate": 4.814777190820952e-05, "loss": 0.0534, "step": 1645500 }, { "epoch": 0.11, "learning_rate": 4.814720886207588e-05, "loss": 0.0536, "step": 1646000 }, { "epoch": 0.11, "learning_rate": 4.814664694203452e-05, "loss": 0.0536, "step": 1646500 }, { "epoch": 0.11, "learning_rate": 4.8146083895900877e-05, "loss": 0.05, "step": 1647000 }, { "epoch": 0.11, "learning_rate": 4.814552084976725e-05, "loss": 0.0532, "step": 1647500 }, { "epoch": 0.11, "learning_rate": 4.814495780363361e-05, "loss": 0.0535, "step": 1648000 }, { "epoch": 0.11, "learning_rate": 4.8144395883592244e-05, "loss": 0.0519, "step": 1648500 }, { "epoch": 0.11, "learning_rate": 4.814383283745861e-05, "loss": 0.0539, "step": 1649000 }, { "epoch": 0.11, "learning_rate": 4.814326979132497e-05, "loss": 0.0551, "step": 1649500 }, { "epoch": 0.11, "learning_rate": 4.8142706745191336e-05, "loss": 0.0545, "step": 1650000 }, { "epoch": 0.11, "learning_rate": 4.81421436990577e-05, "loss": 0.0573, "step": 1650500 }, { "epoch": 0.11, "learning_rate": 4.814158177901633e-05, "loss": 0.0504, "step": 1651000 }, { "epoch": 0.11, "learning_rate": 4.8141018732882696e-05, "loss": 0.0552, "step": 1651500 }, { "epoch": 0.11, "learning_rate": 4.814045568674906e-05, "loss": 0.0509, "step": 1652000 }, { "epoch": 0.11, "learning_rate": 4.8139892640615424e-05, "loss": 0.0503, "step": 1652500 }, { "epoch": 0.11, "learning_rate": 4.8139330720574056e-05, "loss": 0.0512, "step": 1653000 }, { "epoch": 0.11, "learning_rate": 4.813876767444042e-05, "loss": 0.0558, "step": 1653500 }, { "epoch": 0.11, "learning_rate": 4.813820462830679e-05, "loss": 0.0527, "step": 1654000 }, { "epoch": 0.11, "learning_rate": 4.813764158217315e-05, "loss": 0.0581, "step": 1654500 }, { "epoch": 0.11, "learning_rate": 4.813707853603951e-05, "loss": 0.0497, "step": 1655000 }, { "epoch": 0.11, "learning_rate": 4.8136516615998144e-05, "loss": 0.0492, "step": 1655500 }, { "epoch": 0.11, "learning_rate": 4.813595356986451e-05, "loss": 0.0517, "step": 1656000 }, { "epoch": 0.11, "learning_rate": 4.813539052373088e-05, "loss": 0.053, "step": 1656500 }, { "epoch": 0.11, "learning_rate": 4.813482860368951e-05, "loss": 0.053, "step": 1657000 }, { "epoch": 0.11, "learning_rate": 4.8134265557555875e-05, "loss": 0.0528, "step": 1657500 }, { "epoch": 0.11, "learning_rate": 4.813370251142223e-05, "loss": 0.0531, "step": 1658000 }, { "epoch": 0.11, "learning_rate": 4.81331394652886e-05, "loss": 0.052, "step": 1658500 }, { "epoch": 0.11, "learning_rate": 4.813257641915497e-05, "loss": 0.0511, "step": 1659000 }, { "epoch": 0.11, "learning_rate": 4.813201337302133e-05, "loss": 0.0528, "step": 1659500 }, { "epoch": 0.11, "learning_rate": 4.8131450326887695e-05, "loss": 0.0534, "step": 1660000 }, { "epoch": 0.11, "learning_rate": 4.813088728075406e-05, "loss": 0.0513, "step": 1660500 }, { "epoch": 0.11, "learning_rate": 4.813032536071269e-05, "loss": 0.0559, "step": 1661000 }, { "epoch": 0.11, "learning_rate": 4.8129762314579055e-05, "loss": 0.0538, "step": 1661500 }, { "epoch": 0.11, "learning_rate": 4.8129199268445426e-05, "loss": 0.0524, "step": 1662000 }, { "epoch": 0.11, "learning_rate": 4.812863622231178e-05, "loss": 0.0577, "step": 1662500 }, { "epoch": 0.11, "learning_rate": 4.8128073176178154e-05, "loss": 0.0539, "step": 1663000 }, { "epoch": 0.11, "learning_rate": 4.812751013004452e-05, "loss": 0.0553, "step": 1663500 }, { "epoch": 0.11, "learning_rate": 4.8126947083910875e-05, "loss": 0.0522, "step": 1664000 }, { "epoch": 0.11, "learning_rate": 4.8126384037777246e-05, "loss": 0.0517, "step": 1664500 }, { "epoch": 0.11, "learning_rate": 4.812582099164361e-05, "loss": 0.0535, "step": 1665000 }, { "epoch": 0.11, "learning_rate": 4.812525907160224e-05, "loss": 0.0526, "step": 1665500 }, { "epoch": 0.11, "learning_rate": 4.8124696025468606e-05, "loss": 0.0484, "step": 1666000 }, { "epoch": 0.11, "learning_rate": 4.812413297933497e-05, "loss": 0.0551, "step": 1666500 }, { "epoch": 0.11, "learning_rate": 4.8123569933201334e-05, "loss": 0.052, "step": 1667000 }, { "epoch": 0.11, "learning_rate": 4.81230068870677e-05, "loss": 0.0593, "step": 1667500 }, { "epoch": 0.11, "learning_rate": 4.812244496702633e-05, "loss": 0.0501, "step": 1668000 }, { "epoch": 0.11, "learning_rate": 4.8121881920892694e-05, "loss": 0.0512, "step": 1668500 }, { "epoch": 0.11, "learning_rate": 4.812131887475906e-05, "loss": 0.0524, "step": 1669000 }, { "epoch": 0.11, "learning_rate": 4.812075582862542e-05, "loss": 0.052, "step": 1669500 }, { "epoch": 0.11, "learning_rate": 4.8120193908584055e-05, "loss": 0.0583, "step": 1670000 }, { "epoch": 0.11, "learning_rate": 4.811963086245042e-05, "loss": 0.0507, "step": 1670500 }, { "epoch": 0.11, "learning_rate": 4.811906781631679e-05, "loss": 0.054, "step": 1671000 }, { "epoch": 0.11, "learning_rate": 4.8118504770183153e-05, "loss": 0.0535, "step": 1671500 }, { "epoch": 0.11, "learning_rate": 4.8117942850141786e-05, "loss": 0.0537, "step": 1672000 }, { "epoch": 0.11, "learning_rate": 4.811737980400815e-05, "loss": 0.054, "step": 1672500 }, { "epoch": 0.11, "learning_rate": 4.8116816757874514e-05, "loss": 0.0534, "step": 1673000 }, { "epoch": 0.11, "learning_rate": 4.811625371174088e-05, "loss": 0.0489, "step": 1673500 }, { "epoch": 0.11, "learning_rate": 4.811569066560724e-05, "loss": 0.0528, "step": 1674000 }, { "epoch": 0.11, "learning_rate": 4.8115128745565874e-05, "loss": 0.0541, "step": 1674500 }, { "epoch": 0.11, "learning_rate": 4.811456569943224e-05, "loss": 0.0488, "step": 1675000 }, { "epoch": 0.11, "learning_rate": 4.81140026532986e-05, "loss": 0.0532, "step": 1675500 }, { "epoch": 0.11, "learning_rate": 4.8113439607164966e-05, "loss": 0.0539, "step": 1676000 }, { "epoch": 0.11, "learning_rate": 4.8112876561031337e-05, "loss": 0.0533, "step": 1676500 }, { "epoch": 0.11, "learning_rate": 4.811231464098996e-05, "loss": 0.0506, "step": 1677000 }, { "epoch": 0.11, "learning_rate": 4.811175159485633e-05, "loss": 0.0547, "step": 1677500 }, { "epoch": 0.11, "learning_rate": 4.811118854872269e-05, "loss": 0.0523, "step": 1678000 }, { "epoch": 0.11, "learning_rate": 4.8110625502589054e-05, "loss": 0.0583, "step": 1678500 }, { "epoch": 0.11, "learning_rate": 4.8110062456455425e-05, "loss": 0.0506, "step": 1679000 }, { "epoch": 0.11, "learning_rate": 4.810950053641406e-05, "loss": 0.056, "step": 1679500 }, { "epoch": 0.11, "learning_rate": 4.810893749028042e-05, "loss": 0.055, "step": 1680000 }, { "epoch": 0.11, "learning_rate": 4.810837444414678e-05, "loss": 0.0521, "step": 1680500 }, { "epoch": 0.11, "learning_rate": 4.810781139801315e-05, "loss": 0.058, "step": 1681000 }, { "epoch": 0.11, "learning_rate": 4.8107249477971774e-05, "loss": 0.0577, "step": 1681500 }, { "epoch": 0.11, "learning_rate": 4.8106686431838145e-05, "loss": 0.0547, "step": 1682000 }, { "epoch": 0.11, "learning_rate": 4.810612338570451e-05, "loss": 0.0561, "step": 1682500 }, { "epoch": 0.11, "learning_rate": 4.810556033957087e-05, "loss": 0.0513, "step": 1683000 }, { "epoch": 0.11, "learning_rate": 4.810499729343724e-05, "loss": 0.0512, "step": 1683500 }, { "epoch": 0.11, "learning_rate": 4.81044342473036e-05, "loss": 0.0513, "step": 1684000 }, { "epoch": 0.11, "learning_rate": 4.8103871201169965e-05, "loss": 0.0565, "step": 1684500 }, { "epoch": 0.11, "learning_rate": 4.810330815503633e-05, "loss": 0.057, "step": 1685000 }, { "epoch": 0.11, "learning_rate": 4.810274623499496e-05, "loss": 0.0553, "step": 1685500 }, { "epoch": 0.11, "learning_rate": 4.8102184314953594e-05, "loss": 0.0508, "step": 1686000 }, { "epoch": 0.11, "learning_rate": 4.810162126881996e-05, "loss": 0.0505, "step": 1686500 }, { "epoch": 0.11, "learning_rate": 4.810105822268632e-05, "loss": 0.052, "step": 1687000 }, { "epoch": 0.11, "learning_rate": 4.810049517655269e-05, "loss": 0.0495, "step": 1687500 }, { "epoch": 0.11, "learning_rate": 4.8099932130419056e-05, "loss": 0.0545, "step": 1688000 }, { "epoch": 0.11, "learning_rate": 4.809936908428542e-05, "loss": 0.0514, "step": 1688500 }, { "epoch": 0.11, "learning_rate": 4.809880716424405e-05, "loss": 0.0536, "step": 1689000 }, { "epoch": 0.11, "learning_rate": 4.8098244118110417e-05, "loss": 0.0519, "step": 1689500 }, { "epoch": 0.11, "learning_rate": 4.809768107197678e-05, "loss": 0.0523, "step": 1690000 }, { "epoch": 0.11, "learning_rate": 4.8097118025843145e-05, "loss": 0.0517, "step": 1690500 }, { "epoch": 0.11, "learning_rate": 4.809655497970951e-05, "loss": 0.0504, "step": 1691000 }, { "epoch": 0.11, "learning_rate": 4.809599305966814e-05, "loss": 0.0544, "step": 1691500 }, { "epoch": 0.11, "learning_rate": 4.8095430013534505e-05, "loss": 0.054, "step": 1692000 }, { "epoch": 0.11, "learning_rate": 4.809486696740087e-05, "loss": 0.0497, "step": 1692500 }, { "epoch": 0.11, "learning_rate": 4.809430392126724e-05, "loss": 0.0543, "step": 1693000 }, { "epoch": 0.11, "learning_rate": 4.80937408751336e-05, "loss": 0.0545, "step": 1693500 }, { "epoch": 0.11, "learning_rate": 4.809317782899996e-05, "loss": 0.0533, "step": 1694000 }, { "epoch": 0.11, "learning_rate": 4.809261590895859e-05, "loss": 0.0533, "step": 1694500 }, { "epoch": 0.11, "learning_rate": 4.809205286282496e-05, "loss": 0.0493, "step": 1695000 }, { "epoch": 0.11, "learning_rate": 4.809148981669133e-05, "loss": 0.0549, "step": 1695500 }, { "epoch": 0.11, "learning_rate": 4.8090926770557685e-05, "loss": 0.0492, "step": 1696000 }, { "epoch": 0.11, "learning_rate": 4.8090364850516324e-05, "loss": 0.0584, "step": 1696500 }, { "epoch": 0.11, "learning_rate": 4.808980180438268e-05, "loss": 0.0486, "step": 1697000 }, { "epoch": 0.11, "learning_rate": 4.808923875824905e-05, "loss": 0.0496, "step": 1697500 }, { "epoch": 0.11, "learning_rate": 4.8088675712115416e-05, "loss": 0.0528, "step": 1698000 }, { "epoch": 0.11, "learning_rate": 4.808811379207405e-05, "loss": 0.0496, "step": 1698500 }, { "epoch": 0.11, "learning_rate": 4.808755074594041e-05, "loss": 0.0528, "step": 1699000 }, { "epoch": 0.11, "learning_rate": 4.8086987699806776e-05, "loss": 0.0536, "step": 1699500 }, { "epoch": 0.11, "learning_rate": 4.808642465367314e-05, "loss": 0.0527, "step": 1700000 }, { "epoch": 0.11, "learning_rate": 4.8085861607539504e-05, "loss": 0.0496, "step": 1700500 }, { "epoch": 0.11, "learning_rate": 4.8085299687498136e-05, "loss": 0.0493, "step": 1701000 }, { "epoch": 0.11, "learning_rate": 4.80847366413645e-05, "loss": 0.0537, "step": 1701500 }, { "epoch": 0.11, "learning_rate": 4.808417359523087e-05, "loss": 0.0549, "step": 1702000 }, { "epoch": 0.12, "learning_rate": 4.808361054909723e-05, "loss": 0.0531, "step": 1702500 }, { "epoch": 0.12, "learning_rate": 4.808304862905586e-05, "loss": 0.0476, "step": 1703000 }, { "epoch": 0.12, "learning_rate": 4.8082485582922224e-05, "loss": 0.0533, "step": 1703500 }, { "epoch": 0.12, "learning_rate": 4.8081922536788595e-05, "loss": 0.0501, "step": 1704000 }, { "epoch": 0.12, "learning_rate": 4.808135949065496e-05, "loss": 0.0556, "step": 1704500 }, { "epoch": 0.12, "learning_rate": 4.808079644452132e-05, "loss": 0.0519, "step": 1705000 }, { "epoch": 0.12, "learning_rate": 4.8080234524479955e-05, "loss": 0.052, "step": 1705500 }, { "epoch": 0.12, "learning_rate": 4.807967147834632e-05, "loss": 0.0538, "step": 1706000 }, { "epoch": 0.12, "learning_rate": 4.8079108432212683e-05, "loss": 0.0524, "step": 1706500 }, { "epoch": 0.12, "learning_rate": 4.807854538607905e-05, "loss": 0.056, "step": 1707000 }, { "epoch": 0.12, "learning_rate": 4.807798346603768e-05, "loss": 0.0565, "step": 1707500 }, { "epoch": 0.12, "learning_rate": 4.8077420419904044e-05, "loss": 0.0521, "step": 1708000 }, { "epoch": 0.12, "learning_rate": 4.807685737377041e-05, "loss": 0.0509, "step": 1708500 }, { "epoch": 0.12, "learning_rate": 4.807629432763677e-05, "loss": 0.0483, "step": 1709000 }, { "epoch": 0.12, "learning_rate": 4.807573128150314e-05, "loss": 0.0538, "step": 1709500 }, { "epoch": 0.12, "learning_rate": 4.80751682353695e-05, "loss": 0.0489, "step": 1710000 }, { "epoch": 0.12, "learning_rate": 4.807460631532814e-05, "loss": 0.0521, "step": 1710500 }, { "epoch": 0.12, "learning_rate": 4.8074043269194496e-05, "loss": 0.0544, "step": 1711000 }, { "epoch": 0.12, "learning_rate": 4.807348022306086e-05, "loss": 0.0512, "step": 1711500 }, { "epoch": 0.12, "learning_rate": 4.807291717692723e-05, "loss": 0.0525, "step": 1712000 }, { "epoch": 0.12, "learning_rate": 4.8072355256885856e-05, "loss": 0.0536, "step": 1712500 }, { "epoch": 0.12, "learning_rate": 4.807179221075223e-05, "loss": 0.0502, "step": 1713000 }, { "epoch": 0.12, "learning_rate": 4.8071229164618584e-05, "loss": 0.0575, "step": 1713500 }, { "epoch": 0.12, "learning_rate": 4.8070666118484955e-05, "loss": 0.0524, "step": 1714000 }, { "epoch": 0.12, "learning_rate": 4.807010307235132e-05, "loss": 0.05, "step": 1714500 }, { "epoch": 0.12, "learning_rate": 4.806954002621768e-05, "loss": 0.0513, "step": 1715000 }, { "epoch": 0.12, "learning_rate": 4.806897698008405e-05, "loss": 0.0536, "step": 1715500 }, { "epoch": 0.12, "learning_rate": 4.806841393395041e-05, "loss": 0.0568, "step": 1716000 }, { "epoch": 0.12, "learning_rate": 4.806785201390904e-05, "loss": 0.0506, "step": 1716500 }, { "epoch": 0.12, "learning_rate": 4.806728896777541e-05, "loss": 0.0519, "step": 1717000 }, { "epoch": 0.12, "learning_rate": 4.806672592164178e-05, "loss": 0.0486, "step": 1717500 }, { "epoch": 0.12, "learning_rate": 4.8066162875508135e-05, "loss": 0.0523, "step": 1718000 }, { "epoch": 0.12, "learning_rate": 4.8065599829374506e-05, "loss": 0.0504, "step": 1718500 }, { "epoch": 0.12, "learning_rate": 4.806503678324087e-05, "loss": 0.0525, "step": 1719000 }, { "epoch": 0.12, "learning_rate": 4.80644748631995e-05, "loss": 0.0472, "step": 1719500 }, { "epoch": 0.12, "learning_rate": 4.8063911817065866e-05, "loss": 0.0557, "step": 1720000 }, { "epoch": 0.12, "learning_rate": 4.806334877093222e-05, "loss": 0.0509, "step": 1720500 }, { "epoch": 0.12, "learning_rate": 4.8062785724798594e-05, "loss": 0.0568, "step": 1721000 }, { "epoch": 0.12, "learning_rate": 4.806222267866496e-05, "loss": 0.0543, "step": 1721500 }, { "epoch": 0.12, "learning_rate": 4.806166075862359e-05, "loss": 0.0528, "step": 1722000 }, { "epoch": 0.12, "learning_rate": 4.8061097712489954e-05, "loss": 0.0469, "step": 1722500 }, { "epoch": 0.12, "learning_rate": 4.806053466635632e-05, "loss": 0.0504, "step": 1723000 }, { "epoch": 0.12, "learning_rate": 4.805997162022268e-05, "loss": 0.0517, "step": 1723500 }, { "epoch": 0.12, "learning_rate": 4.8059409700181314e-05, "loss": 0.0517, "step": 1724000 }, { "epoch": 0.12, "learning_rate": 4.805884665404768e-05, "loss": 0.0507, "step": 1724500 }, { "epoch": 0.12, "learning_rate": 4.805828360791404e-05, "loss": 0.0495, "step": 1725000 }, { "epoch": 0.12, "learning_rate": 4.8057720561780406e-05, "loss": 0.0517, "step": 1725500 }, { "epoch": 0.12, "learning_rate": 4.805715751564677e-05, "loss": 0.0534, "step": 1726000 }, { "epoch": 0.12, "learning_rate": 4.805659446951314e-05, "loss": 0.0513, "step": 1726500 }, { "epoch": 0.12, "learning_rate": 4.80560314233795e-05, "loss": 0.0541, "step": 1727000 }, { "epoch": 0.12, "learning_rate": 4.805546837724587e-05, "loss": 0.0531, "step": 1727500 }, { "epoch": 0.12, "learning_rate": 4.8054906457204495e-05, "loss": 0.0541, "step": 1728000 }, { "epoch": 0.12, "learning_rate": 4.8054343411070865e-05, "loss": 0.0548, "step": 1728500 }, { "epoch": 0.12, "learning_rate": 4.805378036493723e-05, "loss": 0.0553, "step": 1729000 }, { "epoch": 0.12, "learning_rate": 4.805321731880359e-05, "loss": 0.0488, "step": 1729500 }, { "epoch": 0.12, "learning_rate": 4.805265427266996e-05, "loss": 0.0537, "step": 1730000 }, { "epoch": 0.12, "learning_rate": 4.805209235262859e-05, "loss": 0.0527, "step": 1730500 }, { "epoch": 0.12, "learning_rate": 4.8051529306494954e-05, "loss": 0.0528, "step": 1731000 }, { "epoch": 0.12, "learning_rate": 4.805096626036132e-05, "loss": 0.0545, "step": 1731500 }, { "epoch": 0.12, "learning_rate": 4.805040321422769e-05, "loss": 0.0517, "step": 1732000 }, { "epoch": 0.12, "learning_rate": 4.8049841294186314e-05, "loss": 0.0537, "step": 1732500 }, { "epoch": 0.12, "learning_rate": 4.8049278248052685e-05, "loss": 0.0524, "step": 1733000 }, { "epoch": 0.12, "learning_rate": 4.804871520191904e-05, "loss": 0.0529, "step": 1733500 }, { "epoch": 0.12, "learning_rate": 4.8048152155785406e-05, "loss": 0.0534, "step": 1734000 }, { "epoch": 0.12, "learning_rate": 4.804759023574404e-05, "loss": 0.0487, "step": 1734500 }, { "epoch": 0.12, "learning_rate": 4.80470271896104e-05, "loss": 0.0496, "step": 1735000 }, { "epoch": 0.12, "learning_rate": 4.804646414347677e-05, "loss": 0.0542, "step": 1735500 }, { "epoch": 0.12, "learning_rate": 4.804590109734313e-05, "loss": 0.0479, "step": 1736000 }, { "epoch": 0.12, "learning_rate": 4.80453380512095e-05, "loss": 0.0539, "step": 1736500 }, { "epoch": 0.12, "learning_rate": 4.8044776131168126e-05, "loss": 0.0514, "step": 1737000 }, { "epoch": 0.12, "learning_rate": 4.80442130850345e-05, "loss": 0.0551, "step": 1737500 }, { "epoch": 0.12, "learning_rate": 4.804365003890086e-05, "loss": 0.0513, "step": 1738000 }, { "epoch": 0.12, "learning_rate": 4.8043086992767225e-05, "loss": 0.0548, "step": 1738500 }, { "epoch": 0.12, "learning_rate": 4.804252394663359e-05, "loss": 0.0501, "step": 1739000 }, { "epoch": 0.12, "learning_rate": 4.804196202659222e-05, "loss": 0.0531, "step": 1739500 }, { "epoch": 0.12, "learning_rate": 4.8041398980458585e-05, "loss": 0.0519, "step": 1740000 }, { "epoch": 0.12, "learning_rate": 4.804083593432495e-05, "loss": 0.0527, "step": 1740500 }, { "epoch": 0.12, "learning_rate": 4.804027288819131e-05, "loss": 0.0554, "step": 1741000 }, { "epoch": 0.12, "learning_rate": 4.803970984205768e-05, "loss": 0.0483, "step": 1741500 }, { "epoch": 0.12, "learning_rate": 4.803914679592405e-05, "loss": 0.0551, "step": 1742000 }, { "epoch": 0.12, "learning_rate": 4.8038583749790405e-05, "loss": 0.0523, "step": 1742500 }, { "epoch": 0.12, "learning_rate": 4.8038021829749044e-05, "loss": 0.0527, "step": 1743000 }, { "epoch": 0.12, "learning_rate": 4.80374587836154e-05, "loss": 0.051, "step": 1743500 }, { "epoch": 0.12, "learning_rate": 4.8036895737481765e-05, "loss": 0.0543, "step": 1744000 }, { "epoch": 0.12, "learning_rate": 4.8036332691348136e-05, "loss": 0.053, "step": 1744500 }, { "epoch": 0.12, "learning_rate": 4.803577077130677e-05, "loss": 0.0501, "step": 1745000 }, { "epoch": 0.12, "learning_rate": 4.803520772517313e-05, "loss": 0.0507, "step": 1745500 }, { "epoch": 0.12, "learning_rate": 4.8034644679039496e-05, "loss": 0.0521, "step": 1746000 }, { "epoch": 0.12, "learning_rate": 4.803408163290586e-05, "loss": 0.0544, "step": 1746500 }, { "epoch": 0.12, "learning_rate": 4.803351971286449e-05, "loss": 0.0545, "step": 1747000 }, { "epoch": 0.12, "learning_rate": 4.8032956666730856e-05, "loss": 0.0528, "step": 1747500 }, { "epoch": 0.12, "learning_rate": 4.803239362059722e-05, "loss": 0.0518, "step": 1748000 }, { "epoch": 0.12, "learning_rate": 4.8031830574463584e-05, "loss": 0.0515, "step": 1748500 }, { "epoch": 0.12, "learning_rate": 4.803126752832995e-05, "loss": 0.0539, "step": 1749000 }, { "epoch": 0.12, "learning_rate": 4.803070448219631e-05, "loss": 0.0541, "step": 1749500 }, { "epoch": 0.12, "learning_rate": 4.8030142562154945e-05, "loss": 0.053, "step": 1750000 }, { "epoch": 0.12, "learning_rate": 4.802957951602131e-05, "loss": 0.0525, "step": 1750500 }, { "epoch": 0.12, "learning_rate": 4.802901646988768e-05, "loss": 0.0562, "step": 1751000 }, { "epoch": 0.12, "learning_rate": 4.802845342375404e-05, "loss": 0.0529, "step": 1751500 }, { "epoch": 0.12, "learning_rate": 4.8027891503712676e-05, "loss": 0.0555, "step": 1752000 }, { "epoch": 0.12, "learning_rate": 4.802732845757903e-05, "loss": 0.0543, "step": 1752500 }, { "epoch": 0.12, "learning_rate": 4.8026765411445404e-05, "loss": 0.0494, "step": 1753000 }, { "epoch": 0.12, "learning_rate": 4.802620236531177e-05, "loss": 0.0558, "step": 1753500 }, { "epoch": 0.12, "learning_rate": 4.80256404452704e-05, "loss": 0.0518, "step": 1754000 }, { "epoch": 0.12, "learning_rate": 4.8025077399136764e-05, "loss": 0.0537, "step": 1754500 }, { "epoch": 0.12, "learning_rate": 4.802451435300313e-05, "loss": 0.0505, "step": 1755000 }, { "epoch": 0.12, "learning_rate": 4.802395130686949e-05, "loss": 0.0487, "step": 1755500 }, { "epoch": 0.12, "learning_rate": 4.8023388260735856e-05, "loss": 0.0501, "step": 1756000 }, { "epoch": 0.12, "learning_rate": 4.802282521460222e-05, "loss": 0.051, "step": 1756500 }, { "epoch": 0.12, "learning_rate": 4.802226329456085e-05, "loss": 0.0553, "step": 1757000 }, { "epoch": 0.12, "learning_rate": 4.8021700248427216e-05, "loss": 0.0509, "step": 1757500 }, { "epoch": 0.12, "learning_rate": 4.802113720229358e-05, "loss": 0.0573, "step": 1758000 }, { "epoch": 0.12, "learning_rate": 4.802057415615995e-05, "loss": 0.0506, "step": 1758500 }, { "epoch": 0.12, "learning_rate": 4.8020011110026315e-05, "loss": 0.0552, "step": 1759000 }, { "epoch": 0.12, "learning_rate": 4.801944806389267e-05, "loss": 0.0507, "step": 1759500 }, { "epoch": 0.12, "learning_rate": 4.8018886143851304e-05, "loss": 0.0501, "step": 1760000 }, { "epoch": 0.12, "learning_rate": 4.801832309771767e-05, "loss": 0.0522, "step": 1760500 }, { "epoch": 0.12, "learning_rate": 4.801776005158404e-05, "loss": 0.0535, "step": 1761000 }, { "epoch": 0.12, "learning_rate": 4.80171970054504e-05, "loss": 0.0505, "step": 1761500 }, { "epoch": 0.12, "learning_rate": 4.801663395931677e-05, "loss": 0.0475, "step": 1762000 }, { "epoch": 0.12, "learning_rate": 4.801607091318313e-05, "loss": 0.051, "step": 1762500 }, { "epoch": 0.12, "learning_rate": 4.801550899314176e-05, "loss": 0.0564, "step": 1763000 }, { "epoch": 0.12, "learning_rate": 4.801494594700813e-05, "loss": 0.0514, "step": 1763500 }, { "epoch": 0.12, "learning_rate": 4.801438290087449e-05, "loss": 0.0531, "step": 1764000 }, { "epoch": 0.12, "learning_rate": 4.8013819854740855e-05, "loss": 0.0514, "step": 1764500 }, { "epoch": 0.12, "learning_rate": 4.801325793469949e-05, "loss": 0.0518, "step": 1765000 }, { "epoch": 0.12, "learning_rate": 4.801269488856585e-05, "loss": 0.0518, "step": 1765500 }, { "epoch": 0.12, "learning_rate": 4.8012131842432215e-05, "loss": 0.0516, "step": 1766000 }, { "epoch": 0.12, "learning_rate": 4.8011568796298586e-05, "loss": 0.0523, "step": 1766500 }, { "epoch": 0.12, "learning_rate": 4.801100687625721e-05, "loss": 0.0542, "step": 1767000 }, { "epoch": 0.12, "learning_rate": 4.801044383012358e-05, "loss": 0.0496, "step": 1767500 }, { "epoch": 0.12, "learning_rate": 4.800988078398994e-05, "loss": 0.055, "step": 1768000 }, { "epoch": 0.12, "learning_rate": 4.800931773785631e-05, "loss": 0.05, "step": 1768500 }, { "epoch": 0.12, "learning_rate": 4.8008754691722674e-05, "loss": 0.0511, "step": 1769000 }, { "epoch": 0.12, "learning_rate": 4.8008192771681307e-05, "loss": 0.0559, "step": 1769500 }, { "epoch": 0.12, "learning_rate": 4.800762972554767e-05, "loss": 0.0517, "step": 1770000 }, { "epoch": 0.12, "learning_rate": 4.800706667941403e-05, "loss": 0.0512, "step": 1770500 }, { "epoch": 0.12, "learning_rate": 4.80065036332804e-05, "loss": 0.0491, "step": 1771000 }, { "epoch": 0.12, "learning_rate": 4.800594058714676e-05, "loss": 0.0485, "step": 1771500 }, { "epoch": 0.12, "learning_rate": 4.8005377541013127e-05, "loss": 0.0528, "step": 1772000 }, { "epoch": 0.12, "learning_rate": 4.800481562097176e-05, "loss": 0.0531, "step": 1772500 }, { "epoch": 0.12, "learning_rate": 4.800425257483812e-05, "loss": 0.0495, "step": 1773000 }, { "epoch": 0.12, "learning_rate": 4.800368952870449e-05, "loss": 0.0565, "step": 1773500 }, { "epoch": 0.12, "learning_rate": 4.800312648257085e-05, "loss": 0.0519, "step": 1774000 }, { "epoch": 0.12, "learning_rate": 4.800256343643722e-05, "loss": 0.0549, "step": 1774500 }, { "epoch": 0.12, "learning_rate": 4.800200039030358e-05, "loss": 0.0551, "step": 1775000 }, { "epoch": 0.12, "learning_rate": 4.800143847026222e-05, "loss": 0.0527, "step": 1775500 }, { "epoch": 0.12, "learning_rate": 4.8000875424128575e-05, "loss": 0.0543, "step": 1776000 }, { "epoch": 0.12, "learning_rate": 4.8000312377994946e-05, "loss": 0.0512, "step": 1776500 }, { "epoch": 0.12, "learning_rate": 4.799974933186131e-05, "loss": 0.0511, "step": 1777000 }, { "epoch": 0.12, "learning_rate": 4.799918741181994e-05, "loss": 0.0513, "step": 1777500 }, { "epoch": 0.12, "learning_rate": 4.7998624365686306e-05, "loss": 0.0513, "step": 1778000 }, { "epoch": 0.12, "learning_rate": 4.799806131955267e-05, "loss": 0.0508, "step": 1778500 }, { "epoch": 0.12, "learning_rate": 4.7997498273419034e-05, "loss": 0.0518, "step": 1779000 }, { "epoch": 0.12, "learning_rate": 4.7996936353377666e-05, "loss": 0.0536, "step": 1779500 }, { "epoch": 0.12, "learning_rate": 4.799637330724403e-05, "loss": 0.0504, "step": 1780000 }, { "epoch": 0.12, "learning_rate": 4.7995810261110394e-05, "loss": 0.0544, "step": 1780500 }, { "epoch": 0.12, "learning_rate": 4.799524721497676e-05, "loss": 0.0521, "step": 1781000 }, { "epoch": 0.12, "learning_rate": 4.799468416884312e-05, "loss": 0.0538, "step": 1781500 }, { "epoch": 0.12, "learning_rate": 4.799412112270949e-05, "loss": 0.0466, "step": 1782000 }, { "epoch": 0.12, "learning_rate": 4.799355807657585e-05, "loss": 0.0524, "step": 1782500 }, { "epoch": 0.12, "learning_rate": 4.799299615653449e-05, "loss": 0.0495, "step": 1783000 }, { "epoch": 0.12, "learning_rate": 4.7992433110400846e-05, "loss": 0.0482, "step": 1783500 }, { "epoch": 0.12, "learning_rate": 4.799187006426721e-05, "loss": 0.053, "step": 1784000 }, { "epoch": 0.12, "learning_rate": 4.799130701813358e-05, "loss": 0.0491, "step": 1784500 }, { "epoch": 0.12, "learning_rate": 4.799074509809221e-05, "loss": 0.0526, "step": 1785000 }, { "epoch": 0.12, "learning_rate": 4.799018205195858e-05, "loss": 0.0491, "step": 1785500 }, { "epoch": 0.12, "learning_rate": 4.798962013191721e-05, "loss": 0.0515, "step": 1786000 }, { "epoch": 0.12, "learning_rate": 4.7989057085783573e-05, "loss": 0.0497, "step": 1786500 }, { "epoch": 0.12, "learning_rate": 4.798849403964993e-05, "loss": 0.0495, "step": 1787000 }, { "epoch": 0.12, "learning_rate": 4.79879309935163e-05, "loss": 0.0498, "step": 1787500 }, { "epoch": 0.12, "learning_rate": 4.7987367947382665e-05, "loss": 0.0538, "step": 1788000 }, { "epoch": 0.12, "learning_rate": 4.798680490124903e-05, "loss": 0.0519, "step": 1788500 }, { "epoch": 0.12, "learning_rate": 4.7986241855115393e-05, "loss": 0.0542, "step": 1789000 }, { "epoch": 0.12, "learning_rate": 4.798567880898176e-05, "loss": 0.0506, "step": 1789500 }, { "epoch": 0.12, "learning_rate": 4.798511688894039e-05, "loss": 0.0545, "step": 1790000 }, { "epoch": 0.12, "learning_rate": 4.7984553842806754e-05, "loss": 0.0528, "step": 1790500 }, { "epoch": 0.12, "learning_rate": 4.7983990796673124e-05, "loss": 0.0535, "step": 1791000 }, { "epoch": 0.12, "learning_rate": 4.798342775053948e-05, "loss": 0.0549, "step": 1791500 }, { "epoch": 0.12, "learning_rate": 4.798286470440585e-05, "loss": 0.0494, "step": 1792000 }, { "epoch": 0.12, "learning_rate": 4.7982301658272216e-05, "loss": 0.0508, "step": 1792500 }, { "epoch": 0.12, "learning_rate": 4.798173973823085e-05, "loss": 0.0519, "step": 1793000 }, { "epoch": 0.12, "learning_rate": 4.798117669209721e-05, "loss": 0.0524, "step": 1793500 }, { "epoch": 0.12, "learning_rate": 4.798061364596358e-05, "loss": 0.051, "step": 1794000 }, { "epoch": 0.12, "learning_rate": 4.798005059982994e-05, "loss": 0.0498, "step": 1794500 }, { "epoch": 0.12, "learning_rate": 4.797948867978857e-05, "loss": 0.0565, "step": 1795000 }, { "epoch": 0.12, "learning_rate": 4.797892563365494e-05, "loss": 0.0488, "step": 1795500 }, { "epoch": 0.12, "learning_rate": 4.79783625875213e-05, "loss": 0.0482, "step": 1796000 }, { "epoch": 0.12, "learning_rate": 4.7977799541387665e-05, "loss": 0.0516, "step": 1796500 }, { "epoch": 0.12, "learning_rate": 4.797723649525403e-05, "loss": 0.0524, "step": 1797000 }, { "epoch": 0.12, "learning_rate": 4.797667457521266e-05, "loss": 0.051, "step": 1797500 }, { "epoch": 0.12, "learning_rate": 4.7976111529079025e-05, "loss": 0.0549, "step": 1798000 }, { "epoch": 0.12, "learning_rate": 4.7975548482945396e-05, "loss": 0.0513, "step": 1798500 }, { "epoch": 0.12, "learning_rate": 4.797498543681175e-05, "loss": 0.052, "step": 1799000 }, { "epoch": 0.12, "learning_rate": 4.797442351677039e-05, "loss": 0.0536, "step": 1799500 }, { "epoch": 0.12, "learning_rate": 4.797386047063675e-05, "loss": 0.0488, "step": 1800000 }, { "epoch": 0.12, "learning_rate": 4.797329742450311e-05, "loss": 0.0509, "step": 1800500 }, { "epoch": 0.12, "learning_rate": 4.7972734378369484e-05, "loss": 0.0527, "step": 1801000 }, { "epoch": 0.12, "learning_rate": 4.7972172458328116e-05, "loss": 0.0551, "step": 1801500 }, { "epoch": 0.12, "learning_rate": 4.797160941219448e-05, "loss": 0.0519, "step": 1802000 }, { "epoch": 0.12, "learning_rate": 4.797104636606084e-05, "loss": 0.0533, "step": 1802500 }, { "epoch": 0.12, "learning_rate": 4.797048331992721e-05, "loss": 0.0546, "step": 1803000 }, { "epoch": 0.12, "learning_rate": 4.7969921399885834e-05, "loss": 0.0487, "step": 1803500 }, { "epoch": 0.12, "learning_rate": 4.7969358353752204e-05, "loss": 0.0508, "step": 1804000 }, { "epoch": 0.12, "learning_rate": 4.796879530761857e-05, "loss": 0.0483, "step": 1804500 }, { "epoch": 0.12, "learning_rate": 4.796823226148493e-05, "loss": 0.0504, "step": 1805000 }, { "epoch": 0.12, "learning_rate": 4.7967669215351296e-05, "loss": 0.0503, "step": 1805500 }, { "epoch": 0.12, "learning_rate": 4.7967107295309935e-05, "loss": 0.0545, "step": 1806000 }, { "epoch": 0.12, "learning_rate": 4.796654424917629e-05, "loss": 0.0489, "step": 1806500 }, { "epoch": 0.12, "learning_rate": 4.796598120304266e-05, "loss": 0.0503, "step": 1807000 }, { "epoch": 0.12, "learning_rate": 4.796541815690903e-05, "loss": 0.0514, "step": 1807500 }, { "epoch": 0.12, "learning_rate": 4.7964855110775385e-05, "loss": 0.0546, "step": 1808000 }, { "epoch": 0.12, "learning_rate": 4.7964292064641755e-05, "loss": 0.0531, "step": 1808500 }, { "epoch": 0.12, "learning_rate": 4.796372901850812e-05, "loss": 0.0517, "step": 1809000 }, { "epoch": 0.12, "learning_rate": 4.796316709846675e-05, "loss": 0.0516, "step": 1809500 }, { "epoch": 0.12, "learning_rate": 4.7962604052333116e-05, "loss": 0.0498, "step": 1810000 }, { "epoch": 0.12, "learning_rate": 4.796204100619948e-05, "loss": 0.0526, "step": 1810500 }, { "epoch": 0.12, "learning_rate": 4.7961477960065844e-05, "loss": 0.0479, "step": 1811000 }, { "epoch": 0.12, "learning_rate": 4.7960916040024476e-05, "loss": 0.0535, "step": 1811500 }, { "epoch": 0.12, "learning_rate": 4.796035299389084e-05, "loss": 0.0509, "step": 1812000 }, { "epoch": 0.12, "learning_rate": 4.7959789947757204e-05, "loss": 0.0498, "step": 1812500 }, { "epoch": 0.12, "learning_rate": 4.795922690162357e-05, "loss": 0.0522, "step": 1813000 }, { "epoch": 0.12, "learning_rate": 4.79586649815822e-05, "loss": 0.0486, "step": 1813500 }, { "epoch": 0.12, "learning_rate": 4.7958101935448564e-05, "loss": 0.0519, "step": 1814000 }, { "epoch": 0.12, "learning_rate": 4.795753888931493e-05, "loss": 0.0511, "step": 1814500 }, { "epoch": 0.12, "learning_rate": 4.79569758431813e-05, "loss": 0.0509, "step": 1815000 }, { "epoch": 0.12, "learning_rate": 4.7956412797047656e-05, "loss": 0.0487, "step": 1815500 }, { "epoch": 0.12, "learning_rate": 4.795584975091402e-05, "loss": 0.0479, "step": 1816000 }, { "epoch": 0.12, "learning_rate": 4.795528670478039e-05, "loss": 0.0536, "step": 1816500 }, { "epoch": 0.12, "learning_rate": 4.7954723658646755e-05, "loss": 0.0495, "step": 1817000 }, { "epoch": 0.12, "learning_rate": 4.795416173860539e-05, "loss": 0.0539, "step": 1817500 }, { "epoch": 0.12, "learning_rate": 4.795359981856401e-05, "loss": 0.05, "step": 1818000 }, { "epoch": 0.12, "learning_rate": 4.795303677243038e-05, "loss": 0.0505, "step": 1818500 }, { "epoch": 0.12, "learning_rate": 4.795247372629674e-05, "loss": 0.0555, "step": 1819000 }, { "epoch": 0.12, "learning_rate": 4.795191068016311e-05, "loss": 0.0515, "step": 1819500 }, { "epoch": 0.12, "learning_rate": 4.7951347634029475e-05, "loss": 0.0528, "step": 1820000 }, { "epoch": 0.12, "learning_rate": 4.795078458789584e-05, "loss": 0.0539, "step": 1820500 }, { "epoch": 0.12, "learning_rate": 4.79502215417622e-05, "loss": 0.048, "step": 1821000 }, { "epoch": 0.12, "learning_rate": 4.794965849562857e-05, "loss": 0.0523, "step": 1821500 }, { "epoch": 0.12, "learning_rate": 4.79490965755872e-05, "loss": 0.0499, "step": 1822000 }, { "epoch": 0.12, "learning_rate": 4.794853352945356e-05, "loss": 0.0454, "step": 1822500 }, { "epoch": 0.12, "learning_rate": 4.7947970483319934e-05, "loss": 0.0482, "step": 1823000 }, { "epoch": 0.12, "learning_rate": 4.794740743718629e-05, "loss": 0.0529, "step": 1823500 }, { "epoch": 0.12, "learning_rate": 4.794684551714493e-05, "loss": 0.0543, "step": 1824000 }, { "epoch": 0.12, "learning_rate": 4.794628247101129e-05, "loss": 0.0507, "step": 1824500 }, { "epoch": 0.12, "learning_rate": 4.794571942487766e-05, "loss": 0.0495, "step": 1825000 }, { "epoch": 0.12, "learning_rate": 4.794515637874402e-05, "loss": 0.0513, "step": 1825500 }, { "epoch": 0.12, "learning_rate": 4.794459333261038e-05, "loss": 0.0506, "step": 1826000 }, { "epoch": 0.12, "learning_rate": 4.794403141256902e-05, "loss": 0.0514, "step": 1826500 }, { "epoch": 0.12, "learning_rate": 4.7943468366435376e-05, "loss": 0.0503, "step": 1827000 }, { "epoch": 0.12, "learning_rate": 4.7942905320301747e-05, "loss": 0.0532, "step": 1827500 }, { "epoch": 0.12, "learning_rate": 4.794234227416811e-05, "loss": 0.0494, "step": 1828000 }, { "epoch": 0.12, "learning_rate": 4.7941779228034475e-05, "loss": 0.0517, "step": 1828500 }, { "epoch": 0.12, "learning_rate": 4.794121730799311e-05, "loss": 0.0532, "step": 1829000 }, { "epoch": 0.12, "learning_rate": 4.794065426185947e-05, "loss": 0.0512, "step": 1829500 }, { "epoch": 0.12, "learning_rate": 4.7940091215725835e-05, "loss": 0.0539, "step": 1830000 }, { "epoch": 0.12, "learning_rate": 4.79395281695922e-05, "loss": 0.0509, "step": 1830500 }, { "epoch": 0.12, "learning_rate": 4.793896512345856e-05, "loss": 0.0575, "step": 1831000 }, { "epoch": 0.12, "learning_rate": 4.793840207732493e-05, "loss": 0.0537, "step": 1831500 }, { "epoch": 0.12, "learning_rate": 4.793784015728356e-05, "loss": 0.0504, "step": 1832000 }, { "epoch": 0.12, "learning_rate": 4.793727711114992e-05, "loss": 0.0535, "step": 1832500 }, { "epoch": 0.12, "learning_rate": 4.7936714065016294e-05, "loss": 0.0486, "step": 1833000 }, { "epoch": 0.12, "learning_rate": 4.793615101888266e-05, "loss": 0.0513, "step": 1833500 }, { "epoch": 0.12, "learning_rate": 4.793558797274902e-05, "loss": 0.0579, "step": 1834000 }, { "epoch": 0.12, "learning_rate": 4.7935024926615386e-05, "loss": 0.0474, "step": 1834500 }, { "epoch": 0.12, "learning_rate": 4.793446300657402e-05, "loss": 0.0519, "step": 1835000 }, { "epoch": 0.12, "learning_rate": 4.793389996044038e-05, "loss": 0.0517, "step": 1835500 }, { "epoch": 0.12, "learning_rate": 4.7933336914306746e-05, "loss": 0.0528, "step": 1836000 }, { "epoch": 0.12, "learning_rate": 4.793277386817311e-05, "loss": 0.0537, "step": 1836500 }, { "epoch": 0.12, "learning_rate": 4.7932210822039474e-05, "loss": 0.0526, "step": 1837000 }, { "epoch": 0.12, "learning_rate": 4.7931648901998106e-05, "loss": 0.0529, "step": 1837500 }, { "epoch": 0.12, "learning_rate": 4.793108585586447e-05, "loss": 0.0536, "step": 1838000 }, { "epoch": 0.12, "learning_rate": 4.793052280973084e-05, "loss": 0.0513, "step": 1838500 }, { "epoch": 0.12, "learning_rate": 4.79299597635972e-05, "loss": 0.0509, "step": 1839000 }, { "epoch": 0.12, "learning_rate": 4.792939784355584e-05, "loss": 0.0514, "step": 1839500 }, { "epoch": 0.12, "learning_rate": 4.7928834797422194e-05, "loss": 0.0525, "step": 1840000 }, { "epoch": 0.12, "learning_rate": 4.792827175128856e-05, "loss": 0.0529, "step": 1840500 }, { "epoch": 0.12, "learning_rate": 4.792770870515493e-05, "loss": 0.0518, "step": 1841000 }, { "epoch": 0.12, "learning_rate": 4.7927145659021286e-05, "loss": 0.0523, "step": 1841500 }, { "epoch": 0.12, "learning_rate": 4.7926583738979925e-05, "loss": 0.0557, "step": 1842000 }, { "epoch": 0.12, "learning_rate": 4.792602069284628e-05, "loss": 0.0528, "step": 1842500 }, { "epoch": 0.12, "learning_rate": 4.792545764671265e-05, "loss": 0.0512, "step": 1843000 }, { "epoch": 0.12, "learning_rate": 4.792489460057902e-05, "loss": 0.0498, "step": 1843500 }, { "epoch": 0.12, "learning_rate": 4.792433268053765e-05, "loss": 0.0528, "step": 1844000 }, { "epoch": 0.12, "learning_rate": 4.7923769634404013e-05, "loss": 0.0467, "step": 1844500 }, { "epoch": 0.12, "learning_rate": 4.792320658827038e-05, "loss": 0.0526, "step": 1845000 }, { "epoch": 0.12, "learning_rate": 4.792264354213674e-05, "loss": 0.0511, "step": 1845500 }, { "epoch": 0.12, "learning_rate": 4.7922080496003105e-05, "loss": 0.0479, "step": 1846000 }, { "epoch": 0.12, "learning_rate": 4.792151744986947e-05, "loss": 0.0544, "step": 1846500 }, { "epoch": 0.12, "learning_rate": 4.79209555298281e-05, "loss": 0.0534, "step": 1847000 }, { "epoch": 0.12, "learning_rate": 4.7920392483694466e-05, "loss": 0.0504, "step": 1847500 }, { "epoch": 0.12, "learning_rate": 4.791982943756083e-05, "loss": 0.0485, "step": 1848000 }, { "epoch": 0.12, "learning_rate": 4.79192663914272e-05, "loss": 0.0538, "step": 1848500 }, { "epoch": 0.12, "learning_rate": 4.7918704471385826e-05, "loss": 0.0531, "step": 1849000 }, { "epoch": 0.12, "learning_rate": 4.7918141425252197e-05, "loss": 0.0531, "step": 1849500 }, { "epoch": 0.12, "learning_rate": 4.791757837911856e-05, "loss": 0.0493, "step": 1850000 }, { "epoch": 0.13, "learning_rate": 4.7917015332984925e-05, "loss": 0.0487, "step": 1850500 }, { "epoch": 0.13, "learning_rate": 4.791645228685129e-05, "loss": 0.0537, "step": 1851000 }, { "epoch": 0.13, "learning_rate": 4.791589036680992e-05, "loss": 0.0498, "step": 1851500 }, { "epoch": 0.13, "learning_rate": 4.7915327320676285e-05, "loss": 0.0478, "step": 1852000 }, { "epoch": 0.13, "learning_rate": 4.791476427454265e-05, "loss": 0.0512, "step": 1852500 }, { "epoch": 0.13, "learning_rate": 4.791420122840901e-05, "loss": 0.0493, "step": 1853000 }, { "epoch": 0.13, "learning_rate": 4.7913639308367645e-05, "loss": 0.0523, "step": 1853500 }, { "epoch": 0.13, "learning_rate": 4.791307626223401e-05, "loss": 0.0531, "step": 1854000 }, { "epoch": 0.13, "learning_rate": 4.791251321610037e-05, "loss": 0.0498, "step": 1854500 }, { "epoch": 0.13, "learning_rate": 4.7911950169966744e-05, "loss": 0.0548, "step": 1855000 }, { "epoch": 0.13, "learning_rate": 4.79113871238331e-05, "loss": 0.0515, "step": 1855500 }, { "epoch": 0.13, "learning_rate": 4.7910824077699465e-05, "loss": 0.0531, "step": 1856000 }, { "epoch": 0.13, "learning_rate": 4.7910261031565836e-05, "loss": 0.0481, "step": 1856500 }, { "epoch": 0.13, "learning_rate": 4.790969911152446e-05, "loss": 0.0503, "step": 1857000 }, { "epoch": 0.13, "learning_rate": 4.7909137191483093e-05, "loss": 0.0495, "step": 1857500 }, { "epoch": 0.13, "learning_rate": 4.790857414534946e-05, "loss": 0.0534, "step": 1858000 }, { "epoch": 0.13, "learning_rate": 4.790801109921583e-05, "loss": 0.0495, "step": 1858500 }, { "epoch": 0.13, "learning_rate": 4.7907448053082185e-05, "loss": 0.0536, "step": 1859000 }, { "epoch": 0.13, "learning_rate": 4.7906885006948556e-05, "loss": 0.0528, "step": 1859500 }, { "epoch": 0.13, "learning_rate": 4.790632196081492e-05, "loss": 0.052, "step": 1860000 }, { "epoch": 0.13, "learning_rate": 4.7905758914681284e-05, "loss": 0.0499, "step": 1860500 }, { "epoch": 0.13, "learning_rate": 4.790519586854765e-05, "loss": 0.0527, "step": 1861000 }, { "epoch": 0.13, "learning_rate": 4.790463394850628e-05, "loss": 0.05, "step": 1861500 }, { "epoch": 0.13, "learning_rate": 4.7904070902372644e-05, "loss": 0.0564, "step": 1862000 }, { "epoch": 0.13, "learning_rate": 4.790350785623901e-05, "loss": 0.0542, "step": 1862500 }, { "epoch": 0.13, "learning_rate": 4.790294481010538e-05, "loss": 0.0512, "step": 1863000 }, { "epoch": 0.13, "learning_rate": 4.7902381763971736e-05, "loss": 0.0499, "step": 1863500 }, { "epoch": 0.13, "learning_rate": 4.7901819843930375e-05, "loss": 0.0468, "step": 1864000 }, { "epoch": 0.13, "learning_rate": 4.790125679779673e-05, "loss": 0.0515, "step": 1864500 }, { "epoch": 0.13, "learning_rate": 4.79006937516631e-05, "loss": 0.0503, "step": 1865000 }, { "epoch": 0.13, "learning_rate": 4.790013070552947e-05, "loss": 0.0532, "step": 1865500 }, { "epoch": 0.13, "learning_rate": 4.7899567659395825e-05, "loss": 0.0508, "step": 1866000 }, { "epoch": 0.13, "learning_rate": 4.7899004613262195e-05, "loss": 0.056, "step": 1866500 }, { "epoch": 0.13, "learning_rate": 4.789844156712856e-05, "loss": 0.0511, "step": 1867000 }, { "epoch": 0.13, "learning_rate": 4.789787964708719e-05, "loss": 0.0512, "step": 1867500 }, { "epoch": 0.13, "learning_rate": 4.7897316600953556e-05, "loss": 0.0463, "step": 1868000 }, { "epoch": 0.13, "learning_rate": 4.789675355481992e-05, "loss": 0.0558, "step": 1868500 }, { "epoch": 0.13, "learning_rate": 4.7896190508686284e-05, "loss": 0.0522, "step": 1869000 }, { "epoch": 0.13, "learning_rate": 4.789562746255265e-05, "loss": 0.0488, "step": 1869500 }, { "epoch": 0.13, "learning_rate": 4.789506554251128e-05, "loss": 0.0532, "step": 1870000 }, { "epoch": 0.13, "learning_rate": 4.7894502496377644e-05, "loss": 0.0544, "step": 1870500 }, { "epoch": 0.13, "learning_rate": 4.789393945024401e-05, "loss": 0.049, "step": 1871000 }, { "epoch": 0.13, "learning_rate": 4.789337640411037e-05, "loss": 0.0465, "step": 1871500 }, { "epoch": 0.13, "learning_rate": 4.789281335797674e-05, "loss": 0.0572, "step": 1872000 }, { "epoch": 0.13, "learning_rate": 4.78922503118431e-05, "loss": 0.0507, "step": 1872500 }, { "epoch": 0.13, "learning_rate": 4.789168839180174e-05, "loss": 0.0528, "step": 1873000 }, { "epoch": 0.13, "learning_rate": 4.7891125345668096e-05, "loss": 0.0489, "step": 1873500 }, { "epoch": 0.13, "learning_rate": 4.789056229953447e-05, "loss": 0.0479, "step": 1874000 }, { "epoch": 0.13, "learning_rate": 4.788999925340083e-05, "loss": 0.0498, "step": 1874500 }, { "epoch": 0.13, "learning_rate": 4.788943620726719e-05, "loss": 0.0494, "step": 1875000 }, { "epoch": 0.13, "learning_rate": 4.788887316113356e-05, "loss": 0.0532, "step": 1875500 }, { "epoch": 0.13, "learning_rate": 4.7888311241092184e-05, "loss": 0.0536, "step": 1876000 }, { "epoch": 0.13, "learning_rate": 4.7887748194958555e-05, "loss": 0.0526, "step": 1876500 }, { "epoch": 0.13, "learning_rate": 4.788718514882492e-05, "loss": 0.0505, "step": 1877000 }, { "epoch": 0.13, "learning_rate": 4.788662210269129e-05, "loss": 0.0481, "step": 1877500 }, { "epoch": 0.13, "learning_rate": 4.788605905655765e-05, "loss": 0.0471, "step": 1878000 }, { "epoch": 0.13, "learning_rate": 4.7885497136516286e-05, "loss": 0.0497, "step": 1878500 }, { "epoch": 0.13, "learning_rate": 4.788493409038264e-05, "loss": 0.0538, "step": 1879000 }, { "epoch": 0.13, "learning_rate": 4.788437104424901e-05, "loss": 0.0524, "step": 1879500 }, { "epoch": 0.13, "learning_rate": 4.788380799811538e-05, "loss": 0.0492, "step": 1880000 }, { "epoch": 0.13, "learning_rate": 4.7883244951981735e-05, "loss": 0.0497, "step": 1880500 }, { "epoch": 0.13, "learning_rate": 4.7882683031940374e-05, "loss": 0.0497, "step": 1881000 }, { "epoch": 0.13, "learning_rate": 4.788211998580673e-05, "loss": 0.049, "step": 1881500 }, { "epoch": 0.13, "learning_rate": 4.78815569396731e-05, "loss": 0.0508, "step": 1882000 }, { "epoch": 0.13, "learning_rate": 4.7880993893539466e-05, "loss": 0.0504, "step": 1882500 }, { "epoch": 0.13, "learning_rate": 4.788043084740583e-05, "loss": 0.0511, "step": 1883000 }, { "epoch": 0.13, "learning_rate": 4.7879867801272194e-05, "loss": 0.0498, "step": 1883500 }, { "epoch": 0.13, "learning_rate": 4.787930475513856e-05, "loss": 0.0496, "step": 1884000 }, { "epoch": 0.13, "learning_rate": 4.787874170900492e-05, "loss": 0.0494, "step": 1884500 }, { "epoch": 0.13, "learning_rate": 4.7878179788963554e-05, "loss": 0.046, "step": 1885000 }, { "epoch": 0.13, "learning_rate": 4.787761674282992e-05, "loss": 0.0486, "step": 1885500 }, { "epoch": 0.13, "learning_rate": 4.787705369669628e-05, "loss": 0.0563, "step": 1886000 }, { "epoch": 0.13, "learning_rate": 4.787649065056265e-05, "loss": 0.0484, "step": 1886500 }, { "epoch": 0.13, "learning_rate": 4.787592760442901e-05, "loss": 0.0489, "step": 1887000 }, { "epoch": 0.13, "learning_rate": 4.787536568438765e-05, "loss": 0.0476, "step": 1887500 }, { "epoch": 0.13, "learning_rate": 4.7874802638254006e-05, "loss": 0.051, "step": 1888000 }, { "epoch": 0.13, "learning_rate": 4.787423959212037e-05, "loss": 0.0518, "step": 1888500 }, { "epoch": 0.13, "learning_rate": 4.787367654598674e-05, "loss": 0.0503, "step": 1889000 }, { "epoch": 0.13, "learning_rate": 4.7873113499853105e-05, "loss": 0.0512, "step": 1889500 }, { "epoch": 0.13, "learning_rate": 4.787255157981174e-05, "loss": 0.0509, "step": 1890000 }, { "epoch": 0.13, "learning_rate": 4.78719885336781e-05, "loss": 0.0547, "step": 1890500 }, { "epoch": 0.13, "learning_rate": 4.7871425487544465e-05, "loss": 0.0527, "step": 1891000 }, { "epoch": 0.13, "learning_rate": 4.787086244141083e-05, "loss": 0.051, "step": 1891500 }, { "epoch": 0.13, "learning_rate": 4.787030052136946e-05, "loss": 0.0515, "step": 1892000 }, { "epoch": 0.13, "learning_rate": 4.7869737475235826e-05, "loss": 0.0516, "step": 1892500 }, { "epoch": 0.13, "learning_rate": 4.786917442910219e-05, "loss": 0.0529, "step": 1893000 }, { "epoch": 0.13, "learning_rate": 4.7868611382968554e-05, "loss": 0.0534, "step": 1893500 }, { "epoch": 0.13, "learning_rate": 4.786804833683492e-05, "loss": 0.0493, "step": 1894000 }, { "epoch": 0.13, "learning_rate": 4.786748641679355e-05, "loss": 0.0517, "step": 1894500 }, { "epoch": 0.13, "learning_rate": 4.7866923370659914e-05, "loss": 0.0502, "step": 1895000 }, { "epoch": 0.13, "learning_rate": 4.7866360324526285e-05, "loss": 0.0495, "step": 1895500 }, { "epoch": 0.13, "learning_rate": 4.786579727839264e-05, "loss": 0.0539, "step": 1896000 }, { "epoch": 0.13, "learning_rate": 4.786523423225901e-05, "loss": 0.0494, "step": 1896500 }, { "epoch": 0.13, "learning_rate": 4.786467231221764e-05, "loss": 0.0521, "step": 1897000 }, { "epoch": 0.13, "learning_rate": 4.786410926608401e-05, "loss": 0.0525, "step": 1897500 }, { "epoch": 0.13, "learning_rate": 4.786354621995037e-05, "loss": 0.0513, "step": 1898000 }, { "epoch": 0.13, "learning_rate": 4.786298317381673e-05, "loss": 0.0518, "step": 1898500 }, { "epoch": 0.13, "learning_rate": 4.78624201276831e-05, "loss": 0.0488, "step": 1899000 }, { "epoch": 0.13, "learning_rate": 4.7861857081549465e-05, "loss": 0.0523, "step": 1899500 }, { "epoch": 0.13, "learning_rate": 4.786129403541583e-05, "loss": 0.0498, "step": 1900000 }, { "epoch": 0.13, "learning_rate": 4.786073211537446e-05, "loss": 0.0501, "step": 1900500 }, { "epoch": 0.13, "learning_rate": 4.7860169069240825e-05, "loss": 0.0504, "step": 1901000 }, { "epoch": 0.13, "learning_rate": 4.785960602310719e-05, "loss": 0.0508, "step": 1901500 }, { "epoch": 0.13, "learning_rate": 4.785904297697355e-05, "loss": 0.0536, "step": 1902000 }, { "epoch": 0.13, "learning_rate": 4.7858479930839924e-05, "loss": 0.0491, "step": 1902500 }, { "epoch": 0.13, "learning_rate": 4.785791688470628e-05, "loss": 0.0518, "step": 1903000 }, { "epoch": 0.13, "learning_rate": 4.785735383857265e-05, "loss": 0.0542, "step": 1903500 }, { "epoch": 0.13, "learning_rate": 4.785679191853128e-05, "loss": 0.049, "step": 1904000 }, { "epoch": 0.13, "learning_rate": 4.785622887239765e-05, "loss": 0.0494, "step": 1904500 }, { "epoch": 0.13, "learning_rate": 4.785566582626401e-05, "loss": 0.0518, "step": 1905000 }, { "epoch": 0.13, "learning_rate": 4.7855102780130376e-05, "loss": 0.0543, "step": 1905500 }, { "epoch": 0.13, "learning_rate": 4.785453973399674e-05, "loss": 0.052, "step": 1906000 }, { "epoch": 0.13, "learning_rate": 4.785397781395537e-05, "loss": 0.051, "step": 1906500 }, { "epoch": 0.13, "learning_rate": 4.7853414767821736e-05, "loss": 0.047, "step": 1907000 }, { "epoch": 0.13, "learning_rate": 4.78528517216881e-05, "loss": 0.0543, "step": 1907500 }, { "epoch": 0.13, "learning_rate": 4.7852288675554464e-05, "loss": 0.0462, "step": 1908000 }, { "epoch": 0.13, "learning_rate": 4.785172562942083e-05, "loss": 0.0525, "step": 1908500 }, { "epoch": 0.13, "learning_rate": 4.785116370937946e-05, "loss": 0.0499, "step": 1909000 }, { "epoch": 0.13, "learning_rate": 4.7850600663245824e-05, "loss": 0.0494, "step": 1909500 }, { "epoch": 0.13, "learning_rate": 4.7850037617112195e-05, "loss": 0.0511, "step": 1910000 }, { "epoch": 0.13, "learning_rate": 4.784947457097855e-05, "loss": 0.0532, "step": 1910500 }, { "epoch": 0.13, "learning_rate": 4.7848911524844916e-05, "loss": 0.0489, "step": 1911000 }, { "epoch": 0.13, "learning_rate": 4.784834847871129e-05, "loss": 0.0512, "step": 1911500 }, { "epoch": 0.13, "learning_rate": 4.784778655866991e-05, "loss": 0.0515, "step": 1912000 }, { "epoch": 0.13, "learning_rate": 4.784722351253628e-05, "loss": 0.0521, "step": 1912500 }, { "epoch": 0.13, "learning_rate": 4.784666046640264e-05, "loss": 0.0517, "step": 1913000 }, { "epoch": 0.13, "learning_rate": 4.784609742026901e-05, "loss": 0.0525, "step": 1913500 }, { "epoch": 0.13, "learning_rate": 4.7845534374135375e-05, "loss": 0.0483, "step": 1914000 }, { "epoch": 0.13, "learning_rate": 4.784497245409401e-05, "loss": 0.0497, "step": 1914500 }, { "epoch": 0.13, "learning_rate": 4.784440940796037e-05, "loss": 0.0489, "step": 1915000 }, { "epoch": 0.13, "learning_rate": 4.7843846361826735e-05, "loss": 0.0521, "step": 1915500 }, { "epoch": 0.13, "learning_rate": 4.78432833156931e-05, "loss": 0.051, "step": 1916000 }, { "epoch": 0.13, "learning_rate": 4.784272139565173e-05, "loss": 0.0511, "step": 1916500 }, { "epoch": 0.13, "learning_rate": 4.7842158349518096e-05, "loss": 0.0485, "step": 1917000 }, { "epoch": 0.13, "learning_rate": 4.784159530338446e-05, "loss": 0.0523, "step": 1917500 }, { "epoch": 0.13, "learning_rate": 4.784103225725083e-05, "loss": 0.0529, "step": 1918000 }, { "epoch": 0.13, "learning_rate": 4.7840470337209456e-05, "loss": 0.0486, "step": 1918500 }, { "epoch": 0.13, "learning_rate": 4.783990729107583e-05, "loss": 0.052, "step": 1919000 }, { "epoch": 0.13, "learning_rate": 4.7839344244942184e-05, "loss": 0.0569, "step": 1919500 }, { "epoch": 0.13, "learning_rate": 4.7838781198808555e-05, "loss": 0.0529, "step": 1920000 }, { "epoch": 0.13, "learning_rate": 4.783821927876718e-05, "loss": 0.0521, "step": 1920500 }, { "epoch": 0.13, "learning_rate": 4.783765735872582e-05, "loss": 0.0483, "step": 1921000 }, { "epoch": 0.13, "learning_rate": 4.7837094312592176e-05, "loss": 0.0506, "step": 1921500 }, { "epoch": 0.13, "learning_rate": 4.783653126645855e-05, "loss": 0.0479, "step": 1922000 }, { "epoch": 0.13, "learning_rate": 4.783596822032491e-05, "loss": 0.0449, "step": 1922500 }, { "epoch": 0.13, "learning_rate": 4.7835405174191275e-05, "loss": 0.049, "step": 1923000 }, { "epoch": 0.13, "learning_rate": 4.783484212805764e-05, "loss": 0.0507, "step": 1923500 }, { "epoch": 0.13, "learning_rate": 4.7834279081924e-05, "loss": 0.053, "step": 1924000 }, { "epoch": 0.13, "learning_rate": 4.783371603579037e-05, "loss": 0.0521, "step": 1924500 }, { "epoch": 0.13, "learning_rate": 4.7833154115749e-05, "loss": 0.0527, "step": 1925000 }, { "epoch": 0.13, "learning_rate": 4.783259106961536e-05, "loss": 0.0471, "step": 1925500 }, { "epoch": 0.13, "learning_rate": 4.783202802348173e-05, "loss": 0.0505, "step": 1926000 }, { "epoch": 0.13, "learning_rate": 4.78314649773481e-05, "loss": 0.0505, "step": 1926500 }, { "epoch": 0.13, "learning_rate": 4.7830903057306723e-05, "loss": 0.0518, "step": 1927000 }, { "epoch": 0.13, "learning_rate": 4.7830340011173094e-05, "loss": 0.0499, "step": 1927500 }, { "epoch": 0.13, "learning_rate": 4.782977696503945e-05, "loss": 0.0508, "step": 1928000 }, { "epoch": 0.13, "learning_rate": 4.7829213918905815e-05, "loss": 0.0507, "step": 1928500 }, { "epoch": 0.13, "learning_rate": 4.782865199886445e-05, "loss": 0.0502, "step": 1929000 }, { "epoch": 0.13, "learning_rate": 4.782808895273082e-05, "loss": 0.0506, "step": 1929500 }, { "epoch": 0.13, "learning_rate": 4.782752590659718e-05, "loss": 0.0499, "step": 1930000 }, { "epoch": 0.13, "learning_rate": 4.782696286046354e-05, "loss": 0.0538, "step": 1930500 }, { "epoch": 0.13, "learning_rate": 4.782640094042218e-05, "loss": 0.0533, "step": 1931000 }, { "epoch": 0.13, "learning_rate": 4.7825837894288536e-05, "loss": 0.0485, "step": 1931500 }, { "epoch": 0.13, "learning_rate": 4.782527484815491e-05, "loss": 0.0464, "step": 1932000 }, { "epoch": 0.13, "learning_rate": 4.782471180202127e-05, "loss": 0.0512, "step": 1932500 }, { "epoch": 0.13, "learning_rate": 4.78241498819799e-05, "loss": 0.0488, "step": 1933000 }, { "epoch": 0.13, "learning_rate": 4.782358683584627e-05, "loss": 0.0506, "step": 1933500 }, { "epoch": 0.13, "learning_rate": 4.782302378971263e-05, "loss": 0.0529, "step": 1934000 }, { "epoch": 0.13, "learning_rate": 4.7822460743578995e-05, "loss": 0.0529, "step": 1934500 }, { "epoch": 0.13, "learning_rate": 4.782189769744536e-05, "loss": 0.0467, "step": 1935000 }, { "epoch": 0.13, "learning_rate": 4.782133577740399e-05, "loss": 0.0505, "step": 1935500 }, { "epoch": 0.13, "learning_rate": 4.7820772731270355e-05, "loss": 0.0546, "step": 1936000 }, { "epoch": 0.13, "learning_rate": 4.7820209685136726e-05, "loss": 0.0515, "step": 1936500 }, { "epoch": 0.13, "learning_rate": 4.781964663900308e-05, "loss": 0.0471, "step": 1937000 }, { "epoch": 0.13, "learning_rate": 4.7819083592869454e-05, "loss": 0.0496, "step": 1937500 }, { "epoch": 0.13, "learning_rate": 4.781852054673582e-05, "loss": 0.0519, "step": 1938000 }, { "epoch": 0.13, "learning_rate": 4.781795862669445e-05, "loss": 0.0495, "step": 1938500 }, { "epoch": 0.13, "learning_rate": 4.7817395580560814e-05, "loss": 0.048, "step": 1939000 }, { "epoch": 0.13, "learning_rate": 4.781683253442718e-05, "loss": 0.049, "step": 1939500 }, { "epoch": 0.13, "learning_rate": 4.781626948829354e-05, "loss": 0.0521, "step": 1940000 }, { "epoch": 0.13, "learning_rate": 4.7815706442159906e-05, "loss": 0.0539, "step": 1940500 }, { "epoch": 0.13, "learning_rate": 4.781514452211854e-05, "loss": 0.0487, "step": 1941000 }, { "epoch": 0.13, "learning_rate": 4.78145814759849e-05, "loss": 0.0498, "step": 1941500 }, { "epoch": 0.13, "learning_rate": 4.7814018429851266e-05, "loss": 0.0512, "step": 1942000 }, { "epoch": 0.13, "learning_rate": 4.781345538371763e-05, "loss": 0.0493, "step": 1942500 }, { "epoch": 0.13, "learning_rate": 4.781289346367626e-05, "loss": 0.0503, "step": 1943000 }, { "epoch": 0.13, "learning_rate": 4.7812330417542626e-05, "loss": 0.0503, "step": 1943500 }, { "epoch": 0.13, "learning_rate": 4.7811767371409e-05, "loss": 0.0513, "step": 1944000 }, { "epoch": 0.13, "learning_rate": 4.7811204325275354e-05, "loss": 0.0504, "step": 1944500 }, { "epoch": 0.13, "learning_rate": 4.781064127914172e-05, "loss": 0.0514, "step": 1945000 }, { "epoch": 0.13, "learning_rate": 4.781007935910035e-05, "loss": 0.0503, "step": 1945500 }, { "epoch": 0.13, "learning_rate": 4.7809516312966715e-05, "loss": 0.0499, "step": 1946000 }, { "epoch": 0.13, "learning_rate": 4.7808953266833085e-05, "loss": 0.0525, "step": 1946500 }, { "epoch": 0.13, "learning_rate": 4.780839022069944e-05, "loss": 0.0491, "step": 1947000 }, { "epoch": 0.13, "learning_rate": 4.780782830065808e-05, "loss": 0.0518, "step": 1947500 }, { "epoch": 0.13, "learning_rate": 4.780726525452444e-05, "loss": 0.0511, "step": 1948000 }, { "epoch": 0.13, "learning_rate": 4.780670220839081e-05, "loss": 0.0492, "step": 1948500 }, { "epoch": 0.13, "learning_rate": 4.7806139162257174e-05, "loss": 0.0498, "step": 1949000 }, { "epoch": 0.13, "learning_rate": 4.780557611612354e-05, "loss": 0.0526, "step": 1949500 }, { "epoch": 0.13, "learning_rate": 4.780501419608217e-05, "loss": 0.0537, "step": 1950000 }, { "epoch": 0.13, "learning_rate": 4.7804451149948534e-05, "loss": 0.0517, "step": 1950500 }, { "epoch": 0.13, "learning_rate": 4.78038881038149e-05, "loss": 0.0518, "step": 1951000 }, { "epoch": 0.13, "learning_rate": 4.780332505768126e-05, "loss": 0.0508, "step": 1951500 }, { "epoch": 0.13, "learning_rate": 4.7802763137639894e-05, "loss": 0.0545, "step": 1952000 }, { "epoch": 0.13, "learning_rate": 4.780220009150626e-05, "loss": 0.0552, "step": 1952500 }, { "epoch": 0.13, "learning_rate": 4.780163704537263e-05, "loss": 0.0506, "step": 1953000 }, { "epoch": 0.13, "learning_rate": 4.7801075125331254e-05, "loss": 0.052, "step": 1953500 }, { "epoch": 0.13, "learning_rate": 4.7800512079197625e-05, "loss": 0.0497, "step": 1954000 }, { "epoch": 0.13, "learning_rate": 4.779994903306398e-05, "loss": 0.0561, "step": 1954500 }, { "epoch": 0.13, "learning_rate": 4.779938598693035e-05, "loss": 0.0523, "step": 1955000 }, { "epoch": 0.13, "learning_rate": 4.779882294079672e-05, "loss": 0.052, "step": 1955500 }, { "epoch": 0.13, "learning_rate": 4.779825989466308e-05, "loss": 0.0505, "step": 1956000 }, { "epoch": 0.13, "learning_rate": 4.7797696848529445e-05, "loss": 0.0468, "step": 1956500 }, { "epoch": 0.13, "learning_rate": 4.779713380239581e-05, "loss": 0.0539, "step": 1957000 }, { "epoch": 0.13, "learning_rate": 4.779657075626217e-05, "loss": 0.0498, "step": 1957500 }, { "epoch": 0.13, "learning_rate": 4.7796008836220805e-05, "loss": 0.0532, "step": 1958000 }, { "epoch": 0.13, "learning_rate": 4.779544579008717e-05, "loss": 0.0526, "step": 1958500 }, { "epoch": 0.13, "learning_rate": 4.779488274395353e-05, "loss": 0.0536, "step": 1959000 }, { "epoch": 0.13, "learning_rate": 4.77943196978199e-05, "loss": 0.0538, "step": 1959500 }, { "epoch": 0.13, "learning_rate": 4.779375665168626e-05, "loss": 0.0499, "step": 1960000 }, { "epoch": 0.13, "learning_rate": 4.77931947316449e-05, "loss": 0.0477, "step": 1960500 }, { "epoch": 0.13, "learning_rate": 4.779263168551126e-05, "loss": 0.0478, "step": 1961000 }, { "epoch": 0.13, "learning_rate": 4.7792069765469896e-05, "loss": 0.0483, "step": 1961500 }, { "epoch": 0.13, "learning_rate": 4.7791506719336254e-05, "loss": 0.0514, "step": 1962000 }, { "epoch": 0.13, "learning_rate": 4.779094367320262e-05, "loss": 0.0494, "step": 1962500 }, { "epoch": 0.13, "learning_rate": 4.779038062706899e-05, "loss": 0.0528, "step": 1963000 }, { "epoch": 0.13, "learning_rate": 4.7789817580935346e-05, "loss": 0.0504, "step": 1963500 }, { "epoch": 0.13, "learning_rate": 4.7789254534801716e-05, "loss": 0.0467, "step": 1964000 }, { "epoch": 0.13, "learning_rate": 4.778869148866808e-05, "loss": 0.0502, "step": 1964500 }, { "epoch": 0.13, "learning_rate": 4.7788128442534444e-05, "loss": 0.0516, "step": 1965000 }, { "epoch": 0.13, "learning_rate": 4.778756539640081e-05, "loss": 0.0506, "step": 1965500 }, { "epoch": 0.13, "learning_rate": 4.778700347635944e-05, "loss": 0.0512, "step": 1966000 }, { "epoch": 0.13, "learning_rate": 4.7786440430225804e-05, "loss": 0.049, "step": 1966500 }, { "epoch": 0.13, "learning_rate": 4.778587738409217e-05, "loss": 0.0517, "step": 1967000 }, { "epoch": 0.13, "learning_rate": 4.778531433795854e-05, "loss": 0.049, "step": 1967500 }, { "epoch": 0.13, "learning_rate": 4.7784751291824896e-05, "loss": 0.0532, "step": 1968000 }, { "epoch": 0.13, "learning_rate": 4.7784189371783535e-05, "loss": 0.0498, "step": 1968500 }, { "epoch": 0.13, "learning_rate": 4.778362632564989e-05, "loss": 0.0535, "step": 1969000 }, { "epoch": 0.13, "learning_rate": 4.778306440560853e-05, "loss": 0.0525, "step": 1969500 }, { "epoch": 0.13, "learning_rate": 4.778250135947489e-05, "loss": 0.0482, "step": 1970000 }, { "epoch": 0.13, "learning_rate": 4.778193831334126e-05, "loss": 0.0484, "step": 1970500 }, { "epoch": 0.13, "learning_rate": 4.7781375267207624e-05, "loss": 0.0516, "step": 1971000 }, { "epoch": 0.13, "learning_rate": 4.778081222107398e-05, "loss": 0.0482, "step": 1971500 }, { "epoch": 0.13, "learning_rate": 4.778024917494035e-05, "loss": 0.0538, "step": 1972000 }, { "epoch": 0.13, "learning_rate": 4.7779686128806716e-05, "loss": 0.0484, "step": 1972500 }, { "epoch": 0.13, "learning_rate": 4.777912308267308e-05, "loss": 0.0517, "step": 1973000 }, { "epoch": 0.13, "learning_rate": 4.777856116263171e-05, "loss": 0.0537, "step": 1973500 }, { "epoch": 0.13, "learning_rate": 4.7777998116498076e-05, "loss": 0.0493, "step": 1974000 }, { "epoch": 0.13, "learning_rate": 4.777743619645671e-05, "loss": 0.0474, "step": 1974500 }, { "epoch": 0.13, "learning_rate": 4.777687315032307e-05, "loss": 0.0551, "step": 1975000 }, { "epoch": 0.13, "learning_rate": 4.7776310104189436e-05, "loss": 0.0529, "step": 1975500 }, { "epoch": 0.13, "learning_rate": 4.77757470580558e-05, "loss": 0.0478, "step": 1976000 }, { "epoch": 0.13, "learning_rate": 4.7775184011922164e-05, "loss": 0.0529, "step": 1976500 }, { "epoch": 0.13, "learning_rate": 4.777462096578853e-05, "loss": 0.0472, "step": 1977000 }, { "epoch": 0.13, "learning_rate": 4.77740579196549e-05, "loss": 0.051, "step": 1977500 }, { "epoch": 0.13, "learning_rate": 4.777349487352126e-05, "loss": 0.0534, "step": 1978000 }, { "epoch": 0.13, "learning_rate": 4.777293182738763e-05, "loss": 0.0505, "step": 1978500 }, { "epoch": 0.13, "learning_rate": 4.777236878125399e-05, "loss": 0.0543, "step": 1979000 }, { "epoch": 0.13, "learning_rate": 4.7771805735120355e-05, "loss": 0.0479, "step": 1979500 }, { "epoch": 0.13, "learning_rate": 4.777124381507899e-05, "loss": 0.0492, "step": 1980000 }, { "epoch": 0.13, "learning_rate": 4.777068076894535e-05, "loss": 0.0504, "step": 1980500 }, { "epoch": 0.13, "learning_rate": 4.7770117722811715e-05, "loss": 0.0495, "step": 1981000 }, { "epoch": 0.13, "learning_rate": 4.776955467667808e-05, "loss": 0.0481, "step": 1981500 }, { "epoch": 0.13, "learning_rate": 4.776899275663671e-05, "loss": 0.0503, "step": 1982000 }, { "epoch": 0.13, "learning_rate": 4.7768429710503075e-05, "loss": 0.0512, "step": 1982500 }, { "epoch": 0.13, "learning_rate": 4.7767866664369446e-05, "loss": 0.0503, "step": 1983000 }, { "epoch": 0.13, "learning_rate": 4.77673036182358e-05, "loss": 0.0513, "step": 1983500 }, { "epoch": 0.13, "learning_rate": 4.776674057210217e-05, "loss": 0.0532, "step": 1984000 }, { "epoch": 0.13, "learning_rate": 4.776617752596854e-05, "loss": 0.0504, "step": 1984500 }, { "epoch": 0.13, "learning_rate": 4.7765615605927163e-05, "loss": 0.0481, "step": 1985000 }, { "epoch": 0.13, "learning_rate": 4.7765052559793534e-05, "loss": 0.0482, "step": 1985500 }, { "epoch": 0.13, "learning_rate": 4.776448951365989e-05, "loss": 0.0518, "step": 1986000 }, { "epoch": 0.13, "learning_rate": 4.776392646752626e-05, "loss": 0.0499, "step": 1986500 }, { "epoch": 0.13, "learning_rate": 4.776336454748489e-05, "loss": 0.0469, "step": 1987000 }, { "epoch": 0.13, "learning_rate": 4.776280150135126e-05, "loss": 0.0469, "step": 1987500 }, { "epoch": 0.13, "learning_rate": 4.776223845521762e-05, "loss": 0.049, "step": 1988000 }, { "epoch": 0.13, "learning_rate": 4.7761675409083986e-05, "loss": 0.0496, "step": 1988500 }, { "epoch": 0.13, "learning_rate": 4.776111236295035e-05, "loss": 0.0486, "step": 1989000 }, { "epoch": 0.13, "learning_rate": 4.7760549316816714e-05, "loss": 0.0495, "step": 1989500 }, { "epoch": 0.13, "learning_rate": 4.7759987396775347e-05, "loss": 0.0501, "step": 1990000 }, { "epoch": 0.13, "learning_rate": 4.775942435064171e-05, "loss": 0.0533, "step": 1990500 }, { "epoch": 0.13, "learning_rate": 4.7758861304508075e-05, "loss": 0.0509, "step": 1991000 }, { "epoch": 0.13, "learning_rate": 4.775829825837444e-05, "loss": 0.0505, "step": 1991500 }, { "epoch": 0.13, "learning_rate": 4.775773521224081e-05, "loss": 0.047, "step": 1992000 }, { "epoch": 0.13, "learning_rate": 4.775717216610717e-05, "loss": 0.0478, "step": 1992500 }, { "epoch": 0.13, "learning_rate": 4.775660911997353e-05, "loss": 0.0474, "step": 1993000 }, { "epoch": 0.13, "learning_rate": 4.77560460738399e-05, "loss": 0.0516, "step": 1993500 }, { "epoch": 0.13, "learning_rate": 4.775548415379853e-05, "loss": 0.0509, "step": 1994000 }, { "epoch": 0.13, "learning_rate": 4.77549211076649e-05, "loss": 0.0486, "step": 1994500 }, { "epoch": 0.13, "learning_rate": 4.775435806153126e-05, "loss": 0.0502, "step": 1995000 }, { "epoch": 0.13, "learning_rate": 4.7753795015397626e-05, "loss": 0.0493, "step": 1995500 }, { "epoch": 0.13, "learning_rate": 4.775323196926399e-05, "loss": 0.0528, "step": 1996000 }, { "epoch": 0.13, "learning_rate": 4.7752668923130354e-05, "loss": 0.05, "step": 1996500 }, { "epoch": 0.13, "learning_rate": 4.775210587699672e-05, "loss": 0.0535, "step": 1997000 }, { "epoch": 0.13, "learning_rate": 4.775154395695535e-05, "loss": 0.0474, "step": 1997500 }, { "epoch": 0.13, "learning_rate": 4.7750980910821714e-05, "loss": 0.0505, "step": 1998000 }, { "epoch": 0.14, "learning_rate": 4.775041786468808e-05, "loss": 0.0484, "step": 1998500 }, { "epoch": 0.14, "learning_rate": 4.774985481855445e-05, "loss": 0.0507, "step": 1999000 }, { "epoch": 0.14, "learning_rate": 4.7749291772420806e-05, "loss": 0.0499, "step": 1999500 }, { "epoch": 0.14, "learning_rate": 4.7748729852379445e-05, "loss": 0.0499, "step": 2000000 }, { "epoch": 0.14, "learning_rate": 4.77481668062458e-05, "loss": 0.0461, "step": 2000500 }, { "epoch": 0.14, "learning_rate": 4.774760376011217e-05, "loss": 0.0542, "step": 2001000 }, { "epoch": 0.14, "learning_rate": 4.774704071397854e-05, "loss": 0.0536, "step": 2001500 }, { "epoch": 0.14, "learning_rate": 4.774647879393717e-05, "loss": 0.0522, "step": 2002000 }, { "epoch": 0.14, "learning_rate": 4.7745916873895794e-05, "loss": 0.0478, "step": 2002500 }, { "epoch": 0.14, "learning_rate": 4.7745353827762165e-05, "loss": 0.049, "step": 2003000 }, { "epoch": 0.14, "learning_rate": 4.774479078162853e-05, "loss": 0.0464, "step": 2003500 }, { "epoch": 0.14, "learning_rate": 4.7744227735494886e-05, "loss": 0.0514, "step": 2004000 }, { "epoch": 0.14, "learning_rate": 4.774366468936126e-05, "loss": 0.0473, "step": 2004500 }, { "epoch": 0.14, "learning_rate": 4.774310164322762e-05, "loss": 0.0484, "step": 2005000 }, { "epoch": 0.14, "learning_rate": 4.774253859709399e-05, "loss": 0.0489, "step": 2005500 }, { "epoch": 0.14, "learning_rate": 4.774197555096035e-05, "loss": 0.0484, "step": 2006000 }, { "epoch": 0.14, "learning_rate": 4.774141250482671e-05, "loss": 0.0508, "step": 2006500 }, { "epoch": 0.14, "learning_rate": 4.7740849458693084e-05, "loss": 0.0512, "step": 2007000 }, { "epoch": 0.14, "learning_rate": 4.774028753865171e-05, "loss": 0.051, "step": 2007500 }, { "epoch": 0.14, "learning_rate": 4.773972449251808e-05, "loss": 0.0509, "step": 2008000 }, { "epoch": 0.14, "learning_rate": 4.773916144638444e-05, "loss": 0.054, "step": 2008500 }, { "epoch": 0.14, "learning_rate": 4.773859840025081e-05, "loss": 0.0563, "step": 2009000 }, { "epoch": 0.14, "learning_rate": 4.7738036480209433e-05, "loss": 0.0479, "step": 2009500 }, { "epoch": 0.14, "learning_rate": 4.773747456016807e-05, "loss": 0.0524, "step": 2010000 }, { "epoch": 0.14, "learning_rate": 4.773691151403443e-05, "loss": 0.0532, "step": 2010500 }, { "epoch": 0.14, "learning_rate": 4.77363484679008e-05, "loss": 0.0528, "step": 2011000 }, { "epoch": 0.14, "learning_rate": 4.7735785421767164e-05, "loss": 0.0497, "step": 2011500 }, { "epoch": 0.14, "learning_rate": 4.773522237563353e-05, "loss": 0.0466, "step": 2012000 }, { "epoch": 0.14, "learning_rate": 4.773465932949989e-05, "loss": 0.0509, "step": 2012500 }, { "epoch": 0.14, "learning_rate": 4.7734096283366256e-05, "loss": 0.0516, "step": 2013000 }, { "epoch": 0.14, "learning_rate": 4.773353323723262e-05, "loss": 0.05, "step": 2013500 }, { "epoch": 0.14, "learning_rate": 4.7732970191098984e-05, "loss": 0.0496, "step": 2014000 }, { "epoch": 0.14, "learning_rate": 4.7732407144965355e-05, "loss": 0.0504, "step": 2014500 }, { "epoch": 0.14, "learning_rate": 4.773184409883171e-05, "loss": 0.0527, "step": 2015000 }, { "epoch": 0.14, "learning_rate": 4.7731281052698076e-05, "loss": 0.0474, "step": 2015500 }, { "epoch": 0.14, "learning_rate": 4.773072025874898e-05, "loss": 0.0472, "step": 2016000 }, { "epoch": 0.14, "learning_rate": 4.773015721261535e-05, "loss": 0.05, "step": 2016500 }, { "epoch": 0.14, "learning_rate": 4.7729594166481705e-05, "loss": 0.0502, "step": 2017000 }, { "epoch": 0.14, "learning_rate": 4.772903112034807e-05, "loss": 0.0475, "step": 2017500 }, { "epoch": 0.14, "learning_rate": 4.772846807421444e-05, "loss": 0.0474, "step": 2018000 }, { "epoch": 0.14, "learning_rate": 4.772790615417307e-05, "loss": 0.0528, "step": 2018500 }, { "epoch": 0.14, "learning_rate": 4.7727343108039436e-05, "loss": 0.0496, "step": 2019000 }, { "epoch": 0.14, "learning_rate": 4.772678006190579e-05, "loss": 0.048, "step": 2019500 }, { "epoch": 0.14, "learning_rate": 4.772621814186443e-05, "loss": 0.053, "step": 2020000 }, { "epoch": 0.14, "learning_rate": 4.772565509573079e-05, "loss": 0.0499, "step": 2020500 }, { "epoch": 0.14, "learning_rate": 4.772509204959716e-05, "loss": 0.0514, "step": 2021000 }, { "epoch": 0.14, "learning_rate": 4.7724529003463524e-05, "loss": 0.0501, "step": 2021500 }, { "epoch": 0.14, "learning_rate": 4.772396595732989e-05, "loss": 0.052, "step": 2022000 }, { "epoch": 0.14, "learning_rate": 4.772340291119625e-05, "loss": 0.0506, "step": 2022500 }, { "epoch": 0.14, "learning_rate": 4.7722839865062616e-05, "loss": 0.049, "step": 2023000 }, { "epoch": 0.14, "learning_rate": 4.772227681892899e-05, "loss": 0.0501, "step": 2023500 }, { "epoch": 0.14, "learning_rate": 4.772171489888761e-05, "loss": 0.0495, "step": 2024000 }, { "epoch": 0.14, "learning_rate": 4.772115185275398e-05, "loss": 0.0458, "step": 2024500 }, { "epoch": 0.14, "learning_rate": 4.772058880662034e-05, "loss": 0.0493, "step": 2025000 }, { "epoch": 0.14, "learning_rate": 4.772002576048671e-05, "loss": 0.046, "step": 2025500 }, { "epoch": 0.14, "learning_rate": 4.7719463840445336e-05, "loss": 0.0493, "step": 2026000 }, { "epoch": 0.14, "learning_rate": 4.771890079431171e-05, "loss": 0.0509, "step": 2026500 }, { "epoch": 0.14, "learning_rate": 4.771833774817807e-05, "loss": 0.0518, "step": 2027000 }, { "epoch": 0.14, "learning_rate": 4.7717774702044435e-05, "loss": 0.0493, "step": 2027500 }, { "epoch": 0.14, "learning_rate": 4.77172116559108e-05, "loss": 0.0446, "step": 2028000 }, { "epoch": 0.14, "learning_rate": 4.771664973586943e-05, "loss": 0.0472, "step": 2028500 }, { "epoch": 0.14, "learning_rate": 4.7716086689735795e-05, "loss": 0.051, "step": 2029000 }, { "epoch": 0.14, "learning_rate": 4.771552364360216e-05, "loss": 0.0445, "step": 2029500 }, { "epoch": 0.14, "learning_rate": 4.771496059746852e-05, "loss": 0.0502, "step": 2030000 }, { "epoch": 0.14, "learning_rate": 4.7714398677427156e-05, "loss": 0.0498, "step": 2030500 }, { "epoch": 0.14, "learning_rate": 4.771383563129352e-05, "loss": 0.048, "step": 2031000 }, { "epoch": 0.14, "learning_rate": 4.7713272585159884e-05, "loss": 0.0438, "step": 2031500 }, { "epoch": 0.14, "learning_rate": 4.7712709539026254e-05, "loss": 0.0481, "step": 2032000 }, { "epoch": 0.14, "learning_rate": 4.771214649289261e-05, "loss": 0.053, "step": 2032500 }, { "epoch": 0.14, "learning_rate": 4.7711583446758976e-05, "loss": 0.0532, "step": 2033000 }, { "epoch": 0.14, "learning_rate": 4.771102152671761e-05, "loss": 0.0507, "step": 2033500 }, { "epoch": 0.14, "learning_rate": 4.771045848058397e-05, "loss": 0.0459, "step": 2034000 }, { "epoch": 0.14, "learning_rate": 4.770989543445034e-05, "loss": 0.0456, "step": 2034500 }, { "epoch": 0.14, "learning_rate": 4.7709332388316707e-05, "loss": 0.0478, "step": 2035000 }, { "epoch": 0.14, "learning_rate": 4.770877046827534e-05, "loss": 0.0537, "step": 2035500 }, { "epoch": 0.14, "learning_rate": 4.77082074221417e-05, "loss": 0.0486, "step": 2036000 }, { "epoch": 0.14, "learning_rate": 4.770764437600807e-05, "loss": 0.0514, "step": 2036500 }, { "epoch": 0.14, "learning_rate": 4.770708132987443e-05, "loss": 0.0512, "step": 2037000 }, { "epoch": 0.14, "learning_rate": 4.770651940983306e-05, "loss": 0.0493, "step": 2037500 }, { "epoch": 0.14, "learning_rate": 4.770595636369943e-05, "loss": 0.051, "step": 2038000 }, { "epoch": 0.14, "learning_rate": 4.770539331756579e-05, "loss": 0.0547, "step": 2038500 }, { "epoch": 0.14, "learning_rate": 4.7704830271432155e-05, "loss": 0.049, "step": 2039000 }, { "epoch": 0.14, "learning_rate": 4.7704268351390794e-05, "loss": 0.052, "step": 2039500 }, { "epoch": 0.14, "learning_rate": 4.770370643134942e-05, "loss": 0.0496, "step": 2040000 }, { "epoch": 0.14, "learning_rate": 4.770314338521579e-05, "loss": 0.049, "step": 2040500 }, { "epoch": 0.14, "learning_rate": 4.770258033908215e-05, "loss": 0.0505, "step": 2041000 }, { "epoch": 0.14, "learning_rate": 4.770201729294851e-05, "loss": 0.0508, "step": 2041500 }, { "epoch": 0.14, "learning_rate": 4.770145424681488e-05, "loss": 0.0514, "step": 2042000 }, { "epoch": 0.14, "learning_rate": 4.770089120068124e-05, "loss": 0.0482, "step": 2042500 }, { "epoch": 0.14, "learning_rate": 4.770032815454761e-05, "loss": 0.0461, "step": 2043000 }, { "epoch": 0.14, "learning_rate": 4.7699765108413974e-05, "loss": 0.0479, "step": 2043500 }, { "epoch": 0.14, "learning_rate": 4.7699203188372606e-05, "loss": 0.0474, "step": 2044000 }, { "epoch": 0.14, "learning_rate": 4.769864014223897e-05, "loss": 0.0501, "step": 2044500 }, { "epoch": 0.14, "learning_rate": 4.7698077096105334e-05, "loss": 0.0497, "step": 2045000 }, { "epoch": 0.14, "learning_rate": 4.76975140499717e-05, "loss": 0.0468, "step": 2045500 }, { "epoch": 0.14, "learning_rate": 4.769695212993033e-05, "loss": 0.0481, "step": 2046000 }, { "epoch": 0.14, "learning_rate": 4.7696389083796695e-05, "loss": 0.05, "step": 2046500 }, { "epoch": 0.14, "learning_rate": 4.769582603766306e-05, "loss": 0.0526, "step": 2047000 }, { "epoch": 0.14, "learning_rate": 4.769526299152942e-05, "loss": 0.049, "step": 2047500 }, { "epoch": 0.14, "learning_rate": 4.7694699945395787e-05, "loss": 0.0465, "step": 2048000 }, { "epoch": 0.14, "learning_rate": 4.769413802535442e-05, "loss": 0.0504, "step": 2048500 }, { "epoch": 0.14, "learning_rate": 4.769357497922078e-05, "loss": 0.0542, "step": 2049000 }, { "epoch": 0.14, "learning_rate": 4.7693011933087153e-05, "loss": 0.0505, "step": 2049500 }, { "epoch": 0.14, "learning_rate": 4.769244888695351e-05, "loss": 0.05, "step": 2050000 }, { "epoch": 0.14, "learning_rate": 4.769188696691215e-05, "loss": 0.0508, "step": 2050500 }, { "epoch": 0.14, "learning_rate": 4.769132392077851e-05, "loss": 0.0522, "step": 2051000 }, { "epoch": 0.14, "learning_rate": 4.769076087464487e-05, "loss": 0.0484, "step": 2051500 }, { "epoch": 0.14, "learning_rate": 4.769019782851124e-05, "loss": 0.0519, "step": 2052000 }, { "epoch": 0.14, "learning_rate": 4.7689634782377606e-05, "loss": 0.0502, "step": 2052500 }, { "epoch": 0.14, "learning_rate": 4.768907286233624e-05, "loss": 0.0509, "step": 2053000 }, { "epoch": 0.14, "learning_rate": 4.76885098162026e-05, "loss": 0.0478, "step": 2053500 }, { "epoch": 0.14, "learning_rate": 4.7687946770068966e-05, "loss": 0.0487, "step": 2054000 }, { "epoch": 0.14, "learning_rate": 4.768738372393533e-05, "loss": 0.0521, "step": 2054500 }, { "epoch": 0.14, "learning_rate": 4.768682180389396e-05, "loss": 0.0502, "step": 2055000 }, { "epoch": 0.14, "learning_rate": 4.7686258757760326e-05, "loss": 0.0465, "step": 2055500 }, { "epoch": 0.14, "learning_rate": 4.768569571162669e-05, "loss": 0.0534, "step": 2056000 }, { "epoch": 0.14, "learning_rate": 4.7685132665493054e-05, "loss": 0.0474, "step": 2056500 }, { "epoch": 0.14, "learning_rate": 4.768456961935942e-05, "loss": 0.0461, "step": 2057000 }, { "epoch": 0.14, "learning_rate": 4.768400657322579e-05, "loss": 0.047, "step": 2057500 }, { "epoch": 0.14, "learning_rate": 4.7683443527092146e-05, "loss": 0.0468, "step": 2058000 }, { "epoch": 0.14, "learning_rate": 4.7682881607050785e-05, "loss": 0.0485, "step": 2058500 }, { "epoch": 0.14, "learning_rate": 4.768231856091714e-05, "loss": 0.0493, "step": 2059000 }, { "epoch": 0.14, "learning_rate": 4.768175551478351e-05, "loss": 0.0456, "step": 2059500 }, { "epoch": 0.14, "learning_rate": 4.768119246864988e-05, "loss": 0.0468, "step": 2060000 }, { "epoch": 0.14, "learning_rate": 4.768063054860851e-05, "loss": 0.052, "step": 2060500 }, { "epoch": 0.14, "learning_rate": 4.768006750247487e-05, "loss": 0.0507, "step": 2061000 }, { "epoch": 0.14, "learning_rate": 4.767950445634124e-05, "loss": 0.0492, "step": 2061500 }, { "epoch": 0.14, "learning_rate": 4.76789414102076e-05, "loss": 0.0491, "step": 2062000 }, { "epoch": 0.14, "learning_rate": 4.7678379490166233e-05, "loss": 0.0492, "step": 2062500 }, { "epoch": 0.14, "learning_rate": 4.76778164440326e-05, "loss": 0.0535, "step": 2063000 }, { "epoch": 0.14, "learning_rate": 4.767725339789896e-05, "loss": 0.0462, "step": 2063500 }, { "epoch": 0.14, "learning_rate": 4.7676690351765325e-05, "loss": 0.0478, "step": 2064000 }, { "epoch": 0.14, "learning_rate": 4.767612730563169e-05, "loss": 0.0468, "step": 2064500 }, { "epoch": 0.14, "learning_rate": 4.767556538559032e-05, "loss": 0.0472, "step": 2065000 }, { "epoch": 0.14, "learning_rate": 4.7675002339456686e-05, "loss": 0.0495, "step": 2065500 }, { "epoch": 0.14, "learning_rate": 4.7674439293323056e-05, "loss": 0.0494, "step": 2066000 }, { "epoch": 0.14, "learning_rate": 4.7673876247189414e-05, "loss": 0.052, "step": 2066500 }, { "epoch": 0.14, "learning_rate": 4.767331432714805e-05, "loss": 0.0505, "step": 2067000 }, { "epoch": 0.14, "learning_rate": 4.767275128101441e-05, "loss": 0.0517, "step": 2067500 }, { "epoch": 0.14, "learning_rate": 4.7672188234880774e-05, "loss": 0.0512, "step": 2068000 }, { "epoch": 0.14, "learning_rate": 4.7671625188747145e-05, "loss": 0.0488, "step": 2068500 }, { "epoch": 0.14, "learning_rate": 4.767106214261351e-05, "loss": 0.0511, "step": 2069000 }, { "epoch": 0.14, "learning_rate": 4.767050022257214e-05, "loss": 0.0447, "step": 2069500 }, { "epoch": 0.14, "learning_rate": 4.7669937176438505e-05, "loss": 0.0485, "step": 2070000 }, { "epoch": 0.14, "learning_rate": 4.766937413030487e-05, "loss": 0.0496, "step": 2070500 }, { "epoch": 0.14, "learning_rate": 4.766881108417123e-05, "loss": 0.0484, "step": 2071000 }, { "epoch": 0.14, "learning_rate": 4.76682480380376e-05, "loss": 0.0489, "step": 2071500 }, { "epoch": 0.14, "learning_rate": 4.766768499190396e-05, "loss": 0.051, "step": 2072000 }, { "epoch": 0.14, "learning_rate": 4.7667121945770325e-05, "loss": 0.0493, "step": 2072500 }, { "epoch": 0.14, "learning_rate": 4.766656002572896e-05, "loss": 0.0479, "step": 2073000 }, { "epoch": 0.14, "learning_rate": 4.766599697959532e-05, "loss": 0.0523, "step": 2073500 }, { "epoch": 0.14, "learning_rate": 4.766543393346169e-05, "loss": 0.0476, "step": 2074000 }, { "epoch": 0.14, "learning_rate": 4.766487088732805e-05, "loss": 0.0472, "step": 2074500 }, { "epoch": 0.14, "learning_rate": 4.766430784119442e-05, "loss": 0.0477, "step": 2075000 }, { "epoch": 0.14, "learning_rate": 4.7663745921153045e-05, "loss": 0.0487, "step": 2075500 }, { "epoch": 0.14, "learning_rate": 4.7663182875019416e-05, "loss": 0.0458, "step": 2076000 }, { "epoch": 0.14, "learning_rate": 4.766261982888578e-05, "loss": 0.0516, "step": 2076500 }, { "epoch": 0.14, "learning_rate": 4.766205678275214e-05, "loss": 0.0493, "step": 2077000 }, { "epoch": 0.14, "learning_rate": 4.7661494862710776e-05, "loss": 0.0484, "step": 2077500 }, { "epoch": 0.14, "learning_rate": 4.766093181657714e-05, "loss": 0.0496, "step": 2078000 }, { "epoch": 0.14, "learning_rate": 4.7660368770443504e-05, "loss": 0.052, "step": 2078500 }, { "epoch": 0.14, "learning_rate": 4.765980572430987e-05, "loss": 0.0506, "step": 2079000 }, { "epoch": 0.14, "learning_rate": 4.76592438042685e-05, "loss": 0.0508, "step": 2079500 }, { "epoch": 0.14, "learning_rate": 4.765868188422713e-05, "loss": 0.0497, "step": 2080000 }, { "epoch": 0.14, "learning_rate": 4.7658118838093497e-05, "loss": 0.0483, "step": 2080500 }, { "epoch": 0.14, "learning_rate": 4.765755579195986e-05, "loss": 0.0505, "step": 2081000 }, { "epoch": 0.14, "learning_rate": 4.7656992745826225e-05, "loss": 0.0509, "step": 2081500 }, { "epoch": 0.14, "learning_rate": 4.765642969969259e-05, "loss": 0.0488, "step": 2082000 }, { "epoch": 0.14, "learning_rate": 4.765586665355896e-05, "loss": 0.0511, "step": 2082500 }, { "epoch": 0.14, "learning_rate": 4.765530360742532e-05, "loss": 0.0512, "step": 2083000 }, { "epoch": 0.14, "learning_rate": 4.765474056129168e-05, "loss": 0.0514, "step": 2083500 }, { "epoch": 0.14, "learning_rate": 4.765417751515805e-05, "loss": 0.0523, "step": 2084000 }, { "epoch": 0.14, "learning_rate": 4.765361559511668e-05, "loss": 0.048, "step": 2084500 }, { "epoch": 0.14, "learning_rate": 4.765305254898305e-05, "loss": 0.0493, "step": 2085000 }, { "epoch": 0.14, "learning_rate": 4.765248950284941e-05, "loss": 0.051, "step": 2085500 }, { "epoch": 0.14, "learning_rate": 4.7651926456715776e-05, "loss": 0.0528, "step": 2086000 }, { "epoch": 0.14, "learning_rate": 4.765136341058214e-05, "loss": 0.0529, "step": 2086500 }, { "epoch": 0.14, "learning_rate": 4.765080149054077e-05, "loss": 0.0497, "step": 2087000 }, { "epoch": 0.14, "learning_rate": 4.7650239570499404e-05, "loss": 0.0516, "step": 2087500 }, { "epoch": 0.14, "learning_rate": 4.764967652436577e-05, "loss": 0.0484, "step": 2088000 }, { "epoch": 0.14, "learning_rate": 4.764911347823213e-05, "loss": 0.0499, "step": 2088500 }, { "epoch": 0.14, "learning_rate": 4.7648550432098496e-05, "loss": 0.0451, "step": 2089000 }, { "epoch": 0.14, "learning_rate": 4.764798738596486e-05, "loss": 0.0504, "step": 2089500 }, { "epoch": 0.14, "learning_rate": 4.7647424339831224e-05, "loss": 0.0514, "step": 2090000 }, { "epoch": 0.14, "learning_rate": 4.7646861293697595e-05, "loss": 0.0456, "step": 2090500 }, { "epoch": 0.14, "learning_rate": 4.764629824756395e-05, "loss": 0.0487, "step": 2091000 }, { "epoch": 0.14, "learning_rate": 4.764573520143032e-05, "loss": 0.0515, "step": 2091500 }, { "epoch": 0.14, "learning_rate": 4.764517328138895e-05, "loss": 0.0525, "step": 2092000 }, { "epoch": 0.14, "learning_rate": 4.764461023525532e-05, "loss": 0.0511, "step": 2092500 }, { "epoch": 0.14, "learning_rate": 4.764404718912168e-05, "loss": 0.0456, "step": 2093000 }, { "epoch": 0.14, "learning_rate": 4.764348414298804e-05, "loss": 0.0499, "step": 2093500 }, { "epoch": 0.14, "learning_rate": 4.764292222294668e-05, "loss": 0.0516, "step": 2094000 }, { "epoch": 0.14, "learning_rate": 4.7642359176813036e-05, "loss": 0.048, "step": 2094500 }, { "epoch": 0.14, "learning_rate": 4.764179613067941e-05, "loss": 0.0464, "step": 2095000 }, { "epoch": 0.14, "learning_rate": 4.764123308454577e-05, "loss": 0.0495, "step": 2095500 }, { "epoch": 0.14, "learning_rate": 4.7640670038412135e-05, "loss": 0.0492, "step": 2096000 }, { "epoch": 0.14, "learning_rate": 4.764010811837077e-05, "loss": 0.0508, "step": 2096500 }, { "epoch": 0.14, "learning_rate": 4.763954507223713e-05, "loss": 0.0465, "step": 2097000 }, { "epoch": 0.14, "learning_rate": 4.7638982026103495e-05, "loss": 0.048, "step": 2097500 }, { "epoch": 0.14, "learning_rate": 4.763841897996986e-05, "loss": 0.0499, "step": 2098000 }, { "epoch": 0.14, "learning_rate": 4.763785593383623e-05, "loss": 0.0475, "step": 2098500 }, { "epoch": 0.14, "learning_rate": 4.763729288770259e-05, "loss": 0.0463, "step": 2099000 }, { "epoch": 0.14, "learning_rate": 4.7636730967661226e-05, "loss": 0.0517, "step": 2099500 }, { "epoch": 0.14, "learning_rate": 4.7636167921527584e-05, "loss": 0.0495, "step": 2100000 }, { "epoch": 0.14, "learning_rate": 4.7635604875393954e-05, "loss": 0.0488, "step": 2100500 }, { "epoch": 0.14, "learning_rate": 4.763504182926032e-05, "loss": 0.0493, "step": 2101000 }, { "epoch": 0.14, "learning_rate": 4.763447990921895e-05, "loss": 0.0535, "step": 2101500 }, { "epoch": 0.14, "learning_rate": 4.7633916863085314e-05, "loss": 0.0443, "step": 2102000 }, { "epoch": 0.14, "learning_rate": 4.763335381695168e-05, "loss": 0.0534, "step": 2102500 }, { "epoch": 0.14, "learning_rate": 4.763279077081804e-05, "loss": 0.0537, "step": 2103000 }, { "epoch": 0.14, "learning_rate": 4.7632227724684406e-05, "loss": 0.0495, "step": 2103500 }, { "epoch": 0.14, "learning_rate": 4.763166467855077e-05, "loss": 0.0522, "step": 2104000 }, { "epoch": 0.14, "learning_rate": 4.7631101632417134e-05, "loss": 0.0454, "step": 2104500 }, { "epoch": 0.14, "learning_rate": 4.763053971237577e-05, "loss": 0.0507, "step": 2105000 }, { "epoch": 0.14, "learning_rate": 4.762997666624213e-05, "loss": 0.0483, "step": 2105500 }, { "epoch": 0.14, "learning_rate": 4.76294136201085e-05, "loss": 0.0472, "step": 2106000 }, { "epoch": 0.14, "learning_rate": 4.762885057397486e-05, "loss": 0.0479, "step": 2106500 }, { "epoch": 0.14, "learning_rate": 4.76282886539335e-05, "loss": 0.0511, "step": 2107000 }, { "epoch": 0.14, "learning_rate": 4.7627725607799855e-05, "loss": 0.05, "step": 2107500 }, { "epoch": 0.14, "learning_rate": 4.762716256166622e-05, "loss": 0.0483, "step": 2108000 }, { "epoch": 0.14, "learning_rate": 4.762659951553259e-05, "loss": 0.0495, "step": 2108500 }, { "epoch": 0.14, "learning_rate": 4.762603646939895e-05, "loss": 0.0477, "step": 2109000 }, { "epoch": 0.14, "learning_rate": 4.7625474549357586e-05, "loss": 0.0524, "step": 2109500 }, { "epoch": 0.14, "learning_rate": 4.762491150322394e-05, "loss": 0.05, "step": 2110000 }, { "epoch": 0.14, "learning_rate": 4.7624348457090314e-05, "loss": 0.0525, "step": 2110500 }, { "epoch": 0.14, "learning_rate": 4.762378541095668e-05, "loss": 0.0481, "step": 2111000 }, { "epoch": 0.14, "learning_rate": 4.762322236482304e-05, "loss": 0.0544, "step": 2111500 }, { "epoch": 0.14, "learning_rate": 4.7622659318689406e-05, "loss": 0.0479, "step": 2112000 }, { "epoch": 0.14, "learning_rate": 4.762209739864804e-05, "loss": 0.0518, "step": 2112500 }, { "epoch": 0.14, "learning_rate": 4.76215343525144e-05, "loss": 0.0478, "step": 2113000 }, { "epoch": 0.14, "learning_rate": 4.7620971306380766e-05, "loss": 0.0528, "step": 2113500 }, { "epoch": 0.14, "learning_rate": 4.762040826024714e-05, "loss": 0.0489, "step": 2114000 }, { "epoch": 0.14, "learning_rate": 4.7619845214113494e-05, "loss": 0.0501, "step": 2114500 }, { "epoch": 0.14, "learning_rate": 4.761928329407213e-05, "loss": 0.0486, "step": 2115000 }, { "epoch": 0.14, "learning_rate": 4.761872024793849e-05, "loss": 0.047, "step": 2115500 }, { "epoch": 0.14, "learning_rate": 4.761815720180486e-05, "loss": 0.0474, "step": 2116000 }, { "epoch": 0.14, "learning_rate": 4.7617594155671225e-05, "loss": 0.0497, "step": 2116500 }, { "epoch": 0.14, "learning_rate": 4.761703110953758e-05, "loss": 0.0519, "step": 2117000 }, { "epoch": 0.14, "learning_rate": 4.761646806340395e-05, "loss": 0.0536, "step": 2117500 }, { "epoch": 0.14, "learning_rate": 4.7615906143362585e-05, "loss": 0.0504, "step": 2118000 }, { "epoch": 0.14, "learning_rate": 4.761534309722895e-05, "loss": 0.0471, "step": 2118500 }, { "epoch": 0.14, "learning_rate": 4.761478005109531e-05, "loss": 0.0538, "step": 2119000 }, { "epoch": 0.14, "learning_rate": 4.761421700496168e-05, "loss": 0.05, "step": 2119500 }, { "epoch": 0.14, "learning_rate": 4.761365395882804e-05, "loss": 0.0489, "step": 2120000 }, { "epoch": 0.14, "learning_rate": 4.7613092038786673e-05, "loss": 0.0514, "step": 2120500 }, { "epoch": 0.14, "learning_rate": 4.761252899265304e-05, "loss": 0.049, "step": 2121000 }, { "epoch": 0.14, "learning_rate": 4.76119659465194e-05, "loss": 0.0469, "step": 2121500 }, { "epoch": 0.14, "learning_rate": 4.7611402900385765e-05, "loss": 0.0504, "step": 2122000 }, { "epoch": 0.14, "learning_rate": 4.7610840980344404e-05, "loss": 0.0489, "step": 2122500 }, { "epoch": 0.14, "learning_rate": 4.761027793421076e-05, "loss": 0.0479, "step": 2123000 }, { "epoch": 0.14, "learning_rate": 4.7609714888077126e-05, "loss": 0.0446, "step": 2123500 }, { "epoch": 0.14, "learning_rate": 4.7609151841943496e-05, "loss": 0.0482, "step": 2124000 }, { "epoch": 0.14, "learning_rate": 4.7608588795809854e-05, "loss": 0.0479, "step": 2124500 }, { "epoch": 0.14, "learning_rate": 4.760802687576849e-05, "loss": 0.0516, "step": 2125000 }, { "epoch": 0.14, "learning_rate": 4.760746382963485e-05, "loss": 0.0495, "step": 2125500 }, { "epoch": 0.14, "learning_rate": 4.760690078350122e-05, "loss": 0.0499, "step": 2126000 }, { "epoch": 0.14, "learning_rate": 4.7606337737367585e-05, "loss": 0.0477, "step": 2126500 }, { "epoch": 0.14, "learning_rate": 4.760577469123395e-05, "loss": 0.0466, "step": 2127000 }, { "epoch": 0.14, "learning_rate": 4.760521164510031e-05, "loss": 0.051, "step": 2127500 }, { "epoch": 0.14, "learning_rate": 4.7604649725058945e-05, "loss": 0.0523, "step": 2128000 }, { "epoch": 0.14, "learning_rate": 4.760408667892531e-05, "loss": 0.0471, "step": 2128500 }, { "epoch": 0.14, "learning_rate": 4.760352363279167e-05, "loss": 0.0476, "step": 2129000 }, { "epoch": 0.14, "learning_rate": 4.7602960586658044e-05, "loss": 0.0463, "step": 2129500 }, { "epoch": 0.14, "learning_rate": 4.76023975405244e-05, "loss": 0.0522, "step": 2130000 }, { "epoch": 0.14, "learning_rate": 4.760183562048304e-05, "loss": 0.0496, "step": 2130500 }, { "epoch": 0.14, "learning_rate": 4.76012725743494e-05, "loss": 0.0493, "step": 2131000 }, { "epoch": 0.14, "learning_rate": 4.7600710654308036e-05, "loss": 0.0509, "step": 2131500 }, { "epoch": 0.14, "learning_rate": 4.760014760817439e-05, "loss": 0.0499, "step": 2132000 }, { "epoch": 0.14, "learning_rate": 4.7599584562040764e-05, "loss": 0.0514, "step": 2132500 }, { "epoch": 0.14, "learning_rate": 4.759902151590713e-05, "loss": 0.0513, "step": 2133000 }, { "epoch": 0.14, "learning_rate": 4.7598458469773485e-05, "loss": 0.0535, "step": 2133500 }, { "epoch": 0.14, "learning_rate": 4.7597895423639856e-05, "loss": 0.0463, "step": 2134000 }, { "epoch": 0.14, "learning_rate": 4.759733237750622e-05, "loss": 0.0489, "step": 2134500 }, { "epoch": 0.14, "learning_rate": 4.7596769331372584e-05, "loss": 0.0457, "step": 2135000 }, { "epoch": 0.14, "learning_rate": 4.759620628523895e-05, "loss": 0.0504, "step": 2135500 }, { "epoch": 0.14, "learning_rate": 4.759564549128985e-05, "loss": 0.0501, "step": 2136000 }, { "epoch": 0.14, "learning_rate": 4.759508244515621e-05, "loss": 0.0513, "step": 2136500 }, { "epoch": 0.14, "learning_rate": 4.7594519399022576e-05, "loss": 0.0503, "step": 2137000 }, { "epoch": 0.14, "learning_rate": 4.759395635288894e-05, "loss": 0.0458, "step": 2137500 }, { "epoch": 0.14, "learning_rate": 4.7593393306755304e-05, "loss": 0.0511, "step": 2138000 }, { "epoch": 0.14, "learning_rate": 4.7592831386713937e-05, "loss": 0.0484, "step": 2138500 }, { "epoch": 0.14, "learning_rate": 4.75922683405803e-05, "loss": 0.0509, "step": 2139000 }, { "epoch": 0.14, "learning_rate": 4.7591705294446665e-05, "loss": 0.0492, "step": 2139500 }, { "epoch": 0.14, "learning_rate": 4.759114224831303e-05, "loss": 0.0489, "step": 2140000 }, { "epoch": 0.14, "learning_rate": 4.75905792021794e-05, "loss": 0.0554, "step": 2140500 }, { "epoch": 0.14, "learning_rate": 4.759001615604576e-05, "loss": 0.0498, "step": 2141000 }, { "epoch": 0.14, "learning_rate": 4.758945310991213e-05, "loss": 0.05, "step": 2141500 }, { "epoch": 0.14, "learning_rate": 4.758889006377849e-05, "loss": 0.049, "step": 2142000 }, { "epoch": 0.14, "learning_rate": 4.7588327017644855e-05, "loss": 0.0469, "step": 2142500 }, { "epoch": 0.14, "learning_rate": 4.758776397151122e-05, "loss": 0.0481, "step": 2143000 }, { "epoch": 0.14, "learning_rate": 4.758720092537758e-05, "loss": 0.0483, "step": 2143500 }, { "epoch": 0.14, "learning_rate": 4.758663787924395e-05, "loss": 0.0469, "step": 2144000 }, { "epoch": 0.14, "learning_rate": 4.758607595920258e-05, "loss": 0.049, "step": 2144500 }, { "epoch": 0.14, "learning_rate": 4.758551291306895e-05, "loss": 0.0489, "step": 2145000 }, { "epoch": 0.14, "learning_rate": 4.758494986693531e-05, "loss": 0.046, "step": 2145500 }, { "epoch": 0.14, "learning_rate": 4.758438682080167e-05, "loss": 0.0467, "step": 2146000 }, { "epoch": 0.15, "learning_rate": 4.758382377466804e-05, "loss": 0.0488, "step": 2146500 }, { "epoch": 0.15, "learning_rate": 4.75832607285344e-05, "loss": 0.0481, "step": 2147000 }, { "epoch": 0.15, "learning_rate": 4.758269880849304e-05, "loss": 0.0481, "step": 2147500 }, { "epoch": 0.15, "learning_rate": 4.7582135762359396e-05, "loss": 0.0487, "step": 2148000 }, { "epoch": 0.15, "learning_rate": 4.7581572716225766e-05, "loss": 0.0473, "step": 2148500 }, { "epoch": 0.15, "learning_rate": 4.758100967009213e-05, "loss": 0.0481, "step": 2149000 }, { "epoch": 0.15, "learning_rate": 4.7580446623958494e-05, "loss": 0.0473, "step": 2149500 }, { "epoch": 0.15, "learning_rate": 4.757988470391713e-05, "loss": 0.0524, "step": 2150000 }, { "epoch": 0.15, "learning_rate": 4.757932165778349e-05, "loss": 0.0504, "step": 2150500 }, { "epoch": 0.15, "learning_rate": 4.7578758611649855e-05, "loss": 0.0454, "step": 2151000 }, { "epoch": 0.15, "learning_rate": 4.757819556551622e-05, "loss": 0.0477, "step": 2151500 }, { "epoch": 0.15, "learning_rate": 4.757763364547485e-05, "loss": 0.0499, "step": 2152000 }, { "epoch": 0.15, "learning_rate": 4.7577070599341215e-05, "loss": 0.0468, "step": 2152500 }, { "epoch": 0.15, "learning_rate": 4.757650755320758e-05, "loss": 0.0521, "step": 2153000 }, { "epoch": 0.15, "learning_rate": 4.757594450707394e-05, "loss": 0.0451, "step": 2153500 }, { "epoch": 0.15, "learning_rate": 4.7575382587032575e-05, "loss": 0.0493, "step": 2154000 }, { "epoch": 0.15, "learning_rate": 4.757481954089894e-05, "loss": 0.0522, "step": 2154500 }, { "epoch": 0.15, "learning_rate": 4.757425649476531e-05, "loss": 0.0501, "step": 2155000 }, { "epoch": 0.15, "learning_rate": 4.7573693448631674e-05, "loss": 0.0471, "step": 2155500 }, { "epoch": 0.15, "learning_rate": 4.757313040249803e-05, "loss": 0.0462, "step": 2156000 }, { "epoch": 0.15, "learning_rate": 4.757256848245667e-05, "loss": 0.0512, "step": 2156500 }, { "epoch": 0.15, "learning_rate": 4.757200543632303e-05, "loss": 0.0477, "step": 2157000 }, { "epoch": 0.15, "learning_rate": 4.75714423901894e-05, "loss": 0.0488, "step": 2157500 }, { "epoch": 0.15, "learning_rate": 4.757087934405576e-05, "loss": 0.0506, "step": 2158000 }, { "epoch": 0.15, "learning_rate": 4.7570316297922126e-05, "loss": 0.0467, "step": 2158500 }, { "epoch": 0.15, "learning_rate": 4.756975437788076e-05, "loss": 0.0491, "step": 2159000 }, { "epoch": 0.15, "learning_rate": 4.756919133174712e-05, "loss": 0.0461, "step": 2159500 }, { "epoch": 0.15, "learning_rate": 4.7568628285613486e-05, "loss": 0.0501, "step": 2160000 }, { "epoch": 0.15, "learning_rate": 4.756806523947985e-05, "loss": 0.0494, "step": 2160500 }, { "epoch": 0.15, "learning_rate": 4.7567502193346214e-05, "loss": 0.0483, "step": 2161000 }, { "epoch": 0.15, "learning_rate": 4.7566940273304846e-05, "loss": 0.0515, "step": 2161500 }, { "epoch": 0.15, "learning_rate": 4.756637722717121e-05, "loss": 0.0465, "step": 2162000 }, { "epoch": 0.15, "learning_rate": 4.7565814181037574e-05, "loss": 0.0474, "step": 2162500 }, { "epoch": 0.15, "learning_rate": 4.7565251134903945e-05, "loss": 0.0519, "step": 2163000 }, { "epoch": 0.15, "learning_rate": 4.75646880887703e-05, "loss": 0.0471, "step": 2163500 }, { "epoch": 0.15, "learning_rate": 4.756412504263667e-05, "loss": 0.049, "step": 2164000 }, { "epoch": 0.15, "learning_rate": 4.756356199650304e-05, "loss": 0.0509, "step": 2164500 }, { "epoch": 0.15, "learning_rate": 4.756300007646167e-05, "loss": 0.0483, "step": 2165000 }, { "epoch": 0.15, "learning_rate": 4.756243703032803e-05, "loss": 0.0487, "step": 2165500 }, { "epoch": 0.15, "learning_rate": 4.756187398419439e-05, "loss": 0.0511, "step": 2166000 }, { "epoch": 0.15, "learning_rate": 4.756131093806076e-05, "loss": 0.0484, "step": 2166500 }, { "epoch": 0.15, "learning_rate": 4.7560747891927125e-05, "loss": 0.049, "step": 2167000 }, { "epoch": 0.15, "learning_rate": 4.756018597188576e-05, "loss": 0.0496, "step": 2167500 }, { "epoch": 0.15, "learning_rate": 4.755962292575212e-05, "loss": 0.0492, "step": 2168000 }, { "epoch": 0.15, "learning_rate": 4.755905987961849e-05, "loss": 0.0508, "step": 2168500 }, { "epoch": 0.15, "learning_rate": 4.755849683348485e-05, "loss": 0.0476, "step": 2169000 }, { "epoch": 0.15, "learning_rate": 4.755793491344349e-05, "loss": 0.048, "step": 2169500 }, { "epoch": 0.15, "learning_rate": 4.7557371867309846e-05, "loss": 0.0494, "step": 2170000 }, { "epoch": 0.15, "learning_rate": 4.755680882117621e-05, "loss": 0.0532, "step": 2170500 }, { "epoch": 0.15, "learning_rate": 4.755624577504258e-05, "loss": 0.0448, "step": 2171000 }, { "epoch": 0.15, "learning_rate": 4.755568385500121e-05, "loss": 0.0475, "step": 2171500 }, { "epoch": 0.15, "learning_rate": 4.755512080886758e-05, "loss": 0.0465, "step": 2172000 }, { "epoch": 0.15, "learning_rate": 4.7554557762733934e-05, "loss": 0.0475, "step": 2172500 }, { "epoch": 0.15, "learning_rate": 4.7553994716600305e-05, "loss": 0.0482, "step": 2173000 }, { "epoch": 0.15, "learning_rate": 4.755343167046667e-05, "loss": 0.0464, "step": 2173500 }, { "epoch": 0.15, "learning_rate": 4.755286862433303e-05, "loss": 0.0482, "step": 2174000 }, { "epoch": 0.15, "learning_rate": 4.7552306704291665e-05, "loss": 0.0459, "step": 2174500 }, { "epoch": 0.15, "learning_rate": 4.755174365815803e-05, "loss": 0.0453, "step": 2175000 }, { "epoch": 0.15, "learning_rate": 4.755118061202439e-05, "loss": 0.05, "step": 2175500 }, { "epoch": 0.15, "learning_rate": 4.755061756589076e-05, "loss": 0.048, "step": 2176000 }, { "epoch": 0.15, "learning_rate": 4.755005564584939e-05, "loss": 0.0458, "step": 2176500 }, { "epoch": 0.15, "learning_rate": 4.754949259971575e-05, "loss": 0.0515, "step": 2177000 }, { "epoch": 0.15, "learning_rate": 4.754892955358212e-05, "loss": 0.0486, "step": 2177500 }, { "epoch": 0.15, "learning_rate": 4.754836650744848e-05, "loss": 0.0513, "step": 2178000 }, { "epoch": 0.15, "learning_rate": 4.754780346131485e-05, "loss": 0.0519, "step": 2178500 }, { "epoch": 0.15, "learning_rate": 4.754724154127348e-05, "loss": 0.0521, "step": 2179000 }, { "epoch": 0.15, "learning_rate": 4.754667849513985e-05, "loss": 0.0479, "step": 2179500 }, { "epoch": 0.15, "learning_rate": 4.7546115449006205e-05, "loss": 0.0506, "step": 2180000 }, { "epoch": 0.15, "learning_rate": 4.7545552402872576e-05, "loss": 0.0512, "step": 2180500 }, { "epoch": 0.15, "learning_rate": 4.754498935673894e-05, "loss": 0.0455, "step": 2181000 }, { "epoch": 0.15, "learning_rate": 4.754442743669757e-05, "loss": 0.0495, "step": 2181500 }, { "epoch": 0.15, "learning_rate": 4.7543864390563936e-05, "loss": 0.0497, "step": 2182000 }, { "epoch": 0.15, "learning_rate": 4.7543301344430294e-05, "loss": 0.049, "step": 2182500 }, { "epoch": 0.15, "learning_rate": 4.754273942438893e-05, "loss": 0.0505, "step": 2183000 }, { "epoch": 0.15, "learning_rate": 4.7542176378255297e-05, "loss": 0.0471, "step": 2183500 }, { "epoch": 0.15, "learning_rate": 4.754161333212166e-05, "loss": 0.0475, "step": 2184000 }, { "epoch": 0.15, "learning_rate": 4.7541050285988025e-05, "loss": 0.0489, "step": 2184500 }, { "epoch": 0.15, "learning_rate": 4.7540487239854395e-05, "loss": 0.0456, "step": 2185000 }, { "epoch": 0.15, "learning_rate": 4.753992419372075e-05, "loss": 0.0476, "step": 2185500 }, { "epoch": 0.15, "learning_rate": 4.7539361147587116e-05, "loss": 0.0491, "step": 2186000 }, { "epoch": 0.15, "learning_rate": 4.753879810145349e-05, "loss": 0.0495, "step": 2186500 }, { "epoch": 0.15, "learning_rate": 4.753823618141211e-05, "loss": 0.0463, "step": 2187000 }, { "epoch": 0.15, "learning_rate": 4.7537673135278483e-05, "loss": 0.0481, "step": 2187500 }, { "epoch": 0.15, "learning_rate": 4.753711008914484e-05, "loss": 0.0496, "step": 2188000 }, { "epoch": 0.15, "learning_rate": 4.753654816910348e-05, "loss": 0.0463, "step": 2188500 }, { "epoch": 0.15, "learning_rate": 4.753598512296984e-05, "loss": 0.0495, "step": 2189000 }, { "epoch": 0.15, "learning_rate": 4.753542207683621e-05, "loss": 0.0475, "step": 2189500 }, { "epoch": 0.15, "learning_rate": 4.753485903070257e-05, "loss": 0.0502, "step": 2190000 }, { "epoch": 0.15, "learning_rate": 4.7534295984568936e-05, "loss": 0.048, "step": 2190500 }, { "epoch": 0.15, "learning_rate": 4.75337329384353e-05, "loss": 0.0486, "step": 2191000 }, { "epoch": 0.15, "learning_rate": 4.7533169892301664e-05, "loss": 0.0519, "step": 2191500 }, { "epoch": 0.15, "learning_rate": 4.753260684616803e-05, "loss": 0.0499, "step": 2192000 }, { "epoch": 0.15, "learning_rate": 4.753204492612666e-05, "loss": 0.0496, "step": 2192500 }, { "epoch": 0.15, "learning_rate": 4.7531481879993024e-05, "loss": 0.0467, "step": 2193000 }, { "epoch": 0.15, "learning_rate": 4.753091883385939e-05, "loss": 0.0483, "step": 2193500 }, { "epoch": 0.15, "learning_rate": 4.753035578772576e-05, "loss": 0.051, "step": 2194000 }, { "epoch": 0.15, "learning_rate": 4.7529792741592116e-05, "loss": 0.0482, "step": 2194500 }, { "epoch": 0.15, "learning_rate": 4.7529230821550755e-05, "loss": 0.0488, "step": 2195000 }, { "epoch": 0.15, "learning_rate": 4.752866777541711e-05, "loss": 0.0457, "step": 2195500 }, { "epoch": 0.15, "learning_rate": 4.7528104729283476e-05, "loss": 0.049, "step": 2196000 }, { "epoch": 0.15, "learning_rate": 4.752754168314985e-05, "loss": 0.051, "step": 2196500 }, { "epoch": 0.15, "learning_rate": 4.752697976310848e-05, "loss": 0.0497, "step": 2197000 }, { "epoch": 0.15, "learning_rate": 4.752641671697484e-05, "loss": 0.0476, "step": 2197500 }, { "epoch": 0.15, "learning_rate": 4.752585367084121e-05, "loss": 0.0532, "step": 2198000 }, { "epoch": 0.15, "learning_rate": 4.752529062470757e-05, "loss": 0.0508, "step": 2198500 }, { "epoch": 0.15, "learning_rate": 4.7524727578573935e-05, "loss": 0.0452, "step": 2199000 }, { "epoch": 0.15, "learning_rate": 4.752416565853257e-05, "loss": 0.0458, "step": 2199500 }, { "epoch": 0.15, "learning_rate": 4.752360261239893e-05, "loss": 0.0488, "step": 2200000 }, { "epoch": 0.15, "learning_rate": 4.7523039566265295e-05, "loss": 0.0491, "step": 2200500 }, { "epoch": 0.15, "learning_rate": 4.752247652013166e-05, "loss": 0.0487, "step": 2201000 }, { "epoch": 0.15, "learning_rate": 4.752191347399802e-05, "loss": 0.0488, "step": 2201500 }, { "epoch": 0.15, "learning_rate": 4.7521350427864394e-05, "loss": 0.0485, "step": 2202000 }, { "epoch": 0.15, "learning_rate": 4.752078850782302e-05, "loss": 0.0518, "step": 2202500 }, { "epoch": 0.15, "learning_rate": 4.752022546168939e-05, "loss": 0.0495, "step": 2203000 }, { "epoch": 0.15, "learning_rate": 4.751966241555575e-05, "loss": 0.051, "step": 2203500 }, { "epoch": 0.15, "learning_rate": 4.751909936942212e-05, "loss": 0.0488, "step": 2204000 }, { "epoch": 0.15, "learning_rate": 4.751853632328848e-05, "loss": 0.0448, "step": 2204500 }, { "epoch": 0.15, "learning_rate": 4.7517974403247114e-05, "loss": 0.046, "step": 2205000 }, { "epoch": 0.15, "learning_rate": 4.751741135711348e-05, "loss": 0.0469, "step": 2205500 }, { "epoch": 0.15, "learning_rate": 4.751684831097984e-05, "loss": 0.0486, "step": 2206000 }, { "epoch": 0.15, "learning_rate": 4.7516285264846206e-05, "loss": 0.0454, "step": 2206500 }, { "epoch": 0.15, "learning_rate": 4.751572334480484e-05, "loss": 0.0479, "step": 2207000 }, { "epoch": 0.15, "learning_rate": 4.75151602986712e-05, "loss": 0.0473, "step": 2207500 }, { "epoch": 0.15, "learning_rate": 4.7514597252537567e-05, "loss": 0.0474, "step": 2208000 }, { "epoch": 0.15, "learning_rate": 4.751403420640393e-05, "loss": 0.0492, "step": 2208500 }, { "epoch": 0.15, "learning_rate": 4.751347228636256e-05, "loss": 0.0506, "step": 2209000 }, { "epoch": 0.15, "learning_rate": 4.751290924022893e-05, "loss": 0.0491, "step": 2209500 }, { "epoch": 0.15, "learning_rate": 4.751234619409529e-05, "loss": 0.0492, "step": 2210000 }, { "epoch": 0.15, "learning_rate": 4.751178314796166e-05, "loss": 0.0468, "step": 2210500 }, { "epoch": 0.15, "learning_rate": 4.751122010182802e-05, "loss": 0.0447, "step": 2211000 }, { "epoch": 0.15, "learning_rate": 4.751065705569438e-05, "loss": 0.0463, "step": 2211500 }, { "epoch": 0.15, "learning_rate": 4.7510094009560754e-05, "loss": 0.0482, "step": 2212000 }, { "epoch": 0.15, "learning_rate": 4.750953096342712e-05, "loss": 0.049, "step": 2212500 }, { "epoch": 0.15, "learning_rate": 4.750896904338575e-05, "loss": 0.0477, "step": 2213000 }, { "epoch": 0.15, "learning_rate": 4.7508405997252114e-05, "loss": 0.0455, "step": 2213500 }, { "epoch": 0.15, "learning_rate": 4.750784295111848e-05, "loss": 0.0482, "step": 2214000 }, { "epoch": 0.15, "learning_rate": 4.750727990498484e-05, "loss": 0.0479, "step": 2214500 }, { "epoch": 0.15, "learning_rate": 4.7506716858851206e-05, "loss": 0.0477, "step": 2215000 }, { "epoch": 0.15, "learning_rate": 4.750615493880984e-05, "loss": 0.0526, "step": 2215500 }, { "epoch": 0.15, "learning_rate": 4.75055918926762e-05, "loss": 0.052, "step": 2216000 }, { "epoch": 0.15, "learning_rate": 4.7505028846542566e-05, "loss": 0.0497, "step": 2216500 }, { "epoch": 0.15, "learning_rate": 4.750446580040893e-05, "loss": 0.0506, "step": 2217000 }, { "epoch": 0.15, "learning_rate": 4.750390388036756e-05, "loss": 0.054, "step": 2217500 }, { "epoch": 0.15, "learning_rate": 4.7503340834233926e-05, "loss": 0.0497, "step": 2218000 }, { "epoch": 0.15, "learning_rate": 4.75027777881003e-05, "loss": 0.0465, "step": 2218500 }, { "epoch": 0.15, "learning_rate": 4.7502214741966654e-05, "loss": 0.0469, "step": 2219000 }, { "epoch": 0.15, "learning_rate": 4.7501651695833025e-05, "loss": 0.0515, "step": 2219500 }, { "epoch": 0.15, "learning_rate": 4.750108864969939e-05, "loss": 0.0493, "step": 2220000 }, { "epoch": 0.15, "learning_rate": 4.750052672965802e-05, "loss": 0.0481, "step": 2220500 }, { "epoch": 0.15, "learning_rate": 4.7499963683524385e-05, "loss": 0.0454, "step": 2221000 }, { "epoch": 0.15, "learning_rate": 4.749940063739074e-05, "loss": 0.0508, "step": 2221500 }, { "epoch": 0.15, "learning_rate": 4.749883759125711e-05, "loss": 0.0502, "step": 2222000 }, { "epoch": 0.15, "learning_rate": 4.749827454512348e-05, "loss": 0.0502, "step": 2222500 }, { "epoch": 0.15, "learning_rate": 4.749771262508211e-05, "loss": 0.0456, "step": 2223000 }, { "epoch": 0.15, "learning_rate": 4.749714957894847e-05, "loss": 0.0478, "step": 2223500 }, { "epoch": 0.15, "learning_rate": 4.749658653281484e-05, "loss": 0.0453, "step": 2224000 }, { "epoch": 0.15, "learning_rate": 4.74960234866812e-05, "loss": 0.0513, "step": 2224500 }, { "epoch": 0.15, "learning_rate": 4.7495460440547565e-05, "loss": 0.0492, "step": 2225000 }, { "epoch": 0.15, "learning_rate": 4.7494897394413936e-05, "loss": 0.0457, "step": 2225500 }, { "epoch": 0.15, "learning_rate": 4.749433547437256e-05, "loss": 0.0475, "step": 2226000 }, { "epoch": 0.15, "learning_rate": 4.749377242823893e-05, "loss": 0.0505, "step": 2226500 }, { "epoch": 0.15, "learning_rate": 4.749320938210529e-05, "loss": 0.0471, "step": 2227000 }, { "epoch": 0.15, "learning_rate": 4.749264633597166e-05, "loss": 0.0488, "step": 2227500 }, { "epoch": 0.15, "learning_rate": 4.7492084415930286e-05, "loss": 0.0483, "step": 2228000 }, { "epoch": 0.15, "learning_rate": 4.7491521369796656e-05, "loss": 0.0505, "step": 2228500 }, { "epoch": 0.15, "learning_rate": 4.749095832366302e-05, "loss": 0.0549, "step": 2229000 }, { "epoch": 0.15, "learning_rate": 4.7490395277529384e-05, "loss": 0.0485, "step": 2229500 }, { "epoch": 0.15, "learning_rate": 4.748983223139575e-05, "loss": 0.0511, "step": 2230000 }, { "epoch": 0.15, "learning_rate": 4.748927031135438e-05, "loss": 0.0446, "step": 2230500 }, { "epoch": 0.15, "learning_rate": 4.7488707265220745e-05, "loss": 0.0457, "step": 2231000 }, { "epoch": 0.15, "learning_rate": 4.748814421908711e-05, "loss": 0.0468, "step": 2231500 }, { "epoch": 0.15, "learning_rate": 4.748758117295347e-05, "loss": 0.0459, "step": 2232000 }, { "epoch": 0.15, "learning_rate": 4.7487019252912105e-05, "loss": 0.0475, "step": 2232500 }, { "epoch": 0.15, "learning_rate": 4.748645620677847e-05, "loss": 0.0497, "step": 2233000 }, { "epoch": 0.15, "learning_rate": 4.748589316064483e-05, "loss": 0.0493, "step": 2233500 }, { "epoch": 0.15, "learning_rate": 4.7485330114511204e-05, "loss": 0.0504, "step": 2234000 }, { "epoch": 0.15, "learning_rate": 4.748476819446983e-05, "loss": 0.0479, "step": 2234500 }, { "epoch": 0.15, "learning_rate": 4.74842051483362e-05, "loss": 0.0473, "step": 2235000 }, { "epoch": 0.15, "learning_rate": 4.748364210220256e-05, "loss": 0.0488, "step": 2235500 }, { "epoch": 0.15, "learning_rate": 4.748307905606892e-05, "loss": 0.0468, "step": 2236000 }, { "epoch": 0.15, "learning_rate": 4.748251713602755e-05, "loss": 0.0494, "step": 2236500 }, { "epoch": 0.15, "learning_rate": 4.7481954089893924e-05, "loss": 0.0521, "step": 2237000 }, { "epoch": 0.15, "learning_rate": 4.748139104376029e-05, "loss": 0.0482, "step": 2237500 }, { "epoch": 0.15, "learning_rate": 4.7480827997626645e-05, "loss": 0.0502, "step": 2238000 }, { "epoch": 0.15, "learning_rate": 4.7480264951493016e-05, "loss": 0.0472, "step": 2238500 }, { "epoch": 0.15, "learning_rate": 4.747970190535938e-05, "loss": 0.0527, "step": 2239000 }, { "epoch": 0.15, "learning_rate": 4.7479138859225744e-05, "loss": 0.0478, "step": 2239500 }, { "epoch": 0.15, "learning_rate": 4.747857806527664e-05, "loss": 0.0516, "step": 2240000 }, { "epoch": 0.15, "learning_rate": 4.747801501914301e-05, "loss": 0.0449, "step": 2240500 }, { "epoch": 0.15, "learning_rate": 4.747745197300937e-05, "loss": 0.0439, "step": 2241000 }, { "epoch": 0.15, "learning_rate": 4.7476888926875736e-05, "loss": 0.049, "step": 2241500 }, { "epoch": 0.15, "learning_rate": 4.74763258807421e-05, "loss": 0.0496, "step": 2242000 }, { "epoch": 0.15, "learning_rate": 4.7475762834608464e-05, "loss": 0.0483, "step": 2242500 }, { "epoch": 0.15, "learning_rate": 4.7475199788474835e-05, "loss": 0.054, "step": 2243000 }, { "epoch": 0.15, "learning_rate": 4.747463674234119e-05, "loss": 0.0454, "step": 2243500 }, { "epoch": 0.15, "learning_rate": 4.747407369620756e-05, "loss": 0.0543, "step": 2244000 }, { "epoch": 0.15, "learning_rate": 4.747351177616619e-05, "loss": 0.047, "step": 2244500 }, { "epoch": 0.15, "learning_rate": 4.747294873003256e-05, "loss": 0.0475, "step": 2245000 }, { "epoch": 0.15, "learning_rate": 4.7472385683898923e-05, "loss": 0.0479, "step": 2245500 }, { "epoch": 0.15, "learning_rate": 4.747182263776529e-05, "loss": 0.0488, "step": 2246000 }, { "epoch": 0.15, "learning_rate": 4.747126071772392e-05, "loss": 0.0504, "step": 2246500 }, { "epoch": 0.15, "learning_rate": 4.7470697671590284e-05, "loss": 0.0519, "step": 2247000 }, { "epoch": 0.15, "learning_rate": 4.747013462545665e-05, "loss": 0.0458, "step": 2247500 }, { "epoch": 0.15, "learning_rate": 4.746957157932301e-05, "loss": 0.0473, "step": 2248000 }, { "epoch": 0.15, "learning_rate": 4.7469008533189376e-05, "loss": 0.05, "step": 2248500 }, { "epoch": 0.15, "learning_rate": 4.746844548705574e-05, "loss": 0.0469, "step": 2249000 }, { "epoch": 0.15, "learning_rate": 4.7467882440922104e-05, "loss": 0.0446, "step": 2249500 }, { "epoch": 0.15, "learning_rate": 4.746731939478847e-05, "loss": 0.0436, "step": 2250000 }, { "epoch": 0.15, "learning_rate": 4.7466757474747107e-05, "loss": 0.0477, "step": 2250500 }, { "epoch": 0.15, "learning_rate": 4.7466194428613464e-05, "loss": 0.0438, "step": 2251000 }, { "epoch": 0.15, "learning_rate": 4.746563138247983e-05, "loss": 0.0451, "step": 2251500 }, { "epoch": 0.15, "learning_rate": 4.74650683363462e-05, "loss": 0.0448, "step": 2252000 }, { "epoch": 0.15, "learning_rate": 4.7464506416304824e-05, "loss": 0.0487, "step": 2252500 }, { "epoch": 0.15, "learning_rate": 4.7463943370171195e-05, "loss": 0.0468, "step": 2253000 }, { "epoch": 0.15, "learning_rate": 4.746338032403755e-05, "loss": 0.051, "step": 2253500 }, { "epoch": 0.15, "learning_rate": 4.746281727790392e-05, "loss": 0.0491, "step": 2254000 }, { "epoch": 0.15, "learning_rate": 4.746225535786255e-05, "loss": 0.0457, "step": 2254500 }, { "epoch": 0.15, "learning_rate": 4.746169231172892e-05, "loss": 0.0504, "step": 2255000 }, { "epoch": 0.15, "learning_rate": 4.746112926559528e-05, "loss": 0.0455, "step": 2255500 }, { "epoch": 0.15, "learning_rate": 4.746056621946165e-05, "loss": 0.0472, "step": 2256000 }, { "epoch": 0.15, "learning_rate": 4.746000429942028e-05, "loss": 0.0465, "step": 2256500 }, { "epoch": 0.15, "learning_rate": 4.745944125328664e-05, "loss": 0.0521, "step": 2257000 }, { "epoch": 0.15, "learning_rate": 4.745887820715301e-05, "loss": 0.0437, "step": 2257500 }, { "epoch": 0.15, "learning_rate": 4.745831516101937e-05, "loss": 0.0488, "step": 2258000 }, { "epoch": 0.15, "learning_rate": 4.745775211488574e-05, "loss": 0.0482, "step": 2258500 }, { "epoch": 0.15, "learning_rate": 4.745719019484437e-05, "loss": 0.0448, "step": 2259000 }, { "epoch": 0.15, "learning_rate": 4.745662714871074e-05, "loss": 0.0434, "step": 2259500 }, { "epoch": 0.15, "learning_rate": 4.7456064102577095e-05, "loss": 0.044, "step": 2260000 }, { "epoch": 0.15, "learning_rate": 4.7455501056443466e-05, "loss": 0.045, "step": 2260500 }, { "epoch": 0.15, "learning_rate": 4.745493801030983e-05, "loss": 0.0478, "step": 2261000 }, { "epoch": 0.15, "learning_rate": 4.745437496417619e-05, "loss": 0.0498, "step": 2261500 }, { "epoch": 0.15, "learning_rate": 4.7453813044134826e-05, "loss": 0.0447, "step": 2262000 }, { "epoch": 0.15, "learning_rate": 4.7453249998001184e-05, "loss": 0.0505, "step": 2262500 }, { "epoch": 0.15, "learning_rate": 4.7452686951867554e-05, "loss": 0.0489, "step": 2263000 }, { "epoch": 0.15, "learning_rate": 4.745212390573392e-05, "loss": 0.0502, "step": 2263500 }, { "epoch": 0.15, "learning_rate": 4.745156085960028e-05, "loss": 0.0471, "step": 2264000 }, { "epoch": 0.15, "learning_rate": 4.7450998939558915e-05, "loss": 0.0471, "step": 2264500 }, { "epoch": 0.15, "learning_rate": 4.745043589342528e-05, "loss": 0.0501, "step": 2265000 }, { "epoch": 0.15, "learning_rate": 4.744987284729164e-05, "loss": 0.0495, "step": 2265500 }, { "epoch": 0.15, "learning_rate": 4.7449309801158007e-05, "loss": 0.0489, "step": 2266000 }, { "epoch": 0.15, "learning_rate": 4.744874788111664e-05, "loss": 0.0501, "step": 2266500 }, { "epoch": 0.15, "learning_rate": 4.7448184834983e-05, "loss": 0.0484, "step": 2267000 }, { "epoch": 0.15, "learning_rate": 4.7447622914941635e-05, "loss": 0.0457, "step": 2267500 }, { "epoch": 0.15, "learning_rate": 4.7447059868808006e-05, "loss": 0.0472, "step": 2268000 }, { "epoch": 0.15, "learning_rate": 4.744649682267436e-05, "loss": 0.0492, "step": 2268500 }, { "epoch": 0.15, "learning_rate": 4.744593377654073e-05, "loss": 0.0463, "step": 2269000 }, { "epoch": 0.15, "learning_rate": 4.74453707304071e-05, "loss": 0.0475, "step": 2269500 }, { "epoch": 0.15, "learning_rate": 4.7444807684273455e-05, "loss": 0.0523, "step": 2270000 }, { "epoch": 0.15, "learning_rate": 4.7444244638139826e-05, "loss": 0.0478, "step": 2270500 }, { "epoch": 0.15, "learning_rate": 4.744368159200619e-05, "loss": 0.0455, "step": 2271000 }, { "epoch": 0.15, "learning_rate": 4.7443118545872554e-05, "loss": 0.0467, "step": 2271500 }, { "epoch": 0.15, "learning_rate": 4.744255549973892e-05, "loss": 0.0494, "step": 2272000 }, { "epoch": 0.15, "learning_rate": 4.744199245360528e-05, "loss": 0.0491, "step": 2272500 }, { "epoch": 0.15, "learning_rate": 4.744142940747165e-05, "loss": 0.0476, "step": 2273000 }, { "epoch": 0.15, "learning_rate": 4.744086748743028e-05, "loss": 0.0466, "step": 2273500 }, { "epoch": 0.15, "learning_rate": 4.744030556738891e-05, "loss": 0.049, "step": 2274000 }, { "epoch": 0.15, "learning_rate": 4.7439742521255274e-05, "loss": 0.0509, "step": 2274500 }, { "epoch": 0.15, "learning_rate": 4.7439179475121645e-05, "loss": 0.0476, "step": 2275000 }, { "epoch": 0.15, "learning_rate": 4.7438616428988e-05, "loss": 0.047, "step": 2275500 }, { "epoch": 0.15, "learning_rate": 4.7438053382854366e-05, "loss": 0.0487, "step": 2276000 }, { "epoch": 0.15, "learning_rate": 4.743749033672074e-05, "loss": 0.0471, "step": 2276500 }, { "epoch": 0.15, "learning_rate": 4.743692841667937e-05, "loss": 0.0503, "step": 2277000 }, { "epoch": 0.15, "learning_rate": 4.743636537054573e-05, "loss": 0.05, "step": 2277500 }, { "epoch": 0.15, "learning_rate": 4.743580232441209e-05, "loss": 0.0459, "step": 2278000 }, { "epoch": 0.15, "learning_rate": 4.743523927827846e-05, "loss": 0.0468, "step": 2278500 }, { "epoch": 0.15, "learning_rate": 4.7434676232144825e-05, "loss": 0.0512, "step": 2279000 }, { "epoch": 0.15, "learning_rate": 4.743411318601119e-05, "loss": 0.0488, "step": 2279500 }, { "epoch": 0.15, "learning_rate": 4.743355126596982e-05, "loss": 0.0459, "step": 2280000 }, { "epoch": 0.15, "learning_rate": 4.7432988219836185e-05, "loss": 0.0469, "step": 2280500 }, { "epoch": 0.15, "learning_rate": 4.743242517370255e-05, "loss": 0.0428, "step": 2281000 }, { "epoch": 0.15, "learning_rate": 4.743186212756891e-05, "loss": 0.0483, "step": 2281500 }, { "epoch": 0.15, "learning_rate": 4.743129908143528e-05, "loss": 0.0468, "step": 2282000 }, { "epoch": 0.15, "learning_rate": 4.743073603530164e-05, "loss": 0.0478, "step": 2282500 }, { "epoch": 0.15, "learning_rate": 4.7430174115260273e-05, "loss": 0.0511, "step": 2283000 }, { "epoch": 0.15, "learning_rate": 4.742961106912664e-05, "loss": 0.0446, "step": 2283500 }, { "epoch": 0.15, "learning_rate": 4.742904802299301e-05, "loss": 0.0471, "step": 2284000 }, { "epoch": 0.15, "learning_rate": 4.742848497685937e-05, "loss": 0.0497, "step": 2284500 }, { "epoch": 0.15, "learning_rate": 4.7427923056818004e-05, "loss": 0.0502, "step": 2285000 }, { "epoch": 0.15, "learning_rate": 4.742736001068436e-05, "loss": 0.0469, "step": 2285500 }, { "epoch": 0.15, "learning_rate": 4.742679696455073e-05, "loss": 0.0487, "step": 2286000 }, { "epoch": 0.15, "learning_rate": 4.7426233918417096e-05, "loss": 0.0475, "step": 2286500 }, { "epoch": 0.15, "learning_rate": 4.742567087228346e-05, "loss": 0.0471, "step": 2287000 }, { "epoch": 0.15, "learning_rate": 4.7425107826149824e-05, "loss": 0.0487, "step": 2287500 }, { "epoch": 0.15, "learning_rate": 4.742454590610846e-05, "loss": 0.0487, "step": 2288000 }, { "epoch": 0.15, "learning_rate": 4.742398285997482e-05, "loss": 0.0491, "step": 2288500 }, { "epoch": 0.15, "learning_rate": 4.7423419813841185e-05, "loss": 0.0475, "step": 2289000 }, { "epoch": 0.15, "learning_rate": 4.742285676770755e-05, "loss": 0.0467, "step": 2289500 }, { "epoch": 0.15, "learning_rate": 4.742229372157391e-05, "loss": 0.0469, "step": 2290000 }, { "epoch": 0.15, "learning_rate": 4.742173180153255e-05, "loss": 0.0478, "step": 2290500 }, { "epoch": 0.15, "learning_rate": 4.742116875539891e-05, "loss": 0.0469, "step": 2291000 }, { "epoch": 0.15, "learning_rate": 4.742060570926527e-05, "loss": 0.0468, "step": 2291500 }, { "epoch": 0.15, "learning_rate": 4.7420042663131644e-05, "loss": 0.0487, "step": 2292000 }, { "epoch": 0.15, "learning_rate": 4.7419479616998e-05, "loss": 0.0529, "step": 2292500 }, { "epoch": 0.15, "learning_rate": 4.741891657086437e-05, "loss": 0.0493, "step": 2293000 }, { "epoch": 0.15, "learning_rate": 4.7418353524730736e-05, "loss": 0.0486, "step": 2293500 }, { "epoch": 0.15, "learning_rate": 4.741779160468937e-05, "loss": 0.0467, "step": 2294000 }, { "epoch": 0.16, "learning_rate": 4.741722855855573e-05, "loss": 0.0508, "step": 2294500 }, { "epoch": 0.16, "learning_rate": 4.7416665512422096e-05, "loss": 0.0482, "step": 2295000 }, { "epoch": 0.16, "learning_rate": 4.741610246628846e-05, "loss": 0.0485, "step": 2295500 }, { "epoch": 0.16, "learning_rate": 4.7415539420154824e-05, "loss": 0.0494, "step": 2296000 }, { "epoch": 0.16, "learning_rate": 4.7414977500113456e-05, "loss": 0.0507, "step": 2296500 }, { "epoch": 0.16, "learning_rate": 4.741441445397982e-05, "loss": 0.0495, "step": 2297000 }, { "epoch": 0.16, "learning_rate": 4.7413851407846184e-05, "loss": 0.0491, "step": 2297500 }, { "epoch": 0.16, "learning_rate": 4.741328836171255e-05, "loss": 0.0487, "step": 2298000 }, { "epoch": 0.16, "learning_rate": 4.741272531557891e-05, "loss": 0.0456, "step": 2298500 }, { "epoch": 0.16, "learning_rate": 4.7412163395537544e-05, "loss": 0.0452, "step": 2299000 }, { "epoch": 0.16, "learning_rate": 4.7411600349403915e-05, "loss": 0.0447, "step": 2299500 }, { "epoch": 0.16, "learning_rate": 4.741103730327028e-05, "loss": 0.0486, "step": 2300000 }, { "epoch": 0.16, "learning_rate": 4.7410474257136636e-05, "loss": 0.0447, "step": 2300500 }, { "epoch": 0.16, "learning_rate": 4.740991121100301e-05, "loss": 0.0464, "step": 2301000 }, { "epoch": 0.16, "learning_rate": 4.740934929096163e-05, "loss": 0.0468, "step": 2301500 }, { "epoch": 0.16, "learning_rate": 4.7408786244828e-05, "loss": 0.0461, "step": 2302000 }, { "epoch": 0.16, "learning_rate": 4.740822319869437e-05, "loss": 0.0485, "step": 2302500 }, { "epoch": 0.16, "learning_rate": 4.740766015256073e-05, "loss": 0.0487, "step": 2303000 }, { "epoch": 0.16, "learning_rate": 4.740709935861163e-05, "loss": 0.0499, "step": 2303500 }, { "epoch": 0.16, "learning_rate": 4.7406536312477996e-05, "loss": 0.0415, "step": 2304000 }, { "epoch": 0.16, "learning_rate": 4.740597326634436e-05, "loss": 0.0485, "step": 2304500 }, { "epoch": 0.16, "learning_rate": 4.7405410220210724e-05, "loss": 0.0463, "step": 2305000 }, { "epoch": 0.16, "learning_rate": 4.740484717407709e-05, "loss": 0.0484, "step": 2305500 }, { "epoch": 0.16, "learning_rate": 4.740428412794345e-05, "loss": 0.0499, "step": 2306000 }, { "epoch": 0.16, "learning_rate": 4.7403721081809816e-05, "loss": 0.0487, "step": 2306500 }, { "epoch": 0.16, "learning_rate": 4.740315803567618e-05, "loss": 0.0476, "step": 2307000 }, { "epoch": 0.16, "learning_rate": 4.740259498954255e-05, "loss": 0.0492, "step": 2307500 }, { "epoch": 0.16, "learning_rate": 4.7402033069501176e-05, "loss": 0.0457, "step": 2308000 }, { "epoch": 0.16, "learning_rate": 4.7401470023367547e-05, "loss": 0.0443, "step": 2308500 }, { "epoch": 0.16, "learning_rate": 4.7400906977233904e-05, "loss": 0.0467, "step": 2309000 }, { "epoch": 0.16, "learning_rate": 4.7400343931100275e-05, "loss": 0.0517, "step": 2309500 }, { "epoch": 0.16, "learning_rate": 4.73997820110589e-05, "loss": 0.0468, "step": 2310000 }, { "epoch": 0.16, "learning_rate": 4.739921896492527e-05, "loss": 0.0485, "step": 2310500 }, { "epoch": 0.16, "learning_rate": 4.7398655918791635e-05, "loss": 0.0484, "step": 2311000 }, { "epoch": 0.16, "learning_rate": 4.7398092872658e-05, "loss": 0.047, "step": 2311500 }, { "epoch": 0.16, "learning_rate": 4.739753095261663e-05, "loss": 0.0516, "step": 2312000 }, { "epoch": 0.16, "learning_rate": 4.7396967906482995e-05, "loss": 0.0504, "step": 2312500 }, { "epoch": 0.16, "learning_rate": 4.739640486034936e-05, "loss": 0.0489, "step": 2313000 }, { "epoch": 0.16, "learning_rate": 4.739584181421572e-05, "loss": 0.0481, "step": 2313500 }, { "epoch": 0.16, "learning_rate": 4.739527876808209e-05, "loss": 0.0478, "step": 2314000 }, { "epoch": 0.16, "learning_rate": 4.739471572194845e-05, "loss": 0.0502, "step": 2314500 }, { "epoch": 0.16, "learning_rate": 4.7394152675814815e-05, "loss": 0.0466, "step": 2315000 }, { "epoch": 0.16, "learning_rate": 4.739359075577345e-05, "loss": 0.046, "step": 2315500 }, { "epoch": 0.16, "learning_rate": 4.739302770963982e-05, "loss": 0.0448, "step": 2316000 }, { "epoch": 0.16, "learning_rate": 4.739246466350618e-05, "loss": 0.0474, "step": 2316500 }, { "epoch": 0.16, "learning_rate": 4.739190161737254e-05, "loss": 0.0476, "step": 2317000 }, { "epoch": 0.16, "learning_rate": 4.739133857123891e-05, "loss": 0.0448, "step": 2317500 }, { "epoch": 0.16, "learning_rate": 4.7390776651197535e-05, "loss": 0.0466, "step": 2318000 }, { "epoch": 0.16, "learning_rate": 4.7390213605063906e-05, "loss": 0.0476, "step": 2318500 }, { "epoch": 0.16, "learning_rate": 4.738965055893027e-05, "loss": 0.0466, "step": 2319000 }, { "epoch": 0.16, "learning_rate": 4.7389087512796634e-05, "loss": 0.0439, "step": 2319500 }, { "epoch": 0.16, "learning_rate": 4.7388525592755266e-05, "loss": 0.046, "step": 2320000 }, { "epoch": 0.16, "learning_rate": 4.738796254662163e-05, "loss": 0.0484, "step": 2320500 }, { "epoch": 0.16, "learning_rate": 4.7387399500487994e-05, "loss": 0.0474, "step": 2321000 }, { "epoch": 0.16, "learning_rate": 4.738683645435436e-05, "loss": 0.0527, "step": 2321500 }, { "epoch": 0.16, "learning_rate": 4.738627453431299e-05, "loss": 0.0519, "step": 2322000 }, { "epoch": 0.16, "learning_rate": 4.7385711488179354e-05, "loss": 0.0477, "step": 2322500 }, { "epoch": 0.16, "learning_rate": 4.738514844204572e-05, "loss": 0.0479, "step": 2323000 }, { "epoch": 0.16, "learning_rate": 4.738458539591208e-05, "loss": 0.0479, "step": 2323500 }, { "epoch": 0.16, "learning_rate": 4.738402234977845e-05, "loss": 0.0479, "step": 2324000 }, { "epoch": 0.16, "learning_rate": 4.738346042973708e-05, "loss": 0.0485, "step": 2324500 }, { "epoch": 0.16, "learning_rate": 4.738289738360345e-05, "loss": 0.0459, "step": 2325000 }, { "epoch": 0.16, "learning_rate": 4.738233433746981e-05, "loss": 0.0431, "step": 2325500 }, { "epoch": 0.16, "learning_rate": 4.738177129133618e-05, "loss": 0.0507, "step": 2326000 }, { "epoch": 0.16, "learning_rate": 4.73812093712948e-05, "loss": 0.0446, "step": 2326500 }, { "epoch": 0.16, "learning_rate": 4.7380646325161174e-05, "loss": 0.0476, "step": 2327000 }, { "epoch": 0.16, "learning_rate": 4.738008327902754e-05, "loss": 0.0507, "step": 2327500 }, { "epoch": 0.16, "learning_rate": 4.7379520232893895e-05, "loss": 0.0476, "step": 2328000 }, { "epoch": 0.16, "learning_rate": 4.7378958312852534e-05, "loss": 0.0474, "step": 2328500 }, { "epoch": 0.16, "learning_rate": 4.73783952667189e-05, "loss": 0.0485, "step": 2329000 }, { "epoch": 0.16, "learning_rate": 4.737783222058526e-05, "loss": 0.0481, "step": 2329500 }, { "epoch": 0.16, "learning_rate": 4.7377269174451626e-05, "loss": 0.0475, "step": 2330000 }, { "epoch": 0.16, "learning_rate": 4.7376706128318e-05, "loss": 0.0492, "step": 2330500 }, { "epoch": 0.16, "learning_rate": 4.7376143082184354e-05, "loss": 0.0479, "step": 2331000 }, { "epoch": 0.16, "learning_rate": 4.737558003605072e-05, "loss": 0.0467, "step": 2331500 }, { "epoch": 0.16, "learning_rate": 4.737501811600935e-05, "loss": 0.0442, "step": 2332000 }, { "epoch": 0.16, "learning_rate": 4.7374455069875714e-05, "loss": 0.0435, "step": 2332500 }, { "epoch": 0.16, "learning_rate": 4.7373892023742085e-05, "loss": 0.0449, "step": 2333000 }, { "epoch": 0.16, "learning_rate": 4.737333010370072e-05, "loss": 0.0507, "step": 2333500 }, { "epoch": 0.16, "learning_rate": 4.737276705756708e-05, "loss": 0.0473, "step": 2334000 }, { "epoch": 0.16, "learning_rate": 4.737220401143344e-05, "loss": 0.0483, "step": 2334500 }, { "epoch": 0.16, "learning_rate": 4.737164096529981e-05, "loss": 0.0477, "step": 2335000 }, { "epoch": 0.16, "learning_rate": 4.737107791916617e-05, "loss": 0.0453, "step": 2335500 }, { "epoch": 0.16, "learning_rate": 4.737051487303254e-05, "loss": 0.0496, "step": 2336000 }, { "epoch": 0.16, "learning_rate": 4.73699518268989e-05, "loss": 0.046, "step": 2336500 }, { "epoch": 0.16, "learning_rate": 4.7369388780765265e-05, "loss": 0.0512, "step": 2337000 }, { "epoch": 0.16, "learning_rate": 4.736882573463163e-05, "loss": 0.0508, "step": 2337500 }, { "epoch": 0.16, "learning_rate": 4.736826381459026e-05, "loss": 0.0491, "step": 2338000 }, { "epoch": 0.16, "learning_rate": 4.7367700768456625e-05, "loss": 0.0437, "step": 2338500 }, { "epoch": 0.16, "learning_rate": 4.736713772232299e-05, "loss": 0.048, "step": 2339000 }, { "epoch": 0.16, "learning_rate": 4.736657467618936e-05, "loss": 0.0475, "step": 2339500 }, { "epoch": 0.16, "learning_rate": 4.7366012756147985e-05, "loss": 0.0501, "step": 2340000 }, { "epoch": 0.16, "learning_rate": 4.7365449710014356e-05, "loss": 0.047, "step": 2340500 }, { "epoch": 0.16, "learning_rate": 4.736488778997298e-05, "loss": 0.051, "step": 2341000 }, { "epoch": 0.16, "learning_rate": 4.736432474383935e-05, "loss": 0.0461, "step": 2341500 }, { "epoch": 0.16, "learning_rate": 4.736376169770571e-05, "loss": 0.0448, "step": 2342000 }, { "epoch": 0.16, "learning_rate": 4.736319865157208e-05, "loss": 0.0497, "step": 2342500 }, { "epoch": 0.16, "learning_rate": 4.7362635605438444e-05, "loss": 0.0463, "step": 2343000 }, { "epoch": 0.16, "learning_rate": 4.73620725593048e-05, "loss": 0.047, "step": 2343500 }, { "epoch": 0.16, "learning_rate": 4.736150951317117e-05, "loss": 0.0468, "step": 2344000 }, { "epoch": 0.16, "learning_rate": 4.7360946467037536e-05, "loss": 0.0481, "step": 2344500 }, { "epoch": 0.16, "learning_rate": 4.736038454699617e-05, "loss": 0.0502, "step": 2345000 }, { "epoch": 0.16, "learning_rate": 4.735982150086253e-05, "loss": 0.0517, "step": 2345500 }, { "epoch": 0.16, "learning_rate": 4.7359258454728897e-05, "loss": 0.0457, "step": 2346000 }, { "epoch": 0.16, "learning_rate": 4.735869653468753e-05, "loss": 0.0455, "step": 2346500 }, { "epoch": 0.16, "learning_rate": 4.73581334885539e-05, "loss": 0.05, "step": 2347000 }, { "epoch": 0.16, "learning_rate": 4.735757044242026e-05, "loss": 0.0454, "step": 2347500 }, { "epoch": 0.16, "learning_rate": 4.735700739628662e-05, "loss": 0.048, "step": 2348000 }, { "epoch": 0.16, "learning_rate": 4.735644435015299e-05, "loss": 0.0499, "step": 2348500 }, { "epoch": 0.16, "learning_rate": 4.735588130401935e-05, "loss": 0.0501, "step": 2349000 }, { "epoch": 0.16, "learning_rate": 4.735531825788572e-05, "loss": 0.0474, "step": 2349500 }, { "epoch": 0.16, "learning_rate": 4.7354755211752084e-05, "loss": 0.0465, "step": 2350000 }, { "epoch": 0.16, "learning_rate": 4.7354193291710716e-05, "loss": 0.0444, "step": 2350500 }, { "epoch": 0.16, "learning_rate": 4.735363024557708e-05, "loss": 0.0458, "step": 2351000 }, { "epoch": 0.16, "learning_rate": 4.7353067199443444e-05, "loss": 0.0492, "step": 2351500 }, { "epoch": 0.16, "learning_rate": 4.735250415330981e-05, "loss": 0.0517, "step": 2352000 }, { "epoch": 0.16, "learning_rate": 4.735194110717617e-05, "loss": 0.0473, "step": 2352500 }, { "epoch": 0.16, "learning_rate": 4.7351379187134804e-05, "loss": 0.0446, "step": 2353000 }, { "epoch": 0.16, "learning_rate": 4.735081614100117e-05, "loss": 0.0485, "step": 2353500 }, { "epoch": 0.16, "learning_rate": 4.735025309486753e-05, "loss": 0.0483, "step": 2354000 }, { "epoch": 0.16, "learning_rate": 4.7349690048733896e-05, "loss": 0.0481, "step": 2354500 }, { "epoch": 0.16, "learning_rate": 4.734912700260026e-05, "loss": 0.0468, "step": 2355000 }, { "epoch": 0.16, "learning_rate": 4.7348563956466624e-05, "loss": 0.0533, "step": 2355500 }, { "epoch": 0.16, "learning_rate": 4.734800091033299e-05, "loss": 0.0485, "step": 2356000 }, { "epoch": 0.16, "learning_rate": 4.734743786419936e-05, "loss": 0.0474, "step": 2356500 }, { "epoch": 0.16, "learning_rate": 4.734687481806572e-05, "loss": 0.0486, "step": 2357000 }, { "epoch": 0.16, "learning_rate": 4.7346314024116616e-05, "loss": 0.0498, "step": 2357500 }, { "epoch": 0.16, "learning_rate": 4.734575097798298e-05, "loss": 0.0488, "step": 2358000 }, { "epoch": 0.16, "learning_rate": 4.734518793184935e-05, "loss": 0.0479, "step": 2358500 }, { "epoch": 0.16, "learning_rate": 4.7344624885715715e-05, "loss": 0.0493, "step": 2359000 }, { "epoch": 0.16, "learning_rate": 4.734406183958208e-05, "loss": 0.0456, "step": 2359500 }, { "epoch": 0.16, "learning_rate": 4.734349991954071e-05, "loss": 0.0505, "step": 2360000 }, { "epoch": 0.16, "learning_rate": 4.7342936873407075e-05, "loss": 0.048, "step": 2360500 }, { "epoch": 0.16, "learning_rate": 4.734237382727344e-05, "loss": 0.0467, "step": 2361000 }, { "epoch": 0.16, "learning_rate": 4.73418107811398e-05, "loss": 0.0463, "step": 2361500 }, { "epoch": 0.16, "learning_rate": 4.734124773500617e-05, "loss": 0.0464, "step": 2362000 }, { "epoch": 0.16, "learning_rate": 4.73406858149648e-05, "loss": 0.0471, "step": 2362500 }, { "epoch": 0.16, "learning_rate": 4.7340122768831163e-05, "loss": 0.0442, "step": 2363000 }, { "epoch": 0.16, "learning_rate": 4.733955972269753e-05, "loss": 0.0495, "step": 2363500 }, { "epoch": 0.16, "learning_rate": 4.73389966765639e-05, "loss": 0.0464, "step": 2364000 }, { "epoch": 0.16, "learning_rate": 4.7338434756522524e-05, "loss": 0.0484, "step": 2364500 }, { "epoch": 0.16, "learning_rate": 4.7337871710388894e-05, "loss": 0.0544, "step": 2365000 }, { "epoch": 0.16, "learning_rate": 4.733730979034752e-05, "loss": 0.0499, "step": 2365500 }, { "epoch": 0.16, "learning_rate": 4.733674674421389e-05, "loss": 0.0427, "step": 2366000 }, { "epoch": 0.16, "learning_rate": 4.733618369808025e-05, "loss": 0.0514, "step": 2366500 }, { "epoch": 0.16, "learning_rate": 4.733562065194662e-05, "loss": 0.0501, "step": 2367000 }, { "epoch": 0.16, "learning_rate": 4.733505760581298e-05, "loss": 0.0468, "step": 2367500 }, { "epoch": 0.16, "learning_rate": 4.733449455967934e-05, "loss": 0.0475, "step": 2368000 }, { "epoch": 0.16, "learning_rate": 4.733393151354571e-05, "loss": 0.0455, "step": 2368500 }, { "epoch": 0.16, "learning_rate": 4.7333368467412075e-05, "loss": 0.0506, "step": 2369000 }, { "epoch": 0.16, "learning_rate": 4.733280542127844e-05, "loss": 0.0484, "step": 2369500 }, { "epoch": 0.16, "learning_rate": 4.733224350123707e-05, "loss": 0.0456, "step": 2370000 }, { "epoch": 0.16, "learning_rate": 4.7331680455103435e-05, "loss": 0.0459, "step": 2370500 }, { "epoch": 0.16, "learning_rate": 4.73311174089698e-05, "loss": 0.0478, "step": 2371000 }, { "epoch": 0.16, "learning_rate": 4.733055436283616e-05, "loss": 0.0473, "step": 2371500 }, { "epoch": 0.16, "learning_rate": 4.732999131670253e-05, "loss": 0.0454, "step": 2372000 }, { "epoch": 0.16, "learning_rate": 4.732942939666116e-05, "loss": 0.0475, "step": 2372500 }, { "epoch": 0.16, "learning_rate": 4.732886635052752e-05, "loss": 0.0493, "step": 2373000 }, { "epoch": 0.16, "learning_rate": 4.732830330439389e-05, "loss": 0.0431, "step": 2373500 }, { "epoch": 0.16, "learning_rate": 4.732774025826026e-05, "loss": 0.0477, "step": 2374000 }, { "epoch": 0.16, "learning_rate": 4.732717721212662e-05, "loss": 0.0475, "step": 2374500 }, { "epoch": 0.16, "learning_rate": 4.7326614165992986e-05, "loss": 0.0478, "step": 2375000 }, { "epoch": 0.16, "learning_rate": 4.732605224595162e-05, "loss": 0.0461, "step": 2375500 }, { "epoch": 0.16, "learning_rate": 4.732548919981798e-05, "loss": 0.048, "step": 2376000 }, { "epoch": 0.16, "learning_rate": 4.7324926153684346e-05, "loss": 0.0456, "step": 2376500 }, { "epoch": 0.16, "learning_rate": 4.732436310755071e-05, "loss": 0.0485, "step": 2377000 }, { "epoch": 0.16, "learning_rate": 4.7323800061417074e-05, "loss": 0.0451, "step": 2377500 }, { "epoch": 0.16, "learning_rate": 4.7323238141375706e-05, "loss": 0.0475, "step": 2378000 }, { "epoch": 0.16, "learning_rate": 4.732267509524207e-05, "loss": 0.0458, "step": 2378500 }, { "epoch": 0.16, "learning_rate": 4.7322112049108434e-05, "loss": 0.0484, "step": 2379000 }, { "epoch": 0.16, "learning_rate": 4.7321550129067066e-05, "loss": 0.0476, "step": 2379500 }, { "epoch": 0.16, "learning_rate": 4.732098708293343e-05, "loss": 0.0468, "step": 2380000 }, { "epoch": 0.16, "learning_rate": 4.73204240367998e-05, "loss": 0.0483, "step": 2380500 }, { "epoch": 0.16, "learning_rate": 4.731986099066616e-05, "loss": 0.0497, "step": 2381000 }, { "epoch": 0.16, "learning_rate": 4.731929794453252e-05, "loss": 0.0484, "step": 2381500 }, { "epoch": 0.16, "learning_rate": 4.731873489839889e-05, "loss": 0.0463, "step": 2382000 }, { "epoch": 0.16, "learning_rate": 4.731817185226525e-05, "loss": 0.0468, "step": 2382500 }, { "epoch": 0.16, "learning_rate": 4.731760880613162e-05, "loss": 0.0469, "step": 2383000 }, { "epoch": 0.16, "learning_rate": 4.7317045759997985e-05, "loss": 0.0461, "step": 2383500 }, { "epoch": 0.16, "learning_rate": 4.731648383995662e-05, "loss": 0.0495, "step": 2384000 }, { "epoch": 0.16, "learning_rate": 4.731592079382298e-05, "loss": 0.047, "step": 2384500 }, { "epoch": 0.16, "learning_rate": 4.7315357747689345e-05, "loss": 0.0492, "step": 2385000 }, { "epoch": 0.16, "learning_rate": 4.731479470155571e-05, "loss": 0.0501, "step": 2385500 }, { "epoch": 0.16, "learning_rate": 4.731423278151434e-05, "loss": 0.0483, "step": 2386000 }, { "epoch": 0.16, "learning_rate": 4.7313670861472974e-05, "loss": 0.0466, "step": 2386500 }, { "epoch": 0.16, "learning_rate": 4.731310781533934e-05, "loss": 0.046, "step": 2387000 }, { "epoch": 0.16, "learning_rate": 4.73125447692057e-05, "loss": 0.0487, "step": 2387500 }, { "epoch": 0.16, "learning_rate": 4.7311981723072066e-05, "loss": 0.0447, "step": 2388000 }, { "epoch": 0.16, "learning_rate": 4.7311418676938437e-05, "loss": 0.0474, "step": 2388500 }, { "epoch": 0.16, "learning_rate": 4.7310855630804794e-05, "loss": 0.0493, "step": 2389000 }, { "epoch": 0.16, "learning_rate": 4.7310292584671165e-05, "loss": 0.0474, "step": 2389500 }, { "epoch": 0.16, "learning_rate": 4.730972953853753e-05, "loss": 0.0458, "step": 2390000 }, { "epoch": 0.16, "learning_rate": 4.7309166492403886e-05, "loss": 0.0468, "step": 2390500 }, { "epoch": 0.16, "learning_rate": 4.7308603446270257e-05, "loss": 0.0469, "step": 2391000 }, { "epoch": 0.16, "learning_rate": 4.730804040013662e-05, "loss": 0.0496, "step": 2391500 }, { "epoch": 0.16, "learning_rate": 4.730747848009525e-05, "loss": 0.0491, "step": 2392000 }, { "epoch": 0.16, "learning_rate": 4.730691543396162e-05, "loss": 0.0475, "step": 2392500 }, { "epoch": 0.16, "learning_rate": 4.730635238782798e-05, "loss": 0.044, "step": 2393000 }, { "epoch": 0.16, "learning_rate": 4.7305789341694345e-05, "loss": 0.0501, "step": 2393500 }, { "epoch": 0.16, "learning_rate": 4.730522629556071e-05, "loss": 0.0478, "step": 2394000 }, { "epoch": 0.16, "learning_rate": 4.730466437551934e-05, "loss": 0.0508, "step": 2394500 }, { "epoch": 0.16, "learning_rate": 4.7304101329385705e-05, "loss": 0.0447, "step": 2395000 }, { "epoch": 0.16, "learning_rate": 4.730353828325207e-05, "loss": 0.0497, "step": 2395500 }, { "epoch": 0.16, "learning_rate": 4.730297523711843e-05, "loss": 0.0484, "step": 2396000 }, { "epoch": 0.16, "learning_rate": 4.7302413317077065e-05, "loss": 0.0472, "step": 2396500 }, { "epoch": 0.16, "learning_rate": 4.730185027094343e-05, "loss": 0.0483, "step": 2397000 }, { "epoch": 0.16, "learning_rate": 4.73012872248098e-05, "loss": 0.0471, "step": 2397500 }, { "epoch": 0.16, "learning_rate": 4.730072417867616e-05, "loss": 0.0458, "step": 2398000 }, { "epoch": 0.16, "learning_rate": 4.730016113254253e-05, "loss": 0.0448, "step": 2398500 }, { "epoch": 0.16, "learning_rate": 4.729959921250115e-05, "loss": 0.0493, "step": 2399000 }, { "epoch": 0.16, "learning_rate": 4.7299036166367524e-05, "loss": 0.045, "step": 2399500 }, { "epoch": 0.16, "learning_rate": 4.729847312023389e-05, "loss": 0.0486, "step": 2400000 }, { "epoch": 0.16, "learning_rate": 4.729791007410025e-05, "loss": 0.0487, "step": 2400500 }, { "epoch": 0.16, "learning_rate": 4.7297347027966616e-05, "loss": 0.0454, "step": 2401000 }, { "epoch": 0.16, "learning_rate": 4.729678510792525e-05, "loss": 0.0511, "step": 2401500 }, { "epoch": 0.16, "learning_rate": 4.729622206179161e-05, "loss": 0.0455, "step": 2402000 }, { "epoch": 0.16, "learning_rate": 4.7295659015657976e-05, "loss": 0.0492, "step": 2402500 }, { "epoch": 0.16, "learning_rate": 4.729509596952435e-05, "loss": 0.0488, "step": 2403000 }, { "epoch": 0.16, "learning_rate": 4.7294532923390704e-05, "loss": 0.0474, "step": 2403500 }, { "epoch": 0.16, "learning_rate": 4.729396987725707e-05, "loss": 0.0476, "step": 2404000 }, { "epoch": 0.16, "learning_rate": 4.729340683112344e-05, "loss": 0.0467, "step": 2404500 }, { "epoch": 0.16, "learning_rate": 4.7292843784989796e-05, "loss": 0.0442, "step": 2405000 }, { "epoch": 0.16, "learning_rate": 4.7292281864948435e-05, "loss": 0.0472, "step": 2405500 }, { "epoch": 0.16, "learning_rate": 4.729171881881479e-05, "loss": 0.0477, "step": 2406000 }, { "epoch": 0.16, "learning_rate": 4.729115577268116e-05, "loss": 0.0444, "step": 2406500 }, { "epoch": 0.16, "learning_rate": 4.729059272654753e-05, "loss": 0.0509, "step": 2407000 }, { "epoch": 0.16, "learning_rate": 4.729003080650616e-05, "loss": 0.047, "step": 2407500 }, { "epoch": 0.16, "learning_rate": 4.7289467760372523e-05, "loss": 0.0485, "step": 2408000 }, { "epoch": 0.16, "learning_rate": 4.728890471423889e-05, "loss": 0.0473, "step": 2408500 }, { "epoch": 0.16, "learning_rate": 4.728834166810525e-05, "loss": 0.0452, "step": 2409000 }, { "epoch": 0.16, "learning_rate": 4.7287779748063884e-05, "loss": 0.0473, "step": 2409500 }, { "epoch": 0.16, "learning_rate": 4.728721670193025e-05, "loss": 0.0441, "step": 2410000 }, { "epoch": 0.16, "learning_rate": 4.728665365579661e-05, "loss": 0.0473, "step": 2410500 }, { "epoch": 0.16, "learning_rate": 4.7286090609662976e-05, "loss": 0.043, "step": 2411000 }, { "epoch": 0.16, "learning_rate": 4.728552868962161e-05, "loss": 0.0485, "step": 2411500 }, { "epoch": 0.16, "learning_rate": 4.728496564348797e-05, "loss": 0.0489, "step": 2412000 }, { "epoch": 0.16, "learning_rate": 4.7284402597354336e-05, "loss": 0.0503, "step": 2412500 }, { "epoch": 0.16, "learning_rate": 4.728383955122071e-05, "loss": 0.047, "step": 2413000 }, { "epoch": 0.16, "learning_rate": 4.7283276505087064e-05, "loss": 0.0472, "step": 2413500 }, { "epoch": 0.16, "learning_rate": 4.72827145850457e-05, "loss": 0.0477, "step": 2414000 }, { "epoch": 0.16, "learning_rate": 4.728215153891206e-05, "loss": 0.0448, "step": 2414500 }, { "epoch": 0.16, "learning_rate": 4.728158849277843e-05, "loss": 0.0455, "step": 2415000 }, { "epoch": 0.16, "learning_rate": 4.7281025446644795e-05, "loss": 0.0471, "step": 2415500 }, { "epoch": 0.16, "learning_rate": 4.728046240051116e-05, "loss": 0.0475, "step": 2416000 }, { "epoch": 0.16, "learning_rate": 4.727989935437752e-05, "loss": 0.0445, "step": 2416500 }, { "epoch": 0.16, "learning_rate": 4.727933630824389e-05, "loss": 0.0458, "step": 2417000 }, { "epoch": 0.16, "learning_rate": 4.727877438820252e-05, "loss": 0.0444, "step": 2417500 }, { "epoch": 0.16, "learning_rate": 4.727821246816115e-05, "loss": 0.0443, "step": 2418000 }, { "epoch": 0.16, "learning_rate": 4.7277649422027515e-05, "loss": 0.0485, "step": 2418500 }, { "epoch": 0.16, "learning_rate": 4.727708637589388e-05, "loss": 0.043, "step": 2419000 }, { "epoch": 0.16, "learning_rate": 4.727652332976025e-05, "loss": 0.046, "step": 2419500 }, { "epoch": 0.16, "learning_rate": 4.727596028362661e-05, "loss": 0.0471, "step": 2420000 }, { "epoch": 0.16, "learning_rate": 4.727539723749297e-05, "loss": 0.047, "step": 2420500 }, { "epoch": 0.16, "learning_rate": 4.727483419135934e-05, "loss": 0.0491, "step": 2421000 }, { "epoch": 0.16, "learning_rate": 4.72742711452257e-05, "loss": 0.0459, "step": 2421500 }, { "epoch": 0.16, "learning_rate": 4.727370809909207e-05, "loss": 0.0509, "step": 2422000 }, { "epoch": 0.16, "learning_rate": 4.7273145052958434e-05, "loss": 0.0451, "step": 2422500 }, { "epoch": 0.16, "learning_rate": 4.7272583132917066e-05, "loss": 0.048, "step": 2423000 }, { "epoch": 0.16, "learning_rate": 4.727202008678343e-05, "loss": 0.0478, "step": 2423500 }, { "epoch": 0.16, "learning_rate": 4.7271457040649794e-05, "loss": 0.0478, "step": 2424000 }, { "epoch": 0.16, "learning_rate": 4.727089399451616e-05, "loss": 0.0511, "step": 2424500 }, { "epoch": 0.16, "learning_rate": 4.727033207447479e-05, "loss": 0.0501, "step": 2425000 }, { "epoch": 0.16, "learning_rate": 4.7269769028341154e-05, "loss": 0.0432, "step": 2425500 }, { "epoch": 0.16, "learning_rate": 4.726920598220752e-05, "loss": 0.0481, "step": 2426000 }, { "epoch": 0.16, "learning_rate": 4.726864293607388e-05, "loss": 0.0445, "step": 2426500 }, { "epoch": 0.16, "learning_rate": 4.7268079889940246e-05, "loss": 0.0472, "step": 2427000 }, { "epoch": 0.16, "learning_rate": 4.726751796989888e-05, "loss": 0.0496, "step": 2427500 }, { "epoch": 0.16, "learning_rate": 4.726695492376524e-05, "loss": 0.0491, "step": 2428000 }, { "epoch": 0.16, "learning_rate": 4.726639187763161e-05, "loss": 0.0499, "step": 2428500 }, { "epoch": 0.16, "learning_rate": 4.726582883149797e-05, "loss": 0.0475, "step": 2429000 }, { "epoch": 0.16, "learning_rate": 4.7265265785364335e-05, "loss": 0.0488, "step": 2429500 }, { "epoch": 0.16, "learning_rate": 4.726470386532297e-05, "loss": 0.0465, "step": 2430000 }, { "epoch": 0.16, "learning_rate": 4.726414081918934e-05, "loss": 0.0449, "step": 2430500 }, { "epoch": 0.16, "learning_rate": 4.72635777730557e-05, "loss": 0.0482, "step": 2431000 }, { "epoch": 0.16, "learning_rate": 4.7263014726922066e-05, "loss": 0.0472, "step": 2431500 }, { "epoch": 0.16, "learning_rate": 4.72624528068807e-05, "loss": 0.0484, "step": 2432000 }, { "epoch": 0.16, "learning_rate": 4.726188976074706e-05, "loss": 0.0491, "step": 2432500 }, { "epoch": 0.16, "learning_rate": 4.7261326714613426e-05, "loss": 0.048, "step": 2433000 }, { "epoch": 0.16, "learning_rate": 4.726076366847979e-05, "loss": 0.0435, "step": 2433500 }, { "epoch": 0.16, "learning_rate": 4.7260200622346154e-05, "loss": 0.0463, "step": 2434000 }, { "epoch": 0.16, "learning_rate": 4.7259638702304786e-05, "loss": 0.0494, "step": 2434500 }, { "epoch": 0.16, "learning_rate": 4.725907565617116e-05, "loss": 0.0454, "step": 2435000 }, { "epoch": 0.16, "learning_rate": 4.7258512610037514e-05, "loss": 0.044, "step": 2435500 }, { "epoch": 0.16, "learning_rate": 4.725794956390388e-05, "loss": 0.0463, "step": 2436000 }, { "epoch": 0.16, "learning_rate": 4.725738651777025e-05, "loss": 0.0481, "step": 2436500 }, { "epoch": 0.16, "learning_rate": 4.7256824597728874e-05, "loss": 0.0516, "step": 2437000 }, { "epoch": 0.16, "learning_rate": 4.7256261551595245e-05, "loss": 0.0449, "step": 2437500 }, { "epoch": 0.16, "learning_rate": 4.72556985054616e-05, "loss": 0.0439, "step": 2438000 }, { "epoch": 0.16, "learning_rate": 4.725513545932797e-05, "loss": 0.0454, "step": 2438500 }, { "epoch": 0.16, "learning_rate": 4.725457241319434e-05, "loss": 0.046, "step": 2439000 }, { "epoch": 0.16, "learning_rate": 4.725401049315297e-05, "loss": 0.0461, "step": 2439500 }, { "epoch": 0.16, "learning_rate": 4.725344744701933e-05, "loss": 0.046, "step": 2440000 }, { "epoch": 0.16, "learning_rate": 4.72528844008857e-05, "loss": 0.0462, "step": 2440500 }, { "epoch": 0.16, "learning_rate": 4.725232135475206e-05, "loss": 0.0449, "step": 2441000 }, { "epoch": 0.16, "learning_rate": 4.7251758308618425e-05, "loss": 0.0477, "step": 2441500 }, { "epoch": 0.16, "learning_rate": 4.725119638857706e-05, "loss": 0.0452, "step": 2442000 }, { "epoch": 0.17, "learning_rate": 4.725063334244342e-05, "loss": 0.0471, "step": 2442500 }, { "epoch": 0.17, "learning_rate": 4.7250070296309785e-05, "loss": 0.046, "step": 2443000 }, { "epoch": 0.17, "learning_rate": 4.724950725017615e-05, "loss": 0.0487, "step": 2443500 }, { "epoch": 0.17, "learning_rate": 4.724894533013478e-05, "loss": 0.0454, "step": 2444000 }, { "epoch": 0.17, "learning_rate": 4.7248382284001146e-05, "loss": 0.045, "step": 2444500 }, { "epoch": 0.17, "learning_rate": 4.7247819237867516e-05, "loss": 0.051, "step": 2445000 }, { "epoch": 0.17, "learning_rate": 4.724725619173388e-05, "loss": 0.0457, "step": 2445500 }, { "epoch": 0.17, "learning_rate": 4.724669314560024e-05, "loss": 0.0487, "step": 2446000 }, { "epoch": 0.17, "learning_rate": 4.724613009946661e-05, "loss": 0.047, "step": 2446500 }, { "epoch": 0.17, "learning_rate": 4.724556705333297e-05, "loss": 0.0472, "step": 2447000 }, { "epoch": 0.17, "learning_rate": 4.7245004007199336e-05, "loss": 0.044, "step": 2447500 }, { "epoch": 0.17, "learning_rate": 4.72444409610657e-05, "loss": 0.0468, "step": 2448000 }, { "epoch": 0.17, "learning_rate": 4.724387904102433e-05, "loss": 0.0443, "step": 2448500 }, { "epoch": 0.17, "learning_rate": 4.7243315994890696e-05, "loss": 0.0481, "step": 2449000 }, { "epoch": 0.17, "learning_rate": 4.724275294875706e-05, "loss": 0.0472, "step": 2449500 }, { "epoch": 0.17, "learning_rate": 4.7242189902623424e-05, "loss": 0.0479, "step": 2450000 }, { "epoch": 0.17, "learning_rate": 4.724162685648979e-05, "loss": 0.0457, "step": 2450500 }, { "epoch": 0.17, "learning_rate": 4.724106493644842e-05, "loss": 0.0485, "step": 2451000 }, { "epoch": 0.17, "learning_rate": 4.7240501890314785e-05, "loss": 0.0483, "step": 2451500 }, { "epoch": 0.17, "learning_rate": 4.7239938844181155e-05, "loss": 0.045, "step": 2452000 }, { "epoch": 0.17, "learning_rate": 4.723937579804751e-05, "loss": 0.0476, "step": 2452500 }, { "epoch": 0.17, "learning_rate": 4.723881387800615e-05, "loss": 0.0494, "step": 2453000 }, { "epoch": 0.17, "learning_rate": 4.723825083187251e-05, "loss": 0.0442, "step": 2453500 }, { "epoch": 0.17, "learning_rate": 4.723768778573888e-05, "loss": 0.0497, "step": 2454000 }, { "epoch": 0.17, "learning_rate": 4.7237124739605244e-05, "loss": 0.0477, "step": 2454500 }, { "epoch": 0.17, "learning_rate": 4.72365616934716e-05, "loss": 0.0483, "step": 2455000 }, { "epoch": 0.17, "learning_rate": 4.723599977343024e-05, "loss": 0.0484, "step": 2455500 }, { "epoch": 0.17, "learning_rate": 4.72354367272966e-05, "loss": 0.0446, "step": 2456000 }, { "epoch": 0.17, "learning_rate": 4.723487368116297e-05, "loss": 0.0497, "step": 2456500 }, { "epoch": 0.17, "learning_rate": 4.723431063502933e-05, "loss": 0.0462, "step": 2457000 }, { "epoch": 0.17, "learning_rate": 4.7233747588895696e-05, "loss": 0.0507, "step": 2457500 }, { "epoch": 0.17, "learning_rate": 4.723318454276206e-05, "loss": 0.0477, "step": 2458000 }, { "epoch": 0.17, "learning_rate": 4.723262262272069e-05, "loss": 0.0495, "step": 2458500 }, { "epoch": 0.17, "learning_rate": 4.7232059576587056e-05, "loss": 0.0453, "step": 2459000 }, { "epoch": 0.17, "learning_rate": 4.723149653045342e-05, "loss": 0.0454, "step": 2459500 }, { "epoch": 0.17, "learning_rate": 4.723093348431979e-05, "loss": 0.0442, "step": 2460000 }, { "epoch": 0.17, "learning_rate": 4.723037043818615e-05, "loss": 0.0494, "step": 2460500 }, { "epoch": 0.17, "learning_rate": 4.722980851814479e-05, "loss": 0.049, "step": 2461000 }, { "epoch": 0.17, "learning_rate": 4.7229245472011144e-05, "loss": 0.0455, "step": 2461500 }, { "epoch": 0.17, "learning_rate": 4.7228682425877515e-05, "loss": 0.0489, "step": 2462000 }, { "epoch": 0.17, "learning_rate": 4.722811937974388e-05, "loss": 0.0456, "step": 2462500 }, { "epoch": 0.17, "learning_rate": 4.722755745970251e-05, "loss": 0.0472, "step": 2463000 }, { "epoch": 0.17, "learning_rate": 4.7226994413568875e-05, "loss": 0.043, "step": 2463500 }, { "epoch": 0.17, "learning_rate": 4.722643136743524e-05, "loss": 0.0468, "step": 2464000 }, { "epoch": 0.17, "learning_rate": 4.72258683213016e-05, "loss": 0.0449, "step": 2464500 }, { "epoch": 0.17, "learning_rate": 4.722530527516797e-05, "loss": 0.045, "step": 2465000 }, { "epoch": 0.17, "learning_rate": 4.722474222903433e-05, "loss": 0.0477, "step": 2465500 }, { "epoch": 0.17, "learning_rate": 4.7224179182900695e-05, "loss": 0.0504, "step": 2466000 }, { "epoch": 0.17, "learning_rate": 4.722361726285933e-05, "loss": 0.0478, "step": 2466500 }, { "epoch": 0.17, "learning_rate": 4.722305421672569e-05, "loss": 0.0475, "step": 2467000 }, { "epoch": 0.17, "learning_rate": 4.722249117059206e-05, "loss": 0.0462, "step": 2467500 }, { "epoch": 0.17, "learning_rate": 4.722192812445842e-05, "loss": 0.0505, "step": 2468000 }, { "epoch": 0.17, "learning_rate": 4.722136620441706e-05, "loss": 0.0453, "step": 2468500 }, { "epoch": 0.17, "learning_rate": 4.7220803158283416e-05, "loss": 0.0467, "step": 2469000 }, { "epoch": 0.17, "learning_rate": 4.722024011214978e-05, "loss": 0.0498, "step": 2469500 }, { "epoch": 0.17, "learning_rate": 4.721967706601615e-05, "loss": 0.0474, "step": 2470000 }, { "epoch": 0.17, "learning_rate": 4.721911514597478e-05, "loss": 0.0462, "step": 2470500 }, { "epoch": 0.17, "learning_rate": 4.7218552099841147e-05, "loss": 0.0474, "step": 2471000 }, { "epoch": 0.17, "learning_rate": 4.7217989053707504e-05, "loss": 0.0447, "step": 2471500 }, { "epoch": 0.17, "learning_rate": 4.7217426007573875e-05, "loss": 0.0448, "step": 2472000 }, { "epoch": 0.17, "learning_rate": 4.721686296144024e-05, "loss": 0.0534, "step": 2472500 }, { "epoch": 0.17, "learning_rate": 4.721630104139887e-05, "loss": 0.0478, "step": 2473000 }, { "epoch": 0.17, "learning_rate": 4.7215737995265235e-05, "loss": 0.0466, "step": 2473500 }, { "epoch": 0.17, "learning_rate": 4.721517607522387e-05, "loss": 0.0448, "step": 2474000 }, { "epoch": 0.17, "learning_rate": 4.721461302909023e-05, "loss": 0.0481, "step": 2474500 }, { "epoch": 0.17, "learning_rate": 4.72140499829566e-05, "loss": 0.0456, "step": 2475000 }, { "epoch": 0.17, "learning_rate": 4.721348693682296e-05, "loss": 0.0459, "step": 2475500 }, { "epoch": 0.17, "learning_rate": 4.721292389068932e-05, "loss": 0.0457, "step": 2476000 }, { "epoch": 0.17, "learning_rate": 4.7212360844555694e-05, "loss": 0.0456, "step": 2476500 }, { "epoch": 0.17, "learning_rate": 4.721179779842205e-05, "loss": 0.0534, "step": 2477000 }, { "epoch": 0.17, "learning_rate": 4.721123475228842e-05, "loss": 0.0479, "step": 2477500 }, { "epoch": 0.17, "learning_rate": 4.7210671706154786e-05, "loss": 0.0453, "step": 2478000 }, { "epoch": 0.17, "learning_rate": 4.721010978611342e-05, "loss": 0.0395, "step": 2478500 }, { "epoch": 0.17, "learning_rate": 4.720954673997978e-05, "loss": 0.047, "step": 2479000 }, { "epoch": 0.17, "learning_rate": 4.7208983693846146e-05, "loss": 0.0445, "step": 2479500 }, { "epoch": 0.17, "learning_rate": 4.720842064771251e-05, "loss": 0.044, "step": 2480000 }, { "epoch": 0.17, "learning_rate": 4.7207857601578874e-05, "loss": 0.0459, "step": 2480500 }, { "epoch": 0.17, "learning_rate": 4.7207295681537506e-05, "loss": 0.0486, "step": 2481000 }, { "epoch": 0.17, "learning_rate": 4.720673263540387e-05, "loss": 0.0461, "step": 2481500 }, { "epoch": 0.17, "learning_rate": 4.7206169589270234e-05, "loss": 0.0495, "step": 2482000 }, { "epoch": 0.17, "learning_rate": 4.72056065431366e-05, "loss": 0.0507, "step": 2482500 }, { "epoch": 0.17, "learning_rate": 4.720504462309523e-05, "loss": 0.0495, "step": 2483000 }, { "epoch": 0.17, "learning_rate": 4.7204481576961594e-05, "loss": 0.0512, "step": 2483500 }, { "epoch": 0.17, "learning_rate": 4.7203918530827965e-05, "loss": 0.0487, "step": 2484000 }, { "epoch": 0.17, "learning_rate": 4.720335548469432e-05, "loss": 0.0496, "step": 2484500 }, { "epoch": 0.17, "learning_rate": 4.7202792438560686e-05, "loss": 0.0462, "step": 2485000 }, { "epoch": 0.17, "learning_rate": 4.720222939242706e-05, "loss": 0.0506, "step": 2485500 }, { "epoch": 0.17, "learning_rate": 4.720166634629342e-05, "loss": 0.0494, "step": 2486000 }, { "epoch": 0.17, "learning_rate": 4.7201103300159785e-05, "loss": 0.0458, "step": 2486500 }, { "epoch": 0.17, "learning_rate": 4.720054025402615e-05, "loss": 0.0455, "step": 2487000 }, { "epoch": 0.17, "learning_rate": 4.719997720789251e-05, "loss": 0.0488, "step": 2487500 }, { "epoch": 0.17, "learning_rate": 4.719941416175888e-05, "loss": 0.0475, "step": 2488000 }, { "epoch": 0.17, "learning_rate": 4.719885224171751e-05, "loss": 0.0495, "step": 2488500 }, { "epoch": 0.17, "learning_rate": 4.719828919558387e-05, "loss": 0.0465, "step": 2489000 }, { "epoch": 0.17, "learning_rate": 4.719772614945024e-05, "loss": 0.0447, "step": 2489500 }, { "epoch": 0.17, "learning_rate": 4.719716310331661e-05, "loss": 0.0469, "step": 2490000 }, { "epoch": 0.17, "learning_rate": 4.7196600057182965e-05, "loss": 0.0468, "step": 2490500 }, { "epoch": 0.17, "learning_rate": 4.719603701104933e-05, "loss": 0.0436, "step": 2491000 }, { "epoch": 0.17, "learning_rate": 4.71954739649157e-05, "loss": 0.0431, "step": 2491500 }, { "epoch": 0.17, "learning_rate": 4.7194912044874325e-05, "loss": 0.0469, "step": 2492000 }, { "epoch": 0.17, "learning_rate": 4.7194348998740696e-05, "loss": 0.0459, "step": 2492500 }, { "epoch": 0.17, "learning_rate": 4.7193785952607053e-05, "loss": 0.0492, "step": 2493000 }, { "epoch": 0.17, "learning_rate": 4.7193222906473424e-05, "loss": 0.0498, "step": 2493500 }, { "epoch": 0.17, "learning_rate": 4.719266098643205e-05, "loss": 0.0515, "step": 2494000 }, { "epoch": 0.17, "learning_rate": 4.719209794029842e-05, "loss": 0.0413, "step": 2494500 }, { "epoch": 0.17, "learning_rate": 4.7191534894164784e-05, "loss": 0.05, "step": 2495000 }, { "epoch": 0.17, "learning_rate": 4.719097184803115e-05, "loss": 0.0472, "step": 2495500 }, { "epoch": 0.17, "learning_rate": 4.719040992798978e-05, "loss": 0.0483, "step": 2496000 }, { "epoch": 0.17, "learning_rate": 4.7189846881856145e-05, "loss": 0.0462, "step": 2496500 }, { "epoch": 0.17, "learning_rate": 4.718928383572251e-05, "loss": 0.0465, "step": 2497000 }, { "epoch": 0.17, "learning_rate": 4.718872078958887e-05, "loss": 0.0456, "step": 2497500 }, { "epoch": 0.17, "learning_rate": 4.718815774345524e-05, "loss": 0.0441, "step": 2498000 }, { "epoch": 0.17, "learning_rate": 4.718759582341387e-05, "loss": 0.0456, "step": 2498500 }, { "epoch": 0.17, "learning_rate": 4.718703277728023e-05, "loss": 0.0479, "step": 2499000 }, { "epoch": 0.17, "learning_rate": 4.71864697311466e-05, "loss": 0.0475, "step": 2499500 }, { "epoch": 0.17, "learning_rate": 4.718590668501297e-05, "loss": 0.0458, "step": 2500000 }, { "epoch": 0.17, "learning_rate": 4.718534476497159e-05, "loss": 0.048, "step": 2500500 }, { "epoch": 0.17, "learning_rate": 4.7184781718837964e-05, "loss": 0.0471, "step": 2501000 }, { "epoch": 0.17, "learning_rate": 4.718421867270433e-05, "loss": 0.0463, "step": 2501500 }, { "epoch": 0.17, "learning_rate": 4.718365562657069e-05, "loss": 0.0463, "step": 2502000 }, { "epoch": 0.17, "learning_rate": 4.7183092580437056e-05, "loss": 0.0476, "step": 2502500 }, { "epoch": 0.17, "learning_rate": 4.718253066039569e-05, "loss": 0.0479, "step": 2503000 }, { "epoch": 0.17, "learning_rate": 4.718196761426205e-05, "loss": 0.047, "step": 2503500 }, { "epoch": 0.17, "learning_rate": 4.7181404568128416e-05, "loss": 0.046, "step": 2504000 }, { "epoch": 0.17, "learning_rate": 4.718084152199478e-05, "loss": 0.0452, "step": 2504500 }, { "epoch": 0.17, "learning_rate": 4.718027960195341e-05, "loss": 0.047, "step": 2505000 }, { "epoch": 0.17, "learning_rate": 4.7179716555819776e-05, "loss": 0.0497, "step": 2505500 }, { "epoch": 0.17, "learning_rate": 4.717915350968614e-05, "loss": 0.0468, "step": 2506000 }, { "epoch": 0.17, "learning_rate": 4.717859046355251e-05, "loss": 0.0463, "step": 2506500 }, { "epoch": 0.17, "learning_rate": 4.7178028543511136e-05, "loss": 0.0492, "step": 2507000 }, { "epoch": 0.17, "learning_rate": 4.717746549737751e-05, "loss": 0.0464, "step": 2507500 }, { "epoch": 0.17, "learning_rate": 4.7176902451243864e-05, "loss": 0.0499, "step": 2508000 }, { "epoch": 0.17, "learning_rate": 4.717633940511023e-05, "loss": 0.0455, "step": 2508500 }, { "epoch": 0.17, "learning_rate": 4.71757763589766e-05, "loss": 0.0458, "step": 2509000 }, { "epoch": 0.17, "learning_rate": 4.7175214438935225e-05, "loss": 0.0502, "step": 2509500 }, { "epoch": 0.17, "learning_rate": 4.7174651392801595e-05, "loss": 0.0456, "step": 2510000 }, { "epoch": 0.17, "learning_rate": 4.717408834666795e-05, "loss": 0.0511, "step": 2510500 }, { "epoch": 0.17, "learning_rate": 4.7173525300534323e-05, "loss": 0.0484, "step": 2511000 }, { "epoch": 0.17, "learning_rate": 4.717296225440069e-05, "loss": 0.0459, "step": 2511500 }, { "epoch": 0.17, "learning_rate": 4.717240033435932e-05, "loss": 0.0458, "step": 2512000 }, { "epoch": 0.17, "learning_rate": 4.7171837288225684e-05, "loss": 0.0461, "step": 2512500 }, { "epoch": 0.17, "learning_rate": 4.717127424209205e-05, "loss": 0.0466, "step": 2513000 }, { "epoch": 0.17, "learning_rate": 4.717071119595841e-05, "loss": 0.0462, "step": 2513500 }, { "epoch": 0.17, "learning_rate": 4.7170148149824776e-05, "loss": 0.0475, "step": 2514000 }, { "epoch": 0.17, "learning_rate": 4.7169585103691146e-05, "loss": 0.0426, "step": 2514500 }, { "epoch": 0.17, "learning_rate": 4.716902430974204e-05, "loss": 0.0444, "step": 2515000 }, { "epoch": 0.17, "learning_rate": 4.7168461263608404e-05, "loss": 0.0452, "step": 2515500 }, { "epoch": 0.17, "learning_rate": 4.716789821747477e-05, "loss": 0.0479, "step": 2516000 }, { "epoch": 0.17, "learning_rate": 4.716733517134113e-05, "loss": 0.0467, "step": 2516500 }, { "epoch": 0.17, "learning_rate": 4.7166772125207496e-05, "loss": 0.0478, "step": 2517000 }, { "epoch": 0.17, "learning_rate": 4.716620907907387e-05, "loss": 0.0511, "step": 2517500 }, { "epoch": 0.17, "learning_rate": 4.716564603294023e-05, "loss": 0.0464, "step": 2518000 }, { "epoch": 0.17, "learning_rate": 4.716508298680659e-05, "loss": 0.0437, "step": 2518500 }, { "epoch": 0.17, "learning_rate": 4.716452106676523e-05, "loss": 0.0454, "step": 2519000 }, { "epoch": 0.17, "learning_rate": 4.716395914672386e-05, "loss": 0.0436, "step": 2519500 }, { "epoch": 0.17, "learning_rate": 4.716339610059022e-05, "loss": 0.0449, "step": 2520000 }, { "epoch": 0.17, "learning_rate": 4.716283305445659e-05, "loss": 0.042, "step": 2520500 }, { "epoch": 0.17, "learning_rate": 4.716227000832295e-05, "loss": 0.0431, "step": 2521000 }, { "epoch": 0.17, "learning_rate": 4.7161706962189315e-05, "loss": 0.0505, "step": 2521500 }, { "epoch": 0.17, "learning_rate": 4.716114391605568e-05, "loss": 0.048, "step": 2522000 }, { "epoch": 0.17, "learning_rate": 4.716058086992204e-05, "loss": 0.047, "step": 2522500 }, { "epoch": 0.17, "learning_rate": 4.716001782378841e-05, "loss": 0.0483, "step": 2523000 }, { "epoch": 0.17, "learning_rate": 4.715945590374704e-05, "loss": 0.0507, "step": 2523500 }, { "epoch": 0.17, "learning_rate": 4.715889285761341e-05, "loss": 0.044, "step": 2524000 }, { "epoch": 0.17, "learning_rate": 4.715832981147977e-05, "loss": 0.0462, "step": 2524500 }, { "epoch": 0.17, "learning_rate": 4.7157767891438406e-05, "loss": 0.0488, "step": 2525000 }, { "epoch": 0.17, "learning_rate": 4.7157204845304764e-05, "loss": 0.0463, "step": 2525500 }, { "epoch": 0.17, "learning_rate": 4.715664179917113e-05, "loss": 0.0442, "step": 2526000 }, { "epoch": 0.17, "learning_rate": 4.71560787530375e-05, "loss": 0.047, "step": 2526500 }, { "epoch": 0.17, "learning_rate": 4.7155515706903856e-05, "loss": 0.0477, "step": 2527000 }, { "epoch": 0.17, "learning_rate": 4.7154952660770226e-05, "loss": 0.0473, "step": 2527500 }, { "epoch": 0.17, "learning_rate": 4.715438961463659e-05, "loss": 0.0444, "step": 2528000 }, { "epoch": 0.17, "learning_rate": 4.7153826568502954e-05, "loss": 0.0451, "step": 2528500 }, { "epoch": 0.17, "learning_rate": 4.715326352236932e-05, "loss": 0.0483, "step": 2529000 }, { "epoch": 0.17, "learning_rate": 4.715270160232795e-05, "loss": 0.0457, "step": 2529500 }, { "epoch": 0.17, "learning_rate": 4.7152138556194315e-05, "loss": 0.0493, "step": 2530000 }, { "epoch": 0.17, "learning_rate": 4.715157551006068e-05, "loss": 0.0465, "step": 2530500 }, { "epoch": 0.17, "learning_rate": 4.715101246392705e-05, "loss": 0.0459, "step": 2531000 }, { "epoch": 0.17, "learning_rate": 4.7150450543885675e-05, "loss": 0.0448, "step": 2531500 }, { "epoch": 0.17, "learning_rate": 4.7149887497752045e-05, "loss": 0.0464, "step": 2532000 }, { "epoch": 0.17, "learning_rate": 4.71493244516184e-05, "loss": 0.0471, "step": 2532500 }, { "epoch": 0.17, "learning_rate": 4.7148761405484773e-05, "loss": 0.0474, "step": 2533000 }, { "epoch": 0.17, "learning_rate": 4.71481994854434e-05, "loss": 0.0462, "step": 2533500 }, { "epoch": 0.17, "learning_rate": 4.714763643930977e-05, "loss": 0.0444, "step": 2534000 }, { "epoch": 0.17, "learning_rate": 4.7147073393176134e-05, "loss": 0.0474, "step": 2534500 }, { "epoch": 0.17, "learning_rate": 4.714651034704249e-05, "loss": 0.0492, "step": 2535000 }, { "epoch": 0.17, "learning_rate": 4.714594730090886e-05, "loss": 0.0512, "step": 2535500 }, { "epoch": 0.17, "learning_rate": 4.7145384254775226e-05, "loss": 0.0488, "step": 2536000 }, { "epoch": 0.17, "learning_rate": 4.714482120864159e-05, "loss": 0.0467, "step": 2536500 }, { "epoch": 0.17, "learning_rate": 4.714425928860022e-05, "loss": 0.0452, "step": 2537000 }, { "epoch": 0.17, "learning_rate": 4.7143696242466586e-05, "loss": 0.0452, "step": 2537500 }, { "epoch": 0.17, "learning_rate": 4.714313319633295e-05, "loss": 0.0442, "step": 2538000 }, { "epoch": 0.17, "learning_rate": 4.7142570150199314e-05, "loss": 0.0481, "step": 2538500 }, { "epoch": 0.17, "learning_rate": 4.714200710406568e-05, "loss": 0.0456, "step": 2539000 }, { "epoch": 0.17, "learning_rate": 4.714144518402431e-05, "loss": 0.0433, "step": 2539500 }, { "epoch": 0.17, "learning_rate": 4.7140882137890674e-05, "loss": 0.0452, "step": 2540000 }, { "epoch": 0.17, "learning_rate": 4.714031909175704e-05, "loss": 0.0456, "step": 2540500 }, { "epoch": 0.17, "learning_rate": 4.713975604562341e-05, "loss": 0.0431, "step": 2541000 }, { "epoch": 0.17, "learning_rate": 4.7139192999489766e-05, "loss": 0.0476, "step": 2541500 }, { "epoch": 0.17, "learning_rate": 4.7138631079448405e-05, "loss": 0.0508, "step": 2542000 }, { "epoch": 0.17, "learning_rate": 4.713806803331476e-05, "loss": 0.0474, "step": 2542500 }, { "epoch": 0.17, "learning_rate": 4.713750498718113e-05, "loss": 0.0492, "step": 2543000 }, { "epoch": 0.17, "learning_rate": 4.71369419410475e-05, "loss": 0.0472, "step": 2543500 }, { "epoch": 0.17, "learning_rate": 4.713638002100613e-05, "loss": 0.0475, "step": 2544000 }, { "epoch": 0.17, "learning_rate": 4.713581697487249e-05, "loss": 0.0485, "step": 2544500 }, { "epoch": 0.17, "learning_rate": 4.713525392873886e-05, "loss": 0.042, "step": 2545000 }, { "epoch": 0.17, "learning_rate": 4.713469200869749e-05, "loss": 0.048, "step": 2545500 }, { "epoch": 0.17, "learning_rate": 4.7134128962563853e-05, "loss": 0.047, "step": 2546000 }, { "epoch": 0.17, "learning_rate": 4.713356591643022e-05, "loss": 0.0442, "step": 2546500 }, { "epoch": 0.17, "learning_rate": 4.713300287029658e-05, "loss": 0.0462, "step": 2547000 }, { "epoch": 0.17, "learning_rate": 4.713243982416295e-05, "loss": 0.0452, "step": 2547500 }, { "epoch": 0.17, "learning_rate": 4.713187677802931e-05, "loss": 0.048, "step": 2548000 }, { "epoch": 0.17, "learning_rate": 4.7131313731895673e-05, "loss": 0.0462, "step": 2548500 }, { "epoch": 0.17, "learning_rate": 4.7130750685762044e-05, "loss": 0.0452, "step": 2549000 }, { "epoch": 0.17, "learning_rate": 4.71301876396284e-05, "loss": 0.05, "step": 2549500 }, { "epoch": 0.17, "learning_rate": 4.712962459349477e-05, "loss": 0.0461, "step": 2550000 }, { "epoch": 0.17, "learning_rate": 4.71290626734534e-05, "loss": 0.0454, "step": 2550500 }, { "epoch": 0.17, "learning_rate": 4.712849962731977e-05, "loss": 0.046, "step": 2551000 }, { "epoch": 0.17, "learning_rate": 4.712793658118613e-05, "loss": 0.045, "step": 2551500 }, { "epoch": 0.17, "learning_rate": 4.7127373535052496e-05, "loss": 0.0484, "step": 2552000 }, { "epoch": 0.17, "learning_rate": 4.712681161501113e-05, "loss": 0.047, "step": 2552500 }, { "epoch": 0.17, "learning_rate": 4.712624856887749e-05, "loss": 0.0427, "step": 2553000 }, { "epoch": 0.17, "learning_rate": 4.712568552274386e-05, "loss": 0.0504, "step": 2553500 }, { "epoch": 0.17, "learning_rate": 4.712512247661022e-05, "loss": 0.0416, "step": 2554000 }, { "epoch": 0.17, "learning_rate": 4.712456055656885e-05, "loss": 0.0424, "step": 2554500 }, { "epoch": 0.17, "learning_rate": 4.7123998636527485e-05, "loss": 0.0461, "step": 2555000 }, { "epoch": 0.17, "learning_rate": 4.712343559039385e-05, "loss": 0.0474, "step": 2555500 }, { "epoch": 0.17, "learning_rate": 4.712287254426021e-05, "loss": 0.0445, "step": 2556000 }, { "epoch": 0.17, "learning_rate": 4.712230949812658e-05, "loss": 0.0458, "step": 2556500 }, { "epoch": 0.17, "learning_rate": 4.712174645199294e-05, "loss": 0.0423, "step": 2557000 }, { "epoch": 0.17, "learning_rate": 4.712118340585931e-05, "loss": 0.0428, "step": 2557500 }, { "epoch": 0.17, "learning_rate": 4.712062035972567e-05, "loss": 0.0477, "step": 2558000 }, { "epoch": 0.17, "learning_rate": 4.712005731359204e-05, "loss": 0.0438, "step": 2558500 }, { "epoch": 0.17, "learning_rate": 4.7119494267458404e-05, "loss": 0.0477, "step": 2559000 }, { "epoch": 0.17, "learning_rate": 4.7118932347417036e-05, "loss": 0.0481, "step": 2559500 }, { "epoch": 0.17, "learning_rate": 4.71183693012834e-05, "loss": 0.0457, "step": 2560000 }, { "epoch": 0.17, "learning_rate": 4.7117806255149764e-05, "loss": 0.0485, "step": 2560500 }, { "epoch": 0.17, "learning_rate": 4.711724320901613e-05, "loss": 0.0478, "step": 2561000 }, { "epoch": 0.17, "learning_rate": 4.711668128897476e-05, "loss": 0.0469, "step": 2561500 }, { "epoch": 0.17, "learning_rate": 4.711611936893339e-05, "loss": 0.0453, "step": 2562000 }, { "epoch": 0.17, "learning_rate": 4.7115556322799756e-05, "loss": 0.0429, "step": 2562500 }, { "epoch": 0.17, "learning_rate": 4.711499327666612e-05, "loss": 0.0453, "step": 2563000 }, { "epoch": 0.17, "learning_rate": 4.7114430230532484e-05, "loss": 0.0446, "step": 2563500 }, { "epoch": 0.17, "learning_rate": 4.7113867184398855e-05, "loss": 0.0449, "step": 2564000 }, { "epoch": 0.17, "learning_rate": 4.711330413826521e-05, "loss": 0.0473, "step": 2564500 }, { "epoch": 0.17, "learning_rate": 4.7112741092131576e-05, "loss": 0.0434, "step": 2565000 }, { "epoch": 0.17, "learning_rate": 4.711217804599795e-05, "loss": 0.0465, "step": 2565500 }, { "epoch": 0.17, "learning_rate": 4.7111614999864304e-05, "loss": 0.0462, "step": 2566000 }, { "epoch": 0.17, "learning_rate": 4.7111051953730675e-05, "loss": 0.0478, "step": 2566500 }, { "epoch": 0.17, "learning_rate": 4.71104900336893e-05, "loss": 0.0454, "step": 2567000 }, { "epoch": 0.17, "learning_rate": 4.710992698755567e-05, "loss": 0.0487, "step": 2567500 }, { "epoch": 0.17, "learning_rate": 4.7109363941422035e-05, "loss": 0.0489, "step": 2568000 }, { "epoch": 0.17, "learning_rate": 4.71088008952884e-05, "loss": 0.0486, "step": 2568500 }, { "epoch": 0.17, "learning_rate": 4.710823897524703e-05, "loss": 0.0453, "step": 2569000 }, { "epoch": 0.17, "learning_rate": 4.7107675929113396e-05, "loss": 0.0475, "step": 2569500 }, { "epoch": 0.17, "learning_rate": 4.710711288297976e-05, "loss": 0.0483, "step": 2570000 }, { "epoch": 0.17, "learning_rate": 4.7106549836846124e-05, "loss": 0.045, "step": 2570500 }, { "epoch": 0.17, "learning_rate": 4.710598679071249e-05, "loss": 0.0452, "step": 2571000 }, { "epoch": 0.17, "learning_rate": 4.710542374457885e-05, "loss": 0.0476, "step": 2571500 }, { "epoch": 0.17, "learning_rate": 4.7104861824537484e-05, "loss": 0.0472, "step": 2572000 }, { "epoch": 0.17, "learning_rate": 4.710429877840385e-05, "loss": 0.0479, "step": 2572500 }, { "epoch": 0.17, "learning_rate": 4.710373573227022e-05, "loss": 0.0471, "step": 2573000 }, { "epoch": 0.17, "learning_rate": 4.7103172686136576e-05, "loss": 0.0443, "step": 2573500 }, { "epoch": 0.17, "learning_rate": 4.710260964000294e-05, "loss": 0.0508, "step": 2574000 }, { "epoch": 0.17, "learning_rate": 4.710204771996157e-05, "loss": 0.0459, "step": 2574500 }, { "epoch": 0.17, "learning_rate": 4.7101484673827936e-05, "loss": 0.0454, "step": 2575000 }, { "epoch": 0.17, "learning_rate": 4.710092162769431e-05, "loss": 0.0499, "step": 2575500 }, { "epoch": 0.17, "learning_rate": 4.710035858156067e-05, "loss": 0.0415, "step": 2576000 }, { "epoch": 0.17, "learning_rate": 4.70997966615193e-05, "loss": 0.0428, "step": 2576500 }, { "epoch": 0.17, "learning_rate": 4.709923361538567e-05, "loss": 0.0438, "step": 2577000 }, { "epoch": 0.17, "learning_rate": 4.709867056925203e-05, "loss": 0.0433, "step": 2577500 }, { "epoch": 0.17, "learning_rate": 4.7098107523118395e-05, "loss": 0.0499, "step": 2578000 }, { "epoch": 0.17, "learning_rate": 4.709754447698476e-05, "loss": 0.0459, "step": 2578500 }, { "epoch": 0.17, "learning_rate": 4.709698255694339e-05, "loss": 0.0475, "step": 2579000 }, { "epoch": 0.17, "learning_rate": 4.7096419510809755e-05, "loss": 0.0468, "step": 2579500 }, { "epoch": 0.17, "learning_rate": 4.709585646467612e-05, "loss": 0.0479, "step": 2580000 }, { "epoch": 0.17, "learning_rate": 4.709529341854248e-05, "loss": 0.0471, "step": 2580500 }, { "epoch": 0.17, "learning_rate": 4.7094731498501115e-05, "loss": 0.0443, "step": 2581000 }, { "epoch": 0.17, "learning_rate": 4.709416845236748e-05, "loss": 0.0475, "step": 2581500 }, { "epoch": 0.17, "learning_rate": 4.709360540623385e-05, "loss": 0.048, "step": 2582000 }, { "epoch": 0.17, "learning_rate": 4.709304236010021e-05, "loss": 0.0411, "step": 2582500 }, { "epoch": 0.17, "learning_rate": 4.709247931396658e-05, "loss": 0.0492, "step": 2583000 }, { "epoch": 0.17, "learning_rate": 4.7091917393925204e-05, "loss": 0.0414, "step": 2583500 }, { "epoch": 0.17, "learning_rate": 4.7091354347791574e-05, "loss": 0.049, "step": 2584000 }, { "epoch": 0.17, "learning_rate": 4.709079130165794e-05, "loss": 0.0463, "step": 2584500 }, { "epoch": 0.17, "learning_rate": 4.70902282555243e-05, "loss": 0.0422, "step": 2585000 }, { "epoch": 0.17, "learning_rate": 4.7089665209390666e-05, "loss": 0.0474, "step": 2585500 }, { "epoch": 0.17, "learning_rate": 4.70891032893493e-05, "loss": 0.0454, "step": 2586000 }, { "epoch": 0.17, "learning_rate": 4.708854024321566e-05, "loss": 0.0458, "step": 2586500 }, { "epoch": 0.17, "learning_rate": 4.7087977197082026e-05, "loss": 0.0445, "step": 2587000 }, { "epoch": 0.17, "learning_rate": 4.708741415094839e-05, "loss": 0.0476, "step": 2587500 }, { "epoch": 0.17, "learning_rate": 4.7086851104814754e-05, "loss": 0.0452, "step": 2588000 }, { "epoch": 0.17, "learning_rate": 4.708628918477339e-05, "loss": 0.0466, "step": 2588500 }, { "epoch": 0.17, "learning_rate": 4.708572613863975e-05, "loss": 0.047, "step": 2589000 }, { "epoch": 0.17, "learning_rate": 4.708516309250612e-05, "loss": 0.0427, "step": 2589500 }, { "epoch": 0.17, "learning_rate": 4.7084600046372485e-05, "loss": 0.0477, "step": 2590000 }, { "epoch": 0.18, "learning_rate": 4.708403700023884e-05, "loss": 0.0428, "step": 2590500 }, { "epoch": 0.18, "learning_rate": 4.7083475080197475e-05, "loss": 0.0445, "step": 2591000 }, { "epoch": 0.18, "learning_rate": 4.708291203406384e-05, "loss": 0.046, "step": 2591500 }, { "epoch": 0.18, "learning_rate": 4.708234898793021e-05, "loss": 0.0454, "step": 2592000 }, { "epoch": 0.18, "learning_rate": 4.7081785941796574e-05, "loss": 0.0453, "step": 2592500 }, { "epoch": 0.18, "learning_rate": 4.7081224021755206e-05, "loss": 0.0429, "step": 2593000 }, { "epoch": 0.18, "learning_rate": 4.708066097562157e-05, "loss": 0.0418, "step": 2593500 }, { "epoch": 0.18, "learning_rate": 4.7080097929487934e-05, "loss": 0.0478, "step": 2594000 }, { "epoch": 0.18, "learning_rate": 4.70795348833543e-05, "loss": 0.0454, "step": 2594500 }, { "epoch": 0.18, "learning_rate": 4.707897183722066e-05, "loss": 0.0451, "step": 2595000 }, { "epoch": 0.18, "learning_rate": 4.7078409917179294e-05, "loss": 0.0468, "step": 2595500 }, { "epoch": 0.18, "learning_rate": 4.707784687104566e-05, "loss": 0.0503, "step": 2596000 }, { "epoch": 0.18, "learning_rate": 4.707728382491202e-05, "loss": 0.0453, "step": 2596500 }, { "epoch": 0.18, "learning_rate": 4.7076720778778386e-05, "loss": 0.0468, "step": 2597000 }, { "epoch": 0.18, "learning_rate": 4.707615773264476e-05, "loss": 0.0464, "step": 2597500 }, { "epoch": 0.18, "learning_rate": 4.707559581260338e-05, "loss": 0.0466, "step": 2598000 }, { "epoch": 0.18, "learning_rate": 4.707503276646975e-05, "loss": 0.0437, "step": 2598500 }, { "epoch": 0.18, "learning_rate": 4.707446972033611e-05, "loss": 0.0446, "step": 2599000 }, { "epoch": 0.18, "learning_rate": 4.707390667420248e-05, "loss": 0.0437, "step": 2599500 }, { "epoch": 0.18, "learning_rate": 4.7073344754161106e-05, "loss": 0.0473, "step": 2600000 }, { "epoch": 0.18, "learning_rate": 4.707278170802748e-05, "loss": 0.0465, "step": 2600500 }, { "epoch": 0.18, "learning_rate": 4.707221866189384e-05, "loss": 0.0476, "step": 2601000 }, { "epoch": 0.18, "learning_rate": 4.70716556157602e-05, "loss": 0.0442, "step": 2601500 }, { "epoch": 0.18, "learning_rate": 4.707109256962657e-05, "loss": 0.0477, "step": 2602000 }, { "epoch": 0.18, "learning_rate": 4.707052952349293e-05, "loss": 0.05, "step": 2602500 }, { "epoch": 0.18, "learning_rate": 4.7069967603451565e-05, "loss": 0.0446, "step": 2603000 }, { "epoch": 0.18, "learning_rate": 4.70694056834102e-05, "loss": 0.052, "step": 2603500 }, { "epoch": 0.18, "learning_rate": 4.706884263727656e-05, "loss": 0.0472, "step": 2604000 }, { "epoch": 0.18, "learning_rate": 4.7068279591142926e-05, "loss": 0.0447, "step": 2604500 }, { "epoch": 0.18, "learning_rate": 4.706771654500929e-05, "loss": 0.0457, "step": 2605000 }, { "epoch": 0.18, "learning_rate": 4.7067153498875654e-05, "loss": 0.0438, "step": 2605500 }, { "epoch": 0.18, "learning_rate": 4.706659045274202e-05, "loss": 0.0458, "step": 2606000 }, { "epoch": 0.18, "learning_rate": 4.706602740660839e-05, "loss": 0.0474, "step": 2606500 }, { "epoch": 0.18, "learning_rate": 4.7065464360474746e-05, "loss": 0.0433, "step": 2607000 }, { "epoch": 0.18, "learning_rate": 4.7064901314341116e-05, "loss": 0.0433, "step": 2607500 }, { "epoch": 0.18, "learning_rate": 4.706433939429974e-05, "loss": 0.0472, "step": 2608000 }, { "epoch": 0.18, "learning_rate": 4.706377634816611e-05, "loss": 0.0474, "step": 2608500 }, { "epoch": 0.18, "learning_rate": 4.7063213302032477e-05, "loss": 0.0437, "step": 2609000 }, { "epoch": 0.18, "learning_rate": 4.706265025589884e-05, "loss": 0.0442, "step": 2609500 }, { "epoch": 0.18, "learning_rate": 4.7062087209765205e-05, "loss": 0.0493, "step": 2610000 }, { "epoch": 0.18, "learning_rate": 4.706152528972384e-05, "loss": 0.046, "step": 2610500 }, { "epoch": 0.18, "learning_rate": 4.70609622435902e-05, "loss": 0.0457, "step": 2611000 }, { "epoch": 0.18, "learning_rate": 4.7060399197456565e-05, "loss": 0.0482, "step": 2611500 }, { "epoch": 0.18, "learning_rate": 4.705983615132293e-05, "loss": 0.0487, "step": 2612000 }, { "epoch": 0.18, "learning_rate": 4.705927423128156e-05, "loss": 0.0436, "step": 2612500 }, { "epoch": 0.18, "learning_rate": 4.7058711185147925e-05, "loss": 0.0422, "step": 2613000 }, { "epoch": 0.18, "learning_rate": 4.705814813901429e-05, "loss": 0.0467, "step": 2613500 }, { "epoch": 0.18, "learning_rate": 4.705758509288066e-05, "loss": 0.0511, "step": 2614000 }, { "epoch": 0.18, "learning_rate": 4.705702204674702e-05, "loss": 0.0457, "step": 2614500 }, { "epoch": 0.18, "learning_rate": 4.7056460126705656e-05, "loss": 0.0461, "step": 2615000 }, { "epoch": 0.18, "learning_rate": 4.705589708057201e-05, "loss": 0.0473, "step": 2615500 }, { "epoch": 0.18, "learning_rate": 4.705533516053065e-05, "loss": 0.0453, "step": 2616000 }, { "epoch": 0.18, "learning_rate": 4.705477211439701e-05, "loss": 0.045, "step": 2616500 }, { "epoch": 0.18, "learning_rate": 4.705420906826338e-05, "loss": 0.0456, "step": 2617000 }, { "epoch": 0.18, "learning_rate": 4.7053646022129744e-05, "loss": 0.0492, "step": 2617500 }, { "epoch": 0.18, "learning_rate": 4.70530829759961e-05, "loss": 0.0415, "step": 2618000 }, { "epoch": 0.18, "learning_rate": 4.705251992986247e-05, "loss": 0.046, "step": 2618500 }, { "epoch": 0.18, "learning_rate": 4.7051956883728836e-05, "loss": 0.0456, "step": 2619000 }, { "epoch": 0.18, "learning_rate": 4.705139496368747e-05, "loss": 0.0457, "step": 2619500 }, { "epoch": 0.18, "learning_rate": 4.705083191755383e-05, "loss": 0.045, "step": 2620000 }, { "epoch": 0.18, "learning_rate": 4.7050268871420196e-05, "loss": 0.0459, "step": 2620500 }, { "epoch": 0.18, "learning_rate": 4.704970582528656e-05, "loss": 0.0464, "step": 2621000 }, { "epoch": 0.18, "learning_rate": 4.7049142779152924e-05, "loss": 0.0453, "step": 2621500 }, { "epoch": 0.18, "learning_rate": 4.7048579733019295e-05, "loss": 0.0466, "step": 2622000 }, { "epoch": 0.18, "learning_rate": 4.704801668688565e-05, "loss": 0.0455, "step": 2622500 }, { "epoch": 0.18, "learning_rate": 4.704745364075202e-05, "loss": 0.0433, "step": 2623000 }, { "epoch": 0.18, "learning_rate": 4.704689059461839e-05, "loss": 0.0469, "step": 2623500 }, { "epoch": 0.18, "learning_rate": 4.704632867457702e-05, "loss": 0.0439, "step": 2624000 }, { "epoch": 0.18, "learning_rate": 4.704576562844338e-05, "loss": 0.0433, "step": 2624500 }, { "epoch": 0.18, "learning_rate": 4.704520258230975e-05, "loss": 0.0477, "step": 2625000 }, { "epoch": 0.18, "learning_rate": 4.704463953617611e-05, "loss": 0.0485, "step": 2625500 }, { "epoch": 0.18, "learning_rate": 4.7044077616134743e-05, "loss": 0.0454, "step": 2626000 }, { "epoch": 0.18, "learning_rate": 4.704351457000111e-05, "loss": 0.0499, "step": 2626500 }, { "epoch": 0.18, "learning_rate": 4.704295152386747e-05, "loss": 0.0449, "step": 2627000 }, { "epoch": 0.18, "learning_rate": 4.7042388477733835e-05, "loss": 0.0427, "step": 2627500 }, { "epoch": 0.18, "learning_rate": 4.70418254316002e-05, "loss": 0.0441, "step": 2628000 }, { "epoch": 0.18, "learning_rate": 4.7041262385466563e-05, "loss": 0.0428, "step": 2628500 }, { "epoch": 0.18, "learning_rate": 4.704069933933293e-05, "loss": 0.0449, "step": 2629000 }, { "epoch": 0.18, "learning_rate": 4.7040137419291566e-05, "loss": 0.0458, "step": 2629500 }, { "epoch": 0.18, "learning_rate": 4.7039574373157924e-05, "loss": 0.0455, "step": 2630000 }, { "epoch": 0.18, "learning_rate": 4.703901132702429e-05, "loss": 0.0483, "step": 2630500 }, { "epoch": 0.18, "learning_rate": 4.703844828089066e-05, "loss": 0.0453, "step": 2631000 }, { "epoch": 0.18, "learning_rate": 4.7037886360849284e-05, "loss": 0.0489, "step": 2631500 }, { "epoch": 0.18, "learning_rate": 4.7037323314715655e-05, "loss": 0.048, "step": 2632000 }, { "epoch": 0.18, "learning_rate": 4.703676026858201e-05, "loss": 0.0461, "step": 2632500 }, { "epoch": 0.18, "learning_rate": 4.703619722244838e-05, "loss": 0.0454, "step": 2633000 }, { "epoch": 0.18, "learning_rate": 4.703563417631475e-05, "loss": 0.0472, "step": 2633500 }, { "epoch": 0.18, "learning_rate": 4.703507225627338e-05, "loss": 0.0433, "step": 2634000 }, { "epoch": 0.18, "learning_rate": 4.703450921013974e-05, "loss": 0.0459, "step": 2634500 }, { "epoch": 0.18, "learning_rate": 4.703394616400611e-05, "loss": 0.0447, "step": 2635000 }, { "epoch": 0.18, "learning_rate": 4.703338311787247e-05, "loss": 0.0441, "step": 2635500 }, { "epoch": 0.18, "learning_rate": 4.70328211978311e-05, "loss": 0.0454, "step": 2636000 }, { "epoch": 0.18, "learning_rate": 4.703225815169747e-05, "loss": 0.0502, "step": 2636500 }, { "epoch": 0.18, "learning_rate": 4.703169510556383e-05, "loss": 0.0459, "step": 2637000 }, { "epoch": 0.18, "learning_rate": 4.70311320594302e-05, "loss": 0.0458, "step": 2637500 }, { "epoch": 0.18, "learning_rate": 4.703056901329656e-05, "loss": 0.0462, "step": 2638000 }, { "epoch": 0.18, "learning_rate": 4.70300070932552e-05, "loss": 0.0463, "step": 2638500 }, { "epoch": 0.18, "learning_rate": 4.7029444047121555e-05, "loss": 0.0419, "step": 2639000 }, { "epoch": 0.18, "learning_rate": 4.7028881000987926e-05, "loss": 0.0447, "step": 2639500 }, { "epoch": 0.18, "learning_rate": 4.702831795485429e-05, "loss": 0.0484, "step": 2640000 }, { "epoch": 0.18, "learning_rate": 4.702775490872065e-05, "loss": 0.0433, "step": 2640500 }, { "epoch": 0.18, "learning_rate": 4.7027192988679286e-05, "loss": 0.0466, "step": 2641000 }, { "epoch": 0.18, "learning_rate": 4.702662994254565e-05, "loss": 0.0474, "step": 2641500 }, { "epoch": 0.18, "learning_rate": 4.7026066896412014e-05, "loss": 0.0424, "step": 2642000 }, { "epoch": 0.18, "learning_rate": 4.702550385027838e-05, "loss": 0.0437, "step": 2642500 }, { "epoch": 0.18, "learning_rate": 4.702494080414474e-05, "loss": 0.0463, "step": 2643000 }, { "epoch": 0.18, "learning_rate": 4.7024377758011106e-05, "loss": 0.0456, "step": 2643500 }, { "epoch": 0.18, "learning_rate": 4.702381583796974e-05, "loss": 0.0509, "step": 2644000 }, { "epoch": 0.18, "learning_rate": 4.702325391792837e-05, "loss": 0.047, "step": 2644500 }, { "epoch": 0.18, "learning_rate": 4.7022690871794735e-05, "loss": 0.0503, "step": 2645000 }, { "epoch": 0.18, "learning_rate": 4.70221278256611e-05, "loss": 0.0428, "step": 2645500 }, { "epoch": 0.18, "learning_rate": 4.702156477952747e-05, "loss": 0.0518, "step": 2646000 }, { "epoch": 0.18, "learning_rate": 4.702100173339383e-05, "loss": 0.0465, "step": 2646500 }, { "epoch": 0.18, "learning_rate": 4.702043868726019e-05, "loss": 0.0429, "step": 2647000 }, { "epoch": 0.18, "learning_rate": 4.701987564112656e-05, "loss": 0.0468, "step": 2647500 }, { "epoch": 0.18, "learning_rate": 4.7019312594992925e-05, "loss": 0.0516, "step": 2648000 }, { "epoch": 0.18, "learning_rate": 4.701874954885929e-05, "loss": 0.0461, "step": 2648500 }, { "epoch": 0.18, "learning_rate": 4.701818650272565e-05, "loss": 0.0444, "step": 2649000 }, { "epoch": 0.18, "learning_rate": 4.701762345659202e-05, "loss": 0.0488, "step": 2649500 }, { "epoch": 0.18, "learning_rate": 4.701706041045838e-05, "loss": 0.0438, "step": 2650000 }, { "epoch": 0.18, "learning_rate": 4.7016497364324745e-05, "loss": 0.0489, "step": 2650500 }, { "epoch": 0.18, "learning_rate": 4.7015936570375646e-05, "loss": 0.0473, "step": 2651000 }, { "epoch": 0.18, "learning_rate": 4.701537352424201e-05, "loss": 0.0466, "step": 2651500 }, { "epoch": 0.18, "learning_rate": 4.701481160420064e-05, "loss": 0.0475, "step": 2652000 }, { "epoch": 0.18, "learning_rate": 4.7014248558067006e-05, "loss": 0.0408, "step": 2652500 }, { "epoch": 0.18, "learning_rate": 4.701368551193337e-05, "loss": 0.0441, "step": 2653000 }, { "epoch": 0.18, "learning_rate": 4.7013122465799734e-05, "loss": 0.042, "step": 2653500 }, { "epoch": 0.18, "learning_rate": 4.7012559419666105e-05, "loss": 0.0441, "step": 2654000 }, { "epoch": 0.18, "learning_rate": 4.701199637353246e-05, "loss": 0.0441, "step": 2654500 }, { "epoch": 0.18, "learning_rate": 4.701143332739883e-05, "loss": 0.046, "step": 2655000 }, { "epoch": 0.18, "learning_rate": 4.70108702812652e-05, "loss": 0.047, "step": 2655500 }, { "epoch": 0.18, "learning_rate": 4.701030836122383e-05, "loss": 0.0467, "step": 2656000 }, { "epoch": 0.18, "learning_rate": 4.700974531509019e-05, "loss": 0.0507, "step": 2656500 }, { "epoch": 0.18, "learning_rate": 4.700918226895655e-05, "loss": 0.0438, "step": 2657000 }, { "epoch": 0.18, "learning_rate": 4.700861922282292e-05, "loss": 0.0474, "step": 2657500 }, { "epoch": 0.18, "learning_rate": 4.7008056176689285e-05, "loss": 0.0466, "step": 2658000 }, { "epoch": 0.18, "learning_rate": 4.700749425664792e-05, "loss": 0.0497, "step": 2658500 }, { "epoch": 0.18, "learning_rate": 4.700693121051428e-05, "loss": 0.0442, "step": 2659000 }, { "epoch": 0.18, "learning_rate": 4.7006368164380645e-05, "loss": 0.0472, "step": 2659500 }, { "epoch": 0.18, "learning_rate": 4.700580511824701e-05, "loss": 0.0438, "step": 2660000 }, { "epoch": 0.18, "learning_rate": 4.700524207211337e-05, "loss": 0.0431, "step": 2660500 }, { "epoch": 0.18, "learning_rate": 4.700467902597974e-05, "loss": 0.0461, "step": 2661000 }, { "epoch": 0.18, "learning_rate": 4.700411710593837e-05, "loss": 0.0447, "step": 2661500 }, { "epoch": 0.18, "learning_rate": 4.700355405980473e-05, "loss": 0.0411, "step": 2662000 }, { "epoch": 0.18, "learning_rate": 4.70029910136711e-05, "loss": 0.0452, "step": 2662500 }, { "epoch": 0.18, "learning_rate": 4.700242796753747e-05, "loss": 0.0447, "step": 2663000 }, { "epoch": 0.18, "learning_rate": 4.700186492140383e-05, "loss": 0.0447, "step": 2663500 }, { "epoch": 0.18, "learning_rate": 4.7001303001362464e-05, "loss": 0.0495, "step": 2664000 }, { "epoch": 0.18, "learning_rate": 4.700073995522883e-05, "loss": 0.0471, "step": 2664500 }, { "epoch": 0.18, "learning_rate": 4.700017690909519e-05, "loss": 0.0495, "step": 2665000 }, { "epoch": 0.18, "learning_rate": 4.6999613862961556e-05, "loss": 0.0458, "step": 2665500 }, { "epoch": 0.18, "learning_rate": 4.699905194292019e-05, "loss": 0.0463, "step": 2666000 }, { "epoch": 0.18, "learning_rate": 4.699848889678655e-05, "loss": 0.0449, "step": 2666500 }, { "epoch": 0.18, "learning_rate": 4.6997925850652917e-05, "loss": 0.0486, "step": 2667000 }, { "epoch": 0.18, "learning_rate": 4.699736280451928e-05, "loss": 0.0436, "step": 2667500 }, { "epoch": 0.18, "learning_rate": 4.6996799758385645e-05, "loss": 0.0411, "step": 2668000 }, { "epoch": 0.18, "learning_rate": 4.699623783834428e-05, "loss": 0.0466, "step": 2668500 }, { "epoch": 0.18, "learning_rate": 4.699567479221064e-05, "loss": 0.0477, "step": 2669000 }, { "epoch": 0.18, "learning_rate": 4.699511174607701e-05, "loss": 0.0476, "step": 2669500 }, { "epoch": 0.18, "learning_rate": 4.699454869994337e-05, "loss": 0.0439, "step": 2670000 }, { "epoch": 0.18, "learning_rate": 4.699398677990201e-05, "loss": 0.0492, "step": 2670500 }, { "epoch": 0.18, "learning_rate": 4.6993423733768365e-05, "loss": 0.0454, "step": 2671000 }, { "epoch": 0.18, "learning_rate": 4.699286068763473e-05, "loss": 0.0442, "step": 2671500 }, { "epoch": 0.18, "learning_rate": 4.699229876759336e-05, "loss": 0.0479, "step": 2672000 }, { "epoch": 0.18, "learning_rate": 4.699173572145973e-05, "loss": 0.0443, "step": 2672500 }, { "epoch": 0.18, "learning_rate": 4.6991172675326096e-05, "loss": 0.0445, "step": 2673000 }, { "epoch": 0.18, "learning_rate": 4.699060962919245e-05, "loss": 0.0456, "step": 2673500 }, { "epoch": 0.18, "learning_rate": 4.6990046583058824e-05, "loss": 0.0479, "step": 2674000 }, { "epoch": 0.18, "learning_rate": 4.698948353692519e-05, "loss": 0.0439, "step": 2674500 }, { "epoch": 0.18, "learning_rate": 4.698892049079155e-05, "loss": 0.0505, "step": 2675000 }, { "epoch": 0.18, "learning_rate": 4.6988357444657916e-05, "loss": 0.0488, "step": 2675500 }, { "epoch": 0.18, "learning_rate": 4.698779439852428e-05, "loss": 0.0456, "step": 2676000 }, { "epoch": 0.18, "learning_rate": 4.698723247848291e-05, "loss": 0.0488, "step": 2676500 }, { "epoch": 0.18, "learning_rate": 4.6986669432349276e-05, "loss": 0.044, "step": 2677000 }, { "epoch": 0.18, "learning_rate": 4.698610638621564e-05, "loss": 0.0471, "step": 2677500 }, { "epoch": 0.18, "learning_rate": 4.6985543340082004e-05, "loss": 0.046, "step": 2678000 }, { "epoch": 0.18, "learning_rate": 4.6984980293948375e-05, "loss": 0.0473, "step": 2678500 }, { "epoch": 0.18, "learning_rate": 4.6984418373907e-05, "loss": 0.0473, "step": 2679000 }, { "epoch": 0.18, "learning_rate": 4.698385532777337e-05, "loss": 0.0469, "step": 2679500 }, { "epoch": 0.18, "learning_rate": 4.6983292281639735e-05, "loss": 0.0456, "step": 2680000 }, { "epoch": 0.18, "learning_rate": 4.698272923550609e-05, "loss": 0.0458, "step": 2680500 }, { "epoch": 0.18, "learning_rate": 4.698216618937246e-05, "loss": 0.0473, "step": 2681000 }, { "epoch": 0.18, "learning_rate": 4.698160314323883e-05, "loss": 0.0444, "step": 2681500 }, { "epoch": 0.18, "learning_rate": 4.698104122319746e-05, "loss": 0.0472, "step": 2682000 }, { "epoch": 0.18, "learning_rate": 4.698047817706382e-05, "loss": 0.0456, "step": 2682500 }, { "epoch": 0.18, "learning_rate": 4.697991513093019e-05, "loss": 0.0445, "step": 2683000 }, { "epoch": 0.18, "learning_rate": 4.697935321088882e-05, "loss": 0.0493, "step": 2683500 }, { "epoch": 0.18, "learning_rate": 4.6978790164755183e-05, "loss": 0.0431, "step": 2684000 }, { "epoch": 0.18, "learning_rate": 4.697822711862155e-05, "loss": 0.0482, "step": 2684500 }, { "epoch": 0.18, "learning_rate": 4.697766407248791e-05, "loss": 0.0429, "step": 2685000 }, { "epoch": 0.18, "learning_rate": 4.6977101026354275e-05, "loss": 0.0457, "step": 2685500 }, { "epoch": 0.18, "learning_rate": 4.697653798022064e-05, "loss": 0.0481, "step": 2686000 }, { "epoch": 0.18, "learning_rate": 4.697597493408701e-05, "loss": 0.0461, "step": 2686500 }, { "epoch": 0.18, "learning_rate": 4.697541188795337e-05, "loss": 0.045, "step": 2687000 }, { "epoch": 0.18, "learning_rate": 4.697484884181974e-05, "loss": 0.0448, "step": 2687500 }, { "epoch": 0.18, "learning_rate": 4.6974286921778364e-05, "loss": 0.0441, "step": 2688000 }, { "epoch": 0.18, "learning_rate": 4.6973723875644734e-05, "loss": 0.0444, "step": 2688500 }, { "epoch": 0.18, "learning_rate": 4.69731608295111e-05, "loss": 0.0443, "step": 2689000 }, { "epoch": 0.18, "learning_rate": 4.6972597783377456e-05, "loss": 0.0468, "step": 2689500 }, { "epoch": 0.18, "learning_rate": 4.6972034737243826e-05, "loss": 0.0486, "step": 2690000 }, { "epoch": 0.18, "learning_rate": 4.697147169111019e-05, "loss": 0.0482, "step": 2690500 }, { "epoch": 0.18, "learning_rate": 4.697090977106882e-05, "loss": 0.0442, "step": 2691000 }, { "epoch": 0.18, "learning_rate": 4.6970346724935187e-05, "loss": 0.0465, "step": 2691500 }, { "epoch": 0.18, "learning_rate": 4.696978367880156e-05, "loss": 0.0464, "step": 2692000 }, { "epoch": 0.18, "learning_rate": 4.6969220632667915e-05, "loss": 0.0465, "step": 2692500 }, { "epoch": 0.18, "learning_rate": 4.696865758653428e-05, "loss": 0.0445, "step": 2693000 }, { "epoch": 0.18, "learning_rate": 4.696809454040065e-05, "loss": 0.0424, "step": 2693500 }, { "epoch": 0.18, "learning_rate": 4.6967532620359275e-05, "loss": 0.0489, "step": 2694000 }, { "epoch": 0.18, "learning_rate": 4.696697070031791e-05, "loss": 0.0456, "step": 2694500 }, { "epoch": 0.18, "learning_rate": 4.696640765418428e-05, "loss": 0.0443, "step": 2695000 }, { "epoch": 0.18, "learning_rate": 4.696584460805064e-05, "loss": 0.0456, "step": 2695500 }, { "epoch": 0.18, "learning_rate": 4.6965281561917e-05, "loss": 0.0446, "step": 2696000 }, { "epoch": 0.18, "learning_rate": 4.696471851578337e-05, "loss": 0.0448, "step": 2696500 }, { "epoch": 0.18, "learning_rate": 4.6964155469649734e-05, "loss": 0.041, "step": 2697000 }, { "epoch": 0.18, "learning_rate": 4.69635924235161e-05, "loss": 0.0474, "step": 2697500 }, { "epoch": 0.18, "learning_rate": 4.696302937738246e-05, "loss": 0.0491, "step": 2698000 }, { "epoch": 0.18, "learning_rate": 4.6962467457341094e-05, "loss": 0.0456, "step": 2698500 }, { "epoch": 0.18, "learning_rate": 4.696190441120746e-05, "loss": 0.0476, "step": 2699000 }, { "epoch": 0.18, "learning_rate": 4.696134136507382e-05, "loss": 0.0443, "step": 2699500 }, { "epoch": 0.18, "learning_rate": 4.6960778318940186e-05, "loss": 0.0461, "step": 2700000 }, { "epoch": 0.18, "learning_rate": 4.696021527280655e-05, "loss": 0.0458, "step": 2700500 }, { "epoch": 0.18, "learning_rate": 4.695965335276518e-05, "loss": 0.0439, "step": 2701000 }, { "epoch": 0.18, "learning_rate": 4.6959090306631546e-05, "loss": 0.0434, "step": 2701500 }, { "epoch": 0.18, "learning_rate": 4.695852726049792e-05, "loss": 0.0445, "step": 2702000 }, { "epoch": 0.18, "learning_rate": 4.6957964214364274e-05, "loss": 0.0469, "step": 2702500 }, { "epoch": 0.18, "learning_rate": 4.695740229432291e-05, "loss": 0.0482, "step": 2703000 }, { "epoch": 0.18, "learning_rate": 4.695683924818927e-05, "loss": 0.0437, "step": 2703500 }, { "epoch": 0.18, "learning_rate": 4.695627620205564e-05, "loss": 0.0472, "step": 2704000 }, { "epoch": 0.18, "learning_rate": 4.6955713155922005e-05, "loss": 0.0433, "step": 2704500 }, { "epoch": 0.18, "learning_rate": 4.695515123588064e-05, "loss": 0.0458, "step": 2705000 }, { "epoch": 0.18, "learning_rate": 4.6954588189747e-05, "loss": 0.0477, "step": 2705500 }, { "epoch": 0.18, "learning_rate": 4.695402514361336e-05, "loss": 0.0427, "step": 2706000 }, { "epoch": 0.18, "learning_rate": 4.695346209747973e-05, "loss": 0.0436, "step": 2706500 }, { "epoch": 0.18, "learning_rate": 4.695289905134609e-05, "loss": 0.0449, "step": 2707000 }, { "epoch": 0.18, "learning_rate": 4.6952337131304726e-05, "loss": 0.0451, "step": 2707500 }, { "epoch": 0.18, "learning_rate": 4.695177408517109e-05, "loss": 0.0445, "step": 2708000 }, { "epoch": 0.18, "learning_rate": 4.695121103903746e-05, "loss": 0.0427, "step": 2708500 }, { "epoch": 0.18, "learning_rate": 4.695064799290382e-05, "loss": 0.0462, "step": 2709000 }, { "epoch": 0.18, "learning_rate": 4.695008494677018e-05, "loss": 0.046, "step": 2709500 }, { "epoch": 0.18, "learning_rate": 4.6949523026728814e-05, "loss": 0.0488, "step": 2710000 }, { "epoch": 0.18, "learning_rate": 4.694895998059518e-05, "loss": 0.0476, "step": 2710500 }, { "epoch": 0.18, "learning_rate": 4.694839693446155e-05, "loss": 0.0483, "step": 2711000 }, { "epoch": 0.18, "learning_rate": 4.6947833888327906e-05, "loss": 0.0436, "step": 2711500 }, { "epoch": 0.18, "learning_rate": 4.6947270842194276e-05, "loss": 0.0475, "step": 2712000 }, { "epoch": 0.18, "learning_rate": 4.69467089221529e-05, "loss": 0.0446, "step": 2712500 }, { "epoch": 0.18, "learning_rate": 4.694614587601927e-05, "loss": 0.0489, "step": 2713000 }, { "epoch": 0.18, "learning_rate": 4.694558282988564e-05, "loss": 0.0448, "step": 2713500 }, { "epoch": 0.18, "learning_rate": 4.6945019783752e-05, "loss": 0.0422, "step": 2714000 }, { "epoch": 0.18, "learning_rate": 4.694445786371063e-05, "loss": 0.0448, "step": 2714500 }, { "epoch": 0.18, "learning_rate": 4.6943894817577e-05, "loss": 0.0408, "step": 2715000 }, { "epoch": 0.18, "learning_rate": 4.694333177144336e-05, "loss": 0.043, "step": 2715500 }, { "epoch": 0.18, "learning_rate": 4.6942768725309725e-05, "loss": 0.0481, "step": 2716000 }, { "epoch": 0.18, "learning_rate": 4.694220567917609e-05, "loss": 0.0454, "step": 2716500 }, { "epoch": 0.18, "learning_rate": 4.694164263304245e-05, "loss": 0.0455, "step": 2717000 }, { "epoch": 0.18, "learning_rate": 4.6941079586908824e-05, "loss": 0.0464, "step": 2717500 }, { "epoch": 0.18, "learning_rate": 4.694051654077518e-05, "loss": 0.046, "step": 2718000 }, { "epoch": 0.18, "learning_rate": 4.693995462073382e-05, "loss": 0.0448, "step": 2718500 }, { "epoch": 0.18, "learning_rate": 4.6939392700692445e-05, "loss": 0.0441, "step": 2719000 }, { "epoch": 0.18, "learning_rate": 4.6938829654558816e-05, "loss": 0.0488, "step": 2719500 }, { "epoch": 0.18, "learning_rate": 4.693826660842517e-05, "loss": 0.0482, "step": 2720000 }, { "epoch": 0.18, "learning_rate": 4.693770356229154e-05, "loss": 0.0447, "step": 2720500 }, { "epoch": 0.18, "learning_rate": 4.693714164225017e-05, "loss": 0.0476, "step": 2721000 }, { "epoch": 0.18, "learning_rate": 4.693657859611654e-05, "loss": 0.0472, "step": 2721500 }, { "epoch": 0.18, "learning_rate": 4.6936015549982904e-05, "loss": 0.0422, "step": 2722000 }, { "epoch": 0.18, "learning_rate": 4.693545250384927e-05, "loss": 0.0409, "step": 2722500 }, { "epoch": 0.18, "learning_rate": 4.693488945771563e-05, "loss": 0.0498, "step": 2723000 }, { "epoch": 0.18, "learning_rate": 4.6934326411581996e-05, "loss": 0.0473, "step": 2723500 }, { "epoch": 0.18, "learning_rate": 4.693376336544836e-05, "loss": 0.044, "step": 2724000 }, { "epoch": 0.18, "learning_rate": 4.693320144540699e-05, "loss": 0.0466, "step": 2724500 }, { "epoch": 0.18, "learning_rate": 4.6932638399273356e-05, "loss": 0.0433, "step": 2725000 }, { "epoch": 0.18, "learning_rate": 4.693207535313972e-05, "loss": 0.0494, "step": 2725500 }, { "epoch": 0.18, "learning_rate": 4.6931512307006084e-05, "loss": 0.0429, "step": 2726000 }, { "epoch": 0.18, "learning_rate": 4.6930949260872455e-05, "loss": 0.0456, "step": 2726500 }, { "epoch": 0.18, "learning_rate": 4.693038734083108e-05, "loss": 0.0486, "step": 2727000 }, { "epoch": 0.18, "learning_rate": 4.692982429469745e-05, "loss": 0.0476, "step": 2727500 }, { "epoch": 0.18, "learning_rate": 4.692926124856381e-05, "loss": 0.0457, "step": 2728000 }, { "epoch": 0.18, "learning_rate": 4.692869820243018e-05, "loss": 0.0479, "step": 2728500 }, { "epoch": 0.18, "learning_rate": 4.6928135156296543e-05, "loss": 0.0413, "step": 2729000 }, { "epoch": 0.18, "learning_rate": 4.69275721101629e-05, "loss": 0.0442, "step": 2729500 }, { "epoch": 0.18, "learning_rate": 4.692701019012154e-05, "loss": 0.0464, "step": 2730000 }, { "epoch": 0.18, "learning_rate": 4.6926447143987904e-05, "loss": 0.0453, "step": 2730500 }, { "epoch": 0.18, "learning_rate": 4.692588409785427e-05, "loss": 0.0466, "step": 2731000 }, { "epoch": 0.18, "learning_rate": 4.692532105172063e-05, "loss": 0.0484, "step": 2731500 }, { "epoch": 0.18, "learning_rate": 4.6924758005586996e-05, "loss": 0.0485, "step": 2732000 }, { "epoch": 0.18, "learning_rate": 4.692419608554563e-05, "loss": 0.0456, "step": 2732500 }, { "epoch": 0.18, "learning_rate": 4.692363303941199e-05, "loss": 0.0479, "step": 2733000 }, { "epoch": 0.18, "learning_rate": 4.6923069993278356e-05, "loss": 0.0482, "step": 2733500 }, { "epoch": 0.18, "learning_rate": 4.692250694714472e-05, "loss": 0.0449, "step": 2734000 }, { "epoch": 0.18, "learning_rate": 4.692194502710335e-05, "loss": 0.0462, "step": 2734500 }, { "epoch": 0.18, "learning_rate": 4.692138198096972e-05, "loss": 0.0426, "step": 2735000 }, { "epoch": 0.18, "learning_rate": 4.692081893483608e-05, "loss": 0.0463, "step": 2735500 }, { "epoch": 0.18, "learning_rate": 4.6920255888702444e-05, "loss": 0.0446, "step": 2736000 }, { "epoch": 0.18, "learning_rate": 4.6919692842568815e-05, "loss": 0.0471, "step": 2736500 }, { "epoch": 0.18, "learning_rate": 4.691912979643518e-05, "loss": 0.0479, "step": 2737000 }, { "epoch": 0.18, "learning_rate": 4.691856675030154e-05, "loss": 0.0477, "step": 2737500 }, { "epoch": 0.18, "learning_rate": 4.691800370416791e-05, "loss": 0.0477, "step": 2738000 }, { "epoch": 0.19, "learning_rate": 4.691744065803427e-05, "loss": 0.0467, "step": 2738500 }, { "epoch": 0.19, "learning_rate": 4.69168787379929e-05, "loss": 0.0443, "step": 2739000 }, { "epoch": 0.19, "learning_rate": 4.691631569185927e-05, "loss": 0.0469, "step": 2739500 }, { "epoch": 0.19, "learning_rate": 4.691575264572563e-05, "loss": 0.0437, "step": 2740000 }, { "epoch": 0.19, "learning_rate": 4.6915189599591995e-05, "loss": 0.0429, "step": 2740500 }, { "epoch": 0.19, "learning_rate": 4.6914626553458366e-05, "loss": 0.0472, "step": 2741000 }, { "epoch": 0.19, "learning_rate": 4.691406350732472e-05, "loss": 0.0492, "step": 2741500 }, { "epoch": 0.19, "learning_rate": 4.691350046119109e-05, "loss": 0.0491, "step": 2742000 }, { "epoch": 0.19, "learning_rate": 4.691293741505746e-05, "loss": 0.0455, "step": 2742500 }, { "epoch": 0.19, "learning_rate": 4.691237662110836e-05, "loss": 0.0434, "step": 2743000 }, { "epoch": 0.19, "learning_rate": 4.6911813574974715e-05, "loss": 0.0504, "step": 2743500 }, { "epoch": 0.19, "learning_rate": 4.6911250528841086e-05, "loss": 0.0474, "step": 2744000 }, { "epoch": 0.19, "learning_rate": 4.691068748270745e-05, "loss": 0.0406, "step": 2744500 }, { "epoch": 0.19, "learning_rate": 4.691012443657381e-05, "loss": 0.0437, "step": 2745000 }, { "epoch": 0.19, "learning_rate": 4.6909562516532446e-05, "loss": 0.0446, "step": 2745500 }, { "epoch": 0.19, "learning_rate": 4.6908999470398804e-05, "loss": 0.0462, "step": 2746000 }, { "epoch": 0.19, "learning_rate": 4.6908436424265174e-05, "loss": 0.0475, "step": 2746500 }, { "epoch": 0.19, "learning_rate": 4.690787337813154e-05, "loss": 0.0495, "step": 2747000 }, { "epoch": 0.19, "learning_rate": 4.69073103319979e-05, "loss": 0.0445, "step": 2747500 }, { "epoch": 0.19, "learning_rate": 4.6906747285864266e-05, "loss": 0.0469, "step": 2748000 }, { "epoch": 0.19, "learning_rate": 4.690618423973063e-05, "loss": 0.0478, "step": 2748500 }, { "epoch": 0.19, "learning_rate": 4.690562231968926e-05, "loss": 0.0443, "step": 2749000 }, { "epoch": 0.19, "learning_rate": 4.6905059273555627e-05, "loss": 0.0489, "step": 2749500 }, { "epoch": 0.19, "learning_rate": 4.6904496227422e-05, "loss": 0.0472, "step": 2750000 }, { "epoch": 0.19, "learning_rate": 4.6903933181288355e-05, "loss": 0.0507, "step": 2750500 }, { "epoch": 0.19, "learning_rate": 4.6903371261246994e-05, "loss": 0.0431, "step": 2751000 }, { "epoch": 0.19, "learning_rate": 4.6902809341205626e-05, "loss": 0.0442, "step": 2751500 }, { "epoch": 0.19, "learning_rate": 4.690224629507199e-05, "loss": 0.0469, "step": 2752000 }, { "epoch": 0.19, "learning_rate": 4.690168324893835e-05, "loss": 0.0456, "step": 2752500 }, { "epoch": 0.19, "learning_rate": 4.690112020280472e-05, "loss": 0.0433, "step": 2753000 }, { "epoch": 0.19, "learning_rate": 4.690055715667108e-05, "loss": 0.0453, "step": 2753500 }, { "epoch": 0.19, "learning_rate": 4.6899994110537446e-05, "loss": 0.0449, "step": 2754000 }, { "epoch": 0.19, "learning_rate": 4.689943106440381e-05, "loss": 0.0469, "step": 2754500 }, { "epoch": 0.19, "learning_rate": 4.6898868018270174e-05, "loss": 0.0438, "step": 2755000 }, { "epoch": 0.19, "learning_rate": 4.689830497213654e-05, "loss": 0.0469, "step": 2755500 }, { "epoch": 0.19, "learning_rate": 4.68977419260029e-05, "loss": 0.0458, "step": 2756000 }, { "epoch": 0.19, "learning_rate": 4.6897180005961534e-05, "loss": 0.0471, "step": 2756500 }, { "epoch": 0.19, "learning_rate": 4.68966169598279e-05, "loss": 0.0461, "step": 2757000 }, { "epoch": 0.19, "learning_rate": 4.689605391369427e-05, "loss": 0.0461, "step": 2757500 }, { "epoch": 0.19, "learning_rate": 4.6895490867560626e-05, "loss": 0.043, "step": 2758000 }, { "epoch": 0.19, "learning_rate": 4.689492782142699e-05, "loss": 0.0446, "step": 2758500 }, { "epoch": 0.19, "learning_rate": 4.689436590138562e-05, "loss": 0.0498, "step": 2759000 }, { "epoch": 0.19, "learning_rate": 4.6893802855251986e-05, "loss": 0.0426, "step": 2759500 }, { "epoch": 0.19, "learning_rate": 4.689323980911836e-05, "loss": 0.0445, "step": 2760000 }, { "epoch": 0.19, "learning_rate": 4.6892676762984714e-05, "loss": 0.0455, "step": 2760500 }, { "epoch": 0.19, "learning_rate": 4.6892113716851085e-05, "loss": 0.0446, "step": 2761000 }, { "epoch": 0.19, "learning_rate": 4.689155179680971e-05, "loss": 0.0458, "step": 2761500 }, { "epoch": 0.19, "learning_rate": 4.689098875067608e-05, "loss": 0.0473, "step": 2762000 }, { "epoch": 0.19, "learning_rate": 4.6890425704542445e-05, "loss": 0.0439, "step": 2762500 }, { "epoch": 0.19, "learning_rate": 4.688986265840881e-05, "loss": 0.0466, "step": 2763000 }, { "epoch": 0.19, "learning_rate": 4.688930073836744e-05, "loss": 0.042, "step": 2763500 }, { "epoch": 0.19, "learning_rate": 4.6888737692233805e-05, "loss": 0.0451, "step": 2764000 }, { "epoch": 0.19, "learning_rate": 4.688817464610017e-05, "loss": 0.0467, "step": 2764500 }, { "epoch": 0.19, "learning_rate": 4.688761159996653e-05, "loss": 0.0481, "step": 2765000 }, { "epoch": 0.19, "learning_rate": 4.6887048553832904e-05, "loss": 0.0442, "step": 2765500 }, { "epoch": 0.19, "learning_rate": 4.688648663379153e-05, "loss": 0.0456, "step": 2766000 }, { "epoch": 0.19, "learning_rate": 4.68859235876579e-05, "loss": 0.0466, "step": 2766500 }, { "epoch": 0.19, "learning_rate": 4.688536054152426e-05, "loss": 0.0413, "step": 2767000 }, { "epoch": 0.19, "learning_rate": 4.688479749539063e-05, "loss": 0.0426, "step": 2767500 }, { "epoch": 0.19, "learning_rate": 4.6884235575349254e-05, "loss": 0.046, "step": 2768000 }, { "epoch": 0.19, "learning_rate": 4.6883672529215624e-05, "loss": 0.0435, "step": 2768500 }, { "epoch": 0.19, "learning_rate": 4.688310948308199e-05, "loss": 0.0451, "step": 2769000 }, { "epoch": 0.19, "learning_rate": 4.688254643694835e-05, "loss": 0.0439, "step": 2769500 }, { "epoch": 0.19, "learning_rate": 4.6881983390814716e-05, "loss": 0.0461, "step": 2770000 }, { "epoch": 0.19, "learning_rate": 4.688142147077335e-05, "loss": 0.0426, "step": 2770500 }, { "epoch": 0.19, "learning_rate": 4.688085842463971e-05, "loss": 0.0437, "step": 2771000 }, { "epoch": 0.19, "learning_rate": 4.688029537850608e-05, "loss": 0.0466, "step": 2771500 }, { "epoch": 0.19, "learning_rate": 4.687973233237244e-05, "loss": 0.0441, "step": 2772000 }, { "epoch": 0.19, "learning_rate": 4.687917041233107e-05, "loss": 0.045, "step": 2772500 }, { "epoch": 0.19, "learning_rate": 4.687860736619744e-05, "loss": 0.045, "step": 2773000 }, { "epoch": 0.19, "learning_rate": 4.68780443200638e-05, "loss": 0.0423, "step": 2773500 }, { "epoch": 0.19, "learning_rate": 4.687748127393017e-05, "loss": 0.0461, "step": 2774000 }, { "epoch": 0.19, "learning_rate": 4.68769193538888e-05, "loss": 0.0464, "step": 2774500 }, { "epoch": 0.19, "learning_rate": 4.687635630775517e-05, "loss": 0.0435, "step": 2775000 }, { "epoch": 0.19, "learning_rate": 4.6875793261621525e-05, "loss": 0.0457, "step": 2775500 }, { "epoch": 0.19, "learning_rate": 4.687523021548789e-05, "loss": 0.0471, "step": 2776000 }, { "epoch": 0.19, "learning_rate": 4.687466716935426e-05, "loss": 0.0474, "step": 2776500 }, { "epoch": 0.19, "learning_rate": 4.6874105249312885e-05, "loss": 0.0434, "step": 2777000 }, { "epoch": 0.19, "learning_rate": 4.6873542203179256e-05, "loss": 0.0437, "step": 2777500 }, { "epoch": 0.19, "learning_rate": 4.687297915704561e-05, "loss": 0.0409, "step": 2778000 }, { "epoch": 0.19, "learning_rate": 4.6872416110911984e-05, "loss": 0.0456, "step": 2778500 }, { "epoch": 0.19, "learning_rate": 4.687185419087061e-05, "loss": 0.0458, "step": 2779000 }, { "epoch": 0.19, "learning_rate": 4.687129114473698e-05, "loss": 0.0433, "step": 2779500 }, { "epoch": 0.19, "learning_rate": 4.6870728098603344e-05, "loss": 0.0445, "step": 2780000 }, { "epoch": 0.19, "learning_rate": 4.687016505246971e-05, "loss": 0.0476, "step": 2780500 }, { "epoch": 0.19, "learning_rate": 4.686960200633607e-05, "loss": 0.0431, "step": 2781000 }, { "epoch": 0.19, "learning_rate": 4.6869040086294704e-05, "loss": 0.0491, "step": 2781500 }, { "epoch": 0.19, "learning_rate": 4.686847704016107e-05, "loss": 0.0446, "step": 2782000 }, { "epoch": 0.19, "learning_rate": 4.686791399402743e-05, "loss": 0.0426, "step": 2782500 }, { "epoch": 0.19, "learning_rate": 4.68673509478938e-05, "loss": 0.0454, "step": 2783000 }, { "epoch": 0.19, "learning_rate": 4.686678790176016e-05, "loss": 0.0437, "step": 2783500 }, { "epoch": 0.19, "learning_rate": 4.686622485562653e-05, "loss": 0.0438, "step": 2784000 }, { "epoch": 0.19, "learning_rate": 4.6865661809492895e-05, "loss": 0.0462, "step": 2784500 }, { "epoch": 0.19, "learning_rate": 4.686509876335925e-05, "loss": 0.0412, "step": 2785000 }, { "epoch": 0.19, "learning_rate": 4.686453684331789e-05, "loss": 0.0445, "step": 2785500 }, { "epoch": 0.19, "learning_rate": 4.686397379718425e-05, "loss": 0.0436, "step": 2786000 }, { "epoch": 0.19, "learning_rate": 4.686341075105062e-05, "loss": 0.047, "step": 2786500 }, { "epoch": 0.19, "learning_rate": 4.686284770491698e-05, "loss": 0.0419, "step": 2787000 }, { "epoch": 0.19, "learning_rate": 4.686228465878335e-05, "loss": 0.0453, "step": 2787500 }, { "epoch": 0.19, "learning_rate": 4.686172273874198e-05, "loss": 0.0423, "step": 2788000 }, { "epoch": 0.19, "learning_rate": 4.6861159692608344e-05, "loss": 0.049, "step": 2788500 }, { "epoch": 0.19, "learning_rate": 4.686059664647471e-05, "loss": 0.0418, "step": 2789000 }, { "epoch": 0.19, "learning_rate": 4.686003360034107e-05, "loss": 0.0415, "step": 2789500 }, { "epoch": 0.19, "learning_rate": 4.6859470554207436e-05, "loss": 0.0454, "step": 2790000 }, { "epoch": 0.19, "learning_rate": 4.685890863416607e-05, "loss": 0.0481, "step": 2790500 }, { "epoch": 0.19, "learning_rate": 4.685834558803243e-05, "loss": 0.044, "step": 2791000 }, { "epoch": 0.19, "learning_rate": 4.6857782541898796e-05, "loss": 0.0437, "step": 2791500 }, { "epoch": 0.19, "learning_rate": 4.6857219495765167e-05, "loss": 0.0452, "step": 2792000 }, { "epoch": 0.19, "learning_rate": 4.6856656449631524e-05, "loss": 0.0474, "step": 2792500 }, { "epoch": 0.19, "learning_rate": 4.685609452959016e-05, "loss": 0.0499, "step": 2793000 }, { "epoch": 0.19, "learning_rate": 4.685553148345652e-05, "loss": 0.0446, "step": 2793500 }, { "epoch": 0.19, "learning_rate": 4.685496843732289e-05, "loss": 0.0466, "step": 2794000 }, { "epoch": 0.19, "learning_rate": 4.6854405391189255e-05, "loss": 0.0473, "step": 2794500 }, { "epoch": 0.19, "learning_rate": 4.685384234505562e-05, "loss": 0.0495, "step": 2795000 }, { "epoch": 0.19, "learning_rate": 4.685327929892198e-05, "loss": 0.0464, "step": 2795500 }, { "epoch": 0.19, "learning_rate": 4.6852717378880615e-05, "loss": 0.0451, "step": 2796000 }, { "epoch": 0.19, "learning_rate": 4.685215433274698e-05, "loss": 0.0417, "step": 2796500 }, { "epoch": 0.19, "learning_rate": 4.685159128661334e-05, "loss": 0.0446, "step": 2797000 }, { "epoch": 0.19, "learning_rate": 4.6851028240479714e-05, "loss": 0.0407, "step": 2797500 }, { "epoch": 0.19, "learning_rate": 4.685046632043834e-05, "loss": 0.0482, "step": 2798000 }, { "epoch": 0.19, "learning_rate": 4.684990327430471e-05, "loss": 0.0458, "step": 2798500 }, { "epoch": 0.19, "learning_rate": 4.684934022817107e-05, "loss": 0.0424, "step": 2799000 }, { "epoch": 0.19, "learning_rate": 4.684877718203743e-05, "loss": 0.0469, "step": 2799500 }, { "epoch": 0.19, "learning_rate": 4.68482141359038e-05, "loss": 0.0408, "step": 2800000 }, { "epoch": 0.19, "learning_rate": 4.6847652215862434e-05, "loss": 0.0473, "step": 2800500 }, { "epoch": 0.19, "learning_rate": 4.68470891697288e-05, "loss": 0.0465, "step": 2801000 }, { "epoch": 0.19, "learning_rate": 4.6846526123595155e-05, "loss": 0.0471, "step": 2801500 }, { "epoch": 0.19, "learning_rate": 4.6845963077461526e-05, "loss": 0.0441, "step": 2802000 }, { "epoch": 0.19, "learning_rate": 4.684540003132789e-05, "loss": 0.0441, "step": 2802500 }, { "epoch": 0.19, "learning_rate": 4.684483811128652e-05, "loss": 0.045, "step": 2803000 }, { "epoch": 0.19, "learning_rate": 4.6844275065152886e-05, "loss": 0.0454, "step": 2803500 }, { "epoch": 0.19, "learning_rate": 4.684371201901925e-05, "loss": 0.044, "step": 2804000 }, { "epoch": 0.19, "learning_rate": 4.6843148972885614e-05, "loss": 0.0453, "step": 2804500 }, { "epoch": 0.19, "learning_rate": 4.6842587052844246e-05, "loss": 0.0467, "step": 2805000 }, { "epoch": 0.19, "learning_rate": 4.684202400671061e-05, "loss": 0.05, "step": 2805500 }, { "epoch": 0.19, "learning_rate": 4.6841460960576974e-05, "loss": 0.0471, "step": 2806000 }, { "epoch": 0.19, "learning_rate": 4.684089791444334e-05, "loss": 0.0466, "step": 2806500 }, { "epoch": 0.19, "learning_rate": 4.68403348683097e-05, "loss": 0.0452, "step": 2807000 }, { "epoch": 0.19, "learning_rate": 4.6839772948268335e-05, "loss": 0.0451, "step": 2807500 }, { "epoch": 0.19, "learning_rate": 4.68392099021347e-05, "loss": 0.0471, "step": 2808000 }, { "epoch": 0.19, "learning_rate": 4.683864798209333e-05, "loss": 0.0463, "step": 2808500 }, { "epoch": 0.19, "learning_rate": 4.6838084935959695e-05, "loss": 0.049, "step": 2809000 }, { "epoch": 0.19, "learning_rate": 4.6837521889826066e-05, "loss": 0.0436, "step": 2809500 }, { "epoch": 0.19, "learning_rate": 4.683695884369242e-05, "loss": 0.045, "step": 2810000 }, { "epoch": 0.19, "learning_rate": 4.6836395797558794e-05, "loss": 0.0487, "step": 2810500 }, { "epoch": 0.19, "learning_rate": 4.683583275142516e-05, "loss": 0.0465, "step": 2811000 }, { "epoch": 0.19, "learning_rate": 4.683526970529152e-05, "loss": 0.0438, "step": 2811500 }, { "epoch": 0.19, "learning_rate": 4.6834706659157886e-05, "loss": 0.0458, "step": 2812000 }, { "epoch": 0.19, "learning_rate": 4.683414361302425e-05, "loss": 0.0478, "step": 2812500 }, { "epoch": 0.19, "learning_rate": 4.683358169298288e-05, "loss": 0.0414, "step": 2813000 }, { "epoch": 0.19, "learning_rate": 4.6833018646849246e-05, "loss": 0.0473, "step": 2813500 }, { "epoch": 0.19, "learning_rate": 4.683245560071562e-05, "loss": 0.0472, "step": 2814000 }, { "epoch": 0.19, "learning_rate": 4.6831892554581974e-05, "loss": 0.0427, "step": 2814500 }, { "epoch": 0.19, "learning_rate": 4.683133063454061e-05, "loss": 0.0447, "step": 2815000 }, { "epoch": 0.19, "learning_rate": 4.683076758840697e-05, "loss": 0.0432, "step": 2815500 }, { "epoch": 0.19, "learning_rate": 4.6830204542273334e-05, "loss": 0.0452, "step": 2816000 }, { "epoch": 0.19, "learning_rate": 4.6829641496139705e-05, "loss": 0.0444, "step": 2816500 }, { "epoch": 0.19, "learning_rate": 4.682907845000606e-05, "loss": 0.0441, "step": 2817000 }, { "epoch": 0.19, "learning_rate": 4.68285165299647e-05, "loss": 0.0461, "step": 2817500 }, { "epoch": 0.19, "learning_rate": 4.682795348383106e-05, "loss": 0.0461, "step": 2818000 }, { "epoch": 0.19, "learning_rate": 4.682739043769743e-05, "loss": 0.0392, "step": 2818500 }, { "epoch": 0.19, "learning_rate": 4.682682739156379e-05, "loss": 0.0491, "step": 2819000 }, { "epoch": 0.19, "learning_rate": 4.682626434543016e-05, "loss": 0.0457, "step": 2819500 }, { "epoch": 0.19, "learning_rate": 4.682570129929652e-05, "loss": 0.0474, "step": 2820000 }, { "epoch": 0.19, "learning_rate": 4.6825138253162885e-05, "loss": 0.0409, "step": 2820500 }, { "epoch": 0.19, "learning_rate": 4.682457520702925e-05, "loss": 0.0446, "step": 2821000 }, { "epoch": 0.19, "learning_rate": 4.682401328698788e-05, "loss": 0.0436, "step": 2821500 }, { "epoch": 0.19, "learning_rate": 4.6823450240854245e-05, "loss": 0.0496, "step": 2822000 }, { "epoch": 0.19, "learning_rate": 4.682288719472061e-05, "loss": 0.0468, "step": 2822500 }, { "epoch": 0.19, "learning_rate": 4.682232414858698e-05, "loss": 0.0427, "step": 2823000 }, { "epoch": 0.19, "learning_rate": 4.6821761102453344e-05, "loss": 0.0455, "step": 2823500 }, { "epoch": 0.19, "learning_rate": 4.6821199182411976e-05, "loss": 0.0479, "step": 2824000 }, { "epoch": 0.19, "learning_rate": 4.682063613627834e-05, "loss": 0.0495, "step": 2824500 }, { "epoch": 0.19, "learning_rate": 4.68200730901447e-05, "loss": 0.0423, "step": 2825000 }, { "epoch": 0.19, "learning_rate": 4.681951004401107e-05, "loss": 0.0453, "step": 2825500 }, { "epoch": 0.19, "learning_rate": 4.681894699787743e-05, "loss": 0.0441, "step": 2826000 }, { "epoch": 0.19, "learning_rate": 4.6818385077836064e-05, "loss": 0.0447, "step": 2826500 }, { "epoch": 0.19, "learning_rate": 4.681782203170243e-05, "loss": 0.0408, "step": 2827000 }, { "epoch": 0.19, "learning_rate": 4.681725898556879e-05, "loss": 0.0487, "step": 2827500 }, { "epoch": 0.19, "learning_rate": 4.6816695939435156e-05, "loss": 0.0426, "step": 2828000 }, { "epoch": 0.19, "learning_rate": 4.681613289330152e-05, "loss": 0.0428, "step": 2828500 }, { "epoch": 0.19, "learning_rate": 4.681557097326015e-05, "loss": 0.0443, "step": 2829000 }, { "epoch": 0.19, "learning_rate": 4.6815007927126517e-05, "loss": 0.045, "step": 2829500 }, { "epoch": 0.19, "learning_rate": 4.681444488099288e-05, "loss": 0.0455, "step": 2830000 }, { "epoch": 0.19, "learning_rate": 4.6813881834859245e-05, "loss": 0.0469, "step": 2830500 }, { "epoch": 0.19, "learning_rate": 4.681331991481788e-05, "loss": 0.0498, "step": 2831000 }, { "epoch": 0.19, "learning_rate": 4.681275686868424e-05, "loss": 0.0466, "step": 2831500 }, { "epoch": 0.19, "learning_rate": 4.681219382255061e-05, "loss": 0.0444, "step": 2832000 }, { "epoch": 0.19, "learning_rate": 4.681163077641697e-05, "loss": 0.0456, "step": 2832500 }, { "epoch": 0.19, "learning_rate": 4.681106773028334e-05, "loss": 0.0463, "step": 2833000 }, { "epoch": 0.19, "learning_rate": 4.6810505810241965e-05, "loss": 0.0484, "step": 2833500 }, { "epoch": 0.19, "learning_rate": 4.6809942764108336e-05, "loss": 0.0458, "step": 2834000 }, { "epoch": 0.19, "learning_rate": 4.68093797179747e-05, "loss": 0.0441, "step": 2834500 }, { "epoch": 0.19, "learning_rate": 4.680881667184106e-05, "loss": 0.0471, "step": 2835000 }, { "epoch": 0.19, "learning_rate": 4.680825362570743e-05, "loss": 0.0448, "step": 2835500 }, { "epoch": 0.19, "learning_rate": 4.680769170566606e-05, "loss": 0.0429, "step": 2836000 }, { "epoch": 0.19, "learning_rate": 4.6807128659532424e-05, "loss": 0.0419, "step": 2836500 }, { "epoch": 0.19, "learning_rate": 4.680656561339879e-05, "loss": 0.0437, "step": 2837000 }, { "epoch": 0.19, "learning_rate": 4.680600256726516e-05, "loss": 0.0406, "step": 2837500 }, { "epoch": 0.19, "learning_rate": 4.6805439521131516e-05, "loss": 0.0455, "step": 2838000 }, { "epoch": 0.19, "learning_rate": 4.6804877601090155e-05, "loss": 0.0467, "step": 2838500 }, { "epoch": 0.19, "learning_rate": 4.680431455495651e-05, "loss": 0.0454, "step": 2839000 }, { "epoch": 0.19, "learning_rate": 4.6803751508822876e-05, "loss": 0.045, "step": 2839500 }, { "epoch": 0.19, "learning_rate": 4.680318846268925e-05, "loss": 0.0453, "step": 2840000 }, { "epoch": 0.19, "learning_rate": 4.6802625416555604e-05, "loss": 0.0435, "step": 2840500 }, { "epoch": 0.19, "learning_rate": 4.680206349651424e-05, "loss": 0.047, "step": 2841000 }, { "epoch": 0.19, "learning_rate": 4.68015004503806e-05, "loss": 0.0483, "step": 2841500 }, { "epoch": 0.19, "learning_rate": 4.680093740424697e-05, "loss": 0.0478, "step": 2842000 }, { "epoch": 0.19, "learning_rate": 4.6800374358113335e-05, "loss": 0.0457, "step": 2842500 }, { "epoch": 0.19, "learning_rate": 4.679981243807197e-05, "loss": 0.0479, "step": 2843000 }, { "epoch": 0.19, "learning_rate": 4.679924939193833e-05, "loss": 0.0447, "step": 2843500 }, { "epoch": 0.19, "learning_rate": 4.6798686345804695e-05, "loss": 0.0425, "step": 2844000 }, { "epoch": 0.19, "learning_rate": 4.679812329967106e-05, "loss": 0.0434, "step": 2844500 }, { "epoch": 0.19, "learning_rate": 4.679756025353742e-05, "loss": 0.0477, "step": 2845000 }, { "epoch": 0.19, "learning_rate": 4.6796998333496056e-05, "loss": 0.0441, "step": 2845500 }, { "epoch": 0.19, "learning_rate": 4.679643528736242e-05, "loss": 0.0477, "step": 2846000 }, { "epoch": 0.19, "learning_rate": 4.6795872241228783e-05, "loss": 0.0434, "step": 2846500 }, { "epoch": 0.19, "learning_rate": 4.679530919509515e-05, "loss": 0.0457, "step": 2847000 }, { "epoch": 0.19, "learning_rate": 4.679474727505378e-05, "loss": 0.0435, "step": 2847500 }, { "epoch": 0.19, "learning_rate": 4.6794184228920144e-05, "loss": 0.0476, "step": 2848000 }, { "epoch": 0.19, "learning_rate": 4.6793621182786514e-05, "loss": 0.0491, "step": 2848500 }, { "epoch": 0.19, "learning_rate": 4.679305813665287e-05, "loss": 0.0451, "step": 2849000 }, { "epoch": 0.19, "learning_rate": 4.679249509051924e-05, "loss": 0.0468, "step": 2849500 }, { "epoch": 0.19, "learning_rate": 4.679193317047787e-05, "loss": 0.0465, "step": 2850000 }, { "epoch": 0.19, "learning_rate": 4.679137012434424e-05, "loss": 0.043, "step": 2850500 }, { "epoch": 0.19, "learning_rate": 4.67908070782106e-05, "loss": 0.0478, "step": 2851000 }, { "epoch": 0.19, "learning_rate": 4.679024403207696e-05, "loss": 0.0467, "step": 2851500 }, { "epoch": 0.19, "learning_rate": 4.67896821120356e-05, "loss": 0.0424, "step": 2852000 }, { "epoch": 0.19, "learning_rate": 4.678911906590196e-05, "loss": 0.045, "step": 2852500 }, { "epoch": 0.19, "learning_rate": 4.678855601976833e-05, "loss": 0.0498, "step": 2853000 }, { "epoch": 0.19, "learning_rate": 4.678799297363469e-05, "loss": 0.0467, "step": 2853500 }, { "epoch": 0.19, "learning_rate": 4.678742992750106e-05, "loss": 0.0449, "step": 2854000 }, { "epoch": 0.19, "learning_rate": 4.678686800745969e-05, "loss": 0.0474, "step": 2854500 }, { "epoch": 0.19, "learning_rate": 4.678630608741832e-05, "loss": 0.0481, "step": 2855000 }, { "epoch": 0.19, "learning_rate": 4.678574304128468e-05, "loss": 0.0439, "step": 2855500 }, { "epoch": 0.19, "learning_rate": 4.6785179995151054e-05, "loss": 0.0453, "step": 2856000 }, { "epoch": 0.19, "learning_rate": 4.678461694901741e-05, "loss": 0.041, "step": 2856500 }, { "epoch": 0.19, "learning_rate": 4.678405390288378e-05, "loss": 0.0463, "step": 2857000 }, { "epoch": 0.19, "learning_rate": 4.6783490856750146e-05, "loss": 0.0464, "step": 2857500 }, { "epoch": 0.19, "learning_rate": 4.67829278106165e-05, "loss": 0.0434, "step": 2858000 }, { "epoch": 0.19, "learning_rate": 4.6782364764482874e-05, "loss": 0.0452, "step": 2858500 }, { "epoch": 0.19, "learning_rate": 4.678180171834924e-05, "loss": 0.0464, "step": 2859000 }, { "epoch": 0.19, "learning_rate": 4.678123979830787e-05, "loss": 0.0406, "step": 2859500 }, { "epoch": 0.19, "learning_rate": 4.6780676752174234e-05, "loss": 0.0462, "step": 2860000 }, { "epoch": 0.19, "learning_rate": 4.67801137060406e-05, "loss": 0.0456, "step": 2860500 }, { "epoch": 0.19, "learning_rate": 4.677955065990696e-05, "loss": 0.0463, "step": 2861000 }, { "epoch": 0.19, "learning_rate": 4.6778987613773326e-05, "loss": 0.0428, "step": 2861500 }, { "epoch": 0.19, "learning_rate": 4.677842569373196e-05, "loss": 0.0461, "step": 2862000 }, { "epoch": 0.19, "learning_rate": 4.677786264759832e-05, "loss": 0.0466, "step": 2862500 }, { "epoch": 0.19, "learning_rate": 4.6777299601464686e-05, "loss": 0.043, "step": 2863000 }, { "epoch": 0.19, "learning_rate": 4.677673655533105e-05, "loss": 0.0433, "step": 2863500 }, { "epoch": 0.19, "learning_rate": 4.677617350919742e-05, "loss": 0.0447, "step": 2864000 }, { "epoch": 0.19, "learning_rate": 4.677561158915605e-05, "loss": 0.0449, "step": 2864500 }, { "epoch": 0.19, "learning_rate": 4.677504854302242e-05, "loss": 0.0441, "step": 2865000 }, { "epoch": 0.19, "learning_rate": 4.6774485496888775e-05, "loss": 0.0455, "step": 2865500 }, { "epoch": 0.19, "learning_rate": 4.6773922450755145e-05, "loss": 0.0459, "step": 2866000 } ], "max_steps": 44401335, "num_train_epochs": 3, "total_flos": 2.1376644681970483e+17, "trial_name": null, "trial_params": null }