bert-web-bg / trainer_state.json
usmiva's picture
Upload 13 files
76c04f1
raw
history blame contribute delete
No virus
80.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 323979,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.992283450470556e-05,
"loss": 7.2271,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 4.98456690094111e-05,
"loss": 6.7719,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.976850351411666e-05,
"loss": 6.6629,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 4.969133801882221e-05,
"loss": 6.6057,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 4.961417252352776e-05,
"loss": 6.5557,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 4.953700702823331e-05,
"loss": 6.5339,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 4.945984153293887e-05,
"loss": 6.4981,
"step": 3500
},
{
"epoch": 0.04,
"learning_rate": 4.938267603764441e-05,
"loss": 6.4864,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 4.930551054234997e-05,
"loss": 6.4665,
"step": 4500
},
{
"epoch": 0.05,
"learning_rate": 4.922834504705553e-05,
"loss": 6.4375,
"step": 5000
},
{
"epoch": 0.05,
"learning_rate": 4.915117955176107e-05,
"loss": 6.4367,
"step": 5500
},
{
"epoch": 0.06,
"learning_rate": 4.907401405646663e-05,
"loss": 6.4276,
"step": 6000
},
{
"epoch": 0.06,
"learning_rate": 4.899684856117218e-05,
"loss": 6.4075,
"step": 6500
},
{
"epoch": 0.06,
"learning_rate": 4.891968306587773e-05,
"loss": 6.3899,
"step": 7000
},
{
"epoch": 0.07,
"learning_rate": 4.884251757058328e-05,
"loss": 6.3858,
"step": 7500
},
{
"epoch": 0.07,
"learning_rate": 4.8765352075288836e-05,
"loss": 6.3791,
"step": 8000
},
{
"epoch": 0.08,
"learning_rate": 4.868818657999438e-05,
"loss": 6.3676,
"step": 8500
},
{
"epoch": 0.08,
"learning_rate": 4.861102108469994e-05,
"loss": 6.3539,
"step": 9000
},
{
"epoch": 0.09,
"learning_rate": 4.853385558940549e-05,
"loss": 6.3511,
"step": 9500
},
{
"epoch": 0.09,
"learning_rate": 4.845669009411104e-05,
"loss": 6.3434,
"step": 10000
},
{
"epoch": 0.1,
"learning_rate": 4.8379524598816595e-05,
"loss": 6.3393,
"step": 10500
},
{
"epoch": 0.1,
"learning_rate": 4.8302359103522146e-05,
"loss": 6.3319,
"step": 11000
},
{
"epoch": 0.11,
"learning_rate": 4.8225193608227696e-05,
"loss": 6.3237,
"step": 11500
},
{
"epoch": 0.11,
"learning_rate": 4.8148028112933246e-05,
"loss": 6.3252,
"step": 12000
},
{
"epoch": 0.12,
"learning_rate": 4.8070862617638804e-05,
"loss": 6.31,
"step": 12500
},
{
"epoch": 0.12,
"learning_rate": 4.799369712234435e-05,
"loss": 6.3092,
"step": 13000
},
{
"epoch": 0.13,
"learning_rate": 4.7916531627049905e-05,
"loss": 6.3098,
"step": 13500
},
{
"epoch": 0.13,
"learning_rate": 4.7839366131755455e-05,
"loss": 6.3044,
"step": 14000
},
{
"epoch": 0.13,
"learning_rate": 4.7762200636461005e-05,
"loss": 6.297,
"step": 14500
},
{
"epoch": 0.14,
"learning_rate": 4.768503514116656e-05,
"loss": 6.2934,
"step": 15000
},
{
"epoch": 0.14,
"learning_rate": 4.760786964587211e-05,
"loss": 6.2945,
"step": 15500
},
{
"epoch": 0.15,
"learning_rate": 4.7530704150577663e-05,
"loss": 6.2808,
"step": 16000
},
{
"epoch": 0.15,
"learning_rate": 4.7453538655283214e-05,
"loss": 6.2806,
"step": 16500
},
{
"epoch": 0.16,
"learning_rate": 4.737637315998877e-05,
"loss": 6.2805,
"step": 17000
},
{
"epoch": 0.16,
"learning_rate": 4.7299207664694315e-05,
"loss": 6.2715,
"step": 17500
},
{
"epoch": 0.17,
"learning_rate": 4.722204216939987e-05,
"loss": 6.264,
"step": 18000
},
{
"epoch": 0.17,
"learning_rate": 4.714487667410542e-05,
"loss": 6.2593,
"step": 18500
},
{
"epoch": 0.18,
"learning_rate": 4.706771117881097e-05,
"loss": 6.2522,
"step": 19000
},
{
"epoch": 0.18,
"learning_rate": 4.699054568351653e-05,
"loss": 6.2625,
"step": 19500
},
{
"epoch": 0.19,
"learning_rate": 4.691338018822208e-05,
"loss": 6.2572,
"step": 20000
},
{
"epoch": 0.19,
"learning_rate": 4.683621469292763e-05,
"loss": 6.2562,
"step": 20500
},
{
"epoch": 0.19,
"learning_rate": 4.675904919763318e-05,
"loss": 6.2434,
"step": 21000
},
{
"epoch": 0.2,
"learning_rate": 4.668188370233874e-05,
"loss": 6.2465,
"step": 21500
},
{
"epoch": 0.2,
"learning_rate": 4.660471820704428e-05,
"loss": 6.2381,
"step": 22000
},
{
"epoch": 0.21,
"learning_rate": 4.652755271174984e-05,
"loss": 6.2361,
"step": 22500
},
{
"epoch": 0.21,
"learning_rate": 4.645038721645539e-05,
"loss": 6.2318,
"step": 23000
},
{
"epoch": 0.22,
"learning_rate": 4.637322172116094e-05,
"loss": 6.2407,
"step": 23500
},
{
"epoch": 0.22,
"learning_rate": 4.629605622586649e-05,
"loss": 6.2403,
"step": 24000
},
{
"epoch": 0.23,
"learning_rate": 4.621889073057205e-05,
"loss": 6.2229,
"step": 24500
},
{
"epoch": 0.23,
"learning_rate": 4.61417252352776e-05,
"loss": 6.231,
"step": 25000
},
{
"epoch": 0.24,
"learning_rate": 4.606455973998315e-05,
"loss": 6.2295,
"step": 25500
},
{
"epoch": 0.24,
"learning_rate": 4.5987394244688706e-05,
"loss": 6.2117,
"step": 26000
},
{
"epoch": 0.25,
"learning_rate": 4.591022874939425e-05,
"loss": 6.2233,
"step": 26500
},
{
"epoch": 0.25,
"learning_rate": 4.583306325409981e-05,
"loss": 6.2193,
"step": 27000
},
{
"epoch": 0.25,
"learning_rate": 4.575589775880536e-05,
"loss": 6.2166,
"step": 27500
},
{
"epoch": 0.26,
"learning_rate": 4.567873226351091e-05,
"loss": 6.2093,
"step": 28000
},
{
"epoch": 0.26,
"learning_rate": 4.560156676821646e-05,
"loss": 6.2025,
"step": 28500
},
{
"epoch": 0.27,
"learning_rate": 4.5524401272922015e-05,
"loss": 6.2056,
"step": 29000
},
{
"epoch": 0.27,
"learning_rate": 4.5447235777627566e-05,
"loss": 6.2018,
"step": 29500
},
{
"epoch": 0.28,
"learning_rate": 4.5370070282333116e-05,
"loss": 6.2111,
"step": 30000
},
{
"epoch": 0.28,
"learning_rate": 4.529290478703867e-05,
"loss": 6.2045,
"step": 30500
},
{
"epoch": 0.29,
"learning_rate": 4.521573929174422e-05,
"loss": 6.1887,
"step": 31000
},
{
"epoch": 0.29,
"learning_rate": 4.5138573796449774e-05,
"loss": 6.1956,
"step": 31500
},
{
"epoch": 0.3,
"learning_rate": 4.5061408301155324e-05,
"loss": 6.2002,
"step": 32000
},
{
"epoch": 0.3,
"learning_rate": 4.4984242805860875e-05,
"loss": 6.2027,
"step": 32500
},
{
"epoch": 0.31,
"learning_rate": 4.4907077310566425e-05,
"loss": 6.1877,
"step": 33000
},
{
"epoch": 0.31,
"learning_rate": 4.482991181527198e-05,
"loss": 6.1894,
"step": 33500
},
{
"epoch": 0.31,
"learning_rate": 4.475274631997753e-05,
"loss": 6.1861,
"step": 34000
},
{
"epoch": 0.32,
"learning_rate": 4.467558082468308e-05,
"loss": 6.1839,
"step": 34500
},
{
"epoch": 0.32,
"learning_rate": 4.459841532938864e-05,
"loss": 6.1783,
"step": 35000
},
{
"epoch": 0.33,
"learning_rate": 4.4521249834094184e-05,
"loss": 6.1894,
"step": 35500
},
{
"epoch": 0.33,
"learning_rate": 4.444408433879974e-05,
"loss": 6.1811,
"step": 36000
},
{
"epoch": 0.34,
"learning_rate": 4.436691884350529e-05,
"loss": 6.1822,
"step": 36500
},
{
"epoch": 0.34,
"learning_rate": 4.428975334821084e-05,
"loss": 6.1739,
"step": 37000
},
{
"epoch": 0.35,
"learning_rate": 4.421258785291639e-05,
"loss": 6.1715,
"step": 37500
},
{
"epoch": 0.35,
"learning_rate": 4.413542235762195e-05,
"loss": 6.1779,
"step": 38000
},
{
"epoch": 0.36,
"learning_rate": 4.4058256862327493e-05,
"loss": 6.1735,
"step": 38500
},
{
"epoch": 0.36,
"learning_rate": 4.398109136703305e-05,
"loss": 6.1723,
"step": 39000
},
{
"epoch": 0.37,
"learning_rate": 4.390392587173861e-05,
"loss": 6.1703,
"step": 39500
},
{
"epoch": 0.37,
"learning_rate": 4.382676037644415e-05,
"loss": 6.1738,
"step": 40000
},
{
"epoch": 0.38,
"learning_rate": 4.374959488114971e-05,
"loss": 6.1656,
"step": 40500
},
{
"epoch": 0.38,
"learning_rate": 4.367242938585526e-05,
"loss": 6.1722,
"step": 41000
},
{
"epoch": 0.38,
"learning_rate": 4.359526389056081e-05,
"loss": 6.1643,
"step": 41500
},
{
"epoch": 0.39,
"learning_rate": 4.351809839526636e-05,
"loss": 6.1708,
"step": 42000
},
{
"epoch": 0.39,
"learning_rate": 4.344093289997192e-05,
"loss": 6.1624,
"step": 42500
},
{
"epoch": 0.4,
"learning_rate": 4.336376740467746e-05,
"loss": 6.1644,
"step": 43000
},
{
"epoch": 0.4,
"learning_rate": 4.328660190938302e-05,
"loss": 6.1659,
"step": 43500
},
{
"epoch": 0.41,
"learning_rate": 4.3209436414088575e-05,
"loss": 6.1557,
"step": 44000
},
{
"epoch": 0.41,
"learning_rate": 4.313227091879412e-05,
"loss": 6.1448,
"step": 44500
},
{
"epoch": 0.42,
"learning_rate": 4.3055105423499676e-05,
"loss": 6.1532,
"step": 45000
},
{
"epoch": 0.42,
"learning_rate": 4.2977939928205227e-05,
"loss": 6.1445,
"step": 45500
},
{
"epoch": 0.43,
"learning_rate": 4.290077443291078e-05,
"loss": 6.1419,
"step": 46000
},
{
"epoch": 0.43,
"learning_rate": 4.282360893761633e-05,
"loss": 6.1518,
"step": 46500
},
{
"epoch": 0.44,
"learning_rate": 4.2746443442321885e-05,
"loss": 6.1501,
"step": 47000
},
{
"epoch": 0.44,
"learning_rate": 4.266927794702743e-05,
"loss": 6.142,
"step": 47500
},
{
"epoch": 0.44,
"learning_rate": 4.2592112451732985e-05,
"loss": 6.1523,
"step": 48000
},
{
"epoch": 0.45,
"learning_rate": 4.2514946956438536e-05,
"loss": 6.1383,
"step": 48500
},
{
"epoch": 0.45,
"learning_rate": 4.2437781461144086e-05,
"loss": 6.1358,
"step": 49000
},
{
"epoch": 0.46,
"learning_rate": 4.2360615965849643e-05,
"loss": 6.1378,
"step": 49500
},
{
"epoch": 0.46,
"learning_rate": 4.2283450470555194e-05,
"loss": 6.1415,
"step": 50000
},
{
"epoch": 0.47,
"learning_rate": 4.2206284975260744e-05,
"loss": 6.1407,
"step": 50500
},
{
"epoch": 0.47,
"learning_rate": 4.2129119479966295e-05,
"loss": 6.1405,
"step": 51000
},
{
"epoch": 0.48,
"learning_rate": 4.205195398467185e-05,
"loss": 6.1407,
"step": 51500
},
{
"epoch": 0.48,
"learning_rate": 4.1974788489377396e-05,
"loss": 6.1412,
"step": 52000
},
{
"epoch": 0.49,
"learning_rate": 4.189762299408295e-05,
"loss": 6.1413,
"step": 52500
},
{
"epoch": 0.49,
"learning_rate": 4.18204574987885e-05,
"loss": 6.1347,
"step": 53000
},
{
"epoch": 0.5,
"learning_rate": 4.1743292003494054e-05,
"loss": 6.1364,
"step": 53500
},
{
"epoch": 0.5,
"learning_rate": 4.166612650819961e-05,
"loss": 6.1401,
"step": 54000
},
{
"epoch": 0.5,
"learning_rate": 4.158896101290516e-05,
"loss": 6.1357,
"step": 54500
},
{
"epoch": 0.51,
"learning_rate": 4.151179551761071e-05,
"loss": 6.1317,
"step": 55000
},
{
"epoch": 0.51,
"learning_rate": 4.143463002231626e-05,
"loss": 6.1287,
"step": 55500
},
{
"epoch": 0.52,
"learning_rate": 4.135746452702182e-05,
"loss": 6.1378,
"step": 56000
},
{
"epoch": 0.52,
"learning_rate": 4.128029903172736e-05,
"loss": 6.1365,
"step": 56500
},
{
"epoch": 0.53,
"learning_rate": 4.120313353643292e-05,
"loss": 6.1393,
"step": 57000
},
{
"epoch": 0.53,
"learning_rate": 4.112596804113847e-05,
"loss": 6.1246,
"step": 57500
},
{
"epoch": 0.54,
"learning_rate": 4.104880254584402e-05,
"loss": 6.1184,
"step": 58000
},
{
"epoch": 0.54,
"learning_rate": 4.097163705054958e-05,
"loss": 6.1164,
"step": 58500
},
{
"epoch": 0.55,
"learning_rate": 4.089447155525513e-05,
"loss": 6.1308,
"step": 59000
},
{
"epoch": 0.55,
"learning_rate": 4.081730605996068e-05,
"loss": 6.1179,
"step": 59500
},
{
"epoch": 0.56,
"learning_rate": 4.074014056466623e-05,
"loss": 6.125,
"step": 60000
},
{
"epoch": 0.56,
"learning_rate": 4.066297506937179e-05,
"loss": 6.123,
"step": 60500
},
{
"epoch": 0.56,
"learning_rate": 4.058580957407733e-05,
"loss": 6.1191,
"step": 61000
},
{
"epoch": 0.57,
"learning_rate": 4.050864407878289e-05,
"loss": 6.1176,
"step": 61500
},
{
"epoch": 0.57,
"learning_rate": 4.043147858348844e-05,
"loss": 6.1186,
"step": 62000
},
{
"epoch": 0.58,
"learning_rate": 4.035431308819399e-05,
"loss": 6.1197,
"step": 62500
},
{
"epoch": 0.58,
"learning_rate": 4.027714759289954e-05,
"loss": 6.1212,
"step": 63000
},
{
"epoch": 0.59,
"learning_rate": 4.0199982097605096e-05,
"loss": 6.1219,
"step": 63500
},
{
"epoch": 0.59,
"learning_rate": 4.0122816602310646e-05,
"loss": 6.1169,
"step": 64000
},
{
"epoch": 0.6,
"learning_rate": 4.00456511070162e-05,
"loss": 6.1149,
"step": 64500
},
{
"epoch": 0.6,
"learning_rate": 3.9968485611721754e-05,
"loss": 6.118,
"step": 65000
},
{
"epoch": 0.61,
"learning_rate": 3.98913201164273e-05,
"loss": 6.1218,
"step": 65500
},
{
"epoch": 0.61,
"learning_rate": 3.9814154621132855e-05,
"loss": 6.11,
"step": 66000
},
{
"epoch": 0.62,
"learning_rate": 3.9736989125838405e-05,
"loss": 6.1164,
"step": 66500
},
{
"epoch": 0.62,
"learning_rate": 3.9659823630543956e-05,
"loss": 6.1142,
"step": 67000
},
{
"epoch": 0.63,
"learning_rate": 3.9582658135249506e-05,
"loss": 6.1067,
"step": 67500
},
{
"epoch": 0.63,
"learning_rate": 3.9505492639955063e-05,
"loss": 6.1179,
"step": 68000
},
{
"epoch": 0.63,
"learning_rate": 3.9428327144660614e-05,
"loss": 6.1031,
"step": 68500
},
{
"epoch": 0.64,
"learning_rate": 3.9351161649366164e-05,
"loss": 6.1052,
"step": 69000
},
{
"epoch": 0.64,
"learning_rate": 3.927399615407172e-05,
"loss": 6.1057,
"step": 69500
},
{
"epoch": 0.65,
"learning_rate": 3.9196830658777265e-05,
"loss": 6.1035,
"step": 70000
},
{
"epoch": 0.65,
"learning_rate": 3.911966516348282e-05,
"loss": 6.1028,
"step": 70500
},
{
"epoch": 0.66,
"learning_rate": 3.904249966818837e-05,
"loss": 6.1133,
"step": 71000
},
{
"epoch": 0.66,
"learning_rate": 3.896533417289392e-05,
"loss": 6.1124,
"step": 71500
},
{
"epoch": 0.67,
"learning_rate": 3.8888168677599474e-05,
"loss": 6.1048,
"step": 72000
},
{
"epoch": 0.67,
"learning_rate": 3.881100318230503e-05,
"loss": 6.0969,
"step": 72500
},
{
"epoch": 0.68,
"learning_rate": 3.873383768701058e-05,
"loss": 6.1106,
"step": 73000
},
{
"epoch": 0.68,
"learning_rate": 3.865667219171613e-05,
"loss": 6.095,
"step": 73500
},
{
"epoch": 0.69,
"learning_rate": 3.857950669642169e-05,
"loss": 6.1035,
"step": 74000
},
{
"epoch": 0.69,
"learning_rate": 3.850234120112723e-05,
"loss": 6.1016,
"step": 74500
},
{
"epoch": 0.69,
"learning_rate": 3.842517570583279e-05,
"loss": 6.1119,
"step": 75000
},
{
"epoch": 0.7,
"learning_rate": 3.834801021053834e-05,
"loss": 6.1011,
"step": 75500
},
{
"epoch": 0.7,
"learning_rate": 3.827084471524389e-05,
"loss": 6.097,
"step": 76000
},
{
"epoch": 0.71,
"learning_rate": 3.819367921994944e-05,
"loss": 6.1007,
"step": 76500
},
{
"epoch": 0.71,
"learning_rate": 3.8116513724655e-05,
"loss": 6.0952,
"step": 77000
},
{
"epoch": 0.72,
"learning_rate": 3.803934822936054e-05,
"loss": 6.0097,
"step": 77500
},
{
"epoch": 0.72,
"learning_rate": 3.79621827340661e-05,
"loss": 5.9232,
"step": 78000
},
{
"epoch": 0.73,
"learning_rate": 3.7885017238771656e-05,
"loss": 5.8762,
"step": 78500
},
{
"epoch": 0.73,
"learning_rate": 3.78078517434772e-05,
"loss": 5.8308,
"step": 79000
},
{
"epoch": 0.74,
"learning_rate": 3.773068624818276e-05,
"loss": 5.7992,
"step": 79500
},
{
"epoch": 0.74,
"learning_rate": 3.765352075288831e-05,
"loss": 5.761,
"step": 80000
},
{
"epoch": 0.75,
"learning_rate": 3.757635525759386e-05,
"loss": 5.7269,
"step": 80500
},
{
"epoch": 0.75,
"learning_rate": 3.749918976229941e-05,
"loss": 5.7042,
"step": 81000
},
{
"epoch": 0.75,
"learning_rate": 3.7422024267004966e-05,
"loss": 5.6826,
"step": 81500
},
{
"epoch": 0.76,
"learning_rate": 3.734485877171051e-05,
"loss": 5.6541,
"step": 82000
},
{
"epoch": 0.76,
"learning_rate": 3.7267693276416066e-05,
"loss": 5.6237,
"step": 82500
},
{
"epoch": 0.77,
"learning_rate": 3.719052778112162e-05,
"loss": 5.6091,
"step": 83000
},
{
"epoch": 0.77,
"learning_rate": 3.711336228582717e-05,
"loss": 5.5772,
"step": 83500
},
{
"epoch": 0.78,
"learning_rate": 3.7036196790532724e-05,
"loss": 5.5444,
"step": 84000
},
{
"epoch": 0.78,
"learning_rate": 3.6959031295238275e-05,
"loss": 5.5268,
"step": 84500
},
{
"epoch": 0.79,
"learning_rate": 3.6881865799943825e-05,
"loss": 5.4856,
"step": 85000
},
{
"epoch": 0.79,
"learning_rate": 3.6804700304649376e-05,
"loss": 5.4607,
"step": 85500
},
{
"epoch": 0.8,
"learning_rate": 3.672753480935493e-05,
"loss": 5.4276,
"step": 86000
},
{
"epoch": 0.8,
"learning_rate": 3.6650369314060477e-05,
"loss": 5.4144,
"step": 86500
},
{
"epoch": 0.81,
"learning_rate": 3.6573203818766034e-05,
"loss": 5.3782,
"step": 87000
},
{
"epoch": 0.81,
"learning_rate": 3.6496038323471584e-05,
"loss": 5.3337,
"step": 87500
},
{
"epoch": 0.81,
"learning_rate": 3.6418872828177135e-05,
"loss": 5.2709,
"step": 88000
},
{
"epoch": 0.82,
"learning_rate": 3.634170733288269e-05,
"loss": 5.2095,
"step": 88500
},
{
"epoch": 0.82,
"learning_rate": 3.626454183758824e-05,
"loss": 5.138,
"step": 89000
},
{
"epoch": 0.83,
"learning_rate": 3.618737634229379e-05,
"loss": 4.9959,
"step": 89500
},
{
"epoch": 0.83,
"learning_rate": 3.611021084699934e-05,
"loss": 4.8709,
"step": 90000
},
{
"epoch": 0.84,
"learning_rate": 3.60330453517049e-05,
"loss": 4.7619,
"step": 90500
},
{
"epoch": 0.84,
"learning_rate": 3.5955879856410444e-05,
"loss": 4.6625,
"step": 91000
},
{
"epoch": 0.85,
"learning_rate": 3.5878714361116e-05,
"loss": 4.5585,
"step": 91500
},
{
"epoch": 0.85,
"learning_rate": 3.580154886582155e-05,
"loss": 4.4651,
"step": 92000
},
{
"epoch": 0.86,
"learning_rate": 3.57243833705271e-05,
"loss": 4.3757,
"step": 92500
},
{
"epoch": 0.86,
"learning_rate": 3.564721787523266e-05,
"loss": 4.298,
"step": 93000
},
{
"epoch": 0.87,
"learning_rate": 3.557005237993821e-05,
"loss": 4.2163,
"step": 93500
},
{
"epoch": 0.87,
"learning_rate": 3.549288688464376e-05,
"loss": 4.1503,
"step": 94000
},
{
"epoch": 0.88,
"learning_rate": 3.541572138934931e-05,
"loss": 4.0701,
"step": 94500
},
{
"epoch": 0.88,
"learning_rate": 3.533855589405487e-05,
"loss": 4.0051,
"step": 95000
},
{
"epoch": 0.88,
"learning_rate": 3.526139039876041e-05,
"loss": 3.9148,
"step": 95500
},
{
"epoch": 0.89,
"learning_rate": 3.518422490346597e-05,
"loss": 3.7963,
"step": 96000
},
{
"epoch": 0.89,
"learning_rate": 3.510705940817152e-05,
"loss": 3.6717,
"step": 96500
},
{
"epoch": 0.9,
"learning_rate": 3.502989391287707e-05,
"loss": 3.5825,
"step": 97000
},
{
"epoch": 0.9,
"learning_rate": 3.4952728417582627e-05,
"loss": 3.5186,
"step": 97500
},
{
"epoch": 0.91,
"learning_rate": 3.487556292228818e-05,
"loss": 3.4746,
"step": 98000
},
{
"epoch": 0.91,
"learning_rate": 3.479839742699373e-05,
"loss": 3.3886,
"step": 98500
},
{
"epoch": 0.92,
"learning_rate": 3.472123193169928e-05,
"loss": 3.3322,
"step": 99000
},
{
"epoch": 0.92,
"learning_rate": 3.4644066436404835e-05,
"loss": 3.2667,
"step": 99500
},
{
"epoch": 0.93,
"learning_rate": 3.456690094111038e-05,
"loss": 3.2286,
"step": 100000
},
{
"epoch": 0.93,
"learning_rate": 3.4489735445815936e-05,
"loss": 3.1693,
"step": 100500
},
{
"epoch": 0.94,
"learning_rate": 3.4412569950521486e-05,
"loss": 3.1297,
"step": 101000
},
{
"epoch": 0.94,
"learning_rate": 3.433540445522704e-05,
"loss": 3.095,
"step": 101500
},
{
"epoch": 0.94,
"learning_rate": 3.4258238959932594e-05,
"loss": 3.0555,
"step": 102000
},
{
"epoch": 0.95,
"learning_rate": 3.4181073464638144e-05,
"loss": 3.0215,
"step": 102500
},
{
"epoch": 0.95,
"learning_rate": 3.4103907969343695e-05,
"loss": 2.9936,
"step": 103000
},
{
"epoch": 0.96,
"learning_rate": 3.4026742474049245e-05,
"loss": 2.9553,
"step": 103500
},
{
"epoch": 0.96,
"learning_rate": 3.39495769787548e-05,
"loss": 2.9263,
"step": 104000
},
{
"epoch": 0.97,
"learning_rate": 3.3872411483460346e-05,
"loss": 2.9042,
"step": 104500
},
{
"epoch": 0.97,
"learning_rate": 3.37952459881659e-05,
"loss": 2.8779,
"step": 105000
},
{
"epoch": 0.98,
"learning_rate": 3.3718080492871454e-05,
"loss": 2.8483,
"step": 105500
},
{
"epoch": 0.98,
"learning_rate": 3.3640914997577004e-05,
"loss": 2.8281,
"step": 106000
},
{
"epoch": 0.99,
"learning_rate": 3.3563749502282555e-05,
"loss": 2.8081,
"step": 106500
},
{
"epoch": 0.99,
"learning_rate": 3.348658400698811e-05,
"loss": 2.7802,
"step": 107000
},
{
"epoch": 1.0,
"learning_rate": 3.340941851169366e-05,
"loss": 2.7555,
"step": 107500
},
{
"epoch": 1.0,
"learning_rate": 3.333225301639921e-05,
"loss": 2.7429,
"step": 108000
},
{
"epoch": 1.0,
"learning_rate": 3.325508752110476e-05,
"loss": 2.723,
"step": 108500
},
{
"epoch": 1.01,
"learning_rate": 3.3177922025810313e-05,
"loss": 2.7106,
"step": 109000
},
{
"epoch": 1.01,
"learning_rate": 3.310075653051587e-05,
"loss": 2.6834,
"step": 109500
},
{
"epoch": 1.02,
"learning_rate": 3.302359103522142e-05,
"loss": 2.6625,
"step": 110000
},
{
"epoch": 1.02,
"learning_rate": 3.294642553992697e-05,
"loss": 2.6493,
"step": 110500
},
{
"epoch": 1.03,
"learning_rate": 3.286926004463252e-05,
"loss": 2.6323,
"step": 111000
},
{
"epoch": 1.03,
"learning_rate": 3.279209454933808e-05,
"loss": 2.6196,
"step": 111500
},
{
"epoch": 1.04,
"learning_rate": 3.271492905404363e-05,
"loss": 2.6093,
"step": 112000
},
{
"epoch": 1.04,
"learning_rate": 3.263776355874918e-05,
"loss": 2.5883,
"step": 112500
},
{
"epoch": 1.05,
"learning_rate": 3.256059806345473e-05,
"loss": 2.5722,
"step": 113000
},
{
"epoch": 1.05,
"learning_rate": 3.248343256816028e-05,
"loss": 2.5621,
"step": 113500
},
{
"epoch": 1.06,
"learning_rate": 3.240626707286584e-05,
"loss": 2.5431,
"step": 114000
},
{
"epoch": 1.06,
"learning_rate": 3.232910157757139e-05,
"loss": 2.5357,
"step": 114500
},
{
"epoch": 1.06,
"learning_rate": 3.225193608227694e-05,
"loss": 2.5253,
"step": 115000
},
{
"epoch": 1.07,
"learning_rate": 3.217477058698249e-05,
"loss": 2.5091,
"step": 115500
},
{
"epoch": 1.07,
"learning_rate": 3.2097605091688046e-05,
"loss": 2.5008,
"step": 116000
},
{
"epoch": 1.08,
"learning_rate": 3.20204395963936e-05,
"loss": 2.4897,
"step": 116500
},
{
"epoch": 1.08,
"learning_rate": 3.194327410109915e-05,
"loss": 2.4621,
"step": 117000
},
{
"epoch": 1.09,
"learning_rate": 3.18661086058047e-05,
"loss": 2.4584,
"step": 117500
},
{
"epoch": 1.09,
"learning_rate": 3.178894311051025e-05,
"loss": 2.4491,
"step": 118000
},
{
"epoch": 1.1,
"learning_rate": 3.1711777615215805e-05,
"loss": 2.4336,
"step": 118500
},
{
"epoch": 1.1,
"learning_rate": 3.1634612119921356e-05,
"loss": 2.4147,
"step": 119000
},
{
"epoch": 1.11,
"learning_rate": 3.1557446624626906e-05,
"loss": 2.4093,
"step": 119500
},
{
"epoch": 1.11,
"learning_rate": 3.148028112933246e-05,
"loss": 2.4001,
"step": 120000
},
{
"epoch": 1.12,
"learning_rate": 3.1403115634038014e-05,
"loss": 2.3902,
"step": 120500
},
{
"epoch": 1.12,
"learning_rate": 3.132595013874356e-05,
"loss": 2.3675,
"step": 121000
},
{
"epoch": 1.13,
"learning_rate": 3.1248784643449115e-05,
"loss": 2.3749,
"step": 121500
},
{
"epoch": 1.13,
"learning_rate": 3.1171619148154665e-05,
"loss": 2.3599,
"step": 122000
},
{
"epoch": 1.13,
"learning_rate": 3.1094453652860216e-05,
"loss": 2.3526,
"step": 122500
},
{
"epoch": 1.14,
"learning_rate": 3.101728815756577e-05,
"loss": 2.3341,
"step": 123000
},
{
"epoch": 1.14,
"learning_rate": 3.094012266227132e-05,
"loss": 2.3356,
"step": 123500
},
{
"epoch": 1.15,
"learning_rate": 3.0862957166976874e-05,
"loss": 2.3146,
"step": 124000
},
{
"epoch": 1.15,
"learning_rate": 3.0785791671682424e-05,
"loss": 2.2939,
"step": 124500
},
{
"epoch": 1.16,
"learning_rate": 3.070862617638798e-05,
"loss": 2.2914,
"step": 125000
},
{
"epoch": 1.16,
"learning_rate": 3.0631460681093525e-05,
"loss": 2.2835,
"step": 125500
},
{
"epoch": 1.17,
"learning_rate": 3.055429518579908e-05,
"loss": 2.2782,
"step": 126000
},
{
"epoch": 1.17,
"learning_rate": 3.0477129690504636e-05,
"loss": 2.272,
"step": 126500
},
{
"epoch": 1.18,
"learning_rate": 3.0399964195210183e-05,
"loss": 2.2664,
"step": 127000
},
{
"epoch": 1.18,
"learning_rate": 3.032279869991574e-05,
"loss": 2.2529,
"step": 127500
},
{
"epoch": 1.19,
"learning_rate": 3.0245633204621287e-05,
"loss": 2.2433,
"step": 128000
},
{
"epoch": 1.19,
"learning_rate": 3.016846770932684e-05,
"loss": 2.2377,
"step": 128500
},
{
"epoch": 1.19,
"learning_rate": 3.009130221403239e-05,
"loss": 2.226,
"step": 129000
},
{
"epoch": 1.2,
"learning_rate": 3.0014136718737945e-05,
"loss": 2.2204,
"step": 129500
},
{
"epoch": 1.2,
"learning_rate": 2.9936971223443496e-05,
"loss": 2.2053,
"step": 130000
},
{
"epoch": 1.21,
"learning_rate": 2.985980572814905e-05,
"loss": 2.2028,
"step": 130500
},
{
"epoch": 1.21,
"learning_rate": 2.9782640232854596e-05,
"loss": 2.1984,
"step": 131000
},
{
"epoch": 1.22,
"learning_rate": 2.970547473756015e-05,
"loss": 2.1928,
"step": 131500
},
{
"epoch": 1.22,
"learning_rate": 2.9628309242265707e-05,
"loss": 2.1778,
"step": 132000
},
{
"epoch": 1.23,
"learning_rate": 2.9551143746971255e-05,
"loss": 2.1823,
"step": 132500
},
{
"epoch": 1.23,
"learning_rate": 2.947397825167681e-05,
"loss": 2.1643,
"step": 133000
},
{
"epoch": 1.24,
"learning_rate": 2.939681275638236e-05,
"loss": 2.1642,
"step": 133500
},
{
"epoch": 1.24,
"learning_rate": 2.9319647261087913e-05,
"loss": 2.1537,
"step": 134000
},
{
"epoch": 1.25,
"learning_rate": 2.9242481765793463e-05,
"loss": 2.1488,
"step": 134500
},
{
"epoch": 1.25,
"learning_rate": 2.9165316270499017e-05,
"loss": 2.1471,
"step": 135000
},
{
"epoch": 1.25,
"learning_rate": 2.9088150775204564e-05,
"loss": 2.1342,
"step": 135500
},
{
"epoch": 1.26,
"learning_rate": 2.9010985279910118e-05,
"loss": 2.1308,
"step": 136000
},
{
"epoch": 1.26,
"learning_rate": 2.8933819784615675e-05,
"loss": 2.126,
"step": 136500
},
{
"epoch": 1.27,
"learning_rate": 2.8856654289321222e-05,
"loss": 2.1168,
"step": 137000
},
{
"epoch": 1.27,
"learning_rate": 2.8779488794026776e-05,
"loss": 2.1099,
"step": 137500
},
{
"epoch": 1.28,
"learning_rate": 2.8702323298732326e-05,
"loss": 2.1055,
"step": 138000
},
{
"epoch": 1.28,
"learning_rate": 2.862515780343788e-05,
"loss": 2.1085,
"step": 138500
},
{
"epoch": 1.29,
"learning_rate": 2.8547992308143427e-05,
"loss": 2.0995,
"step": 139000
},
{
"epoch": 1.29,
"learning_rate": 2.8470826812848984e-05,
"loss": 2.0969,
"step": 139500
},
{
"epoch": 1.3,
"learning_rate": 2.839366131755453e-05,
"loss": 2.0776,
"step": 140000
},
{
"epoch": 1.3,
"learning_rate": 2.8316495822260085e-05,
"loss": 2.0799,
"step": 140500
},
{
"epoch": 1.31,
"learning_rate": 2.8239330326965642e-05,
"loss": 2.0776,
"step": 141000
},
{
"epoch": 1.31,
"learning_rate": 2.816216483167119e-05,
"loss": 2.0776,
"step": 141500
},
{
"epoch": 1.31,
"learning_rate": 2.8084999336376743e-05,
"loss": 2.0655,
"step": 142000
},
{
"epoch": 1.32,
"learning_rate": 2.8007833841082293e-05,
"loss": 2.0537,
"step": 142500
},
{
"epoch": 1.32,
"learning_rate": 2.7930668345787847e-05,
"loss": 2.0508,
"step": 143000
},
{
"epoch": 1.33,
"learning_rate": 2.7853502850493394e-05,
"loss": 2.0511,
"step": 143500
},
{
"epoch": 1.33,
"learning_rate": 2.777633735519895e-05,
"loss": 2.04,
"step": 144000
},
{
"epoch": 1.34,
"learning_rate": 2.76991718599045e-05,
"loss": 2.0409,
"step": 144500
},
{
"epoch": 1.34,
"learning_rate": 2.7622006364610052e-05,
"loss": 2.037,
"step": 145000
},
{
"epoch": 1.35,
"learning_rate": 2.7544840869315603e-05,
"loss": 2.0314,
"step": 145500
},
{
"epoch": 1.35,
"learning_rate": 2.7467675374021157e-05,
"loss": 2.0304,
"step": 146000
},
{
"epoch": 1.36,
"learning_rate": 2.739050987872671e-05,
"loss": 2.0209,
"step": 146500
},
{
"epoch": 1.36,
"learning_rate": 2.731334438343226e-05,
"loss": 2.0227,
"step": 147000
},
{
"epoch": 1.37,
"learning_rate": 2.7236178888137815e-05,
"loss": 2.0162,
"step": 147500
},
{
"epoch": 1.37,
"learning_rate": 2.7159013392843362e-05,
"loss": 2.013,
"step": 148000
},
{
"epoch": 1.38,
"learning_rate": 2.708184789754892e-05,
"loss": 2.0051,
"step": 148500
},
{
"epoch": 1.38,
"learning_rate": 2.7004682402254466e-05,
"loss": 1.9949,
"step": 149000
},
{
"epoch": 1.38,
"learning_rate": 2.692751690696002e-05,
"loss": 2.0048,
"step": 149500
},
{
"epoch": 1.39,
"learning_rate": 2.685035141166557e-05,
"loss": 1.9992,
"step": 150000
},
{
"epoch": 1.39,
"learning_rate": 2.6773185916371124e-05,
"loss": 1.9944,
"step": 150500
},
{
"epoch": 1.4,
"learning_rate": 2.6696020421076678e-05,
"loss": 1.9888,
"step": 151000
},
{
"epoch": 1.4,
"learning_rate": 2.6618854925782228e-05,
"loss": 1.9888,
"step": 151500
},
{
"epoch": 1.41,
"learning_rate": 2.6541689430487782e-05,
"loss": 1.9869,
"step": 152000
},
{
"epoch": 1.41,
"learning_rate": 2.646452393519333e-05,
"loss": 1.988,
"step": 152500
},
{
"epoch": 1.42,
"learning_rate": 2.6387358439898886e-05,
"loss": 1.9722,
"step": 153000
},
{
"epoch": 1.42,
"learning_rate": 2.6310192944604433e-05,
"loss": 1.9684,
"step": 153500
},
{
"epoch": 1.43,
"learning_rate": 2.6233027449309987e-05,
"loss": 1.9693,
"step": 154000
},
{
"epoch": 1.43,
"learning_rate": 2.6155861954015538e-05,
"loss": 1.9706,
"step": 154500
},
{
"epoch": 1.44,
"learning_rate": 2.607869645872109e-05,
"loss": 1.9631,
"step": 155000
},
{
"epoch": 1.44,
"learning_rate": 2.6001530963426645e-05,
"loss": 1.9571,
"step": 155500
},
{
"epoch": 1.44,
"learning_rate": 2.5924365468132196e-05,
"loss": 1.9525,
"step": 156000
},
{
"epoch": 1.45,
"learning_rate": 2.584719997283775e-05,
"loss": 2.0859,
"step": 156500
},
{
"epoch": 1.45,
"learning_rate": 2.5770034477543296e-05,
"loss": 1.986,
"step": 157000
},
{
"epoch": 1.46,
"learning_rate": 2.5692868982248854e-05,
"loss": 1.9676,
"step": 157500
},
{
"epoch": 1.46,
"learning_rate": 2.56157034869544e-05,
"loss": 1.9541,
"step": 158000
},
{
"epoch": 1.47,
"learning_rate": 2.5538537991659954e-05,
"loss": 1.9392,
"step": 158500
},
{
"epoch": 1.47,
"learning_rate": 2.5461372496365505e-05,
"loss": 1.9335,
"step": 159000
},
{
"epoch": 1.48,
"learning_rate": 2.538420700107106e-05,
"loss": 1.9308,
"step": 159500
},
{
"epoch": 1.48,
"learning_rate": 2.530704150577661e-05,
"loss": 1.9314,
"step": 160000
},
{
"epoch": 1.49,
"learning_rate": 2.5229876010482163e-05,
"loss": 1.9259,
"step": 160500
},
{
"epoch": 1.49,
"learning_rate": 2.5152710515187717e-05,
"loss": 1.9202,
"step": 161000
},
{
"epoch": 1.5,
"learning_rate": 2.5075545019893264e-05,
"loss": 1.9131,
"step": 161500
},
{
"epoch": 1.5,
"learning_rate": 2.4998379524598818e-05,
"loss": 1.9097,
"step": 162000
},
{
"epoch": 1.5,
"learning_rate": 2.4921214029304368e-05,
"loss": 1.9126,
"step": 162500
},
{
"epoch": 1.51,
"learning_rate": 2.4844048534009922e-05,
"loss": 1.9031,
"step": 163000
},
{
"epoch": 1.51,
"learning_rate": 2.4766883038715476e-05,
"loss": 1.9042,
"step": 163500
},
{
"epoch": 1.52,
"learning_rate": 2.4689717543421026e-05,
"loss": 1.899,
"step": 164000
},
{
"epoch": 1.52,
"learning_rate": 2.4612552048126577e-05,
"loss": 1.8939,
"step": 164500
},
{
"epoch": 1.53,
"learning_rate": 2.453538655283213e-05,
"loss": 1.8871,
"step": 165000
},
{
"epoch": 1.53,
"learning_rate": 2.445822105753768e-05,
"loss": 1.8973,
"step": 165500
},
{
"epoch": 1.54,
"learning_rate": 2.438105556224323e-05,
"loss": 1.8879,
"step": 166000
},
{
"epoch": 1.54,
"learning_rate": 2.4303890066948785e-05,
"loss": 1.8853,
"step": 166500
},
{
"epoch": 1.55,
"learning_rate": 2.4226724571654335e-05,
"loss": 1.8851,
"step": 167000
},
{
"epoch": 1.55,
"learning_rate": 2.414955907635989e-05,
"loss": 1.8871,
"step": 167500
},
{
"epoch": 1.56,
"learning_rate": 2.4072393581065443e-05,
"loss": 1.8749,
"step": 168000
},
{
"epoch": 1.56,
"learning_rate": 2.3995228085770993e-05,
"loss": 1.8756,
"step": 168500
},
{
"epoch": 1.56,
"learning_rate": 2.3918062590476544e-05,
"loss": 1.8784,
"step": 169000
},
{
"epoch": 1.57,
"learning_rate": 2.3840897095182098e-05,
"loss": 1.8693,
"step": 169500
},
{
"epoch": 1.57,
"learning_rate": 2.3763731599887648e-05,
"loss": 1.8639,
"step": 170000
},
{
"epoch": 1.58,
"learning_rate": 2.36865661045932e-05,
"loss": 1.8625,
"step": 170500
},
{
"epoch": 1.58,
"learning_rate": 2.3609400609298752e-05,
"loss": 1.8659,
"step": 171000
},
{
"epoch": 1.59,
"learning_rate": 2.3532235114004303e-05,
"loss": 1.8591,
"step": 171500
},
{
"epoch": 1.59,
"learning_rate": 2.3455069618709853e-05,
"loss": 1.8524,
"step": 172000
},
{
"epoch": 1.6,
"learning_rate": 2.337790412341541e-05,
"loss": 1.8551,
"step": 172500
},
{
"epoch": 1.6,
"learning_rate": 2.330073862812096e-05,
"loss": 1.8547,
"step": 173000
},
{
"epoch": 1.61,
"learning_rate": 2.322357313282651e-05,
"loss": 1.8481,
"step": 173500
},
{
"epoch": 1.61,
"learning_rate": 2.3146407637532065e-05,
"loss": 1.8484,
"step": 174000
},
{
"epoch": 1.62,
"learning_rate": 2.3069242142237616e-05,
"loss": 1.8412,
"step": 174500
},
{
"epoch": 1.62,
"learning_rate": 2.2992076646943166e-05,
"loss": 1.8404,
"step": 175000
},
{
"epoch": 1.63,
"learning_rate": 2.291491115164872e-05,
"loss": 1.8344,
"step": 175500
},
{
"epoch": 1.63,
"learning_rate": 2.283774565635427e-05,
"loss": 1.8417,
"step": 176000
},
{
"epoch": 1.63,
"learning_rate": 2.276058016105982e-05,
"loss": 1.837,
"step": 176500
},
{
"epoch": 1.64,
"learning_rate": 2.2683414665765374e-05,
"loss": 1.8251,
"step": 177000
},
{
"epoch": 1.64,
"learning_rate": 2.2606249170470928e-05,
"loss": 1.825,
"step": 177500
},
{
"epoch": 1.65,
"learning_rate": 2.252908367517648e-05,
"loss": 1.8313,
"step": 178000
},
{
"epoch": 1.65,
"learning_rate": 2.2451918179882032e-05,
"loss": 1.8324,
"step": 178500
},
{
"epoch": 1.66,
"learning_rate": 2.2374752684587583e-05,
"loss": 1.8244,
"step": 179000
},
{
"epoch": 1.66,
"learning_rate": 2.2297587189293133e-05,
"loss": 1.8218,
"step": 179500
},
{
"epoch": 1.67,
"learning_rate": 2.2220421693998687e-05,
"loss": 1.825,
"step": 180000
},
{
"epoch": 1.67,
"learning_rate": 2.2143256198704238e-05,
"loss": 1.8204,
"step": 180500
},
{
"epoch": 1.68,
"learning_rate": 2.2066090703409788e-05,
"loss": 1.8138,
"step": 181000
},
{
"epoch": 1.68,
"learning_rate": 2.1988925208115342e-05,
"loss": 1.8059,
"step": 181500
},
{
"epoch": 1.69,
"learning_rate": 2.1911759712820896e-05,
"loss": 1.8126,
"step": 182000
},
{
"epoch": 1.69,
"learning_rate": 2.1834594217526446e-05,
"loss": 1.8102,
"step": 182500
},
{
"epoch": 1.69,
"learning_rate": 2.1757428722232e-05,
"loss": 1.8102,
"step": 183000
},
{
"epoch": 1.7,
"learning_rate": 2.168026322693755e-05,
"loss": 1.8105,
"step": 183500
},
{
"epoch": 1.7,
"learning_rate": 2.16030977316431e-05,
"loss": 1.8027,
"step": 184000
},
{
"epoch": 1.71,
"learning_rate": 2.1525932236348654e-05,
"loss": 1.8073,
"step": 184500
},
{
"epoch": 1.71,
"learning_rate": 2.1448766741054205e-05,
"loss": 1.8022,
"step": 185000
},
{
"epoch": 1.72,
"learning_rate": 2.1371601245759755e-05,
"loss": 1.7926,
"step": 185500
},
{
"epoch": 1.72,
"learning_rate": 2.129443575046531e-05,
"loss": 1.7932,
"step": 186000
},
{
"epoch": 1.73,
"learning_rate": 2.121727025517086e-05,
"loss": 1.7964,
"step": 186500
},
{
"epoch": 1.73,
"learning_rate": 2.1140104759876413e-05,
"loss": 1.7932,
"step": 187000
},
{
"epoch": 1.74,
"learning_rate": 2.1062939264581967e-05,
"loss": 1.7926,
"step": 187500
},
{
"epoch": 1.74,
"learning_rate": 2.0985773769287518e-05,
"loss": 1.7899,
"step": 188000
},
{
"epoch": 1.75,
"learning_rate": 2.0908608273993068e-05,
"loss": 1.7837,
"step": 188500
},
{
"epoch": 1.75,
"learning_rate": 2.0831442778698622e-05,
"loss": 1.7836,
"step": 189000
},
{
"epoch": 1.75,
"learning_rate": 2.0754277283404172e-05,
"loss": 1.7863,
"step": 189500
},
{
"epoch": 1.76,
"learning_rate": 2.0677111788109723e-05,
"loss": 1.7759,
"step": 190000
},
{
"epoch": 1.76,
"learning_rate": 2.0599946292815277e-05,
"loss": 1.7749,
"step": 190500
},
{
"epoch": 1.77,
"learning_rate": 2.0522780797520827e-05,
"loss": 1.7745,
"step": 191000
},
{
"epoch": 1.77,
"learning_rate": 2.0445615302226377e-05,
"loss": 1.7678,
"step": 191500
},
{
"epoch": 1.78,
"learning_rate": 2.0368449806931935e-05,
"loss": 1.778,
"step": 192000
},
{
"epoch": 1.78,
"learning_rate": 2.0291284311637485e-05,
"loss": 1.7771,
"step": 192500
},
{
"epoch": 1.79,
"learning_rate": 2.0214118816343035e-05,
"loss": 1.7682,
"step": 193000
},
{
"epoch": 1.79,
"learning_rate": 2.013695332104859e-05,
"loss": 1.7679,
"step": 193500
},
{
"epoch": 1.8,
"learning_rate": 2.005978782575414e-05,
"loss": 1.7712,
"step": 194000
},
{
"epoch": 1.8,
"learning_rate": 1.998262233045969e-05,
"loss": 1.7615,
"step": 194500
},
{
"epoch": 1.81,
"learning_rate": 1.9905456835165244e-05,
"loss": 1.7664,
"step": 195000
},
{
"epoch": 1.81,
"learning_rate": 1.9828291339870794e-05,
"loss": 1.7618,
"step": 195500
},
{
"epoch": 1.81,
"learning_rate": 1.9751125844576345e-05,
"loss": 1.7604,
"step": 196000
},
{
"epoch": 1.82,
"learning_rate": 1.96739603492819e-05,
"loss": 1.754,
"step": 196500
},
{
"epoch": 1.82,
"learning_rate": 1.9596794853987452e-05,
"loss": 1.7563,
"step": 197000
},
{
"epoch": 1.83,
"learning_rate": 1.9519629358693003e-05,
"loss": 1.7573,
"step": 197500
},
{
"epoch": 1.83,
"learning_rate": 1.9442463863398557e-05,
"loss": 1.7473,
"step": 198000
},
{
"epoch": 1.84,
"learning_rate": 1.9365298368104107e-05,
"loss": 1.743,
"step": 198500
},
{
"epoch": 1.84,
"learning_rate": 1.9288132872809657e-05,
"loss": 1.7482,
"step": 199000
},
{
"epoch": 1.85,
"learning_rate": 1.921096737751521e-05,
"loss": 1.7395,
"step": 199500
},
{
"epoch": 1.85,
"learning_rate": 1.9133801882220762e-05,
"loss": 1.7428,
"step": 200000
},
{
"epoch": 1.86,
"learning_rate": 1.9056636386926312e-05,
"loss": 1.7448,
"step": 200500
},
{
"epoch": 1.86,
"learning_rate": 1.8979470891631866e-05,
"loss": 1.7461,
"step": 201000
},
{
"epoch": 1.87,
"learning_rate": 1.890230539633742e-05,
"loss": 1.7332,
"step": 201500
},
{
"epoch": 1.87,
"learning_rate": 1.882513990104297e-05,
"loss": 1.7409,
"step": 202000
},
{
"epoch": 1.88,
"learning_rate": 1.8747974405748524e-05,
"loss": 1.7361,
"step": 202500
},
{
"epoch": 1.88,
"learning_rate": 1.8670808910454074e-05,
"loss": 1.7425,
"step": 203000
},
{
"epoch": 1.88,
"learning_rate": 1.8593643415159625e-05,
"loss": 1.7305,
"step": 203500
},
{
"epoch": 1.89,
"learning_rate": 1.851647791986518e-05,
"loss": 1.7352,
"step": 204000
},
{
"epoch": 1.89,
"learning_rate": 1.843931242457073e-05,
"loss": 1.7351,
"step": 204500
},
{
"epoch": 1.9,
"learning_rate": 1.836214692927628e-05,
"loss": 1.7237,
"step": 205000
},
{
"epoch": 1.9,
"learning_rate": 1.8284981433981833e-05,
"loss": 1.729,
"step": 205500
},
{
"epoch": 1.91,
"learning_rate": 1.8207815938687384e-05,
"loss": 1.7275,
"step": 206000
},
{
"epoch": 1.91,
"learning_rate": 1.8130650443392938e-05,
"loss": 1.725,
"step": 206500
},
{
"epoch": 1.92,
"learning_rate": 1.805348494809849e-05,
"loss": 1.7216,
"step": 207000
},
{
"epoch": 1.92,
"learning_rate": 1.7976319452804042e-05,
"loss": 1.7218,
"step": 207500
},
{
"epoch": 1.93,
"learning_rate": 1.7899153957509592e-05,
"loss": 1.7187,
"step": 208000
},
{
"epoch": 1.93,
"learning_rate": 1.7821988462215146e-05,
"loss": 1.7173,
"step": 208500
},
{
"epoch": 1.94,
"learning_rate": 1.7744822966920696e-05,
"loss": 1.7228,
"step": 209000
},
{
"epoch": 1.94,
"learning_rate": 1.7667657471626247e-05,
"loss": 1.7113,
"step": 209500
},
{
"epoch": 1.94,
"learning_rate": 1.75904919763318e-05,
"loss": 1.7072,
"step": 210000
},
{
"epoch": 1.95,
"learning_rate": 1.751332648103735e-05,
"loss": 1.7174,
"step": 210500
},
{
"epoch": 1.95,
"learning_rate": 1.74361609857429e-05,
"loss": 1.707,
"step": 211000
},
{
"epoch": 1.96,
"learning_rate": 1.7358995490448455e-05,
"loss": 1.7147,
"step": 211500
},
{
"epoch": 1.96,
"learning_rate": 1.728182999515401e-05,
"loss": 1.7156,
"step": 212000
},
{
"epoch": 1.97,
"learning_rate": 1.720466449985956e-05,
"loss": 1.7024,
"step": 212500
},
{
"epoch": 1.97,
"learning_rate": 1.7127499004565113e-05,
"loss": 1.7038,
"step": 213000
},
{
"epoch": 1.98,
"learning_rate": 1.7050333509270664e-05,
"loss": 1.7006,
"step": 213500
},
{
"epoch": 1.98,
"learning_rate": 1.6973168013976214e-05,
"loss": 1.6952,
"step": 214000
},
{
"epoch": 1.99,
"learning_rate": 1.6896002518681768e-05,
"loss": 1.7124,
"step": 214500
},
{
"epoch": 1.99,
"learning_rate": 1.681883702338732e-05,
"loss": 1.7015,
"step": 215000
},
{
"epoch": 2.0,
"learning_rate": 1.674167152809287e-05,
"loss": 1.6964,
"step": 215500
},
{
"epoch": 2.0,
"learning_rate": 1.6664506032798423e-05,
"loss": 1.6972,
"step": 216000
},
{
"epoch": 2.0,
"learning_rate": 1.6587340537503977e-05,
"loss": 1.6966,
"step": 216500
},
{
"epoch": 2.01,
"learning_rate": 1.6510175042209527e-05,
"loss": 1.6976,
"step": 217000
},
{
"epoch": 2.01,
"learning_rate": 1.643300954691508e-05,
"loss": 1.6848,
"step": 217500
},
{
"epoch": 2.02,
"learning_rate": 1.635584405162063e-05,
"loss": 1.6875,
"step": 218000
},
{
"epoch": 2.02,
"learning_rate": 1.627867855632618e-05,
"loss": 1.6929,
"step": 218500
},
{
"epoch": 2.03,
"learning_rate": 1.6201513061031735e-05,
"loss": 1.6858,
"step": 219000
},
{
"epoch": 2.03,
"learning_rate": 1.6124347565737286e-05,
"loss": 1.6863,
"step": 219500
},
{
"epoch": 2.04,
"learning_rate": 1.6047182070442836e-05,
"loss": 1.6858,
"step": 220000
},
{
"epoch": 2.04,
"learning_rate": 1.597001657514839e-05,
"loss": 1.6841,
"step": 220500
},
{
"epoch": 2.05,
"learning_rate": 1.5892851079853944e-05,
"loss": 1.6815,
"step": 221000
},
{
"epoch": 2.05,
"learning_rate": 1.5815685584559494e-05,
"loss": 1.6829,
"step": 221500
},
{
"epoch": 2.06,
"learning_rate": 1.5738520089265045e-05,
"loss": 1.6897,
"step": 222000
},
{
"epoch": 2.06,
"learning_rate": 1.56613545939706e-05,
"loss": 1.6817,
"step": 222500
},
{
"epoch": 2.06,
"learning_rate": 1.558418909867615e-05,
"loss": 1.6822,
"step": 223000
},
{
"epoch": 2.07,
"learning_rate": 1.5507023603381703e-05,
"loss": 1.6784,
"step": 223500
},
{
"epoch": 2.07,
"learning_rate": 1.5429858108087253e-05,
"loss": 1.6743,
"step": 224000
},
{
"epoch": 2.08,
"learning_rate": 1.5352692612792804e-05,
"loss": 1.6865,
"step": 224500
},
{
"epoch": 2.08,
"learning_rate": 1.5275527117498357e-05,
"loss": 1.671,
"step": 225000
},
{
"epoch": 2.09,
"learning_rate": 1.5198361622203908e-05,
"loss": 1.671,
"step": 225500
},
{
"epoch": 2.09,
"learning_rate": 1.5121196126909462e-05,
"loss": 1.6757,
"step": 226000
},
{
"epoch": 2.1,
"learning_rate": 1.5044030631615014e-05,
"loss": 1.6741,
"step": 226500
},
{
"epoch": 2.1,
"learning_rate": 1.4966865136320566e-05,
"loss": 1.6719,
"step": 227000
},
{
"epoch": 2.11,
"learning_rate": 1.4889699641026116e-05,
"loss": 1.671,
"step": 227500
},
{
"epoch": 2.11,
"learning_rate": 1.4812534145731668e-05,
"loss": 1.6658,
"step": 228000
},
{
"epoch": 2.12,
"learning_rate": 1.473536865043722e-05,
"loss": 1.6649,
"step": 228500
},
{
"epoch": 2.12,
"learning_rate": 1.4658203155142771e-05,
"loss": 1.6693,
"step": 229000
},
{
"epoch": 2.13,
"learning_rate": 1.4581037659848323e-05,
"loss": 1.6604,
"step": 229500
},
{
"epoch": 2.13,
"learning_rate": 1.4503872164553875e-05,
"loss": 1.6655,
"step": 230000
},
{
"epoch": 2.13,
"learning_rate": 1.4426706669259429e-05,
"loss": 1.6564,
"step": 230500
},
{
"epoch": 2.14,
"learning_rate": 1.4349541173964981e-05,
"loss": 1.6664,
"step": 231000
},
{
"epoch": 2.14,
"learning_rate": 1.4272375678670533e-05,
"loss": 1.6537,
"step": 231500
},
{
"epoch": 2.15,
"learning_rate": 1.4195210183376084e-05,
"loss": 1.6616,
"step": 232000
},
{
"epoch": 2.15,
"learning_rate": 1.4118044688081636e-05,
"loss": 1.6579,
"step": 232500
},
{
"epoch": 2.16,
"learning_rate": 1.4040879192787188e-05,
"loss": 1.6617,
"step": 233000
},
{
"epoch": 2.16,
"learning_rate": 1.3963713697492738e-05,
"loss": 1.652,
"step": 233500
},
{
"epoch": 2.17,
"learning_rate": 1.388654820219829e-05,
"loss": 1.6544,
"step": 234000
},
{
"epoch": 2.17,
"learning_rate": 1.3809382706903843e-05,
"loss": 1.6504,
"step": 234500
},
{
"epoch": 2.18,
"learning_rate": 1.3732217211609393e-05,
"loss": 1.6574,
"step": 235000
},
{
"epoch": 2.18,
"learning_rate": 1.3655051716314949e-05,
"loss": 1.6495,
"step": 235500
},
{
"epoch": 2.19,
"learning_rate": 1.35778862210205e-05,
"loss": 1.6519,
"step": 236000
},
{
"epoch": 2.19,
"learning_rate": 1.3500720725726051e-05,
"loss": 1.6521,
"step": 236500
},
{
"epoch": 2.19,
"learning_rate": 1.3423555230431603e-05,
"loss": 1.6535,
"step": 237000
},
{
"epoch": 2.2,
"learning_rate": 1.3346389735137155e-05,
"loss": 1.6527,
"step": 237500
},
{
"epoch": 2.2,
"learning_rate": 1.3269224239842706e-05,
"loss": 1.6459,
"step": 238000
},
{
"epoch": 2.21,
"learning_rate": 1.3192058744548258e-05,
"loss": 1.6444,
"step": 238500
},
{
"epoch": 2.21,
"learning_rate": 1.311489324925381e-05,
"loss": 1.6443,
"step": 239000
},
{
"epoch": 2.22,
"learning_rate": 1.303772775395936e-05,
"loss": 1.6448,
"step": 239500
},
{
"epoch": 2.22,
"learning_rate": 1.2960562258664913e-05,
"loss": 1.6469,
"step": 240000
},
{
"epoch": 2.23,
"learning_rate": 1.2883396763370466e-05,
"loss": 1.6368,
"step": 240500
},
{
"epoch": 2.23,
"learning_rate": 1.2806231268076018e-05,
"loss": 1.6462,
"step": 241000
},
{
"epoch": 2.24,
"learning_rate": 1.272906577278157e-05,
"loss": 1.6395,
"step": 241500
},
{
"epoch": 2.24,
"learning_rate": 1.2651900277487123e-05,
"loss": 1.6396,
"step": 242000
},
{
"epoch": 2.25,
"learning_rate": 1.2574734782192673e-05,
"loss": 1.6442,
"step": 242500
},
{
"epoch": 2.25,
"learning_rate": 1.2497569286898225e-05,
"loss": 1.6417,
"step": 243000
},
{
"epoch": 2.25,
"learning_rate": 1.2420403791603777e-05,
"loss": 1.6377,
"step": 243500
},
{
"epoch": 2.26,
"learning_rate": 1.234323829630933e-05,
"loss": 1.6415,
"step": 244000
},
{
"epoch": 2.26,
"learning_rate": 1.2266072801014882e-05,
"loss": 1.6313,
"step": 244500
},
{
"epoch": 2.27,
"learning_rate": 1.2188907305720434e-05,
"loss": 1.6325,
"step": 245000
},
{
"epoch": 2.27,
"learning_rate": 1.2111741810425984e-05,
"loss": 1.6376,
"step": 245500
},
{
"epoch": 2.28,
"learning_rate": 1.2034576315131536e-05,
"loss": 1.6412,
"step": 246000
},
{
"epoch": 2.28,
"learning_rate": 1.195741081983709e-05,
"loss": 1.6296,
"step": 246500
},
{
"epoch": 2.29,
"learning_rate": 1.188024532454264e-05,
"loss": 1.6288,
"step": 247000
},
{
"epoch": 2.29,
"learning_rate": 1.1803079829248193e-05,
"loss": 1.6349,
"step": 247500
},
{
"epoch": 2.3,
"learning_rate": 1.1725914333953745e-05,
"loss": 1.6317,
"step": 248000
},
{
"epoch": 2.3,
"learning_rate": 1.1648748838659295e-05,
"loss": 1.6306,
"step": 248500
},
{
"epoch": 2.31,
"learning_rate": 1.1571583343364849e-05,
"loss": 1.6322,
"step": 249000
},
{
"epoch": 2.31,
"learning_rate": 1.1494417848070401e-05,
"loss": 1.6334,
"step": 249500
},
{
"epoch": 2.31,
"learning_rate": 1.1417252352775952e-05,
"loss": 1.6261,
"step": 250000
},
{
"epoch": 2.32,
"learning_rate": 1.1340086857481504e-05,
"loss": 1.6245,
"step": 250500
},
{
"epoch": 2.32,
"learning_rate": 1.1262921362187056e-05,
"loss": 1.6254,
"step": 251000
},
{
"epoch": 2.33,
"learning_rate": 1.1185755866892608e-05,
"loss": 1.6244,
"step": 251500
},
{
"epoch": 2.33,
"learning_rate": 1.110859037159816e-05,
"loss": 1.6203,
"step": 252000
},
{
"epoch": 2.34,
"learning_rate": 1.1031424876303712e-05,
"loss": 1.6255,
"step": 252500
},
{
"epoch": 2.34,
"learning_rate": 1.0954259381009263e-05,
"loss": 1.6187,
"step": 253000
},
{
"epoch": 2.35,
"learning_rate": 1.0877093885714815e-05,
"loss": 1.6242,
"step": 253500
},
{
"epoch": 2.35,
"learning_rate": 1.0799928390420368e-05,
"loss": 1.6218,
"step": 254000
},
{
"epoch": 2.36,
"learning_rate": 1.0722762895125919e-05,
"loss": 1.6202,
"step": 254500
},
{
"epoch": 2.36,
"learning_rate": 1.0645597399831471e-05,
"loss": 1.6138,
"step": 255000
},
{
"epoch": 2.37,
"learning_rate": 1.0568431904537023e-05,
"loss": 1.6146,
"step": 255500
},
{
"epoch": 2.37,
"learning_rate": 1.0491266409242575e-05,
"loss": 1.608,
"step": 256000
},
{
"epoch": 2.38,
"learning_rate": 1.0414100913948127e-05,
"loss": 1.62,
"step": 256500
},
{
"epoch": 2.38,
"learning_rate": 1.033693541865368e-05,
"loss": 1.6238,
"step": 257000
},
{
"epoch": 2.38,
"learning_rate": 1.025976992335923e-05,
"loss": 1.6198,
"step": 257500
},
{
"epoch": 2.39,
"learning_rate": 1.0182604428064782e-05,
"loss": 1.6165,
"step": 258000
},
{
"epoch": 2.39,
"learning_rate": 1.0105438932770334e-05,
"loss": 1.615,
"step": 258500
},
{
"epoch": 2.4,
"learning_rate": 1.0028273437475886e-05,
"loss": 1.6119,
"step": 259000
},
{
"epoch": 2.4,
"learning_rate": 9.951107942181438e-06,
"loss": 1.6029,
"step": 259500
},
{
"epoch": 2.41,
"learning_rate": 9.87394244688699e-06,
"loss": 1.6108,
"step": 260000
},
{
"epoch": 2.41,
"learning_rate": 9.796776951592541e-06,
"loss": 1.6152,
"step": 260500
},
{
"epoch": 2.42,
"learning_rate": 9.719611456298095e-06,
"loss": 1.6118,
"step": 261000
},
{
"epoch": 2.42,
"learning_rate": 9.642445961003647e-06,
"loss": 1.6092,
"step": 261500
},
{
"epoch": 2.43,
"learning_rate": 9.565280465709197e-06,
"loss": 1.6072,
"step": 262000
},
{
"epoch": 2.43,
"learning_rate": 9.48811497041475e-06,
"loss": 1.6117,
"step": 262500
},
{
"epoch": 2.44,
"learning_rate": 9.410949475120302e-06,
"loss": 1.6077,
"step": 263000
},
{
"epoch": 2.44,
"learning_rate": 9.333783979825854e-06,
"loss": 1.6082,
"step": 263500
},
{
"epoch": 2.44,
"learning_rate": 9.256618484531406e-06,
"loss": 1.6054,
"step": 264000
},
{
"epoch": 2.45,
"learning_rate": 9.179452989236958e-06,
"loss": 1.6012,
"step": 264500
},
{
"epoch": 2.45,
"learning_rate": 9.102287493942508e-06,
"loss": 1.6091,
"step": 265000
},
{
"epoch": 2.46,
"learning_rate": 9.02512199864806e-06,
"loss": 1.6026,
"step": 265500
},
{
"epoch": 2.46,
"learning_rate": 8.947956503353613e-06,
"loss": 1.6038,
"step": 266000
},
{
"epoch": 2.47,
"learning_rate": 8.870791008059165e-06,
"loss": 1.5986,
"step": 266500
},
{
"epoch": 2.47,
"learning_rate": 8.793625512764717e-06,
"loss": 1.5989,
"step": 267000
},
{
"epoch": 2.48,
"learning_rate": 8.716460017470269e-06,
"loss": 1.6037,
"step": 267500
},
{
"epoch": 2.48,
"learning_rate": 8.63929452217582e-06,
"loss": 1.5999,
"step": 268000
},
{
"epoch": 2.49,
"learning_rate": 8.562129026881373e-06,
"loss": 1.5942,
"step": 268500
},
{
"epoch": 2.49,
"learning_rate": 8.484963531586925e-06,
"loss": 1.6006,
"step": 269000
},
{
"epoch": 2.5,
"learning_rate": 8.407798036292476e-06,
"loss": 1.5911,
"step": 269500
},
{
"epoch": 2.5,
"learning_rate": 8.330632540998028e-06,
"loss": 1.5997,
"step": 270000
},
{
"epoch": 2.5,
"learning_rate": 8.25346704570358e-06,
"loss": 1.5983,
"step": 270500
},
{
"epoch": 2.51,
"learning_rate": 8.176301550409132e-06,
"loss": 1.5966,
"step": 271000
},
{
"epoch": 2.51,
"learning_rate": 8.099136055114684e-06,
"loss": 1.5899,
"step": 271500
},
{
"epoch": 2.52,
"learning_rate": 8.021970559820236e-06,
"loss": 1.599,
"step": 272000
},
{
"epoch": 2.52,
"learning_rate": 7.944805064525787e-06,
"loss": 1.5894,
"step": 272500
},
{
"epoch": 2.53,
"learning_rate": 7.86763956923134e-06,
"loss": 1.5969,
"step": 273000
},
{
"epoch": 2.53,
"learning_rate": 7.790474073936891e-06,
"loss": 1.5976,
"step": 273500
},
{
"epoch": 2.54,
"learning_rate": 7.713308578642443e-06,
"loss": 1.5907,
"step": 274000
},
{
"epoch": 2.54,
"learning_rate": 7.636143083347995e-06,
"loss": 1.5812,
"step": 274500
},
{
"epoch": 2.55,
"learning_rate": 7.5589775880535464e-06,
"loss": 1.5889,
"step": 275000
},
{
"epoch": 2.55,
"learning_rate": 7.481812092759099e-06,
"loss": 1.5857,
"step": 275500
},
{
"epoch": 2.56,
"learning_rate": 7.4046465974646515e-06,
"loss": 1.5849,
"step": 276000
},
{
"epoch": 2.56,
"learning_rate": 7.327481102170203e-06,
"loss": 1.5893,
"step": 276500
},
{
"epoch": 2.56,
"learning_rate": 7.250315606875754e-06,
"loss": 1.5901,
"step": 277000
},
{
"epoch": 2.57,
"learning_rate": 7.173150111581306e-06,
"loss": 1.5812,
"step": 277500
},
{
"epoch": 2.57,
"learning_rate": 7.095984616286859e-06,
"loss": 1.5969,
"step": 278000
},
{
"epoch": 2.58,
"learning_rate": 7.01881912099241e-06,
"loss": 1.5894,
"step": 278500
},
{
"epoch": 2.58,
"learning_rate": 6.9416536256979625e-06,
"loss": 1.5812,
"step": 279000
},
{
"epoch": 2.59,
"learning_rate": 6.864488130403514e-06,
"loss": 1.5908,
"step": 279500
},
{
"epoch": 2.59,
"learning_rate": 6.787322635109065e-06,
"loss": 1.5869,
"step": 280000
},
{
"epoch": 2.6,
"learning_rate": 6.710157139814618e-06,
"loss": 1.5863,
"step": 280500
},
{
"epoch": 2.6,
"learning_rate": 6.63299164452017e-06,
"loss": 1.5842,
"step": 281000
},
{
"epoch": 2.61,
"learning_rate": 6.5558261492257214e-06,
"loss": 1.5794,
"step": 281500
},
{
"epoch": 2.61,
"learning_rate": 6.4786606539312736e-06,
"loss": 1.5896,
"step": 282000
},
{
"epoch": 2.62,
"learning_rate": 6.401495158636825e-06,
"loss": 1.5837,
"step": 282500
},
{
"epoch": 2.62,
"learning_rate": 6.324329663342378e-06,
"loss": 1.579,
"step": 283000
},
{
"epoch": 2.63,
"learning_rate": 6.24716416804793e-06,
"loss": 1.5811,
"step": 283500
},
{
"epoch": 2.63,
"learning_rate": 6.169998672753481e-06,
"loss": 1.5833,
"step": 284000
},
{
"epoch": 2.63,
"learning_rate": 6.092833177459033e-06,
"loss": 1.5814,
"step": 284500
},
{
"epoch": 2.64,
"learning_rate": 6.015667682164585e-06,
"loss": 1.5826,
"step": 285000
},
{
"epoch": 2.64,
"learning_rate": 5.938502186870137e-06,
"loss": 1.5786,
"step": 285500
},
{
"epoch": 2.65,
"learning_rate": 5.861336691575689e-06,
"loss": 1.575,
"step": 286000
},
{
"epoch": 2.65,
"learning_rate": 5.784171196281241e-06,
"loss": 1.5753,
"step": 286500
},
{
"epoch": 2.66,
"learning_rate": 5.707005700986793e-06,
"loss": 1.5788,
"step": 287000
},
{
"epoch": 2.66,
"learning_rate": 5.629840205692344e-06,
"loss": 1.5777,
"step": 287500
},
{
"epoch": 2.67,
"learning_rate": 5.5526747103978964e-06,
"loss": 1.5719,
"step": 288000
},
{
"epoch": 2.67,
"learning_rate": 5.4755092151034486e-06,
"loss": 1.5772,
"step": 288500
},
{
"epoch": 2.68,
"learning_rate": 5.398343719809e-06,
"loss": 1.5733,
"step": 289000
},
{
"epoch": 2.68,
"learning_rate": 5.321178224514552e-06,
"loss": 1.5791,
"step": 289500
},
{
"epoch": 2.69,
"learning_rate": 5.244012729220104e-06,
"loss": 1.5711,
"step": 290000
},
{
"epoch": 2.69,
"learning_rate": 5.166847233925655e-06,
"loss": 1.5727,
"step": 290500
},
{
"epoch": 2.69,
"learning_rate": 5.089681738631208e-06,
"loss": 1.57,
"step": 291000
},
{
"epoch": 2.7,
"learning_rate": 5.0125162433367596e-06,
"loss": 1.5743,
"step": 291500
},
{
"epoch": 2.7,
"learning_rate": 4.935350748042312e-06,
"loss": 1.5749,
"step": 292000
},
{
"epoch": 2.71,
"learning_rate": 4.858185252747864e-06,
"loss": 1.5697,
"step": 292500
},
{
"epoch": 2.71,
"learning_rate": 4.781019757453416e-06,
"loss": 1.573,
"step": 293000
},
{
"epoch": 2.72,
"learning_rate": 4.703854262158967e-06,
"loss": 1.5754,
"step": 293500
},
{
"epoch": 2.72,
"learning_rate": 4.626688766864519e-06,
"loss": 1.5677,
"step": 294000
},
{
"epoch": 2.73,
"learning_rate": 4.5495232715700714e-06,
"loss": 1.5689,
"step": 294500
},
{
"epoch": 2.73,
"learning_rate": 4.472357776275623e-06,
"loss": 1.5688,
"step": 295000
},
{
"epoch": 2.74,
"learning_rate": 4.395192280981175e-06,
"loss": 1.5759,
"step": 295500
},
{
"epoch": 2.74,
"learning_rate": 4.318026785686727e-06,
"loss": 1.5662,
"step": 296000
},
{
"epoch": 2.75,
"learning_rate": 4.240861290392278e-06,
"loss": 1.5725,
"step": 296500
},
{
"epoch": 2.75,
"learning_rate": 4.16369579509783e-06,
"loss": 1.5712,
"step": 297000
},
{
"epoch": 2.75,
"learning_rate": 4.0865302998033825e-06,
"loss": 1.5632,
"step": 297500
},
{
"epoch": 2.76,
"learning_rate": 4.0093648045089346e-06,
"loss": 1.5631,
"step": 298000
},
{
"epoch": 2.76,
"learning_rate": 3.932199309214486e-06,
"loss": 1.5639,
"step": 298500
},
{
"epoch": 2.77,
"learning_rate": 3.855033813920038e-06,
"loss": 1.5672,
"step": 299000
},
{
"epoch": 2.77,
"learning_rate": 3.77786831862559e-06,
"loss": 1.5691,
"step": 299500
},
{
"epoch": 2.78,
"learning_rate": 3.7007028233311418e-06,
"loss": 1.5654,
"step": 300000
},
{
"epoch": 2.78,
"learning_rate": 3.623537328036694e-06,
"loss": 1.559,
"step": 300500
},
{
"epoch": 2.79,
"learning_rate": 3.5463718327422456e-06,
"loss": 1.5709,
"step": 301000
},
{
"epoch": 2.79,
"learning_rate": 3.469206337447798e-06,
"loss": 1.5669,
"step": 301500
},
{
"epoch": 2.8,
"learning_rate": 3.39204084215335e-06,
"loss": 1.5635,
"step": 302000
},
{
"epoch": 2.8,
"learning_rate": 3.314875346858901e-06,
"loss": 1.5659,
"step": 302500
},
{
"epoch": 2.81,
"learning_rate": 3.2377098515644536e-06,
"loss": 1.5623,
"step": 303000
},
{
"epoch": 2.81,
"learning_rate": 3.1605443562700053e-06,
"loss": 1.5638,
"step": 303500
},
{
"epoch": 2.81,
"learning_rate": 3.083378860975557e-06,
"loss": 1.5644,
"step": 304000
},
{
"epoch": 2.82,
"learning_rate": 3.006213365681109e-06,
"loss": 1.5593,
"step": 304500
},
{
"epoch": 2.82,
"learning_rate": 2.929047870386661e-06,
"loss": 1.5592,
"step": 305000
},
{
"epoch": 2.83,
"learning_rate": 2.851882375092213e-06,
"loss": 1.5571,
"step": 305500
},
{
"epoch": 2.83,
"learning_rate": 2.774716879797765e-06,
"loss": 1.5574,
"step": 306000
},
{
"epoch": 2.84,
"learning_rate": 2.6975513845033168e-06,
"loss": 1.557,
"step": 306500
},
{
"epoch": 2.84,
"learning_rate": 2.6203858892088685e-06,
"loss": 1.5555,
"step": 307000
},
{
"epoch": 2.85,
"learning_rate": 2.5432203939144206e-06,
"loss": 1.5559,
"step": 307500
},
{
"epoch": 2.85,
"learning_rate": 2.4660548986199723e-06,
"loss": 1.562,
"step": 308000
},
{
"epoch": 2.86,
"learning_rate": 2.3888894033255244e-06,
"loss": 1.5609,
"step": 308500
},
{
"epoch": 2.86,
"learning_rate": 2.311723908031076e-06,
"loss": 1.5584,
"step": 309000
},
{
"epoch": 2.87,
"learning_rate": 2.234558412736628e-06,
"loss": 1.5618,
"step": 309500
},
{
"epoch": 2.87,
"learning_rate": 2.15739291744218e-06,
"loss": 1.5539,
"step": 310000
},
{
"epoch": 2.88,
"learning_rate": 2.080227422147732e-06,
"loss": 1.5567,
"step": 310500
},
{
"epoch": 2.88,
"learning_rate": 2.0030619268532837e-06,
"loss": 1.5585,
"step": 311000
},
{
"epoch": 2.88,
"learning_rate": 1.925896431558836e-06,
"loss": 1.5562,
"step": 311500
},
{
"epoch": 2.89,
"learning_rate": 1.8487309362643875e-06,
"loss": 1.5575,
"step": 312000
},
{
"epoch": 2.89,
"learning_rate": 1.7715654409699394e-06,
"loss": 1.5587,
"step": 312500
},
{
"epoch": 2.9,
"learning_rate": 1.6943999456754913e-06,
"loss": 1.5602,
"step": 313000
},
{
"epoch": 2.9,
"learning_rate": 1.6172344503810433e-06,
"loss": 1.562,
"step": 313500
},
{
"epoch": 2.91,
"learning_rate": 1.5400689550865952e-06,
"loss": 1.5581,
"step": 314000
},
{
"epoch": 2.91,
"learning_rate": 1.462903459792147e-06,
"loss": 1.5536,
"step": 314500
},
{
"epoch": 2.92,
"learning_rate": 1.385737964497699e-06,
"loss": 1.5588,
"step": 315000
},
{
"epoch": 2.92,
"learning_rate": 1.3085724692032509e-06,
"loss": 1.5569,
"step": 315500
},
{
"epoch": 2.93,
"learning_rate": 1.2314069739088028e-06,
"loss": 1.5536,
"step": 316000
},
{
"epoch": 2.93,
"learning_rate": 1.1542414786143547e-06,
"loss": 1.5558,
"step": 316500
},
{
"epoch": 2.94,
"learning_rate": 1.0770759833199066e-06,
"loss": 1.5585,
"step": 317000
},
{
"epoch": 2.94,
"learning_rate": 9.999104880254585e-07,
"loss": 1.5542,
"step": 317500
},
{
"epoch": 2.94,
"learning_rate": 9.227449927310104e-07,
"loss": 1.5529,
"step": 318000
},
{
"epoch": 2.95,
"learning_rate": 8.455794974365622e-07,
"loss": 1.5605,
"step": 318500
},
{
"epoch": 2.95,
"learning_rate": 7.684140021421142e-07,
"loss": 1.5496,
"step": 319000
},
{
"epoch": 2.96,
"learning_rate": 6.91248506847666e-07,
"loss": 1.5489,
"step": 319500
},
{
"epoch": 2.96,
"learning_rate": 6.140830115532179e-07,
"loss": 1.5593,
"step": 320000
},
{
"epoch": 2.97,
"learning_rate": 5.3691751625877e-07,
"loss": 1.5583,
"step": 320500
},
{
"epoch": 2.97,
"learning_rate": 4.597520209643218e-07,
"loss": 1.5566,
"step": 321000
},
{
"epoch": 2.98,
"learning_rate": 3.825865256698737e-07,
"loss": 1.557,
"step": 321500
},
{
"epoch": 2.98,
"learning_rate": 3.0542103037542556e-07,
"loss": 1.5617,
"step": 322000
},
{
"epoch": 2.99,
"learning_rate": 2.282555350809775e-07,
"loss": 1.5574,
"step": 322500
},
{
"epoch": 2.99,
"learning_rate": 1.5109003978652938e-07,
"loss": 1.5566,
"step": 323000
},
{
"epoch": 3.0,
"learning_rate": 7.392454449208128e-08,
"loss": 1.551,
"step": 323500
},
{
"epoch": 3.0,
"step": 323979,
"total_flos": 2.728710874004779e+18,
"train_loss": 3.0939811468297327,
"train_runtime": 276726.3152,
"train_samples_per_second": 37.464,
"train_steps_per_second": 1.171
}
],
"max_steps": 323979,
"num_train_epochs": 3,
"total_flos": 2.728710874004779e+18,
"trial_name": null,
"trial_params": null
}