MizBERT / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 24.955603327413776,
"eval_steps": 500,
"global_step": 267000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 4.9906533320871115e-05,
"loss": 6.5864,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 4.981306664174222e-05,
"loss": 5.4802,
"step": 1000
},
{
"epoch": 0.14,
"learning_rate": 4.9719599962613325e-05,
"loss": 4.7221,
"step": 1500
},
{
"epoch": 0.19,
"learning_rate": 4.962613328348444e-05,
"loss": 4.2025,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 4.953266660435555e-05,
"loss": 3.8489,
"step": 2500
},
{
"epoch": 0.28,
"learning_rate": 4.943919992522666e-05,
"loss": 3.5856,
"step": 3000
},
{
"epoch": 0.33,
"learning_rate": 4.934573324609777e-05,
"loss": 3.3865,
"step": 3500
},
{
"epoch": 0.37,
"learning_rate": 4.925226656696888e-05,
"loss": 3.2125,
"step": 4000
},
{
"epoch": 0.42,
"learning_rate": 4.9158799887839984e-05,
"loss": 3.0721,
"step": 4500
},
{
"epoch": 0.47,
"learning_rate": 4.9065333208711096e-05,
"loss": 2.9681,
"step": 5000
},
{
"epoch": 0.51,
"learning_rate": 4.897186652958221e-05,
"loss": 2.8801,
"step": 5500
},
{
"epoch": 0.56,
"learning_rate": 4.8878399850453314e-05,
"loss": 2.782,
"step": 6000
},
{
"epoch": 0.61,
"learning_rate": 4.8784933171324426e-05,
"loss": 2.7237,
"step": 6500
},
{
"epoch": 0.65,
"learning_rate": 4.869146649219554e-05,
"loss": 2.6715,
"step": 7000
},
{
"epoch": 0.7,
"learning_rate": 4.859799981306664e-05,
"loss": 2.637,
"step": 7500
},
{
"epoch": 0.75,
"learning_rate": 4.850453313393775e-05,
"loss": 2.5817,
"step": 8000
},
{
"epoch": 0.79,
"learning_rate": 4.841106645480886e-05,
"loss": 2.5346,
"step": 8500
},
{
"epoch": 0.84,
"learning_rate": 4.831759977567997e-05,
"loss": 2.49,
"step": 9000
},
{
"epoch": 0.89,
"learning_rate": 4.8224133096551085e-05,
"loss": 2.4597,
"step": 9500
},
{
"epoch": 0.93,
"learning_rate": 4.813066641742219e-05,
"loss": 2.4381,
"step": 10000
},
{
"epoch": 0.98,
"learning_rate": 4.8037199738293296e-05,
"loss": 2.3995,
"step": 10500
},
{
"epoch": 1.03,
"learning_rate": 4.794373305916441e-05,
"loss": 2.385,
"step": 11000
},
{
"epoch": 1.07,
"learning_rate": 4.785026638003552e-05,
"loss": 2.3463,
"step": 11500
},
{
"epoch": 1.12,
"learning_rate": 4.775679970090663e-05,
"loss": 2.3033,
"step": 12000
},
{
"epoch": 1.17,
"learning_rate": 4.766333302177774e-05,
"loss": 2.2915,
"step": 12500
},
{
"epoch": 1.22,
"learning_rate": 4.756986634264885e-05,
"loss": 2.2722,
"step": 13000
},
{
"epoch": 1.26,
"learning_rate": 4.7476399663519955e-05,
"loss": 2.2443,
"step": 13500
},
{
"epoch": 1.31,
"learning_rate": 4.738293298439107e-05,
"loss": 2.2412,
"step": 14000
},
{
"epoch": 1.36,
"learning_rate": 4.728946630526218e-05,
"loss": 2.22,
"step": 14500
},
{
"epoch": 1.4,
"learning_rate": 4.7195999626133284e-05,
"loss": 2.19,
"step": 15000
},
{
"epoch": 1.45,
"learning_rate": 4.7102532947004396e-05,
"loss": 2.1646,
"step": 15500
},
{
"epoch": 1.5,
"learning_rate": 4.700906626787551e-05,
"loss": 2.1665,
"step": 16000
},
{
"epoch": 1.54,
"learning_rate": 4.6915599588746614e-05,
"loss": 2.1406,
"step": 16500
},
{
"epoch": 1.59,
"learning_rate": 4.682213290961772e-05,
"loss": 2.1191,
"step": 17000
},
{
"epoch": 1.64,
"learning_rate": 4.672866623048883e-05,
"loss": 2.1071,
"step": 17500
},
{
"epoch": 1.68,
"learning_rate": 4.663519955135994e-05,
"loss": 2.0976,
"step": 18000
},
{
"epoch": 1.73,
"learning_rate": 4.6541732872231055e-05,
"loss": 2.0935,
"step": 18500
},
{
"epoch": 1.78,
"learning_rate": 4.644826619310216e-05,
"loss": 2.0682,
"step": 19000
},
{
"epoch": 1.82,
"learning_rate": 4.6354799513973266e-05,
"loss": 2.0487,
"step": 19500
},
{
"epoch": 1.87,
"learning_rate": 4.626133283484438e-05,
"loss": 2.0384,
"step": 20000
},
{
"epoch": 1.92,
"learning_rate": 4.616786615571549e-05,
"loss": 2.0184,
"step": 20500
},
{
"epoch": 1.96,
"learning_rate": 4.60743994765866e-05,
"loss": 2.0046,
"step": 21000
},
{
"epoch": 2.01,
"learning_rate": 4.598093279745771e-05,
"loss": 1.9972,
"step": 21500
},
{
"epoch": 2.06,
"learning_rate": 4.588746611832882e-05,
"loss": 1.9796,
"step": 22000
},
{
"epoch": 2.1,
"learning_rate": 4.5793999439199925e-05,
"loss": 1.9909,
"step": 22500
},
{
"epoch": 2.15,
"learning_rate": 4.570053276007104e-05,
"loss": 1.9776,
"step": 23000
},
{
"epoch": 2.2,
"learning_rate": 4.560706608094214e-05,
"loss": 1.9556,
"step": 23500
},
{
"epoch": 2.24,
"learning_rate": 4.5513599401813255e-05,
"loss": 1.9427,
"step": 24000
},
{
"epoch": 2.29,
"learning_rate": 4.542013272268437e-05,
"loss": 1.9452,
"step": 24500
},
{
"epoch": 2.34,
"learning_rate": 4.532666604355548e-05,
"loss": 1.9365,
"step": 25000
},
{
"epoch": 2.38,
"learning_rate": 4.5233199364426584e-05,
"loss": 1.925,
"step": 25500
},
{
"epoch": 2.43,
"learning_rate": 4.513973268529769e-05,
"loss": 1.9045,
"step": 26000
},
{
"epoch": 2.48,
"learning_rate": 4.50462660061688e-05,
"loss": 1.8886,
"step": 26500
},
{
"epoch": 2.52,
"learning_rate": 4.4952799327039914e-05,
"loss": 1.9086,
"step": 27000
},
{
"epoch": 2.57,
"learning_rate": 4.4859332647911026e-05,
"loss": 1.882,
"step": 27500
},
{
"epoch": 2.62,
"learning_rate": 4.476586596878213e-05,
"loss": 1.8831,
"step": 28000
},
{
"epoch": 2.66,
"learning_rate": 4.4672399289653237e-05,
"loss": 1.8745,
"step": 28500
},
{
"epoch": 2.71,
"learning_rate": 4.457893261052435e-05,
"loss": 1.8645,
"step": 29000
},
{
"epoch": 2.76,
"learning_rate": 4.448546593139546e-05,
"loss": 1.8481,
"step": 29500
},
{
"epoch": 2.8,
"learning_rate": 4.439199925226657e-05,
"loss": 1.8524,
"step": 30000
},
{
"epoch": 2.85,
"learning_rate": 4.429853257313768e-05,
"loss": 1.8299,
"step": 30500
},
{
"epoch": 2.9,
"learning_rate": 4.420506589400879e-05,
"loss": 1.835,
"step": 31000
},
{
"epoch": 2.94,
"learning_rate": 4.4111599214879896e-05,
"loss": 1.8246,
"step": 31500
},
{
"epoch": 2.99,
"learning_rate": 4.401813253575101e-05,
"loss": 1.8154,
"step": 32000
},
{
"epoch": 3.04,
"learning_rate": 4.392466585662211e-05,
"loss": 1.8097,
"step": 32500
},
{
"epoch": 3.08,
"learning_rate": 4.3831199177493225e-05,
"loss": 1.7977,
"step": 33000
},
{
"epoch": 3.13,
"learning_rate": 4.373773249836434e-05,
"loss": 1.7857,
"step": 33500
},
{
"epoch": 3.18,
"learning_rate": 4.364426581923545e-05,
"loss": 1.7933,
"step": 34000
},
{
"epoch": 3.22,
"learning_rate": 4.3550799140106555e-05,
"loss": 1.774,
"step": 34500
},
{
"epoch": 3.27,
"learning_rate": 4.345733246097766e-05,
"loss": 1.7747,
"step": 35000
},
{
"epoch": 3.32,
"learning_rate": 4.336386578184877e-05,
"loss": 1.7663,
"step": 35500
},
{
"epoch": 3.36,
"learning_rate": 4.3270399102719884e-05,
"loss": 1.7746,
"step": 36000
},
{
"epoch": 3.41,
"learning_rate": 4.3176932423590996e-05,
"loss": 1.7586,
"step": 36500
},
{
"epoch": 3.46,
"learning_rate": 4.30834657444621e-05,
"loss": 1.7545,
"step": 37000
},
{
"epoch": 3.51,
"learning_rate": 4.298999906533321e-05,
"loss": 1.7424,
"step": 37500
},
{
"epoch": 3.55,
"learning_rate": 4.289653238620432e-05,
"loss": 1.7472,
"step": 38000
},
{
"epoch": 3.6,
"learning_rate": 4.280306570707543e-05,
"loss": 1.7587,
"step": 38500
},
{
"epoch": 3.65,
"learning_rate": 4.2709599027946537e-05,
"loss": 1.7486,
"step": 39000
},
{
"epoch": 3.69,
"learning_rate": 4.261613234881765e-05,
"loss": 1.7288,
"step": 39500
},
{
"epoch": 3.74,
"learning_rate": 4.252266566968876e-05,
"loss": 1.7361,
"step": 40000
},
{
"epoch": 3.79,
"learning_rate": 4.2429198990559866e-05,
"loss": 1.7089,
"step": 40500
},
{
"epoch": 3.83,
"learning_rate": 4.233573231143098e-05,
"loss": 1.7253,
"step": 41000
},
{
"epoch": 3.88,
"learning_rate": 4.2242265632302084e-05,
"loss": 1.7134,
"step": 41500
},
{
"epoch": 3.93,
"learning_rate": 4.2148798953173196e-05,
"loss": 1.6936,
"step": 42000
},
{
"epoch": 3.97,
"learning_rate": 4.205533227404431e-05,
"loss": 1.7115,
"step": 42500
},
{
"epoch": 4.02,
"learning_rate": 4.196186559491542e-05,
"loss": 1.6981,
"step": 43000
},
{
"epoch": 4.07,
"learning_rate": 4.1868398915786525e-05,
"loss": 1.6913,
"step": 43500
},
{
"epoch": 4.11,
"learning_rate": 4.177493223665763e-05,
"loss": 1.6916,
"step": 44000
},
{
"epoch": 4.16,
"learning_rate": 4.168146555752874e-05,
"loss": 1.6953,
"step": 44500
},
{
"epoch": 4.21,
"learning_rate": 4.1587998878399855e-05,
"loss": 1.6778,
"step": 45000
},
{
"epoch": 4.25,
"learning_rate": 4.149453219927097e-05,
"loss": 1.6706,
"step": 45500
},
{
"epoch": 4.3,
"learning_rate": 4.140106552014207e-05,
"loss": 1.6703,
"step": 46000
},
{
"epoch": 4.35,
"learning_rate": 4.130759884101318e-05,
"loss": 1.6639,
"step": 46500
},
{
"epoch": 4.39,
"learning_rate": 4.121413216188429e-05,
"loss": 1.6728,
"step": 47000
},
{
"epoch": 4.44,
"learning_rate": 4.11206654827554e-05,
"loss": 1.6553,
"step": 47500
},
{
"epoch": 4.49,
"learning_rate": 4.102719880362651e-05,
"loss": 1.6433,
"step": 48000
},
{
"epoch": 4.53,
"learning_rate": 4.093373212449762e-05,
"loss": 1.6499,
"step": 48500
},
{
"epoch": 4.58,
"learning_rate": 4.084026544536873e-05,
"loss": 1.6553,
"step": 49000
},
{
"epoch": 4.63,
"learning_rate": 4.0746798766239837e-05,
"loss": 1.6401,
"step": 49500
},
{
"epoch": 4.67,
"learning_rate": 4.065333208711095e-05,
"loss": 1.6444,
"step": 50000
},
{
"epoch": 4.72,
"learning_rate": 4.0559865407982054e-05,
"loss": 1.6398,
"step": 50500
},
{
"epoch": 4.77,
"learning_rate": 4.0466398728853166e-05,
"loss": 1.6338,
"step": 51000
},
{
"epoch": 4.81,
"learning_rate": 4.037293204972428e-05,
"loss": 1.6194,
"step": 51500
},
{
"epoch": 4.86,
"learning_rate": 4.027946537059539e-05,
"loss": 1.6327,
"step": 52000
},
{
"epoch": 4.91,
"learning_rate": 4.018599869146649e-05,
"loss": 1.6232,
"step": 52500
},
{
"epoch": 4.95,
"learning_rate": 4.00925320123376e-05,
"loss": 1.6296,
"step": 53000
},
{
"epoch": 5.0,
"learning_rate": 3.999906533320871e-05,
"loss": 1.6152,
"step": 53500
},
{
"epoch": 5.05,
"learning_rate": 3.9905598654079825e-05,
"loss": 1.6012,
"step": 54000
},
{
"epoch": 5.09,
"learning_rate": 3.981213197495093e-05,
"loss": 1.6087,
"step": 54500
},
{
"epoch": 5.14,
"learning_rate": 3.971866529582204e-05,
"loss": 1.5971,
"step": 55000
},
{
"epoch": 5.19,
"learning_rate": 3.962519861669315e-05,
"loss": 1.5956,
"step": 55500
},
{
"epoch": 5.23,
"learning_rate": 3.953173193756426e-05,
"loss": 1.5947,
"step": 56000
},
{
"epoch": 5.28,
"learning_rate": 3.943826525843537e-05,
"loss": 1.5993,
"step": 56500
},
{
"epoch": 5.33,
"learning_rate": 3.934479857930648e-05,
"loss": 1.5816,
"step": 57000
},
{
"epoch": 5.37,
"learning_rate": 3.925133190017759e-05,
"loss": 1.5837,
"step": 57500
},
{
"epoch": 5.42,
"learning_rate": 3.91578652210487e-05,
"loss": 1.5854,
"step": 58000
},
{
"epoch": 5.47,
"learning_rate": 3.906439854191981e-05,
"loss": 1.5734,
"step": 58500
},
{
"epoch": 5.51,
"learning_rate": 3.897093186279092e-05,
"loss": 1.578,
"step": 59000
},
{
"epoch": 5.56,
"learning_rate": 3.8877465183662024e-05,
"loss": 1.5817,
"step": 59500
},
{
"epoch": 5.61,
"learning_rate": 3.8783998504533137e-05,
"loss": 1.578,
"step": 60000
},
{
"epoch": 5.65,
"learning_rate": 3.869053182540425e-05,
"loss": 1.5732,
"step": 60500
},
{
"epoch": 5.7,
"learning_rate": 3.8597065146275354e-05,
"loss": 1.5606,
"step": 61000
},
{
"epoch": 5.75,
"learning_rate": 3.850359846714646e-05,
"loss": 1.5709,
"step": 61500
},
{
"epoch": 5.79,
"learning_rate": 3.841013178801757e-05,
"loss": 1.5607,
"step": 62000
},
{
"epoch": 5.84,
"learning_rate": 3.8316665108888684e-05,
"loss": 1.5687,
"step": 62500
},
{
"epoch": 5.89,
"learning_rate": 3.8223198429759796e-05,
"loss": 1.5488,
"step": 63000
},
{
"epoch": 5.94,
"learning_rate": 3.81297317506309e-05,
"loss": 1.5601,
"step": 63500
},
{
"epoch": 5.98,
"learning_rate": 3.803626507150201e-05,
"loss": 1.5611,
"step": 64000
},
{
"epoch": 6.03,
"learning_rate": 3.794279839237312e-05,
"loss": 1.5515,
"step": 64500
},
{
"epoch": 6.08,
"learning_rate": 3.784933171324423e-05,
"loss": 1.5412,
"step": 65000
},
{
"epoch": 6.12,
"learning_rate": 3.775586503411534e-05,
"loss": 1.5434,
"step": 65500
},
{
"epoch": 6.17,
"learning_rate": 3.766239835498645e-05,
"loss": 1.5372,
"step": 66000
},
{
"epoch": 6.22,
"learning_rate": 3.756893167585756e-05,
"loss": 1.5293,
"step": 66500
},
{
"epoch": 6.26,
"learning_rate": 3.7475464996728665e-05,
"loss": 1.5335,
"step": 67000
},
{
"epoch": 6.31,
"learning_rate": 3.738199831759978e-05,
"loss": 1.5296,
"step": 67500
},
{
"epoch": 6.36,
"learning_rate": 3.728853163847088e-05,
"loss": 1.5238,
"step": 68000
},
{
"epoch": 6.4,
"learning_rate": 3.7195064959341995e-05,
"loss": 1.5269,
"step": 68500
},
{
"epoch": 6.45,
"learning_rate": 3.710159828021311e-05,
"loss": 1.5233,
"step": 69000
},
{
"epoch": 6.5,
"learning_rate": 3.700813160108422e-05,
"loss": 1.5234,
"step": 69500
},
{
"epoch": 6.54,
"learning_rate": 3.6914664921955325e-05,
"loss": 1.5277,
"step": 70000
},
{
"epoch": 6.59,
"learning_rate": 3.682119824282643e-05,
"loss": 1.5185,
"step": 70500
},
{
"epoch": 6.64,
"learning_rate": 3.672773156369754e-05,
"loss": 1.5185,
"step": 71000
},
{
"epoch": 6.68,
"learning_rate": 3.6634264884568654e-05,
"loss": 1.5316,
"step": 71500
},
{
"epoch": 6.73,
"learning_rate": 3.6540798205439766e-05,
"loss": 1.5165,
"step": 72000
},
{
"epoch": 6.78,
"learning_rate": 3.644733152631087e-05,
"loss": 1.5067,
"step": 72500
},
{
"epoch": 6.82,
"learning_rate": 3.635386484718198e-05,
"loss": 1.5108,
"step": 73000
},
{
"epoch": 6.87,
"learning_rate": 3.626039816805309e-05,
"loss": 1.4999,
"step": 73500
},
{
"epoch": 6.92,
"learning_rate": 3.61669314889242e-05,
"loss": 1.4997,
"step": 74000
},
{
"epoch": 6.96,
"learning_rate": 3.607346480979531e-05,
"loss": 1.513,
"step": 74500
},
{
"epoch": 7.01,
"learning_rate": 3.597999813066642e-05,
"loss": 1.5012,
"step": 75000
},
{
"epoch": 7.06,
"learning_rate": 3.588653145153753e-05,
"loss": 1.4837,
"step": 75500
},
{
"epoch": 7.1,
"learning_rate": 3.5793064772408636e-05,
"loss": 1.4824,
"step": 76000
},
{
"epoch": 7.15,
"learning_rate": 3.569959809327975e-05,
"loss": 1.4896,
"step": 76500
},
{
"epoch": 7.2,
"learning_rate": 3.560613141415085e-05,
"loss": 1.4865,
"step": 77000
},
{
"epoch": 7.24,
"learning_rate": 3.5512664735021965e-05,
"loss": 1.4937,
"step": 77500
},
{
"epoch": 7.29,
"learning_rate": 3.541919805589308e-05,
"loss": 1.4828,
"step": 78000
},
{
"epoch": 7.34,
"learning_rate": 3.532573137676419e-05,
"loss": 1.4767,
"step": 78500
},
{
"epoch": 7.38,
"learning_rate": 3.5232264697635295e-05,
"loss": 1.4878,
"step": 79000
},
{
"epoch": 7.43,
"learning_rate": 3.51387980185064e-05,
"loss": 1.4946,
"step": 79500
},
{
"epoch": 7.48,
"learning_rate": 3.504533133937751e-05,
"loss": 1.4756,
"step": 80000
},
{
"epoch": 7.52,
"learning_rate": 3.4951864660248625e-05,
"loss": 1.464,
"step": 80500
},
{
"epoch": 7.57,
"learning_rate": 3.485839798111974e-05,
"loss": 1.4754,
"step": 81000
},
{
"epoch": 7.62,
"learning_rate": 3.476493130199084e-05,
"loss": 1.472,
"step": 81500
},
{
"epoch": 7.66,
"learning_rate": 3.467146462286195e-05,
"loss": 1.4716,
"step": 82000
},
{
"epoch": 7.71,
"learning_rate": 3.457799794373306e-05,
"loss": 1.4888,
"step": 82500
},
{
"epoch": 7.76,
"learning_rate": 3.448453126460417e-05,
"loss": 1.4678,
"step": 83000
},
{
"epoch": 7.8,
"learning_rate": 3.439106458547528e-05,
"loss": 1.4694,
"step": 83500
},
{
"epoch": 7.85,
"learning_rate": 3.429759790634639e-05,
"loss": 1.4729,
"step": 84000
},
{
"epoch": 7.9,
"learning_rate": 3.42041312272175e-05,
"loss": 1.4558,
"step": 84500
},
{
"epoch": 7.94,
"learning_rate": 3.4110664548088606e-05,
"loss": 1.4597,
"step": 85000
},
{
"epoch": 7.99,
"learning_rate": 3.401719786895972e-05,
"loss": 1.449,
"step": 85500
},
{
"epoch": 8.04,
"learning_rate": 3.3923731189830824e-05,
"loss": 1.4543,
"step": 86000
},
{
"epoch": 8.08,
"learning_rate": 3.3830264510701936e-05,
"loss": 1.4437,
"step": 86500
},
{
"epoch": 8.13,
"learning_rate": 3.373679783157305e-05,
"loss": 1.4423,
"step": 87000
},
{
"epoch": 8.18,
"learning_rate": 3.364333115244416e-05,
"loss": 1.4417,
"step": 87500
},
{
"epoch": 8.23,
"learning_rate": 3.3549864473315265e-05,
"loss": 1.4505,
"step": 88000
},
{
"epoch": 8.27,
"learning_rate": 3.345639779418637e-05,
"loss": 1.4426,
"step": 88500
},
{
"epoch": 8.32,
"learning_rate": 3.336293111505748e-05,
"loss": 1.4359,
"step": 89000
},
{
"epoch": 8.37,
"learning_rate": 3.3269464435928595e-05,
"loss": 1.4463,
"step": 89500
},
{
"epoch": 8.41,
"learning_rate": 3.317599775679971e-05,
"loss": 1.4395,
"step": 90000
},
{
"epoch": 8.46,
"learning_rate": 3.308253107767081e-05,
"loss": 1.4388,
"step": 90500
},
{
"epoch": 8.51,
"learning_rate": 3.298906439854192e-05,
"loss": 1.4395,
"step": 91000
},
{
"epoch": 8.55,
"learning_rate": 3.289559771941303e-05,
"loss": 1.4335,
"step": 91500
},
{
"epoch": 8.6,
"learning_rate": 3.280213104028414e-05,
"loss": 1.4334,
"step": 92000
},
{
"epoch": 8.65,
"learning_rate": 3.270866436115525e-05,
"loss": 1.4457,
"step": 92500
},
{
"epoch": 8.69,
"learning_rate": 3.261519768202636e-05,
"loss": 1.4371,
"step": 93000
},
{
"epoch": 8.74,
"learning_rate": 3.252173100289747e-05,
"loss": 1.4299,
"step": 93500
},
{
"epoch": 8.79,
"learning_rate": 3.242826432376858e-05,
"loss": 1.4414,
"step": 94000
},
{
"epoch": 8.83,
"learning_rate": 3.233479764463969e-05,
"loss": 1.4191,
"step": 94500
},
{
"epoch": 8.88,
"learning_rate": 3.2241330965510794e-05,
"loss": 1.4261,
"step": 95000
},
{
"epoch": 8.93,
"learning_rate": 3.2147864286381906e-05,
"loss": 1.4292,
"step": 95500
},
{
"epoch": 8.97,
"learning_rate": 3.205439760725302e-05,
"loss": 1.4245,
"step": 96000
},
{
"epoch": 9.02,
"learning_rate": 3.196093092812413e-05,
"loss": 1.4169,
"step": 96500
},
{
"epoch": 9.07,
"learning_rate": 3.186746424899523e-05,
"loss": 1.4099,
"step": 97000
},
{
"epoch": 9.11,
"learning_rate": 3.177399756986634e-05,
"loss": 1.4087,
"step": 97500
},
{
"epoch": 9.16,
"learning_rate": 3.168053089073745e-05,
"loss": 1.4289,
"step": 98000
},
{
"epoch": 9.21,
"learning_rate": 3.1587064211608565e-05,
"loss": 1.4251,
"step": 98500
},
{
"epoch": 9.25,
"learning_rate": 3.149359753247967e-05,
"loss": 1.4228,
"step": 99000
},
{
"epoch": 9.3,
"learning_rate": 3.140013085335078e-05,
"loss": 1.4062,
"step": 99500
},
{
"epoch": 9.35,
"learning_rate": 3.130666417422189e-05,
"loss": 1.4032,
"step": 100000
},
{
"epoch": 9.39,
"learning_rate": 3.1213197495093e-05,
"loss": 1.4143,
"step": 100500
},
{
"epoch": 9.44,
"learning_rate": 3.111973081596411e-05,
"loss": 1.4038,
"step": 101000
},
{
"epoch": 9.49,
"learning_rate": 3.102626413683522e-05,
"loss": 1.3984,
"step": 101500
},
{
"epoch": 9.53,
"learning_rate": 3.093279745770633e-05,
"loss": 1.4098,
"step": 102000
},
{
"epoch": 9.58,
"learning_rate": 3.083933077857744e-05,
"loss": 1.4021,
"step": 102500
},
{
"epoch": 9.63,
"learning_rate": 3.074586409944855e-05,
"loss": 1.4041,
"step": 103000
},
{
"epoch": 9.67,
"learning_rate": 3.065239742031966e-05,
"loss": 1.3972,
"step": 103500
},
{
"epoch": 9.72,
"learning_rate": 3.0558930741190765e-05,
"loss": 1.3955,
"step": 104000
},
{
"epoch": 9.77,
"learning_rate": 3.0465464062061877e-05,
"loss": 1.4066,
"step": 104500
},
{
"epoch": 9.81,
"learning_rate": 3.037199738293299e-05,
"loss": 1.4019,
"step": 105000
},
{
"epoch": 9.86,
"learning_rate": 3.0278530703804098e-05,
"loss": 1.3893,
"step": 105500
},
{
"epoch": 9.91,
"learning_rate": 3.0185064024675203e-05,
"loss": 1.3995,
"step": 106000
},
{
"epoch": 9.95,
"learning_rate": 3.0091597345546312e-05,
"loss": 1.3958,
"step": 106500
},
{
"epoch": 10.0,
"learning_rate": 2.9998130666417424e-05,
"loss": 1.3839,
"step": 107000
},
{
"epoch": 10.05,
"learning_rate": 2.9904663987288533e-05,
"loss": 1.3878,
"step": 107500
},
{
"epoch": 10.09,
"learning_rate": 2.9811197308159645e-05,
"loss": 1.3848,
"step": 108000
},
{
"epoch": 10.14,
"learning_rate": 2.9717730629030753e-05,
"loss": 1.3804,
"step": 108500
},
{
"epoch": 10.19,
"learning_rate": 2.962426394990186e-05,
"loss": 1.3841,
"step": 109000
},
{
"epoch": 10.23,
"learning_rate": 2.953079727077297e-05,
"loss": 1.3878,
"step": 109500
},
{
"epoch": 10.28,
"learning_rate": 2.943733059164408e-05,
"loss": 1.3662,
"step": 110000
},
{
"epoch": 10.33,
"learning_rate": 2.934386391251519e-05,
"loss": 1.3775,
"step": 110500
},
{
"epoch": 10.37,
"learning_rate": 2.92503972333863e-05,
"loss": 1.3757,
"step": 111000
},
{
"epoch": 10.42,
"learning_rate": 2.9156930554257412e-05,
"loss": 1.3816,
"step": 111500
},
{
"epoch": 10.47,
"learning_rate": 2.9063463875128514e-05,
"loss": 1.3769,
"step": 112000
},
{
"epoch": 10.52,
"learning_rate": 2.8969997195999626e-05,
"loss": 1.3824,
"step": 112500
},
{
"epoch": 10.56,
"learning_rate": 2.8876530516870735e-05,
"loss": 1.3753,
"step": 113000
},
{
"epoch": 10.61,
"learning_rate": 2.8783063837741847e-05,
"loss": 1.3728,
"step": 113500
},
{
"epoch": 10.66,
"learning_rate": 2.8689597158612956e-05,
"loss": 1.3699,
"step": 114000
},
{
"epoch": 10.7,
"learning_rate": 2.8596130479484068e-05,
"loss": 1.3758,
"step": 114500
},
{
"epoch": 10.75,
"learning_rate": 2.8502663800355173e-05,
"loss": 1.3763,
"step": 115000
},
{
"epoch": 10.8,
"learning_rate": 2.8409197121226282e-05,
"loss": 1.3677,
"step": 115500
},
{
"epoch": 10.84,
"learning_rate": 2.8315730442097394e-05,
"loss": 1.3578,
"step": 116000
},
{
"epoch": 10.89,
"learning_rate": 2.8222263762968503e-05,
"loss": 1.3693,
"step": 116500
},
{
"epoch": 10.94,
"learning_rate": 2.8128797083839615e-05,
"loss": 1.3726,
"step": 117000
},
{
"epoch": 10.98,
"learning_rate": 2.8035330404710724e-05,
"loss": 1.3772,
"step": 117500
},
{
"epoch": 11.03,
"learning_rate": 2.794186372558183e-05,
"loss": 1.3472,
"step": 118000
},
{
"epoch": 11.08,
"learning_rate": 2.784839704645294e-05,
"loss": 1.3638,
"step": 118500
},
{
"epoch": 11.12,
"learning_rate": 2.775493036732405e-05,
"loss": 1.3556,
"step": 119000
},
{
"epoch": 11.17,
"learning_rate": 2.7661463688195162e-05,
"loss": 1.3598,
"step": 119500
},
{
"epoch": 11.22,
"learning_rate": 2.756799700906627e-05,
"loss": 1.344,
"step": 120000
},
{
"epoch": 11.26,
"learning_rate": 2.7474530329937383e-05,
"loss": 1.3532,
"step": 120500
},
{
"epoch": 11.31,
"learning_rate": 2.7381063650808485e-05,
"loss": 1.351,
"step": 121000
},
{
"epoch": 11.36,
"learning_rate": 2.7287596971679597e-05,
"loss": 1.3555,
"step": 121500
},
{
"epoch": 11.4,
"learning_rate": 2.7194130292550706e-05,
"loss": 1.361,
"step": 122000
},
{
"epoch": 11.45,
"learning_rate": 2.7100663613421818e-05,
"loss": 1.3472,
"step": 122500
},
{
"epoch": 11.5,
"learning_rate": 2.7007196934292926e-05,
"loss": 1.3462,
"step": 123000
},
{
"epoch": 11.54,
"learning_rate": 2.691373025516404e-05,
"loss": 1.3539,
"step": 123500
},
{
"epoch": 11.59,
"learning_rate": 2.6820263576035144e-05,
"loss": 1.3493,
"step": 124000
},
{
"epoch": 11.64,
"learning_rate": 2.6726796896906253e-05,
"loss": 1.3504,
"step": 124500
},
{
"epoch": 11.68,
"learning_rate": 2.6633330217777365e-05,
"loss": 1.3548,
"step": 125000
},
{
"epoch": 11.73,
"learning_rate": 2.6539863538648473e-05,
"loss": 1.3373,
"step": 125500
},
{
"epoch": 11.78,
"learning_rate": 2.6446396859519586e-05,
"loss": 1.3506,
"step": 126000
},
{
"epoch": 11.82,
"learning_rate": 2.6352930180390694e-05,
"loss": 1.3431,
"step": 126500
},
{
"epoch": 11.87,
"learning_rate": 2.62594635012618e-05,
"loss": 1.3458,
"step": 127000
},
{
"epoch": 11.92,
"learning_rate": 2.616599682213291e-05,
"loss": 1.345,
"step": 127500
},
{
"epoch": 11.96,
"learning_rate": 2.607253014300402e-05,
"loss": 1.3478,
"step": 128000
},
{
"epoch": 12.01,
"learning_rate": 2.597906346387513e-05,
"loss": 1.3453,
"step": 128500
},
{
"epoch": 12.06,
"learning_rate": 2.588559678474624e-05,
"loss": 1.3339,
"step": 129000
},
{
"epoch": 12.1,
"learning_rate": 2.5792130105617347e-05,
"loss": 1.3325,
"step": 129500
},
{
"epoch": 12.15,
"learning_rate": 2.5698663426488455e-05,
"loss": 1.339,
"step": 130000
},
{
"epoch": 12.2,
"learning_rate": 2.5605196747359567e-05,
"loss": 1.3329,
"step": 130500
},
{
"epoch": 12.24,
"learning_rate": 2.5511730068230676e-05,
"loss": 1.3341,
"step": 131000
},
{
"epoch": 12.29,
"learning_rate": 2.5418263389101788e-05,
"loss": 1.3396,
"step": 131500
},
{
"epoch": 12.34,
"learning_rate": 2.5324796709972897e-05,
"loss": 1.3341,
"step": 132000
},
{
"epoch": 12.38,
"learning_rate": 2.5231330030844002e-05,
"loss": 1.3358,
"step": 132500
},
{
"epoch": 12.43,
"learning_rate": 2.5137863351715114e-05,
"loss": 1.3294,
"step": 133000
},
{
"epoch": 12.48,
"learning_rate": 2.5044396672586223e-05,
"loss": 1.3339,
"step": 133500
},
{
"epoch": 12.52,
"learning_rate": 2.4950929993457335e-05,
"loss": 1.3338,
"step": 134000
},
{
"epoch": 12.57,
"learning_rate": 2.4857463314328444e-05,
"loss": 1.324,
"step": 134500
},
{
"epoch": 12.62,
"learning_rate": 2.4763996635199553e-05,
"loss": 1.3188,
"step": 135000
},
{
"epoch": 12.66,
"learning_rate": 2.467052995607066e-05,
"loss": 1.3244,
"step": 135500
},
{
"epoch": 12.71,
"learning_rate": 2.4577063276941773e-05,
"loss": 1.3296,
"step": 136000
},
{
"epoch": 12.76,
"learning_rate": 2.448359659781288e-05,
"loss": 1.3148,
"step": 136500
},
{
"epoch": 12.8,
"learning_rate": 2.439012991868399e-05,
"loss": 1.3261,
"step": 137000
},
{
"epoch": 12.85,
"learning_rate": 2.42966632395551e-05,
"loss": 1.3166,
"step": 137500
},
{
"epoch": 12.9,
"learning_rate": 2.420319656042621e-05,
"loss": 1.3137,
"step": 138000
},
{
"epoch": 12.95,
"learning_rate": 2.410972988129732e-05,
"loss": 1.3217,
"step": 138500
},
{
"epoch": 12.99,
"learning_rate": 2.401626320216843e-05,
"loss": 1.3341,
"step": 139000
},
{
"epoch": 13.04,
"learning_rate": 2.3922796523039538e-05,
"loss": 1.3248,
"step": 139500
},
{
"epoch": 13.09,
"learning_rate": 2.3829329843910647e-05,
"loss": 1.3087,
"step": 140000
},
{
"epoch": 13.13,
"learning_rate": 2.373586316478176e-05,
"loss": 1.3049,
"step": 140500
},
{
"epoch": 13.18,
"learning_rate": 2.3642396485652864e-05,
"loss": 1.3074,
"step": 141000
},
{
"epoch": 13.23,
"learning_rate": 2.3548929806523976e-05,
"loss": 1.3133,
"step": 141500
},
{
"epoch": 13.27,
"learning_rate": 2.3455463127395085e-05,
"loss": 1.3221,
"step": 142000
},
{
"epoch": 13.32,
"learning_rate": 2.3361996448266194e-05,
"loss": 1.3113,
"step": 142500
},
{
"epoch": 13.37,
"learning_rate": 2.3268529769137302e-05,
"loss": 1.3138,
"step": 143000
},
{
"epoch": 13.41,
"learning_rate": 2.3175063090008414e-05,
"loss": 1.3091,
"step": 143500
},
{
"epoch": 13.46,
"learning_rate": 2.3081596410879523e-05,
"loss": 1.3132,
"step": 144000
},
{
"epoch": 13.51,
"learning_rate": 2.2988129731750632e-05,
"loss": 1.3095,
"step": 144500
},
{
"epoch": 13.55,
"learning_rate": 2.2894663052621744e-05,
"loss": 1.3046,
"step": 145000
},
{
"epoch": 13.6,
"learning_rate": 2.280119637349285e-05,
"loss": 1.3136,
"step": 145500
},
{
"epoch": 13.65,
"learning_rate": 2.270772969436396e-05,
"loss": 1.3067,
"step": 146000
},
{
"epoch": 13.69,
"learning_rate": 2.261426301523507e-05,
"loss": 1.3025,
"step": 146500
},
{
"epoch": 13.74,
"learning_rate": 2.252079633610618e-05,
"loss": 1.3085,
"step": 147000
},
{
"epoch": 13.79,
"learning_rate": 2.2427329656977288e-05,
"loss": 1.2976,
"step": 147500
},
{
"epoch": 13.83,
"learning_rate": 2.23338629778484e-05,
"loss": 1.3007,
"step": 148000
},
{
"epoch": 13.88,
"learning_rate": 2.224039629871951e-05,
"loss": 1.3138,
"step": 148500
},
{
"epoch": 13.93,
"learning_rate": 2.2146929619590617e-05,
"loss": 1.3143,
"step": 149000
},
{
"epoch": 13.97,
"learning_rate": 2.205346294046173e-05,
"loss": 1.3029,
"step": 149500
},
{
"epoch": 14.02,
"learning_rate": 2.1959996261332835e-05,
"loss": 1.2919,
"step": 150000
},
{
"epoch": 14.07,
"learning_rate": 2.1866529582203947e-05,
"loss": 1.2982,
"step": 150500
},
{
"epoch": 14.11,
"learning_rate": 2.1773062903075055e-05,
"loss": 1.3012,
"step": 151000
},
{
"epoch": 14.16,
"learning_rate": 2.1679596223946164e-05,
"loss": 1.2841,
"step": 151500
},
{
"epoch": 14.21,
"learning_rate": 2.1586129544817273e-05,
"loss": 1.3044,
"step": 152000
},
{
"epoch": 14.25,
"learning_rate": 2.1492662865688385e-05,
"loss": 1.2973,
"step": 152500
},
{
"epoch": 14.3,
"learning_rate": 2.1399196186559494e-05,
"loss": 1.2884,
"step": 153000
},
{
"epoch": 14.35,
"learning_rate": 2.1305729507430602e-05,
"loss": 1.2883,
"step": 153500
},
{
"epoch": 14.39,
"learning_rate": 2.1212262828301714e-05,
"loss": 1.2993,
"step": 154000
},
{
"epoch": 14.44,
"learning_rate": 2.111879614917282e-05,
"loss": 1.2919,
"step": 154500
},
{
"epoch": 14.49,
"learning_rate": 2.1025329470043932e-05,
"loss": 1.3026,
"step": 155000
},
{
"epoch": 14.53,
"learning_rate": 2.093186279091504e-05,
"loss": 1.2882,
"step": 155500
},
{
"epoch": 14.58,
"learning_rate": 2.083839611178615e-05,
"loss": 1.289,
"step": 156000
},
{
"epoch": 14.63,
"learning_rate": 2.0744929432657258e-05,
"loss": 1.2917,
"step": 156500
},
{
"epoch": 14.67,
"learning_rate": 2.065146275352837e-05,
"loss": 1.2897,
"step": 157000
},
{
"epoch": 14.72,
"learning_rate": 2.0557996074399475e-05,
"loss": 1.2859,
"step": 157500
},
{
"epoch": 14.77,
"learning_rate": 2.0464529395270588e-05,
"loss": 1.2924,
"step": 158000
},
{
"epoch": 14.81,
"learning_rate": 2.0371062716141696e-05,
"loss": 1.2873,
"step": 158500
},
{
"epoch": 14.86,
"learning_rate": 2.0277596037012805e-05,
"loss": 1.29,
"step": 159000
},
{
"epoch": 14.91,
"learning_rate": 2.0184129357883917e-05,
"loss": 1.2848,
"step": 159500
},
{
"epoch": 14.95,
"learning_rate": 2.0090662678755026e-05,
"loss": 1.2831,
"step": 160000
},
{
"epoch": 15.0,
"learning_rate": 1.9997195999626135e-05,
"loss": 1.2841,
"step": 160500
},
{
"epoch": 15.05,
"learning_rate": 1.9903729320497243e-05,
"loss": 1.2716,
"step": 161000
},
{
"epoch": 15.09,
"learning_rate": 1.9810262641368352e-05,
"loss": 1.2795,
"step": 161500
},
{
"epoch": 15.14,
"learning_rate": 1.971679596223946e-05,
"loss": 1.2836,
"step": 162000
},
{
"epoch": 15.19,
"learning_rate": 1.9623329283110573e-05,
"loss": 1.2854,
"step": 162500
},
{
"epoch": 15.24,
"learning_rate": 1.952986260398168e-05,
"loss": 1.2819,
"step": 163000
},
{
"epoch": 15.28,
"learning_rate": 1.943639592485279e-05,
"loss": 1.2762,
"step": 163500
},
{
"epoch": 15.33,
"learning_rate": 1.9342929245723902e-05,
"loss": 1.2638,
"step": 164000
},
{
"epoch": 15.38,
"learning_rate": 1.9249462566595008e-05,
"loss": 1.269,
"step": 164500
},
{
"epoch": 15.42,
"learning_rate": 1.915599588746612e-05,
"loss": 1.2691,
"step": 165000
},
{
"epoch": 15.47,
"learning_rate": 1.906252920833723e-05,
"loss": 1.2802,
"step": 165500
},
{
"epoch": 15.52,
"learning_rate": 1.8969062529208337e-05,
"loss": 1.275,
"step": 166000
},
{
"epoch": 15.56,
"learning_rate": 1.8875595850079446e-05,
"loss": 1.278,
"step": 166500
},
{
"epoch": 15.61,
"learning_rate": 1.8782129170950558e-05,
"loss": 1.2768,
"step": 167000
},
{
"epoch": 15.66,
"learning_rate": 1.8688662491821667e-05,
"loss": 1.2761,
"step": 167500
},
{
"epoch": 15.7,
"learning_rate": 1.8595195812692775e-05,
"loss": 1.271,
"step": 168000
},
{
"epoch": 15.75,
"learning_rate": 1.8501729133563888e-05,
"loss": 1.2687,
"step": 168500
},
{
"epoch": 15.8,
"learning_rate": 1.8408262454434993e-05,
"loss": 1.2644,
"step": 169000
},
{
"epoch": 15.84,
"learning_rate": 1.8314795775306105e-05,
"loss": 1.2732,
"step": 169500
},
{
"epoch": 15.89,
"learning_rate": 1.8221329096177214e-05,
"loss": 1.2742,
"step": 170000
},
{
"epoch": 15.94,
"learning_rate": 1.8127862417048322e-05,
"loss": 1.266,
"step": 170500
},
{
"epoch": 15.98,
"learning_rate": 1.803439573791943e-05,
"loss": 1.27,
"step": 171000
},
{
"epoch": 16.03,
"learning_rate": 1.7940929058790543e-05,
"loss": 1.2682,
"step": 171500
},
{
"epoch": 16.08,
"learning_rate": 1.784746237966165e-05,
"loss": 1.2584,
"step": 172000
},
{
"epoch": 16.12,
"learning_rate": 1.775399570053276e-05,
"loss": 1.2702,
"step": 172500
},
{
"epoch": 16.17,
"learning_rate": 1.766052902140387e-05,
"loss": 1.2602,
"step": 173000
},
{
"epoch": 16.22,
"learning_rate": 1.7567062342274978e-05,
"loss": 1.2595,
"step": 173500
},
{
"epoch": 16.26,
"learning_rate": 1.747359566314609e-05,
"loss": 1.261,
"step": 174000
},
{
"epoch": 16.31,
"learning_rate": 1.73801289840172e-05,
"loss": 1.2556,
"step": 174500
},
{
"epoch": 16.36,
"learning_rate": 1.7286662304888308e-05,
"loss": 1.2722,
"step": 175000
},
{
"epoch": 16.4,
"learning_rate": 1.7193195625759416e-05,
"loss": 1.2553,
"step": 175500
},
{
"epoch": 16.45,
"learning_rate": 1.709972894663053e-05,
"loss": 1.2577,
"step": 176000
},
{
"epoch": 16.5,
"learning_rate": 1.7006262267501634e-05,
"loss": 1.2607,
"step": 176500
},
{
"epoch": 16.54,
"learning_rate": 1.6912795588372746e-05,
"loss": 1.2646,
"step": 177000
},
{
"epoch": 16.59,
"learning_rate": 1.6819328909243855e-05,
"loss": 1.267,
"step": 177500
},
{
"epoch": 16.64,
"learning_rate": 1.6725862230114963e-05,
"loss": 1.2596,
"step": 178000
},
{
"epoch": 16.68,
"learning_rate": 1.6632395550986075e-05,
"loss": 1.2553,
"step": 178500
},
{
"epoch": 16.73,
"learning_rate": 1.6538928871857184e-05,
"loss": 1.2538,
"step": 179000
},
{
"epoch": 16.78,
"learning_rate": 1.6445462192728293e-05,
"loss": 1.2626,
"step": 179500
},
{
"epoch": 16.82,
"learning_rate": 1.63519955135994e-05,
"loss": 1.2551,
"step": 180000
},
{
"epoch": 16.87,
"learning_rate": 1.6258528834470514e-05,
"loss": 1.2569,
"step": 180500
},
{
"epoch": 16.92,
"learning_rate": 1.616506215534162e-05,
"loss": 1.2535,
"step": 181000
},
{
"epoch": 16.96,
"learning_rate": 1.607159547621273e-05,
"loss": 1.2591,
"step": 181500
},
{
"epoch": 17.01,
"learning_rate": 1.597812879708384e-05,
"loss": 1.2508,
"step": 182000
},
{
"epoch": 17.06,
"learning_rate": 1.588466211795495e-05,
"loss": 1.2517,
"step": 182500
},
{
"epoch": 17.1,
"learning_rate": 1.579119543882606e-05,
"loss": 1.2546,
"step": 183000
},
{
"epoch": 17.15,
"learning_rate": 1.569772875969717e-05,
"loss": 1.241,
"step": 183500
},
{
"epoch": 17.2,
"learning_rate": 1.5604262080568278e-05,
"loss": 1.2421,
"step": 184000
},
{
"epoch": 17.24,
"learning_rate": 1.5510795401439387e-05,
"loss": 1.243,
"step": 184500
},
{
"epoch": 17.29,
"learning_rate": 1.54173287223105e-05,
"loss": 1.2459,
"step": 185000
},
{
"epoch": 17.34,
"learning_rate": 1.5323862043181604e-05,
"loss": 1.2497,
"step": 185500
},
{
"epoch": 17.38,
"learning_rate": 1.5230395364052716e-05,
"loss": 1.2433,
"step": 186000
},
{
"epoch": 17.43,
"learning_rate": 1.5136928684923827e-05,
"loss": 1.2497,
"step": 186500
},
{
"epoch": 17.48,
"learning_rate": 1.5043462005794934e-05,
"loss": 1.2416,
"step": 187000
},
{
"epoch": 17.53,
"learning_rate": 1.4949995326666044e-05,
"loss": 1.2457,
"step": 187500
},
{
"epoch": 17.57,
"learning_rate": 1.4856528647537155e-05,
"loss": 1.2516,
"step": 188000
},
{
"epoch": 17.62,
"learning_rate": 1.4763061968408262e-05,
"loss": 1.2492,
"step": 188500
},
{
"epoch": 17.67,
"learning_rate": 1.4669595289279372e-05,
"loss": 1.2462,
"step": 189000
},
{
"epoch": 17.71,
"learning_rate": 1.4576128610150482e-05,
"loss": 1.2485,
"step": 189500
},
{
"epoch": 17.76,
"learning_rate": 1.4482661931021591e-05,
"loss": 1.2398,
"step": 190000
},
{
"epoch": 17.81,
"learning_rate": 1.4389195251892702e-05,
"loss": 1.2464,
"step": 190500
},
{
"epoch": 17.85,
"learning_rate": 1.4295728572763812e-05,
"loss": 1.2444,
"step": 191000
},
{
"epoch": 17.9,
"learning_rate": 1.4202261893634919e-05,
"loss": 1.2464,
"step": 191500
},
{
"epoch": 17.95,
"learning_rate": 1.410879521450603e-05,
"loss": 1.2507,
"step": 192000
},
{
"epoch": 17.99,
"learning_rate": 1.401532853537714e-05,
"loss": 1.2374,
"step": 192500
},
{
"epoch": 18.04,
"learning_rate": 1.3921861856248247e-05,
"loss": 1.2439,
"step": 193000
},
{
"epoch": 18.09,
"learning_rate": 1.3828395177119357e-05,
"loss": 1.2455,
"step": 193500
},
{
"epoch": 18.13,
"learning_rate": 1.3734928497990468e-05,
"loss": 1.2368,
"step": 194000
},
{
"epoch": 18.18,
"learning_rate": 1.3641461818861575e-05,
"loss": 1.2434,
"step": 194500
},
{
"epoch": 18.23,
"learning_rate": 1.3547995139732685e-05,
"loss": 1.2292,
"step": 195000
},
{
"epoch": 18.27,
"learning_rate": 1.3454528460603796e-05,
"loss": 1.2316,
"step": 195500
},
{
"epoch": 18.32,
"learning_rate": 1.3361061781474904e-05,
"loss": 1.2312,
"step": 196000
},
{
"epoch": 18.37,
"learning_rate": 1.3267595102346015e-05,
"loss": 1.229,
"step": 196500
},
{
"epoch": 18.41,
"learning_rate": 1.3174128423217125e-05,
"loss": 1.2375,
"step": 197000
},
{
"epoch": 18.46,
"learning_rate": 1.3080661744088232e-05,
"loss": 1.2346,
"step": 197500
},
{
"epoch": 18.51,
"learning_rate": 1.2987195064959343e-05,
"loss": 1.2334,
"step": 198000
},
{
"epoch": 18.55,
"learning_rate": 1.2893728385830453e-05,
"loss": 1.2317,
"step": 198500
},
{
"epoch": 18.6,
"learning_rate": 1.280026170670156e-05,
"loss": 1.2342,
"step": 199000
},
{
"epoch": 18.65,
"learning_rate": 1.270679502757267e-05,
"loss": 1.2327,
"step": 199500
},
{
"epoch": 18.69,
"learning_rate": 1.261332834844378e-05,
"loss": 1.2303,
"step": 200000
},
{
"epoch": 18.74,
"learning_rate": 1.251986166931489e-05,
"loss": 1.2406,
"step": 200500
},
{
"epoch": 18.79,
"learning_rate": 1.2426394990186e-05,
"loss": 1.2306,
"step": 201000
},
{
"epoch": 18.83,
"learning_rate": 1.2332928311057109e-05,
"loss": 1.2289,
"step": 201500
},
{
"epoch": 18.88,
"learning_rate": 1.2239461631928219e-05,
"loss": 1.231,
"step": 202000
},
{
"epoch": 18.93,
"learning_rate": 1.2145994952799328e-05,
"loss": 1.227,
"step": 202500
},
{
"epoch": 18.97,
"learning_rate": 1.2052528273670437e-05,
"loss": 1.2329,
"step": 203000
},
{
"epoch": 19.02,
"learning_rate": 1.1959061594541547e-05,
"loss": 1.2278,
"step": 203500
},
{
"epoch": 19.07,
"learning_rate": 1.1865594915412656e-05,
"loss": 1.2342,
"step": 204000
},
{
"epoch": 19.11,
"learning_rate": 1.1772128236283764e-05,
"loss": 1.2174,
"step": 204500
},
{
"epoch": 19.16,
"learning_rate": 1.1678661557154875e-05,
"loss": 1.2299,
"step": 205000
},
{
"epoch": 19.21,
"learning_rate": 1.1585194878025985e-05,
"loss": 1.2276,
"step": 205500
},
{
"epoch": 19.25,
"learning_rate": 1.1491728198897094e-05,
"loss": 1.2266,
"step": 206000
},
{
"epoch": 19.3,
"learning_rate": 1.1398261519768204e-05,
"loss": 1.2229,
"step": 206500
},
{
"epoch": 19.35,
"learning_rate": 1.1304794840639313e-05,
"loss": 1.2258,
"step": 207000
},
{
"epoch": 19.39,
"learning_rate": 1.1211328161510422e-05,
"loss": 1.2275,
"step": 207500
},
{
"epoch": 19.44,
"learning_rate": 1.1117861482381532e-05,
"loss": 1.2148,
"step": 208000
},
{
"epoch": 19.49,
"learning_rate": 1.1024394803252641e-05,
"loss": 1.2229,
"step": 208500
},
{
"epoch": 19.53,
"learning_rate": 1.093092812412375e-05,
"loss": 1.2218,
"step": 209000
},
{
"epoch": 19.58,
"learning_rate": 1.083746144499486e-05,
"loss": 1.2114,
"step": 209500
},
{
"epoch": 19.63,
"learning_rate": 1.0743994765865969e-05,
"loss": 1.2243,
"step": 210000
},
{
"epoch": 19.67,
"learning_rate": 1.0650528086737079e-05,
"loss": 1.2211,
"step": 210500
},
{
"epoch": 19.72,
"learning_rate": 1.0557061407608188e-05,
"loss": 1.2223,
"step": 211000
},
{
"epoch": 19.77,
"learning_rate": 1.0463594728479298e-05,
"loss": 1.2263,
"step": 211500
},
{
"epoch": 19.81,
"learning_rate": 1.0370128049350407e-05,
"loss": 1.2184,
"step": 212000
},
{
"epoch": 19.86,
"learning_rate": 1.0276661370221516e-05,
"loss": 1.2193,
"step": 212500
},
{
"epoch": 19.91,
"learning_rate": 1.0183194691092626e-05,
"loss": 1.2147,
"step": 213000
},
{
"epoch": 19.96,
"learning_rate": 1.0089728011963735e-05,
"loss": 1.2179,
"step": 213500
},
{
"epoch": 20.0,
"learning_rate": 9.996261332834844e-06,
"loss": 1.2093,
"step": 214000
},
{
"epoch": 20.05,
"learning_rate": 9.902794653705954e-06,
"loss": 1.2069,
"step": 214500
},
{
"epoch": 20.1,
"learning_rate": 9.809327974577064e-06,
"loss": 1.2147,
"step": 215000
},
{
"epoch": 20.14,
"learning_rate": 9.715861295448173e-06,
"loss": 1.2125,
"step": 215500
},
{
"epoch": 20.19,
"learning_rate": 9.622394616319283e-06,
"loss": 1.2221,
"step": 216000
},
{
"epoch": 20.24,
"learning_rate": 9.528927937190392e-06,
"loss": 1.2056,
"step": 216500
},
{
"epoch": 20.28,
"learning_rate": 9.435461258061501e-06,
"loss": 1.211,
"step": 217000
},
{
"epoch": 20.33,
"learning_rate": 9.341994578932611e-06,
"loss": 1.2146,
"step": 217500
},
{
"epoch": 20.38,
"learning_rate": 9.24852789980372e-06,
"loss": 1.2152,
"step": 218000
},
{
"epoch": 20.42,
"learning_rate": 9.155061220674829e-06,
"loss": 1.2146,
"step": 218500
},
{
"epoch": 20.47,
"learning_rate": 9.06159454154594e-06,
"loss": 1.2256,
"step": 219000
},
{
"epoch": 20.52,
"learning_rate": 8.968127862417048e-06,
"loss": 1.2058,
"step": 219500
},
{
"epoch": 20.56,
"learning_rate": 8.874661183288158e-06,
"loss": 1.2128,
"step": 220000
},
{
"epoch": 20.61,
"learning_rate": 8.781194504159269e-06,
"loss": 1.2137,
"step": 220500
},
{
"epoch": 20.66,
"learning_rate": 8.687727825030377e-06,
"loss": 1.2129,
"step": 221000
},
{
"epoch": 20.7,
"learning_rate": 8.594261145901486e-06,
"loss": 1.2181,
"step": 221500
},
{
"epoch": 20.75,
"learning_rate": 8.500794466772597e-06,
"loss": 1.201,
"step": 222000
},
{
"epoch": 20.8,
"learning_rate": 8.407327787643705e-06,
"loss": 1.2162,
"step": 222500
},
{
"epoch": 20.84,
"learning_rate": 8.313861108514814e-06,
"loss": 1.2077,
"step": 223000
},
{
"epoch": 20.89,
"learning_rate": 8.220394429385924e-06,
"loss": 1.2098,
"step": 223500
},
{
"epoch": 20.94,
"learning_rate": 8.126927750257033e-06,
"loss": 1.2092,
"step": 224000
},
{
"epoch": 20.98,
"learning_rate": 8.033461071128144e-06,
"loss": 1.2132,
"step": 224500
},
{
"epoch": 21.03,
"learning_rate": 7.939994391999252e-06,
"loss": 1.2166,
"step": 225000
},
{
"epoch": 21.08,
"learning_rate": 7.846527712870363e-06,
"loss": 1.2063,
"step": 225500
},
{
"epoch": 21.12,
"learning_rate": 7.753061033741471e-06,
"loss": 1.2029,
"step": 226000
},
{
"epoch": 21.17,
"learning_rate": 7.659594354612582e-06,
"loss": 1.201,
"step": 226500
},
{
"epoch": 21.22,
"learning_rate": 7.5661276754836905e-06,
"loss": 1.2006,
"step": 227000
},
{
"epoch": 21.26,
"learning_rate": 7.472660996354799e-06,
"loss": 1.2028,
"step": 227500
},
{
"epoch": 21.31,
"learning_rate": 7.37919431722591e-06,
"loss": 1.1959,
"step": 228000
},
{
"epoch": 21.36,
"learning_rate": 7.285727638097019e-06,
"loss": 1.2126,
"step": 228500
},
{
"epoch": 21.4,
"learning_rate": 7.192260958968128e-06,
"loss": 1.208,
"step": 229000
},
{
"epoch": 21.45,
"learning_rate": 7.098794279839238e-06,
"loss": 1.2026,
"step": 229500
},
{
"epoch": 21.5,
"learning_rate": 7.005327600710347e-06,
"loss": 1.2069,
"step": 230000
},
{
"epoch": 21.54,
"learning_rate": 6.911860921581456e-06,
"loss": 1.2042,
"step": 230500
},
{
"epoch": 21.59,
"learning_rate": 6.818394242452566e-06,
"loss": 1.2057,
"step": 231000
},
{
"epoch": 21.64,
"learning_rate": 6.724927563323676e-06,
"loss": 1.212,
"step": 231500
},
{
"epoch": 21.68,
"learning_rate": 6.6314608841947845e-06,
"loss": 1.2012,
"step": 232000
},
{
"epoch": 21.73,
"learning_rate": 6.537994205065895e-06,
"loss": 1.2066,
"step": 232500
},
{
"epoch": 21.78,
"learning_rate": 6.444527525937004e-06,
"loss": 1.2042,
"step": 233000
},
{
"epoch": 21.82,
"learning_rate": 6.351060846808113e-06,
"loss": 1.2101,
"step": 233500
},
{
"epoch": 21.87,
"learning_rate": 6.257594167679224e-06,
"loss": 1.1961,
"step": 234000
},
{
"epoch": 21.92,
"learning_rate": 6.164127488550332e-06,
"loss": 1.1994,
"step": 234500
},
{
"epoch": 21.96,
"learning_rate": 6.070660809421441e-06,
"loss": 1.2034,
"step": 235000
},
{
"epoch": 22.01,
"learning_rate": 5.977194130292551e-06,
"loss": 1.1977,
"step": 235500
},
{
"epoch": 22.06,
"learning_rate": 5.883727451163661e-06,
"loss": 1.1961,
"step": 236000
},
{
"epoch": 22.1,
"learning_rate": 5.79026077203477e-06,
"loss": 1.1929,
"step": 236500
},
{
"epoch": 22.15,
"learning_rate": 5.696794092905879e-06,
"loss": 1.1968,
"step": 237000
},
{
"epoch": 22.2,
"learning_rate": 5.603327413776989e-06,
"loss": 1.1946,
"step": 237500
},
{
"epoch": 22.25,
"learning_rate": 5.5098607346480976e-06,
"loss": 1.1971,
"step": 238000
},
{
"epoch": 22.29,
"learning_rate": 5.416394055519208e-06,
"loss": 1.2018,
"step": 238500
},
{
"epoch": 22.34,
"learning_rate": 5.3229273763903175e-06,
"loss": 1.1986,
"step": 239000
},
{
"epoch": 22.39,
"learning_rate": 5.229460697261426e-06,
"loss": 1.1877,
"step": 239500
},
{
"epoch": 22.43,
"learning_rate": 5.135994018132536e-06,
"loss": 1.1955,
"step": 240000
},
{
"epoch": 22.48,
"learning_rate": 5.042527339003645e-06,
"loss": 1.1989,
"step": 240500
},
{
"epoch": 22.53,
"learning_rate": 4.949060659874755e-06,
"loss": 1.1888,
"step": 241000
},
{
"epoch": 22.57,
"learning_rate": 4.8555939807458645e-06,
"loss": 1.2003,
"step": 241500
},
{
"epoch": 22.62,
"learning_rate": 4.762127301616974e-06,
"loss": 1.1945,
"step": 242000
},
{
"epoch": 22.67,
"learning_rate": 4.668660622488083e-06,
"loss": 1.1833,
"step": 242500
},
{
"epoch": 22.71,
"learning_rate": 4.575193943359192e-06,
"loss": 1.1866,
"step": 243000
},
{
"epoch": 22.76,
"learning_rate": 4.481727264230303e-06,
"loss": 1.1924,
"step": 243500
},
{
"epoch": 22.81,
"learning_rate": 4.3882605851014115e-06,
"loss": 1.1924,
"step": 244000
},
{
"epoch": 22.85,
"learning_rate": 4.294793905972521e-06,
"loss": 1.1894,
"step": 244500
},
{
"epoch": 22.9,
"learning_rate": 4.201327226843631e-06,
"loss": 1.2008,
"step": 245000
},
{
"epoch": 22.95,
"learning_rate": 4.107860547714739e-06,
"loss": 1.1959,
"step": 245500
},
{
"epoch": 22.99,
"learning_rate": 4.01439386858585e-06,
"loss": 1.1996,
"step": 246000
},
{
"epoch": 23.04,
"learning_rate": 3.920927189456959e-06,
"loss": 1.1928,
"step": 246500
},
{
"epoch": 23.09,
"learning_rate": 3.827460510328068e-06,
"loss": 1.195,
"step": 247000
},
{
"epoch": 23.13,
"learning_rate": 3.7339938311991776e-06,
"loss": 1.1873,
"step": 247500
},
{
"epoch": 23.18,
"learning_rate": 3.6405271520702876e-06,
"loss": 1.1977,
"step": 248000
},
{
"epoch": 23.23,
"learning_rate": 3.5470604729413963e-06,
"loss": 1.1909,
"step": 248500
},
{
"epoch": 23.27,
"learning_rate": 3.4535937938125063e-06,
"loss": 1.1881,
"step": 249000
},
{
"epoch": 23.32,
"learning_rate": 3.360127114683616e-06,
"loss": 1.1914,
"step": 249500
},
{
"epoch": 23.37,
"learning_rate": 3.266660435554725e-06,
"loss": 1.1813,
"step": 250000
},
{
"epoch": 23.41,
"learning_rate": 3.1731937564258346e-06,
"loss": 1.2004,
"step": 250500
},
{
"epoch": 23.46,
"learning_rate": 3.0797270772969437e-06,
"loss": 1.1867,
"step": 251000
},
{
"epoch": 23.51,
"learning_rate": 2.9862603981680533e-06,
"loss": 1.1933,
"step": 251500
},
{
"epoch": 23.55,
"learning_rate": 2.892793719039163e-06,
"loss": 1.1834,
"step": 252000
},
{
"epoch": 23.6,
"learning_rate": 2.799327039910272e-06,
"loss": 1.18,
"step": 252500
},
{
"epoch": 23.65,
"learning_rate": 2.7058603607813815e-06,
"loss": 1.1923,
"step": 253000
},
{
"epoch": 23.69,
"learning_rate": 2.612393681652491e-06,
"loss": 1.1912,
"step": 253500
},
{
"epoch": 23.74,
"learning_rate": 2.5189270025236007e-06,
"loss": 1.1888,
"step": 254000
},
{
"epoch": 23.79,
"learning_rate": 2.42546032339471e-06,
"loss": 1.1843,
"step": 254500
},
{
"epoch": 23.83,
"learning_rate": 2.3319936442658194e-06,
"loss": 1.1911,
"step": 255000
},
{
"epoch": 23.88,
"learning_rate": 2.238526965136929e-06,
"loss": 1.1943,
"step": 255500
},
{
"epoch": 23.93,
"learning_rate": 2.145060286008038e-06,
"loss": 1.1811,
"step": 256000
},
{
"epoch": 23.97,
"learning_rate": 2.0515936068791476e-06,
"loss": 1.1904,
"step": 256500
},
{
"epoch": 24.02,
"learning_rate": 1.958126927750257e-06,
"loss": 1.1752,
"step": 257000
},
{
"epoch": 24.07,
"learning_rate": 1.8646602486213666e-06,
"loss": 1.1851,
"step": 257500
},
{
"epoch": 24.11,
"learning_rate": 1.771193569492476e-06,
"loss": 1.1874,
"step": 258000
},
{
"epoch": 24.16,
"learning_rate": 1.6777268903635857e-06,
"loss": 1.1904,
"step": 258500
},
{
"epoch": 24.21,
"learning_rate": 1.5842602112346948e-06,
"loss": 1.188,
"step": 259000
},
{
"epoch": 24.25,
"learning_rate": 1.4907935321058044e-06,
"loss": 1.1808,
"step": 259500
},
{
"epoch": 24.3,
"learning_rate": 1.3973268529769137e-06,
"loss": 1.1855,
"step": 260000
},
{
"epoch": 24.35,
"learning_rate": 1.3038601738480233e-06,
"loss": 1.1856,
"step": 260500
},
{
"epoch": 24.39,
"learning_rate": 1.2103934947191327e-06,
"loss": 1.1943,
"step": 261000
},
{
"epoch": 24.44,
"learning_rate": 1.1169268155902422e-06,
"loss": 1.19,
"step": 261500
},
{
"epoch": 24.49,
"learning_rate": 1.0234601364613516e-06,
"loss": 1.1819,
"step": 262000
},
{
"epoch": 24.54,
"learning_rate": 9.299934573324609e-07,
"loss": 1.1836,
"step": 262500
},
{
"epoch": 24.58,
"learning_rate": 8.365267782035705e-07,
"loss": 1.182,
"step": 263000
},
{
"epoch": 24.63,
"learning_rate": 7.4306009907468e-07,
"loss": 1.1844,
"step": 263500
},
{
"epoch": 24.68,
"learning_rate": 6.495934199457893e-07,
"loss": 1.1928,
"step": 264000
},
{
"epoch": 24.72,
"learning_rate": 5.561267408168989e-07,
"loss": 1.1838,
"step": 264500
},
{
"epoch": 24.77,
"learning_rate": 4.626600616880083e-07,
"loss": 1.1849,
"step": 265000
},
{
"epoch": 24.82,
"learning_rate": 3.691933825591177e-07,
"loss": 1.1851,
"step": 265500
},
{
"epoch": 24.86,
"learning_rate": 2.7572670343022714e-07,
"loss": 1.1919,
"step": 266000
},
{
"epoch": 24.91,
"learning_rate": 1.8226002430133658e-07,
"loss": 1.1752,
"step": 266500
},
{
"epoch": 24.96,
"learning_rate": 8.879334517244602e-08,
"loss": 1.1807,
"step": 267000
}
],
"logging_steps": 500,
"max_steps": 267475,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 500,
"total_flos": 1.1243414742110208e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
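
Note (not part of the original file): the JSON above records the MizBERT masked-language-model pretraining run, logging training loss and a linearly decaying learning rate every 500 steps over roughly 25 epochs (267,000 of 267,475 steps). Below is a minimal Python sketch for reading this trainer_state.json and plotting those curves; the local file path and the matplotlib layout are illustrative assumptions, not part of the checkpoint.

# Illustrative sketch: load trainer_state.json and plot loss / learning rate.
# Assumes the file has been downloaded locally as "trainer_state.json".
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry holds "step", "loss", and "learning_rate".
steps = [entry["step"] for entry in state["log_history"]]
losses = [entry["loss"] for entry in state["log_history"]]
lrs = [entry["learning_rate"] for entry in state["log_history"]]

fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(steps, losses)
ax_loss.set_xlabel("global step")
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_xlabel("global step")
ax_lr.set_ylabel("learning rate")
fig.tight_layout()
plt.show()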