biobert-it / trainer_state.json
marcopost-it
first model
1dcec44
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 709692,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.9964773451018196e-05,
"loss": 7.3047,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.9929546902036376e-05,
"loss": 6.5184,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.989432035305457e-05,
"loss": 6.2814,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.985909380407276e-05,
"loss": 6.1168,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.9823867255090944e-05,
"loss": 6.0133,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.978864070610913e-05,
"loss": 5.8313,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.975341415712732e-05,
"loss": 5.625,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 4.9718187608145504e-05,
"loss": 5.3774,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 4.96829610591637e-05,
"loss": 5.1476,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.964773451018188e-05,
"loss": 4.9479,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.961250796120007e-05,
"loss": 4.7909,
"step": 5500
},
{
"epoch": 0.03,
"learning_rate": 4.9577281412218266e-05,
"loss": 4.6501,
"step": 6000
},
{
"epoch": 0.03,
"learning_rate": 4.9542054863236446e-05,
"loss": 4.5042,
"step": 6500
},
{
"epoch": 0.03,
"learning_rate": 4.950682831425464e-05,
"loss": 4.3819,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 4.9471601765272826e-05,
"loss": 4.2813,
"step": 7500
},
{
"epoch": 0.03,
"learning_rate": 4.943637521629101e-05,
"loss": 4.1623,
"step": 8000
},
{
"epoch": 0.04,
"learning_rate": 4.94011486673092e-05,
"loss": 4.0686,
"step": 8500
},
{
"epoch": 0.04,
"learning_rate": 4.936592211832739e-05,
"loss": 3.9636,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 4.9330695569345574e-05,
"loss": 3.8418,
"step": 9500
},
{
"epoch": 0.04,
"learning_rate": 4.929546902036377e-05,
"loss": 3.7808,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 4.9260242471381954e-05,
"loss": 3.697,
"step": 10500
},
{
"epoch": 0.05,
"learning_rate": 4.922501592240014e-05,
"loss": 3.6477,
"step": 11000
},
{
"epoch": 0.05,
"learning_rate": 4.9189789373418335e-05,
"loss": 3.5502,
"step": 11500
},
{
"epoch": 0.05,
"learning_rate": 4.9154562824436515e-05,
"loss": 3.5132,
"step": 12000
},
{
"epoch": 0.05,
"learning_rate": 4.911933627545471e-05,
"loss": 3.4574,
"step": 12500
},
{
"epoch": 0.05,
"learning_rate": 4.9084109726472896e-05,
"loss": 3.4167,
"step": 13000
},
{
"epoch": 0.06,
"learning_rate": 4.904888317749108e-05,
"loss": 3.3762,
"step": 13500
},
{
"epoch": 0.06,
"learning_rate": 4.901365662850927e-05,
"loss": 3.33,
"step": 14000
},
{
"epoch": 0.06,
"learning_rate": 4.8978430079527456e-05,
"loss": 3.2696,
"step": 14500
},
{
"epoch": 0.06,
"learning_rate": 4.894320353054564e-05,
"loss": 3.2309,
"step": 15000
},
{
"epoch": 0.07,
"learning_rate": 4.890797698156384e-05,
"loss": 3.1656,
"step": 15500
},
{
"epoch": 0.07,
"learning_rate": 4.8872750432582024e-05,
"loss": 3.135,
"step": 16000
},
{
"epoch": 0.07,
"learning_rate": 4.883752388360021e-05,
"loss": 3.1137,
"step": 16500
},
{
"epoch": 0.07,
"learning_rate": 4.8802297334618405e-05,
"loss": 3.0699,
"step": 17000
},
{
"epoch": 0.07,
"learning_rate": 4.8767070785636585e-05,
"loss": 3.0313,
"step": 17500
},
{
"epoch": 0.08,
"learning_rate": 4.873184423665478e-05,
"loss": 3.0078,
"step": 18000
},
{
"epoch": 0.08,
"learning_rate": 4.8696617687672965e-05,
"loss": 2.9916,
"step": 18500
},
{
"epoch": 0.08,
"learning_rate": 4.866139113869115e-05,
"loss": 2.9576,
"step": 19000
},
{
"epoch": 0.08,
"learning_rate": 4.862616458970934e-05,
"loss": 2.9201,
"step": 19500
},
{
"epoch": 0.08,
"learning_rate": 4.859093804072753e-05,
"loss": 2.9107,
"step": 20000
},
{
"epoch": 0.09,
"learning_rate": 4.855571149174572e-05,
"loss": 2.8871,
"step": 20500
},
{
"epoch": 0.09,
"learning_rate": 4.8520484942763907e-05,
"loss": 2.8691,
"step": 21000
},
{
"epoch": 0.09,
"learning_rate": 4.8485258393782093e-05,
"loss": 2.8645,
"step": 21500
},
{
"epoch": 0.09,
"learning_rate": 4.845003184480028e-05,
"loss": 2.8233,
"step": 22000
},
{
"epoch": 0.1,
"learning_rate": 4.8414805295818474e-05,
"loss": 2.7953,
"step": 22500
},
{
"epoch": 0.1,
"learning_rate": 4.8379578746836654e-05,
"loss": 2.7669,
"step": 23000
},
{
"epoch": 0.1,
"learning_rate": 4.834435219785485e-05,
"loss": 2.7489,
"step": 23500
},
{
"epoch": 0.1,
"learning_rate": 4.8309125648873035e-05,
"loss": 2.7754,
"step": 24000
},
{
"epoch": 0.1,
"learning_rate": 4.827389909989122e-05,
"loss": 2.7525,
"step": 24500
},
{
"epoch": 0.11,
"learning_rate": 4.823867255090941e-05,
"loss": 2.7249,
"step": 25000
},
{
"epoch": 0.11,
"learning_rate": 4.82034460019276e-05,
"loss": 2.687,
"step": 25500
},
{
"epoch": 0.11,
"learning_rate": 4.816821945294579e-05,
"loss": 2.6729,
"step": 26000
},
{
"epoch": 0.11,
"learning_rate": 4.8132992903963976e-05,
"loss": 2.6922,
"step": 26500
},
{
"epoch": 0.11,
"learning_rate": 4.809776635498216e-05,
"loss": 2.6631,
"step": 27000
},
{
"epoch": 0.12,
"learning_rate": 4.806253980600035e-05,
"loss": 2.6465,
"step": 27500
},
{
"epoch": 0.12,
"learning_rate": 4.8027313257018543e-05,
"loss": 2.6219,
"step": 28000
},
{
"epoch": 0.12,
"learning_rate": 4.7992086708036724e-05,
"loss": 2.6152,
"step": 28500
},
{
"epoch": 0.12,
"learning_rate": 4.795686015905492e-05,
"loss": 2.6077,
"step": 29000
},
{
"epoch": 0.12,
"learning_rate": 4.7921633610073104e-05,
"loss": 2.6222,
"step": 29500
},
{
"epoch": 0.13,
"learning_rate": 4.788640706109129e-05,
"loss": 2.5918,
"step": 30000
},
{
"epoch": 0.13,
"learning_rate": 4.785118051210948e-05,
"loss": 2.5734,
"step": 30500
},
{
"epoch": 0.13,
"learning_rate": 4.781595396312767e-05,
"loss": 2.5571,
"step": 31000
},
{
"epoch": 0.13,
"learning_rate": 4.778072741414586e-05,
"loss": 2.5387,
"step": 31500
},
{
"epoch": 0.14,
"learning_rate": 4.7745500865164045e-05,
"loss": 2.5273,
"step": 32000
},
{
"epoch": 0.14,
"learning_rate": 4.771027431618223e-05,
"loss": 2.5061,
"step": 32500
},
{
"epoch": 0.14,
"learning_rate": 4.767504776720042e-05,
"loss": 2.514,
"step": 33000
},
{
"epoch": 0.14,
"learning_rate": 4.763982121821861e-05,
"loss": 2.4995,
"step": 33500
},
{
"epoch": 0.14,
"learning_rate": 4.760459466923679e-05,
"loss": 2.4845,
"step": 34000
},
{
"epoch": 0.15,
"learning_rate": 4.756936812025499e-05,
"loss": 2.4921,
"step": 34500
},
{
"epoch": 0.15,
"learning_rate": 4.7534141571273174e-05,
"loss": 2.4816,
"step": 35000
},
{
"epoch": 0.15,
"learning_rate": 4.749891502229136e-05,
"loss": 2.4777,
"step": 35500
},
{
"epoch": 0.15,
"learning_rate": 4.7463688473309554e-05,
"loss": 2.4449,
"step": 36000
},
{
"epoch": 0.15,
"learning_rate": 4.742846192432774e-05,
"loss": 2.439,
"step": 36500
},
{
"epoch": 0.16,
"learning_rate": 4.739323537534593e-05,
"loss": 2.4522,
"step": 37000
},
{
"epoch": 0.16,
"learning_rate": 4.7358008826364115e-05,
"loss": 2.4519,
"step": 37500
},
{
"epoch": 0.16,
"learning_rate": 4.73227822773823e-05,
"loss": 2.404,
"step": 38000
},
{
"epoch": 0.16,
"learning_rate": 4.728755572840049e-05,
"loss": 2.4217,
"step": 38500
},
{
"epoch": 0.16,
"learning_rate": 4.725232917941868e-05,
"loss": 2.3985,
"step": 39000
},
{
"epoch": 0.17,
"learning_rate": 4.721710263043686e-05,
"loss": 2.3941,
"step": 39500
},
{
"epoch": 0.17,
"learning_rate": 4.7181876081455056e-05,
"loss": 2.3889,
"step": 40000
},
{
"epoch": 0.17,
"learning_rate": 4.714664953247324e-05,
"loss": 2.4091,
"step": 40500
},
{
"epoch": 0.17,
"learning_rate": 4.711142298349143e-05,
"loss": 2.3649,
"step": 41000
},
{
"epoch": 0.18,
"learning_rate": 4.7076196434509624e-05,
"loss": 2.3702,
"step": 41500
},
{
"epoch": 0.18,
"learning_rate": 4.704096988552781e-05,
"loss": 2.3551,
"step": 42000
},
{
"epoch": 0.18,
"learning_rate": 4.7005743336546e-05,
"loss": 2.3655,
"step": 42500
},
{
"epoch": 0.18,
"learning_rate": 4.6970516787564184e-05,
"loss": 2.3533,
"step": 43000
},
{
"epoch": 0.18,
"learning_rate": 4.693529023858237e-05,
"loss": 2.3228,
"step": 43500
},
{
"epoch": 0.19,
"learning_rate": 4.690006368960056e-05,
"loss": 2.3521,
"step": 44000
},
{
"epoch": 0.19,
"learning_rate": 4.686483714061875e-05,
"loss": 2.3226,
"step": 44500
},
{
"epoch": 0.19,
"learning_rate": 4.682961059163694e-05,
"loss": 2.3249,
"step": 45000
},
{
"epoch": 0.19,
"learning_rate": 4.6794384042655126e-05,
"loss": 2.3061,
"step": 45500
},
{
"epoch": 0.19,
"learning_rate": 4.675915749367332e-05,
"loss": 2.2967,
"step": 46000
},
{
"epoch": 0.2,
"learning_rate": 4.67239309446915e-05,
"loss": 2.2972,
"step": 46500
},
{
"epoch": 0.2,
"learning_rate": 4.668870439570969e-05,
"loss": 2.319,
"step": 47000
},
{
"epoch": 0.2,
"learning_rate": 4.665347784672788e-05,
"loss": 2.3123,
"step": 47500
},
{
"epoch": 0.2,
"learning_rate": 4.661825129774607e-05,
"loss": 2.2893,
"step": 48000
},
{
"epoch": 0.21,
"learning_rate": 4.6583024748764254e-05,
"loss": 2.2594,
"step": 48500
},
{
"epoch": 0.21,
"learning_rate": 4.654779819978244e-05,
"loss": 2.2802,
"step": 49000
},
{
"epoch": 0.21,
"learning_rate": 4.651257165080063e-05,
"loss": 2.2758,
"step": 49500
},
{
"epoch": 0.21,
"learning_rate": 4.647734510181882e-05,
"loss": 2.2651,
"step": 50000
},
{
"epoch": 0.21,
"learning_rate": 4.644211855283701e-05,
"loss": 2.2667,
"step": 50500
},
{
"epoch": 0.22,
"learning_rate": 4.6406892003855195e-05,
"loss": 2.2427,
"step": 51000
},
{
"epoch": 0.22,
"learning_rate": 4.637166545487339e-05,
"loss": 2.2468,
"step": 51500
},
{
"epoch": 0.22,
"learning_rate": 4.633643890589157e-05,
"loss": 2.2605,
"step": 52000
},
{
"epoch": 0.22,
"learning_rate": 4.630121235690976e-05,
"loss": 2.2575,
"step": 52500
},
{
"epoch": 0.22,
"learning_rate": 4.626598580792795e-05,
"loss": 2.2217,
"step": 53000
},
{
"epoch": 0.23,
"learning_rate": 4.6230759258946136e-05,
"loss": 2.2353,
"step": 53500
},
{
"epoch": 0.23,
"learning_rate": 4.619553270996432e-05,
"loss": 2.2534,
"step": 54000
},
{
"epoch": 0.23,
"learning_rate": 4.616030616098252e-05,
"loss": 2.2287,
"step": 54500
},
{
"epoch": 0.23,
"learning_rate": 4.61250796120007e-05,
"loss": 2.2149,
"step": 55000
},
{
"epoch": 0.23,
"learning_rate": 4.608985306301889e-05,
"loss": 2.2124,
"step": 55500
},
{
"epoch": 0.24,
"learning_rate": 4.605462651403708e-05,
"loss": 2.2191,
"step": 56000
},
{
"epoch": 0.24,
"learning_rate": 4.6019399965055265e-05,
"loss": 2.1982,
"step": 56500
},
{
"epoch": 0.24,
"learning_rate": 4.598417341607346e-05,
"loss": 2.195,
"step": 57000
},
{
"epoch": 0.24,
"learning_rate": 4.594894686709164e-05,
"loss": 2.1845,
"step": 57500
},
{
"epoch": 0.25,
"learning_rate": 4.591372031810983e-05,
"loss": 2.1877,
"step": 58000
},
{
"epoch": 0.25,
"learning_rate": 4.587849376912802e-05,
"loss": 2.1718,
"step": 58500
},
{
"epoch": 0.25,
"learning_rate": 4.5843267220146206e-05,
"loss": 2.1686,
"step": 59000
},
{
"epoch": 0.25,
"learning_rate": 4.580804067116439e-05,
"loss": 2.1746,
"step": 59500
},
{
"epoch": 0.25,
"learning_rate": 4.5772814122182587e-05,
"loss": 2.1597,
"step": 60000
},
{
"epoch": 0.26,
"learning_rate": 4.573758757320077e-05,
"loss": 2.1625,
"step": 60500
},
{
"epoch": 0.26,
"learning_rate": 4.570236102421896e-05,
"loss": 2.1407,
"step": 61000
},
{
"epoch": 0.26,
"learning_rate": 4.566713447523715e-05,
"loss": 2.1433,
"step": 61500
},
{
"epoch": 0.26,
"learning_rate": 4.5631907926255334e-05,
"loss": 2.1689,
"step": 62000
},
{
"epoch": 0.26,
"learning_rate": 4.559668137727353e-05,
"loss": 2.1465,
"step": 62500
},
{
"epoch": 0.27,
"learning_rate": 4.556145482829171e-05,
"loss": 2.1502,
"step": 63000
},
{
"epoch": 0.27,
"learning_rate": 4.55262282793099e-05,
"loss": 2.1188,
"step": 63500
},
{
"epoch": 0.27,
"learning_rate": 4.549100173032809e-05,
"loss": 2.1514,
"step": 64000
},
{
"epoch": 0.27,
"learning_rate": 4.5455775181346275e-05,
"loss": 2.1102,
"step": 64500
},
{
"epoch": 0.27,
"learning_rate": 4.542054863236446e-05,
"loss": 2.1252,
"step": 65000
},
{
"epoch": 0.28,
"learning_rate": 4.5385322083382656e-05,
"loss": 2.1449,
"step": 65500
},
{
"epoch": 0.28,
"learning_rate": 4.5350095534400836e-05,
"loss": 2.1094,
"step": 66000
},
{
"epoch": 0.28,
"learning_rate": 4.531486898541903e-05,
"loss": 2.1142,
"step": 66500
},
{
"epoch": 0.28,
"learning_rate": 4.527964243643722e-05,
"loss": 2.1201,
"step": 67000
},
{
"epoch": 0.29,
"learning_rate": 4.5244415887455404e-05,
"loss": 2.098,
"step": 67500
},
{
"epoch": 0.29,
"learning_rate": 4.52091893384736e-05,
"loss": 2.0912,
"step": 68000
},
{
"epoch": 0.29,
"learning_rate": 4.517396278949178e-05,
"loss": 2.1082,
"step": 68500
},
{
"epoch": 0.29,
"learning_rate": 4.513873624050997e-05,
"loss": 2.1016,
"step": 69000
},
{
"epoch": 0.29,
"learning_rate": 4.510350969152816e-05,
"loss": 2.1133,
"step": 69500
},
{
"epoch": 0.3,
"learning_rate": 4.5068283142546345e-05,
"loss": 2.0862,
"step": 70000
},
{
"epoch": 0.3,
"learning_rate": 4.503305659356453e-05,
"loss": 2.0723,
"step": 70500
},
{
"epoch": 0.3,
"learning_rate": 4.4997830044582725e-05,
"loss": 2.0975,
"step": 71000
},
{
"epoch": 0.3,
"learning_rate": 4.496260349560091e-05,
"loss": 2.0834,
"step": 71500
},
{
"epoch": 0.3,
"learning_rate": 4.49273769466191e-05,
"loss": 2.0644,
"step": 72000
},
{
"epoch": 0.31,
"learning_rate": 4.4892150397637286e-05,
"loss": 2.088,
"step": 72500
},
{
"epoch": 0.31,
"learning_rate": 4.485692384865547e-05,
"loss": 2.0726,
"step": 73000
},
{
"epoch": 0.31,
"learning_rate": 4.482169729967367e-05,
"loss": 2.074,
"step": 73500
},
{
"epoch": 0.31,
"learning_rate": 4.478647075069185e-05,
"loss": 2.0736,
"step": 74000
},
{
"epoch": 0.31,
"learning_rate": 4.475124420171004e-05,
"loss": 2.056,
"step": 74500
},
{
"epoch": 0.32,
"learning_rate": 4.471601765272823e-05,
"loss": 2.0633,
"step": 75000
},
{
"epoch": 0.32,
"learning_rate": 4.4680791103746414e-05,
"loss": 2.067,
"step": 75500
},
{
"epoch": 0.32,
"learning_rate": 4.46455645547646e-05,
"loss": 2.0556,
"step": 76000
},
{
"epoch": 0.32,
"learning_rate": 4.4610338005782795e-05,
"loss": 2.0451,
"step": 76500
},
{
"epoch": 0.33,
"learning_rate": 4.457511145680098e-05,
"loss": 2.0471,
"step": 77000
},
{
"epoch": 0.33,
"learning_rate": 4.453988490781917e-05,
"loss": 2.0763,
"step": 77500
},
{
"epoch": 0.33,
"learning_rate": 4.4504658358837356e-05,
"loss": 2.0215,
"step": 78000
},
{
"epoch": 0.33,
"learning_rate": 4.446943180985554e-05,
"loss": 2.0481,
"step": 78500
},
{
"epoch": 0.33,
"learning_rate": 4.4434205260873736e-05,
"loss": 2.0259,
"step": 79000
},
{
"epoch": 0.34,
"learning_rate": 4.439897871189192e-05,
"loss": 2.036,
"step": 79500
},
{
"epoch": 0.34,
"learning_rate": 4.436375216291011e-05,
"loss": 2.0332,
"step": 80000
},
{
"epoch": 0.34,
"learning_rate": 4.43285256139283e-05,
"loss": 2.0387,
"step": 80500
},
{
"epoch": 0.34,
"learning_rate": 4.4293299064946484e-05,
"loss": 2.0454,
"step": 81000
},
{
"epoch": 0.34,
"learning_rate": 4.425807251596467e-05,
"loss": 2.0148,
"step": 81500
},
{
"epoch": 0.35,
"learning_rate": 4.4222845966982864e-05,
"loss": 2.0229,
"step": 82000
},
{
"epoch": 0.35,
"learning_rate": 4.418761941800105e-05,
"loss": 2.0375,
"step": 82500
},
{
"epoch": 0.35,
"learning_rate": 4.415239286901924e-05,
"loss": 2.0239,
"step": 83000
},
{
"epoch": 0.35,
"learning_rate": 4.411716632003743e-05,
"loss": 2.0194,
"step": 83500
},
{
"epoch": 0.36,
"learning_rate": 4.408193977105561e-05,
"loss": 2.0271,
"step": 84000
},
{
"epoch": 0.36,
"learning_rate": 4.4046713222073806e-05,
"loss": 2.012,
"step": 84500
},
{
"epoch": 0.36,
"learning_rate": 4.401148667309199e-05,
"loss": 2.0198,
"step": 85000
},
{
"epoch": 0.36,
"learning_rate": 4.397626012411018e-05,
"loss": 1.9945,
"step": 85500
},
{
"epoch": 0.36,
"learning_rate": 4.3941033575128366e-05,
"loss": 2.0004,
"step": 86000
},
{
"epoch": 0.37,
"learning_rate": 4.390580702614655e-05,
"loss": 2.0044,
"step": 86500
},
{
"epoch": 0.37,
"learning_rate": 4.387058047716475e-05,
"loss": 1.9829,
"step": 87000
},
{
"epoch": 0.37,
"learning_rate": 4.3835353928182934e-05,
"loss": 1.9796,
"step": 87500
},
{
"epoch": 0.37,
"learning_rate": 4.380012737920112e-05,
"loss": 1.9778,
"step": 88000
},
{
"epoch": 0.37,
"learning_rate": 4.376490083021931e-05,
"loss": 1.9972,
"step": 88500
},
{
"epoch": 0.38,
"learning_rate": 4.37296742812375e-05,
"loss": 2.0046,
"step": 89000
},
{
"epoch": 0.38,
"learning_rate": 4.369444773225568e-05,
"loss": 1.9844,
"step": 89500
},
{
"epoch": 0.38,
"learning_rate": 4.3659221183273875e-05,
"loss": 1.9935,
"step": 90000
},
{
"epoch": 0.38,
"learning_rate": 4.362399463429206e-05,
"loss": 1.9695,
"step": 90500
},
{
"epoch": 0.38,
"learning_rate": 4.358876808531025e-05,
"loss": 1.9717,
"step": 91000
},
{
"epoch": 0.39,
"learning_rate": 4.3553541536328436e-05,
"loss": 1.9795,
"step": 91500
},
{
"epoch": 0.39,
"learning_rate": 4.351831498734662e-05,
"loss": 1.9717,
"step": 92000
},
{
"epoch": 0.39,
"learning_rate": 4.3483088438364816e-05,
"loss": 1.9668,
"step": 92500
},
{
"epoch": 0.39,
"learning_rate": 4.3447861889383003e-05,
"loss": 1.9674,
"step": 93000
},
{
"epoch": 0.4,
"learning_rate": 4.341263534040119e-05,
"loss": 1.9854,
"step": 93500
},
{
"epoch": 0.4,
"learning_rate": 4.337740879141938e-05,
"loss": 1.9695,
"step": 94000
},
{
"epoch": 0.4,
"learning_rate": 4.334218224243757e-05,
"loss": 1.9542,
"step": 94500
},
{
"epoch": 0.4,
"learning_rate": 4.330695569345575e-05,
"loss": 1.9721,
"step": 95000
},
{
"epoch": 0.4,
"learning_rate": 4.3271729144473945e-05,
"loss": 1.9545,
"step": 95500
},
{
"epoch": 0.41,
"learning_rate": 4.323650259549213e-05,
"loss": 1.9559,
"step": 96000
},
{
"epoch": 0.41,
"learning_rate": 4.320127604651032e-05,
"loss": 1.9466,
"step": 96500
},
{
"epoch": 0.41,
"learning_rate": 4.316604949752851e-05,
"loss": 1.9532,
"step": 97000
},
{
"epoch": 0.41,
"learning_rate": 4.313082294854669e-05,
"loss": 1.9488,
"step": 97500
},
{
"epoch": 0.41,
"learning_rate": 4.3095596399564886e-05,
"loss": 1.9559,
"step": 98000
},
{
"epoch": 0.42,
"learning_rate": 4.306036985058307e-05,
"loss": 1.9372,
"step": 98500
},
{
"epoch": 0.42,
"learning_rate": 4.302514330160126e-05,
"loss": 1.944,
"step": 99000
},
{
"epoch": 0.42,
"learning_rate": 4.298991675261945e-05,
"loss": 1.9516,
"step": 99500
},
{
"epoch": 0.42,
"learning_rate": 4.295469020363764e-05,
"loss": 1.9562,
"step": 100000
},
{
"epoch": 0.42,
"learning_rate": 4.291946365465582e-05,
"loss": 1.9357,
"step": 100500
},
{
"epoch": 0.43,
"learning_rate": 4.2884237105674014e-05,
"loss": 1.9445,
"step": 101000
},
{
"epoch": 0.43,
"learning_rate": 4.28490105566922e-05,
"loss": 1.9514,
"step": 101500
},
{
"epoch": 0.43,
"learning_rate": 4.281378400771039e-05,
"loss": 1.9441,
"step": 102000
},
{
"epoch": 0.43,
"learning_rate": 4.277855745872858e-05,
"loss": 1.9301,
"step": 102500
},
{
"epoch": 0.44,
"learning_rate": 4.274333090974676e-05,
"loss": 1.9395,
"step": 103000
},
{
"epoch": 0.44,
"learning_rate": 4.2708104360764955e-05,
"loss": 1.9468,
"step": 103500
},
{
"epoch": 0.44,
"learning_rate": 4.267287781178314e-05,
"loss": 1.9377,
"step": 104000
},
{
"epoch": 0.44,
"learning_rate": 4.263765126280133e-05,
"loss": 1.9116,
"step": 104500
},
{
"epoch": 0.44,
"learning_rate": 4.2602424713819516e-05,
"loss": 1.9144,
"step": 105000
},
{
"epoch": 0.45,
"learning_rate": 4.256719816483771e-05,
"loss": 1.922,
"step": 105500
},
{
"epoch": 0.45,
"learning_rate": 4.253197161585589e-05,
"loss": 1.9184,
"step": 106000
},
{
"epoch": 0.45,
"learning_rate": 4.2496745066874084e-05,
"loss": 1.9227,
"step": 106500
},
{
"epoch": 0.45,
"learning_rate": 4.246151851789227e-05,
"loss": 1.9251,
"step": 107000
},
{
"epoch": 0.45,
"learning_rate": 4.242629196891046e-05,
"loss": 1.8982,
"step": 107500
},
{
"epoch": 0.46,
"learning_rate": 4.239106541992865e-05,
"loss": 1.8947,
"step": 108000
},
{
"epoch": 0.46,
"learning_rate": 4.235583887094683e-05,
"loss": 1.9032,
"step": 108500
},
{
"epoch": 0.46,
"learning_rate": 4.2320612321965025e-05,
"loss": 1.9185,
"step": 109000
},
{
"epoch": 0.46,
"learning_rate": 4.228538577298321e-05,
"loss": 1.9126,
"step": 109500
},
{
"epoch": 0.46,
"learning_rate": 4.22501592240014e-05,
"loss": 1.8936,
"step": 110000
},
{
"epoch": 0.47,
"learning_rate": 4.2214932675019586e-05,
"loss": 1.9053,
"step": 110500
},
{
"epoch": 0.47,
"learning_rate": 4.217970612603778e-05,
"loss": 1.9096,
"step": 111000
},
{
"epoch": 0.47,
"learning_rate": 4.214447957705596e-05,
"loss": 1.9072,
"step": 111500
},
{
"epoch": 0.47,
"learning_rate": 4.210925302807415e-05,
"loss": 1.8868,
"step": 112000
},
{
"epoch": 0.48,
"learning_rate": 4.207402647909234e-05,
"loss": 1.8924,
"step": 112500
},
{
"epoch": 0.48,
"learning_rate": 4.203879993011053e-05,
"loss": 1.8976,
"step": 113000
},
{
"epoch": 0.48,
"learning_rate": 4.200357338112872e-05,
"loss": 1.8753,
"step": 113500
},
{
"epoch": 0.48,
"learning_rate": 4.196834683214691e-05,
"loss": 1.907,
"step": 114000
},
{
"epoch": 0.48,
"learning_rate": 4.1933120283165094e-05,
"loss": 1.8758,
"step": 114500
},
{
"epoch": 0.49,
"learning_rate": 4.189789373418328e-05,
"loss": 1.885,
"step": 115000
},
{
"epoch": 0.49,
"learning_rate": 4.186266718520147e-05,
"loss": 1.8507,
"step": 115500
},
{
"epoch": 0.49,
"learning_rate": 4.1827440636219655e-05,
"loss": 1.8878,
"step": 116000
},
{
"epoch": 0.49,
"learning_rate": 4.179221408723785e-05,
"loss": 1.8847,
"step": 116500
},
{
"epoch": 0.49,
"learning_rate": 4.175698753825603e-05,
"loss": 1.8745,
"step": 117000
},
{
"epoch": 0.5,
"learning_rate": 4.172176098927422e-05,
"loss": 1.8778,
"step": 117500
},
{
"epoch": 0.5,
"learning_rate": 4.1686534440292416e-05,
"loss": 1.8803,
"step": 118000
},
{
"epoch": 0.5,
"learning_rate": 4.1651307891310596e-05,
"loss": 1.881,
"step": 118500
},
{
"epoch": 0.5,
"learning_rate": 4.161608134232879e-05,
"loss": 1.8846,
"step": 119000
},
{
"epoch": 0.51,
"learning_rate": 4.158085479334698e-05,
"loss": 1.8718,
"step": 119500
},
{
"epoch": 0.51,
"learning_rate": 4.1545628244365164e-05,
"loss": 1.8804,
"step": 120000
},
{
"epoch": 0.51,
"learning_rate": 4.151040169538335e-05,
"loss": 1.8753,
"step": 120500
},
{
"epoch": 0.51,
"learning_rate": 4.147517514640154e-05,
"loss": 1.8816,
"step": 121000
},
{
"epoch": 0.51,
"learning_rate": 4.1439948597419725e-05,
"loss": 1.8634,
"step": 121500
},
{
"epoch": 0.52,
"learning_rate": 4.140472204843792e-05,
"loss": 1.8694,
"step": 122000
},
{
"epoch": 0.52,
"learning_rate": 4.1369495499456105e-05,
"loss": 1.8779,
"step": 122500
},
{
"epoch": 0.52,
"learning_rate": 4.133426895047429e-05,
"loss": 1.8669,
"step": 123000
},
{
"epoch": 0.52,
"learning_rate": 4.1299042401492486e-05,
"loss": 1.8656,
"step": 123500
},
{
"epoch": 0.52,
"learning_rate": 4.1263815852510666e-05,
"loss": 1.8387,
"step": 124000
},
{
"epoch": 0.53,
"learning_rate": 4.122858930352886e-05,
"loss": 1.8518,
"step": 124500
},
{
"epoch": 0.53,
"learning_rate": 4.1193362754547046e-05,
"loss": 1.8348,
"step": 125000
},
{
"epoch": 0.53,
"learning_rate": 4.115813620556523e-05,
"loss": 1.8483,
"step": 125500
},
{
"epoch": 0.53,
"learning_rate": 4.112290965658342e-05,
"loss": 1.853,
"step": 126000
},
{
"epoch": 0.53,
"learning_rate": 4.108768310760161e-05,
"loss": 1.8376,
"step": 126500
},
{
"epoch": 0.54,
"learning_rate": 4.1052456558619794e-05,
"loss": 1.8561,
"step": 127000
},
{
"epoch": 0.54,
"learning_rate": 4.101723000963799e-05,
"loss": 1.8326,
"step": 127500
},
{
"epoch": 0.54,
"learning_rate": 4.0982003460656175e-05,
"loss": 1.8506,
"step": 128000
},
{
"epoch": 0.54,
"learning_rate": 4.094677691167436e-05,
"loss": 1.8433,
"step": 128500
},
{
"epoch": 0.55,
"learning_rate": 4.0911550362692555e-05,
"loss": 1.8508,
"step": 129000
},
{
"epoch": 0.55,
"learning_rate": 4.0876323813710735e-05,
"loss": 1.8493,
"step": 129500
},
{
"epoch": 0.55,
"learning_rate": 4.084109726472893e-05,
"loss": 1.8302,
"step": 130000
},
{
"epoch": 0.55,
"learning_rate": 4.0805870715747116e-05,
"loss": 1.8398,
"step": 130500
},
{
"epoch": 0.55,
"learning_rate": 4.07706441667653e-05,
"loss": 1.8376,
"step": 131000
},
{
"epoch": 0.56,
"learning_rate": 4.073541761778349e-05,
"loss": 1.8452,
"step": 131500
},
{
"epoch": 0.56,
"learning_rate": 4.070019106880168e-05,
"loss": 1.8554,
"step": 132000
},
{
"epoch": 0.56,
"learning_rate": 4.0664964519819864e-05,
"loss": 1.8459,
"step": 132500
},
{
"epoch": 0.56,
"learning_rate": 4.062973797083806e-05,
"loss": 1.84,
"step": 133000
},
{
"epoch": 0.56,
"learning_rate": 4.0594511421856244e-05,
"loss": 1.8279,
"step": 133500
},
{
"epoch": 0.57,
"learning_rate": 4.055928487287443e-05,
"loss": 1.8303,
"step": 134000
},
{
"epoch": 0.57,
"learning_rate": 4.0524058323892625e-05,
"loss": 1.8323,
"step": 134500
},
{
"epoch": 0.57,
"learning_rate": 4.0488831774910805e-05,
"loss": 1.8017,
"step": 135000
},
{
"epoch": 0.57,
"learning_rate": 4.0453605225929e-05,
"loss": 1.8268,
"step": 135500
},
{
"epoch": 0.57,
"learning_rate": 4.0418378676947185e-05,
"loss": 1.8221,
"step": 136000
},
{
"epoch": 0.58,
"learning_rate": 4.038315212796537e-05,
"loss": 1.832,
"step": 136500
},
{
"epoch": 0.58,
"learning_rate": 4.034792557898356e-05,
"loss": 1.8366,
"step": 137000
},
{
"epoch": 0.58,
"learning_rate": 4.0312699030001746e-05,
"loss": 1.8313,
"step": 137500
},
{
"epoch": 0.58,
"learning_rate": 4.027747248101994e-05,
"loss": 1.8124,
"step": 138000
},
{
"epoch": 0.59,
"learning_rate": 4.024224593203813e-05,
"loss": 1.8144,
"step": 138500
},
{
"epoch": 0.59,
"learning_rate": 4.0207019383056314e-05,
"loss": 1.8164,
"step": 139000
},
{
"epoch": 0.59,
"learning_rate": 4.01717928340745e-05,
"loss": 1.8316,
"step": 139500
},
{
"epoch": 0.59,
"learning_rate": 4.0136566285092694e-05,
"loss": 1.8105,
"step": 140000
},
{
"epoch": 0.59,
"learning_rate": 4.0101339736110874e-05,
"loss": 1.8119,
"step": 140500
},
{
"epoch": 0.6,
"learning_rate": 4.006611318712907e-05,
"loss": 1.7914,
"step": 141000
},
{
"epoch": 0.6,
"learning_rate": 4.0030886638147255e-05,
"loss": 1.8251,
"step": 141500
},
{
"epoch": 0.6,
"learning_rate": 3.999566008916544e-05,
"loss": 1.8176,
"step": 142000
},
{
"epoch": 0.6,
"learning_rate": 3.996043354018363e-05,
"loss": 1.7962,
"step": 142500
},
{
"epoch": 0.6,
"learning_rate": 3.9925206991201816e-05,
"loss": 1.8147,
"step": 143000
},
{
"epoch": 0.61,
"learning_rate": 3.988998044222001e-05,
"loss": 1.8182,
"step": 143500
},
{
"epoch": 0.61,
"learning_rate": 3.9854753893238196e-05,
"loss": 1.7926,
"step": 144000
},
{
"epoch": 0.61,
"learning_rate": 3.981952734425638e-05,
"loss": 1.8024,
"step": 144500
},
{
"epoch": 0.61,
"learning_rate": 3.978430079527457e-05,
"loss": 1.7953,
"step": 145000
},
{
"epoch": 0.62,
"learning_rate": 3.9749074246292764e-05,
"loss": 1.7986,
"step": 145500
},
{
"epoch": 0.62,
"learning_rate": 3.9713847697310944e-05,
"loss": 1.7843,
"step": 146000
},
{
"epoch": 0.62,
"learning_rate": 3.967862114832914e-05,
"loss": 1.8076,
"step": 146500
},
{
"epoch": 0.62,
"learning_rate": 3.9643394599347324e-05,
"loss": 1.8062,
"step": 147000
},
{
"epoch": 0.62,
"learning_rate": 3.960816805036551e-05,
"loss": 1.7963,
"step": 147500
},
{
"epoch": 0.63,
"learning_rate": 3.9572941501383705e-05,
"loss": 1.7824,
"step": 148000
},
{
"epoch": 0.63,
"learning_rate": 3.953771495240189e-05,
"loss": 1.7936,
"step": 148500
},
{
"epoch": 0.63,
"learning_rate": 3.950248840342008e-05,
"loss": 1.7937,
"step": 149000
},
{
"epoch": 0.63,
"learning_rate": 3.9467261854438266e-05,
"loss": 1.7844,
"step": 149500
},
{
"epoch": 0.63,
"learning_rate": 3.943203530545645e-05,
"loss": 1.7965,
"step": 150000
},
{
"epoch": 0.64,
"learning_rate": 3.939680875647464e-05,
"loss": 1.7957,
"step": 150500
},
{
"epoch": 0.64,
"learning_rate": 3.936158220749283e-05,
"loss": 1.7802,
"step": 151000
},
{
"epoch": 0.64,
"learning_rate": 3.932635565851101e-05,
"loss": 1.7885,
"step": 151500
},
{
"epoch": 0.64,
"learning_rate": 3.929112910952921e-05,
"loss": 1.7663,
"step": 152000
},
{
"epoch": 0.64,
"learning_rate": 3.9255902560547394e-05,
"loss": 1.7824,
"step": 152500
},
{
"epoch": 0.65,
"learning_rate": 3.922067601156558e-05,
"loss": 1.7829,
"step": 153000
},
{
"epoch": 0.65,
"learning_rate": 3.9185449462583774e-05,
"loss": 1.7797,
"step": 153500
},
{
"epoch": 0.65,
"learning_rate": 3.915022291360196e-05,
"loss": 1.7706,
"step": 154000
},
{
"epoch": 0.65,
"learning_rate": 3.911499636462015e-05,
"loss": 1.8029,
"step": 154500
},
{
"epoch": 0.66,
"learning_rate": 3.9079769815638335e-05,
"loss": 1.772,
"step": 155000
},
{
"epoch": 0.66,
"learning_rate": 3.904454326665652e-05,
"loss": 1.7736,
"step": 155500
},
{
"epoch": 0.66,
"learning_rate": 3.900931671767471e-05,
"loss": 1.7655,
"step": 156000
},
{
"epoch": 0.66,
"learning_rate": 3.89740901686929e-05,
"loss": 1.7748,
"step": 156500
},
{
"epoch": 0.66,
"learning_rate": 3.893886361971108e-05,
"loss": 1.791,
"step": 157000
},
{
"epoch": 0.67,
"learning_rate": 3.8903637070729276e-05,
"loss": 1.7676,
"step": 157500
},
{
"epoch": 0.67,
"learning_rate": 3.886841052174746e-05,
"loss": 1.7681,
"step": 158000
},
{
"epoch": 0.67,
"learning_rate": 3.883318397276565e-05,
"loss": 1.7815,
"step": 158500
},
{
"epoch": 0.67,
"learning_rate": 3.8797957423783844e-05,
"loss": 1.7793,
"step": 159000
},
{
"epoch": 0.67,
"learning_rate": 3.876273087480203e-05,
"loss": 1.7727,
"step": 159500
},
{
"epoch": 0.68,
"learning_rate": 3.872750432582022e-05,
"loss": 1.7707,
"step": 160000
},
{
"epoch": 0.68,
"learning_rate": 3.8692277776838405e-05,
"loss": 1.7681,
"step": 160500
},
{
"epoch": 0.68,
"learning_rate": 3.865705122785659e-05,
"loss": 1.7605,
"step": 161000
},
{
"epoch": 0.68,
"learning_rate": 3.862182467887478e-05,
"loss": 1.7668,
"step": 161500
},
{
"epoch": 0.68,
"learning_rate": 3.858659812989297e-05,
"loss": 1.7638,
"step": 162000
},
{
"epoch": 0.69,
"learning_rate": 3.855137158091115e-05,
"loss": 1.7543,
"step": 162500
},
{
"epoch": 0.69,
"learning_rate": 3.8516145031929346e-05,
"loss": 1.7586,
"step": 163000
},
{
"epoch": 0.69,
"learning_rate": 3.848091848294754e-05,
"loss": 1.7403,
"step": 163500
},
{
"epoch": 0.69,
"learning_rate": 3.844569193396572e-05,
"loss": 1.769,
"step": 164000
},
{
"epoch": 0.7,
"learning_rate": 3.841046538498391e-05,
"loss": 1.7605,
"step": 164500
},
{
"epoch": 0.7,
"learning_rate": 3.83752388360021e-05,
"loss": 1.7572,
"step": 165000
},
{
"epoch": 0.7,
"learning_rate": 3.834001228702029e-05,
"loss": 1.7739,
"step": 165500
},
{
"epoch": 0.7,
"learning_rate": 3.8304785738038474e-05,
"loss": 1.7649,
"step": 166000
},
{
"epoch": 0.7,
"learning_rate": 3.826955918905666e-05,
"loss": 1.748,
"step": 166500
},
{
"epoch": 0.71,
"learning_rate": 3.823433264007485e-05,
"loss": 1.7544,
"step": 167000
},
{
"epoch": 0.71,
"learning_rate": 3.819910609109304e-05,
"loss": 1.7466,
"step": 167500
},
{
"epoch": 0.71,
"learning_rate": 3.816387954211122e-05,
"loss": 1.7447,
"step": 168000
},
{
"epoch": 0.71,
"learning_rate": 3.8128652993129415e-05,
"loss": 1.7424,
"step": 168500
},
{
"epoch": 0.71,
"learning_rate": 3.809342644414761e-05,
"loss": 1.7502,
"step": 169000
},
{
"epoch": 0.72,
"learning_rate": 3.805819989516579e-05,
"loss": 1.7462,
"step": 169500
},
{
"epoch": 0.72,
"learning_rate": 3.802297334618398e-05,
"loss": 1.7351,
"step": 170000
},
{
"epoch": 0.72,
"learning_rate": 3.798774679720217e-05,
"loss": 1.7531,
"step": 170500
},
{
"epoch": 0.72,
"learning_rate": 3.795252024822036e-05,
"loss": 1.7257,
"step": 171000
},
{
"epoch": 0.72,
"learning_rate": 3.7917293699238544e-05,
"loss": 1.7429,
"step": 171500
},
{
"epoch": 0.73,
"learning_rate": 3.788206715025673e-05,
"loss": 1.7488,
"step": 172000
},
{
"epoch": 0.73,
"learning_rate": 3.784684060127492e-05,
"loss": 1.7516,
"step": 172500
},
{
"epoch": 0.73,
"learning_rate": 3.781161405229311e-05,
"loss": 1.741,
"step": 173000
},
{
"epoch": 0.73,
"learning_rate": 3.77763875033113e-05,
"loss": 1.7334,
"step": 173500
},
{
"epoch": 0.74,
"learning_rate": 3.7741160954329485e-05,
"loss": 1.7122,
"step": 174000
},
{
"epoch": 0.74,
"learning_rate": 3.770593440534768e-05,
"loss": 1.7641,
"step": 174500
},
{
"epoch": 0.74,
"learning_rate": 3.767070785636586e-05,
"loss": 1.7368,
"step": 175000
},
{
"epoch": 0.74,
"learning_rate": 3.763548130738405e-05,
"loss": 1.742,
"step": 175500
},
{
"epoch": 0.74,
"learning_rate": 3.760025475840224e-05,
"loss": 1.738,
"step": 176000
},
{
"epoch": 0.75,
"learning_rate": 3.7565028209420426e-05,
"loss": 1.7385,
"step": 176500
},
{
"epoch": 0.75,
"learning_rate": 3.752980166043861e-05,
"loss": 1.7339,
"step": 177000
},
{
"epoch": 0.75,
"learning_rate": 3.749457511145681e-05,
"loss": 1.7203,
"step": 177500
},
{
"epoch": 0.75,
"learning_rate": 3.745934856247499e-05,
"loss": 1.7333,
"step": 178000
},
{
"epoch": 0.75,
"learning_rate": 3.742412201349318e-05,
"loss": 1.734,
"step": 178500
},
{
"epoch": 0.76,
"learning_rate": 3.738889546451137e-05,
"loss": 1.736,
"step": 179000
},
{
"epoch": 0.76,
"learning_rate": 3.7353668915529554e-05,
"loss": 1.7323,
"step": 179500
},
{
"epoch": 0.76,
"learning_rate": 3.731844236654775e-05,
"loss": 1.7213,
"step": 180000
},
{
"epoch": 0.76,
"learning_rate": 3.728321581756593e-05,
"loss": 1.7312,
"step": 180500
},
{
"epoch": 0.77,
"learning_rate": 3.724798926858412e-05,
"loss": 1.7008,
"step": 181000
},
{
"epoch": 0.77,
"learning_rate": 3.721276271960231e-05,
"loss": 1.7275,
"step": 181500
},
{
"epoch": 0.77,
"learning_rate": 3.7177536170620496e-05,
"loss": 1.7313,
"step": 182000
},
{
"epoch": 0.77,
"learning_rate": 3.714230962163868e-05,
"loss": 1.7141,
"step": 182500
},
{
"epoch": 0.77,
"learning_rate": 3.7107083072656876e-05,
"loss": 1.7279,
"step": 183000
},
{
"epoch": 0.78,
"learning_rate": 3.7071856523675056e-05,
"loss": 1.7198,
"step": 183500
},
{
"epoch": 0.78,
"learning_rate": 3.703662997469325e-05,
"loss": 1.696,
"step": 184000
},
{
"epoch": 0.78,
"learning_rate": 3.700140342571144e-05,
"loss": 1.714,
"step": 184500
},
{
"epoch": 0.78,
"learning_rate": 3.6966176876729624e-05,
"loss": 1.7134,
"step": 185000
},
{
"epoch": 0.78,
"learning_rate": 3.693095032774782e-05,
"loss": 1.7247,
"step": 185500
},
{
"epoch": 0.79,
"learning_rate": 3.6895723778766e-05,
"loss": 1.7243,
"step": 186000
},
{
"epoch": 0.79,
"learning_rate": 3.686049722978419e-05,
"loss": 1.7078,
"step": 186500
},
{
"epoch": 0.79,
"learning_rate": 3.682527068080238e-05,
"loss": 1.7228,
"step": 187000
},
{
"epoch": 0.79,
"learning_rate": 3.6790044131820565e-05,
"loss": 1.715,
"step": 187500
},
{
"epoch": 0.79,
"learning_rate": 3.675481758283875e-05,
"loss": 1.6951,
"step": 188000
},
{
"epoch": 0.8,
"learning_rate": 3.6719591033856946e-05,
"loss": 1.6982,
"step": 188500
},
{
"epoch": 0.8,
"learning_rate": 3.668436448487513e-05,
"loss": 1.7091,
"step": 189000
},
{
"epoch": 0.8,
"learning_rate": 3.664913793589332e-05,
"loss": 1.712,
"step": 189500
},
{
"epoch": 0.8,
"learning_rate": 3.6613911386911506e-05,
"loss": 1.7082,
"step": 190000
},
{
"epoch": 0.81,
"learning_rate": 3.657868483792969e-05,
"loss": 1.711,
"step": 190500
},
{
"epoch": 0.81,
"learning_rate": 3.654345828894789e-05,
"loss": 1.7219,
"step": 191000
},
{
"epoch": 0.81,
"learning_rate": 3.650823173996607e-05,
"loss": 1.7177,
"step": 191500
},
{
"epoch": 0.81,
"learning_rate": 3.647300519098426e-05,
"loss": 1.7017,
"step": 192000
},
{
"epoch": 0.81,
"learning_rate": 3.643777864200245e-05,
"loss": 1.7206,
"step": 192500
},
{
"epoch": 0.82,
"learning_rate": 3.6402552093020635e-05,
"loss": 1.7188,
"step": 193000
},
{
"epoch": 0.82,
"learning_rate": 3.636732554403882e-05,
"loss": 1.6969,
"step": 193500
},
{
"epoch": 0.82,
"learning_rate": 3.6332098995057015e-05,
"loss": 1.7136,
"step": 194000
},
{
"epoch": 0.82,
"learning_rate": 3.62968724460752e-05,
"loss": 1.7186,
"step": 194500
},
{
"epoch": 0.82,
"learning_rate": 3.626164589709339e-05,
"loss": 1.7099,
"step": 195000
},
{
"epoch": 0.83,
"learning_rate": 3.6226419348111576e-05,
"loss": 1.7033,
"step": 195500
},
{
"epoch": 0.83,
"learning_rate": 3.619119279912976e-05,
"loss": 1.6896,
"step": 196000
},
{
"epoch": 0.83,
"learning_rate": 3.6155966250147956e-05,
"loss": 1.7101,
"step": 196500
},
{
"epoch": 0.83,
"learning_rate": 3.6120739701166137e-05,
"loss": 1.7098,
"step": 197000
},
{
"epoch": 0.83,
"learning_rate": 3.608551315218433e-05,
"loss": 1.6815,
"step": 197500
},
{
"epoch": 0.84,
"learning_rate": 3.605028660320252e-05,
"loss": 1.6913,
"step": 198000
},
{
"epoch": 0.84,
"learning_rate": 3.6015060054220704e-05,
"loss": 1.7059,
"step": 198500
},
{
"epoch": 0.84,
"learning_rate": 3.59798335052389e-05,
"loss": 1.6873,
"step": 199000
},
{
"epoch": 0.84,
"learning_rate": 3.5944606956257085e-05,
"loss": 1.6798,
"step": 199500
},
{
"epoch": 0.85,
"learning_rate": 3.590938040727527e-05,
"loss": 1.6992,
"step": 200000
},
{
"epoch": 0.85,
"learning_rate": 3.587415385829346e-05,
"loss": 1.6907,
"step": 200500
},
{
"epoch": 0.85,
"learning_rate": 3.5838927309311645e-05,
"loss": 1.6852,
"step": 201000
},
{
"epoch": 0.85,
"learning_rate": 3.580370076032983e-05,
"loss": 1.6826,
"step": 201500
},
{
"epoch": 0.85,
"learning_rate": 3.5768474211348026e-05,
"loss": 1.6713,
"step": 202000
},
{
"epoch": 0.86,
"learning_rate": 3.5733247662366206e-05,
"loss": 1.692,
"step": 202500
},
{
"epoch": 0.86,
"learning_rate": 3.56980211133844e-05,
"loss": 1.7061,
"step": 203000
},
{
"epoch": 0.86,
"learning_rate": 3.566279456440259e-05,
"loss": 1.6876,
"step": 203500
},
{
"epoch": 0.86,
"learning_rate": 3.5627568015420774e-05,
"loss": 1.6913,
"step": 204000
},
{
"epoch": 0.86,
"learning_rate": 3.559234146643897e-05,
"loss": 1.6888,
"step": 204500
},
{
"epoch": 0.87,
"learning_rate": 3.5557114917457154e-05,
"loss": 1.6828,
"step": 205000
},
{
"epoch": 0.87,
"learning_rate": 3.552188836847534e-05,
"loss": 1.6915,
"step": 205500
},
{
"epoch": 0.87,
"learning_rate": 3.548666181949353e-05,
"loss": 1.6778,
"step": 206000
},
{
"epoch": 0.87,
"learning_rate": 3.5451435270511715e-05,
"loss": 1.6868,
"step": 206500
},
{
"epoch": 0.88,
"learning_rate": 3.54162087215299e-05,
"loss": 1.6895,
"step": 207000
},
{
"epoch": 0.88,
"learning_rate": 3.5380982172548095e-05,
"loss": 1.6719,
"step": 207500
},
{
"epoch": 0.88,
"learning_rate": 3.534575562356628e-05,
"loss": 1.6942,
"step": 208000
},
{
"epoch": 0.88,
"learning_rate": 3.531052907458447e-05,
"loss": 1.6821,
"step": 208500
},
{
"epoch": 0.88,
"learning_rate": 3.5275302525602656e-05,
"loss": 1.6694,
"step": 209000
},
{
"epoch": 0.89,
"learning_rate": 3.524007597662084e-05,
"loss": 1.6886,
"step": 209500
},
{
"epoch": 0.89,
"learning_rate": 3.520484942763904e-05,
"loss": 1.6635,
"step": 210000
},
{
"epoch": 0.89,
"learning_rate": 3.5169622878657224e-05,
"loss": 1.6591,
"step": 210500
},
{
"epoch": 0.89,
"learning_rate": 3.513439632967541e-05,
"loss": 1.6875,
"step": 211000
},
{
"epoch": 0.89,
"learning_rate": 3.50991697806936e-05,
"loss": 1.66,
"step": 211500
},
{
"epoch": 0.9,
"learning_rate": 3.506394323171179e-05,
"loss": 1.6815,
"step": 212000
},
{
"epoch": 0.9,
"learning_rate": 3.502871668272997e-05,
"loss": 1.6714,
"step": 212500
},
{
"epoch": 0.9,
"learning_rate": 3.4993490133748165e-05,
"loss": 1.6703,
"step": 213000
},
{
"epoch": 0.9,
"learning_rate": 3.495826358476635e-05,
"loss": 1.6772,
"step": 213500
},
{
"epoch": 0.9,
"learning_rate": 3.492303703578454e-05,
"loss": 1.6698,
"step": 214000
},
{
"epoch": 0.91,
"learning_rate": 3.488781048680273e-05,
"loss": 1.6638,
"step": 214500
},
{
"epoch": 0.91,
"learning_rate": 3.485258393782091e-05,
"loss": 1.6613,
"step": 215000
},
{
"epoch": 0.91,
"learning_rate": 3.4817357388839106e-05,
"loss": 1.6832,
"step": 215500
},
{
"epoch": 0.91,
"learning_rate": 3.478213083985729e-05,
"loss": 1.6443,
"step": 216000
},
{
"epoch": 0.92,
"learning_rate": 3.474690429087548e-05,
"loss": 1.6696,
"step": 216500
},
{
"epoch": 0.92,
"learning_rate": 3.471167774189367e-05,
"loss": 1.6726,
"step": 217000
},
{
"epoch": 0.92,
"learning_rate": 3.467645119291186e-05,
"loss": 1.6643,
"step": 217500
},
{
"epoch": 0.92,
"learning_rate": 3.464122464393004e-05,
"loss": 1.6555,
"step": 218000
},
{
"epoch": 0.92,
"learning_rate": 3.4605998094948234e-05,
"loss": 1.6469,
"step": 218500
},
{
"epoch": 0.93,
"learning_rate": 3.457077154596642e-05,
"loss": 1.6534,
"step": 219000
},
{
"epoch": 0.93,
"learning_rate": 3.453554499698461e-05,
"loss": 1.6406,
"step": 219500
},
{
"epoch": 0.93,
"learning_rate": 3.45003184480028e-05,
"loss": 1.6616,
"step": 220000
},
{
"epoch": 0.93,
"learning_rate": 3.446509189902098e-05,
"loss": 1.6385,
"step": 220500
},
{
"epoch": 0.93,
"learning_rate": 3.4429865350039176e-05,
"loss": 1.6491,
"step": 221000
},
{
"epoch": 0.94,
"learning_rate": 3.439463880105736e-05,
"loss": 1.6511,
"step": 221500
},
{
"epoch": 0.94,
"learning_rate": 3.435941225207555e-05,
"loss": 1.6546,
"step": 222000
},
{
"epoch": 0.94,
"learning_rate": 3.4324185703093736e-05,
"loss": 1.6623,
"step": 222500
},
{
"epoch": 0.94,
"learning_rate": 3.428895915411193e-05,
"loss": 1.6536,
"step": 223000
},
{
"epoch": 0.94,
"learning_rate": 3.425373260513011e-05,
"loss": 1.6487,
"step": 223500
},
{
"epoch": 0.95,
"learning_rate": 3.4218506056148304e-05,
"loss": 1.6546,
"step": 224000
},
{
"epoch": 0.95,
"learning_rate": 3.418327950716649e-05,
"loss": 1.6564,
"step": 224500
},
{
"epoch": 0.95,
"learning_rate": 3.414805295818468e-05,
"loss": 1.655,
"step": 225000
},
{
"epoch": 0.95,
"learning_rate": 3.411282640920287e-05,
"loss": 1.6562,
"step": 225500
},
{
"epoch": 0.96,
"learning_rate": 3.407759986022105e-05,
"loss": 1.645,
"step": 226000
},
{
"epoch": 0.96,
"learning_rate": 3.4042373311239245e-05,
"loss": 1.6406,
"step": 226500
},
{
"epoch": 0.96,
"learning_rate": 3.400714676225743e-05,
"loss": 1.6181,
"step": 227000
},
{
"epoch": 0.96,
"learning_rate": 3.397192021327562e-05,
"loss": 1.648,
"step": 227500
},
{
"epoch": 0.96,
"learning_rate": 3.3936693664293806e-05,
"loss": 1.6429,
"step": 228000
},
{
"epoch": 0.97,
"learning_rate": 3.3901467115312e-05,
"loss": 1.6285,
"step": 228500
},
{
"epoch": 0.97,
"learning_rate": 3.386624056633018e-05,
"loss": 1.6624,
"step": 229000
},
{
"epoch": 0.97,
"learning_rate": 3.383101401734837e-05,
"loss": 1.6395,
"step": 229500
},
{
"epoch": 0.97,
"learning_rate": 3.379578746836656e-05,
"loss": 1.6601,
"step": 230000
},
{
"epoch": 0.97,
"learning_rate": 3.376056091938475e-05,
"loss": 1.6492,
"step": 230500
},
{
"epoch": 0.98,
"learning_rate": 3.372533437040294e-05,
"loss": 1.6444,
"step": 231000
},
{
"epoch": 0.98,
"learning_rate": 3.369010782142112e-05,
"loss": 1.6685,
"step": 231500
},
{
"epoch": 0.98,
"learning_rate": 3.3654881272439315e-05,
"loss": 1.644,
"step": 232000
},
{
"epoch": 0.98,
"learning_rate": 3.36196547234575e-05,
"loss": 1.6401,
"step": 232500
},
{
"epoch": 0.98,
"learning_rate": 3.358442817447569e-05,
"loss": 1.6424,
"step": 233000
},
{
"epoch": 0.99,
"learning_rate": 3.3549201625493875e-05,
"loss": 1.6277,
"step": 233500
},
{
"epoch": 0.99,
"learning_rate": 3.351397507651207e-05,
"loss": 1.6495,
"step": 234000
},
{
"epoch": 0.99,
"learning_rate": 3.347874852753025e-05,
"loss": 1.6405,
"step": 234500
},
{
"epoch": 0.99,
"learning_rate": 3.344352197854844e-05,
"loss": 1.6408,
"step": 235000
},
{
"epoch": 1.0,
"learning_rate": 3.340829542956663e-05,
"loss": 1.6469,
"step": 235500
},
{
"epoch": 1.0,
"learning_rate": 3.3373068880584817e-05,
"loss": 1.6347,
"step": 236000
},
{
"epoch": 1.0,
"learning_rate": 3.333784233160301e-05,
"loss": 1.6348,
"step": 236500
},
{
"epoch": 1.0,
"learning_rate": 3.330261578262119e-05,
"loss": 1.6232,
"step": 237000
},
{
"epoch": 1.0,
"learning_rate": 3.3267389233639384e-05,
"loss": 1.6322,
"step": 237500
},
{
"epoch": 1.01,
"learning_rate": 3.323216268465757e-05,
"loss": 1.6263,
"step": 238000
},
{
"epoch": 1.01,
"learning_rate": 3.319693613567576e-05,
"loss": 1.6207,
"step": 238500
},
{
"epoch": 1.01,
"learning_rate": 3.3161709586693945e-05,
"loss": 1.6282,
"step": 239000
},
{
"epoch": 1.01,
"learning_rate": 3.312648303771214e-05,
"loss": 1.6293,
"step": 239500
},
{
"epoch": 1.01,
"learning_rate": 3.3091256488730325e-05,
"loss": 1.6323,
"step": 240000
},
{
"epoch": 1.02,
"learning_rate": 3.305602993974851e-05,
"loss": 1.6343,
"step": 240500
},
{
"epoch": 1.02,
"learning_rate": 3.30208033907667e-05,
"loss": 1.6286,
"step": 241000
},
{
"epoch": 1.02,
"learning_rate": 3.2985576841784886e-05,
"loss": 1.6234,
"step": 241500
},
{
"epoch": 1.02,
"learning_rate": 3.295035029280308e-05,
"loss": 1.6215,
"step": 242000
},
{
"epoch": 1.03,
"learning_rate": 3.291512374382127e-05,
"loss": 1.6327,
"step": 242500
},
{
"epoch": 1.03,
"learning_rate": 3.2879897194839454e-05,
"loss": 1.6202,
"step": 243000
},
{
"epoch": 1.03,
"learning_rate": 3.284467064585764e-05,
"loss": 1.6296,
"step": 243500
},
{
"epoch": 1.03,
"learning_rate": 3.280944409687583e-05,
"loss": 1.6309,
"step": 244000
},
{
"epoch": 1.03,
"learning_rate": 3.2774217547894014e-05,
"loss": 1.6442,
"step": 244500
},
{
"epoch": 1.04,
"learning_rate": 3.273899099891221e-05,
"loss": 1.6292,
"step": 245000
},
{
"epoch": 1.04,
"learning_rate": 3.2703764449930395e-05,
"loss": 1.6409,
"step": 245500
},
{
"epoch": 1.04,
"learning_rate": 3.266853790094858e-05,
"loss": 1.6236,
"step": 246000
},
{
"epoch": 1.04,
"learning_rate": 3.2633311351966775e-05,
"loss": 1.6235,
"step": 246500
},
{
"epoch": 1.04,
"learning_rate": 3.2598084802984956e-05,
"loss": 1.6319,
"step": 247000
},
{
"epoch": 1.05,
"learning_rate": 3.256285825400315e-05,
"loss": 1.6215,
"step": 247500
},
{
"epoch": 1.05,
"learning_rate": 3.2527631705021336e-05,
"loss": 1.6065,
"step": 248000
},
{
"epoch": 1.05,
"learning_rate": 3.249240515603952e-05,
"loss": 1.6236,
"step": 248500
},
{
"epoch": 1.05,
"learning_rate": 3.245717860705771e-05,
"loss": 1.5967,
"step": 249000
},
{
"epoch": 1.05,
"learning_rate": 3.24219520580759e-05,
"loss": 1.6094,
"step": 249500
},
{
"epoch": 1.06,
"learning_rate": 3.238672550909409e-05,
"loss": 1.6157,
"step": 250000
},
{
"epoch": 1.06,
"learning_rate": 3.235149896011228e-05,
"loss": 1.6229,
"step": 250500
},
{
"epoch": 1.06,
"learning_rate": 3.2316272411130464e-05,
"loss": 1.6157,
"step": 251000
},
{
"epoch": 1.06,
"learning_rate": 3.228104586214865e-05,
"loss": 1.6239,
"step": 251500
},
{
"epoch": 1.07,
"learning_rate": 3.2245819313166845e-05,
"loss": 1.6115,
"step": 252000
},
{
"epoch": 1.07,
"learning_rate": 3.2210592764185025e-05,
"loss": 1.613,
"step": 252500
},
{
"epoch": 1.07,
"learning_rate": 3.217536621520322e-05,
"loss": 1.6138,
"step": 253000
},
{
"epoch": 1.07,
"learning_rate": 3.2140139666221406e-05,
"loss": 1.6095,
"step": 253500
},
{
"epoch": 1.07,
"learning_rate": 3.210491311723959e-05,
"loss": 1.6144,
"step": 254000
},
{
"epoch": 1.08,
"learning_rate": 3.206968656825778e-05,
"loss": 1.6083,
"step": 254500
},
{
"epoch": 1.08,
"learning_rate": 3.2034460019275966e-05,
"loss": 1.6129,
"step": 255000
},
{
"epoch": 1.08,
"learning_rate": 3.199923347029416e-05,
"loss": 1.6139,
"step": 255500
},
{
"epoch": 1.08,
"learning_rate": 3.196400692131235e-05,
"loss": 1.612,
"step": 256000
},
{
"epoch": 1.08,
"learning_rate": 3.1928780372330534e-05,
"loss": 1.6092,
"step": 256500
},
{
"epoch": 1.09,
"learning_rate": 3.189355382334872e-05,
"loss": 1.6196,
"step": 257000
},
{
"epoch": 1.09,
"learning_rate": 3.1858327274366914e-05,
"loss": 1.6083,
"step": 257500
},
{
"epoch": 1.09,
"learning_rate": 3.1823100725385094e-05,
"loss": 1.6137,
"step": 258000
},
{
"epoch": 1.09,
"learning_rate": 3.178787417640329e-05,
"loss": 1.6011,
"step": 258500
},
{
"epoch": 1.09,
"learning_rate": 3.1752647627421475e-05,
"loss": 1.5961,
"step": 259000
},
{
"epoch": 1.1,
"learning_rate": 3.171742107843966e-05,
"loss": 1.5967,
"step": 259500
},
{
"epoch": 1.1,
"learning_rate": 3.168219452945785e-05,
"loss": 1.6007,
"step": 260000
},
{
"epoch": 1.1,
"learning_rate": 3.1646967980476036e-05,
"loss": 1.5962,
"step": 260500
},
{
"epoch": 1.1,
"learning_rate": 3.161174143149423e-05,
"loss": 1.5978,
"step": 261000
},
{
"epoch": 1.11,
"learning_rate": 3.1576514882512416e-05,
"loss": 1.6136,
"step": 261500
},
{
"epoch": 1.11,
"learning_rate": 3.15412883335306e-05,
"loss": 1.599,
"step": 262000
},
{
"epoch": 1.11,
"learning_rate": 3.150606178454879e-05,
"loss": 1.6165,
"step": 262500
},
{
"epoch": 1.11,
"learning_rate": 3.1470835235566984e-05,
"loss": 1.6045,
"step": 263000
},
{
"epoch": 1.11,
"learning_rate": 3.1435608686585164e-05,
"loss": 1.6059,
"step": 263500
},
{
"epoch": 1.12,
"learning_rate": 3.140038213760336e-05,
"loss": 1.6107,
"step": 264000
},
{
"epoch": 1.12,
"learning_rate": 3.1365155588621545e-05,
"loss": 1.6015,
"step": 264500
},
{
"epoch": 1.12,
"learning_rate": 3.132992903963973e-05,
"loss": 1.5839,
"step": 265000
},
{
"epoch": 1.12,
"learning_rate": 3.1294702490657925e-05,
"loss": 1.6052,
"step": 265500
},
{
"epoch": 1.12,
"learning_rate": 3.1259475941676105e-05,
"loss": 1.6105,
"step": 266000
},
{
"epoch": 1.13,
"learning_rate": 3.12242493926943e-05,
"loss": 1.5993,
"step": 266500
},
{
"epoch": 1.13,
"learning_rate": 3.1189022843712486e-05,
"loss": 1.5978,
"step": 267000
},
{
"epoch": 1.13,
"learning_rate": 3.115379629473067e-05,
"loss": 1.5906,
"step": 267500
},
{
"epoch": 1.13,
"learning_rate": 3.111856974574886e-05,
"loss": 1.6163,
"step": 268000
},
{
"epoch": 1.13,
"learning_rate": 3.108334319676705e-05,
"loss": 1.5865,
"step": 268500
},
{
"epoch": 1.14,
"learning_rate": 3.1048116647785233e-05,
"loss": 1.5982,
"step": 269000
},
{
"epoch": 1.14,
"learning_rate": 3.101289009880343e-05,
"loss": 1.5857,
"step": 269500
},
{
"epoch": 1.14,
"learning_rate": 3.0977663549821614e-05,
"loss": 1.5938,
"step": 270000
},
{
"epoch": 1.14,
"learning_rate": 3.09424370008398e-05,
"loss": 1.5846,
"step": 270500
},
{
"epoch": 1.15,
"learning_rate": 3.0907210451857995e-05,
"loss": 1.5827,
"step": 271000
},
{
"epoch": 1.15,
"learning_rate": 3.087198390287618e-05,
"loss": 1.585,
"step": 271500
},
{
"epoch": 1.15,
"learning_rate": 3.083675735389437e-05,
"loss": 1.6006,
"step": 272000
},
{
"epoch": 1.15,
"learning_rate": 3.0801530804912555e-05,
"loss": 1.5951,
"step": 272500
},
{
"epoch": 1.15,
"learning_rate": 3.076630425593074e-05,
"loss": 1.5691,
"step": 273000
},
{
"epoch": 1.16,
"learning_rate": 3.073107770694893e-05,
"loss": 1.6121,
"step": 273500
},
{
"epoch": 1.16,
"learning_rate": 3.069585115796712e-05,
"loss": 1.5855,
"step": 274000
},
{
"epoch": 1.16,
"learning_rate": 3.06606246089853e-05,
"loss": 1.5982,
"step": 274500
},
{
"epoch": 1.16,
"learning_rate": 3.0625398060003497e-05,
"loss": 1.5991,
"step": 275000
},
{
"epoch": 1.16,
"learning_rate": 3.0590171511021684e-05,
"loss": 1.5902,
"step": 275500
},
{
"epoch": 1.17,
"learning_rate": 3.055494496203987e-05,
"loss": 1.5886,
"step": 276000
},
{
"epoch": 1.17,
"learning_rate": 3.0519718413058064e-05,
"loss": 1.578,
"step": 276500
},
{
"epoch": 1.17,
"learning_rate": 3.0484491864076248e-05,
"loss": 1.584,
"step": 277000
},
{
"epoch": 1.17,
"learning_rate": 3.0449265315094438e-05,
"loss": 1.5859,
"step": 277500
},
{
"epoch": 1.18,
"learning_rate": 3.0414038766112625e-05,
"loss": 1.5647,
"step": 278000
},
{
"epoch": 1.18,
"learning_rate": 3.0378812217130815e-05,
"loss": 1.5826,
"step": 278500
},
{
"epoch": 1.18,
"learning_rate": 3.0343585668149e-05,
"loss": 1.5689,
"step": 279000
},
{
"epoch": 1.18,
"learning_rate": 3.030835911916719e-05,
"loss": 1.5738,
"step": 279500
},
{
"epoch": 1.18,
"learning_rate": 3.0273132570185376e-05,
"loss": 1.5774,
"step": 280000
},
{
"epoch": 1.19,
"learning_rate": 3.0237906021203566e-05,
"loss": 1.5727,
"step": 280500
},
{
"epoch": 1.19,
"learning_rate": 3.0202679472221756e-05,
"loss": 1.5823,
"step": 281000
},
{
"epoch": 1.19,
"learning_rate": 3.0167452923239943e-05,
"loss": 1.59,
"step": 281500
},
{
"epoch": 1.19,
"learning_rate": 3.0132226374258134e-05,
"loss": 1.5751,
"step": 282000
},
{
"epoch": 1.19,
"learning_rate": 3.0096999825276317e-05,
"loss": 1.5672,
"step": 282500
},
{
"epoch": 1.2,
"learning_rate": 3.0061773276294507e-05,
"loss": 1.571,
"step": 283000
},
{
"epoch": 1.2,
"learning_rate": 3.0026546727312694e-05,
"loss": 1.5825,
"step": 283500
},
{
"epoch": 1.2,
"learning_rate": 2.9991320178330885e-05,
"loss": 1.5836,
"step": 284000
},
{
"epoch": 1.2,
"learning_rate": 2.9956093629349068e-05,
"loss": 1.5874,
"step": 284500
},
{
"epoch": 1.2,
"learning_rate": 2.992086708036726e-05,
"loss": 1.5686,
"step": 285000
},
{
"epoch": 1.21,
"learning_rate": 2.9885640531385445e-05,
"loss": 1.5852,
"step": 285500
},
{
"epoch": 1.21,
"learning_rate": 2.9850413982403636e-05,
"loss": 1.5594,
"step": 286000
},
{
"epoch": 1.21,
"learning_rate": 2.9815187433421826e-05,
"loss": 1.5611,
"step": 286500
},
{
"epoch": 1.21,
"learning_rate": 2.9779960884440013e-05,
"loss": 1.5729,
"step": 287000
},
{
"epoch": 1.22,
"learning_rate": 2.9744734335458203e-05,
"loss": 1.5689,
"step": 287500
},
{
"epoch": 1.22,
"learning_rate": 2.9709507786476387e-05,
"loss": 1.5816,
"step": 288000
},
{
"epoch": 1.22,
"learning_rate": 2.9674281237494577e-05,
"loss": 1.5728,
"step": 288500
},
{
"epoch": 1.22,
"learning_rate": 2.9639054688512764e-05,
"loss": 1.5673,
"step": 289000
},
{
"epoch": 1.22,
"learning_rate": 2.9603828139530954e-05,
"loss": 1.5657,
"step": 289500
},
{
"epoch": 1.23,
"learning_rate": 2.9568601590549138e-05,
"loss": 1.564,
"step": 290000
},
{
"epoch": 1.23,
"learning_rate": 2.9533375041567328e-05,
"loss": 1.5606,
"step": 290500
},
{
"epoch": 1.23,
"learning_rate": 2.949814849258552e-05,
"loss": 1.5582,
"step": 291000
},
{
"epoch": 1.23,
"learning_rate": 2.9462921943603705e-05,
"loss": 1.5718,
"step": 291500
},
{
"epoch": 1.23,
"learning_rate": 2.9427695394621895e-05,
"loss": 1.5769,
"step": 292000
},
{
"epoch": 1.24,
"learning_rate": 2.9392468845640082e-05,
"loss": 1.5695,
"step": 292500
},
{
"epoch": 1.24,
"learning_rate": 2.9357242296658273e-05,
"loss": 1.5549,
"step": 293000
},
{
"epoch": 1.24,
"learning_rate": 2.9322015747676456e-05,
"loss": 1.5715,
"step": 293500
},
{
"epoch": 1.24,
"learning_rate": 2.9286789198694646e-05,
"loss": 1.5507,
"step": 294000
},
{
"epoch": 1.24,
"learning_rate": 2.9251562649712833e-05,
"loss": 1.5762,
"step": 294500
},
{
"epoch": 1.25,
"learning_rate": 2.9216336100731024e-05,
"loss": 1.5684,
"step": 295000
},
{
"epoch": 1.25,
"learning_rate": 2.9181109551749207e-05,
"loss": 1.5616,
"step": 295500
},
{
"epoch": 1.25,
"learning_rate": 2.91458830027674e-05,
"loss": 1.5824,
"step": 296000
},
{
"epoch": 1.25,
"learning_rate": 2.911065645378559e-05,
"loss": 1.5609,
"step": 296500
},
{
"epoch": 1.26,
"learning_rate": 2.9075429904803775e-05,
"loss": 1.5675,
"step": 297000
},
{
"epoch": 1.26,
"learning_rate": 2.9040203355821965e-05,
"loss": 1.5785,
"step": 297500
},
{
"epoch": 1.26,
"learning_rate": 2.9004976806840152e-05,
"loss": 1.5612,
"step": 298000
},
{
"epoch": 1.26,
"learning_rate": 2.8969750257858342e-05,
"loss": 1.5634,
"step": 298500
},
{
"epoch": 1.26,
"learning_rate": 2.8934523708876526e-05,
"loss": 1.5721,
"step": 299000
},
{
"epoch": 1.27,
"learning_rate": 2.8899297159894716e-05,
"loss": 1.5641,
"step": 299500
},
{
"epoch": 1.27,
"learning_rate": 2.8864070610912903e-05,
"loss": 1.5598,
"step": 300000
},
{
"epoch": 1.27,
"learning_rate": 2.8828844061931093e-05,
"loss": 1.5645,
"step": 300500
},
{
"epoch": 1.27,
"learning_rate": 2.8793617512949283e-05,
"loss": 1.5563,
"step": 301000
},
{
"epoch": 1.27,
"learning_rate": 2.875839096396747e-05,
"loss": 1.5488,
"step": 301500
},
{
"epoch": 1.28,
"learning_rate": 2.872316441498566e-05,
"loss": 1.565,
"step": 302000
},
{
"epoch": 1.28,
"learning_rate": 2.8687937866003844e-05,
"loss": 1.5612,
"step": 302500
},
{
"epoch": 1.28,
"learning_rate": 2.8652711317022034e-05,
"loss": 1.5537,
"step": 303000
},
{
"epoch": 1.28,
"learning_rate": 2.861748476804022e-05,
"loss": 1.5709,
"step": 303500
},
{
"epoch": 1.29,
"learning_rate": 2.858225821905841e-05,
"loss": 1.5456,
"step": 304000
},
{
"epoch": 1.29,
"learning_rate": 2.8547031670076595e-05,
"loss": 1.5473,
"step": 304500
},
{
"epoch": 1.29,
"learning_rate": 2.8511805121094785e-05,
"loss": 1.5592,
"step": 305000
},
{
"epoch": 1.29,
"learning_rate": 2.8476578572112972e-05,
"loss": 1.5503,
"step": 305500
},
{
"epoch": 1.29,
"learning_rate": 2.8441352023131162e-05,
"loss": 1.5577,
"step": 306000
},
{
"epoch": 1.3,
"learning_rate": 2.8406125474149353e-05,
"loss": 1.5548,
"step": 306500
},
{
"epoch": 1.3,
"learning_rate": 2.837089892516754e-05,
"loss": 1.5601,
"step": 307000
},
{
"epoch": 1.3,
"learning_rate": 2.833567237618573e-05,
"loss": 1.556,
"step": 307500
},
{
"epoch": 1.3,
"learning_rate": 2.8300445827203913e-05,
"loss": 1.5495,
"step": 308000
},
{
"epoch": 1.3,
"learning_rate": 2.8265219278222104e-05,
"loss": 1.5504,
"step": 308500
},
{
"epoch": 1.31,
"learning_rate": 2.822999272924029e-05,
"loss": 1.5581,
"step": 309000
},
{
"epoch": 1.31,
"learning_rate": 2.819476618025848e-05,
"loss": 1.5516,
"step": 309500
},
{
"epoch": 1.31,
"learning_rate": 2.8159539631276664e-05,
"loss": 1.543,
"step": 310000
},
{
"epoch": 1.31,
"learning_rate": 2.8124313082294855e-05,
"loss": 1.555,
"step": 310500
},
{
"epoch": 1.31,
"learning_rate": 2.808908653331304e-05,
"loss": 1.5422,
"step": 311000
},
{
"epoch": 1.32,
"learning_rate": 2.8053859984331232e-05,
"loss": 1.5572,
"step": 311500
},
{
"epoch": 1.32,
"learning_rate": 2.8018633435349422e-05,
"loss": 1.5592,
"step": 312000
},
{
"epoch": 1.32,
"learning_rate": 2.798340688636761e-05,
"loss": 1.5585,
"step": 312500
},
{
"epoch": 1.32,
"learning_rate": 2.79481803373858e-05,
"loss": 1.54,
"step": 313000
},
{
"epoch": 1.33,
"learning_rate": 2.7912953788403983e-05,
"loss": 1.5433,
"step": 313500
},
{
"epoch": 1.33,
"learning_rate": 2.7877727239422173e-05,
"loss": 1.5444,
"step": 314000
},
{
"epoch": 1.33,
"learning_rate": 2.784250069044036e-05,
"loss": 1.5474,
"step": 314500
},
{
"epoch": 1.33,
"learning_rate": 2.780727414145855e-05,
"loss": 1.557,
"step": 315000
},
{
"epoch": 1.33,
"learning_rate": 2.7772047592476734e-05,
"loss": 1.5388,
"step": 315500
},
{
"epoch": 1.34,
"learning_rate": 2.7736821043494928e-05,
"loss": 1.5422,
"step": 316000
},
{
"epoch": 1.34,
"learning_rate": 2.7701594494513118e-05,
"loss": 1.55,
"step": 316500
},
{
"epoch": 1.34,
"learning_rate": 2.76663679455313e-05,
"loss": 1.5508,
"step": 317000
},
{
"epoch": 1.34,
"learning_rate": 2.7631141396549492e-05,
"loss": 1.5562,
"step": 317500
},
{
"epoch": 1.34,
"learning_rate": 2.759591484756768e-05,
"loss": 1.5494,
"step": 318000
},
{
"epoch": 1.35,
"learning_rate": 2.756068829858587e-05,
"loss": 1.5501,
"step": 318500
},
{
"epoch": 1.35,
"learning_rate": 2.7525461749604052e-05,
"loss": 1.5343,
"step": 319000
},
{
"epoch": 1.35,
"learning_rate": 2.7490235200622243e-05,
"loss": 1.5422,
"step": 319500
},
{
"epoch": 1.35,
"learning_rate": 2.745500865164043e-05,
"loss": 1.5431,
"step": 320000
},
{
"epoch": 1.35,
"learning_rate": 2.741978210265862e-05,
"loss": 1.5465,
"step": 320500
},
{
"epoch": 1.36,
"learning_rate": 2.7384555553676803e-05,
"loss": 1.5319,
"step": 321000
},
{
"epoch": 1.36,
"learning_rate": 2.7349329004694997e-05,
"loss": 1.5447,
"step": 321500
},
{
"epoch": 1.36,
"learning_rate": 2.7314102455713187e-05,
"loss": 1.5526,
"step": 322000
},
{
"epoch": 1.36,
"learning_rate": 2.727887590673137e-05,
"loss": 1.5406,
"step": 322500
},
{
"epoch": 1.37,
"learning_rate": 2.724364935774956e-05,
"loss": 1.5548,
"step": 323000
},
{
"epoch": 1.37,
"learning_rate": 2.7208422808767748e-05,
"loss": 1.5251,
"step": 323500
},
{
"epoch": 1.37,
"learning_rate": 2.717319625978594e-05,
"loss": 1.5489,
"step": 324000
},
{
"epoch": 1.37,
"learning_rate": 2.7137969710804122e-05,
"loss": 1.5399,
"step": 324500
},
{
"epoch": 1.37,
"learning_rate": 2.7102743161822312e-05,
"loss": 1.5326,
"step": 325000
},
{
"epoch": 1.38,
"learning_rate": 2.70675166128405e-05,
"loss": 1.539,
"step": 325500
},
{
"epoch": 1.38,
"learning_rate": 2.703229006385869e-05,
"loss": 1.533,
"step": 326000
},
{
"epoch": 1.38,
"learning_rate": 2.6997063514876876e-05,
"loss": 1.5366,
"step": 326500
},
{
"epoch": 1.38,
"learning_rate": 2.6961836965895067e-05,
"loss": 1.5315,
"step": 327000
},
{
"epoch": 1.38,
"learning_rate": 2.6926610416913257e-05,
"loss": 1.5349,
"step": 327500
},
{
"epoch": 1.39,
"learning_rate": 2.689138386793144e-05,
"loss": 1.5236,
"step": 328000
},
{
"epoch": 1.39,
"learning_rate": 2.685615731894963e-05,
"loss": 1.529,
"step": 328500
},
{
"epoch": 1.39,
"learning_rate": 2.6820930769967818e-05,
"loss": 1.5363,
"step": 329000
},
{
"epoch": 1.39,
"learning_rate": 2.6785704220986008e-05,
"loss": 1.5335,
"step": 329500
},
{
"epoch": 1.39,
"learning_rate": 2.675047767200419e-05,
"loss": 1.5194,
"step": 330000
},
{
"epoch": 1.4,
"learning_rate": 2.6715251123022385e-05,
"loss": 1.5217,
"step": 330500
},
{
"epoch": 1.4,
"learning_rate": 2.668002457404057e-05,
"loss": 1.527,
"step": 331000
},
{
"epoch": 1.4,
"learning_rate": 2.664479802505876e-05,
"loss": 1.5272,
"step": 331500
},
{
"epoch": 1.4,
"learning_rate": 2.660957147607695e-05,
"loss": 1.5252,
"step": 332000
},
{
"epoch": 1.41,
"learning_rate": 2.6574344927095136e-05,
"loss": 1.527,
"step": 332500
},
{
"epoch": 1.41,
"learning_rate": 2.6539118378113326e-05,
"loss": 1.5201,
"step": 333000
},
{
"epoch": 1.41,
"learning_rate": 2.650389182913151e-05,
"loss": 1.53,
"step": 333500
},
{
"epoch": 1.41,
"learning_rate": 2.64686652801497e-05,
"loss": 1.5257,
"step": 334000
},
{
"epoch": 1.41,
"learning_rate": 2.6433438731167887e-05,
"loss": 1.5265,
"step": 334500
},
{
"epoch": 1.42,
"learning_rate": 2.6398212182186077e-05,
"loss": 1.5212,
"step": 335000
},
{
"epoch": 1.42,
"learning_rate": 2.636298563320426e-05,
"loss": 1.528,
"step": 335500
},
{
"epoch": 1.42,
"learning_rate": 2.6327759084222455e-05,
"loss": 1.5323,
"step": 336000
},
{
"epoch": 1.42,
"learning_rate": 2.6292532535240638e-05,
"loss": 1.5255,
"step": 336500
},
{
"epoch": 1.42,
"learning_rate": 2.625730598625883e-05,
"loss": 1.5186,
"step": 337000
},
{
"epoch": 1.43,
"learning_rate": 2.622207943727702e-05,
"loss": 1.5431,
"step": 337500
},
{
"epoch": 1.43,
"learning_rate": 2.6186852888295206e-05,
"loss": 1.5162,
"step": 338000
},
{
"epoch": 1.43,
"learning_rate": 2.6151626339313396e-05,
"loss": 1.5148,
"step": 338500
},
{
"epoch": 1.43,
"learning_rate": 2.611639979033158e-05,
"loss": 1.5157,
"step": 339000
},
{
"epoch": 1.44,
"learning_rate": 2.608117324134977e-05,
"loss": 1.5377,
"step": 339500
},
{
"epoch": 1.44,
"learning_rate": 2.6045946692367957e-05,
"loss": 1.5207,
"step": 340000
},
{
"epoch": 1.44,
"learning_rate": 2.6010720143386147e-05,
"loss": 1.5013,
"step": 340500
},
{
"epoch": 1.44,
"learning_rate": 2.5975493594404334e-05,
"loss": 1.5212,
"step": 341000
},
{
"epoch": 1.44,
"learning_rate": 2.5940267045422524e-05,
"loss": 1.531,
"step": 341500
},
{
"epoch": 1.45,
"learning_rate": 2.5905040496440714e-05,
"loss": 1.5161,
"step": 342000
},
{
"epoch": 1.45,
"learning_rate": 2.5869813947458898e-05,
"loss": 1.5135,
"step": 342500
},
{
"epoch": 1.45,
"learning_rate": 2.5834587398477088e-05,
"loss": 1.5253,
"step": 343000
},
{
"epoch": 1.45,
"learning_rate": 2.5799360849495275e-05,
"loss": 1.5274,
"step": 343500
},
{
"epoch": 1.45,
"learning_rate": 2.5764134300513465e-05,
"loss": 1.5188,
"step": 344000
},
{
"epoch": 1.46,
"learning_rate": 2.572890775153165e-05,
"loss": 1.5449,
"step": 344500
},
{
"epoch": 1.46,
"learning_rate": 2.569368120254984e-05,
"loss": 1.51,
"step": 345000
},
{
"epoch": 1.46,
"learning_rate": 2.5658454653568026e-05,
"loss": 1.5042,
"step": 345500
},
{
"epoch": 1.46,
"learning_rate": 2.5623228104586216e-05,
"loss": 1.5027,
"step": 346000
},
{
"epoch": 1.46,
"learning_rate": 2.5588001555604403e-05,
"loss": 1.522,
"step": 346500
},
{
"epoch": 1.47,
"learning_rate": 2.5552775006622593e-05,
"loss": 1.5235,
"step": 347000
},
{
"epoch": 1.47,
"learning_rate": 2.5517548457640784e-05,
"loss": 1.5245,
"step": 347500
},
{
"epoch": 1.47,
"learning_rate": 2.5482321908658967e-05,
"loss": 1.5253,
"step": 348000
},
{
"epoch": 1.47,
"learning_rate": 2.5447095359677158e-05,
"loss": 1.5275,
"step": 348500
},
{
"epoch": 1.48,
"learning_rate": 2.5411868810695344e-05,
"loss": 1.5279,
"step": 349000
},
{
"epoch": 1.48,
"learning_rate": 2.5376642261713535e-05,
"loss": 1.5291,
"step": 349500
},
{
"epoch": 1.48,
"learning_rate": 2.5341415712731718e-05,
"loss": 1.5053,
"step": 350000
},
{
"epoch": 1.48,
"learning_rate": 2.5306189163749912e-05,
"loss": 1.5128,
"step": 350500
},
{
"epoch": 1.48,
"learning_rate": 2.5270962614768095e-05,
"loss": 1.5131,
"step": 351000
},
{
"epoch": 1.49,
"learning_rate": 2.5235736065786286e-05,
"loss": 1.5201,
"step": 351500
},
{
"epoch": 1.49,
"learning_rate": 2.5200509516804473e-05,
"loss": 1.5065,
"step": 352000
},
{
"epoch": 1.49,
"learning_rate": 2.5165282967822663e-05,
"loss": 1.5378,
"step": 352500
},
{
"epoch": 1.49,
"learning_rate": 2.5130056418840853e-05,
"loss": 1.5125,
"step": 353000
},
{
"epoch": 1.49,
"learning_rate": 2.5094829869859037e-05,
"loss": 1.5002,
"step": 353500
},
{
"epoch": 1.5,
"learning_rate": 2.5059603320877227e-05,
"loss": 1.511,
"step": 354000
},
{
"epoch": 1.5,
"learning_rate": 2.5024376771895414e-05,
"loss": 1.5093,
"step": 354500
},
{
"epoch": 1.5,
"learning_rate": 2.4989150222913604e-05,
"loss": 1.4916,
"step": 355000
},
{
"epoch": 1.5,
"learning_rate": 2.495392367393179e-05,
"loss": 1.5165,
"step": 355500
},
{
"epoch": 1.5,
"learning_rate": 2.491869712494998e-05,
"loss": 1.5244,
"step": 356000
},
{
"epoch": 1.51,
"learning_rate": 2.488347057596817e-05,
"loss": 1.5193,
"step": 356500
},
{
"epoch": 1.51,
"learning_rate": 2.4848244026986355e-05,
"loss": 1.5006,
"step": 357000
},
{
"epoch": 1.51,
"learning_rate": 2.4813017478004542e-05,
"loss": 1.5149,
"step": 357500
},
{
"epoch": 1.51,
"learning_rate": 2.4777790929022732e-05,
"loss": 1.5209,
"step": 358000
},
{
"epoch": 1.52,
"learning_rate": 2.474256438004092e-05,
"loss": 1.5119,
"step": 358500
},
{
"epoch": 1.52,
"learning_rate": 2.4707337831059106e-05,
"loss": 1.5099,
"step": 359000
},
{
"epoch": 1.52,
"learning_rate": 2.4672111282077297e-05,
"loss": 1.5027,
"step": 359500
},
{
"epoch": 1.52,
"learning_rate": 2.4636884733095487e-05,
"loss": 1.5085,
"step": 360000
},
{
"epoch": 1.52,
"learning_rate": 2.4601658184113674e-05,
"loss": 1.5088,
"step": 360500
},
{
"epoch": 1.53,
"learning_rate": 2.456643163513186e-05,
"loss": 1.5116,
"step": 361000
},
{
"epoch": 1.53,
"learning_rate": 2.453120508615005e-05,
"loss": 1.5279,
"step": 361500
},
{
"epoch": 1.53,
"learning_rate": 2.4495978537168238e-05,
"loss": 1.5016,
"step": 362000
},
{
"epoch": 1.53,
"learning_rate": 2.4460751988186425e-05,
"loss": 1.5017,
"step": 362500
},
{
"epoch": 1.53,
"learning_rate": 2.4425525439204615e-05,
"loss": 1.5148,
"step": 363000
},
{
"epoch": 1.54,
"learning_rate": 2.4390298890222802e-05,
"loss": 1.4919,
"step": 363500
},
{
"epoch": 1.54,
"learning_rate": 2.435507234124099e-05,
"loss": 1.5061,
"step": 364000
},
{
"epoch": 1.54,
"learning_rate": 2.4319845792259176e-05,
"loss": 1.5023,
"step": 364500
},
{
"epoch": 1.54,
"learning_rate": 2.428461924327737e-05,
"loss": 1.4962,
"step": 365000
},
{
"epoch": 1.55,
"learning_rate": 2.4249392694295556e-05,
"loss": 1.4906,
"step": 365500
},
{
"epoch": 1.55,
"learning_rate": 2.4214166145313743e-05,
"loss": 1.5112,
"step": 366000
},
{
"epoch": 1.55,
"learning_rate": 2.417893959633193e-05,
"loss": 1.5041,
"step": 366500
},
{
"epoch": 1.55,
"learning_rate": 2.414371304735012e-05,
"loss": 1.4872,
"step": 367000
},
{
"epoch": 1.55,
"learning_rate": 2.4108486498368307e-05,
"loss": 1.5038,
"step": 367500
},
{
"epoch": 1.56,
"learning_rate": 2.4073259949386494e-05,
"loss": 1.4959,
"step": 368000
},
{
"epoch": 1.56,
"learning_rate": 2.4038033400404684e-05,
"loss": 1.4873,
"step": 368500
},
{
"epoch": 1.56,
"learning_rate": 2.400280685142287e-05,
"loss": 1.5008,
"step": 369000
},
{
"epoch": 1.56,
"learning_rate": 2.3967580302441058e-05,
"loss": 1.5,
"step": 369500
},
{
"epoch": 1.56,
"learning_rate": 2.393235375345925e-05,
"loss": 1.5061,
"step": 370000
},
{
"epoch": 1.57,
"learning_rate": 2.389712720447744e-05,
"loss": 1.4817,
"step": 370500
},
{
"epoch": 1.57,
"learning_rate": 2.3861900655495626e-05,
"loss": 1.5042,
"step": 371000
},
{
"epoch": 1.57,
"learning_rate": 2.3826674106513813e-05,
"loss": 1.4949,
"step": 371500
},
{
"epoch": 1.57,
"learning_rate": 2.3791447557532e-05,
"loss": 1.4928,
"step": 372000
},
{
"epoch": 1.57,
"learning_rate": 2.375622100855019e-05,
"loss": 1.4913,
"step": 372500
},
{
"epoch": 1.58,
"learning_rate": 2.3720994459568377e-05,
"loss": 1.4902,
"step": 373000
},
{
"epoch": 1.58,
"learning_rate": 2.3685767910586564e-05,
"loss": 1.4975,
"step": 373500
},
{
"epoch": 1.58,
"learning_rate": 2.3650541361604754e-05,
"loss": 1.4784,
"step": 374000
},
{
"epoch": 1.58,
"learning_rate": 2.361531481262294e-05,
"loss": 1.4918,
"step": 374500
},
{
"epoch": 1.59,
"learning_rate": 2.358008826364113e-05,
"loss": 1.4811,
"step": 375000
},
{
"epoch": 1.59,
"learning_rate": 2.3544861714659318e-05,
"loss": 1.4877,
"step": 375500
},
{
"epoch": 1.59,
"learning_rate": 2.350963516567751e-05,
"loss": 1.4916,
"step": 376000
},
{
"epoch": 1.59,
"learning_rate": 2.3474408616695695e-05,
"loss": 1.4884,
"step": 376500
},
{
"epoch": 1.59,
"learning_rate": 2.3439182067713882e-05,
"loss": 1.496,
"step": 377000
},
{
"epoch": 1.6,
"learning_rate": 2.3403955518732072e-05,
"loss": 1.4891,
"step": 377500
},
{
"epoch": 1.6,
"learning_rate": 2.336872896975026e-05,
"loss": 1.4932,
"step": 378000
},
{
"epoch": 1.6,
"learning_rate": 2.3333502420768446e-05,
"loss": 1.4884,
"step": 378500
},
{
"epoch": 1.6,
"learning_rate": 2.3298275871786633e-05,
"loss": 1.4978,
"step": 379000
},
{
"epoch": 1.6,
"learning_rate": 2.3263049322804823e-05,
"loss": 1.4961,
"step": 379500
},
{
"epoch": 1.61,
"learning_rate": 2.322782277382301e-05,
"loss": 1.4952,
"step": 380000
},
{
"epoch": 1.61,
"learning_rate": 2.31925962248412e-05,
"loss": 1.4837,
"step": 380500
},
{
"epoch": 1.61,
"learning_rate": 2.3157369675859388e-05,
"loss": 1.4911,
"step": 381000
},
{
"epoch": 1.61,
"learning_rate": 2.3122143126877578e-05,
"loss": 1.486,
"step": 381500
},
{
"epoch": 1.61,
"learning_rate": 2.3086916577895765e-05,
"loss": 1.4972,
"step": 382000
},
{
"epoch": 1.62,
"learning_rate": 2.305169002891395e-05,
"loss": 1.489,
"step": 382500
},
{
"epoch": 1.62,
"learning_rate": 2.3016463479932142e-05,
"loss": 1.4957,
"step": 383000
},
{
"epoch": 1.62,
"learning_rate": 2.298123693095033e-05,
"loss": 1.4988,
"step": 383500
},
{
"epoch": 1.62,
"learning_rate": 2.2946010381968516e-05,
"loss": 1.4827,
"step": 384000
},
{
"epoch": 1.63,
"learning_rate": 2.2910783832986703e-05,
"loss": 1.4883,
"step": 384500
},
{
"epoch": 1.63,
"learning_rate": 2.2875557284004893e-05,
"loss": 1.49,
"step": 385000
},
{
"epoch": 1.63,
"learning_rate": 2.2840330735023083e-05,
"loss": 1.4924,
"step": 385500
},
{
"epoch": 1.63,
"learning_rate": 2.280510418604127e-05,
"loss": 1.4747,
"step": 386000
},
{
"epoch": 1.63,
"learning_rate": 2.2769877637059457e-05,
"loss": 1.4842,
"step": 386500
},
{
"epoch": 1.64,
"learning_rate": 2.2734651088077647e-05,
"loss": 1.4707,
"step": 387000
},
{
"epoch": 1.64,
"learning_rate": 2.2699424539095834e-05,
"loss": 1.472,
"step": 387500
},
{
"epoch": 1.64,
"learning_rate": 2.266419799011402e-05,
"loss": 1.4679,
"step": 388000
},
{
"epoch": 1.64,
"learning_rate": 2.262897144113221e-05,
"loss": 1.4705,
"step": 388500
},
{
"epoch": 1.64,
"learning_rate": 2.25937448921504e-05,
"loss": 1.4907,
"step": 389000
},
{
"epoch": 1.65,
"learning_rate": 2.2558518343168585e-05,
"loss": 1.4825,
"step": 389500
},
{
"epoch": 1.65,
"learning_rate": 2.2523291794186776e-05,
"loss": 1.4465,
"step": 390000
},
{
"epoch": 1.65,
"learning_rate": 2.2488065245204966e-05,
"loss": 1.4926,
"step": 390500
},
{
"epoch": 1.65,
"learning_rate": 2.2452838696223153e-05,
"loss": 1.4968,
"step": 391000
},
{
"epoch": 1.65,
"learning_rate": 2.241761214724134e-05,
"loss": 1.4676,
"step": 391500
},
{
"epoch": 1.66,
"learning_rate": 2.2382385598259527e-05,
"loss": 1.4883,
"step": 392000
},
{
"epoch": 1.66,
"learning_rate": 2.2347159049277717e-05,
"loss": 1.4776,
"step": 392500
},
{
"epoch": 1.66,
"learning_rate": 2.2311932500295904e-05,
"loss": 1.4942,
"step": 393000
},
{
"epoch": 1.66,
"learning_rate": 2.227670595131409e-05,
"loss": 1.48,
"step": 393500
},
{
"epoch": 1.67,
"learning_rate": 2.224147940233228e-05,
"loss": 1.4812,
"step": 394000
},
{
"epoch": 1.67,
"learning_rate": 2.2206252853350468e-05,
"loss": 1.4918,
"step": 394500
},
{
"epoch": 1.67,
"learning_rate": 2.2171026304368655e-05,
"loss": 1.4789,
"step": 395000
},
{
"epoch": 1.67,
"learning_rate": 2.2135799755386845e-05,
"loss": 1.4723,
"step": 395500
},
{
"epoch": 1.67,
"learning_rate": 2.2100573206405035e-05,
"loss": 1.4703,
"step": 396000
},
{
"epoch": 1.68,
"learning_rate": 2.2065346657423222e-05,
"loss": 1.474,
"step": 396500
},
{
"epoch": 1.68,
"learning_rate": 2.203012010844141e-05,
"loss": 1.4634,
"step": 397000
},
{
"epoch": 1.68,
"learning_rate": 2.19948935594596e-05,
"loss": 1.4582,
"step": 397500
},
{
"epoch": 1.68,
"learning_rate": 2.1959667010477786e-05,
"loss": 1.4758,
"step": 398000
},
{
"epoch": 1.68,
"learning_rate": 2.1924440461495973e-05,
"loss": 1.4639,
"step": 398500
},
{
"epoch": 1.69,
"learning_rate": 2.188921391251416e-05,
"loss": 1.4959,
"step": 399000
},
{
"epoch": 1.69,
"learning_rate": 2.185398736353235e-05,
"loss": 1.4849,
"step": 399500
},
{
"epoch": 1.69,
"learning_rate": 2.1818760814550537e-05,
"loss": 1.4696,
"step": 400000
},
{
"epoch": 1.69,
"learning_rate": 2.1783534265568728e-05,
"loss": 1.4885,
"step": 400500
},
{
"epoch": 1.7,
"learning_rate": 2.1748307716586914e-05,
"loss": 1.4798,
"step": 401000
},
{
"epoch": 1.7,
"learning_rate": 2.1713081167605105e-05,
"loss": 1.4811,
"step": 401500
},
{
"epoch": 1.7,
"learning_rate": 2.167785461862329e-05,
"loss": 1.4584,
"step": 402000
},
{
"epoch": 1.7,
"learning_rate": 2.164262806964148e-05,
"loss": 1.4638,
"step": 402500
},
{
"epoch": 1.7,
"learning_rate": 2.160740152065967e-05,
"loss": 1.4589,
"step": 403000
},
{
"epoch": 1.71,
"learning_rate": 2.1572174971677856e-05,
"loss": 1.4767,
"step": 403500
},
{
"epoch": 1.71,
"learning_rate": 2.1536948422696043e-05,
"loss": 1.4618,
"step": 404000
},
{
"epoch": 1.71,
"learning_rate": 2.150172187371423e-05,
"loss": 1.4772,
"step": 404500
},
{
"epoch": 1.71,
"learning_rate": 2.146649532473242e-05,
"loss": 1.4819,
"step": 405000
},
{
"epoch": 1.71,
"learning_rate": 2.1431268775750607e-05,
"loss": 1.466,
"step": 405500
},
{
"epoch": 1.72,
"learning_rate": 2.1396042226768797e-05,
"loss": 1.4859,
"step": 406000
},
{
"epoch": 1.72,
"learning_rate": 2.1360815677786984e-05,
"loss": 1.4642,
"step": 406500
},
{
"epoch": 1.72,
"learning_rate": 2.1325589128805174e-05,
"loss": 1.4733,
"step": 407000
},
{
"epoch": 1.72,
"learning_rate": 2.129036257982336e-05,
"loss": 1.4645,
"step": 407500
},
{
"epoch": 1.72,
"learning_rate": 2.1255136030841548e-05,
"loss": 1.4748,
"step": 408000
},
{
"epoch": 1.73,
"learning_rate": 2.121990948185974e-05,
"loss": 1.4548,
"step": 408500
},
{
"epoch": 1.73,
"learning_rate": 2.1184682932877925e-05,
"loss": 1.4783,
"step": 409000
},
{
"epoch": 1.73,
"learning_rate": 2.1149456383896112e-05,
"loss": 1.4683,
"step": 409500
},
{
"epoch": 1.73,
"learning_rate": 2.1114229834914302e-05,
"loss": 1.4649,
"step": 410000
},
{
"epoch": 1.74,
"learning_rate": 2.107900328593249e-05,
"loss": 1.4684,
"step": 410500
},
{
"epoch": 1.74,
"learning_rate": 2.104377673695068e-05,
"loss": 1.4618,
"step": 411000
},
{
"epoch": 1.74,
"learning_rate": 2.1008550187968867e-05,
"loss": 1.445,
"step": 411500
},
{
"epoch": 1.74,
"learning_rate": 2.0973323638987057e-05,
"loss": 1.4678,
"step": 412000
},
{
"epoch": 1.74,
"learning_rate": 2.0938097090005244e-05,
"loss": 1.4766,
"step": 412500
},
{
"epoch": 1.75,
"learning_rate": 2.090287054102343e-05,
"loss": 1.4685,
"step": 413000
},
{
"epoch": 1.75,
"learning_rate": 2.0867643992041618e-05,
"loss": 1.4731,
"step": 413500
},
{
"epoch": 1.75,
"learning_rate": 2.0832417443059808e-05,
"loss": 1.4512,
"step": 414000
},
{
"epoch": 1.75,
"learning_rate": 2.0797190894077995e-05,
"loss": 1.4641,
"step": 414500
},
{
"epoch": 1.75,
"learning_rate": 2.076196434509618e-05,
"loss": 1.4738,
"step": 415000
},
{
"epoch": 1.76,
"learning_rate": 2.0726737796114372e-05,
"loss": 1.4527,
"step": 415500
},
{
"epoch": 1.76,
"learning_rate": 2.0691511247132562e-05,
"loss": 1.4633,
"step": 416000
},
{
"epoch": 1.76,
"learning_rate": 2.065628469815075e-05,
"loss": 1.4489,
"step": 416500
},
{
"epoch": 1.76,
"learning_rate": 2.0621058149168936e-05,
"loss": 1.4547,
"step": 417000
},
{
"epoch": 1.76,
"learning_rate": 2.0585831600187126e-05,
"loss": 1.4494,
"step": 417500
},
{
"epoch": 1.77,
"learning_rate": 2.0550605051205313e-05,
"loss": 1.46,
"step": 418000
},
{
"epoch": 1.77,
"learning_rate": 2.05153785022235e-05,
"loss": 1.4733,
"step": 418500
},
{
"epoch": 1.77,
"learning_rate": 2.0480151953241687e-05,
"loss": 1.4869,
"step": 419000
},
{
"epoch": 1.77,
"learning_rate": 2.0444925404259877e-05,
"loss": 1.4579,
"step": 419500
},
{
"epoch": 1.78,
"learning_rate": 2.0409698855278064e-05,
"loss": 1.4617,
"step": 420000
},
{
"epoch": 1.78,
"learning_rate": 2.037447230629625e-05,
"loss": 1.4677,
"step": 420500
},
{
"epoch": 1.78,
"learning_rate": 2.033924575731444e-05,
"loss": 1.4645,
"step": 421000
},
{
"epoch": 1.78,
"learning_rate": 2.030401920833263e-05,
"loss": 1.4448,
"step": 421500
},
{
"epoch": 1.78,
"learning_rate": 2.026879265935082e-05,
"loss": 1.4605,
"step": 422000
},
{
"epoch": 1.79,
"learning_rate": 2.0233566110369005e-05,
"loss": 1.4556,
"step": 422500
},
{
"epoch": 1.79,
"learning_rate": 2.0198339561387196e-05,
"loss": 1.4718,
"step": 423000
},
{
"epoch": 1.79,
"learning_rate": 2.0163113012405383e-05,
"loss": 1.468,
"step": 423500
},
{
"epoch": 1.79,
"learning_rate": 2.012788646342357e-05,
"loss": 1.461,
"step": 424000
},
{
"epoch": 1.79,
"learning_rate": 2.009265991444176e-05,
"loss": 1.4572,
"step": 424500
},
{
"epoch": 1.8,
"learning_rate": 2.0057433365459947e-05,
"loss": 1.4566,
"step": 425000
},
{
"epoch": 1.8,
"learning_rate": 2.0022206816478134e-05,
"loss": 1.4311,
"step": 425500
},
{
"epoch": 1.8,
"learning_rate": 1.9986980267496324e-05,
"loss": 1.4517,
"step": 426000
},
{
"epoch": 1.8,
"learning_rate": 1.9951753718514514e-05,
"loss": 1.4416,
"step": 426500
},
{
"epoch": 1.81,
"learning_rate": 1.99165271695327e-05,
"loss": 1.4488,
"step": 427000
},
{
"epoch": 1.81,
"learning_rate": 1.9881300620550888e-05,
"loss": 1.4693,
"step": 427500
},
{
"epoch": 1.81,
"learning_rate": 1.9846074071569075e-05,
"loss": 1.4516,
"step": 428000
},
{
"epoch": 1.81,
"learning_rate": 1.9810847522587265e-05,
"loss": 1.4516,
"step": 428500
},
{
"epoch": 1.81,
"learning_rate": 1.9775620973605452e-05,
"loss": 1.4366,
"step": 429000
},
{
"epoch": 1.82,
"learning_rate": 1.974039442462364e-05,
"loss": 1.4532,
"step": 429500
},
{
"epoch": 1.82,
"learning_rate": 1.970516787564183e-05,
"loss": 1.4574,
"step": 430000
},
{
"epoch": 1.82,
"learning_rate": 1.9669941326660016e-05,
"loss": 1.4513,
"step": 430500
},
{
"epoch": 1.82,
"learning_rate": 1.9634714777678203e-05,
"loss": 1.4371,
"step": 431000
},
{
"epoch": 1.82,
"learning_rate": 1.9599488228696393e-05,
"loss": 1.4585,
"step": 431500
},
{
"epoch": 1.83,
"learning_rate": 1.9564261679714584e-05,
"loss": 1.4605,
"step": 432000
},
{
"epoch": 1.83,
"learning_rate": 1.952903513073277e-05,
"loss": 1.4374,
"step": 432500
},
{
"epoch": 1.83,
"learning_rate": 1.9493808581750958e-05,
"loss": 1.4419,
"step": 433000
},
{
"epoch": 1.83,
"learning_rate": 1.9458582032769144e-05,
"loss": 1.4401,
"step": 433500
},
{
"epoch": 1.83,
"learning_rate": 1.9423355483787335e-05,
"loss": 1.4515,
"step": 434000
},
{
"epoch": 1.84,
"learning_rate": 1.938812893480552e-05,
"loss": 1.4376,
"step": 434500
},
{
"epoch": 1.84,
"learning_rate": 1.935290238582371e-05,
"loss": 1.4439,
"step": 435000
},
{
"epoch": 1.84,
"learning_rate": 1.93176758368419e-05,
"loss": 1.4305,
"step": 435500
},
{
"epoch": 1.84,
"learning_rate": 1.9282449287860086e-05,
"loss": 1.4611,
"step": 436000
},
{
"epoch": 1.85,
"learning_rate": 1.9247222738878276e-05,
"loss": 1.4564,
"step": 436500
},
{
"epoch": 1.85,
"learning_rate": 1.9211996189896463e-05,
"loss": 1.4424,
"step": 437000
},
{
"epoch": 1.85,
"learning_rate": 1.9176769640914653e-05,
"loss": 1.4541,
"step": 437500
},
{
"epoch": 1.85,
"learning_rate": 1.914154309193284e-05,
"loss": 1.4338,
"step": 438000
},
{
"epoch": 1.85,
"learning_rate": 1.9106316542951027e-05,
"loss": 1.4419,
"step": 438500
},
{
"epoch": 1.86,
"learning_rate": 1.9071089993969214e-05,
"loss": 1.4363,
"step": 439000
},
{
"epoch": 1.86,
"learning_rate": 1.9035863444987404e-05,
"loss": 1.4404,
"step": 439500
},
{
"epoch": 1.86,
"learning_rate": 1.900063689600559e-05,
"loss": 1.4442,
"step": 440000
},
{
"epoch": 1.86,
"learning_rate": 1.8965410347023778e-05,
"loss": 1.442,
"step": 440500
},
{
"epoch": 1.86,
"learning_rate": 1.8930183798041968e-05,
"loss": 1.4436,
"step": 441000
},
{
"epoch": 1.87,
"learning_rate": 1.889495724906016e-05,
"loss": 1.4399,
"step": 441500
},
{
"epoch": 1.87,
"learning_rate": 1.8859730700078345e-05,
"loss": 1.4423,
"step": 442000
},
{
"epoch": 1.87,
"learning_rate": 1.8824504151096532e-05,
"loss": 1.4486,
"step": 442500
},
{
"epoch": 1.87,
"learning_rate": 1.8789277602114723e-05,
"loss": 1.443,
"step": 443000
},
{
"epoch": 1.87,
"learning_rate": 1.875405105313291e-05,
"loss": 1.4428,
"step": 443500
},
{
"epoch": 1.88,
"learning_rate": 1.8718824504151096e-05,
"loss": 1.4387,
"step": 444000
},
{
"epoch": 1.88,
"learning_rate": 1.8683597955169287e-05,
"loss": 1.44,
"step": 444500
},
{
"epoch": 1.88,
"learning_rate": 1.8648371406187474e-05,
"loss": 1.4466,
"step": 445000
},
{
"epoch": 1.88,
"learning_rate": 1.861314485720566e-05,
"loss": 1.434,
"step": 445500
},
{
"epoch": 1.89,
"learning_rate": 1.8577918308223847e-05,
"loss": 1.4469,
"step": 446000
},
{
"epoch": 1.89,
"learning_rate": 1.854269175924204e-05,
"loss": 1.44,
"step": 446500
},
{
"epoch": 1.89,
"learning_rate": 1.8507465210260228e-05,
"loss": 1.4498,
"step": 447000
},
{
"epoch": 1.89,
"learning_rate": 1.8472238661278415e-05,
"loss": 1.4347,
"step": 447500
},
{
"epoch": 1.89,
"learning_rate": 1.8437012112296602e-05,
"loss": 1.4467,
"step": 448000
},
{
"epoch": 1.9,
"learning_rate": 1.8401785563314792e-05,
"loss": 1.4052,
"step": 448500
},
{
"epoch": 1.9,
"learning_rate": 1.836655901433298e-05,
"loss": 1.4406,
"step": 449000
},
{
"epoch": 1.9,
"learning_rate": 1.8331332465351166e-05,
"loss": 1.4246,
"step": 449500
},
{
"epoch": 1.9,
"learning_rate": 1.8296105916369356e-05,
"loss": 1.4468,
"step": 450000
},
{
"epoch": 1.9,
"learning_rate": 1.8260879367387543e-05,
"loss": 1.4283,
"step": 450500
},
{
"epoch": 1.91,
"learning_rate": 1.822565281840573e-05,
"loss": 1.4526,
"step": 451000
},
{
"epoch": 1.91,
"learning_rate": 1.819042626942392e-05,
"loss": 1.4453,
"step": 451500
},
{
"epoch": 1.91,
"learning_rate": 1.815519972044211e-05,
"loss": 1.4303,
"step": 452000
},
{
"epoch": 1.91,
"learning_rate": 1.8119973171460298e-05,
"loss": 1.4492,
"step": 452500
},
{
"epoch": 1.91,
"learning_rate": 1.8084746622478484e-05,
"loss": 1.4505,
"step": 453000
},
{
"epoch": 1.92,
"learning_rate": 1.804952007349667e-05,
"loss": 1.4485,
"step": 453500
},
{
"epoch": 1.92,
"learning_rate": 1.801429352451486e-05,
"loss": 1.4376,
"step": 454000
},
{
"epoch": 1.92,
"learning_rate": 1.797906697553305e-05,
"loss": 1.4448,
"step": 454500
},
{
"epoch": 1.92,
"learning_rate": 1.7943840426551235e-05,
"loss": 1.4302,
"step": 455000
},
{
"epoch": 1.93,
"learning_rate": 1.7908613877569426e-05,
"loss": 1.4317,
"step": 455500
},
{
"epoch": 1.93,
"learning_rate": 1.7873387328587613e-05,
"loss": 1.431,
"step": 456000
},
{
"epoch": 1.93,
"learning_rate": 1.78381607796058e-05,
"loss": 1.426,
"step": 456500
},
{
"epoch": 1.93,
"learning_rate": 1.780293423062399e-05,
"loss": 1.4361,
"step": 457000
},
{
"epoch": 1.93,
"learning_rate": 1.776770768164218e-05,
"loss": 1.4313,
"step": 457500
},
{
"epoch": 1.94,
"learning_rate": 1.7732481132660367e-05,
"loss": 1.4232,
"step": 458000
},
{
"epoch": 1.94,
"learning_rate": 1.7697254583678554e-05,
"loss": 1.4349,
"step": 458500
},
{
"epoch": 1.94,
"learning_rate": 1.7662028034696744e-05,
"loss": 1.4386,
"step": 459000
},
{
"epoch": 1.94,
"learning_rate": 1.762680148571493e-05,
"loss": 1.4444,
"step": 459500
},
{
"epoch": 1.94,
"learning_rate": 1.7591574936733118e-05,
"loss": 1.4377,
"step": 460000
},
{
"epoch": 1.95,
"learning_rate": 1.7556348387751305e-05,
"loss": 1.4426,
"step": 460500
},
{
"epoch": 1.95,
"learning_rate": 1.7521121838769495e-05,
"loss": 1.4318,
"step": 461000
},
{
"epoch": 1.95,
"learning_rate": 1.7485895289787682e-05,
"loss": 1.4234,
"step": 461500
},
{
"epoch": 1.95,
"learning_rate": 1.7450668740805872e-05,
"loss": 1.4333,
"step": 462000
},
{
"epoch": 1.96,
"learning_rate": 1.741544219182406e-05,
"loss": 1.4448,
"step": 462500
},
{
"epoch": 1.96,
"learning_rate": 1.738021564284225e-05,
"loss": 1.4262,
"step": 463000
},
{
"epoch": 1.96,
"learning_rate": 1.7344989093860436e-05,
"loss": 1.44,
"step": 463500
},
{
"epoch": 1.96,
"learning_rate": 1.7309762544878623e-05,
"loss": 1.4228,
"step": 464000
},
{
"epoch": 1.96,
"learning_rate": 1.7274535995896814e-05,
"loss": 1.4315,
"step": 464500
},
{
"epoch": 1.97,
"learning_rate": 1.7239309446915e-05,
"loss": 1.4252,
"step": 465000
},
{
"epoch": 1.97,
"learning_rate": 1.7204082897933187e-05,
"loss": 1.4299,
"step": 465500
},
{
"epoch": 1.97,
"learning_rate": 1.7168856348951374e-05,
"loss": 1.4385,
"step": 466000
},
{
"epoch": 1.97,
"learning_rate": 1.7133629799969565e-05,
"loss": 1.4411,
"step": 466500
},
{
"epoch": 1.97,
"learning_rate": 1.7098403250987755e-05,
"loss": 1.4311,
"step": 467000
},
{
"epoch": 1.98,
"learning_rate": 1.7063176702005942e-05,
"loss": 1.4331,
"step": 467500
},
{
"epoch": 1.98,
"learning_rate": 1.702795015302413e-05,
"loss": 1.4274,
"step": 468000
},
{
"epoch": 1.98,
"learning_rate": 1.699272360404232e-05,
"loss": 1.4265,
"step": 468500
},
{
"epoch": 1.98,
"learning_rate": 1.6957497055060506e-05,
"loss": 1.4212,
"step": 469000
},
{
"epoch": 1.98,
"learning_rate": 1.6922270506078693e-05,
"loss": 1.4217,
"step": 469500
},
{
"epoch": 1.99,
"learning_rate": 1.6887043957096883e-05,
"loss": 1.4398,
"step": 470000
},
{
"epoch": 1.99,
"learning_rate": 1.685181740811507e-05,
"loss": 1.4163,
"step": 470500
},
{
"epoch": 1.99,
"learning_rate": 1.6816590859133257e-05,
"loss": 1.4322,
"step": 471000
},
{
"epoch": 1.99,
"learning_rate": 1.6781364310151447e-05,
"loss": 1.42,
"step": 471500
},
{
"epoch": 2.0,
"learning_rate": 1.6746137761169638e-05,
"loss": 1.4289,
"step": 472000
},
{
"epoch": 2.0,
"learning_rate": 1.6710911212187824e-05,
"loss": 1.4228,
"step": 472500
},
{
"epoch": 2.0,
"learning_rate": 1.667568466320601e-05,
"loss": 1.4242,
"step": 473000
},
{
"epoch": 2.0,
"learning_rate": 1.66404581142242e-05,
"loss": 1.4102,
"step": 473500
},
{
"epoch": 2.0,
"learning_rate": 1.660523156524239e-05,
"loss": 1.4383,
"step": 474000
},
{
"epoch": 2.01,
"learning_rate": 1.6570005016260575e-05,
"loss": 1.4212,
"step": 474500
},
{
"epoch": 2.01,
"learning_rate": 1.6534778467278762e-05,
"loss": 1.4139,
"step": 475000
},
{
"epoch": 2.01,
"learning_rate": 1.6499551918296953e-05,
"loss": 1.4221,
"step": 475500
},
{
"epoch": 2.01,
"learning_rate": 1.646432536931514e-05,
"loss": 1.4325,
"step": 476000
},
{
"epoch": 2.01,
"learning_rate": 1.6429098820333326e-05,
"loss": 1.4284,
"step": 476500
},
{
"epoch": 2.02,
"learning_rate": 1.6393872271351517e-05,
"loss": 1.4145,
"step": 477000
},
{
"epoch": 2.02,
"learning_rate": 1.6358645722369707e-05,
"loss": 1.4103,
"step": 477500
},
{
"epoch": 2.02,
"learning_rate": 1.6323419173387894e-05,
"loss": 1.428,
"step": 478000
},
{
"epoch": 2.02,
"learning_rate": 1.628819262440608e-05,
"loss": 1.4146,
"step": 478500
},
{
"epoch": 2.02,
"learning_rate": 1.625296607542427e-05,
"loss": 1.3936,
"step": 479000
},
{
"epoch": 2.03,
"learning_rate": 1.6217739526442458e-05,
"loss": 1.4026,
"step": 479500
},
{
"epoch": 2.03,
"learning_rate": 1.6182512977460645e-05,
"loss": 1.409,
"step": 480000
},
{
"epoch": 2.03,
"learning_rate": 1.6147286428478832e-05,
"loss": 1.4209,
"step": 480500
},
{
"epoch": 2.03,
"learning_rate": 1.6112059879497022e-05,
"loss": 1.3949,
"step": 481000
},
{
"epoch": 2.04,
"learning_rate": 1.607683333051521e-05,
"loss": 1.4167,
"step": 481500
},
{
"epoch": 2.04,
"learning_rate": 1.6041606781533396e-05,
"loss": 1.4049,
"step": 482000
},
{
"epoch": 2.04,
"learning_rate": 1.6006380232551586e-05,
"loss": 1.4066,
"step": 482500
},
{
"epoch": 2.04,
"learning_rate": 1.5971153683569776e-05,
"loss": 1.417,
"step": 483000
},
{
"epoch": 2.04,
"learning_rate": 1.5935927134587963e-05,
"loss": 1.4069,
"step": 483500
},
{
"epoch": 2.05,
"learning_rate": 1.590070058560615e-05,
"loss": 1.4171,
"step": 484000
},
{
"epoch": 2.05,
"learning_rate": 1.586547403662434e-05,
"loss": 1.4185,
"step": 484500
},
{
"epoch": 2.05,
"learning_rate": 1.5830247487642527e-05,
"loss": 1.416,
"step": 485000
},
{
"epoch": 2.05,
"learning_rate": 1.5795020938660714e-05,
"loss": 1.4088,
"step": 485500
},
{
"epoch": 2.05,
"learning_rate": 1.57597943896789e-05,
"loss": 1.4141,
"step": 486000
},
{
"epoch": 2.06,
"learning_rate": 1.572456784069709e-05,
"loss": 1.413,
"step": 486500
},
{
"epoch": 2.06,
"learning_rate": 1.568934129171528e-05,
"loss": 1.4033,
"step": 487000
},
{
"epoch": 2.06,
"learning_rate": 1.565411474273347e-05,
"loss": 1.4191,
"step": 487500
},
{
"epoch": 2.06,
"learning_rate": 1.5618888193751656e-05,
"loss": 1.4068,
"step": 488000
},
{
"epoch": 2.06,
"learning_rate": 1.5583661644769846e-05,
"loss": 1.3959,
"step": 488500
},
{
"epoch": 2.07,
"learning_rate": 1.5548435095788033e-05,
"loss": 1.4259,
"step": 489000
},
{
"epoch": 2.07,
"learning_rate": 1.551320854680622e-05,
"loss": 1.4033,
"step": 489500
},
{
"epoch": 2.07,
"learning_rate": 1.547798199782441e-05,
"loss": 1.4058,
"step": 490000
},
{
"epoch": 2.07,
"learning_rate": 1.5442755448842597e-05,
"loss": 1.3953,
"step": 490500
},
{
"epoch": 2.08,
"learning_rate": 1.5407528899860784e-05,
"loss": 1.3968,
"step": 491000
},
{
"epoch": 2.08,
"learning_rate": 1.5372302350878974e-05,
"loss": 1.4198,
"step": 491500
},
{
"epoch": 2.08,
"learning_rate": 1.533707580189716e-05,
"loss": 1.413,
"step": 492000
},
{
"epoch": 2.08,
"learning_rate": 1.530184925291535e-05,
"loss": 1.399,
"step": 492500
},
{
"epoch": 2.08,
"learning_rate": 1.5266622703933538e-05,
"loss": 1.4051,
"step": 493000
},
{
"epoch": 2.09,
"learning_rate": 1.5231396154951727e-05,
"loss": 1.4136,
"step": 493500
},
{
"epoch": 2.09,
"learning_rate": 1.5196169605969915e-05,
"loss": 1.4133,
"step": 494000
},
{
"epoch": 2.09,
"learning_rate": 1.5160943056988102e-05,
"loss": 1.4004,
"step": 494500
},
{
"epoch": 2.09,
"learning_rate": 1.5125716508006291e-05,
"loss": 1.3953,
"step": 495000
},
{
"epoch": 2.09,
"learning_rate": 1.5090489959024478e-05,
"loss": 1.4181,
"step": 495500
},
{
"epoch": 2.1,
"learning_rate": 1.5055263410042666e-05,
"loss": 1.4009,
"step": 496000
},
{
"epoch": 2.1,
"learning_rate": 1.5020036861060855e-05,
"loss": 1.3945,
"step": 496500
},
{
"epoch": 2.1,
"learning_rate": 1.4984810312079042e-05,
"loss": 1.4047,
"step": 497000
},
{
"epoch": 2.1,
"learning_rate": 1.4949583763097232e-05,
"loss": 1.4041,
"step": 497500
},
{
"epoch": 2.11,
"learning_rate": 1.491435721411542e-05,
"loss": 1.4129,
"step": 498000
},
{
"epoch": 2.11,
"learning_rate": 1.487913066513361e-05,
"loss": 1.4148,
"step": 498500
},
{
"epoch": 2.11,
"learning_rate": 1.4843904116151796e-05,
"loss": 1.4022,
"step": 499000
},
{
"epoch": 2.11,
"learning_rate": 1.4808677567169985e-05,
"loss": 1.4081,
"step": 499500
},
{
"epoch": 2.11,
"learning_rate": 1.4773451018188172e-05,
"loss": 1.4085,
"step": 500000
},
{
"epoch": 2.12,
"learning_rate": 1.473822446920636e-05,
"loss": 1.3954,
"step": 500500
},
{
"epoch": 2.12,
"learning_rate": 1.4702997920224549e-05,
"loss": 1.3933,
"step": 501000
},
{
"epoch": 2.12,
"learning_rate": 1.4667771371242736e-05,
"loss": 1.415,
"step": 501500
},
{
"epoch": 2.12,
"learning_rate": 1.4632544822260925e-05,
"loss": 1.4132,
"step": 502000
},
{
"epoch": 2.12,
"learning_rate": 1.4597318273279111e-05,
"loss": 1.3878,
"step": 502500
},
{
"epoch": 2.13,
"learning_rate": 1.4562091724297303e-05,
"loss": 1.3973,
"step": 503000
},
{
"epoch": 2.13,
"learning_rate": 1.452686517531549e-05,
"loss": 1.3923,
"step": 503500
},
{
"epoch": 2.13,
"learning_rate": 1.4491638626333679e-05,
"loss": 1.398,
"step": 504000
},
{
"epoch": 2.13,
"learning_rate": 1.4456412077351866e-05,
"loss": 1.3871,
"step": 504500
},
{
"epoch": 2.13,
"learning_rate": 1.4421185528370054e-05,
"loss": 1.3744,
"step": 505000
},
{
"epoch": 2.14,
"learning_rate": 1.4385958979388241e-05,
"loss": 1.4026,
"step": 505500
},
{
"epoch": 2.14,
"learning_rate": 1.435073243040643e-05,
"loss": 1.3924,
"step": 506000
},
{
"epoch": 2.14,
"learning_rate": 1.4315505881424619e-05,
"loss": 1.4021,
"step": 506500
},
{
"epoch": 2.14,
"learning_rate": 1.4280279332442805e-05,
"loss": 1.3925,
"step": 507000
},
{
"epoch": 2.15,
"learning_rate": 1.4245052783460994e-05,
"loss": 1.3867,
"step": 507500
},
{
"epoch": 2.15,
"learning_rate": 1.4209826234479184e-05,
"loss": 1.391,
"step": 508000
},
{
"epoch": 2.15,
"learning_rate": 1.4174599685497373e-05,
"loss": 1.3814,
"step": 508500
},
{
"epoch": 2.15,
"learning_rate": 1.413937313651556e-05,
"loss": 1.3976,
"step": 509000
},
{
"epoch": 2.15,
"learning_rate": 1.4104146587533748e-05,
"loss": 1.4071,
"step": 509500
},
{
"epoch": 2.16,
"learning_rate": 1.4068920038551935e-05,
"loss": 1.3871,
"step": 510000
},
{
"epoch": 2.16,
"learning_rate": 1.4033693489570124e-05,
"loss": 1.3904,
"step": 510500
},
{
"epoch": 2.16,
"learning_rate": 1.3998466940588312e-05,
"loss": 1.3951,
"step": 511000
},
{
"epoch": 2.16,
"learning_rate": 1.39632403916065e-05,
"loss": 1.4098,
"step": 511500
},
{
"epoch": 2.16,
"learning_rate": 1.3928013842624688e-05,
"loss": 1.385,
"step": 512000
},
{
"epoch": 2.17,
"learning_rate": 1.3892787293642875e-05,
"loss": 1.3975,
"step": 512500
},
{
"epoch": 2.17,
"learning_rate": 1.3857560744661067e-05,
"loss": 1.3953,
"step": 513000
},
{
"epoch": 2.17,
"learning_rate": 1.3822334195679254e-05,
"loss": 1.4071,
"step": 513500
},
{
"epoch": 2.17,
"learning_rate": 1.3787107646697442e-05,
"loss": 1.4085,
"step": 514000
},
{
"epoch": 2.17,
"learning_rate": 1.375188109771563e-05,
"loss": 1.3907,
"step": 514500
},
{
"epoch": 2.18,
"learning_rate": 1.3716654548733818e-05,
"loss": 1.3906,
"step": 515000
},
{
"epoch": 2.18,
"learning_rate": 1.3681427999752006e-05,
"loss": 1.4069,
"step": 515500
},
{
"epoch": 2.18,
"learning_rate": 1.3646201450770193e-05,
"loss": 1.3958,
"step": 516000
},
{
"epoch": 2.18,
"learning_rate": 1.3610974901788382e-05,
"loss": 1.3909,
"step": 516500
},
{
"epoch": 2.19,
"learning_rate": 1.3575748352806569e-05,
"loss": 1.3943,
"step": 517000
},
{
"epoch": 2.19,
"learning_rate": 1.3540521803824757e-05,
"loss": 1.3954,
"step": 517500
},
{
"epoch": 2.19,
"learning_rate": 1.3505295254842948e-05,
"loss": 1.3823,
"step": 518000
},
{
"epoch": 2.19,
"learning_rate": 1.3470068705861136e-05,
"loss": 1.3804,
"step": 518500
},
{
"epoch": 2.19,
"learning_rate": 1.3434842156879323e-05,
"loss": 1.3977,
"step": 519000
},
{
"epoch": 2.2,
"learning_rate": 1.3399615607897512e-05,
"loss": 1.395,
"step": 519500
},
{
"epoch": 2.2,
"learning_rate": 1.3364389058915699e-05,
"loss": 1.4002,
"step": 520000
},
{
"epoch": 2.2,
"learning_rate": 1.3329162509933887e-05,
"loss": 1.3977,
"step": 520500
},
{
"epoch": 2.2,
"learning_rate": 1.3293935960952076e-05,
"loss": 1.4033,
"step": 521000
},
{
"epoch": 2.2,
"learning_rate": 1.3258709411970263e-05,
"loss": 1.3972,
"step": 521500
},
{
"epoch": 2.21,
"learning_rate": 1.3223482862988451e-05,
"loss": 1.3856,
"step": 522000
},
{
"epoch": 2.21,
"learning_rate": 1.3188256314006638e-05,
"loss": 1.3869,
"step": 522500
},
{
"epoch": 2.21,
"learning_rate": 1.315302976502483e-05,
"loss": 1.3738,
"step": 523000
},
{
"epoch": 2.21,
"learning_rate": 1.3117803216043017e-05,
"loss": 1.3967,
"step": 523500
},
{
"epoch": 2.22,
"learning_rate": 1.3082576667061206e-05,
"loss": 1.3648,
"step": 524000
},
{
"epoch": 2.22,
"learning_rate": 1.3047350118079393e-05,
"loss": 1.3963,
"step": 524500
},
{
"epoch": 2.22,
"learning_rate": 1.3012123569097581e-05,
"loss": 1.3893,
"step": 525000
},
{
"epoch": 2.22,
"learning_rate": 1.297689702011577e-05,
"loss": 1.387,
"step": 525500
},
{
"epoch": 2.22,
"learning_rate": 1.2941670471133957e-05,
"loss": 1.3915,
"step": 526000
},
{
"epoch": 2.23,
"learning_rate": 1.2906443922152145e-05,
"loss": 1.3801,
"step": 526500
},
{
"epoch": 2.23,
"learning_rate": 1.2871217373170332e-05,
"loss": 1.3754,
"step": 527000
},
{
"epoch": 2.23,
"learning_rate": 1.2835990824188521e-05,
"loss": 1.4019,
"step": 527500
},
{
"epoch": 2.23,
"learning_rate": 1.2800764275206708e-05,
"loss": 1.3726,
"step": 528000
},
{
"epoch": 2.23,
"learning_rate": 1.27655377262249e-05,
"loss": 1.3976,
"step": 528500
},
{
"epoch": 2.24,
"learning_rate": 1.2730311177243087e-05,
"loss": 1.3837,
"step": 529000
},
{
"epoch": 2.24,
"learning_rate": 1.2695084628261275e-05,
"loss": 1.3865,
"step": 529500
},
{
"epoch": 2.24,
"learning_rate": 1.2659858079279462e-05,
"loss": 1.3897,
"step": 530000
},
{
"epoch": 2.24,
"learning_rate": 1.262463153029765e-05,
"loss": 1.381,
"step": 530500
},
{
"epoch": 2.24,
"learning_rate": 1.258940498131584e-05,
"loss": 1.3918,
"step": 531000
},
{
"epoch": 2.25,
"learning_rate": 1.2554178432334026e-05,
"loss": 1.3859,
"step": 531500
},
{
"epoch": 2.25,
"learning_rate": 1.2518951883352215e-05,
"loss": 1.3831,
"step": 532000
},
{
"epoch": 2.25,
"learning_rate": 1.2483725334370403e-05,
"loss": 1.3788,
"step": 532500
},
{
"epoch": 2.25,
"learning_rate": 1.2448498785388592e-05,
"loss": 1.3823,
"step": 533000
},
{
"epoch": 2.26,
"learning_rate": 1.2413272236406779e-05,
"loss": 1.3675,
"step": 533500
},
{
"epoch": 2.26,
"learning_rate": 1.2378045687424968e-05,
"loss": 1.3848,
"step": 534000
},
{
"epoch": 2.26,
"learning_rate": 1.2342819138443156e-05,
"loss": 1.3831,
"step": 534500
},
{
"epoch": 2.26,
"learning_rate": 1.2307592589461345e-05,
"loss": 1.3887,
"step": 535000
},
{
"epoch": 2.26,
"learning_rate": 1.2272366040479533e-05,
"loss": 1.3719,
"step": 535500
},
{
"epoch": 2.27,
"learning_rate": 1.223713949149772e-05,
"loss": 1.3636,
"step": 536000
},
{
"epoch": 2.27,
"learning_rate": 1.2201912942515909e-05,
"loss": 1.3885,
"step": 536500
},
{
"epoch": 2.27,
"learning_rate": 1.2166686393534097e-05,
"loss": 1.3739,
"step": 537000
},
{
"epoch": 2.27,
"learning_rate": 1.2131459844552286e-05,
"loss": 1.3801,
"step": 537500
},
{
"epoch": 2.27,
"learning_rate": 1.2096233295570473e-05,
"loss": 1.3783,
"step": 538000
},
{
"epoch": 2.28,
"learning_rate": 1.2061006746588662e-05,
"loss": 1.3687,
"step": 538500
},
{
"epoch": 2.28,
"learning_rate": 1.202578019760685e-05,
"loss": 1.3806,
"step": 539000
},
{
"epoch": 2.28,
"learning_rate": 1.1990553648625039e-05,
"loss": 1.3965,
"step": 539500
},
{
"epoch": 2.28,
"learning_rate": 1.1955327099643226e-05,
"loss": 1.3663,
"step": 540000
},
{
"epoch": 2.28,
"learning_rate": 1.1920100550661414e-05,
"loss": 1.3683,
"step": 540500
},
{
"epoch": 2.29,
"learning_rate": 1.1884874001679603e-05,
"loss": 1.3619,
"step": 541000
},
{
"epoch": 2.29,
"learning_rate": 1.184964745269779e-05,
"loss": 1.3862,
"step": 541500
},
{
"epoch": 2.29,
"learning_rate": 1.1814420903715978e-05,
"loss": 1.3779,
"step": 542000
},
{
"epoch": 2.29,
"learning_rate": 1.1779194354734167e-05,
"loss": 1.3827,
"step": 542500
},
{
"epoch": 2.3,
"learning_rate": 1.1743967805752356e-05,
"loss": 1.3755,
"step": 543000
},
{
"epoch": 2.3,
"learning_rate": 1.1708741256770542e-05,
"loss": 1.3804,
"step": 543500
},
{
"epoch": 2.3,
"learning_rate": 1.1673514707788731e-05,
"loss": 1.3846,
"step": 544000
},
{
"epoch": 2.3,
"learning_rate": 1.163828815880692e-05,
"loss": 1.3882,
"step": 544500
},
{
"epoch": 2.3,
"learning_rate": 1.1603061609825108e-05,
"loss": 1.3576,
"step": 545000
},
{
"epoch": 2.31,
"learning_rate": 1.1567835060843297e-05,
"loss": 1.3771,
"step": 545500
},
{
"epoch": 2.31,
"learning_rate": 1.1532608511861484e-05,
"loss": 1.388,
"step": 546000
},
{
"epoch": 2.31,
"learning_rate": 1.1497381962879672e-05,
"loss": 1.3631,
"step": 546500
},
{
"epoch": 2.31,
"learning_rate": 1.146215541389786e-05,
"loss": 1.3813,
"step": 547000
},
{
"epoch": 2.31,
"learning_rate": 1.142692886491605e-05,
"loss": 1.3712,
"step": 547500
},
{
"epoch": 2.32,
"learning_rate": 1.1391702315934236e-05,
"loss": 1.3714,
"step": 548000
},
{
"epoch": 2.32,
"learning_rate": 1.1356475766952425e-05,
"loss": 1.3843,
"step": 548500
},
{
"epoch": 2.32,
"learning_rate": 1.1321249217970614e-05,
"loss": 1.3849,
"step": 549000
},
{
"epoch": 2.32,
"learning_rate": 1.12860226689888e-05,
"loss": 1.3717,
"step": 549500
},
{
"epoch": 2.32,
"learning_rate": 1.125079612000699e-05,
"loss": 1.3724,
"step": 550000
},
{
"epoch": 2.33,
"learning_rate": 1.1215569571025178e-05,
"loss": 1.3741,
"step": 550500
},
{
"epoch": 2.33,
"learning_rate": 1.1180343022043366e-05,
"loss": 1.3816,
"step": 551000
},
{
"epoch": 2.33,
"learning_rate": 1.1145116473061553e-05,
"loss": 1.3747,
"step": 551500
},
{
"epoch": 2.33,
"learning_rate": 1.1109889924079742e-05,
"loss": 1.3786,
"step": 552000
},
{
"epoch": 2.34,
"learning_rate": 1.107466337509793e-05,
"loss": 1.3658,
"step": 552500
},
{
"epoch": 2.34,
"learning_rate": 1.1039436826116119e-05,
"loss": 1.3784,
"step": 553000
},
{
"epoch": 2.34,
"learning_rate": 1.1004210277134306e-05,
"loss": 1.373,
"step": 553500
},
{
"epoch": 2.34,
"learning_rate": 1.0968983728152494e-05,
"loss": 1.3742,
"step": 554000
},
{
"epoch": 2.34,
"learning_rate": 1.0933757179170683e-05,
"loss": 1.3781,
"step": 554500
},
{
"epoch": 2.35,
"learning_rate": 1.0898530630188872e-05,
"loss": 1.3748,
"step": 555000
},
{
"epoch": 2.35,
"learning_rate": 1.086330408120706e-05,
"loss": 1.3886,
"step": 555500
},
{
"epoch": 2.35,
"learning_rate": 1.0828077532225247e-05,
"loss": 1.3756,
"step": 556000
},
{
"epoch": 2.35,
"learning_rate": 1.0792850983243436e-05,
"loss": 1.3639,
"step": 556500
},
{
"epoch": 2.35,
"learning_rate": 1.0757624434261623e-05,
"loss": 1.3748,
"step": 557000
},
{
"epoch": 2.36,
"learning_rate": 1.0722397885279813e-05,
"loss": 1.3738,
"step": 557500
},
{
"epoch": 2.36,
"learning_rate": 1.0687171336298e-05,
"loss": 1.3697,
"step": 558000
},
{
"epoch": 2.36,
"learning_rate": 1.0651944787316188e-05,
"loss": 1.3655,
"step": 558500
},
{
"epoch": 2.36,
"learning_rate": 1.0616718238334377e-05,
"loss": 1.374,
"step": 559000
},
{
"epoch": 2.37,
"learning_rate": 1.0581491689352564e-05,
"loss": 1.3787,
"step": 559500
},
{
"epoch": 2.37,
"learning_rate": 1.0546265140370754e-05,
"loss": 1.3725,
"step": 560000
},
{
"epoch": 2.37,
"learning_rate": 1.0511038591388941e-05,
"loss": 1.3597,
"step": 560500
},
{
"epoch": 2.37,
"learning_rate": 1.047581204240713e-05,
"loss": 1.3592,
"step": 561000
},
{
"epoch": 2.37,
"learning_rate": 1.0440585493425317e-05,
"loss": 1.3705,
"step": 561500
},
{
"epoch": 2.38,
"learning_rate": 1.0405358944443505e-05,
"loss": 1.3599,
"step": 562000
},
{
"epoch": 2.38,
"learning_rate": 1.0370132395461694e-05,
"loss": 1.3682,
"step": 562500
},
{
"epoch": 2.38,
"learning_rate": 1.0334905846479882e-05,
"loss": 1.3778,
"step": 563000
},
{
"epoch": 2.38,
"learning_rate": 1.029967929749807e-05,
"loss": 1.3691,
"step": 563500
},
{
"epoch": 2.38,
"learning_rate": 1.0264452748516258e-05,
"loss": 1.3772,
"step": 564000
},
{
"epoch": 2.39,
"learning_rate": 1.0229226199534447e-05,
"loss": 1.3628,
"step": 564500
},
{
"epoch": 2.39,
"learning_rate": 1.0193999650552633e-05,
"loss": 1.3647,
"step": 565000
},
{
"epoch": 2.39,
"learning_rate": 1.0158773101570824e-05,
"loss": 1.3719,
"step": 565500
},
{
"epoch": 2.39,
"learning_rate": 1.012354655258901e-05,
"loss": 1.3603,
"step": 566000
},
{
"epoch": 2.39,
"learning_rate": 1.00883200036072e-05,
"loss": 1.3517,
"step": 566500
},
{
"epoch": 2.4,
"learning_rate": 1.0053093454625386e-05,
"loss": 1.359,
"step": 567000
},
{
"epoch": 2.4,
"learning_rate": 1.0017866905643575e-05,
"loss": 1.3818,
"step": 567500
},
{
"epoch": 2.4,
"learning_rate": 9.982640356661763e-06,
"loss": 1.362,
"step": 568000
},
{
"epoch": 2.4,
"learning_rate": 9.947413807679952e-06,
"loss": 1.3738,
"step": 568500
},
{
"epoch": 2.41,
"learning_rate": 9.91218725869814e-06,
"loss": 1.3643,
"step": 569000
},
{
"epoch": 2.41,
"learning_rate": 9.876960709716327e-06,
"loss": 1.3711,
"step": 569500
},
{
"epoch": 2.41,
"learning_rate": 9.841734160734516e-06,
"loss": 1.353,
"step": 570000
},
{
"epoch": 2.41,
"learning_rate": 9.806507611752705e-06,
"loss": 1.3638,
"step": 570500
},
{
"epoch": 2.41,
"learning_rate": 9.771281062770893e-06,
"loss": 1.3618,
"step": 571000
},
{
"epoch": 2.42,
"learning_rate": 9.73605451378908e-06,
"loss": 1.3599,
"step": 571500
},
{
"epoch": 2.42,
"learning_rate": 9.700827964807269e-06,
"loss": 1.3479,
"step": 572000
},
{
"epoch": 2.42,
"learning_rate": 9.665601415825457e-06,
"loss": 1.3701,
"step": 572500
},
{
"epoch": 2.42,
"learning_rate": 9.630374866843646e-06,
"loss": 1.3582,
"step": 573000
},
{
"epoch": 2.42,
"learning_rate": 9.595148317861835e-06,
"loss": 1.3629,
"step": 573500
},
{
"epoch": 2.43,
"learning_rate": 9.559921768880021e-06,
"loss": 1.3558,
"step": 574000
},
{
"epoch": 2.43,
"learning_rate": 9.52469521989821e-06,
"loss": 1.3495,
"step": 574500
},
{
"epoch": 2.43,
"learning_rate": 9.489468670916397e-06,
"loss": 1.369,
"step": 575000
},
{
"epoch": 2.43,
"learning_rate": 9.454242121934587e-06,
"loss": 1.3572,
"step": 575500
},
{
"epoch": 2.43,
"learning_rate": 9.419015572952774e-06,
"loss": 1.3634,
"step": 576000
},
{
"epoch": 2.44,
"learning_rate": 9.383789023970963e-06,
"loss": 1.3584,
"step": 576500
},
{
"epoch": 2.44,
"learning_rate": 9.34856247498915e-06,
"loss": 1.3633,
"step": 577000
},
{
"epoch": 2.44,
"learning_rate": 9.313335926007338e-06,
"loss": 1.3648,
"step": 577500
},
{
"epoch": 2.44,
"learning_rate": 9.278109377025527e-06,
"loss": 1.3428,
"step": 578000
},
{
"epoch": 2.45,
"learning_rate": 9.242882828043715e-06,
"loss": 1.37,
"step": 578500
},
{
"epoch": 2.45,
"learning_rate": 9.207656279061904e-06,
"loss": 1.3493,
"step": 579000
},
{
"epoch": 2.45,
"learning_rate": 9.172429730080091e-06,
"loss": 1.357,
"step": 579500
},
{
"epoch": 2.45,
"learning_rate": 9.13720318109828e-06,
"loss": 1.3635,
"step": 580000
},
{
"epoch": 2.45,
"learning_rate": 9.101976632116468e-06,
"loss": 1.3587,
"step": 580500
},
{
"epoch": 2.46,
"learning_rate": 9.066750083134657e-06,
"loss": 1.3605,
"step": 581000
},
{
"epoch": 2.46,
"learning_rate": 9.031523534152844e-06,
"loss": 1.3651,
"step": 581500
},
{
"epoch": 2.46,
"learning_rate": 8.996296985171032e-06,
"loss": 1.369,
"step": 582000
},
{
"epoch": 2.46,
"learning_rate": 8.96107043618922e-06,
"loss": 1.3484,
"step": 582500
},
{
"epoch": 2.46,
"learning_rate": 8.92584388720741e-06,
"loss": 1.3682,
"step": 583000
},
{
"epoch": 2.47,
"learning_rate": 8.890617338225598e-06,
"loss": 1.364,
"step": 583500
},
{
"epoch": 2.47,
"learning_rate": 8.855390789243785e-06,
"loss": 1.3634,
"step": 584000
},
{
"epoch": 2.47,
"learning_rate": 8.820164240261973e-06,
"loss": 1.354,
"step": 584500
},
{
"epoch": 2.47,
"learning_rate": 8.78493769128016e-06,
"loss": 1.3498,
"step": 585000
},
{
"epoch": 2.48,
"learning_rate": 8.74971114229835e-06,
"loss": 1.3625,
"step": 585500
},
{
"epoch": 2.48,
"learning_rate": 8.714484593316538e-06,
"loss": 1.3587,
"step": 586000
},
{
"epoch": 2.48,
"learning_rate": 8.679258044334726e-06,
"loss": 1.3632,
"step": 586500
},
{
"epoch": 2.48,
"learning_rate": 8.644031495352913e-06,
"loss": 1.3528,
"step": 587000
},
{
"epoch": 2.48,
"learning_rate": 8.608804946371102e-06,
"loss": 1.3698,
"step": 587500
},
{
"epoch": 2.49,
"learning_rate": 8.57357839738929e-06,
"loss": 1.3604,
"step": 588000
},
{
"epoch": 2.49,
"learning_rate": 8.538351848407479e-06,
"loss": 1.361,
"step": 588500
},
{
"epoch": 2.49,
"learning_rate": 8.503125299425667e-06,
"loss": 1.3603,
"step": 589000
},
{
"epoch": 2.49,
"learning_rate": 8.467898750443854e-06,
"loss": 1.3618,
"step": 589500
},
{
"epoch": 2.49,
"learning_rate": 8.432672201462043e-06,
"loss": 1.3444,
"step": 590000
},
{
"epoch": 2.5,
"learning_rate": 8.39744565248023e-06,
"loss": 1.3396,
"step": 590500
},
{
"epoch": 2.5,
"learning_rate": 8.36221910349842e-06,
"loss": 1.3527,
"step": 591000
},
{
"epoch": 2.5,
"learning_rate": 8.326992554516607e-06,
"loss": 1.3589,
"step": 591500
},
{
"epoch": 2.5,
"learning_rate": 8.291766005534796e-06,
"loss": 1.3463,
"step": 592000
},
{
"epoch": 2.5,
"learning_rate": 8.256539456552984e-06,
"loss": 1.3477,
"step": 592500
},
{
"epoch": 2.51,
"learning_rate": 8.221312907571171e-06,
"loss": 1.3451,
"step": 593000
},
{
"epoch": 2.51,
"learning_rate": 8.186086358589361e-06,
"loss": 1.3586,
"step": 593500
},
{
"epoch": 2.51,
"learning_rate": 8.150859809607548e-06,
"loss": 1.3506,
"step": 594000
},
{
"epoch": 2.51,
"learning_rate": 8.115633260625737e-06,
"loss": 1.3519,
"step": 594500
},
{
"epoch": 2.52,
"learning_rate": 8.080406711643924e-06,
"loss": 1.3391,
"step": 595000
},
{
"epoch": 2.52,
"learning_rate": 8.045180162662112e-06,
"loss": 1.3489,
"step": 595500
},
{
"epoch": 2.52,
"learning_rate": 8.009953613680301e-06,
"loss": 1.3505,
"step": 596000
},
{
"epoch": 2.52,
"learning_rate": 7.97472706469849e-06,
"loss": 1.3527,
"step": 596500
},
{
"epoch": 2.52,
"learning_rate": 7.939500515716678e-06,
"loss": 1.3402,
"step": 597000
},
{
"epoch": 2.53,
"learning_rate": 7.904273966734865e-06,
"loss": 1.3514,
"step": 597500
},
{
"epoch": 2.53,
"learning_rate": 7.869047417753054e-06,
"loss": 1.3529,
"step": 598000
},
{
"epoch": 2.53,
"learning_rate": 7.833820868771242e-06,
"loss": 1.361,
"step": 598500
},
{
"epoch": 2.53,
"learning_rate": 7.798594319789431e-06,
"loss": 1.3407,
"step": 599000
},
{
"epoch": 2.53,
"learning_rate": 7.763367770807618e-06,
"loss": 1.3368,
"step": 599500
},
{
"epoch": 2.54,
"learning_rate": 7.728141221825806e-06,
"loss": 1.3491,
"step": 600000
},
{
"epoch": 2.54,
"learning_rate": 7.692914672843993e-06,
"loss": 1.3582,
"step": 600500
},
{
"epoch": 2.54,
"learning_rate": 7.657688123862184e-06,
"loss": 1.354,
"step": 601000
},
{
"epoch": 2.54,
"learning_rate": 7.622461574880371e-06,
"loss": 1.3631,
"step": 601500
},
{
"epoch": 2.54,
"learning_rate": 7.587235025898559e-06,
"loss": 1.354,
"step": 602000
},
{
"epoch": 2.55,
"learning_rate": 7.552008476916747e-06,
"loss": 1.3508,
"step": 602500
},
{
"epoch": 2.55,
"learning_rate": 7.516781927934935e-06,
"loss": 1.3408,
"step": 603000
},
{
"epoch": 2.55,
"learning_rate": 7.481555378953124e-06,
"loss": 1.3407,
"step": 603500
},
{
"epoch": 2.55,
"learning_rate": 7.446328829971312e-06,
"loss": 1.3504,
"step": 604000
},
{
"epoch": 2.56,
"learning_rate": 7.4111022809895e-06,
"loss": 1.3384,
"step": 604500
},
{
"epoch": 2.56,
"learning_rate": 7.375875732007688e-06,
"loss": 1.3482,
"step": 605000
},
{
"epoch": 2.56,
"learning_rate": 7.340649183025876e-06,
"loss": 1.3527,
"step": 605500
},
{
"epoch": 2.56,
"learning_rate": 7.305422634044065e-06,
"loss": 1.3469,
"step": 606000
},
{
"epoch": 2.56,
"learning_rate": 7.270196085062253e-06,
"loss": 1.3572,
"step": 606500
},
{
"epoch": 2.57,
"learning_rate": 7.234969536080441e-06,
"loss": 1.3329,
"step": 607000
},
{
"epoch": 2.57,
"learning_rate": 7.1997429870986286e-06,
"loss": 1.3468,
"step": 607500
},
{
"epoch": 2.57,
"learning_rate": 7.164516438116816e-06,
"loss": 1.3374,
"step": 608000
},
{
"epoch": 2.57,
"learning_rate": 7.129289889135006e-06,
"loss": 1.3521,
"step": 608500
},
{
"epoch": 2.57,
"learning_rate": 7.0940633401531935e-06,
"loss": 1.3566,
"step": 609000
},
{
"epoch": 2.58,
"learning_rate": 7.058836791171382e-06,
"loss": 1.3346,
"step": 609500
},
{
"epoch": 2.58,
"learning_rate": 7.02361024218957e-06,
"loss": 1.3446,
"step": 610000
},
{
"epoch": 2.58,
"learning_rate": 6.988383693207758e-06,
"loss": 1.3393,
"step": 610500
},
{
"epoch": 2.58,
"learning_rate": 6.953157144225947e-06,
"loss": 1.3335,
"step": 611000
},
{
"epoch": 2.58,
"learning_rate": 6.917930595244135e-06,
"loss": 1.3398,
"step": 611500
},
{
"epoch": 2.59,
"learning_rate": 6.8827040462623225e-06,
"loss": 1.3617,
"step": 612000
},
{
"epoch": 2.59,
"learning_rate": 6.84747749728051e-06,
"loss": 1.333,
"step": 612500
},
{
"epoch": 2.59,
"learning_rate": 6.812250948298699e-06,
"loss": 1.3367,
"step": 613000
},
{
"epoch": 2.59,
"learning_rate": 6.7770243993168875e-06,
"loss": 1.3456,
"step": 613500
},
{
"epoch": 2.6,
"learning_rate": 6.741797850335075e-06,
"loss": 1.3506,
"step": 614000
},
{
"epoch": 2.6,
"learning_rate": 6.706571301353264e-06,
"loss": 1.3499,
"step": 614500
},
{
"epoch": 2.6,
"learning_rate": 6.671344752371452e-06,
"loss": 1.3481,
"step": 615000
},
{
"epoch": 2.6,
"learning_rate": 6.636118203389639e-06,
"loss": 1.3379,
"step": 615500
},
{
"epoch": 2.6,
"learning_rate": 6.600891654407827e-06,
"loss": 1.3317,
"step": 616000
},
{
"epoch": 2.61,
"learning_rate": 6.5656651054260165e-06,
"loss": 1.3541,
"step": 616500
},
{
"epoch": 2.61,
"learning_rate": 6.530438556444204e-06,
"loss": 1.34,
"step": 617000
},
{
"epoch": 2.61,
"learning_rate": 6.495212007462392e-06,
"loss": 1.3567,
"step": 617500
},
{
"epoch": 2.61,
"learning_rate": 6.459985458480581e-06,
"loss": 1.3509,
"step": 618000
},
{
"epoch": 2.61,
"learning_rate": 6.424758909498768e-06,
"loss": 1.3261,
"step": 618500
},
{
"epoch": 2.62,
"learning_rate": 6.389532360516958e-06,
"loss": 1.3413,
"step": 619000
},
{
"epoch": 2.62,
"learning_rate": 6.3543058115351456e-06,
"loss": 1.337,
"step": 619500
},
{
"epoch": 2.62,
"learning_rate": 6.319079262553333e-06,
"loss": 1.3517,
"step": 620000
},
{
"epoch": 2.62,
"learning_rate": 6.283852713571521e-06,
"loss": 1.3456,
"step": 620500
},
{
"epoch": 2.63,
"learning_rate": 6.24862616458971e-06,
"loss": 1.3509,
"step": 621000
},
{
"epoch": 2.63,
"learning_rate": 6.213399615607897e-06,
"loss": 1.3291,
"step": 621500
},
{
"epoch": 2.63,
"learning_rate": 6.178173066626086e-06,
"loss": 1.3451,
"step": 622000
},
{
"epoch": 2.63,
"learning_rate": 6.142946517644274e-06,
"loss": 1.3332,
"step": 622500
},
{
"epoch": 2.63,
"learning_rate": 6.107719968662462e-06,
"loss": 1.3295,
"step": 623000
},
{
"epoch": 2.64,
"learning_rate": 6.072493419680651e-06,
"loss": 1.3317,
"step": 623500
},
{
"epoch": 2.64,
"learning_rate": 6.037266870698839e-06,
"loss": 1.3144,
"step": 624000
},
{
"epoch": 2.64,
"learning_rate": 6.002040321717027e-06,
"loss": 1.349,
"step": 624500
},
{
"epoch": 2.64,
"learning_rate": 5.966813772735215e-06,
"loss": 1.3405,
"step": 625000
},
{
"epoch": 2.64,
"learning_rate": 5.931587223753403e-06,
"loss": 1.345,
"step": 625500
},
{
"epoch": 2.65,
"learning_rate": 5.896360674771591e-06,
"loss": 1.3336,
"step": 626000
},
{
"epoch": 2.65,
"learning_rate": 5.861134125789779e-06,
"loss": 1.3559,
"step": 626500
},
{
"epoch": 2.65,
"learning_rate": 5.825907576807968e-06,
"loss": 1.3467,
"step": 627000
},
{
"epoch": 2.65,
"learning_rate": 5.7906810278261555e-06,
"loss": 1.3416,
"step": 627500
},
{
"epoch": 2.65,
"learning_rate": 5.755454478844344e-06,
"loss": 1.3367,
"step": 628000
},
{
"epoch": 2.66,
"learning_rate": 5.720227929862533e-06,
"loss": 1.3412,
"step": 628500
},
{
"epoch": 2.66,
"learning_rate": 5.68500138088072e-06,
"loss": 1.331,
"step": 629000
},
{
"epoch": 2.66,
"learning_rate": 5.649774831898909e-06,
"loss": 1.3346,
"step": 629500
},
{
"epoch": 2.66,
"learning_rate": 5.614548282917097e-06,
"loss": 1.3458,
"step": 630000
},
{
"epoch": 2.67,
"learning_rate": 5.5793217339352845e-06,
"loss": 1.3439,
"step": 630500
},
{
"epoch": 2.67,
"learning_rate": 5.544095184953473e-06,
"loss": 1.3298,
"step": 631000
},
{
"epoch": 2.67,
"learning_rate": 5.508868635971661e-06,
"loss": 1.3438,
"step": 631500
},
{
"epoch": 2.67,
"learning_rate": 5.4736420869898495e-06,
"loss": 1.3493,
"step": 632000
},
{
"epoch": 2.67,
"learning_rate": 5.438415538008037e-06,
"loss": 1.3392,
"step": 632500
},
{
"epoch": 2.68,
"learning_rate": 5.403188989026226e-06,
"loss": 1.3413,
"step": 633000
},
{
"epoch": 2.68,
"learning_rate": 5.367962440044414e-06,
"loss": 1.3222,
"step": 633500
},
{
"epoch": 2.68,
"learning_rate": 5.332735891062602e-06,
"loss": 1.3396,
"step": 634000
},
{
"epoch": 2.68,
"learning_rate": 5.29750934208079e-06,
"loss": 1.3346,
"step": 634500
},
{
"epoch": 2.68,
"learning_rate": 5.2622827930989785e-06,
"loss": 1.3346,
"step": 635000
},
{
"epoch": 2.69,
"learning_rate": 5.227056244117166e-06,
"loss": 1.3347,
"step": 635500
},
{
"epoch": 2.69,
"learning_rate": 5.191829695135355e-06,
"loss": 1.3412,
"step": 636000
},
{
"epoch": 2.69,
"learning_rate": 5.156603146153543e-06,
"loss": 1.3337,
"step": 636500
},
{
"epoch": 2.69,
"learning_rate": 5.121376597171731e-06,
"loss": 1.3399,
"step": 637000
},
{
"epoch": 2.69,
"learning_rate": 5.086150048189919e-06,
"loss": 1.3279,
"step": 637500
},
{
"epoch": 2.7,
"learning_rate": 5.0509234992081075e-06,
"loss": 1.3261,
"step": 638000
},
{
"epoch": 2.7,
"learning_rate": 5.015696950226296e-06,
"loss": 1.3432,
"step": 638500
},
{
"epoch": 2.7,
"learning_rate": 4.980470401244484e-06,
"loss": 1.3482,
"step": 639000
},
{
"epoch": 2.7,
"learning_rate": 4.945243852262672e-06,
"loss": 1.3417,
"step": 639500
},
{
"epoch": 2.71,
"learning_rate": 4.91001730328086e-06,
"loss": 1.3162,
"step": 640000
},
{
"epoch": 2.71,
"learning_rate": 4.874790754299048e-06,
"loss": 1.3282,
"step": 640500
},
{
"epoch": 2.71,
"learning_rate": 4.8395642053172366e-06,
"loss": 1.3295,
"step": 641000
},
{
"epoch": 2.71,
"learning_rate": 4.804337656335424e-06,
"loss": 1.3344,
"step": 641500
},
{
"epoch": 2.71,
"learning_rate": 4.769111107353613e-06,
"loss": 1.3251,
"step": 642000
},
{
"epoch": 2.72,
"learning_rate": 4.7338845583718015e-06,
"loss": 1.334,
"step": 642500
},
{
"epoch": 2.72,
"learning_rate": 4.698658009389989e-06,
"loss": 1.3487,
"step": 643000
},
{
"epoch": 2.72,
"learning_rate": 4.663431460408178e-06,
"loss": 1.3221,
"step": 643500
},
{
"epoch": 2.72,
"learning_rate": 4.628204911426366e-06,
"loss": 1.3351,
"step": 644000
},
{
"epoch": 2.72,
"learning_rate": 4.592978362444553e-06,
"loss": 1.3307,
"step": 644500
},
{
"epoch": 2.73,
"learning_rate": 4.557751813462742e-06,
"loss": 1.3378,
"step": 645000
},
{
"epoch": 2.73,
"learning_rate": 4.52252526448093e-06,
"loss": 1.3297,
"step": 645500
},
{
"epoch": 2.73,
"learning_rate": 4.487298715499118e-06,
"loss": 1.3219,
"step": 646000
},
{
"epoch": 2.73,
"learning_rate": 4.452072166517306e-06,
"loss": 1.32,
"step": 646500
},
{
"epoch": 2.73,
"learning_rate": 4.416845617535495e-06,
"loss": 1.3336,
"step": 647000
},
{
"epoch": 2.74,
"learning_rate": 4.381619068553683e-06,
"loss": 1.3265,
"step": 647500
},
{
"epoch": 2.74,
"learning_rate": 4.346392519571871e-06,
"loss": 1.3393,
"step": 648000
},
{
"epoch": 2.74,
"learning_rate": 4.311165970590059e-06,
"loss": 1.3331,
"step": 648500
},
{
"epoch": 2.74,
"learning_rate": 4.275939421608247e-06,
"loss": 1.3354,
"step": 649000
},
{
"epoch": 2.75,
"learning_rate": 4.240712872626435e-06,
"loss": 1.3338,
"step": 649500
},
{
"epoch": 2.75,
"learning_rate": 4.205486323644624e-06,
"loss": 1.3358,
"step": 650000
},
{
"epoch": 2.75,
"learning_rate": 4.1702597746628114e-06,
"loss": 1.3439,
"step": 650500
},
{
"epoch": 2.75,
"learning_rate": 4.135033225680999e-06,
"loss": 1.3401,
"step": 651000
},
{
"epoch": 2.75,
"learning_rate": 4.099806676699188e-06,
"loss": 1.3163,
"step": 651500
},
{
"epoch": 2.76,
"learning_rate": 4.064580127717376e-06,
"loss": 1.3393,
"step": 652000
},
{
"epoch": 2.76,
"learning_rate": 4.029353578735565e-06,
"loss": 1.3251,
"step": 652500
},
{
"epoch": 2.76,
"learning_rate": 3.994127029753753e-06,
"loss": 1.3301,
"step": 653000
},
{
"epoch": 2.76,
"learning_rate": 3.9589004807719405e-06,
"loss": 1.3143,
"step": 653500
},
{
"epoch": 2.76,
"learning_rate": 3.923673931790129e-06,
"loss": 1.3458,
"step": 654000
},
{
"epoch": 2.77,
"learning_rate": 3.888447382808317e-06,
"loss": 1.3413,
"step": 654500
},
{
"epoch": 2.77,
"learning_rate": 3.853220833826505e-06,
"loss": 1.3274,
"step": 655000
},
{
"epoch": 2.77,
"learning_rate": 3.817994284844693e-06,
"loss": 1.324,
"step": 655500
},
{
"epoch": 2.77,
"learning_rate": 3.7827677358628813e-06,
"loss": 1.3265,
"step": 656000
},
{
"epoch": 2.78,
"learning_rate": 3.74754118688107e-06,
"loss": 1.3116,
"step": 656500
},
{
"epoch": 2.78,
"learning_rate": 3.7123146378992577e-06,
"loss": 1.3404,
"step": 657000
},
{
"epoch": 2.78,
"learning_rate": 3.6770880889174463e-06,
"loss": 1.3302,
"step": 657500
},
{
"epoch": 2.78,
"learning_rate": 3.6418615399356344e-06,
"loss": 1.3296,
"step": 658000
},
{
"epoch": 2.78,
"learning_rate": 3.606634990953822e-06,
"loss": 1.3141,
"step": 658500
},
{
"epoch": 2.79,
"learning_rate": 3.571408441972011e-06,
"loss": 1.3288,
"step": 659000
},
{
"epoch": 2.79,
"learning_rate": 3.5361818929901985e-06,
"loss": 1.3337,
"step": 659500
},
{
"epoch": 2.79,
"learning_rate": 3.5009553440083867e-06,
"loss": 1.321,
"step": 660000
},
{
"epoch": 2.79,
"learning_rate": 3.4657287950265753e-06,
"loss": 1.3244,
"step": 660500
},
{
"epoch": 2.79,
"learning_rate": 3.430502246044763e-06,
"loss": 1.3255,
"step": 661000
},
{
"epoch": 2.8,
"learning_rate": 3.3952756970629517e-06,
"loss": 1.3326,
"step": 661500
},
{
"epoch": 2.8,
"learning_rate": 3.3600491480811394e-06,
"loss": 1.3278,
"step": 662000
},
{
"epoch": 2.8,
"learning_rate": 3.3248225990993276e-06,
"loss": 1.3335,
"step": 662500
},
{
"epoch": 2.8,
"learning_rate": 3.289596050117516e-06,
"loss": 1.3376,
"step": 663000
},
{
"epoch": 2.8,
"learning_rate": 3.254369501135704e-06,
"loss": 1.3085,
"step": 663500
},
{
"epoch": 2.81,
"learning_rate": 3.2191429521538925e-06,
"loss": 1.3203,
"step": 664000
},
{
"epoch": 2.81,
"learning_rate": 3.1839164031720803e-06,
"loss": 1.3281,
"step": 664500
},
{
"epoch": 2.81,
"learning_rate": 3.1486898541902684e-06,
"loss": 1.3187,
"step": 665000
},
{
"epoch": 2.81,
"learning_rate": 3.113463305208457e-06,
"loss": 1.3373,
"step": 665500
},
{
"epoch": 2.82,
"learning_rate": 3.0782367562266448e-06,
"loss": 1.3192,
"step": 666000
},
{
"epoch": 2.82,
"learning_rate": 3.043010207244833e-06,
"loss": 1.3173,
"step": 666500
},
{
"epoch": 2.82,
"learning_rate": 3.007783658263021e-06,
"loss": 1.307,
"step": 667000
},
{
"epoch": 2.82,
"learning_rate": 2.9725571092812097e-06,
"loss": 1.3271,
"step": 667500
},
{
"epoch": 2.82,
"learning_rate": 2.937330560299398e-06,
"loss": 1.3362,
"step": 668000
},
{
"epoch": 2.83,
"learning_rate": 2.9021040113175857e-06,
"loss": 1.3479,
"step": 668500
},
{
"epoch": 2.83,
"learning_rate": 2.866877462335774e-06,
"loss": 1.3371,
"step": 669000
},
{
"epoch": 2.83,
"learning_rate": 2.831650913353962e-06,
"loss": 1.3295,
"step": 669500
},
{
"epoch": 2.83,
"learning_rate": 2.7964243643721506e-06,
"loss": 1.3291,
"step": 670000
},
{
"epoch": 2.83,
"learning_rate": 2.7611978153903383e-06,
"loss": 1.3149,
"step": 670500
},
{
"epoch": 2.84,
"learning_rate": 2.7259712664085265e-06,
"loss": 1.325,
"step": 671000
},
{
"epoch": 2.84,
"learning_rate": 2.6907447174267147e-06,
"loss": 1.3295,
"step": 671500
},
{
"epoch": 2.84,
"learning_rate": 2.655518168444903e-06,
"loss": 1.3222,
"step": 672000
},
{
"epoch": 2.84,
"learning_rate": 2.6202916194630915e-06,
"loss": 1.3174,
"step": 672500
},
{
"epoch": 2.84,
"learning_rate": 2.585065070481279e-06,
"loss": 1.3242,
"step": 673000
},
{
"epoch": 2.85,
"learning_rate": 2.5498385214994674e-06,
"loss": 1.3136,
"step": 673500
},
{
"epoch": 2.85,
"learning_rate": 2.5146119725176556e-06,
"loss": 1.3288,
"step": 674000
},
{
"epoch": 2.85,
"learning_rate": 2.479385423535844e-06,
"loss": 1.3329,
"step": 674500
},
{
"epoch": 2.85,
"learning_rate": 2.444158874554032e-06,
"loss": 1.3102,
"step": 675000
},
{
"epoch": 2.86,
"learning_rate": 2.40893232557222e-06,
"loss": 1.3158,
"step": 675500
},
{
"epoch": 2.86,
"learning_rate": 2.3737057765904082e-06,
"loss": 1.3143,
"step": 676000
},
{
"epoch": 2.86,
"learning_rate": 2.3384792276085964e-06,
"loss": 1.3325,
"step": 676500
},
{
"epoch": 2.86,
"learning_rate": 2.303252678626785e-06,
"loss": 1.3158,
"step": 677000
},
{
"epoch": 2.86,
"learning_rate": 2.2680261296449728e-06,
"loss": 1.3207,
"step": 677500
},
{
"epoch": 2.87,
"learning_rate": 2.232799580663161e-06,
"loss": 1.3307,
"step": 678000
},
{
"epoch": 2.87,
"learning_rate": 2.197573031681349e-06,
"loss": 1.3179,
"step": 678500
},
{
"epoch": 2.87,
"learning_rate": 2.1623464826995373e-06,
"loss": 1.3118,
"step": 679000
},
{
"epoch": 2.87,
"learning_rate": 2.127119933717726e-06,
"loss": 1.344,
"step": 679500
},
{
"epoch": 2.87,
"learning_rate": 2.0918933847359136e-06,
"loss": 1.3192,
"step": 680000
},
{
"epoch": 2.88,
"learning_rate": 2.056666835754102e-06,
"loss": 1.3186,
"step": 680500
},
{
"epoch": 2.88,
"learning_rate": 2.02144028677229e-06,
"loss": 1.3112,
"step": 681000
},
{
"epoch": 2.88,
"learning_rate": 1.986213737790478e-06,
"loss": 1.3161,
"step": 681500
},
{
"epoch": 2.88,
"learning_rate": 1.9509871888086663e-06,
"loss": 1.3313,
"step": 682000
},
{
"epoch": 2.89,
"learning_rate": 1.9157606398268545e-06,
"loss": 1.3213,
"step": 682500
},
{
"epoch": 2.89,
"learning_rate": 1.8805340908450427e-06,
"loss": 1.3171,
"step": 683000
},
{
"epoch": 2.89,
"learning_rate": 1.845307541863231e-06,
"loss": 1.3279,
"step": 683500
},
{
"epoch": 2.89,
"learning_rate": 1.8100809928814192e-06,
"loss": 1.3207,
"step": 684000
},
{
"epoch": 2.89,
"learning_rate": 1.7748544438996072e-06,
"loss": 1.3084,
"step": 684500
},
{
"epoch": 2.9,
"learning_rate": 1.7396278949177954e-06,
"loss": 1.3133,
"step": 685000
},
{
"epoch": 2.9,
"learning_rate": 1.7044013459359835e-06,
"loss": 1.3074,
"step": 685500
},
{
"epoch": 2.9,
"learning_rate": 1.669174796954172e-06,
"loss": 1.3178,
"step": 686000
},
{
"epoch": 2.9,
"learning_rate": 1.63394824797236e-06,
"loss": 1.3199,
"step": 686500
},
{
"epoch": 2.9,
"learning_rate": 1.598721698990548e-06,
"loss": 1.3263,
"step": 687000
},
{
"epoch": 2.91,
"learning_rate": 1.5634951500087362e-06,
"loss": 1.3253,
"step": 687500
},
{
"epoch": 2.91,
"learning_rate": 1.5282686010269244e-06,
"loss": 1.3111,
"step": 688000
},
{
"epoch": 2.91,
"learning_rate": 1.4930420520451126e-06,
"loss": 1.3213,
"step": 688500
},
{
"epoch": 2.91,
"learning_rate": 1.4578155030633007e-06,
"loss": 1.3083,
"step": 689000
},
{
"epoch": 2.91,
"learning_rate": 1.422588954081489e-06,
"loss": 1.3245,
"step": 689500
},
{
"epoch": 2.92,
"learning_rate": 1.387362405099677e-06,
"loss": 1.3098,
"step": 690000
},
{
"epoch": 2.92,
"learning_rate": 1.3521358561178653e-06,
"loss": 1.3275,
"step": 690500
},
{
"epoch": 2.92,
"learning_rate": 1.3169093071360534e-06,
"loss": 1.3205,
"step": 691000
},
{
"epoch": 2.92,
"learning_rate": 1.2816827581542416e-06,
"loss": 1.3193,
"step": 691500
},
{
"epoch": 2.93,
"learning_rate": 1.2464562091724298e-06,
"loss": 1.3155,
"step": 692000
},
{
"epoch": 2.93,
"learning_rate": 1.211229660190618e-06,
"loss": 1.3161,
"step": 692500
},
{
"epoch": 2.93,
"learning_rate": 1.1760031112088061e-06,
"loss": 1.324,
"step": 693000
},
{
"epoch": 2.93,
"learning_rate": 1.1407765622269943e-06,
"loss": 1.3231,
"step": 693500
},
{
"epoch": 2.93,
"learning_rate": 1.1055500132451825e-06,
"loss": 1.3215,
"step": 694000
},
{
"epoch": 2.94,
"learning_rate": 1.0703234642633706e-06,
"loss": 1.3236,
"step": 694500
},
{
"epoch": 2.94,
"learning_rate": 1.0350969152815588e-06,
"loss": 1.3201,
"step": 695000
},
{
"epoch": 2.94,
"learning_rate": 9.99870366299747e-07,
"loss": 1.3197,
"step": 695500
},
{
"epoch": 2.94,
"learning_rate": 9.646438173179352e-07,
"loss": 1.3218,
"step": 696000
},
{
"epoch": 2.94,
"learning_rate": 9.294172683361232e-07,
"loss": 1.3271,
"step": 696500
},
{
"epoch": 2.95,
"learning_rate": 8.941907193543115e-07,
"loss": 1.3214,
"step": 697000
},
{
"epoch": 2.95,
"learning_rate": 8.589641703724998e-07,
"loss": 1.3101,
"step": 697500
},
{
"epoch": 2.95,
"learning_rate": 8.237376213906878e-07,
"loss": 1.3195,
"step": 698000
},
{
"epoch": 2.95,
"learning_rate": 7.88511072408876e-07,
"loss": 1.3184,
"step": 698500
},
{
"epoch": 2.95,
"learning_rate": 7.532845234270642e-07,
"loss": 1.3235,
"step": 699000
},
{
"epoch": 2.96,
"learning_rate": 7.180579744452524e-07,
"loss": 1.3289,
"step": 699500
},
{
"epoch": 2.96,
"learning_rate": 6.828314254634405e-07,
"loss": 1.3122,
"step": 700000
},
{
"epoch": 2.96,
"learning_rate": 6.476048764816287e-07,
"loss": 1.3339,
"step": 700500
},
{
"epoch": 2.96,
"learning_rate": 6.123783274998169e-07,
"loss": 1.3216,
"step": 701000
},
{
"epoch": 2.97,
"learning_rate": 5.77151778518005e-07,
"loss": 1.312,
"step": 701500
},
{
"epoch": 2.97,
"learning_rate": 5.419252295361931e-07,
"loss": 1.3186,
"step": 702000
},
{
"epoch": 2.97,
"learning_rate": 5.066986805543813e-07,
"loss": 1.3179,
"step": 702500
},
{
"epoch": 2.97,
"learning_rate": 4.714721315725696e-07,
"loss": 1.3172,
"step": 703000
},
{
"epoch": 2.97,
"learning_rate": 4.3624558259075775e-07,
"loss": 1.314,
"step": 703500
},
{
"epoch": 2.98,
"learning_rate": 4.0101903360894587e-07,
"loss": 1.3203,
"step": 704000
},
{
"epoch": 2.98,
"learning_rate": 3.6579248462713404e-07,
"loss": 1.3128,
"step": 704500
},
{
"epoch": 2.98,
"learning_rate": 3.305659356453222e-07,
"loss": 1.3217,
"step": 705000
},
{
"epoch": 2.98,
"learning_rate": 2.953393866635104e-07,
"loss": 1.318,
"step": 705500
},
{
"epoch": 2.98,
"learning_rate": 2.6011283768169856e-07,
"loss": 1.3209,
"step": 706000
},
{
"epoch": 2.99,
"learning_rate": 2.248862886998867e-07,
"loss": 1.3108,
"step": 706500
},
{
"epoch": 2.99,
"learning_rate": 1.896597397180749e-07,
"loss": 1.3117,
"step": 707000
},
{
"epoch": 2.99,
"learning_rate": 1.5443319073626305e-07,
"loss": 1.3277,
"step": 707500
},
{
"epoch": 2.99,
"learning_rate": 1.1920664175445124e-07,
"loss": 1.3182,
"step": 708000
},
{
"epoch": 2.99,
"learning_rate": 8.39800927726394e-08,
"loss": 1.3135,
"step": 708500
},
{
"epoch": 3.0,
"learning_rate": 4.875354379082757e-08,
"loss": 1.3087,
"step": 709000
},
{
"epoch": 3.0,
"learning_rate": 1.3526994809015742e-08,
"loss": 1.3195,
"step": 709500
},
{
"epoch": 3.0,
"step": 709692,
"total_flos": 6.168001493481062e+18,
"train_runtime": 388027.9373,
"train_samples_per_second": 1.829
}
],
"max_steps": 709692,
"num_train_epochs": 3,
"total_flos": 6.168001493481062e+18,
"trial_name": null,
"trial_params": null
}