EthioLLM-s-70k / trainer_state.json
Seid Muhie Yimam
ethio afro small
46760e7
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 453060,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.9945481834635596e-05,
"loss": 8.2957,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.989030150531939e-05,
"loss": 7.4986,
"step": 1000
},
{
"epoch": 0.03,
"learning_rate": 4.983523153666181e-05,
"loss": 7.1122,
"step": 1500
},
{
"epoch": 0.04,
"learning_rate": 4.978005120734561e-05,
"loss": 6.9308,
"step": 2000
},
{
"epoch": 0.06,
"learning_rate": 4.97248708780294e-05,
"loss": 6.7516,
"step": 2500
},
{
"epoch": 0.07,
"learning_rate": 4.96696905487132e-05,
"loss": 6.6291,
"step": 3000
},
{
"epoch": 0.08,
"learning_rate": 4.9614510219396995e-05,
"loss": 6.5093,
"step": 3500
},
{
"epoch": 0.09,
"learning_rate": 4.9559329890080786e-05,
"loss": 6.3733,
"step": 4000
},
{
"epoch": 0.1,
"learning_rate": 4.9504149560764576e-05,
"loss": 6.3353,
"step": 4500
},
{
"epoch": 0.11,
"learning_rate": 4.944896923144838e-05,
"loss": 6.2526,
"step": 5000
},
{
"epoch": 0.12,
"learning_rate": 4.939378890213217e-05,
"loss": 6.1669,
"step": 5500
},
{
"epoch": 0.13,
"learning_rate": 4.933860857281596e-05,
"loss": 6.1291,
"step": 6000
},
{
"epoch": 0.14,
"learning_rate": 4.928342824349976e-05,
"loss": 6.0292,
"step": 6500
},
{
"epoch": 0.15,
"learning_rate": 4.922824791418356e-05,
"loss": 6.0037,
"step": 7000
},
{
"epoch": 0.17,
"learning_rate": 4.917306758486735e-05,
"loss": 5.8958,
"step": 7500
},
{
"epoch": 0.18,
"learning_rate": 4.9117887255551145e-05,
"loss": 5.8496,
"step": 8000
},
{
"epoch": 0.19,
"learning_rate": 4.9062706926234936e-05,
"loss": 5.8211,
"step": 8500
},
{
"epoch": 0.2,
"learning_rate": 4.9007526596918733e-05,
"loss": 5.7736,
"step": 9000
},
{
"epoch": 0.21,
"learning_rate": 4.895234626760253e-05,
"loss": 5.7172,
"step": 9500
},
{
"epoch": 0.22,
"learning_rate": 4.889716593828632e-05,
"loss": 5.6462,
"step": 10000
},
{
"epoch": 0.23,
"learning_rate": 4.884198560897011e-05,
"loss": 5.6466,
"step": 10500
},
{
"epoch": 0.24,
"learning_rate": 4.8786805279653917e-05,
"loss": 5.5702,
"step": 11000
},
{
"epoch": 0.25,
"learning_rate": 4.873162495033771e-05,
"loss": 5.5162,
"step": 11500
},
{
"epoch": 0.26,
"learning_rate": 4.86764446210215e-05,
"loss": 5.4766,
"step": 12000
},
{
"epoch": 0.28,
"learning_rate": 4.8621264291705296e-05,
"loss": 5.4462,
"step": 12500
},
{
"epoch": 0.29,
"learning_rate": 4.856619432304772e-05,
"loss": 5.4166,
"step": 13000
},
{
"epoch": 0.3,
"learning_rate": 4.851101399373152e-05,
"loss": 5.3868,
"step": 13500
},
{
"epoch": 0.31,
"learning_rate": 4.8455833664415315e-05,
"loss": 5.362,
"step": 14000
},
{
"epoch": 0.32,
"learning_rate": 4.8400653335099106e-05,
"loss": 5.3047,
"step": 14500
},
{
"epoch": 0.33,
"learning_rate": 4.8345473005782897e-05,
"loss": 5.2717,
"step": 15000
},
{
"epoch": 0.34,
"learning_rate": 4.8290292676466694e-05,
"loss": 5.284,
"step": 15500
},
{
"epoch": 0.35,
"learning_rate": 4.823511234715049e-05,
"loss": 5.215,
"step": 16000
},
{
"epoch": 0.36,
"learning_rate": 4.817993201783428e-05,
"loss": 5.1798,
"step": 16500
},
{
"epoch": 0.38,
"learning_rate": 4.812475168851808e-05,
"loss": 5.1993,
"step": 17000
},
{
"epoch": 0.39,
"learning_rate": 4.8069681719860504e-05,
"loss": 5.164,
"step": 17500
},
{
"epoch": 0.4,
"learning_rate": 4.80145013905443e-05,
"loss": 5.1332,
"step": 18000
},
{
"epoch": 0.41,
"learning_rate": 4.79593210612281e-05,
"loss": 5.1316,
"step": 18500
},
{
"epoch": 0.42,
"learning_rate": 4.790414073191189e-05,
"loss": 5.0732,
"step": 19000
},
{
"epoch": 0.43,
"learning_rate": 4.784896040259568e-05,
"loss": 5.0507,
"step": 19500
},
{
"epoch": 0.44,
"learning_rate": 4.779378007327948e-05,
"loss": 5.0318,
"step": 20000
},
{
"epoch": 0.45,
"learning_rate": 4.7738599743963276e-05,
"loss": 4.9986,
"step": 20500
},
{
"epoch": 0.46,
"learning_rate": 4.7683419414647066e-05,
"loss": 5.0123,
"step": 21000
},
{
"epoch": 0.47,
"learning_rate": 4.76283494459895e-05,
"loss": 4.9908,
"step": 21500
},
{
"epoch": 0.49,
"learning_rate": 4.757316911667329e-05,
"loss": 4.9997,
"step": 22000
},
{
"epoch": 0.5,
"learning_rate": 4.7517988787357086e-05,
"loss": 4.9212,
"step": 22500
},
{
"epoch": 0.51,
"learning_rate": 4.7462808458040884e-05,
"loss": 4.9323,
"step": 23000
},
{
"epoch": 0.52,
"learning_rate": 4.7407628128724674e-05,
"loss": 4.9189,
"step": 23500
},
{
"epoch": 0.53,
"learning_rate": 4.7352447799408465e-05,
"loss": 4.8979,
"step": 24000
},
{
"epoch": 0.54,
"learning_rate": 4.729726747009226e-05,
"loss": 4.8994,
"step": 24500
},
{
"epoch": 0.55,
"learning_rate": 4.7242197501434694e-05,
"loss": 4.8811,
"step": 25000
},
{
"epoch": 0.56,
"learning_rate": 4.7187017172118485e-05,
"loss": 4.8343,
"step": 25500
},
{
"epoch": 0.57,
"learning_rate": 4.713183684280228e-05,
"loss": 4.8429,
"step": 26000
},
{
"epoch": 0.58,
"learning_rate": 4.707665651348607e-05,
"loss": 4.7863,
"step": 26500
},
{
"epoch": 0.6,
"learning_rate": 4.702147618416987e-05,
"loss": 4.8158,
"step": 27000
},
{
"epoch": 0.61,
"learning_rate": 4.69664062155123e-05,
"loss": 4.8308,
"step": 27500
},
{
"epoch": 0.62,
"learning_rate": 4.691122588619609e-05,
"loss": 4.7503,
"step": 28000
},
{
"epoch": 0.63,
"learning_rate": 4.685604555687988e-05,
"loss": 4.7542,
"step": 28500
},
{
"epoch": 0.64,
"learning_rate": 4.680086522756368e-05,
"loss": 4.7664,
"step": 29000
},
{
"epoch": 0.65,
"learning_rate": 4.6745795258906105e-05,
"loss": 4.7493,
"step": 29500
},
{
"epoch": 0.66,
"learning_rate": 4.66906149295899e-05,
"loss": 4.7297,
"step": 30000
},
{
"epoch": 0.67,
"learning_rate": 4.66354346002737e-05,
"loss": 4.7293,
"step": 30500
},
{
"epoch": 0.68,
"learning_rate": 4.658025427095749e-05,
"loss": 4.7429,
"step": 31000
},
{
"epoch": 0.7,
"learning_rate": 4.652507394164128e-05,
"loss": 4.6832,
"step": 31500
},
{
"epoch": 0.71,
"learning_rate": 4.6469893612325086e-05,
"loss": 4.6908,
"step": 32000
},
{
"epoch": 0.72,
"learning_rate": 4.6414713283008877e-05,
"loss": 4.6757,
"step": 32500
},
{
"epoch": 0.73,
"learning_rate": 4.635953295369267e-05,
"loss": 4.6644,
"step": 33000
},
{
"epoch": 0.74,
"learning_rate": 4.6304352624376465e-05,
"loss": 4.6495,
"step": 33500
},
{
"epoch": 0.75,
"learning_rate": 4.624917229506026e-05,
"loss": 4.6474,
"step": 34000
},
{
"epoch": 0.76,
"learning_rate": 4.619399196574405e-05,
"loss": 4.6175,
"step": 34500
},
{
"epoch": 0.77,
"learning_rate": 4.613881163642785e-05,
"loss": 4.633,
"step": 35000
},
{
"epoch": 0.78,
"learning_rate": 4.608363130711164e-05,
"loss": 4.6078,
"step": 35500
},
{
"epoch": 0.79,
"learning_rate": 4.602845097779544e-05,
"loss": 4.6498,
"step": 36000
},
{
"epoch": 0.81,
"learning_rate": 4.5973270648479236e-05,
"loss": 4.5697,
"step": 36500
},
{
"epoch": 0.82,
"learning_rate": 4.591820067982166e-05,
"loss": 4.6061,
"step": 37000
},
{
"epoch": 0.83,
"learning_rate": 4.586302035050545e-05,
"loss": 4.5792,
"step": 37500
},
{
"epoch": 0.84,
"learning_rate": 4.580784002118925e-05,
"loss": 4.5529,
"step": 38000
},
{
"epoch": 0.85,
"learning_rate": 4.5752659691873046e-05,
"loss": 4.5448,
"step": 38500
},
{
"epoch": 0.86,
"learning_rate": 4.569747936255684e-05,
"loss": 4.5508,
"step": 39000
},
{
"epoch": 0.87,
"learning_rate": 4.564240939389927e-05,
"loss": 4.5463,
"step": 39500
},
{
"epoch": 0.88,
"learning_rate": 4.558722906458306e-05,
"loss": 4.5267,
"step": 40000
},
{
"epoch": 0.89,
"learning_rate": 4.553204873526685e-05,
"loss": 4.497,
"step": 40500
},
{
"epoch": 0.9,
"learning_rate": 4.5476868405950654e-05,
"loss": 4.4829,
"step": 41000
},
{
"epoch": 0.92,
"learning_rate": 4.5421688076634445e-05,
"loss": 4.4772,
"step": 41500
},
{
"epoch": 0.93,
"learning_rate": 4.536661810797687e-05,
"loss": 4.459,
"step": 42000
},
{
"epoch": 0.94,
"learning_rate": 4.531143777866067e-05,
"loss": 4.4694,
"step": 42500
},
{
"epoch": 0.95,
"learning_rate": 4.525625744934446e-05,
"loss": 4.4664,
"step": 43000
},
{
"epoch": 0.96,
"learning_rate": 4.520118748068688e-05,
"loss": 4.4453,
"step": 43500
},
{
"epoch": 0.97,
"learning_rate": 4.514600715137069e-05,
"loss": 4.4533,
"step": 44000
},
{
"epoch": 0.98,
"learning_rate": 4.509082682205448e-05,
"loss": 4.4311,
"step": 44500
},
{
"epoch": 0.99,
"learning_rate": 4.503564649273827e-05,
"loss": 4.4547,
"step": 45000
},
{
"epoch": 1.0,
"learning_rate": 4.4980466163422066e-05,
"loss": 4.4123,
"step": 45500
},
{
"epoch": 1.02,
"learning_rate": 4.492528583410586e-05,
"loss": 4.4141,
"step": 46000
},
{
"epoch": 1.03,
"learning_rate": 4.4870105504789654e-05,
"loss": 4.4213,
"step": 46500
},
{
"epoch": 1.04,
"learning_rate": 4.481492517547345e-05,
"loss": 4.3711,
"step": 47000
},
{
"epoch": 1.05,
"learning_rate": 4.475974484615724e-05,
"loss": 4.4278,
"step": 47500
},
{
"epoch": 1.06,
"learning_rate": 4.4704674877499667e-05,
"loss": 4.4058,
"step": 48000
},
{
"epoch": 1.07,
"learning_rate": 4.464949454818347e-05,
"loss": 4.3586,
"step": 48500
},
{
"epoch": 1.08,
"learning_rate": 4.459431421886726e-05,
"loss": 4.3977,
"step": 49000
},
{
"epoch": 1.09,
"learning_rate": 4.453913388955105e-05,
"loss": 4.3523,
"step": 49500
},
{
"epoch": 1.1,
"learning_rate": 4.4484063920893484e-05,
"loss": 4.3925,
"step": 50000
},
{
"epoch": 1.11,
"learning_rate": 4.4428883591577274e-05,
"loss": 4.3458,
"step": 50500
},
{
"epoch": 1.13,
"learning_rate": 4.437370326226107e-05,
"loss": 4.3373,
"step": 51000
},
{
"epoch": 1.14,
"learning_rate": 4.431852293294487e-05,
"loss": 4.3274,
"step": 51500
},
{
"epoch": 1.15,
"learning_rate": 4.426334260362866e-05,
"loss": 4.3327,
"step": 52000
},
{
"epoch": 1.16,
"learning_rate": 4.4208272634971085e-05,
"loss": 4.332,
"step": 52500
},
{
"epoch": 1.17,
"learning_rate": 4.415309230565488e-05,
"loss": 4.3084,
"step": 53000
},
{
"epoch": 1.18,
"learning_rate": 4.409802233699731e-05,
"loss": 4.3232,
"step": 53500
},
{
"epoch": 1.19,
"learning_rate": 4.4042842007681104e-05,
"loss": 4.3179,
"step": 54000
},
{
"epoch": 1.2,
"learning_rate": 4.39876616783649e-05,
"loss": 4.3133,
"step": 54500
},
{
"epoch": 1.21,
"learning_rate": 4.393248134904869e-05,
"loss": 4.3039,
"step": 55000
},
{
"epoch": 1.23,
"learning_rate": 4.387730101973248e-05,
"loss": 4.283,
"step": 55500
},
{
"epoch": 1.24,
"learning_rate": 4.382212069041629e-05,
"loss": 4.3063,
"step": 56000
},
{
"epoch": 1.25,
"learning_rate": 4.376694036110008e-05,
"loss": 4.2711,
"step": 56500
},
{
"epoch": 1.26,
"learning_rate": 4.371176003178387e-05,
"loss": 4.2583,
"step": 57000
},
{
"epoch": 1.27,
"learning_rate": 4.3656579702467666e-05,
"loss": 4.2397,
"step": 57500
},
{
"epoch": 1.28,
"learning_rate": 4.360150973381009e-05,
"loss": 4.2567,
"step": 58000
},
{
"epoch": 1.29,
"learning_rate": 4.354632940449389e-05,
"loss": 4.2576,
"step": 58500
},
{
"epoch": 1.3,
"learning_rate": 4.349125943583632e-05,
"loss": 4.245,
"step": 59000
},
{
"epoch": 1.31,
"learning_rate": 4.343607910652011e-05,
"loss": 4.2465,
"step": 59500
},
{
"epoch": 1.32,
"learning_rate": 4.3381009137862535e-05,
"loss": 4.2152,
"step": 60000
},
{
"epoch": 1.34,
"learning_rate": 4.332582880854633e-05,
"loss": 4.2138,
"step": 60500
},
{
"epoch": 1.35,
"learning_rate": 4.327064847923012e-05,
"loss": 4.2366,
"step": 61000
},
{
"epoch": 1.36,
"learning_rate": 4.321546814991392e-05,
"loss": 4.2122,
"step": 61500
},
{
"epoch": 1.37,
"learning_rate": 4.316028782059772e-05,
"loss": 4.2188,
"step": 62000
},
{
"epoch": 1.38,
"learning_rate": 4.310510749128151e-05,
"loss": 4.2128,
"step": 62500
},
{
"epoch": 1.39,
"learning_rate": 4.3049927161965307e-05,
"loss": 4.1938,
"step": 63000
},
{
"epoch": 1.4,
"learning_rate": 4.2994746832649104e-05,
"loss": 4.1701,
"step": 63500
},
{
"epoch": 1.41,
"learning_rate": 4.2939566503332895e-05,
"loss": 4.1805,
"step": 64000
},
{
"epoch": 1.42,
"learning_rate": 4.2884386174016685e-05,
"loss": 4.1506,
"step": 64500
},
{
"epoch": 1.43,
"learning_rate": 4.282920584470048e-05,
"loss": 4.144,
"step": 65000
},
{
"epoch": 1.45,
"learning_rate": 4.277413587604291e-05,
"loss": 4.177,
"step": 65500
},
{
"epoch": 1.46,
"learning_rate": 4.2718955546726705e-05,
"loss": 4.1733,
"step": 66000
},
{
"epoch": 1.47,
"learning_rate": 4.26637752174105e-05,
"loss": 4.1319,
"step": 66500
},
{
"epoch": 1.48,
"learning_rate": 4.260859488809429e-05,
"loss": 4.1476,
"step": 67000
},
{
"epoch": 1.49,
"learning_rate": 4.255341455877809e-05,
"loss": 4.12,
"step": 67500
},
{
"epoch": 1.5,
"learning_rate": 4.249823422946188e-05,
"loss": 4.115,
"step": 68000
},
{
"epoch": 1.51,
"learning_rate": 4.2443164260804306e-05,
"loss": 4.1059,
"step": 68500
},
{
"epoch": 1.52,
"learning_rate": 4.238809429214674e-05,
"loss": 4.106,
"step": 69000
},
{
"epoch": 1.53,
"learning_rate": 4.2332913962830535e-05,
"loss": 4.1249,
"step": 69500
},
{
"epoch": 1.55,
"learning_rate": 4.2277733633514326e-05,
"loss": 4.1079,
"step": 70000
},
{
"epoch": 1.56,
"learning_rate": 4.222255330419812e-05,
"loss": 4.1175,
"step": 70500
},
{
"epoch": 1.57,
"learning_rate": 4.2167372974881914e-05,
"loss": 4.0682,
"step": 71000
},
{
"epoch": 1.58,
"learning_rate": 4.211219264556571e-05,
"loss": 4.0642,
"step": 71500
},
{
"epoch": 1.59,
"learning_rate": 4.205701231624951e-05,
"loss": 4.085,
"step": 72000
},
{
"epoch": 1.6,
"learning_rate": 4.20018319869333e-05,
"loss": 4.0739,
"step": 72500
},
{
"epoch": 1.61,
"learning_rate": 4.1946762018275724e-05,
"loss": 4.0577,
"step": 73000
},
{
"epoch": 1.62,
"learning_rate": 4.189158168895952e-05,
"loss": 4.0602,
"step": 73500
},
{
"epoch": 1.63,
"learning_rate": 4.183640135964332e-05,
"loss": 4.0439,
"step": 74000
},
{
"epoch": 1.64,
"learning_rate": 4.178122103032711e-05,
"loss": 4.0696,
"step": 74500
},
{
"epoch": 1.66,
"learning_rate": 4.172604070101091e-05,
"loss": 4.0246,
"step": 75000
},
{
"epoch": 1.67,
"learning_rate": 4.1671081093011966e-05,
"loss": 4.0381,
"step": 75500
},
{
"epoch": 1.68,
"learning_rate": 4.1615900763695757e-05,
"loss": 4.0224,
"step": 76000
},
{
"epoch": 1.69,
"learning_rate": 4.1560720434379554e-05,
"loss": 4.0377,
"step": 76500
},
{
"epoch": 1.7,
"learning_rate": 4.150554010506335e-05,
"loss": 4.0586,
"step": 77000
},
{
"epoch": 1.71,
"learning_rate": 4.145035977574714e-05,
"loss": 4.0013,
"step": 77500
},
{
"epoch": 1.72,
"learning_rate": 4.139517944643094e-05,
"loss": 4.0415,
"step": 78000
},
{
"epoch": 1.73,
"learning_rate": 4.133999911711473e-05,
"loss": 4.0169,
"step": 78500
},
{
"epoch": 1.74,
"learning_rate": 4.128481878779853e-05,
"loss": 4.0217,
"step": 79000
},
{
"epoch": 1.75,
"learning_rate": 4.1229638458482325e-05,
"loss": 4.0232,
"step": 79500
},
{
"epoch": 1.77,
"learning_rate": 4.1174458129166116e-05,
"loss": 4.0055,
"step": 80000
},
{
"epoch": 1.78,
"learning_rate": 4.111927779984991e-05,
"loss": 3.9907,
"step": 80500
},
{
"epoch": 1.79,
"learning_rate": 4.106409747053371e-05,
"loss": 3.9717,
"step": 81000
},
{
"epoch": 1.8,
"learning_rate": 4.10089171412175e-05,
"loss": 3.981,
"step": 81500
},
{
"epoch": 1.81,
"learning_rate": 4.095373681190129e-05,
"loss": 3.9777,
"step": 82000
},
{
"epoch": 1.82,
"learning_rate": 4.089855648258509e-05,
"loss": 3.9556,
"step": 82500
},
{
"epoch": 1.83,
"learning_rate": 4.084337615326889e-05,
"loss": 3.9571,
"step": 83000
},
{
"epoch": 1.84,
"learning_rate": 4.078830618461131e-05,
"loss": 3.9604,
"step": 83500
},
{
"epoch": 1.85,
"learning_rate": 4.073312585529511e-05,
"loss": 3.9645,
"step": 84000
},
{
"epoch": 1.87,
"learning_rate": 4.06779455259789e-05,
"loss": 3.974,
"step": 84500
},
{
"epoch": 1.88,
"learning_rate": 4.062276519666269e-05,
"loss": 3.951,
"step": 85000
},
{
"epoch": 1.89,
"learning_rate": 4.0567584867346495e-05,
"loss": 3.9614,
"step": 85500
},
{
"epoch": 1.9,
"learning_rate": 4.051262525934755e-05,
"loss": 3.9635,
"step": 86000
},
{
"epoch": 1.91,
"learning_rate": 4.0457444930031345e-05,
"loss": 3.9224,
"step": 86500
},
{
"epoch": 1.92,
"learning_rate": 4.0402374961373776e-05,
"loss": 3.9468,
"step": 87000
},
{
"epoch": 1.93,
"learning_rate": 4.03473049927162e-05,
"loss": 3.9544,
"step": 87500
},
{
"epoch": 1.94,
"learning_rate": 4.029212466339999e-05,
"loss": 3.9295,
"step": 88000
},
{
"epoch": 1.95,
"learning_rate": 4.023694433408379e-05,
"loss": 3.913,
"step": 88500
},
{
"epoch": 1.96,
"learning_rate": 4.018176400476758e-05,
"loss": 3.9419,
"step": 89000
},
{
"epoch": 1.98,
"learning_rate": 4.012658367545138e-05,
"loss": 3.8917,
"step": 89500
},
{
"epoch": 1.99,
"learning_rate": 4.0071403346135174e-05,
"loss": 3.9129,
"step": 90000
},
{
"epoch": 2.0,
"learning_rate": 4.0016223016818965e-05,
"loss": 3.8824,
"step": 90500
},
{
"epoch": 2.01,
"learning_rate": 3.9961042687502756e-05,
"loss": 3.919,
"step": 91000
},
{
"epoch": 2.02,
"learning_rate": 3.990586235818656e-05,
"loss": 3.8981,
"step": 91500
},
{
"epoch": 2.03,
"learning_rate": 3.985068202887035e-05,
"loss": 3.9221,
"step": 92000
},
{
"epoch": 2.04,
"learning_rate": 3.979550169955414e-05,
"loss": 3.8827,
"step": 92500
},
{
"epoch": 2.05,
"learning_rate": 3.974032137023794e-05,
"loss": 3.8746,
"step": 93000
},
{
"epoch": 2.06,
"learning_rate": 3.9685141040921737e-05,
"loss": 3.8723,
"step": 93500
},
{
"epoch": 2.07,
"learning_rate": 3.962996071160553e-05,
"loss": 3.8958,
"step": 94000
},
{
"epoch": 2.09,
"learning_rate": 3.9574780382289325e-05,
"loss": 3.8578,
"step": 94500
},
{
"epoch": 2.1,
"learning_rate": 3.9519600052973115e-05,
"loss": 3.834,
"step": 95000
},
{
"epoch": 2.11,
"learning_rate": 3.946453008431554e-05,
"loss": 3.8655,
"step": 95500
},
{
"epoch": 2.12,
"learning_rate": 3.9409349754999344e-05,
"loss": 3.8697,
"step": 96000
},
{
"epoch": 2.13,
"learning_rate": 3.9354169425683135e-05,
"loss": 3.8506,
"step": 96500
},
{
"epoch": 2.14,
"learning_rate": 3.929909945702556e-05,
"loss": 3.8226,
"step": 97000
},
{
"epoch": 2.15,
"learning_rate": 3.924402948836799e-05,
"loss": 3.8532,
"step": 97500
},
{
"epoch": 2.16,
"learning_rate": 3.918884915905178e-05,
"loss": 3.8276,
"step": 98000
},
{
"epoch": 2.17,
"learning_rate": 3.913366882973557e-05,
"loss": 3.814,
"step": 98500
},
{
"epoch": 2.19,
"learning_rate": 3.907848850041938e-05,
"loss": 3.8336,
"step": 99000
},
{
"epoch": 2.2,
"learning_rate": 3.902330817110317e-05,
"loss": 3.8333,
"step": 99500
},
{
"epoch": 2.21,
"learning_rate": 3.896812784178696e-05,
"loss": 3.8402,
"step": 100000
},
{
"epoch": 2.22,
"learning_rate": 3.8912947512470756e-05,
"loss": 3.7971,
"step": 100500
},
{
"epoch": 2.23,
"learning_rate": 3.885776718315455e-05,
"loss": 3.8324,
"step": 101000
},
{
"epoch": 2.24,
"learning_rate": 3.8802586853838344e-05,
"loss": 3.779,
"step": 101500
},
{
"epoch": 2.25,
"learning_rate": 3.874740652452214e-05,
"loss": 3.81,
"step": 102000
},
{
"epoch": 2.26,
"learning_rate": 3.869222619520593e-05,
"loss": 3.7999,
"step": 102500
},
{
"epoch": 2.27,
"learning_rate": 3.863715622654836e-05,
"loss": 3.823,
"step": 103000
},
{
"epoch": 2.28,
"learning_rate": 3.858197589723216e-05,
"loss": 3.826,
"step": 103500
},
{
"epoch": 2.3,
"learning_rate": 3.852679556791595e-05,
"loss": 3.8032,
"step": 104000
},
{
"epoch": 2.31,
"learning_rate": 3.847161523859974e-05,
"loss": 3.807,
"step": 104500
},
{
"epoch": 2.32,
"learning_rate": 3.841643490928354e-05,
"loss": 3.8085,
"step": 105000
},
{
"epoch": 2.33,
"learning_rate": 3.836125457996734e-05,
"loss": 3.7954,
"step": 105500
},
{
"epoch": 2.34,
"learning_rate": 3.830618461130976e-05,
"loss": 3.7514,
"step": 106000
},
{
"epoch": 2.35,
"learning_rate": 3.825100428199356e-05,
"loss": 3.7642,
"step": 106500
},
{
"epoch": 2.36,
"learning_rate": 3.819582395267735e-05,
"loss": 3.7635,
"step": 107000
},
{
"epoch": 2.37,
"learning_rate": 3.814064362336114e-05,
"loss": 3.7834,
"step": 107500
},
{
"epoch": 2.38,
"learning_rate": 3.8085463294044945e-05,
"loss": 3.7777,
"step": 108000
},
{
"epoch": 2.39,
"learning_rate": 3.8030282964728736e-05,
"loss": 3.7408,
"step": 108500
},
{
"epoch": 2.41,
"learning_rate": 3.797521299607116e-05,
"loss": 3.7762,
"step": 109000
},
{
"epoch": 2.42,
"learning_rate": 3.792003266675496e-05,
"loss": 3.7347,
"step": 109500
},
{
"epoch": 2.43,
"learning_rate": 3.786485233743875e-05,
"loss": 3.7441,
"step": 110000
},
{
"epoch": 2.44,
"learning_rate": 3.7809672008122546e-05,
"loss": 3.7551,
"step": 110500
},
{
"epoch": 2.45,
"learning_rate": 3.7754491678806344e-05,
"loss": 3.7318,
"step": 111000
},
{
"epoch": 2.46,
"learning_rate": 3.7699311349490134e-05,
"loss": 3.7244,
"step": 111500
},
{
"epoch": 2.47,
"learning_rate": 3.764424138083256e-05,
"loss": 3.717,
"step": 112000
},
{
"epoch": 2.48,
"learning_rate": 3.7589061051516356e-05,
"loss": 3.7368,
"step": 112500
},
{
"epoch": 2.49,
"learning_rate": 3.7533880722200154e-05,
"loss": 3.7367,
"step": 113000
},
{
"epoch": 2.51,
"learning_rate": 3.7478700392883945e-05,
"loss": 3.7136,
"step": 113500
},
{
"epoch": 2.52,
"learning_rate": 3.742352006356774e-05,
"loss": 3.7291,
"step": 114000
},
{
"epoch": 2.53,
"learning_rate": 3.736833973425153e-05,
"loss": 3.7232,
"step": 114500
},
{
"epoch": 2.54,
"learning_rate": 3.731315940493533e-05,
"loss": 3.7309,
"step": 115000
},
{
"epoch": 2.55,
"learning_rate": 3.725808943627776e-05,
"loss": 3.7163,
"step": 115500
},
{
"epoch": 2.56,
"learning_rate": 3.720290910696155e-05,
"loss": 3.7204,
"step": 116000
},
{
"epoch": 2.57,
"learning_rate": 3.714772877764534e-05,
"loss": 3.6878,
"step": 116500
},
{
"epoch": 2.58,
"learning_rate": 3.7092658808987775e-05,
"loss": 3.7009,
"step": 117000
},
{
"epoch": 2.59,
"learning_rate": 3.7037478479671565e-05,
"loss": 3.7262,
"step": 117500
},
{
"epoch": 2.6,
"learning_rate": 3.698229815035536e-05,
"loss": 3.6676,
"step": 118000
},
{
"epoch": 2.62,
"learning_rate": 3.692711782103916e-05,
"loss": 3.6949,
"step": 118500
},
{
"epoch": 2.63,
"learning_rate": 3.687193749172295e-05,
"loss": 3.6931,
"step": 119000
},
{
"epoch": 2.64,
"learning_rate": 3.681686752306538e-05,
"loss": 3.6977,
"step": 119500
},
{
"epoch": 2.65,
"learning_rate": 3.676168719374917e-05,
"loss": 3.7026,
"step": 120000
},
{
"epoch": 2.66,
"learning_rate": 3.670650686443297e-05,
"loss": 3.6684,
"step": 120500
},
{
"epoch": 2.67,
"learning_rate": 3.665132653511676e-05,
"loss": 3.6851,
"step": 121000
},
{
"epoch": 2.68,
"learning_rate": 3.659614620580056e-05,
"loss": 3.678,
"step": 121500
},
{
"epoch": 2.69,
"learning_rate": 3.654096587648435e-05,
"loss": 3.6639,
"step": 122000
},
{
"epoch": 2.7,
"learning_rate": 3.648578554716815e-05,
"loss": 3.6633,
"step": 122500
},
{
"epoch": 2.71,
"learning_rate": 3.6430605217851944e-05,
"loss": 3.6612,
"step": 123000
},
{
"epoch": 2.73,
"learning_rate": 3.6375424888535735e-05,
"loss": 3.6267,
"step": 123500
},
{
"epoch": 2.74,
"learning_rate": 3.6320354919878167e-05,
"loss": 3.6461,
"step": 124000
},
{
"epoch": 2.75,
"learning_rate": 3.626517459056196e-05,
"loss": 3.6478,
"step": 124500
},
{
"epoch": 2.76,
"learning_rate": 3.6209994261245755e-05,
"loss": 3.6458,
"step": 125000
},
{
"epoch": 2.77,
"learning_rate": 3.615492429258818e-05,
"loss": 3.6394,
"step": 125500
},
{
"epoch": 2.78,
"learning_rate": 3.609974396327198e-05,
"loss": 3.6469,
"step": 126000
},
{
"epoch": 2.79,
"learning_rate": 3.604456363395577e-05,
"loss": 3.6539,
"step": 126500
},
{
"epoch": 2.8,
"learning_rate": 3.5989383304639565e-05,
"loss": 3.6412,
"step": 127000
},
{
"epoch": 2.81,
"learning_rate": 3.593420297532336e-05,
"loss": 3.6352,
"step": 127500
},
{
"epoch": 2.83,
"learning_rate": 3.587902264600715e-05,
"loss": 3.6516,
"step": 128000
},
{
"epoch": 2.84,
"learning_rate": 3.582384231669095e-05,
"loss": 3.6343,
"step": 128500
},
{
"epoch": 2.85,
"learning_rate": 3.576866198737474e-05,
"loss": 3.6598,
"step": 129000
},
{
"epoch": 2.86,
"learning_rate": 3.571348165805854e-05,
"loss": 3.65,
"step": 129500
},
{
"epoch": 2.87,
"learning_rate": 3.5658411689400964e-05,
"loss": 3.6309,
"step": 130000
},
{
"epoch": 2.88,
"learning_rate": 3.560323136008476e-05,
"loss": 3.6119,
"step": 130500
},
{
"epoch": 2.89,
"learning_rate": 3.554805103076855e-05,
"loss": 3.6272,
"step": 131000
},
{
"epoch": 2.9,
"learning_rate": 3.549287070145235e-05,
"loss": 3.6448,
"step": 131500
},
{
"epoch": 2.91,
"learning_rate": 3.543769037213615e-05,
"loss": 3.6274,
"step": 132000
},
{
"epoch": 2.92,
"learning_rate": 3.5382620403478565e-05,
"loss": 3.5813,
"step": 132500
},
{
"epoch": 2.94,
"learning_rate": 3.532744007416237e-05,
"loss": 3.6487,
"step": 133000
},
{
"epoch": 2.95,
"learning_rate": 3.5272370105504793e-05,
"loss": 3.6124,
"step": 133500
},
{
"epoch": 2.96,
"learning_rate": 3.5217189776188584e-05,
"loss": 3.6033,
"step": 134000
},
{
"epoch": 2.97,
"learning_rate": 3.516200944687238e-05,
"loss": 3.5861,
"step": 134500
},
{
"epoch": 2.98,
"learning_rate": 3.510682911755617e-05,
"loss": 3.611,
"step": 135000
},
{
"epoch": 2.99,
"learning_rate": 3.505164878823997e-05,
"loss": 3.6169,
"step": 135500
},
{
"epoch": 3.0,
"learning_rate": 3.499646845892377e-05,
"loss": 3.5714,
"step": 136000
},
{
"epoch": 3.01,
"learning_rate": 3.494128812960756e-05,
"loss": 3.6016,
"step": 136500
},
{
"epoch": 3.02,
"learning_rate": 3.488610780029135e-05,
"loss": 3.5863,
"step": 137000
},
{
"epoch": 3.03,
"learning_rate": 3.483103783163378e-05,
"loss": 3.5576,
"step": 137500
},
{
"epoch": 3.05,
"learning_rate": 3.477585750231758e-05,
"loss": 3.5632,
"step": 138000
},
{
"epoch": 3.06,
"learning_rate": 3.472067717300137e-05,
"loss": 3.5682,
"step": 138500
},
{
"epoch": 3.07,
"learning_rate": 3.4665496843685166e-05,
"loss": 3.5759,
"step": 139000
},
{
"epoch": 3.08,
"learning_rate": 3.4610316514368957e-05,
"loss": 3.5634,
"step": 139500
},
{
"epoch": 3.09,
"learning_rate": 3.4555136185052754e-05,
"loss": 3.5529,
"step": 140000
},
{
"epoch": 3.1,
"learning_rate": 3.4500066216395185e-05,
"loss": 3.5891,
"step": 140500
},
{
"epoch": 3.11,
"learning_rate": 3.4444885887078976e-05,
"loss": 3.5681,
"step": 141000
},
{
"epoch": 3.12,
"learning_rate": 3.438970555776277e-05,
"loss": 3.5854,
"step": 141500
},
{
"epoch": 3.13,
"learning_rate": 3.4334525228446564e-05,
"loss": 3.5534,
"step": 142000
},
{
"epoch": 3.15,
"learning_rate": 3.427945525978899e-05,
"loss": 3.5804,
"step": 142500
},
{
"epoch": 3.16,
"learning_rate": 3.4224385291131414e-05,
"loss": 3.5757,
"step": 143000
},
{
"epoch": 3.17,
"learning_rate": 3.416920496181522e-05,
"loss": 3.547,
"step": 143500
},
{
"epoch": 3.18,
"learning_rate": 3.411402463249901e-05,
"loss": 3.5348,
"step": 144000
},
{
"epoch": 3.19,
"learning_rate": 3.40588443031828e-05,
"loss": 3.5629,
"step": 144500
},
{
"epoch": 3.2,
"learning_rate": 3.40036639738666e-05,
"loss": 3.5184,
"step": 145000
},
{
"epoch": 3.21,
"learning_rate": 3.3948483644550394e-05,
"loss": 3.5546,
"step": 145500
},
{
"epoch": 3.22,
"learning_rate": 3.3893303315234185e-05,
"loss": 3.5271,
"step": 146000
},
{
"epoch": 3.23,
"learning_rate": 3.383812298591798e-05,
"loss": 3.521,
"step": 146500
},
{
"epoch": 3.24,
"learning_rate": 3.378294265660177e-05,
"loss": 3.5593,
"step": 147000
},
{
"epoch": 3.26,
"learning_rate": 3.372776232728557e-05,
"loss": 3.5194,
"step": 147500
},
{
"epoch": 3.27,
"learning_rate": 3.367258199796937e-05,
"loss": 3.5173,
"step": 148000
},
{
"epoch": 3.28,
"learning_rate": 3.361751202931179e-05,
"loss": 3.5048,
"step": 148500
},
{
"epoch": 3.29,
"learning_rate": 3.3562331699995583e-05,
"loss": 3.5398,
"step": 149000
},
{
"epoch": 3.3,
"learning_rate": 3.350715137067938e-05,
"loss": 3.4967,
"step": 149500
},
{
"epoch": 3.31,
"learning_rate": 3.345197104136318e-05,
"loss": 3.5317,
"step": 150000
},
{
"epoch": 3.32,
"learning_rate": 3.339679071204697e-05,
"loss": 3.5089,
"step": 150500
},
{
"epoch": 3.33,
"learning_rate": 3.33417207433894e-05,
"loss": 3.509,
"step": 151000
},
{
"epoch": 3.34,
"learning_rate": 3.328654041407319e-05,
"loss": 3.5022,
"step": 151500
},
{
"epoch": 3.35,
"learning_rate": 3.323136008475698e-05,
"loss": 3.4962,
"step": 152000
},
{
"epoch": 3.37,
"learning_rate": 3.317629011609941e-05,
"loss": 3.4874,
"step": 152500
},
{
"epoch": 3.38,
"learning_rate": 3.312110978678321e-05,
"loss": 3.4887,
"step": 153000
},
{
"epoch": 3.39,
"learning_rate": 3.3065929457467e-05,
"loss": 3.5197,
"step": 153500
},
{
"epoch": 3.4,
"learning_rate": 3.30107491281508e-05,
"loss": 3.5025,
"step": 154000
},
{
"epoch": 3.41,
"learning_rate": 3.295556879883459e-05,
"loss": 3.5071,
"step": 154500
},
{
"epoch": 3.42,
"learning_rate": 3.290038846951839e-05,
"loss": 3.5018,
"step": 155000
},
{
"epoch": 3.43,
"learning_rate": 3.2845208140202185e-05,
"loss": 3.5011,
"step": 155500
},
{
"epoch": 3.44,
"learning_rate": 3.2790027810885975e-05,
"loss": 3.4938,
"step": 156000
},
{
"epoch": 3.45,
"learning_rate": 3.2734847481569766e-05,
"loss": 3.5023,
"step": 156500
},
{
"epoch": 3.47,
"learning_rate": 3.267966715225357e-05,
"loss": 3.4794,
"step": 157000
},
{
"epoch": 3.48,
"learning_rate": 3.2624597183595995e-05,
"loss": 3.4868,
"step": 157500
},
{
"epoch": 3.49,
"learning_rate": 3.2569416854279786e-05,
"loss": 3.5035,
"step": 158000
},
{
"epoch": 3.5,
"learning_rate": 3.251423652496358e-05,
"loss": 3.4879,
"step": 158500
},
{
"epoch": 3.51,
"learning_rate": 3.2459056195647374e-05,
"loss": 3.4736,
"step": 159000
},
{
"epoch": 3.52,
"learning_rate": 3.240387586633117e-05,
"loss": 3.4857,
"step": 159500
},
{
"epoch": 3.53,
"learning_rate": 3.23488058976736e-05,
"loss": 3.4774,
"step": 160000
},
{
"epoch": 3.54,
"learning_rate": 3.2293625568357394e-05,
"loss": 3.4655,
"step": 160500
},
{
"epoch": 3.55,
"learning_rate": 3.2238445239041184e-05,
"loss": 3.4746,
"step": 161000
},
{
"epoch": 3.56,
"learning_rate": 3.218326490972498e-05,
"loss": 3.4688,
"step": 161500
},
{
"epoch": 3.58,
"learning_rate": 3.212808458040878e-05,
"loss": 3.4573,
"step": 162000
},
{
"epoch": 3.59,
"learning_rate": 3.2073014611751204e-05,
"loss": 3.4748,
"step": 162500
},
{
"epoch": 3.6,
"learning_rate": 3.2017834282435e-05,
"loss": 3.4589,
"step": 163000
},
{
"epoch": 3.61,
"learning_rate": 3.196265395311879e-05,
"loss": 3.4462,
"step": 163500
},
{
"epoch": 3.62,
"learning_rate": 3.190747362380259e-05,
"loss": 3.456,
"step": 164000
},
{
"epoch": 3.63,
"learning_rate": 3.185229329448639e-05,
"loss": 3.458,
"step": 164500
},
{
"epoch": 3.64,
"learning_rate": 3.179711296517018e-05,
"loss": 3.4655,
"step": 165000
},
{
"epoch": 3.65,
"learning_rate": 3.174193263585397e-05,
"loss": 3.4461,
"step": 165500
},
{
"epoch": 3.66,
"learning_rate": 3.16868626671964e-05,
"loss": 3.4555,
"step": 166000
},
{
"epoch": 3.68,
"learning_rate": 3.163168233788019e-05,
"loss": 3.4638,
"step": 166500
},
{
"epoch": 3.69,
"learning_rate": 3.157650200856399e-05,
"loss": 3.4492,
"step": 167000
},
{
"epoch": 3.7,
"learning_rate": 3.1521321679247786e-05,
"loss": 3.4547,
"step": 167500
},
{
"epoch": 3.71,
"learning_rate": 3.1466141349931576e-05,
"loss": 3.4594,
"step": 168000
},
{
"epoch": 3.72,
"learning_rate": 3.1410961020615374e-05,
"loss": 3.4381,
"step": 168500
},
{
"epoch": 3.73,
"learning_rate": 3.135578069129917e-05,
"loss": 3.4342,
"step": 169000
},
{
"epoch": 3.74,
"learning_rate": 3.130060036198296e-05,
"loss": 3.4209,
"step": 169500
},
{
"epoch": 3.75,
"learning_rate": 3.1245530393325387e-05,
"loss": 3.4527,
"step": 170000
},
{
"epoch": 3.76,
"learning_rate": 3.1190350064009184e-05,
"loss": 3.4482,
"step": 170500
},
{
"epoch": 3.77,
"learning_rate": 3.1135169734692975e-05,
"loss": 3.4453,
"step": 171000
},
{
"epoch": 3.79,
"learning_rate": 3.1080099766035406e-05,
"loss": 3.4369,
"step": 171500
},
{
"epoch": 3.8,
"learning_rate": 3.1024919436719204e-05,
"loss": 3.4148,
"step": 172000
},
{
"epoch": 3.81,
"learning_rate": 3.0969739107402994e-05,
"loss": 3.4211,
"step": 172500
},
{
"epoch": 3.82,
"learning_rate": 3.091455877808679e-05,
"loss": 3.4371,
"step": 173000
},
{
"epoch": 3.83,
"learning_rate": 3.085937844877058e-05,
"loss": 3.4307,
"step": 173500
},
{
"epoch": 3.84,
"learning_rate": 3.080419811945438e-05,
"loss": 3.4498,
"step": 174000
},
{
"epoch": 3.85,
"learning_rate": 3.074901779013817e-05,
"loss": 3.4347,
"step": 174500
},
{
"epoch": 3.86,
"learning_rate": 3.069383746082197e-05,
"loss": 3.4131,
"step": 175000
},
{
"epoch": 3.87,
"learning_rate": 3.063876749216439e-05,
"loss": 3.4045,
"step": 175500
},
{
"epoch": 3.88,
"learning_rate": 3.058358716284819e-05,
"loss": 3.409,
"step": 176000
},
{
"epoch": 3.9,
"learning_rate": 3.052840683353199e-05,
"loss": 3.4007,
"step": 176500
},
{
"epoch": 3.91,
"learning_rate": 3.0473336864874412e-05,
"loss": 3.4347,
"step": 177000
},
{
"epoch": 3.92,
"learning_rate": 3.0418156535558207e-05,
"loss": 3.4103,
"step": 177500
},
{
"epoch": 3.93,
"learning_rate": 3.0362976206242e-05,
"loss": 3.4181,
"step": 178000
},
{
"epoch": 3.94,
"learning_rate": 3.030779587692579e-05,
"loss": 3.4215,
"step": 178500
},
{
"epoch": 3.95,
"learning_rate": 3.0252615547609592e-05,
"loss": 3.4293,
"step": 179000
},
{
"epoch": 3.96,
"learning_rate": 3.0197435218293386e-05,
"loss": 3.3945,
"step": 179500
},
{
"epoch": 3.97,
"learning_rate": 3.0142254888977177e-05,
"loss": 3.3907,
"step": 180000
},
{
"epoch": 3.98,
"learning_rate": 3.008707455966097e-05,
"loss": 3.3976,
"step": 180500
},
{
"epoch": 4.0,
"learning_rate": 3.003189423034477e-05,
"loss": 3.3946,
"step": 181000
},
{
"epoch": 4.01,
"learning_rate": 2.9976713901028563e-05,
"loss": 3.389,
"step": 181500
},
{
"epoch": 4.02,
"learning_rate": 2.992164393237099e-05,
"loss": 3.3877,
"step": 182000
},
{
"epoch": 4.03,
"learning_rate": 2.9866463603054785e-05,
"loss": 3.3786,
"step": 182500
},
{
"epoch": 4.04,
"learning_rate": 2.9811283273738576e-05,
"loss": 3.3745,
"step": 183000
},
{
"epoch": 4.05,
"learning_rate": 2.9756102944422376e-05,
"loss": 3.3806,
"step": 183500
},
{
"epoch": 4.06,
"learning_rate": 2.970092261510617e-05,
"loss": 3.3839,
"step": 184000
},
{
"epoch": 4.07,
"learning_rate": 2.964574228578996e-05,
"loss": 3.3679,
"step": 184500
},
{
"epoch": 4.08,
"learning_rate": 2.9590561956473755e-05,
"loss": 3.394,
"step": 185000
},
{
"epoch": 4.09,
"learning_rate": 2.9535381627157553e-05,
"loss": 3.388,
"step": 185500
},
{
"epoch": 4.11,
"learning_rate": 2.9480201297841347e-05,
"loss": 3.3664,
"step": 186000
},
{
"epoch": 4.12,
"learning_rate": 2.9425131329183775e-05,
"loss": 3.3729,
"step": 186500
},
{
"epoch": 4.13,
"learning_rate": 2.936995099986757e-05,
"loss": 3.3757,
"step": 187000
},
{
"epoch": 4.14,
"learning_rate": 2.931477067055136e-05,
"loss": 3.383,
"step": 187500
},
{
"epoch": 4.15,
"learning_rate": 2.925959034123516e-05,
"loss": 3.3396,
"step": 188000
},
{
"epoch": 4.16,
"learning_rate": 2.920452037257759e-05,
"loss": 3.395,
"step": 188500
},
{
"epoch": 4.17,
"learning_rate": 2.914934004326138e-05,
"loss": 3.366,
"step": 189000
},
{
"epoch": 4.18,
"learning_rate": 2.9094159713945173e-05,
"loss": 3.3815,
"step": 189500
},
{
"epoch": 4.19,
"learning_rate": 2.90390897452876e-05,
"loss": 3.3729,
"step": 190000
},
{
"epoch": 4.2,
"learning_rate": 2.8983909415971396e-05,
"loss": 3.3832,
"step": 190500
},
{
"epoch": 4.22,
"learning_rate": 2.8928729086655193e-05,
"loss": 3.347,
"step": 191000
},
{
"epoch": 4.23,
"learning_rate": 2.8873548757338987e-05,
"loss": 3.3794,
"step": 191500
},
{
"epoch": 4.24,
"learning_rate": 2.8818368428022778e-05,
"loss": 3.3489,
"step": 192000
},
{
"epoch": 4.25,
"learning_rate": 2.8763188098706572e-05,
"loss": 3.3298,
"step": 192500
},
{
"epoch": 4.26,
"learning_rate": 2.8708118130049e-05,
"loss": 3.3641,
"step": 193000
},
{
"epoch": 4.27,
"learning_rate": 2.8652937800732797e-05,
"loss": 3.3746,
"step": 193500
},
{
"epoch": 4.28,
"learning_rate": 2.8597867832075225e-05,
"loss": 3.3428,
"step": 194000
},
{
"epoch": 4.29,
"learning_rate": 2.854268750275902e-05,
"loss": 3.3652,
"step": 194500
},
{
"epoch": 4.3,
"learning_rate": 2.8487507173442814e-05,
"loss": 3.3807,
"step": 195000
},
{
"epoch": 4.32,
"learning_rate": 2.8432326844126604e-05,
"loss": 3.3419,
"step": 195500
},
{
"epoch": 4.33,
"learning_rate": 2.8377146514810405e-05,
"loss": 3.3311,
"step": 196000
},
{
"epoch": 4.34,
"learning_rate": 2.8321966185494196e-05,
"loss": 3.359,
"step": 196500
},
{
"epoch": 4.35,
"learning_rate": 2.826678585617799e-05,
"loss": 3.3447,
"step": 197000
},
{
"epoch": 4.36,
"learning_rate": 2.8211605526861784e-05,
"loss": 3.3392,
"step": 197500
},
{
"epoch": 4.37,
"learning_rate": 2.815642519754558e-05,
"loss": 3.3392,
"step": 198000
},
{
"epoch": 4.38,
"learning_rate": 2.8101244868229376e-05,
"loss": 3.3075,
"step": 198500
},
{
"epoch": 4.39,
"learning_rate": 2.804606453891317e-05,
"loss": 3.3379,
"step": 199000
},
{
"epoch": 4.4,
"learning_rate": 2.7990884209596964e-05,
"loss": 3.3344,
"step": 199500
},
{
"epoch": 4.41,
"learning_rate": 2.793581424093939e-05,
"loss": 3.3334,
"step": 200000
},
{
"epoch": 4.43,
"learning_rate": 2.788063391162319e-05,
"loss": 3.3334,
"step": 200500
},
{
"epoch": 4.44,
"learning_rate": 2.782545358230698e-05,
"loss": 3.3318,
"step": 201000
},
{
"epoch": 4.45,
"learning_rate": 2.7770383613649408e-05,
"loss": 3.3378,
"step": 201500
},
{
"epoch": 4.46,
"learning_rate": 2.7715203284333202e-05,
"loss": 3.3445,
"step": 202000
},
{
"epoch": 4.47,
"learning_rate": 2.7660022955016996e-05,
"loss": 3.2963,
"step": 202500
},
{
"epoch": 4.48,
"learning_rate": 2.7604842625700794e-05,
"loss": 3.3234,
"step": 203000
},
{
"epoch": 4.49,
"learning_rate": 2.7549662296384588e-05,
"loss": 3.3248,
"step": 203500
},
{
"epoch": 4.5,
"learning_rate": 2.7494481967068382e-05,
"loss": 3.3144,
"step": 204000
},
{
"epoch": 4.51,
"learning_rate": 2.7439301637752173e-05,
"loss": 3.3204,
"step": 204500
},
{
"epoch": 4.52,
"learning_rate": 2.7384121308435974e-05,
"loss": 3.3345,
"step": 205000
},
{
"epoch": 4.54,
"learning_rate": 2.7328940979119764e-05,
"loss": 3.323,
"step": 205500
},
{
"epoch": 4.55,
"learning_rate": 2.7273871010462192e-05,
"loss": 3.2847,
"step": 206000
},
{
"epoch": 4.56,
"learning_rate": 2.721880104180462e-05,
"loss": 3.3178,
"step": 206500
},
{
"epoch": 4.57,
"learning_rate": 2.7163620712488414e-05,
"loss": 3.3086,
"step": 207000
},
{
"epoch": 4.58,
"learning_rate": 2.7108440383172205e-05,
"loss": 3.3065,
"step": 207500
},
{
"epoch": 4.59,
"learning_rate": 2.7053260053856006e-05,
"loss": 3.3078,
"step": 208000
},
{
"epoch": 4.6,
"learning_rate": 2.69980797245398e-05,
"loss": 3.314,
"step": 208500
},
{
"epoch": 4.61,
"learning_rate": 2.694289939522359e-05,
"loss": 3.3065,
"step": 209000
},
{
"epoch": 4.62,
"learning_rate": 2.6887719065907385e-05,
"loss": 3.3084,
"step": 209500
},
{
"epoch": 4.64,
"learning_rate": 2.6832538736591182e-05,
"loss": 3.3166,
"step": 210000
},
{
"epoch": 4.65,
"learning_rate": 2.6777358407274977e-05,
"loss": 3.3123,
"step": 210500
},
{
"epoch": 4.66,
"learning_rate": 2.672217807795877e-05,
"loss": 3.305,
"step": 211000
},
{
"epoch": 4.67,
"learning_rate": 2.6666997748642565e-05,
"loss": 3.3048,
"step": 211500
},
{
"epoch": 4.68,
"learning_rate": 2.6611817419326362e-05,
"loss": 3.3228,
"step": 212000
},
{
"epoch": 4.69,
"learning_rate": 2.6556747450668784e-05,
"loss": 3.2929,
"step": 212500
},
{
"epoch": 4.7,
"learning_rate": 2.6501567121352584e-05,
"loss": 3.2981,
"step": 213000
},
{
"epoch": 4.71,
"learning_rate": 2.6446386792036375e-05,
"loss": 3.2991,
"step": 213500
},
{
"epoch": 4.72,
"learning_rate": 2.6391316823378803e-05,
"loss": 3.2917,
"step": 214000
},
{
"epoch": 4.73,
"learning_rate": 2.6336136494062597e-05,
"loss": 3.298,
"step": 214500
},
{
"epoch": 4.75,
"learning_rate": 2.628095616474639e-05,
"loss": 3.2994,
"step": 215000
},
{
"epoch": 4.76,
"learning_rate": 2.622577583543019e-05,
"loss": 3.2966,
"step": 215500
},
{
"epoch": 4.77,
"learning_rate": 2.6170595506113983e-05,
"loss": 3.2939,
"step": 216000
},
{
"epoch": 4.78,
"learning_rate": 2.6115415176797774e-05,
"loss": 3.2987,
"step": 216500
},
{
"epoch": 4.79,
"learning_rate": 2.6060234847481568e-05,
"loss": 3.3056,
"step": 217000
},
{
"epoch": 4.8,
"learning_rate": 2.600505451816537e-05,
"loss": 3.2927,
"step": 217500
},
{
"epoch": 4.81,
"learning_rate": 2.594987418884916e-05,
"loss": 3.288,
"step": 218000
},
{
"epoch": 4.82,
"learning_rate": 2.5894804220191587e-05,
"loss": 3.2895,
"step": 218500
},
{
"epoch": 4.83,
"learning_rate": 2.583962389087538e-05,
"loss": 3.2616,
"step": 219000
},
{
"epoch": 4.84,
"learning_rate": 2.5784443561559176e-05,
"loss": 3.2991,
"step": 219500
},
{
"epoch": 4.86,
"learning_rate": 2.5729263232242973e-05,
"loss": 3.2537,
"step": 220000
},
{
"epoch": 4.87,
"learning_rate": 2.56741932635854e-05,
"loss": 3.2507,
"step": 220500
},
{
"epoch": 4.88,
"learning_rate": 2.5619123294927826e-05,
"loss": 3.259,
"step": 221000
},
{
"epoch": 4.89,
"learning_rate": 2.556394296561162e-05,
"loss": 3.2608,
"step": 221500
},
{
"epoch": 4.9,
"learning_rate": 2.5508872996954048e-05,
"loss": 3.2905,
"step": 222000
},
{
"epoch": 4.91,
"learning_rate": 2.5453692667637842e-05,
"loss": 3.2716,
"step": 222500
},
{
"epoch": 4.92,
"learning_rate": 2.5398512338321632e-05,
"loss": 3.2588,
"step": 223000
},
{
"epoch": 4.93,
"learning_rate": 2.5343332009005433e-05,
"loss": 3.2691,
"step": 223500
},
{
"epoch": 4.94,
"learning_rate": 2.5288151679689227e-05,
"loss": 3.274,
"step": 224000
},
{
"epoch": 4.96,
"learning_rate": 2.5232971350373018e-05,
"loss": 3.2837,
"step": 224500
},
{
"epoch": 4.97,
"learning_rate": 2.5177791021056812e-05,
"loss": 3.2598,
"step": 225000
},
{
"epoch": 4.98,
"learning_rate": 2.512261069174061e-05,
"loss": 3.2594,
"step": 225500
},
{
"epoch": 4.99,
"learning_rate": 2.5067540723083038e-05,
"loss": 3.2446,
"step": 226000
},
{
"epoch": 5.0,
"learning_rate": 2.5012360393766832e-05,
"loss": 3.2529,
"step": 226500
},
{
"epoch": 5.01,
"learning_rate": 2.4957180064450626e-05,
"loss": 3.2768,
"step": 227000
},
{
"epoch": 5.02,
"learning_rate": 2.490199973513442e-05,
"loss": 3.2543,
"step": 227500
},
{
"epoch": 5.03,
"learning_rate": 2.4846819405818214e-05,
"loss": 3.2379,
"step": 228000
},
{
"epoch": 5.04,
"learning_rate": 2.479163907650201e-05,
"loss": 3.241,
"step": 228500
},
{
"epoch": 5.05,
"learning_rate": 2.4736569107844436e-05,
"loss": 3.2543,
"step": 229000
},
{
"epoch": 5.07,
"learning_rate": 2.468138877852823e-05,
"loss": 3.239,
"step": 229500
},
{
"epoch": 5.08,
"learning_rate": 2.462631880987066e-05,
"loss": 3.2776,
"step": 230000
},
{
"epoch": 5.09,
"learning_rate": 2.4571138480554452e-05,
"loss": 3.2667,
"step": 230500
},
{
"epoch": 5.1,
"learning_rate": 2.451606851189688e-05,
"loss": 3.2433,
"step": 231000
},
{
"epoch": 5.11,
"learning_rate": 2.4460888182580675e-05,
"loss": 3.2813,
"step": 231500
},
{
"epoch": 5.12,
"learning_rate": 2.440570785326447e-05,
"loss": 3.2439,
"step": 232000
},
{
"epoch": 5.13,
"learning_rate": 2.4350527523948263e-05,
"loss": 3.2444,
"step": 232500
},
{
"epoch": 5.14,
"learning_rate": 2.429534719463206e-05,
"loss": 3.2516,
"step": 233000
},
{
"epoch": 5.15,
"learning_rate": 2.424016686531585e-05,
"loss": 3.2514,
"step": 233500
},
{
"epoch": 5.16,
"learning_rate": 2.418498653599965e-05,
"loss": 3.2358,
"step": 234000
},
{
"epoch": 5.18,
"learning_rate": 2.4129806206683443e-05,
"loss": 3.2162,
"step": 234500
},
{
"epoch": 5.19,
"learning_rate": 2.4074625877367237e-05,
"loss": 3.2603,
"step": 235000
},
{
"epoch": 5.2,
"learning_rate": 2.401944554805103e-05,
"loss": 3.2253,
"step": 235500
},
{
"epoch": 5.21,
"learning_rate": 2.3964265218734828e-05,
"loss": 3.2411,
"step": 236000
},
{
"epoch": 5.22,
"learning_rate": 2.390908488941862e-05,
"loss": 3.2349,
"step": 236500
},
{
"epoch": 5.23,
"learning_rate": 2.3853904560102416e-05,
"loss": 3.2505,
"step": 237000
},
{
"epoch": 5.24,
"learning_rate": 2.379872423078621e-05,
"loss": 3.2212,
"step": 237500
},
{
"epoch": 5.25,
"learning_rate": 2.3743543901470005e-05,
"loss": 3.2011,
"step": 238000
},
{
"epoch": 5.26,
"learning_rate": 2.36883635721538e-05,
"loss": 3.2169,
"step": 238500
},
{
"epoch": 5.28,
"learning_rate": 2.3633293603496227e-05,
"loss": 3.2117,
"step": 239000
},
{
"epoch": 5.29,
"learning_rate": 2.357811327418002e-05,
"loss": 3.2334,
"step": 239500
},
{
"epoch": 5.3,
"learning_rate": 2.3522932944863815e-05,
"loss": 3.2402,
"step": 240000
},
{
"epoch": 5.31,
"learning_rate": 2.3467752615547612e-05,
"loss": 3.2255,
"step": 240500
},
{
"epoch": 5.32,
"learning_rate": 2.3412572286231403e-05,
"loss": 3.2317,
"step": 241000
},
{
"epoch": 5.33,
"learning_rate": 2.33573919569152e-05,
"loss": 3.2066,
"step": 241500
},
{
"epoch": 5.34,
"learning_rate": 2.3302211627598995e-05,
"loss": 3.2142,
"step": 242000
},
{
"epoch": 5.35,
"learning_rate": 2.324703129828279e-05,
"loss": 3.2199,
"step": 242500
},
{
"epoch": 5.36,
"learning_rate": 2.3191850968966583e-05,
"loss": 3.237,
"step": 243000
},
{
"epoch": 5.37,
"learning_rate": 2.313678100030901e-05,
"loss": 3.2328,
"step": 243500
},
{
"epoch": 5.39,
"learning_rate": 2.3081600670992805e-05,
"loss": 3.2408,
"step": 244000
},
{
"epoch": 5.4,
"learning_rate": 2.3026530702335233e-05,
"loss": 3.224,
"step": 244500
},
{
"epoch": 5.41,
"learning_rate": 2.2971350373019027e-05,
"loss": 3.2118,
"step": 245000
},
{
"epoch": 5.42,
"learning_rate": 2.291617004370282e-05,
"loss": 3.2208,
"step": 245500
},
{
"epoch": 5.43,
"learning_rate": 2.286110007504525e-05,
"loss": 3.2107,
"step": 246000
},
{
"epoch": 5.44,
"learning_rate": 2.2805919745729043e-05,
"loss": 3.2121,
"step": 246500
},
{
"epoch": 5.45,
"learning_rate": 2.2750739416412837e-05,
"loss": 3.2333,
"step": 247000
},
{
"epoch": 5.46,
"learning_rate": 2.269555908709663e-05,
"loss": 3.1927,
"step": 247500
},
{
"epoch": 5.47,
"learning_rate": 2.264037875778043e-05,
"loss": 3.228,
"step": 248000
},
{
"epoch": 5.48,
"learning_rate": 2.2585198428464223e-05,
"loss": 3.1896,
"step": 248500
},
{
"epoch": 5.5,
"learning_rate": 2.2530018099148017e-05,
"loss": 3.1933,
"step": 249000
},
{
"epoch": 5.51,
"learning_rate": 2.247483776983181e-05,
"loss": 3.2164,
"step": 249500
},
{
"epoch": 5.52,
"learning_rate": 2.2419657440515606e-05,
"loss": 3.1935,
"step": 250000
},
{
"epoch": 5.53,
"learning_rate": 2.2364587471858033e-05,
"loss": 3.2113,
"step": 250500
},
{
"epoch": 5.54,
"learning_rate": 2.2309407142541828e-05,
"loss": 3.212,
"step": 251000
},
{
"epoch": 5.55,
"learning_rate": 2.225422681322562e-05,
"loss": 3.1896,
"step": 251500
},
{
"epoch": 5.56,
"learning_rate": 2.2199046483909416e-05,
"loss": 3.2206,
"step": 252000
},
{
"epoch": 5.57,
"learning_rate": 2.2143866154593213e-05,
"loss": 3.1786,
"step": 252500
},
{
"epoch": 5.58,
"learning_rate": 2.2088685825277004e-05,
"loss": 3.2255,
"step": 253000
},
{
"epoch": 5.6,
"learning_rate": 2.20335054959608e-05,
"loss": 3.2156,
"step": 253500
},
{
"epoch": 5.61,
"learning_rate": 2.197843552730323e-05,
"loss": 3.1826,
"step": 254000
},
{
"epoch": 5.62,
"learning_rate": 2.1923255197987024e-05,
"loss": 3.1919,
"step": 254500
},
{
"epoch": 5.63,
"learning_rate": 2.1868074868670818e-05,
"loss": 3.2185,
"step": 255000
},
{
"epoch": 5.64,
"learning_rate": 2.1812894539354612e-05,
"loss": 3.1902,
"step": 255500
},
{
"epoch": 5.65,
"learning_rate": 2.1757714210038406e-05,
"loss": 3.1955,
"step": 256000
},
{
"epoch": 5.66,
"learning_rate": 2.17025338807222e-05,
"loss": 3.1839,
"step": 256500
},
{
"epoch": 5.67,
"learning_rate": 2.1647353551405998e-05,
"loss": 3.1814,
"step": 257000
},
{
"epoch": 5.68,
"learning_rate": 2.1592283582748422e-05,
"loss": 3.1801,
"step": 257500
},
{
"epoch": 5.69,
"learning_rate": 2.1537103253432216e-05,
"loss": 3.2027,
"step": 258000
},
{
"epoch": 5.71,
"learning_rate": 2.1481922924116014e-05,
"loss": 3.1947,
"step": 258500
},
{
"epoch": 5.72,
"learning_rate": 2.1426742594799808e-05,
"loss": 3.1846,
"step": 259000
},
{
"epoch": 5.73,
"learning_rate": 2.1371562265483602e-05,
"loss": 3.1904,
"step": 259500
},
{
"epoch": 5.74,
"learning_rate": 2.1316381936167396e-05,
"loss": 3.1703,
"step": 260000
},
{
"epoch": 5.75,
"learning_rate": 2.126120160685119e-05,
"loss": 3.1896,
"step": 260500
},
{
"epoch": 5.76,
"learning_rate": 2.1206021277534984e-05,
"loss": 3.1815,
"step": 261000
},
{
"epoch": 5.77,
"learning_rate": 2.1150951308877412e-05,
"loss": 3.211,
"step": 261500
},
{
"epoch": 5.78,
"learning_rate": 2.1095770979561206e-05,
"loss": 3.198,
"step": 262000
},
{
"epoch": 5.79,
"learning_rate": 2.1040590650245e-05,
"loss": 3.1723,
"step": 262500
},
{
"epoch": 5.8,
"learning_rate": 2.0985410320928798e-05,
"loss": 3.185,
"step": 263000
},
{
"epoch": 5.82,
"learning_rate": 2.0930340352271226e-05,
"loss": 3.1865,
"step": 263500
},
{
"epoch": 5.83,
"learning_rate": 2.0875160022955017e-05,
"loss": 3.1992,
"step": 264000
},
{
"epoch": 5.84,
"learning_rate": 2.0819979693638814e-05,
"loss": 3.1823,
"step": 264500
},
{
"epoch": 5.85,
"learning_rate": 2.0764799364322608e-05,
"loss": 3.1733,
"step": 265000
},
{
"epoch": 5.86,
"learning_rate": 2.0709729395665033e-05,
"loss": 3.2061,
"step": 265500
},
{
"epoch": 5.87,
"learning_rate": 2.065454906634883e-05,
"loss": 3.1565,
"step": 266000
},
{
"epoch": 5.88,
"learning_rate": 2.0599368737032624e-05,
"loss": 3.1813,
"step": 266500
},
{
"epoch": 5.89,
"learning_rate": 2.054418840771642e-05,
"loss": 3.1614,
"step": 267000
},
{
"epoch": 5.9,
"learning_rate": 2.0489118439058846e-05,
"loss": 3.176,
"step": 267500
},
{
"epoch": 5.92,
"learning_rate": 2.043393810974264e-05,
"loss": 3.1679,
"step": 268000
},
{
"epoch": 5.93,
"learning_rate": 2.0378757780426435e-05,
"loss": 3.167,
"step": 268500
},
{
"epoch": 5.94,
"learning_rate": 2.032357745111023e-05,
"loss": 3.1635,
"step": 269000
},
{
"epoch": 5.95,
"learning_rate": 2.0268397121794026e-05,
"loss": 3.1673,
"step": 269500
},
{
"epoch": 5.96,
"learning_rate": 2.021332715313645e-05,
"loss": 3.1811,
"step": 270000
},
{
"epoch": 5.97,
"learning_rate": 2.015825718447888e-05,
"loss": 3.179,
"step": 270500
},
{
"epoch": 5.98,
"learning_rate": 2.0103076855162673e-05,
"loss": 3.1596,
"step": 271000
},
{
"epoch": 5.99,
"learning_rate": 2.0047896525846467e-05,
"loss": 3.1735,
"step": 271500
},
{
"epoch": 6.0,
"learning_rate": 1.999271619653026e-05,
"loss": 3.1534,
"step": 272000
},
{
"epoch": 6.01,
"learning_rate": 1.993753586721406e-05,
"loss": 3.1558,
"step": 272500
},
{
"epoch": 6.03,
"learning_rate": 1.988235553789785e-05,
"loss": 3.1764,
"step": 273000
},
{
"epoch": 6.04,
"learning_rate": 1.9827175208581647e-05,
"loss": 3.1647,
"step": 273500
},
{
"epoch": 6.05,
"learning_rate": 1.9772105239924075e-05,
"loss": 3.1632,
"step": 274000
},
{
"epoch": 6.06,
"learning_rate": 1.971692491060787e-05,
"loss": 3.155,
"step": 274500
},
{
"epoch": 6.07,
"learning_rate": 1.9661744581291663e-05,
"loss": 3.1592,
"step": 275000
},
{
"epoch": 6.08,
"learning_rate": 1.9606564251975457e-05,
"loss": 3.1477,
"step": 275500
},
{
"epoch": 6.09,
"learning_rate": 1.955138392265925e-05,
"loss": 3.1714,
"step": 276000
},
{
"epoch": 6.1,
"learning_rate": 1.9496203593343045e-05,
"loss": 3.1351,
"step": 276500
},
{
"epoch": 6.11,
"learning_rate": 1.9441023264026843e-05,
"loss": 3.146,
"step": 277000
},
{
"epoch": 6.13,
"learning_rate": 1.9385842934710634e-05,
"loss": 3.1386,
"step": 277500
},
{
"epoch": 6.14,
"learning_rate": 1.933077296605306e-05,
"loss": 3.1656,
"step": 278000
},
{
"epoch": 6.15,
"learning_rate": 1.927559263673686e-05,
"loss": 3.1477,
"step": 278500
},
{
"epoch": 6.16,
"learning_rate": 1.9220412307420653e-05,
"loss": 3.1561,
"step": 279000
},
{
"epoch": 6.17,
"learning_rate": 1.9165231978104447e-05,
"loss": 3.1536,
"step": 279500
},
{
"epoch": 6.18,
"learning_rate": 1.9110162009446872e-05,
"loss": 3.1246,
"step": 280000
},
{
"epoch": 6.19,
"learning_rate": 1.905498168013067e-05,
"loss": 3.1629,
"step": 280500
},
{
"epoch": 6.2,
"learning_rate": 1.899980135081446e-05,
"loss": 3.1363,
"step": 281000
},
{
"epoch": 6.21,
"learning_rate": 1.8944621021498258e-05,
"loss": 3.1286,
"step": 281500
},
{
"epoch": 6.22,
"learning_rate": 1.8889551052840686e-05,
"loss": 3.1706,
"step": 282000
},
{
"epoch": 6.24,
"learning_rate": 1.8834370723524476e-05,
"loss": 3.1411,
"step": 282500
},
{
"epoch": 6.25,
"learning_rate": 1.8779190394208274e-05,
"loss": 3.1315,
"step": 283000
},
{
"epoch": 6.26,
"learning_rate": 1.8724120425550702e-05,
"loss": 3.162,
"step": 283500
},
{
"epoch": 6.27,
"learning_rate": 1.8668940096234492e-05,
"loss": 3.1391,
"step": 284000
},
{
"epoch": 6.28,
"learning_rate": 1.861375976691829e-05,
"loss": 3.1538,
"step": 284500
},
{
"epoch": 6.29,
"learning_rate": 1.8558579437602084e-05,
"loss": 3.1415,
"step": 285000
},
{
"epoch": 6.3,
"learning_rate": 1.8503399108285878e-05,
"loss": 3.1341,
"step": 285500
},
{
"epoch": 6.31,
"learning_rate": 1.8448218778969672e-05,
"loss": 3.1411,
"step": 286000
},
{
"epoch": 6.32,
"learning_rate": 1.83931488103121e-05,
"loss": 3.14,
"step": 286500
},
{
"epoch": 6.33,
"learning_rate": 1.8337968480995894e-05,
"loss": 3.122,
"step": 287000
},
{
"epoch": 6.35,
"learning_rate": 1.828278815167969e-05,
"loss": 3.1554,
"step": 287500
},
{
"epoch": 6.36,
"learning_rate": 1.8227607822363486e-05,
"loss": 3.1492,
"step": 288000
},
{
"epoch": 6.37,
"learning_rate": 1.8172427493047277e-05,
"loss": 3.1458,
"step": 288500
},
{
"epoch": 6.38,
"learning_rate": 1.8117247163731074e-05,
"loss": 3.156,
"step": 289000
},
{
"epoch": 6.39,
"learning_rate": 1.8062066834414868e-05,
"loss": 3.1217,
"step": 289500
},
{
"epoch": 6.4,
"learning_rate": 1.8006886505098662e-05,
"loss": 3.1334,
"step": 290000
},
{
"epoch": 6.41,
"learning_rate": 1.795181653644109e-05,
"loss": 3.1217,
"step": 290500
},
{
"epoch": 6.42,
"learning_rate": 1.7896636207124884e-05,
"loss": 3.1375,
"step": 291000
},
{
"epoch": 6.43,
"learning_rate": 1.7841566238467312e-05,
"loss": 3.1317,
"step": 291500
},
{
"epoch": 6.45,
"learning_rate": 1.7786385909151107e-05,
"loss": 3.1241,
"step": 292000
},
{
"epoch": 6.46,
"learning_rate": 1.77312055798349e-05,
"loss": 3.1267,
"step": 292500
},
{
"epoch": 6.47,
"learning_rate": 1.7676025250518695e-05,
"loss": 3.1438,
"step": 293000
},
{
"epoch": 6.48,
"learning_rate": 1.762084492120249e-05,
"loss": 3.1227,
"step": 293500
},
{
"epoch": 6.49,
"learning_rate": 1.7565664591886286e-05,
"loss": 3.1263,
"step": 294000
},
{
"epoch": 6.5,
"learning_rate": 1.751048426257008e-05,
"loss": 3.1351,
"step": 294500
},
{
"epoch": 6.51,
"learning_rate": 1.7455303933253875e-05,
"loss": 3.1313,
"step": 295000
},
{
"epoch": 6.52,
"learning_rate": 1.7400344325254933e-05,
"loss": 3.15,
"step": 295500
},
{
"epoch": 6.53,
"learning_rate": 1.734516399593873e-05,
"loss": 3.111,
"step": 296000
},
{
"epoch": 6.54,
"learning_rate": 1.728998366662252e-05,
"loss": 3.1116,
"step": 296500
},
{
"epoch": 6.56,
"learning_rate": 1.723480333730632e-05,
"loss": 3.1279,
"step": 297000
},
{
"epoch": 6.57,
"learning_rate": 1.7179623007990113e-05,
"loss": 3.1251,
"step": 297500
},
{
"epoch": 6.58,
"learning_rate": 1.7124442678673907e-05,
"loss": 3.1334,
"step": 298000
},
{
"epoch": 6.59,
"learning_rate": 1.70692623493577e-05,
"loss": 3.1136,
"step": 298500
},
{
"epoch": 6.6,
"learning_rate": 1.70140820200415e-05,
"loss": 3.1145,
"step": 299000
},
{
"epoch": 6.61,
"learning_rate": 1.695890169072529e-05,
"loss": 3.1042,
"step": 299500
},
{
"epoch": 6.62,
"learning_rate": 1.6903831722067717e-05,
"loss": 3.1245,
"step": 300000
},
{
"epoch": 6.63,
"learning_rate": 1.6848651392751515e-05,
"loss": 3.1251,
"step": 300500
},
{
"epoch": 6.64,
"learning_rate": 1.6793471063435305e-05,
"loss": 3.0838,
"step": 301000
},
{
"epoch": 6.65,
"learning_rate": 1.6738290734119103e-05,
"loss": 3.1115,
"step": 301500
},
{
"epoch": 6.67,
"learning_rate": 1.668322076546153e-05,
"loss": 3.1018,
"step": 302000
},
{
"epoch": 6.68,
"learning_rate": 1.662804043614532e-05,
"loss": 3.127,
"step": 302500
},
{
"epoch": 6.69,
"learning_rate": 1.657286010682912e-05,
"loss": 3.0928,
"step": 303000
},
{
"epoch": 6.7,
"learning_rate": 1.6517679777512913e-05,
"loss": 3.1218,
"step": 303500
},
{
"epoch": 6.71,
"learning_rate": 1.6462499448196707e-05,
"loss": 3.0903,
"step": 304000
},
{
"epoch": 6.72,
"learning_rate": 1.64073191188805e-05,
"loss": 3.118,
"step": 304500
},
{
"epoch": 6.73,
"learning_rate": 1.63521387895643e-05,
"loss": 3.1023,
"step": 305000
},
{
"epoch": 6.74,
"learning_rate": 1.629695846024809e-05,
"loss": 3.1372,
"step": 305500
},
{
"epoch": 6.75,
"learning_rate": 1.6241888491590518e-05,
"loss": 3.1009,
"step": 306000
},
{
"epoch": 6.77,
"learning_rate": 1.6186708162274315e-05,
"loss": 3.0873,
"step": 306500
},
{
"epoch": 6.78,
"learning_rate": 1.6131527832958106e-05,
"loss": 3.1251,
"step": 307000
},
{
"epoch": 6.79,
"learning_rate": 1.6076568224959168e-05,
"loss": 3.0993,
"step": 307500
},
{
"epoch": 6.8,
"learning_rate": 1.6021387895642962e-05,
"loss": 3.0884,
"step": 308000
},
{
"epoch": 6.81,
"learning_rate": 1.5966207566326756e-05,
"loss": 3.1128,
"step": 308500
},
{
"epoch": 6.82,
"learning_rate": 1.591102723701055e-05,
"loss": 3.0882,
"step": 309000
},
{
"epoch": 6.83,
"learning_rate": 1.5855846907694348e-05,
"loss": 3.0977,
"step": 309500
},
{
"epoch": 6.84,
"learning_rate": 1.5800666578378138e-05,
"loss": 3.1042,
"step": 310000
},
{
"epoch": 6.85,
"learning_rate": 1.5745486249061936e-05,
"loss": 3.1097,
"step": 310500
},
{
"epoch": 6.86,
"learning_rate": 1.569030591974573e-05,
"loss": 3.1116,
"step": 311000
},
{
"epoch": 6.88,
"learning_rate": 1.5635125590429524e-05,
"loss": 3.0995,
"step": 311500
},
{
"epoch": 6.89,
"learning_rate": 1.5579945261113318e-05,
"loss": 3.0802,
"step": 312000
},
{
"epoch": 6.9,
"learning_rate": 1.5524875292455746e-05,
"loss": 3.098,
"step": 312500
},
{
"epoch": 6.91,
"learning_rate": 1.546969496313954e-05,
"loss": 3.0987,
"step": 313000
},
{
"epoch": 6.92,
"learning_rate": 1.5414514633823334e-05,
"loss": 3.0853,
"step": 313500
},
{
"epoch": 6.93,
"learning_rate": 1.5359334304507132e-05,
"loss": 3.102,
"step": 314000
},
{
"epoch": 6.94,
"learning_rate": 1.5304153975190922e-05,
"loss": 3.0814,
"step": 314500
},
{
"epoch": 6.95,
"learning_rate": 1.524897364587472e-05,
"loss": 3.1001,
"step": 315000
},
{
"epoch": 6.96,
"learning_rate": 1.5193793316558514e-05,
"loss": 3.1096,
"step": 315500
},
{
"epoch": 6.97,
"learning_rate": 1.513861298724231e-05,
"loss": 3.1163,
"step": 316000
},
{
"epoch": 6.99,
"learning_rate": 1.5083432657926102e-05,
"loss": 3.0899,
"step": 316500
},
{
"epoch": 7.0,
"learning_rate": 1.502836268926853e-05,
"loss": 3.1128,
"step": 317000
},
{
"epoch": 7.01,
"learning_rate": 1.4973292720610957e-05,
"loss": 3.1108,
"step": 317500
},
{
"epoch": 7.02,
"learning_rate": 1.4918112391294752e-05,
"loss": 3.0949,
"step": 318000
},
{
"epoch": 7.03,
"learning_rate": 1.4862932061978546e-05,
"loss": 3.0832,
"step": 318500
},
{
"epoch": 7.04,
"learning_rate": 1.4807751732662342e-05,
"loss": 3.0809,
"step": 319000
},
{
"epoch": 7.05,
"learning_rate": 1.4752571403346135e-05,
"loss": 3.084,
"step": 319500
},
{
"epoch": 7.06,
"learning_rate": 1.4697391074029932e-05,
"loss": 3.0932,
"step": 320000
},
{
"epoch": 7.07,
"learning_rate": 1.4642210744713725e-05,
"loss": 3.1041,
"step": 320500
},
{
"epoch": 7.09,
"learning_rate": 1.4587140776056153e-05,
"loss": 3.0992,
"step": 321000
},
{
"epoch": 7.1,
"learning_rate": 1.4531960446739948e-05,
"loss": 3.092,
"step": 321500
},
{
"epoch": 7.11,
"learning_rate": 1.447678011742374e-05,
"loss": 3.1064,
"step": 322000
},
{
"epoch": 7.12,
"learning_rate": 1.4421599788107537e-05,
"loss": 3.0972,
"step": 322500
},
{
"epoch": 7.13,
"learning_rate": 1.436641945879133e-05,
"loss": 3.0831,
"step": 323000
},
{
"epoch": 7.14,
"learning_rate": 1.4311239129475126e-05,
"loss": 3.0751,
"step": 323500
},
{
"epoch": 7.15,
"learning_rate": 1.4256169160817553e-05,
"loss": 3.0673,
"step": 324000
},
{
"epoch": 7.16,
"learning_rate": 1.4200988831501347e-05,
"loss": 3.0956,
"step": 324500
},
{
"epoch": 7.17,
"learning_rate": 1.4145808502185143e-05,
"loss": 3.0622,
"step": 325000
},
{
"epoch": 7.18,
"learning_rate": 1.4090628172868937e-05,
"loss": 3.07,
"step": 325500
},
{
"epoch": 7.2,
"learning_rate": 1.4035447843552733e-05,
"loss": 3.0643,
"step": 326000
},
{
"epoch": 7.21,
"learning_rate": 1.3980267514236525e-05,
"loss": 3.1144,
"step": 326500
},
{
"epoch": 7.22,
"learning_rate": 1.392508718492032e-05,
"loss": 3.0449,
"step": 327000
},
{
"epoch": 7.23,
"learning_rate": 1.3869906855604115e-05,
"loss": 3.07,
"step": 327500
},
{
"epoch": 7.24,
"learning_rate": 1.3814836886946541e-05,
"loss": 3.0831,
"step": 328000
},
{
"epoch": 7.25,
"learning_rate": 1.3759656557630337e-05,
"loss": 3.0779,
"step": 328500
},
{
"epoch": 7.26,
"learning_rate": 1.3704476228314131e-05,
"loss": 3.0875,
"step": 329000
},
{
"epoch": 7.27,
"learning_rate": 1.3649295898997927e-05,
"loss": 3.0782,
"step": 329500
},
{
"epoch": 7.28,
"learning_rate": 1.3594225930340355e-05,
"loss": 3.0629,
"step": 330000
},
{
"epoch": 7.29,
"learning_rate": 1.3539045601024147e-05,
"loss": 3.0783,
"step": 330500
},
{
"epoch": 7.31,
"learning_rate": 1.3483865271707943e-05,
"loss": 3.081,
"step": 331000
},
{
"epoch": 7.32,
"learning_rate": 1.3428684942391737e-05,
"loss": 3.0692,
"step": 331500
},
{
"epoch": 7.33,
"learning_rate": 1.3373504613075533e-05,
"loss": 3.0574,
"step": 332000
},
{
"epoch": 7.34,
"learning_rate": 1.331843464441796e-05,
"loss": 3.0429,
"step": 332500
},
{
"epoch": 7.35,
"learning_rate": 1.3263364675760387e-05,
"loss": 3.0709,
"step": 333000
},
{
"epoch": 7.36,
"learning_rate": 1.320818434644418e-05,
"loss": 3.0692,
"step": 333500
},
{
"epoch": 7.37,
"learning_rate": 1.3153004017127975e-05,
"loss": 3.0937,
"step": 334000
},
{
"epoch": 7.38,
"learning_rate": 1.309782368781177e-05,
"loss": 3.0585,
"step": 334500
},
{
"epoch": 7.39,
"learning_rate": 1.3042643358495565e-05,
"loss": 3.0704,
"step": 335000
},
{
"epoch": 7.41,
"learning_rate": 1.2987573389837993e-05,
"loss": 3.0769,
"step": 335500
},
{
"epoch": 7.42,
"learning_rate": 1.2932393060521786e-05,
"loss": 3.0594,
"step": 336000
},
{
"epoch": 7.43,
"learning_rate": 1.2877212731205582e-05,
"loss": 3.1023,
"step": 336500
},
{
"epoch": 7.44,
"learning_rate": 1.2822032401889376e-05,
"loss": 3.07,
"step": 337000
},
{
"epoch": 7.45,
"learning_rate": 1.2766852072573171e-05,
"loss": 3.0833,
"step": 337500
},
{
"epoch": 7.46,
"learning_rate": 1.2711782103915596e-05,
"loss": 3.0631,
"step": 338000
},
{
"epoch": 7.47,
"learning_rate": 1.2656601774599392e-05,
"loss": 3.0495,
"step": 338500
},
{
"epoch": 7.48,
"learning_rate": 1.2601421445283184e-05,
"loss": 3.0842,
"step": 339000
},
{
"epoch": 7.49,
"learning_rate": 1.254624111596698e-05,
"loss": 3.0595,
"step": 339500
},
{
"epoch": 7.5,
"learning_rate": 1.2491171147309408e-05,
"loss": 3.0266,
"step": 340000
},
{
"epoch": 7.52,
"learning_rate": 1.2435990817993202e-05,
"loss": 3.0719,
"step": 340500
},
{
"epoch": 7.53,
"learning_rate": 1.2380810488676998e-05,
"loss": 3.0654,
"step": 341000
},
{
"epoch": 7.54,
"learning_rate": 1.2325630159360792e-05,
"loss": 3.0392,
"step": 341500
},
{
"epoch": 7.55,
"learning_rate": 1.2270449830044586e-05,
"loss": 3.0524,
"step": 342000
},
{
"epoch": 7.56,
"learning_rate": 1.221526950072838e-05,
"loss": 3.0416,
"step": 342500
},
{
"epoch": 7.57,
"learning_rate": 1.2160199532070808e-05,
"loss": 3.0492,
"step": 343000
},
{
"epoch": 7.58,
"learning_rate": 1.2105019202754602e-05,
"loss": 3.0574,
"step": 343500
},
{
"epoch": 7.59,
"learning_rate": 1.2049838873438398e-05,
"loss": 3.0499,
"step": 344000
},
{
"epoch": 7.6,
"learning_rate": 1.1994658544122192e-05,
"loss": 3.0591,
"step": 344500
},
{
"epoch": 7.61,
"learning_rate": 1.1939698936123252e-05,
"loss": 3.0516,
"step": 345000
},
{
"epoch": 7.63,
"learning_rate": 1.1884518606807046e-05,
"loss": 3.0686,
"step": 345500
},
{
"epoch": 7.64,
"learning_rate": 1.182933827749084e-05,
"loss": 3.0515,
"step": 346000
},
{
"epoch": 7.65,
"learning_rate": 1.1774157948174636e-05,
"loss": 3.054,
"step": 346500
},
{
"epoch": 7.66,
"learning_rate": 1.171897761885843e-05,
"loss": 3.0474,
"step": 347000
},
{
"epoch": 7.67,
"learning_rate": 1.1663907650200857e-05,
"loss": 3.0518,
"step": 347500
},
{
"epoch": 7.68,
"learning_rate": 1.1608727320884653e-05,
"loss": 3.0519,
"step": 348000
},
{
"epoch": 7.69,
"learning_rate": 1.1553657352227079e-05,
"loss": 3.0803,
"step": 348500
},
{
"epoch": 7.7,
"learning_rate": 1.1498477022910873e-05,
"loss": 3.0586,
"step": 349000
},
{
"epoch": 7.71,
"learning_rate": 1.1443296693594669e-05,
"loss": 3.0444,
"step": 349500
},
{
"epoch": 7.73,
"learning_rate": 1.1388116364278463e-05,
"loss": 3.065,
"step": 350000
},
{
"epoch": 7.74,
"learning_rate": 1.1332936034962257e-05,
"loss": 3.0469,
"step": 350500
},
{
"epoch": 7.75,
"learning_rate": 1.1277755705646053e-05,
"loss": 3.0598,
"step": 351000
},
{
"epoch": 7.76,
"learning_rate": 1.1222575376329847e-05,
"loss": 3.0432,
"step": 351500
},
{
"epoch": 7.77,
"learning_rate": 1.1167395047013641e-05,
"loss": 3.0776,
"step": 352000
},
{
"epoch": 7.78,
"learning_rate": 1.1112214717697437e-05,
"loss": 3.0461,
"step": 352500
},
{
"epoch": 7.79,
"learning_rate": 1.1057034388381231e-05,
"loss": 3.0249,
"step": 353000
},
{
"epoch": 7.8,
"learning_rate": 1.1001964419723657e-05,
"loss": 3.0283,
"step": 353500
},
{
"epoch": 7.81,
"learning_rate": 1.0946784090407453e-05,
"loss": 3.0611,
"step": 354000
},
{
"epoch": 7.82,
"learning_rate": 1.0891603761091247e-05,
"loss": 3.0543,
"step": 354500
},
{
"epoch": 7.84,
"learning_rate": 1.0836533792433675e-05,
"loss": 3.0499,
"step": 355000
},
{
"epoch": 7.85,
"learning_rate": 1.078135346311747e-05,
"loss": 3.0487,
"step": 355500
},
{
"epoch": 7.86,
"learning_rate": 1.0726173133801263e-05,
"loss": 3.0514,
"step": 356000
},
{
"epoch": 7.87,
"learning_rate": 1.0670992804485059e-05,
"loss": 3.0594,
"step": 356500
},
{
"epoch": 7.88,
"learning_rate": 1.0615812475168853e-05,
"loss": 3.0416,
"step": 357000
},
{
"epoch": 7.89,
"learning_rate": 1.0560632145852647e-05,
"loss": 3.0343,
"step": 357500
},
{
"epoch": 7.9,
"learning_rate": 1.0505451816536441e-05,
"loss": 3.0511,
"step": 358000
},
{
"epoch": 7.91,
"learning_rate": 1.0450271487220237e-05,
"loss": 3.0151,
"step": 358500
},
{
"epoch": 7.92,
"learning_rate": 1.0395091157904031e-05,
"loss": 3.0444,
"step": 359000
},
{
"epoch": 7.93,
"learning_rate": 1.0339910828587825e-05,
"loss": 3.0387,
"step": 359500
},
{
"epoch": 7.95,
"learning_rate": 1.0284730499271621e-05,
"loss": 3.055,
"step": 360000
},
{
"epoch": 7.96,
"learning_rate": 1.0229550169955415e-05,
"loss": 3.0524,
"step": 360500
},
{
"epoch": 7.97,
"learning_rate": 1.017436984063921e-05,
"loss": 3.0318,
"step": 361000
},
{
"epoch": 7.98,
"learning_rate": 1.0119189511323005e-05,
"loss": 3.0385,
"step": 361500
},
{
"epoch": 7.99,
"learning_rate": 1.00640091820068e-05,
"loss": 3.0391,
"step": 362000
},
{
"epoch": 8.0,
"learning_rate": 1.0008828852690593e-05,
"loss": 3.0757,
"step": 362500
},
{
"epoch": 8.01,
"learning_rate": 9.953869244691654e-06,
"loss": 3.0233,
"step": 363000
},
{
"epoch": 8.02,
"learning_rate": 9.898688915375448e-06,
"loss": 3.0403,
"step": 363500
},
{
"epoch": 8.03,
"learning_rate": 9.843508586059243e-06,
"loss": 3.0133,
"step": 364000
},
{
"epoch": 8.05,
"learning_rate": 9.788328256743038e-06,
"loss": 3.0588,
"step": 364500
},
{
"epoch": 8.06,
"learning_rate": 9.733258288085464e-06,
"loss": 3.0474,
"step": 365000
},
{
"epoch": 8.07,
"learning_rate": 9.678077958769258e-06,
"loss": 3.0242,
"step": 365500
},
{
"epoch": 8.08,
"learning_rate": 9.623007990111684e-06,
"loss": 3.0255,
"step": 366000
},
{
"epoch": 8.09,
"learning_rate": 9.56782766079548e-06,
"loss": 3.0275,
"step": 366500
},
{
"epoch": 8.1,
"learning_rate": 9.512647331479274e-06,
"loss": 3.0484,
"step": 367000
},
{
"epoch": 8.11,
"learning_rate": 9.457467002163068e-06,
"loss": 3.0498,
"step": 367500
},
{
"epoch": 8.12,
"learning_rate": 9.402286672846864e-06,
"loss": 3.0495,
"step": 368000
},
{
"epoch": 8.13,
"learning_rate": 9.347106343530658e-06,
"loss": 3.0383,
"step": 368500
},
{
"epoch": 8.14,
"learning_rate": 9.291926014214452e-06,
"loss": 3.028,
"step": 369000
},
{
"epoch": 8.16,
"learning_rate": 9.236745684898248e-06,
"loss": 3.0487,
"step": 369500
},
{
"epoch": 8.17,
"learning_rate": 9.181565355582042e-06,
"loss": 3.0393,
"step": 370000
},
{
"epoch": 8.18,
"learning_rate": 9.126385026265836e-06,
"loss": 3.0427,
"step": 370500
},
{
"epoch": 8.19,
"learning_rate": 9.071315057608264e-06,
"loss": 3.0273,
"step": 371000
},
{
"epoch": 8.2,
"learning_rate": 9.016134728292058e-06,
"loss": 3.0258,
"step": 371500
},
{
"epoch": 8.21,
"learning_rate": 8.960954398975852e-06,
"loss": 3.0272,
"step": 372000
},
{
"epoch": 8.22,
"learning_rate": 8.905774069659648e-06,
"loss": 3.0353,
"step": 372500
},
{
"epoch": 8.23,
"learning_rate": 8.850593740343442e-06,
"loss": 3.026,
"step": 373000
},
{
"epoch": 8.24,
"learning_rate": 8.795413411027236e-06,
"loss": 3.0122,
"step": 373500
},
{
"epoch": 8.25,
"learning_rate": 8.74023308171103e-06,
"loss": 3.0214,
"step": 374000
},
{
"epoch": 8.27,
"learning_rate": 8.685052752394826e-06,
"loss": 3.0363,
"step": 374500
},
{
"epoch": 8.28,
"learning_rate": 8.62987242307862e-06,
"loss": 3.0149,
"step": 375000
},
{
"epoch": 8.29,
"learning_rate": 8.574692093762415e-06,
"loss": 3.0321,
"step": 375500
},
{
"epoch": 8.3,
"learning_rate": 8.51951176444621e-06,
"loss": 3.0214,
"step": 376000
},
{
"epoch": 8.31,
"learning_rate": 8.464331435130005e-06,
"loss": 3.0164,
"step": 376500
},
{
"epoch": 8.32,
"learning_rate": 8.409261466472432e-06,
"loss": 3.0372,
"step": 377000
},
{
"epoch": 8.33,
"learning_rate": 8.354191497814859e-06,
"loss": 3.0201,
"step": 377500
},
{
"epoch": 8.34,
"learning_rate": 8.299011168498653e-06,
"loss": 3.0226,
"step": 378000
},
{
"epoch": 8.35,
"learning_rate": 8.243830839182449e-06,
"loss": 3.0337,
"step": 378500
},
{
"epoch": 8.37,
"learning_rate": 8.188650509866243e-06,
"loss": 3.0406,
"step": 379000
},
{
"epoch": 8.38,
"learning_rate": 8.133470180550037e-06,
"loss": 3.0153,
"step": 379500
},
{
"epoch": 8.39,
"learning_rate": 8.078289851233833e-06,
"loss": 3.0214,
"step": 380000
},
{
"epoch": 8.4,
"learning_rate": 8.023109521917627e-06,
"loss": 3.0262,
"step": 380500
},
{
"epoch": 8.41,
"learning_rate": 7.967929192601421e-06,
"loss": 3.0335,
"step": 381000
},
{
"epoch": 8.42,
"learning_rate": 7.912859223943849e-06,
"loss": 3.0311,
"step": 381500
},
{
"epoch": 8.43,
"learning_rate": 7.857789255286275e-06,
"loss": 3.0177,
"step": 382000
},
{
"epoch": 8.44,
"learning_rate": 7.802608925970071e-06,
"loss": 3.0224,
"step": 382500
},
{
"epoch": 8.45,
"learning_rate": 7.747428596653865e-06,
"loss": 3.0132,
"step": 383000
},
{
"epoch": 8.46,
"learning_rate": 7.69224826733766e-06,
"loss": 3.0463,
"step": 383500
},
{
"epoch": 8.48,
"learning_rate": 7.637067938021453e-06,
"loss": 3.0117,
"step": 384000
},
{
"epoch": 8.49,
"learning_rate": 7.581997969363881e-06,
"loss": 3.0083,
"step": 384500
},
{
"epoch": 8.5,
"learning_rate": 7.526817640047676e-06,
"loss": 3.0156,
"step": 385000
},
{
"epoch": 8.51,
"learning_rate": 7.47163731073147e-06,
"loss": 3.0076,
"step": 385500
},
{
"epoch": 8.52,
"learning_rate": 7.416456981415265e-06,
"loss": 3.0232,
"step": 386000
},
{
"epoch": 8.53,
"learning_rate": 7.361276652099059e-06,
"loss": 2.9985,
"step": 386500
},
{
"epoch": 8.54,
"learning_rate": 7.306096322782854e-06,
"loss": 3.0135,
"step": 387000
},
{
"epoch": 8.55,
"learning_rate": 7.250915993466649e-06,
"loss": 3.0105,
"step": 387500
},
{
"epoch": 8.56,
"learning_rate": 7.195735664150443e-06,
"loss": 3.0113,
"step": 388000
},
{
"epoch": 8.58,
"learning_rate": 7.1407760561515036e-06,
"loss": 3.0216,
"step": 388500
},
{
"epoch": 8.59,
"learning_rate": 7.085595726835298e-06,
"loss": 3.0029,
"step": 389000
},
{
"epoch": 8.6,
"learning_rate": 7.030415397519093e-06,
"loss": 3.0341,
"step": 389500
},
{
"epoch": 8.61,
"learning_rate": 6.9752350682028876e-06,
"loss": 3.0208,
"step": 390000
},
{
"epoch": 8.62,
"learning_rate": 6.920054738886682e-06,
"loss": 2.9932,
"step": 390500
},
{
"epoch": 8.63,
"learning_rate": 6.864984770229109e-06,
"loss": 3.0241,
"step": 391000
},
{
"epoch": 8.64,
"learning_rate": 6.809804440912904e-06,
"loss": 3.0207,
"step": 391500
},
{
"epoch": 8.65,
"learning_rate": 6.754624111596698e-06,
"loss": 3.0147,
"step": 392000
},
{
"epoch": 8.66,
"learning_rate": 6.699443782280493e-06,
"loss": 3.0113,
"step": 392500
},
{
"epoch": 8.67,
"learning_rate": 6.644263452964288e-06,
"loss": 3.0091,
"step": 393000
},
{
"epoch": 8.69,
"learning_rate": 6.589193484306715e-06,
"loss": 3.0171,
"step": 393500
},
{
"epoch": 8.7,
"learning_rate": 6.534013154990509e-06,
"loss": 3.0101,
"step": 394000
},
{
"epoch": 8.71,
"learning_rate": 6.478832825674304e-06,
"loss": 3.0111,
"step": 394500
},
{
"epoch": 8.72,
"learning_rate": 6.423652496358099e-06,
"loss": 3.0004,
"step": 395000
},
{
"epoch": 8.73,
"learning_rate": 6.368472167041893e-06,
"loss": 3.0315,
"step": 395500
},
{
"epoch": 8.74,
"learning_rate": 6.313291837725688e-06,
"loss": 3.019,
"step": 396000
},
{
"epoch": 8.75,
"learning_rate": 6.258111508409482e-06,
"loss": 3.0321,
"step": 396500
},
{
"epoch": 8.76,
"learning_rate": 6.203041539751909e-06,
"loss": 3.0028,
"step": 397000
},
{
"epoch": 8.77,
"learning_rate": 6.147861210435704e-06,
"loss": 3.0046,
"step": 397500
},
{
"epoch": 8.78,
"learning_rate": 6.092680881119499e-06,
"loss": 3.0161,
"step": 398000
},
{
"epoch": 8.8,
"learning_rate": 6.037610912461926e-06,
"loss": 3.0172,
"step": 398500
},
{
"epoch": 8.81,
"learning_rate": 5.98243058314572e-06,
"loss": 3.0022,
"step": 399000
},
{
"epoch": 8.82,
"learning_rate": 5.927250253829515e-06,
"loss": 3.0007,
"step": 399500
},
{
"epoch": 8.83,
"learning_rate": 5.87206992451331e-06,
"loss": 2.9997,
"step": 400000
},
{
"epoch": 8.84,
"learning_rate": 5.816889595197104e-06,
"loss": 3.0021,
"step": 400500
},
{
"epoch": 8.85,
"learning_rate": 5.761709265880899e-06,
"loss": 2.9945,
"step": 401000
},
{
"epoch": 8.86,
"learning_rate": 5.706528936564693e-06,
"loss": 2.9905,
"step": 401500
},
{
"epoch": 8.87,
"learning_rate": 5.651348607248488e-06,
"loss": 3.0359,
"step": 402000
},
{
"epoch": 8.88,
"learning_rate": 5.596168277932283e-06,
"loss": 3.0091,
"step": 402500
},
{
"epoch": 8.9,
"learning_rate": 5.5409879486160774e-06,
"loss": 2.9852,
"step": 403000
},
{
"epoch": 8.91,
"learning_rate": 5.485807619299872e-06,
"loss": 3.005,
"step": 403500
},
{
"epoch": 8.92,
"learning_rate": 5.4306272899836665e-06,
"loss": 2.9915,
"step": 404000
},
{
"epoch": 8.93,
"learning_rate": 5.375557321326094e-06,
"loss": 2.9983,
"step": 404500
},
{
"epoch": 8.94,
"learning_rate": 5.3203769920098886e-06,
"loss": 3.0043,
"step": 405000
},
{
"epoch": 8.95,
"learning_rate": 5.2651966626936835e-06,
"loss": 3.008,
"step": 405500
},
{
"epoch": 8.96,
"learning_rate": 5.210016333377478e-06,
"loss": 3.0131,
"step": 406000
},
{
"epoch": 8.97,
"learning_rate": 5.154836004061273e-06,
"loss": 2.986,
"step": 406500
},
{
"epoch": 8.98,
"learning_rate": 5.0997660354037e-06,
"loss": 3.0058,
"step": 407000
},
{
"epoch": 8.99,
"learning_rate": 5.044585706087495e-06,
"loss": 2.9972,
"step": 407500
},
{
"epoch": 9.01,
"learning_rate": 4.989405376771289e-06,
"loss": 3.0196,
"step": 408000
},
{
"epoch": 9.02,
"learning_rate": 4.934225047455084e-06,
"loss": 2.9983,
"step": 408500
},
{
"epoch": 9.03,
"learning_rate": 4.879044718138878e-06,
"loss": 2.9922,
"step": 409000
},
{
"epoch": 9.04,
"learning_rate": 4.823864388822673e-06,
"loss": 2.9821,
"step": 409500
},
{
"epoch": 9.05,
"learning_rate": 4.768684059506468e-06,
"loss": 3.0117,
"step": 410000
},
{
"epoch": 9.06,
"learning_rate": 4.713503730190262e-06,
"loss": 2.9841,
"step": 410500
},
{
"epoch": 9.07,
"learning_rate": 4.658433761532689e-06,
"loss": 2.999,
"step": 411000
},
{
"epoch": 9.08,
"learning_rate": 4.603363792875116e-06,
"loss": 2.9976,
"step": 411500
},
{
"epoch": 9.09,
"learning_rate": 4.548183463558911e-06,
"loss": 3.0173,
"step": 412000
},
{
"epoch": 9.1,
"learning_rate": 4.493003134242706e-06,
"loss": 3.0106,
"step": 412500
},
{
"epoch": 9.12,
"learning_rate": 4.4378228049265e-06,
"loss": 3.0051,
"step": 413000
},
{
"epoch": 9.13,
"learning_rate": 4.382642475610295e-06,
"loss": 2.9937,
"step": 413500
},
{
"epoch": 9.14,
"learning_rate": 4.327572506952722e-06,
"loss": 3.0037,
"step": 414000
},
{
"epoch": 9.15,
"learning_rate": 4.272392177636516e-06,
"loss": 3.0021,
"step": 414500
},
{
"epoch": 9.16,
"learning_rate": 4.217211848320311e-06,
"loss": 3.0038,
"step": 415000
},
{
"epoch": 9.17,
"learning_rate": 4.162031519004106e-06,
"loss": 2.9963,
"step": 415500
},
{
"epoch": 9.18,
"learning_rate": 4.1068511896879e-06,
"loss": 2.9812,
"step": 416000
},
{
"epoch": 9.19,
"learning_rate": 4.051670860371695e-06,
"loss": 2.9977,
"step": 416500
},
{
"epoch": 9.2,
"learning_rate": 3.99649053105549e-06,
"loss": 3.0,
"step": 417000
},
{
"epoch": 9.22,
"learning_rate": 3.941310201739284e-06,
"loss": 2.9986,
"step": 417500
},
{
"epoch": 9.23,
"learning_rate": 3.886240233081711e-06,
"loss": 2.9909,
"step": 418000
},
{
"epoch": 9.24,
"learning_rate": 3.8310599037655056e-06,
"loss": 3.0106,
"step": 418500
},
{
"epoch": 9.25,
"learning_rate": 3.7758795744493e-06,
"loss": 2.9765,
"step": 419000
},
{
"epoch": 9.26,
"learning_rate": 3.7206992451330955e-06,
"loss": 2.9911,
"step": 419500
},
{
"epoch": 9.27,
"learning_rate": 3.6656292764755218e-06,
"loss": 2.9711,
"step": 420000
},
{
"epoch": 9.28,
"learning_rate": 3.6104489471593167e-06,
"loss": 2.9771,
"step": 420500
},
{
"epoch": 9.29,
"learning_rate": 3.555378978501744e-06,
"loss": 2.9977,
"step": 421000
},
{
"epoch": 9.3,
"learning_rate": 3.5001986491855384e-06,
"loss": 2.9958,
"step": 421500
},
{
"epoch": 9.31,
"learning_rate": 3.445018319869333e-06,
"loss": 2.9842,
"step": 422000
},
{
"epoch": 9.33,
"learning_rate": 3.3898379905531274e-06,
"loss": 2.9804,
"step": 422500
},
{
"epoch": 9.34,
"learning_rate": 3.3346576612369224e-06,
"loss": 2.9934,
"step": 423000
},
{
"epoch": 9.35,
"learning_rate": 3.279477331920717e-06,
"loss": 2.9994,
"step": 423500
},
{
"epoch": 9.36,
"learning_rate": 3.2242970026045114e-06,
"loss": 2.9675,
"step": 424000
},
{
"epoch": 9.37,
"learning_rate": 3.1692270339469386e-06,
"loss": 2.9771,
"step": 424500
},
{
"epoch": 9.38,
"learning_rate": 3.114046704630733e-06,
"loss": 3.0091,
"step": 425000
},
{
"epoch": 9.39,
"learning_rate": 3.058866375314528e-06,
"loss": 2.9936,
"step": 425500
},
{
"epoch": 9.4,
"learning_rate": 3.0036860459983226e-06,
"loss": 2.9873,
"step": 426000
},
{
"epoch": 9.41,
"learning_rate": 2.948505716682117e-06,
"loss": 3.0008,
"step": 426500
},
{
"epoch": 9.42,
"learning_rate": 2.8933253873659116e-06,
"loss": 2.9849,
"step": 427000
},
{
"epoch": 9.44,
"learning_rate": 2.8382554187083388e-06,
"loss": 2.9837,
"step": 427500
},
{
"epoch": 9.45,
"learning_rate": 2.7830750893921337e-06,
"loss": 2.9928,
"step": 428000
},
{
"epoch": 9.46,
"learning_rate": 2.7278947600759283e-06,
"loss": 2.9879,
"step": 428500
},
{
"epoch": 9.47,
"learning_rate": 2.6728247914183554e-06,
"loss": 2.9815,
"step": 429000
},
{
"epoch": 9.48,
"learning_rate": 2.61764446210215e-06,
"loss": 3.0079,
"step": 429500
},
{
"epoch": 9.49,
"learning_rate": 2.5624641327859444e-06,
"loss": 3.0085,
"step": 430000
},
{
"epoch": 9.5,
"learning_rate": 2.5072838034697394e-06,
"loss": 2.9901,
"step": 430500
},
{
"epoch": 9.51,
"learning_rate": 2.452103474153534e-06,
"loss": 3.0045,
"step": 431000
},
{
"epoch": 9.52,
"learning_rate": 2.3969231448373285e-06,
"loss": 2.9818,
"step": 431500
},
{
"epoch": 9.54,
"learning_rate": 2.341742815521123e-06,
"loss": 3.0,
"step": 432000
},
{
"epoch": 9.55,
"learning_rate": 2.2865624862049175e-06,
"loss": 2.9937,
"step": 432500
},
{
"epoch": 9.56,
"learning_rate": 2.2313821568887125e-06,
"loss": 3.0115,
"step": 433000
},
{
"epoch": 9.57,
"learning_rate": 2.176201827572507e-06,
"loss": 2.9705,
"step": 433500
},
{
"epoch": 9.58,
"learning_rate": 2.1210214982563015e-06,
"loss": 2.9965,
"step": 434000
},
{
"epoch": 9.59,
"learning_rate": 2.065841168940096e-06,
"loss": 2.9917,
"step": 434500
},
{
"epoch": 9.6,
"learning_rate": 2.010660839623891e-06,
"loss": 2.9984,
"step": 435000
},
{
"epoch": 9.61,
"learning_rate": 1.9557012316249503e-06,
"loss": 2.9977,
"step": 435500
},
{
"epoch": 9.62,
"learning_rate": 1.900520902308745e-06,
"loss": 2.9705,
"step": 436000
},
{
"epoch": 9.63,
"learning_rate": 1.8453405729925398e-06,
"loss": 2.997,
"step": 436500
},
{
"epoch": 9.65,
"learning_rate": 1.7901602436763343e-06,
"loss": 3.0164,
"step": 437000
},
{
"epoch": 9.66,
"learning_rate": 1.734979914360129e-06,
"loss": 3.0042,
"step": 437500
},
{
"epoch": 9.67,
"learning_rate": 1.6797995850439236e-06,
"loss": 3.0004,
"step": 438000
},
{
"epoch": 9.68,
"learning_rate": 1.6247296163863507e-06,
"loss": 2.9732,
"step": 438500
},
{
"epoch": 9.69,
"learning_rate": 1.5695492870701455e-06,
"loss": 2.9955,
"step": 439000
},
{
"epoch": 9.7,
"learning_rate": 1.51436895775394e-06,
"loss": 2.9884,
"step": 439500
},
{
"epoch": 9.71,
"learning_rate": 1.4591886284377345e-06,
"loss": 2.9598,
"step": 440000
},
{
"epoch": 9.72,
"learning_rate": 1.404008299121529e-06,
"loss": 2.9788,
"step": 440500
},
{
"epoch": 9.73,
"learning_rate": 1.3489383304639564e-06,
"loss": 3.0012,
"step": 441000
},
{
"epoch": 9.74,
"learning_rate": 1.293758001147751e-06,
"loss": 2.9857,
"step": 441500
},
{
"epoch": 9.76,
"learning_rate": 1.2385776718315455e-06,
"loss": 2.9908,
"step": 442000
},
{
"epoch": 9.77,
"learning_rate": 1.1833973425153402e-06,
"loss": 2.9618,
"step": 442500
},
{
"epoch": 9.78,
"learning_rate": 1.1283273738577671e-06,
"loss": 2.989,
"step": 443000
},
{
"epoch": 9.79,
"learning_rate": 1.0731470445415619e-06,
"loss": 2.9716,
"step": 443500
},
{
"epoch": 9.8,
"learning_rate": 1.0179667152253564e-06,
"loss": 2.9988,
"step": 444000
},
{
"epoch": 9.81,
"learning_rate": 9.627863859091511e-07,
"loss": 2.9725,
"step": 444500
},
{
"epoch": 9.82,
"learning_rate": 9.076060565929459e-07,
"loss": 2.9891,
"step": 445000
},
{
"epoch": 9.83,
"learning_rate": 8.524257272767405e-07,
"loss": 2.9862,
"step": 445500
},
{
"epoch": 9.84,
"learning_rate": 7.972453979605352e-07,
"loss": 2.9689,
"step": 446000
},
{
"epoch": 9.86,
"learning_rate": 7.420650686443297e-07,
"loss": 3.0145,
"step": 446500
},
{
"epoch": 9.87,
"learning_rate": 6.869950999867567e-07,
"loss": 2.9883,
"step": 447000
},
{
"epoch": 9.88,
"learning_rate": 6.318147706705513e-07,
"loss": 2.9807,
"step": 447500
},
{
"epoch": 9.89,
"learning_rate": 5.766344413543461e-07,
"loss": 2.9909,
"step": 448000
},
{
"epoch": 9.9,
"learning_rate": 5.214541120381407e-07,
"loss": 2.9903,
"step": 448500
},
{
"epoch": 9.91,
"learning_rate": 4.6638414338056774e-07,
"loss": 2.9957,
"step": 449000
},
{
"epoch": 9.92,
"learning_rate": 4.112038140643624e-07,
"loss": 2.9796,
"step": 449500
},
{
"epoch": 9.93,
"learning_rate": 3.56023484748157e-07,
"loss": 2.9883,
"step": 450000
},
{
"epoch": 9.94,
"learning_rate": 3.0084315543195165e-07,
"loss": 2.9682,
"step": 450500
},
{
"epoch": 9.95,
"learning_rate": 2.457731867743787e-07,
"loss": 2.9838,
"step": 451000
},
{
"epoch": 9.97,
"learning_rate": 1.9070321811680574e-07,
"loss": 3.0001,
"step": 451500
},
{
"epoch": 9.98,
"learning_rate": 1.3552288880060035e-07,
"loss": 2.9725,
"step": 452000
},
{
"epoch": 9.99,
"learning_rate": 8.0342559484395e-08,
"loss": 2.9811,
"step": 452500
},
{
"epoch": 10.0,
"learning_rate": 2.5162230168189646e-08,
"loss": 2.9627,
"step": 453000
},
{
"epoch": 10.0,
"step": 453060,
"total_flos": 1.9088271659160576e+18,
"train_loss": 3.5285831512207335,
"train_runtime": 136651.8747,
"train_samples_per_second": 53.047,
"train_steps_per_second": 3.315
}
],
"logging_steps": 500,
"max_steps": 453060,
"num_train_epochs": 10,
"save_steps": 100000,
"total_flos": 1.9088271659160576e+18,
"trial_name": null,
"trial_params": null
}