M2M100-ja-zh / last-checkpoint /trainer_state.json
Nekofox's picture
Training in progress, step 597000
99356d1
raw
history blame
147 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.649568527120403,
"global_step": 597000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.7856418177137756e-08,
"loss": 2.1137,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 7.571283635427551e-08,
"loss": 2.0821,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 1.1356925453141326e-07,
"loss": 2.1063,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 1.5142567270855102e-07,
"loss": 2.0393,
"step": 2000
},
{
"epoch": 0.03,
"learning_rate": 1.8928209088568877e-07,
"loss": 2.0135,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 2.2713850906282652e-07,
"loss": 1.9837,
"step": 3000
},
{
"epoch": 0.04,
"learning_rate": 2.6499492723996425e-07,
"loss": 1.9524,
"step": 3500
},
{
"epoch": 0.04,
"learning_rate": 3.0277563258074776e-07,
"loss": 1.9752,
"step": 4000
},
{
"epoch": 0.05,
"learning_rate": 3.406320507578855e-07,
"loss": 1.9403,
"step": 4500
},
{
"epoch": 0.05,
"learning_rate": 3.784884689350233e-07,
"loss": 1.9234,
"step": 5000
},
{
"epoch": 0.06,
"learning_rate": 4.16344887112161e-07,
"loss": 1.9008,
"step": 5500
},
{
"epoch": 0.06,
"learning_rate": 4.542013052892988e-07,
"loss": 1.9152,
"step": 6000
},
{
"epoch": 0.07,
"learning_rate": 4.920577234664366e-07,
"loss": 1.8736,
"step": 6500
},
{
"epoch": 0.07,
"learning_rate": 5.299141416435743e-07,
"loss": 1.8995,
"step": 7000
},
{
"epoch": 0.08,
"learning_rate": 5.677705598207121e-07,
"loss": 1.8825,
"step": 7500
},
{
"epoch": 0.08,
"learning_rate": 6.055512651614955e-07,
"loss": 1.8569,
"step": 8000
},
{
"epoch": 0.09,
"learning_rate": 6.433319705022791e-07,
"loss": 1.8883,
"step": 8500
},
{
"epoch": 0.1,
"learning_rate": 6.811883886794167e-07,
"loss": 1.8776,
"step": 9000
},
{
"epoch": 0.1,
"learning_rate": 7.190448068565545e-07,
"loss": 1.8876,
"step": 9500
},
{
"epoch": 0.11,
"learning_rate": 7.569012250336922e-07,
"loss": 1.8702,
"step": 10000
},
{
"epoch": 0.11,
"learning_rate": 7.947576432108301e-07,
"loss": 1.8573,
"step": 10500
},
{
"epoch": 0.12,
"learning_rate": 8.325383485516135e-07,
"loss": 1.857,
"step": 11000
},
{
"epoch": 0.12,
"learning_rate": 8.703947667287513e-07,
"loss": 1.8565,
"step": 11500
},
{
"epoch": 0.13,
"learning_rate": 9.08251184905889e-07,
"loss": 1.831,
"step": 12000
},
{
"epoch": 0.13,
"learning_rate": 9.461076030830267e-07,
"loss": 1.8091,
"step": 12500
},
{
"epoch": 0.14,
"learning_rate": 9.839640212601645e-07,
"loss": 1.8446,
"step": 13000
},
{
"epoch": 0.14,
"learning_rate": 1.021744726600948e-06,
"loss": 1.8294,
"step": 13500
},
{
"epoch": 0.15,
"learning_rate": 1.0595254319417315e-06,
"loss": 1.8024,
"step": 14000
},
{
"epoch": 0.15,
"learning_rate": 1.097381850118869e-06,
"loss": 1.8247,
"step": 14500
},
{
"epoch": 0.16,
"learning_rate": 1.135238268296007e-06,
"loss": 1.8342,
"step": 15000
},
{
"epoch": 0.16,
"learning_rate": 1.1730946864731448e-06,
"loss": 1.8464,
"step": 15500
},
{
"epoch": 0.17,
"learning_rate": 1.2109511046502824e-06,
"loss": 1.8033,
"step": 16000
},
{
"epoch": 0.17,
"learning_rate": 1.248731809991066e-06,
"loss": 1.7894,
"step": 16500
},
{
"epoch": 0.18,
"learning_rate": 1.2865882281682038e-06,
"loss": 1.8257,
"step": 17000
},
{
"epoch": 0.19,
"learning_rate": 1.3244446463453414e-06,
"loss": 1.8199,
"step": 17500
},
{
"epoch": 0.19,
"learning_rate": 1.3623010645224793e-06,
"loss": 1.8294,
"step": 18000
},
{
"epoch": 0.2,
"learning_rate": 1.400157482699617e-06,
"loss": 1.8009,
"step": 18500
},
{
"epoch": 0.2,
"learning_rate": 1.4380139008767545e-06,
"loss": 1.8019,
"step": 19000
},
{
"epoch": 0.21,
"learning_rate": 1.4758703190538926e-06,
"loss": 1.7872,
"step": 19500
},
{
"epoch": 0.21,
"learning_rate": 1.5137267372310303e-06,
"loss": 1.7596,
"step": 20000
},
{
"epoch": 0.22,
"learning_rate": 1.5515074425718137e-06,
"loss": 1.8034,
"step": 20500
},
{
"epoch": 0.22,
"learning_rate": 1.5893638607489514e-06,
"loss": 1.8041,
"step": 21000
},
{
"epoch": 0.23,
"learning_rate": 1.6272202789260894e-06,
"loss": 1.8091,
"step": 21500
},
{
"epoch": 0.23,
"learning_rate": 1.665076697103227e-06,
"loss": 1.818,
"step": 22000
},
{
"epoch": 0.24,
"learning_rate": 1.7028574024440105e-06,
"loss": 1.7801,
"step": 22500
},
{
"epoch": 0.24,
"learning_rate": 1.7407138206211482e-06,
"loss": 1.798,
"step": 23000
},
{
"epoch": 0.25,
"learning_rate": 1.7785702387982858e-06,
"loss": 1.7964,
"step": 23500
},
{
"epoch": 0.25,
"learning_rate": 1.8164266569754239e-06,
"loss": 1.7742,
"step": 24000
},
{
"epoch": 0.26,
"learning_rate": 1.8542830751525615e-06,
"loss": 1.779,
"step": 24500
},
{
"epoch": 0.26,
"learning_rate": 1.892063780493345e-06,
"loss": 1.7976,
"step": 25000
},
{
"epoch": 0.27,
"learning_rate": 1.9299201986704826e-06,
"loss": 1.8047,
"step": 25500
},
{
"epoch": 0.28,
"learning_rate": 1.9677766168476207e-06,
"loss": 1.7778,
"step": 26000
},
{
"epoch": 0.28,
"learning_rate": 2.0056330350247583e-06,
"loss": 1.7395,
"step": 26500
},
{
"epoch": 0.29,
"learning_rate": 2.043413740365542e-06,
"loss": 1.7978,
"step": 27000
},
{
"epoch": 0.29,
"learning_rate": 2.0812701585426794e-06,
"loss": 1.8077,
"step": 27500
},
{
"epoch": 0.3,
"learning_rate": 2.119126576719817e-06,
"loss": 1.7775,
"step": 28000
},
{
"epoch": 0.3,
"learning_rate": 2.156982994896955e-06,
"loss": 1.7742,
"step": 28500
},
{
"epoch": 0.31,
"learning_rate": 2.194839413074093e-06,
"loss": 1.7911,
"step": 29000
},
{
"epoch": 0.31,
"learning_rate": 2.2326201184148763e-06,
"loss": 1.7489,
"step": 29500
},
{
"epoch": 0.32,
"learning_rate": 2.270476536592014e-06,
"loss": 1.7773,
"step": 30000
},
{
"epoch": 0.32,
"learning_rate": 2.308332954769152e-06,
"loss": 1.7914,
"step": 30500
},
{
"epoch": 0.33,
"learning_rate": 2.3461893729462896e-06,
"loss": 1.7394,
"step": 31000
},
{
"epoch": 0.33,
"learning_rate": 2.3840457911234273e-06,
"loss": 1.7794,
"step": 31500
},
{
"epoch": 0.34,
"learning_rate": 2.4218264964642107e-06,
"loss": 1.7762,
"step": 32000
},
{
"epoch": 0.34,
"learning_rate": 4.985384811630502e-06,
"loss": 1.7527,
"step": 32500
},
{
"epoch": 0.35,
"learning_rate": 4.984931927529385e-06,
"loss": 1.7468,
"step": 33000
},
{
"epoch": 0.36,
"learning_rate": 4.984472154475628e-06,
"loss": 1.7746,
"step": 33500
},
{
"epoch": 0.36,
"learning_rate": 4.984006433938366e-06,
"loss": 1.7635,
"step": 34000
},
{
"epoch": 0.37,
"learning_rate": 4.983532900593762e-06,
"loss": 1.7936,
"step": 34500
},
{
"epoch": 0.37,
"learning_rate": 4.983052482175027e-06,
"loss": 1.7431,
"step": 35000
},
{
"epoch": 0.38,
"learning_rate": 4.98256518001402e-06,
"loss": 1.7891,
"step": 35500
},
{
"epoch": 0.38,
"learning_rate": 4.982070995461684e-06,
"loss": 1.7599,
"step": 36000
},
{
"epoch": 0.39,
"learning_rate": 4.981570938885529e-06,
"loss": 1.7501,
"step": 36500
},
{
"epoch": 0.39,
"learning_rate": 4.9810630074375465e-06,
"loss": 1.7679,
"step": 37000
},
{
"epoch": 0.4,
"learning_rate": 4.980548197762693e-06,
"loss": 1.7317,
"step": 37500
},
{
"epoch": 0.4,
"learning_rate": 4.980026511288172e-06,
"loss": 1.7846,
"step": 38000
},
{
"epoch": 0.41,
"learning_rate": 4.979497949460249e-06,
"loss": 1.7632,
"step": 38500
},
{
"epoch": 0.41,
"learning_rate": 4.9789625137442505e-06,
"loss": 1.759,
"step": 39000
},
{
"epoch": 0.42,
"learning_rate": 4.97842020562456e-06,
"loss": 1.7392,
"step": 39500
},
{
"epoch": 0.42,
"learning_rate": 4.977872131818803e-06,
"loss": 1.768,
"step": 40000
},
{
"epoch": 0.43,
"learning_rate": 4.977316097158305e-06,
"loss": 1.7695,
"step": 40500
},
{
"epoch": 0.43,
"learning_rate": 4.976753194658462e-06,
"loss": 1.7787,
"step": 41000
},
{
"epoch": 0.44,
"learning_rate": 4.976183425879801e-06,
"loss": 1.7533,
"step": 41500
},
{
"epoch": 0.45,
"learning_rate": 4.9756067924018844e-06,
"loss": 1.7311,
"step": 42000
},
{
"epoch": 0.45,
"learning_rate": 4.97502446966477e-06,
"loss": 1.7347,
"step": 42500
},
{
"epoch": 0.46,
"learning_rate": 4.9744341253244985e-06,
"loss": 1.7639,
"step": 43000
},
{
"epoch": 0.46,
"learning_rate": 4.973836921134542e-06,
"loss": 1.7626,
"step": 43500
},
{
"epoch": 0.47,
"learning_rate": 4.97323407371866e-06,
"loss": 1.7603,
"step": 44000
},
{
"epoch": 0.47,
"learning_rate": 4.972623168526572e-06,
"loss": 1.7346,
"step": 44500
},
{
"epoch": 0.48,
"learning_rate": 4.972005408505297e-06,
"loss": 1.7621,
"step": 45000
},
{
"epoch": 0.48,
"learning_rate": 4.971380795367446e-06,
"loss": 1.7651,
"step": 45500
},
{
"epoch": 0.49,
"learning_rate": 4.970749330844626e-06,
"loss": 1.7428,
"step": 46000
},
{
"epoch": 0.49,
"learning_rate": 4.970111016687442e-06,
"loss": 1.7332,
"step": 46500
},
{
"epoch": 0.5,
"learning_rate": 4.9694658546654854e-06,
"loss": 1.7329,
"step": 47000
},
{
"epoch": 0.5,
"learning_rate": 4.968813846567331e-06,
"loss": 1.7552,
"step": 47500
},
{
"epoch": 0.51,
"learning_rate": 4.968154994200538e-06,
"loss": 1.7563,
"step": 48000
},
{
"epoch": 0.51,
"learning_rate": 4.967489299391632e-06,
"loss": 1.7412,
"step": 48500
},
{
"epoch": 0.52,
"learning_rate": 4.966818115882607e-06,
"loss": 1.7054,
"step": 49000
},
{
"epoch": 0.52,
"learning_rate": 4.966138755420534e-06,
"loss": 1.7372,
"step": 49500
},
{
"epoch": 0.53,
"learning_rate": 4.96545255810595e-06,
"loss": 1.7653,
"step": 50000
},
{
"epoch": 0.54,
"learning_rate": 4.964759525841192e-06,
"loss": 1.7633,
"step": 50500
},
{
"epoch": 0.54,
"learning_rate": 4.964059660547548e-06,
"loss": 1.733,
"step": 51000
},
{
"epoch": 0.55,
"learning_rate": 4.963354384374138e-06,
"loss": 1.7221,
"step": 51500
},
{
"epoch": 0.55,
"learning_rate": 4.962640872518639e-06,
"loss": 1.7226,
"step": 52000
},
{
"epoch": 0.56,
"learning_rate": 4.961920533507775e-06,
"loss": 1.7142,
"step": 52500
},
{
"epoch": 0.56,
"learning_rate": 4.961193369338535e-06,
"loss": 1.7193,
"step": 53000
},
{
"epoch": 0.57,
"learning_rate": 4.960460856809599e-06,
"loss": 1.7472,
"step": 53500
},
{
"epoch": 0.57,
"learning_rate": 4.9597200620304265e-06,
"loss": 1.7407,
"step": 54000
},
{
"epoch": 0.58,
"learning_rate": 4.958972448193223e-06,
"loss": 1.741,
"step": 54500
},
{
"epoch": 0.58,
"learning_rate": 4.958218017370592e-06,
"loss": 1.7572,
"step": 55000
},
{
"epoch": 0.59,
"learning_rate": 4.957458300945332e-06,
"loss": 1.7177,
"step": 55500
},
{
"epoch": 0.59,
"learning_rate": 4.9566902560686915e-06,
"loss": 1.7255,
"step": 56000
},
{
"epoch": 0.6,
"learning_rate": 4.955915400533522e-06,
"loss": 1.7255,
"step": 56500
},
{
"epoch": 0.6,
"learning_rate": 4.955133736487948e-06,
"loss": 1.7173,
"step": 57000
},
{
"epoch": 0.61,
"learning_rate": 4.954346849831028e-06,
"loss": 1.7042,
"step": 57500
},
{
"epoch": 0.61,
"learning_rate": 4.953551588890636e-06,
"loss": 1.7207,
"step": 58000
},
{
"epoch": 0.62,
"learning_rate": 4.952749525993012e-06,
"loss": 1.7328,
"step": 58500
},
{
"epoch": 0.63,
"learning_rate": 4.9519406633617066e-06,
"loss": 1.6936,
"step": 59000
},
{
"epoch": 0.63,
"learning_rate": 4.9511266413417645e-06,
"loss": 1.7368,
"step": 59500
},
{
"epoch": 0.64,
"learning_rate": 4.95030419957734e-06,
"loss": 1.7637,
"step": 60000
},
{
"epoch": 0.64,
"learning_rate": 4.9494749648583875e-06,
"loss": 1.7161,
"step": 60500
},
{
"epoch": 0.65,
"learning_rate": 4.94863893948379e-06,
"loss": 1.7069,
"step": 61000
},
{
"epoch": 0.65,
"learning_rate": 4.9477978181718865e-06,
"loss": 1.7084,
"step": 61500
},
{
"epoch": 0.66,
"learning_rate": 4.946949937970748e-06,
"loss": 1.7007,
"step": 62000
},
{
"epoch": 0.66,
"learning_rate": 4.946093581740578e-06,
"loss": 1.714,
"step": 62500
},
{
"epoch": 0.67,
"learning_rate": 4.9452304442289365e-06,
"loss": 1.7228,
"step": 63000
},
{
"epoch": 0.67,
"learning_rate": 4.944360527828692e-06,
"loss": 1.7279,
"step": 63500
},
{
"epoch": 0.68,
"learning_rate": 4.943483834951505e-06,
"loss": 1.7026,
"step": 64000
},
{
"epoch": 0.68,
"learning_rate": 4.942602141720543e-06,
"loss": 1.7094,
"step": 64500
},
{
"epoch": 0.69,
"learning_rate": 4.941711916740329e-06,
"loss": 1.7237,
"step": 65000
},
{
"epoch": 0.69,
"learning_rate": 4.940814922625891e-06,
"loss": 1.7269,
"step": 65500
},
{
"epoch": 0.7,
"learning_rate": 4.9399111618639576e-06,
"loss": 1.7229,
"step": 66000
},
{
"epoch": 0.47,
"learning_rate": 4.972827756988442e-06,
"loss": 1.5635,
"step": 66500
},
{
"epoch": 0.47,
"learning_rate": 4.972418204632646e-06,
"loss": 1.6282,
"step": 67000
},
{
"epoch": 0.48,
"learning_rate": 4.972006434197012e-06,
"loss": 1.6165,
"step": 67500
},
{
"epoch": 0.48,
"learning_rate": 4.971590795806552e-06,
"loss": 1.5994,
"step": 68000
},
{
"epoch": 0.48,
"learning_rate": 4.971172952524728e-06,
"loss": 1.6169,
"step": 68500
},
{
"epoch": 0.49,
"learning_rate": 4.970751230143974e-06,
"loss": 1.5898,
"step": 69000
},
{
"epoch": 0.49,
"learning_rate": 4.970326463500062e-06,
"loss": 1.5827,
"step": 69500
},
{
"epoch": 0.49,
"learning_rate": 4.969898653116354e-06,
"loss": 1.5732,
"step": 70000
},
{
"epoch": 0.5,
"learning_rate": 4.969467799519966e-06,
"loss": 1.5965,
"step": 70500
},
{
"epoch": 0.5,
"learning_rate": 4.969033903241761e-06,
"loss": 1.6081,
"step": 71000
},
{
"epoch": 0.51,
"learning_rate": 4.9685969648163515e-06,
"loss": 1.5901,
"step": 71500
},
{
"epoch": 0.51,
"learning_rate": 4.968156984782098e-06,
"loss": 1.6251,
"step": 72000
},
{
"epoch": 0.51,
"learning_rate": 4.9677139636811075e-06,
"loss": 1.5917,
"step": 72500
},
{
"epoch": 0.52,
"learning_rate": 4.967267902059239e-06,
"loss": 1.5778,
"step": 73000
},
{
"epoch": 0.52,
"learning_rate": 4.966818800466091e-06,
"loss": 1.5604,
"step": 73500
},
{
"epoch": 0.52,
"learning_rate": 4.9663666594550116e-06,
"loss": 1.6055,
"step": 74000
},
{
"epoch": 0.53,
"learning_rate": 4.965912392975248e-06,
"loss": 1.5942,
"step": 74500
},
{
"epoch": 0.53,
"learning_rate": 4.9654541808793646e-06,
"loss": 1.6109,
"step": 75000
},
{
"epoch": 0.53,
"learning_rate": 4.964992931046925e-06,
"loss": 1.6153,
"step": 75500
},
{
"epoch": 0.54,
"learning_rate": 4.964528644046248e-06,
"loss": 1.6005,
"step": 76000
},
{
"epoch": 0.54,
"learning_rate": 4.964062258126721e-06,
"loss": 1.597,
"step": 76500
},
{
"epoch": 0.54,
"learning_rate": 4.963591904580939e-06,
"loss": 1.6027,
"step": 77000
},
{
"epoch": 0.55,
"learning_rate": 4.96311851559315e-06,
"loss": 1.5652,
"step": 77500
},
{
"epoch": 0.55,
"learning_rate": 4.962642091746627e-06,
"loss": 1.5838,
"step": 78000
},
{
"epoch": 0.55,
"learning_rate": 4.962162633628382e-06,
"loss": 1.5835,
"step": 78500
},
{
"epoch": 0.56,
"learning_rate": 4.961681109839982e-06,
"loss": 1.5735,
"step": 79000
},
{
"epoch": 0.56,
"learning_rate": 4.961195591019855e-06,
"loss": 1.5813,
"step": 79500
},
{
"epoch": 0.57,
"learning_rate": 4.96070703971027e-06,
"loss": 1.6206,
"step": 80000
},
{
"epoch": 0.57,
"learning_rate": 4.960215456513178e-06,
"loss": 1.5827,
"step": 80500
},
{
"epoch": 0.57,
"learning_rate": 4.959721834288042e-06,
"loss": 1.6156,
"step": 81000
},
{
"epoch": 0.58,
"learning_rate": 4.959224195197477e-06,
"loss": 1.5982,
"step": 81500
},
{
"epoch": 0.58,
"learning_rate": 4.95872352604645e-06,
"loss": 1.6165,
"step": 82000
},
{
"epoch": 0.58,
"learning_rate": 4.958219827451845e-06,
"loss": 1.6277,
"step": 82500
},
{
"epoch": 0.59,
"learning_rate": 4.957714116511465e-06,
"loss": 1.5929,
"step": 83000
},
{
"epoch": 0.59,
"learning_rate": 4.9572043669510584e-06,
"loss": 1.5988,
"step": 83500
},
{
"epoch": 0.59,
"learning_rate": 4.956691589818862e-06,
"loss": 1.5717,
"step": 84000
},
{
"epoch": 0.6,
"learning_rate": 4.956175785746677e-06,
"loss": 1.5993,
"step": 84500
},
{
"epoch": 0.6,
"learning_rate": 4.955657996050618e-06,
"loss": 1.5842,
"step": 85000
},
{
"epoch": 0.6,
"learning_rate": 4.955136146059475e-06,
"loss": 1.5906,
"step": 85500
},
{
"epoch": 0.61,
"learning_rate": 4.954611271044838e-06,
"loss": 1.5686,
"step": 86000
},
{
"epoch": 0.61,
"learning_rate": 4.954083371653418e-06,
"loss": 1.59,
"step": 86500
},
{
"epoch": 0.61,
"learning_rate": 4.9535535133991276e-06,
"loss": 1.5915,
"step": 87000
},
{
"epoch": 0.62,
"learning_rate": 4.953019573254659e-06,
"loss": 1.5991,
"step": 87500
},
{
"epoch": 0.62,
"learning_rate": 4.95248261069457e-06,
"loss": 1.6045,
"step": 88000
},
{
"epoch": 0.63,
"learning_rate": 4.951942626380462e-06,
"loss": 1.5585,
"step": 88500
},
{
"epoch": 0.63,
"learning_rate": 4.951399620977662e-06,
"loss": 1.6104,
"step": 89000
},
{
"epoch": 0.63,
"learning_rate": 4.950854690220794e-06,
"loss": 1.6091,
"step": 89500
},
{
"epoch": 0.64,
"learning_rate": 4.9503056506902935e-06,
"loss": 1.6397,
"step": 90000
},
{
"epoch": 0.64,
"learning_rate": 4.9497535920880514e-06,
"loss": 1.5889,
"step": 90500
},
{
"epoch": 0.64,
"learning_rate": 4.949198515094271e-06,
"loss": 1.5822,
"step": 91000
},
{
"epoch": 0.65,
"learning_rate": 4.948641539593493e-06,
"loss": 1.5949,
"step": 91500
},
{
"epoch": 0.65,
"learning_rate": 4.948081559127373e-06,
"loss": 1.573,
"step": 92000
},
{
"epoch": 0.65,
"learning_rate": 4.947517443141312e-06,
"loss": 1.5856,
"step": 92500
},
{
"epoch": 0.66,
"learning_rate": 4.946950311518915e-06,
"loss": 1.6034,
"step": 93000
},
{
"epoch": 0.66,
"learning_rate": 4.94638016495896e-06,
"loss": 1.5941,
"step": 93500
},
{
"epoch": 0.66,
"learning_rate": 4.945807004163935e-06,
"loss": 1.5872,
"step": 94000
},
{
"epoch": 0.67,
"learning_rate": 4.945230829840042e-06,
"loss": 1.604,
"step": 94500
},
{
"epoch": 0.67,
"learning_rate": 4.944651642697199e-06,
"loss": 1.5954,
"step": 95000
},
{
"epoch": 0.67,
"learning_rate": 4.944069443449033e-06,
"loss": 1.6117,
"step": 95500
},
{
"epoch": 0.68,
"learning_rate": 4.943485406239044e-06,
"loss": 1.5726,
"step": 96000
},
{
"epoch": 0.68,
"learning_rate": 4.942897190956574e-06,
"loss": 1.5713,
"step": 96500
},
{
"epoch": 0.69,
"learning_rate": 4.9423059657304774e-06,
"loss": 1.5863,
"step": 97000
},
{
"epoch": 0.69,
"learning_rate": 4.941711731289216e-06,
"loss": 1.6008,
"step": 97500
},
{
"epoch": 0.69,
"learning_rate": 4.941115685852782e-06,
"loss": 1.609,
"step": 98000
},
{
"epoch": 0.7,
"learning_rate": 4.9405154411961575e-06,
"loss": 1.6016,
"step": 98500
},
{
"epoch": 0.7,
"learning_rate": 4.939913399034342e-06,
"loss": 1.5992,
"step": 99000
},
{
"epoch": 0.7,
"learning_rate": 4.939307147114736e-06,
"loss": 1.6742,
"step": 99500
},
{
"epoch": 0.71,
"learning_rate": 4.938697889674869e-06,
"loss": 1.689,
"step": 100000
},
{
"epoch": 0.71,
"learning_rate": 4.938085627465422e-06,
"loss": 1.6779,
"step": 100500
},
{
"epoch": 0.71,
"learning_rate": 4.937470361240773e-06,
"loss": 1.694,
"step": 101000
},
{
"epoch": 0.72,
"learning_rate": 4.936852091759006e-06,
"loss": 1.6757,
"step": 101500
},
{
"epoch": 0.72,
"learning_rate": 4.936230819781902e-06,
"loss": 1.7025,
"step": 102000
},
{
"epoch": 0.72,
"learning_rate": 4.935606546074945e-06,
"loss": 1.6897,
"step": 102500
},
{
"epoch": 0.73,
"learning_rate": 4.934980528951094e-06,
"loss": 1.7087,
"step": 103000
},
{
"epoch": 0.73,
"learning_rate": 4.9343502600952685e-06,
"loss": 1.6645,
"step": 103500
},
{
"epoch": 0.73,
"learning_rate": 4.9337169918266646e-06,
"loss": 1.6942,
"step": 104000
},
{
"epoch": 0.74,
"learning_rate": 4.9330807249255455e-06,
"loss": 1.6854,
"step": 104500
},
{
"epoch": 0.74,
"learning_rate": 4.932442741696698e-06,
"loss": 1.6781,
"step": 105000
},
{
"epoch": 0.75,
"learning_rate": 4.931800485879448e-06,
"loss": 1.6731,
"step": 105500
},
{
"epoch": 0.75,
"learning_rate": 4.931155233791048e-06,
"loss": 1.6978,
"step": 106000
},
{
"epoch": 0.75,
"learning_rate": 4.930506986226528e-06,
"loss": 1.6978,
"step": 106500
},
{
"epoch": 0.76,
"learning_rate": 4.929857049457245e-06,
"loss": 1.6624,
"step": 107000
},
{
"epoch": 0.76,
"learning_rate": 4.929202819327277e-06,
"loss": 1.6721,
"step": 107500
},
{
"epoch": 0.76,
"learning_rate": 4.9285455961267984e-06,
"loss": 1.6889,
"step": 108000
},
{
"epoch": 0.77,
"learning_rate": 4.927886704082245e-06,
"loss": 1.7113,
"step": 108500
},
{
"epoch": 0.77,
"learning_rate": 4.927223503155845e-06,
"loss": 1.7074,
"step": 109000
},
{
"epoch": 0.77,
"learning_rate": 4.92655731159769e-06,
"loss": 1.6816,
"step": 109500
},
{
"epoch": 0.78,
"learning_rate": 4.925889471574631e-06,
"loss": 1.6795,
"step": 110000
},
{
"epoch": 0.78,
"learning_rate": 4.925217307196282e-06,
"loss": 1.7386,
"step": 110500
},
{
"epoch": 0.78,
"learning_rate": 4.924542154658053e-06,
"loss": 1.6714,
"step": 111000
},
{
"epoch": 0.79,
"learning_rate": 4.9238640147918145e-06,
"loss": 1.699,
"step": 111500
},
{
"epoch": 0.79,
"learning_rate": 4.9231828884331145e-06,
"loss": 1.6785,
"step": 112000
},
{
"epoch": 0.79,
"learning_rate": 4.9224987764211826e-06,
"loss": 1.7445,
"step": 112500
},
{
"epoch": 0.8,
"learning_rate": 4.92181167959893e-06,
"loss": 1.6694,
"step": 113000
},
{
"epoch": 0.8,
"learning_rate": 4.92112159881294e-06,
"loss": 1.715,
"step": 113500
},
{
"epoch": 0.81,
"learning_rate": 4.920428534913476e-06,
"loss": 1.6984,
"step": 114000
},
{
"epoch": 0.81,
"learning_rate": 4.919732488754477e-06,
"loss": 1.6853,
"step": 114500
},
{
"epoch": 0.81,
"learning_rate": 4.919033461193554e-06,
"loss": 1.6866,
"step": 115000
},
{
"epoch": 0.82,
"learning_rate": 4.918331453091995e-06,
"loss": 1.6843,
"step": 115500
},
{
"epoch": 0.82,
"learning_rate": 4.917627878263452e-06,
"loss": 1.6953,
"step": 116000
},
{
"epoch": 0.82,
"learning_rate": 4.916919917635911e-06,
"loss": 1.7,
"step": 116500
},
{
"epoch": 0.83,
"learning_rate": 4.916208979071873e-06,
"loss": 1.6819,
"step": 117000
},
{
"epoch": 0.83,
"learning_rate": 4.915495063447302e-06,
"loss": 1.6938,
"step": 117500
},
{
"epoch": 0.83,
"learning_rate": 4.914779608395079e-06,
"loss": 1.6556,
"step": 118000
},
{
"epoch": 0.84,
"learning_rate": 4.914059747241706e-06,
"loss": 1.674,
"step": 118500
},
{
"epoch": 0.84,
"learning_rate": 4.913336911675913e-06,
"loss": 1.6958,
"step": 119000
},
{
"epoch": 0.84,
"learning_rate": 4.912611102588316e-06,
"loss": 1.6646,
"step": 119500
},
{
"epoch": 0.85,
"learning_rate": 4.9118823208732035e-06,
"loss": 1.6849,
"step": 120000
},
{
"epoch": 0.85,
"learning_rate": 4.91115350036079e-06,
"loss": 1.6699,
"step": 120500
},
{
"epoch": 0.86,
"learning_rate": 4.9104187879696555e-06,
"loss": 1.6576,
"step": 121000
},
{
"epoch": 0.86,
"learning_rate": 4.9096811056521994e-06,
"loss": 1.663,
"step": 121500
},
{
"epoch": 0.86,
"learning_rate": 4.908940454317336e-06,
"loss": 1.6596,
"step": 122000
},
{
"epoch": 0.87,
"learning_rate": 4.9081968348776354e-06,
"loss": 1.6924,
"step": 122500
},
{
"epoch": 0.87,
"learning_rate": 4.9074502482493275e-06,
"loss": 1.7073,
"step": 123000
},
{
"epoch": 0.87,
"learning_rate": 4.906700695352297e-06,
"loss": 1.6862,
"step": 123500
},
{
"epoch": 0.88,
"learning_rate": 4.905948177110082e-06,
"loss": 1.6563,
"step": 124000
},
{
"epoch": 0.88,
"learning_rate": 4.905194208373068e-06,
"loss": 1.6589,
"step": 124500
},
{
"epoch": 0.88,
"learning_rate": 4.904435768151762e-06,
"loss": 1.6651,
"step": 125000
},
{
"epoch": 0.89,
"learning_rate": 4.903674365375935e-06,
"loss": 1.6919,
"step": 125500
},
{
"epoch": 0.89,
"learning_rate": 4.902910000983726e-06,
"loss": 1.6779,
"step": 126000
},
{
"epoch": 0.89,
"learning_rate": 4.902144213521186e-06,
"loss": 1.6898,
"step": 126500
},
{
"epoch": 0.9,
"learning_rate": 4.901373934643746e-06,
"loss": 1.7132,
"step": 127000
},
{
"epoch": 0.9,
"learning_rate": 4.900600696984332e-06,
"loss": 1.6836,
"step": 127500
},
{
"epoch": 0.9,
"learning_rate": 4.899824501495667e-06,
"loss": 1.6751,
"step": 128000
},
{
"epoch": 0.91,
"learning_rate": 4.899046910389163e-06,
"loss": 1.6768,
"step": 128500
},
{
"epoch": 0.91,
"learning_rate": 4.8982648080256045e-06,
"loss": 1.6758,
"step": 129000
},
{
"epoch": 0.92,
"learning_rate": 4.897479750710892e-06,
"loss": 1.6593,
"step": 129500
},
{
"epoch": 0.92,
"learning_rate": 4.89669173941231e-06,
"loss": 1.6927,
"step": 130000
},
{
"epoch": 0.92,
"learning_rate": 4.895902359975868e-06,
"loss": 1.6926,
"step": 130500
},
{
"epoch": 0.93,
"learning_rate": 4.895108449529063e-06,
"loss": 1.6771,
"step": 131000
},
{
"epoch": 0.93,
"learning_rate": 4.894311588020119e-06,
"loss": 1.6277,
"step": 131500
},
{
"epoch": 0.93,
"learning_rate": 4.893511776430863e-06,
"loss": 1.6672,
"step": 132000
},
{
"epoch": 0.94,
"learning_rate": 4.892709015746761e-06,
"loss": 1.6937,
"step": 132500
},
{
"epoch": 0.94,
"learning_rate": 4.89190492131604e-06,
"loss": 1.676,
"step": 133000
},
{
"epoch": 0.94,
"learning_rate": 4.891096271306403e-06,
"loss": 1.6808,
"step": 133500
},
{
"epoch": 0.95,
"learning_rate": 4.890284675178114e-06,
"loss": 1.6601,
"step": 134000
},
{
"epoch": 0.95,
"learning_rate": 4.8894701339311555e-06,
"loss": 1.6762,
"step": 134500
},
{
"epoch": 0.95,
"learning_rate": 4.888654286477422e-06,
"loss": 1.6826,
"step": 135000
},
{
"epoch": 0.96,
"learning_rate": 4.8878338638927996e-06,
"loss": 1.649,
"step": 135500
},
{
"epoch": 0.96,
"learning_rate": 4.887010499209203e-06,
"loss": 1.677,
"step": 136000
},
{
"epoch": 3.62,
"learning_rate": 4.983889443816445e-06,
"loss": 1.5933,
"step": 136500
},
{
"epoch": 3.63,
"learning_rate": 4.983771280259433e-06,
"loss": 1.5906,
"step": 137000
},
{
"epoch": 3.64,
"learning_rate": 4.983652686357577e-06,
"loss": 1.5919,
"step": 137500
},
{
"epoch": 3.66,
"learning_rate": 4.983533662131423e-06,
"loss": 1.5722,
"step": 138000
},
{
"epoch": 3.67,
"learning_rate": 4.983414207601596e-06,
"loss": 1.5707,
"step": 138500
},
{
"epoch": 3.68,
"learning_rate": 4.98329432278879e-06,
"loss": 1.5884,
"step": 139000
},
{
"epoch": 3.7,
"learning_rate": 4.983174007713778e-06,
"loss": 1.5945,
"step": 139500
},
{
"epoch": 3.71,
"learning_rate": 4.983053262397406e-06,
"loss": 1.5783,
"step": 140000
},
{
"epoch": 3.72,
"learning_rate": 4.982932329641013e-06,
"loss": 1.5702,
"step": 140500
},
{
"epoch": 3.74,
"learning_rate": 4.9828109684042125e-06,
"loss": 1.5809,
"step": 141000
},
{
"epoch": 3.75,
"learning_rate": 4.9826889342102535e-06,
"loss": 1.5836,
"step": 141500
},
{
"epoch": 3.76,
"learning_rate": 4.982566715216966e-06,
"loss": 1.5774,
"step": 142000
},
{
"epoch": 3.78,
"learning_rate": 4.982443821589845e-06,
"loss": 1.568,
"step": 142500
},
{
"epoch": 3.79,
"learning_rate": 4.98232049784788e-06,
"loss": 1.5779,
"step": 143000
},
{
"epoch": 3.8,
"learning_rate": 4.9821967440124365e-06,
"loss": 1.5926,
"step": 143500
},
{
"epoch": 3.82,
"learning_rate": 4.9820725601049555e-06,
"loss": 1.5753,
"step": 144000
},
{
"epoch": 3.83,
"learning_rate": 4.981947946146954e-06,
"loss": 1.575,
"step": 144500
},
{
"epoch": 3.84,
"learning_rate": 4.981822902160024e-06,
"loss": 1.5769,
"step": 145000
},
{
"epoch": 3.86,
"learning_rate": 4.981697428165829e-06,
"loss": 1.5729,
"step": 145500
},
{
"epoch": 3.87,
"learning_rate": 4.98157152418611e-06,
"loss": 1.5922,
"step": 146000
},
{
"epoch": 3.88,
"learning_rate": 4.981445190242682e-06,
"loss": 1.5819,
"step": 146500
},
{
"epoch": 3.9,
"learning_rate": 4.981318426357433e-06,
"loss": 1.5708,
"step": 147000
},
{
"epoch": 3.91,
"learning_rate": 4.981191232552328e-06,
"loss": 1.5657,
"step": 147500
},
{
"epoch": 3.92,
"learning_rate": 4.981063608849403e-06,
"loss": 1.5651,
"step": 148000
},
{
"epoch": 3.94,
"learning_rate": 4.98093581180693e-06,
"loss": 1.5693,
"step": 148500
},
{
"epoch": 3.95,
"learning_rate": 4.9808073292344626e-06,
"loss": 1.5734,
"step": 149000
},
{
"epoch": 3.96,
"learning_rate": 4.980678416830693e-06,
"loss": 1.5789,
"step": 149500
},
{
"epoch": 3.97,
"learning_rate": 4.980549074617956e-06,
"loss": 1.5766,
"step": 150000
},
{
"epoch": 3.99,
"learning_rate": 4.980419562591572e-06,
"loss": 1.5693,
"step": 150500
},
{
"epoch": 4.0,
"learning_rate": 4.980289361687711e-06,
"loss": 1.5845,
"step": 151000
},
{
"epoch": 4.01,
"learning_rate": 4.980158731042292e-06,
"loss": 1.5355,
"step": 151500
},
{
"epoch": 4.03,
"learning_rate": 4.980027670677947e-06,
"loss": 1.5572,
"step": 152000
},
{
"epoch": 4.04,
"learning_rate": 4.979896180617384e-06,
"loss": 1.5396,
"step": 152500
},
{
"epoch": 4.05,
"learning_rate": 4.979764525151654e-06,
"loss": 1.5605,
"step": 153000
},
{
"epoch": 4.07,
"learning_rate": 4.979632176626357e-06,
"loss": 1.5431,
"step": 153500
},
{
"epoch": 4.08,
"learning_rate": 4.979499398473366e-06,
"loss": 1.5378,
"step": 154000
},
{
"epoch": 4.09,
"learning_rate": 4.979366190715688e-06,
"loss": 1.536,
"step": 154500
},
{
"epoch": 4.11,
"learning_rate": 4.979232553376403e-06,
"loss": 1.5316,
"step": 155000
},
{
"epoch": 4.12,
"learning_rate": 4.979098486478665e-06,
"loss": 1.5367,
"step": 155500
},
{
"epoch": 4.13,
"learning_rate": 4.978963990045702e-06,
"loss": 1.5489,
"step": 156000
},
{
"epoch": 4.15,
"learning_rate": 4.978829334381346e-06,
"loss": 1.5518,
"step": 156500
},
{
"epoch": 4.16,
"learning_rate": 4.9786939798068715e-06,
"loss": 1.5375,
"step": 157000
},
{
"epoch": 4.17,
"learning_rate": 4.978558195767259e-06,
"loss": 1.5403,
"step": 157500
},
{
"epoch": 4.19,
"learning_rate": 4.9784219822860345e-06,
"loss": 1.5407,
"step": 158000
},
{
"epoch": 4.2,
"learning_rate": 4.9782856131011405e-06,
"loss": 1.5539,
"step": 158500
},
{
"epoch": 4.21,
"learning_rate": 4.9781485416663325e-06,
"loss": 1.5269,
"step": 159000
},
{
"epoch": 4.23,
"learning_rate": 4.97801104086089e-06,
"loss": 1.5315,
"step": 159500
},
{
"epoch": 4.24,
"learning_rate": 4.977873110708638e-06,
"loss": 1.542,
"step": 160000
},
{
"epoch": 4.25,
"learning_rate": 4.977735028380872e-06,
"loss": 1.5212,
"step": 160500
},
{
"epoch": 4.27,
"learning_rate": 4.977596240465341e-06,
"loss": 1.5384,
"step": 161000
},
{
"epoch": 4.28,
"learning_rate": 4.977457023274869e-06,
"loss": 1.5483,
"step": 161500
},
{
"epoch": 4.29,
"learning_rate": 4.977317376833576e-06,
"loss": 1.5383,
"step": 162000
},
{
"epoch": 4.31,
"learning_rate": 4.9771773011656586e-06,
"loss": 1.537,
"step": 162500
},
{
"epoch": 4.32,
"learning_rate": 4.977036796295387e-06,
"loss": 1.5508,
"step": 163000
},
{
"epoch": 4.33,
"learning_rate": 4.976896144543505e-06,
"loss": 1.5501,
"step": 163500
},
{
"epoch": 4.35,
"learning_rate": 4.976754782199913e-06,
"loss": 1.5394,
"step": 164000
},
{
"epoch": 4.36,
"learning_rate": 4.976612990727174e-06,
"loss": 1.5345,
"step": 164500
},
{
"epoch": 4.37,
"learning_rate": 4.976470770149855e-06,
"loss": 1.5118,
"step": 165000
},
{
"epoch": 4.39,
"learning_rate": 4.976328120492595e-06,
"loss": 1.5261,
"step": 165500
},
{
"epoch": 4.4,
"learning_rate": 4.976185328365718e-06,
"loss": 1.5453,
"step": 166000
},
{
"epoch": 4.41,
"learning_rate": 4.976041821480838e-06,
"loss": 1.5373,
"step": 166500
},
{
"epoch": 4.43,
"learning_rate": 4.97589788559034e-06,
"loss": 1.559,
"step": 167000
},
{
"epoch": 4.44,
"learning_rate": 4.975753520719162e-06,
"loss": 1.5539,
"step": 167500
},
{
"epoch": 4.45,
"learning_rate": 4.975608726892317e-06,
"loss": 1.5642,
"step": 168000
},
{
"epoch": 4.47,
"learning_rate": 4.975463504134893e-06,
"loss": 1.5403,
"step": 168500
},
{
"epoch": 4.48,
"learning_rate": 4.975317852472051e-06,
"loss": 1.5434,
"step": 169000
},
{
"epoch": 4.49,
"learning_rate": 4.975171771929028e-06,
"loss": 1.5272,
"step": 169500
},
{
"epoch": 4.5,
"learning_rate": 4.97502555597791e-06,
"loss": 1.5326,
"step": 170000
},
{
"epoch": 4.52,
"learning_rate": 4.974878912910856e-06,
"loss": 1.5426,
"step": 170500
},
{
"epoch": 4.53,
"learning_rate": 4.974731547594612e-06,
"loss": 1.5456,
"step": 171000
},
{
"epoch": 4.54,
"learning_rate": 4.974583753499772e-06,
"loss": 1.5501,
"step": 171500
},
{
"epoch": 4.56,
"learning_rate": 4.974435530651941e-06,
"loss": 1.5401,
"step": 172000
},
{
"epoch": 4.57,
"learning_rate": 4.9742868790768025e-06,
"loss": 1.5446,
"step": 172500
},
{
"epoch": 4.58,
"learning_rate": 4.974138097388491e-06,
"loss": 1.5262,
"step": 173000
},
{
"epoch": 4.6,
"learning_rate": 4.973988888737395e-06,
"loss": 1.5426,
"step": 173500
},
{
"epoch": 4.61,
"learning_rate": 4.973838952849711e-06,
"loss": 1.5344,
"step": 174000
},
{
"epoch": 4.62,
"learning_rate": 4.973688588338083e-06,
"loss": 1.543,
"step": 174500
},
{
"epoch": 4.64,
"learning_rate": 4.973537795228565e-06,
"loss": 1.5278,
"step": 175000
},
{
"epoch": 4.65,
"learning_rate": 4.9733865735472815e-06,
"loss": 1.5355,
"step": 175500
},
{
"epoch": 4.66,
"learning_rate": 4.973234923320436e-06,
"loss": 1.5363,
"step": 176000
},
{
"epoch": 4.68,
"learning_rate": 4.973082844574302e-06,
"loss": 1.5507,
"step": 176500
},
{
"epoch": 4.69,
"learning_rate": 4.97293033733523e-06,
"loss": 1.5343,
"step": 177000
},
{
"epoch": 4.7,
"learning_rate": 4.972777401629643e-06,
"loss": 1.5456,
"step": 177500
},
{
"epoch": 4.72,
"learning_rate": 4.972624037484041e-06,
"loss": 1.5407,
"step": 178000
},
{
"epoch": 4.73,
"learning_rate": 4.97247055293765e-06,
"loss": 1.5362,
"step": 178500
},
{
"epoch": 4.74,
"learning_rate": 4.9723163328485536e-06,
"loss": 1.5474,
"step": 179000
},
{
"epoch": 4.76,
"learning_rate": 4.972161684399326e-06,
"loss": 1.5364,
"step": 179500
},
{
"epoch": 4.77,
"learning_rate": 4.972006607616762e-06,
"loss": 1.5343,
"step": 180000
},
{
"epoch": 4.78,
"learning_rate": 4.971851413965343e-06,
"loss": 1.5308,
"step": 180500
},
{
"epoch": 4.8,
"learning_rate": 4.971695481453321e-06,
"loss": 1.5394,
"step": 181000
},
{
"epoch": 4.81,
"learning_rate": 4.9715391206887405e-06,
"loss": 1.5341,
"step": 181500
},
{
"epoch": 4.82,
"learning_rate": 4.971382331698691e-06,
"loss": 1.5283,
"step": 182000
},
{
"epoch": 4.84,
"learning_rate": 4.971225114510338e-06,
"loss": 1.5497,
"step": 182500
},
{
"epoch": 4.85,
"learning_rate": 4.971067469150923e-06,
"loss": 1.5276,
"step": 183000
},
{
"epoch": 4.86,
"learning_rate": 4.970909395647759e-06,
"loss": 1.5338,
"step": 183500
},
{
"epoch": 4.88,
"learning_rate": 4.970751211458715e-06,
"loss": 1.5269,
"step": 184000
},
{
"epoch": 4.89,
"learning_rate": 4.970592282606443e-06,
"loss": 1.5278,
"step": 184500
},
{
"epoch": 4.9,
"learning_rate": 4.9704329256927524e-06,
"loss": 1.5371,
"step": 185000
},
{
"epoch": 4.92,
"learning_rate": 4.970273140745256e-06,
"loss": 1.5146,
"step": 185500
},
{
"epoch": 4.93,
"learning_rate": 4.97011292779164e-06,
"loss": 1.5309,
"step": 186000
},
{
"epoch": 4.94,
"learning_rate": 4.9699526085686275e-06,
"loss": 1.5257,
"step": 186500
},
{
"epoch": 4.96,
"learning_rate": 4.9697915405419915e-06,
"loss": 1.5359,
"step": 187000
},
{
"epoch": 4.97,
"learning_rate": 4.969630044592678e-06,
"loss": 1.5429,
"step": 187500
},
{
"epoch": 4.98,
"learning_rate": 4.969468120748667e-06,
"loss": 1.5167,
"step": 188000
},
{
"epoch": 5.0,
"learning_rate": 4.969305769038015e-06,
"loss": 1.5503,
"step": 188500
},
{
"epoch": 5.01,
"learning_rate": 4.969143315474913e-06,
"loss": 1.4932,
"step": 189000
},
{
"epoch": 5.02,
"learning_rate": 4.968980435810976e-06,
"loss": 1.4474,
"step": 189500
},
{
"epoch": 5.03,
"learning_rate": 4.968816802380544e-06,
"loss": 1.4653,
"step": 190000
},
{
"epoch": 5.05,
"learning_rate": 4.96865274119632e-06,
"loss": 1.4524,
"step": 190500
},
{
"epoch": 5.06,
"learning_rate": 4.968488252286729e-06,
"loss": 1.4792,
"step": 191000
},
{
"epoch": 5.07,
"learning_rate": 4.968323335680272e-06,
"loss": 1.4806,
"step": 191500
},
{
"epoch": 5.09,
"learning_rate": 4.968157991405521e-06,
"loss": 1.4716,
"step": 192000
},
{
"epoch": 5.1,
"learning_rate": 4.967992219491125e-06,
"loss": 1.4671,
"step": 192500
},
{
"epoch": 5.11,
"learning_rate": 4.967826019965807e-06,
"loss": 1.4659,
"step": 193000
},
{
"epoch": 5.13,
"learning_rate": 4.967659392858362e-06,
"loss": 1.4676,
"step": 193500
},
{
"epoch": 5.14,
"learning_rate": 4.96749233819766e-06,
"loss": 1.4606,
"step": 194000
},
{
"epoch": 5.15,
"learning_rate": 4.967324856012646e-06,
"loss": 1.4769,
"step": 194500
},
{
"epoch": 5.17,
"learning_rate": 4.967156946332338e-06,
"loss": 1.4657,
"step": 195000
},
{
"epoch": 5.18,
"learning_rate": 4.9669889462867134e-06,
"loss": 1.4741,
"step": 195500
},
{
"epoch": 5.19,
"learning_rate": 4.966820182558015e-06,
"loss": 1.4726,
"step": 196000
},
{
"epoch": 5.21,
"learning_rate": 4.966650991421464e-06,
"loss": 1.452,
"step": 196500
},
{
"epoch": 5.22,
"learning_rate": 4.966481372906374e-06,
"loss": 1.4527,
"step": 197000
},
{
"epoch": 5.23,
"learning_rate": 4.966311667560339e-06,
"loss": 1.4611,
"step": 197500
},
{
"epoch": 5.25,
"learning_rate": 4.966141195231022e-06,
"loss": 1.4572,
"step": 198000
},
{
"epoch": 5.26,
"learning_rate": 4.965970637837151e-06,
"loss": 1.4513,
"step": 198500
},
{
"epoch": 5.27,
"learning_rate": 4.965799311811517e-06,
"loss": 1.4561,
"step": 199000
},
{
"epoch": 5.29,
"learning_rate": 4.9656275585549105e-06,
"loss": 1.4485,
"step": 199500
},
{
"epoch": 5.3,
"learning_rate": 4.9654553780970885e-06,
"loss": 1.4565,
"step": 200000
},
{
"epoch": 5.31,
"learning_rate": 4.965282770467883e-06,
"loss": 1.4488,
"step": 200500
},
{
"epoch": 5.33,
"learning_rate": 4.965109735697201e-06,
"loss": 1.4566,
"step": 201000
},
{
"epoch": 5.34,
"learning_rate": 4.964936273815022e-06,
"loss": 1.4779,
"step": 201500
},
{
"epoch": 5.35,
"learning_rate": 4.964762384851403e-06,
"loss": 1.4778,
"step": 202000
},
{
"epoch": 5.37,
"learning_rate": 4.96458806883647e-06,
"loss": 1.4882,
"step": 202500
},
{
"epoch": 5.38,
"learning_rate": 4.964414025623156e-06,
"loss": 1.4705,
"step": 203000
},
{
"epoch": 5.39,
"learning_rate": 4.964238857304181e-06,
"loss": 1.4777,
"step": 203500
},
{
"epoch": 5.41,
"learning_rate": 4.964063262024601e-06,
"loss": 1.4534,
"step": 204000
},
{
"epoch": 5.42,
"learning_rate": 4.963887239814839e-06,
"loss": 1.4618,
"step": 204500
},
{
"epoch": 5.43,
"learning_rate": 4.963710790705395e-06,
"loss": 1.4535,
"step": 205000
},
{
"epoch": 5.45,
"learning_rate": 4.963533914726838e-06,
"loss": 1.4756,
"step": 205500
},
{
"epoch": 5.46,
"learning_rate": 4.963356966941416e-06,
"loss": 1.4723,
"step": 206000
},
{
"epoch": 5.47,
"learning_rate": 4.963179238170235e-06,
"loss": 1.4755,
"step": 206500
},
{
"epoch": 5.49,
"learning_rate": 4.96300108262204e-06,
"loss": 1.45,
"step": 207000
},
{
"epoch": 5.5,
"learning_rate": 4.9628225003277e-06,
"loss": 1.4747,
"step": 207500
},
{
"epoch": 5.51,
"learning_rate": 4.9626434913181575e-06,
"loss": 1.4682,
"step": 208000
},
{
"epoch": 5.53,
"learning_rate": 4.962464055624426e-06,
"loss": 1.4798,
"step": 208500
},
{
"epoch": 5.54,
"learning_rate": 4.962284193277597e-06,
"loss": 1.4686,
"step": 209000
},
{
"epoch": 5.55,
"learning_rate": 4.962103904308832e-06,
"loss": 1.463,
"step": 209500
},
{
"epoch": 5.56,
"learning_rate": 4.961923188749369e-06,
"loss": 1.4668,
"step": 210000
},
{
"epoch": 5.58,
"learning_rate": 4.961742046630521e-06,
"loss": 1.478,
"step": 210500
},
{
"epoch": 5.59,
"learning_rate": 4.96156047798367e-06,
"loss": 1.4583,
"step": 211000
},
{
"epoch": 5.6,
"learning_rate": 4.961378482840278e-06,
"loss": 1.4534,
"step": 211500
},
{
"epoch": 5.62,
"learning_rate": 4.961196791767787e-06,
"loss": 1.4678,
"step": 212000
},
{
"epoch": 5.63,
"learning_rate": 4.961013945431653e-06,
"loss": 1.4661,
"step": 212500
},
{
"epoch": 5.64,
"learning_rate": 4.960830672693671e-06,
"loss": 1.4691,
"step": 213000
},
{
"epoch": 5.66,
"learning_rate": 4.960646973585595e-06,
"loss": 1.4797,
"step": 213500
},
{
"epoch": 5.67,
"learning_rate": 4.960462848139253e-06,
"loss": 1.4629,
"step": 214000
},
{
"epoch": 5.68,
"learning_rate": 4.960278296386547e-06,
"loss": 1.4625,
"step": 214500
},
{
"epoch": 5.7,
"learning_rate": 4.960093318359453e-06,
"loss": 1.4739,
"step": 215000
},
{
"epoch": 5.71,
"learning_rate": 4.959907914090021e-06,
"loss": 1.4674,
"step": 215500
},
{
"epoch": 5.72,
"learning_rate": 4.959722455696669e-06,
"loss": 1.4582,
"step": 216000
},
{
"epoch": 5.74,
"learning_rate": 4.959536572828244e-06,
"loss": 1.4617,
"step": 216500
},
{
"epoch": 5.75,
"learning_rate": 4.959349891729353e-06,
"loss": 1.4862,
"step": 217000
},
{
"epoch": 5.76,
"learning_rate": 4.959162784516932e-06,
"loss": 1.4584,
"step": 217500
},
{
"epoch": 5.78,
"learning_rate": 4.958975251223399e-06,
"loss": 1.4493,
"step": 218000
},
{
"epoch": 5.79,
"learning_rate": 4.958787291881248e-06,
"loss": 1.4721,
"step": 218500
},
{
"epoch": 5.8,
"learning_rate": 4.958598906523043e-06,
"loss": 1.4536,
"step": 219000
},
{
"epoch": 5.82,
"learning_rate": 4.9584100951814265e-06,
"loss": 1.4776,
"step": 219500
},
{
"epoch": 5.83,
"learning_rate": 4.958221236788773e-06,
"loss": 1.4852,
"step": 220000
},
{
"epoch": 23.37,
"learning_rate": 4.356277313166177e-06,
"loss": 1.4167,
"step": 220500
},
{
"epoch": 23.43,
"learning_rate": 4.353486494789948e-06,
"loss": 1.4065,
"step": 221000
},
{
"epoch": 23.48,
"learning_rate": 4.350690537902183e-06,
"loss": 1.4014,
"step": 221500
},
{
"epoch": 23.53,
"learning_rate": 4.3478894502542505e-06,
"loss": 1.4092,
"step": 222000
},
{
"epoch": 23.58,
"learning_rate": 4.3450832396117424e-06,
"loss": 1.3999,
"step": 222500
},
{
"epoch": 23.64,
"learning_rate": 4.342271913754456e-06,
"loss": 1.4067,
"step": 223000
},
{
"epoch": 23.69,
"learning_rate": 4.339455480476367e-06,
"loss": 1.4049,
"step": 223500
},
{
"epoch": 23.74,
"learning_rate": 4.336639595735609e-06,
"loss": 1.404,
"step": 224000
},
{
"epoch": 15.86,
"learning_rate": 4.69610858653822e-06,
"loss": 1.3812,
"step": 224500
},
{
"epoch": 15.9,
"learning_rate": 4.69478118039917e-06,
"loss": 1.3888,
"step": 225000
},
{
"epoch": 15.94,
"learning_rate": 4.693451069948868e-06,
"loss": 1.3894,
"step": 225500
},
{
"epoch": 15.97,
"learning_rate": 4.692118256826217e-06,
"loss": 1.3942,
"step": 226000
},
{
"epoch": 16.01,
"learning_rate": 4.69078274267345e-06,
"loss": 1.3959,
"step": 226500
},
{
"epoch": 16.04,
"learning_rate": 4.689444529136127e-06,
"loss": 1.3875,
"step": 227000
},
{
"epoch": 16.08,
"learning_rate": 4.688103617863134e-06,
"loss": 1.3807,
"step": 227500
},
{
"epoch": 16.11,
"learning_rate": 4.686760010506685e-06,
"loss": 1.3778,
"step": 228000
},
{
"epoch": 16.15,
"learning_rate": 4.685416404013816e-06,
"loss": 1.389,
"step": 228500
},
{
"epoch": 16.18,
"learning_rate": 4.68406741484425e-06,
"loss": 1.3981,
"step": 229000
},
{
"epoch": 16.22,
"learning_rate": 4.682715734564455e-06,
"loss": 1.3847,
"step": 229500
},
{
"epoch": 16.25,
"learning_rate": 4.681361364839912e-06,
"loss": 1.3804,
"step": 230000
},
{
"epoch": 16.29,
"learning_rate": 4.680009740921249e-06,
"loss": 1.392,
"step": 230500
},
{
"epoch": 16.32,
"learning_rate": 4.678650008057981e-06,
"loss": 1.3871,
"step": 231000
},
{
"epoch": 16.36,
"learning_rate": 4.67728759075957e-06,
"loss": 1.3851,
"step": 231500
},
{
"epoch": 16.39,
"learning_rate": 4.675922490704728e-06,
"loss": 1.3877,
"step": 232000
},
{
"epoch": 8.21,
"learning_rate": 4.9172705288745405e-06,
"loss": 1.369,
"step": 232500
},
{
"epoch": 8.23,
"learning_rate": 4.916916872855345e-06,
"loss": 1.3743,
"step": 233000
},
{
"epoch": 8.25,
"learning_rate": 4.9165617643469424e-06,
"loss": 1.3815,
"step": 233500
},
{
"epoch": 8.27,
"learning_rate": 4.916205911443638e-06,
"loss": 1.4003,
"step": 234000
},
{
"epoch": 19.89,
"learning_rate": 4.528145578498841e-06,
"loss": 1.3638,
"step": 234500
},
{
"epoch": 20.76,
"learning_rate": 4.487385084270672e-06,
"loss": 1.3662,
"step": 235000
},
{
"epoch": 4.99,
"learning_rate": 4.969336651851814e-06,
"loss": 1.3665,
"step": 235500
},
{
"epoch": 5.0,
"learning_rate": 4.969206524022907e-06,
"loss": 1.365,
"step": 236000
},
{
"epoch": 5.01,
"learning_rate": 4.969076122373843e-06,
"loss": 1.3487,
"step": 236500
},
{
"epoch": 5.02,
"learning_rate": 4.968945708543242e-06,
"loss": 1.3585,
"step": 237000
},
{
"epoch": 5.03,
"learning_rate": 4.968814759844848e-06,
"loss": 1.3557,
"step": 237500
},
{
"epoch": 5.05,
"learning_rate": 4.968683537369742e-06,
"loss": 1.346,
"step": 238000
},
{
"epoch": 5.06,
"learning_rate": 4.968552041132476e-06,
"loss": 1.368,
"step": 238500
},
{
"epoch": 5.07,
"learning_rate": 4.968420534960793e-06,
"loss": 1.3677,
"step": 239000
},
{
"epoch": 5.08,
"learning_rate": 4.968288491790435e-06,
"loss": 1.3728,
"step": 239500
},
{
"epoch": 5.09,
"learning_rate": 4.968156174901726e-06,
"loss": 1.3692,
"step": 240000
},
{
"epoch": 5.1,
"learning_rate": 4.968023584309339e-06,
"loss": 1.3595,
"step": 240500
},
{
"epoch": 5.11,
"learning_rate": 4.967890720027977e-06,
"loss": 1.3639,
"step": 241000
},
{
"epoch": 5.12,
"learning_rate": 4.967757582072374e-06,
"loss": 1.38,
"step": 241500
},
{
"epoch": 5.13,
"learning_rate": 4.967624170457293e-06,
"loss": 1.3552,
"step": 242000
},
{
"epoch": 5.14,
"learning_rate": 4.967490752841137e-06,
"loss": 1.35,
"step": 242500
},
{
"epoch": 5.15,
"learning_rate": 4.967356794498762e-06,
"loss": 1.3782,
"step": 243000
},
{
"epoch": 5.16,
"learning_rate": 4.967222562541354e-06,
"loss": 1.3719,
"step": 243500
},
{
"epoch": 5.17,
"learning_rate": 4.9670880569838e-06,
"loss": 1.3601,
"step": 244000
},
{
"epoch": 5.18,
"learning_rate": 4.9669532778410155e-06,
"loss": 1.3735,
"step": 244500
},
{
"epoch": 5.19,
"learning_rate": 4.966818225127948e-06,
"loss": 1.3798,
"step": 245000
},
{
"epoch": 5.2,
"learning_rate": 4.9666828988595705e-06,
"loss": 1.351,
"step": 245500
},
{
"epoch": 5.22,
"learning_rate": 4.966547299050893e-06,
"loss": 1.3579,
"step": 246000
},
{
"epoch": 2.61,
"learning_rate": 4.991588952906284e-06,
"loss": 1.3578,
"step": 246500
},
{
"epoch": 2.62,
"learning_rate": 4.991554801918827e-06,
"loss": 1.362,
"step": 247000
},
{
"epoch": 2.62,
"learning_rate": 4.991520581858223e-06,
"loss": 1.3771,
"step": 247500
},
{
"epoch": 2.63,
"learning_rate": 4.9914862927254196e-06,
"loss": 1.3466,
"step": 248000
},
{
"epoch": 2.63,
"learning_rate": 4.991452003306709e-06,
"loss": 1.3999,
"step": 248500
},
{
"epoch": 2.64,
"learning_rate": 4.991417576170502e-06,
"loss": 1.3488,
"step": 249000
},
{
"epoch": 2.64,
"learning_rate": 4.991383079964951e-06,
"loss": 1.3996,
"step": 249500
},
{
"epoch": 2.65,
"learning_rate": 4.991348514691014e-06,
"loss": 1.3566,
"step": 250000
},
{
"epoch": 2.66,
"learning_rate": 4.99131394968726e-06,
"loss": 1.3895,
"step": 250500
},
{
"epoch": 2.66,
"learning_rate": 4.991279246417557e-06,
"loss": 1.3697,
"step": 251000
},
{
"epoch": 2.67,
"learning_rate": 4.991244474082346e-06,
"loss": 1.3646,
"step": 251500
},
{
"epoch": 2.67,
"learning_rate": 4.991209632682591e-06,
"loss": 1.3696,
"step": 252000
},
{
"epoch": 2.68,
"learning_rate": 4.9911747921091095e-06,
"loss": 1.3863,
"step": 252500
},
{
"epoch": 2.68,
"learning_rate": 4.99113981272129e-06,
"loss": 1.3785,
"step": 253000
},
{
"epoch": 2.69,
"learning_rate": 4.991104764271827e-06,
"loss": 1.353,
"step": 253500
},
{
"epoch": 2.69,
"learning_rate": 4.9910696467616924e-06,
"loss": 1.3879,
"step": 254000
},
{
"epoch": 2.7,
"learning_rate": 4.991034530633923e-06,
"loss": 1.3407,
"step": 254500
},
{
"epoch": 2.7,
"learning_rate": 4.990999275143486e-06,
"loss": 1.3679,
"step": 255000
},
{
"epoch": 2.71,
"learning_rate": 4.990963950595301e-06,
"loss": 1.3667,
"step": 255500
},
{
"epoch": 2.71,
"learning_rate": 4.990928556990351e-06,
"loss": 1.3493,
"step": 256000
},
{
"epoch": 2.72,
"learning_rate": 4.990893165323852e-06,
"loss": 1.3701,
"step": 256500
},
{
"epoch": 2.72,
"learning_rate": 4.99085763374642e-06,
"loss": 1.3872,
"step": 257000
},
{
"epoch": 2.73,
"learning_rate": 4.990822104385346e-06,
"loss": 1.4075,
"step": 257500
},
{
"epoch": 2.73,
"learning_rate": 4.990786434839367e-06,
"loss": 1.3791,
"step": 258000
},
{
"epoch": 2.74,
"learning_rate": 4.9907506962415395e-06,
"loss": 1.3829,
"step": 258500
},
{
"epoch": 2.75,
"learning_rate": 4.990714888592859e-06,
"loss": 1.3642,
"step": 259000
},
{
"epoch": 2.75,
"learning_rate": 4.990679011894315e-06,
"loss": 1.4128,
"step": 259500
},
{
"epoch": 2.76,
"learning_rate": 4.990643066146902e-06,
"loss": 1.3936,
"step": 260000
},
{
"epoch": 2.76,
"learning_rate": 4.990607123450118e-06,
"loss": 1.3845,
"step": 260500
},
{
"epoch": 2.77,
"learning_rate": 4.9905710397460545e-06,
"loss": 1.3552,
"step": 261000
},
{
"epoch": 2.77,
"learning_rate": 4.9905348869961154e-06,
"loss": 1.3767,
"step": 261500
},
{
"epoch": 2.78,
"learning_rate": 4.990498665201305e-06,
"loss": 1.3768,
"step": 262000
},
{
"epoch": 2.78,
"learning_rate": 4.990462374362625e-06,
"loss": 1.4119,
"step": 262500
},
{
"epoch": 2.79,
"learning_rate": 4.990426014481083e-06,
"loss": 1.3987,
"step": 263000
},
{
"epoch": 2.79,
"learning_rate": 4.990389585557687e-06,
"loss": 1.3581,
"step": 263500
},
{
"epoch": 2.8,
"learning_rate": 4.990353087593447e-06,
"loss": 1.3736,
"step": 264000
},
{
"epoch": 2.8,
"learning_rate": 4.990316593792282e-06,
"loss": 1.3948,
"step": 264500
},
{
"epoch": 2.81,
"learning_rate": 4.990279957887468e-06,
"loss": 1.3785,
"step": 265000
},
{
"epoch": 2.81,
"learning_rate": 4.990243252944848e-06,
"loss": 1.3607,
"step": 265500
},
{
"epoch": 2.82,
"learning_rate": 4.990206478965441e-06,
"loss": 1.3876,
"step": 266000
},
{
"epoch": 2.82,
"learning_rate": 4.990169709705192e-06,
"loss": 1.3849,
"step": 266500
},
{
"epoch": 2.83,
"learning_rate": 4.990132797793338e-06,
"loss": 1.3816,
"step": 267000
},
{
"epoch": 2.84,
"learning_rate": 4.990095816847759e-06,
"loss": 1.3752,
"step": 267500
},
{
"epoch": 2.84,
"learning_rate": 4.99005876686948e-06,
"loss": 1.3786,
"step": 268000
},
{
"epoch": 2.85,
"learning_rate": 4.99002172216644e-06,
"loss": 1.3846,
"step": 268500
},
{
"epoch": 2.85,
"learning_rate": 4.989984534263904e-06,
"loss": 1.4057,
"step": 269000
},
{
"epoch": 2.86,
"learning_rate": 4.989947277331753e-06,
"loss": 1.4113,
"step": 269500
},
{
"epoch": 2.86,
"learning_rate": 4.989909951371019e-06,
"loss": 1.3907,
"step": 270000
},
{
"epoch": 2.87,
"learning_rate": 4.989872631241603e-06,
"loss": 1.378,
"step": 270500
},
{
"epoch": 2.87,
"learning_rate": 4.98983531735787e-06,
"loss": 1.3763,
"step": 271000
},
{
"epoch": 2.88,
"learning_rate": 4.989797784731755e-06,
"loss": 1.3959,
"step": 271500
},
{
"epoch": 2.88,
"learning_rate": 4.989760183081203e-06,
"loss": 1.3778,
"step": 272000
},
{
"epoch": 2.89,
"learning_rate": 4.989722512407255e-06,
"loss": 1.3821,
"step": 272500
},
{
"epoch": 2.89,
"learning_rate": 4.989684772710956e-06,
"loss": 1.3778,
"step": 273000
},
{
"epoch": 2.9,
"learning_rate": 4.9896469639933535e-06,
"loss": 1.3934,
"step": 273500
},
{
"epoch": 2.9,
"learning_rate": 4.989609086255493e-06,
"loss": 1.3868,
"step": 274000
},
{
"epoch": 2.91,
"learning_rate": 4.989571139498426e-06,
"loss": 1.3706,
"step": 274500
},
{
"epoch": 2.91,
"learning_rate": 4.989533123723205e-06,
"loss": 1.3697,
"step": 275000
},
{
"epoch": 2.92,
"learning_rate": 4.989495038930883e-06,
"loss": 1.3751,
"step": 275500
},
{
"epoch": 2.93,
"learning_rate": 4.9894569614990094e-06,
"loss": 1.3906,
"step": 276000
},
{
"epoch": 2.93,
"learning_rate": 4.9894187388136844e-06,
"loss": 1.3799,
"step": 276500
},
{
"epoch": 2.94,
"learning_rate": 4.989380447114429e-06,
"loss": 1.3671,
"step": 277000
},
{
"epoch": 8.82,
"learning_rate": 4.90462324019519e-06,
"loss": 1.3322,
"step": 277500
},
{
"epoch": 8.84,
"learning_rate": 4.904281303331608e-06,
"loss": 1.3392,
"step": 278000
},
{
"epoch": 11.81,
"learning_rate": 4.830076428614785e-06,
"loss": 1.3219,
"step": 278500
},
{
"epoch": 11.83,
"learning_rate": 4.829472536182216e-06,
"loss": 1.3346,
"step": 279000
},
{
"epoch": 8.89,
"learning_rate": 4.9032518937846354e-06,
"loss": 1.3237,
"step": 279500
},
{
"epoch": 8.9,
"learning_rate": 4.902907557902285e-06,
"loss": 1.335,
"step": 280000
},
{
"epoch": 8.92,
"learning_rate": 4.902562622479655e-06,
"loss": 1.3322,
"step": 280500
},
{
"epoch": 8.94,
"learning_rate": 4.902217087602807e-06,
"loss": 1.3416,
"step": 281000
},
{
"epoch": 8.95,
"learning_rate": 4.901870953357955e-06,
"loss": 1.3218,
"step": 281500
},
{
"epoch": 8.97,
"learning_rate": 4.9015242198314605e-06,
"loss": 1.3355,
"step": 282000
},
{
"epoch": 8.98,
"learning_rate": 4.901176887109837e-06,
"loss": 1.3258,
"step": 282500
},
{
"epoch": 9.0,
"learning_rate": 4.900828955279745e-06,
"loss": 1.3365,
"step": 283000
},
{
"epoch": 9.02,
"learning_rate": 4.900481122087466e-06,
"loss": 1.3185,
"step": 283500
},
{
"epoch": 9.03,
"learning_rate": 4.900131993498804e-06,
"loss": 1.3258,
"step": 284000
},
{
"epoch": 10.05,
"learning_rate": 4.876468017839525e-06,
"loss": 1.3223,
"step": 284500
},
{
"epoch": 10.07,
"learning_rate": 4.876036882141607e-06,
"loss": 1.316,
"step": 285000
},
{
"epoch": 10.09,
"learning_rate": 4.875605014532027e-06,
"loss": 1.3237,
"step": 285500
},
{
"epoch": 10.11,
"learning_rate": 4.875172415143815e-06,
"loss": 1.331,
"step": 286000
},
{
"epoch": 10.12,
"learning_rate": 4.874739084110231e-06,
"loss": 1.3168,
"step": 286500
},
{
"epoch": 10.14,
"learning_rate": 4.874305890419807e-06,
"loss": 1.3253,
"step": 287000
},
{
"epoch": 10.16,
"learning_rate": 4.873871097958773e-06,
"loss": 1.338,
"step": 287500
},
{
"epoch": 10.18,
"learning_rate": 4.873435574253223e-06,
"loss": 1.3369,
"step": 288000
},
{
"epoch": 10.19,
"learning_rate": 4.872999319437314e-06,
"loss": 1.3304,
"step": 288500
},
{
"epoch": 10.21,
"learning_rate": 4.87256233364543e-06,
"loss": 1.3266,
"step": 289000
},
{
"epoch": 10.23,
"learning_rate": 4.872124617012178e-06,
"loss": 1.3233,
"step": 289500
},
{
"epoch": 10.25,
"learning_rate": 4.87168704729623e-06,
"loss": 1.3372,
"step": 290000
},
{
"epoch": 10.26,
"learning_rate": 4.8712478708459795e-06,
"loss": 1.3455,
"step": 290500
},
{
"epoch": 10.28,
"learning_rate": 4.870807963959269e-06,
"loss": 1.33,
"step": 291000
},
{
"epoch": 10.3,
"learning_rate": 4.870367326771606e-06,
"loss": 1.3395,
"step": 291500
},
{
"epoch": 10.32,
"learning_rate": 4.8699259594187235e-06,
"loss": 1.3406,
"step": 292000
},
{
"epoch": 10.33,
"learning_rate": 4.869483862036581e-06,
"loss": 1.336,
"step": 292500
},
{
"epoch": 10.35,
"learning_rate": 4.86904103476136e-06,
"loss": 1.3385,
"step": 293000
},
{
"epoch": 10.37,
"learning_rate": 4.8685974777294685e-06,
"loss": 1.333,
"step": 293500
},
{
"epoch": 10.39,
"learning_rate": 4.868153191077541e-06,
"loss": 1.3279,
"step": 294000
},
{
"epoch": 10.41,
"learning_rate": 4.867708174942434e-06,
"loss": 1.3391,
"step": 294500
},
{
"epoch": 10.42,
"learning_rate": 4.867263321679989e-06,
"loss": 1.3125,
"step": 295000
},
{
"epoch": 10.44,
"learning_rate": 4.866818635793604e-06,
"loss": 1.321,
"step": 295500
},
{
"epoch": 10.46,
"learning_rate": 4.866371436406369e-06,
"loss": 1.3237,
"step": 296000
},
{
"epoch": 10.48,
"learning_rate": 4.865923508084802e-06,
"loss": 1.3373,
"step": 296500
},
{
"epoch": 10.49,
"learning_rate": 4.865474850966887e-06,
"loss": 1.3608,
"step": 297000
},
{
"epoch": 10.51,
"learning_rate": 4.865025465190824e-06,
"loss": 1.3348,
"step": 297500
},
{
"epoch": 10.53,
"learning_rate": 4.864576251850606e-06,
"loss": 1.3257,
"step": 298000
},
{
"epoch": 10.55,
"learning_rate": 4.8641254106303825e-06,
"loss": 1.3386,
"step": 298500
},
{
"epoch": 10.56,
"learning_rate": 4.863673841167693e-06,
"loss": 1.3338,
"step": 299000
},
{
"epoch": 10.58,
"learning_rate": 4.863221543601638e-06,
"loss": 1.3363,
"step": 299500
},
{
"epoch": 10.6,
"learning_rate": 4.8627685180715436e-06,
"loss": 1.3344,
"step": 300000
},
{
"epoch": 10.62,
"learning_rate": 4.862314764716958e-06,
"loss": 1.3245,
"step": 300500
},
{
"epoch": 10.64,
"learning_rate": 4.861860283677654e-06,
"loss": 1.3313,
"step": 301000
},
{
"epoch": 10.65,
"learning_rate": 4.8614050750936315e-06,
"loss": 1.3281,
"step": 301500
},
{
"epoch": 10.67,
"learning_rate": 4.860949139105111e-06,
"loss": 1.3431,
"step": 302000
},
{
"epoch": 8.55,
"learning_rate": 4.910414959702474e-06,
"loss": 1.3054,
"step": 302500
},
{
"epoch": 8.56,
"learning_rate": 4.910120236938768e-06,
"loss": 1.2943,
"step": 303000
},
{
"epoch": 8.58,
"learning_rate": 4.909825039045666e-06,
"loss": 1.2908,
"step": 303500
},
{
"epoch": 8.59,
"learning_rate": 4.909529957901373e-06,
"loss": 1.2962,
"step": 304000
},
{
"epoch": 8.61,
"learning_rate": 4.909233810874125e-06,
"loss": 1.2905,
"step": 304500
},
{
"epoch": 8.62,
"learning_rate": 4.90893718889223e-06,
"loss": 1.3046,
"step": 305000
},
{
"epoch": 8.64,
"learning_rate": 4.908640092014164e-06,
"loss": 1.3078,
"step": 305500
},
{
"epoch": 8.65,
"learning_rate": 4.908342520298496e-06,
"loss": 1.3022,
"step": 306000
},
{
"epoch": 8.66,
"learning_rate": 4.90804507037067e-06,
"loss": 1.3054,
"step": 306500
},
{
"epoch": 8.68,
"learning_rate": 4.9077465501052635e-06,
"loss": 1.3044,
"step": 307000
},
{
"epoch": 8.69,
"learning_rate": 4.907447555178408e-06,
"loss": 1.3073,
"step": 307500
},
{
"epoch": 8.71,
"learning_rate": 4.9071480856490464e-06,
"loss": 1.3195,
"step": 308000
},
{
"epoch": 8.72,
"learning_rate": 4.906848741937917e-06,
"loss": 1.3165,
"step": 308500
},
{
"epoch": 8.73,
"learning_rate": 4.906548324329658e-06,
"loss": 1.3221,
"step": 309000
},
{
"epoch": 8.75,
"learning_rate": 4.906247432296168e-06,
"loss": 1.3176,
"step": 309500
},
{
"epoch": 8.76,
"learning_rate": 4.905946669102941e-06,
"loss": 1.2961,
"step": 310000
},
{
"epoch": 8.78,
"learning_rate": 4.905644829345587e-06,
"loss": 1.322,
"step": 310500
},
{
"epoch": 8.79,
"learning_rate": 4.905342515341116e-06,
"loss": 1.306,
"step": 311000
},
{
"epoch": 8.8,
"learning_rate": 4.905039727149125e-06,
"loss": 1.3045,
"step": 311500
},
{
"epoch": 8.82,
"learning_rate": 4.904736464829305e-06,
"loss": 1.2781,
"step": 312000
},
{
"epoch": 8.83,
"learning_rate": 4.904432728441443e-06,
"loss": 1.2526,
"step": 312500
},
{
"epoch": 8.85,
"learning_rate": 4.904128518045415e-06,
"loss": 1.296,
"step": 313000
},
{
"epoch": 8.86,
"learning_rate": 4.903823833701194e-06,
"loss": 1.3221,
"step": 313500
},
{
"epoch": 8.88,
"learning_rate": 4.903519286258212e-06,
"loss": 1.3018,
"step": 314000
},
{
"epoch": 8.89,
"learning_rate": 4.90321365514549e-06,
"loss": 1.2563,
"step": 314500
},
{
"epoch": 8.9,
"learning_rate": 4.9029075502649305e-06,
"loss": 1.2638,
"step": 315000
},
{
"epoch": 8.92,
"learning_rate": 4.902600971676879e-06,
"loss": 1.261,
"step": 315500
},
{
"epoch": 8.93,
"learning_rate": 4.902294534018904e-06,
"loss": 1.2715,
"step": 316000
},
{
"epoch": 8.95,
"learning_rate": 4.901987009144389e-06,
"loss": 1.2506,
"step": 316500
},
{
"epoch": 8.96,
"learning_rate": 4.9016790107438574e-06,
"loss": 1.266,
"step": 317000
},
{
"epoch": 8.97,
"learning_rate": 4.9013705388780266e-06,
"loss": 1.2541,
"step": 317500
},
{
"epoch": 8.99,
"learning_rate": 4.901061593607708e-06,
"loss": 1.2595,
"step": 318000
},
{
"epoch": 9.0,
"learning_rate": 4.900752794303393e-06,
"loss": 1.2544,
"step": 318500
},
{
"epoch": 9.02,
"learning_rate": 4.900442903353414e-06,
"loss": 1.2194,
"step": 319000
},
{
"epoch": 9.03,
"learning_rate": 4.900132539181822e-06,
"loss": 1.2294,
"step": 319500
},
{
"epoch": 9.05,
"learning_rate": 4.899821701849801e-06,
"loss": 1.2756,
"step": 320000
},
{
"epoch": 9.06,
"learning_rate": 4.899511014511604e-06,
"loss": 1.2882,
"step": 320500
},
{
"epoch": 9.07,
"learning_rate": 4.899199231988669e-06,
"loss": 1.2716,
"step": 321000
},
{
"epoch": 9.09,
"learning_rate": 4.898886976489296e-06,
"loss": 1.2789,
"step": 321500
},
{
"epoch": 9.1,
"learning_rate": 4.8985742480750435e-06,
"loss": 1.2785,
"step": 322000
},
{
"epoch": 9.12,
"learning_rate": 4.898261673681964e-06,
"loss": 1.282,
"step": 322500
},
{
"epoch": 9.13,
"learning_rate": 4.897948000568522e-06,
"loss": 1.3019,
"step": 323000
},
{
"epoch": 9.14,
"learning_rate": 4.897633854725308e-06,
"loss": 1.2736,
"step": 323500
},
{
"epoch": 9.16,
"learning_rate": 4.8973198659229585e-06,
"loss": 1.2739,
"step": 324000
},
{
"epoch": 9.17,
"learning_rate": 4.897004775751239e-06,
"loss": 1.2929,
"step": 324500
},
{
"epoch": 9.19,
"learning_rate": 4.8966892130356955e-06,
"loss": 1.2988,
"step": 325000
},
{
"epoch": 9.2,
"learning_rate": 4.8963731778385396e-06,
"loss": 1.2867,
"step": 325500
},
{
"epoch": 9.21,
"learning_rate": 4.896056670222072e-06,
"loss": 1.2865,
"step": 326000
},
{
"epoch": 9.23,
"learning_rate": 4.89573969024869e-06,
"loss": 1.2867,
"step": 326500
},
{
"epoch": 9.24,
"learning_rate": 4.895422237980881e-06,
"loss": 1.2765,
"step": 327000
},
{
"epoch": 9.26,
"learning_rate": 4.8951043134812306e-06,
"loss": 1.2778,
"step": 327500
},
{
"epoch": 9.27,
"learning_rate": 4.8947859168124125e-06,
"loss": 1.2927,
"step": 328000
},
{
"epoch": 9.29,
"learning_rate": 4.8944670480371934e-06,
"loss": 1.2865,
"step": 328500
},
{
"epoch": 9.3,
"learning_rate": 4.894147707218439e-06,
"loss": 1.2806,
"step": 329000
},
{
"epoch": 9.31,
"learning_rate": 4.893828534515693e-06,
"loss": 1.2919,
"step": 329500
},
{
"epoch": 9.33,
"learning_rate": 4.89350825074259e-06,
"loss": 1.2914,
"step": 330000
},
{
"epoch": 9.34,
"learning_rate": 4.893188137097092e-06,
"loss": 1.2913,
"step": 330500
},
{
"epoch": 9.36,
"learning_rate": 4.8928669106217e-06,
"loss": 1.2886,
"step": 331000
},
{
"epoch": 9.37,
"learning_rate": 4.89254521241822e-06,
"loss": 1.2815,
"step": 331500
},
{
"epoch": 9.38,
"learning_rate": 4.892223042550072e-06,
"loss": 1.285,
"step": 332000
},
{
"epoch": 9.4,
"learning_rate": 4.8919004010807695e-06,
"loss": 1.2824,
"step": 332500
},
{
"epoch": 9.41,
"learning_rate": 4.891577288073915e-06,
"loss": 1.2805,
"step": 333000
},
{
"epoch": 9.43,
"learning_rate": 4.891253703593208e-06,
"loss": 1.2844,
"step": 333500
},
{
"epoch": 9.44,
"learning_rate": 4.890929647702439e-06,
"loss": 1.2709,
"step": 334000
},
{
"epoch": 9.46,
"learning_rate": 4.890605120465494e-06,
"loss": 1.2866,
"step": 334500
},
{
"epoch": 9.47,
"learning_rate": 4.890280121946349e-06,
"loss": 1.2897,
"step": 335000
},
{
"epoch": 9.48,
"learning_rate": 4.889954652209073e-06,
"loss": 1.2909,
"step": 335500
},
{
"epoch": 9.5,
"learning_rate": 4.889628711317831e-06,
"loss": 1.2988,
"step": 336000
},
{
"epoch": 9.51,
"learning_rate": 4.8893029526309435e-06,
"loss": 1.2848,
"step": 336500
},
{
"epoch": 9.53,
"learning_rate": 4.888976724800782e-06,
"loss": 1.2852,
"step": 337000
},
{
"epoch": 9.54,
"learning_rate": 4.888649372717259e-06,
"loss": 1.2939,
"step": 337500
},
{
"epoch": 9.55,
"learning_rate": 4.888321549737091e-06,
"loss": 1.3017,
"step": 338000
},
{
"epoch": 9.57,
"learning_rate": 4.8879932559249035e-06,
"loss": 1.2691,
"step": 338500
},
{
"epoch": 9.58,
"learning_rate": 4.887664491345417e-06,
"loss": 1.2932,
"step": 339000
},
{
"epoch": 9.6,
"learning_rate": 4.887335256063446e-06,
"loss": 1.2779,
"step": 339500
},
{
"epoch": 9.61,
"learning_rate": 4.887006210025384e-06,
"loss": 1.2972,
"step": 340000
},
{
"epoch": 9.62,
"learning_rate": 4.886676034474327e-06,
"loss": 1.3066,
"step": 340500
},
{
"epoch": 9.64,
"learning_rate": 4.8863453884156476e-06,
"loss": 1.2951,
"step": 341000
},
{
"epoch": 9.65,
"learning_rate": 4.8860149346169895e-06,
"loss": 1.3115,
"step": 341500
},
{
"epoch": 9.67,
"learning_rate": 4.885683348679396e-06,
"loss": 1.2997,
"step": 342000
},
{
"epoch": 9.68,
"learning_rate": 4.885351292429877e-06,
"loss": 1.3012,
"step": 342500
},
{
"epoch": 9.7,
"learning_rate": 4.885018765933894e-06,
"loss": 1.2889,
"step": 343000
},
{
"epoch": 9.71,
"learning_rate": 4.884685769257e-06,
"loss": 1.2942,
"step": 343500
},
{
"epoch": 9.72,
"learning_rate": 4.884352302464844e-06,
"loss": 1.2945,
"step": 344000
},
{
"epoch": 9.74,
"learning_rate": 4.8840183656231644e-06,
"loss": 1.2867,
"step": 344500
},
{
"epoch": 9.75,
"learning_rate": 4.8836839587977915e-06,
"loss": 1.2663,
"step": 345000
},
{
"epoch": 9.77,
"learning_rate": 4.8833497522770725e-06,
"loss": 1.2968,
"step": 345500
},
{
"epoch": 9.78,
"learning_rate": 4.883014406621821e-06,
"loss": 1.3086,
"step": 346000
},
{
"epoch": 9.79,
"learning_rate": 4.882678591180798e-06,
"loss": 1.2935,
"step": 346500
},
{
"epoch": 9.81,
"learning_rate": 4.882342979059261e-06,
"loss": 1.2973,
"step": 347000
},
{
"epoch": 9.82,
"learning_rate": 4.882006225184634e-06,
"loss": 1.3053,
"step": 347500
},
{
"epoch": 9.84,
"learning_rate": 4.8816690017229865e-06,
"loss": 1.3102,
"step": 348000
},
{
"epoch": 24.63,
"learning_rate": 4.288886163167568e-06,
"loss": 1.2483,
"step": 348500
},
{
"epoch": 24.66,
"learning_rate": 4.286946522349646e-06,
"loss": 1.2306,
"step": 349000
},
{
"epoch": 24.7,
"learning_rate": 4.285004679736106e-06,
"loss": 1.2313,
"step": 349500
},
{
"epoch": 24.73,
"learning_rate": 4.283060637719597e-06,
"loss": 1.2318,
"step": 350000
},
{
"epoch": 24.77,
"learning_rate": 4.281114398695484e-06,
"loss": 1.2443,
"step": 350500
},
{
"epoch": 24.8,
"learning_rate": 4.279169864117727e-06,
"loss": 1.2297,
"step": 351000
},
{
"epoch": 9.31,
"learning_rate": 4.89380507022057e-06,
"loss": 1.2113,
"step": 351500
},
{
"epoch": 9.33,
"learning_rate": 4.8935047813286015e-06,
"loss": 1.221,
"step": 352000
},
{
"epoch": 9.34,
"learning_rate": 4.8932040777096225e-06,
"loss": 1.2172,
"step": 352500
},
{
"epoch": 9.35,
"learning_rate": 4.892902959415736e-06,
"loss": 1.2136,
"step": 353000
},
{
"epoch": 9.37,
"learning_rate": 4.8926014264991195e-06,
"loss": 1.2045,
"step": 353500
},
{
"epoch": 9.38,
"learning_rate": 4.892299479012018e-06,
"loss": 1.2177,
"step": 354000
},
{
"epoch": 9.39,
"learning_rate": 4.891997117006753e-06,
"loss": 1.2145,
"step": 354500
},
{
"epoch": 9.41,
"learning_rate": 4.891694946502256e-06,
"loss": 1.2144,
"step": 355000
},
{
"epoch": 9.42,
"learning_rate": 4.891391756446678e-06,
"loss": 1.2086,
"step": 355500
},
{
"epoch": 9.43,
"learning_rate": 4.891088152030219e-06,
"loss": 1.2151,
"step": 356000
},
{
"epoch": 9.45,
"learning_rate": 4.890784741756376e-06,
"loss": 1.2111,
"step": 356500
},
{
"epoch": 9.46,
"learning_rate": 4.890480918882196e-06,
"loss": 1.2204,
"step": 357000
},
{
"epoch": 9.47,
"learning_rate": 4.890176073355719e-06,
"loss": 1.227,
"step": 357500
},
{
"epoch": 9.49,
"learning_rate": 4.889870813679004e-06,
"loss": 1.2339,
"step": 358000
},
{
"epoch": 9.5,
"learning_rate": 4.8895651399049425e-06,
"loss": 1.2306,
"step": 358500
},
{
"epoch": 9.51,
"learning_rate": 4.8892590520865e-06,
"loss": 1.218,
"step": 359000
},
{
"epoch": 9.53,
"learning_rate": 4.888952550276713e-06,
"loss": 1.2228,
"step": 359500
},
{
"epoch": 9.54,
"learning_rate": 4.88864563452869e-06,
"loss": 1.2332,
"step": 360000
},
{
"epoch": 9.55,
"learning_rate": 4.88833830489561e-06,
"loss": 1.2273,
"step": 360500
},
{
"epoch": 9.57,
"learning_rate": 4.888031177330624e-06,
"loss": 1.2061,
"step": 361000
},
{
"epoch": 9.58,
"learning_rate": 4.88772302091476e-06,
"loss": 1.2242,
"step": 361500
},
{
"epoch": 9.59,
"learning_rate": 4.887414450773703e-06,
"loss": 1.218,
"step": 362000
},
{
"epoch": 9.61,
"learning_rate": 4.8871054669609185e-06,
"loss": 1.2264,
"step": 362500
},
{
"epoch": 9.62,
"learning_rate": 4.886796069529946e-06,
"loss": 1.2285,
"step": 363000
},
{
"epoch": 9.63,
"learning_rate": 4.886486258534393e-06,
"loss": 1.2361,
"step": 363500
},
{
"epoch": 9.65,
"learning_rate": 4.886176034027944e-06,
"loss": 1.2376,
"step": 364000
},
{
"epoch": 9.66,
"learning_rate": 4.885866017752871e-06,
"loss": 1.2366,
"step": 364500
},
{
"epoch": 9.67,
"learning_rate": 4.885555589726332e-06,
"loss": 1.2311,
"step": 365000
},
{
"epoch": 9.69,
"learning_rate": 4.885244126663284e-06,
"loss": 1.238,
"step": 365500
},
{
"epoch": 9.7,
"learning_rate": 4.884932250304566e-06,
"loss": 1.2222,
"step": 366000
},
{
"epoch": 9.71,
"learning_rate": 4.8846199607042175e-06,
"loss": 1.2327,
"step": 366500
},
{
"epoch": 9.73,
"learning_rate": 4.884307257916347e-06,
"loss": 1.2383,
"step": 367000
},
{
"epoch": 9.74,
"learning_rate": 4.883994141995138e-06,
"loss": 1.2232,
"step": 367500
},
{
"epoch": 9.75,
"learning_rate": 4.883680612994847e-06,
"loss": 1.201,
"step": 368000
},
{
"epoch": 9.77,
"learning_rate": 4.883366670969796e-06,
"loss": 1.2324,
"step": 368500
},
{
"epoch": 9.78,
"learning_rate": 4.883052315974385e-06,
"loss": 1.2413,
"step": 369000
},
{
"epoch": 9.79,
"learning_rate": 4.882737548063082e-06,
"loss": 1.2376,
"step": 369500
},
{
"epoch": 9.8,
"learning_rate": 4.882422367290427e-06,
"loss": 1.2251,
"step": 370000
},
{
"epoch": 9.82,
"learning_rate": 4.882106773711033e-06,
"loss": 1.2382,
"step": 370500
},
{
"epoch": 9.83,
"learning_rate": 4.881791399804135e-06,
"loss": 1.2421,
"step": 371000
},
{
"epoch": 9.84,
"learning_rate": 4.881474981600724e-06,
"loss": 1.2825,
"step": 371500
},
{
"epoch": 9.86,
"learning_rate": 4.8811581507547275e-06,
"loss": 1.2842,
"step": 372000
},
{
"epoch": 9.87,
"learning_rate": 4.880840907321045e-06,
"loss": 1.2895,
"step": 372500
},
{
"epoch": 9.88,
"learning_rate": 4.880523887078249e-06,
"loss": 1.3026,
"step": 373000
},
{
"epoch": 9.9,
"learning_rate": 4.880205819459074e-06,
"loss": 1.3,
"step": 373500
},
{
"epoch": 9.91,
"learning_rate": 4.879887339417224e-06,
"loss": 1.3,
"step": 374000
},
{
"epoch": 9.92,
"learning_rate": 4.879568447007884e-06,
"loss": 1.304,
"step": 374500
},
{
"epoch": 9.94,
"learning_rate": 4.879249142286308e-06,
"loss": 1.3006,
"step": 375000
},
{
"epoch": 9.95,
"learning_rate": 4.878929425307822e-06,
"loss": 1.2926,
"step": 375500
},
{
"epoch": 9.96,
"learning_rate": 4.878609296127827e-06,
"loss": 1.295,
"step": 376000
},
{
"epoch": 9.98,
"learning_rate": 4.878288754801789e-06,
"loss": 1.2845,
"step": 376500
},
{
"epoch": 9.99,
"learning_rate": 4.87796780138525e-06,
"loss": 1.2812,
"step": 377000
},
{
"epoch": 10.0,
"learning_rate": 4.877647079075899e-06,
"loss": 1.2815,
"step": 377500
},
{
"epoch": 10.02,
"learning_rate": 4.877325302469169e-06,
"loss": 1.2275,
"step": 378000
},
{
"epoch": 10.03,
"learning_rate": 4.877003113938878e-06,
"loss": 1.2292,
"step": 378500
},
{
"epoch": 10.04,
"learning_rate": 4.876680513540851e-06,
"loss": 1.1888,
"step": 379000
},
{
"epoch": 10.06,
"learning_rate": 4.876358147766357e-06,
"loss": 1.1984,
"step": 379500
},
{
"epoch": 10.07,
"learning_rate": 4.876034724624079e-06,
"loss": 1.1754,
"step": 380000
},
{
"epoch": 10.08,
"learning_rate": 4.8757108897818595e-06,
"loss": 1.1909,
"step": 380500
},
{
"epoch": 10.1,
"learning_rate": 4.875386643295812e-06,
"loss": 1.1928,
"step": 381000
},
{
"epoch": 10.11,
"learning_rate": 4.8750619852221155e-06,
"loss": 1.1847,
"step": 381500
},
{
"epoch": 10.12,
"learning_rate": 4.874736915617028e-06,
"loss": 1.1797,
"step": 382000
},
{
"epoch": 10.14,
"learning_rate": 4.874412085909648e-06,
"loss": 1.1904,
"step": 382500
},
{
"epoch": 10.15,
"learning_rate": 4.874086846427514e-06,
"loss": 1.1979,
"step": 383000
},
{
"epoch": 10.16,
"learning_rate": 4.873760544211823e-06,
"loss": 1.2004,
"step": 383500
},
{
"epoch": 10.18,
"learning_rate": 4.873433830690242e-06,
"loss": 1.1977,
"step": 384000
},
{
"epoch": 10.19,
"learning_rate": 4.8731067059193815e-06,
"loss": 1.1968,
"step": 384500
},
{
"epoch": 10.2,
"learning_rate": 4.872779169955924e-06,
"loss": 1.1962,
"step": 385000
},
{
"epoch": 10.22,
"learning_rate": 4.8724518791610955e-06,
"loss": 1.1854,
"step": 385500
},
{
"epoch": 10.23,
"learning_rate": 4.872123521804872e-06,
"loss": 1.1861,
"step": 386000
},
{
"epoch": 10.24,
"learning_rate": 4.87179475342641e-06,
"loss": 1.1974,
"step": 386500
},
{
"epoch": 10.26,
"learning_rate": 4.871465574082675e-06,
"loss": 1.2087,
"step": 387000
},
{
"epoch": 10.16,
"learning_rate": 4.8738809087471e-06,
"loss": 1.1914,
"step": 387500
},
{
"epoch": 10.17,
"learning_rate": 4.873557875957663e-06,
"loss": 1.1976,
"step": 388000
},
{
"epoch": 10.18,
"learning_rate": 4.873234440732965e-06,
"loss": 1.2074,
"step": 388500
},
{
"epoch": 10.2,
"learning_rate": 4.872910603127847e-06,
"loss": 1.2017,
"step": 389000
},
{
"epoch": 10.21,
"learning_rate": 4.872587012078557e-06,
"loss": 1.1831,
"step": 389500
},
{
"epoch": 10.22,
"learning_rate": 4.872263020366089e-06,
"loss": 1.183,
"step": 390000
},
{
"epoch": 10.24,
"learning_rate": 4.871937977558167e-06,
"loss": 1.2121,
"step": 390500
},
{
"epoch": 10.25,
"learning_rate": 4.871612532589636e-06,
"loss": 1.1972,
"step": 391000
},
{
"epoch": 10.26,
"learning_rate": 4.871287337611088e-06,
"loss": 1.1987,
"step": 391500
},
{
"epoch": 10.28,
"learning_rate": 4.870961089290992e-06,
"loss": 1.199,
"step": 392000
},
{
"epoch": 10.29,
"learning_rate": 4.8706344389759185e-06,
"loss": 1.2001,
"step": 392500
},
{
"epoch": 10.3,
"learning_rate": 4.870307386721251e-06,
"loss": 1.1861,
"step": 393000
},
{
"epoch": 10.32,
"learning_rate": 4.8699799325824415e-06,
"loss": 1.1936,
"step": 393500
},
{
"epoch": 10.33,
"learning_rate": 4.86965207661501e-06,
"loss": 1.203,
"step": 394000
},
{
"epoch": 10.34,
"learning_rate": 4.869323818874543e-06,
"loss": 1.1971,
"step": 394500
},
{
"epoch": 10.35,
"learning_rate": 4.868995159416697e-06,
"loss": 1.1898,
"step": 395000
},
{
"epoch": 10.37,
"learning_rate": 4.868666098297196e-06,
"loss": 1.1943,
"step": 395500
},
{
"epoch": 10.38,
"learning_rate": 4.8683366355718324e-06,
"loss": 1.219,
"step": 396000
},
{
"epoch": 10.39,
"learning_rate": 4.8680067712964665e-06,
"loss": 1.2131,
"step": 396500
},
{
"epoch": 10.41,
"learning_rate": 4.8676771664592185e-06,
"loss": 1.2046,
"step": 397000
},
{
"epoch": 10.42,
"learning_rate": 4.86734650005452e-06,
"loss": 1.1869,
"step": 397500
},
{
"epoch": 10.43,
"learning_rate": 4.867015432267696e-06,
"loss": 1.2119,
"step": 398000
},
{
"epoch": 10.45,
"learning_rate": 4.866683963154878e-06,
"loss": 1.1984,
"step": 398500
},
{
"epoch": 10.46,
"learning_rate": 4.866352092772267e-06,
"loss": 1.197,
"step": 399000
},
{
"epoch": 10.47,
"learning_rate": 4.866019821176132e-06,
"loss": 1.2049,
"step": 399500
},
{
"epoch": 10.49,
"learning_rate": 4.865687148422809e-06,
"loss": 1.1992,
"step": 400000
},
{
"epoch": 10.5,
"learning_rate": 4.865354074568701e-06,
"loss": 1.2089,
"step": 400500
},
{
"epoch": 10.51,
"learning_rate": 4.865021267020285e-06,
"loss": 1.2094,
"step": 401000
},
{
"epoch": 10.52,
"learning_rate": 4.864687391936014e-06,
"loss": 1.1975,
"step": 401500
},
{
"epoch": 10.54,
"learning_rate": 4.864353115920468e-06,
"loss": 1.212,
"step": 402000
},
{
"epoch": 10.55,
"learning_rate": 4.864018439030322e-06,
"loss": 1.1958,
"step": 402500
},
{
"epoch": 10.56,
"learning_rate": 4.863684031877717e-06,
"loss": 1.2087,
"step": 403000
},
{
"epoch": 10.58,
"learning_rate": 4.863348554210139e-06,
"loss": 1.2177,
"step": 403500
},
{
"epoch": 10.59,
"learning_rate": 4.863012675838284e-06,
"loss": 1.2081,
"step": 404000
},
{
"epoch": 10.6,
"learning_rate": 4.8626770697769475e-06,
"loss": 1.1909,
"step": 404500
},
{
"epoch": 10.62,
"learning_rate": 4.862340390968576e-06,
"loss": 1.2031,
"step": 405000
},
{
"epoch": 10.63,
"learning_rate": 4.86200331162686e-06,
"loss": 1.214,
"step": 405500
},
{
"epoch": 10.64,
"learning_rate": 4.8616658318089535e-06,
"loss": 1.2061,
"step": 406000
},
{
"epoch": 10.66,
"learning_rate": 4.861327951572075e-06,
"loss": 1.2063,
"step": 406500
},
{
"epoch": 10.67,
"learning_rate": 4.860989670973512e-06,
"loss": 1.1869,
"step": 407000
},
{
"epoch": 10.68,
"learning_rate": 4.86065099007062e-06,
"loss": 1.2214,
"step": 407500
},
{
"epoch": 10.7,
"learning_rate": 4.860311908920821e-06,
"loss": 1.1971,
"step": 408000
},
{
"epoch": 10.71,
"learning_rate": 4.859972427581606e-06,
"loss": 1.2011,
"step": 408500
},
{
"epoch": 10.72,
"learning_rate": 4.859633226272771e-06,
"loss": 1.181,
"step": 409000
},
{
"epoch": 10.73,
"learning_rate": 4.859292945527562e-06,
"loss": 1.2205,
"step": 409500
},
{
"epoch": 10.75,
"learning_rate": 4.8589522647657026e-06,
"loss": 1.213,
"step": 410000
},
{
"epoch": 10.76,
"learning_rate": 4.858611184044954e-06,
"loss": 1.2132,
"step": 410500
},
{
"epoch": 10.77,
"learning_rate": 4.858269703423148e-06,
"loss": 1.2043,
"step": 411000
},
{
"epoch": 10.79,
"learning_rate": 4.857928507118115e-06,
"loss": 1.208,
"step": 411500
},
{
"epoch": 10.8,
"learning_rate": 4.857586227667465e-06,
"loss": 1.221,
"step": 412000
},
{
"epoch": 10.81,
"learning_rate": 4.857244234246783e-06,
"loss": 1.2041,
"step": 412500
},
{
"epoch": 10.83,
"learning_rate": 4.85690115619896e-06,
"loss": 1.2149,
"step": 413000
},
{
"epoch": 10.84,
"learning_rate": 4.856557678540012e-06,
"loss": 1.2073,
"step": 413500
},
{
"epoch": 10.85,
"learning_rate": 4.856213801328176e-06,
"loss": 1.2172,
"step": 414000
},
{
"epoch": 10.87,
"learning_rate": 4.855869524621757e-06,
"loss": 1.202,
"step": 414500
},
{
"epoch": 10.88,
"learning_rate": 4.855524848479127e-06,
"loss": 1.2053,
"step": 415000
},
{
"epoch": 10.89,
"learning_rate": 4.855179772958722e-06,
"loss": 1.2172,
"step": 415500
},
{
"epoch": 10.9,
"learning_rate": 4.854834298119055e-06,
"loss": 1.2146,
"step": 416000
},
{
"epoch": 10.92,
"learning_rate": 4.854488424018698e-06,
"loss": 1.2087,
"step": 416500
},
{
"epoch": 10.93,
"learning_rate": 4.854143536604635e-06,
"loss": 1.203,
"step": 417000
},
{
"epoch": 10.94,
"learning_rate": 4.85379686575535e-06,
"loss": 1.213,
"step": 417500
},
{
"epoch": 10.96,
"learning_rate": 4.853450490359387e-06,
"loss": 1.2252,
"step": 418000
},
{
"epoch": 10.97,
"learning_rate": 4.853103022197354e-06,
"loss": 1.2183,
"step": 418500
},
{
"epoch": 10.98,
"learning_rate": 4.852755155068167e-06,
"loss": 1.2286,
"step": 419000
},
{
"epoch": 11.0,
"learning_rate": 4.8524068890308085e-06,
"loss": 1.2051,
"step": 419500
},
{
"epoch": 11.01,
"learning_rate": 4.852058224144326e-06,
"loss": 1.17,
"step": 420000
},
{
"epoch": 11.02,
"learning_rate": 4.851709160467835e-06,
"loss": 1.1583,
"step": 420500
},
{
"epoch": 11.04,
"learning_rate": 4.851359698060518e-06,
"loss": 1.1584,
"step": 421000
},
{
"epoch": 11.05,
"learning_rate": 4.851009836981629e-06,
"loss": 1.1464,
"step": 421500
},
{
"epoch": 11.06,
"learning_rate": 4.850659577290486e-06,
"loss": 1.1508,
"step": 422000
},
{
"epoch": 11.08,
"learning_rate": 4.8503096207606785e-06,
"loss": 1.1493,
"step": 422500
},
{
"epoch": 11.09,
"learning_rate": 4.849958564820179e-06,
"loss": 1.1648,
"step": 423000
},
{
"epoch": 11.1,
"learning_rate": 4.849607110445667e-06,
"loss": 1.1585,
"step": 423500
},
{
"epoch": 11.11,
"learning_rate": 4.849255257696732e-06,
"loss": 1.1564,
"step": 424000
},
{
"epoch": 11.13,
"learning_rate": 4.8489030066330305e-06,
"loss": 1.174,
"step": 424500
},
{
"epoch": 11.14,
"learning_rate": 4.848550357314286e-06,
"loss": 1.1684,
"step": 425000
},
{
"epoch": 11.15,
"learning_rate": 4.84819730980029e-06,
"loss": 1.172,
"step": 425500
},
{
"epoch": 11.17,
"learning_rate": 4.847843864150902e-06,
"loss": 1.1606,
"step": 426000
},
{
"epoch": 11.18,
"learning_rate": 4.847490728510736e-06,
"loss": 1.1452,
"step": 426500
},
{
"epoch": 11.19,
"learning_rate": 4.847137196445449e-06,
"loss": 1.1545,
"step": 427000
},
{
"epoch": 11.21,
"learning_rate": 4.846782558341414e-06,
"loss": 1.1576,
"step": 427500
},
{
"epoch": 11.22,
"learning_rate": 4.846427522341857e-06,
"loss": 1.1528,
"step": 428000
},
{
"epoch": 11.23,
"learning_rate": 4.8460720885069726e-06,
"loss": 1.1541,
"step": 428500
},
{
"epoch": 11.25,
"learning_rate": 4.845716256897027e-06,
"loss": 1.1655,
"step": 429000
},
{
"epoch": 11.26,
"learning_rate": 4.845360027572349e-06,
"loss": 1.1485,
"step": 429500
},
{
"epoch": 11.27,
"learning_rate": 4.845003400593338e-06,
"loss": 1.174,
"step": 430000
},
{
"epoch": 11.28,
"learning_rate": 4.844646376020461e-06,
"loss": 1.1586,
"step": 430500
},
{
"epoch": 11.3,
"learning_rate": 4.844288953914249e-06,
"loss": 1.1561,
"step": 431000
},
{
"epoch": 11.31,
"learning_rate": 4.8439318503711e-06,
"loss": 1.1568,
"step": 431500
},
{
"epoch": 11.32,
"learning_rate": 4.843574351003822e-06,
"loss": 1.174,
"step": 432000
},
{
"epoch": 11.34,
"learning_rate": 4.843215738250767e-06,
"loss": 1.1486,
"step": 432500
},
{
"epoch": 11.35,
"learning_rate": 4.8428567282069416e-06,
"loss": 1.1571,
"step": 433000
},
{
"epoch": 11.36,
"learning_rate": 4.842497320933216e-06,
"loss": 1.1784,
"step": 433500
},
{
"epoch": 11.38,
"learning_rate": 4.842137516490527e-06,
"loss": 1.1644,
"step": 434000
},
{
"epoch": 11.39,
"learning_rate": 4.841777314939881e-06,
"loss": 1.1631,
"step": 434500
},
{
"epoch": 11.4,
"learning_rate": 4.841416716342348e-06,
"loss": 1.1528,
"step": 435000
},
{
"epoch": 11.42,
"learning_rate": 4.841055720759067e-06,
"loss": 1.1698,
"step": 435500
},
{
"epoch": 11.43,
"learning_rate": 4.840695051432351e-06,
"loss": 1.1567,
"step": 436000
},
{
"epoch": 11.44,
"learning_rate": 4.84033326285493e-06,
"loss": 1.1675,
"step": 436500
},
{
"epoch": 11.46,
"learning_rate": 4.839971077475459e-06,
"loss": 1.1662,
"step": 437000
},
{
"epoch": 11.47,
"learning_rate": 4.839608495355349e-06,
"loss": 1.1727,
"step": 437500
},
{
"epoch": 11.48,
"learning_rate": 4.839246242909518e-06,
"loss": 1.1743,
"step": 438000
},
{
"epoch": 11.49,
"learning_rate": 4.838882868285796e-06,
"loss": 1.1701,
"step": 438500
},
{
"epoch": 11.51,
"learning_rate": 4.83851982504402e-06,
"loss": 1.1748,
"step": 439000
},
{
"epoch": 11.52,
"learning_rate": 4.838155658162633e-06,
"loss": 1.1792,
"step": 439500
},
{
"epoch": 11.53,
"learning_rate": 4.837791094848409e-06,
"loss": 1.1614,
"step": 440000
},
{
"epoch": 11.55,
"learning_rate": 4.83742613516316e-06,
"loss": 1.1754,
"step": 440500
},
{
"epoch": 11.56,
"learning_rate": 4.837060779168764e-06,
"loss": 1.1705,
"step": 441000
},
{
"epoch": 11.57,
"learning_rate": 4.8366950269271675e-06,
"loss": 1.1744,
"step": 441500
},
{
"epoch": 11.59,
"learning_rate": 4.8363288785003836e-06,
"loss": 1.1739,
"step": 442000
},
{
"epoch": 11.6,
"learning_rate": 4.835963067434883e-06,
"loss": 1.1749,
"step": 442500
},
{
"epoch": 11.61,
"learning_rate": 4.835596127616091e-06,
"loss": 1.1778,
"step": 443000
},
{
"epoch": 11.63,
"learning_rate": 4.83522879179843e-06,
"loss": 1.1748,
"step": 443500
},
{
"epoch": 11.64,
"learning_rate": 4.834861060044179e-06,
"loss": 1.1721,
"step": 444000
},
{
"epoch": 11.65,
"learning_rate": 4.834492932415691e-06,
"loss": 1.1626,
"step": 444500
},
{
"epoch": 11.67,
"learning_rate": 4.8341244089753775e-06,
"loss": 1.1839,
"step": 445000
},
{
"epoch": 11.68,
"learning_rate": 4.833755489785724e-06,
"loss": 1.1904,
"step": 445500
},
{
"epoch": 11.69,
"learning_rate": 4.833386174909278e-06,
"loss": 1.1619,
"step": 446000
},
{
"epoch": 11.7,
"learning_rate": 4.833017204224453e-06,
"loss": 1.1737,
"step": 446500
},
{
"epoch": 11.72,
"learning_rate": 4.832647098953405e-06,
"loss": 1.1587,
"step": 447000
},
{
"epoch": 11.73,
"learning_rate": 4.832276598183493e-06,
"loss": 1.1805,
"step": 447500
},
{
"epoch": 11.74,
"learning_rate": 4.831905701977536e-06,
"loss": 1.1755,
"step": 448000
},
{
"epoch": 11.76,
"learning_rate": 4.8315358963522344e-06,
"loss": 1.1717,
"step": 448500
},
{
"epoch": 11.77,
"learning_rate": 4.831164211044022e-06,
"loss": 1.1802,
"step": 449000
},
{
"epoch": 11.78,
"learning_rate": 4.830792130488371e-06,
"loss": 1.1907,
"step": 449500
},
{
"epoch": 11.8,
"learning_rate": 4.830419654748364e-06,
"loss": 1.1751,
"step": 450000
},
{
"epoch": 11.81,
"learning_rate": 4.830046783887155e-06,
"loss": 1.192,
"step": 450500
},
{
"epoch": 11.82,
"learning_rate": 4.829673517967963e-06,
"loss": 1.1919,
"step": 451000
},
{
"epoch": 11.84,
"learning_rate": 4.829299857054076e-06,
"loss": 1.172,
"step": 451500
},
{
"epoch": 11.85,
"learning_rate": 4.828925801208848e-06,
"loss": 1.1816,
"step": 452000
},
{
"epoch": 11.86,
"learning_rate": 4.8285513504957e-06,
"loss": 1.1849,
"step": 452500
},
{
"epoch": 11.87,
"learning_rate": 4.82817650497812e-06,
"loss": 1.172,
"step": 453000
},
{
"epoch": 11.89,
"learning_rate": 4.827801264719662e-06,
"loss": 1.1703,
"step": 453500
},
{
"epoch": 11.9,
"learning_rate": 4.827425629783949e-06,
"loss": 1.2058,
"step": 454000
},
{
"epoch": 11.91,
"learning_rate": 4.827050352687549e-06,
"loss": 1.1785,
"step": 454500
},
{
"epoch": 11.93,
"learning_rate": 4.826673929377493e-06,
"loss": 1.1915,
"step": 455000
},
{
"epoch": 11.94,
"learning_rate": 4.82629711158132e-06,
"loss": 1.1824,
"step": 455500
},
{
"epoch": 11.95,
"learning_rate": 4.82591989936292e-06,
"loss": 1.1688,
"step": 456000
},
{
"epoch": 11.97,
"learning_rate": 4.825542292786247e-06,
"loss": 1.1779,
"step": 456500
},
{
"epoch": 11.98,
"learning_rate": 4.825165048310532e-06,
"loss": 1.1804,
"step": 457000
},
{
"epoch": 11.99,
"learning_rate": 4.82478665399785e-06,
"loss": 1.1804,
"step": 457500
},
{
"epoch": 12.01,
"learning_rate": 4.824407865519037e-06,
"loss": 1.1613,
"step": 458000
},
{
"epoch": 12.02,
"learning_rate": 4.824028682938317e-06,
"loss": 1.1089,
"step": 458500
},
{
"epoch": 12.03,
"learning_rate": 4.82364910631998e-06,
"loss": 1.1197,
"step": 459000
},
{
"epoch": 12.05,
"learning_rate": 4.823269896062709e-06,
"loss": 1.1173,
"step": 459500
},
{
"epoch": 12.06,
"learning_rate": 4.82288953235003e-06,
"loss": 1.123,
"step": 460000
},
{
"epoch": 12.07,
"learning_rate": 4.822509536701004e-06,
"loss": 1.1095,
"step": 460500
},
{
"epoch": 12.08,
"learning_rate": 4.8221283861514285e-06,
"loss": 1.1206,
"step": 461000
},
{
"epoch": 12.1,
"learning_rate": 4.82174684188643e-06,
"loss": 1.1055,
"step": 461500
},
{
"epoch": 12.11,
"learning_rate": 4.821364903970699e-06,
"loss": 1.1172,
"step": 462000
},
{
"epoch": 12.12,
"learning_rate": 4.820982572468994e-06,
"loss": 1.1237,
"step": 462500
},
{
"epoch": 12.14,
"learning_rate": 4.820599847446137e-06,
"loss": 1.122,
"step": 463000
},
{
"epoch": 12.15,
"learning_rate": 4.820216728967021e-06,
"loss": 1.0988,
"step": 463500
},
{
"epoch": 12.16,
"learning_rate": 4.819833217096603e-06,
"loss": 1.1121,
"step": 464000
},
{
"epoch": 12.18,
"learning_rate": 4.819449311899906e-06,
"loss": 1.1237,
"step": 464500
},
{
"epoch": 12.19,
"learning_rate": 4.819065782431369e-06,
"loss": 1.12,
"step": 465000
},
{
"epoch": 12.2,
"learning_rate": 4.818681091563782e-06,
"loss": 1.1237,
"step": 465500
},
{
"epoch": 12.22,
"learning_rate": 4.818296007565259e-06,
"loss": 1.1215,
"step": 466000
},
{
"epoch": 12.23,
"learning_rate": 4.817910530501091e-06,
"loss": 1.1335,
"step": 466500
},
{
"epoch": 12.24,
"learning_rate": 4.817524660436635e-06,
"loss": 1.114,
"step": 467000
},
{
"epoch": 12.25,
"learning_rate": 4.817138397437315e-06,
"loss": 1.1203,
"step": 467500
},
{
"epoch": 12.27,
"learning_rate": 4.816751741568621e-06,
"loss": 1.1168,
"step": 468000
},
{
"epoch": 12.28,
"learning_rate": 4.816364692896113e-06,
"loss": 1.12,
"step": 468500
},
{
"epoch": 12.29,
"learning_rate": 4.815978026760142e-06,
"loss": 1.1325,
"step": 469000
},
{
"epoch": 12.31,
"learning_rate": 4.815590193462219e-06,
"loss": 1.1225,
"step": 469500
},
{
"epoch": 12.32,
"learning_rate": 4.815201967557418e-06,
"loss": 1.1121,
"step": 470000
},
{
"epoch": 12.33,
"learning_rate": 4.814813349111565e-06,
"loss": 1.1315,
"step": 470500
},
{
"epoch": 12.35,
"learning_rate": 4.814425116604039e-06,
"loss": 1.1365,
"step": 471000
},
{
"epoch": 12.36,
"learning_rate": 4.814036493255242e-06,
"loss": 1.1201,
"step": 471500
},
{
"epoch": 12.37,
"learning_rate": 4.8136466991510785e-06,
"loss": 1.1359,
"step": 472000
},
{
"epoch": 12.39,
"learning_rate": 4.813256512769557e-06,
"loss": 1.1303,
"step": 472500
},
{
"epoch": 12.4,
"learning_rate": 4.812865934176834e-06,
"loss": 1.1264,
"step": 473000
},
{
"epoch": 12.41,
"learning_rate": 4.812474963439131e-06,
"loss": 1.1238,
"step": 473500
},
{
"epoch": 12.43,
"learning_rate": 4.8120843837396205e-06,
"loss": 1.1424,
"step": 474000
},
{
"epoch": 12.44,
"learning_rate": 4.8116926296948485e-06,
"loss": 1.136,
"step": 474500
},
{
"epoch": 12.45,
"learning_rate": 4.811300483704031e-06,
"loss": 1.1284,
"step": 475000
},
{
"epoch": 12.46,
"learning_rate": 4.810907945833655e-06,
"loss": 1.1467,
"step": 475500
},
{
"epoch": 12.48,
"learning_rate": 4.810515802400626e-06,
"loss": 1.1243,
"step": 476000
},
{
"epoch": 12.49,
"learning_rate": 4.810122481754289e-06,
"loss": 1.1339,
"step": 476500
},
{
"epoch": 12.5,
"learning_rate": 4.809728769428122e-06,
"loss": 1.1389,
"step": 477000
},
{
"epoch": 12.52,
"learning_rate": 4.809334665488881e-06,
"loss": 1.1359,
"step": 477500
},
{
"epoch": 12.53,
"learning_rate": 4.8089401700033835e-06,
"loss": 1.1389,
"step": 478000
},
{
"epoch": 12.54,
"learning_rate": 4.808545283038518e-06,
"loss": 1.1266,
"step": 478500
},
{
"epoch": 12.56,
"learning_rate": 4.808150004661236e-06,
"loss": 1.1358,
"step": 479000
},
{
"epoch": 12.57,
"learning_rate": 4.807754334938557e-06,
"loss": 1.1402,
"step": 479500
},
{
"epoch": 12.58,
"learning_rate": 4.807358273937567e-06,
"loss": 1.1276,
"step": 480000
},
{
"epoch": 12.6,
"learning_rate": 4.806962615020226e-06,
"loss": 1.1438,
"step": 480500
},
{
"epoch": 12.61,
"learning_rate": 4.806565772446356e-06,
"loss": 1.1376,
"step": 481000
},
{
"epoch": 12.62,
"learning_rate": 4.806168538795695e-06,
"loss": 1.1352,
"step": 481500
},
{
"epoch": 12.63,
"learning_rate": 4.805770914135594e-06,
"loss": 1.1314,
"step": 482000
},
{
"epoch": 12.65,
"learning_rate": 4.805373694954788e-06,
"loss": 1.1325,
"step": 482500
},
{
"epoch": 12.66,
"learning_rate": 4.804975289259805e-06,
"loss": 1.1302,
"step": 483000
},
{
"epoch": 12.67,
"learning_rate": 4.804576492757695e-06,
"loss": 1.1332,
"step": 483500
},
{
"epoch": 12.69,
"learning_rate": 4.804178104280471e-06,
"loss": 1.154,
"step": 484000
},
{
"epoch": 12.7,
"learning_rate": 4.8037785271483004e-06,
"loss": 1.1361,
"step": 484500
},
{
"epoch": 12.71,
"learning_rate": 4.803378559411913e-06,
"loss": 1.1456,
"step": 485000
},
{
"epoch": 12.73,
"learning_rate": 4.802978201139125e-06,
"loss": 1.1398,
"step": 485500
},
{
"epoch": 12.74,
"learning_rate": 4.802577452397815e-06,
"loss": 1.145,
"step": 486000
},
{
"epoch": 12.75,
"learning_rate": 4.802176313255932e-06,
"loss": 1.1377,
"step": 486500
},
{
"epoch": 12.77,
"learning_rate": 4.801775587229943e-06,
"loss": 1.1384,
"step": 487000
},
{
"epoch": 12.78,
"learning_rate": 4.801373668271478e-06,
"loss": 1.1282,
"step": 487500
},
{
"epoch": 12.79,
"learning_rate": 4.800971359116539e-06,
"loss": 1.141,
"step": 488000
},
{
"epoch": 12.81,
"learning_rate": 4.800568659833339e-06,
"loss": 1.1376,
"step": 488500
},
{
"epoch": 12.82,
"learning_rate": 4.800165570490154e-06,
"loss": 1.1313,
"step": 489000
},
{
"epoch": 12.83,
"learning_rate": 4.799762091155328e-06,
"loss": 1.149,
"step": 489500
},
{
"epoch": 12.84,
"learning_rate": 4.79935822189727e-06,
"loss": 1.1365,
"step": 490000
},
{
"epoch": 12.86,
"learning_rate": 4.798953962784457e-06,
"loss": 1.1408,
"step": 490500
},
{
"epoch": 12.87,
"learning_rate": 4.79855012357219e-06,
"loss": 1.1372,
"step": 491000
},
{
"epoch": 12.88,
"learning_rate": 4.7981450857349246e-06,
"loss": 1.1431,
"step": 491500
},
{
"epoch": 12.9,
"learning_rate": 4.79773965824859e-06,
"loss": 1.1479,
"step": 492000
},
{
"epoch": 12.91,
"learning_rate": 4.797333841181927e-06,
"loss": 1.1326,
"step": 492500
},
{
"epoch": 12.92,
"learning_rate": 4.796928447405582e-06,
"loss": 1.1603,
"step": 493000
},
{
"epoch": 12.94,
"learning_rate": 4.796521852163562e-06,
"loss": 1.1421,
"step": 493500
},
{
"epoch": 12.95,
"learning_rate": 4.796115681905471e-06,
"loss": 1.1401,
"step": 494000
},
{
"epoch": 12.96,
"learning_rate": 4.7957083087632925e-06,
"loss": 1.1387,
"step": 494500
},
{
"epoch": 12.98,
"learning_rate": 4.7953005463852e-06,
"loss": 1.1333,
"step": 495000
},
{
"epoch": 12.99,
"learning_rate": 4.7948923948403284e-06,
"loss": 1.1458,
"step": 495500
},
{
"epoch": 13.0,
"learning_rate": 4.7944838541978784e-06,
"loss": 1.1453,
"step": 496000
},
{
"epoch": 13.02,
"learning_rate": 4.794075742774665e-06,
"loss": 1.0791,
"step": 496500
},
{
"epoch": 13.03,
"learning_rate": 4.793666424922779e-06,
"loss": 1.0822,
"step": 497000
},
{
"epoch": 13.04,
"learning_rate": 4.793256718181178e-06,
"loss": 1.0778,
"step": 497500
},
{
"epoch": 13.05,
"learning_rate": 4.792846622619328e-06,
"loss": 1.0792,
"step": 498000
},
{
"epoch": 13.07,
"learning_rate": 4.792436138306759e-06,
"loss": 1.0725,
"step": 498500
},
{
"epoch": 13.08,
"learning_rate": 4.792025265313071e-06,
"loss": 1.085,
"step": 499000
},
{
"epoch": 13.09,
"learning_rate": 4.791614003707925e-06,
"loss": 1.0714,
"step": 499500
},
{
"epoch": 13.11,
"learning_rate": 4.79120235356105e-06,
"loss": 1.0867,
"step": 500000
},
{
"epoch": 13.12,
"learning_rate": 4.790790314942243e-06,
"loss": 1.0799,
"step": 500500
},
{
"epoch": 13.13,
"learning_rate": 4.790377887921363e-06,
"loss": 1.0864,
"step": 501000
},
{
"epoch": 13.15,
"learning_rate": 4.789965072568339e-06,
"loss": 1.0776,
"step": 501500
},
{
"epoch": 13.16,
"learning_rate": 4.78955186895316e-06,
"loss": 1.08,
"step": 502000
},
{
"epoch": 13.17,
"learning_rate": 4.789139104716872e-06,
"loss": 1.0866,
"step": 502500
},
{
"epoch": 13.19,
"learning_rate": 4.788725125563804e-06,
"loss": 1.0822,
"step": 503000
},
{
"epoch": 13.2,
"learning_rate": 4.788310758358814e-06,
"loss": 1.0851,
"step": 503500
},
{
"epoch": 13.21,
"learning_rate": 4.78789600317216e-06,
"loss": 1.0887,
"step": 504000
},
{
"epoch": 13.22,
"learning_rate": 4.787481690747446e-06,
"loss": 1.0913,
"step": 504500
},
{
"epoch": 13.24,
"learning_rate": 4.787066160584104e-06,
"loss": 1.0943,
"step": 505000
},
{
"epoch": 13.25,
"learning_rate": 4.786651074872933e-06,
"loss": 1.0976,
"step": 505500
},
{
"epoch": 13.26,
"learning_rate": 4.786234770014151e-06,
"loss": 1.0931,
"step": 506000
},
{
"epoch": 13.28,
"learning_rate": 4.785818077525687e-06,
"loss": 1.0832,
"step": 506500
},
{
"epoch": 13.29,
"learning_rate": 4.785400997478189e-06,
"loss": 1.102,
"step": 507000
},
{
"epoch": 13.3,
"learning_rate": 4.784983529942376e-06,
"loss": 1.1045,
"step": 507500
},
{
"epoch": 13.32,
"learning_rate": 4.784565674989026e-06,
"loss": 1.096,
"step": 508000
},
{
"epoch": 13.33,
"learning_rate": 4.7841482695601125e-06,
"loss": 1.0983,
"step": 508500
},
{
"epoch": 13.34,
"learning_rate": 4.783729640758779e-06,
"loss": 1.098,
"step": 509000
},
{
"epoch": 13.36,
"learning_rate": 4.7833106247525075e-06,
"loss": 1.0947,
"step": 509500
},
{
"epoch": 13.37,
"learning_rate": 4.782891221612339e-06,
"loss": 1.0907,
"step": 510000
},
{
"epoch": 13.38,
"learning_rate": 4.782471431409386e-06,
"loss": 1.0983,
"step": 510500
},
{
"epoch": 13.4,
"learning_rate": 4.782051254214823e-06,
"loss": 1.0879,
"step": 511000
},
{
"epoch": 13.41,
"learning_rate": 4.7816306900998875e-06,
"loss": 1.09,
"step": 511500
},
{
"epoch": 13.42,
"learning_rate": 4.781210581423846e-06,
"loss": 1.0785,
"step": 512000
},
{
"epoch": 13.43,
"learning_rate": 4.7807892444556414e-06,
"loss": 1.101,
"step": 512500
},
{
"epoch": 13.45,
"learning_rate": 4.780367520781039e-06,
"loss": 1.0893,
"step": 513000
},
{
"epoch": 13.46,
"learning_rate": 4.779945410471543e-06,
"loss": 1.108,
"step": 513500
},
{
"epoch": 13.47,
"learning_rate": 4.779522913598722e-06,
"loss": 1.1003,
"step": 514000
},
{
"epoch": 13.49,
"learning_rate": 4.779100876386609e-06,
"loss": 1.0957,
"step": 514500
},
{
"epoch": 13.5,
"learning_rate": 4.778678454298496e-06,
"loss": 1.0968,
"step": 515000
},
{
"epoch": 13.51,
"learning_rate": 4.778254799711014e-06,
"loss": 1.087,
"step": 515500
},
{
"epoch": 13.53,
"learning_rate": 4.777830758846849e-06,
"loss": 1.0854,
"step": 516000
},
{
"epoch": 13.54,
"learning_rate": 1.182370800470857e-06,
"loss": 1.0918,
"step": 516500
},
{
"epoch": 13.55,
"learning_rate": 1.177999464679584e-06,
"loss": 1.1075,
"step": 517000
},
{
"epoch": 13.57,
"learning_rate": 1.1736337324956105e-06,
"loss": 1.0831,
"step": 517500
},
{
"epoch": 13.58,
"learning_rate": 1.169273622424111e-06,
"loss": 1.0886,
"step": 518000
},
{
"epoch": 13.59,
"learning_rate": 1.1649191529464277e-06,
"loss": 1.0991,
"step": 518500
},
{
"epoch": 13.6,
"learning_rate": 1.1605703425199926e-06,
"loss": 1.0869,
"step": 519000
},
{
"epoch": 13.62,
"learning_rate": 1.1562358901657684e-06,
"loss": 1.086,
"step": 519500
},
{
"epoch": 13.63,
"learning_rate": 1.151898441707961e-06,
"loss": 1.0844,
"step": 520000
},
{
"epoch": 13.64,
"learning_rate": 1.147575365246131e-06,
"loss": 1.0818,
"step": 520500
},
{
"epoch": 13.66,
"learning_rate": 1.1432493521510088e-06,
"loss": 1.0782,
"step": 521000
},
{
"epoch": 13.67,
"learning_rate": 1.1389290899596366e-06,
"loss": 1.0899,
"step": 521500
},
{
"epoch": 13.68,
"learning_rate": 1.1346145969844517e-06,
"loss": 1.0921,
"step": 522000
},
{
"epoch": 13.7,
"learning_rate": 1.1303058915134376e-06,
"loss": 1.0767,
"step": 522500
},
{
"epoch": 13.71,
"learning_rate": 1.1260029918100456e-06,
"loss": 1.0871,
"step": 523000
},
{
"epoch": 13.72,
"learning_rate": 1.1217059161131205e-06,
"loss": 1.0825,
"step": 523500
},
{
"epoch": 13.74,
"learning_rate": 1.1174146826368182e-06,
"loss": 1.0772,
"step": 524000
},
{
"epoch": 13.75,
"learning_rate": 1.1131293095705312e-06,
"loss": 1.0944,
"step": 524500
},
{
"epoch": 13.76,
"learning_rate": 1.108849815078811e-06,
"loss": 1.1008,
"step": 525000
},
{
"epoch": 13.78,
"learning_rate": 1.104576217301294e-06,
"loss": 1.0844,
"step": 525500
},
{
"epoch": 13.79,
"learning_rate": 1.10030853435262e-06,
"loss": 1.0865,
"step": 526000
},
{
"epoch": 13.8,
"learning_rate": 1.096046784322356e-06,
"loss": 1.0873,
"step": 526500
},
{
"epoch": 13.81,
"learning_rate": 1.0917909852749228e-06,
"loss": 1.0842,
"step": 527000
},
{
"epoch": 13.83,
"learning_rate": 1.0875411552495178e-06,
"loss": 1.0884,
"step": 527500
},
{
"epoch": 13.84,
"learning_rate": 1.0833057939589902e-06,
"loss": 1.0932,
"step": 528000
},
{
"epoch": 13.85,
"learning_rate": 1.0790679439659621e-06,
"loss": 1.0737,
"step": 528500
},
{
"epoch": 13.87,
"learning_rate": 1.074836116924536e-06,
"loss": 1.0812,
"step": 529000
},
{
"epoch": 13.88,
"learning_rate": 1.0706103307722973e-06,
"loss": 1.1034,
"step": 529500
},
{
"epoch": 13.89,
"learning_rate": 1.0663990368173505e-06,
"loss": 1.0821,
"step": 530000
},
{
"epoch": 13.91,
"learning_rate": 1.062185373982534e-06,
"loss": 1.072,
"step": 530500
},
{
"epoch": 13.92,
"learning_rate": 1.0579778056600256e-06,
"loss": 1.0839,
"step": 531000
},
{
"epoch": 13.93,
"learning_rate": 1.053776349684586e-06,
"loss": 1.0819,
"step": 531500
},
{
"epoch": 13.95,
"learning_rate": 1.049581023865064e-06,
"loss": 1.0867,
"step": 532000
},
{
"epoch": 13.96,
"learning_rate": 1.0454002181926377e-06,
"loss": 1.0671,
"step": 532500
},
{
"epoch": 13.97,
"learning_rate": 1.0412255535423441e-06,
"loss": 1.0848,
"step": 533000
},
{
"epoch": 13.98,
"learning_rate": 1.0370487000144682e-06,
"loss": 1.078,
"step": 533500
},
{
"epoch": 14.0,
"learning_rate": 1.0328780475465507e-06,
"loss": 1.083,
"step": 534000
},
{
"epoch": 14.01,
"learning_rate": 1.0287136138168734e-06,
"loss": 1.0407,
"step": 534500
},
{
"epoch": 14.02,
"learning_rate": 1.0245554164773602e-06,
"loss": 1.0175,
"step": 535000
},
{
"epoch": 14.04,
"learning_rate": 1.0204034731534989e-06,
"loss": 1.0427,
"step": 535500
},
{
"epoch": 14.05,
"learning_rate": 1.0162578014442684e-06,
"loss": 1.037,
"step": 536000
},
{
"epoch": 14.06,
"learning_rate": 1.0121184189220635e-06,
"loss": 1.0289,
"step": 536500
},
{
"epoch": 14.08,
"learning_rate": 1.0079853431326231e-06,
"loss": 1.0478,
"step": 537000
},
{
"epoch": 14.09,
"learning_rate": 1.0038668387747919e-06,
"loss": 1.0367,
"step": 537500
},
{
"epoch": 14.1,
"learning_rate": 9.99746416280159e-07,
"loss": 1.0446,
"step": 538000
},
{
"epoch": 14.12,
"learning_rate": 9.956323529599061e-07,
"loss": 1.0428,
"step": 538500
},
{
"epoch": 14.13,
"learning_rate": 9.91532875250426e-07,
"loss": 1.0412,
"step": 539000
},
{
"epoch": 14.14,
"learning_rate": 9.874315697617445e-07,
"loss": 1.0476,
"step": 539500
},
{
"epoch": 14.16,
"learning_rate": 9.833366756467894e-07,
"loss": 1.033,
"step": 540000
},
{
"epoch": 14.17,
"learning_rate": 9.792482102627217e-07,
"loss": 1.0451,
"step": 540500
},
{
"epoch": 14.18,
"learning_rate": 9.751661909394571e-07,
"loss": 1.0456,
"step": 541000
},
{
"epoch": 14.19,
"learning_rate": 9.710906349795847e-07,
"loss": 1.0404,
"step": 541500
},
{
"epoch": 14.21,
"learning_rate": 9.670215596582979e-07,
"loss": 1.0376,
"step": 542000
},
{
"epoch": 14.22,
"learning_rate": 9.629589822233198e-07,
"loss": 1.0324,
"step": 542500
},
{
"epoch": 14.23,
"learning_rate": 9.589110255059818e-07,
"loss": 1.0485,
"step": 543000
},
{
"epoch": 14.25,
"learning_rate": 9.54869574950421e-07,
"loss": 1.0371,
"step": 543500
},
{
"epoch": 14.26,
"learning_rate": 9.50826568152911e-07,
"loss": 1.0325,
"step": 544000
},
{
"epoch": 14.27,
"learning_rate": 9.467901278879547e-07,
"loss": 1.026,
"step": 544500
},
{
"epoch": 14.29,
"learning_rate": 9.427602712649459e-07,
"loss": 1.0272,
"step": 545000
},
{
"epoch": 14.3,
"learning_rate": 9.387370153653708e-07,
"loss": 1.0499,
"step": 545500
},
{
"epoch": 14.31,
"learning_rate": 9.347203772427363e-07,
"loss": 1.0289,
"step": 546000
},
{
"epoch": 14.33,
"learning_rate": 9.307103739224985e-07,
"loss": 1.0375,
"step": 546500
},
{
"epoch": 14.34,
"learning_rate": 9.267070224019925e-07,
"loss": 1.0378,
"step": 547000
},
{
"epoch": 14.35,
"learning_rate": 9.227103396503556e-07,
"loss": 1.0401,
"step": 547500
},
{
"epoch": 14.37,
"learning_rate": 9.187203426084587e-07,
"loss": 1.029,
"step": 548000
},
{
"epoch": 14.38,
"learning_rate": 9.147450080772296e-07,
"loss": 1.0448,
"step": 548500
},
{
"epoch": 14.39,
"learning_rate": 9.10768419708154e-07,
"loss": 1.0397,
"step": 549000
},
{
"epoch": 14.4,
"learning_rate": 9.068065006374663e-07,
"loss": 1.049,
"step": 549500
},
{
"epoch": 14.42,
"learning_rate": 9.028433882291488e-07,
"loss": 1.0532,
"step": 550000
},
{
"epoch": 14.43,
"learning_rate": 8.988870457412748e-07,
"loss": 1.0347,
"step": 550500
},
{
"epoch": 14.44,
"learning_rate": 8.949374899437233e-07,
"loss": 1.0299,
"step": 551000
},
{
"epoch": 14.46,
"learning_rate": 8.909947375776068e-07,
"loss": 1.0431,
"step": 551500
},
{
"epoch": 14.47,
"learning_rate": 8.870666704020464e-07,
"loss": 1.0337,
"step": 552000
},
{
"epoch": 14.48,
"learning_rate": 8.831375613164294e-07,
"loss": 1.0288,
"step": 552500
},
{
"epoch": 14.5,
"learning_rate": 8.792153056789934e-07,
"loss": 1.0518,
"step": 553000
},
{
"epoch": 14.51,
"learning_rate": 8.752999201151344e-07,
"loss": 1.0395,
"step": 553500
},
{
"epoch": 14.52,
"learning_rate": 8.713914212211255e-07,
"loss": 1.0389,
"step": 554000
},
{
"epoch": 14.54,
"learning_rate": 8.674898255640501e-07,
"loss": 1.0383,
"step": 554500
},
{
"epoch": 14.55,
"learning_rate": 8.636029321165817e-07,
"loss": 1.0271,
"step": 555000
},
{
"epoch": 14.56,
"learning_rate": 8.597151786284866e-07,
"loss": 1.0468,
"step": 555500
},
{
"epoch": 14.57,
"learning_rate": 8.558343778697989e-07,
"loss": 1.0361,
"step": 556000
},
{
"epoch": 14.59,
"learning_rate": 8.519605462901987e-07,
"loss": 1.0345,
"step": 556500
},
{
"epoch": 14.6,
"learning_rate": 8.480937003098225e-07,
"loss": 1.0361,
"step": 557000
},
{
"epoch": 14.61,
"learning_rate": 8.442338563191984e-07,
"loss": 1.0274,
"step": 557500
},
{
"epoch": 14.63,
"learning_rate": 8.403810306791737e-07,
"loss": 1.0317,
"step": 558000
},
{
"epoch": 14.64,
"learning_rate": 8.36535239720849e-07,
"loss": 1.0504,
"step": 558500
},
{
"epoch": 14.65,
"learning_rate": 8.326964997455047e-07,
"loss": 1.0202,
"step": 559000
},
{
"epoch": 14.67,
"learning_rate": 8.28872483306056e-07,
"loss": 1.0332,
"step": 559500
},
{
"epoch": 14.68,
"learning_rate": 8.250478798977172e-07,
"loss": 1.0449,
"step": 560000
},
{
"epoch": 14.69,
"learning_rate": 8.212303761642099e-07,
"loss": 1.0466,
"step": 560500
},
{
"epoch": 14.71,
"learning_rate": 8.174199882869125e-07,
"loss": 1.0305,
"step": 561000
},
{
"epoch": 14.72,
"learning_rate": 8.136167324170433e-07,
"loss": 1.0399,
"step": 561500
},
{
"epoch": 14.73,
"learning_rate": 8.098282097465379e-07,
"loss": 1.0443,
"step": 562000
},
{
"epoch": 14.75,
"learning_rate": 8.060392518797067e-07,
"loss": 1.0334,
"step": 562500
},
{
"epoch": 14.76,
"learning_rate": 8.02257474260206e-07,
"loss": 1.0493,
"step": 563000
},
{
"epoch": 14.77,
"learning_rate": 7.98490434888142e-07,
"loss": 1.0498,
"step": 563500
},
{
"epoch": 14.78,
"learning_rate": 7.947230513821355e-07,
"loss": 1.0275,
"step": 564000
},
{
"epoch": 14.8,
"learning_rate": 7.909628960898113e-07,
"loss": 1.0517,
"step": 564500
},
{
"epoch": 14.81,
"learning_rate": 7.872099849494644e-07,
"loss": 1.0374,
"step": 565000
},
{
"epoch": 14.82,
"learning_rate": 7.83464333868684e-07,
"loss": 1.0497,
"step": 565500
},
{
"epoch": 14.84,
"learning_rate": 7.797259587242842e-07,
"loss": 1.0433,
"step": 566000
},
{
"epoch": 14.85,
"learning_rate": 7.75994875362239e-07,
"loss": 1.0476,
"step": 566500
},
{
"epoch": 14.86,
"learning_rate": 7.72271099597614e-07,
"loss": 1.0389,
"step": 567000
},
{
"epoch": 14.88,
"learning_rate": 7.685546472145017e-07,
"loss": 1.0438,
"step": 567500
},
{
"epoch": 14.89,
"learning_rate": 7.648603557785422e-07,
"loss": 1.0359,
"step": 568000
},
{
"epoch": 14.9,
"learning_rate": 7.6115856793579e-07,
"loss": 1.0527,
"step": 568500
},
{
"epoch": 14.92,
"learning_rate": 7.574715320461648e-07,
"loss": 1.034,
"step": 569000
},
{
"epoch": 14.93,
"learning_rate": 7.537844860442989e-07,
"loss": 1.0461,
"step": 569500
},
{
"epoch": 14.94,
"learning_rate": 7.50104841783765e-07,
"loss": 1.0558,
"step": 570000
},
{
"epoch": 14.95,
"learning_rate": 7.464326148615916e-07,
"loss": 1.0249,
"step": 570500
},
{
"epoch": 14.97,
"learning_rate": 7.427678208433686e-07,
"loss": 1.0517,
"step": 571000
},
{
"epoch": 14.98,
"learning_rate": 7.391177825104881e-07,
"loss": 1.0459,
"step": 571500
},
{
"epoch": 14.99,
"learning_rate": 7.354678859275119e-07,
"loss": 1.0349,
"step": 572000
},
{
"epoch": 15.01,
"learning_rate": 7.318254687250431e-07,
"loss": 1.0185,
"step": 572500
},
{
"epoch": 15.02,
"learning_rate": 7.281905463423178e-07,
"loss": 1.0175,
"step": 573000
},
{
"epoch": 15.03,
"learning_rate": 7.245631341868003e-07,
"loss": 1.0251,
"step": 573500
},
{
"epoch": 15.05,
"learning_rate": 7.209432476341224e-07,
"loss": 1.0218,
"step": 574000
},
{
"epoch": 15.06,
"learning_rate": 7.173309020280161e-07,
"loss": 1.0179,
"step": 574500
},
{
"epoch": 15.07,
"learning_rate": 7.137261126802514e-07,
"loss": 1.0104,
"step": 575000
},
{
"epoch": 15.09,
"learning_rate": 7.10128894870567e-07,
"loss": 1.0149,
"step": 575500
},
{
"epoch": 15.1,
"learning_rate": 7.065392638466087e-07,
"loss": 1.0097,
"step": 576000
},
{
"epoch": 15.11,
"learning_rate": 7.029572348238634e-07,
"loss": 1.0106,
"step": 576500
},
{
"epoch": 15.13,
"learning_rate": 6.99382822985597e-07,
"loss": 1.0243,
"step": 577000
},
{
"epoch": 15.14,
"learning_rate": 6.958231694146666e-07,
"loss": 1.0302,
"step": 577500
},
{
"epoch": 15.15,
"learning_rate": 6.922640220559629e-07,
"loss": 1.0229,
"step": 578000
},
{
"epoch": 15.16,
"learning_rate": 6.887125372074113e-07,
"loss": 1.0252,
"step": 578500
},
{
"epoch": 15.18,
"learning_rate": 6.851687299228074e-07,
"loss": 1.0202,
"step": 579000
},
{
"epoch": 15.19,
"learning_rate": 6.816326152234052e-07,
"loss": 1.034,
"step": 579500
},
{
"epoch": 15.2,
"learning_rate": 6.78111257209996e-07,
"loss": 1.0314,
"step": 580000
},
{
"epoch": 15.22,
"learning_rate": 6.745905571543002e-07,
"loss": 1.0312,
"step": 580500
},
{
"epoch": 15.23,
"learning_rate": 6.710775945218554e-07,
"loss": 1.019,
"step": 581000
},
{
"epoch": 15.24,
"learning_rate": 6.675723842031722e-07,
"loss": 1.0374,
"step": 581500
},
{
"epoch": 15.26,
"learning_rate": 6.640749410559011e-07,
"loss": 1.0248,
"step": 582000
},
{
"epoch": 15.27,
"learning_rate": 6.605922514508042e-07,
"loss": 1.0305,
"step": 582500
},
{
"epoch": 15.28,
"learning_rate": 6.571103714792401e-07,
"loss": 1.0334,
"step": 583000
},
{
"epoch": 15.3,
"learning_rate": 6.536363030247672e-07,
"loss": 1.0313,
"step": 583500
},
{
"epoch": 15.31,
"learning_rate": 6.501700608130337e-07,
"loss": 1.0269,
"step": 584000
},
{
"epoch": 15.32,
"learning_rate": 6.467116595365149e-07,
"loss": 1.0397,
"step": 584500
},
{
"epoch": 15.33,
"learning_rate": 6.432680070962052e-07,
"loss": 1.0271,
"step": 585000
},
{
"epoch": 15.35,
"learning_rate": 6.398253158795223e-07,
"loss": 1.0367,
"step": 585500
},
{
"epoch": 15.36,
"learning_rate": 6.363905094466666e-07,
"loss": 1.0408,
"step": 586000
},
{
"epoch": 15.37,
"learning_rate": 6.32963602356865e-07,
"loss": 1.0184,
"step": 586500
},
{
"epoch": 15.39,
"learning_rate": 6.295446091358618e-07,
"loss": 1.0284,
"step": 587000
},
{
"epoch": 15.4,
"learning_rate": 6.261335442758554e-07,
"loss": 1.0337,
"step": 587500
},
{
"epoch": 15.41,
"learning_rate": 6.227372205429946e-07,
"loss": 1.0273,
"step": 588000
},
{
"epoch": 15.43,
"learning_rate": 6.193420398182335e-07,
"loss": 1.0237,
"step": 588500
},
{
"epoch": 15.44,
"learning_rate": 6.159548307004379e-07,
"loss": 1.0253,
"step": 589000
},
{
"epoch": 15.45,
"learning_rate": 6.125756075470824e-07,
"loss": 1.0333,
"step": 589500
},
{
"epoch": 15.47,
"learning_rate": 6.092043846817916e-07,
"loss": 1.033,
"step": 590000
},
{
"epoch": 15.48,
"learning_rate": 6.058411763942787e-07,
"loss": 1.0371,
"step": 590500
},
{
"epoch": 15.49,
"learning_rate": 6.024926992769612e-07,
"loss": 1.0252,
"step": 591000
},
{
"epoch": 15.51,
"learning_rate": 5.991455467779178e-07,
"loss": 1.0212,
"step": 591500
},
{
"epoch": 15.52,
"learning_rate": 5.958064514933821e-07,
"loss": 1.04,
"step": 592000
},
{
"epoch": 15.53,
"learning_rate": 5.924754275768859e-07,
"loss": 1.023,
"step": 592500
},
{
"epoch": 15.54,
"learning_rate": 5.891591269459249e-07,
"loss": 1.0252,
"step": 593000
},
{
"epoch": 15.56,
"learning_rate": 5.85844271876021e-07,
"loss": 1.0256,
"step": 593500
},
{
"epoch": 15.57,
"learning_rate": 5.825375304011768e-07,
"loss": 1.0333,
"step": 594000
},
{
"epoch": 15.58,
"learning_rate": 5.792455056448578e-07,
"loss": 1.039,
"step": 594500
},
{
"epoch": 15.6,
"learning_rate": 5.759550170777461e-07,
"loss": 1.0357,
"step": 595000
},
{
"epoch": 15.61,
"learning_rate": 5.726726840236046e-07,
"loss": 1.0314,
"step": 595500
},
{
"epoch": 15.62,
"learning_rate": 5.693985203953675e-07,
"loss": 1.0136,
"step": 596000
},
{
"epoch": 15.64,
"learning_rate": 5.661325400713397e-07,
"loss": 1.012,
"step": 596500
},
{
"epoch": 15.65,
"learning_rate": 5.628747568951393e-07,
"loss": 1.0314,
"step": 597000
}
],
"max_steps": 762960,
"num_train_epochs": 20,
"total_flos": 7.136369757666017e+17,
"trial_name": null,
"trial_params": null
}