covid-vaccine-twitter-bert / trainer_state.json
YidaM4396's picture
Upload 11 files
ff806f8
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9646153910676615,
"global_step": 690000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.998839665544078e-05,
"loss": 1.4934,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.9976746711104215e-05,
"loss": 1.3318,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.996509676676765e-05,
"loss": 1.2797,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 4.995344682243108e-05,
"loss": 1.244,
"step": 2000
},
{
"epoch": 0.0,
"learning_rate": 4.994186677776054e-05,
"loss": 1.2432,
"step": 2500
},
{
"epoch": 0.0,
"learning_rate": 4.993026343320132e-05,
"loss": 1.2192,
"step": 3000
},
{
"epoch": 0.0,
"learning_rate": 4.991863678875343e-05,
"loss": 1.4231,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 4.990701014430553e-05,
"loss": 1.6854,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.989536019996897e-05,
"loss": 1.3492,
"step": 4500
},
{
"epoch": 0.01,
"learning_rate": 4.98837102556324e-05,
"loss": 1.3758,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 4.987206031129584e-05,
"loss": 1.2596,
"step": 5500
},
{
"epoch": 0.01,
"learning_rate": 4.986041036695927e-05,
"loss": 1.5168,
"step": 6000
},
{
"epoch": 0.01,
"learning_rate": 4.9848760422622705e-05,
"loss": 1.465,
"step": 6500
},
{
"epoch": 0.01,
"learning_rate": 4.983713377817481e-05,
"loss": 1.408,
"step": 7000
},
{
"epoch": 0.01,
"learning_rate": 4.982550713372692e-05,
"loss": 1.6209,
"step": 7500
},
{
"epoch": 0.01,
"learning_rate": 4.981385718939035e-05,
"loss": 1.681,
"step": 8000
},
{
"epoch": 0.01,
"learning_rate": 4.980220724505378e-05,
"loss": 1.6519,
"step": 8500
},
{
"epoch": 0.01,
"learning_rate": 4.979055730071722e-05,
"loss": 1.8553,
"step": 9000
},
{
"epoch": 0.01,
"learning_rate": 4.9778907356380654e-05,
"loss": 1.7466,
"step": 9500
},
{
"epoch": 0.01,
"learning_rate": 4.976728071193276e-05,
"loss": 1.7394,
"step": 10000
},
{
"epoch": 0.01,
"learning_rate": 4.9755630767596196e-05,
"loss": 1.648,
"step": 10500
},
{
"epoch": 0.02,
"learning_rate": 4.974398082325963e-05,
"loss": 1.788,
"step": 11000
},
{
"epoch": 0.02,
"learning_rate": 4.973233087892306e-05,
"loss": 2.0314,
"step": 11500
},
{
"epoch": 0.02,
"learning_rate": 4.972068093458649e-05,
"loss": 1.7554,
"step": 12000
},
{
"epoch": 0.02,
"learning_rate": 4.97090542901386e-05,
"loss": 1.66,
"step": 12500
},
{
"epoch": 0.02,
"learning_rate": 4.969740434580204e-05,
"loss": 1.5643,
"step": 13000
},
{
"epoch": 0.02,
"learning_rate": 4.9685777701354144e-05,
"loss": 1.4357,
"step": 13500
},
{
"epoch": 0.02,
"learning_rate": 4.967412775701758e-05,
"loss": 1.4221,
"step": 14000
},
{
"epoch": 0.02,
"learning_rate": 4.9662477812681016e-05,
"loss": 1.3905,
"step": 14500
},
{
"epoch": 0.02,
"learning_rate": 4.965082786834445e-05,
"loss": 1.5023,
"step": 15000
},
{
"epoch": 0.02,
"learning_rate": 4.963917792400788e-05,
"loss": 1.298,
"step": 15500
},
{
"epoch": 0.02,
"learning_rate": 4.962752797967132e-05,
"loss": 1.1518,
"step": 16000
},
{
"epoch": 0.02,
"learning_rate": 4.961587803533475e-05,
"loss": 1.1188,
"step": 16500
},
{
"epoch": 0.02,
"learning_rate": 4.9604228090998185e-05,
"loss": 1.1016,
"step": 17000
},
{
"epoch": 0.02,
"learning_rate": 4.959257814666162e-05,
"loss": 1.0946,
"step": 17500
},
{
"epoch": 0.03,
"learning_rate": 4.958092820232505e-05,
"loss": 1.0894,
"step": 18000
},
{
"epoch": 0.03,
"learning_rate": 4.956927825798849e-05,
"loss": 1.07,
"step": 18500
},
{
"epoch": 0.03,
"learning_rate": 4.955762831365192e-05,
"loss": 1.0765,
"step": 19000
},
{
"epoch": 0.03,
"learning_rate": 4.954597836931535e-05,
"loss": 1.1306,
"step": 19500
},
{
"epoch": 0.03,
"learning_rate": 4.9534328424978786e-05,
"loss": 1.0957,
"step": 20000
},
{
"epoch": 0.03,
"learning_rate": 4.952267848064222e-05,
"loss": 1.0914,
"step": 20500
},
{
"epoch": 0.03,
"learning_rate": 4.951102853630565e-05,
"loss": 1.0757,
"step": 21000
},
{
"epoch": 0.03,
"learning_rate": 4.949937859196909e-05,
"loss": 1.0464,
"step": 21500
},
{
"epoch": 0.03,
"learning_rate": 4.948772864763253e-05,
"loss": 1.0437,
"step": 22000
},
{
"epoch": 0.03,
"learning_rate": 4.947610200318463e-05,
"loss": 1.0503,
"step": 22500
},
{
"epoch": 0.03,
"learning_rate": 4.9464452058848064e-05,
"loss": 1.0416,
"step": 23000
},
{
"epoch": 0.03,
"learning_rate": 4.9452802114511496e-05,
"loss": 1.0405,
"step": 23500
},
{
"epoch": 0.03,
"learning_rate": 4.944115217017493e-05,
"loss": 1.0279,
"step": 24000
},
{
"epoch": 0.03,
"learning_rate": 4.942950222583837e-05,
"loss": 1.0252,
"step": 24500
},
{
"epoch": 0.03,
"learning_rate": 4.94178522815018e-05,
"loss": 1.0149,
"step": 25000
},
{
"epoch": 0.04,
"learning_rate": 4.940620233716524e-05,
"loss": 1.0163,
"step": 25500
},
{
"epoch": 0.04,
"learning_rate": 4.939455239282867e-05,
"loss": 1.0117,
"step": 26000
},
{
"epoch": 0.04,
"learning_rate": 4.9382902448492104e-05,
"loss": 1.0097,
"step": 26500
},
{
"epoch": 0.04,
"learning_rate": 4.9371252504155536e-05,
"loss": 1.0097,
"step": 27000
},
{
"epoch": 0.04,
"learning_rate": 4.9359602559818975e-05,
"loss": 1.0041,
"step": 27500
},
{
"epoch": 0.04,
"learning_rate": 4.934795261548241e-05,
"loss": 1.008,
"step": 28000
},
{
"epoch": 0.04,
"learning_rate": 4.933630267114584e-05,
"loss": 1.0131,
"step": 28500
},
{
"epoch": 0.04,
"learning_rate": 4.932465272680927e-05,
"loss": 1.0003,
"step": 29000
},
{
"epoch": 0.04,
"learning_rate": 4.931302608236138e-05,
"loss": 1.0064,
"step": 29500
},
{
"epoch": 0.04,
"learning_rate": 4.9301376138024815e-05,
"loss": 1.0111,
"step": 30000
},
{
"epoch": 0.04,
"learning_rate": 4.928974949357692e-05,
"loss": 1.0034,
"step": 30500
},
{
"epoch": 0.04,
"learning_rate": 4.9278099549240357e-05,
"loss": 1.0013,
"step": 31000
},
{
"epoch": 0.04,
"learning_rate": 4.926644960490379e-05,
"loss": 0.9905,
"step": 31500
},
{
"epoch": 0.04,
"learning_rate": 4.92548229604559e-05,
"loss": 0.992,
"step": 32000
},
{
"epoch": 0.05,
"learning_rate": 4.924317301611933e-05,
"loss": 0.9861,
"step": 32500
},
{
"epoch": 0.05,
"learning_rate": 4.923152307178276e-05,
"loss": 0.9846,
"step": 33000
},
{
"epoch": 0.05,
"learning_rate": 4.9219873127446196e-05,
"loss": 0.9831,
"step": 33500
},
{
"epoch": 0.05,
"learning_rate": 4.920822318310963e-05,
"loss": 0.985,
"step": 34000
},
{
"epoch": 0.05,
"learning_rate": 4.919657323877307e-05,
"loss": 0.9822,
"step": 34500
},
{
"epoch": 0.05,
"learning_rate": 4.91849232944365e-05,
"loss": 0.9923,
"step": 35000
},
{
"epoch": 0.05,
"learning_rate": 4.917327335009994e-05,
"loss": 0.9875,
"step": 35500
},
{
"epoch": 0.05,
"learning_rate": 4.916162340576337e-05,
"loss": 0.9812,
"step": 36000
},
{
"epoch": 0.05,
"learning_rate": 4.9149973461426804e-05,
"loss": 0.9751,
"step": 36500
},
{
"epoch": 0.05,
"learning_rate": 4.913832351709024e-05,
"loss": 0.9709,
"step": 37000
},
{
"epoch": 0.05,
"learning_rate": 4.9126673572753675e-05,
"loss": 0.984,
"step": 37500
},
{
"epoch": 0.05,
"learning_rate": 4.911502362841711e-05,
"loss": 0.9851,
"step": 38000
},
{
"epoch": 0.05,
"learning_rate": 4.910337368408054e-05,
"loss": 0.9652,
"step": 38500
},
{
"epoch": 0.05,
"learning_rate": 4.909172373974397e-05,
"loss": 0.9695,
"step": 39000
},
{
"epoch": 0.06,
"learning_rate": 4.9080073795407405e-05,
"loss": 0.9655,
"step": 39500
},
{
"epoch": 0.06,
"learning_rate": 4.9068423851070844e-05,
"loss": 0.9629,
"step": 40000
},
{
"epoch": 0.06,
"learning_rate": 4.9056773906734276e-05,
"loss": 0.956,
"step": 40500
},
{
"epoch": 0.06,
"learning_rate": 4.904512396239771e-05,
"loss": 0.9642,
"step": 41000
},
{
"epoch": 0.06,
"learning_rate": 4.903347401806114e-05,
"loss": 0.9581,
"step": 41500
},
{
"epoch": 0.06,
"learning_rate": 4.902182407372457e-05,
"loss": 0.9582,
"step": 42000
},
{
"epoch": 0.06,
"learning_rate": 4.901017412938801e-05,
"loss": 0.9511,
"step": 42500
},
{
"epoch": 0.06,
"learning_rate": 4.8998524185051445e-05,
"loss": 0.9475,
"step": 43000
},
{
"epoch": 0.06,
"learning_rate": 4.8986944140380895e-05,
"loss": 0.9716,
"step": 43500
},
{
"epoch": 0.06,
"learning_rate": 4.8975317495933005e-05,
"loss": 0.9773,
"step": 44000
},
{
"epoch": 0.06,
"learning_rate": 4.8963667551596444e-05,
"loss": 0.9565,
"step": 44500
},
{
"epoch": 0.06,
"learning_rate": 4.8952017607259876e-05,
"loss": 0.9489,
"step": 45000
},
{
"epoch": 0.06,
"learning_rate": 4.8940390962811986e-05,
"loss": 0.9542,
"step": 45500
},
{
"epoch": 0.06,
"learning_rate": 4.892874101847542e-05,
"loss": 0.9575,
"step": 46000
},
{
"epoch": 0.07,
"learning_rate": 4.891711437402752e-05,
"loss": 0.954,
"step": 46500
},
{
"epoch": 0.07,
"learning_rate": 4.8905464429690954e-05,
"loss": 0.9449,
"step": 47000
},
{
"epoch": 0.07,
"learning_rate": 4.8893814485354386e-05,
"loss": 0.9468,
"step": 47500
},
{
"epoch": 0.07,
"learning_rate": 4.8882164541017825e-05,
"loss": 0.9457,
"step": 48000
},
{
"epoch": 0.07,
"learning_rate": 4.887051459668126e-05,
"loss": 0.9456,
"step": 48500
},
{
"epoch": 0.07,
"learning_rate": 4.88588646523447e-05,
"loss": 0.9636,
"step": 49000
},
{
"epoch": 0.07,
"learning_rate": 4.884721470800813e-05,
"loss": 0.9583,
"step": 49500
},
{
"epoch": 0.07,
"learning_rate": 4.883558806356023e-05,
"loss": 0.9474,
"step": 50000
},
{
"epoch": 0.07,
"learning_rate": 4.882396141911234e-05,
"loss": 1.0342,
"step": 50500
},
{
"epoch": 0.07,
"learning_rate": 4.8812311474775774e-05,
"loss": 1.0187,
"step": 51000
},
{
"epoch": 0.07,
"learning_rate": 4.880066153043921e-05,
"loss": 0.9742,
"step": 51500
},
{
"epoch": 0.07,
"learning_rate": 4.8789011586102645e-05,
"loss": 0.9483,
"step": 52000
},
{
"epoch": 0.07,
"learning_rate": 4.877736164176608e-05,
"loss": 0.9584,
"step": 52500
},
{
"epoch": 0.07,
"learning_rate": 4.876571169742951e-05,
"loss": 0.959,
"step": 53000
},
{
"epoch": 0.07,
"learning_rate": 4.875406175309294e-05,
"loss": 0.9821,
"step": 53500
},
{
"epoch": 0.08,
"learning_rate": 4.8742411808756375e-05,
"loss": 0.9683,
"step": 54000
},
{
"epoch": 0.08,
"learning_rate": 4.8730785164308485e-05,
"loss": 0.9494,
"step": 54500
},
{
"epoch": 0.08,
"learning_rate": 4.8719135219971924e-05,
"loss": 0.9441,
"step": 55000
},
{
"epoch": 0.08,
"learning_rate": 4.8707485275635356e-05,
"loss": 0.9676,
"step": 55500
},
{
"epoch": 0.08,
"learning_rate": 4.869583533129879e-05,
"loss": 0.9357,
"step": 56000
},
{
"epoch": 0.08,
"learning_rate": 4.868418538696222e-05,
"loss": 0.9492,
"step": 56500
},
{
"epoch": 0.08,
"learning_rate": 4.867253544262565e-05,
"loss": 0.9378,
"step": 57000
},
{
"epoch": 0.08,
"learning_rate": 4.866088549828909e-05,
"loss": 0.9318,
"step": 57500
},
{
"epoch": 0.08,
"learning_rate": 4.8649235553952525e-05,
"loss": 0.9271,
"step": 58000
},
{
"epoch": 0.08,
"learning_rate": 4.863758560961596e-05,
"loss": 0.9432,
"step": 58500
},
{
"epoch": 0.08,
"learning_rate": 4.8625935665279396e-05,
"loss": 0.931,
"step": 59000
},
{
"epoch": 0.08,
"learning_rate": 4.861428572094283e-05,
"loss": 0.9315,
"step": 59500
},
{
"epoch": 0.08,
"learning_rate": 4.860263577660626e-05,
"loss": 0.9427,
"step": 60000
},
{
"epoch": 0.08,
"learning_rate": 4.859100913215837e-05,
"loss": 0.9496,
"step": 60500
},
{
"epoch": 0.09,
"learning_rate": 4.85793591878218e-05,
"loss": 0.9518,
"step": 61000
},
{
"epoch": 0.09,
"learning_rate": 4.8567709243485235e-05,
"loss": 0.933,
"step": 61500
},
{
"epoch": 0.09,
"learning_rate": 4.855605929914867e-05,
"loss": 0.9394,
"step": 62000
},
{
"epoch": 0.09,
"learning_rate": 4.854443265470078e-05,
"loss": 0.9464,
"step": 62500
},
{
"epoch": 0.09,
"learning_rate": 4.853278271036421e-05,
"loss": 0.9254,
"step": 63000
},
{
"epoch": 0.09,
"learning_rate": 4.852113276602764e-05,
"loss": 0.9263,
"step": 63500
},
{
"epoch": 0.09,
"learning_rate": 4.850948282169108e-05,
"loss": 0.9255,
"step": 64000
},
{
"epoch": 0.09,
"learning_rate": 4.8497832877354514e-05,
"loss": 0.9209,
"step": 64500
},
{
"epoch": 0.09,
"learning_rate": 4.8486182933017946e-05,
"loss": 0.9194,
"step": 65000
},
{
"epoch": 0.09,
"learning_rate": 4.847453298868138e-05,
"loss": 0.9198,
"step": 65500
},
{
"epoch": 0.09,
"learning_rate": 4.846288304434482e-05,
"loss": 0.9218,
"step": 66000
},
{
"epoch": 0.09,
"learning_rate": 4.845123310000825e-05,
"loss": 0.9217,
"step": 66500
},
{
"epoch": 0.09,
"learning_rate": 4.843958315567169e-05,
"loss": 0.9279,
"step": 67000
},
{
"epoch": 0.09,
"learning_rate": 4.842793321133512e-05,
"loss": 0.9383,
"step": 67500
},
{
"epoch": 0.1,
"learning_rate": 4.8416283266998554e-05,
"loss": 0.9232,
"step": 68000
},
{
"epoch": 0.1,
"learning_rate": 4.8404633322661986e-05,
"loss": 0.9182,
"step": 68500
},
{
"epoch": 0.1,
"learning_rate": 4.839300667821409e-05,
"loss": 0.9315,
"step": 69000
},
{
"epoch": 0.1,
"learning_rate": 4.838135673387753e-05,
"loss": 0.9224,
"step": 69500
},
{
"epoch": 0.1,
"learning_rate": 4.836973008942963e-05,
"loss": 0.9153,
"step": 70000
},
{
"epoch": 0.1,
"learning_rate": 4.835808014509307e-05,
"loss": 0.9168,
"step": 70500
},
{
"epoch": 0.1,
"learning_rate": 4.83464302007565e-05,
"loss": 0.9121,
"step": 71000
},
{
"epoch": 0.1,
"learning_rate": 4.8334780256419935e-05,
"loss": 0.9196,
"step": 71500
},
{
"epoch": 0.1,
"learning_rate": 4.832313031208337e-05,
"loss": 0.9037,
"step": 72000
},
{
"epoch": 0.1,
"learning_rate": 4.8311480367746806e-05,
"loss": 0.9065,
"step": 72500
},
{
"epoch": 0.1,
"learning_rate": 4.829983042341024e-05,
"loss": 0.9086,
"step": 73000
},
{
"epoch": 0.1,
"learning_rate": 4.828818047907368e-05,
"loss": 0.9017,
"step": 73500
},
{
"epoch": 0.1,
"learning_rate": 4.827653053473711e-05,
"loss": 0.9111,
"step": 74000
},
{
"epoch": 0.1,
"learning_rate": 4.826488059040054e-05,
"loss": 0.9074,
"step": 74500
},
{
"epoch": 0.1,
"learning_rate": 4.8253230646063975e-05,
"loss": 0.9048,
"step": 75000
},
{
"epoch": 0.11,
"learning_rate": 4.824158070172741e-05,
"loss": 0.9062,
"step": 75500
},
{
"epoch": 0.11,
"learning_rate": 4.8229930757390847e-05,
"loss": 0.9056,
"step": 76000
},
{
"epoch": 0.11,
"learning_rate": 4.821830411294295e-05,
"loss": 0.9076,
"step": 76500
},
{
"epoch": 0.11,
"learning_rate": 4.820665416860639e-05,
"loss": 0.9077,
"step": 77000
},
{
"epoch": 0.11,
"learning_rate": 4.819502752415849e-05,
"loss": 0.9056,
"step": 77500
},
{
"epoch": 0.11,
"learning_rate": 4.8183377579821924e-05,
"loss": 0.9144,
"step": 78000
},
{
"epoch": 0.11,
"learning_rate": 4.8171727635485356e-05,
"loss": 0.9062,
"step": 78500
},
{
"epoch": 0.11,
"learning_rate": 4.816007769114879e-05,
"loss": 0.9229,
"step": 79000
},
{
"epoch": 0.11,
"learning_rate": 4.814842774681223e-05,
"loss": 0.9129,
"step": 79500
},
{
"epoch": 0.11,
"learning_rate": 4.813677780247566e-05,
"loss": 0.9158,
"step": 80000
},
{
"epoch": 0.11,
"learning_rate": 4.81251278581391e-05,
"loss": 0.9036,
"step": 80500
},
{
"epoch": 0.11,
"learning_rate": 4.81135012136912e-05,
"loss": 0.9322,
"step": 81000
},
{
"epoch": 0.11,
"learning_rate": 4.8101851269354634e-05,
"loss": 0.9122,
"step": 81500
},
{
"epoch": 0.11,
"learning_rate": 4.809020132501807e-05,
"loss": 0.9021,
"step": 82000
},
{
"epoch": 0.12,
"learning_rate": 4.8078551380681506e-05,
"loss": 0.8997,
"step": 82500
},
{
"epoch": 0.12,
"learning_rate": 4.806690143634494e-05,
"loss": 0.901,
"step": 83000
},
{
"epoch": 0.12,
"learning_rate": 4.805525149200837e-05,
"loss": 0.8983,
"step": 83500
},
{
"epoch": 0.12,
"learning_rate": 4.804360154767181e-05,
"loss": 0.898,
"step": 84000
},
{
"epoch": 0.12,
"learning_rate": 4.803195160333524e-05,
"loss": 0.8959,
"step": 84500
},
{
"epoch": 0.12,
"learning_rate": 4.802034825877602e-05,
"loss": 0.9191,
"step": 85000
},
{
"epoch": 0.12,
"learning_rate": 4.8008698314439455e-05,
"loss": 0.9007,
"step": 85500
},
{
"epoch": 0.12,
"learning_rate": 4.799704837010289e-05,
"loss": 0.8908,
"step": 86000
},
{
"epoch": 0.12,
"learning_rate": 4.7985398425766326e-05,
"loss": 0.9045,
"step": 86500
},
{
"epoch": 0.12,
"learning_rate": 4.797374848142976e-05,
"loss": 0.8994,
"step": 87000
},
{
"epoch": 0.12,
"learning_rate": 4.796209853709319e-05,
"loss": 0.8981,
"step": 87500
},
{
"epoch": 0.12,
"learning_rate": 4.795044859275662e-05,
"loss": 0.8943,
"step": 88000
},
{
"epoch": 0.12,
"learning_rate": 4.7938798648420056e-05,
"loss": 0.9004,
"step": 88500
},
{
"epoch": 0.12,
"learning_rate": 4.7927148704083495e-05,
"loss": 0.8906,
"step": 89000
},
{
"epoch": 0.13,
"learning_rate": 4.791549875974693e-05,
"loss": 0.8881,
"step": 89500
},
{
"epoch": 0.13,
"learning_rate": 4.790384881541036e-05,
"loss": 0.889,
"step": 90000
},
{
"epoch": 0.13,
"learning_rate": 4.78921988710738e-05,
"loss": 0.8866,
"step": 90500
},
{
"epoch": 0.13,
"learning_rate": 4.788054892673723e-05,
"loss": 0.8868,
"step": 91000
},
{
"epoch": 0.13,
"learning_rate": 4.7868922282289334e-05,
"loss": 0.8909,
"step": 91500
},
{
"epoch": 0.13,
"learning_rate": 4.785727233795277e-05,
"loss": 0.8825,
"step": 92000
},
{
"epoch": 0.13,
"learning_rate": 4.7845622393616205e-05,
"loss": 0.884,
"step": 92500
},
{
"epoch": 0.13,
"learning_rate": 4.783397244927964e-05,
"loss": 0.8906,
"step": 93000
},
{
"epoch": 0.13,
"learning_rate": 4.782232250494307e-05,
"loss": 0.8841,
"step": 93500
},
{
"epoch": 0.13,
"learning_rate": 4.781067256060651e-05,
"loss": 0.8847,
"step": 94000
},
{
"epoch": 0.13,
"learning_rate": 4.779902261626994e-05,
"loss": 0.8936,
"step": 94500
},
{
"epoch": 0.13,
"learning_rate": 4.7787395971822045e-05,
"loss": 0.8905,
"step": 95000
},
{
"epoch": 0.13,
"learning_rate": 4.7775769327374154e-05,
"loss": 0.8817,
"step": 95500
},
{
"epoch": 0.13,
"learning_rate": 4.776411938303759e-05,
"loss": 0.8854,
"step": 96000
},
{
"epoch": 0.13,
"learning_rate": 4.7752492738589696e-05,
"loss": 0.8862,
"step": 96500
},
{
"epoch": 0.14,
"learning_rate": 4.7740842794253135e-05,
"loss": 0.8958,
"step": 97000
},
{
"epoch": 0.14,
"learning_rate": 4.772919284991657e-05,
"loss": 0.9013,
"step": 97500
},
{
"epoch": 0.14,
"learning_rate": 4.771754290558e-05,
"loss": 0.8853,
"step": 98000
},
{
"epoch": 0.14,
"learning_rate": 4.770589296124343e-05,
"loss": 0.8809,
"step": 98500
},
{
"epoch": 0.14,
"learning_rate": 4.769424301690687e-05,
"loss": 0.8817,
"step": 99000
},
{
"epoch": 0.14,
"learning_rate": 4.7682593072570304e-05,
"loss": 0.8817,
"step": 99500
},
{
"epoch": 0.14,
"learning_rate": 4.7670943128233736e-05,
"loss": 0.881,
"step": 100000
},
{
"epoch": 0.14,
"learning_rate": 4.765929318389717e-05,
"loss": 0.9004,
"step": 100500
},
{
"epoch": 0.14,
"learning_rate": 4.76476432395606e-05,
"loss": 0.8794,
"step": 101000
},
{
"epoch": 0.14,
"learning_rate": 4.7635993295224033e-05,
"loss": 0.8715,
"step": 101500
},
{
"epoch": 0.14,
"learning_rate": 4.762434335088747e-05,
"loss": 0.8809,
"step": 102000
},
{
"epoch": 0.14,
"learning_rate": 4.7612693406550905e-05,
"loss": 0.8814,
"step": 102500
},
{
"epoch": 0.14,
"learning_rate": 4.760104346221434e-05,
"loss": 0.8783,
"step": 103000
},
{
"epoch": 0.14,
"learning_rate": 4.758939351787777e-05,
"loss": 0.8761,
"step": 103500
},
{
"epoch": 0.15,
"learning_rate": 4.757776687342988e-05,
"loss": 0.8793,
"step": 104000
},
{
"epoch": 0.15,
"learning_rate": 4.756611692909331e-05,
"loss": 0.8768,
"step": 104500
},
{
"epoch": 0.15,
"learning_rate": 4.755446698475675e-05,
"loss": 0.8768,
"step": 105000
},
{
"epoch": 0.15,
"learning_rate": 4.754281704042018e-05,
"loss": 0.8745,
"step": 105500
},
{
"epoch": 0.15,
"learning_rate": 4.7531167096083616e-05,
"loss": 0.8727,
"step": 106000
},
{
"epoch": 0.15,
"learning_rate": 4.7519563751524396e-05,
"loss": 0.8746,
"step": 106500
},
{
"epoch": 0.15,
"learning_rate": 4.7507913807187835e-05,
"loss": 0.8743,
"step": 107000
},
{
"epoch": 0.15,
"learning_rate": 4.749626386285127e-05,
"loss": 0.8775,
"step": 107500
},
{
"epoch": 0.15,
"learning_rate": 4.74846139185147e-05,
"loss": 0.8712,
"step": 108000
},
{
"epoch": 0.15,
"learning_rate": 4.747296397417814e-05,
"loss": 0.8725,
"step": 108500
},
{
"epoch": 0.15,
"learning_rate": 4.746133732973024e-05,
"loss": 0.8777,
"step": 109000
},
{
"epoch": 0.15,
"learning_rate": 4.7449687385393674e-05,
"loss": 0.8891,
"step": 109500
},
{
"epoch": 0.15,
"learning_rate": 4.7438037441057106e-05,
"loss": 0.8747,
"step": 110000
},
{
"epoch": 0.15,
"learning_rate": 4.7426387496720546e-05,
"loss": 0.8706,
"step": 110500
},
{
"epoch": 0.16,
"learning_rate": 4.741476085227265e-05,
"loss": 0.8772,
"step": 111000
},
{
"epoch": 0.16,
"learning_rate": 4.740311090793608e-05,
"loss": 0.8666,
"step": 111500
},
{
"epoch": 0.16,
"learning_rate": 4.739146096359951e-05,
"loss": 0.8743,
"step": 112000
},
{
"epoch": 0.16,
"learning_rate": 4.737981101926295e-05,
"loss": 0.8679,
"step": 112500
},
{
"epoch": 0.16,
"learning_rate": 4.736818437481506e-05,
"loss": 0.8814,
"step": 113000
},
{
"epoch": 0.16,
"learning_rate": 4.7356534430478494e-05,
"loss": 0.8725,
"step": 113500
},
{
"epoch": 0.16,
"learning_rate": 4.734488448614193e-05,
"loss": 0.8693,
"step": 114000
},
{
"epoch": 0.16,
"learning_rate": 4.733323454180536e-05,
"loss": 0.8713,
"step": 114500
},
{
"epoch": 0.16,
"learning_rate": 4.732158459746879e-05,
"loss": 0.8712,
"step": 115000
},
{
"epoch": 0.16,
"learning_rate": 4.73099579530209e-05,
"loss": 0.8651,
"step": 115500
},
{
"epoch": 0.16,
"learning_rate": 4.729830800868434e-05,
"loss": 0.8671,
"step": 116000
},
{
"epoch": 0.16,
"learning_rate": 4.728665806434777e-05,
"loss": 0.8665,
"step": 116500
},
{
"epoch": 0.16,
"learning_rate": 4.7275008120011205e-05,
"loss": 0.8701,
"step": 117000
},
{
"epoch": 0.16,
"learning_rate": 4.726335817567464e-05,
"loss": 0.8673,
"step": 117500
},
{
"epoch": 0.16,
"learning_rate": 4.725170823133807e-05,
"loss": 0.8647,
"step": 118000
},
{
"epoch": 0.17,
"learning_rate": 4.724005828700151e-05,
"loss": 0.8629,
"step": 118500
},
{
"epoch": 0.17,
"learning_rate": 4.722840834266494e-05,
"loss": 0.866,
"step": 119000
},
{
"epoch": 0.17,
"learning_rate": 4.7216758398328374e-05,
"loss": 0.8644,
"step": 119500
},
{
"epoch": 0.17,
"learning_rate": 4.7205108453991806e-05,
"loss": 0.8627,
"step": 120000
},
{
"epoch": 0.17,
"learning_rate": 4.7193481809543916e-05,
"loss": 0.8642,
"step": 120500
},
{
"epoch": 0.17,
"learning_rate": 4.718183186520735e-05,
"loss": 0.8631,
"step": 121000
},
{
"epoch": 0.17,
"learning_rate": 4.717018192087078e-05,
"loss": 0.8719,
"step": 121500
},
{
"epoch": 0.17,
"learning_rate": 4.715853197653422e-05,
"loss": 0.876,
"step": 122000
},
{
"epoch": 0.17,
"learning_rate": 4.714688203219765e-05,
"loss": 0.8672,
"step": 122500
},
{
"epoch": 0.17,
"learning_rate": 4.713525538774976e-05,
"loss": 0.871,
"step": 123000
},
{
"epoch": 0.17,
"learning_rate": 4.7123605443413194e-05,
"loss": 0.8581,
"step": 123500
},
{
"epoch": 0.17,
"learning_rate": 4.7111978798965303e-05,
"loss": 0.867,
"step": 124000
},
{
"epoch": 0.17,
"learning_rate": 4.7100328854628736e-05,
"loss": 0.87,
"step": 124500
},
{
"epoch": 0.17,
"learning_rate": 4.708870221018084e-05,
"loss": 0.8602,
"step": 125000
},
{
"epoch": 0.18,
"learning_rate": 4.707705226584427e-05,
"loss": 0.8628,
"step": 125500
},
{
"epoch": 0.18,
"learning_rate": 4.706540232150771e-05,
"loss": 0.8648,
"step": 126000
},
{
"epoch": 0.18,
"learning_rate": 4.705375237717114e-05,
"loss": 0.8581,
"step": 126500
},
{
"epoch": 0.18,
"learning_rate": 4.704210243283458e-05,
"loss": 0.861,
"step": 127000
},
{
"epoch": 0.18,
"learning_rate": 4.7030452488498014e-05,
"loss": 0.8733,
"step": 127500
},
{
"epoch": 0.18,
"learning_rate": 4.7018802544161446e-05,
"loss": 0.8674,
"step": 128000
},
{
"epoch": 0.18,
"learning_rate": 4.7007152599824886e-05,
"loss": 0.86,
"step": 128500
},
{
"epoch": 0.18,
"learning_rate": 4.699550265548832e-05,
"loss": 0.8552,
"step": 129000
},
{
"epoch": 0.18,
"learning_rate": 4.698385271115175e-05,
"loss": 0.8596,
"step": 129500
},
{
"epoch": 0.18,
"learning_rate": 4.697220276681518e-05,
"loss": 0.8566,
"step": 130000
},
{
"epoch": 0.18,
"learning_rate": 4.6960552822478615e-05,
"loss": 0.8625,
"step": 130500
},
{
"epoch": 0.18,
"learning_rate": 4.6948926178030725e-05,
"loss": 0.8611,
"step": 131000
},
{
"epoch": 0.18,
"learning_rate": 4.693727623369416e-05,
"loss": 0.8611,
"step": 131500
},
{
"epoch": 0.18,
"learning_rate": 4.6925626289357596e-05,
"loss": 0.8582,
"step": 132000
},
{
"epoch": 0.19,
"learning_rate": 4.691397634502103e-05,
"loss": 0.8596,
"step": 132500
},
{
"epoch": 0.19,
"learning_rate": 4.690232640068446e-05,
"loss": 0.8542,
"step": 133000
},
{
"epoch": 0.19,
"learning_rate": 4.689067645634789e-05,
"loss": 0.8568,
"step": 133500
},
{
"epoch": 0.19,
"learning_rate": 4.68790498119e-05,
"loss": 0.8594,
"step": 134000
},
{
"epoch": 0.19,
"learning_rate": 4.6867399867563435e-05,
"loss": 0.8529,
"step": 134500
},
{
"epoch": 0.19,
"learning_rate": 4.6855749923226874e-05,
"loss": 0.8501,
"step": 135000
},
{
"epoch": 0.19,
"learning_rate": 4.684409997889031e-05,
"loss": 0.8544,
"step": 135500
},
{
"epoch": 0.19,
"learning_rate": 4.683245003455374e-05,
"loss": 0.8526,
"step": 136000
},
{
"epoch": 0.19,
"learning_rate": 4.682080009021717e-05,
"loss": 0.8649,
"step": 136500
},
{
"epoch": 0.19,
"learning_rate": 4.6809150145880604e-05,
"loss": 0.852,
"step": 137000
},
{
"epoch": 0.19,
"learning_rate": 4.6797500201544036e-05,
"loss": 0.8574,
"step": 137500
},
{
"epoch": 0.19,
"learning_rate": 4.6785850257207476e-05,
"loss": 0.8558,
"step": 138000
},
{
"epoch": 0.19,
"learning_rate": 4.677420031287091e-05,
"loss": 0.8509,
"step": 138500
},
{
"epoch": 0.19,
"learning_rate": 4.676255036853434e-05,
"loss": 0.8587,
"step": 139000
},
{
"epoch": 0.2,
"learning_rate": 4.675090042419777e-05,
"loss": 0.8534,
"step": 139500
},
{
"epoch": 0.2,
"learning_rate": 4.6739250479861205e-05,
"loss": 0.8504,
"step": 140000
},
{
"epoch": 0.2,
"learning_rate": 4.6727600535524644e-05,
"loss": 0.8564,
"step": 140500
},
{
"epoch": 0.2,
"learning_rate": 4.6715973891076754e-05,
"loss": 0.8495,
"step": 141000
},
{
"epoch": 0.2,
"learning_rate": 4.6704323946740186e-05,
"loss": 0.8519,
"step": 141500
},
{
"epoch": 0.2,
"learning_rate": 4.669267400240362e-05,
"loss": 0.8472,
"step": 142000
},
{
"epoch": 0.2,
"learning_rate": 4.668102405806705e-05,
"loss": 0.8479,
"step": 142500
},
{
"epoch": 0.2,
"learning_rate": 4.666937411373048e-05,
"loss": 0.8503,
"step": 143000
},
{
"epoch": 0.2,
"learning_rate": 4.6657724169393916e-05,
"loss": 0.8516,
"step": 143500
},
{
"epoch": 0.2,
"learning_rate": 4.664609752494603e-05,
"loss": 0.849,
"step": 144000
},
{
"epoch": 0.2,
"learning_rate": 4.6634470880498135e-05,
"loss": 0.8481,
"step": 144500
},
{
"epoch": 0.2,
"learning_rate": 4.6622844236050244e-05,
"loss": 0.8456,
"step": 145000
},
{
"epoch": 0.2,
"learning_rate": 4.661119429171368e-05,
"loss": 0.8506,
"step": 145500
},
{
"epoch": 0.2,
"learning_rate": 4.659954434737711e-05,
"loss": 0.8504,
"step": 146000
},
{
"epoch": 0.2,
"learning_rate": 4.658789440304054e-05,
"loss": 0.848,
"step": 146500
},
{
"epoch": 0.21,
"learning_rate": 4.6576244458703974e-05,
"loss": 0.8492,
"step": 147000
},
{
"epoch": 0.21,
"learning_rate": 4.656459451436741e-05,
"loss": 0.849,
"step": 147500
},
{
"epoch": 0.21,
"learning_rate": 4.6552944570030846e-05,
"loss": 0.8546,
"step": 148000
},
{
"epoch": 0.21,
"learning_rate": 4.6541294625694285e-05,
"loss": 0.8427,
"step": 148500
},
{
"epoch": 0.21,
"learning_rate": 4.652964468135772e-05,
"loss": 0.8508,
"step": 149000
},
{
"epoch": 0.21,
"learning_rate": 4.651799473702115e-05,
"loss": 0.8483,
"step": 149500
},
{
"epoch": 0.21,
"learning_rate": 4.650634479268458e-05,
"loss": 0.848,
"step": 150000
},
{
"epoch": 0.21,
"learning_rate": 4.649469484834802e-05,
"loss": 0.8455,
"step": 150500
},
{
"epoch": 0.21,
"learning_rate": 4.6483091503788794e-05,
"loss": 0.8497,
"step": 151000
},
{
"epoch": 0.21,
"learning_rate": 4.6471441559452233e-05,
"loss": 0.852,
"step": 151500
},
{
"epoch": 0.21,
"learning_rate": 4.6459791615115666e-05,
"loss": 0.8497,
"step": 152000
},
{
"epoch": 0.21,
"learning_rate": 4.64481416707791e-05,
"loss": 0.8444,
"step": 152500
},
{
"epoch": 0.21,
"learning_rate": 4.643649172644253e-05,
"loss": 0.8655,
"step": 153000
},
{
"epoch": 0.21,
"learning_rate": 4.642484178210596e-05,
"loss": 0.843,
"step": 153500
},
{
"epoch": 0.22,
"learning_rate": 4.6413191837769395e-05,
"loss": 0.8427,
"step": 154000
},
{
"epoch": 0.22,
"learning_rate": 4.6401541893432834e-05,
"loss": 0.8416,
"step": 154500
},
{
"epoch": 0.22,
"learning_rate": 4.6389891949096274e-05,
"loss": 0.8401,
"step": 155000
},
{
"epoch": 0.22,
"learning_rate": 4.6378242004759706e-05,
"loss": 0.8429,
"step": 155500
},
{
"epoch": 0.22,
"learning_rate": 4.636659206042314e-05,
"loss": 0.8458,
"step": 156000
},
{
"epoch": 0.22,
"learning_rate": 4.635494211608657e-05,
"loss": 0.8574,
"step": 156500
},
{
"epoch": 0.22,
"learning_rate": 4.634329217175001e-05,
"loss": 0.845,
"step": 157000
},
{
"epoch": 0.22,
"learning_rate": 4.633168882719078e-05,
"loss": 0.8415,
"step": 157500
},
{
"epoch": 0.22,
"learning_rate": 4.632003888285422e-05,
"loss": 0.8424,
"step": 158000
},
{
"epoch": 0.22,
"learning_rate": 4.6308388938517655e-05,
"loss": 0.8431,
"step": 158500
},
{
"epoch": 0.22,
"learning_rate": 4.629673899418109e-05,
"loss": 0.8437,
"step": 159000
},
{
"epoch": 0.22,
"learning_rate": 4.628508904984452e-05,
"loss": 0.8367,
"step": 159500
},
{
"epoch": 0.22,
"learning_rate": 4.627343910550795e-05,
"loss": 0.8404,
"step": 160000
},
{
"epoch": 0.22,
"learning_rate": 4.626181246106006e-05,
"loss": 0.8385,
"step": 160500
},
{
"epoch": 0.23,
"learning_rate": 4.62501625167235e-05,
"loss": 0.8422,
"step": 161000
},
{
"epoch": 0.23,
"learning_rate": 4.623851257238693e-05,
"loss": 0.84,
"step": 161500
},
{
"epoch": 0.23,
"learning_rate": 4.6226862628050365e-05,
"loss": 0.8419,
"step": 162000
},
{
"epoch": 0.23,
"learning_rate": 4.62152126837138e-05,
"loss": 0.8398,
"step": 162500
},
{
"epoch": 0.23,
"learning_rate": 4.620356273937723e-05,
"loss": 0.8415,
"step": 163000
},
{
"epoch": 0.23,
"learning_rate": 4.619191279504066e-05,
"loss": 0.8439,
"step": 163500
},
{
"epoch": 0.23,
"learning_rate": 4.61802628507041e-05,
"loss": 0.8414,
"step": 164000
},
{
"epoch": 0.23,
"learning_rate": 4.6168612906367534e-05,
"loss": 0.8402,
"step": 164500
},
{
"epoch": 0.23,
"learning_rate": 4.6156962962030966e-05,
"loss": 0.8433,
"step": 165000
},
{
"epoch": 0.23,
"learning_rate": 4.6145313017694405e-05,
"loss": 0.8403,
"step": 165500
},
{
"epoch": 0.23,
"learning_rate": 4.613368637324651e-05,
"loss": 0.8376,
"step": 166000
},
{
"epoch": 0.23,
"learning_rate": 4.612203642890994e-05,
"loss": 0.8375,
"step": 166500
},
{
"epoch": 0.23,
"learning_rate": 4.611038648457338e-05,
"loss": 0.8396,
"step": 167000
},
{
"epoch": 0.23,
"learning_rate": 4.609873654023681e-05,
"loss": 0.8345,
"step": 167500
},
{
"epoch": 0.23,
"learning_rate": 4.6087086595900245e-05,
"loss": 0.8361,
"step": 168000
},
{
"epoch": 0.24,
"learning_rate": 4.6075436651563684e-05,
"loss": 0.8351,
"step": 168500
},
{
"epoch": 0.24,
"learning_rate": 4.6063786707227116e-05,
"loss": 0.8379,
"step": 169000
},
{
"epoch": 0.24,
"learning_rate": 4.605213676289055e-05,
"loss": 0.8309,
"step": 169500
},
{
"epoch": 0.24,
"learning_rate": 4.604048681855399e-05,
"loss": 0.8381,
"step": 170000
},
{
"epoch": 0.24,
"learning_rate": 4.602883687421742e-05,
"loss": 0.832,
"step": 170500
},
{
"epoch": 0.24,
"learning_rate": 4.601718692988085e-05,
"loss": 0.8333,
"step": 171000
},
{
"epoch": 0.24,
"learning_rate": 4.6005536985544285e-05,
"loss": 0.8338,
"step": 171500
},
{
"epoch": 0.24,
"learning_rate": 4.599388704120772e-05,
"loss": 0.8335,
"step": 172000
},
{
"epoch": 0.24,
"learning_rate": 4.5982237096871156e-05,
"loss": 0.8351,
"step": 172500
},
{
"epoch": 0.24,
"learning_rate": 4.597058715253459e-05,
"loss": 0.8336,
"step": 173000
},
{
"epoch": 0.24,
"learning_rate": 4.595893720819802e-05,
"loss": 0.8344,
"step": 173500
},
{
"epoch": 0.24,
"learning_rate": 4.5947287263861453e-05,
"loss": 0.8373,
"step": 174000
},
{
"epoch": 0.24,
"learning_rate": 4.593570721919091e-05,
"loss": 0.8386,
"step": 174500
},
{
"epoch": 0.24,
"learning_rate": 4.592405727485434e-05,
"loss": 0.8302,
"step": 175000
},
{
"epoch": 0.25,
"learning_rate": 4.5912407330517775e-05,
"loss": 0.8354,
"step": 175500
},
{
"epoch": 0.25,
"learning_rate": 4.590075738618121e-05,
"loss": 0.8339,
"step": 176000
},
{
"epoch": 0.25,
"learning_rate": 4.588913074173332e-05,
"loss": 0.8419,
"step": 176500
},
{
"epoch": 0.25,
"learning_rate": 4.587750409728542e-05,
"loss": 0.8406,
"step": 177000
},
{
"epoch": 0.25,
"learning_rate": 4.586585415294886e-05,
"loss": 0.835,
"step": 177500
},
{
"epoch": 0.25,
"learning_rate": 4.585420420861229e-05,
"loss": 0.8351,
"step": 178000
},
{
"epoch": 0.25,
"learning_rate": 4.584255426427573e-05,
"loss": 0.8365,
"step": 178500
},
{
"epoch": 0.25,
"learning_rate": 4.5830927619827834e-05,
"loss": 0.8316,
"step": 179000
},
{
"epoch": 0.25,
"learning_rate": 4.5819277675491266e-05,
"loss": 0.8244,
"step": 179500
},
{
"epoch": 0.25,
"learning_rate": 4.58076277311547e-05,
"loss": 0.8276,
"step": 180000
},
{
"epoch": 0.25,
"learning_rate": 4.579597778681814e-05,
"loss": 0.8323,
"step": 180500
},
{
"epoch": 0.25,
"learning_rate": 4.578432784248157e-05,
"loss": 0.8283,
"step": 181000
},
{
"epoch": 0.25,
"learning_rate": 4.5772677898145e-05,
"loss": 0.829,
"step": 181500
},
{
"epoch": 0.25,
"learning_rate": 4.576102795380844e-05,
"loss": 0.832,
"step": 182000
},
{
"epoch": 0.26,
"learning_rate": 4.5749378009471874e-05,
"loss": 0.8349,
"step": 182500
},
{
"epoch": 0.26,
"learning_rate": 4.5737728065135306e-05,
"loss": 0.8335,
"step": 183000
},
{
"epoch": 0.26,
"learning_rate": 4.5726078120798746e-05,
"loss": 0.8316,
"step": 183500
},
{
"epoch": 0.26,
"learning_rate": 4.571442817646218e-05,
"loss": 0.8307,
"step": 184000
},
{
"epoch": 0.26,
"learning_rate": 4.570277823212561e-05,
"loss": 0.8273,
"step": 184500
},
{
"epoch": 0.26,
"learning_rate": 4.569112828778904e-05,
"loss": 0.8277,
"step": 185000
},
{
"epoch": 0.26,
"learning_rate": 4.5679478343452475e-05,
"loss": 0.8278,
"step": 185500
},
{
"epoch": 0.26,
"learning_rate": 4.5667828399115914e-05,
"loss": 0.8298,
"step": 186000
},
{
"epoch": 0.26,
"learning_rate": 4.5656178454779347e-05,
"loss": 0.8277,
"step": 186500
},
{
"epoch": 0.26,
"learning_rate": 4.5644551810331456e-05,
"loss": 0.8248,
"step": 187000
},
{
"epoch": 0.26,
"learning_rate": 4.563290186599489e-05,
"loss": 0.8233,
"step": 187500
},
{
"epoch": 0.26,
"learning_rate": 4.562125192165832e-05,
"loss": 0.8258,
"step": 188000
},
{
"epoch": 0.26,
"learning_rate": 4.560960197732175e-05,
"loss": 0.8208,
"step": 188500
},
{
"epoch": 0.26,
"learning_rate": 4.5597952032985186e-05,
"loss": 0.823,
"step": 189000
},
{
"epoch": 0.26,
"learning_rate": 4.5586302088648625e-05,
"loss": 0.8265,
"step": 189500
},
{
"epoch": 0.27,
"learning_rate": 4.557465214431206e-05,
"loss": 0.8266,
"step": 190000
},
{
"epoch": 0.27,
"learning_rate": 4.556300219997549e-05,
"loss": 0.8288,
"step": 190500
},
{
"epoch": 0.27,
"learning_rate": 4.55513755555276e-05,
"loss": 0.8261,
"step": 191000
},
{
"epoch": 0.27,
"learning_rate": 4.553972561119103e-05,
"loss": 0.827,
"step": 191500
},
{
"epoch": 0.27,
"learning_rate": 4.5528075666854464e-05,
"loss": 0.8253,
"step": 192000
},
{
"epoch": 0.27,
"learning_rate": 4.5516449022406574e-05,
"loss": 0.824,
"step": 192500
},
{
"epoch": 0.27,
"learning_rate": 4.550479907807001e-05,
"loss": 0.8229,
"step": 193000
},
{
"epoch": 0.27,
"learning_rate": 4.5493149133733445e-05,
"loss": 0.8278,
"step": 193500
},
{
"epoch": 0.27,
"learning_rate": 4.548149918939688e-05,
"loss": 0.823,
"step": 194000
},
{
"epoch": 0.27,
"learning_rate": 4.546984924506031e-05,
"loss": 0.8241,
"step": 194500
},
{
"epoch": 0.27,
"learning_rate": 4.545819930072374e-05,
"loss": 0.8194,
"step": 195000
},
{
"epoch": 0.27,
"learning_rate": 4.5446549356387175e-05,
"loss": 0.8217,
"step": 195500
},
{
"epoch": 0.27,
"learning_rate": 4.5434899412050614e-05,
"loss": 0.8224,
"step": 196000
},
{
"epoch": 0.27,
"learning_rate": 4.542327276760272e-05,
"loss": 0.8192,
"step": 196500
},
{
"epoch": 0.28,
"learning_rate": 4.5411646123154826e-05,
"loss": 0.8228,
"step": 197000
},
{
"epoch": 0.28,
"learning_rate": 4.539999617881826e-05,
"loss": 0.8213,
"step": 197500
},
{
"epoch": 0.28,
"learning_rate": 4.538836953437037e-05,
"loss": 0.8341,
"step": 198000
},
{
"epoch": 0.28,
"learning_rate": 4.53767195900338e-05,
"loss": 0.8326,
"step": 198500
},
{
"epoch": 0.28,
"learning_rate": 4.536506964569723e-05,
"loss": 0.8581,
"step": 199000
},
{
"epoch": 0.28,
"learning_rate": 4.535344300124934e-05,
"loss": 0.8591,
"step": 199500
},
{
"epoch": 0.28,
"learning_rate": 4.5341793056912775e-05,
"loss": 0.8319,
"step": 200000
},
{
"epoch": 0.28,
"learning_rate": 4.5330143112576214e-05,
"loss": 0.8294,
"step": 200500
},
{
"epoch": 0.28,
"learning_rate": 4.5318493168239646e-05,
"loss": 0.8245,
"step": 201000
},
{
"epoch": 0.28,
"learning_rate": 4.530684322390308e-05,
"loss": 0.8229,
"step": 201500
},
{
"epoch": 0.28,
"learning_rate": 4.529519327956651e-05,
"loss": 0.8261,
"step": 202000
},
{
"epoch": 0.28,
"learning_rate": 4.5283543335229944e-05,
"loss": 0.8417,
"step": 202500
},
{
"epoch": 0.28,
"learning_rate": 4.527189339089338e-05,
"loss": 0.8273,
"step": 203000
},
{
"epoch": 0.28,
"learning_rate": 4.5260243446556815e-05,
"loss": 0.825,
"step": 203500
},
{
"epoch": 0.29,
"learning_rate": 4.5248616802108925e-05,
"loss": 0.8281,
"step": 204000
},
{
"epoch": 0.29,
"learning_rate": 4.523699015766103e-05,
"loss": 0.8238,
"step": 204500
},
{
"epoch": 0.29,
"learning_rate": 4.522534021332446e-05,
"loss": 0.8215,
"step": 205000
},
{
"epoch": 0.29,
"learning_rate": 4.52136902689879e-05,
"loss": 0.8256,
"step": 205500
},
{
"epoch": 0.29,
"learning_rate": 4.520204032465133e-05,
"loss": 0.8219,
"step": 206000
},
{
"epoch": 0.29,
"learning_rate": 4.519039038031477e-05,
"loss": 0.8211,
"step": 206500
},
{
"epoch": 0.29,
"learning_rate": 4.51787404359782e-05,
"loss": 0.8247,
"step": 207000
},
{
"epoch": 0.29,
"learning_rate": 4.5167090491641635e-05,
"loss": 0.8256,
"step": 207500
},
{
"epoch": 0.29,
"learning_rate": 4.515546384719374e-05,
"loss": 0.8391,
"step": 208000
},
{
"epoch": 0.29,
"learning_rate": 4.514381390285717e-05,
"loss": 0.8362,
"step": 208500
},
{
"epoch": 0.29,
"learning_rate": 4.513216395852061e-05,
"loss": 0.8285,
"step": 209000
},
{
"epoch": 0.29,
"learning_rate": 4.512051401418404e-05,
"loss": 0.8232,
"step": 209500
},
{
"epoch": 0.29,
"learning_rate": 4.510886406984748e-05,
"loss": 0.8222,
"step": 210000
},
{
"epoch": 0.29,
"learning_rate": 4.5097214125510914e-05,
"loss": 0.8231,
"step": 210500
},
{
"epoch": 0.29,
"learning_rate": 4.5085564181174346e-05,
"loss": 0.8229,
"step": 211000
},
{
"epoch": 0.3,
"learning_rate": 4.507391423683778e-05,
"loss": 0.8194,
"step": 211500
},
{
"epoch": 0.3,
"learning_rate": 4.506226429250121e-05,
"loss": 0.8189,
"step": 212000
},
{
"epoch": 0.3,
"learning_rate": 4.505061434816465e-05,
"loss": 0.8164,
"step": 212500
},
{
"epoch": 0.3,
"learning_rate": 4.503896440382808e-05,
"loss": 0.822,
"step": 213000
},
{
"epoch": 0.3,
"learning_rate": 4.5027314459491515e-05,
"loss": 0.8151,
"step": 213500
},
{
"epoch": 0.3,
"learning_rate": 4.501566451515495e-05,
"loss": 0.8195,
"step": 214000
},
{
"epoch": 0.3,
"learning_rate": 4.500403787070706e-05,
"loss": 0.8306,
"step": 214500
},
{
"epoch": 0.3,
"learning_rate": 4.499238792637049e-05,
"loss": 0.8205,
"step": 215000
},
{
"epoch": 0.3,
"learning_rate": 4.49807612819226e-05,
"loss": 0.8271,
"step": 215500
},
{
"epoch": 0.3,
"learning_rate": 4.496911133758603e-05,
"loss": 0.8212,
"step": 216000
},
{
"epoch": 0.3,
"learning_rate": 4.495746139324947e-05,
"loss": 0.825,
"step": 216500
},
{
"epoch": 0.3,
"learning_rate": 4.49458114489129e-05,
"loss": 0.817,
"step": 217000
},
{
"epoch": 0.3,
"learning_rate": 4.4934161504576335e-05,
"loss": 0.8181,
"step": 217500
},
{
"epoch": 0.3,
"learning_rate": 4.492251156023977e-05,
"loss": 0.8214,
"step": 218000
},
{
"epoch": 0.31,
"learning_rate": 4.49108616159032e-05,
"loss": 0.8148,
"step": 218500
},
{
"epoch": 0.31,
"learning_rate": 4.489921167156664e-05,
"loss": 0.8189,
"step": 219000
},
{
"epoch": 0.31,
"learning_rate": 4.488756172723007e-05,
"loss": 0.8179,
"step": 219500
},
{
"epoch": 0.31,
"learning_rate": 4.4875911782893504e-05,
"loss": 0.8149,
"step": 220000
},
{
"epoch": 0.31,
"learning_rate": 4.4864261838556936e-05,
"loss": 0.82,
"step": 220500
},
{
"epoch": 0.31,
"learning_rate": 4.4852635194109046e-05,
"loss": 0.8152,
"step": 221000
},
{
"epoch": 0.31,
"learning_rate": 4.484098524977248e-05,
"loss": 0.8125,
"step": 221500
},
{
"epoch": 0.31,
"learning_rate": 4.482933530543592e-05,
"loss": 0.8136,
"step": 222000
},
{
"epoch": 0.31,
"learning_rate": 4.481768536109935e-05,
"loss": 0.8138,
"step": 222500
},
{
"epoch": 0.31,
"learning_rate": 4.480603541676278e-05,
"loss": 0.8107,
"step": 223000
},
{
"epoch": 0.31,
"learning_rate": 4.4794385472426214e-05,
"loss": 0.8158,
"step": 223500
},
{
"epoch": 0.31,
"learning_rate": 4.4782735528089647e-05,
"loss": 0.8135,
"step": 224000
},
{
"epoch": 0.31,
"learning_rate": 4.4771108883641756e-05,
"loss": 0.816,
"step": 224500
},
{
"epoch": 0.31,
"learning_rate": 4.475945893930519e-05,
"loss": 0.8199,
"step": 225000
},
{
"epoch": 0.32,
"learning_rate": 4.474780899496863e-05,
"loss": 0.8142,
"step": 225500
},
{
"epoch": 0.32,
"learning_rate": 4.473615905063206e-05,
"loss": 0.8129,
"step": 226000
},
{
"epoch": 0.32,
"learning_rate": 4.472450910629549e-05,
"loss": 0.8195,
"step": 226500
},
{
"epoch": 0.32,
"learning_rate": 4.4712859161958925e-05,
"loss": 0.8131,
"step": 227000
},
{
"epoch": 0.32,
"learning_rate": 4.470120921762236e-05,
"loss": 0.8128,
"step": 227500
},
{
"epoch": 0.32,
"learning_rate": 4.4689559273285796e-05,
"loss": 0.8113,
"step": 228000
},
{
"epoch": 0.32,
"learning_rate": 4.4677932628837906e-05,
"loss": 0.8127,
"step": 228500
},
{
"epoch": 0.32,
"learning_rate": 4.466628268450134e-05,
"loss": 0.814,
"step": 229000
},
{
"epoch": 0.32,
"learning_rate": 4.465463274016477e-05,
"loss": 0.8139,
"step": 229500
},
{
"epoch": 0.32,
"learning_rate": 4.464300609571688e-05,
"loss": 0.8151,
"step": 230000
},
{
"epoch": 0.32,
"learning_rate": 4.463135615138031e-05,
"loss": 0.8121,
"step": 230500
},
{
"epoch": 0.32,
"learning_rate": 4.4619706207043745e-05,
"loss": 0.8115,
"step": 231000
},
{
"epoch": 0.32,
"learning_rate": 4.460805626270718e-05,
"loss": 0.8128,
"step": 231500
},
{
"epoch": 0.32,
"learning_rate": 4.4596406318370617e-05,
"loss": 0.8125,
"step": 232000
},
{
"epoch": 0.33,
"learning_rate": 4.458475637403405e-05,
"loss": 0.8092,
"step": 232500
},
{
"epoch": 0.33,
"learning_rate": 4.457310642969748e-05,
"loss": 0.8085,
"step": 233000
},
{
"epoch": 0.33,
"learning_rate": 4.4561456485360914e-05,
"loss": 0.8093,
"step": 233500
},
{
"epoch": 0.33,
"learning_rate": 4.4549806541024346e-05,
"loss": 0.816,
"step": 234000
},
{
"epoch": 0.33,
"learning_rate": 4.4538203196465126e-05,
"loss": 0.8086,
"step": 234500
},
{
"epoch": 0.33,
"learning_rate": 4.452655325212856e-05,
"loss": 0.8163,
"step": 235000
},
{
"epoch": 0.33,
"learning_rate": 4.4514903307792e-05,
"loss": 0.8117,
"step": 235500
},
{
"epoch": 0.33,
"learning_rate": 4.450325336345543e-05,
"loss": 0.8066,
"step": 236000
},
{
"epoch": 0.33,
"learning_rate": 4.449160341911886e-05,
"loss": 0.815,
"step": 236500
},
{
"epoch": 0.33,
"learning_rate": 4.44799534747823e-05,
"loss": 0.8116,
"step": 237000
},
{
"epoch": 0.33,
"learning_rate": 4.4468326830334404e-05,
"loss": 0.8091,
"step": 237500
},
{
"epoch": 0.33,
"learning_rate": 4.445667688599784e-05,
"loss": 0.8184,
"step": 238000
},
{
"epoch": 0.33,
"learning_rate": 4.4445026941661276e-05,
"loss": 0.8122,
"step": 238500
},
{
"epoch": 0.33,
"learning_rate": 4.4433400297213386e-05,
"loss": 0.8081,
"step": 239000
},
{
"epoch": 0.33,
"learning_rate": 4.442175035287682e-05,
"loss": 0.8166,
"step": 239500
},
{
"epoch": 0.34,
"learning_rate": 4.441012370842893e-05,
"loss": 0.8095,
"step": 240000
},
{
"epoch": 0.34,
"learning_rate": 4.439847376409236e-05,
"loss": 0.8067,
"step": 240500
},
{
"epoch": 0.34,
"learning_rate": 4.438682381975579e-05,
"loss": 0.8133,
"step": 241000
},
{
"epoch": 0.34,
"learning_rate": 4.4375173875419225e-05,
"loss": 0.8031,
"step": 241500
},
{
"epoch": 0.34,
"learning_rate": 4.436352393108266e-05,
"loss": 0.8135,
"step": 242000
},
{
"epoch": 0.34,
"learning_rate": 4.4351873986746096e-05,
"loss": 0.8104,
"step": 242500
},
{
"epoch": 0.34,
"learning_rate": 4.434022404240953e-05,
"loss": 0.8069,
"step": 243000
},
{
"epoch": 0.34,
"learning_rate": 4.432857409807296e-05,
"loss": 0.8095,
"step": 243500
},
{
"epoch": 0.34,
"learning_rate": 4.431692415373639e-05,
"loss": 0.8065,
"step": 244000
},
{
"epoch": 0.34,
"learning_rate": 4.4305274209399826e-05,
"loss": 0.8072,
"step": 244500
},
{
"epoch": 0.34,
"learning_rate": 4.4293647564951935e-05,
"loss": 0.8085,
"step": 245000
},
{
"epoch": 0.34,
"learning_rate": 4.4281997620615374e-05,
"loss": 0.8125,
"step": 245500
},
{
"epoch": 0.34,
"learning_rate": 4.427034767627881e-05,
"loss": 0.8063,
"step": 246000
},
{
"epoch": 0.34,
"learning_rate": 4.425869773194224e-05,
"loss": 0.8096,
"step": 246500
},
{
"epoch": 0.35,
"learning_rate": 4.424704778760567e-05,
"loss": 0.8092,
"step": 247000
},
{
"epoch": 0.35,
"learning_rate": 4.4235397843269104e-05,
"loss": 0.8079,
"step": 247500
},
{
"epoch": 0.35,
"learning_rate": 4.4223771198821214e-05,
"loss": 0.8065,
"step": 248000
},
{
"epoch": 0.35,
"learning_rate": 4.421212125448465e-05,
"loss": 0.8095,
"step": 248500
},
{
"epoch": 0.35,
"learning_rate": 4.4200471310148085e-05,
"loss": 0.8067,
"step": 249000
},
{
"epoch": 0.35,
"learning_rate": 4.418882136581152e-05,
"loss": 0.8038,
"step": 249500
},
{
"epoch": 0.35,
"learning_rate": 4.417719472136363e-05,
"loss": 0.8113,
"step": 250000
},
{
"epoch": 0.35,
"learning_rate": 4.416554477702706e-05,
"loss": 0.8055,
"step": 250500
},
{
"epoch": 0.35,
"learning_rate": 4.415389483269049e-05,
"loss": 0.803,
"step": 251000
},
{
"epoch": 0.35,
"learning_rate": 4.4142244888353924e-05,
"loss": 0.8099,
"step": 251500
},
{
"epoch": 0.35,
"learning_rate": 4.4130594944017363e-05,
"loss": 0.8102,
"step": 252000
},
{
"epoch": 0.35,
"learning_rate": 4.4118968299569466e-05,
"loss": 0.8033,
"step": 252500
},
{
"epoch": 0.35,
"learning_rate": 4.4107341655121576e-05,
"loss": 0.8094,
"step": 253000
},
{
"epoch": 0.35,
"learning_rate": 4.409569171078501e-05,
"loss": 0.8063,
"step": 253500
},
{
"epoch": 0.36,
"learning_rate": 4.408404176644844e-05,
"loss": 0.8095,
"step": 254000
},
{
"epoch": 0.36,
"learning_rate": 4.407239182211187e-05,
"loss": 0.8048,
"step": 254500
},
{
"epoch": 0.36,
"learning_rate": 4.4060741877775305e-05,
"loss": 0.8051,
"step": 255000
},
{
"epoch": 0.36,
"learning_rate": 4.4049091933438745e-05,
"loss": 0.8058,
"step": 255500
},
{
"epoch": 0.36,
"learning_rate": 4.403744198910218e-05,
"loss": 0.8087,
"step": 256000
},
{
"epoch": 0.36,
"learning_rate": 4.402579204476561e-05,
"loss": 0.8039,
"step": 256500
},
{
"epoch": 0.36,
"learning_rate": 4.401414210042905e-05,
"loss": 0.7978,
"step": 257000
},
{
"epoch": 0.36,
"learning_rate": 4.400249215609248e-05,
"loss": 0.8056,
"step": 257500
},
{
"epoch": 0.36,
"learning_rate": 4.399084221175591e-05,
"loss": 0.7997,
"step": 258000
},
{
"epoch": 0.36,
"learning_rate": 4.397919226741935e-05,
"loss": 0.803,
"step": 258500
},
{
"epoch": 0.36,
"learning_rate": 4.3967542323082785e-05,
"loss": 0.802,
"step": 259000
},
{
"epoch": 0.36,
"learning_rate": 4.395589237874622e-05,
"loss": 0.8048,
"step": 259500
},
{
"epoch": 0.36,
"learning_rate": 4.394424243440965e-05,
"loss": 0.801,
"step": 260000
},
{
"epoch": 0.36,
"learning_rate": 4.393259249007308e-05,
"loss": 0.8046,
"step": 260500
},
{
"epoch": 0.36,
"learning_rate": 4.392094254573652e-05,
"loss": 0.8051,
"step": 261000
},
{
"epoch": 0.37,
"learning_rate": 4.390931590128863e-05,
"loss": 0.8089,
"step": 261500
},
{
"epoch": 0.37,
"learning_rate": 4.389766595695206e-05,
"loss": 0.8077,
"step": 262000
},
{
"epoch": 0.37,
"learning_rate": 4.3886016012615495e-05,
"loss": 0.803,
"step": 262500
},
{
"epoch": 0.37,
"learning_rate": 4.387436606827893e-05,
"loss": 0.8043,
"step": 263000
},
{
"epoch": 0.37,
"learning_rate": 4.386271612394236e-05,
"loss": 0.8027,
"step": 263500
},
{
"epoch": 0.37,
"learning_rate": 4.38510661796058e-05,
"loss": 0.8022,
"step": 264000
},
{
"epoch": 0.37,
"learning_rate": 4.383941623526923e-05,
"loss": 0.8056,
"step": 264500
},
{
"epoch": 0.37,
"learning_rate": 4.382778959082134e-05,
"loss": 0.8055,
"step": 265000
},
{
"epoch": 0.37,
"learning_rate": 4.3816139646484774e-05,
"loss": 0.802,
"step": 265500
},
{
"epoch": 0.37,
"learning_rate": 4.3804489702148206e-05,
"loss": 0.7967,
"step": 266000
},
{
"epoch": 0.37,
"learning_rate": 4.379283975781164e-05,
"loss": 0.8052,
"step": 266500
},
{
"epoch": 0.37,
"learning_rate": 4.378118981347507e-05,
"loss": 0.7993,
"step": 267000
},
{
"epoch": 0.37,
"learning_rate": 4.376953986913851e-05,
"loss": 0.7974,
"step": 267500
},
{
"epoch": 0.37,
"learning_rate": 4.375788992480194e-05,
"loss": 0.8015,
"step": 268000
},
{
"epoch": 0.38,
"learning_rate": 4.374626328035405e-05,
"loss": 0.8023,
"step": 268500
},
{
"epoch": 0.38,
"learning_rate": 4.3734613336017484e-05,
"loss": 0.7947,
"step": 269000
},
{
"epoch": 0.38,
"learning_rate": 4.3722963391680917e-05,
"loss": 0.8031,
"step": 269500
},
{
"epoch": 0.38,
"learning_rate": 4.371131344734435e-05,
"loss": 0.801,
"step": 270000
},
{
"epoch": 0.38,
"learning_rate": 4.369966350300779e-05,
"loss": 0.7999,
"step": 270500
},
{
"epoch": 0.38,
"learning_rate": 4.368803685855989e-05,
"loss": 0.8036,
"step": 271000
},
{
"epoch": 0.38,
"learning_rate": 4.367638691422333e-05,
"loss": 0.8005,
"step": 271500
},
{
"epoch": 0.38,
"learning_rate": 4.366473696988676e-05,
"loss": 0.7988,
"step": 272000
},
{
"epoch": 0.38,
"learning_rate": 4.3653087025550195e-05,
"loss": 0.7989,
"step": 272500
},
{
"epoch": 0.38,
"learning_rate": 4.364143708121363e-05,
"loss": 0.7995,
"step": 273000
},
{
"epoch": 0.38,
"learning_rate": 4.362978713687706e-05,
"loss": 0.8034,
"step": 273500
},
{
"epoch": 0.38,
"learning_rate": 4.36181371925405e-05,
"loss": 0.8013,
"step": 274000
},
{
"epoch": 0.38,
"learning_rate": 4.360648724820393e-05,
"loss": 0.7954,
"step": 274500
},
{
"epoch": 0.38,
"learning_rate": 4.359486060375604e-05,
"loss": 0.8002,
"step": 275000
},
{
"epoch": 0.39,
"learning_rate": 4.358321065941947e-05,
"loss": 0.8041,
"step": 275500
},
{
"epoch": 0.39,
"learning_rate": 4.3571560715082905e-05,
"loss": 0.7984,
"step": 276000
},
{
"epoch": 0.39,
"learning_rate": 4.355991077074634e-05,
"loss": 0.7967,
"step": 276500
},
{
"epoch": 0.39,
"learning_rate": 4.354826082640978e-05,
"loss": 0.7998,
"step": 277000
},
{
"epoch": 0.39,
"learning_rate": 4.353661088207321e-05,
"loss": 0.7988,
"step": 277500
},
{
"epoch": 0.39,
"learning_rate": 4.352496093773664e-05,
"loss": 0.7952,
"step": 278000
},
{
"epoch": 0.39,
"learning_rate": 4.3513310993400074e-05,
"loss": 0.7984,
"step": 278500
},
{
"epoch": 0.39,
"learning_rate": 4.350173094872953e-05,
"loss": 0.7991,
"step": 279000
},
{
"epoch": 0.39,
"learning_rate": 4.3490081004392964e-05,
"loss": 0.7961,
"step": 279500
},
{
"epoch": 0.39,
"learning_rate": 4.3478431060056396e-05,
"loss": 0.8015,
"step": 280000
},
{
"epoch": 0.39,
"learning_rate": 4.346678111571983e-05,
"loss": 0.7988,
"step": 280500
},
{
"epoch": 0.39,
"learning_rate": 4.345513117138327e-05,
"loss": 0.7987,
"step": 281000
},
{
"epoch": 0.39,
"learning_rate": 4.34434812270467e-05,
"loss": 0.7979,
"step": 281500
},
{
"epoch": 0.39,
"learning_rate": 4.343183128271013e-05,
"loss": 0.7958,
"step": 282000
},
{
"epoch": 0.39,
"learning_rate": 4.3420181338373565e-05,
"loss": 0.7923,
"step": 282500
},
{
"epoch": 0.4,
"learning_rate": 4.3408531394037e-05,
"loss": 0.8003,
"step": 283000
},
{
"epoch": 0.4,
"learning_rate": 4.339688144970043e-05,
"loss": 0.7944,
"step": 283500
},
{
"epoch": 0.4,
"learning_rate": 4.338523150536387e-05,
"loss": 0.7981,
"step": 284000
},
{
"epoch": 0.4,
"learning_rate": 4.33735815610273e-05,
"loss": 0.7953,
"step": 284500
},
{
"epoch": 0.4,
"learning_rate": 4.336195491657941e-05,
"loss": 0.7931,
"step": 285000
},
{
"epoch": 0.4,
"learning_rate": 4.335030497224284e-05,
"loss": 0.7919,
"step": 285500
},
{
"epoch": 0.4,
"learning_rate": 4.3338655027906276e-05,
"loss": 0.796,
"step": 286000
},
{
"epoch": 0.4,
"learning_rate": 4.332700508356971e-05,
"loss": 0.7923,
"step": 286500
},
{
"epoch": 0.4,
"learning_rate": 4.331537843912182e-05,
"loss": 0.7909,
"step": 287000
},
{
"epoch": 0.4,
"learning_rate": 4.330372849478526e-05,
"loss": 0.8002,
"step": 287500
},
{
"epoch": 0.4,
"learning_rate": 4.329207855044869e-05,
"loss": 0.7961,
"step": 288000
},
{
"epoch": 0.4,
"learning_rate": 4.328042860611212e-05,
"loss": 0.7997,
"step": 288500
},
{
"epoch": 0.4,
"learning_rate": 4.3268778661775554e-05,
"loss": 0.7937,
"step": 289000
},
{
"epoch": 0.4,
"learning_rate": 4.3257175317216334e-05,
"loss": 0.7928,
"step": 289500
},
{
"epoch": 0.41,
"learning_rate": 4.3245525372879766e-05,
"loss": 0.7977,
"step": 290000
},
{
"epoch": 0.41,
"learning_rate": 4.3233875428543205e-05,
"loss": 0.7929,
"step": 290500
},
{
"epoch": 0.41,
"learning_rate": 4.322222548420664e-05,
"loss": 0.7884,
"step": 291000
},
{
"epoch": 0.41,
"learning_rate": 4.321057553987008e-05,
"loss": 0.7985,
"step": 291500
},
{
"epoch": 0.41,
"learning_rate": 4.319894889542218e-05,
"loss": 0.7942,
"step": 292000
},
{
"epoch": 0.41,
"learning_rate": 4.318732225097429e-05,
"loss": 0.7968,
"step": 292500
},
{
"epoch": 0.41,
"learning_rate": 4.317567230663772e-05,
"loss": 0.797,
"step": 293000
},
{
"epoch": 0.41,
"learning_rate": 4.3164022362301154e-05,
"loss": 0.7997,
"step": 293500
},
{
"epoch": 0.41,
"learning_rate": 4.3152372417964586e-05,
"loss": 0.809,
"step": 294000
},
{
"epoch": 0.41,
"learning_rate": 4.3140745773516696e-05,
"loss": 0.7955,
"step": 294500
},
{
"epoch": 0.41,
"learning_rate": 4.3129095829180135e-05,
"loss": 0.7933,
"step": 295000
},
{
"epoch": 0.41,
"learning_rate": 4.311744588484357e-05,
"loss": 0.7966,
"step": 295500
},
{
"epoch": 0.41,
"learning_rate": 4.3105795940507e-05,
"loss": 0.7945,
"step": 296000
},
{
"epoch": 0.41,
"learning_rate": 4.309414599617043e-05,
"loss": 0.7947,
"step": 296500
},
{
"epoch": 0.42,
"learning_rate": 4.3082496051833865e-05,
"loss": 0.797,
"step": 297000
},
{
"epoch": 0.42,
"learning_rate": 4.3070869407385974e-05,
"loss": 0.7984,
"step": 297500
},
{
"epoch": 0.42,
"learning_rate": 4.305921946304941e-05,
"loss": 0.821,
"step": 298000
},
{
"epoch": 0.42,
"learning_rate": 4.3047569518712846e-05,
"loss": 0.8131,
"step": 298500
},
{
"epoch": 0.42,
"learning_rate": 4.303591957437628e-05,
"loss": 0.806,
"step": 299000
},
{
"epoch": 0.42,
"learning_rate": 4.302426963003971e-05,
"loss": 0.8243,
"step": 299500
},
{
"epoch": 0.42,
"learning_rate": 4.301261968570314e-05,
"loss": 0.8011,
"step": 300000
},
{
"epoch": 0.42,
"learning_rate": 4.300099304125525e-05,
"loss": 0.7979,
"step": 300500
},
{
"epoch": 0.42,
"learning_rate": 4.2989343096918685e-05,
"loss": 0.8148,
"step": 301000
},
{
"epoch": 0.42,
"learning_rate": 4.2977693152582124e-05,
"loss": 0.8092,
"step": 301500
},
{
"epoch": 0.42,
"learning_rate": 4.296606650813423e-05,
"loss": 0.8071,
"step": 302000
},
{
"epoch": 0.42,
"learning_rate": 4.295441656379766e-05,
"loss": 0.8033,
"step": 302500
},
{
"epoch": 0.42,
"learning_rate": 4.294276661946109e-05,
"loss": 0.8039,
"step": 303000
},
{
"epoch": 0.42,
"learning_rate": 4.293111667512453e-05,
"loss": 0.7987,
"step": 303500
},
{
"epoch": 0.42,
"learning_rate": 4.291946673078796e-05,
"loss": 0.7926,
"step": 304000
},
{
"epoch": 0.43,
"learning_rate": 4.29078167864514e-05,
"loss": 0.7963,
"step": 304500
},
{
"epoch": 0.43,
"learning_rate": 4.2896166842114835e-05,
"loss": 0.8042,
"step": 305000
},
{
"epoch": 0.43,
"learning_rate": 4.288451689777827e-05,
"loss": 0.8003,
"step": 305500
},
{
"epoch": 0.43,
"learning_rate": 4.28728669534417e-05,
"loss": 0.7982,
"step": 306000
},
{
"epoch": 0.43,
"learning_rate": 4.286121700910513e-05,
"loss": 0.7946,
"step": 306500
},
{
"epoch": 0.43,
"learning_rate": 4.284959036465724e-05,
"loss": 0.7949,
"step": 307000
},
{
"epoch": 0.43,
"learning_rate": 4.2837940420320674e-05,
"loss": 0.7978,
"step": 307500
},
{
"epoch": 0.43,
"learning_rate": 4.282631377587278e-05,
"loss": 0.7875,
"step": 308000
},
{
"epoch": 0.43,
"learning_rate": 4.2814663831536216e-05,
"loss": 0.7979,
"step": 308500
},
{
"epoch": 0.43,
"learning_rate": 4.280301388719965e-05,
"loss": 0.8013,
"step": 309000
},
{
"epoch": 0.43,
"learning_rate": 4.279136394286308e-05,
"loss": 0.8133,
"step": 309500
},
{
"epoch": 0.43,
"learning_rate": 4.277971399852651e-05,
"loss": 0.8316,
"step": 310000
},
{
"epoch": 0.43,
"learning_rate": 4.276808735407862e-05,
"loss": 0.8268,
"step": 310500
},
{
"epoch": 0.43,
"learning_rate": 4.2756437409742055e-05,
"loss": 0.8176,
"step": 311000
},
{
"epoch": 0.44,
"learning_rate": 4.2744787465405494e-05,
"loss": 0.8166,
"step": 311500
},
{
"epoch": 0.44,
"learning_rate": 4.2733137521068927e-05,
"loss": 0.8111,
"step": 312000
},
{
"epoch": 0.44,
"learning_rate": 4.272148757673236e-05,
"loss": 0.8183,
"step": 312500
},
{
"epoch": 0.44,
"learning_rate": 4.270983763239579e-05,
"loss": 0.8156,
"step": 313000
},
{
"epoch": 0.44,
"learning_rate": 4.2698187688059224e-05,
"loss": 0.8149,
"step": 313500
},
{
"epoch": 0.44,
"learning_rate": 4.268653774372266e-05,
"loss": 0.8104,
"step": 314000
},
{
"epoch": 0.44,
"learning_rate": 4.2674887799386095e-05,
"loss": 0.8024,
"step": 314500
},
{
"epoch": 0.44,
"learning_rate": 4.2663237855049534e-05,
"loss": 0.8,
"step": 315000
},
{
"epoch": 0.44,
"learning_rate": 4.265158791071297e-05,
"loss": 0.8098,
"step": 315500
},
{
"epoch": 0.44,
"learning_rate": 4.263996126626507e-05,
"loss": 0.7983,
"step": 316000
},
{
"epoch": 0.44,
"learning_rate": 4.26283113219285e-05,
"loss": 0.8053,
"step": 316500
},
{
"epoch": 0.44,
"learning_rate": 4.2616661377591934e-05,
"loss": 0.8107,
"step": 317000
},
{
"epoch": 0.44,
"learning_rate": 4.2605034733144044e-05,
"loss": 0.8356,
"step": 317500
},
{
"epoch": 0.44,
"learning_rate": 4.259338478880748e-05,
"loss": 0.8317,
"step": 318000
},
{
"epoch": 0.45,
"learning_rate": 4.2581734844470915e-05,
"loss": 0.8162,
"step": 318500
},
{
"epoch": 0.45,
"learning_rate": 4.257008490013435e-05,
"loss": 0.8457,
"step": 319000
},
{
"epoch": 0.45,
"learning_rate": 4.255845825568646e-05,
"loss": 0.8166,
"step": 319500
},
{
"epoch": 0.45,
"learning_rate": 4.254680831134989e-05,
"loss": 0.8169,
"step": 320000
},
{
"epoch": 0.45,
"learning_rate": 4.253515836701332e-05,
"loss": 0.8308,
"step": 320500
},
{
"epoch": 0.45,
"learning_rate": 4.252353172256543e-05,
"loss": 0.8338,
"step": 321000
},
{
"epoch": 0.45,
"learning_rate": 4.251188177822887e-05,
"loss": 0.8385,
"step": 321500
},
{
"epoch": 0.45,
"learning_rate": 4.25002318338923e-05,
"loss": 0.8332,
"step": 322000
},
{
"epoch": 0.45,
"learning_rate": 4.2488581889555736e-05,
"loss": 0.8459,
"step": 322500
},
{
"epoch": 0.45,
"learning_rate": 4.247693194521917e-05,
"loss": 0.7985,
"step": 323000
},
{
"epoch": 0.45,
"learning_rate": 4.24652820008826e-05,
"loss": 0.7977,
"step": 323500
},
{
"epoch": 0.45,
"learning_rate": 4.245363205654603e-05,
"loss": 0.8423,
"step": 324000
},
{
"epoch": 0.45,
"learning_rate": 4.244198211220947e-05,
"loss": 0.8291,
"step": 324500
},
{
"epoch": 0.45,
"learning_rate": 4.2430332167872904e-05,
"loss": 0.8161,
"step": 325000
},
{
"epoch": 0.46,
"learning_rate": 4.2418705523425014e-05,
"loss": 0.8181,
"step": 325500
},
{
"epoch": 0.46,
"learning_rate": 4.2407055579088446e-05,
"loss": 0.8091,
"step": 326000
},
{
"epoch": 0.46,
"learning_rate": 4.239540563475188e-05,
"loss": 0.839,
"step": 326500
},
{
"epoch": 0.46,
"learning_rate": 4.238375569041531e-05,
"loss": 0.8544,
"step": 327000
},
{
"epoch": 0.46,
"learning_rate": 4.237210574607875e-05,
"loss": 0.8582,
"step": 327500
},
{
"epoch": 0.46,
"learning_rate": 4.236047910163086e-05,
"loss": 0.8648,
"step": 328000
},
{
"epoch": 0.46,
"learning_rate": 4.234882915729429e-05,
"loss": 0.8724,
"step": 328500
},
{
"epoch": 0.46,
"learning_rate": 4.2337179212957725e-05,
"loss": 0.8575,
"step": 329000
},
{
"epoch": 0.46,
"learning_rate": 4.232552926862116e-05,
"loss": 0.8607,
"step": 329500
},
{
"epoch": 0.46,
"learning_rate": 4.231387932428459e-05,
"loss": 0.8543,
"step": 330000
},
{
"epoch": 0.46,
"learning_rate": 4.230222937994803e-05,
"loss": 0.8287,
"step": 330500
},
{
"epoch": 0.46,
"learning_rate": 4.229057943561146e-05,
"loss": 0.8257,
"step": 331000
},
{
"epoch": 0.46,
"learning_rate": 4.227895279116357e-05,
"loss": 0.8382,
"step": 331500
},
{
"epoch": 0.46,
"learning_rate": 4.2267302846827e-05,
"loss": 0.8311,
"step": 332000
},
{
"epoch": 0.46,
"learning_rate": 4.2255652902490435e-05,
"loss": 0.8336,
"step": 332500
},
{
"epoch": 0.47,
"learning_rate": 4.224400295815387e-05,
"loss": 0.826,
"step": 333000
},
{
"epoch": 0.47,
"learning_rate": 4.22323530138173e-05,
"loss": 0.8268,
"step": 333500
},
{
"epoch": 0.47,
"learning_rate": 4.222072636936941e-05,
"loss": 0.829,
"step": 334000
},
{
"epoch": 0.47,
"learning_rate": 4.220907642503284e-05,
"loss": 0.829,
"step": 334500
},
{
"epoch": 0.47,
"learning_rate": 4.219742648069628e-05,
"loss": 0.8276,
"step": 335000
},
{
"epoch": 0.47,
"learning_rate": 4.2185776536359714e-05,
"loss": 0.8348,
"step": 335500
},
{
"epoch": 0.47,
"learning_rate": 4.2174126592023146e-05,
"loss": 0.8374,
"step": 336000
},
{
"epoch": 0.47,
"learning_rate": 4.216247664768658e-05,
"loss": 0.8373,
"step": 336500
},
{
"epoch": 0.47,
"learning_rate": 4.215082670335002e-05,
"loss": 0.8385,
"step": 337000
},
{
"epoch": 0.47,
"learning_rate": 4.213917675901345e-05,
"loss": 0.8631,
"step": 337500
},
{
"epoch": 0.47,
"learning_rate": 4.212755011456555e-05,
"loss": 0.8468,
"step": 338000
},
{
"epoch": 0.47,
"learning_rate": 4.211590017022899e-05,
"loss": 0.8287,
"step": 338500
},
{
"epoch": 0.47,
"learning_rate": 4.2104250225892424e-05,
"loss": 0.8369,
"step": 339000
},
{
"epoch": 0.47,
"learning_rate": 4.2092600281555857e-05,
"loss": 0.8269,
"step": 339500
},
{
"epoch": 0.48,
"learning_rate": 4.208095033721929e-05,
"loss": 0.8374,
"step": 340000
},
{
"epoch": 0.48,
"learning_rate": 4.206930039288273e-05,
"loss": 0.8268,
"step": 340500
},
{
"epoch": 0.48,
"learning_rate": 4.205767374843483e-05,
"loss": 0.8253,
"step": 341000
},
{
"epoch": 0.48,
"learning_rate": 4.204602380409827e-05,
"loss": 0.823,
"step": 341500
},
{
"epoch": 0.48,
"learning_rate": 4.203439715965037e-05,
"loss": 0.8634,
"step": 342000
},
{
"epoch": 0.48,
"learning_rate": 4.2022747215313805e-05,
"loss": 0.8651,
"step": 342500
},
{
"epoch": 0.48,
"learning_rate": 4.2011120570865915e-05,
"loss": 0.8343,
"step": 343000
},
{
"epoch": 0.48,
"learning_rate": 4.199949392641802e-05,
"loss": 0.8262,
"step": 343500
},
{
"epoch": 0.48,
"learning_rate": 4.198784398208146e-05,
"loss": 0.8061,
"step": 344000
},
{
"epoch": 0.48,
"learning_rate": 4.197619403774489e-05,
"loss": 0.8069,
"step": 344500
},
{
"epoch": 0.48,
"learning_rate": 4.196454409340833e-05,
"loss": 0.8068,
"step": 345000
},
{
"epoch": 0.48,
"learning_rate": 4.195289414907176e-05,
"loss": 0.8182,
"step": 345500
},
{
"epoch": 0.48,
"learning_rate": 4.194124420473519e-05,
"loss": 0.8081,
"step": 346000
},
{
"epoch": 0.48,
"learning_rate": 4.1929594260398626e-05,
"loss": 0.8062,
"step": 346500
},
{
"epoch": 0.49,
"learning_rate": 4.191794431606206e-05,
"loss": 0.8123,
"step": 347000
},
{
"epoch": 0.49,
"learning_rate": 4.19062943717255e-05,
"loss": 0.8168,
"step": 347500
},
{
"epoch": 0.49,
"learning_rate": 4.189464442738893e-05,
"loss": 0.8224,
"step": 348000
},
{
"epoch": 0.49,
"learning_rate": 4.188299448305236e-05,
"loss": 0.8218,
"step": 348500
},
{
"epoch": 0.49,
"learning_rate": 4.1871344538715794e-05,
"loss": 0.8066,
"step": 349000
},
{
"epoch": 0.49,
"learning_rate": 4.1859694594379227e-05,
"loss": 0.8019,
"step": 349500
},
{
"epoch": 0.49,
"learning_rate": 4.184804465004266e-05,
"loss": 0.8052,
"step": 350000
},
{
"epoch": 0.49,
"learning_rate": 4.18363947057061e-05,
"loss": 0.7851,
"step": 350500
},
{
"epoch": 0.49,
"learning_rate": 4.182474476136953e-05,
"loss": 0.7908,
"step": 351000
},
{
"epoch": 0.49,
"learning_rate": 4.181309481703296e-05,
"loss": 0.7895,
"step": 351500
},
{
"epoch": 0.49,
"learning_rate": 4.18014448726964e-05,
"loss": 0.786,
"step": 352000
},
{
"epoch": 0.49,
"learning_rate": 4.1789818228248505e-05,
"loss": 0.7863,
"step": 352500
},
{
"epoch": 0.49,
"learning_rate": 4.177816828391194e-05,
"loss": 0.7956,
"step": 353000
},
{
"epoch": 0.49,
"learning_rate": 4.176654163946405e-05,
"loss": 0.7947,
"step": 353500
},
{
"epoch": 0.49,
"learning_rate": 4.1754891695127486e-05,
"loss": 0.7881,
"step": 354000
},
{
"epoch": 0.5,
"learning_rate": 4.174324175079092e-05,
"loss": 0.7873,
"step": 354500
},
{
"epoch": 0.5,
"learning_rate": 4.173159180645435e-05,
"loss": 0.7989,
"step": 355000
},
{
"epoch": 0.5,
"learning_rate": 4.171994186211778e-05,
"loss": 0.7978,
"step": 355500
},
{
"epoch": 0.5,
"learning_rate": 4.1708291917781215e-05,
"loss": 0.7814,
"step": 356000
},
{
"epoch": 0.5,
"learning_rate": 4.1696641973444655e-05,
"loss": 0.7898,
"step": 356500
},
{
"epoch": 0.5,
"learning_rate": 4.1685015328996764e-05,
"loss": 0.7904,
"step": 357000
},
{
"epoch": 0.5,
"learning_rate": 4.1673365384660197e-05,
"loss": 0.7859,
"step": 357500
},
{
"epoch": 0.5,
"learning_rate": 4.166171544032363e-05,
"loss": 0.79,
"step": 358000
},
{
"epoch": 0.5,
"learning_rate": 4.165006549598706e-05,
"loss": 0.7886,
"step": 358500
},
{
"epoch": 0.5,
"learning_rate": 4.1638415551650494e-05,
"loss": 0.7877,
"step": 359000
},
{
"epoch": 0.5,
"learning_rate": 4.1626765607313926e-05,
"loss": 0.7856,
"step": 359500
},
{
"epoch": 0.5,
"learning_rate": 4.1615115662977365e-05,
"loss": 0.7807,
"step": 360000
},
{
"epoch": 0.5,
"learning_rate": 4.16034657186408e-05,
"loss": 0.7847,
"step": 360500
},
{
"epoch": 0.5,
"learning_rate": 4.159181577430423e-05,
"loss": 0.787,
"step": 361000
},
{
"epoch": 0.51,
"learning_rate": 4.158016582996766e-05,
"loss": 0.7851,
"step": 361500
},
{
"epoch": 0.51,
"learning_rate": 4.15685158856311e-05,
"loss": 0.787,
"step": 362000
},
{
"epoch": 0.51,
"learning_rate": 4.1556865941294534e-05,
"loss": 0.7851,
"step": 362500
},
{
"epoch": 0.51,
"learning_rate": 4.154521599695797e-05,
"loss": 0.7885,
"step": 363000
},
{
"epoch": 0.51,
"learning_rate": 4.1533566052621405e-05,
"loss": 0.7823,
"step": 363500
},
{
"epoch": 0.51,
"learning_rate": 4.152191610828484e-05,
"loss": 0.7876,
"step": 364000
},
{
"epoch": 0.51,
"learning_rate": 4.151026616394827e-05,
"loss": 0.7874,
"step": 364500
},
{
"epoch": 0.51,
"learning_rate": 4.14986162196117e-05,
"loss": 0.7853,
"step": 365000
},
{
"epoch": 0.51,
"learning_rate": 4.148698957516381e-05,
"loss": 0.7946,
"step": 365500
},
{
"epoch": 0.51,
"learning_rate": 4.147533963082725e-05,
"loss": 0.7879,
"step": 366000
},
{
"epoch": 0.51,
"learning_rate": 4.1463689686490684e-05,
"loss": 0.788,
"step": 366500
},
{
"epoch": 0.51,
"learning_rate": 4.1452063042042787e-05,
"loss": 0.7834,
"step": 367000
},
{
"epoch": 0.51,
"learning_rate": 4.144041309770622e-05,
"loss": 0.7832,
"step": 367500
},
{
"epoch": 0.51,
"learning_rate": 4.142876315336965e-05,
"loss": 0.7851,
"step": 368000
},
{
"epoch": 0.52,
"learning_rate": 4.141711320903309e-05,
"loss": 0.7814,
"step": 368500
},
{
"epoch": 0.52,
"learning_rate": 4.140546326469652e-05,
"loss": 0.7828,
"step": 369000
},
{
"epoch": 0.52,
"learning_rate": 4.139381332035996e-05,
"loss": 0.7884,
"step": 369500
},
{
"epoch": 0.52,
"learning_rate": 4.1382186675912065e-05,
"loss": 0.787,
"step": 370000
},
{
"epoch": 0.52,
"learning_rate": 4.13705367315755e-05,
"loss": 0.7932,
"step": 370500
},
{
"epoch": 0.52,
"learning_rate": 4.135888678723893e-05,
"loss": 0.788,
"step": 371000
},
{
"epoch": 0.52,
"learning_rate": 4.134723684290236e-05,
"loss": 0.7815,
"step": 371500
},
{
"epoch": 0.52,
"learning_rate": 4.13355868985658e-05,
"loss": 0.7907,
"step": 372000
},
{
"epoch": 0.52,
"learning_rate": 4.132396025411791e-05,
"loss": 0.7947,
"step": 372500
},
{
"epoch": 0.52,
"learning_rate": 4.131231030978134e-05,
"loss": 0.7902,
"step": 373000
},
{
"epoch": 0.52,
"learning_rate": 4.1300660365444775e-05,
"loss": 0.7888,
"step": 373500
},
{
"epoch": 0.52,
"learning_rate": 4.128901042110821e-05,
"loss": 0.786,
"step": 374000
},
{
"epoch": 0.52,
"learning_rate": 4.127738377666032e-05,
"loss": 0.7844,
"step": 374500
},
{
"epoch": 0.52,
"learning_rate": 4.126573383232375e-05,
"loss": 0.7838,
"step": 375000
},
{
"epoch": 0.52,
"learning_rate": 4.125408388798718e-05,
"loss": 0.7809,
"step": 375500
},
{
"epoch": 0.53,
"learning_rate": 4.124243394365062e-05,
"loss": 0.7837,
"step": 376000
},
{
"epoch": 0.53,
"learning_rate": 4.123080729920273e-05,
"loss": 0.7841,
"step": 376500
},
{
"epoch": 0.53,
"learning_rate": 4.121915735486616e-05,
"loss": 0.7862,
"step": 377000
},
{
"epoch": 0.53,
"learning_rate": 4.1207507410529596e-05,
"loss": 0.7818,
"step": 377500
},
{
"epoch": 0.53,
"learning_rate": 4.119585746619303e-05,
"loss": 0.7807,
"step": 378000
},
{
"epoch": 0.53,
"learning_rate": 4.118420752185646e-05,
"loss": 0.7823,
"step": 378500
},
{
"epoch": 0.53,
"learning_rate": 4.11725575775199e-05,
"loss": 0.7818,
"step": 379000
},
{
"epoch": 0.53,
"learning_rate": 4.116090763318333e-05,
"loss": 0.7872,
"step": 379500
},
{
"epoch": 0.53,
"learning_rate": 4.114928098873544e-05,
"loss": 0.7896,
"step": 380000
},
{
"epoch": 0.53,
"learning_rate": 4.1137631044398874e-05,
"loss": 0.7799,
"step": 380500
},
{
"epoch": 0.53,
"learning_rate": 4.1125981100062306e-05,
"loss": 0.7825,
"step": 381000
},
{
"epoch": 0.53,
"learning_rate": 4.111433115572574e-05,
"loss": 0.803,
"step": 381500
},
{
"epoch": 0.53,
"learning_rate": 4.110268121138917e-05,
"loss": 0.8095,
"step": 382000
},
{
"epoch": 0.53,
"learning_rate": 4.109103126705261e-05,
"loss": 0.8068,
"step": 382500
},
{
"epoch": 0.54,
"learning_rate": 4.107938132271604e-05,
"loss": 0.7902,
"step": 383000
},
{
"epoch": 0.54,
"learning_rate": 4.1067731378379475e-05,
"loss": 0.7808,
"step": 383500
},
{
"epoch": 0.54,
"learning_rate": 4.105608143404291e-05,
"loss": 0.7997,
"step": 384000
},
{
"epoch": 0.54,
"learning_rate": 4.104445478959502e-05,
"loss": 0.8161,
"step": 384500
},
{
"epoch": 0.54,
"learning_rate": 4.103280484525845e-05,
"loss": 0.7839,
"step": 385000
},
{
"epoch": 0.54,
"learning_rate": 4.102115490092189e-05,
"loss": 0.7872,
"step": 385500
},
{
"epoch": 0.54,
"learning_rate": 4.100950495658532e-05,
"loss": 0.7847,
"step": 386000
},
{
"epoch": 0.54,
"learning_rate": 4.099785501224875e-05,
"loss": 0.78,
"step": 386500
},
{
"epoch": 0.54,
"learning_rate": 4.098622836780086e-05,
"loss": 0.7824,
"step": 387000
},
{
"epoch": 0.54,
"learning_rate": 4.0974578423464295e-05,
"loss": 0.7872,
"step": 387500
},
{
"epoch": 0.54,
"learning_rate": 4.09629517790164e-05,
"loss": 0.7835,
"step": 388000
},
{
"epoch": 0.54,
"learning_rate": 4.095130183467984e-05,
"loss": 0.7871,
"step": 388500
},
{
"epoch": 0.54,
"learning_rate": 4.093965189034327e-05,
"loss": 0.7863,
"step": 389000
},
{
"epoch": 0.54,
"learning_rate": 4.092800194600671e-05,
"loss": 0.7843,
"step": 389500
},
{
"epoch": 0.55,
"learning_rate": 4.091635200167014e-05,
"loss": 0.7778,
"step": 390000
},
{
"epoch": 0.55,
"learning_rate": 4.0904702057333573e-05,
"loss": 0.7853,
"step": 390500
},
{
"epoch": 0.55,
"learning_rate": 4.0893052112997006e-05,
"loss": 0.7824,
"step": 391000
},
{
"epoch": 0.55,
"learning_rate": 4.088140216866044e-05,
"loss": 0.7862,
"step": 391500
},
{
"epoch": 0.55,
"learning_rate": 4.086975222432388e-05,
"loss": 0.7814,
"step": 392000
},
{
"epoch": 0.55,
"learning_rate": 4.085810227998731e-05,
"loss": 0.7866,
"step": 392500
},
{
"epoch": 0.55,
"learning_rate": 4.084645233565074e-05,
"loss": 0.7844,
"step": 393000
},
{
"epoch": 0.55,
"learning_rate": 4.0834802391314174e-05,
"loss": 0.7828,
"step": 393500
},
{
"epoch": 0.55,
"learning_rate": 4.082315244697761e-05,
"loss": 0.7886,
"step": 394000
},
{
"epoch": 0.55,
"learning_rate": 4.0811525802529716e-05,
"loss": 0.7858,
"step": 394500
},
{
"epoch": 0.55,
"learning_rate": 4.0799875858193156e-05,
"loss": 0.7856,
"step": 395000
},
{
"epoch": 0.55,
"learning_rate": 4.078822591385659e-05,
"loss": 0.786,
"step": 395500
},
{
"epoch": 0.55,
"learning_rate": 4.077657596952002e-05,
"loss": 0.7984,
"step": 396000
},
{
"epoch": 0.55,
"learning_rate": 4.076492602518345e-05,
"loss": 0.7928,
"step": 396500
},
{
"epoch": 0.56,
"learning_rate": 4.0753276080846885e-05,
"loss": 0.806,
"step": 397000
},
{
"epoch": 0.56,
"learning_rate": 4.0741649436398995e-05,
"loss": 0.7992,
"step": 397500
},
{
"epoch": 0.56,
"learning_rate": 4.072999949206243e-05,
"loss": 0.8015,
"step": 398000
},
{
"epoch": 0.56,
"learning_rate": 4.0718349547725866e-05,
"loss": 0.8038,
"step": 398500
},
{
"epoch": 0.56,
"learning_rate": 4.07066996033893e-05,
"loss": 0.7918,
"step": 399000
},
{
"epoch": 0.56,
"learning_rate": 4.069504965905273e-05,
"loss": 0.7883,
"step": 399500
},
{
"epoch": 0.56,
"learning_rate": 4.0683399714716163e-05,
"loss": 0.7783,
"step": 400000
},
{
"epoch": 0.56,
"learning_rate": 4.0671749770379596e-05,
"loss": 0.7752,
"step": 400500
},
{
"epoch": 0.56,
"learning_rate": 4.0660099826043035e-05,
"loss": 0.7721,
"step": 401000
},
{
"epoch": 0.56,
"learning_rate": 4.064844988170647e-05,
"loss": 0.7773,
"step": 401500
},
{
"epoch": 0.56,
"learning_rate": 4.06367999373699e-05,
"loss": 0.7706,
"step": 402000
},
{
"epoch": 0.56,
"learning_rate": 4.062514999303333e-05,
"loss": 0.7756,
"step": 402500
},
{
"epoch": 0.56,
"learning_rate": 4.061352334858544e-05,
"loss": 0.7757,
"step": 403000
},
{
"epoch": 0.56,
"learning_rate": 4.0601873404248874e-05,
"loss": 0.7725,
"step": 403500
},
{
"epoch": 0.56,
"learning_rate": 4.059022345991231e-05,
"loss": 0.7725,
"step": 404000
},
{
"epoch": 0.57,
"learning_rate": 4.0578573515575746e-05,
"loss": 0.7754,
"step": 404500
},
{
"epoch": 0.57,
"learning_rate": 4.056692357123918e-05,
"loss": 0.7733,
"step": 405000
},
{
"epoch": 0.57,
"learning_rate": 4.055529692679129e-05,
"loss": 0.7706,
"step": 405500
},
{
"epoch": 0.57,
"learning_rate": 4.054367028234339e-05,
"loss": 0.7704,
"step": 406000
},
{
"epoch": 0.57,
"learning_rate": 4.053202033800683e-05,
"loss": 0.7765,
"step": 406500
},
{
"epoch": 0.57,
"learning_rate": 4.052037039367026e-05,
"loss": 0.7811,
"step": 407000
},
{
"epoch": 0.57,
"learning_rate": 4.0508743749222365e-05,
"loss": 0.7806,
"step": 407500
},
{
"epoch": 0.57,
"learning_rate": 4.04970938048858e-05,
"loss": 0.7753,
"step": 408000
},
{
"epoch": 0.57,
"learning_rate": 4.0485443860549236e-05,
"loss": 0.7761,
"step": 408500
},
{
"epoch": 0.57,
"learning_rate": 4.0473840515990016e-05,
"loss": 0.8088,
"step": 409000
},
{
"epoch": 0.57,
"learning_rate": 4.0462190571653456e-05,
"loss": 0.7892,
"step": 409500
},
{
"epoch": 0.57,
"learning_rate": 4.045054062731689e-05,
"loss": 0.7851,
"step": 410000
},
{
"epoch": 0.57,
"learning_rate": 4.043889068298032e-05,
"loss": 0.7802,
"step": 410500
},
{
"epoch": 0.57,
"learning_rate": 4.042724073864375e-05,
"loss": 0.7767,
"step": 411000
},
{
"epoch": 0.58,
"learning_rate": 4.0415590794307185e-05,
"loss": 0.7801,
"step": 411500
},
{
"epoch": 0.58,
"learning_rate": 4.0403940849970624e-05,
"loss": 0.7739,
"step": 412000
},
{
"epoch": 0.58,
"learning_rate": 4.039231420552273e-05,
"loss": 0.776,
"step": 412500
},
{
"epoch": 0.58,
"learning_rate": 4.0380664261186166e-05,
"loss": 0.7782,
"step": 413000
},
{
"epoch": 0.58,
"learning_rate": 4.03690143168496e-05,
"loss": 0.7742,
"step": 413500
},
{
"epoch": 0.58,
"learning_rate": 4.035736437251303e-05,
"loss": 0.781,
"step": 414000
},
{
"epoch": 0.58,
"learning_rate": 4.034571442817646e-05,
"loss": 0.7775,
"step": 414500
},
{
"epoch": 0.58,
"learning_rate": 4.03340644838399e-05,
"loss": 0.7733,
"step": 415000
},
{
"epoch": 0.58,
"learning_rate": 4.0322414539503335e-05,
"loss": 0.7693,
"step": 415500
},
{
"epoch": 0.58,
"learning_rate": 4.031076459516677e-05,
"loss": 0.7832,
"step": 416000
},
{
"epoch": 0.58,
"learning_rate": 4.02991146508302e-05,
"loss": 0.7757,
"step": 416500
},
{
"epoch": 0.58,
"learning_rate": 4.028746470649363e-05,
"loss": 0.7746,
"step": 417000
},
{
"epoch": 0.58,
"learning_rate": 4.0275814762157064e-05,
"loss": 0.7743,
"step": 417500
},
{
"epoch": 0.58,
"learning_rate": 4.0264164817820503e-05,
"loss": 0.7696,
"step": 418000
},
{
"epoch": 0.59,
"learning_rate": 4.0252514873483936e-05,
"loss": 0.7748,
"step": 418500
},
{
"epoch": 0.59,
"learning_rate": 4.0240888229036045e-05,
"loss": 0.7715,
"step": 419000
},
{
"epoch": 0.59,
"learning_rate": 4.022926158458815e-05,
"loss": 0.776,
"step": 419500
},
{
"epoch": 0.59,
"learning_rate": 4.021761164025159e-05,
"loss": 0.7788,
"step": 420000
},
{
"epoch": 0.59,
"learning_rate": 4.020596169591502e-05,
"loss": 0.8024,
"step": 420500
},
{
"epoch": 0.59,
"learning_rate": 4.019431175157845e-05,
"loss": 0.7791,
"step": 421000
},
{
"epoch": 0.59,
"learning_rate": 4.018266180724189e-05,
"loss": 0.7831,
"step": 421500
},
{
"epoch": 0.59,
"learning_rate": 4.0171011862905324e-05,
"loss": 0.7785,
"step": 422000
},
{
"epoch": 0.59,
"learning_rate": 4.0159385218457427e-05,
"loss": 0.7784,
"step": 422500
},
{
"epoch": 0.59,
"learning_rate": 4.0147735274120866e-05,
"loss": 0.7714,
"step": 423000
},
{
"epoch": 0.59,
"learning_rate": 4.01360853297843e-05,
"loss": 0.7827,
"step": 423500
},
{
"epoch": 0.59,
"learning_rate": 4.01244586853364e-05,
"loss": 0.7722,
"step": 424000
},
{
"epoch": 0.59,
"learning_rate": 4.011280874099983e-05,
"loss": 0.7777,
"step": 424500
},
{
"epoch": 0.59,
"learning_rate": 4.010115879666327e-05,
"loss": 0.7688,
"step": 425000
},
{
"epoch": 0.59,
"learning_rate": 4.0089508852326705e-05,
"loss": 0.7771,
"step": 425500
},
{
"epoch": 0.6,
"learning_rate": 4.007785890799014e-05,
"loss": 0.7729,
"step": 426000
},
{
"epoch": 0.6,
"learning_rate": 4.0066208963653576e-05,
"loss": 0.7732,
"step": 426500
},
{
"epoch": 0.6,
"learning_rate": 4.005455901931701e-05,
"loss": 0.7754,
"step": 427000
},
{
"epoch": 0.6,
"learning_rate": 4.004290907498044e-05,
"loss": 0.7773,
"step": 427500
},
{
"epoch": 0.6,
"learning_rate": 4.003125913064388e-05,
"loss": 0.7773,
"step": 428000
},
{
"epoch": 0.6,
"learning_rate": 4.001963248619598e-05,
"loss": 0.7772,
"step": 428500
},
{
"epoch": 0.6,
"learning_rate": 4.0007982541859415e-05,
"loss": 0.7723,
"step": 429000
},
{
"epoch": 0.6,
"learning_rate": 3.9996355897411525e-05,
"loss": 0.7829,
"step": 429500
},
{
"epoch": 0.6,
"learning_rate": 3.998470595307496e-05,
"loss": 0.7803,
"step": 430000
},
{
"epoch": 0.6,
"learning_rate": 3.997305600873839e-05,
"loss": 0.7911,
"step": 430500
},
{
"epoch": 0.6,
"learning_rate": 3.996140606440182e-05,
"loss": 0.7775,
"step": 431000
},
{
"epoch": 0.6,
"learning_rate": 3.994975612006526e-05,
"loss": 0.7801,
"step": 431500
},
{
"epoch": 0.6,
"learning_rate": 3.9938106175728694e-05,
"loss": 0.7775,
"step": 432000
},
{
"epoch": 0.6,
"learning_rate": 3.9926456231392126e-05,
"loss": 0.7758,
"step": 432500
},
{
"epoch": 0.61,
"learning_rate": 3.991480628705556e-05,
"loss": 0.7754,
"step": 433000
},
{
"epoch": 0.61,
"learning_rate": 3.9903156342719e-05,
"loss": 0.7784,
"step": 433500
},
{
"epoch": 0.61,
"learning_rate": 3.989150639838243e-05,
"loss": 0.7754,
"step": 434000
},
{
"epoch": 0.61,
"learning_rate": 3.987987975393454e-05,
"loss": 0.7753,
"step": 434500
},
{
"epoch": 0.61,
"learning_rate": 3.986822980959797e-05,
"loss": 0.7779,
"step": 435000
},
{
"epoch": 0.61,
"learning_rate": 3.9856579865261404e-05,
"loss": 0.7766,
"step": 435500
},
{
"epoch": 0.61,
"learning_rate": 3.984492992092484e-05,
"loss": 0.7806,
"step": 436000
},
{
"epoch": 0.61,
"learning_rate": 3.983327997658827e-05,
"loss": 0.7814,
"step": 436500
},
{
"epoch": 0.61,
"learning_rate": 3.982163003225171e-05,
"loss": 0.7784,
"step": 437000
},
{
"epoch": 0.61,
"learning_rate": 3.980998008791515e-05,
"loss": 0.7692,
"step": 437500
},
{
"epoch": 0.61,
"learning_rate": 3.979833014357858e-05,
"loss": 0.7721,
"step": 438000
},
{
"epoch": 0.61,
"learning_rate": 3.978668019924201e-05,
"loss": 0.7697,
"step": 438500
},
{
"epoch": 0.61,
"learning_rate": 3.9775030254905445e-05,
"loss": 0.7706,
"step": 439000
},
{
"epoch": 0.61,
"learning_rate": 3.976338031056888e-05,
"loss": 0.7728,
"step": 439500
},
{
"epoch": 0.62,
"learning_rate": 3.975173036623231e-05,
"loss": 0.774,
"step": 440000
},
{
"epoch": 0.62,
"learning_rate": 3.974008042189575e-05,
"loss": 0.7686,
"step": 440500
},
{
"epoch": 0.62,
"learning_rate": 3.972847707733653e-05,
"loss": 0.7732,
"step": 441000
},
{
"epoch": 0.62,
"learning_rate": 3.971682713299996e-05,
"loss": 0.7715,
"step": 441500
},
{
"epoch": 0.62,
"learning_rate": 3.970517718866339e-05,
"loss": 0.773,
"step": 442000
},
{
"epoch": 0.62,
"learning_rate": 3.9693527244326826e-05,
"loss": 0.7728,
"step": 442500
},
{
"epoch": 0.62,
"learning_rate": 3.968187729999026e-05,
"loss": 0.7694,
"step": 443000
},
{
"epoch": 0.62,
"learning_rate": 3.96702273556537e-05,
"loss": 0.773,
"step": 443500
},
{
"epoch": 0.62,
"learning_rate": 3.965857741131713e-05,
"loss": 0.7704,
"step": 444000
},
{
"epoch": 0.62,
"learning_rate": 3.964692746698057e-05,
"loss": 0.776,
"step": 444500
},
{
"epoch": 0.62,
"learning_rate": 3.9635277522644e-05,
"loss": 0.7741,
"step": 445000
},
{
"epoch": 0.62,
"learning_rate": 3.9623627578307433e-05,
"loss": 0.7656,
"step": 445500
},
{
"epoch": 0.62,
"learning_rate": 3.9611977633970866e-05,
"loss": 0.7709,
"step": 446000
},
{
"epoch": 0.62,
"learning_rate": 3.9600374289411646e-05,
"loss": 0.7688,
"step": 446500
},
{
"epoch": 0.62,
"learning_rate": 3.958872434507508e-05,
"loss": 0.7756,
"step": 447000
},
{
"epoch": 0.63,
"learning_rate": 3.957707440073852e-05,
"loss": 0.7668,
"step": 447500
},
{
"epoch": 0.63,
"learning_rate": 3.956542445640195e-05,
"loss": 0.7665,
"step": 448000
},
{
"epoch": 0.63,
"learning_rate": 3.955377451206538e-05,
"loss": 0.7719,
"step": 448500
},
{
"epoch": 0.63,
"learning_rate": 3.9542124567728815e-05,
"loss": 0.7662,
"step": 449000
},
{
"epoch": 0.63,
"learning_rate": 3.953047462339225e-05,
"loss": 0.7666,
"step": 449500
},
{
"epoch": 0.63,
"learning_rate": 3.9518847978944357e-05,
"loss": 0.7697,
"step": 450000
},
{
"epoch": 0.63,
"learning_rate": 3.9507198034607796e-05,
"loss": 0.769,
"step": 450500
},
{
"epoch": 0.63,
"learning_rate": 3.949554809027123e-05,
"loss": 0.7706,
"step": 451000
},
{
"epoch": 0.63,
"learning_rate": 3.948389814593466e-05,
"loss": 0.7684,
"step": 451500
},
{
"epoch": 0.63,
"learning_rate": 3.947224820159809e-05,
"loss": 0.7676,
"step": 452000
},
{
"epoch": 0.63,
"learning_rate": 3.9460598257261525e-05,
"loss": 0.771,
"step": 452500
},
{
"epoch": 0.63,
"learning_rate": 3.944894831292496e-05,
"loss": 0.7712,
"step": 453000
},
{
"epoch": 0.63,
"learning_rate": 3.94372983685884e-05,
"loss": 0.7688,
"step": 453500
},
{
"epoch": 0.63,
"learning_rate": 3.942564842425183e-05,
"loss": 0.771,
"step": 454000
},
{
"epoch": 0.64,
"learning_rate": 3.941399847991527e-05,
"loss": 0.7656,
"step": 454500
},
{
"epoch": 0.64,
"learning_rate": 3.940237183546737e-05,
"loss": 0.7647,
"step": 455000
},
{
"epoch": 0.64,
"learning_rate": 3.9390721891130803e-05,
"loss": 0.7624,
"step": 455500
},
{
"epoch": 0.64,
"learning_rate": 3.9379071946794236e-05,
"loss": 0.7673,
"step": 456000
},
{
"epoch": 0.64,
"learning_rate": 3.9367422002457675e-05,
"loss": 0.7634,
"step": 456500
},
{
"epoch": 0.64,
"learning_rate": 3.935577205812111e-05,
"loss": 0.7618,
"step": 457000
},
{
"epoch": 0.64,
"learning_rate": 3.934412211378454e-05,
"loss": 0.7607,
"step": 457500
},
{
"epoch": 0.64,
"learning_rate": 3.933247216944798e-05,
"loss": 0.7593,
"step": 458000
},
{
"epoch": 0.64,
"learning_rate": 3.932082222511141e-05,
"loss": 0.7635,
"step": 458500
},
{
"epoch": 0.64,
"learning_rate": 3.9309172280774844e-05,
"loss": 0.757,
"step": 459000
},
{
"epoch": 0.64,
"learning_rate": 3.929752233643828e-05,
"loss": 0.7646,
"step": 459500
},
{
"epoch": 0.64,
"learning_rate": 3.9285895691990386e-05,
"loss": 0.761,
"step": 460000
},
{
"epoch": 0.64,
"learning_rate": 3.927424574765382e-05,
"loss": 0.7659,
"step": 460500
},
{
"epoch": 0.64,
"learning_rate": 3.926261910320593e-05,
"loss": 0.7607,
"step": 461000
},
{
"epoch": 0.65,
"learning_rate": 3.925096915886936e-05,
"loss": 0.766,
"step": 461500
},
{
"epoch": 0.65,
"learning_rate": 3.923931921453279e-05,
"loss": 0.7602,
"step": 462000
},
{
"epoch": 0.65,
"learning_rate": 3.9227669270196225e-05,
"loss": 0.766,
"step": 462500
},
{
"epoch": 0.65,
"learning_rate": 3.9216042625748334e-05,
"loss": 0.7637,
"step": 463000
},
{
"epoch": 0.65,
"learning_rate": 3.9204392681411773e-05,
"loss": 0.7652,
"step": 463500
},
{
"epoch": 0.65,
"learning_rate": 3.9192742737075206e-05,
"loss": 0.7648,
"step": 464000
},
{
"epoch": 0.65,
"learning_rate": 3.918109279273864e-05,
"loss": 0.7639,
"step": 464500
},
{
"epoch": 0.65,
"learning_rate": 3.916944284840207e-05,
"loss": 0.766,
"step": 465000
},
{
"epoch": 0.65,
"learning_rate": 3.91577929040655e-05,
"loss": 0.769,
"step": 465500
},
{
"epoch": 0.65,
"learning_rate": 3.914616625961761e-05,
"loss": 0.7665,
"step": 466000
},
{
"epoch": 0.65,
"learning_rate": 3.913451631528105e-05,
"loss": 0.7663,
"step": 466500
},
{
"epoch": 0.65,
"learning_rate": 3.9122866370944484e-05,
"loss": 0.7672,
"step": 467000
},
{
"epoch": 0.65,
"learning_rate": 3.9111216426607916e-05,
"loss": 0.7622,
"step": 467500
},
{
"epoch": 0.65,
"learning_rate": 3.9099613082048697e-05,
"loss": 0.7655,
"step": 468000
},
{
"epoch": 0.65,
"learning_rate": 3.908796313771213e-05,
"loss": 0.7534,
"step": 468500
},
{
"epoch": 0.66,
"learning_rate": 3.907631319337556e-05,
"loss": 0.7596,
"step": 469000
},
{
"epoch": 0.66,
"learning_rate": 3.9064663249038994e-05,
"loss": 0.7588,
"step": 469500
},
{
"epoch": 0.66,
"learning_rate": 3.9053013304702426e-05,
"loss": 0.7607,
"step": 470000
},
{
"epoch": 0.66,
"learning_rate": 3.9041363360365865e-05,
"loss": 0.765,
"step": 470500
},
{
"epoch": 0.66,
"learning_rate": 3.90297134160293e-05,
"loss": 0.7609,
"step": 471000
},
{
"epoch": 0.66,
"learning_rate": 3.901806347169274e-05,
"loss": 0.7629,
"step": 471500
},
{
"epoch": 0.66,
"learning_rate": 3.900641352735617e-05,
"loss": 0.7656,
"step": 472000
},
{
"epoch": 0.66,
"learning_rate": 3.89947635830196e-05,
"loss": 0.7621,
"step": 472500
},
{
"epoch": 0.66,
"learning_rate": 3.898311363868304e-05,
"loss": 0.7606,
"step": 473000
},
{
"epoch": 0.66,
"learning_rate": 3.897146369434647e-05,
"loss": 0.7645,
"step": 473500
},
{
"epoch": 0.66,
"learning_rate": 3.8959813750009905e-05,
"loss": 0.7626,
"step": 474000
},
{
"epoch": 0.66,
"learning_rate": 3.894816380567334e-05,
"loss": 0.758,
"step": 474500
},
{
"epoch": 0.66,
"learning_rate": 3.893651386133677e-05,
"loss": 0.762,
"step": 475000
},
{
"epoch": 0.66,
"learning_rate": 3.892488721688888e-05,
"loss": 0.7597,
"step": 475500
},
{
"epoch": 0.67,
"learning_rate": 3.891323727255231e-05,
"loss": 0.7611,
"step": 476000
},
{
"epoch": 0.67,
"learning_rate": 3.890158732821575e-05,
"loss": 0.7588,
"step": 476500
},
{
"epoch": 0.67,
"learning_rate": 3.8889937383879184e-05,
"loss": 0.7607,
"step": 477000
},
{
"epoch": 0.67,
"learning_rate": 3.8878287439542616e-05,
"loss": 0.7602,
"step": 477500
},
{
"epoch": 0.67,
"learning_rate": 3.886663749520605e-05,
"loss": 0.7598,
"step": 478000
},
{
"epoch": 0.67,
"learning_rate": 3.885501085075816e-05,
"loss": 0.7653,
"step": 478500
},
{
"epoch": 0.67,
"learning_rate": 3.884338420631026e-05,
"loss": 0.7645,
"step": 479000
},
{
"epoch": 0.67,
"learning_rate": 3.883175756186237e-05,
"loss": 0.7676,
"step": 479500
},
{
"epoch": 0.67,
"learning_rate": 3.88201076175258e-05,
"loss": 0.768,
"step": 480000
},
{
"epoch": 0.67,
"learning_rate": 3.880845767318924e-05,
"loss": 0.7636,
"step": 480500
},
{
"epoch": 0.67,
"learning_rate": 3.8796807728852674e-05,
"loss": 0.7617,
"step": 481000
},
{
"epoch": 0.67,
"learning_rate": 3.878515778451611e-05,
"loss": 0.7571,
"step": 481500
},
{
"epoch": 0.67,
"learning_rate": 3.877350784017954e-05,
"loss": 0.7628,
"step": 482000
},
{
"epoch": 0.67,
"learning_rate": 3.876185789584297e-05,
"loss": 0.7676,
"step": 482500
},
{
"epoch": 0.68,
"learning_rate": 3.875020795150641e-05,
"loss": 0.7659,
"step": 483000
},
{
"epoch": 0.68,
"learning_rate": 3.873858130705852e-05,
"loss": 0.7647,
"step": 483500
},
{
"epoch": 0.68,
"learning_rate": 3.872695466261062e-05,
"loss": 0.779,
"step": 484000
},
{
"epoch": 0.68,
"learning_rate": 3.871532801816273e-05,
"loss": 0.7699,
"step": 484500
},
{
"epoch": 0.68,
"learning_rate": 3.8703678073826165e-05,
"loss": 0.7635,
"step": 485000
},
{
"epoch": 0.68,
"learning_rate": 3.86920281294896e-05,
"loss": 0.76,
"step": 485500
},
{
"epoch": 0.68,
"learning_rate": 3.868037818515303e-05,
"loss": 0.7626,
"step": 486000
},
{
"epoch": 0.68,
"learning_rate": 3.866875154070514e-05,
"loss": 0.7594,
"step": 486500
},
{
"epoch": 0.68,
"learning_rate": 3.865710159636857e-05,
"loss": 0.7585,
"step": 487000
},
{
"epoch": 0.68,
"learning_rate": 3.864545165203201e-05,
"loss": 0.7597,
"step": 487500
},
{
"epoch": 0.68,
"learning_rate": 3.863380170769544e-05,
"loss": 0.7558,
"step": 488000
},
{
"epoch": 0.68,
"learning_rate": 3.8622151763358876e-05,
"loss": 0.7594,
"step": 488500
},
{
"epoch": 0.68,
"learning_rate": 3.861050181902231e-05,
"loss": 0.7607,
"step": 489000
},
{
"epoch": 0.68,
"learning_rate": 3.859885187468574e-05,
"loss": 0.7572,
"step": 489500
},
{
"epoch": 0.69,
"learning_rate": 3.858720193034917e-05,
"loss": 0.7626,
"step": 490000
},
{
"epoch": 0.69,
"learning_rate": 3.857555198601261e-05,
"loss": 0.7603,
"step": 490500
},
{
"epoch": 0.69,
"learning_rate": 3.8563902041676044e-05,
"loss": 0.7659,
"step": 491000
},
{
"epoch": 0.69,
"learning_rate": 3.8552252097339484e-05,
"loss": 0.7642,
"step": 491500
},
{
"epoch": 0.69,
"learning_rate": 3.8540625452891586e-05,
"loss": 0.7595,
"step": 492000
},
{
"epoch": 0.69,
"learning_rate": 3.852897550855502e-05,
"loss": 0.7585,
"step": 492500
},
{
"epoch": 0.69,
"learning_rate": 3.851732556421845e-05,
"loss": 0.7628,
"step": 493000
},
{
"epoch": 0.69,
"learning_rate": 3.850567561988189e-05,
"loss": 0.7599,
"step": 493500
},
{
"epoch": 0.69,
"learning_rate": 3.849402567554532e-05,
"loss": 0.7542,
"step": 494000
},
{
"epoch": 0.69,
"learning_rate": 3.848237573120876e-05,
"loss": 0.7566,
"step": 494500
},
{
"epoch": 0.69,
"learning_rate": 3.8470725786872194e-05,
"loss": 0.7602,
"step": 495000
},
{
"epoch": 0.69,
"learning_rate": 3.8459075842535627e-05,
"loss": 0.7634,
"step": 495500
},
{
"epoch": 0.69,
"learning_rate": 3.844742589819906e-05,
"loss": 0.7639,
"step": 496000
},
{
"epoch": 0.69,
"learning_rate": 3.843579925375117e-05,
"loss": 0.7817,
"step": 496500
},
{
"epoch": 0.69,
"learning_rate": 3.84241493094146e-05,
"loss": 0.765,
"step": 497000
},
{
"epoch": 0.7,
"learning_rate": 3.841249936507803e-05,
"loss": 0.7583,
"step": 497500
},
{
"epoch": 0.7,
"learning_rate": 3.840084942074147e-05,
"loss": 0.7596,
"step": 498000
},
{
"epoch": 0.7,
"learning_rate": 3.8389199476404905e-05,
"loss": 0.7565,
"step": 498500
},
{
"epoch": 0.7,
"learning_rate": 3.837757283195701e-05,
"loss": 0.7573,
"step": 499000
},
{
"epoch": 0.7,
"learning_rate": 3.836592288762044e-05,
"loss": 0.7626,
"step": 499500
},
{
"epoch": 0.7,
"learning_rate": 3.835427294328388e-05,
"loss": 0.7613,
"step": 500000
},
{
"epoch": 0.7,
"learning_rate": 3.834262299894731e-05,
"loss": 0.7626,
"step": 500500
},
{
"epoch": 0.7,
"learning_rate": 3.833099635449942e-05,
"loss": 0.7662,
"step": 501000
},
{
"epoch": 0.7,
"learning_rate": 3.8319346410162854e-05,
"loss": 0.7616,
"step": 501500
},
{
"epoch": 0.7,
"learning_rate": 3.8307696465826286e-05,
"loss": 0.7705,
"step": 502000
},
{
"epoch": 0.7,
"learning_rate": 3.8296069821378396e-05,
"loss": 0.789,
"step": 502500
},
{
"epoch": 0.7,
"learning_rate": 3.828441987704183e-05,
"loss": 0.7674,
"step": 503000
},
{
"epoch": 0.7,
"learning_rate": 3.827279323259394e-05,
"loss": 0.7605,
"step": 503500
},
{
"epoch": 0.7,
"learning_rate": 3.826114328825737e-05,
"loss": 0.7618,
"step": 504000
},
{
"epoch": 0.71,
"learning_rate": 3.824949334392081e-05,
"loss": 0.7926,
"step": 504500
},
{
"epoch": 0.71,
"learning_rate": 3.823784339958424e-05,
"loss": 0.7731,
"step": 505000
},
{
"epoch": 0.71,
"learning_rate": 3.8226193455247674e-05,
"loss": 0.7662,
"step": 505500
},
{
"epoch": 0.71,
"learning_rate": 3.8214543510911106e-05,
"loss": 0.7591,
"step": 506000
},
{
"epoch": 0.71,
"learning_rate": 3.820289356657454e-05,
"loss": 0.7589,
"step": 506500
},
{
"epoch": 0.71,
"learning_rate": 3.819124362223798e-05,
"loss": 0.7617,
"step": 507000
},
{
"epoch": 0.71,
"learning_rate": 3.817959367790141e-05,
"loss": 0.7713,
"step": 507500
},
{
"epoch": 0.71,
"learning_rate": 3.816796703345352e-05,
"loss": 0.7646,
"step": 508000
},
{
"epoch": 0.71,
"learning_rate": 3.815631708911695e-05,
"loss": 0.7692,
"step": 508500
},
{
"epoch": 0.71,
"learning_rate": 3.8144690444669055e-05,
"loss": 0.7763,
"step": 509000
},
{
"epoch": 0.71,
"learning_rate": 3.813304050033249e-05,
"loss": 0.7661,
"step": 509500
},
{
"epoch": 0.71,
"learning_rate": 3.812139055599592e-05,
"loss": 0.7688,
"step": 510000
},
{
"epoch": 0.71,
"learning_rate": 3.810974061165936e-05,
"loss": 0.7744,
"step": 510500
},
{
"epoch": 0.71,
"learning_rate": 3.809809066732279e-05,
"loss": 0.8432,
"step": 511000
},
{
"epoch": 0.72,
"learning_rate": 3.808644072298623e-05,
"loss": 0.7715,
"step": 511500
},
{
"epoch": 0.72,
"learning_rate": 3.807479077864966e-05,
"loss": 0.77,
"step": 512000
},
{
"epoch": 0.72,
"learning_rate": 3.8063140834313095e-05,
"loss": 0.7677,
"step": 512500
},
{
"epoch": 0.72,
"learning_rate": 3.80515141898652e-05,
"loss": 0.7692,
"step": 513000
},
{
"epoch": 0.72,
"learning_rate": 3.803986424552864e-05,
"loss": 0.7634,
"step": 513500
},
{
"epoch": 0.72,
"learning_rate": 3.802821430119207e-05,
"loss": 0.7664,
"step": 514000
},
{
"epoch": 0.72,
"learning_rate": 3.80165643568555e-05,
"loss": 0.7629,
"step": 514500
},
{
"epoch": 0.72,
"learning_rate": 3.800491441251894e-05,
"loss": 0.761,
"step": 515000
},
{
"epoch": 0.72,
"learning_rate": 3.799326446818237e-05,
"loss": 0.7586,
"step": 515500
},
{
"epoch": 0.72,
"learning_rate": 3.7981614523845806e-05,
"loss": 0.7567,
"step": 516000
},
{
"epoch": 0.72,
"learning_rate": 3.7969964579509245e-05,
"loss": 0.7585,
"step": 516500
},
{
"epoch": 0.72,
"learning_rate": 3.795831463517268e-05,
"loss": 0.7553,
"step": 517000
},
{
"epoch": 0.72,
"learning_rate": 3.794666469083611e-05,
"loss": 0.7562,
"step": 517500
},
{
"epoch": 0.72,
"learning_rate": 3.793501474649954e-05,
"loss": 0.7585,
"step": 518000
},
{
"epoch": 0.72,
"learning_rate": 3.792338810205165e-05,
"loss": 0.7655,
"step": 518500
},
{
"epoch": 0.73,
"learning_rate": 3.7911738157715084e-05,
"loss": 0.7572,
"step": 519000
},
{
"epoch": 0.73,
"learning_rate": 3.790008821337852e-05,
"loss": 0.7553,
"step": 519500
},
{
"epoch": 0.73,
"learning_rate": 3.7888438269041956e-05,
"loss": 0.7555,
"step": 520000
},
{
"epoch": 0.73,
"learning_rate": 3.787678832470539e-05,
"loss": 0.7544,
"step": 520500
},
{
"epoch": 0.73,
"learning_rate": 3.786513838036882e-05,
"loss": 0.7523,
"step": 521000
},
{
"epoch": 0.73,
"learning_rate": 3.785351173592093e-05,
"loss": 0.7588,
"step": 521500
},
{
"epoch": 0.73,
"learning_rate": 3.784188509147303e-05,
"loss": 0.7585,
"step": 522000
},
{
"epoch": 0.73,
"learning_rate": 3.7830235147136465e-05,
"loss": 0.762,
"step": 522500
},
{
"epoch": 0.73,
"learning_rate": 3.7818585202799904e-05,
"loss": 0.7634,
"step": 523000
},
{
"epoch": 0.73,
"learning_rate": 3.7806981858240684e-05,
"loss": 0.809,
"step": 523500
},
{
"epoch": 0.73,
"learning_rate": 3.779533191390412e-05,
"loss": 0.7722,
"step": 524000
},
{
"epoch": 0.73,
"learning_rate": 3.7783681969567556e-05,
"loss": 0.7643,
"step": 524500
},
{
"epoch": 0.73,
"learning_rate": 3.777203202523099e-05,
"loss": 0.7642,
"step": 525000
},
{
"epoch": 0.73,
"learning_rate": 3.776038208089442e-05,
"loss": 0.7601,
"step": 525500
},
{
"epoch": 0.74,
"learning_rate": 3.774873213655785e-05,
"loss": 0.7588,
"step": 526000
},
{
"epoch": 0.74,
"learning_rate": 3.7737105492109956e-05,
"loss": 0.7585,
"step": 526500
},
{
"epoch": 0.74,
"learning_rate": 3.7725455547773395e-05,
"loss": 0.7676,
"step": 527000
},
{
"epoch": 0.74,
"learning_rate": 3.771380560343683e-05,
"loss": 0.7628,
"step": 527500
},
{
"epoch": 0.74,
"learning_rate": 3.7702155659100266e-05,
"loss": 0.7688,
"step": 528000
},
{
"epoch": 0.74,
"learning_rate": 3.76905057147637e-05,
"loss": 0.7663,
"step": 528500
},
{
"epoch": 0.74,
"learning_rate": 3.767885577042713e-05,
"loss": 0.7603,
"step": 529000
},
{
"epoch": 0.74,
"learning_rate": 3.7667205826090564e-05,
"loss": 0.7616,
"step": 529500
},
{
"epoch": 0.74,
"learning_rate": 3.7655555881754e-05,
"loss": 0.7589,
"step": 530000
},
{
"epoch": 0.74,
"learning_rate": 3.7643905937417435e-05,
"loss": 0.7646,
"step": 530500
},
{
"epoch": 0.74,
"learning_rate": 3.763227929296954e-05,
"loss": 0.768,
"step": 531000
},
{
"epoch": 0.74,
"learning_rate": 3.762062934863298e-05,
"loss": 0.7853,
"step": 531500
},
{
"epoch": 0.74,
"learning_rate": 3.760897940429641e-05,
"loss": 0.8162,
"step": 532000
},
{
"epoch": 0.74,
"learning_rate": 3.759732945995984e-05,
"loss": 0.7718,
"step": 532500
},
{
"epoch": 0.75,
"learning_rate": 3.758567951562328e-05,
"loss": 0.7669,
"step": 533000
},
{
"epoch": 0.75,
"learning_rate": 3.7574029571286713e-05,
"loss": 0.7724,
"step": 533500
},
{
"epoch": 0.75,
"learning_rate": 3.7562402926838816e-05,
"loss": 0.7688,
"step": 534000
},
{
"epoch": 0.75,
"learning_rate": 3.755075298250225e-05,
"loss": 0.7658,
"step": 534500
},
{
"epoch": 0.75,
"learning_rate": 3.753910303816569e-05,
"loss": 0.759,
"step": 535000
},
{
"epoch": 0.75,
"learning_rate": 3.752745309382912e-05,
"loss": 0.7589,
"step": 535500
},
{
"epoch": 0.75,
"learning_rate": 3.751580314949255e-05,
"loss": 0.7532,
"step": 536000
},
{
"epoch": 0.75,
"learning_rate": 3.750415320515599e-05,
"loss": 0.7542,
"step": 536500
},
{
"epoch": 0.75,
"learning_rate": 3.7492526560708095e-05,
"loss": 0.7556,
"step": 537000
},
{
"epoch": 0.75,
"learning_rate": 3.748087661637153e-05,
"loss": 0.7538,
"step": 537500
},
{
"epoch": 0.75,
"learning_rate": 3.7469226672034966e-05,
"loss": 0.7576,
"step": 538000
},
{
"epoch": 0.75,
"learning_rate": 3.74575767276984e-05,
"loss": 0.7719,
"step": 538500
},
{
"epoch": 0.75,
"learning_rate": 3.74459500832505e-05,
"loss": 0.7708,
"step": 539000
},
{
"epoch": 0.75,
"learning_rate": 3.743432343880261e-05,
"loss": 0.7702,
"step": 539500
},
{
"epoch": 0.75,
"learning_rate": 3.742267349446604e-05,
"loss": 0.7647,
"step": 540000
},
{
"epoch": 0.76,
"learning_rate": 3.741102355012948e-05,
"loss": 0.7661,
"step": 540500
},
{
"epoch": 0.76,
"learning_rate": 3.7399373605792915e-05,
"loss": 0.7664,
"step": 541000
},
{
"epoch": 0.76,
"learning_rate": 3.738772366145635e-05,
"loss": 0.7601,
"step": 541500
},
{
"epoch": 0.76,
"learning_rate": 3.737607371711978e-05,
"loss": 0.7611,
"step": 542000
},
{
"epoch": 0.76,
"learning_rate": 3.736442377278321e-05,
"loss": 0.7577,
"step": 542500
},
{
"epoch": 0.76,
"learning_rate": 3.735277382844665e-05,
"loss": 0.7568,
"step": 543000
},
{
"epoch": 0.76,
"learning_rate": 3.7341123884110083e-05,
"loss": 0.7585,
"step": 543500
},
{
"epoch": 0.76,
"learning_rate": 3.7329473939773516e-05,
"loss": 0.7563,
"step": 544000
},
{
"epoch": 0.76,
"learning_rate": 3.731782399543695e-05,
"loss": 0.7664,
"step": 544500
},
{
"epoch": 0.76,
"learning_rate": 3.730617405110039e-05,
"loss": 0.761,
"step": 545000
},
{
"epoch": 0.76,
"learning_rate": 3.729452410676382e-05,
"loss": 0.7524,
"step": 545500
},
{
"epoch": 0.76,
"learning_rate": 3.728287416242726e-05,
"loss": 0.7572,
"step": 546000
},
{
"epoch": 0.76,
"learning_rate": 3.727122421809069e-05,
"loss": 0.7625,
"step": 546500
},
{
"epoch": 0.76,
"learning_rate": 3.7259574273754124e-05,
"loss": 0.7609,
"step": 547000
},
{
"epoch": 0.77,
"learning_rate": 3.7247924329417556e-05,
"loss": 0.7564,
"step": 547500
},
{
"epoch": 0.77,
"learning_rate": 3.723627438508099e-05,
"loss": 0.7585,
"step": 548000
},
{
"epoch": 0.77,
"learning_rate": 3.722462444074443e-05,
"loss": 0.7576,
"step": 548500
},
{
"epoch": 0.77,
"learning_rate": 3.721297449640786e-05,
"loss": 0.7591,
"step": 549000
},
{
"epoch": 0.77,
"learning_rate": 3.720132455207129e-05,
"loss": 0.7579,
"step": 549500
},
{
"epoch": 0.77,
"learning_rate": 3.718972120751207e-05,
"loss": 0.7559,
"step": 550000
},
{
"epoch": 0.77,
"learning_rate": 3.717809456306418e-05,
"loss": 0.7588,
"step": 550500
},
{
"epoch": 0.77,
"learning_rate": 3.7166444618727614e-05,
"loss": 0.755,
"step": 551000
},
{
"epoch": 0.77,
"learning_rate": 3.7154817974279724e-05,
"loss": 0.7565,
"step": 551500
},
{
"epoch": 0.77,
"learning_rate": 3.7143168029943156e-05,
"loss": 0.7546,
"step": 552000
},
{
"epoch": 0.77,
"learning_rate": 3.713151808560659e-05,
"loss": 0.7571,
"step": 552500
},
{
"epoch": 0.77,
"learning_rate": 3.711986814127003e-05,
"loss": 0.7535,
"step": 553000
},
{
"epoch": 0.77,
"learning_rate": 3.710821819693346e-05,
"loss": 0.7545,
"step": 553500
},
{
"epoch": 0.77,
"learning_rate": 3.709659155248556e-05,
"loss": 0.7536,
"step": 554000
},
{
"epoch": 0.78,
"learning_rate": 3.7084941608148995e-05,
"loss": 0.7565,
"step": 554500
},
{
"epoch": 0.78,
"learning_rate": 3.7073291663812435e-05,
"loss": 0.7509,
"step": 555000
},
{
"epoch": 0.78,
"learning_rate": 3.706164171947587e-05,
"loss": 0.7554,
"step": 555500
},
{
"epoch": 0.78,
"learning_rate": 3.70499917751393e-05,
"loss": 0.7624,
"step": 556000
},
{
"epoch": 0.78,
"learning_rate": 3.703834183080274e-05,
"loss": 0.7503,
"step": 556500
},
{
"epoch": 0.78,
"learning_rate": 3.702669188646617e-05,
"loss": 0.7528,
"step": 557000
},
{
"epoch": 0.78,
"learning_rate": 3.70150419421296e-05,
"loss": 0.7537,
"step": 557500
},
{
"epoch": 0.78,
"learning_rate": 3.7003391997793036e-05,
"loss": 0.7563,
"step": 558000
},
{
"epoch": 0.78,
"learning_rate": 3.699174205345647e-05,
"loss": 0.7533,
"step": 558500
},
{
"epoch": 0.78,
"learning_rate": 3.698011540900858e-05,
"loss": 0.7503,
"step": 559000
},
{
"epoch": 0.78,
"learning_rate": 3.696846546467202e-05,
"loss": 0.7522,
"step": 559500
},
{
"epoch": 0.78,
"learning_rate": 3.695681552033545e-05,
"loss": 0.7426,
"step": 560000
},
{
"epoch": 0.78,
"learning_rate": 3.694516557599888e-05,
"loss": 0.7523,
"step": 560500
},
{
"epoch": 0.78,
"learning_rate": 3.6933538931550984e-05,
"loss": 0.7557,
"step": 561000
},
{
"epoch": 0.78,
"learning_rate": 3.6921912287103094e-05,
"loss": 0.7583,
"step": 561500
},
{
"epoch": 0.79,
"learning_rate": 3.6910262342766526e-05,
"loss": 0.7595,
"step": 562000
},
{
"epoch": 0.79,
"learning_rate": 3.689861239842996e-05,
"loss": 0.7562,
"step": 562500
},
{
"epoch": 0.79,
"learning_rate": 3.688696245409339e-05,
"loss": 0.7552,
"step": 563000
},
{
"epoch": 0.79,
"learning_rate": 3.687531250975683e-05,
"loss": 0.7538,
"step": 563500
},
{
"epoch": 0.79,
"learning_rate": 3.686366256542026e-05,
"loss": 0.7509,
"step": 564000
},
{
"epoch": 0.79,
"learning_rate": 3.6852012621083695e-05,
"loss": 0.7508,
"step": 564500
},
{
"epoch": 0.79,
"learning_rate": 3.6840362676747134e-05,
"loss": 0.7509,
"step": 565000
},
{
"epoch": 0.79,
"learning_rate": 3.6828712732410566e-05,
"loss": 0.7525,
"step": 565500
},
{
"epoch": 0.79,
"learning_rate": 3.6817062788074006e-05,
"loss": 0.75,
"step": 566000
},
{
"epoch": 0.79,
"learning_rate": 3.680541284373744e-05,
"loss": 0.7504,
"step": 566500
},
{
"epoch": 0.79,
"learning_rate": 3.679376289940087e-05,
"loss": 0.7468,
"step": 567000
},
{
"epoch": 0.79,
"learning_rate": 3.67821129550643e-05,
"loss": 0.7523,
"step": 567500
},
{
"epoch": 0.79,
"learning_rate": 3.6770463010727735e-05,
"loss": 0.7502,
"step": 568000
},
{
"epoch": 0.79,
"learning_rate": 3.675881306639117e-05,
"loss": 0.7525,
"step": 568500
},
{
"epoch": 0.8,
"learning_rate": 3.674716312205461e-05,
"loss": 0.7487,
"step": 569000
},
{
"epoch": 0.8,
"learning_rate": 3.673551317771804e-05,
"loss": 0.7443,
"step": 569500
},
{
"epoch": 0.8,
"learning_rate": 3.672388653327015e-05,
"loss": 0.7508,
"step": 570000
},
{
"epoch": 0.8,
"learning_rate": 3.671223658893358e-05,
"loss": 0.7477,
"step": 570500
},
{
"epoch": 0.8,
"learning_rate": 3.6700586644597013e-05,
"loss": 0.7492,
"step": 571000
},
{
"epoch": 0.8,
"learning_rate": 3.6688936700260446e-05,
"loss": 0.7507,
"step": 571500
},
{
"epoch": 0.8,
"learning_rate": 3.6677286755923885e-05,
"loss": 0.7468,
"step": 572000
},
{
"epoch": 0.8,
"learning_rate": 3.666563681158732e-05,
"loss": 0.7429,
"step": 572500
},
{
"epoch": 0.8,
"learning_rate": 3.665398686725075e-05,
"loss": 0.7482,
"step": 573000
},
{
"epoch": 0.8,
"learning_rate": 3.664233692291418e-05,
"loss": 0.7509,
"step": 573500
},
{
"epoch": 0.8,
"learning_rate": 3.6630686978577614e-05,
"loss": 0.748,
"step": 574000
},
{
"epoch": 0.8,
"learning_rate": 3.6619060334129724e-05,
"loss": 0.7517,
"step": 574500
},
{
"epoch": 0.8,
"learning_rate": 3.6607433689681834e-05,
"loss": 0.7454,
"step": 575000
},
{
"epoch": 0.8,
"learning_rate": 3.6595783745345266e-05,
"loss": 0.7448,
"step": 575500
},
{
"epoch": 0.81,
"learning_rate": 3.6584133801008705e-05,
"loss": 0.7461,
"step": 576000
},
{
"epoch": 0.81,
"learning_rate": 3.657248385667214e-05,
"loss": 0.7464,
"step": 576500
},
{
"epoch": 0.81,
"learning_rate": 3.656083391233557e-05,
"loss": 0.7469,
"step": 577000
},
{
"epoch": 0.81,
"learning_rate": 3.6549183967999e-05,
"loss": 0.743,
"step": 577500
},
{
"epoch": 0.81,
"learning_rate": 3.6537534023662435e-05,
"loss": 0.7531,
"step": 578000
},
{
"epoch": 0.81,
"learning_rate": 3.6525884079325874e-05,
"loss": 0.7455,
"step": 578500
},
{
"epoch": 0.81,
"learning_rate": 3.6514234134989306e-05,
"loss": 0.746,
"step": 579000
},
{
"epoch": 0.81,
"learning_rate": 3.650258419065274e-05,
"loss": 0.743,
"step": 579500
},
{
"epoch": 0.81,
"learning_rate": 3.649093424631617e-05,
"loss": 0.7446,
"step": 580000
},
{
"epoch": 0.81,
"learning_rate": 3.64792843019796e-05,
"loss": 0.7436,
"step": 580500
},
{
"epoch": 0.81,
"learning_rate": 3.646765765753171e-05,
"loss": 0.7461,
"step": 581000
},
{
"epoch": 0.81,
"learning_rate": 3.645600771319515e-05,
"loss": 0.7433,
"step": 581500
},
{
"epoch": 0.81,
"learning_rate": 3.6444357768858584e-05,
"loss": 0.7447,
"step": 582000
},
{
"epoch": 0.81,
"learning_rate": 3.643270782452202e-05,
"loss": 0.7462,
"step": 582500
},
{
"epoch": 0.82,
"learning_rate": 3.642105788018545e-05,
"loss": 0.7442,
"step": 583000
},
{
"epoch": 0.82,
"learning_rate": 3.640943123573756e-05,
"loss": 0.7412,
"step": 583500
},
{
"epoch": 0.82,
"learning_rate": 3.639778129140099e-05,
"loss": 0.742,
"step": 584000
},
{
"epoch": 0.82,
"learning_rate": 3.6386131347064424e-05,
"loss": 0.7478,
"step": 584500
},
{
"epoch": 0.82,
"learning_rate": 3.637450470261653e-05,
"loss": 0.7434,
"step": 585000
},
{
"epoch": 0.82,
"learning_rate": 3.636287805816864e-05,
"loss": 0.7487,
"step": 585500
},
{
"epoch": 0.82,
"learning_rate": 3.6351228113832075e-05,
"loss": 0.744,
"step": 586000
},
{
"epoch": 0.82,
"learning_rate": 3.6339601469384185e-05,
"loss": 0.7445,
"step": 586500
},
{
"epoch": 0.82,
"learning_rate": 3.632795152504762e-05,
"loss": 0.7438,
"step": 587000
},
{
"epoch": 0.82,
"learning_rate": 3.631630158071105e-05,
"loss": 0.7505,
"step": 587500
},
{
"epoch": 0.82,
"learning_rate": 3.630465163637448e-05,
"loss": 0.7444,
"step": 588000
},
{
"epoch": 0.82,
"learning_rate": 3.6293001692037914e-05,
"loss": 0.7484,
"step": 588500
},
{
"epoch": 0.82,
"learning_rate": 3.6281351747701353e-05,
"loss": 0.75,
"step": 589000
},
{
"epoch": 0.82,
"learning_rate": 3.6269701803364786e-05,
"loss": 0.7474,
"step": 589500
},
{
"epoch": 0.82,
"learning_rate": 3.6258075158916895e-05,
"loss": 0.7424,
"step": 590000
},
{
"epoch": 0.83,
"learning_rate": 3.6246448514469e-05,
"loss": 0.7483,
"step": 590500
},
{
"epoch": 0.83,
"learning_rate": 3.623479857013243e-05,
"loss": 0.7471,
"step": 591000
},
{
"epoch": 0.83,
"learning_rate": 3.622314862579586e-05,
"loss": 0.7421,
"step": 591500
},
{
"epoch": 0.83,
"learning_rate": 3.621152198134797e-05,
"loss": 0.7468,
"step": 592000
},
{
"epoch": 0.83,
"learning_rate": 3.6199872037011405e-05,
"loss": 0.7415,
"step": 592500
},
{
"epoch": 0.83,
"learning_rate": 3.6188222092674844e-05,
"loss": 0.7481,
"step": 593000
},
{
"epoch": 0.83,
"learning_rate": 3.6176572148338277e-05,
"loss": 0.7499,
"step": 593500
},
{
"epoch": 0.83,
"learning_rate": 3.616492220400171e-05,
"loss": 0.7443,
"step": 594000
},
{
"epoch": 0.83,
"learning_rate": 3.615327225966514e-05,
"loss": 0.7481,
"step": 594500
},
{
"epoch": 0.83,
"learning_rate": 3.614164561521725e-05,
"loss": 0.7456,
"step": 595000
},
{
"epoch": 0.83,
"learning_rate": 3.612999567088068e-05,
"loss": 0.7415,
"step": 595500
},
{
"epoch": 0.83,
"learning_rate": 3.611834572654412e-05,
"loss": 0.743,
"step": 596000
},
{
"epoch": 0.83,
"learning_rate": 3.6106695782207555e-05,
"loss": 0.7469,
"step": 596500
},
{
"epoch": 0.83,
"learning_rate": 3.609504583787099e-05,
"loss": 0.7473,
"step": 597000
},
{
"epoch": 0.84,
"learning_rate": 3.608339589353442e-05,
"loss": 0.741,
"step": 597500
},
{
"epoch": 0.84,
"learning_rate": 3.607174594919785e-05,
"loss": 0.75,
"step": 598000
},
{
"epoch": 0.84,
"learning_rate": 3.606009600486129e-05,
"loss": 0.7441,
"step": 598500
},
{
"epoch": 0.84,
"learning_rate": 3.6048469360413394e-05,
"loss": 0.7456,
"step": 599000
},
{
"epoch": 0.84,
"learning_rate": 3.603684271596551e-05,
"loss": 0.7458,
"step": 599500
},
{
"epoch": 0.84,
"learning_rate": 3.602519277162894e-05,
"loss": 0.7469,
"step": 600000
},
{
"epoch": 0.84,
"learning_rate": 3.6013542827292375e-05,
"loss": 0.7484,
"step": 600500
},
{
"epoch": 0.84,
"learning_rate": 3.600189288295581e-05,
"loss": 0.7463,
"step": 601000
},
{
"epoch": 0.84,
"learning_rate": 3.599024293861924e-05,
"loss": 0.7442,
"step": 601500
},
{
"epoch": 0.84,
"learning_rate": 3.597859299428267e-05,
"loss": 0.7382,
"step": 602000
},
{
"epoch": 0.84,
"learning_rate": 3.596694304994611e-05,
"loss": 0.7455,
"step": 602500
},
{
"epoch": 0.84,
"learning_rate": 3.5955293105609544e-05,
"loss": 0.7459,
"step": 603000
},
{
"epoch": 0.84,
"learning_rate": 3.5943643161272976e-05,
"loss": 0.7404,
"step": 603500
},
{
"epoch": 0.84,
"learning_rate": 3.593199321693641e-05,
"loss": 0.741,
"step": 604000
},
{
"epoch": 0.85,
"learning_rate": 3.592034327259984e-05,
"loss": 0.7449,
"step": 604500
},
{
"epoch": 0.85,
"learning_rate": 3.590869332826328e-05,
"loss": 0.7494,
"step": 605000
},
{
"epoch": 0.85,
"learning_rate": 3.589704338392671e-05,
"loss": 0.7427,
"step": 605500
},
{
"epoch": 0.85,
"learning_rate": 3.5885393439590145e-05,
"loss": 0.7445,
"step": 606000
},
{
"epoch": 0.85,
"learning_rate": 3.5873743495253584e-05,
"loss": 0.7414,
"step": 606500
},
{
"epoch": 0.85,
"learning_rate": 3.5862093550917016e-05,
"loss": 0.7412,
"step": 607000
},
{
"epoch": 0.85,
"learning_rate": 3.585044360658045e-05,
"loss": 0.7432,
"step": 607500
},
{
"epoch": 0.85,
"learning_rate": 3.583881696213255e-05,
"loss": 0.7586,
"step": 608000
},
{
"epoch": 0.85,
"learning_rate": 3.582719031768466e-05,
"loss": 0.7483,
"step": 608500
},
{
"epoch": 0.85,
"learning_rate": 3.581556367323677e-05,
"loss": 0.7401,
"step": 609000
},
{
"epoch": 0.85,
"learning_rate": 3.580391372890021e-05,
"loss": 0.7446,
"step": 609500
},
{
"epoch": 0.85,
"learning_rate": 3.579226378456364e-05,
"loss": 0.7418,
"step": 610000
},
{
"epoch": 0.85,
"learning_rate": 3.5780613840227075e-05,
"loss": 0.7411,
"step": 610500
},
{
"epoch": 0.85,
"learning_rate": 3.576896389589051e-05,
"loss": 0.7431,
"step": 611000
},
{
"epoch": 0.85,
"learning_rate": 3.575731395155394e-05,
"loss": 0.7452,
"step": 611500
},
{
"epoch": 0.86,
"learning_rate": 3.574568730710605e-05,
"loss": 0.7424,
"step": 612000
},
{
"epoch": 0.86,
"learning_rate": 3.573403736276948e-05,
"loss": 0.7438,
"step": 612500
},
{
"epoch": 0.86,
"learning_rate": 3.572238741843292e-05,
"loss": 0.7427,
"step": 613000
},
{
"epoch": 0.86,
"learning_rate": 3.571073747409635e-05,
"loss": 0.7442,
"step": 613500
},
{
"epoch": 0.86,
"learning_rate": 3.5699087529759785e-05,
"loss": 0.745,
"step": 614000
},
{
"epoch": 0.86,
"learning_rate": 3.568743758542322e-05,
"loss": 0.7485,
"step": 614500
},
{
"epoch": 0.86,
"learning_rate": 3.567578764108665e-05,
"loss": 0.7445,
"step": 615000
},
{
"epoch": 0.86,
"learning_rate": 3.566413769675009e-05,
"loss": 0.7467,
"step": 615500
},
{
"epoch": 0.86,
"learning_rate": 3.56525110523022e-05,
"loss": 0.7397,
"step": 616000
},
{
"epoch": 0.86,
"learning_rate": 3.564086110796563e-05,
"loss": 0.7442,
"step": 616500
},
{
"epoch": 0.86,
"learning_rate": 3.5629211163629064e-05,
"loss": 0.7415,
"step": 617000
},
{
"epoch": 0.86,
"learning_rate": 3.5617584519181166e-05,
"loss": 0.7451,
"step": 617500
},
{
"epoch": 0.86,
"learning_rate": 3.56059345748446e-05,
"loss": 0.7447,
"step": 618000
},
{
"epoch": 0.86,
"learning_rate": 3.559428463050804e-05,
"loss": 0.7402,
"step": 618500
},
{
"epoch": 0.87,
"learning_rate": 3.558263468617147e-05,
"loss": 0.7419,
"step": 619000
},
{
"epoch": 0.87,
"learning_rate": 3.557098474183491e-05,
"loss": 0.7419,
"step": 619500
},
{
"epoch": 0.87,
"learning_rate": 3.555933479749834e-05,
"loss": 0.7372,
"step": 620000
},
{
"epoch": 0.87,
"learning_rate": 3.5547684853161774e-05,
"loss": 0.7416,
"step": 620500
},
{
"epoch": 0.87,
"learning_rate": 3.5536034908825207e-05,
"loss": 0.7508,
"step": 621000
},
{
"epoch": 0.87,
"learning_rate": 3.552440826437731e-05,
"loss": 0.7427,
"step": 621500
},
{
"epoch": 0.87,
"learning_rate": 3.551275832004075e-05,
"loss": 0.7553,
"step": 622000
},
{
"epoch": 0.87,
"learning_rate": 3.550110837570418e-05,
"loss": 0.7431,
"step": 622500
},
{
"epoch": 0.87,
"learning_rate": 3.548948173125629e-05,
"loss": 0.7429,
"step": 623000
},
{
"epoch": 0.87,
"learning_rate": 3.547783178691972e-05,
"loss": 0.7445,
"step": 623500
},
{
"epoch": 0.87,
"learning_rate": 3.546620514247183e-05,
"loss": 0.7428,
"step": 624000
},
{
"epoch": 0.87,
"learning_rate": 3.5454555198135265e-05,
"loss": 0.7404,
"step": 624500
},
{
"epoch": 0.87,
"learning_rate": 3.54429052537987e-05,
"loss": 0.7428,
"step": 625000
},
{
"epoch": 0.87,
"learning_rate": 3.5431255309462136e-05,
"loss": 0.7454,
"step": 625500
},
{
"epoch": 0.88,
"learning_rate": 3.541960536512557e-05,
"loss": 0.7431,
"step": 626000
},
{
"epoch": 0.88,
"learning_rate": 3.5407955420789e-05,
"loss": 0.7449,
"step": 626500
},
{
"epoch": 0.88,
"learning_rate": 3.5396305476452434e-05,
"loss": 0.7501,
"step": 627000
},
{
"epoch": 0.88,
"learning_rate": 3.5384655532115866e-05,
"loss": 0.7417,
"step": 627500
},
{
"epoch": 0.88,
"learning_rate": 3.53730055877793e-05,
"loss": 0.7425,
"step": 628000
},
{
"epoch": 0.88,
"learning_rate": 3.536135564344274e-05,
"loss": 0.7457,
"step": 628500
},
{
"epoch": 0.88,
"learning_rate": 3.534970569910617e-05,
"loss": 0.7378,
"step": 629000
},
{
"epoch": 0.88,
"learning_rate": 3.533805575476961e-05,
"loss": 0.7445,
"step": 629500
},
{
"epoch": 0.88,
"learning_rate": 3.532640581043304e-05,
"loss": 0.7372,
"step": 630000
},
{
"epoch": 0.88,
"learning_rate": 3.5314755866096474e-05,
"loss": 0.7676,
"step": 630500
},
{
"epoch": 0.88,
"learning_rate": 3.530310592175991e-05,
"loss": 0.7583,
"step": 631000
},
{
"epoch": 0.88,
"learning_rate": 3.5291455977423345e-05,
"loss": 0.7405,
"step": 631500
},
{
"epoch": 0.88,
"learning_rate": 3.527980603308678e-05,
"loss": 0.7347,
"step": 632000
},
{
"epoch": 0.88,
"learning_rate": 3.526817938863888e-05,
"loss": 0.7391,
"step": 632500
},
{
"epoch": 0.88,
"learning_rate": 3.525652944430232e-05,
"loss": 0.7357,
"step": 633000
},
{
"epoch": 0.89,
"learning_rate": 3.524487949996575e-05,
"loss": 0.7388,
"step": 633500
},
{
"epoch": 0.89,
"learning_rate": 3.5233229555629184e-05,
"loss": 0.7448,
"step": 634000
},
{
"epoch": 0.89,
"learning_rate": 3.5221579611292623e-05,
"loss": 0.7372,
"step": 634500
},
{
"epoch": 0.89,
"learning_rate": 3.5209929666956056e-05,
"loss": 0.7379,
"step": 635000
},
{
"epoch": 0.89,
"learning_rate": 3.519827972261949e-05,
"loss": 0.7445,
"step": 635500
},
{
"epoch": 0.89,
"learning_rate": 3.518662977828292e-05,
"loss": 0.7409,
"step": 636000
},
{
"epoch": 0.89,
"learning_rate": 3.517497983394635e-05,
"loss": 0.7337,
"step": 636500
},
{
"epoch": 0.89,
"learning_rate": 3.516332988960979e-05,
"loss": 0.7405,
"step": 637000
},
{
"epoch": 0.89,
"learning_rate": 3.5151679945273225e-05,
"loss": 0.7388,
"step": 637500
},
{
"epoch": 0.89,
"learning_rate": 3.514003000093666e-05,
"loss": 0.7402,
"step": 638000
},
{
"epoch": 0.89,
"learning_rate": 3.512838005660009e-05,
"loss": 0.7424,
"step": 638500
},
{
"epoch": 0.89,
"learning_rate": 3.51167534121522e-05,
"loss": 0.7403,
"step": 639000
},
{
"epoch": 0.89,
"learning_rate": 3.510510346781563e-05,
"loss": 0.7342,
"step": 639500
},
{
"epoch": 0.89,
"learning_rate": 3.509347682336774e-05,
"loss": 0.7364,
"step": 640000
},
{
"epoch": 0.9,
"learning_rate": 3.508182687903117e-05,
"loss": 0.7413,
"step": 640500
},
{
"epoch": 0.9,
"learning_rate": 3.507017693469461e-05,
"loss": 0.7447,
"step": 641000
},
{
"epoch": 0.9,
"learning_rate": 3.5058526990358045e-05,
"loss": 0.7366,
"step": 641500
},
{
"epoch": 0.9,
"learning_rate": 3.504687704602148e-05,
"loss": 0.7382,
"step": 642000
},
{
"epoch": 0.9,
"learning_rate": 3.503522710168491e-05,
"loss": 0.7383,
"step": 642500
},
{
"epoch": 0.9,
"learning_rate": 3.502360045723702e-05,
"loss": 0.7369,
"step": 643000
},
{
"epoch": 0.9,
"learning_rate": 3.501195051290045e-05,
"loss": 0.7393,
"step": 643500
},
{
"epoch": 0.9,
"learning_rate": 3.5000323868452554e-05,
"loss": 0.7413,
"step": 644000
},
{
"epoch": 0.9,
"learning_rate": 3.4988673924115994e-05,
"loss": 0.7318,
"step": 644500
},
{
"epoch": 0.9,
"learning_rate": 3.4977023979779426e-05,
"loss": 0.7414,
"step": 645000
},
{
"epoch": 0.9,
"learning_rate": 3.496537403544286e-05,
"loss": 0.7384,
"step": 645500
},
{
"epoch": 0.9,
"learning_rate": 3.495372409110629e-05,
"loss": 0.7379,
"step": 646000
},
{
"epoch": 0.9,
"learning_rate": 3.494207414676973e-05,
"loss": 0.7382,
"step": 646500
},
{
"epoch": 0.9,
"learning_rate": 3.493042420243316e-05,
"loss": 0.7359,
"step": 647000
},
{
"epoch": 0.91,
"learning_rate": 3.49187742580966e-05,
"loss": 0.7391,
"step": 647500
},
{
"epoch": 0.91,
"learning_rate": 3.4907124313760034e-05,
"loss": 0.7401,
"step": 648000
},
{
"epoch": 0.91,
"learning_rate": 3.4895474369423466e-05,
"loss": 0.7341,
"step": 648500
},
{
"epoch": 0.91,
"learning_rate": 3.48838244250869e-05,
"loss": 0.7383,
"step": 649000
},
{
"epoch": 0.91,
"learning_rate": 3.487217448075033e-05,
"loss": 0.7351,
"step": 649500
},
{
"epoch": 0.91,
"learning_rate": 3.486054783630244e-05,
"loss": 0.7383,
"step": 650000
},
{
"epoch": 0.91,
"learning_rate": 3.484889789196587e-05,
"loss": 0.7362,
"step": 650500
},
{
"epoch": 0.91,
"learning_rate": 3.483727124751798e-05,
"loss": 0.7369,
"step": 651000
},
{
"epoch": 0.91,
"learning_rate": 3.4825621303181415e-05,
"loss": 0.7356,
"step": 651500
},
{
"epoch": 0.91,
"learning_rate": 3.481397135884485e-05,
"loss": 0.7379,
"step": 652000
},
{
"epoch": 0.91,
"learning_rate": 3.480232141450828e-05,
"loss": 0.7419,
"step": 652500
},
{
"epoch": 0.91,
"learning_rate": 3.479067147017171e-05,
"loss": 0.7343,
"step": 653000
},
{
"epoch": 0.91,
"learning_rate": 3.477904482572382e-05,
"loss": 0.7387,
"step": 653500
},
{
"epoch": 0.91,
"learning_rate": 3.476741818127593e-05,
"loss": 0.7362,
"step": 654000
},
{
"epoch": 0.91,
"learning_rate": 3.475576823693937e-05,
"loss": 0.7391,
"step": 654500
},
{
"epoch": 0.92,
"learning_rate": 3.47441182926028e-05,
"loss": 0.7476,
"step": 655000
},
{
"epoch": 0.92,
"learning_rate": 3.4732468348266235e-05,
"loss": 0.7382,
"step": 655500
},
{
"epoch": 0.92,
"learning_rate": 3.472081840392967e-05,
"loss": 0.7402,
"step": 656000
},
{
"epoch": 0.92,
"learning_rate": 3.47091684595931e-05,
"loss": 0.7347,
"step": 656500
},
{
"epoch": 0.92,
"learning_rate": 3.469751851525654e-05,
"loss": 0.7413,
"step": 657000
},
{
"epoch": 0.92,
"learning_rate": 3.468586857091997e-05,
"loss": 0.7415,
"step": 657500
},
{
"epoch": 0.92,
"learning_rate": 3.4674218626583404e-05,
"loss": 0.7383,
"step": 658000
},
{
"epoch": 0.92,
"learning_rate": 3.466259198213551e-05,
"loss": 0.737,
"step": 658500
},
{
"epoch": 0.92,
"learning_rate": 3.4650942037798946e-05,
"loss": 0.7321,
"step": 659000
},
{
"epoch": 0.92,
"learning_rate": 3.463929209346238e-05,
"loss": 0.7359,
"step": 659500
},
{
"epoch": 0.92,
"learning_rate": 3.462764214912581e-05,
"loss": 0.7408,
"step": 660000
},
{
"epoch": 0.92,
"learning_rate": 3.461599220478925e-05,
"loss": 0.7387,
"step": 660500
},
{
"epoch": 0.92,
"learning_rate": 3.460434226045268e-05,
"loss": 0.7339,
"step": 661000
},
{
"epoch": 0.92,
"learning_rate": 3.4592692316116114e-05,
"loss": 0.7356,
"step": 661500
},
{
"epoch": 0.93,
"learning_rate": 3.458104237177955e-05,
"loss": 0.7336,
"step": 662000
},
{
"epoch": 0.93,
"learning_rate": 3.456939242744298e-05,
"loss": 0.7345,
"step": 662500
},
{
"epoch": 0.93,
"learning_rate": 3.455774248310642e-05,
"loss": 0.7389,
"step": 663000
},
{
"epoch": 0.93,
"learning_rate": 3.454611583865853e-05,
"loss": 0.7298,
"step": 663500
},
{
"epoch": 0.93,
"learning_rate": 3.453446589432196e-05,
"loss": 0.736,
"step": 664000
},
{
"epoch": 0.93,
"learning_rate": 3.452286254976274e-05,
"loss": 0.7367,
"step": 664500
},
{
"epoch": 0.93,
"learning_rate": 3.451121260542617e-05,
"loss": 0.7413,
"step": 665000
},
{
"epoch": 0.93,
"learning_rate": 3.4499562661089605e-05,
"loss": 0.739,
"step": 665500
},
{
"epoch": 0.93,
"learning_rate": 3.448791271675304e-05,
"loss": 0.7437,
"step": 666000
},
{
"epoch": 0.93,
"learning_rate": 3.4476262772416477e-05,
"loss": 0.7421,
"step": 666500
},
{
"epoch": 0.93,
"learning_rate": 3.446463612796858e-05,
"loss": 0.7378,
"step": 667000
},
{
"epoch": 0.93,
"learning_rate": 3.445298618363202e-05,
"loss": 0.7381,
"step": 667500
},
{
"epoch": 0.93,
"learning_rate": 3.444133623929545e-05,
"loss": 0.7368,
"step": 668000
},
{
"epoch": 0.93,
"learning_rate": 3.442968629495888e-05,
"loss": 0.7347,
"step": 668500
},
{
"epoch": 0.94,
"learning_rate": 3.4418036350622316e-05,
"loss": 0.7359,
"step": 669000
},
{
"epoch": 0.94,
"learning_rate": 3.440638640628575e-05,
"loss": 0.7371,
"step": 669500
},
{
"epoch": 0.94,
"learning_rate": 3.439473646194919e-05,
"loss": 0.734,
"step": 670000
},
{
"epoch": 0.94,
"learning_rate": 3.438308651761262e-05,
"loss": 0.7397,
"step": 670500
},
{
"epoch": 0.94,
"learning_rate": 3.437143657327606e-05,
"loss": 0.733,
"step": 671000
},
{
"epoch": 0.94,
"learning_rate": 3.435978662893949e-05,
"loss": 0.7378,
"step": 671500
},
{
"epoch": 0.94,
"learning_rate": 3.4348136684602923e-05,
"loss": 0.7305,
"step": 672000
},
{
"epoch": 0.94,
"learning_rate": 3.4336486740266356e-05,
"loss": 0.736,
"step": 672500
},
{
"epoch": 0.94,
"learning_rate": 3.4324836795929795e-05,
"loss": 0.7397,
"step": 673000
},
{
"epoch": 0.94,
"learning_rate": 3.4313256751259246e-05,
"loss": 0.7345,
"step": 673500
},
{
"epoch": 0.94,
"learning_rate": 3.430160680692268e-05,
"loss": 0.7336,
"step": 674000
},
{
"epoch": 0.94,
"learning_rate": 3.428995686258612e-05,
"loss": 0.7354,
"step": 674500
},
{
"epoch": 0.94,
"learning_rate": 3.427833021813822e-05,
"loss": 0.7357,
"step": 675000
},
{
"epoch": 0.94,
"learning_rate": 3.426668027380165e-05,
"loss": 0.7326,
"step": 675500
},
{
"epoch": 0.95,
"learning_rate": 3.4255030329465085e-05,
"loss": 0.7368,
"step": 676000
},
{
"epoch": 0.95,
"learning_rate": 3.4243380385128524e-05,
"loss": 0.7383,
"step": 676500
},
{
"epoch": 0.95,
"learning_rate": 3.4231730440791956e-05,
"loss": 0.7324,
"step": 677000
},
{
"epoch": 0.95,
"learning_rate": 3.4220080496455395e-05,
"loss": 0.7343,
"step": 677500
},
{
"epoch": 0.95,
"learning_rate": 3.420843055211883e-05,
"loss": 0.733,
"step": 678000
},
{
"epoch": 0.95,
"learning_rate": 3.419678060778226e-05,
"loss": 0.7314,
"step": 678500
},
{
"epoch": 0.95,
"learning_rate": 3.418513066344569e-05,
"loss": 0.732,
"step": 679000
},
{
"epoch": 0.95,
"learning_rate": 3.4173480719109125e-05,
"loss": 0.7353,
"step": 679500
},
{
"epoch": 0.95,
"learning_rate": 3.4161854074661234e-05,
"loss": 0.7342,
"step": 680000
},
{
"epoch": 0.95,
"learning_rate": 3.415020413032467e-05,
"loss": 0.7301,
"step": 680500
},
{
"epoch": 0.95,
"learning_rate": 3.4138554185988106e-05,
"loss": 0.7311,
"step": 681000
},
{
"epoch": 0.95,
"learning_rate": 3.412692754154021e-05,
"loss": 0.7326,
"step": 681500
},
{
"epoch": 0.95,
"learning_rate": 3.411527759720364e-05,
"loss": 0.731,
"step": 682000
},
{
"epoch": 0.95,
"learning_rate": 3.4103627652867074e-05,
"loss": 0.734,
"step": 682500
},
{
"epoch": 0.95,
"learning_rate": 3.4091977708530506e-05,
"loss": 0.7337,
"step": 683000
},
{
"epoch": 0.96,
"learning_rate": 3.4080327764193945e-05,
"loss": 0.7311,
"step": 683500
},
{
"epoch": 0.96,
"learning_rate": 3.406867781985738e-05,
"loss": 0.7373,
"step": 684000
},
{
"epoch": 0.96,
"learning_rate": 3.405705117540949e-05,
"loss": 0.7345,
"step": 684500
},
{
"epoch": 0.96,
"learning_rate": 3.404540123107292e-05,
"loss": 0.7408,
"step": 685000
},
{
"epoch": 0.96,
"learning_rate": 3.403375128673635e-05,
"loss": 0.7346,
"step": 685500
},
{
"epoch": 0.96,
"learning_rate": 3.402212464228846e-05,
"loss": 0.7347,
"step": 686000
},
{
"epoch": 0.96,
"learning_rate": 3.4010474697951894e-05,
"loss": 0.7323,
"step": 686500
},
{
"epoch": 0.96,
"learning_rate": 3.3998824753615326e-05,
"loss": 0.7337,
"step": 687000
},
{
"epoch": 0.96,
"learning_rate": 3.3987174809278765e-05,
"loss": 0.7327,
"step": 687500
},
{
"epoch": 0.96,
"learning_rate": 3.39755248649422e-05,
"loss": 0.7318,
"step": 688000
},
{
"epoch": 0.96,
"learning_rate": 3.396387492060563e-05,
"loss": 0.7357,
"step": 688500
},
{
"epoch": 0.96,
"learning_rate": 3.395224827615774e-05,
"loss": 0.7335,
"step": 689000
},
{
"epoch": 0.96,
"learning_rate": 3.394059833182117e-05,
"loss": 0.732,
"step": 689500
},
{
"epoch": 0.96,
"learning_rate": 3.3928948387484604e-05,
"loss": 0.7314,
"step": 690000
}
],
"max_steps": 2145933,
"num_train_epochs": 3,
"total_flos": 1.543433599991808e+19,
"trial_name": null,
"trial_params": null
}