S2ORC-t5-base-v1 / trainer_state.json
nreimers's picture
upload
4238fe7
raw
history blame contribute delete
No virus
191 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"global_step": 156000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-06,
"loss": 20.7047,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 1e-05,
"loss": 12.8823,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 1.5e-05,
"loss": 7.4966,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 2e-05,
"loss": 5.7711,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 2.5e-05,
"loss": 4.7253,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 3e-05,
"loss": 3.951,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 3.5e-05,
"loss": 3.5325,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 4e-05,
"loss": 3.3641,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 4.5e-05,
"loss": 3.2231,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 5e-05,
"loss": 3.1206,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.996779388083736e-05,
"loss": 3.0853,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 4.993558776167472e-05,
"loss": 3.0102,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 4.990338164251208e-05,
"loss": 2.9424,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 4.987117552334944e-05,
"loss": 2.9239,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 4.98389694041868e-05,
"loss": 2.8546,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.980676328502415e-05,
"loss": 2.8456,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 4.977455716586152e-05,
"loss": 2.7827,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 4.974235104669888e-05,
"loss": 2.7712,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 4.9710144927536237e-05,
"loss": 2.7398,
"step": 1900
},
{
"epoch": 0.01,
"learning_rate": 4.967793880837359e-05,
"loss": 2.7458,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.964573268921095e-05,
"loss": 2.6892,
"step": 2100
},
{
"epoch": 0.01,
"learning_rate": 4.9613526570048315e-05,
"loss": 2.6708,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 4.9581320450885674e-05,
"loss": 2.6689,
"step": 2300
},
{
"epoch": 0.02,
"learning_rate": 4.9549114331723027e-05,
"loss": 2.6247,
"step": 2400
},
{
"epoch": 0.02,
"learning_rate": 4.9516908212560386e-05,
"loss": 2.6345,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 4.948470209339775e-05,
"loss": 2.6269,
"step": 2600
},
{
"epoch": 0.02,
"learning_rate": 4.9452495974235105e-05,
"loss": 2.6141,
"step": 2700
},
{
"epoch": 0.02,
"learning_rate": 4.9420289855072464e-05,
"loss": 2.5648,
"step": 2800
},
{
"epoch": 0.02,
"learning_rate": 4.938808373590982e-05,
"loss": 2.5592,
"step": 2900
},
{
"epoch": 0.02,
"learning_rate": 4.935587761674719e-05,
"loss": 2.5526,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 4.932367149758454e-05,
"loss": 2.5508,
"step": 3100
},
{
"epoch": 0.02,
"learning_rate": 4.92914653784219e-05,
"loss": 2.5156,
"step": 3200
},
{
"epoch": 0.02,
"learning_rate": 4.925925925925926e-05,
"loss": 2.5263,
"step": 3300
},
{
"epoch": 0.02,
"learning_rate": 4.922705314009662e-05,
"loss": 2.5354,
"step": 3400
},
{
"epoch": 0.02,
"learning_rate": 4.919484702093398e-05,
"loss": 2.4829,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 4.916264090177134e-05,
"loss": 2.48,
"step": 3600
},
{
"epoch": 0.02,
"learning_rate": 4.91304347826087e-05,
"loss": 2.4623,
"step": 3700
},
{
"epoch": 0.02,
"learning_rate": 4.909822866344606e-05,
"loss": 2.4724,
"step": 3800
},
{
"epoch": 0.02,
"learning_rate": 4.906602254428342e-05,
"loss": 2.4455,
"step": 3900
},
{
"epoch": 0.03,
"learning_rate": 4.9033816425120776e-05,
"loss": 2.4768,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 4.9001610305958136e-05,
"loss": 2.4518,
"step": 4100
},
{
"epoch": 0.03,
"learning_rate": 4.8969404186795495e-05,
"loss": 2.4427,
"step": 4200
},
{
"epoch": 0.03,
"learning_rate": 4.893719806763285e-05,
"loss": 2.4253,
"step": 4300
},
{
"epoch": 0.03,
"learning_rate": 4.8904991948470214e-05,
"loss": 2.4199,
"step": 4400
},
{
"epoch": 0.03,
"learning_rate": 4.887278582930757e-05,
"loss": 2.4141,
"step": 4500
},
{
"epoch": 0.03,
"learning_rate": 4.884057971014493e-05,
"loss": 2.4239,
"step": 4600
},
{
"epoch": 0.03,
"learning_rate": 4.8808373590982285e-05,
"loss": 2.4204,
"step": 4700
},
{
"epoch": 0.03,
"learning_rate": 4.877616747181965e-05,
"loss": 2.4101,
"step": 4800
},
{
"epoch": 0.03,
"learning_rate": 4.874396135265701e-05,
"loss": 2.4064,
"step": 4900
},
{
"epoch": 0.03,
"learning_rate": 4.871175523349436e-05,
"loss": 2.3889,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 4.867954911433172e-05,
"loss": 2.3949,
"step": 5100
},
{
"epoch": 0.03,
"learning_rate": 4.864734299516908e-05,
"loss": 2.3889,
"step": 5200
},
{
"epoch": 0.03,
"learning_rate": 4.861513687600645e-05,
"loss": 2.3725,
"step": 5300
},
{
"epoch": 0.03,
"learning_rate": 4.85829307568438e-05,
"loss": 2.3865,
"step": 5400
},
{
"epoch": 0.04,
"learning_rate": 4.855072463768116e-05,
"loss": 2.3813,
"step": 5500
},
{
"epoch": 0.04,
"learning_rate": 4.851851851851852e-05,
"loss": 2.3615,
"step": 5600
},
{
"epoch": 0.04,
"learning_rate": 4.8486312399355885e-05,
"loss": 2.3394,
"step": 5700
},
{
"epoch": 0.04,
"learning_rate": 4.845410628019324e-05,
"loss": 2.3518,
"step": 5800
},
{
"epoch": 0.04,
"learning_rate": 4.84219001610306e-05,
"loss": 2.3659,
"step": 5900
},
{
"epoch": 0.04,
"learning_rate": 4.8389694041867956e-05,
"loss": 2.3501,
"step": 6000
},
{
"epoch": 0.04,
"learning_rate": 4.8357487922705316e-05,
"loss": 2.3663,
"step": 6100
},
{
"epoch": 0.04,
"learning_rate": 4.8325281803542675e-05,
"loss": 2.3503,
"step": 6200
},
{
"epoch": 0.04,
"learning_rate": 4.8293075684380035e-05,
"loss": 2.3429,
"step": 6300
},
{
"epoch": 0.04,
"learning_rate": 4.8260869565217394e-05,
"loss": 2.3306,
"step": 6400
},
{
"epoch": 0.04,
"learning_rate": 4.822866344605475e-05,
"loss": 2.3269,
"step": 6500
},
{
"epoch": 0.04,
"learning_rate": 4.819645732689211e-05,
"loss": 2.3207,
"step": 6600
},
{
"epoch": 0.04,
"learning_rate": 4.816425120772947e-05,
"loss": 2.3247,
"step": 6700
},
{
"epoch": 0.04,
"learning_rate": 4.813204508856683e-05,
"loss": 2.3479,
"step": 6800
},
{
"epoch": 0.04,
"learning_rate": 4.809983896940419e-05,
"loss": 2.3069,
"step": 6900
},
{
"epoch": 0.04,
"learning_rate": 4.806763285024155e-05,
"loss": 2.3049,
"step": 7000
},
{
"epoch": 0.05,
"learning_rate": 4.803542673107891e-05,
"loss": 2.2958,
"step": 7100
},
{
"epoch": 0.05,
"learning_rate": 4.800322061191627e-05,
"loss": 2.3091,
"step": 7200
},
{
"epoch": 0.05,
"learning_rate": 4.797101449275362e-05,
"loss": 2.2981,
"step": 7300
},
{
"epoch": 0.05,
"learning_rate": 4.793880837359098e-05,
"loss": 2.3055,
"step": 7400
},
{
"epoch": 0.05,
"learning_rate": 4.790660225442835e-05,
"loss": 2.301,
"step": 7500
},
{
"epoch": 0.05,
"learning_rate": 4.7874396135265706e-05,
"loss": 2.2988,
"step": 7600
},
{
"epoch": 0.05,
"learning_rate": 4.784219001610306e-05,
"loss": 2.2942,
"step": 7700
},
{
"epoch": 0.05,
"learning_rate": 4.780998389694042e-05,
"loss": 2.2692,
"step": 7800
},
{
"epoch": 0.05,
"learning_rate": 4.7777777777777784e-05,
"loss": 2.2916,
"step": 7900
},
{
"epoch": 0.05,
"learning_rate": 4.7745571658615143e-05,
"loss": 2.2862,
"step": 8000
},
{
"epoch": 0.05,
"learning_rate": 4.7713365539452496e-05,
"loss": 2.2664,
"step": 8100
},
{
"epoch": 0.05,
"learning_rate": 4.7681159420289855e-05,
"loss": 2.2748,
"step": 8200
},
{
"epoch": 0.05,
"learning_rate": 4.7648953301127215e-05,
"loss": 2.2848,
"step": 8300
},
{
"epoch": 0.05,
"learning_rate": 4.7616747181964574e-05,
"loss": 2.2688,
"step": 8400
},
{
"epoch": 0.05,
"learning_rate": 4.7584541062801933e-05,
"loss": 2.2861,
"step": 8500
},
{
"epoch": 0.06,
"learning_rate": 4.755233494363929e-05,
"loss": 2.2777,
"step": 8600
},
{
"epoch": 0.06,
"learning_rate": 4.752012882447665e-05,
"loss": 2.2673,
"step": 8700
},
{
"epoch": 0.06,
"learning_rate": 4.748792270531401e-05,
"loss": 2.2569,
"step": 8800
},
{
"epoch": 0.06,
"learning_rate": 4.745571658615137e-05,
"loss": 2.2579,
"step": 8900
},
{
"epoch": 0.06,
"learning_rate": 4.742351046698873e-05,
"loss": 2.2576,
"step": 9000
},
{
"epoch": 0.06,
"learning_rate": 4.739130434782609e-05,
"loss": 2.2404,
"step": 9100
},
{
"epoch": 0.06,
"learning_rate": 4.735909822866345e-05,
"loss": 2.2395,
"step": 9200
},
{
"epoch": 0.06,
"learning_rate": 4.732689210950081e-05,
"loss": 2.254,
"step": 9300
},
{
"epoch": 0.06,
"learning_rate": 4.729468599033817e-05,
"loss": 2.2501,
"step": 9400
},
{
"epoch": 0.06,
"learning_rate": 4.726247987117553e-05,
"loss": 2.2509,
"step": 9500
},
{
"epoch": 0.06,
"learning_rate": 4.723027375201288e-05,
"loss": 2.2629,
"step": 9600
},
{
"epoch": 0.06,
"learning_rate": 4.7198067632850246e-05,
"loss": 2.2393,
"step": 9700
},
{
"epoch": 0.06,
"learning_rate": 4.7165861513687605e-05,
"loss": 2.2417,
"step": 9800
},
{
"epoch": 0.06,
"learning_rate": 4.7133655394524964e-05,
"loss": 2.2301,
"step": 9900
},
{
"epoch": 0.06,
"learning_rate": 4.710144927536232e-05,
"loss": 2.2304,
"step": 10000
},
{
"epoch": 0.06,
"learning_rate": 4.706924315619968e-05,
"loss": 2.2171,
"step": 10100
},
{
"epoch": 0.07,
"learning_rate": 4.703703703703704e-05,
"loss": 2.2226,
"step": 10200
},
{
"epoch": 0.07,
"learning_rate": 4.7004830917874395e-05,
"loss": 2.244,
"step": 10300
},
{
"epoch": 0.07,
"learning_rate": 4.6972624798711754e-05,
"loss": 2.24,
"step": 10400
},
{
"epoch": 0.07,
"learning_rate": 4.6940418679549114e-05,
"loss": 2.2216,
"step": 10500
},
{
"epoch": 0.07,
"learning_rate": 4.690821256038648e-05,
"loss": 2.23,
"step": 10600
},
{
"epoch": 0.07,
"learning_rate": 4.687600644122383e-05,
"loss": 2.2267,
"step": 10700
},
{
"epoch": 0.07,
"learning_rate": 4.684380032206119e-05,
"loss": 2.2307,
"step": 10800
},
{
"epoch": 0.07,
"learning_rate": 4.681159420289855e-05,
"loss": 2.2156,
"step": 10900
},
{
"epoch": 0.07,
"learning_rate": 4.677938808373592e-05,
"loss": 2.2401,
"step": 11000
},
{
"epoch": 0.07,
"learning_rate": 4.674718196457327e-05,
"loss": 2.1964,
"step": 11100
},
{
"epoch": 0.07,
"learning_rate": 4.671497584541063e-05,
"loss": 2.2144,
"step": 11200
},
{
"epoch": 0.07,
"learning_rate": 4.668276972624799e-05,
"loss": 2.1971,
"step": 11300
},
{
"epoch": 0.07,
"learning_rate": 4.665056360708535e-05,
"loss": 2.2036,
"step": 11400
},
{
"epoch": 0.07,
"learning_rate": 4.661835748792271e-05,
"loss": 2.2174,
"step": 11500
},
{
"epoch": 0.07,
"learning_rate": 4.6586151368760067e-05,
"loss": 2.1736,
"step": 11600
},
{
"epoch": 0.07,
"learning_rate": 4.6553945249597426e-05,
"loss": 2.2042,
"step": 11700
},
{
"epoch": 0.08,
"learning_rate": 4.6521739130434785e-05,
"loss": 2.1895,
"step": 11800
},
{
"epoch": 0.08,
"learning_rate": 4.6489533011272145e-05,
"loss": 2.2122,
"step": 11900
},
{
"epoch": 0.08,
"learning_rate": 4.6457326892109504e-05,
"loss": 2.203,
"step": 12000
},
{
"epoch": 0.08,
"learning_rate": 4.642512077294686e-05,
"loss": 2.1954,
"step": 12100
},
{
"epoch": 0.08,
"learning_rate": 4.639291465378422e-05,
"loss": 2.1925,
"step": 12200
},
{
"epoch": 0.08,
"learning_rate": 4.636070853462158e-05,
"loss": 2.2087,
"step": 12300
},
{
"epoch": 0.08,
"learning_rate": 4.632850241545894e-05,
"loss": 2.2021,
"step": 12400
},
{
"epoch": 0.08,
"learning_rate": 4.62962962962963e-05,
"loss": 2.2032,
"step": 12500
},
{
"epoch": 0.08,
"learning_rate": 4.626409017713365e-05,
"loss": 2.1941,
"step": 12600
},
{
"epoch": 0.08,
"learning_rate": 4.623188405797101e-05,
"loss": 2.2121,
"step": 12700
},
{
"epoch": 0.08,
"learning_rate": 4.619967793880838e-05,
"loss": 2.1842,
"step": 12800
},
{
"epoch": 0.08,
"learning_rate": 4.616747181964574e-05,
"loss": 2.1993,
"step": 12900
},
{
"epoch": 0.08,
"learning_rate": 4.613526570048309e-05,
"loss": 2.1745,
"step": 13000
},
{
"epoch": 0.08,
"learning_rate": 4.610305958132045e-05,
"loss": 2.1817,
"step": 13100
},
{
"epoch": 0.08,
"learning_rate": 4.6070853462157816e-05,
"loss": 2.2212,
"step": 13200
},
{
"epoch": 0.09,
"learning_rate": 4.6038647342995176e-05,
"loss": 2.1778,
"step": 13300
},
{
"epoch": 0.09,
"learning_rate": 4.600644122383253e-05,
"loss": 2.1628,
"step": 13400
},
{
"epoch": 0.09,
"learning_rate": 4.597423510466989e-05,
"loss": 2.1785,
"step": 13500
},
{
"epoch": 0.09,
"learning_rate": 4.594202898550725e-05,
"loss": 2.172,
"step": 13600
},
{
"epoch": 0.09,
"learning_rate": 4.5909822866344606e-05,
"loss": 2.1751,
"step": 13700
},
{
"epoch": 0.09,
"learning_rate": 4.5877616747181966e-05,
"loss": 2.188,
"step": 13800
},
{
"epoch": 0.09,
"learning_rate": 4.5845410628019325e-05,
"loss": 2.2064,
"step": 13900
},
{
"epoch": 0.09,
"learning_rate": 4.5813204508856684e-05,
"loss": 2.1889,
"step": 14000
},
{
"epoch": 0.09,
"learning_rate": 4.5780998389694044e-05,
"loss": 2.1646,
"step": 14100
},
{
"epoch": 0.09,
"learning_rate": 4.57487922705314e-05,
"loss": 2.1678,
"step": 14200
},
{
"epoch": 0.09,
"learning_rate": 4.571658615136876e-05,
"loss": 2.1834,
"step": 14300
},
{
"epoch": 0.09,
"learning_rate": 4.568438003220612e-05,
"loss": 2.1612,
"step": 14400
},
{
"epoch": 0.09,
"learning_rate": 4.565217391304348e-05,
"loss": 2.1596,
"step": 14500
},
{
"epoch": 0.09,
"learning_rate": 4.561996779388084e-05,
"loss": 2.1775,
"step": 14600
},
{
"epoch": 0.09,
"learning_rate": 4.55877616747182e-05,
"loss": 2.1626,
"step": 14700
},
{
"epoch": 0.09,
"learning_rate": 4.555555555555556e-05,
"loss": 2.1585,
"step": 14800
},
{
"epoch": 0.1,
"learning_rate": 4.552334943639291e-05,
"loss": 2.1642,
"step": 14900
},
{
"epoch": 0.1,
"learning_rate": 4.549114331723028e-05,
"loss": 2.1755,
"step": 15000
},
{
"epoch": 0.1,
"learning_rate": 4.545893719806764e-05,
"loss": 2.1735,
"step": 15100
},
{
"epoch": 0.1,
"learning_rate": 4.5426731078904997e-05,
"loss": 2.1476,
"step": 15200
},
{
"epoch": 0.1,
"learning_rate": 4.539452495974235e-05,
"loss": 2.1624,
"step": 15300
},
{
"epoch": 0.1,
"learning_rate": 4.5362318840579715e-05,
"loss": 2.1375,
"step": 15400
},
{
"epoch": 0.1,
"learning_rate": 4.5330112721417075e-05,
"loss": 2.1561,
"step": 15500
},
{
"epoch": 0.1,
"learning_rate": 4.5297906602254434e-05,
"loss": 2.1357,
"step": 15600
},
{
"epoch": 0.1,
"learning_rate": 4.5265700483091786e-05,
"loss": 2.152,
"step": 15700
},
{
"epoch": 0.1,
"learning_rate": 4.5233494363929146e-05,
"loss": 2.1332,
"step": 15800
},
{
"epoch": 0.1,
"learning_rate": 4.520128824476651e-05,
"loss": 2.1334,
"step": 15900
},
{
"epoch": 0.1,
"learning_rate": 4.5169082125603865e-05,
"loss": 2.1483,
"step": 16000
},
{
"epoch": 0.1,
"learning_rate": 4.5136876006441224e-05,
"loss": 2.1616,
"step": 16100
},
{
"epoch": 0.1,
"learning_rate": 4.510466988727858e-05,
"loss": 2.1178,
"step": 16200
},
{
"epoch": 0.1,
"learning_rate": 4.507246376811595e-05,
"loss": 2.1405,
"step": 16300
},
{
"epoch": 0.1,
"learning_rate": 4.50402576489533e-05,
"loss": 2.1578,
"step": 16400
},
{
"epoch": 0.11,
"learning_rate": 4.500805152979066e-05,
"loss": 2.1422,
"step": 16500
},
{
"epoch": 0.11,
"learning_rate": 4.497584541062802e-05,
"loss": 2.1332,
"step": 16600
},
{
"epoch": 0.11,
"learning_rate": 4.494363929146538e-05,
"loss": 2.1607,
"step": 16700
},
{
"epoch": 0.11,
"learning_rate": 4.491143317230274e-05,
"loss": 2.1271,
"step": 16800
},
{
"epoch": 0.11,
"learning_rate": 4.48792270531401e-05,
"loss": 2.1583,
"step": 16900
},
{
"epoch": 0.11,
"learning_rate": 4.484702093397746e-05,
"loss": 2.111,
"step": 17000
},
{
"epoch": 0.11,
"learning_rate": 4.481481481481482e-05,
"loss": 2.1337,
"step": 17100
},
{
"epoch": 0.11,
"learning_rate": 4.478260869565218e-05,
"loss": 2.1344,
"step": 17200
},
{
"epoch": 0.11,
"learning_rate": 4.4750402576489536e-05,
"loss": 2.1427,
"step": 17300
},
{
"epoch": 0.11,
"learning_rate": 4.4718196457326895e-05,
"loss": 2.1487,
"step": 17400
},
{
"epoch": 0.11,
"learning_rate": 4.4685990338164255e-05,
"loss": 2.1257,
"step": 17500
},
{
"epoch": 0.11,
"learning_rate": 4.465378421900161e-05,
"loss": 2.1398,
"step": 17600
},
{
"epoch": 0.11,
"learning_rate": 4.4621578099838974e-05,
"loss": 2.138,
"step": 17700
},
{
"epoch": 0.11,
"learning_rate": 4.458937198067633e-05,
"loss": 2.1271,
"step": 17800
},
{
"epoch": 0.11,
"learning_rate": 4.455716586151369e-05,
"loss": 2.1222,
"step": 17900
},
{
"epoch": 0.12,
"learning_rate": 4.4524959742351045e-05,
"loss": 2.1603,
"step": 18000
},
{
"epoch": 0.12,
"learning_rate": 4.449275362318841e-05,
"loss": 2.127,
"step": 18100
},
{
"epoch": 0.12,
"learning_rate": 4.446054750402577e-05,
"loss": 2.1174,
"step": 18200
},
{
"epoch": 0.12,
"learning_rate": 4.442834138486312e-05,
"loss": 2.1085,
"step": 18300
},
{
"epoch": 0.12,
"learning_rate": 4.439613526570048e-05,
"loss": 2.1064,
"step": 18400
},
{
"epoch": 0.12,
"learning_rate": 4.436392914653785e-05,
"loss": 2.1103,
"step": 18500
},
{
"epoch": 0.12,
"learning_rate": 4.433172302737521e-05,
"loss": 2.1228,
"step": 18600
},
{
"epoch": 0.12,
"learning_rate": 4.429951690821256e-05,
"loss": 2.1141,
"step": 18700
},
{
"epoch": 0.12,
"learning_rate": 4.426731078904992e-05,
"loss": 2.0989,
"step": 18800
},
{
"epoch": 0.12,
"learning_rate": 4.423510466988728e-05,
"loss": 2.1139,
"step": 18900
},
{
"epoch": 0.12,
"learning_rate": 4.4202898550724645e-05,
"loss": 2.1168,
"step": 19000
},
{
"epoch": 0.12,
"learning_rate": 4.4170692431562e-05,
"loss": 2.1014,
"step": 19100
},
{
"epoch": 0.12,
"learning_rate": 4.413848631239936e-05,
"loss": 2.1247,
"step": 19200
},
{
"epoch": 0.12,
"learning_rate": 4.4106280193236716e-05,
"loss": 2.1197,
"step": 19300
},
{
"epoch": 0.12,
"learning_rate": 4.4074074074074076e-05,
"loss": 2.0946,
"step": 19400
},
{
"epoch": 0.12,
"learning_rate": 4.4041867954911435e-05,
"loss": 2.0958,
"step": 19500
},
{
"epoch": 0.13,
"learning_rate": 4.4009661835748794e-05,
"loss": 2.1009,
"step": 19600
},
{
"epoch": 0.13,
"learning_rate": 4.3977455716586154e-05,
"loss": 2.1218,
"step": 19700
},
{
"epoch": 0.13,
"learning_rate": 4.394524959742351e-05,
"loss": 2.101,
"step": 19800
},
{
"epoch": 0.13,
"learning_rate": 4.391304347826087e-05,
"loss": 2.1013,
"step": 19900
},
{
"epoch": 0.13,
"learning_rate": 4.388083735909823e-05,
"loss": 2.1107,
"step": 20000
},
{
"epoch": 0.13,
"learning_rate": 4.384863123993559e-05,
"loss": 2.1204,
"step": 20100
},
{
"epoch": 0.13,
"learning_rate": 4.3816425120772944e-05,
"loss": 2.0881,
"step": 20200
},
{
"epoch": 0.13,
"learning_rate": 4.378421900161031e-05,
"loss": 2.1067,
"step": 20300
},
{
"epoch": 0.13,
"learning_rate": 4.375201288244767e-05,
"loss": 2.1082,
"step": 20400
},
{
"epoch": 0.13,
"learning_rate": 4.371980676328503e-05,
"loss": 2.0896,
"step": 20500
},
{
"epoch": 0.13,
"learning_rate": 4.368760064412238e-05,
"loss": 2.095,
"step": 20600
},
{
"epoch": 0.13,
"learning_rate": 4.365539452495974e-05,
"loss": 2.1106,
"step": 20700
},
{
"epoch": 0.13,
"learning_rate": 4.362318840579711e-05,
"loss": 2.102,
"step": 20800
},
{
"epoch": 0.13,
"learning_rate": 4.3590982286634466e-05,
"loss": 2.0977,
"step": 20900
},
{
"epoch": 0.13,
"learning_rate": 4.355877616747182e-05,
"loss": 2.0682,
"step": 21000
},
{
"epoch": 0.14,
"learning_rate": 4.352657004830918e-05,
"loss": 2.0927,
"step": 21100
},
{
"epoch": 0.14,
"learning_rate": 4.3494363929146544e-05,
"loss": 2.0983,
"step": 21200
},
{
"epoch": 0.14,
"learning_rate": 4.34621578099839e-05,
"loss": 2.0805,
"step": 21300
},
{
"epoch": 0.14,
"learning_rate": 4.3429951690821256e-05,
"loss": 2.0982,
"step": 21400
},
{
"epoch": 0.14,
"learning_rate": 4.3397745571658615e-05,
"loss": 2.0978,
"step": 21500
},
{
"epoch": 0.14,
"learning_rate": 4.336553945249598e-05,
"loss": 2.0929,
"step": 21600
},
{
"epoch": 0.14,
"learning_rate": 4.3333333333333334e-05,
"loss": 2.112,
"step": 21700
},
{
"epoch": 0.14,
"learning_rate": 4.3301127214170693e-05,
"loss": 2.0842,
"step": 21800
},
{
"epoch": 0.14,
"learning_rate": 4.326892109500805e-05,
"loss": 2.0988,
"step": 21900
},
{
"epoch": 0.14,
"learning_rate": 4.323671497584541e-05,
"loss": 2.102,
"step": 22000
},
{
"epoch": 0.14,
"learning_rate": 4.320450885668277e-05,
"loss": 2.1157,
"step": 22100
},
{
"epoch": 0.14,
"learning_rate": 4.317230273752013e-05,
"loss": 2.0687,
"step": 22200
},
{
"epoch": 0.14,
"learning_rate": 4.314009661835749e-05,
"loss": 2.0705,
"step": 22300
},
{
"epoch": 0.14,
"learning_rate": 4.310789049919485e-05,
"loss": 2.1052,
"step": 22400
},
{
"epoch": 0.14,
"learning_rate": 4.307568438003221e-05,
"loss": 2.1187,
"step": 22500
},
{
"epoch": 0.14,
"learning_rate": 4.304347826086957e-05,
"loss": 2.1024,
"step": 22600
},
{
"epoch": 0.15,
"learning_rate": 4.301127214170693e-05,
"loss": 2.0815,
"step": 22700
},
{
"epoch": 0.15,
"learning_rate": 4.297906602254429e-05,
"loss": 2.0903,
"step": 22800
},
{
"epoch": 0.15,
"learning_rate": 4.294685990338164e-05,
"loss": 2.0914,
"step": 22900
},
{
"epoch": 0.15,
"learning_rate": 4.2914653784219006e-05,
"loss": 2.0803,
"step": 23000
},
{
"epoch": 0.15,
"learning_rate": 4.2882447665056365e-05,
"loss": 2.0937,
"step": 23100
},
{
"epoch": 0.15,
"learning_rate": 4.2850241545893724e-05,
"loss": 2.1011,
"step": 23200
},
{
"epoch": 0.15,
"learning_rate": 4.281803542673108e-05,
"loss": 2.0867,
"step": 23300
},
{
"epoch": 0.15,
"learning_rate": 4.278582930756844e-05,
"loss": 2.0679,
"step": 23400
},
{
"epoch": 0.15,
"learning_rate": 4.27536231884058e-05,
"loss": 2.0698,
"step": 23500
},
{
"epoch": 0.15,
"learning_rate": 4.2721417069243155e-05,
"loss": 2.0689,
"step": 23600
},
{
"epoch": 0.15,
"learning_rate": 4.2689210950080514e-05,
"loss": 2.0717,
"step": 23700
},
{
"epoch": 0.15,
"learning_rate": 4.2657004830917874e-05,
"loss": 2.0758,
"step": 23800
},
{
"epoch": 0.15,
"learning_rate": 4.262479871175524e-05,
"loss": 2.0963,
"step": 23900
},
{
"epoch": 0.15,
"learning_rate": 4.259259259259259e-05,
"loss": 2.0754,
"step": 24000
},
{
"epoch": 0.15,
"learning_rate": 4.256038647342995e-05,
"loss": 2.0867,
"step": 24100
},
{
"epoch": 0.15,
"learning_rate": 4.252818035426731e-05,
"loss": 2.0856,
"step": 24200
},
{
"epoch": 0.16,
"learning_rate": 4.249597423510468e-05,
"loss": 2.0857,
"step": 24300
},
{
"epoch": 0.16,
"learning_rate": 4.246376811594203e-05,
"loss": 2.064,
"step": 24400
},
{
"epoch": 0.16,
"learning_rate": 4.243156199677939e-05,
"loss": 2.0822,
"step": 24500
},
{
"epoch": 0.16,
"learning_rate": 4.239935587761675e-05,
"loss": 2.0654,
"step": 24600
},
{
"epoch": 0.16,
"learning_rate": 4.236714975845411e-05,
"loss": 2.0691,
"step": 24700
},
{
"epoch": 0.16,
"learning_rate": 4.233494363929147e-05,
"loss": 2.0592,
"step": 24800
},
{
"epoch": 0.16,
"learning_rate": 4.2302737520128827e-05,
"loss": 2.0729,
"step": 24900
},
{
"epoch": 0.16,
"learning_rate": 4.2270531400966186e-05,
"loss": 2.0572,
"step": 25000
},
{
"epoch": 0.16,
"learning_rate": 4.2238325281803545e-05,
"loss": 2.0723,
"step": 25100
},
{
"epoch": 0.16,
"learning_rate": 4.2206119162640905e-05,
"loss": 2.0762,
"step": 25200
},
{
"epoch": 0.16,
"learning_rate": 4.2173913043478264e-05,
"loss": 2.0719,
"step": 25300
},
{
"epoch": 0.16,
"learning_rate": 4.214170692431562e-05,
"loss": 2.0779,
"step": 25400
},
{
"epoch": 0.16,
"learning_rate": 4.210950080515298e-05,
"loss": 2.054,
"step": 25500
},
{
"epoch": 0.16,
"learning_rate": 4.207729468599034e-05,
"loss": 2.0936,
"step": 25600
},
{
"epoch": 0.16,
"learning_rate": 4.20450885668277e-05,
"loss": 2.0647,
"step": 25700
},
{
"epoch": 0.17,
"learning_rate": 4.201288244766506e-05,
"loss": 2.072,
"step": 25800
},
{
"epoch": 0.17,
"learning_rate": 4.198067632850241e-05,
"loss": 2.0744,
"step": 25900
},
{
"epoch": 0.17,
"learning_rate": 4.194847020933977e-05,
"loss": 2.0608,
"step": 26000
},
{
"epoch": 0.17,
"learning_rate": 4.191626409017714e-05,
"loss": 2.0529,
"step": 26100
},
{
"epoch": 0.17,
"learning_rate": 4.18840579710145e-05,
"loss": 2.0754,
"step": 26200
},
{
"epoch": 0.17,
"learning_rate": 4.185185185185185e-05,
"loss": 2.0504,
"step": 26300
},
{
"epoch": 0.17,
"learning_rate": 4.181964573268921e-05,
"loss": 2.0662,
"step": 26400
},
{
"epoch": 0.17,
"learning_rate": 4.1787439613526576e-05,
"loss": 2.0513,
"step": 26500
},
{
"epoch": 0.17,
"learning_rate": 4.1755233494363936e-05,
"loss": 2.0747,
"step": 26600
},
{
"epoch": 0.17,
"learning_rate": 4.172302737520129e-05,
"loss": 2.0445,
"step": 26700
},
{
"epoch": 0.17,
"learning_rate": 4.169082125603865e-05,
"loss": 2.0495,
"step": 26800
},
{
"epoch": 0.17,
"learning_rate": 4.165861513687601e-05,
"loss": 2.0413,
"step": 26900
},
{
"epoch": 0.17,
"learning_rate": 4.1626409017713366e-05,
"loss": 2.0497,
"step": 27000
},
{
"epoch": 0.17,
"learning_rate": 4.1594202898550726e-05,
"loss": 2.0505,
"step": 27100
},
{
"epoch": 0.17,
"learning_rate": 4.1561996779388085e-05,
"loss": 2.0402,
"step": 27200
},
{
"epoch": 0.17,
"learning_rate": 4.1529790660225444e-05,
"loss": 2.0659,
"step": 27300
},
{
"epoch": 0.18,
"learning_rate": 4.1497584541062804e-05,
"loss": 2.0507,
"step": 27400
},
{
"epoch": 0.18,
"learning_rate": 4.146537842190016e-05,
"loss": 2.0612,
"step": 27500
},
{
"epoch": 0.18,
"learning_rate": 4.143317230273752e-05,
"loss": 2.0228,
"step": 27600
},
{
"epoch": 0.18,
"learning_rate": 4.140096618357488e-05,
"loss": 2.0434,
"step": 27700
},
{
"epoch": 0.18,
"learning_rate": 4.136876006441224e-05,
"loss": 2.0631,
"step": 27800
},
{
"epoch": 0.18,
"learning_rate": 4.13365539452496e-05,
"loss": 2.0531,
"step": 27900
},
{
"epoch": 0.18,
"learning_rate": 4.130434782608696e-05,
"loss": 2.053,
"step": 28000
},
{
"epoch": 0.18,
"learning_rate": 4.127214170692432e-05,
"loss": 2.0436,
"step": 28100
},
{
"epoch": 0.18,
"learning_rate": 4.123993558776167e-05,
"loss": 2.0755,
"step": 28200
},
{
"epoch": 0.18,
"learning_rate": 4.120772946859904e-05,
"loss": 2.0592,
"step": 28300
},
{
"epoch": 0.18,
"learning_rate": 4.11755233494364e-05,
"loss": 2.0631,
"step": 28400
},
{
"epoch": 0.18,
"learning_rate": 4.1143317230273756e-05,
"loss": 2.0633,
"step": 28500
},
{
"epoch": 0.18,
"learning_rate": 4.111111111111111e-05,
"loss": 2.0166,
"step": 28600
},
{
"epoch": 0.18,
"learning_rate": 4.1078904991948475e-05,
"loss": 2.0463,
"step": 28700
},
{
"epoch": 0.18,
"learning_rate": 4.1046698872785834e-05,
"loss": 2.0555,
"step": 28800
},
{
"epoch": 0.18,
"learning_rate": 4.101449275362319e-05,
"loss": 2.0368,
"step": 28900
},
{
"epoch": 0.19,
"learning_rate": 4.0982286634460546e-05,
"loss": 2.0295,
"step": 29000
},
{
"epoch": 0.19,
"learning_rate": 4.0950080515297906e-05,
"loss": 2.0385,
"step": 29100
},
{
"epoch": 0.19,
"learning_rate": 4.091787439613527e-05,
"loss": 2.038,
"step": 29200
},
{
"epoch": 0.19,
"learning_rate": 4.0885668276972624e-05,
"loss": 2.0391,
"step": 29300
},
{
"epoch": 0.19,
"learning_rate": 4.0853462157809984e-05,
"loss": 2.0398,
"step": 29400
},
{
"epoch": 0.19,
"learning_rate": 4.082125603864734e-05,
"loss": 2.0588,
"step": 29500
},
{
"epoch": 0.19,
"learning_rate": 4.078904991948471e-05,
"loss": 2.0447,
"step": 29600
},
{
"epoch": 0.19,
"learning_rate": 4.075684380032206e-05,
"loss": 2.0312,
"step": 29700
},
{
"epoch": 0.19,
"learning_rate": 4.072463768115942e-05,
"loss": 2.0428,
"step": 29800
},
{
"epoch": 0.19,
"learning_rate": 4.069243156199678e-05,
"loss": 2.0498,
"step": 29900
},
{
"epoch": 0.19,
"learning_rate": 4.066022544283414e-05,
"loss": 2.0573,
"step": 30000
},
{
"epoch": 0.19,
"learning_rate": 4.06280193236715e-05,
"loss": 2.0617,
"step": 30100
},
{
"epoch": 0.19,
"learning_rate": 4.059581320450886e-05,
"loss": 2.0338,
"step": 30200
},
{
"epoch": 0.19,
"learning_rate": 4.056360708534622e-05,
"loss": 2.039,
"step": 30300
},
{
"epoch": 0.19,
"learning_rate": 4.053140096618358e-05,
"loss": 2.0579,
"step": 30400
},
{
"epoch": 0.2,
"learning_rate": 4.049919484702094e-05,
"loss": 2.0547,
"step": 30500
},
{
"epoch": 0.2,
"learning_rate": 4.0466988727858296e-05,
"loss": 2.0526,
"step": 30600
},
{
"epoch": 0.2,
"learning_rate": 4.0434782608695655e-05,
"loss": 2.0437,
"step": 30700
},
{
"epoch": 0.2,
"learning_rate": 4.0402576489533015e-05,
"loss": 2.0353,
"step": 30800
},
{
"epoch": 0.2,
"learning_rate": 4.0370370370370374e-05,
"loss": 2.0399,
"step": 30900
},
{
"epoch": 0.2,
"learning_rate": 4.0338164251207733e-05,
"loss": 2.0242,
"step": 31000
},
{
"epoch": 0.2,
"learning_rate": 4.030595813204509e-05,
"loss": 2.0385,
"step": 31100
},
{
"epoch": 0.2,
"learning_rate": 4.0273752012882445e-05,
"loss": 2.0427,
"step": 31200
},
{
"epoch": 0.2,
"learning_rate": 4.0241545893719805e-05,
"loss": 2.0513,
"step": 31300
},
{
"epoch": 0.2,
"learning_rate": 4.020933977455717e-05,
"loss": 2.038,
"step": 31400
},
{
"epoch": 0.2,
"learning_rate": 4.017713365539453e-05,
"loss": 2.0211,
"step": 31500
},
{
"epoch": 0.2,
"learning_rate": 4.014492753623188e-05,
"loss": 2.0393,
"step": 31600
},
{
"epoch": 0.2,
"learning_rate": 4.011272141706924e-05,
"loss": 2.0405,
"step": 31700
},
{
"epoch": 0.2,
"learning_rate": 4.008051529790661e-05,
"loss": 2.0193,
"step": 31800
},
{
"epoch": 0.2,
"learning_rate": 4.004830917874397e-05,
"loss": 2.0161,
"step": 31900
},
{
"epoch": 0.2,
"learning_rate": 4.001610305958132e-05,
"loss": 2.0235,
"step": 32000
},
{
"epoch": 0.21,
"learning_rate": 3.998389694041868e-05,
"loss": 2.0371,
"step": 32100
},
{
"epoch": 0.21,
"learning_rate": 3.995169082125604e-05,
"loss": 2.0295,
"step": 32200
},
{
"epoch": 0.21,
"learning_rate": 3.99194847020934e-05,
"loss": 2.0574,
"step": 32300
},
{
"epoch": 0.21,
"learning_rate": 3.988727858293076e-05,
"loss": 2.0239,
"step": 32400
},
{
"epoch": 0.21,
"learning_rate": 3.985507246376812e-05,
"loss": 2.0478,
"step": 32500
},
{
"epoch": 0.21,
"learning_rate": 3.9822866344605476e-05,
"loss": 2.0276,
"step": 32600
},
{
"epoch": 0.21,
"learning_rate": 3.9790660225442836e-05,
"loss": 2.0238,
"step": 32700
},
{
"epoch": 0.21,
"learning_rate": 3.9758454106280195e-05,
"loss": 2.0129,
"step": 32800
},
{
"epoch": 0.21,
"learning_rate": 3.9726247987117554e-05,
"loss": 2.0339,
"step": 32900
},
{
"epoch": 0.21,
"learning_rate": 3.9694041867954914e-05,
"loss": 2.0178,
"step": 33000
},
{
"epoch": 0.21,
"learning_rate": 3.966183574879227e-05,
"loss": 2.0519,
"step": 33100
},
{
"epoch": 0.21,
"learning_rate": 3.962962962962963e-05,
"loss": 2.0347,
"step": 33200
},
{
"epoch": 0.21,
"learning_rate": 3.959742351046699e-05,
"loss": 2.0429,
"step": 33300
},
{
"epoch": 0.21,
"learning_rate": 3.956521739130435e-05,
"loss": 2.0277,
"step": 33400
},
{
"epoch": 0.21,
"learning_rate": 3.9533011272141704e-05,
"loss": 2.0162,
"step": 33500
},
{
"epoch": 0.22,
"learning_rate": 3.950080515297907e-05,
"loss": 2.0362,
"step": 33600
},
{
"epoch": 0.22,
"learning_rate": 3.946859903381643e-05,
"loss": 2.0268,
"step": 33700
},
{
"epoch": 0.22,
"learning_rate": 3.943639291465379e-05,
"loss": 2.0022,
"step": 33800
},
{
"epoch": 0.22,
"learning_rate": 3.940418679549114e-05,
"loss": 2.0161,
"step": 33900
},
{
"epoch": 0.22,
"learning_rate": 3.937198067632851e-05,
"loss": 2.0152,
"step": 34000
},
{
"epoch": 0.22,
"learning_rate": 3.9339774557165867e-05,
"loss": 2.0106,
"step": 34100
},
{
"epoch": 0.22,
"learning_rate": 3.9307568438003226e-05,
"loss": 2.0265,
"step": 34200
},
{
"epoch": 0.22,
"learning_rate": 3.927536231884058e-05,
"loss": 2.0143,
"step": 34300
},
{
"epoch": 0.22,
"learning_rate": 3.924315619967794e-05,
"loss": 2.0104,
"step": 34400
},
{
"epoch": 0.22,
"learning_rate": 3.9210950080515304e-05,
"loss": 2.0278,
"step": 34500
},
{
"epoch": 0.22,
"learning_rate": 3.9178743961352657e-05,
"loss": 2.0259,
"step": 34600
},
{
"epoch": 0.22,
"learning_rate": 3.9146537842190016e-05,
"loss": 2.0321,
"step": 34700
},
{
"epoch": 0.22,
"learning_rate": 3.9114331723027375e-05,
"loss": 2.0104,
"step": 34800
},
{
"epoch": 0.22,
"learning_rate": 3.908212560386474e-05,
"loss": 2.0288,
"step": 34900
},
{
"epoch": 0.22,
"learning_rate": 3.9049919484702094e-05,
"loss": 2.012,
"step": 35000
},
{
"epoch": 0.22,
"learning_rate": 3.901771336553945e-05,
"loss": 2.0373,
"step": 35100
},
{
"epoch": 0.23,
"learning_rate": 3.898550724637681e-05,
"loss": 2.0207,
"step": 35200
},
{
"epoch": 0.23,
"learning_rate": 3.895330112721417e-05,
"loss": 2.0174,
"step": 35300
},
{
"epoch": 0.23,
"learning_rate": 3.892109500805153e-05,
"loss": 2.0443,
"step": 35400
},
{
"epoch": 0.23,
"learning_rate": 3.888888888888889e-05,
"loss": 2.0215,
"step": 35500
},
{
"epoch": 0.23,
"learning_rate": 3.885668276972625e-05,
"loss": 2.023,
"step": 35600
},
{
"epoch": 0.23,
"learning_rate": 3.882447665056361e-05,
"loss": 2.0296,
"step": 35700
},
{
"epoch": 0.23,
"learning_rate": 3.879227053140097e-05,
"loss": 2.0307,
"step": 35800
},
{
"epoch": 0.23,
"learning_rate": 3.876006441223833e-05,
"loss": 2.0259,
"step": 35900
},
{
"epoch": 0.23,
"learning_rate": 3.872785829307569e-05,
"loss": 2.0353,
"step": 36000
},
{
"epoch": 0.23,
"learning_rate": 3.869565217391305e-05,
"loss": 2.0114,
"step": 36100
},
{
"epoch": 0.23,
"learning_rate": 3.86634460547504e-05,
"loss": 2.0143,
"step": 36200
},
{
"epoch": 0.23,
"learning_rate": 3.8631239935587766e-05,
"loss": 2.0187,
"step": 36300
},
{
"epoch": 0.23,
"learning_rate": 3.8599033816425125e-05,
"loss": 2.0262,
"step": 36400
},
{
"epoch": 0.23,
"learning_rate": 3.8566827697262484e-05,
"loss": 2.0139,
"step": 36500
},
{
"epoch": 0.23,
"learning_rate": 3.853462157809984e-05,
"loss": 2.036,
"step": 36600
},
{
"epoch": 0.23,
"learning_rate": 3.85024154589372e-05,
"loss": 2.0187,
"step": 36700
},
{
"epoch": 0.24,
"learning_rate": 3.847020933977456e-05,
"loss": 1.9926,
"step": 36800
},
{
"epoch": 0.24,
"learning_rate": 3.8438003220611915e-05,
"loss": 2.0224,
"step": 36900
},
{
"epoch": 0.24,
"learning_rate": 3.8405797101449274e-05,
"loss": 2.0256,
"step": 37000
},
{
"epoch": 0.24,
"learning_rate": 3.837359098228664e-05,
"loss": 2.0109,
"step": 37100
},
{
"epoch": 0.24,
"learning_rate": 3.8341384863124e-05,
"loss": 2.0035,
"step": 37200
},
{
"epoch": 0.24,
"learning_rate": 3.830917874396135e-05,
"loss": 2.0058,
"step": 37300
},
{
"epoch": 0.24,
"learning_rate": 3.827697262479871e-05,
"loss": 2.023,
"step": 37400
},
{
"epoch": 0.24,
"learning_rate": 3.824476650563607e-05,
"loss": 1.9997,
"step": 37500
},
{
"epoch": 0.24,
"learning_rate": 3.821256038647344e-05,
"loss": 2.0374,
"step": 37600
},
{
"epoch": 0.24,
"learning_rate": 3.818035426731079e-05,
"loss": 2.0109,
"step": 37700
},
{
"epoch": 0.24,
"learning_rate": 3.814814814814815e-05,
"loss": 2.0044,
"step": 37800
},
{
"epoch": 0.24,
"learning_rate": 3.811594202898551e-05,
"loss": 2.003,
"step": 37900
},
{
"epoch": 0.24,
"learning_rate": 3.808373590982287e-05,
"loss": 2.0145,
"step": 38000
},
{
"epoch": 0.24,
"learning_rate": 3.805152979066023e-05,
"loss": 1.9764,
"step": 38100
},
{
"epoch": 0.24,
"learning_rate": 3.8019323671497586e-05,
"loss": 2.0289,
"step": 38200
},
{
"epoch": 0.25,
"learning_rate": 3.7987117552334946e-05,
"loss": 2.0061,
"step": 38300
},
{
"epoch": 0.25,
"learning_rate": 3.7954911433172305e-05,
"loss": 1.9941,
"step": 38400
},
{
"epoch": 0.25,
"learning_rate": 3.7922705314009665e-05,
"loss": 2.0057,
"step": 38500
},
{
"epoch": 0.25,
"learning_rate": 3.7890499194847024e-05,
"loss": 2.0101,
"step": 38600
},
{
"epoch": 0.25,
"learning_rate": 3.785829307568438e-05,
"loss": 1.993,
"step": 38700
},
{
"epoch": 0.25,
"learning_rate": 3.7826086956521736e-05,
"loss": 1.9947,
"step": 38800
},
{
"epoch": 0.25,
"learning_rate": 3.77938808373591e-05,
"loss": 2.0238,
"step": 38900
},
{
"epoch": 0.25,
"learning_rate": 3.776167471819646e-05,
"loss": 1.9974,
"step": 39000
},
{
"epoch": 0.25,
"learning_rate": 3.772946859903382e-05,
"loss": 1.9928,
"step": 39100
},
{
"epoch": 0.25,
"learning_rate": 3.769726247987117e-05,
"loss": 1.9883,
"step": 39200
},
{
"epoch": 0.25,
"learning_rate": 3.766505636070853e-05,
"loss": 2.0022,
"step": 39300
},
{
"epoch": 0.25,
"learning_rate": 3.76328502415459e-05,
"loss": 2.0006,
"step": 39400
},
{
"epoch": 0.25,
"learning_rate": 3.760064412238326e-05,
"loss": 2.02,
"step": 39500
},
{
"epoch": 0.25,
"learning_rate": 3.756843800322061e-05,
"loss": 1.9845,
"step": 39600
},
{
"epoch": 0.25,
"learning_rate": 3.753623188405797e-05,
"loss": 1.9716,
"step": 39700
},
{
"epoch": 0.25,
"learning_rate": 3.7504025764895336e-05,
"loss": 2.0128,
"step": 39800
},
{
"epoch": 0.26,
"learning_rate": 3.747181964573269e-05,
"loss": 2.0099,
"step": 39900
},
{
"epoch": 0.26,
"learning_rate": 3.743961352657005e-05,
"loss": 2.0011,
"step": 40000
},
{
"epoch": 0.26,
"learning_rate": 3.740740740740741e-05,
"loss": 1.9956,
"step": 40100
},
{
"epoch": 0.26,
"learning_rate": 3.737520128824477e-05,
"loss": 2.0174,
"step": 40200
},
{
"epoch": 0.26,
"learning_rate": 3.7342995169082126e-05,
"loss": 1.9974,
"step": 40300
},
{
"epoch": 0.26,
"learning_rate": 3.7310789049919485e-05,
"loss": 1.9825,
"step": 40400
},
{
"epoch": 0.26,
"learning_rate": 3.7278582930756845e-05,
"loss": 2.0052,
"step": 40500
},
{
"epoch": 0.26,
"learning_rate": 3.7246376811594204e-05,
"loss": 1.9997,
"step": 40600
},
{
"epoch": 0.26,
"learning_rate": 3.7214170692431564e-05,
"loss": 2.0007,
"step": 40700
},
{
"epoch": 0.26,
"learning_rate": 3.718196457326892e-05,
"loss": 2.0167,
"step": 40800
},
{
"epoch": 0.26,
"learning_rate": 3.714975845410628e-05,
"loss": 2.03,
"step": 40900
},
{
"epoch": 0.26,
"learning_rate": 3.711755233494364e-05,
"loss": 1.994,
"step": 41000
},
{
"epoch": 0.26,
"learning_rate": 3.7085346215781e-05,
"loss": 1.9912,
"step": 41100
},
{
"epoch": 0.26,
"learning_rate": 3.705314009661836e-05,
"loss": 1.9792,
"step": 41200
},
{
"epoch": 0.26,
"learning_rate": 3.702093397745572e-05,
"loss": 2.0012,
"step": 41300
},
{
"epoch": 0.26,
"learning_rate": 3.698872785829308e-05,
"loss": 1.9955,
"step": 41400
},
{
"epoch": 0.27,
"learning_rate": 3.695652173913043e-05,
"loss": 2.0262,
"step": 41500
},
{
"epoch": 0.27,
"learning_rate": 3.69243156199678e-05,
"loss": 1.9885,
"step": 41600
},
{
"epoch": 0.27,
"learning_rate": 3.689210950080516e-05,
"loss": 1.9776,
"step": 41700
},
{
"epoch": 0.27,
"learning_rate": 3.6859903381642516e-05,
"loss": 1.9886,
"step": 41800
},
{
"epoch": 0.27,
"learning_rate": 3.682769726247987e-05,
"loss": 2.0042,
"step": 41900
},
{
"epoch": 0.27,
"learning_rate": 3.6795491143317235e-05,
"loss": 1.9826,
"step": 42000
},
{
"epoch": 0.27,
"learning_rate": 3.6763285024154594e-05,
"loss": 1.9953,
"step": 42100
},
{
"epoch": 0.27,
"learning_rate": 3.673107890499195e-05,
"loss": 1.995,
"step": 42200
},
{
"epoch": 0.27,
"learning_rate": 3.6698872785829306e-05,
"loss": 2.0054,
"step": 42300
},
{
"epoch": 0.27,
"learning_rate": 3.6666666666666666e-05,
"loss": 1.9883,
"step": 42400
},
{
"epoch": 0.27,
"learning_rate": 3.663446054750403e-05,
"loss": 2.0007,
"step": 42500
},
{
"epoch": 0.27,
"learning_rate": 3.6602254428341384e-05,
"loss": 1.9976,
"step": 42600
},
{
"epoch": 0.27,
"learning_rate": 3.6570048309178744e-05,
"loss": 1.9954,
"step": 42700
},
{
"epoch": 0.27,
"learning_rate": 3.65378421900161e-05,
"loss": 1.9918,
"step": 42800
},
{
"epoch": 0.27,
"learning_rate": 3.650563607085347e-05,
"loss": 1.9846,
"step": 42900
},
{
"epoch": 0.28,
"learning_rate": 3.647342995169082e-05,
"loss": 1.9958,
"step": 43000
},
{
"epoch": 0.28,
"learning_rate": 3.644122383252818e-05,
"loss": 1.9655,
"step": 43100
},
{
"epoch": 0.28,
"learning_rate": 3.640901771336554e-05,
"loss": 1.9889,
"step": 43200
},
{
"epoch": 0.28,
"learning_rate": 3.63768115942029e-05,
"loss": 1.9895,
"step": 43300
},
{
"epoch": 0.28,
"learning_rate": 3.634460547504026e-05,
"loss": 1.9816,
"step": 43400
},
{
"epoch": 0.28,
"learning_rate": 3.631239935587762e-05,
"loss": 1.9794,
"step": 43500
},
{
"epoch": 0.28,
"learning_rate": 3.628019323671498e-05,
"loss": 1.9658,
"step": 43600
},
{
"epoch": 0.28,
"learning_rate": 3.624798711755234e-05,
"loss": 2.002,
"step": 43700
},
{
"epoch": 0.28,
"learning_rate": 3.62157809983897e-05,
"loss": 1.9891,
"step": 43800
},
{
"epoch": 0.28,
"learning_rate": 3.6183574879227056e-05,
"loss": 1.9774,
"step": 43900
},
{
"epoch": 0.28,
"learning_rate": 3.6151368760064415e-05,
"loss": 1.9628,
"step": 44000
},
{
"epoch": 0.28,
"learning_rate": 3.6119162640901775e-05,
"loss": 1.9798,
"step": 44100
},
{
"epoch": 0.28,
"learning_rate": 3.6086956521739134e-05,
"loss": 1.9707,
"step": 44200
},
{
"epoch": 0.28,
"learning_rate": 3.6054750402576493e-05,
"loss": 1.9781,
"step": 44300
},
{
"epoch": 0.28,
"learning_rate": 3.602254428341385e-05,
"loss": 1.9823,
"step": 44400
},
{
"epoch": 0.28,
"learning_rate": 3.5990338164251205e-05,
"loss": 1.9717,
"step": 44500
},
{
"epoch": 0.29,
"learning_rate": 3.5958132045088565e-05,
"loss": 1.9937,
"step": 44600
},
{
"epoch": 0.29,
"learning_rate": 3.592592592592593e-05,
"loss": 1.9767,
"step": 44700
},
{
"epoch": 0.29,
"learning_rate": 3.589371980676329e-05,
"loss": 1.9785,
"step": 44800
},
{
"epoch": 0.29,
"learning_rate": 3.586151368760064e-05,
"loss": 2.0002,
"step": 44900
},
{
"epoch": 0.29,
"learning_rate": 3.5829307568438e-05,
"loss": 1.9661,
"step": 45000
},
{
"epoch": 0.29,
"learning_rate": 3.579710144927537e-05,
"loss": 1.9772,
"step": 45100
},
{
"epoch": 0.29,
"learning_rate": 3.576489533011273e-05,
"loss": 1.9909,
"step": 45200
},
{
"epoch": 0.29,
"learning_rate": 3.573268921095008e-05,
"loss": 1.9809,
"step": 45300
},
{
"epoch": 0.29,
"learning_rate": 3.570048309178744e-05,
"loss": 1.9754,
"step": 45400
},
{
"epoch": 0.29,
"learning_rate": 3.56682769726248e-05,
"loss": 1.994,
"step": 45500
},
{
"epoch": 0.29,
"learning_rate": 3.563607085346216e-05,
"loss": 1.9727,
"step": 45600
},
{
"epoch": 0.29,
"learning_rate": 3.560386473429952e-05,
"loss": 1.9875,
"step": 45700
},
{
"epoch": 0.29,
"learning_rate": 3.557165861513688e-05,
"loss": 1.9731,
"step": 45800
},
{
"epoch": 0.29,
"learning_rate": 3.5539452495974236e-05,
"loss": 1.968,
"step": 45900
},
{
"epoch": 0.29,
"learning_rate": 3.5507246376811596e-05,
"loss": 1.9526,
"step": 46000
},
{
"epoch": 0.3,
"learning_rate": 3.5475040257648955e-05,
"loss": 1.9979,
"step": 46100
},
{
"epoch": 0.3,
"learning_rate": 3.5442834138486314e-05,
"loss": 1.976,
"step": 46200
},
{
"epoch": 0.3,
"learning_rate": 3.5410628019323674e-05,
"loss": 1.966,
"step": 46300
},
{
"epoch": 0.3,
"learning_rate": 3.5378421900161026e-05,
"loss": 1.979,
"step": 46400
},
{
"epoch": 0.3,
"learning_rate": 3.534621578099839e-05,
"loss": 1.9783,
"step": 46500
},
{
"epoch": 0.3,
"learning_rate": 3.531400966183575e-05,
"loss": 1.9967,
"step": 46600
},
{
"epoch": 0.3,
"learning_rate": 3.528180354267311e-05,
"loss": 1.9561,
"step": 46700
},
{
"epoch": 0.3,
"learning_rate": 3.5249597423510464e-05,
"loss": 1.997,
"step": 46800
},
{
"epoch": 0.3,
"learning_rate": 3.521739130434783e-05,
"loss": 1.9694,
"step": 46900
},
{
"epoch": 0.3,
"learning_rate": 3.518518518518519e-05,
"loss": 1.9858,
"step": 47000
},
{
"epoch": 0.3,
"learning_rate": 3.515297906602255e-05,
"loss": 1.9802,
"step": 47100
},
{
"epoch": 0.3,
"learning_rate": 3.51207729468599e-05,
"loss": 1.9954,
"step": 47200
},
{
"epoch": 0.3,
"learning_rate": 3.508856682769727e-05,
"loss": 1.9718,
"step": 47300
},
{
"epoch": 0.3,
"learning_rate": 3.5056360708534627e-05,
"loss": 1.976,
"step": 47400
},
{
"epoch": 0.3,
"learning_rate": 3.502415458937198e-05,
"loss": 1.9801,
"step": 47500
},
{
"epoch": 0.3,
"learning_rate": 3.499194847020934e-05,
"loss": 1.9575,
"step": 47600
},
{
"epoch": 0.31,
"learning_rate": 3.49597423510467e-05,
"loss": 1.9845,
"step": 47700
},
{
"epoch": 0.31,
"learning_rate": 3.4927536231884064e-05,
"loss": 1.9366,
"step": 47800
},
{
"epoch": 0.31,
"learning_rate": 3.4895330112721417e-05,
"loss": 1.9785,
"step": 47900
},
{
"epoch": 0.31,
"learning_rate": 3.4863123993558776e-05,
"loss": 1.9794,
"step": 48000
},
{
"epoch": 0.31,
"learning_rate": 3.4830917874396135e-05,
"loss": 1.9734,
"step": 48100
},
{
"epoch": 0.31,
"learning_rate": 3.47987117552335e-05,
"loss": 1.9727,
"step": 48200
},
{
"epoch": 0.31,
"learning_rate": 3.4766505636070854e-05,
"loss": 1.9722,
"step": 48300
},
{
"epoch": 0.31,
"learning_rate": 3.473429951690821e-05,
"loss": 1.9618,
"step": 48400
},
{
"epoch": 0.31,
"learning_rate": 3.470209339774557e-05,
"loss": 1.9739,
"step": 48500
},
{
"epoch": 0.31,
"learning_rate": 3.466988727858293e-05,
"loss": 1.9646,
"step": 48600
},
{
"epoch": 0.31,
"learning_rate": 3.463768115942029e-05,
"loss": 1.9779,
"step": 48700
},
{
"epoch": 0.31,
"learning_rate": 3.460547504025765e-05,
"loss": 1.9951,
"step": 48800
},
{
"epoch": 0.31,
"learning_rate": 3.457326892109501e-05,
"loss": 1.9665,
"step": 48900
},
{
"epoch": 0.31,
"learning_rate": 3.454106280193237e-05,
"loss": 1.9667,
"step": 49000
},
{
"epoch": 0.31,
"learning_rate": 3.450885668276973e-05,
"loss": 1.9688,
"step": 49100
},
{
"epoch": 0.31,
"learning_rate": 3.447665056360709e-05,
"loss": 1.9374,
"step": 49200
},
{
"epoch": 0.32,
"learning_rate": 3.444444444444445e-05,
"loss": 1.978,
"step": 49300
},
{
"epoch": 0.32,
"learning_rate": 3.441223832528181e-05,
"loss": 1.9797,
"step": 49400
},
{
"epoch": 0.32,
"learning_rate": 3.438003220611916e-05,
"loss": 1.9589,
"step": 49500
},
{
"epoch": 0.32,
"learning_rate": 3.4347826086956526e-05,
"loss": 1.9625,
"step": 49600
},
{
"epoch": 0.32,
"learning_rate": 3.4315619967793885e-05,
"loss": 1.9596,
"step": 49700
},
{
"epoch": 0.32,
"learning_rate": 3.428341384863124e-05,
"loss": 1.9727,
"step": 49800
},
{
"epoch": 0.32,
"learning_rate": 3.42512077294686e-05,
"loss": 1.9705,
"step": 49900
},
{
"epoch": 0.32,
"learning_rate": 3.421900161030596e-05,
"loss": 1.9671,
"step": 50000
},
{
"epoch": 0.32,
"learning_rate": 3.418679549114332e-05,
"loss": 1.976,
"step": 50100
},
{
"epoch": 0.32,
"learning_rate": 3.4154589371980675e-05,
"loss": 1.9585,
"step": 50200
},
{
"epoch": 0.32,
"learning_rate": 3.4122383252818034e-05,
"loss": 1.9707,
"step": 50300
},
{
"epoch": 0.32,
"learning_rate": 3.40901771336554e-05,
"loss": 1.9853,
"step": 50400
},
{
"epoch": 0.32,
"learning_rate": 3.405797101449276e-05,
"loss": 1.993,
"step": 50500
},
{
"epoch": 0.32,
"learning_rate": 3.402576489533011e-05,
"loss": 1.9492,
"step": 50600
},
{
"epoch": 0.32,
"learning_rate": 3.399355877616747e-05,
"loss": 1.9813,
"step": 50700
},
{
"epoch": 0.33,
"learning_rate": 3.396135265700483e-05,
"loss": 1.9535,
"step": 50800
},
{
"epoch": 0.33,
"learning_rate": 3.392914653784219e-05,
"loss": 1.9755,
"step": 50900
},
{
"epoch": 0.33,
"learning_rate": 3.389694041867955e-05,
"loss": 1.9432,
"step": 51000
},
{
"epoch": 0.33,
"learning_rate": 3.386473429951691e-05,
"loss": 1.9511,
"step": 51100
},
{
"epoch": 0.33,
"learning_rate": 3.383252818035427e-05,
"loss": 1.9584,
"step": 51200
},
{
"epoch": 0.33,
"learning_rate": 3.380032206119163e-05,
"loss": 1.9639,
"step": 51300
},
{
"epoch": 0.33,
"learning_rate": 3.376811594202899e-05,
"loss": 1.976,
"step": 51400
},
{
"epoch": 0.33,
"learning_rate": 3.3735909822866346e-05,
"loss": 1.9684,
"step": 51500
},
{
"epoch": 0.33,
"learning_rate": 3.3703703703703706e-05,
"loss": 1.9721,
"step": 51600
},
{
"epoch": 0.33,
"learning_rate": 3.3671497584541065e-05,
"loss": 1.9649,
"step": 51700
},
{
"epoch": 0.33,
"learning_rate": 3.3639291465378424e-05,
"loss": 1.9903,
"step": 51800
},
{
"epoch": 0.33,
"learning_rate": 3.3607085346215784e-05,
"loss": 1.9573,
"step": 51900
},
{
"epoch": 0.33,
"learning_rate": 3.357487922705314e-05,
"loss": 1.9473,
"step": 52000
},
{
"epoch": 0.33,
"learning_rate": 3.3542673107890496e-05,
"loss": 1.9602,
"step": 52100
},
{
"epoch": 0.33,
"learning_rate": 3.351046698872786e-05,
"loss": 1.9503,
"step": 52200
},
{
"epoch": 0.33,
"learning_rate": 3.347826086956522e-05,
"loss": 1.9499,
"step": 52300
},
{
"epoch": 0.34,
"learning_rate": 3.344605475040258e-05,
"loss": 1.9534,
"step": 52400
},
{
"epoch": 0.34,
"learning_rate": 3.341384863123993e-05,
"loss": 1.9454,
"step": 52500
},
{
"epoch": 0.34,
"learning_rate": 3.338164251207729e-05,
"loss": 1.975,
"step": 52600
},
{
"epoch": 0.34,
"learning_rate": 3.334943639291466e-05,
"loss": 1.9264,
"step": 52700
},
{
"epoch": 0.34,
"learning_rate": 3.331723027375202e-05,
"loss": 1.9511,
"step": 52800
},
{
"epoch": 0.34,
"learning_rate": 3.328502415458937e-05,
"loss": 1.9429,
"step": 52900
},
{
"epoch": 0.34,
"learning_rate": 3.325281803542673e-05,
"loss": 1.967,
"step": 53000
},
{
"epoch": 0.34,
"learning_rate": 3.3220611916264096e-05,
"loss": 1.9603,
"step": 53100
},
{
"epoch": 0.34,
"learning_rate": 3.318840579710145e-05,
"loss": 1.9292,
"step": 53200
},
{
"epoch": 0.34,
"learning_rate": 3.315619967793881e-05,
"loss": 1.994,
"step": 53300
},
{
"epoch": 0.34,
"learning_rate": 3.312399355877617e-05,
"loss": 1.9592,
"step": 53400
},
{
"epoch": 0.34,
"learning_rate": 3.3091787439613533e-05,
"loss": 1.9562,
"step": 53500
},
{
"epoch": 0.34,
"learning_rate": 3.3059581320450886e-05,
"loss": 1.959,
"step": 53600
},
{
"epoch": 0.34,
"learning_rate": 3.3027375201288245e-05,
"loss": 1.956,
"step": 53700
},
{
"epoch": 0.34,
"learning_rate": 3.2995169082125605e-05,
"loss": 1.9536,
"step": 53800
},
{
"epoch": 0.34,
"learning_rate": 3.2962962962962964e-05,
"loss": 1.9747,
"step": 53900
},
{
"epoch": 0.35,
"learning_rate": 3.2930756843800323e-05,
"loss": 1.9402,
"step": 54000
},
{
"epoch": 0.35,
"learning_rate": 3.289855072463768e-05,
"loss": 1.9568,
"step": 54100
},
{
"epoch": 0.35,
"learning_rate": 3.286634460547504e-05,
"loss": 1.9466,
"step": 54200
},
{
"epoch": 0.35,
"learning_rate": 3.28341384863124e-05,
"loss": 1.981,
"step": 54300
},
{
"epoch": 0.35,
"learning_rate": 3.280193236714976e-05,
"loss": 1.9781,
"step": 54400
},
{
"epoch": 0.35,
"learning_rate": 3.276972624798712e-05,
"loss": 1.957,
"step": 54500
},
{
"epoch": 0.35,
"learning_rate": 3.273752012882448e-05,
"loss": 1.9648,
"step": 54600
},
{
"epoch": 0.35,
"learning_rate": 3.270531400966184e-05,
"loss": 1.9625,
"step": 54700
},
{
"epoch": 0.35,
"learning_rate": 3.267310789049919e-05,
"loss": 1.9251,
"step": 54800
},
{
"epoch": 0.35,
"learning_rate": 3.264090177133656e-05,
"loss": 1.9373,
"step": 54900
},
{
"epoch": 0.35,
"learning_rate": 3.260869565217392e-05,
"loss": 1.9801,
"step": 55000
},
{
"epoch": 0.35,
"learning_rate": 3.2576489533011276e-05,
"loss": 1.9333,
"step": 55100
},
{
"epoch": 0.35,
"learning_rate": 3.254428341384863e-05,
"loss": 1.9513,
"step": 55200
},
{
"epoch": 0.35,
"learning_rate": 3.2512077294685995e-05,
"loss": 1.9765,
"step": 55300
},
{
"epoch": 0.35,
"learning_rate": 3.2479871175523354e-05,
"loss": 1.9478,
"step": 55400
},
{
"epoch": 0.36,
"learning_rate": 3.244766505636071e-05,
"loss": 1.9439,
"step": 55500
},
{
"epoch": 0.36,
"learning_rate": 3.2415458937198066e-05,
"loss": 1.9383,
"step": 55600
},
{
"epoch": 0.36,
"learning_rate": 3.2383252818035426e-05,
"loss": 1.9317,
"step": 55700
},
{
"epoch": 0.36,
"learning_rate": 3.235104669887279e-05,
"loss": 1.9519,
"step": 55800
},
{
"epoch": 0.36,
"learning_rate": 3.2318840579710144e-05,
"loss": 1.9457,
"step": 55900
},
{
"epoch": 0.36,
"learning_rate": 3.2286634460547504e-05,
"loss": 1.9399,
"step": 56000
},
{
"epoch": 0.36,
"learning_rate": 3.225442834138486e-05,
"loss": 1.9469,
"step": 56100
},
{
"epoch": 0.36,
"learning_rate": 3.222222222222223e-05,
"loss": 1.9438,
"step": 56200
},
{
"epoch": 0.36,
"learning_rate": 3.219001610305958e-05,
"loss": 1.9306,
"step": 56300
},
{
"epoch": 0.36,
"learning_rate": 3.215780998389694e-05,
"loss": 1.9333,
"step": 56400
},
{
"epoch": 0.36,
"learning_rate": 3.21256038647343e-05,
"loss": 1.9409,
"step": 56500
},
{
"epoch": 0.36,
"learning_rate": 3.209339774557166e-05,
"loss": 1.9557,
"step": 56600
},
{
"epoch": 0.36,
"learning_rate": 3.206119162640902e-05,
"loss": 1.9562,
"step": 56700
},
{
"epoch": 0.36,
"learning_rate": 3.202898550724638e-05,
"loss": 1.951,
"step": 56800
},
{
"epoch": 0.36,
"learning_rate": 3.199677938808374e-05,
"loss": 1.9563,
"step": 56900
},
{
"epoch": 0.36,
"learning_rate": 3.19645732689211e-05,
"loss": 1.9489,
"step": 57000
},
{
"epoch": 0.37,
"learning_rate": 3.1932367149758457e-05,
"loss": 1.971,
"step": 57100
},
{
"epoch": 0.37,
"learning_rate": 3.1900161030595816e-05,
"loss": 1.9397,
"step": 57200
},
{
"epoch": 0.37,
"learning_rate": 3.1867954911433175e-05,
"loss": 1.9485,
"step": 57300
},
{
"epoch": 0.37,
"learning_rate": 3.183574879227053e-05,
"loss": 1.9503,
"step": 57400
},
{
"epoch": 0.37,
"learning_rate": 3.1803542673107894e-05,
"loss": 1.9544,
"step": 57500
},
{
"epoch": 0.37,
"learning_rate": 3.177133655394525e-05,
"loss": 1.9473,
"step": 57600
},
{
"epoch": 0.37,
"learning_rate": 3.173913043478261e-05,
"loss": 1.9642,
"step": 57700
},
{
"epoch": 0.37,
"learning_rate": 3.1706924315619965e-05,
"loss": 1.9705,
"step": 57800
},
{
"epoch": 0.37,
"learning_rate": 3.1674718196457325e-05,
"loss": 1.9627,
"step": 57900
},
{
"epoch": 0.37,
"learning_rate": 3.164251207729469e-05,
"loss": 1.9428,
"step": 58000
},
{
"epoch": 0.37,
"learning_rate": 3.161030595813205e-05,
"loss": 1.9467,
"step": 58100
},
{
"epoch": 0.37,
"learning_rate": 3.15780998389694e-05,
"loss": 1.96,
"step": 58200
},
{
"epoch": 0.37,
"learning_rate": 3.154589371980676e-05,
"loss": 1.9546,
"step": 58300
},
{
"epoch": 0.37,
"learning_rate": 3.151368760064413e-05,
"loss": 1.946,
"step": 58400
},
{
"epoch": 0.37,
"learning_rate": 3.148148148148148e-05,
"loss": 1.9281,
"step": 58500
},
{
"epoch": 0.38,
"learning_rate": 3.144927536231884e-05,
"loss": 1.9535,
"step": 58600
},
{
"epoch": 0.38,
"learning_rate": 3.14170692431562e-05,
"loss": 1.9442,
"step": 58700
},
{
"epoch": 0.38,
"learning_rate": 3.138486312399356e-05,
"loss": 1.9276,
"step": 58800
},
{
"epoch": 0.38,
"learning_rate": 3.135265700483092e-05,
"loss": 1.9386,
"step": 58900
},
{
"epoch": 0.38,
"learning_rate": 3.132045088566828e-05,
"loss": 1.9197,
"step": 59000
},
{
"epoch": 0.38,
"learning_rate": 3.128824476650564e-05,
"loss": 1.9645,
"step": 59100
},
{
"epoch": 0.38,
"learning_rate": 3.1256038647342996e-05,
"loss": 1.9181,
"step": 59200
},
{
"epoch": 0.38,
"learning_rate": 3.1223832528180356e-05,
"loss": 1.9337,
"step": 59300
},
{
"epoch": 0.38,
"learning_rate": 3.1191626409017715e-05,
"loss": 1.9462,
"step": 59400
},
{
"epoch": 0.38,
"learning_rate": 3.1159420289855074e-05,
"loss": 1.9251,
"step": 59500
},
{
"epoch": 0.38,
"learning_rate": 3.1127214170692434e-05,
"loss": 1.9374,
"step": 59600
},
{
"epoch": 0.38,
"learning_rate": 3.109500805152979e-05,
"loss": 1.9468,
"step": 59700
},
{
"epoch": 0.38,
"learning_rate": 3.106280193236715e-05,
"loss": 1.9274,
"step": 59800
},
{
"epoch": 0.38,
"learning_rate": 3.103059581320451e-05,
"loss": 1.9429,
"step": 59900
},
{
"epoch": 0.38,
"learning_rate": 3.099838969404187e-05,
"loss": 1.9358,
"step": 60000
},
{
"epoch": 0.38,
"learning_rate": 3.0966183574879224e-05,
"loss": 1.9436,
"step": 60100
},
{
"epoch": 0.39,
"learning_rate": 3.093397745571659e-05,
"loss": 1.9332,
"step": 60200
},
{
"epoch": 0.39,
"learning_rate": 3.090177133655395e-05,
"loss": 1.9375,
"step": 60300
},
{
"epoch": 0.39,
"learning_rate": 3.086956521739131e-05,
"loss": 1.9259,
"step": 60400
},
{
"epoch": 0.39,
"learning_rate": 3.083735909822866e-05,
"loss": 1.9103,
"step": 60500
},
{
"epoch": 0.39,
"learning_rate": 3.080515297906603e-05,
"loss": 1.9387,
"step": 60600
},
{
"epoch": 0.39,
"learning_rate": 3.0772946859903386e-05,
"loss": 1.9235,
"step": 60700
},
{
"epoch": 0.39,
"learning_rate": 3.074074074074074e-05,
"loss": 1.9397,
"step": 60800
},
{
"epoch": 0.39,
"learning_rate": 3.07085346215781e-05,
"loss": 1.9448,
"step": 60900
},
{
"epoch": 0.39,
"learning_rate": 3.067632850241546e-05,
"loss": 1.9346,
"step": 61000
},
{
"epoch": 0.39,
"learning_rate": 3.0644122383252824e-05,
"loss": 1.9308,
"step": 61100
},
{
"epoch": 0.39,
"learning_rate": 3.0611916264090176e-05,
"loss": 1.9595,
"step": 61200
},
{
"epoch": 0.39,
"learning_rate": 3.0579710144927536e-05,
"loss": 1.9172,
"step": 61300
},
{
"epoch": 0.39,
"learning_rate": 3.0547504025764895e-05,
"loss": 1.9298,
"step": 61400
},
{
"epoch": 0.39,
"learning_rate": 3.0515297906602258e-05,
"loss": 1.9273,
"step": 61500
},
{
"epoch": 0.39,
"learning_rate": 3.0483091787439617e-05,
"loss": 1.9707,
"step": 61600
},
{
"epoch": 0.39,
"learning_rate": 3.0450885668276973e-05,
"loss": 1.9303,
"step": 61700
},
{
"epoch": 0.4,
"learning_rate": 3.0418679549114333e-05,
"loss": 1.9242,
"step": 61800
},
{
"epoch": 0.4,
"learning_rate": 3.038647342995169e-05,
"loss": 1.9348,
"step": 61900
},
{
"epoch": 0.4,
"learning_rate": 3.035426731078905e-05,
"loss": 1.9187,
"step": 62000
},
{
"epoch": 0.4,
"learning_rate": 3.032206119162641e-05,
"loss": 1.9196,
"step": 62100
},
{
"epoch": 0.4,
"learning_rate": 3.028985507246377e-05,
"loss": 1.9336,
"step": 62200
},
{
"epoch": 0.4,
"learning_rate": 3.0257648953301126e-05,
"loss": 1.9254,
"step": 62300
},
{
"epoch": 0.4,
"learning_rate": 3.022544283413849e-05,
"loss": 1.9535,
"step": 62400
},
{
"epoch": 0.4,
"learning_rate": 3.0193236714975848e-05,
"loss": 1.9546,
"step": 62500
},
{
"epoch": 0.4,
"learning_rate": 3.0161030595813204e-05,
"loss": 1.9615,
"step": 62600
},
{
"epoch": 0.4,
"learning_rate": 3.0128824476650563e-05,
"loss": 1.9511,
"step": 62700
},
{
"epoch": 0.4,
"learning_rate": 3.0096618357487926e-05,
"loss": 1.9144,
"step": 62800
},
{
"epoch": 0.4,
"learning_rate": 3.0064412238325285e-05,
"loss": 1.9207,
"step": 62900
},
{
"epoch": 0.4,
"learning_rate": 3.003220611916264e-05,
"loss": 1.9562,
"step": 63000
},
{
"epoch": 0.4,
"learning_rate": 3e-05,
"loss": 1.9324,
"step": 63100
},
{
"epoch": 0.4,
"learning_rate": 2.9967793880837357e-05,
"loss": 1.9146,
"step": 63200
},
{
"epoch": 0.41,
"learning_rate": 2.9935587761674723e-05,
"loss": 1.9447,
"step": 63300
},
{
"epoch": 0.41,
"learning_rate": 2.990338164251208e-05,
"loss": 1.9364,
"step": 63400
},
{
"epoch": 0.41,
"learning_rate": 2.9871175523349438e-05,
"loss": 1.9225,
"step": 63500
},
{
"epoch": 0.41,
"learning_rate": 2.9838969404186794e-05,
"loss": 1.934,
"step": 63600
},
{
"epoch": 0.41,
"learning_rate": 2.9806763285024157e-05,
"loss": 1.9239,
"step": 63700
},
{
"epoch": 0.41,
"learning_rate": 2.9774557165861516e-05,
"loss": 1.9313,
"step": 63800
},
{
"epoch": 0.41,
"learning_rate": 2.9742351046698876e-05,
"loss": 1.9325,
"step": 63900
},
{
"epoch": 0.41,
"learning_rate": 2.971014492753623e-05,
"loss": 1.93,
"step": 64000
},
{
"epoch": 0.41,
"learning_rate": 2.967793880837359e-05,
"loss": 1.961,
"step": 64100
},
{
"epoch": 0.41,
"learning_rate": 2.9645732689210954e-05,
"loss": 1.919,
"step": 64200
},
{
"epoch": 0.41,
"learning_rate": 2.961352657004831e-05,
"loss": 1.9257,
"step": 64300
},
{
"epoch": 0.41,
"learning_rate": 2.958132045088567e-05,
"loss": 1.9205,
"step": 64400
},
{
"epoch": 0.41,
"learning_rate": 2.9549114331723028e-05,
"loss": 1.9151,
"step": 64500
},
{
"epoch": 0.41,
"learning_rate": 2.951690821256039e-05,
"loss": 1.9203,
"step": 64600
},
{
"epoch": 0.41,
"learning_rate": 2.9484702093397747e-05,
"loss": 1.9544,
"step": 64700
},
{
"epoch": 0.41,
"learning_rate": 2.9452495974235106e-05,
"loss": 1.9393,
"step": 64800
},
{
"epoch": 0.42,
"learning_rate": 2.9420289855072462e-05,
"loss": 1.9239,
"step": 64900
},
{
"epoch": 0.42,
"learning_rate": 2.938808373590982e-05,
"loss": 1.9339,
"step": 65000
},
{
"epoch": 0.42,
"learning_rate": 2.9355877616747184e-05,
"loss": 1.9312,
"step": 65100
},
{
"epoch": 0.42,
"learning_rate": 2.9323671497584544e-05,
"loss": 1.9491,
"step": 65200
},
{
"epoch": 0.42,
"learning_rate": 2.92914653784219e-05,
"loss": 1.9457,
"step": 65300
},
{
"epoch": 0.42,
"learning_rate": 2.925925925925926e-05,
"loss": 1.9449,
"step": 65400
},
{
"epoch": 0.42,
"learning_rate": 2.9227053140096622e-05,
"loss": 1.9569,
"step": 65500
},
{
"epoch": 0.42,
"learning_rate": 2.919484702093398e-05,
"loss": 1.9168,
"step": 65600
},
{
"epoch": 0.42,
"learning_rate": 2.9162640901771337e-05,
"loss": 1.9235,
"step": 65700
},
{
"epoch": 0.42,
"learning_rate": 2.9130434782608696e-05,
"loss": 1.9236,
"step": 65800
},
{
"epoch": 0.42,
"learning_rate": 2.909822866344606e-05,
"loss": 1.9038,
"step": 65900
},
{
"epoch": 0.42,
"learning_rate": 2.9066022544283415e-05,
"loss": 1.9669,
"step": 66000
},
{
"epoch": 0.42,
"learning_rate": 2.9033816425120775e-05,
"loss": 1.9415,
"step": 66100
},
{
"epoch": 0.42,
"learning_rate": 2.9001610305958134e-05,
"loss": 1.9418,
"step": 66200
},
{
"epoch": 0.42,
"learning_rate": 2.896940418679549e-05,
"loss": 1.9209,
"step": 66300
},
{
"epoch": 0.42,
"learning_rate": 2.8937198067632853e-05,
"loss": 1.9262,
"step": 66400
},
{
"epoch": 0.43,
"learning_rate": 2.8904991948470212e-05,
"loss": 1.9387,
"step": 66500
},
{
"epoch": 0.43,
"learning_rate": 2.8872785829307568e-05,
"loss": 1.9561,
"step": 66600
},
{
"epoch": 0.43,
"learning_rate": 2.8840579710144927e-05,
"loss": 1.9161,
"step": 66700
},
{
"epoch": 0.43,
"learning_rate": 2.880837359098229e-05,
"loss": 1.9364,
"step": 66800
},
{
"epoch": 0.43,
"learning_rate": 2.877616747181965e-05,
"loss": 1.9286,
"step": 66900
},
{
"epoch": 0.43,
"learning_rate": 2.8743961352657005e-05,
"loss": 1.9174,
"step": 67000
},
{
"epoch": 0.43,
"learning_rate": 2.8711755233494365e-05,
"loss": 1.9305,
"step": 67100
},
{
"epoch": 0.43,
"learning_rate": 2.867954911433172e-05,
"loss": 1.9169,
"step": 67200
},
{
"epoch": 0.43,
"learning_rate": 2.8647342995169087e-05,
"loss": 1.9348,
"step": 67300
},
{
"epoch": 0.43,
"learning_rate": 2.8615136876006443e-05,
"loss": 1.9234,
"step": 67400
},
{
"epoch": 0.43,
"learning_rate": 2.8582930756843802e-05,
"loss": 1.9469,
"step": 67500
},
{
"epoch": 0.43,
"learning_rate": 2.8550724637681158e-05,
"loss": 1.9653,
"step": 67600
},
{
"epoch": 0.43,
"learning_rate": 2.851851851851852e-05,
"loss": 1.9382,
"step": 67700
},
{
"epoch": 0.43,
"learning_rate": 2.848631239935588e-05,
"loss": 1.9345,
"step": 67800
},
{
"epoch": 0.43,
"learning_rate": 2.845410628019324e-05,
"loss": 1.9461,
"step": 67900
},
{
"epoch": 0.44,
"learning_rate": 2.8421900161030595e-05,
"loss": 1.9277,
"step": 68000
},
{
"epoch": 0.44,
"learning_rate": 2.8389694041867955e-05,
"loss": 1.9074,
"step": 68100
},
{
"epoch": 0.44,
"learning_rate": 2.8357487922705318e-05,
"loss": 1.9207,
"step": 68200
},
{
"epoch": 0.44,
"learning_rate": 2.8325281803542674e-05,
"loss": 1.9161,
"step": 68300
},
{
"epoch": 0.44,
"learning_rate": 2.8293075684380033e-05,
"loss": 1.9098,
"step": 68400
},
{
"epoch": 0.44,
"learning_rate": 2.826086956521739e-05,
"loss": 1.9419,
"step": 68500
},
{
"epoch": 0.44,
"learning_rate": 2.8228663446054755e-05,
"loss": 1.9128,
"step": 68600
},
{
"epoch": 0.44,
"learning_rate": 2.819645732689211e-05,
"loss": 1.9298,
"step": 68700
},
{
"epoch": 0.44,
"learning_rate": 2.816425120772947e-05,
"loss": 1.9396,
"step": 68800
},
{
"epoch": 0.44,
"learning_rate": 2.8132045088566826e-05,
"loss": 1.8993,
"step": 68900
},
{
"epoch": 0.44,
"learning_rate": 2.8099838969404192e-05,
"loss": 1.9452,
"step": 69000
},
{
"epoch": 0.44,
"learning_rate": 2.806763285024155e-05,
"loss": 1.9249,
"step": 69100
},
{
"epoch": 0.44,
"learning_rate": 2.8035426731078908e-05,
"loss": 1.9431,
"step": 69200
},
{
"epoch": 0.44,
"learning_rate": 2.8003220611916264e-05,
"loss": 1.9401,
"step": 69300
},
{
"epoch": 0.44,
"learning_rate": 2.7971014492753623e-05,
"loss": 1.9163,
"step": 69400
},
{
"epoch": 0.44,
"learning_rate": 2.7938808373590986e-05,
"loss": 1.9147,
"step": 69500
},
{
"epoch": 0.45,
"learning_rate": 2.7906602254428345e-05,
"loss": 1.9378,
"step": 69600
},
{
"epoch": 0.45,
"learning_rate": 2.78743961352657e-05,
"loss": 1.9189,
"step": 69700
},
{
"epoch": 0.45,
"learning_rate": 2.784219001610306e-05,
"loss": 1.9157,
"step": 69800
},
{
"epoch": 0.45,
"learning_rate": 2.7809983896940423e-05,
"loss": 1.9255,
"step": 69900
},
{
"epoch": 0.45,
"learning_rate": 2.777777777777778e-05,
"loss": 1.9217,
"step": 70000
},
{
"epoch": 0.45,
"learning_rate": 2.774557165861514e-05,
"loss": 1.9109,
"step": 70100
},
{
"epoch": 0.45,
"learning_rate": 2.7713365539452494e-05,
"loss": 1.9291,
"step": 70200
},
{
"epoch": 0.45,
"learning_rate": 2.7681159420289854e-05,
"loss": 1.9222,
"step": 70300
},
{
"epoch": 0.45,
"learning_rate": 2.7648953301127217e-05,
"loss": 1.9144,
"step": 70400
},
{
"epoch": 0.45,
"learning_rate": 2.7616747181964576e-05,
"loss": 1.9212,
"step": 70500
},
{
"epoch": 0.45,
"learning_rate": 2.7584541062801932e-05,
"loss": 1.9304,
"step": 70600
},
{
"epoch": 0.45,
"learning_rate": 2.755233494363929e-05,
"loss": 1.8976,
"step": 70700
},
{
"epoch": 0.45,
"learning_rate": 2.7520128824476654e-05,
"loss": 1.9387,
"step": 70800
},
{
"epoch": 0.45,
"learning_rate": 2.7487922705314013e-05,
"loss": 1.9291,
"step": 70900
},
{
"epoch": 0.45,
"learning_rate": 2.745571658615137e-05,
"loss": 1.9051,
"step": 71000
},
{
"epoch": 0.46,
"learning_rate": 2.742351046698873e-05,
"loss": 1.9167,
"step": 71100
},
{
"epoch": 0.46,
"learning_rate": 2.7391304347826085e-05,
"loss": 1.9272,
"step": 71200
},
{
"epoch": 0.46,
"learning_rate": 2.7359098228663447e-05,
"loss": 1.9236,
"step": 71300
},
{
"epoch": 0.46,
"learning_rate": 2.7326892109500807e-05,
"loss": 1.9206,
"step": 71400
},
{
"epoch": 0.46,
"learning_rate": 2.7294685990338166e-05,
"loss": 1.9144,
"step": 71500
},
{
"epoch": 0.46,
"learning_rate": 2.7262479871175522e-05,
"loss": 1.9178,
"step": 71600
},
{
"epoch": 0.46,
"learning_rate": 2.7230273752012885e-05,
"loss": 1.9083,
"step": 71700
},
{
"epoch": 0.46,
"learning_rate": 2.7198067632850244e-05,
"loss": 1.9415,
"step": 71800
},
{
"epoch": 0.46,
"learning_rate": 2.71658615136876e-05,
"loss": 1.9164,
"step": 71900
},
{
"epoch": 0.46,
"learning_rate": 2.713365539452496e-05,
"loss": 1.8996,
"step": 72000
},
{
"epoch": 0.46,
"learning_rate": 2.7101449275362322e-05,
"loss": 1.9156,
"step": 72100
},
{
"epoch": 0.46,
"learning_rate": 2.706924315619968e-05,
"loss": 1.9197,
"step": 72200
},
{
"epoch": 0.46,
"learning_rate": 2.7037037037037037e-05,
"loss": 1.9188,
"step": 72300
},
{
"epoch": 0.46,
"learning_rate": 2.7004830917874397e-05,
"loss": 1.8858,
"step": 72400
},
{
"epoch": 0.46,
"learning_rate": 2.6972624798711753e-05,
"loss": 1.9056,
"step": 72500
},
{
"epoch": 0.46,
"learning_rate": 2.694041867954912e-05,
"loss": 1.9244,
"step": 72600
},
{
"epoch": 0.47,
"learning_rate": 2.6908212560386475e-05,
"loss": 1.9054,
"step": 72700
},
{
"epoch": 0.47,
"learning_rate": 2.6876006441223834e-05,
"loss": 1.9299,
"step": 72800
},
{
"epoch": 0.47,
"learning_rate": 2.684380032206119e-05,
"loss": 1.9293,
"step": 72900
},
{
"epoch": 0.47,
"learning_rate": 2.6811594202898553e-05,
"loss": 1.9202,
"step": 73000
},
{
"epoch": 0.47,
"learning_rate": 2.6779388083735912e-05,
"loss": 1.9329,
"step": 73100
},
{
"epoch": 0.47,
"learning_rate": 2.674718196457327e-05,
"loss": 1.9152,
"step": 73200
},
{
"epoch": 0.47,
"learning_rate": 2.6714975845410628e-05,
"loss": 1.923,
"step": 73300
},
{
"epoch": 0.47,
"learning_rate": 2.6682769726247987e-05,
"loss": 1.9007,
"step": 73400
},
{
"epoch": 0.47,
"learning_rate": 2.665056360708535e-05,
"loss": 1.8996,
"step": 73500
},
{
"epoch": 0.47,
"learning_rate": 2.6618357487922706e-05,
"loss": 1.8949,
"step": 73600
},
{
"epoch": 0.47,
"learning_rate": 2.6586151368760065e-05,
"loss": 1.9109,
"step": 73700
},
{
"epoch": 0.47,
"learning_rate": 2.6553945249597424e-05,
"loss": 1.9335,
"step": 73800
},
{
"epoch": 0.47,
"learning_rate": 2.6521739130434787e-05,
"loss": 1.9106,
"step": 73900
},
{
"epoch": 0.47,
"learning_rate": 2.6489533011272143e-05,
"loss": 1.915,
"step": 74000
},
{
"epoch": 0.47,
"learning_rate": 2.6457326892109502e-05,
"loss": 1.9415,
"step": 74100
},
{
"epoch": 0.47,
"learning_rate": 2.642512077294686e-05,
"loss": 1.8955,
"step": 74200
},
{
"epoch": 0.48,
"learning_rate": 2.6392914653784218e-05,
"loss": 1.9071,
"step": 74300
},
{
"epoch": 0.48,
"learning_rate": 2.636070853462158e-05,
"loss": 1.9376,
"step": 74400
},
{
"epoch": 0.48,
"learning_rate": 2.632850241545894e-05,
"loss": 1.9102,
"step": 74500
},
{
"epoch": 0.48,
"learning_rate": 2.6296296296296296e-05,
"loss": 1.9067,
"step": 74600
},
{
"epoch": 0.48,
"learning_rate": 2.6264090177133655e-05,
"loss": 1.9279,
"step": 74700
},
{
"epoch": 0.48,
"learning_rate": 2.6231884057971018e-05,
"loss": 1.9236,
"step": 74800
},
{
"epoch": 0.48,
"learning_rate": 2.6199677938808377e-05,
"loss": 1.9239,
"step": 74900
},
{
"epoch": 0.48,
"learning_rate": 2.6167471819645733e-05,
"loss": 1.9283,
"step": 75000
},
{
"epoch": 0.48,
"learning_rate": 2.6135265700483093e-05,
"loss": 1.8919,
"step": 75100
},
{
"epoch": 0.48,
"learning_rate": 2.6103059581320455e-05,
"loss": 1.9061,
"step": 75200
},
{
"epoch": 0.48,
"learning_rate": 2.607085346215781e-05,
"loss": 1.9127,
"step": 75300
},
{
"epoch": 0.48,
"learning_rate": 2.603864734299517e-05,
"loss": 1.8899,
"step": 75400
},
{
"epoch": 0.48,
"learning_rate": 2.600644122383253e-05,
"loss": 1.8989,
"step": 75500
},
{
"epoch": 0.48,
"learning_rate": 2.5974235104669886e-05,
"loss": 1.9335,
"step": 75600
},
{
"epoch": 0.48,
"learning_rate": 2.594202898550725e-05,
"loss": 1.8984,
"step": 75700
},
{
"epoch": 0.49,
"learning_rate": 2.5909822866344608e-05,
"loss": 1.9197,
"step": 75800
},
{
"epoch": 0.49,
"learning_rate": 2.5877616747181964e-05,
"loss": 1.9151,
"step": 75900
},
{
"epoch": 0.49,
"learning_rate": 2.5845410628019323e-05,
"loss": 1.9292,
"step": 76000
},
{
"epoch": 0.49,
"learning_rate": 2.5813204508856686e-05,
"loss": 1.9312,
"step": 76100
},
{
"epoch": 0.49,
"learning_rate": 2.5780998389694045e-05,
"loss": 1.9321,
"step": 76200
},
{
"epoch": 0.49,
"learning_rate": 2.57487922705314e-05,
"loss": 1.9132,
"step": 76300
},
{
"epoch": 0.49,
"learning_rate": 2.571658615136876e-05,
"loss": 1.9138,
"step": 76400
},
{
"epoch": 0.49,
"learning_rate": 2.5684380032206117e-05,
"loss": 1.9274,
"step": 76500
},
{
"epoch": 0.49,
"learning_rate": 2.5652173913043483e-05,
"loss": 1.9084,
"step": 76600
},
{
"epoch": 0.49,
"learning_rate": 2.561996779388084e-05,
"loss": 1.921,
"step": 76700
},
{
"epoch": 0.49,
"learning_rate": 2.5587761674718198e-05,
"loss": 1.9172,
"step": 76800
},
{
"epoch": 0.49,
"learning_rate": 2.5555555555555554e-05,
"loss": 1.9147,
"step": 76900
},
{
"epoch": 0.49,
"learning_rate": 2.5523349436392917e-05,
"loss": 1.9141,
"step": 77000
},
{
"epoch": 0.49,
"learning_rate": 2.5491143317230276e-05,
"loss": 1.9207,
"step": 77100
},
{
"epoch": 0.49,
"learning_rate": 2.5458937198067636e-05,
"loss": 1.9214,
"step": 77200
},
{
"epoch": 0.49,
"learning_rate": 2.542673107890499e-05,
"loss": 1.8978,
"step": 77300
},
{
"epoch": 0.5,
"learning_rate": 2.539452495974235e-05,
"loss": 1.8992,
"step": 77400
},
{
"epoch": 0.5,
"learning_rate": 2.5362318840579714e-05,
"loss": 1.9013,
"step": 77500
},
{
"epoch": 0.5,
"learning_rate": 2.533011272141707e-05,
"loss": 1.912,
"step": 77600
},
{
"epoch": 0.5,
"learning_rate": 2.529790660225443e-05,
"loss": 1.9324,
"step": 77700
},
{
"epoch": 0.5,
"learning_rate": 2.5265700483091785e-05,
"loss": 1.9058,
"step": 77800
},
{
"epoch": 0.5,
"learning_rate": 2.523349436392915e-05,
"loss": 1.9245,
"step": 77900
},
{
"epoch": 0.5,
"learning_rate": 2.5201288244766507e-05,
"loss": 1.9004,
"step": 78000
},
{
"epoch": 0.5,
"learning_rate": 2.5169082125603866e-05,
"loss": 1.897,
"step": 78100
},
{
"epoch": 0.5,
"learning_rate": 2.5136876006441222e-05,
"loss": 1.9238,
"step": 78200
},
{
"epoch": 0.5,
"learning_rate": 2.510466988727859e-05,
"loss": 1.8965,
"step": 78300
},
{
"epoch": 0.5,
"learning_rate": 2.5072463768115944e-05,
"loss": 1.9034,
"step": 78400
},
{
"epoch": 0.5,
"learning_rate": 2.5040257648953304e-05,
"loss": 1.8886,
"step": 78500
},
{
"epoch": 0.5,
"learning_rate": 2.500805152979066e-05,
"loss": 1.9308,
"step": 78600
},
{
"epoch": 0.5,
"learning_rate": 2.4975845410628022e-05,
"loss": 1.9137,
"step": 78700
},
{
"epoch": 0.5,
"learning_rate": 2.494363929146538e-05,
"loss": 1.9198,
"step": 78800
},
{
"epoch": 0.5,
"learning_rate": 2.491143317230274e-05,
"loss": 1.8878,
"step": 78900
},
{
"epoch": 0.51,
"learning_rate": 2.4879227053140097e-05,
"loss": 1.9344,
"step": 79000
},
{
"epoch": 0.51,
"learning_rate": 2.4847020933977456e-05,
"loss": 1.8962,
"step": 79100
},
{
"epoch": 0.51,
"learning_rate": 2.4814814814814816e-05,
"loss": 1.9369,
"step": 79200
},
{
"epoch": 0.51,
"learning_rate": 2.4782608695652175e-05,
"loss": 1.9446,
"step": 79300
},
{
"epoch": 0.51,
"learning_rate": 2.4750402576489534e-05,
"loss": 1.9154,
"step": 79400
},
{
"epoch": 0.51,
"learning_rate": 2.471819645732689e-05,
"loss": 1.8946,
"step": 79500
},
{
"epoch": 0.51,
"learning_rate": 2.4685990338164253e-05,
"loss": 1.9189,
"step": 79600
},
{
"epoch": 0.51,
"learning_rate": 2.465378421900161e-05,
"loss": 1.8971,
"step": 79700
},
{
"epoch": 0.51,
"learning_rate": 2.4621578099838972e-05,
"loss": 1.9253,
"step": 79800
},
{
"epoch": 0.51,
"learning_rate": 2.4589371980676328e-05,
"loss": 1.9128,
"step": 79900
},
{
"epoch": 0.51,
"learning_rate": 2.455716586151369e-05,
"loss": 1.9089,
"step": 80000
},
{
"epoch": 0.51,
"learning_rate": 2.4524959742351047e-05,
"loss": 1.9188,
"step": 80100
},
{
"epoch": 0.51,
"learning_rate": 2.449275362318841e-05,
"loss": 1.9173,
"step": 80200
},
{
"epoch": 0.51,
"learning_rate": 2.4460547504025765e-05,
"loss": 1.9177,
"step": 80300
},
{
"epoch": 0.51,
"learning_rate": 2.4428341384863128e-05,
"loss": 1.881,
"step": 80400
},
{
"epoch": 0.52,
"learning_rate": 2.4396135265700484e-05,
"loss": 1.8895,
"step": 80500
},
{
"epoch": 0.52,
"learning_rate": 2.4363929146537843e-05,
"loss": 1.8919,
"step": 80600
},
{
"epoch": 0.52,
"learning_rate": 2.4331723027375203e-05,
"loss": 1.9105,
"step": 80700
},
{
"epoch": 0.52,
"learning_rate": 2.4299516908212562e-05,
"loss": 1.9085,
"step": 80800
},
{
"epoch": 0.52,
"learning_rate": 2.426731078904992e-05,
"loss": 1.9067,
"step": 80900
},
{
"epoch": 0.52,
"learning_rate": 2.423510466988728e-05,
"loss": 1.895,
"step": 81000
},
{
"epoch": 0.52,
"learning_rate": 2.420289855072464e-05,
"loss": 1.912,
"step": 81100
},
{
"epoch": 0.52,
"learning_rate": 2.4170692431561996e-05,
"loss": 1.8956,
"step": 81200
},
{
"epoch": 0.52,
"learning_rate": 2.413848631239936e-05,
"loss": 1.906,
"step": 81300
},
{
"epoch": 0.52,
"learning_rate": 2.4106280193236715e-05,
"loss": 1.9046,
"step": 81400
},
{
"epoch": 0.52,
"learning_rate": 2.4074074074074074e-05,
"loss": 1.9135,
"step": 81500
},
{
"epoch": 0.52,
"learning_rate": 2.4041867954911433e-05,
"loss": 1.9171,
"step": 81600
},
{
"epoch": 0.52,
"learning_rate": 2.4009661835748793e-05,
"loss": 1.9182,
"step": 81700
},
{
"epoch": 0.52,
"learning_rate": 2.3977455716586152e-05,
"loss": 1.8843,
"step": 81800
},
{
"epoch": 0.52,
"learning_rate": 2.394524959742351e-05,
"loss": 1.9099,
"step": 81900
},
{
"epoch": 0.52,
"learning_rate": 2.391304347826087e-05,
"loss": 1.9155,
"step": 82000
},
{
"epoch": 0.53,
"learning_rate": 2.388083735909823e-05,
"loss": 1.9088,
"step": 82100
},
{
"epoch": 0.53,
"learning_rate": 2.384863123993559e-05,
"loss": 1.9138,
"step": 82200
},
{
"epoch": 0.53,
"learning_rate": 2.381642512077295e-05,
"loss": 1.9141,
"step": 82300
},
{
"epoch": 0.53,
"learning_rate": 2.3784219001610308e-05,
"loss": 1.9133,
"step": 82400
},
{
"epoch": 0.53,
"learning_rate": 2.3752012882447668e-05,
"loss": 1.9099,
"step": 82500
},
{
"epoch": 0.53,
"learning_rate": 2.3719806763285024e-05,
"loss": 1.9168,
"step": 82600
},
{
"epoch": 0.53,
"learning_rate": 2.3687600644122386e-05,
"loss": 1.8991,
"step": 82700
},
{
"epoch": 0.53,
"learning_rate": 2.3655394524959742e-05,
"loss": 1.9212,
"step": 82800
},
{
"epoch": 0.53,
"learning_rate": 2.36231884057971e-05,
"loss": 1.9018,
"step": 82900
},
{
"epoch": 0.53,
"learning_rate": 2.359098228663446e-05,
"loss": 1.8927,
"step": 83000
},
{
"epoch": 0.53,
"learning_rate": 2.355877616747182e-05,
"loss": 1.9055,
"step": 83100
},
{
"epoch": 0.53,
"learning_rate": 2.352657004830918e-05,
"loss": 1.8597,
"step": 83200
},
{
"epoch": 0.53,
"learning_rate": 2.349436392914654e-05,
"loss": 1.9375,
"step": 83300
},
{
"epoch": 0.53,
"learning_rate": 2.34621578099839e-05,
"loss": 1.919,
"step": 83400
},
{
"epoch": 0.53,
"learning_rate": 2.3429951690821258e-05,
"loss": 1.8875,
"step": 83500
},
{
"epoch": 0.54,
"learning_rate": 2.3397745571658617e-05,
"loss": 1.9004,
"step": 83600
},
{
"epoch": 0.54,
"learning_rate": 2.3365539452495973e-05,
"loss": 1.8994,
"step": 83700
},
{
"epoch": 0.54,
"learning_rate": 2.3333333333333336e-05,
"loss": 1.8946,
"step": 83800
},
{
"epoch": 0.54,
"learning_rate": 2.3301127214170692e-05,
"loss": 1.9344,
"step": 83900
},
{
"epoch": 0.54,
"learning_rate": 2.3268921095008055e-05,
"loss": 1.9186,
"step": 84000
},
{
"epoch": 0.54,
"learning_rate": 2.323671497584541e-05,
"loss": 1.9058,
"step": 84100
},
{
"epoch": 0.54,
"learning_rate": 2.3204508856682773e-05,
"loss": 1.9009,
"step": 84200
},
{
"epoch": 0.54,
"learning_rate": 2.317230273752013e-05,
"loss": 1.8917,
"step": 84300
},
{
"epoch": 0.54,
"learning_rate": 2.314009661835749e-05,
"loss": 1.8943,
"step": 84400
},
{
"epoch": 0.54,
"learning_rate": 2.3107890499194848e-05,
"loss": 1.885,
"step": 84500
},
{
"epoch": 0.54,
"learning_rate": 2.3075684380032207e-05,
"loss": 1.924,
"step": 84600
},
{
"epoch": 0.54,
"learning_rate": 2.3043478260869567e-05,
"loss": 1.8861,
"step": 84700
},
{
"epoch": 0.54,
"learning_rate": 2.3011272141706926e-05,
"loss": 1.921,
"step": 84800
},
{
"epoch": 0.54,
"learning_rate": 2.2979066022544285e-05,
"loss": 1.8925,
"step": 84900
},
{
"epoch": 0.54,
"learning_rate": 2.294685990338164e-05,
"loss": 1.9145,
"step": 85000
},
{
"epoch": 0.54,
"learning_rate": 2.2914653784219004e-05,
"loss": 1.8955,
"step": 85100
},
{
"epoch": 0.55,
"learning_rate": 2.288244766505636e-05,
"loss": 1.9144,
"step": 85200
},
{
"epoch": 0.55,
"learning_rate": 2.2850241545893723e-05,
"loss": 1.9255,
"step": 85300
},
{
"epoch": 0.55,
"learning_rate": 2.281803542673108e-05,
"loss": 1.9091,
"step": 85400
},
{
"epoch": 0.55,
"learning_rate": 2.278582930756844e-05,
"loss": 1.8985,
"step": 85500
},
{
"epoch": 0.55,
"learning_rate": 2.2753623188405797e-05,
"loss": 1.9083,
"step": 85600
},
{
"epoch": 0.55,
"learning_rate": 2.2721417069243157e-05,
"loss": 1.8969,
"step": 85700
},
{
"epoch": 0.55,
"learning_rate": 2.2689210950080516e-05,
"loss": 1.9059,
"step": 85800
},
{
"epoch": 0.55,
"learning_rate": 2.2657004830917875e-05,
"loss": 1.9224,
"step": 85900
},
{
"epoch": 0.55,
"learning_rate": 2.2624798711755235e-05,
"loss": 1.8901,
"step": 86000
},
{
"epoch": 0.55,
"learning_rate": 2.2592592592592594e-05,
"loss": 1.9043,
"step": 86100
},
{
"epoch": 0.55,
"learning_rate": 2.2560386473429953e-05,
"loss": 1.9165,
"step": 86200
},
{
"epoch": 0.55,
"learning_rate": 2.2528180354267313e-05,
"loss": 1.8915,
"step": 86300
},
{
"epoch": 0.55,
"learning_rate": 2.2495974235104672e-05,
"loss": 1.9051,
"step": 86400
},
{
"epoch": 0.55,
"learning_rate": 2.246376811594203e-05,
"loss": 1.8832,
"step": 86500
},
{
"epoch": 0.55,
"learning_rate": 2.243156199677939e-05,
"loss": 1.8834,
"step": 86600
},
{
"epoch": 0.55,
"learning_rate": 2.2399355877616747e-05,
"loss": 1.904,
"step": 86700
},
{
"epoch": 0.56,
"learning_rate": 2.2367149758454106e-05,
"loss": 1.8847,
"step": 86800
},
{
"epoch": 0.56,
"learning_rate": 2.2334943639291466e-05,
"loss": 1.8822,
"step": 86900
},
{
"epoch": 0.56,
"learning_rate": 2.2302737520128825e-05,
"loss": 1.8975,
"step": 87000
},
{
"epoch": 0.56,
"learning_rate": 2.2270531400966184e-05,
"loss": 1.918,
"step": 87100
},
{
"epoch": 0.56,
"learning_rate": 2.2238325281803544e-05,
"loss": 1.9024,
"step": 87200
},
{
"epoch": 0.56,
"learning_rate": 2.2206119162640903e-05,
"loss": 1.8689,
"step": 87300
},
{
"epoch": 0.56,
"learning_rate": 2.2173913043478262e-05,
"loss": 1.8915,
"step": 87400
},
{
"epoch": 0.56,
"learning_rate": 2.214170692431562e-05,
"loss": 1.9032,
"step": 87500
},
{
"epoch": 0.56,
"learning_rate": 2.210950080515298e-05,
"loss": 1.912,
"step": 87600
},
{
"epoch": 0.56,
"learning_rate": 2.2077294685990337e-05,
"loss": 1.8862,
"step": 87700
},
{
"epoch": 0.56,
"learning_rate": 2.20450885668277e-05,
"loss": 1.8947,
"step": 87800
},
{
"epoch": 0.56,
"learning_rate": 2.2012882447665056e-05,
"loss": 1.9107,
"step": 87900
},
{
"epoch": 0.56,
"learning_rate": 2.198067632850242e-05,
"loss": 1.8989,
"step": 88000
},
{
"epoch": 0.56,
"learning_rate": 2.1948470209339774e-05,
"loss": 1.9067,
"step": 88100
},
{
"epoch": 0.56,
"learning_rate": 2.1916264090177137e-05,
"loss": 1.8996,
"step": 88200
},
{
"epoch": 0.57,
"learning_rate": 2.1884057971014493e-05,
"loss": 1.8677,
"step": 88300
},
{
"epoch": 0.57,
"learning_rate": 2.1851851851851852e-05,
"loss": 1.8977,
"step": 88400
},
{
"epoch": 0.57,
"learning_rate": 2.1819645732689212e-05,
"loss": 1.9053,
"step": 88500
},
{
"epoch": 0.57,
"learning_rate": 2.178743961352657e-05,
"loss": 1.9085,
"step": 88600
},
{
"epoch": 0.57,
"learning_rate": 2.175523349436393e-05,
"loss": 1.8789,
"step": 88700
},
{
"epoch": 0.57,
"learning_rate": 2.1723027375201286e-05,
"loss": 1.9097,
"step": 88800
},
{
"epoch": 0.57,
"learning_rate": 2.169082125603865e-05,
"loss": 1.8958,
"step": 88900
},
{
"epoch": 0.57,
"learning_rate": 2.1658615136876005e-05,
"loss": 1.8856,
"step": 89000
},
{
"epoch": 0.57,
"learning_rate": 2.1626409017713368e-05,
"loss": 1.9017,
"step": 89100
},
{
"epoch": 0.57,
"learning_rate": 2.1594202898550724e-05,
"loss": 1.9044,
"step": 89200
},
{
"epoch": 0.57,
"learning_rate": 2.1561996779388087e-05,
"loss": 1.8893,
"step": 89300
},
{
"epoch": 0.57,
"learning_rate": 2.1529790660225443e-05,
"loss": 1.9054,
"step": 89400
},
{
"epoch": 0.57,
"learning_rate": 2.1497584541062805e-05,
"loss": 1.8913,
"step": 89500
},
{
"epoch": 0.57,
"learning_rate": 2.146537842190016e-05,
"loss": 1.9031,
"step": 89600
},
{
"epoch": 0.57,
"learning_rate": 2.1433172302737524e-05,
"loss": 1.8777,
"step": 89700
},
{
"epoch": 0.57,
"learning_rate": 2.140096618357488e-05,
"loss": 1.8875,
"step": 89800
},
{
"epoch": 0.58,
"learning_rate": 2.136876006441224e-05,
"loss": 1.8901,
"step": 89900
},
{
"epoch": 0.58,
"learning_rate": 2.13365539452496e-05,
"loss": 1.901,
"step": 90000
},
{
"epoch": 0.58,
"learning_rate": 2.1304347826086958e-05,
"loss": 1.8862,
"step": 90100
},
{
"epoch": 0.58,
"learning_rate": 2.1272141706924317e-05,
"loss": 1.9022,
"step": 90200
},
{
"epoch": 0.58,
"learning_rate": 2.1239935587761677e-05,
"loss": 1.8905,
"step": 90300
},
{
"epoch": 0.58,
"learning_rate": 2.1207729468599036e-05,
"loss": 1.8827,
"step": 90400
},
{
"epoch": 0.58,
"learning_rate": 2.1175523349436392e-05,
"loss": 1.9018,
"step": 90500
},
{
"epoch": 0.58,
"learning_rate": 2.1143317230273755e-05,
"loss": 1.8741,
"step": 90600
},
{
"epoch": 0.58,
"learning_rate": 2.111111111111111e-05,
"loss": 1.8721,
"step": 90700
},
{
"epoch": 0.58,
"learning_rate": 2.107890499194847e-05,
"loss": 1.8972,
"step": 90800
},
{
"epoch": 0.58,
"learning_rate": 2.104669887278583e-05,
"loss": 1.8879,
"step": 90900
},
{
"epoch": 0.58,
"learning_rate": 2.101449275362319e-05,
"loss": 1.8975,
"step": 91000
},
{
"epoch": 0.58,
"learning_rate": 2.0982286634460548e-05,
"loss": 1.871,
"step": 91100
},
{
"epoch": 0.58,
"learning_rate": 2.0950080515297908e-05,
"loss": 1.8878,
"step": 91200
},
{
"epoch": 0.58,
"learning_rate": 2.0917874396135267e-05,
"loss": 1.9062,
"step": 91300
},
{
"epoch": 0.58,
"learning_rate": 2.0885668276972626e-05,
"loss": 1.9163,
"step": 91400
},
{
"epoch": 0.59,
"learning_rate": 2.0853462157809986e-05,
"loss": 1.8894,
"step": 91500
},
{
"epoch": 0.59,
"learning_rate": 2.0821256038647345e-05,
"loss": 1.8924,
"step": 91600
},
{
"epoch": 0.59,
"learning_rate": 2.0789049919484704e-05,
"loss": 1.9098,
"step": 91700
},
{
"epoch": 0.59,
"learning_rate": 2.0756843800322064e-05,
"loss": 1.8885,
"step": 91800
},
{
"epoch": 0.59,
"learning_rate": 2.072463768115942e-05,
"loss": 1.8955,
"step": 91900
},
{
"epoch": 0.59,
"learning_rate": 2.0692431561996782e-05,
"loss": 1.9106,
"step": 92000
},
{
"epoch": 0.59,
"learning_rate": 2.066022544283414e-05,
"loss": 1.891,
"step": 92100
},
{
"epoch": 0.59,
"learning_rate": 2.0628019323671498e-05,
"loss": 1.8903,
"step": 92200
},
{
"epoch": 0.59,
"learning_rate": 2.0595813204508857e-05,
"loss": 1.9248,
"step": 92300
},
{
"epoch": 0.59,
"learning_rate": 2.0563607085346216e-05,
"loss": 1.897,
"step": 92400
},
{
"epoch": 0.59,
"learning_rate": 2.0531400966183576e-05,
"loss": 1.9115,
"step": 92500
},
{
"epoch": 0.59,
"learning_rate": 2.0499194847020935e-05,
"loss": 1.8948,
"step": 92600
},
{
"epoch": 0.59,
"learning_rate": 2.0466988727858294e-05,
"loss": 1.8946,
"step": 92700
},
{
"epoch": 0.59,
"learning_rate": 2.0434782608695654e-05,
"loss": 1.9115,
"step": 92800
},
{
"epoch": 0.59,
"learning_rate": 2.0402576489533013e-05,
"loss": 1.9016,
"step": 92900
},
{
"epoch": 0.6,
"learning_rate": 2.037037037037037e-05,
"loss": 1.9038,
"step": 93000
},
{
"epoch": 0.6,
"learning_rate": 2.0338164251207732e-05,
"loss": 1.9165,
"step": 93100
},
{
"epoch": 0.6,
"learning_rate": 2.0305958132045088e-05,
"loss": 1.8892,
"step": 93200
},
{
"epoch": 0.6,
"learning_rate": 2.027375201288245e-05,
"loss": 1.9084,
"step": 93300
},
{
"epoch": 0.6,
"learning_rate": 2.0241545893719806e-05,
"loss": 1.9011,
"step": 93400
},
{
"epoch": 0.6,
"learning_rate": 2.020933977455717e-05,
"loss": 1.881,
"step": 93500
},
{
"epoch": 0.6,
"learning_rate": 2.0177133655394525e-05,
"loss": 1.8884,
"step": 93600
},
{
"epoch": 0.6,
"learning_rate": 2.0144927536231885e-05,
"loss": 1.8981,
"step": 93700
},
{
"epoch": 0.6,
"learning_rate": 2.0112721417069244e-05,
"loss": 1.8826,
"step": 93800
},
{
"epoch": 0.6,
"learning_rate": 2.0080515297906603e-05,
"loss": 1.8881,
"step": 93900
},
{
"epoch": 0.6,
"learning_rate": 2.0048309178743963e-05,
"loss": 1.9054,
"step": 94000
},
{
"epoch": 0.6,
"learning_rate": 2.0016103059581322e-05,
"loss": 1.8943,
"step": 94100
},
{
"epoch": 0.6,
"learning_rate": 1.998389694041868e-05,
"loss": 1.9031,
"step": 94200
},
{
"epoch": 0.6,
"learning_rate": 1.9951690821256037e-05,
"loss": 1.8927,
"step": 94300
},
{
"epoch": 0.6,
"learning_rate": 1.99194847020934e-05,
"loss": 1.8928,
"step": 94400
},
{
"epoch": 0.6,
"learning_rate": 1.9887278582930756e-05,
"loss": 1.898,
"step": 94500
},
{
"epoch": 0.61,
"learning_rate": 1.985507246376812e-05,
"loss": 1.8831,
"step": 94600
},
{
"epoch": 0.61,
"learning_rate": 1.9822866344605475e-05,
"loss": 1.9049,
"step": 94700
},
{
"epoch": 0.61,
"learning_rate": 1.9790660225442837e-05,
"loss": 1.892,
"step": 94800
},
{
"epoch": 0.61,
"learning_rate": 1.9758454106280193e-05,
"loss": 1.9153,
"step": 94900
},
{
"epoch": 0.61,
"learning_rate": 1.9726247987117553e-05,
"loss": 1.9086,
"step": 95000
},
{
"epoch": 0.61,
"learning_rate": 1.9694041867954912e-05,
"loss": 1.8912,
"step": 95100
},
{
"epoch": 0.61,
"learning_rate": 1.966183574879227e-05,
"loss": 1.8905,
"step": 95200
},
{
"epoch": 0.61,
"learning_rate": 1.962962962962963e-05,
"loss": 1.88,
"step": 95300
},
{
"epoch": 0.61,
"learning_rate": 1.959742351046699e-05,
"loss": 1.88,
"step": 95400
},
{
"epoch": 0.61,
"learning_rate": 1.956521739130435e-05,
"loss": 1.8691,
"step": 95500
},
{
"epoch": 0.61,
"learning_rate": 1.953301127214171e-05,
"loss": 1.8947,
"step": 95600
},
{
"epoch": 0.61,
"learning_rate": 1.9500805152979068e-05,
"loss": 1.9065,
"step": 95700
},
{
"epoch": 0.61,
"learning_rate": 1.9468599033816428e-05,
"loss": 1.9038,
"step": 95800
},
{
"epoch": 0.61,
"learning_rate": 1.9436392914653787e-05,
"loss": 1.8804,
"step": 95900
},
{
"epoch": 0.61,
"learning_rate": 1.9404186795491143e-05,
"loss": 1.8894,
"step": 96000
},
{
"epoch": 0.62,
"learning_rate": 1.9371980676328502e-05,
"loss": 1.8891,
"step": 96100
},
{
"epoch": 0.62,
"learning_rate": 1.933977455716586e-05,
"loss": 1.9084,
"step": 96200
},
{
"epoch": 0.62,
"learning_rate": 1.930756843800322e-05,
"loss": 1.8714,
"step": 96300
},
{
"epoch": 0.62,
"learning_rate": 1.927536231884058e-05,
"loss": 1.8942,
"step": 96400
},
{
"epoch": 0.62,
"learning_rate": 1.924315619967794e-05,
"loss": 1.8689,
"step": 96500
},
{
"epoch": 0.62,
"learning_rate": 1.92109500805153e-05,
"loss": 1.891,
"step": 96600
},
{
"epoch": 0.62,
"learning_rate": 1.917874396135266e-05,
"loss": 1.891,
"step": 96700
},
{
"epoch": 0.62,
"learning_rate": 1.9146537842190018e-05,
"loss": 1.8879,
"step": 96800
},
{
"epoch": 0.62,
"learning_rate": 1.9114331723027377e-05,
"loss": 1.8815,
"step": 96900
},
{
"epoch": 0.62,
"learning_rate": 1.9082125603864733e-05,
"loss": 1.8945,
"step": 97000
},
{
"epoch": 0.62,
"learning_rate": 1.9049919484702096e-05,
"loss": 1.8776,
"step": 97100
},
{
"epoch": 0.62,
"learning_rate": 1.9017713365539452e-05,
"loss": 1.8812,
"step": 97200
},
{
"epoch": 0.62,
"learning_rate": 1.8985507246376814e-05,
"loss": 1.9013,
"step": 97300
},
{
"epoch": 0.62,
"learning_rate": 1.895330112721417e-05,
"loss": 1.8737,
"step": 97400
},
{
"epoch": 0.62,
"learning_rate": 1.892109500805153e-05,
"loss": 1.8692,
"step": 97500
},
{
"epoch": 0.62,
"learning_rate": 1.888888888888889e-05,
"loss": 1.8855,
"step": 97600
},
{
"epoch": 0.63,
"learning_rate": 1.885668276972625e-05,
"loss": 1.9129,
"step": 97700
},
{
"epoch": 0.63,
"learning_rate": 1.8824476650563608e-05,
"loss": 1.8832,
"step": 97800
},
{
"epoch": 0.63,
"learning_rate": 1.8792270531400967e-05,
"loss": 1.9002,
"step": 97900
},
{
"epoch": 0.63,
"learning_rate": 1.8760064412238327e-05,
"loss": 1.8933,
"step": 98000
},
{
"epoch": 0.63,
"learning_rate": 1.8727858293075682e-05,
"loss": 1.8817,
"step": 98100
},
{
"epoch": 0.63,
"learning_rate": 1.8695652173913045e-05,
"loss": 1.8856,
"step": 98200
},
{
"epoch": 0.63,
"learning_rate": 1.86634460547504e-05,
"loss": 1.8837,
"step": 98300
},
{
"epoch": 0.63,
"learning_rate": 1.8631239935587764e-05,
"loss": 1.8907,
"step": 98400
},
{
"epoch": 0.63,
"learning_rate": 1.859903381642512e-05,
"loss": 1.8819,
"step": 98500
},
{
"epoch": 0.63,
"learning_rate": 1.8566827697262483e-05,
"loss": 1.8753,
"step": 98600
},
{
"epoch": 0.63,
"learning_rate": 1.853462157809984e-05,
"loss": 1.8843,
"step": 98700
},
{
"epoch": 0.63,
"learning_rate": 1.85024154589372e-05,
"loss": 1.8549,
"step": 98800
},
{
"epoch": 0.63,
"learning_rate": 1.8470209339774557e-05,
"loss": 1.8847,
"step": 98900
},
{
"epoch": 0.63,
"learning_rate": 1.8438003220611917e-05,
"loss": 1.8899,
"step": 99000
},
{
"epoch": 0.63,
"learning_rate": 1.8405797101449276e-05,
"loss": 1.9044,
"step": 99100
},
{
"epoch": 0.63,
"learning_rate": 1.8373590982286635e-05,
"loss": 1.9076,
"step": 99200
},
{
"epoch": 0.64,
"learning_rate": 1.8341384863123995e-05,
"loss": 1.8798,
"step": 99300
},
{
"epoch": 0.64,
"learning_rate": 1.8309178743961354e-05,
"loss": 1.8756,
"step": 99400
},
{
"epoch": 0.64,
"learning_rate": 1.8276972624798713e-05,
"loss": 1.906,
"step": 99500
},
{
"epoch": 0.64,
"learning_rate": 1.8244766505636073e-05,
"loss": 1.8876,
"step": 99600
},
{
"epoch": 0.64,
"learning_rate": 1.8212560386473432e-05,
"loss": 1.8693,
"step": 99700
},
{
"epoch": 0.64,
"learning_rate": 1.8180354267310788e-05,
"loss": 1.8639,
"step": 99800
},
{
"epoch": 0.64,
"learning_rate": 1.814814814814815e-05,
"loss": 1.8826,
"step": 99900
},
{
"epoch": 0.64,
"learning_rate": 1.8115942028985507e-05,
"loss": 1.8692,
"step": 100000
},
{
"epoch": 0.64,
"learning_rate": 1.8083735909822866e-05,
"loss": 1.8822,
"step": 100100
},
{
"epoch": 0.64,
"learning_rate": 1.8051529790660225e-05,
"loss": 1.8835,
"step": 100200
},
{
"epoch": 0.64,
"learning_rate": 1.8019323671497585e-05,
"loss": 1.8905,
"step": 100300
},
{
"epoch": 0.64,
"learning_rate": 1.7987117552334944e-05,
"loss": 1.8841,
"step": 100400
},
{
"epoch": 0.64,
"learning_rate": 1.7954911433172304e-05,
"loss": 1.8831,
"step": 100500
},
{
"epoch": 0.64,
"learning_rate": 1.7922705314009663e-05,
"loss": 1.8671,
"step": 100600
},
{
"epoch": 0.64,
"learning_rate": 1.7890499194847022e-05,
"loss": 1.8811,
"step": 100700
},
{
"epoch": 0.65,
"learning_rate": 1.785829307568438e-05,
"loss": 1.8724,
"step": 100800
},
{
"epoch": 0.65,
"learning_rate": 1.782608695652174e-05,
"loss": 1.876,
"step": 100900
},
{
"epoch": 0.65,
"learning_rate": 1.77938808373591e-05,
"loss": 1.8749,
"step": 101000
},
{
"epoch": 0.65,
"learning_rate": 1.776167471819646e-05,
"loss": 1.8928,
"step": 101100
},
{
"epoch": 0.65,
"learning_rate": 1.7729468599033816e-05,
"loss": 1.8855,
"step": 101200
},
{
"epoch": 0.65,
"learning_rate": 1.769726247987118e-05,
"loss": 1.8932,
"step": 101300
},
{
"epoch": 0.65,
"learning_rate": 1.7665056360708534e-05,
"loss": 1.8535,
"step": 101400
},
{
"epoch": 0.65,
"learning_rate": 1.7632850241545894e-05,
"loss": 1.8901,
"step": 101500
},
{
"epoch": 0.65,
"learning_rate": 1.7600644122383253e-05,
"loss": 1.8707,
"step": 101600
},
{
"epoch": 0.65,
"learning_rate": 1.7568438003220612e-05,
"loss": 1.8793,
"step": 101700
},
{
"epoch": 0.65,
"learning_rate": 1.7536231884057972e-05,
"loss": 1.8599,
"step": 101800
},
{
"epoch": 0.65,
"learning_rate": 1.750402576489533e-05,
"loss": 1.8736,
"step": 101900
},
{
"epoch": 0.65,
"learning_rate": 1.747181964573269e-05,
"loss": 1.8869,
"step": 102000
},
{
"epoch": 0.65,
"learning_rate": 1.7439613526570046e-05,
"loss": 1.8696,
"step": 102100
},
{
"epoch": 0.65,
"learning_rate": 1.740740740740741e-05,
"loss": 1.8914,
"step": 102200
},
{
"epoch": 0.65,
"learning_rate": 1.7375201288244765e-05,
"loss": 1.898,
"step": 102300
},
{
"epoch": 0.66,
"learning_rate": 1.7342995169082128e-05,
"loss": 1.8974,
"step": 102400
},
{
"epoch": 0.66,
"learning_rate": 1.7310789049919484e-05,
"loss": 1.8831,
"step": 102500
},
{
"epoch": 0.66,
"learning_rate": 1.7278582930756847e-05,
"loss": 1.8751,
"step": 102600
},
{
"epoch": 0.66,
"learning_rate": 1.7246376811594203e-05,
"loss": 1.8887,
"step": 102700
},
{
"epoch": 0.66,
"learning_rate": 1.7214170692431565e-05,
"loss": 1.8679,
"step": 102800
},
{
"epoch": 0.66,
"learning_rate": 1.718196457326892e-05,
"loss": 1.8784,
"step": 102900
},
{
"epoch": 0.66,
"learning_rate": 1.714975845410628e-05,
"loss": 1.8804,
"step": 103000
},
{
"epoch": 0.66,
"learning_rate": 1.711755233494364e-05,
"loss": 1.8667,
"step": 103100
},
{
"epoch": 0.66,
"learning_rate": 1.7085346215781e-05,
"loss": 1.8982,
"step": 103200
},
{
"epoch": 0.66,
"learning_rate": 1.705314009661836e-05,
"loss": 1.8974,
"step": 103300
},
{
"epoch": 0.66,
"learning_rate": 1.7020933977455718e-05,
"loss": 1.8688,
"step": 103400
},
{
"epoch": 0.66,
"learning_rate": 1.6988727858293077e-05,
"loss": 1.8741,
"step": 103500
},
{
"epoch": 0.66,
"learning_rate": 1.6956521739130433e-05,
"loss": 1.877,
"step": 103600
},
{
"epoch": 0.66,
"learning_rate": 1.6924315619967796e-05,
"loss": 1.8999,
"step": 103700
},
{
"epoch": 0.66,
"learning_rate": 1.6892109500805152e-05,
"loss": 1.8684,
"step": 103800
},
{
"epoch": 0.66,
"learning_rate": 1.6859903381642515e-05,
"loss": 1.8986,
"step": 103900
},
{
"epoch": 0.67,
"learning_rate": 1.682769726247987e-05,
"loss": 1.8652,
"step": 104000
},
{
"epoch": 0.67,
"learning_rate": 1.6795491143317233e-05,
"loss": 1.8797,
"step": 104100
},
{
"epoch": 0.67,
"learning_rate": 1.676328502415459e-05,
"loss": 1.8826,
"step": 104200
},
{
"epoch": 0.67,
"learning_rate": 1.673107890499195e-05,
"loss": 1.8962,
"step": 104300
},
{
"epoch": 0.67,
"learning_rate": 1.6698872785829308e-05,
"loss": 1.8849,
"step": 104400
},
{
"epoch": 0.67,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.8823,
"step": 104500
},
{
"epoch": 0.67,
"learning_rate": 1.6634460547504027e-05,
"loss": 1.8674,
"step": 104600
},
{
"epoch": 0.67,
"learning_rate": 1.6602254428341386e-05,
"loss": 1.8754,
"step": 104700
},
{
"epoch": 0.67,
"learning_rate": 1.6570048309178746e-05,
"loss": 1.8777,
"step": 104800
},
{
"epoch": 0.67,
"learning_rate": 1.6537842190016105e-05,
"loss": 1.8708,
"step": 104900
},
{
"epoch": 0.67,
"learning_rate": 1.6505636070853464e-05,
"loss": 1.8812,
"step": 105000
},
{
"epoch": 0.67,
"learning_rate": 1.6473429951690824e-05,
"loss": 1.8776,
"step": 105100
},
{
"epoch": 0.67,
"learning_rate": 1.644122383252818e-05,
"loss": 1.8881,
"step": 105200
},
{
"epoch": 0.67,
"learning_rate": 1.640901771336554e-05,
"loss": 1.8797,
"step": 105300
},
{
"epoch": 0.67,
"learning_rate": 1.6376811594202898e-05,
"loss": 1.8766,
"step": 105400
},
{
"epoch": 0.68,
"learning_rate": 1.6344605475040258e-05,
"loss": 1.9008,
"step": 105500
},
{
"epoch": 0.68,
"learning_rate": 1.6312399355877617e-05,
"loss": 1.8909,
"step": 105600
},
{
"epoch": 0.68,
"learning_rate": 1.6280193236714976e-05,
"loss": 1.8825,
"step": 105700
},
{
"epoch": 0.68,
"learning_rate": 1.6247987117552336e-05,
"loss": 1.8844,
"step": 105800
},
{
"epoch": 0.68,
"learning_rate": 1.6215780998389695e-05,
"loss": 1.874,
"step": 105900
},
{
"epoch": 0.68,
"learning_rate": 1.6183574879227054e-05,
"loss": 1.8821,
"step": 106000
},
{
"epoch": 0.68,
"learning_rate": 1.6151368760064414e-05,
"loss": 1.862,
"step": 106100
},
{
"epoch": 0.68,
"learning_rate": 1.6119162640901773e-05,
"loss": 1.9082,
"step": 106200
},
{
"epoch": 0.68,
"learning_rate": 1.608695652173913e-05,
"loss": 1.8748,
"step": 106300
},
{
"epoch": 0.68,
"learning_rate": 1.6054750402576492e-05,
"loss": 1.8727,
"step": 106400
},
{
"epoch": 0.68,
"learning_rate": 1.6022544283413848e-05,
"loss": 1.8799,
"step": 106500
},
{
"epoch": 0.68,
"learning_rate": 1.599033816425121e-05,
"loss": 1.8575,
"step": 106600
},
{
"epoch": 0.68,
"learning_rate": 1.5958132045088566e-05,
"loss": 1.874,
"step": 106700
},
{
"epoch": 0.68,
"learning_rate": 1.5925925925925926e-05,
"loss": 1.8752,
"step": 106800
},
{
"epoch": 0.68,
"learning_rate": 1.5893719806763285e-05,
"loss": 1.8671,
"step": 106900
},
{
"epoch": 0.68,
"learning_rate": 1.5861513687600644e-05,
"loss": 1.881,
"step": 107000
},
{
"epoch": 0.69,
"learning_rate": 1.5829307568438004e-05,
"loss": 1.8918,
"step": 107100
},
{
"epoch": 0.69,
"learning_rate": 1.5797101449275363e-05,
"loss": 1.8858,
"step": 107200
},
{
"epoch": 0.69,
"learning_rate": 1.5764895330112723e-05,
"loss": 1.8726,
"step": 107300
},
{
"epoch": 0.69,
"learning_rate": 1.573268921095008e-05,
"loss": 1.8502,
"step": 107400
},
{
"epoch": 0.69,
"learning_rate": 1.570048309178744e-05,
"loss": 1.8657,
"step": 107500
},
{
"epoch": 0.69,
"learning_rate": 1.5668276972624797e-05,
"loss": 1.87,
"step": 107600
},
{
"epoch": 0.69,
"learning_rate": 1.563607085346216e-05,
"loss": 1.8705,
"step": 107700
},
{
"epoch": 0.69,
"learning_rate": 1.5603864734299516e-05,
"loss": 1.8694,
"step": 107800
},
{
"epoch": 0.69,
"learning_rate": 1.557165861513688e-05,
"loss": 1.8837,
"step": 107900
},
{
"epoch": 0.69,
"learning_rate": 1.5539452495974235e-05,
"loss": 1.8929,
"step": 108000
},
{
"epoch": 0.69,
"learning_rate": 1.5507246376811597e-05,
"loss": 1.8718,
"step": 108100
},
{
"epoch": 0.69,
"learning_rate": 1.5475040257648953e-05,
"loss": 1.8925,
"step": 108200
},
{
"epoch": 0.69,
"learning_rate": 1.5442834138486313e-05,
"loss": 1.8771,
"step": 108300
},
{
"epoch": 0.69,
"learning_rate": 1.5410628019323672e-05,
"loss": 1.8856,
"step": 108400
},
{
"epoch": 0.69,
"learning_rate": 1.537842190016103e-05,
"loss": 1.8683,
"step": 108500
},
{
"epoch": 0.7,
"learning_rate": 1.534621578099839e-05,
"loss": 1.8639,
"step": 108600
},
{
"epoch": 0.7,
"learning_rate": 1.531400966183575e-05,
"loss": 1.8686,
"step": 108700
},
{
"epoch": 0.7,
"learning_rate": 1.528180354267311e-05,
"loss": 1.8933,
"step": 108800
},
{
"epoch": 0.7,
"learning_rate": 1.5249597423510467e-05,
"loss": 1.8758,
"step": 108900
},
{
"epoch": 0.7,
"learning_rate": 1.5217391304347828e-05,
"loss": 1.8854,
"step": 109000
},
{
"epoch": 0.7,
"learning_rate": 1.5185185185185186e-05,
"loss": 1.8697,
"step": 109100
},
{
"epoch": 0.7,
"learning_rate": 1.5152979066022547e-05,
"loss": 1.858,
"step": 109200
},
{
"epoch": 0.7,
"learning_rate": 1.5120772946859905e-05,
"loss": 1.8565,
"step": 109300
},
{
"epoch": 0.7,
"learning_rate": 1.5088566827697262e-05,
"loss": 1.8897,
"step": 109400
},
{
"epoch": 0.7,
"learning_rate": 1.5056360708534623e-05,
"loss": 1.8794,
"step": 109500
},
{
"epoch": 0.7,
"learning_rate": 1.5024154589371981e-05,
"loss": 1.8757,
"step": 109600
},
{
"epoch": 0.7,
"learning_rate": 1.499194847020934e-05,
"loss": 1.859,
"step": 109700
},
{
"epoch": 0.7,
"learning_rate": 1.49597423510467e-05,
"loss": 1.8881,
"step": 109800
},
{
"epoch": 0.7,
"learning_rate": 1.4927536231884059e-05,
"loss": 1.8823,
"step": 109900
},
{
"epoch": 0.7,
"learning_rate": 1.4895330112721417e-05,
"loss": 1.8779,
"step": 110000
},
{
"epoch": 0.7,
"learning_rate": 1.4863123993558778e-05,
"loss": 1.857,
"step": 110100
},
{
"epoch": 0.71,
"learning_rate": 1.4830917874396135e-05,
"loss": 1.882,
"step": 110200
},
{
"epoch": 0.71,
"learning_rate": 1.4798711755233496e-05,
"loss": 1.8779,
"step": 110300
},
{
"epoch": 0.71,
"learning_rate": 1.4766505636070854e-05,
"loss": 1.9022,
"step": 110400
},
{
"epoch": 0.71,
"learning_rate": 1.4734299516908212e-05,
"loss": 1.8655,
"step": 110500
},
{
"epoch": 0.71,
"learning_rate": 1.4702093397745573e-05,
"loss": 1.8488,
"step": 110600
},
{
"epoch": 0.71,
"learning_rate": 1.466988727858293e-05,
"loss": 1.8746,
"step": 110700
},
{
"epoch": 0.71,
"learning_rate": 1.4637681159420291e-05,
"loss": 1.8602,
"step": 110800
},
{
"epoch": 0.71,
"learning_rate": 1.4605475040257649e-05,
"loss": 1.8777,
"step": 110900
},
{
"epoch": 0.71,
"learning_rate": 1.457326892109501e-05,
"loss": 1.8818,
"step": 111000
},
{
"epoch": 0.71,
"learning_rate": 1.4541062801932368e-05,
"loss": 1.8555,
"step": 111100
},
{
"epoch": 0.71,
"learning_rate": 1.4508856682769729e-05,
"loss": 1.8671,
"step": 111200
},
{
"epoch": 0.71,
"learning_rate": 1.4476650563607086e-05,
"loss": 1.8809,
"step": 111300
},
{
"epoch": 0.71,
"learning_rate": 1.4444444444444444e-05,
"loss": 1.8522,
"step": 111400
},
{
"epoch": 0.71,
"learning_rate": 1.4412238325281805e-05,
"loss": 1.851,
"step": 111500
},
{
"epoch": 0.71,
"learning_rate": 1.4380032206119163e-05,
"loss": 1.8721,
"step": 111600
},
{
"epoch": 0.71,
"learning_rate": 1.4347826086956522e-05,
"loss": 1.8597,
"step": 111700
},
{
"epoch": 0.72,
"learning_rate": 1.4315619967793882e-05,
"loss": 1.861,
"step": 111800
},
{
"epoch": 0.72,
"learning_rate": 1.4283413848631241e-05,
"loss": 1.889,
"step": 111900
},
{
"epoch": 0.72,
"learning_rate": 1.4251207729468599e-05,
"loss": 1.8584,
"step": 112000
},
{
"epoch": 0.72,
"learning_rate": 1.421900161030596e-05,
"loss": 1.8798,
"step": 112100
},
{
"epoch": 0.72,
"learning_rate": 1.4186795491143317e-05,
"loss": 1.8903,
"step": 112200
},
{
"epoch": 0.72,
"learning_rate": 1.4154589371980678e-05,
"loss": 1.8675,
"step": 112300
},
{
"epoch": 0.72,
"learning_rate": 1.4122383252818036e-05,
"loss": 1.866,
"step": 112400
},
{
"epoch": 0.72,
"learning_rate": 1.4090177133655394e-05,
"loss": 1.8759,
"step": 112500
},
{
"epoch": 0.72,
"learning_rate": 1.4057971014492755e-05,
"loss": 1.8761,
"step": 112600
},
{
"epoch": 0.72,
"learning_rate": 1.4025764895330112e-05,
"loss": 1.8665,
"step": 112700
},
{
"epoch": 0.72,
"learning_rate": 1.3993558776167473e-05,
"loss": 1.8859,
"step": 112800
},
{
"epoch": 0.72,
"learning_rate": 1.3961352657004831e-05,
"loss": 1.8605,
"step": 112900
},
{
"epoch": 0.72,
"learning_rate": 1.3929146537842192e-05,
"loss": 1.8863,
"step": 113000
},
{
"epoch": 0.72,
"learning_rate": 1.389694041867955e-05,
"loss": 1.8675,
"step": 113100
},
{
"epoch": 0.72,
"learning_rate": 1.386473429951691e-05,
"loss": 1.9012,
"step": 113200
},
{
"epoch": 0.73,
"learning_rate": 1.3832528180354268e-05,
"loss": 1.8571,
"step": 113300
},
{
"epoch": 0.73,
"learning_rate": 1.3800322061191628e-05,
"loss": 1.8743,
"step": 113400
},
{
"epoch": 0.73,
"learning_rate": 1.3768115942028985e-05,
"loss": 1.887,
"step": 113500
},
{
"epoch": 0.73,
"learning_rate": 1.3735909822866345e-05,
"loss": 1.8691,
"step": 113600
},
{
"epoch": 0.73,
"learning_rate": 1.3703703703703704e-05,
"loss": 1.893,
"step": 113700
},
{
"epoch": 0.73,
"learning_rate": 1.3671497584541062e-05,
"loss": 1.8667,
"step": 113800
},
{
"epoch": 0.73,
"learning_rate": 1.3639291465378423e-05,
"loss": 1.8851,
"step": 113900
},
{
"epoch": 0.73,
"learning_rate": 1.360708534621578e-05,
"loss": 1.8485,
"step": 114000
},
{
"epoch": 0.73,
"learning_rate": 1.3574879227053142e-05,
"loss": 1.8767,
"step": 114100
},
{
"epoch": 0.73,
"learning_rate": 1.35426731078905e-05,
"loss": 1.8831,
"step": 114200
},
{
"epoch": 0.73,
"learning_rate": 1.351046698872786e-05,
"loss": 1.8654,
"step": 114300
},
{
"epoch": 0.73,
"learning_rate": 1.3478260869565218e-05,
"loss": 1.85,
"step": 114400
},
{
"epoch": 0.73,
"learning_rate": 1.3446054750402576e-05,
"loss": 1.875,
"step": 114500
},
{
"epoch": 0.73,
"learning_rate": 1.3413848631239937e-05,
"loss": 1.8946,
"step": 114600
},
{
"epoch": 0.73,
"learning_rate": 1.3381642512077294e-05,
"loss": 1.8832,
"step": 114700
},
{
"epoch": 0.73,
"learning_rate": 1.3349436392914655e-05,
"loss": 1.8925,
"step": 114800
},
{
"epoch": 0.74,
"learning_rate": 1.3317230273752013e-05,
"loss": 1.859,
"step": 114900
},
{
"epoch": 0.74,
"learning_rate": 1.3285024154589374e-05,
"loss": 1.8942,
"step": 115000
},
{
"epoch": 0.74,
"learning_rate": 1.3252818035426732e-05,
"loss": 1.8728,
"step": 115100
},
{
"epoch": 0.74,
"learning_rate": 1.3220611916264091e-05,
"loss": 1.8546,
"step": 115200
},
{
"epoch": 0.74,
"learning_rate": 1.318840579710145e-05,
"loss": 1.8924,
"step": 115300
},
{
"epoch": 0.74,
"learning_rate": 1.315619967793881e-05,
"loss": 1.8625,
"step": 115400
},
{
"epoch": 0.74,
"learning_rate": 1.3123993558776167e-05,
"loss": 1.8542,
"step": 115500
},
{
"epoch": 0.74,
"learning_rate": 1.3091787439613527e-05,
"loss": 1.8769,
"step": 115600
},
{
"epoch": 0.74,
"learning_rate": 1.3059581320450886e-05,
"loss": 1.8727,
"step": 115700
},
{
"epoch": 0.74,
"learning_rate": 1.3027375201288244e-05,
"loss": 1.878,
"step": 115800
},
{
"epoch": 0.74,
"learning_rate": 1.2995169082125605e-05,
"loss": 1.8539,
"step": 115900
},
{
"epoch": 0.74,
"learning_rate": 1.2962962962962962e-05,
"loss": 1.8889,
"step": 116000
},
{
"epoch": 0.74,
"learning_rate": 1.2930756843800324e-05,
"loss": 1.8814,
"step": 116100
},
{
"epoch": 0.74,
"learning_rate": 1.2898550724637681e-05,
"loss": 1.8855,
"step": 116200
},
{
"epoch": 0.74,
"learning_rate": 1.2866344605475042e-05,
"loss": 1.8708,
"step": 116300
},
{
"epoch": 0.74,
"learning_rate": 1.28341384863124e-05,
"loss": 1.8874,
"step": 116400
},
{
"epoch": 0.75,
"learning_rate": 1.2801932367149761e-05,
"loss": 1.8632,
"step": 116500
},
{
"epoch": 0.75,
"learning_rate": 1.2769726247987119e-05,
"loss": 1.8608,
"step": 116600
},
{
"epoch": 0.75,
"learning_rate": 1.2737520128824476e-05,
"loss": 1.8556,
"step": 116700
},
{
"epoch": 0.75,
"learning_rate": 1.2705314009661837e-05,
"loss": 1.879,
"step": 116800
},
{
"epoch": 0.75,
"learning_rate": 1.2673107890499195e-05,
"loss": 1.8805,
"step": 116900
},
{
"epoch": 0.75,
"learning_rate": 1.2640901771336556e-05,
"loss": 1.8711,
"step": 117000
},
{
"epoch": 0.75,
"learning_rate": 1.2608695652173914e-05,
"loss": 1.866,
"step": 117100
},
{
"epoch": 0.75,
"learning_rate": 1.2576489533011273e-05,
"loss": 1.8723,
"step": 117200
},
{
"epoch": 0.75,
"learning_rate": 1.254428341384863e-05,
"loss": 1.8753,
"step": 117300
},
{
"epoch": 0.75,
"learning_rate": 1.2512077294685992e-05,
"loss": 1.8631,
"step": 117400
},
{
"epoch": 0.75,
"learning_rate": 1.247987117552335e-05,
"loss": 1.8824,
"step": 117500
},
{
"epoch": 0.75,
"learning_rate": 1.2447665056360709e-05,
"loss": 1.8424,
"step": 117600
},
{
"epoch": 0.75,
"learning_rate": 1.2415458937198068e-05,
"loss": 1.8844,
"step": 117700
},
{
"epoch": 0.75,
"learning_rate": 1.2383252818035427e-05,
"loss": 1.8719,
"step": 117800
},
{
"epoch": 0.75,
"learning_rate": 1.2351046698872787e-05,
"loss": 1.8616,
"step": 117900
},
{
"epoch": 0.76,
"learning_rate": 1.2318840579710146e-05,
"loss": 1.8555,
"step": 118000
},
{
"epoch": 0.76,
"learning_rate": 1.2286634460547504e-05,
"loss": 1.8759,
"step": 118100
},
{
"epoch": 0.76,
"learning_rate": 1.2254428341384863e-05,
"loss": 1.8525,
"step": 118200
},
{
"epoch": 0.76,
"learning_rate": 1.2222222222222222e-05,
"loss": 1.8694,
"step": 118300
},
{
"epoch": 0.76,
"learning_rate": 1.2190016103059582e-05,
"loss": 1.8855,
"step": 118400
},
{
"epoch": 0.76,
"learning_rate": 1.2157809983896941e-05,
"loss": 1.8759,
"step": 118500
},
{
"epoch": 0.76,
"learning_rate": 1.21256038647343e-05,
"loss": 1.8727,
"step": 118600
},
{
"epoch": 0.76,
"learning_rate": 1.209339774557166e-05,
"loss": 1.8565,
"step": 118700
},
{
"epoch": 0.76,
"learning_rate": 1.206119162640902e-05,
"loss": 1.8819,
"step": 118800
},
{
"epoch": 0.76,
"learning_rate": 1.2028985507246379e-05,
"loss": 1.8392,
"step": 118900
},
{
"epoch": 0.76,
"learning_rate": 1.1996779388083736e-05,
"loss": 1.8494,
"step": 119000
},
{
"epoch": 0.76,
"learning_rate": 1.1964573268921096e-05,
"loss": 1.8753,
"step": 119100
},
{
"epoch": 0.76,
"learning_rate": 1.1932367149758455e-05,
"loss": 1.8711,
"step": 119200
},
{
"epoch": 0.76,
"learning_rate": 1.1900161030595813e-05,
"loss": 1.8789,
"step": 119300
},
{
"epoch": 0.76,
"learning_rate": 1.1867954911433172e-05,
"loss": 1.8725,
"step": 119400
},
{
"epoch": 0.76,
"learning_rate": 1.1835748792270531e-05,
"loss": 1.8955,
"step": 119500
},
{
"epoch": 0.77,
"learning_rate": 1.180354267310789e-05,
"loss": 1.8681,
"step": 119600
},
{
"epoch": 0.77,
"learning_rate": 1.177133655394525e-05,
"loss": 1.8533,
"step": 119700
},
{
"epoch": 0.77,
"learning_rate": 1.173913043478261e-05,
"loss": 1.8514,
"step": 119800
},
{
"epoch": 0.77,
"learning_rate": 1.1706924315619969e-05,
"loss": 1.8495,
"step": 119900
},
{
"epoch": 0.77,
"learning_rate": 1.1674718196457328e-05,
"loss": 1.8614,
"step": 120000
},
{
"epoch": 0.77,
"learning_rate": 1.1642512077294687e-05,
"loss": 1.872,
"step": 120100
},
{
"epoch": 0.77,
"learning_rate": 1.1610305958132045e-05,
"loss": 1.869,
"step": 120200
},
{
"epoch": 0.77,
"learning_rate": 1.1578099838969404e-05,
"loss": 1.8792,
"step": 120300
},
{
"epoch": 0.77,
"learning_rate": 1.1545893719806764e-05,
"loss": 1.8746,
"step": 120400
},
{
"epoch": 0.77,
"learning_rate": 1.1513687600644123e-05,
"loss": 1.8695,
"step": 120500
},
{
"epoch": 0.77,
"learning_rate": 1.1481481481481482e-05,
"loss": 1.844,
"step": 120600
},
{
"epoch": 0.77,
"learning_rate": 1.1449275362318842e-05,
"loss": 1.8638,
"step": 120700
},
{
"epoch": 0.77,
"learning_rate": 1.1417069243156201e-05,
"loss": 1.848,
"step": 120800
},
{
"epoch": 0.77,
"learning_rate": 1.1384863123993559e-05,
"loss": 1.8924,
"step": 120900
},
{
"epoch": 0.77,
"learning_rate": 1.1352657004830918e-05,
"loss": 1.8717,
"step": 121000
},
{
"epoch": 0.78,
"learning_rate": 1.1320450885668278e-05,
"loss": 1.874,
"step": 121100
},
{
"epoch": 0.78,
"learning_rate": 1.1288244766505635e-05,
"loss": 1.8773,
"step": 121200
},
{
"epoch": 0.78,
"learning_rate": 1.1256038647342995e-05,
"loss": 1.8815,
"step": 121300
},
{
"epoch": 0.78,
"learning_rate": 1.1223832528180354e-05,
"loss": 1.8848,
"step": 121400
},
{
"epoch": 0.78,
"learning_rate": 1.1191626409017713e-05,
"loss": 1.8805,
"step": 121500
},
{
"epoch": 0.78,
"learning_rate": 1.1159420289855073e-05,
"loss": 1.8814,
"step": 121600
},
{
"epoch": 0.78,
"learning_rate": 1.1127214170692432e-05,
"loss": 1.877,
"step": 121700
},
{
"epoch": 0.78,
"learning_rate": 1.1095008051529791e-05,
"loss": 1.8991,
"step": 121800
},
{
"epoch": 0.78,
"learning_rate": 1.106280193236715e-05,
"loss": 1.8779,
"step": 121900
},
{
"epoch": 0.78,
"learning_rate": 1.103059581320451e-05,
"loss": 1.858,
"step": 122000
},
{
"epoch": 0.78,
"learning_rate": 1.099838969404187e-05,
"loss": 1.8608,
"step": 122100
},
{
"epoch": 0.78,
"learning_rate": 1.0966183574879229e-05,
"loss": 1.8399,
"step": 122200
},
{
"epoch": 0.78,
"learning_rate": 1.0933977455716586e-05,
"loss": 1.8796,
"step": 122300
},
{
"epoch": 0.78,
"learning_rate": 1.0901771336553946e-05,
"loss": 1.875,
"step": 122400
},
{
"epoch": 0.78,
"learning_rate": 1.0869565217391305e-05,
"loss": 1.869,
"step": 122500
},
{
"epoch": 0.78,
"learning_rate": 1.0837359098228664e-05,
"loss": 1.8737,
"step": 122600
},
{
"epoch": 0.79,
"learning_rate": 1.0805152979066024e-05,
"loss": 1.8733,
"step": 122700
},
{
"epoch": 0.79,
"learning_rate": 1.0772946859903381e-05,
"loss": 1.8713,
"step": 122800
},
{
"epoch": 0.79,
"learning_rate": 1.074074074074074e-05,
"loss": 1.8625,
"step": 122900
},
{
"epoch": 0.79,
"learning_rate": 1.07085346215781e-05,
"loss": 1.8559,
"step": 123000
},
{
"epoch": 0.79,
"learning_rate": 1.067632850241546e-05,
"loss": 1.8654,
"step": 123100
},
{
"epoch": 0.79,
"learning_rate": 1.0644122383252819e-05,
"loss": 1.8811,
"step": 123200
},
{
"epoch": 0.79,
"learning_rate": 1.0611916264090177e-05,
"loss": 1.8594,
"step": 123300
},
{
"epoch": 0.79,
"learning_rate": 1.0579710144927536e-05,
"loss": 1.8852,
"step": 123400
},
{
"epoch": 0.79,
"learning_rate": 1.0547504025764895e-05,
"loss": 1.8869,
"step": 123500
},
{
"epoch": 0.79,
"learning_rate": 1.0515297906602255e-05,
"loss": 1.8553,
"step": 123600
},
{
"epoch": 0.79,
"learning_rate": 1.0483091787439614e-05,
"loss": 1.8916,
"step": 123700
},
{
"epoch": 0.79,
"learning_rate": 1.0450885668276973e-05,
"loss": 1.8658,
"step": 123800
},
{
"epoch": 0.79,
"learning_rate": 1.0418679549114333e-05,
"loss": 1.8734,
"step": 123900
},
{
"epoch": 0.79,
"learning_rate": 1.0386473429951692e-05,
"loss": 1.8587,
"step": 124000
},
{
"epoch": 0.79,
"learning_rate": 1.0354267310789051e-05,
"loss": 1.859,
"step": 124100
},
{
"epoch": 0.79,
"learning_rate": 1.032206119162641e-05,
"loss": 1.873,
"step": 124200
},
{
"epoch": 0.8,
"learning_rate": 1.0289855072463768e-05,
"loss": 1.8597,
"step": 124300
},
{
"epoch": 0.8,
"learning_rate": 1.0257648953301128e-05,
"loss": 1.881,
"step": 124400
},
{
"epoch": 0.8,
"learning_rate": 1.0225442834138487e-05,
"loss": 1.8559,
"step": 124500
},
{
"epoch": 0.8,
"learning_rate": 1.0193236714975846e-05,
"loss": 1.8764,
"step": 124600
},
{
"epoch": 0.8,
"learning_rate": 1.0161030595813204e-05,
"loss": 1.8976,
"step": 124700
},
{
"epoch": 0.8,
"learning_rate": 1.0128824476650563e-05,
"loss": 1.8746,
"step": 124800
},
{
"epoch": 0.8,
"learning_rate": 1.0096618357487923e-05,
"loss": 1.8705,
"step": 124900
},
{
"epoch": 0.8,
"learning_rate": 1.0064412238325282e-05,
"loss": 1.871,
"step": 125000
},
{
"epoch": 0.8,
"learning_rate": 1.0032206119162641e-05,
"loss": 1.8733,
"step": 125100
},
{
"epoch": 0.8,
"learning_rate": 1e-05,
"loss": 1.8693,
"step": 125200
},
{
"epoch": 0.8,
"learning_rate": 9.96779388083736e-06,
"loss": 1.8836,
"step": 125300
},
{
"epoch": 0.8,
"learning_rate": 9.935587761674718e-06,
"loss": 1.8678,
"step": 125400
},
{
"epoch": 0.8,
"learning_rate": 9.903381642512077e-06,
"loss": 1.8716,
"step": 125500
},
{
"epoch": 0.8,
"learning_rate": 9.871175523349437e-06,
"loss": 1.8658,
"step": 125600
},
{
"epoch": 0.8,
"learning_rate": 9.838969404186796e-06,
"loss": 1.868,
"step": 125700
},
{
"epoch": 0.81,
"learning_rate": 9.806763285024155e-06,
"loss": 1.8543,
"step": 125800
},
{
"epoch": 0.81,
"learning_rate": 9.774557165861515e-06,
"loss": 1.8762,
"step": 125900
},
{
"epoch": 0.81,
"learning_rate": 9.742351046698874e-06,
"loss": 1.8464,
"step": 126000
},
{
"epoch": 0.81,
"learning_rate": 9.710144927536233e-06,
"loss": 1.8661,
"step": 126100
},
{
"epoch": 0.81,
"learning_rate": 9.677938808373593e-06,
"loss": 1.8705,
"step": 126200
},
{
"epoch": 0.81,
"learning_rate": 9.645732689210952e-06,
"loss": 1.8629,
"step": 126300
},
{
"epoch": 0.81,
"learning_rate": 9.61352657004831e-06,
"loss": 1.8665,
"step": 126400
},
{
"epoch": 0.81,
"learning_rate": 9.581320450885669e-06,
"loss": 1.8478,
"step": 126500
},
{
"epoch": 0.81,
"learning_rate": 9.549114331723027e-06,
"loss": 1.879,
"step": 126600
},
{
"epoch": 0.81,
"learning_rate": 9.516908212560386e-06,
"loss": 1.8613,
"step": 126700
},
{
"epoch": 0.81,
"learning_rate": 9.484702093397745e-06,
"loss": 1.8865,
"step": 126800
},
{
"epoch": 0.81,
"learning_rate": 9.452495974235105e-06,
"loss": 1.8579,
"step": 126900
},
{
"epoch": 0.81,
"learning_rate": 9.420289855072464e-06,
"loss": 1.8605,
"step": 127000
},
{
"epoch": 0.81,
"learning_rate": 9.388083735909823e-06,
"loss": 1.8324,
"step": 127100
},
{
"epoch": 0.81,
"learning_rate": 9.355877616747183e-06,
"loss": 1.8882,
"step": 127200
},
{
"epoch": 0.81,
"learning_rate": 9.323671497584542e-06,
"loss": 1.8792,
"step": 127300
},
{
"epoch": 0.82,
"learning_rate": 9.2914653784219e-06,
"loss": 1.8831,
"step": 127400
},
{
"epoch": 0.82,
"learning_rate": 9.259259259259259e-06,
"loss": 1.8776,
"step": 127500
},
{
"epoch": 0.82,
"learning_rate": 9.227053140096618e-06,
"loss": 1.8919,
"step": 127600
},
{
"epoch": 0.82,
"learning_rate": 9.194847020933978e-06,
"loss": 1.8712,
"step": 127700
},
{
"epoch": 0.82,
"learning_rate": 9.162640901771337e-06,
"loss": 1.8534,
"step": 127800
},
{
"epoch": 0.82,
"learning_rate": 9.130434782608697e-06,
"loss": 1.8684,
"step": 127900
},
{
"epoch": 0.82,
"learning_rate": 9.098228663446056e-06,
"loss": 1.8507,
"step": 128000
},
{
"epoch": 0.82,
"learning_rate": 9.066022544283415e-06,
"loss": 1.8465,
"step": 128100
},
{
"epoch": 0.82,
"learning_rate": 9.033816425120775e-06,
"loss": 1.8728,
"step": 128200
},
{
"epoch": 0.82,
"learning_rate": 9.001610305958132e-06,
"loss": 1.8426,
"step": 128300
},
{
"epoch": 0.82,
"learning_rate": 8.969404186795492e-06,
"loss": 1.8706,
"step": 128400
},
{
"epoch": 0.82,
"learning_rate": 8.93719806763285e-06,
"loss": 1.8814,
"step": 128500
},
{
"epoch": 0.82,
"learning_rate": 8.904991948470209e-06,
"loss": 1.8581,
"step": 128600
},
{
"epoch": 0.82,
"learning_rate": 8.872785829307568e-06,
"loss": 1.8579,
"step": 128700
},
{
"epoch": 0.82,
"learning_rate": 8.840579710144927e-06,
"loss": 1.8535,
"step": 128800
},
{
"epoch": 0.82,
"learning_rate": 8.808373590982287e-06,
"loss": 1.8933,
"step": 128900
},
{
"epoch": 0.83,
"learning_rate": 8.776167471819646e-06,
"loss": 1.8781,
"step": 129000
},
{
"epoch": 0.83,
"learning_rate": 8.743961352657005e-06,
"loss": 1.8591,
"step": 129100
},
{
"epoch": 0.83,
"learning_rate": 8.711755233494365e-06,
"loss": 1.8522,
"step": 129200
},
{
"epoch": 0.83,
"learning_rate": 8.679549114331724e-06,
"loss": 1.8661,
"step": 129300
},
{
"epoch": 0.83,
"learning_rate": 8.647342995169083e-06,
"loss": 1.8815,
"step": 129400
},
{
"epoch": 0.83,
"learning_rate": 8.615136876006441e-06,
"loss": 1.8621,
"step": 129500
},
{
"epoch": 0.83,
"learning_rate": 8.5829307568438e-06,
"loss": 1.8787,
"step": 129600
},
{
"epoch": 0.83,
"learning_rate": 8.55072463768116e-06,
"loss": 1.874,
"step": 129700
},
{
"epoch": 0.83,
"learning_rate": 8.518518518518519e-06,
"loss": 1.888,
"step": 129800
},
{
"epoch": 0.83,
"learning_rate": 8.486312399355879e-06,
"loss": 1.8866,
"step": 129900
},
{
"epoch": 0.83,
"learning_rate": 8.454106280193238e-06,
"loss": 1.8538,
"step": 130000
},
{
"epoch": 0.83,
"learning_rate": 8.421900161030597e-06,
"loss": 1.8536,
"step": 130100
},
{
"epoch": 0.83,
"learning_rate": 8.389694041867955e-06,
"loss": 1.8731,
"step": 130200
},
{
"epoch": 0.83,
"learning_rate": 8.357487922705314e-06,
"loss": 1.8778,
"step": 130300
},
{
"epoch": 0.83,
"learning_rate": 8.325281803542674e-06,
"loss": 1.8464,
"step": 130400
},
{
"epoch": 0.84,
"learning_rate": 8.293075684380031e-06,
"loss": 1.8528,
"step": 130500
},
{
"epoch": 0.84,
"learning_rate": 8.26086956521739e-06,
"loss": 1.8429,
"step": 130600
},
{
"epoch": 0.84,
"learning_rate": 8.22866344605475e-06,
"loss": 1.8715,
"step": 130700
},
{
"epoch": 0.84,
"learning_rate": 8.19645732689211e-06,
"loss": 1.8466,
"step": 130800
},
{
"epoch": 0.84,
"learning_rate": 8.164251207729469e-06,
"loss": 1.8626,
"step": 130900
},
{
"epoch": 0.84,
"learning_rate": 8.132045088566828e-06,
"loss": 1.8812,
"step": 131000
},
{
"epoch": 0.84,
"learning_rate": 8.099838969404187e-06,
"loss": 1.87,
"step": 131100
},
{
"epoch": 0.84,
"learning_rate": 8.067632850241547e-06,
"loss": 1.8677,
"step": 131200
},
{
"epoch": 0.84,
"learning_rate": 8.035426731078906e-06,
"loss": 1.8648,
"step": 131300
},
{
"epoch": 0.84,
"learning_rate": 8.003220611916265e-06,
"loss": 1.8467,
"step": 131400
},
{
"epoch": 0.84,
"learning_rate": 7.971014492753623e-06,
"loss": 1.8472,
"step": 131500
},
{
"epoch": 0.84,
"learning_rate": 7.938808373590982e-06,
"loss": 1.8596,
"step": 131600
},
{
"epoch": 0.84,
"learning_rate": 7.906602254428342e-06,
"loss": 1.8721,
"step": 131700
},
{
"epoch": 0.84,
"learning_rate": 7.874396135265701e-06,
"loss": 1.8599,
"step": 131800
},
{
"epoch": 0.84,
"learning_rate": 7.84219001610306e-06,
"loss": 1.8792,
"step": 131900
},
{
"epoch": 0.84,
"learning_rate": 7.80998389694042e-06,
"loss": 1.872,
"step": 132000
},
{
"epoch": 0.85,
"learning_rate": 7.777777777777777e-06,
"loss": 1.8734,
"step": 132100
},
{
"epoch": 0.85,
"learning_rate": 7.745571658615137e-06,
"loss": 1.8606,
"step": 132200
},
{
"epoch": 0.85,
"learning_rate": 7.713365539452496e-06,
"loss": 1.8672,
"step": 132300
},
{
"epoch": 0.85,
"learning_rate": 7.681159420289856e-06,
"loss": 1.8599,
"step": 132400
},
{
"epoch": 0.85,
"learning_rate": 7.648953301127215e-06,
"loss": 1.8781,
"step": 132500
},
{
"epoch": 0.85,
"learning_rate": 7.616747181964573e-06,
"loss": 1.8802,
"step": 132600
},
{
"epoch": 0.85,
"learning_rate": 7.584541062801932e-06,
"loss": 1.8879,
"step": 132700
},
{
"epoch": 0.85,
"learning_rate": 7.552334943639291e-06,
"loss": 1.8912,
"step": 132800
},
{
"epoch": 0.85,
"learning_rate": 7.520128824476651e-06,
"loss": 1.8616,
"step": 132900
},
{
"epoch": 0.85,
"learning_rate": 7.48792270531401e-06,
"loss": 1.877,
"step": 133000
},
{
"epoch": 0.85,
"learning_rate": 7.455716586151369e-06,
"loss": 1.8549,
"step": 133100
},
{
"epoch": 0.85,
"learning_rate": 7.423510466988729e-06,
"loss": 1.8769,
"step": 133200
},
{
"epoch": 0.85,
"learning_rate": 7.391304347826088e-06,
"loss": 1.8706,
"step": 133300
},
{
"epoch": 0.85,
"learning_rate": 7.3590982286634465e-06,
"loss": 1.8514,
"step": 133400
},
{
"epoch": 0.85,
"learning_rate": 7.326892109500806e-06,
"loss": 1.8872,
"step": 133500
},
{
"epoch": 0.86,
"learning_rate": 7.294685990338164e-06,
"loss": 1.863,
"step": 133600
},
{
"epoch": 0.86,
"learning_rate": 7.262479871175523e-06,
"loss": 1.8497,
"step": 133700
},
{
"epoch": 0.86,
"learning_rate": 7.230273752012882e-06,
"loss": 1.876,
"step": 133800
},
{
"epoch": 0.86,
"learning_rate": 7.1980676328502416e-06,
"loss": 1.8555,
"step": 133900
},
{
"epoch": 0.86,
"learning_rate": 7.165861513687601e-06,
"loss": 1.8467,
"step": 134000
},
{
"epoch": 0.86,
"learning_rate": 7.13365539452496e-06,
"loss": 1.8577,
"step": 134100
},
{
"epoch": 0.86,
"learning_rate": 7.10144927536232e-06,
"loss": 1.8537,
"step": 134200
},
{
"epoch": 0.86,
"learning_rate": 7.069243156199679e-06,
"loss": 1.8419,
"step": 134300
},
{
"epoch": 0.86,
"learning_rate": 7.0370370370370375e-06,
"loss": 1.8553,
"step": 134400
},
{
"epoch": 0.86,
"learning_rate": 7.004830917874397e-06,
"loss": 1.8634,
"step": 134500
},
{
"epoch": 0.86,
"learning_rate": 6.9726247987117545e-06,
"loss": 1.8583,
"step": 134600
},
{
"epoch": 0.86,
"learning_rate": 6.940418679549114e-06,
"loss": 1.8682,
"step": 134700
},
{
"epoch": 0.86,
"learning_rate": 6.908212560386473e-06,
"loss": 1.8741,
"step": 134800
},
{
"epoch": 0.86,
"learning_rate": 6.8760064412238326e-06,
"loss": 1.8493,
"step": 134900
},
{
"epoch": 0.86,
"learning_rate": 6.843800322061192e-06,
"loss": 1.8632,
"step": 135000
},
{
"epoch": 0.86,
"learning_rate": 6.811594202898551e-06,
"loss": 1.8571,
"step": 135100
},
{
"epoch": 0.87,
"learning_rate": 6.779388083735911e-06,
"loss": 1.8628,
"step": 135200
},
{
"epoch": 0.87,
"learning_rate": 6.747181964573269e-06,
"loss": 1.863,
"step": 135300
},
{
"epoch": 0.87,
"learning_rate": 6.7149758454106285e-06,
"loss": 1.8652,
"step": 135400
},
{
"epoch": 0.87,
"learning_rate": 6.682769726247988e-06,
"loss": 1.8446,
"step": 135500
},
{
"epoch": 0.87,
"learning_rate": 6.650563607085347e-06,
"loss": 1.8599,
"step": 135600
},
{
"epoch": 0.87,
"learning_rate": 6.618357487922705e-06,
"loss": 1.8741,
"step": 135700
},
{
"epoch": 0.87,
"learning_rate": 6.586151368760064e-06,
"loss": 1.866,
"step": 135800
},
{
"epoch": 0.87,
"learning_rate": 6.5539452495974235e-06,
"loss": 1.8558,
"step": 135900
},
{
"epoch": 0.87,
"learning_rate": 6.521739130434783e-06,
"loss": 1.8609,
"step": 136000
},
{
"epoch": 0.87,
"learning_rate": 6.489533011272142e-06,
"loss": 1.8657,
"step": 136100
},
{
"epoch": 0.87,
"learning_rate": 6.457326892109502e-06,
"loss": 1.8457,
"step": 136200
},
{
"epoch": 0.87,
"learning_rate": 6.42512077294686e-06,
"loss": 1.8566,
"step": 136300
},
{
"epoch": 0.87,
"learning_rate": 6.3929146537842194e-06,
"loss": 1.8413,
"step": 136400
},
{
"epoch": 0.87,
"learning_rate": 6.360708534621579e-06,
"loss": 1.852,
"step": 136500
},
{
"epoch": 0.87,
"learning_rate": 6.328502415458938e-06,
"loss": 1.8638,
"step": 136600
},
{
"epoch": 0.87,
"learning_rate": 6.296296296296296e-06,
"loss": 1.8559,
"step": 136700
},
{
"epoch": 0.88,
"learning_rate": 6.264090177133655e-06,
"loss": 1.875,
"step": 136800
},
{
"epoch": 0.88,
"learning_rate": 6.231884057971015e-06,
"loss": 1.8751,
"step": 136900
},
{
"epoch": 0.88,
"learning_rate": 6.199677938808374e-06,
"loss": 1.8453,
"step": 137000
},
{
"epoch": 0.88,
"learning_rate": 6.167471819645733e-06,
"loss": 1.8475,
"step": 137100
},
{
"epoch": 0.88,
"learning_rate": 6.135265700483092e-06,
"loss": 1.833,
"step": 137200
},
{
"epoch": 0.88,
"learning_rate": 6.103059581320451e-06,
"loss": 1.8518,
"step": 137300
},
{
"epoch": 0.88,
"learning_rate": 6.0708534621578104e-06,
"loss": 1.8729,
"step": 137400
},
{
"epoch": 0.88,
"learning_rate": 6.038647342995169e-06,
"loss": 1.8695,
"step": 137500
},
{
"epoch": 0.88,
"learning_rate": 6.006441223832528e-06,
"loss": 1.8789,
"step": 137600
},
{
"epoch": 0.88,
"learning_rate": 5.974235104669888e-06,
"loss": 1.8568,
"step": 137700
},
{
"epoch": 0.88,
"learning_rate": 5.942028985507247e-06,
"loss": 1.8662,
"step": 137800
},
{
"epoch": 0.88,
"learning_rate": 5.909822866344606e-06,
"loss": 1.8544,
"step": 137900
},
{
"epoch": 0.88,
"learning_rate": 5.877616747181965e-06,
"loss": 1.8544,
"step": 138000
},
{
"epoch": 0.88,
"learning_rate": 5.845410628019324e-06,
"loss": 1.8703,
"step": 138100
},
{
"epoch": 0.88,
"learning_rate": 5.813204508856683e-06,
"loss": 1.8406,
"step": 138200
},
{
"epoch": 0.89,
"learning_rate": 5.780998389694042e-06,
"loss": 1.8843,
"step": 138300
},
{
"epoch": 0.89,
"learning_rate": 5.748792270531401e-06,
"loss": 1.8523,
"step": 138400
},
{
"epoch": 0.89,
"learning_rate": 5.71658615136876e-06,
"loss": 1.8554,
"step": 138500
},
{
"epoch": 0.89,
"learning_rate": 5.684380032206119e-06,
"loss": 1.8774,
"step": 138600
},
{
"epoch": 0.89,
"learning_rate": 5.652173913043479e-06,
"loss": 1.8735,
"step": 138700
},
{
"epoch": 0.89,
"learning_rate": 5.619967793880838e-06,
"loss": 1.877,
"step": 138800
},
{
"epoch": 0.89,
"learning_rate": 5.587761674718197e-06,
"loss": 1.83,
"step": 138900
},
{
"epoch": 0.89,
"learning_rate": 5.555555555555556e-06,
"loss": 1.8605,
"step": 139000
},
{
"epoch": 0.89,
"learning_rate": 5.523349436392914e-06,
"loss": 1.8651,
"step": 139100
},
{
"epoch": 0.89,
"learning_rate": 5.491143317230274e-06,
"loss": 1.8629,
"step": 139200
},
{
"epoch": 0.89,
"learning_rate": 5.458937198067633e-06,
"loss": 1.8484,
"step": 139300
},
{
"epoch": 0.89,
"learning_rate": 5.426731078904992e-06,
"loss": 1.8619,
"step": 139400
},
{
"epoch": 0.89,
"learning_rate": 5.394524959742351e-06,
"loss": 1.8301,
"step": 139500
},
{
"epoch": 0.89,
"learning_rate": 5.36231884057971e-06,
"loss": 1.8655,
"step": 139600
},
{
"epoch": 0.89,
"learning_rate": 5.33011272141707e-06,
"loss": 1.8656,
"step": 139700
},
{
"epoch": 0.89,
"learning_rate": 5.297906602254429e-06,
"loss": 1.8505,
"step": 139800
},
{
"epoch": 0.9,
"learning_rate": 5.265700483091788e-06,
"loss": 1.8538,
"step": 139900
},
{
"epoch": 0.9,
"learning_rate": 5.233494363929147e-06,
"loss": 1.8717,
"step": 140000
},
{
"epoch": 0.9,
"learning_rate": 5.201288244766505e-06,
"loss": 1.8672,
"step": 140100
},
{
"epoch": 0.9,
"learning_rate": 5.169082125603865e-06,
"loss": 1.8685,
"step": 140200
},
{
"epoch": 0.9,
"learning_rate": 5.136876006441224e-06,
"loss": 1.8593,
"step": 140300
},
{
"epoch": 0.9,
"learning_rate": 5.104669887278583e-06,
"loss": 1.8365,
"step": 140400
},
{
"epoch": 0.9,
"learning_rate": 5.072463768115943e-06,
"loss": 1.849,
"step": 140500
},
{
"epoch": 0.9,
"learning_rate": 5.040257648953301e-06,
"loss": 1.8847,
"step": 140600
},
{
"epoch": 0.9,
"learning_rate": 5.0080515297906606e-06,
"loss": 1.8667,
"step": 140700
},
{
"epoch": 0.9,
"learning_rate": 4.97584541062802e-06,
"loss": 1.8617,
"step": 140800
},
{
"epoch": 0.9,
"learning_rate": 4.9436392914653784e-06,
"loss": 1.8508,
"step": 140900
},
{
"epoch": 0.9,
"learning_rate": 4.911433172302738e-06,
"loss": 1.8538,
"step": 141000
},
{
"epoch": 0.9,
"learning_rate": 4.879227053140096e-06,
"loss": 1.8657,
"step": 141100
},
{
"epoch": 0.9,
"learning_rate": 4.847020933977456e-06,
"loss": 1.8578,
"step": 141200
},
{
"epoch": 0.9,
"learning_rate": 4.814814814814815e-06,
"loss": 1.8351,
"step": 141300
},
{
"epoch": 0.9,
"learning_rate": 4.782608695652174e-06,
"loss": 1.8783,
"step": 141400
},
{
"epoch": 0.91,
"learning_rate": 4.750402576489534e-06,
"loss": 1.8586,
"step": 141500
},
{
"epoch": 0.91,
"learning_rate": 4.718196457326892e-06,
"loss": 1.8587,
"step": 141600
},
{
"epoch": 0.91,
"learning_rate": 4.6859903381642516e-06,
"loss": 1.8641,
"step": 141700
},
{
"epoch": 0.91,
"learning_rate": 4.653784219001611e-06,
"loss": 1.8618,
"step": 141800
},
{
"epoch": 0.91,
"learning_rate": 4.621578099838969e-06,
"loss": 1.8707,
"step": 141900
},
{
"epoch": 0.91,
"learning_rate": 4.589371980676329e-06,
"loss": 1.8886,
"step": 142000
},
{
"epoch": 0.91,
"learning_rate": 4.557165861513687e-06,
"loss": 1.882,
"step": 142100
},
{
"epoch": 0.91,
"learning_rate": 4.524959742351047e-06,
"loss": 1.8406,
"step": 142200
},
{
"epoch": 0.91,
"learning_rate": 4.492753623188406e-06,
"loss": 1.8614,
"step": 142300
},
{
"epoch": 0.91,
"learning_rate": 4.460547504025765e-06,
"loss": 1.8619,
"step": 142400
},
{
"epoch": 0.91,
"learning_rate": 4.428341384863125e-06,
"loss": 1.8528,
"step": 142500
},
{
"epoch": 0.91,
"learning_rate": 4.396135265700483e-06,
"loss": 1.8537,
"step": 142600
},
{
"epoch": 0.91,
"learning_rate": 4.3639291465378425e-06,
"loss": 1.8736,
"step": 142700
},
{
"epoch": 0.91,
"learning_rate": 4.331723027375201e-06,
"loss": 1.8506,
"step": 142800
},
{
"epoch": 0.91,
"learning_rate": 4.29951690821256e-06,
"loss": 1.8726,
"step": 142900
},
{
"epoch": 0.92,
"learning_rate": 4.26731078904992e-06,
"loss": 1.8658,
"step": 143000
},
{
"epoch": 0.92,
"learning_rate": 4.235104669887278e-06,
"loss": 1.8612,
"step": 143100
},
{
"epoch": 0.92,
"learning_rate": 4.202898550724638e-06,
"loss": 1.8722,
"step": 143200
},
{
"epoch": 0.92,
"learning_rate": 4.170692431561997e-06,
"loss": 1.879,
"step": 143300
},
{
"epoch": 0.92,
"learning_rate": 4.138486312399356e-06,
"loss": 1.8578,
"step": 143400
},
{
"epoch": 0.92,
"learning_rate": 4.106280193236716e-06,
"loss": 1.8785,
"step": 143500
},
{
"epoch": 0.92,
"learning_rate": 4.074074074074075e-06,
"loss": 1.8672,
"step": 143600
},
{
"epoch": 0.92,
"learning_rate": 4.0418679549114335e-06,
"loss": 1.8607,
"step": 143700
},
{
"epoch": 0.92,
"learning_rate": 4.009661835748792e-06,
"loss": 1.8724,
"step": 143800
},
{
"epoch": 0.92,
"learning_rate": 3.977455716586151e-06,
"loss": 1.8508,
"step": 143900
},
{
"epoch": 0.92,
"learning_rate": 3.945249597423511e-06,
"loss": 1.838,
"step": 144000
},
{
"epoch": 0.92,
"learning_rate": 3.91304347826087e-06,
"loss": 1.8567,
"step": 144100
},
{
"epoch": 0.92,
"learning_rate": 3.8808373590982286e-06,
"loss": 1.8621,
"step": 144200
},
{
"epoch": 0.92,
"learning_rate": 3.848631239935588e-06,
"loss": 1.8438,
"step": 144300
},
{
"epoch": 0.92,
"learning_rate": 3.816425120772947e-06,
"loss": 1.8511,
"step": 144400
},
{
"epoch": 0.92,
"learning_rate": 3.784219001610306e-06,
"loss": 1.866,
"step": 144500
},
{
"epoch": 0.93,
"learning_rate": 3.7520128824476656e-06,
"loss": 1.8655,
"step": 144600
},
{
"epoch": 0.93,
"learning_rate": 3.719806763285024e-06,
"loss": 1.8346,
"step": 144700
},
{
"epoch": 0.93,
"learning_rate": 3.6876006441223834e-06,
"loss": 1.8462,
"step": 144800
},
{
"epoch": 0.93,
"learning_rate": 3.6553945249597423e-06,
"loss": 1.8417,
"step": 144900
},
{
"epoch": 0.93,
"learning_rate": 3.6231884057971017e-06,
"loss": 1.8535,
"step": 145000
},
{
"epoch": 0.93,
"learning_rate": 3.590982286634461e-06,
"loss": 1.8694,
"step": 145100
},
{
"epoch": 0.93,
"learning_rate": 3.5587761674718196e-06,
"loss": 1.8628,
"step": 145200
},
{
"epoch": 0.93,
"learning_rate": 3.526570048309179e-06,
"loss": 1.8552,
"step": 145300
},
{
"epoch": 0.93,
"learning_rate": 3.494363929146538e-06,
"loss": 1.8669,
"step": 145400
},
{
"epoch": 0.93,
"learning_rate": 3.462157809983897e-06,
"loss": 1.8525,
"step": 145500
},
{
"epoch": 0.93,
"learning_rate": 3.4299516908212565e-06,
"loss": 1.8538,
"step": 145600
},
{
"epoch": 0.93,
"learning_rate": 3.397745571658615e-06,
"loss": 1.8447,
"step": 145700
},
{
"epoch": 0.93,
"learning_rate": 3.3655394524959744e-06,
"loss": 1.8647,
"step": 145800
},
{
"epoch": 0.93,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.8514,
"step": 145900
},
{
"epoch": 0.93,
"learning_rate": 3.3011272141706927e-06,
"loss": 1.8571,
"step": 146000
},
{
"epoch": 0.94,
"learning_rate": 3.268921095008052e-06,
"loss": 1.822,
"step": 146100
},
{
"epoch": 0.94,
"learning_rate": 3.2367149758454105e-06,
"loss": 1.8409,
"step": 146200
},
{
"epoch": 0.94,
"learning_rate": 3.20450885668277e-06,
"loss": 1.8446,
"step": 146300
},
{
"epoch": 0.94,
"learning_rate": 3.172302737520129e-06,
"loss": 1.8733,
"step": 146400
},
{
"epoch": 0.94,
"learning_rate": 3.140096618357488e-06,
"loss": 1.8418,
"step": 146500
},
{
"epoch": 0.94,
"learning_rate": 3.107890499194847e-06,
"loss": 1.8353,
"step": 146600
},
{
"epoch": 0.94,
"learning_rate": 3.0756843800322064e-06,
"loss": 1.875,
"step": 146700
},
{
"epoch": 0.94,
"learning_rate": 3.0434782608695654e-06,
"loss": 1.8653,
"step": 146800
},
{
"epoch": 0.94,
"learning_rate": 3.0112721417069243e-06,
"loss": 1.8674,
"step": 146900
},
{
"epoch": 0.94,
"learning_rate": 2.9790660225442837e-06,
"loss": 1.854,
"step": 147000
},
{
"epoch": 0.94,
"learning_rate": 2.9468599033816426e-06,
"loss": 1.8452,
"step": 147100
},
{
"epoch": 0.94,
"learning_rate": 2.9146537842190015e-06,
"loss": 1.86,
"step": 147200
},
{
"epoch": 0.94,
"learning_rate": 2.882447665056361e-06,
"loss": 1.8674,
"step": 147300
},
{
"epoch": 0.94,
"learning_rate": 2.85024154589372e-06,
"loss": 1.862,
"step": 147400
},
{
"epoch": 0.94,
"learning_rate": 2.818035426731079e-06,
"loss": 1.8593,
"step": 147500
},
{
"epoch": 0.94,
"learning_rate": 2.7858293075684385e-06,
"loss": 1.8512,
"step": 147600
},
{
"epoch": 0.95,
"learning_rate": 2.753623188405797e-06,
"loss": 1.8513,
"step": 147700
},
{
"epoch": 0.95,
"learning_rate": 2.7214170692431564e-06,
"loss": 1.8473,
"step": 147800
},
{
"epoch": 0.95,
"learning_rate": 2.6892109500805153e-06,
"loss": 1.8322,
"step": 147900
},
{
"epoch": 0.95,
"learning_rate": 2.6570048309178746e-06,
"loss": 1.8566,
"step": 148000
},
{
"epoch": 0.95,
"learning_rate": 2.6247987117552336e-06,
"loss": 1.8589,
"step": 148100
},
{
"epoch": 0.95,
"learning_rate": 2.5925925925925925e-06,
"loss": 1.8523,
"step": 148200
},
{
"epoch": 0.95,
"learning_rate": 2.560386473429952e-06,
"loss": 1.8697,
"step": 148300
},
{
"epoch": 0.95,
"learning_rate": 2.5281803542673108e-06,
"loss": 1.8473,
"step": 148400
},
{
"epoch": 0.95,
"learning_rate": 2.49597423510467e-06,
"loss": 1.8483,
"step": 148500
},
{
"epoch": 0.95,
"learning_rate": 2.463768115942029e-06,
"loss": 1.8556,
"step": 148600
},
{
"epoch": 0.95,
"learning_rate": 2.431561996779388e-06,
"loss": 1.876,
"step": 148700
},
{
"epoch": 0.95,
"learning_rate": 2.3993558776167473e-06,
"loss": 1.8657,
"step": 148800
},
{
"epoch": 0.95,
"learning_rate": 2.3671497584541067e-06,
"loss": 1.8669,
"step": 148900
},
{
"epoch": 0.95,
"learning_rate": 2.3349436392914656e-06,
"loss": 1.8631,
"step": 149000
},
{
"epoch": 0.95,
"learning_rate": 2.3027375201288245e-06,
"loss": 1.8696,
"step": 149100
},
{
"epoch": 0.95,
"learning_rate": 2.2705314009661835e-06,
"loss": 1.8575,
"step": 149200
},
{
"epoch": 0.96,
"learning_rate": 2.238325281803543e-06,
"loss": 1.8714,
"step": 149300
},
{
"epoch": 0.96,
"learning_rate": 2.206119162640902e-06,
"loss": 1.8405,
"step": 149400
},
{
"epoch": 0.96,
"learning_rate": 2.173913043478261e-06,
"loss": 1.8574,
"step": 149500
},
{
"epoch": 0.96,
"learning_rate": 2.14170692431562e-06,
"loss": 1.8796,
"step": 149600
},
{
"epoch": 0.96,
"learning_rate": 2.109500805152979e-06,
"loss": 1.8597,
"step": 149700
},
{
"epoch": 0.96,
"learning_rate": 2.0772946859903383e-06,
"loss": 1.8593,
"step": 149800
},
{
"epoch": 0.96,
"learning_rate": 2.0450885668276977e-06,
"loss": 1.8484,
"step": 149900
},
{
"epoch": 0.96,
"learning_rate": 2.012882447665056e-06,
"loss": 1.8514,
"step": 150000
},
{
"epoch": 0.96,
"learning_rate": 1.9806763285024155e-06,
"loss": 1.8813,
"step": 150100
},
{
"epoch": 0.96,
"learning_rate": 1.9484702093397744e-06,
"loss": 1.8668,
"step": 150200
},
{
"epoch": 0.96,
"learning_rate": 1.916264090177134e-06,
"loss": 1.8756,
"step": 150300
},
{
"epoch": 0.96,
"learning_rate": 1.884057971014493e-06,
"loss": 1.8756,
"step": 150400
},
{
"epoch": 0.96,
"learning_rate": 1.8518518518518519e-06,
"loss": 1.8359,
"step": 150500
},
{
"epoch": 0.96,
"learning_rate": 1.819645732689211e-06,
"loss": 1.8652,
"step": 150600
},
{
"epoch": 0.96,
"learning_rate": 1.7874396135265704e-06,
"loss": 1.8556,
"step": 150700
},
{
"epoch": 0.97,
"learning_rate": 1.755233494363929e-06,
"loss": 1.87,
"step": 150800
},
{
"epoch": 0.97,
"learning_rate": 1.7230273752012884e-06,
"loss": 1.8698,
"step": 150900
},
{
"epoch": 0.97,
"learning_rate": 1.6908212560386474e-06,
"loss": 1.8636,
"step": 151000
},
{
"epoch": 0.97,
"learning_rate": 1.6586151368760065e-06,
"loss": 1.8749,
"step": 151100
},
{
"epoch": 0.97,
"learning_rate": 1.6264090177133659e-06,
"loss": 1.8565,
"step": 151200
},
{
"epoch": 0.97,
"learning_rate": 1.5942028985507246e-06,
"loss": 1.8518,
"step": 151300
},
{
"epoch": 0.97,
"learning_rate": 1.561996779388084e-06,
"loss": 1.8489,
"step": 151400
},
{
"epoch": 0.97,
"learning_rate": 1.5297906602254428e-06,
"loss": 1.8646,
"step": 151500
},
{
"epoch": 0.97,
"learning_rate": 1.497584541062802e-06,
"loss": 1.8478,
"step": 151600
},
{
"epoch": 0.97,
"learning_rate": 1.4653784219001611e-06,
"loss": 1.859,
"step": 151700
},
{
"epoch": 0.97,
"learning_rate": 1.4331723027375203e-06,
"loss": 1.873,
"step": 151800
},
{
"epoch": 0.97,
"learning_rate": 1.4009661835748794e-06,
"loss": 1.8466,
"step": 151900
},
{
"epoch": 0.97,
"learning_rate": 1.3687600644122383e-06,
"loss": 1.8665,
"step": 152000
},
{
"epoch": 0.97,
"learning_rate": 1.3365539452495975e-06,
"loss": 1.8401,
"step": 152100
},
{
"epoch": 0.97,
"learning_rate": 1.3043478260869564e-06,
"loss": 1.8357,
"step": 152200
},
{
"epoch": 0.97,
"learning_rate": 1.2721417069243158e-06,
"loss": 1.8579,
"step": 152300
},
{
"epoch": 0.98,
"learning_rate": 1.239935587761675e-06,
"loss": 1.8519,
"step": 152400
},
{
"epoch": 0.98,
"learning_rate": 1.2077294685990338e-06,
"loss": 1.8601,
"step": 152500
},
{
"epoch": 0.98,
"learning_rate": 1.175523349436393e-06,
"loss": 1.8628,
"step": 152600
},
{
"epoch": 0.98,
"learning_rate": 1.1433172302737521e-06,
"loss": 1.8564,
"step": 152700
},
{
"epoch": 0.98,
"learning_rate": 1.1111111111111112e-06,
"loss": 1.8709,
"step": 152800
},
{
"epoch": 0.98,
"learning_rate": 1.0789049919484702e-06,
"loss": 1.8571,
"step": 152900
},
{
"epoch": 0.98,
"learning_rate": 1.0466988727858293e-06,
"loss": 1.8486,
"step": 153000
},
{
"epoch": 0.98,
"learning_rate": 1.0144927536231885e-06,
"loss": 1.8716,
"step": 153100
},
{
"epoch": 0.98,
"learning_rate": 9.822866344605476e-07,
"loss": 1.8504,
"step": 153200
},
{
"epoch": 0.98,
"learning_rate": 9.500805152979066e-07,
"loss": 1.8729,
"step": 153300
},
{
"epoch": 0.98,
"learning_rate": 9.178743961352658e-07,
"loss": 1.8741,
"step": 153400
},
{
"epoch": 0.98,
"learning_rate": 8.856682769726248e-07,
"loss": 1.8547,
"step": 153500
},
{
"epoch": 0.98,
"learning_rate": 8.534621578099841e-07,
"loss": 1.8485,
"step": 153600
},
{
"epoch": 0.98,
"learning_rate": 8.212560386473431e-07,
"loss": 1.8678,
"step": 153700
},
{
"epoch": 0.98,
"learning_rate": 7.890499194847021e-07,
"loss": 1.8488,
"step": 153800
},
{
"epoch": 0.98,
"learning_rate": 7.568438003220612e-07,
"loss": 1.8587,
"step": 153900
},
{
"epoch": 0.99,
"learning_rate": 7.246376811594203e-07,
"loss": 1.8768,
"step": 154000
},
{
"epoch": 0.99,
"learning_rate": 6.924315619967794e-07,
"loss": 1.8763,
"step": 154100
},
{
"epoch": 0.99,
"learning_rate": 6.602254428341386e-07,
"loss": 1.8545,
"step": 154200
},
{
"epoch": 0.99,
"learning_rate": 6.280193236714976e-07,
"loss": 1.853,
"step": 154300
},
{
"epoch": 0.99,
"learning_rate": 5.958132045088568e-07,
"loss": 1.8739,
"step": 154400
},
{
"epoch": 0.99,
"learning_rate": 5.636070853462158e-07,
"loss": 1.8408,
"step": 154500
},
{
"epoch": 0.99,
"learning_rate": 5.314009661835749e-07,
"loss": 1.8615,
"step": 154600
},
{
"epoch": 0.99,
"learning_rate": 4.99194847020934e-07,
"loss": 1.8842,
"step": 154700
},
{
"epoch": 0.99,
"learning_rate": 4.6698872785829305e-07,
"loss": 1.8507,
"step": 154800
},
{
"epoch": 0.99,
"learning_rate": 4.347826086956522e-07,
"loss": 1.8427,
"step": 154900
},
{
"epoch": 0.99,
"learning_rate": 4.025764895330113e-07,
"loss": 1.8321,
"step": 155000
},
{
"epoch": 0.99,
"learning_rate": 3.703703703703704e-07,
"loss": 1.8553,
"step": 155100
},
{
"epoch": 0.99,
"learning_rate": 3.381642512077295e-07,
"loss": 1.8731,
"step": 155200
},
{
"epoch": 0.99,
"learning_rate": 3.0595813204508854e-07,
"loss": 1.8518,
"step": 155300
},
{
"epoch": 0.99,
"learning_rate": 2.737520128824477e-07,
"loss": 1.8545,
"step": 155400
},
{
"epoch": 1.0,
"learning_rate": 2.4154589371980677e-07,
"loss": 1.8801,
"step": 155500
},
{
"epoch": 1.0,
"learning_rate": 2.0933977455716585e-07,
"loss": 1.869,
"step": 155600
},
{
"epoch": 1.0,
"learning_rate": 1.7713365539452497e-07,
"loss": 1.8655,
"step": 155700
},
{
"epoch": 1.0,
"learning_rate": 1.4492753623188405e-07,
"loss": 1.854,
"step": 155800
},
{
"epoch": 1.0,
"learning_rate": 1.1272141706924317e-07,
"loss": 1.8411,
"step": 155900
},
{
"epoch": 1.0,
"learning_rate": 8.051529790660226e-08,
"loss": 1.856,
"step": 156000
}
],
"max_steps": 156250,
"num_train_epochs": 1,
"total_flos": 3.942090130452185e+18,
"trial_name": null,
"trial_params": null
}