yahoo_answers-t5-base-v1 / trainer_state.json
nreimers's picture
upload
2be2bae
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.7104,
"global_step": 111000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-06,
"loss": 18.5574,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 1e-05,
"loss": 14.5225,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 1.5e-05,
"loss": 9.9891,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 2e-05,
"loss": 8.0802,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 2.5e-05,
"loss": 6.8181,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 3e-05,
"loss": 5.7913,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 3.5e-05,
"loss": 5.0126,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 4e-05,
"loss": 4.4555,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 4.5e-05,
"loss": 4.204,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 5e-05,
"loss": 3.9823,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.996779388083736e-05,
"loss": 3.8565,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 4.993558776167472e-05,
"loss": 3.7623,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 4.990338164251208e-05,
"loss": 3.7156,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 4.987117552334944e-05,
"loss": 3.7255,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 4.98389694041868e-05,
"loss": 3.6762,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.980676328502415e-05,
"loss": 3.6646,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 4.977455716586152e-05,
"loss": 3.6359,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 4.974235104669888e-05,
"loss": 3.5838,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 4.9710144927536237e-05,
"loss": 3.5486,
"step": 1900
},
{
"epoch": 0.01,
"learning_rate": 4.967793880837359e-05,
"loss": 3.552,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.964573268921095e-05,
"loss": 3.5218,
"step": 2100
},
{
"epoch": 0.01,
"learning_rate": 4.9613526570048315e-05,
"loss": 3.5091,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 4.9581320450885674e-05,
"loss": 3.4777,
"step": 2300
},
{
"epoch": 0.02,
"learning_rate": 4.9549114331723027e-05,
"loss": 3.4543,
"step": 2400
},
{
"epoch": 0.02,
"learning_rate": 4.9516908212560386e-05,
"loss": 3.4746,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 4.948470209339775e-05,
"loss": 3.4278,
"step": 2600
},
{
"epoch": 0.02,
"learning_rate": 4.9452495974235105e-05,
"loss": 3.4421,
"step": 2700
},
{
"epoch": 0.02,
"learning_rate": 4.9420289855072464e-05,
"loss": 3.4384,
"step": 2800
},
{
"epoch": 0.02,
"learning_rate": 4.938808373590982e-05,
"loss": 3.4251,
"step": 2900
},
{
"epoch": 0.02,
"learning_rate": 4.935587761674719e-05,
"loss": 3.4479,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 4.932367149758454e-05,
"loss": 3.4268,
"step": 3100
},
{
"epoch": 0.02,
"learning_rate": 4.92914653784219e-05,
"loss": 3.4272,
"step": 3200
},
{
"epoch": 0.02,
"learning_rate": 4.925925925925926e-05,
"loss": 3.4173,
"step": 3300
},
{
"epoch": 0.02,
"learning_rate": 4.922705314009662e-05,
"loss": 3.3999,
"step": 3400
},
{
"epoch": 0.02,
"learning_rate": 4.919484702093398e-05,
"loss": 3.3821,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 4.916264090177134e-05,
"loss": 3.3931,
"step": 3600
},
{
"epoch": 0.02,
"learning_rate": 4.91304347826087e-05,
"loss": 3.3668,
"step": 3700
},
{
"epoch": 0.02,
"learning_rate": 4.909822866344606e-05,
"loss": 3.3695,
"step": 3800
},
{
"epoch": 0.02,
"learning_rate": 4.906602254428342e-05,
"loss": 3.3364,
"step": 3900
},
{
"epoch": 0.03,
"learning_rate": 4.9033816425120776e-05,
"loss": 3.3646,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 4.9001610305958136e-05,
"loss": 3.3401,
"step": 4100
},
{
"epoch": 0.03,
"learning_rate": 4.8969404186795495e-05,
"loss": 3.3406,
"step": 4200
},
{
"epoch": 0.03,
"learning_rate": 4.893719806763285e-05,
"loss": 3.3333,
"step": 4300
},
{
"epoch": 0.03,
"learning_rate": 4.8904991948470214e-05,
"loss": 3.3035,
"step": 4400
},
{
"epoch": 0.03,
"learning_rate": 4.887278582930757e-05,
"loss": 3.3023,
"step": 4500
},
{
"epoch": 0.03,
"learning_rate": 4.884057971014493e-05,
"loss": 3.3023,
"step": 4600
},
{
"epoch": 0.03,
"learning_rate": 4.8808373590982285e-05,
"loss": 3.283,
"step": 4700
},
{
"epoch": 0.03,
"learning_rate": 4.877616747181965e-05,
"loss": 3.3129,
"step": 4800
},
{
"epoch": 0.03,
"learning_rate": 4.874396135265701e-05,
"loss": 3.3093,
"step": 4900
},
{
"epoch": 0.03,
"learning_rate": 4.871175523349436e-05,
"loss": 3.3007,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 4.867954911433172e-05,
"loss": 3.2911,
"step": 5100
},
{
"epoch": 0.03,
"learning_rate": 4.864734299516908e-05,
"loss": 3.2856,
"step": 5200
},
{
"epoch": 0.03,
"learning_rate": 4.861513687600645e-05,
"loss": 3.2684,
"step": 5300
},
{
"epoch": 0.03,
"learning_rate": 4.85829307568438e-05,
"loss": 3.2817,
"step": 5400
},
{
"epoch": 0.04,
"learning_rate": 4.855072463768116e-05,
"loss": 3.2531,
"step": 5500
},
{
"epoch": 0.04,
"learning_rate": 4.851851851851852e-05,
"loss": 3.2607,
"step": 5600
},
{
"epoch": 0.04,
"learning_rate": 4.8486312399355885e-05,
"loss": 3.2418,
"step": 5700
},
{
"epoch": 0.04,
"learning_rate": 4.845410628019324e-05,
"loss": 3.2464,
"step": 5800
},
{
"epoch": 0.04,
"learning_rate": 4.84219001610306e-05,
"loss": 3.2382,
"step": 5900
},
{
"epoch": 0.04,
"learning_rate": 4.8389694041867956e-05,
"loss": 3.2433,
"step": 6000
},
{
"epoch": 0.04,
"learning_rate": 4.8357487922705316e-05,
"loss": 3.2448,
"step": 6100
},
{
"epoch": 0.04,
"learning_rate": 4.8325281803542675e-05,
"loss": 3.2538,
"step": 6200
},
{
"epoch": 0.04,
"learning_rate": 4.8293075684380035e-05,
"loss": 3.2296,
"step": 6300
},
{
"epoch": 0.04,
"learning_rate": 4.8260869565217394e-05,
"loss": 3.2306,
"step": 6400
},
{
"epoch": 0.04,
"learning_rate": 4.822866344605475e-05,
"loss": 3.2351,
"step": 6500
},
{
"epoch": 0.04,
"learning_rate": 4.819645732689211e-05,
"loss": 3.2291,
"step": 6600
},
{
"epoch": 0.04,
"learning_rate": 4.816425120772947e-05,
"loss": 3.2215,
"step": 6700
},
{
"epoch": 0.04,
"learning_rate": 4.813204508856683e-05,
"loss": 3.1971,
"step": 6800
},
{
"epoch": 0.04,
"learning_rate": 4.809983896940419e-05,
"loss": 3.2214,
"step": 6900
},
{
"epoch": 0.04,
"learning_rate": 4.806763285024155e-05,
"loss": 3.2143,
"step": 7000
},
{
"epoch": 0.05,
"learning_rate": 4.803542673107891e-05,
"loss": 3.1983,
"step": 7100
},
{
"epoch": 0.05,
"learning_rate": 4.800322061191627e-05,
"loss": 3.185,
"step": 7200
},
{
"epoch": 0.05,
"learning_rate": 4.797101449275362e-05,
"loss": 3.2202,
"step": 7300
},
{
"epoch": 0.05,
"learning_rate": 4.793880837359098e-05,
"loss": 3.2021,
"step": 7400
},
{
"epoch": 0.05,
"learning_rate": 4.790660225442835e-05,
"loss": 3.1941,
"step": 7500
},
{
"epoch": 0.05,
"learning_rate": 4.7874396135265706e-05,
"loss": 3.1897,
"step": 7600
},
{
"epoch": 0.05,
"learning_rate": 4.784219001610306e-05,
"loss": 3.2499,
"step": 7700
},
{
"epoch": 0.05,
"learning_rate": 4.780998389694042e-05,
"loss": 3.2734,
"step": 7800
},
{
"epoch": 0.05,
"learning_rate": 4.7777777777777784e-05,
"loss": 3.2337,
"step": 7900
},
{
"epoch": 0.05,
"learning_rate": 4.7745571658615143e-05,
"loss": 3.2304,
"step": 8000
},
{
"epoch": 0.05,
"learning_rate": 4.7713365539452496e-05,
"loss": 3.2397,
"step": 8100
},
{
"epoch": 0.05,
"learning_rate": 4.7681159420289855e-05,
"loss": 3.249,
"step": 8200
},
{
"epoch": 0.05,
"learning_rate": 4.7648953301127215e-05,
"loss": 3.2328,
"step": 8300
},
{
"epoch": 0.05,
"learning_rate": 4.7616747181964574e-05,
"loss": 3.2263,
"step": 8400
},
{
"epoch": 0.05,
"learning_rate": 4.7584541062801933e-05,
"loss": 3.235,
"step": 8500
},
{
"epoch": 0.06,
"learning_rate": 4.755233494363929e-05,
"loss": 3.2242,
"step": 8600
},
{
"epoch": 0.06,
"learning_rate": 4.752012882447665e-05,
"loss": 3.2352,
"step": 8700
},
{
"epoch": 0.06,
"learning_rate": 4.748792270531401e-05,
"loss": 3.2354,
"step": 8800
},
{
"epoch": 0.06,
"learning_rate": 4.745571658615137e-05,
"loss": 3.2433,
"step": 8900
},
{
"epoch": 0.06,
"learning_rate": 4.742351046698873e-05,
"loss": 3.2147,
"step": 9000
},
{
"epoch": 0.06,
"learning_rate": 4.739130434782609e-05,
"loss": 3.2196,
"step": 9100
},
{
"epoch": 0.06,
"learning_rate": 4.735909822866345e-05,
"loss": 3.2019,
"step": 9200
},
{
"epoch": 0.06,
"learning_rate": 4.732689210950081e-05,
"loss": 3.2058,
"step": 9300
},
{
"epoch": 0.06,
"learning_rate": 4.729468599033817e-05,
"loss": 3.2295,
"step": 9400
},
{
"epoch": 0.06,
"learning_rate": 4.726247987117553e-05,
"loss": 3.2247,
"step": 9500
},
{
"epoch": 0.06,
"learning_rate": 4.723027375201288e-05,
"loss": 3.2097,
"step": 9600
},
{
"epoch": 0.06,
"learning_rate": 4.7198067632850246e-05,
"loss": 3.1852,
"step": 9700
},
{
"epoch": 0.06,
"learning_rate": 4.7165861513687605e-05,
"loss": 3.1796,
"step": 9800
},
{
"epoch": 0.06,
"learning_rate": 4.7133655394524964e-05,
"loss": 3.2026,
"step": 9900
},
{
"epoch": 0.06,
"learning_rate": 4.710144927536232e-05,
"loss": 3.183,
"step": 10000
},
{
"epoch": 0.06,
"learning_rate": 4.706924315619968e-05,
"loss": 3.2071,
"step": 10100
},
{
"epoch": 0.07,
"learning_rate": 4.703703703703704e-05,
"loss": 3.195,
"step": 10200
},
{
"epoch": 0.07,
"learning_rate": 4.7004830917874395e-05,
"loss": 3.2103,
"step": 10300
},
{
"epoch": 0.07,
"learning_rate": 4.6972624798711754e-05,
"loss": 3.1835,
"step": 10400
},
{
"epoch": 0.07,
"learning_rate": 4.6940418679549114e-05,
"loss": 3.1994,
"step": 10500
},
{
"epoch": 0.07,
"learning_rate": 4.690821256038648e-05,
"loss": 3.2085,
"step": 10600
},
{
"epoch": 0.07,
"learning_rate": 4.687600644122383e-05,
"loss": 3.1969,
"step": 10700
},
{
"epoch": 0.07,
"learning_rate": 4.684380032206119e-05,
"loss": 3.1746,
"step": 10800
},
{
"epoch": 0.07,
"learning_rate": 4.681159420289855e-05,
"loss": 3.1597,
"step": 10900
},
{
"epoch": 0.07,
"learning_rate": 4.677938808373592e-05,
"loss": 3.229,
"step": 11000
},
{
"epoch": 0.07,
"learning_rate": 4.674718196457327e-05,
"loss": 3.2663,
"step": 11100
},
{
"epoch": 0.07,
"learning_rate": 4.671497584541063e-05,
"loss": 3.2271,
"step": 11200
},
{
"epoch": 0.07,
"learning_rate": 4.668276972624799e-05,
"loss": 3.2445,
"step": 11300
},
{
"epoch": 0.07,
"learning_rate": 4.665056360708535e-05,
"loss": 3.1908,
"step": 11400
},
{
"epoch": 0.07,
"learning_rate": 4.661835748792271e-05,
"loss": 3.2358,
"step": 11500
},
{
"epoch": 0.07,
"learning_rate": 4.6586151368760067e-05,
"loss": 3.2506,
"step": 11600
},
{
"epoch": 0.07,
"learning_rate": 4.6553945249597426e-05,
"loss": 3.2306,
"step": 11700
},
{
"epoch": 0.08,
"learning_rate": 4.6521739130434785e-05,
"loss": 3.219,
"step": 11800
},
{
"epoch": 0.08,
"learning_rate": 4.6489533011272145e-05,
"loss": 3.2098,
"step": 11900
},
{
"epoch": 0.08,
"learning_rate": 4.6457326892109504e-05,
"loss": 3.2448,
"step": 12000
},
{
"epoch": 0.08,
"learning_rate": 4.642512077294686e-05,
"loss": 3.2451,
"step": 12100
},
{
"epoch": 0.08,
"learning_rate": 4.639291465378422e-05,
"loss": 3.2093,
"step": 12200
},
{
"epoch": 0.08,
"learning_rate": 4.636070853462158e-05,
"loss": 3.2084,
"step": 12300
},
{
"epoch": 0.08,
"learning_rate": 4.632850241545894e-05,
"loss": 3.2344,
"step": 12400
},
{
"epoch": 0.08,
"learning_rate": 4.62962962962963e-05,
"loss": 3.197,
"step": 12500
},
{
"epoch": 0.08,
"learning_rate": 4.626409017713365e-05,
"loss": 3.2215,
"step": 12600
},
{
"epoch": 0.08,
"learning_rate": 4.623188405797101e-05,
"loss": 3.2132,
"step": 12700
},
{
"epoch": 0.08,
"learning_rate": 4.619967793880838e-05,
"loss": 3.212,
"step": 12800
},
{
"epoch": 0.08,
"learning_rate": 4.616747181964574e-05,
"loss": 3.2082,
"step": 12900
},
{
"epoch": 0.08,
"learning_rate": 4.613526570048309e-05,
"loss": 3.18,
"step": 13000
},
{
"epoch": 0.08,
"learning_rate": 4.610305958132045e-05,
"loss": 3.2078,
"step": 13100
},
{
"epoch": 0.08,
"learning_rate": 4.6070853462157816e-05,
"loss": 3.2154,
"step": 13200
},
{
"epoch": 0.09,
"learning_rate": 4.6038647342995176e-05,
"loss": 3.1944,
"step": 13300
},
{
"epoch": 0.09,
"learning_rate": 4.600644122383253e-05,
"loss": 3.1892,
"step": 13400
},
{
"epoch": 0.09,
"learning_rate": 4.597423510466989e-05,
"loss": 3.2126,
"step": 13500
},
{
"epoch": 0.09,
"learning_rate": 4.594202898550725e-05,
"loss": 3.1812,
"step": 13600
},
{
"epoch": 0.09,
"learning_rate": 4.5909822866344606e-05,
"loss": 3.1947,
"step": 13700
},
{
"epoch": 0.09,
"learning_rate": 4.5877616747181966e-05,
"loss": 3.1581,
"step": 13800
},
{
"epoch": 0.09,
"learning_rate": 4.5845410628019325e-05,
"loss": 3.1775,
"step": 13900
},
{
"epoch": 0.09,
"learning_rate": 4.5813204508856684e-05,
"loss": 3.1925,
"step": 14000
},
{
"epoch": 0.09,
"learning_rate": 4.5780998389694044e-05,
"loss": 3.2015,
"step": 14100
},
{
"epoch": 0.09,
"learning_rate": 4.57487922705314e-05,
"loss": 3.2129,
"step": 14200
},
{
"epoch": 0.09,
"learning_rate": 4.571658615136876e-05,
"loss": 3.1679,
"step": 14300
},
{
"epoch": 0.09,
"learning_rate": 4.568438003220612e-05,
"loss": 3.1762,
"step": 14400
},
{
"epoch": 0.09,
"learning_rate": 4.565217391304348e-05,
"loss": 3.1758,
"step": 14500
},
{
"epoch": 0.09,
"learning_rate": 4.561996779388084e-05,
"loss": 3.1934,
"step": 14600
},
{
"epoch": 0.09,
"learning_rate": 4.55877616747182e-05,
"loss": 3.1869,
"step": 14700
},
{
"epoch": 0.09,
"learning_rate": 4.555555555555556e-05,
"loss": 3.1614,
"step": 14800
},
{
"epoch": 0.1,
"learning_rate": 4.552334943639291e-05,
"loss": 3.1731,
"step": 14900
},
{
"epoch": 0.1,
"learning_rate": 4.549114331723028e-05,
"loss": 3.1559,
"step": 15000
},
{
"epoch": 0.1,
"learning_rate": 4.545893719806764e-05,
"loss": 3.1907,
"step": 15100
},
{
"epoch": 0.1,
"learning_rate": 4.5426731078904997e-05,
"loss": 3.1697,
"step": 15200
},
{
"epoch": 0.1,
"learning_rate": 4.539452495974235e-05,
"loss": 3.1775,
"step": 15300
},
{
"epoch": 0.1,
"learning_rate": 4.5362318840579715e-05,
"loss": 3.1583,
"step": 15400
},
{
"epoch": 0.1,
"learning_rate": 4.5330112721417075e-05,
"loss": 3.1548,
"step": 15500
},
{
"epoch": 0.1,
"learning_rate": 4.5297906602254434e-05,
"loss": 3.1547,
"step": 15600
},
{
"epoch": 0.1,
"learning_rate": 4.5265700483091786e-05,
"loss": 3.1595,
"step": 15700
},
{
"epoch": 0.1,
"learning_rate": 4.5233494363929146e-05,
"loss": 3.1311,
"step": 15800
},
{
"epoch": 0.1,
"learning_rate": 4.520128824476651e-05,
"loss": 3.1494,
"step": 15900
},
{
"epoch": 0.1,
"learning_rate": 4.5169082125603865e-05,
"loss": 3.1271,
"step": 16000
},
{
"epoch": 0.1,
"learning_rate": 4.5136876006441224e-05,
"loss": 3.13,
"step": 16100
},
{
"epoch": 0.1,
"learning_rate": 4.510466988727858e-05,
"loss": 3.1193,
"step": 16200
},
{
"epoch": 0.1,
"learning_rate": 4.507246376811595e-05,
"loss": 3.1095,
"step": 16300
},
{
"epoch": 0.1,
"learning_rate": 4.50402576489533e-05,
"loss": 3.1219,
"step": 16400
},
{
"epoch": 0.11,
"learning_rate": 4.500805152979066e-05,
"loss": 3.1476,
"step": 16500
},
{
"epoch": 0.11,
"learning_rate": 4.497584541062802e-05,
"loss": 3.1189,
"step": 16600
},
{
"epoch": 0.11,
"learning_rate": 4.494363929146538e-05,
"loss": 3.1486,
"step": 16700
},
{
"epoch": 0.11,
"learning_rate": 4.491143317230274e-05,
"loss": 3.1355,
"step": 16800
},
{
"epoch": 0.11,
"learning_rate": 4.48792270531401e-05,
"loss": 3.1198,
"step": 16900
},
{
"epoch": 0.11,
"learning_rate": 4.484702093397746e-05,
"loss": 3.122,
"step": 17000
},
{
"epoch": 0.11,
"learning_rate": 4.481481481481482e-05,
"loss": 3.1181,
"step": 17100
},
{
"epoch": 0.11,
"learning_rate": 4.478260869565218e-05,
"loss": 3.1137,
"step": 17200
},
{
"epoch": 0.11,
"learning_rate": 4.4750402576489536e-05,
"loss": 3.1287,
"step": 17300
},
{
"epoch": 0.11,
"learning_rate": 4.4718196457326895e-05,
"loss": 3.1159,
"step": 17400
},
{
"epoch": 0.11,
"learning_rate": 4.4685990338164255e-05,
"loss": 3.0879,
"step": 17500
},
{
"epoch": 0.11,
"learning_rate": 4.465378421900161e-05,
"loss": 3.1141,
"step": 17600
},
{
"epoch": 0.11,
"learning_rate": 4.4621578099838974e-05,
"loss": 3.1248,
"step": 17700
},
{
"epoch": 0.11,
"learning_rate": 4.458937198067633e-05,
"loss": 3.1182,
"step": 17800
},
{
"epoch": 0.11,
"learning_rate": 4.455716586151369e-05,
"loss": 3.1048,
"step": 17900
},
{
"epoch": 0.12,
"learning_rate": 4.4524959742351045e-05,
"loss": 3.1278,
"step": 18000
},
{
"epoch": 0.12,
"learning_rate": 4.449275362318841e-05,
"loss": 3.0932,
"step": 18100
},
{
"epoch": 0.12,
"learning_rate": 4.446054750402577e-05,
"loss": 3.0845,
"step": 18200
},
{
"epoch": 0.12,
"learning_rate": 4.442834138486312e-05,
"loss": 3.0972,
"step": 18300
},
{
"epoch": 0.12,
"learning_rate": 4.439613526570048e-05,
"loss": 3.1083,
"step": 18400
},
{
"epoch": 0.12,
"learning_rate": 4.436392914653785e-05,
"loss": 3.1304,
"step": 18500
},
{
"epoch": 0.12,
"learning_rate": 4.433172302737521e-05,
"loss": 3.1046,
"step": 18600
},
{
"epoch": 0.12,
"learning_rate": 4.429951690821256e-05,
"loss": 3.0871,
"step": 18700
},
{
"epoch": 0.12,
"learning_rate": 4.426731078904992e-05,
"loss": 3.0369,
"step": 18800
},
{
"epoch": 0.12,
"learning_rate": 4.423510466988728e-05,
"loss": 3.039,
"step": 18900
},
{
"epoch": 0.12,
"learning_rate": 4.4202898550724645e-05,
"loss": 3.0397,
"step": 19000
},
{
"epoch": 0.12,
"learning_rate": 4.4170692431562e-05,
"loss": 3.0393,
"step": 19100
},
{
"epoch": 0.12,
"learning_rate": 4.413848631239936e-05,
"loss": 3.0536,
"step": 19200
},
{
"epoch": 0.12,
"learning_rate": 4.4106280193236716e-05,
"loss": 3.0447,
"step": 19300
},
{
"epoch": 0.12,
"learning_rate": 4.4074074074074076e-05,
"loss": 3.0601,
"step": 19400
},
{
"epoch": 0.12,
"learning_rate": 4.4041867954911435e-05,
"loss": 3.0573,
"step": 19500
},
{
"epoch": 0.13,
"learning_rate": 4.4009661835748794e-05,
"loss": 3.0309,
"step": 19600
},
{
"epoch": 0.13,
"learning_rate": 4.3977455716586154e-05,
"loss": 3.0176,
"step": 19700
},
{
"epoch": 0.13,
"learning_rate": 4.394524959742351e-05,
"loss": 3.0338,
"step": 19800
},
{
"epoch": 0.13,
"learning_rate": 4.391304347826087e-05,
"loss": 3.0341,
"step": 19900
},
{
"epoch": 0.13,
"learning_rate": 4.388083735909823e-05,
"loss": 3.0642,
"step": 20000
},
{
"epoch": 0.13,
"learning_rate": 4.384863123993559e-05,
"loss": 3.0159,
"step": 20100
},
{
"epoch": 0.13,
"learning_rate": 4.3816425120772944e-05,
"loss": 3.0312,
"step": 20200
},
{
"epoch": 0.13,
"learning_rate": 4.378421900161031e-05,
"loss": 3.0389,
"step": 20300
},
{
"epoch": 0.13,
"learning_rate": 4.375201288244767e-05,
"loss": 3.0185,
"step": 20400
},
{
"epoch": 0.13,
"learning_rate": 4.371980676328503e-05,
"loss": 3.0288,
"step": 20500
},
{
"epoch": 0.13,
"learning_rate": 4.368760064412238e-05,
"loss": 3.0237,
"step": 20600
},
{
"epoch": 0.13,
"learning_rate": 4.365539452495974e-05,
"loss": 3.0061,
"step": 20700
},
{
"epoch": 0.13,
"learning_rate": 4.362318840579711e-05,
"loss": 3.0291,
"step": 20800
},
{
"epoch": 0.13,
"learning_rate": 4.3590982286634466e-05,
"loss": 3.0224,
"step": 20900
},
{
"epoch": 0.13,
"learning_rate": 4.355877616747182e-05,
"loss": 3.036,
"step": 21000
},
{
"epoch": 0.14,
"learning_rate": 4.352657004830918e-05,
"loss": 3.0331,
"step": 21100
},
{
"epoch": 0.14,
"learning_rate": 4.3494363929146544e-05,
"loss": 3.031,
"step": 21200
},
{
"epoch": 0.14,
"learning_rate": 4.34621578099839e-05,
"loss": 3.0111,
"step": 21300
},
{
"epoch": 0.14,
"learning_rate": 4.3429951690821256e-05,
"loss": 3.0262,
"step": 21400
},
{
"epoch": 0.14,
"learning_rate": 4.3397745571658615e-05,
"loss": 3.0206,
"step": 21500
},
{
"epoch": 0.14,
"learning_rate": 4.336553945249598e-05,
"loss": 3.0247,
"step": 21600
},
{
"epoch": 0.14,
"learning_rate": 4.3333333333333334e-05,
"loss": 3.0193,
"step": 21700
},
{
"epoch": 0.14,
"learning_rate": 4.3301127214170693e-05,
"loss": 3.0191,
"step": 21800
},
{
"epoch": 0.14,
"learning_rate": 4.326892109500805e-05,
"loss": 3.0077,
"step": 21900
},
{
"epoch": 0.14,
"learning_rate": 4.323671497584541e-05,
"loss": 3.0137,
"step": 22000
},
{
"epoch": 0.14,
"learning_rate": 4.320450885668277e-05,
"loss": 3.0214,
"step": 22100
},
{
"epoch": 0.14,
"learning_rate": 4.317230273752013e-05,
"loss": 3.0309,
"step": 22200
},
{
"epoch": 0.14,
"learning_rate": 4.314009661835749e-05,
"loss": 3.0105,
"step": 22300
},
{
"epoch": 0.14,
"learning_rate": 4.310789049919485e-05,
"loss": 3.0167,
"step": 22400
},
{
"epoch": 0.14,
"learning_rate": 4.307568438003221e-05,
"loss": 3.0264,
"step": 22500
},
{
"epoch": 0.14,
"learning_rate": 4.304347826086957e-05,
"loss": 3.0052,
"step": 22600
},
{
"epoch": 0.15,
"learning_rate": 4.301127214170693e-05,
"loss": 3.0045,
"step": 22700
},
{
"epoch": 0.15,
"learning_rate": 4.297906602254429e-05,
"loss": 2.9984,
"step": 22800
},
{
"epoch": 0.15,
"learning_rate": 4.294685990338164e-05,
"loss": 3.0421,
"step": 22900
},
{
"epoch": 0.15,
"learning_rate": 4.2914653784219006e-05,
"loss": 3.0304,
"step": 23000
},
{
"epoch": 0.15,
"learning_rate": 4.2882447665056365e-05,
"loss": 2.9881,
"step": 23100
},
{
"epoch": 0.15,
"learning_rate": 4.2850241545893724e-05,
"loss": 3.0141,
"step": 23200
},
{
"epoch": 0.15,
"learning_rate": 4.281803542673108e-05,
"loss": 3.0256,
"step": 23300
},
{
"epoch": 0.15,
"learning_rate": 4.278582930756844e-05,
"loss": 3.0171,
"step": 23400
},
{
"epoch": 0.15,
"learning_rate": 4.27536231884058e-05,
"loss": 3.0125,
"step": 23500
},
{
"epoch": 0.15,
"learning_rate": 4.2721417069243155e-05,
"loss": 2.9943,
"step": 23600
},
{
"epoch": 0.15,
"learning_rate": 4.2689210950080514e-05,
"loss": 3.0275,
"step": 23700
},
{
"epoch": 0.15,
"learning_rate": 4.2657004830917874e-05,
"loss": 3.0172,
"step": 23800
},
{
"epoch": 0.15,
"learning_rate": 4.262479871175524e-05,
"loss": 3.026,
"step": 23900
},
{
"epoch": 0.15,
"learning_rate": 4.259259259259259e-05,
"loss": 2.9931,
"step": 24000
},
{
"epoch": 0.15,
"learning_rate": 4.256038647342995e-05,
"loss": 2.9838,
"step": 24100
},
{
"epoch": 0.15,
"learning_rate": 4.252818035426731e-05,
"loss": 3.0091,
"step": 24200
},
{
"epoch": 0.16,
"learning_rate": 4.249597423510468e-05,
"loss": 3.0074,
"step": 24300
},
{
"epoch": 0.16,
"learning_rate": 4.246376811594203e-05,
"loss": 3.0174,
"step": 24400
},
{
"epoch": 0.16,
"learning_rate": 4.243156199677939e-05,
"loss": 3.0061,
"step": 24500
},
{
"epoch": 0.16,
"learning_rate": 4.239935587761675e-05,
"loss": 3.0047,
"step": 24600
},
{
"epoch": 0.16,
"learning_rate": 4.236714975845411e-05,
"loss": 3.0222,
"step": 24700
},
{
"epoch": 0.16,
"learning_rate": 4.233494363929147e-05,
"loss": 2.9972,
"step": 24800
},
{
"epoch": 0.16,
"learning_rate": 4.2302737520128827e-05,
"loss": 2.9867,
"step": 24900
},
{
"epoch": 0.16,
"learning_rate": 4.2270531400966186e-05,
"loss": 2.9991,
"step": 25000
},
{
"epoch": 0.16,
"learning_rate": 4.2238325281803545e-05,
"loss": 3.0085,
"step": 25100
},
{
"epoch": 0.16,
"learning_rate": 4.2206119162640905e-05,
"loss": 3.0198,
"step": 25200
},
{
"epoch": 0.16,
"learning_rate": 4.2173913043478264e-05,
"loss": 3.0023,
"step": 25300
},
{
"epoch": 0.16,
"learning_rate": 4.214170692431562e-05,
"loss": 3.0037,
"step": 25400
},
{
"epoch": 0.16,
"learning_rate": 4.210950080515298e-05,
"loss": 3.0127,
"step": 25500
},
{
"epoch": 0.16,
"learning_rate": 4.207729468599034e-05,
"loss": 2.9961,
"step": 25600
},
{
"epoch": 0.16,
"learning_rate": 4.20450885668277e-05,
"loss": 2.9947,
"step": 25700
},
{
"epoch": 0.17,
"learning_rate": 4.201288244766506e-05,
"loss": 2.9962,
"step": 25800
},
{
"epoch": 0.17,
"learning_rate": 4.198067632850241e-05,
"loss": 2.9909,
"step": 25900
},
{
"epoch": 0.17,
"learning_rate": 4.194847020933977e-05,
"loss": 3.0012,
"step": 26000
},
{
"epoch": 0.17,
"learning_rate": 4.191626409017714e-05,
"loss": 3.0109,
"step": 26100
},
{
"epoch": 0.17,
"learning_rate": 4.18840579710145e-05,
"loss": 2.9798,
"step": 26200
},
{
"epoch": 0.17,
"learning_rate": 4.185185185185185e-05,
"loss": 2.9892,
"step": 26300
},
{
"epoch": 0.17,
"learning_rate": 4.181964573268921e-05,
"loss": 3.009,
"step": 26400
},
{
"epoch": 0.17,
"learning_rate": 4.1787439613526576e-05,
"loss": 2.9882,
"step": 26500
},
{
"epoch": 0.17,
"learning_rate": 4.1755233494363936e-05,
"loss": 2.9866,
"step": 26600
},
{
"epoch": 0.17,
"learning_rate": 4.172302737520129e-05,
"loss": 2.996,
"step": 26700
},
{
"epoch": 0.17,
"learning_rate": 4.169082125603865e-05,
"loss": 2.9869,
"step": 26800
},
{
"epoch": 0.17,
"learning_rate": 4.165861513687601e-05,
"loss": 2.9803,
"step": 26900
},
{
"epoch": 0.17,
"learning_rate": 4.1626409017713366e-05,
"loss": 2.9795,
"step": 27000
},
{
"epoch": 0.17,
"learning_rate": 4.1594202898550726e-05,
"loss": 2.9647,
"step": 27100
},
{
"epoch": 0.17,
"learning_rate": 4.1561996779388085e-05,
"loss": 2.9798,
"step": 27200
},
{
"epoch": 0.17,
"learning_rate": 4.1529790660225444e-05,
"loss": 2.9796,
"step": 27300
},
{
"epoch": 0.18,
"learning_rate": 4.1497584541062804e-05,
"loss": 3.0083,
"step": 27400
},
{
"epoch": 0.18,
"learning_rate": 4.146537842190016e-05,
"loss": 2.9906,
"step": 27500
},
{
"epoch": 0.18,
"learning_rate": 4.143317230273752e-05,
"loss": 2.9976,
"step": 27600
},
{
"epoch": 0.18,
"learning_rate": 4.140096618357488e-05,
"loss": 2.9938,
"step": 27700
},
{
"epoch": 0.18,
"learning_rate": 4.136876006441224e-05,
"loss": 2.9755,
"step": 27800
},
{
"epoch": 0.18,
"learning_rate": 4.13365539452496e-05,
"loss": 2.9946,
"step": 27900
},
{
"epoch": 0.18,
"learning_rate": 4.130434782608696e-05,
"loss": 2.9822,
"step": 28000
},
{
"epoch": 0.18,
"learning_rate": 4.127214170692432e-05,
"loss": 2.9832,
"step": 28100
},
{
"epoch": 0.18,
"learning_rate": 4.123993558776167e-05,
"loss": 2.9882,
"step": 28200
},
{
"epoch": 0.18,
"learning_rate": 4.120772946859904e-05,
"loss": 2.9852,
"step": 28300
},
{
"epoch": 0.18,
"learning_rate": 4.11755233494364e-05,
"loss": 2.9715,
"step": 28400
},
{
"epoch": 0.18,
"learning_rate": 4.1143317230273756e-05,
"loss": 2.9766,
"step": 28500
},
{
"epoch": 0.18,
"learning_rate": 4.111111111111111e-05,
"loss": 2.9918,
"step": 28600
},
{
"epoch": 0.18,
"learning_rate": 4.1078904991948475e-05,
"loss": 2.987,
"step": 28700
},
{
"epoch": 0.18,
"learning_rate": 4.1046698872785834e-05,
"loss": 3.0042,
"step": 28800
},
{
"epoch": 0.18,
"learning_rate": 4.101449275362319e-05,
"loss": 2.9618,
"step": 28900
},
{
"epoch": 0.19,
"learning_rate": 4.0982286634460546e-05,
"loss": 2.9574,
"step": 29000
},
{
"epoch": 0.19,
"learning_rate": 4.0950080515297906e-05,
"loss": 2.9725,
"step": 29100
},
{
"epoch": 0.19,
"learning_rate": 4.091787439613527e-05,
"loss": 2.9744,
"step": 29200
},
{
"epoch": 0.19,
"learning_rate": 4.0885668276972624e-05,
"loss": 2.9626,
"step": 29300
},
{
"epoch": 0.19,
"learning_rate": 4.0853462157809984e-05,
"loss": 2.9696,
"step": 29400
},
{
"epoch": 0.19,
"learning_rate": 4.082125603864734e-05,
"loss": 2.9656,
"step": 29500
},
{
"epoch": 0.19,
"learning_rate": 4.078904991948471e-05,
"loss": 2.9799,
"step": 29600
},
{
"epoch": 0.19,
"learning_rate": 4.075684380032206e-05,
"loss": 2.967,
"step": 29700
},
{
"epoch": 0.19,
"learning_rate": 4.072463768115942e-05,
"loss": 2.9708,
"step": 29800
},
{
"epoch": 0.19,
"learning_rate": 4.069243156199678e-05,
"loss": 2.9802,
"step": 29900
},
{
"epoch": 0.19,
"learning_rate": 4.066022544283414e-05,
"loss": 2.9924,
"step": 30000
},
{
"epoch": 0.19,
"learning_rate": 4.06280193236715e-05,
"loss": 2.9763,
"step": 30100
},
{
"epoch": 0.19,
"learning_rate": 4.059581320450886e-05,
"loss": 2.9725,
"step": 30200
},
{
"epoch": 0.19,
"learning_rate": 4.056360708534622e-05,
"loss": 2.9613,
"step": 30300
},
{
"epoch": 0.19,
"learning_rate": 4.053140096618358e-05,
"loss": 2.9905,
"step": 30400
},
{
"epoch": 0.2,
"learning_rate": 4.049919484702094e-05,
"loss": 2.983,
"step": 30500
},
{
"epoch": 0.2,
"learning_rate": 4.0466988727858296e-05,
"loss": 2.9856,
"step": 30600
},
{
"epoch": 0.2,
"learning_rate": 4.0434782608695655e-05,
"loss": 2.9911,
"step": 30700
},
{
"epoch": 0.2,
"learning_rate": 4.0402576489533015e-05,
"loss": 2.9511,
"step": 30800
},
{
"epoch": 0.2,
"learning_rate": 4.0370370370370374e-05,
"loss": 2.9594,
"step": 30900
},
{
"epoch": 0.2,
"learning_rate": 4.0338164251207733e-05,
"loss": 2.9881,
"step": 31000
},
{
"epoch": 0.2,
"learning_rate": 4.030595813204509e-05,
"loss": 2.9808,
"step": 31100
},
{
"epoch": 0.2,
"learning_rate": 4.0273752012882445e-05,
"loss": 2.9767,
"step": 31200
},
{
"epoch": 0.2,
"learning_rate": 4.0241545893719805e-05,
"loss": 2.9637,
"step": 31300
},
{
"epoch": 0.2,
"learning_rate": 4.020933977455717e-05,
"loss": 2.97,
"step": 31400
},
{
"epoch": 0.2,
"learning_rate": 4.017713365539453e-05,
"loss": 2.9831,
"step": 31500
},
{
"epoch": 0.2,
"learning_rate": 4.014492753623188e-05,
"loss": 2.9591,
"step": 31600
},
{
"epoch": 0.2,
"learning_rate": 4.011272141706924e-05,
"loss": 2.9677,
"step": 31700
},
{
"epoch": 0.2,
"learning_rate": 4.008051529790661e-05,
"loss": 2.9603,
"step": 31800
},
{
"epoch": 0.2,
"learning_rate": 4.004830917874397e-05,
"loss": 2.9792,
"step": 31900
},
{
"epoch": 0.2,
"learning_rate": 4.001610305958132e-05,
"loss": 2.9638,
"step": 32000
},
{
"epoch": 0.21,
"learning_rate": 3.998389694041868e-05,
"loss": 2.9877,
"step": 32100
},
{
"epoch": 0.21,
"learning_rate": 3.995169082125604e-05,
"loss": 2.9701,
"step": 32200
},
{
"epoch": 0.21,
"learning_rate": 3.99194847020934e-05,
"loss": 2.9743,
"step": 32300
},
{
"epoch": 0.21,
"learning_rate": 3.988727858293076e-05,
"loss": 2.9525,
"step": 32400
},
{
"epoch": 0.21,
"learning_rate": 3.985507246376812e-05,
"loss": 2.9712,
"step": 32500
},
{
"epoch": 0.21,
"learning_rate": 3.9822866344605476e-05,
"loss": 2.9849,
"step": 32600
},
{
"epoch": 0.21,
"learning_rate": 3.9790660225442836e-05,
"loss": 2.9793,
"step": 32700
},
{
"epoch": 0.21,
"learning_rate": 3.9758454106280195e-05,
"loss": 2.962,
"step": 32800
},
{
"epoch": 0.21,
"learning_rate": 3.9726247987117554e-05,
"loss": 2.9341,
"step": 32900
},
{
"epoch": 0.21,
"learning_rate": 3.9694041867954914e-05,
"loss": 2.9616,
"step": 33000
},
{
"epoch": 0.21,
"learning_rate": 3.966183574879227e-05,
"loss": 2.9624,
"step": 33100
},
{
"epoch": 0.21,
"learning_rate": 3.962962962962963e-05,
"loss": 2.9524,
"step": 33200
},
{
"epoch": 0.21,
"learning_rate": 3.959742351046699e-05,
"loss": 2.9496,
"step": 33300
},
{
"epoch": 0.21,
"learning_rate": 3.956521739130435e-05,
"loss": 2.946,
"step": 33400
},
{
"epoch": 0.21,
"learning_rate": 3.9533011272141704e-05,
"loss": 2.9893,
"step": 33500
},
{
"epoch": 0.22,
"learning_rate": 3.950080515297907e-05,
"loss": 2.9673,
"step": 33600
},
{
"epoch": 0.22,
"learning_rate": 3.946859903381643e-05,
"loss": 2.9602,
"step": 33700
},
{
"epoch": 0.22,
"learning_rate": 3.943639291465379e-05,
"loss": 2.9296,
"step": 33800
},
{
"epoch": 0.22,
"learning_rate": 3.940418679549114e-05,
"loss": 2.9511,
"step": 33900
},
{
"epoch": 0.22,
"learning_rate": 3.937198067632851e-05,
"loss": 2.9673,
"step": 34000
},
{
"epoch": 0.22,
"learning_rate": 3.9339774557165867e-05,
"loss": 2.9396,
"step": 34100
},
{
"epoch": 0.22,
"learning_rate": 3.9307568438003226e-05,
"loss": 2.964,
"step": 34200
},
{
"epoch": 0.22,
"learning_rate": 3.927536231884058e-05,
"loss": 2.9424,
"step": 34300
},
{
"epoch": 0.22,
"learning_rate": 3.924315619967794e-05,
"loss": 2.9581,
"step": 34400
},
{
"epoch": 0.22,
"learning_rate": 3.9210950080515304e-05,
"loss": 2.9764,
"step": 34500
},
{
"epoch": 0.22,
"learning_rate": 3.9178743961352657e-05,
"loss": 2.9569,
"step": 34600
},
{
"epoch": 0.22,
"learning_rate": 3.9146537842190016e-05,
"loss": 2.9493,
"step": 34700
},
{
"epoch": 0.22,
"learning_rate": 3.9114331723027375e-05,
"loss": 2.9481,
"step": 34800
},
{
"epoch": 0.22,
"learning_rate": 3.908212560386474e-05,
"loss": 2.9708,
"step": 34900
},
{
"epoch": 0.22,
"learning_rate": 3.9049919484702094e-05,
"loss": 2.961,
"step": 35000
},
{
"epoch": 0.22,
"learning_rate": 3.901771336553945e-05,
"loss": 2.9671,
"step": 35100
},
{
"epoch": 0.23,
"learning_rate": 3.898550724637681e-05,
"loss": 2.9513,
"step": 35200
},
{
"epoch": 0.23,
"learning_rate": 3.895330112721417e-05,
"loss": 2.9627,
"step": 35300
},
{
"epoch": 0.23,
"learning_rate": 3.892109500805153e-05,
"loss": 2.9801,
"step": 35400
},
{
"epoch": 0.23,
"learning_rate": 3.888888888888889e-05,
"loss": 2.9321,
"step": 35500
},
{
"epoch": 0.23,
"learning_rate": 3.885668276972625e-05,
"loss": 2.9341,
"step": 35600
},
{
"epoch": 0.23,
"learning_rate": 3.882447665056361e-05,
"loss": 2.9564,
"step": 35700
},
{
"epoch": 0.23,
"learning_rate": 3.879227053140097e-05,
"loss": 2.9457,
"step": 35800
},
{
"epoch": 0.23,
"learning_rate": 3.876006441223833e-05,
"loss": 2.9391,
"step": 35900
},
{
"epoch": 0.23,
"learning_rate": 3.872785829307569e-05,
"loss": 2.9653,
"step": 36000
},
{
"epoch": 0.23,
"learning_rate": 3.869565217391305e-05,
"loss": 2.9586,
"step": 36100
},
{
"epoch": 0.23,
"learning_rate": 3.86634460547504e-05,
"loss": 2.9318,
"step": 36200
},
{
"epoch": 0.23,
"learning_rate": 3.8631239935587766e-05,
"loss": 2.9638,
"step": 36300
},
{
"epoch": 0.23,
"learning_rate": 3.8599033816425125e-05,
"loss": 2.9298,
"step": 36400
},
{
"epoch": 0.23,
"learning_rate": 3.8566827697262484e-05,
"loss": 2.9582,
"step": 36500
},
{
"epoch": 0.23,
"learning_rate": 3.853462157809984e-05,
"loss": 2.9551,
"step": 36600
},
{
"epoch": 0.23,
"learning_rate": 3.85024154589372e-05,
"loss": 2.9413,
"step": 36700
},
{
"epoch": 0.24,
"learning_rate": 3.847020933977456e-05,
"loss": 2.9451,
"step": 36800
},
{
"epoch": 0.24,
"learning_rate": 3.8438003220611915e-05,
"loss": 2.9283,
"step": 36900
},
{
"epoch": 0.24,
"learning_rate": 3.8405797101449274e-05,
"loss": 2.9442,
"step": 37000
},
{
"epoch": 0.24,
"learning_rate": 3.837359098228664e-05,
"loss": 2.9317,
"step": 37100
},
{
"epoch": 0.24,
"learning_rate": 3.8341384863124e-05,
"loss": 2.9502,
"step": 37200
},
{
"epoch": 0.24,
"learning_rate": 3.830917874396135e-05,
"loss": 2.9569,
"step": 37300
},
{
"epoch": 0.24,
"learning_rate": 3.827697262479871e-05,
"loss": 2.9578,
"step": 37400
},
{
"epoch": 0.24,
"learning_rate": 3.824476650563607e-05,
"loss": 2.9217,
"step": 37500
},
{
"epoch": 0.24,
"learning_rate": 3.821256038647344e-05,
"loss": 2.892,
"step": 37600
},
{
"epoch": 0.24,
"learning_rate": 3.818035426731079e-05,
"loss": 2.9126,
"step": 37700
},
{
"epoch": 0.24,
"learning_rate": 3.814814814814815e-05,
"loss": 2.9241,
"step": 37800
},
{
"epoch": 0.24,
"learning_rate": 3.811594202898551e-05,
"loss": 2.9173,
"step": 37900
},
{
"epoch": 0.24,
"learning_rate": 3.808373590982287e-05,
"loss": 2.9392,
"step": 38000
},
{
"epoch": 0.24,
"learning_rate": 3.805152979066023e-05,
"loss": 2.9118,
"step": 38100
},
{
"epoch": 0.24,
"learning_rate": 3.8019323671497586e-05,
"loss": 2.9359,
"step": 38200
},
{
"epoch": 0.25,
"learning_rate": 3.7987117552334946e-05,
"loss": 2.9353,
"step": 38300
},
{
"epoch": 0.25,
"learning_rate": 3.7954911433172305e-05,
"loss": 2.9162,
"step": 38400
},
{
"epoch": 0.25,
"learning_rate": 3.7922705314009665e-05,
"loss": 2.9191,
"step": 38500
},
{
"epoch": 0.25,
"learning_rate": 3.7890499194847024e-05,
"loss": 2.9109,
"step": 38600
},
{
"epoch": 0.25,
"learning_rate": 3.785829307568438e-05,
"loss": 2.9269,
"step": 38700
},
{
"epoch": 0.25,
"learning_rate": 3.7826086956521736e-05,
"loss": 2.9294,
"step": 38800
},
{
"epoch": 0.25,
"learning_rate": 3.77938808373591e-05,
"loss": 2.9212,
"step": 38900
},
{
"epoch": 0.25,
"learning_rate": 3.776167471819646e-05,
"loss": 2.896,
"step": 39000
},
{
"epoch": 0.25,
"learning_rate": 3.772946859903382e-05,
"loss": 2.9256,
"step": 39100
},
{
"epoch": 0.25,
"learning_rate": 3.769726247987117e-05,
"loss": 2.9094,
"step": 39200
},
{
"epoch": 0.25,
"learning_rate": 3.766505636070853e-05,
"loss": 2.9109,
"step": 39300
},
{
"epoch": 0.25,
"learning_rate": 3.76328502415459e-05,
"loss": 2.93,
"step": 39400
},
{
"epoch": 0.25,
"learning_rate": 3.760064412238326e-05,
"loss": 2.8969,
"step": 39500
},
{
"epoch": 0.25,
"learning_rate": 3.756843800322061e-05,
"loss": 2.8838,
"step": 39600
},
{
"epoch": 0.25,
"learning_rate": 3.753623188405797e-05,
"loss": 2.9289,
"step": 39700
},
{
"epoch": 0.25,
"learning_rate": 3.7504025764895336e-05,
"loss": 2.9072,
"step": 39800
},
{
"epoch": 0.26,
"learning_rate": 3.747181964573269e-05,
"loss": 2.885,
"step": 39900
},
{
"epoch": 0.26,
"learning_rate": 3.743961352657005e-05,
"loss": 2.9021,
"step": 40000
},
{
"epoch": 0.26,
"learning_rate": 3.740740740740741e-05,
"loss": 2.9029,
"step": 40100
},
{
"epoch": 0.26,
"learning_rate": 3.737520128824477e-05,
"loss": 2.9137,
"step": 40200
},
{
"epoch": 0.26,
"learning_rate": 3.7342995169082126e-05,
"loss": 2.9391,
"step": 40300
},
{
"epoch": 0.26,
"learning_rate": 3.7310789049919485e-05,
"loss": 2.8991,
"step": 40400
},
{
"epoch": 0.26,
"learning_rate": 3.7278582930756845e-05,
"loss": 2.9151,
"step": 40500
},
{
"epoch": 0.26,
"learning_rate": 3.7246376811594204e-05,
"loss": 2.9185,
"step": 40600
},
{
"epoch": 0.26,
"learning_rate": 3.7214170692431564e-05,
"loss": 2.9046,
"step": 40700
},
{
"epoch": 0.26,
"learning_rate": 3.718196457326892e-05,
"loss": 2.9137,
"step": 40800
},
{
"epoch": 0.26,
"learning_rate": 3.714975845410628e-05,
"loss": 2.9184,
"step": 40900
},
{
"epoch": 0.26,
"learning_rate": 3.711755233494364e-05,
"loss": 2.9086,
"step": 41000
},
{
"epoch": 0.26,
"learning_rate": 3.7085346215781e-05,
"loss": 2.889,
"step": 41100
},
{
"epoch": 0.26,
"learning_rate": 3.705314009661836e-05,
"loss": 2.9011,
"step": 41200
},
{
"epoch": 0.26,
"learning_rate": 3.702093397745572e-05,
"loss": 2.9011,
"step": 41300
},
{
"epoch": 0.26,
"learning_rate": 3.698872785829308e-05,
"loss": 2.9259,
"step": 41400
},
{
"epoch": 0.27,
"learning_rate": 3.695652173913043e-05,
"loss": 2.9241,
"step": 41500
},
{
"epoch": 0.27,
"learning_rate": 3.69243156199678e-05,
"loss": 2.9135,
"step": 41600
},
{
"epoch": 0.27,
"learning_rate": 3.689210950080516e-05,
"loss": 2.8876,
"step": 41700
},
{
"epoch": 0.27,
"learning_rate": 3.6859903381642516e-05,
"loss": 2.8951,
"step": 41800
},
{
"epoch": 0.27,
"learning_rate": 3.682769726247987e-05,
"loss": 2.8864,
"step": 41900
},
{
"epoch": 0.27,
"learning_rate": 3.6795491143317235e-05,
"loss": 2.9185,
"step": 42000
},
{
"epoch": 0.27,
"learning_rate": 3.6763285024154594e-05,
"loss": 2.9086,
"step": 42100
},
{
"epoch": 0.27,
"learning_rate": 3.673107890499195e-05,
"loss": 2.8971,
"step": 42200
},
{
"epoch": 0.27,
"learning_rate": 3.6698872785829306e-05,
"loss": 2.9113,
"step": 42300
},
{
"epoch": 0.27,
"learning_rate": 3.6666666666666666e-05,
"loss": 2.9018,
"step": 42400
},
{
"epoch": 0.27,
"learning_rate": 3.663446054750403e-05,
"loss": 2.9114,
"step": 42500
},
{
"epoch": 0.27,
"learning_rate": 3.6602254428341384e-05,
"loss": 2.9079,
"step": 42600
},
{
"epoch": 0.27,
"learning_rate": 3.6570048309178744e-05,
"loss": 2.9002,
"step": 42700
},
{
"epoch": 0.27,
"learning_rate": 3.65378421900161e-05,
"loss": 2.895,
"step": 42800
},
{
"epoch": 0.27,
"learning_rate": 3.650563607085347e-05,
"loss": 2.9185,
"step": 42900
},
{
"epoch": 0.28,
"learning_rate": 3.647342995169082e-05,
"loss": 2.9129,
"step": 43000
},
{
"epoch": 0.28,
"learning_rate": 3.644122383252818e-05,
"loss": 2.9062,
"step": 43100
},
{
"epoch": 0.28,
"learning_rate": 3.640901771336554e-05,
"loss": 2.922,
"step": 43200
},
{
"epoch": 0.28,
"learning_rate": 3.63768115942029e-05,
"loss": 2.8951,
"step": 43300
},
{
"epoch": 0.28,
"learning_rate": 3.634460547504026e-05,
"loss": 2.8875,
"step": 43400
},
{
"epoch": 0.28,
"learning_rate": 3.631239935587762e-05,
"loss": 2.9232,
"step": 43500
},
{
"epoch": 0.28,
"learning_rate": 3.628019323671498e-05,
"loss": 2.8988,
"step": 43600
},
{
"epoch": 0.28,
"learning_rate": 3.624798711755234e-05,
"loss": 2.8981,
"step": 43700
},
{
"epoch": 0.28,
"learning_rate": 3.62157809983897e-05,
"loss": 2.9191,
"step": 43800
},
{
"epoch": 0.28,
"learning_rate": 3.6183574879227056e-05,
"loss": 2.913,
"step": 43900
},
{
"epoch": 0.28,
"learning_rate": 3.6151368760064415e-05,
"loss": 2.8866,
"step": 44000
},
{
"epoch": 0.28,
"learning_rate": 3.6119162640901775e-05,
"loss": 2.9148,
"step": 44100
},
{
"epoch": 0.28,
"learning_rate": 3.6086956521739134e-05,
"loss": 2.8952,
"step": 44200
},
{
"epoch": 0.28,
"learning_rate": 3.6054750402576493e-05,
"loss": 2.9016,
"step": 44300
},
{
"epoch": 0.28,
"learning_rate": 3.602254428341385e-05,
"loss": 2.8907,
"step": 44400
},
{
"epoch": 0.28,
"learning_rate": 3.5990338164251205e-05,
"loss": 2.9131,
"step": 44500
},
{
"epoch": 0.29,
"learning_rate": 3.5958132045088565e-05,
"loss": 2.9062,
"step": 44600
},
{
"epoch": 0.29,
"learning_rate": 3.592592592592593e-05,
"loss": 2.8906,
"step": 44700
},
{
"epoch": 0.29,
"learning_rate": 3.589371980676329e-05,
"loss": 2.9001,
"step": 44800
},
{
"epoch": 0.29,
"learning_rate": 3.586151368760064e-05,
"loss": 2.9017,
"step": 44900
},
{
"epoch": 0.29,
"learning_rate": 3.5829307568438e-05,
"loss": 2.8877,
"step": 45000
},
{
"epoch": 0.29,
"learning_rate": 3.579710144927537e-05,
"loss": 2.9001,
"step": 45100
},
{
"epoch": 0.29,
"learning_rate": 3.576489533011273e-05,
"loss": 2.8881,
"step": 45200
},
{
"epoch": 0.29,
"learning_rate": 3.573268921095008e-05,
"loss": 2.9095,
"step": 45300
},
{
"epoch": 0.29,
"learning_rate": 3.570048309178744e-05,
"loss": 2.8986,
"step": 45400
},
{
"epoch": 0.29,
"learning_rate": 3.56682769726248e-05,
"loss": 2.8943,
"step": 45500
},
{
"epoch": 0.29,
"learning_rate": 3.563607085346216e-05,
"loss": 2.9075,
"step": 45600
},
{
"epoch": 0.29,
"learning_rate": 3.560386473429952e-05,
"loss": 2.8839,
"step": 45700
},
{
"epoch": 0.29,
"learning_rate": 3.557165861513688e-05,
"loss": 2.8898,
"step": 45800
},
{
"epoch": 0.29,
"learning_rate": 3.5539452495974236e-05,
"loss": 2.8924,
"step": 45900
},
{
"epoch": 0.29,
"learning_rate": 3.5507246376811596e-05,
"loss": 2.9121,
"step": 46000
},
{
"epoch": 0.3,
"learning_rate": 3.5475040257648955e-05,
"loss": 2.8874,
"step": 46100
},
{
"epoch": 0.3,
"learning_rate": 3.5442834138486314e-05,
"loss": 2.8974,
"step": 46200
},
{
"epoch": 0.3,
"learning_rate": 3.5410628019323674e-05,
"loss": 2.8805,
"step": 46300
},
{
"epoch": 0.3,
"learning_rate": 3.5378421900161026e-05,
"loss": 2.9021,
"step": 46400
},
{
"epoch": 0.3,
"learning_rate": 3.534621578099839e-05,
"loss": 2.9019,
"step": 46500
},
{
"epoch": 0.3,
"learning_rate": 3.531400966183575e-05,
"loss": 2.9134,
"step": 46600
},
{
"epoch": 0.3,
"learning_rate": 3.528180354267311e-05,
"loss": 2.8879,
"step": 46700
},
{
"epoch": 0.3,
"learning_rate": 3.5249597423510464e-05,
"loss": 2.8921,
"step": 46800
},
{
"epoch": 0.3,
"learning_rate": 3.521739130434783e-05,
"loss": 2.8898,
"step": 46900
},
{
"epoch": 0.3,
"learning_rate": 3.518518518518519e-05,
"loss": 2.9093,
"step": 47000
},
{
"epoch": 0.3,
"learning_rate": 3.515297906602255e-05,
"loss": 2.914,
"step": 47100
},
{
"epoch": 0.3,
"learning_rate": 3.51207729468599e-05,
"loss": 2.8932,
"step": 47200
},
{
"epoch": 0.3,
"learning_rate": 3.508856682769727e-05,
"loss": 2.8996,
"step": 47300
},
{
"epoch": 0.3,
"learning_rate": 3.5056360708534627e-05,
"loss": 2.9115,
"step": 47400
},
{
"epoch": 0.3,
"learning_rate": 3.502415458937198e-05,
"loss": 2.9049,
"step": 47500
},
{
"epoch": 0.3,
"learning_rate": 3.499194847020934e-05,
"loss": 2.8781,
"step": 47600
},
{
"epoch": 0.31,
"learning_rate": 3.49597423510467e-05,
"loss": 2.8964,
"step": 47700
},
{
"epoch": 0.31,
"learning_rate": 3.4927536231884064e-05,
"loss": 2.8742,
"step": 47800
},
{
"epoch": 0.31,
"learning_rate": 3.4895330112721417e-05,
"loss": 2.906,
"step": 47900
},
{
"epoch": 0.31,
"learning_rate": 3.4863123993558776e-05,
"loss": 2.8951,
"step": 48000
},
{
"epoch": 0.31,
"learning_rate": 3.4830917874396135e-05,
"loss": 2.917,
"step": 48100
},
{
"epoch": 0.31,
"learning_rate": 3.47987117552335e-05,
"loss": 2.9107,
"step": 48200
},
{
"epoch": 0.31,
"learning_rate": 3.4766505636070854e-05,
"loss": 2.8872,
"step": 48300
},
{
"epoch": 0.31,
"learning_rate": 3.473429951690821e-05,
"loss": 2.899,
"step": 48400
},
{
"epoch": 0.31,
"learning_rate": 3.470209339774557e-05,
"loss": 2.8786,
"step": 48500
},
{
"epoch": 0.31,
"learning_rate": 3.466988727858293e-05,
"loss": 2.8938,
"step": 48600
},
{
"epoch": 0.31,
"learning_rate": 3.463768115942029e-05,
"loss": 2.8837,
"step": 48700
},
{
"epoch": 0.31,
"learning_rate": 3.460547504025765e-05,
"loss": 2.8983,
"step": 48800
},
{
"epoch": 0.31,
"learning_rate": 3.457326892109501e-05,
"loss": 2.8886,
"step": 48900
},
{
"epoch": 0.31,
"learning_rate": 3.454106280193237e-05,
"loss": 2.8936,
"step": 49000
},
{
"epoch": 0.31,
"learning_rate": 3.450885668276973e-05,
"loss": 2.8828,
"step": 49100
},
{
"epoch": 0.31,
"learning_rate": 3.447665056360709e-05,
"loss": 2.9086,
"step": 49200
},
{
"epoch": 0.32,
"learning_rate": 3.444444444444445e-05,
"loss": 2.8953,
"step": 49300
},
{
"epoch": 0.32,
"learning_rate": 3.441223832528181e-05,
"loss": 2.8772,
"step": 49400
},
{
"epoch": 0.32,
"learning_rate": 3.438003220611916e-05,
"loss": 2.9057,
"step": 49500
},
{
"epoch": 0.32,
"learning_rate": 3.4347826086956526e-05,
"loss": 2.903,
"step": 49600
},
{
"epoch": 0.32,
"learning_rate": 3.4315619967793885e-05,
"loss": 2.8807,
"step": 49700
},
{
"epoch": 0.32,
"learning_rate": 3.428341384863124e-05,
"loss": 2.8745,
"step": 49800
},
{
"epoch": 0.32,
"learning_rate": 3.42512077294686e-05,
"loss": 2.901,
"step": 49900
},
{
"epoch": 0.32,
"learning_rate": 3.421900161030596e-05,
"loss": 2.8933,
"step": 50000
},
{
"epoch": 0.32,
"learning_rate": 3.418679549114332e-05,
"loss": 2.8896,
"step": 50100
},
{
"epoch": 0.32,
"learning_rate": 3.4154589371980675e-05,
"loss": 2.9235,
"step": 50200
},
{
"epoch": 0.32,
"learning_rate": 3.4122383252818034e-05,
"loss": 2.8754,
"step": 50300
},
{
"epoch": 0.32,
"learning_rate": 3.40901771336554e-05,
"loss": 2.8787,
"step": 50400
},
{
"epoch": 0.32,
"learning_rate": 3.405797101449276e-05,
"loss": 2.8914,
"step": 50500
},
{
"epoch": 0.32,
"learning_rate": 3.402576489533011e-05,
"loss": 2.8997,
"step": 50600
},
{
"epoch": 0.32,
"learning_rate": 3.399355877616747e-05,
"loss": 2.8955,
"step": 50700
},
{
"epoch": 0.33,
"learning_rate": 3.396135265700483e-05,
"loss": 2.8822,
"step": 50800
},
{
"epoch": 0.33,
"learning_rate": 3.392914653784219e-05,
"loss": 2.8839,
"step": 50900
},
{
"epoch": 0.33,
"learning_rate": 3.389694041867955e-05,
"loss": 2.8913,
"step": 51000
},
{
"epoch": 0.33,
"learning_rate": 3.386473429951691e-05,
"loss": 2.8911,
"step": 51100
},
{
"epoch": 0.33,
"learning_rate": 3.383252818035427e-05,
"loss": 2.8872,
"step": 51200
},
{
"epoch": 0.33,
"learning_rate": 3.380032206119163e-05,
"loss": 2.8705,
"step": 51300
},
{
"epoch": 0.33,
"learning_rate": 3.376811594202899e-05,
"loss": 2.8754,
"step": 51400
},
{
"epoch": 0.33,
"learning_rate": 3.3735909822866346e-05,
"loss": 2.892,
"step": 51500
},
{
"epoch": 0.33,
"learning_rate": 3.3703703703703706e-05,
"loss": 2.8805,
"step": 51600
},
{
"epoch": 0.33,
"learning_rate": 3.3671497584541065e-05,
"loss": 2.8894,
"step": 51700
},
{
"epoch": 0.33,
"learning_rate": 3.3639291465378424e-05,
"loss": 2.8863,
"step": 51800
},
{
"epoch": 0.33,
"learning_rate": 3.3607085346215784e-05,
"loss": 2.8943,
"step": 51900
},
{
"epoch": 0.33,
"learning_rate": 3.357487922705314e-05,
"loss": 2.8963,
"step": 52000
},
{
"epoch": 0.33,
"learning_rate": 3.3542673107890496e-05,
"loss": 2.8789,
"step": 52100
},
{
"epoch": 0.33,
"learning_rate": 3.351046698872786e-05,
"loss": 2.8709,
"step": 52200
},
{
"epoch": 0.33,
"learning_rate": 3.347826086956522e-05,
"loss": 2.8862,
"step": 52300
},
{
"epoch": 0.34,
"learning_rate": 3.344605475040258e-05,
"loss": 2.8959,
"step": 52400
},
{
"epoch": 0.34,
"learning_rate": 3.341384863123993e-05,
"loss": 2.8716,
"step": 52500
},
{
"epoch": 0.34,
"learning_rate": 3.338164251207729e-05,
"loss": 2.8774,
"step": 52600
},
{
"epoch": 0.34,
"learning_rate": 3.334943639291466e-05,
"loss": 2.8918,
"step": 52700
},
{
"epoch": 0.34,
"learning_rate": 3.331723027375202e-05,
"loss": 2.8662,
"step": 52800
},
{
"epoch": 0.34,
"learning_rate": 3.328502415458937e-05,
"loss": 2.8855,
"step": 52900
},
{
"epoch": 0.34,
"learning_rate": 3.325281803542673e-05,
"loss": 2.8963,
"step": 53000
},
{
"epoch": 0.34,
"learning_rate": 3.3220611916264096e-05,
"loss": 2.8666,
"step": 53100
},
{
"epoch": 0.34,
"learning_rate": 3.318840579710145e-05,
"loss": 2.896,
"step": 53200
},
{
"epoch": 0.34,
"learning_rate": 3.315619967793881e-05,
"loss": 2.8793,
"step": 53300
},
{
"epoch": 0.34,
"learning_rate": 3.312399355877617e-05,
"loss": 2.8844,
"step": 53400
},
{
"epoch": 0.34,
"learning_rate": 3.3091787439613533e-05,
"loss": 2.874,
"step": 53500
},
{
"epoch": 0.34,
"learning_rate": 3.3059581320450886e-05,
"loss": 2.8927,
"step": 53600
},
{
"epoch": 0.34,
"learning_rate": 3.3027375201288245e-05,
"loss": 2.911,
"step": 53700
},
{
"epoch": 0.34,
"learning_rate": 3.2995169082125605e-05,
"loss": 2.8625,
"step": 53800
},
{
"epoch": 0.34,
"learning_rate": 3.2962962962962964e-05,
"loss": 2.8974,
"step": 53900
},
{
"epoch": 0.35,
"learning_rate": 3.2930756843800323e-05,
"loss": 2.8925,
"step": 54000
},
{
"epoch": 0.35,
"learning_rate": 3.289855072463768e-05,
"loss": 2.8905,
"step": 54100
},
{
"epoch": 0.35,
"learning_rate": 3.286634460547504e-05,
"loss": 2.8759,
"step": 54200
},
{
"epoch": 0.35,
"learning_rate": 3.28341384863124e-05,
"loss": 2.8914,
"step": 54300
},
{
"epoch": 0.35,
"learning_rate": 3.280193236714976e-05,
"loss": 2.891,
"step": 54400
},
{
"epoch": 0.35,
"learning_rate": 3.276972624798712e-05,
"loss": 2.8682,
"step": 54500
},
{
"epoch": 0.35,
"learning_rate": 3.273752012882448e-05,
"loss": 2.8648,
"step": 54600
},
{
"epoch": 0.35,
"learning_rate": 3.270531400966184e-05,
"loss": 2.8762,
"step": 54700
},
{
"epoch": 0.35,
"learning_rate": 3.267310789049919e-05,
"loss": 2.8824,
"step": 54800
},
{
"epoch": 0.35,
"learning_rate": 3.264090177133656e-05,
"loss": 2.8674,
"step": 54900
},
{
"epoch": 0.35,
"learning_rate": 3.260869565217392e-05,
"loss": 2.875,
"step": 55000
},
{
"epoch": 0.35,
"learning_rate": 3.2576489533011276e-05,
"loss": 2.8818,
"step": 55100
},
{
"epoch": 0.35,
"learning_rate": 3.254428341384863e-05,
"loss": 2.8724,
"step": 55200
},
{
"epoch": 0.35,
"learning_rate": 3.2512077294685995e-05,
"loss": 2.8942,
"step": 55300
},
{
"epoch": 0.35,
"learning_rate": 3.2479871175523354e-05,
"loss": 2.8675,
"step": 55400
},
{
"epoch": 0.36,
"learning_rate": 3.244766505636071e-05,
"loss": 2.8838,
"step": 55500
},
{
"epoch": 0.36,
"learning_rate": 3.2415458937198066e-05,
"loss": 2.8721,
"step": 55600
},
{
"epoch": 0.36,
"learning_rate": 3.2383252818035426e-05,
"loss": 2.8693,
"step": 55700
},
{
"epoch": 0.36,
"learning_rate": 3.235104669887279e-05,
"loss": 2.89,
"step": 55800
},
{
"epoch": 0.36,
"learning_rate": 3.2318840579710144e-05,
"loss": 2.8639,
"step": 55900
},
{
"epoch": 0.36,
"learning_rate": 3.2286634460547504e-05,
"loss": 2.8768,
"step": 56000
},
{
"epoch": 0.36,
"learning_rate": 3.225442834138486e-05,
"loss": 2.8708,
"step": 56100
},
{
"epoch": 0.36,
"learning_rate": 3.222222222222223e-05,
"loss": 2.8661,
"step": 56200
},
{
"epoch": 0.36,
"learning_rate": 3.219001610305958e-05,
"loss": 2.8472,
"step": 56300
},
{
"epoch": 0.36,
"learning_rate": 3.215780998389694e-05,
"loss": 2.866,
"step": 56400
},
{
"epoch": 0.36,
"learning_rate": 3.21256038647343e-05,
"loss": 2.8708,
"step": 56500
},
{
"epoch": 0.36,
"learning_rate": 3.209339774557166e-05,
"loss": 2.8581,
"step": 56600
},
{
"epoch": 0.36,
"learning_rate": 3.206119162640902e-05,
"loss": 2.8434,
"step": 56700
},
{
"epoch": 0.36,
"learning_rate": 3.202898550724638e-05,
"loss": 2.8684,
"step": 56800
},
{
"epoch": 0.36,
"learning_rate": 3.199677938808374e-05,
"loss": 2.8552,
"step": 56900
},
{
"epoch": 0.36,
"learning_rate": 3.19645732689211e-05,
"loss": 2.8626,
"step": 57000
},
{
"epoch": 0.37,
"learning_rate": 3.1932367149758457e-05,
"loss": 2.8462,
"step": 57100
},
{
"epoch": 0.37,
"learning_rate": 3.1900161030595816e-05,
"loss": 2.8377,
"step": 57200
},
{
"epoch": 0.37,
"learning_rate": 3.1867954911433175e-05,
"loss": 2.8687,
"step": 57300
},
{
"epoch": 0.37,
"learning_rate": 3.183574879227053e-05,
"loss": 2.8567,
"step": 57400
},
{
"epoch": 0.37,
"learning_rate": 3.1803542673107894e-05,
"loss": 2.8509,
"step": 57500
},
{
"epoch": 0.37,
"learning_rate": 3.177133655394525e-05,
"loss": 2.8479,
"step": 57600
},
{
"epoch": 0.37,
"learning_rate": 3.173913043478261e-05,
"loss": 2.8708,
"step": 57700
},
{
"epoch": 0.37,
"learning_rate": 3.1706924315619965e-05,
"loss": 2.8539,
"step": 57800
},
{
"epoch": 0.37,
"learning_rate": 3.1674718196457325e-05,
"loss": 2.8535,
"step": 57900
},
{
"epoch": 0.37,
"learning_rate": 3.164251207729469e-05,
"loss": 2.8481,
"step": 58000
},
{
"epoch": 0.37,
"learning_rate": 3.161030595813205e-05,
"loss": 2.8738,
"step": 58100
},
{
"epoch": 0.37,
"learning_rate": 3.15780998389694e-05,
"loss": 2.8605,
"step": 58200
},
{
"epoch": 0.37,
"learning_rate": 3.154589371980676e-05,
"loss": 2.837,
"step": 58300
},
{
"epoch": 0.37,
"learning_rate": 3.151368760064413e-05,
"loss": 2.8444,
"step": 58400
},
{
"epoch": 0.37,
"learning_rate": 3.148148148148148e-05,
"loss": 2.8361,
"step": 58500
},
{
"epoch": 0.38,
"learning_rate": 3.144927536231884e-05,
"loss": 2.8451,
"step": 58600
},
{
"epoch": 0.38,
"learning_rate": 3.14170692431562e-05,
"loss": 2.8295,
"step": 58700
},
{
"epoch": 0.38,
"learning_rate": 3.138486312399356e-05,
"loss": 2.8328,
"step": 58800
},
{
"epoch": 0.38,
"learning_rate": 3.135265700483092e-05,
"loss": 2.8572,
"step": 58900
},
{
"epoch": 0.38,
"learning_rate": 3.132045088566828e-05,
"loss": 2.8473,
"step": 59000
},
{
"epoch": 0.38,
"learning_rate": 3.128824476650564e-05,
"loss": 2.8604,
"step": 59100
},
{
"epoch": 0.38,
"learning_rate": 3.1256038647342996e-05,
"loss": 2.8504,
"step": 59200
},
{
"epoch": 0.38,
"learning_rate": 3.1223832528180356e-05,
"loss": 2.827,
"step": 59300
},
{
"epoch": 0.38,
"learning_rate": 3.1191626409017715e-05,
"loss": 2.8416,
"step": 59400
},
{
"epoch": 0.38,
"learning_rate": 3.1159420289855074e-05,
"loss": 2.8461,
"step": 59500
},
{
"epoch": 0.38,
"learning_rate": 3.1127214170692434e-05,
"loss": 2.8523,
"step": 59600
},
{
"epoch": 0.38,
"learning_rate": 3.109500805152979e-05,
"loss": 2.8445,
"step": 59700
},
{
"epoch": 0.38,
"learning_rate": 3.106280193236715e-05,
"loss": 2.8598,
"step": 59800
},
{
"epoch": 0.38,
"learning_rate": 3.103059581320451e-05,
"loss": 2.8545,
"step": 59900
},
{
"epoch": 0.38,
"learning_rate": 3.099838969404187e-05,
"loss": 2.8377,
"step": 60000
},
{
"epoch": 0.38,
"learning_rate": 3.0966183574879224e-05,
"loss": 2.8577,
"step": 60100
},
{
"epoch": 0.39,
"learning_rate": 3.093397745571659e-05,
"loss": 2.8576,
"step": 60200
},
{
"epoch": 0.39,
"learning_rate": 3.090177133655395e-05,
"loss": 2.8384,
"step": 60300
},
{
"epoch": 0.39,
"learning_rate": 3.086956521739131e-05,
"loss": 2.8546,
"step": 60400
},
{
"epoch": 0.39,
"learning_rate": 3.083735909822866e-05,
"loss": 2.8612,
"step": 60500
},
{
"epoch": 0.39,
"learning_rate": 3.080515297906603e-05,
"loss": 2.8665,
"step": 60600
},
{
"epoch": 0.39,
"learning_rate": 3.0772946859903386e-05,
"loss": 2.842,
"step": 60700
},
{
"epoch": 0.39,
"learning_rate": 3.074074074074074e-05,
"loss": 2.8384,
"step": 60800
},
{
"epoch": 0.39,
"learning_rate": 3.07085346215781e-05,
"loss": 2.8484,
"step": 60900
},
{
"epoch": 0.39,
"learning_rate": 3.067632850241546e-05,
"loss": 2.8333,
"step": 61000
},
{
"epoch": 0.39,
"learning_rate": 3.0644122383252824e-05,
"loss": 2.8461,
"step": 61100
},
{
"epoch": 0.39,
"learning_rate": 3.0611916264090176e-05,
"loss": 2.8529,
"step": 61200
},
{
"epoch": 0.39,
"learning_rate": 3.0579710144927536e-05,
"loss": 2.8303,
"step": 61300
},
{
"epoch": 0.39,
"learning_rate": 3.0547504025764895e-05,
"loss": 2.8382,
"step": 61400
},
{
"epoch": 0.39,
"learning_rate": 3.0515297906602258e-05,
"loss": 2.8208,
"step": 61500
},
{
"epoch": 0.39,
"learning_rate": 3.0483091787439617e-05,
"loss": 2.8559,
"step": 61600
},
{
"epoch": 0.39,
"learning_rate": 3.0450885668276973e-05,
"loss": 2.8508,
"step": 61700
},
{
"epoch": 0.4,
"learning_rate": 3.0418679549114333e-05,
"loss": 2.84,
"step": 61800
},
{
"epoch": 0.4,
"learning_rate": 3.038647342995169e-05,
"loss": 2.8386,
"step": 61900
},
{
"epoch": 0.4,
"learning_rate": 3.035426731078905e-05,
"loss": 2.8582,
"step": 62000
},
{
"epoch": 0.4,
"learning_rate": 3.032206119162641e-05,
"loss": 2.8465,
"step": 62100
},
{
"epoch": 0.4,
"learning_rate": 3.028985507246377e-05,
"loss": 2.8268,
"step": 62200
},
{
"epoch": 0.4,
"learning_rate": 3.0257648953301126e-05,
"loss": 2.8347,
"step": 62300
},
{
"epoch": 0.4,
"learning_rate": 3.022544283413849e-05,
"loss": 2.8303,
"step": 62400
},
{
"epoch": 0.4,
"learning_rate": 3.0193236714975848e-05,
"loss": 2.874,
"step": 62500
},
{
"epoch": 0.4,
"learning_rate": 3.0161030595813204e-05,
"loss": 2.8326,
"step": 62600
},
{
"epoch": 0.4,
"learning_rate": 3.0128824476650563e-05,
"loss": 2.8549,
"step": 62700
},
{
"epoch": 0.4,
"learning_rate": 3.0096618357487926e-05,
"loss": 2.8465,
"step": 62800
},
{
"epoch": 0.4,
"learning_rate": 3.0064412238325285e-05,
"loss": 2.8642,
"step": 62900
},
{
"epoch": 0.4,
"learning_rate": 3.003220611916264e-05,
"loss": 2.8433,
"step": 63000
},
{
"epoch": 0.4,
"learning_rate": 3e-05,
"loss": 2.8583,
"step": 63100
},
{
"epoch": 0.4,
"learning_rate": 2.9967793880837357e-05,
"loss": 2.8494,
"step": 63200
},
{
"epoch": 0.41,
"learning_rate": 2.9935587761674723e-05,
"loss": 2.836,
"step": 63300
},
{
"epoch": 0.41,
"learning_rate": 2.990338164251208e-05,
"loss": 2.8494,
"step": 63400
},
{
"epoch": 0.41,
"learning_rate": 2.9871175523349438e-05,
"loss": 2.8466,
"step": 63500
},
{
"epoch": 0.41,
"learning_rate": 2.9838969404186794e-05,
"loss": 2.8538,
"step": 63600
},
{
"epoch": 0.41,
"learning_rate": 2.9806763285024157e-05,
"loss": 2.8522,
"step": 63700
},
{
"epoch": 0.41,
"learning_rate": 2.9774557165861516e-05,
"loss": 2.854,
"step": 63800
},
{
"epoch": 0.41,
"learning_rate": 2.9742351046698876e-05,
"loss": 2.8272,
"step": 63900
},
{
"epoch": 0.41,
"learning_rate": 2.971014492753623e-05,
"loss": 2.844,
"step": 64000
},
{
"epoch": 0.41,
"learning_rate": 2.967793880837359e-05,
"loss": 2.8471,
"step": 64100
},
{
"epoch": 0.41,
"learning_rate": 2.9645732689210954e-05,
"loss": 2.8302,
"step": 64200
},
{
"epoch": 0.41,
"learning_rate": 2.961352657004831e-05,
"loss": 2.8652,
"step": 64300
},
{
"epoch": 0.41,
"learning_rate": 2.958132045088567e-05,
"loss": 2.8438,
"step": 64400
},
{
"epoch": 0.41,
"learning_rate": 2.9549114331723028e-05,
"loss": 2.8411,
"step": 64500
},
{
"epoch": 0.41,
"learning_rate": 2.951690821256039e-05,
"loss": 2.8372,
"step": 64600
},
{
"epoch": 0.41,
"learning_rate": 2.9484702093397747e-05,
"loss": 2.8387,
"step": 64700
},
{
"epoch": 0.41,
"learning_rate": 2.9452495974235106e-05,
"loss": 2.8622,
"step": 64800
},
{
"epoch": 0.42,
"learning_rate": 2.9420289855072462e-05,
"loss": 2.8256,
"step": 64900
},
{
"epoch": 0.42,
"learning_rate": 2.938808373590982e-05,
"loss": 2.845,
"step": 65000
},
{
"epoch": 0.42,
"learning_rate": 2.9355877616747184e-05,
"loss": 2.843,
"step": 65100
},
{
"epoch": 0.42,
"learning_rate": 2.9323671497584544e-05,
"loss": 2.8388,
"step": 65200
},
{
"epoch": 0.42,
"learning_rate": 2.92914653784219e-05,
"loss": 2.8725,
"step": 65300
},
{
"epoch": 0.42,
"learning_rate": 2.925925925925926e-05,
"loss": 2.844,
"step": 65400
},
{
"epoch": 0.42,
"learning_rate": 2.9227053140096622e-05,
"loss": 2.828,
"step": 65500
},
{
"epoch": 0.42,
"learning_rate": 2.919484702093398e-05,
"loss": 2.8495,
"step": 65600
},
{
"epoch": 0.42,
"learning_rate": 2.9162640901771337e-05,
"loss": 2.8342,
"step": 65700
},
{
"epoch": 0.42,
"learning_rate": 2.9130434782608696e-05,
"loss": 2.8519,
"step": 65800
},
{
"epoch": 0.42,
"learning_rate": 2.909822866344606e-05,
"loss": 2.8379,
"step": 65900
},
{
"epoch": 0.42,
"learning_rate": 2.9066022544283415e-05,
"loss": 2.8307,
"step": 66000
},
{
"epoch": 0.42,
"learning_rate": 2.9033816425120775e-05,
"loss": 2.8247,
"step": 66100
},
{
"epoch": 0.42,
"learning_rate": 2.9001610305958134e-05,
"loss": 2.8353,
"step": 66200
},
{
"epoch": 0.42,
"learning_rate": 2.896940418679549e-05,
"loss": 2.8314,
"step": 66300
},
{
"epoch": 0.42,
"learning_rate": 2.8937198067632853e-05,
"loss": 2.8551,
"step": 66400
},
{
"epoch": 0.43,
"learning_rate": 2.8904991948470212e-05,
"loss": 2.8397,
"step": 66500
},
{
"epoch": 0.43,
"learning_rate": 2.8872785829307568e-05,
"loss": 2.8393,
"step": 66600
},
{
"epoch": 0.43,
"learning_rate": 2.8840579710144927e-05,
"loss": 2.8362,
"step": 66700
},
{
"epoch": 0.43,
"learning_rate": 2.880837359098229e-05,
"loss": 2.8416,
"step": 66800
},
{
"epoch": 0.43,
"learning_rate": 2.877616747181965e-05,
"loss": 2.8438,
"step": 66900
},
{
"epoch": 0.43,
"learning_rate": 2.8743961352657005e-05,
"loss": 2.8604,
"step": 67000
},
{
"epoch": 0.43,
"learning_rate": 2.8711755233494365e-05,
"loss": 2.8333,
"step": 67100
},
{
"epoch": 0.43,
"learning_rate": 2.867954911433172e-05,
"loss": 2.841,
"step": 67200
},
{
"epoch": 0.43,
"learning_rate": 2.8647342995169087e-05,
"loss": 2.8396,
"step": 67300
},
{
"epoch": 0.43,
"learning_rate": 2.8615136876006443e-05,
"loss": 2.8359,
"step": 67400
},
{
"epoch": 0.43,
"learning_rate": 2.8582930756843802e-05,
"loss": 2.8387,
"step": 67500
},
{
"epoch": 0.43,
"learning_rate": 2.8550724637681158e-05,
"loss": 2.8228,
"step": 67600
},
{
"epoch": 0.43,
"learning_rate": 2.851851851851852e-05,
"loss": 2.8492,
"step": 67700
},
{
"epoch": 0.43,
"learning_rate": 2.848631239935588e-05,
"loss": 2.8357,
"step": 67800
},
{
"epoch": 0.43,
"learning_rate": 2.845410628019324e-05,
"loss": 2.8602,
"step": 67900
},
{
"epoch": 0.44,
"learning_rate": 2.8421900161030595e-05,
"loss": 2.8513,
"step": 68000
},
{
"epoch": 0.44,
"learning_rate": 2.8389694041867955e-05,
"loss": 2.8451,
"step": 68100
},
{
"epoch": 0.44,
"learning_rate": 2.8357487922705318e-05,
"loss": 2.838,
"step": 68200
},
{
"epoch": 0.44,
"learning_rate": 2.8325281803542674e-05,
"loss": 2.8168,
"step": 68300
},
{
"epoch": 0.44,
"learning_rate": 2.8293075684380033e-05,
"loss": 2.8381,
"step": 68400
},
{
"epoch": 0.44,
"learning_rate": 2.826086956521739e-05,
"loss": 2.8328,
"step": 68500
},
{
"epoch": 0.44,
"learning_rate": 2.8228663446054755e-05,
"loss": 2.8024,
"step": 68600
},
{
"epoch": 0.44,
"learning_rate": 2.819645732689211e-05,
"loss": 2.8387,
"step": 68700
},
{
"epoch": 0.44,
"learning_rate": 2.816425120772947e-05,
"loss": 2.8464,
"step": 68800
},
{
"epoch": 0.44,
"learning_rate": 2.8132045088566826e-05,
"loss": 2.8304,
"step": 68900
},
{
"epoch": 0.44,
"learning_rate": 2.8099838969404192e-05,
"loss": 2.8233,
"step": 69000
},
{
"epoch": 0.44,
"learning_rate": 2.806763285024155e-05,
"loss": 2.8343,
"step": 69100
},
{
"epoch": 0.44,
"learning_rate": 2.8035426731078908e-05,
"loss": 2.835,
"step": 69200
},
{
"epoch": 0.44,
"learning_rate": 2.8003220611916264e-05,
"loss": 2.8356,
"step": 69300
},
{
"epoch": 0.44,
"learning_rate": 2.7971014492753623e-05,
"loss": 2.8548,
"step": 69400
},
{
"epoch": 0.44,
"learning_rate": 2.7938808373590986e-05,
"loss": 2.8293,
"step": 69500
},
{
"epoch": 0.45,
"learning_rate": 2.7906602254428345e-05,
"loss": 2.8255,
"step": 69600
},
{
"epoch": 0.45,
"learning_rate": 2.78743961352657e-05,
"loss": 2.8358,
"step": 69700
},
{
"epoch": 0.45,
"learning_rate": 2.784219001610306e-05,
"loss": 2.8539,
"step": 69800
},
{
"epoch": 0.45,
"learning_rate": 2.7809983896940423e-05,
"loss": 2.8419,
"step": 69900
},
{
"epoch": 0.45,
"learning_rate": 2.777777777777778e-05,
"loss": 2.8279,
"step": 70000
},
{
"epoch": 0.45,
"learning_rate": 2.774557165861514e-05,
"loss": 2.8491,
"step": 70100
},
{
"epoch": 0.45,
"learning_rate": 2.7713365539452494e-05,
"loss": 2.8203,
"step": 70200
},
{
"epoch": 0.45,
"learning_rate": 2.7681159420289854e-05,
"loss": 2.8144,
"step": 70300
},
{
"epoch": 0.45,
"learning_rate": 2.7648953301127217e-05,
"loss": 2.8371,
"step": 70400
},
{
"epoch": 0.45,
"learning_rate": 2.7616747181964576e-05,
"loss": 2.8459,
"step": 70500
},
{
"epoch": 0.45,
"learning_rate": 2.7584541062801932e-05,
"loss": 2.8455,
"step": 70600
},
{
"epoch": 0.45,
"learning_rate": 2.755233494363929e-05,
"loss": 2.8141,
"step": 70700
},
{
"epoch": 0.45,
"learning_rate": 2.7520128824476654e-05,
"loss": 2.8354,
"step": 70800
},
{
"epoch": 0.45,
"learning_rate": 2.7487922705314013e-05,
"loss": 2.8243,
"step": 70900
},
{
"epoch": 0.45,
"learning_rate": 2.745571658615137e-05,
"loss": 2.8168,
"step": 71000
},
{
"epoch": 0.46,
"learning_rate": 2.742351046698873e-05,
"loss": 2.8294,
"step": 71100
},
{
"epoch": 0.46,
"learning_rate": 2.7391304347826085e-05,
"loss": 2.8431,
"step": 71200
},
{
"epoch": 0.46,
"learning_rate": 2.7359098228663447e-05,
"loss": 2.8458,
"step": 71300
},
{
"epoch": 0.46,
"learning_rate": 2.7326892109500807e-05,
"loss": 2.8317,
"step": 71400
},
{
"epoch": 0.46,
"learning_rate": 2.7294685990338166e-05,
"loss": 2.8287,
"step": 71500
},
{
"epoch": 0.46,
"learning_rate": 2.7262479871175522e-05,
"loss": 2.8311,
"step": 71600
},
{
"epoch": 0.46,
"learning_rate": 2.7230273752012885e-05,
"loss": 2.8279,
"step": 71700
},
{
"epoch": 0.46,
"learning_rate": 2.7198067632850244e-05,
"loss": 2.8073,
"step": 71800
},
{
"epoch": 0.46,
"learning_rate": 2.71658615136876e-05,
"loss": 2.8293,
"step": 71900
},
{
"epoch": 0.46,
"learning_rate": 2.713365539452496e-05,
"loss": 2.8098,
"step": 72000
},
{
"epoch": 0.46,
"learning_rate": 2.7101449275362322e-05,
"loss": 2.8379,
"step": 72100
},
{
"epoch": 0.46,
"learning_rate": 2.706924315619968e-05,
"loss": 2.8241,
"step": 72200
},
{
"epoch": 0.46,
"learning_rate": 2.7037037037037037e-05,
"loss": 2.8216,
"step": 72300
},
{
"epoch": 0.46,
"learning_rate": 2.7004830917874397e-05,
"loss": 2.8374,
"step": 72400
},
{
"epoch": 0.46,
"learning_rate": 2.6972624798711753e-05,
"loss": 2.8052,
"step": 72500
},
{
"epoch": 0.46,
"learning_rate": 2.694041867954912e-05,
"loss": 2.843,
"step": 72600
},
{
"epoch": 0.47,
"learning_rate": 2.6908212560386475e-05,
"loss": 2.8366,
"step": 72700
},
{
"epoch": 0.47,
"learning_rate": 2.6876006441223834e-05,
"loss": 2.8182,
"step": 72800
},
{
"epoch": 0.47,
"learning_rate": 2.684380032206119e-05,
"loss": 2.8541,
"step": 72900
},
{
"epoch": 0.47,
"learning_rate": 2.6811594202898553e-05,
"loss": 2.8235,
"step": 73000
},
{
"epoch": 0.47,
"learning_rate": 2.6779388083735912e-05,
"loss": 2.8081,
"step": 73100
},
{
"epoch": 0.47,
"learning_rate": 2.674718196457327e-05,
"loss": 2.8794,
"step": 73200
},
{
"epoch": 0.47,
"learning_rate": 2.6714975845410628e-05,
"loss": 2.8174,
"step": 73300
},
{
"epoch": 0.47,
"learning_rate": 2.6682769726247987e-05,
"loss": 2.8363,
"step": 73400
},
{
"epoch": 0.47,
"learning_rate": 2.665056360708535e-05,
"loss": 2.8072,
"step": 73500
},
{
"epoch": 0.47,
"learning_rate": 2.6618357487922706e-05,
"loss": 2.8166,
"step": 73600
},
{
"epoch": 0.47,
"learning_rate": 2.6586151368760065e-05,
"loss": 2.8289,
"step": 73700
},
{
"epoch": 0.47,
"learning_rate": 2.6553945249597424e-05,
"loss": 2.8295,
"step": 73800
},
{
"epoch": 0.47,
"learning_rate": 2.6521739130434787e-05,
"loss": 2.8126,
"step": 73900
},
{
"epoch": 0.47,
"learning_rate": 2.6489533011272143e-05,
"loss": 2.8437,
"step": 74000
},
{
"epoch": 0.47,
"learning_rate": 2.6457326892109502e-05,
"loss": 2.8406,
"step": 74100
},
{
"epoch": 0.47,
"learning_rate": 2.642512077294686e-05,
"loss": 2.8443,
"step": 74200
},
{
"epoch": 0.48,
"learning_rate": 2.6392914653784218e-05,
"loss": 2.8291,
"step": 74300
},
{
"epoch": 0.48,
"learning_rate": 2.636070853462158e-05,
"loss": 2.8393,
"step": 74400
},
{
"epoch": 0.48,
"learning_rate": 2.632850241545894e-05,
"loss": 2.825,
"step": 74500
},
{
"epoch": 0.48,
"learning_rate": 2.6296296296296296e-05,
"loss": 2.835,
"step": 74600
},
{
"epoch": 0.48,
"learning_rate": 2.6264090177133655e-05,
"loss": 2.8325,
"step": 74700
},
{
"epoch": 0.48,
"learning_rate": 2.6231884057971018e-05,
"loss": 2.8117,
"step": 74800
},
{
"epoch": 0.48,
"learning_rate": 2.6199677938808377e-05,
"loss": 2.8135,
"step": 74900
},
{
"epoch": 0.48,
"learning_rate": 2.6167471819645733e-05,
"loss": 2.7984,
"step": 75000
},
{
"epoch": 0.48,
"learning_rate": 2.6135265700483093e-05,
"loss": 2.8305,
"step": 75100
},
{
"epoch": 0.48,
"learning_rate": 2.6103059581320455e-05,
"loss": 2.8003,
"step": 75200
},
{
"epoch": 0.48,
"learning_rate": 2.607085346215781e-05,
"loss": 2.7895,
"step": 75300
},
{
"epoch": 0.48,
"learning_rate": 2.603864734299517e-05,
"loss": 2.8147,
"step": 75400
},
{
"epoch": 0.48,
"learning_rate": 2.600644122383253e-05,
"loss": 2.8025,
"step": 75500
},
{
"epoch": 0.48,
"learning_rate": 2.5974235104669886e-05,
"loss": 2.8163,
"step": 75600
},
{
"epoch": 0.48,
"learning_rate": 2.594202898550725e-05,
"loss": 2.8055,
"step": 75700
},
{
"epoch": 0.49,
"learning_rate": 2.5909822866344608e-05,
"loss": 2.8029,
"step": 75800
},
{
"epoch": 0.49,
"learning_rate": 2.5877616747181964e-05,
"loss": 2.8047,
"step": 75900
},
{
"epoch": 0.49,
"learning_rate": 2.5845410628019323e-05,
"loss": 2.8138,
"step": 76000
},
{
"epoch": 0.49,
"learning_rate": 2.5813204508856686e-05,
"loss": 2.8168,
"step": 76100
},
{
"epoch": 0.49,
"learning_rate": 2.5780998389694045e-05,
"loss": 2.844,
"step": 76200
},
{
"epoch": 0.49,
"learning_rate": 2.57487922705314e-05,
"loss": 2.8229,
"step": 76300
},
{
"epoch": 0.49,
"learning_rate": 2.571658615136876e-05,
"loss": 2.8018,
"step": 76400
},
{
"epoch": 0.49,
"learning_rate": 2.5684380032206117e-05,
"loss": 2.8209,
"step": 76500
},
{
"epoch": 0.49,
"learning_rate": 2.5652173913043483e-05,
"loss": 2.8103,
"step": 76600
},
{
"epoch": 0.49,
"learning_rate": 2.561996779388084e-05,
"loss": 2.7793,
"step": 76700
},
{
"epoch": 0.49,
"learning_rate": 2.5587761674718198e-05,
"loss": 2.8128,
"step": 76800
},
{
"epoch": 0.49,
"learning_rate": 2.5555555555555554e-05,
"loss": 2.8035,
"step": 76900
},
{
"epoch": 0.49,
"learning_rate": 2.5523349436392917e-05,
"loss": 2.82,
"step": 77000
},
{
"epoch": 0.49,
"learning_rate": 2.5491143317230276e-05,
"loss": 2.8034,
"step": 77100
},
{
"epoch": 0.49,
"learning_rate": 2.5458937198067636e-05,
"loss": 2.7988,
"step": 77200
},
{
"epoch": 0.49,
"learning_rate": 2.542673107890499e-05,
"loss": 2.7987,
"step": 77300
},
{
"epoch": 0.5,
"learning_rate": 2.539452495974235e-05,
"loss": 2.8125,
"step": 77400
},
{
"epoch": 0.5,
"learning_rate": 2.5362318840579714e-05,
"loss": 2.81,
"step": 77500
},
{
"epoch": 0.5,
"learning_rate": 2.533011272141707e-05,
"loss": 2.8113,
"step": 77600
},
{
"epoch": 0.5,
"learning_rate": 2.529790660225443e-05,
"loss": 2.8195,
"step": 77700
},
{
"epoch": 0.5,
"learning_rate": 2.5265700483091785e-05,
"loss": 2.8123,
"step": 77800
},
{
"epoch": 0.5,
"learning_rate": 2.523349436392915e-05,
"loss": 2.8181,
"step": 77900
},
{
"epoch": 0.5,
"learning_rate": 2.5201288244766507e-05,
"loss": 2.7974,
"step": 78000
},
{
"epoch": 0.5,
"learning_rate": 2.5169082125603866e-05,
"loss": 2.7791,
"step": 78100
},
{
"epoch": 0.5,
"learning_rate": 2.5136876006441222e-05,
"loss": 2.7866,
"step": 78200
},
{
"epoch": 0.5,
"learning_rate": 2.510466988727859e-05,
"loss": 2.8178,
"step": 78300
},
{
"epoch": 0.5,
"learning_rate": 2.5072463768115944e-05,
"loss": 2.806,
"step": 78400
},
{
"epoch": 0.5,
"learning_rate": 2.5040257648953304e-05,
"loss": 2.7977,
"step": 78500
},
{
"epoch": 0.5,
"learning_rate": 2.500805152979066e-05,
"loss": 2.8035,
"step": 78600
},
{
"epoch": 0.5,
"learning_rate": 2.4975845410628022e-05,
"loss": 2.8055,
"step": 78700
},
{
"epoch": 0.5,
"learning_rate": 2.494363929146538e-05,
"loss": 2.8079,
"step": 78800
},
{
"epoch": 0.5,
"learning_rate": 2.491143317230274e-05,
"loss": 2.8052,
"step": 78900
},
{
"epoch": 0.51,
"learning_rate": 2.4879227053140097e-05,
"loss": 2.8017,
"step": 79000
},
{
"epoch": 0.51,
"learning_rate": 2.4847020933977456e-05,
"loss": 2.8018,
"step": 79100
},
{
"epoch": 0.51,
"learning_rate": 2.4814814814814816e-05,
"loss": 2.8021,
"step": 79200
},
{
"epoch": 0.51,
"learning_rate": 2.4782608695652175e-05,
"loss": 2.8039,
"step": 79300
},
{
"epoch": 0.51,
"learning_rate": 2.4750402576489534e-05,
"loss": 2.8036,
"step": 79400
},
{
"epoch": 0.51,
"learning_rate": 2.471819645732689e-05,
"loss": 2.8052,
"step": 79500
},
{
"epoch": 0.51,
"learning_rate": 2.4685990338164253e-05,
"loss": 2.7947,
"step": 79600
},
{
"epoch": 0.51,
"learning_rate": 2.465378421900161e-05,
"loss": 2.814,
"step": 79700
},
{
"epoch": 0.51,
"learning_rate": 2.4621578099838972e-05,
"loss": 2.8053,
"step": 79800
},
{
"epoch": 0.51,
"learning_rate": 2.4589371980676328e-05,
"loss": 2.7983,
"step": 79900
},
{
"epoch": 0.51,
"learning_rate": 2.455716586151369e-05,
"loss": 2.8219,
"step": 80000
},
{
"epoch": 0.51,
"learning_rate": 2.4524959742351047e-05,
"loss": 2.8118,
"step": 80100
},
{
"epoch": 0.51,
"learning_rate": 2.449275362318841e-05,
"loss": 2.7935,
"step": 80200
},
{
"epoch": 0.51,
"learning_rate": 2.4460547504025765e-05,
"loss": 2.7984,
"step": 80300
},
{
"epoch": 0.51,
"learning_rate": 2.4428341384863128e-05,
"loss": 2.8212,
"step": 80400
},
{
"epoch": 0.52,
"learning_rate": 2.4396135265700484e-05,
"loss": 2.8028,
"step": 80500
},
{
"epoch": 0.52,
"learning_rate": 2.4363929146537843e-05,
"loss": 2.7923,
"step": 80600
},
{
"epoch": 0.52,
"learning_rate": 2.4331723027375203e-05,
"loss": 2.7949,
"step": 80700
},
{
"epoch": 0.52,
"learning_rate": 2.4299516908212562e-05,
"loss": 2.7843,
"step": 80800
},
{
"epoch": 0.52,
"learning_rate": 2.426731078904992e-05,
"loss": 2.8099,
"step": 80900
},
{
"epoch": 0.52,
"learning_rate": 2.423510466988728e-05,
"loss": 2.7919,
"step": 81000
},
{
"epoch": 0.52,
"learning_rate": 2.420289855072464e-05,
"loss": 2.7945,
"step": 81100
},
{
"epoch": 0.52,
"learning_rate": 2.4170692431561996e-05,
"loss": 2.7856,
"step": 81200
},
{
"epoch": 0.52,
"learning_rate": 2.413848631239936e-05,
"loss": 2.8014,
"step": 81300
},
{
"epoch": 0.52,
"learning_rate": 2.4106280193236715e-05,
"loss": 2.8032,
"step": 81400
},
{
"epoch": 0.52,
"learning_rate": 2.4074074074074074e-05,
"loss": 2.8034,
"step": 81500
},
{
"epoch": 0.52,
"learning_rate": 2.4041867954911433e-05,
"loss": 2.796,
"step": 81600
},
{
"epoch": 0.52,
"learning_rate": 2.4009661835748793e-05,
"loss": 2.8212,
"step": 81700
},
{
"epoch": 0.52,
"learning_rate": 2.3977455716586152e-05,
"loss": 2.801,
"step": 81800
},
{
"epoch": 0.52,
"learning_rate": 2.394524959742351e-05,
"loss": 2.8197,
"step": 81900
},
{
"epoch": 0.52,
"learning_rate": 2.391304347826087e-05,
"loss": 2.7941,
"step": 82000
},
{
"epoch": 0.53,
"learning_rate": 2.388083735909823e-05,
"loss": 2.8036,
"step": 82100
},
{
"epoch": 0.53,
"learning_rate": 2.384863123993559e-05,
"loss": 2.8133,
"step": 82200
},
{
"epoch": 0.53,
"learning_rate": 2.381642512077295e-05,
"loss": 2.8264,
"step": 82300
},
{
"epoch": 0.53,
"learning_rate": 2.3784219001610308e-05,
"loss": 2.8203,
"step": 82400
},
{
"epoch": 0.53,
"learning_rate": 2.3752012882447668e-05,
"loss": 2.8095,
"step": 82500
},
{
"epoch": 0.53,
"learning_rate": 2.3719806763285024e-05,
"loss": 2.7942,
"step": 82600
},
{
"epoch": 0.53,
"learning_rate": 2.3687600644122386e-05,
"loss": 2.802,
"step": 82700
},
{
"epoch": 0.53,
"learning_rate": 2.3655394524959742e-05,
"loss": 2.8047,
"step": 82800
},
{
"epoch": 0.53,
"learning_rate": 2.36231884057971e-05,
"loss": 2.8333,
"step": 82900
},
{
"epoch": 0.53,
"learning_rate": 2.359098228663446e-05,
"loss": 2.7805,
"step": 83000
},
{
"epoch": 0.53,
"learning_rate": 2.355877616747182e-05,
"loss": 2.7929,
"step": 83100
},
{
"epoch": 0.53,
"learning_rate": 2.352657004830918e-05,
"loss": 2.7951,
"step": 83200
},
{
"epoch": 0.53,
"learning_rate": 2.349436392914654e-05,
"loss": 2.7878,
"step": 83300
},
{
"epoch": 0.53,
"learning_rate": 2.34621578099839e-05,
"loss": 2.8133,
"step": 83400
},
{
"epoch": 0.53,
"learning_rate": 2.3429951690821258e-05,
"loss": 2.81,
"step": 83500
},
{
"epoch": 0.54,
"learning_rate": 2.3397745571658617e-05,
"loss": 2.7892,
"step": 83600
},
{
"epoch": 0.54,
"learning_rate": 2.3365539452495973e-05,
"loss": 2.8081,
"step": 83700
},
{
"epoch": 0.54,
"learning_rate": 2.3333333333333336e-05,
"loss": 2.8013,
"step": 83800
},
{
"epoch": 0.54,
"learning_rate": 2.3301127214170692e-05,
"loss": 2.802,
"step": 83900
},
{
"epoch": 0.54,
"learning_rate": 2.3268921095008055e-05,
"loss": 2.8049,
"step": 84000
},
{
"epoch": 0.54,
"learning_rate": 2.323671497584541e-05,
"loss": 2.8115,
"step": 84100
},
{
"epoch": 0.54,
"learning_rate": 2.3204508856682773e-05,
"loss": 2.8037,
"step": 84200
},
{
"epoch": 0.54,
"learning_rate": 2.317230273752013e-05,
"loss": 2.8067,
"step": 84300
},
{
"epoch": 0.54,
"learning_rate": 2.314009661835749e-05,
"loss": 2.8075,
"step": 84400
},
{
"epoch": 0.54,
"learning_rate": 2.3107890499194848e-05,
"loss": 2.8046,
"step": 84500
},
{
"epoch": 0.54,
"learning_rate": 2.3075684380032207e-05,
"loss": 2.794,
"step": 84600
},
{
"epoch": 0.54,
"learning_rate": 2.3043478260869567e-05,
"loss": 2.7994,
"step": 84700
},
{
"epoch": 0.54,
"learning_rate": 2.3011272141706926e-05,
"loss": 2.8026,
"step": 84800
},
{
"epoch": 0.54,
"learning_rate": 2.2979066022544285e-05,
"loss": 2.7916,
"step": 84900
},
{
"epoch": 0.54,
"learning_rate": 2.294685990338164e-05,
"loss": 2.804,
"step": 85000
},
{
"epoch": 0.54,
"learning_rate": 2.2914653784219004e-05,
"loss": 2.7875,
"step": 85100
},
{
"epoch": 0.55,
"learning_rate": 2.288244766505636e-05,
"loss": 2.8017,
"step": 85200
},
{
"epoch": 0.55,
"learning_rate": 2.2850241545893723e-05,
"loss": 2.7964,
"step": 85300
},
{
"epoch": 0.55,
"learning_rate": 2.281803542673108e-05,
"loss": 2.819,
"step": 85400
},
{
"epoch": 0.55,
"learning_rate": 2.278582930756844e-05,
"loss": 2.8035,
"step": 85500
},
{
"epoch": 0.55,
"learning_rate": 2.2753623188405797e-05,
"loss": 2.8024,
"step": 85600
},
{
"epoch": 0.55,
"learning_rate": 2.2721417069243157e-05,
"loss": 2.7984,
"step": 85700
},
{
"epoch": 0.55,
"learning_rate": 2.2689210950080516e-05,
"loss": 2.8093,
"step": 85800
},
{
"epoch": 0.55,
"learning_rate": 2.2657004830917875e-05,
"loss": 2.8083,
"step": 85900
},
{
"epoch": 0.55,
"learning_rate": 2.2624798711755235e-05,
"loss": 2.8187,
"step": 86000
},
{
"epoch": 0.55,
"learning_rate": 2.2592592592592594e-05,
"loss": 2.8028,
"step": 86100
},
{
"epoch": 0.55,
"learning_rate": 2.2560386473429953e-05,
"loss": 2.7928,
"step": 86200
},
{
"epoch": 0.55,
"learning_rate": 2.2528180354267313e-05,
"loss": 2.8309,
"step": 86300
},
{
"epoch": 0.55,
"learning_rate": 2.2495974235104672e-05,
"loss": 2.8041,
"step": 86400
},
{
"epoch": 0.55,
"learning_rate": 2.246376811594203e-05,
"loss": 2.8062,
"step": 86500
},
{
"epoch": 0.55,
"learning_rate": 2.243156199677939e-05,
"loss": 2.7811,
"step": 86600
},
{
"epoch": 0.55,
"learning_rate": 2.2399355877616747e-05,
"loss": 2.8267,
"step": 86700
},
{
"epoch": 0.56,
"learning_rate": 2.2367149758454106e-05,
"loss": 2.8071,
"step": 86800
},
{
"epoch": 0.56,
"learning_rate": 2.2334943639291466e-05,
"loss": 2.7929,
"step": 86900
},
{
"epoch": 0.56,
"learning_rate": 2.2302737520128825e-05,
"loss": 2.7799,
"step": 87000
},
{
"epoch": 0.56,
"learning_rate": 2.2270531400966184e-05,
"loss": 2.8064,
"step": 87100
},
{
"epoch": 0.56,
"learning_rate": 2.2238325281803544e-05,
"loss": 2.7858,
"step": 87200
},
{
"epoch": 0.56,
"learning_rate": 2.2206119162640903e-05,
"loss": 2.7987,
"step": 87300
},
{
"epoch": 0.56,
"learning_rate": 2.2173913043478262e-05,
"loss": 2.8083,
"step": 87400
},
{
"epoch": 0.56,
"learning_rate": 2.214170692431562e-05,
"loss": 2.7931,
"step": 87500
},
{
"epoch": 0.56,
"learning_rate": 2.210950080515298e-05,
"loss": 2.7966,
"step": 87600
},
{
"epoch": 0.56,
"learning_rate": 2.2077294685990337e-05,
"loss": 2.7905,
"step": 87700
},
{
"epoch": 0.56,
"learning_rate": 2.20450885668277e-05,
"loss": 2.7893,
"step": 87800
},
{
"epoch": 0.56,
"learning_rate": 2.2012882447665056e-05,
"loss": 2.7872,
"step": 87900
},
{
"epoch": 0.56,
"learning_rate": 2.198067632850242e-05,
"loss": 2.7914,
"step": 88000
},
{
"epoch": 0.56,
"learning_rate": 2.1948470209339774e-05,
"loss": 2.804,
"step": 88100
},
{
"epoch": 0.56,
"learning_rate": 2.1916264090177137e-05,
"loss": 2.7954,
"step": 88200
},
{
"epoch": 0.57,
"learning_rate": 2.1884057971014493e-05,
"loss": 2.7865,
"step": 88300
},
{
"epoch": 0.57,
"learning_rate": 2.1851851851851852e-05,
"loss": 2.791,
"step": 88400
},
{
"epoch": 0.57,
"learning_rate": 2.1819645732689212e-05,
"loss": 2.7986,
"step": 88500
},
{
"epoch": 0.57,
"learning_rate": 2.178743961352657e-05,
"loss": 2.7855,
"step": 88600
},
{
"epoch": 0.57,
"learning_rate": 2.175523349436393e-05,
"loss": 2.7979,
"step": 88700
},
{
"epoch": 0.57,
"learning_rate": 2.1723027375201286e-05,
"loss": 2.7887,
"step": 88800
},
{
"epoch": 0.57,
"learning_rate": 2.169082125603865e-05,
"loss": 2.8069,
"step": 88900
},
{
"epoch": 0.57,
"learning_rate": 2.1658615136876005e-05,
"loss": 2.7937,
"step": 89000
},
{
"epoch": 0.57,
"learning_rate": 2.1626409017713368e-05,
"loss": 2.7942,
"step": 89100
},
{
"epoch": 0.57,
"learning_rate": 2.1594202898550724e-05,
"loss": 2.779,
"step": 89200
},
{
"epoch": 0.57,
"learning_rate": 2.1561996779388087e-05,
"loss": 2.7915,
"step": 89300
},
{
"epoch": 0.57,
"learning_rate": 2.1529790660225443e-05,
"loss": 2.7835,
"step": 89400
},
{
"epoch": 0.57,
"learning_rate": 2.1497584541062805e-05,
"loss": 2.7864,
"step": 89500
},
{
"epoch": 0.57,
"learning_rate": 2.146537842190016e-05,
"loss": 2.7905,
"step": 89600
},
{
"epoch": 0.57,
"learning_rate": 2.1433172302737524e-05,
"loss": 2.8186,
"step": 89700
},
{
"epoch": 0.57,
"learning_rate": 2.140096618357488e-05,
"loss": 2.7718,
"step": 89800
},
{
"epoch": 0.58,
"learning_rate": 2.136876006441224e-05,
"loss": 2.8057,
"step": 89900
},
{
"epoch": 0.58,
"learning_rate": 2.13365539452496e-05,
"loss": 2.8028,
"step": 90000
},
{
"epoch": 0.58,
"learning_rate": 2.1304347826086958e-05,
"loss": 2.8135,
"step": 90100
},
{
"epoch": 0.58,
"learning_rate": 2.1272141706924317e-05,
"loss": 2.7958,
"step": 90200
},
{
"epoch": 0.58,
"learning_rate": 2.1239935587761677e-05,
"loss": 2.8114,
"step": 90300
},
{
"epoch": 0.58,
"learning_rate": 2.1207729468599036e-05,
"loss": 2.7923,
"step": 90400
},
{
"epoch": 0.58,
"learning_rate": 2.1175523349436392e-05,
"loss": 2.8136,
"step": 90500
},
{
"epoch": 0.58,
"learning_rate": 2.1143317230273755e-05,
"loss": 2.8194,
"step": 90600
},
{
"epoch": 0.58,
"learning_rate": 2.111111111111111e-05,
"loss": 2.7926,
"step": 90700
},
{
"epoch": 0.58,
"learning_rate": 2.107890499194847e-05,
"loss": 2.7928,
"step": 90800
},
{
"epoch": 0.58,
"learning_rate": 2.104669887278583e-05,
"loss": 2.793,
"step": 90900
},
{
"epoch": 0.58,
"learning_rate": 2.101449275362319e-05,
"loss": 2.7991,
"step": 91000
},
{
"epoch": 0.58,
"learning_rate": 2.0982286634460548e-05,
"loss": 2.8141,
"step": 91100
},
{
"epoch": 0.58,
"learning_rate": 2.0950080515297908e-05,
"loss": 2.7874,
"step": 91200
},
{
"epoch": 0.58,
"learning_rate": 2.0917874396135267e-05,
"loss": 2.8107,
"step": 91300
},
{
"epoch": 0.58,
"learning_rate": 2.0885668276972626e-05,
"loss": 2.8102,
"step": 91400
},
{
"epoch": 0.59,
"learning_rate": 2.0853462157809986e-05,
"loss": 2.7908,
"step": 91500
},
{
"epoch": 0.59,
"learning_rate": 2.0821256038647345e-05,
"loss": 2.7978,
"step": 91600
},
{
"epoch": 0.59,
"learning_rate": 2.0789049919484704e-05,
"loss": 2.7697,
"step": 91700
},
{
"epoch": 0.59,
"learning_rate": 2.0756843800322064e-05,
"loss": 2.7937,
"step": 91800
},
{
"epoch": 0.59,
"learning_rate": 2.072463768115942e-05,
"loss": 2.8289,
"step": 91900
},
{
"epoch": 0.59,
"learning_rate": 2.0692431561996782e-05,
"loss": 2.8001,
"step": 92000
},
{
"epoch": 0.59,
"learning_rate": 2.066022544283414e-05,
"loss": 2.7772,
"step": 92100
},
{
"epoch": 0.59,
"learning_rate": 2.0628019323671498e-05,
"loss": 2.7779,
"step": 92200
},
{
"epoch": 0.59,
"learning_rate": 2.0595813204508857e-05,
"loss": 2.7996,
"step": 92300
},
{
"epoch": 0.59,
"learning_rate": 2.0563607085346216e-05,
"loss": 2.7866,
"step": 92400
},
{
"epoch": 0.59,
"learning_rate": 2.0531400966183576e-05,
"loss": 2.7746,
"step": 92500
},
{
"epoch": 0.59,
"learning_rate": 2.0499194847020935e-05,
"loss": 2.8097,
"step": 92600
},
{
"epoch": 0.59,
"learning_rate": 2.0466988727858294e-05,
"loss": 2.7903,
"step": 92700
},
{
"epoch": 0.59,
"learning_rate": 2.0434782608695654e-05,
"loss": 2.7794,
"step": 92800
},
{
"epoch": 0.59,
"learning_rate": 2.0402576489533013e-05,
"loss": 2.7772,
"step": 92900
},
{
"epoch": 0.6,
"learning_rate": 2.037037037037037e-05,
"loss": 2.7863,
"step": 93000
},
{
"epoch": 0.6,
"learning_rate": 2.0338164251207732e-05,
"loss": 2.8087,
"step": 93100
},
{
"epoch": 0.6,
"learning_rate": 2.0305958132045088e-05,
"loss": 2.8106,
"step": 93200
},
{
"epoch": 0.6,
"learning_rate": 2.027375201288245e-05,
"loss": 2.7916,
"step": 93300
},
{
"epoch": 0.6,
"learning_rate": 2.0241545893719806e-05,
"loss": 2.7952,
"step": 93400
},
{
"epoch": 0.6,
"learning_rate": 2.020933977455717e-05,
"loss": 2.7902,
"step": 93500
},
{
"epoch": 0.6,
"learning_rate": 2.0177133655394525e-05,
"loss": 2.8145,
"step": 93600
},
{
"epoch": 0.6,
"learning_rate": 2.0144927536231885e-05,
"loss": 2.7875,
"step": 93700
},
{
"epoch": 0.6,
"learning_rate": 2.0112721417069244e-05,
"loss": 2.7653,
"step": 93800
},
{
"epoch": 0.6,
"learning_rate": 2.0080515297906603e-05,
"loss": 2.7612,
"step": 93900
},
{
"epoch": 0.6,
"learning_rate": 2.0048309178743963e-05,
"loss": 2.7735,
"step": 94000
},
{
"epoch": 0.6,
"learning_rate": 2.0016103059581322e-05,
"loss": 2.7625,
"step": 94100
},
{
"epoch": 0.6,
"learning_rate": 1.998389694041868e-05,
"loss": 2.7749,
"step": 94200
},
{
"epoch": 0.6,
"learning_rate": 1.9951690821256037e-05,
"loss": 2.7534,
"step": 94300
},
{
"epoch": 0.6,
"learning_rate": 1.99194847020934e-05,
"loss": 2.7682,
"step": 94400
},
{
"epoch": 0.6,
"learning_rate": 1.9887278582930756e-05,
"loss": 2.7758,
"step": 94500
},
{
"epoch": 0.61,
"learning_rate": 1.985507246376812e-05,
"loss": 2.7695,
"step": 94600
},
{
"epoch": 0.61,
"learning_rate": 1.9822866344605475e-05,
"loss": 2.7639,
"step": 94700
},
{
"epoch": 0.61,
"learning_rate": 1.9790660225442837e-05,
"loss": 2.7859,
"step": 94800
},
{
"epoch": 0.61,
"learning_rate": 1.9758454106280193e-05,
"loss": 2.7764,
"step": 94900
},
{
"epoch": 0.61,
"learning_rate": 1.9726247987117553e-05,
"loss": 2.7729,
"step": 95000
},
{
"epoch": 0.61,
"learning_rate": 1.9694041867954912e-05,
"loss": 2.7882,
"step": 95100
},
{
"epoch": 0.61,
"learning_rate": 1.966183574879227e-05,
"loss": 2.7683,
"step": 95200
},
{
"epoch": 0.61,
"learning_rate": 1.962962962962963e-05,
"loss": 2.7647,
"step": 95300
},
{
"epoch": 0.61,
"learning_rate": 1.959742351046699e-05,
"loss": 2.7779,
"step": 95400
},
{
"epoch": 0.61,
"learning_rate": 1.956521739130435e-05,
"loss": 2.748,
"step": 95500
},
{
"epoch": 0.61,
"learning_rate": 1.953301127214171e-05,
"loss": 2.7748,
"step": 95600
},
{
"epoch": 0.61,
"learning_rate": 1.9500805152979068e-05,
"loss": 2.7665,
"step": 95700
},
{
"epoch": 0.61,
"learning_rate": 1.9468599033816428e-05,
"loss": 2.7798,
"step": 95800
},
{
"epoch": 0.61,
"learning_rate": 1.9436392914653787e-05,
"loss": 2.7687,
"step": 95900
},
{
"epoch": 0.61,
"learning_rate": 1.9404186795491143e-05,
"loss": 2.7772,
"step": 96000
},
{
"epoch": 0.62,
"learning_rate": 1.9371980676328502e-05,
"loss": 2.7982,
"step": 96100
},
{
"epoch": 0.62,
"learning_rate": 1.933977455716586e-05,
"loss": 2.7814,
"step": 96200
},
{
"epoch": 0.62,
"learning_rate": 1.930756843800322e-05,
"loss": 2.7493,
"step": 96300
},
{
"epoch": 0.62,
"learning_rate": 1.927536231884058e-05,
"loss": 2.7628,
"step": 96400
},
{
"epoch": 0.62,
"learning_rate": 1.924315619967794e-05,
"loss": 2.7383,
"step": 96500
},
{
"epoch": 0.62,
"learning_rate": 1.92109500805153e-05,
"loss": 2.7732,
"step": 96600
},
{
"epoch": 0.62,
"learning_rate": 1.917874396135266e-05,
"loss": 2.7874,
"step": 96700
},
{
"epoch": 0.62,
"learning_rate": 1.9146537842190018e-05,
"loss": 2.7626,
"step": 96800
},
{
"epoch": 0.62,
"learning_rate": 1.9114331723027377e-05,
"loss": 2.782,
"step": 96900
},
{
"epoch": 0.62,
"learning_rate": 1.9082125603864733e-05,
"loss": 2.779,
"step": 97000
},
{
"epoch": 0.62,
"learning_rate": 1.9049919484702096e-05,
"loss": 2.7991,
"step": 97100
},
{
"epoch": 0.62,
"learning_rate": 1.9017713365539452e-05,
"loss": 2.783,
"step": 97200
},
{
"epoch": 0.62,
"learning_rate": 1.8985507246376814e-05,
"loss": 2.7851,
"step": 97300
},
{
"epoch": 0.62,
"learning_rate": 1.895330112721417e-05,
"loss": 2.8004,
"step": 97400
},
{
"epoch": 0.62,
"learning_rate": 1.892109500805153e-05,
"loss": 2.7691,
"step": 97500
},
{
"epoch": 0.62,
"learning_rate": 1.888888888888889e-05,
"loss": 2.7606,
"step": 97600
},
{
"epoch": 0.63,
"learning_rate": 1.885668276972625e-05,
"loss": 2.7685,
"step": 97700
},
{
"epoch": 0.63,
"learning_rate": 1.8824476650563608e-05,
"loss": 2.7662,
"step": 97800
},
{
"epoch": 0.63,
"learning_rate": 1.8792270531400967e-05,
"loss": 2.7608,
"step": 97900
},
{
"epoch": 0.63,
"learning_rate": 1.8760064412238327e-05,
"loss": 2.7677,
"step": 98000
},
{
"epoch": 0.63,
"learning_rate": 1.8727858293075682e-05,
"loss": 2.778,
"step": 98100
},
{
"epoch": 0.63,
"learning_rate": 1.8695652173913045e-05,
"loss": 2.7742,
"step": 98200
},
{
"epoch": 0.63,
"learning_rate": 1.86634460547504e-05,
"loss": 2.7837,
"step": 98300
},
{
"epoch": 0.63,
"learning_rate": 1.8631239935587764e-05,
"loss": 2.7774,
"step": 98400
},
{
"epoch": 0.63,
"learning_rate": 1.859903381642512e-05,
"loss": 2.8115,
"step": 98500
},
{
"epoch": 0.63,
"learning_rate": 1.8566827697262483e-05,
"loss": 2.7943,
"step": 98600
},
{
"epoch": 0.63,
"learning_rate": 1.853462157809984e-05,
"loss": 2.804,
"step": 98700
},
{
"epoch": 0.63,
"learning_rate": 1.85024154589372e-05,
"loss": 2.7752,
"step": 98800
},
{
"epoch": 0.63,
"learning_rate": 1.8470209339774557e-05,
"loss": 2.7689,
"step": 98900
},
{
"epoch": 0.63,
"learning_rate": 1.8438003220611917e-05,
"loss": 2.7621,
"step": 99000
},
{
"epoch": 0.63,
"learning_rate": 1.8405797101449276e-05,
"loss": 2.7741,
"step": 99100
},
{
"epoch": 0.63,
"learning_rate": 1.8373590982286635e-05,
"loss": 2.7757,
"step": 99200
},
{
"epoch": 0.64,
"learning_rate": 1.8341384863123995e-05,
"loss": 2.7747,
"step": 99300
},
{
"epoch": 0.64,
"learning_rate": 1.8309178743961354e-05,
"loss": 2.7898,
"step": 99400
},
{
"epoch": 0.64,
"learning_rate": 1.8276972624798713e-05,
"loss": 2.7775,
"step": 99500
},
{
"epoch": 0.64,
"learning_rate": 1.8244766505636073e-05,
"loss": 2.7671,
"step": 99600
},
{
"epoch": 0.64,
"learning_rate": 1.8212560386473432e-05,
"loss": 2.784,
"step": 99700
},
{
"epoch": 0.64,
"learning_rate": 1.8180354267310788e-05,
"loss": 2.7866,
"step": 99800
},
{
"epoch": 0.64,
"learning_rate": 1.814814814814815e-05,
"loss": 2.7636,
"step": 99900
},
{
"epoch": 0.64,
"learning_rate": 1.8115942028985507e-05,
"loss": 2.7888,
"step": 100000
},
{
"epoch": 0.64,
"learning_rate": 1.8083735909822866e-05,
"loss": 2.7908,
"step": 100100
},
{
"epoch": 0.64,
"learning_rate": 1.8051529790660225e-05,
"loss": 2.7477,
"step": 100200
},
{
"epoch": 0.64,
"learning_rate": 1.8019323671497585e-05,
"loss": 2.7787,
"step": 100300
},
{
"epoch": 0.64,
"learning_rate": 1.7987117552334944e-05,
"loss": 2.7733,
"step": 100400
},
{
"epoch": 0.64,
"learning_rate": 1.7954911433172304e-05,
"loss": 2.7702,
"step": 100500
},
{
"epoch": 0.64,
"learning_rate": 1.7922705314009663e-05,
"loss": 2.7802,
"step": 100600
},
{
"epoch": 0.64,
"learning_rate": 1.7890499194847022e-05,
"loss": 2.7665,
"step": 100700
},
{
"epoch": 0.65,
"learning_rate": 1.785829307568438e-05,
"loss": 2.7842,
"step": 100800
},
{
"epoch": 0.65,
"learning_rate": 1.782608695652174e-05,
"loss": 2.8066,
"step": 100900
},
{
"epoch": 0.65,
"learning_rate": 1.77938808373591e-05,
"loss": 2.7668,
"step": 101000
},
{
"epoch": 0.65,
"learning_rate": 1.776167471819646e-05,
"loss": 2.7665,
"step": 101100
},
{
"epoch": 0.65,
"learning_rate": 1.7729468599033816e-05,
"loss": 2.7883,
"step": 101200
},
{
"epoch": 0.65,
"learning_rate": 1.769726247987118e-05,
"loss": 2.7733,
"step": 101300
},
{
"epoch": 0.65,
"learning_rate": 1.7665056360708534e-05,
"loss": 2.7688,
"step": 101400
},
{
"epoch": 0.65,
"learning_rate": 1.7632850241545894e-05,
"loss": 2.7634,
"step": 101500
},
{
"epoch": 0.65,
"learning_rate": 1.7600644122383253e-05,
"loss": 2.7543,
"step": 101600
},
{
"epoch": 0.65,
"learning_rate": 1.7568438003220612e-05,
"loss": 2.7563,
"step": 101700
},
{
"epoch": 0.65,
"learning_rate": 1.7536231884057972e-05,
"loss": 2.7511,
"step": 101800
},
{
"epoch": 0.65,
"learning_rate": 1.750402576489533e-05,
"loss": 2.7667,
"step": 101900
},
{
"epoch": 0.65,
"learning_rate": 1.747181964573269e-05,
"loss": 2.7703,
"step": 102000
},
{
"epoch": 0.65,
"learning_rate": 1.7439613526570046e-05,
"loss": 2.7711,
"step": 102100
},
{
"epoch": 0.65,
"learning_rate": 1.740740740740741e-05,
"loss": 2.7846,
"step": 102200
},
{
"epoch": 0.65,
"learning_rate": 1.7375201288244765e-05,
"loss": 2.7767,
"step": 102300
},
{
"epoch": 0.66,
"learning_rate": 1.7342995169082128e-05,
"loss": 2.7751,
"step": 102400
},
{
"epoch": 0.66,
"learning_rate": 1.7310789049919484e-05,
"loss": 2.76,
"step": 102500
},
{
"epoch": 0.66,
"learning_rate": 1.7278582930756847e-05,
"loss": 2.7869,
"step": 102600
},
{
"epoch": 0.66,
"learning_rate": 1.7246376811594203e-05,
"loss": 2.759,
"step": 102700
},
{
"epoch": 0.66,
"learning_rate": 1.7214170692431565e-05,
"loss": 2.778,
"step": 102800
},
{
"epoch": 0.66,
"learning_rate": 1.718196457326892e-05,
"loss": 2.7836,
"step": 102900
},
{
"epoch": 0.66,
"learning_rate": 1.714975845410628e-05,
"loss": 2.748,
"step": 103000
},
{
"epoch": 0.66,
"learning_rate": 1.711755233494364e-05,
"loss": 2.7511,
"step": 103100
},
{
"epoch": 0.66,
"learning_rate": 1.7085346215781e-05,
"loss": 2.7787,
"step": 103200
},
{
"epoch": 0.66,
"learning_rate": 1.705314009661836e-05,
"loss": 2.7732,
"step": 103300
},
{
"epoch": 0.66,
"learning_rate": 1.7020933977455718e-05,
"loss": 2.7735,
"step": 103400
},
{
"epoch": 0.66,
"learning_rate": 1.6988727858293077e-05,
"loss": 2.7658,
"step": 103500
},
{
"epoch": 0.66,
"learning_rate": 1.6956521739130433e-05,
"loss": 2.7737,
"step": 103600
},
{
"epoch": 0.66,
"learning_rate": 1.6924315619967796e-05,
"loss": 2.7989,
"step": 103700
},
{
"epoch": 0.66,
"learning_rate": 1.6892109500805152e-05,
"loss": 2.7815,
"step": 103800
},
{
"epoch": 0.66,
"learning_rate": 1.6859903381642515e-05,
"loss": 2.7763,
"step": 103900
},
{
"epoch": 0.67,
"learning_rate": 1.682769726247987e-05,
"loss": 2.7728,
"step": 104000
},
{
"epoch": 0.67,
"learning_rate": 1.6795491143317233e-05,
"loss": 2.7588,
"step": 104100
},
{
"epoch": 0.67,
"learning_rate": 1.676328502415459e-05,
"loss": 2.7461,
"step": 104200
},
{
"epoch": 0.67,
"learning_rate": 1.673107890499195e-05,
"loss": 2.7898,
"step": 104300
},
{
"epoch": 0.67,
"learning_rate": 1.6698872785829308e-05,
"loss": 2.7885,
"step": 104400
},
{
"epoch": 0.67,
"learning_rate": 1.6666666666666667e-05,
"loss": 2.7739,
"step": 104500
},
{
"epoch": 0.67,
"learning_rate": 1.6634460547504027e-05,
"loss": 2.7645,
"step": 104600
},
{
"epoch": 0.67,
"learning_rate": 1.6602254428341386e-05,
"loss": 2.7802,
"step": 104700
},
{
"epoch": 0.67,
"learning_rate": 1.6570048309178746e-05,
"loss": 2.7774,
"step": 104800
},
{
"epoch": 0.67,
"learning_rate": 1.6537842190016105e-05,
"loss": 2.763,
"step": 104900
},
{
"epoch": 0.67,
"learning_rate": 1.6505636070853464e-05,
"loss": 2.7799,
"step": 105000
},
{
"epoch": 0.67,
"learning_rate": 1.6473429951690824e-05,
"loss": 2.7713,
"step": 105100
},
{
"epoch": 0.67,
"learning_rate": 1.644122383252818e-05,
"loss": 2.775,
"step": 105200
},
{
"epoch": 0.67,
"learning_rate": 1.640901771336554e-05,
"loss": 2.7762,
"step": 105300
},
{
"epoch": 0.67,
"learning_rate": 1.6376811594202898e-05,
"loss": 2.7771,
"step": 105400
},
{
"epoch": 0.68,
"learning_rate": 1.6344605475040258e-05,
"loss": 2.7875,
"step": 105500
},
{
"epoch": 0.68,
"learning_rate": 1.6312399355877617e-05,
"loss": 2.7568,
"step": 105600
},
{
"epoch": 0.68,
"learning_rate": 1.6280193236714976e-05,
"loss": 2.7611,
"step": 105700
},
{
"epoch": 0.68,
"learning_rate": 1.6247987117552336e-05,
"loss": 2.774,
"step": 105800
},
{
"epoch": 0.68,
"learning_rate": 1.6215780998389695e-05,
"loss": 2.7967,
"step": 105900
},
{
"epoch": 0.68,
"learning_rate": 1.6183574879227054e-05,
"loss": 2.7724,
"step": 106000
},
{
"epoch": 0.68,
"learning_rate": 1.6151368760064414e-05,
"loss": 2.7948,
"step": 106100
},
{
"epoch": 0.68,
"learning_rate": 1.6119162640901773e-05,
"loss": 2.7626,
"step": 106200
},
{
"epoch": 0.68,
"learning_rate": 1.608695652173913e-05,
"loss": 2.7769,
"step": 106300
},
{
"epoch": 0.68,
"learning_rate": 1.6054750402576492e-05,
"loss": 2.7456,
"step": 106400
},
{
"epoch": 0.68,
"learning_rate": 1.6022544283413848e-05,
"loss": 2.7788,
"step": 106500
},
{
"epoch": 0.68,
"learning_rate": 1.599033816425121e-05,
"loss": 2.7688,
"step": 106600
},
{
"epoch": 0.68,
"learning_rate": 1.5958132045088566e-05,
"loss": 2.7825,
"step": 106700
},
{
"epoch": 0.68,
"learning_rate": 1.5925925925925926e-05,
"loss": 2.7784,
"step": 106800
},
{
"epoch": 0.68,
"learning_rate": 1.5893719806763285e-05,
"loss": 2.7897,
"step": 106900
},
{
"epoch": 0.68,
"learning_rate": 1.5861513687600644e-05,
"loss": 2.7611,
"step": 107000
},
{
"epoch": 0.69,
"learning_rate": 1.5829307568438004e-05,
"loss": 2.7706,
"step": 107100
},
{
"epoch": 0.69,
"learning_rate": 1.5797101449275363e-05,
"loss": 2.7604,
"step": 107200
},
{
"epoch": 0.69,
"learning_rate": 1.5764895330112723e-05,
"loss": 2.7562,
"step": 107300
},
{
"epoch": 0.69,
"learning_rate": 1.573268921095008e-05,
"loss": 2.7804,
"step": 107400
},
{
"epoch": 0.69,
"learning_rate": 1.570048309178744e-05,
"loss": 2.7732,
"step": 107500
},
{
"epoch": 0.69,
"learning_rate": 1.5668276972624797e-05,
"loss": 2.7837,
"step": 107600
},
{
"epoch": 0.69,
"learning_rate": 1.563607085346216e-05,
"loss": 2.7764,
"step": 107700
},
{
"epoch": 0.69,
"learning_rate": 1.5603864734299516e-05,
"loss": 2.7631,
"step": 107800
},
{
"epoch": 0.69,
"learning_rate": 1.557165861513688e-05,
"loss": 2.7799,
"step": 107900
},
{
"epoch": 0.69,
"learning_rate": 1.5539452495974235e-05,
"loss": 2.7735,
"step": 108000
},
{
"epoch": 0.69,
"learning_rate": 1.5507246376811597e-05,
"loss": 2.7559,
"step": 108100
},
{
"epoch": 0.69,
"learning_rate": 1.5475040257648953e-05,
"loss": 2.7801,
"step": 108200
},
{
"epoch": 0.69,
"learning_rate": 1.5442834138486313e-05,
"loss": 2.7813,
"step": 108300
},
{
"epoch": 0.69,
"learning_rate": 1.5410628019323672e-05,
"loss": 2.7771,
"step": 108400
},
{
"epoch": 0.69,
"learning_rate": 1.537842190016103e-05,
"loss": 2.7749,
"step": 108500
},
{
"epoch": 0.7,
"learning_rate": 1.534621578099839e-05,
"loss": 2.7739,
"step": 108600
},
{
"epoch": 0.7,
"learning_rate": 1.531400966183575e-05,
"loss": 2.7484,
"step": 108700
},
{
"epoch": 0.7,
"learning_rate": 1.528180354267311e-05,
"loss": 2.7949,
"step": 108800
},
{
"epoch": 0.7,
"learning_rate": 1.5249597423510467e-05,
"loss": 2.7832,
"step": 108900
},
{
"epoch": 0.7,
"learning_rate": 1.5217391304347828e-05,
"loss": 2.7844,
"step": 109000
},
{
"epoch": 0.7,
"learning_rate": 1.5185185185185186e-05,
"loss": 2.7597,
"step": 109100
},
{
"epoch": 0.7,
"learning_rate": 1.5152979066022547e-05,
"loss": 2.7675,
"step": 109200
},
{
"epoch": 0.7,
"learning_rate": 1.5120772946859905e-05,
"loss": 2.7787,
"step": 109300
},
{
"epoch": 0.7,
"learning_rate": 1.5088566827697262e-05,
"loss": 2.7745,
"step": 109400
},
{
"epoch": 0.7,
"learning_rate": 1.5056360708534623e-05,
"loss": 2.7835,
"step": 109500
},
{
"epoch": 0.7,
"learning_rate": 1.5024154589371981e-05,
"loss": 2.7636,
"step": 109600
},
{
"epoch": 0.7,
"learning_rate": 1.499194847020934e-05,
"loss": 2.7596,
"step": 109700
},
{
"epoch": 0.7,
"learning_rate": 1.49597423510467e-05,
"loss": 2.7606,
"step": 109800
},
{
"epoch": 0.7,
"learning_rate": 1.4927536231884059e-05,
"loss": 2.7677,
"step": 109900
},
{
"epoch": 0.7,
"learning_rate": 1.4895330112721417e-05,
"loss": 2.7842,
"step": 110000
},
{
"epoch": 0.7,
"learning_rate": 1.4863123993558778e-05,
"loss": 2.7774,
"step": 110100
},
{
"epoch": 0.71,
"learning_rate": 1.4830917874396135e-05,
"loss": 2.786,
"step": 110200
},
{
"epoch": 0.71,
"learning_rate": 1.4798711755233496e-05,
"loss": 2.7628,
"step": 110300
},
{
"epoch": 0.71,
"learning_rate": 1.4766505636070854e-05,
"loss": 2.7699,
"step": 110400
},
{
"epoch": 0.71,
"learning_rate": 1.4734299516908212e-05,
"loss": 2.7805,
"step": 110500
},
{
"epoch": 0.71,
"learning_rate": 1.4702093397745573e-05,
"loss": 2.757,
"step": 110600
},
{
"epoch": 0.71,
"learning_rate": 1.466988727858293e-05,
"loss": 2.768,
"step": 110700
},
{
"epoch": 0.71,
"learning_rate": 1.4637681159420291e-05,
"loss": 2.7504,
"step": 110800
},
{
"epoch": 0.71,
"learning_rate": 1.4605475040257649e-05,
"loss": 2.7523,
"step": 110900
},
{
"epoch": 0.71,
"learning_rate": 1.457326892109501e-05,
"loss": 2.7558,
"step": 111000
}
],
"max_steps": 156250,
"num_train_epochs": 1,
"total_flos": 3.0315746129075896e+18,
"trial_name": null,
"trial_params": null
}