t5_small_squad_trans_old / trainer_state.json
longcld's picture
upload
0b6e5f5
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.239126365054602,
"global_step": 58000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 9.987518721917126e-06,
"loss": 2.674,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 9.97503744383425e-06,
"loss": 2.5884,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 9.962556165751374e-06,
"loss": 2.614,
"step": 300
},
{
"epoch": 0.05,
"learning_rate": 9.950074887668498e-06,
"loss": 2.6229,
"step": 400
},
{
"epoch": 0.06,
"learning_rate": 9.937593609585623e-06,
"loss": 2.6261,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 9.925112331502746e-06,
"loss": 2.6,
"step": 600
},
{
"epoch": 0.09,
"learning_rate": 9.912631053419871e-06,
"loss": 2.6558,
"step": 700
},
{
"epoch": 0.1,
"learning_rate": 9.900149775336996e-06,
"loss": 2.5839,
"step": 800
},
{
"epoch": 0.11,
"learning_rate": 9.88766849725412e-06,
"loss": 2.6585,
"step": 900
},
{
"epoch": 0.12,
"learning_rate": 9.875187219171243e-06,
"loss": 2.5986,
"step": 1000
},
{
"epoch": 0.14,
"learning_rate": 9.862705941088368e-06,
"loss": 2.6437,
"step": 1100
},
{
"epoch": 0.15,
"learning_rate": 9.850224663005492e-06,
"loss": 2.5858,
"step": 1200
},
{
"epoch": 0.16,
"learning_rate": 9.837743384922617e-06,
"loss": 2.6142,
"step": 1300
},
{
"epoch": 0.17,
"learning_rate": 9.825262106839742e-06,
"loss": 2.5436,
"step": 1400
},
{
"epoch": 0.19,
"learning_rate": 9.812780828756865e-06,
"loss": 2.5879,
"step": 1500
},
{
"epoch": 0.2,
"learning_rate": 9.80029955067399e-06,
"loss": 2.6459,
"step": 1600
},
{
"epoch": 0.21,
"learning_rate": 9.787818272591114e-06,
"loss": 2.6185,
"step": 1700
},
{
"epoch": 0.22,
"learning_rate": 9.775336994508239e-06,
"loss": 2.6672,
"step": 1800
},
{
"epoch": 0.24,
"learning_rate": 9.762855716425362e-06,
"loss": 2.6541,
"step": 1900
},
{
"epoch": 0.25,
"learning_rate": 9.750374438342487e-06,
"loss": 2.6669,
"step": 2000
},
{
"epoch": 0.26,
"learning_rate": 9.737893160259612e-06,
"loss": 2.6323,
"step": 2100
},
{
"epoch": 0.27,
"learning_rate": 9.725411882176736e-06,
"loss": 2.6764,
"step": 2200
},
{
"epoch": 0.29,
"learning_rate": 9.712930604093859e-06,
"loss": 2.5925,
"step": 2300
},
{
"epoch": 0.3,
"learning_rate": 9.700449326010984e-06,
"loss": 2.5392,
"step": 2400
},
{
"epoch": 0.31,
"learning_rate": 9.68796804792811e-06,
"loss": 2.5843,
"step": 2500
},
{
"epoch": 0.32,
"learning_rate": 9.675486769845233e-06,
"loss": 2.6209,
"step": 2600
},
{
"epoch": 0.34,
"learning_rate": 9.663005491762358e-06,
"loss": 2.6575,
"step": 2700
},
{
"epoch": 0.35,
"learning_rate": 9.650524213679481e-06,
"loss": 2.6499,
"step": 2800
},
{
"epoch": 0.36,
"learning_rate": 9.638042935596606e-06,
"loss": 2.5762,
"step": 2900
},
{
"epoch": 0.37,
"learning_rate": 9.62556165751373e-06,
"loss": 2.5637,
"step": 3000
},
{
"epoch": 0.39,
"learning_rate": 9.613080379430855e-06,
"loss": 2.5902,
"step": 3100
},
{
"epoch": 0.4,
"learning_rate": 9.60059910134798e-06,
"loss": 2.5784,
"step": 3200
},
{
"epoch": 0.41,
"learning_rate": 9.588117823265103e-06,
"loss": 2.6246,
"step": 3300
},
{
"epoch": 0.42,
"learning_rate": 9.575636545182228e-06,
"loss": 2.6543,
"step": 3400
},
{
"epoch": 0.44,
"learning_rate": 9.563155267099352e-06,
"loss": 2.581,
"step": 3500
},
{
"epoch": 0.45,
"learning_rate": 9.550673989016475e-06,
"loss": 2.6039,
"step": 3600
},
{
"epoch": 0.46,
"learning_rate": 9.5381927109336e-06,
"loss": 2.5848,
"step": 3700
},
{
"epoch": 0.47,
"learning_rate": 9.525711432850725e-06,
"loss": 2.6026,
"step": 3800
},
{
"epoch": 0.49,
"learning_rate": 9.513230154767849e-06,
"loss": 2.6178,
"step": 3900
},
{
"epoch": 0.5,
"learning_rate": 9.500748876684974e-06,
"loss": 2.5908,
"step": 4000
},
{
"epoch": 0.51,
"learning_rate": 9.488267598602097e-06,
"loss": 2.599,
"step": 4100
},
{
"epoch": 0.52,
"learning_rate": 9.475786320519222e-06,
"loss": 2.5937,
"step": 4200
},
{
"epoch": 0.54,
"learning_rate": 9.463305042436346e-06,
"loss": 2.6038,
"step": 4300
},
{
"epoch": 0.55,
"learning_rate": 9.45082376435347e-06,
"loss": 2.5825,
"step": 4400
},
{
"epoch": 0.56,
"learning_rate": 9.438342486270596e-06,
"loss": 2.5668,
"step": 4500
},
{
"epoch": 0.57,
"learning_rate": 9.42586120818772e-06,
"loss": 2.6231,
"step": 4600
},
{
"epoch": 0.59,
"learning_rate": 9.413379930104844e-06,
"loss": 2.5955,
"step": 4700
},
{
"epoch": 0.6,
"learning_rate": 9.400898652021968e-06,
"loss": 2.6169,
"step": 4800
},
{
"epoch": 0.61,
"learning_rate": 9.388417373939091e-06,
"loss": 2.5865,
"step": 4900
},
{
"epoch": 0.62,
"learning_rate": 9.375936095856216e-06,
"loss": 2.5837,
"step": 5000
},
{
"epoch": 0.64,
"learning_rate": 9.363454817773341e-06,
"loss": 2.5994,
"step": 5100
},
{
"epoch": 0.65,
"learning_rate": 9.350973539690466e-06,
"loss": 2.6152,
"step": 5200
},
{
"epoch": 0.66,
"learning_rate": 9.33849226160759e-06,
"loss": 2.5968,
"step": 5300
},
{
"epoch": 0.67,
"learning_rate": 9.326010983524713e-06,
"loss": 2.5876,
"step": 5400
},
{
"epoch": 0.69,
"learning_rate": 9.313529705441838e-06,
"loss": 2.5356,
"step": 5500
},
{
"epoch": 0.7,
"learning_rate": 9.301048427358962e-06,
"loss": 2.6401,
"step": 5600
},
{
"epoch": 0.71,
"learning_rate": 9.288567149276087e-06,
"loss": 2.6072,
"step": 5700
},
{
"epoch": 0.72,
"learning_rate": 9.276085871193212e-06,
"loss": 2.6391,
"step": 5800
},
{
"epoch": 0.74,
"learning_rate": 9.263604593110335e-06,
"loss": 2.6492,
"step": 5900
},
{
"epoch": 0.75,
"learning_rate": 9.251123315027458e-06,
"loss": 2.5952,
"step": 6000
},
{
"epoch": 0.76,
"learning_rate": 9.238642036944584e-06,
"loss": 2.606,
"step": 6100
},
{
"epoch": 0.77,
"learning_rate": 9.226160758861709e-06,
"loss": 2.6346,
"step": 6200
},
{
"epoch": 0.79,
"learning_rate": 9.213679480778832e-06,
"loss": 2.6238,
"step": 6300
},
{
"epoch": 0.8,
"learning_rate": 9.201198202695957e-06,
"loss": 2.5924,
"step": 6400
},
{
"epoch": 0.81,
"learning_rate": 9.188716924613082e-06,
"loss": 2.6511,
"step": 6500
},
{
"epoch": 0.82,
"learning_rate": 9.176235646530206e-06,
"loss": 2.5397,
"step": 6600
},
{
"epoch": 0.84,
"learning_rate": 9.163754368447329e-06,
"loss": 2.6166,
"step": 6700
},
{
"epoch": 0.85,
"learning_rate": 9.151273090364454e-06,
"loss": 2.5976,
"step": 6800
},
{
"epoch": 0.86,
"learning_rate": 9.13879181228158e-06,
"loss": 2.5556,
"step": 6900
},
{
"epoch": 0.87,
"learning_rate": 9.126310534198703e-06,
"loss": 2.5568,
"step": 7000
},
{
"epoch": 0.89,
"learning_rate": 9.113829256115828e-06,
"loss": 2.5994,
"step": 7100
},
{
"epoch": 0.9,
"learning_rate": 9.101347978032951e-06,
"loss": 2.5802,
"step": 7200
},
{
"epoch": 0.91,
"learning_rate": 9.088866699950074e-06,
"loss": 2.5724,
"step": 7300
},
{
"epoch": 0.92,
"learning_rate": 9.0763854218672e-06,
"loss": 2.5317,
"step": 7400
},
{
"epoch": 0.94,
"learning_rate": 9.063904143784325e-06,
"loss": 2.6202,
"step": 7500
},
{
"epoch": 0.95,
"learning_rate": 9.051422865701448e-06,
"loss": 2.5479,
"step": 7600
},
{
"epoch": 0.96,
"learning_rate": 9.038941587618573e-06,
"loss": 2.5918,
"step": 7700
},
{
"epoch": 0.97,
"learning_rate": 9.026460309535696e-06,
"loss": 2.5706,
"step": 7800
},
{
"epoch": 0.99,
"learning_rate": 9.013979031452822e-06,
"loss": 2.5578,
"step": 7900
},
{
"epoch": 1.0,
"learning_rate": 9.001497753369945e-06,
"loss": 2.6592,
"step": 8000
},
{
"epoch": 1.01,
"learning_rate": 8.98901647528707e-06,
"loss": 2.6137,
"step": 8100
},
{
"epoch": 1.02,
"learning_rate": 8.976535197204195e-06,
"loss": 2.5963,
"step": 8200
},
{
"epoch": 1.04,
"learning_rate": 8.964053919121319e-06,
"loss": 2.5891,
"step": 8300
},
{
"epoch": 1.05,
"learning_rate": 8.951572641038444e-06,
"loss": 2.5719,
"step": 8400
},
{
"epoch": 1.06,
"learning_rate": 8.939091362955567e-06,
"loss": 2.5967,
"step": 8500
},
{
"epoch": 1.07,
"learning_rate": 8.926610084872692e-06,
"loss": 2.5656,
"step": 8600
},
{
"epoch": 1.09,
"learning_rate": 8.914128806789815e-06,
"loss": 2.6024,
"step": 8700
},
{
"epoch": 1.1,
"learning_rate": 8.90164752870694e-06,
"loss": 2.566,
"step": 8800
},
{
"epoch": 1.11,
"learning_rate": 8.889166250624066e-06,
"loss": 2.5757,
"step": 8900
},
{
"epoch": 1.12,
"learning_rate": 8.876684972541189e-06,
"loss": 2.5595,
"step": 9000
},
{
"epoch": 1.14,
"learning_rate": 8.864203694458312e-06,
"loss": 2.605,
"step": 9100
},
{
"epoch": 1.15,
"learning_rate": 8.851722416375437e-06,
"loss": 2.594,
"step": 9200
},
{
"epoch": 1.16,
"learning_rate": 8.839241138292561e-06,
"loss": 2.6241,
"step": 9300
},
{
"epoch": 1.17,
"learning_rate": 8.826759860209686e-06,
"loss": 2.6328,
"step": 9400
},
{
"epoch": 1.19,
"learning_rate": 8.814278582126811e-06,
"loss": 2.5948,
"step": 9500
},
{
"epoch": 1.2,
"learning_rate": 8.801797304043934e-06,
"loss": 2.5232,
"step": 9600
},
{
"epoch": 1.21,
"learning_rate": 8.78931602596106e-06,
"loss": 2.614,
"step": 9700
},
{
"epoch": 1.22,
"learning_rate": 8.776834747878183e-06,
"loss": 2.561,
"step": 9800
},
{
"epoch": 1.24,
"learning_rate": 8.764353469795308e-06,
"loss": 2.6071,
"step": 9900
},
{
"epoch": 1.25,
"learning_rate": 8.751872191712431e-06,
"loss": 2.5902,
"step": 10000
},
{
"epoch": 1.26,
"learning_rate": 8.739390913629556e-06,
"loss": 2.5653,
"step": 10100
},
{
"epoch": 1.27,
"learning_rate": 8.726909635546682e-06,
"loss": 2.5746,
"step": 10200
},
{
"epoch": 1.29,
"learning_rate": 8.714428357463805e-06,
"loss": 2.525,
"step": 10300
},
{
"epoch": 1.3,
"learning_rate": 8.701947079380928e-06,
"loss": 2.6418,
"step": 10400
},
{
"epoch": 1.31,
"learning_rate": 8.689465801298053e-06,
"loss": 2.6579,
"step": 10500
},
{
"epoch": 1.32,
"learning_rate": 8.676984523215179e-06,
"loss": 2.5815,
"step": 10600
},
{
"epoch": 1.34,
"learning_rate": 8.664503245132302e-06,
"loss": 2.6014,
"step": 10700
},
{
"epoch": 1.35,
"learning_rate": 8.652021967049427e-06,
"loss": 2.6233,
"step": 10800
},
{
"epoch": 1.36,
"learning_rate": 8.63954068896655e-06,
"loss": 2.5808,
"step": 10900
},
{
"epoch": 1.37,
"learning_rate": 8.627059410883675e-06,
"loss": 2.6013,
"step": 11000
},
{
"epoch": 1.39,
"learning_rate": 8.614578132800799e-06,
"loss": 2.5305,
"step": 11100
},
{
"epoch": 1.4,
"learning_rate": 8.602096854717924e-06,
"loss": 2.5726,
"step": 11200
},
{
"epoch": 1.41,
"learning_rate": 8.589615576635049e-06,
"loss": 2.5862,
"step": 11300
},
{
"epoch": 1.42,
"learning_rate": 8.577134298552172e-06,
"loss": 2.5728,
"step": 11400
},
{
"epoch": 1.44,
"learning_rate": 8.564653020469298e-06,
"loss": 2.5863,
"step": 11500
},
{
"epoch": 1.45,
"learning_rate": 8.552171742386421e-06,
"loss": 2.5121,
"step": 11600
},
{
"epoch": 1.46,
"learning_rate": 8.539690464303544e-06,
"loss": 2.583,
"step": 11700
},
{
"epoch": 1.47,
"learning_rate": 8.52720918622067e-06,
"loss": 2.6128,
"step": 11800
},
{
"epoch": 1.49,
"learning_rate": 8.514727908137794e-06,
"loss": 2.5568,
"step": 11900
},
{
"epoch": 1.5,
"learning_rate": 8.502246630054918e-06,
"loss": 2.5913,
"step": 12000
},
{
"epoch": 1.51,
"learning_rate": 8.489765351972043e-06,
"loss": 2.6139,
"step": 12100
},
{
"epoch": 1.52,
"learning_rate": 8.477284073889166e-06,
"loss": 2.5659,
"step": 12200
},
{
"epoch": 1.54,
"learning_rate": 8.464802795806291e-06,
"loss": 2.5676,
"step": 12300
},
{
"epoch": 1.55,
"learning_rate": 8.452321517723415e-06,
"loss": 2.5329,
"step": 12400
},
{
"epoch": 1.56,
"learning_rate": 8.43984023964054e-06,
"loss": 2.5653,
"step": 12500
},
{
"epoch": 1.57,
"learning_rate": 8.427358961557665e-06,
"loss": 2.6023,
"step": 12600
},
{
"epoch": 1.59,
"learning_rate": 8.414877683474788e-06,
"loss": 2.5692,
"step": 12700
},
{
"epoch": 1.6,
"learning_rate": 8.402396405391913e-06,
"loss": 2.5947,
"step": 12800
},
{
"epoch": 1.61,
"learning_rate": 8.389915127309037e-06,
"loss": 2.6291,
"step": 12900
},
{
"epoch": 1.62,
"learning_rate": 8.377433849226162e-06,
"loss": 2.5965,
"step": 13000
},
{
"epoch": 1.64,
"learning_rate": 8.364952571143285e-06,
"loss": 2.5433,
"step": 13100
},
{
"epoch": 1.65,
"learning_rate": 8.35247129306041e-06,
"loss": 2.5804,
"step": 13200
},
{
"epoch": 1.66,
"learning_rate": 8.339990014977535e-06,
"loss": 2.5296,
"step": 13300
},
{
"epoch": 1.67,
"learning_rate": 8.327508736894659e-06,
"loss": 2.6349,
"step": 13400
},
{
"epoch": 1.68,
"learning_rate": 8.315027458811782e-06,
"loss": 2.5424,
"step": 13500
},
{
"epoch": 1.7,
"learning_rate": 8.302546180728907e-06,
"loss": 2.6139,
"step": 13600
},
{
"epoch": 1.71,
"learning_rate": 8.29006490264603e-06,
"loss": 2.585,
"step": 13700
},
{
"epoch": 1.72,
"learning_rate": 8.277583624563156e-06,
"loss": 2.5734,
"step": 13800
},
{
"epoch": 1.73,
"learning_rate": 8.265102346480281e-06,
"loss": 2.591,
"step": 13900
},
{
"epoch": 1.75,
"learning_rate": 8.252621068397404e-06,
"loss": 2.6089,
"step": 14000
},
{
"epoch": 1.76,
"learning_rate": 8.24013979031453e-06,
"loss": 2.6216,
"step": 14100
},
{
"epoch": 1.77,
"learning_rate": 8.227658512231653e-06,
"loss": 2.5636,
"step": 14200
},
{
"epoch": 1.78,
"learning_rate": 8.215177234148778e-06,
"loss": 2.5922,
"step": 14300
},
{
"epoch": 1.8,
"learning_rate": 8.202695956065901e-06,
"loss": 2.5404,
"step": 14400
},
{
"epoch": 1.81,
"learning_rate": 8.190214677983026e-06,
"loss": 2.6366,
"step": 14500
},
{
"epoch": 1.82,
"learning_rate": 8.177733399900151e-06,
"loss": 2.6201,
"step": 14600
},
{
"epoch": 1.83,
"learning_rate": 8.165252121817275e-06,
"loss": 2.5972,
"step": 14700
},
{
"epoch": 1.85,
"learning_rate": 8.152770843734398e-06,
"loss": 2.5356,
"step": 14800
},
{
"epoch": 1.86,
"learning_rate": 8.140289565651523e-06,
"loss": 2.6053,
"step": 14900
},
{
"epoch": 1.87,
"learning_rate": 8.127808287568648e-06,
"loss": 2.5312,
"step": 15000
},
{
"epoch": 1.88,
"learning_rate": 8.115327009485772e-06,
"loss": 2.5696,
"step": 15100
},
{
"epoch": 1.9,
"learning_rate": 8.102845731402897e-06,
"loss": 2.5485,
"step": 15200
},
{
"epoch": 1.91,
"learning_rate": 8.09036445332002e-06,
"loss": 2.5444,
"step": 15300
},
{
"epoch": 1.92,
"learning_rate": 8.077883175237144e-06,
"loss": 2.5953,
"step": 15400
},
{
"epoch": 1.93,
"learning_rate": 8.065401897154269e-06,
"loss": 2.5588,
"step": 15500
},
{
"epoch": 1.95,
"learning_rate": 8.052920619071394e-06,
"loss": 2.5685,
"step": 15600
},
{
"epoch": 1.96,
"learning_rate": 8.040439340988519e-06,
"loss": 2.6248,
"step": 15700
},
{
"epoch": 1.97,
"learning_rate": 8.027958062905642e-06,
"loss": 2.5816,
"step": 15800
},
{
"epoch": 1.98,
"learning_rate": 8.015476784822767e-06,
"loss": 2.5558,
"step": 15900
},
{
"epoch": 2.0,
"learning_rate": 8.00299550673989e-06,
"loss": 2.6264,
"step": 16000
},
{
"epoch": 2.01,
"learning_rate": 7.990514228657014e-06,
"loss": 2.5924,
"step": 16100
},
{
"epoch": 2.02,
"learning_rate": 7.97803295057414e-06,
"loss": 2.5352,
"step": 16200
},
{
"epoch": 2.03,
"learning_rate": 7.965551672491264e-06,
"loss": 2.5441,
"step": 16300
},
{
"epoch": 2.05,
"learning_rate": 7.953070394408388e-06,
"loss": 2.5431,
"step": 16400
},
{
"epoch": 2.06,
"learning_rate": 7.940589116325513e-06,
"loss": 2.5556,
"step": 16500
},
{
"epoch": 2.07,
"learning_rate": 7.928107838242636e-06,
"loss": 2.5459,
"step": 16600
},
{
"epoch": 2.08,
"learning_rate": 7.915626560159761e-06,
"loss": 2.6096,
"step": 16700
},
{
"epoch": 2.1,
"learning_rate": 7.903145282076885e-06,
"loss": 2.5705,
"step": 16800
},
{
"epoch": 2.11,
"learning_rate": 7.89066400399401e-06,
"loss": 2.6404,
"step": 16900
},
{
"epoch": 2.12,
"learning_rate": 7.878182725911135e-06,
"loss": 2.5122,
"step": 17000
},
{
"epoch": 2.13,
"learning_rate": 7.865701447828258e-06,
"loss": 2.5761,
"step": 17100
},
{
"epoch": 2.15,
"learning_rate": 7.853220169745383e-06,
"loss": 2.5586,
"step": 17200
},
{
"epoch": 2.16,
"learning_rate": 7.840738891662507e-06,
"loss": 2.5417,
"step": 17300
},
{
"epoch": 2.17,
"learning_rate": 7.82825761357963e-06,
"loss": 2.5312,
"step": 17400
},
{
"epoch": 2.18,
"learning_rate": 7.815776335496755e-06,
"loss": 2.6014,
"step": 17500
},
{
"epoch": 2.2,
"learning_rate": 7.80329505741388e-06,
"loss": 2.5719,
"step": 17600
},
{
"epoch": 2.21,
"learning_rate": 7.790813779331005e-06,
"loss": 2.6213,
"step": 17700
},
{
"epoch": 2.22,
"learning_rate": 7.778332501248129e-06,
"loss": 2.5363,
"step": 17800
},
{
"epoch": 2.23,
"learning_rate": 7.765851223165252e-06,
"loss": 2.5023,
"step": 17900
},
{
"epoch": 2.25,
"learning_rate": 7.753369945082377e-06,
"loss": 2.5229,
"step": 18000
},
{
"epoch": 2.26,
"learning_rate": 7.7408886669995e-06,
"loss": 2.5801,
"step": 18100
},
{
"epoch": 2.27,
"learning_rate": 7.728407388916626e-06,
"loss": 2.5993,
"step": 18200
},
{
"epoch": 2.28,
"learning_rate": 7.71592611083375e-06,
"loss": 2.5494,
"step": 18300
},
{
"epoch": 2.3,
"learning_rate": 7.703444832750874e-06,
"loss": 2.5426,
"step": 18400
},
{
"epoch": 2.31,
"learning_rate": 7.690963554667998e-06,
"loss": 2.5575,
"step": 18500
},
{
"epoch": 2.32,
"learning_rate": 7.678482276585123e-06,
"loss": 2.5636,
"step": 18600
},
{
"epoch": 2.33,
"learning_rate": 7.666000998502248e-06,
"loss": 2.6409,
"step": 18700
},
{
"epoch": 2.35,
"learning_rate": 7.653519720419371e-06,
"loss": 2.6014,
"step": 18800
},
{
"epoch": 2.36,
"learning_rate": 7.641038442336496e-06,
"loss": 2.5964,
"step": 18900
},
{
"epoch": 2.37,
"learning_rate": 7.6285571642536205e-06,
"loss": 2.5992,
"step": 19000
},
{
"epoch": 2.38,
"learning_rate": 7.616075886170744e-06,
"loss": 2.5852,
"step": 19100
},
{
"epoch": 2.4,
"learning_rate": 7.603594608087869e-06,
"loss": 2.6166,
"step": 19200
},
{
"epoch": 2.41,
"learning_rate": 7.591113330004993e-06,
"loss": 2.5835,
"step": 19300
},
{
"epoch": 2.42,
"learning_rate": 7.578632051922118e-06,
"loss": 2.5523,
"step": 19400
},
{
"epoch": 2.43,
"learning_rate": 7.566150773839242e-06,
"loss": 2.5704,
"step": 19500
},
{
"epoch": 2.45,
"learning_rate": 7.553669495756366e-06,
"loss": 2.6,
"step": 19600
},
{
"epoch": 2.46,
"learning_rate": 7.541188217673491e-06,
"loss": 2.5654,
"step": 19700
},
{
"epoch": 2.47,
"learning_rate": 7.528706939590614e-06,
"loss": 2.5707,
"step": 19800
},
{
"epoch": 2.48,
"learning_rate": 7.516225661507739e-06,
"loss": 2.5366,
"step": 19900
},
{
"epoch": 2.5,
"learning_rate": 7.503744383424864e-06,
"loss": 2.5468,
"step": 20000
},
{
"epoch": 2.51,
"learning_rate": 7.491263105341987e-06,
"loss": 2.5059,
"step": 20100
},
{
"epoch": 2.52,
"learning_rate": 7.478781827259111e-06,
"loss": 2.5545,
"step": 20200
},
{
"epoch": 2.53,
"learning_rate": 7.466300549176236e-06,
"loss": 2.6233,
"step": 20300
},
{
"epoch": 2.55,
"learning_rate": 7.453819271093361e-06,
"loss": 2.5522,
"step": 20400
},
{
"epoch": 2.56,
"learning_rate": 7.441337993010485e-06,
"loss": 2.5458,
"step": 20500
},
{
"epoch": 2.57,
"learning_rate": 7.428856714927609e-06,
"loss": 2.5973,
"step": 20600
},
{
"epoch": 2.58,
"learning_rate": 7.416375436844734e-06,
"loss": 2.5794,
"step": 20700
},
{
"epoch": 2.6,
"learning_rate": 7.403894158761858e-06,
"loss": 2.5503,
"step": 20800
},
{
"epoch": 2.61,
"learning_rate": 7.391412880678982e-06,
"loss": 2.5904,
"step": 20900
},
{
"epoch": 2.62,
"learning_rate": 7.378931602596107e-06,
"loss": 2.5643,
"step": 21000
},
{
"epoch": 2.63,
"learning_rate": 7.366450324513231e-06,
"loss": 2.6286,
"step": 21100
},
{
"epoch": 2.65,
"learning_rate": 7.3539690464303545e-06,
"loss": 2.4896,
"step": 21200
},
{
"epoch": 2.66,
"learning_rate": 7.34148776834748e-06,
"loss": 2.5652,
"step": 21300
},
{
"epoch": 2.67,
"learning_rate": 7.329006490264604e-06,
"loss": 2.5344,
"step": 21400
},
{
"epoch": 2.68,
"learning_rate": 7.316525212181727e-06,
"loss": 2.5794,
"step": 21500
},
{
"epoch": 2.7,
"learning_rate": 7.304043934098852e-06,
"loss": 2.5678,
"step": 21600
},
{
"epoch": 2.71,
"learning_rate": 7.2915626560159766e-06,
"loss": 2.5789,
"step": 21700
},
{
"epoch": 2.72,
"learning_rate": 7.279081377933101e-06,
"loss": 2.5663,
"step": 21800
},
{
"epoch": 2.73,
"learning_rate": 7.266600099850225e-06,
"loss": 2.5707,
"step": 21900
},
{
"epoch": 2.75,
"learning_rate": 7.25411882176735e-06,
"loss": 2.552,
"step": 22000
},
{
"epoch": 2.76,
"learning_rate": 7.241637543684474e-06,
"loss": 2.5694,
"step": 22100
},
{
"epoch": 2.77,
"learning_rate": 7.229156265601598e-06,
"loss": 2.5828,
"step": 22200
},
{
"epoch": 2.78,
"learning_rate": 7.216674987518723e-06,
"loss": 2.5308,
"step": 22300
},
{
"epoch": 2.8,
"learning_rate": 7.204193709435847e-06,
"loss": 2.4647,
"step": 22400
},
{
"epoch": 2.81,
"learning_rate": 7.1917124313529705e-06,
"loss": 2.5888,
"step": 22500
},
{
"epoch": 2.82,
"learning_rate": 7.1792311532700956e-06,
"loss": 2.5999,
"step": 22600
},
{
"epoch": 2.83,
"learning_rate": 7.16674987518722e-06,
"loss": 2.6143,
"step": 22700
},
{
"epoch": 2.85,
"learning_rate": 7.154268597104343e-06,
"loss": 2.6138,
"step": 22800
},
{
"epoch": 2.86,
"learning_rate": 7.141787319021468e-06,
"loss": 2.5534,
"step": 22900
},
{
"epoch": 2.87,
"learning_rate": 7.1293060409385925e-06,
"loss": 2.5856,
"step": 23000
},
{
"epoch": 2.88,
"learning_rate": 7.116824762855718e-06,
"loss": 2.5619,
"step": 23100
},
{
"epoch": 2.9,
"learning_rate": 7.104343484772841e-06,
"loss": 2.5742,
"step": 23200
},
{
"epoch": 2.91,
"learning_rate": 7.091862206689965e-06,
"loss": 2.5515,
"step": 23300
},
{
"epoch": 2.92,
"learning_rate": 7.07938092860709e-06,
"loss": 2.543,
"step": 23400
},
{
"epoch": 2.93,
"learning_rate": 7.066899650524214e-06,
"loss": 2.5166,
"step": 23500
},
{
"epoch": 2.95,
"learning_rate": 7.054418372441339e-06,
"loss": 2.5233,
"step": 23600
},
{
"epoch": 2.96,
"learning_rate": 7.041937094358463e-06,
"loss": 2.571,
"step": 23700
},
{
"epoch": 2.97,
"learning_rate": 7.029455816275588e-06,
"loss": 2.6225,
"step": 23800
},
{
"epoch": 2.98,
"learning_rate": 7.0169745381927115e-06,
"loss": 2.5945,
"step": 23900
},
{
"epoch": 3.0,
"learning_rate": 7.004493260109836e-06,
"loss": 2.5936,
"step": 24000
},
{
"epoch": 3.01,
"learning_rate": 6.992011982026961e-06,
"loss": 2.6008,
"step": 24100
},
{
"epoch": 3.02,
"learning_rate": 6.979530703944084e-06,
"loss": 2.5778,
"step": 24200
},
{
"epoch": 3.03,
"learning_rate": 6.9670494258612085e-06,
"loss": 2.5153,
"step": 24300
},
{
"epoch": 3.05,
"learning_rate": 6.9545681477783335e-06,
"loss": 2.5507,
"step": 24400
},
{
"epoch": 3.06,
"learning_rate": 6.942086869695457e-06,
"loss": 2.5331,
"step": 24500
},
{
"epoch": 3.07,
"learning_rate": 6.929605591612581e-06,
"loss": 2.4988,
"step": 24600
},
{
"epoch": 3.08,
"learning_rate": 6.917124313529706e-06,
"loss": 2.6262,
"step": 24700
},
{
"epoch": 3.1,
"learning_rate": 6.9046430354468305e-06,
"loss": 2.5813,
"step": 24800
},
{
"epoch": 3.11,
"learning_rate": 6.892161757363955e-06,
"loss": 2.475,
"step": 24900
},
{
"epoch": 3.12,
"learning_rate": 6.879680479281079e-06,
"loss": 2.5655,
"step": 25000
},
{
"epoch": 3.13,
"learning_rate": 6.867199201198203e-06,
"loss": 2.5762,
"step": 25100
},
{
"epoch": 3.15,
"learning_rate": 6.8547179231153274e-06,
"loss": 2.5742,
"step": 25200
},
{
"epoch": 3.16,
"learning_rate": 6.842236645032452e-06,
"loss": 2.6108,
"step": 25300
},
{
"epoch": 3.17,
"learning_rate": 6.829755366949577e-06,
"loss": 2.576,
"step": 25400
},
{
"epoch": 3.18,
"learning_rate": 6.817274088866701e-06,
"loss": 2.5012,
"step": 25500
},
{
"epoch": 3.2,
"learning_rate": 6.804792810783824e-06,
"loss": 2.528,
"step": 25600
},
{
"epoch": 3.21,
"learning_rate": 6.7923115327009495e-06,
"loss": 2.5362,
"step": 25700
},
{
"epoch": 3.22,
"learning_rate": 6.779830254618074e-06,
"loss": 2.5012,
"step": 25800
},
{
"epoch": 3.23,
"learning_rate": 6.767348976535197e-06,
"loss": 2.5318,
"step": 25900
},
{
"epoch": 3.25,
"learning_rate": 6.754867698452322e-06,
"loss": 2.5424,
"step": 26000
},
{
"epoch": 3.26,
"learning_rate": 6.7423864203694464e-06,
"loss": 2.4849,
"step": 26100
},
{
"epoch": 3.27,
"learning_rate": 6.72990514228657e-06,
"loss": 2.5104,
"step": 26200
},
{
"epoch": 3.28,
"learning_rate": 6.717423864203695e-06,
"loss": 2.6342,
"step": 26300
},
{
"epoch": 3.3,
"learning_rate": 6.704942586120819e-06,
"loss": 2.5654,
"step": 26400
},
{
"epoch": 3.31,
"learning_rate": 6.692461308037944e-06,
"loss": 2.5578,
"step": 26500
},
{
"epoch": 3.32,
"learning_rate": 6.679980029955068e-06,
"loss": 2.561,
"step": 26600
},
{
"epoch": 3.33,
"learning_rate": 6.667498751872193e-06,
"loss": 2.5678,
"step": 26700
},
{
"epoch": 3.34,
"learning_rate": 6.655017473789317e-06,
"loss": 2.5214,
"step": 26800
},
{
"epoch": 3.36,
"learning_rate": 6.64253619570644e-06,
"loss": 2.6021,
"step": 26900
},
{
"epoch": 3.37,
"learning_rate": 6.6300549176235654e-06,
"loss": 2.524,
"step": 27000
},
{
"epoch": 3.38,
"learning_rate": 6.61757363954069e-06,
"loss": 2.5641,
"step": 27100
},
{
"epoch": 3.39,
"learning_rate": 6.605092361457813e-06,
"loss": 2.59,
"step": 27200
},
{
"epoch": 3.41,
"learning_rate": 6.592611083374938e-06,
"loss": 2.5593,
"step": 27300
},
{
"epoch": 3.42,
"learning_rate": 6.580129805292062e-06,
"loss": 2.5457,
"step": 27400
},
{
"epoch": 3.43,
"learning_rate": 6.5676485272091875e-06,
"loss": 2.5725,
"step": 27500
},
{
"epoch": 3.44,
"learning_rate": 6.555167249126311e-06,
"loss": 2.5082,
"step": 27600
},
{
"epoch": 3.46,
"learning_rate": 6.542685971043435e-06,
"loss": 2.5807,
"step": 27700
},
{
"epoch": 3.47,
"learning_rate": 6.53020469296056e-06,
"loss": 2.6353,
"step": 27800
},
{
"epoch": 3.48,
"learning_rate": 6.5177234148776836e-06,
"loss": 2.6223,
"step": 27900
},
{
"epoch": 3.49,
"learning_rate": 6.505242136794808e-06,
"loss": 2.5538,
"step": 28000
},
{
"epoch": 3.51,
"learning_rate": 6.492760858711933e-06,
"loss": 2.5544,
"step": 28100
},
{
"epoch": 3.52,
"learning_rate": 6.480279580629057e-06,
"loss": 2.6314,
"step": 28200
},
{
"epoch": 3.53,
"learning_rate": 6.467798302546181e-06,
"loss": 2.5278,
"step": 28300
},
{
"epoch": 3.54,
"learning_rate": 6.455317024463306e-06,
"loss": 2.585,
"step": 28400
},
{
"epoch": 3.56,
"learning_rate": 6.442835746380431e-06,
"loss": 2.5468,
"step": 28500
},
{
"epoch": 3.57,
"learning_rate": 6.430354468297554e-06,
"loss": 2.5496,
"step": 28600
},
{
"epoch": 3.58,
"learning_rate": 6.417873190214678e-06,
"loss": 2.5182,
"step": 28700
},
{
"epoch": 3.59,
"learning_rate": 6.405391912131803e-06,
"loss": 2.6226,
"step": 28800
},
{
"epoch": 3.61,
"learning_rate": 6.392910634048927e-06,
"loss": 2.5062,
"step": 28900
},
{
"epoch": 3.62,
"learning_rate": 6.380429355966051e-06,
"loss": 2.5962,
"step": 29000
},
{
"epoch": 3.63,
"learning_rate": 6.367948077883176e-06,
"loss": 2.5113,
"step": 29100
},
{
"epoch": 3.64,
"learning_rate": 6.3554667998003e-06,
"loss": 2.5236,
"step": 29200
},
{
"epoch": 3.66,
"learning_rate": 6.342985521717424e-06,
"loss": 2.4738,
"step": 29300
},
{
"epoch": 3.67,
"learning_rate": 6.330504243634549e-06,
"loss": 2.5689,
"step": 29400
},
{
"epoch": 3.68,
"learning_rate": 6.318022965551673e-06,
"loss": 2.6064,
"step": 29500
},
{
"epoch": 3.69,
"learning_rate": 6.305541687468797e-06,
"loss": 2.5285,
"step": 29600
},
{
"epoch": 3.71,
"learning_rate": 6.2930604093859216e-06,
"loss": 2.6252,
"step": 29700
},
{
"epoch": 3.72,
"learning_rate": 6.280579131303046e-06,
"loss": 2.5797,
"step": 29800
},
{
"epoch": 3.73,
"learning_rate": 6.26809785322017e-06,
"loss": 2.526,
"step": 29900
},
{
"epoch": 3.74,
"learning_rate": 6.255616575137294e-06,
"loss": 2.5698,
"step": 30000
},
{
"epoch": 3.76,
"learning_rate": 6.243135297054419e-06,
"loss": 2.587,
"step": 30100
},
{
"epoch": 3.77,
"learning_rate": 6.230654018971544e-06,
"loss": 2.5236,
"step": 30200
},
{
"epoch": 3.78,
"learning_rate": 6.218172740888667e-06,
"loss": 2.539,
"step": 30300
},
{
"epoch": 3.79,
"learning_rate": 6.205691462805792e-06,
"loss": 2.5915,
"step": 30400
},
{
"epoch": 3.81,
"learning_rate": 6.193210184722916e-06,
"loss": 2.5222,
"step": 30500
},
{
"epoch": 3.82,
"learning_rate": 6.18072890664004e-06,
"loss": 2.5455,
"step": 30600
},
{
"epoch": 3.83,
"learning_rate": 6.168247628557165e-06,
"loss": 2.5418,
"step": 30700
},
{
"epoch": 3.84,
"learning_rate": 6.155766350474289e-06,
"loss": 2.5469,
"step": 30800
},
{
"epoch": 3.86,
"learning_rate": 6.143285072391414e-06,
"loss": 2.5557,
"step": 30900
},
{
"epoch": 3.87,
"learning_rate": 6.1308037943085375e-06,
"loss": 2.534,
"step": 31000
},
{
"epoch": 3.88,
"learning_rate": 6.118322516225662e-06,
"loss": 2.5967,
"step": 31100
},
{
"epoch": 3.89,
"learning_rate": 6.105841238142787e-06,
"loss": 2.493,
"step": 31200
},
{
"epoch": 3.91,
"learning_rate": 6.09335996005991e-06,
"loss": 2.4888,
"step": 31300
},
{
"epoch": 3.92,
"learning_rate": 6.080878681977035e-06,
"loss": 2.4598,
"step": 31400
},
{
"epoch": 3.93,
"learning_rate": 6.0683974038941595e-06,
"loss": 2.5489,
"step": 31500
},
{
"epoch": 3.94,
"learning_rate": 6.055916125811283e-06,
"loss": 2.6207,
"step": 31600
},
{
"epoch": 3.96,
"learning_rate": 6.043434847728408e-06,
"loss": 2.4955,
"step": 31700
},
{
"epoch": 3.97,
"learning_rate": 6.030953569645532e-06,
"loss": 2.5457,
"step": 31800
},
{
"epoch": 3.98,
"learning_rate": 6.018472291562657e-06,
"loss": 2.5991,
"step": 31900
},
{
"epoch": 3.99,
"learning_rate": 6.005991013479781e-06,
"loss": 2.6215,
"step": 32000
},
{
"epoch": 4.01,
"learning_rate": 5.993509735396905e-06,
"loss": 2.483,
"step": 32100
},
{
"epoch": 4.02,
"learning_rate": 5.98102845731403e-06,
"loss": 2.554,
"step": 32200
},
{
"epoch": 4.03,
"learning_rate": 5.9685471792311534e-06,
"loss": 2.5586,
"step": 32300
},
{
"epoch": 4.04,
"learning_rate": 5.956065901148278e-06,
"loss": 2.5568,
"step": 32400
},
{
"epoch": 4.06,
"learning_rate": 5.943584623065403e-06,
"loss": 2.5593,
"step": 32500
},
{
"epoch": 4.07,
"learning_rate": 5.931103344982526e-06,
"loss": 2.5854,
"step": 32600
},
{
"epoch": 4.08,
"learning_rate": 5.91862206689965e-06,
"loss": 2.5227,
"step": 32700
},
{
"epoch": 4.09,
"learning_rate": 5.9061407888167755e-06,
"loss": 2.5099,
"step": 32800
},
{
"epoch": 4.11,
"learning_rate": 5.8936595107339e-06,
"loss": 2.4718,
"step": 32900
},
{
"epoch": 4.12,
"learning_rate": 5.881178232651024e-06,
"loss": 2.5361,
"step": 33000
},
{
"epoch": 4.13,
"learning_rate": 5.868696954568148e-06,
"loss": 2.5505,
"step": 33100
},
{
"epoch": 4.14,
"learning_rate": 5.856215676485273e-06,
"loss": 2.5501,
"step": 33200
},
{
"epoch": 4.16,
"learning_rate": 5.843734398402397e-06,
"loss": 2.5911,
"step": 33300
},
{
"epoch": 4.17,
"learning_rate": 5.831253120319521e-06,
"loss": 2.5891,
"step": 33400
},
{
"epoch": 4.18,
"learning_rate": 5.818771842236646e-06,
"loss": 2.5229,
"step": 33500
},
{
"epoch": 4.19,
"learning_rate": 5.80629056415377e-06,
"loss": 2.518,
"step": 33600
},
{
"epoch": 4.21,
"learning_rate": 5.793809286070894e-06,
"loss": 2.5585,
"step": 33700
},
{
"epoch": 4.22,
"learning_rate": 5.781328007988019e-06,
"loss": 2.5518,
"step": 33800
},
{
"epoch": 4.23,
"learning_rate": 5.768846729905143e-06,
"loss": 2.5389,
"step": 33900
},
{
"epoch": 4.24,
"learning_rate": 5.756365451822266e-06,
"loss": 2.5181,
"step": 34000
},
{
"epoch": 4.26,
"learning_rate": 5.743884173739391e-06,
"loss": 2.4888,
"step": 34100
},
{
"epoch": 4.27,
"learning_rate": 5.731402895656516e-06,
"loss": 2.5526,
"step": 34200
},
{
"epoch": 4.28,
"learning_rate": 5.71892161757364e-06,
"loss": 2.5189,
"step": 34300
},
{
"epoch": 4.29,
"learning_rate": 5.706440339490764e-06,
"loss": 2.4954,
"step": 34400
},
{
"epoch": 4.31,
"learning_rate": 5.693959061407888e-06,
"loss": 2.5498,
"step": 34500
},
{
"epoch": 4.32,
"learning_rate": 5.6814777833250135e-06,
"loss": 2.5855,
"step": 34600
},
{
"epoch": 4.33,
"learning_rate": 5.668996505242137e-06,
"loss": 2.5233,
"step": 34700
},
{
"epoch": 4.34,
"learning_rate": 5.656515227159262e-06,
"loss": 2.4986,
"step": 34800
},
{
"epoch": 4.36,
"learning_rate": 5.644033949076386e-06,
"loss": 2.4563,
"step": 34900
},
{
"epoch": 4.37,
"learning_rate": 5.6315526709935096e-06,
"loss": 2.5183,
"step": 35000
},
{
"epoch": 4.38,
"learning_rate": 5.619071392910635e-06,
"loss": 2.5314,
"step": 35100
},
{
"epoch": 4.39,
"learning_rate": 5.606590114827759e-06,
"loss": 2.5084,
"step": 35200
},
{
"epoch": 4.41,
"learning_rate": 5.594108836744882e-06,
"loss": 2.5564,
"step": 35300
},
{
"epoch": 4.42,
"learning_rate": 5.581627558662007e-06,
"loss": 2.4687,
"step": 35400
},
{
"epoch": 4.43,
"learning_rate": 5.569146280579132e-06,
"loss": 2.5122,
"step": 35500
},
{
"epoch": 4.44,
"learning_rate": 5.556665002496257e-06,
"loss": 2.5395,
"step": 35600
},
{
"epoch": 4.46,
"learning_rate": 5.54418372441338e-06,
"loss": 2.6325,
"step": 35700
},
{
"epoch": 4.47,
"learning_rate": 5.531702446330504e-06,
"loss": 2.5592,
"step": 35800
},
{
"epoch": 4.48,
"learning_rate": 5.519221168247629e-06,
"loss": 2.5724,
"step": 35900
},
{
"epoch": 4.49,
"learning_rate": 5.506739890164753e-06,
"loss": 2.5779,
"step": 36000
},
{
"epoch": 4.51,
"learning_rate": 5.494258612081878e-06,
"loss": 2.556,
"step": 36100
},
{
"epoch": 4.52,
"learning_rate": 5.481777333999002e-06,
"loss": 2.4931,
"step": 36200
},
{
"epoch": 4.53,
"learning_rate": 5.469296055916127e-06,
"loss": 2.5409,
"step": 36300
},
{
"epoch": 4.54,
"learning_rate": 5.456814777833251e-06,
"loss": 2.5513,
"step": 36400
},
{
"epoch": 4.56,
"learning_rate": 5.444333499750375e-06,
"loss": 2.5558,
"step": 36500
},
{
"epoch": 4.57,
"learning_rate": 5.4318522216675e-06,
"loss": 2.5406,
"step": 36600
},
{
"epoch": 4.58,
"learning_rate": 5.419370943584623e-06,
"loss": 2.5845,
"step": 36700
},
{
"epoch": 4.59,
"learning_rate": 5.4068896655017475e-06,
"loss": 2.5388,
"step": 36800
},
{
"epoch": 4.61,
"learning_rate": 5.394408387418873e-06,
"loss": 2.5498,
"step": 36900
},
{
"epoch": 4.62,
"learning_rate": 5.381927109335996e-06,
"loss": 2.4883,
"step": 37000
},
{
"epoch": 4.63,
"learning_rate": 5.36944583125312e-06,
"loss": 2.5466,
"step": 37100
},
{
"epoch": 4.64,
"learning_rate": 5.356964553170245e-06,
"loss": 2.5449,
"step": 37200
},
{
"epoch": 4.66,
"learning_rate": 5.3444832750873696e-06,
"loss": 2.5573,
"step": 37300
},
{
"epoch": 4.67,
"learning_rate": 5.332001997004494e-06,
"loss": 2.5178,
"step": 37400
},
{
"epoch": 4.68,
"learning_rate": 5.319520718921618e-06,
"loss": 2.5307,
"step": 37500
},
{
"epoch": 4.69,
"learning_rate": 5.307039440838742e-06,
"loss": 2.5952,
"step": 37600
},
{
"epoch": 4.71,
"learning_rate": 5.2945581627558665e-06,
"loss": 2.6333,
"step": 37700
},
{
"epoch": 4.72,
"learning_rate": 5.282076884672991e-06,
"loss": 2.5498,
"step": 37800
},
{
"epoch": 4.73,
"learning_rate": 5.269595606590116e-06,
"loss": 2.5644,
"step": 37900
},
{
"epoch": 4.74,
"learning_rate": 5.257114328507239e-06,
"loss": 2.5154,
"step": 38000
},
{
"epoch": 4.76,
"learning_rate": 5.2446330504243635e-06,
"loss": 2.5258,
"step": 38100
},
{
"epoch": 4.77,
"learning_rate": 5.2321517723414886e-06,
"loss": 2.6026,
"step": 38200
},
{
"epoch": 4.78,
"learning_rate": 5.219670494258613e-06,
"loss": 2.5448,
"step": 38300
},
{
"epoch": 4.79,
"learning_rate": 5.207189216175736e-06,
"loss": 2.5479,
"step": 38400
},
{
"epoch": 4.81,
"learning_rate": 5.194707938092861e-06,
"loss": 2.6038,
"step": 38500
},
{
"epoch": 4.82,
"learning_rate": 5.1822266600099855e-06,
"loss": 2.5809,
"step": 38600
},
{
"epoch": 4.83,
"learning_rate": 5.169745381927109e-06,
"loss": 2.5219,
"step": 38700
},
{
"epoch": 4.84,
"learning_rate": 5.157264103844234e-06,
"loss": 2.5623,
"step": 38800
},
{
"epoch": 4.86,
"learning_rate": 5.144782825761358e-06,
"loss": 2.5276,
"step": 38900
},
{
"epoch": 4.87,
"learning_rate": 5.132301547678483e-06,
"loss": 2.5995,
"step": 39000
},
{
"epoch": 4.88,
"learning_rate": 5.119820269595607e-06,
"loss": 2.5641,
"step": 39100
},
{
"epoch": 4.89,
"learning_rate": 5.107338991512732e-06,
"loss": 2.5158,
"step": 39200
},
{
"epoch": 4.91,
"learning_rate": 5.094857713429856e-06,
"loss": 2.4968,
"step": 39300
},
{
"epoch": 4.92,
"learning_rate": 5.082376435346979e-06,
"loss": 2.5531,
"step": 39400
},
{
"epoch": 4.93,
"learning_rate": 5.0698951572641045e-06,
"loss": 2.5547,
"step": 39500
},
{
"epoch": 4.94,
"learning_rate": 5.057413879181229e-06,
"loss": 2.5746,
"step": 39600
},
{
"epoch": 4.96,
"learning_rate": 5.044932601098352e-06,
"loss": 2.461,
"step": 39700
},
{
"epoch": 4.97,
"learning_rate": 5.032451323015477e-06,
"loss": 2.5375,
"step": 39800
},
{
"epoch": 4.98,
"learning_rate": 5.0199700449326015e-06,
"loss": 2.5461,
"step": 39900
},
{
"epoch": 4.99,
"learning_rate": 5.0074887668497265e-06,
"loss": 2.5562,
"step": 40000
},
{
"epoch": 5.0,
"learning_rate": 4.995007488766851e-06,
"loss": 2.5117,
"step": 40100
},
{
"epoch": 5.02,
"learning_rate": 4.982526210683974e-06,
"loss": 2.5144,
"step": 40200
},
{
"epoch": 5.03,
"learning_rate": 4.970044932601098e-06,
"loss": 2.4917,
"step": 40300
},
{
"epoch": 5.04,
"learning_rate": 4.9575636545182235e-06,
"loss": 2.5242,
"step": 40400
},
{
"epoch": 5.05,
"learning_rate": 4.945082376435347e-06,
"loss": 2.5614,
"step": 40500
},
{
"epoch": 5.07,
"learning_rate": 4.932601098352472e-06,
"loss": 2.5885,
"step": 40600
},
{
"epoch": 5.08,
"learning_rate": 4.920119820269596e-06,
"loss": 2.5738,
"step": 40700
},
{
"epoch": 5.09,
"learning_rate": 4.9076385421867204e-06,
"loss": 2.5221,
"step": 40800
},
{
"epoch": 5.1,
"learning_rate": 4.895157264103845e-06,
"loss": 2.579,
"step": 40900
},
{
"epoch": 5.12,
"learning_rate": 4.882675986020969e-06,
"loss": 2.5234,
"step": 41000
},
{
"epoch": 5.13,
"learning_rate": 4.870194707938093e-06,
"loss": 2.5614,
"step": 41100
},
{
"epoch": 5.14,
"learning_rate": 4.857713429855217e-06,
"loss": 2.5128,
"step": 41200
},
{
"epoch": 5.15,
"learning_rate": 4.845232151772342e-06,
"loss": 2.4828,
"step": 41300
},
{
"epoch": 5.17,
"learning_rate": 4.832750873689466e-06,
"loss": 2.5557,
"step": 41400
},
{
"epoch": 5.18,
"learning_rate": 4.82026959560659e-06,
"loss": 2.556,
"step": 41500
},
{
"epoch": 5.19,
"learning_rate": 4.807788317523715e-06,
"loss": 2.5401,
"step": 41600
},
{
"epoch": 5.2,
"learning_rate": 4.7953070394408394e-06,
"loss": 2.501,
"step": 41700
},
{
"epoch": 5.22,
"learning_rate": 4.782825761357963e-06,
"loss": 2.4316,
"step": 41800
},
{
"epoch": 5.23,
"learning_rate": 4.770344483275088e-06,
"loss": 2.5133,
"step": 41900
},
{
"epoch": 5.24,
"learning_rate": 4.757863205192212e-06,
"loss": 2.574,
"step": 42000
},
{
"epoch": 5.25,
"learning_rate": 4.745381927109336e-06,
"loss": 2.6097,
"step": 42100
},
{
"epoch": 5.27,
"learning_rate": 4.732900649026461e-06,
"loss": 2.5314,
"step": 42200
},
{
"epoch": 5.28,
"learning_rate": 4.720419370943585e-06,
"loss": 2.5095,
"step": 42300
},
{
"epoch": 5.29,
"learning_rate": 4.707938092860709e-06,
"loss": 2.5508,
"step": 42400
},
{
"epoch": 5.3,
"learning_rate": 4.695456814777833e-06,
"loss": 2.4751,
"step": 42500
},
{
"epoch": 5.32,
"learning_rate": 4.6829755366949584e-06,
"loss": 2.5443,
"step": 42600
},
{
"epoch": 5.33,
"learning_rate": 4.670494258612082e-06,
"loss": 2.5164,
"step": 42700
},
{
"epoch": 5.34,
"learning_rate": 4.658012980529207e-06,
"loss": 2.5643,
"step": 42800
},
{
"epoch": 5.35,
"learning_rate": 4.645531702446331e-06,
"loss": 2.5964,
"step": 42900
},
{
"epoch": 5.37,
"learning_rate": 4.633050424363455e-06,
"loss": 2.5625,
"step": 43000
},
{
"epoch": 5.38,
"learning_rate": 4.62056914628058e-06,
"loss": 2.5365,
"step": 43100
},
{
"epoch": 5.39,
"learning_rate": 4.608087868197704e-06,
"loss": 2.5633,
"step": 43200
},
{
"epoch": 5.4,
"learning_rate": 4.595606590114828e-06,
"loss": 2.4372,
"step": 43300
},
{
"epoch": 5.42,
"learning_rate": 4.583125312031952e-06,
"loss": 2.5273,
"step": 43400
},
{
"epoch": 5.43,
"learning_rate": 4.5706440339490766e-06,
"loss": 2.4731,
"step": 43500
},
{
"epoch": 5.44,
"learning_rate": 4.558162755866201e-06,
"loss": 2.4437,
"step": 43600
},
{
"epoch": 5.45,
"learning_rate": 4.545681477783325e-06,
"loss": 2.5579,
"step": 43700
},
{
"epoch": 5.47,
"learning_rate": 4.53320019970045e-06,
"loss": 2.4914,
"step": 43800
},
{
"epoch": 5.48,
"learning_rate": 4.520718921617574e-06,
"loss": 2.532,
"step": 43900
},
{
"epoch": 5.49,
"learning_rate": 4.508237643534698e-06,
"loss": 2.5552,
"step": 44000
},
{
"epoch": 5.5,
"learning_rate": 4.495756365451823e-06,
"loss": 2.6329,
"step": 44100
},
{
"epoch": 5.52,
"learning_rate": 4.483275087368947e-06,
"loss": 2.5213,
"step": 44200
},
{
"epoch": 5.53,
"learning_rate": 4.470793809286071e-06,
"loss": 2.4853,
"step": 44300
},
{
"epoch": 5.54,
"learning_rate": 4.4583125312031956e-06,
"loss": 2.5358,
"step": 44400
},
{
"epoch": 5.55,
"learning_rate": 4.44583125312032e-06,
"loss": 2.4875,
"step": 44500
},
{
"epoch": 5.57,
"learning_rate": 4.433349975037444e-06,
"loss": 2.5111,
"step": 44600
},
{
"epoch": 5.58,
"learning_rate": 4.420868696954568e-06,
"loss": 2.5036,
"step": 44700
},
{
"epoch": 5.59,
"learning_rate": 4.408387418871693e-06,
"loss": 2.512,
"step": 44800
},
{
"epoch": 5.6,
"learning_rate": 4.395906140788817e-06,
"loss": 2.5135,
"step": 44900
},
{
"epoch": 5.62,
"learning_rate": 4.383424862705942e-06,
"loss": 2.5813,
"step": 45000
},
{
"epoch": 5.63,
"learning_rate": 4.370943584623066e-06,
"loss": 2.5975,
"step": 45100
},
{
"epoch": 5.64,
"learning_rate": 4.3584623065401895e-06,
"loss": 2.4869,
"step": 45200
},
{
"epoch": 5.65,
"learning_rate": 4.3459810284573146e-06,
"loss": 2.5271,
"step": 45300
},
{
"epoch": 5.67,
"learning_rate": 4.333499750374439e-06,
"loss": 2.5598,
"step": 45400
},
{
"epoch": 5.68,
"learning_rate": 4.321018472291563e-06,
"loss": 2.5388,
"step": 45500
},
{
"epoch": 5.69,
"learning_rate": 4.308537194208687e-06,
"loss": 2.5331,
"step": 45600
},
{
"epoch": 5.7,
"learning_rate": 4.2960559161258115e-06,
"loss": 2.5581,
"step": 45700
},
{
"epoch": 5.72,
"learning_rate": 4.283574638042936e-06,
"loss": 2.5059,
"step": 45800
},
{
"epoch": 5.73,
"learning_rate": 4.27109335996006e-06,
"loss": 2.5461,
"step": 45900
},
{
"epoch": 5.74,
"learning_rate": 4.258612081877185e-06,
"loss": 2.5158,
"step": 46000
},
{
"epoch": 5.75,
"learning_rate": 4.2461308037943085e-06,
"loss": 2.5132,
"step": 46100
},
{
"epoch": 5.77,
"learning_rate": 4.233649525711433e-06,
"loss": 2.5099,
"step": 46200
},
{
"epoch": 5.78,
"learning_rate": 4.221168247628558e-06,
"loss": 2.5432,
"step": 46300
},
{
"epoch": 5.79,
"learning_rate": 4.208686969545682e-06,
"loss": 2.5628,
"step": 46400
},
{
"epoch": 5.8,
"learning_rate": 4.196205691462806e-06,
"loss": 2.5235,
"step": 46500
},
{
"epoch": 5.82,
"learning_rate": 4.1837244133799305e-06,
"loss": 2.5383,
"step": 46600
},
{
"epoch": 5.83,
"learning_rate": 4.171243135297055e-06,
"loss": 2.4903,
"step": 46700
},
{
"epoch": 5.84,
"learning_rate": 4.158761857214179e-06,
"loss": 2.5872,
"step": 46800
},
{
"epoch": 5.85,
"learning_rate": 4.146280579131303e-06,
"loss": 2.5354,
"step": 46900
},
{
"epoch": 5.87,
"learning_rate": 4.1337993010484274e-06,
"loss": 2.5798,
"step": 47000
},
{
"epoch": 5.88,
"learning_rate": 4.121318022965552e-06,
"loss": 2.5454,
"step": 47100
},
{
"epoch": 5.89,
"learning_rate": 4.108836744882677e-06,
"loss": 2.5374,
"step": 47200
},
{
"epoch": 5.9,
"learning_rate": 4.096355466799801e-06,
"loss": 2.5371,
"step": 47300
},
{
"epoch": 5.92,
"learning_rate": 4.083874188716924e-06,
"loss": 2.4904,
"step": 47400
},
{
"epoch": 5.93,
"learning_rate": 4.0713929106340495e-06,
"loss": 2.5545,
"step": 47500
},
{
"epoch": 5.94,
"learning_rate": 4.058911632551174e-06,
"loss": 2.5668,
"step": 47600
},
{
"epoch": 5.95,
"learning_rate": 4.046430354468298e-06,
"loss": 2.5541,
"step": 47700
},
{
"epoch": 5.97,
"learning_rate": 4.033949076385422e-06,
"loss": 2.5027,
"step": 47800
},
{
"epoch": 5.98,
"learning_rate": 4.0214677983025464e-06,
"loss": 2.5735,
"step": 47900
},
{
"epoch": 5.99,
"learning_rate": 4.008986520219671e-06,
"loss": 2.5358,
"step": 48000
},
{
"epoch": 6.0,
"learning_rate": 3.996505242136795e-06,
"loss": 2.4827,
"step": 48100
},
{
"epoch": 6.02,
"learning_rate": 3.98402396405392e-06,
"loss": 2.4968,
"step": 48200
},
{
"epoch": 6.03,
"learning_rate": 3.971542685971043e-06,
"loss": 2.492,
"step": 48300
},
{
"epoch": 6.04,
"learning_rate": 3.959061407888168e-06,
"loss": 2.5652,
"step": 48400
},
{
"epoch": 6.05,
"learning_rate": 3.946580129805293e-06,
"loss": 2.5951,
"step": 48500
},
{
"epoch": 6.07,
"learning_rate": 3.934098851722417e-06,
"loss": 2.5054,
"step": 48600
},
{
"epoch": 6.08,
"learning_rate": 3.921617573639541e-06,
"loss": 2.5064,
"step": 48700
},
{
"epoch": 6.09,
"learning_rate": 3.9091362955566654e-06,
"loss": 2.517,
"step": 48800
},
{
"epoch": 6.1,
"learning_rate": 3.89665501747379e-06,
"loss": 2.4971,
"step": 48900
},
{
"epoch": 6.12,
"learning_rate": 3.884173739390914e-06,
"loss": 2.4405,
"step": 49000
},
{
"epoch": 6.13,
"learning_rate": 3.871692461308038e-06,
"loss": 2.4993,
"step": 49100
},
{
"epoch": 6.14,
"learning_rate": 3.859211183225162e-06,
"loss": 2.4815,
"step": 49200
},
{
"epoch": 6.15,
"learning_rate": 3.846729905142287e-06,
"loss": 2.4978,
"step": 49300
},
{
"epoch": 6.17,
"learning_rate": 3.834248627059411e-06,
"loss": 2.5125,
"step": 49400
},
{
"epoch": 6.18,
"learning_rate": 3.821767348976536e-06,
"loss": 2.5636,
"step": 49500
},
{
"epoch": 6.19,
"learning_rate": 3.8092860708936598e-06,
"loss": 2.5993,
"step": 49600
},
{
"epoch": 6.2,
"learning_rate": 3.7968047928107844e-06,
"loss": 2.4773,
"step": 49700
},
{
"epoch": 6.22,
"learning_rate": 3.7843235147279082e-06,
"loss": 2.46,
"step": 49800
},
{
"epoch": 6.23,
"learning_rate": 3.771842236645033e-06,
"loss": 2.538,
"step": 49900
},
{
"epoch": 6.24,
"learning_rate": 3.759360958562157e-06,
"loss": 2.5968,
"step": 50000
},
{
"epoch": 6.25,
"learning_rate": 3.7468796804792814e-06,
"loss": 2.5301,
"step": 50100
},
{
"epoch": 6.27,
"learning_rate": 3.734398402396406e-06,
"loss": 2.5282,
"step": 50200
},
{
"epoch": 6.28,
"learning_rate": 3.72191712431353e-06,
"loss": 2.5195,
"step": 50300
},
{
"epoch": 6.29,
"learning_rate": 3.7094358462306545e-06,
"loss": 2.5422,
"step": 50400
},
{
"epoch": 6.3,
"learning_rate": 3.6969545681477787e-06,
"loss": 2.5415,
"step": 50500
},
{
"epoch": 6.32,
"learning_rate": 3.6844732900649026e-06,
"loss": 2.5067,
"step": 50600
},
{
"epoch": 6.33,
"learning_rate": 3.6719920119820272e-06,
"loss": 2.514,
"step": 50700
},
{
"epoch": 6.34,
"learning_rate": 3.6595107338991515e-06,
"loss": 2.5947,
"step": 50800
},
{
"epoch": 6.35,
"learning_rate": 3.647029455816276e-06,
"loss": 2.4832,
"step": 50900
},
{
"epoch": 6.37,
"learning_rate": 3.6345481777334004e-06,
"loss": 2.5519,
"step": 51000
},
{
"epoch": 6.38,
"learning_rate": 3.622066899650524e-06,
"loss": 2.5448,
"step": 51100
},
{
"epoch": 6.39,
"learning_rate": 3.609585621567649e-06,
"loss": 2.4973,
"step": 51200
},
{
"epoch": 6.4,
"learning_rate": 3.597104343484773e-06,
"loss": 2.55,
"step": 51300
},
{
"epoch": 6.42,
"learning_rate": 3.5846230654018977e-06,
"loss": 2.4911,
"step": 51400
},
{
"epoch": 6.43,
"learning_rate": 3.5721417873190215e-06,
"loss": 2.4954,
"step": 51500
},
{
"epoch": 6.44,
"learning_rate": 3.5596605092361458e-06,
"loss": 2.5314,
"step": 51600
},
{
"epoch": 6.45,
"learning_rate": 3.5471792311532704e-06,
"loss": 2.474,
"step": 51700
},
{
"epoch": 6.47,
"learning_rate": 3.5346979530703947e-06,
"loss": 2.5239,
"step": 51800
},
{
"epoch": 6.48,
"learning_rate": 3.5222166749875193e-06,
"loss": 2.5104,
"step": 51900
},
{
"epoch": 6.49,
"learning_rate": 3.509735396904643e-06,
"loss": 2.5841,
"step": 52000
},
{
"epoch": 6.5,
"learning_rate": 3.497254118821768e-06,
"loss": 2.5434,
"step": 52100
},
{
"epoch": 6.52,
"learning_rate": 3.484772840738892e-06,
"loss": 2.5161,
"step": 52200
},
{
"epoch": 6.53,
"learning_rate": 3.472291562656016e-06,
"loss": 2.5273,
"step": 52300
},
{
"epoch": 6.54,
"learning_rate": 3.4598102845731405e-06,
"loss": 2.581,
"step": 52400
},
{
"epoch": 6.55,
"learning_rate": 3.4473290064902648e-06,
"loss": 2.5352,
"step": 52500
},
{
"epoch": 6.57,
"learning_rate": 3.4348477284073894e-06,
"loss": 2.5912,
"step": 52600
},
{
"epoch": 6.58,
"learning_rate": 3.4223664503245137e-06,
"loss": 2.5162,
"step": 52700
},
{
"epoch": 6.59,
"learning_rate": 3.4098851722416375e-06,
"loss": 2.5289,
"step": 52800
},
{
"epoch": 6.6,
"learning_rate": 3.397403894158762e-06,
"loss": 2.4994,
"step": 52900
},
{
"epoch": 6.62,
"learning_rate": 3.3849226160758864e-06,
"loss": 2.5098,
"step": 53000
},
{
"epoch": 6.63,
"learning_rate": 3.372441337993011e-06,
"loss": 2.5013,
"step": 53100
},
{
"epoch": 6.64,
"learning_rate": 3.359960059910135e-06,
"loss": 2.5134,
"step": 53200
},
{
"epoch": 6.65,
"learning_rate": 3.347478781827259e-06,
"loss": 2.5036,
"step": 53300
},
{
"epoch": 6.66,
"learning_rate": 3.3349975037443838e-06,
"loss": 2.5203,
"step": 53400
},
{
"epoch": 6.68,
"learning_rate": 3.322516225661508e-06,
"loss": 2.4901,
"step": 53500
},
{
"epoch": 6.69,
"learning_rate": 3.3100349475786327e-06,
"loss": 2.5451,
"step": 53600
},
{
"epoch": 6.7,
"learning_rate": 3.2975536694957565e-06,
"loss": 2.5166,
"step": 53700
},
{
"epoch": 6.71,
"learning_rate": 3.2850723914128807e-06,
"loss": 2.4924,
"step": 53800
},
{
"epoch": 6.73,
"learning_rate": 3.2725911133300054e-06,
"loss": 2.5529,
"step": 53900
},
{
"epoch": 6.74,
"learning_rate": 3.2601098352471296e-06,
"loss": 2.5539,
"step": 54000
},
{
"epoch": 6.75,
"learning_rate": 3.2476285571642543e-06,
"loss": 2.5476,
"step": 54100
},
{
"epoch": 6.76,
"learning_rate": 3.235147279081378e-06,
"loss": 2.5324,
"step": 54200
},
{
"epoch": 6.78,
"learning_rate": 3.2226660009985023e-06,
"loss": 2.5427,
"step": 54300
},
{
"epoch": 6.79,
"learning_rate": 3.210184722915627e-06,
"loss": 2.5474,
"step": 54400
},
{
"epoch": 6.8,
"learning_rate": 3.197703444832751e-06,
"loss": 2.5174,
"step": 54500
},
{
"epoch": 6.81,
"learning_rate": 3.1852221667498755e-06,
"loss": 2.5418,
"step": 54600
},
{
"epoch": 6.83,
"learning_rate": 3.1727408886669997e-06,
"loss": 2.4859,
"step": 54700
},
{
"epoch": 6.84,
"learning_rate": 3.1602596105841244e-06,
"loss": 2.471,
"step": 54800
},
{
"epoch": 6.85,
"learning_rate": 3.1477783325012486e-06,
"loss": 2.568,
"step": 54900
},
{
"epoch": 6.86,
"learning_rate": 3.1352970544183724e-06,
"loss": 2.537,
"step": 55000
},
{
"epoch": 6.88,
"learning_rate": 3.122815776335497e-06,
"loss": 2.5169,
"step": 55100
},
{
"epoch": 6.89,
"learning_rate": 3.1103344982526213e-06,
"loss": 2.5177,
"step": 55200
},
{
"epoch": 6.9,
"learning_rate": 3.097853220169746e-06,
"loss": 2.5503,
"step": 55300
},
{
"epoch": 6.91,
"learning_rate": 3.08537194208687e-06,
"loss": 2.4971,
"step": 55400
},
{
"epoch": 6.93,
"learning_rate": 3.072890664003994e-06,
"loss": 2.4498,
"step": 55500
},
{
"epoch": 6.94,
"learning_rate": 3.0604093859211187e-06,
"loss": 2.4854,
"step": 55600
},
{
"epoch": 6.95,
"learning_rate": 3.047928107838243e-06,
"loss": 2.5306,
"step": 55700
},
{
"epoch": 6.96,
"learning_rate": 3.0354468297553676e-06,
"loss": 2.4973,
"step": 55800
},
{
"epoch": 6.98,
"learning_rate": 3.0229655516724914e-06,
"loss": 2.474,
"step": 55900
},
{
"epoch": 6.99,
"learning_rate": 3.0104842735896156e-06,
"loss": 2.5066,
"step": 56000
},
{
"epoch": 7.0,
"learning_rate": 2.9980029955067403e-06,
"loss": 2.5626,
"step": 56100
},
{
"epoch": 7.01,
"learning_rate": 2.985521717423864e-06,
"loss": 2.5684,
"step": 56200
},
{
"epoch": 7.03,
"learning_rate": 2.9730404393409888e-06,
"loss": 2.5336,
"step": 56300
},
{
"epoch": 7.04,
"learning_rate": 2.960559161258113e-06,
"loss": 2.4971,
"step": 56400
},
{
"epoch": 7.05,
"learning_rate": 2.9480778831752373e-06,
"loss": 2.5267,
"step": 56500
},
{
"epoch": 7.06,
"learning_rate": 2.935596605092362e-06,
"loss": 2.472,
"step": 56600
},
{
"epoch": 7.08,
"learning_rate": 2.9231153270094857e-06,
"loss": 2.5235,
"step": 56700
},
{
"epoch": 7.09,
"learning_rate": 2.9106340489266104e-06,
"loss": 2.5225,
"step": 56800
},
{
"epoch": 7.1,
"learning_rate": 2.8981527708437346e-06,
"loss": 2.5145,
"step": 56900
},
{
"epoch": 7.11,
"learning_rate": 2.885671492760859e-06,
"loss": 2.4227,
"step": 57000
},
{
"epoch": 7.13,
"learning_rate": 2.873190214677983e-06,
"loss": 2.5316,
"step": 57100
},
{
"epoch": 7.14,
"learning_rate": 2.8607089365951074e-06,
"loss": 2.5347,
"step": 57200
},
{
"epoch": 7.15,
"learning_rate": 2.848227658512232e-06,
"loss": 2.5264,
"step": 57300
},
{
"epoch": 7.16,
"learning_rate": 2.8357463804293563e-06,
"loss": 2.5099,
"step": 57400
},
{
"epoch": 7.18,
"learning_rate": 2.823265102346481e-06,
"loss": 2.4761,
"step": 57500
},
{
"epoch": 7.19,
"learning_rate": 2.8107838242636047e-06,
"loss": 2.529,
"step": 57600
},
{
"epoch": 7.2,
"learning_rate": 2.798302546180729e-06,
"loss": 2.483,
"step": 57700
},
{
"epoch": 7.21,
"learning_rate": 2.7858212680978536e-06,
"loss": 2.4986,
"step": 57800
},
{
"epoch": 7.23,
"learning_rate": 2.773339990014978e-06,
"loss": 2.5386,
"step": 57900
},
{
"epoch": 7.24,
"learning_rate": 2.760858711932102e-06,
"loss": 2.4868,
"step": 58000
}
],
"max_steps": 80120,
"num_train_epochs": 10,
"total_flos": 4.356850425234186e+17,
"trial_name": null,
"trial_params": null
}