Phi-3-mini-4k-instruct-finetuned / trainer_state.json
satyanshu404's picture
Model save
f2b2fb5 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 27650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.8083182640144665e-09,
"loss": 13.6388,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 3.616636528028933e-09,
"loss": 13.7324,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 5.4249547920433996e-09,
"loss": 13.5866,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 7.233273056057866e-09,
"loss": 13.6427,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 9.041591320072333e-09,
"loss": 13.6357,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 1.0849909584086799e-08,
"loss": 13.6714,
"step": 120
},
{
"epoch": 0.05,
"learning_rate": 1.2658227848101265e-08,
"loss": 13.6575,
"step": 140
},
{
"epoch": 0.06,
"learning_rate": 1.4466546112115732e-08,
"loss": 13.6404,
"step": 160
},
{
"epoch": 0.07,
"learning_rate": 1.62748643761302e-08,
"loss": 13.7001,
"step": 180
},
{
"epoch": 0.07,
"learning_rate": 1.8083182640144666e-08,
"loss": 13.6391,
"step": 200
},
{
"epoch": 0.08,
"learning_rate": 1.9891500904159132e-08,
"loss": 13.7209,
"step": 220
},
{
"epoch": 0.09,
"learning_rate": 2.1699819168173598e-08,
"loss": 13.7199,
"step": 240
},
{
"epoch": 0.09,
"learning_rate": 2.3508137432188065e-08,
"loss": 13.6763,
"step": 260
},
{
"epoch": 0.1,
"learning_rate": 2.531645569620253e-08,
"loss": 13.6408,
"step": 280
},
{
"epoch": 0.11,
"learning_rate": 2.7124773960216997e-08,
"loss": 13.6908,
"step": 300
},
{
"epoch": 0.12,
"learning_rate": 2.8933092224231463e-08,
"loss": 13.7436,
"step": 320
},
{
"epoch": 0.12,
"learning_rate": 3.0741410488245926e-08,
"loss": 13.6329,
"step": 340
},
{
"epoch": 0.13,
"learning_rate": 3.25497287522604e-08,
"loss": 13.678,
"step": 360
},
{
"epoch": 0.14,
"learning_rate": 3.4358047016274865e-08,
"loss": 13.7068,
"step": 380
},
{
"epoch": 0.14,
"learning_rate": 3.616636528028933e-08,
"loss": 13.7395,
"step": 400
},
{
"epoch": 0.15,
"learning_rate": 3.79746835443038e-08,
"loss": 13.5979,
"step": 420
},
{
"epoch": 0.16,
"learning_rate": 3.9783001808318264e-08,
"loss": 13.6808,
"step": 440
},
{
"epoch": 0.17,
"learning_rate": 4.159132007233273e-08,
"loss": 13.6201,
"step": 460
},
{
"epoch": 0.17,
"learning_rate": 4.3399638336347197e-08,
"loss": 13.6224,
"step": 480
},
{
"epoch": 0.18,
"learning_rate": 4.520795660036166e-08,
"loss": 13.7064,
"step": 500
},
{
"epoch": 0.19,
"learning_rate": 4.701627486437613e-08,
"loss": 13.7375,
"step": 520
},
{
"epoch": 0.2,
"learning_rate": 4.8824593128390595e-08,
"loss": 13.7072,
"step": 540
},
{
"epoch": 0.2,
"learning_rate": 5.063291139240506e-08,
"loss": 13.719,
"step": 560
},
{
"epoch": 0.21,
"learning_rate": 5.244122965641953e-08,
"loss": 13.6121,
"step": 580
},
{
"epoch": 0.22,
"learning_rate": 5.4249547920433994e-08,
"loss": 13.6514,
"step": 600
},
{
"epoch": 0.22,
"learning_rate": 5.605786618444846e-08,
"loss": 13.7277,
"step": 620
},
{
"epoch": 0.23,
"learning_rate": 5.7866184448462927e-08,
"loss": 13.6584,
"step": 640
},
{
"epoch": 0.24,
"learning_rate": 5.967450271247739e-08,
"loss": 13.6968,
"step": 660
},
{
"epoch": 0.25,
"learning_rate": 6.148282097649185e-08,
"loss": 13.6416,
"step": 680
},
{
"epoch": 0.25,
"learning_rate": 6.329113924050633e-08,
"loss": 13.773,
"step": 700
},
{
"epoch": 0.26,
"learning_rate": 6.50994575045208e-08,
"loss": 13.6784,
"step": 720
},
{
"epoch": 0.27,
"learning_rate": 6.690777576853526e-08,
"loss": 13.6289,
"step": 740
},
{
"epoch": 0.27,
"learning_rate": 6.871609403254973e-08,
"loss": 13.5546,
"step": 760
},
{
"epoch": 0.28,
"learning_rate": 7.05244122965642e-08,
"loss": 13.5832,
"step": 780
},
{
"epoch": 0.29,
"learning_rate": 7.233273056057866e-08,
"loss": 13.5718,
"step": 800
},
{
"epoch": 0.3,
"learning_rate": 7.414104882459313e-08,
"loss": 13.6693,
"step": 820
},
{
"epoch": 0.3,
"learning_rate": 7.59493670886076e-08,
"loss": 13.7198,
"step": 840
},
{
"epoch": 0.31,
"learning_rate": 7.775768535262206e-08,
"loss": 13.6503,
"step": 860
},
{
"epoch": 0.32,
"learning_rate": 7.956600361663653e-08,
"loss": 13.6857,
"step": 880
},
{
"epoch": 0.33,
"learning_rate": 8.1374321880651e-08,
"loss": 13.6539,
"step": 900
},
{
"epoch": 0.33,
"learning_rate": 8.318264014466546e-08,
"loss": 13.6969,
"step": 920
},
{
"epoch": 0.34,
"learning_rate": 8.499095840867993e-08,
"loss": 13.7703,
"step": 940
},
{
"epoch": 0.35,
"learning_rate": 8.679927667269439e-08,
"loss": 13.6366,
"step": 960
},
{
"epoch": 0.35,
"learning_rate": 8.860759493670886e-08,
"loss": 13.7623,
"step": 980
},
{
"epoch": 0.36,
"learning_rate": 9.041591320072333e-08,
"loss": 13.668,
"step": 1000
},
{
"epoch": 0.37,
"learning_rate": 9.222423146473779e-08,
"loss": 13.6859,
"step": 1020
},
{
"epoch": 0.38,
"learning_rate": 9.403254972875226e-08,
"loss": 13.6488,
"step": 1040
},
{
"epoch": 0.38,
"learning_rate": 9.584086799276672e-08,
"loss": 13.604,
"step": 1060
},
{
"epoch": 0.39,
"learning_rate": 9.764918625678119e-08,
"loss": 13.63,
"step": 1080
},
{
"epoch": 0.4,
"learning_rate": 9.945750452079566e-08,
"loss": 13.6844,
"step": 1100
},
{
"epoch": 0.41,
"learning_rate": 1.0126582278481012e-07,
"loss": 13.6574,
"step": 1120
},
{
"epoch": 0.41,
"learning_rate": 1.0307414104882459e-07,
"loss": 13.6688,
"step": 1140
},
{
"epoch": 0.42,
"learning_rate": 1.0488245931283906e-07,
"loss": 13.6358,
"step": 1160
},
{
"epoch": 0.43,
"learning_rate": 1.0669077757685352e-07,
"loss": 13.6021,
"step": 1180
},
{
"epoch": 0.43,
"learning_rate": 1.0849909584086799e-07,
"loss": 13.6597,
"step": 1200
},
{
"epoch": 0.44,
"learning_rate": 1.1030741410488245e-07,
"loss": 13.5985,
"step": 1220
},
{
"epoch": 0.45,
"learning_rate": 1.1211573236889692e-07,
"loss": 13.6186,
"step": 1240
},
{
"epoch": 0.46,
"learning_rate": 1.1392405063291139e-07,
"loss": 13.6391,
"step": 1260
},
{
"epoch": 0.46,
"learning_rate": 1.1573236889692585e-07,
"loss": 13.599,
"step": 1280
},
{
"epoch": 0.47,
"learning_rate": 1.1754068716094032e-07,
"loss": 13.7044,
"step": 1300
},
{
"epoch": 0.48,
"learning_rate": 1.1934900542495477e-07,
"loss": 13.7581,
"step": 1320
},
{
"epoch": 0.48,
"learning_rate": 1.2115732368896925e-07,
"loss": 13.648,
"step": 1340
},
{
"epoch": 0.49,
"learning_rate": 1.229656419529837e-07,
"loss": 13.6057,
"step": 1360
},
{
"epoch": 0.5,
"learning_rate": 1.2477396021699818e-07,
"loss": 13.627,
"step": 1380
},
{
"epoch": 0.51,
"learning_rate": 1.2658227848101266e-07,
"loss": 13.6813,
"step": 1400
},
{
"epoch": 0.51,
"learning_rate": 1.2839059674502712e-07,
"loss": 13.5936,
"step": 1420
},
{
"epoch": 0.52,
"learning_rate": 1.301989150090416e-07,
"loss": 13.6134,
"step": 1440
},
{
"epoch": 0.53,
"learning_rate": 1.3200723327305605e-07,
"loss": 13.6773,
"step": 1460
},
{
"epoch": 0.54,
"learning_rate": 1.3381555153707053e-07,
"loss": 13.6927,
"step": 1480
},
{
"epoch": 0.54,
"learning_rate": 1.3562386980108498e-07,
"loss": 13.5613,
"step": 1500
},
{
"epoch": 0.55,
"learning_rate": 1.3743218806509946e-07,
"loss": 13.6399,
"step": 1520
},
{
"epoch": 0.56,
"learning_rate": 1.3924050632911391e-07,
"loss": 13.6222,
"step": 1540
},
{
"epoch": 0.56,
"learning_rate": 1.410488245931284e-07,
"loss": 13.6306,
"step": 1560
},
{
"epoch": 0.57,
"learning_rate": 1.4285714285714285e-07,
"loss": 13.6322,
"step": 1580
},
{
"epoch": 0.58,
"learning_rate": 1.4466546112115733e-07,
"loss": 13.5773,
"step": 1600
},
{
"epoch": 0.59,
"learning_rate": 1.4647377938517178e-07,
"loss": 13.6057,
"step": 1620
},
{
"epoch": 0.59,
"learning_rate": 1.4828209764918626e-07,
"loss": 13.5897,
"step": 1640
},
{
"epoch": 0.6,
"learning_rate": 1.500904159132007e-07,
"loss": 13.508,
"step": 1660
},
{
"epoch": 0.61,
"learning_rate": 1.518987341772152e-07,
"loss": 13.6156,
"step": 1680
},
{
"epoch": 0.61,
"learning_rate": 1.5370705244122964e-07,
"loss": 13.6333,
"step": 1700
},
{
"epoch": 0.62,
"learning_rate": 1.5551537070524412e-07,
"loss": 13.6283,
"step": 1720
},
{
"epoch": 0.63,
"learning_rate": 1.5732368896925858e-07,
"loss": 13.454,
"step": 1740
},
{
"epoch": 0.64,
"learning_rate": 1.5913200723327306e-07,
"loss": 13.6243,
"step": 1760
},
{
"epoch": 0.64,
"learning_rate": 1.609403254972875e-07,
"loss": 13.5886,
"step": 1780
},
{
"epoch": 0.65,
"learning_rate": 1.62748643761302e-07,
"loss": 13.546,
"step": 1800
},
{
"epoch": 0.66,
"learning_rate": 1.6455696202531644e-07,
"loss": 13.5578,
"step": 1820
},
{
"epoch": 0.67,
"learning_rate": 1.6636528028933092e-07,
"loss": 13.5799,
"step": 1840
},
{
"epoch": 0.67,
"learning_rate": 1.6817359855334537e-07,
"loss": 13.6162,
"step": 1860
},
{
"epoch": 0.68,
"learning_rate": 1.6998191681735985e-07,
"loss": 13.5781,
"step": 1880
},
{
"epoch": 0.69,
"learning_rate": 1.717902350813743e-07,
"loss": 13.5051,
"step": 1900
},
{
"epoch": 0.69,
"learning_rate": 1.7359855334538879e-07,
"loss": 13.5705,
"step": 1920
},
{
"epoch": 0.7,
"learning_rate": 1.7540687160940324e-07,
"loss": 13.5822,
"step": 1940
},
{
"epoch": 0.71,
"learning_rate": 1.7721518987341772e-07,
"loss": 13.4456,
"step": 1960
},
{
"epoch": 0.72,
"learning_rate": 1.7902350813743217e-07,
"loss": 13.5349,
"step": 1980
},
{
"epoch": 0.72,
"learning_rate": 1.8083182640144665e-07,
"loss": 13.5085,
"step": 2000
},
{
"epoch": 0.73,
"learning_rate": 1.8264014466546113e-07,
"loss": 13.6023,
"step": 2020
},
{
"epoch": 0.74,
"learning_rate": 1.8444846292947558e-07,
"loss": 13.5162,
"step": 2040
},
{
"epoch": 0.75,
"learning_rate": 1.8625678119349006e-07,
"loss": 13.4671,
"step": 2060
},
{
"epoch": 0.75,
"learning_rate": 1.8806509945750452e-07,
"loss": 13.5219,
"step": 2080
},
{
"epoch": 0.76,
"learning_rate": 1.89873417721519e-07,
"loss": 13.5798,
"step": 2100
},
{
"epoch": 0.77,
"learning_rate": 1.9168173598553345e-07,
"loss": 13.4227,
"step": 2120
},
{
"epoch": 0.77,
"learning_rate": 1.9349005424954793e-07,
"loss": 13.603,
"step": 2140
},
{
"epoch": 0.78,
"learning_rate": 1.9529837251356238e-07,
"loss": 13.4905,
"step": 2160
},
{
"epoch": 0.79,
"learning_rate": 1.9710669077757686e-07,
"loss": 13.6152,
"step": 2180
},
{
"epoch": 0.8,
"learning_rate": 1.9891500904159131e-07,
"loss": 13.5049,
"step": 2200
},
{
"epoch": 0.8,
"learning_rate": 2.007233273056058e-07,
"loss": 13.574,
"step": 2220
},
{
"epoch": 0.81,
"learning_rate": 2.0253164556962025e-07,
"loss": 13.5087,
"step": 2240
},
{
"epoch": 0.82,
"learning_rate": 2.0433996383363473e-07,
"loss": 13.5461,
"step": 2260
},
{
"epoch": 0.82,
"learning_rate": 2.0614828209764918e-07,
"loss": 13.5421,
"step": 2280
},
{
"epoch": 0.83,
"learning_rate": 2.0795660036166366e-07,
"loss": 13.5746,
"step": 2300
},
{
"epoch": 0.84,
"learning_rate": 2.097649186256781e-07,
"loss": 13.5038,
"step": 2320
},
{
"epoch": 0.85,
"learning_rate": 2.115732368896926e-07,
"loss": 13.534,
"step": 2340
},
{
"epoch": 0.85,
"learning_rate": 2.1338155515370704e-07,
"loss": 13.4248,
"step": 2360
},
{
"epoch": 0.86,
"learning_rate": 2.1518987341772152e-07,
"loss": 13.588,
"step": 2380
},
{
"epoch": 0.87,
"learning_rate": 2.1699819168173598e-07,
"loss": 13.5216,
"step": 2400
},
{
"epoch": 0.88,
"learning_rate": 2.1880650994575046e-07,
"loss": 13.539,
"step": 2420
},
{
"epoch": 0.88,
"learning_rate": 2.206148282097649e-07,
"loss": 13.5719,
"step": 2440
},
{
"epoch": 0.89,
"learning_rate": 2.224231464737794e-07,
"loss": 13.4977,
"step": 2460
},
{
"epoch": 0.9,
"learning_rate": 2.2423146473779384e-07,
"loss": 13.4069,
"step": 2480
},
{
"epoch": 0.9,
"learning_rate": 2.2603978300180832e-07,
"loss": 13.533,
"step": 2500
},
{
"epoch": 0.91,
"learning_rate": 2.2784810126582277e-07,
"loss": 13.5443,
"step": 2520
},
{
"epoch": 0.92,
"learning_rate": 2.2965641952983725e-07,
"loss": 13.5105,
"step": 2540
},
{
"epoch": 0.93,
"learning_rate": 2.314647377938517e-07,
"loss": 13.516,
"step": 2560
},
{
"epoch": 0.93,
"learning_rate": 2.3327305605786619e-07,
"loss": 13.5084,
"step": 2580
},
{
"epoch": 0.94,
"learning_rate": 2.3508137432188064e-07,
"loss": 13.4837,
"step": 2600
},
{
"epoch": 0.95,
"learning_rate": 2.3688969258589512e-07,
"loss": 13.5951,
"step": 2620
},
{
"epoch": 0.95,
"learning_rate": 2.3869801084990954e-07,
"loss": 13.4639,
"step": 2640
},
{
"epoch": 0.96,
"learning_rate": 2.40506329113924e-07,
"loss": 13.4462,
"step": 2660
},
{
"epoch": 0.97,
"learning_rate": 2.423146473779385e-07,
"loss": 13.4621,
"step": 2680
},
{
"epoch": 0.98,
"learning_rate": 2.44122965641953e-07,
"loss": 13.5816,
"step": 2700
},
{
"epoch": 0.98,
"learning_rate": 2.459312839059674e-07,
"loss": 13.5002,
"step": 2720
},
{
"epoch": 0.99,
"learning_rate": 2.477396021699819e-07,
"loss": 13.5648,
"step": 2740
},
{
"epoch": 1.0,
"learning_rate": 2.4954792043399637e-07,
"loss": 13.513,
"step": 2760
},
{
"epoch": 1.01,
"learning_rate": 2.513562386980108e-07,
"loss": 13.5086,
"step": 2780
},
{
"epoch": 1.01,
"learning_rate": 2.5316455696202533e-07,
"loss": 13.5994,
"step": 2800
},
{
"epoch": 1.02,
"learning_rate": 2.5497287522603975e-07,
"loss": 13.4693,
"step": 2820
},
{
"epoch": 1.03,
"learning_rate": 2.5678119349005423e-07,
"loss": 13.5481,
"step": 2840
},
{
"epoch": 1.03,
"learning_rate": 2.5858951175406866e-07,
"loss": 13.4964,
"step": 2860
},
{
"epoch": 1.04,
"learning_rate": 2.603978300180832e-07,
"loss": 13.4156,
"step": 2880
},
{
"epoch": 1.05,
"learning_rate": 2.622061482820976e-07,
"loss": 13.5032,
"step": 2900
},
{
"epoch": 1.06,
"learning_rate": 2.640144665461121e-07,
"loss": 13.5745,
"step": 2920
},
{
"epoch": 1.06,
"learning_rate": 2.658227848101265e-07,
"loss": 13.5202,
"step": 2940
},
{
"epoch": 1.07,
"learning_rate": 2.6763110307414106e-07,
"loss": 13.4655,
"step": 2960
},
{
"epoch": 1.08,
"learning_rate": 2.694394213381555e-07,
"loss": 13.5322,
"step": 2980
},
{
"epoch": 1.08,
"learning_rate": 2.7124773960216996e-07,
"loss": 13.5317,
"step": 3000
},
{
"epoch": 1.09,
"learning_rate": 2.7305605786618444e-07,
"loss": 13.4784,
"step": 3020
},
{
"epoch": 1.1,
"learning_rate": 2.748643761301989e-07,
"loss": 13.4989,
"step": 3040
},
{
"epoch": 1.11,
"learning_rate": 2.7667269439421335e-07,
"loss": 13.421,
"step": 3060
},
{
"epoch": 1.11,
"learning_rate": 2.7848101265822783e-07,
"loss": 13.4547,
"step": 3080
},
{
"epoch": 1.12,
"learning_rate": 2.802893309222423e-07,
"loss": 13.5269,
"step": 3100
},
{
"epoch": 1.13,
"learning_rate": 2.820976491862568e-07,
"loss": 13.4043,
"step": 3120
},
{
"epoch": 1.14,
"learning_rate": 2.839059674502712e-07,
"loss": 13.4519,
"step": 3140
},
{
"epoch": 1.14,
"learning_rate": 2.857142857142857e-07,
"loss": 13.454,
"step": 3160
},
{
"epoch": 1.15,
"learning_rate": 2.8752260397830017e-07,
"loss": 13.4185,
"step": 3180
},
{
"epoch": 1.16,
"learning_rate": 2.8933092224231465e-07,
"loss": 13.4596,
"step": 3200
},
{
"epoch": 1.16,
"learning_rate": 2.911392405063291e-07,
"loss": 13.4783,
"step": 3220
},
{
"epoch": 1.17,
"learning_rate": 2.9294755877034356e-07,
"loss": 13.406,
"step": 3240
},
{
"epoch": 1.18,
"learning_rate": 2.9475587703435804e-07,
"loss": 13.4561,
"step": 3260
},
{
"epoch": 1.19,
"learning_rate": 2.965641952983725e-07,
"loss": 13.4658,
"step": 3280
},
{
"epoch": 1.19,
"learning_rate": 2.9837251356238694e-07,
"loss": 13.3853,
"step": 3300
},
{
"epoch": 1.2,
"learning_rate": 3.001808318264014e-07,
"loss": 13.4351,
"step": 3320
},
{
"epoch": 1.21,
"learning_rate": 3.019891500904159e-07,
"loss": 13.4493,
"step": 3340
},
{
"epoch": 1.22,
"learning_rate": 3.037974683544304e-07,
"loss": 13.493,
"step": 3360
},
{
"epoch": 1.22,
"learning_rate": 3.056057866184448e-07,
"loss": 13.4442,
"step": 3380
},
{
"epoch": 1.23,
"learning_rate": 3.074141048824593e-07,
"loss": 13.424,
"step": 3400
},
{
"epoch": 1.24,
"learning_rate": 3.0922242314647377e-07,
"loss": 13.3822,
"step": 3420
},
{
"epoch": 1.24,
"learning_rate": 3.1103074141048825e-07,
"loss": 13.4068,
"step": 3440
},
{
"epoch": 1.25,
"learning_rate": 3.128390596745027e-07,
"loss": 13.3222,
"step": 3460
},
{
"epoch": 1.26,
"learning_rate": 3.1464737793851715e-07,
"loss": 13.5169,
"step": 3480
},
{
"epoch": 1.27,
"learning_rate": 3.1645569620253163e-07,
"loss": 13.395,
"step": 3500
},
{
"epoch": 1.27,
"learning_rate": 3.182640144665461e-07,
"loss": 13.3751,
"step": 3520
},
{
"epoch": 1.28,
"learning_rate": 3.2007233273056054e-07,
"loss": 13.4999,
"step": 3540
},
{
"epoch": 1.29,
"learning_rate": 3.21880650994575e-07,
"loss": 13.3497,
"step": 3560
},
{
"epoch": 1.29,
"learning_rate": 3.236889692585895e-07,
"loss": 13.3344,
"step": 3580
},
{
"epoch": 1.3,
"learning_rate": 3.25497287522604e-07,
"loss": 13.396,
"step": 3600
},
{
"epoch": 1.31,
"learning_rate": 3.273056057866184e-07,
"loss": 13.4008,
"step": 3620
},
{
"epoch": 1.32,
"learning_rate": 3.291139240506329e-07,
"loss": 13.3652,
"step": 3640
},
{
"epoch": 1.32,
"learning_rate": 3.3092224231464736e-07,
"loss": 13.3697,
"step": 3660
},
{
"epoch": 1.33,
"learning_rate": 3.3273056057866184e-07,
"loss": 13.2918,
"step": 3680
},
{
"epoch": 1.34,
"learning_rate": 3.3453887884267627e-07,
"loss": 13.3176,
"step": 3700
},
{
"epoch": 1.35,
"learning_rate": 3.3634719710669075e-07,
"loss": 13.3925,
"step": 3720
},
{
"epoch": 1.35,
"learning_rate": 3.3815551537070523e-07,
"loss": 13.374,
"step": 3740
},
{
"epoch": 1.36,
"learning_rate": 3.399638336347197e-07,
"loss": 13.2872,
"step": 3760
},
{
"epoch": 1.37,
"learning_rate": 3.4177215189873413e-07,
"loss": 13.3694,
"step": 3780
},
{
"epoch": 1.37,
"learning_rate": 3.435804701627486e-07,
"loss": 13.4143,
"step": 3800
},
{
"epoch": 1.38,
"learning_rate": 3.453887884267631e-07,
"loss": 13.2812,
"step": 3820
},
{
"epoch": 1.39,
"learning_rate": 3.4719710669077757e-07,
"loss": 13.2149,
"step": 3840
},
{
"epoch": 1.4,
"learning_rate": 3.49005424954792e-07,
"loss": 13.3274,
"step": 3860
},
{
"epoch": 1.4,
"learning_rate": 3.508137432188065e-07,
"loss": 13.2607,
"step": 3880
},
{
"epoch": 1.41,
"learning_rate": 3.5262206148282096e-07,
"loss": 13.2853,
"step": 3900
},
{
"epoch": 1.42,
"learning_rate": 3.5443037974683544e-07,
"loss": 13.3039,
"step": 3920
},
{
"epoch": 1.42,
"learning_rate": 3.5623869801084986e-07,
"loss": 13.2795,
"step": 3940
},
{
"epoch": 1.43,
"learning_rate": 3.5804701627486434e-07,
"loss": 13.3223,
"step": 3960
},
{
"epoch": 1.44,
"learning_rate": 3.598553345388788e-07,
"loss": 13.3381,
"step": 3980
},
{
"epoch": 1.45,
"learning_rate": 3.616636528028933e-07,
"loss": 13.2402,
"step": 4000
},
{
"epoch": 1.45,
"learning_rate": 3.6347197106690773e-07,
"loss": 13.276,
"step": 4020
},
{
"epoch": 1.46,
"learning_rate": 3.6528028933092226e-07,
"loss": 13.236,
"step": 4040
},
{
"epoch": 1.47,
"learning_rate": 3.670886075949367e-07,
"loss": 13.3119,
"step": 4060
},
{
"epoch": 1.48,
"learning_rate": 3.6889692585895117e-07,
"loss": 13.2363,
"step": 4080
},
{
"epoch": 1.48,
"learning_rate": 3.707052441229656e-07,
"loss": 13.1797,
"step": 4100
},
{
"epoch": 1.49,
"learning_rate": 3.7251356238698013e-07,
"loss": 13.2306,
"step": 4120
},
{
"epoch": 1.5,
"learning_rate": 3.7432188065099455e-07,
"loss": 13.1583,
"step": 4140
},
{
"epoch": 1.5,
"learning_rate": 3.7613019891500903e-07,
"loss": 13.2315,
"step": 4160
},
{
"epoch": 1.51,
"learning_rate": 3.7793851717902346e-07,
"loss": 13.1212,
"step": 4180
},
{
"epoch": 1.52,
"learning_rate": 3.79746835443038e-07,
"loss": 13.2316,
"step": 4200
},
{
"epoch": 1.53,
"learning_rate": 3.815551537070524e-07,
"loss": 13.2637,
"step": 4220
},
{
"epoch": 1.53,
"learning_rate": 3.833634719710669e-07,
"loss": 13.2633,
"step": 4240
},
{
"epoch": 1.54,
"learning_rate": 3.851717902350813e-07,
"loss": 13.2313,
"step": 4260
},
{
"epoch": 1.55,
"learning_rate": 3.8698010849909586e-07,
"loss": 13.2121,
"step": 4280
},
{
"epoch": 1.56,
"learning_rate": 3.887884267631103e-07,
"loss": 13.2068,
"step": 4300
},
{
"epoch": 1.56,
"learning_rate": 3.9059674502712476e-07,
"loss": 13.1658,
"step": 4320
},
{
"epoch": 1.57,
"learning_rate": 3.924050632911392e-07,
"loss": 13.2193,
"step": 4340
},
{
"epoch": 1.58,
"learning_rate": 3.942133815551537e-07,
"loss": 13.1056,
"step": 4360
},
{
"epoch": 1.58,
"learning_rate": 3.9602169981916815e-07,
"loss": 13.1592,
"step": 4380
},
{
"epoch": 1.59,
"learning_rate": 3.9783001808318263e-07,
"loss": 13.2159,
"step": 4400
},
{
"epoch": 1.6,
"learning_rate": 3.9963833634719705e-07,
"loss": 13.2012,
"step": 4420
},
{
"epoch": 1.61,
"learning_rate": 4.014466546112116e-07,
"loss": 13.1279,
"step": 4440
},
{
"epoch": 1.61,
"learning_rate": 4.03254972875226e-07,
"loss": 13.3183,
"step": 4460
},
{
"epoch": 1.62,
"learning_rate": 4.050632911392405e-07,
"loss": 13.2495,
"step": 4480
},
{
"epoch": 1.63,
"learning_rate": 4.068716094032549e-07,
"loss": 13.0892,
"step": 4500
},
{
"epoch": 1.63,
"learning_rate": 4.0867992766726945e-07,
"loss": 13.1179,
"step": 4520
},
{
"epoch": 1.64,
"learning_rate": 4.104882459312839e-07,
"loss": 13.0991,
"step": 4540
},
{
"epoch": 1.65,
"learning_rate": 4.1229656419529836e-07,
"loss": 13.1761,
"step": 4560
},
{
"epoch": 1.66,
"learning_rate": 4.141048824593128e-07,
"loss": 13.1728,
"step": 4580
},
{
"epoch": 1.66,
"learning_rate": 4.159132007233273e-07,
"loss": 13.1226,
"step": 4600
},
{
"epoch": 1.67,
"learning_rate": 4.1772151898734174e-07,
"loss": 13.193,
"step": 4620
},
{
"epoch": 1.68,
"learning_rate": 4.195298372513562e-07,
"loss": 13.0945,
"step": 4640
},
{
"epoch": 1.69,
"learning_rate": 4.2133815551537065e-07,
"loss": 13.2452,
"step": 4660
},
{
"epoch": 1.69,
"learning_rate": 4.231464737793852e-07,
"loss": 13.1626,
"step": 4680
},
{
"epoch": 1.7,
"learning_rate": 4.249547920433996e-07,
"loss": 12.9736,
"step": 4700
},
{
"epoch": 1.71,
"learning_rate": 4.267631103074141e-07,
"loss": 13.1536,
"step": 4720
},
{
"epoch": 1.71,
"learning_rate": 4.285714285714285e-07,
"loss": 13.0694,
"step": 4740
},
{
"epoch": 1.72,
"learning_rate": 4.3037974683544305e-07,
"loss": 13.1939,
"step": 4760
},
{
"epoch": 1.73,
"learning_rate": 4.3218806509945747e-07,
"loss": 13.089,
"step": 4780
},
{
"epoch": 1.74,
"learning_rate": 4.3399638336347195e-07,
"loss": 13.1015,
"step": 4800
},
{
"epoch": 1.74,
"learning_rate": 4.358047016274864e-07,
"loss": 13.1876,
"step": 4820
},
{
"epoch": 1.75,
"learning_rate": 4.376130198915009e-07,
"loss": 13.1382,
"step": 4840
},
{
"epoch": 1.76,
"learning_rate": 4.3942133815551534e-07,
"loss": 13.1007,
"step": 4860
},
{
"epoch": 1.76,
"learning_rate": 4.412296564195298e-07,
"loss": 13.0365,
"step": 4880
},
{
"epoch": 1.77,
"learning_rate": 4.4303797468354424e-07,
"loss": 13.0515,
"step": 4900
},
{
"epoch": 1.78,
"learning_rate": 4.448462929475588e-07,
"loss": 13.0274,
"step": 4920
},
{
"epoch": 1.79,
"learning_rate": 4.466546112115732e-07,
"loss": 13.0163,
"step": 4940
},
{
"epoch": 1.79,
"learning_rate": 4.484629294755877e-07,
"loss": 13.1132,
"step": 4960
},
{
"epoch": 1.8,
"learning_rate": 4.502712477396021e-07,
"loss": 13.0237,
"step": 4980
},
{
"epoch": 1.81,
"learning_rate": 4.5207956600361664e-07,
"loss": 13.0806,
"step": 5000
},
{
"epoch": 1.82,
"learning_rate": 4.5388788426763107e-07,
"loss": 13.1263,
"step": 5020
},
{
"epoch": 1.82,
"learning_rate": 4.5569620253164555e-07,
"loss": 12.9992,
"step": 5040
},
{
"epoch": 1.83,
"learning_rate": 4.5750452079566003e-07,
"loss": 12.97,
"step": 5060
},
{
"epoch": 1.84,
"learning_rate": 4.593128390596745e-07,
"loss": 12.9025,
"step": 5080
},
{
"epoch": 1.84,
"learning_rate": 4.6112115732368893e-07,
"loss": 12.9131,
"step": 5100
},
{
"epoch": 1.85,
"learning_rate": 4.629294755877034e-07,
"loss": 12.8724,
"step": 5120
},
{
"epoch": 1.86,
"learning_rate": 4.647377938517179e-07,
"loss": 12.7369,
"step": 5140
},
{
"epoch": 1.87,
"learning_rate": 4.6654611211573237e-07,
"loss": 12.6747,
"step": 5160
},
{
"epoch": 1.87,
"learning_rate": 4.683544303797468e-07,
"loss": 12.7058,
"step": 5180
},
{
"epoch": 1.88,
"learning_rate": 4.701627486437613e-07,
"loss": 12.5785,
"step": 5200
},
{
"epoch": 1.89,
"learning_rate": 4.7197106690777576e-07,
"loss": 12.6113,
"step": 5220
},
{
"epoch": 1.9,
"learning_rate": 4.7377938517179024e-07,
"loss": 12.5997,
"step": 5240
},
{
"epoch": 1.9,
"learning_rate": 4.7558770343580466e-07,
"loss": 12.4582,
"step": 5260
},
{
"epoch": 1.91,
"learning_rate": 4.773960216998191e-07,
"loss": 12.4912,
"step": 5280
},
{
"epoch": 1.92,
"learning_rate": 4.792043399638337e-07,
"loss": 12.4618,
"step": 5300
},
{
"epoch": 1.92,
"learning_rate": 4.81012658227848e-07,
"loss": 12.4053,
"step": 5320
},
{
"epoch": 1.93,
"learning_rate": 4.828209764918625e-07,
"loss": 12.1477,
"step": 5340
},
{
"epoch": 1.94,
"learning_rate": 4.84629294755877e-07,
"loss": 12.2996,
"step": 5360
},
{
"epoch": 1.95,
"learning_rate": 4.864376130198915e-07,
"loss": 12.3589,
"step": 5380
},
{
"epoch": 1.95,
"learning_rate": 4.88245931283906e-07,
"loss": 12.2919,
"step": 5400
},
{
"epoch": 1.96,
"learning_rate": 4.900542495479204e-07,
"loss": 12.2481,
"step": 5420
},
{
"epoch": 1.97,
"learning_rate": 4.918625678119348e-07,
"loss": 12.2361,
"step": 5440
},
{
"epoch": 1.97,
"learning_rate": 4.936708860759494e-07,
"loss": 12.1804,
"step": 5460
},
{
"epoch": 1.98,
"learning_rate": 4.954792043399638e-07,
"loss": 12.0909,
"step": 5480
},
{
"epoch": 1.99,
"learning_rate": 4.972875226039783e-07,
"loss": 12.2437,
"step": 5500
},
{
"epoch": 2.0,
"learning_rate": 4.990958408679927e-07,
"loss": 12.2058,
"step": 5520
},
{
"epoch": 2.0,
"learning_rate": 4.999997478613401e-07,
"loss": 12.0753,
"step": 5540
},
{
"epoch": 2.01,
"learning_rate": 4.99997730755113e-07,
"loss": 11.8849,
"step": 5560
},
{
"epoch": 2.02,
"learning_rate": 4.999936965589338e-07,
"loss": 11.9003,
"step": 5580
},
{
"epoch": 2.03,
"learning_rate": 4.99987645305352e-07,
"loss": 12.1372,
"step": 5600
},
{
"epoch": 2.03,
"learning_rate": 4.999795770431919e-07,
"loss": 11.8718,
"step": 5620
},
{
"epoch": 2.04,
"learning_rate": 4.999694918375516e-07,
"loss": 11.945,
"step": 5640
},
{
"epoch": 2.05,
"learning_rate": 4.99957389769803e-07,
"loss": 11.8009,
"step": 5660
},
{
"epoch": 2.05,
"learning_rate": 4.999432709375907e-07,
"loss": 11.8197,
"step": 5680
},
{
"epoch": 2.06,
"learning_rate": 4.999271354548316e-07,
"loss": 11.8286,
"step": 5700
},
{
"epoch": 2.07,
"learning_rate": 4.999089834517138e-07,
"loss": 11.8144,
"step": 5720
},
{
"epoch": 2.08,
"learning_rate": 4.998888150746957e-07,
"loss": 11.8192,
"step": 5740
},
{
"epoch": 2.08,
"learning_rate": 4.99866630486504e-07,
"loss": 11.7335,
"step": 5760
},
{
"epoch": 2.09,
"learning_rate": 4.99842429866134e-07,
"loss": 11.7849,
"step": 5780
},
{
"epoch": 2.1,
"learning_rate": 4.998162134088466e-07,
"loss": 11.6888,
"step": 5800
},
{
"epoch": 2.1,
"learning_rate": 4.997879813261676e-07,
"loss": 11.7469,
"step": 5820
},
{
"epoch": 2.11,
"learning_rate": 4.997577338458857e-07,
"loss": 11.7298,
"step": 5840
},
{
"epoch": 2.12,
"learning_rate": 4.997254712120507e-07,
"loss": 11.633,
"step": 5860
},
{
"epoch": 2.13,
"learning_rate": 4.996911936849713e-07,
"loss": 11.7687,
"step": 5880
},
{
"epoch": 2.13,
"learning_rate": 4.996549015412135e-07,
"loss": 11.628,
"step": 5900
},
{
"epoch": 2.14,
"learning_rate": 4.996165950735983e-07,
"loss": 11.6068,
"step": 5920
},
{
"epoch": 2.15,
"learning_rate": 4.995762745911985e-07,
"loss": 11.6469,
"step": 5940
},
{
"epoch": 2.16,
"learning_rate": 4.995339404193373e-07,
"loss": 11.6589,
"step": 5960
},
{
"epoch": 2.16,
"learning_rate": 4.994895928995854e-07,
"loss": 11.5968,
"step": 5980
},
{
"epoch": 2.17,
"learning_rate": 4.994432323897575e-07,
"loss": 11.6637,
"step": 6000
},
{
"epoch": 2.18,
"learning_rate": 4.993948592639104e-07,
"loss": 11.6312,
"step": 6020
},
{
"epoch": 2.18,
"learning_rate": 4.993444739123394e-07,
"loss": 11.6247,
"step": 6040
},
{
"epoch": 2.19,
"learning_rate": 4.992920767415752e-07,
"loss": 11.6806,
"step": 6060
},
{
"epoch": 2.2,
"learning_rate": 4.992376681743811e-07,
"loss": 11.5952,
"step": 6080
},
{
"epoch": 2.21,
"learning_rate": 4.991812486497489e-07,
"loss": 11.5932,
"step": 6100
},
{
"epoch": 2.21,
"learning_rate": 4.991228186228956e-07,
"loss": 11.526,
"step": 6120
},
{
"epoch": 2.22,
"learning_rate": 4.990623785652603e-07,
"loss": 11.5652,
"step": 6140
},
{
"epoch": 2.23,
"learning_rate": 4.989999289644991e-07,
"loss": 11.5451,
"step": 6160
},
{
"epoch": 2.24,
"learning_rate": 4.989354703244829e-07,
"loss": 11.5461,
"step": 6180
},
{
"epoch": 2.24,
"learning_rate": 4.988690031652916e-07,
"loss": 11.5855,
"step": 6200
},
{
"epoch": 2.25,
"learning_rate": 4.98800528023211e-07,
"loss": 11.5853,
"step": 6220
},
{
"epoch": 2.26,
"learning_rate": 4.987300454507285e-07,
"loss": 11.4898,
"step": 6240
},
{
"epoch": 2.26,
"learning_rate": 4.986575560165277e-07,
"loss": 11.5472,
"step": 6260
},
{
"epoch": 2.27,
"learning_rate": 4.985830603054849e-07,
"loss": 11.5545,
"step": 6280
},
{
"epoch": 2.28,
"learning_rate": 4.985065589186638e-07,
"loss": 11.5627,
"step": 6300
},
{
"epoch": 2.29,
"learning_rate": 4.984280524733107e-07,
"loss": 11.6308,
"step": 6320
},
{
"epoch": 2.29,
"learning_rate": 4.983475416028494e-07,
"loss": 11.5904,
"step": 6340
},
{
"epoch": 2.3,
"learning_rate": 4.982650269568766e-07,
"loss": 11.6035,
"step": 6360
},
{
"epoch": 2.31,
"learning_rate": 4.981805092011564e-07,
"loss": 11.6106,
"step": 6380
},
{
"epoch": 2.31,
"learning_rate": 4.980939890176143e-07,
"loss": 11.5128,
"step": 6400
},
{
"epoch": 2.32,
"learning_rate": 4.980054671043329e-07,
"loss": 11.5708,
"step": 6420
},
{
"epoch": 2.33,
"learning_rate": 4.979149441755452e-07,
"loss": 11.4375,
"step": 6440
},
{
"epoch": 2.34,
"learning_rate": 4.978224209616292e-07,
"loss": 11.6326,
"step": 6460
},
{
"epoch": 2.34,
"learning_rate": 4.977278982091027e-07,
"loss": 11.6381,
"step": 6480
},
{
"epoch": 2.35,
"learning_rate": 4.976313766806159e-07,
"loss": 11.5119,
"step": 6500
},
{
"epoch": 2.36,
"learning_rate": 4.975328571549462e-07,
"loss": 11.594,
"step": 6520
},
{
"epoch": 2.37,
"learning_rate": 4.974323404269921e-07,
"loss": 11.5035,
"step": 6540
},
{
"epoch": 2.37,
"learning_rate": 4.973298273077657e-07,
"loss": 11.5414,
"step": 6560
},
{
"epoch": 2.38,
"learning_rate": 4.972253186243876e-07,
"loss": 11.5682,
"step": 6580
},
{
"epoch": 2.39,
"learning_rate": 4.971188152200791e-07,
"loss": 11.4559,
"step": 6600
},
{
"epoch": 2.39,
"learning_rate": 4.970103179541556e-07,
"loss": 11.6374,
"step": 6620
},
{
"epoch": 2.4,
"learning_rate": 4.9689982770202e-07,
"loss": 11.6862,
"step": 6640
},
{
"epoch": 2.41,
"learning_rate": 4.967873453551557e-07,
"loss": 11.6331,
"step": 6660
},
{
"epoch": 2.42,
"learning_rate": 4.966728718211188e-07,
"loss": 11.5836,
"step": 6680
},
{
"epoch": 2.42,
"learning_rate": 4.965564080235315e-07,
"loss": 11.5568,
"step": 6700
},
{
"epoch": 2.43,
"learning_rate": 4.964379549020741e-07,
"loss": 11.4982,
"step": 6720
},
{
"epoch": 2.44,
"learning_rate": 4.963175134124775e-07,
"loss": 11.5727,
"step": 6740
},
{
"epoch": 2.44,
"learning_rate": 4.961950845265162e-07,
"loss": 11.4852,
"step": 6760
},
{
"epoch": 2.45,
"learning_rate": 4.960706692319991e-07,
"loss": 11.5736,
"step": 6780
},
{
"epoch": 2.46,
"learning_rate": 4.959442685327627e-07,
"loss": 11.5491,
"step": 6800
},
{
"epoch": 2.47,
"learning_rate": 4.958158834486628e-07,
"loss": 11.5888,
"step": 6820
},
{
"epoch": 2.47,
"learning_rate": 4.956855150155657e-07,
"loss": 11.5538,
"step": 6840
},
{
"epoch": 2.48,
"learning_rate": 4.955531642853403e-07,
"loss": 11.5338,
"step": 6860
},
{
"epoch": 2.49,
"learning_rate": 4.954188323258498e-07,
"loss": 11.6751,
"step": 6880
},
{
"epoch": 2.5,
"learning_rate": 4.952825202209426e-07,
"loss": 11.5477,
"step": 6900
},
{
"epoch": 2.5,
"learning_rate": 4.951442290704437e-07,
"loss": 11.613,
"step": 6920
},
{
"epoch": 2.51,
"learning_rate": 4.950039599901459e-07,
"loss": 11.5106,
"step": 6940
},
{
"epoch": 2.52,
"learning_rate": 4.94861714111801e-07,
"loss": 11.5909,
"step": 6960
},
{
"epoch": 2.52,
"learning_rate": 4.947174925831103e-07,
"loss": 11.6413,
"step": 6980
},
{
"epoch": 2.53,
"learning_rate": 4.945712965677158e-07,
"loss": 11.6069,
"step": 7000
},
{
"epoch": 2.54,
"learning_rate": 4.944231272451899e-07,
"loss": 11.5424,
"step": 7020
},
{
"epoch": 2.55,
"learning_rate": 4.94272985811027e-07,
"loss": 11.6403,
"step": 7040
},
{
"epoch": 2.55,
"learning_rate": 4.941208734766332e-07,
"loss": 11.4936,
"step": 7060
},
{
"epoch": 2.56,
"learning_rate": 4.939667914693168e-07,
"loss": 11.5997,
"step": 7080
},
{
"epoch": 2.57,
"learning_rate": 4.93810741032278e-07,
"loss": 11.5182,
"step": 7100
},
{
"epoch": 2.58,
"learning_rate": 4.936527234245994e-07,
"loss": 11.6751,
"step": 7120
},
{
"epoch": 2.58,
"learning_rate": 4.934927399212354e-07,
"loss": 11.5386,
"step": 7140
},
{
"epoch": 2.59,
"learning_rate": 4.933307918130022e-07,
"loss": 11.5771,
"step": 7160
},
{
"epoch": 2.6,
"learning_rate": 4.931668804065674e-07,
"loss": 11.5864,
"step": 7180
},
{
"epoch": 2.6,
"learning_rate": 4.93001007024439e-07,
"loss": 11.4768,
"step": 7200
},
{
"epoch": 2.61,
"learning_rate": 4.928331730049555e-07,
"loss": 11.5559,
"step": 7220
},
{
"epoch": 2.62,
"learning_rate": 4.926633797022744e-07,
"loss": 11.5335,
"step": 7240
},
{
"epoch": 2.63,
"learning_rate": 4.924916284863614e-07,
"loss": 11.6058,
"step": 7260
},
{
"epoch": 2.63,
"learning_rate": 4.923179207429798e-07,
"loss": 11.5176,
"step": 7280
},
{
"epoch": 2.64,
"learning_rate": 4.92142257873679e-07,
"loss": 11.5609,
"step": 7300
},
{
"epoch": 2.65,
"learning_rate": 4.919646412957829e-07,
"loss": 11.7039,
"step": 7320
},
{
"epoch": 2.65,
"learning_rate": 4.917850724423792e-07,
"loss": 11.5618,
"step": 7340
},
{
"epoch": 2.66,
"learning_rate": 4.91603552762307e-07,
"loss": 11.5667,
"step": 7360
},
{
"epoch": 2.67,
"learning_rate": 4.914200837201458e-07,
"loss": 11.5145,
"step": 7380
},
{
"epoch": 2.68,
"learning_rate": 4.912346667962032e-07,
"loss": 11.4585,
"step": 7400
},
{
"epoch": 2.68,
"learning_rate": 4.910473034865032e-07,
"loss": 11.5594,
"step": 7420
},
{
"epoch": 2.69,
"learning_rate": 4.908579953027743e-07,
"loss": 11.6038,
"step": 7440
},
{
"epoch": 2.7,
"learning_rate": 4.906667437724366e-07,
"loss": 11.6523,
"step": 7460
},
{
"epoch": 2.71,
"learning_rate": 4.904735504385906e-07,
"loss": 11.6437,
"step": 7480
},
{
"epoch": 2.71,
"learning_rate": 4.902784168600036e-07,
"loss": 11.7565,
"step": 7500
},
{
"epoch": 2.72,
"learning_rate": 4.900813446110978e-07,
"loss": 11.6141,
"step": 7520
},
{
"epoch": 2.73,
"learning_rate": 4.898823352819375e-07,
"loss": 11.6731,
"step": 7540
},
{
"epoch": 2.73,
"learning_rate": 4.896813904782162e-07,
"loss": 11.647,
"step": 7560
},
{
"epoch": 2.74,
"learning_rate": 4.894785118212435e-07,
"loss": 11.5461,
"step": 7580
},
{
"epoch": 2.75,
"learning_rate": 4.892737009479322e-07,
"loss": 11.6451,
"step": 7600
},
{
"epoch": 2.76,
"learning_rate": 4.890669595107853e-07,
"loss": 11.6812,
"step": 7620
},
{
"epoch": 2.76,
"learning_rate": 4.888582891778821e-07,
"loss": 11.5704,
"step": 7640
},
{
"epoch": 2.77,
"learning_rate": 4.886476916328654e-07,
"loss": 11.5842,
"step": 7660
},
{
"epoch": 2.78,
"learning_rate": 4.88435168574927e-07,
"loss": 11.4895,
"step": 7680
},
{
"epoch": 2.78,
"learning_rate": 4.882207217187954e-07,
"loss": 11.5317,
"step": 7700
},
{
"epoch": 2.79,
"learning_rate": 4.880043527947205e-07,
"loss": 11.5456,
"step": 7720
},
{
"epoch": 2.8,
"learning_rate": 4.877860635484606e-07,
"loss": 11.6659,
"step": 7740
},
{
"epoch": 2.81,
"learning_rate": 4.875658557412676e-07,
"loss": 11.4757,
"step": 7760
},
{
"epoch": 2.81,
"learning_rate": 4.873437311498736e-07,
"loss": 11.58,
"step": 7780
},
{
"epoch": 2.82,
"learning_rate": 4.871196915664761e-07,
"loss": 11.7097,
"step": 7800
},
{
"epoch": 2.83,
"learning_rate": 4.868937387987233e-07,
"loss": 11.6675,
"step": 7820
},
{
"epoch": 2.84,
"learning_rate": 4.866658746697001e-07,
"loss": 11.5867,
"step": 7840
},
{
"epoch": 2.84,
"learning_rate": 4.864361010179128e-07,
"loss": 11.6793,
"step": 7860
},
{
"epoch": 2.85,
"learning_rate": 4.862044196972751e-07,
"loss": 11.4478,
"step": 7880
},
{
"epoch": 2.86,
"learning_rate": 4.859708325770919e-07,
"loss": 11.6853,
"step": 7900
},
{
"epoch": 2.86,
"learning_rate": 4.857353415420452e-07,
"loss": 11.4939,
"step": 7920
},
{
"epoch": 2.87,
"learning_rate": 4.854979484921789e-07,
"loss": 11.6505,
"step": 7940
},
{
"epoch": 2.88,
"learning_rate": 4.852586553428828e-07,
"loss": 11.7574,
"step": 7960
},
{
"epoch": 2.89,
"learning_rate": 4.850174640248775e-07,
"loss": 11.5477,
"step": 7980
},
{
"epoch": 2.89,
"learning_rate": 4.847743764841993e-07,
"loss": 11.5399,
"step": 8000
},
{
"epoch": 2.9,
"learning_rate": 4.845293946821836e-07,
"loss": 11.6456,
"step": 8020
},
{
"epoch": 2.91,
"learning_rate": 4.842825205954495e-07,
"loss": 11.5402,
"step": 8040
},
{
"epoch": 2.92,
"learning_rate": 4.840337562158843e-07,
"loss": 11.6021,
"step": 8060
},
{
"epoch": 2.92,
"learning_rate": 4.837831035506267e-07,
"loss": 11.5701,
"step": 8080
},
{
"epoch": 2.93,
"learning_rate": 4.835305646220509e-07,
"loss": 11.5934,
"step": 8100
},
{
"epoch": 2.94,
"learning_rate": 4.832761414677502e-07,
"loss": 11.6171,
"step": 8120
},
{
"epoch": 2.94,
"learning_rate": 4.83019836140521e-07,
"loss": 11.5625,
"step": 8140
},
{
"epoch": 2.95,
"learning_rate": 4.827616507083456e-07,
"loss": 11.5389,
"step": 8160
},
{
"epoch": 2.96,
"learning_rate": 4.825015872543758e-07,
"loss": 11.5511,
"step": 8180
},
{
"epoch": 2.97,
"learning_rate": 4.822396478769162e-07,
"loss": 11.597,
"step": 8200
},
{
"epoch": 2.97,
"learning_rate": 4.819758346894072e-07,
"loss": 11.6375,
"step": 8220
},
{
"epoch": 2.98,
"learning_rate": 4.817101498204078e-07,
"loss": 11.5426,
"step": 8240
},
{
"epoch": 2.99,
"learning_rate": 4.814425954135785e-07,
"loss": 11.5441,
"step": 8260
},
{
"epoch": 2.99,
"learning_rate": 4.811731736276643e-07,
"loss": 11.5702,
"step": 8280
},
{
"epoch": 3.0,
"learning_rate": 4.809018866364766e-07,
"loss": 11.4933,
"step": 8300
},
{
"epoch": 3.01,
"learning_rate": 4.806287366288766e-07,
"loss": 11.559,
"step": 8320
},
{
"epoch": 3.02,
"learning_rate": 4.803537258087566e-07,
"loss": 11.63,
"step": 8340
},
{
"epoch": 3.02,
"learning_rate": 4.800768563950231e-07,
"loss": 11.613,
"step": 8360
},
{
"epoch": 3.03,
"learning_rate": 4.797981306215784e-07,
"loss": 11.578,
"step": 8380
},
{
"epoch": 3.04,
"learning_rate": 4.795175507373028e-07,
"loss": 11.6106,
"step": 8400
},
{
"epoch": 3.05,
"learning_rate": 4.792351190060363e-07,
"loss": 11.5344,
"step": 8420
},
{
"epoch": 3.05,
"learning_rate": 4.789508377065603e-07,
"loss": 11.6361,
"step": 8440
},
{
"epoch": 3.06,
"learning_rate": 4.786647091325796e-07,
"loss": 11.5861,
"step": 8460
},
{
"epoch": 3.07,
"learning_rate": 4.783767355927033e-07,
"loss": 11.5699,
"step": 8480
},
{
"epoch": 3.07,
"learning_rate": 4.780869194104268e-07,
"loss": 11.5232,
"step": 8500
},
{
"epoch": 3.08,
"learning_rate": 4.777952629241122e-07,
"loss": 11.5817,
"step": 8520
},
{
"epoch": 3.09,
"learning_rate": 4.775017684869707e-07,
"loss": 11.4567,
"step": 8540
},
{
"epoch": 3.1,
"learning_rate": 4.772064384670424e-07,
"loss": 11.5982,
"step": 8560
},
{
"epoch": 3.1,
"learning_rate": 4.769092752471778e-07,
"loss": 11.4741,
"step": 8580
},
{
"epoch": 3.11,
"learning_rate": 4.766102812250183e-07,
"loss": 11.4757,
"step": 8600
},
{
"epoch": 3.12,
"learning_rate": 4.7630945881297746e-07,
"loss": 11.6331,
"step": 8620
},
{
"epoch": 3.12,
"learning_rate": 4.7600681043822044e-07,
"loss": 11.4811,
"step": 8640
},
{
"epoch": 3.13,
"learning_rate": 4.7570233854264564e-07,
"loss": 11.5495,
"step": 8660
},
{
"epoch": 3.14,
"learning_rate": 4.7539604558286395e-07,
"loss": 11.6934,
"step": 8680
},
{
"epoch": 3.15,
"learning_rate": 4.7508793403017976e-07,
"loss": 11.516,
"step": 8700
},
{
"epoch": 3.15,
"learning_rate": 4.747780063705705e-07,
"loss": 11.6319,
"step": 8720
},
{
"epoch": 3.16,
"learning_rate": 4.744662651046666e-07,
"loss": 11.6796,
"step": 8740
},
{
"epoch": 3.17,
"learning_rate": 4.741527127477317e-07,
"loss": 11.5041,
"step": 8760
},
{
"epoch": 3.18,
"learning_rate": 4.738373518296421e-07,
"loss": 11.5453,
"step": 8780
},
{
"epoch": 3.18,
"learning_rate": 4.7352018489486606e-07,
"loss": 11.4937,
"step": 8800
},
{
"epoch": 3.19,
"learning_rate": 4.732012145024439e-07,
"loss": 11.5725,
"step": 8820
},
{
"epoch": 3.2,
"learning_rate": 4.7288044322596663e-07,
"loss": 11.5179,
"step": 8840
},
{
"epoch": 3.2,
"learning_rate": 4.725578736535562e-07,
"loss": 11.4851,
"step": 8860
},
{
"epoch": 3.21,
"learning_rate": 4.722335083878433e-07,
"loss": 11.4871,
"step": 8880
},
{
"epoch": 3.22,
"learning_rate": 4.7190735004594753e-07,
"loss": 11.5277,
"step": 8900
},
{
"epoch": 3.23,
"learning_rate": 4.715794012594555e-07,
"loss": 11.5012,
"step": 8920
},
{
"epoch": 3.23,
"learning_rate": 4.712496646744002e-07,
"loss": 11.6283,
"step": 8940
},
{
"epoch": 3.24,
"learning_rate": 4.70918142951239e-07,
"loss": 11.6502,
"step": 8960
},
{
"epoch": 3.25,
"learning_rate": 4.705848387648329e-07,
"loss": 11.5598,
"step": 8980
},
{
"epoch": 3.25,
"learning_rate": 4.702497548044243e-07,
"loss": 11.5682,
"step": 9000
},
{
"epoch": 3.26,
"learning_rate": 4.699128937736157e-07,
"loss": 11.5341,
"step": 9020
},
{
"epoch": 3.27,
"learning_rate": 4.695742583903478e-07,
"loss": 11.4734,
"step": 9040
},
{
"epoch": 3.28,
"learning_rate": 4.692338513868776e-07,
"loss": 11.502,
"step": 9060
},
{
"epoch": 3.28,
"learning_rate": 4.6889167550975613e-07,
"loss": 11.5905,
"step": 9080
},
{
"epoch": 3.29,
"learning_rate": 4.6854773351980647e-07,
"loss": 11.5555,
"step": 9100
},
{
"epoch": 3.3,
"learning_rate": 4.682020281921017e-07,
"loss": 11.5055,
"step": 9120
},
{
"epoch": 3.31,
"learning_rate": 4.67854562315942e-07,
"loss": 11.6565,
"step": 9140
},
{
"epoch": 3.31,
"learning_rate": 4.6750533869483257e-07,
"loss": 11.5252,
"step": 9160
},
{
"epoch": 3.32,
"learning_rate": 4.6715436014646077e-07,
"loss": 11.5533,
"step": 9180
},
{
"epoch": 3.33,
"learning_rate": 4.6680162950267356e-07,
"loss": 11.6982,
"step": 9200
},
{
"epoch": 3.33,
"learning_rate": 4.6644714960945453e-07,
"loss": 11.5075,
"step": 9220
},
{
"epoch": 3.34,
"learning_rate": 4.660909233269009e-07,
"loss": 11.6,
"step": 9240
},
{
"epoch": 3.35,
"learning_rate": 4.657329535292007e-07,
"loss": 11.5212,
"step": 9260
},
{
"epoch": 3.36,
"learning_rate": 4.653732431046092e-07,
"loss": 11.5537,
"step": 9280
},
{
"epoch": 3.36,
"learning_rate": 4.6501179495542585e-07,
"loss": 11.5932,
"step": 9300
},
{
"epoch": 3.37,
"learning_rate": 4.646486119979709e-07,
"loss": 11.6961,
"step": 9320
},
{
"epoch": 3.38,
"learning_rate": 4.642836971625616e-07,
"loss": 11.5336,
"step": 9340
},
{
"epoch": 3.39,
"learning_rate": 4.639170533934891e-07,
"loss": 11.5924,
"step": 9360
},
{
"epoch": 3.39,
"learning_rate": 4.635486836489938e-07,
"loss": 11.5276,
"step": 9380
},
{
"epoch": 3.4,
"learning_rate": 4.631785909012426e-07,
"loss": 11.5576,
"step": 9400
},
{
"epoch": 3.41,
"learning_rate": 4.6280677813630397e-07,
"loss": 11.6092,
"step": 9420
},
{
"epoch": 3.41,
"learning_rate": 4.624332483541242e-07,
"loss": 11.6501,
"step": 9440
},
{
"epoch": 3.42,
"learning_rate": 4.6205800456850343e-07,
"loss": 11.5759,
"step": 9460
},
{
"epoch": 3.43,
"learning_rate": 4.6168104980707103e-07,
"loss": 11.5808,
"step": 9480
},
{
"epoch": 3.44,
"learning_rate": 4.6130238711126123e-07,
"loss": 11.5683,
"step": 9500
},
{
"epoch": 3.44,
"learning_rate": 4.609220195362886e-07,
"loss": 11.626,
"step": 9520
},
{
"epoch": 3.45,
"learning_rate": 4.6053995015112343e-07,
"loss": 11.6191,
"step": 9540
},
{
"epoch": 3.46,
"learning_rate": 4.601561820384671e-07,
"loss": 11.5689,
"step": 9560
},
{
"epoch": 3.46,
"learning_rate": 4.597707182947268e-07,
"loss": 11.524,
"step": 9580
},
{
"epoch": 3.47,
"learning_rate": 4.593835620299911e-07,
"loss": 11.6196,
"step": 9600
},
{
"epoch": 3.48,
"learning_rate": 4.589947163680041e-07,
"loss": 11.5477,
"step": 9620
},
{
"epoch": 3.49,
"learning_rate": 4.5860418444614133e-07,
"loss": 11.5832,
"step": 9640
},
{
"epoch": 3.49,
"learning_rate": 4.5821196941538334e-07,
"loss": 11.5997,
"step": 9660
},
{
"epoch": 3.5,
"learning_rate": 4.5781807444029075e-07,
"loss": 11.4988,
"step": 9680
},
{
"epoch": 3.51,
"learning_rate": 4.5742250269897884e-07,
"loss": 11.6131,
"step": 9700
},
{
"epoch": 3.52,
"learning_rate": 4.570252573830918e-07,
"loss": 11.5342,
"step": 9720
},
{
"epoch": 3.52,
"learning_rate": 4.5662634169777674e-07,
"loss": 11.6138,
"step": 9740
},
{
"epoch": 3.53,
"learning_rate": 4.5622575886165826e-07,
"loss": 11.5204,
"step": 9760
},
{
"epoch": 3.54,
"learning_rate": 4.55823512106812e-07,
"loss": 11.475,
"step": 9780
},
{
"epoch": 3.54,
"learning_rate": 4.554196046787392e-07,
"loss": 11.5602,
"step": 9800
},
{
"epoch": 3.55,
"learning_rate": 4.550140398363398e-07,
"loss": 11.5868,
"step": 9820
},
{
"epoch": 3.56,
"learning_rate": 4.546068208518865e-07,
"loss": 11.5379,
"step": 9840
},
{
"epoch": 3.57,
"learning_rate": 4.5419795101099847e-07,
"loss": 11.6708,
"step": 9860
},
{
"epoch": 3.57,
"learning_rate": 4.537874336126146e-07,
"loss": 11.5728,
"step": 9880
},
{
"epoch": 3.58,
"learning_rate": 4.5337527196896715e-07,
"loss": 11.543,
"step": 9900
},
{
"epoch": 3.59,
"learning_rate": 4.529614694055546e-07,
"loss": 11.5632,
"step": 9920
},
{
"epoch": 3.59,
"learning_rate": 4.5254602926111533e-07,
"loss": 11.483,
"step": 9940
},
{
"epoch": 3.6,
"learning_rate": 4.521289548876003e-07,
"loss": 11.5291,
"step": 9960
},
{
"epoch": 3.61,
"learning_rate": 4.517102496501462e-07,
"loss": 11.58,
"step": 9980
},
{
"epoch": 3.62,
"learning_rate": 4.512899169270481e-07,
"loss": 11.5208,
"step": 10000
},
{
"epoch": 3.62,
"learning_rate": 4.508679601097326e-07,
"loss": 11.5706,
"step": 10020
},
{
"epoch": 3.63,
"learning_rate": 4.504443826027298e-07,
"loss": 11.5883,
"step": 10040
},
{
"epoch": 3.64,
"learning_rate": 4.5001918782364665e-07,
"loss": 11.5798,
"step": 10060
},
{
"epoch": 3.65,
"learning_rate": 4.4959237920313877e-07,
"loss": 11.51,
"step": 10080
},
{
"epoch": 3.65,
"learning_rate": 4.491639601848828e-07,
"loss": 11.6322,
"step": 10100
},
{
"epoch": 3.66,
"learning_rate": 4.4873393422554894e-07,
"loss": 11.5523,
"step": 10120
},
{
"epoch": 3.67,
"learning_rate": 4.483023047947729e-07,
"loss": 11.6259,
"step": 10140
},
{
"epoch": 3.67,
"learning_rate": 4.478690753751278e-07,
"loss": 11.5645,
"step": 10160
},
{
"epoch": 3.68,
"learning_rate": 4.4743424946209627e-07,
"loss": 11.5416,
"step": 10180
},
{
"epoch": 3.69,
"learning_rate": 4.46997830564042e-07,
"loss": 11.4724,
"step": 10200
},
{
"epoch": 3.7,
"learning_rate": 4.4655982220218167e-07,
"loss": 11.498,
"step": 10220
},
{
"epoch": 3.7,
"learning_rate": 4.461202279105565e-07,
"loss": 11.5235,
"step": 10240
},
{
"epoch": 3.71,
"learning_rate": 4.4567905123600345e-07,
"loss": 11.5966,
"step": 10260
},
{
"epoch": 3.72,
"learning_rate": 4.4523629573812705e-07,
"loss": 11.613,
"step": 10280
},
{
"epoch": 3.73,
"learning_rate": 4.447919649892704e-07,
"loss": 11.6004,
"step": 10300
},
{
"epoch": 3.73,
"learning_rate": 4.443460625744865e-07,
"loss": 11.7358,
"step": 10320
},
{
"epoch": 3.74,
"learning_rate": 4.438985920915089e-07,
"loss": 11.5892,
"step": 10340
},
{
"epoch": 3.75,
"learning_rate": 4.434495571507234e-07,
"loss": 11.4706,
"step": 10360
},
{
"epoch": 3.75,
"learning_rate": 4.4299896137513837e-07,
"loss": 11.6245,
"step": 10380
},
{
"epoch": 3.76,
"learning_rate": 4.4254680840035554e-07,
"loss": 11.4879,
"step": 10400
},
{
"epoch": 3.77,
"learning_rate": 4.42093101874541e-07,
"loss": 11.6351,
"step": 10420
},
{
"epoch": 3.78,
"learning_rate": 4.4163784545839543e-07,
"loss": 11.5064,
"step": 10440
},
{
"epoch": 3.78,
"learning_rate": 4.411810428251248e-07,
"loss": 11.6152,
"step": 10460
},
{
"epoch": 3.79,
"learning_rate": 4.407226976604105e-07,
"loss": 11.7646,
"step": 10480
},
{
"epoch": 3.8,
"learning_rate": 4.402628136623798e-07,
"loss": 11.5066,
"step": 10500
},
{
"epoch": 3.8,
"learning_rate": 4.3980139454157607e-07,
"loss": 11.5206,
"step": 10520
},
{
"epoch": 3.81,
"learning_rate": 4.393384440209284e-07,
"loss": 11.5648,
"step": 10540
},
{
"epoch": 3.82,
"learning_rate": 4.3887396583572225e-07,
"loss": 11.6255,
"step": 10560
},
{
"epoch": 3.83,
"learning_rate": 4.3840796373356864e-07,
"loss": 11.5703,
"step": 10580
},
{
"epoch": 3.83,
"learning_rate": 4.3794044147437437e-07,
"loss": 11.5165,
"step": 10600
},
{
"epoch": 3.84,
"learning_rate": 4.3747140283031153e-07,
"loss": 11.4863,
"step": 10620
},
{
"epoch": 3.85,
"learning_rate": 4.3700085158578694e-07,
"loss": 11.4701,
"step": 10640
},
{
"epoch": 3.86,
"learning_rate": 4.365287915374118e-07,
"loss": 11.4714,
"step": 10660
},
{
"epoch": 3.86,
"learning_rate": 4.360552264939712e-07,
"loss": 11.5868,
"step": 10680
},
{
"epoch": 3.87,
"learning_rate": 4.355801602763927e-07,
"loss": 11.5528,
"step": 10700
},
{
"epoch": 3.88,
"learning_rate": 4.3510359671771647e-07,
"loss": 11.691,
"step": 10720
},
{
"epoch": 3.88,
"learning_rate": 4.3462553966306357e-07,
"loss": 11.5223,
"step": 10740
},
{
"epoch": 3.89,
"learning_rate": 4.341459929696054e-07,
"loss": 11.66,
"step": 10760
},
{
"epoch": 3.9,
"learning_rate": 4.3366496050653235e-07,
"loss": 11.6234,
"step": 10780
},
{
"epoch": 3.91,
"learning_rate": 4.3318244615502254e-07,
"loss": 11.4636,
"step": 10800
},
{
"epoch": 3.91,
"learning_rate": 4.326984538082108e-07,
"loss": 11.6062,
"step": 10820
},
{
"epoch": 3.92,
"learning_rate": 4.32212987371157e-07,
"loss": 11.5589,
"step": 10840
},
{
"epoch": 3.93,
"learning_rate": 4.3172605076081456e-07,
"loss": 11.5099,
"step": 10860
},
{
"epoch": 3.93,
"learning_rate": 4.312376479059988e-07,
"loss": 11.5904,
"step": 10880
},
{
"epoch": 3.94,
"learning_rate": 4.307477827473556e-07,
"loss": 11.5902,
"step": 10900
},
{
"epoch": 3.95,
"learning_rate": 4.302564592373292e-07,
"loss": 11.5599,
"step": 10920
},
{
"epoch": 3.96,
"learning_rate": 4.2976368134013033e-07,
"loss": 11.5479,
"step": 10940
},
{
"epoch": 3.96,
"learning_rate": 4.292694530317046e-07,
"loss": 11.4897,
"step": 10960
},
{
"epoch": 3.97,
"learning_rate": 4.2877377829969983e-07,
"loss": 11.5071,
"step": 10980
},
{
"epoch": 3.98,
"learning_rate": 4.2827666114343463e-07,
"loss": 11.5219,
"step": 11000
},
{
"epoch": 3.99,
"learning_rate": 4.2777810557386534e-07,
"loss": 11.5207,
"step": 11020
},
{
"epoch": 3.99,
"learning_rate": 4.2727811561355423e-07,
"loss": 11.5808,
"step": 11040
},
{
"epoch": 4.0,
"learning_rate": 4.2677669529663686e-07,
"loss": 11.5662,
"step": 11060
},
{
"epoch": 4.01,
"learning_rate": 4.262738486687895e-07,
"loss": 11.6212,
"step": 11080
},
{
"epoch": 4.01,
"learning_rate": 4.2576957978719636e-07,
"loss": 11.7402,
"step": 11100
},
{
"epoch": 4.02,
"learning_rate": 4.252638927205172e-07,
"loss": 11.5713,
"step": 11120
},
{
"epoch": 4.03,
"learning_rate": 4.2475679154885443e-07,
"loss": 11.5501,
"step": 11140
},
{
"epoch": 4.04,
"learning_rate": 4.242482803637197e-07,
"loss": 11.6619,
"step": 11160
},
{
"epoch": 4.04,
"learning_rate": 4.237383632680015e-07,
"loss": 11.4901,
"step": 11180
},
{
"epoch": 4.05,
"learning_rate": 4.232270443759319e-07,
"loss": 11.5512,
"step": 11200
},
{
"epoch": 4.06,
"learning_rate": 4.2271432781305293e-07,
"loss": 11.712,
"step": 11220
},
{
"epoch": 4.07,
"learning_rate": 4.222002177161841e-07,
"loss": 11.5573,
"step": 11240
},
{
"epoch": 4.07,
"learning_rate": 4.216847182333881e-07,
"loss": 11.685,
"step": 11260
},
{
"epoch": 4.08,
"learning_rate": 4.2116783352393803e-07,
"loss": 11.4884,
"step": 11280
},
{
"epoch": 4.09,
"learning_rate": 4.2064956775828366e-07,
"loss": 11.5112,
"step": 11300
},
{
"epoch": 4.09,
"learning_rate": 4.201299251180176e-07,
"loss": 11.513,
"step": 11320
},
{
"epoch": 4.1,
"learning_rate": 4.1960890979584155e-07,
"loss": 11.5298,
"step": 11340
},
{
"epoch": 4.11,
"learning_rate": 4.1908652599553293e-07,
"loss": 11.5555,
"step": 11360
},
{
"epoch": 4.12,
"learning_rate": 4.1856277793191044e-07,
"loss": 11.5323,
"step": 11380
},
{
"epoch": 4.12,
"learning_rate": 4.1803766983080006e-07,
"loss": 11.5696,
"step": 11400
},
{
"epoch": 4.13,
"learning_rate": 4.1751120592900156e-07,
"loss": 11.6017,
"step": 11420
},
{
"epoch": 4.14,
"learning_rate": 4.169833904742537e-07,
"loss": 11.5497,
"step": 11440
},
{
"epoch": 4.14,
"learning_rate": 4.164542277252e-07,
"loss": 11.6159,
"step": 11460
},
{
"epoch": 4.15,
"learning_rate": 4.159237219513547e-07,
"loss": 11.5327,
"step": 11480
},
{
"epoch": 4.16,
"learning_rate": 4.153918774330682e-07,
"loss": 11.6313,
"step": 11500
},
{
"epoch": 4.17,
"learning_rate": 4.1485869846149233e-07,
"loss": 11.5099,
"step": 11520
},
{
"epoch": 4.17,
"learning_rate": 4.1432418933854586e-07,
"loss": 11.5367,
"step": 11540
},
{
"epoch": 4.18,
"learning_rate": 4.1378835437687996e-07,
"loss": 11.5341,
"step": 11560
},
{
"epoch": 4.19,
"learning_rate": 4.132511978998432e-07,
"loss": 11.5623,
"step": 11580
},
{
"epoch": 4.2,
"learning_rate": 4.1271272424144645e-07,
"loss": 11.5277,
"step": 11600
},
{
"epoch": 4.2,
"learning_rate": 4.1217293774632844e-07,
"loss": 11.5259,
"step": 11620
},
{
"epoch": 4.21,
"learning_rate": 4.116318427697205e-07,
"loss": 11.5667,
"step": 11640
},
{
"epoch": 4.22,
"learning_rate": 4.1108944367741105e-07,
"loss": 11.6065,
"step": 11660
},
{
"epoch": 4.22,
"learning_rate": 4.1054574484571105e-07,
"loss": 11.579,
"step": 11680
},
{
"epoch": 4.23,
"learning_rate": 4.100007506614178e-07,
"loss": 11.7267,
"step": 11700
},
{
"epoch": 4.24,
"learning_rate": 4.094544655217807e-07,
"loss": 11.5985,
"step": 11720
},
{
"epoch": 4.25,
"learning_rate": 4.0890689383446476e-07,
"loss": 11.5627,
"step": 11740
},
{
"epoch": 4.25,
"learning_rate": 4.083580400175153e-07,
"loss": 11.4979,
"step": 11760
},
{
"epoch": 4.26,
"learning_rate": 4.078079084993227e-07,
"loss": 11.6263,
"step": 11780
},
{
"epoch": 4.27,
"learning_rate": 4.0725650371858646e-07,
"loss": 11.6158,
"step": 11800
},
{
"epoch": 4.27,
"learning_rate": 4.0670383012427877e-07,
"loss": 11.5151,
"step": 11820
},
{
"epoch": 4.28,
"learning_rate": 4.0614989217560983e-07,
"loss": 11.6298,
"step": 11840
},
{
"epoch": 4.29,
"learning_rate": 4.0559469434199077e-07,
"loss": 11.5004,
"step": 11860
},
{
"epoch": 4.3,
"learning_rate": 4.050382411029981e-07,
"loss": 11.5026,
"step": 11880
},
{
"epoch": 4.3,
"learning_rate": 4.044805369483377e-07,
"loss": 11.5553,
"step": 11900
},
{
"epoch": 4.31,
"learning_rate": 4.0392158637780794e-07,
"loss": 11.5134,
"step": 11920
},
{
"epoch": 4.32,
"learning_rate": 4.0336139390126424e-07,
"loss": 11.5529,
"step": 11940
},
{
"epoch": 4.33,
"learning_rate": 4.027999640385821e-07,
"loss": 11.6092,
"step": 11960
},
{
"epoch": 4.33,
"learning_rate": 4.022373013196206e-07,
"loss": 11.4795,
"step": 11980
},
{
"epoch": 4.34,
"learning_rate": 4.0167341028418655e-07,
"loss": 11.5562,
"step": 12000
},
{
"epoch": 4.35,
"learning_rate": 4.0110829548199667e-07,
"loss": 11.5685,
"step": 12020
},
{
"epoch": 4.35,
"learning_rate": 4.005419614726421e-07,
"loss": 11.5652,
"step": 12040
},
{
"epoch": 4.36,
"learning_rate": 3.999744128255508e-07,
"loss": 11.5517,
"step": 12060
},
{
"epoch": 4.37,
"learning_rate": 3.994056541199511e-07,
"loss": 11.6305,
"step": 12080
},
{
"epoch": 4.38,
"learning_rate": 3.988356899448344e-07,
"loss": 11.6278,
"step": 12100
},
{
"epoch": 4.38,
"learning_rate": 3.982645248989186e-07,
"loss": 11.6063,
"step": 12120
},
{
"epoch": 4.39,
"learning_rate": 3.9769216359061063e-07,
"loss": 11.6833,
"step": 12140
},
{
"epoch": 4.4,
"learning_rate": 3.971186106379693e-07,
"loss": 11.5823,
"step": 12160
},
{
"epoch": 4.41,
"learning_rate": 3.9654387066866833e-07,
"loss": 11.512,
"step": 12180
},
{
"epoch": 4.41,
"learning_rate": 3.9596794831995863e-07,
"loss": 11.5795,
"step": 12200
},
{
"epoch": 4.42,
"learning_rate": 3.953908482386311e-07,
"loss": 11.7476,
"step": 12220
},
{
"epoch": 4.43,
"learning_rate": 3.94812575080979e-07,
"loss": 11.5925,
"step": 12240
},
{
"epoch": 4.43,
"learning_rate": 3.9423313351276075e-07,
"loss": 11.6184,
"step": 12260
},
{
"epoch": 4.44,
"learning_rate": 3.9365252820916186e-07,
"loss": 11.5744,
"step": 12280
},
{
"epoch": 4.45,
"learning_rate": 3.930707638547571e-07,
"loss": 11.5405,
"step": 12300
},
{
"epoch": 4.46,
"learning_rate": 3.924878451434735e-07,
"loss": 11.5852,
"step": 12320
},
{
"epoch": 4.46,
"learning_rate": 3.9190377677855155e-07,
"loss": 11.5914,
"step": 12340
},
{
"epoch": 4.47,
"learning_rate": 3.913185634725077e-07,
"loss": 11.5288,
"step": 12360
},
{
"epoch": 4.48,
"learning_rate": 3.907322099470963e-07,
"loss": 11.6778,
"step": 12380
},
{
"epoch": 4.48,
"learning_rate": 3.9014472093327164e-07,
"loss": 11.5544,
"step": 12400
},
{
"epoch": 4.49,
"learning_rate": 3.8955610117114946e-07,
"loss": 11.4673,
"step": 12420
},
{
"epoch": 4.5,
"learning_rate": 3.889663554099688e-07,
"loss": 11.4688,
"step": 12440
},
{
"epoch": 4.51,
"learning_rate": 3.883754884080539e-07,
"loss": 11.5434,
"step": 12460
},
{
"epoch": 4.51,
"learning_rate": 3.8778350493277566e-07,
"loss": 11.4583,
"step": 12480
},
{
"epoch": 4.52,
"learning_rate": 3.871904097605131e-07,
"loss": 11.6132,
"step": 12500
},
{
"epoch": 4.53,
"learning_rate": 3.8659620767661483e-07,
"loss": 11.6718,
"step": 12520
},
{
"epoch": 4.54,
"learning_rate": 3.8600090347536064e-07,
"loss": 11.5192,
"step": 12540
},
{
"epoch": 4.54,
"learning_rate": 3.8540450195992255e-07,
"loss": 11.552,
"step": 12560
},
{
"epoch": 4.55,
"learning_rate": 3.8480700794232634e-07,
"loss": 11.4474,
"step": 12580
},
{
"epoch": 4.56,
"learning_rate": 3.842084262434125e-07,
"loss": 11.5033,
"step": 12600
},
{
"epoch": 4.56,
"learning_rate": 3.8360876169279734e-07,
"loss": 11.5274,
"step": 12620
},
{
"epoch": 4.57,
"learning_rate": 3.8300801912883414e-07,
"loss": 11.5262,
"step": 12640
},
{
"epoch": 4.58,
"learning_rate": 3.82406203398574e-07,
"loss": 11.4807,
"step": 12660
},
{
"epoch": 4.59,
"learning_rate": 3.81803319357727e-07,
"loss": 11.6133,
"step": 12680
},
{
"epoch": 4.59,
"learning_rate": 3.8119937187062254e-07,
"loss": 11.5197,
"step": 12700
},
{
"epoch": 4.6,
"learning_rate": 3.8059436581017044e-07,
"loss": 11.5477,
"step": 12720
},
{
"epoch": 4.61,
"learning_rate": 3.7998830605782175e-07,
"loss": 11.5387,
"step": 12740
},
{
"epoch": 4.61,
"learning_rate": 3.7938119750352885e-07,
"loss": 11.5181,
"step": 12760
},
{
"epoch": 4.62,
"learning_rate": 3.787730450457065e-07,
"loss": 11.5785,
"step": 12780
},
{
"epoch": 4.63,
"learning_rate": 3.781638535911922e-07,
"loss": 11.5045,
"step": 12800
},
{
"epoch": 4.64,
"learning_rate": 3.775536280552063e-07,
"loss": 11.5656,
"step": 12820
},
{
"epoch": 4.64,
"learning_rate": 3.769423733613126e-07,
"loss": 11.5141,
"step": 12840
},
{
"epoch": 4.65,
"learning_rate": 3.76330094441379e-07,
"loss": 11.5804,
"step": 12860
},
{
"epoch": 4.66,
"learning_rate": 3.757167962355365e-07,
"loss": 11.5743,
"step": 12880
},
{
"epoch": 4.67,
"learning_rate": 3.7510248369214093e-07,
"loss": 11.5083,
"step": 12900
},
{
"epoch": 4.67,
"learning_rate": 3.744871617677319e-07,
"loss": 11.638,
"step": 12920
},
{
"epoch": 4.68,
"learning_rate": 3.73870835426993e-07,
"loss": 11.5635,
"step": 12940
},
{
"epoch": 4.69,
"learning_rate": 3.732535096427123e-07,
"loss": 11.662,
"step": 12960
},
{
"epoch": 4.69,
"learning_rate": 3.7263518939574136e-07,
"loss": 11.5764,
"step": 12980
},
{
"epoch": 4.7,
"learning_rate": 3.720158796749556e-07,
"loss": 11.5007,
"step": 13000
},
{
"epoch": 4.71,
"learning_rate": 3.713955854772143e-07,
"loss": 11.5644,
"step": 13020
},
{
"epoch": 4.72,
"learning_rate": 3.707743118073195e-07,
"loss": 11.5123,
"step": 13040
},
{
"epoch": 4.72,
"learning_rate": 3.7015206367797627e-07,
"loss": 11.5952,
"step": 13060
},
{
"epoch": 4.73,
"learning_rate": 3.695288461097519e-07,
"loss": 11.5475,
"step": 13080
},
{
"epoch": 4.74,
"learning_rate": 3.6890466413103574e-07,
"loss": 11.5173,
"step": 13100
},
{
"epoch": 4.75,
"learning_rate": 3.682795227779981e-07,
"loss": 11.5033,
"step": 13120
},
{
"epoch": 4.75,
"learning_rate": 3.6765342709455035e-07,
"loss": 11.5651,
"step": 13140
},
{
"epoch": 4.76,
"learning_rate": 3.670263821323034e-07,
"loss": 11.4879,
"step": 13160
},
{
"epoch": 4.77,
"learning_rate": 3.6639839295052776e-07,
"loss": 11.6856,
"step": 13180
},
{
"epoch": 4.77,
"learning_rate": 3.657694646161119e-07,
"loss": 11.5647,
"step": 13200
},
{
"epoch": 4.78,
"learning_rate": 3.6513960220352204e-07,
"loss": 11.5979,
"step": 13220
},
{
"epoch": 4.79,
"learning_rate": 3.645088107947609e-07,
"loss": 11.6209,
"step": 13240
},
{
"epoch": 4.8,
"learning_rate": 3.638770954793268e-07,
"loss": 11.5795,
"step": 13260
},
{
"epoch": 4.8,
"learning_rate": 3.632444613541723e-07,
"loss": 11.5411,
"step": 13280
},
{
"epoch": 4.81,
"learning_rate": 3.6261091352366363e-07,
"loss": 11.4905,
"step": 13300
},
{
"epoch": 4.82,
"learning_rate": 3.6197645709953895e-07,
"loss": 11.4929,
"step": 13320
},
{
"epoch": 4.82,
"learning_rate": 3.613410972008674e-07,
"loss": 11.6179,
"step": 13340
},
{
"epoch": 4.83,
"learning_rate": 3.60704838954008e-07,
"loss": 11.5728,
"step": 13360
},
{
"epoch": 4.84,
"learning_rate": 3.6006768749256755e-07,
"loss": 11.5603,
"step": 13380
},
{
"epoch": 4.85,
"learning_rate": 3.594296479573602e-07,
"loss": 11.5548,
"step": 13400
},
{
"epoch": 4.85,
"learning_rate": 3.5879072549636494e-07,
"loss": 11.5005,
"step": 13420
},
{
"epoch": 4.86,
"learning_rate": 3.581509252646851e-07,
"loss": 11.5154,
"step": 13440
},
{
"epoch": 4.87,
"learning_rate": 3.5751025242450596e-07,
"loss": 11.6091,
"step": 13460
},
{
"epoch": 4.88,
"learning_rate": 3.568687121450533e-07,
"loss": 11.565,
"step": 13480
},
{
"epoch": 4.88,
"learning_rate": 3.5622630960255215e-07,
"loss": 11.5348,
"step": 13500
},
{
"epoch": 4.89,
"learning_rate": 3.5558304998018426e-07,
"loss": 11.5913,
"step": 13520
},
{
"epoch": 4.9,
"learning_rate": 3.5493893846804673e-07,
"loss": 11.4063,
"step": 13540
},
{
"epoch": 4.9,
"learning_rate": 3.5429398026311037e-07,
"loss": 11.5915,
"step": 13560
},
{
"epoch": 4.91,
"learning_rate": 3.5364818056917724e-07,
"loss": 11.576,
"step": 13580
},
{
"epoch": 4.92,
"learning_rate": 3.530015445968388e-07,
"loss": 11.6471,
"step": 13600
},
{
"epoch": 4.93,
"learning_rate": 3.5235407756343416e-07,
"loss": 11.5045,
"step": 13620
},
{
"epoch": 4.93,
"learning_rate": 3.517057846930078e-07,
"loss": 11.5106,
"step": 13640
},
{
"epoch": 4.94,
"learning_rate": 3.510566712162673e-07,
"loss": 11.5982,
"step": 13660
},
{
"epoch": 4.95,
"learning_rate": 3.5040674237054125e-07,
"loss": 11.5696,
"step": 13680
},
{
"epoch": 4.95,
"learning_rate": 3.49756003399737e-07,
"loss": 11.6319,
"step": 13700
},
{
"epoch": 4.96,
"learning_rate": 3.491044595542985e-07,
"loss": 11.5474,
"step": 13720
},
{
"epoch": 4.97,
"learning_rate": 3.4845211609116354e-07,
"loss": 11.6031,
"step": 13740
},
{
"epoch": 4.98,
"learning_rate": 3.4779897827372164e-07,
"loss": 11.4964,
"step": 13760
},
{
"epoch": 4.98,
"learning_rate": 3.4714505137177163e-07,
"loss": 11.4955,
"step": 13780
},
{
"epoch": 4.99,
"learning_rate": 3.4649034066147894e-07,
"loss": 11.527,
"step": 13800
},
{
"epoch": 5.0,
"learning_rate": 3.4583485142533303e-07,
"loss": 11.6848,
"step": 13820
},
{
"epoch": 5.01,
"learning_rate": 3.4517858895210493e-07,
"loss": 11.5795,
"step": 13840
},
{
"epoch": 5.01,
"learning_rate": 3.4452155853680454e-07,
"loss": 11.4507,
"step": 13860
},
{
"epoch": 5.02,
"learning_rate": 3.438637654806378e-07,
"loss": 11.4203,
"step": 13880
},
{
"epoch": 5.03,
"learning_rate": 3.432052150909637e-07,
"loss": 11.4583,
"step": 13900
},
{
"epoch": 5.03,
"learning_rate": 3.4254591268125214e-07,
"loss": 11.4966,
"step": 13920
},
{
"epoch": 5.04,
"learning_rate": 3.418858635710406e-07,
"loss": 11.4683,
"step": 13940
},
{
"epoch": 5.05,
"learning_rate": 3.412250730858909e-07,
"loss": 11.5058,
"step": 13960
},
{
"epoch": 5.06,
"learning_rate": 3.4056354655734686e-07,
"loss": 11.6324,
"step": 13980
},
{
"epoch": 5.06,
"learning_rate": 3.399012893228912e-07,
"loss": 11.5019,
"step": 14000
},
{
"epoch": 5.07,
"learning_rate": 3.392383067259018e-07,
"loss": 11.6269,
"step": 14020
},
{
"epoch": 5.08,
"learning_rate": 3.3857460411560943e-07,
"loss": 11.4724,
"step": 14040
},
{
"epoch": 5.08,
"learning_rate": 3.379101868470543e-07,
"loss": 11.5622,
"step": 14060
},
{
"epoch": 5.09,
"learning_rate": 3.372450602810426e-07,
"loss": 11.6062,
"step": 14080
},
{
"epoch": 5.1,
"learning_rate": 3.3657922978410335e-07,
"loss": 11.6198,
"step": 14100
},
{
"epoch": 5.11,
"learning_rate": 3.3591270072844547e-07,
"loss": 11.5821,
"step": 14120
},
{
"epoch": 5.11,
"learning_rate": 3.3524547849191396e-07,
"loss": 11.662,
"step": 14140
},
{
"epoch": 5.12,
"learning_rate": 3.3457756845794687e-07,
"loss": 11.6006,
"step": 14160
},
{
"epoch": 5.13,
"learning_rate": 3.3390897601553146e-07,
"loss": 11.5646,
"step": 14180
},
{
"epoch": 5.14,
"learning_rate": 3.3323970655916115e-07,
"loss": 11.4826,
"step": 14200
},
{
"epoch": 5.14,
"learning_rate": 3.325697654887918e-07,
"loss": 11.4648,
"step": 14220
},
{
"epoch": 5.15,
"learning_rate": 3.3189915820979785e-07,
"loss": 11.5364,
"step": 14240
},
{
"epoch": 5.16,
"learning_rate": 3.312278901329295e-07,
"loss": 11.5953,
"step": 14260
},
{
"epoch": 5.16,
"learning_rate": 3.305559666742682e-07,
"loss": 11.6724,
"step": 14280
},
{
"epoch": 5.17,
"learning_rate": 3.298833932551832e-07,
"loss": 11.493,
"step": 14300
},
{
"epoch": 5.18,
"learning_rate": 3.2921017530228845e-07,
"loss": 11.5222,
"step": 14320
},
{
"epoch": 5.19,
"learning_rate": 3.2853631824739756e-07,
"loss": 11.5317,
"step": 14340
},
{
"epoch": 5.19,
"learning_rate": 3.278618275274814e-07,
"loss": 11.5487,
"step": 14360
},
{
"epoch": 5.2,
"learning_rate": 3.2718670858462296e-07,
"loss": 11.5281,
"step": 14380
},
{
"epoch": 5.21,
"learning_rate": 3.2651096686597423e-07,
"loss": 11.5604,
"step": 14400
},
{
"epoch": 5.22,
"learning_rate": 3.2583460782371215e-07,
"loss": 11.5536,
"step": 14420
},
{
"epoch": 5.22,
"learning_rate": 3.2515763691499425e-07,
"loss": 11.4892,
"step": 14440
},
{
"epoch": 5.23,
"learning_rate": 3.2448005960191507e-07,
"loss": 11.5586,
"step": 14460
},
{
"epoch": 5.24,
"learning_rate": 3.2380188135146173e-07,
"loss": 11.5617,
"step": 14480
},
{
"epoch": 5.24,
"learning_rate": 3.2312310763547005e-07,
"loss": 11.5167,
"step": 14500
},
{
"epoch": 5.25,
"learning_rate": 3.224437439305803e-07,
"loss": 11.5548,
"step": 14520
},
{
"epoch": 5.26,
"learning_rate": 3.2176379571819314e-07,
"loss": 11.6007,
"step": 14540
},
{
"epoch": 5.27,
"learning_rate": 3.2108326848442503e-07,
"loss": 11.5821,
"step": 14560
},
{
"epoch": 5.27,
"learning_rate": 3.2040216772006457e-07,
"loss": 11.6232,
"step": 14580
},
{
"epoch": 5.28,
"learning_rate": 3.197204989205276e-07,
"loss": 11.5639,
"step": 14600
},
{
"epoch": 5.29,
"learning_rate": 3.190382675858131e-07,
"loss": 11.5587,
"step": 14620
},
{
"epoch": 5.29,
"learning_rate": 3.18355479220459e-07,
"loss": 11.6086,
"step": 14640
},
{
"epoch": 5.3,
"learning_rate": 3.1767213933349756e-07,
"loss": 11.5826,
"step": 14660
},
{
"epoch": 5.31,
"learning_rate": 3.1698825343841086e-07,
"loss": 11.517,
"step": 14680
},
{
"epoch": 5.32,
"learning_rate": 3.1630382705308637e-07,
"loss": 11.7103,
"step": 14700
},
{
"epoch": 5.32,
"learning_rate": 3.156188656997727e-07,
"loss": 11.5983,
"step": 14720
},
{
"epoch": 5.33,
"learning_rate": 3.1493337490503457e-07,
"loss": 11.5032,
"step": 14740
},
{
"epoch": 5.34,
"learning_rate": 3.142473601997086e-07,
"loss": 11.6589,
"step": 14760
},
{
"epoch": 5.35,
"learning_rate": 3.1356082711885846e-07,
"loss": 11.6167,
"step": 14780
},
{
"epoch": 5.35,
"learning_rate": 3.1287378120173045e-07,
"loss": 11.4824,
"step": 14800
},
{
"epoch": 5.36,
"learning_rate": 3.121862279917084e-07,
"loss": 11.5116,
"step": 14820
},
{
"epoch": 5.37,
"learning_rate": 3.1149817303626947e-07,
"loss": 11.5834,
"step": 14840
},
{
"epoch": 5.37,
"learning_rate": 3.1080962188693907e-07,
"loss": 11.6011,
"step": 14860
},
{
"epoch": 5.38,
"learning_rate": 3.101205800992458e-07,
"loss": 11.6644,
"step": 14880
},
{
"epoch": 5.39,
"learning_rate": 3.0943105323267746e-07,
"loss": 11.6467,
"step": 14900
},
{
"epoch": 5.4,
"learning_rate": 3.0874104685063515e-07,
"loss": 11.5182,
"step": 14920
},
{
"epoch": 5.4,
"learning_rate": 3.080505665203893e-07,
"loss": 11.604,
"step": 14940
},
{
"epoch": 5.41,
"learning_rate": 3.073596178130342e-07,
"loss": 11.6015,
"step": 14960
},
{
"epoch": 5.42,
"learning_rate": 3.066682063034433e-07,
"loss": 11.476,
"step": 14980
},
{
"epoch": 5.42,
"learning_rate": 3.059763375702241e-07,
"loss": 11.4862,
"step": 15000
},
{
"epoch": 5.43,
"learning_rate": 3.05284017195673e-07,
"loss": 11.5193,
"step": 15020
},
{
"epoch": 5.44,
"learning_rate": 3.0459125076573063e-07,
"loss": 11.4783,
"step": 15040
},
{
"epoch": 5.45,
"learning_rate": 3.038980438699366e-07,
"loss": 11.5304,
"step": 15060
},
{
"epoch": 5.45,
"learning_rate": 3.0320440210138433e-07,
"loss": 11.6162,
"step": 15080
},
{
"epoch": 5.46,
"learning_rate": 3.0251033105667594e-07,
"loss": 11.5835,
"step": 15100
},
{
"epoch": 5.47,
"learning_rate": 3.018158363358773e-07,
"loss": 11.4436,
"step": 15120
},
{
"epoch": 5.48,
"learning_rate": 3.0112092354247235e-07,
"loss": 11.5809,
"step": 15140
},
{
"epoch": 5.48,
"learning_rate": 3.004255982833186e-07,
"loss": 11.5485,
"step": 15160
},
{
"epoch": 5.49,
"learning_rate": 2.997298661686014e-07,
"loss": 11.5227,
"step": 15180
},
{
"epoch": 5.5,
"learning_rate": 2.990337328117886e-07,
"loss": 11.474,
"step": 15200
},
{
"epoch": 5.5,
"learning_rate": 2.983372038295855e-07,
"loss": 11.6214,
"step": 15220
},
{
"epoch": 5.51,
"learning_rate": 2.9764028484188985e-07,
"loss": 11.5108,
"step": 15240
},
{
"epoch": 5.52,
"learning_rate": 2.969429814717456e-07,
"loss": 11.607,
"step": 15260
},
{
"epoch": 5.53,
"learning_rate": 2.9624529934529845e-07,
"loss": 11.4842,
"step": 15280
},
{
"epoch": 5.53,
"learning_rate": 2.955472440917498e-07,
"loss": 11.5665,
"step": 15300
},
{
"epoch": 5.54,
"learning_rate": 2.948488213433118e-07,
"loss": 11.5481,
"step": 15320
},
{
"epoch": 5.55,
"learning_rate": 2.9415003673516165e-07,
"loss": 11.5603,
"step": 15340
},
{
"epoch": 5.56,
"learning_rate": 2.9345089590539605e-07,
"loss": 11.5924,
"step": 15360
},
{
"epoch": 5.56,
"learning_rate": 2.927514044949861e-07,
"loss": 11.6252,
"step": 15380
},
{
"epoch": 5.57,
"learning_rate": 2.9205156814773143e-07,
"loss": 11.5732,
"step": 15400
},
{
"epoch": 5.58,
"learning_rate": 2.913513925102146e-07,
"loss": 11.5231,
"step": 15420
},
{
"epoch": 5.58,
"learning_rate": 2.9065088323175594e-07,
"loss": 11.5965,
"step": 15440
},
{
"epoch": 5.59,
"learning_rate": 2.8995004596436774e-07,
"loss": 11.5837,
"step": 15460
},
{
"epoch": 5.6,
"learning_rate": 2.892488863627085e-07,
"loss": 11.5682,
"step": 15480
},
{
"epoch": 5.61,
"learning_rate": 2.8854741008403753e-07,
"loss": 11.5165,
"step": 15500
},
{
"epoch": 5.61,
"learning_rate": 2.878456227881692e-07,
"loss": 11.5324,
"step": 15520
},
{
"epoch": 5.62,
"learning_rate": 2.871435301374273e-07,
"loss": 11.5491,
"step": 15540
},
{
"epoch": 5.63,
"learning_rate": 2.864411377965995e-07,
"loss": 11.592,
"step": 15560
},
{
"epoch": 5.63,
"learning_rate": 2.8573845143289123e-07,
"loss": 11.5268,
"step": 15580
},
{
"epoch": 5.64,
"learning_rate": 2.850354767158804e-07,
"loss": 11.6144,
"step": 15600
},
{
"epoch": 5.65,
"learning_rate": 2.843322193174715e-07,
"loss": 11.5777,
"step": 15620
},
{
"epoch": 5.66,
"learning_rate": 2.8362868491184965e-07,
"loss": 11.5478,
"step": 15640
},
{
"epoch": 5.66,
"learning_rate": 2.829248791754353e-07,
"loss": 11.5472,
"step": 15660
},
{
"epoch": 5.67,
"learning_rate": 2.8222080778683766e-07,
"loss": 11.4977,
"step": 15680
},
{
"epoch": 5.68,
"learning_rate": 2.8151647642680976e-07,
"loss": 11.5799,
"step": 15700
},
{
"epoch": 5.69,
"learning_rate": 2.8081189077820206e-07,
"loss": 11.6459,
"step": 15720
},
{
"epoch": 5.69,
"learning_rate": 2.801070565259165e-07,
"loss": 11.412,
"step": 15740
},
{
"epoch": 5.7,
"learning_rate": 2.7940197935686123e-07,
"loss": 11.6231,
"step": 15760
},
{
"epoch": 5.71,
"learning_rate": 2.78696664959904e-07,
"loss": 11.6703,
"step": 15780
},
{
"epoch": 5.71,
"learning_rate": 2.7799111902582693e-07,
"loss": 11.5974,
"step": 15800
},
{
"epoch": 5.72,
"learning_rate": 2.7728534724728023e-07,
"loss": 11.5836,
"step": 15820
},
{
"epoch": 5.73,
"learning_rate": 2.7657935531873606e-07,
"loss": 11.5461,
"step": 15840
},
{
"epoch": 5.74,
"learning_rate": 2.758731489364431e-07,
"loss": 11.5596,
"step": 15860
},
{
"epoch": 5.74,
"learning_rate": 2.751667337983803e-07,
"loss": 11.5528,
"step": 15880
},
{
"epoch": 5.75,
"learning_rate": 2.7446011560421087e-07,
"loss": 11.5686,
"step": 15900
},
{
"epoch": 5.76,
"learning_rate": 2.737533000552363e-07,
"loss": 11.5344,
"step": 15920
},
{
"epoch": 5.76,
"learning_rate": 2.730462928543507e-07,
"loss": 11.5748,
"step": 15940
},
{
"epoch": 5.77,
"learning_rate": 2.7233909970599426e-07,
"loss": 11.5089,
"step": 15960
},
{
"epoch": 5.78,
"learning_rate": 2.716317263161076e-07,
"loss": 11.5718,
"step": 15980
},
{
"epoch": 5.79,
"learning_rate": 2.7092417839208537e-07,
"loss": 11.4656,
"step": 16000
},
{
"epoch": 5.79,
"learning_rate": 2.7021646164273084e-07,
"loss": 11.5286,
"step": 16020
},
{
"epoch": 5.8,
"learning_rate": 2.695085817782091e-07,
"loss": 11.5443,
"step": 16040
},
{
"epoch": 5.81,
"learning_rate": 2.6880054451000144e-07,
"loss": 11.627,
"step": 16060
},
{
"epoch": 5.82,
"learning_rate": 2.6809235555085923e-07,
"loss": 11.6072,
"step": 16080
},
{
"epoch": 5.82,
"learning_rate": 2.673840206147576e-07,
"loss": 11.605,
"step": 16100
},
{
"epoch": 5.83,
"learning_rate": 2.666755454168495e-07,
"loss": 11.7521,
"step": 16120
},
{
"epoch": 5.84,
"learning_rate": 2.659669356734198e-07,
"loss": 11.6228,
"step": 16140
},
{
"epoch": 5.84,
"learning_rate": 2.6525819710183867e-07,
"loss": 11.5816,
"step": 16160
},
{
"epoch": 5.85,
"learning_rate": 2.645493354205158e-07,
"loss": 11.6014,
"step": 16180
},
{
"epoch": 5.86,
"learning_rate": 2.638403563488542e-07,
"loss": 11.5542,
"step": 16200
},
{
"epoch": 5.87,
"learning_rate": 2.6313126560720413e-07,
"loss": 11.547,
"step": 16220
},
{
"epoch": 5.87,
"learning_rate": 2.6242206891681663e-07,
"loss": 11.4594,
"step": 16240
},
{
"epoch": 5.88,
"learning_rate": 2.6171277199979785e-07,
"loss": 11.6306,
"step": 16260
},
{
"epoch": 5.89,
"learning_rate": 2.6100338057906243e-07,
"loss": 11.5441,
"step": 16280
},
{
"epoch": 5.9,
"learning_rate": 2.602939003782875e-07,
"loss": 11.5436,
"step": 16300
},
{
"epoch": 5.9,
"learning_rate": 2.5958433712186656e-07,
"loss": 11.5212,
"step": 16320
},
{
"epoch": 5.91,
"learning_rate": 2.5887469653486327e-07,
"loss": 11.4528,
"step": 16340
},
{
"epoch": 5.92,
"learning_rate": 2.5816498434296513e-07,
"loss": 11.6732,
"step": 16360
},
{
"epoch": 5.92,
"learning_rate": 2.5745520627243756e-07,
"loss": 11.6237,
"step": 16380
},
{
"epoch": 5.93,
"learning_rate": 2.567453680500774e-07,
"loss": 11.5789,
"step": 16400
},
{
"epoch": 5.94,
"learning_rate": 2.560354754031667e-07,
"loss": 11.6132,
"step": 16420
},
{
"epoch": 5.95,
"learning_rate": 2.553255340594268e-07,
"loss": 11.547,
"step": 16440
},
{
"epoch": 5.95,
"learning_rate": 2.54615549746972e-07,
"loss": 11.5666,
"step": 16460
},
{
"epoch": 5.96,
"learning_rate": 2.53905528194263e-07,
"loss": 11.6524,
"step": 16480
},
{
"epoch": 5.97,
"learning_rate": 2.5319547513006124e-07,
"loss": 11.6698,
"step": 16500
},
{
"epoch": 5.97,
"learning_rate": 2.524853962833824e-07,
"loss": 11.5607,
"step": 16520
},
{
"epoch": 5.98,
"learning_rate": 2.5177529738345005e-07,
"loss": 11.5959,
"step": 16540
},
{
"epoch": 5.99,
"learning_rate": 2.510651841596496e-07,
"loss": 11.615,
"step": 16560
},
{
"epoch": 6.0,
"learning_rate": 2.5035506234148213e-07,
"loss": 11.5368,
"step": 16580
},
{
"epoch": 6.0,
"learning_rate": 2.4964493765851795e-07,
"loss": 11.6266,
"step": 16600
},
{
"epoch": 6.01,
"learning_rate": 2.4893481584035043e-07,
"loss": 11.5118,
"step": 16620
},
{
"epoch": 6.02,
"learning_rate": 2.4822470261655e-07,
"loss": 11.4948,
"step": 16640
},
{
"epoch": 6.03,
"learning_rate": 2.475146037166176e-07,
"loss": 11.621,
"step": 16660
},
{
"epoch": 6.03,
"learning_rate": 2.4680452486993874e-07,
"loss": 11.5366,
"step": 16680
},
{
"epoch": 6.04,
"learning_rate": 2.460944718057371e-07,
"loss": 11.7582,
"step": 16700
},
{
"epoch": 6.05,
"learning_rate": 2.453844502530281e-07,
"loss": 11.5283,
"step": 16720
},
{
"epoch": 6.05,
"learning_rate": 2.446744659405732e-07,
"loss": 11.572,
"step": 16740
},
{
"epoch": 6.06,
"learning_rate": 2.439645245968333e-07,
"loss": 11.551,
"step": 16760
},
{
"epoch": 6.07,
"learning_rate": 2.432546319499226e-07,
"loss": 11.6219,
"step": 16780
},
{
"epoch": 6.08,
"learning_rate": 2.4254479372756236e-07,
"loss": 11.5473,
"step": 16800
},
{
"epoch": 6.08,
"learning_rate": 2.4183501565703485e-07,
"loss": 11.6384,
"step": 16820
},
{
"epoch": 6.09,
"learning_rate": 2.4112530346513676e-07,
"loss": 11.639,
"step": 16840
},
{
"epoch": 6.1,
"learning_rate": 2.404156628781335e-07,
"loss": 11.5192,
"step": 16860
},
{
"epoch": 6.1,
"learning_rate": 2.3970609962171255e-07,
"loss": 11.5126,
"step": 16880
},
{
"epoch": 6.11,
"learning_rate": 2.3899661942093755e-07,
"loss": 11.5228,
"step": 16900
},
{
"epoch": 6.12,
"learning_rate": 2.382872280002022e-07,
"loss": 11.5192,
"step": 16920
},
{
"epoch": 6.13,
"learning_rate": 2.3757793108318337e-07,
"loss": 11.4967,
"step": 16940
},
{
"epoch": 6.13,
"learning_rate": 2.368687343927959e-07,
"loss": 11.5445,
"step": 16960
},
{
"epoch": 6.14,
"learning_rate": 2.361596436511458e-07,
"loss": 11.4839,
"step": 16980
},
{
"epoch": 6.15,
"learning_rate": 2.354506645794842e-07,
"loss": 11.4847,
"step": 17000
},
{
"epoch": 6.16,
"learning_rate": 2.347418028981614e-07,
"loss": 11.532,
"step": 17020
},
{
"epoch": 6.16,
"learning_rate": 2.3403306432658023e-07,
"loss": 11.5848,
"step": 17040
},
{
"epoch": 6.17,
"learning_rate": 2.3332445458315048e-07,
"loss": 11.6084,
"step": 17060
},
{
"epoch": 6.18,
"learning_rate": 2.3261597938524244e-07,
"loss": 11.5482,
"step": 17080
},
{
"epoch": 6.18,
"learning_rate": 2.3190764444914078e-07,
"loss": 11.5448,
"step": 17100
},
{
"epoch": 6.19,
"learning_rate": 2.311994554899985e-07,
"loss": 11.4889,
"step": 17120
},
{
"epoch": 6.2,
"learning_rate": 2.3049141822179097e-07,
"loss": 11.5677,
"step": 17140
},
{
"epoch": 6.21,
"learning_rate": 2.2978353835726919e-07,
"loss": 11.5334,
"step": 17160
},
{
"epoch": 6.21,
"learning_rate": 2.290758216079146e-07,
"loss": 11.659,
"step": 17180
},
{
"epoch": 6.22,
"learning_rate": 2.2836827368389245e-07,
"loss": 11.5961,
"step": 17200
},
{
"epoch": 6.23,
"learning_rate": 2.276609002940057e-07,
"loss": 11.5773,
"step": 17220
},
{
"epoch": 6.24,
"learning_rate": 2.2695370714564925e-07,
"loss": 11.5673,
"step": 17240
},
{
"epoch": 6.24,
"learning_rate": 2.2624669994476368e-07,
"loss": 11.4363,
"step": 17260
},
{
"epoch": 6.25,
"learning_rate": 2.2553988439578914e-07,
"loss": 11.6368,
"step": 17280
},
{
"epoch": 6.26,
"learning_rate": 2.2483326620161975e-07,
"loss": 11.4992,
"step": 17300
},
{
"epoch": 6.26,
"learning_rate": 2.2412685106355693e-07,
"loss": 11.6233,
"step": 17320
},
{
"epoch": 6.27,
"learning_rate": 2.2342064468126395e-07,
"loss": 11.5791,
"step": 17340
},
{
"epoch": 6.28,
"learning_rate": 2.2271465275271983e-07,
"loss": 11.65,
"step": 17360
},
{
"epoch": 6.29,
"learning_rate": 2.2200888097417302e-07,
"loss": 11.5825,
"step": 17380
},
{
"epoch": 6.29,
"learning_rate": 2.21303335040096e-07,
"loss": 11.5799,
"step": 17400
},
{
"epoch": 6.3,
"learning_rate": 2.2059802064313882e-07,
"loss": 11.512,
"step": 17420
},
{
"epoch": 6.31,
"learning_rate": 2.1989294347408347e-07,
"loss": 11.4995,
"step": 17440
},
{
"epoch": 6.31,
"learning_rate": 2.1918810922179803e-07,
"loss": 11.5878,
"step": 17460
},
{
"epoch": 6.32,
"learning_rate": 2.1848352357319022e-07,
"loss": 11.55,
"step": 17480
},
{
"epoch": 6.33,
"learning_rate": 2.1777919221316232e-07,
"loss": 11.5182,
"step": 17500
},
{
"epoch": 6.34,
"learning_rate": 2.1707512082456473e-07,
"loss": 11.5603,
"step": 17520
},
{
"epoch": 6.34,
"learning_rate": 2.1637131508815027e-07,
"loss": 11.482,
"step": 17540
},
{
"epoch": 6.35,
"learning_rate": 2.1566778068252858e-07,
"loss": 11.5986,
"step": 17560
},
{
"epoch": 6.36,
"learning_rate": 2.1496452328411964e-07,
"loss": 11.552,
"step": 17580
},
{
"epoch": 6.37,
"learning_rate": 2.142615485671088e-07,
"loss": 11.6,
"step": 17600
},
{
"epoch": 6.37,
"learning_rate": 2.135588622034005e-07,
"loss": 11.5519,
"step": 17620
},
{
"epoch": 6.38,
"learning_rate": 2.128564698625726e-07,
"loss": 11.5955,
"step": 17640
},
{
"epoch": 6.39,
"learning_rate": 2.1215437721183074e-07,
"loss": 11.6683,
"step": 17660
},
{
"epoch": 6.39,
"learning_rate": 2.1145258991596245e-07,
"loss": 11.5436,
"step": 17680
},
{
"epoch": 6.4,
"learning_rate": 2.1075111363729154e-07,
"loss": 11.5688,
"step": 17700
},
{
"epoch": 6.41,
"learning_rate": 2.1004995403563224e-07,
"loss": 11.5736,
"step": 17720
},
{
"epoch": 6.42,
"learning_rate": 2.0934911676824403e-07,
"loss": 11.5119,
"step": 17740
},
{
"epoch": 6.42,
"learning_rate": 2.086486074897854e-07,
"loss": 11.5214,
"step": 17760
},
{
"epoch": 6.43,
"learning_rate": 2.0794843185226865e-07,
"loss": 11.5909,
"step": 17780
},
{
"epoch": 6.44,
"learning_rate": 2.0724859550501393e-07,
"loss": 11.5332,
"step": 17800
},
{
"epoch": 6.44,
"learning_rate": 2.0654910409460396e-07,
"loss": 11.4678,
"step": 17820
},
{
"epoch": 6.45,
"learning_rate": 2.0584996326483838e-07,
"loss": 11.6677,
"step": 17840
},
{
"epoch": 6.46,
"learning_rate": 2.0515117865668815e-07,
"loss": 11.5631,
"step": 17860
},
{
"epoch": 6.47,
"learning_rate": 2.0445275590825024e-07,
"loss": 11.5571,
"step": 17880
},
{
"epoch": 6.47,
"learning_rate": 2.0375470065470158e-07,
"loss": 11.5439,
"step": 17900
},
{
"epoch": 6.48,
"learning_rate": 2.0305701852825438e-07,
"loss": 11.5207,
"step": 17920
},
{
"epoch": 6.49,
"learning_rate": 2.0235971515811013e-07,
"loss": 11.5164,
"step": 17940
},
{
"epoch": 6.5,
"learning_rate": 2.016627961704144e-07,
"loss": 11.5409,
"step": 17960
},
{
"epoch": 6.5,
"learning_rate": 2.0096626718821143e-07,
"loss": 11.5803,
"step": 17980
},
{
"epoch": 6.51,
"learning_rate": 2.002701338313987e-07,
"loss": 11.6046,
"step": 18000
},
{
"epoch": 6.52,
"learning_rate": 1.995744017166814e-07,
"loss": 11.6438,
"step": 18020
},
{
"epoch": 6.52,
"learning_rate": 1.9887907645752765e-07,
"loss": 11.5637,
"step": 18040
},
{
"epoch": 6.53,
"learning_rate": 1.9818416366412275e-07,
"loss": 11.8021,
"step": 18060
},
{
"epoch": 6.54,
"learning_rate": 1.9748966894332404e-07,
"loss": 11.6382,
"step": 18080
},
{
"epoch": 6.55,
"learning_rate": 1.9679559789861575e-07,
"loss": 11.6082,
"step": 18100
},
{
"epoch": 6.55,
"learning_rate": 1.9610195613006343e-07,
"loss": 11.7247,
"step": 18120
},
{
"epoch": 6.56,
"learning_rate": 1.954087492342694e-07,
"loss": 11.4888,
"step": 18140
},
{
"epoch": 6.57,
"learning_rate": 1.9471598280432705e-07,
"loss": 11.5055,
"step": 18160
},
{
"epoch": 6.58,
"learning_rate": 1.9402366242977592e-07,
"loss": 11.5259,
"step": 18180
},
{
"epoch": 6.58,
"learning_rate": 1.933317936965566e-07,
"loss": 11.5196,
"step": 18200
},
{
"epoch": 6.59,
"learning_rate": 1.9264038218696576e-07,
"loss": 11.5081,
"step": 18220
},
{
"epoch": 6.6,
"learning_rate": 1.919494334796107e-07,
"loss": 11.6072,
"step": 18240
},
{
"epoch": 6.6,
"learning_rate": 1.9125895314936488e-07,
"loss": 11.5958,
"step": 18260
},
{
"epoch": 6.61,
"learning_rate": 1.905689467673226e-07,
"loss": 11.5092,
"step": 18280
},
{
"epoch": 6.62,
"learning_rate": 1.8987941990075415e-07,
"loss": 11.6284,
"step": 18300
},
{
"epoch": 6.63,
"learning_rate": 1.8919037811306104e-07,
"loss": 11.5005,
"step": 18320
},
{
"epoch": 6.63,
"learning_rate": 1.885018269637305e-07,
"loss": 11.5771,
"step": 18340
},
{
"epoch": 6.64,
"learning_rate": 1.8781377200829156e-07,
"loss": 11.5173,
"step": 18360
},
{
"epoch": 6.65,
"learning_rate": 1.8712621879826955e-07,
"loss": 11.5931,
"step": 18380
},
{
"epoch": 6.65,
"learning_rate": 1.8643917288114146e-07,
"loss": 11.5598,
"step": 18400
},
{
"epoch": 6.66,
"learning_rate": 1.8575263980029147e-07,
"loss": 11.5043,
"step": 18420
},
{
"epoch": 6.67,
"learning_rate": 1.8506662509496546e-07,
"loss": 11.6294,
"step": 18440
},
{
"epoch": 6.68,
"learning_rate": 1.8438113430022733e-07,
"loss": 11.5551,
"step": 18460
},
{
"epoch": 6.68,
"learning_rate": 1.8369617294691358e-07,
"loss": 11.4976,
"step": 18480
},
{
"epoch": 6.69,
"learning_rate": 1.8301174656158912e-07,
"loss": 11.5757,
"step": 18500
},
{
"epoch": 6.7,
"learning_rate": 1.823278606665024e-07,
"loss": 11.5967,
"step": 18520
},
{
"epoch": 6.71,
"learning_rate": 1.81644520779541e-07,
"loss": 11.5046,
"step": 18540
},
{
"epoch": 6.71,
"learning_rate": 1.8096173241418695e-07,
"loss": 11.5908,
"step": 18560
},
{
"epoch": 6.72,
"learning_rate": 1.8027950107947246e-07,
"loss": 11.548,
"step": 18580
},
{
"epoch": 6.73,
"learning_rate": 1.795978322799354e-07,
"loss": 11.4977,
"step": 18600
},
{
"epoch": 6.73,
"learning_rate": 1.7891673151557492e-07,
"loss": 11.6205,
"step": 18620
},
{
"epoch": 6.74,
"learning_rate": 1.7823620428180692e-07,
"loss": 11.63,
"step": 18640
},
{
"epoch": 6.75,
"learning_rate": 1.775562560694197e-07,
"loss": 11.5467,
"step": 18660
},
{
"epoch": 6.76,
"learning_rate": 1.7687689236452995e-07,
"loss": 11.4762,
"step": 18680
},
{
"epoch": 6.76,
"learning_rate": 1.7619811864853827e-07,
"loss": 11.6024,
"step": 18700
},
{
"epoch": 6.77,
"learning_rate": 1.75519940398085e-07,
"loss": 11.4667,
"step": 18720
},
{
"epoch": 6.78,
"learning_rate": 1.748423630850058e-07,
"loss": 11.6476,
"step": 18740
},
{
"epoch": 6.78,
"learning_rate": 1.741653921762879e-07,
"loss": 11.6168,
"step": 18760
},
{
"epoch": 6.79,
"learning_rate": 1.734890331340258e-07,
"loss": 11.5395,
"step": 18780
},
{
"epoch": 6.8,
"learning_rate": 1.728132914153771e-07,
"loss": 11.4921,
"step": 18800
},
{
"epoch": 6.81,
"learning_rate": 1.7213817247251862e-07,
"loss": 11.6036,
"step": 18820
},
{
"epoch": 6.81,
"learning_rate": 1.7146368175260234e-07,
"loss": 11.6459,
"step": 18840
},
{
"epoch": 6.82,
"learning_rate": 1.7078982469771163e-07,
"loss": 11.5501,
"step": 18860
},
{
"epoch": 6.83,
"learning_rate": 1.7011660674481676e-07,
"loss": 11.456,
"step": 18880
},
{
"epoch": 6.84,
"learning_rate": 1.6944403332573185e-07,
"loss": 11.6167,
"step": 18900
},
{
"epoch": 6.84,
"learning_rate": 1.6877210986707046e-07,
"loss": 11.5092,
"step": 18920
},
{
"epoch": 6.85,
"learning_rate": 1.6810084179020208e-07,
"loss": 11.5915,
"step": 18940
},
{
"epoch": 6.86,
"learning_rate": 1.674302345112083e-07,
"loss": 11.6085,
"step": 18960
},
{
"epoch": 6.86,
"learning_rate": 1.6676029344083885e-07,
"loss": 11.5425,
"step": 18980
},
{
"epoch": 6.87,
"learning_rate": 1.6609102398446852e-07,
"loss": 11.5581,
"step": 19000
},
{
"epoch": 6.88,
"learning_rate": 1.654224315420531e-07,
"loss": 11.5621,
"step": 19020
},
{
"epoch": 6.89,
"learning_rate": 1.6475452150808597e-07,
"loss": 11.5826,
"step": 19040
},
{
"epoch": 6.89,
"learning_rate": 1.6408729927155453e-07,
"loss": 11.5437,
"step": 19060
},
{
"epoch": 6.9,
"learning_rate": 1.6342077021589669e-07,
"loss": 11.5366,
"step": 19080
},
{
"epoch": 6.91,
"learning_rate": 1.6275493971895743e-07,
"loss": 11.5912,
"step": 19100
},
{
"epoch": 6.92,
"learning_rate": 1.620898131529457e-07,
"loss": 11.4859,
"step": 19120
},
{
"epoch": 6.92,
"learning_rate": 1.6142539588439052e-07,
"loss": 11.5597,
"step": 19140
},
{
"epoch": 6.93,
"learning_rate": 1.607616932740982e-07,
"loss": 11.4989,
"step": 19160
},
{
"epoch": 6.94,
"learning_rate": 1.600987106771089e-07,
"loss": 11.5415,
"step": 19180
},
{
"epoch": 6.94,
"learning_rate": 1.5943645344265312e-07,
"loss": 11.5759,
"step": 19200
},
{
"epoch": 6.95,
"learning_rate": 1.5877492691410913e-07,
"loss": 11.6497,
"step": 19220
},
{
"epoch": 6.96,
"learning_rate": 1.5811413642895943e-07,
"loss": 11.5942,
"step": 19240
},
{
"epoch": 6.97,
"learning_rate": 1.5745408731874776e-07,
"loss": 11.6553,
"step": 19260
},
{
"epoch": 6.97,
"learning_rate": 1.5679478490903635e-07,
"loss": 11.5545,
"step": 19280
},
{
"epoch": 6.98,
"learning_rate": 1.5613623451936232e-07,
"loss": 11.5778,
"step": 19300
},
{
"epoch": 6.99,
"learning_rate": 1.5547844146319544e-07,
"loss": 11.6732,
"step": 19320
},
{
"epoch": 6.99,
"learning_rate": 1.5482141104789504e-07,
"loss": 11.5742,
"step": 19340
},
{
"epoch": 7.0,
"learning_rate": 1.5416514857466695e-07,
"loss": 11.5561,
"step": 19360
},
{
"epoch": 7.01,
"learning_rate": 1.5350965933852104e-07,
"loss": 11.5486,
"step": 19380
},
{
"epoch": 7.02,
"learning_rate": 1.528549486282284e-07,
"loss": 11.5493,
"step": 19400
},
{
"epoch": 7.02,
"learning_rate": 1.5220102172627837e-07,
"loss": 11.5734,
"step": 19420
},
{
"epoch": 7.03,
"learning_rate": 1.515478839088365e-07,
"loss": 11.5701,
"step": 19440
},
{
"epoch": 7.04,
"learning_rate": 1.5089554044570149e-07,
"loss": 11.5146,
"step": 19460
},
{
"epoch": 7.05,
"learning_rate": 1.5024399660026294e-07,
"loss": 11.4944,
"step": 19480
},
{
"epoch": 7.05,
"learning_rate": 1.495932576294588e-07,
"loss": 11.5861,
"step": 19500
},
{
"epoch": 7.06,
"learning_rate": 1.4894332878373276e-07,
"loss": 11.4813,
"step": 19520
},
{
"epoch": 7.07,
"learning_rate": 1.4829421530699222e-07,
"loss": 11.507,
"step": 19540
},
{
"epoch": 7.07,
"learning_rate": 1.4764592243656582e-07,
"loss": 11.6154,
"step": 19560
},
{
"epoch": 7.08,
"learning_rate": 1.4699845540316123e-07,
"loss": 11.5995,
"step": 19580
},
{
"epoch": 7.09,
"learning_rate": 1.4635181943082284e-07,
"loss": 11.6338,
"step": 19600
},
{
"epoch": 7.1,
"learning_rate": 1.4570601973688966e-07,
"loss": 11.5402,
"step": 19620
},
{
"epoch": 7.1,
"learning_rate": 1.450610615319533e-07,
"loss": 11.5688,
"step": 19640
},
{
"epoch": 7.11,
"learning_rate": 1.4441695001981585e-07,
"loss": 11.6236,
"step": 19660
},
{
"epoch": 7.12,
"learning_rate": 1.437736903974479e-07,
"loss": 11.6191,
"step": 19680
},
{
"epoch": 7.12,
"learning_rate": 1.4313128785494667e-07,
"loss": 11.6409,
"step": 19700
},
{
"epoch": 7.13,
"learning_rate": 1.4248974757549415e-07,
"loss": 11.6191,
"step": 19720
},
{
"epoch": 7.14,
"learning_rate": 1.4184907473531496e-07,
"loss": 11.6084,
"step": 19740
},
{
"epoch": 7.15,
"learning_rate": 1.412092745036351e-07,
"loss": 11.5322,
"step": 19760
},
{
"epoch": 7.15,
"learning_rate": 1.405703520426399e-07,
"loss": 11.4722,
"step": 19780
},
{
"epoch": 7.16,
"learning_rate": 1.3993231250743243e-07,
"loss": 11.5774,
"step": 19800
},
{
"epoch": 7.17,
"learning_rate": 1.3929516104599202e-07,
"loss": 11.4938,
"step": 19820
},
{
"epoch": 7.18,
"learning_rate": 1.386589027991325e-07,
"loss": 11.5817,
"step": 19840
},
{
"epoch": 7.18,
"learning_rate": 1.3802354290046103e-07,
"loss": 11.5777,
"step": 19860
},
{
"epoch": 7.19,
"learning_rate": 1.3738908647633634e-07,
"loss": 11.5889,
"step": 19880
},
{
"epoch": 7.2,
"learning_rate": 1.367555386458276e-07,
"loss": 11.5573,
"step": 19900
},
{
"epoch": 7.2,
"learning_rate": 1.3612290452067322e-07,
"loss": 11.5237,
"step": 19920
},
{
"epoch": 7.21,
"learning_rate": 1.3549118920523905e-07,
"loss": 11.5466,
"step": 19940
},
{
"epoch": 7.22,
"learning_rate": 1.3486039779647793e-07,
"loss": 11.6006,
"step": 19960
},
{
"epoch": 7.23,
"learning_rate": 1.3423053538388808e-07,
"loss": 11.5048,
"step": 19980
},
{
"epoch": 7.23,
"learning_rate": 1.3360160704947221e-07,
"loss": 11.5967,
"step": 20000
},
{
"epoch": 7.24,
"learning_rate": 1.329736178676965e-07,
"loss": 11.5465,
"step": 20020
},
{
"epoch": 7.25,
"learning_rate": 1.323465729054497e-07,
"loss": 11.5456,
"step": 20040
},
{
"epoch": 7.25,
"learning_rate": 1.317204772220019e-07,
"loss": 11.6243,
"step": 20060
},
{
"epoch": 7.26,
"learning_rate": 1.3109533586896432e-07,
"loss": 11.5195,
"step": 20080
},
{
"epoch": 7.27,
"learning_rate": 1.304711538902481e-07,
"loss": 11.5885,
"step": 20100
},
{
"epoch": 7.28,
"learning_rate": 1.2984793632202373e-07,
"loss": 11.5375,
"step": 20120
},
{
"epoch": 7.28,
"learning_rate": 1.2922568819268054e-07,
"loss": 11.5209,
"step": 20140
},
{
"epoch": 7.29,
"learning_rate": 1.2860441452278574e-07,
"loss": 11.5969,
"step": 20160
},
{
"epoch": 7.3,
"learning_rate": 1.2798412032504437e-07,
"loss": 11.5644,
"step": 20180
},
{
"epoch": 7.31,
"learning_rate": 1.273648106042587e-07,
"loss": 11.4928,
"step": 20200
},
{
"epoch": 7.31,
"learning_rate": 1.2674649035728768e-07,
"loss": 11.6097,
"step": 20220
},
{
"epoch": 7.32,
"learning_rate": 1.2612916457300687e-07,
"loss": 11.5373,
"step": 20240
},
{
"epoch": 7.33,
"learning_rate": 1.2551283823226812e-07,
"loss": 11.4943,
"step": 20260
},
{
"epoch": 7.33,
"learning_rate": 1.2489751630785905e-07,
"loss": 11.5864,
"step": 20280
},
{
"epoch": 7.34,
"learning_rate": 1.2428320376446348e-07,
"loss": 11.4818,
"step": 20300
},
{
"epoch": 7.35,
"learning_rate": 1.2366990555862106e-07,
"loss": 11.5295,
"step": 20320
},
{
"epoch": 7.36,
"learning_rate": 1.2305762663868728e-07,
"loss": 11.6868,
"step": 20340
},
{
"epoch": 7.36,
"learning_rate": 1.2244637194479376e-07,
"loss": 11.5521,
"step": 20360
},
{
"epoch": 7.37,
"learning_rate": 1.2183614640880783e-07,
"loss": 11.5967,
"step": 20380
},
{
"epoch": 7.38,
"learning_rate": 1.2122695495429347e-07,
"loss": 11.7017,
"step": 20400
},
{
"epoch": 7.39,
"learning_rate": 1.2061880249647113e-07,
"loss": 11.526,
"step": 20420
},
{
"epoch": 7.39,
"learning_rate": 1.2001169394217825e-07,
"loss": 11.5515,
"step": 20440
},
{
"epoch": 7.4,
"learning_rate": 1.1940563418982959e-07,
"loss": 11.6034,
"step": 20460
},
{
"epoch": 7.41,
"learning_rate": 1.1880062812937753e-07,
"loss": 11.6085,
"step": 20480
},
{
"epoch": 7.41,
"learning_rate": 1.1819668064227303e-07,
"loss": 11.5318,
"step": 20500
},
{
"epoch": 7.42,
"learning_rate": 1.1759379660142597e-07,
"loss": 11.581,
"step": 20520
},
{
"epoch": 7.43,
"learning_rate": 1.1699198087116588e-07,
"loss": 11.4248,
"step": 20540
},
{
"epoch": 7.44,
"learning_rate": 1.1639123830720265e-07,
"loss": 11.6121,
"step": 20560
},
{
"epoch": 7.44,
"learning_rate": 1.1579157375658755e-07,
"loss": 11.5246,
"step": 20580
},
{
"epoch": 7.45,
"learning_rate": 1.151929920576737e-07,
"loss": 11.6043,
"step": 20600
},
{
"epoch": 7.46,
"learning_rate": 1.1459549804007748e-07,
"loss": 11.5643,
"step": 20620
},
{
"epoch": 7.46,
"learning_rate": 1.1399909652463943e-07,
"loss": 11.4834,
"step": 20640
},
{
"epoch": 7.47,
"learning_rate": 1.134037923233852e-07,
"loss": 11.5314,
"step": 20660
},
{
"epoch": 7.48,
"learning_rate": 1.1280959023948692e-07,
"loss": 11.5545,
"step": 20680
},
{
"epoch": 7.49,
"learning_rate": 1.122164950672243e-07,
"loss": 11.5737,
"step": 20700
},
{
"epoch": 7.49,
"learning_rate": 1.1162451159194614e-07,
"loss": 11.5333,
"step": 20720
},
{
"epoch": 7.5,
"learning_rate": 1.1103364459003126e-07,
"loss": 11.543,
"step": 20740
},
{
"epoch": 7.51,
"learning_rate": 1.1044389882885058e-07,
"loss": 11.657,
"step": 20760
},
{
"epoch": 7.52,
"learning_rate": 1.0985527906672834e-07,
"loss": 11.5753,
"step": 20780
},
{
"epoch": 7.52,
"learning_rate": 1.0926779005290365e-07,
"loss": 11.6339,
"step": 20800
},
{
"epoch": 7.53,
"learning_rate": 1.0868143652749228e-07,
"loss": 11.5556,
"step": 20820
},
{
"epoch": 7.54,
"learning_rate": 1.0809622322144843e-07,
"loss": 11.6215,
"step": 20840
},
{
"epoch": 7.54,
"learning_rate": 1.0751215485652643e-07,
"loss": 11.7268,
"step": 20860
},
{
"epoch": 7.55,
"learning_rate": 1.0692923614524279e-07,
"loss": 11.4722,
"step": 20880
},
{
"epoch": 7.56,
"learning_rate": 1.063474717908382e-07,
"loss": 11.5266,
"step": 20900
},
{
"epoch": 7.57,
"learning_rate": 1.0576686648723923e-07,
"loss": 11.5344,
"step": 20920
},
{
"epoch": 7.57,
"learning_rate": 1.0518742491902097e-07,
"loss": 11.6412,
"step": 20940
},
{
"epoch": 7.58,
"learning_rate": 1.0460915176136892e-07,
"loss": 11.5482,
"step": 20960
},
{
"epoch": 7.59,
"learning_rate": 1.0403205168004132e-07,
"loss": 11.5908,
"step": 20980
},
{
"epoch": 7.59,
"learning_rate": 1.0345612933133166e-07,
"loss": 11.5425,
"step": 21000
},
{
"epoch": 7.6,
"learning_rate": 1.0288138936203067e-07,
"loss": 11.6107,
"step": 21020
},
{
"epoch": 7.61,
"learning_rate": 1.0230783640938936e-07,
"loss": 11.5566,
"step": 21040
},
{
"epoch": 7.62,
"learning_rate": 1.0173547510108136e-07,
"loss": 11.6423,
"step": 21060
},
{
"epoch": 7.62,
"learning_rate": 1.0116431005516557e-07,
"loss": 11.642,
"step": 21080
},
{
"epoch": 7.63,
"learning_rate": 1.0059434588004897e-07,
"loss": 11.6133,
"step": 21100
},
{
"epoch": 7.64,
"learning_rate": 1.0002558717444922e-07,
"loss": 11.5334,
"step": 21120
},
{
"epoch": 7.65,
"learning_rate": 9.945803852735793e-08,
"loss": 11.5411,
"step": 21140
},
{
"epoch": 7.65,
"learning_rate": 9.889170451800332e-08,
"loss": 11.6101,
"step": 21160
},
{
"epoch": 7.66,
"learning_rate": 9.832658971581346e-08,
"loss": 11.4722,
"step": 21180
},
{
"epoch": 7.67,
"learning_rate": 9.776269868037928e-08,
"loss": 11.5552,
"step": 21200
},
{
"epoch": 7.67,
"learning_rate": 9.720003596141796e-08,
"loss": 11.6173,
"step": 21220
},
{
"epoch": 7.68,
"learning_rate": 9.663860609873575e-08,
"loss": 11.5692,
"step": 21240
},
{
"epoch": 7.69,
"learning_rate": 9.607841362219207e-08,
"loss": 11.5422,
"step": 21260
},
{
"epoch": 7.7,
"learning_rate": 9.551946305166233e-08,
"loss": 11.6599,
"step": 21280
},
{
"epoch": 7.7,
"learning_rate": 9.496175889700184e-08,
"loss": 11.4644,
"step": 21300
},
{
"epoch": 7.71,
"learning_rate": 9.440530565800927e-08,
"loss": 11.5522,
"step": 21320
},
{
"epoch": 7.72,
"learning_rate": 9.385010782439018e-08,
"loss": 11.509,
"step": 21340
},
{
"epoch": 7.73,
"learning_rate": 9.329616987572122e-08,
"loss": 11.5604,
"step": 21360
},
{
"epoch": 7.73,
"learning_rate": 9.274349628141359e-08,
"loss": 11.6646,
"step": 21380
},
{
"epoch": 7.74,
"learning_rate": 9.219209150067725e-08,
"loss": 11.5983,
"step": 21400
},
{
"epoch": 7.75,
"learning_rate": 9.164195998248469e-08,
"loss": 11.5241,
"step": 21420
},
{
"epoch": 7.75,
"learning_rate": 9.109310616553534e-08,
"loss": 11.5351,
"step": 21440
},
{
"epoch": 7.76,
"learning_rate": 9.05455344782193e-08,
"loss": 11.5137,
"step": 21460
},
{
"epoch": 7.77,
"learning_rate": 8.999924933858219e-08,
"loss": 11.6381,
"step": 21480
},
{
"epoch": 7.78,
"learning_rate": 8.945425515428904e-08,
"loss": 11.5419,
"step": 21500
},
{
"epoch": 7.78,
"learning_rate": 8.891055632258892e-08,
"loss": 11.5143,
"step": 21520
},
{
"epoch": 7.79,
"learning_rate": 8.836815723027957e-08,
"loss": 11.5709,
"step": 21540
},
{
"epoch": 7.8,
"learning_rate": 8.78270622536716e-08,
"loss": 11.6181,
"step": 21560
},
{
"epoch": 7.8,
"learning_rate": 8.728727575855363e-08,
"loss": 11.5301,
"step": 21580
},
{
"epoch": 7.81,
"learning_rate": 8.67488021001569e-08,
"loss": 11.6804,
"step": 21600
},
{
"epoch": 7.82,
"learning_rate": 8.621164562312003e-08,
"loss": 11.4996,
"step": 21620
},
{
"epoch": 7.83,
"learning_rate": 8.567581066145413e-08,
"loss": 11.546,
"step": 21640
},
{
"epoch": 7.83,
"learning_rate": 8.514130153850768e-08,
"loss": 11.6006,
"step": 21660
},
{
"epoch": 7.84,
"learning_rate": 8.460812256693178e-08,
"loss": 11.627,
"step": 21680
},
{
"epoch": 7.85,
"learning_rate": 8.407627804864523e-08,
"loss": 11.5404,
"step": 21700
},
{
"epoch": 7.86,
"learning_rate": 8.354577227479995e-08,
"loss": 11.6296,
"step": 21720
},
{
"epoch": 7.86,
"learning_rate": 8.301660952574633e-08,
"loss": 11.5375,
"step": 21740
},
{
"epoch": 7.87,
"learning_rate": 8.24887940709984e-08,
"loss": 11.5262,
"step": 21760
},
{
"epoch": 7.88,
"learning_rate": 8.19623301691999e-08,
"loss": 11.5798,
"step": 21780
},
{
"epoch": 7.88,
"learning_rate": 8.143722206808959e-08,
"loss": 11.5828,
"step": 21800
},
{
"epoch": 7.89,
"learning_rate": 8.091347400446702e-08,
"loss": 11.5238,
"step": 21820
},
{
"epoch": 7.9,
"learning_rate": 8.039109020415838e-08,
"loss": 11.5729,
"step": 21840
},
{
"epoch": 7.91,
"learning_rate": 7.987007488198244e-08,
"loss": 11.5649,
"step": 21860
},
{
"epoch": 7.91,
"learning_rate": 7.935043224171631e-08,
"loss": 11.5577,
"step": 21880
},
{
"epoch": 7.92,
"learning_rate": 7.883216647606192e-08,
"loss": 11.5743,
"step": 21900
},
{
"epoch": 7.93,
"learning_rate": 7.831528176661189e-08,
"loss": 11.5481,
"step": 21920
},
{
"epoch": 7.93,
"learning_rate": 7.77997822838159e-08,
"loss": 11.5684,
"step": 21940
},
{
"epoch": 7.94,
"learning_rate": 7.728567218694706e-08,
"loss": 11.5378,
"step": 21960
},
{
"epoch": 7.95,
"learning_rate": 7.677295562406812e-08,
"loss": 11.5941,
"step": 21980
},
{
"epoch": 7.96,
"learning_rate": 7.626163673199848e-08,
"loss": 11.5268,
"step": 22000
},
{
"epoch": 7.96,
"learning_rate": 7.57517196362803e-08,
"loss": 11.5047,
"step": 22020
},
{
"epoch": 7.97,
"learning_rate": 7.524320845114557e-08,
"loss": 11.5994,
"step": 22040
},
{
"epoch": 7.98,
"learning_rate": 7.473610727948271e-08,
"loss": 11.5535,
"step": 22060
},
{
"epoch": 7.99,
"learning_rate": 7.423042021280369e-08,
"loss": 11.6226,
"step": 22080
},
{
"epoch": 7.99,
"learning_rate": 7.372615133121057e-08,
"loss": 11.6075,
"step": 22100
},
{
"epoch": 8.0,
"learning_rate": 7.322330470336313e-08,
"loss": 11.4413,
"step": 22120
},
{
"epoch": 8.01,
"learning_rate": 7.272188438644574e-08,
"loss": 11.5181,
"step": 22140
},
{
"epoch": 8.01,
"learning_rate": 7.222189442613464e-08,
"loss": 11.5709,
"step": 22160
},
{
"epoch": 8.02,
"learning_rate": 7.172333885656542e-08,
"loss": 11.53,
"step": 22180
},
{
"epoch": 8.03,
"learning_rate": 7.122622170030016e-08,
"loss": 11.5248,
"step": 22200
},
{
"epoch": 8.04,
"learning_rate": 7.073054696829545e-08,
"loss": 11.573,
"step": 22220
},
{
"epoch": 8.04,
"learning_rate": 7.023631865986965e-08,
"loss": 11.6162,
"step": 22240
},
{
"epoch": 8.05,
"learning_rate": 6.97435407626708e-08,
"loss": 11.5448,
"step": 22260
},
{
"epoch": 8.06,
"learning_rate": 6.925221725264436e-08,
"loss": 11.5652,
"step": 22280
},
{
"epoch": 8.07,
"learning_rate": 6.876235209400123e-08,
"loss": 11.6287,
"step": 22300
},
{
"epoch": 8.07,
"learning_rate": 6.827394923918553e-08,
"loss": 11.6471,
"step": 22320
},
{
"epoch": 8.08,
"learning_rate": 6.778701262884304e-08,
"loss": 11.5994,
"step": 22340
},
{
"epoch": 8.09,
"learning_rate": 6.730154619178918e-08,
"loss": 11.5979,
"step": 22360
},
{
"epoch": 8.09,
"learning_rate": 6.681755384497748e-08,
"loss": 11.5242,
"step": 22380
},
{
"epoch": 8.1,
"learning_rate": 6.633503949346775e-08,
"loss": 11.6002,
"step": 22400
},
{
"epoch": 8.11,
"learning_rate": 6.585400703039465e-08,
"loss": 11.4928,
"step": 22420
},
{
"epoch": 8.12,
"learning_rate": 6.537446033693645e-08,
"loss": 11.5,
"step": 22440
},
{
"epoch": 8.12,
"learning_rate": 6.489640328228354e-08,
"loss": 11.453,
"step": 22460
},
{
"epoch": 8.13,
"learning_rate": 6.441983972360729e-08,
"loss": 11.5869,
"step": 22480
},
{
"epoch": 8.14,
"learning_rate": 6.39447735060288e-08,
"loss": 11.5977,
"step": 22500
},
{
"epoch": 8.14,
"learning_rate": 6.347120846258818e-08,
"loss": 11.6181,
"step": 22520
},
{
"epoch": 8.15,
"learning_rate": 6.299914841421309e-08,
"loss": 11.585,
"step": 22540
},
{
"epoch": 8.16,
"learning_rate": 6.25285971696885e-08,
"loss": 11.6489,
"step": 22560
},
{
"epoch": 8.17,
"learning_rate": 6.20595585256256e-08,
"loss": 11.5211,
"step": 22580
},
{
"epoch": 8.17,
"learning_rate": 6.159203626643137e-08,
"loss": 11.6789,
"step": 22600
},
{
"epoch": 8.18,
"learning_rate": 6.112603416427776e-08,
"loss": 11.5727,
"step": 22620
},
{
"epoch": 8.19,
"learning_rate": 6.066155597907157e-08,
"loss": 11.5466,
"step": 22640
},
{
"epoch": 8.2,
"learning_rate": 6.019860545842392e-08,
"loss": 11.6394,
"step": 22660
},
{
"epoch": 8.2,
"learning_rate": 5.973718633762015e-08,
"loss": 11.4351,
"step": 22680
},
{
"epoch": 8.21,
"learning_rate": 5.927730233958947e-08,
"loss": 11.5098,
"step": 22700
},
{
"epoch": 8.22,
"learning_rate": 5.881895717487523e-08,
"loss": 11.5179,
"step": 22720
},
{
"epoch": 8.22,
"learning_rate": 5.836215454160453e-08,
"loss": 11.5002,
"step": 22740
},
{
"epoch": 8.23,
"learning_rate": 5.7906898125458984e-08,
"loss": 11.5277,
"step": 22760
},
{
"epoch": 8.24,
"learning_rate": 5.7453191599644405e-08,
"loss": 11.567,
"step": 22780
},
{
"epoch": 8.25,
"learning_rate": 5.700103862486158e-08,
"loss": 11.674,
"step": 22800
},
{
"epoch": 8.25,
"learning_rate": 5.655044284927657e-08,
"loss": 11.5012,
"step": 22820
},
{
"epoch": 8.26,
"learning_rate": 5.610140790849108e-08,
"loss": 11.4852,
"step": 22840
},
{
"epoch": 8.27,
"learning_rate": 5.5653937425513526e-08,
"loss": 11.5356,
"step": 22860
},
{
"epoch": 8.27,
"learning_rate": 5.520803501072954e-08,
"loss": 11.5343,
"step": 22880
},
{
"epoch": 8.28,
"learning_rate": 5.4763704261872906e-08,
"loss": 11.6046,
"step": 22900
},
{
"epoch": 8.29,
"learning_rate": 5.432094876399654e-08,
"loss": 11.5436,
"step": 22920
},
{
"epoch": 8.3,
"learning_rate": 5.387977208944355e-08,
"loss": 11.5449,
"step": 22940
},
{
"epoch": 8.3,
"learning_rate": 5.344017779781834e-08,
"loss": 11.5867,
"step": 22960
},
{
"epoch": 8.31,
"learning_rate": 5.3002169435958e-08,
"loss": 11.714,
"step": 22980
},
{
"epoch": 8.32,
"learning_rate": 5.2565750537903716e-08,
"loss": 11.5761,
"step": 23000
},
{
"epoch": 8.33,
"learning_rate": 5.213092462487215e-08,
"loss": 11.5768,
"step": 23020
},
{
"epoch": 8.33,
"learning_rate": 5.1697695205227126e-08,
"loss": 11.724,
"step": 23040
},
{
"epoch": 8.34,
"learning_rate": 5.1266065774451086e-08,
"loss": 11.5739,
"step": 23060
},
{
"epoch": 8.35,
"learning_rate": 5.0836039815117224e-08,
"loss": 11.4955,
"step": 23080
},
{
"epoch": 8.35,
"learning_rate": 5.040762079686123e-08,
"loss": 11.5552,
"step": 23100
},
{
"epoch": 8.36,
"learning_rate": 4.9980812176353274e-08,
"loss": 11.5925,
"step": 23120
},
{
"epoch": 8.37,
"learning_rate": 4.955561739727013e-08,
"loss": 11.5741,
"step": 23140
},
{
"epoch": 8.38,
"learning_rate": 4.9132039890267456e-08,
"loss": 11.5445,
"step": 23160
},
{
"epoch": 8.38,
"learning_rate": 4.8710083072951904e-08,
"loss": 11.5156,
"step": 23180
},
{
"epoch": 8.39,
"learning_rate": 4.8289750349853834e-08,
"loss": 11.5152,
"step": 23200
},
{
"epoch": 8.4,
"learning_rate": 4.7871045112399674e-08,
"loss": 11.4828,
"step": 23220
},
{
"epoch": 8.41,
"learning_rate": 4.745397073888463e-08,
"loss": 11.4675,
"step": 23240
},
{
"epoch": 8.41,
"learning_rate": 4.703853059444543e-08,
"loss": 11.4411,
"step": 23260
},
{
"epoch": 8.42,
"learning_rate": 4.66247280310329e-08,
"loss": 11.5138,
"step": 23280
},
{
"epoch": 8.43,
"learning_rate": 4.621256638738541e-08,
"loss": 11.5501,
"step": 23300
},
{
"epoch": 8.43,
"learning_rate": 4.5802048989001556e-08,
"loss": 11.5232,
"step": 23320
},
{
"epoch": 8.44,
"learning_rate": 4.539317914811353e-08,
"loss": 11.5105,
"step": 23340
},
{
"epoch": 8.45,
"learning_rate": 4.498596016366027e-08,
"loss": 11.5695,
"step": 23360
},
{
"epoch": 8.46,
"learning_rate": 4.458039532126082e-08,
"loss": 11.6165,
"step": 23380
},
{
"epoch": 8.46,
"learning_rate": 4.4176487893187956e-08,
"loss": 11.4926,
"step": 23400
},
{
"epoch": 8.47,
"learning_rate": 4.3774241138341775e-08,
"loss": 11.6075,
"step": 23420
},
{
"epoch": 8.48,
"learning_rate": 4.3373658302223253e-08,
"loss": 11.6337,
"step": 23440
},
{
"epoch": 8.48,
"learning_rate": 4.29747426169082e-08,
"loss": 11.565,
"step": 23460
},
{
"epoch": 8.49,
"learning_rate": 4.257749730102112e-08,
"loss": 11.6841,
"step": 23480
},
{
"epoch": 8.5,
"learning_rate": 4.218192555970923e-08,
"loss": 11.5039,
"step": 23500
},
{
"epoch": 8.51,
"learning_rate": 4.1788030584616634e-08,
"loss": 11.539,
"step": 23520
},
{
"epoch": 8.51,
"learning_rate": 4.1395815553858624e-08,
"loss": 11.5355,
"step": 23540
},
{
"epoch": 8.52,
"learning_rate": 4.1005283631995816e-08,
"loss": 11.6099,
"step": 23560
},
{
"epoch": 8.53,
"learning_rate": 4.061643797000894e-08,
"loss": 11.6437,
"step": 23580
},
{
"epoch": 8.54,
"learning_rate": 4.022928170527315e-08,
"loss": 11.5887,
"step": 23600
},
{
"epoch": 8.54,
"learning_rate": 3.984381796153288e-08,
"loss": 11.4992,
"step": 23620
},
{
"epoch": 8.55,
"learning_rate": 3.946004984887652e-08,
"loss": 11.5405,
"step": 23640
},
{
"epoch": 8.56,
"learning_rate": 3.9077980463711384e-08,
"loss": 11.6498,
"step": 23660
},
{
"epoch": 8.56,
"learning_rate": 3.869761288873882e-08,
"loss": 11.6689,
"step": 23680
},
{
"epoch": 8.57,
"learning_rate": 3.831895019292897e-08,
"loss": 11.5665,
"step": 23700
},
{
"epoch": 8.58,
"learning_rate": 3.7941995431496535e-08,
"loss": 11.5487,
"step": 23720
},
{
"epoch": 8.59,
"learning_rate": 3.7566751645875776e-08,
"loss": 11.5458,
"step": 23740
},
{
"epoch": 8.59,
"learning_rate": 3.7193221863696026e-08,
"loss": 11.5934,
"step": 23760
},
{
"epoch": 8.6,
"learning_rate": 3.6821409098757336e-08,
"loss": 11.5329,
"step": 23780
},
{
"epoch": 8.61,
"learning_rate": 3.6451316351006153e-08,
"loss": 11.4771,
"step": 23800
},
{
"epoch": 8.61,
"learning_rate": 3.6082946606510956e-08,
"loss": 11.5533,
"step": 23820
},
{
"epoch": 8.62,
"learning_rate": 3.571630283743837e-08,
"loss": 11.4931,
"step": 23840
},
{
"epoch": 8.63,
"learning_rate": 3.535138800202911e-08,
"loss": 11.5671,
"step": 23860
},
{
"epoch": 8.64,
"learning_rate": 3.498820504457414e-08,
"loss": 11.5643,
"step": 23880
},
{
"epoch": 8.64,
"learning_rate": 3.4626756895390824e-08,
"loss": 11.507,
"step": 23900
},
{
"epoch": 8.65,
"learning_rate": 3.426704647079928e-08,
"loss": 11.5686,
"step": 23920
},
{
"epoch": 8.66,
"learning_rate": 3.390907667309906e-08,
"loss": 11.6062,
"step": 23940
},
{
"epoch": 8.67,
"learning_rate": 3.355285039054545e-08,
"loss": 11.5904,
"step": 23960
},
{
"epoch": 8.67,
"learning_rate": 3.3198370497326405e-08,
"loss": 11.4588,
"step": 23980
},
{
"epoch": 8.68,
"learning_rate": 3.284563985353925e-08,
"loss": 11.4522,
"step": 24000
},
{
"epoch": 8.69,
"learning_rate": 3.249466130516745e-08,
"loss": 11.5236,
"step": 24020
},
{
"epoch": 8.69,
"learning_rate": 3.2145437684058e-08,
"loss": 11.5657,
"step": 24040
},
{
"epoch": 8.7,
"learning_rate": 3.179797180789831e-08,
"loss": 11.6823,
"step": 24060
},
{
"epoch": 8.71,
"learning_rate": 3.14522664801935e-08,
"loss": 11.5531,
"step": 24080
},
{
"epoch": 8.72,
"learning_rate": 3.1108324490243864e-08,
"loss": 11.5316,
"step": 24100
},
{
"epoch": 8.72,
"learning_rate": 3.07661486131224e-08,
"loss": 11.6228,
"step": 24120
},
{
"epoch": 8.73,
"learning_rate": 3.0425741609652166e-08,
"loss": 11.5026,
"step": 24140
},
{
"epoch": 8.74,
"learning_rate": 3.00871062263843e-08,
"loss": 11.5837,
"step": 24160
},
{
"epoch": 8.75,
"learning_rate": 2.9750245195575703e-08,
"loss": 11.4998,
"step": 24180
},
{
"epoch": 8.75,
"learning_rate": 2.94151612351671e-08,
"loss": 11.5767,
"step": 24200
},
{
"epoch": 8.76,
"learning_rate": 2.908185704876101e-08,
"loss": 11.5251,
"step": 24220
},
{
"epoch": 8.77,
"learning_rate": 2.875033532559984e-08,
"loss": 11.5134,
"step": 24240
},
{
"epoch": 8.77,
"learning_rate": 2.8420598740544476e-08,
"loss": 11.5617,
"step": 24260
},
{
"epoch": 8.78,
"learning_rate": 2.8092649954052473e-08,
"loss": 11.6258,
"step": 24280
},
{
"epoch": 8.79,
"learning_rate": 2.7766491612156663e-08,
"loss": 11.563,
"step": 24300
},
{
"epoch": 8.8,
"learning_rate": 2.74421263464438e-08,
"loss": 11.5837,
"step": 24320
},
{
"epoch": 8.8,
"learning_rate": 2.7119556774033327e-08,
"loss": 11.554,
"step": 24340
},
{
"epoch": 8.81,
"learning_rate": 2.679878549755618e-08,
"loss": 11.5175,
"step": 24360
},
{
"epoch": 8.82,
"learning_rate": 2.6479815105133974e-08,
"loss": 11.5168,
"step": 24380
},
{
"epoch": 8.82,
"learning_rate": 2.616264817035793e-08,
"loss": 11.5535,
"step": 24400
},
{
"epoch": 8.83,
"learning_rate": 2.5847287252268228e-08,
"loss": 11.5774,
"step": 24420
},
{
"epoch": 8.84,
"learning_rate": 2.5533734895333363e-08,
"loss": 11.6685,
"step": 24440
},
{
"epoch": 8.85,
"learning_rate": 2.5221993629429505e-08,
"loss": 11.6426,
"step": 24460
},
{
"epoch": 8.85,
"learning_rate": 2.4912065969820206e-08,
"loss": 11.6459,
"step": 24480
},
{
"epoch": 8.86,
"learning_rate": 2.4603954417136e-08,
"loss": 11.5111,
"step": 24500
},
{
"epoch": 8.87,
"learning_rate": 2.4297661457354346e-08,
"loss": 11.5756,
"step": 24520
},
{
"epoch": 8.88,
"learning_rate": 2.3993189561779537e-08,
"loss": 11.508,
"step": 24540
},
{
"epoch": 8.88,
"learning_rate": 2.3690541187022545e-08,
"loss": 11.5202,
"step": 24560
},
{
"epoch": 8.89,
"learning_rate": 2.338971877498161e-08,
"loss": 11.6627,
"step": 24580
},
{
"epoch": 8.9,
"learning_rate": 2.30907247528222e-08,
"loss": 11.6358,
"step": 24600
},
{
"epoch": 8.9,
"learning_rate": 2.2793561532957555e-08,
"loss": 11.5407,
"step": 24620
},
{
"epoch": 8.91,
"learning_rate": 2.2498231513029236e-08,
"loss": 11.6465,
"step": 24640
},
{
"epoch": 8.92,
"learning_rate": 2.220473707588777e-08,
"loss": 11.5609,
"step": 24660
},
{
"epoch": 8.93,
"learning_rate": 2.191308058957328e-08,
"loss": 11.47,
"step": 24680
},
{
"epoch": 8.93,
"learning_rate": 2.1623264407296642e-08,
"loss": 11.5498,
"step": 24700
},
{
"epoch": 8.94,
"learning_rate": 2.1335290867420337e-08,
"loss": 11.5768,
"step": 24720
},
{
"epoch": 8.95,
"learning_rate": 2.1049162293439587e-08,
"loss": 11.5281,
"step": 24740
},
{
"epoch": 8.95,
"learning_rate": 2.0764880993963675e-08,
"loss": 11.5445,
"step": 24760
},
{
"epoch": 8.96,
"learning_rate": 2.0482449262697126e-08,
"loss": 11.5208,
"step": 24780
},
{
"epoch": 8.97,
"learning_rate": 2.0201869378421497e-08,
"loss": 11.6154,
"step": 24800
},
{
"epoch": 8.98,
"learning_rate": 1.9923143604976823e-08,
"loss": 11.6076,
"step": 24820
},
{
"epoch": 8.98,
"learning_rate": 1.9646274191243318e-08,
"loss": 11.6224,
"step": 24840
},
{
"epoch": 8.99,
"learning_rate": 1.937126337112338e-08,
"loss": 11.4927,
"step": 24860
},
{
"epoch": 9.0,
"learning_rate": 1.909811336352332e-08,
"loss": 11.5288,
"step": 24880
},
{
"epoch": 9.01,
"learning_rate": 1.882682637233568e-08,
"loss": 11.5851,
"step": 24900
},
{
"epoch": 9.01,
"learning_rate": 1.855740458642141e-08,
"loss": 11.5949,
"step": 24920
},
{
"epoch": 9.02,
"learning_rate": 1.828985017959217e-08,
"loss": 11.7067,
"step": 24940
},
{
"epoch": 9.03,
"learning_rate": 1.8024165310592754e-08,
"loss": 11.5169,
"step": 24960
},
{
"epoch": 9.03,
"learning_rate": 1.7760352123083798e-08,
"loss": 11.6754,
"step": 24980
},
{
"epoch": 9.04,
"learning_rate": 1.749841274562422e-08,
"loss": 11.5465,
"step": 25000
},
{
"epoch": 9.05,
"learning_rate": 1.7238349291654435e-08,
"loss": 11.563,
"step": 25020
},
{
"epoch": 9.06,
"learning_rate": 1.6980163859479007e-08,
"loss": 11.4924,
"step": 25040
},
{
"epoch": 9.06,
"learning_rate": 1.6723858532249778e-08,
"loss": 11.4883,
"step": 25060
},
{
"epoch": 9.07,
"learning_rate": 1.6469435377949175e-08,
"loss": 11.5703,
"step": 25080
},
{
"epoch": 9.08,
"learning_rate": 1.6216896449373295e-08,
"loss": 11.5845,
"step": 25100
},
{
"epoch": 9.08,
"learning_rate": 1.596624378411565e-08,
"loss": 11.5394,
"step": 25120
},
{
"epoch": 9.09,
"learning_rate": 1.5717479404550455e-08,
"loss": 11.4934,
"step": 25140
},
{
"epoch": 9.1,
"learning_rate": 1.5470605317816436e-08,
"loss": 11.4668,
"step": 25160
},
{
"epoch": 9.11,
"learning_rate": 1.5225623515800673e-08,
"loss": 11.724,
"step": 25180
},
{
"epoch": 9.11,
"learning_rate": 1.4982535975122474e-08,
"loss": 11.5385,
"step": 25200
},
{
"epoch": 9.12,
"learning_rate": 1.4741344657117238e-08,
"loss": 11.615,
"step": 25220
},
{
"epoch": 9.13,
"learning_rate": 1.4502051507821106e-08,
"loss": 11.5736,
"step": 25240
},
{
"epoch": 9.14,
"learning_rate": 1.4264658457954743e-08,
"loss": 11.5491,
"step": 25260
},
{
"epoch": 9.14,
"learning_rate": 1.4029167422908105e-08,
"loss": 11.5916,
"step": 25280
},
{
"epoch": 9.15,
"learning_rate": 1.3795580302724874e-08,
"loss": 11.6231,
"step": 25300
},
{
"epoch": 9.16,
"learning_rate": 1.3563898982087069e-08,
"loss": 11.5331,
"step": 25320
},
{
"epoch": 9.16,
"learning_rate": 1.3334125330299928e-08,
"loss": 11.514,
"step": 25340
},
{
"epoch": 9.17,
"learning_rate": 1.3106261201276724e-08,
"loss": 11.6518,
"step": 25360
},
{
"epoch": 9.18,
"learning_rate": 1.2880308433523945e-08,
"loss": 11.6731,
"step": 25380
},
{
"epoch": 9.19,
"learning_rate": 1.2656268850126411e-08,
"loss": 11.49,
"step": 25400
},
{
"epoch": 9.19,
"learning_rate": 1.243414425873246e-08,
"loss": 11.5648,
"step": 25420
},
{
"epoch": 9.2,
"learning_rate": 1.221393645153948e-08,
"loss": 11.6055,
"step": 25440
},
{
"epoch": 9.21,
"learning_rate": 1.1995647205279457e-08,
"loss": 11.5351,
"step": 25460
},
{
"epoch": 9.22,
"learning_rate": 1.1779278281204536e-08,
"loss": 11.5114,
"step": 25480
},
{
"epoch": 9.22,
"learning_rate": 1.156483142507289e-08,
"loss": 11.5505,
"step": 25500
},
{
"epoch": 9.23,
"learning_rate": 1.135230836713466e-08,
"loss": 11.596,
"step": 25520
},
{
"epoch": 9.24,
"learning_rate": 1.1141710822117872e-08,
"loss": 11.8107,
"step": 25540
},
{
"epoch": 9.24,
"learning_rate": 1.0933040489214674e-08,
"loss": 11.6673,
"step": 25560
},
{
"epoch": 9.25,
"learning_rate": 1.0726299052067761e-08,
"loss": 11.5999,
"step": 25580
},
{
"epoch": 9.26,
"learning_rate": 1.0521488178756532e-08,
"loss": 11.5096,
"step": 25600
},
{
"epoch": 9.27,
"learning_rate": 1.0318609521783817e-08,
"loss": 11.5442,
"step": 25620
},
{
"epoch": 9.27,
"learning_rate": 1.0117664718062469e-08,
"loss": 11.472,
"step": 25640
},
{
"epoch": 9.28,
"learning_rate": 9.918655388902158e-09,
"loss": 11.501,
"step": 25660
},
{
"epoch": 9.29,
"learning_rate": 9.721583139996382e-09,
"loss": 11.6153,
"step": 25680
},
{
"epoch": 9.29,
"learning_rate": 9.526449561409356e-09,
"loss": 11.5813,
"step": 25700
},
{
"epoch": 9.3,
"learning_rate": 9.333256227563341e-09,
"loss": 11.5631,
"step": 25720
},
{
"epoch": 9.31,
"learning_rate": 9.14200469722573e-09,
"loss": 11.6075,
"step": 25740
},
{
"epoch": 9.32,
"learning_rate": 8.952696513496755e-09,
"loss": 11.5646,
"step": 25760
},
{
"epoch": 9.32,
"learning_rate": 8.76533320379677e-09,
"loss": 11.6741,
"step": 25780
},
{
"epoch": 9.33,
"learning_rate": 8.57991627985416e-09,
"loss": 11.5443,
"step": 25800
},
{
"epoch": 9.34,
"learning_rate": 8.396447237692921e-09,
"loss": 11.505,
"step": 25820
},
{
"epoch": 9.35,
"learning_rate": 8.214927557620766e-09,
"loss": 11.5103,
"step": 25840
},
{
"epoch": 9.35,
"learning_rate": 8.035358704217039e-09,
"loss": 11.5984,
"step": 25860
},
{
"epoch": 9.36,
"learning_rate": 7.85774212632101e-09,
"loss": 11.5731,
"step": 25880
},
{
"epoch": 9.37,
"learning_rate": 7.682079257020163e-09,
"loss": 11.5109,
"step": 25900
},
{
"epoch": 9.37,
"learning_rate": 7.508371513638618e-09,
"loss": 11.5923,
"step": 25920
},
{
"epoch": 9.38,
"learning_rate": 7.336620297725666e-09,
"loss": 11.5269,
"step": 25940
},
{
"epoch": 9.39,
"learning_rate": 7.1668269950444784e-09,
"loss": 11.5616,
"step": 25960
},
{
"epoch": 9.4,
"learning_rate": 6.998992975560919e-09,
"loss": 11.5697,
"step": 25980
},
{
"epoch": 9.4,
"learning_rate": 6.833119593432607e-09,
"loss": 11.5153,
"step": 26000
},
{
"epoch": 9.41,
"learning_rate": 6.66920818699776e-09,
"loss": 11.5417,
"step": 26020
},
{
"epoch": 9.42,
"learning_rate": 6.50726007876462e-09,
"loss": 11.5555,
"step": 26040
},
{
"epoch": 9.42,
"learning_rate": 6.347276575400628e-09,
"loss": 11.6729,
"step": 26060
},
{
"epoch": 9.43,
"learning_rate": 6.189258967721989e-09,
"loss": 11.5967,
"step": 26080
},
{
"epoch": 9.44,
"learning_rate": 6.033208530683204e-09,
"loss": 11.5563,
"step": 26100
},
{
"epoch": 9.45,
"learning_rate": 5.879126523366751e-09,
"loss": 11.6089,
"step": 26120
},
{
"epoch": 9.45,
"learning_rate": 5.727014188972979e-09,
"loss": 11.5541,
"step": 26140
},
{
"epoch": 9.46,
"learning_rate": 5.576872754810113e-09,
"loss": 11.5303,
"step": 26160
},
{
"epoch": 9.47,
"learning_rate": 5.428703432284243e-09,
"loss": 11.5068,
"step": 26180
},
{
"epoch": 9.48,
"learning_rate": 5.282507416889625e-09,
"loss": 11.4943,
"step": 26200
},
{
"epoch": 9.48,
"learning_rate": 5.138285888199007e-09,
"loss": 11.5372,
"step": 26220
},
{
"epoch": 9.49,
"learning_rate": 4.996040009854152e-09,
"loss": 11.5843,
"step": 26240
},
{
"epoch": 9.5,
"learning_rate": 4.855770929556385e-09,
"loss": 11.4974,
"step": 26260
},
{
"epoch": 9.5,
"learning_rate": 4.7174797790574264e-09,
"loss": 11.5472,
"step": 26280
},
{
"epoch": 9.51,
"learning_rate": 4.5811676741501496e-09,
"loss": 11.6288,
"step": 26300
},
{
"epoch": 9.52,
"learning_rate": 4.446835714659647e-09,
"loss": 11.5523,
"step": 26320
},
{
"epoch": 9.53,
"learning_rate": 4.314484984434319e-09,
"loss": 11.5398,
"step": 26340
},
{
"epoch": 9.53,
"learning_rate": 4.184116551337241e-09,
"loss": 11.5505,
"step": 26360
},
{
"epoch": 9.54,
"learning_rate": 4.055731467237283e-09,
"loss": 11.5857,
"step": 26380
},
{
"epoch": 9.55,
"learning_rate": 3.929330768000949e-09,
"loss": 11.5889,
"step": 26400
},
{
"epoch": 9.56,
"learning_rate": 3.8049154734838275e-09,
"loss": 11.563,
"step": 26420
},
{
"epoch": 9.56,
"learning_rate": 3.6824865875224043e-09,
"loss": 11.5881,
"step": 26440
},
{
"epoch": 9.57,
"learning_rate": 3.562045097925903e-09,
"loss": 11.614,
"step": 26460
},
{
"epoch": 9.58,
"learning_rate": 3.4435919764684572e-09,
"loss": 11.6156,
"step": 26480
},
{
"epoch": 9.58,
"learning_rate": 3.3271281788811444e-09,
"loss": 11.4977,
"step": 26500
},
{
"epoch": 9.59,
"learning_rate": 3.2126546448442704e-09,
"loss": 11.5597,
"step": 26520
},
{
"epoch": 9.6,
"learning_rate": 3.1001722979799306e-09,
"loss": 11.5895,
"step": 26540
},
{
"epoch": 9.61,
"learning_rate": 2.989682045844405e-09,
"loss": 11.5509,
"step": 26560
},
{
"epoch": 9.61,
"learning_rate": 2.8811847799208868e-09,
"loss": 11.4969,
"step": 26580
},
{
"epoch": 9.62,
"learning_rate": 2.774681375612292e-09,
"loss": 11.6286,
"step": 26600
},
{
"epoch": 9.63,
"learning_rate": 2.6701726922342126e-09,
"loss": 11.5598,
"step": 26620
},
{
"epoch": 9.63,
"learning_rate": 2.5676595730079742e-09,
"loss": 11.5438,
"step": 26640
},
{
"epoch": 9.64,
"learning_rate": 2.4671428450537824e-09,
"loss": 11.6316,
"step": 26660
},
{
"epoch": 9.65,
"learning_rate": 2.3686233193841722e-09,
"loss": 11.5677,
"step": 26680
},
{
"epoch": 9.66,
"learning_rate": 2.272101790897346e-09,
"loss": 11.5419,
"step": 26700
},
{
"epoch": 9.66,
"learning_rate": 2.177579038370736e-09,
"loss": 11.6155,
"step": 26720
},
{
"epoch": 9.67,
"learning_rate": 2.085055824454868e-09,
"loss": 11.5047,
"step": 26740
},
{
"epoch": 9.68,
"learning_rate": 1.994532895667117e-09,
"loss": 11.6007,
"step": 26760
},
{
"epoch": 9.69,
"learning_rate": 1.9060109823856583e-09,
"loss": 11.4755,
"step": 26780
},
{
"epoch": 9.69,
"learning_rate": 1.8194907988436093e-09,
"loss": 11.5557,
"step": 26800
},
{
"epoch": 9.7,
"learning_rate": 1.7349730431233111e-09,
"loss": 11.5062,
"step": 26820
},
{
"epoch": 9.71,
"learning_rate": 1.6524583971505857e-09,
"loss": 11.6292,
"step": 26840
},
{
"epoch": 9.71,
"learning_rate": 1.5719475266893489e-09,
"loss": 11.4641,
"step": 26860
},
{
"epoch": 9.72,
"learning_rate": 1.493441081336172e-09,
"loss": 11.5438,
"step": 26880
},
{
"epoch": 9.73,
"learning_rate": 1.4169396945150346e-09,
"loss": 11.5255,
"step": 26900
},
{
"epoch": 9.74,
"learning_rate": 1.3424439834722746e-09,
"loss": 11.5248,
"step": 26920
},
{
"epoch": 9.74,
"learning_rate": 1.269954549271507e-09,
"loss": 11.5856,
"step": 26940
},
{
"epoch": 9.75,
"learning_rate": 1.199471976788935e-09,
"loss": 11.5466,
"step": 26960
},
{
"epoch": 9.76,
"learning_rate": 1.1309968347084364e-09,
"loss": 11.534,
"step": 26980
},
{
"epoch": 9.76,
"learning_rate": 1.0645296755171229e-09,
"loss": 11.5456,
"step": 27000
},
{
"epoch": 9.77,
"learning_rate": 1.0000710355008157e-09,
"loss": 11.5889,
"step": 27020
},
{
"epoch": 9.78,
"learning_rate": 9.376214347397437e-10,
"loss": 11.5358,
"step": 27040
},
{
"epoch": 9.79,
"learning_rate": 8.771813771042968e-10,
"loss": 11.5742,
"step": 27060
},
{
"epoch": 9.79,
"learning_rate": 8.187513502510846e-10,
"loss": 11.5168,
"step": 27080
},
{
"epoch": 9.8,
"learning_rate": 7.62331825618856e-10,
"loss": 11.6472,
"step": 27100
},
{
"epoch": 9.81,
"learning_rate": 7.079232584247252e-10,
"loss": 11.6027,
"step": 27120
},
{
"epoch": 9.82,
"learning_rate": 6.555260876606183e-10,
"loss": 11.5031,
"step": 27140
},
{
"epoch": 9.82,
"learning_rate": 6.051407360895822e-10,
"loss": 11.6224,
"step": 27160
},
{
"epoch": 9.83,
"learning_rate": 5.567676102424534e-10,
"loss": 11.4651,
"step": 27180
},
{
"epoch": 9.84,
"learning_rate": 5.10407100414556e-10,
"loss": 11.5794,
"step": 27200
},
{
"epoch": 9.84,
"learning_rate": 4.660595806625645e-10,
"loss": 11.6067,
"step": 27220
},
{
"epoch": 9.85,
"learning_rate": 4.2372540880147854e-10,
"loss": 11.5273,
"step": 27240
},
{
"epoch": 9.86,
"learning_rate": 3.834049264017092e-10,
"loss": 11.5379,
"step": 27260
},
{
"epoch": 9.87,
"learning_rate": 3.450984587863859e-10,
"loss": 11.5575,
"step": 27280
},
{
"epoch": 9.87,
"learning_rate": 3.088063150286924e-10,
"loss": 11.4664,
"step": 27300
},
{
"epoch": 9.88,
"learning_rate": 2.74528787949313e-10,
"loss": 11.6222,
"step": 27320
},
{
"epoch": 9.89,
"learning_rate": 2.4226615411424013e-10,
"loss": 11.5381,
"step": 27340
},
{
"epoch": 9.9,
"learning_rate": 2.1201867383233153e-10,
"loss": 11.6545,
"step": 27360
},
{
"epoch": 9.9,
"learning_rate": 1.8378659115333984e-10,
"loss": 11.6301,
"step": 27380
},
{
"epoch": 9.91,
"learning_rate": 1.5757013386599738e-10,
"loss": 11.4795,
"step": 27400
},
{
"epoch": 9.92,
"learning_rate": 1.3336951349599e-10,
"loss": 11.6273,
"step": 27420
},
{
"epoch": 9.92,
"learning_rate": 1.1118492530443058e-10,
"loss": 11.4725,
"step": 27440
},
{
"epoch": 9.93,
"learning_rate": 9.101654828613803e-11,
"loss": 11.556,
"step": 27460
},
{
"epoch": 9.94,
"learning_rate": 7.286454516833296e-11,
"loss": 11.5383,
"step": 27480
},
{
"epoch": 9.95,
"learning_rate": 5.672906240927755e-11,
"loss": 11.6122,
"step": 27500
},
{
"epoch": 9.95,
"learning_rate": 4.261023019697108e-11,
"loss": 11.5628,
"step": 27520
},
{
"epoch": 9.96,
"learning_rate": 3.050816244831722e-11,
"loss": 11.6629,
"step": 27540
},
{
"epoch": 9.97,
"learning_rate": 2.0422956808013868e-11,
"loss": 11.4987,
"step": 27560
},
{
"epoch": 9.97,
"learning_rate": 1.235469464785921e-11,
"loss": 11.5243,
"step": 27580
},
{
"epoch": 9.98,
"learning_rate": 6.3034410661133574e-12,
"loss": 11.5907,
"step": 27600
},
{
"epoch": 9.99,
"learning_rate": 2.2692448868877334e-12,
"loss": 11.56,
"step": 27620
},
{
"epoch": 10.0,
"learning_rate": 2.521386598119957e-13,
"loss": 11.5704,
"step": 27640
},
{
"epoch": 10.0,
"step": 27650,
"total_flos": 8.165662585242993e+17,
"train_loss": 11.932003958565849,
"train_runtime": 24504.6855,
"train_samples_per_second": 2.257,
"train_steps_per_second": 1.128
}
],
"logging_steps": 20,
"max_steps": 27650,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 100,
"total_flos": 8.165662585242993e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}