t5-small-e2e-qa / trainer_state.json
longcld's picture
loss 3.0
14fe5bb
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.690504807692308,
"global_step": 64500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 9.984975961538462e-05,
"loss": 9.5457,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 9.969951923076923e-05,
"loss": 6.3223,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 9.954927884615385e-05,
"loss": 5.9118,
"step": 300
},
{
"epoch": 0.06,
"learning_rate": 9.939903846153847e-05,
"loss": 5.6779,
"step": 400
},
{
"epoch": 0.08,
"learning_rate": 9.924879807692308e-05,
"loss": 5.4467,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 9.909855769230769e-05,
"loss": 5.3733,
"step": 600
},
{
"epoch": 0.11,
"learning_rate": 9.894831730769232e-05,
"loss": 5.3544,
"step": 700
},
{
"epoch": 0.12,
"learning_rate": 9.879807692307693e-05,
"loss": 5.2784,
"step": 800
},
{
"epoch": 0.14,
"learning_rate": 9.864783653846154e-05,
"loss": 5.1503,
"step": 900
},
{
"epoch": 0.15,
"learning_rate": 9.849759615384617e-05,
"loss": 5.0107,
"step": 1000
},
{
"epoch": 0.17,
"learning_rate": 9.834735576923078e-05,
"loss": 5.1408,
"step": 1100
},
{
"epoch": 0.18,
"learning_rate": 9.819711538461539e-05,
"loss": 4.9742,
"step": 1200
},
{
"epoch": 0.2,
"learning_rate": 9.8046875e-05,
"loss": 4.8672,
"step": 1300
},
{
"epoch": 0.21,
"learning_rate": 9.789663461538462e-05,
"loss": 4.858,
"step": 1400
},
{
"epoch": 0.23,
"learning_rate": 9.774639423076923e-05,
"loss": 4.8912,
"step": 1500
},
{
"epoch": 0.24,
"learning_rate": 9.759615384615385e-05,
"loss": 4.8654,
"step": 1600
},
{
"epoch": 0.26,
"learning_rate": 9.744591346153847e-05,
"loss": 4.8192,
"step": 1700
},
{
"epoch": 0.27,
"learning_rate": 9.729567307692308e-05,
"loss": 4.7405,
"step": 1800
},
{
"epoch": 0.29,
"learning_rate": 9.714543269230769e-05,
"loss": 4.69,
"step": 1900
},
{
"epoch": 0.3,
"learning_rate": 9.699519230769232e-05,
"loss": 4.5945,
"step": 2000
},
{
"epoch": 0.32,
"learning_rate": 9.684495192307693e-05,
"loss": 4.621,
"step": 2100
},
{
"epoch": 0.33,
"learning_rate": 9.669471153846155e-05,
"loss": 4.68,
"step": 2200
},
{
"epoch": 0.35,
"learning_rate": 9.654447115384616e-05,
"loss": 4.5514,
"step": 2300
},
{
"epoch": 0.36,
"learning_rate": 9.639423076923078e-05,
"loss": 4.6192,
"step": 2400
},
{
"epoch": 0.38,
"learning_rate": 9.624399038461539e-05,
"loss": 4.4672,
"step": 2500
},
{
"epoch": 0.39,
"learning_rate": 9.609375e-05,
"loss": 4.5006,
"step": 2600
},
{
"epoch": 0.41,
"learning_rate": 9.594350961538461e-05,
"loss": 4.5668,
"step": 2700
},
{
"epoch": 0.42,
"learning_rate": 9.579326923076924e-05,
"loss": 4.5345,
"step": 2800
},
{
"epoch": 0.44,
"learning_rate": 9.564302884615385e-05,
"loss": 4.4806,
"step": 2900
},
{
"epoch": 0.45,
"learning_rate": 9.549278846153846e-05,
"loss": 4.3853,
"step": 3000
},
{
"epoch": 0.47,
"learning_rate": 9.534254807692308e-05,
"loss": 4.4482,
"step": 3100
},
{
"epoch": 0.48,
"learning_rate": 9.519230769230769e-05,
"loss": 4.4437,
"step": 3200
},
{
"epoch": 0.5,
"learning_rate": 9.504206730769231e-05,
"loss": 4.3163,
"step": 3300
},
{
"epoch": 0.51,
"learning_rate": 9.489182692307693e-05,
"loss": 4.3702,
"step": 3400
},
{
"epoch": 0.53,
"learning_rate": 9.474158653846155e-05,
"loss": 4.3657,
"step": 3500
},
{
"epoch": 0.54,
"learning_rate": 9.459134615384616e-05,
"loss": 4.4059,
"step": 3600
},
{
"epoch": 0.56,
"learning_rate": 9.444110576923078e-05,
"loss": 4.3584,
"step": 3700
},
{
"epoch": 0.57,
"learning_rate": 9.429086538461539e-05,
"loss": 4.2906,
"step": 3800
},
{
"epoch": 0.59,
"learning_rate": 9.4140625e-05,
"loss": 4.2862,
"step": 3900
},
{
"epoch": 0.6,
"learning_rate": 9.399038461538463e-05,
"loss": 4.3426,
"step": 4000
},
{
"epoch": 0.62,
"learning_rate": 9.384014423076924e-05,
"loss": 4.2767,
"step": 4100
},
{
"epoch": 0.63,
"learning_rate": 9.368990384615385e-05,
"loss": 4.3542,
"step": 4200
},
{
"epoch": 0.65,
"learning_rate": 9.353966346153846e-05,
"loss": 4.3341,
"step": 4300
},
{
"epoch": 0.66,
"learning_rate": 9.338942307692307e-05,
"loss": 4.2464,
"step": 4400
},
{
"epoch": 0.68,
"learning_rate": 9.323918269230769e-05,
"loss": 4.3169,
"step": 4500
},
{
"epoch": 0.69,
"learning_rate": 9.308894230769231e-05,
"loss": 4.1644,
"step": 4600
},
{
"epoch": 0.71,
"learning_rate": 9.293870192307694e-05,
"loss": 4.3213,
"step": 4700
},
{
"epoch": 0.72,
"learning_rate": 9.278846153846155e-05,
"loss": 4.2788,
"step": 4800
},
{
"epoch": 0.74,
"learning_rate": 9.263822115384616e-05,
"loss": 4.2807,
"step": 4900
},
{
"epoch": 0.75,
"learning_rate": 9.248798076923077e-05,
"loss": 4.1281,
"step": 5000
},
{
"epoch": 0.77,
"learning_rate": 9.233774038461539e-05,
"loss": 4.2481,
"step": 5100
},
{
"epoch": 0.78,
"learning_rate": 9.21875e-05,
"loss": 4.163,
"step": 5200
},
{
"epoch": 0.8,
"learning_rate": 9.203725961538462e-05,
"loss": 4.2567,
"step": 5300
},
{
"epoch": 0.81,
"learning_rate": 9.188701923076924e-05,
"loss": 4.2224,
"step": 5400
},
{
"epoch": 0.83,
"learning_rate": 9.173677884615385e-05,
"loss": 4.132,
"step": 5500
},
{
"epoch": 0.84,
"learning_rate": 9.158653846153846e-05,
"loss": 4.1601,
"step": 5600
},
{
"epoch": 0.86,
"learning_rate": 9.143629807692307e-05,
"loss": 4.0697,
"step": 5700
},
{
"epoch": 0.87,
"learning_rate": 9.12860576923077e-05,
"loss": 4.1043,
"step": 5800
},
{
"epoch": 0.89,
"learning_rate": 9.113581730769232e-05,
"loss": 4.1087,
"step": 5900
},
{
"epoch": 0.9,
"learning_rate": 9.098557692307694e-05,
"loss": 3.9814,
"step": 6000
},
{
"epoch": 0.92,
"learning_rate": 9.083533653846155e-05,
"loss": 4.0993,
"step": 6100
},
{
"epoch": 0.93,
"learning_rate": 9.068509615384616e-05,
"loss": 4.1064,
"step": 6200
},
{
"epoch": 0.95,
"learning_rate": 9.053485576923077e-05,
"loss": 4.065,
"step": 6300
},
{
"epoch": 0.96,
"learning_rate": 9.038461538461538e-05,
"loss": 4.0022,
"step": 6400
},
{
"epoch": 0.98,
"learning_rate": 9.023437500000001e-05,
"loss": 4.0968,
"step": 6500
},
{
"epoch": 0.99,
"learning_rate": 9.008413461538462e-05,
"loss": 4.1214,
"step": 6600
},
{
"epoch": 1.01,
"learning_rate": 8.993389423076923e-05,
"loss": 4.0841,
"step": 6700
},
{
"epoch": 1.02,
"learning_rate": 8.978365384615385e-05,
"loss": 4.0666,
"step": 6800
},
{
"epoch": 1.04,
"learning_rate": 8.963341346153846e-05,
"loss": 3.9632,
"step": 6900
},
{
"epoch": 1.05,
"learning_rate": 8.948317307692307e-05,
"loss": 3.9496,
"step": 7000
},
{
"epoch": 1.07,
"learning_rate": 8.93329326923077e-05,
"loss": 3.9757,
"step": 7100
},
{
"epoch": 1.08,
"learning_rate": 8.918269230769232e-05,
"loss": 3.9959,
"step": 7200
},
{
"epoch": 1.1,
"learning_rate": 8.903245192307693e-05,
"loss": 3.9594,
"step": 7300
},
{
"epoch": 1.11,
"learning_rate": 8.888221153846155e-05,
"loss": 3.9735,
"step": 7400
},
{
"epoch": 1.13,
"learning_rate": 8.873197115384616e-05,
"loss": 3.9647,
"step": 7500
},
{
"epoch": 1.14,
"learning_rate": 8.858173076923077e-05,
"loss": 3.8759,
"step": 7600
},
{
"epoch": 1.16,
"learning_rate": 8.84314903846154e-05,
"loss": 3.9956,
"step": 7700
},
{
"epoch": 1.17,
"learning_rate": 8.828125000000001e-05,
"loss": 3.9526,
"step": 7800
},
{
"epoch": 1.19,
"learning_rate": 8.813100961538462e-05,
"loss": 3.9409,
"step": 7900
},
{
"epoch": 1.2,
"learning_rate": 8.798076923076923e-05,
"loss": 3.9545,
"step": 8000
},
{
"epoch": 1.22,
"learning_rate": 8.783052884615384e-05,
"loss": 3.8668,
"step": 8100
},
{
"epoch": 1.23,
"learning_rate": 8.768028846153846e-05,
"loss": 3.8675,
"step": 8200
},
{
"epoch": 1.25,
"learning_rate": 8.753004807692308e-05,
"loss": 3.9234,
"step": 8300
},
{
"epoch": 1.26,
"learning_rate": 8.73798076923077e-05,
"loss": 3.9181,
"step": 8400
},
{
"epoch": 1.28,
"learning_rate": 8.722956730769232e-05,
"loss": 3.9288,
"step": 8500
},
{
"epoch": 1.29,
"learning_rate": 8.707932692307693e-05,
"loss": 3.9103,
"step": 8600
},
{
"epoch": 1.31,
"learning_rate": 8.692908653846154e-05,
"loss": 3.9452,
"step": 8700
},
{
"epoch": 1.32,
"learning_rate": 8.677884615384616e-05,
"loss": 3.8211,
"step": 8800
},
{
"epoch": 1.34,
"learning_rate": 8.662860576923077e-05,
"loss": 3.7997,
"step": 8900
},
{
"epoch": 1.35,
"learning_rate": 8.64783653846154e-05,
"loss": 3.8444,
"step": 9000
},
{
"epoch": 1.37,
"learning_rate": 8.6328125e-05,
"loss": 3.8875,
"step": 9100
},
{
"epoch": 1.38,
"learning_rate": 8.617788461538462e-05,
"loss": 3.8792,
"step": 9200
},
{
"epoch": 1.4,
"learning_rate": 8.602764423076923e-05,
"loss": 3.8994,
"step": 9300
},
{
"epoch": 1.41,
"learning_rate": 8.587740384615384e-05,
"loss": 3.8862,
"step": 9400
},
{
"epoch": 1.43,
"learning_rate": 8.572716346153847e-05,
"loss": 3.824,
"step": 9500
},
{
"epoch": 1.44,
"learning_rate": 8.557692307692308e-05,
"loss": 3.7989,
"step": 9600
},
{
"epoch": 1.46,
"learning_rate": 8.542668269230769e-05,
"loss": 3.8998,
"step": 9700
},
{
"epoch": 1.47,
"learning_rate": 8.527644230769232e-05,
"loss": 3.8514,
"step": 9800
},
{
"epoch": 1.49,
"learning_rate": 8.512620192307693e-05,
"loss": 3.8324,
"step": 9900
},
{
"epoch": 1.5,
"learning_rate": 8.497596153846154e-05,
"loss": 3.8852,
"step": 10000
},
{
"epoch": 1.52,
"learning_rate": 8.482572115384615e-05,
"loss": 3.7941,
"step": 10100
},
{
"epoch": 1.53,
"learning_rate": 8.467548076923078e-05,
"loss": 3.9133,
"step": 10200
},
{
"epoch": 1.55,
"learning_rate": 8.452524038461539e-05,
"loss": 3.7357,
"step": 10300
},
{
"epoch": 1.56,
"learning_rate": 8.4375e-05,
"loss": 3.7986,
"step": 10400
},
{
"epoch": 1.58,
"learning_rate": 8.422475961538462e-05,
"loss": 3.8262,
"step": 10500
},
{
"epoch": 1.59,
"learning_rate": 8.407451923076923e-05,
"loss": 3.888,
"step": 10600
},
{
"epoch": 1.61,
"learning_rate": 8.392427884615384e-05,
"loss": 3.7276,
"step": 10700
},
{
"epoch": 1.62,
"learning_rate": 8.377403846153847e-05,
"loss": 3.8952,
"step": 10800
},
{
"epoch": 1.64,
"learning_rate": 8.362379807692308e-05,
"loss": 3.89,
"step": 10900
},
{
"epoch": 1.65,
"learning_rate": 8.347355769230769e-05,
"loss": 3.8234,
"step": 11000
},
{
"epoch": 1.67,
"learning_rate": 8.332331730769232e-05,
"loss": 3.7485,
"step": 11100
},
{
"epoch": 1.68,
"learning_rate": 8.317307692307693e-05,
"loss": 3.7877,
"step": 11200
},
{
"epoch": 1.7,
"learning_rate": 8.302283653846154e-05,
"loss": 3.7776,
"step": 11300
},
{
"epoch": 1.71,
"learning_rate": 8.287259615384617e-05,
"loss": 3.7945,
"step": 11400
},
{
"epoch": 1.73,
"learning_rate": 8.272235576923078e-05,
"loss": 3.8574,
"step": 11500
},
{
"epoch": 1.74,
"learning_rate": 8.257211538461539e-05,
"loss": 3.7562,
"step": 11600
},
{
"epoch": 1.76,
"learning_rate": 8.2421875e-05,
"loss": 3.7713,
"step": 11700
},
{
"epoch": 1.77,
"learning_rate": 8.227163461538461e-05,
"loss": 3.7257,
"step": 11800
},
{
"epoch": 1.79,
"learning_rate": 8.212139423076923e-05,
"loss": 3.7542,
"step": 11900
},
{
"epoch": 1.8,
"learning_rate": 8.197115384615385e-05,
"loss": 3.6902,
"step": 12000
},
{
"epoch": 1.82,
"learning_rate": 8.182091346153846e-05,
"loss": 3.7354,
"step": 12100
},
{
"epoch": 1.83,
"learning_rate": 8.167067307692308e-05,
"loss": 3.745,
"step": 12200
},
{
"epoch": 1.85,
"learning_rate": 8.152043269230769e-05,
"loss": 3.7681,
"step": 12300
},
{
"epoch": 1.86,
"learning_rate": 8.137019230769231e-05,
"loss": 3.8431,
"step": 12400
},
{
"epoch": 1.88,
"learning_rate": 8.121995192307693e-05,
"loss": 3.7285,
"step": 12500
},
{
"epoch": 1.89,
"learning_rate": 8.106971153846155e-05,
"loss": 3.6801,
"step": 12600
},
{
"epoch": 1.91,
"learning_rate": 8.091947115384616e-05,
"loss": 3.6456,
"step": 12700
},
{
"epoch": 1.92,
"learning_rate": 8.076923076923078e-05,
"loss": 3.764,
"step": 12800
},
{
"epoch": 1.94,
"learning_rate": 8.061899038461539e-05,
"loss": 3.7537,
"step": 12900
},
{
"epoch": 1.95,
"learning_rate": 8.046875e-05,
"loss": 3.5876,
"step": 13000
},
{
"epoch": 1.97,
"learning_rate": 8.031850961538461e-05,
"loss": 3.8075,
"step": 13100
},
{
"epoch": 1.98,
"learning_rate": 8.016826923076924e-05,
"loss": 3.6953,
"step": 13200
},
{
"epoch": 2.0,
"learning_rate": 8.001802884615385e-05,
"loss": 3.6577,
"step": 13300
},
{
"epoch": 2.01,
"learning_rate": 7.986778846153846e-05,
"loss": 3.7064,
"step": 13400
},
{
"epoch": 2.03,
"learning_rate": 7.971754807692308e-05,
"loss": 3.6817,
"step": 13500
},
{
"epoch": 2.04,
"learning_rate": 7.956730769230769e-05,
"loss": 3.6729,
"step": 13600
},
{
"epoch": 2.06,
"learning_rate": 7.941706730769231e-05,
"loss": 3.6737,
"step": 13700
},
{
"epoch": 2.07,
"learning_rate": 7.926682692307693e-05,
"loss": 3.6222,
"step": 13800
},
{
"epoch": 2.09,
"learning_rate": 7.911658653846155e-05,
"loss": 3.6704,
"step": 13900
},
{
"epoch": 2.1,
"learning_rate": 7.896634615384616e-05,
"loss": 3.7445,
"step": 14000
},
{
"epoch": 2.12,
"learning_rate": 7.881610576923078e-05,
"loss": 3.6227,
"step": 14100
},
{
"epoch": 2.13,
"learning_rate": 7.866586538461539e-05,
"loss": 3.6822,
"step": 14200
},
{
"epoch": 2.15,
"learning_rate": 7.8515625e-05,
"loss": 3.6178,
"step": 14300
},
{
"epoch": 2.16,
"learning_rate": 7.836538461538462e-05,
"loss": 3.6329,
"step": 14400
},
{
"epoch": 2.18,
"learning_rate": 7.821514423076924e-05,
"loss": 3.627,
"step": 14500
},
{
"epoch": 2.19,
"learning_rate": 7.806490384615385e-05,
"loss": 3.7061,
"step": 14600
},
{
"epoch": 2.21,
"learning_rate": 7.791466346153846e-05,
"loss": 3.7037,
"step": 14700
},
{
"epoch": 2.22,
"learning_rate": 7.776442307692307e-05,
"loss": 3.5797,
"step": 14800
},
{
"epoch": 2.24,
"learning_rate": 7.761418269230769e-05,
"loss": 3.6865,
"step": 14900
},
{
"epoch": 2.25,
"learning_rate": 7.746394230769231e-05,
"loss": 3.7016,
"step": 15000
},
{
"epoch": 2.27,
"learning_rate": 7.731370192307694e-05,
"loss": 3.6149,
"step": 15100
},
{
"epoch": 2.28,
"learning_rate": 7.716346153846155e-05,
"loss": 3.614,
"step": 15200
},
{
"epoch": 2.3,
"learning_rate": 7.701322115384616e-05,
"loss": 3.5511,
"step": 15300
},
{
"epoch": 2.31,
"learning_rate": 7.686298076923077e-05,
"loss": 3.5887,
"step": 15400
},
{
"epoch": 2.33,
"learning_rate": 7.671274038461539e-05,
"loss": 3.6247,
"step": 15500
},
{
"epoch": 2.34,
"learning_rate": 7.65625e-05,
"loss": 3.5886,
"step": 15600
},
{
"epoch": 2.36,
"learning_rate": 7.641225961538462e-05,
"loss": 3.4512,
"step": 15700
},
{
"epoch": 2.37,
"learning_rate": 7.626201923076924e-05,
"loss": 3.4764,
"step": 15800
},
{
"epoch": 2.39,
"learning_rate": 7.611177884615385e-05,
"loss": 3.6115,
"step": 15900
},
{
"epoch": 2.4,
"learning_rate": 7.596153846153846e-05,
"loss": 3.5766,
"step": 16000
},
{
"epoch": 2.42,
"learning_rate": 7.581129807692307e-05,
"loss": 3.6717,
"step": 16100
},
{
"epoch": 2.43,
"learning_rate": 7.56610576923077e-05,
"loss": 3.4975,
"step": 16200
},
{
"epoch": 2.45,
"learning_rate": 7.551081730769232e-05,
"loss": 3.572,
"step": 16300
},
{
"epoch": 2.46,
"learning_rate": 7.536057692307694e-05,
"loss": 3.5285,
"step": 16400
},
{
"epoch": 2.48,
"learning_rate": 7.521033653846155e-05,
"loss": 3.634,
"step": 16500
},
{
"epoch": 2.49,
"learning_rate": 7.506009615384616e-05,
"loss": 3.5654,
"step": 16600
},
{
"epoch": 2.51,
"learning_rate": 7.490985576923077e-05,
"loss": 3.5617,
"step": 16700
},
{
"epoch": 2.52,
"learning_rate": 7.475961538461538e-05,
"loss": 3.6074,
"step": 16800
},
{
"epoch": 2.54,
"learning_rate": 7.460937500000001e-05,
"loss": 3.5804,
"step": 16900
},
{
"epoch": 2.55,
"learning_rate": 7.445913461538462e-05,
"loss": 3.5686,
"step": 17000
},
{
"epoch": 2.57,
"learning_rate": 7.430889423076923e-05,
"loss": 3.5895,
"step": 17100
},
{
"epoch": 2.58,
"learning_rate": 7.415865384615385e-05,
"loss": 3.5292,
"step": 17200
},
{
"epoch": 2.6,
"learning_rate": 7.400841346153846e-05,
"loss": 3.553,
"step": 17300
},
{
"epoch": 2.61,
"learning_rate": 7.385817307692307e-05,
"loss": 3.6004,
"step": 17400
},
{
"epoch": 2.63,
"learning_rate": 7.37079326923077e-05,
"loss": 3.5905,
"step": 17500
},
{
"epoch": 2.64,
"learning_rate": 7.355769230769232e-05,
"loss": 3.4882,
"step": 17600
},
{
"epoch": 2.66,
"learning_rate": 7.340745192307693e-05,
"loss": 3.7265,
"step": 17700
},
{
"epoch": 2.67,
"learning_rate": 7.325721153846155e-05,
"loss": 3.4666,
"step": 17800
},
{
"epoch": 2.69,
"learning_rate": 7.310697115384616e-05,
"loss": 3.5222,
"step": 17900
},
{
"epoch": 2.7,
"learning_rate": 7.295673076923077e-05,
"loss": 3.5472,
"step": 18000
},
{
"epoch": 2.72,
"learning_rate": 7.28064903846154e-05,
"loss": 3.4465,
"step": 18100
},
{
"epoch": 2.73,
"learning_rate": 7.265625000000001e-05,
"loss": 3.4855,
"step": 18200
},
{
"epoch": 2.75,
"learning_rate": 7.250600961538462e-05,
"loss": 3.4352,
"step": 18300
},
{
"epoch": 2.76,
"learning_rate": 7.235576923076923e-05,
"loss": 3.5702,
"step": 18400
},
{
"epoch": 2.78,
"learning_rate": 7.220552884615384e-05,
"loss": 3.5483,
"step": 18500
},
{
"epoch": 2.79,
"learning_rate": 7.205528846153846e-05,
"loss": 3.4681,
"step": 18600
},
{
"epoch": 2.81,
"learning_rate": 7.190504807692308e-05,
"loss": 3.4907,
"step": 18700
},
{
"epoch": 2.82,
"learning_rate": 7.17548076923077e-05,
"loss": 3.6045,
"step": 18800
},
{
"epoch": 2.84,
"learning_rate": 7.160456730769232e-05,
"loss": 3.4191,
"step": 18900
},
{
"epoch": 2.85,
"learning_rate": 7.145432692307693e-05,
"loss": 3.5984,
"step": 19000
},
{
"epoch": 2.87,
"learning_rate": 7.130408653846154e-05,
"loss": 3.4533,
"step": 19100
},
{
"epoch": 2.88,
"learning_rate": 7.115384615384616e-05,
"loss": 3.4745,
"step": 19200
},
{
"epoch": 2.9,
"learning_rate": 7.100360576923077e-05,
"loss": 3.4572,
"step": 19300
},
{
"epoch": 2.91,
"learning_rate": 7.08533653846154e-05,
"loss": 3.5035,
"step": 19400
},
{
"epoch": 2.93,
"learning_rate": 7.0703125e-05,
"loss": 3.5592,
"step": 19500
},
{
"epoch": 2.94,
"learning_rate": 7.055288461538462e-05,
"loss": 3.4138,
"step": 19600
},
{
"epoch": 2.96,
"learning_rate": 7.040264423076923e-05,
"loss": 3.4603,
"step": 19700
},
{
"epoch": 2.97,
"learning_rate": 7.025240384615384e-05,
"loss": 3.5511,
"step": 19800
},
{
"epoch": 2.99,
"learning_rate": 7.010216346153847e-05,
"loss": 3.4787,
"step": 19900
},
{
"epoch": 3.0,
"learning_rate": 6.995192307692308e-05,
"loss": 3.5999,
"step": 20000
},
{
"epoch": 3.02,
"learning_rate": 6.980168269230769e-05,
"loss": 3.4902,
"step": 20100
},
{
"epoch": 3.03,
"learning_rate": 6.965144230769232e-05,
"loss": 3.5083,
"step": 20200
},
{
"epoch": 3.05,
"learning_rate": 6.950120192307693e-05,
"loss": 3.5033,
"step": 20300
},
{
"epoch": 3.06,
"learning_rate": 6.935096153846154e-05,
"loss": 3.5604,
"step": 20400
},
{
"epoch": 3.08,
"learning_rate": 6.920072115384615e-05,
"loss": 3.5009,
"step": 20500
},
{
"epoch": 3.09,
"learning_rate": 6.905048076923078e-05,
"loss": 3.5076,
"step": 20600
},
{
"epoch": 3.11,
"learning_rate": 6.890024038461539e-05,
"loss": 3.4699,
"step": 20700
},
{
"epoch": 3.12,
"learning_rate": 6.875e-05,
"loss": 3.4902,
"step": 20800
},
{
"epoch": 3.14,
"learning_rate": 6.859975961538462e-05,
"loss": 3.4528,
"step": 20900
},
{
"epoch": 3.16,
"learning_rate": 6.844951923076923e-05,
"loss": 3.5253,
"step": 21000
},
{
"epoch": 3.17,
"learning_rate": 6.829927884615384e-05,
"loss": 3.48,
"step": 21100
},
{
"epoch": 3.19,
"learning_rate": 6.814903846153847e-05,
"loss": 3.3898,
"step": 21200
},
{
"epoch": 3.2,
"learning_rate": 6.799879807692308e-05,
"loss": 3.4181,
"step": 21300
},
{
"epoch": 3.22,
"learning_rate": 6.784855769230769e-05,
"loss": 3.4118,
"step": 21400
},
{
"epoch": 3.23,
"learning_rate": 6.769831730769232e-05,
"loss": 3.442,
"step": 21500
},
{
"epoch": 3.25,
"learning_rate": 6.754807692307693e-05,
"loss": 3.4115,
"step": 21600
},
{
"epoch": 3.26,
"learning_rate": 6.739783653846154e-05,
"loss": 3.4211,
"step": 21700
},
{
"epoch": 3.28,
"learning_rate": 6.724759615384617e-05,
"loss": 3.4422,
"step": 21800
},
{
"epoch": 3.29,
"learning_rate": 6.709735576923078e-05,
"loss": 3.5675,
"step": 21900
},
{
"epoch": 3.31,
"learning_rate": 6.694711538461539e-05,
"loss": 3.3725,
"step": 22000
},
{
"epoch": 3.32,
"learning_rate": 6.6796875e-05,
"loss": 3.4709,
"step": 22100
},
{
"epoch": 3.34,
"learning_rate": 6.664663461538461e-05,
"loss": 3.4247,
"step": 22200
},
{
"epoch": 3.35,
"learning_rate": 6.649639423076923e-05,
"loss": 3.411,
"step": 22300
},
{
"epoch": 3.37,
"learning_rate": 6.634615384615385e-05,
"loss": 3.3555,
"step": 22400
},
{
"epoch": 3.38,
"learning_rate": 6.619591346153846e-05,
"loss": 3.3727,
"step": 22500
},
{
"epoch": 3.4,
"learning_rate": 6.604567307692308e-05,
"loss": 3.4175,
"step": 22600
},
{
"epoch": 3.41,
"learning_rate": 6.589543269230769e-05,
"loss": 3.4083,
"step": 22700
},
{
"epoch": 3.43,
"learning_rate": 6.574519230769231e-05,
"loss": 3.4315,
"step": 22800
},
{
"epoch": 3.44,
"learning_rate": 6.559495192307693e-05,
"loss": 3.4095,
"step": 22900
},
{
"epoch": 3.46,
"learning_rate": 6.544471153846154e-05,
"loss": 3.4292,
"step": 23000
},
{
"epoch": 3.47,
"learning_rate": 6.529447115384616e-05,
"loss": 3.4097,
"step": 23100
},
{
"epoch": 3.49,
"learning_rate": 6.514423076923078e-05,
"loss": 3.5273,
"step": 23200
},
{
"epoch": 3.5,
"learning_rate": 6.499399038461539e-05,
"loss": 3.4549,
"step": 23300
},
{
"epoch": 3.52,
"learning_rate": 6.484375e-05,
"loss": 3.3474,
"step": 23400
},
{
"epoch": 3.53,
"learning_rate": 6.469350961538461e-05,
"loss": 3.3646,
"step": 23500
},
{
"epoch": 3.55,
"learning_rate": 6.454326923076924e-05,
"loss": 3.4457,
"step": 23600
},
{
"epoch": 3.56,
"learning_rate": 6.439302884615385e-05,
"loss": 3.4549,
"step": 23700
},
{
"epoch": 3.58,
"learning_rate": 6.424278846153846e-05,
"loss": 3.4032,
"step": 23800
},
{
"epoch": 3.59,
"learning_rate": 6.409254807692307e-05,
"loss": 3.3047,
"step": 23900
},
{
"epoch": 3.61,
"learning_rate": 6.394230769230769e-05,
"loss": 3.4121,
"step": 24000
},
{
"epoch": 3.62,
"learning_rate": 6.379206730769231e-05,
"loss": 3.4355,
"step": 24100
},
{
"epoch": 3.64,
"learning_rate": 6.364182692307692e-05,
"loss": 3.4153,
"step": 24200
},
{
"epoch": 3.65,
"learning_rate": 6.349158653846155e-05,
"loss": 3.3583,
"step": 24300
},
{
"epoch": 3.67,
"learning_rate": 6.334134615384616e-05,
"loss": 3.3435,
"step": 24400
},
{
"epoch": 3.68,
"learning_rate": 6.319110576923077e-05,
"loss": 3.3166,
"step": 24500
},
{
"epoch": 3.7,
"learning_rate": 6.304086538461539e-05,
"loss": 3.3751,
"step": 24600
},
{
"epoch": 3.71,
"learning_rate": 6.2890625e-05,
"loss": 3.4042,
"step": 24700
},
{
"epoch": 3.73,
"learning_rate": 6.274038461538461e-05,
"loss": 3.3774,
"step": 24800
},
{
"epoch": 3.74,
"learning_rate": 6.259014423076924e-05,
"loss": 3.4476,
"step": 24900
},
{
"epoch": 3.76,
"learning_rate": 6.243990384615385e-05,
"loss": 3.3689,
"step": 25000
},
{
"epoch": 3.77,
"learning_rate": 6.228966346153846e-05,
"loss": 3.4963,
"step": 25100
},
{
"epoch": 3.79,
"learning_rate": 6.213942307692307e-05,
"loss": 3.3515,
"step": 25200
},
{
"epoch": 3.8,
"learning_rate": 6.198918269230769e-05,
"loss": 3.329,
"step": 25300
},
{
"epoch": 3.82,
"learning_rate": 6.183894230769231e-05,
"loss": 3.4401,
"step": 25400
},
{
"epoch": 3.83,
"learning_rate": 6.168870192307694e-05,
"loss": 3.3613,
"step": 25500
},
{
"epoch": 3.85,
"learning_rate": 6.153846153846155e-05,
"loss": 3.3463,
"step": 25600
},
{
"epoch": 3.86,
"learning_rate": 6.138822115384616e-05,
"loss": 3.3742,
"step": 25700
},
{
"epoch": 3.88,
"learning_rate": 6.123798076923077e-05,
"loss": 3.3858,
"step": 25800
},
{
"epoch": 3.89,
"learning_rate": 6.108774038461539e-05,
"loss": 3.4393,
"step": 25900
},
{
"epoch": 3.91,
"learning_rate": 6.0937500000000004e-05,
"loss": 3.4649,
"step": 26000
},
{
"epoch": 3.92,
"learning_rate": 6.0787259615384616e-05,
"loss": 3.411,
"step": 26100
},
{
"epoch": 3.94,
"learning_rate": 6.0637019230769235e-05,
"loss": 3.4315,
"step": 26200
},
{
"epoch": 3.95,
"learning_rate": 6.048677884615385e-05,
"loss": 3.3696,
"step": 26300
},
{
"epoch": 3.97,
"learning_rate": 6.033653846153846e-05,
"loss": 3.4611,
"step": 26400
},
{
"epoch": 3.98,
"learning_rate": 6.018629807692308e-05,
"loss": 3.4147,
"step": 26500
},
{
"epoch": 4.0,
"learning_rate": 6.003605769230769e-05,
"loss": 3.3671,
"step": 26600
},
{
"epoch": 4.01,
"learning_rate": 5.9885817307692316e-05,
"loss": 3.2922,
"step": 26700
},
{
"epoch": 4.03,
"learning_rate": 5.973557692307693e-05,
"loss": 3.4404,
"step": 26800
},
{
"epoch": 4.04,
"learning_rate": 5.958533653846155e-05,
"loss": 3.2936,
"step": 26900
},
{
"epoch": 4.06,
"learning_rate": 5.943509615384616e-05,
"loss": 3.3042,
"step": 27000
},
{
"epoch": 4.07,
"learning_rate": 5.928485576923077e-05,
"loss": 3.3803,
"step": 27100
},
{
"epoch": 4.09,
"learning_rate": 5.913461538461539e-05,
"loss": 3.3067,
"step": 27200
},
{
"epoch": 4.1,
"learning_rate": 5.8984375e-05,
"loss": 3.2929,
"step": 27300
},
{
"epoch": 4.12,
"learning_rate": 5.8834134615384614e-05,
"loss": 3.3419,
"step": 27400
},
{
"epoch": 4.13,
"learning_rate": 5.868389423076923e-05,
"loss": 3.3165,
"step": 27500
},
{
"epoch": 4.15,
"learning_rate": 5.8533653846153845e-05,
"loss": 3.3961,
"step": 27600
},
{
"epoch": 4.16,
"learning_rate": 5.8383413461538464e-05,
"loss": 3.3305,
"step": 27700
},
{
"epoch": 4.18,
"learning_rate": 5.8233173076923076e-05,
"loss": 3.3619,
"step": 27800
},
{
"epoch": 4.19,
"learning_rate": 5.808293269230769e-05,
"loss": 3.3138,
"step": 27900
},
{
"epoch": 4.21,
"learning_rate": 5.7932692307692314e-05,
"loss": 3.3557,
"step": 28000
},
{
"epoch": 4.22,
"learning_rate": 5.778245192307693e-05,
"loss": 3.4086,
"step": 28100
},
{
"epoch": 4.24,
"learning_rate": 5.7632211538461545e-05,
"loss": 3.2462,
"step": 28200
},
{
"epoch": 4.25,
"learning_rate": 5.748197115384616e-05,
"loss": 3.3991,
"step": 28300
},
{
"epoch": 4.27,
"learning_rate": 5.7331730769230776e-05,
"loss": 3.3424,
"step": 28400
},
{
"epoch": 4.28,
"learning_rate": 5.718149038461539e-05,
"loss": 3.345,
"step": 28500
},
{
"epoch": 4.3,
"learning_rate": 5.703125e-05,
"loss": 3.3057,
"step": 28600
},
{
"epoch": 4.31,
"learning_rate": 5.688100961538462e-05,
"loss": 3.339,
"step": 28700
},
{
"epoch": 4.33,
"learning_rate": 5.673076923076923e-05,
"loss": 3.3355,
"step": 28800
},
{
"epoch": 4.34,
"learning_rate": 5.6580528846153844e-05,
"loss": 3.2835,
"step": 28900
},
{
"epoch": 4.36,
"learning_rate": 5.643028846153846e-05,
"loss": 3.1553,
"step": 29000
},
{
"epoch": 4.37,
"learning_rate": 5.6280048076923075e-05,
"loss": 3.3456,
"step": 29100
},
{
"epoch": 4.39,
"learning_rate": 5.612980769230769e-05,
"loss": 3.2839,
"step": 29200
},
{
"epoch": 4.4,
"learning_rate": 5.597956730769231e-05,
"loss": 3.3465,
"step": 29300
},
{
"epoch": 4.42,
"learning_rate": 5.582932692307693e-05,
"loss": 3.3,
"step": 29400
},
{
"epoch": 4.43,
"learning_rate": 5.5679086538461544e-05,
"loss": 3.395,
"step": 29500
},
{
"epoch": 4.45,
"learning_rate": 5.552884615384616e-05,
"loss": 3.3677,
"step": 29600
},
{
"epoch": 4.46,
"learning_rate": 5.5378605769230775e-05,
"loss": 3.2638,
"step": 29700
},
{
"epoch": 4.48,
"learning_rate": 5.522836538461539e-05,
"loss": 3.2915,
"step": 29800
},
{
"epoch": 4.49,
"learning_rate": 5.5078125000000006e-05,
"loss": 3.3927,
"step": 29900
},
{
"epoch": 4.51,
"learning_rate": 5.492788461538462e-05,
"loss": 3.2758,
"step": 30000
},
{
"epoch": 4.52,
"learning_rate": 5.477764423076923e-05,
"loss": 3.2846,
"step": 30100
},
{
"epoch": 4.54,
"learning_rate": 5.462740384615385e-05,
"loss": 3.2502,
"step": 30200
},
{
"epoch": 4.55,
"learning_rate": 5.447716346153846e-05,
"loss": 3.345,
"step": 30300
},
{
"epoch": 4.57,
"learning_rate": 5.432692307692307e-05,
"loss": 3.3753,
"step": 30400
},
{
"epoch": 4.58,
"learning_rate": 5.417668269230769e-05,
"loss": 3.2171,
"step": 30500
},
{
"epoch": 4.6,
"learning_rate": 5.402644230769232e-05,
"loss": 3.2815,
"step": 30600
},
{
"epoch": 4.61,
"learning_rate": 5.387620192307693e-05,
"loss": 3.3164,
"step": 30700
},
{
"epoch": 4.63,
"learning_rate": 5.372596153846154e-05,
"loss": 3.3076,
"step": 30800
},
{
"epoch": 4.64,
"learning_rate": 5.357572115384616e-05,
"loss": 3.3597,
"step": 30900
},
{
"epoch": 4.66,
"learning_rate": 5.342548076923077e-05,
"loss": 3.2528,
"step": 31000
},
{
"epoch": 4.67,
"learning_rate": 5.3275240384615385e-05,
"loss": 3.3317,
"step": 31100
},
{
"epoch": 4.69,
"learning_rate": 5.3125000000000004e-05,
"loss": 3.4087,
"step": 31200
},
{
"epoch": 4.7,
"learning_rate": 5.2974759615384616e-05,
"loss": 3.3444,
"step": 31300
},
{
"epoch": 4.72,
"learning_rate": 5.2824519230769235e-05,
"loss": 3.3282,
"step": 31400
},
{
"epoch": 4.73,
"learning_rate": 5.267427884615385e-05,
"loss": 3.2904,
"step": 31500
},
{
"epoch": 4.75,
"learning_rate": 5.252403846153846e-05,
"loss": 3.3006,
"step": 31600
},
{
"epoch": 4.76,
"learning_rate": 5.237379807692308e-05,
"loss": 3.3654,
"step": 31700
},
{
"epoch": 4.78,
"learning_rate": 5.222355769230769e-05,
"loss": 3.2973,
"step": 31800
},
{
"epoch": 4.79,
"learning_rate": 5.2073317307692316e-05,
"loss": 3.3668,
"step": 31900
},
{
"epoch": 4.81,
"learning_rate": 5.192307692307693e-05,
"loss": 3.3186,
"step": 32000
},
{
"epoch": 4.82,
"learning_rate": 5.177283653846155e-05,
"loss": 3.1989,
"step": 32100
},
{
"epoch": 4.84,
"learning_rate": 5.162259615384616e-05,
"loss": 3.3046,
"step": 32200
},
{
"epoch": 4.85,
"learning_rate": 5.147235576923077e-05,
"loss": 3.2191,
"step": 32300
},
{
"epoch": 4.87,
"learning_rate": 5.132211538461539e-05,
"loss": 3.2571,
"step": 32400
},
{
"epoch": 4.88,
"learning_rate": 5.1171875e-05,
"loss": 3.2072,
"step": 32500
},
{
"epoch": 4.9,
"learning_rate": 5.1021634615384614e-05,
"loss": 3.322,
"step": 32600
},
{
"epoch": 4.91,
"learning_rate": 5.087139423076923e-05,
"loss": 3.299,
"step": 32700
},
{
"epoch": 4.93,
"learning_rate": 5.0721153846153845e-05,
"loss": 3.2845,
"step": 32800
},
{
"epoch": 4.94,
"learning_rate": 5.057091346153846e-05,
"loss": 3.2909,
"step": 32900
},
{
"epoch": 4.96,
"learning_rate": 5.0420673076923076e-05,
"loss": 3.3085,
"step": 33000
},
{
"epoch": 4.97,
"learning_rate": 5.027043269230769e-05,
"loss": 3.3532,
"step": 33100
},
{
"epoch": 4.99,
"learning_rate": 5.0120192307692314e-05,
"loss": 3.2598,
"step": 33200
},
{
"epoch": 5.0,
"learning_rate": 4.9969951923076926e-05,
"loss": 3.259,
"step": 33300
},
{
"epoch": 5.02,
"learning_rate": 4.981971153846154e-05,
"loss": 3.194,
"step": 33400
},
{
"epoch": 5.03,
"learning_rate": 4.966947115384616e-05,
"loss": 3.202,
"step": 33500
},
{
"epoch": 5.05,
"learning_rate": 4.9519230769230776e-05,
"loss": 3.1681,
"step": 33600
},
{
"epoch": 5.06,
"learning_rate": 4.936899038461539e-05,
"loss": 3.256,
"step": 33700
},
{
"epoch": 5.08,
"learning_rate": 4.921875e-05,
"loss": 3.2368,
"step": 33800
},
{
"epoch": 5.09,
"learning_rate": 4.906850961538462e-05,
"loss": 3.3036,
"step": 33900
},
{
"epoch": 5.11,
"learning_rate": 4.891826923076923e-05,
"loss": 3.2212,
"step": 34000
},
{
"epoch": 5.12,
"learning_rate": 4.8768028846153843e-05,
"loss": 3.3221,
"step": 34100
},
{
"epoch": 5.14,
"learning_rate": 4.861778846153847e-05,
"loss": 3.2859,
"step": 34200
},
{
"epoch": 5.15,
"learning_rate": 4.846754807692308e-05,
"loss": 3.1449,
"step": 34300
},
{
"epoch": 5.17,
"learning_rate": 4.8317307692307693e-05,
"loss": 3.2694,
"step": 34400
},
{
"epoch": 5.18,
"learning_rate": 4.816706730769231e-05,
"loss": 3.2395,
"step": 34500
},
{
"epoch": 5.2,
"learning_rate": 4.8016826923076924e-05,
"loss": 3.2135,
"step": 34600
},
{
"epoch": 5.21,
"learning_rate": 4.7866586538461537e-05,
"loss": 3.2219,
"step": 34700
},
{
"epoch": 5.23,
"learning_rate": 4.7716346153846155e-05,
"loss": 3.2907,
"step": 34800
},
{
"epoch": 5.24,
"learning_rate": 4.7566105769230774e-05,
"loss": 3.3046,
"step": 34900
},
{
"epoch": 5.26,
"learning_rate": 4.7415865384615386e-05,
"loss": 3.2011,
"step": 35000
},
{
"epoch": 5.27,
"learning_rate": 4.7265625000000005e-05,
"loss": 3.2394,
"step": 35100
},
{
"epoch": 5.29,
"learning_rate": 4.711538461538462e-05,
"loss": 3.2004,
"step": 35200
},
{
"epoch": 5.3,
"learning_rate": 4.696514423076923e-05,
"loss": 3.2539,
"step": 35300
},
{
"epoch": 5.32,
"learning_rate": 4.681490384615385e-05,
"loss": 3.3304,
"step": 35400
},
{
"epoch": 5.33,
"learning_rate": 4.666466346153847e-05,
"loss": 3.2046,
"step": 35500
},
{
"epoch": 5.35,
"learning_rate": 4.651442307692308e-05,
"loss": 3.3181,
"step": 35600
},
{
"epoch": 5.36,
"learning_rate": 4.636418269230769e-05,
"loss": 3.1461,
"step": 35700
},
{
"epoch": 5.38,
"learning_rate": 4.621394230769231e-05,
"loss": 3.2141,
"step": 35800
},
{
"epoch": 5.39,
"learning_rate": 4.606370192307692e-05,
"loss": 3.2212,
"step": 35900
},
{
"epoch": 5.41,
"learning_rate": 4.591346153846154e-05,
"loss": 3.2862,
"step": 36000
},
{
"epoch": 5.42,
"learning_rate": 4.576322115384616e-05,
"loss": 3.1912,
"step": 36100
},
{
"epoch": 5.44,
"learning_rate": 4.561298076923077e-05,
"loss": 3.3004,
"step": 36200
},
{
"epoch": 5.45,
"learning_rate": 4.5462740384615385e-05,
"loss": 3.1604,
"step": 36300
},
{
"epoch": 5.47,
"learning_rate": 4.5312500000000004e-05,
"loss": 3.1868,
"step": 36400
},
{
"epoch": 5.48,
"learning_rate": 4.5162259615384616e-05,
"loss": 3.2882,
"step": 36500
},
{
"epoch": 5.5,
"learning_rate": 4.501201923076923e-05,
"loss": 3.2211,
"step": 36600
},
{
"epoch": 5.51,
"learning_rate": 4.486177884615385e-05,
"loss": 3.2298,
"step": 36700
},
{
"epoch": 5.53,
"learning_rate": 4.4711538461538466e-05,
"loss": 3.2108,
"step": 36800
},
{
"epoch": 5.54,
"learning_rate": 4.456129807692308e-05,
"loss": 3.2887,
"step": 36900
},
{
"epoch": 5.56,
"learning_rate": 4.44110576923077e-05,
"loss": 3.1887,
"step": 37000
},
{
"epoch": 5.57,
"learning_rate": 4.426081730769231e-05,
"loss": 3.3018,
"step": 37100
},
{
"epoch": 5.59,
"learning_rate": 4.411057692307692e-05,
"loss": 3.2316,
"step": 37200
},
{
"epoch": 5.6,
"learning_rate": 4.396033653846154e-05,
"loss": 3.3247,
"step": 37300
},
{
"epoch": 5.62,
"learning_rate": 4.381009615384616e-05,
"loss": 3.2524,
"step": 37400
},
{
"epoch": 5.63,
"learning_rate": 4.365985576923077e-05,
"loss": 3.4009,
"step": 37500
},
{
"epoch": 5.65,
"learning_rate": 4.350961538461539e-05,
"loss": 3.1606,
"step": 37600
},
{
"epoch": 5.66,
"learning_rate": 4.3359375e-05,
"loss": 3.3663,
"step": 37700
},
{
"epoch": 5.68,
"learning_rate": 4.3209134615384614e-05,
"loss": 3.2013,
"step": 37800
},
{
"epoch": 5.69,
"learning_rate": 4.305889423076923e-05,
"loss": 3.2427,
"step": 37900
},
{
"epoch": 5.71,
"learning_rate": 4.2908653846153845e-05,
"loss": 3.2398,
"step": 38000
},
{
"epoch": 5.72,
"learning_rate": 4.2758413461538464e-05,
"loss": 3.293,
"step": 38100
},
{
"epoch": 5.74,
"learning_rate": 4.260817307692308e-05,
"loss": 3.2233,
"step": 38200
},
{
"epoch": 5.75,
"learning_rate": 4.2457932692307695e-05,
"loss": 3.2299,
"step": 38300
},
{
"epoch": 5.77,
"learning_rate": 4.230769230769231e-05,
"loss": 3.1866,
"step": 38400
},
{
"epoch": 5.78,
"learning_rate": 4.2157451923076926e-05,
"loss": 3.2959,
"step": 38500
},
{
"epoch": 5.8,
"learning_rate": 4.200721153846154e-05,
"loss": 3.2326,
"step": 38600
},
{
"epoch": 5.81,
"learning_rate": 4.185697115384616e-05,
"loss": 3.2325,
"step": 38700
},
{
"epoch": 5.83,
"learning_rate": 4.1706730769230776e-05,
"loss": 3.312,
"step": 38800
},
{
"epoch": 5.84,
"learning_rate": 4.155649038461539e-05,
"loss": 3.2872,
"step": 38900
},
{
"epoch": 5.86,
"learning_rate": 4.140625e-05,
"loss": 3.3211,
"step": 39000
},
{
"epoch": 5.87,
"learning_rate": 4.125600961538462e-05,
"loss": 3.267,
"step": 39100
},
{
"epoch": 5.89,
"learning_rate": 4.110576923076923e-05,
"loss": 3.2381,
"step": 39200
},
{
"epoch": 5.9,
"learning_rate": 4.095552884615384e-05,
"loss": 3.1223,
"step": 39300
},
{
"epoch": 5.92,
"learning_rate": 4.080528846153847e-05,
"loss": 3.1428,
"step": 39400
},
{
"epoch": 5.93,
"learning_rate": 4.065504807692308e-05,
"loss": 3.3539,
"step": 39500
},
{
"epoch": 5.95,
"learning_rate": 4.050480769230769e-05,
"loss": 3.1743,
"step": 39600
},
{
"epoch": 5.96,
"learning_rate": 4.035456730769231e-05,
"loss": 3.1944,
"step": 39700
},
{
"epoch": 5.98,
"learning_rate": 4.0204326923076924e-05,
"loss": 3.2237,
"step": 39800
},
{
"epoch": 5.99,
"learning_rate": 4.0054086538461536e-05,
"loss": 3.3603,
"step": 39900
},
{
"epoch": 6.01,
"learning_rate": 3.9903846153846155e-05,
"loss": 3.1751,
"step": 40000
},
{
"epoch": 6.02,
"learning_rate": 3.9753605769230774e-05,
"loss": 3.1382,
"step": 40100
},
{
"epoch": 6.04,
"learning_rate": 3.9603365384615386e-05,
"loss": 3.2931,
"step": 40200
},
{
"epoch": 6.05,
"learning_rate": 3.9453125000000005e-05,
"loss": 3.1821,
"step": 40300
},
{
"epoch": 6.07,
"learning_rate": 3.930288461538462e-05,
"loss": 3.159,
"step": 40400
},
{
"epoch": 6.08,
"learning_rate": 3.915264423076923e-05,
"loss": 3.1653,
"step": 40500
},
{
"epoch": 6.1,
"learning_rate": 3.900240384615385e-05,
"loss": 3.2043,
"step": 40600
},
{
"epoch": 6.11,
"learning_rate": 3.885216346153847e-05,
"loss": 3.1382,
"step": 40700
},
{
"epoch": 6.13,
"learning_rate": 3.870192307692308e-05,
"loss": 3.1522,
"step": 40800
},
{
"epoch": 6.14,
"learning_rate": 3.855168269230769e-05,
"loss": 3.1957,
"step": 40900
},
{
"epoch": 6.16,
"learning_rate": 3.840144230769231e-05,
"loss": 3.1041,
"step": 41000
},
{
"epoch": 6.17,
"learning_rate": 3.825120192307692e-05,
"loss": 3.1838,
"step": 41100
},
{
"epoch": 6.19,
"learning_rate": 3.810096153846154e-05,
"loss": 3.1468,
"step": 41200
},
{
"epoch": 6.2,
"learning_rate": 3.795072115384616e-05,
"loss": 3.1075,
"step": 41300
},
{
"epoch": 6.22,
"learning_rate": 3.780048076923077e-05,
"loss": 3.1594,
"step": 41400
},
{
"epoch": 6.23,
"learning_rate": 3.7650240384615385e-05,
"loss": 3.1844,
"step": 41500
},
{
"epoch": 6.25,
"learning_rate": 3.7500000000000003e-05,
"loss": 3.3313,
"step": 41600
},
{
"epoch": 6.27,
"learning_rate": 3.7349759615384616e-05,
"loss": 3.1786,
"step": 41700
},
{
"epoch": 6.28,
"learning_rate": 3.719951923076923e-05,
"loss": 3.2375,
"step": 41800
},
{
"epoch": 6.3,
"learning_rate": 3.704927884615385e-05,
"loss": 3.1206,
"step": 41900
},
{
"epoch": 6.31,
"learning_rate": 3.6899038461538466e-05,
"loss": 3.1919,
"step": 42000
},
{
"epoch": 6.33,
"learning_rate": 3.674879807692308e-05,
"loss": 3.2956,
"step": 42100
},
{
"epoch": 6.34,
"learning_rate": 3.6598557692307697e-05,
"loss": 3.2287,
"step": 42200
},
{
"epoch": 6.36,
"learning_rate": 3.644831730769231e-05,
"loss": 3.1453,
"step": 42300
},
{
"epoch": 6.37,
"learning_rate": 3.629807692307692e-05,
"loss": 3.1535,
"step": 42400
},
{
"epoch": 6.39,
"learning_rate": 3.614783653846154e-05,
"loss": 3.165,
"step": 42500
},
{
"epoch": 6.4,
"learning_rate": 3.599759615384616e-05,
"loss": 3.1838,
"step": 42600
},
{
"epoch": 6.42,
"learning_rate": 3.584735576923077e-05,
"loss": 3.1737,
"step": 42700
},
{
"epoch": 6.43,
"learning_rate": 3.569711538461539e-05,
"loss": 3.1649,
"step": 42800
},
{
"epoch": 6.45,
"learning_rate": 3.5546875e-05,
"loss": 3.103,
"step": 42900
},
{
"epoch": 6.46,
"learning_rate": 3.5396634615384614e-05,
"loss": 3.1786,
"step": 43000
},
{
"epoch": 6.48,
"learning_rate": 3.524639423076923e-05,
"loss": 3.2883,
"step": 43100
},
{
"epoch": 6.49,
"learning_rate": 3.5096153846153845e-05,
"loss": 3.2017,
"step": 43200
},
{
"epoch": 6.51,
"learning_rate": 3.4945913461538464e-05,
"loss": 3.2765,
"step": 43300
},
{
"epoch": 6.52,
"learning_rate": 3.479567307692308e-05,
"loss": 3.1713,
"step": 43400
},
{
"epoch": 6.54,
"learning_rate": 3.4645432692307695e-05,
"loss": 3.1993,
"step": 43500
},
{
"epoch": 6.55,
"learning_rate": 3.449519230769231e-05,
"loss": 3.2256,
"step": 43600
},
{
"epoch": 6.57,
"learning_rate": 3.4344951923076926e-05,
"loss": 3.2242,
"step": 43700
},
{
"epoch": 6.58,
"learning_rate": 3.419471153846154e-05,
"loss": 3.256,
"step": 43800
},
{
"epoch": 6.6,
"learning_rate": 3.404447115384616e-05,
"loss": 3.1709,
"step": 43900
},
{
"epoch": 6.61,
"learning_rate": 3.3894230769230776e-05,
"loss": 3.1652,
"step": 44000
},
{
"epoch": 6.63,
"learning_rate": 3.374399038461539e-05,
"loss": 3.0949,
"step": 44100
},
{
"epoch": 6.64,
"learning_rate": 3.359375e-05,
"loss": 3.2357,
"step": 44200
},
{
"epoch": 6.66,
"learning_rate": 3.344350961538462e-05,
"loss": 3.1377,
"step": 44300
},
{
"epoch": 6.67,
"learning_rate": 3.329326923076923e-05,
"loss": 3.1905,
"step": 44400
},
{
"epoch": 6.69,
"learning_rate": 3.314302884615384e-05,
"loss": 3.1792,
"step": 44500
},
{
"epoch": 6.7,
"learning_rate": 3.299278846153846e-05,
"loss": 3.1817,
"step": 44600
},
{
"epoch": 6.72,
"learning_rate": 3.284254807692308e-05,
"loss": 3.1305,
"step": 44700
},
{
"epoch": 6.73,
"learning_rate": 3.269230769230769e-05,
"loss": 3.1406,
"step": 44800
},
{
"epoch": 6.75,
"learning_rate": 3.254206730769231e-05,
"loss": 3.2035,
"step": 44900
},
{
"epoch": 6.76,
"learning_rate": 3.2391826923076924e-05,
"loss": 3.115,
"step": 45000
},
{
"epoch": 6.78,
"learning_rate": 3.2241586538461536e-05,
"loss": 3.1376,
"step": 45100
},
{
"epoch": 6.79,
"learning_rate": 3.2091346153846155e-05,
"loss": 3.1234,
"step": 45200
},
{
"epoch": 6.81,
"learning_rate": 3.1941105769230774e-05,
"loss": 3.1036,
"step": 45300
},
{
"epoch": 6.82,
"learning_rate": 3.1790865384615386e-05,
"loss": 3.2273,
"step": 45400
},
{
"epoch": 6.84,
"learning_rate": 3.1640625e-05,
"loss": 3.1999,
"step": 45500
},
{
"epoch": 6.85,
"learning_rate": 3.149038461538462e-05,
"loss": 3.1843,
"step": 45600
},
{
"epoch": 6.87,
"learning_rate": 3.134014423076923e-05,
"loss": 3.1522,
"step": 45700
},
{
"epoch": 6.88,
"learning_rate": 3.118990384615385e-05,
"loss": 3.1722,
"step": 45800
},
{
"epoch": 6.9,
"learning_rate": 3.103966346153847e-05,
"loss": 3.2638,
"step": 45900
},
{
"epoch": 6.91,
"learning_rate": 3.088942307692308e-05,
"loss": 3.1483,
"step": 46000
},
{
"epoch": 6.93,
"learning_rate": 3.073918269230769e-05,
"loss": 3.2081,
"step": 46100
},
{
"epoch": 6.94,
"learning_rate": 3.058894230769231e-05,
"loss": 3.2939,
"step": 46200
},
{
"epoch": 6.96,
"learning_rate": 3.0438701923076922e-05,
"loss": 3.0819,
"step": 46300
},
{
"epoch": 6.97,
"learning_rate": 3.0288461538461538e-05,
"loss": 3.1972,
"step": 46400
},
{
"epoch": 6.99,
"learning_rate": 3.0138221153846157e-05,
"loss": 3.1313,
"step": 46500
},
{
"epoch": 7.0,
"learning_rate": 2.9987980769230772e-05,
"loss": 3.2346,
"step": 46600
},
{
"epoch": 7.02,
"learning_rate": 2.9837740384615388e-05,
"loss": 3.0856,
"step": 46700
},
{
"epoch": 7.03,
"learning_rate": 2.96875e-05,
"loss": 3.1688,
"step": 46800
},
{
"epoch": 7.05,
"learning_rate": 2.9537259615384615e-05,
"loss": 3.0855,
"step": 46900
},
{
"epoch": 7.06,
"learning_rate": 2.938701923076923e-05,
"loss": 3.107,
"step": 47000
},
{
"epoch": 7.08,
"learning_rate": 2.9236778846153846e-05,
"loss": 3.1544,
"step": 47100
},
{
"epoch": 7.09,
"learning_rate": 2.9086538461538465e-05,
"loss": 3.1732,
"step": 47200
},
{
"epoch": 7.11,
"learning_rate": 2.893629807692308e-05,
"loss": 3.1477,
"step": 47300
},
{
"epoch": 7.12,
"learning_rate": 2.8786057692307693e-05,
"loss": 3.137,
"step": 47400
},
{
"epoch": 7.14,
"learning_rate": 2.863581730769231e-05,
"loss": 3.1548,
"step": 47500
},
{
"epoch": 7.15,
"learning_rate": 2.8485576923076924e-05,
"loss": 3.2163,
"step": 47600
},
{
"epoch": 7.17,
"learning_rate": 2.8335336538461536e-05,
"loss": 3.1692,
"step": 47700
},
{
"epoch": 7.18,
"learning_rate": 2.818509615384616e-05,
"loss": 3.1894,
"step": 47800
},
{
"epoch": 7.2,
"learning_rate": 2.8034855769230774e-05,
"loss": 3.1581,
"step": 47900
},
{
"epoch": 7.21,
"learning_rate": 2.7884615384615386e-05,
"loss": 3.1314,
"step": 48000
},
{
"epoch": 7.23,
"learning_rate": 2.7734375e-05,
"loss": 3.1914,
"step": 48100
},
{
"epoch": 7.24,
"learning_rate": 2.7584134615384617e-05,
"loss": 3.193,
"step": 48200
},
{
"epoch": 7.26,
"learning_rate": 2.743389423076923e-05,
"loss": 3.1188,
"step": 48300
},
{
"epoch": 7.27,
"learning_rate": 2.7283653846153845e-05,
"loss": 3.1413,
"step": 48400
},
{
"epoch": 7.29,
"learning_rate": 2.7133413461538464e-05,
"loss": 3.1699,
"step": 48500
},
{
"epoch": 7.3,
"learning_rate": 2.698317307692308e-05,
"loss": 3.1479,
"step": 48600
},
{
"epoch": 7.32,
"learning_rate": 2.6832932692307695e-05,
"loss": 3.1354,
"step": 48700
},
{
"epoch": 7.33,
"learning_rate": 2.668269230769231e-05,
"loss": 3.1386,
"step": 48800
},
{
"epoch": 7.35,
"learning_rate": 2.6532451923076922e-05,
"loss": 3.1308,
"step": 48900
},
{
"epoch": 7.36,
"learning_rate": 2.6382211538461538e-05,
"loss": 3.1955,
"step": 49000
},
{
"epoch": 7.38,
"learning_rate": 2.6231971153846157e-05,
"loss": 3.0473,
"step": 49100
},
{
"epoch": 7.39,
"learning_rate": 2.6081730769230772e-05,
"loss": 3.1117,
"step": 49200
},
{
"epoch": 7.41,
"learning_rate": 2.5931490384615388e-05,
"loss": 3.127,
"step": 49300
},
{
"epoch": 7.42,
"learning_rate": 2.578125e-05,
"loss": 3.1794,
"step": 49400
},
{
"epoch": 7.44,
"learning_rate": 2.5631009615384615e-05,
"loss": 3.1297,
"step": 49500
},
{
"epoch": 7.45,
"learning_rate": 2.548076923076923e-05,
"loss": 3.204,
"step": 49600
},
{
"epoch": 7.47,
"learning_rate": 2.5330528846153846e-05,
"loss": 3.1469,
"step": 49700
},
{
"epoch": 7.48,
"learning_rate": 2.5180288461538465e-05,
"loss": 3.1524,
"step": 49800
},
{
"epoch": 7.5,
"learning_rate": 2.503004807692308e-05,
"loss": 3.1529,
"step": 49900
},
{
"epoch": 7.51,
"learning_rate": 2.4879807692307693e-05,
"loss": 3.209,
"step": 50000
},
{
"epoch": 7.53,
"learning_rate": 2.472956730769231e-05,
"loss": 3.1885,
"step": 50100
},
{
"epoch": 7.54,
"learning_rate": 2.4579326923076924e-05,
"loss": 3.1388,
"step": 50200
},
{
"epoch": 7.56,
"learning_rate": 2.442908653846154e-05,
"loss": 3.163,
"step": 50300
},
{
"epoch": 7.57,
"learning_rate": 2.4278846153846155e-05,
"loss": 3.1048,
"step": 50400
},
{
"epoch": 7.59,
"learning_rate": 2.412860576923077e-05,
"loss": 3.1563,
"step": 50500
},
{
"epoch": 7.6,
"learning_rate": 2.3978365384615386e-05,
"loss": 3.0561,
"step": 50600
},
{
"epoch": 7.62,
"learning_rate": 2.3828125e-05,
"loss": 3.1133,
"step": 50700
},
{
"epoch": 7.63,
"learning_rate": 2.3677884615384617e-05,
"loss": 3.0987,
"step": 50800
},
{
"epoch": 7.65,
"learning_rate": 2.3527644230769233e-05,
"loss": 3.1504,
"step": 50900
},
{
"epoch": 7.66,
"learning_rate": 2.3377403846153848e-05,
"loss": 3.057,
"step": 51000
},
{
"epoch": 7.68,
"learning_rate": 2.3227163461538464e-05,
"loss": 3.1533,
"step": 51100
},
{
"epoch": 7.69,
"learning_rate": 2.307692307692308e-05,
"loss": 3.1509,
"step": 51200
},
{
"epoch": 7.71,
"learning_rate": 2.2926682692307695e-05,
"loss": 3.1098,
"step": 51300
},
{
"epoch": 7.72,
"learning_rate": 2.2776442307692307e-05,
"loss": 3.1403,
"step": 51400
},
{
"epoch": 7.74,
"learning_rate": 2.2626201923076922e-05,
"loss": 3.175,
"step": 51500
},
{
"epoch": 7.75,
"learning_rate": 2.247596153846154e-05,
"loss": 3.082,
"step": 51600
},
{
"epoch": 7.77,
"learning_rate": 2.2325721153846153e-05,
"loss": 3.1393,
"step": 51700
},
{
"epoch": 7.78,
"learning_rate": 2.217548076923077e-05,
"loss": 3.0962,
"step": 51800
},
{
"epoch": 7.8,
"learning_rate": 2.2025240384615388e-05,
"loss": 3.1229,
"step": 51900
},
{
"epoch": 7.81,
"learning_rate": 2.1875e-05,
"loss": 3.1898,
"step": 52000
},
{
"epoch": 7.83,
"learning_rate": 2.1724759615384615e-05,
"loss": 3.1181,
"step": 52100
},
{
"epoch": 7.84,
"learning_rate": 2.1574519230769234e-05,
"loss": 3.1294,
"step": 52200
},
{
"epoch": 7.86,
"learning_rate": 2.1424278846153846e-05,
"loss": 3.1118,
"step": 52300
},
{
"epoch": 7.87,
"learning_rate": 2.1274038461538462e-05,
"loss": 3.2093,
"step": 52400
},
{
"epoch": 7.89,
"learning_rate": 2.112379807692308e-05,
"loss": 3.0968,
"step": 52500
},
{
"epoch": 7.9,
"learning_rate": 2.0973557692307693e-05,
"loss": 3.1043,
"step": 52600
},
{
"epoch": 7.92,
"learning_rate": 2.082331730769231e-05,
"loss": 3.1901,
"step": 52700
},
{
"epoch": 7.93,
"learning_rate": 2.0673076923076924e-05,
"loss": 3.1718,
"step": 52800
},
{
"epoch": 7.95,
"learning_rate": 2.052283653846154e-05,
"loss": 3.1754,
"step": 52900
},
{
"epoch": 7.96,
"learning_rate": 2.0372596153846155e-05,
"loss": 3.1013,
"step": 53000
},
{
"epoch": 7.98,
"learning_rate": 2.022235576923077e-05,
"loss": 3.1273,
"step": 53100
},
{
"epoch": 7.99,
"learning_rate": 2.0072115384615386e-05,
"loss": 3.0944,
"step": 53200
},
{
"epoch": 8.01,
"learning_rate": 1.9921875e-05,
"loss": 3.1341,
"step": 53300
},
{
"epoch": 8.02,
"learning_rate": 1.9771634615384617e-05,
"loss": 3.1024,
"step": 53400
},
{
"epoch": 8.04,
"learning_rate": 1.9621394230769232e-05,
"loss": 3.2147,
"step": 53500
},
{
"epoch": 8.05,
"learning_rate": 1.9471153846153848e-05,
"loss": 3.1102,
"step": 53600
},
{
"epoch": 8.07,
"learning_rate": 1.9320913461538463e-05,
"loss": 3.2247,
"step": 53700
},
{
"epoch": 8.08,
"learning_rate": 1.917067307692308e-05,
"loss": 3.1304,
"step": 53800
},
{
"epoch": 8.1,
"learning_rate": 1.9020432692307695e-05,
"loss": 3.1136,
"step": 53900
},
{
"epoch": 8.11,
"learning_rate": 1.8870192307692307e-05,
"loss": 3.1116,
"step": 54000
},
{
"epoch": 8.13,
"learning_rate": 1.8719951923076922e-05,
"loss": 3.0719,
"step": 54100
},
{
"epoch": 8.14,
"learning_rate": 1.856971153846154e-05,
"loss": 2.9952,
"step": 54200
},
{
"epoch": 8.16,
"learning_rate": 1.8419471153846153e-05,
"loss": 3.1379,
"step": 54300
},
{
"epoch": 8.17,
"learning_rate": 1.826923076923077e-05,
"loss": 3.0153,
"step": 54400
},
{
"epoch": 8.19,
"learning_rate": 1.8118990384615388e-05,
"loss": 3.0276,
"step": 54500
},
{
"epoch": 8.2,
"learning_rate": 1.796875e-05,
"loss": 3.1095,
"step": 54600
},
{
"epoch": 8.22,
"learning_rate": 1.7818509615384615e-05,
"loss": 3.173,
"step": 54700
},
{
"epoch": 8.23,
"learning_rate": 1.7668269230769234e-05,
"loss": 3.0593,
"step": 54800
},
{
"epoch": 8.25,
"learning_rate": 1.7518028846153846e-05,
"loss": 3.2008,
"step": 54900
},
{
"epoch": 8.26,
"learning_rate": 1.7367788461538462e-05,
"loss": 3.0976,
"step": 55000
},
{
"epoch": 8.28,
"learning_rate": 1.721754807692308e-05,
"loss": 3.0239,
"step": 55100
},
{
"epoch": 8.29,
"learning_rate": 1.7067307692307693e-05,
"loss": 2.9954,
"step": 55200
},
{
"epoch": 8.31,
"learning_rate": 1.6917067307692308e-05,
"loss": 3.0959,
"step": 55300
},
{
"epoch": 8.32,
"learning_rate": 1.6766826923076924e-05,
"loss": 3.1744,
"step": 55400
},
{
"epoch": 8.34,
"learning_rate": 1.661658653846154e-05,
"loss": 3.1568,
"step": 55500
},
{
"epoch": 8.35,
"learning_rate": 1.6466346153846155e-05,
"loss": 3.2058,
"step": 55600
},
{
"epoch": 8.37,
"learning_rate": 1.631610576923077e-05,
"loss": 3.2486,
"step": 55700
},
{
"epoch": 8.38,
"learning_rate": 1.6165865384615386e-05,
"loss": 3.0785,
"step": 55800
},
{
"epoch": 8.4,
"learning_rate": 1.6015625e-05,
"loss": 3.1703,
"step": 55900
},
{
"epoch": 8.41,
"learning_rate": 1.5865384615384617e-05,
"loss": 3.0091,
"step": 56000
},
{
"epoch": 8.43,
"learning_rate": 1.5715144230769232e-05,
"loss": 2.9814,
"step": 56100
},
{
"epoch": 8.44,
"learning_rate": 1.5564903846153848e-05,
"loss": 3.1474,
"step": 56200
},
{
"epoch": 8.46,
"learning_rate": 1.541466346153846e-05,
"loss": 3.0801,
"step": 56300
},
{
"epoch": 8.47,
"learning_rate": 1.526442307692308e-05,
"loss": 3.0667,
"step": 56400
},
{
"epoch": 8.49,
"learning_rate": 1.5114182692307693e-05,
"loss": 3.1068,
"step": 56500
},
{
"epoch": 8.5,
"learning_rate": 1.4963942307692308e-05,
"loss": 3.1035,
"step": 56600
},
{
"epoch": 8.52,
"learning_rate": 1.4813701923076922e-05,
"loss": 3.1616,
"step": 56700
},
{
"epoch": 8.53,
"learning_rate": 1.466346153846154e-05,
"loss": 3.2153,
"step": 56800
},
{
"epoch": 8.55,
"learning_rate": 1.4513221153846155e-05,
"loss": 3.1312,
"step": 56900
},
{
"epoch": 8.56,
"learning_rate": 1.4362980769230769e-05,
"loss": 3.1062,
"step": 57000
},
{
"epoch": 8.58,
"learning_rate": 1.4212740384615386e-05,
"loss": 3.092,
"step": 57100
},
{
"epoch": 8.59,
"learning_rate": 1.4062500000000001e-05,
"loss": 3.1349,
"step": 57200
},
{
"epoch": 8.61,
"learning_rate": 1.3912259615384615e-05,
"loss": 3.1018,
"step": 57300
},
{
"epoch": 8.62,
"learning_rate": 1.3762019230769232e-05,
"loss": 3.078,
"step": 57400
},
{
"epoch": 8.64,
"learning_rate": 1.3611778846153848e-05,
"loss": 3.0693,
"step": 57500
},
{
"epoch": 8.65,
"learning_rate": 1.3461538461538462e-05,
"loss": 3.0487,
"step": 57600
},
{
"epoch": 8.67,
"learning_rate": 1.3311298076923079e-05,
"loss": 3.0893,
"step": 57700
},
{
"epoch": 8.68,
"learning_rate": 1.3161057692307693e-05,
"loss": 3.0998,
"step": 57800
},
{
"epoch": 8.7,
"learning_rate": 1.3010817307692308e-05,
"loss": 3.0287,
"step": 57900
},
{
"epoch": 8.71,
"learning_rate": 1.2860576923076922e-05,
"loss": 3.0558,
"step": 58000
},
{
"epoch": 8.73,
"learning_rate": 1.271033653846154e-05,
"loss": 3.1865,
"step": 58100
},
{
"epoch": 8.74,
"learning_rate": 1.2560096153846155e-05,
"loss": 3.1941,
"step": 58200
},
{
"epoch": 8.76,
"learning_rate": 1.240985576923077e-05,
"loss": 3.1259,
"step": 58300
},
{
"epoch": 8.77,
"learning_rate": 1.2259615384615384e-05,
"loss": 3.0836,
"step": 58400
},
{
"epoch": 8.79,
"learning_rate": 1.2109375000000001e-05,
"loss": 3.2193,
"step": 58500
},
{
"epoch": 8.8,
"learning_rate": 1.1959134615384617e-05,
"loss": 3.091,
"step": 58600
},
{
"epoch": 8.82,
"learning_rate": 1.180889423076923e-05,
"loss": 3.1139,
"step": 58700
},
{
"epoch": 8.83,
"learning_rate": 1.1658653846153846e-05,
"loss": 3.1585,
"step": 58800
},
{
"epoch": 8.85,
"learning_rate": 1.1508413461538462e-05,
"loss": 3.1098,
"step": 58900
},
{
"epoch": 8.86,
"learning_rate": 1.1358173076923077e-05,
"loss": 3.0185,
"step": 59000
},
{
"epoch": 8.88,
"learning_rate": 1.1207932692307693e-05,
"loss": 3.1064,
"step": 59100
},
{
"epoch": 8.89,
"learning_rate": 1.1057692307692308e-05,
"loss": 3.1722,
"step": 59200
},
{
"epoch": 8.91,
"learning_rate": 1.0907451923076924e-05,
"loss": 3.107,
"step": 59300
},
{
"epoch": 8.92,
"learning_rate": 1.075721153846154e-05,
"loss": 3.0859,
"step": 59400
},
{
"epoch": 8.94,
"learning_rate": 1.0606971153846155e-05,
"loss": 3.1676,
"step": 59500
},
{
"epoch": 8.95,
"learning_rate": 1.045673076923077e-05,
"loss": 3.1134,
"step": 59600
},
{
"epoch": 8.97,
"learning_rate": 1.0306490384615384e-05,
"loss": 3.1622,
"step": 59700
},
{
"epoch": 8.98,
"learning_rate": 1.0156250000000001e-05,
"loss": 3.1357,
"step": 59800
},
{
"epoch": 9.0,
"learning_rate": 1.0006009615384617e-05,
"loss": 3.1807,
"step": 59900
},
{
"epoch": 9.01,
"learning_rate": 9.85576923076923e-06,
"loss": 3.1431,
"step": 60000
},
{
"epoch": 9.03,
"learning_rate": 9.705528846153846e-06,
"loss": 3.2043,
"step": 60100
},
{
"epoch": 9.04,
"learning_rate": 9.555288461538462e-06,
"loss": 3.0665,
"step": 60200
},
{
"epoch": 9.06,
"learning_rate": 9.405048076923077e-06,
"loss": 3.016,
"step": 60300
},
{
"epoch": 9.07,
"learning_rate": 9.254807692307693e-06,
"loss": 3.0571,
"step": 60400
},
{
"epoch": 9.09,
"learning_rate": 9.104567307692308e-06,
"loss": 3.1035,
"step": 60500
},
{
"epoch": 9.1,
"learning_rate": 8.954326923076924e-06,
"loss": 3.1543,
"step": 60600
},
{
"epoch": 9.12,
"learning_rate": 8.804086538461539e-06,
"loss": 3.1839,
"step": 60700
},
{
"epoch": 9.13,
"learning_rate": 8.653846153846155e-06,
"loss": 3.1786,
"step": 60800
},
{
"epoch": 9.15,
"learning_rate": 8.50360576923077e-06,
"loss": 3.0707,
"step": 60900
},
{
"epoch": 9.16,
"learning_rate": 8.353365384615384e-06,
"loss": 3.052,
"step": 61000
},
{
"epoch": 9.18,
"learning_rate": 8.203125000000001e-06,
"loss": 3.0002,
"step": 61100
},
{
"epoch": 9.19,
"learning_rate": 8.052884615384617e-06,
"loss": 3.0621,
"step": 61200
},
{
"epoch": 9.21,
"learning_rate": 7.90264423076923e-06,
"loss": 3.1472,
"step": 61300
},
{
"epoch": 9.22,
"learning_rate": 7.752403846153846e-06,
"loss": 3.2139,
"step": 61400
},
{
"epoch": 9.24,
"learning_rate": 7.6021634615384615e-06,
"loss": 3.1535,
"step": 61500
},
{
"epoch": 9.25,
"learning_rate": 7.451923076923077e-06,
"loss": 3.0481,
"step": 61600
},
{
"epoch": 9.27,
"learning_rate": 7.301682692307693e-06,
"loss": 3.1044,
"step": 61700
},
{
"epoch": 9.28,
"learning_rate": 7.151442307692307e-06,
"loss": 3.1502,
"step": 61800
},
{
"epoch": 9.3,
"learning_rate": 7.001201923076924e-06,
"loss": 3.16,
"step": 61900
},
{
"epoch": 9.31,
"learning_rate": 6.850961538461539e-06,
"loss": 3.1364,
"step": 62000
},
{
"epoch": 9.33,
"learning_rate": 6.700721153846154e-06,
"loss": 3.1023,
"step": 62100
},
{
"epoch": 9.34,
"learning_rate": 6.55048076923077e-06,
"loss": 3.0382,
"step": 62200
},
{
"epoch": 9.36,
"learning_rate": 6.400240384615385e-06,
"loss": 3.087,
"step": 62300
},
{
"epoch": 9.38,
"learning_rate": 6.25e-06,
"loss": 3.1023,
"step": 62400
},
{
"epoch": 9.39,
"learning_rate": 6.099759615384616e-06,
"loss": 3.1571,
"step": 62500
},
{
"epoch": 9.41,
"learning_rate": 5.9495192307692305e-06,
"loss": 3.0847,
"step": 62600
},
{
"epoch": 9.42,
"learning_rate": 5.799278846153847e-06,
"loss": 3.0513,
"step": 62700
},
{
"epoch": 9.44,
"learning_rate": 5.6490384615384615e-06,
"loss": 3.1568,
"step": 62800
},
{
"epoch": 9.45,
"learning_rate": 5.498798076923077e-06,
"loss": 3.1487,
"step": 62900
},
{
"epoch": 9.47,
"learning_rate": 5.3485576923076925e-06,
"loss": 3.1173,
"step": 63000
},
{
"epoch": 9.48,
"learning_rate": 5.198317307692308e-06,
"loss": 3.0394,
"step": 63100
},
{
"epoch": 9.5,
"learning_rate": 5.0480769230769235e-06,
"loss": 3.0607,
"step": 63200
},
{
"epoch": 9.51,
"learning_rate": 4.897836538461539e-06,
"loss": 3.0811,
"step": 63300
},
{
"epoch": 9.53,
"learning_rate": 4.747596153846154e-06,
"loss": 3.1086,
"step": 63400
},
{
"epoch": 9.54,
"learning_rate": 4.59735576923077e-06,
"loss": 3.0946,
"step": 63500
},
{
"epoch": 9.56,
"learning_rate": 4.447115384615385e-06,
"loss": 3.0247,
"step": 63600
},
{
"epoch": 9.57,
"learning_rate": 4.296875e-06,
"loss": 3.1085,
"step": 63700
},
{
"epoch": 9.59,
"learning_rate": 4.146634615384616e-06,
"loss": 3.1925,
"step": 63800
},
{
"epoch": 9.6,
"learning_rate": 3.9963942307692304e-06,
"loss": 3.1547,
"step": 63900
},
{
"epoch": 9.62,
"learning_rate": 3.846153846153847e-06,
"loss": 3.0974,
"step": 64000
},
{
"epoch": 9.63,
"learning_rate": 3.695913461538462e-06,
"loss": 3.0615,
"step": 64100
},
{
"epoch": 9.65,
"learning_rate": 3.545673076923077e-06,
"loss": 3.1745,
"step": 64200
},
{
"epoch": 9.66,
"learning_rate": 3.395432692307692e-06,
"loss": 3.0591,
"step": 64300
},
{
"epoch": 9.68,
"learning_rate": 3.245192307692308e-06,
"loss": 3.0274,
"step": 64400
},
{
"epoch": 9.69,
"learning_rate": 3.0949519230769235e-06,
"loss": 3.0092,
"step": 64500
}
],
"max_steps": 66560,
"num_train_epochs": 10,
"total_flos": 1.3030638807239885e+17,
"trial_name": null,
"trial_params": null
}