diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,16620 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 20.0, + "eval_steps": 500, + "global_step": 55300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 9.041591320072332e-10, + "loss": 8.7035, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 1.8083182640144665e-09, + "loss": 8.5842, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 2.7124773960216998e-09, + "loss": 8.6549, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 3.616636528028933e-09, + "loss": 8.6371, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 4.5207956600361664e-09, + "loss": 8.6676, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5.4249547920433996e-09, + "loss": 8.6693, + "step": 120 + }, + { + "epoch": 0.05, + "learning_rate": 6.329113924050633e-09, + "loss": 8.6823, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 7.233273056057866e-09, + "loss": 8.6305, + "step": 160 + }, + { + "epoch": 0.07, + "learning_rate": 8.1374321880651e-09, + "loss": 8.7476, + "step": 180 + }, + { + "epoch": 0.07, + "learning_rate": 9.041591320072333e-09, + "loss": 8.6453, + "step": 200 + }, + { + "epoch": 0.08, + "learning_rate": 9.945750452079566e-09, + "loss": 8.7242, + "step": 220 + }, + { + "epoch": 0.09, + "learning_rate": 1.0849909584086799e-08, + "loss": 8.8198, + "step": 240 + }, + { + "epoch": 0.09, + "learning_rate": 1.1754068716094032e-08, + "loss": 8.7031, + "step": 260 + }, + { + "epoch": 0.1, + "learning_rate": 1.2658227848101265e-08, + "loss": 8.6468, + "step": 280 + }, + { + "epoch": 0.11, + "learning_rate": 1.3562386980108499e-08, + "loss": 8.5893, + "step": 300 + }, + { + "epoch": 0.12, + "learning_rate": 1.4466546112115732e-08, + "loss": 8.6628, + "step": 320 + }, + { + "epoch": 0.12, + "learning_rate": 1.5370705244122963e-08, + "loss": 8.5913, + "step": 340 + }, + { + "epoch": 0.13, + "learning_rate": 1.62748643761302e-08, + "loss": 8.6978, + "step": 360 + }, + { + "epoch": 0.14, + "learning_rate": 1.7179023508137433e-08, + "loss": 8.6814, + "step": 380 + }, + { + "epoch": 0.14, + "learning_rate": 1.8083182640144666e-08, + "loss": 8.7502, + "step": 400 + }, + { + "epoch": 0.15, + "learning_rate": 1.89873417721519e-08, + "loss": 8.5715, + "step": 420 + }, + { + "epoch": 0.16, + "learning_rate": 1.9891500904159132e-08, + "loss": 8.7008, + "step": 440 + }, + { + "epoch": 0.17, + "learning_rate": 2.0795660036166365e-08, + "loss": 8.6715, + "step": 460 + }, + { + "epoch": 0.17, + "learning_rate": 2.1699819168173598e-08, + "loss": 8.5782, + "step": 480 + }, + { + "epoch": 0.18, + "learning_rate": 2.260397830018083e-08, + "loss": 8.6972, + "step": 500 + }, + { + "epoch": 0.19, + "learning_rate": 2.3508137432188065e-08, + "loss": 8.7056, + "step": 520 + }, + { + "epoch": 0.2, + "learning_rate": 2.4412296564195298e-08, + "loss": 8.7433, + "step": 540 + }, + { + "epoch": 0.2, + "learning_rate": 2.531645569620253e-08, + "loss": 8.6857, + "step": 560 + }, + { + "epoch": 0.21, + "learning_rate": 2.6220614828209764e-08, + "loss": 8.6377, + "step": 580 + }, + { + "epoch": 0.22, + "learning_rate": 2.7124773960216997e-08, + "loss": 8.6514, + "step": 600 + }, + { + "epoch": 0.22, + "learning_rate": 2.802893309222423e-08, + "loss": 8.668, + "step": 620 + }, + { + "epoch": 0.23, + "learning_rate": 2.8933092224231463e-08, + "loss": 8.7431, + "step": 640 + }, + { + "epoch": 0.24, + "learning_rate": 2.983725135623869e-08, + "loss": 8.7326, + "step": 660 + }, + { + "epoch": 0.25, + "learning_rate": 3.0741410488245926e-08, + "loss": 8.7252, + "step": 680 + }, + { + "epoch": 0.25, + "learning_rate": 3.1645569620253166e-08, + "loss": 8.6977, + "step": 700 + }, + { + "epoch": 0.26, + "learning_rate": 3.25497287522604e-08, + "loss": 8.7162, + "step": 720 + }, + { + "epoch": 0.27, + "learning_rate": 3.345388788426763e-08, + "loss": 8.6373, + "step": 740 + }, + { + "epoch": 0.27, + "learning_rate": 3.4358047016274865e-08, + "loss": 8.5484, + "step": 760 + }, + { + "epoch": 0.28, + "learning_rate": 3.52622061482821e-08, + "loss": 8.6158, + "step": 780 + }, + { + "epoch": 0.29, + "learning_rate": 3.616636528028933e-08, + "loss": 8.6236, + "step": 800 + }, + { + "epoch": 0.3, + "learning_rate": 3.7070524412296565e-08, + "loss": 8.6796, + "step": 820 + }, + { + "epoch": 0.3, + "learning_rate": 3.79746835443038e-08, + "loss": 8.6624, + "step": 840 + }, + { + "epoch": 0.31, + "learning_rate": 3.887884267631103e-08, + "loss": 8.6699, + "step": 860 + }, + { + "epoch": 0.32, + "learning_rate": 3.9783001808318264e-08, + "loss": 8.6308, + "step": 880 + }, + { + "epoch": 0.33, + "learning_rate": 4.06871609403255e-08, + "loss": 8.5808, + "step": 900 + }, + { + "epoch": 0.33, + "learning_rate": 4.159132007233273e-08, + "loss": 8.6027, + "step": 920 + }, + { + "epoch": 0.34, + "learning_rate": 4.2495479204339963e-08, + "loss": 8.7472, + "step": 940 + }, + { + "epoch": 0.35, + "learning_rate": 4.3399638336347197e-08, + "loss": 8.6128, + "step": 960 + }, + { + "epoch": 0.35, + "learning_rate": 4.430379746835443e-08, + "loss": 8.7902, + "step": 980 + }, + { + "epoch": 0.36, + "learning_rate": 4.520795660036166e-08, + "loss": 8.6797, + "step": 1000 + }, + { + "epoch": 0.37, + "learning_rate": 4.6112115732368896e-08, + "loss": 8.6378, + "step": 1020 + }, + { + "epoch": 0.38, + "learning_rate": 4.701627486437613e-08, + "loss": 8.6656, + "step": 1040 + }, + { + "epoch": 0.38, + "learning_rate": 4.792043399638336e-08, + "loss": 8.6539, + "step": 1060 + }, + { + "epoch": 0.39, + "learning_rate": 4.8824593128390595e-08, + "loss": 8.7486, + "step": 1080 + }, + { + "epoch": 0.4, + "learning_rate": 4.972875226039783e-08, + "loss": 8.6913, + "step": 1100 + }, + { + "epoch": 0.41, + "learning_rate": 5.063291139240506e-08, + "loss": 8.8054, + "step": 1120 + }, + { + "epoch": 0.41, + "learning_rate": 5.1537070524412295e-08, + "loss": 8.6303, + "step": 1140 + }, + { + "epoch": 0.42, + "learning_rate": 5.244122965641953e-08, + "loss": 8.6419, + "step": 1160 + }, + { + "epoch": 0.43, + "learning_rate": 5.334538878842676e-08, + "loss": 8.6868, + "step": 1180 + }, + { + "epoch": 0.43, + "learning_rate": 5.4249547920433994e-08, + "loss": 8.7265, + "step": 1200 + }, + { + "epoch": 0.44, + "learning_rate": 5.515370705244123e-08, + "loss": 8.6924, + "step": 1220 + }, + { + "epoch": 0.45, + "learning_rate": 5.605786618444846e-08, + "loss": 8.6787, + "step": 1240 + }, + { + "epoch": 0.46, + "learning_rate": 5.6962025316455693e-08, + "loss": 8.693, + "step": 1260 + }, + { + "epoch": 0.46, + "learning_rate": 5.7866184448462927e-08, + "loss": 8.5927, + "step": 1280 + }, + { + "epoch": 0.47, + "learning_rate": 5.877034358047016e-08, + "loss": 8.6922, + "step": 1300 + }, + { + "epoch": 0.48, + "learning_rate": 5.967450271247739e-08, + "loss": 8.9393, + "step": 1320 + }, + { + "epoch": 0.48, + "learning_rate": 6.057866184448463e-08, + "loss": 8.7923, + "step": 1340 + }, + { + "epoch": 0.49, + "learning_rate": 6.148282097649185e-08, + "loss": 8.7394, + "step": 1360 + }, + { + "epoch": 0.5, + "learning_rate": 6.238698010849909e-08, + "loss": 8.7055, + "step": 1380 + }, + { + "epoch": 0.51, + "learning_rate": 6.329113924050633e-08, + "loss": 8.7928, + "step": 1400 + }, + { + "epoch": 0.51, + "learning_rate": 6.419529837251356e-08, + "loss": 8.6412, + "step": 1420 + }, + { + "epoch": 0.52, + "learning_rate": 6.50994575045208e-08, + "loss": 8.6909, + "step": 1440 + }, + { + "epoch": 0.53, + "learning_rate": 6.600361663652802e-08, + "loss": 8.6415, + "step": 1460 + }, + { + "epoch": 0.54, + "learning_rate": 6.690777576853526e-08, + "loss": 8.7029, + "step": 1480 + }, + { + "epoch": 0.54, + "learning_rate": 6.781193490054249e-08, + "loss": 8.6085, + "step": 1500 + }, + { + "epoch": 0.55, + "learning_rate": 6.871609403254973e-08, + "loss": 8.6977, + "step": 1520 + }, + { + "epoch": 0.56, + "learning_rate": 6.962025316455696e-08, + "loss": 8.6144, + "step": 1540 + }, + { + "epoch": 0.56, + "learning_rate": 7.05244122965642e-08, + "loss": 8.647, + "step": 1560 + }, + { + "epoch": 0.57, + "learning_rate": 7.142857142857142e-08, + "loss": 8.701, + "step": 1580 + }, + { + "epoch": 0.58, + "learning_rate": 7.233273056057866e-08, + "loss": 8.6896, + "step": 1600 + }, + { + "epoch": 0.59, + "learning_rate": 7.323688969258589e-08, + "loss": 8.7102, + "step": 1620 + }, + { + "epoch": 0.59, + "learning_rate": 7.414104882459313e-08, + "loss": 8.7441, + "step": 1640 + }, + { + "epoch": 0.6, + "learning_rate": 7.504520795660036e-08, + "loss": 8.6805, + "step": 1660 + }, + { + "epoch": 0.61, + "learning_rate": 7.59493670886076e-08, + "loss": 8.7272, + "step": 1680 + }, + { + "epoch": 0.61, + "learning_rate": 7.685352622061482e-08, + "loss": 8.6238, + "step": 1700 + }, + { + "epoch": 0.62, + "learning_rate": 7.775768535262206e-08, + "loss": 8.8162, + "step": 1720 + }, + { + "epoch": 0.63, + "learning_rate": 7.866184448462929e-08, + "loss": 8.6112, + "step": 1740 + }, + { + "epoch": 0.64, + "learning_rate": 7.956600361663653e-08, + "loss": 8.6329, + "step": 1760 + }, + { + "epoch": 0.64, + "learning_rate": 8.047016274864375e-08, + "loss": 8.5605, + "step": 1780 + }, + { + "epoch": 0.65, + "learning_rate": 8.1374321880651e-08, + "loss": 8.6107, + "step": 1800 + }, + { + "epoch": 0.66, + "learning_rate": 8.227848101265822e-08, + "loss": 8.6376, + "step": 1820 + }, + { + "epoch": 0.67, + "learning_rate": 8.318264014466546e-08, + "loss": 8.6818, + "step": 1840 + }, + { + "epoch": 0.67, + "learning_rate": 8.408679927667269e-08, + "loss": 8.7487, + "step": 1860 + }, + { + "epoch": 0.68, + "learning_rate": 8.499095840867993e-08, + "loss": 8.6741, + "step": 1880 + }, + { + "epoch": 0.69, + "learning_rate": 8.589511754068715e-08, + "loss": 8.6679, + "step": 1900 + }, + { + "epoch": 0.69, + "learning_rate": 8.679927667269439e-08, + "loss": 8.6636, + "step": 1920 + }, + { + "epoch": 0.7, + "learning_rate": 8.770343580470162e-08, + "loss": 8.7159, + "step": 1940 + }, + { + "epoch": 0.71, + "learning_rate": 8.860759493670886e-08, + "loss": 8.5982, + "step": 1960 + }, + { + "epoch": 0.72, + "learning_rate": 8.951175406871609e-08, + "loss": 8.7386, + "step": 1980 + }, + { + "epoch": 0.72, + "learning_rate": 9.041591320072333e-08, + "loss": 8.6729, + "step": 2000 + }, + { + "epoch": 0.73, + "learning_rate": 9.132007233273057e-08, + "loss": 8.6625, + "step": 2020 + }, + { + "epoch": 0.74, + "learning_rate": 9.222423146473779e-08, + "loss": 8.6866, + "step": 2040 + }, + { + "epoch": 0.75, + "learning_rate": 9.312839059674503e-08, + "loss": 8.6284, + "step": 2060 + }, + { + "epoch": 0.75, + "learning_rate": 9.403254972875226e-08, + "loss": 8.6068, + "step": 2080 + }, + { + "epoch": 0.76, + "learning_rate": 9.49367088607595e-08, + "loss": 8.7023, + "step": 2100 + }, + { + "epoch": 0.77, + "learning_rate": 9.584086799276672e-08, + "loss": 8.6516, + "step": 2120 + }, + { + "epoch": 0.77, + "learning_rate": 9.674502712477396e-08, + "loss": 8.746, + "step": 2140 + }, + { + "epoch": 0.78, + "learning_rate": 9.764918625678119e-08, + "loss": 8.5947, + "step": 2160 + }, + { + "epoch": 0.79, + "learning_rate": 9.855334538878843e-08, + "loss": 8.7309, + "step": 2180 + }, + { + "epoch": 0.8, + "learning_rate": 9.945750452079566e-08, + "loss": 8.7407, + "step": 2200 + }, + { + "epoch": 0.8, + "learning_rate": 1.003616636528029e-07, + "loss": 8.7647, + "step": 2220 + }, + { + "epoch": 0.81, + "learning_rate": 1.0126582278481012e-07, + "loss": 8.6067, + "step": 2240 + }, + { + "epoch": 0.82, + "learning_rate": 1.0216998191681736e-07, + "loss": 8.6241, + "step": 2260 + }, + { + "epoch": 0.82, + "learning_rate": 1.0307414104882459e-07, + "loss": 8.6616, + "step": 2280 + }, + { + "epoch": 0.83, + "learning_rate": 1.0397830018083183e-07, + "loss": 8.7636, + "step": 2300 + }, + { + "epoch": 0.84, + "learning_rate": 1.0488245931283906e-07, + "loss": 8.6465, + "step": 2320 + }, + { + "epoch": 0.85, + "learning_rate": 1.057866184448463e-07, + "loss": 8.6704, + "step": 2340 + }, + { + "epoch": 0.85, + "learning_rate": 1.0669077757685352e-07, + "loss": 8.5897, + "step": 2360 + }, + { + "epoch": 0.86, + "learning_rate": 1.0759493670886076e-07, + "loss": 8.664, + "step": 2380 + }, + { + "epoch": 0.87, + "learning_rate": 1.0849909584086799e-07, + "loss": 8.7798, + "step": 2400 + }, + { + "epoch": 0.88, + "learning_rate": 1.0940325497287523e-07, + "loss": 8.6508, + "step": 2420 + }, + { + "epoch": 0.88, + "learning_rate": 1.1030741410488245e-07, + "loss": 8.7532, + "step": 2440 + }, + { + "epoch": 0.89, + "learning_rate": 1.112115732368897e-07, + "loss": 8.6628, + "step": 2460 + }, + { + "epoch": 0.9, + "learning_rate": 1.1211573236889692e-07, + "loss": 8.6175, + "step": 2480 + }, + { + "epoch": 0.9, + "learning_rate": 1.1301989150090416e-07, + "loss": 8.7609, + "step": 2500 + }, + { + "epoch": 0.91, + "learning_rate": 1.1392405063291139e-07, + "loss": 8.7609, + "step": 2520 + }, + { + "epoch": 0.92, + "learning_rate": 1.1482820976491863e-07, + "loss": 8.5844, + "step": 2540 + }, + { + "epoch": 0.93, + "learning_rate": 1.1573236889692585e-07, + "loss": 8.6872, + "step": 2560 + }, + { + "epoch": 0.93, + "learning_rate": 1.1663652802893309e-07, + "loss": 8.7795, + "step": 2580 + }, + { + "epoch": 0.94, + "learning_rate": 1.1754068716094032e-07, + "loss": 8.7157, + "step": 2600 + }, + { + "epoch": 0.95, + "learning_rate": 1.1844484629294756e-07, + "loss": 8.767, + "step": 2620 + }, + { + "epoch": 0.95, + "learning_rate": 1.1934900542495477e-07, + "loss": 8.6297, + "step": 2640 + }, + { + "epoch": 0.96, + "learning_rate": 1.20253164556962e-07, + "loss": 8.6604, + "step": 2660 + }, + { + "epoch": 0.97, + "learning_rate": 1.2115732368896925e-07, + "loss": 8.6753, + "step": 2680 + }, + { + "epoch": 0.98, + "learning_rate": 1.220614828209765e-07, + "loss": 8.8354, + "step": 2700 + }, + { + "epoch": 0.98, + "learning_rate": 1.229656419529837e-07, + "loss": 8.6935, + "step": 2720 + }, + { + "epoch": 0.99, + "learning_rate": 1.2386980108499094e-07, + "loss": 8.6787, + "step": 2740 + }, + { + "epoch": 1.0, + "learning_rate": 1.2477396021699818e-07, + "loss": 8.6191, + "step": 2760 + }, + { + "epoch": 1.01, + "learning_rate": 1.256781193490054e-07, + "loss": 8.6495, + "step": 2780 + }, + { + "epoch": 1.01, + "learning_rate": 1.2658227848101266e-07, + "loss": 8.7641, + "step": 2800 + }, + { + "epoch": 1.02, + "learning_rate": 1.2748643761301988e-07, + "loss": 8.7237, + "step": 2820 + }, + { + "epoch": 1.03, + "learning_rate": 1.2839059674502712e-07, + "loss": 8.6795, + "step": 2840 + }, + { + "epoch": 1.03, + "learning_rate": 1.2929475587703433e-07, + "loss": 8.6644, + "step": 2860 + }, + { + "epoch": 1.04, + "learning_rate": 1.301989150090416e-07, + "loss": 8.5847, + "step": 2880 + }, + { + "epoch": 1.05, + "learning_rate": 1.311030741410488e-07, + "loss": 8.5999, + "step": 2900 + }, + { + "epoch": 1.06, + "learning_rate": 1.3200723327305605e-07, + "loss": 8.6626, + "step": 2920 + }, + { + "epoch": 1.06, + "learning_rate": 1.3291139240506326e-07, + "loss": 8.7718, + "step": 2940 + }, + { + "epoch": 1.07, + "learning_rate": 1.3381555153707053e-07, + "loss": 8.5763, + "step": 2960 + }, + { + "epoch": 1.08, + "learning_rate": 1.3471971066907774e-07, + "loss": 8.6696, + "step": 2980 + }, + { + "epoch": 1.08, + "learning_rate": 1.3562386980108498e-07, + "loss": 8.6623, + "step": 3000 + }, + { + "epoch": 1.09, + "learning_rate": 1.3652802893309222e-07, + "loss": 8.6512, + "step": 3020 + }, + { + "epoch": 1.1, + "learning_rate": 1.3743218806509946e-07, + "loss": 8.6001, + "step": 3040 + }, + { + "epoch": 1.11, + "learning_rate": 1.3833634719710667e-07, + "loss": 8.6426, + "step": 3060 + }, + { + "epoch": 1.11, + "learning_rate": 1.3924050632911391e-07, + "loss": 8.7206, + "step": 3080 + }, + { + "epoch": 1.12, + "learning_rate": 1.4014466546112115e-07, + "loss": 8.6603, + "step": 3100 + }, + { + "epoch": 1.13, + "learning_rate": 1.410488245931284e-07, + "loss": 8.6396, + "step": 3120 + }, + { + "epoch": 1.14, + "learning_rate": 1.419529837251356e-07, + "loss": 8.7162, + "step": 3140 + }, + { + "epoch": 1.14, + "learning_rate": 1.4285714285714285e-07, + "loss": 8.6815, + "step": 3160 + }, + { + "epoch": 1.15, + "learning_rate": 1.4376130198915009e-07, + "loss": 8.6283, + "step": 3180 + }, + { + "epoch": 1.16, + "learning_rate": 1.4466546112115733e-07, + "loss": 8.76, + "step": 3200 + }, + { + "epoch": 1.16, + "learning_rate": 1.4556962025316454e-07, + "loss": 8.6929, + "step": 3220 + }, + { + "epoch": 1.17, + "learning_rate": 1.4647377938517178e-07, + "loss": 8.5704, + "step": 3240 + }, + { + "epoch": 1.18, + "learning_rate": 1.4737793851717902e-07, + "loss": 8.7375, + "step": 3260 + }, + { + "epoch": 1.19, + "learning_rate": 1.4828209764918626e-07, + "loss": 8.7367, + "step": 3280 + }, + { + "epoch": 1.19, + "learning_rate": 1.4918625678119347e-07, + "loss": 8.7486, + "step": 3300 + }, + { + "epoch": 1.2, + "learning_rate": 1.500904159132007e-07, + "loss": 8.6897, + "step": 3320 + }, + { + "epoch": 1.21, + "learning_rate": 1.5099457504520795e-07, + "loss": 8.6202, + "step": 3340 + }, + { + "epoch": 1.22, + "learning_rate": 1.518987341772152e-07, + "loss": 8.6978, + "step": 3360 + }, + { + "epoch": 1.22, + "learning_rate": 1.528028933092224e-07, + "loss": 8.6673, + "step": 3380 + }, + { + "epoch": 1.23, + "learning_rate": 1.5370705244122964e-07, + "loss": 8.6666, + "step": 3400 + }, + { + "epoch": 1.24, + "learning_rate": 1.5461121157323688e-07, + "loss": 8.6343, + "step": 3420 + }, + { + "epoch": 1.24, + "learning_rate": 1.5551537070524412e-07, + "loss": 8.6778, + "step": 3440 + }, + { + "epoch": 1.25, + "learning_rate": 1.5641952983725134e-07, + "loss": 8.5937, + "step": 3460 + }, + { + "epoch": 1.26, + "learning_rate": 1.5732368896925858e-07, + "loss": 8.7552, + "step": 3480 + }, + { + "epoch": 1.27, + "learning_rate": 1.5822784810126582e-07, + "loss": 8.7656, + "step": 3500 + }, + { + "epoch": 1.27, + "learning_rate": 1.5913200723327306e-07, + "loss": 8.7108, + "step": 3520 + }, + { + "epoch": 1.28, + "learning_rate": 1.6003616636528027e-07, + "loss": 8.8032, + "step": 3540 + }, + { + "epoch": 1.29, + "learning_rate": 1.609403254972875e-07, + "loss": 8.6476, + "step": 3560 + }, + { + "epoch": 1.29, + "learning_rate": 1.6184448462929475e-07, + "loss": 8.6317, + "step": 3580 + }, + { + "epoch": 1.3, + "learning_rate": 1.62748643761302e-07, + "loss": 8.6736, + "step": 3600 + }, + { + "epoch": 1.31, + "learning_rate": 1.636528028933092e-07, + "loss": 8.6469, + "step": 3620 + }, + { + "epoch": 1.32, + "learning_rate": 1.6455696202531644e-07, + "loss": 8.6429, + "step": 3640 + }, + { + "epoch": 1.32, + "learning_rate": 1.6546112115732368e-07, + "loss": 8.6454, + "step": 3660 + }, + { + "epoch": 1.33, + "learning_rate": 1.6636528028933092e-07, + "loss": 8.6146, + "step": 3680 + }, + { + "epoch": 1.34, + "learning_rate": 1.6726943942133813e-07, + "loss": 8.6884, + "step": 3700 + }, + { + "epoch": 1.35, + "learning_rate": 1.6817359855334537e-07, + "loss": 8.586, + "step": 3720 + }, + { + "epoch": 1.35, + "learning_rate": 1.6907775768535261e-07, + "loss": 8.6912, + "step": 3740 + }, + { + "epoch": 1.36, + "learning_rate": 1.6998191681735985e-07, + "loss": 8.6692, + "step": 3760 + }, + { + "epoch": 1.37, + "learning_rate": 1.7088607594936707e-07, + "loss": 8.7503, + "step": 3780 + }, + { + "epoch": 1.37, + "learning_rate": 1.717902350813743e-07, + "loss": 8.8529, + "step": 3800 + }, + { + "epoch": 1.38, + "learning_rate": 1.7269439421338155e-07, + "loss": 8.6796, + "step": 3820 + }, + { + "epoch": 1.39, + "learning_rate": 1.7359855334538879e-07, + "loss": 8.651, + "step": 3840 + }, + { + "epoch": 1.4, + "learning_rate": 1.74502712477396e-07, + "loss": 8.6298, + "step": 3860 + }, + { + "epoch": 1.4, + "learning_rate": 1.7540687160940324e-07, + "loss": 8.7301, + "step": 3880 + }, + { + "epoch": 1.41, + "learning_rate": 1.7631103074141048e-07, + "loss": 8.6036, + "step": 3900 + }, + { + "epoch": 1.42, + "learning_rate": 1.7721518987341772e-07, + "loss": 8.674, + "step": 3920 + }, + { + "epoch": 1.42, + "learning_rate": 1.7811934900542493e-07, + "loss": 8.7339, + "step": 3940 + }, + { + "epoch": 1.43, + "learning_rate": 1.7902350813743217e-07, + "loss": 8.7509, + "step": 3960 + }, + { + "epoch": 1.44, + "learning_rate": 1.799276672694394e-07, + "loss": 8.7087, + "step": 3980 + }, + { + "epoch": 1.45, + "learning_rate": 1.8083182640144665e-07, + "loss": 8.6391, + "step": 4000 + }, + { + "epoch": 1.45, + "learning_rate": 1.8173598553345386e-07, + "loss": 8.7756, + "step": 4020 + }, + { + "epoch": 1.46, + "learning_rate": 1.8264014466546113e-07, + "loss": 8.6629, + "step": 4040 + }, + { + "epoch": 1.47, + "learning_rate": 1.8354430379746834e-07, + "loss": 8.8972, + "step": 4060 + }, + { + "epoch": 1.48, + "learning_rate": 1.8444846292947558e-07, + "loss": 8.6525, + "step": 4080 + }, + { + "epoch": 1.48, + "learning_rate": 1.853526220614828e-07, + "loss": 8.6576, + "step": 4100 + }, + { + "epoch": 1.49, + "learning_rate": 1.8625678119349006e-07, + "loss": 8.6853, + "step": 4120 + }, + { + "epoch": 1.5, + "learning_rate": 1.8716094032549728e-07, + "loss": 8.5977, + "step": 4140 + }, + { + "epoch": 1.5, + "learning_rate": 1.8806509945750452e-07, + "loss": 8.6767, + "step": 4160 + }, + { + "epoch": 1.51, + "learning_rate": 1.8896925858951173e-07, + "loss": 8.5974, + "step": 4180 + }, + { + "epoch": 1.52, + "learning_rate": 1.89873417721519e-07, + "loss": 8.6597, + "step": 4200 + }, + { + "epoch": 1.53, + "learning_rate": 1.907775768535262e-07, + "loss": 8.6174, + "step": 4220 + }, + { + "epoch": 1.53, + "learning_rate": 1.9168173598553345e-07, + "loss": 8.657, + "step": 4240 + }, + { + "epoch": 1.54, + "learning_rate": 1.9258589511754066e-07, + "loss": 8.6542, + "step": 4260 + }, + { + "epoch": 1.55, + "learning_rate": 1.9349005424954793e-07, + "loss": 8.704, + "step": 4280 + }, + { + "epoch": 1.56, + "learning_rate": 1.9439421338155514e-07, + "loss": 8.6938, + "step": 4300 + }, + { + "epoch": 1.56, + "learning_rate": 1.9529837251356238e-07, + "loss": 8.592, + "step": 4320 + }, + { + "epoch": 1.57, + "learning_rate": 1.962025316455696e-07, + "loss": 8.7386, + "step": 4340 + }, + { + "epoch": 1.58, + "learning_rate": 1.9710669077757686e-07, + "loss": 8.5609, + "step": 4360 + }, + { + "epoch": 1.58, + "learning_rate": 1.9801084990958407e-07, + "loss": 8.7259, + "step": 4380 + }, + { + "epoch": 1.59, + "learning_rate": 1.9891500904159131e-07, + "loss": 8.642, + "step": 4400 + }, + { + "epoch": 1.6, + "learning_rate": 1.9981916817359853e-07, + "loss": 8.711, + "step": 4420 + }, + { + "epoch": 1.61, + "learning_rate": 2.007233273056058e-07, + "loss": 8.5766, + "step": 4440 + }, + { + "epoch": 1.61, + "learning_rate": 2.01627486437613e-07, + "loss": 8.8441, + "step": 4460 + }, + { + "epoch": 1.62, + "learning_rate": 2.0253164556962025e-07, + "loss": 8.6968, + "step": 4480 + }, + { + "epoch": 1.63, + "learning_rate": 2.0343580470162746e-07, + "loss": 8.6558, + "step": 4500 + }, + { + "epoch": 1.63, + "learning_rate": 2.0433996383363473e-07, + "loss": 8.708, + "step": 4520 + }, + { + "epoch": 1.64, + "learning_rate": 2.0524412296564194e-07, + "loss": 8.6468, + "step": 4540 + }, + { + "epoch": 1.65, + "learning_rate": 2.0614828209764918e-07, + "loss": 8.76, + "step": 4560 + }, + { + "epoch": 1.66, + "learning_rate": 2.070524412296564e-07, + "loss": 8.6234, + "step": 4580 + }, + { + "epoch": 1.66, + "learning_rate": 2.0795660036166366e-07, + "loss": 8.6519, + "step": 4600 + }, + { + "epoch": 1.67, + "learning_rate": 2.0886075949367087e-07, + "loss": 8.8524, + "step": 4620 + }, + { + "epoch": 1.68, + "learning_rate": 2.097649186256781e-07, + "loss": 8.6722, + "step": 4640 + }, + { + "epoch": 1.69, + "learning_rate": 2.1066907775768532e-07, + "loss": 8.7367, + "step": 4660 + }, + { + "epoch": 1.69, + "learning_rate": 2.115732368896926e-07, + "loss": 8.6933, + "step": 4680 + }, + { + "epoch": 1.7, + "learning_rate": 2.124773960216998e-07, + "loss": 8.5989, + "step": 4700 + }, + { + "epoch": 1.71, + "learning_rate": 2.1338155515370704e-07, + "loss": 8.7641, + "step": 4720 + }, + { + "epoch": 1.71, + "learning_rate": 2.1428571428571426e-07, + "loss": 8.6898, + "step": 4740 + }, + { + "epoch": 1.72, + "learning_rate": 2.1518987341772152e-07, + "loss": 8.7893, + "step": 4760 + }, + { + "epoch": 1.73, + "learning_rate": 2.1609403254972874e-07, + "loss": 8.6064, + "step": 4780 + }, + { + "epoch": 1.74, + "learning_rate": 2.1699819168173598e-07, + "loss": 8.6028, + "step": 4800 + }, + { + "epoch": 1.74, + "learning_rate": 2.179023508137432e-07, + "loss": 8.7767, + "step": 4820 + }, + { + "epoch": 1.75, + "learning_rate": 2.1880650994575046e-07, + "loss": 8.7362, + "step": 4840 + }, + { + "epoch": 1.76, + "learning_rate": 2.1971066907775767e-07, + "loss": 8.6766, + "step": 4860 + }, + { + "epoch": 1.76, + "learning_rate": 2.206148282097649e-07, + "loss": 8.6547, + "step": 4880 + }, + { + "epoch": 1.77, + "learning_rate": 2.2151898734177212e-07, + "loss": 8.6397, + "step": 4900 + }, + { + "epoch": 1.78, + "learning_rate": 2.224231464737794e-07, + "loss": 8.7055, + "step": 4920 + }, + { + "epoch": 1.79, + "learning_rate": 2.233273056057866e-07, + "loss": 8.5466, + "step": 4940 + }, + { + "epoch": 1.79, + "learning_rate": 2.2423146473779384e-07, + "loss": 8.6754, + "step": 4960 + }, + { + "epoch": 1.8, + "learning_rate": 2.2513562386980105e-07, + "loss": 8.6826, + "step": 4980 + }, + { + "epoch": 1.81, + "learning_rate": 2.2603978300180832e-07, + "loss": 8.6895, + "step": 5000 + }, + { + "epoch": 1.82, + "learning_rate": 2.2694394213381553e-07, + "loss": 8.7094, + "step": 5020 + }, + { + "epoch": 1.82, + "learning_rate": 2.2784810126582277e-07, + "loss": 8.7513, + "step": 5040 + }, + { + "epoch": 1.83, + "learning_rate": 2.2875226039783001e-07, + "loss": 8.6838, + "step": 5060 + }, + { + "epoch": 1.84, + "learning_rate": 2.2965641952983725e-07, + "loss": 8.6037, + "step": 5080 + }, + { + "epoch": 1.84, + "learning_rate": 2.3056057866184447e-07, + "loss": 8.7219, + "step": 5100 + }, + { + "epoch": 1.85, + "learning_rate": 2.314647377938517e-07, + "loss": 8.6567, + "step": 5120 + }, + { + "epoch": 1.86, + "learning_rate": 2.3236889692585895e-07, + "loss": 8.6228, + "step": 5140 + }, + { + "epoch": 1.87, + "learning_rate": 2.3327305605786619e-07, + "loss": 8.6443, + "step": 5160 + }, + { + "epoch": 1.87, + "learning_rate": 2.341772151898734e-07, + "loss": 8.6465, + "step": 5180 + }, + { + "epoch": 1.88, + "learning_rate": 2.3508137432188064e-07, + "loss": 8.6116, + "step": 5200 + }, + { + "epoch": 1.89, + "learning_rate": 2.3598553345388788e-07, + "loss": 8.7553, + "step": 5220 + }, + { + "epoch": 1.9, + "learning_rate": 2.3688969258589512e-07, + "loss": 8.7313, + "step": 5240 + }, + { + "epoch": 1.9, + "learning_rate": 2.3779385171790233e-07, + "loss": 8.6952, + "step": 5260 + }, + { + "epoch": 1.91, + "learning_rate": 2.3869801084990954e-07, + "loss": 8.6394, + "step": 5280 + }, + { + "epoch": 1.92, + "learning_rate": 2.3960216998191684e-07, + "loss": 8.8396, + "step": 5300 + }, + { + "epoch": 1.92, + "learning_rate": 2.40506329113924e-07, + "loss": 8.6893, + "step": 5320 + }, + { + "epoch": 1.93, + "learning_rate": 2.4141048824593126e-07, + "loss": 8.5754, + "step": 5340 + }, + { + "epoch": 1.94, + "learning_rate": 2.423146473779385e-07, + "loss": 8.7712, + "step": 5360 + }, + { + "epoch": 1.95, + "learning_rate": 2.4321880650994574e-07, + "loss": 8.8228, + "step": 5380 + }, + { + "epoch": 1.95, + "learning_rate": 2.44122965641953e-07, + "loss": 8.6878, + "step": 5400 + }, + { + "epoch": 1.96, + "learning_rate": 2.450271247739602e-07, + "loss": 8.6763, + "step": 5420 + }, + { + "epoch": 1.97, + "learning_rate": 2.459312839059674e-07, + "loss": 8.6758, + "step": 5440 + }, + { + "epoch": 1.97, + "learning_rate": 2.468354430379747e-07, + "loss": 8.7743, + "step": 5460 + }, + { + "epoch": 1.98, + "learning_rate": 2.477396021699819e-07, + "loss": 8.6171, + "step": 5480 + }, + { + "epoch": 1.99, + "learning_rate": 2.4864376130198913e-07, + "loss": 8.8626, + "step": 5500 + }, + { + "epoch": 2.0, + "learning_rate": 2.4954792043399637e-07, + "loss": 8.7344, + "step": 5520 + }, + { + "epoch": 2.0, + "learning_rate": 2.504520795660036e-07, + "loss": 8.7143, + "step": 5540 + }, + { + "epoch": 2.01, + "learning_rate": 2.513562386980108e-07, + "loss": 8.6419, + "step": 5560 + }, + { + "epoch": 2.02, + "learning_rate": 2.522603978300181e-07, + "loss": 8.6991, + "step": 5580 + }, + { + "epoch": 2.03, + "learning_rate": 2.5316455696202533e-07, + "loss": 8.8916, + "step": 5600 + }, + { + "epoch": 2.03, + "learning_rate": 2.540687160940325e-07, + "loss": 8.6356, + "step": 5620 + }, + { + "epoch": 2.04, + "learning_rate": 2.5497287522603975e-07, + "loss": 8.7768, + "step": 5640 + }, + { + "epoch": 2.05, + "learning_rate": 2.5587703435804705e-07, + "loss": 8.6408, + "step": 5660 + }, + { + "epoch": 2.05, + "learning_rate": 2.5678119349005423e-07, + "loss": 8.7009, + "step": 5680 + }, + { + "epoch": 2.06, + "learning_rate": 2.576853526220615e-07, + "loss": 8.6798, + "step": 5700 + }, + { + "epoch": 2.07, + "learning_rate": 2.5858951175406866e-07, + "loss": 8.7363, + "step": 5720 + }, + { + "epoch": 2.08, + "learning_rate": 2.5949367088607595e-07, + "loss": 8.7773, + "step": 5740 + }, + { + "epoch": 2.08, + "learning_rate": 2.603978300180832e-07, + "loss": 8.5859, + "step": 5760 + }, + { + "epoch": 2.09, + "learning_rate": 2.613019891500904e-07, + "loss": 8.7, + "step": 5780 + }, + { + "epoch": 2.1, + "learning_rate": 2.622061482820976e-07, + "loss": 8.6803, + "step": 5800 + }, + { + "epoch": 2.1, + "learning_rate": 2.631103074141049e-07, + "loss": 8.7215, + "step": 5820 + }, + { + "epoch": 2.11, + "learning_rate": 2.640144665461121e-07, + "loss": 8.6357, + "step": 5840 + }, + { + "epoch": 2.12, + "learning_rate": 2.6491862567811934e-07, + "loss": 8.6427, + "step": 5860 + }, + { + "epoch": 2.13, + "learning_rate": 2.658227848101265e-07, + "loss": 8.7049, + "step": 5880 + }, + { + "epoch": 2.13, + "learning_rate": 2.667269439421338e-07, + "loss": 8.6825, + "step": 5900 + }, + { + "epoch": 2.14, + "learning_rate": 2.6763110307414106e-07, + "loss": 8.5983, + "step": 5920 + }, + { + "epoch": 2.15, + "learning_rate": 2.6853526220614824e-07, + "loss": 8.7502, + "step": 5940 + }, + { + "epoch": 2.16, + "learning_rate": 2.694394213381555e-07, + "loss": 8.6624, + "step": 5960 + }, + { + "epoch": 2.16, + "learning_rate": 2.703435804701628e-07, + "loss": 8.586, + "step": 5980 + }, + { + "epoch": 2.17, + "learning_rate": 2.7124773960216996e-07, + "loss": 8.6519, + "step": 6000 + }, + { + "epoch": 2.18, + "learning_rate": 2.721518987341772e-07, + "loss": 8.6292, + "step": 6020 + }, + { + "epoch": 2.18, + "learning_rate": 2.7305605786618444e-07, + "loss": 8.6325, + "step": 6040 + }, + { + "epoch": 2.19, + "learning_rate": 2.739602169981917e-07, + "loss": 8.6599, + "step": 6060 + }, + { + "epoch": 2.2, + "learning_rate": 2.748643761301989e-07, + "loss": 8.6505, + "step": 6080 + }, + { + "epoch": 2.21, + "learning_rate": 2.757685352622061e-07, + "loss": 8.6876, + "step": 6100 + }, + { + "epoch": 2.21, + "learning_rate": 2.7667269439421335e-07, + "loss": 8.6015, + "step": 6120 + }, + { + "epoch": 2.22, + "learning_rate": 2.7757685352622064e-07, + "loss": 8.5816, + "step": 6140 + }, + { + "epoch": 2.23, + "learning_rate": 2.7848101265822783e-07, + "loss": 8.6318, + "step": 6160 + }, + { + "epoch": 2.24, + "learning_rate": 2.7938517179023507e-07, + "loss": 8.6486, + "step": 6180 + }, + { + "epoch": 2.24, + "learning_rate": 2.802893309222423e-07, + "loss": 8.6428, + "step": 6200 + }, + { + "epoch": 2.25, + "learning_rate": 2.8119349005424955e-07, + "loss": 8.6547, + "step": 6220 + }, + { + "epoch": 2.26, + "learning_rate": 2.820976491862568e-07, + "loss": 8.6352, + "step": 6240 + }, + { + "epoch": 2.26, + "learning_rate": 2.83001808318264e-07, + "loss": 8.6556, + "step": 6260 + }, + { + "epoch": 2.27, + "learning_rate": 2.839059674502712e-07, + "loss": 8.5903, + "step": 6280 + }, + { + "epoch": 2.28, + "learning_rate": 2.848101265822785e-07, + "loss": 8.645, + "step": 6300 + }, + { + "epoch": 2.29, + "learning_rate": 2.857142857142857e-07, + "loss": 8.6496, + "step": 6320 + }, + { + "epoch": 2.29, + "learning_rate": 2.8661844484629293e-07, + "loss": 8.678, + "step": 6340 + }, + { + "epoch": 2.3, + "learning_rate": 2.8752260397830017e-07, + "loss": 8.7506, + "step": 6360 + }, + { + "epoch": 2.31, + "learning_rate": 2.884267631103074e-07, + "loss": 8.7135, + "step": 6380 + }, + { + "epoch": 2.31, + "learning_rate": 2.8933092224231465e-07, + "loss": 8.6221, + "step": 6400 + }, + { + "epoch": 2.32, + "learning_rate": 2.9023508137432184e-07, + "loss": 8.646, + "step": 6420 + }, + { + "epoch": 2.33, + "learning_rate": 2.911392405063291e-07, + "loss": 8.5481, + "step": 6440 + }, + { + "epoch": 2.34, + "learning_rate": 2.9204339963833637e-07, + "loss": 8.7291, + "step": 6460 + }, + { + "epoch": 2.34, + "learning_rate": 2.9294755877034356e-07, + "loss": 8.6583, + "step": 6480 + }, + { + "epoch": 2.35, + "learning_rate": 2.938517179023508e-07, + "loss": 8.5624, + "step": 6500 + }, + { + "epoch": 2.36, + "learning_rate": 2.9475587703435804e-07, + "loss": 8.648, + "step": 6520 + }, + { + "epoch": 2.37, + "learning_rate": 2.956600361663653e-07, + "loss": 8.6145, + "step": 6540 + }, + { + "epoch": 2.37, + "learning_rate": 2.965641952983725e-07, + "loss": 8.6264, + "step": 6560 + }, + { + "epoch": 2.38, + "learning_rate": 2.974683544303797e-07, + "loss": 8.6417, + "step": 6580 + }, + { + "epoch": 2.39, + "learning_rate": 2.9837251356238694e-07, + "loss": 8.6012, + "step": 6600 + }, + { + "epoch": 2.39, + "learning_rate": 2.9927667269439424e-07, + "loss": 8.6653, + "step": 6620 + }, + { + "epoch": 2.4, + "learning_rate": 3.001808318264014e-07, + "loss": 8.7473, + "step": 6640 + }, + { + "epoch": 2.41, + "learning_rate": 3.0108499095840866e-07, + "loss": 8.6582, + "step": 6660 + }, + { + "epoch": 2.42, + "learning_rate": 3.019891500904159e-07, + "loss": 8.679, + "step": 6680 + }, + { + "epoch": 2.42, + "learning_rate": 3.0289330922242314e-07, + "loss": 8.6476, + "step": 6700 + }, + { + "epoch": 2.43, + "learning_rate": 3.037974683544304e-07, + "loss": 8.6075, + "step": 6720 + }, + { + "epoch": 2.44, + "learning_rate": 3.0470162748643757e-07, + "loss": 8.6942, + "step": 6740 + }, + { + "epoch": 2.44, + "learning_rate": 3.056057866184448e-07, + "loss": 8.6298, + "step": 6760 + }, + { + "epoch": 2.45, + "learning_rate": 3.065099457504521e-07, + "loss": 8.6439, + "step": 6780 + }, + { + "epoch": 2.46, + "learning_rate": 3.074141048824593e-07, + "loss": 8.6465, + "step": 6800 + }, + { + "epoch": 2.47, + "learning_rate": 3.0831826401446653e-07, + "loss": 8.6938, + "step": 6820 + }, + { + "epoch": 2.47, + "learning_rate": 3.0922242314647377e-07, + "loss": 8.6629, + "step": 6840 + }, + { + "epoch": 2.48, + "learning_rate": 3.10126582278481e-07, + "loss": 8.6653, + "step": 6860 + }, + { + "epoch": 2.49, + "learning_rate": 3.1103074141048825e-07, + "loss": 8.6678, + "step": 6880 + }, + { + "epoch": 2.5, + "learning_rate": 3.1193490054249543e-07, + "loss": 8.6495, + "step": 6900 + }, + { + "epoch": 2.5, + "learning_rate": 3.128390596745027e-07, + "loss": 8.6629, + "step": 6920 + }, + { + "epoch": 2.51, + "learning_rate": 3.1374321880650997e-07, + "loss": 8.6308, + "step": 6940 + }, + { + "epoch": 2.52, + "learning_rate": 3.1464737793851715e-07, + "loss": 8.723, + "step": 6960 + }, + { + "epoch": 2.52, + "learning_rate": 3.155515370705244e-07, + "loss": 8.7843, + "step": 6980 + }, + { + "epoch": 2.53, + "learning_rate": 3.1645569620253163e-07, + "loss": 8.7357, + "step": 7000 + }, + { + "epoch": 2.54, + "learning_rate": 3.1735985533453887e-07, + "loss": 8.6289, + "step": 7020 + }, + { + "epoch": 2.55, + "learning_rate": 3.182640144665461e-07, + "loss": 8.722, + "step": 7040 + }, + { + "epoch": 2.55, + "learning_rate": 3.1916817359855335e-07, + "loss": 8.6224, + "step": 7060 + }, + { + "epoch": 2.56, + "learning_rate": 3.2007233273056054e-07, + "loss": 8.6937, + "step": 7080 + }, + { + "epoch": 2.57, + "learning_rate": 3.2097649186256783e-07, + "loss": 8.6861, + "step": 7100 + }, + { + "epoch": 2.58, + "learning_rate": 3.21880650994575e-07, + "loss": 8.771, + "step": 7120 + }, + { + "epoch": 2.58, + "learning_rate": 3.2278481012658226e-07, + "loss": 8.6539, + "step": 7140 + }, + { + "epoch": 2.59, + "learning_rate": 3.236889692585895e-07, + "loss": 8.6444, + "step": 7160 + }, + { + "epoch": 2.6, + "learning_rate": 3.2459312839059674e-07, + "loss": 8.7097, + "step": 7180 + }, + { + "epoch": 2.6, + "learning_rate": 3.25497287522604e-07, + "loss": 8.6235, + "step": 7200 + }, + { + "epoch": 2.61, + "learning_rate": 3.264014466546112e-07, + "loss": 8.6301, + "step": 7220 + }, + { + "epoch": 2.62, + "learning_rate": 3.273056057866184e-07, + "loss": 8.6158, + "step": 7240 + }, + { + "epoch": 2.63, + "learning_rate": 3.282097649186257e-07, + "loss": 8.7017, + "step": 7260 + }, + { + "epoch": 2.63, + "learning_rate": 3.291139240506329e-07, + "loss": 8.5819, + "step": 7280 + }, + { + "epoch": 2.64, + "learning_rate": 3.300180831826401e-07, + "loss": 8.6706, + "step": 7300 + }, + { + "epoch": 2.65, + "learning_rate": 3.3092224231464736e-07, + "loss": 8.7503, + "step": 7320 + }, + { + "epoch": 2.65, + "learning_rate": 3.318264014466546e-07, + "loss": 8.651, + "step": 7340 + }, + { + "epoch": 2.66, + "learning_rate": 3.3273056057866184e-07, + "loss": 8.6765, + "step": 7360 + }, + { + "epoch": 2.67, + "learning_rate": 3.336347197106691e-07, + "loss": 8.6551, + "step": 7380 + }, + { + "epoch": 2.68, + "learning_rate": 3.3453887884267627e-07, + "loss": 8.5522, + "step": 7400 + }, + { + "epoch": 2.68, + "learning_rate": 3.3544303797468356e-07, + "loss": 8.6139, + "step": 7420 + }, + { + "epoch": 2.69, + "learning_rate": 3.3634719710669075e-07, + "loss": 8.6521, + "step": 7440 + }, + { + "epoch": 2.7, + "learning_rate": 3.37251356238698e-07, + "loss": 8.7296, + "step": 7460 + }, + { + "epoch": 2.71, + "learning_rate": 3.3815551537070523e-07, + "loss": 8.8073, + "step": 7480 + }, + { + "epoch": 2.71, + "learning_rate": 3.3905967450271247e-07, + "loss": 8.8213, + "step": 7500 + }, + { + "epoch": 2.72, + "learning_rate": 3.399638336347197e-07, + "loss": 8.792, + "step": 7520 + }, + { + "epoch": 2.73, + "learning_rate": 3.4086799276672695e-07, + "loss": 8.8606, + "step": 7540 + }, + { + "epoch": 2.73, + "learning_rate": 3.4177215189873413e-07, + "loss": 8.767, + "step": 7560 + }, + { + "epoch": 2.74, + "learning_rate": 3.4267631103074143e-07, + "loss": 8.6718, + "step": 7580 + }, + { + "epoch": 2.75, + "learning_rate": 3.435804701627486e-07, + "loss": 8.7104, + "step": 7600 + }, + { + "epoch": 2.76, + "learning_rate": 3.4448462929475585e-07, + "loss": 8.8865, + "step": 7620 + }, + { + "epoch": 2.76, + "learning_rate": 3.453887884267631e-07, + "loss": 8.7782, + "step": 7640 + }, + { + "epoch": 2.77, + "learning_rate": 3.4629294755877033e-07, + "loss": 8.7133, + "step": 7660 + }, + { + "epoch": 2.78, + "learning_rate": 3.4719710669077757e-07, + "loss": 8.6618, + "step": 7680 + }, + { + "epoch": 2.78, + "learning_rate": 3.481012658227848e-07, + "loss": 8.6329, + "step": 7700 + }, + { + "epoch": 2.79, + "learning_rate": 3.49005424954792e-07, + "loss": 8.6203, + "step": 7720 + }, + { + "epoch": 2.8, + "learning_rate": 3.499095840867993e-07, + "loss": 8.8011, + "step": 7740 + }, + { + "epoch": 2.81, + "learning_rate": 3.508137432188065e-07, + "loss": 8.6341, + "step": 7760 + }, + { + "epoch": 2.81, + "learning_rate": 3.517179023508137e-07, + "loss": 8.7076, + "step": 7780 + }, + { + "epoch": 2.82, + "learning_rate": 3.5262206148282096e-07, + "loss": 8.8637, + "step": 7800 + }, + { + "epoch": 2.83, + "learning_rate": 3.535262206148282e-07, + "loss": 8.8632, + "step": 7820 + }, + { + "epoch": 2.84, + "learning_rate": 3.5443037974683544e-07, + "loss": 8.7457, + "step": 7840 + }, + { + "epoch": 2.84, + "learning_rate": 3.553345388788427e-07, + "loss": 8.8141, + "step": 7860 + }, + { + "epoch": 2.85, + "learning_rate": 3.5623869801084986e-07, + "loss": 8.6005, + "step": 7880 + }, + { + "epoch": 2.86, + "learning_rate": 3.5714285714285716e-07, + "loss": 8.8337, + "step": 7900 + }, + { + "epoch": 2.86, + "learning_rate": 3.5804701627486434e-07, + "loss": 8.5335, + "step": 7920 + }, + { + "epoch": 2.87, + "learning_rate": 3.589511754068716e-07, + "loss": 8.7152, + "step": 7940 + }, + { + "epoch": 2.88, + "learning_rate": 3.598553345388788e-07, + "loss": 8.8067, + "step": 7960 + }, + { + "epoch": 2.89, + "learning_rate": 3.6075949367088606e-07, + "loss": 8.6357, + "step": 7980 + }, + { + "epoch": 2.89, + "learning_rate": 3.616636528028933e-07, + "loss": 8.6081, + "step": 8000 + }, + { + "epoch": 2.9, + "learning_rate": 3.6256781193490054e-07, + "loss": 8.7371, + "step": 8020 + }, + { + "epoch": 2.91, + "learning_rate": 3.6347197106690773e-07, + "loss": 8.6043, + "step": 8040 + }, + { + "epoch": 2.92, + "learning_rate": 3.64376130198915e-07, + "loss": 8.7781, + "step": 8060 + }, + { + "epoch": 2.92, + "learning_rate": 3.6528028933092226e-07, + "loss": 8.7021, + "step": 8080 + }, + { + "epoch": 2.93, + "learning_rate": 3.6618444846292945e-07, + "loss": 8.6577, + "step": 8100 + }, + { + "epoch": 2.94, + "learning_rate": 3.670886075949367e-07, + "loss": 8.7209, + "step": 8120 + }, + { + "epoch": 2.94, + "learning_rate": 3.6799276672694393e-07, + "loss": 8.7849, + "step": 8140 + }, + { + "epoch": 2.95, + "learning_rate": 3.6889692585895117e-07, + "loss": 8.6802, + "step": 8160 + }, + { + "epoch": 2.96, + "learning_rate": 3.698010849909584e-07, + "loss": 8.6445, + "step": 8180 + }, + { + "epoch": 2.97, + "learning_rate": 3.707052441229656e-07, + "loss": 8.8102, + "step": 8200 + }, + { + "epoch": 2.97, + "learning_rate": 3.716094032549729e-07, + "loss": 8.6827, + "step": 8220 + }, + { + "epoch": 2.98, + "learning_rate": 3.7251356238698013e-07, + "loss": 8.5796, + "step": 8240 + }, + { + "epoch": 2.99, + "learning_rate": 3.734177215189873e-07, + "loss": 8.6077, + "step": 8260 + }, + { + "epoch": 2.99, + "learning_rate": 3.7432188065099455e-07, + "loss": 8.6815, + "step": 8280 + }, + { + "epoch": 3.0, + "learning_rate": 3.752260397830018e-07, + "loss": 8.6782, + "step": 8300 + }, + { + "epoch": 3.01, + "learning_rate": 3.7613019891500903e-07, + "loss": 8.6266, + "step": 8320 + }, + { + "epoch": 3.02, + "learning_rate": 3.7703435804701627e-07, + "loss": 8.7515, + "step": 8340 + }, + { + "epoch": 3.02, + "learning_rate": 3.7793851717902346e-07, + "loss": 8.7789, + "step": 8360 + }, + { + "epoch": 3.03, + "learning_rate": 3.7884267631103075e-07, + "loss": 8.6853, + "step": 8380 + }, + { + "epoch": 3.04, + "learning_rate": 3.79746835443038e-07, + "loss": 8.6932, + "step": 8400 + }, + { + "epoch": 3.05, + "learning_rate": 3.806509945750452e-07, + "loss": 8.6517, + "step": 8420 + }, + { + "epoch": 3.05, + "learning_rate": 3.815551537070524e-07, + "loss": 8.7313, + "step": 8440 + }, + { + "epoch": 3.06, + "learning_rate": 3.8245931283905966e-07, + "loss": 8.6474, + "step": 8460 + }, + { + "epoch": 3.07, + "learning_rate": 3.833634719710669e-07, + "loss": 8.5971, + "step": 8480 + }, + { + "epoch": 3.07, + "learning_rate": 3.8426763110307414e-07, + "loss": 8.6707, + "step": 8500 + }, + { + "epoch": 3.08, + "learning_rate": 3.851717902350813e-07, + "loss": 8.6945, + "step": 8520 + }, + { + "epoch": 3.09, + "learning_rate": 3.860759493670886e-07, + "loss": 8.5875, + "step": 8540 + }, + { + "epoch": 3.1, + "learning_rate": 3.8698010849909586e-07, + "loss": 8.6613, + "step": 8560 + }, + { + "epoch": 3.1, + "learning_rate": 3.8788426763110304e-07, + "loss": 8.6394, + "step": 8580 + }, + { + "epoch": 3.11, + "learning_rate": 3.887884267631103e-07, + "loss": 8.6201, + "step": 8600 + }, + { + "epoch": 3.12, + "learning_rate": 3.896925858951175e-07, + "loss": 8.7747, + "step": 8620 + }, + { + "epoch": 3.12, + "learning_rate": 3.9059674502712476e-07, + "loss": 8.6703, + "step": 8640 + }, + { + "epoch": 3.13, + "learning_rate": 3.91500904159132e-07, + "loss": 8.6593, + "step": 8660 + }, + { + "epoch": 3.14, + "learning_rate": 3.924050632911392e-07, + "loss": 8.8128, + "step": 8680 + }, + { + "epoch": 3.15, + "learning_rate": 3.933092224231465e-07, + "loss": 8.625, + "step": 8700 + }, + { + "epoch": 3.15, + "learning_rate": 3.942133815551537e-07, + "loss": 8.7078, + "step": 8720 + }, + { + "epoch": 3.16, + "learning_rate": 3.951175406871609e-07, + "loss": 8.7598, + "step": 8740 + }, + { + "epoch": 3.17, + "learning_rate": 3.9602169981916815e-07, + "loss": 8.6145, + "step": 8760 + }, + { + "epoch": 3.18, + "learning_rate": 3.969258589511754e-07, + "loss": 8.5881, + "step": 8780 + }, + { + "epoch": 3.18, + "learning_rate": 3.9783001808318263e-07, + "loss": 8.6745, + "step": 8800 + }, + { + "epoch": 3.19, + "learning_rate": 3.9873417721518987e-07, + "loss": 8.7567, + "step": 8820 + }, + { + "epoch": 3.2, + "learning_rate": 3.9963833634719705e-07, + "loss": 8.6587, + "step": 8840 + }, + { + "epoch": 3.2, + "learning_rate": 4.0054249547920435e-07, + "loss": 8.6343, + "step": 8860 + }, + { + "epoch": 3.21, + "learning_rate": 4.014466546112116e-07, + "loss": 8.6116, + "step": 8880 + }, + { + "epoch": 3.22, + "learning_rate": 4.0235081374321877e-07, + "loss": 8.6474, + "step": 8900 + }, + { + "epoch": 3.23, + "learning_rate": 4.03254972875226e-07, + "loss": 8.5796, + "step": 8920 + }, + { + "epoch": 3.23, + "learning_rate": 4.0415913200723325e-07, + "loss": 8.8124, + "step": 8940 + }, + { + "epoch": 3.24, + "learning_rate": 4.050632911392405e-07, + "loss": 8.7305, + "step": 8960 + }, + { + "epoch": 3.25, + "learning_rate": 4.0596745027124773e-07, + "loss": 8.6832, + "step": 8980 + }, + { + "epoch": 3.25, + "learning_rate": 4.068716094032549e-07, + "loss": 8.6668, + "step": 9000 + }, + { + "epoch": 3.26, + "learning_rate": 4.077757685352622e-07, + "loss": 8.611, + "step": 9020 + }, + { + "epoch": 3.27, + "learning_rate": 4.0867992766726945e-07, + "loss": 8.6182, + "step": 9040 + }, + { + "epoch": 3.28, + "learning_rate": 4.0958408679927664e-07, + "loss": 8.6509, + "step": 9060 + }, + { + "epoch": 3.28, + "learning_rate": 4.104882459312839e-07, + "loss": 8.7003, + "step": 9080 + }, + { + "epoch": 3.29, + "learning_rate": 4.1139240506329117e-07, + "loss": 8.7466, + "step": 9100 + }, + { + "epoch": 3.3, + "learning_rate": 4.1229656419529836e-07, + "loss": 8.6764, + "step": 9120 + }, + { + "epoch": 3.31, + "learning_rate": 4.132007233273056e-07, + "loss": 8.8049, + "step": 9140 + }, + { + "epoch": 3.31, + "learning_rate": 4.141048824593128e-07, + "loss": 8.6367, + "step": 9160 + }, + { + "epoch": 3.32, + "learning_rate": 4.150090415913201e-07, + "loss": 8.6451, + "step": 9180 + }, + { + "epoch": 3.33, + "learning_rate": 4.159132007233273e-07, + "loss": 8.7919, + "step": 9200 + }, + { + "epoch": 3.33, + "learning_rate": 4.168173598553345e-07, + "loss": 8.6583, + "step": 9220 + }, + { + "epoch": 3.34, + "learning_rate": 4.1772151898734174e-07, + "loss": 8.6932, + "step": 9240 + }, + { + "epoch": 3.35, + "learning_rate": 4.1862567811934904e-07, + "loss": 8.7003, + "step": 9260 + }, + { + "epoch": 3.36, + "learning_rate": 4.195298372513562e-07, + "loss": 8.7071, + "step": 9280 + }, + { + "epoch": 3.36, + "learning_rate": 4.2043399638336346e-07, + "loss": 8.5826, + "step": 9300 + }, + { + "epoch": 3.37, + "learning_rate": 4.2133815551537065e-07, + "loss": 8.8741, + "step": 9320 + }, + { + "epoch": 3.38, + "learning_rate": 4.2224231464737794e-07, + "loss": 8.7105, + "step": 9340 + }, + { + "epoch": 3.39, + "learning_rate": 4.231464737793852e-07, + "loss": 8.7351, + "step": 9360 + }, + { + "epoch": 3.39, + "learning_rate": 4.2405063291139237e-07, + "loss": 8.6307, + "step": 9380 + }, + { + "epoch": 3.4, + "learning_rate": 4.249547920433996e-07, + "loss": 8.6908, + "step": 9400 + }, + { + "epoch": 3.41, + "learning_rate": 4.258589511754069e-07, + "loss": 8.7118, + "step": 9420 + }, + { + "epoch": 3.41, + "learning_rate": 4.267631103074141e-07, + "loss": 8.7018, + "step": 9440 + }, + { + "epoch": 3.42, + "learning_rate": 4.2766726943942133e-07, + "loss": 8.7443, + "step": 9460 + }, + { + "epoch": 3.43, + "learning_rate": 4.285714285714285e-07, + "loss": 8.6614, + "step": 9480 + }, + { + "epoch": 3.44, + "learning_rate": 4.294755877034358e-07, + "loss": 8.7883, + "step": 9500 + }, + { + "epoch": 3.44, + "learning_rate": 4.3037974683544305e-07, + "loss": 8.7251, + "step": 9520 + }, + { + "epoch": 3.45, + "learning_rate": 4.3128390596745023e-07, + "loss": 8.8043, + "step": 9540 + }, + { + "epoch": 3.46, + "learning_rate": 4.3218806509945747e-07, + "loss": 8.6045, + "step": 9560 + }, + { + "epoch": 3.46, + "learning_rate": 4.3309222423146477e-07, + "loss": 8.6793, + "step": 9580 + }, + { + "epoch": 3.47, + "learning_rate": 4.3399638336347195e-07, + "loss": 8.7049, + "step": 9600 + }, + { + "epoch": 3.48, + "learning_rate": 4.349005424954792e-07, + "loss": 8.6692, + "step": 9620 + }, + { + "epoch": 3.49, + "learning_rate": 4.358047016274864e-07, + "loss": 8.79, + "step": 9640 + }, + { + "epoch": 3.49, + "learning_rate": 4.3670886075949367e-07, + "loss": 8.7726, + "step": 9660 + }, + { + "epoch": 3.5, + "learning_rate": 4.376130198915009e-07, + "loss": 8.6092, + "step": 9680 + }, + { + "epoch": 3.51, + "learning_rate": 4.385171790235081e-07, + "loss": 8.7118, + "step": 9700 + }, + { + "epoch": 3.52, + "learning_rate": 4.3942133815551534e-07, + "loss": 8.6013, + "step": 9720 + }, + { + "epoch": 3.52, + "learning_rate": 4.4032549728752263e-07, + "loss": 8.762, + "step": 9740 + }, + { + "epoch": 3.53, + "learning_rate": 4.412296564195298e-07, + "loss": 8.6593, + "step": 9760 + }, + { + "epoch": 3.54, + "learning_rate": 4.4213381555153706e-07, + "loss": 8.6182, + "step": 9780 + }, + { + "epoch": 3.54, + "learning_rate": 4.4303797468354424e-07, + "loss": 8.6179, + "step": 9800 + }, + { + "epoch": 3.55, + "learning_rate": 4.4394213381555154e-07, + "loss": 8.7162, + "step": 9820 + }, + { + "epoch": 3.56, + "learning_rate": 4.448462929475588e-07, + "loss": 8.6335, + "step": 9840 + }, + { + "epoch": 3.57, + "learning_rate": 4.4575045207956596e-07, + "loss": 8.7033, + "step": 9860 + }, + { + "epoch": 3.57, + "learning_rate": 4.466546112115732e-07, + "loss": 8.6223, + "step": 9880 + }, + { + "epoch": 3.58, + "learning_rate": 4.475587703435805e-07, + "loss": 8.6641, + "step": 9900 + }, + { + "epoch": 3.59, + "learning_rate": 4.484629294755877e-07, + "loss": 8.7288, + "step": 9920 + }, + { + "epoch": 3.59, + "learning_rate": 4.493670886075949e-07, + "loss": 8.6236, + "step": 9940 + }, + { + "epoch": 3.6, + "learning_rate": 4.502712477396021e-07, + "loss": 8.6478, + "step": 9960 + }, + { + "epoch": 3.61, + "learning_rate": 4.511754068716094e-07, + "loss": 8.7282, + "step": 9980 + }, + { + "epoch": 3.62, + "learning_rate": 4.5207956600361664e-07, + "loss": 8.6599, + "step": 10000 + }, + { + "epoch": 3.62, + "learning_rate": 4.5298372513562383e-07, + "loss": 8.7713, + "step": 10020 + }, + { + "epoch": 3.63, + "learning_rate": 4.5388788426763107e-07, + "loss": 8.6667, + "step": 10040 + }, + { + "epoch": 3.64, + "learning_rate": 4.5479204339963836e-07, + "loss": 8.7424, + "step": 10060 + }, + { + "epoch": 3.65, + "learning_rate": 4.5569620253164555e-07, + "loss": 8.6676, + "step": 10080 + }, + { + "epoch": 3.65, + "learning_rate": 4.566003616636528e-07, + "loss": 8.7331, + "step": 10100 + }, + { + "epoch": 3.66, + "learning_rate": 4.5750452079566003e-07, + "loss": 8.6343, + "step": 10120 + }, + { + "epoch": 3.67, + "learning_rate": 4.5840867992766727e-07, + "loss": 8.6826, + "step": 10140 + }, + { + "epoch": 3.67, + "learning_rate": 4.593128390596745e-07, + "loss": 8.6749, + "step": 10160 + }, + { + "epoch": 3.68, + "learning_rate": 4.602169981916817e-07, + "loss": 8.6509, + "step": 10180 + }, + { + "epoch": 3.69, + "learning_rate": 4.6112115732368893e-07, + "loss": 8.6455, + "step": 10200 + }, + { + "epoch": 3.7, + "learning_rate": 4.620253164556962e-07, + "loss": 8.6079, + "step": 10220 + }, + { + "epoch": 3.7, + "learning_rate": 4.629294755877034e-07, + "loss": 8.6061, + "step": 10240 + }, + { + "epoch": 3.71, + "learning_rate": 4.6383363471971065e-07, + "loss": 8.6707, + "step": 10260 + }, + { + "epoch": 3.72, + "learning_rate": 4.647377938517179e-07, + "loss": 8.6731, + "step": 10280 + }, + { + "epoch": 3.73, + "learning_rate": 4.6564195298372513e-07, + "loss": 8.6561, + "step": 10300 + }, + { + "epoch": 3.73, + "learning_rate": 4.6654611211573237e-07, + "loss": 8.8374, + "step": 10320 + }, + { + "epoch": 3.74, + "learning_rate": 4.6745027124773956e-07, + "loss": 8.7873, + "step": 10340 + }, + { + "epoch": 3.75, + "learning_rate": 4.683544303797468e-07, + "loss": 8.5839, + "step": 10360 + }, + { + "epoch": 3.75, + "learning_rate": 4.692585895117541e-07, + "loss": 8.7062, + "step": 10380 + }, + { + "epoch": 3.76, + "learning_rate": 4.701627486437613e-07, + "loss": 8.5917, + "step": 10400 + }, + { + "epoch": 3.77, + "learning_rate": 4.710669077757685e-07, + "loss": 8.6824, + "step": 10420 + }, + { + "epoch": 3.78, + "learning_rate": 4.7197106690777576e-07, + "loss": 8.624, + "step": 10440 + }, + { + "epoch": 3.78, + "learning_rate": 4.72875226039783e-07, + "loss": 8.6687, + "step": 10460 + }, + { + "epoch": 3.79, + "learning_rate": 4.7377938517179024e-07, + "loss": 8.8627, + "step": 10480 + }, + { + "epoch": 3.8, + "learning_rate": 4.746835443037974e-07, + "loss": 8.6775, + "step": 10500 + }, + { + "epoch": 3.8, + "learning_rate": 4.7558770343580466e-07, + "loss": 8.6905, + "step": 10520 + }, + { + "epoch": 3.81, + "learning_rate": 4.7649186256781196e-07, + "loss": 8.6904, + "step": 10540 + }, + { + "epoch": 3.82, + "learning_rate": 4.773960216998191e-07, + "loss": 8.6781, + "step": 10560 + }, + { + "epoch": 3.83, + "learning_rate": 4.783001808318264e-07, + "loss": 8.6216, + "step": 10580 + }, + { + "epoch": 3.83, + "learning_rate": 4.792043399638337e-07, + "loss": 8.6004, + "step": 10600 + }, + { + "epoch": 3.84, + "learning_rate": 4.801084990958408e-07, + "loss": 8.6008, + "step": 10620 + }, + { + "epoch": 3.85, + "learning_rate": 4.81012658227848e-07, + "loss": 8.5841, + "step": 10640 + }, + { + "epoch": 3.86, + "learning_rate": 4.819168173598553e-07, + "loss": 8.6029, + "step": 10660 + }, + { + "epoch": 3.86, + "learning_rate": 4.828209764918625e-07, + "loss": 8.7195, + "step": 10680 + }, + { + "epoch": 3.87, + "learning_rate": 4.837251356238698e-07, + "loss": 8.7097, + "step": 10700 + }, + { + "epoch": 3.88, + "learning_rate": 4.84629294755877e-07, + "loss": 8.7943, + "step": 10720 + }, + { + "epoch": 3.88, + "learning_rate": 4.855334538878842e-07, + "loss": 8.6566, + "step": 10740 + }, + { + "epoch": 3.89, + "learning_rate": 4.864376130198915e-07, + "loss": 8.6458, + "step": 10760 + }, + { + "epoch": 3.9, + "learning_rate": 4.873417721518987e-07, + "loss": 8.8129, + "step": 10780 + }, + { + "epoch": 3.91, + "learning_rate": 4.88245931283906e-07, + "loss": 8.6123, + "step": 10800 + }, + { + "epoch": 3.91, + "learning_rate": 4.891500904159131e-07, + "loss": 8.7739, + "step": 10820 + }, + { + "epoch": 3.92, + "learning_rate": 4.900542495479204e-07, + "loss": 8.5971, + "step": 10840 + }, + { + "epoch": 3.93, + "learning_rate": 4.909584086799277e-07, + "loss": 8.6343, + "step": 10860 + }, + { + "epoch": 3.93, + "learning_rate": 4.918625678119348e-07, + "loss": 8.6995, + "step": 10880 + }, + { + "epoch": 3.94, + "learning_rate": 4.927667269439422e-07, + "loss": 8.7568, + "step": 10900 + }, + { + "epoch": 3.95, + "learning_rate": 4.936708860759494e-07, + "loss": 8.6049, + "step": 10920 + }, + { + "epoch": 3.96, + "learning_rate": 4.945750452079565e-07, + "loss": 8.6712, + "step": 10940 + }, + { + "epoch": 3.96, + "learning_rate": 4.954792043399638e-07, + "loss": 8.6592, + "step": 10960 + }, + { + "epoch": 3.97, + "learning_rate": 4.96383363471971e-07, + "loss": 8.6155, + "step": 10980 + }, + { + "epoch": 3.98, + "learning_rate": 4.972875226039783e-07, + "loss": 8.5876, + "step": 11000 + }, + { + "epoch": 3.99, + "learning_rate": 4.981916817359855e-07, + "loss": 8.7089, + "step": 11020 + }, + { + "epoch": 3.99, + "learning_rate": 4.990958408679927e-07, + "loss": 8.67, + "step": 11040 + }, + { + "epoch": 4.0, + "learning_rate": 5e-07, + "loss": 8.6221, + "step": 11060 + }, + { + "epoch": 4.01, + "learning_rate": 4.999997478613401e-07, + "loss": 8.7503, + "step": 11080 + }, + { + "epoch": 4.01, + "learning_rate": 4.999989914458693e-07, + "loss": 8.8729, + "step": 11100 + }, + { + "epoch": 4.02, + "learning_rate": 4.99997730755113e-07, + "loss": 8.7594, + "step": 11120 + }, + { + "epoch": 4.03, + "learning_rate": 4.999959657916146e-07, + "loss": 8.6993, + "step": 11140 + }, + { + "epoch": 4.04, + "learning_rate": 4.999936965589338e-07, + "loss": 8.8509, + "step": 11160 + }, + { + "epoch": 4.04, + "learning_rate": 4.999909230616482e-07, + "loss": 8.6098, + "step": 11180 + }, + { + "epoch": 4.05, + "learning_rate": 4.99987645305352e-07, + "loss": 8.6471, + "step": 11200 + }, + { + "epoch": 4.06, + "learning_rate": 4.999838632966571e-07, + "loss": 8.7941, + "step": 11220 + }, + { + "epoch": 4.07, + "learning_rate": 4.999795770431919e-07, + "loss": 8.6125, + "step": 11240 + }, + { + "epoch": 4.07, + "learning_rate": 4.999747865536025e-07, + "loss": 8.7692, + "step": 11260 + }, + { + "epoch": 4.08, + "learning_rate": 4.999694918375516e-07, + "loss": 8.6221, + "step": 11280 + }, + { + "epoch": 4.09, + "learning_rate": 4.999636929057195e-07, + "loss": 8.6307, + "step": 11300 + }, + { + "epoch": 4.09, + "learning_rate": 4.99957389769803e-07, + "loss": 8.6476, + "step": 11320 + }, + { + "epoch": 4.1, + "learning_rate": 4.999505824425163e-07, + "loss": 8.6193, + "step": 11340 + }, + { + "epoch": 4.11, + "learning_rate": 4.999432709375907e-07, + "loss": 8.5791, + "step": 11360 + }, + { + "epoch": 4.12, + "learning_rate": 4.999354552697741e-07, + "loss": 8.6525, + "step": 11380 + }, + { + "epoch": 4.12, + "learning_rate": 4.999271354548316e-07, + "loss": 8.6326, + "step": 11400 + }, + { + "epoch": 4.13, + "learning_rate": 4.999183115095452e-07, + "loss": 8.7265, + "step": 11420 + }, + { + "epoch": 4.14, + "learning_rate": 4.999089834517138e-07, + "loss": 8.6149, + "step": 11440 + }, + { + "epoch": 4.14, + "learning_rate": 4.998991513001532e-07, + "loss": 8.7124, + "step": 11460 + }, + { + "epoch": 4.15, + "learning_rate": 4.998888150746957e-07, + "loss": 8.5983, + "step": 11480 + }, + { + "epoch": 4.16, + "learning_rate": 4.998779747961905e-07, + "loss": 8.685, + "step": 11500 + }, + { + "epoch": 4.17, + "learning_rate": 4.99866630486504e-07, + "loss": 8.5874, + "step": 11520 + }, + { + "epoch": 4.17, + "learning_rate": 4.998547821685187e-07, + "loss": 8.6896, + "step": 11540 + }, + { + "epoch": 4.18, + "learning_rate": 4.99842429866134e-07, + "loss": 8.6125, + "step": 11560 + }, + { + "epoch": 4.19, + "learning_rate": 4.998295736042658e-07, + "loss": 8.6181, + "step": 11580 + }, + { + "epoch": 4.2, + "learning_rate": 4.998162134088466e-07, + "loss": 8.6908, + "step": 11600 + }, + { + "epoch": 4.2, + "learning_rate": 4.998023493068254e-07, + "loss": 8.611, + "step": 11620 + }, + { + "epoch": 4.21, + "learning_rate": 4.997879813261676e-07, + "loss": 8.6569, + "step": 11640 + }, + { + "epoch": 4.22, + "learning_rate": 4.99773109495855e-07, + "loss": 8.7846, + "step": 11660 + }, + { + "epoch": 4.22, + "learning_rate": 4.997577338458857e-07, + "loss": 8.6122, + "step": 11680 + }, + { + "epoch": 4.23, + "learning_rate": 4.997418544072741e-07, + "loss": 8.8447, + "step": 11700 + }, + { + "epoch": 4.24, + "learning_rate": 4.997254712120507e-07, + "loss": 8.7572, + "step": 11720 + }, + { + "epoch": 4.25, + "learning_rate": 4.997085842932621e-07, + "loss": 8.6279, + "step": 11740 + }, + { + "epoch": 4.25, + "learning_rate": 4.996911936849713e-07, + "loss": 8.6707, + "step": 11760 + }, + { + "epoch": 4.26, + "learning_rate": 4.996732994222569e-07, + "loss": 8.7163, + "step": 11780 + }, + { + "epoch": 4.27, + "learning_rate": 4.996549015412135e-07, + "loss": 8.7067, + "step": 11800 + }, + { + "epoch": 4.27, + "learning_rate": 4.996360000789519e-07, + "loss": 8.5964, + "step": 11820 + }, + { + "epoch": 4.28, + "learning_rate": 4.996165950735983e-07, + "loss": 8.8121, + "step": 11840 + }, + { + "epoch": 4.29, + "learning_rate": 4.995966865642945e-07, + "loss": 8.7009, + "step": 11860 + }, + { + "epoch": 4.3, + "learning_rate": 4.995762745911985e-07, + "loss": 8.6199, + "step": 11880 + }, + { + "epoch": 4.3, + "learning_rate": 4.995553591954832e-07, + "loss": 8.7139, + "step": 11900 + }, + { + "epoch": 4.31, + "learning_rate": 4.995339404193373e-07, + "loss": 8.6098, + "step": 11920 + }, + { + "epoch": 4.32, + "learning_rate": 4.99512018305965e-07, + "loss": 8.6987, + "step": 11940 + }, + { + "epoch": 4.33, + "learning_rate": 4.994895928995854e-07, + "loss": 8.6441, + "step": 11960 + }, + { + "epoch": 4.33, + "learning_rate": 4.99466664245433e-07, + "loss": 8.6625, + "step": 11980 + }, + { + "epoch": 4.34, + "learning_rate": 4.994432323897575e-07, + "loss": 8.6652, + "step": 12000 + }, + { + "epoch": 4.35, + "learning_rate": 4.994192973798235e-07, + "loss": 8.7255, + "step": 12020 + }, + { + "epoch": 4.35, + "learning_rate": 4.993948592639104e-07, + "loss": 8.6808, + "step": 12040 + }, + { + "epoch": 4.36, + "learning_rate": 4.993699180913127e-07, + "loss": 8.6249, + "step": 12060 + }, + { + "epoch": 4.37, + "learning_rate": 4.993444739123394e-07, + "loss": 8.8293, + "step": 12080 + }, + { + "epoch": 4.38, + "learning_rate": 4.993185267783141e-07, + "loss": 8.6877, + "step": 12100 + }, + { + "epoch": 4.38, + "learning_rate": 4.992920767415752e-07, + "loss": 8.6937, + "step": 12120 + }, + { + "epoch": 4.39, + "learning_rate": 4.992651238554753e-07, + "loss": 8.8042, + "step": 12140 + }, + { + "epoch": 4.4, + "learning_rate": 4.992376681743811e-07, + "loss": 8.631, + "step": 12160 + }, + { + "epoch": 4.41, + "learning_rate": 4.992097097536739e-07, + "loss": 8.6861, + "step": 12180 + }, + { + "epoch": 4.41, + "learning_rate": 4.991812486497489e-07, + "loss": 8.754, + "step": 12200 + }, + { + "epoch": 4.42, + "learning_rate": 4.991522849200152e-07, + "loss": 8.8765, + "step": 12220 + }, + { + "epoch": 4.43, + "learning_rate": 4.991228186228956e-07, + "loss": 8.7654, + "step": 12240 + }, + { + "epoch": 4.43, + "learning_rate": 4.990928498178273e-07, + "loss": 8.7668, + "step": 12260 + }, + { + "epoch": 4.44, + "learning_rate": 4.990623785652603e-07, + "loss": 8.711, + "step": 12280 + }, + { + "epoch": 4.45, + "learning_rate": 4.990314049266585e-07, + "loss": 8.6748, + "step": 12300 + }, + { + "epoch": 4.46, + "learning_rate": 4.989999289644991e-07, + "loss": 8.7944, + "step": 12320 + }, + { + "epoch": 4.46, + "learning_rate": 4.989679507422728e-07, + "loss": 8.699, + "step": 12340 + }, + { + "epoch": 4.47, + "learning_rate": 4.989354703244829e-07, + "loss": 8.618, + "step": 12360 + }, + { + "epoch": 4.48, + "learning_rate": 4.98902487776646e-07, + "loss": 8.7872, + "step": 12380 + }, + { + "epoch": 4.48, + "learning_rate": 4.988690031652916e-07, + "loss": 8.6499, + "step": 12400 + }, + { + "epoch": 4.49, + "learning_rate": 4.988350165579616e-07, + "loss": 8.641, + "step": 12420 + }, + { + "epoch": 4.5, + "learning_rate": 4.98800528023211e-07, + "loss": 8.6022, + "step": 12440 + }, + { + "epoch": 4.51, + "learning_rate": 4.987655376306068e-07, + "loss": 8.7124, + "step": 12460 + }, + { + "epoch": 4.51, + "learning_rate": 4.987300454507285e-07, + "loss": 8.6464, + "step": 12480 + }, + { + "epoch": 4.52, + "learning_rate": 4.986940515551675e-07, + "loss": 8.7243, + "step": 12500 + }, + { + "epoch": 4.53, + "learning_rate": 4.986575560165277e-07, + "loss": 8.8642, + "step": 12520 + }, + { + "epoch": 4.54, + "learning_rate": 4.986205589084244e-07, + "loss": 8.6188, + "step": 12540 + }, + { + "epoch": 4.54, + "learning_rate": 4.985830603054849e-07, + "loss": 8.6786, + "step": 12560 + }, + { + "epoch": 4.55, + "learning_rate": 4.985450602833479e-07, + "loss": 8.6291, + "step": 12580 + }, + { + "epoch": 4.56, + "learning_rate": 4.985065589186638e-07, + "loss": 8.6776, + "step": 12600 + }, + { + "epoch": 4.56, + "learning_rate": 4.984675562890938e-07, + "loss": 8.626, + "step": 12620 + }, + { + "epoch": 4.57, + "learning_rate": 4.984280524733107e-07, + "loss": 8.7193, + "step": 12640 + }, + { + "epoch": 4.58, + "learning_rate": 4.983880475509977e-07, + "loss": 8.6363, + "step": 12660 + }, + { + "epoch": 4.59, + "learning_rate": 4.983475416028494e-07, + "loss": 8.6651, + "step": 12680 + }, + { + "epoch": 4.59, + "learning_rate": 4.983065347105706e-07, + "loss": 8.6346, + "step": 12700 + }, + { + "epoch": 4.6, + "learning_rate": 4.982650269568766e-07, + "loss": 8.685, + "step": 12720 + }, + { + "epoch": 4.61, + "learning_rate": 4.982230184254932e-07, + "loss": 8.6415, + "step": 12740 + }, + { + "epoch": 4.61, + "learning_rate": 4.981805092011564e-07, + "loss": 8.5435, + "step": 12760 + }, + { + "epoch": 4.62, + "learning_rate": 4.981374993696115e-07, + "loss": 8.6561, + "step": 12780 + }, + { + "epoch": 4.63, + "learning_rate": 4.980939890176143e-07, + "loss": 8.6248, + "step": 12800 + }, + { + "epoch": 4.64, + "learning_rate": 4.980499782329299e-07, + "loss": 8.709, + "step": 12820 + }, + { + "epoch": 4.64, + "learning_rate": 4.980054671043329e-07, + "loss": 8.619, + "step": 12840 + }, + { + "epoch": 4.65, + "learning_rate": 4.979604557216069e-07, + "loss": 8.7461, + "step": 12860 + }, + { + "epoch": 4.66, + "learning_rate": 4.979149441755452e-07, + "loss": 8.7711, + "step": 12880 + }, + { + "epoch": 4.67, + "learning_rate": 4.978689325579491e-07, + "loss": 8.6337, + "step": 12900 + }, + { + "epoch": 4.67, + "learning_rate": 4.978224209616292e-07, + "loss": 8.7815, + "step": 12920 + }, + { + "epoch": 4.68, + "learning_rate": 4.977754094804047e-07, + "loss": 8.6441, + "step": 12940 + }, + { + "epoch": 4.69, + "learning_rate": 4.977278982091027e-07, + "loss": 8.765, + "step": 12960 + }, + { + "epoch": 4.69, + "learning_rate": 4.976798872435586e-07, + "loss": 8.5918, + "step": 12980 + }, + { + "epoch": 4.7, + "learning_rate": 4.976313766806159e-07, + "loss": 8.5984, + "step": 13000 + }, + { + "epoch": 4.71, + "learning_rate": 4.975823666181255e-07, + "loss": 8.7362, + "step": 13020 + }, + { + "epoch": 4.72, + "learning_rate": 4.975328571549462e-07, + "loss": 8.6716, + "step": 13040 + }, + { + "epoch": 4.72, + "learning_rate": 4.97482848390944e-07, + "loss": 8.7553, + "step": 13060 + }, + { + "epoch": 4.73, + "learning_rate": 4.974323404269921e-07, + "loss": 8.6592, + "step": 13080 + }, + { + "epoch": 4.74, + "learning_rate": 4.973813333649703e-07, + "loss": 8.7005, + "step": 13100 + }, + { + "epoch": 4.75, + "learning_rate": 4.973298273077657e-07, + "loss": 8.6346, + "step": 13120 + }, + { + "epoch": 4.75, + "learning_rate": 4.972778223592717e-07, + "loss": 8.6217, + "step": 13140 + }, + { + "epoch": 4.76, + "learning_rate": 4.972253186243876e-07, + "loss": 8.6394, + "step": 13160 + }, + { + "epoch": 4.77, + "learning_rate": 4.971723162090196e-07, + "loss": 8.8543, + "step": 13180 + }, + { + "epoch": 4.77, + "learning_rate": 4.971188152200791e-07, + "loss": 8.6189, + "step": 13200 + }, + { + "epoch": 4.78, + "learning_rate": 4.970648157654835e-07, + "loss": 8.6655, + "step": 13220 + }, + { + "epoch": 4.79, + "learning_rate": 4.970103179541556e-07, + "loss": 8.7302, + "step": 13240 + }, + { + "epoch": 4.8, + "learning_rate": 4.969553218960234e-07, + "loss": 8.629, + "step": 13260 + }, + { + "epoch": 4.8, + "learning_rate": 4.9689982770202e-07, + "loss": 8.6618, + "step": 13280 + }, + { + "epoch": 4.81, + "learning_rate": 4.968438354840833e-07, + "loss": 8.5832, + "step": 13300 + }, + { + "epoch": 4.82, + "learning_rate": 4.967873453551557e-07, + "loss": 8.6059, + "step": 13320 + }, + { + "epoch": 4.82, + "learning_rate": 4.967303574291839e-07, + "loss": 8.7163, + "step": 13340 + }, + { + "epoch": 4.83, + "learning_rate": 4.966728718211188e-07, + "loss": 8.6663, + "step": 13360 + }, + { + "epoch": 4.84, + "learning_rate": 4.966148886469152e-07, + "loss": 8.6538, + "step": 13380 + }, + { + "epoch": 4.85, + "learning_rate": 4.965564080235315e-07, + "loss": 8.6983, + "step": 13400 + }, + { + "epoch": 4.85, + "learning_rate": 4.964974300689295e-07, + "loss": 8.6548, + "step": 13420 + }, + { + "epoch": 4.86, + "learning_rate": 4.964379549020741e-07, + "loss": 8.6463, + "step": 13440 + }, + { + "epoch": 4.87, + "learning_rate": 4.963779826429333e-07, + "loss": 8.7331, + "step": 13460 + }, + { + "epoch": 4.88, + "learning_rate": 4.963175134124775e-07, + "loss": 8.7252, + "step": 13480 + }, + { + "epoch": 4.88, + "learning_rate": 4.962565473326802e-07, + "loss": 8.7028, + "step": 13500 + }, + { + "epoch": 4.89, + "learning_rate": 4.961950845265162e-07, + "loss": 8.6745, + "step": 13520 + }, + { + "epoch": 4.9, + "learning_rate": 4.961331251179628e-07, + "loss": 8.5407, + "step": 13540 + }, + { + "epoch": 4.9, + "learning_rate": 4.960706692319991e-07, + "loss": 8.7639, + "step": 13560 + }, + { + "epoch": 4.91, + "learning_rate": 4.960077169946052e-07, + "loss": 8.6626, + "step": 13580 + }, + { + "epoch": 4.92, + "learning_rate": 4.959442685327627e-07, + "loss": 8.7467, + "step": 13600 + }, + { + "epoch": 4.93, + "learning_rate": 4.958803239744542e-07, + "loss": 8.6506, + "step": 13620 + }, + { + "epoch": 4.93, + "learning_rate": 4.958158834486628e-07, + "loss": 8.5932, + "step": 13640 + }, + { + "epoch": 4.94, + "learning_rate": 4.95750947085372e-07, + "loss": 8.751, + "step": 13660 + }, + { + "epoch": 4.95, + "learning_rate": 4.956855150155657e-07, + "loss": 8.6431, + "step": 13680 + }, + { + "epoch": 4.95, + "learning_rate": 4.956195873712273e-07, + "loss": 8.7175, + "step": 13700 + }, + { + "epoch": 4.96, + "learning_rate": 4.955531642853403e-07, + "loss": 8.7028, + "step": 13720 + }, + { + "epoch": 4.97, + "learning_rate": 4.954862458918873e-07, + "loss": 8.7185, + "step": 13740 + }, + { + "epoch": 4.98, + "learning_rate": 4.954188323258498e-07, + "loss": 8.6743, + "step": 13760 + }, + { + "epoch": 4.98, + "learning_rate": 4.953509237232085e-07, + "loss": 8.63, + "step": 13780 + }, + { + "epoch": 4.99, + "learning_rate": 4.952825202209426e-07, + "loss": 8.5902, + "step": 13800 + }, + { + "epoch": 5.0, + "learning_rate": 4.952136219570291e-07, + "loss": 8.7626, + "step": 13820 + }, + { + "epoch": 5.01, + "learning_rate": 4.951442290704437e-07, + "loss": 8.6377, + "step": 13840 + }, + { + "epoch": 5.01, + "learning_rate": 4.950743417011591e-07, + "loss": 8.6489, + "step": 13860 + }, + { + "epoch": 5.02, + "learning_rate": 4.950039599901459e-07, + "loss": 8.5806, + "step": 13880 + }, + { + "epoch": 5.03, + "learning_rate": 4.949330840793717e-07, + "loss": 8.6237, + "step": 13900 + }, + { + "epoch": 5.03, + "learning_rate": 4.94861714111801e-07, + "loss": 8.5744, + "step": 13920 + }, + { + "epoch": 5.04, + "learning_rate": 4.947898502313948e-07, + "loss": 8.6261, + "step": 13940 + }, + { + "epoch": 5.05, + "learning_rate": 4.947174925831103e-07, + "loss": 8.6471, + "step": 13960 + }, + { + "epoch": 5.06, + "learning_rate": 4.946446413129011e-07, + "loss": 8.7867, + "step": 13980 + }, + { + "epoch": 5.06, + "learning_rate": 4.945712965677158e-07, + "loss": 8.6515, + "step": 14000 + }, + { + "epoch": 5.07, + "learning_rate": 4.944974584954988e-07, + "loss": 8.7224, + "step": 14020 + }, + { + "epoch": 5.08, + "learning_rate": 4.944231272451899e-07, + "loss": 8.6068, + "step": 14040 + }, + { + "epoch": 5.08, + "learning_rate": 4.94348302966723e-07, + "loss": 8.6677, + "step": 14060 + }, + { + "epoch": 5.09, + "learning_rate": 4.94272985811027e-07, + "loss": 8.7058, + "step": 14080 + }, + { + "epoch": 5.1, + "learning_rate": 4.941971759300248e-07, + "loss": 8.7201, + "step": 14100 + }, + { + "epoch": 5.11, + "learning_rate": 4.941208734766332e-07, + "loss": 8.6761, + "step": 14120 + }, + { + "epoch": 5.11, + "learning_rate": 4.940440786047627e-07, + "loss": 8.7667, + "step": 14140 + }, + { + "epoch": 5.12, + "learning_rate": 4.939667914693168e-07, + "loss": 8.7497, + "step": 14160 + }, + { + "epoch": 5.13, + "learning_rate": 4.938890122261922e-07, + "loss": 8.6883, + "step": 14180 + }, + { + "epoch": 5.14, + "learning_rate": 4.93810741032278e-07, + "loss": 8.6371, + "step": 14200 + }, + { + "epoch": 5.14, + "learning_rate": 4.937319780454559e-07, + "loss": 8.5895, + "step": 14220 + }, + { + "epoch": 5.15, + "learning_rate": 4.936527234245994e-07, + "loss": 8.7159, + "step": 14240 + }, + { + "epoch": 5.16, + "learning_rate": 4.935729773295737e-07, + "loss": 8.6975, + "step": 14260 + }, + { + "epoch": 5.16, + "learning_rate": 4.934927399212354e-07, + "loss": 8.7762, + "step": 14280 + }, + { + "epoch": 5.17, + "learning_rate": 4.934120113614321e-07, + "loss": 8.6464, + "step": 14300 + }, + { + "epoch": 5.18, + "learning_rate": 4.933307918130022e-07, + "loss": 8.6431, + "step": 14320 + }, + { + "epoch": 5.19, + "learning_rate": 4.932490814397744e-07, + "loss": 8.6835, + "step": 14340 + }, + { + "epoch": 5.19, + "learning_rate": 4.931668804065674e-07, + "loss": 8.7269, + "step": 14360 + }, + { + "epoch": 5.2, + "learning_rate": 4.930841888791897e-07, + "loss": 8.6963, + "step": 14380 + }, + { + "epoch": 5.21, + "learning_rate": 4.93001007024439e-07, + "loss": 8.7272, + "step": 14400 + }, + { + "epoch": 5.22, + "learning_rate": 4.929173350101024e-07, + "loss": 8.6172, + "step": 14420 + }, + { + "epoch": 5.22, + "learning_rate": 4.928331730049555e-07, + "loss": 8.6379, + "step": 14440 + }, + { + "epoch": 5.23, + "learning_rate": 4.927485211787622e-07, + "loss": 8.6049, + "step": 14460 + }, + { + "epoch": 5.24, + "learning_rate": 4.926633797022744e-07, + "loss": 8.7153, + "step": 14480 + }, + { + "epoch": 5.24, + "learning_rate": 4.925777487472317e-07, + "loss": 8.6468, + "step": 14500 + }, + { + "epoch": 5.25, + "learning_rate": 4.924916284863614e-07, + "loss": 8.7272, + "step": 14520 + }, + { + "epoch": 5.26, + "learning_rate": 4.924050190933772e-07, + "loss": 8.7109, + "step": 14540 + }, + { + "epoch": 5.27, + "learning_rate": 4.923179207429798e-07, + "loss": 8.7212, + "step": 14560 + }, + { + "epoch": 5.27, + "learning_rate": 4.922303336108562e-07, + "loss": 8.6957, + "step": 14580 + }, + { + "epoch": 5.28, + "learning_rate": 4.92142257873679e-07, + "loss": 8.7531, + "step": 14600 + }, + { + "epoch": 5.29, + "learning_rate": 4.920536937091067e-07, + "loss": 8.6634, + "step": 14620 + }, + { + "epoch": 5.29, + "learning_rate": 4.919646412957829e-07, + "loss": 8.687, + "step": 14640 + }, + { + "epoch": 5.3, + "learning_rate": 4.918751008133362e-07, + "loss": 8.7248, + "step": 14660 + }, + { + "epoch": 5.31, + "learning_rate": 4.917850724423792e-07, + "loss": 8.6262, + "step": 14680 + }, + { + "epoch": 5.32, + "learning_rate": 4.916945563645093e-07, + "loss": 8.7897, + "step": 14700 + }, + { + "epoch": 5.32, + "learning_rate": 4.91603552762307e-07, + "loss": 8.6599, + "step": 14720 + }, + { + "epoch": 5.33, + "learning_rate": 4.915120618193368e-07, + "loss": 8.6223, + "step": 14740 + }, + { + "epoch": 5.34, + "learning_rate": 4.914200837201458e-07, + "loss": 8.807, + "step": 14760 + }, + { + "epoch": 5.35, + "learning_rate": 4.913276186502639e-07, + "loss": 8.7852, + "step": 14780 + }, + { + "epoch": 5.35, + "learning_rate": 4.912346667962032e-07, + "loss": 8.6163, + "step": 14800 + }, + { + "epoch": 5.36, + "learning_rate": 4.911412283454578e-07, + "loss": 8.5919, + "step": 14820 + }, + { + "epoch": 5.37, + "learning_rate": 4.910473034865032e-07, + "loss": 8.6501, + "step": 14840 + }, + { + "epoch": 5.37, + "learning_rate": 4.909528924087963e-07, + "loss": 8.7825, + "step": 14860 + }, + { + "epoch": 5.38, + "learning_rate": 4.908579953027743e-07, + "loss": 8.7476, + "step": 14880 + }, + { + "epoch": 5.39, + "learning_rate": 4.907626123598551e-07, + "loss": 8.7136, + "step": 14900 + }, + { + "epoch": 5.4, + "learning_rate": 4.906667437724366e-07, + "loss": 8.6728, + "step": 14920 + }, + { + "epoch": 5.4, + "learning_rate": 4.905703897338963e-07, + "loss": 8.7006, + "step": 14940 + }, + { + "epoch": 5.41, + "learning_rate": 4.904735504385906e-07, + "loss": 8.68, + "step": 14960 + }, + { + "epoch": 5.42, + "learning_rate": 4.903762260818551e-07, + "loss": 8.5956, + "step": 14980 + }, + { + "epoch": 5.42, + "learning_rate": 4.902784168600036e-07, + "loss": 8.5962, + "step": 15000 + }, + { + "epoch": 5.43, + "learning_rate": 4.90180122970328e-07, + "loss": 8.7187, + "step": 15020 + }, + { + "epoch": 5.44, + "learning_rate": 4.900813446110978e-07, + "loss": 8.6258, + "step": 15040 + }, + { + "epoch": 5.45, + "learning_rate": 4.899820819815598e-07, + "loss": 8.6823, + "step": 15060 + }, + { + "epoch": 5.45, + "learning_rate": 4.898823352819375e-07, + "loss": 8.7023, + "step": 15080 + }, + { + "epoch": 5.46, + "learning_rate": 4.897821047134309e-07, + "loss": 8.7809, + "step": 15100 + }, + { + "epoch": 5.47, + "learning_rate": 4.896813904782162e-07, + "loss": 8.6179, + "step": 15120 + }, + { + "epoch": 5.48, + "learning_rate": 4.895801927794448e-07, + "loss": 8.7411, + "step": 15140 + }, + { + "epoch": 5.48, + "learning_rate": 4.894785118212435e-07, + "loss": 8.7115, + "step": 15160 + }, + { + "epoch": 5.49, + "learning_rate": 4.893763478087139e-07, + "loss": 8.6297, + "step": 15180 + }, + { + "epoch": 5.5, + "learning_rate": 4.892737009479322e-07, + "loss": 8.5999, + "step": 15200 + }, + { + "epoch": 5.5, + "learning_rate": 4.891705714459482e-07, + "loss": 8.7592, + "step": 15220 + }, + { + "epoch": 5.51, + "learning_rate": 4.890669595107853e-07, + "loss": 8.5834, + "step": 15240 + }, + { + "epoch": 5.52, + "learning_rate": 4.889628653514402e-07, + "loss": 8.6487, + "step": 15260 + }, + { + "epoch": 5.53, + "learning_rate": 4.888582891778821e-07, + "loss": 8.6445, + "step": 15280 + }, + { + "epoch": 5.53, + "learning_rate": 4.887532312010527e-07, + "loss": 8.697, + "step": 15300 + }, + { + "epoch": 5.54, + "learning_rate": 4.886476916328654e-07, + "loss": 8.6094, + "step": 15320 + }, + { + "epoch": 5.55, + "learning_rate": 4.885416706862048e-07, + "loss": 8.7348, + "step": 15340 + }, + { + "epoch": 5.56, + "learning_rate": 4.88435168574927e-07, + "loss": 8.7422, + "step": 15360 + }, + { + "epoch": 5.56, + "learning_rate": 4.883281855138585e-07, + "loss": 8.7982, + "step": 15380 + }, + { + "epoch": 5.57, + "learning_rate": 4.882207217187954e-07, + "loss": 8.64, + "step": 15400 + }, + { + "epoch": 5.58, + "learning_rate": 4.881127774065044e-07, + "loss": 8.74, + "step": 15420 + }, + { + "epoch": 5.58, + "learning_rate": 4.880043527947205e-07, + "loss": 8.6966, + "step": 15440 + }, + { + "epoch": 5.59, + "learning_rate": 4.878954481021483e-07, + "loss": 8.7114, + "step": 15460 + }, + { + "epoch": 5.6, + "learning_rate": 4.877860635484606e-07, + "loss": 8.6878, + "step": 15480 + }, + { + "epoch": 5.61, + "learning_rate": 4.876761993542975e-07, + "loss": 8.6922, + "step": 15500 + }, + { + "epoch": 5.61, + "learning_rate": 4.875658557412676e-07, + "loss": 8.6065, + "step": 15520 + }, + { + "epoch": 5.62, + "learning_rate": 4.874550329319457e-07, + "loss": 8.6935, + "step": 15540 + }, + { + "epoch": 5.63, + "learning_rate": 4.873437311498736e-07, + "loss": 8.766, + "step": 15560 + }, + { + "epoch": 5.63, + "learning_rate": 4.872319506195592e-07, + "loss": 8.6696, + "step": 15580 + }, + { + "epoch": 5.64, + "learning_rate": 4.871196915664761e-07, + "loss": 8.6916, + "step": 15600 + }, + { + "epoch": 5.65, + "learning_rate": 4.870069542170629e-07, + "loss": 8.6553, + "step": 15620 + }, + { + "epoch": 5.66, + "learning_rate": 4.868937387987233e-07, + "loss": 8.698, + "step": 15640 + }, + { + "epoch": 5.66, + "learning_rate": 4.867800455398251e-07, + "loss": 8.6451, + "step": 15660 + }, + { + "epoch": 5.67, + "learning_rate": 4.866658746697001e-07, + "loss": 8.6472, + "step": 15680 + }, + { + "epoch": 5.68, + "learning_rate": 4.865512264186433e-07, + "loss": 8.5794, + "step": 15700 + }, + { + "epoch": 5.69, + "learning_rate": 4.864361010179128e-07, + "loss": 8.6878, + "step": 15720 + }, + { + "epoch": 5.69, + "learning_rate": 4.863204986997294e-07, + "loss": 8.5777, + "step": 15740 + }, + { + "epoch": 5.7, + "learning_rate": 4.862044196972751e-07, + "loss": 8.724, + "step": 15760 + }, + { + "epoch": 5.71, + "learning_rate": 4.860878642446943e-07, + "loss": 8.7462, + "step": 15780 + }, + { + "epoch": 5.71, + "learning_rate": 4.859708325770919e-07, + "loss": 8.6529, + "step": 15800 + }, + { + "epoch": 5.72, + "learning_rate": 4.858533249305336e-07, + "loss": 8.628, + "step": 15820 + }, + { + "epoch": 5.73, + "learning_rate": 4.857353415420452e-07, + "loss": 8.6557, + "step": 15840 + }, + { + "epoch": 5.74, + "learning_rate": 4.856168826496122e-07, + "loss": 8.6378, + "step": 15860 + }, + { + "epoch": 5.74, + "learning_rate": 4.854979484921789e-07, + "loss": 8.6128, + "step": 15880 + }, + { + "epoch": 5.75, + "learning_rate": 4.853785393096487e-07, + "loss": 8.6342, + "step": 15900 + }, + { + "epoch": 5.76, + "learning_rate": 4.852586553428828e-07, + "loss": 8.623, + "step": 15920 + }, + { + "epoch": 5.76, + "learning_rate": 4.851382968337004e-07, + "loss": 8.6746, + "step": 15940 + }, + { + "epoch": 5.77, + "learning_rate": 4.850174640248775e-07, + "loss": 8.7209, + "step": 15960 + }, + { + "epoch": 5.78, + "learning_rate": 4.848961571601475e-07, + "loss": 8.676, + "step": 15980 + }, + { + "epoch": 5.79, + "learning_rate": 4.847743764841993e-07, + "loss": 8.6759, + "step": 16000 + }, + { + "epoch": 5.79, + "learning_rate": 4.84652122242678e-07, + "loss": 8.6827, + "step": 16020 + }, + { + "epoch": 5.8, + "learning_rate": 4.845293946821836e-07, + "loss": 8.6991, + "step": 16040 + }, + { + "epoch": 5.81, + "learning_rate": 4.844061940502711e-07, + "loss": 8.7061, + "step": 16060 + }, + { + "epoch": 5.82, + "learning_rate": 4.842825205954495e-07, + "loss": 8.6462, + "step": 16080 + }, + { + "epoch": 5.82, + "learning_rate": 4.84158374567182e-07, + "loss": 8.647, + "step": 16100 + }, + { + "epoch": 5.83, + "learning_rate": 4.840337562158843e-07, + "loss": 8.8409, + "step": 16120 + }, + { + "epoch": 5.84, + "learning_rate": 4.839086657929256e-07, + "loss": 8.6913, + "step": 16140 + }, + { + "epoch": 5.84, + "learning_rate": 4.837831035506267e-07, + "loss": 8.7391, + "step": 16160 + }, + { + "epoch": 5.85, + "learning_rate": 4.836570697422605e-07, + "loss": 8.7379, + "step": 16180 + }, + { + "epoch": 5.86, + "learning_rate": 4.835305646220509e-07, + "loss": 8.644, + "step": 16200 + }, + { + "epoch": 5.87, + "learning_rate": 4.834035884451725e-07, + "loss": 8.7277, + "step": 16220 + }, + { + "epoch": 5.87, + "learning_rate": 4.832761414677502e-07, + "loss": 8.6412, + "step": 16240 + }, + { + "epoch": 5.88, + "learning_rate": 4.831482239468585e-07, + "loss": 8.7398, + "step": 16260 + }, + { + "epoch": 5.89, + "learning_rate": 4.83019836140521e-07, + "loss": 8.754, + "step": 16280 + }, + { + "epoch": 5.9, + "learning_rate": 4.828909783077099e-07, + "loss": 8.6548, + "step": 16300 + }, + { + "epoch": 5.9, + "learning_rate": 4.827616507083456e-07, + "loss": 8.6114, + "step": 16320 + }, + { + "epoch": 5.91, + "learning_rate": 4.826318536032958e-07, + "loss": 8.6163, + "step": 16340 + }, + { + "epoch": 5.92, + "learning_rate": 4.825015872543758e-07, + "loss": 8.7388, + "step": 16360 + }, + { + "epoch": 5.92, + "learning_rate": 4.823708519243467e-07, + "loss": 8.7207, + "step": 16380 + }, + { + "epoch": 5.93, + "learning_rate": 4.822396478769162e-07, + "loss": 8.7599, + "step": 16400 + }, + { + "epoch": 5.94, + "learning_rate": 4.821079753767371e-07, + "loss": 8.7465, + "step": 16420 + }, + { + "epoch": 5.95, + "learning_rate": 4.819758346894072e-07, + "loss": 8.6565, + "step": 16440 + }, + { + "epoch": 5.95, + "learning_rate": 4.818432260814688e-07, + "loss": 8.6793, + "step": 16460 + }, + { + "epoch": 5.96, + "learning_rate": 4.817101498204078e-07, + "loss": 8.7599, + "step": 16480 + }, + { + "epoch": 5.97, + "learning_rate": 4.815766061746537e-07, + "loss": 8.7688, + "step": 16500 + }, + { + "epoch": 5.97, + "learning_rate": 4.814425954135785e-07, + "loss": 8.6962, + "step": 16520 + }, + { + "epoch": 5.98, + "learning_rate": 4.813081178074968e-07, + "loss": 8.7069, + "step": 16540 + }, + { + "epoch": 5.99, + "learning_rate": 4.811731736276643e-07, + "loss": 8.6498, + "step": 16560 + }, + { + "epoch": 6.0, + "learning_rate": 4.810377631462785e-07, + "loss": 8.6458, + "step": 16580 + }, + { + "epoch": 6.0, + "learning_rate": 4.809018866364766e-07, + "loss": 8.6952, + "step": 16600 + }, + { + "epoch": 6.01, + "learning_rate": 4.80765544372337e-07, + "loss": 8.6439, + "step": 16620 + }, + { + "epoch": 6.02, + "learning_rate": 4.806287366288766e-07, + "loss": 8.5903, + "step": 16640 + }, + { + "epoch": 6.03, + "learning_rate": 4.804914636820516e-07, + "loss": 8.7511, + "step": 16660 + }, + { + "epoch": 6.03, + "learning_rate": 4.803537258087566e-07, + "loss": 8.6639, + "step": 16680 + }, + { + "epoch": 6.04, + "learning_rate": 4.802155232868239e-07, + "loss": 8.9259, + "step": 16700 + }, + { + "epoch": 6.05, + "learning_rate": 4.800768563950231e-07, + "loss": 8.662, + "step": 16720 + }, + { + "epoch": 6.05, + "learning_rate": 4.799377254130606e-07, + "loss": 8.7333, + "step": 16740 + }, + { + "epoch": 6.06, + "learning_rate": 4.797981306215784e-07, + "loss": 8.6613, + "step": 16760 + }, + { + "epoch": 6.07, + "learning_rate": 4.79658072302155e-07, + "loss": 8.7945, + "step": 16780 + }, + { + "epoch": 6.08, + "learning_rate": 4.795175507373028e-07, + "loss": 8.6238, + "step": 16800 + }, + { + "epoch": 6.08, + "learning_rate": 4.793765662104696e-07, + "loss": 8.6998, + "step": 16820 + }, + { + "epoch": 6.09, + "learning_rate": 4.792351190060363e-07, + "loss": 8.7753, + "step": 16840 + }, + { + "epoch": 6.1, + "learning_rate": 4.790932094093175e-07, + "loss": 8.5856, + "step": 16860 + }, + { + "epoch": 6.1, + "learning_rate": 4.789508377065603e-07, + "loss": 8.6769, + "step": 16880 + }, + { + "epoch": 6.11, + "learning_rate": 4.788080041849441e-07, + "loss": 8.6106, + "step": 16900 + }, + { + "epoch": 6.12, + "learning_rate": 4.786647091325796e-07, + "loss": 8.6677, + "step": 16920 + }, + { + "epoch": 6.13, + "learning_rate": 4.785209528385087e-07, + "loss": 8.5975, + "step": 16940 + }, + { + "epoch": 6.13, + "learning_rate": 4.783767355927033e-07, + "loss": 8.7969, + "step": 16960 + }, + { + "epoch": 6.14, + "learning_rate": 4.782320576860656e-07, + "loss": 8.6307, + "step": 16980 + }, + { + "epoch": 6.15, + "learning_rate": 4.780869194104268e-07, + "loss": 8.5835, + "step": 17000 + }, + { + "epoch": 6.16, + "learning_rate": 4.779413210585464e-07, + "loss": 8.6421, + "step": 17020 + }, + { + "epoch": 6.16, + "learning_rate": 4.777952629241122e-07, + "loss": 8.7197, + "step": 17040 + }, + { + "epoch": 6.17, + "learning_rate": 4.776487453017397e-07, + "loss": 8.672, + "step": 17060 + }, + { + "epoch": 6.18, + "learning_rate": 4.775017684869707e-07, + "loss": 8.6878, + "step": 17080 + }, + { + "epoch": 6.18, + "learning_rate": 4.773543327762737e-07, + "loss": 8.6636, + "step": 17100 + }, + { + "epoch": 6.19, + "learning_rate": 4.772064384670424e-07, + "loss": 8.5946, + "step": 17120 + }, + { + "epoch": 6.2, + "learning_rate": 4.77058085857596e-07, + "loss": 8.7402, + "step": 17140 + }, + { + "epoch": 6.21, + "learning_rate": 4.769092752471778e-07, + "loss": 8.5723, + "step": 17160 + }, + { + "epoch": 6.21, + "learning_rate": 4.7676000693595506e-07, + "loss": 8.8408, + "step": 17180 + }, + { + "epoch": 6.22, + "learning_rate": 4.766102812250183e-07, + "loss": 8.723, + "step": 17200 + }, + { + "epoch": 6.23, + "learning_rate": 4.764600984163808e-07, + "loss": 8.6513, + "step": 17220 + }, + { + "epoch": 6.24, + "learning_rate": 4.7630945881297746e-07, + "loss": 8.6348, + "step": 17240 + }, + { + "epoch": 6.24, + "learning_rate": 4.761583627186649e-07, + "loss": 8.572, + "step": 17260 + }, + { + "epoch": 6.25, + "learning_rate": 4.7600681043822044e-07, + "loss": 8.7423, + "step": 17280 + }, + { + "epoch": 6.26, + "learning_rate": 4.7585480227734163e-07, + "loss": 8.6616, + "step": 17300 + }, + { + "epoch": 6.26, + "learning_rate": 4.7570233854264564e-07, + "loss": 8.7175, + "step": 17320 + }, + { + "epoch": 6.27, + "learning_rate": 4.7554941954166826e-07, + "loss": 8.7237, + "step": 17340 + }, + { + "epoch": 6.28, + "learning_rate": 4.7539604558286395e-07, + "loss": 8.8209, + "step": 17360 + }, + { + "epoch": 6.29, + "learning_rate": 4.752422169756047e-07, + "loss": 8.6999, + "step": 17380 + }, + { + "epoch": 6.29, + "learning_rate": 4.7508793403017976e-07, + "loss": 8.6965, + "step": 17400 + }, + { + "epoch": 6.3, + "learning_rate": 4.749331970577946e-07, + "loss": 8.6204, + "step": 17420 + }, + { + "epoch": 6.31, + "learning_rate": 4.747780063705705e-07, + "loss": 8.6613, + "step": 17440 + }, + { + "epoch": 6.31, + "learning_rate": 4.7462236228154405e-07, + "loss": 8.7295, + "step": 17460 + }, + { + "epoch": 6.32, + "learning_rate": 4.744662651046666e-07, + "loss": 8.6806, + "step": 17480 + }, + { + "epoch": 6.33, + "learning_rate": 4.7430971515480304e-07, + "loss": 8.6633, + "step": 17500 + }, + { + "epoch": 6.34, + "learning_rate": 4.741527127477317e-07, + "loss": 8.676, + "step": 17520 + }, + { + "epoch": 6.34, + "learning_rate": 4.7399525820014376e-07, + "loss": 8.6381, + "step": 17540 + }, + { + "epoch": 6.35, + "learning_rate": 4.738373518296421e-07, + "loss": 8.7071, + "step": 17560 + }, + { + "epoch": 6.36, + "learning_rate": 4.7367899395474106e-07, + "loss": 8.6584, + "step": 17580 + }, + { + "epoch": 6.37, + "learning_rate": 4.7352018489486606e-07, + "loss": 8.7591, + "step": 17600 + }, + { + "epoch": 6.37, + "learning_rate": 4.7336092497035207e-07, + "loss": 8.7067, + "step": 17620 + }, + { + "epoch": 6.38, + "learning_rate": 4.732012145024439e-07, + "loss": 8.6802, + "step": 17640 + }, + { + "epoch": 6.39, + "learning_rate": 4.7304105381329484e-07, + "loss": 8.9072, + "step": 17660 + }, + { + "epoch": 6.39, + "learning_rate": 4.7288044322596663e-07, + "loss": 8.6963, + "step": 17680 + }, + { + "epoch": 6.4, + "learning_rate": 4.727193830644285e-07, + "loss": 8.6309, + "step": 17700 + }, + { + "epoch": 6.41, + "learning_rate": 4.725578736535562e-07, + "loss": 8.6925, + "step": 17720 + }, + { + "epoch": 6.42, + "learning_rate": 4.723959153191319e-07, + "loss": 8.653, + "step": 17740 + }, + { + "epoch": 6.42, + "learning_rate": 4.722335083878433e-07, + "loss": 8.612, + "step": 17760 + }, + { + "epoch": 6.43, + "learning_rate": 4.7207065318728296e-07, + "loss": 8.7143, + "step": 17780 + }, + { + "epoch": 6.44, + "learning_rate": 4.7190735004594753e-07, + "loss": 8.6625, + "step": 17800 + }, + { + "epoch": 6.44, + "learning_rate": 4.7174359929323735e-07, + "loss": 8.6155, + "step": 17820 + }, + { + "epoch": 6.45, + "learning_rate": 4.715794012594555e-07, + "loss": 8.7971, + "step": 17840 + }, + { + "epoch": 6.46, + "learning_rate": 4.7141475627580754e-07, + "loss": 8.6821, + "step": 17860 + }, + { + "epoch": 6.47, + "learning_rate": 4.712496646744002e-07, + "loss": 8.6292, + "step": 17880 + }, + { + "epoch": 6.47, + "learning_rate": 4.7108412678824134e-07, + "loss": 8.6244, + "step": 17900 + }, + { + "epoch": 6.48, + "learning_rate": 4.70918142951239e-07, + "loss": 8.5691, + "step": 17920 + }, + { + "epoch": 6.49, + "learning_rate": 4.7075171349820077e-07, + "loss": 8.6863, + "step": 17940 + }, + { + "epoch": 6.5, + "learning_rate": 4.705848387648329e-07, + "loss": 8.6821, + "step": 17960 + }, + { + "epoch": 6.5, + "learning_rate": 4.7041751908774007e-07, + "loss": 8.6589, + "step": 17980 + }, + { + "epoch": 6.51, + "learning_rate": 4.702497548044243e-07, + "loss": 8.7016, + "step": 18000 + }, + { + "epoch": 6.52, + "learning_rate": 4.700815462532845e-07, + "loss": 8.725, + "step": 18020 + }, + { + "epoch": 6.52, + "learning_rate": 4.699128937736157e-07, + "loss": 8.6708, + "step": 18040 + }, + { + "epoch": 6.53, + "learning_rate": 4.697437977056084e-07, + "loss": 8.8955, + "step": 18060 + }, + { + "epoch": 6.54, + "learning_rate": 4.695742583903478e-07, + "loss": 8.7241, + "step": 18080 + }, + { + "epoch": 6.55, + "learning_rate": 4.694042761698134e-07, + "loss": 8.6222, + "step": 18100 + }, + { + "epoch": 6.55, + "learning_rate": 4.692338513868776e-07, + "loss": 8.7879, + "step": 18120 + }, + { + "epoch": 6.56, + "learning_rate": 4.6906298438530604e-07, + "loss": 8.6359, + "step": 18140 + }, + { + "epoch": 6.57, + "learning_rate": 4.6889167550975613e-07, + "loss": 8.6056, + "step": 18160 + }, + { + "epoch": 6.58, + "learning_rate": 4.6871992510577644e-07, + "loss": 8.6364, + "step": 18180 + }, + { + "epoch": 6.58, + "learning_rate": 4.6854773351980647e-07, + "loss": 8.6699, + "step": 18200 + }, + { + "epoch": 6.59, + "learning_rate": 4.683751010991754e-07, + "loss": 8.6423, + "step": 18220 + }, + { + "epoch": 6.6, + "learning_rate": 4.682020281921017e-07, + "loss": 8.7572, + "step": 18240 + }, + { + "epoch": 6.6, + "learning_rate": 4.6802851514769227e-07, + "loss": 8.7399, + "step": 18260 + }, + { + "epoch": 6.61, + "learning_rate": 4.67854562315942e-07, + "loss": 8.6589, + "step": 18280 + }, + { + "epoch": 6.62, + "learning_rate": 4.6768017004773263e-07, + "loss": 8.7953, + "step": 18300 + }, + { + "epoch": 6.63, + "learning_rate": 4.6750533869483257e-07, + "loss": 8.5799, + "step": 18320 + }, + { + "epoch": 6.63, + "learning_rate": 4.6733006860989567e-07, + "loss": 8.6455, + "step": 18340 + }, + { + "epoch": 6.64, + "learning_rate": 4.6715436014646077e-07, + "loss": 8.6309, + "step": 18360 + }, + { + "epoch": 6.65, + "learning_rate": 4.669782136589512e-07, + "loss": 8.6688, + "step": 18380 + }, + { + "epoch": 6.65, + "learning_rate": 4.6680162950267356e-07, + "loss": 8.6939, + "step": 18400 + }, + { + "epoch": 6.66, + "learning_rate": 4.666246080338175e-07, + "loss": 8.6871, + "step": 18420 + }, + { + "epoch": 6.67, + "learning_rate": 4.6644714960945453e-07, + "loss": 8.7004, + "step": 18440 + }, + { + "epoch": 6.68, + "learning_rate": 4.662692545875378e-07, + "loss": 8.6696, + "step": 18460 + }, + { + "epoch": 6.68, + "learning_rate": 4.660909233269009e-07, + "loss": 8.6115, + "step": 18480 + }, + { + "epoch": 6.69, + "learning_rate": 4.6591215618725766e-07, + "loss": 8.6684, + "step": 18500 + }, + { + "epoch": 6.7, + "learning_rate": 4.657329535292007e-07, + "loss": 8.7201, + "step": 18520 + }, + { + "epoch": 6.71, + "learning_rate": 4.6555331571420155e-07, + "loss": 8.6737, + "step": 18540 + }, + { + "epoch": 6.71, + "learning_rate": 4.653732431046092e-07, + "loss": 8.709, + "step": 18560 + }, + { + "epoch": 6.72, + "learning_rate": 4.6519273606364984e-07, + "loss": 8.5615, + "step": 18580 + }, + { + "epoch": 6.73, + "learning_rate": 4.6501179495542585e-07, + "loss": 8.6053, + "step": 18600 + }, + { + "epoch": 6.73, + "learning_rate": 4.6483042014491527e-07, + "loss": 8.6445, + "step": 18620 + }, + { + "epoch": 6.74, + "learning_rate": 4.646486119979709e-07, + "loss": 8.7607, + "step": 18640 + }, + { + "epoch": 6.75, + "learning_rate": 4.6446637088131956e-07, + "loss": 8.7539, + "step": 18660 + }, + { + "epoch": 6.76, + "learning_rate": 4.642836971625616e-07, + "loss": 8.6076, + "step": 18680 + }, + { + "epoch": 6.76, + "learning_rate": 4.6410059121016984e-07, + "loss": 8.7657, + "step": 18700 + }, + { + "epoch": 6.77, + "learning_rate": 4.639170533934891e-07, + "loss": 8.5961, + "step": 18720 + }, + { + "epoch": 6.78, + "learning_rate": 4.6373308408273495e-07, + "loss": 8.7081, + "step": 18740 + }, + { + "epoch": 6.78, + "learning_rate": 4.635486836489938e-07, + "loss": 8.8006, + "step": 18760 + }, + { + "epoch": 6.79, + "learning_rate": 4.633638524642215e-07, + "loss": 8.6963, + "step": 18780 + }, + { + "epoch": 6.8, + "learning_rate": 4.631785909012426e-07, + "loss": 8.6344, + "step": 18800 + }, + { + "epoch": 6.81, + "learning_rate": 4.6299289933375007e-07, + "loss": 8.7319, + "step": 18820 + }, + { + "epoch": 6.81, + "learning_rate": 4.6280677813630397e-07, + "loss": 8.7458, + "step": 18840 + }, + { + "epoch": 6.82, + "learning_rate": 4.626202276843311e-07, + "loss": 8.6465, + "step": 18860 + }, + { + "epoch": 6.83, + "learning_rate": 4.624332483541242e-07, + "loss": 8.5764, + "step": 18880 + }, + { + "epoch": 6.84, + "learning_rate": 4.62245840522841e-07, + "loss": 8.7452, + "step": 18900 + }, + { + "epoch": 6.84, + "learning_rate": 4.6205800456850343e-07, + "loss": 8.6529, + "step": 18920 + }, + { + "epoch": 6.85, + "learning_rate": 4.618697408699973e-07, + "loss": 8.6897, + "step": 18940 + }, + { + "epoch": 6.86, + "learning_rate": 4.6168104980707103e-07, + "loss": 8.7269, + "step": 18960 + }, + { + "epoch": 6.86, + "learning_rate": 4.6149193176033505e-07, + "loss": 8.6609, + "step": 18980 + }, + { + "epoch": 6.87, + "learning_rate": 4.6130238711126123e-07, + "loss": 8.6924, + "step": 19000 + }, + { + "epoch": 6.88, + "learning_rate": 4.6111241624218166e-07, + "loss": 8.7371, + "step": 19020 + }, + { + "epoch": 6.89, + "learning_rate": 4.609220195362886e-07, + "loss": 8.6188, + "step": 19040 + }, + { + "epoch": 6.89, + "learning_rate": 4.607311973776328e-07, + "loss": 8.7018, + "step": 19060 + }, + { + "epoch": 6.9, + "learning_rate": 4.6053995015112343e-07, + "loss": 8.5875, + "step": 19080 + }, + { + "epoch": 6.91, + "learning_rate": 4.6034827824252715e-07, + "loss": 8.7276, + "step": 19100 + }, + { + "epoch": 6.92, + "learning_rate": 4.601561820384671e-07, + "loss": 8.5814, + "step": 19120 + }, + { + "epoch": 6.92, + "learning_rate": 4.5996366192642226e-07, + "loss": 8.6103, + "step": 19140 + }, + { + "epoch": 6.93, + "learning_rate": 4.597707182947268e-07, + "loss": 8.6523, + "step": 19160 + }, + { + "epoch": 6.94, + "learning_rate": 4.595773515325691e-07, + "loss": 8.6586, + "step": 19180 + }, + { + "epoch": 6.94, + "learning_rate": 4.593835620299911e-07, + "loss": 8.6554, + "step": 19200 + }, + { + "epoch": 6.95, + "learning_rate": 4.5918935017788724e-07, + "loss": 8.6933, + "step": 19220 + }, + { + "epoch": 6.96, + "learning_rate": 4.589947163680041e-07, + "loss": 8.6822, + "step": 19240 + }, + { + "epoch": 6.97, + "learning_rate": 4.5879966099293955e-07, + "loss": 8.7641, + "step": 19260 + }, + { + "epoch": 6.97, + "learning_rate": 4.5860418444614133e-07, + "loss": 8.6162, + "step": 19280 + }, + { + "epoch": 6.98, + "learning_rate": 4.5840828712190717e-07, + "loss": 8.6388, + "step": 19300 + }, + { + "epoch": 6.99, + "learning_rate": 4.5821196941538334e-07, + "loss": 8.8059, + "step": 19320 + }, + { + "epoch": 6.99, + "learning_rate": 4.580152317225641e-07, + "loss": 8.6918, + "step": 19340 + }, + { + "epoch": 7.0, + "learning_rate": 4.5781807444029075e-07, + "loss": 8.6036, + "step": 19360 + }, + { + "epoch": 7.01, + "learning_rate": 4.5762049796625124e-07, + "loss": 8.697, + "step": 19380 + }, + { + "epoch": 7.02, + "learning_rate": 4.5742250269897884e-07, + "loss": 8.65, + "step": 19400 + }, + { + "epoch": 7.02, + "learning_rate": 4.572240890378517e-07, + "loss": 8.6848, + "step": 19420 + }, + { + "epoch": 7.03, + "learning_rate": 4.570252573830918e-07, + "loss": 8.7286, + "step": 19440 + }, + { + "epoch": 7.04, + "learning_rate": 4.568260081357643e-07, + "loss": 8.5952, + "step": 19460 + }, + { + "epoch": 7.05, + "learning_rate": 4.5662634169777674e-07, + "loss": 8.6095, + "step": 19480 + }, + { + "epoch": 7.05, + "learning_rate": 4.5642625847187813e-07, + "loss": 8.7558, + "step": 19500 + }, + { + "epoch": 7.06, + "learning_rate": 4.5622575886165826e-07, + "loss": 8.6469, + "step": 19520 + }, + { + "epoch": 7.07, + "learning_rate": 4.5602484327154666e-07, + "loss": 8.6547, + "step": 19540 + }, + { + "epoch": 7.07, + "learning_rate": 4.55823512106812e-07, + "loss": 8.6466, + "step": 19560 + }, + { + "epoch": 7.08, + "learning_rate": 4.5562176577356146e-07, + "loss": 8.7095, + "step": 19580 + }, + { + "epoch": 7.09, + "learning_rate": 4.554196046787392e-07, + "loss": 8.7402, + "step": 19600 + }, + { + "epoch": 7.1, + "learning_rate": 4.552170292301264e-07, + "loss": 8.6959, + "step": 19620 + }, + { + "epoch": 7.1, + "learning_rate": 4.550140398363398e-07, + "loss": 8.6991, + "step": 19640 + }, + { + "epoch": 7.11, + "learning_rate": 4.5481063690683116e-07, + "loss": 8.6932, + "step": 19660 + }, + { + "epoch": 7.12, + "learning_rate": 4.546068208518865e-07, + "loss": 8.702, + "step": 19680 + }, + { + "epoch": 7.12, + "learning_rate": 4.5440259208262497e-07, + "loss": 8.7574, + "step": 19700 + }, + { + "epoch": 7.13, + "learning_rate": 4.5419795101099847e-07, + "loss": 8.7992, + "step": 19720 + }, + { + "epoch": 7.14, + "learning_rate": 4.539928980497902e-07, + "loss": 8.7671, + "step": 19740 + }, + { + "epoch": 7.15, + "learning_rate": 4.537874336126146e-07, + "loss": 8.7184, + "step": 19760 + }, + { + "epoch": 7.15, + "learning_rate": 4.535815581139157e-07, + "loss": 8.6308, + "step": 19780 + }, + { + "epoch": 7.16, + "learning_rate": 4.5337527196896715e-07, + "loss": 8.7107, + "step": 19800 + }, + { + "epoch": 7.17, + "learning_rate": 4.5316857559387036e-07, + "loss": 8.6339, + "step": 19820 + }, + { + "epoch": 7.18, + "learning_rate": 4.529614694055546e-07, + "loss": 8.676, + "step": 19840 + }, + { + "epoch": 7.18, + "learning_rate": 4.527539538217757e-07, + "loss": 8.686, + "step": 19860 + }, + { + "epoch": 7.19, + "learning_rate": 4.5254602926111533e-07, + "loss": 8.7237, + "step": 19880 + }, + { + "epoch": 7.2, + "learning_rate": 4.5233769614298e-07, + "loss": 8.6244, + "step": 19900 + }, + { + "epoch": 7.2, + "learning_rate": 4.521289548876003e-07, + "loss": 8.6432, + "step": 19920 + }, + { + "epoch": 7.21, + "learning_rate": 4.519198059160303e-07, + "loss": 8.6141, + "step": 19940 + }, + { + "epoch": 7.22, + "learning_rate": 4.517102496501462e-07, + "loss": 8.6889, + "step": 19960 + }, + { + "epoch": 7.23, + "learning_rate": 4.5150028651264596e-07, + "loss": 8.5727, + "step": 19980 + }, + { + "epoch": 7.23, + "learning_rate": 4.512899169270481e-07, + "loss": 8.6158, + "step": 20000 + }, + { + "epoch": 7.24, + "learning_rate": 4.510791413176912e-07, + "loss": 8.7124, + "step": 20020 + }, + { + "epoch": 7.25, + "learning_rate": 4.508679601097326e-07, + "loss": 8.6239, + "step": 20040 + }, + { + "epoch": 7.25, + "learning_rate": 4.5065637372914784e-07, + "loss": 8.7528, + "step": 20060 + }, + { + "epoch": 7.26, + "learning_rate": 4.504443826027298e-07, + "loss": 8.6065, + "step": 20080 + }, + { + "epoch": 7.27, + "learning_rate": 4.5023198715808783e-07, + "loss": 8.7053, + "step": 20100 + }, + { + "epoch": 7.28, + "learning_rate": 4.5001918782364665e-07, + "loss": 8.6622, + "step": 20120 + }, + { + "epoch": 7.28, + "learning_rate": 4.498059850286459e-07, + "loss": 8.6676, + "step": 20140 + }, + { + "epoch": 7.29, + "learning_rate": 4.4959237920313877e-07, + "loss": 8.6896, + "step": 20160 + }, + { + "epoch": 7.3, + "learning_rate": 4.493783707779916e-07, + "loss": 8.7231, + "step": 20180 + }, + { + "epoch": 7.31, + "learning_rate": 4.491639601848828e-07, + "loss": 8.6187, + "step": 20200 + }, + { + "epoch": 7.31, + "learning_rate": 4.489491478563019e-07, + "loss": 8.7279, + "step": 20220 + }, + { + "epoch": 7.32, + "learning_rate": 4.4873393422554894e-07, + "loss": 8.6275, + "step": 20240 + }, + { + "epoch": 7.33, + "learning_rate": 4.4851831972673324e-07, + "loss": 8.6786, + "step": 20260 + }, + { + "epoch": 7.33, + "learning_rate": 4.483023047947729e-07, + "loss": 8.679, + "step": 20280 + }, + { + "epoch": 7.34, + "learning_rate": 4.4808588986539355e-07, + "loss": 8.6271, + "step": 20300 + }, + { + "epoch": 7.35, + "learning_rate": 4.478690753751278e-07, + "loss": 8.6284, + "step": 20320 + }, + { + "epoch": 7.36, + "learning_rate": 4.476518617613142e-07, + "loss": 8.7289, + "step": 20340 + }, + { + "epoch": 7.36, + "learning_rate": 4.4743424946209627e-07, + "loss": 8.7282, + "step": 20360 + }, + { + "epoch": 7.37, + "learning_rate": 4.4721623891642185e-07, + "loss": 8.709, + "step": 20380 + }, + { + "epoch": 7.38, + "learning_rate": 4.46997830564042e-07, + "loss": 8.8007, + "step": 20400 + }, + { + "epoch": 7.39, + "learning_rate": 4.4677902484551023e-07, + "loss": 8.7284, + "step": 20420 + }, + { + "epoch": 7.39, + "learning_rate": 4.4655982220218167e-07, + "loss": 8.6282, + "step": 20440 + }, + { + "epoch": 7.4, + "learning_rate": 4.463402230762119e-07, + "loss": 8.7566, + "step": 20460 + }, + { + "epoch": 7.41, + "learning_rate": 4.461202279105565e-07, + "loss": 8.7809, + "step": 20480 + }, + { + "epoch": 7.41, + "learning_rate": 4.458998371489695e-07, + "loss": 8.6693, + "step": 20500 + }, + { + "epoch": 7.42, + "learning_rate": 4.4567905123600345e-07, + "loss": 8.6117, + "step": 20520 + }, + { + "epoch": 7.43, + "learning_rate": 4.4545787061700746e-07, + "loss": 8.5389, + "step": 20540 + }, + { + "epoch": 7.44, + "learning_rate": 4.4523629573812705e-07, + "loss": 8.6785, + "step": 20560 + }, + { + "epoch": 7.44, + "learning_rate": 4.45014327046303e-07, + "loss": 8.6557, + "step": 20580 + }, + { + "epoch": 7.45, + "learning_rate": 4.447919649892704e-07, + "loss": 8.7228, + "step": 20600 + }, + { + "epoch": 7.46, + "learning_rate": 4.445692100155579e-07, + "loss": 8.661, + "step": 20620 + }, + { + "epoch": 7.46, + "learning_rate": 4.443460625744865e-07, + "loss": 8.5727, + "step": 20640 + }, + { + "epoch": 7.47, + "learning_rate": 4.44122523116169e-07, + "loss": 8.6239, + "step": 20660 + }, + { + "epoch": 7.48, + "learning_rate": 4.438985920915089e-07, + "loss": 8.62, + "step": 20680 + }, + { + "epoch": 7.49, + "learning_rate": 4.436742699521997e-07, + "loss": 8.7129, + "step": 20700 + }, + { + "epoch": 7.49, + "learning_rate": 4.434495571507234e-07, + "loss": 8.6496, + "step": 20720 + }, + { + "epoch": 7.5, + "learning_rate": 4.432244541403506e-07, + "loss": 8.7242, + "step": 20740 + }, + { + "epoch": 7.51, + "learning_rate": 4.4299896137513837e-07, + "loss": 8.7956, + "step": 20760 + }, + { + "epoch": 7.52, + "learning_rate": 4.4277307930993045e-07, + "loss": 8.7197, + "step": 20780 + }, + { + "epoch": 7.52, + "learning_rate": 4.4254680840035554e-07, + "loss": 8.8465, + "step": 20800 + }, + { + "epoch": 7.53, + "learning_rate": 4.423201491028269e-07, + "loss": 8.6558, + "step": 20820 + }, + { + "epoch": 7.54, + "learning_rate": 4.42093101874541e-07, + "loss": 8.7996, + "step": 20840 + }, + { + "epoch": 7.54, + "learning_rate": 4.4186566717347693e-07, + "loss": 8.8602, + "step": 20860 + }, + { + "epoch": 7.55, + "learning_rate": 4.4163784545839543e-07, + "loss": 8.5926, + "step": 20880 + }, + { + "epoch": 7.56, + "learning_rate": 4.414096371888377e-07, + "loss": 8.6053, + "step": 20900 + }, + { + "epoch": 7.57, + "learning_rate": 4.411810428251248e-07, + "loss": 8.6233, + "step": 20920 + }, + { + "epoch": 7.57, + "learning_rate": 4.409520628283565e-07, + "loss": 8.7897, + "step": 20940 + }, + { + "epoch": 7.58, + "learning_rate": 4.407226976604105e-07, + "loss": 8.6926, + "step": 20960 + }, + { + "epoch": 7.59, + "learning_rate": 4.404929477839414e-07, + "loss": 8.6766, + "step": 20980 + }, + { + "epoch": 7.59, + "learning_rate": 4.402628136623798e-07, + "loss": 8.6537, + "step": 21000 + }, + { + "epoch": 7.6, + "learning_rate": 4.400322957599314e-07, + "loss": 8.697, + "step": 21020 + }, + { + "epoch": 7.61, + "learning_rate": 4.3980139454157607e-07, + "loss": 8.6, + "step": 21040 + }, + { + "epoch": 7.62, + "learning_rate": 4.3957011047306656e-07, + "loss": 8.7509, + "step": 21060 + }, + { + "epoch": 7.62, + "learning_rate": 4.393384440209284e-07, + "loss": 8.8166, + "step": 21080 + }, + { + "epoch": 7.63, + "learning_rate": 4.39106395652458e-07, + "loss": 8.7277, + "step": 21100 + }, + { + "epoch": 7.64, + "learning_rate": 4.3887396583572225e-07, + "loss": 8.6775, + "step": 21120 + }, + { + "epoch": 7.65, + "learning_rate": 4.386411550395575e-07, + "loss": 8.637, + "step": 21140 + }, + { + "epoch": 7.65, + "learning_rate": 4.3840796373356864e-07, + "loss": 8.6908, + "step": 21160 + }, + { + "epoch": 7.66, + "learning_rate": 4.381743923881279e-07, + "loss": 8.6484, + "step": 21180 + }, + { + "epoch": 7.67, + "learning_rate": 4.3794044147437437e-07, + "loss": 8.6396, + "step": 21200 + }, + { + "epoch": 7.67, + "learning_rate": 4.377061114642125e-07, + "loss": 8.7759, + "step": 21220 + }, + { + "epoch": 7.68, + "learning_rate": 4.3747140283031153e-07, + "loss": 8.6919, + "step": 21240 + }, + { + "epoch": 7.69, + "learning_rate": 4.3723631604610447e-07, + "loss": 8.6463, + "step": 21260 + }, + { + "epoch": 7.7, + "learning_rate": 4.3700085158578694e-07, + "loss": 8.8272, + "step": 21280 + }, + { + "epoch": 7.7, + "learning_rate": 4.367650099243166e-07, + "loss": 8.6321, + "step": 21300 + }, + { + "epoch": 7.71, + "learning_rate": 4.365287915374118e-07, + "loss": 8.6541, + "step": 21320 + }, + { + "epoch": 7.72, + "learning_rate": 4.362921969015509e-07, + "loss": 8.6122, + "step": 21340 + }, + { + "epoch": 7.73, + "learning_rate": 4.360552264939712e-07, + "loss": 8.6558, + "step": 21360 + }, + { + "epoch": 7.73, + "learning_rate": 4.358178807926677e-07, + "loss": 8.7851, + "step": 21380 + }, + { + "epoch": 7.74, + "learning_rate": 4.355801602763927e-07, + "loss": 8.7331, + "step": 21400 + }, + { + "epoch": 7.75, + "learning_rate": 4.353420654246546e-07, + "loss": 8.6654, + "step": 21420 + }, + { + "epoch": 7.75, + "learning_rate": 4.3510359671771647e-07, + "loss": 8.6424, + "step": 21440 + }, + { + "epoch": 7.76, + "learning_rate": 4.3486475463659593e-07, + "loss": 8.6382, + "step": 21460 + }, + { + "epoch": 7.77, + "learning_rate": 4.3462553966306357e-07, + "loss": 8.7952, + "step": 21480 + }, + { + "epoch": 7.78, + "learning_rate": 4.3438595227964205e-07, + "loss": 8.6571, + "step": 21500 + }, + { + "epoch": 7.78, + "learning_rate": 4.341459929696054e-07, + "loss": 8.615, + "step": 21520 + }, + { + "epoch": 7.79, + "learning_rate": 4.3390566221697764e-07, + "loss": 8.6564, + "step": 21540 + }, + { + "epoch": 7.8, + "learning_rate": 4.3366496050653235e-07, + "loss": 8.7334, + "step": 21560 + }, + { + "epoch": 7.8, + "learning_rate": 4.3342388832379094e-07, + "loss": 8.5915, + "step": 21580 + }, + { + "epoch": 7.81, + "learning_rate": 4.3318244615502254e-07, + "loss": 8.7452, + "step": 21600 + }, + { + "epoch": 7.82, + "learning_rate": 4.329406344872423e-07, + "loss": 8.62, + "step": 21620 + }, + { + "epoch": 7.83, + "learning_rate": 4.326984538082108e-07, + "loss": 8.6302, + "step": 21640 + }, + { + "epoch": 7.83, + "learning_rate": 4.3245590460643293e-07, + "loss": 8.6475, + "step": 21660 + }, + { + "epoch": 7.84, + "learning_rate": 4.32212987371157e-07, + "loss": 8.771, + "step": 21680 + }, + { + "epoch": 7.85, + "learning_rate": 4.3196970259237355e-07, + "loss": 8.6497, + "step": 21700 + }, + { + "epoch": 7.86, + "learning_rate": 4.3172605076081456e-07, + "loss": 8.8053, + "step": 21720 + }, + { + "epoch": 7.86, + "learning_rate": 4.3148203236795234e-07, + "loss": 8.6689, + "step": 21740 + }, + { + "epoch": 7.87, + "learning_rate": 4.312376479059988e-07, + "loss": 8.6764, + "step": 21760 + }, + { + "epoch": 7.88, + "learning_rate": 4.3099289786790405e-07, + "loss": 8.6942, + "step": 21780 + }, + { + "epoch": 7.88, + "learning_rate": 4.307477827473556e-07, + "loss": 8.7068, + "step": 21800 + }, + { + "epoch": 7.89, + "learning_rate": 4.305023030387775e-07, + "loss": 8.6783, + "step": 21820 + }, + { + "epoch": 7.9, + "learning_rate": 4.302564592373292e-07, + "loss": 8.7257, + "step": 21840 + }, + { + "epoch": 7.91, + "learning_rate": 4.300102518389044e-07, + "loss": 8.7192, + "step": 21860 + }, + { + "epoch": 7.91, + "learning_rate": 4.2976368134013033e-07, + "loss": 8.7052, + "step": 21880 + }, + { + "epoch": 7.92, + "learning_rate": 4.295167482383667e-07, + "loss": 8.687, + "step": 21900 + }, + { + "epoch": 7.93, + "learning_rate": 4.292694530317046e-07, + "loss": 8.6849, + "step": 21920 + }, + { + "epoch": 7.93, + "learning_rate": 4.2902179621896534e-07, + "loss": 8.6997, + "step": 21940 + }, + { + "epoch": 7.94, + "learning_rate": 4.2877377829969983e-07, + "loss": 8.6191, + "step": 21960 + }, + { + "epoch": 7.95, + "learning_rate": 4.2852539977418745e-07, + "loss": 8.6416, + "step": 21980 + }, + { + "epoch": 7.96, + "learning_rate": 4.2827666114343463e-07, + "loss": 8.6274, + "step": 22000 + }, + { + "epoch": 7.96, + "learning_rate": 4.2802756290917446e-07, + "loss": 8.6461, + "step": 22020 + }, + { + "epoch": 7.97, + "learning_rate": 4.2777810557386534e-07, + "loss": 8.6694, + "step": 22040 + }, + { + "epoch": 7.98, + "learning_rate": 4.2752828964068996e-07, + "loss": 8.6447, + "step": 22060 + }, + { + "epoch": 7.99, + "learning_rate": 4.2727811561355423e-07, + "loss": 8.7354, + "step": 22080 + }, + { + "epoch": 7.99, + "learning_rate": 4.2702758399708674e-07, + "loss": 8.7295, + "step": 22100 + }, + { + "epoch": 8.0, + "learning_rate": 4.2677669529663686e-07, + "loss": 8.6513, + "step": 22120 + }, + { + "epoch": 8.01, + "learning_rate": 4.2652545001827474e-07, + "loss": 8.5954, + "step": 22140 + }, + { + "epoch": 8.01, + "learning_rate": 4.262738486687895e-07, + "loss": 8.6644, + "step": 22160 + }, + { + "epoch": 8.02, + "learning_rate": 4.2602189175568847e-07, + "loss": 8.6703, + "step": 22180 + }, + { + "epoch": 8.03, + "learning_rate": 4.2576957978719636e-07, + "loss": 8.7068, + "step": 22200 + }, + { + "epoch": 8.04, + "learning_rate": 4.255169132722539e-07, + "loss": 8.7908, + "step": 22220 + }, + { + "epoch": 8.04, + "learning_rate": 4.252638927205172e-07, + "loss": 8.7085, + "step": 22240 + }, + { + "epoch": 8.05, + "learning_rate": 4.250105186423563e-07, + "loss": 8.6468, + "step": 22260 + }, + { + "epoch": 8.06, + "learning_rate": 4.2475679154885443e-07, + "loss": 8.6714, + "step": 22280 + }, + { + "epoch": 8.07, + "learning_rate": 4.2450271195180675e-07, + "loss": 8.7756, + "step": 22300 + }, + { + "epoch": 8.07, + "learning_rate": 4.242482803637197e-07, + "loss": 8.8086, + "step": 22320 + }, + { + "epoch": 8.08, + "learning_rate": 4.2399349729780954e-07, + "loss": 8.7139, + "step": 22340 + }, + { + "epoch": 8.09, + "learning_rate": 4.237383632680015e-07, + "loss": 8.7571, + "step": 22360 + }, + { + "epoch": 8.09, + "learning_rate": 4.2348287878892896e-07, + "loss": 8.6348, + "step": 22380 + }, + { + "epoch": 8.1, + "learning_rate": 4.232270443759319e-07, + "loss": 8.7034, + "step": 22400 + }, + { + "epoch": 8.11, + "learning_rate": 4.2297086054505626e-07, + "loss": 8.6211, + "step": 22420 + }, + { + "epoch": 8.12, + "learning_rate": 4.2271432781305293e-07, + "loss": 8.578, + "step": 22440 + }, + { + "epoch": 8.12, + "learning_rate": 4.224574466973765e-07, + "loss": 8.6008, + "step": 22460 + }, + { + "epoch": 8.13, + "learning_rate": 4.222002177161841e-07, + "loss": 8.6203, + "step": 22480 + }, + { + "epoch": 8.14, + "learning_rate": 4.219426413883348e-07, + "loss": 8.7373, + "step": 22500 + }, + { + "epoch": 8.14, + "learning_rate": 4.216847182333881e-07, + "loss": 8.712, + "step": 22520 + }, + { + "epoch": 8.15, + "learning_rate": 4.2142644877160325e-07, + "loss": 8.6409, + "step": 22540 + }, + { + "epoch": 8.16, + "learning_rate": 4.2116783352393803e-07, + "loss": 8.7292, + "step": 22560 + }, + { + "epoch": 8.17, + "learning_rate": 4.2090887301204763e-07, + "loss": 8.602, + "step": 22580 + }, + { + "epoch": 8.17, + "learning_rate": 4.2064956775828366e-07, + "loss": 8.8186, + "step": 22600 + }, + { + "epoch": 8.18, + "learning_rate": 4.203899182856932e-07, + "loss": 8.6242, + "step": 22620 + }, + { + "epoch": 8.19, + "learning_rate": 4.201299251180176e-07, + "loss": 8.7123, + "step": 22640 + }, + { + "epoch": 8.2, + "learning_rate": 4.198695887796914e-07, + "loss": 8.8066, + "step": 22660 + }, + { + "epoch": 8.2, + "learning_rate": 4.1960890979584155e-07, + "loss": 8.6635, + "step": 22680 + }, + { + "epoch": 8.21, + "learning_rate": 4.1934788869228603e-07, + "loss": 8.6928, + "step": 22700 + }, + { + "epoch": 8.22, + "learning_rate": 4.1908652599553293e-07, + "loss": 8.6065, + "step": 22720 + }, + { + "epoch": 8.22, + "learning_rate": 4.188248222327794e-07, + "loss": 8.651, + "step": 22740 + }, + { + "epoch": 8.23, + "learning_rate": 4.1856277793191044e-07, + "loss": 8.5583, + "step": 22760 + }, + { + "epoch": 8.24, + "learning_rate": 4.1830039362149807e-07, + "loss": 8.6621, + "step": 22780 + }, + { + "epoch": 8.25, + "learning_rate": 4.1803766983080006e-07, + "loss": 8.7382, + "step": 22800 + }, + { + "epoch": 8.25, + "learning_rate": 4.177746070897592e-07, + "loss": 8.6508, + "step": 22820 + }, + { + "epoch": 8.26, + "learning_rate": 4.1751120592900156e-07, + "loss": 8.6554, + "step": 22840 + }, + { + "epoch": 8.27, + "learning_rate": 4.1724746687983623e-07, + "loss": 8.6529, + "step": 22860 + }, + { + "epoch": 8.27, + "learning_rate": 4.169833904742537e-07, + "loss": 8.6613, + "step": 22880 + }, + { + "epoch": 8.28, + "learning_rate": 4.1671897724492475e-07, + "loss": 8.7068, + "step": 22900 + }, + { + "epoch": 8.29, + "learning_rate": 4.164542277252e-07, + "loss": 8.7057, + "step": 22920 + }, + { + "epoch": 8.3, + "learning_rate": 4.1618914244910797e-07, + "loss": 8.6056, + "step": 22940 + }, + { + "epoch": 8.3, + "learning_rate": 4.159237219513547e-07, + "loss": 8.6663, + "step": 22960 + }, + { + "epoch": 8.31, + "learning_rate": 4.1565796676732237e-07, + "loss": 8.838, + "step": 22980 + }, + { + "epoch": 8.32, + "learning_rate": 4.153918774330682e-07, + "loss": 8.6266, + "step": 23000 + }, + { + "epoch": 8.33, + "learning_rate": 4.151254544853234e-07, + "loss": 8.6716, + "step": 23020 + }, + { + "epoch": 8.33, + "learning_rate": 4.1485869846149233e-07, + "loss": 8.8722, + "step": 23040 + }, + { + "epoch": 8.34, + "learning_rate": 4.1459160989965087e-07, + "loss": 8.7373, + "step": 23060 + }, + { + "epoch": 8.35, + "learning_rate": 4.1432418933854586e-07, + "loss": 8.5846, + "step": 23080 + }, + { + "epoch": 8.35, + "learning_rate": 4.140564373175939e-07, + "loss": 8.6194, + "step": 23100 + }, + { + "epoch": 8.36, + "learning_rate": 4.1378835437687996e-07, + "loss": 8.6622, + "step": 23120 + }, + { + "epoch": 8.37, + "learning_rate": 4.135199410571567e-07, + "loss": 8.6823, + "step": 23140 + }, + { + "epoch": 8.38, + "learning_rate": 4.132511978998432e-07, + "loss": 8.6646, + "step": 23160 + }, + { + "epoch": 8.38, + "learning_rate": 4.1298212544702356e-07, + "loss": 8.7136, + "step": 23180 + }, + { + "epoch": 8.39, + "learning_rate": 4.1271272424144645e-07, + "loss": 8.6156, + "step": 23200 + }, + { + "epoch": 8.4, + "learning_rate": 4.124429948265235e-07, + "loss": 8.6579, + "step": 23220 + }, + { + "epoch": 8.41, + "learning_rate": 4.1217293774632844e-07, + "loss": 8.5948, + "step": 23240 + }, + { + "epoch": 8.41, + "learning_rate": 4.11902553545596e-07, + "loss": 8.5581, + "step": 23260 + }, + { + "epoch": 8.42, + "learning_rate": 4.116318427697205e-07, + "loss": 8.7029, + "step": 23280 + }, + { + "epoch": 8.43, + "learning_rate": 4.113608059647552e-07, + "loss": 8.6475, + "step": 23300 + }, + { + "epoch": 8.43, + "learning_rate": 4.1108944367741105e-07, + "loss": 8.6973, + "step": 23320 + }, + { + "epoch": 8.44, + "learning_rate": 4.108177564550554e-07, + "loss": 8.6234, + "step": 23340 + }, + { + "epoch": 8.45, + "learning_rate": 4.1054574484571105e-07, + "loss": 8.7258, + "step": 23360 + }, + { + "epoch": 8.46, + "learning_rate": 4.10273409398055e-07, + "loss": 8.648, + "step": 23380 + }, + { + "epoch": 8.46, + "learning_rate": 4.100007506614178e-07, + "loss": 8.5639, + "step": 23400 + }, + { + "epoch": 8.47, + "learning_rate": 4.097277691857819e-07, + "loss": 8.7568, + "step": 23420 + }, + { + "epoch": 8.48, + "learning_rate": 4.094544655217807e-07, + "loss": 8.818, + "step": 23440 + }, + { + "epoch": 8.48, + "learning_rate": 4.091808402206976e-07, + "loss": 8.6586, + "step": 23460 + }, + { + "epoch": 8.49, + "learning_rate": 4.0890689383446476e-07, + "loss": 8.7856, + "step": 23480 + }, + { + "epoch": 8.5, + "learning_rate": 4.086326269156618e-07, + "loss": 8.5939, + "step": 23500 + }, + { + "epoch": 8.51, + "learning_rate": 4.083580400175153e-07, + "loss": 8.647, + "step": 23520 + }, + { + "epoch": 8.51, + "learning_rate": 4.0808313369389693e-07, + "loss": 8.6309, + "step": 23540 + }, + { + "epoch": 8.52, + "learning_rate": 4.078079084993227e-07, + "loss": 8.827, + "step": 23560 + }, + { + "epoch": 8.53, + "learning_rate": 4.0753236498895215e-07, + "loss": 8.7282, + "step": 23580 + }, + { + "epoch": 8.54, + "learning_rate": 4.0725650371858646e-07, + "loss": 8.6681, + "step": 23600 + }, + { + "epoch": 8.54, + "learning_rate": 4.069803252446679e-07, + "loss": 8.6488, + "step": 23620 + }, + { + "epoch": 8.55, + "learning_rate": 4.0670383012427877e-07, + "loss": 8.6818, + "step": 23640 + }, + { + "epoch": 8.56, + "learning_rate": 4.0642701891513996e-07, + "loss": 8.8065, + "step": 23660 + }, + { + "epoch": 8.56, + "learning_rate": 4.0614989217560983e-07, + "loss": 8.8094, + "step": 23680 + }, + { + "epoch": 8.57, + "learning_rate": 4.058724504646834e-07, + "loss": 8.7127, + "step": 23700 + }, + { + "epoch": 8.58, + "learning_rate": 4.0559469434199077e-07, + "loss": 8.6676, + "step": 23720 + }, + { + "epoch": 8.59, + "learning_rate": 4.0531662436779654e-07, + "loss": 8.7212, + "step": 23740 + }, + { + "epoch": 8.59, + "learning_rate": 4.050382411029981e-07, + "loss": 8.721, + "step": 23760 + }, + { + "epoch": 8.6, + "learning_rate": 4.0475954510912513e-07, + "loss": 8.6397, + "step": 23780 + }, + { + "epoch": 8.61, + "learning_rate": 4.044805369483377e-07, + "loss": 8.6683, + "step": 23800 + }, + { + "epoch": 8.61, + "learning_rate": 4.0420121718342583e-07, + "loss": 8.6248, + "step": 23820 + }, + { + "epoch": 8.62, + "learning_rate": 4.0392158637780794e-07, + "loss": 8.5869, + "step": 23840 + }, + { + "epoch": 8.63, + "learning_rate": 4.0364164509553e-07, + "loss": 8.6588, + "step": 23860 + }, + { + "epoch": 8.64, + "learning_rate": 4.0336139390126424e-07, + "loss": 8.692, + "step": 23880 + }, + { + "epoch": 8.64, + "learning_rate": 4.0308083336030784e-07, + "loss": 8.6193, + "step": 23900 + }, + { + "epoch": 8.65, + "learning_rate": 4.027999640385821e-07, + "loss": 8.6798, + "step": 23920 + }, + { + "epoch": 8.66, + "learning_rate": 4.0251878650263107e-07, + "loss": 8.7117, + "step": 23940 + }, + { + "epoch": 8.67, + "learning_rate": 4.022373013196206e-07, + "loss": 8.6933, + "step": 23960 + }, + { + "epoch": 8.67, + "learning_rate": 4.019555090573372e-07, + "loss": 8.5723, + "step": 23980 + }, + { + "epoch": 8.68, + "learning_rate": 4.0167341028418655e-07, + "loss": 8.6345, + "step": 24000 + }, + { + "epoch": 8.69, + "learning_rate": 4.0139100556919266e-07, + "loss": 8.6287, + "step": 24020 + }, + { + "epoch": 8.69, + "learning_rate": 4.0110829548199667e-07, + "loss": 8.621, + "step": 24040 + }, + { + "epoch": 8.7, + "learning_rate": 4.008252805928559e-07, + "loss": 8.7364, + "step": 24060 + }, + { + "epoch": 8.71, + "learning_rate": 4.005419614726421e-07, + "loss": 8.7642, + "step": 24080 + }, + { + "epoch": 8.72, + "learning_rate": 4.00258338692841e-07, + "loss": 8.6653, + "step": 24100 + }, + { + "epoch": 8.72, + "learning_rate": 3.999744128255508e-07, + "loss": 8.6865, + "step": 24120 + }, + { + "epoch": 8.73, + "learning_rate": 3.996901844434809e-07, + "loss": 8.6638, + "step": 24140 + }, + { + "epoch": 8.74, + "learning_rate": 3.994056541199511e-07, + "loss": 8.6719, + "step": 24160 + }, + { + "epoch": 8.75, + "learning_rate": 3.9912082242889e-07, + "loss": 8.6338, + "step": 24180 + }, + { + "epoch": 8.75, + "learning_rate": 3.988356899448344e-07, + "loss": 8.7657, + "step": 24200 + }, + { + "epoch": 8.76, + "learning_rate": 3.9855025724292763e-07, + "loss": 8.6599, + "step": 24220 + }, + { + "epoch": 8.77, + "learning_rate": 3.982645248989186e-07, + "loss": 8.5804, + "step": 24240 + }, + { + "epoch": 8.77, + "learning_rate": 3.9797849348916074e-07, + "loss": 8.7593, + "step": 24260 + }, + { + "epoch": 8.78, + "learning_rate": 3.9769216359061063e-07, + "loss": 8.7266, + "step": 24280 + }, + { + "epoch": 8.79, + "learning_rate": 3.974055357808269e-07, + "loss": 8.683, + "step": 24300 + }, + { + "epoch": 8.8, + "learning_rate": 3.971186106379693e-07, + "loss": 8.7335, + "step": 24320 + }, + { + "epoch": 8.8, + "learning_rate": 3.968313887407971e-07, + "loss": 8.7124, + "step": 24340 + }, + { + "epoch": 8.81, + "learning_rate": 3.9654387066866833e-07, + "loss": 8.6624, + "step": 24360 + }, + { + "epoch": 8.82, + "learning_rate": 3.962560570015383e-07, + "loss": 8.6476, + "step": 24380 + }, + { + "epoch": 8.82, + "learning_rate": 3.9596794831995863e-07, + "loss": 8.6399, + "step": 24400 + }, + { + "epoch": 8.83, + "learning_rate": 3.9567954520507594e-07, + "loss": 8.6308, + "step": 24420 + }, + { + "epoch": 8.84, + "learning_rate": 3.953908482386311e-07, + "loss": 8.7086, + "step": 24440 + }, + { + "epoch": 8.85, + "learning_rate": 3.9510185800295715e-07, + "loss": 8.7306, + "step": 24460 + }, + { + "epoch": 8.85, + "learning_rate": 3.94812575080979e-07, + "loss": 8.688, + "step": 24480 + }, + { + "epoch": 8.86, + "learning_rate": 3.9452300005621206e-07, + "loss": 8.7237, + "step": 24500 + }, + { + "epoch": 8.87, + "learning_rate": 3.9423313351276075e-07, + "loss": 8.7392, + "step": 24520 + }, + { + "epoch": 8.88, + "learning_rate": 3.9394297603531756e-07, + "loss": 8.6499, + "step": 24540 + }, + { + "epoch": 8.88, + "learning_rate": 3.9365252820916186e-07, + "loss": 8.6272, + "step": 24560 + }, + { + "epoch": 8.89, + "learning_rate": 3.933617906201585e-07, + "loss": 8.7747, + "step": 24580 + }, + { + "epoch": 8.9, + "learning_rate": 3.930707638547571e-07, + "loss": 8.744, + "step": 24600 + }, + { + "epoch": 8.9, + "learning_rate": 3.927794484999905e-07, + "loss": 8.6325, + "step": 24620 + }, + { + "epoch": 8.91, + "learning_rate": 3.924878451434735e-07, + "loss": 8.8209, + "step": 24640 + }, + { + "epoch": 8.92, + "learning_rate": 3.9219595437340205e-07, + "loss": 8.6263, + "step": 24660 + }, + { + "epoch": 8.93, + "learning_rate": 3.9190377677855155e-07, + "loss": 8.5573, + "step": 24680 + }, + { + "epoch": 8.93, + "learning_rate": 3.916113129482762e-07, + "loss": 8.6047, + "step": 24700 + }, + { + "epoch": 8.94, + "learning_rate": 3.913185634725077e-07, + "loss": 8.7075, + "step": 24720 + }, + { + "epoch": 8.95, + "learning_rate": 3.9102552894175347e-07, + "loss": 8.6059, + "step": 24740 + }, + { + "epoch": 8.95, + "learning_rate": 3.907322099470963e-07, + "loss": 8.7004, + "step": 24760 + }, + { + "epoch": 8.96, + "learning_rate": 3.904386070801927e-07, + "loss": 8.645, + "step": 24780 + }, + { + "epoch": 8.97, + "learning_rate": 3.9014472093327164e-07, + "loss": 8.7493, + "step": 24800 + }, + { + "epoch": 8.98, + "learning_rate": 3.8985055209913367e-07, + "loss": 8.7113, + "step": 24820 + }, + { + "epoch": 8.98, + "learning_rate": 3.8955610117114946e-07, + "loss": 8.7381, + "step": 24840 + }, + { + "epoch": 8.99, + "learning_rate": 3.8926136874325867e-07, + "loss": 8.6343, + "step": 24860 + }, + { + "epoch": 9.0, + "learning_rate": 3.889663554099688e-07, + "loss": 8.6153, + "step": 24880 + }, + { + "epoch": 9.01, + "learning_rate": 3.88671061766354e-07, + "loss": 8.7392, + "step": 24900 + }, + { + "epoch": 9.01, + "learning_rate": 3.883754884080539e-07, + "loss": 8.7271, + "step": 24920 + }, + { + "epoch": 9.02, + "learning_rate": 3.880796359312722e-07, + "loss": 8.7976, + "step": 24940 + }, + { + "epoch": 9.03, + "learning_rate": 3.8778350493277566e-07, + "loss": 8.6362, + "step": 24960 + }, + { + "epoch": 9.03, + "learning_rate": 3.8748709600989296e-07, + "loss": 8.7061, + "step": 24980 + }, + { + "epoch": 9.04, + "learning_rate": 3.871904097605131e-07, + "loss": 8.6856, + "step": 25000 + }, + { + "epoch": 9.05, + "learning_rate": 3.8689344678308476e-07, + "loss": 8.6853, + "step": 25020 + }, + { + "epoch": 9.06, + "learning_rate": 3.8659620767661483e-07, + "loss": 8.6812, + "step": 25040 + }, + { + "epoch": 9.06, + "learning_rate": 3.862986930406669e-07, + "loss": 8.5669, + "step": 25060 + }, + { + "epoch": 9.07, + "learning_rate": 3.8600090347536064e-07, + "loss": 8.747, + "step": 25080 + }, + { + "epoch": 9.08, + "learning_rate": 3.8570283958137e-07, + "loss": 8.7162, + "step": 25100 + }, + { + "epoch": 9.08, + "learning_rate": 3.8540450195992255e-07, + "loss": 8.6484, + "step": 25120 + }, + { + "epoch": 9.09, + "learning_rate": 3.8510589121279787e-07, + "loss": 8.6183, + "step": 25140 + }, + { + "epoch": 9.1, + "learning_rate": 3.8480700794232634e-07, + "loss": 8.5655, + "step": 25160 + }, + { + "epoch": 9.11, + "learning_rate": 3.845078527513883e-07, + "loss": 8.8672, + "step": 25180 + }, + { + "epoch": 9.11, + "learning_rate": 3.842084262434125e-07, + "loss": 8.6426, + "step": 25200 + }, + { + "epoch": 9.12, + "learning_rate": 3.839087290223748e-07, + "loss": 8.6784, + "step": 25220 + }, + { + "epoch": 9.13, + "learning_rate": 3.8360876169279734e-07, + "loss": 8.686, + "step": 25240 + }, + { + "epoch": 9.14, + "learning_rate": 3.8330852485974697e-07, + "loss": 8.6127, + "step": 25260 + }, + { + "epoch": 9.14, + "learning_rate": 3.8300801912883414e-07, + "loss": 8.6752, + "step": 25280 + }, + { + "epoch": 9.15, + "learning_rate": 3.8270724510621177e-07, + "loss": 8.7257, + "step": 25300 + }, + { + "epoch": 9.16, + "learning_rate": 3.82406203398574e-07, + "loss": 8.706, + "step": 25320 + }, + { + "epoch": 9.16, + "learning_rate": 3.8210489461315485e-07, + "loss": 8.6395, + "step": 25340 + }, + { + "epoch": 9.17, + "learning_rate": 3.81803319357727e-07, + "loss": 8.6969, + "step": 25360 + }, + { + "epoch": 9.18, + "learning_rate": 3.8150147824060075e-07, + "loss": 8.8156, + "step": 25380 + }, + { + "epoch": 9.19, + "learning_rate": 3.8119937187062254e-07, + "loss": 8.634, + "step": 25400 + }, + { + "epoch": 9.19, + "learning_rate": 3.8089700085717405e-07, + "loss": 8.7115, + "step": 25420 + }, + { + "epoch": 9.2, + "learning_rate": 3.8059436581017044e-07, + "loss": 8.7182, + "step": 25440 + }, + { + "epoch": 9.21, + "learning_rate": 3.802914673400599e-07, + "loss": 8.6773, + "step": 25460 + }, + { + "epoch": 9.22, + "learning_rate": 3.7998830605782175e-07, + "loss": 8.5986, + "step": 25480 + }, + { + "epoch": 9.22, + "learning_rate": 3.796848825749652e-07, + "loss": 8.6632, + "step": 25500 + }, + { + "epoch": 9.23, + "learning_rate": 3.7938119750352885e-07, + "loss": 8.7261, + "step": 25520 + }, + { + "epoch": 9.24, + "learning_rate": 3.790772514560785e-07, + "loss": 8.9573, + "step": 25540 + }, + { + "epoch": 9.24, + "learning_rate": 3.787730450457065e-07, + "loss": 8.8924, + "step": 25560 + }, + { + "epoch": 9.25, + "learning_rate": 3.7846857888603056e-07, + "loss": 8.6284, + "step": 25580 + }, + { + "epoch": 9.26, + "learning_rate": 3.781638535911922e-07, + "loss": 8.6416, + "step": 25600 + }, + { + "epoch": 9.27, + "learning_rate": 3.7785886977585555e-07, + "loss": 8.6983, + "step": 25620 + }, + { + "epoch": 9.27, + "learning_rate": 3.775536280552063e-07, + "loss": 8.6153, + "step": 25640 + }, + { + "epoch": 9.28, + "learning_rate": 3.7724812904495035e-07, + "loss": 8.6753, + "step": 25660 + }, + { + "epoch": 9.29, + "learning_rate": 3.769423733613126e-07, + "loss": 8.7574, + "step": 25680 + }, + { + "epoch": 9.29, + "learning_rate": 3.7663636162103577e-07, + "loss": 8.6958, + "step": 25700 + }, + { + "epoch": 9.3, + "learning_rate": 3.76330094441379e-07, + "loss": 8.6696, + "step": 25720 + }, + { + "epoch": 9.31, + "learning_rate": 3.760235724401164e-07, + "loss": 8.6922, + "step": 25740 + }, + { + "epoch": 9.32, + "learning_rate": 3.757167962355365e-07, + "loss": 8.7126, + "step": 25760 + }, + { + "epoch": 9.32, + "learning_rate": 3.754097664464405e-07, + "loss": 8.9095, + "step": 25780 + }, + { + "epoch": 9.33, + "learning_rate": 3.7510248369214093e-07, + "loss": 8.6746, + "step": 25800 + }, + { + "epoch": 9.34, + "learning_rate": 3.7479494859246073e-07, + "loss": 8.5895, + "step": 25820 + }, + { + "epoch": 9.35, + "learning_rate": 3.744871617677319e-07, + "loss": 8.6395, + "step": 25840 + }, + { + "epoch": 9.35, + "learning_rate": 3.7417912383879394e-07, + "loss": 8.7634, + "step": 25860 + }, + { + "epoch": 9.36, + "learning_rate": 3.73870835426993e-07, + "loss": 8.7353, + "step": 25880 + }, + { + "epoch": 9.37, + "learning_rate": 3.735622971541807e-07, + "loss": 8.6576, + "step": 25900 + }, + { + "epoch": 9.37, + "learning_rate": 3.732535096427123e-07, + "loss": 8.7598, + "step": 25920 + }, + { + "epoch": 9.38, + "learning_rate": 3.7294447351544594e-07, + "loss": 8.669, + "step": 25940 + }, + { + "epoch": 9.39, + "learning_rate": 3.7263518939574136e-07, + "loss": 8.595, + "step": 25960 + }, + { + "epoch": 9.4, + "learning_rate": 3.723256579074583e-07, + "loss": 8.6576, + "step": 25980 + }, + { + "epoch": 9.4, + "learning_rate": 3.720158796749556e-07, + "loss": 8.6325, + "step": 26000 + }, + { + "epoch": 9.41, + "learning_rate": 3.7170585532308995e-07, + "loss": 8.6388, + "step": 26020 + }, + { + "epoch": 9.42, + "learning_rate": 3.713955854772143e-07, + "loss": 8.6669, + "step": 26040 + }, + { + "epoch": 9.42, + "learning_rate": 3.710850707631767e-07, + "loss": 8.6721, + "step": 26060 + }, + { + "epoch": 9.43, + "learning_rate": 3.707743118073195e-07, + "loss": 8.7287, + "step": 26080 + }, + { + "epoch": 9.44, + "learning_rate": 3.704633092364773e-07, + "loss": 8.6791, + "step": 26100 + }, + { + "epoch": 9.45, + "learning_rate": 3.7015206367797627e-07, + "loss": 8.6707, + "step": 26120 + }, + { + "epoch": 9.45, + "learning_rate": 3.698405757596327e-07, + "loss": 8.6335, + "step": 26140 + }, + { + "epoch": 9.46, + "learning_rate": 3.695288461097519e-07, + "loss": 8.6813, + "step": 26160 + }, + { + "epoch": 9.47, + "learning_rate": 3.692168753571265e-07, + "loss": 8.6498, + "step": 26180 + }, + { + "epoch": 9.48, + "learning_rate": 3.6890466413103574e-07, + "loss": 8.6455, + "step": 26200 + }, + { + "epoch": 9.48, + "learning_rate": 3.6859221306124353e-07, + "loss": 8.626, + "step": 26220 + }, + { + "epoch": 9.49, + "learning_rate": 3.682795227779981e-07, + "loss": 8.6599, + "step": 26240 + }, + { + "epoch": 9.5, + "learning_rate": 3.6796659391202976e-07, + "loss": 8.6244, + "step": 26260 + }, + { + "epoch": 9.5, + "learning_rate": 3.6765342709455035e-07, + "loss": 8.6675, + "step": 26280 + }, + { + "epoch": 9.51, + "learning_rate": 3.673400229572514e-07, + "loss": 8.6763, + "step": 26300 + }, + { + "epoch": 9.52, + "learning_rate": 3.670263821323034e-07, + "loss": 8.7124, + "step": 26320 + }, + { + "epoch": 9.53, + "learning_rate": 3.667125052523542e-07, + "loss": 8.6563, + "step": 26340 + }, + { + "epoch": 9.53, + "learning_rate": 3.6639839295052776e-07, + "loss": 8.6153, + "step": 26360 + }, + { + "epoch": 9.54, + "learning_rate": 3.660840458604228e-07, + "loss": 8.663, + "step": 26380 + }, + { + "epoch": 9.55, + "learning_rate": 3.657694646161119e-07, + "loss": 8.7112, + "step": 26400 + }, + { + "epoch": 9.56, + "learning_rate": 3.654546498521397e-07, + "loss": 8.6633, + "step": 26420 + }, + { + "epoch": 9.56, + "learning_rate": 3.6513960220352204e-07, + "loss": 8.6195, + "step": 26440 + }, + { + "epoch": 9.57, + "learning_rate": 3.6482432230574445e-07, + "loss": 8.7926, + "step": 26460 + }, + { + "epoch": 9.58, + "learning_rate": 3.645088107947609e-07, + "loss": 8.6659, + "step": 26480 + }, + { + "epoch": 9.58, + "learning_rate": 3.641930683069927e-07, + "loss": 8.6512, + "step": 26500 + }, + { + "epoch": 9.59, + "learning_rate": 3.638770954793268e-07, + "loss": 8.6814, + "step": 26520 + }, + { + "epoch": 9.6, + "learning_rate": 3.6356089294911494e-07, + "loss": 8.7023, + "step": 26540 + }, + { + "epoch": 9.61, + "learning_rate": 3.632444613541723e-07, + "loss": 8.7315, + "step": 26560 + }, + { + "epoch": 9.61, + "learning_rate": 3.629278013327759e-07, + "loss": 8.5947, + "step": 26580 + }, + { + "epoch": 9.62, + "learning_rate": 3.6261091352366363e-07, + "loss": 8.8138, + "step": 26600 + }, + { + "epoch": 9.63, + "learning_rate": 3.622937985660328e-07, + "loss": 8.7118, + "step": 26620 + }, + { + "epoch": 9.63, + "learning_rate": 3.6197645709953895e-07, + "loss": 8.681, + "step": 26640 + }, + { + "epoch": 9.64, + "learning_rate": 3.6165888976429447e-07, + "loss": 8.7761, + "step": 26660 + }, + { + "epoch": 9.65, + "learning_rate": 3.613410972008674e-07, + "loss": 8.6516, + "step": 26680 + }, + { + "epoch": 9.66, + "learning_rate": 3.610230800502802e-07, + "loss": 8.6794, + "step": 26700 + }, + { + "epoch": 9.66, + "learning_rate": 3.60704838954008e-07, + "loss": 8.7636, + "step": 26720 + }, + { + "epoch": 9.67, + "learning_rate": 3.6038637455397796e-07, + "loss": 8.6292, + "step": 26740 + }, + { + "epoch": 9.68, + "learning_rate": 3.6006768749256755e-07, + "loss": 8.7857, + "step": 26760 + }, + { + "epoch": 9.69, + "learning_rate": 3.597487784126035e-07, + "loss": 8.6254, + "step": 26780 + }, + { + "epoch": 9.69, + "learning_rate": 3.594296479573602e-07, + "loss": 8.6402, + "step": 26800 + }, + { + "epoch": 9.7, + "learning_rate": 3.591102967705586e-07, + "loss": 8.6597, + "step": 26820 + }, + { + "epoch": 9.71, + "learning_rate": 3.5879072549636494e-07, + "loss": 8.7317, + "step": 26840 + }, + { + "epoch": 9.71, + "learning_rate": 3.584709347793895e-07, + "loss": 8.6803, + "step": 26860 + }, + { + "epoch": 9.72, + "learning_rate": 3.581509252646851e-07, + "loss": 8.6608, + "step": 26880 + }, + { + "epoch": 9.73, + "learning_rate": 3.5783069759774587e-07, + "loss": 8.6517, + "step": 26900 + }, + { + "epoch": 9.74, + "learning_rate": 3.5751025242450596e-07, + "loss": 8.6721, + "step": 26920 + }, + { + "epoch": 9.74, + "learning_rate": 3.5718959039133836e-07, + "loss": 8.7057, + "step": 26940 + }, + { + "epoch": 9.75, + "learning_rate": 3.568687121450533e-07, + "loss": 8.5605, + "step": 26960 + }, + { + "epoch": 9.76, + "learning_rate": 3.565476183328975e-07, + "loss": 8.666, + "step": 26980 + }, + { + "epoch": 9.76, + "learning_rate": 3.5622630960255215e-07, + "loss": 8.6459, + "step": 27000 + }, + { + "epoch": 9.77, + "learning_rate": 3.5590478660213206e-07, + "loss": 8.6509, + "step": 27020 + }, + { + "epoch": 9.78, + "learning_rate": 3.5558304998018426e-07, + "loss": 8.6461, + "step": 27040 + }, + { + "epoch": 9.79, + "learning_rate": 3.5526110038568664e-07, + "loss": 8.7623, + "step": 27060 + }, + { + "epoch": 9.79, + "learning_rate": 3.5493893846804673e-07, + "loss": 8.6492, + "step": 27080 + }, + { + "epoch": 9.8, + "learning_rate": 3.546165648771004e-07, + "loss": 8.6686, + "step": 27100 + }, + { + "epoch": 9.81, + "learning_rate": 3.5429398026311037e-07, + "loss": 8.7239, + "step": 27120 + }, + { + "epoch": 9.82, + "learning_rate": 3.5397118527676505e-07, + "loss": 8.6809, + "step": 27140 + }, + { + "epoch": 9.82, + "learning_rate": 3.5364818056917724e-07, + "loss": 8.6912, + "step": 27160 + }, + { + "epoch": 9.83, + "learning_rate": 3.5332496679188264e-07, + "loss": 8.6091, + "step": 27180 + }, + { + "epoch": 9.84, + "learning_rate": 3.530015445968388e-07, + "loss": 8.7204, + "step": 27200 + }, + { + "epoch": 9.84, + "learning_rate": 3.5267791463642367e-07, + "loss": 8.6489, + "step": 27220 + }, + { + "epoch": 9.85, + "learning_rate": 3.5235407756343416e-07, + "loss": 8.5754, + "step": 27240 + }, + { + "epoch": 9.86, + "learning_rate": 3.520300340310852e-07, + "loss": 8.6814, + "step": 27260 + }, + { + "epoch": 9.87, + "learning_rate": 3.517057846930078e-07, + "loss": 8.6847, + "step": 27280 + }, + { + "epoch": 9.87, + "learning_rate": 3.5138133020324844e-07, + "loss": 8.6579, + "step": 27300 + }, + { + "epoch": 9.88, + "learning_rate": 3.510566712162673e-07, + "loss": 8.716, + "step": 27320 + }, + { + "epoch": 9.89, + "learning_rate": 3.5073180838693694e-07, + "loss": 8.6446, + "step": 27340 + }, + { + "epoch": 9.9, + "learning_rate": 3.5040674237054125e-07, + "loss": 8.7245, + "step": 27360 + }, + { + "epoch": 9.9, + "learning_rate": 3.500814738227739e-07, + "loss": 8.7127, + "step": 27380 + }, + { + "epoch": 9.91, + "learning_rate": 3.49756003399737e-07, + "loss": 8.6278, + "step": 27400 + }, + { + "epoch": 9.92, + "learning_rate": 3.494303317579401e-07, + "loss": 8.781, + "step": 27420 + }, + { + "epoch": 9.92, + "learning_rate": 3.491044595542985e-07, + "loss": 8.6123, + "step": 27440 + }, + { + "epoch": 9.93, + "learning_rate": 3.4877838744613194e-07, + "loss": 8.6652, + "step": 27460 + }, + { + "epoch": 9.94, + "learning_rate": 3.4845211609116354e-07, + "loss": 8.6108, + "step": 27480 + }, + { + "epoch": 9.95, + "learning_rate": 3.481256461475182e-07, + "loss": 8.7342, + "step": 27500 + }, + { + "epoch": 9.95, + "learning_rate": 3.4779897827372164e-07, + "loss": 8.6786, + "step": 27520 + }, + { + "epoch": 9.96, + "learning_rate": 3.474721131286985e-07, + "loss": 8.7928, + "step": 27540 + }, + { + "epoch": 9.97, + "learning_rate": 3.4714505137177163e-07, + "loss": 8.6573, + "step": 27560 + }, + { + "epoch": 9.97, + "learning_rate": 3.468177936626603e-07, + "loss": 8.6552, + "step": 27580 + }, + { + "epoch": 9.98, + "learning_rate": 3.4649034066147894e-07, + "loss": 8.6789, + "step": 27600 + }, + { + "epoch": 9.99, + "learning_rate": 3.461626930287361e-07, + "loss": 8.6438, + "step": 27620 + }, + { + "epoch": 10.0, + "learning_rate": 3.4583485142533303e-07, + "loss": 8.6694, + "step": 27640 + }, + { + "epoch": 10.0, + "learning_rate": 3.455068165125619e-07, + "loss": 8.746, + "step": 27660 + }, + { + "epoch": 10.01, + "learning_rate": 3.4517858895210493e-07, + "loss": 8.705, + "step": 27680 + }, + { + "epoch": 10.02, + "learning_rate": 3.448501694060332e-07, + "loss": 8.6114, + "step": 27700 + }, + { + "epoch": 10.03, + "learning_rate": 3.4452155853680454e-07, + "loss": 8.7115, + "step": 27720 + }, + { + "epoch": 10.03, + "learning_rate": 3.441927570072632e-07, + "loss": 8.923, + "step": 27740 + }, + { + "epoch": 10.04, + "learning_rate": 3.438637654806378e-07, + "loss": 8.8009, + "step": 27760 + }, + { + "epoch": 10.05, + "learning_rate": 3.4353458462053995e-07, + "loss": 8.6399, + "step": 27780 + }, + { + "epoch": 10.05, + "learning_rate": 3.432052150909637e-07, + "loss": 8.941, + "step": 27800 + }, + { + "epoch": 10.06, + "learning_rate": 3.428756575562832e-07, + "loss": 8.7679, + "step": 27820 + }, + { + "epoch": 10.07, + "learning_rate": 3.4254591268125214e-07, + "loss": 8.7031, + "step": 27840 + }, + { + "epoch": 10.08, + "learning_rate": 3.4221598113100194e-07, + "loss": 8.7045, + "step": 27860 + }, + { + "epoch": 10.08, + "learning_rate": 3.418858635710406e-07, + "loss": 8.7079, + "step": 27880 + }, + { + "epoch": 10.09, + "learning_rate": 3.415555606672512e-07, + "loss": 8.6618, + "step": 27900 + }, + { + "epoch": 10.1, + "learning_rate": 3.412250730858909e-07, + "loss": 8.7842, + "step": 27920 + }, + { + "epoch": 10.1, + "learning_rate": 3.408944014935892e-07, + "loss": 8.7194, + "step": 27940 + }, + { + "epoch": 10.11, + "learning_rate": 3.4056354655734686e-07, + "loss": 8.7642, + "step": 27960 + }, + { + "epoch": 10.12, + "learning_rate": 3.402325089445346e-07, + "loss": 8.6615, + "step": 27980 + }, + { + "epoch": 10.13, + "learning_rate": 3.399012893228912e-07, + "loss": 8.7468, + "step": 28000 + }, + { + "epoch": 10.13, + "learning_rate": 3.3956988836052293e-07, + "loss": 8.671, + "step": 28020 + }, + { + "epoch": 10.14, + "learning_rate": 3.392383067259018e-07, + "loss": 8.6229, + "step": 28040 + }, + { + "epoch": 10.15, + "learning_rate": 3.389065450878641e-07, + "loss": 8.6014, + "step": 28060 + }, + { + "epoch": 10.16, + "learning_rate": 3.3857460411560943e-07, + "loss": 8.7231, + "step": 28080 + }, + { + "epoch": 10.16, + "learning_rate": 3.38242484478699e-07, + "loss": 8.5853, + "step": 28100 + }, + { + "epoch": 10.17, + "learning_rate": 3.379101868470543e-07, + "loss": 8.6662, + "step": 28120 + }, + { + "epoch": 10.18, + "learning_rate": 3.375777118909561e-07, + "loss": 8.6895, + "step": 28140 + }, + { + "epoch": 10.18, + "learning_rate": 3.372450602810426e-07, + "loss": 8.6535, + "step": 28160 + }, + { + "epoch": 10.19, + "learning_rate": 3.3691223268830846e-07, + "loss": 8.6333, + "step": 28180 + }, + { + "epoch": 10.2, + "learning_rate": 3.3657922978410335e-07, + "loss": 8.7247, + "step": 28200 + }, + { + "epoch": 10.21, + "learning_rate": 3.3624605224013054e-07, + "loss": 8.5743, + "step": 28220 + }, + { + "epoch": 10.21, + "learning_rate": 3.3591270072844547e-07, + "loss": 8.6759, + "step": 28240 + }, + { + "epoch": 10.22, + "learning_rate": 3.355791759214546e-07, + "loss": 8.6982, + "step": 28260 + }, + { + "epoch": 10.23, + "learning_rate": 3.3524547849191396e-07, + "loss": 8.7385, + "step": 28280 + }, + { + "epoch": 10.24, + "learning_rate": 3.3491160911292774e-07, + "loss": 8.6972, + "step": 28300 + }, + { + "epoch": 10.24, + "learning_rate": 3.3457756845794687e-07, + "loss": 8.6619, + "step": 28320 + }, + { + "epoch": 10.25, + "learning_rate": 3.342433572007679e-07, + "loss": 8.659, + "step": 28340 + }, + { + "epoch": 10.26, + "learning_rate": 3.3390897601553146e-07, + "loss": 8.6246, + "step": 28360 + }, + { + "epoch": 10.26, + "learning_rate": 3.3357442557672096e-07, + "loss": 8.6084, + "step": 28380 + }, + { + "epoch": 10.27, + "learning_rate": 3.3323970655916115e-07, + "loss": 8.5871, + "step": 28400 + }, + { + "epoch": 10.28, + "learning_rate": 3.3290481963801696e-07, + "loss": 8.6602, + "step": 28420 + }, + { + "epoch": 10.29, + "learning_rate": 3.325697654887918e-07, + "loss": 8.6066, + "step": 28440 + }, + { + "epoch": 10.29, + "learning_rate": 3.3223454478732647e-07, + "loss": 8.6817, + "step": 28460 + }, + { + "epoch": 10.3, + "learning_rate": 3.3189915820979785e-07, + "loss": 8.7466, + "step": 28480 + }, + { + "epoch": 10.31, + "learning_rate": 3.3156360643271736e-07, + "loss": 8.8099, + "step": 28500 + }, + { + "epoch": 10.31, + "learning_rate": 3.312278901329295e-07, + "loss": 8.6193, + "step": 28520 + }, + { + "epoch": 10.32, + "learning_rate": 3.3089200998761077e-07, + "loss": 8.7632, + "step": 28540 + }, + { + "epoch": 10.33, + "learning_rate": 3.305559666742682e-07, + "loss": 8.7607, + "step": 28560 + }, + { + "epoch": 10.34, + "learning_rate": 3.3021976087073767e-07, + "loss": 8.5753, + "step": 28580 + }, + { + "epoch": 10.34, + "learning_rate": 3.298833932551832e-07, + "loss": 8.7299, + "step": 28600 + }, + { + "epoch": 10.35, + "learning_rate": 3.295468645060951e-07, + "loss": 8.7158, + "step": 28620 + }, + { + "epoch": 10.36, + "learning_rate": 3.2921017530228845e-07, + "loss": 8.6602, + "step": 28640 + }, + { + "epoch": 10.37, + "learning_rate": 3.288733263229022e-07, + "loss": 8.6871, + "step": 28660 + }, + { + "epoch": 10.37, + "learning_rate": 3.2853631824739756e-07, + "loss": 8.751, + "step": 28680 + }, + { + "epoch": 10.38, + "learning_rate": 3.281991517555568e-07, + "loss": 8.7443, + "step": 28700 + }, + { + "epoch": 10.39, + "learning_rate": 3.278618275274814e-07, + "loss": 8.6701, + "step": 28720 + }, + { + "epoch": 10.39, + "learning_rate": 3.2752434624359127e-07, + "loss": 8.8152, + "step": 28740 + }, + { + "epoch": 10.4, + "learning_rate": 3.2718670858462296e-07, + "loss": 8.6241, + "step": 28760 + }, + { + "epoch": 10.41, + "learning_rate": 3.2684891523162854e-07, + "loss": 8.6202, + "step": 28780 + }, + { + "epoch": 10.42, + "learning_rate": 3.2651096686597423e-07, + "loss": 8.7507, + "step": 28800 + }, + { + "epoch": 10.42, + "learning_rate": 3.261728641693387e-07, + "loss": 8.6473, + "step": 28820 + }, + { + "epoch": 10.43, + "learning_rate": 3.2583460782371215e-07, + "loss": 8.582, + "step": 28840 + }, + { + "epoch": 10.44, + "learning_rate": 3.254961985113944e-07, + "loss": 8.77, + "step": 28860 + }, + { + "epoch": 10.44, + "learning_rate": 3.2515763691499425e-07, + "loss": 8.7138, + "step": 28880 + }, + { + "epoch": 10.45, + "learning_rate": 3.2481892371742725e-07, + "loss": 8.6842, + "step": 28900 + }, + { + "epoch": 10.46, + "learning_rate": 3.2448005960191507e-07, + "loss": 8.67, + "step": 28920 + }, + { + "epoch": 10.47, + "learning_rate": 3.241410452519835e-07, + "loss": 8.7029, + "step": 28940 + }, + { + "epoch": 10.47, + "learning_rate": 3.2380188135146173e-07, + "loss": 8.658, + "step": 28960 + }, + { + "epoch": 10.48, + "learning_rate": 3.234625685844802e-07, + "loss": 8.6853, + "step": 28980 + }, + { + "epoch": 10.49, + "learning_rate": 3.2312310763547005e-07, + "loss": 8.5897, + "step": 29000 + }, + { + "epoch": 10.5, + "learning_rate": 3.227834991891609e-07, + "loss": 8.7498, + "step": 29020 + }, + { + "epoch": 10.5, + "learning_rate": 3.224437439305803e-07, + "loss": 8.6559, + "step": 29040 + }, + { + "epoch": 10.51, + "learning_rate": 3.2210384254505164e-07, + "loss": 8.6665, + "step": 29060 + }, + { + "epoch": 10.52, + "learning_rate": 3.2176379571819314e-07, + "loss": 8.6583, + "step": 29080 + }, + { + "epoch": 10.52, + "learning_rate": 3.214236041359164e-07, + "loss": 8.6153, + "step": 29100 + }, + { + "epoch": 10.53, + "learning_rate": 3.2108326848442503e-07, + "loss": 8.6621, + "step": 29120 + }, + { + "epoch": 10.54, + "learning_rate": 3.2074278945021326e-07, + "loss": 8.6406, + "step": 29140 + }, + { + "epoch": 10.55, + "learning_rate": 3.2040216772006457e-07, + "loss": 8.7462, + "step": 29160 + }, + { + "epoch": 10.55, + "learning_rate": 3.200614039810501e-07, + "loss": 8.6706, + "step": 29180 + }, + { + "epoch": 10.56, + "learning_rate": 3.197204989205276e-07, + "loss": 8.6145, + "step": 29200 + }, + { + "epoch": 10.57, + "learning_rate": 3.1937945322613976e-07, + "loss": 8.6873, + "step": 29220 + }, + { + "epoch": 10.58, + "learning_rate": 3.190382675858131e-07, + "loss": 8.6631, + "step": 29240 + }, + { + "epoch": 10.58, + "learning_rate": 3.186969426877563e-07, + "loss": 8.8381, + "step": 29260 + }, + { + "epoch": 10.59, + "learning_rate": 3.18355479220459e-07, + "loss": 8.6286, + "step": 29280 + }, + { + "epoch": 10.6, + "learning_rate": 3.1801387787269043e-07, + "loss": 8.6504, + "step": 29300 + }, + { + "epoch": 10.6, + "learning_rate": 3.1767213933349756e-07, + "loss": 8.6191, + "step": 29320 + }, + { + "epoch": 10.61, + "learning_rate": 3.173302642922046e-07, + "loss": 8.7694, + "step": 29340 + }, + { + "epoch": 10.62, + "learning_rate": 3.1698825343841086e-07, + "loss": 8.7551, + "step": 29360 + }, + { + "epoch": 10.63, + "learning_rate": 3.166461074619895e-07, + "loss": 8.6852, + "step": 29380 + }, + { + "epoch": 10.63, + "learning_rate": 3.1630382705308637e-07, + "loss": 8.582, + "step": 29400 + }, + { + "epoch": 10.64, + "learning_rate": 3.1596141290211854e-07, + "loss": 8.7058, + "step": 29420 + }, + { + "epoch": 10.65, + "learning_rate": 3.156188656997727e-07, + "loss": 8.7603, + "step": 29440 + }, + { + "epoch": 10.65, + "learning_rate": 3.1527618613700396e-07, + "loss": 8.591, + "step": 29460 + }, + { + "epoch": 10.66, + "learning_rate": 3.1493337490503457e-07, + "loss": 8.7316, + "step": 29480 + }, + { + "epoch": 10.67, + "learning_rate": 3.145904326953521e-07, + "loss": 8.6253, + "step": 29500 + }, + { + "epoch": 10.68, + "learning_rate": 3.142473601997086e-07, + "loss": 8.5578, + "step": 29520 + }, + { + "epoch": 10.68, + "learning_rate": 3.1390415811011864e-07, + "loss": 8.6595, + "step": 29540 + }, + { + "epoch": 10.69, + "learning_rate": 3.1356082711885846e-07, + "loss": 8.6187, + "step": 29560 + }, + { + "epoch": 10.7, + "learning_rate": 3.1321736791846416e-07, + "loss": 8.7339, + "step": 29580 + }, + { + "epoch": 10.71, + "learning_rate": 3.1287378120173045e-07, + "loss": 8.7281, + "step": 29600 + }, + { + "epoch": 10.71, + "learning_rate": 3.125300676617093e-07, + "loss": 8.6875, + "step": 29620 + }, + { + "epoch": 10.72, + "learning_rate": 3.121862279917084e-07, + "loss": 8.5509, + "step": 29640 + }, + { + "epoch": 10.73, + "learning_rate": 3.118422628852901e-07, + "loss": 8.6831, + "step": 29660 + }, + { + "epoch": 10.73, + "learning_rate": 3.1149817303626947e-07, + "loss": 8.6986, + "step": 29680 + }, + { + "epoch": 10.74, + "learning_rate": 3.111539591387135e-07, + "loss": 8.6567, + "step": 29700 + }, + { + "epoch": 10.75, + "learning_rate": 3.1080962188693907e-07, + "loss": 8.7286, + "step": 29720 + }, + { + "epoch": 10.76, + "learning_rate": 3.1046516197551204e-07, + "loss": 8.6032, + "step": 29740 + }, + { + "epoch": 10.76, + "learning_rate": 3.101205800992458e-07, + "loss": 8.6943, + "step": 29760 + }, + { + "epoch": 10.77, + "learning_rate": 3.097758769531996e-07, + "loss": 8.7576, + "step": 29780 + }, + { + "epoch": 10.78, + "learning_rate": 3.0943105323267746e-07, + "loss": 8.6276, + "step": 29800 + }, + { + "epoch": 10.78, + "learning_rate": 3.0908610963322626e-07, + "loss": 8.6576, + "step": 29820 + }, + { + "epoch": 10.79, + "learning_rate": 3.0874104685063515e-07, + "loss": 8.6753, + "step": 29840 + }, + { + "epoch": 10.8, + "learning_rate": 3.0839586558093333e-07, + "loss": 8.6131, + "step": 29860 + }, + { + "epoch": 10.81, + "learning_rate": 3.080505665203893e-07, + "loss": 8.667, + "step": 29880 + }, + { + "epoch": 10.81, + "learning_rate": 3.077051503655089e-07, + "loss": 8.6642, + "step": 29900 + }, + { + "epoch": 10.82, + "learning_rate": 3.073596178130342e-07, + "loss": 8.6796, + "step": 29920 + }, + { + "epoch": 10.83, + "learning_rate": 3.070139695599423e-07, + "loss": 8.7781, + "step": 29940 + }, + { + "epoch": 10.84, + "learning_rate": 3.066682063034433e-07, + "loss": 8.6049, + "step": 29960 + }, + { + "epoch": 10.84, + "learning_rate": 3.063223287409797e-07, + "loss": 8.5715, + "step": 29980 + }, + { + "epoch": 10.85, + "learning_rate": 3.059763375702241e-07, + "loss": 8.7563, + "step": 30000 + }, + { + "epoch": 10.86, + "learning_rate": 3.056302334890786e-07, + "loss": 8.6739, + "step": 30020 + }, + { + "epoch": 10.86, + "learning_rate": 3.05284017195673e-07, + "loss": 8.6724, + "step": 30040 + }, + { + "epoch": 10.87, + "learning_rate": 3.049376893883633e-07, + "loss": 8.6921, + "step": 30060 + }, + { + "epoch": 10.88, + "learning_rate": 3.0459125076573063e-07, + "loss": 8.9008, + "step": 30080 + }, + { + "epoch": 10.89, + "learning_rate": 3.0424470202657946e-07, + "loss": 8.7367, + "step": 30100 + }, + { + "epoch": 10.89, + "learning_rate": 3.038980438699366e-07, + "loss": 8.5934, + "step": 30120 + }, + { + "epoch": 10.9, + "learning_rate": 3.035512769950493e-07, + "loss": 8.5653, + "step": 30140 + }, + { + "epoch": 10.91, + "learning_rate": 3.0320440210138433e-07, + "loss": 8.6243, + "step": 30160 + }, + { + "epoch": 10.92, + "learning_rate": 3.028574198886262e-07, + "loss": 8.6315, + "step": 30180 + }, + { + "epoch": 10.92, + "learning_rate": 3.0251033105667594e-07, + "loss": 8.651, + "step": 30200 + }, + { + "epoch": 10.93, + "learning_rate": 3.021631363056497e-07, + "loss": 8.5832, + "step": 30220 + }, + { + "epoch": 10.94, + "learning_rate": 3.018158363358773e-07, + "loss": 8.6438, + "step": 30240 + }, + { + "epoch": 10.94, + "learning_rate": 3.0146843184790056e-07, + "loss": 8.7393, + "step": 30260 + }, + { + "epoch": 10.95, + "learning_rate": 3.0112092354247235e-07, + "loss": 8.7362, + "step": 30280 + }, + { + "epoch": 10.96, + "learning_rate": 3.00773312120555e-07, + "loss": 8.7914, + "step": 30300 + }, + { + "epoch": 10.97, + "learning_rate": 3.004255982833186e-07, + "loss": 8.6087, + "step": 30320 + }, + { + "epoch": 10.97, + "learning_rate": 3.0007778273214015e-07, + "loss": 8.6884, + "step": 30340 + }, + { + "epoch": 10.98, + "learning_rate": 2.997298661686014e-07, + "loss": 8.6022, + "step": 30360 + }, + { + "epoch": 10.99, + "learning_rate": 2.9938184929448816e-07, + "loss": 8.6424, + "step": 30380 + }, + { + "epoch": 10.99, + "learning_rate": 2.990337328117886e-07, + "loss": 8.6903, + "step": 30400 + }, + { + "epoch": 11.0, + "learning_rate": 2.986855174226915e-07, + "loss": 8.6494, + "step": 30420 + }, + { + "epoch": 11.01, + "learning_rate": 2.983372038295855e-07, + "loss": 8.8256, + "step": 30440 + }, + { + "epoch": 11.02, + "learning_rate": 2.979887927350573e-07, + "loss": 8.7223, + "step": 30460 + }, + { + "epoch": 11.02, + "learning_rate": 2.9764028484188985e-07, + "loss": 8.7295, + "step": 30480 + }, + { + "epoch": 11.03, + "learning_rate": 2.972916808530619e-07, + "loss": 8.7298, + "step": 30500 + }, + { + "epoch": 11.04, + "learning_rate": 2.969429814717456e-07, + "loss": 8.8727, + "step": 30520 + }, + { + "epoch": 11.05, + "learning_rate": 2.9659418740130587e-07, + "loss": 8.6341, + "step": 30540 + }, + { + "epoch": 11.05, + "learning_rate": 2.9624529934529845e-07, + "loss": 8.7136, + "step": 30560 + }, + { + "epoch": 11.06, + "learning_rate": 2.9589631800746864e-07, + "loss": 8.712, + "step": 30580 + }, + { + "epoch": 11.07, + "learning_rate": 2.955472440917498e-07, + "loss": 8.6917, + "step": 30600 + }, + { + "epoch": 11.07, + "learning_rate": 2.9519807830226234e-07, + "loss": 8.655, + "step": 30620 + }, + { + "epoch": 11.08, + "learning_rate": 2.948488213433118e-07, + "loss": 8.6994, + "step": 30640 + }, + { + "epoch": 11.09, + "learning_rate": 2.944994739193876e-07, + "loss": 8.6317, + "step": 30660 + }, + { + "epoch": 11.1, + "learning_rate": 2.9415003673516165e-07, + "loss": 8.5908, + "step": 30680 + }, + { + "epoch": 11.1, + "learning_rate": 2.9380051049548695e-07, + "loss": 8.6299, + "step": 30700 + }, + { + "epoch": 11.11, + "learning_rate": 2.9345089590539605e-07, + "loss": 8.715, + "step": 30720 + }, + { + "epoch": 11.12, + "learning_rate": 2.9310119367009987e-07, + "loss": 8.7997, + "step": 30740 + }, + { + "epoch": 11.12, + "learning_rate": 2.927514044949861e-07, + "loss": 8.6665, + "step": 30760 + }, + { + "epoch": 11.13, + "learning_rate": 2.9240152908561765e-07, + "loss": 8.646, + "step": 30780 + }, + { + "epoch": 11.14, + "learning_rate": 2.9205156814773143e-07, + "loss": 8.6162, + "step": 30800 + }, + { + "epoch": 11.15, + "learning_rate": 2.917015223872369e-07, + "loss": 8.6797, + "step": 30820 + }, + { + "epoch": 11.15, + "learning_rate": 2.913513925102146e-07, + "loss": 8.7125, + "step": 30840 + }, + { + "epoch": 11.16, + "learning_rate": 2.9100117922291476e-07, + "loss": 8.7651, + "step": 30860 + }, + { + "epoch": 11.17, + "learning_rate": 2.9065088323175594e-07, + "loss": 8.7213, + "step": 30880 + }, + { + "epoch": 11.18, + "learning_rate": 2.903005052433234e-07, + "loss": 8.7114, + "step": 30900 + }, + { + "epoch": 11.18, + "learning_rate": 2.8995004596436774e-07, + "loss": 8.7222, + "step": 30920 + }, + { + "epoch": 11.19, + "learning_rate": 2.8959950610180373e-07, + "loss": 8.7304, + "step": 30940 + }, + { + "epoch": 11.2, + "learning_rate": 2.892488863627085e-07, + "loss": 8.6837, + "step": 30960 + }, + { + "epoch": 11.2, + "learning_rate": 2.888981874543205e-07, + "loss": 8.6943, + "step": 30980 + }, + { + "epoch": 11.21, + "learning_rate": 2.8854741008403753e-07, + "loss": 8.5617, + "step": 31000 + }, + { + "epoch": 11.22, + "learning_rate": 2.881965549594161e-07, + "loss": 8.8403, + "step": 31020 + }, + { + "epoch": 11.23, + "learning_rate": 2.878456227881692e-07, + "loss": 8.7576, + "step": 31040 + }, + { + "epoch": 11.23, + "learning_rate": 2.8749461427816546e-07, + "loss": 8.6772, + "step": 31060 + }, + { + "epoch": 11.24, + "learning_rate": 2.871435301374273e-07, + "loss": 8.6206, + "step": 31080 + }, + { + "epoch": 11.25, + "learning_rate": 2.8679237107413e-07, + "loss": 8.6233, + "step": 31100 + }, + { + "epoch": 11.25, + "learning_rate": 2.864411377965995e-07, + "loss": 8.6301, + "step": 31120 + }, + { + "epoch": 11.26, + "learning_rate": 2.860898310133119e-07, + "loss": 8.7266, + "step": 31140 + }, + { + "epoch": 11.27, + "learning_rate": 2.8573845143289123e-07, + "loss": 8.7034, + "step": 31160 + }, + { + "epoch": 11.28, + "learning_rate": 2.853869997641086e-07, + "loss": 8.6918, + "step": 31180 + }, + { + "epoch": 11.28, + "learning_rate": 2.850354767158804e-07, + "loss": 8.6865, + "step": 31200 + }, + { + "epoch": 11.29, + "learning_rate": 2.846838829972671e-07, + "loss": 8.6556, + "step": 31220 + }, + { + "epoch": 11.3, + "learning_rate": 2.843322193174715e-07, + "loss": 8.8643, + "step": 31240 + }, + { + "epoch": 11.31, + "learning_rate": 2.8398048638583774e-07, + "loss": 8.623, + "step": 31260 + }, + { + "epoch": 11.31, + "learning_rate": 2.8362868491184965e-07, + "loss": 8.6568, + "step": 31280 + }, + { + "epoch": 11.32, + "learning_rate": 2.8327681560512925e-07, + "loss": 8.6121, + "step": 31300 + }, + { + "epoch": 11.33, + "learning_rate": 2.829248791754353e-07, + "loss": 8.66, + "step": 31320 + }, + { + "epoch": 11.33, + "learning_rate": 2.8257287633266205e-07, + "loss": 8.6959, + "step": 31340 + }, + { + "epoch": 11.34, + "learning_rate": 2.8222080778683766e-07, + "loss": 8.9021, + "step": 31360 + }, + { + "epoch": 11.35, + "learning_rate": 2.81868674248123e-07, + "loss": 8.6877, + "step": 31380 + }, + { + "epoch": 11.36, + "learning_rate": 2.8151647642680976e-07, + "loss": 8.7424, + "step": 31400 + }, + { + "epoch": 11.36, + "learning_rate": 2.811642150333196e-07, + "loss": 8.8318, + "step": 31420 + }, + { + "epoch": 11.37, + "learning_rate": 2.8081189077820206e-07, + "loss": 8.6563, + "step": 31440 + }, + { + "epoch": 11.38, + "learning_rate": 2.804595043721337e-07, + "loss": 8.5683, + "step": 31460 + }, + { + "epoch": 11.39, + "learning_rate": 2.801070565259165e-07, + "loss": 8.6405, + "step": 31480 + }, + { + "epoch": 11.39, + "learning_rate": 2.797545479504762e-07, + "loss": 8.602, + "step": 31500 + }, + { + "epoch": 11.4, + "learning_rate": 2.7940197935686123e-07, + "loss": 8.6209, + "step": 31520 + }, + { + "epoch": 11.41, + "learning_rate": 2.790493514562408e-07, + "loss": 8.7519, + "step": 31540 + }, + { + "epoch": 11.41, + "learning_rate": 2.78696664959904e-07, + "loss": 8.639, + "step": 31560 + }, + { + "epoch": 11.42, + "learning_rate": 2.783439205792581e-07, + "loss": 8.7752, + "step": 31580 + }, + { + "epoch": 11.43, + "learning_rate": 2.7799111902582693e-07, + "loss": 8.5395, + "step": 31600 + }, + { + "epoch": 11.44, + "learning_rate": 2.7763826101124996e-07, + "loss": 8.6828, + "step": 31620 + }, + { + "epoch": 11.44, + "learning_rate": 2.7728534724728023e-07, + "loss": 8.6239, + "step": 31640 + }, + { + "epoch": 11.45, + "learning_rate": 2.7693237844578336e-07, + "loss": 8.6411, + "step": 31660 + }, + { + "epoch": 11.46, + "learning_rate": 2.7657935531873606e-07, + "loss": 8.61, + "step": 31680 + }, + { + "epoch": 11.46, + "learning_rate": 2.762262785782245e-07, + "loss": 8.6629, + "step": 31700 + }, + { + "epoch": 11.47, + "learning_rate": 2.758731489364431e-07, + "loss": 8.7071, + "step": 31720 + }, + { + "epoch": 11.48, + "learning_rate": 2.7551996710569294e-07, + "loss": 8.7268, + "step": 31740 + }, + { + "epoch": 11.49, + "learning_rate": 2.751667337983803e-07, + "loss": 8.6602, + "step": 31760 + }, + { + "epoch": 11.49, + "learning_rate": 2.7481344972701545e-07, + "loss": 8.8078, + "step": 31780 + }, + { + "epoch": 11.5, + "learning_rate": 2.7446011560421087e-07, + "loss": 8.7245, + "step": 31800 + }, + { + "epoch": 11.51, + "learning_rate": 2.7410673214268017e-07, + "loss": 8.7388, + "step": 31820 + }, + { + "epoch": 11.52, + "learning_rate": 2.737533000552363e-07, + "loss": 8.8027, + "step": 31840 + }, + { + "epoch": 11.52, + "learning_rate": 2.733998200547906e-07, + "loss": 8.6749, + "step": 31860 + }, + { + "epoch": 11.53, + "learning_rate": 2.730462928543507e-07, + "loss": 8.6042, + "step": 31880 + }, + { + "epoch": 11.54, + "learning_rate": 2.726927191670197e-07, + "loss": 8.6362, + "step": 31900 + }, + { + "epoch": 11.54, + "learning_rate": 2.7233909970599426e-07, + "loss": 8.7159, + "step": 31920 + }, + { + "epoch": 11.55, + "learning_rate": 2.7198543518456356e-07, + "loss": 8.8243, + "step": 31940 + }, + { + "epoch": 11.56, + "learning_rate": 2.716317263161076e-07, + "loss": 8.7407, + "step": 31960 + }, + { + "epoch": 11.57, + "learning_rate": 2.712779738140957e-07, + "loss": 8.7114, + "step": 31980 + }, + { + "epoch": 11.57, + "learning_rate": 2.7092417839208537e-07, + "loss": 8.6751, + "step": 32000 + }, + { + "epoch": 11.58, + "learning_rate": 2.7057034076372073e-07, + "loss": 8.6862, + "step": 32020 + }, + { + "epoch": 11.59, + "learning_rate": 2.7021646164273084e-07, + "loss": 8.6095, + "step": 32040 + }, + { + "epoch": 11.59, + "learning_rate": 2.698625417429286e-07, + "loss": 8.6848, + "step": 32060 + }, + { + "epoch": 11.6, + "learning_rate": 2.695085817782091e-07, + "loss": 8.7513, + "step": 32080 + }, + { + "epoch": 11.61, + "learning_rate": 2.691545824625483e-07, + "loss": 8.6242, + "step": 32100 + }, + { + "epoch": 11.62, + "learning_rate": 2.6880054451000144e-07, + "loss": 8.7933, + "step": 32120 + }, + { + "epoch": 11.62, + "learning_rate": 2.6844646863470185e-07, + "loss": 8.6506, + "step": 32140 + }, + { + "epoch": 11.63, + "learning_rate": 2.6809235555085923e-07, + "loss": 8.6979, + "step": 32160 + }, + { + "epoch": 11.64, + "learning_rate": 2.677382059727583e-07, + "loss": 8.6129, + "step": 32180 + }, + { + "epoch": 11.65, + "learning_rate": 2.673840206147576e-07, + "loss": 8.6981, + "step": 32200 + }, + { + "epoch": 11.65, + "learning_rate": 2.670298001912875e-07, + "loss": 8.6077, + "step": 32220 + }, + { + "epoch": 11.66, + "learning_rate": 2.666755454168495e-07, + "loss": 8.6092, + "step": 32240 + }, + { + "epoch": 11.67, + "learning_rate": 2.663212570060141e-07, + "loss": 8.651, + "step": 32260 + }, + { + "epoch": 11.67, + "learning_rate": 2.659669356734198e-07, + "loss": 8.7201, + "step": 32280 + }, + { + "epoch": 11.68, + "learning_rate": 2.6561258213377133e-07, + "loss": 8.6061, + "step": 32300 + }, + { + "epoch": 11.69, + "learning_rate": 2.6525819710183867e-07, + "loss": 8.7201, + "step": 32320 + }, + { + "epoch": 11.7, + "learning_rate": 2.6490378129245496e-07, + "loss": 8.6382, + "step": 32340 + }, + { + "epoch": 11.7, + "learning_rate": 2.645493354205158e-07, + "loss": 8.7026, + "step": 32360 + }, + { + "epoch": 11.71, + "learning_rate": 2.6419486020097713e-07, + "loss": 8.6788, + "step": 32380 + }, + { + "epoch": 11.72, + "learning_rate": 2.638403563488542e-07, + "loss": 8.6216, + "step": 32400 + }, + { + "epoch": 11.73, + "learning_rate": 2.6348582457922006e-07, + "loss": 8.7143, + "step": 32420 + }, + { + "epoch": 11.73, + "learning_rate": 2.6313126560720413e-07, + "loss": 8.6853, + "step": 32440 + }, + { + "epoch": 11.74, + "learning_rate": 2.627766801479904e-07, + "loss": 8.676, + "step": 32460 + }, + { + "epoch": 11.75, + "learning_rate": 2.6242206891681663e-07, + "loss": 8.7085, + "step": 32480 + }, + { + "epoch": 11.75, + "learning_rate": 2.620674326289725e-07, + "loss": 8.7902, + "step": 32500 + }, + { + "epoch": 11.76, + "learning_rate": 2.6171277199979785e-07, + "loss": 8.6366, + "step": 32520 + }, + { + "epoch": 11.77, + "learning_rate": 2.613580877446822e-07, + "loss": 8.5595, + "step": 32540 + }, + { + "epoch": 11.78, + "learning_rate": 2.6100338057906243e-07, + "loss": 8.6858, + "step": 32560 + }, + { + "epoch": 11.78, + "learning_rate": 2.606486512184215e-07, + "loss": 8.6326, + "step": 32580 + }, + { + "epoch": 11.79, + "learning_rate": 2.602939003782875e-07, + "loss": 8.6178, + "step": 32600 + }, + { + "epoch": 11.8, + "learning_rate": 2.5993912877423147e-07, + "loss": 8.6427, + "step": 32620 + }, + { + "epoch": 11.8, + "learning_rate": 2.5958433712186656e-07, + "loss": 8.7161, + "step": 32640 + }, + { + "epoch": 11.81, + "learning_rate": 2.5922952613684627e-07, + "loss": 8.6176, + "step": 32660 + }, + { + "epoch": 11.82, + "learning_rate": 2.5887469653486327e-07, + "loss": 8.5744, + "step": 32680 + }, + { + "epoch": 11.83, + "learning_rate": 2.585198490316475e-07, + "loss": 8.6941, + "step": 32700 + }, + { + "epoch": 11.83, + "learning_rate": 2.5816498434296513e-07, + "loss": 8.7242, + "step": 32720 + }, + { + "epoch": 11.84, + "learning_rate": 2.5781010318461714e-07, + "loss": 8.758, + "step": 32740 + }, + { + "epoch": 11.85, + "learning_rate": 2.5745520627243756e-07, + "loss": 8.7095, + "step": 32760 + }, + { + "epoch": 11.86, + "learning_rate": 2.571002943222922e-07, + "loss": 8.6995, + "step": 32780 + }, + { + "epoch": 11.86, + "learning_rate": 2.567453680500774e-07, + "loss": 8.6771, + "step": 32800 + }, + { + "epoch": 11.87, + "learning_rate": 2.5639042817171804e-07, + "loss": 8.6903, + "step": 32820 + }, + { + "epoch": 11.88, + "learning_rate": 2.560354754031667e-07, + "loss": 8.7104, + "step": 32840 + }, + { + "epoch": 11.88, + "learning_rate": 2.5568051046040197e-07, + "loss": 8.7613, + "step": 32860 + }, + { + "epoch": 11.89, + "learning_rate": 2.553255340594268e-07, + "loss": 8.6661, + "step": 32880 + }, + { + "epoch": 11.9, + "learning_rate": 2.549705469162675e-07, + "loss": 8.581, + "step": 32900 + }, + { + "epoch": 11.91, + "learning_rate": 2.54615549746972e-07, + "loss": 8.6058, + "step": 32920 + }, + { + "epoch": 11.91, + "learning_rate": 2.5426054326760816e-07, + "loss": 8.6671, + "step": 32940 + }, + { + "epoch": 11.92, + "learning_rate": 2.53905528194263e-07, + "loss": 8.7079, + "step": 32960 + }, + { + "epoch": 11.93, + "learning_rate": 2.5355050524304067e-07, + "loss": 8.7816, + "step": 32980 + }, + { + "epoch": 11.93, + "learning_rate": 2.5319547513006124e-07, + "loss": 8.7454, + "step": 33000 + }, + { + "epoch": 11.94, + "learning_rate": 2.528404385714594e-07, + "loss": 8.692, + "step": 33020 + }, + { + "epoch": 11.95, + "learning_rate": 2.524853962833824e-07, + "loss": 8.6828, + "step": 33040 + }, + { + "epoch": 11.96, + "learning_rate": 2.521303489819896e-07, + "loss": 8.6124, + "step": 33060 + }, + { + "epoch": 11.96, + "learning_rate": 2.5177529738345005e-07, + "loss": 8.6774, + "step": 33080 + }, + { + "epoch": 11.97, + "learning_rate": 2.514202422039417e-07, + "loss": 8.6826, + "step": 33100 + }, + { + "epoch": 11.98, + "learning_rate": 2.510651841596496e-07, + "loss": 8.6866, + "step": 33120 + }, + { + "epoch": 11.99, + "learning_rate": 2.5071012396676473e-07, + "loss": 8.5997, + "step": 33140 + }, + { + "epoch": 11.99, + "learning_rate": 2.5035506234148213e-07, + "loss": 8.6878, + "step": 33160 + }, + { + "epoch": 12.0, + "learning_rate": 2.5e-07, + "loss": 8.6028, + "step": 33180 + }, + { + "epoch": 12.01, + "learning_rate": 2.4964493765851795e-07, + "loss": 8.5968, + "step": 33200 + }, + { + "epoch": 12.01, + "learning_rate": 2.492898760332353e-07, + "loss": 8.7196, + "step": 33220 + }, + { + "epoch": 12.02, + "learning_rate": 2.4893481584035043e-07, + "loss": 8.5782, + "step": 33240 + }, + { + "epoch": 12.03, + "learning_rate": 2.485797577960583e-07, + "loss": 8.6388, + "step": 33260 + }, + { + "epoch": 12.04, + "learning_rate": 2.4822470261655e-07, + "loss": 8.6747, + "step": 33280 + }, + { + "epoch": 12.04, + "learning_rate": 2.478696510180105e-07, + "loss": 8.678, + "step": 33300 + }, + { + "epoch": 12.05, + "learning_rate": 2.475146037166176e-07, + "loss": 8.7884, + "step": 33320 + }, + { + "epoch": 12.06, + "learning_rate": 2.471595614285407e-07, + "loss": 8.6724, + "step": 33340 + }, + { + "epoch": 12.07, + "learning_rate": 2.4680452486993874e-07, + "loss": 8.5924, + "step": 33360 + }, + { + "epoch": 12.07, + "learning_rate": 2.4644949475695936e-07, + "loss": 8.6632, + "step": 33380 + }, + { + "epoch": 12.08, + "learning_rate": 2.460944718057371e-07, + "loss": 8.7362, + "step": 33400 + }, + { + "epoch": 12.09, + "learning_rate": 2.4573945673239187e-07, + "loss": 8.7602, + "step": 33420 + }, + { + "epoch": 12.09, + "learning_rate": 2.453844502530281e-07, + "loss": 8.6372, + "step": 33440 + }, + { + "epoch": 12.1, + "learning_rate": 2.4502945308373243e-07, + "loss": 8.6209, + "step": 33460 + }, + { + "epoch": 12.11, + "learning_rate": 2.446744659405732e-07, + "loss": 8.6308, + "step": 33480 + }, + { + "epoch": 12.12, + "learning_rate": 2.44319489539598e-07, + "loss": 8.6478, + "step": 33500 + }, + { + "epoch": 12.12, + "learning_rate": 2.439645245968333e-07, + "loss": 8.6312, + "step": 33520 + }, + { + "epoch": 12.13, + "learning_rate": 2.43609571828282e-07, + "loss": 8.6993, + "step": 33540 + }, + { + "epoch": 12.14, + "learning_rate": 2.432546319499226e-07, + "loss": 8.7386, + "step": 33560 + }, + { + "epoch": 12.14, + "learning_rate": 2.4289970567770775e-07, + "loss": 8.6758, + "step": 33580 + }, + { + "epoch": 12.15, + "learning_rate": 2.4254479372756236e-07, + "loss": 8.7626, + "step": 33600 + }, + { + "epoch": 12.16, + "learning_rate": 2.4218989681538284e-07, + "loss": 8.7846, + "step": 33620 + }, + { + "epoch": 12.17, + "learning_rate": 2.4183501565703485e-07, + "loss": 8.8279, + "step": 33640 + }, + { + "epoch": 12.17, + "learning_rate": 2.4148015096835255e-07, + "loss": 8.759, + "step": 33660 + }, + { + "epoch": 12.18, + "learning_rate": 2.4112530346513676e-07, + "loss": 8.6495, + "step": 33680 + }, + { + "epoch": 12.19, + "learning_rate": 2.407704738631537e-07, + "loss": 8.6556, + "step": 33700 + }, + { + "epoch": 12.2, + "learning_rate": 2.404156628781335e-07, + "loss": 8.6345, + "step": 33720 + }, + { + "epoch": 12.2, + "learning_rate": 2.400608712257686e-07, + "loss": 8.6331, + "step": 33740 + }, + { + "epoch": 12.21, + "learning_rate": 2.3970609962171255e-07, + "loss": 8.617, + "step": 33760 + }, + { + "epoch": 12.22, + "learning_rate": 2.3935134878157853e-07, + "loss": 8.7142, + "step": 33780 + }, + { + "epoch": 12.22, + "learning_rate": 2.3899661942093755e-07, + "loss": 8.6639, + "step": 33800 + }, + { + "epoch": 12.23, + "learning_rate": 2.386419122553178e-07, + "loss": 8.6693, + "step": 33820 + }, + { + "epoch": 12.24, + "learning_rate": 2.382872280002022e-07, + "loss": 8.7309, + "step": 33840 + }, + { + "epoch": 12.25, + "learning_rate": 2.3793256737102757e-07, + "loss": 8.7177, + "step": 33860 + }, + { + "epoch": 12.25, + "learning_rate": 2.3757793108318337e-07, + "loss": 8.6647, + "step": 33880 + }, + { + "epoch": 12.26, + "learning_rate": 2.3722331985200956e-07, + "loss": 8.725, + "step": 33900 + }, + { + "epoch": 12.27, + "learning_rate": 2.368687343927959e-07, + "loss": 8.5963, + "step": 33920 + }, + { + "epoch": 12.27, + "learning_rate": 2.3651417542077994e-07, + "loss": 8.694, + "step": 33940 + }, + { + "epoch": 12.28, + "learning_rate": 2.361596436511458e-07, + "loss": 8.7634, + "step": 33960 + }, + { + "epoch": 12.29, + "learning_rate": 2.3580513979902295e-07, + "loss": 8.9021, + "step": 33980 + }, + { + "epoch": 12.3, + "learning_rate": 2.354506645794842e-07, + "loss": 8.6636, + "step": 34000 + }, + { + "epoch": 12.3, + "learning_rate": 2.3509621870754504e-07, + "loss": 8.6979, + "step": 34020 + }, + { + "epoch": 12.31, + "learning_rate": 2.347418028981614e-07, + "loss": 8.6839, + "step": 34040 + }, + { + "epoch": 12.32, + "learning_rate": 2.3438741786622862e-07, + "loss": 8.6451, + "step": 34060 + }, + { + "epoch": 12.33, + "learning_rate": 2.3403306432658023e-07, + "loss": 8.6216, + "step": 34080 + }, + { + "epoch": 12.33, + "learning_rate": 2.3367874299398583e-07, + "loss": 8.7277, + "step": 34100 + }, + { + "epoch": 12.34, + "learning_rate": 2.3332445458315048e-07, + "loss": 8.6828, + "step": 34120 + }, + { + "epoch": 12.35, + "learning_rate": 2.3297019980871242e-07, + "loss": 8.7317, + "step": 34140 + }, + { + "epoch": 12.35, + "learning_rate": 2.3261597938524244e-07, + "loss": 8.7596, + "step": 34160 + }, + { + "epoch": 12.36, + "learning_rate": 2.3226179402724173e-07, + "loss": 8.6388, + "step": 34180 + }, + { + "epoch": 12.37, + "learning_rate": 2.3190764444914078e-07, + "loss": 8.6952, + "step": 34200 + }, + { + "epoch": 12.38, + "learning_rate": 2.3155353136529818e-07, + "loss": 8.5665, + "step": 34220 + }, + { + "epoch": 12.38, + "learning_rate": 2.311994554899985e-07, + "loss": 8.669, + "step": 34240 + }, + { + "epoch": 12.39, + "learning_rate": 2.3084541753745173e-07, + "loss": 8.6576, + "step": 34260 + }, + { + "epoch": 12.4, + "learning_rate": 2.3049141822179097e-07, + "loss": 8.6854, + "step": 34280 + }, + { + "epoch": 12.41, + "learning_rate": 2.301374582570714e-07, + "loss": 8.6454, + "step": 34300 + }, + { + "epoch": 12.41, + "learning_rate": 2.2978353835726919e-07, + "loss": 8.7634, + "step": 34320 + }, + { + "epoch": 12.42, + "learning_rate": 2.2942965923627925e-07, + "loss": 8.6704, + "step": 34340 + }, + { + "epoch": 12.43, + "learning_rate": 2.290758216079146e-07, + "loss": 8.7067, + "step": 34360 + }, + { + "epoch": 12.43, + "learning_rate": 2.2872202618590437e-07, + "loss": 8.6651, + "step": 34380 + }, + { + "epoch": 12.44, + "learning_rate": 2.2836827368389245e-07, + "loss": 8.7543, + "step": 34400 + }, + { + "epoch": 12.45, + "learning_rate": 2.2801456481543645e-07, + "loss": 8.6127, + "step": 34420 + }, + { + "epoch": 12.46, + "learning_rate": 2.276609002940057e-07, + "loss": 8.6231, + "step": 34440 + }, + { + "epoch": 12.46, + "learning_rate": 2.2730728083298032e-07, + "loss": 8.7681, + "step": 34460 + }, + { + "epoch": 12.47, + "learning_rate": 2.2695370714564925e-07, + "loss": 8.636, + "step": 34480 + }, + { + "epoch": 12.48, + "learning_rate": 2.2660017994520938e-07, + "loss": 8.6136, + "step": 34500 + }, + { + "epoch": 12.48, + "learning_rate": 2.2624669994476368e-07, + "loss": 8.7001, + "step": 34520 + }, + { + "epoch": 12.49, + "learning_rate": 2.2589326785731986e-07, + "loss": 8.6366, + "step": 34540 + }, + { + "epoch": 12.5, + "learning_rate": 2.2553988439578914e-07, + "loss": 8.6689, + "step": 34560 + }, + { + "epoch": 12.51, + "learning_rate": 2.2518655027298464e-07, + "loss": 8.5895, + "step": 34580 + }, + { + "epoch": 12.51, + "learning_rate": 2.2483326620161975e-07, + "loss": 8.5908, + "step": 34600 + }, + { + "epoch": 12.52, + "learning_rate": 2.2448003289430712e-07, + "loss": 8.6957, + "step": 34620 + }, + { + "epoch": 12.53, + "learning_rate": 2.2412685106355693e-07, + "loss": 8.6366, + "step": 34640 + }, + { + "epoch": 12.54, + "learning_rate": 2.237737214217755e-07, + "loss": 8.7087, + "step": 34660 + }, + { + "epoch": 12.54, + "learning_rate": 2.2342064468126395e-07, + "loss": 8.6485, + "step": 34680 + }, + { + "epoch": 12.55, + "learning_rate": 2.2306762155421662e-07, + "loss": 8.6545, + "step": 34700 + }, + { + "epoch": 12.56, + "learning_rate": 2.2271465275271983e-07, + "loss": 8.7019, + "step": 34720 + }, + { + "epoch": 12.56, + "learning_rate": 2.2236173898875002e-07, + "loss": 8.69, + "step": 34740 + }, + { + "epoch": 12.57, + "learning_rate": 2.2200888097417302e-07, + "loss": 8.638, + "step": 34760 + }, + { + "epoch": 12.58, + "learning_rate": 2.216560794207419e-07, + "loss": 8.7119, + "step": 34780 + }, + { + "epoch": 12.59, + "learning_rate": 2.21303335040096e-07, + "loss": 8.7205, + "step": 34800 + }, + { + "epoch": 12.59, + "learning_rate": 2.2095064854375928e-07, + "loss": 8.7169, + "step": 34820 + }, + { + "epoch": 12.6, + "learning_rate": 2.2059802064313882e-07, + "loss": 8.5366, + "step": 34840 + }, + { + "epoch": 12.61, + "learning_rate": 2.2024545204952382e-07, + "loss": 8.7096, + "step": 34860 + }, + { + "epoch": 12.61, + "learning_rate": 2.1989294347408347e-07, + "loss": 8.5825, + "step": 34880 + }, + { + "epoch": 12.62, + "learning_rate": 2.195404956278663e-07, + "loss": 8.6663, + "step": 34900 + }, + { + "epoch": 12.63, + "learning_rate": 2.1918810922179803e-07, + "loss": 8.6597, + "step": 34920 + }, + { + "epoch": 12.64, + "learning_rate": 2.1883578496668043e-07, + "loss": 8.586, + "step": 34940 + }, + { + "epoch": 12.64, + "learning_rate": 2.1848352357319022e-07, + "loss": 8.7007, + "step": 34960 + }, + { + "epoch": 12.65, + "learning_rate": 2.1813132575187697e-07, + "loss": 8.7152, + "step": 34980 + }, + { + "epoch": 12.66, + "learning_rate": 2.1777919221316232e-07, + "loss": 8.7078, + "step": 35000 + }, + { + "epoch": 12.67, + "learning_rate": 2.1742712366733803e-07, + "loss": 8.7375, + "step": 35020 + }, + { + "epoch": 12.67, + "learning_rate": 2.1707512082456473e-07, + "loss": 8.7171, + "step": 35040 + }, + { + "epoch": 12.68, + "learning_rate": 2.167231843948708e-07, + "loss": 8.6852, + "step": 35060 + }, + { + "epoch": 12.69, + "learning_rate": 2.1637131508815027e-07, + "loss": 8.6224, + "step": 35080 + }, + { + "epoch": 12.69, + "learning_rate": 2.1601951361416223e-07, + "loss": 8.7685, + "step": 35100 + }, + { + "epoch": 12.7, + "learning_rate": 2.1566778068252858e-07, + "loss": 8.7018, + "step": 35120 + }, + { + "epoch": 12.71, + "learning_rate": 2.1531611700273295e-07, + "loss": 8.7495, + "step": 35140 + }, + { + "epoch": 12.72, + "learning_rate": 2.1496452328411964e-07, + "loss": 8.7644, + "step": 35160 + }, + { + "epoch": 12.72, + "learning_rate": 2.146130002358914e-07, + "loss": 8.674, + "step": 35180 + }, + { + "epoch": 12.73, + "learning_rate": 2.142615485671088e-07, + "loss": 8.7433, + "step": 35200 + }, + { + "epoch": 12.74, + "learning_rate": 2.139101689866881e-07, + "loss": 8.615, + "step": 35220 + }, + { + "epoch": 12.75, + "learning_rate": 2.135588622034005e-07, + "loss": 8.642, + "step": 35240 + }, + { + "epoch": 12.75, + "learning_rate": 2.1320762892587008e-07, + "loss": 8.8543, + "step": 35260 + }, + { + "epoch": 12.76, + "learning_rate": 2.128564698625726e-07, + "loss": 8.562, + "step": 35280 + }, + { + "epoch": 12.77, + "learning_rate": 2.1250538572183457e-07, + "loss": 8.7055, + "step": 35300 + }, + { + "epoch": 12.77, + "learning_rate": 2.1215437721183074e-07, + "loss": 8.6682, + "step": 35320 + }, + { + "epoch": 12.78, + "learning_rate": 2.1180344504058392e-07, + "loss": 8.6209, + "step": 35340 + }, + { + "epoch": 12.79, + "learning_rate": 2.1145258991596245e-07, + "loss": 8.6196, + "step": 35360 + }, + { + "epoch": 12.8, + "learning_rate": 2.1110181254567955e-07, + "loss": 8.6175, + "step": 35380 + }, + { + "epoch": 12.8, + "learning_rate": 2.1075111363729154e-07, + "loss": 8.813, + "step": 35400 + }, + { + "epoch": 12.81, + "learning_rate": 2.1040049389819624e-07, + "loss": 8.5952, + "step": 35420 + }, + { + "epoch": 12.82, + "learning_rate": 2.1004995403563224e-07, + "loss": 8.7259, + "step": 35440 + }, + { + "epoch": 12.82, + "learning_rate": 2.096994947566766e-07, + "loss": 8.7216, + "step": 35460 + }, + { + "epoch": 12.83, + "learning_rate": 2.0934911676824403e-07, + "loss": 8.7122, + "step": 35480 + }, + { + "epoch": 12.84, + "learning_rate": 2.089988207770852e-07, + "loss": 8.614, + "step": 35500 + }, + { + "epoch": 12.85, + "learning_rate": 2.086486074897854e-07, + "loss": 8.6394, + "step": 35520 + }, + { + "epoch": 12.85, + "learning_rate": 2.0829847761276316e-07, + "loss": 8.6364, + "step": 35540 + }, + { + "epoch": 12.86, + "learning_rate": 2.0794843185226865e-07, + "loss": 8.7904, + "step": 35560 + }, + { + "epoch": 12.87, + "learning_rate": 2.0759847091438238e-07, + "loss": 8.6759, + "step": 35580 + }, + { + "epoch": 12.88, + "learning_rate": 2.0724859550501393e-07, + "loss": 8.5717, + "step": 35600 + }, + { + "epoch": 12.88, + "learning_rate": 2.0689880632990008e-07, + "loss": 8.7717, + "step": 35620 + }, + { + "epoch": 12.89, + "learning_rate": 2.0654910409460396e-07, + "loss": 8.6799, + "step": 35640 + }, + { + "epoch": 12.9, + "learning_rate": 2.0619948950451316e-07, + "loss": 8.6777, + "step": 35660 + }, + { + "epoch": 12.9, + "learning_rate": 2.0584996326483838e-07, + "loss": 8.6712, + "step": 35680 + }, + { + "epoch": 12.91, + "learning_rate": 2.0550052608061248e-07, + "loss": 8.6452, + "step": 35700 + }, + { + "epoch": 12.92, + "learning_rate": 2.0515117865668815e-07, + "loss": 8.6933, + "step": 35720 + }, + { + "epoch": 12.93, + "learning_rate": 2.0480192169773763e-07, + "loss": 8.7019, + "step": 35740 + }, + { + "epoch": 12.93, + "learning_rate": 2.0445275590825024e-07, + "loss": 8.7944, + "step": 35760 + }, + { + "epoch": 12.94, + "learning_rate": 2.0410368199253142e-07, + "loss": 8.7143, + "step": 35780 + }, + { + "epoch": 12.95, + "learning_rate": 2.0375470065470158e-07, + "loss": 8.6063, + "step": 35800 + }, + { + "epoch": 12.95, + "learning_rate": 2.0340581259869405e-07, + "loss": 8.6562, + "step": 35820 + }, + { + "epoch": 12.96, + "learning_rate": 2.0305701852825438e-07, + "loss": 8.6519, + "step": 35840 + }, + { + "epoch": 12.97, + "learning_rate": 2.027083191469381e-07, + "loss": 8.703, + "step": 35860 + }, + { + "epoch": 12.98, + "learning_rate": 2.0235971515811013e-07, + "loss": 8.6644, + "step": 35880 + }, + { + "epoch": 12.98, + "learning_rate": 2.0201120726494278e-07, + "loss": 8.751, + "step": 35900 + }, + { + "epoch": 12.99, + "learning_rate": 2.016627961704144e-07, + "loss": 8.6513, + "step": 35920 + }, + { + "epoch": 13.0, + "learning_rate": 2.013144825773085e-07, + "loss": 8.5942, + "step": 35940 + }, + { + "epoch": 13.01, + "learning_rate": 2.0096626718821143e-07, + "loss": 8.739, + "step": 35960 + }, + { + "epoch": 13.01, + "learning_rate": 2.0061815070551184e-07, + "loss": 8.6871, + "step": 35980 + }, + { + "epoch": 13.02, + "learning_rate": 2.002701338313987e-07, + "loss": 8.6548, + "step": 36000 + }, + { + "epoch": 13.03, + "learning_rate": 1.9992221726785988e-07, + "loss": 8.7896, + "step": 36020 + }, + { + "epoch": 13.03, + "learning_rate": 1.995744017166814e-07, + "loss": 8.5878, + "step": 36040 + }, + { + "epoch": 13.04, + "learning_rate": 1.9922668787944497e-07, + "loss": 8.6677, + "step": 36060 + }, + { + "epoch": 13.05, + "learning_rate": 1.9887907645752765e-07, + "loss": 8.5799, + "step": 36080 + }, + { + "epoch": 13.06, + "learning_rate": 1.9853156815209955e-07, + "loss": 8.6493, + "step": 36100 + }, + { + "epoch": 13.06, + "learning_rate": 1.9818416366412275e-07, + "loss": 8.7074, + "step": 36120 + }, + { + "epoch": 13.07, + "learning_rate": 1.9783686369435031e-07, + "loss": 8.6966, + "step": 36140 + }, + { + "epoch": 13.08, + "learning_rate": 1.9748966894332404e-07, + "loss": 8.623, + "step": 36160 + }, + { + "epoch": 13.08, + "learning_rate": 1.9714258011137384e-07, + "loss": 8.6557, + "step": 36180 + }, + { + "epoch": 13.09, + "learning_rate": 1.9679559789861575e-07, + "loss": 8.6353, + "step": 36200 + }, + { + "epoch": 13.1, + "learning_rate": 1.9644872300495068e-07, + "loss": 8.5737, + "step": 36220 + }, + { + "epoch": 13.11, + "learning_rate": 1.9610195613006343e-07, + "loss": 8.7244, + "step": 36240 + }, + { + "epoch": 13.11, + "learning_rate": 1.9575529797342047e-07, + "loss": 8.6988, + "step": 36260 + }, + { + "epoch": 13.12, + "learning_rate": 1.954087492342694e-07, + "loss": 8.6604, + "step": 36280 + }, + { + "epoch": 13.13, + "learning_rate": 1.950623106116367e-07, + "loss": 8.614, + "step": 36300 + }, + { + "epoch": 13.14, + "learning_rate": 1.9471598280432705e-07, + "loss": 8.7385, + "step": 36320 + }, + { + "epoch": 13.14, + "learning_rate": 1.9436976651092142e-07, + "loss": 8.6884, + "step": 36340 + }, + { + "epoch": 13.15, + "learning_rate": 1.9402366242977592e-07, + "loss": 8.6888, + "step": 36360 + }, + { + "epoch": 13.16, + "learning_rate": 1.936776712590203e-07, + "loss": 8.7554, + "step": 36380 + }, + { + "epoch": 13.16, + "learning_rate": 1.933317936965566e-07, + "loss": 8.7202, + "step": 36400 + }, + { + "epoch": 13.17, + "learning_rate": 1.9298603044005774e-07, + "loss": 8.7588, + "step": 36420 + }, + { + "epoch": 13.18, + "learning_rate": 1.9264038218696576e-07, + "loss": 8.6336, + "step": 36440 + }, + { + "epoch": 13.19, + "learning_rate": 1.9229484963449112e-07, + "loss": 8.6737, + "step": 36460 + }, + { + "epoch": 13.19, + "learning_rate": 1.919494334796107e-07, + "loss": 8.7338, + "step": 36480 + }, + { + "epoch": 13.2, + "learning_rate": 1.9160413441906665e-07, + "loss": 8.6593, + "step": 36500 + }, + { + "epoch": 13.21, + "learning_rate": 1.9125895314936488e-07, + "loss": 8.6246, + "step": 36520 + }, + { + "epoch": 13.22, + "learning_rate": 1.909138903667738e-07, + "loss": 8.6588, + "step": 36540 + }, + { + "epoch": 13.22, + "learning_rate": 1.905689467673226e-07, + "loss": 8.655, + "step": 36560 + }, + { + "epoch": 13.23, + "learning_rate": 1.9022412304680042e-07, + "loss": 8.6502, + "step": 36580 + }, + { + "epoch": 13.24, + "learning_rate": 1.8987941990075415e-07, + "loss": 8.6792, + "step": 36600 + }, + { + "epoch": 13.24, + "learning_rate": 1.8953483802448796e-07, + "loss": 8.6347, + "step": 36620 + }, + { + "epoch": 13.25, + "learning_rate": 1.8919037811306104e-07, + "loss": 8.64, + "step": 36640 + }, + { + "epoch": 13.26, + "learning_rate": 1.8884604086128654e-07, + "loss": 8.681, + "step": 36660 + }, + { + "epoch": 13.27, + "learning_rate": 1.885018269637305e-07, + "loss": 8.6094, + "step": 36680 + }, + { + "epoch": 13.27, + "learning_rate": 1.8815773711470987e-07, + "loss": 8.6675, + "step": 36700 + }, + { + "epoch": 13.28, + "learning_rate": 1.8781377200829156e-07, + "loss": 8.6894, + "step": 36720 + }, + { + "epoch": 13.29, + "learning_rate": 1.8746993233829079e-07, + "loss": 8.6208, + "step": 36740 + }, + { + "epoch": 13.29, + "learning_rate": 1.8712621879826955e-07, + "loss": 8.6134, + "step": 36760 + }, + { + "epoch": 13.3, + "learning_rate": 1.867826320815359e-07, + "loss": 8.7218, + "step": 36780 + }, + { + "epoch": 13.31, + "learning_rate": 1.8643917288114146e-07, + "loss": 8.8159, + "step": 36800 + }, + { + "epoch": 13.32, + "learning_rate": 1.8609584188988133e-07, + "loss": 8.7762, + "step": 36820 + }, + { + "epoch": 13.32, + "learning_rate": 1.8575263980029147e-07, + "loss": 8.7979, + "step": 36840 + }, + { + "epoch": 13.33, + "learning_rate": 1.8540956730464785e-07, + "loss": 8.6362, + "step": 36860 + }, + { + "epoch": 13.34, + "learning_rate": 1.8506662509496546e-07, + "loss": 8.7675, + "step": 36880 + }, + { + "epoch": 13.35, + "learning_rate": 1.8472381386299596e-07, + "loss": 8.6792, + "step": 36900 + }, + { + "epoch": 13.35, + "learning_rate": 1.8438113430022733e-07, + "loss": 8.6449, + "step": 36920 + }, + { + "epoch": 13.36, + "learning_rate": 1.840385870978815e-07, + "loss": 8.6976, + "step": 36940 + }, + { + "epoch": 13.37, + "learning_rate": 1.8369617294691358e-07, + "loss": 8.7641, + "step": 36960 + }, + { + "epoch": 13.37, + "learning_rate": 1.8335389253801055e-07, + "loss": 8.7507, + "step": 36980 + }, + { + "epoch": 13.38, + "learning_rate": 1.8301174656158912e-07, + "loss": 8.6479, + "step": 37000 + }, + { + "epoch": 13.39, + "learning_rate": 1.826697357077954e-07, + "loss": 8.6015, + "step": 37020 + }, + { + "epoch": 13.4, + "learning_rate": 1.823278606665024e-07, + "loss": 8.74, + "step": 37040 + }, + { + "epoch": 13.4, + "learning_rate": 1.8198612212730963e-07, + "loss": 8.6362, + "step": 37060 + }, + { + "epoch": 13.41, + "learning_rate": 1.81644520779541e-07, + "loss": 8.5745, + "step": 37080 + }, + { + "epoch": 13.42, + "learning_rate": 1.8130305731224365e-07, + "loss": 8.7794, + "step": 37100 + }, + { + "epoch": 13.42, + "learning_rate": 1.8096173241418695e-07, + "loss": 8.5795, + "step": 37120 + }, + { + "epoch": 13.43, + "learning_rate": 1.8062054677386021e-07, + "loss": 8.6263, + "step": 37140 + }, + { + "epoch": 13.44, + "learning_rate": 1.8027950107947246e-07, + "loss": 8.7687, + "step": 37160 + }, + { + "epoch": 13.45, + "learning_rate": 1.7993859601894992e-07, + "loss": 8.6807, + "step": 37180 + }, + { + "epoch": 13.45, + "learning_rate": 1.795978322799354e-07, + "loss": 8.7346, + "step": 37200 + }, + { + "epoch": 13.46, + "learning_rate": 1.7925721054978674e-07, + "loss": 8.6137, + "step": 37220 + }, + { + "epoch": 13.47, + "learning_rate": 1.7891673151557492e-07, + "loss": 8.7427, + "step": 37240 + }, + { + "epoch": 13.48, + "learning_rate": 1.7857639586408364e-07, + "loss": 8.6359, + "step": 37260 + }, + { + "epoch": 13.48, + "learning_rate": 1.7823620428180692e-07, + "loss": 8.7831, + "step": 37280 + }, + { + "epoch": 13.49, + "learning_rate": 1.778961574549484e-07, + "loss": 8.6557, + "step": 37300 + }, + { + "epoch": 13.5, + "learning_rate": 1.775562560694197e-07, + "loss": 8.645, + "step": 37320 + }, + { + "epoch": 13.5, + "learning_rate": 1.7721650081083905e-07, + "loss": 8.6701, + "step": 37340 + }, + { + "epoch": 13.51, + "learning_rate": 1.7687689236452995e-07, + "loss": 8.7074, + "step": 37360 + }, + { + "epoch": 13.52, + "learning_rate": 1.765374314155198e-07, + "loss": 8.7768, + "step": 37380 + }, + { + "epoch": 13.53, + "learning_rate": 1.7619811864853827e-07, + "loss": 8.5979, + "step": 37400 + }, + { + "epoch": 13.53, + "learning_rate": 1.758589547480165e-07, + "loss": 8.6545, + "step": 37420 + }, + { + "epoch": 13.54, + "learning_rate": 1.75519940398085e-07, + "loss": 8.6859, + "step": 37440 + }, + { + "epoch": 13.55, + "learning_rate": 1.7518107628257273e-07, + "loss": 8.6216, + "step": 37460 + }, + { + "epoch": 13.56, + "learning_rate": 1.748423630850058e-07, + "loss": 8.8257, + "step": 37480 + }, + { + "epoch": 13.56, + "learning_rate": 1.7450380148860556e-07, + "loss": 8.6443, + "step": 37500 + }, + { + "epoch": 13.57, + "learning_rate": 1.741653921762879e-07, + "loss": 8.5728, + "step": 37520 + }, + { + "epoch": 13.58, + "learning_rate": 1.7382713583066125e-07, + "loss": 8.6605, + "step": 37540 + }, + { + "epoch": 13.58, + "learning_rate": 1.734890331340258e-07, + "loss": 8.6944, + "step": 37560 + }, + { + "epoch": 13.59, + "learning_rate": 1.731510847683714e-07, + "loss": 8.6816, + "step": 37580 + }, + { + "epoch": 13.6, + "learning_rate": 1.728132914153771e-07, + "loss": 8.7527, + "step": 37600 + }, + { + "epoch": 13.61, + "learning_rate": 1.7247565375640881e-07, + "loss": 8.9439, + "step": 37620 + }, + { + "epoch": 13.61, + "learning_rate": 1.7213817247251862e-07, + "loss": 8.6915, + "step": 37640 + }, + { + "epoch": 13.62, + "learning_rate": 1.7180084824444325e-07, + "loss": 8.6954, + "step": 37660 + }, + { + "epoch": 13.63, + "learning_rate": 1.7146368175260234e-07, + "loss": 8.6402, + "step": 37680 + }, + { + "epoch": 13.63, + "learning_rate": 1.711266736770978e-07, + "loss": 8.7081, + "step": 37700 + }, + { + "epoch": 13.64, + "learning_rate": 1.7078982469771163e-07, + "loss": 8.5867, + "step": 37720 + }, + { + "epoch": 13.65, + "learning_rate": 1.704531354939049e-07, + "loss": 8.6839, + "step": 37740 + }, + { + "epoch": 13.66, + "learning_rate": 1.7011660674481676e-07, + "loss": 8.7227, + "step": 37760 + }, + { + "epoch": 13.66, + "learning_rate": 1.6978023912926225e-07, + "loss": 8.5406, + "step": 37780 + }, + { + "epoch": 13.67, + "learning_rate": 1.6944403332573185e-07, + "loss": 8.6903, + "step": 37800 + }, + { + "epoch": 13.68, + "learning_rate": 1.6910799001238923e-07, + "loss": 8.6448, + "step": 37820 + }, + { + "epoch": 13.69, + "learning_rate": 1.6877210986707046e-07, + "loss": 8.6563, + "step": 37840 + }, + { + "epoch": 13.69, + "learning_rate": 1.6843639356728267e-07, + "loss": 8.7411, + "step": 37860 + }, + { + "epoch": 13.7, + "learning_rate": 1.6810084179020208e-07, + "loss": 8.644, + "step": 37880 + }, + { + "epoch": 13.71, + "learning_rate": 1.6776545521267354e-07, + "loss": 8.6785, + "step": 37900 + }, + { + "epoch": 13.71, + "learning_rate": 1.674302345112083e-07, + "loss": 8.6754, + "step": 37920 + }, + { + "epoch": 13.72, + "learning_rate": 1.6709518036198307e-07, + "loss": 8.6037, + "step": 37940 + }, + { + "epoch": 13.73, + "learning_rate": 1.6676029344083885e-07, + "loss": 8.6496, + "step": 37960 + }, + { + "epoch": 13.74, + "learning_rate": 1.66425574423279e-07, + "loss": 8.5986, + "step": 37980 + }, + { + "epoch": 13.74, + "learning_rate": 1.6609102398446852e-07, + "loss": 8.6326, + "step": 38000 + }, + { + "epoch": 13.75, + "learning_rate": 1.6575664279923212e-07, + "loss": 8.8621, + "step": 38020 + }, + { + "epoch": 13.76, + "learning_rate": 1.654224315420531e-07, + "loss": 8.5913, + "step": 38040 + }, + { + "epoch": 13.76, + "learning_rate": 1.650883908870723e-07, + "loss": 8.6037, + "step": 38060 + }, + { + "epoch": 13.77, + "learning_rate": 1.6475452150808597e-07, + "loss": 8.631, + "step": 38080 + }, + { + "epoch": 13.78, + "learning_rate": 1.6442082407854538e-07, + "loss": 8.6065, + "step": 38100 + }, + { + "epoch": 13.79, + "learning_rate": 1.6408729927155453e-07, + "loss": 8.7009, + "step": 38120 + }, + { + "epoch": 13.79, + "learning_rate": 1.6375394775986952e-07, + "loss": 8.6739, + "step": 38140 + }, + { + "epoch": 13.8, + "learning_rate": 1.6342077021589669e-07, + "loss": 8.5858, + "step": 38160 + }, + { + "epoch": 13.81, + "learning_rate": 1.6308776731169154e-07, + "loss": 8.9962, + "step": 38180 + }, + { + "epoch": 13.82, + "learning_rate": 1.6275493971895743e-07, + "loss": 8.6618, + "step": 38200 + }, + { + "epoch": 13.82, + "learning_rate": 1.624222881090439e-07, + "loss": 8.7069, + "step": 38220 + }, + { + "epoch": 13.83, + "learning_rate": 1.620898131529457e-07, + "loss": 8.7122, + "step": 38240 + }, + { + "epoch": 13.84, + "learning_rate": 1.61757515521301e-07, + "loss": 8.6128, + "step": 38260 + }, + { + "epoch": 13.84, + "learning_rate": 1.6142539588439052e-07, + "loss": 8.9103, + "step": 38280 + }, + { + "epoch": 13.85, + "learning_rate": 1.6109345491213585e-07, + "loss": 8.6976, + "step": 38300 + }, + { + "epoch": 13.86, + "learning_rate": 1.607616932740982e-07, + "loss": 8.6669, + "step": 38320 + }, + { + "epoch": 13.87, + "learning_rate": 1.6043011163947707e-07, + "loss": 8.6298, + "step": 38340 + }, + { + "epoch": 13.87, + "learning_rate": 1.600987106771089e-07, + "loss": 8.6765, + "step": 38360 + }, + { + "epoch": 13.88, + "learning_rate": 1.5976749105546545e-07, + "loss": 8.7179, + "step": 38380 + }, + { + "epoch": 13.89, + "learning_rate": 1.5943645344265312e-07, + "loss": 8.7319, + "step": 38400 + }, + { + "epoch": 13.9, + "learning_rate": 1.5910559850641076e-07, + "loss": 8.6038, + "step": 38420 + }, + { + "epoch": 13.9, + "learning_rate": 1.5877492691410913e-07, + "loss": 8.6624, + "step": 38440 + }, + { + "epoch": 13.91, + "learning_rate": 1.5844443933274886e-07, + "loss": 8.6136, + "step": 38460 + }, + { + "epoch": 13.92, + "learning_rate": 1.5811413642895943e-07, + "loss": 8.8174, + "step": 38480 + }, + { + "epoch": 13.92, + "learning_rate": 1.5778401886899806e-07, + "loss": 8.7944, + "step": 38500 + }, + { + "epoch": 13.93, + "learning_rate": 1.5745408731874776e-07, + "loss": 8.5984, + "step": 38520 + }, + { + "epoch": 13.94, + "learning_rate": 1.5712434244371675e-07, + "loss": 8.8729, + "step": 38540 + }, + { + "epoch": 13.95, + "learning_rate": 1.5679478490903635e-07, + "loss": 8.6856, + "step": 38560 + }, + { + "epoch": 13.95, + "learning_rate": 1.5646541537946003e-07, + "loss": 8.6329, + "step": 38580 + }, + { + "epoch": 13.96, + "learning_rate": 1.5613623451936232e-07, + "loss": 8.6972, + "step": 38600 + }, + { + "epoch": 13.97, + "learning_rate": 1.5580724299273677e-07, + "loss": 8.7132, + "step": 38620 + }, + { + "epoch": 13.97, + "learning_rate": 1.5547844146319544e-07, + "loss": 8.6481, + "step": 38640 + }, + { + "epoch": 13.98, + "learning_rate": 1.551498305939669e-07, + "loss": 8.6931, + "step": 38660 + }, + { + "epoch": 13.99, + "learning_rate": 1.5482141104789504e-07, + "loss": 8.7225, + "step": 38680 + }, + { + "epoch": 14.0, + "learning_rate": 1.5449318348743817e-07, + "loss": 8.7001, + "step": 38700 + }, + { + "epoch": 14.0, + "learning_rate": 1.5416514857466695e-07, + "loss": 8.6532, + "step": 38720 + }, + { + "epoch": 14.01, + "learning_rate": 1.5383730697126386e-07, + "loss": 8.6646, + "step": 38740 + }, + { + "epoch": 14.02, + "learning_rate": 1.5350965933852104e-07, + "loss": 8.5851, + "step": 38760 + }, + { + "epoch": 14.03, + "learning_rate": 1.5318220633733975e-07, + "loss": 8.7631, + "step": 38780 + }, + { + "epoch": 14.03, + "learning_rate": 1.528549486282284e-07, + "loss": 8.7466, + "step": 38800 + }, + { + "epoch": 14.04, + "learning_rate": 1.5252788687130143e-07, + "loss": 8.7216, + "step": 38820 + }, + { + "epoch": 14.05, + "learning_rate": 1.5220102172627837e-07, + "loss": 8.5501, + "step": 38840 + }, + { + "epoch": 14.05, + "learning_rate": 1.518743538524817e-07, + "loss": 8.6958, + "step": 38860 + }, + { + "epoch": 14.06, + "learning_rate": 1.515478839088365e-07, + "loss": 8.6093, + "step": 38880 + }, + { + "epoch": 14.07, + "learning_rate": 1.5122161255386812e-07, + "loss": 8.6491, + "step": 38900 + }, + { + "epoch": 14.08, + "learning_rate": 1.5089554044570149e-07, + "loss": 8.6376, + "step": 38920 + }, + { + "epoch": 14.08, + "learning_rate": 1.5056966824205988e-07, + "loss": 8.729, + "step": 38940 + }, + { + "epoch": 14.09, + "learning_rate": 1.5024399660026294e-07, + "loss": 8.7798, + "step": 38960 + }, + { + "epoch": 14.1, + "learning_rate": 1.4991852617722617e-07, + "loss": 8.6727, + "step": 38980 + }, + { + "epoch": 14.1, + "learning_rate": 1.495932576294588e-07, + "loss": 8.6122, + "step": 39000 + }, + { + "epoch": 14.11, + "learning_rate": 1.4926819161306306e-07, + "loss": 8.7075, + "step": 39020 + }, + { + "epoch": 14.12, + "learning_rate": 1.4894332878373276e-07, + "loss": 8.7238, + "step": 39040 + }, + { + "epoch": 14.13, + "learning_rate": 1.4861866979675152e-07, + "loss": 8.7556, + "step": 39060 + }, + { + "epoch": 14.13, + "learning_rate": 1.4829421530699222e-07, + "loss": 8.7966, + "step": 39080 + }, + { + "epoch": 14.14, + "learning_rate": 1.4796996596891487e-07, + "loss": 8.7137, + "step": 39100 + }, + { + "epoch": 14.15, + "learning_rate": 1.4764592243656582e-07, + "loss": 8.6169, + "step": 39120 + }, + { + "epoch": 14.16, + "learning_rate": 1.4732208536357636e-07, + "loss": 8.6715, + "step": 39140 + }, + { + "epoch": 14.16, + "learning_rate": 1.4699845540316123e-07, + "loss": 8.6655, + "step": 39160 + }, + { + "epoch": 14.17, + "learning_rate": 1.466750332081174e-07, + "loss": 8.6515, + "step": 39180 + }, + { + "epoch": 14.18, + "learning_rate": 1.4635181943082284e-07, + "loss": 8.6899, + "step": 39200 + }, + { + "epoch": 14.18, + "learning_rate": 1.4602881472323498e-07, + "loss": 8.731, + "step": 39220 + }, + { + "epoch": 14.19, + "learning_rate": 1.4570601973688966e-07, + "loss": 8.6511, + "step": 39240 + }, + { + "epoch": 14.2, + "learning_rate": 1.4538343512289957e-07, + "loss": 8.6555, + "step": 39260 + }, + { + "epoch": 14.21, + "learning_rate": 1.450610615319533e-07, + "loss": 8.6992, + "step": 39280 + }, + { + "epoch": 14.21, + "learning_rate": 1.447388996143134e-07, + "loss": 8.6656, + "step": 39300 + }, + { + "epoch": 14.22, + "learning_rate": 1.4441695001981585e-07, + "loss": 8.6907, + "step": 39320 + }, + { + "epoch": 14.23, + "learning_rate": 1.4409521339786808e-07, + "loss": 8.8347, + "step": 39340 + }, + { + "epoch": 14.24, + "learning_rate": 1.437736903974479e-07, + "loss": 8.6904, + "step": 39360 + }, + { + "epoch": 14.24, + "learning_rate": 1.4345238166710254e-07, + "loss": 8.6437, + "step": 39380 + }, + { + "epoch": 14.25, + "learning_rate": 1.4313128785494667e-07, + "loss": 8.6775, + "step": 39400 + }, + { + "epoch": 14.26, + "learning_rate": 1.4281040960866175e-07, + "loss": 8.5845, + "step": 39420 + }, + { + "epoch": 14.26, + "learning_rate": 1.4248974757549415e-07, + "loss": 8.6379, + "step": 39440 + }, + { + "epoch": 14.27, + "learning_rate": 1.421693024022542e-07, + "loss": 8.6482, + "step": 39460 + }, + { + "epoch": 14.28, + "learning_rate": 1.4184907473531496e-07, + "loss": 8.7291, + "step": 39480 + }, + { + "epoch": 14.29, + "learning_rate": 1.4152906522061047e-07, + "loss": 8.7168, + "step": 39500 + }, + { + "epoch": 14.29, + "learning_rate": 1.412092745036351e-07, + "loss": 8.6693, + "step": 39520 + }, + { + "epoch": 14.3, + "learning_rate": 1.4088970322944145e-07, + "loss": 8.6391, + "step": 39540 + }, + { + "epoch": 14.31, + "learning_rate": 1.405703520426399e-07, + "loss": 8.705, + "step": 39560 + }, + { + "epoch": 14.31, + "learning_rate": 1.402512215873965e-07, + "loss": 8.84, + "step": 39580 + }, + { + "epoch": 14.32, + "learning_rate": 1.3993231250743243e-07, + "loss": 8.7896, + "step": 39600 + }, + { + "epoch": 14.33, + "learning_rate": 1.3961362544602212e-07, + "loss": 8.599, + "step": 39620 + }, + { + "epoch": 14.34, + "learning_rate": 1.3929516104599202e-07, + "loss": 8.7522, + "step": 39640 + }, + { + "epoch": 14.34, + "learning_rate": 1.3897691994971985e-07, + "loss": 8.6363, + "step": 39660 + }, + { + "epoch": 14.35, + "learning_rate": 1.386589027991325e-07, + "loss": 8.6951, + "step": 39680 + }, + { + "epoch": 14.36, + "learning_rate": 1.3834111023570556e-07, + "loss": 8.8011, + "step": 39700 + }, + { + "epoch": 14.37, + "learning_rate": 1.3802354290046103e-07, + "loss": 8.5725, + "step": 39720 + }, + { + "epoch": 14.37, + "learning_rate": 1.377062014339672e-07, + "loss": 8.7008, + "step": 39740 + }, + { + "epoch": 14.38, + "learning_rate": 1.3738908647633634e-07, + "loss": 8.6939, + "step": 39760 + }, + { + "epoch": 14.39, + "learning_rate": 1.3707219866722408e-07, + "loss": 8.6877, + "step": 39780 + }, + { + "epoch": 14.39, + "learning_rate": 1.367555386458276e-07, + "loss": 8.634, + "step": 39800 + }, + { + "epoch": 14.4, + "learning_rate": 1.3643910705088501e-07, + "loss": 8.719, + "step": 39820 + }, + { + "epoch": 14.41, + "learning_rate": 1.3612290452067322e-07, + "loss": 8.6471, + "step": 39840 + }, + { + "epoch": 14.42, + "learning_rate": 1.3580693169300727e-07, + "loss": 8.6649, + "step": 39860 + }, + { + "epoch": 14.42, + "learning_rate": 1.3549118920523905e-07, + "loss": 8.7158, + "step": 39880 + }, + { + "epoch": 14.43, + "learning_rate": 1.3517567769425548e-07, + "loss": 8.6799, + "step": 39900 + }, + { + "epoch": 14.44, + "learning_rate": 1.3486039779647793e-07, + "loss": 8.6747, + "step": 39920 + }, + { + "epoch": 14.44, + "learning_rate": 1.3454535014786023e-07, + "loss": 8.6028, + "step": 39940 + }, + { + "epoch": 14.45, + "learning_rate": 1.3423053538388808e-07, + "loss": 8.6774, + "step": 39960 + }, + { + "epoch": 14.46, + "learning_rate": 1.3391595413957717e-07, + "loss": 8.6907, + "step": 39980 + }, + { + "epoch": 14.47, + "learning_rate": 1.3360160704947221e-07, + "loss": 8.6227, + "step": 40000 + }, + { + "epoch": 14.47, + "learning_rate": 1.3328749474764577e-07, + "loss": 8.6221, + "step": 40020 + }, + { + "epoch": 14.48, + "learning_rate": 1.329736178676965e-07, + "loss": 8.6608, + "step": 40040 + }, + { + "epoch": 14.49, + "learning_rate": 1.3265997704274856e-07, + "loss": 8.7676, + "step": 40060 + }, + { + "epoch": 14.5, + "learning_rate": 1.323465729054497e-07, + "loss": 8.6608, + "step": 40080 + }, + { + "epoch": 14.5, + "learning_rate": 1.3203340608797016e-07, + "loss": 8.7215, + "step": 40100 + }, + { + "epoch": 14.51, + "learning_rate": 1.317204772220019e-07, + "loss": 8.6288, + "step": 40120 + }, + { + "epoch": 14.52, + "learning_rate": 1.3140778693875637e-07, + "loss": 8.5937, + "step": 40140 + }, + { + "epoch": 14.52, + "learning_rate": 1.3109533586896432e-07, + "loss": 8.6781, + "step": 40160 + }, + { + "epoch": 14.53, + "learning_rate": 1.3078312464287354e-07, + "loss": 8.6522, + "step": 40180 + }, + { + "epoch": 14.54, + "learning_rate": 1.304711538902481e-07, + "loss": 8.5842, + "step": 40200 + }, + { + "epoch": 14.55, + "learning_rate": 1.301594242403673e-07, + "loss": 8.695, + "step": 40220 + }, + { + "epoch": 14.55, + "learning_rate": 1.2984793632202373e-07, + "loss": 8.6941, + "step": 40240 + }, + { + "epoch": 14.56, + "learning_rate": 1.2953669076352274e-07, + "loss": 8.6796, + "step": 40260 + }, + { + "epoch": 14.57, + "learning_rate": 1.2922568819268054e-07, + "loss": 8.7716, + "step": 40280 + }, + { + "epoch": 14.58, + "learning_rate": 1.2891492923682323e-07, + "loss": 8.6689, + "step": 40300 + }, + { + "epoch": 14.58, + "learning_rate": 1.2860441452278574e-07, + "loss": 8.4937, + "step": 40320 + }, + { + "epoch": 14.59, + "learning_rate": 1.2829414467691e-07, + "loss": 8.695, + "step": 40340 + }, + { + "epoch": 14.6, + "learning_rate": 1.2798412032504437e-07, + "loss": 8.6764, + "step": 40360 + }, + { + "epoch": 14.6, + "learning_rate": 1.276743420925418e-07, + "loss": 8.5718, + "step": 40380 + }, + { + "epoch": 14.61, + "learning_rate": 1.273648106042587e-07, + "loss": 8.7333, + "step": 40400 + }, + { + "epoch": 14.62, + "learning_rate": 1.270555264845541e-07, + "loss": 8.6268, + "step": 40420 + }, + { + "epoch": 14.63, + "learning_rate": 1.2674649035728768e-07, + "loss": 8.8738, + "step": 40440 + }, + { + "epoch": 14.63, + "learning_rate": 1.264377028458193e-07, + "loss": 8.6499, + "step": 40460 + }, + { + "epoch": 14.64, + "learning_rate": 1.2612916457300687e-07, + "loss": 8.667, + "step": 40480 + }, + { + "epoch": 14.65, + "learning_rate": 1.2582087616120607e-07, + "loss": 8.816, + "step": 40500 + }, + { + "epoch": 14.65, + "learning_rate": 1.2551283823226812e-07, + "loss": 8.6488, + "step": 40520 + }, + { + "epoch": 14.66, + "learning_rate": 1.2520505140753917e-07, + "loss": 8.6484, + "step": 40540 + }, + { + "epoch": 14.67, + "learning_rate": 1.2489751630785905e-07, + "loss": 8.6768, + "step": 40560 + }, + { + "epoch": 14.68, + "learning_rate": 1.2459023355355946e-07, + "loss": 8.7502, + "step": 40580 + }, + { + "epoch": 14.68, + "learning_rate": 1.2428320376446348e-07, + "loss": 8.6466, + "step": 40600 + }, + { + "epoch": 14.69, + "learning_rate": 1.2397642755988368e-07, + "loss": 8.6937, + "step": 40620 + }, + { + "epoch": 14.7, + "learning_rate": 1.2366990555862106e-07, + "loss": 8.7133, + "step": 40640 + }, + { + "epoch": 14.71, + "learning_rate": 1.233636383789642e-07, + "loss": 8.6059, + "step": 40660 + }, + { + "epoch": 14.71, + "learning_rate": 1.2305762663868728e-07, + "loss": 8.5764, + "step": 40680 + }, + { + "epoch": 14.72, + "learning_rate": 1.2275187095504962e-07, + "loss": 8.8081, + "step": 40700 + }, + { + "epoch": 14.73, + "learning_rate": 1.2244637194479376e-07, + "loss": 8.6386, + "step": 40720 + }, + { + "epoch": 14.73, + "learning_rate": 1.2214113022414446e-07, + "loss": 8.762, + "step": 40740 + }, + { + "epoch": 14.74, + "learning_rate": 1.2183614640880783e-07, + "loss": 8.6669, + "step": 40760 + }, + { + "epoch": 14.75, + "learning_rate": 1.2153142111396937e-07, + "loss": 8.6712, + "step": 40780 + }, + { + "epoch": 14.76, + "learning_rate": 1.2122695495429347e-07, + "loss": 8.719, + "step": 40800 + }, + { + "epoch": 14.76, + "learning_rate": 1.2092274854392156e-07, + "loss": 8.7279, + "step": 40820 + }, + { + "epoch": 14.77, + "learning_rate": 1.2061880249647113e-07, + "loss": 8.6048, + "step": 40840 + }, + { + "epoch": 14.78, + "learning_rate": 1.2031511742503478e-07, + "loss": 8.7292, + "step": 40860 + }, + { + "epoch": 14.78, + "learning_rate": 1.2001169394217825e-07, + "loss": 8.6321, + "step": 40880 + }, + { + "epoch": 14.79, + "learning_rate": 1.1970853265994007e-07, + "loss": 8.6249, + "step": 40900 + }, + { + "epoch": 14.8, + "learning_rate": 1.1940563418982959e-07, + "loss": 8.5311, + "step": 40920 + }, + { + "epoch": 14.81, + "learning_rate": 1.1910299914282601e-07, + "loss": 8.8256, + "step": 40940 + }, + { + "epoch": 14.81, + "learning_rate": 1.1880062812937753e-07, + "loss": 8.9261, + "step": 40960 + }, + { + "epoch": 14.82, + "learning_rate": 1.1849852175939928e-07, + "loss": 8.6666, + "step": 40980 + }, + { + "epoch": 14.83, + "learning_rate": 1.1819668064227303e-07, + "loss": 8.6694, + "step": 41000 + }, + { + "epoch": 14.84, + "learning_rate": 1.1789510538684522e-07, + "loss": 8.7188, + "step": 41020 + }, + { + "epoch": 14.84, + "learning_rate": 1.1759379660142597e-07, + "loss": 8.6856, + "step": 41040 + }, + { + "epoch": 14.85, + "learning_rate": 1.1729275489378826e-07, + "loss": 8.5903, + "step": 41060 + }, + { + "epoch": 14.86, + "learning_rate": 1.1699198087116588e-07, + "loss": 8.6546, + "step": 41080 + }, + { + "epoch": 14.86, + "learning_rate": 1.166914751402531e-07, + "loss": 8.6771, + "step": 41100 + }, + { + "epoch": 14.87, + "learning_rate": 1.1639123830720265e-07, + "loss": 8.7304, + "step": 41120 + }, + { + "epoch": 14.88, + "learning_rate": 1.160912709776252e-07, + "loss": 8.707, + "step": 41140 + }, + { + "epoch": 14.89, + "learning_rate": 1.1579157375658755e-07, + "loss": 8.6712, + "step": 41160 + }, + { + "epoch": 14.89, + "learning_rate": 1.1549214724861168e-07, + "loss": 8.7817, + "step": 41180 + }, + { + "epoch": 14.9, + "learning_rate": 1.151929920576737e-07, + "loss": 8.7075, + "step": 41200 + }, + { + "epoch": 14.91, + "learning_rate": 1.1489410878720216e-07, + "loss": 8.7929, + "step": 41220 + }, + { + "epoch": 14.92, + "learning_rate": 1.1459549804007748e-07, + "loss": 8.6006, + "step": 41240 + }, + { + "epoch": 14.92, + "learning_rate": 1.1429716041863008e-07, + "loss": 8.7001, + "step": 41260 + }, + { + "epoch": 14.93, + "learning_rate": 1.1399909652463943e-07, + "loss": 8.6809, + "step": 41280 + }, + { + "epoch": 14.94, + "learning_rate": 1.1370130695933316e-07, + "loss": 8.6832, + "step": 41300 + }, + { + "epoch": 14.94, + "learning_rate": 1.134037923233852e-07, + "loss": 8.5958, + "step": 41320 + }, + { + "epoch": 14.95, + "learning_rate": 1.1310655321691525e-07, + "loss": 8.754, + "step": 41340 + }, + { + "epoch": 14.96, + "learning_rate": 1.1280959023948692e-07, + "loss": 8.7372, + "step": 41360 + }, + { + "epoch": 14.97, + "learning_rate": 1.1251290399010712e-07, + "loss": 8.6279, + "step": 41380 + }, + { + "epoch": 14.97, + "learning_rate": 1.122164950672243e-07, + "loss": 8.6381, + "step": 41400 + }, + { + "epoch": 14.98, + "learning_rate": 1.1192036406872781e-07, + "loss": 8.6657, + "step": 41420 + }, + { + "epoch": 14.99, + "learning_rate": 1.1162451159194614e-07, + "loss": 8.6235, + "step": 41440 + }, + { + "epoch": 14.99, + "learning_rate": 1.1132893823364594e-07, + "loss": 8.684, + "step": 41460 + }, + { + "epoch": 15.0, + "learning_rate": 1.1103364459003126e-07, + "loss": 8.651, + "step": 41480 + }, + { + "epoch": 15.01, + "learning_rate": 1.1073863125674135e-07, + "loss": 8.7139, + "step": 41500 + }, + { + "epoch": 15.02, + "learning_rate": 1.1044389882885058e-07, + "loss": 8.6635, + "step": 41520 + }, + { + "epoch": 15.02, + "learning_rate": 1.1014944790086631e-07, + "loss": 8.6469, + "step": 41540 + }, + { + "epoch": 15.03, + "learning_rate": 1.0985527906672834e-07, + "loss": 8.6706, + "step": 41560 + }, + { + "epoch": 15.04, + "learning_rate": 1.0956139291980726e-07, + "loss": 8.6693, + "step": 41580 + }, + { + "epoch": 15.05, + "learning_rate": 1.0926779005290365e-07, + "loss": 8.6948, + "step": 41600 + }, + { + "epoch": 15.05, + "learning_rate": 1.0897447105824645e-07, + "loss": 8.6314, + "step": 41620 + }, + { + "epoch": 15.06, + "learning_rate": 1.0868143652749228e-07, + "loss": 8.6982, + "step": 41640 + }, + { + "epoch": 15.07, + "learning_rate": 1.0838868705172377e-07, + "loss": 8.6151, + "step": 41660 + }, + { + "epoch": 15.07, + "learning_rate": 1.0809622322144843e-07, + "loss": 8.6785, + "step": 41680 + }, + { + "epoch": 15.08, + "learning_rate": 1.07804045626598e-07, + "loss": 8.8313, + "step": 41700 + }, + { + "epoch": 15.09, + "learning_rate": 1.0751215485652643e-07, + "loss": 8.7213, + "step": 41720 + }, + { + "epoch": 15.1, + "learning_rate": 1.0722055150000947e-07, + "loss": 8.6961, + "step": 41740 + }, + { + "epoch": 15.1, + "learning_rate": 1.0692923614524279e-07, + "loss": 8.73, + "step": 41760 + }, + { + "epoch": 15.11, + "learning_rate": 1.0663820937984147e-07, + "loss": 8.7056, + "step": 41780 + }, + { + "epoch": 15.12, + "learning_rate": 1.063474717908382e-07, + "loss": 8.6131, + "step": 41800 + }, + { + "epoch": 15.12, + "learning_rate": 1.0605702396468238e-07, + "loss": 8.6164, + "step": 41820 + }, + { + "epoch": 15.13, + "learning_rate": 1.0576686648723923e-07, + "loss": 8.6729, + "step": 41840 + }, + { + "epoch": 15.14, + "learning_rate": 1.0547699994378786e-07, + "loss": 8.7336, + "step": 41860 + }, + { + "epoch": 15.15, + "learning_rate": 1.0518742491902097e-07, + "loss": 8.7729, + "step": 41880 + }, + { + "epoch": 15.15, + "learning_rate": 1.0489814199704292e-07, + "loss": 8.7094, + "step": 41900 + }, + { + "epoch": 15.16, + "learning_rate": 1.0460915176136892e-07, + "loss": 8.7918, + "step": 41920 + }, + { + "epoch": 15.17, + "learning_rate": 1.0432045479492399e-07, + "loss": 8.6686, + "step": 41940 + }, + { + "epoch": 15.18, + "learning_rate": 1.0403205168004132e-07, + "loss": 8.5945, + "step": 41960 + }, + { + "epoch": 15.18, + "learning_rate": 1.0374394299846168e-07, + "loss": 8.6095, + "step": 41980 + }, + { + "epoch": 15.19, + "learning_rate": 1.0345612933133166e-07, + "loss": 8.687, + "step": 42000 + }, + { + "epoch": 15.2, + "learning_rate": 1.0316861125920281e-07, + "loss": 8.6749, + "step": 42020 + }, + { + "epoch": 15.2, + "learning_rate": 1.0288138936203067e-07, + "loss": 8.6988, + "step": 42040 + }, + { + "epoch": 15.21, + "learning_rate": 1.02594464219173e-07, + "loss": 8.645, + "step": 42060 + }, + { + "epoch": 15.22, + "learning_rate": 1.0230783640938936e-07, + "loss": 8.6744, + "step": 42080 + }, + { + "epoch": 15.23, + "learning_rate": 1.0202150651083929e-07, + "loss": 8.6319, + "step": 42100 + }, + { + "epoch": 15.23, + "learning_rate": 1.0173547510108136e-07, + "loss": 8.6984, + "step": 42120 + }, + { + "epoch": 15.24, + "learning_rate": 1.0144974275707241e-07, + "loss": 8.6916, + "step": 42140 + }, + { + "epoch": 15.25, + "learning_rate": 1.0116431005516557e-07, + "loss": 8.6657, + "step": 42160 + }, + { + "epoch": 15.25, + "learning_rate": 1.0087917757111e-07, + "loss": 8.6635, + "step": 42180 + }, + { + "epoch": 15.26, + "learning_rate": 1.0059434588004897e-07, + "loss": 8.759, + "step": 42200 + }, + { + "epoch": 15.27, + "learning_rate": 1.0030981555651908e-07, + "loss": 8.6307, + "step": 42220 + }, + { + "epoch": 15.28, + "learning_rate": 1.0002558717444922e-07, + "loss": 8.6877, + "step": 42240 + }, + { + "epoch": 15.28, + "learning_rate": 9.974166130715894e-08, + "loss": 8.6019, + "step": 42260 + }, + { + "epoch": 15.29, + "learning_rate": 9.945803852735793e-08, + "loss": 8.6728, + "step": 42280 + }, + { + "epoch": 15.3, + "learning_rate": 9.917471940714412e-08, + "loss": 8.7323, + "step": 42300 + }, + { + "epoch": 15.31, + "learning_rate": 9.889170451800332e-08, + "loss": 8.6797, + "step": 42320 + }, + { + "epoch": 15.31, + "learning_rate": 9.860899443080741e-08, + "loss": 8.6671, + "step": 42340 + }, + { + "epoch": 15.32, + "learning_rate": 9.832658971581346e-08, + "loss": 8.6717, + "step": 42360 + }, + { + "epoch": 15.33, + "learning_rate": 9.804449094266279e-08, + "loss": 8.6571, + "step": 42380 + }, + { + "epoch": 15.33, + "learning_rate": 9.776269868037928e-08, + "loss": 8.7102, + "step": 42400 + }, + { + "epoch": 15.34, + "learning_rate": 9.748121349736891e-08, + "loss": 8.6032, + "step": 42420 + }, + { + "epoch": 15.35, + "learning_rate": 9.720003596141796e-08, + "loss": 8.6771, + "step": 42440 + }, + { + "epoch": 15.36, + "learning_rate": 9.691916663969214e-08, + "loss": 8.6988, + "step": 42460 + }, + { + "epoch": 15.36, + "learning_rate": 9.663860609873575e-08, + "loss": 8.6573, + "step": 42480 + }, + { + "epoch": 15.37, + "learning_rate": 9.635835490446992e-08, + "loss": 8.7768, + "step": 42500 + }, + { + "epoch": 15.38, + "learning_rate": 9.607841362219207e-08, + "loss": 8.6382, + "step": 42520 + }, + { + "epoch": 15.39, + "learning_rate": 9.579878281657428e-08, + "loss": 8.6746, + "step": 42540 + }, + { + "epoch": 15.39, + "learning_rate": 9.551946305166233e-08, + "loss": 8.7247, + "step": 42560 + }, + { + "epoch": 15.4, + "learning_rate": 9.524045489087493e-08, + "loss": 8.6537, + "step": 42580 + }, + { + "epoch": 15.41, + "learning_rate": 9.496175889700184e-08, + "loss": 8.69, + "step": 42600 + }, + { + "epoch": 15.41, + "learning_rate": 9.46833756322035e-08, + "loss": 8.6793, + "step": 42620 + }, + { + "epoch": 15.42, + "learning_rate": 9.440530565800927e-08, + "loss": 8.6738, + "step": 42640 + }, + { + "epoch": 15.43, + "learning_rate": 9.412754953531663e-08, + "loss": 8.6723, + "step": 42660 + }, + { + "epoch": 15.44, + "learning_rate": 9.385010782439018e-08, + "loss": 8.6072, + "step": 42680 + }, + { + "epoch": 15.44, + "learning_rate": 9.357298108486003e-08, + "loss": 8.6054, + "step": 42700 + }, + { + "epoch": 15.45, + "learning_rate": 9.329616987572122e-08, + "loss": 8.6386, + "step": 42720 + }, + { + "epoch": 15.46, + "learning_rate": 9.301967475533215e-08, + "loss": 8.7196, + "step": 42740 + }, + { + "epoch": 15.46, + "learning_rate": 9.274349628141359e-08, + "loss": 8.7016, + "step": 42760 + }, + { + "epoch": 15.47, + "learning_rate": 9.246763501104793e-08, + "loss": 8.657, + "step": 42780 + }, + { + "epoch": 15.48, + "learning_rate": 9.219209150067725e-08, + "loss": 8.8457, + "step": 42800 + }, + { + "epoch": 15.49, + "learning_rate": 9.191686630610313e-08, + "loss": 8.591, + "step": 42820 + }, + { + "epoch": 15.49, + "learning_rate": 9.164195998248469e-08, + "loss": 8.5781, + "step": 42840 + }, + { + "epoch": 15.5, + "learning_rate": 9.13673730843382e-08, + "loss": 8.5978, + "step": 42860 + }, + { + "epoch": 15.51, + "learning_rate": 9.109310616553534e-08, + "loss": 8.64, + "step": 42880 + }, + { + "epoch": 15.52, + "learning_rate": 9.08191597793024e-08, + "loss": 8.6924, + "step": 42900 + }, + { + "epoch": 15.52, + "learning_rate": 9.05455344782193e-08, + "loss": 8.5971, + "step": 42920 + }, + { + "epoch": 15.53, + "learning_rate": 9.027223081421806e-08, + "loss": 8.6123, + "step": 42940 + }, + { + "epoch": 15.54, + "learning_rate": 8.999924933858219e-08, + "loss": 8.5796, + "step": 42960 + }, + { + "epoch": 15.54, + "learning_rate": 8.972659060194504e-08, + "loss": 8.6404, + "step": 42980 + }, + { + "epoch": 15.55, + "learning_rate": 8.945425515428904e-08, + "loss": 8.7294, + "step": 43000 + }, + { + "epoch": 15.56, + "learning_rate": 8.918224354494466e-08, + "loss": 8.7276, + "step": 43020 + }, + { + "epoch": 15.57, + "learning_rate": 8.891055632258892e-08, + "loss": 8.6615, + "step": 43040 + }, + { + "epoch": 15.57, + "learning_rate": 8.863919403524478e-08, + "loss": 8.6805, + "step": 43060 + }, + { + "epoch": 15.58, + "learning_rate": 8.836815723027957e-08, + "loss": 8.6901, + "step": 43080 + }, + { + "epoch": 15.59, + "learning_rate": 8.809744645440403e-08, + "loss": 8.6524, + "step": 43100 + }, + { + "epoch": 15.59, + "learning_rate": 8.78270622536716e-08, + "loss": 8.9045, + "step": 43120 + }, + { + "epoch": 15.6, + "learning_rate": 8.75570051734765e-08, + "loss": 8.7131, + "step": 43140 + }, + { + "epoch": 15.61, + "learning_rate": 8.728727575855363e-08, + "loss": 8.7249, + "step": 43160 + }, + { + "epoch": 15.62, + "learning_rate": 8.701787455297646e-08, + "loss": 8.6845, + "step": 43180 + }, + { + "epoch": 15.62, + "learning_rate": 8.67488021001569e-08, + "loss": 8.6726, + "step": 43200 + }, + { + "epoch": 15.63, + "learning_rate": 8.648005894284324e-08, + "loss": 8.7113, + "step": 43220 + }, + { + "epoch": 15.64, + "learning_rate": 8.621164562312003e-08, + "loss": 8.6752, + "step": 43240 + }, + { + "epoch": 15.65, + "learning_rate": 8.594356268240616e-08, + "loss": 8.6732, + "step": 43260 + }, + { + "epoch": 15.65, + "learning_rate": 8.567581066145413e-08, + "loss": 8.686, + "step": 43280 + }, + { + "epoch": 15.66, + "learning_rate": 8.54083901003492e-08, + "loss": 8.7233, + "step": 43300 + }, + { + "epoch": 15.67, + "learning_rate": 8.514130153850768e-08, + "loss": 8.7432, + "step": 43320 + }, + { + "epoch": 15.67, + "learning_rate": 8.487454551467657e-08, + "loss": 8.7446, + "step": 43340 + }, + { + "epoch": 15.68, + "learning_rate": 8.460812256693178e-08, + "loss": 8.9778, + "step": 43360 + }, + { + "epoch": 15.69, + "learning_rate": 8.434203323267764e-08, + "loss": 8.7359, + "step": 43380 + }, + { + "epoch": 15.7, + "learning_rate": 8.407627804864523e-08, + "loss": 8.7208, + "step": 43400 + }, + { + "epoch": 15.7, + "learning_rate": 8.381085755089201e-08, + "loss": 8.5855, + "step": 43420 + }, + { + "epoch": 15.71, + "learning_rate": 8.354577227479995e-08, + "loss": 8.6439, + "step": 43440 + }, + { + "epoch": 15.72, + "learning_rate": 8.328102275507518e-08, + "loss": 8.7329, + "step": 43460 + }, + { + "epoch": 15.73, + "learning_rate": 8.301660952574633e-08, + "loss": 8.6073, + "step": 43480 + }, + { + "epoch": 15.73, + "learning_rate": 8.27525331201637e-08, + "loss": 8.7215, + "step": 43500 + }, + { + "epoch": 15.74, + "learning_rate": 8.24887940709984e-08, + "loss": 8.6948, + "step": 43520 + }, + { + "epoch": 15.75, + "learning_rate": 8.222539291024077e-08, + "loss": 8.7347, + "step": 43540 + }, + { + "epoch": 15.75, + "learning_rate": 8.19623301691999e-08, + "loss": 8.6756, + "step": 43560 + }, + { + "epoch": 15.76, + "learning_rate": 8.169960637850192e-08, + "loss": 8.6516, + "step": 43580 + }, + { + "epoch": 15.77, + "learning_rate": 8.143722206808959e-08, + "loss": 8.6615, + "step": 43600 + }, + { + "epoch": 15.78, + "learning_rate": 8.117517776722066e-08, + "loss": 8.6372, + "step": 43620 + }, + { + "epoch": 15.78, + "learning_rate": 8.091347400446702e-08, + "loss": 8.6507, + "step": 43640 + }, + { + "epoch": 15.79, + "learning_rate": 8.065211130771393e-08, + "loss": 8.7741, + "step": 43660 + }, + { + "epoch": 15.8, + "learning_rate": 8.039109020415838e-08, + "loss": 8.5572, + "step": 43680 + }, + { + "epoch": 15.8, + "learning_rate": 8.013041122030856e-08, + "loss": 8.6496, + "step": 43700 + }, + { + "epoch": 15.81, + "learning_rate": 7.987007488198244e-08, + "loss": 8.6822, + "step": 43720 + }, + { + "epoch": 15.82, + "learning_rate": 7.961008171430677e-08, + "loss": 8.723, + "step": 43740 + }, + { + "epoch": 15.83, + "learning_rate": 7.935043224171631e-08, + "loss": 8.6827, + "step": 43760 + }, + { + "epoch": 15.83, + "learning_rate": 7.909112698795231e-08, + "loss": 8.6871, + "step": 43780 + }, + { + "epoch": 15.84, + "learning_rate": 7.883216647606192e-08, + "loss": 8.6921, + "step": 43800 + }, + { + "epoch": 15.85, + "learning_rate": 7.857355122839673e-08, + "loss": 8.7701, + "step": 43820 + }, + { + "epoch": 15.86, + "learning_rate": 7.831528176661189e-08, + "loss": 8.5903, + "step": 43840 + }, + { + "epoch": 15.86, + "learning_rate": 7.805735861166527e-08, + "loss": 8.6362, + "step": 43860 + }, + { + "epoch": 15.87, + "learning_rate": 7.77997822838159e-08, + "loss": 8.6959, + "step": 43880 + }, + { + "epoch": 15.88, + "learning_rate": 7.754255330262353e-08, + "loss": 8.6671, + "step": 43900 + }, + { + "epoch": 15.88, + "learning_rate": 7.728567218694706e-08, + "loss": 8.7909, + "step": 43920 + }, + { + "epoch": 15.89, + "learning_rate": 7.702913945494368e-08, + "loss": 8.6083, + "step": 43940 + }, + { + "epoch": 15.9, + "learning_rate": 7.677295562406812e-08, + "loss": 8.639, + "step": 43960 + }, + { + "epoch": 15.91, + "learning_rate": 7.651712121107101e-08, + "loss": 8.8751, + "step": 43980 + }, + { + "epoch": 15.91, + "learning_rate": 7.626163673199848e-08, + "loss": 8.6571, + "step": 44000 + }, + { + "epoch": 15.92, + "learning_rate": 7.600650270219044e-08, + "loss": 8.8169, + "step": 44020 + }, + { + "epoch": 15.93, + "learning_rate": 7.57517196362803e-08, + "loss": 8.6811, + "step": 44040 + }, + { + "epoch": 15.93, + "learning_rate": 7.549728804819325e-08, + "loss": 8.6199, + "step": 44060 + }, + { + "epoch": 15.94, + "learning_rate": 7.524320845114557e-08, + "loss": 8.6188, + "step": 44080 + }, + { + "epoch": 15.95, + "learning_rate": 7.498948135764368e-08, + "loss": 8.6039, + "step": 44100 + }, + { + "epoch": 15.96, + "learning_rate": 7.473610727948271e-08, + "loss": 8.6675, + "step": 44120 + }, + { + "epoch": 15.96, + "learning_rate": 7.448308672774605e-08, + "loss": 8.6623, + "step": 44140 + }, + { + "epoch": 15.97, + "learning_rate": 7.423042021280369e-08, + "loss": 8.8081, + "step": 44160 + }, + { + "epoch": 15.98, + "learning_rate": 7.397810824431155e-08, + "loss": 8.6835, + "step": 44180 + }, + { + "epoch": 15.99, + "learning_rate": 7.372615133121057e-08, + "loss": 8.6718, + "step": 44200 + }, + { + "epoch": 15.99, + "learning_rate": 7.347454998172522e-08, + "loss": 8.6558, + "step": 44220 + }, + { + "epoch": 16.0, + "learning_rate": 7.322330470336313e-08, + "loss": 8.6661, + "step": 44240 + }, + { + "epoch": 16.01, + "learning_rate": 7.297241600291334e-08, + "loss": 8.6431, + "step": 44260 + }, + { + "epoch": 16.01, + "learning_rate": 7.272188438644574e-08, + "loss": 8.6932, + "step": 44280 + }, + { + "epoch": 16.02, + "learning_rate": 7.24717103593101e-08, + "loss": 8.6602, + "step": 44300 + }, + { + "epoch": 16.03, + "learning_rate": 7.222189442613464e-08, + "loss": 8.6308, + "step": 44320 + }, + { + "epoch": 16.04, + "learning_rate": 7.197243709082554e-08, + "loss": 8.6254, + "step": 44340 + }, + { + "epoch": 16.04, + "learning_rate": 7.172333885656542e-08, + "loss": 8.6904, + "step": 44360 + }, + { + "epoch": 16.05, + "learning_rate": 7.147460022581255e-08, + "loss": 8.6725, + "step": 44380 + }, + { + "epoch": 16.06, + "learning_rate": 7.122622170030016e-08, + "loss": 8.763, + "step": 44400 + }, + { + "epoch": 16.07, + "learning_rate": 7.097820378103464e-08, + "loss": 8.6677, + "step": 44420 + }, + { + "epoch": 16.07, + "learning_rate": 7.073054696829545e-08, + "loss": 8.6222, + "step": 44440 + }, + { + "epoch": 16.08, + "learning_rate": 7.04832517616333e-08, + "loss": 8.7767, + "step": 44460 + }, + { + "epoch": 16.09, + "learning_rate": 7.023631865986965e-08, + "loss": 8.7476, + "step": 44480 + }, + { + "epoch": 16.09, + "learning_rate": 6.998974816109565e-08, + "loss": 8.7494, + "step": 44500 + }, + { + "epoch": 16.1, + "learning_rate": 6.97435407626708e-08, + "loss": 8.7064, + "step": 44520 + }, + { + "epoch": 16.11, + "learning_rate": 6.949769696122249e-08, + "loss": 8.7511, + "step": 44540 + }, + { + "epoch": 16.12, + "learning_rate": 6.925221725264436e-08, + "loss": 8.6708, + "step": 44560 + }, + { + "epoch": 16.12, + "learning_rate": 6.900710213209596e-08, + "loss": 8.6829, + "step": 44580 + }, + { + "epoch": 16.13, + "learning_rate": 6.876235209400123e-08, + "loss": 8.5974, + "step": 44600 + }, + { + "epoch": 16.14, + "learning_rate": 6.851796763204765e-08, + "loss": 8.612, + "step": 44620 + }, + { + "epoch": 16.14, + "learning_rate": 6.827394923918553e-08, + "loss": 8.5953, + "step": 44640 + }, + { + "epoch": 16.15, + "learning_rate": 6.803029740762648e-08, + "loss": 8.6415, + "step": 44660 + }, + { + "epoch": 16.16, + "learning_rate": 6.778701262884304e-08, + "loss": 8.6311, + "step": 44680 + }, + { + "epoch": 16.17, + "learning_rate": 6.75440953935671e-08, + "loss": 8.708, + "step": 44700 + }, + { + "epoch": 16.17, + "learning_rate": 6.730154619178918e-08, + "loss": 8.6652, + "step": 44720 + }, + { + "epoch": 16.18, + "learning_rate": 6.705936551275773e-08, + "loss": 8.6269, + "step": 44740 + }, + { + "epoch": 16.19, + "learning_rate": 6.681755384497748e-08, + "loss": 8.7657, + "step": 44760 + }, + { + "epoch": 16.2, + "learning_rate": 6.657611167620908e-08, + "loss": 8.709, + "step": 44780 + }, + { + "epoch": 16.2, + "learning_rate": 6.633503949346775e-08, + "loss": 8.6648, + "step": 44800 + }, + { + "epoch": 16.21, + "learning_rate": 6.609433778302234e-08, + "loss": 8.6801, + "step": 44820 + }, + { + "epoch": 16.22, + "learning_rate": 6.585400703039465e-08, + "loss": 8.7038, + "step": 44840 + }, + { + "epoch": 16.22, + "learning_rate": 6.561404772035792e-08, + "loss": 8.7411, + "step": 44860 + }, + { + "epoch": 16.23, + "learning_rate": 6.537446033693645e-08, + "loss": 8.6095, + "step": 44880 + }, + { + "epoch": 16.24, + "learning_rate": 6.513524536340412e-08, + "loss": 8.7588, + "step": 44900 + }, + { + "epoch": 16.25, + "learning_rate": 6.489640328228354e-08, + "loss": 8.6095, + "step": 44920 + }, + { + "epoch": 16.25, + "learning_rate": 6.465793457534552e-08, + "loss": 8.6027, + "step": 44940 + }, + { + "epoch": 16.26, + "learning_rate": 6.441983972360729e-08, + "loss": 8.5969, + "step": 44960 + }, + { + "epoch": 16.27, + "learning_rate": 6.418211920733235e-08, + "loss": 8.6493, + "step": 44980 + }, + { + "epoch": 16.27, + "learning_rate": 6.39447735060288e-08, + "loss": 8.6815, + "step": 45000 + }, + { + "epoch": 16.28, + "learning_rate": 6.370780309844906e-08, + "loss": 8.6133, + "step": 45020 + }, + { + "epoch": 16.29, + "learning_rate": 6.347120846258818e-08, + "loss": 8.743, + "step": 45040 + }, + { + "epoch": 16.3, + "learning_rate": 6.323499007568336e-08, + "loss": 8.6719, + "step": 45060 + }, + { + "epoch": 16.3, + "learning_rate": 6.299914841421309e-08, + "loss": 8.7164, + "step": 45080 + }, + { + "epoch": 16.31, + "learning_rate": 6.276368395389556e-08, + "loss": 8.5924, + "step": 45100 + }, + { + "epoch": 16.32, + "learning_rate": 6.25285971696885e-08, + "loss": 8.7207, + "step": 45120 + }, + { + "epoch": 16.33, + "learning_rate": 6.229388853578748e-08, + "loss": 8.691, + "step": 45140 + }, + { + "epoch": 16.33, + "learning_rate": 6.20595585256256e-08, + "loss": 8.6939, + "step": 45160 + }, + { + "epoch": 16.34, + "learning_rate": 6.1825607611872e-08, + "loss": 8.7609, + "step": 45180 + }, + { + "epoch": 16.35, + "learning_rate": 6.159203626643137e-08, + "loss": 8.6598, + "step": 45200 + }, + { + "epoch": 16.35, + "learning_rate": 6.135884496044244e-08, + "loss": 8.6806, + "step": 45220 + }, + { + "epoch": 16.36, + "learning_rate": 6.112603416427776e-08, + "loss": 8.6477, + "step": 45240 + }, + { + "epoch": 16.37, + "learning_rate": 6.089360434754203e-08, + "loss": 8.7188, + "step": 45260 + }, + { + "epoch": 16.38, + "learning_rate": 6.066155597907157e-08, + "loss": 8.6378, + "step": 45280 + }, + { + "epoch": 16.38, + "learning_rate": 6.04298895269334e-08, + "loss": 8.6899, + "step": 45300 + }, + { + "epoch": 16.39, + "learning_rate": 6.019860545842392e-08, + "loss": 8.6343, + "step": 45320 + }, + { + "epoch": 16.4, + "learning_rate": 5.996770424006856e-08, + "loss": 8.741, + "step": 45340 + }, + { + "epoch": 16.41, + "learning_rate": 5.973718633762015e-08, + "loss": 8.6815, + "step": 45360 + }, + { + "epoch": 16.41, + "learning_rate": 5.950705221605859e-08, + "loss": 8.6599, + "step": 45380 + }, + { + "epoch": 16.42, + "learning_rate": 5.927730233958947e-08, + "loss": 8.692, + "step": 45400 + }, + { + "epoch": 16.43, + "learning_rate": 5.9047937171643494e-08, + "loss": 8.7429, + "step": 45420 + }, + { + "epoch": 16.43, + "learning_rate": 5.881895717487523e-08, + "loss": 8.7445, + "step": 45440 + }, + { + "epoch": 16.44, + "learning_rate": 5.8590362811162254e-08, + "loss": 8.6593, + "step": 45460 + }, + { + "epoch": 16.45, + "learning_rate": 5.836215454160453e-08, + "loss": 8.8061, + "step": 45480 + }, + { + "epoch": 16.46, + "learning_rate": 5.813433282652297e-08, + "loss": 8.6217, + "step": 45500 + }, + { + "epoch": 16.46, + "learning_rate": 5.7906898125458984e-08, + "loss": 8.7685, + "step": 45520 + }, + { + "epoch": 16.47, + "learning_rate": 5.767985089717312e-08, + "loss": 8.7338, + "step": 45540 + }, + { + "epoch": 16.48, + "learning_rate": 5.7453191599644405e-08, + "loss": 8.6323, + "step": 45560 + }, + { + "epoch": 16.48, + "learning_rate": 5.722692069006957e-08, + "loss": 8.5866, + "step": 45580 + }, + { + "epoch": 16.49, + "learning_rate": 5.700103862486158e-08, + "loss": 8.7144, + "step": 45600 + }, + { + "epoch": 16.5, + "learning_rate": 5.6775545859649446e-08, + "loss": 8.6515, + "step": 45620 + }, + { + "epoch": 16.51, + "learning_rate": 5.655044284927657e-08, + "loss": 8.7815, + "step": 45640 + }, + { + "epoch": 16.51, + "learning_rate": 5.632573004780031e-08, + "loss": 8.6392, + "step": 45660 + }, + { + "epoch": 16.52, + "learning_rate": 5.610140790849108e-08, + "loss": 8.6613, + "step": 45680 + }, + { + "epoch": 16.53, + "learning_rate": 5.587747688383099e-08, + "loss": 8.7389, + "step": 45700 + }, + { + "epoch": 16.54, + "learning_rate": 5.5653937425513526e-08, + "loss": 8.6548, + "step": 45720 + }, + { + "epoch": 16.54, + "learning_rate": 5.5430789984442064e-08, + "loss": 8.661, + "step": 45740 + }, + { + "epoch": 16.55, + "learning_rate": 5.520803501072954e-08, + "loss": 8.7147, + "step": 45760 + }, + { + "epoch": 16.56, + "learning_rate": 5.4985672953696995e-08, + "loss": 8.7411, + "step": 45780 + }, + { + "epoch": 16.56, + "learning_rate": 5.4763704261872906e-08, + "loss": 8.6135, + "step": 45800 + }, + { + "epoch": 16.57, + "learning_rate": 5.454212938299255e-08, + "loss": 8.7409, + "step": 45820 + }, + { + "epoch": 16.58, + "learning_rate": 5.432094876399654e-08, + "loss": 8.6923, + "step": 45840 + }, + { + "epoch": 16.59, + "learning_rate": 5.4100162851030447e-08, + "loss": 8.5929, + "step": 45860 + }, + { + "epoch": 16.59, + "learning_rate": 5.387977208944355e-08, + "loss": 8.644, + "step": 45880 + }, + { + "epoch": 16.6, + "learning_rate": 5.3659776923788036e-08, + "loss": 8.648, + "step": 45900 + }, + { + "epoch": 16.61, + "learning_rate": 5.344017779781834e-08, + "loss": 8.594, + "step": 45920 + }, + { + "epoch": 16.61, + "learning_rate": 5.32209751544897e-08, + "loss": 8.7064, + "step": 45940 + }, + { + "epoch": 16.62, + "learning_rate": 5.3002169435958e-08, + "loss": 8.7087, + "step": 45960 + }, + { + "epoch": 16.63, + "learning_rate": 5.278376108357818e-08, + "loss": 8.8258, + "step": 45980 + }, + { + "epoch": 16.64, + "learning_rate": 5.2565750537903716e-08, + "loss": 8.758, + "step": 46000 + }, + { + "epoch": 16.64, + "learning_rate": 5.2348138238685835e-08, + "loss": 8.6735, + "step": 46020 + }, + { + "epoch": 16.65, + "learning_rate": 5.213092462487215e-08, + "loss": 8.6791, + "step": 46040 + }, + { + "epoch": 16.66, + "learning_rate": 5.1914110134606445e-08, + "loss": 8.6668, + "step": 46060 + }, + { + "epoch": 16.67, + "learning_rate": 5.1697695205227126e-08, + "loss": 8.6343, + "step": 46080 + }, + { + "epoch": 16.67, + "learning_rate": 5.1481680273266713e-08, + "loss": 8.6874, + "step": 46100 + }, + { + "epoch": 16.68, + "learning_rate": 5.1266065774451086e-08, + "loss": 8.6916, + "step": 46120 + }, + { + "epoch": 16.69, + "learning_rate": 5.105085214369806e-08, + "loss": 8.7685, + "step": 46140 + }, + { + "epoch": 16.69, + "learning_rate": 5.0836039815117224e-08, + "loss": 8.7846, + "step": 46160 + }, + { + "epoch": 16.7, + "learning_rate": 5.062162922200844e-08, + "loss": 8.7066, + "step": 46180 + }, + { + "epoch": 16.71, + "learning_rate": 5.040762079686123e-08, + "loss": 8.6493, + "step": 46200 + }, + { + "epoch": 16.72, + "learning_rate": 5.019401497135414e-08, + "loss": 8.6473, + "step": 46220 + }, + { + "epoch": 16.72, + "learning_rate": 4.9980812176353274e-08, + "loss": 8.6748, + "step": 46240 + }, + { + "epoch": 16.73, + "learning_rate": 4.9768012841912147e-08, + "loss": 8.7513, + "step": 46260 + }, + { + "epoch": 16.74, + "learning_rate": 4.955561739727013e-08, + "loss": 8.7193, + "step": 46280 + }, + { + "epoch": 16.75, + "learning_rate": 4.9343626270852174e-08, + "loss": 8.5842, + "step": 46300 + }, + { + "epoch": 16.75, + "learning_rate": 4.9132039890267456e-08, + "loss": 8.6781, + "step": 46320 + }, + { + "epoch": 16.76, + "learning_rate": 4.89208586823088e-08, + "loss": 8.7505, + "step": 46340 + }, + { + "epoch": 16.77, + "learning_rate": 4.8710083072951904e-08, + "loss": 8.839, + "step": 46360 + }, + { + "epoch": 16.77, + "learning_rate": 4.849971348735405e-08, + "loss": 8.6246, + "step": 46380 + }, + { + "epoch": 16.78, + "learning_rate": 4.8289750349853834e-08, + "loss": 8.6899, + "step": 46400 + }, + { + "epoch": 16.79, + "learning_rate": 4.8080194083969764e-08, + "loss": 8.7211, + "step": 46420 + }, + { + "epoch": 16.8, + "learning_rate": 4.7871045112399674e-08, + "loss": 8.7762, + "step": 46440 + }, + { + "epoch": 16.8, + "learning_rate": 4.7662303857020056e-08, + "loss": 8.7086, + "step": 46460 + }, + { + "epoch": 16.81, + "learning_rate": 4.745397073888463e-08, + "loss": 8.6422, + "step": 46480 + }, + { + "epoch": 16.82, + "learning_rate": 4.724604617822428e-08, + "loss": 8.6367, + "step": 46500 + }, + { + "epoch": 16.82, + "learning_rate": 4.703853059444543e-08, + "loss": 8.6699, + "step": 46520 + }, + { + "epoch": 16.83, + "learning_rate": 4.683142440612967e-08, + "loss": 8.7553, + "step": 46540 + }, + { + "epoch": 16.84, + "learning_rate": 4.66247280310329e-08, + "loss": 8.6603, + "step": 46560 + }, + { + "epoch": 16.85, + "learning_rate": 4.641844188608421e-08, + "loss": 8.7499, + "step": 46580 + }, + { + "epoch": 16.85, + "learning_rate": 4.621256638738541e-08, + "loss": 8.6107, + "step": 46600 + }, + { + "epoch": 16.86, + "learning_rate": 4.600710195020982e-08, + "loss": 8.5863, + "step": 46620 + }, + { + "epoch": 16.87, + "learning_rate": 4.5802048989001556e-08, + "loss": 8.8368, + "step": 46640 + }, + { + "epoch": 16.88, + "learning_rate": 4.559740791737504e-08, + "loss": 8.7823, + "step": 46660 + }, + { + "epoch": 16.88, + "learning_rate": 4.539317914811353e-08, + "loss": 8.5468, + "step": 46680 + }, + { + "epoch": 16.89, + "learning_rate": 4.518936309316887e-08, + "loss": 8.5938, + "step": 46700 + }, + { + "epoch": 16.9, + "learning_rate": 4.498596016366027e-08, + "loss": 8.6411, + "step": 46720 + }, + { + "epoch": 16.9, + "learning_rate": 4.4782970769873614e-08, + "loss": 8.6245, + "step": 46740 + }, + { + "epoch": 16.91, + "learning_rate": 4.458039532126082e-08, + "loss": 8.5955, + "step": 46760 + }, + { + "epoch": 16.92, + "learning_rate": 4.4378234226438546e-08, + "loss": 8.665, + "step": 46780 + }, + { + "epoch": 16.93, + "learning_rate": 4.4176487893187956e-08, + "loss": 8.7599, + "step": 46800 + }, + { + "epoch": 16.93, + "learning_rate": 4.3975156728453336e-08, + "loss": 8.6156, + "step": 46820 + }, + { + "epoch": 16.94, + "learning_rate": 4.3774241138341775e-08, + "loss": 8.6159, + "step": 46840 + }, + { + "epoch": 16.95, + "learning_rate": 4.35737415281219e-08, + "loss": 8.6734, + "step": 46860 + }, + { + "epoch": 16.95, + "learning_rate": 4.3373658302223253e-08, + "loss": 8.652, + "step": 46880 + }, + { + "epoch": 16.96, + "learning_rate": 4.317399186423573e-08, + "loss": 8.6976, + "step": 46900 + }, + { + "epoch": 16.97, + "learning_rate": 4.29747426169082e-08, + "loss": 8.6995, + "step": 46920 + }, + { + "epoch": 16.98, + "learning_rate": 4.2775910962148334e-08, + "loss": 8.6838, + "step": 46940 + }, + { + "epoch": 16.98, + "learning_rate": 4.257749730102112e-08, + "loss": 8.7379, + "step": 46960 + }, + { + "epoch": 16.99, + "learning_rate": 4.2379502033748764e-08, + "loss": 8.6209, + "step": 46980 + }, + { + "epoch": 17.0, + "learning_rate": 4.218192555970923e-08, + "loss": 8.7024, + "step": 47000 + }, + { + "epoch": 17.01, + "learning_rate": 4.198476827743597e-08, + "loss": 8.6773, + "step": 47020 + }, + { + "epoch": 17.01, + "learning_rate": 4.1788030584616634e-08, + "loss": 8.6747, + "step": 47040 + }, + { + "epoch": 17.02, + "learning_rate": 4.159171287809279e-08, + "loss": 8.8099, + "step": 47060 + }, + { + "epoch": 17.03, + "learning_rate": 4.1395815553858624e-08, + "loss": 8.7174, + "step": 47080 + }, + { + "epoch": 17.03, + "learning_rate": 4.120033900706041e-08, + "loss": 8.6592, + "step": 47100 + }, + { + "epoch": 17.04, + "learning_rate": 4.1005283631995816e-08, + "loss": 8.6115, + "step": 47120 + }, + { + "epoch": 17.05, + "learning_rate": 4.081064982211274e-08, + "loss": 8.8176, + "step": 47140 + }, + { + "epoch": 17.06, + "learning_rate": 4.061643797000894e-08, + "loss": 8.7223, + "step": 47160 + }, + { + "epoch": 17.06, + "learning_rate": 4.042264846743085e-08, + "loss": 8.6302, + "step": 47180 + }, + { + "epoch": 17.07, + "learning_rate": 4.022928170527315e-08, + "loss": 8.6813, + "step": 47200 + }, + { + "epoch": 17.08, + "learning_rate": 4.003633807357767e-08, + "loss": 8.6455, + "step": 47220 + }, + { + "epoch": 17.08, + "learning_rate": 3.984381796153288e-08, + "loss": 8.5833, + "step": 47240 + }, + { + "epoch": 17.09, + "learning_rate": 3.9651721757472835e-08, + "loss": 8.6188, + "step": 47260 + }, + { + "epoch": 17.1, + "learning_rate": 3.946004984887652e-08, + "loss": 8.6727, + "step": 47280 + }, + { + "epoch": 17.11, + "learning_rate": 3.9268802622367234e-08, + "loss": 8.6855, + "step": 47300 + }, + { + "epoch": 17.11, + "learning_rate": 3.9077980463711384e-08, + "loss": 8.81, + "step": 47320 + }, + { + "epoch": 17.12, + "learning_rate": 3.888758375781828e-08, + "loss": 8.761, + "step": 47340 + }, + { + "epoch": 17.13, + "learning_rate": 3.869761288873882e-08, + "loss": 8.6576, + "step": 47360 + }, + { + "epoch": 17.14, + "learning_rate": 3.850806823966491e-08, + "loss": 8.6554, + "step": 47380 + }, + { + "epoch": 17.14, + "learning_rate": 3.831895019292897e-08, + "loss": 8.6861, + "step": 47400 + }, + { + "epoch": 17.15, + "learning_rate": 3.813025913000265e-08, + "loss": 8.7043, + "step": 47420 + }, + { + "epoch": 17.16, + "learning_rate": 3.7941995431496535e-08, + "loss": 8.7619, + "step": 47440 + }, + { + "epoch": 17.16, + "learning_rate": 3.775415947715899e-08, + "loss": 8.726, + "step": 47460 + }, + { + "epoch": 17.17, + "learning_rate": 3.7566751645875776e-08, + "loss": 8.6323, + "step": 47480 + }, + { + "epoch": 17.18, + "learning_rate": 3.7379772315668885e-08, + "loss": 8.7214, + "step": 47500 + }, + { + "epoch": 17.19, + "learning_rate": 3.7193221863696026e-08, + "loss": 8.7395, + "step": 47520 + }, + { + "epoch": 17.19, + "learning_rate": 3.700710066624993e-08, + "loss": 8.6951, + "step": 47540 + }, + { + "epoch": 17.2, + "learning_rate": 3.6821409098757336e-08, + "loss": 8.6347, + "step": 47560 + }, + { + "epoch": 17.21, + "learning_rate": 3.6636147535778483e-08, + "loss": 8.6968, + "step": 47580 + }, + { + "epoch": 17.22, + "learning_rate": 3.6451316351006153e-08, + "loss": 8.6192, + "step": 47600 + }, + { + "epoch": 17.22, + "learning_rate": 3.6266915917264994e-08, + "loss": 8.6344, + "step": 47620 + }, + { + "epoch": 17.23, + "learning_rate": 3.6082946606510956e-08, + "loss": 8.8091, + "step": 47640 + }, + { + "epoch": 17.24, + "learning_rate": 3.589940878983008e-08, + "loss": 8.802, + "step": 47660 + }, + { + "epoch": 17.24, + "learning_rate": 3.571630283743837e-08, + "loss": 8.6752, + "step": 47680 + }, + { + "epoch": 17.25, + "learning_rate": 3.5533629118680436e-08, + "loss": 8.7103, + "step": 47700 + }, + { + "epoch": 17.26, + "learning_rate": 3.535138800202911e-08, + "loss": 8.7011, + "step": 47720 + }, + { + "epoch": 17.27, + "learning_rate": 3.5169579855084755e-08, + "loss": 8.7138, + "step": 47740 + }, + { + "epoch": 17.27, + "learning_rate": 3.498820504457414e-08, + "loss": 8.6862, + "step": 47760 + }, + { + "epoch": 17.28, + "learning_rate": 3.4807263936350166e-08, + "loss": 8.6514, + "step": 47780 + }, + { + "epoch": 17.29, + "learning_rate": 3.4626756895390824e-08, + "loss": 8.6566, + "step": 47800 + }, + { + "epoch": 17.29, + "learning_rate": 3.444668428579844e-08, + "loss": 8.6684, + "step": 47820 + }, + { + "epoch": 17.3, + "learning_rate": 3.426704647079928e-08, + "loss": 8.6983, + "step": 47840 + }, + { + "epoch": 17.31, + "learning_rate": 3.4087843812742354e-08, + "loss": 8.6276, + "step": 47860 + }, + { + "epoch": 17.32, + "learning_rate": 3.390907667309906e-08, + "loss": 8.6916, + "step": 47880 + }, + { + "epoch": 17.32, + "learning_rate": 3.373074541246224e-08, + "loss": 8.6749, + "step": 47900 + }, + { + "epoch": 17.33, + "learning_rate": 3.355285039054545e-08, + "loss": 8.6663, + "step": 47920 + }, + { + "epoch": 17.34, + "learning_rate": 3.3375391966182526e-08, + "loss": 8.626, + "step": 47940 + }, + { + "epoch": 17.35, + "learning_rate": 3.3198370497326405e-08, + "loss": 8.7064, + "step": 47960 + }, + { + "epoch": 17.35, + "learning_rate": 3.3021786341048824e-08, + "loss": 8.6254, + "step": 47980 + }, + { + "epoch": 17.36, + "learning_rate": 3.284563985353925e-08, + "loss": 8.715, + "step": 48000 + }, + { + "epoch": 17.37, + "learning_rate": 3.2669931390104374e-08, + "loss": 8.7273, + "step": 48020 + }, + { + "epoch": 17.37, + "learning_rate": 3.249466130516745e-08, + "loss": 8.6447, + "step": 48040 + }, + { + "epoch": 17.38, + "learning_rate": 3.231982995226731e-08, + "loss": 8.7037, + "step": 48060 + }, + { + "epoch": 17.39, + "learning_rate": 3.2145437684058e-08, + "loss": 8.6636, + "step": 48080 + }, + { + "epoch": 17.4, + "learning_rate": 3.197148485230769e-08, + "loss": 8.6266, + "step": 48100 + }, + { + "epoch": 17.4, + "learning_rate": 3.179797180789831e-08, + "loss": 8.7095, + "step": 48120 + }, + { + "epoch": 17.41, + "learning_rate": 3.162489890082459e-08, + "loss": 8.6365, + "step": 48140 + }, + { + "epoch": 17.42, + "learning_rate": 3.14522664801935e-08, + "loss": 8.6417, + "step": 48160 + }, + { + "epoch": 17.42, + "learning_rate": 3.1280074894223545e-08, + "loss": 8.6506, + "step": 48180 + }, + { + "epoch": 17.43, + "learning_rate": 3.1108324490243864e-08, + "loss": 8.771, + "step": 48200 + }, + { + "epoch": 17.44, + "learning_rate": 3.093701561469394e-08, + "loss": 8.7983, + "step": 48220 + }, + { + "epoch": 17.45, + "learning_rate": 3.07661486131224e-08, + "loss": 8.6677, + "step": 48240 + }, + { + "epoch": 17.45, + "learning_rate": 3.059572383018666e-08, + "loss": 8.6159, + "step": 48260 + }, + { + "epoch": 17.46, + "learning_rate": 3.0425741609652166e-08, + "loss": 8.5923, + "step": 48280 + }, + { + "epoch": 17.47, + "learning_rate": 3.0256202294391576e-08, + "loss": 8.6792, + "step": 48300 + }, + { + "epoch": 17.48, + "learning_rate": 3.00871062263843e-08, + "loss": 8.6226, + "step": 48320 + }, + { + "epoch": 17.48, + "learning_rate": 2.991845374671553e-08, + "loss": 8.6312, + "step": 48340 + }, + { + "epoch": 17.49, + "learning_rate": 2.9750245195575703e-08, + "loss": 8.6999, + "step": 48360 + }, + { + "epoch": 17.5, + "learning_rate": 2.9582480912259984e-08, + "loss": 8.6946, + "step": 48380 + }, + { + "epoch": 17.5, + "learning_rate": 2.94151612351671e-08, + "loss": 8.6892, + "step": 48400 + }, + { + "epoch": 17.51, + "learning_rate": 2.924828650179928e-08, + "loss": 8.6228, + "step": 48420 + }, + { + "epoch": 17.52, + "learning_rate": 2.908185704876101e-08, + "loss": 8.6398, + "step": 48440 + }, + { + "epoch": 17.53, + "learning_rate": 2.8915873211758645e-08, + "loss": 8.6749, + "step": 48460 + }, + { + "epoch": 17.53, + "learning_rate": 2.875033532559984e-08, + "loss": 8.7345, + "step": 48480 + }, + { + "epoch": 17.54, + "learning_rate": 2.8585243724192466e-08, + "loss": 8.6511, + "step": 48500 + }, + { + "epoch": 17.55, + "learning_rate": 2.8420598740544476e-08, + "loss": 8.616, + "step": 48520 + }, + { + "epoch": 17.56, + "learning_rate": 2.825640070676269e-08, + "loss": 8.8774, + "step": 48540 + }, + { + "epoch": 17.56, + "learning_rate": 2.8092649954052473e-08, + "loss": 8.6434, + "step": 48560 + }, + { + "epoch": 17.57, + "learning_rate": 2.792934681271708e-08, + "loss": 8.5202, + "step": 48580 + }, + { + "epoch": 17.58, + "learning_rate": 2.7766491612156663e-08, + "loss": 8.6703, + "step": 48600 + }, + { + "epoch": 17.58, + "learning_rate": 2.7604084680868112e-08, + "loss": 8.6421, + "step": 48620 + }, + { + "epoch": 17.59, + "learning_rate": 2.74421263464438e-08, + "loss": 8.717, + "step": 48640 + }, + { + "epoch": 17.6, + "learning_rate": 2.7280616935571516e-08, + "loss": 8.6583, + "step": 48660 + }, + { + "epoch": 17.61, + "learning_rate": 2.7119556774033327e-08, + "loss": 8.7085, + "step": 48680 + }, + { + "epoch": 17.61, + "learning_rate": 2.6958946186705162e-08, + "loss": 8.6937, + "step": 48700 + }, + { + "epoch": 17.62, + "learning_rate": 2.679878549755618e-08, + "loss": 8.6624, + "step": 48720 + }, + { + "epoch": 17.63, + "learning_rate": 2.6639075029647935e-08, + "loss": 8.6899, + "step": 48740 + }, + { + "epoch": 17.63, + "learning_rate": 2.6479815105133974e-08, + "loss": 8.7277, + "step": 48760 + }, + { + "epoch": 17.64, + "learning_rate": 2.632100604525886e-08, + "loss": 8.6808, + "step": 48780 + }, + { + "epoch": 17.65, + "learning_rate": 2.616264817035793e-08, + "loss": 8.722, + "step": 48800 + }, + { + "epoch": 17.66, + "learning_rate": 2.6004741799856207e-08, + "loss": 8.6183, + "step": 48820 + }, + { + "epoch": 17.66, + "learning_rate": 2.5847287252268228e-08, + "loss": 8.6058, + "step": 48840 + }, + { + "epoch": 17.67, + "learning_rate": 2.569028484519692e-08, + "loss": 8.6472, + "step": 48860 + }, + { + "epoch": 17.68, + "learning_rate": 2.5533734895333363e-08, + "loss": 8.8245, + "step": 48880 + }, + { + "epoch": 17.69, + "learning_rate": 2.5377637718455884e-08, + "loss": 8.6572, + "step": 48900 + }, + { + "epoch": 17.69, + "learning_rate": 2.5221993629429505e-08, + "loss": 8.5695, + "step": 48920 + }, + { + "epoch": 17.7, + "learning_rate": 2.5066802942205452e-08, + "loss": 8.7051, + "step": 48940 + }, + { + "epoch": 17.71, + "learning_rate": 2.4912065969820206e-08, + "loss": 8.688, + "step": 48960 + }, + { + "epoch": 17.71, + "learning_rate": 2.475778302439524e-08, + "loss": 8.732, + "step": 48980 + }, + { + "epoch": 17.72, + "learning_rate": 2.4603954417136e-08, + "loss": 8.6216, + "step": 49000 + }, + { + "epoch": 17.73, + "learning_rate": 2.445058045833173e-08, + "loss": 8.6969, + "step": 49020 + }, + { + "epoch": 17.74, + "learning_rate": 2.4297661457354346e-08, + "loss": 8.8897, + "step": 49040 + }, + { + "epoch": 17.74, + "learning_rate": 2.41451977226583e-08, + "loss": 8.552, + "step": 49060 + }, + { + "epoch": 17.75, + "learning_rate": 2.3993189561779537e-08, + "loss": 8.6289, + "step": 49080 + }, + { + "epoch": 17.76, + "learning_rate": 2.3841637281335064e-08, + "loss": 8.793, + "step": 49100 + }, + { + "epoch": 17.76, + "learning_rate": 2.3690541187022545e-08, + "loss": 8.6217, + "step": 49120 + }, + { + "epoch": 17.77, + "learning_rate": 2.3539901583619183e-08, + "loss": 8.6762, + "step": 49140 + }, + { + "epoch": 17.78, + "learning_rate": 2.338971877498161e-08, + "loss": 8.619, + "step": 49160 + }, + { + "epoch": 17.79, + "learning_rate": 2.323999306404492e-08, + "loss": 8.7034, + "step": 49180 + }, + { + "epoch": 17.79, + "learning_rate": 2.30907247528222e-08, + "loss": 8.613, + "step": 49200 + }, + { + "epoch": 17.8, + "learning_rate": 2.2941914142404013e-08, + "loss": 8.6475, + "step": 49220 + }, + { + "epoch": 17.81, + "learning_rate": 2.2793561532957555e-08, + "loss": 8.575, + "step": 49240 + }, + { + "epoch": 17.82, + "learning_rate": 2.2645667223726322e-08, + "loss": 8.7523, + "step": 49260 + }, + { + "epoch": 17.82, + "learning_rate": 2.2498231513029236e-08, + "loss": 8.7378, + "step": 49280 + }, + { + "epoch": 17.83, + "learning_rate": 2.2351254698260296e-08, + "loss": 8.5931, + "step": 49300 + }, + { + "epoch": 17.84, + "learning_rate": 2.220473707588777e-08, + "loss": 8.663, + "step": 49320 + }, + { + "epoch": 17.84, + "learning_rate": 2.205867894145366e-08, + "loss": 8.6425, + "step": 49340 + }, + { + "epoch": 17.85, + "learning_rate": 2.191308058957328e-08, + "loss": 8.55, + "step": 49360 + }, + { + "epoch": 17.86, + "learning_rate": 2.1767942313934334e-08, + "loss": 8.7109, + "step": 49380 + }, + { + "epoch": 17.87, + "learning_rate": 2.1623264407296642e-08, + "loss": 8.6994, + "step": 49400 + }, + { + "epoch": 17.87, + "learning_rate": 2.147904716149135e-08, + "loss": 8.6966, + "step": 49420 + }, + { + "epoch": 17.88, + "learning_rate": 2.1335290867420337e-08, + "loss": 8.6716, + "step": 49440 + }, + { + "epoch": 17.89, + "learning_rate": 2.1191995815055876e-08, + "loss": 8.6833, + "step": 49460 + }, + { + "epoch": 17.9, + "learning_rate": 2.1049162293439587e-08, + "loss": 8.7627, + "step": 49480 + }, + { + "epoch": 17.9, + "learning_rate": 2.0906790590682455e-08, + "loss": 8.6788, + "step": 49500 + }, + { + "epoch": 17.91, + "learning_rate": 2.0764880993963675e-08, + "loss": 8.6624, + "step": 49520 + }, + { + "epoch": 17.92, + "learning_rate": 2.062343378953038e-08, + "loss": 8.778, + "step": 49540 + }, + { + "epoch": 17.92, + "learning_rate": 2.0482449262697126e-08, + "loss": 8.7197, + "step": 49560 + }, + { + "epoch": 17.93, + "learning_rate": 2.0341927697845012e-08, + "loss": 8.7374, + "step": 49580 + }, + { + "epoch": 17.94, + "learning_rate": 2.0201869378421497e-08, + "loss": 8.6552, + "step": 49600 + }, + { + "epoch": 17.95, + "learning_rate": 2.006227458693946e-08, + "loss": 8.7903, + "step": 49620 + }, + { + "epoch": 17.95, + "learning_rate": 1.9923143604976823e-08, + "loss": 8.6071, + "step": 49640 + }, + { + "epoch": 17.96, + "learning_rate": 1.978447671317604e-08, + "loss": 8.6615, + "step": 49660 + }, + { + "epoch": 17.97, + "learning_rate": 1.9646274191243318e-08, + "loss": 8.5126, + "step": 49680 + }, + { + "epoch": 17.97, + "learning_rate": 1.9508536317948356e-08, + "loss": 8.6629, + "step": 49700 + }, + { + "epoch": 17.98, + "learning_rate": 1.937126337112338e-08, + "loss": 8.6126, + "step": 49720 + }, + { + "epoch": 17.99, + "learning_rate": 1.923445562766296e-08, + "loss": 8.9988, + "step": 49740 + }, + { + "epoch": 18.0, + "learning_rate": 1.909811336352332e-08, + "loss": 8.7915, + "step": 49760 + }, + { + "epoch": 18.0, + "learning_rate": 1.8962236853721586e-08, + "loss": 8.6122, + "step": 49780 + }, + { + "epoch": 18.01, + "learning_rate": 1.882682637233568e-08, + "loss": 8.7854, + "step": 49800 + }, + { + "epoch": 18.02, + "learning_rate": 1.86918821925032e-08, + "loss": 8.6883, + "step": 49820 + }, + { + "epoch": 18.03, + "learning_rate": 1.855740458642141e-08, + "loss": 8.6263, + "step": 49840 + }, + { + "epoch": 18.03, + "learning_rate": 1.84233938253463e-08, + "loss": 8.6004, + "step": 49860 + }, + { + "epoch": 18.04, + "learning_rate": 1.828985017959217e-08, + "loss": 8.5865, + "step": 49880 + }, + { + "epoch": 18.05, + "learning_rate": 1.815677391853124e-08, + "loss": 8.5881, + "step": 49900 + }, + { + "epoch": 18.05, + "learning_rate": 1.8024165310592754e-08, + "loss": 8.6077, + "step": 49920 + }, + { + "epoch": 18.06, + "learning_rate": 1.789202462326289e-08, + "loss": 8.6032, + "step": 49940 + }, + { + "epoch": 18.07, + "learning_rate": 1.7760352123083798e-08, + "loss": 8.6446, + "step": 49960 + }, + { + "epoch": 18.08, + "learning_rate": 1.7629148075653243e-08, + "loss": 8.6063, + "step": 49980 + }, + { + "epoch": 18.08, + "learning_rate": 1.749841274562422e-08, + "loss": 8.6, + "step": 50000 + }, + { + "epoch": 18.09, + "learning_rate": 1.7368146396704113e-08, + "loss": 8.5862, + "step": 50020 + }, + { + "epoch": 18.1, + "learning_rate": 1.7238349291654435e-08, + "loss": 8.6713, + "step": 50040 + }, + { + "epoch": 18.1, + "learning_rate": 1.7109021692290114e-08, + "loss": 8.5957, + "step": 50060 + }, + { + "epoch": 18.11, + "learning_rate": 1.6980163859479007e-08, + "loss": 8.6717, + "step": 50080 + }, + { + "epoch": 18.12, + "learning_rate": 1.6851776053141503e-08, + "loss": 8.6833, + "step": 50100 + }, + { + "epoch": 18.13, + "learning_rate": 1.6723858532249778e-08, + "loss": 8.6057, + "step": 50120 + }, + { + "epoch": 18.13, + "learning_rate": 1.6596411554827522e-08, + "loss": 8.7165, + "step": 50140 + }, + { + "epoch": 18.14, + "learning_rate": 1.6469435377949175e-08, + "loss": 8.6832, + "step": 50160 + }, + { + "epoch": 18.15, + "learning_rate": 1.634293025773953e-08, + "loss": 8.6771, + "step": 50180 + }, + { + "epoch": 18.16, + "learning_rate": 1.6216896449373295e-08, + "loss": 8.7166, + "step": 50200 + }, + { + "epoch": 18.16, + "learning_rate": 1.6091334207074398e-08, + "loss": 8.8346, + "step": 50220 + }, + { + "epoch": 18.17, + "learning_rate": 1.596624378411565e-08, + "loss": 8.7444, + "step": 50240 + }, + { + "epoch": 18.18, + "learning_rate": 1.5841625432818057e-08, + "loss": 8.7195, + "step": 50260 + }, + { + "epoch": 18.18, + "learning_rate": 1.5717479404550455e-08, + "loss": 8.7715, + "step": 50280 + }, + { + "epoch": 18.19, + "learning_rate": 1.5593805949728977e-08, + "loss": 8.7242, + "step": 50300 + }, + { + "epoch": 18.2, + "learning_rate": 1.5470605317816436e-08, + "loss": 8.7336, + "step": 50320 + }, + { + "epoch": 18.21, + "learning_rate": 1.5347877757322076e-08, + "loss": 8.7456, + "step": 50340 + }, + { + "epoch": 18.21, + "learning_rate": 1.5225623515800673e-08, + "loss": 8.9196, + "step": 50360 + }, + { + "epoch": 18.22, + "learning_rate": 1.5103842839852527e-08, + "loss": 8.6397, + "step": 50380 + }, + { + "epoch": 18.23, + "learning_rate": 1.4982535975122474e-08, + "loss": 8.7685, + "step": 50400 + }, + { + "epoch": 18.24, + "learning_rate": 1.48617031662997e-08, + "loss": 8.7352, + "step": 50420 + }, + { + "epoch": 18.24, + "learning_rate": 1.4741344657117238e-08, + "loss": 8.6774, + "step": 50440 + }, + { + "epoch": 18.25, + "learning_rate": 1.4621460690351334e-08, + "loss": 8.6874, + "step": 50460 + }, + { + "epoch": 18.26, + "learning_rate": 1.4502051507821106e-08, + "loss": 8.6548, + "step": 50480 + }, + { + "epoch": 18.26, + "learning_rate": 1.438311735038783e-08, + "loss": 8.6336, + "step": 50500 + }, + { + "epoch": 18.27, + "learning_rate": 1.4264658457954743e-08, + "loss": 8.5954, + "step": 50520 + }, + { + "epoch": 18.28, + "learning_rate": 1.4146675069466401e-08, + "loss": 8.6314, + "step": 50540 + }, + { + "epoch": 18.29, + "learning_rate": 1.4029167422908105e-08, + "loss": 8.6163, + "step": 50560 + }, + { + "epoch": 18.29, + "learning_rate": 1.3912135755305753e-08, + "loss": 8.756, + "step": 50580 + }, + { + "epoch": 18.3, + "learning_rate": 1.3795580302724874e-08, + "loss": 8.7309, + "step": 50600 + }, + { + "epoch": 18.31, + "learning_rate": 1.3679501300270652e-08, + "loss": 8.6124, + "step": 50620 + }, + { + "epoch": 18.31, + "learning_rate": 1.3563898982087069e-08, + "loss": 8.7536, + "step": 50640 + }, + { + "epoch": 18.32, + "learning_rate": 1.3448773581356653e-08, + "loss": 8.6723, + "step": 50660 + }, + { + "epoch": 18.33, + "learning_rate": 1.3334125330299928e-08, + "loss": 8.6517, + "step": 50680 + }, + { + "epoch": 18.34, + "learning_rate": 1.3219954460174876e-08, + "loss": 8.7087, + "step": 50700 + }, + { + "epoch": 18.34, + "learning_rate": 1.3106261201276724e-08, + "loss": 8.6847, + "step": 50720 + }, + { + "epoch": 18.35, + "learning_rate": 1.2993045782937084e-08, + "loss": 8.6975, + "step": 50740 + }, + { + "epoch": 18.36, + "learning_rate": 1.2880308433523945e-08, + "loss": 8.6035, + "step": 50760 + }, + { + "epoch": 18.37, + "learning_rate": 1.2768049380440765e-08, + "loss": 8.7271, + "step": 50780 + }, + { + "epoch": 18.37, + "learning_rate": 1.2656268850126411e-08, + "loss": 8.7072, + "step": 50800 + }, + { + "epoch": 18.38, + "learning_rate": 1.254496706805433e-08, + "loss": 8.806, + "step": 50820 + }, + { + "epoch": 18.39, + "learning_rate": 1.243414425873246e-08, + "loss": 8.6377, + "step": 50840 + }, + { + "epoch": 18.39, + "learning_rate": 1.2323800645702431e-08, + "loss": 8.7319, + "step": 50860 + }, + { + "epoch": 18.4, + "learning_rate": 1.221393645153948e-08, + "loss": 8.6606, + "step": 50880 + }, + { + "epoch": 18.41, + "learning_rate": 1.2104551897851644e-08, + "loss": 8.6349, + "step": 50900 + }, + { + "epoch": 18.42, + "learning_rate": 1.1995647205279457e-08, + "loss": 8.7337, + "step": 50920 + }, + { + "epoch": 18.42, + "learning_rate": 1.1887222593495699e-08, + "loss": 8.7268, + "step": 50940 + }, + { + "epoch": 18.43, + "learning_rate": 1.1779278281204536e-08, + "loss": 8.7171, + "step": 50960 + }, + { + "epoch": 18.44, + "learning_rate": 1.1671814486141546e-08, + "loss": 8.6429, + "step": 50980 + }, + { + "epoch": 18.44, + "learning_rate": 1.156483142507289e-08, + "loss": 8.6306, + "step": 51000 + }, + { + "epoch": 18.45, + "learning_rate": 1.1458329313795146e-08, + "loss": 8.7194, + "step": 51020 + }, + { + "epoch": 18.46, + "learning_rate": 1.135230836713466e-08, + "loss": 8.6686, + "step": 51040 + }, + { + "epoch": 18.47, + "learning_rate": 1.1246768798947287e-08, + "loss": 8.6497, + "step": 51060 + }, + { + "epoch": 18.47, + "learning_rate": 1.1141710822117872e-08, + "loss": 8.6738, + "step": 51080 + }, + { + "epoch": 18.48, + "learning_rate": 1.1037134648559793e-08, + "loss": 8.6014, + "step": 51100 + }, + { + "epoch": 18.49, + "learning_rate": 1.0933040489214674e-08, + "loss": 8.6016, + "step": 51120 + }, + { + "epoch": 18.5, + "learning_rate": 1.0829428554051834e-08, + "loss": 8.7236, + "step": 51140 + }, + { + "epoch": 18.5, + "learning_rate": 1.0726299052067761e-08, + "loss": 8.6591, + "step": 51160 + }, + { + "epoch": 18.51, + "learning_rate": 1.0623652191286026e-08, + "loss": 8.7178, + "step": 51180 + }, + { + "epoch": 18.52, + "learning_rate": 1.0521488178756532e-08, + "loss": 8.8135, + "step": 51200 + }, + { + "epoch": 18.52, + "learning_rate": 1.0419807220555271e-08, + "loss": 8.6145, + "step": 51220 + }, + { + "epoch": 18.53, + "learning_rate": 1.0318609521783817e-08, + "loss": 8.6767, + "step": 51240 + }, + { + "epoch": 18.54, + "learning_rate": 1.0217895286568995e-08, + "loss": 8.6303, + "step": 51260 + }, + { + "epoch": 18.55, + "learning_rate": 1.0117664718062469e-08, + "loss": 8.5655, + "step": 51280 + }, + { + "epoch": 18.55, + "learning_rate": 1.001791801844018e-08, + "loss": 8.6803, + "step": 51300 + }, + { + "epoch": 18.56, + "learning_rate": 9.918655388902158e-09, + "loss": 8.6766, + "step": 51320 + }, + { + "epoch": 18.57, + "learning_rate": 9.81987702967202e-09, + "loss": 8.6313, + "step": 51340 + }, + { + "epoch": 18.58, + "learning_rate": 9.721583139996382e-09, + "loss": 8.7152, + "step": 51360 + }, + { + "epoch": 18.58, + "learning_rate": 9.623773918144895e-09, + "loss": 8.6508, + "step": 51380 + }, + { + "epoch": 18.59, + "learning_rate": 9.526449561409356e-09, + "loss": 8.742, + "step": 51400 + }, + { + "epoch": 18.6, + "learning_rate": 9.429610266103699e-09, + "loss": 8.6364, + "step": 51420 + }, + { + "epoch": 18.6, + "learning_rate": 9.333256227563341e-09, + "loss": 8.8212, + "step": 51440 + }, + { + "epoch": 18.61, + "learning_rate": 9.237387640144867e-09, + "loss": 8.6756, + "step": 51460 + }, + { + "epoch": 18.62, + "learning_rate": 9.14200469722573e-09, + "loss": 8.5969, + "step": 51480 + }, + { + "epoch": 18.63, + "learning_rate": 9.047107591203723e-09, + "loss": 8.6711, + "step": 51500 + }, + { + "epoch": 18.63, + "learning_rate": 8.952696513496755e-09, + "loss": 8.6413, + "step": 51520 + }, + { + "epoch": 18.64, + "learning_rate": 8.858771654542185e-09, + "loss": 8.6178, + "step": 51540 + }, + { + "epoch": 18.65, + "learning_rate": 8.76533320379677e-09, + "loss": 8.7109, + "step": 51560 + }, + { + "epoch": 18.65, + "learning_rate": 8.672381349736108e-09, + "loss": 8.858, + "step": 51580 + }, + { + "epoch": 18.66, + "learning_rate": 8.57991627985416e-09, + "loss": 8.745, + "step": 51600 + }, + { + "epoch": 18.67, + "learning_rate": 8.48793818066315e-09, + "loss": 8.6337, + "step": 51620 + }, + { + "epoch": 18.68, + "learning_rate": 8.396447237692921e-09, + "loss": 8.7487, + "step": 51640 + }, + { + "epoch": 18.68, + "learning_rate": 8.305443635490712e-09, + "loss": 8.5949, + "step": 51660 + }, + { + "epoch": 18.69, + "learning_rate": 8.214927557620766e-09, + "loss": 8.6928, + "step": 51680 + }, + { + "epoch": 18.7, + "learning_rate": 8.124899186663815e-09, + "loss": 8.7684, + "step": 51700 + }, + { + "epoch": 18.71, + "learning_rate": 8.035358704217039e-09, + "loss": 8.6277, + "step": 51720 + }, + { + "epoch": 18.71, + "learning_rate": 7.94630629089324e-09, + "loss": 8.6413, + "step": 51740 + }, + { + "epoch": 18.72, + "learning_rate": 7.85774212632101e-09, + "loss": 8.6884, + "step": 51760 + }, + { + "epoch": 18.73, + "learning_rate": 7.769666389143864e-09, + "loss": 8.7835, + "step": 51780 + }, + { + "epoch": 18.73, + "learning_rate": 7.682079257020163e-09, + "loss": 8.6372, + "step": 51800 + }, + { + "epoch": 18.74, + "learning_rate": 7.594980906622805e-09, + "loss": 8.6274, + "step": 51820 + }, + { + "epoch": 18.75, + "learning_rate": 7.508371513638618e-09, + "loss": 8.6954, + "step": 51840 + }, + { + "epoch": 18.76, + "learning_rate": 7.422251252768269e-09, + "loss": 8.6183, + "step": 51860 + }, + { + "epoch": 18.76, + "learning_rate": 7.336620297725666e-09, + "loss": 8.6687, + "step": 51880 + }, + { + "epoch": 18.77, + "learning_rate": 7.251478821237833e-09, + "loss": 8.5802, + "step": 51900 + }, + { + "epoch": 18.78, + "learning_rate": 7.1668269950444784e-09, + "loss": 8.6163, + "step": 51920 + }, + { + "epoch": 18.78, + "learning_rate": 7.0826649898974856e-09, + "loss": 8.6173, + "step": 51940 + }, + { + "epoch": 18.79, + "learning_rate": 6.998992975560919e-09, + "loss": 8.6898, + "step": 51960 + }, + { + "epoch": 18.8, + "learning_rate": 6.915811120810355e-09, + "loss": 8.7128, + "step": 51980 + }, + { + "epoch": 18.81, + "learning_rate": 6.833119593432607e-09, + "loss": 8.6544, + "step": 52000 + }, + { + "epoch": 18.81, + "learning_rate": 6.750918560225583e-09, + "loss": 8.6416, + "step": 52020 + }, + { + "epoch": 18.82, + "learning_rate": 6.66920818699776e-09, + "loss": 8.7659, + "step": 52040 + }, + { + "epoch": 18.83, + "learning_rate": 6.587988638567881e-09, + "loss": 8.6386, + "step": 52060 + }, + { + "epoch": 18.84, + "learning_rate": 6.50726007876462e-09, + "loss": 8.7111, + "step": 52080 + }, + { + "epoch": 18.84, + "learning_rate": 6.427022670426329e-09, + "loss": 8.6797, + "step": 52100 + }, + { + "epoch": 18.85, + "learning_rate": 6.347276575400628e-09, + "loss": 8.664, + "step": 52120 + }, + { + "epoch": 18.86, + "learning_rate": 6.268021954544095e-09, + "loss": 8.6207, + "step": 52140 + }, + { + "epoch": 18.86, + "learning_rate": 6.189258967721989e-09, + "loss": 8.6631, + "step": 52160 + }, + { + "epoch": 18.87, + "learning_rate": 6.110987773807835e-09, + "loss": 8.7244, + "step": 52180 + }, + { + "epoch": 18.88, + "learning_rate": 6.033208530683204e-09, + "loss": 8.6522, + "step": 52200 + }, + { + "epoch": 18.89, + "learning_rate": 5.955921395237318e-09, + "loss": 8.698, + "step": 52220 + }, + { + "epoch": 18.89, + "learning_rate": 5.879126523366751e-09, + "loss": 8.6421, + "step": 52240 + }, + { + "epoch": 18.9, + "learning_rate": 5.802824069975176e-09, + "loss": 8.6436, + "step": 52260 + }, + { + "epoch": 18.91, + "learning_rate": 5.727014188972979e-09, + "loss": 8.6355, + "step": 52280 + }, + { + "epoch": 18.92, + "learning_rate": 5.651697033277003e-09, + "loss": 8.6781, + "step": 52300 + }, + { + "epoch": 18.92, + "learning_rate": 5.576872754810113e-09, + "loss": 8.7829, + "step": 52320 + }, + { + "epoch": 18.93, + "learning_rate": 5.5025415045011066e-09, + "loss": 8.9203, + "step": 52340 + }, + { + "epoch": 18.94, + "learning_rate": 5.428703432284243e-09, + "loss": 8.6602, + "step": 52360 + }, + { + "epoch": 18.94, + "learning_rate": 5.355358687098938e-09, + "loss": 8.7053, + "step": 52380 + }, + { + "epoch": 18.95, + "learning_rate": 5.282507416889625e-09, + "loss": 8.7101, + "step": 52400 + }, + { + "epoch": 18.96, + "learning_rate": 5.210149768605177e-09, + "loss": 8.7651, + "step": 52420 + }, + { + "epoch": 18.97, + "learning_rate": 5.138285888199007e-09, + "loss": 8.7112, + "step": 52440 + }, + { + "epoch": 18.97, + "learning_rate": 5.066915920628301e-09, + "loss": 8.7172, + "step": 52460 + }, + { + "epoch": 18.98, + "learning_rate": 4.996040009854152e-09, + "loss": 8.7541, + "step": 52480 + }, + { + "epoch": 18.99, + "learning_rate": 4.925658298840979e-09, + "loss": 8.7614, + "step": 52500 + }, + { + "epoch": 18.99, + "learning_rate": 4.855770929556385e-09, + "loss": 8.807, + "step": 52520 + }, + { + "epoch": 19.0, + "learning_rate": 4.786378042970884e-09, + "loss": 8.6, + "step": 52540 + }, + { + "epoch": 19.01, + "learning_rate": 4.7174797790574264e-09, + "loss": 8.7356, + "step": 52560 + }, + { + "epoch": 19.02, + "learning_rate": 4.649076276791425e-09, + "loss": 8.6023, + "step": 52580 + }, + { + "epoch": 19.02, + "learning_rate": 4.5811676741501496e-09, + "loss": 8.6362, + "step": 52600 + }, + { + "epoch": 19.03, + "learning_rate": 4.513754108112722e-09, + "loss": 8.748, + "step": 52620 + }, + { + "epoch": 19.04, + "learning_rate": 4.446835714659647e-09, + "loss": 8.679, + "step": 52640 + }, + { + "epoch": 19.05, + "learning_rate": 4.380412628772645e-09, + "loss": 8.6705, + "step": 52660 + }, + { + "epoch": 19.05, + "learning_rate": 4.314484984434319e-09, + "loss": 8.6436, + "step": 52680 + }, + { + "epoch": 19.06, + "learning_rate": 4.249052914627988e-09, + "loss": 8.6, + "step": 52700 + }, + { + "epoch": 19.07, + "learning_rate": 4.184116551337241e-09, + "loss": 8.9462, + "step": 52720 + }, + { + "epoch": 19.07, + "learning_rate": 4.119676025545777e-09, + "loss": 8.7455, + "step": 52740 + }, + { + "epoch": 19.08, + "learning_rate": 4.055731467237283e-09, + "loss": 8.6577, + "step": 52760 + }, + { + "epoch": 19.09, + "learning_rate": 3.992283005394837e-09, + "loss": 8.6922, + "step": 52780 + }, + { + "epoch": 19.1, + "learning_rate": 3.929330768000949e-09, + "loss": 8.6823, + "step": 52800 + }, + { + "epoch": 19.1, + "learning_rate": 3.866874882037157e-09, + "loss": 8.7113, + "step": 52820 + }, + { + "epoch": 19.11, + "learning_rate": 3.8049154734838275e-09, + "loss": 8.6948, + "step": 52840 + }, + { + "epoch": 19.12, + "learning_rate": 3.743452667319846e-09, + "loss": 8.6673, + "step": 52860 + }, + { + "epoch": 19.12, + "learning_rate": 3.6824865875224043e-09, + "loss": 8.7135, + "step": 52880 + }, + { + "epoch": 19.13, + "learning_rate": 3.6220173570667424e-09, + "loss": 8.6514, + "step": 52900 + }, + { + "epoch": 19.14, + "learning_rate": 3.562045097925903e-09, + "loss": 8.7106, + "step": 52920 + }, + { + "epoch": 19.15, + "learning_rate": 3.502569931070509e-09, + "loss": 8.6637, + "step": 52940 + }, + { + "epoch": 19.15, + "learning_rate": 3.4435919764684572e-09, + "loss": 8.6529, + "step": 52960 + }, + { + "epoch": 19.16, + "learning_rate": 3.385111353084724e-09, + "loss": 8.6605, + "step": 52980 + }, + { + "epoch": 19.17, + "learning_rate": 3.3271281788811444e-09, + "loss": 8.7399, + "step": 53000 + }, + { + "epoch": 19.18, + "learning_rate": 3.26964257081605e-09, + "loss": 8.6828, + "step": 53020 + }, + { + "epoch": 19.18, + "learning_rate": 3.2126546448442704e-09, + "loss": 8.7259, + "step": 53040 + }, + { + "epoch": 19.19, + "learning_rate": 3.1561645159166596e-09, + "loss": 8.7069, + "step": 53060 + }, + { + "epoch": 19.2, + "learning_rate": 3.1001722979799306e-09, + "loss": 8.6755, + "step": 53080 + }, + { + "epoch": 19.2, + "learning_rate": 3.0446781039765725e-09, + "loss": 8.637, + "step": 53100 + }, + { + "epoch": 19.21, + "learning_rate": 2.989682045844405e-09, + "loss": 8.6292, + "step": 53120 + }, + { + "epoch": 19.22, + "learning_rate": 2.935184234516497e-09, + "loss": 8.7152, + "step": 53140 + }, + { + "epoch": 19.23, + "learning_rate": 2.8811847799208868e-09, + "loss": 8.6, + "step": 53160 + }, + { + "epoch": 19.23, + "learning_rate": 2.827683790980362e-09, + "loss": 8.6343, + "step": 53180 + }, + { + "epoch": 19.24, + "learning_rate": 2.774681375612292e-09, + "loss": 8.6721, + "step": 53200 + }, + { + "epoch": 19.25, + "learning_rate": 2.722177640728324e-09, + "loss": 8.7229, + "step": 53220 + }, + { + "epoch": 19.25, + "learning_rate": 2.6701726922342126e-09, + "loss": 8.5951, + "step": 53240 + }, + { + "epoch": 19.26, + "learning_rate": 2.6186666350296594e-09, + "loss": 8.6535, + "step": 53260 + }, + { + "epoch": 19.27, + "learning_rate": 2.5676595730079742e-09, + "loss": 8.6983, + "step": 53280 + }, + { + "epoch": 19.28, + "learning_rate": 2.517151609055995e-09, + "loss": 8.5833, + "step": 53300 + }, + { + "epoch": 19.28, + "learning_rate": 2.4671428450537824e-09, + "loss": 8.6913, + "step": 53320 + }, + { + "epoch": 19.29, + "learning_rate": 2.417633381874534e-09, + "loss": 8.759, + "step": 53340 + }, + { + "epoch": 19.3, + "learning_rate": 2.3686233193841722e-09, + "loss": 8.6451, + "step": 53360 + }, + { + "epoch": 19.31, + "learning_rate": 2.3201127564414223e-09, + "loss": 8.6675, + "step": 53380 + }, + { + "epoch": 19.31, + "learning_rate": 2.272101790897346e-09, + "loss": 8.7818, + "step": 53400 + }, + { + "epoch": 19.32, + "learning_rate": 2.224590519595282e-09, + "loss": 8.5935, + "step": 53420 + }, + { + "epoch": 19.33, + "learning_rate": 2.177579038370736e-09, + "loss": 8.6203, + "step": 53440 + }, + { + "epoch": 19.33, + "learning_rate": 2.13106744205091e-09, + "loss": 8.6421, + "step": 53460 + }, + { + "epoch": 19.34, + "learning_rate": 2.085055824454868e-09, + "loss": 8.6868, + "step": 53480 + }, + { + "epoch": 19.35, + "learning_rate": 2.039544278393007e-09, + "loss": 8.7173, + "step": 53500 + }, + { + "epoch": 19.36, + "learning_rate": 1.994532895667117e-09, + "loss": 8.7867, + "step": 53520 + }, + { + "epoch": 19.36, + "learning_rate": 1.95002176707007e-09, + "loss": 8.6087, + "step": 53540 + }, + { + "epoch": 19.37, + "learning_rate": 1.9060109823856583e-09, + "loss": 8.7634, + "step": 53560 + }, + { + "epoch": 19.38, + "learning_rate": 1.8625006303884527e-09, + "loss": 8.5889, + "step": 53580 + }, + { + "epoch": 19.39, + "learning_rate": 1.8194907988436093e-09, + "loss": 8.5872, + "step": 53600 + }, + { + "epoch": 19.39, + "learning_rate": 1.7769815745066474e-09, + "loss": 8.629, + "step": 53620 + }, + { + "epoch": 19.4, + "learning_rate": 1.7349730431233111e-09, + "loss": 8.6705, + "step": 53640 + }, + { + "epoch": 19.41, + "learning_rate": 1.6934652894294022e-09, + "loss": 8.7446, + "step": 53660 + }, + { + "epoch": 19.41, + "learning_rate": 1.6524583971505857e-09, + "loss": 8.5836, + "step": 53680 + }, + { + "epoch": 19.42, + "learning_rate": 1.6119524490022795e-09, + "loss": 8.6287, + "step": 53700 + }, + { + "epoch": 19.43, + "learning_rate": 1.5719475266893489e-09, + "loss": 8.5938, + "step": 53720 + }, + { + "epoch": 19.44, + "learning_rate": 1.5324437109061616e-09, + "loss": 8.6851, + "step": 53740 + }, + { + "epoch": 19.44, + "learning_rate": 1.493441081336172e-09, + "loss": 8.7353, + "step": 53760 + }, + { + "epoch": 19.45, + "learning_rate": 1.454939716651976e-09, + "loss": 8.7456, + "step": 53780 + }, + { + "epoch": 19.46, + "learning_rate": 1.4169396945150346e-09, + "loss": 8.5975, + "step": 53800 + }, + { + "epoch": 19.46, + "learning_rate": 1.3794410915755339e-09, + "loss": 8.6356, + "step": 53820 + }, + { + "epoch": 19.47, + "learning_rate": 1.3424439834722746e-09, + "loss": 8.6342, + "step": 53840 + }, + { + "epoch": 19.48, + "learning_rate": 1.3059484448324221e-09, + "loss": 8.6636, + "step": 53860 + }, + { + "epoch": 19.49, + "learning_rate": 1.269954549271507e-09, + "loss": 8.7235, + "step": 53880 + }, + { + "epoch": 19.49, + "learning_rate": 1.2344623693931467e-09, + "loss": 8.7035, + "step": 53900 + }, + { + "epoch": 19.5, + "learning_rate": 1.199471976788935e-09, + "loss": 8.6849, + "step": 53920 + }, + { + "epoch": 19.51, + "learning_rate": 1.1649834420383032e-09, + "loss": 8.6341, + "step": 53940 + }, + { + "epoch": 19.52, + "learning_rate": 1.1309968347084364e-09, + "loss": 8.7291, + "step": 53960 + }, + { + "epoch": 19.52, + "learning_rate": 1.0975122233539968e-09, + "loss": 8.5852, + "step": 53980 + }, + { + "epoch": 19.53, + "learning_rate": 1.0645296755171229e-09, + "loss": 8.6188, + "step": 54000 + }, + { + "epoch": 19.54, + "learning_rate": 1.0320492577272077e-09, + "loss": 8.7887, + "step": 54020 + }, + { + "epoch": 19.54, + "learning_rate": 1.0000710355008157e-09, + "loss": 8.9253, + "step": 54040 + }, + { + "epoch": 19.55, + "learning_rate": 9.685950733414882e-10, + "loss": 8.5889, + "step": 54060 + }, + { + "epoch": 19.56, + "learning_rate": 9.376214347397437e-10, + "loss": 8.7428, + "step": 54080 + }, + { + "epoch": 19.57, + "learning_rate": 9.071501821727167e-10, + "loss": 8.7254, + "step": 54100 + }, + { + "epoch": 19.57, + "learning_rate": 8.771813771042968e-10, + "loss": 8.6894, + "step": 54120 + }, + { + "epoch": 19.58, + "learning_rate": 8.47715079984851e-10, + "loss": 8.7289, + "step": 54140 + }, + { + "epoch": 19.59, + "learning_rate": 8.187513502510846e-10, + "loss": 8.6529, + "step": 54160 + }, + { + "epoch": 19.59, + "learning_rate": 7.902902463260419e-10, + "loss": 8.6643, + "step": 54180 + }, + { + "epoch": 19.6, + "learning_rate": 7.62331825618856e-10, + "loss": 8.6873, + "step": 54200 + }, + { + "epoch": 19.61, + "learning_rate": 7.348761445247209e-10, + "loss": 8.6534, + "step": 54220 + }, + { + "epoch": 19.62, + "learning_rate": 7.079232584247252e-10, + "loss": 8.5672, + "step": 54240 + }, + { + "epoch": 19.62, + "learning_rate": 6.814732216858243e-10, + "loss": 8.6725, + "step": 54260 + }, + { + "epoch": 19.63, + "learning_rate": 6.555260876606183e-10, + "loss": 8.751, + "step": 54280 + }, + { + "epoch": 19.64, + "learning_rate": 6.300819086873243e-10, + "loss": 8.6916, + "step": 54300 + }, + { + "epoch": 19.65, + "learning_rate": 6.051407360895822e-10, + "loss": 8.6952, + "step": 54320 + }, + { + "epoch": 19.65, + "learning_rate": 5.807026201765098e-10, + "loss": 8.6289, + "step": 54340 + }, + { + "epoch": 19.66, + "learning_rate": 5.567676102424534e-10, + "loss": 8.7357, + "step": 54360 + }, + { + "epoch": 19.67, + "learning_rate": 5.333357545669325e-10, + "loss": 8.5972, + "step": 54380 + }, + { + "epoch": 19.67, + "learning_rate": 5.10407100414556e-10, + "loss": 8.6977, + "step": 54400 + }, + { + "epoch": 19.68, + "learning_rate": 4.87981694034939e-10, + "loss": 8.6525, + "step": 54420 + }, + { + "epoch": 19.69, + "learning_rate": 4.660595806625645e-10, + "loss": 8.7391, + "step": 54440 + }, + { + "epoch": 19.7, + "learning_rate": 4.446408045167549e-10, + "loss": 8.7213, + "step": 54460 + }, + { + "epoch": 19.7, + "learning_rate": 4.2372540880147854e-10, + "loss": 8.6799, + "step": 54480 + }, + { + "epoch": 19.71, + "learning_rate": 4.0331343570540466e-10, + "loss": 8.6531, + "step": 54500 + }, + { + "epoch": 19.72, + "learning_rate": 3.834049264017092e-10, + "loss": 8.7059, + "step": 54520 + }, + { + "epoch": 19.73, + "learning_rate": 3.6399992104804713e-10, + "loss": 8.6514, + "step": 54540 + }, + { + "epoch": 19.73, + "learning_rate": 3.450984587863859e-10, + "loss": 8.5528, + "step": 54560 + }, + { + "epoch": 19.74, + "learning_rate": 3.267005777430887e-10, + "loss": 8.944, + "step": 54580 + }, + { + "epoch": 19.75, + "learning_rate": 3.088063150286924e-10, + "loss": 8.6987, + "step": 54600 + }, + { + "epoch": 19.75, + "learning_rate": 2.91415706737852e-10, + "loss": 8.6739, + "step": 54620 + }, + { + "epoch": 19.76, + "learning_rate": 2.74528787949313e-10, + "loss": 8.702, + "step": 54640 + }, + { + "epoch": 19.77, + "learning_rate": 2.5814559272588353e-10, + "loss": 8.7295, + "step": 54660 + }, + { + "epoch": 19.78, + "learning_rate": 2.4226615411424013e-10, + "loss": 8.6574, + "step": 54680 + }, + { + "epoch": 19.78, + "learning_rate": 2.268905041449276e-10, + "loss": 8.6305, + "step": 54700 + }, + { + "epoch": 19.79, + "learning_rate": 2.1201867383233153e-10, + "loss": 8.6852, + "step": 54720 + }, + { + "epoch": 19.8, + "learning_rate": 1.9765069317453918e-10, + "loss": 8.6515, + "step": 54740 + }, + { + "epoch": 19.8, + "learning_rate": 1.8378659115333984e-10, + "loss": 8.7308, + "step": 54760 + }, + { + "epoch": 19.81, + "learning_rate": 1.7042639573419672e-10, + "loss": 8.6392, + "step": 54780 + }, + { + "epoch": 19.82, + "learning_rate": 1.5757013386599738e-10, + "loss": 8.6741, + "step": 54800 + }, + { + "epoch": 19.83, + "learning_rate": 1.452178314813035e-10, + "loss": 8.7132, + "step": 54820 + }, + { + "epoch": 19.83, + "learning_rate": 1.3336951349599e-10, + "loss": 8.6422, + "step": 54840 + }, + { + "epoch": 19.84, + "learning_rate": 1.2202520380946713e-10, + "loss": 8.6508, + "step": 54860 + }, + { + "epoch": 19.85, + "learning_rate": 1.1118492530443058e-10, + "loss": 8.5966, + "step": 54880 + }, + { + "epoch": 19.86, + "learning_rate": 1.0084869984686162e-10, + "loss": 8.7772, + "step": 54900 + }, + { + "epoch": 19.86, + "learning_rate": 9.101654828613803e-11, + "loss": 8.7014, + "step": 54920 + }, + { + "epoch": 19.87, + "learning_rate": 8.168849045472881e-11, + "loss": 8.7369, + "step": 54940 + }, + { + "epoch": 19.88, + "learning_rate": 7.286454516833296e-11, + "loss": 8.7147, + "step": 54960 + }, + { + "epoch": 19.88, + "learning_rate": 6.454473022587947e-11, + "loss": 8.593, + "step": 54980 + }, + { + "epoch": 19.89, + "learning_rate": 5.672906240927755e-11, + "loss": 8.6138, + "step": 55000 + }, + { + "epoch": 19.9, + "learning_rate": 4.9417557483610875e-11, + "loss": 8.6584, + "step": 55020 + }, + { + "epoch": 19.91, + "learning_rate": 4.261023019697108e-11, + "loss": 8.6232, + "step": 55040 + }, + { + "epoch": 19.91, + "learning_rate": 3.630709428051326e-11, + "loss": 8.7377, + "step": 55060 + }, + { + "epoch": 19.92, + "learning_rate": 3.050816244831722e-11, + "loss": 8.6403, + "step": 55080 + }, + { + "epoch": 19.93, + "learning_rate": 2.5213446397470693e-11, + "loss": 8.7605, + "step": 55100 + }, + { + "epoch": 19.93, + "learning_rate": 2.0422956808013868e-11, + "loss": 8.571, + "step": 55120 + }, + { + "epoch": 19.94, + "learning_rate": 1.6136703342856107e-11, + "loss": 8.6668, + "step": 55140 + }, + { + "epoch": 19.95, + "learning_rate": 1.235469464785921e-11, + "loss": 8.7839, + "step": 55160 + }, + { + "epoch": 19.96, + "learning_rate": 9.07693835175416e-12, + "loss": 8.7593, + "step": 55180 + }, + { + "epoch": 19.96, + "learning_rate": 6.3034410661133574e-12, + "loss": 8.732, + "step": 55200 + }, + { + "epoch": 19.97, + "learning_rate": 4.034208385378379e-12, + "loss": 8.725, + "step": 55220 + }, + { + "epoch": 19.98, + "learning_rate": 2.2692448868877334e-12, + "loss": 8.8461, + "step": 55240 + }, + { + "epoch": 19.99, + "learning_rate": 1.0085541307103262e-12, + "loss": 8.6615, + "step": 55260 + }, + { + "epoch": 19.99, + "learning_rate": 2.521386598119957e-13, + "loss": 8.6046, + "step": 55280 + }, + { + "epoch": 20.0, + "learning_rate": 0.0, + "loss": 8.6345, + "step": 55300 + }, + { + "epoch": 20.0, + "step": 55300, + "total_flos": 1.644510409710981e+18, + "train_loss": 8.68246454138868, + "train_runtime": 29891.7849, + "train_samples_per_second": 3.7, + "train_steps_per_second": 1.85 + } + ], + "logging_steps": 20, + "max_steps": 55300, + "num_input_tokens_seen": 0, + "num_train_epochs": 20, + "save_steps": 100, + "total_flos": 1.644510409710981e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}