{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9992627698788836, "eval_steps": 10000000000000, "global_step": 1186, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": "4.1667e-06", "loss": 0.2308, "slid_loss": 0.2308, "step": 1, "time": 69.64 }, { "epoch": 0.0, "learning_rate": "8.3333e-06", "loss": 0.2283, "slid_loss": 0.2295, "step": 2, "time": 49.7 }, { "epoch": 0.0, "learning_rate": "1.2500e-05", "loss": 0.2048, "slid_loss": 0.2213, "step": 3, "time": 50.89 }, { "epoch": 0.0, "learning_rate": "1.6667e-05", "loss": 0.2068, "slid_loss": 0.2177, "step": 4, "time": 51.24 }, { "epoch": 0.0, "learning_rate": "2.0833e-05", "loss": 0.2212, "slid_loss": 0.2184, "step": 5, "time": 49.99 }, { "epoch": 0.01, "learning_rate": "2.5000e-05", "loss": 0.2041, "slid_loss": 0.216, "step": 6, "time": 50.92 }, { "epoch": 0.01, "learning_rate": "2.9167e-05", "loss": 0.2007, "slid_loss": 0.2138, "step": 7, "time": 50.23 }, { "epoch": 0.01, "learning_rate": "3.3333e-05", "loss": 0.184, "slid_loss": 0.2101, "step": 8, "time": 50.16 }, { "epoch": 0.01, "learning_rate": "3.7500e-05", "loss": 0.1886, "slid_loss": 0.2077, "step": 9, "time": 49.22 }, { "epoch": 0.01, "learning_rate": "4.1667e-05", "loss": 0.1734, "slid_loss": 0.2043, "step": 10, "time": 50.58 }, { "epoch": 0.01, "learning_rate": "4.5833e-05", "loss": 0.1826, "slid_loss": 0.2023, "step": 11, "time": 50.18 }, { "epoch": 0.01, "learning_rate": "5.0000e-05", "loss": 0.1937, "slid_loss": 0.2016, "step": 12, "time": 50.83 }, { "epoch": 0.01, "learning_rate": "5.0000e-05", "loss": 0.173, "slid_loss": 0.1994, "step": 13, "time": 50.77 }, { "epoch": 0.01, "learning_rate": "5.0000e-05", "loss": 0.1723, "slid_loss": 0.1974, "step": 14, "time": 50.49 }, { "epoch": 0.01, "learning_rate": "4.9999e-05", "loss": 0.1709, "slid_loss": 0.1957, "step": 15, "time": 50.7 }, { "epoch": 0.01, "learning_rate": "4.9999e-05", "loss": 0.1635, "slid_loss": 0.1937, "step": 16, "time": 51.16 }, { "epoch": 0.01, "learning_rate": "4.9998e-05", "loss": 0.1688, "slid_loss": 0.1922, "step": 17, "time": 48.41 }, { "epoch": 0.02, "learning_rate": "4.9997e-05", "loss": 0.1591, "slid_loss": 0.1904, "step": 18, "time": 50.35 }, { "epoch": 0.02, "learning_rate": "4.9996e-05", "loss": 0.16, "slid_loss": 0.1888, "step": 19, "time": 50.23 }, { "epoch": 0.02, "learning_rate": "4.9995e-05", "loss": 0.1569, "slid_loss": 0.1872, "step": 20, "time": 50.83 }, { "epoch": 0.02, "learning_rate": "4.9994e-05", "loss": 0.1618, "slid_loss": 0.186, "step": 21, "time": 52.65 }, { "epoch": 0.02, "learning_rate": "4.9993e-05", "loss": 0.1533, "slid_loss": 0.1845, "step": 22, "time": 49.45 }, { "epoch": 0.02, "learning_rate": "4.9991e-05", "loss": 0.1526, "slid_loss": 0.1831, "step": 23, "time": 49.9 }, { "epoch": 0.02, "learning_rate": "4.9990e-05", "loss": 0.1434, "slid_loss": 0.1814, "step": 24, "time": 49.17 }, { "epoch": 0.02, "learning_rate": "4.9988e-05", "loss": 0.1542, "slid_loss": 0.1803, "step": 25, "time": 49.77 }, { "epoch": 0.02, "learning_rate": "4.9986e-05", "loss": 0.1513, "slid_loss": 0.1792, "step": 26, "time": 50.4 }, { "epoch": 0.02, "learning_rate": "4.9984e-05", "loss": 0.1505, "slid_loss": 0.1782, "step": 27, "time": 51.29 }, { "epoch": 0.02, "learning_rate": "4.9982e-05", "loss": 0.1455, "slid_loss": 0.177, "step": 28, "time": 50.09 }, { "epoch": 0.02, "learning_rate": "4.9979e-05", "loss": 0.1421, "slid_loss": 0.1758, "step": 29, "time": 49.82 }, { "epoch": 0.03, "learning_rate": "4.9977e-05", "loss": 0.1426, "slid_loss": 0.1747, "step": 30, "time": 50.34 }, { "epoch": 0.03, "learning_rate": "4.9974e-05", "loss": 0.1473, "slid_loss": 0.1738, "step": 31, "time": 50.35 }, { "epoch": 0.03, "learning_rate": "4.9971e-05", "loss": 0.1445, "slid_loss": 0.1729, "step": 32, "time": 49.16 }, { "epoch": 0.03, "learning_rate": "4.9968e-05", "loss": 0.1394, "slid_loss": 0.1719, "step": 33, "time": 50.26 }, { "epoch": 0.03, "learning_rate": "4.9965e-05", "loss": 0.1384, "slid_loss": 0.1709, "step": 34, "time": 49.85 }, { "epoch": 0.03, "learning_rate": "4.9962e-05", "loss": 0.1364, "slid_loss": 0.1699, "step": 35, "time": 49.4 }, { "epoch": 0.03, "learning_rate": "4.9959e-05", "loss": 0.1502, "slid_loss": 0.1694, "step": 36, "time": 50.33 }, { "epoch": 0.03, "learning_rate": "4.9955e-05", "loss": 0.1404, "slid_loss": 0.1686, "step": 37, "time": 49.8 }, { "epoch": 0.03, "learning_rate": "4.9952e-05", "loss": 0.1316, "slid_loss": 0.1676, "step": 38, "time": 51.2 }, { "epoch": 0.03, "learning_rate": "4.9948e-05", "loss": 0.1428, "slid_loss": 0.167, "step": 39, "time": 51.34 }, { "epoch": 0.03, "learning_rate": "4.9944e-05", "loss": 0.1493, "slid_loss": 0.1665, "step": 40, "time": 50.37 }, { "epoch": 0.03, "learning_rate": "4.9940e-05", "loss": 0.1366, "slid_loss": 0.1658, "step": 41, "time": 50.4 }, { "epoch": 0.04, "learning_rate": "4.9936e-05", "loss": 0.1434, "slid_loss": 0.1653, "step": 42, "time": 51.24 }, { "epoch": 0.04, "learning_rate": "4.9931e-05", "loss": 0.1316, "slid_loss": 0.1645, "step": 43, "time": 49.58 }, { "epoch": 0.04, "learning_rate": "4.9927e-05", "loss": 0.1322, "slid_loss": 0.1637, "step": 44, "time": 49.87 }, { "epoch": 0.04, "learning_rate": "4.9922e-05", "loss": 0.1338, "slid_loss": 0.1631, "step": 45, "time": 50.87 }, { "epoch": 0.04, "learning_rate": "4.9917e-05", "loss": 0.1334, "slid_loss": 0.1624, "step": 46, "time": 49.59 }, { "epoch": 0.04, "learning_rate": "4.9912e-05", "loss": 0.1355, "slid_loss": 0.1619, "step": 47, "time": 49.54 }, { "epoch": 0.04, "learning_rate": "4.9907e-05", "loss": 0.1302, "slid_loss": 0.1612, "step": 48, "time": 48.96 }, { "epoch": 0.04, "learning_rate": "4.9902e-05", "loss": 0.1342, "slid_loss": 0.1607, "step": 49, "time": 49.08 }, { "epoch": 0.04, "learning_rate": "4.9897e-05", "loss": 0.139, "slid_loss": 0.1602, "step": 50, "time": 49.88 }, { "epoch": 0.04, "learning_rate": "4.9891e-05", "loss": 0.1259, "slid_loss": 0.1595, "step": 51, "time": 51.06 }, { "epoch": 0.04, "learning_rate": "4.9886e-05", "loss": 0.1327, "slid_loss": 0.159, "step": 52, "time": 50.25 }, { "epoch": 0.04, "learning_rate": "4.9880e-05", "loss": 0.1431, "slid_loss": 0.1587, "step": 53, "time": 49.78 }, { "epoch": 0.05, "learning_rate": "4.9874e-05", "loss": 0.1359, "slid_loss": 0.1583, "step": 54, "time": 50.38 }, { "epoch": 0.05, "learning_rate": "4.9868e-05", "loss": 0.129, "slid_loss": 0.1578, "step": 55, "time": 49.66 }, { "epoch": 0.05, "learning_rate": "4.9862e-05", "loss": 0.1361, "slid_loss": 0.1574, "step": 56, "time": 50.36 }, { "epoch": 0.05, "learning_rate": "4.9855e-05", "loss": 0.132, "slid_loss": 0.1569, "step": 57, "time": 51.04 }, { "epoch": 0.05, "learning_rate": "4.9849e-05", "loss": 0.1334, "slid_loss": 0.1565, "step": 58, "time": 51.49 }, { "epoch": 0.05, "learning_rate": "4.9842e-05", "loss": 0.1316, "slid_loss": 0.1561, "step": 59, "time": 48.73 }, { "epoch": 0.05, "learning_rate": "4.9835e-05", "loss": 0.1312, "slid_loss": 0.1557, "step": 60, "time": 51.57 }, { "epoch": 0.05, "learning_rate": "4.9828e-05", "loss": 0.1414, "slid_loss": 0.1555, "step": 61, "time": 50.53 }, { "epoch": 0.05, "learning_rate": "4.9821e-05", "loss": 0.127, "slid_loss": 0.155, "step": 62, "time": 49.96 }, { "epoch": 0.05, "learning_rate": "4.9814e-05", "loss": 0.1217, "slid_loss": 0.1545, "step": 63, "time": 51.1 }, { "epoch": 0.05, "learning_rate": "4.9807e-05", "loss": 0.1316, "slid_loss": 0.1541, "step": 64, "time": 49.48 }, { "epoch": 0.05, "learning_rate": "4.9799e-05", "loss": 0.1346, "slid_loss": 0.1538, "step": 65, "time": 49.18 }, { "epoch": 0.06, "learning_rate": "4.9792e-05", "loss": 0.1301, "slid_loss": 0.1535, "step": 66, "time": 51.31 }, { "epoch": 0.06, "learning_rate": "4.9784e-05", "loss": 0.1379, "slid_loss": 0.1532, "step": 67, "time": 51.16 }, { "epoch": 0.06, "learning_rate": "4.9776e-05", "loss": 0.1301, "slid_loss": 0.1529, "step": 68, "time": 50.42 }, { "epoch": 0.06, "learning_rate": "4.9768e-05", "loss": 0.1387, "slid_loss": 0.1527, "step": 69, "time": 53.07 }, { "epoch": 0.06, "learning_rate": "4.9760e-05", "loss": 0.1253, "slid_loss": 0.1523, "step": 70, "time": 52.88 }, { "epoch": 0.06, "learning_rate": "4.9751e-05", "loss": 0.1288, "slid_loss": 0.152, "step": 71, "time": 52.12 }, { "epoch": 0.06, "learning_rate": "4.9743e-05", "loss": 0.1313, "slid_loss": 0.1517, "step": 72, "time": 52.58 }, { "epoch": 0.06, "learning_rate": "4.9734e-05", "loss": 0.138, "slid_loss": 0.1515, "step": 73, "time": 51.67 }, { "epoch": 0.06, "learning_rate": "4.9725e-05", "loss": 0.127, "slid_loss": 0.1512, "step": 74, "time": 52.98 }, { "epoch": 0.06, "learning_rate": "4.9716e-05", "loss": 0.1301, "slid_loss": 0.1509, "step": 75, "time": 50.21 }, { "epoch": 0.06, "learning_rate": "4.9707e-05", "loss": 0.1301, "slid_loss": 0.1506, "step": 76, "time": 50.7 }, { "epoch": 0.06, "learning_rate": "4.9698e-05", "loss": 0.1312, "slid_loss": 0.1503, "step": 77, "time": 50.1 }, { "epoch": 0.07, "learning_rate": "4.9689e-05", "loss": 0.1243, "slid_loss": 0.15, "step": 78, "time": 50.26 }, { "epoch": 0.07, "learning_rate": "4.9679e-05", "loss": 0.1336, "slid_loss": 0.1498, "step": 79, "time": 50.27 }, { "epoch": 0.07, "learning_rate": "4.9670e-05", "loss": 0.1288, "slid_loss": 0.1495, "step": 80, "time": 48.68 }, { "epoch": 0.07, "learning_rate": "4.9660e-05", "loss": 0.1327, "slid_loss": 0.1493, "step": 81, "time": 50.4 }, { "epoch": 0.07, "learning_rate": "4.9650e-05", "loss": 0.1281, "slid_loss": 0.1491, "step": 82, "time": 49.98 }, { "epoch": 0.07, "learning_rate": "4.9640e-05", "loss": 0.1127, "slid_loss": 0.1486, "step": 83, "time": 49.61 }, { "epoch": 0.07, "learning_rate": "4.9630e-05", "loss": 0.1264, "slid_loss": 0.1484, "step": 84, "time": 49.91 }, { "epoch": 0.07, "learning_rate": "4.9620e-05", "loss": 0.135, "slid_loss": 0.1482, "step": 85, "time": 50.92 }, { "epoch": 0.07, "learning_rate": "4.9609e-05", "loss": 0.131, "slid_loss": 0.148, "step": 86, "time": 50.52 }, { "epoch": 0.07, "learning_rate": "4.9599e-05", "loss": 0.1303, "slid_loss": 0.1478, "step": 87, "time": 50.71 }, { "epoch": 0.07, "learning_rate": "4.9588e-05", "loss": 0.1226, "slid_loss": 0.1475, "step": 88, "time": 50.43 }, { "epoch": 0.07, "learning_rate": "4.9577e-05", "loss": 0.1281, "slid_loss": 0.1473, "step": 89, "time": 49.89 }, { "epoch": 0.08, "learning_rate": "4.9566e-05", "loss": 0.1265, "slid_loss": 0.1471, "step": 90, "time": 50.54 }, { "epoch": 0.08, "learning_rate": "4.9555e-05", "loss": 0.1297, "slid_loss": 0.1469, "step": 91, "time": 50.48 }, { "epoch": 0.08, "learning_rate": "4.9543e-05", "loss": 0.1225, "slid_loss": 0.1466, "step": 92, "time": 50.87 }, { "epoch": 0.08, "learning_rate": "4.9532e-05", "loss": 0.1336, "slid_loss": 0.1465, "step": 93, "time": 49.78 }, { "epoch": 0.08, "learning_rate": "4.9520e-05", "loss": 0.1184, "slid_loss": 0.1462, "step": 94, "time": 50.34 }, { "epoch": 0.08, "learning_rate": "4.9509e-05", "loss": 0.1279, "slid_loss": 0.146, "step": 95, "time": 50.36 }, { "epoch": 0.08, "learning_rate": "4.9497e-05", "loss": 0.1218, "slid_loss": 0.1457, "step": 96, "time": 50.76 }, { "epoch": 0.08, "learning_rate": "4.9485e-05", "loss": 0.1242, "slid_loss": 0.1455, "step": 97, "time": 50.23 }, { "epoch": 0.08, "learning_rate": "4.9473e-05", "loss": 0.1258, "slid_loss": 0.1453, "step": 98, "time": 50.93 }, { "epoch": 0.08, "learning_rate": "4.9460e-05", "loss": 0.1295, "slid_loss": 0.1452, "step": 99, "time": 50.54 }, { "epoch": 0.08, "learning_rate": "4.9448e-05", "loss": 0.126, "slid_loss": 0.145, "step": 100, "time": 49.25 }, { "epoch": 0.09, "learning_rate": "4.9435e-05", "loss": 0.1182, "slid_loss": 0.1438, "step": 101, "time": 49.93 }, { "epoch": 0.09, "learning_rate": "4.9423e-05", "loss": 0.13, "slid_loss": 0.1429, "step": 102, "time": 50.46 }, { "epoch": 0.09, "learning_rate": "4.9410e-05", "loss": 0.1263, "slid_loss": 0.1421, "step": 103, "time": 51.24 }, { "epoch": 0.09, "learning_rate": "4.9397e-05", "loss": 0.1242, "slid_loss": 0.1412, "step": 104, "time": 49.8 }, { "epoch": 0.09, "learning_rate": "4.9384e-05", "loss": 0.1289, "slid_loss": 0.1403, "step": 105, "time": 50.47 }, { "epoch": 0.09, "learning_rate": "4.9371e-05", "loss": 0.1259, "slid_loss": 0.1395, "step": 106, "time": 51.11 }, { "epoch": 0.09, "learning_rate": "4.9357e-05", "loss": 0.128, "slid_loss": 0.1388, "step": 107, "time": 49.39 }, { "epoch": 0.09, "learning_rate": "4.9344e-05", "loss": 0.1245, "slid_loss": 0.1382, "step": 108, "time": 50.88 }, { "epoch": 0.09, "learning_rate": "4.9330e-05", "loss": 0.1224, "slid_loss": 0.1376, "step": 109, "time": 50.42 }, { "epoch": 0.09, "learning_rate": "4.9316e-05", "loss": 0.1296, "slid_loss": 0.1371, "step": 110, "time": 50.49 }, { "epoch": 0.09, "learning_rate": "4.9302e-05", "loss": 0.1262, "slid_loss": 0.1366, "step": 111, "time": 50.89 }, { "epoch": 0.09, "learning_rate": "4.9288e-05", "loss": 0.1222, "slid_loss": 0.1358, "step": 112, "time": 50.04 }, { "epoch": 0.1, "learning_rate": "4.9274e-05", "loss": 0.122, "slid_loss": 0.1353, "step": 113, "time": 50.12 }, { "epoch": 0.1, "learning_rate": "4.9260e-05", "loss": 0.1237, "slid_loss": 0.1348, "step": 114, "time": 49.1 }, { "epoch": 0.1, "learning_rate": "4.9245e-05", "loss": 0.1329, "slid_loss": 0.1345, "step": 115, "time": 51.21 }, { "epoch": 0.1, "learning_rate": "4.9230e-05", "loss": 0.1191, "slid_loss": 0.134, "step": 116, "time": 50.8 }, { "epoch": 0.1, "learning_rate": "4.9216e-05", "loss": 0.1247, "slid_loss": 0.1336, "step": 117, "time": 50.53 }, { "epoch": 0.1, "learning_rate": "4.9201e-05", "loss": 0.1238, "slid_loss": 0.1332, "step": 118, "time": 49.55 }, { "epoch": 0.1, "learning_rate": "4.9186e-05", "loss": 0.1215, "slid_loss": 0.1328, "step": 119, "time": 51.06 }, { "epoch": 0.1, "learning_rate": "4.9171e-05", "loss": 0.1218, "slid_loss": 0.1325, "step": 120, "time": 49.46 }, { "epoch": 0.1, "learning_rate": "4.9155e-05", "loss": 0.1209, "slid_loss": 0.1321, "step": 121, "time": 49.18 }, { "epoch": 0.1, "learning_rate": "4.9140e-05", "loss": 0.1238, "slid_loss": 0.1318, "step": 122, "time": 50.97 }, { "epoch": 0.1, "learning_rate": "4.9124e-05", "loss": 0.1235, "slid_loss": 0.1315, "step": 123, "time": 50.14 }, { "epoch": 0.1, "learning_rate": "4.9108e-05", "loss": 0.1274, "slid_loss": 0.1313, "step": 124, "time": 50.83 }, { "epoch": 0.11, "learning_rate": "4.9093e-05", "loss": 0.1163, "slid_loss": 0.131, "step": 125, "time": 49.39 }, { "epoch": 0.11, "learning_rate": "4.9077e-05", "loss": 0.1164, "slid_loss": 0.1306, "step": 126, "time": 51.38 }, { "epoch": 0.11, "learning_rate": "4.9060e-05", "loss": 0.1217, "slid_loss": 0.1303, "step": 127, "time": 50.15 }, { "epoch": 0.11, "learning_rate": "4.9044e-05", "loss": 0.12, "slid_loss": 0.1301, "step": 128, "time": 50.21 }, { "epoch": 0.11, "learning_rate": "4.9028e-05", "loss": 0.1107, "slid_loss": 0.1297, "step": 129, "time": 49.67 }, { "epoch": 0.11, "learning_rate": "4.9011e-05", "loss": 0.1298, "slid_loss": 0.1296, "step": 130, "time": 49.08 }, { "epoch": 0.11, "learning_rate": "4.8994e-05", "loss": 0.123, "slid_loss": 0.1294, "step": 131, "time": 49.75 }, { "epoch": 0.11, "learning_rate": "4.8978e-05", "loss": 0.1248, "slid_loss": 0.1292, "step": 132, "time": 50.3 }, { "epoch": 0.11, "learning_rate": "4.8961e-05", "loss": 0.1213, "slid_loss": 0.129, "step": 133, "time": 49.66 }, { "epoch": 0.11, "learning_rate": "4.8944e-05", "loss": 0.124, "slid_loss": 0.1289, "step": 134, "time": 50.02 }, { "epoch": 0.11, "learning_rate": "4.8926e-05", "loss": 0.1199, "slid_loss": 0.1287, "step": 135, "time": 50.48 }, { "epoch": 0.11, "learning_rate": "4.8909e-05", "loss": 0.1321, "slid_loss": 0.1285, "step": 136, "time": 50.4 }, { "epoch": 0.12, "learning_rate": "4.8892e-05", "loss": 0.1217, "slid_loss": 0.1283, "step": 137, "time": 51.55 }, { "epoch": 0.12, "learning_rate": "4.8874e-05", "loss": 0.1181, "slid_loss": 0.1282, "step": 138, "time": 49.5 }, { "epoch": 0.12, "learning_rate": "4.8856e-05", "loss": 0.1193, "slid_loss": 0.128, "step": 139, "time": 51.01 }, { "epoch": 0.12, "learning_rate": "4.8838e-05", "loss": 0.1174, "slid_loss": 0.1276, "step": 140, "time": 52.77 }, { "epoch": 0.12, "learning_rate": "4.8820e-05", "loss": 0.1215, "slid_loss": 0.1275, "step": 141, "time": 52.37 }, { "epoch": 0.12, "learning_rate": "4.8802e-05", "loss": 0.1185, "slid_loss": 0.1272, "step": 142, "time": 50.64 }, { "epoch": 0.12, "learning_rate": "4.8784e-05", "loss": 0.1168, "slid_loss": 0.1271, "step": 143, "time": 52.15 }, { "epoch": 0.12, "learning_rate": "4.8765e-05", "loss": 0.1189, "slid_loss": 0.127, "step": 144, "time": 51.23 }, { "epoch": 0.12, "learning_rate": "4.8747e-05", "loss": 0.1195, "slid_loss": 0.1268, "step": 145, "time": 51.95 }, { "epoch": 0.12, "learning_rate": "4.8728e-05", "loss": 0.1251, "slid_loss": 0.1267, "step": 146, "time": 50.27 }, { "epoch": 0.12, "learning_rate": "4.8709e-05", "loss": 0.131, "slid_loss": 0.1267, "step": 147, "time": 51.18 }, { "epoch": 0.12, "learning_rate": "4.8690e-05", "loss": 0.1173, "slid_loss": 0.1266, "step": 148, "time": 51.61 }, { "epoch": 0.13, "learning_rate": "4.8671e-05", "loss": 0.1189, "slid_loss": 0.1264, "step": 149, "time": 49.69 }, { "epoch": 0.13, "learning_rate": "4.8652e-05", "loss": 0.1204, "slid_loss": 0.1262, "step": 150, "time": 50.31 }, { "epoch": 0.13, "learning_rate": "4.8632e-05", "loss": 0.1137, "slid_loss": 0.1261, "step": 151, "time": 49.5 }, { "epoch": 0.13, "learning_rate": "4.8613e-05", "loss": 0.1194, "slid_loss": 0.126, "step": 152, "time": 50.56 }, { "epoch": 0.13, "learning_rate": "4.8593e-05", "loss": 0.1268, "slid_loss": 0.1258, "step": 153, "time": 48.37 }, { "epoch": 0.13, "learning_rate": "4.8573e-05", "loss": 0.1231, "slid_loss": 0.1257, "step": 154, "time": 50.35 }, { "epoch": 0.13, "learning_rate": "4.8553e-05", "loss": 0.1239, "slid_loss": 0.1256, "step": 155, "time": 50.18 }, { "epoch": 0.13, "learning_rate": "4.8533e-05", "loss": 0.1268, "slid_loss": 0.1255, "step": 156, "time": 51.29 }, { "epoch": 0.13, "learning_rate": "4.8513e-05", "loss": 0.1265, "slid_loss": 0.1255, "step": 157, "time": 50.82 }, { "epoch": 0.13, "learning_rate": "4.8493e-05", "loss": 0.1155, "slid_loss": 0.1253, "step": 158, "time": 50.56 }, { "epoch": 0.13, "learning_rate": "4.8472e-05", "loss": 0.1164, "slid_loss": 0.1251, "step": 159, "time": 49.91 }, { "epoch": 0.13, "learning_rate": "4.8452e-05", "loss": 0.1209, "slid_loss": 0.125, "step": 160, "time": 50.8 }, { "epoch": 0.14, "learning_rate": "4.8431e-05", "loss": 0.1202, "slid_loss": 0.1248, "step": 161, "time": 49.74 }, { "epoch": 0.14, "learning_rate": "4.8410e-05", "loss": 0.1171, "slid_loss": 0.1247, "step": 162, "time": 50.48 }, { "epoch": 0.14, "learning_rate": "4.8389e-05", "loss": 0.1214, "slid_loss": 0.1247, "step": 163, "time": 50.5 }, { "epoch": 0.14, "learning_rate": "4.8368e-05", "loss": 0.1228, "slid_loss": 0.1246, "step": 164, "time": 50.49 }, { "epoch": 0.14, "learning_rate": "4.8347e-05", "loss": 0.1276, "slid_loss": 0.1246, "step": 165, "time": 50.61 }, { "epoch": 0.14, "learning_rate": "4.8326e-05", "loss": 0.1202, "slid_loss": 0.1245, "step": 166, "time": 50.21 }, { "epoch": 0.14, "learning_rate": "4.8304e-05", "loss": 0.1152, "slid_loss": 0.1242, "step": 167, "time": 49.36 }, { "epoch": 0.14, "learning_rate": "4.8283e-05", "loss": 0.1169, "slid_loss": 0.1241, "step": 168, "time": 50.49 }, { "epoch": 0.14, "learning_rate": "4.8261e-05", "loss": 0.1181, "slid_loss": 0.1239, "step": 169, "time": 49.69 }, { "epoch": 0.14, "learning_rate": "4.8239e-05", "loss": 0.1219, "slid_loss": 0.1239, "step": 170, "time": 50.78 }, { "epoch": 0.14, "learning_rate": "4.8217e-05", "loss": 0.1169, "slid_loss": 0.1237, "step": 171, "time": 49.56 }, { "epoch": 0.14, "learning_rate": "4.8195e-05", "loss": 0.1215, "slid_loss": 0.1236, "step": 172, "time": 50.45 }, { "epoch": 0.15, "learning_rate": "4.8172e-05", "loss": 0.1145, "slid_loss": 0.1234, "step": 173, "time": 49.6 }, { "epoch": 0.15, "learning_rate": "4.8150e-05", "loss": 0.115, "slid_loss": 0.1233, "step": 174, "time": 50.01 }, { "epoch": 0.15, "learning_rate": "4.8127e-05", "loss": 0.1204, "slid_loss": 0.1232, "step": 175, "time": 49.86 }, { "epoch": 0.15, "learning_rate": "4.8105e-05", "loss": 0.1145, "slid_loss": 0.123, "step": 176, "time": 50.41 }, { "epoch": 0.15, "learning_rate": "4.8082e-05", "loss": 0.1181, "slid_loss": 0.1229, "step": 177, "time": 50.73 }, { "epoch": 0.15, "learning_rate": "4.8059e-05", "loss": 0.1148, "slid_loss": 0.1228, "step": 178, "time": 50.6 }, { "epoch": 0.15, "learning_rate": "4.8036e-05", "loss": 0.1104, "slid_loss": 0.1226, "step": 179, "time": 50.35 }, { "epoch": 0.15, "learning_rate": "4.8013e-05", "loss": 0.114, "slid_loss": 0.1224, "step": 180, "time": 50.3 }, { "epoch": 0.15, "learning_rate": "4.7989e-05", "loss": 0.1162, "slid_loss": 0.1223, "step": 181, "time": 49.87 }, { "epoch": 0.15, "learning_rate": "4.7966e-05", "loss": 0.1095, "slid_loss": 0.1221, "step": 182, "time": 50.08 }, { "epoch": 0.15, "learning_rate": "4.7942e-05", "loss": 0.1239, "slid_loss": 0.1222, "step": 183, "time": 50.1 }, { "epoch": 0.16, "learning_rate": "4.7919e-05", "loss": 0.1214, "slid_loss": 0.1221, "step": 184, "time": 49.79 }, { "epoch": 0.16, "learning_rate": "4.7895e-05", "loss": 0.1168, "slid_loss": 0.122, "step": 185, "time": 49.23 }, { "epoch": 0.16, "learning_rate": "4.7871e-05", "loss": 0.1181, "slid_loss": 0.1218, "step": 186, "time": 50.71 }, { "epoch": 0.16, "learning_rate": "4.7847e-05", "loss": 0.121, "slid_loss": 0.1217, "step": 187, "time": 49.24 }, { "epoch": 0.16, "learning_rate": "4.7823e-05", "loss": 0.1145, "slid_loss": 0.1217, "step": 188, "time": 49.53 }, { "epoch": 0.16, "learning_rate": "4.7798e-05", "loss": 0.1134, "slid_loss": 0.1215, "step": 189, "time": 50.66 }, { "epoch": 0.16, "learning_rate": "4.7774e-05", "loss": 0.1184, "slid_loss": 0.1214, "step": 190, "time": 49.88 }, { "epoch": 0.16, "learning_rate": "4.7749e-05", "loss": 0.115, "slid_loss": 0.1213, "step": 191, "time": 49.38 }, { "epoch": 0.16, "learning_rate": "4.7724e-05", "loss": 0.1187, "slid_loss": 0.1212, "step": 192, "time": 50.85 }, { "epoch": 0.16, "learning_rate": "4.7700e-05", "loss": 0.1123, "slid_loss": 0.121, "step": 193, "time": 49.9 }, { "epoch": 0.16, "learning_rate": "4.7675e-05", "loss": 0.1252, "slid_loss": 0.1211, "step": 194, "time": 50.64 }, { "epoch": 0.16, "learning_rate": "4.7649e-05", "loss": 0.1167, "slid_loss": 0.121, "step": 195, "time": 50.57 }, { "epoch": 0.17, "learning_rate": "4.7624e-05", "loss": 0.1122, "slid_loss": 0.1209, "step": 196, "time": 48.92 }, { "epoch": 0.17, "learning_rate": "4.7599e-05", "loss": 0.1123, "slid_loss": 0.1208, "step": 197, "time": 50.21 }, { "epoch": 0.17, "learning_rate": "4.7573e-05", "loss": 0.1159, "slid_loss": 0.1207, "step": 198, "time": 50.58 }, { "epoch": 0.17, "learning_rate": "4.7548e-05", "loss": 0.1124, "slid_loss": 0.1205, "step": 199, "time": 49.72 }, { "epoch": 0.17, "learning_rate": "4.7522e-05", "loss": 0.1183, "slid_loss": 0.1204, "step": 200, "time": 48.06 }, { "epoch": 0.17, "learning_rate": "4.7496e-05", "loss": 0.1167, "slid_loss": 0.1204, "step": 201, "time": 48.79 }, { "epoch": 0.17, "learning_rate": "4.7470e-05", "loss": 0.1167, "slid_loss": 0.1203, "step": 202, "time": 51.61 }, { "epoch": 0.17, "learning_rate": "4.7444e-05", "loss": 0.1214, "slid_loss": 0.1202, "step": 203, "time": 51.11 }, { "epoch": 0.17, "learning_rate": "4.7418e-05", "loss": 0.1265, "slid_loss": 0.1203, "step": 204, "time": 49.26 }, { "epoch": 0.17, "learning_rate": "4.7391e-05", "loss": 0.1181, "slid_loss": 0.1201, "step": 205, "time": 49.29 }, { "epoch": 0.17, "learning_rate": "4.7365e-05", "loss": 0.1163, "slid_loss": 0.12, "step": 206, "time": 50.05 }, { "epoch": 0.17, "learning_rate": "4.7338e-05", "loss": 0.1151, "slid_loss": 0.1199, "step": 207, "time": 50.45 }, { "epoch": 0.18, "learning_rate": "4.7312e-05", "loss": 0.1185, "slid_loss": 0.1199, "step": 208, "time": 49.57 }, { "epoch": 0.18, "learning_rate": "4.7285e-05", "loss": 0.1166, "slid_loss": 0.1198, "step": 209, "time": 51.03 }, { "epoch": 0.18, "learning_rate": "4.7258e-05", "loss": 0.1097, "slid_loss": 0.1196, "step": 210, "time": 50.34 }, { "epoch": 0.18, "learning_rate": "4.7231e-05", "loss": 0.1225, "slid_loss": 0.1196, "step": 211, "time": 50.64 }, { "epoch": 0.18, "learning_rate": "4.7203e-05", "loss": 0.1212, "slid_loss": 0.1196, "step": 212, "time": 50.56 }, { "epoch": 0.18, "learning_rate": "4.7176e-05", "loss": 0.118, "slid_loss": 0.1195, "step": 213, "time": 50.85 }, { "epoch": 0.18, "learning_rate": "4.7149e-05", "loss": 0.1083, "slid_loss": 0.1194, "step": 214, "time": 51.55 }, { "epoch": 0.18, "learning_rate": "4.7121e-05", "loss": 0.1119, "slid_loss": 0.1192, "step": 215, "time": 51.59 }, { "epoch": 0.18, "learning_rate": "4.7093e-05", "loss": 0.1192, "slid_loss": 0.1192, "step": 216, "time": 50.36 }, { "epoch": 0.18, "learning_rate": "4.7065e-05", "loss": 0.1188, "slid_loss": 0.1191, "step": 217, "time": 51.95 }, { "epoch": 0.18, "learning_rate": "4.7037e-05", "loss": 0.1151, "slid_loss": 0.119, "step": 218, "time": 51.78 }, { "epoch": 0.18, "learning_rate": "4.7009e-05", "loss": 0.1198, "slid_loss": 0.119, "step": 219, "time": 51.2 }, { "epoch": 0.19, "learning_rate": "4.6981e-05", "loss": 0.1089, "slid_loss": 0.1189, "step": 220, "time": 49.62 }, { "epoch": 0.19, "learning_rate": "4.6953e-05", "loss": 0.1135, "slid_loss": 0.1188, "step": 221, "time": 50.2 }, { "epoch": 0.19, "learning_rate": "4.6924e-05", "loss": 0.1174, "slid_loss": 0.1187, "step": 222, "time": 50.9 }, { "epoch": 0.19, "learning_rate": "4.6896e-05", "loss": 0.1137, "slid_loss": 0.1186, "step": 223, "time": 50.58 }, { "epoch": 0.19, "learning_rate": "4.6867e-05", "loss": 0.1172, "slid_loss": 0.1185, "step": 224, "time": 51.32 }, { "epoch": 0.19, "learning_rate": "4.6838e-05", "loss": 0.1103, "slid_loss": 0.1185, "step": 225, "time": 50.4 }, { "epoch": 0.19, "learning_rate": "4.6809e-05", "loss": 0.1131, "slid_loss": 0.1184, "step": 226, "time": 51.02 }, { "epoch": 0.19, "learning_rate": "4.6780e-05", "loss": 0.1155, "slid_loss": 0.1184, "step": 227, "time": 50.68 }, { "epoch": 0.19, "learning_rate": "4.6751e-05", "loss": 0.1177, "slid_loss": 0.1183, "step": 228, "time": 50.61 }, { "epoch": 0.19, "learning_rate": "4.6722e-05", "loss": 0.1211, "slid_loss": 0.1184, "step": 229, "time": 49.81 }, { "epoch": 0.19, "learning_rate": "4.6692e-05", "loss": 0.1152, "slid_loss": 0.1183, "step": 230, "time": 50.56 }, { "epoch": 0.19, "learning_rate": "4.6663e-05", "loss": 0.115, "slid_loss": 0.1182, "step": 231, "time": 49.83 }, { "epoch": 0.2, "learning_rate": "4.6633e-05", "loss": 0.1132, "slid_loss": 0.1181, "step": 232, "time": 50.79 }, { "epoch": 0.2, "learning_rate": "4.6603e-05", "loss": 0.1113, "slid_loss": 0.118, "step": 233, "time": 49.61 }, { "epoch": 0.2, "learning_rate": "4.6573e-05", "loss": 0.1005, "slid_loss": 0.1178, "step": 234, "time": 50.63 }, { "epoch": 0.2, "learning_rate": "4.6543e-05", "loss": 0.1168, "slid_loss": 0.1177, "step": 235, "time": 50.37 }, { "epoch": 0.2, "learning_rate": "4.6513e-05", "loss": 0.1128, "slid_loss": 0.1175, "step": 236, "time": 50.37 }, { "epoch": 0.2, "learning_rate": "4.6483e-05", "loss": 0.12, "slid_loss": 0.1175, "step": 237, "time": 50.55 }, { "epoch": 0.2, "learning_rate": "4.6453e-05", "loss": 0.1089, "slid_loss": 0.1174, "step": 238, "time": 50.55 }, { "epoch": 0.2, "learning_rate": "4.6422e-05", "loss": 0.1135, "slid_loss": 0.1174, "step": 239, "time": 50.22 }, { "epoch": 0.2, "learning_rate": "4.6392e-05", "loss": 0.1069, "slid_loss": 0.1173, "step": 240, "time": 49.21 }, { "epoch": 0.2, "learning_rate": "4.6361e-05", "loss": 0.1107, "slid_loss": 0.1172, "step": 241, "time": 50.55 }, { "epoch": 0.2, "learning_rate": "4.6330e-05", "loss": 0.1085, "slid_loss": 0.1171, "step": 242, "time": 50.62 }, { "epoch": 0.2, "learning_rate": "4.6299e-05", "loss": 0.1118, "slid_loss": 0.117, "step": 243, "time": 50.09 }, { "epoch": 0.21, "learning_rate": "4.6268e-05", "loss": 0.1099, "slid_loss": 0.1169, "step": 244, "time": 49.6 }, { "epoch": 0.21, "learning_rate": "4.6237e-05", "loss": 0.1049, "slid_loss": 0.1168, "step": 245, "time": 50.88 }, { "epoch": 0.21, "learning_rate": "4.6205e-05", "loss": 0.1057, "slid_loss": 0.1166, "step": 246, "time": 50.23 }, { "epoch": 0.21, "learning_rate": "4.6174e-05", "loss": 0.1079, "slid_loss": 0.1164, "step": 247, "time": 50.44 }, { "epoch": 0.21, "learning_rate": "4.6143e-05", "loss": 0.1135, "slid_loss": 0.1163, "step": 248, "time": 49.95 }, { "epoch": 0.21, "learning_rate": "4.6111e-05", "loss": 0.1065, "slid_loss": 0.1162, "step": 249, "time": 50.62 }, { "epoch": 0.21, "learning_rate": "4.6079e-05", "loss": 0.1155, "slid_loss": 0.1161, "step": 250, "time": 50.41 }, { "epoch": 0.21, "learning_rate": "4.6047e-05", "loss": 0.114, "slid_loss": 0.1161, "step": 251, "time": 50.16 }, { "epoch": 0.21, "learning_rate": "4.6015e-05", "loss": 0.116, "slid_loss": 0.1161, "step": 252, "time": 50.14 }, { "epoch": 0.21, "learning_rate": "4.5983e-05", "loss": 0.1132, "slid_loss": 0.116, "step": 253, "time": 50.48 }, { "epoch": 0.21, "learning_rate": "4.5951e-05", "loss": 0.1124, "slid_loss": 0.1159, "step": 254, "time": 50.14 }, { "epoch": 0.21, "learning_rate": "4.5919e-05", "loss": 0.118, "slid_loss": 0.1158, "step": 255, "time": 49.58 }, { "epoch": 0.22, "learning_rate": "4.5886e-05", "loss": 0.109, "slid_loss": 0.1156, "step": 256, "time": 49.29 }, { "epoch": 0.22, "learning_rate": "4.5853e-05", "loss": 0.1036, "slid_loss": 0.1154, "step": 257, "time": 49.93 }, { "epoch": 0.22, "learning_rate": "4.5821e-05", "loss": 0.1076, "slid_loss": 0.1153, "step": 258, "time": 50.52 }, { "epoch": 0.22, "learning_rate": "4.5788e-05", "loss": 0.1132, "slid_loss": 0.1153, "step": 259, "time": 49.8 }, { "epoch": 0.22, "learning_rate": "4.5755e-05", "loss": 0.1086, "slid_loss": 0.1152, "step": 260, "time": 50.23 }, { "epoch": 0.22, "learning_rate": "4.5722e-05", "loss": 0.1109, "slid_loss": 0.1151, "step": 261, "time": 49.79 }, { "epoch": 0.22, "learning_rate": "4.5689e-05", "loss": 0.1076, "slid_loss": 0.115, "step": 262, "time": 49.62 }, { "epoch": 0.22, "learning_rate": "4.5656e-05", "loss": 0.1127, "slid_loss": 0.1149, "step": 263, "time": 49.36 }, { "epoch": 0.22, "learning_rate": "4.5622e-05", "loss": 0.1036, "slid_loss": 0.1147, "step": 264, "time": 50.07 }, { "epoch": 0.22, "learning_rate": "4.5589e-05", "loss": 0.1039, "slid_loss": 0.1145, "step": 265, "time": 49.52 }, { "epoch": 0.22, "learning_rate": "4.5555e-05", "loss": 0.107, "slid_loss": 0.1143, "step": 266, "time": 51.07 }, { "epoch": 0.22, "learning_rate": "4.5522e-05", "loss": 0.1139, "slid_loss": 0.1143, "step": 267, "time": 51.16 }, { "epoch": 0.23, "learning_rate": "4.5488e-05", "loss": 0.1132, "slid_loss": 0.1143, "step": 268, "time": 51.19 }, { "epoch": 0.23, "learning_rate": "4.5454e-05", "loss": 0.1074, "slid_loss": 0.1142, "step": 269, "time": 51.0 }, { "epoch": 0.23, "learning_rate": "4.5420e-05", "loss": 0.1115, "slid_loss": 0.1141, "step": 270, "time": 50.38 }, { "epoch": 0.23, "learning_rate": "4.5386e-05", "loss": 0.1134, "slid_loss": 0.114, "step": 271, "time": 49.94 }, { "epoch": 0.23, "learning_rate": "4.5351e-05", "loss": 0.1145, "slid_loss": 0.114, "step": 272, "time": 50.03 }, { "epoch": 0.23, "learning_rate": "4.5317e-05", "loss": 0.106, "slid_loss": 0.1139, "step": 273, "time": 50.4 }, { "epoch": 0.23, "learning_rate": "4.5283e-05", "loss": 0.115, "slid_loss": 0.1139, "step": 274, "time": 49.0 }, { "epoch": 0.23, "learning_rate": "4.5248e-05", "loss": 0.107, "slid_loss": 0.1137, "step": 275, "time": 50.08 }, { "epoch": 0.23, "learning_rate": "4.5213e-05", "loss": 0.1083, "slid_loss": 0.1137, "step": 276, "time": 49.41 }, { "epoch": 0.23, "learning_rate": "4.5179e-05", "loss": 0.0965, "slid_loss": 0.1135, "step": 277, "time": 50.21 }, { "epoch": 0.23, "learning_rate": "4.5144e-05", "loss": 0.1062, "slid_loss": 0.1134, "step": 278, "time": 50.39 }, { "epoch": 0.24, "learning_rate": "4.5109e-05", "loss": 0.1104, "slid_loss": 0.1134, "step": 279, "time": 51.17 }, { "epoch": 0.24, "learning_rate": "4.5073e-05", "loss": 0.1051, "slid_loss": 0.1133, "step": 280, "time": 50.35 }, { "epoch": 0.24, "learning_rate": "4.5038e-05", "loss": 0.1082, "slid_loss": 0.1132, "step": 281, "time": 50.55 }, { "epoch": 0.24, "learning_rate": "4.5003e-05", "loss": 0.1123, "slid_loss": 0.1132, "step": 282, "time": 49.94 }, { "epoch": 0.24, "learning_rate": "4.4967e-05", "loss": 0.1015, "slid_loss": 0.113, "step": 283, "time": 50.3 }, { "epoch": 0.24, "learning_rate": "4.4932e-05", "loss": 0.1051, "slid_loss": 0.1129, "step": 284, "time": 51.65 }, { "epoch": 0.24, "learning_rate": "4.4896e-05", "loss": 0.1026, "slid_loss": 0.1127, "step": 285, "time": 51.43 }, { "epoch": 0.24, "learning_rate": "4.4861e-05", "loss": 0.1097, "slid_loss": 0.1126, "step": 286, "time": 51.39 }, { "epoch": 0.24, "learning_rate": "4.4825e-05", "loss": 0.1005, "slid_loss": 0.1124, "step": 287, "time": 51.26 }, { "epoch": 0.24, "learning_rate": "4.4789e-05", "loss": 0.1106, "slid_loss": 0.1124, "step": 288, "time": 50.35 }, { "epoch": 0.24, "learning_rate": "4.4753e-05", "loss": 0.1137, "slid_loss": 0.1124, "step": 289, "time": 51.84 }, { "epoch": 0.24, "learning_rate": "4.4716e-05", "loss": 0.1045, "slid_loss": 0.1122, "step": 290, "time": 51.6 }, { "epoch": 0.25, "learning_rate": "4.4680e-05", "loss": 0.1097, "slid_loss": 0.1122, "step": 291, "time": 51.55 }, { "epoch": 0.25, "learning_rate": "4.4644e-05", "loss": 0.1159, "slid_loss": 0.1122, "step": 292, "time": 51.6 }, { "epoch": 0.25, "learning_rate": "4.4607e-05", "loss": 0.1041, "slid_loss": 0.1121, "step": 293, "time": 50.07 }, { "epoch": 0.25, "learning_rate": "4.4571e-05", "loss": 0.1019, "slid_loss": 0.1119, "step": 294, "time": 50.74 }, { "epoch": 0.25, "learning_rate": "4.4534e-05", "loss": 0.1098, "slid_loss": 0.1118, "step": 295, "time": 49.78 }, { "epoch": 0.25, "learning_rate": "4.4497e-05", "loss": 0.107, "slid_loss": 0.1117, "step": 296, "time": 49.94 }, { "epoch": 0.25, "learning_rate": "4.4460e-05", "loss": 0.1013, "slid_loss": 0.1116, "step": 297, "time": 49.75 }, { "epoch": 0.25, "learning_rate": "4.4423e-05", "loss": 0.1059, "slid_loss": 0.1115, "step": 298, "time": 50.45 }, { "epoch": 0.25, "learning_rate": "4.4386e-05", "loss": 0.1062, "slid_loss": 0.1115, "step": 299, "time": 48.27 }, { "epoch": 0.25, "learning_rate": "4.4349e-05", "loss": 0.102, "slid_loss": 0.1113, "step": 300, "time": 50.58 }, { "epoch": 0.25, "learning_rate": "4.4311e-05", "loss": 0.0995, "slid_loss": 0.1111, "step": 301, "time": 49.87 }, { "epoch": 0.25, "learning_rate": "4.4274e-05", "loss": 0.1063, "slid_loss": 0.111, "step": 302, "time": 50.61 }, { "epoch": 0.26, "learning_rate": "4.4236e-05", "loss": 0.1133, "slid_loss": 0.1109, "step": 303, "time": 51.01 }, { "epoch": 0.26, "learning_rate": "4.4199e-05", "loss": 0.1038, "slid_loss": 0.1107, "step": 304, "time": 49.56 }, { "epoch": 0.26, "learning_rate": "4.4161e-05", "loss": 0.1125, "slid_loss": 0.1107, "step": 305, "time": 50.73 }, { "epoch": 0.26, "learning_rate": "4.4123e-05", "loss": 0.1159, "slid_loss": 0.1107, "step": 306, "time": 49.74 }, { "epoch": 0.26, "learning_rate": "4.4085e-05", "loss": 0.1037, "slid_loss": 0.1105, "step": 307, "time": 49.21 }, { "epoch": 0.26, "learning_rate": "4.4047e-05", "loss": 0.0998, "slid_loss": 0.1104, "step": 308, "time": 50.26 }, { "epoch": 0.26, "learning_rate": "4.4009e-05", "loss": 0.104, "slid_loss": 0.1102, "step": 309, "time": 50.34 }, { "epoch": 0.26, "learning_rate": "4.3971e-05", "loss": 0.1088, "slid_loss": 0.1102, "step": 310, "time": 50.86 }, { "epoch": 0.26, "learning_rate": "4.3932e-05", "loss": 0.1088, "slid_loss": 0.1101, "step": 311, "time": 49.64 }, { "epoch": 0.26, "learning_rate": "4.3894e-05", "loss": 0.1019, "slid_loss": 0.1099, "step": 312, "time": 48.89 }, { "epoch": 0.26, "learning_rate": "4.3855e-05", "loss": 0.1073, "slid_loss": 0.1098, "step": 313, "time": 49.67 }, { "epoch": 0.26, "learning_rate": "4.3817e-05", "loss": 0.1062, "slid_loss": 0.1098, "step": 314, "time": 50.64 }, { "epoch": 0.27, "learning_rate": "4.3778e-05", "loss": 0.1021, "slid_loss": 0.1097, "step": 315, "time": 50.39 }, { "epoch": 0.27, "learning_rate": "4.3739e-05", "loss": 0.106, "slid_loss": 0.1095, "step": 316, "time": 49.99 }, { "epoch": 0.27, "learning_rate": "4.3700e-05", "loss": 0.1115, "slid_loss": 0.1095, "step": 317, "time": 50.87 }, { "epoch": 0.27, "learning_rate": "4.3661e-05", "loss": 0.104, "slid_loss": 0.1093, "step": 318, "time": 50.0 }, { "epoch": 0.27, "learning_rate": "4.3622e-05", "loss": 0.1046, "slid_loss": 0.1092, "step": 319, "time": 49.98 }, { "epoch": 0.27, "learning_rate": "4.3583e-05", "loss": 0.1035, "slid_loss": 0.1091, "step": 320, "time": 50.47 }, { "epoch": 0.27, "learning_rate": "4.3544e-05", "loss": 0.1068, "slid_loss": 0.1091, "step": 321, "time": 50.64 }, { "epoch": 0.27, "learning_rate": "4.3504e-05", "loss": 0.0982, "slid_loss": 0.1089, "step": 322, "time": 50.34 }, { "epoch": 0.27, "learning_rate": "4.3465e-05", "loss": 0.1081, "slid_loss": 0.1088, "step": 323, "time": 50.66 }, { "epoch": 0.27, "learning_rate": "4.3425e-05", "loss": 0.106, "slid_loss": 0.1087, "step": 324, "time": 50.57 }, { "epoch": 0.27, "learning_rate": "4.3385e-05", "loss": 0.1073, "slid_loss": 0.1087, "step": 325, "time": 50.56 }, { "epoch": 0.27, "learning_rate": "4.3345e-05", "loss": 0.1051, "slid_loss": 0.1086, "step": 326, "time": 51.0 }, { "epoch": 0.28, "learning_rate": "4.3306e-05", "loss": 0.1044, "slid_loss": 0.1085, "step": 327, "time": 49.22 }, { "epoch": 0.28, "learning_rate": "4.3266e-05", "loss": 0.1063, "slid_loss": 0.1084, "step": 328, "time": 50.3 }, { "epoch": 0.28, "learning_rate": "4.3225e-05", "loss": 0.1017, "slid_loss": 0.1082, "step": 329, "time": 50.34 }, { "epoch": 0.28, "learning_rate": "4.3185e-05", "loss": 0.1022, "slid_loss": 0.1081, "step": 330, "time": 49.94 }, { "epoch": 0.28, "learning_rate": "4.3145e-05", "loss": 0.1021, "slid_loss": 0.1079, "step": 331, "time": 50.27 }, { "epoch": 0.28, "learning_rate": "4.3105e-05", "loss": 0.104, "slid_loss": 0.1078, "step": 332, "time": 49.29 }, { "epoch": 0.28, "learning_rate": "4.3064e-05", "loss": 0.1103, "slid_loss": 0.1078, "step": 333, "time": 50.46 }, { "epoch": 0.28, "learning_rate": "4.3024e-05", "loss": 0.1014, "slid_loss": 0.1078, "step": 334, "time": 48.78 }, { "epoch": 0.28, "learning_rate": "4.2983e-05", "loss": 0.1034, "slid_loss": 0.1077, "step": 335, "time": 49.91 }, { "epoch": 0.28, "learning_rate": "4.2942e-05", "loss": 0.1007, "slid_loss": 0.1076, "step": 336, "time": 50.57 }, { "epoch": 0.28, "learning_rate": "4.2901e-05", "loss": 0.1039, "slid_loss": 0.1074, "step": 337, "time": 49.61 }, { "epoch": 0.28, "learning_rate": "4.2860e-05", "loss": 0.1073, "slid_loss": 0.1074, "step": 338, "time": 50.51 }, { "epoch": 0.29, "learning_rate": "4.2819e-05", "loss": 0.101, "slid_loss": 0.1073, "step": 339, "time": 50.23 }, { "epoch": 0.29, "learning_rate": "4.2778e-05", "loss": 0.1003, "slid_loss": 0.1072, "step": 340, "time": 50.66 }, { "epoch": 0.29, "learning_rate": "4.2737e-05", "loss": 0.1066, "slid_loss": 0.1072, "step": 341, "time": 50.04 }, { "epoch": 0.29, "learning_rate": "4.2696e-05", "loss": 0.1037, "slid_loss": 0.1071, "step": 342, "time": 50.47 }, { "epoch": 0.29, "learning_rate": "4.2654e-05", "loss": 0.0975, "slid_loss": 0.107, "step": 343, "time": 49.64 }, { "epoch": 0.29, "learning_rate": "4.2613e-05", "loss": 0.0989, "slid_loss": 0.1069, "step": 344, "time": 50.89 }, { "epoch": 0.29, "learning_rate": "4.2571e-05", "loss": 0.0974, "slid_loss": 0.1068, "step": 345, "time": 50.14 }, { "epoch": 0.29, "learning_rate": "4.2529e-05", "loss": 0.1077, "slid_loss": 0.1068, "step": 346, "time": 50.51 }, { "epoch": 0.29, "learning_rate": "4.2488e-05", "loss": 0.1063, "slid_loss": 0.1068, "step": 347, "time": 49.4 }, { "epoch": 0.29, "learning_rate": "4.2446e-05", "loss": 0.0983, "slid_loss": 0.1066, "step": 348, "time": 50.54 }, { "epoch": 0.29, "learning_rate": "4.2404e-05", "loss": 0.1026, "slid_loss": 0.1066, "step": 349, "time": 49.4 }, { "epoch": 0.29, "learning_rate": "4.2362e-05", "loss": 0.0993, "slid_loss": 0.1064, "step": 350, "time": 49.97 }, { "epoch": 0.3, "learning_rate": "4.2320e-05", "loss": 0.1051, "slid_loss": 0.1064, "step": 351, "time": 50.96 }, { "epoch": 0.3, "learning_rate": "4.2278e-05", "loss": 0.1005, "slid_loss": 0.1062, "step": 352, "time": 50.61 }, { "epoch": 0.3, "learning_rate": "4.2235e-05", "loss": 0.1018, "slid_loss": 0.1061, "step": 353, "time": 49.88 }, { "epoch": 0.3, "learning_rate": "4.2193e-05", "loss": 0.1073, "slid_loss": 0.106, "step": 354, "time": 51.64 }, { "epoch": 0.3, "learning_rate": "4.2150e-05", "loss": 0.1019, "slid_loss": 0.1059, "step": 355, "time": 50.61 }, { "epoch": 0.3, "learning_rate": "4.2108e-05", "loss": 0.1045, "slid_loss": 0.1058, "step": 356, "time": 50.08 }, { "epoch": 0.3, "learning_rate": "4.2065e-05", "loss": 0.1056, "slid_loss": 0.1058, "step": 357, "time": 50.61 }, { "epoch": 0.3, "learning_rate": "4.2023e-05", "loss": 0.1089, "slid_loss": 0.1059, "step": 358, "time": 50.35 }, { "epoch": 0.3, "learning_rate": "4.1980e-05", "loss": 0.1032, "slid_loss": 0.1058, "step": 359, "time": 50.75 }, { "epoch": 0.3, "learning_rate": "4.1937e-05", "loss": 0.1039, "slid_loss": 0.1057, "step": 360, "time": 49.85 }, { "epoch": 0.3, "learning_rate": "4.1894e-05", "loss": 0.1017, "slid_loss": 0.1056, "step": 361, "time": 50.9 }, { "epoch": 0.31, "learning_rate": "4.1851e-05", "loss": 0.1064, "slid_loss": 0.1056, "step": 362, "time": 50.92 }, { "epoch": 0.31, "learning_rate": "4.1808e-05", "loss": 0.0978, "slid_loss": 0.1055, "step": 363, "time": 52.77 }, { "epoch": 0.31, "learning_rate": "4.1764e-05", "loss": 0.0996, "slid_loss": 0.1054, "step": 364, "time": 50.11 }, { "epoch": 0.31, "learning_rate": "4.1721e-05", "loss": 0.104, "slid_loss": 0.1054, "step": 365, "time": 49.93 }, { "epoch": 0.31, "learning_rate": "4.1678e-05", "loss": 0.1076, "slid_loss": 0.1054, "step": 366, "time": 50.23 }, { "epoch": 0.31, "learning_rate": "4.1634e-05", "loss": 0.105, "slid_loss": 0.1053, "step": 367, "time": 51.54 }, { "epoch": 0.31, "learning_rate": "4.1591e-05", "loss": 0.1062, "slid_loss": 0.1053, "step": 368, "time": 50.06 }, { "epoch": 0.31, "learning_rate": "4.1547e-05", "loss": 0.107, "slid_loss": 0.1053, "step": 369, "time": 49.9 }, { "epoch": 0.31, "learning_rate": "4.1503e-05", "loss": 0.0985, "slid_loss": 0.1051, "step": 370, "time": 49.71 }, { "epoch": 0.31, "learning_rate": "4.1459e-05", "loss": 0.0947, "slid_loss": 0.1049, "step": 371, "time": 49.72 }, { "epoch": 0.31, "learning_rate": "4.1415e-05", "loss": 0.0964, "slid_loss": 0.1048, "step": 372, "time": 49.45 }, { "epoch": 0.31, "learning_rate": "4.1371e-05", "loss": 0.1018, "slid_loss": 0.1047, "step": 373, "time": 49.47 }, { "epoch": 0.32, "learning_rate": "4.1327e-05", "loss": 0.099, "slid_loss": 0.1046, "step": 374, "time": 50.86 }, { "epoch": 0.32, "learning_rate": "4.1283e-05", "loss": 0.097, "slid_loss": 0.1045, "step": 375, "time": 50.09 }, { "epoch": 0.32, "learning_rate": "4.1239e-05", "loss": 0.0981, "slid_loss": 0.1044, "step": 376, "time": 49.97 }, { "epoch": 0.32, "learning_rate": "4.1195e-05", "loss": 0.0952, "slid_loss": 0.1043, "step": 377, "time": 50.51 }, { "epoch": 0.32, "learning_rate": "4.1150e-05", "loss": 0.0988, "slid_loss": 0.1043, "step": 378, "time": 49.27 }, { "epoch": 0.32, "learning_rate": "4.1106e-05", "loss": 0.0996, "slid_loss": 0.1042, "step": 379, "time": 50.29 }, { "epoch": 0.32, "learning_rate": "4.1061e-05", "loss": 0.0978, "slid_loss": 0.1041, "step": 380, "time": 49.47 }, { "epoch": 0.32, "learning_rate": "4.1017e-05", "loss": 0.097, "slid_loss": 0.104, "step": 381, "time": 50.67 }, { "epoch": 0.32, "learning_rate": "4.0972e-05", "loss": 0.1013, "slid_loss": 0.1039, "step": 382, "time": 50.52 }, { "epoch": 0.32, "learning_rate": "4.0927e-05", "loss": 0.1046, "slid_loss": 0.1039, "step": 383, "time": 48.77 }, { "epoch": 0.32, "learning_rate": "4.0882e-05", "loss": 0.093, "slid_loss": 0.1038, "step": 384, "time": 50.28 }, { "epoch": 0.32, "learning_rate": "4.0837e-05", "loss": 0.0999, "slid_loss": 0.1038, "step": 385, "time": 50.01 }, { "epoch": 0.33, "learning_rate": "4.0792e-05", "loss": 0.099, "slid_loss": 0.1036, "step": 386, "time": 49.53 }, { "epoch": 0.33, "learning_rate": "4.0747e-05", "loss": 0.1015, "slid_loss": 0.1037, "step": 387, "time": 50.95 }, { "epoch": 0.33, "learning_rate": "4.0702e-05", "loss": 0.1028, "slid_loss": 0.1036, "step": 388, "time": 49.67 }, { "epoch": 0.33, "learning_rate": "4.0657e-05", "loss": 0.0986, "slid_loss": 0.1034, "step": 389, "time": 49.68 }, { "epoch": 0.33, "learning_rate": "4.0611e-05", "loss": 0.0949, "slid_loss": 0.1033, "step": 390, "time": 50.42 }, { "epoch": 0.33, "learning_rate": "4.0566e-05", "loss": 0.0923, "slid_loss": 0.1032, "step": 391, "time": 50.27 }, { "epoch": 0.33, "learning_rate": "4.0521e-05", "loss": 0.1016, "slid_loss": 0.103, "step": 392, "time": 50.08 }, { "epoch": 0.33, "learning_rate": "4.0475e-05", "loss": 0.0963, "slid_loss": 0.1029, "step": 393, "time": 50.07 }, { "epoch": 0.33, "learning_rate": "4.0429e-05", "loss": 0.0944, "slid_loss": 0.1029, "step": 394, "time": 49.83 }, { "epoch": 0.33, "learning_rate": "4.0384e-05", "loss": 0.0979, "slid_loss": 0.1027, "step": 395, "time": 50.17 }, { "epoch": 0.33, "learning_rate": "4.0338e-05", "loss": 0.099, "slid_loss": 0.1027, "step": 396, "time": 50.2 }, { "epoch": 0.33, "learning_rate": "4.0292e-05", "loss": 0.0984, "slid_loss": 0.1026, "step": 397, "time": 49.44 }, { "epoch": 0.34, "learning_rate": "4.0246e-05", "loss": 0.0948, "slid_loss": 0.1025, "step": 398, "time": 49.66 }, { "epoch": 0.34, "learning_rate": "4.0200e-05", "loss": 0.1063, "slid_loss": 0.1025, "step": 399, "time": 49.78 }, { "epoch": 0.34, "learning_rate": "4.0154e-05", "loss": 0.1012, "slid_loss": 0.1025, "step": 400, "time": 50.54 }, { "epoch": 0.34, "learning_rate": "4.0108e-05", "loss": 0.0981, "slid_loss": 0.1025, "step": 401, "time": 50.72 }, { "epoch": 0.34, "learning_rate": "4.0062e-05", "loss": 0.0999, "slid_loss": 0.1024, "step": 402, "time": 50.47 }, { "epoch": 0.34, "learning_rate": "4.0015e-05", "loss": 0.0965, "slid_loss": 0.1023, "step": 403, "time": 50.44 }, { "epoch": 0.34, "learning_rate": "3.9969e-05", "loss": 0.1013, "slid_loss": 0.1022, "step": 404, "time": 50.2 }, { "epoch": 0.34, "learning_rate": "3.9923e-05", "loss": 0.0955, "slid_loss": 0.1021, "step": 405, "time": 51.32 }, { "epoch": 0.34, "learning_rate": "3.9876e-05", "loss": 0.0942, "slid_loss": 0.1019, "step": 406, "time": 49.73 }, { "epoch": 0.34, "learning_rate": "3.9830e-05", "loss": 0.1029, "slid_loss": 0.1018, "step": 407, "time": 49.92 }, { "epoch": 0.34, "learning_rate": "3.9783e-05", "loss": 0.0941, "slid_loss": 0.1018, "step": 408, "time": 51.35 }, { "epoch": 0.34, "learning_rate": "3.9736e-05", "loss": 0.1027, "slid_loss": 0.1018, "step": 409, "time": 49.71 }, { "epoch": 0.35, "learning_rate": "3.9689e-05", "loss": 0.1054, "slid_loss": 0.1017, "step": 410, "time": 50.89 }, { "epoch": 0.35, "learning_rate": "3.9643e-05", "loss": 0.0953, "slid_loss": 0.1016, "step": 411, "time": 50.13 }, { "epoch": 0.35, "learning_rate": "3.9596e-05", "loss": 0.1036, "slid_loss": 0.1016, "step": 412, "time": 50.48 }, { "epoch": 0.35, "learning_rate": "3.9549e-05", "loss": 0.093, "slid_loss": 0.1015, "step": 413, "time": 50.25 }, { "epoch": 0.35, "learning_rate": "3.9502e-05", "loss": 0.0954, "slid_loss": 0.1014, "step": 414, "time": 49.17 }, { "epoch": 0.35, "learning_rate": "3.9454e-05", "loss": 0.0934, "slid_loss": 0.1013, "step": 415, "time": 50.6 }, { "epoch": 0.35, "learning_rate": "3.9407e-05", "loss": 0.1027, "slid_loss": 0.1013, "step": 416, "time": 50.34 }, { "epoch": 0.35, "learning_rate": "3.9360e-05", "loss": 0.0925, "slid_loss": 0.1011, "step": 417, "time": 50.11 }, { "epoch": 0.35, "learning_rate": "3.9313e-05", "loss": 0.0941, "slid_loss": 0.101, "step": 418, "time": 51.31 }, { "epoch": 0.35, "learning_rate": "3.9265e-05", "loss": 0.0947, "slid_loss": 0.1009, "step": 419, "time": 50.51 }, { "epoch": 0.35, "learning_rate": "3.9218e-05", "loss": 0.0955, "slid_loss": 0.1008, "step": 420, "time": 48.92 }, { "epoch": 0.35, "learning_rate": "3.9170e-05", "loss": 0.0979, "slid_loss": 0.1007, "step": 421, "time": 49.64 }, { "epoch": 0.36, "learning_rate": "3.9123e-05", "loss": 0.1018, "slid_loss": 0.1007, "step": 422, "time": 51.71 }, { "epoch": 0.36, "learning_rate": "3.9075e-05", "loss": 0.0956, "slid_loss": 0.1006, "step": 423, "time": 49.98 }, { "epoch": 0.36, "learning_rate": "3.9027e-05", "loss": 0.0997, "slid_loss": 0.1005, "step": 424, "time": 49.89 }, { "epoch": 0.36, "learning_rate": "3.8980e-05", "loss": 0.0993, "slid_loss": 0.1005, "step": 425, "time": 51.68 }, { "epoch": 0.36, "learning_rate": "3.8932e-05", "loss": 0.1039, "slid_loss": 0.1005, "step": 426, "time": 50.24 }, { "epoch": 0.36, "learning_rate": "3.8884e-05", "loss": 0.0937, "slid_loss": 0.1004, "step": 427, "time": 49.49 }, { "epoch": 0.36, "learning_rate": "3.8836e-05", "loss": 0.0965, "slid_loss": 0.1003, "step": 428, "time": 49.36 }, { "epoch": 0.36, "learning_rate": "3.8788e-05", "loss": 0.0952, "slid_loss": 0.1002, "step": 429, "time": 50.71 }, { "epoch": 0.36, "learning_rate": "3.8740e-05", "loss": 0.1085, "slid_loss": 0.1003, "step": 430, "time": 50.56 }, { "epoch": 0.36, "learning_rate": "3.8691e-05", "loss": 0.0952, "slid_loss": 0.1002, "step": 431, "time": 50.65 }, { "epoch": 0.36, "learning_rate": "3.8643e-05", "loss": 0.1016, "slid_loss": 0.1002, "step": 432, "time": 51.46 }, { "epoch": 0.36, "learning_rate": "3.8595e-05", "loss": 0.0996, "slid_loss": 0.1001, "step": 433, "time": 50.41 }, { "epoch": 0.37, "learning_rate": "3.8547e-05", "loss": 0.0991, "slid_loss": 0.1, "step": 434, "time": 51.21 }, { "epoch": 0.37, "learning_rate": "3.8498e-05", "loss": 0.0923, "slid_loss": 0.0999, "step": 435, "time": 50.33 }, { "epoch": 0.37, "learning_rate": "3.8450e-05", "loss": 0.0895, "slid_loss": 0.0998, "step": 436, "time": 51.46 }, { "epoch": 0.37, "learning_rate": "3.8401e-05", "loss": 0.0991, "slid_loss": 0.0998, "step": 437, "time": 50.86 }, { "epoch": 0.37, "learning_rate": "3.8353e-05", "loss": 0.0938, "slid_loss": 0.0996, "step": 438, "time": 50.37 }, { "epoch": 0.37, "learning_rate": "3.8304e-05", "loss": 0.0974, "slid_loss": 0.0996, "step": 439, "time": 50.54 }, { "epoch": 0.37, "learning_rate": "3.8255e-05", "loss": 0.0969, "slid_loss": 0.0996, "step": 440, "time": 50.32 }, { "epoch": 0.37, "learning_rate": "3.8206e-05", "loss": 0.1066, "slid_loss": 0.0995, "step": 441, "time": 49.73 }, { "epoch": 0.37, "learning_rate": "3.8158e-05", "loss": 0.0977, "slid_loss": 0.0995, "step": 442, "time": 50.84 }, { "epoch": 0.37, "learning_rate": "3.8109e-05", "loss": 0.0939, "slid_loss": 0.0995, "step": 443, "time": 49.68 }, { "epoch": 0.37, "learning_rate": "3.8060e-05", "loss": 0.0914, "slid_loss": 0.0994, "step": 444, "time": 49.77 }, { "epoch": 0.37, "learning_rate": "3.8011e-05", "loss": 0.1032, "slid_loss": 0.0994, "step": 445, "time": 49.56 }, { "epoch": 0.38, "learning_rate": "3.7962e-05", "loss": 0.1031, "slid_loss": 0.0994, "step": 446, "time": 51.07 }, { "epoch": 0.38, "learning_rate": "3.7912e-05", "loss": 0.0932, "slid_loss": 0.0993, "step": 447, "time": 49.81 }, { "epoch": 0.38, "learning_rate": "3.7863e-05", "loss": 0.0982, "slid_loss": 0.0993, "step": 448, "time": 50.42 }, { "epoch": 0.38, "learning_rate": "3.7814e-05", "loss": 0.0933, "slid_loss": 0.0992, "step": 449, "time": 49.96 }, { "epoch": 0.38, "learning_rate": "3.7765e-05", "loss": 0.0927, "slid_loss": 0.0991, "step": 450, "time": 49.63 }, { "epoch": 0.38, "learning_rate": "3.7715e-05", "loss": 0.103, "slid_loss": 0.0991, "step": 451, "time": 50.16 }, { "epoch": 0.38, "learning_rate": "3.7666e-05", "loss": 0.0926, "slid_loss": 0.099, "step": 452, "time": 49.83 }, { "epoch": 0.38, "learning_rate": "3.7617e-05", "loss": 0.0997, "slid_loss": 0.099, "step": 453, "time": 49.53 }, { "epoch": 0.38, "learning_rate": "3.7567e-05", "loss": 0.0908, "slid_loss": 0.0988, "step": 454, "time": 49.18 }, { "epoch": 0.38, "learning_rate": "3.7517e-05", "loss": 0.0971, "slid_loss": 0.0988, "step": 455, "time": 50.52 }, { "epoch": 0.38, "learning_rate": "3.7468e-05", "loss": 0.0926, "slid_loss": 0.0986, "step": 456, "time": 50.61 }, { "epoch": 0.39, "learning_rate": "3.7418e-05", "loss": 0.0929, "slid_loss": 0.0985, "step": 457, "time": 49.59 }, { "epoch": 0.39, "learning_rate": "3.7368e-05", "loss": 0.0895, "slid_loss": 0.0983, "step": 458, "time": 50.62 }, { "epoch": 0.39, "learning_rate": "3.7319e-05", "loss": 0.0988, "slid_loss": 0.0983, "step": 459, "time": 49.21 }, { "epoch": 0.39, "learning_rate": "3.7269e-05", "loss": 0.0936, "slid_loss": 0.0982, "step": 460, "time": 47.86 }, { "epoch": 0.39, "learning_rate": "3.7219e-05", "loss": 0.0932, "slid_loss": 0.0981, "step": 461, "time": 50.37 }, { "epoch": 0.39, "learning_rate": "3.7169e-05", "loss": 0.0943, "slid_loss": 0.098, "step": 462, "time": 49.78 }, { "epoch": 0.39, "learning_rate": "3.7119e-05", "loss": 0.0954, "slid_loss": 0.098, "step": 463, "time": 49.23 }, { "epoch": 0.39, "learning_rate": "3.7069e-05", "loss": 0.0944, "slid_loss": 0.0979, "step": 464, "time": 50.28 }, { "epoch": 0.39, "learning_rate": "3.7019e-05", "loss": 0.0957, "slid_loss": 0.0978, "step": 465, "time": 49.15 }, { "epoch": 0.39, "learning_rate": "3.6969e-05", "loss": 0.0978, "slid_loss": 0.0977, "step": 466, "time": 50.88 }, { "epoch": 0.39, "learning_rate": "3.6919e-05", "loss": 0.0927, "slid_loss": 0.0976, "step": 467, "time": 50.09 }, { "epoch": 0.39, "learning_rate": "3.6868e-05", "loss": 0.099, "slid_loss": 0.0975, "step": 468, "time": 50.45 }, { "epoch": 0.4, "learning_rate": "3.6818e-05", "loss": 0.0953, "slid_loss": 0.0974, "step": 469, "time": 50.38 }, { "epoch": 0.4, "learning_rate": "3.6768e-05", "loss": 0.0925, "slid_loss": 0.0973, "step": 470, "time": 49.11 }, { "epoch": 0.4, "learning_rate": "3.6717e-05", "loss": 0.0853, "slid_loss": 0.0973, "step": 471, "time": 49.94 }, { "epoch": 0.4, "learning_rate": "3.6667e-05", "loss": 0.0868, "slid_loss": 0.0972, "step": 472, "time": 49.78 }, { "epoch": 0.4, "learning_rate": "3.6616e-05", "loss": 0.0979, "slid_loss": 0.0971, "step": 473, "time": 49.83 }, { "epoch": 0.4, "learning_rate": "3.6566e-05", "loss": 0.0931, "slid_loss": 0.0971, "step": 474, "time": 50.52 }, { "epoch": 0.4, "learning_rate": "3.6515e-05", "loss": 0.0985, "slid_loss": 0.0971, "step": 475, "time": 49.85 }, { "epoch": 0.4, "learning_rate": "3.6465e-05", "loss": 0.0967, "slid_loss": 0.0971, "step": 476, "time": 49.57 }, { "epoch": 0.4, "learning_rate": "3.6414e-05", "loss": 0.0933, "slid_loss": 0.097, "step": 477, "time": 50.91 }, { "epoch": 0.4, "learning_rate": "3.6363e-05", "loss": 0.0919, "slid_loss": 0.097, "step": 478, "time": 50.5 }, { "epoch": 0.4, "learning_rate": "3.6313e-05", "loss": 0.0948, "slid_loss": 0.0969, "step": 479, "time": 49.21 }, { "epoch": 0.4, "learning_rate": "3.6262e-05", "loss": 0.0895, "slid_loss": 0.0968, "step": 480, "time": 50.52 }, { "epoch": 0.41, "learning_rate": "3.6211e-05", "loss": 0.0999, "slid_loss": 0.0969, "step": 481, "time": 49.6 }, { "epoch": 0.41, "learning_rate": "3.6160e-05", "loss": 0.0921, "slid_loss": 0.0968, "step": 482, "time": 50.17 }, { "epoch": 0.41, "learning_rate": "3.6109e-05", "loss": 0.0946, "slid_loss": 0.0967, "step": 483, "time": 50.55 }, { "epoch": 0.41, "learning_rate": "3.6058e-05", "loss": 0.0872, "slid_loss": 0.0966, "step": 484, "time": 50.47 }, { "epoch": 0.41, "learning_rate": "3.6007e-05", "loss": 0.0841, "slid_loss": 0.0965, "step": 485, "time": 49.04 }, { "epoch": 0.41, "learning_rate": "3.5956e-05", "loss": 0.0818, "slid_loss": 0.0963, "step": 486, "time": 50.11 }, { "epoch": 0.41, "learning_rate": "3.5905e-05", "loss": 0.092, "slid_loss": 0.0962, "step": 487, "time": 50.2 }, { "epoch": 0.41, "learning_rate": "3.5854e-05", "loss": 0.1016, "slid_loss": 0.0962, "step": 488, "time": 51.03 }, { "epoch": 0.41, "learning_rate": "3.5802e-05", "loss": 0.0818, "slid_loss": 0.096, "step": 489, "time": 50.43 }, { "epoch": 0.41, "learning_rate": "3.5751e-05", "loss": 0.0875, "slid_loss": 0.0959, "step": 490, "time": 50.12 }, { "epoch": 0.41, "learning_rate": "3.5700e-05", "loss": 0.0918, "slid_loss": 0.0959, "step": 491, "time": 50.16 }, { "epoch": 0.41, "learning_rate": "3.5649e-05", "loss": 0.0991, "slid_loss": 0.0959, "step": 492, "time": 50.21 }, { "epoch": 0.42, "learning_rate": "3.5597e-05", "loss": 0.1004, "slid_loss": 0.096, "step": 493, "time": 49.7 }, { "epoch": 0.42, "learning_rate": "3.5546e-05", "loss": 0.0945, "slid_loss": 0.096, "step": 494, "time": 49.21 }, { "epoch": 0.42, "learning_rate": "3.5494e-05", "loss": 0.0967, "slid_loss": 0.0959, "step": 495, "time": 50.65 }, { "epoch": 0.42, "learning_rate": "3.5443e-05", "loss": 0.099, "slid_loss": 0.0959, "step": 496, "time": 49.11 }, { "epoch": 0.42, "learning_rate": "3.5391e-05", "loss": 0.0892, "slid_loss": 0.0958, "step": 497, "time": 50.7 }, { "epoch": 0.42, "learning_rate": "3.5340e-05", "loss": 0.0896, "slid_loss": 0.0958, "step": 498, "time": 49.79 }, { "epoch": 0.42, "learning_rate": "3.5288e-05", "loss": 0.0905, "slid_loss": 0.0956, "step": 499, "time": 50.32 }, { "epoch": 0.42, "learning_rate": "3.5237e-05", "loss": 0.0978, "slid_loss": 0.0956, "step": 500, "time": 51.04 }, { "epoch": 0.42, "learning_rate": "3.5185e-05", "loss": 0.098, "slid_loss": 0.0956, "step": 501, "time": 50.18 }, { "epoch": 0.42, "learning_rate": "3.5133e-05", "loss": 0.094, "slid_loss": 0.0955, "step": 502, "time": 51.22 }, { "epoch": 0.42, "learning_rate": "3.5082e-05", "loss": 0.0924, "slid_loss": 0.0955, "step": 503, "time": 50.92 }, { "epoch": 0.42, "learning_rate": "3.5030e-05", "loss": 0.0956, "slid_loss": 0.0954, "step": 504, "time": 49.98 }, { "epoch": 0.43, "learning_rate": "3.4978e-05", "loss": 0.0919, "slid_loss": 0.0954, "step": 505, "time": 50.96 }, { "epoch": 0.43, "learning_rate": "3.4926e-05", "loss": 0.0922, "slid_loss": 0.0954, "step": 506, "time": 50.05 }, { "epoch": 0.43, "learning_rate": "3.4874e-05", "loss": 0.0878, "slid_loss": 0.0952, "step": 507, "time": 50.51 }, { "epoch": 0.43, "learning_rate": "3.4822e-05", "loss": 0.0928, "slid_loss": 0.0952, "step": 508, "time": 51.59 }, { "epoch": 0.43, "learning_rate": "3.4770e-05", "loss": 0.0835, "slid_loss": 0.095, "step": 509, "time": 50.79 }, { "epoch": 0.43, "learning_rate": "3.4718e-05", "loss": 0.0813, "slid_loss": 0.0948, "step": 510, "time": 51.13 }, { "epoch": 0.43, "learning_rate": "3.4666e-05", "loss": 0.0924, "slid_loss": 0.0948, "step": 511, "time": 50.89 }, { "epoch": 0.43, "learning_rate": "3.4614e-05", "loss": 0.0865, "slid_loss": 0.0946, "step": 512, "time": 50.52 }, { "epoch": 0.43, "learning_rate": "3.4562e-05", "loss": 0.0915, "slid_loss": 0.0946, "step": 513, "time": 49.22 }, { "epoch": 0.43, "learning_rate": "3.4510e-05", "loss": 0.088, "slid_loss": 0.0945, "step": 514, "time": 51.76 }, { "epoch": 0.43, "learning_rate": "3.4458e-05", "loss": 0.0942, "slid_loss": 0.0945, "step": 515, "time": 49.79 }, { "epoch": 0.43, "learning_rate": "3.4406e-05", "loss": 0.0802, "slid_loss": 0.0943, "step": 516, "time": 50.35 }, { "epoch": 0.44, "learning_rate": "3.4353e-05", "loss": 0.0937, "slid_loss": 0.0943, "step": 517, "time": 50.31 }, { "epoch": 0.44, "learning_rate": "3.4301e-05", "loss": 0.0881, "slid_loss": 0.0942, "step": 518, "time": 50.01 }, { "epoch": 0.44, "learning_rate": "3.4249e-05", "loss": 0.0916, "slid_loss": 0.0942, "step": 519, "time": 50.51 }, { "epoch": 0.44, "learning_rate": "3.4197e-05", "loss": 0.087, "slid_loss": 0.0941, "step": 520, "time": 50.07 }, { "epoch": 0.44, "learning_rate": "3.4144e-05", "loss": 0.0915, "slid_loss": 0.0941, "step": 521, "time": 49.6 }, { "epoch": 0.44, "learning_rate": "3.4092e-05", "loss": 0.0853, "slid_loss": 0.0939, "step": 522, "time": 50.98 }, { "epoch": 0.44, "learning_rate": "3.4039e-05", "loss": 0.098, "slid_loss": 0.0939, "step": 523, "time": 50.35 }, { "epoch": 0.44, "learning_rate": "3.3987e-05", "loss": 0.0884, "slid_loss": 0.0938, "step": 524, "time": 50.47 }, { "epoch": 0.44, "learning_rate": "3.3935e-05", "loss": 0.0898, "slid_loss": 0.0937, "step": 525, "time": 49.4 }, { "epoch": 0.44, "learning_rate": "3.3882e-05", "loss": 0.0934, "slid_loss": 0.0936, "step": 526, "time": 50.51 }, { "epoch": 0.44, "learning_rate": "3.3830e-05", "loss": 0.0887, "slid_loss": 0.0936, "step": 527, "time": 50.24 }, { "epoch": 0.44, "learning_rate": "3.3777e-05", "loss": 0.0879, "slid_loss": 0.0935, "step": 528, "time": 49.79 }, { "epoch": 0.45, "learning_rate": "3.3724e-05", "loss": 0.0888, "slid_loss": 0.0934, "step": 529, "time": 51.02 }, { "epoch": 0.45, "learning_rate": "3.3672e-05", "loss": 0.0827, "slid_loss": 0.0931, "step": 530, "time": 50.8 }, { "epoch": 0.45, "learning_rate": "3.3619e-05", "loss": 0.0936, "slid_loss": 0.0931, "step": 531, "time": 49.21 }, { "epoch": 0.45, "learning_rate": "3.3567e-05", "loss": 0.0921, "slid_loss": 0.093, "step": 532, "time": 50.17 }, { "epoch": 0.45, "learning_rate": "3.3514e-05", "loss": 0.0921, "slid_loss": 0.093, "step": 533, "time": 50.14 }, { "epoch": 0.45, "learning_rate": "3.3461e-05", "loss": 0.0908, "slid_loss": 0.0929, "step": 534, "time": 49.42 }, { "epoch": 0.45, "learning_rate": "3.3409e-05", "loss": 0.0888, "slid_loss": 0.0928, "step": 535, "time": 49.95 }, { "epoch": 0.45, "learning_rate": "3.3356e-05", "loss": 0.0804, "slid_loss": 0.0928, "step": 536, "time": 49.73 }, { "epoch": 0.45, "learning_rate": "3.3303e-05", "loss": 0.0879, "slid_loss": 0.0926, "step": 537, "time": 49.52 }, { "epoch": 0.45, "learning_rate": "3.3250e-05", "loss": 0.0935, "slid_loss": 0.0926, "step": 538, "time": 51.27 }, { "epoch": 0.45, "learning_rate": "3.3197e-05", "loss": 0.0855, "slid_loss": 0.0925, "step": 539, "time": 49.51 }, { "epoch": 0.45, "learning_rate": "3.3145e-05", "loss": 0.0945, "slid_loss": 0.0925, "step": 540, "time": 49.97 }, { "epoch": 0.46, "learning_rate": "3.3092e-05", "loss": 0.088, "slid_loss": 0.0923, "step": 541, "time": 50.55 }, { "epoch": 0.46, "learning_rate": "3.3039e-05", "loss": 0.0833, "slid_loss": 0.0922, "step": 542, "time": 50.2 }, { "epoch": 0.46, "learning_rate": "3.2986e-05", "loss": 0.088, "slid_loss": 0.0921, "step": 543, "time": 50.99 }, { "epoch": 0.46, "learning_rate": "3.2933e-05", "loss": 0.093, "slid_loss": 0.0921, "step": 544, "time": 50.34 }, { "epoch": 0.46, "learning_rate": "3.2880e-05", "loss": 0.0923, "slid_loss": 0.092, "step": 545, "time": 50.31 }, { "epoch": 0.46, "learning_rate": "3.2827e-05", "loss": 0.0845, "slid_loss": 0.0918, "step": 546, "time": 49.06 }, { "epoch": 0.46, "learning_rate": "3.2774e-05", "loss": 0.0902, "slid_loss": 0.0918, "step": 547, "time": 49.91 }, { "epoch": 0.46, "learning_rate": "3.2721e-05", "loss": 0.0919, "slid_loss": 0.0917, "step": 548, "time": 50.44 }, { "epoch": 0.46, "learning_rate": "3.2668e-05", "loss": 0.0874, "slid_loss": 0.0917, "step": 549, "time": 50.91 }, { "epoch": 0.46, "learning_rate": "3.2615e-05", "loss": 0.0937, "slid_loss": 0.0917, "step": 550, "time": 49.45 }, { "epoch": 0.46, "learning_rate": "3.2562e-05", "loss": 0.0837, "slid_loss": 0.0915, "step": 551, "time": 49.69 }, { "epoch": 0.47, "learning_rate": "3.2509e-05", "loss": 0.0947, "slid_loss": 0.0915, "step": 552, "time": 51.26 }, { "epoch": 0.47, "learning_rate": "3.2456e-05", "loss": 0.0852, "slid_loss": 0.0914, "step": 553, "time": 50.23 }, { "epoch": 0.47, "learning_rate": "3.2403e-05", "loss": 0.0913, "slid_loss": 0.0914, "step": 554, "time": 49.45 }, { "epoch": 0.47, "learning_rate": "3.2349e-05", "loss": 0.0853, "slid_loss": 0.0913, "step": 555, "time": 48.73 }, { "epoch": 0.47, "learning_rate": "3.2296e-05", "loss": 0.0867, "slid_loss": 0.0912, "step": 556, "time": 51.28 }, { "epoch": 0.47, "learning_rate": "3.2243e-05", "loss": 0.0795, "slid_loss": 0.0911, "step": 557, "time": 50.56 }, { "epoch": 0.47, "learning_rate": "3.2190e-05", "loss": 0.0921, "slid_loss": 0.0911, "step": 558, "time": 50.84 }, { "epoch": 0.47, "learning_rate": "3.2137e-05", "loss": 0.0859, "slid_loss": 0.091, "step": 559, "time": 51.36 }, { "epoch": 0.47, "learning_rate": "3.2083e-05", "loss": 0.0881, "slid_loss": 0.0909, "step": 560, "time": 51.03 }, { "epoch": 0.47, "learning_rate": "3.2030e-05", "loss": 0.0836, "slid_loss": 0.0908, "step": 561, "time": 50.4 }, { "epoch": 0.47, "learning_rate": "3.1977e-05", "loss": 0.0796, "slid_loss": 0.0907, "step": 562, "time": 49.27 }, { "epoch": 0.47, "learning_rate": "3.1924e-05", "loss": 0.084, "slid_loss": 0.0906, "step": 563, "time": 50.09 }, { "epoch": 0.48, "learning_rate": "3.1870e-05", "loss": 0.0798, "slid_loss": 0.0904, "step": 564, "time": 49.7 }, { "epoch": 0.48, "learning_rate": "3.1817e-05", "loss": 0.0912, "slid_loss": 0.0904, "step": 565, "time": 50.56 }, { "epoch": 0.48, "learning_rate": "3.1764e-05", "loss": 0.0912, "slid_loss": 0.0903, "step": 566, "time": 49.87 }, { "epoch": 0.48, "learning_rate": "3.1711e-05", "loss": 0.0903, "slid_loss": 0.0903, "step": 567, "time": 50.11 }, { "epoch": 0.48, "learning_rate": "3.1657e-05", "loss": 0.0803, "slid_loss": 0.0901, "step": 568, "time": 49.84 }, { "epoch": 0.48, "learning_rate": "3.1604e-05", "loss": 0.0858, "slid_loss": 0.09, "step": 569, "time": 49.68 }, { "epoch": 0.48, "learning_rate": "3.1551e-05", "loss": 0.0928, "slid_loss": 0.09, "step": 570, "time": 52.25 }, { "epoch": 0.48, "learning_rate": "3.1497e-05", "loss": 0.084, "slid_loss": 0.09, "step": 571, "time": 49.77 }, { "epoch": 0.48, "learning_rate": "3.1444e-05", "loss": 0.0865, "slid_loss": 0.09, "step": 572, "time": 50.52 }, { "epoch": 0.48, "learning_rate": "3.1390e-05", "loss": 0.0862, "slid_loss": 0.0899, "step": 573, "time": 50.04 }, { "epoch": 0.48, "learning_rate": "3.1337e-05", "loss": 0.0776, "slid_loss": 0.0897, "step": 574, "time": 49.82 }, { "epoch": 0.48, "learning_rate": "3.1284e-05", "loss": 0.0846, "slid_loss": 0.0896, "step": 575, "time": 49.85 }, { "epoch": 0.49, "learning_rate": "3.1230e-05", "loss": 0.094, "slid_loss": 0.0895, "step": 576, "time": 51.33 }, { "epoch": 0.49, "learning_rate": "3.1177e-05", "loss": 0.094, "slid_loss": 0.0895, "step": 577, "time": 51.52 }, { "epoch": 0.49, "learning_rate": "3.1123e-05", "loss": 0.0887, "slid_loss": 0.0895, "step": 578, "time": 52.36 }, { "epoch": 0.49, "learning_rate": "3.1070e-05", "loss": 0.0871, "slid_loss": 0.0894, "step": 579, "time": 51.16 }, { "epoch": 0.49, "learning_rate": "3.1016e-05", "loss": 0.0923, "slid_loss": 0.0895, "step": 580, "time": 51.52 }, { "epoch": 0.49, "learning_rate": "3.0963e-05", "loss": 0.0856, "slid_loss": 0.0893, "step": 581, "time": 50.76 }, { "epoch": 0.49, "learning_rate": "3.0910e-05", "loss": 0.0837, "slid_loss": 0.0892, "step": 582, "time": 50.65 }, { "epoch": 0.49, "learning_rate": "3.0856e-05", "loss": 0.0897, "slid_loss": 0.0892, "step": 583, "time": 50.92 }, { "epoch": 0.49, "learning_rate": "3.0803e-05", "loss": 0.0827, "slid_loss": 0.0891, "step": 584, "time": 50.57 }, { "epoch": 0.49, "learning_rate": "3.0749e-05", "loss": 0.0824, "slid_loss": 0.0891, "step": 585, "time": 51.06 }, { "epoch": 0.49, "learning_rate": "3.0696e-05", "loss": 0.0809, "slid_loss": 0.0891, "step": 586, "time": 51.44 }, { "epoch": 0.49, "learning_rate": "3.0642e-05", "loss": 0.0875, "slid_loss": 0.0891, "step": 587, "time": 49.4 }, { "epoch": 0.5, "learning_rate": "3.0589e-05", "loss": 0.0821, "slid_loss": 0.0889, "step": 588, "time": 50.81 }, { "epoch": 0.5, "learning_rate": "3.0535e-05", "loss": 0.0799, "slid_loss": 0.0889, "step": 589, "time": 50.21 }, { "epoch": 0.5, "learning_rate": "3.0482e-05", "loss": 0.0851, "slid_loss": 0.0888, "step": 590, "time": 50.46 }, { "epoch": 0.5, "learning_rate": "3.0428e-05", "loss": 0.081, "slid_loss": 0.0887, "step": 591, "time": 51.03 }, { "epoch": 0.5, "learning_rate": "3.0375e-05", "loss": 0.0862, "slid_loss": 0.0886, "step": 592, "time": 50.6 }, { "epoch": 0.5, "learning_rate": "3.0321e-05", "loss": 0.09, "slid_loss": 0.0885, "step": 593, "time": 50.32 }, { "epoch": 0.5, "learning_rate": "3.0268e-05", "loss": 0.0889, "slid_loss": 0.0884, "step": 594, "time": 50.44 }, { "epoch": 0.5, "learning_rate": "3.0214e-05", "loss": 0.0812, "slid_loss": 0.0883, "step": 595, "time": 50.13 }, { "epoch": 0.5, "learning_rate": "3.0161e-05", "loss": 0.0875, "slid_loss": 0.0882, "step": 596, "time": 51.01 }, { "epoch": 0.5, "learning_rate": "3.0107e-05", "loss": 0.0879, "slid_loss": 0.0882, "step": 597, "time": 49.94 }, { "epoch": 0.5, "learning_rate": "3.0054e-05", "loss": 0.0823, "slid_loss": 0.0881, "step": 598, "time": 50.44 }, { "epoch": 0.5, "learning_rate": "3.0000e-05", "loss": 0.0851, "slid_loss": 0.088, "step": 599, "time": 49.98 }, { "epoch": 0.51, "learning_rate": "2.9946e-05", "loss": 0.0878, "slid_loss": 0.0879, "step": 600, "time": 49.65 }, { "epoch": 0.51, "learning_rate": "2.9893e-05", "loss": 0.0778, "slid_loss": 0.0877, "step": 601, "time": 145.75 }, { "epoch": 0.51, "learning_rate": "2.9839e-05", "loss": 0.0902, "slid_loss": 0.0877, "step": 602, "time": 49.79 }, { "epoch": 0.51, "learning_rate": "2.9786e-05", "loss": 0.0809, "slid_loss": 0.0876, "step": 603, "time": 49.71 }, { "epoch": 0.51, "learning_rate": "2.9732e-05", "loss": 0.0795, "slid_loss": 0.0874, "step": 604, "time": 49.44 }, { "epoch": 0.51, "learning_rate": "2.9679e-05", "loss": 0.082, "slid_loss": 0.0873, "step": 605, "time": 50.19 }, { "epoch": 0.51, "learning_rate": "2.9625e-05", "loss": 0.0861, "slid_loss": 0.0873, "step": 606, "time": 49.42 }, { "epoch": 0.51, "learning_rate": "2.9572e-05", "loss": 0.0809, "slid_loss": 0.0872, "step": 607, "time": 50.81 }, { "epoch": 0.51, "learning_rate": "2.9518e-05", "loss": 0.0857, "slid_loss": 0.0871, "step": 608, "time": 50.6 }, { "epoch": 0.51, "learning_rate": "2.9465e-05", "loss": 0.0854, "slid_loss": 0.0871, "step": 609, "time": 50.93 }, { "epoch": 0.51, "learning_rate": "2.9411e-05", "loss": 0.0795, "slid_loss": 0.0871, "step": 610, "time": 49.67 }, { "epoch": 0.51, "learning_rate": "2.9358e-05", "loss": 0.0901, "slid_loss": 0.0871, "step": 611, "time": 49.77 }, { "epoch": 0.52, "learning_rate": "2.9304e-05", "loss": 0.0833, "slid_loss": 0.0871, "step": 612, "time": 50.3 }, { "epoch": 0.52, "learning_rate": "2.9251e-05", "loss": 0.0857, "slid_loss": 0.087, "step": 613, "time": 49.94 }, { "epoch": 0.52, "learning_rate": "2.9197e-05", "loss": 0.081, "slid_loss": 0.0869, "step": 614, "time": 50.62 }, { "epoch": 0.52, "learning_rate": "2.9144e-05", "loss": 0.0867, "slid_loss": 0.0869, "step": 615, "time": 50.73 }, { "epoch": 0.52, "learning_rate": "2.9090e-05", "loss": 0.0834, "slid_loss": 0.0869, "step": 616, "time": 49.48 }, { "epoch": 0.52, "learning_rate": "2.9037e-05", "loss": 0.0787, "slid_loss": 0.0867, "step": 617, "time": 50.18 }, { "epoch": 0.52, "learning_rate": "2.8984e-05", "loss": 0.08, "slid_loss": 0.0867, "step": 618, "time": 49.4 }, { "epoch": 0.52, "learning_rate": "2.8930e-05", "loss": 0.0857, "slid_loss": 0.0866, "step": 619, "time": 49.67 }, { "epoch": 0.52, "learning_rate": "2.8877e-05", "loss": 0.0838, "slid_loss": 0.0866, "step": 620, "time": 49.79 }, { "epoch": 0.52, "learning_rate": "2.8823e-05", "loss": 0.0843, "slid_loss": 0.0865, "step": 621, "time": 51.24 }, { "epoch": 0.52, "learning_rate": "2.8770e-05", "loss": 0.0788, "slid_loss": 0.0864, "step": 622, "time": 50.61 }, { "epoch": 0.52, "learning_rate": "2.8716e-05", "loss": 0.0832, "slid_loss": 0.0863, "step": 623, "time": 50.67 }, { "epoch": 0.53, "learning_rate": "2.8663e-05", "loss": 0.0808, "slid_loss": 0.0862, "step": 624, "time": 50.24 }, { "epoch": 0.53, "learning_rate": "2.8610e-05", "loss": 0.0775, "slid_loss": 0.0861, "step": 625, "time": 50.53 }, { "epoch": 0.53, "learning_rate": "2.8556e-05", "loss": 0.0756, "slid_loss": 0.0859, "step": 626, "time": 49.76 }, { "epoch": 0.53, "learning_rate": "2.8503e-05", "loss": 0.0886, "slid_loss": 0.0859, "step": 627, "time": 50.08 }, { "epoch": 0.53, "learning_rate": "2.8449e-05", "loss": 0.0861, "slid_loss": 0.0859, "step": 628, "time": 50.08 }, { "epoch": 0.53, "learning_rate": "2.8396e-05", "loss": 0.0826, "slid_loss": 0.0858, "step": 629, "time": 49.85 }, { "epoch": 0.53, "learning_rate": "2.8343e-05", "loss": 0.0809, "slid_loss": 0.0858, "step": 630, "time": 50.94 }, { "epoch": 0.53, "learning_rate": "2.8289e-05", "loss": 0.0838, "slid_loss": 0.0857, "step": 631, "time": 49.9 }, { "epoch": 0.53, "learning_rate": "2.8236e-05", "loss": 0.085, "slid_loss": 0.0856, "step": 632, "time": 50.05 }, { "epoch": 0.53, "learning_rate": "2.8183e-05", "loss": 0.0836, "slid_loss": 0.0855, "step": 633, "time": 50.03 }, { "epoch": 0.53, "learning_rate": "2.8130e-05", "loss": 0.0834, "slid_loss": 0.0855, "step": 634, "time": 49.36 }, { "epoch": 0.54, "learning_rate": "2.8076e-05", "loss": 0.0832, "slid_loss": 0.0854, "step": 635, "time": 51.01 }, { "epoch": 0.54, "learning_rate": "2.8023e-05", "loss": 0.0842, "slid_loss": 0.0855, "step": 636, "time": 49.91 }, { "epoch": 0.54, "learning_rate": "2.7970e-05", "loss": 0.0757, "slid_loss": 0.0853, "step": 637, "time": 49.72 }, { "epoch": 0.54, "learning_rate": "2.7917e-05", "loss": 0.0803, "slid_loss": 0.0852, "step": 638, "time": 51.54 }, { "epoch": 0.54, "learning_rate": "2.7863e-05", "loss": 0.0808, "slid_loss": 0.0852, "step": 639, "time": 51.19 }, { "epoch": 0.54, "learning_rate": "2.7810e-05", "loss": 0.0765, "slid_loss": 0.085, "step": 640, "time": 50.22 }, { "epoch": 0.54, "learning_rate": "2.7757e-05", "loss": 0.0847, "slid_loss": 0.0849, "step": 641, "time": 50.81 }, { "epoch": 0.54, "learning_rate": "2.7704e-05", "loss": 0.0833, "slid_loss": 0.0849, "step": 642, "time": 50.13 }, { "epoch": 0.54, "learning_rate": "2.7651e-05", "loss": 0.0754, "slid_loss": 0.0848, "step": 643, "time": 50.72 }, { "epoch": 0.54, "learning_rate": "2.7597e-05", "loss": 0.0837, "slid_loss": 0.0847, "step": 644, "time": 50.36 }, { "epoch": 0.54, "learning_rate": "2.7544e-05", "loss": 0.0866, "slid_loss": 0.0847, "step": 645, "time": 50.84 }, { "epoch": 0.54, "learning_rate": "2.7491e-05", "loss": 0.0809, "slid_loss": 0.0846, "step": 646, "time": 49.7 }, { "epoch": 0.55, "learning_rate": "2.7438e-05", "loss": 0.079, "slid_loss": 0.0845, "step": 647, "time": 49.87 }, { "epoch": 0.55, "learning_rate": "2.7385e-05", "loss": 0.0839, "slid_loss": 0.0844, "step": 648, "time": 49.94 }, { "epoch": 0.55, "learning_rate": "2.7332e-05", "loss": 0.0819, "slid_loss": 0.0844, "step": 649, "time": 50.47 }, { "epoch": 0.55, "learning_rate": "2.7279e-05", "loss": 0.0836, "slid_loss": 0.0843, "step": 650, "time": 51.3 }, { "epoch": 0.55, "learning_rate": "2.7226e-05", "loss": 0.089, "slid_loss": 0.0843, "step": 651, "time": 50.39 }, { "epoch": 0.55, "learning_rate": "2.7173e-05", "loss": 0.0855, "slid_loss": 0.0842, "step": 652, "time": 50.59 }, { "epoch": 0.55, "learning_rate": "2.7120e-05", "loss": 0.0786, "slid_loss": 0.0842, "step": 653, "time": 49.96 }, { "epoch": 0.55, "learning_rate": "2.7067e-05", "loss": 0.0871, "slid_loss": 0.0841, "step": 654, "time": 51.69 }, { "epoch": 0.55, "learning_rate": "2.7014e-05", "loss": 0.079, "slid_loss": 0.0841, "step": 655, "time": 49.12 }, { "epoch": 0.55, "learning_rate": "2.6961e-05", "loss": 0.0859, "slid_loss": 0.0841, "step": 656, "time": 50.36 }, { "epoch": 0.55, "learning_rate": "2.6908e-05", "loss": 0.0823, "slid_loss": 0.0841, "step": 657, "time": 50.74 }, { "epoch": 0.55, "learning_rate": "2.6855e-05", "loss": 0.0847, "slid_loss": 0.084, "step": 658, "time": 50.43 }, { "epoch": 0.56, "learning_rate": "2.6803e-05", "loss": 0.0761, "slid_loss": 0.0839, "step": 659, "time": 50.65 }, { "epoch": 0.56, "learning_rate": "2.6750e-05", "loss": 0.0818, "slid_loss": 0.0839, "step": 660, "time": 49.88 }, { "epoch": 0.56, "learning_rate": "2.6697e-05", "loss": 0.0787, "slid_loss": 0.0838, "step": 661, "time": 50.43 }, { "epoch": 0.56, "learning_rate": "2.6644e-05", "loss": 0.0814, "slid_loss": 0.0838, "step": 662, "time": 50.91 }, { "epoch": 0.56, "learning_rate": "2.6591e-05", "loss": 0.0856, "slid_loss": 0.0838, "step": 663, "time": 50.76 }, { "epoch": 0.56, "learning_rate": "2.6539e-05", "loss": 0.0834, "slid_loss": 0.0839, "step": 664, "time": 50.22 }, { "epoch": 0.56, "learning_rate": "2.6486e-05", "loss": 0.0813, "slid_loss": 0.0838, "step": 665, "time": 49.54 }, { "epoch": 0.56, "learning_rate": "2.6433e-05", "loss": 0.0804, "slid_loss": 0.0837, "step": 666, "time": 50.07 }, { "epoch": 0.56, "learning_rate": "2.6381e-05", "loss": 0.0791, "slid_loss": 0.0836, "step": 667, "time": 49.91 }, { "epoch": 0.56, "learning_rate": "2.6328e-05", "loss": 0.0698, "slid_loss": 0.0835, "step": 668, "time": 49.77 }, { "epoch": 0.56, "learning_rate": "2.6276e-05", "loss": 0.0839, "slid_loss": 0.0834, "step": 669, "time": 50.33 }, { "epoch": 0.56, "learning_rate": "2.6223e-05", "loss": 0.0817, "slid_loss": 0.0833, "step": 670, "time": 50.91 }, { "epoch": 0.57, "learning_rate": "2.6170e-05", "loss": 0.0843, "slid_loss": 0.0833, "step": 671, "time": 51.49 }, { "epoch": 0.57, "learning_rate": "2.6118e-05", "loss": 0.082, "slid_loss": 0.0833, "step": 672, "time": 51.16 }, { "epoch": 0.57, "learning_rate": "2.6065e-05", "loss": 0.0837, "slid_loss": 0.0833, "step": 673, "time": 50.36 }, { "epoch": 0.57, "learning_rate": "2.6013e-05", "loss": 0.083, "slid_loss": 0.0833, "step": 674, "time": 50.52 }, { "epoch": 0.57, "learning_rate": "2.5961e-05", "loss": 0.0838, "slid_loss": 0.0833, "step": 675, "time": 51.8 }, { "epoch": 0.57, "learning_rate": "2.5908e-05", "loss": 0.0803, "slid_loss": 0.0832, "step": 676, "time": 50.37 }, { "epoch": 0.57, "learning_rate": "2.5856e-05", "loss": 0.076, "slid_loss": 0.083, "step": 677, "time": 49.78 }, { "epoch": 0.57, "learning_rate": "2.5803e-05", "loss": 0.0817, "slid_loss": 0.0829, "step": 678, "time": 50.8 }, { "epoch": 0.57, "learning_rate": "2.5751e-05", "loss": 0.0831, "slid_loss": 0.0829, "step": 679, "time": 49.96 }, { "epoch": 0.57, "learning_rate": "2.5699e-05", "loss": 0.0773, "slid_loss": 0.0827, "step": 680, "time": 49.88 }, { "epoch": 0.57, "learning_rate": "2.5647e-05", "loss": 0.0818, "slid_loss": 0.0827, "step": 681, "time": 50.24 }, { "epoch": 0.57, "learning_rate": "2.5594e-05", "loss": 0.0836, "slid_loss": 0.0827, "step": 682, "time": 49.63 }, { "epoch": 0.58, "learning_rate": "2.5542e-05", "loss": 0.0785, "slid_loss": 0.0826, "step": 683, "time": 49.66 }, { "epoch": 0.58, "learning_rate": "2.5490e-05", "loss": 0.09, "slid_loss": 0.0826, "step": 684, "time": 50.71 }, { "epoch": 0.58, "learning_rate": "2.5438e-05", "loss": 0.0745, "slid_loss": 0.0826, "step": 685, "time": 49.38 }, { "epoch": 0.58, "learning_rate": "2.5386e-05", "loss": 0.0806, "slid_loss": 0.0826, "step": 686, "time": 50.25 }, { "epoch": 0.58, "learning_rate": "2.5334e-05", "loss": 0.0842, "slid_loss": 0.0825, "step": 687, "time": 50.26 }, { "epoch": 0.58, "learning_rate": "2.5282e-05", "loss": 0.0841, "slid_loss": 0.0825, "step": 688, "time": 49.91 }, { "epoch": 0.58, "learning_rate": "2.5230e-05", "loss": 0.0801, "slid_loss": 0.0826, "step": 689, "time": 49.73 }, { "epoch": 0.58, "learning_rate": "2.5178e-05", "loss": 0.0846, "slid_loss": 0.0825, "step": 690, "time": 49.86 }, { "epoch": 0.58, "learning_rate": "2.5126e-05", "loss": 0.0858, "slid_loss": 0.0826, "step": 691, "time": 50.09 }, { "epoch": 0.58, "learning_rate": "2.5074e-05", "loss": 0.0758, "slid_loss": 0.0825, "step": 692, "time": 50.28 }, { "epoch": 0.58, "learning_rate": "2.5022e-05", "loss": 0.0839, "slid_loss": 0.0824, "step": 693, "time": 49.71 }, { "epoch": 0.58, "learning_rate": "2.4970e-05", "loss": 0.0794, "slid_loss": 0.0823, "step": 694, "time": 49.94 }, { "epoch": 0.59, "learning_rate": "2.4918e-05", "loss": 0.078, "slid_loss": 0.0823, "step": 695, "time": 49.81 }, { "epoch": 0.59, "learning_rate": "2.4867e-05", "loss": 0.0788, "slid_loss": 0.0822, "step": 696, "time": 50.66 }, { "epoch": 0.59, "learning_rate": "2.4815e-05", "loss": 0.0762, "slid_loss": 0.0821, "step": 697, "time": 50.26 }, { "epoch": 0.59, "learning_rate": "2.4763e-05", "loss": 0.0832, "slid_loss": 0.0821, "step": 698, "time": 50.27 }, { "epoch": 0.59, "learning_rate": "2.4712e-05", "loss": 0.0777, "slid_loss": 0.082, "step": 699, "time": 51.04 }, { "epoch": 0.59, "learning_rate": "2.4660e-05", "loss": 0.0774, "slid_loss": 0.0819, "step": 700, "time": 50.6 }, { "epoch": 0.59, "learning_rate": "2.4609e-05", "loss": 0.0783, "slid_loss": 0.0819, "step": 701, "time": 50.55 }, { "epoch": 0.59, "learning_rate": "2.4557e-05", "loss": 0.0789, "slid_loss": 0.0818, "step": 702, "time": 50.02 }, { "epoch": 0.59, "learning_rate": "2.4506e-05", "loss": 0.0833, "slid_loss": 0.0818, "step": 703, "time": 50.41 }, { "epoch": 0.59, "learning_rate": "2.4454e-05", "loss": 0.0726, "slid_loss": 0.0818, "step": 704, "time": 50.06 }, { "epoch": 0.59, "learning_rate": "2.4403e-05", "loss": 0.0788, "slid_loss": 0.0817, "step": 705, "time": 49.42 }, { "epoch": 0.59, "learning_rate": "2.4351e-05", "loss": 0.0782, "slid_loss": 0.0817, "step": 706, "time": 50.21 }, { "epoch": 0.6, "learning_rate": "2.4300e-05", "loss": 0.0795, "slid_loss": 0.0816, "step": 707, "time": 50.91 }, { "epoch": 0.6, "learning_rate": "2.4249e-05", "loss": 0.0795, "slid_loss": 0.0816, "step": 708, "time": 50.38 }, { "epoch": 0.6, "learning_rate": "2.4198e-05", "loss": 0.0716, "slid_loss": 0.0814, "step": 709, "time": 50.42 }, { "epoch": 0.6, "learning_rate": "2.4146e-05", "loss": 0.0741, "slid_loss": 0.0814, "step": 710, "time": 50.53 }, { "epoch": 0.6, "learning_rate": "2.4095e-05", "loss": 0.0789, "slid_loss": 0.0813, "step": 711, "time": 51.59 }, { "epoch": 0.6, "learning_rate": "2.4044e-05", "loss": 0.0765, "slid_loss": 0.0812, "step": 712, "time": 50.14 }, { "epoch": 0.6, "learning_rate": "2.3993e-05", "loss": 0.0806, "slid_loss": 0.0812, "step": 713, "time": 49.77 }, { "epoch": 0.6, "learning_rate": "2.3942e-05", "loss": 0.0814, "slid_loss": 0.0812, "step": 714, "time": 50.36 }, { "epoch": 0.6, "learning_rate": "2.3891e-05", "loss": 0.082, "slid_loss": 0.0811, "step": 715, "time": 50.35 }, { "epoch": 0.6, "learning_rate": "2.3840e-05", "loss": 0.0752, "slid_loss": 0.081, "step": 716, "time": 50.83 }, { "epoch": 0.6, "learning_rate": "2.3789e-05", "loss": 0.0888, "slid_loss": 0.0811, "step": 717, "time": 51.15 }, { "epoch": 0.6, "learning_rate": "2.3738e-05", "loss": 0.0772, "slid_loss": 0.0811, "step": 718, "time": 50.42 }, { "epoch": 0.61, "learning_rate": "2.3687e-05", "loss": 0.0843, "slid_loss": 0.0811, "step": 719, "time": 49.8 }, { "epoch": 0.61, "learning_rate": "2.3637e-05", "loss": 0.0811, "slid_loss": 0.0811, "step": 720, "time": 48.98 }, { "epoch": 0.61, "learning_rate": "2.3586e-05", "loss": 0.0831, "slid_loss": 0.0811, "step": 721, "time": 51.0 }, { "epoch": 0.61, "learning_rate": "2.3535e-05", "loss": 0.0773, "slid_loss": 0.081, "step": 722, "time": 51.52 }, { "epoch": 0.61, "learning_rate": "2.3485e-05", "loss": 0.0763, "slid_loss": 0.081, "step": 723, "time": 49.4 }, { "epoch": 0.61, "learning_rate": "2.3434e-05", "loss": 0.0844, "slid_loss": 0.081, "step": 724, "time": 50.88 }, { "epoch": 0.61, "learning_rate": "2.3384e-05", "loss": 0.0747, "slid_loss": 0.081, "step": 725, "time": 49.39 }, { "epoch": 0.61, "learning_rate": "2.3333e-05", "loss": 0.0736, "slid_loss": 0.081, "step": 726, "time": 51.02 }, { "epoch": 0.61, "learning_rate": "2.3283e-05", "loss": 0.0796, "slid_loss": 0.0809, "step": 727, "time": 52.93 }, { "epoch": 0.61, "learning_rate": "2.3232e-05", "loss": 0.0787, "slid_loss": 0.0808, "step": 728, "time": 50.68 }, { "epoch": 0.61, "learning_rate": "2.3182e-05", "loss": 0.0762, "slid_loss": 0.0807, "step": 729, "time": 50.08 }, { "epoch": 0.62, "learning_rate": "2.3132e-05", "loss": 0.0886, "slid_loss": 0.0808, "step": 730, "time": 50.04 }, { "epoch": 0.62, "learning_rate": "2.3081e-05", "loss": 0.0691, "slid_loss": 0.0807, "step": 731, "time": 48.89 }, { "epoch": 0.62, "learning_rate": "2.3031e-05", "loss": 0.0823, "slid_loss": 0.0806, "step": 732, "time": 50.3 }, { "epoch": 0.62, "learning_rate": "2.2981e-05", "loss": 0.0803, "slid_loss": 0.0806, "step": 733, "time": 50.6 }, { "epoch": 0.62, "learning_rate": "2.2931e-05", "loss": 0.0807, "slid_loss": 0.0806, "step": 734, "time": 50.87 }, { "epoch": 0.62, "learning_rate": "2.2881e-05", "loss": 0.0716, "slid_loss": 0.0805, "step": 735, "time": 50.8 }, { "epoch": 0.62, "learning_rate": "2.2831e-05", "loss": 0.0781, "slid_loss": 0.0804, "step": 736, "time": 49.98 }, { "epoch": 0.62, "learning_rate": "2.2781e-05", "loss": 0.0744, "slid_loss": 0.0804, "step": 737, "time": 50.06 }, { "epoch": 0.62, "learning_rate": "2.2731e-05", "loss": 0.0692, "slid_loss": 0.0803, "step": 738, "time": 50.17 }, { "epoch": 0.62, "learning_rate": "2.2681e-05", "loss": 0.0745, "slid_loss": 0.0802, "step": 739, "time": 50.44 }, { "epoch": 0.62, "learning_rate": "2.2632e-05", "loss": 0.0751, "slid_loss": 0.0802, "step": 740, "time": 49.64 }, { "epoch": 0.62, "learning_rate": "2.2582e-05", "loss": 0.0722, "slid_loss": 0.0801, "step": 741, "time": 50.41 }, { "epoch": 0.63, "learning_rate": "2.2532e-05", "loss": 0.0821, "slid_loss": 0.0801, "step": 742, "time": 50.58 }, { "epoch": 0.63, "learning_rate": "2.2483e-05", "loss": 0.0804, "slid_loss": 0.0801, "step": 743, "time": 50.66 }, { "epoch": 0.63, "learning_rate": "2.2433e-05", "loss": 0.0752, "slid_loss": 0.08, "step": 744, "time": 48.73 }, { "epoch": 0.63, "learning_rate": "2.2383e-05", "loss": 0.0798, "slid_loss": 0.08, "step": 745, "time": 49.0 }, { "epoch": 0.63, "learning_rate": "2.2334e-05", "loss": 0.0779, "slid_loss": 0.0799, "step": 746, "time": 49.41 }, { "epoch": 0.63, "learning_rate": "2.2285e-05", "loss": 0.0809, "slid_loss": 0.08, "step": 747, "time": 49.59 }, { "epoch": 0.63, "learning_rate": "2.2235e-05", "loss": 0.0813, "slid_loss": 0.0799, "step": 748, "time": 50.63 }, { "epoch": 0.63, "learning_rate": "2.2186e-05", "loss": 0.0795, "slid_loss": 0.0799, "step": 749, "time": 50.01 }, { "epoch": 0.63, "learning_rate": "2.2137e-05", "loss": 0.0773, "slid_loss": 0.0798, "step": 750, "time": 49.16 }, { "epoch": 0.63, "learning_rate": "2.2088e-05", "loss": 0.0749, "slid_loss": 0.0797, "step": 751, "time": 51.11 }, { "epoch": 0.63, "learning_rate": "2.2038e-05", "loss": 0.0771, "slid_loss": 0.0796, "step": 752, "time": 50.43 }, { "epoch": 0.63, "learning_rate": "2.1989e-05", "loss": 0.0822, "slid_loss": 0.0796, "step": 753, "time": 50.25 }, { "epoch": 0.64, "learning_rate": "2.1940e-05", "loss": 0.0734, "slid_loss": 0.0795, "step": 754, "time": 50.33 }, { "epoch": 0.64, "learning_rate": "2.1891e-05", "loss": 0.0833, "slid_loss": 0.0796, "step": 755, "time": 50.54 }, { "epoch": 0.64, "learning_rate": "2.1842e-05", "loss": 0.0788, "slid_loss": 0.0795, "step": 756, "time": 50.75 }, { "epoch": 0.64, "learning_rate": "2.1794e-05", "loss": 0.0799, "slid_loss": 0.0795, "step": 757, "time": 49.65 }, { "epoch": 0.64, "learning_rate": "2.1745e-05", "loss": 0.0749, "slid_loss": 0.0794, "step": 758, "time": 50.76 }, { "epoch": 0.64, "learning_rate": "2.1696e-05", "loss": 0.0755, "slid_loss": 0.0794, "step": 759, "time": 50.16 }, { "epoch": 0.64, "learning_rate": "2.1647e-05", "loss": 0.0788, "slid_loss": 0.0793, "step": 760, "time": 50.51 }, { "epoch": 0.64, "learning_rate": "2.1599e-05", "loss": 0.0732, "slid_loss": 0.0793, "step": 761, "time": 50.23 }, { "epoch": 0.64, "learning_rate": "2.1550e-05", "loss": 0.0849, "slid_loss": 0.0793, "step": 762, "time": 50.65 }, { "epoch": 0.64, "learning_rate": "2.1502e-05", "loss": 0.0805, "slid_loss": 0.0793, "step": 763, "time": 50.87 }, { "epoch": 0.64, "learning_rate": "2.1453e-05", "loss": 0.0777, "slid_loss": 0.0792, "step": 764, "time": 49.3 }, { "epoch": 0.64, "learning_rate": "2.1405e-05", "loss": 0.0685, "slid_loss": 0.0791, "step": 765, "time": 50.54 }, { "epoch": 0.65, "learning_rate": "2.1357e-05", "loss": 0.0791, "slid_loss": 0.0791, "step": 766, "time": 49.54 }, { "epoch": 0.65, "learning_rate": "2.1309e-05", "loss": 0.077, "slid_loss": 0.079, "step": 767, "time": 50.39 }, { "epoch": 0.65, "learning_rate": "2.1260e-05", "loss": 0.0735, "slid_loss": 0.0791, "step": 768, "time": 50.49 }, { "epoch": 0.65, "learning_rate": "2.1212e-05", "loss": 0.0773, "slid_loss": 0.079, "step": 769, "time": 49.37 }, { "epoch": 0.65, "learning_rate": "2.1164e-05", "loss": 0.0815, "slid_loss": 0.079, "step": 770, "time": 49.73 }, { "epoch": 0.65, "learning_rate": "2.1116e-05", "loss": 0.0764, "slid_loss": 0.0789, "step": 771, "time": 51.09 }, { "epoch": 0.65, "learning_rate": "2.1068e-05", "loss": 0.0734, "slid_loss": 0.0788, "step": 772, "time": 50.1 }, { "epoch": 0.65, "learning_rate": "2.1020e-05", "loss": 0.0777, "slid_loss": 0.0788, "step": 773, "time": 49.76 }, { "epoch": 0.65, "learning_rate": "2.0973e-05", "loss": 0.0799, "slid_loss": 0.0787, "step": 774, "time": 49.91 }, { "epoch": 0.65, "learning_rate": "2.0925e-05", "loss": 0.0784, "slid_loss": 0.0787, "step": 775, "time": 49.79 }, { "epoch": 0.65, "learning_rate": "2.0877e-05", "loss": 0.0775, "slid_loss": 0.0787, "step": 776, "time": 50.41 }, { "epoch": 0.65, "learning_rate": "2.0830e-05", "loss": 0.0843, "slid_loss": 0.0787, "step": 777, "time": 50.28 }, { "epoch": 0.66, "learning_rate": "2.0782e-05", "loss": 0.0756, "slid_loss": 0.0787, "step": 778, "time": 49.88 }, { "epoch": 0.66, "learning_rate": "2.0735e-05", "loss": 0.0764, "slid_loss": 0.0786, "step": 779, "time": 50.33 }, { "epoch": 0.66, "learning_rate": "2.0687e-05", "loss": 0.0752, "slid_loss": 0.0786, "step": 780, "time": 50.96 }, { "epoch": 0.66, "learning_rate": "2.0640e-05", "loss": 0.0812, "slid_loss": 0.0786, "step": 781, "time": 49.74 }, { "epoch": 0.66, "learning_rate": "2.0593e-05", "loss": 0.0694, "slid_loss": 0.0785, "step": 782, "time": 50.58 }, { "epoch": 0.66, "learning_rate": "2.0546e-05", "loss": 0.0728, "slid_loss": 0.0784, "step": 783, "time": 49.5 }, { "epoch": 0.66, "learning_rate": "2.0498e-05", "loss": 0.0725, "slid_loss": 0.0782, "step": 784, "time": 50.23 }, { "epoch": 0.66, "learning_rate": "2.0451e-05", "loss": 0.0741, "slid_loss": 0.0782, "step": 785, "time": 50.39 }, { "epoch": 0.66, "learning_rate": "2.0404e-05", "loss": 0.078, "slid_loss": 0.0782, "step": 786, "time": 50.91 }, { "epoch": 0.66, "learning_rate": "2.0357e-05", "loss": 0.0758, "slid_loss": 0.0781, "step": 787, "time": 50.31 }, { "epoch": 0.66, "learning_rate": "2.0311e-05", "loss": 0.0787, "slid_loss": 0.0781, "step": 788, "time": 50.34 }, { "epoch": 0.66, "learning_rate": "2.0264e-05", "loss": 0.0759, "slid_loss": 0.078, "step": 789, "time": 50.44 }, { "epoch": 0.67, "learning_rate": "2.0217e-05", "loss": 0.0674, "slid_loss": 0.0778, "step": 790, "time": 49.42 }, { "epoch": 0.67, "learning_rate": "2.0170e-05", "loss": 0.0756, "slid_loss": 0.0777, "step": 791, "time": 50.43 }, { "epoch": 0.67, "learning_rate": "2.0124e-05", "loss": 0.0769, "slid_loss": 0.0777, "step": 792, "time": 50.26 }, { "epoch": 0.67, "learning_rate": "2.0077e-05", "loss": 0.075, "slid_loss": 0.0777, "step": 793, "time": 50.62 }, { "epoch": 0.67, "learning_rate": "2.0031e-05", "loss": 0.0681, "slid_loss": 0.0775, "step": 794, "time": 49.62 }, { "epoch": 0.67, "learning_rate": "1.9985e-05", "loss": 0.0748, "slid_loss": 0.0775, "step": 795, "time": 50.8 }, { "epoch": 0.67, "learning_rate": "1.9938e-05", "loss": 0.0706, "slid_loss": 0.0774, "step": 796, "time": 52.25 }, { "epoch": 0.67, "learning_rate": "1.9892e-05", "loss": 0.0738, "slid_loss": 0.0774, "step": 797, "time": 51.03 }, { "epoch": 0.67, "learning_rate": "1.9846e-05", "loss": 0.0719, "slid_loss": 0.0773, "step": 798, "time": 50.94 }, { "epoch": 0.67, "learning_rate": "1.9800e-05", "loss": 0.075, "slid_loss": 0.0773, "step": 799, "time": 51.4 }, { "epoch": 0.67, "learning_rate": "1.9754e-05", "loss": 0.0811, "slid_loss": 0.0773, "step": 800, "time": 50.78 }, { "epoch": 0.67, "learning_rate": "1.9708e-05", "loss": 0.0773, "slid_loss": 0.0773, "step": 801, "time": 50.44 }, { "epoch": 0.68, "learning_rate": "1.9662e-05", "loss": 0.0672, "slid_loss": 0.0772, "step": 802, "time": 49.79 }, { "epoch": 0.68, "learning_rate": "1.9616e-05", "loss": 0.0683, "slid_loss": 0.077, "step": 803, "time": 49.84 }, { "epoch": 0.68, "learning_rate": "1.9571e-05", "loss": 0.0755, "slid_loss": 0.0771, "step": 804, "time": 49.16 }, { "epoch": 0.68, "learning_rate": "1.9525e-05", "loss": 0.0772, "slid_loss": 0.077, "step": 805, "time": 50.15 }, { "epoch": 0.68, "learning_rate": "1.9479e-05", "loss": 0.0774, "slid_loss": 0.077, "step": 806, "time": 49.0 }, { "epoch": 0.68, "learning_rate": "1.9434e-05", "loss": 0.0808, "slid_loss": 0.077, "step": 807, "time": 50.64 }, { "epoch": 0.68, "learning_rate": "1.9389e-05", "loss": 0.0787, "slid_loss": 0.077, "step": 808, "time": 50.0 }, { "epoch": 0.68, "learning_rate": "1.9343e-05", "loss": 0.0755, "slid_loss": 0.0771, "step": 809, "time": 50.8 }, { "epoch": 0.68, "learning_rate": "1.9298e-05", "loss": 0.0776, "slid_loss": 0.0771, "step": 810, "time": 51.02 }, { "epoch": 0.68, "learning_rate": "1.9253e-05", "loss": 0.0662, "slid_loss": 0.077, "step": 811, "time": 50.55 }, { "epoch": 0.68, "learning_rate": "1.9208e-05", "loss": 0.0802, "slid_loss": 0.077, "step": 812, "time": 50.32 }, { "epoch": 0.68, "learning_rate": "1.9163e-05", "loss": 0.0684, "slid_loss": 0.0769, "step": 813, "time": 51.03 }, { "epoch": 0.69, "learning_rate": "1.9118e-05", "loss": 0.0816, "slid_loss": 0.0769, "step": 814, "time": 49.65 }, { "epoch": 0.69, "learning_rate": "1.9073e-05", "loss": 0.0808, "slid_loss": 0.0769, "step": 815, "time": 50.56 }, { "epoch": 0.69, "learning_rate": "1.9028e-05", "loss": 0.0726, "slid_loss": 0.0769, "step": 816, "time": 50.78 }, { "epoch": 0.69, "learning_rate": "1.8983e-05", "loss": 0.0762, "slid_loss": 0.0767, "step": 817, "time": 50.37 }, { "epoch": 0.69, "learning_rate": "1.8939e-05", "loss": 0.0811, "slid_loss": 0.0768, "step": 818, "time": 51.49 }, { "epoch": 0.69, "learning_rate": "1.8894e-05", "loss": 0.0824, "slid_loss": 0.0768, "step": 819, "time": 50.54 }, { "epoch": 0.69, "learning_rate": "1.8850e-05", "loss": 0.0696, "slid_loss": 0.0766, "step": 820, "time": 50.35 }, { "epoch": 0.69, "learning_rate": "1.8805e-05", "loss": 0.0698, "slid_loss": 0.0765, "step": 821, "time": 50.73 }, { "epoch": 0.69, "learning_rate": "1.8761e-05", "loss": 0.0751, "slid_loss": 0.0765, "step": 822, "time": 49.38 }, { "epoch": 0.69, "learning_rate": "1.8717e-05", "loss": 0.0691, "slid_loss": 0.0764, "step": 823, "time": 50.18 }, { "epoch": 0.69, "learning_rate": "1.8673e-05", "loss": 0.0719, "slid_loss": 0.0763, "step": 824, "time": 50.12 }, { "epoch": 0.7, "learning_rate": "1.8629e-05", "loss": 0.0714, "slid_loss": 0.0763, "step": 825, "time": 49.72 }, { "epoch": 0.7, "learning_rate": "1.8585e-05", "loss": 0.0704, "slid_loss": 0.0762, "step": 826, "time": 49.03 }, { "epoch": 0.7, "learning_rate": "1.8541e-05", "loss": 0.0749, "slid_loss": 0.0762, "step": 827, "time": 50.33 }, { "epoch": 0.7, "learning_rate": "1.8497e-05", "loss": 0.0662, "slid_loss": 0.0761, "step": 828, "time": 50.35 }, { "epoch": 0.7, "learning_rate": "1.8453e-05", "loss": 0.065, "slid_loss": 0.0759, "step": 829, "time": 50.61 }, { "epoch": 0.7, "learning_rate": "1.8409e-05", "loss": 0.0762, "slid_loss": 0.0758, "step": 830, "time": 49.26 }, { "epoch": 0.7, "learning_rate": "1.8366e-05", "loss": 0.0686, "slid_loss": 0.0758, "step": 831, "time": 49.85 }, { "epoch": 0.7, "learning_rate": "1.8322e-05", "loss": 0.0788, "slid_loss": 0.0758, "step": 832, "time": 49.99 }, { "epoch": 0.7, "learning_rate": "1.8279e-05", "loss": 0.0711, "slid_loss": 0.0757, "step": 833, "time": 50.29 }, { "epoch": 0.7, "learning_rate": "1.8236e-05", "loss": 0.075, "slid_loss": 0.0756, "step": 834, "time": 50.15 }, { "epoch": 0.7, "learning_rate": "1.8192e-05", "loss": 0.0684, "slid_loss": 0.0756, "step": 835, "time": 50.14 }, { "epoch": 0.7, "learning_rate": "1.8149e-05", "loss": 0.0714, "slid_loss": 0.0755, "step": 836, "time": 50.2 }, { "epoch": 0.71, "learning_rate": "1.8106e-05", "loss": 0.0764, "slid_loss": 0.0756, "step": 837, "time": 50.29 }, { "epoch": 0.71, "learning_rate": "1.8063e-05", "loss": 0.0687, "slid_loss": 0.0755, "step": 838, "time": 48.96 }, { "epoch": 0.71, "learning_rate": "1.8020e-05", "loss": 0.0731, "slid_loss": 0.0755, "step": 839, "time": 50.12 }, { "epoch": 0.71, "learning_rate": "1.7977e-05", "loss": 0.0736, "slid_loss": 0.0755, "step": 840, "time": 50.1 }, { "epoch": 0.71, "learning_rate": "1.7935e-05", "loss": 0.076, "slid_loss": 0.0756, "step": 841, "time": 51.18 }, { "epoch": 0.71, "learning_rate": "1.7892e-05", "loss": 0.0692, "slid_loss": 0.0754, "step": 842, "time": 50.22 }, { "epoch": 0.71, "learning_rate": "1.7850e-05", "loss": 0.0736, "slid_loss": 0.0754, "step": 843, "time": 49.54 }, { "epoch": 0.71, "learning_rate": "1.7807e-05", "loss": 0.0742, "slid_loss": 0.0753, "step": 844, "time": 50.41 }, { "epoch": 0.71, "learning_rate": "1.7765e-05", "loss": 0.0717, "slid_loss": 0.0753, "step": 845, "time": 51.53 }, { "epoch": 0.71, "learning_rate": "1.7722e-05", "loss": 0.0778, "slid_loss": 0.0753, "step": 846, "time": 50.01 }, { "epoch": 0.71, "learning_rate": "1.7680e-05", "loss": 0.0703, "slid_loss": 0.0752, "step": 847, "time": 50.9 }, { "epoch": 0.71, "learning_rate": "1.7638e-05", "loss": 0.0733, "slid_loss": 0.0751, "step": 848, "time": 50.54 }, { "epoch": 0.72, "learning_rate": "1.7596e-05", "loss": 0.0733, "slid_loss": 0.075, "step": 849, "time": 50.55 }, { "epoch": 0.72, "learning_rate": "1.7554e-05", "loss": 0.0736, "slid_loss": 0.075, "step": 850, "time": 50.86 }, { "epoch": 0.72, "learning_rate": "1.7512e-05", "loss": 0.0751, "slid_loss": 0.075, "step": 851, "time": 49.08 }, { "epoch": 0.72, "learning_rate": "1.7471e-05", "loss": 0.071, "slid_loss": 0.0749, "step": 852, "time": 50.21 }, { "epoch": 0.72, "learning_rate": "1.7429e-05", "loss": 0.0689, "slid_loss": 0.0748, "step": 853, "time": 49.3 }, { "epoch": 0.72, "learning_rate": "1.7387e-05", "loss": 0.0748, "slid_loss": 0.0748, "step": 854, "time": 50.71 }, { "epoch": 0.72, "learning_rate": "1.7346e-05", "loss": 0.0745, "slid_loss": 0.0747, "step": 855, "time": 49.21 }, { "epoch": 0.72, "learning_rate": "1.7304e-05", "loss": 0.0736, "slid_loss": 0.0747, "step": 856, "time": 50.66 }, { "epoch": 0.72, "learning_rate": "1.7263e-05", "loss": 0.0709, "slid_loss": 0.0746, "step": 857, "time": 50.34 }, { "epoch": 0.72, "learning_rate": "1.7222e-05", "loss": 0.0681, "slid_loss": 0.0745, "step": 858, "time": 49.52 }, { "epoch": 0.72, "learning_rate": "1.7181e-05", "loss": 0.0733, "slid_loss": 0.0745, "step": 859, "time": 50.31 }, { "epoch": 0.72, "learning_rate": "1.7140e-05", "loss": 0.0758, "slid_loss": 0.0745, "step": 860, "time": 49.78 }, { "epoch": 0.73, "learning_rate": "1.7099e-05", "loss": 0.0704, "slid_loss": 0.0744, "step": 861, "time": 50.52 }, { "epoch": 0.73, "learning_rate": "1.7058e-05", "loss": 0.0811, "slid_loss": 0.0744, "step": 862, "time": 51.25 }, { "epoch": 0.73, "learning_rate": "1.7017e-05", "loss": 0.0758, "slid_loss": 0.0743, "step": 863, "time": 51.0 }, { "epoch": 0.73, "learning_rate": "1.6976e-05", "loss": 0.074, "slid_loss": 0.0743, "step": 864, "time": 50.33 }, { "epoch": 0.73, "learning_rate": "1.6936e-05", "loss": 0.0731, "slid_loss": 0.0743, "step": 865, "time": 50.83 }, { "epoch": 0.73, "learning_rate": "1.6895e-05", "loss": 0.0701, "slid_loss": 0.0743, "step": 866, "time": 50.76 }, { "epoch": 0.73, "learning_rate": "1.6855e-05", "loss": 0.0704, "slid_loss": 0.0742, "step": 867, "time": 50.87 }, { "epoch": 0.73, "learning_rate": "1.6815e-05", "loss": 0.0653, "slid_loss": 0.0741, "step": 868, "time": 51.76 }, { "epoch": 0.73, "learning_rate": "1.6775e-05", "loss": 0.0693, "slid_loss": 0.074, "step": 869, "time": 50.23 }, { "epoch": 0.73, "learning_rate": "1.6734e-05", "loss": 0.0705, "slid_loss": 0.0739, "step": 870, "time": 51.89 }, { "epoch": 0.73, "learning_rate": "1.6694e-05", "loss": 0.0693, "slid_loss": 0.0739, "step": 871, "time": 49.6 }, { "epoch": 0.73, "learning_rate": "1.6655e-05", "loss": 0.072, "slid_loss": 0.0738, "step": 872, "time": 49.99 }, { "epoch": 0.74, "learning_rate": "1.6615e-05", "loss": 0.0683, "slid_loss": 0.0737, "step": 873, "time": 49.51 }, { "epoch": 0.74, "learning_rate": "1.6575e-05", "loss": 0.0718, "slid_loss": 0.0737, "step": 874, "time": 50.81 }, { "epoch": 0.74, "learning_rate": "1.6535e-05", "loss": 0.0735, "slid_loss": 0.0736, "step": 875, "time": 50.2 }, { "epoch": 0.74, "learning_rate": "1.6496e-05", "loss": 0.0715, "slid_loss": 0.0736, "step": 876, "time": 49.88 }, { "epoch": 0.74, "learning_rate": "1.6456e-05", "loss": 0.0794, "slid_loss": 0.0735, "step": 877, "time": 51.5 }, { "epoch": 0.74, "learning_rate": "1.6417e-05", "loss": 0.0712, "slid_loss": 0.0735, "step": 878, "time": 51.09 }, { "epoch": 0.74, "learning_rate": "1.6378e-05", "loss": 0.0643, "slid_loss": 0.0733, "step": 879, "time": 49.15 }, { "epoch": 0.74, "learning_rate": "1.6339e-05", "loss": 0.072, "slid_loss": 0.0733, "step": 880, "time": 50.28 }, { "epoch": 0.74, "learning_rate": "1.6300e-05", "loss": 0.0846, "slid_loss": 0.0733, "step": 881, "time": 50.31 }, { "epoch": 0.74, "learning_rate": "1.6261e-05", "loss": 0.0706, "slid_loss": 0.0734, "step": 882, "time": 49.16 }, { "epoch": 0.74, "learning_rate": "1.6222e-05", "loss": 0.0633, "slid_loss": 0.0733, "step": 883, "time": 50.15 }, { "epoch": 0.74, "learning_rate": "1.6183e-05", "loss": 0.0674, "slid_loss": 0.0732, "step": 884, "time": 50.13 }, { "epoch": 0.75, "learning_rate": "1.6145e-05", "loss": 0.0772, "slid_loss": 0.0732, "step": 885, "time": 50.75 }, { "epoch": 0.75, "learning_rate": "1.6106e-05", "loss": 0.0714, "slid_loss": 0.0732, "step": 886, "time": 50.72 }, { "epoch": 0.75, "learning_rate": "1.6068e-05", "loss": 0.0721, "slid_loss": 0.0731, "step": 887, "time": 50.12 }, { "epoch": 0.75, "learning_rate": "1.6029e-05", "loss": 0.0662, "slid_loss": 0.073, "step": 888, "time": 50.48 }, { "epoch": 0.75, "learning_rate": "1.5991e-05", "loss": 0.0718, "slid_loss": 0.073, "step": 889, "time": 50.12 }, { "epoch": 0.75, "learning_rate": "1.5953e-05", "loss": 0.0659, "slid_loss": 0.073, "step": 890, "time": 50.95 }, { "epoch": 0.75, "learning_rate": "1.5915e-05", "loss": 0.076, "slid_loss": 0.073, "step": 891, "time": 50.21 }, { "epoch": 0.75, "learning_rate": "1.5877e-05", "loss": 0.0755, "slid_loss": 0.0729, "step": 892, "time": 50.8 }, { "epoch": 0.75, "learning_rate": "1.5839e-05", "loss": 0.0744, "slid_loss": 0.0729, "step": 893, "time": 49.39 }, { "epoch": 0.75, "learning_rate": "1.5801e-05", "loss": 0.0627, "slid_loss": 0.0729, "step": 894, "time": 49.81 }, { "epoch": 0.75, "learning_rate": "1.5764e-05", "loss": 0.0708, "slid_loss": 0.0728, "step": 895, "time": 51.24 }, { "epoch": 0.75, "learning_rate": "1.5726e-05", "loss": 0.0693, "slid_loss": 0.0728, "step": 896, "time": 49.32 }, { "epoch": 0.76, "learning_rate": "1.5689e-05", "loss": 0.0716, "slid_loss": 0.0728, "step": 897, "time": 49.4 }, { "epoch": 0.76, "learning_rate": "1.5651e-05", "loss": 0.0715, "slid_loss": 0.0728, "step": 898, "time": 49.56 }, { "epoch": 0.76, "learning_rate": "1.5614e-05", "loss": 0.0684, "slid_loss": 0.0727, "step": 899, "time": 49.4 }, { "epoch": 0.76, "learning_rate": "1.5577e-05", "loss": 0.0693, "slid_loss": 0.0726, "step": 900, "time": 49.32 }, { "epoch": 0.76, "learning_rate": "1.5540e-05", "loss": 0.0741, "slid_loss": 0.0726, "step": 901, "time": 49.64 }, { "epoch": 0.76, "learning_rate": "1.5503e-05", "loss": 0.0792, "slid_loss": 0.0727, "step": 902, "time": 50.43 }, { "epoch": 0.76, "learning_rate": "1.5466e-05", "loss": 0.066, "slid_loss": 0.0727, "step": 903, "time": 49.66 }, { "epoch": 0.76, "learning_rate": "1.5429e-05", "loss": 0.0715, "slid_loss": 0.0726, "step": 904, "time": 50.25 }, { "epoch": 0.76, "learning_rate": "1.5393e-05", "loss": 0.0717, "slid_loss": 0.0726, "step": 905, "time": 49.47 }, { "epoch": 0.76, "learning_rate": "1.5356e-05", "loss": 0.0717, "slid_loss": 0.0725, "step": 906, "time": 50.09 }, { "epoch": 0.76, "learning_rate": "1.5320e-05", "loss": 0.0804, "slid_loss": 0.0725, "step": 907, "time": 51.6 }, { "epoch": 0.77, "learning_rate": "1.5284e-05", "loss": 0.0688, "slid_loss": 0.0724, "step": 908, "time": 50.64 }, { "epoch": 0.77, "learning_rate": "1.5247e-05", "loss": 0.0744, "slid_loss": 0.0724, "step": 909, "time": 49.79 }, { "epoch": 0.77, "learning_rate": "1.5211e-05", "loss": 0.0704, "slid_loss": 0.0723, "step": 910, "time": 50.22 }, { "epoch": 0.77, "learning_rate": "1.5175e-05", "loss": 0.0678, "slid_loss": 0.0724, "step": 911, "time": 50.5 }, { "epoch": 0.77, "learning_rate": "1.5139e-05", "loss": 0.0663, "slid_loss": 0.0722, "step": 912, "time": 50.38 }, { "epoch": 0.77, "learning_rate": "1.5104e-05", "loss": 0.0692, "slid_loss": 0.0722, "step": 913, "time": 49.69 }, { "epoch": 0.77, "learning_rate": "1.5068e-05", "loss": 0.08, "slid_loss": 0.0722, "step": 914, "time": 50.38 }, { "epoch": 0.77, "learning_rate": "1.5033e-05", "loss": 0.0662, "slid_loss": 0.0721, "step": 915, "time": 49.47 }, { "epoch": 0.77, "learning_rate": "1.4997e-05", "loss": 0.0628, "slid_loss": 0.072, "step": 916, "time": 49.56 }, { "epoch": 0.77, "learning_rate": "1.4962e-05", "loss": 0.0669, "slid_loss": 0.0719, "step": 917, "time": 49.33 }, { "epoch": 0.77, "learning_rate": "1.4927e-05", "loss": 0.0701, "slid_loss": 0.0718, "step": 918, "time": 50.52 }, { "epoch": 0.77, "learning_rate": "1.4891e-05", "loss": 0.0707, "slid_loss": 0.0717, "step": 919, "time": 50.65 }, { "epoch": 0.78, "learning_rate": "1.4856e-05", "loss": 0.0679, "slid_loss": 0.0716, "step": 920, "time": 50.36 }, { "epoch": 0.78, "learning_rate": "1.4821e-05", "loss": 0.0707, "slid_loss": 0.0716, "step": 921, "time": 50.22 }, { "epoch": 0.78, "learning_rate": "1.4787e-05", "loss": 0.0775, "slid_loss": 0.0717, "step": 922, "time": 50.7 }, { "epoch": 0.78, "learning_rate": "1.4752e-05", "loss": 0.0693, "slid_loss": 0.0717, "step": 923, "time": 50.19 }, { "epoch": 0.78, "learning_rate": "1.4717e-05", "loss": 0.0706, "slid_loss": 0.0717, "step": 924, "time": 50.19 }, { "epoch": 0.78, "learning_rate": "1.4683e-05", "loss": 0.0676, "slid_loss": 0.0716, "step": 925, "time": 50.29 }, { "epoch": 0.78, "learning_rate": "1.4649e-05", "loss": 0.0709, "slid_loss": 0.0716, "step": 926, "time": 51.33 }, { "epoch": 0.78, "learning_rate": "1.4614e-05", "loss": 0.065, "slid_loss": 0.0715, "step": 927, "time": 48.79 }, { "epoch": 0.78, "learning_rate": "1.4580e-05", "loss": 0.0791, "slid_loss": 0.0717, "step": 928, "time": 51.37 }, { "epoch": 0.78, "learning_rate": "1.4546e-05", "loss": 0.0685, "slid_loss": 0.0717, "step": 929, "time": 50.81 }, { "epoch": 0.78, "learning_rate": "1.4512e-05", "loss": 0.074, "slid_loss": 0.0717, "step": 930, "time": 50.2 }, { "epoch": 0.78, "learning_rate": "1.4478e-05", "loss": 0.0778, "slid_loss": 0.0718, "step": 931, "time": 49.65 }, { "epoch": 0.79, "learning_rate": "1.4445e-05", "loss": 0.0721, "slid_loss": 0.0717, "step": 932, "time": 50.3 }, { "epoch": 0.79, "learning_rate": "1.4411e-05", "loss": 0.073, "slid_loss": 0.0717, "step": 933, "time": 50.04 }, { "epoch": 0.79, "learning_rate": "1.4378e-05", "loss": 0.0665, "slid_loss": 0.0716, "step": 934, "time": 50.87 }, { "epoch": 0.79, "learning_rate": "1.4344e-05", "loss": 0.0768, "slid_loss": 0.0717, "step": 935, "time": 51.22 }, { "epoch": 0.79, "learning_rate": "1.4311e-05", "loss": 0.0628, "slid_loss": 0.0716, "step": 936, "time": 50.79 }, { "epoch": 0.79, "learning_rate": "1.4278e-05", "loss": 0.0734, "slid_loss": 0.0716, "step": 937, "time": 51.51 }, { "epoch": 0.79, "learning_rate": "1.4245e-05", "loss": 0.0779, "slid_loss": 0.0717, "step": 938, "time": 50.14 }, { "epoch": 0.79, "learning_rate": "1.4212e-05", "loss": 0.0684, "slid_loss": 0.0716, "step": 939, "time": 49.17 }, { "epoch": 0.79, "learning_rate": "1.4179e-05", "loss": 0.0746, "slid_loss": 0.0716, "step": 940, "time": 50.72 }, { "epoch": 0.79, "learning_rate": "1.4147e-05", "loss": 0.0741, "slid_loss": 0.0716, "step": 941, "time": 50.21 }, { "epoch": 0.79, "learning_rate": "1.4114e-05", "loss": 0.0788, "slid_loss": 0.0717, "step": 942, "time": 51.26 }, { "epoch": 0.79, "learning_rate": "1.4081e-05", "loss": 0.062, "slid_loss": 0.0716, "step": 943, "time": 49.72 }, { "epoch": 0.8, "learning_rate": "1.4049e-05", "loss": 0.0701, "slid_loss": 0.0716, "step": 944, "time": 49.62 }, { "epoch": 0.8, "learning_rate": "1.4017e-05", "loss": 0.073, "slid_loss": 0.0716, "step": 945, "time": 50.59 }, { "epoch": 0.8, "learning_rate": "1.3985e-05", "loss": 0.0707, "slid_loss": 0.0715, "step": 946, "time": 50.35 }, { "epoch": 0.8, "learning_rate": "1.3953e-05", "loss": 0.0734, "slid_loss": 0.0715, "step": 947, "time": 50.25 }, { "epoch": 0.8, "learning_rate": "1.3921e-05", "loss": 0.0726, "slid_loss": 0.0715, "step": 948, "time": 50.63 }, { "epoch": 0.8, "learning_rate": "1.3889e-05", "loss": 0.0654, "slid_loss": 0.0715, "step": 949, "time": 49.8 }, { "epoch": 0.8, "learning_rate": "1.3857e-05", "loss": 0.0668, "slid_loss": 0.0714, "step": 950, "time": 49.83 }, { "epoch": 0.8, "learning_rate": "1.3826e-05", "loss": 0.0679, "slid_loss": 0.0713, "step": 951, "time": 50.5 }, { "epoch": 0.8, "learning_rate": "1.3795e-05", "loss": 0.0662, "slid_loss": 0.0713, "step": 952, "time": 50.57 }, { "epoch": 0.8, "learning_rate": "1.3763e-05", "loss": 0.0807, "slid_loss": 0.0714, "step": 953, "time": 50.58 }, { "epoch": 0.8, "learning_rate": "1.3732e-05", "loss": 0.0695, "slid_loss": 0.0713, "step": 954, "time": 49.67 }, { "epoch": 0.8, "learning_rate": "1.3701e-05", "loss": 0.0707, "slid_loss": 0.0713, "step": 955, "time": 49.58 }, { "epoch": 0.81, "learning_rate": "1.3670e-05", "loss": 0.0667, "slid_loss": 0.0712, "step": 956, "time": 51.06 }, { "epoch": 0.81, "learning_rate": "1.3639e-05", "loss": 0.0675, "slid_loss": 0.0712, "step": 957, "time": 50.42 }, { "epoch": 0.81, "learning_rate": "1.3608e-05", "loss": 0.0603, "slid_loss": 0.0711, "step": 958, "time": 50.44 }, { "epoch": 0.81, "learning_rate": "1.3578e-05", "loss": 0.08, "slid_loss": 0.0712, "step": 959, "time": 50.83 }, { "epoch": 0.81, "learning_rate": "1.3547e-05", "loss": 0.072, "slid_loss": 0.0711, "step": 960, "time": 50.79 }, { "epoch": 0.81, "learning_rate": "1.3517e-05", "loss": 0.0698, "slid_loss": 0.0711, "step": 961, "time": 49.96 }, { "epoch": 0.81, "learning_rate": "1.3487e-05", "loss": 0.077, "slid_loss": 0.0711, "step": 962, "time": 48.97 }, { "epoch": 0.81, "learning_rate": "1.3457e-05", "loss": 0.0695, "slid_loss": 0.071, "step": 963, "time": 50.08 }, { "epoch": 0.81, "learning_rate": "1.3427e-05", "loss": 0.0703, "slid_loss": 0.071, "step": 964, "time": 50.5 }, { "epoch": 0.81, "learning_rate": "1.3397e-05", "loss": 0.0655, "slid_loss": 0.0709, "step": 965, "time": 50.36 }, { "epoch": 0.81, "learning_rate": "1.3367e-05", "loss": 0.0649, "slid_loss": 0.0709, "step": 966, "time": 50.17 }, { "epoch": 0.81, "learning_rate": "1.3337e-05", "loss": 0.0689, "slid_loss": 0.0709, "step": 967, "time": 50.69 }, { "epoch": 0.82, "learning_rate": "1.3308e-05", "loss": 0.0668, "slid_loss": 0.0709, "step": 968, "time": 49.51 }, { "epoch": 0.82, "learning_rate": "1.3278e-05", "loss": 0.0767, "slid_loss": 0.0709, "step": 969, "time": 50.92 }, { "epoch": 0.82, "learning_rate": "1.3249e-05", "loss": 0.0785, "slid_loss": 0.071, "step": 970, "time": 50.84 }, { "epoch": 0.82, "learning_rate": "1.3220e-05", "loss": 0.0629, "slid_loss": 0.071, "step": 971, "time": 48.92 }, { "epoch": 0.82, "learning_rate": "1.3191e-05", "loss": 0.0749, "slid_loss": 0.071, "step": 972, "time": 50.73 }, { "epoch": 0.82, "learning_rate": "1.3162e-05", "loss": 0.0713, "slid_loss": 0.071, "step": 973, "time": 50.1 }, { "epoch": 0.82, "learning_rate": "1.3133e-05", "loss": 0.0731, "slid_loss": 0.071, "step": 974, "time": 50.45 }, { "epoch": 0.82, "learning_rate": "1.3104e-05", "loss": 0.0745, "slid_loss": 0.071, "step": 975, "time": 49.35 }, { "epoch": 0.82, "learning_rate": "1.3076e-05", "loss": 0.0712, "slid_loss": 0.071, "step": 976, "time": 49.95 }, { "epoch": 0.82, "learning_rate": "1.3047e-05", "loss": 0.0698, "slid_loss": 0.0709, "step": 977, "time": 50.76 }, { "epoch": 0.82, "learning_rate": "1.3019e-05", "loss": 0.0738, "slid_loss": 0.071, "step": 978, "time": 49.49 }, { "epoch": 0.82, "learning_rate": "1.2991e-05", "loss": 0.0709, "slid_loss": 0.071, "step": 979, "time": 49.76 }, { "epoch": 0.83, "learning_rate": "1.2963e-05", "loss": 0.0663, "slid_loss": 0.071, "step": 980, "time": 49.72 }, { "epoch": 0.83, "learning_rate": "1.2935e-05", "loss": 0.0778, "slid_loss": 0.0709, "step": 981, "time": 50.56 }, { "epoch": 0.83, "learning_rate": "1.2907e-05", "loss": 0.0755, "slid_loss": 0.071, "step": 982, "time": 50.17 }, { "epoch": 0.83, "learning_rate": "1.2879e-05", "loss": 0.0711, "slid_loss": 0.071, "step": 983, "time": 50.46 }, { "epoch": 0.83, "learning_rate": "1.2851e-05", "loss": 0.0642, "slid_loss": 0.071, "step": 984, "time": 50.02 }, { "epoch": 0.83, "learning_rate": "1.2824e-05", "loss": 0.0679, "slid_loss": 0.0709, "step": 985, "time": 50.64 }, { "epoch": 0.83, "learning_rate": "1.2797e-05", "loss": 0.0779, "slid_loss": 0.071, "step": 986, "time": 50.06 }, { "epoch": 0.83, "learning_rate": "1.2769e-05", "loss": 0.0616, "slid_loss": 0.0709, "step": 987, "time": 50.95 }, { "epoch": 0.83, "learning_rate": "1.2742e-05", "loss": 0.0667, "slid_loss": 0.0709, "step": 988, "time": 50.86 }, { "epoch": 0.83, "learning_rate": "1.2715e-05", "loss": 0.0658, "slid_loss": 0.0708, "step": 989, "time": 49.81 }, { "epoch": 0.83, "learning_rate": "1.2688e-05", "loss": 0.0623, "slid_loss": 0.0708, "step": 990, "time": 49.53 }, { "epoch": 0.83, "learning_rate": "1.2662e-05", "loss": 0.0709, "slid_loss": 0.0707, "step": 991, "time": 50.22 }, { "epoch": 0.84, "learning_rate": "1.2635e-05", "loss": 0.06, "slid_loss": 0.0706, "step": 992, "time": 50.01 }, { "epoch": 0.84, "learning_rate": "1.2609e-05", "loss": 0.0627, "slid_loss": 0.0705, "step": 993, "time": 50.45 }, { "epoch": 0.84, "learning_rate": "1.2582e-05", "loss": 0.0684, "slid_loss": 0.0705, "step": 994, "time": 50.73 }, { "epoch": 0.84, "learning_rate": "1.2556e-05", "loss": 0.0707, "slid_loss": 0.0705, "step": 995, "time": 50.32 }, { "epoch": 0.84, "learning_rate": "1.2530e-05", "loss": 0.071, "slid_loss": 0.0705, "step": 996, "time": 50.2 }, { "epoch": 0.84, "learning_rate": "1.2504e-05", "loss": 0.0606, "slid_loss": 0.0704, "step": 997, "time": 51.32 }, { "epoch": 0.84, "learning_rate": "1.2478e-05", "loss": 0.0648, "slid_loss": 0.0704, "step": 998, "time": 49.11 }, { "epoch": 0.84, "learning_rate": "1.2452e-05", "loss": 0.0721, "slid_loss": 0.0704, "step": 999, "time": 51.17 }, { "epoch": 0.84, "learning_rate": "1.2427e-05", "loss": 0.0665, "slid_loss": 0.0704, "step": 1000, "time": 50.28 }, { "epoch": 0.84, "learning_rate": "1.2401e-05", "loss": 0.0697, "slid_loss": 0.0703, "step": 1001, "time": 50.71 }, { "epoch": 0.84, "learning_rate": "1.2376e-05", "loss": 0.073, "slid_loss": 0.0703, "step": 1002, "time": 49.96 }, { "epoch": 0.85, "learning_rate": "1.2351e-05", "loss": 0.0747, "slid_loss": 0.0703, "step": 1003, "time": 49.97 }, { "epoch": 0.85, "learning_rate": "1.2325e-05", "loss": 0.0716, "slid_loss": 0.0703, "step": 1004, "time": 50.76 }, { "epoch": 0.85, "learning_rate": "1.2300e-05", "loss": 0.0716, "slid_loss": 0.0703, "step": 1005, "time": 50.34 }, { "epoch": 0.85, "learning_rate": "1.2276e-05", "loss": 0.0706, "slid_loss": 0.0703, "step": 1006, "time": 50.77 }, { "epoch": 0.85, "learning_rate": "1.2251e-05", "loss": 0.0627, "slid_loss": 0.0702, "step": 1007, "time": 51.78 }, { "epoch": 0.85, "learning_rate": "1.2226e-05", "loss": 0.0666, "slid_loss": 0.0701, "step": 1008, "time": 50.55 }, { "epoch": 0.85, "learning_rate": "1.2202e-05", "loss": 0.0686, "slid_loss": 0.0701, "step": 1009, "time": 50.68 }, { "epoch": 0.85, "learning_rate": "1.2177e-05", "loss": 0.0642, "slid_loss": 0.07, "step": 1010, "time": 50.34 }, { "epoch": 0.85, "learning_rate": "1.2153e-05", "loss": 0.0671, "slid_loss": 0.07, "step": 1011, "time": 51.11 }, { "epoch": 0.85, "learning_rate": "1.2129e-05", "loss": 0.0639, "slid_loss": 0.07, "step": 1012, "time": 50.76 }, { "epoch": 0.85, "learning_rate": "1.2105e-05", "loss": 0.0685, "slid_loss": 0.07, "step": 1013, "time": 50.28 }, { "epoch": 0.85, "learning_rate": "1.2081e-05", "loss": 0.0707, "slid_loss": 0.0699, "step": 1014, "time": 51.27 }, { "epoch": 0.86, "learning_rate": "1.2058e-05", "loss": 0.0618, "slid_loss": 0.0698, "step": 1015, "time": 49.57 }, { "epoch": 0.86, "learning_rate": "1.2034e-05", "loss": 0.0645, "slid_loss": 0.0699, "step": 1016, "time": 50.53 }, { "epoch": 0.86, "learning_rate": "1.2011e-05", "loss": 0.0679, "slid_loss": 0.0699, "step": 1017, "time": 50.88 }, { "epoch": 0.86, "learning_rate": "1.1987e-05", "loss": 0.0628, "slid_loss": 0.0698, "step": 1018, "time": 50.26 }, { "epoch": 0.86, "learning_rate": "1.1964e-05", "loss": 0.0685, "slid_loss": 0.0698, "step": 1019, "time": 49.88 }, { "epoch": 0.86, "learning_rate": "1.1941e-05", "loss": 0.0696, "slid_loss": 0.0698, "step": 1020, "time": 51.59 }, { "epoch": 0.86, "learning_rate": "1.1918e-05", "loss": 0.0712, "slid_loss": 0.0698, "step": 1021, "time": 51.36 }, { "epoch": 0.86, "learning_rate": "1.1895e-05", "loss": 0.0684, "slid_loss": 0.0697, "step": 1022, "time": 50.74 }, { "epoch": 0.86, "learning_rate": "1.1873e-05", "loss": 0.0764, "slid_loss": 0.0698, "step": 1023, "time": 50.0 }, { "epoch": 0.86, "learning_rate": "1.1850e-05", "loss": 0.0705, "slid_loss": 0.0698, "step": 1024, "time": 51.26 }, { "epoch": 0.86, "learning_rate": "1.1828e-05", "loss": 0.0659, "slid_loss": 0.0698, "step": 1025, "time": 50.87 }, { "epoch": 0.86, "learning_rate": "1.1805e-05", "loss": 0.0696, "slid_loss": 0.0697, "step": 1026, "time": 50.26 }, { "epoch": 0.87, "learning_rate": "1.1783e-05", "loss": 0.0753, "slid_loss": 0.0698, "step": 1027, "time": 49.98 }, { "epoch": 0.87, "learning_rate": "1.1761e-05", "loss": 0.0646, "slid_loss": 0.0697, "step": 1028, "time": 49.78 }, { "epoch": 0.87, "learning_rate": "1.1739e-05", "loss": 0.0734, "slid_loss": 0.0697, "step": 1029, "time": 51.4 }, { "epoch": 0.87, "learning_rate": "1.1717e-05", "loss": 0.0767, "slid_loss": 0.0698, "step": 1030, "time": 49.79 }, { "epoch": 0.87, "learning_rate": "1.1696e-05", "loss": 0.0655, "slid_loss": 0.0696, "step": 1031, "time": 50.13 }, { "epoch": 0.87, "learning_rate": "1.1674e-05", "loss": 0.0713, "slid_loss": 0.0696, "step": 1032, "time": 51.08 }, { "epoch": 0.87, "learning_rate": "1.1653e-05", "loss": 0.0645, "slid_loss": 0.0696, "step": 1033, "time": 49.51 }, { "epoch": 0.87, "learning_rate": "1.1632e-05", "loss": 0.065, "slid_loss": 0.0695, "step": 1034, "time": 49.83 }, { "epoch": 0.87, "learning_rate": "1.1611e-05", "loss": 0.061, "slid_loss": 0.0694, "step": 1035, "time": 50.23 }, { "epoch": 0.87, "learning_rate": "1.1590e-05", "loss": 0.0676, "slid_loss": 0.0694, "step": 1036, "time": 50.02 }, { "epoch": 0.87, "learning_rate": "1.1569e-05", "loss": 0.0602, "slid_loss": 0.0693, "step": 1037, "time": 50.28 }, { "epoch": 0.87, "learning_rate": "1.1548e-05", "loss": 0.0616, "slid_loss": 0.0691, "step": 1038, "time": 50.31 }, { "epoch": 0.88, "learning_rate": "1.1528e-05", "loss": 0.0679, "slid_loss": 0.0691, "step": 1039, "time": 50.25 }, { "epoch": 0.88, "learning_rate": "1.1507e-05", "loss": 0.0685, "slid_loss": 0.0691, "step": 1040, "time": 49.91 }, { "epoch": 0.88, "learning_rate": "1.1487e-05", "loss": 0.072, "slid_loss": 0.069, "step": 1041, "time": 49.89 }, { "epoch": 0.88, "learning_rate": "1.1467e-05", "loss": 0.0672, "slid_loss": 0.0689, "step": 1042, "time": 49.73 }, { "epoch": 0.88, "learning_rate": "1.1447e-05", "loss": 0.0758, "slid_loss": 0.0691, "step": 1043, "time": 50.5 }, { "epoch": 0.88, "learning_rate": "1.1427e-05", "loss": 0.0688, "slid_loss": 0.0691, "step": 1044, "time": 50.5 }, { "epoch": 0.88, "learning_rate": "1.1407e-05", "loss": 0.0717, "slid_loss": 0.069, "step": 1045, "time": 50.89 }, { "epoch": 0.88, "learning_rate": "1.1387e-05", "loss": 0.0689, "slid_loss": 0.069, "step": 1046, "time": 50.44 }, { "epoch": 0.88, "learning_rate": "1.1368e-05", "loss": 0.0661, "slid_loss": 0.069, "step": 1047, "time": 49.68 }, { "epoch": 0.88, "learning_rate": "1.1348e-05", "loss": 0.0672, "slid_loss": 0.0689, "step": 1048, "time": 50.39 }, { "epoch": 0.88, "learning_rate": "1.1329e-05", "loss": 0.0692, "slid_loss": 0.0689, "step": 1049, "time": 50.57 }, { "epoch": 0.88, "learning_rate": "1.1310e-05", "loss": 0.0642, "slid_loss": 0.0689, "step": 1050, "time": 49.48 }, { "epoch": 0.89, "learning_rate": "1.1291e-05", "loss": 0.0713, "slid_loss": 0.0689, "step": 1051, "time": 49.76 }, { "epoch": 0.89, "learning_rate": "1.1272e-05", "loss": 0.0651, "slid_loss": 0.0689, "step": 1052, "time": 49.79 }, { "epoch": 0.89, "learning_rate": "1.1253e-05", "loss": 0.0681, "slid_loss": 0.0688, "step": 1053, "time": 49.58 }, { "epoch": 0.89, "learning_rate": "1.1235e-05", "loss": 0.0711, "slid_loss": 0.0688, "step": 1054, "time": 50.85 }, { "epoch": 0.89, "learning_rate": "1.1216e-05", "loss": 0.0681, "slid_loss": 0.0688, "step": 1055, "time": 49.8 }, { "epoch": 0.89, "learning_rate": "1.1198e-05", "loss": 0.0711, "slid_loss": 0.0688, "step": 1056, "time": 50.41 }, { "epoch": 0.89, "learning_rate": "1.1180e-05", "loss": 0.0705, "slid_loss": 0.0689, "step": 1057, "time": 49.19 }, { "epoch": 0.89, "learning_rate": "1.1162e-05", "loss": 0.0683, "slid_loss": 0.069, "step": 1058, "time": 50.78 }, { "epoch": 0.89, "learning_rate": "1.1144e-05", "loss": 0.0754, "slid_loss": 0.0689, "step": 1059, "time": 50.54 }, { "epoch": 0.89, "learning_rate": "1.1126e-05", "loss": 0.0652, "slid_loss": 0.0688, "step": 1060, "time": 50.79 }, { "epoch": 0.89, "learning_rate": "1.1108e-05", "loss": 0.0715, "slid_loss": 0.0689, "step": 1061, "time": 49.99 }, { "epoch": 0.89, "learning_rate": "1.1091e-05", "loss": 0.0698, "slid_loss": 0.0688, "step": 1062, "time": 50.53 }, { "epoch": 0.9, "learning_rate": "1.1074e-05", "loss": 0.0716, "slid_loss": 0.0688, "step": 1063, "time": 50.29 }, { "epoch": 0.9, "learning_rate": "1.1056e-05", "loss": 0.0584, "slid_loss": 0.0687, "step": 1064, "time": 49.14 }, { "epoch": 0.9, "learning_rate": "1.1039e-05", "loss": 0.0648, "slid_loss": 0.0687, "step": 1065, "time": 51.32 }, { "epoch": 0.9, "learning_rate": "1.1022e-05", "loss": 0.0659, "slid_loss": 0.0687, "step": 1066, "time": 50.29 }, { "epoch": 0.9, "learning_rate": "1.1006e-05", "loss": 0.0794, "slid_loss": 0.0688, "step": 1067, "time": 51.19 }, { "epoch": 0.9, "learning_rate": "1.0989e-05", "loss": 0.0621, "slid_loss": 0.0687, "step": 1068, "time": 50.76 }, { "epoch": 0.9, "learning_rate": "1.0972e-05", "loss": 0.0713, "slid_loss": 0.0687, "step": 1069, "time": 51.38 }, { "epoch": 0.9, "learning_rate": "1.0956e-05", "loss": 0.0688, "slid_loss": 0.0686, "step": 1070, "time": 51.01 }, { "epoch": 0.9, "learning_rate": "1.0940e-05", "loss": 0.0681, "slid_loss": 0.0686, "step": 1071, "time": 49.8 }, { "epoch": 0.9, "learning_rate": "1.0923e-05", "loss": 0.0654, "slid_loss": 0.0686, "step": 1072, "time": 49.53 }, { "epoch": 0.9, "learning_rate": "1.0907e-05", "loss": 0.0754, "slid_loss": 0.0686, "step": 1073, "time": 51.44 }, { "epoch": 0.9, "learning_rate": "1.0892e-05", "loss": 0.0673, "slid_loss": 0.0685, "step": 1074, "time": 51.04 }, { "epoch": 0.91, "learning_rate": "1.0876e-05", "loss": 0.0603, "slid_loss": 0.0684, "step": 1075, "time": 50.8 }, { "epoch": 0.91, "learning_rate": "1.0860e-05", "loss": 0.0666, "slid_loss": 0.0683, "step": 1076, "time": 49.31 }, { "epoch": 0.91, "learning_rate": "1.0845e-05", "loss": 0.0749, "slid_loss": 0.0684, "step": 1077, "time": 50.97 }, { "epoch": 0.91, "learning_rate": "1.0829e-05", "loss": 0.0675, "slid_loss": 0.0683, "step": 1078, "time": 50.03 }, { "epoch": 0.91, "learning_rate": "1.0814e-05", "loss": 0.0691, "slid_loss": 0.0683, "step": 1079, "time": 50.8 }, { "epoch": 0.91, "learning_rate": "1.0799e-05", "loss": 0.0689, "slid_loss": 0.0683, "step": 1080, "time": 51.22 }, { "epoch": 0.91, "learning_rate": "1.0784e-05", "loss": 0.0647, "slid_loss": 0.0682, "step": 1081, "time": 49.94 }, { "epoch": 0.91, "learning_rate": "1.0770e-05", "loss": 0.066, "slid_loss": 0.0681, "step": 1082, "time": 49.96 }, { "epoch": 0.91, "learning_rate": "1.0755e-05", "loss": 0.062, "slid_loss": 0.068, "step": 1083, "time": 50.87 }, { "epoch": 0.91, "learning_rate": "1.0740e-05", "loss": 0.0658, "slid_loss": 0.068, "step": 1084, "time": 51.62 }, { "epoch": 0.91, "learning_rate": "1.0726e-05", "loss": 0.0634, "slid_loss": 0.068, "step": 1085, "time": 50.76 }, { "epoch": 0.92, "learning_rate": "1.0712e-05", "loss": 0.068, "slid_loss": 0.0679, "step": 1086, "time": 50.77 }, { "epoch": 0.92, "learning_rate": "1.0698e-05", "loss": 0.0685, "slid_loss": 0.068, "step": 1087, "time": 50.89 }, { "epoch": 0.92, "learning_rate": "1.0684e-05", "loss": 0.064, "slid_loss": 0.0679, "step": 1088, "time": 49.55 }, { "epoch": 0.92, "learning_rate": "1.0670e-05", "loss": 0.0674, "slid_loss": 0.068, "step": 1089, "time": 49.73 }, { "epoch": 0.92, "learning_rate": "1.0656e-05", "loss": 0.0664, "slid_loss": 0.068, "step": 1090, "time": 49.53 }, { "epoch": 0.92, "learning_rate": "1.0643e-05", "loss": 0.0676, "slid_loss": 0.068, "step": 1091, "time": 51.07 }, { "epoch": 0.92, "learning_rate": "1.0629e-05", "loss": 0.0678, "slid_loss": 0.068, "step": 1092, "time": 51.6 }, { "epoch": 0.92, "learning_rate": "1.0616e-05", "loss": 0.065, "slid_loss": 0.0681, "step": 1093, "time": 49.02 }, { "epoch": 0.92, "learning_rate": "1.0603e-05", "loss": 0.0702, "slid_loss": 0.0681, "step": 1094, "time": 50.25 }, { "epoch": 0.92, "learning_rate": "1.0590e-05", "loss": 0.0679, "slid_loss": 0.0681, "step": 1095, "time": 50.58 }, { "epoch": 0.92, "learning_rate": "1.0577e-05", "loss": 0.0757, "slid_loss": 0.0681, "step": 1096, "time": 50.25 }, { "epoch": 0.92, "learning_rate": "1.0565e-05", "loss": 0.0622, "slid_loss": 0.0681, "step": 1097, "time": 52.14 }, { "epoch": 0.93, "learning_rate": "1.0552e-05", "loss": 0.0586, "slid_loss": 0.0681, "step": 1098, "time": 50.53 }, { "epoch": 0.93, "learning_rate": "1.0540e-05", "loss": 0.0681, "slid_loss": 0.068, "step": 1099, "time": 49.74 }, { "epoch": 0.93, "learning_rate": "1.0527e-05", "loss": 0.0771, "slid_loss": 0.0681, "step": 1100, "time": 50.46 }, { "epoch": 0.93, "learning_rate": "1.0515e-05", "loss": 0.0707, "slid_loss": 0.0681, "step": 1101, "time": 49.67 }, { "epoch": 0.93, "learning_rate": "1.0503e-05", "loss": 0.0654, "slid_loss": 0.0681, "step": 1102, "time": 50.35 }, { "epoch": 0.93, "learning_rate": "1.0491e-05", "loss": 0.0701, "slid_loss": 0.068, "step": 1103, "time": 50.91 }, { "epoch": 0.93, "learning_rate": "1.0480e-05", "loss": 0.0718, "slid_loss": 0.068, "step": 1104, "time": 50.63 }, { "epoch": 0.93, "learning_rate": "1.0468e-05", "loss": 0.0627, "slid_loss": 0.0679, "step": 1105, "time": 50.38 }, { "epoch": 0.93, "learning_rate": "1.0457e-05", "loss": 0.0651, "slid_loss": 0.0679, "step": 1106, "time": 50.1 }, { "epoch": 0.93, "learning_rate": "1.0445e-05", "loss": 0.0644, "slid_loss": 0.0679, "step": 1107, "time": 50.74 }, { "epoch": 0.93, "learning_rate": "1.0434e-05", "loss": 0.0641, "slid_loss": 0.0679, "step": 1108, "time": 51.36 }, { "epoch": 0.93, "learning_rate": "1.0423e-05", "loss": 0.0637, "slid_loss": 0.0678, "step": 1109, "time": 49.73 }, { "epoch": 0.94, "learning_rate": "1.0412e-05", "loss": 0.074, "slid_loss": 0.0679, "step": 1110, "time": 51.31 }, { "epoch": 0.94, "learning_rate": "1.0401e-05", "loss": 0.0683, "slid_loss": 0.0679, "step": 1111, "time": 49.84 }, { "epoch": 0.94, "learning_rate": "1.0391e-05", "loss": 0.0563, "slid_loss": 0.0678, "step": 1112, "time": 50.61 }, { "epoch": 0.94, "learning_rate": "1.0380e-05", "loss": 0.0605, "slid_loss": 0.0678, "step": 1113, "time": 49.71 }, { "epoch": 0.94, "learning_rate": "1.0370e-05", "loss": 0.0612, "slid_loss": 0.0677, "step": 1114, "time": 49.31 }, { "epoch": 0.94, "learning_rate": "1.0360e-05", "loss": 0.0529, "slid_loss": 0.0676, "step": 1115, "time": 50.56 }, { "epoch": 0.94, "learning_rate": "1.0350e-05", "loss": 0.0709, "slid_loss": 0.0676, "step": 1116, "time": 51.04 }, { "epoch": 0.94, "learning_rate": "1.0340e-05", "loss": 0.0735, "slid_loss": 0.0677, "step": 1117, "time": 49.96 }, { "epoch": 0.94, "learning_rate": "1.0330e-05", "loss": 0.0667, "slid_loss": 0.0677, "step": 1118, "time": 49.38 }, { "epoch": 0.94, "learning_rate": "1.0321e-05", "loss": 0.0723, "slid_loss": 0.0678, "step": 1119, "time": 50.69 }, { "epoch": 0.94, "learning_rate": "1.0311e-05", "loss": 0.0649, "slid_loss": 0.0677, "step": 1120, "time": 49.64 }, { "epoch": 0.94, "learning_rate": "1.0302e-05", "loss": 0.0724, "slid_loss": 0.0677, "step": 1121, "time": 50.66 }, { "epoch": 0.95, "learning_rate": "1.0293e-05", "loss": 0.0619, "slid_loss": 0.0677, "step": 1122, "time": 49.67 }, { "epoch": 0.95, "learning_rate": "1.0284e-05", "loss": 0.065, "slid_loss": 0.0676, "step": 1123, "time": 50.12 }, { "epoch": 0.95, "learning_rate": "1.0275e-05", "loss": 0.0673, "slid_loss": 0.0675, "step": 1124, "time": 50.23 }, { "epoch": 0.95, "learning_rate": "1.0266e-05", "loss": 0.0712, "slid_loss": 0.0676, "step": 1125, "time": 49.73 }, { "epoch": 0.95, "learning_rate": "1.0257e-05", "loss": 0.0603, "slid_loss": 0.0675, "step": 1126, "time": 49.44 }, { "epoch": 0.95, "learning_rate": "1.0249e-05", "loss": 0.07, "slid_loss": 0.0674, "step": 1127, "time": 50.53 }, { "epoch": 0.95, "learning_rate": "1.0240e-05", "loss": 0.0624, "slid_loss": 0.0674, "step": 1128, "time": 48.91 }, { "epoch": 0.95, "learning_rate": "1.0232e-05", "loss": 0.0698, "slid_loss": 0.0674, "step": 1129, "time": 51.07 }, { "epoch": 0.95, "learning_rate": "1.0224e-05", "loss": 0.0645, "slid_loss": 0.0673, "step": 1130, "time": 51.18 }, { "epoch": 0.95, "learning_rate": "1.0216e-05", "loss": 0.0653, "slid_loss": 0.0673, "step": 1131, "time": 50.39 }, { "epoch": 0.95, "learning_rate": "1.0208e-05", "loss": 0.0719, "slid_loss": 0.0673, "step": 1132, "time": 49.8 }, { "epoch": 0.95, "learning_rate": "1.0201e-05", "loss": 0.0662, "slid_loss": 0.0673, "step": 1133, "time": 50.26 }, { "epoch": 0.96, "learning_rate": "1.0193e-05", "loss": 0.0617, "slid_loss": 0.0672, "step": 1134, "time": 49.49 }, { "epoch": 0.96, "learning_rate": "1.0186e-05", "loss": 0.0755, "slid_loss": 0.0674, "step": 1135, "time": 50.53 }, { "epoch": 0.96, "learning_rate": "1.0179e-05", "loss": 0.0687, "slid_loss": 0.0674, "step": 1136, "time": 49.42 }, { "epoch": 0.96, "learning_rate": "1.0172e-05", "loss": 0.063, "slid_loss": 0.0674, "step": 1137, "time": 50.09 }, { "epoch": 0.96, "learning_rate": "1.0165e-05", "loss": 0.0622, "slid_loss": 0.0674, "step": 1138, "time": 50.38 }, { "epoch": 0.96, "learning_rate": "1.0158e-05", "loss": 0.066, "slid_loss": 0.0674, "step": 1139, "time": 50.87 }, { "epoch": 0.96, "learning_rate": "1.0151e-05", "loss": 0.0615, "slid_loss": 0.0673, "step": 1140, "time": 50.22 }, { "epoch": 0.96, "learning_rate": "1.0145e-05", "loss": 0.0721, "slid_loss": 0.0673, "step": 1141, "time": 50.72 }, { "epoch": 0.96, "learning_rate": "1.0138e-05", "loss": 0.0564, "slid_loss": 0.0672, "step": 1142, "time": 49.61 }, { "epoch": 0.96, "learning_rate": "1.0132e-05", "loss": 0.0645, "slid_loss": 0.0671, "step": 1143, "time": 50.67 }, { "epoch": 0.96, "learning_rate": "1.0126e-05", "loss": 0.0698, "slid_loss": 0.0671, "step": 1144, "time": 49.88 }, { "epoch": 0.96, "learning_rate": "1.0120e-05", "loss": 0.0768, "slid_loss": 0.0672, "step": 1145, "time": 50.54 }, { "epoch": 0.97, "learning_rate": "1.0114e-05", "loss": 0.0661, "slid_loss": 0.0672, "step": 1146, "time": 49.94 }, { "epoch": 0.97, "learning_rate": "1.0109e-05", "loss": 0.0705, "slid_loss": 0.0672, "step": 1147, "time": 51.48 }, { "epoch": 0.97, "learning_rate": "1.0103e-05", "loss": 0.0682, "slid_loss": 0.0672, "step": 1148, "time": 51.01 }, { "epoch": 0.97, "learning_rate": "1.0098e-05", "loss": 0.0681, "slid_loss": 0.0672, "step": 1149, "time": 51.46 }, { "epoch": 0.97, "learning_rate": "1.0093e-05", "loss": 0.0699, "slid_loss": 0.0673, "step": 1150, "time": 50.99 }, { "epoch": 0.97, "learning_rate": "1.0088e-05", "loss": 0.0689, "slid_loss": 0.0672, "step": 1151, "time": 51.55 }, { "epoch": 0.97, "learning_rate": "1.0083e-05", "loss": 0.0633, "slid_loss": 0.0672, "step": 1152, "time": 49.95 }, { "epoch": 0.97, "learning_rate": "1.0078e-05", "loss": 0.0623, "slid_loss": 0.0672, "step": 1153, "time": 50.35 }, { "epoch": 0.97, "learning_rate": "1.0073e-05", "loss": 0.0674, "slid_loss": 0.0671, "step": 1154, "time": 49.44 }, { "epoch": 0.97, "learning_rate": "1.0069e-05", "loss": 0.0651, "slid_loss": 0.0671, "step": 1155, "time": 50.03 }, { "epoch": 0.97, "learning_rate": "1.0064e-05", "loss": 0.0681, "slid_loss": 0.0671, "step": 1156, "time": 50.4 }, { "epoch": 0.97, "learning_rate": "1.0060e-05", "loss": 0.0702, "slid_loss": 0.0671, "step": 1157, "time": 50.71 }, { "epoch": 0.98, "learning_rate": "1.0056e-05", "loss": 0.0698, "slid_loss": 0.0671, "step": 1158, "time": 51.35 }, { "epoch": 0.98, "learning_rate": "1.0052e-05", "loss": 0.0649, "slid_loss": 0.067, "step": 1159, "time": 50.36 }, { "epoch": 0.98, "learning_rate": "1.0048e-05", "loss": 0.0675, "slid_loss": 0.067, "step": 1160, "time": 50.16 }, { "epoch": 0.98, "learning_rate": "1.0045e-05", "loss": 0.0617, "slid_loss": 0.0669, "step": 1161, "time": 49.34 }, { "epoch": 0.98, "learning_rate": "1.0041e-05", "loss": 0.0654, "slid_loss": 0.0669, "step": 1162, "time": 49.35 }, { "epoch": 0.98, "learning_rate": "1.0038e-05", "loss": 0.0699, "slid_loss": 0.0668, "step": 1163, "time": 50.03 }, { "epoch": 0.98, "learning_rate": "1.0035e-05", "loss": 0.0658, "slid_loss": 0.0669, "step": 1164, "time": 50.88 }, { "epoch": 0.98, "learning_rate": "1.0032e-05", "loss": 0.0699, "slid_loss": 0.067, "step": 1165, "time": 49.44 }, { "epoch": 0.98, "learning_rate": "1.0029e-05", "loss": 0.0662, "slid_loss": 0.067, "step": 1166, "time": 50.24 }, { "epoch": 0.98, "learning_rate": "1.0026e-05", "loss": 0.0592, "slid_loss": 0.0668, "step": 1167, "time": 50.1 }, { "epoch": 0.98, "learning_rate": "1.0023e-05", "loss": 0.0745, "slid_loss": 0.0669, "step": 1168, "time": 51.63 }, { "epoch": 0.98, "learning_rate": "1.0021e-05", "loss": 0.0658, "slid_loss": 0.0668, "step": 1169, "time": 50.67 }, { "epoch": 0.99, "learning_rate": "1.0018e-05", "loss": 0.0733, "slid_loss": 0.0669, "step": 1170, "time": 50.69 }, { "epoch": 0.99, "learning_rate": "1.0016e-05", "loss": 0.0591, "slid_loss": 0.0668, "step": 1171, "time": 50.18 }, { "epoch": 0.99, "learning_rate": "1.0014e-05", "loss": 0.0673, "slid_loss": 0.0668, "step": 1172, "time": 49.27 }, { "epoch": 0.99, "learning_rate": "1.0012e-05", "loss": 0.0678, "slid_loss": 0.0667, "step": 1173, "time": 50.26 }, { "epoch": 0.99, "learning_rate": "1.0010e-05", "loss": 0.0584, "slid_loss": 0.0666, "step": 1174, "time": 49.51 }, { "epoch": 0.99, "learning_rate": "1.0009e-05", "loss": 0.0662, "slid_loss": 0.0667, "step": 1175, "time": 50.24 }, { "epoch": 0.99, "learning_rate": "1.0007e-05", "loss": 0.062, "slid_loss": 0.0667, "step": 1176, "time": 50.21 }, { "epoch": 0.99, "learning_rate": "1.0006e-05", "loss": 0.0744, "slid_loss": 0.0666, "step": 1177, "time": 48.83 }, { "epoch": 0.99, "learning_rate": "1.0005e-05", "loss": 0.0656, "slid_loss": 0.0666, "step": 1178, "time": 50.52 }, { "epoch": 0.99, "learning_rate": "1.0004e-05", "loss": 0.0622, "slid_loss": 0.0666, "step": 1179, "time": 50.26 }, { "epoch": 0.99, "learning_rate": "1.0003e-05", "loss": 0.0612, "slid_loss": 0.0665, "step": 1180, "time": 49.18 }, { "epoch": 1.0, "learning_rate": "1.0002e-05", "loss": 0.0609, "slid_loss": 0.0664, "step": 1181, "time": 50.46 }, { "epoch": 1.0, "learning_rate": "1.0001e-05", "loss": 0.0664, "slid_loss": 0.0665, "step": 1182, "time": 50.51 }, { "epoch": 1.0, "learning_rate": "1.0001e-05", "loss": 0.0656, "slid_loss": 0.0665, "step": 1183, "time": 49.71 }, { "epoch": 1.0, "learning_rate": "1.0000e-05", "loss": 0.0689, "slid_loss": 0.0665, "step": 1184, "time": 50.23 }, { "epoch": 1.0, "learning_rate": "1.0000e-05", "loss": 0.0633, "slid_loss": 0.0665, "step": 1185, "time": 50.02 }, { "epoch": 1.0, "learning_rate": "1.0000e-05", "loss": 0.0665, "slid_loss": 0.0665, "step": 1186, "time": 49.94 }, { "epoch": 1.0, "step": 1186, "time": 0.79, "total_flos": 0.0, "train_loss": 0.09191470937473577, "train_runtime": 59773.5628, "train_samples_per_second": 5.083, "train_steps_per_second": 0.02 } ], "logging_steps": 1.0, "max_steps": 1186, "num_train_epochs": 1, "save_steps": 600, "total_flos": 0.0, "trial_name": null, "trial_params": null }