{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9992627698788836, "eval_steps": 10000000000000, "global_step": 1186, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": "8.3333e-06", "loss": 0.2161, "slid_loss": 0.2161, "step": 1, "time": 63.57 }, { "epoch": 0.0, "learning_rate": "1.6667e-05", "loss": 0.2083, "slid_loss": 0.2122, "step": 2, "time": 44.72 }, { "epoch": 0.0, "learning_rate": "2.5000e-05", "loss": 0.2154, "slid_loss": 0.2133, "step": 3, "time": 46.86 }, { "epoch": 0.0, "learning_rate": "3.3333e-05", "loss": 0.1934, "slid_loss": 0.2083, "step": 4, "time": 46.36 }, { "epoch": 0.0, "learning_rate": "4.1667e-05", "loss": 0.1994, "slid_loss": 0.2065, "step": 5, "time": 45.98 }, { "epoch": 0.01, "learning_rate": "5.0000e-05", "loss": 0.187, "slid_loss": 0.2033, "step": 6, "time": 50.17 }, { "epoch": 0.01, "learning_rate": "5.8333e-05", "loss": 0.1959, "slid_loss": 0.2022, "step": 7, "time": 49.06 }, { "epoch": 0.01, "learning_rate": "6.6667e-05", "loss": 0.1983, "slid_loss": 0.2017, "step": 8, "time": 46.23 }, { "epoch": 0.01, "learning_rate": "7.5000e-05", "loss": 0.1768, "slid_loss": 0.1989, "step": 9, "time": 46.75 }, { "epoch": 0.01, "learning_rate": "8.3333e-05", "loss": 0.1891, "slid_loss": 0.198, "step": 10, "time": 47.51 }, { "epoch": 0.01, "learning_rate": "9.1667e-05", "loss": 0.176, "slid_loss": 0.196, "step": 11, "time": 47.4 }, { "epoch": 0.01, "learning_rate": "1.0000e-04", "loss": 0.172, "slid_loss": 0.194, "step": 12, "time": 48.28 }, { "epoch": 0.01, "learning_rate": "1.0000e-04", "loss": 0.1811, "slid_loss": 0.193, "step": 13, "time": 45.98 }, { "epoch": 0.01, "learning_rate": "9.9999e-05", "loss": 0.1826, "slid_loss": 0.1922, "step": 14, "time": 49.9 }, { "epoch": 0.01, "learning_rate": "9.9999e-05", "loss": 0.1658, "slid_loss": 0.1905, "step": 15, "time": 48.77 }, { "epoch": 0.01, "learning_rate": "9.9998e-05", "loss": 0.1667, "slid_loss": 0.189, "step": 16, "time": 44.99 }, { "epoch": 0.01, "learning_rate": "9.9996e-05", "loss": 0.1641, "slid_loss": 0.1875, "step": 17, "time": 46.92 }, { "epoch": 0.02, "learning_rate": "9.9995e-05", "loss": 0.1717, "slid_loss": 0.1867, "step": 18, "time": 47.76 }, { "epoch": 0.02, "learning_rate": "9.9993e-05", "loss": 0.1579, "slid_loss": 0.1851, "step": 19, "time": 46.11 }, { "epoch": 0.02, "learning_rate": "9.9991e-05", "loss": 0.1678, "slid_loss": 0.1843, "step": 20, "time": 47.59 }, { "epoch": 0.02, "learning_rate": "9.9988e-05", "loss": 0.1666, "slid_loss": 0.1834, "step": 21, "time": 46.51 }, { "epoch": 0.02, "learning_rate": "9.9986e-05", "loss": 0.1689, "slid_loss": 0.1828, "step": 22, "time": 47.13 }, { "epoch": 0.02, "learning_rate": "9.9983e-05", "loss": 0.1634, "slid_loss": 0.1819, "step": 23, "time": 46.48 }, { "epoch": 0.02, "learning_rate": "9.9979e-05", "loss": 0.1587, "slid_loss": 0.181, "step": 24, "time": 50.81 }, { "epoch": 0.02, "learning_rate": "9.9976e-05", "loss": 0.1598, "slid_loss": 0.1801, "step": 25, "time": 49.95 }, { "epoch": 0.02, "learning_rate": "9.9972e-05", "loss": 0.1741, "slid_loss": 0.1799, "step": 26, "time": 48.44 }, { "epoch": 0.02, "learning_rate": "9.9968e-05", "loss": 0.1595, "slid_loss": 0.1791, "step": 27, "time": 45.33 }, { "epoch": 0.02, "learning_rate": "9.9963e-05", "loss": 0.1633, "slid_loss": 0.1786, "step": 28, "time": 48.15 }, { "epoch": 0.02, "learning_rate": "9.9959e-05", "loss": 0.1643, "slid_loss": 0.1781, "step": 29, "time": 46.65 }, { "epoch": 0.03, "learning_rate": "9.9954e-05", "loss": 0.1596, "slid_loss": 0.1775, "step": 30, "time": 47.56 }, { "epoch": 0.03, "learning_rate": "9.9948e-05", "loss": 0.1653, "slid_loss": 0.1771, "step": 31, "time": 47.08 }, { "epoch": 0.03, "learning_rate": "9.9943e-05", "loss": 0.1521, "slid_loss": 0.1763, "step": 32, "time": 45.78 }, { "epoch": 0.03, "learning_rate": "9.9937e-05", "loss": 0.1595, "slid_loss": 0.1758, "step": 33, "time": 49.14 }, { "epoch": 0.03, "learning_rate": "9.9931e-05", "loss": 0.1543, "slid_loss": 0.1751, "step": 34, "time": 46.07 }, { "epoch": 0.03, "learning_rate": "9.9924e-05", "loss": 0.1569, "slid_loss": 0.1746, "step": 35, "time": 46.77 }, { "epoch": 0.03, "learning_rate": "9.9918e-05", "loss": 0.139, "slid_loss": 0.1736, "step": 36, "time": 45.64 }, { "epoch": 0.03, "learning_rate": "9.9911e-05", "loss": 0.1632, "slid_loss": 0.1733, "step": 37, "time": 47.39 }, { "epoch": 0.03, "learning_rate": "9.9903e-05", "loss": 0.1501, "slid_loss": 0.1727, "step": 38, "time": 47.17 }, { "epoch": 0.03, "learning_rate": "9.9896e-05", "loss": 0.1521, "slid_loss": 0.1722, "step": 39, "time": 45.96 }, { "epoch": 0.03, "learning_rate": "9.9888e-05", "loss": 0.149, "slid_loss": 0.1716, "step": 40, "time": 47.63 }, { "epoch": 0.03, "learning_rate": "9.9880e-05", "loss": 0.1515, "slid_loss": 0.1711, "step": 41, "time": 45.83 }, { "epoch": 0.04, "learning_rate": "9.9871e-05", "loss": 0.1471, "slid_loss": 0.1706, "step": 42, "time": 49.04 }, { "epoch": 0.04, "learning_rate": "9.9862e-05", "loss": 0.1536, "slid_loss": 0.1702, "step": 43, "time": 46.63 }, { "epoch": 0.04, "learning_rate": "9.9853e-05", "loss": 0.1485, "slid_loss": 0.1697, "step": 44, "time": 48.86 }, { "epoch": 0.04, "learning_rate": "9.9844e-05", "loss": 0.1496, "slid_loss": 0.1692, "step": 45, "time": 47.03 }, { "epoch": 0.04, "learning_rate": "9.9835e-05", "loss": 0.1519, "slid_loss": 0.1689, "step": 46, "time": 49.21 }, { "epoch": 0.04, "learning_rate": "9.9825e-05", "loss": 0.146, "slid_loss": 0.1684, "step": 47, "time": 45.37 }, { "epoch": 0.04, "learning_rate": "9.9815e-05", "loss": 0.1516, "slid_loss": 0.168, "step": 48, "time": 46.19 }, { "epoch": 0.04, "learning_rate": "9.9804e-05", "loss": 0.1475, "slid_loss": 0.1676, "step": 49, "time": 45.62 }, { "epoch": 0.04, "learning_rate": "9.9793e-05", "loss": 0.1509, "slid_loss": 0.1673, "step": 50, "time": 46.11 }, { "epoch": 0.04, "learning_rate": "9.9782e-05", "loss": 0.1513, "slid_loss": 0.167, "step": 51, "time": 44.94 }, { "epoch": 0.04, "learning_rate": "9.9771e-05", "loss": 0.1425, "slid_loss": 0.1665, "step": 52, "time": 46.8 }, { "epoch": 0.04, "learning_rate": "9.9759e-05", "loss": 0.1461, "slid_loss": 0.1661, "step": 53, "time": 47.33 }, { "epoch": 0.05, "learning_rate": "9.9748e-05", "loss": 0.1457, "slid_loss": 0.1657, "step": 54, "time": 45.88 }, { "epoch": 0.05, "learning_rate": "9.9735e-05", "loss": 0.1503, "slid_loss": 0.1654, "step": 55, "time": 47.22 }, { "epoch": 0.05, "learning_rate": "9.9723e-05", "loss": 0.1525, "slid_loss": 0.1652, "step": 56, "time": 46.7 }, { "epoch": 0.05, "learning_rate": "9.9710e-05", "loss": 0.1466, "slid_loss": 0.1649, "step": 57, "time": 46.63 }, { "epoch": 0.05, "learning_rate": "9.9697e-05", "loss": 0.1464, "slid_loss": 0.1646, "step": 58, "time": 46.62 }, { "epoch": 0.05, "learning_rate": "9.9684e-05", "loss": 0.1434, "slid_loss": 0.1642, "step": 59, "time": 46.47 }, { "epoch": 0.05, "learning_rate": "9.9670e-05", "loss": 0.1478, "slid_loss": 0.1639, "step": 60, "time": 49.29 }, { "epoch": 0.05, "learning_rate": "9.9657e-05", "loss": 0.1542, "slid_loss": 0.1638, "step": 61, "time": 47.25 }, { "epoch": 0.05, "learning_rate": "9.9642e-05", "loss": 0.1569, "slid_loss": 0.1637, "step": 62, "time": 46.6 }, { "epoch": 0.05, "learning_rate": "9.9628e-05", "loss": 0.1486, "slid_loss": 0.1634, "step": 63, "time": 47.97 }, { "epoch": 0.05, "learning_rate": "9.9613e-05", "loss": 0.1481, "slid_loss": 0.1632, "step": 64, "time": 46.52 }, { "epoch": 0.05, "learning_rate": "9.9598e-05", "loss": 0.1501, "slid_loss": 0.163, "step": 65, "time": 46.88 }, { "epoch": 0.06, "learning_rate": "9.9583e-05", "loss": 0.1522, "slid_loss": 0.1628, "step": 66, "time": 46.17 }, { "epoch": 0.06, "learning_rate": "9.9568e-05", "loss": 0.1539, "slid_loss": 0.1627, "step": 67, "time": 46.06 }, { "epoch": 0.06, "learning_rate": "9.9552e-05", "loss": 0.1496, "slid_loss": 0.1625, "step": 68, "time": 47.13 }, { "epoch": 0.06, "learning_rate": "9.9536e-05", "loss": 0.1535, "slid_loss": 0.1624, "step": 69, "time": 45.6 }, { "epoch": 0.06, "learning_rate": "9.9519e-05", "loss": 0.1493, "slid_loss": 0.1622, "step": 70, "time": 47.53 }, { "epoch": 0.06, "learning_rate": "9.9502e-05", "loss": 0.146, "slid_loss": 0.1619, "step": 71, "time": 47.76 }, { "epoch": 0.06, "learning_rate": "9.9486e-05", "loss": 0.1372, "slid_loss": 0.1616, "step": 72, "time": 47.97 }, { "epoch": 0.06, "learning_rate": "9.9468e-05", "loss": 0.1411, "slid_loss": 0.1613, "step": 73, "time": 46.46 }, { "epoch": 0.06, "learning_rate": "9.9451e-05", "loss": 0.1403, "slid_loss": 0.161, "step": 74, "time": 45.97 }, { "epoch": 0.06, "learning_rate": "9.9433e-05", "loss": 0.1385, "slid_loss": 0.1607, "step": 75, "time": 47.65 }, { "epoch": 0.06, "learning_rate": "9.9415e-05", "loss": 0.1463, "slid_loss": 0.1605, "step": 76, "time": 45.63 }, { "epoch": 0.06, "learning_rate": "9.9396e-05", "loss": 0.1549, "slid_loss": 0.1605, "step": 77, "time": 47.19 }, { "epoch": 0.07, "learning_rate": "9.9378e-05", "loss": 0.1484, "slid_loss": 0.1603, "step": 78, "time": 48.92 }, { "epoch": 0.07, "learning_rate": "9.9359e-05", "loss": 0.1446, "slid_loss": 0.1601, "step": 79, "time": 46.14 }, { "epoch": 0.07, "learning_rate": "9.9340e-05", "loss": 0.1403, "slid_loss": 0.1599, "step": 80, "time": 45.0 }, { "epoch": 0.07, "learning_rate": "9.9320e-05", "loss": 0.1444, "slid_loss": 0.1597, "step": 81, "time": 46.98 }, { "epoch": 0.07, "learning_rate": "9.9300e-05", "loss": 0.1492, "slid_loss": 0.1596, "step": 82, "time": 45.89 }, { "epoch": 0.07, "learning_rate": "9.9280e-05", "loss": 0.14, "slid_loss": 0.1593, "step": 83, "time": 46.72 }, { "epoch": 0.07, "learning_rate": "9.9260e-05", "loss": 0.1398, "slid_loss": 0.1591, "step": 84, "time": 44.65 }, { "epoch": 0.07, "learning_rate": "9.9239e-05", "loss": 0.1457, "slid_loss": 0.1589, "step": 85, "time": 45.24 }, { "epoch": 0.07, "learning_rate": "9.9218e-05", "loss": 0.1442, "slid_loss": 0.1588, "step": 86, "time": 46.01 }, { "epoch": 0.07, "learning_rate": "9.9197e-05", "loss": 0.147, "slid_loss": 0.1586, "step": 87, "time": 47.13 }, { "epoch": 0.07, "learning_rate": "9.9176e-05", "loss": 0.1385, "slid_loss": 0.1584, "step": 88, "time": 45.66 }, { "epoch": 0.07, "learning_rate": "9.9154e-05", "loss": 0.1359, "slid_loss": 0.1581, "step": 89, "time": 46.29 }, { "epoch": 0.08, "learning_rate": "9.9132e-05", "loss": 0.1498, "slid_loss": 0.158, "step": 90, "time": 45.65 }, { "epoch": 0.08, "learning_rate": "9.9110e-05", "loss": 0.145, "slid_loss": 0.1579, "step": 91, "time": 46.64 }, { "epoch": 0.08, "learning_rate": "9.9087e-05", "loss": 0.1426, "slid_loss": 0.1577, "step": 92, "time": 47.69 }, { "epoch": 0.08, "learning_rate": "9.9064e-05", "loss": 0.143, "slid_loss": 0.1576, "step": 93, "time": 50.06 }, { "epoch": 0.08, "learning_rate": "9.9041e-05", "loss": 0.1417, "slid_loss": 0.1574, "step": 94, "time": 47.81 }, { "epoch": 0.08, "learning_rate": "9.9017e-05", "loss": 0.1503, "slid_loss": 0.1573, "step": 95, "time": 46.11 }, { "epoch": 0.08, "learning_rate": "9.8994e-05", "loss": 0.1413, "slid_loss": 0.1572, "step": 96, "time": 45.96 }, { "epoch": 0.08, "learning_rate": "9.8970e-05", "loss": 0.1367, "slid_loss": 0.157, "step": 97, "time": 47.16 }, { "epoch": 0.08, "learning_rate": "9.8945e-05", "loss": 0.1431, "slid_loss": 0.1568, "step": 98, "time": 47.64 }, { "epoch": 0.08, "learning_rate": "9.8921e-05", "loss": 0.1434, "slid_loss": 0.1567, "step": 99, "time": 45.85 }, { "epoch": 0.08, "learning_rate": "9.8896e-05", "loss": 0.1463, "slid_loss": 0.1566, "step": 100, "time": 46.13 }, { "epoch": 0.09, "learning_rate": "9.8871e-05", "loss": 0.1357, "slid_loss": 0.1558, "step": 101, "time": 46.45 }, { "epoch": 0.09, "learning_rate": "9.8846e-05", "loss": 0.1451, "slid_loss": 0.1551, "step": 102, "time": 47.18 }, { "epoch": 0.09, "learning_rate": "9.8820e-05", "loss": 0.1489, "slid_loss": 0.1545, "step": 103, "time": 48.45 }, { "epoch": 0.09, "learning_rate": "9.8794e-05", "loss": 0.1416, "slid_loss": 0.154, "step": 104, "time": 47.46 }, { "epoch": 0.09, "learning_rate": "9.8768e-05", "loss": 0.1428, "slid_loss": 0.1534, "step": 105, "time": 47.11 }, { "epoch": 0.09, "learning_rate": "9.8741e-05", "loss": 0.1397, "slid_loss": 0.1529, "step": 106, "time": 46.69 }, { "epoch": 0.09, "learning_rate": "9.8714e-05", "loss": 0.1459, "slid_loss": 0.1524, "step": 107, "time": 45.91 }, { "epoch": 0.09, "learning_rate": "9.8687e-05", "loss": 0.1504, "slid_loss": 0.1519, "step": 108, "time": 46.65 }, { "epoch": 0.09, "learning_rate": "9.8660e-05", "loss": 0.147, "slid_loss": 0.1516, "step": 109, "time": 46.61 }, { "epoch": 0.09, "learning_rate": "9.8632e-05", "loss": 0.1346, "slid_loss": 0.1511, "step": 110, "time": 46.59 }, { "epoch": 0.09, "learning_rate": "9.8605e-05", "loss": 0.1442, "slid_loss": 0.1508, "step": 111, "time": 45.14 }, { "epoch": 0.09, "learning_rate": "9.8576e-05", "loss": 0.1427, "slid_loss": 0.1505, "step": 112, "time": 48.58 }, { "epoch": 0.1, "learning_rate": "9.8548e-05", "loss": 0.1339, "slid_loss": 0.15, "step": 113, "time": 46.61 }, { "epoch": 0.1, "learning_rate": "9.8519e-05", "loss": 0.1427, "slid_loss": 0.1496, "step": 114, "time": 45.24 }, { "epoch": 0.1, "learning_rate": "9.8490e-05", "loss": 0.1372, "slid_loss": 0.1493, "step": 115, "time": 46.51 }, { "epoch": 0.1, "learning_rate": "9.8461e-05", "loss": 0.147, "slid_loss": 0.1491, "step": 116, "time": 48.66 }, { "epoch": 0.1, "learning_rate": "9.8431e-05", "loss": 0.1475, "slid_loss": 0.149, "step": 117, "time": 45.56 }, { "epoch": 0.1, "learning_rate": "9.8402e-05", "loss": 0.14, "slid_loss": 0.1486, "step": 118, "time": 45.94 }, { "epoch": 0.1, "learning_rate": "9.8371e-05", "loss": 0.1438, "slid_loss": 0.1485, "step": 119, "time": 45.77 }, { "epoch": 0.1, "learning_rate": "9.8341e-05", "loss": 0.1394, "slid_loss": 0.1482, "step": 120, "time": 45.5 }, { "epoch": 0.1, "learning_rate": "9.8310e-05", "loss": 0.1455, "slid_loss": 0.148, "step": 121, "time": 48.74 }, { "epoch": 0.1, "learning_rate": "9.8280e-05", "loss": 0.1388, "slid_loss": 0.1477, "step": 122, "time": 46.07 }, { "epoch": 0.1, "learning_rate": "9.8248e-05", "loss": 0.1398, "slid_loss": 0.1475, "step": 123, "time": 45.6 }, { "epoch": 0.1, "learning_rate": "9.8217e-05", "loss": 0.1472, "slid_loss": 0.1474, "step": 124, "time": 47.77 }, { "epoch": 0.11, "learning_rate": "9.8185e-05", "loss": 0.1488, "slid_loss": 0.1472, "step": 125, "time": 45.48 }, { "epoch": 0.11, "learning_rate": "9.8153e-05", "loss": 0.143, "slid_loss": 0.1469, "step": 126, "time": 48.73 }, { "epoch": 0.11, "learning_rate": "9.8121e-05", "loss": 0.1421, "slid_loss": 0.1468, "step": 127, "time": 45.63 }, { "epoch": 0.11, "learning_rate": "9.8088e-05", "loss": 0.1357, "slid_loss": 0.1465, "step": 128, "time": 47.22 }, { "epoch": 0.11, "learning_rate": "9.8055e-05", "loss": 0.1416, "slid_loss": 0.1463, "step": 129, "time": 46.94 }, { "epoch": 0.11, "learning_rate": "9.8022e-05", "loss": 0.1332, "slid_loss": 0.146, "step": 130, "time": 47.46 }, { "epoch": 0.11, "learning_rate": "9.7989e-05", "loss": 0.1464, "slid_loss": 0.1458, "step": 131, "time": 46.41 }, { "epoch": 0.11, "learning_rate": "9.7955e-05", "loss": 0.1465, "slid_loss": 0.1458, "step": 132, "time": 46.8 }, { "epoch": 0.11, "learning_rate": "9.7921e-05", "loss": 0.1473, "slid_loss": 0.1456, "step": 133, "time": 48.26 }, { "epoch": 0.11, "learning_rate": "9.7887e-05", "loss": 0.1481, "slid_loss": 0.1456, "step": 134, "time": 49.79 }, { "epoch": 0.11, "learning_rate": "9.7853e-05", "loss": 0.1391, "slid_loss": 0.1454, "step": 135, "time": 46.83 }, { "epoch": 0.11, "learning_rate": "9.7818e-05", "loss": 0.1474, "slid_loss": 0.1455, "step": 136, "time": 46.18 }, { "epoch": 0.12, "learning_rate": "9.7783e-05", "loss": 0.1435, "slid_loss": 0.1453, "step": 137, "time": 45.84 }, { "epoch": 0.12, "learning_rate": "9.7748e-05", "loss": 0.1387, "slid_loss": 0.1452, "step": 138, "time": 44.86 }, { "epoch": 0.12, "learning_rate": "9.7712e-05", "loss": 0.1458, "slid_loss": 0.1451, "step": 139, "time": 45.62 }, { "epoch": 0.12, "learning_rate": "9.7676e-05", "loss": 0.1474, "slid_loss": 0.1451, "step": 140, "time": 46.06 }, { "epoch": 0.12, "learning_rate": "9.7640e-05", "loss": 0.1358, "slid_loss": 0.1449, "step": 141, "time": 45.46 }, { "epoch": 0.12, "learning_rate": "9.7604e-05", "loss": 0.1416, "slid_loss": 0.1449, "step": 142, "time": 47.35 }, { "epoch": 0.12, "learning_rate": "9.7567e-05", "loss": 0.1276, "slid_loss": 0.1446, "step": 143, "time": 47.16 }, { "epoch": 0.12, "learning_rate": "9.7530e-05", "loss": 0.1344, "slid_loss": 0.1445, "step": 144, "time": 45.11 }, { "epoch": 0.12, "learning_rate": "9.7493e-05", "loss": 0.1369, "slid_loss": 0.1443, "step": 145, "time": 46.5 }, { "epoch": 0.12, "learning_rate": "9.7456e-05", "loss": 0.1415, "slid_loss": 0.1442, "step": 146, "time": 45.43 }, { "epoch": 0.12, "learning_rate": "9.7418e-05", "loss": 0.1359, "slid_loss": 0.1441, "step": 147, "time": 48.09 }, { "epoch": 0.12, "learning_rate": "9.7380e-05", "loss": 0.1359, "slid_loss": 0.144, "step": 148, "time": 47.34 }, { "epoch": 0.13, "learning_rate": "9.7342e-05", "loss": 0.1315, "slid_loss": 0.1438, "step": 149, "time": 49.79 }, { "epoch": 0.13, "learning_rate": "9.7303e-05", "loss": 0.1404, "slid_loss": 0.1437, "step": 150, "time": 46.45 }, { "epoch": 0.13, "learning_rate": "9.7265e-05", "loss": 0.1402, "slid_loss": 0.1436, "step": 151, "time": 46.96 }, { "epoch": 0.13, "learning_rate": "9.7226e-05", "loss": 0.1397, "slid_loss": 0.1436, "step": 152, "time": 46.84 }, { "epoch": 0.13, "learning_rate": "9.7186e-05", "loss": 0.1522, "slid_loss": 0.1436, "step": 153, "time": 46.07 }, { "epoch": 0.13, "learning_rate": "9.7147e-05", "loss": 0.1413, "slid_loss": 0.1436, "step": 154, "time": 45.52 }, { "epoch": 0.13, "learning_rate": "9.7107e-05", "loss": 0.1399, "slid_loss": 0.1435, "step": 155, "time": 46.48 }, { "epoch": 0.13, "learning_rate": "9.7067e-05", "loss": 0.1346, "slid_loss": 0.1433, "step": 156, "time": 47.82 }, { "epoch": 0.13, "learning_rate": "9.7026e-05", "loss": 0.1414, "slid_loss": 0.1433, "step": 157, "time": 44.85 }, { "epoch": 0.13, "learning_rate": "9.6986e-05", "loss": 0.1399, "slid_loss": 0.1432, "step": 158, "time": 46.29 }, { "epoch": 0.13, "learning_rate": "9.6945e-05", "loss": 0.1374, "slid_loss": 0.1431, "step": 159, "time": 47.2 }, { "epoch": 0.13, "learning_rate": "9.6904e-05", "loss": 0.1385, "slid_loss": 0.143, "step": 160, "time": 47.41 }, { "epoch": 0.14, "learning_rate": "9.6862e-05", "loss": 0.1434, "slid_loss": 0.1429, "step": 161, "time": 46.89 }, { "epoch": 0.14, "learning_rate": "9.6821e-05", "loss": 0.1402, "slid_loss": 0.1428, "step": 162, "time": 45.75 }, { "epoch": 0.14, "learning_rate": "9.6779e-05", "loss": 0.1412, "slid_loss": 0.1427, "step": 163, "time": 45.58 }, { "epoch": 0.14, "learning_rate": "9.6736e-05", "loss": 0.1435, "slid_loss": 0.1426, "step": 164, "time": 48.24 }, { "epoch": 0.14, "learning_rate": "9.6694e-05", "loss": 0.1362, "slid_loss": 0.1425, "step": 165, "time": 47.07 }, { "epoch": 0.14, "learning_rate": "9.6651e-05", "loss": 0.1415, "slid_loss": 0.1424, "step": 166, "time": 46.34 }, { "epoch": 0.14, "learning_rate": "9.6608e-05", "loss": 0.1412, "slid_loss": 0.1423, "step": 167, "time": 47.21 }, { "epoch": 0.14, "learning_rate": "9.6565e-05", "loss": 0.1383, "slid_loss": 0.1422, "step": 168, "time": 45.14 }, { "epoch": 0.14, "learning_rate": "9.6521e-05", "loss": 0.1257, "slid_loss": 0.1419, "step": 169, "time": 47.43 }, { "epoch": 0.14, "learning_rate": "9.6478e-05", "loss": 0.1301, "slid_loss": 0.1417, "step": 170, "time": 47.65 }, { "epoch": 0.14, "learning_rate": "9.6434e-05", "loss": 0.1454, "slid_loss": 0.1417, "step": 171, "time": 47.82 }, { "epoch": 0.14, "learning_rate": "9.6389e-05", "loss": 0.1501, "slid_loss": 0.1418, "step": 172, "time": 45.79 }, { "epoch": 0.15, "learning_rate": "9.6345e-05", "loss": 0.1367, "slid_loss": 0.1418, "step": 173, "time": 47.27 }, { "epoch": 0.15, "learning_rate": "9.6300e-05", "loss": 0.1427, "slid_loss": 0.1418, "step": 174, "time": 48.48 }, { "epoch": 0.15, "learning_rate": "9.6255e-05", "loss": 0.1379, "slid_loss": 0.1418, "step": 175, "time": 44.65 }, { "epoch": 0.15, "learning_rate": "9.6209e-05", "loss": 0.1382, "slid_loss": 0.1417, "step": 176, "time": 45.96 }, { "epoch": 0.15, "learning_rate": "9.6164e-05", "loss": 0.1371, "slid_loss": 0.1415, "step": 177, "time": 48.49 }, { "epoch": 0.15, "learning_rate": "9.6118e-05", "loss": 0.1429, "slid_loss": 0.1415, "step": 178, "time": 46.0 }, { "epoch": 0.15, "learning_rate": "9.6072e-05", "loss": 0.1495, "slid_loss": 0.1415, "step": 179, "time": 46.39 }, { "epoch": 0.15, "learning_rate": "9.6025e-05", "loss": 0.1334, "slid_loss": 0.1415, "step": 180, "time": 46.35 }, { "epoch": 0.15, "learning_rate": "9.5979e-05", "loss": 0.1394, "slid_loss": 0.1414, "step": 181, "time": 46.39 }, { "epoch": 0.15, "learning_rate": "9.5932e-05", "loss": 0.1337, "slid_loss": 0.1413, "step": 182, "time": 45.25 }, { "epoch": 0.15, "learning_rate": "9.5885e-05", "loss": 0.1342, "slid_loss": 0.1412, "step": 183, "time": 47.43 }, { "epoch": 0.16, "learning_rate": "9.5837e-05", "loss": 0.1356, "slid_loss": 0.1412, "step": 184, "time": 47.52 }, { "epoch": 0.16, "learning_rate": "9.5790e-05", "loss": 0.1373, "slid_loss": 0.1411, "step": 185, "time": 45.52 }, { "epoch": 0.16, "learning_rate": "9.5742e-05", "loss": 0.1377, "slid_loss": 0.141, "step": 186, "time": 45.8 }, { "epoch": 0.16, "learning_rate": "9.5694e-05", "loss": 0.1407, "slid_loss": 0.1409, "step": 187, "time": 49.12 }, { "epoch": 0.16, "learning_rate": "9.5645e-05", "loss": 0.1379, "slid_loss": 0.1409, "step": 188, "time": 46.77 }, { "epoch": 0.16, "learning_rate": "9.5596e-05", "loss": 0.1448, "slid_loss": 0.141, "step": 189, "time": 46.49 }, { "epoch": 0.16, "learning_rate": "9.5547e-05", "loss": 0.139, "slid_loss": 0.1409, "step": 190, "time": 45.94 }, { "epoch": 0.16, "learning_rate": "9.5498e-05", "loss": 0.1451, "slid_loss": 0.1409, "step": 191, "time": 46.17 }, { "epoch": 0.16, "learning_rate": "9.5449e-05", "loss": 0.1369, "slid_loss": 0.1409, "step": 192, "time": 46.11 }, { "epoch": 0.16, "learning_rate": "9.5399e-05", "loss": 0.1339, "slid_loss": 0.1408, "step": 193, "time": 46.96 }, { "epoch": 0.16, "learning_rate": "9.5349e-05", "loss": 0.1345, "slid_loss": 0.1407, "step": 194, "time": 47.56 }, { "epoch": 0.16, "learning_rate": "9.5299e-05", "loss": 0.1346, "slid_loss": 0.1405, "step": 195, "time": 46.84 }, { "epoch": 0.17, "learning_rate": "9.5248e-05", "loss": 0.1351, "slid_loss": 0.1405, "step": 196, "time": 49.93 }, { "epoch": 0.17, "learning_rate": "9.5198e-05", "loss": 0.1386, "slid_loss": 0.1405, "step": 197, "time": 45.32 }, { "epoch": 0.17, "learning_rate": "9.5147e-05", "loss": 0.1417, "slid_loss": 0.1405, "step": 198, "time": 47.06 }, { "epoch": 0.17, "learning_rate": "9.5095e-05", "loss": 0.1379, "slid_loss": 0.1404, "step": 199, "time": 45.81 }, { "epoch": 0.17, "learning_rate": "9.5044e-05", "loss": 0.1324, "slid_loss": 0.1403, "step": 200, "time": 45.83 }, { "epoch": 0.17, "learning_rate": "9.4992e-05", "loss": 0.1402, "slid_loss": 0.1403, "step": 201, "time": 46.5 }, { "epoch": 0.17, "learning_rate": "9.4940e-05", "loss": 0.1349, "slid_loss": 0.1402, "step": 202, "time": 49.6 }, { "epoch": 0.17, "learning_rate": "9.4888e-05", "loss": 0.1409, "slid_loss": 0.1402, "step": 203, "time": 49.01 }, { "epoch": 0.17, "learning_rate": "9.4836e-05", "loss": 0.1312, "slid_loss": 0.14, "step": 204, "time": 47.51 }, { "epoch": 0.17, "learning_rate": "9.4783e-05", "loss": 0.14, "slid_loss": 0.14, "step": 205, "time": 45.38 }, { "epoch": 0.17, "learning_rate": "9.4730e-05", "loss": 0.1367, "slid_loss": 0.14, "step": 206, "time": 46.64 }, { "epoch": 0.17, "learning_rate": "9.4677e-05", "loss": 0.1363, "slid_loss": 0.1399, "step": 207, "time": 46.32 }, { "epoch": 0.18, "learning_rate": "9.4623e-05", "loss": 0.1275, "slid_loss": 0.1397, "step": 208, "time": 46.34 }, { "epoch": 0.18, "learning_rate": "9.4569e-05", "loss": 0.1346, "slid_loss": 0.1395, "step": 209, "time": 48.32 }, { "epoch": 0.18, "learning_rate": "9.4515e-05", "loss": 0.1368, "slid_loss": 0.1396, "step": 210, "time": 45.5 }, { "epoch": 0.18, "learning_rate": "9.4461e-05", "loss": 0.1401, "slid_loss": 0.1395, "step": 211, "time": 46.83 }, { "epoch": 0.18, "learning_rate": "9.4407e-05", "loss": 0.136, "slid_loss": 0.1395, "step": 212, "time": 47.35 }, { "epoch": 0.18, "learning_rate": "9.4352e-05", "loss": 0.1433, "slid_loss": 0.1395, "step": 213, "time": 46.19 }, { "epoch": 0.18, "learning_rate": "9.4297e-05", "loss": 0.1332, "slid_loss": 0.1395, "step": 214, "time": 45.95 }, { "epoch": 0.18, "learning_rate": "9.4242e-05", "loss": 0.1427, "slid_loss": 0.1395, "step": 215, "time": 48.28 }, { "epoch": 0.18, "learning_rate": "9.4186e-05", "loss": 0.1444, "slid_loss": 0.1395, "step": 216, "time": 47.17 }, { "epoch": 0.18, "learning_rate": "9.4131e-05", "loss": 0.1356, "slid_loss": 0.1394, "step": 217, "time": 47.52 }, { "epoch": 0.18, "learning_rate": "9.4075e-05", "loss": 0.1422, "slid_loss": 0.1394, "step": 218, "time": 45.98 }, { "epoch": 0.18, "learning_rate": "9.4019e-05", "loss": 0.1369, "slid_loss": 0.1393, "step": 219, "time": 45.78 }, { "epoch": 0.19, "learning_rate": "9.3962e-05", "loss": 0.1389, "slid_loss": 0.1393, "step": 220, "time": 45.72 }, { "epoch": 0.19, "learning_rate": "9.3906e-05", "loss": 0.1319, "slid_loss": 0.1392, "step": 221, "time": 45.04 }, { "epoch": 0.19, "learning_rate": "9.3849e-05", "loss": 0.1309, "slid_loss": 0.1391, "step": 222, "time": 46.05 }, { "epoch": 0.19, "learning_rate": "9.3791e-05", "loss": 0.1342, "slid_loss": 0.139, "step": 223, "time": 46.89 }, { "epoch": 0.19, "learning_rate": "9.3734e-05", "loss": 0.1373, "slid_loss": 0.1389, "step": 224, "time": 45.79 }, { "epoch": 0.19, "learning_rate": "9.3676e-05", "loss": 0.1387, "slid_loss": 0.1388, "step": 225, "time": 46.5 }, { "epoch": 0.19, "learning_rate": "9.3619e-05", "loss": 0.1317, "slid_loss": 0.1387, "step": 226, "time": 48.07 }, { "epoch": 0.19, "learning_rate": "9.3560e-05", "loss": 0.1432, "slid_loss": 0.1387, "step": 227, "time": 46.22 }, { "epoch": 0.19, "learning_rate": "9.3502e-05", "loss": 0.1358, "slid_loss": 0.1387, "step": 228, "time": 47.6 }, { "epoch": 0.19, "learning_rate": "9.3443e-05", "loss": 0.1341, "slid_loss": 0.1387, "step": 229, "time": 46.8 }, { "epoch": 0.19, "learning_rate": "9.3385e-05", "loss": 0.1334, "slid_loss": 0.1387, "step": 230, "time": 49.29 }, { "epoch": 0.19, "learning_rate": "9.3326e-05", "loss": 0.1339, "slid_loss": 0.1385, "step": 231, "time": 46.45 }, { "epoch": 0.2, "learning_rate": "9.3266e-05", "loss": 0.1376, "slid_loss": 0.1385, "step": 232, "time": 46.14 }, { "epoch": 0.2, "learning_rate": "9.3207e-05", "loss": 0.1363, "slid_loss": 0.1383, "step": 233, "time": 45.01 }, { "epoch": 0.2, "learning_rate": "9.3147e-05", "loss": 0.1382, "slid_loss": 0.1382, "step": 234, "time": 47.27 }, { "epoch": 0.2, "learning_rate": "9.3087e-05", "loss": 0.1356, "slid_loss": 0.1382, "step": 235, "time": 44.83 }, { "epoch": 0.2, "learning_rate": "9.3027e-05", "loss": 0.1396, "slid_loss": 0.1381, "step": 236, "time": 45.61 }, { "epoch": 0.2, "learning_rate": "9.2966e-05", "loss": 0.1398, "slid_loss": 0.1381, "step": 237, "time": 45.87 }, { "epoch": 0.2, "learning_rate": "9.2905e-05", "loss": 0.1305, "slid_loss": 0.138, "step": 238, "time": 48.21 }, { "epoch": 0.2, "learning_rate": "9.2844e-05", "loss": 0.1344, "slid_loss": 0.1379, "step": 239, "time": 45.7 }, { "epoch": 0.2, "learning_rate": "9.2783e-05", "loss": 0.1443, "slid_loss": 0.1379, "step": 240, "time": 47.16 }, { "epoch": 0.2, "learning_rate": "9.2722e-05", "loss": 0.1361, "slid_loss": 0.1379, "step": 241, "time": 48.03 }, { "epoch": 0.2, "learning_rate": "9.2660e-05", "loss": 0.1289, "slid_loss": 0.1377, "step": 242, "time": 48.08 }, { "epoch": 0.2, "learning_rate": "9.2598e-05", "loss": 0.1469, "slid_loss": 0.1379, "step": 243, "time": 47.05 }, { "epoch": 0.21, "learning_rate": "9.2536e-05", "loss": 0.1327, "slid_loss": 0.1379, "step": 244, "time": 44.99 }, { "epoch": 0.21, "learning_rate": "9.2474e-05", "loss": 0.1368, "slid_loss": 0.1379, "step": 245, "time": 46.44 }, { "epoch": 0.21, "learning_rate": "9.2411e-05", "loss": 0.1385, "slid_loss": 0.1379, "step": 246, "time": 47.77 }, { "epoch": 0.21, "learning_rate": "9.2348e-05", "loss": 0.1367, "slid_loss": 0.1379, "step": 247, "time": 45.9 }, { "epoch": 0.21, "learning_rate": "9.2285e-05", "loss": 0.13, "slid_loss": 0.1378, "step": 248, "time": 45.19 }, { "epoch": 0.21, "learning_rate": "9.2222e-05", "loss": 0.1264, "slid_loss": 0.1378, "step": 249, "time": 46.23 }, { "epoch": 0.21, "learning_rate": "9.2158e-05", "loss": 0.1413, "slid_loss": 0.1378, "step": 250, "time": 47.56 }, { "epoch": 0.21, "learning_rate": "9.2094e-05", "loss": 0.1404, "slid_loss": 0.1378, "step": 251, "time": 45.91 }, { "epoch": 0.21, "learning_rate": "9.2030e-05", "loss": 0.1368, "slid_loss": 0.1378, "step": 252, "time": 44.73 }, { "epoch": 0.21, "learning_rate": "9.1966e-05", "loss": 0.1397, "slid_loss": 0.1376, "step": 253, "time": 46.38 }, { "epoch": 0.21, "learning_rate": "9.1902e-05", "loss": 0.1376, "slid_loss": 0.1376, "step": 254, "time": 45.53 }, { "epoch": 0.21, "learning_rate": "9.1837e-05", "loss": 0.1328, "slid_loss": 0.1375, "step": 255, "time": 46.59 }, { "epoch": 0.22, "learning_rate": "9.1772e-05", "loss": 0.1337, "slid_loss": 0.1375, "step": 256, "time": 46.71 }, { "epoch": 0.22, "learning_rate": "9.1707e-05", "loss": 0.1374, "slid_loss": 0.1375, "step": 257, "time": 46.27 }, { "epoch": 0.22, "learning_rate": "9.1642e-05", "loss": 0.1326, "slid_loss": 0.1374, "step": 258, "time": 47.03 }, { "epoch": 0.22, "learning_rate": "9.1576e-05", "loss": 0.1279, "slid_loss": 0.1373, "step": 259, "time": 44.97 }, { "epoch": 0.22, "learning_rate": "9.1510e-05", "loss": 0.1303, "slid_loss": 0.1372, "step": 260, "time": 47.27 }, { "epoch": 0.22, "learning_rate": "9.1444e-05", "loss": 0.134, "slid_loss": 0.1371, "step": 261, "time": 47.77 }, { "epoch": 0.22, "learning_rate": "9.1378e-05", "loss": 0.131, "slid_loss": 0.1371, "step": 262, "time": 47.75 }, { "epoch": 0.22, "learning_rate": "9.1311e-05", "loss": 0.1343, "slid_loss": 0.137, "step": 263, "time": 49.38 }, { "epoch": 0.22, "learning_rate": "9.1245e-05", "loss": 0.1419, "slid_loss": 0.137, "step": 264, "time": 48.7 }, { "epoch": 0.22, "learning_rate": "9.1178e-05", "loss": 0.139, "slid_loss": 0.137, "step": 265, "time": 44.41 }, { "epoch": 0.22, "learning_rate": "9.1111e-05", "loss": 0.1325, "slid_loss": 0.1369, "step": 266, "time": 47.23 }, { "epoch": 0.22, "learning_rate": "9.1043e-05", "loss": 0.1396, "slid_loss": 0.1369, "step": 267, "time": 45.76 }, { "epoch": 0.23, "learning_rate": "9.0976e-05", "loss": 0.1381, "slid_loss": 0.1369, "step": 268, "time": 45.71 }, { "epoch": 0.23, "learning_rate": "9.0908e-05", "loss": 0.135, "slid_loss": 0.137, "step": 269, "time": 47.32 }, { "epoch": 0.23, "learning_rate": "9.0840e-05", "loss": 0.1312, "slid_loss": 0.137, "step": 270, "time": 45.63 }, { "epoch": 0.23, "learning_rate": "9.0771e-05", "loss": 0.1276, "slid_loss": 0.1368, "step": 271, "time": 45.77 }, { "epoch": 0.23, "learning_rate": "9.0703e-05", "loss": 0.1407, "slid_loss": 0.1367, "step": 272, "time": 46.35 }, { "epoch": 0.23, "learning_rate": "9.0634e-05", "loss": 0.1302, "slid_loss": 0.1367, "step": 273, "time": 47.35 }, { "epoch": 0.23, "learning_rate": "9.0565e-05", "loss": 0.143, "slid_loss": 0.1367, "step": 274, "time": 45.74 }, { "epoch": 0.23, "learning_rate": "9.0496e-05", "loss": 0.1337, "slid_loss": 0.1366, "step": 275, "time": 45.74 }, { "epoch": 0.23, "learning_rate": "9.0427e-05", "loss": 0.1318, "slid_loss": 0.1366, "step": 276, "time": 47.6 }, { "epoch": 0.23, "learning_rate": "9.0357e-05", "loss": 0.133, "slid_loss": 0.1365, "step": 277, "time": 45.32 }, { "epoch": 0.23, "learning_rate": "9.0287e-05", "loss": 0.1359, "slid_loss": 0.1364, "step": 278, "time": 48.58 }, { "epoch": 0.24, "learning_rate": "9.0217e-05", "loss": 0.1359, "slid_loss": 0.1363, "step": 279, "time": 45.08 }, { "epoch": 0.24, "learning_rate": "9.0147e-05", "loss": 0.1377, "slid_loss": 0.1363, "step": 280, "time": 46.46 }, { "epoch": 0.24, "learning_rate": "9.0077e-05", "loss": 0.1306, "slid_loss": 0.1363, "step": 281, "time": 46.85 }, { "epoch": 0.24, "learning_rate": "9.0006e-05", "loss": 0.1301, "slid_loss": 0.1362, "step": 282, "time": 45.49 }, { "epoch": 0.24, "learning_rate": "8.9935e-05", "loss": 0.1345, "slid_loss": 0.1362, "step": 283, "time": 48.25 }, { "epoch": 0.24, "learning_rate": "8.9864e-05", "loss": 0.1297, "slid_loss": 0.1362, "step": 284, "time": 45.85 }, { "epoch": 0.24, "learning_rate": "8.9793e-05", "loss": 0.1373, "slid_loss": 0.1362, "step": 285, "time": 46.44 }, { "epoch": 0.24, "learning_rate": "8.9721e-05", "loss": 0.1368, "slid_loss": 0.1362, "step": 286, "time": 46.42 }, { "epoch": 0.24, "learning_rate": "8.9649e-05", "loss": 0.138, "slid_loss": 0.1361, "step": 287, "time": 46.31 }, { "epoch": 0.24, "learning_rate": "8.9577e-05", "loss": 0.1345, "slid_loss": 0.1361, "step": 288, "time": 45.31 }, { "epoch": 0.24, "learning_rate": "8.9505e-05", "loss": 0.1374, "slid_loss": 0.136, "step": 289, "time": 47.19 }, { "epoch": 0.24, "learning_rate": "8.9433e-05", "loss": 0.133, "slid_loss": 0.136, "step": 290, "time": 46.14 }, { "epoch": 0.25, "learning_rate": "8.9360e-05", "loss": 0.1334, "slid_loss": 0.1358, "step": 291, "time": 45.89 }, { "epoch": 0.25, "learning_rate": "8.9287e-05", "loss": 0.1331, "slid_loss": 0.1358, "step": 292, "time": 48.08 }, { "epoch": 0.25, "learning_rate": "8.9214e-05", "loss": 0.1314, "slid_loss": 0.1358, "step": 293, "time": 46.56 }, { "epoch": 0.25, "learning_rate": "8.9141e-05", "loss": 0.1275, "slid_loss": 0.1357, "step": 294, "time": 48.63 }, { "epoch": 0.25, "learning_rate": "8.9068e-05", "loss": 0.1387, "slid_loss": 0.1358, "step": 295, "time": 48.59 }, { "epoch": 0.25, "learning_rate": "8.8994e-05", "loss": 0.1335, "slid_loss": 0.1357, "step": 296, "time": 47.66 }, { "epoch": 0.25, "learning_rate": "8.8920e-05", "loss": 0.1363, "slid_loss": 0.1357, "step": 297, "time": 46.77 }, { "epoch": 0.25, "learning_rate": "8.8846e-05", "loss": 0.1322, "slid_loss": 0.1356, "step": 298, "time": 48.14 }, { "epoch": 0.25, "learning_rate": "8.8772e-05", "loss": 0.1324, "slid_loss": 0.1356, "step": 299, "time": 48.01 }, { "epoch": 0.25, "learning_rate": "8.8697e-05", "loss": 0.1272, "slid_loss": 0.1355, "step": 300, "time": 47.08 }, { "epoch": 0.25, "learning_rate": "8.8623e-05", "loss": 0.1399, "slid_loss": 0.1355, "step": 301, "time": 47.34 }, { "epoch": 0.25, "learning_rate": "8.8548e-05", "loss": 0.1364, "slid_loss": 0.1355, "step": 302, "time": 45.45 }, { "epoch": 0.26, "learning_rate": "8.8473e-05", "loss": 0.1307, "slid_loss": 0.1354, "step": 303, "time": 45.43 }, { "epoch": 0.26, "learning_rate": "8.8398e-05", "loss": 0.1303, "slid_loss": 0.1354, "step": 304, "time": 45.13 }, { "epoch": 0.26, "learning_rate": "8.8322e-05", "loss": 0.1412, "slid_loss": 0.1354, "step": 305, "time": 47.48 }, { "epoch": 0.26, "learning_rate": "8.8246e-05", "loss": 0.1257, "slid_loss": 0.1353, "step": 306, "time": 47.46 }, { "epoch": 0.26, "learning_rate": "8.8171e-05", "loss": 0.1307, "slid_loss": 0.1353, "step": 307, "time": 46.46 }, { "epoch": 0.26, "learning_rate": "8.8094e-05", "loss": 0.1294, "slid_loss": 0.1353, "step": 308, "time": 47.94 }, { "epoch": 0.26, "learning_rate": "8.8018e-05", "loss": 0.1318, "slid_loss": 0.1352, "step": 309, "time": 47.16 }, { "epoch": 0.26, "learning_rate": "8.7942e-05", "loss": 0.1346, "slid_loss": 0.1352, "step": 310, "time": 45.65 }, { "epoch": 0.26, "learning_rate": "8.7865e-05", "loss": 0.1362, "slid_loss": 0.1352, "step": 311, "time": 48.16 }, { "epoch": 0.26, "learning_rate": "8.7788e-05", "loss": 0.1388, "slid_loss": 0.1352, "step": 312, "time": 51.78 }, { "epoch": 0.26, "learning_rate": "8.7711e-05", "loss": 0.1291, "slid_loss": 0.1351, "step": 313, "time": 46.35 }, { "epoch": 0.26, "learning_rate": "8.7634e-05", "loss": 0.1372, "slid_loss": 0.1351, "step": 314, "time": 48.89 }, { "epoch": 0.27, "learning_rate": "8.7556e-05", "loss": 0.1288, "slid_loss": 0.135, "step": 315, "time": 45.88 }, { "epoch": 0.27, "learning_rate": "8.7478e-05", "loss": 0.1332, "slid_loss": 0.1349, "step": 316, "time": 47.52 }, { "epoch": 0.27, "learning_rate": "8.7401e-05", "loss": 0.1337, "slid_loss": 0.1348, "step": 317, "time": 45.04 }, { "epoch": 0.27, "learning_rate": "8.7323e-05", "loss": 0.131, "slid_loss": 0.1347, "step": 318, "time": 45.84 }, { "epoch": 0.27, "learning_rate": "8.7244e-05", "loss": 0.1293, "slid_loss": 0.1347, "step": 319, "time": 48.9 }, { "epoch": 0.27, "learning_rate": "8.7166e-05", "loss": 0.124, "slid_loss": 0.1345, "step": 320, "time": 46.7 }, { "epoch": 0.27, "learning_rate": "8.7087e-05", "loss": 0.1322, "slid_loss": 0.1345, "step": 321, "time": 46.28 }, { "epoch": 0.27, "learning_rate": "8.7008e-05", "loss": 0.1285, "slid_loss": 0.1345, "step": 322, "time": 46.66 }, { "epoch": 0.27, "learning_rate": "8.6929e-05", "loss": 0.1279, "slid_loss": 0.1344, "step": 323, "time": 46.51 }, { "epoch": 0.27, "learning_rate": "8.6850e-05", "loss": 0.1397, "slid_loss": 0.1344, "step": 324, "time": 45.32 }, { "epoch": 0.27, "learning_rate": "8.6771e-05", "loss": 0.1308, "slid_loss": 0.1344, "step": 325, "time": 46.31 }, { "epoch": 0.27, "learning_rate": "8.6691e-05", "loss": 0.1334, "slid_loss": 0.1344, "step": 326, "time": 46.36 }, { "epoch": 0.28, "learning_rate": "8.6611e-05", "loss": 0.1366, "slid_loss": 0.1343, "step": 327, "time": 45.64 }, { "epoch": 0.28, "learning_rate": "8.6531e-05", "loss": 0.1333, "slid_loss": 0.1343, "step": 328, "time": 45.06 }, { "epoch": 0.28, "learning_rate": "8.6451e-05", "loss": 0.1258, "slid_loss": 0.1342, "step": 329, "time": 47.7 }, { "epoch": 0.28, "learning_rate": "8.6370e-05", "loss": 0.1377, "slid_loss": 0.1343, "step": 330, "time": 47.65 }, { "epoch": 0.28, "learning_rate": "8.6290e-05", "loss": 0.1372, "slid_loss": 0.1343, "step": 331, "time": 46.1 }, { "epoch": 0.28, "learning_rate": "8.6209e-05", "loss": 0.1288, "slid_loss": 0.1342, "step": 332, "time": 46.35 }, { "epoch": 0.28, "learning_rate": "8.6128e-05", "loss": 0.1328, "slid_loss": 0.1342, "step": 333, "time": 45.03 }, { "epoch": 0.28, "learning_rate": "8.6047e-05", "loss": 0.1307, "slid_loss": 0.1341, "step": 334, "time": 45.68 }, { "epoch": 0.28, "learning_rate": "8.5966e-05", "loss": 0.1349, "slid_loss": 0.1341, "step": 335, "time": 49.23 }, { "epoch": 0.28, "learning_rate": "8.5884e-05", "loss": 0.1324, "slid_loss": 0.134, "step": 336, "time": 48.05 }, { "epoch": 0.28, "learning_rate": "8.5802e-05", "loss": 0.1393, "slid_loss": 0.134, "step": 337, "time": 45.29 }, { "epoch": 0.28, "learning_rate": "8.5721e-05", "loss": 0.1318, "slid_loss": 0.134, "step": 338, "time": 47.05 }, { "epoch": 0.29, "learning_rate": "8.5639e-05", "loss": 0.1376, "slid_loss": 0.1341, "step": 339, "time": 47.72 }, { "epoch": 0.29, "learning_rate": "8.5556e-05", "loss": 0.1318, "slid_loss": 0.1339, "step": 340, "time": 46.61 }, { "epoch": 0.29, "learning_rate": "8.5474e-05", "loss": 0.1319, "slid_loss": 0.1339, "step": 341, "time": 46.36 }, { "epoch": 0.29, "learning_rate": "8.5391e-05", "loss": 0.1356, "slid_loss": 0.134, "step": 342, "time": 47.32 }, { "epoch": 0.29, "learning_rate": "8.5308e-05", "loss": 0.1245, "slid_loss": 0.1337, "step": 343, "time": 48.38 }, { "epoch": 0.29, "learning_rate": "8.5225e-05", "loss": 0.1311, "slid_loss": 0.1337, "step": 344, "time": 47.25 }, { "epoch": 0.29, "learning_rate": "8.5142e-05", "loss": 0.1378, "slid_loss": 0.1337, "step": 345, "time": 48.14 }, { "epoch": 0.29, "learning_rate": "8.5059e-05", "loss": 0.1413, "slid_loss": 0.1337, "step": 346, "time": 46.51 }, { "epoch": 0.29, "learning_rate": "8.4975e-05", "loss": 0.1356, "slid_loss": 0.1337, "step": 347, "time": 45.1 }, { "epoch": 0.29, "learning_rate": "8.4892e-05", "loss": 0.1295, "slid_loss": 0.1337, "step": 348, "time": 46.81 }, { "epoch": 0.29, "learning_rate": "8.4808e-05", "loss": 0.1277, "slid_loss": 0.1337, "step": 349, "time": 47.47 }, { "epoch": 0.29, "learning_rate": "8.4724e-05", "loss": 0.1292, "slid_loss": 0.1336, "step": 350, "time": 47.44 }, { "epoch": 0.3, "learning_rate": "8.4640e-05", "loss": 0.1283, "slid_loss": 0.1335, "step": 351, "time": 50.1 }, { "epoch": 0.3, "learning_rate": "8.4555e-05", "loss": 0.1251, "slid_loss": 0.1334, "step": 352, "time": 46.83 }, { "epoch": 0.3, "learning_rate": "8.4471e-05", "loss": 0.1405, "slid_loss": 0.1334, "step": 353, "time": 48.32 }, { "epoch": 0.3, "learning_rate": "8.4386e-05", "loss": 0.1254, "slid_loss": 0.1333, "step": 354, "time": 47.68 }, { "epoch": 0.3, "learning_rate": "8.4301e-05", "loss": 0.1326, "slid_loss": 0.1333, "step": 355, "time": 47.78 }, { "epoch": 0.3, "learning_rate": "8.4216e-05", "loss": 0.1436, "slid_loss": 0.1334, "step": 356, "time": 47.13 }, { "epoch": 0.3, "learning_rate": "8.4131e-05", "loss": 0.13, "slid_loss": 0.1333, "step": 357, "time": 46.48 }, { "epoch": 0.3, "learning_rate": "8.4045e-05", "loss": 0.1323, "slid_loss": 0.1333, "step": 358, "time": 47.54 }, { "epoch": 0.3, "learning_rate": "8.3959e-05", "loss": 0.1331, "slid_loss": 0.1333, "step": 359, "time": 46.67 }, { "epoch": 0.3, "learning_rate": "8.3874e-05", "loss": 0.1387, "slid_loss": 0.1334, "step": 360, "time": 48.05 }, { "epoch": 0.3, "learning_rate": "8.3788e-05", "loss": 0.1309, "slid_loss": 0.1334, "step": 361, "time": 46.49 }, { "epoch": 0.31, "learning_rate": "8.3702e-05", "loss": 0.131, "slid_loss": 0.1334, "step": 362, "time": 47.59 }, { "epoch": 0.31, "learning_rate": "8.3615e-05", "loss": 0.1396, "slid_loss": 0.1335, "step": 363, "time": 47.87 }, { "epoch": 0.31, "learning_rate": "8.3529e-05", "loss": 0.1297, "slid_loss": 0.1333, "step": 364, "time": 45.82 }, { "epoch": 0.31, "learning_rate": "8.3442e-05", "loss": 0.1253, "slid_loss": 0.1332, "step": 365, "time": 47.38 }, { "epoch": 0.31, "learning_rate": "8.3355e-05", "loss": 0.1288, "slid_loss": 0.1332, "step": 366, "time": 45.19 }, { "epoch": 0.31, "learning_rate": "8.3268e-05", "loss": 0.1376, "slid_loss": 0.1331, "step": 367, "time": 46.03 }, { "epoch": 0.31, "learning_rate": "8.3181e-05", "loss": 0.1346, "slid_loss": 0.1331, "step": 368, "time": 46.73 }, { "epoch": 0.31, "learning_rate": "8.3094e-05", "loss": 0.1412, "slid_loss": 0.1332, "step": 369, "time": 47.2 }, { "epoch": 0.31, "learning_rate": "8.3006e-05", "loss": 0.131, "slid_loss": 0.1332, "step": 370, "time": 46.76 }, { "epoch": 0.31, "learning_rate": "8.2919e-05", "loss": 0.1436, "slid_loss": 0.1333, "step": 371, "time": 47.27 }, { "epoch": 0.31, "learning_rate": "8.2831e-05", "loss": 0.1296, "slid_loss": 0.1332, "step": 372, "time": 46.89 }, { "epoch": 0.31, "learning_rate": "8.2743e-05", "loss": 0.1287, "slid_loss": 0.1332, "step": 373, "time": 44.38 }, { "epoch": 0.32, "learning_rate": "8.2655e-05", "loss": 0.1384, "slid_loss": 0.1331, "step": 374, "time": 47.52 }, { "epoch": 0.32, "learning_rate": "8.2566e-05", "loss": 0.1357, "slid_loss": 0.1332, "step": 375, "time": 46.74 }, { "epoch": 0.32, "learning_rate": "8.2478e-05", "loss": 0.1257, "slid_loss": 0.1331, "step": 376, "time": 45.27 }, { "epoch": 0.32, "learning_rate": "8.2389e-05", "loss": 0.1301, "slid_loss": 0.1331, "step": 377, "time": 46.68 }, { "epoch": 0.32, "learning_rate": "8.2301e-05", "loss": 0.1363, "slid_loss": 0.1331, "step": 378, "time": 47.75 }, { "epoch": 0.32, "learning_rate": "8.2212e-05", "loss": 0.1263, "slid_loss": 0.133, "step": 379, "time": 44.94 }, { "epoch": 0.32, "learning_rate": "8.2123e-05", "loss": 0.1325, "slid_loss": 0.1329, "step": 380, "time": 47.44 }, { "epoch": 0.32, "learning_rate": "8.2033e-05", "loss": 0.1373, "slid_loss": 0.133, "step": 381, "time": 46.08 }, { "epoch": 0.32, "learning_rate": "8.1944e-05", "loss": 0.139, "slid_loss": 0.1331, "step": 382, "time": 50.22 }, { "epoch": 0.32, "learning_rate": "8.1854e-05", "loss": 0.1341, "slid_loss": 0.1331, "step": 383, "time": 46.37 }, { "epoch": 0.32, "learning_rate": "8.1765e-05", "loss": 0.128, "slid_loss": 0.1331, "step": 384, "time": 46.65 }, { "epoch": 0.32, "learning_rate": "8.1675e-05", "loss": 0.125, "slid_loss": 0.1329, "step": 385, "time": 46.41 }, { "epoch": 0.33, "learning_rate": "8.1585e-05", "loss": 0.1328, "slid_loss": 0.1329, "step": 386, "time": 45.24 }, { "epoch": 0.33, "learning_rate": "8.1494e-05", "loss": 0.1305, "slid_loss": 0.1328, "step": 387, "time": 46.78 }, { "epoch": 0.33, "learning_rate": "8.1404e-05", "loss": 0.1334, "slid_loss": 0.1328, "step": 388, "time": 47.67 }, { "epoch": 0.33, "learning_rate": "8.1314e-05", "loss": 0.1325, "slid_loss": 0.1328, "step": 389, "time": 47.71 }, { "epoch": 0.33, "learning_rate": "8.1223e-05", "loss": 0.1226, "slid_loss": 0.1327, "step": 390, "time": 46.58 }, { "epoch": 0.33, "learning_rate": "8.1132e-05", "loss": 0.1348, "slid_loss": 0.1327, "step": 391, "time": 47.42 }, { "epoch": 0.33, "learning_rate": "8.1041e-05", "loss": 0.1377, "slid_loss": 0.1327, "step": 392, "time": 47.11 }, { "epoch": 0.33, "learning_rate": "8.0950e-05", "loss": 0.1367, "slid_loss": 0.1328, "step": 393, "time": 46.24 }, { "epoch": 0.33, "learning_rate": "8.0859e-05", "loss": 0.1331, "slid_loss": 0.1328, "step": 394, "time": 46.44 }, { "epoch": 0.33, "learning_rate": "8.0767e-05", "loss": 0.1334, "slid_loss": 0.1328, "step": 395, "time": 44.03 }, { "epoch": 0.33, "learning_rate": "8.0676e-05", "loss": 0.1288, "slid_loss": 0.1327, "step": 396, "time": 46.18 }, { "epoch": 0.33, "learning_rate": "8.0584e-05", "loss": 0.1351, "slid_loss": 0.1327, "step": 397, "time": 47.62 }, { "epoch": 0.34, "learning_rate": "8.0492e-05", "loss": 0.1269, "slid_loss": 0.1327, "step": 398, "time": 46.23 }, { "epoch": 0.34, "learning_rate": "8.0400e-05", "loss": 0.1278, "slid_loss": 0.1326, "step": 399, "time": 45.92 }, { "epoch": 0.34, "learning_rate": "8.0308e-05", "loss": 0.1385, "slid_loss": 0.1327, "step": 400, "time": 45.8 }, { "epoch": 0.34, "learning_rate": "8.0216e-05", "loss": 0.1272, "slid_loss": 0.1326, "step": 401, "time": 48.02 }, { "epoch": 0.34, "learning_rate": "8.0123e-05", "loss": 0.1162, "slid_loss": 0.1324, "step": 402, "time": 48.85 }, { "epoch": 0.34, "learning_rate": "8.0031e-05", "loss": 0.1308, "slid_loss": 0.1324, "step": 403, "time": 45.57 }, { "epoch": 0.34, "learning_rate": "7.9938e-05", "loss": 0.1266, "slid_loss": 0.1324, "step": 404, "time": 45.77 }, { "epoch": 0.34, "learning_rate": "7.9845e-05", "loss": 0.1349, "slid_loss": 0.1323, "step": 405, "time": 46.6 }, { "epoch": 0.34, "learning_rate": "7.9752e-05", "loss": 0.1197, "slid_loss": 0.1323, "step": 406, "time": 44.61 }, { "epoch": 0.34, "learning_rate": "7.9659e-05", "loss": 0.1363, "slid_loss": 0.1323, "step": 407, "time": 47.86 }, { "epoch": 0.34, "learning_rate": "7.9566e-05", "loss": 0.1336, "slid_loss": 0.1323, "step": 408, "time": 47.58 }, { "epoch": 0.34, "learning_rate": "7.9472e-05", "loss": 0.1332, "slid_loss": 0.1324, "step": 409, "time": 44.94 }, { "epoch": 0.35, "learning_rate": "7.9379e-05", "loss": 0.1252, "slid_loss": 0.1323, "step": 410, "time": 46.27 }, { "epoch": 0.35, "learning_rate": "7.9285e-05", "loss": 0.129, "slid_loss": 0.1322, "step": 411, "time": 46.03 }, { "epoch": 0.35, "learning_rate": "7.9191e-05", "loss": 0.1257, "slid_loss": 0.1321, "step": 412, "time": 46.49 }, { "epoch": 0.35, "learning_rate": "7.9097e-05", "loss": 0.14, "slid_loss": 0.1322, "step": 413, "time": 45.87 }, { "epoch": 0.35, "learning_rate": "7.9003e-05", "loss": 0.1209, "slid_loss": 0.132, "step": 414, "time": 46.87 }, { "epoch": 0.35, "learning_rate": "7.8909e-05", "loss": 0.1216, "slid_loss": 0.1319, "step": 415, "time": 46.89 }, { "epoch": 0.35, "learning_rate": "7.8815e-05", "loss": 0.1263, "slid_loss": 0.1319, "step": 416, "time": 45.85 }, { "epoch": 0.35, "learning_rate": "7.8720e-05", "loss": 0.1179, "slid_loss": 0.1317, "step": 417, "time": 45.86 }, { "epoch": 0.35, "learning_rate": "7.8625e-05", "loss": 0.1332, "slid_loss": 0.1317, "step": 418, "time": 46.26 }, { "epoch": 0.35, "learning_rate": "7.8531e-05", "loss": 0.1247, "slid_loss": 0.1317, "step": 419, "time": 46.52 }, { "epoch": 0.35, "learning_rate": "7.8436e-05", "loss": 0.1429, "slid_loss": 0.1319, "step": 420, "time": 44.08 }, { "epoch": 0.35, "learning_rate": "7.8341e-05", "loss": 0.1346, "slid_loss": 0.1319, "step": 421, "time": 45.56 }, { "epoch": 0.36, "learning_rate": "7.8245e-05", "loss": 0.1236, "slid_loss": 0.1319, "step": 422, "time": 46.09 }, { "epoch": 0.36, "learning_rate": "7.8150e-05", "loss": 0.1339, "slid_loss": 0.1319, "step": 423, "time": 46.39 }, { "epoch": 0.36, "learning_rate": "7.8055e-05", "loss": 0.1274, "slid_loss": 0.1318, "step": 424, "time": 47.06 }, { "epoch": 0.36, "learning_rate": "7.7959e-05", "loss": 0.1316, "slid_loss": 0.1318, "step": 425, "time": 48.23 }, { "epoch": 0.36, "learning_rate": "7.7863e-05", "loss": 0.1291, "slid_loss": 0.1318, "step": 426, "time": 45.25 }, { "epoch": 0.36, "learning_rate": "7.7768e-05", "loss": 0.1232, "slid_loss": 0.1316, "step": 427, "time": 45.73 }, { "epoch": 0.36, "learning_rate": "7.7672e-05", "loss": 0.1252, "slid_loss": 0.1315, "step": 428, "time": 46.88 }, { "epoch": 0.36, "learning_rate": "7.7575e-05", "loss": 0.1267, "slid_loss": 0.1315, "step": 429, "time": 46.09 }, { "epoch": 0.36, "learning_rate": "7.7479e-05", "loss": 0.1245, "slid_loss": 0.1314, "step": 430, "time": 44.91 }, { "epoch": 0.36, "learning_rate": "7.7383e-05", "loss": 0.1279, "slid_loss": 0.1313, "step": 431, "time": 46.5 }, { "epoch": 0.36, "learning_rate": "7.7286e-05", "loss": 0.13, "slid_loss": 0.1313, "step": 432, "time": 48.81 }, { "epoch": 0.36, "learning_rate": "7.7190e-05", "loss": 0.1209, "slid_loss": 0.1312, "step": 433, "time": 47.13 }, { "epoch": 0.37, "learning_rate": "7.7093e-05", "loss": 0.1358, "slid_loss": 0.1313, "step": 434, "time": 45.47 }, { "epoch": 0.37, "learning_rate": "7.6996e-05", "loss": 0.1366, "slid_loss": 0.1313, "step": 435, "time": 45.98 }, { "epoch": 0.37, "learning_rate": "7.6899e-05", "loss": 0.1315, "slid_loss": 0.1313, "step": 436, "time": 45.82 }, { "epoch": 0.37, "learning_rate": "7.6802e-05", "loss": 0.1305, "slid_loss": 0.1312, "step": 437, "time": 46.56 }, { "epoch": 0.37, "learning_rate": "7.6705e-05", "loss": 0.1326, "slid_loss": 0.1312, "step": 438, "time": 46.4 }, { "epoch": 0.37, "learning_rate": "7.6608e-05", "loss": 0.1221, "slid_loss": 0.131, "step": 439, "time": 47.54 }, { "epoch": 0.37, "learning_rate": "7.6510e-05", "loss": 0.1307, "slid_loss": 0.131, "step": 440, "time": 46.23 }, { "epoch": 0.37, "learning_rate": "7.6413e-05", "loss": 0.1247, "slid_loss": 0.131, "step": 441, "time": 47.53 }, { "epoch": 0.37, "learning_rate": "7.6315e-05", "loss": 0.1246, "slid_loss": 0.1308, "step": 442, "time": 47.38 }, { "epoch": 0.37, "learning_rate": "7.6217e-05", "loss": 0.1271, "slid_loss": 0.1309, "step": 443, "time": 46.51 }, { "epoch": 0.37, "learning_rate": "7.6119e-05", "loss": 0.1265, "slid_loss": 0.1308, "step": 444, "time": 46.68 }, { "epoch": 0.37, "learning_rate": "7.6021e-05", "loss": 0.1167, "slid_loss": 0.1306, "step": 445, "time": 46.01 }, { "epoch": 0.38, "learning_rate": "7.5923e-05", "loss": 0.1242, "slid_loss": 0.1304, "step": 446, "time": 45.1 }, { "epoch": 0.38, "learning_rate": "7.5825e-05", "loss": 0.1241, "slid_loss": 0.1303, "step": 447, "time": 47.81 }, { "epoch": 0.38, "learning_rate": "7.5727e-05", "loss": 0.1274, "slid_loss": 0.1303, "step": 448, "time": 46.65 }, { "epoch": 0.38, "learning_rate": "7.5628e-05", "loss": 0.1281, "slid_loss": 0.1303, "step": 449, "time": 47.53 }, { "epoch": 0.38, "learning_rate": "7.5530e-05", "loss": 0.13, "slid_loss": 0.1303, "step": 450, "time": 45.53 }, { "epoch": 0.38, "learning_rate": "7.5431e-05", "loss": 0.114, "slid_loss": 0.1302, "step": 451, "time": 46.52 }, { "epoch": 0.38, "learning_rate": "7.5332e-05", "loss": 0.1267, "slid_loss": 0.1302, "step": 452, "time": 47.67 }, { "epoch": 0.38, "learning_rate": "7.5233e-05", "loss": 0.1176, "slid_loss": 0.13, "step": 453, "time": 44.83 }, { "epoch": 0.38, "learning_rate": "7.5134e-05", "loss": 0.1332, "slid_loss": 0.13, "step": 454, "time": 46.01 }, { "epoch": 0.38, "learning_rate": "7.5035e-05", "loss": 0.1275, "slid_loss": 0.13, "step": 455, "time": 47.44 }, { "epoch": 0.38, "learning_rate": "7.4936e-05", "loss": 0.1231, "slid_loss": 0.1298, "step": 456, "time": 45.97 }, { "epoch": 0.39, "learning_rate": "7.4836e-05", "loss": 0.1282, "slid_loss": 0.1298, "step": 457, "time": 46.83 }, { "epoch": 0.39, "learning_rate": "7.4737e-05", "loss": 0.1312, "slid_loss": 0.1298, "step": 458, "time": 45.03 }, { "epoch": 0.39, "learning_rate": "7.4637e-05", "loss": 0.1236, "slid_loss": 0.1297, "step": 459, "time": 46.63 }, { "epoch": 0.39, "learning_rate": "7.4538e-05", "loss": 0.1302, "slid_loss": 0.1296, "step": 460, "time": 47.17 }, { "epoch": 0.39, "learning_rate": "7.4438e-05", "loss": 0.1287, "slid_loss": 0.1296, "step": 461, "time": 48.56 }, { "epoch": 0.39, "learning_rate": "7.4338e-05", "loss": 0.1271, "slid_loss": 0.1295, "step": 462, "time": 45.87 }, { "epoch": 0.39, "learning_rate": "7.4238e-05", "loss": 0.1279, "slid_loss": 0.1294, "step": 463, "time": 46.72 }, { "epoch": 0.39, "learning_rate": "7.4138e-05", "loss": 0.1247, "slid_loss": 0.1293, "step": 464, "time": 46.61 }, { "epoch": 0.39, "learning_rate": "7.4038e-05", "loss": 0.1261, "slid_loss": 0.1294, "step": 465, "time": 45.42 }, { "epoch": 0.39, "learning_rate": "7.3938e-05", "loss": 0.1252, "slid_loss": 0.1293, "step": 466, "time": 45.49 }, { "epoch": 0.39, "learning_rate": "7.3837e-05", "loss": 0.1267, "slid_loss": 0.1292, "step": 467, "time": 46.25 }, { "epoch": 0.39, "learning_rate": "7.3737e-05", "loss": 0.1329, "slid_loss": 0.1292, "step": 468, "time": 46.23 }, { "epoch": 0.4, "learning_rate": "7.3636e-05", "loss": 0.1196, "slid_loss": 0.129, "step": 469, "time": 46.99 }, { "epoch": 0.4, "learning_rate": "7.3535e-05", "loss": 0.122, "slid_loss": 0.1289, "step": 470, "time": 47.19 }, { "epoch": 0.4, "learning_rate": "7.3435e-05", "loss": 0.1296, "slid_loss": 0.1287, "step": 471, "time": 45.5 }, { "epoch": 0.4, "learning_rate": "7.3334e-05", "loss": 0.1192, "slid_loss": 0.1286, "step": 472, "time": 45.99 }, { "epoch": 0.4, "learning_rate": "7.3233e-05", "loss": 0.1258, "slid_loss": 0.1286, "step": 473, "time": 46.01 }, { "epoch": 0.4, "learning_rate": "7.3132e-05", "loss": 0.1372, "slid_loss": 0.1286, "step": 474, "time": 44.81 }, { "epoch": 0.4, "learning_rate": "7.3031e-05", "loss": 0.1176, "slid_loss": 0.1284, "step": 475, "time": 47.61 }, { "epoch": 0.4, "learning_rate": "7.2929e-05", "loss": 0.1225, "slid_loss": 0.1284, "step": 476, "time": 44.58 }, { "epoch": 0.4, "learning_rate": "7.2828e-05", "loss": 0.1284, "slid_loss": 0.1284, "step": 477, "time": 46.35 }, { "epoch": 0.4, "learning_rate": "7.2727e-05", "loss": 0.1286, "slid_loss": 0.1283, "step": 478, "time": 46.28 }, { "epoch": 0.4, "learning_rate": "7.2625e-05", "loss": 0.1276, "slid_loss": 0.1283, "step": 479, "time": 45.77 }, { "epoch": 0.4, "learning_rate": "7.2523e-05", "loss": 0.1316, "slid_loss": 0.1283, "step": 480, "time": 45.24 }, { "epoch": 0.41, "learning_rate": "7.2422e-05", "loss": 0.1221, "slid_loss": 0.1281, "step": 481, "time": 45.57 }, { "epoch": 0.41, "learning_rate": "7.2320e-05", "loss": 0.1216, "slid_loss": 0.128, "step": 482, "time": 46.64 }, { "epoch": 0.41, "learning_rate": "7.2218e-05", "loss": 0.1202, "slid_loss": 0.1278, "step": 483, "time": 45.27 }, { "epoch": 0.41, "learning_rate": "7.2116e-05", "loss": 0.129, "slid_loss": 0.1278, "step": 484, "time": 46.2 }, { "epoch": 0.41, "learning_rate": "7.2014e-05", "loss": 0.1314, "slid_loss": 0.1279, "step": 485, "time": 49.14 }, { "epoch": 0.41, "learning_rate": "7.1912e-05", "loss": 0.1242, "slid_loss": 0.1278, "step": 486, "time": 47.23 }, { "epoch": 0.41, "learning_rate": "7.1810e-05", "loss": 0.1282, "slid_loss": 0.1278, "step": 487, "time": 45.96 }, { "epoch": 0.41, "learning_rate": "7.1707e-05", "loss": 0.1317, "slid_loss": 0.1278, "step": 488, "time": 47.52 }, { "epoch": 0.41, "learning_rate": "7.1605e-05", "loss": 0.127, "slid_loss": 0.1277, "step": 489, "time": 45.46 }, { "epoch": 0.41, "learning_rate": "7.1503e-05", "loss": 0.1296, "slid_loss": 0.1278, "step": 490, "time": 46.13 }, { "epoch": 0.41, "learning_rate": "7.1400e-05", "loss": 0.1215, "slid_loss": 0.1277, "step": 491, "time": 46.36 }, { "epoch": 0.41, "learning_rate": "7.1297e-05", "loss": 0.1211, "slid_loss": 0.1275, "step": 492, "time": 46.4 }, { "epoch": 0.42, "learning_rate": "7.1195e-05", "loss": 0.1284, "slid_loss": 0.1274, "step": 493, "time": 46.68 }, { "epoch": 0.42, "learning_rate": "7.1092e-05", "loss": 0.1205, "slid_loss": 0.1273, "step": 494, "time": 46.9 }, { "epoch": 0.42, "learning_rate": "7.0989e-05", "loss": 0.1231, "slid_loss": 0.1272, "step": 495, "time": 46.14 }, { "epoch": 0.42, "learning_rate": "7.0886e-05", "loss": 0.1299, "slid_loss": 0.1272, "step": 496, "time": 46.65 }, { "epoch": 0.42, "learning_rate": "7.0783e-05", "loss": 0.1286, "slid_loss": 0.1271, "step": 497, "time": 46.68 }, { "epoch": 0.42, "learning_rate": "7.0680e-05", "loss": 0.1201, "slid_loss": 0.1271, "step": 498, "time": 48.29 }, { "epoch": 0.42, "learning_rate": "7.0577e-05", "loss": 0.1255, "slid_loss": 0.127, "step": 499, "time": 49.17 }, { "epoch": 0.42, "learning_rate": "7.0473e-05", "loss": 0.124, "slid_loss": 0.1269, "step": 500, "time": 47.87 }, { "epoch": 0.42, "learning_rate": "7.0370e-05", "loss": 0.1234, "slid_loss": 0.1269, "step": 501, "time": 45.78 }, { "epoch": 0.42, "learning_rate": "7.0267e-05", "loss": 0.1197, "slid_loss": 0.1269, "step": 502, "time": 47.5 }, { "epoch": 0.42, "learning_rate": "7.0163e-05", "loss": 0.1296, "slid_loss": 0.1269, "step": 503, "time": 45.32 }, { "epoch": 0.42, "learning_rate": "7.0060e-05", "loss": 0.1307, "slid_loss": 0.1269, "step": 504, "time": 45.23 }, { "epoch": 0.43, "learning_rate": "6.9956e-05", "loss": 0.1251, "slid_loss": 0.1268, "step": 505, "time": 47.33 }, { "epoch": 0.43, "learning_rate": "6.9852e-05", "loss": 0.1176, "slid_loss": 0.1268, "step": 506, "time": 47.45 }, { "epoch": 0.43, "learning_rate": "6.9748e-05", "loss": 0.1299, "slid_loss": 0.1267, "step": 507, "time": 48.54 }, { "epoch": 0.43, "learning_rate": "6.9645e-05", "loss": 0.132, "slid_loss": 0.1267, "step": 508, "time": 46.86 }, { "epoch": 0.43, "learning_rate": "6.9541e-05", "loss": 0.1166, "slid_loss": 0.1266, "step": 509, "time": 46.23 }, { "epoch": 0.43, "learning_rate": "6.9437e-05", "loss": 0.1306, "slid_loss": 0.1266, "step": 510, "time": 47.96 }, { "epoch": 0.43, "learning_rate": "6.9333e-05", "loss": 0.1281, "slid_loss": 0.1266, "step": 511, "time": 47.94 }, { "epoch": 0.43, "learning_rate": "6.9228e-05", "loss": 0.1199, "slid_loss": 0.1265, "step": 512, "time": 48.0 }, { "epoch": 0.43, "learning_rate": "6.9124e-05", "loss": 0.1222, "slid_loss": 0.1264, "step": 513, "time": 46.22 }, { "epoch": 0.43, "learning_rate": "6.9020e-05", "loss": 0.1249, "slid_loss": 0.1264, "step": 514, "time": 46.94 }, { "epoch": 0.43, "learning_rate": "6.8916e-05", "loss": 0.1193, "slid_loss": 0.1264, "step": 515, "time": 45.78 }, { "epoch": 0.43, "learning_rate": "6.8811e-05", "loss": 0.1265, "slid_loss": 0.1264, "step": 516, "time": 45.7 }, { "epoch": 0.44, "learning_rate": "6.8707e-05", "loss": 0.1284, "slid_loss": 0.1265, "step": 517, "time": 48.55 }, { "epoch": 0.44, "learning_rate": "6.8602e-05", "loss": 0.123, "slid_loss": 0.1264, "step": 518, "time": 44.82 }, { "epoch": 0.44, "learning_rate": "6.8498e-05", "loss": 0.1274, "slid_loss": 0.1264, "step": 519, "time": 48.3 }, { "epoch": 0.44, "learning_rate": "6.8393e-05", "loss": 0.1292, "slid_loss": 0.1263, "step": 520, "time": 46.15 }, { "epoch": 0.44, "learning_rate": "6.8289e-05", "loss": 0.1263, "slid_loss": 0.1262, "step": 521, "time": 46.29 }, { "epoch": 0.44, "learning_rate": "6.8184e-05", "loss": 0.1273, "slid_loss": 0.1262, "step": 522, "time": 48.24 }, { "epoch": 0.44, "learning_rate": "6.8079e-05", "loss": 0.1266, "slid_loss": 0.1262, "step": 523, "time": 46.75 }, { "epoch": 0.44, "learning_rate": "6.7974e-05", "loss": 0.1346, "slid_loss": 0.1262, "step": 524, "time": 47.09 }, { "epoch": 0.44, "learning_rate": "6.7869e-05", "loss": 0.1289, "slid_loss": 0.1262, "step": 525, "time": 49.67 }, { "epoch": 0.44, "learning_rate": "6.7764e-05", "loss": 0.1255, "slid_loss": 0.1262, "step": 526, "time": 47.09 }, { "epoch": 0.44, "learning_rate": "6.7659e-05", "loss": 0.127, "slid_loss": 0.1262, "step": 527, "time": 46.68 }, { "epoch": 0.44, "learning_rate": "6.7554e-05", "loss": 0.1217, "slid_loss": 0.1262, "step": 528, "time": 47.36 }, { "epoch": 0.45, "learning_rate": "6.7449e-05", "loss": 0.1274, "slid_loss": 0.1262, "step": 529, "time": 46.03 }, { "epoch": 0.45, "learning_rate": "6.7344e-05", "loss": 0.1226, "slid_loss": 0.1262, "step": 530, "time": 48.25 }, { "epoch": 0.45, "learning_rate": "6.7239e-05", "loss": 0.125, "slid_loss": 0.1261, "step": 531, "time": 48.33 }, { "epoch": 0.45, "learning_rate": "6.7133e-05", "loss": 0.1205, "slid_loss": 0.126, "step": 532, "time": 47.15 }, { "epoch": 0.45, "learning_rate": "6.7028e-05", "loss": 0.1273, "slid_loss": 0.1261, "step": 533, "time": 45.32 }, { "epoch": 0.45, "learning_rate": "6.6923e-05", "loss": 0.1239, "slid_loss": 0.126, "step": 534, "time": 47.65 }, { "epoch": 0.45, "learning_rate": "6.6817e-05", "loss": 0.1339, "slid_loss": 0.126, "step": 535, "time": 47.39 }, { "epoch": 0.45, "learning_rate": "6.6712e-05", "loss": 0.1238, "slid_loss": 0.1259, "step": 536, "time": 46.21 }, { "epoch": 0.45, "learning_rate": "6.6606e-05", "loss": 0.1249, "slid_loss": 0.1258, "step": 537, "time": 46.77 }, { "epoch": 0.45, "learning_rate": "6.6500e-05", "loss": 0.1272, "slid_loss": 0.1258, "step": 538, "time": 47.54 }, { "epoch": 0.45, "learning_rate": "6.6395e-05", "loss": 0.1276, "slid_loss": 0.1258, "step": 539, "time": 47.36 }, { "epoch": 0.45, "learning_rate": "6.6289e-05", "loss": 0.1329, "slid_loss": 0.1258, "step": 540, "time": 46.69 }, { "epoch": 0.46, "learning_rate": "6.6183e-05", "loss": 0.1278, "slid_loss": 0.1259, "step": 541, "time": 45.83 }, { "epoch": 0.46, "learning_rate": "6.6078e-05", "loss": 0.1298, "slid_loss": 0.1259, "step": 542, "time": 46.98 }, { "epoch": 0.46, "learning_rate": "6.5972e-05", "loss": 0.1241, "slid_loss": 0.1259, "step": 543, "time": 47.74 }, { "epoch": 0.46, "learning_rate": "6.5866e-05", "loss": 0.1241, "slid_loss": 0.1259, "step": 544, "time": 46.3 }, { "epoch": 0.46, "learning_rate": "6.5760e-05", "loss": 0.1269, "slid_loss": 0.126, "step": 545, "time": 45.15 }, { "epoch": 0.46, "learning_rate": "6.5654e-05", "loss": 0.1277, "slid_loss": 0.126, "step": 546, "time": 47.36 }, { "epoch": 0.46, "learning_rate": "6.5548e-05", "loss": 0.1294, "slid_loss": 0.1261, "step": 547, "time": 45.29 }, { "epoch": 0.46, "learning_rate": "6.5442e-05", "loss": 0.1275, "slid_loss": 0.1261, "step": 548, "time": 45.9 }, { "epoch": 0.46, "learning_rate": "6.5336e-05", "loss": 0.1264, "slid_loss": 0.126, "step": 549, "time": 46.54 }, { "epoch": 0.46, "learning_rate": "6.5230e-05", "loss": 0.1259, "slid_loss": 0.126, "step": 550, "time": 45.18 }, { "epoch": 0.46, "learning_rate": "6.5124e-05", "loss": 0.1221, "slid_loss": 0.1261, "step": 551, "time": 45.83 }, { "epoch": 0.47, "learning_rate": "6.5018e-05", "loss": 0.1215, "slid_loss": 0.126, "step": 552, "time": 47.8 }, { "epoch": 0.47, "learning_rate": "6.4911e-05", "loss": 0.1319, "slid_loss": 0.1262, "step": 553, "time": 45.78 }, { "epoch": 0.47, "learning_rate": "6.4805e-05", "loss": 0.1266, "slid_loss": 0.1261, "step": 554, "time": 47.0 }, { "epoch": 0.47, "learning_rate": "6.4699e-05", "loss": 0.1228, "slid_loss": 0.1261, "step": 555, "time": 48.07 }, { "epoch": 0.47, "learning_rate": "6.4593e-05", "loss": 0.128, "slid_loss": 0.1261, "step": 556, "time": 48.73 }, { "epoch": 0.47, "learning_rate": "6.4486e-05", "loss": 0.1192, "slid_loss": 0.126, "step": 557, "time": 47.06 }, { "epoch": 0.47, "learning_rate": "6.4380e-05", "loss": 0.1273, "slid_loss": 0.126, "step": 558, "time": 48.46 }, { "epoch": 0.47, "learning_rate": "6.4273e-05", "loss": 0.1264, "slid_loss": 0.126, "step": 559, "time": 50.44 }, { "epoch": 0.47, "learning_rate": "6.4167e-05", "loss": 0.1247, "slid_loss": 0.126, "step": 560, "time": 46.43 }, { "epoch": 0.47, "learning_rate": "6.4060e-05", "loss": 0.1224, "slid_loss": 0.1259, "step": 561, "time": 46.38 }, { "epoch": 0.47, "learning_rate": "6.3954e-05", "loss": 0.1213, "slid_loss": 0.1258, "step": 562, "time": 48.2 }, { "epoch": 0.47, "learning_rate": "6.3847e-05", "loss": 0.1308, "slid_loss": 0.1259, "step": 563, "time": 46.5 }, { "epoch": 0.48, "learning_rate": "6.3741e-05", "loss": 0.1247, "slid_loss": 0.1259, "step": 564, "time": 45.24 }, { "epoch": 0.48, "learning_rate": "6.3634e-05", "loss": 0.1248, "slid_loss": 0.1259, "step": 565, "time": 45.23 }, { "epoch": 0.48, "learning_rate": "6.3528e-05", "loss": 0.1262, "slid_loss": 0.1259, "step": 566, "time": 49.45 }, { "epoch": 0.48, "learning_rate": "6.3421e-05", "loss": 0.1274, "slid_loss": 0.1259, "step": 567, "time": 48.02 }, { "epoch": 0.48, "learning_rate": "6.3314e-05", "loss": 0.1203, "slid_loss": 0.1257, "step": 568, "time": 46.67 }, { "epoch": 0.48, "learning_rate": "6.3208e-05", "loss": 0.1215, "slid_loss": 0.1258, "step": 569, "time": 46.92 }, { "epoch": 0.48, "learning_rate": "6.3101e-05", "loss": 0.1261, "slid_loss": 0.1258, "step": 570, "time": 49.22 }, { "epoch": 0.48, "learning_rate": "6.2994e-05", "loss": 0.1187, "slid_loss": 0.1257, "step": 571, "time": 44.64 }, { "epoch": 0.48, "learning_rate": "6.2888e-05", "loss": 0.136, "slid_loss": 0.1259, "step": 572, "time": 48.45 }, { "epoch": 0.48, "learning_rate": "6.2781e-05", "loss": 0.1301, "slid_loss": 0.1259, "step": 573, "time": 47.53 }, { "epoch": 0.48, "learning_rate": "6.2674e-05", "loss": 0.1217, "slid_loss": 0.1258, "step": 574, "time": 47.97 }, { "epoch": 0.48, "learning_rate": "6.2567e-05", "loss": 0.1187, "slid_loss": 0.1258, "step": 575, "time": 45.52 }, { "epoch": 0.49, "learning_rate": "6.2460e-05", "loss": 0.1265, "slid_loss": 0.1258, "step": 576, "time": 46.06 }, { "epoch": 0.49, "learning_rate": "6.2353e-05", "loss": 0.1167, "slid_loss": 0.1257, "step": 577, "time": 47.47 }, { "epoch": 0.49, "learning_rate": "6.2247e-05", "loss": 0.1217, "slid_loss": 0.1256, "step": 578, "time": 46.4 }, { "epoch": 0.49, "learning_rate": "6.2140e-05", "loss": 0.1251, "slid_loss": 0.1256, "step": 579, "time": 46.21 }, { "epoch": 0.49, "learning_rate": "6.2033e-05", "loss": 0.1236, "slid_loss": 0.1255, "step": 580, "time": 46.81 }, { "epoch": 0.49, "learning_rate": "6.1926e-05", "loss": 0.1212, "slid_loss": 0.1255, "step": 581, "time": 47.14 }, { "epoch": 0.49, "learning_rate": "6.1819e-05", "loss": 0.126, "slid_loss": 0.1255, "step": 582, "time": 46.65 }, { "epoch": 0.49, "learning_rate": "6.1712e-05", "loss": 0.1253, "slid_loss": 0.1256, "step": 583, "time": 47.18 }, { "epoch": 0.49, "learning_rate": "6.1605e-05", "loss": 0.1245, "slid_loss": 0.1256, "step": 584, "time": 46.46 }, { "epoch": 0.49, "learning_rate": "6.1498e-05", "loss": 0.1212, "slid_loss": 0.1255, "step": 585, "time": 46.98 }, { "epoch": 0.49, "learning_rate": "6.1391e-05", "loss": 0.1174, "slid_loss": 0.1254, "step": 586, "time": 46.05 }, { "epoch": 0.49, "learning_rate": "6.1284e-05", "loss": 0.1298, "slid_loss": 0.1254, "step": 587, "time": 48.71 }, { "epoch": 0.5, "learning_rate": "6.1177e-05", "loss": 0.128, "slid_loss": 0.1254, "step": 588, "time": 45.56 }, { "epoch": 0.5, "learning_rate": "6.1070e-05", "loss": 0.1196, "slid_loss": 0.1253, "step": 589, "time": 47.79 }, { "epoch": 0.5, "learning_rate": "6.0963e-05", "loss": 0.1289, "slid_loss": 0.1253, "step": 590, "time": 48.66 }, { "epoch": 0.5, "learning_rate": "6.0856e-05", "loss": 0.1251, "slid_loss": 0.1253, "step": 591, "time": 45.29 }, { "epoch": 0.5, "learning_rate": "6.0749e-05", "loss": 0.121, "slid_loss": 0.1253, "step": 592, "time": 47.72 }, { "epoch": 0.5, "learning_rate": "6.0642e-05", "loss": 0.122, "slid_loss": 0.1253, "step": 593, "time": 45.48 }, { "epoch": 0.5, "learning_rate": "6.0535e-05", "loss": 0.1303, "slid_loss": 0.1253, "step": 594, "time": 45.98 }, { "epoch": 0.5, "learning_rate": "6.0428e-05", "loss": 0.113, "slid_loss": 0.1252, "step": 595, "time": 45.64 }, { "epoch": 0.5, "learning_rate": "6.0321e-05", "loss": 0.125, "slid_loss": 0.1252, "step": 596, "time": 47.26 }, { "epoch": 0.5, "learning_rate": "6.0214e-05", "loss": 0.1247, "slid_loss": 0.1252, "step": 597, "time": 49.22 }, { "epoch": 0.5, "learning_rate": "6.0107e-05", "loss": 0.1232, "slid_loss": 0.1252, "step": 598, "time": 46.48 }, { "epoch": 0.5, "learning_rate": "6.0000e-05", "loss": 0.1211, "slid_loss": 0.1251, "step": 599, "time": 47.23 }, { "epoch": 0.51, "learning_rate": "5.9893e-05", "loss": 0.1148, "slid_loss": 0.1251, "step": 600, "time": 48.26 }, { "epoch": 0.51, "learning_rate": "5.9786e-05", "loss": 0.1208, "slid_loss": 0.125, "step": 601, "time": 149.89 }, { "epoch": 0.51, "learning_rate": "5.9679e-05", "loss": 0.123, "slid_loss": 0.1251, "step": 602, "time": 47.96 }, { "epoch": 0.51, "learning_rate": "5.9572e-05", "loss": 0.1151, "slid_loss": 0.1249, "step": 603, "time": 46.82 }, { "epoch": 0.51, "learning_rate": "5.9465e-05", "loss": 0.1222, "slid_loss": 0.1248, "step": 604, "time": 46.66 }, { "epoch": 0.51, "learning_rate": "5.9358e-05", "loss": 0.121, "slid_loss": 0.1248, "step": 605, "time": 46.25 }, { "epoch": 0.51, "learning_rate": "5.9251e-05", "loss": 0.1235, "slid_loss": 0.1248, "step": 606, "time": 48.74 }, { "epoch": 0.51, "learning_rate": "5.9144e-05", "loss": 0.1177, "slid_loss": 0.1247, "step": 607, "time": 48.81 }, { "epoch": 0.51, "learning_rate": "5.9037e-05", "loss": 0.1214, "slid_loss": 0.1246, "step": 608, "time": 46.74 }, { "epoch": 0.51, "learning_rate": "5.8930e-05", "loss": 0.122, "slid_loss": 0.1247, "step": 609, "time": 44.76 }, { "epoch": 0.51, "learning_rate": "5.8823e-05", "loss": 0.1243, "slid_loss": 0.1246, "step": 610, "time": 45.94 }, { "epoch": 0.51, "learning_rate": "5.8716e-05", "loss": 0.1242, "slid_loss": 0.1246, "step": 611, "time": 45.59 }, { "epoch": 0.52, "learning_rate": "5.8609e-05", "loss": 0.125, "slid_loss": 0.1246, "step": 612, "time": 45.63 }, { "epoch": 0.52, "learning_rate": "5.8502e-05", "loss": 0.1277, "slid_loss": 0.1247, "step": 613, "time": 47.56 }, { "epoch": 0.52, "learning_rate": "5.8395e-05", "loss": 0.1183, "slid_loss": 0.1246, "step": 614, "time": 44.87 }, { "epoch": 0.52, "learning_rate": "5.8288e-05", "loss": 0.121, "slid_loss": 0.1246, "step": 615, "time": 46.38 }, { "epoch": 0.52, "learning_rate": "5.8181e-05", "loss": 0.1222, "slid_loss": 0.1246, "step": 616, "time": 46.97 }, { "epoch": 0.52, "learning_rate": "5.8074e-05", "loss": 0.1291, "slid_loss": 0.1246, "step": 617, "time": 45.82 }, { "epoch": 0.52, "learning_rate": "5.7967e-05", "loss": 0.1268, "slid_loss": 0.1246, "step": 618, "time": 46.08 }, { "epoch": 0.52, "learning_rate": "5.7860e-05", "loss": 0.1165, "slid_loss": 0.1245, "step": 619, "time": 47.16 }, { "epoch": 0.52, "learning_rate": "5.7753e-05", "loss": 0.1251, "slid_loss": 0.1245, "step": 620, "time": 46.75 }, { "epoch": 0.52, "learning_rate": "5.7647e-05", "loss": 0.1315, "slid_loss": 0.1245, "step": 621, "time": 49.53 }, { "epoch": 0.52, "learning_rate": "5.7540e-05", "loss": 0.1285, "slid_loss": 0.1245, "step": 622, "time": 46.15 }, { "epoch": 0.52, "learning_rate": "5.7433e-05", "loss": 0.1199, "slid_loss": 0.1245, "step": 623, "time": 47.39 }, { "epoch": 0.53, "learning_rate": "5.7326e-05", "loss": 0.1154, "slid_loss": 0.1243, "step": 624, "time": 45.78 }, { "epoch": 0.53, "learning_rate": "5.7219e-05", "loss": 0.1209, "slid_loss": 0.1242, "step": 625, "time": 44.79 }, { "epoch": 0.53, "learning_rate": "5.7112e-05", "loss": 0.1168, "slid_loss": 0.1241, "step": 626, "time": 46.43 }, { "epoch": 0.53, "learning_rate": "5.7006e-05", "loss": 0.1247, "slid_loss": 0.1241, "step": 627, "time": 47.42 }, { "epoch": 0.53, "learning_rate": "5.6899e-05", "loss": 0.1314, "slid_loss": 0.1242, "step": 628, "time": 46.0 }, { "epoch": 0.53, "learning_rate": "5.6792e-05", "loss": 0.1234, "slid_loss": 0.1242, "step": 629, "time": 45.57 }, { "epoch": 0.53, "learning_rate": "5.6686e-05", "loss": 0.1255, "slid_loss": 0.1242, "step": 630, "time": 44.54 }, { "epoch": 0.53, "learning_rate": "5.6579e-05", "loss": 0.12, "slid_loss": 0.1241, "step": 631, "time": 47.12 }, { "epoch": 0.53, "learning_rate": "5.6472e-05", "loss": 0.1203, "slid_loss": 0.1241, "step": 632, "time": 47.42 }, { "epoch": 0.53, "learning_rate": "5.6366e-05", "loss": 0.1192, "slid_loss": 0.1241, "step": 633, "time": 47.23 }, { "epoch": 0.53, "learning_rate": "5.6259e-05", "loss": 0.1206, "slid_loss": 0.124, "step": 634, "time": 46.73 }, { "epoch": 0.54, "learning_rate": "5.6153e-05", "loss": 0.1246, "slid_loss": 0.1239, "step": 635, "time": 47.57 }, { "epoch": 0.54, "learning_rate": "5.6046e-05", "loss": 0.1186, "slid_loss": 0.1239, "step": 636, "time": 48.39 }, { "epoch": 0.54, "learning_rate": "5.5940e-05", "loss": 0.1215, "slid_loss": 0.1238, "step": 637, "time": 46.95 }, { "epoch": 0.54, "learning_rate": "5.5833e-05", "loss": 0.1255, "slid_loss": 0.1238, "step": 638, "time": 47.03 }, { "epoch": 0.54, "learning_rate": "5.5727e-05", "loss": 0.1156, "slid_loss": 0.1237, "step": 639, "time": 47.87 }, { "epoch": 0.54, "learning_rate": "5.5620e-05", "loss": 0.1206, "slid_loss": 0.1236, "step": 640, "time": 46.27 }, { "epoch": 0.54, "learning_rate": "5.5514e-05", "loss": 0.1207, "slid_loss": 0.1235, "step": 641, "time": 45.77 }, { "epoch": 0.54, "learning_rate": "5.5407e-05", "loss": 0.12, "slid_loss": 0.1234, "step": 642, "time": 51.08 }, { "epoch": 0.54, "learning_rate": "5.5301e-05", "loss": 0.1256, "slid_loss": 0.1234, "step": 643, "time": 46.26 }, { "epoch": 0.54, "learning_rate": "5.5195e-05", "loss": 0.1149, "slid_loss": 0.1233, "step": 644, "time": 45.48 }, { "epoch": 0.54, "learning_rate": "5.5089e-05", "loss": 0.1259, "slid_loss": 0.1233, "step": 645, "time": 45.26 }, { "epoch": 0.54, "learning_rate": "5.4982e-05", "loss": 0.1189, "slid_loss": 0.1232, "step": 646, "time": 45.44 }, { "epoch": 0.55, "learning_rate": "5.4876e-05", "loss": 0.1154, "slid_loss": 0.1231, "step": 647, "time": 46.8 }, { "epoch": 0.55, "learning_rate": "5.4770e-05", "loss": 0.1219, "slid_loss": 0.123, "step": 648, "time": 46.87 }, { "epoch": 0.55, "learning_rate": "5.4664e-05", "loss": 0.1133, "slid_loss": 0.1229, "step": 649, "time": 47.42 }, { "epoch": 0.55, "learning_rate": "5.4558e-05", "loss": 0.1176, "slid_loss": 0.1228, "step": 650, "time": 47.24 }, { "epoch": 0.55, "learning_rate": "5.4452e-05", "loss": 0.1164, "slid_loss": 0.1228, "step": 651, "time": 49.44 }, { "epoch": 0.55, "learning_rate": "5.4346e-05", "loss": 0.1243, "slid_loss": 0.1228, "step": 652, "time": 45.62 }, { "epoch": 0.55, "learning_rate": "5.4240e-05", "loss": 0.1215, "slid_loss": 0.1227, "step": 653, "time": 46.94 }, { "epoch": 0.55, "learning_rate": "5.4134e-05", "loss": 0.1268, "slid_loss": 0.1227, "step": 654, "time": 45.39 }, { "epoch": 0.55, "learning_rate": "5.4028e-05", "loss": 0.1211, "slid_loss": 0.1227, "step": 655, "time": 46.99 }, { "epoch": 0.55, "learning_rate": "5.3922e-05", "loss": 0.1201, "slid_loss": 0.1226, "step": 656, "time": 48.14 }, { "epoch": 0.55, "learning_rate": "5.3817e-05", "loss": 0.1246, "slid_loss": 0.1227, "step": 657, "time": 47.39 }, { "epoch": 0.55, "learning_rate": "5.3711e-05", "loss": 0.123, "slid_loss": 0.1226, "step": 658, "time": 46.64 }, { "epoch": 0.56, "learning_rate": "5.3605e-05", "loss": 0.1141, "slid_loss": 0.1225, "step": 659, "time": 46.64 }, { "epoch": 0.56, "learning_rate": "5.3500e-05", "loss": 0.1209, "slid_loss": 0.1225, "step": 660, "time": 45.91 }, { "epoch": 0.56, "learning_rate": "5.3394e-05", "loss": 0.1208, "slid_loss": 0.1224, "step": 661, "time": 46.43 }, { "epoch": 0.56, "learning_rate": "5.3288e-05", "loss": 0.1346, "slid_loss": 0.1226, "step": 662, "time": 46.66 }, { "epoch": 0.56, "learning_rate": "5.3183e-05", "loss": 0.1216, "slid_loss": 0.1225, "step": 663, "time": 46.06 }, { "epoch": 0.56, "learning_rate": "5.3077e-05", "loss": 0.117, "slid_loss": 0.1224, "step": 664, "time": 46.37 }, { "epoch": 0.56, "learning_rate": "5.2972e-05", "loss": 0.1258, "slid_loss": 0.1224, "step": 665, "time": 46.81 }, { "epoch": 0.56, "learning_rate": "5.2867e-05", "loss": 0.1208, "slid_loss": 0.1224, "step": 666, "time": 46.32 }, { "epoch": 0.56, "learning_rate": "5.2761e-05", "loss": 0.1225, "slid_loss": 0.1223, "step": 667, "time": 46.81 }, { "epoch": 0.56, "learning_rate": "5.2656e-05", "loss": 0.1192, "slid_loss": 0.1223, "step": 668, "time": 45.87 }, { "epoch": 0.56, "learning_rate": "5.2551e-05", "loss": 0.1213, "slid_loss": 0.1223, "step": 669, "time": 46.03 }, { "epoch": 0.56, "learning_rate": "5.2446e-05", "loss": 0.1239, "slid_loss": 0.1223, "step": 670, "time": 45.12 }, { "epoch": 0.57, "learning_rate": "5.2341e-05", "loss": 0.125, "slid_loss": 0.1223, "step": 671, "time": 44.65 }, { "epoch": 0.57, "learning_rate": "5.2236e-05", "loss": 0.1196, "slid_loss": 0.1222, "step": 672, "time": 44.51 }, { "epoch": 0.57, "learning_rate": "5.2131e-05", "loss": 0.1246, "slid_loss": 0.1221, "step": 673, "time": 46.49 }, { "epoch": 0.57, "learning_rate": "5.2026e-05", "loss": 0.117, "slid_loss": 0.1221, "step": 674, "time": 45.56 }, { "epoch": 0.57, "learning_rate": "5.1921e-05", "loss": 0.1153, "slid_loss": 0.122, "step": 675, "time": 45.52 }, { "epoch": 0.57, "learning_rate": "5.1816e-05", "loss": 0.12, "slid_loss": 0.122, "step": 676, "time": 47.11 }, { "epoch": 0.57, "learning_rate": "5.1711e-05", "loss": 0.1246, "slid_loss": 0.1221, "step": 677, "time": 45.87 }, { "epoch": 0.57, "learning_rate": "5.1607e-05", "loss": 0.1212, "slid_loss": 0.122, "step": 678, "time": 44.99 }, { "epoch": 0.57, "learning_rate": "5.1502e-05", "loss": 0.1171, "slid_loss": 0.122, "step": 679, "time": 46.24 }, { "epoch": 0.57, "learning_rate": "5.1398e-05", "loss": 0.1215, "slid_loss": 0.1219, "step": 680, "time": 47.35 }, { "epoch": 0.57, "learning_rate": "5.1293e-05", "loss": 0.1118, "slid_loss": 0.1219, "step": 681, "time": 48.56 }, { "epoch": 0.57, "learning_rate": "5.1189e-05", "loss": 0.1268, "slid_loss": 0.1219, "step": 682, "time": 46.27 }, { "epoch": 0.58, "learning_rate": "5.1084e-05", "loss": 0.1274, "slid_loss": 0.1219, "step": 683, "time": 49.49 }, { "epoch": 0.58, "learning_rate": "5.0980e-05", "loss": 0.1222, "slid_loss": 0.1219, "step": 684, "time": 45.26 }, { "epoch": 0.58, "learning_rate": "5.0876e-05", "loss": 0.1213, "slid_loss": 0.1219, "step": 685, "time": 47.56 }, { "epoch": 0.58, "learning_rate": "5.0772e-05", "loss": 0.1239, "slid_loss": 0.1219, "step": 686, "time": 47.89 }, { "epoch": 0.58, "learning_rate": "5.0667e-05", "loss": 0.1151, "slid_loss": 0.1218, "step": 687, "time": 45.36 }, { "epoch": 0.58, "learning_rate": "5.0563e-05", "loss": 0.1102, "slid_loss": 0.1216, "step": 688, "time": 46.01 }, { "epoch": 0.58, "learning_rate": "5.0459e-05", "loss": 0.1269, "slid_loss": 0.1217, "step": 689, "time": 49.43 }, { "epoch": 0.58, "learning_rate": "5.0355e-05", "loss": 0.1225, "slid_loss": 0.1216, "step": 690, "time": 46.27 }, { "epoch": 0.58, "learning_rate": "5.0252e-05", "loss": 0.121, "slid_loss": 0.1216, "step": 691, "time": 47.47 }, { "epoch": 0.58, "learning_rate": "5.0148e-05", "loss": 0.1188, "slid_loss": 0.1215, "step": 692, "time": 47.74 }, { "epoch": 0.58, "learning_rate": "5.0044e-05", "loss": 0.1165, "slid_loss": 0.1215, "step": 693, "time": 46.68 }, { "epoch": 0.58, "learning_rate": "4.9940e-05", "loss": 0.117, "slid_loss": 0.1214, "step": 694, "time": 48.16 }, { "epoch": 0.59, "learning_rate": "4.9837e-05", "loss": 0.1207, "slid_loss": 0.1214, "step": 695, "time": 45.3 }, { "epoch": 0.59, "learning_rate": "4.9733e-05", "loss": 0.1185, "slid_loss": 0.1214, "step": 696, "time": 45.82 }, { "epoch": 0.59, "learning_rate": "4.9630e-05", "loss": 0.125, "slid_loss": 0.1214, "step": 697, "time": 47.56 }, { "epoch": 0.59, "learning_rate": "4.9527e-05", "loss": 0.1246, "slid_loss": 0.1214, "step": 698, "time": 48.19 }, { "epoch": 0.59, "learning_rate": "4.9423e-05", "loss": 0.1222, "slid_loss": 0.1214, "step": 699, "time": 47.65 }, { "epoch": 0.59, "learning_rate": "4.9320e-05", "loss": 0.1267, "slid_loss": 0.1215, "step": 700, "time": 48.47 }, { "epoch": 0.59, "learning_rate": "4.9217e-05", "loss": 0.1222, "slid_loss": 0.1215, "step": 701, "time": 46.84 }, { "epoch": 0.59, "learning_rate": "4.9114e-05", "loss": 0.118, "slid_loss": 0.1215, "step": 702, "time": 45.36 }, { "epoch": 0.59, "learning_rate": "4.9011e-05", "loss": 0.1179, "slid_loss": 0.1215, "step": 703, "time": 46.8 }, { "epoch": 0.59, "learning_rate": "4.8908e-05", "loss": 0.1215, "slid_loss": 0.1215, "step": 704, "time": 47.75 }, { "epoch": 0.59, "learning_rate": "4.8805e-05", "loss": 0.1272, "slid_loss": 0.1216, "step": 705, "time": 47.34 }, { "epoch": 0.59, "learning_rate": "4.8703e-05", "loss": 0.1251, "slid_loss": 0.1216, "step": 706, "time": 46.05 }, { "epoch": 0.6, "learning_rate": "4.8600e-05", "loss": 0.123, "slid_loss": 0.1216, "step": 707, "time": 45.38 }, { "epoch": 0.6, "learning_rate": "4.8497e-05", "loss": 0.1223, "slid_loss": 0.1216, "step": 708, "time": 44.58 }, { "epoch": 0.6, "learning_rate": "4.8395e-05", "loss": 0.1112, "slid_loss": 0.1215, "step": 709, "time": 46.83 }, { "epoch": 0.6, "learning_rate": "4.8293e-05", "loss": 0.1168, "slid_loss": 0.1215, "step": 710, "time": 45.9 }, { "epoch": 0.6, "learning_rate": "4.8190e-05", "loss": 0.1194, "slid_loss": 0.1214, "step": 711, "time": 47.86 }, { "epoch": 0.6, "learning_rate": "4.8088e-05", "loss": 0.1167, "slid_loss": 0.1213, "step": 712, "time": 45.71 }, { "epoch": 0.6, "learning_rate": "4.7986e-05", "loss": 0.1279, "slid_loss": 0.1213, "step": 713, "time": 48.04 }, { "epoch": 0.6, "learning_rate": "4.7884e-05", "loss": 0.1299, "slid_loss": 0.1214, "step": 714, "time": 46.67 }, { "epoch": 0.6, "learning_rate": "4.7782e-05", "loss": 0.1237, "slid_loss": 0.1215, "step": 715, "time": 46.35 }, { "epoch": 0.6, "learning_rate": "4.7680e-05", "loss": 0.1284, "slid_loss": 0.1215, "step": 716, "time": 47.04 }, { "epoch": 0.6, "learning_rate": "4.7578e-05", "loss": 0.117, "slid_loss": 0.1214, "step": 717, "time": 46.39 }, { "epoch": 0.6, "learning_rate": "4.7477e-05", "loss": 0.1185, "slid_loss": 0.1213, "step": 718, "time": 45.73 }, { "epoch": 0.61, "learning_rate": "4.7375e-05", "loss": 0.1186, "slid_loss": 0.1213, "step": 719, "time": 45.73 }, { "epoch": 0.61, "learning_rate": "4.7273e-05", "loss": 0.1237, "slid_loss": 0.1213, "step": 720, "time": 46.84 }, { "epoch": 0.61, "learning_rate": "4.7172e-05", "loss": 0.1163, "slid_loss": 0.1212, "step": 721, "time": 46.18 }, { "epoch": 0.61, "learning_rate": "4.7071e-05", "loss": 0.1197, "slid_loss": 0.1211, "step": 722, "time": 44.48 }, { "epoch": 0.61, "learning_rate": "4.6969e-05", "loss": 0.1177, "slid_loss": 0.1211, "step": 723, "time": 46.24 }, { "epoch": 0.61, "learning_rate": "4.6868e-05", "loss": 0.1255, "slid_loss": 0.1212, "step": 724, "time": 47.96 }, { "epoch": 0.61, "learning_rate": "4.6767e-05", "loss": 0.12, "slid_loss": 0.1212, "step": 725, "time": 46.05 }, { "epoch": 0.61, "learning_rate": "4.6666e-05", "loss": 0.1178, "slid_loss": 0.1212, "step": 726, "time": 45.39 }, { "epoch": 0.61, "learning_rate": "4.6565e-05", "loss": 0.1126, "slid_loss": 0.121, "step": 727, "time": 46.66 }, { "epoch": 0.61, "learning_rate": "4.6465e-05", "loss": 0.1126, "slid_loss": 0.1209, "step": 728, "time": 45.38 }, { "epoch": 0.61, "learning_rate": "4.6364e-05", "loss": 0.125, "slid_loss": 0.1209, "step": 729, "time": 48.38 }, { "epoch": 0.62, "learning_rate": "4.6263e-05", "loss": 0.116, "slid_loss": 0.1208, "step": 730, "time": 46.89 }, { "epoch": 0.62, "learning_rate": "4.6163e-05", "loss": 0.1305, "slid_loss": 0.1209, "step": 731, "time": 44.79 }, { "epoch": 0.62, "learning_rate": "4.6062e-05", "loss": 0.1116, "slid_loss": 0.1208, "step": 732, "time": 45.82 }, { "epoch": 0.62, "learning_rate": "4.5962e-05", "loss": 0.1205, "slid_loss": 0.1208, "step": 733, "time": 46.2 }, { "epoch": 0.62, "learning_rate": "4.5862e-05", "loss": 0.1163, "slid_loss": 0.1208, "step": 734, "time": 46.33 }, { "epoch": 0.62, "learning_rate": "4.5762e-05", "loss": 0.1203, "slid_loss": 0.1207, "step": 735, "time": 45.73 }, { "epoch": 0.62, "learning_rate": "4.5662e-05", "loss": 0.1288, "slid_loss": 0.1208, "step": 736, "time": 48.51 }, { "epoch": 0.62, "learning_rate": "4.5562e-05", "loss": 0.1177, "slid_loss": 0.1208, "step": 737, "time": 47.25 }, { "epoch": 0.62, "learning_rate": "4.5462e-05", "loss": 0.1298, "slid_loss": 0.1208, "step": 738, "time": 47.49 }, { "epoch": 0.62, "learning_rate": "4.5363e-05", "loss": 0.1146, "slid_loss": 0.1208, "step": 739, "time": 45.81 }, { "epoch": 0.62, "learning_rate": "4.5263e-05", "loss": 0.1247, "slid_loss": 0.1209, "step": 740, "time": 47.21 }, { "epoch": 0.62, "learning_rate": "4.5164e-05", "loss": 0.1176, "slid_loss": 0.1208, "step": 741, "time": 46.84 }, { "epoch": 0.63, "learning_rate": "4.5064e-05", "loss": 0.115, "slid_loss": 0.1208, "step": 742, "time": 48.34 }, { "epoch": 0.63, "learning_rate": "4.4965e-05", "loss": 0.1216, "slid_loss": 0.1207, "step": 743, "time": 45.92 }, { "epoch": 0.63, "learning_rate": "4.4866e-05", "loss": 0.1132, "slid_loss": 0.1207, "step": 744, "time": 46.59 }, { "epoch": 0.63, "learning_rate": "4.4767e-05", "loss": 0.1124, "slid_loss": 0.1206, "step": 745, "time": 46.28 }, { "epoch": 0.63, "learning_rate": "4.4668e-05", "loss": 0.1197, "slid_loss": 0.1206, "step": 746, "time": 48.43 }, { "epoch": 0.63, "learning_rate": "4.4569e-05", "loss": 0.122, "slid_loss": 0.1207, "step": 747, "time": 47.84 }, { "epoch": 0.63, "learning_rate": "4.4470e-05", "loss": 0.124, "slid_loss": 0.1207, "step": 748, "time": 45.65 }, { "epoch": 0.63, "learning_rate": "4.4372e-05", "loss": 0.1226, "slid_loss": 0.1208, "step": 749, "time": 46.86 }, { "epoch": 0.63, "learning_rate": "4.4273e-05", "loss": 0.1188, "slid_loss": 0.1208, "step": 750, "time": 46.47 }, { "epoch": 0.63, "learning_rate": "4.4175e-05", "loss": 0.1252, "slid_loss": 0.1209, "step": 751, "time": 44.67 }, { "epoch": 0.63, "learning_rate": "4.4077e-05", "loss": 0.1216, "slid_loss": 0.1209, "step": 752, "time": 46.69 }, { "epoch": 0.63, "learning_rate": "4.3979e-05", "loss": 0.1192, "slid_loss": 0.1208, "step": 753, "time": 47.98 }, { "epoch": 0.64, "learning_rate": "4.3881e-05", "loss": 0.1198, "slid_loss": 0.1208, "step": 754, "time": 46.17 }, { "epoch": 0.64, "learning_rate": "4.3783e-05", "loss": 0.1252, "slid_loss": 0.1208, "step": 755, "time": 46.65 }, { "epoch": 0.64, "learning_rate": "4.3685e-05", "loss": 0.1213, "slid_loss": 0.1208, "step": 756, "time": 47.16 }, { "epoch": 0.64, "learning_rate": "4.3587e-05", "loss": 0.1183, "slid_loss": 0.1207, "step": 757, "time": 46.91 }, { "epoch": 0.64, "learning_rate": "4.3490e-05", "loss": 0.1112, "slid_loss": 0.1206, "step": 758, "time": 46.41 }, { "epoch": 0.64, "learning_rate": "4.3392e-05", "loss": 0.1228, "slid_loss": 0.1207, "step": 759, "time": 48.0 }, { "epoch": 0.64, "learning_rate": "4.3295e-05", "loss": 0.1201, "slid_loss": 0.1207, "step": 760, "time": 47.42 }, { "epoch": 0.64, "learning_rate": "4.3198e-05", "loss": 0.1221, "slid_loss": 0.1207, "step": 761, "time": 46.62 }, { "epoch": 0.64, "learning_rate": "4.3101e-05", "loss": 0.12, "slid_loss": 0.1206, "step": 762, "time": 50.4 }, { "epoch": 0.64, "learning_rate": "4.3004e-05", "loss": 0.1179, "slid_loss": 0.1205, "step": 763, "time": 47.21 }, { "epoch": 0.64, "learning_rate": "4.2907e-05", "loss": 0.118, "slid_loss": 0.1205, "step": 764, "time": 46.38 }, { "epoch": 0.64, "learning_rate": "4.2810e-05", "loss": 0.1144, "slid_loss": 0.1204, "step": 765, "time": 47.06 }, { "epoch": 0.65, "learning_rate": "4.2714e-05", "loss": 0.1279, "slid_loss": 0.1205, "step": 766, "time": 44.95 }, { "epoch": 0.65, "learning_rate": "4.2617e-05", "loss": 0.1109, "slid_loss": 0.1204, "step": 767, "time": 47.12 }, { "epoch": 0.65, "learning_rate": "4.2521e-05", "loss": 0.1142, "slid_loss": 0.1203, "step": 768, "time": 49.81 }, { "epoch": 0.65, "learning_rate": "4.2425e-05", "loss": 0.117, "slid_loss": 0.1203, "step": 769, "time": 47.69 }, { "epoch": 0.65, "learning_rate": "4.2328e-05", "loss": 0.1216, "slid_loss": 0.1203, "step": 770, "time": 46.84 }, { "epoch": 0.65, "learning_rate": "4.2232e-05", "loss": 0.12, "slid_loss": 0.1202, "step": 771, "time": 47.39 }, { "epoch": 0.65, "learning_rate": "4.2137e-05", "loss": 0.1248, "slid_loss": 0.1203, "step": 772, "time": 47.73 }, { "epoch": 0.65, "learning_rate": "4.2041e-05", "loss": 0.1276, "slid_loss": 0.1203, "step": 773, "time": 45.08 }, { "epoch": 0.65, "learning_rate": "4.1945e-05", "loss": 0.1153, "slid_loss": 0.1203, "step": 774, "time": 46.15 }, { "epoch": 0.65, "learning_rate": "4.1850e-05", "loss": 0.1159, "slid_loss": 0.1203, "step": 775, "time": 46.06 }, { "epoch": 0.65, "learning_rate": "4.1755e-05", "loss": 0.1206, "slid_loss": 0.1203, "step": 776, "time": 45.24 }, { "epoch": 0.65, "learning_rate": "4.1659e-05", "loss": 0.1253, "slid_loss": 0.1203, "step": 777, "time": 45.69 }, { "epoch": 0.66, "learning_rate": "4.1564e-05", "loss": 0.1191, "slid_loss": 0.1203, "step": 778, "time": 46.7 }, { "epoch": 0.66, "learning_rate": "4.1469e-05", "loss": 0.1154, "slid_loss": 0.1203, "step": 779, "time": 45.64 }, { "epoch": 0.66, "learning_rate": "4.1375e-05", "loss": 0.1267, "slid_loss": 0.1203, "step": 780, "time": 46.72 }, { "epoch": 0.66, "learning_rate": "4.1280e-05", "loss": 0.1186, "slid_loss": 0.1204, "step": 781, "time": 46.05 }, { "epoch": 0.66, "learning_rate": "4.1185e-05", "loss": 0.1162, "slid_loss": 0.1203, "step": 782, "time": 46.63 }, { "epoch": 0.66, "learning_rate": "4.1091e-05", "loss": 0.1171, "slid_loss": 0.1202, "step": 783, "time": 46.76 }, { "epoch": 0.66, "learning_rate": "4.0997e-05", "loss": 0.1177, "slid_loss": 0.1201, "step": 784, "time": 46.01 }, { "epoch": 0.66, "learning_rate": "4.0903e-05", "loss": 0.1198, "slid_loss": 0.1201, "step": 785, "time": 47.51 }, { "epoch": 0.66, "learning_rate": "4.0809e-05", "loss": 0.1217, "slid_loss": 0.1201, "step": 786, "time": 47.21 }, { "epoch": 0.66, "learning_rate": "4.0715e-05", "loss": 0.1238, "slid_loss": 0.1202, "step": 787, "time": 45.95 }, { "epoch": 0.66, "learning_rate": "4.0621e-05", "loss": 0.1216, "slid_loss": 0.1203, "step": 788, "time": 45.33 }, { "epoch": 0.66, "learning_rate": "4.0528e-05", "loss": 0.114, "slid_loss": 0.1202, "step": 789, "time": 46.59 }, { "epoch": 0.67, "learning_rate": "4.0434e-05", "loss": 0.1195, "slid_loss": 0.1201, "step": 790, "time": 47.54 }, { "epoch": 0.67, "learning_rate": "4.0341e-05", "loss": 0.1215, "slid_loss": 0.1201, "step": 791, "time": 50.13 }, { "epoch": 0.67, "learning_rate": "4.0248e-05", "loss": 0.1212, "slid_loss": 0.1202, "step": 792, "time": 44.09 }, { "epoch": 0.67, "learning_rate": "4.0155e-05", "loss": 0.1137, "slid_loss": 0.1201, "step": 793, "time": 46.41 }, { "epoch": 0.67, "learning_rate": "4.0062e-05", "loss": 0.1187, "slid_loss": 0.1202, "step": 794, "time": 46.21 }, { "epoch": 0.67, "learning_rate": "3.9969e-05", "loss": 0.1217, "slid_loss": 0.1202, "step": 795, "time": 45.73 }, { "epoch": 0.67, "learning_rate": "3.9877e-05", "loss": 0.1254, "slid_loss": 0.1202, "step": 796, "time": 44.63 }, { "epoch": 0.67, "learning_rate": "3.9784e-05", "loss": 0.1267, "slid_loss": 0.1203, "step": 797, "time": 45.66 }, { "epoch": 0.67, "learning_rate": "3.9692e-05", "loss": 0.1231, "slid_loss": 0.1202, "step": 798, "time": 47.52 }, { "epoch": 0.67, "learning_rate": "3.9600e-05", "loss": 0.1205, "slid_loss": 0.1202, "step": 799, "time": 47.11 }, { "epoch": 0.67, "learning_rate": "3.9508e-05", "loss": 0.1149, "slid_loss": 0.1201, "step": 800, "time": 46.8 }, { "epoch": 0.67, "learning_rate": "3.9416e-05", "loss": 0.118, "slid_loss": 0.1201, "step": 801, "time": 47.67 }, { "epoch": 0.68, "learning_rate": "3.9324e-05", "loss": 0.1202, "slid_loss": 0.1201, "step": 802, "time": 46.56 }, { "epoch": 0.68, "learning_rate": "3.9233e-05", "loss": 0.1128, "slid_loss": 0.12, "step": 803, "time": 47.23 }, { "epoch": 0.68, "learning_rate": "3.9141e-05", "loss": 0.1214, "slid_loss": 0.12, "step": 804, "time": 46.44 }, { "epoch": 0.68, "learning_rate": "3.9050e-05", "loss": 0.1283, "slid_loss": 0.12, "step": 805, "time": 47.95 }, { "epoch": 0.68, "learning_rate": "3.8959e-05", "loss": 0.1264, "slid_loss": 0.1201, "step": 806, "time": 46.54 }, { "epoch": 0.68, "learning_rate": "3.8868e-05", "loss": 0.1185, "slid_loss": 0.12, "step": 807, "time": 46.57 }, { "epoch": 0.68, "learning_rate": "3.8777e-05", "loss": 0.1118, "slid_loss": 0.1199, "step": 808, "time": 45.26 }, { "epoch": 0.68, "learning_rate": "3.8686e-05", "loss": 0.1137, "slid_loss": 0.1199, "step": 809, "time": 46.26 }, { "epoch": 0.68, "learning_rate": "3.8596e-05", "loss": 0.1226, "slid_loss": 0.12, "step": 810, "time": 48.42 }, { "epoch": 0.68, "learning_rate": "3.8506e-05", "loss": 0.1186, "slid_loss": 0.12, "step": 811, "time": 46.01 }, { "epoch": 0.68, "learning_rate": "3.8415e-05", "loss": 0.1229, "slid_loss": 0.12, "step": 812, "time": 46.66 }, { "epoch": 0.68, "learning_rate": "3.8325e-05", "loss": 0.1102, "slid_loss": 0.1199, "step": 813, "time": 46.01 }, { "epoch": 0.69, "learning_rate": "3.8235e-05", "loss": 0.1159, "slid_loss": 0.1197, "step": 814, "time": 47.39 }, { "epoch": 0.69, "learning_rate": "3.8146e-05", "loss": 0.1204, "slid_loss": 0.1197, "step": 815, "time": 45.79 }, { "epoch": 0.69, "learning_rate": "3.8056e-05", "loss": 0.1153, "slid_loss": 0.1196, "step": 816, "time": 46.22 }, { "epoch": 0.69, "learning_rate": "3.7967e-05", "loss": 0.12, "slid_loss": 0.1196, "step": 817, "time": 47.15 }, { "epoch": 0.69, "learning_rate": "3.7877e-05", "loss": 0.1149, "slid_loss": 0.1196, "step": 818, "time": 46.56 }, { "epoch": 0.69, "learning_rate": "3.7788e-05", "loss": 0.1203, "slid_loss": 0.1196, "step": 819, "time": 47.96 }, { "epoch": 0.69, "learning_rate": "3.7699e-05", "loss": 0.1165, "slid_loss": 0.1195, "step": 820, "time": 44.85 }, { "epoch": 0.69, "learning_rate": "3.7611e-05", "loss": 0.1162, "slid_loss": 0.1195, "step": 821, "time": 48.31 }, { "epoch": 0.69, "learning_rate": "3.7522e-05", "loss": 0.1112, "slid_loss": 0.1194, "step": 822, "time": 46.11 }, { "epoch": 0.69, "learning_rate": "3.7434e-05", "loss": 0.1156, "slid_loss": 0.1194, "step": 823, "time": 46.89 }, { "epoch": 0.69, "learning_rate": "3.7345e-05", "loss": 0.1138, "slid_loss": 0.1193, "step": 824, "time": 46.08 }, { "epoch": 0.7, "learning_rate": "3.7257e-05", "loss": 0.116, "slid_loss": 0.1192, "step": 825, "time": 51.03 }, { "epoch": 0.7, "learning_rate": "3.7169e-05", "loss": 0.1181, "slid_loss": 0.1192, "step": 826, "time": 47.67 }, { "epoch": 0.7, "learning_rate": "3.7081e-05", "loss": 0.1187, "slid_loss": 0.1193, "step": 827, "time": 47.69 }, { "epoch": 0.7, "learning_rate": "3.6994e-05", "loss": 0.1124, "slid_loss": 0.1193, "step": 828, "time": 47.49 }, { "epoch": 0.7, "learning_rate": "3.6906e-05", "loss": 0.1199, "slid_loss": 0.1192, "step": 829, "time": 45.32 }, { "epoch": 0.7, "learning_rate": "3.6819e-05", "loss": 0.1088, "slid_loss": 0.1192, "step": 830, "time": 46.17 }, { "epoch": 0.7, "learning_rate": "3.6732e-05", "loss": 0.1143, "slid_loss": 0.119, "step": 831, "time": 47.72 }, { "epoch": 0.7, "learning_rate": "3.6645e-05", "loss": 0.1212, "slid_loss": 0.1191, "step": 832, "time": 47.58 }, { "epoch": 0.7, "learning_rate": "3.6558e-05", "loss": 0.1204, "slid_loss": 0.1191, "step": 833, "time": 44.6 }, { "epoch": 0.7, "learning_rate": "3.6471e-05", "loss": 0.1142, "slid_loss": 0.1191, "step": 834, "time": 47.64 }, { "epoch": 0.7, "learning_rate": "3.6385e-05", "loss": 0.119, "slid_loss": 0.1191, "step": 835, "time": 45.21 }, { "epoch": 0.7, "learning_rate": "3.6298e-05", "loss": 0.1112, "slid_loss": 0.1189, "step": 836, "time": 46.24 }, { "epoch": 0.71, "learning_rate": "3.6212e-05", "loss": 0.1157, "slid_loss": 0.1189, "step": 837, "time": 49.35 }, { "epoch": 0.71, "learning_rate": "3.6126e-05", "loss": 0.1156, "slid_loss": 0.1187, "step": 838, "time": 47.95 }, { "epoch": 0.71, "learning_rate": "3.6041e-05", "loss": 0.1113, "slid_loss": 0.1187, "step": 839, "time": 46.78 }, { "epoch": 0.71, "learning_rate": "3.5955e-05", "loss": 0.1195, "slid_loss": 0.1187, "step": 840, "time": 45.81 }, { "epoch": 0.71, "learning_rate": "3.5869e-05", "loss": 0.1179, "slid_loss": 0.1187, "step": 841, "time": 46.43 }, { "epoch": 0.71, "learning_rate": "3.5784e-05", "loss": 0.1096, "slid_loss": 0.1186, "step": 842, "time": 47.27 }, { "epoch": 0.71, "learning_rate": "3.5699e-05", "loss": 0.1169, "slid_loss": 0.1186, "step": 843, "time": 45.92 }, { "epoch": 0.71, "learning_rate": "3.5614e-05", "loss": 0.1204, "slid_loss": 0.1186, "step": 844, "time": 48.48 }, { "epoch": 0.71, "learning_rate": "3.5529e-05", "loss": 0.1219, "slid_loss": 0.1187, "step": 845, "time": 47.8 }, { "epoch": 0.71, "learning_rate": "3.5445e-05", "loss": 0.124, "slid_loss": 0.1188, "step": 846, "time": 46.01 }, { "epoch": 0.71, "learning_rate": "3.5360e-05", "loss": 0.1179, "slid_loss": 0.1187, "step": 847, "time": 46.46 }, { "epoch": 0.71, "learning_rate": "3.5276e-05", "loss": 0.1132, "slid_loss": 0.1186, "step": 848, "time": 48.07 }, { "epoch": 0.72, "learning_rate": "3.5192e-05", "loss": 0.1206, "slid_loss": 0.1186, "step": 849, "time": 48.53 }, { "epoch": 0.72, "learning_rate": "3.5108e-05", "loss": 0.1161, "slid_loss": 0.1186, "step": 850, "time": 46.09 }, { "epoch": 0.72, "learning_rate": "3.5025e-05", "loss": 0.1146, "slid_loss": 0.1185, "step": 851, "time": 48.48 }, { "epoch": 0.72, "learning_rate": "3.4941e-05", "loss": 0.1196, "slid_loss": 0.1184, "step": 852, "time": 47.18 }, { "epoch": 0.72, "learning_rate": "3.4858e-05", "loss": 0.1151, "slid_loss": 0.1184, "step": 853, "time": 46.16 }, { "epoch": 0.72, "learning_rate": "3.4775e-05", "loss": 0.1268, "slid_loss": 0.1185, "step": 854, "time": 46.71 }, { "epoch": 0.72, "learning_rate": "3.4692e-05", "loss": 0.1176, "slid_loss": 0.1184, "step": 855, "time": 45.81 }, { "epoch": 0.72, "learning_rate": "3.4609e-05", "loss": 0.1152, "slid_loss": 0.1183, "step": 856, "time": 47.43 }, { "epoch": 0.72, "learning_rate": "3.4526e-05", "loss": 0.1259, "slid_loss": 0.1184, "step": 857, "time": 46.53 }, { "epoch": 0.72, "learning_rate": "3.4444e-05", "loss": 0.1076, "slid_loss": 0.1184, "step": 858, "time": 47.91 }, { "epoch": 0.72, "learning_rate": "3.4361e-05", "loss": 0.1193, "slid_loss": 0.1183, "step": 859, "time": 47.58 }, { "epoch": 0.72, "learning_rate": "3.4279e-05", "loss": 0.1154, "slid_loss": 0.1183, "step": 860, "time": 46.0 }, { "epoch": 0.73, "learning_rate": "3.4198e-05", "loss": 0.1179, "slid_loss": 0.1183, "step": 861, "time": 46.49 }, { "epoch": 0.73, "learning_rate": "3.4116e-05", "loss": 0.1222, "slid_loss": 0.1183, "step": 862, "time": 46.74 }, { "epoch": 0.73, "learning_rate": "3.4034e-05", "loss": 0.1129, "slid_loss": 0.1182, "step": 863, "time": 46.19 }, { "epoch": 0.73, "learning_rate": "3.3953e-05", "loss": 0.1159, "slid_loss": 0.1182, "step": 864, "time": 48.54 }, { "epoch": 0.73, "learning_rate": "3.3872e-05", "loss": 0.1183, "slid_loss": 0.1182, "step": 865, "time": 47.87 }, { "epoch": 0.73, "learning_rate": "3.3791e-05", "loss": 0.1165, "slid_loss": 0.1181, "step": 866, "time": 45.96 }, { "epoch": 0.73, "learning_rate": "3.3710e-05", "loss": 0.1114, "slid_loss": 0.1181, "step": 867, "time": 47.74 }, { "epoch": 0.73, "learning_rate": "3.3630e-05", "loss": 0.12, "slid_loss": 0.1182, "step": 868, "time": 47.14 }, { "epoch": 0.73, "learning_rate": "3.3549e-05", "loss": 0.1181, "slid_loss": 0.1182, "step": 869, "time": 46.14 }, { "epoch": 0.73, "learning_rate": "3.3469e-05", "loss": 0.1178, "slid_loss": 0.1182, "step": 870, "time": 45.85 }, { "epoch": 0.73, "learning_rate": "3.3389e-05", "loss": 0.1191, "slid_loss": 0.1182, "step": 871, "time": 46.84 }, { "epoch": 0.73, "learning_rate": "3.3309e-05", "loss": 0.121, "slid_loss": 0.1181, "step": 872, "time": 48.94 }, { "epoch": 0.74, "learning_rate": "3.3229e-05", "loss": 0.1175, "slid_loss": 0.118, "step": 873, "time": 47.76 }, { "epoch": 0.74, "learning_rate": "3.3150e-05", "loss": 0.1235, "slid_loss": 0.1181, "step": 874, "time": 47.11 }, { "epoch": 0.74, "learning_rate": "3.3071e-05", "loss": 0.1157, "slid_loss": 0.1181, "step": 875, "time": 47.86 }, { "epoch": 0.74, "learning_rate": "3.2992e-05", "loss": 0.1211, "slid_loss": 0.1181, "step": 876, "time": 49.33 }, { "epoch": 0.74, "learning_rate": "3.2913e-05", "loss": 0.1093, "slid_loss": 0.1179, "step": 877, "time": 46.76 }, { "epoch": 0.74, "learning_rate": "3.2834e-05", "loss": 0.1207, "slid_loss": 0.118, "step": 878, "time": 47.07 }, { "epoch": 0.74, "learning_rate": "3.2756e-05", "loss": 0.1247, "slid_loss": 0.1181, "step": 879, "time": 45.94 }, { "epoch": 0.74, "learning_rate": "3.2677e-05", "loss": 0.1186, "slid_loss": 0.118, "step": 880, "time": 46.12 }, { "epoch": 0.74, "learning_rate": "3.2599e-05", "loss": 0.1146, "slid_loss": 0.1179, "step": 881, "time": 48.24 }, { "epoch": 0.74, "learning_rate": "3.2522e-05", "loss": 0.1171, "slid_loss": 0.1179, "step": 882, "time": 46.17 }, { "epoch": 0.74, "learning_rate": "3.2444e-05", "loss": 0.1285, "slid_loss": 0.1181, "step": 883, "time": 47.01 }, { "epoch": 0.74, "learning_rate": "3.2366e-05", "loss": 0.1132, "slid_loss": 0.118, "step": 884, "time": 45.85 }, { "epoch": 0.75, "learning_rate": "3.2289e-05", "loss": 0.1168, "slid_loss": 0.118, "step": 885, "time": 45.91 }, { "epoch": 0.75, "learning_rate": "3.2212e-05", "loss": 0.1176, "slid_loss": 0.1179, "step": 886, "time": 45.82 }, { "epoch": 0.75, "learning_rate": "3.2135e-05", "loss": 0.1133, "slid_loss": 0.1178, "step": 887, "time": 46.58 }, { "epoch": 0.75, "learning_rate": "3.2058e-05", "loss": 0.1185, "slid_loss": 0.1178, "step": 888, "time": 45.12 }, { "epoch": 0.75, "learning_rate": "3.1982e-05", "loss": 0.1192, "slid_loss": 0.1179, "step": 889, "time": 47.64 }, { "epoch": 0.75, "learning_rate": "3.1906e-05", "loss": 0.1083, "slid_loss": 0.1177, "step": 890, "time": 47.17 }, { "epoch": 0.75, "learning_rate": "3.1829e-05", "loss": 0.111, "slid_loss": 0.1176, "step": 891, "time": 48.31 }, { "epoch": 0.75, "learning_rate": "3.1754e-05", "loss": 0.11, "slid_loss": 0.1175, "step": 892, "time": 47.13 }, { "epoch": 0.75, "learning_rate": "3.1678e-05", "loss": 0.119, "slid_loss": 0.1176, "step": 893, "time": 45.72 }, { "epoch": 0.75, "learning_rate": "3.1602e-05", "loss": 0.1122, "slid_loss": 0.1175, "step": 894, "time": 46.13 }, { "epoch": 0.75, "learning_rate": "3.1527e-05", "loss": 0.113, "slid_loss": 0.1174, "step": 895, "time": 48.87 }, { "epoch": 0.75, "learning_rate": "3.1452e-05", "loss": 0.1094, "slid_loss": 0.1173, "step": 896, "time": 49.0 }, { "epoch": 0.76, "learning_rate": "3.1377e-05", "loss": 0.1165, "slid_loss": 0.1172, "step": 897, "time": 45.51 }, { "epoch": 0.76, "learning_rate": "3.1303e-05", "loss": 0.1152, "slid_loss": 0.1171, "step": 898, "time": 45.83 }, { "epoch": 0.76, "learning_rate": "3.1228e-05", "loss": 0.1186, "slid_loss": 0.1171, "step": 899, "time": 47.17 }, { "epoch": 0.76, "learning_rate": "3.1154e-05", "loss": 0.117, "slid_loss": 0.1171, "step": 900, "time": 46.53 }, { "epoch": 0.76, "learning_rate": "3.1080e-05", "loss": 0.1192, "slid_loss": 0.1171, "step": 901, "time": 46.23 }, { "epoch": 0.76, "learning_rate": "3.1006e-05", "loss": 0.1152, "slid_loss": 0.1171, "step": 902, "time": 48.7 }, { "epoch": 0.76, "learning_rate": "3.0932e-05", "loss": 0.117, "slid_loss": 0.1171, "step": 903, "time": 45.46 }, { "epoch": 0.76, "learning_rate": "3.0859e-05", "loss": 0.1143, "slid_loss": 0.117, "step": 904, "time": 46.95 }, { "epoch": 0.76, "learning_rate": "3.0786e-05", "loss": 0.1189, "slid_loss": 0.1169, "step": 905, "time": 46.39 }, { "epoch": 0.76, "learning_rate": "3.0713e-05", "loss": 0.1161, "slid_loss": 0.1168, "step": 906, "time": 47.25 }, { "epoch": 0.76, "learning_rate": "3.0640e-05", "loss": 0.1148, "slid_loss": 0.1168, "step": 907, "time": 46.94 }, { "epoch": 0.77, "learning_rate": "3.0567e-05", "loss": 0.1112, "slid_loss": 0.1168, "step": 908, "time": 46.12 }, { "epoch": 0.77, "learning_rate": "3.0495e-05", "loss": 0.1151, "slid_loss": 0.1168, "step": 909, "time": 46.94 }, { "epoch": 0.77, "learning_rate": "3.0423e-05", "loss": 0.1129, "slid_loss": 0.1167, "step": 910, "time": 47.02 }, { "epoch": 0.77, "learning_rate": "3.0351e-05", "loss": 0.1109, "slid_loss": 0.1166, "step": 911, "time": 46.06 }, { "epoch": 0.77, "learning_rate": "3.0279e-05", "loss": 0.1222, "slid_loss": 0.1166, "step": 912, "time": 46.7 }, { "epoch": 0.77, "learning_rate": "3.0207e-05", "loss": 0.1184, "slid_loss": 0.1167, "step": 913, "time": 49.91 }, { "epoch": 0.77, "learning_rate": "3.0136e-05", "loss": 0.1135, "slid_loss": 0.1167, "step": 914, "time": 47.68 }, { "epoch": 0.77, "learning_rate": "3.0065e-05", "loss": 0.1132, "slid_loss": 0.1166, "step": 915, "time": 45.99 }, { "epoch": 0.77, "learning_rate": "2.9994e-05", "loss": 0.1175, "slid_loss": 0.1166, "step": 916, "time": 47.32 }, { "epoch": 0.77, "learning_rate": "2.9923e-05", "loss": 0.11, "slid_loss": 0.1165, "step": 917, "time": 47.84 }, { "epoch": 0.77, "learning_rate": "2.9853e-05", "loss": 0.1085, "slid_loss": 0.1165, "step": 918, "time": 45.78 }, { "epoch": 0.77, "learning_rate": "2.9783e-05", "loss": 0.1136, "slid_loss": 0.1164, "step": 919, "time": 45.71 }, { "epoch": 0.78, "learning_rate": "2.9713e-05", "loss": 0.1123, "slid_loss": 0.1163, "step": 920, "time": 46.23 }, { "epoch": 0.78, "learning_rate": "2.9643e-05", "loss": 0.1094, "slid_loss": 0.1163, "step": 921, "time": 46.4 }, { "epoch": 0.78, "learning_rate": "2.9573e-05", "loss": 0.1165, "slid_loss": 0.1163, "step": 922, "time": 45.21 }, { "epoch": 0.78, "learning_rate": "2.9504e-05", "loss": 0.1179, "slid_loss": 0.1164, "step": 923, "time": 45.09 }, { "epoch": 0.78, "learning_rate": "2.9435e-05", "loss": 0.1105, "slid_loss": 0.1163, "step": 924, "time": 49.4 }, { "epoch": 0.78, "learning_rate": "2.9366e-05", "loss": 0.1157, "slid_loss": 0.1163, "step": 925, "time": 45.97 }, { "epoch": 0.78, "learning_rate": "2.9297e-05", "loss": 0.1114, "slid_loss": 0.1163, "step": 926, "time": 47.66 }, { "epoch": 0.78, "learning_rate": "2.9229e-05", "loss": 0.119, "slid_loss": 0.1163, "step": 927, "time": 46.03 }, { "epoch": 0.78, "learning_rate": "2.9160e-05", "loss": 0.1205, "slid_loss": 0.1163, "step": 928, "time": 46.26 }, { "epoch": 0.78, "learning_rate": "2.9092e-05", "loss": 0.1196, "slid_loss": 0.1163, "step": 929, "time": 47.92 }, { "epoch": 0.78, "learning_rate": "2.9024e-05", "loss": 0.109, "slid_loss": 0.1163, "step": 930, "time": 46.43 }, { "epoch": 0.78, "learning_rate": "2.8957e-05", "loss": 0.1186, "slid_loss": 0.1164, "step": 931, "time": 48.1 }, { "epoch": 0.79, "learning_rate": "2.8889e-05", "loss": 0.1201, "slid_loss": 0.1164, "step": 932, "time": 46.14 }, { "epoch": 0.79, "learning_rate": "2.8822e-05", "loss": 0.1181, "slid_loss": 0.1163, "step": 933, "time": 46.97 }, { "epoch": 0.79, "learning_rate": "2.8755e-05", "loss": 0.1159, "slid_loss": 0.1164, "step": 934, "time": 45.61 }, { "epoch": 0.79, "learning_rate": "2.8689e-05", "loss": 0.1097, "slid_loss": 0.1163, "step": 935, "time": 47.74 }, { "epoch": 0.79, "learning_rate": "2.8622e-05", "loss": 0.1231, "slid_loss": 0.1164, "step": 936, "time": 45.26 }, { "epoch": 0.79, "learning_rate": "2.8556e-05", "loss": 0.1208, "slid_loss": 0.1164, "step": 937, "time": 48.38 }, { "epoch": 0.79, "learning_rate": "2.8490e-05", "loss": 0.1139, "slid_loss": 0.1164, "step": 938, "time": 46.4 }, { "epoch": 0.79, "learning_rate": "2.8424e-05", "loss": 0.1126, "slid_loss": 0.1164, "step": 939, "time": 45.93 }, { "epoch": 0.79, "learning_rate": "2.8358e-05", "loss": 0.1088, "slid_loss": 0.1163, "step": 940, "time": 50.99 }, { "epoch": 0.79, "learning_rate": "2.8293e-05", "loss": 0.1192, "slid_loss": 0.1163, "step": 941, "time": 47.68 }, { "epoch": 0.79, "learning_rate": "2.8228e-05", "loss": 0.1127, "slid_loss": 0.1164, "step": 942, "time": 46.44 }, { "epoch": 0.79, "learning_rate": "2.8163e-05", "loss": 0.1268, "slid_loss": 0.1165, "step": 943, "time": 45.53 }, { "epoch": 0.8, "learning_rate": "2.8098e-05", "loss": 0.1257, "slid_loss": 0.1165, "step": 944, "time": 46.87 }, { "epoch": 0.8, "learning_rate": "2.8034e-05", "loss": 0.1135, "slid_loss": 0.1164, "step": 945, "time": 48.51 }, { "epoch": 0.8, "learning_rate": "2.7970e-05", "loss": 0.1159, "slid_loss": 0.1164, "step": 946, "time": 45.8 }, { "epoch": 0.8, "learning_rate": "2.7906e-05", "loss": 0.1171, "slid_loss": 0.1164, "step": 947, "time": 47.06 }, { "epoch": 0.8, "learning_rate": "2.7842e-05", "loss": 0.1099, "slid_loss": 0.1163, "step": 948, "time": 45.61 }, { "epoch": 0.8, "learning_rate": "2.7778e-05", "loss": 0.1158, "slid_loss": 0.1163, "step": 949, "time": 48.64 }, { "epoch": 0.8, "learning_rate": "2.7715e-05", "loss": 0.1108, "slid_loss": 0.1162, "step": 950, "time": 46.89 }, { "epoch": 0.8, "learning_rate": "2.7652e-05", "loss": 0.1181, "slid_loss": 0.1163, "step": 951, "time": 45.91 }, { "epoch": 0.8, "learning_rate": "2.7589e-05", "loss": 0.1097, "slid_loss": 0.1162, "step": 952, "time": 46.57 }, { "epoch": 0.8, "learning_rate": "2.7526e-05", "loss": 0.1192, "slid_loss": 0.1162, "step": 953, "time": 46.58 }, { "epoch": 0.8, "learning_rate": "2.7464e-05", "loss": 0.1069, "slid_loss": 0.116, "step": 954, "time": 44.75 }, { "epoch": 0.8, "learning_rate": "2.7402e-05", "loss": 0.116, "slid_loss": 0.116, "step": 955, "time": 46.85 }, { "epoch": 0.81, "learning_rate": "2.7340e-05", "loss": 0.1187, "slid_loss": 0.116, "step": 956, "time": 46.7 }, { "epoch": 0.81, "learning_rate": "2.7278e-05", "loss": 0.1139, "slid_loss": 0.1159, "step": 957, "time": 45.91 }, { "epoch": 0.81, "learning_rate": "2.7217e-05", "loss": 0.1053, "slid_loss": 0.1159, "step": 958, "time": 46.81 }, { "epoch": 0.81, "learning_rate": "2.7156e-05", "loss": 0.1125, "slid_loss": 0.1158, "step": 959, "time": 46.81 }, { "epoch": 0.81, "learning_rate": "2.7095e-05", "loss": 0.1191, "slid_loss": 0.1158, "step": 960, "time": 44.53 }, { "epoch": 0.81, "learning_rate": "2.7034e-05", "loss": 0.1089, "slid_loss": 0.1158, "step": 961, "time": 45.3 }, { "epoch": 0.81, "learning_rate": "2.6973e-05", "loss": 0.1067, "slid_loss": 0.1156, "step": 962, "time": 46.3 }, { "epoch": 0.81, "learning_rate": "2.6913e-05", "loss": 0.108, "slid_loss": 0.1155, "step": 963, "time": 49.07 }, { "epoch": 0.81, "learning_rate": "2.6853e-05", "loss": 0.1148, "slid_loss": 0.1155, "step": 964, "time": 45.9 }, { "epoch": 0.81, "learning_rate": "2.6793e-05", "loss": 0.1077, "slid_loss": 0.1154, "step": 965, "time": 45.71 }, { "epoch": 0.81, "learning_rate": "2.6734e-05", "loss": 0.1205, "slid_loss": 0.1155, "step": 966, "time": 46.22 }, { "epoch": 0.81, "learning_rate": "2.6674e-05", "loss": 0.1153, "slid_loss": 0.1155, "step": 967, "time": 45.77 }, { "epoch": 0.82, "learning_rate": "2.6615e-05", "loss": 0.1231, "slid_loss": 0.1155, "step": 968, "time": 47.15 }, { "epoch": 0.82, "learning_rate": "2.6557e-05", "loss": 0.1187, "slid_loss": 0.1155, "step": 969, "time": 47.57 }, { "epoch": 0.82, "learning_rate": "2.6498e-05", "loss": 0.1167, "slid_loss": 0.1155, "step": 970, "time": 45.24 }, { "epoch": 0.82, "learning_rate": "2.6440e-05", "loss": 0.1168, "slid_loss": 0.1155, "step": 971, "time": 48.19 }, { "epoch": 0.82, "learning_rate": "2.6381e-05", "loss": 0.1163, "slid_loss": 0.1155, "step": 972, "time": 50.11 }, { "epoch": 0.82, "learning_rate": "2.6324e-05", "loss": 0.1194, "slid_loss": 0.1155, "step": 973, "time": 46.14 }, { "epoch": 0.82, "learning_rate": "2.6266e-05", "loss": 0.1256, "slid_loss": 0.1155, "step": 974, "time": 45.84 }, { "epoch": 0.82, "learning_rate": "2.6209e-05", "loss": 0.1228, "slid_loss": 0.1156, "step": 975, "time": 45.73 }, { "epoch": 0.82, "learning_rate": "2.6151e-05", "loss": 0.1255, "slid_loss": 0.1156, "step": 976, "time": 46.78 }, { "epoch": 0.82, "learning_rate": "2.6094e-05", "loss": 0.12, "slid_loss": 0.1157, "step": 977, "time": 50.52 }, { "epoch": 0.82, "learning_rate": "2.6038e-05", "loss": 0.1192, "slid_loss": 0.1157, "step": 978, "time": 46.46 }, { "epoch": 0.82, "learning_rate": "2.5981e-05", "loss": 0.1104, "slid_loss": 0.1156, "step": 979, "time": 47.8 }, { "epoch": 0.83, "learning_rate": "2.5925e-05", "loss": 0.1197, "slid_loss": 0.1156, "step": 980, "time": 46.11 }, { "epoch": 0.83, "learning_rate": "2.5869e-05", "loss": 0.1053, "slid_loss": 0.1155, "step": 981, "time": 48.43 }, { "epoch": 0.83, "learning_rate": "2.5814e-05", "loss": 0.1098, "slid_loss": 0.1154, "step": 982, "time": 47.15 }, { "epoch": 0.83, "learning_rate": "2.5758e-05", "loss": 0.1153, "slid_loss": 0.1153, "step": 983, "time": 47.75 }, { "epoch": 0.83, "learning_rate": "2.5703e-05", "loss": 0.1203, "slid_loss": 0.1154, "step": 984, "time": 45.52 }, { "epoch": 0.83, "learning_rate": "2.5648e-05", "loss": 0.1182, "slid_loss": 0.1154, "step": 985, "time": 46.57 }, { "epoch": 0.83, "learning_rate": "2.5593e-05", "loss": 0.1094, "slid_loss": 0.1153, "step": 986, "time": 47.1 }, { "epoch": 0.83, "learning_rate": "2.5539e-05", "loss": 0.1139, "slid_loss": 0.1153, "step": 987, "time": 47.68 }, { "epoch": 0.83, "learning_rate": "2.5485e-05", "loss": 0.1116, "slid_loss": 0.1152, "step": 988, "time": 44.93 }, { "epoch": 0.83, "learning_rate": "2.5431e-05", "loss": 0.1165, "slid_loss": 0.1152, "step": 989, "time": 47.18 }, { "epoch": 0.83, "learning_rate": "2.5377e-05", "loss": 0.116, "slid_loss": 0.1153, "step": 990, "time": 48.04 }, { "epoch": 0.83, "learning_rate": "2.5323e-05", "loss": 0.1196, "slid_loss": 0.1154, "step": 991, "time": 46.07 }, { "epoch": 0.84, "learning_rate": "2.5270e-05", "loss": 0.1067, "slid_loss": 0.1153, "step": 992, "time": 46.95 }, { "epoch": 0.84, "learning_rate": "2.5217e-05", "loss": 0.1148, "slid_loss": 0.1153, "step": 993, "time": 46.44 }, { "epoch": 0.84, "learning_rate": "2.5164e-05", "loss": 0.1128, "slid_loss": 0.1153, "step": 994, "time": 47.29 }, { "epoch": 0.84, "learning_rate": "2.5112e-05", "loss": 0.1214, "slid_loss": 0.1154, "step": 995, "time": 45.88 }, { "epoch": 0.84, "learning_rate": "2.5060e-05", "loss": 0.1116, "slid_loss": 0.1154, "step": 996, "time": 48.57 }, { "epoch": 0.84, "learning_rate": "2.5008e-05", "loss": 0.106, "slid_loss": 0.1153, "step": 997, "time": 46.91 }, { "epoch": 0.84, "learning_rate": "2.4956e-05", "loss": 0.1137, "slid_loss": 0.1153, "step": 998, "time": 46.65 }, { "epoch": 0.84, "learning_rate": "2.4905e-05", "loss": 0.1185, "slid_loss": 0.1153, "step": 999, "time": 48.06 }, { "epoch": 0.84, "learning_rate": "2.4853e-05", "loss": 0.1116, "slid_loss": 0.1152, "step": 1000, "time": 47.62 }, { "epoch": 0.84, "learning_rate": "2.4802e-05", "loss": 0.1087, "slid_loss": 0.1151, "step": 1001, "time": 47.27 }, { "epoch": 0.84, "learning_rate": "2.4752e-05", "loss": 0.1241, "slid_loss": 0.1152, "step": 1002, "time": 46.49 }, { "epoch": 0.85, "learning_rate": "2.4701e-05", "loss": 0.1145, "slid_loss": 0.1152, "step": 1003, "time": 46.74 }, { "epoch": 0.85, "learning_rate": "2.4651e-05", "loss": 0.1114, "slid_loss": 0.1151, "step": 1004, "time": 48.33 }, { "epoch": 0.85, "learning_rate": "2.4601e-05", "loss": 0.1096, "slid_loss": 0.1151, "step": 1005, "time": 46.77 }, { "epoch": 0.85, "learning_rate": "2.4551e-05", "loss": 0.1115, "slid_loss": 0.115, "step": 1006, "time": 46.68 }, { "epoch": 0.85, "learning_rate": "2.4502e-05", "loss": 0.1132, "slid_loss": 0.115, "step": 1007, "time": 48.08 }, { "epoch": 0.85, "learning_rate": "2.4453e-05", "loss": 0.1218, "slid_loss": 0.1151, "step": 1008, "time": 47.73 }, { "epoch": 0.85, "learning_rate": "2.4404e-05", "loss": 0.1179, "slid_loss": 0.1151, "step": 1009, "time": 47.98 }, { "epoch": 0.85, "learning_rate": "2.4355e-05", "loss": 0.1096, "slid_loss": 0.1151, "step": 1010, "time": 46.23 }, { "epoch": 0.85, "learning_rate": "2.4306e-05", "loss": 0.1202, "slid_loss": 0.1152, "step": 1011, "time": 45.19 }, { "epoch": 0.85, "learning_rate": "2.4258e-05", "loss": 0.1154, "slid_loss": 0.1151, "step": 1012, "time": 46.47 }, { "epoch": 0.85, "learning_rate": "2.4210e-05", "loss": 0.1181, "slid_loss": 0.1151, "step": 1013, "time": 47.2 }, { "epoch": 0.85, "learning_rate": "2.4163e-05", "loss": 0.1154, "slid_loss": 0.1151, "step": 1014, "time": 46.84 }, { "epoch": 0.86, "learning_rate": "2.4115e-05", "loss": 0.1129, "slid_loss": 0.1151, "step": 1015, "time": 45.4 }, { "epoch": 0.86, "learning_rate": "2.4068e-05", "loss": 0.1159, "slid_loss": 0.1151, "step": 1016, "time": 49.58 }, { "epoch": 0.86, "learning_rate": "2.4021e-05", "loss": 0.1113, "slid_loss": 0.1151, "step": 1017, "time": 48.75 }, { "epoch": 0.86, "learning_rate": "2.3975e-05", "loss": 0.1106, "slid_loss": 0.1152, "step": 1018, "time": 46.42 }, { "epoch": 0.86, "learning_rate": "2.3928e-05", "loss": 0.1189, "slid_loss": 0.1152, "step": 1019, "time": 45.68 }, { "epoch": 0.86, "learning_rate": "2.3882e-05", "loss": 0.1173, "slid_loss": 0.1153, "step": 1020, "time": 47.61 }, { "epoch": 0.86, "learning_rate": "2.3836e-05", "loss": 0.1178, "slid_loss": 0.1153, "step": 1021, "time": 46.65 }, { "epoch": 0.86, "learning_rate": "2.3791e-05", "loss": 0.1111, "slid_loss": 0.1153, "step": 1022, "time": 47.24 }, { "epoch": 0.86, "learning_rate": "2.3745e-05", "loss": 0.1213, "slid_loss": 0.1153, "step": 1023, "time": 46.95 }, { "epoch": 0.86, "learning_rate": "2.3700e-05", "loss": 0.1255, "slid_loss": 0.1155, "step": 1024, "time": 46.45 }, { "epoch": 0.86, "learning_rate": "2.3655e-05", "loss": 0.121, "slid_loss": 0.1155, "step": 1025, "time": 46.76 }, { "epoch": 0.86, "learning_rate": "2.3611e-05", "loss": 0.1161, "slid_loss": 0.1156, "step": 1026, "time": 46.05 }, { "epoch": 0.87, "learning_rate": "2.3566e-05", "loss": 0.1122, "slid_loss": 0.1155, "step": 1027, "time": 48.79 }, { "epoch": 0.87, "learning_rate": "2.3522e-05", "loss": 0.1145, "slid_loss": 0.1154, "step": 1028, "time": 47.81 }, { "epoch": 0.87, "learning_rate": "2.3479e-05", "loss": 0.1181, "slid_loss": 0.1154, "step": 1029, "time": 47.66 }, { "epoch": 0.87, "learning_rate": "2.3435e-05", "loss": 0.1125, "slid_loss": 0.1155, "step": 1030, "time": 47.31 }, { "epoch": 0.87, "learning_rate": "2.3392e-05", "loss": 0.1113, "slid_loss": 0.1154, "step": 1031, "time": 46.28 }, { "epoch": 0.87, "learning_rate": "2.3349e-05", "loss": 0.1224, "slid_loss": 0.1154, "step": 1032, "time": 45.2 }, { "epoch": 0.87, "learning_rate": "2.3306e-05", "loss": 0.1088, "slid_loss": 0.1153, "step": 1033, "time": 48.14 }, { "epoch": 0.87, "learning_rate": "2.3264e-05", "loss": 0.1111, "slid_loss": 0.1153, "step": 1034, "time": 45.16 }, { "epoch": 0.87, "learning_rate": "2.3221e-05", "loss": 0.1147, "slid_loss": 0.1153, "step": 1035, "time": 46.36 }, { "epoch": 0.87, "learning_rate": "2.3179e-05", "loss": 0.1126, "slid_loss": 0.1152, "step": 1036, "time": 46.2 }, { "epoch": 0.87, "learning_rate": "2.3138e-05", "loss": 0.12, "slid_loss": 0.1152, "step": 1037, "time": 47.44 }, { "epoch": 0.87, "learning_rate": "2.3096e-05", "loss": 0.1141, "slid_loss": 0.1152, "step": 1038, "time": 49.26 }, { "epoch": 0.88, "learning_rate": "2.3055e-05", "loss": 0.1162, "slid_loss": 0.1152, "step": 1039, "time": 48.24 }, { "epoch": 0.88, "learning_rate": "2.3014e-05", "loss": 0.1146, "slid_loss": 0.1153, "step": 1040, "time": 45.57 }, { "epoch": 0.88, "learning_rate": "2.2974e-05", "loss": 0.1096, "slid_loss": 0.1152, "step": 1041, "time": 46.74 }, { "epoch": 0.88, "learning_rate": "2.2933e-05", "loss": 0.1071, "slid_loss": 0.1151, "step": 1042, "time": 46.73 }, { "epoch": 0.88, "learning_rate": "2.2893e-05", "loss": 0.123, "slid_loss": 0.1151, "step": 1043, "time": 46.23 }, { "epoch": 0.88, "learning_rate": "2.2853e-05", "loss": 0.11, "slid_loss": 0.115, "step": 1044, "time": 48.14 }, { "epoch": 0.88, "learning_rate": "2.2814e-05", "loss": 0.1118, "slid_loss": 0.1149, "step": 1045, "time": 44.74 }, { "epoch": 0.88, "learning_rate": "2.2774e-05", "loss": 0.1162, "slid_loss": 0.1149, "step": 1046, "time": 46.83 }, { "epoch": 0.88, "learning_rate": "2.2735e-05", "loss": 0.1075, "slid_loss": 0.1148, "step": 1047, "time": 49.54 }, { "epoch": 0.88, "learning_rate": "2.2697e-05", "loss": 0.1115, "slid_loss": 0.1149, "step": 1048, "time": 46.73 }, { "epoch": 0.88, "learning_rate": "2.2658e-05", "loss": 0.1076, "slid_loss": 0.1148, "step": 1049, "time": 46.04 }, { "epoch": 0.88, "learning_rate": "2.2620e-05", "loss": 0.1045, "slid_loss": 0.1147, "step": 1050, "time": 46.85 }, { "epoch": 0.89, "learning_rate": "2.2582e-05", "loss": 0.1134, "slid_loss": 0.1147, "step": 1051, "time": 48.16 }, { "epoch": 0.89, "learning_rate": "2.2544e-05", "loss": 0.1146, "slid_loss": 0.1147, "step": 1052, "time": 44.94 }, { "epoch": 0.89, "learning_rate": "2.2507e-05", "loss": 0.1182, "slid_loss": 0.1147, "step": 1053, "time": 48.19 }, { "epoch": 0.89, "learning_rate": "2.2470e-05", "loss": 0.1172, "slid_loss": 0.1148, "step": 1054, "time": 48.89 }, { "epoch": 0.89, "learning_rate": "2.2433e-05", "loss": 0.1189, "slid_loss": 0.1148, "step": 1055, "time": 47.75 }, { "epoch": 0.89, "learning_rate": "2.2396e-05", "loss": 0.1113, "slid_loss": 0.1148, "step": 1056, "time": 47.32 }, { "epoch": 0.89, "learning_rate": "2.2360e-05", "loss": 0.1136, "slid_loss": 0.1148, "step": 1057, "time": 45.24 }, { "epoch": 0.89, "learning_rate": "2.2324e-05", "loss": 0.1161, "slid_loss": 0.1149, "step": 1058, "time": 47.15 }, { "epoch": 0.89, "learning_rate": "2.2288e-05", "loss": 0.1152, "slid_loss": 0.1149, "step": 1059, "time": 46.34 }, { "epoch": 0.89, "learning_rate": "2.2252e-05", "loss": 0.1129, "slid_loss": 0.1148, "step": 1060, "time": 47.62 }, { "epoch": 0.89, "learning_rate": "2.2217e-05", "loss": 0.1196, "slid_loss": 0.1149, "step": 1061, "time": 46.57 }, { "epoch": 0.89, "learning_rate": "2.2182e-05", "loss": 0.1145, "slid_loss": 0.115, "step": 1062, "time": 48.06 }, { "epoch": 0.9, "learning_rate": "2.2147e-05", "loss": 0.1085, "slid_loss": 0.115, "step": 1063, "time": 46.09 }, { "epoch": 0.9, "learning_rate": "2.2113e-05", "loss": 0.1171, "slid_loss": 0.115, "step": 1064, "time": 46.02 }, { "epoch": 0.9, "learning_rate": "2.2079e-05", "loss": 0.1189, "slid_loss": 0.1152, "step": 1065, "time": 47.42 }, { "epoch": 0.9, "learning_rate": "2.2045e-05", "loss": 0.112, "slid_loss": 0.1151, "step": 1066, "time": 47.54 }, { "epoch": 0.9, "learning_rate": "2.2011e-05", "loss": 0.1182, "slid_loss": 0.1151, "step": 1067, "time": 47.85 }, { "epoch": 0.9, "learning_rate": "2.1978e-05", "loss": 0.116, "slid_loss": 0.115, "step": 1068, "time": 47.25 }, { "epoch": 0.9, "learning_rate": "2.1945e-05", "loss": 0.1126, "slid_loss": 0.115, "step": 1069, "time": 46.86 }, { "epoch": 0.9, "learning_rate": "2.1912e-05", "loss": 0.112, "slid_loss": 0.1149, "step": 1070, "time": 49.15 }, { "epoch": 0.9, "learning_rate": "2.1879e-05", "loss": 0.1167, "slid_loss": 0.1149, "step": 1071, "time": 46.98 }, { "epoch": 0.9, "learning_rate": "2.1847e-05", "loss": 0.1098, "slid_loss": 0.1149, "step": 1072, "time": 45.64 }, { "epoch": 0.9, "learning_rate": "2.1815e-05", "loss": 0.1188, "slid_loss": 0.1148, "step": 1073, "time": 46.94 }, { "epoch": 0.9, "learning_rate": "2.1783e-05", "loss": 0.1207, "slid_loss": 0.1148, "step": 1074, "time": 45.65 }, { "epoch": 0.91, "learning_rate": "2.1752e-05", "loss": 0.1143, "slid_loss": 0.1147, "step": 1075, "time": 45.88 }, { "epoch": 0.91, "learning_rate": "2.1720e-05", "loss": 0.1115, "slid_loss": 0.1146, "step": 1076, "time": 46.22 }, { "epoch": 0.91, "learning_rate": "2.1690e-05", "loss": 0.1112, "slid_loss": 0.1145, "step": 1077, "time": 45.05 }, { "epoch": 0.91, "learning_rate": "2.1659e-05", "loss": 0.1088, "slid_loss": 0.1144, "step": 1078, "time": 47.14 }, { "epoch": 0.91, "learning_rate": "2.1629e-05", "loss": 0.1157, "slid_loss": 0.1144, "step": 1079, "time": 46.14 }, { "epoch": 0.91, "learning_rate": "2.1598e-05", "loss": 0.1133, "slid_loss": 0.1144, "step": 1080, "time": 47.65 }, { "epoch": 0.91, "learning_rate": "2.1569e-05", "loss": 0.1097, "slid_loss": 0.1144, "step": 1081, "time": 46.47 }, { "epoch": 0.91, "learning_rate": "2.1539e-05", "loss": 0.1146, "slid_loss": 0.1145, "step": 1082, "time": 47.5 }, { "epoch": 0.91, "learning_rate": "2.1510e-05", "loss": 0.1162, "slid_loss": 0.1145, "step": 1083, "time": 45.48 }, { "epoch": 0.91, "learning_rate": "2.1481e-05", "loss": 0.1179, "slid_loss": 0.1145, "step": 1084, "time": 47.38 }, { "epoch": 0.91, "learning_rate": "2.1452e-05", "loss": 0.1112, "slid_loss": 0.1144, "step": 1085, "time": 46.7 }, { "epoch": 0.92, "learning_rate": "2.1424e-05", "loss": 0.1093, "slid_loss": 0.1144, "step": 1086, "time": 49.39 }, { "epoch": 0.92, "learning_rate": "2.1395e-05", "loss": 0.1179, "slid_loss": 0.1144, "step": 1087, "time": 45.56 }, { "epoch": 0.92, "learning_rate": "2.1368e-05", "loss": 0.1082, "slid_loss": 0.1144, "step": 1088, "time": 47.11 }, { "epoch": 0.92, "learning_rate": "2.1340e-05", "loss": 0.1225, "slid_loss": 0.1144, "step": 1089, "time": 47.24 }, { "epoch": 0.92, "learning_rate": "2.1313e-05", "loss": 0.1201, "slid_loss": 0.1145, "step": 1090, "time": 47.99 }, { "epoch": 0.92, "learning_rate": "2.1286e-05", "loss": 0.1151, "slid_loss": 0.1144, "step": 1091, "time": 45.78 }, { "epoch": 0.92, "learning_rate": "2.1259e-05", "loss": 0.1146, "slid_loss": 0.1145, "step": 1092, "time": 46.39 }, { "epoch": 0.92, "learning_rate": "2.1232e-05", "loss": 0.1114, "slid_loss": 0.1145, "step": 1093, "time": 49.06 }, { "epoch": 0.92, "learning_rate": "2.1206e-05", "loss": 0.1182, "slid_loss": 0.1145, "step": 1094, "time": 47.98 }, { "epoch": 0.92, "learning_rate": "2.1180e-05", "loss": 0.1196, "slid_loss": 0.1145, "step": 1095, "time": 45.44 }, { "epoch": 0.92, "learning_rate": "2.1154e-05", "loss": 0.1152, "slid_loss": 0.1146, "step": 1096, "time": 46.09 }, { "epoch": 0.92, "learning_rate": "2.1129e-05", "loss": 0.1139, "slid_loss": 0.1146, "step": 1097, "time": 46.07 }, { "epoch": 0.93, "learning_rate": "2.1104e-05", "loss": 0.1157, "slid_loss": 0.1147, "step": 1098, "time": 47.39 }, { "epoch": 0.93, "learning_rate": "2.1079e-05", "loss": 0.1163, "slid_loss": 0.1146, "step": 1099, "time": 45.75 }, { "epoch": 0.93, "learning_rate": "2.1055e-05", "loss": 0.113, "slid_loss": 0.1146, "step": 1100, "time": 47.66 }, { "epoch": 0.93, "learning_rate": "2.1030e-05", "loss": 0.1101, "slid_loss": 0.1147, "step": 1101, "time": 45.75 }, { "epoch": 0.93, "learning_rate": "2.1006e-05", "loss": 0.1153, "slid_loss": 0.1146, "step": 1102, "time": 46.29 }, { "epoch": 0.93, "learning_rate": "2.0983e-05", "loss": 0.1164, "slid_loss": 0.1146, "step": 1103, "time": 45.14 }, { "epoch": 0.93, "learning_rate": "2.0959e-05", "loss": 0.1127, "slid_loss": 0.1146, "step": 1104, "time": 48.06 }, { "epoch": 0.93, "learning_rate": "2.0936e-05", "loss": 0.1137, "slid_loss": 0.1146, "step": 1105, "time": 46.65 }, { "epoch": 0.93, "learning_rate": "2.0913e-05", "loss": 0.1248, "slid_loss": 0.1148, "step": 1106, "time": 46.96 }, { "epoch": 0.93, "learning_rate": "2.0890e-05", "loss": 0.1099, "slid_loss": 0.1147, "step": 1107, "time": 48.73 }, { "epoch": 0.93, "learning_rate": "2.0868e-05", "loss": 0.1062, "slid_loss": 0.1146, "step": 1108, "time": 46.86 }, { "epoch": 0.93, "learning_rate": "2.0846e-05", "loss": 0.1138, "slid_loss": 0.1146, "step": 1109, "time": 46.17 }, { "epoch": 0.94, "learning_rate": "2.0824e-05", "loss": 0.1239, "slid_loss": 0.1147, "step": 1110, "time": 46.76 }, { "epoch": 0.94, "learning_rate": "2.0803e-05", "loss": 0.1187, "slid_loss": 0.1147, "step": 1111, "time": 47.0 }, { "epoch": 0.94, "learning_rate": "2.0782e-05", "loss": 0.1144, "slid_loss": 0.1147, "step": 1112, "time": 46.01 }, { "epoch": 0.94, "learning_rate": "2.0761e-05", "loss": 0.1121, "slid_loss": 0.1146, "step": 1113, "time": 46.16 }, { "epoch": 0.94, "learning_rate": "2.0740e-05", "loss": 0.1023, "slid_loss": 0.1145, "step": 1114, "time": 46.6 }, { "epoch": 0.94, "learning_rate": "2.0720e-05", "loss": 0.1192, "slid_loss": 0.1145, "step": 1115, "time": 46.4 }, { "epoch": 0.94, "learning_rate": "2.0700e-05", "loss": 0.1215, "slid_loss": 0.1146, "step": 1116, "time": 45.24 }, { "epoch": 0.94, "learning_rate": "2.0680e-05", "loss": 0.1172, "slid_loss": 0.1147, "step": 1117, "time": 46.4 }, { "epoch": 0.94, "learning_rate": "2.0660e-05", "loss": 0.1107, "slid_loss": 0.1147, "step": 1118, "time": 45.15 }, { "epoch": 0.94, "learning_rate": "2.0641e-05", "loss": 0.1127, "slid_loss": 0.1146, "step": 1119, "time": 48.25 }, { "epoch": 0.94, "learning_rate": "2.0622e-05", "loss": 0.1221, "slid_loss": 0.1146, "step": 1120, "time": 45.73 }, { "epoch": 0.94, "learning_rate": "2.0604e-05", "loss": 0.1115, "slid_loss": 0.1146, "step": 1121, "time": 47.47 }, { "epoch": 0.95, "learning_rate": "2.0585e-05", "loss": 0.1149, "slid_loss": 0.1146, "step": 1122, "time": 46.26 }, { "epoch": 0.95, "learning_rate": "2.0567e-05", "loss": 0.12, "slid_loss": 0.1146, "step": 1123, "time": 44.85 }, { "epoch": 0.95, "learning_rate": "2.0549e-05", "loss": 0.1147, "slid_loss": 0.1145, "step": 1124, "time": 48.0 }, { "epoch": 0.95, "learning_rate": "2.0532e-05", "loss": 0.1107, "slid_loss": 0.1144, "step": 1125, "time": 46.44 }, { "epoch": 0.95, "learning_rate": "2.0514e-05", "loss": 0.1048, "slid_loss": 0.1143, "step": 1126, "time": 47.71 }, { "epoch": 0.95, "learning_rate": "2.0498e-05", "loss": 0.1152, "slid_loss": 0.1143, "step": 1127, "time": 47.88 }, { "epoch": 0.95, "learning_rate": "2.0481e-05", "loss": 0.1091, "slid_loss": 0.1143, "step": 1128, "time": 47.35 }, { "epoch": 0.95, "learning_rate": "2.0464e-05", "loss": 0.1254, "slid_loss": 0.1143, "step": 1129, "time": 47.7 }, { "epoch": 0.95, "learning_rate": "2.0448e-05", "loss": 0.1193, "slid_loss": 0.1144, "step": 1130, "time": 45.63 }, { "epoch": 0.95, "learning_rate": "2.0432e-05", "loss": 0.1174, "slid_loss": 0.1145, "step": 1131, "time": 46.13 }, { "epoch": 0.95, "learning_rate": "2.0417e-05", "loss": 0.1174, "slid_loss": 0.1144, "step": 1132, "time": 46.23 }, { "epoch": 0.95, "learning_rate": "2.0402e-05", "loss": 0.1123, "slid_loss": 0.1144, "step": 1133, "time": 47.42 }, { "epoch": 0.96, "learning_rate": "2.0387e-05", "loss": 0.1105, "slid_loss": 0.1144, "step": 1134, "time": 47.95 }, { "epoch": 0.96, "learning_rate": "2.0372e-05", "loss": 0.1154, "slid_loss": 0.1144, "step": 1135, "time": 45.59 }, { "epoch": 0.96, "learning_rate": "2.0358e-05", "loss": 0.1167, "slid_loss": 0.1145, "step": 1136, "time": 47.05 }, { "epoch": 0.96, "learning_rate": "2.0343e-05", "loss": 0.1145, "slid_loss": 0.1144, "step": 1137, "time": 47.51 }, { "epoch": 0.96, "learning_rate": "2.0330e-05", "loss": 0.1123, "slid_loss": 0.1144, "step": 1138, "time": 47.18 }, { "epoch": 0.96, "learning_rate": "2.0316e-05", "loss": 0.1078, "slid_loss": 0.1143, "step": 1139, "time": 48.45 }, { "epoch": 0.96, "learning_rate": "2.0303e-05", "loss": 0.1095, "slid_loss": 0.1143, "step": 1140, "time": 46.43 }, { "epoch": 0.96, "learning_rate": "2.0290e-05", "loss": 0.1101, "slid_loss": 0.1143, "step": 1141, "time": 46.39 }, { "epoch": 0.96, "learning_rate": "2.0277e-05", "loss": 0.1234, "slid_loss": 0.1144, "step": 1142, "time": 46.22 }, { "epoch": 0.96, "learning_rate": "2.0265e-05", "loss": 0.1055, "slid_loss": 0.1143, "step": 1143, "time": 48.46 }, { "epoch": 0.96, "learning_rate": "2.0252e-05", "loss": 0.1198, "slid_loss": 0.1144, "step": 1144, "time": 46.7 }, { "epoch": 0.96, "learning_rate": "2.0241e-05", "loss": 0.1056, "slid_loss": 0.1143, "step": 1145, "time": 45.29 }, { "epoch": 0.97, "learning_rate": "2.0229e-05", "loss": 0.116, "slid_loss": 0.1143, "step": 1146, "time": 46.78 }, { "epoch": 0.97, "learning_rate": "2.0218e-05", "loss": 0.1062, "slid_loss": 0.1143, "step": 1147, "time": 47.94 }, { "epoch": 0.97, "learning_rate": "2.0207e-05", "loss": 0.1157, "slid_loss": 0.1143, "step": 1148, "time": 46.77 }, { "epoch": 0.97, "learning_rate": "2.0196e-05", "loss": 0.1127, "slid_loss": 0.1144, "step": 1149, "time": 45.72 }, { "epoch": 0.97, "learning_rate": "2.0185e-05", "loss": 0.1199, "slid_loss": 0.1145, "step": 1150, "time": 47.32 }, { "epoch": 0.97, "learning_rate": "2.0175e-05", "loss": 0.1127, "slid_loss": 0.1145, "step": 1151, "time": 44.74 }, { "epoch": 0.97, "learning_rate": "2.0165e-05", "loss": 0.1115, "slid_loss": 0.1145, "step": 1152, "time": 45.66 }, { "epoch": 0.97, "learning_rate": "2.0156e-05", "loss": 0.1042, "slid_loss": 0.1144, "step": 1153, "time": 45.44 }, { "epoch": 0.97, "learning_rate": "2.0147e-05", "loss": 0.1144, "slid_loss": 0.1143, "step": 1154, "time": 47.6 }, { "epoch": 0.97, "learning_rate": "2.0138e-05", "loss": 0.1159, "slid_loss": 0.1143, "step": 1155, "time": 46.33 }, { "epoch": 0.97, "learning_rate": "2.0129e-05", "loss": 0.1123, "slid_loss": 0.1143, "step": 1156, "time": 47.49 }, { "epoch": 0.97, "learning_rate": "2.0120e-05", "loss": 0.1144, "slid_loss": 0.1143, "step": 1157, "time": 47.53 }, { "epoch": 0.98, "learning_rate": "2.0112e-05", "loss": 0.1138, "slid_loss": 0.1143, "step": 1158, "time": 47.61 }, { "epoch": 0.98, "learning_rate": "2.0104e-05", "loss": 0.1161, "slid_loss": 0.1143, "step": 1159, "time": 47.37 }, { "epoch": 0.98, "learning_rate": "2.0097e-05", "loss": 0.1179, "slid_loss": 0.1144, "step": 1160, "time": 46.91 }, { "epoch": 0.98, "learning_rate": "2.0089e-05", "loss": 0.119, "slid_loss": 0.1144, "step": 1161, "time": 45.47 }, { "epoch": 0.98, "learning_rate": "2.0082e-05", "loss": 0.1113, "slid_loss": 0.1143, "step": 1162, "time": 47.74 }, { "epoch": 0.98, "learning_rate": "2.0076e-05", "loss": 0.1136, "slid_loss": 0.1144, "step": 1163, "time": 45.57 }, { "epoch": 0.98, "learning_rate": "2.0069e-05", "loss": 0.1156, "slid_loss": 0.1144, "step": 1164, "time": 47.75 }, { "epoch": 0.98, "learning_rate": "2.0063e-05", "loss": 0.118, "slid_loss": 0.1143, "step": 1165, "time": 46.14 }, { "epoch": 0.98, "learning_rate": "2.0057e-05", "loss": 0.1091, "slid_loss": 0.1143, "step": 1166, "time": 45.85 }, { "epoch": 0.98, "learning_rate": "2.0052e-05", "loss": 0.1072, "slid_loss": 0.1142, "step": 1167, "time": 46.69 }, { "epoch": 0.98, "learning_rate": "2.0046e-05", "loss": 0.1128, "slid_loss": 0.1142, "step": 1168, "time": 47.05 }, { "epoch": 0.98, "learning_rate": "2.0041e-05", "loss": 0.1165, "slid_loss": 0.1142, "step": 1169, "time": 48.52 }, { "epoch": 0.99, "learning_rate": "2.0037e-05", "loss": 0.1176, "slid_loss": 0.1143, "step": 1170, "time": 45.19 }, { "epoch": 0.99, "learning_rate": "2.0032e-05", "loss": 0.1156, "slid_loss": 0.1143, "step": 1171, "time": 48.84 }, { "epoch": 0.99, "learning_rate": "2.0028e-05", "loss": 0.1182, "slid_loss": 0.1143, "step": 1172, "time": 46.05 }, { "epoch": 0.99, "learning_rate": "2.0024e-05", "loss": 0.1156, "slid_loss": 0.1143, "step": 1173, "time": 45.28 }, { "epoch": 0.99, "learning_rate": "2.0021e-05", "loss": 0.1119, "slid_loss": 0.1142, "step": 1174, "time": 47.03 }, { "epoch": 0.99, "learning_rate": "2.0017e-05", "loss": 0.1131, "slid_loss": 0.1142, "step": 1175, "time": 46.41 }, { "epoch": 0.99, "learning_rate": "2.0014e-05", "loss": 0.1171, "slid_loss": 0.1143, "step": 1176, "time": 47.64 }, { "epoch": 0.99, "learning_rate": "2.0012e-05", "loss": 0.1137, "slid_loss": 0.1143, "step": 1177, "time": 47.6 }, { "epoch": 0.99, "learning_rate": "2.0009e-05", "loss": 0.1142, "slid_loss": 0.1143, "step": 1178, "time": 45.89 }, { "epoch": 0.99, "learning_rate": "2.0007e-05", "loss": 0.1107, "slid_loss": 0.1143, "step": 1179, "time": 47.36 }, { "epoch": 0.99, "learning_rate": "2.0005e-05", "loss": 0.1177, "slid_loss": 0.1143, "step": 1180, "time": 46.62 }, { "epoch": 1.0, "learning_rate": "2.0004e-05", "loss": 0.1089, "slid_loss": 0.1143, "step": 1181, "time": 47.25 }, { "epoch": 1.0, "learning_rate": "2.0002e-05", "loss": 0.1056, "slid_loss": 0.1142, "step": 1182, "time": 47.04 }, { "epoch": 1.0, "learning_rate": "2.0001e-05", "loss": 0.1204, "slid_loss": 0.1143, "step": 1183, "time": 47.78 }, { "epoch": 1.0, "learning_rate": "2.0001e-05", "loss": 0.1193, "slid_loss": 0.1143, "step": 1184, "time": 46.95 }, { "epoch": 1.0, "learning_rate": "2.0000e-05", "loss": 0.114, "slid_loss": 0.1143, "step": 1185, "time": 45.8 }, { "epoch": 1.0, "learning_rate": "2.0000e-05", "loss": 0.1138, "slid_loss": 0.1144, "step": 1186, "time": 47.2 }, { "epoch": 1.0, "step": 1186, "time": 1.43, "total_flos": 0.0, "train_loss": 0.12679563476554861, "train_runtime": 55610.9881, "train_samples_per_second": 5.464, "train_steps_per_second": 0.021 } ], "logging_steps": 1.0, "max_steps": 1186, "num_train_epochs": 1, "save_steps": 600, "total_flos": 0.0, "trial_name": null, "trial_params": null }