diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 2.5690430314707773, + "epoch": 2.772002772002772, "eval_steps": 500, - "global_step": 1000, + "global_step": 13000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -1207,13 +1207,14413 @@ "learning_rate": 0.0001, "loss": 4.1717, "step": 1000 + }, + { + "epoch": 0.21, + "learning_rate": 9.571337172104927e-05, + "loss": 4.4083, + "step": 1005 + }, + { + "epoch": 0.22, + "learning_rate": 9.569204521219876e-05, + "loss": 4.1957, + "step": 1010 + }, + { + "epoch": 0.22, + "learning_rate": 9.567071870334826e-05, + "loss": 4.2654, + "step": 1015 + }, + { + "epoch": 0.22, + "learning_rate": 9.564939219449776e-05, + "loss": 4.251, + "step": 1020 + }, + { + "epoch": 0.22, + "learning_rate": 9.562806568564726e-05, + "loss": 4.2639, + "step": 1025 + }, + { + "epoch": 0.22, + "learning_rate": 9.560673917679676e-05, + "loss": 4.1973, + "step": 1030 + }, + { + "epoch": 0.22, + "learning_rate": 9.558541266794626e-05, + "loss": 4.129, + "step": 1035 + }, + { + "epoch": 0.22, + "learning_rate": 9.556408615909575e-05, + "loss": 4.2596, + "step": 1040 + }, + { + "epoch": 0.22, + "learning_rate": 9.554275965024526e-05, + "loss": 4.3568, + "step": 1045 + }, + { + "epoch": 0.22, + "learning_rate": 9.552143314139475e-05, + "loss": 4.3071, + "step": 1050 + }, + { + "epoch": 0.22, + "learning_rate": 9.550010663254426e-05, + "loss": 4.275, + "step": 1055 + }, + { + "epoch": 0.23, + "learning_rate": 9.547878012369376e-05, + "loss": 4.1531, + "step": 1060 + }, + { + "epoch": 0.23, + "learning_rate": 9.545745361484326e-05, + "loss": 4.2054, + "step": 1065 + }, + { + "epoch": 0.23, + "learning_rate": 9.543612710599276e-05, + "loss": 4.3501, + "step": 1070 + }, + { + "epoch": 0.23, + "learning_rate": 9.541480059714226e-05, + "loss": 4.2209, + "step": 1075 + }, + { + "epoch": 0.23, + "learning_rate": 9.539347408829176e-05, + "loss": 4.3029, + "step": 1080 + }, + { + "epoch": 0.23, + "learning_rate": 9.537214757944126e-05, + "loss": 4.2065, + "step": 1085 + }, + { + "epoch": 0.23, + "learning_rate": 9.535082107059075e-05, + "loss": 4.0332, + "step": 1090 + }, + { + "epoch": 0.23, + "learning_rate": 9.532949456174026e-05, + "loss": 4.2867, + "step": 1095 + }, + { + "epoch": 0.23, + "learning_rate": 9.530816805288975e-05, + "loss": 4.2094, + "step": 1100 + }, + { + "epoch": 0.24, + "learning_rate": 9.528684154403924e-05, + "loss": 4.2344, + "step": 1105 + }, + { + "epoch": 0.24, + "learning_rate": 9.526551503518875e-05, + "loss": 4.3236, + "step": 1110 + }, + { + "epoch": 0.24, + "learning_rate": 9.524418852633824e-05, + "loss": 4.3949, + "step": 1115 + }, + { + "epoch": 0.24, + "learning_rate": 9.522286201748775e-05, + "loss": 4.2918, + "step": 1120 + }, + { + "epoch": 0.24, + "learning_rate": 9.520153550863724e-05, + "loss": 4.1913, + "step": 1125 + }, + { + "epoch": 0.24, + "learning_rate": 9.518020899978674e-05, + "loss": 4.2062, + "step": 1130 + }, + { + "epoch": 0.24, + "learning_rate": 9.515888249093624e-05, + "loss": 4.2299, + "step": 1135 + }, + { + "epoch": 0.24, + "learning_rate": 9.513755598208574e-05, + "loss": 4.3388, + "step": 1140 + }, + { + "epoch": 0.24, + "learning_rate": 9.511622947323524e-05, + "loss": 4.1539, + "step": 1145 + }, + { + "epoch": 0.25, + "learning_rate": 9.509490296438474e-05, + "loss": 4.2461, + "step": 1150 + }, + { + "epoch": 0.25, + "learning_rate": 9.507357645553423e-05, + "loss": 4.2264, + "step": 1155 + }, + { + "epoch": 0.25, + "learning_rate": 9.505224994668374e-05, + "loss": 4.1781, + "step": 1160 + }, + { + "epoch": 0.25, + "learning_rate": 9.503092343783323e-05, + "loss": 4.2005, + "step": 1165 + }, + { + "epoch": 0.25, + "learning_rate": 9.500959692898272e-05, + "loss": 4.2533, + "step": 1170 + }, + { + "epoch": 0.25, + "learning_rate": 9.498827042013223e-05, + "loss": 4.1824, + "step": 1175 + }, + { + "epoch": 0.25, + "learning_rate": 9.496694391128172e-05, + "loss": 4.177, + "step": 1180 + }, + { + "epoch": 0.25, + "learning_rate": 9.494561740243123e-05, + "loss": 4.24, + "step": 1185 + }, + { + "epoch": 0.25, + "learning_rate": 9.492429089358072e-05, + "loss": 4.2081, + "step": 1190 + }, + { + "epoch": 0.25, + "learning_rate": 9.490296438473022e-05, + "loss": 4.3068, + "step": 1195 + }, + { + "epoch": 0.26, + "learning_rate": 9.488163787587972e-05, + "loss": 4.2077, + "step": 1200 + }, + { + "epoch": 0.26, + "learning_rate": 9.486031136702922e-05, + "loss": 4.2133, + "step": 1205 + }, + { + "epoch": 0.26, + "learning_rate": 9.483898485817872e-05, + "loss": 4.2082, + "step": 1210 + }, + { + "epoch": 0.26, + "learning_rate": 9.481765834932822e-05, + "loss": 4.1653, + "step": 1215 + }, + { + "epoch": 0.26, + "learning_rate": 9.479633184047771e-05, + "loss": 4.2722, + "step": 1220 + }, + { + "epoch": 0.26, + "learning_rate": 9.477500533162722e-05, + "loss": 4.2118, + "step": 1225 + }, + { + "epoch": 0.26, + "learning_rate": 9.475367882277671e-05, + "loss": 4.2188, + "step": 1230 + }, + { + "epoch": 0.26, + "learning_rate": 9.473235231392622e-05, + "loss": 4.2077, + "step": 1235 + }, + { + "epoch": 0.26, + "learning_rate": 9.471102580507571e-05, + "loss": 4.2659, + "step": 1240 + }, + { + "epoch": 0.27, + "learning_rate": 9.468969929622522e-05, + "loss": 4.2111, + "step": 1245 + }, + { + "epoch": 0.27, + "learning_rate": 9.466837278737471e-05, + "loss": 4.0976, + "step": 1250 + }, + { + "epoch": 0.27, + "learning_rate": 9.464704627852422e-05, + "loss": 4.1634, + "step": 1255 + }, + { + "epoch": 0.27, + "learning_rate": 9.462571976967371e-05, + "loss": 4.219, + "step": 1260 + }, + { + "epoch": 0.27, + "learning_rate": 9.460439326082322e-05, + "loss": 4.1829, + "step": 1265 + }, + { + "epoch": 0.27, + "learning_rate": 9.458306675197271e-05, + "loss": 4.2319, + "step": 1270 + }, + { + "epoch": 0.27, + "learning_rate": 9.456174024312222e-05, + "loss": 4.1322, + "step": 1275 + }, + { + "epoch": 0.27, + "learning_rate": 9.454041373427171e-05, + "loss": 4.1847, + "step": 1280 + }, + { + "epoch": 0.27, + "learning_rate": 9.45190872254212e-05, + "loss": 4.1901, + "step": 1285 + }, + { + "epoch": 0.28, + "learning_rate": 9.449776071657071e-05, + "loss": 4.2486, + "step": 1290 + }, + { + "epoch": 0.28, + "learning_rate": 9.44764342077202e-05, + "loss": 4.2425, + "step": 1295 + }, + { + "epoch": 0.28, + "learning_rate": 9.445510769886971e-05, + "loss": 4.2159, + "step": 1300 + }, + { + "epoch": 0.28, + "learning_rate": 9.44337811900192e-05, + "loss": 4.3078, + "step": 1305 + }, + { + "epoch": 0.28, + "learning_rate": 9.44124546811687e-05, + "loss": 4.2446, + "step": 1310 + }, + { + "epoch": 0.28, + "learning_rate": 9.43911281723182e-05, + "loss": 4.1374, + "step": 1315 + }, + { + "epoch": 0.28, + "learning_rate": 9.43698016634677e-05, + "loss": 4.1335, + "step": 1320 + }, + { + "epoch": 0.28, + "learning_rate": 9.434847515461719e-05, + "loss": 4.207, + "step": 1325 + }, + { + "epoch": 0.28, + "learning_rate": 9.43271486457667e-05, + "loss": 4.2393, + "step": 1330 + }, + { + "epoch": 0.28, + "learning_rate": 9.430582213691619e-05, + "loss": 4.2811, + "step": 1335 + }, + { + "epoch": 0.29, + "learning_rate": 9.42844956280657e-05, + "loss": 4.117, + "step": 1340 + }, + { + "epoch": 0.29, + "learning_rate": 9.426316911921519e-05, + "loss": 4.219, + "step": 1345 + }, + { + "epoch": 0.29, + "learning_rate": 9.424184261036468e-05, + "loss": 4.1704, + "step": 1350 + }, + { + "epoch": 0.29, + "learning_rate": 9.422051610151419e-05, + "loss": 4.1236, + "step": 1355 + }, + { + "epoch": 0.29, + "learning_rate": 9.419918959266368e-05, + "loss": 4.1909, + "step": 1360 + }, + { + "epoch": 0.29, + "learning_rate": 9.417786308381319e-05, + "loss": 4.1695, + "step": 1365 + }, + { + "epoch": 0.29, + "learning_rate": 9.415653657496268e-05, + "loss": 4.2374, + "step": 1370 + }, + { + "epoch": 0.29, + "learning_rate": 9.413521006611218e-05, + "loss": 4.1597, + "step": 1375 + }, + { + "epoch": 0.29, + "learning_rate": 9.411388355726168e-05, + "loss": 4.2953, + "step": 1380 + }, + { + "epoch": 0.3, + "learning_rate": 9.409255704841118e-05, + "loss": 4.2969, + "step": 1385 + }, + { + "epoch": 0.3, + "learning_rate": 9.407123053956067e-05, + "loss": 4.2616, + "step": 1390 + }, + { + "epoch": 0.3, + "learning_rate": 9.404990403071018e-05, + "loss": 4.1079, + "step": 1395 + }, + { + "epoch": 0.3, + "learning_rate": 9.402857752185967e-05, + "loss": 4.1583, + "step": 1400 + }, + { + "epoch": 0.3, + "learning_rate": 9.400725101300918e-05, + "loss": 4.2673, + "step": 1405 + }, + { + "epoch": 0.3, + "learning_rate": 9.398592450415867e-05, + "loss": 4.0916, + "step": 1410 + }, + { + "epoch": 0.3, + "learning_rate": 9.396459799530816e-05, + "loss": 4.2036, + "step": 1415 + }, + { + "epoch": 0.3, + "learning_rate": 9.394327148645767e-05, + "loss": 4.1299, + "step": 1420 + }, + { + "epoch": 0.3, + "learning_rate": 9.392194497760716e-05, + "loss": 4.1743, + "step": 1425 + }, + { + "epoch": 0.3, + "learning_rate": 9.390061846875667e-05, + "loss": 4.1222, + "step": 1430 + }, + { + "epoch": 0.31, + "learning_rate": 9.387929195990616e-05, + "loss": 4.2357, + "step": 1435 + }, + { + "epoch": 0.31, + "learning_rate": 9.385796545105567e-05, + "loss": 4.1811, + "step": 1440 + }, + { + "epoch": 0.31, + "learning_rate": 9.383663894220516e-05, + "loss": 4.2133, + "step": 1445 + }, + { + "epoch": 0.31, + "learning_rate": 9.381531243335467e-05, + "loss": 4.1, + "step": 1450 + }, + { + "epoch": 0.31, + "learning_rate": 9.379398592450416e-05, + "loss": 4.1854, + "step": 1455 + }, + { + "epoch": 0.31, + "learning_rate": 9.377265941565367e-05, + "loss": 4.2457, + "step": 1460 + }, + { + "epoch": 0.31, + "learning_rate": 9.375133290680316e-05, + "loss": 4.089, + "step": 1465 + }, + { + "epoch": 0.31, + "learning_rate": 9.373000639795267e-05, + "loss": 4.1352, + "step": 1470 + }, + { + "epoch": 0.31, + "learning_rate": 9.370867988910216e-05, + "loss": 4.1776, + "step": 1475 + }, + { + "epoch": 0.32, + "learning_rate": 9.368735338025166e-05, + "loss": 4.0168, + "step": 1480 + }, + { + "epoch": 0.32, + "learning_rate": 9.366602687140116e-05, + "loss": 4.2118, + "step": 1485 + }, + { + "epoch": 0.32, + "learning_rate": 9.364470036255066e-05, + "loss": 4.2413, + "step": 1490 + }, + { + "epoch": 0.32, + "learning_rate": 9.362337385370016e-05, + "loss": 4.1838, + "step": 1495 + }, + { + "epoch": 0.32, + "learning_rate": 9.360204734484966e-05, + "loss": 4.1858, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 9.358072083599915e-05, + "loss": 4.1478, + "step": 1505 + }, + { + "epoch": 0.32, + "learning_rate": 9.355939432714866e-05, + "loss": 4.2252, + "step": 1510 + }, + { + "epoch": 0.32, + "learning_rate": 9.353806781829815e-05, + "loss": 4.1753, + "step": 1515 + }, + { + "epoch": 0.32, + "learning_rate": 9.351674130944766e-05, + "loss": 4.1992, + "step": 1520 + }, + { + "epoch": 0.33, + "learning_rate": 9.349541480059715e-05, + "loss": 4.0956, + "step": 1525 + }, + { + "epoch": 0.33, + "learning_rate": 9.347408829174664e-05, + "loss": 4.1486, + "step": 1530 + }, + { + "epoch": 0.33, + "learning_rate": 9.345276178289615e-05, + "loss": 4.113, + "step": 1535 + }, + { + "epoch": 0.33, + "learning_rate": 9.343143527404564e-05, + "loss": 4.1695, + "step": 1540 + }, + { + "epoch": 0.33, + "learning_rate": 9.341010876519514e-05, + "loss": 4.1399, + "step": 1545 + }, + { + "epoch": 0.33, + "learning_rate": 9.338878225634464e-05, + "loss": 4.1361, + "step": 1550 + }, + { + "epoch": 0.33, + "learning_rate": 9.336745574749414e-05, + "loss": 4.1291, + "step": 1555 + }, + { + "epoch": 0.33, + "learning_rate": 9.334612923864364e-05, + "loss": 4.1729, + "step": 1560 + }, + { + "epoch": 0.33, + "learning_rate": 9.332480272979314e-05, + "loss": 4.186, + "step": 1565 + }, + { + "epoch": 0.33, + "learning_rate": 9.330347622094263e-05, + "loss": 4.1694, + "step": 1570 + }, + { + "epoch": 0.34, + "learning_rate": 9.328214971209214e-05, + "loss": 4.2573, + "step": 1575 + }, + { + "epoch": 0.34, + "learning_rate": 9.326082320324163e-05, + "loss": 4.1644, + "step": 1580 + }, + { + "epoch": 0.34, + "learning_rate": 9.323949669439114e-05, + "loss": 4.1274, + "step": 1585 + }, + { + "epoch": 0.34, + "learning_rate": 9.321817018554063e-05, + "loss": 4.2186, + "step": 1590 + }, + { + "epoch": 0.34, + "learning_rate": 9.319684367669012e-05, + "loss": 4.1449, + "step": 1595 + }, + { + "epoch": 0.34, + "learning_rate": 9.317551716783963e-05, + "loss": 4.2566, + "step": 1600 + }, + { + "epoch": 0.34, + "learning_rate": 9.315419065898912e-05, + "loss": 4.1832, + "step": 1605 + }, + { + "epoch": 0.34, + "learning_rate": 9.313286415013863e-05, + "loss": 4.2554, + "step": 1610 + }, + { + "epoch": 0.34, + "learning_rate": 9.311153764128812e-05, + "loss": 4.2106, + "step": 1615 + }, + { + "epoch": 0.35, + "learning_rate": 9.309021113243761e-05, + "loss": 4.2412, + "step": 1620 + }, + { + "epoch": 0.35, + "learning_rate": 9.306888462358712e-05, + "loss": 4.2214, + "step": 1625 + }, + { + "epoch": 0.35, + "learning_rate": 9.304755811473661e-05, + "loss": 4.124, + "step": 1630 + }, + { + "epoch": 0.35, + "learning_rate": 9.302623160588612e-05, + "loss": 4.2064, + "step": 1635 + }, + { + "epoch": 0.35, + "learning_rate": 9.300490509703563e-05, + "loss": 4.2735, + "step": 1640 + }, + { + "epoch": 0.35, + "learning_rate": 9.298357858818512e-05, + "loss": 4.1409, + "step": 1645 + }, + { + "epoch": 0.35, + "learning_rate": 9.296225207933463e-05, + "loss": 4.1364, + "step": 1650 + }, + { + "epoch": 0.35, + "learning_rate": 9.294092557048412e-05, + "loss": 4.1741, + "step": 1655 + }, + { + "epoch": 0.35, + "learning_rate": 9.291959906163361e-05, + "loss": 4.2158, + "step": 1660 + }, + { + "epoch": 0.36, + "learning_rate": 9.289827255278312e-05, + "loss": 4.1697, + "step": 1665 + }, + { + "epoch": 0.36, + "learning_rate": 9.287694604393261e-05, + "loss": 4.1616, + "step": 1670 + }, + { + "epoch": 0.36, + "learning_rate": 9.285561953508212e-05, + "loss": 4.0912, + "step": 1675 + }, + { + "epoch": 0.36, + "learning_rate": 9.283429302623161e-05, + "loss": 4.1208, + "step": 1680 + }, + { + "epoch": 0.36, + "learning_rate": 9.281296651738111e-05, + "loss": 4.2515, + "step": 1685 + }, + { + "epoch": 0.36, + "learning_rate": 9.279164000853062e-05, + "loss": 4.2275, + "step": 1690 + }, + { + "epoch": 0.36, + "learning_rate": 9.277031349968011e-05, + "loss": 4.1442, + "step": 1695 + }, + { + "epoch": 0.36, + "learning_rate": 9.27489869908296e-05, + "loss": 4.3005, + "step": 1700 + }, + { + "epoch": 0.36, + "learning_rate": 9.272766048197911e-05, + "loss": 4.1924, + "step": 1705 + }, + { + "epoch": 0.36, + "learning_rate": 9.27063339731286e-05, + "loss": 4.2763, + "step": 1710 + }, + { + "epoch": 0.37, + "learning_rate": 9.268500746427811e-05, + "loss": 4.1912, + "step": 1715 + }, + { + "epoch": 0.37, + "learning_rate": 9.26636809554276e-05, + "loss": 4.1089, + "step": 1720 + }, + { + "epoch": 0.37, + "learning_rate": 9.26423544465771e-05, + "loss": 4.1599, + "step": 1725 + }, + { + "epoch": 0.37, + "learning_rate": 9.26210279377266e-05, + "loss": 4.2416, + "step": 1730 + }, + { + "epoch": 0.37, + "learning_rate": 9.25997014288761e-05, + "loss": 4.2332, + "step": 1735 + }, + { + "epoch": 0.37, + "learning_rate": 9.25783749200256e-05, + "loss": 4.1482, + "step": 1740 + }, + { + "epoch": 0.37, + "learning_rate": 9.25570484111751e-05, + "loss": 4.2588, + "step": 1745 + }, + { + "epoch": 0.37, + "learning_rate": 9.253572190232459e-05, + "loss": 4.2557, + "step": 1750 + }, + { + "epoch": 0.37, + "learning_rate": 9.25143953934741e-05, + "loss": 4.2302, + "step": 1755 + }, + { + "epoch": 0.38, + "learning_rate": 9.249306888462359e-05, + "loss": 4.1986, + "step": 1760 + }, + { + "epoch": 0.38, + "learning_rate": 9.247174237577308e-05, + "loss": 4.1768, + "step": 1765 + }, + { + "epoch": 0.38, + "learning_rate": 9.245041586692259e-05, + "loss": 4.2593, + "step": 1770 + }, + { + "epoch": 0.38, + "learning_rate": 9.242908935807208e-05, + "loss": 4.0778, + "step": 1775 + }, + { + "epoch": 0.38, + "learning_rate": 9.240776284922159e-05, + "loss": 4.1143, + "step": 1780 + }, + { + "epoch": 0.38, + "learning_rate": 9.238643634037108e-05, + "loss": 4.2432, + "step": 1785 + }, + { + "epoch": 0.38, + "learning_rate": 9.236510983152057e-05, + "loss": 4.1342, + "step": 1790 + }, + { + "epoch": 0.38, + "learning_rate": 9.234378332267008e-05, + "loss": 4.1526, + "step": 1795 + }, + { + "epoch": 0.38, + "learning_rate": 9.232245681381957e-05, + "loss": 4.1949, + "step": 1800 + }, + { + "epoch": 0.38, + "learning_rate": 9.230113030496908e-05, + "loss": 4.1611, + "step": 1805 + }, + { + "epoch": 0.39, + "learning_rate": 9.227980379611857e-05, + "loss": 4.2797, + "step": 1810 + }, + { + "epoch": 0.39, + "learning_rate": 9.225847728726808e-05, + "loss": 4.1547, + "step": 1815 + }, + { + "epoch": 0.39, + "learning_rate": 9.223715077841757e-05, + "loss": 4.21, + "step": 1820 + }, + { + "epoch": 0.39, + "learning_rate": 9.221582426956708e-05, + "loss": 4.1497, + "step": 1825 + }, + { + "epoch": 0.39, + "learning_rate": 9.219449776071657e-05, + "loss": 4.2368, + "step": 1830 + }, + { + "epoch": 0.39, + "learning_rate": 9.217317125186608e-05, + "loss": 4.1244, + "step": 1835 + }, + { + "epoch": 0.39, + "learning_rate": 9.215184474301557e-05, + "loss": 4.1749, + "step": 1840 + }, + { + "epoch": 0.39, + "learning_rate": 9.213051823416508e-05, + "loss": 4.1415, + "step": 1845 + }, + { + "epoch": 0.39, + "learning_rate": 9.210919172531457e-05, + "loss": 4.1345, + "step": 1850 + }, + { + "epoch": 0.4, + "learning_rate": 9.208786521646407e-05, + "loss": 4.109, + "step": 1855 + }, + { + "epoch": 0.4, + "learning_rate": 9.206653870761357e-05, + "loss": 4.2461, + "step": 1860 + }, + { + "epoch": 0.4, + "learning_rate": 9.204521219876307e-05, + "loss": 4.2095, + "step": 1865 + }, + { + "epoch": 0.4, + "learning_rate": 9.202388568991257e-05, + "loss": 4.0995, + "step": 1870 + }, + { + "epoch": 0.4, + "learning_rate": 9.200255918106207e-05, + "loss": 4.1551, + "step": 1875 + }, + { + "epoch": 0.4, + "learning_rate": 9.198123267221156e-05, + "loss": 4.1101, + "step": 1880 + }, + { + "epoch": 0.4, + "learning_rate": 9.195990616336107e-05, + "loss": 4.1769, + "step": 1885 + }, + { + "epoch": 0.4, + "learning_rate": 9.193857965451056e-05, + "loss": 4.1166, + "step": 1890 + }, + { + "epoch": 0.4, + "learning_rate": 9.191725314566007e-05, + "loss": 4.1728, + "step": 1895 + }, + { + "epoch": 0.41, + "learning_rate": 9.189592663680956e-05, + "loss": 4.1185, + "step": 1900 + }, + { + "epoch": 0.41, + "learning_rate": 9.187460012795905e-05, + "loss": 4.1217, + "step": 1905 + }, + { + "epoch": 0.41, + "learning_rate": 9.185327361910856e-05, + "loss": 4.2622, + "step": 1910 + }, + { + "epoch": 0.41, + "learning_rate": 9.183194711025805e-05, + "loss": 4.1488, + "step": 1915 + }, + { + "epoch": 0.41, + "learning_rate": 9.181062060140755e-05, + "loss": 4.1439, + "step": 1920 + }, + { + "epoch": 0.41, + "learning_rate": 9.178929409255705e-05, + "loss": 4.0916, + "step": 1925 + }, + { + "epoch": 0.41, + "learning_rate": 9.176796758370655e-05, + "loss": 4.2217, + "step": 1930 + }, + { + "epoch": 0.41, + "learning_rate": 9.174664107485605e-05, + "loss": 4.1477, + "step": 1935 + }, + { + "epoch": 0.41, + "learning_rate": 9.172531456600555e-05, + "loss": 4.1755, + "step": 1940 + }, + { + "epoch": 0.41, + "learning_rate": 9.170398805715504e-05, + "loss": 4.2345, + "step": 1945 + }, + { + "epoch": 0.42, + "learning_rate": 9.168266154830455e-05, + "loss": 4.219, + "step": 1950 + }, + { + "epoch": 0.42, + "learning_rate": 9.166133503945404e-05, + "loss": 4.1584, + "step": 1955 + }, + { + "epoch": 0.42, + "learning_rate": 9.164000853060355e-05, + "loss": 4.2019, + "step": 1960 + }, + { + "epoch": 0.42, + "learning_rate": 9.161868202175304e-05, + "loss": 4.2094, + "step": 1965 + }, + { + "epoch": 0.42, + "learning_rate": 9.159735551290253e-05, + "loss": 4.0424, + "step": 1970 + }, + { + "epoch": 0.42, + "learning_rate": 9.157602900405204e-05, + "loss": 4.1501, + "step": 1975 + }, + { + "epoch": 0.42, + "learning_rate": 9.155470249520153e-05, + "loss": 4.1649, + "step": 1980 + }, + { + "epoch": 0.42, + "learning_rate": 9.153337598635103e-05, + "loss": 4.1051, + "step": 1985 + }, + { + "epoch": 0.42, + "learning_rate": 9.151204947750053e-05, + "loss": 4.188, + "step": 1990 + }, + { + "epoch": 0.43, + "learning_rate": 9.149072296865003e-05, + "loss": 4.212, + "step": 1995 + }, + { + "epoch": 0.43, + "learning_rate": 9.146939645979953e-05, + "loss": 4.1834, + "step": 2000 + }, + { + "epoch": 0.43, + "learning_rate": 9.91448069950949e-05, + "loss": 4.1643, + "step": 2005 + }, + { + "epoch": 0.43, + "learning_rate": 9.914267434420986e-05, + "loss": 4.0988, + "step": 2010 + }, + { + "epoch": 0.43, + "learning_rate": 9.91405416933248e-05, + "loss": 4.1608, + "step": 2015 + }, + { + "epoch": 0.43, + "learning_rate": 9.913840904243976e-05, + "loss": 4.1784, + "step": 2020 + }, + { + "epoch": 0.43, + "learning_rate": 9.913627639155471e-05, + "loss": 4.0438, + "step": 2025 + }, + { + "epoch": 0.43, + "learning_rate": 9.913414374066967e-05, + "loss": 4.2819, + "step": 2030 + }, + { + "epoch": 0.43, + "learning_rate": 9.913201108978461e-05, + "loss": 4.1087, + "step": 2035 + }, + { + "epoch": 0.43, + "learning_rate": 9.912987843889956e-05, + "loss": 4.137, + "step": 2040 + }, + { + "epoch": 0.44, + "learning_rate": 9.91277457880145e-05, + "loss": 4.176, + "step": 2045 + }, + { + "epoch": 0.44, + "learning_rate": 9.912561313712946e-05, + "loss": 4.1253, + "step": 2050 + }, + { + "epoch": 0.44, + "learning_rate": 9.91234804862444e-05, + "loss": 4.1579, + "step": 2055 + }, + { + "epoch": 0.44, + "learning_rate": 9.912134783535935e-05, + "loss": 4.1829, + "step": 2060 + }, + { + "epoch": 0.44, + "learning_rate": 9.91192151844743e-05, + "loss": 4.2767, + "step": 2065 + }, + { + "epoch": 0.44, + "learning_rate": 9.911708253358925e-05, + "loss": 4.1685, + "step": 2070 + }, + { + "epoch": 0.44, + "learning_rate": 9.91149498827042e-05, + "loss": 4.1432, + "step": 2075 + }, + { + "epoch": 0.44, + "learning_rate": 9.911281723181916e-05, + "loss": 4.0659, + "step": 2080 + }, + { + "epoch": 0.44, + "learning_rate": 9.911068458093411e-05, + "loss": 4.1739, + "step": 2085 + }, + { + "epoch": 0.45, + "learning_rate": 9.910855193004906e-05, + "loss": 4.1117, + "step": 2090 + }, + { + "epoch": 0.45, + "learning_rate": 9.910641927916401e-05, + "loss": 4.2319, + "step": 2095 + }, + { + "epoch": 0.45, + "learning_rate": 9.910428662827895e-05, + "loss": 4.2685, + "step": 2100 + }, + { + "epoch": 0.45, + "learning_rate": 9.91021539773939e-05, + "loss": 4.1428, + "step": 2105 + }, + { + "epoch": 0.45, + "learning_rate": 9.910002132650885e-05, + "loss": 4.1356, + "step": 2110 + }, + { + "epoch": 0.45, + "learning_rate": 9.90978886756238e-05, + "loss": 4.1466, + "step": 2115 + }, + { + "epoch": 0.45, + "learning_rate": 9.909575602473876e-05, + "loss": 4.1401, + "step": 2120 + }, + { + "epoch": 0.45, + "learning_rate": 9.909362337385371e-05, + "loss": 4.1408, + "step": 2125 + }, + { + "epoch": 0.45, + "learning_rate": 9.909149072296865e-05, + "loss": 4.0734, + "step": 2130 + }, + { + "epoch": 0.46, + "learning_rate": 9.908935807208361e-05, + "loss": 4.1361, + "step": 2135 + }, + { + "epoch": 0.46, + "learning_rate": 9.908722542119856e-05, + "loss": 4.036, + "step": 2140 + }, + { + "epoch": 0.46, + "learning_rate": 9.90850927703135e-05, + "loss": 4.1354, + "step": 2145 + }, + { + "epoch": 0.46, + "learning_rate": 9.908296011942846e-05, + "loss": 4.0682, + "step": 2150 + }, + { + "epoch": 0.46, + "learning_rate": 9.90808274685434e-05, + "loss": 4.1486, + "step": 2155 + }, + { + "epoch": 0.46, + "learning_rate": 9.907869481765835e-05, + "loss": 4.1871, + "step": 2160 + }, + { + "epoch": 0.46, + "learning_rate": 9.90765621667733e-05, + "loss": 4.1501, + "step": 2165 + }, + { + "epoch": 0.46, + "learning_rate": 9.907442951588825e-05, + "loss": 4.1746, + "step": 2170 + }, + { + "epoch": 0.46, + "learning_rate": 9.90722968650032e-05, + "loss": 4.0955, + "step": 2175 + }, + { + "epoch": 0.46, + "learning_rate": 9.907016421411816e-05, + "loss": 4.1039, + "step": 2180 + }, + { + "epoch": 0.47, + "learning_rate": 9.90680315632331e-05, + "loss": 4.2038, + "step": 2185 + }, + { + "epoch": 0.47, + "learning_rate": 9.906589891234806e-05, + "loss": 4.1666, + "step": 2190 + }, + { + "epoch": 0.47, + "learning_rate": 9.9063766261463e-05, + "loss": 4.1018, + "step": 2195 + }, + { + "epoch": 0.47, + "learning_rate": 9.906163361057795e-05, + "loss": 4.2162, + "step": 2200 + }, + { + "epoch": 0.47, + "learning_rate": 9.90595009596929e-05, + "loss": 4.0588, + "step": 2205 + }, + { + "epoch": 0.47, + "learning_rate": 9.905736830880785e-05, + "loss": 4.0441, + "step": 2210 + }, + { + "epoch": 0.47, + "learning_rate": 9.90552356579228e-05, + "loss": 4.1408, + "step": 2215 + }, + { + "epoch": 0.47, + "learning_rate": 9.905310300703776e-05, + "loss": 4.1757, + "step": 2220 + }, + { + "epoch": 0.47, + "learning_rate": 9.905097035615271e-05, + "loss": 4.2243, + "step": 2225 + }, + { + "epoch": 0.48, + "learning_rate": 9.904883770526765e-05, + "loss": 4.1703, + "step": 2230 + }, + { + "epoch": 0.48, + "learning_rate": 9.904670505438261e-05, + "loss": 4.0536, + "step": 2235 + }, + { + "epoch": 0.48, + "learning_rate": 9.904457240349755e-05, + "loss": 4.1506, + "step": 2240 + }, + { + "epoch": 0.48, + "learning_rate": 9.90424397526125e-05, + "loss": 4.2364, + "step": 2245 + }, + { + "epoch": 0.48, + "learning_rate": 9.904030710172744e-05, + "loss": 4.2115, + "step": 2250 + }, + { + "epoch": 0.48, + "learning_rate": 9.90381744508424e-05, + "loss": 4.0522, + "step": 2255 + }, + { + "epoch": 0.48, + "learning_rate": 9.903604179995734e-05, + "loss": 4.2237, + "step": 2260 + }, + { + "epoch": 0.48, + "learning_rate": 9.90339091490723e-05, + "loss": 4.1049, + "step": 2265 + }, + { + "epoch": 0.48, + "learning_rate": 9.903177649818726e-05, + "loss": 4.1546, + "step": 2270 + }, + { + "epoch": 0.49, + "learning_rate": 9.90296438473022e-05, + "loss": 4.0804, + "step": 2275 + }, + { + "epoch": 0.49, + "learning_rate": 9.902751119641716e-05, + "loss": 4.132, + "step": 2280 + }, + { + "epoch": 0.49, + "learning_rate": 9.90253785455321e-05, + "loss": 4.093, + "step": 2285 + }, + { + "epoch": 0.49, + "learning_rate": 9.902324589464706e-05, + "loss": 4.154, + "step": 2290 + }, + { + "epoch": 0.49, + "learning_rate": 9.9021113243762e-05, + "loss": 4.1667, + "step": 2295 + }, + { + "epoch": 0.49, + "learning_rate": 9.901898059287695e-05, + "loss": 4.0874, + "step": 2300 + }, + { + "epoch": 0.49, + "learning_rate": 9.901684794199189e-05, + "loss": 4.1243, + "step": 2305 + }, + { + "epoch": 0.49, + "learning_rate": 9.901471529110685e-05, + "loss": 4.2504, + "step": 2310 + }, + { + "epoch": 0.49, + "learning_rate": 9.90125826402218e-05, + "loss": 4.0576, + "step": 2315 + }, + { + "epoch": 0.49, + "learning_rate": 9.901044998933676e-05, + "loss": 4.1209, + "step": 2320 + }, + { + "epoch": 0.5, + "learning_rate": 9.90083173384517e-05, + "loss": 4.1222, + "step": 2325 + }, + { + "epoch": 0.5, + "learning_rate": 9.900618468756665e-05, + "loss": 4.2048, + "step": 2330 + }, + { + "epoch": 0.5, + "learning_rate": 9.900405203668161e-05, + "loss": 4.1303, + "step": 2335 + }, + { + "epoch": 0.5, + "learning_rate": 9.900191938579655e-05, + "loss": 4.0732, + "step": 2340 + }, + { + "epoch": 0.5, + "learning_rate": 9.89997867349115e-05, + "loss": 4.1394, + "step": 2345 + }, + { + "epoch": 0.5, + "learning_rate": 9.899765408402644e-05, + "loss": 3.9982, + "step": 2350 + }, + { + "epoch": 0.5, + "learning_rate": 9.89955214331414e-05, + "loss": 4.0899, + "step": 2355 + }, + { + "epoch": 0.5, + "learning_rate": 9.899338878225634e-05, + "loss": 4.2053, + "step": 2360 + }, + { + "epoch": 0.5, + "learning_rate": 9.89912561313713e-05, + "loss": 4.1326, + "step": 2365 + }, + { + "epoch": 0.51, + "learning_rate": 9.898912348048625e-05, + "loss": 4.11, + "step": 2370 + }, + { + "epoch": 0.51, + "learning_rate": 9.89869908296012e-05, + "loss": 4.1633, + "step": 2375 + }, + { + "epoch": 0.51, + "learning_rate": 9.898485817871615e-05, + "loss": 4.1121, + "step": 2380 + }, + { + "epoch": 0.51, + "learning_rate": 9.89827255278311e-05, + "loss": 4.0735, + "step": 2385 + }, + { + "epoch": 0.51, + "learning_rate": 9.898059287694604e-05, + "loss": 4.1586, + "step": 2390 + }, + { + "epoch": 0.51, + "learning_rate": 9.8978460226061e-05, + "loss": 4.2001, + "step": 2395 + }, + { + "epoch": 0.51, + "learning_rate": 9.897632757517595e-05, + "loss": 4.1727, + "step": 2400 + }, + { + "epoch": 0.51, + "learning_rate": 9.897419492429089e-05, + "loss": 4.1287, + "step": 2405 + }, + { + "epoch": 0.51, + "learning_rate": 9.897206227340585e-05, + "loss": 4.212, + "step": 2410 + }, + { + "epoch": 0.51, + "learning_rate": 9.89699296225208e-05, + "loss": 4.1361, + "step": 2415 + }, + { + "epoch": 0.52, + "learning_rate": 9.896779697163576e-05, + "loss": 4.0663, + "step": 2420 + }, + { + "epoch": 0.52, + "learning_rate": 9.89656643207507e-05, + "loss": 4.1787, + "step": 2425 + }, + { + "epoch": 0.52, + "learning_rate": 9.896353166986565e-05, + "loss": 4.1471, + "step": 2430 + }, + { + "epoch": 0.52, + "learning_rate": 9.89613990189806e-05, + "loss": 4.1378, + "step": 2435 + }, + { + "epoch": 0.52, + "learning_rate": 9.895926636809555e-05, + "loss": 4.1138, + "step": 2440 + }, + { + "epoch": 0.52, + "learning_rate": 9.895713371721049e-05, + "loss": 4.1356, + "step": 2445 + }, + { + "epoch": 0.52, + "learning_rate": 9.895500106632544e-05, + "loss": 4.1292, + "step": 2450 + }, + { + "epoch": 0.52, + "learning_rate": 9.895286841544039e-05, + "loss": 4.2884, + "step": 2455 + }, + { + "epoch": 0.52, + "learning_rate": 9.895073576455534e-05, + "loss": 4.1709, + "step": 2460 + }, + { + "epoch": 0.53, + "learning_rate": 9.894860311367031e-05, + "loss": 4.1509, + "step": 2465 + }, + { + "epoch": 0.53, + "learning_rate": 9.894647046278525e-05, + "loss": 4.1469, + "step": 2470 + }, + { + "epoch": 0.53, + "learning_rate": 9.89443378119002e-05, + "loss": 4.1039, + "step": 2475 + }, + { + "epoch": 0.53, + "learning_rate": 9.894220516101515e-05, + "loss": 4.0355, + "step": 2480 + }, + { + "epoch": 0.53, + "learning_rate": 9.89400725101301e-05, + "loss": 4.1512, + "step": 2485 + }, + { + "epoch": 0.53, + "learning_rate": 9.893793985924504e-05, + "loss": 4.2215, + "step": 2490 + }, + { + "epoch": 0.53, + "learning_rate": 9.893580720836e-05, + "loss": 4.0549, + "step": 2495 + }, + { + "epoch": 0.53, + "learning_rate": 9.893367455747494e-05, + "loss": 4.0303, + "step": 2500 + }, + { + "epoch": 0.53, + "learning_rate": 9.893154190658989e-05, + "loss": 4.0719, + "step": 2505 + }, + { + "epoch": 0.54, + "learning_rate": 9.892940925570485e-05, + "loss": 4.1445, + "step": 2510 + }, + { + "epoch": 0.54, + "learning_rate": 9.89272766048198e-05, + "loss": 4.0641, + "step": 2515 + }, + { + "epoch": 0.54, + "learning_rate": 9.892514395393474e-05, + "loss": 4.124, + "step": 2520 + }, + { + "epoch": 0.54, + "learning_rate": 9.89230113030497e-05, + "loss": 4.1024, + "step": 2525 + }, + { + "epoch": 0.54, + "learning_rate": 9.892087865216465e-05, + "loss": 4.0906, + "step": 2530 + }, + { + "epoch": 0.54, + "learning_rate": 9.89187460012796e-05, + "loss": 4.1771, + "step": 2535 + }, + { + "epoch": 0.54, + "learning_rate": 9.891661335039455e-05, + "loss": 4.1613, + "step": 2540 + }, + { + "epoch": 0.54, + "learning_rate": 9.891448069950949e-05, + "loss": 4.052, + "step": 2545 + }, + { + "epoch": 0.54, + "learning_rate": 9.891234804862444e-05, + "loss": 4.0764, + "step": 2550 + }, + { + "epoch": 0.54, + "learning_rate": 9.891021539773939e-05, + "loss": 4.1504, + "step": 2555 + }, + { + "epoch": 0.55, + "learning_rate": 9.890808274685434e-05, + "loss": 4.0837, + "step": 2560 + }, + { + "epoch": 0.55, + "learning_rate": 9.89059500959693e-05, + "loss": 4.0608, + "step": 2565 + }, + { + "epoch": 0.55, + "learning_rate": 9.890381744508425e-05, + "loss": 4.0945, + "step": 2570 + }, + { + "epoch": 0.55, + "learning_rate": 9.890168479419919e-05, + "loss": 4.1251, + "step": 2575 + }, + { + "epoch": 0.55, + "learning_rate": 9.889955214331415e-05, + "loss": 4.1016, + "step": 2580 + }, + { + "epoch": 0.55, + "learning_rate": 9.889741949242909e-05, + "loss": 4.0497, + "step": 2585 + }, + { + "epoch": 0.55, + "learning_rate": 9.889528684154404e-05, + "loss": 4.0531, + "step": 2590 + }, + { + "epoch": 0.55, + "learning_rate": 9.8893154190659e-05, + "loss": 4.1791, + "step": 2595 + }, + { + "epoch": 0.55, + "learning_rate": 9.889102153977394e-05, + "loss": 4.0953, + "step": 2600 + }, + { + "epoch": 0.56, + "learning_rate": 9.888888888888889e-05, + "loss": 4.1382, + "step": 2605 + }, + { + "epoch": 0.56, + "learning_rate": 9.888675623800385e-05, + "loss": 4.1342, + "step": 2610 + }, + { + "epoch": 0.56, + "learning_rate": 9.88846235871188e-05, + "loss": 4.1683, + "step": 2615 + }, + { + "epoch": 0.56, + "learning_rate": 9.888249093623374e-05, + "loss": 4.1578, + "step": 2620 + }, + { + "epoch": 0.56, + "learning_rate": 9.88803582853487e-05, + "loss": 4.1064, + "step": 2625 + }, + { + "epoch": 0.56, + "learning_rate": 9.887822563446364e-05, + "loss": 4.1793, + "step": 2630 + }, + { + "epoch": 0.56, + "learning_rate": 9.88760929835786e-05, + "loss": 4.0855, + "step": 2635 + }, + { + "epoch": 0.56, + "learning_rate": 9.887396033269353e-05, + "loss": 4.001, + "step": 2640 + }, + { + "epoch": 0.56, + "learning_rate": 9.887182768180849e-05, + "loss": 4.1257, + "step": 2645 + }, + { + "epoch": 0.57, + "learning_rate": 9.886969503092343e-05, + "loss": 4.0758, + "step": 2650 + }, + { + "epoch": 0.57, + "learning_rate": 9.886756238003839e-05, + "loss": 4.1618, + "step": 2655 + }, + { + "epoch": 0.57, + "learning_rate": 9.886542972915335e-05, + "loss": 3.9996, + "step": 2660 + }, + { + "epoch": 0.57, + "learning_rate": 9.88632970782683e-05, + "loss": 3.9529, + "step": 2665 + }, + { + "epoch": 0.57, + "learning_rate": 9.886116442738325e-05, + "loss": 4.0993, + "step": 2670 + }, + { + "epoch": 0.57, + "learning_rate": 9.885903177649819e-05, + "loss": 4.1167, + "step": 2675 + }, + { + "epoch": 0.57, + "learning_rate": 9.885689912561315e-05, + "loss": 4.0109, + "step": 2680 + }, + { + "epoch": 0.57, + "learning_rate": 9.885476647472809e-05, + "loss": 4.1242, + "step": 2685 + }, + { + "epoch": 0.57, + "learning_rate": 9.885263382384304e-05, + "loss": 4.1102, + "step": 2690 + }, + { + "epoch": 0.57, + "learning_rate": 9.885050117295798e-05, + "loss": 4.1421, + "step": 2695 + }, + { + "epoch": 0.58, + "learning_rate": 9.884836852207294e-05, + "loss": 4.1446, + "step": 2700 + }, + { + "epoch": 0.58, + "learning_rate": 9.884623587118789e-05, + "loss": 4.0766, + "step": 2705 + }, + { + "epoch": 0.58, + "learning_rate": 9.884410322030285e-05, + "loss": 4.2073, + "step": 2710 + }, + { + "epoch": 0.58, + "learning_rate": 9.884197056941779e-05, + "loss": 4.0118, + "step": 2715 + }, + { + "epoch": 0.58, + "learning_rate": 9.883983791853274e-05, + "loss": 4.052, + "step": 2720 + }, + { + "epoch": 0.58, + "learning_rate": 9.88377052676477e-05, + "loss": 4.2132, + "step": 2725 + }, + { + "epoch": 0.58, + "learning_rate": 9.883557261676264e-05, + "loss": 4.0909, + "step": 2730 + }, + { + "epoch": 0.58, + "learning_rate": 9.88334399658776e-05, + "loss": 4.0882, + "step": 2735 + }, + { + "epoch": 0.58, + "learning_rate": 9.883130731499253e-05, + "loss": 4.0401, + "step": 2740 + }, + { + "epoch": 0.59, + "learning_rate": 9.882917466410749e-05, + "loss": 4.046, + "step": 2745 + }, + { + "epoch": 0.59, + "learning_rate": 9.882704201322243e-05, + "loss": 4.0792, + "step": 2750 + }, + { + "epoch": 0.59, + "learning_rate": 9.882490936233739e-05, + "loss": 4.1257, + "step": 2755 + }, + { + "epoch": 0.59, + "learning_rate": 9.882277671145234e-05, + "loss": 4.0106, + "step": 2760 + }, + { + "epoch": 0.59, + "learning_rate": 9.88206440605673e-05, + "loss": 4.0985, + "step": 2765 + }, + { + "epoch": 0.59, + "learning_rate": 9.881851140968224e-05, + "loss": 4.0917, + "step": 2770 + }, + { + "epoch": 0.59, + "learning_rate": 9.881637875879719e-05, + "loss": 4.0979, + "step": 2775 + }, + { + "epoch": 0.59, + "learning_rate": 9.881424610791213e-05, + "loss": 4.0565, + "step": 2780 + }, + { + "epoch": 0.59, + "learning_rate": 9.881211345702709e-05, + "loss": 4.1683, + "step": 2785 + }, + { + "epoch": 0.59, + "learning_rate": 9.880998080614204e-05, + "loss": 4.143, + "step": 2790 + }, + { + "epoch": 0.6, + "learning_rate": 9.880784815525698e-05, + "loss": 4.034, + "step": 2795 + }, + { + "epoch": 0.6, + "learning_rate": 9.880571550437194e-05, + "loss": 4.1128, + "step": 2800 + }, + { + "epoch": 0.6, + "learning_rate": 9.880358285348689e-05, + "loss": 4.1253, + "step": 2805 + }, + { + "epoch": 0.6, + "learning_rate": 9.880145020260185e-05, + "loss": 4.0372, + "step": 2810 + }, + { + "epoch": 0.6, + "learning_rate": 9.879931755171679e-05, + "loss": 4.1651, + "step": 2815 + }, + { + "epoch": 0.6, + "learning_rate": 9.879718490083174e-05, + "loss": 4.1341, + "step": 2820 + }, + { + "epoch": 0.6, + "learning_rate": 9.879505224994668e-05, + "loss": 4.0892, + "step": 2825 + }, + { + "epoch": 0.6, + "learning_rate": 9.879291959906164e-05, + "loss": 4.1236, + "step": 2830 + }, + { + "epoch": 0.6, + "learning_rate": 9.879078694817658e-05, + "loss": 4.1719, + "step": 2835 + }, + { + "epoch": 0.61, + "learning_rate": 9.878865429729153e-05, + "loss": 4.1386, + "step": 2840 + }, + { + "epoch": 0.61, + "learning_rate": 9.878652164640649e-05, + "loss": 4.1138, + "step": 2845 + }, + { + "epoch": 0.61, + "learning_rate": 9.878438899552143e-05, + "loss": 4.1469, + "step": 2850 + }, + { + "epoch": 0.61, + "learning_rate": 9.87822563446364e-05, + "loss": 4.1446, + "step": 2855 + }, + { + "epoch": 0.61, + "learning_rate": 9.878012369375134e-05, + "loss": 4.1322, + "step": 2860 + }, + { + "epoch": 0.61, + "learning_rate": 9.87779910428663e-05, + "loss": 4.0845, + "step": 2865 + }, + { + "epoch": 0.61, + "learning_rate": 9.877585839198124e-05, + "loss": 3.9765, + "step": 2870 + }, + { + "epoch": 0.61, + "learning_rate": 9.877372574109619e-05, + "loss": 4.011, + "step": 2875 + }, + { + "epoch": 0.61, + "learning_rate": 9.877159309021113e-05, + "loss": 4.1012, + "step": 2880 + }, + { + "epoch": 0.62, + "learning_rate": 9.876946043932609e-05, + "loss": 4.0735, + "step": 2885 + }, + { + "epoch": 0.62, + "learning_rate": 9.876732778844103e-05, + "loss": 4.1378, + "step": 2890 + }, + { + "epoch": 0.62, + "learning_rate": 9.876519513755598e-05, + "loss": 4.0898, + "step": 2895 + }, + { + "epoch": 0.62, + "learning_rate": 9.876306248667094e-05, + "loss": 3.9766, + "step": 2900 + }, + { + "epoch": 0.62, + "learning_rate": 9.876092983578589e-05, + "loss": 4.0375, + "step": 2905 + }, + { + "epoch": 0.62, + "learning_rate": 9.875879718490085e-05, + "loss": 4.1669, + "step": 2910 + }, + { + "epoch": 0.62, + "learning_rate": 9.875666453401579e-05, + "loss": 4.1662, + "step": 2915 + }, + { + "epoch": 0.62, + "learning_rate": 9.875453188313074e-05, + "loss": 4.032, + "step": 2920 + }, + { + "epoch": 0.62, + "learning_rate": 9.875239923224568e-05, + "loss": 4.1211, + "step": 2925 + }, + { + "epoch": 0.62, + "learning_rate": 9.875026658136064e-05, + "loss": 4.0966, + "step": 2930 + }, + { + "epoch": 0.63, + "learning_rate": 9.874813393047558e-05, + "loss": 4.0909, + "step": 2935 + }, + { + "epoch": 0.63, + "learning_rate": 9.874600127959053e-05, + "loss": 4.1091, + "step": 2940 + }, + { + "epoch": 0.63, + "learning_rate": 9.874386862870548e-05, + "loss": 4.0977, + "step": 2945 + }, + { + "epoch": 0.63, + "learning_rate": 9.874173597782044e-05, + "loss": 4.0881, + "step": 2950 + }, + { + "epoch": 0.63, + "learning_rate": 9.873960332693539e-05, + "loss": 4.1552, + "step": 2955 + }, + { + "epoch": 0.63, + "learning_rate": 9.873747067605034e-05, + "loss": 4.0706, + "step": 2960 + }, + { + "epoch": 0.63, + "learning_rate": 9.873533802516528e-05, + "loss": 4.0482, + "step": 2965 + }, + { + "epoch": 0.63, + "learning_rate": 9.873320537428024e-05, + "loss": 4.1118, + "step": 2970 + }, + { + "epoch": 0.63, + "learning_rate": 9.873107272339519e-05, + "loss": 4.1406, + "step": 2975 + }, + { + "epoch": 0.64, + "learning_rate": 9.872894007251013e-05, + "loss": 4.0668, + "step": 2980 + }, + { + "epoch": 0.64, + "learning_rate": 9.872680742162509e-05, + "loss": 4.1856, + "step": 2985 + }, + { + "epoch": 0.64, + "learning_rate": 9.872467477074003e-05, + "loss": 4.1681, + "step": 2990 + }, + { + "epoch": 0.64, + "learning_rate": 9.872254211985498e-05, + "loss": 4.1388, + "step": 2995 + }, + { + "epoch": 0.64, + "learning_rate": 9.872040946896994e-05, + "loss": 4.0494, + "step": 3000 + }, + { + "epoch": 0.64, + "learning_rate": 9.871827681808489e-05, + "loss": 4.1185, + "step": 3005 + }, + { + "epoch": 0.64, + "learning_rate": 9.871614416719983e-05, + "loss": 4.1959, + "step": 3010 + }, + { + "epoch": 0.64, + "learning_rate": 9.871401151631479e-05, + "loss": 4.1379, + "step": 3015 + }, + { + "epoch": 0.64, + "learning_rate": 9.871187886542973e-05, + "loss": 4.1076, + "step": 3020 + }, + { + "epoch": 0.65, + "learning_rate": 9.870974621454468e-05, + "loss": 4.1256, + "step": 3025 + }, + { + "epoch": 0.65, + "learning_rate": 9.870761356365963e-05, + "loss": 4.0688, + "step": 3030 + }, + { + "epoch": 0.65, + "learning_rate": 9.870548091277458e-05, + "loss": 4.1218, + "step": 3035 + }, + { + "epoch": 0.65, + "learning_rate": 9.870334826188953e-05, + "loss": 4.061, + "step": 3040 + }, + { + "epoch": 0.65, + "learning_rate": 9.870121561100448e-05, + "loss": 4.05, + "step": 3045 + }, + { + "epoch": 0.65, + "learning_rate": 9.869908296011944e-05, + "loss": 4.0169, + "step": 3050 + }, + { + "epoch": 0.65, + "learning_rate": 9.869695030923439e-05, + "loss": 4.127, + "step": 3055 + }, + { + "epoch": 0.65, + "learning_rate": 9.869481765834934e-05, + "loss": 3.9995, + "step": 3060 + }, + { + "epoch": 0.65, + "learning_rate": 9.869268500746428e-05, + "loss": 4.1399, + "step": 3065 + }, + { + "epoch": 0.65, + "learning_rate": 9.869055235657924e-05, + "loss": 4.1214, + "step": 3070 + }, + { + "epoch": 0.66, + "learning_rate": 9.868841970569418e-05, + "loss": 4.1678, + "step": 3075 + }, + { + "epoch": 0.66, + "learning_rate": 9.868628705480913e-05, + "loss": 4.0547, + "step": 3080 + }, + { + "epoch": 0.66, + "learning_rate": 9.868415440392407e-05, + "loss": 4.1074, + "step": 3085 + }, + { + "epoch": 0.66, + "learning_rate": 9.868202175303903e-05, + "loss": 4.1286, + "step": 3090 + }, + { + "epoch": 0.66, + "learning_rate": 9.867988910215398e-05, + "loss": 4.1574, + "step": 3095 + }, + { + "epoch": 0.66, + "learning_rate": 9.867775645126894e-05, + "loss": 4.1979, + "step": 3100 + }, + { + "epoch": 0.66, + "learning_rate": 9.867562380038389e-05, + "loss": 4.0781, + "step": 3105 + }, + { + "epoch": 0.66, + "learning_rate": 9.867349114949883e-05, + "loss": 4.1659, + "step": 3110 + }, + { + "epoch": 0.66, + "learning_rate": 9.867135849861379e-05, + "loss": 4.0674, + "step": 3115 + }, + { + "epoch": 0.67, + "learning_rate": 9.866922584772873e-05, + "loss": 4.1713, + "step": 3120 + }, + { + "epoch": 0.67, + "learning_rate": 9.866709319684368e-05, + "loss": 4.1347, + "step": 3125 + }, + { + "epoch": 0.67, + "learning_rate": 9.866496054595863e-05, + "loss": 4.0761, + "step": 3130 + }, + { + "epoch": 0.67, + "learning_rate": 9.866282789507358e-05, + "loss": 4.0691, + "step": 3135 + }, + { + "epoch": 0.67, + "learning_rate": 9.866069524418852e-05, + "loss": 4.0705, + "step": 3140 + }, + { + "epoch": 0.67, + "learning_rate": 9.865856259330349e-05, + "loss": 4.0338, + "step": 3145 + }, + { + "epoch": 0.67, + "learning_rate": 9.865642994241843e-05, + "loss": 4.1374, + "step": 3150 + }, + { + "epoch": 0.67, + "learning_rate": 9.865429729153339e-05, + "loss": 4.0365, + "step": 3155 + }, + { + "epoch": 0.67, + "learning_rate": 9.865216464064833e-05, + "loss": 4.0976, + "step": 3160 + }, + { + "epoch": 0.67, + "learning_rate": 9.865003198976328e-05, + "loss": 4.0604, + "step": 3165 + }, + { + "epoch": 0.68, + "learning_rate": 9.864789933887824e-05, + "loss": 4.1707, + "step": 3170 + }, + { + "epoch": 0.68, + "learning_rate": 9.864576668799318e-05, + "loss": 4.0944, + "step": 3175 + }, + { + "epoch": 0.68, + "learning_rate": 9.864363403710813e-05, + "loss": 4.0963, + "step": 3180 + }, + { + "epoch": 0.68, + "learning_rate": 9.864150138622307e-05, + "loss": 4.0062, + "step": 3185 + }, + { + "epoch": 0.68, + "learning_rate": 9.863936873533803e-05, + "loss": 3.9077, + "step": 3190 + }, + { + "epoch": 0.68, + "learning_rate": 9.863723608445298e-05, + "loss": 4.0919, + "step": 3195 + }, + { + "epoch": 0.68, + "learning_rate": 9.863510343356794e-05, + "loss": 4.1093, + "step": 3200 + }, + { + "epoch": 0.68, + "learning_rate": 9.863297078268288e-05, + "loss": 4.0909, + "step": 3205 + }, + { + "epoch": 0.68, + "learning_rate": 9.863083813179783e-05, + "loss": 4.1038, + "step": 3210 + }, + { + "epoch": 0.69, + "learning_rate": 9.862870548091277e-05, + "loss": 4.042, + "step": 3215 + }, + { + "epoch": 0.69, + "learning_rate": 9.862657283002773e-05, + "loss": 4.0529, + "step": 3220 + }, + { + "epoch": 0.69, + "learning_rate": 9.862444017914267e-05, + "loss": 4.013, + "step": 3225 + }, + { + "epoch": 0.69, + "learning_rate": 9.862230752825763e-05, + "loss": 4.0706, + "step": 3230 + }, + { + "epoch": 0.69, + "learning_rate": 9.862017487737258e-05, + "loss": 4.0464, + "step": 3235 + }, + { + "epoch": 0.69, + "learning_rate": 9.861804222648752e-05, + "loss": 4.11, + "step": 3240 + }, + { + "epoch": 0.69, + "learning_rate": 9.861590957560249e-05, + "loss": 4.0446, + "step": 3245 + }, + { + "epoch": 0.69, + "learning_rate": 9.861377692471743e-05, + "loss": 4.2276, + "step": 3250 + }, + { + "epoch": 0.69, + "learning_rate": 9.861164427383239e-05, + "loss": 3.9942, + "step": 3255 + }, + { + "epoch": 0.7, + "learning_rate": 9.860951162294733e-05, + "loss": 4.094, + "step": 3260 + }, + { + "epoch": 0.7, + "learning_rate": 9.860737897206228e-05, + "loss": 4.0907, + "step": 3265 + }, + { + "epoch": 0.7, + "learning_rate": 9.860524632117722e-05, + "loss": 4.0857, + "step": 3270 + }, + { + "epoch": 0.7, + "learning_rate": 9.860311367029218e-05, + "loss": 4.0191, + "step": 3275 + }, + { + "epoch": 0.7, + "learning_rate": 9.860098101940712e-05, + "loss": 4.0758, + "step": 3280 + }, + { + "epoch": 0.7, + "learning_rate": 9.859884836852207e-05, + "loss": 4.1778, + "step": 3285 + }, + { + "epoch": 0.7, + "learning_rate": 9.859671571763703e-05, + "loss": 4.2016, + "step": 3290 + }, + { + "epoch": 0.7, + "learning_rate": 9.859458306675198e-05, + "loss": 3.9973, + "step": 3295 + }, + { + "epoch": 0.7, + "learning_rate": 9.859245041586694e-05, + "loss": 4.0544, + "step": 3300 + }, + { + "epoch": 0.7, + "learning_rate": 9.859031776498188e-05, + "loss": 4.0442, + "step": 3305 + }, + { + "epoch": 0.71, + "learning_rate": 9.858818511409683e-05, + "loss": 4.1261, + "step": 3310 + }, + { + "epoch": 0.71, + "learning_rate": 9.858605246321177e-05, + "loss": 4.2476, + "step": 3315 + }, + { + "epoch": 0.71, + "learning_rate": 9.858391981232673e-05, + "loss": 3.9749, + "step": 3320 + }, + { + "epoch": 0.71, + "learning_rate": 9.858178716144167e-05, + "loss": 4.1603, + "step": 3325 + }, + { + "epoch": 0.71, + "learning_rate": 9.857965451055663e-05, + "loss": 4.1023, + "step": 3330 + }, + { + "epoch": 0.71, + "learning_rate": 9.857752185967157e-05, + "loss": 4.0706, + "step": 3335 + }, + { + "epoch": 0.71, + "learning_rate": 9.857538920878653e-05, + "loss": 4.1547, + "step": 3340 + }, + { + "epoch": 0.71, + "learning_rate": 9.857325655790148e-05, + "loss": 4.1731, + "step": 3345 + }, + { + "epoch": 0.71, + "learning_rate": 9.857112390701643e-05, + "loss": 3.9327, + "step": 3350 + }, + { + "epoch": 0.72, + "learning_rate": 9.856899125613137e-05, + "loss": 4.099, + "step": 3355 + }, + { + "epoch": 0.72, + "learning_rate": 9.856685860524633e-05, + "loss": 4.0212, + "step": 3360 + }, + { + "epoch": 0.72, + "learning_rate": 9.856472595436128e-05, + "loss": 4.0144, + "step": 3365 + }, + { + "epoch": 0.72, + "learning_rate": 9.856259330347622e-05, + "loss": 4.1351, + "step": 3370 + }, + { + "epoch": 0.72, + "learning_rate": 9.856046065259118e-05, + "loss": 4.0463, + "step": 3375 + }, + { + "epoch": 0.72, + "learning_rate": 9.855832800170612e-05, + "loss": 4.1414, + "step": 3380 + }, + { + "epoch": 0.72, + "learning_rate": 9.855619535082107e-05, + "loss": 4.0608, + "step": 3385 + }, + { + "epoch": 0.72, + "learning_rate": 9.855406269993603e-05, + "loss": 3.9693, + "step": 3390 + }, + { + "epoch": 0.72, + "learning_rate": 9.855193004905098e-05, + "loss": 4.1074, + "step": 3395 + }, + { + "epoch": 0.72, + "learning_rate": 9.854979739816592e-05, + "loss": 4.106, + "step": 3400 + }, + { + "epoch": 0.73, + "learning_rate": 9.854766474728088e-05, + "loss": 4.0068, + "step": 3405 + }, + { + "epoch": 0.73, + "learning_rate": 9.854553209639582e-05, + "loss": 4.0362, + "step": 3410 + }, + { + "epoch": 0.73, + "learning_rate": 9.854339944551077e-05, + "loss": 4.0621, + "step": 3415 + }, + { + "epoch": 0.73, + "learning_rate": 9.854126679462572e-05, + "loss": 4.0533, + "step": 3420 + }, + { + "epoch": 0.73, + "learning_rate": 9.853913414374067e-05, + "loss": 4.1254, + "step": 3425 + }, + { + "epoch": 0.73, + "learning_rate": 9.853700149285563e-05, + "loss": 4.1176, + "step": 3430 + }, + { + "epoch": 0.73, + "learning_rate": 9.853486884197057e-05, + "loss": 4.1032, + "step": 3435 + }, + { + "epoch": 0.73, + "learning_rate": 9.853273619108553e-05, + "loss": 4.0994, + "step": 3440 + }, + { + "epoch": 0.73, + "learning_rate": 9.853060354020048e-05, + "loss": 4.0515, + "step": 3445 + }, + { + "epoch": 0.74, + "learning_rate": 9.852847088931543e-05, + "loss": 4.0812, + "step": 3450 + }, + { + "epoch": 0.74, + "learning_rate": 9.852633823843037e-05, + "loss": 4.0606, + "step": 3455 + }, + { + "epoch": 0.74, + "learning_rate": 9.852420558754533e-05, + "loss": 4.0684, + "step": 3460 + }, + { + "epoch": 0.74, + "learning_rate": 9.852207293666027e-05, + "loss": 4.0667, + "step": 3465 + }, + { + "epoch": 0.74, + "learning_rate": 9.851994028577522e-05, + "loss": 4.0139, + "step": 3470 + }, + { + "epoch": 0.74, + "learning_rate": 9.851780763489016e-05, + "loss": 4.0982, + "step": 3475 + }, + { + "epoch": 0.74, + "learning_rate": 9.851567498400512e-05, + "loss": 4.1625, + "step": 3480 + }, + { + "epoch": 0.74, + "learning_rate": 9.851354233312007e-05, + "loss": 4.0794, + "step": 3485 + }, + { + "epoch": 0.74, + "learning_rate": 9.851140968223503e-05, + "loss": 3.9972, + "step": 3490 + }, + { + "epoch": 0.75, + "learning_rate": 9.850927703134998e-05, + "loss": 4.1615, + "step": 3495 + }, + { + "epoch": 0.75, + "learning_rate": 9.850714438046492e-05, + "loss": 4.099, + "step": 3500 + }, + { + "epoch": 0.75, + "learning_rate": 9.850501172957988e-05, + "loss": 4.2062, + "step": 3505 + }, + { + "epoch": 0.75, + "learning_rate": 9.850287907869482e-05, + "loss": 4.0242, + "step": 3510 + }, + { + "epoch": 0.75, + "learning_rate": 9.850074642780977e-05, + "loss": 4.114, + "step": 3515 + }, + { + "epoch": 0.75, + "learning_rate": 9.849861377692472e-05, + "loss": 4.1395, + "step": 3520 + }, + { + "epoch": 0.75, + "learning_rate": 9.849648112603967e-05, + "loss": 4.0926, + "step": 3525 + }, + { + "epoch": 0.75, + "learning_rate": 9.849434847515461e-05, + "loss": 4.0642, + "step": 3530 + }, + { + "epoch": 0.75, + "learning_rate": 9.849221582426958e-05, + "loss": 4.1643, + "step": 3535 + }, + { + "epoch": 0.75, + "learning_rate": 9.849008317338452e-05, + "loss": 4.0857, + "step": 3540 + }, + { + "epoch": 0.76, + "learning_rate": 9.848795052249948e-05, + "loss": 4.0544, + "step": 3545 + }, + { + "epoch": 0.76, + "learning_rate": 9.848581787161442e-05, + "loss": 4.0275, + "step": 3550 + }, + { + "epoch": 0.76, + "learning_rate": 9.848368522072937e-05, + "loss": 4.0562, + "step": 3555 + }, + { + "epoch": 0.76, + "learning_rate": 9.848155256984433e-05, + "loss": 4.0621, + "step": 3560 + }, + { + "epoch": 0.76, + "learning_rate": 9.847941991895927e-05, + "loss": 4.1615, + "step": 3565 + }, + { + "epoch": 0.76, + "learning_rate": 9.847728726807422e-05, + "loss": 4.0546, + "step": 3570 + }, + { + "epoch": 0.76, + "learning_rate": 9.847515461718916e-05, + "loss": 4.0172, + "step": 3575 + }, + { + "epoch": 0.76, + "learning_rate": 9.847302196630412e-05, + "loss": 4.1494, + "step": 3580 + }, + { + "epoch": 0.76, + "learning_rate": 9.847088931541907e-05, + "loss": 4.0837, + "step": 3585 + }, + { + "epoch": 0.77, + "learning_rate": 9.846875666453403e-05, + "loss": 4.0739, + "step": 3590 + }, + { + "epoch": 0.77, + "learning_rate": 9.846662401364897e-05, + "loss": 4.1764, + "step": 3595 + }, + { + "epoch": 0.77, + "learning_rate": 9.846449136276392e-05, + "loss": 4.005, + "step": 3600 + }, + { + "epoch": 0.77, + "learning_rate": 9.846235871187887e-05, + "loss": 4.1376, + "step": 3605 + }, + { + "epoch": 0.77, + "learning_rate": 9.846022606099382e-05, + "loss": 4.05, + "step": 3610 + }, + { + "epoch": 0.77, + "learning_rate": 9.845809341010876e-05, + "loss": 4.0567, + "step": 3615 + }, + { + "epoch": 0.77, + "learning_rate": 9.845596075922372e-05, + "loss": 4.0898, + "step": 3620 + }, + { + "epoch": 0.77, + "learning_rate": 9.845382810833867e-05, + "loss": 4.0379, + "step": 3625 + }, + { + "epoch": 0.77, + "learning_rate": 9.845169545745363e-05, + "loss": 4.0889, + "step": 3630 + }, + { + "epoch": 0.78, + "learning_rate": 9.844956280656858e-05, + "loss": 3.9299, + "step": 3635 + }, + { + "epoch": 0.78, + "learning_rate": 9.844743015568352e-05, + "loss": 4.0745, + "step": 3640 + }, + { + "epoch": 0.78, + "learning_rate": 9.844529750479848e-05, + "loss": 4.0128, + "step": 3645 + }, + { + "epoch": 0.78, + "learning_rate": 9.844316485391342e-05, + "loss": 4.0397, + "step": 3650 + }, + { + "epoch": 0.78, + "learning_rate": 9.844103220302837e-05, + "loss": 3.9828, + "step": 3655 + }, + { + "epoch": 0.78, + "learning_rate": 9.843889955214331e-05, + "loss": 4.0789, + "step": 3660 + }, + { + "epoch": 0.78, + "learning_rate": 9.843676690125827e-05, + "loss": 3.9853, + "step": 3665 + }, + { + "epoch": 0.78, + "learning_rate": 9.843463425037321e-05, + "loss": 4.0654, + "step": 3670 + }, + { + "epoch": 0.78, + "learning_rate": 9.843250159948816e-05, + "loss": 4.1018, + "step": 3675 + }, + { + "epoch": 0.78, + "learning_rate": 9.843036894860312e-05, + "loss": 4.1252, + "step": 3680 + }, + { + "epoch": 0.79, + "learning_rate": 9.842823629771807e-05, + "loss": 3.99, + "step": 3685 + }, + { + "epoch": 0.79, + "learning_rate": 9.842610364683303e-05, + "loss": 4.0717, + "step": 3690 + }, + { + "epoch": 0.79, + "learning_rate": 9.842397099594797e-05, + "loss": 4.015, + "step": 3695 + }, + { + "epoch": 0.79, + "learning_rate": 9.842183834506292e-05, + "loss": 4.1548, + "step": 3700 + }, + { + "epoch": 0.79, + "learning_rate": 9.841970569417787e-05, + "loss": 4.0925, + "step": 3705 + }, + { + "epoch": 0.79, + "learning_rate": 9.841757304329282e-05, + "loss": 4.0652, + "step": 3710 + }, + { + "epoch": 0.79, + "learning_rate": 9.841544039240776e-05, + "loss": 4.01, + "step": 3715 + }, + { + "epoch": 0.79, + "learning_rate": 9.841330774152272e-05, + "loss": 4.1887, + "step": 3720 + }, + { + "epoch": 0.79, + "learning_rate": 9.841117509063766e-05, + "loss": 3.9903, + "step": 3725 + }, + { + "epoch": 0.8, + "learning_rate": 9.840904243975263e-05, + "loss": 4.0842, + "step": 3730 + }, + { + "epoch": 0.8, + "learning_rate": 9.840690978886757e-05, + "loss": 4.1217, + "step": 3735 + }, + { + "epoch": 0.8, + "learning_rate": 9.840477713798252e-05, + "loss": 4.1292, + "step": 3740 + }, + { + "epoch": 0.8, + "learning_rate": 9.840264448709746e-05, + "loss": 4.0607, + "step": 3745 + }, + { + "epoch": 0.8, + "learning_rate": 9.840051183621242e-05, + "loss": 4.0346, + "step": 3750 + }, + { + "epoch": 0.8, + "learning_rate": 9.839837918532737e-05, + "loss": 4.018, + "step": 3755 + }, + { + "epoch": 0.8, + "learning_rate": 9.839624653444231e-05, + "loss": 4.0427, + "step": 3760 + }, + { + "epoch": 0.8, + "learning_rate": 9.839411388355727e-05, + "loss": 3.9912, + "step": 3765 + }, + { + "epoch": 0.8, + "learning_rate": 9.839198123267221e-05, + "loss": 4.0487, + "step": 3770 + }, + { + "epoch": 0.8, + "learning_rate": 9.838984858178716e-05, + "loss": 4.0825, + "step": 3775 + }, + { + "epoch": 0.81, + "learning_rate": 9.838771593090212e-05, + "loss": 4.0951, + "step": 3780 + }, + { + "epoch": 0.81, + "learning_rate": 9.838558328001707e-05, + "loss": 3.9822, + "step": 3785 + }, + { + "epoch": 0.81, + "learning_rate": 9.838345062913201e-05, + "loss": 4.0599, + "step": 3790 + }, + { + "epoch": 0.81, + "learning_rate": 9.838131797824697e-05, + "loss": 4.1555, + "step": 3795 + }, + { + "epoch": 0.81, + "learning_rate": 9.837918532736191e-05, + "loss": 4.0708, + "step": 3800 + }, + { + "epoch": 0.81, + "learning_rate": 9.837705267647687e-05, + "loss": 4.1252, + "step": 3805 + }, + { + "epoch": 0.81, + "learning_rate": 9.83749200255918e-05, + "loss": 4.1803, + "step": 3810 + }, + { + "epoch": 0.81, + "learning_rate": 9.837278737470676e-05, + "loss": 4.0704, + "step": 3815 + }, + { + "epoch": 0.81, + "learning_rate": 9.837065472382172e-05, + "loss": 4.0226, + "step": 3820 + }, + { + "epoch": 0.82, + "learning_rate": 9.836852207293667e-05, + "loss": 4.0557, + "step": 3825 + }, + { + "epoch": 0.82, + "learning_rate": 9.836638942205163e-05, + "loss": 4.1036, + "step": 3830 + }, + { + "epoch": 0.82, + "learning_rate": 9.836425677116657e-05, + "loss": 4.1469, + "step": 3835 + }, + { + "epoch": 0.82, + "learning_rate": 9.836212412028152e-05, + "loss": 4.0344, + "step": 3840 + }, + { + "epoch": 0.82, + "learning_rate": 9.835999146939646e-05, + "loss": 4.101, + "step": 3845 + }, + { + "epoch": 0.82, + "learning_rate": 9.835785881851142e-05, + "loss": 4.1505, + "step": 3850 + }, + { + "epoch": 0.82, + "learning_rate": 9.835572616762636e-05, + "loss": 4.1152, + "step": 3855 + }, + { + "epoch": 0.82, + "learning_rate": 9.835359351674131e-05, + "loss": 4.004, + "step": 3860 + }, + { + "epoch": 0.82, + "learning_rate": 9.835146086585625e-05, + "loss": 4.0193, + "step": 3865 + }, + { + "epoch": 0.83, + "learning_rate": 9.834932821497121e-05, + "loss": 4.0349, + "step": 3870 + }, + { + "epoch": 0.83, + "learning_rate": 9.834719556408616e-05, + "loss": 4.078, + "step": 3875 + }, + { + "epoch": 0.83, + "learning_rate": 9.834506291320112e-05, + "loss": 4.0406, + "step": 3880 + }, + { + "epoch": 0.83, + "learning_rate": 9.834293026231607e-05, + "loss": 4.0963, + "step": 3885 + }, + { + "epoch": 0.83, + "learning_rate": 9.834079761143101e-05, + "loss": 4.0159, + "step": 3890 + }, + { + "epoch": 0.83, + "learning_rate": 9.833866496054597e-05, + "loss": 4.1063, + "step": 3895 + }, + { + "epoch": 0.83, + "learning_rate": 9.833653230966091e-05, + "loss": 4.0392, + "step": 3900 + }, + { + "epoch": 0.83, + "learning_rate": 9.833439965877587e-05, + "loss": 4.1441, + "step": 3905 + }, + { + "epoch": 0.83, + "learning_rate": 9.83322670078908e-05, + "loss": 4.1397, + "step": 3910 + }, + { + "epoch": 0.83, + "learning_rate": 9.833013435700576e-05, + "loss": 4.0371, + "step": 3915 + }, + { + "epoch": 0.84, + "learning_rate": 9.83280017061207e-05, + "loss": 4.0455, + "step": 3920 + }, + { + "epoch": 0.84, + "learning_rate": 9.832586905523567e-05, + "loss": 3.9925, + "step": 3925 + }, + { + "epoch": 0.84, + "learning_rate": 9.832373640435061e-05, + "loss": 4.1464, + "step": 3930 + }, + { + "epoch": 0.84, + "learning_rate": 9.832160375346557e-05, + "loss": 4.0579, + "step": 3935 + }, + { + "epoch": 0.84, + "learning_rate": 9.831947110258051e-05, + "loss": 4.1011, + "step": 3940 + }, + { + "epoch": 0.84, + "learning_rate": 9.831733845169546e-05, + "loss": 3.9529, + "step": 3945 + }, + { + "epoch": 0.84, + "learning_rate": 9.831520580081042e-05, + "loss": 4.0772, + "step": 3950 + }, + { + "epoch": 0.84, + "learning_rate": 9.831307314992536e-05, + "loss": 4.0175, + "step": 3955 + }, + { + "epoch": 0.84, + "learning_rate": 9.831094049904031e-05, + "loss": 4.1711, + "step": 3960 + }, + { + "epoch": 0.85, + "learning_rate": 9.830880784815525e-05, + "loss": 4.074, + "step": 3965 + }, + { + "epoch": 0.85, + "learning_rate": 9.830667519727021e-05, + "loss": 4.0718, + "step": 3970 + }, + { + "epoch": 0.85, + "learning_rate": 9.830454254638516e-05, + "loss": 4.0085, + "step": 3975 + }, + { + "epoch": 0.85, + "learning_rate": 9.830240989550012e-05, + "loss": 4.0626, + "step": 3980 + }, + { + "epoch": 0.85, + "learning_rate": 9.830027724461506e-05, + "loss": 3.887, + "step": 3985 + }, + { + "epoch": 0.85, + "learning_rate": 9.829814459373001e-05, + "loss": 4.0241, + "step": 3990 + }, + { + "epoch": 0.85, + "learning_rate": 9.829601194284496e-05, + "loss": 4.0986, + "step": 3995 + }, + { + "epoch": 0.85, + "learning_rate": 9.829387929195991e-05, + "loss": 4.0039, + "step": 4000 + }, + { + "epoch": 0.85, + "learning_rate": 9.829174664107485e-05, + "loss": 4.0547, + "step": 4005 + }, + { + "epoch": 0.86, + "learning_rate": 9.82896139901898e-05, + "loss": 4.1122, + "step": 4010 + }, + { + "epoch": 0.86, + "learning_rate": 9.828748133930476e-05, + "loss": 4.0487, + "step": 4015 + }, + { + "epoch": 0.86, + "learning_rate": 9.828534868841972e-05, + "loss": 3.945, + "step": 4020 + }, + { + "epoch": 0.86, + "learning_rate": 9.828321603753467e-05, + "loss": 4.1208, + "step": 4025 + }, + { + "epoch": 0.86, + "learning_rate": 9.828108338664961e-05, + "loss": 4.1426, + "step": 4030 + }, + { + "epoch": 0.86, + "learning_rate": 9.827895073576457e-05, + "loss": 3.9897, + "step": 4035 + }, + { + "epoch": 0.86, + "learning_rate": 9.827681808487951e-05, + "loss": 4.0565, + "step": 4040 + }, + { + "epoch": 0.86, + "learning_rate": 9.827468543399446e-05, + "loss": 4.0652, + "step": 4045 + }, + { + "epoch": 0.86, + "learning_rate": 9.82725527831094e-05, + "loss": 4.1183, + "step": 4050 + }, + { + "epoch": 0.86, + "learning_rate": 9.827042013222436e-05, + "loss": 3.9832, + "step": 4055 + }, + { + "epoch": 0.87, + "learning_rate": 9.82682874813393e-05, + "loss": 4.0783, + "step": 4060 + }, + { + "epoch": 0.87, + "learning_rate": 9.826615483045425e-05, + "loss": 4.0811, + "step": 4065 + }, + { + "epoch": 0.87, + "learning_rate": 9.826402217956921e-05, + "loss": 4.042, + "step": 4070 + }, + { + "epoch": 0.87, + "learning_rate": 9.826188952868416e-05, + "loss": 4.1294, + "step": 4075 + }, + { + "epoch": 0.87, + "learning_rate": 9.825975687779912e-05, + "loss": 4.0604, + "step": 4080 + }, + { + "epoch": 0.87, + "learning_rate": 9.825762422691406e-05, + "loss": 4.1075, + "step": 4085 + }, + { + "epoch": 0.87, + "learning_rate": 9.825549157602901e-05, + "loss": 4.07, + "step": 4090 + }, + { + "epoch": 0.87, + "learning_rate": 9.825335892514396e-05, + "loss": 4.0523, + "step": 4095 + }, + { + "epoch": 0.87, + "learning_rate": 9.825122627425891e-05, + "loss": 4.0995, + "step": 4100 + }, + { + "epoch": 0.88, + "learning_rate": 9.824909362337385e-05, + "loss": 3.9715, + "step": 4105 + }, + { + "epoch": 0.88, + "learning_rate": 9.82469609724888e-05, + "loss": 3.9682, + "step": 4110 + }, + { + "epoch": 0.88, + "learning_rate": 9.824482832160375e-05, + "loss": 4.0932, + "step": 4115 + }, + { + "epoch": 0.88, + "learning_rate": 9.824269567071872e-05, + "loss": 4.0342, + "step": 4120 + }, + { + "epoch": 0.88, + "learning_rate": 9.824056301983366e-05, + "loss": 4.0012, + "step": 4125 + }, + { + "epoch": 0.88, + "learning_rate": 9.823843036894861e-05, + "loss": 4.1553, + "step": 4130 + }, + { + "epoch": 0.88, + "learning_rate": 9.823629771806355e-05, + "loss": 4.0861, + "step": 4135 + }, + { + "epoch": 0.88, + "learning_rate": 9.823416506717851e-05, + "loss": 4.0703, + "step": 4140 + }, + { + "epoch": 0.88, + "learning_rate": 9.823203241629346e-05, + "loss": 4.0106, + "step": 4145 + }, + { + "epoch": 0.88, + "learning_rate": 9.82298997654084e-05, + "loss": 4.0657, + "step": 4150 + }, + { + "epoch": 0.89, + "learning_rate": 9.822776711452336e-05, + "loss": 4.0908, + "step": 4155 + }, + { + "epoch": 0.89, + "learning_rate": 9.82256344636383e-05, + "loss": 4.0279, + "step": 4160 + }, + { + "epoch": 0.89, + "learning_rate": 9.822350181275325e-05, + "loss": 4.027, + "step": 4165 + }, + { + "epoch": 0.89, + "learning_rate": 9.822136916186821e-05, + "loss": 3.9981, + "step": 4170 + }, + { + "epoch": 0.89, + "learning_rate": 9.821923651098316e-05, + "loss": 4.0162, + "step": 4175 + }, + { + "epoch": 0.89, + "learning_rate": 9.82171038600981e-05, + "loss": 4.078, + "step": 4180 + }, + { + "epoch": 0.89, + "learning_rate": 9.821497120921306e-05, + "loss": 4.1236, + "step": 4185 + }, + { + "epoch": 0.89, + "learning_rate": 9.8212838558328e-05, + "loss": 3.9386, + "step": 4190 + }, + { + "epoch": 0.89, + "learning_rate": 9.821070590744296e-05, + "loss": 3.9884, + "step": 4195 + }, + { + "epoch": 0.9, + "learning_rate": 9.82085732565579e-05, + "loss": 4.0046, + "step": 4200 + }, + { + "epoch": 0.9, + "learning_rate": 9.820644060567285e-05, + "loss": 3.9942, + "step": 4205 + }, + { + "epoch": 0.9, + "learning_rate": 9.82043079547878e-05, + "loss": 3.9106, + "step": 4210 + }, + { + "epoch": 0.9, + "learning_rate": 9.820217530390276e-05, + "loss": 4.0357, + "step": 4215 + }, + { + "epoch": 0.9, + "learning_rate": 9.820004265301772e-05, + "loss": 4.0835, + "step": 4220 + }, + { + "epoch": 0.9, + "learning_rate": 9.819791000213266e-05, + "loss": 4.0181, + "step": 4225 + }, + { + "epoch": 0.9, + "learning_rate": 9.819577735124761e-05, + "loss": 4.059, + "step": 4230 + }, + { + "epoch": 0.9, + "learning_rate": 9.819364470036255e-05, + "loss": 3.9714, + "step": 4235 + }, + { + "epoch": 0.9, + "learning_rate": 9.819151204947751e-05, + "loss": 3.9571, + "step": 4240 + }, + { + "epoch": 0.91, + "learning_rate": 9.818937939859245e-05, + "loss": 4.0431, + "step": 4245 + }, + { + "epoch": 0.91, + "learning_rate": 9.81872467477074e-05, + "loss": 4.0491, + "step": 4250 + }, + { + "epoch": 0.91, + "learning_rate": 9.818511409682235e-05, + "loss": 4.0191, + "step": 4255 + }, + { + "epoch": 0.91, + "learning_rate": 9.81829814459373e-05, + "loss": 4.0263, + "step": 4260 + }, + { + "epoch": 0.91, + "learning_rate": 9.818084879505225e-05, + "loss": 4.0452, + "step": 4265 + }, + { + "epoch": 0.91, + "learning_rate": 9.817871614416721e-05, + "loss": 3.9861, + "step": 4270 + }, + { + "epoch": 0.91, + "learning_rate": 9.817658349328216e-05, + "loss": 4.0039, + "step": 4275 + }, + { + "epoch": 0.91, + "learning_rate": 9.81744508423971e-05, + "loss": 3.9324, + "step": 4280 + }, + { + "epoch": 0.91, + "learning_rate": 9.817231819151206e-05, + "loss": 4.0597, + "step": 4285 + }, + { + "epoch": 0.91, + "learning_rate": 9.8170185540627e-05, + "loss": 4.1195, + "step": 4290 + }, + { + "epoch": 0.92, + "learning_rate": 9.816805288974196e-05, + "loss": 4.0344, + "step": 4295 + }, + { + "epoch": 0.92, + "learning_rate": 9.81659202388569e-05, + "loss": 4.0057, + "step": 4300 + }, + { + "epoch": 0.92, + "learning_rate": 9.816378758797185e-05, + "loss": 4.0535, + "step": 4305 + }, + { + "epoch": 0.92, + "learning_rate": 9.81616549370868e-05, + "loss": 4.1324, + "step": 4310 + }, + { + "epoch": 0.92, + "learning_rate": 9.815952228620176e-05, + "loss": 4.0478, + "step": 4315 + }, + { + "epoch": 0.92, + "learning_rate": 9.81573896353167e-05, + "loss": 4.1411, + "step": 4320 + }, + { + "epoch": 0.92, + "learning_rate": 9.815525698443166e-05, + "loss": 4.0773, + "step": 4325 + }, + { + "epoch": 0.92, + "learning_rate": 9.81531243335466e-05, + "loss": 4.0551, + "step": 4330 + }, + { + "epoch": 0.92, + "learning_rate": 9.815099168266155e-05, + "loss": 4.1022, + "step": 4335 + }, + { + "epoch": 0.93, + "learning_rate": 9.814885903177651e-05, + "loss": 4.0866, + "step": 4340 + }, + { + "epoch": 0.93, + "learning_rate": 9.814672638089145e-05, + "loss": 3.9278, + "step": 4345 + }, + { + "epoch": 0.93, + "learning_rate": 9.81445937300064e-05, + "loss": 4.1098, + "step": 4350 + }, + { + "epoch": 0.93, + "learning_rate": 9.814246107912135e-05, + "loss": 3.9803, + "step": 4355 + }, + { + "epoch": 0.93, + "learning_rate": 9.81403284282363e-05, + "loss": 3.9907, + "step": 4360 + }, + { + "epoch": 0.93, + "learning_rate": 9.813819577735125e-05, + "loss": 3.996, + "step": 4365 + }, + { + "epoch": 0.93, + "learning_rate": 9.813606312646621e-05, + "loss": 4.108, + "step": 4370 + }, + { + "epoch": 0.93, + "learning_rate": 9.813393047558115e-05, + "loss": 4.0114, + "step": 4375 + }, + { + "epoch": 0.93, + "learning_rate": 9.81317978246961e-05, + "loss": 4.0698, + "step": 4380 + }, + { + "epoch": 0.94, + "learning_rate": 9.812966517381105e-05, + "loss": 4.0204, + "step": 4385 + }, + { + "epoch": 0.94, + "learning_rate": 9.8127532522926e-05, + "loss": 4.11, + "step": 4390 + }, + { + "epoch": 0.94, + "learning_rate": 9.812539987204094e-05, + "loss": 4.1124, + "step": 4395 + }, + { + "epoch": 0.94, + "learning_rate": 9.81232672211559e-05, + "loss": 4.023, + "step": 4400 + }, + { + "epoch": 0.94, + "learning_rate": 9.812113457027085e-05, + "loss": 4.0111, + "step": 4405 + }, + { + "epoch": 0.94, + "learning_rate": 9.81190019193858e-05, + "loss": 4.0778, + "step": 4410 + }, + { + "epoch": 0.94, + "learning_rate": 9.811686926850076e-05, + "loss": 3.9801, + "step": 4415 + }, + { + "epoch": 0.94, + "learning_rate": 9.81147366176157e-05, + "loss": 4.0222, + "step": 4420 + }, + { + "epoch": 0.94, + "learning_rate": 9.811260396673066e-05, + "loss": 4.0027, + "step": 4425 + }, + { + "epoch": 0.94, + "learning_rate": 9.81104713158456e-05, + "loss": 4.0564, + "step": 4430 + }, + { + "epoch": 0.95, + "learning_rate": 9.810833866496055e-05, + "loss": 4.0356, + "step": 4435 + }, + { + "epoch": 0.95, + "learning_rate": 9.81062060140755e-05, + "loss": 4.0885, + "step": 4440 + }, + { + "epoch": 0.95, + "learning_rate": 9.810407336319045e-05, + "loss": 4.0675, + "step": 4445 + }, + { + "epoch": 0.95, + "learning_rate": 9.810194071230539e-05, + "loss": 4.1067, + "step": 4450 + }, + { + "epoch": 0.95, + "learning_rate": 9.809980806142035e-05, + "loss": 4.0227, + "step": 4455 + }, + { + "epoch": 0.95, + "learning_rate": 9.80976754105353e-05, + "loss": 4.1558, + "step": 4460 + }, + { + "epoch": 0.95, + "learning_rate": 9.809554275965025e-05, + "loss": 4.0434, + "step": 4465 + }, + { + "epoch": 0.95, + "learning_rate": 9.809341010876521e-05, + "loss": 3.9757, + "step": 4470 + }, + { + "epoch": 0.95, + "learning_rate": 9.809127745788015e-05, + "loss": 4.0859, + "step": 4475 + }, + { + "epoch": 0.96, + "learning_rate": 9.80891448069951e-05, + "loss": 4.1458, + "step": 4480 + }, + { + "epoch": 0.96, + "learning_rate": 9.808701215611005e-05, + "loss": 3.9807, + "step": 4485 + }, + { + "epoch": 0.96, + "learning_rate": 9.8084879505225e-05, + "loss": 4.1011, + "step": 4490 + }, + { + "epoch": 0.96, + "learning_rate": 9.808274685433994e-05, + "loss": 4.1034, + "step": 4495 + }, + { + "epoch": 0.96, + "learning_rate": 9.80806142034549e-05, + "loss": 4.0167, + "step": 4500 + }, + { + "epoch": 0.96, + "learning_rate": 9.807848155256985e-05, + "loss": 4.0626, + "step": 4505 + }, + { + "epoch": 0.96, + "learning_rate": 9.80763489016848e-05, + "loss": 3.9839, + "step": 4510 + }, + { + "epoch": 0.96, + "learning_rate": 9.807421625079975e-05, + "loss": 4.0531, + "step": 4515 + }, + { + "epoch": 0.96, + "learning_rate": 9.80720835999147e-05, + "loss": 4.0065, + "step": 4520 + }, + { + "epoch": 0.96, + "learning_rate": 9.806995094902964e-05, + "loss": 4.0236, + "step": 4525 + }, + { + "epoch": 0.97, + "learning_rate": 9.80678182981446e-05, + "loss": 3.9416, + "step": 4530 + }, + { + "epoch": 0.97, + "learning_rate": 9.806568564725955e-05, + "loss": 4.0856, + "step": 4535 + }, + { + "epoch": 0.97, + "learning_rate": 9.80635529963745e-05, + "loss": 4.0172, + "step": 4540 + }, + { + "epoch": 0.97, + "learning_rate": 9.806142034548945e-05, + "loss": 4.0182, + "step": 4545 + }, + { + "epoch": 0.97, + "learning_rate": 9.805928769460439e-05, + "loss": 4.0691, + "step": 4550 + }, + { + "epoch": 0.97, + "learning_rate": 9.805715504371935e-05, + "loss": 3.9655, + "step": 4555 + }, + { + "epoch": 0.97, + "learning_rate": 9.80550223928343e-05, + "loss": 4.088, + "step": 4560 + }, + { + "epoch": 0.97, + "learning_rate": 9.805288974194925e-05, + "loss": 4.1289, + "step": 4565 + }, + { + "epoch": 0.97, + "learning_rate": 9.80507570910642e-05, + "loss": 4.0653, + "step": 4570 + }, + { + "epoch": 0.98, + "learning_rate": 9.804862444017915e-05, + "loss": 4.0539, + "step": 4575 + }, + { + "epoch": 0.98, + "learning_rate": 9.804649178929409e-05, + "loss": 4.0928, + "step": 4580 + }, + { + "epoch": 0.98, + "learning_rate": 9.804435913840905e-05, + "loss": 3.9413, + "step": 4585 + }, + { + "epoch": 0.98, + "learning_rate": 9.804222648752399e-05, + "loss": 4.1666, + "step": 4590 + }, + { + "epoch": 0.98, + "learning_rate": 9.804009383663894e-05, + "loss": 4.1192, + "step": 4595 + }, + { + "epoch": 0.98, + "learning_rate": 9.80379611857539e-05, + "loss": 3.9901, + "step": 4600 + }, + { + "epoch": 0.98, + "learning_rate": 9.803582853486885e-05, + "loss": 4.0651, + "step": 4605 + }, + { + "epoch": 0.98, + "learning_rate": 9.80336958839838e-05, + "loss": 3.9998, + "step": 4610 + }, + { + "epoch": 0.98, + "learning_rate": 9.803156323309875e-05, + "loss": 4.1184, + "step": 4615 + }, + { + "epoch": 0.99, + "learning_rate": 9.80294305822137e-05, + "loss": 4.0873, + "step": 4620 + }, + { + "epoch": 0.99, + "learning_rate": 9.802729793132864e-05, + "loss": 4.0269, + "step": 4625 + }, + { + "epoch": 0.99, + "learning_rate": 9.80251652804436e-05, + "loss": 4.0547, + "step": 4630 + }, + { + "epoch": 0.99, + "learning_rate": 9.802303262955854e-05, + "loss": 4.0636, + "step": 4635 + }, + { + "epoch": 0.99, + "learning_rate": 9.80208999786735e-05, + "loss": 4.0391, + "step": 4640 + }, + { + "epoch": 0.99, + "learning_rate": 9.801876732778844e-05, + "loss": 4.0591, + "step": 4645 + }, + { + "epoch": 0.99, + "learning_rate": 9.801663467690339e-05, + "loss": 4.0858, + "step": 4650 + }, + { + "epoch": 0.99, + "learning_rate": 9.801450202601835e-05, + "loss": 3.9863, + "step": 4655 + }, + { + "epoch": 0.99, + "learning_rate": 9.80123693751333e-05, + "loss": 4.0796, + "step": 4660 + }, + { + "epoch": 0.99, + "learning_rate": 9.801023672424825e-05, + "loss": 4.0941, + "step": 4665 + }, + { + "epoch": 1.0, + "learning_rate": 9.80081040733632e-05, + "loss": 3.9785, + "step": 4670 + }, + { + "epoch": 1.0, + "learning_rate": 9.800597142247815e-05, + "loss": 4.0741, + "step": 4675 + }, + { + "epoch": 1.0, + "learning_rate": 9.800383877159309e-05, + "loss": 4.0798, + "step": 4680 + }, + { + "epoch": 1.0, + "learning_rate": 9.800170612070805e-05, + "loss": 4.0571, + "step": 4685 + }, + { + "epoch": 1.0, + "learning_rate": 9.799957346982299e-05, + "loss": 4.1389, + "step": 4690 + }, + { + "epoch": 1.0, + "learning_rate": 9.799744081893794e-05, + "loss": 4.0451, + "step": 4695 + }, + { + "epoch": 1.0, + "learning_rate": 9.79953081680529e-05, + "loss": 4.0766, + "step": 4700 + }, + { + "epoch": 1.0, + "learning_rate": 9.799317551716785e-05, + "loss": 4.0352, + "step": 4705 + }, + { + "epoch": 1.0, + "learning_rate": 9.799104286628279e-05, + "loss": 4.034, + "step": 4710 + }, + { + "epoch": 1.01, + "learning_rate": 9.798891021539775e-05, + "loss": 4.1231, + "step": 4715 + }, + { + "epoch": 1.01, + "learning_rate": 9.798677756451269e-05, + "loss": 4.0251, + "step": 4720 + }, + { + "epoch": 1.01, + "learning_rate": 9.798464491362764e-05, + "loss": 4.0337, + "step": 4725 + }, + { + "epoch": 1.01, + "learning_rate": 9.79825122627426e-05, + "loss": 3.9813, + "step": 4730 + }, + { + "epoch": 1.01, + "learning_rate": 9.798037961185754e-05, + "loss": 4.0151, + "step": 4735 + }, + { + "epoch": 1.01, + "learning_rate": 9.79782469609725e-05, + "loss": 4.0088, + "step": 4740 + }, + { + "epoch": 1.01, + "learning_rate": 9.797611431008744e-05, + "loss": 4.0241, + "step": 4745 + }, + { + "epoch": 1.01, + "learning_rate": 9.797398165920239e-05, + "loss": 3.9885, + "step": 4750 + }, + { + "epoch": 1.01, + "learning_rate": 9.797184900831735e-05, + "loss": 4.0564, + "step": 4755 + }, + { + "epoch": 1.01, + "learning_rate": 9.79697163574323e-05, + "loss": 4.1148, + "step": 4760 + }, + { + "epoch": 1.02, + "learning_rate": 9.796758370654724e-05, + "loss": 3.9412, + "step": 4765 + }, + { + "epoch": 1.02, + "learning_rate": 9.79654510556622e-05, + "loss": 3.9033, + "step": 4770 + }, + { + "epoch": 1.02, + "learning_rate": 9.796331840477714e-05, + "loss": 4.1227, + "step": 4775 + }, + { + "epoch": 1.02, + "learning_rate": 9.796118575389209e-05, + "loss": 4.0332, + "step": 4780 + }, + { + "epoch": 1.02, + "learning_rate": 9.795905310300705e-05, + "loss": 4.0095, + "step": 4785 + }, + { + "epoch": 1.02, + "learning_rate": 9.795692045212199e-05, + "loss": 4.0263, + "step": 4790 + }, + { + "epoch": 1.02, + "learning_rate": 9.795478780123694e-05, + "loss": 4.0743, + "step": 4795 + }, + { + "epoch": 1.02, + "learning_rate": 9.79526551503519e-05, + "loss": 4.0627, + "step": 4800 + }, + { + "epoch": 1.02, + "learning_rate": 9.795052249946685e-05, + "loss": 4.0533, + "step": 4805 + }, + { + "epoch": 1.03, + "learning_rate": 9.79483898485818e-05, + "loss": 4.0252, + "step": 4810 + }, + { + "epoch": 1.03, + "learning_rate": 9.794625719769675e-05, + "loss": 3.9966, + "step": 4815 + }, + { + "epoch": 1.03, + "learning_rate": 9.794412454681169e-05, + "loss": 3.9786, + "step": 4820 + }, + { + "epoch": 1.03, + "learning_rate": 9.794199189592664e-05, + "loss": 4.0399, + "step": 4825 + }, + { + "epoch": 1.03, + "learning_rate": 9.793985924504159e-05, + "loss": 3.9489, + "step": 4830 + }, + { + "epoch": 1.03, + "learning_rate": 9.793772659415654e-05, + "loss": 4.1798, + "step": 4835 + }, + { + "epoch": 1.03, + "learning_rate": 9.793559394327148e-05, + "loss": 4.0606, + "step": 4840 + }, + { + "epoch": 1.03, + "learning_rate": 9.793346129238644e-05, + "loss": 4.0424, + "step": 4845 + }, + { + "epoch": 1.03, + "learning_rate": 9.793132864150139e-05, + "loss": 3.984, + "step": 4850 + }, + { + "epoch": 1.04, + "learning_rate": 9.792919599061635e-05, + "loss": 4.0398, + "step": 4855 + }, + { + "epoch": 1.04, + "learning_rate": 9.79270633397313e-05, + "loss": 3.9847, + "step": 4860 + }, + { + "epoch": 1.04, + "learning_rate": 9.792493068884624e-05, + "loss": 4.0735, + "step": 4865 + }, + { + "epoch": 1.04, + "learning_rate": 9.79227980379612e-05, + "loss": 4.0534, + "step": 4870 + }, + { + "epoch": 1.04, + "learning_rate": 9.792066538707614e-05, + "loss": 4.0098, + "step": 4875 + }, + { + "epoch": 1.04, + "learning_rate": 9.791853273619109e-05, + "loss": 4.0024, + "step": 4880 + }, + { + "epoch": 1.04, + "learning_rate": 9.791640008530603e-05, + "loss": 4.0428, + "step": 4885 + }, + { + "epoch": 1.04, + "learning_rate": 9.791426743442099e-05, + "loss": 3.9316, + "step": 4890 + }, + { + "epoch": 1.04, + "learning_rate": 9.791213478353594e-05, + "loss": 4.0487, + "step": 4895 + }, + { + "epoch": 1.04, + "learning_rate": 9.79100021326509e-05, + "loss": 3.9686, + "step": 4900 + }, + { + "epoch": 1.05, + "learning_rate": 9.790786948176584e-05, + "loss": 4.0, + "step": 4905 + }, + { + "epoch": 1.05, + "learning_rate": 9.79057368308808e-05, + "loss": 4.0191, + "step": 4910 + }, + { + "epoch": 1.05, + "learning_rate": 9.790360417999575e-05, + "loss": 3.8967, + "step": 4915 + }, + { + "epoch": 1.05, + "learning_rate": 9.790147152911069e-05, + "loss": 4.0222, + "step": 4920 + }, + { + "epoch": 1.05, + "learning_rate": 9.789933887822564e-05, + "loss": 3.9386, + "step": 4925 + }, + { + "epoch": 1.05, + "learning_rate": 9.789720622734059e-05, + "loss": 4.0404, + "step": 4930 + }, + { + "epoch": 1.05, + "learning_rate": 9.789507357645554e-05, + "loss": 4.066, + "step": 4935 + }, + { + "epoch": 1.05, + "learning_rate": 9.789294092557048e-05, + "loss": 3.9719, + "step": 4940 + }, + { + "epoch": 1.05, + "learning_rate": 9.789080827468544e-05, + "loss": 4.0497, + "step": 4945 + }, + { + "epoch": 1.06, + "learning_rate": 9.788867562380039e-05, + "loss": 4.0181, + "step": 4950 + }, + { + "epoch": 1.06, + "learning_rate": 9.788654297291535e-05, + "loss": 4.0261, + "step": 4955 + }, + { + "epoch": 1.06, + "learning_rate": 9.788441032203029e-05, + "loss": 4.0205, + "step": 4960 + }, + { + "epoch": 1.06, + "learning_rate": 9.788227767114524e-05, + "loss": 3.9148, + "step": 4965 + }, + { + "epoch": 1.06, + "learning_rate": 9.788014502026018e-05, + "loss": 3.9803, + "step": 4970 + }, + { + "epoch": 1.06, + "learning_rate": 9.787801236937514e-05, + "loss": 3.9548, + "step": 4975 + }, + { + "epoch": 1.06, + "learning_rate": 9.787587971849009e-05, + "loss": 4.0615, + "step": 4980 + }, + { + "epoch": 1.06, + "learning_rate": 9.787374706760503e-05, + "loss": 3.9432, + "step": 4985 + }, + { + "epoch": 1.06, + "learning_rate": 9.787161441671999e-05, + "loss": 4.1357, + "step": 4990 + }, + { + "epoch": 1.07, + "learning_rate": 9.786948176583494e-05, + "loss": 3.9741, + "step": 4995 + }, + { + "epoch": 1.07, + "learning_rate": 9.78673491149499e-05, + "loss": 3.9806, + "step": 5000 + }, + { + "epoch": 1.07, + "learning_rate": 9.786521646406484e-05, + "loss": 3.9904, + "step": 5005 + }, + { + "epoch": 1.07, + "learning_rate": 9.78630838131798e-05, + "loss": 3.9437, + "step": 5010 + }, + { + "epoch": 1.07, + "learning_rate": 9.786095116229473e-05, + "loss": 4.0055, + "step": 5015 + }, + { + "epoch": 1.07, + "learning_rate": 9.785881851140969e-05, + "loss": 4.0632, + "step": 5020 + }, + { + "epoch": 1.07, + "learning_rate": 9.785668586052463e-05, + "loss": 4.0871, + "step": 5025 + }, + { + "epoch": 1.07, + "learning_rate": 9.785455320963959e-05, + "loss": 4.0062, + "step": 5030 + }, + { + "epoch": 1.07, + "learning_rate": 9.785242055875453e-05, + "loss": 4.1021, + "step": 5035 + }, + { + "epoch": 1.07, + "learning_rate": 9.785028790786948e-05, + "loss": 4.0405, + "step": 5040 + }, + { + "epoch": 1.08, + "learning_rate": 9.784815525698444e-05, + "loss": 4.0284, + "step": 5045 + }, + { + "epoch": 1.08, + "learning_rate": 9.784602260609939e-05, + "loss": 3.9969, + "step": 5050 + }, + { + "epoch": 1.08, + "learning_rate": 9.784388995521435e-05, + "loss": 4.054, + "step": 5055 + }, + { + "epoch": 1.08, + "learning_rate": 9.784175730432929e-05, + "loss": 4.0135, + "step": 5060 + }, + { + "epoch": 1.08, + "learning_rate": 9.783962465344424e-05, + "loss": 4.114, + "step": 5065 + }, + { + "epoch": 1.08, + "learning_rate": 9.783749200255918e-05, + "loss": 3.9327, + "step": 5070 + }, + { + "epoch": 1.08, + "learning_rate": 9.783535935167414e-05, + "loss": 4.0121, + "step": 5075 + }, + { + "epoch": 1.08, + "learning_rate": 9.783322670078908e-05, + "loss": 3.981, + "step": 5080 + }, + { + "epoch": 1.08, + "learning_rate": 9.783109404990403e-05, + "loss": 3.9656, + "step": 5085 + }, + { + "epoch": 1.09, + "learning_rate": 9.782896139901899e-05, + "loss": 3.9937, + "step": 5090 + }, + { + "epoch": 1.09, + "learning_rate": 9.782682874813394e-05, + "loss": 4.0654, + "step": 5095 + }, + { + "epoch": 1.09, + "learning_rate": 9.782469609724888e-05, + "loss": 4.0631, + "step": 5100 + }, + { + "epoch": 1.09, + "learning_rate": 9.782256344636384e-05, + "loss": 3.985, + "step": 5105 + }, + { + "epoch": 1.09, + "learning_rate": 9.78204307954788e-05, + "loss": 3.9168, + "step": 5110 + }, + { + "epoch": 1.09, + "learning_rate": 9.781829814459373e-05, + "loss": 4.0774, + "step": 5115 + }, + { + "epoch": 1.09, + "learning_rate": 9.781616549370869e-05, + "loss": 3.9991, + "step": 5120 + }, + { + "epoch": 1.09, + "learning_rate": 9.781403284282363e-05, + "loss": 3.9888, + "step": 5125 + }, + { + "epoch": 1.09, + "learning_rate": 9.781190019193859e-05, + "loss": 3.9953, + "step": 5130 + }, + { + "epoch": 1.09, + "learning_rate": 9.780976754105353e-05, + "loss": 3.9908, + "step": 5135 + }, + { + "epoch": 1.1, + "learning_rate": 9.780763489016848e-05, + "loss": 4.0206, + "step": 5140 + }, + { + "epoch": 1.1, + "learning_rate": 9.780550223928344e-05, + "loss": 3.9307, + "step": 5145 + }, + { + "epoch": 1.1, + "learning_rate": 9.780336958839839e-05, + "loss": 3.963, + "step": 5150 + }, + { + "epoch": 1.1, + "learning_rate": 9.780123693751333e-05, + "loss": 4.0101, + "step": 5155 + }, + { + "epoch": 1.1, + "learning_rate": 9.779910428662829e-05, + "loss": 3.9811, + "step": 5160 + }, + { + "epoch": 1.1, + "learning_rate": 9.779697163574323e-05, + "loss": 4.0322, + "step": 5165 + }, + { + "epoch": 1.1, + "learning_rate": 9.779483898485818e-05, + "loss": 3.9802, + "step": 5170 + }, + { + "epoch": 1.1, + "learning_rate": 9.779270633397314e-05, + "loss": 4.0302, + "step": 5175 + }, + { + "epoch": 1.1, + "learning_rate": 9.779057368308808e-05, + "loss": 3.8904, + "step": 5180 + }, + { + "epoch": 1.11, + "learning_rate": 9.778844103220303e-05, + "loss": 4.1, + "step": 5185 + }, + { + "epoch": 1.11, + "learning_rate": 9.778630838131799e-05, + "loss": 4.0124, + "step": 5190 + }, + { + "epoch": 1.11, + "learning_rate": 9.778417573043294e-05, + "loss": 3.9944, + "step": 5195 + }, + { + "epoch": 1.11, + "learning_rate": 9.778204307954788e-05, + "loss": 4.0364, + "step": 5200 + }, + { + "epoch": 1.11, + "learning_rate": 9.777991042866284e-05, + "loss": 4.0015, + "step": 5205 + }, + { + "epoch": 1.11, + "learning_rate": 9.777777777777778e-05, + "loss": 4.0459, + "step": 5210 + }, + { + "epoch": 1.11, + "learning_rate": 9.777564512689273e-05, + "loss": 3.9391, + "step": 5215 + }, + { + "epoch": 1.11, + "learning_rate": 9.777351247600768e-05, + "loss": 3.9457, + "step": 5220 + }, + { + "epoch": 1.11, + "learning_rate": 9.777137982512263e-05, + "loss": 4.0463, + "step": 5225 + }, + { + "epoch": 1.12, + "learning_rate": 9.776924717423757e-05, + "loss": 3.9905, + "step": 5230 + }, + { + "epoch": 1.12, + "learning_rate": 9.776711452335253e-05, + "loss": 4.0013, + "step": 5235 + }, + { + "epoch": 1.12, + "learning_rate": 9.776498187246748e-05, + "loss": 3.9897, + "step": 5240 + }, + { + "epoch": 1.12, + "learning_rate": 9.776284922158244e-05, + "loss": 4.0325, + "step": 5245 + }, + { + "epoch": 1.12, + "learning_rate": 9.776071657069739e-05, + "loss": 3.965, + "step": 5250 + }, + { + "epoch": 1.12, + "learning_rate": 9.775858391981233e-05, + "loss": 4.0412, + "step": 5255 + }, + { + "epoch": 1.12, + "learning_rate": 9.775645126892729e-05, + "loss": 3.9602, + "step": 5260 + }, + { + "epoch": 1.12, + "learning_rate": 9.775431861804223e-05, + "loss": 4.1305, + "step": 5265 + }, + { + "epoch": 1.12, + "learning_rate": 9.775218596715718e-05, + "loss": 3.9748, + "step": 5270 + }, + { + "epoch": 1.12, + "learning_rate": 9.775005331627212e-05, + "loss": 4.0559, + "step": 5275 + }, + { + "epoch": 1.13, + "learning_rate": 9.774792066538708e-05, + "loss": 3.9368, + "step": 5280 + }, + { + "epoch": 1.13, + "learning_rate": 9.774578801450203e-05, + "loss": 4.027, + "step": 5285 + }, + { + "epoch": 1.13, + "learning_rate": 9.774365536361699e-05, + "loss": 3.9226, + "step": 5290 + }, + { + "epoch": 1.13, + "learning_rate": 9.774152271273193e-05, + "loss": 4.0619, + "step": 5295 + }, + { + "epoch": 1.13, + "learning_rate": 9.773939006184688e-05, + "loss": 3.9517, + "step": 5300 + }, + { + "epoch": 1.13, + "learning_rate": 9.773725741096184e-05, + "loss": 4.0764, + "step": 5305 + }, + { + "epoch": 1.13, + "learning_rate": 9.773512476007678e-05, + "loss": 4.0074, + "step": 5310 + }, + { + "epoch": 1.13, + "learning_rate": 9.773299210919173e-05, + "loss": 4.0077, + "step": 5315 + }, + { + "epoch": 1.13, + "learning_rate": 9.773085945830668e-05, + "loss": 3.9018, + "step": 5320 + }, + { + "epoch": 1.14, + "learning_rate": 9.772872680742163e-05, + "loss": 3.9531, + "step": 5325 + }, + { + "epoch": 1.14, + "learning_rate": 9.772659415653657e-05, + "loss": 4.0914, + "step": 5330 + }, + { + "epoch": 1.14, + "learning_rate": 9.772446150565153e-05, + "loss": 3.9734, + "step": 5335 + }, + { + "epoch": 1.14, + "learning_rate": 9.772232885476648e-05, + "loss": 4.063, + "step": 5340 + }, + { + "epoch": 1.14, + "learning_rate": 9.772019620388144e-05, + "loss": 4.0108, + "step": 5345 + }, + { + "epoch": 1.14, + "learning_rate": 9.771806355299638e-05, + "loss": 4.1145, + "step": 5350 + }, + { + "epoch": 1.14, + "learning_rate": 9.771593090211133e-05, + "loss": 3.965, + "step": 5355 + }, + { + "epoch": 1.14, + "learning_rate": 9.771379825122627e-05, + "loss": 3.9633, + "step": 5360 + }, + { + "epoch": 1.14, + "learning_rate": 9.771166560034123e-05, + "loss": 3.8689, + "step": 5365 + }, + { + "epoch": 1.15, + "learning_rate": 9.770953294945618e-05, + "loss": 3.9957, + "step": 5370 + }, + { + "epoch": 1.15, + "learning_rate": 9.770740029857112e-05, + "loss": 3.9757, + "step": 5375 + }, + { + "epoch": 1.15, + "learning_rate": 9.770526764768608e-05, + "loss": 4.0402, + "step": 5380 + }, + { + "epoch": 1.15, + "learning_rate": 9.770313499680103e-05, + "loss": 3.9377, + "step": 5385 + }, + { + "epoch": 1.15, + "learning_rate": 9.770100234591599e-05, + "loss": 3.9933, + "step": 5390 + }, + { + "epoch": 1.15, + "learning_rate": 9.769886969503093e-05, + "loss": 3.9787, + "step": 5395 + }, + { + "epoch": 1.15, + "learning_rate": 9.769673704414588e-05, + "loss": 4.0208, + "step": 5400 + }, + { + "epoch": 1.15, + "learning_rate": 9.769460439326083e-05, + "loss": 4.0543, + "step": 5405 + }, + { + "epoch": 1.15, + "learning_rate": 9.769247174237578e-05, + "loss": 4.0265, + "step": 5410 + }, + { + "epoch": 1.15, + "learning_rate": 9.769033909149072e-05, + "loss": 4.0181, + "step": 5415 + }, + { + "epoch": 1.16, + "learning_rate": 9.768820644060568e-05, + "loss": 4.0528, + "step": 5420 + }, + { + "epoch": 1.16, + "learning_rate": 9.768607378972062e-05, + "loss": 4.0293, + "step": 5425 + }, + { + "epoch": 1.16, + "learning_rate": 9.768394113883557e-05, + "loss": 3.9797, + "step": 5430 + }, + { + "epoch": 1.16, + "learning_rate": 9.768180848795053e-05, + "loss": 4.0331, + "step": 5435 + }, + { + "epoch": 1.16, + "learning_rate": 9.767967583706548e-05, + "loss": 3.9143, + "step": 5440 + }, + { + "epoch": 1.16, + "learning_rate": 9.767754318618044e-05, + "loss": 4.0379, + "step": 5445 + }, + { + "epoch": 1.16, + "learning_rate": 9.767541053529538e-05, + "loss": 3.9779, + "step": 5450 + }, + { + "epoch": 1.16, + "learning_rate": 9.767327788441033e-05, + "loss": 3.9843, + "step": 5455 + }, + { + "epoch": 1.16, + "learning_rate": 9.767114523352527e-05, + "loss": 4.0985, + "step": 5460 + }, + { + "epoch": 1.17, + "learning_rate": 9.766901258264023e-05, + "loss": 4.0878, + "step": 5465 + }, + { + "epoch": 1.17, + "learning_rate": 9.766687993175517e-05, + "loss": 3.9589, + "step": 5470 + }, + { + "epoch": 1.17, + "learning_rate": 9.766474728087012e-05, + "loss": 4.0069, + "step": 5475 + }, + { + "epoch": 1.17, + "learning_rate": 9.766261462998508e-05, + "loss": 4.0183, + "step": 5480 + }, + { + "epoch": 1.17, + "learning_rate": 9.766048197910003e-05, + "loss": 4.0204, + "step": 5485 + }, + { + "epoch": 1.17, + "learning_rate": 9.765834932821497e-05, + "loss": 4.027, + "step": 5490 + }, + { + "epoch": 1.17, + "learning_rate": 9.765621667732993e-05, + "loss": 4.0197, + "step": 5495 + }, + { + "epoch": 1.17, + "learning_rate": 9.765408402644488e-05, + "loss": 3.882, + "step": 5500 + }, + { + "epoch": 1.17, + "learning_rate": 9.765195137555983e-05, + "loss": 4.0029, + "step": 5505 + }, + { + "epoch": 1.17, + "learning_rate": 9.764981872467478e-05, + "loss": 3.9696, + "step": 5510 + }, + { + "epoch": 1.18, + "learning_rate": 9.764768607378972e-05, + "loss": 4.0449, + "step": 5515 + }, + { + "epoch": 1.18, + "learning_rate": 9.764555342290468e-05, + "loss": 4.0341, + "step": 5520 + }, + { + "epoch": 1.18, + "learning_rate": 9.764342077201962e-05, + "loss": 3.9208, + "step": 5525 + }, + { + "epoch": 1.18, + "learning_rate": 9.764128812113457e-05, + "loss": 4.0587, + "step": 5530 + }, + { + "epoch": 1.18, + "learning_rate": 9.763915547024953e-05, + "loss": 4.0061, + "step": 5535 + }, + { + "epoch": 1.18, + "learning_rate": 9.763702281936448e-05, + "loss": 3.9587, + "step": 5540 + }, + { + "epoch": 1.18, + "learning_rate": 9.763489016847942e-05, + "loss": 4.0801, + "step": 5545 + }, + { + "epoch": 1.18, + "learning_rate": 9.763275751759438e-05, + "loss": 4.0097, + "step": 5550 + }, + { + "epoch": 1.18, + "learning_rate": 9.763062486670932e-05, + "loss": 3.9811, + "step": 5555 + }, + { + "epoch": 1.19, + "learning_rate": 9.762849221582427e-05, + "loss": 4.0808, + "step": 5560 + }, + { + "epoch": 1.19, + "learning_rate": 9.762635956493923e-05, + "loss": 4.0921, + "step": 5565 + }, + { + "epoch": 1.19, + "learning_rate": 9.762422691405417e-05, + "loss": 3.8761, + "step": 5570 + }, + { + "epoch": 1.19, + "learning_rate": 9.762209426316912e-05, + "loss": 3.9694, + "step": 5575 + }, + { + "epoch": 1.19, + "learning_rate": 9.761996161228408e-05, + "loss": 3.9674, + "step": 5580 + }, + { + "epoch": 1.19, + "learning_rate": 9.761782896139903e-05, + "loss": 4.0061, + "step": 5585 + }, + { + "epoch": 1.19, + "learning_rate": 9.761569631051397e-05, + "loss": 3.9304, + "step": 5590 + }, + { + "epoch": 1.19, + "learning_rate": 9.761356365962893e-05, + "loss": 3.9911, + "step": 5595 + }, + { + "epoch": 1.19, + "learning_rate": 9.761143100874387e-05, + "loss": 3.9715, + "step": 5600 + }, + { + "epoch": 1.2, + "learning_rate": 9.760929835785883e-05, + "loss": 3.9093, + "step": 5605 + }, + { + "epoch": 1.2, + "learning_rate": 9.760716570697377e-05, + "loss": 3.9688, + "step": 5610 + }, + { + "epoch": 1.2, + "learning_rate": 9.760503305608872e-05, + "loss": 3.9166, + "step": 5615 + }, + { + "epoch": 1.2, + "learning_rate": 9.760290040520366e-05, + "loss": 4.1225, + "step": 5620 + }, + { + "epoch": 1.2, + "learning_rate": 9.760076775431862e-05, + "loss": 4.0253, + "step": 5625 + }, + { + "epoch": 1.2, + "learning_rate": 9.759863510343357e-05, + "loss": 3.9521, + "step": 5630 + }, + { + "epoch": 1.2, + "learning_rate": 9.759650245254853e-05, + "loss": 4.0724, + "step": 5635 + }, + { + "epoch": 1.2, + "learning_rate": 9.759436980166348e-05, + "loss": 4.0403, + "step": 5640 + }, + { + "epoch": 1.2, + "learning_rate": 9.759223715077842e-05, + "loss": 4.006, + "step": 5645 + }, + { + "epoch": 1.2, + "learning_rate": 9.759010449989338e-05, + "loss": 3.9746, + "step": 5650 + }, + { + "epoch": 1.21, + "learning_rate": 9.758797184900832e-05, + "loss": 4.0022, + "step": 5655 + }, + { + "epoch": 1.21, + "learning_rate": 9.758583919812327e-05, + "loss": 4.0028, + "step": 5660 + }, + { + "epoch": 1.21, + "learning_rate": 9.758370654723821e-05, + "loss": 4.0601, + "step": 5665 + }, + { + "epoch": 1.21, + "learning_rate": 9.758157389635317e-05, + "loss": 3.9339, + "step": 5670 + }, + { + "epoch": 1.21, + "learning_rate": 9.757944124546812e-05, + "loss": 3.9979, + "step": 5675 + }, + { + "epoch": 1.21, + "learning_rate": 9.757730859458308e-05, + "loss": 3.917, + "step": 5680 + }, + { + "epoch": 1.21, + "learning_rate": 9.757517594369802e-05, + "loss": 3.9101, + "step": 5685 + }, + { + "epoch": 1.21, + "learning_rate": 9.757304329281297e-05, + "loss": 3.9277, + "step": 5690 + }, + { + "epoch": 1.21, + "learning_rate": 9.757091064192793e-05, + "loss": 3.9479, + "step": 5695 + }, + { + "epoch": 1.22, + "learning_rate": 9.756877799104287e-05, + "loss": 3.9229, + "step": 5700 + }, + { + "epoch": 1.22, + "learning_rate": 9.756664534015783e-05, + "loss": 3.9866, + "step": 5705 + }, + { + "epoch": 1.22, + "learning_rate": 9.756451268927277e-05, + "loss": 4.0105, + "step": 5710 + }, + { + "epoch": 1.22, + "learning_rate": 9.756238003838772e-05, + "loss": 4.0302, + "step": 5715 + }, + { + "epoch": 1.22, + "learning_rate": 9.756024738750266e-05, + "loss": 3.9889, + "step": 5720 + }, + { + "epoch": 1.22, + "learning_rate": 9.755811473661762e-05, + "loss": 4.0629, + "step": 5725 + }, + { + "epoch": 1.22, + "learning_rate": 9.755598208573257e-05, + "loss": 3.9892, + "step": 5730 + }, + { + "epoch": 1.22, + "learning_rate": 9.755384943484753e-05, + "loss": 3.9448, + "step": 5735 + }, + { + "epoch": 1.22, + "learning_rate": 9.755171678396247e-05, + "loss": 3.9498, + "step": 5740 + }, + { + "epoch": 1.23, + "learning_rate": 9.754958413307742e-05, + "loss": 4.0775, + "step": 5745 + }, + { + "epoch": 1.23, + "learning_rate": 9.754745148219236e-05, + "loss": 3.977, + "step": 5750 + }, + { + "epoch": 1.23, + "learning_rate": 9.754531883130732e-05, + "loss": 3.9594, + "step": 5755 + }, + { + "epoch": 1.23, + "learning_rate": 9.754318618042227e-05, + "loss": 4.0098, + "step": 5760 + }, + { + "epoch": 1.23, + "learning_rate": 9.754105352953721e-05, + "loss": 4.0366, + "step": 5765 + }, + { + "epoch": 1.23, + "learning_rate": 9.753892087865217e-05, + "loss": 3.9989, + "step": 5770 + }, + { + "epoch": 1.23, + "learning_rate": 9.753678822776712e-05, + "loss": 4.0615, + "step": 5775 + }, + { + "epoch": 1.23, + "learning_rate": 9.753465557688208e-05, + "loss": 3.935, + "step": 5780 + }, + { + "epoch": 1.23, + "learning_rate": 9.753252292599702e-05, + "loss": 4.0137, + "step": 5785 + }, + { + "epoch": 1.23, + "learning_rate": 9.753039027511197e-05, + "loss": 4.015, + "step": 5790 + }, + { + "epoch": 1.24, + "learning_rate": 9.752825762422692e-05, + "loss": 4.0238, + "step": 5795 + }, + { + "epoch": 1.24, + "learning_rate": 9.752612497334187e-05, + "loss": 3.9634, + "step": 5800 + }, + { + "epoch": 1.24, + "learning_rate": 9.752399232245681e-05, + "loss": 3.8972, + "step": 5805 + }, + { + "epoch": 1.24, + "learning_rate": 9.752185967157177e-05, + "loss": 3.911, + "step": 5810 + }, + { + "epoch": 1.24, + "learning_rate": 9.751972702068671e-05, + "loss": 3.9551, + "step": 5815 + }, + { + "epoch": 1.24, + "learning_rate": 9.751759436980166e-05, + "loss": 3.9579, + "step": 5820 + }, + { + "epoch": 1.24, + "learning_rate": 9.751546171891662e-05, + "loss": 4.0427, + "step": 5825 + }, + { + "epoch": 1.24, + "learning_rate": 9.751332906803157e-05, + "loss": 3.9728, + "step": 5830 + }, + { + "epoch": 1.24, + "learning_rate": 9.751119641714653e-05, + "loss": 3.938, + "step": 5835 + }, + { + "epoch": 1.25, + "learning_rate": 9.750906376626147e-05, + "loss": 3.99, + "step": 5840 + }, + { + "epoch": 1.25, + "learning_rate": 9.750693111537642e-05, + "loss": 4.0078, + "step": 5845 + }, + { + "epoch": 1.25, + "learning_rate": 9.750479846449136e-05, + "loss": 3.9465, + "step": 5850 + }, + { + "epoch": 1.25, + "learning_rate": 9.750266581360632e-05, + "loss": 3.9558, + "step": 5855 + }, + { + "epoch": 1.25, + "learning_rate": 9.750053316272126e-05, + "loss": 4.0207, + "step": 5860 + }, + { + "epoch": 1.25, + "learning_rate": 9.749840051183621e-05, + "loss": 3.9998, + "step": 5865 + }, + { + "epoch": 1.25, + "learning_rate": 9.749626786095117e-05, + "loss": 3.9949, + "step": 5870 + }, + { + "epoch": 1.25, + "learning_rate": 9.749413521006612e-05, + "loss": 4.1192, + "step": 5875 + }, + { + "epoch": 1.25, + "learning_rate": 9.749200255918106e-05, + "loss": 4.0863, + "step": 5880 + }, + { + "epoch": 1.25, + "learning_rate": 9.748986990829602e-05, + "loss": 3.9634, + "step": 5885 + }, + { + "epoch": 1.26, + "learning_rate": 9.748773725741097e-05, + "loss": 4.0147, + "step": 5890 + }, + { + "epoch": 1.26, + "learning_rate": 9.748560460652592e-05, + "loss": 3.9261, + "step": 5895 + }, + { + "epoch": 1.26, + "learning_rate": 9.748347195564087e-05, + "loss": 3.9583, + "step": 5900 + }, + { + "epoch": 1.26, + "learning_rate": 9.748133930475581e-05, + "loss": 3.9653, + "step": 5905 + }, + { + "epoch": 1.26, + "learning_rate": 9.747920665387077e-05, + "loss": 3.9179, + "step": 5910 + }, + { + "epoch": 1.26, + "learning_rate": 9.747707400298571e-05, + "loss": 4.0832, + "step": 5915 + }, + { + "epoch": 1.26, + "learning_rate": 9.747494135210066e-05, + "loss": 3.9294, + "step": 5920 + }, + { + "epoch": 1.26, + "learning_rate": 9.747280870121562e-05, + "loss": 3.9206, + "step": 5925 + }, + { + "epoch": 1.26, + "learning_rate": 9.747067605033057e-05, + "loss": 4.0002, + "step": 5930 + }, + { + "epoch": 1.27, + "learning_rate": 9.746854339944551e-05, + "loss": 3.9828, + "step": 5935 + }, + { + "epoch": 1.27, + "learning_rate": 9.746641074856047e-05, + "loss": 3.9561, + "step": 5940 + }, + { + "epoch": 1.27, + "learning_rate": 9.746427809767541e-05, + "loss": 4.0734, + "step": 5945 + }, + { + "epoch": 1.27, + "learning_rate": 9.746214544679036e-05, + "loss": 3.9524, + "step": 5950 + }, + { + "epoch": 1.27, + "learning_rate": 9.746001279590532e-05, + "loss": 3.9586, + "step": 5955 + }, + { + "epoch": 1.27, + "learning_rate": 9.745788014502026e-05, + "loss": 3.9722, + "step": 5960 + }, + { + "epoch": 1.27, + "learning_rate": 9.745574749413521e-05, + "loss": 3.9812, + "step": 5965 + }, + { + "epoch": 1.27, + "learning_rate": 9.745361484325017e-05, + "loss": 3.9916, + "step": 5970 + }, + { + "epoch": 1.27, + "learning_rate": 9.745148219236512e-05, + "loss": 4.072, + "step": 5975 + }, + { + "epoch": 1.28, + "learning_rate": 9.744934954148006e-05, + "loss": 3.9842, + "step": 5980 + }, + { + "epoch": 1.28, + "learning_rate": 9.744721689059502e-05, + "loss": 3.938, + "step": 5985 + }, + { + "epoch": 1.28, + "learning_rate": 9.744508423970996e-05, + "loss": 4.1343, + "step": 5990 + }, + { + "epoch": 1.28, + "learning_rate": 9.744295158882492e-05, + "loss": 3.9017, + "step": 5995 + }, + { + "epoch": 1.28, + "learning_rate": 9.744081893793986e-05, + "loss": 3.8578, + "step": 6000 + }, + { + "epoch": 1.28, + "learning_rate": 9.743868628705481e-05, + "loss": 3.94, + "step": 6005 + }, + { + "epoch": 1.28, + "learning_rate": 9.743655363616975e-05, + "loss": 3.9782, + "step": 6010 + }, + { + "epoch": 1.28, + "learning_rate": 9.743442098528471e-05, + "loss": 3.9211, + "step": 6015 + }, + { + "epoch": 1.28, + "learning_rate": 9.743228833439966e-05, + "loss": 3.9576, + "step": 6020 + }, + { + "epoch": 1.28, + "learning_rate": 9.743015568351462e-05, + "loss": 4.0611, + "step": 6025 + }, + { + "epoch": 1.29, + "learning_rate": 9.742802303262957e-05, + "loss": 4.0018, + "step": 6030 + }, + { + "epoch": 1.29, + "learning_rate": 9.742589038174451e-05, + "loss": 3.8432, + "step": 6035 + }, + { + "epoch": 1.29, + "learning_rate": 9.742375773085947e-05, + "loss": 3.9131, + "step": 6040 + }, + { + "epoch": 1.29, + "learning_rate": 9.742162507997441e-05, + "loss": 4.0715, + "step": 6045 + }, + { + "epoch": 1.29, + "learning_rate": 9.741949242908936e-05, + "loss": 3.9914, + "step": 6050 + }, + { + "epoch": 1.29, + "learning_rate": 9.74173597782043e-05, + "loss": 3.8975, + "step": 6055 + }, + { + "epoch": 1.29, + "learning_rate": 9.741522712731926e-05, + "loss": 3.9765, + "step": 6060 + }, + { + "epoch": 1.29, + "learning_rate": 9.741309447643421e-05, + "loss": 4.0488, + "step": 6065 + }, + { + "epoch": 1.29, + "learning_rate": 9.741096182554917e-05, + "loss": 3.9656, + "step": 6070 + }, + { + "epoch": 1.3, + "learning_rate": 9.740882917466411e-05, + "loss": 4.0355, + "step": 6075 + }, + { + "epoch": 1.3, + "learning_rate": 9.740669652377907e-05, + "loss": 4.0277, + "step": 6080 + }, + { + "epoch": 1.3, + "learning_rate": 9.740456387289402e-05, + "loss": 3.9381, + "step": 6085 + }, + { + "epoch": 1.3, + "learning_rate": 9.740243122200896e-05, + "loss": 3.9785, + "step": 6090 + }, + { + "epoch": 1.3, + "learning_rate": 9.740029857112392e-05, + "loss": 3.8608, + "step": 6095 + }, + { + "epoch": 1.3, + "learning_rate": 9.739816592023886e-05, + "loss": 4.0212, + "step": 6100 + }, + { + "epoch": 1.3, + "learning_rate": 9.739603326935381e-05, + "loss": 3.8705, + "step": 6105 + }, + { + "epoch": 1.3, + "learning_rate": 9.739390061846875e-05, + "loss": 4.0517, + "step": 6110 + }, + { + "epoch": 1.3, + "learning_rate": 9.739176796758371e-05, + "loss": 3.9316, + "step": 6115 + }, + { + "epoch": 1.3, + "learning_rate": 9.738963531669866e-05, + "loss": 3.8944, + "step": 6120 + }, + { + "epoch": 1.31, + "learning_rate": 9.738750266581362e-05, + "loss": 3.9119, + "step": 6125 + }, + { + "epoch": 1.31, + "learning_rate": 9.738537001492856e-05, + "loss": 3.942, + "step": 6130 + }, + { + "epoch": 1.31, + "learning_rate": 9.738323736404351e-05, + "loss": 4.0059, + "step": 6135 + }, + { + "epoch": 1.31, + "learning_rate": 9.738110471315845e-05, + "loss": 4.0799, + "step": 6140 + }, + { + "epoch": 1.31, + "learning_rate": 9.737897206227341e-05, + "loss": 3.9049, + "step": 6145 + }, + { + "epoch": 1.31, + "learning_rate": 9.737683941138836e-05, + "loss": 3.9992, + "step": 6150 + }, + { + "epoch": 1.31, + "learning_rate": 9.73747067605033e-05, + "loss": 3.9884, + "step": 6155 + }, + { + "epoch": 1.31, + "learning_rate": 9.737257410961826e-05, + "loss": 4.0387, + "step": 6160 + }, + { + "epoch": 1.31, + "learning_rate": 9.737044145873321e-05, + "loss": 3.9519, + "step": 6165 + }, + { + "epoch": 1.32, + "learning_rate": 9.736830880784817e-05, + "loss": 4.0121, + "step": 6170 + }, + { + "epoch": 1.32, + "learning_rate": 9.736617615696311e-05, + "loss": 4.0136, + "step": 6175 + }, + { + "epoch": 1.32, + "learning_rate": 9.736404350607807e-05, + "loss": 3.9673, + "step": 6180 + }, + { + "epoch": 1.32, + "learning_rate": 9.7361910855193e-05, + "loss": 3.971, + "step": 6185 + }, + { + "epoch": 1.32, + "learning_rate": 9.735977820430796e-05, + "loss": 4.0319, + "step": 6190 + }, + { + "epoch": 1.32, + "learning_rate": 9.73576455534229e-05, + "loss": 3.9679, + "step": 6195 + }, + { + "epoch": 1.32, + "learning_rate": 9.735551290253786e-05, + "loss": 3.9192, + "step": 6200 + }, + { + "epoch": 1.32, + "learning_rate": 9.73533802516528e-05, + "loss": 3.9561, + "step": 6205 + }, + { + "epoch": 1.32, + "learning_rate": 9.735124760076775e-05, + "loss": 3.9399, + "step": 6210 + }, + { + "epoch": 1.33, + "learning_rate": 9.734911494988271e-05, + "loss": 3.9983, + "step": 6215 + }, + { + "epoch": 1.33, + "learning_rate": 9.734698229899766e-05, + "loss": 4.0306, + "step": 6220 + }, + { + "epoch": 1.33, + "learning_rate": 9.734484964811262e-05, + "loss": 3.9713, + "step": 6225 + }, + { + "epoch": 1.33, + "learning_rate": 9.734271699722756e-05, + "loss": 4.0409, + "step": 6230 + }, + { + "epoch": 1.33, + "learning_rate": 9.734058434634251e-05, + "loss": 4.0486, + "step": 6235 + }, + { + "epoch": 1.33, + "learning_rate": 9.733845169545745e-05, + "loss": 3.9206, + "step": 6240 + }, + { + "epoch": 1.33, + "learning_rate": 9.733631904457241e-05, + "loss": 3.9002, + "step": 6245 + }, + { + "epoch": 1.33, + "learning_rate": 9.733418639368735e-05, + "loss": 4.0133, + "step": 6250 + }, + { + "epoch": 1.33, + "learning_rate": 9.73320537428023e-05, + "loss": 4.0602, + "step": 6255 + }, + { + "epoch": 1.33, + "learning_rate": 9.732992109191726e-05, + "loss": 3.931, + "step": 6260 + }, + { + "epoch": 1.34, + "learning_rate": 9.732778844103221e-05, + "loss": 4.0271, + "step": 6265 + }, + { + "epoch": 1.34, + "learning_rate": 9.732565579014716e-05, + "loss": 3.8892, + "step": 6270 + }, + { + "epoch": 1.34, + "learning_rate": 9.732352313926211e-05, + "loss": 3.996, + "step": 6275 + }, + { + "epoch": 1.34, + "learning_rate": 9.732139048837707e-05, + "loss": 3.9151, + "step": 6280 + }, + { + "epoch": 1.34, + "learning_rate": 9.7319257837492e-05, + "loss": 3.9578, + "step": 6285 + }, + { + "epoch": 1.34, + "learning_rate": 9.731712518660696e-05, + "loss": 4.057, + "step": 6290 + }, + { + "epoch": 1.34, + "learning_rate": 9.73149925357219e-05, + "loss": 3.991, + "step": 6295 + }, + { + "epoch": 1.34, + "learning_rate": 9.731285988483686e-05, + "loss": 4.0351, + "step": 6300 + }, + { + "epoch": 1.34, + "learning_rate": 9.73107272339518e-05, + "loss": 3.9629, + "step": 6305 + }, + { + "epoch": 1.35, + "learning_rate": 9.730859458306675e-05, + "loss": 4.0369, + "step": 6310 + }, + { + "epoch": 1.35, + "learning_rate": 9.730646193218171e-05, + "loss": 4.0343, + "step": 6315 + }, + { + "epoch": 1.35, + "learning_rate": 9.730432928129666e-05, + "loss": 3.992, + "step": 6320 + }, + { + "epoch": 1.35, + "learning_rate": 9.73021966304116e-05, + "loss": 4.0402, + "step": 6325 + }, + { + "epoch": 1.35, + "learning_rate": 9.730006397952656e-05, + "loss": 3.8911, + "step": 6330 + }, + { + "epoch": 1.35, + "learning_rate": 9.72979313286415e-05, + "loss": 3.9832, + "step": 6335 + }, + { + "epoch": 1.35, + "learning_rate": 9.729579867775645e-05, + "loss": 4.1012, + "step": 6340 + }, + { + "epoch": 1.35, + "learning_rate": 9.729366602687141e-05, + "loss": 4.0473, + "step": 6345 + }, + { + "epoch": 1.35, + "learning_rate": 9.729153337598635e-05, + "loss": 3.9977, + "step": 6350 + }, + { + "epoch": 1.36, + "learning_rate": 9.72894007251013e-05, + "loss": 3.9992, + "step": 6355 + }, + { + "epoch": 1.36, + "learning_rate": 9.728726807421626e-05, + "loss": 3.8766, + "step": 6360 + }, + { + "epoch": 1.36, + "learning_rate": 9.728513542333121e-05, + "loss": 3.9757, + "step": 6365 + }, + { + "epoch": 1.36, + "learning_rate": 9.728300277244616e-05, + "loss": 4.0369, + "step": 6370 + }, + { + "epoch": 1.36, + "learning_rate": 9.728087012156111e-05, + "loss": 4.0345, + "step": 6375 + }, + { + "epoch": 1.36, + "learning_rate": 9.727873747067605e-05, + "loss": 4.044, + "step": 6380 + }, + { + "epoch": 1.36, + "learning_rate": 9.7276604819791e-05, + "loss": 3.975, + "step": 6385 + }, + { + "epoch": 1.36, + "learning_rate": 9.727447216890595e-05, + "loss": 3.9199, + "step": 6390 + }, + { + "epoch": 1.36, + "learning_rate": 9.72723395180209e-05, + "loss": 4.0104, + "step": 6395 + }, + { + "epoch": 1.36, + "learning_rate": 9.727020686713584e-05, + "loss": 3.996, + "step": 6400 + }, + { + "epoch": 1.37, + "learning_rate": 9.72680742162508e-05, + "loss": 3.9792, + "step": 6405 + }, + { + "epoch": 1.37, + "learning_rate": 9.726594156536575e-05, + "loss": 3.892, + "step": 6410 + }, + { + "epoch": 1.37, + "learning_rate": 9.726380891448071e-05, + "loss": 4.025, + "step": 6415 + }, + { + "epoch": 1.37, + "learning_rate": 9.726167626359566e-05, + "loss": 3.9211, + "step": 6420 + }, + { + "epoch": 1.37, + "learning_rate": 9.72595436127106e-05, + "loss": 4.0579, + "step": 6425 + }, + { + "epoch": 1.37, + "learning_rate": 9.725741096182556e-05, + "loss": 3.9146, + "step": 6430 + }, + { + "epoch": 1.37, + "learning_rate": 9.72552783109405e-05, + "loss": 3.9175, + "step": 6435 + }, + { + "epoch": 1.37, + "learning_rate": 9.725314566005545e-05, + "loss": 3.926, + "step": 6440 + }, + { + "epoch": 1.37, + "learning_rate": 9.72510130091704e-05, + "loss": 3.9523, + "step": 6445 + }, + { + "epoch": 1.38, + "learning_rate": 9.724888035828535e-05, + "loss": 3.8757, + "step": 6450 + }, + { + "epoch": 1.38, + "learning_rate": 9.72467477074003e-05, + "loss": 4.0347, + "step": 6455 + }, + { + "epoch": 1.38, + "learning_rate": 9.724461505651526e-05, + "loss": 3.9398, + "step": 6460 + }, + { + "epoch": 1.38, + "learning_rate": 9.72424824056302e-05, + "loss": 3.9983, + "step": 6465 + }, + { + "epoch": 1.38, + "learning_rate": 9.724034975474516e-05, + "loss": 3.9665, + "step": 6470 + }, + { + "epoch": 1.38, + "learning_rate": 9.723821710386011e-05, + "loss": 3.8756, + "step": 6475 + }, + { + "epoch": 1.38, + "learning_rate": 9.723608445297505e-05, + "loss": 4.0386, + "step": 6480 + }, + { + "epoch": 1.38, + "learning_rate": 9.723395180209e-05, + "loss": 4.0804, + "step": 6485 + }, + { + "epoch": 1.38, + "learning_rate": 9.723181915120495e-05, + "loss": 3.967, + "step": 6490 + }, + { + "epoch": 1.38, + "learning_rate": 9.72296865003199e-05, + "loss": 3.9696, + "step": 6495 + }, + { + "epoch": 1.39, + "learning_rate": 9.722755384943484e-05, + "loss": 4.0244, + "step": 6500 + }, + { + "epoch": 1.39, + "learning_rate": 9.72254211985498e-05, + "loss": 3.9741, + "step": 6505 + }, + { + "epoch": 1.39, + "learning_rate": 9.722328854766475e-05, + "loss": 3.964, + "step": 6510 + }, + { + "epoch": 1.39, + "learning_rate": 9.722115589677971e-05, + "loss": 4.0751, + "step": 6515 + }, + { + "epoch": 1.39, + "learning_rate": 9.721902324589465e-05, + "loss": 3.9303, + "step": 6520 + }, + { + "epoch": 1.39, + "learning_rate": 9.72168905950096e-05, + "loss": 4.0814, + "step": 6525 + }, + { + "epoch": 1.39, + "learning_rate": 9.721475794412454e-05, + "loss": 3.9309, + "step": 6530 + }, + { + "epoch": 1.39, + "learning_rate": 9.72126252932395e-05, + "loss": 4.0157, + "step": 6535 + }, + { + "epoch": 1.39, + "learning_rate": 9.721049264235445e-05, + "loss": 3.9339, + "step": 6540 + }, + { + "epoch": 1.4, + "learning_rate": 9.72083599914694e-05, + "loss": 4.0612, + "step": 6545 + }, + { + "epoch": 1.4, + "learning_rate": 9.720622734058435e-05, + "loss": 4.0784, + "step": 6550 + }, + { + "epoch": 1.4, + "learning_rate": 9.72040946896993e-05, + "loss": 3.9563, + "step": 6555 + }, + { + "epoch": 1.4, + "learning_rate": 9.720196203881426e-05, + "loss": 3.9543, + "step": 6560 + }, + { + "epoch": 1.4, + "learning_rate": 9.71998293879292e-05, + "loss": 3.9408, + "step": 6565 + }, + { + "epoch": 1.4, + "learning_rate": 9.719769673704416e-05, + "loss": 3.9911, + "step": 6570 + }, + { + "epoch": 1.4, + "learning_rate": 9.71955640861591e-05, + "loss": 4.0082, + "step": 6575 + }, + { + "epoch": 1.4, + "learning_rate": 9.719343143527405e-05, + "loss": 3.9502, + "step": 6580 + }, + { + "epoch": 1.4, + "learning_rate": 9.719129878438899e-05, + "loss": 3.8762, + "step": 6585 + }, + { + "epoch": 1.41, + "learning_rate": 9.718916613350395e-05, + "loss": 3.9223, + "step": 6590 + }, + { + "epoch": 1.41, + "learning_rate": 9.718703348261889e-05, + "loss": 3.9421, + "step": 6595 + }, + { + "epoch": 1.41, + "learning_rate": 9.718490083173384e-05, + "loss": 3.9378, + "step": 6600 + }, + { + "epoch": 1.41, + "learning_rate": 9.71827681808488e-05, + "loss": 3.9996, + "step": 6605 + }, + { + "epoch": 1.41, + "learning_rate": 9.718063552996375e-05, + "loss": 3.9914, + "step": 6610 + }, + { + "epoch": 1.41, + "learning_rate": 9.717850287907871e-05, + "loss": 3.9317, + "step": 6615 + }, + { + "epoch": 1.41, + "learning_rate": 9.717637022819365e-05, + "loss": 3.9648, + "step": 6620 + }, + { + "epoch": 1.41, + "learning_rate": 9.71742375773086e-05, + "loss": 3.9988, + "step": 6625 + }, + { + "epoch": 1.41, + "learning_rate": 9.717210492642354e-05, + "loss": 3.9859, + "step": 6630 + }, + { + "epoch": 1.41, + "learning_rate": 9.71699722755385e-05, + "loss": 3.9104, + "step": 6635 + }, + { + "epoch": 1.42, + "learning_rate": 9.716783962465344e-05, + "loss": 3.9238, + "step": 6640 + }, + { + "epoch": 1.42, + "learning_rate": 9.71657069737684e-05, + "loss": 3.9543, + "step": 6645 + }, + { + "epoch": 1.42, + "learning_rate": 9.716357432288335e-05, + "loss": 3.9124, + "step": 6650 + }, + { + "epoch": 1.42, + "learning_rate": 9.71614416719983e-05, + "loss": 3.8599, + "step": 6655 + }, + { + "epoch": 1.42, + "learning_rate": 9.715930902111326e-05, + "loss": 3.9637, + "step": 6660 + }, + { + "epoch": 1.42, + "learning_rate": 9.71571763702282e-05, + "loss": 3.935, + "step": 6665 + }, + { + "epoch": 1.42, + "learning_rate": 9.715504371934316e-05, + "loss": 3.9602, + "step": 6670 + }, + { + "epoch": 1.42, + "learning_rate": 9.71529110684581e-05, + "loss": 3.9441, + "step": 6675 + }, + { + "epoch": 1.42, + "learning_rate": 9.715077841757305e-05, + "loss": 4.0186, + "step": 6680 + }, + { + "epoch": 1.43, + "learning_rate": 9.714864576668799e-05, + "loss": 3.8798, + "step": 6685 + }, + { + "epoch": 1.43, + "learning_rate": 9.714651311580295e-05, + "loss": 3.9574, + "step": 6690 + }, + { + "epoch": 1.43, + "learning_rate": 9.714438046491789e-05, + "loss": 3.9688, + "step": 6695 + }, + { + "epoch": 1.43, + "learning_rate": 9.714224781403284e-05, + "loss": 3.9298, + "step": 6700 + }, + { + "epoch": 1.43, + "learning_rate": 9.71401151631478e-05, + "loss": 3.8945, + "step": 6705 + }, + { + "epoch": 1.43, + "learning_rate": 9.713798251226275e-05, + "loss": 3.9595, + "step": 6710 + }, + { + "epoch": 1.43, + "learning_rate": 9.71358498613777e-05, + "loss": 4.0051, + "step": 6715 + }, + { + "epoch": 1.43, + "learning_rate": 9.713371721049265e-05, + "loss": 3.9391, + "step": 6720 + }, + { + "epoch": 1.43, + "learning_rate": 9.71315845596076e-05, + "loss": 3.9339, + "step": 6725 + }, + { + "epoch": 1.44, + "learning_rate": 9.712945190872254e-05, + "loss": 3.9424, + "step": 6730 + }, + { + "epoch": 1.44, + "learning_rate": 9.71273192578375e-05, + "loss": 4.0085, + "step": 6735 + }, + { + "epoch": 1.44, + "learning_rate": 9.712518660695244e-05, + "loss": 3.9636, + "step": 6740 + }, + { + "epoch": 1.44, + "learning_rate": 9.71230539560674e-05, + "loss": 3.9177, + "step": 6745 + }, + { + "epoch": 1.44, + "learning_rate": 9.712092130518235e-05, + "loss": 3.9685, + "step": 6750 + }, + { + "epoch": 1.44, + "learning_rate": 9.71187886542973e-05, + "loss": 4.0065, + "step": 6755 + }, + { + "epoch": 1.44, + "learning_rate": 9.711665600341225e-05, + "loss": 3.9373, + "step": 6760 + }, + { + "epoch": 1.44, + "learning_rate": 9.71145233525272e-05, + "loss": 3.969, + "step": 6765 + }, + { + "epoch": 1.44, + "learning_rate": 9.711239070164214e-05, + "loss": 3.9644, + "step": 6770 + }, + { + "epoch": 1.44, + "learning_rate": 9.71102580507571e-05, + "loss": 3.8681, + "step": 6775 + }, + { + "epoch": 1.45, + "learning_rate": 9.710812539987204e-05, + "loss": 3.8926, + "step": 6780 + }, + { + "epoch": 1.45, + "learning_rate": 9.710599274898699e-05, + "loss": 4.0021, + "step": 6785 + }, + { + "epoch": 1.45, + "learning_rate": 9.710386009810195e-05, + "loss": 3.9215, + "step": 6790 + }, + { + "epoch": 1.45, + "learning_rate": 9.710172744721689e-05, + "loss": 3.9291, + "step": 6795 + }, + { + "epoch": 1.45, + "learning_rate": 9.709959479633184e-05, + "loss": 3.911, + "step": 6800 + }, + { + "epoch": 1.45, + "learning_rate": 9.70974621454468e-05, + "loss": 3.975, + "step": 6805 + }, + { + "epoch": 1.45, + "learning_rate": 9.709532949456175e-05, + "loss": 4.0073, + "step": 6810 + }, + { + "epoch": 1.45, + "learning_rate": 9.70931968436767e-05, + "loss": 3.8968, + "step": 6815 + }, + { + "epoch": 1.45, + "learning_rate": 9.709106419279165e-05, + "loss": 3.9345, + "step": 6820 + }, + { + "epoch": 1.46, + "learning_rate": 9.708893154190659e-05, + "loss": 3.9894, + "step": 6825 + }, + { + "epoch": 1.46, + "learning_rate": 9.708679889102154e-05, + "loss": 3.9378, + "step": 6830 + }, + { + "epoch": 1.46, + "learning_rate": 9.708466624013649e-05, + "loss": 3.9816, + "step": 6835 + }, + { + "epoch": 1.46, + "learning_rate": 9.708253358925144e-05, + "loss": 3.9267, + "step": 6840 + }, + { + "epoch": 1.46, + "learning_rate": 9.70804009383664e-05, + "loss": 3.9295, + "step": 6845 + }, + { + "epoch": 1.46, + "learning_rate": 9.707826828748135e-05, + "loss": 3.8927, + "step": 6850 + }, + { + "epoch": 1.46, + "learning_rate": 9.70761356365963e-05, + "loss": 3.9844, + "step": 6855 + }, + { + "epoch": 1.46, + "learning_rate": 9.707400298571125e-05, + "loss": 3.8569, + "step": 6860 + }, + { + "epoch": 1.46, + "learning_rate": 9.70718703348262e-05, + "loss": 3.9604, + "step": 6865 + }, + { + "epoch": 1.46, + "learning_rate": 9.706973768394114e-05, + "loss": 3.9782, + "step": 6870 + }, + { + "epoch": 1.47, + "learning_rate": 9.70676050330561e-05, + "loss": 3.9195, + "step": 6875 + }, + { + "epoch": 1.47, + "learning_rate": 9.706547238217104e-05, + "loss": 4.0089, + "step": 6880 + }, + { + "epoch": 1.47, + "learning_rate": 9.706333973128599e-05, + "loss": 4.056, + "step": 6885 + }, + { + "epoch": 1.47, + "learning_rate": 9.706120708040093e-05, + "loss": 4.0068, + "step": 6890 + }, + { + "epoch": 1.47, + "learning_rate": 9.705907442951589e-05, + "loss": 4.0302, + "step": 6895 + }, + { + "epoch": 1.47, + "learning_rate": 9.705694177863084e-05, + "loss": 3.9245, + "step": 6900 + }, + { + "epoch": 1.47, + "learning_rate": 9.70548091277458e-05, + "loss": 4.0525, + "step": 6905 + }, + { + "epoch": 1.47, + "learning_rate": 9.705267647686074e-05, + "loss": 3.9153, + "step": 6910 + }, + { + "epoch": 1.47, + "learning_rate": 9.70505438259757e-05, + "loss": 3.8977, + "step": 6915 + }, + { + "epoch": 1.48, + "learning_rate": 9.704841117509065e-05, + "loss": 3.9582, + "step": 6920 + }, + { + "epoch": 1.48, + "learning_rate": 9.704627852420559e-05, + "loss": 3.8941, + "step": 6925 + }, + { + "epoch": 1.48, + "learning_rate": 9.704414587332054e-05, + "loss": 3.929, + "step": 6930 + }, + { + "epoch": 1.48, + "learning_rate": 9.704201322243549e-05, + "loss": 3.8578, + "step": 6935 + }, + { + "epoch": 1.48, + "learning_rate": 9.703988057155044e-05, + "loss": 3.9318, + "step": 6940 + }, + { + "epoch": 1.48, + "learning_rate": 9.70377479206654e-05, + "loss": 4.081, + "step": 6945 + }, + { + "epoch": 1.48, + "learning_rate": 9.703561526978035e-05, + "loss": 3.9084, + "step": 6950 + }, + { + "epoch": 1.48, + "learning_rate": 9.703348261889529e-05, + "loss": 3.9184, + "step": 6955 + }, + { + "epoch": 1.48, + "learning_rate": 9.703134996801025e-05, + "loss": 3.9983, + "step": 6960 + }, + { + "epoch": 1.49, + "learning_rate": 9.702921731712519e-05, + "loss": 3.9593, + "step": 6965 + }, + { + "epoch": 1.49, + "learning_rate": 9.702708466624014e-05, + "loss": 4.0072, + "step": 6970 + }, + { + "epoch": 1.49, + "learning_rate": 9.702495201535508e-05, + "loss": 4.0001, + "step": 6975 + }, + { + "epoch": 1.49, + "learning_rate": 9.702281936447004e-05, + "loss": 3.9286, + "step": 6980 + }, + { + "epoch": 1.49, + "learning_rate": 9.702068671358499e-05, + "loss": 4.0657, + "step": 6985 + }, + { + "epoch": 1.49, + "learning_rate": 9.701855406269993e-05, + "loss": 3.9592, + "step": 6990 + }, + { + "epoch": 1.49, + "learning_rate": 9.701642141181489e-05, + "loss": 4.0018, + "step": 6995 + }, + { + "epoch": 1.49, + "learning_rate": 9.701428876092984e-05, + "loss": 3.925, + "step": 7000 + }, + { + "epoch": 1.49, + "learning_rate": 9.70121561100448e-05, + "loss": 4.0501, + "step": 7005 + }, + { + "epoch": 1.49, + "learning_rate": 9.701002345915974e-05, + "loss": 3.9154, + "step": 7010 + }, + { + "epoch": 1.5, + "learning_rate": 9.70078908082747e-05, + "loss": 4.0242, + "step": 7015 + }, + { + "epoch": 1.5, + "learning_rate": 9.700575815738964e-05, + "loss": 3.9354, + "step": 7020 + }, + { + "epoch": 1.5, + "learning_rate": 9.700362550650459e-05, + "loss": 3.9424, + "step": 7025 + }, + { + "epoch": 1.5, + "learning_rate": 9.700149285561953e-05, + "loss": 3.995, + "step": 7030 + }, + { + "epoch": 1.5, + "learning_rate": 9.699936020473449e-05, + "loss": 3.999, + "step": 7035 + }, + { + "epoch": 1.5, + "learning_rate": 9.699722755384944e-05, + "loss": 3.8797, + "step": 7040 + }, + { + "epoch": 1.5, + "learning_rate": 9.69950949029644e-05, + "loss": 3.9442, + "step": 7045 + }, + { + "epoch": 1.5, + "learning_rate": 9.699296225207935e-05, + "loss": 3.9112, + "step": 7050 + }, + { + "epoch": 1.5, + "learning_rate": 9.699082960119429e-05, + "loss": 4.0144, + "step": 7055 + }, + { + "epoch": 1.51, + "learning_rate": 9.698869695030925e-05, + "loss": 3.9404, + "step": 7060 + }, + { + "epoch": 1.51, + "learning_rate": 9.698656429942419e-05, + "loss": 3.9207, + "step": 7065 + }, + { + "epoch": 1.51, + "learning_rate": 9.698443164853914e-05, + "loss": 4.0416, + "step": 7070 + }, + { + "epoch": 1.51, + "learning_rate": 9.698229899765408e-05, + "loss": 3.8205, + "step": 7075 + }, + { + "epoch": 1.51, + "learning_rate": 9.698016634676904e-05, + "loss": 3.9883, + "step": 7080 + }, + { + "epoch": 1.51, + "learning_rate": 9.697803369588398e-05, + "loss": 4.0117, + "step": 7085 + }, + { + "epoch": 1.51, + "learning_rate": 9.697590104499893e-05, + "loss": 4.065, + "step": 7090 + }, + { + "epoch": 1.51, + "learning_rate": 9.697376839411389e-05, + "loss": 3.9995, + "step": 7095 + }, + { + "epoch": 1.51, + "learning_rate": 9.697163574322884e-05, + "loss": 4.017, + "step": 7100 + }, + { + "epoch": 1.52, + "learning_rate": 9.696950309234378e-05, + "loss": 3.9544, + "step": 7105 + }, + { + "epoch": 1.52, + "learning_rate": 9.696737044145874e-05, + "loss": 3.959, + "step": 7110 + }, + { + "epoch": 1.52, + "learning_rate": 9.69652377905737e-05, + "loss": 3.947, + "step": 7115 + }, + { + "epoch": 1.52, + "learning_rate": 9.696310513968864e-05, + "loss": 3.8877, + "step": 7120 + }, + { + "epoch": 1.52, + "learning_rate": 9.696097248880359e-05, + "loss": 3.9816, + "step": 7125 + }, + { + "epoch": 1.52, + "learning_rate": 9.695883983791853e-05, + "loss": 3.9636, + "step": 7130 + }, + { + "epoch": 1.52, + "learning_rate": 9.695670718703349e-05, + "loss": 4.034, + "step": 7135 + }, + { + "epoch": 1.52, + "learning_rate": 9.695457453614844e-05, + "loss": 3.9534, + "step": 7140 + }, + { + "epoch": 1.52, + "learning_rate": 9.69524418852634e-05, + "loss": 4.0422, + "step": 7145 + }, + { + "epoch": 1.52, + "learning_rate": 9.695030923437834e-05, + "loss": 3.9076, + "step": 7150 + }, + { + "epoch": 1.53, + "learning_rate": 9.694817658349329e-05, + "loss": 3.935, + "step": 7155 + }, + { + "epoch": 1.53, + "learning_rate": 9.694604393260823e-05, + "loss": 3.9518, + "step": 7160 + }, + { + "epoch": 1.53, + "learning_rate": 9.694391128172319e-05, + "loss": 3.9601, + "step": 7165 + }, + { + "epoch": 1.53, + "learning_rate": 9.694177863083813e-05, + "loss": 3.9242, + "step": 7170 + }, + { + "epoch": 1.53, + "learning_rate": 9.693964597995308e-05, + "loss": 4.0258, + "step": 7175 + }, + { + "epoch": 1.53, + "learning_rate": 9.693751332906804e-05, + "loss": 3.9495, + "step": 7180 + }, + { + "epoch": 1.53, + "learning_rate": 9.693538067818298e-05, + "loss": 3.9609, + "step": 7185 + }, + { + "epoch": 1.53, + "learning_rate": 9.693324802729793e-05, + "loss": 3.9768, + "step": 7190 + }, + { + "epoch": 1.53, + "learning_rate": 9.693111537641289e-05, + "loss": 3.9831, + "step": 7195 + }, + { + "epoch": 1.54, + "learning_rate": 9.692898272552784e-05, + "loss": 3.9994, + "step": 7200 + }, + { + "epoch": 1.54, + "learning_rate": 9.692685007464278e-05, + "loss": 4.0035, + "step": 7205 + }, + { + "epoch": 1.54, + "learning_rate": 9.692471742375774e-05, + "loss": 3.9015, + "step": 7210 + }, + { + "epoch": 1.54, + "learning_rate": 9.692258477287268e-05, + "loss": 3.9324, + "step": 7215 + }, + { + "epoch": 1.54, + "learning_rate": 9.692045212198764e-05, + "loss": 4.0493, + "step": 7220 + }, + { + "epoch": 1.54, + "learning_rate": 9.691831947110258e-05, + "loss": 3.8966, + "step": 7225 + }, + { + "epoch": 1.54, + "learning_rate": 9.691618682021753e-05, + "loss": 4.096, + "step": 7230 + }, + { + "epoch": 1.54, + "learning_rate": 9.691405416933249e-05, + "loss": 3.9641, + "step": 7235 + }, + { + "epoch": 1.54, + "learning_rate": 9.691192151844744e-05, + "loss": 3.9442, + "step": 7240 + }, + { + "epoch": 1.54, + "learning_rate": 9.69097888675624e-05, + "loss": 4.0565, + "step": 7245 + }, + { + "epoch": 1.55, + "learning_rate": 9.690765621667734e-05, + "loss": 4.006, + "step": 7250 + }, + { + "epoch": 1.55, + "learning_rate": 9.690552356579229e-05, + "loss": 3.8722, + "step": 7255 + }, + { + "epoch": 1.55, + "learning_rate": 9.690339091490723e-05, + "loss": 4.0283, + "step": 7260 + }, + { + "epoch": 1.55, + "learning_rate": 9.690125826402219e-05, + "loss": 3.8485, + "step": 7265 + }, + { + "epoch": 1.55, + "learning_rate": 9.689912561313713e-05, + "loss": 3.932, + "step": 7270 + }, + { + "epoch": 1.55, + "learning_rate": 9.689699296225208e-05, + "loss": 3.974, + "step": 7275 + }, + { + "epoch": 1.55, + "learning_rate": 9.689486031136702e-05, + "loss": 4.0064, + "step": 7280 + }, + { + "epoch": 1.55, + "learning_rate": 9.689272766048198e-05, + "loss": 4.0187, + "step": 7285 + }, + { + "epoch": 1.55, + "learning_rate": 9.689059500959693e-05, + "loss": 4.0893, + "step": 7290 + }, + { + "epoch": 1.56, + "learning_rate": 9.688846235871189e-05, + "loss": 3.931, + "step": 7295 + }, + { + "epoch": 1.56, + "learning_rate": 9.688632970782683e-05, + "loss": 4.0212, + "step": 7300 + }, + { + "epoch": 1.56, + "learning_rate": 9.688419705694178e-05, + "loss": 3.942, + "step": 7305 + }, + { + "epoch": 1.56, + "learning_rate": 9.688206440605674e-05, + "loss": 3.9389, + "step": 7310 + }, + { + "epoch": 1.56, + "learning_rate": 9.687993175517168e-05, + "loss": 3.9988, + "step": 7315 + }, + { + "epoch": 1.56, + "learning_rate": 9.687779910428664e-05, + "loss": 3.9388, + "step": 7320 + }, + { + "epoch": 1.56, + "learning_rate": 9.687566645340158e-05, + "loss": 3.8766, + "step": 7325 + }, + { + "epoch": 1.56, + "learning_rate": 9.687353380251653e-05, + "loss": 3.9111, + "step": 7330 + }, + { + "epoch": 1.56, + "learning_rate": 9.687140115163149e-05, + "loss": 3.9832, + "step": 7335 + }, + { + "epoch": 1.57, + "learning_rate": 9.686926850074644e-05, + "loss": 3.9796, + "step": 7340 + }, + { + "epoch": 1.57, + "learning_rate": 9.686713584986138e-05, + "loss": 3.9806, + "step": 7345 + }, + { + "epoch": 1.57, + "learning_rate": 9.686500319897634e-05, + "loss": 3.9975, + "step": 7350 + }, + { + "epoch": 1.57, + "learning_rate": 9.686287054809128e-05, + "loss": 3.9825, + "step": 7355 + }, + { + "epoch": 1.57, + "learning_rate": 9.686073789720623e-05, + "loss": 4.0601, + "step": 7360 + }, + { + "epoch": 1.57, + "learning_rate": 9.685860524632117e-05, + "loss": 3.8426, + "step": 7365 + }, + { + "epoch": 1.57, + "learning_rate": 9.685647259543613e-05, + "loss": 3.8947, + "step": 7370 + }, + { + "epoch": 1.57, + "learning_rate": 9.685433994455108e-05, + "loss": 4.0457, + "step": 7375 + }, + { + "epoch": 1.57, + "learning_rate": 9.685220729366602e-05, + "loss": 4.033, + "step": 7380 + }, + { + "epoch": 1.57, + "learning_rate": 9.685007464278098e-05, + "loss": 4.0532, + "step": 7385 + }, + { + "epoch": 1.58, + "learning_rate": 9.684794199189593e-05, + "loss": 4.0249, + "step": 7390 + }, + { + "epoch": 1.58, + "learning_rate": 9.684580934101089e-05, + "loss": 3.8826, + "step": 7395 + }, + { + "epoch": 1.58, + "learning_rate": 9.684367669012583e-05, + "loss": 3.9584, + "step": 7400 + }, + { + "epoch": 1.58, + "learning_rate": 9.684154403924078e-05, + "loss": 3.973, + "step": 7405 + }, + { + "epoch": 1.58, + "learning_rate": 9.683941138835573e-05, + "loss": 3.9925, + "step": 7410 + }, + { + "epoch": 1.58, + "learning_rate": 9.683727873747068e-05, + "loss": 3.965, + "step": 7415 + }, + { + "epoch": 1.58, + "learning_rate": 9.683514608658562e-05, + "loss": 3.9309, + "step": 7420 + }, + { + "epoch": 1.58, + "learning_rate": 9.683301343570058e-05, + "loss": 3.9501, + "step": 7425 + }, + { + "epoch": 1.58, + "learning_rate": 9.683088078481553e-05, + "loss": 3.9286, + "step": 7430 + }, + { + "epoch": 1.59, + "learning_rate": 9.682874813393049e-05, + "loss": 3.9402, + "step": 7435 + }, + { + "epoch": 1.59, + "learning_rate": 9.682661548304544e-05, + "loss": 3.8433, + "step": 7440 + }, + { + "epoch": 1.59, + "learning_rate": 9.682448283216038e-05, + "loss": 3.8946, + "step": 7445 + }, + { + "epoch": 1.59, + "learning_rate": 9.682235018127534e-05, + "loss": 3.9796, + "step": 7450 + }, + { + "epoch": 1.59, + "learning_rate": 9.682021753039028e-05, + "loss": 3.9952, + "step": 7455 + }, + { + "epoch": 1.59, + "learning_rate": 9.681808487950523e-05, + "loss": 3.9109, + "step": 7460 + }, + { + "epoch": 1.59, + "learning_rate": 9.681595222862017e-05, + "loss": 3.9025, + "step": 7465 + }, + { + "epoch": 1.59, + "learning_rate": 9.681381957773513e-05, + "loss": 3.9738, + "step": 7470 + }, + { + "epoch": 1.59, + "learning_rate": 9.681168692685007e-05, + "loss": 3.9674, + "step": 7475 + }, + { + "epoch": 1.59, + "learning_rate": 9.680955427596502e-05, + "loss": 3.9367, + "step": 7480 + }, + { + "epoch": 1.6, + "learning_rate": 9.680742162507998e-05, + "loss": 3.9728, + "step": 7485 + }, + { + "epoch": 1.6, + "learning_rate": 9.680528897419493e-05, + "loss": 3.9568, + "step": 7490 + }, + { + "epoch": 1.6, + "learning_rate": 9.680315632330988e-05, + "loss": 3.9422, + "step": 7495 + }, + { + "epoch": 1.6, + "learning_rate": 9.680102367242483e-05, + "loss": 3.9362, + "step": 7500 + }, + { + "epoch": 1.6, + "learning_rate": 9.679889102153978e-05, + "loss": 3.9472, + "step": 7505 + }, + { + "epoch": 1.6, + "learning_rate": 9.679675837065473e-05, + "loss": 3.9567, + "step": 7510 + }, + { + "epoch": 1.6, + "learning_rate": 9.679462571976968e-05, + "loss": 4.069, + "step": 7515 + }, + { + "epoch": 1.6, + "learning_rate": 9.679249306888462e-05, + "loss": 3.9777, + "step": 7520 + }, + { + "epoch": 1.6, + "learning_rate": 9.679036041799958e-05, + "loss": 3.9746, + "step": 7525 + }, + { + "epoch": 1.61, + "learning_rate": 9.678822776711453e-05, + "loss": 3.8673, + "step": 7530 + }, + { + "epoch": 1.61, + "learning_rate": 9.678609511622949e-05, + "loss": 3.8358, + "step": 7535 + }, + { + "epoch": 1.61, + "learning_rate": 9.678396246534443e-05, + "loss": 3.9602, + "step": 7540 + }, + { + "epoch": 1.61, + "learning_rate": 9.678182981445938e-05, + "loss": 3.9317, + "step": 7545 + }, + { + "epoch": 1.61, + "learning_rate": 9.677969716357432e-05, + "loss": 4.0068, + "step": 7550 + }, + { + "epoch": 1.61, + "learning_rate": 9.677756451268928e-05, + "loss": 3.9074, + "step": 7555 + }, + { + "epoch": 1.61, + "learning_rate": 9.677543186180422e-05, + "loss": 3.9604, + "step": 7560 + }, + { + "epoch": 1.61, + "learning_rate": 9.677329921091917e-05, + "loss": 3.9919, + "step": 7565 + }, + { + "epoch": 1.61, + "learning_rate": 9.677116656003413e-05, + "loss": 3.952, + "step": 7570 + }, + { + "epoch": 1.62, + "learning_rate": 9.676903390914907e-05, + "loss": 3.9517, + "step": 7575 + }, + { + "epoch": 1.62, + "learning_rate": 9.676690125826402e-05, + "loss": 4.0089, + "step": 7580 + }, + { + "epoch": 1.62, + "learning_rate": 9.676476860737898e-05, + "loss": 3.9788, + "step": 7585 + }, + { + "epoch": 1.62, + "learning_rate": 9.676263595649393e-05, + "loss": 3.9306, + "step": 7590 + }, + { + "epoch": 1.62, + "learning_rate": 9.676050330560888e-05, + "loss": 3.9499, + "step": 7595 + }, + { + "epoch": 1.62, + "learning_rate": 9.675837065472383e-05, + "loss": 3.9562, + "step": 7600 + }, + { + "epoch": 1.62, + "learning_rate": 9.675623800383877e-05, + "loss": 3.8718, + "step": 7605 + }, + { + "epoch": 1.62, + "learning_rate": 9.675410535295373e-05, + "loss": 4.0027, + "step": 7610 + }, + { + "epoch": 1.62, + "learning_rate": 9.675197270206867e-05, + "loss": 4.0228, + "step": 7615 + }, + { + "epoch": 1.62, + "learning_rate": 9.674984005118362e-05, + "loss": 3.9901, + "step": 7620 + }, + { + "epoch": 1.63, + "learning_rate": 9.674770740029858e-05, + "loss": 4.001, + "step": 7625 + }, + { + "epoch": 1.63, + "learning_rate": 9.674557474941353e-05, + "loss": 3.933, + "step": 7630 + }, + { + "epoch": 1.63, + "learning_rate": 9.674344209852849e-05, + "loss": 4.038, + "step": 7635 + }, + { + "epoch": 1.63, + "learning_rate": 9.674130944764343e-05, + "loss": 4.0128, + "step": 7640 + }, + { + "epoch": 1.63, + "learning_rate": 9.673917679675838e-05, + "loss": 3.9429, + "step": 7645 + }, + { + "epoch": 1.63, + "learning_rate": 9.673704414587332e-05, + "loss": 4.0048, + "step": 7650 + }, + { + "epoch": 1.63, + "learning_rate": 9.673491149498828e-05, + "loss": 3.9505, + "step": 7655 + }, + { + "epoch": 1.63, + "learning_rate": 9.673277884410322e-05, + "loss": 3.9534, + "step": 7660 + }, + { + "epoch": 1.63, + "learning_rate": 9.673064619321817e-05, + "loss": 3.9964, + "step": 7665 + }, + { + "epoch": 1.64, + "learning_rate": 9.672851354233312e-05, + "loss": 4.0046, + "step": 7670 + }, + { + "epoch": 1.64, + "learning_rate": 9.672638089144807e-05, + "loss": 3.9452, + "step": 7675 + }, + { + "epoch": 1.64, + "learning_rate": 9.672424824056302e-05, + "loss": 3.8981, + "step": 7680 + }, + { + "epoch": 1.64, + "learning_rate": 9.672211558967798e-05, + "loss": 3.9757, + "step": 7685 + }, + { + "epoch": 1.64, + "learning_rate": 9.671998293879292e-05, + "loss": 3.9287, + "step": 7690 + }, + { + "epoch": 1.64, + "learning_rate": 9.671785028790788e-05, + "loss": 3.7842, + "step": 7695 + }, + { + "epoch": 1.64, + "learning_rate": 9.671571763702283e-05, + "loss": 3.907, + "step": 7700 + }, + { + "epoch": 1.64, + "learning_rate": 9.671358498613777e-05, + "loss": 3.9674, + "step": 7705 + }, + { + "epoch": 1.64, + "learning_rate": 9.671145233525273e-05, + "loss": 3.9987, + "step": 7710 + }, + { + "epoch": 1.65, + "learning_rate": 9.670931968436767e-05, + "loss": 3.9149, + "step": 7715 + }, + { + "epoch": 1.65, + "learning_rate": 9.670718703348262e-05, + "loss": 3.9172, + "step": 7720 + }, + { + "epoch": 1.65, + "learning_rate": 9.670505438259758e-05, + "loss": 3.8956, + "step": 7725 + }, + { + "epoch": 1.65, + "learning_rate": 9.670292173171253e-05, + "loss": 4.0116, + "step": 7730 + }, + { + "epoch": 1.65, + "learning_rate": 9.670078908082747e-05, + "loss": 3.8976, + "step": 7735 + }, + { + "epoch": 1.65, + "learning_rate": 9.669865642994243e-05, + "loss": 3.9813, + "step": 7740 + }, + { + "epoch": 1.65, + "learning_rate": 9.669652377905737e-05, + "loss": 3.8436, + "step": 7745 + }, + { + "epoch": 1.65, + "learning_rate": 9.669439112817232e-05, + "loss": 3.8233, + "step": 7750 + }, + { + "epoch": 1.65, + "learning_rate": 9.669225847728726e-05, + "loss": 3.9002, + "step": 7755 + }, + { + "epoch": 1.65, + "learning_rate": 9.669012582640222e-05, + "loss": 3.8529, + "step": 7760 + }, + { + "epoch": 1.66, + "learning_rate": 9.668799317551717e-05, + "loss": 3.868, + "step": 7765 + }, + { + "epoch": 1.66, + "learning_rate": 9.668586052463212e-05, + "loss": 3.873, + "step": 7770 + }, + { + "epoch": 1.66, + "learning_rate": 9.668372787374707e-05, + "loss": 3.93, + "step": 7775 + }, + { + "epoch": 1.66, + "learning_rate": 9.668159522286202e-05, + "loss": 3.923, + "step": 7780 + }, + { + "epoch": 1.66, + "learning_rate": 9.667946257197698e-05, + "loss": 4.0066, + "step": 7785 + }, + { + "epoch": 1.66, + "learning_rate": 9.667732992109192e-05, + "loss": 3.9441, + "step": 7790 + }, + { + "epoch": 1.66, + "learning_rate": 9.667519727020688e-05, + "loss": 3.9373, + "step": 7795 + }, + { + "epoch": 1.66, + "learning_rate": 9.667306461932182e-05, + "loss": 4.0194, + "step": 7800 + }, + { + "epoch": 1.66, + "learning_rate": 9.667093196843677e-05, + "loss": 3.9457, + "step": 7805 + }, + { + "epoch": 1.67, + "learning_rate": 9.666879931755171e-05, + "loss": 3.9548, + "step": 7810 + }, + { + "epoch": 1.67, + "learning_rate": 9.666666666666667e-05, + "loss": 3.957, + "step": 7815 + }, + { + "epoch": 1.67, + "learning_rate": 9.666453401578162e-05, + "loss": 3.9356, + "step": 7820 + }, + { + "epoch": 1.67, + "learning_rate": 9.666240136489658e-05, + "loss": 3.8731, + "step": 7825 + }, + { + "epoch": 1.67, + "learning_rate": 9.666026871401153e-05, + "loss": 3.9486, + "step": 7830 + }, + { + "epoch": 1.67, + "learning_rate": 9.665813606312647e-05, + "loss": 4.0875, + "step": 7835 + }, + { + "epoch": 1.67, + "learning_rate": 9.665600341224143e-05, + "loss": 3.9712, + "step": 7840 + }, + { + "epoch": 1.67, + "learning_rate": 9.665387076135637e-05, + "loss": 4.051, + "step": 7845 + }, + { + "epoch": 1.67, + "learning_rate": 9.665173811047132e-05, + "loss": 3.955, + "step": 7850 + }, + { + "epoch": 1.67, + "learning_rate": 9.664960545958626e-05, + "loss": 4.1314, + "step": 7855 + }, + { + "epoch": 1.68, + "learning_rate": 9.664747280870122e-05, + "loss": 3.9846, + "step": 7860 + }, + { + "epoch": 1.68, + "learning_rate": 9.664534015781616e-05, + "loss": 3.8318, + "step": 7865 + }, + { + "epoch": 1.68, + "learning_rate": 9.664320750693112e-05, + "loss": 3.9138, + "step": 7870 + }, + { + "epoch": 1.68, + "learning_rate": 9.664107485604607e-05, + "loss": 3.8744, + "step": 7875 + }, + { + "epoch": 1.68, + "learning_rate": 9.663894220516102e-05, + "loss": 3.974, + "step": 7880 + }, + { + "epoch": 1.68, + "learning_rate": 9.663680955427597e-05, + "loss": 3.9319, + "step": 7885 + }, + { + "epoch": 1.68, + "learning_rate": 9.663467690339092e-05, + "loss": 3.9605, + "step": 7890 + }, + { + "epoch": 1.68, + "learning_rate": 9.663254425250588e-05, + "loss": 3.9157, + "step": 7895 + }, + { + "epoch": 1.68, + "learning_rate": 9.663041160162082e-05, + "loss": 3.9587, + "step": 7900 + }, + { + "epoch": 1.69, + "learning_rate": 9.662827895073577e-05, + "loss": 3.9459, + "step": 7905 + }, + { + "epoch": 1.69, + "learning_rate": 9.662614629985071e-05, + "loss": 3.9782, + "step": 7910 + }, + { + "epoch": 1.69, + "learning_rate": 9.662401364896567e-05, + "loss": 3.9681, + "step": 7915 + }, + { + "epoch": 1.69, + "learning_rate": 9.662188099808062e-05, + "loss": 3.9228, + "step": 7920 + }, + { + "epoch": 1.69, + "learning_rate": 9.661974834719558e-05, + "loss": 3.9107, + "step": 7925 + }, + { + "epoch": 1.69, + "learning_rate": 9.661761569631052e-05, + "loss": 3.9819, + "step": 7930 + }, + { + "epoch": 1.69, + "learning_rate": 9.661548304542547e-05, + "loss": 3.9108, + "step": 7935 + }, + { + "epoch": 1.69, + "learning_rate": 9.661335039454041e-05, + "loss": 3.9229, + "step": 7940 + }, + { + "epoch": 1.69, + "learning_rate": 9.661121774365537e-05, + "loss": 3.8541, + "step": 7945 + }, + { + "epoch": 1.7, + "learning_rate": 9.660908509277031e-05, + "loss": 3.9974, + "step": 7950 + }, + { + "epoch": 1.7, + "learning_rate": 9.660695244188526e-05, + "loss": 3.9336, + "step": 7955 + }, + { + "epoch": 1.7, + "learning_rate": 9.660481979100022e-05, + "loss": 3.9691, + "step": 7960 + }, + { + "epoch": 1.7, + "learning_rate": 9.660268714011516e-05, + "loss": 4.0531, + "step": 7965 + }, + { + "epoch": 1.7, + "learning_rate": 9.660055448923012e-05, + "loss": 3.8823, + "step": 7970 + }, + { + "epoch": 1.7, + "learning_rate": 9.659842183834507e-05, + "loss": 3.891, + "step": 7975 + }, + { + "epoch": 1.7, + "learning_rate": 9.659628918746002e-05, + "loss": 3.9161, + "step": 7980 + }, + { + "epoch": 1.7, + "learning_rate": 9.659415653657497e-05, + "loss": 3.8977, + "step": 7985 + }, + { + "epoch": 1.7, + "learning_rate": 9.659202388568992e-05, + "loss": 3.97, + "step": 7990 + }, + { + "epoch": 1.7, + "learning_rate": 9.658989123480486e-05, + "loss": 3.9983, + "step": 7995 + }, + { + "epoch": 1.71, + "learning_rate": 9.658775858391982e-05, + "loss": 3.9206, + "step": 8000 + }, + { + "epoch": 1.71, + "learning_rate": 9.658562593303476e-05, + "loss": 3.858, + "step": 8005 + }, + { + "epoch": 1.71, + "learning_rate": 9.658349328214971e-05, + "loss": 3.932, + "step": 8010 + }, + { + "epoch": 1.71, + "learning_rate": 9.658136063126467e-05, + "loss": 3.9168, + "step": 8015 + }, + { + "epoch": 1.71, + "learning_rate": 9.657922798037962e-05, + "loss": 3.9086, + "step": 8020 + }, + { + "epoch": 1.71, + "learning_rate": 9.657709532949458e-05, + "loss": 3.8977, + "step": 8025 + }, + { + "epoch": 1.71, + "learning_rate": 9.657496267860952e-05, + "loss": 3.9408, + "step": 8030 + }, + { + "epoch": 1.71, + "learning_rate": 9.657283002772447e-05, + "loss": 3.974, + "step": 8035 + }, + { + "epoch": 1.71, + "learning_rate": 9.657069737683941e-05, + "loss": 3.9407, + "step": 8040 + }, + { + "epoch": 1.72, + "learning_rate": 9.656856472595437e-05, + "loss": 3.9635, + "step": 8045 + }, + { + "epoch": 1.72, + "learning_rate": 9.656643207506931e-05, + "loss": 3.9529, + "step": 8050 + }, + { + "epoch": 1.72, + "learning_rate": 9.656429942418426e-05, + "loss": 3.8821, + "step": 8055 + }, + { + "epoch": 1.72, + "learning_rate": 9.65621667732992e-05, + "loss": 3.9566, + "step": 8060 + }, + { + "epoch": 1.72, + "learning_rate": 9.656003412241416e-05, + "loss": 3.9737, + "step": 8065 + }, + { + "epoch": 1.72, + "learning_rate": 9.655790147152912e-05, + "loss": 3.9576, + "step": 8070 + }, + { + "epoch": 1.72, + "learning_rate": 9.655576882064407e-05, + "loss": 4.0184, + "step": 8075 + }, + { + "epoch": 1.72, + "learning_rate": 9.655363616975901e-05, + "loss": 3.9613, + "step": 8080 + }, + { + "epoch": 1.72, + "learning_rate": 9.655150351887397e-05, + "loss": 4.0161, + "step": 8085 + }, + { + "epoch": 1.73, + "learning_rate": 9.654937086798892e-05, + "loss": 3.9501, + "step": 8090 + }, + { + "epoch": 1.73, + "learning_rate": 9.654723821710386e-05, + "loss": 3.9022, + "step": 8095 + }, + { + "epoch": 1.73, + "learning_rate": 9.654510556621882e-05, + "loss": 3.9753, + "step": 8100 + }, + { + "epoch": 1.73, + "learning_rate": 9.654297291533376e-05, + "loss": 3.8564, + "step": 8105 + }, + { + "epoch": 1.73, + "learning_rate": 9.654084026444871e-05, + "loss": 3.9418, + "step": 8110 + }, + { + "epoch": 1.73, + "learning_rate": 9.653870761356367e-05, + "loss": 3.8981, + "step": 8115 + }, + { + "epoch": 1.73, + "learning_rate": 9.653657496267862e-05, + "loss": 3.8043, + "step": 8120 + }, + { + "epoch": 1.73, + "learning_rate": 9.653444231179356e-05, + "loss": 3.8836, + "step": 8125 + }, + { + "epoch": 1.73, + "learning_rate": 9.653230966090852e-05, + "loss": 4.028, + "step": 8130 + }, + { + "epoch": 1.73, + "learning_rate": 9.653017701002346e-05, + "loss": 3.848, + "step": 8135 + }, + { + "epoch": 1.74, + "learning_rate": 9.652804435913841e-05, + "loss": 3.9408, + "step": 8140 + }, + { + "epoch": 1.74, + "learning_rate": 9.652591170825336e-05, + "loss": 3.9674, + "step": 8145 + }, + { + "epoch": 1.74, + "learning_rate": 9.652377905736831e-05, + "loss": 3.9338, + "step": 8150 + }, + { + "epoch": 1.74, + "learning_rate": 9.652164640648326e-05, + "loss": 3.9236, + "step": 8155 + }, + { + "epoch": 1.74, + "learning_rate": 9.65195137555982e-05, + "loss": 3.8631, + "step": 8160 + }, + { + "epoch": 1.74, + "learning_rate": 9.651738110471316e-05, + "loss": 4.0081, + "step": 8165 + }, + { + "epoch": 1.74, + "learning_rate": 9.651524845382812e-05, + "loss": 3.9319, + "step": 8170 + }, + { + "epoch": 1.74, + "learning_rate": 9.651311580294307e-05, + "loss": 3.9249, + "step": 8175 + }, + { + "epoch": 1.74, + "learning_rate": 9.651098315205801e-05, + "loss": 3.9253, + "step": 8180 + }, + { + "epoch": 1.75, + "learning_rate": 9.650885050117297e-05, + "loss": 3.9863, + "step": 8185 + }, + { + "epoch": 1.75, + "learning_rate": 9.650671785028791e-05, + "loss": 3.9515, + "step": 8190 + }, + { + "epoch": 1.75, + "learning_rate": 9.650458519940286e-05, + "loss": 3.9285, + "step": 8195 + }, + { + "epoch": 1.75, + "learning_rate": 9.65024525485178e-05, + "loss": 3.9682, + "step": 8200 + }, + { + "epoch": 1.75, + "learning_rate": 9.650031989763276e-05, + "loss": 3.9667, + "step": 8205 + }, + { + "epoch": 1.75, + "learning_rate": 9.649818724674771e-05, + "loss": 4.0204, + "step": 8210 + }, + { + "epoch": 1.75, + "learning_rate": 9.649605459586267e-05, + "loss": 3.9949, + "step": 8215 + }, + { + "epoch": 1.75, + "learning_rate": 9.649392194497762e-05, + "loss": 3.9062, + "step": 8220 + }, + { + "epoch": 1.75, + "learning_rate": 9.649178929409256e-05, + "loss": 3.9013, + "step": 8225 + }, + { + "epoch": 1.75, + "learning_rate": 9.648965664320752e-05, + "loss": 3.9498, + "step": 8230 + }, + { + "epoch": 1.76, + "learning_rate": 9.648752399232246e-05, + "loss": 4.0275, + "step": 8235 + }, + { + "epoch": 1.76, + "learning_rate": 9.648539134143741e-05, + "loss": 3.9451, + "step": 8240 + }, + { + "epoch": 1.76, + "learning_rate": 9.648325869055236e-05, + "loss": 3.9771, + "step": 8245 + }, + { + "epoch": 1.76, + "learning_rate": 9.648112603966731e-05, + "loss": 3.9929, + "step": 8250 + }, + { + "epoch": 1.76, + "learning_rate": 9.647899338878225e-05, + "loss": 4.0019, + "step": 8255 + }, + { + "epoch": 1.76, + "learning_rate": 9.64768607378972e-05, + "loss": 3.9003, + "step": 8260 + }, + { + "epoch": 1.76, + "learning_rate": 9.647472808701216e-05, + "loss": 4.0787, + "step": 8265 + }, + { + "epoch": 1.76, + "learning_rate": 9.647259543612712e-05, + "loss": 3.8502, + "step": 8270 + }, + { + "epoch": 1.76, + "learning_rate": 9.647046278524206e-05, + "loss": 3.8723, + "step": 8275 + }, + { + "epoch": 1.77, + "learning_rate": 9.646833013435701e-05, + "loss": 3.8762, + "step": 8280 + }, + { + "epoch": 1.77, + "learning_rate": 9.646619748347197e-05, + "loss": 3.982, + "step": 8285 + }, + { + "epoch": 1.77, + "learning_rate": 9.646406483258691e-05, + "loss": 3.9126, + "step": 8290 + }, + { + "epoch": 1.77, + "learning_rate": 9.646193218170186e-05, + "loss": 4.0413, + "step": 8295 + }, + { + "epoch": 1.77, + "learning_rate": 9.64597995308168e-05, + "loss": 3.9974, + "step": 8300 + }, + { + "epoch": 1.77, + "learning_rate": 9.645766687993176e-05, + "loss": 3.9017, + "step": 8305 + }, + { + "epoch": 1.77, + "learning_rate": 9.645553422904671e-05, + "loss": 4.0687, + "step": 8310 + }, + { + "epoch": 1.77, + "learning_rate": 9.645340157816167e-05, + "loss": 3.9069, + "step": 8315 + }, + { + "epoch": 1.77, + "learning_rate": 9.645126892727661e-05, + "loss": 3.888, + "step": 8320 + }, + { + "epoch": 1.78, + "learning_rate": 9.644913627639156e-05, + "loss": 3.9087, + "step": 8325 + }, + { + "epoch": 1.78, + "learning_rate": 9.64470036255065e-05, + "loss": 3.9771, + "step": 8330 + }, + { + "epoch": 1.78, + "learning_rate": 9.644487097462146e-05, + "loss": 3.9748, + "step": 8335 + }, + { + "epoch": 1.78, + "learning_rate": 9.64427383237364e-05, + "loss": 4.0026, + "step": 8340 + }, + { + "epoch": 1.78, + "learning_rate": 9.644060567285136e-05, + "loss": 3.895, + "step": 8345 + }, + { + "epoch": 1.78, + "learning_rate": 9.643847302196631e-05, + "loss": 4.012, + "step": 8350 + }, + { + "epoch": 1.78, + "learning_rate": 9.643634037108125e-05, + "loss": 3.87, + "step": 8355 + }, + { + "epoch": 1.78, + "learning_rate": 9.64342077201962e-05, + "loss": 4.0043, + "step": 8360 + }, + { + "epoch": 1.78, + "learning_rate": 9.643207506931116e-05, + "loss": 4.0, + "step": 8365 + }, + { + "epoch": 1.78, + "learning_rate": 9.642994241842612e-05, + "loss": 3.9676, + "step": 8370 + }, + { + "epoch": 1.79, + "learning_rate": 9.642780976754106e-05, + "loss": 3.9396, + "step": 8375 + }, + { + "epoch": 1.79, + "learning_rate": 9.642567711665601e-05, + "loss": 3.9793, + "step": 8380 + }, + { + "epoch": 1.79, + "learning_rate": 9.642354446577095e-05, + "loss": 3.9329, + "step": 8385 + }, + { + "epoch": 1.79, + "learning_rate": 9.642141181488591e-05, + "loss": 4.0025, + "step": 8390 + }, + { + "epoch": 1.79, + "learning_rate": 9.641927916400085e-05, + "loss": 3.9627, + "step": 8395 + }, + { + "epoch": 1.79, + "learning_rate": 9.64171465131158e-05, + "loss": 3.936, + "step": 8400 + }, + { + "epoch": 1.79, + "learning_rate": 9.641501386223076e-05, + "loss": 3.9795, + "step": 8405 + }, + { + "epoch": 1.79, + "learning_rate": 9.641288121134571e-05, + "loss": 3.9362, + "step": 8410 + }, + { + "epoch": 1.79, + "learning_rate": 9.641074856046067e-05, + "loss": 3.931, + "step": 8415 + }, + { + "epoch": 1.8, + "learning_rate": 9.640861590957561e-05, + "loss": 3.904, + "step": 8420 + }, + { + "epoch": 1.8, + "learning_rate": 9.640648325869056e-05, + "loss": 3.9252, + "step": 8425 + }, + { + "epoch": 1.8, + "learning_rate": 9.64043506078055e-05, + "loss": 3.794, + "step": 8430 + }, + { + "epoch": 1.8, + "learning_rate": 9.640221795692046e-05, + "loss": 3.8642, + "step": 8435 + }, + { + "epoch": 1.8, + "learning_rate": 9.64000853060354e-05, + "loss": 3.9434, + "step": 8440 + }, + { + "epoch": 1.8, + "learning_rate": 9.639795265515036e-05, + "loss": 3.9625, + "step": 8445 + }, + { + "epoch": 1.8, + "learning_rate": 9.63958200042653e-05, + "loss": 3.9158, + "step": 8450 + }, + { + "epoch": 1.8, + "learning_rate": 9.639368735338025e-05, + "loss": 3.9622, + "step": 8455 + }, + { + "epoch": 1.8, + "learning_rate": 9.63915547024952e-05, + "loss": 3.8525, + "step": 8460 + }, + { + "epoch": 1.81, + "learning_rate": 9.638942205161016e-05, + "loss": 3.9034, + "step": 8465 + }, + { + "epoch": 1.81, + "learning_rate": 9.63872894007251e-05, + "loss": 3.9927, + "step": 8470 + }, + { + "epoch": 1.81, + "learning_rate": 9.638515674984006e-05, + "loss": 3.941, + "step": 8475 + }, + { + "epoch": 1.81, + "learning_rate": 9.638302409895501e-05, + "loss": 3.9747, + "step": 8480 + }, + { + "epoch": 1.81, + "learning_rate": 9.638089144806995e-05, + "loss": 4.0354, + "step": 8485 + }, + { + "epoch": 1.81, + "learning_rate": 9.637875879718491e-05, + "loss": 3.9309, + "step": 8490 + }, + { + "epoch": 1.81, + "learning_rate": 9.637662614629985e-05, + "loss": 3.8677, + "step": 8495 + }, + { + "epoch": 1.81, + "learning_rate": 9.63744934954148e-05, + "loss": 3.9659, + "step": 8500 + }, + { + "epoch": 1.81, + "learning_rate": 9.637236084452976e-05, + "loss": 3.9476, + "step": 8505 + }, + { + "epoch": 1.81, + "learning_rate": 9.637022819364471e-05, + "loss": 3.9306, + "step": 8510 + }, + { + "epoch": 1.82, + "learning_rate": 9.636809554275965e-05, + "loss": 3.9389, + "step": 8515 + }, + { + "epoch": 1.82, + "learning_rate": 9.636596289187461e-05, + "loss": 3.9782, + "step": 8520 + }, + { + "epoch": 1.82, + "learning_rate": 9.636383024098955e-05, + "loss": 3.9809, + "step": 8525 + }, + { + "epoch": 1.82, + "learning_rate": 9.63616975901045e-05, + "loss": 3.867, + "step": 8530 + }, + { + "epoch": 1.82, + "learning_rate": 9.635956493921946e-05, + "loss": 4.043, + "step": 8535 + }, + { + "epoch": 1.82, + "learning_rate": 9.63574322883344e-05, + "loss": 3.9205, + "step": 8540 + }, + { + "epoch": 1.82, + "learning_rate": 9.635529963744936e-05, + "loss": 3.916, + "step": 8545 + }, + { + "epoch": 1.82, + "learning_rate": 9.63531669865643e-05, + "loss": 3.9487, + "step": 8550 + }, + { + "epoch": 1.82, + "learning_rate": 9.635103433567925e-05, + "loss": 3.9018, + "step": 8555 + }, + { + "epoch": 1.83, + "learning_rate": 9.63489016847942e-05, + "loss": 3.8664, + "step": 8560 + }, + { + "epoch": 1.83, + "learning_rate": 9.634676903390916e-05, + "loss": 3.9274, + "step": 8565 + }, + { + "epoch": 1.83, + "learning_rate": 9.63446363830241e-05, + "loss": 4.0504, + "step": 8570 + }, + { + "epoch": 1.83, + "learning_rate": 9.634250373213906e-05, + "loss": 3.9087, + "step": 8575 + }, + { + "epoch": 1.83, + "learning_rate": 9.6340371081254e-05, + "loss": 3.9666, + "step": 8580 + }, + { + "epoch": 1.83, + "learning_rate": 9.633823843036895e-05, + "loss": 3.9854, + "step": 8585 + }, + { + "epoch": 1.83, + "learning_rate": 9.63361057794839e-05, + "loss": 3.8774, + "step": 8590 + }, + { + "epoch": 1.83, + "learning_rate": 9.633397312859885e-05, + "loss": 4.0348, + "step": 8595 + }, + { + "epoch": 1.83, + "learning_rate": 9.63318404777138e-05, + "loss": 3.9192, + "step": 8600 + }, + { + "epoch": 1.83, + "learning_rate": 9.632970782682876e-05, + "loss": 3.9169, + "step": 8605 + }, + { + "epoch": 1.84, + "learning_rate": 9.632757517594371e-05, + "loss": 3.9721, + "step": 8610 + }, + { + "epoch": 1.84, + "learning_rate": 9.632544252505865e-05, + "loss": 3.9537, + "step": 8615 + }, + { + "epoch": 1.84, + "learning_rate": 9.632330987417361e-05, + "loss": 3.997, + "step": 8620 + }, + { + "epoch": 1.84, + "learning_rate": 9.632117722328855e-05, + "loss": 3.8441, + "step": 8625 + }, + { + "epoch": 1.84, + "learning_rate": 9.63190445724035e-05, + "loss": 3.8964, + "step": 8630 + }, + { + "epoch": 1.84, + "learning_rate": 9.631691192151845e-05, + "loss": 3.91, + "step": 8635 + }, + { + "epoch": 1.84, + "learning_rate": 9.63147792706334e-05, + "loss": 3.9637, + "step": 8640 + }, + { + "epoch": 1.84, + "learning_rate": 9.631264661974834e-05, + "loss": 3.816, + "step": 8645 + }, + { + "epoch": 1.84, + "learning_rate": 9.63105139688633e-05, + "loss": 3.991, + "step": 8650 + }, + { + "epoch": 1.85, + "learning_rate": 9.630838131797825e-05, + "loss": 3.8514, + "step": 8655 + }, + { + "epoch": 1.85, + "learning_rate": 9.63062486670932e-05, + "loss": 3.9244, + "step": 8660 + }, + { + "epoch": 1.85, + "learning_rate": 9.630411601620816e-05, + "loss": 3.959, + "step": 8665 + }, + { + "epoch": 1.85, + "learning_rate": 9.63019833653231e-05, + "loss": 3.8843, + "step": 8670 + }, + { + "epoch": 1.85, + "learning_rate": 9.629985071443806e-05, + "loss": 3.9632, + "step": 8675 + }, + { + "epoch": 1.85, + "learning_rate": 9.6297718063553e-05, + "loss": 3.9585, + "step": 8680 + }, + { + "epoch": 1.85, + "learning_rate": 9.629558541266795e-05, + "loss": 3.8212, + "step": 8685 + }, + { + "epoch": 1.85, + "learning_rate": 9.62934527617829e-05, + "loss": 3.8204, + "step": 8690 + }, + { + "epoch": 1.85, + "learning_rate": 9.629132011089785e-05, + "loss": 3.928, + "step": 8695 + }, + { + "epoch": 1.86, + "learning_rate": 9.62891874600128e-05, + "loss": 3.9056, + "step": 8700 + }, + { + "epoch": 1.86, + "learning_rate": 9.628705480912776e-05, + "loss": 3.8705, + "step": 8705 + }, + { + "epoch": 1.86, + "learning_rate": 9.62849221582427e-05, + "loss": 3.942, + "step": 8710 + }, + { + "epoch": 1.86, + "learning_rate": 9.628278950735765e-05, + "loss": 3.8656, + "step": 8715 + }, + { + "epoch": 1.86, + "learning_rate": 9.62806568564726e-05, + "loss": 3.8137, + "step": 8720 + }, + { + "epoch": 1.86, + "learning_rate": 9.627852420558755e-05, + "loss": 3.8635, + "step": 8725 + }, + { + "epoch": 1.86, + "learning_rate": 9.62763915547025e-05, + "loss": 3.9186, + "step": 8730 + }, + { + "epoch": 1.86, + "learning_rate": 9.627425890381745e-05, + "loss": 3.8304, + "step": 8735 + }, + { + "epoch": 1.86, + "learning_rate": 9.62721262529324e-05, + "loss": 3.9784, + "step": 8740 + }, + { + "epoch": 1.86, + "learning_rate": 9.626999360204734e-05, + "loss": 3.8235, + "step": 8745 + }, + { + "epoch": 1.87, + "learning_rate": 9.62678609511623e-05, + "loss": 3.9063, + "step": 8750 + }, + { + "epoch": 1.87, + "learning_rate": 9.626572830027725e-05, + "loss": 3.8882, + "step": 8755 + }, + { + "epoch": 1.87, + "learning_rate": 9.62635956493922e-05, + "loss": 3.8283, + "step": 8760 + }, + { + "epoch": 1.87, + "learning_rate": 9.626146299850715e-05, + "loss": 3.8209, + "step": 8765 + }, + { + "epoch": 1.87, + "learning_rate": 9.62593303476221e-05, + "loss": 3.8694, + "step": 8770 + }, + { + "epoch": 1.87, + "learning_rate": 9.625719769673704e-05, + "loss": 3.9335, + "step": 8775 + }, + { + "epoch": 1.87, + "learning_rate": 9.6255065045852e-05, + "loss": 3.8924, + "step": 8780 + }, + { + "epoch": 1.87, + "learning_rate": 9.625293239496694e-05, + "loss": 3.9002, + "step": 8785 + }, + { + "epoch": 1.87, + "learning_rate": 9.62507997440819e-05, + "loss": 3.9546, + "step": 8790 + }, + { + "epoch": 1.88, + "learning_rate": 9.624866709319685e-05, + "loss": 3.9759, + "step": 8795 + }, + { + "epoch": 1.88, + "learning_rate": 9.62465344423118e-05, + "loss": 3.9054, + "step": 8800 + }, + { + "epoch": 1.88, + "learning_rate": 9.624440179142676e-05, + "loss": 3.9971, + "step": 8805 + }, + { + "epoch": 1.88, + "learning_rate": 9.62422691405417e-05, + "loss": 3.9312, + "step": 8810 + }, + { + "epoch": 1.88, + "learning_rate": 9.624013648965665e-05, + "loss": 3.9775, + "step": 8815 + }, + { + "epoch": 1.88, + "learning_rate": 9.62380038387716e-05, + "loss": 3.8942, + "step": 8820 + }, + { + "epoch": 1.88, + "learning_rate": 9.623587118788655e-05, + "loss": 3.9469, + "step": 8825 + }, + { + "epoch": 1.88, + "learning_rate": 9.623373853700149e-05, + "loss": 3.9949, + "step": 8830 + }, + { + "epoch": 1.88, + "learning_rate": 9.623160588611645e-05, + "loss": 3.8898, + "step": 8835 + }, + { + "epoch": 1.88, + "learning_rate": 9.622947323523139e-05, + "loss": 3.9294, + "step": 8840 + }, + { + "epoch": 1.89, + "learning_rate": 9.622734058434634e-05, + "loss": 3.9123, + "step": 8845 + }, + { + "epoch": 1.89, + "learning_rate": 9.62252079334613e-05, + "loss": 3.9071, + "step": 8850 + }, + { + "epoch": 1.89, + "learning_rate": 9.622307528257625e-05, + "loss": 3.9909, + "step": 8855 + }, + { + "epoch": 1.89, + "learning_rate": 9.62209426316912e-05, + "loss": 3.9856, + "step": 8860 + }, + { + "epoch": 1.89, + "learning_rate": 9.621880998080615e-05, + "loss": 3.9783, + "step": 8865 + }, + { + "epoch": 1.89, + "learning_rate": 9.62166773299211e-05, + "loss": 3.89, + "step": 8870 + }, + { + "epoch": 1.89, + "learning_rate": 9.621454467903604e-05, + "loss": 3.938, + "step": 8875 + }, + { + "epoch": 1.89, + "learning_rate": 9.6212412028151e-05, + "loss": 3.8597, + "step": 8880 + }, + { + "epoch": 1.89, + "learning_rate": 9.621027937726594e-05, + "loss": 3.9513, + "step": 8885 + }, + { + "epoch": 1.9, + "learning_rate": 9.62081467263809e-05, + "loss": 3.8398, + "step": 8890 + }, + { + "epoch": 1.9, + "learning_rate": 9.620601407549585e-05, + "loss": 3.9375, + "step": 8895 + }, + { + "epoch": 1.9, + "learning_rate": 9.62038814246108e-05, + "loss": 3.8109, + "step": 8900 + }, + { + "epoch": 1.9, + "learning_rate": 9.620174877372574e-05, + "loss": 3.9151, + "step": 8905 + }, + { + "epoch": 1.9, + "learning_rate": 9.61996161228407e-05, + "loss": 3.865, + "step": 8910 + }, + { + "epoch": 1.9, + "learning_rate": 9.619748347195564e-05, + "loss": 4.0337, + "step": 8915 + }, + { + "epoch": 1.9, + "learning_rate": 9.61953508210706e-05, + "loss": 3.8709, + "step": 8920 + }, + { + "epoch": 1.9, + "learning_rate": 9.619321817018555e-05, + "loss": 3.8984, + "step": 8925 + }, + { + "epoch": 1.9, + "learning_rate": 9.619108551930049e-05, + "loss": 3.9675, + "step": 8930 + }, + { + "epoch": 1.91, + "learning_rate": 9.618895286841545e-05, + "loss": 3.8844, + "step": 8935 + }, + { + "epoch": 1.91, + "learning_rate": 9.618682021753039e-05, + "loss": 3.9666, + "step": 8940 + }, + { + "epoch": 1.91, + "learning_rate": 9.618468756664534e-05, + "loss": 3.9984, + "step": 8945 + }, + { + "epoch": 1.91, + "learning_rate": 9.61825549157603e-05, + "loss": 3.8754, + "step": 8950 + }, + { + "epoch": 1.91, + "learning_rate": 9.618042226487525e-05, + "loss": 3.8035, + "step": 8955 + }, + { + "epoch": 1.91, + "learning_rate": 9.617828961399019e-05, + "loss": 3.8567, + "step": 8960 + }, + { + "epoch": 1.91, + "learning_rate": 9.617615696310515e-05, + "loss": 4.0156, + "step": 8965 + }, + { + "epoch": 1.91, + "learning_rate": 9.617402431222009e-05, + "loss": 3.9287, + "step": 8970 + }, + { + "epoch": 1.91, + "learning_rate": 9.617189166133504e-05, + "loss": 3.8815, + "step": 8975 + }, + { + "epoch": 1.91, + "learning_rate": 9.616975901044998e-05, + "loss": 4.0201, + "step": 8980 + }, + { + "epoch": 1.92, + "learning_rate": 9.616762635956494e-05, + "loss": 3.9027, + "step": 8985 + }, + { + "epoch": 1.92, + "learning_rate": 9.61654937086799e-05, + "loss": 3.8751, + "step": 8990 + }, + { + "epoch": 1.92, + "learning_rate": 9.616336105779485e-05, + "loss": 3.807, + "step": 8995 + }, + { + "epoch": 1.92, + "learning_rate": 9.61612284069098e-05, + "loss": 3.9068, + "step": 9000 + }, + { + "epoch": 1.92, + "learning_rate": 9.615909575602474e-05, + "loss": 3.9204, + "step": 9005 + }, + { + "epoch": 1.92, + "learning_rate": 9.61569631051397e-05, + "loss": 3.8568, + "step": 9010 + }, + { + "epoch": 1.92, + "learning_rate": 9.615483045425464e-05, + "loss": 3.9047, + "step": 9015 + }, + { + "epoch": 1.92, + "learning_rate": 9.61526978033696e-05, + "loss": 3.9855, + "step": 9020 + }, + { + "epoch": 1.92, + "learning_rate": 9.615056515248454e-05, + "loss": 3.9231, + "step": 9025 + }, + { + "epoch": 1.93, + "learning_rate": 9.614843250159949e-05, + "loss": 4.0087, + "step": 9030 + }, + { + "epoch": 1.93, + "learning_rate": 9.614629985071443e-05, + "loss": 4.0523, + "step": 9035 + }, + { + "epoch": 1.93, + "learning_rate": 9.614416719982939e-05, + "loss": 3.9145, + "step": 9040 + }, + { + "epoch": 1.93, + "learning_rate": 9.614203454894434e-05, + "loss": 3.7717, + "step": 9045 + }, + { + "epoch": 1.93, + "learning_rate": 9.61399018980593e-05, + "loss": 3.7863, + "step": 9050 + }, + { + "epoch": 1.93, + "learning_rate": 9.613776924717425e-05, + "loss": 3.9593, + "step": 9055 + }, + { + "epoch": 1.93, + "learning_rate": 9.613563659628919e-05, + "loss": 3.9541, + "step": 9060 + }, + { + "epoch": 1.93, + "learning_rate": 9.613350394540415e-05, + "loss": 3.9072, + "step": 9065 + }, + { + "epoch": 1.93, + "learning_rate": 9.613137129451909e-05, + "loss": 3.8733, + "step": 9070 + }, + { + "epoch": 1.94, + "learning_rate": 9.612923864363404e-05, + "loss": 3.9044, + "step": 9075 + }, + { + "epoch": 1.94, + "learning_rate": 9.612710599274898e-05, + "loss": 3.9704, + "step": 9080 + }, + { + "epoch": 1.94, + "learning_rate": 9.612497334186394e-05, + "loss": 3.8255, + "step": 9085 + }, + { + "epoch": 1.94, + "learning_rate": 9.61228406909789e-05, + "loss": 3.862, + "step": 9090 + }, + { + "epoch": 1.94, + "learning_rate": 9.612070804009385e-05, + "loss": 3.9216, + "step": 9095 + }, + { + "epoch": 1.94, + "learning_rate": 9.611857538920879e-05, + "loss": 3.8865, + "step": 9100 + }, + { + "epoch": 1.94, + "learning_rate": 9.611644273832374e-05, + "loss": 3.9935, + "step": 9105 + }, + { + "epoch": 1.94, + "learning_rate": 9.611431008743869e-05, + "loss": 3.9115, + "step": 9110 + }, + { + "epoch": 1.94, + "learning_rate": 9.611217743655364e-05, + "loss": 3.8866, + "step": 9115 + }, + { + "epoch": 1.94, + "learning_rate": 9.61100447856686e-05, + "loss": 3.888, + "step": 9120 + }, + { + "epoch": 1.95, + "learning_rate": 9.610791213478354e-05, + "loss": 3.9667, + "step": 9125 + }, + { + "epoch": 1.95, + "learning_rate": 9.610577948389849e-05, + "loss": 3.9008, + "step": 9130 + }, + { + "epoch": 1.95, + "learning_rate": 9.610364683301343e-05, + "loss": 3.9049, + "step": 9135 + }, + { + "epoch": 1.95, + "learning_rate": 9.610151418212839e-05, + "loss": 3.9664, + "step": 9140 + }, + { + "epoch": 1.95, + "learning_rate": 9.609938153124334e-05, + "loss": 3.9578, + "step": 9145 + }, + { + "epoch": 1.95, + "learning_rate": 9.60972488803583e-05, + "loss": 3.9024, + "step": 9150 + }, + { + "epoch": 1.95, + "learning_rate": 9.609511622947324e-05, + "loss": 3.9579, + "step": 9155 + }, + { + "epoch": 1.95, + "learning_rate": 9.609298357858819e-05, + "loss": 3.9001, + "step": 9160 + }, + { + "epoch": 1.95, + "learning_rate": 9.609085092770313e-05, + "loss": 3.8728, + "step": 9165 + }, + { + "epoch": 1.96, + "learning_rate": 9.608871827681809e-05, + "loss": 3.9192, + "step": 9170 + }, + { + "epoch": 1.96, + "learning_rate": 9.608658562593303e-05, + "loss": 3.9924, + "step": 9175 + }, + { + "epoch": 1.96, + "learning_rate": 9.608445297504798e-05, + "loss": 3.9081, + "step": 9180 + }, + { + "epoch": 1.96, + "learning_rate": 9.608232032416294e-05, + "loss": 3.9442, + "step": 9185 + }, + { + "epoch": 1.96, + "learning_rate": 9.60801876732779e-05, + "loss": 3.921, + "step": 9190 + }, + { + "epoch": 1.96, + "learning_rate": 9.607805502239285e-05, + "loss": 3.9555, + "step": 9195 + }, + { + "epoch": 1.96, + "learning_rate": 9.607592237150779e-05, + "loss": 3.9979, + "step": 9200 + }, + { + "epoch": 1.96, + "learning_rate": 9.607378972062274e-05, + "loss": 3.8777, + "step": 9205 + }, + { + "epoch": 1.96, + "learning_rate": 9.607165706973769e-05, + "loss": 3.8775, + "step": 9210 + }, + { + "epoch": 1.96, + "learning_rate": 9.606952441885264e-05, + "loss": 3.9365, + "step": 9215 + }, + { + "epoch": 1.97, + "learning_rate": 9.606739176796758e-05, + "loss": 3.9906, + "step": 9220 + }, + { + "epoch": 1.97, + "learning_rate": 9.606525911708254e-05, + "loss": 3.8708, + "step": 9225 + }, + { + "epoch": 1.97, + "learning_rate": 9.606312646619748e-05, + "loss": 3.9532, + "step": 9230 + }, + { + "epoch": 1.97, + "learning_rate": 9.606099381531243e-05, + "loss": 3.9191, + "step": 9235 + }, + { + "epoch": 1.97, + "learning_rate": 9.605886116442739e-05, + "loss": 3.9653, + "step": 9240 + }, + { + "epoch": 1.97, + "learning_rate": 9.605672851354234e-05, + "loss": 3.9414, + "step": 9245 + }, + { + "epoch": 1.97, + "learning_rate": 9.60545958626573e-05, + "loss": 4.0171, + "step": 9250 + }, + { + "epoch": 1.97, + "learning_rate": 9.605246321177224e-05, + "loss": 3.8316, + "step": 9255 + }, + { + "epoch": 1.97, + "learning_rate": 9.605033056088719e-05, + "loss": 4.0025, + "step": 9260 + }, + { + "epoch": 1.98, + "learning_rate": 9.604819791000213e-05, + "loss": 3.9262, + "step": 9265 + }, + { + "epoch": 1.98, + "learning_rate": 9.604606525911709e-05, + "loss": 3.9224, + "step": 9270 + }, + { + "epoch": 1.98, + "learning_rate": 9.604393260823203e-05, + "loss": 3.9596, + "step": 9275 + }, + { + "epoch": 1.98, + "learning_rate": 9.604179995734698e-05, + "loss": 3.9289, + "step": 9280 + }, + { + "epoch": 1.98, + "learning_rate": 9.603966730646194e-05, + "loss": 3.8967, + "step": 9285 + }, + { + "epoch": 1.98, + "learning_rate": 9.60375346555769e-05, + "loss": 3.8816, + "step": 9290 + }, + { + "epoch": 1.98, + "learning_rate": 9.603540200469183e-05, + "loss": 3.7878, + "step": 9295 + }, + { + "epoch": 1.98, + "learning_rate": 9.603326935380679e-05, + "loss": 3.8872, + "step": 9300 + }, + { + "epoch": 1.98, + "learning_rate": 9.603113670292173e-05, + "loss": 3.8306, + "step": 9305 + }, + { + "epoch": 1.99, + "learning_rate": 9.602900405203669e-05, + "loss": 3.7977, + "step": 9310 + }, + { + "epoch": 1.99, + "learning_rate": 9.602687140115164e-05, + "loss": 3.9371, + "step": 9315 + }, + { + "epoch": 1.99, + "learning_rate": 9.602473875026658e-05, + "loss": 3.9401, + "step": 9320 + }, + { + "epoch": 1.99, + "learning_rate": 9.602260609938154e-05, + "loss": 3.88, + "step": 9325 + }, + { + "epoch": 1.99, + "learning_rate": 9.602047344849648e-05, + "loss": 3.8741, + "step": 9330 + }, + { + "epoch": 1.99, + "learning_rate": 9.601834079761145e-05, + "loss": 4.0231, + "step": 9335 + }, + { + "epoch": 1.99, + "learning_rate": 9.601620814672639e-05, + "loss": 3.7976, + "step": 9340 + }, + { + "epoch": 1.99, + "learning_rate": 9.601407549584134e-05, + "loss": 3.9515, + "step": 9345 + }, + { + "epoch": 1.99, + "learning_rate": 9.601194284495628e-05, + "loss": 3.9097, + "step": 9350 + }, + { + "epoch": 1.99, + "learning_rate": 9.600981019407124e-05, + "loss": 3.9143, + "step": 9355 + }, + { + "epoch": 2.0, + "learning_rate": 9.600767754318618e-05, + "loss": 3.9404, + "step": 9360 + }, + { + "epoch": 2.0, + "learning_rate": 9.600554489230113e-05, + "loss": 3.9616, + "step": 9365 + }, + { + "epoch": 2.0, + "learning_rate": 9.600341224141607e-05, + "loss": 4.005, + "step": 9370 + }, + { + "epoch": 2.0, + "learning_rate": 9.600127959053103e-05, + "loss": 3.9032, + "step": 9375 + }, + { + "epoch": 2.0, + "learning_rate": 9.599914693964598e-05, + "loss": 3.9546, + "step": 9380 + }, + { + "epoch": 2.0, + "learning_rate": 9.599701428876094e-05, + "loss": 3.9168, + "step": 9385 + }, + { + "epoch": 2.0, + "learning_rate": 9.59948816378759e-05, + "loss": 3.9348, + "step": 9390 + }, + { + "epoch": 2.0, + "learning_rate": 9.599274898699084e-05, + "loss": 3.975, + "step": 9395 + }, + { + "epoch": 2.0, + "learning_rate": 9.599061633610579e-05, + "loss": 3.9352, + "step": 9400 + }, + { + "epoch": 2.01, + "learning_rate": 9.598848368522073e-05, + "loss": 3.9051, + "step": 9405 + }, + { + "epoch": 2.01, + "learning_rate": 9.598635103433569e-05, + "loss": 3.8676, + "step": 9410 + }, + { + "epoch": 2.01, + "learning_rate": 9.598421838345063e-05, + "loss": 3.8709, + "step": 9415 + }, + { + "epoch": 2.01, + "learning_rate": 9.598208573256558e-05, + "loss": 3.8946, + "step": 9420 + }, + { + "epoch": 2.01, + "learning_rate": 9.597995308168052e-05, + "loss": 3.8885, + "step": 9425 + }, + { + "epoch": 2.01, + "learning_rate": 9.597782043079548e-05, + "loss": 3.9497, + "step": 9430 + }, + { + "epoch": 2.01, + "learning_rate": 9.597568777991043e-05, + "loss": 3.797, + "step": 9435 + }, + { + "epoch": 2.01, + "learning_rate": 9.597355512902539e-05, + "loss": 3.9236, + "step": 9440 + }, + { + "epoch": 2.01, + "learning_rate": 9.597142247814034e-05, + "loss": 3.8859, + "step": 9445 + }, + { + "epoch": 2.02, + "learning_rate": 9.596928982725528e-05, + "loss": 3.8632, + "step": 9450 + }, + { + "epoch": 2.02, + "learning_rate": 9.596715717637024e-05, + "loss": 3.894, + "step": 9455 + }, + { + "epoch": 2.02, + "learning_rate": 9.596502452548518e-05, + "loss": 3.8814, + "step": 9460 + }, + { + "epoch": 2.02, + "learning_rate": 9.596289187460013e-05, + "loss": 3.934, + "step": 9465 + }, + { + "epoch": 2.02, + "learning_rate": 9.596075922371507e-05, + "loss": 3.8968, + "step": 9470 + }, + { + "epoch": 2.02, + "learning_rate": 9.595862657283003e-05, + "loss": 3.9153, + "step": 9475 + }, + { + "epoch": 2.02, + "learning_rate": 9.595649392194498e-05, + "loss": 3.8999, + "step": 9480 + }, + { + "epoch": 2.02, + "learning_rate": 9.595436127105994e-05, + "loss": 3.8634, + "step": 9485 + }, + { + "epoch": 2.02, + "learning_rate": 9.595222862017488e-05, + "loss": 3.7982, + "step": 9490 + }, + { + "epoch": 2.02, + "learning_rate": 9.595009596928984e-05, + "loss": 3.8526, + "step": 9495 + }, + { + "epoch": 2.03, + "learning_rate": 9.594796331840478e-05, + "loss": 3.8855, + "step": 9500 + }, + { + "epoch": 2.03, + "learning_rate": 9.594583066751973e-05, + "loss": 3.9068, + "step": 9505 + }, + { + "epoch": 2.03, + "learning_rate": 9.594369801663469e-05, + "loss": 3.9052, + "step": 9510 + }, + { + "epoch": 2.03, + "learning_rate": 9.594156536574963e-05, + "loss": 3.9008, + "step": 9515 + }, + { + "epoch": 2.03, + "learning_rate": 9.593943271486458e-05, + "loss": 3.887, + "step": 9520 + }, + { + "epoch": 2.03, + "learning_rate": 9.593730006397952e-05, + "loss": 3.7917, + "step": 9525 + }, + { + "epoch": 2.03, + "learning_rate": 9.593516741309449e-05, + "loss": 3.9603, + "step": 9530 + }, + { + "epoch": 2.03, + "learning_rate": 9.593303476220943e-05, + "loss": 3.9165, + "step": 9535 + }, + { + "epoch": 2.03, + "learning_rate": 9.593090211132439e-05, + "loss": 3.8178, + "step": 9540 + }, + { + "epoch": 2.04, + "learning_rate": 9.592876946043933e-05, + "loss": 3.9096, + "step": 9545 + }, + { + "epoch": 2.04, + "learning_rate": 9.592663680955428e-05, + "loss": 3.8815, + "step": 9550 + }, + { + "epoch": 2.04, + "learning_rate": 9.592450415866922e-05, + "loss": 3.8176, + "step": 9555 + }, + { + "epoch": 2.04, + "learning_rate": 9.592237150778418e-05, + "loss": 3.8058, + "step": 9560 + }, + { + "epoch": 2.04, + "learning_rate": 9.592023885689912e-05, + "loss": 3.8784, + "step": 9565 + }, + { + "epoch": 2.04, + "learning_rate": 9.591810620601407e-05, + "loss": 3.8491, + "step": 9570 + }, + { + "epoch": 2.04, + "learning_rate": 9.591597355512903e-05, + "loss": 3.9405, + "step": 9575 + }, + { + "epoch": 2.04, + "learning_rate": 9.591384090424398e-05, + "loss": 3.9151, + "step": 9580 + }, + { + "epoch": 2.04, + "learning_rate": 9.591170825335894e-05, + "loss": 3.9383, + "step": 9585 + }, + { + "epoch": 2.04, + "learning_rate": 9.590957560247388e-05, + "loss": 3.7983, + "step": 9590 + }, + { + "epoch": 2.05, + "learning_rate": 9.590744295158884e-05, + "loss": 3.9112, + "step": 9595 + }, + { + "epoch": 2.05, + "learning_rate": 9.590531030070378e-05, + "loss": 3.8687, + "step": 9600 + }, + { + "epoch": 2.05, + "learning_rate": 9.590317764981873e-05, + "loss": 3.9013, + "step": 9605 + }, + { + "epoch": 2.05, + "learning_rate": 9.590104499893367e-05, + "loss": 3.915, + "step": 9610 + }, + { + "epoch": 2.05, + "learning_rate": 9.589891234804863e-05, + "loss": 3.9355, + "step": 9615 + }, + { + "epoch": 2.05, + "learning_rate": 9.589677969716357e-05, + "loss": 3.8931, + "step": 9620 + }, + { + "epoch": 2.05, + "learning_rate": 9.589464704627852e-05, + "loss": 3.8911, + "step": 9625 + }, + { + "epoch": 2.05, + "learning_rate": 9.589251439539348e-05, + "loss": 3.9141, + "step": 9630 + }, + { + "epoch": 2.05, + "learning_rate": 9.589038174450843e-05, + "loss": 3.8991, + "step": 9635 + }, + { + "epoch": 2.06, + "learning_rate": 9.588824909362339e-05, + "loss": 3.9115, + "step": 9640 + }, + { + "epoch": 2.06, + "learning_rate": 9.588611644273833e-05, + "loss": 3.9708, + "step": 9645 + }, + { + "epoch": 2.06, + "learning_rate": 9.588398379185328e-05, + "loss": 3.9296, + "step": 9650 + }, + { + "epoch": 2.06, + "learning_rate": 9.588185114096822e-05, + "loss": 3.8931, + "step": 9655 + }, + { + "epoch": 2.06, + "learning_rate": 9.587971849008318e-05, + "loss": 3.8585, + "step": 9660 + }, + { + "epoch": 2.06, + "learning_rate": 9.587758583919812e-05, + "loss": 3.8648, + "step": 9665 + }, + { + "epoch": 2.06, + "learning_rate": 9.587545318831307e-05, + "loss": 3.8755, + "step": 9670 + }, + { + "epoch": 2.06, + "learning_rate": 9.587332053742803e-05, + "loss": 3.9061, + "step": 9675 + }, + { + "epoch": 2.06, + "learning_rate": 9.587118788654298e-05, + "loss": 3.931, + "step": 9680 + }, + { + "epoch": 2.07, + "learning_rate": 9.586905523565793e-05, + "loss": 3.8748, + "step": 9685 + }, + { + "epoch": 2.07, + "learning_rate": 9.586692258477288e-05, + "loss": 3.9259, + "step": 9690 + }, + { + "epoch": 2.07, + "learning_rate": 9.586478993388782e-05, + "loss": 3.8767, + "step": 9695 + }, + { + "epoch": 2.07, + "learning_rate": 9.586265728300278e-05, + "loss": 3.8803, + "step": 9700 + }, + { + "epoch": 2.07, + "learning_rate": 9.586052463211773e-05, + "loss": 3.905, + "step": 9705 + }, + { + "epoch": 2.07, + "learning_rate": 9.585839198123267e-05, + "loss": 3.8924, + "step": 9710 + }, + { + "epoch": 2.07, + "learning_rate": 9.585625933034763e-05, + "loss": 3.873, + "step": 9715 + }, + { + "epoch": 2.07, + "learning_rate": 9.585412667946257e-05, + "loss": 3.8622, + "step": 9720 + }, + { + "epoch": 2.07, + "learning_rate": 9.585199402857754e-05, + "loss": 3.8733, + "step": 9725 + }, + { + "epoch": 2.07, + "learning_rate": 9.584986137769248e-05, + "loss": 3.9065, + "step": 9730 + }, + { + "epoch": 2.08, + "learning_rate": 9.584772872680743e-05, + "loss": 4.0425, + "step": 9735 + }, + { + "epoch": 2.08, + "learning_rate": 9.584559607592237e-05, + "loss": 3.8029, + "step": 9740 + }, + { + "epoch": 2.08, + "learning_rate": 9.584346342503733e-05, + "loss": 3.9621, + "step": 9745 + }, + { + "epoch": 2.08, + "learning_rate": 9.584133077415227e-05, + "loss": 3.917, + "step": 9750 + }, + { + "epoch": 2.08, + "learning_rate": 9.583919812326722e-05, + "loss": 3.8314, + "step": 9755 + }, + { + "epoch": 2.08, + "learning_rate": 9.583706547238217e-05, + "loss": 3.7895, + "step": 9760 + }, + { + "epoch": 2.08, + "learning_rate": 9.583493282149712e-05, + "loss": 3.8788, + "step": 9765 + }, + { + "epoch": 2.08, + "learning_rate": 9.583280017061207e-05, + "loss": 3.9547, + "step": 9770 + }, + { + "epoch": 2.08, + "learning_rate": 9.583066751972703e-05, + "loss": 3.8664, + "step": 9775 + }, + { + "epoch": 2.09, + "learning_rate": 9.582853486884198e-05, + "loss": 3.863, + "step": 9780 + }, + { + "epoch": 2.09, + "learning_rate": 9.582640221795693e-05, + "loss": 3.8867, + "step": 9785 + }, + { + "epoch": 2.09, + "learning_rate": 9.582426956707188e-05, + "loss": 3.9876, + "step": 9790 + }, + { + "epoch": 2.09, + "learning_rate": 9.582213691618682e-05, + "loss": 3.9828, + "step": 9795 + }, + { + "epoch": 2.09, + "learning_rate": 9.582000426530178e-05, + "loss": 3.8856, + "step": 9800 + }, + { + "epoch": 2.09, + "learning_rate": 9.581787161441672e-05, + "loss": 3.8726, + "step": 9805 + }, + { + "epoch": 2.09, + "learning_rate": 9.581573896353167e-05, + "loss": 3.8798, + "step": 9810 + }, + { + "epoch": 2.09, + "learning_rate": 9.581360631264661e-05, + "loss": 3.8873, + "step": 9815 + }, + { + "epoch": 2.09, + "learning_rate": 9.581147366176157e-05, + "loss": 3.8703, + "step": 9820 + }, + { + "epoch": 2.09, + "learning_rate": 9.580934101087652e-05, + "loss": 3.8842, + "step": 9825 + }, + { + "epoch": 2.1, + "learning_rate": 9.580720835999148e-05, + "loss": 3.769, + "step": 9830 + }, + { + "epoch": 2.1, + "learning_rate": 9.580507570910643e-05, + "loss": 3.9277, + "step": 9835 + }, + { + "epoch": 2.1, + "learning_rate": 9.580294305822137e-05, + "loss": 3.8941, + "step": 9840 + }, + { + "epoch": 2.1, + "learning_rate": 9.580081040733633e-05, + "loss": 3.9055, + "step": 9845 + }, + { + "epoch": 2.1, + "learning_rate": 9.579867775645127e-05, + "loss": 3.8511, + "step": 9850 + }, + { + "epoch": 2.1, + "learning_rate": 9.579654510556622e-05, + "loss": 3.8535, + "step": 9855 + }, + { + "epoch": 2.1, + "learning_rate": 9.579441245468117e-05, + "loss": 3.9194, + "step": 9860 + }, + { + "epoch": 2.1, + "learning_rate": 9.579227980379612e-05, + "loss": 4.0642, + "step": 9865 + }, + { + "epoch": 2.1, + "learning_rate": 9.579014715291107e-05, + "loss": 3.7907, + "step": 9870 + }, + { + "epoch": 2.11, + "learning_rate": 9.578801450202603e-05, + "loss": 3.8971, + "step": 9875 + }, + { + "epoch": 2.11, + "learning_rate": 9.578588185114097e-05, + "loss": 3.9967, + "step": 9880 + }, + { + "epoch": 2.11, + "learning_rate": 9.578374920025593e-05, + "loss": 3.8269, + "step": 9885 + }, + { + "epoch": 2.11, + "learning_rate": 9.578161654937087e-05, + "loss": 3.7898, + "step": 9890 + }, + { + "epoch": 2.11, + "learning_rate": 9.577948389848582e-05, + "loss": 3.9452, + "step": 9895 + }, + { + "epoch": 2.11, + "learning_rate": 9.577735124760078e-05, + "loss": 3.8991, + "step": 9900 + }, + { + "epoch": 2.11, + "learning_rate": 9.577521859671572e-05, + "loss": 3.9386, + "step": 9905 + }, + { + "epoch": 2.11, + "learning_rate": 9.577308594583067e-05, + "loss": 3.8612, + "step": 9910 + }, + { + "epoch": 2.11, + "learning_rate": 9.577095329494561e-05, + "loss": 3.9359, + "step": 9915 + }, + { + "epoch": 2.12, + "learning_rate": 9.576882064406058e-05, + "loss": 3.9041, + "step": 9920 + }, + { + "epoch": 2.12, + "learning_rate": 9.576668799317552e-05, + "loss": 3.9987, + "step": 9925 + }, + { + "epoch": 2.12, + "learning_rate": 9.576455534229048e-05, + "loss": 3.8928, + "step": 9930 + }, + { + "epoch": 2.12, + "learning_rate": 9.576242269140542e-05, + "loss": 3.9351, + "step": 9935 + }, + { + "epoch": 2.12, + "learning_rate": 9.576029004052037e-05, + "loss": 3.8734, + "step": 9940 + }, + { + "epoch": 2.12, + "learning_rate": 9.575815738963531e-05, + "loss": 3.9403, + "step": 9945 + }, + { + "epoch": 2.12, + "learning_rate": 9.575602473875027e-05, + "loss": 3.9082, + "step": 9950 + }, + { + "epoch": 2.12, + "learning_rate": 9.575389208786521e-05, + "loss": 3.9097, + "step": 9955 + }, + { + "epoch": 2.12, + "learning_rate": 9.575175943698017e-05, + "loss": 3.93, + "step": 9960 + }, + { + "epoch": 2.12, + "learning_rate": 9.574962678609512e-05, + "loss": 3.9413, + "step": 9965 + }, + { + "epoch": 2.13, + "learning_rate": 9.574749413521007e-05, + "loss": 3.8804, + "step": 9970 + }, + { + "epoch": 2.13, + "learning_rate": 9.574536148432503e-05, + "loss": 3.8673, + "step": 9975 + }, + { + "epoch": 2.13, + "learning_rate": 9.574322883343997e-05, + "loss": 3.8399, + "step": 9980 + }, + { + "epoch": 2.13, + "learning_rate": 9.574109618255493e-05, + "loss": 3.8549, + "step": 9985 + }, + { + "epoch": 2.13, + "learning_rate": 9.573896353166987e-05, + "loss": 3.8775, + "step": 9990 + }, + { + "epoch": 2.13, + "learning_rate": 9.573683088078482e-05, + "loss": 3.865, + "step": 9995 + }, + { + "epoch": 2.13, + "learning_rate": 9.573469822989976e-05, + "loss": 3.8723, + "step": 10000 + }, + { + "epoch": 2.13, + "learning_rate": 9.573256557901472e-05, + "loss": 3.8955, + "step": 10005 + }, + { + "epoch": 2.13, + "learning_rate": 9.573043292812966e-05, + "loss": 3.9485, + "step": 10010 + }, + { + "epoch": 2.14, + "learning_rate": 9.572830027724463e-05, + "loss": 3.8632, + "step": 10015 + }, + { + "epoch": 2.14, + "learning_rate": 9.572616762635957e-05, + "loss": 3.9496, + "step": 10020 + }, + { + "epoch": 2.14, + "learning_rate": 9.572403497547452e-05, + "loss": 3.9024, + "step": 10025 + }, + { + "epoch": 2.14, + "learning_rate": 9.572190232458948e-05, + "loss": 3.8518, + "step": 10030 + }, + { + "epoch": 2.14, + "learning_rate": 9.571976967370442e-05, + "loss": 3.901, + "step": 10035 + }, + { + "epoch": 2.14, + "learning_rate": 9.571763702281937e-05, + "loss": 3.8741, + "step": 10040 + }, + { + "epoch": 2.14, + "learning_rate": 9.571550437193431e-05, + "loss": 3.8388, + "step": 10045 + }, + { + "epoch": 2.14, + "learning_rate": 9.571337172104927e-05, + "loss": 3.8321, + "step": 10050 + }, + { + "epoch": 2.14, + "learning_rate": 9.571123907016421e-05, + "loss": 3.9299, + "step": 10055 + }, + { + "epoch": 2.15, + "learning_rate": 9.570910641927917e-05, + "loss": 3.8561, + "step": 10060 + }, + { + "epoch": 2.15, + "learning_rate": 9.570697376839412e-05, + "loss": 3.8446, + "step": 10065 + }, + { + "epoch": 2.15, + "learning_rate": 9.570484111750907e-05, + "loss": 3.8178, + "step": 10070 + }, + { + "epoch": 2.15, + "learning_rate": 9.570270846662402e-05, + "loss": 3.9552, + "step": 10075 + }, + { + "epoch": 2.15, + "learning_rate": 9.570057581573897e-05, + "loss": 3.7841, + "step": 10080 + }, + { + "epoch": 2.15, + "learning_rate": 9.569844316485391e-05, + "loss": 3.8744, + "step": 10085 + }, + { + "epoch": 2.15, + "learning_rate": 9.569631051396887e-05, + "loss": 3.9294, + "step": 10090 + }, + { + "epoch": 2.15, + "learning_rate": 9.569417786308382e-05, + "loss": 3.7963, + "step": 10095 + }, + { + "epoch": 2.15, + "learning_rate": 9.569204521219876e-05, + "loss": 3.9618, + "step": 10100 + }, + { + "epoch": 2.15, + "learning_rate": 9.568991256131372e-05, + "loss": 3.9147, + "step": 10105 + }, + { + "epoch": 2.16, + "learning_rate": 9.568777991042866e-05, + "loss": 3.8262, + "step": 10110 + }, + { + "epoch": 2.16, + "learning_rate": 9.568564725954363e-05, + "loss": 3.8093, + "step": 10115 + }, + { + "epoch": 2.16, + "learning_rate": 9.568351460865857e-05, + "loss": 3.9012, + "step": 10120 + }, + { + "epoch": 2.16, + "learning_rate": 9.568138195777352e-05, + "loss": 3.9055, + "step": 10125 + }, + { + "epoch": 2.16, + "learning_rate": 9.567924930688846e-05, + "loss": 3.8433, + "step": 10130 + }, + { + "epoch": 2.16, + "learning_rate": 9.567711665600342e-05, + "loss": 3.8674, + "step": 10135 + }, + { + "epoch": 2.16, + "learning_rate": 9.567498400511836e-05, + "loss": 4.0064, + "step": 10140 + }, + { + "epoch": 2.16, + "learning_rate": 9.567285135423331e-05, + "loss": 3.8263, + "step": 10145 + }, + { + "epoch": 2.16, + "learning_rate": 9.567071870334826e-05, + "loss": 3.8567, + "step": 10150 + }, + { + "epoch": 2.17, + "learning_rate": 9.566858605246321e-05, + "loss": 3.882, + "step": 10155 + }, + { + "epoch": 2.17, + "learning_rate": 9.566645340157817e-05, + "loss": 3.8634, + "step": 10160 + }, + { + "epoch": 2.17, + "learning_rate": 9.566432075069312e-05, + "loss": 4.0028, + "step": 10165 + }, + { + "epoch": 2.17, + "learning_rate": 9.566218809980807e-05, + "loss": 3.8764, + "step": 10170 + }, + { + "epoch": 2.17, + "learning_rate": 9.566005544892302e-05, + "loss": 3.8509, + "step": 10175 + }, + { + "epoch": 2.17, + "learning_rate": 9.565792279803797e-05, + "loss": 3.9297, + "step": 10180 + }, + { + "epoch": 2.17, + "learning_rate": 9.565579014715291e-05, + "loss": 3.885, + "step": 10185 + }, + { + "epoch": 2.17, + "learning_rate": 9.565365749626787e-05, + "loss": 3.8632, + "step": 10190 + }, + { + "epoch": 2.17, + "learning_rate": 9.565152484538281e-05, + "loss": 3.8047, + "step": 10195 + }, + { + "epoch": 2.17, + "learning_rate": 9.564939219449776e-05, + "loss": 3.9207, + "step": 10200 + }, + { + "epoch": 2.18, + "learning_rate": 9.56472595436127e-05, + "loss": 3.842, + "step": 10205 + }, + { + "epoch": 2.18, + "learning_rate": 9.564512689272767e-05, + "loss": 3.8528, + "step": 10210 + }, + { + "epoch": 2.18, + "learning_rate": 9.564299424184261e-05, + "loss": 3.9084, + "step": 10215 + }, + { + "epoch": 2.18, + "learning_rate": 9.564086159095757e-05, + "loss": 3.9161, + "step": 10220 + }, + { + "epoch": 2.18, + "learning_rate": 9.563872894007252e-05, + "loss": 3.8518, + "step": 10225 + }, + { + "epoch": 2.18, + "learning_rate": 9.563659628918746e-05, + "loss": 3.7427, + "step": 10230 + }, + { + "epoch": 2.18, + "learning_rate": 9.563446363830242e-05, + "loss": 3.9175, + "step": 10235 + }, + { + "epoch": 2.18, + "learning_rate": 9.563233098741736e-05, + "loss": 3.8272, + "step": 10240 + }, + { + "epoch": 2.18, + "learning_rate": 9.563019833653231e-05, + "loss": 3.9403, + "step": 10245 + }, + { + "epoch": 2.19, + "learning_rate": 9.562806568564726e-05, + "loss": 3.7691, + "step": 10250 + }, + { + "epoch": 2.19, + "learning_rate": 9.562593303476221e-05, + "loss": 3.856, + "step": 10255 + }, + { + "epoch": 2.19, + "learning_rate": 9.562380038387717e-05, + "loss": 3.8328, + "step": 10260 + }, + { + "epoch": 2.19, + "learning_rate": 9.562166773299212e-05, + "loss": 3.9218, + "step": 10265 + }, + { + "epoch": 2.19, + "learning_rate": 9.561953508210706e-05, + "loss": 3.9295, + "step": 10270 + }, + { + "epoch": 2.19, + "learning_rate": 9.561740243122202e-05, + "loss": 3.9037, + "step": 10275 + }, + { + "epoch": 2.19, + "learning_rate": 9.561526978033696e-05, + "loss": 4.0196, + "step": 10280 + }, + { + "epoch": 2.19, + "learning_rate": 9.561313712945191e-05, + "loss": 3.9428, + "step": 10285 + }, + { + "epoch": 2.19, + "learning_rate": 9.561100447856687e-05, + "loss": 3.8756, + "step": 10290 + }, + { + "epoch": 2.2, + "learning_rate": 9.560887182768181e-05, + "loss": 3.9362, + "step": 10295 + }, + { + "epoch": 2.2, + "learning_rate": 9.560673917679676e-05, + "loss": 3.931, + "step": 10300 + }, + { + "epoch": 2.2, + "learning_rate": 9.56046065259117e-05, + "loss": 3.9106, + "step": 10305 + }, + { + "epoch": 2.2, + "learning_rate": 9.560247387502667e-05, + "loss": 3.8869, + "step": 10310 + }, + { + "epoch": 2.2, + "learning_rate": 9.560034122414161e-05, + "loss": 3.8791, + "step": 10315 + }, + { + "epoch": 2.2, + "learning_rate": 9.559820857325657e-05, + "loss": 3.9313, + "step": 10320 + }, + { + "epoch": 2.2, + "learning_rate": 9.559607592237151e-05, + "loss": 3.8584, + "step": 10325 + }, + { + "epoch": 2.2, + "learning_rate": 9.559394327148646e-05, + "loss": 3.8393, + "step": 10330 + }, + { + "epoch": 2.2, + "learning_rate": 9.55918106206014e-05, + "loss": 3.9054, + "step": 10335 + }, + { + "epoch": 2.2, + "learning_rate": 9.558967796971636e-05, + "loss": 3.8296, + "step": 10340 + }, + { + "epoch": 2.21, + "learning_rate": 9.55875453188313e-05, + "loss": 3.8346, + "step": 10345 + }, + { + "epoch": 2.21, + "learning_rate": 9.558541266794626e-05, + "loss": 3.8221, + "step": 10350 + }, + { + "epoch": 2.21, + "learning_rate": 9.558328001706121e-05, + "loss": 3.8941, + "step": 10355 + }, + { + "epoch": 2.21, + "learning_rate": 9.558114736617617e-05, + "loss": 3.8657, + "step": 10360 + }, + { + "epoch": 2.21, + "learning_rate": 9.557901471529112e-05, + "loss": 3.8482, + "step": 10365 + }, + { + "epoch": 2.21, + "learning_rate": 9.557688206440606e-05, + "loss": 3.8533, + "step": 10370 + }, + { + "epoch": 2.21, + "learning_rate": 9.557474941352102e-05, + "loss": 3.8012, + "step": 10375 + }, + { + "epoch": 2.21, + "learning_rate": 9.557261676263596e-05, + "loss": 3.8421, + "step": 10380 + }, + { + "epoch": 2.21, + "learning_rate": 9.557048411175091e-05, + "loss": 3.9025, + "step": 10385 + }, + { + "epoch": 2.22, + "learning_rate": 9.556835146086585e-05, + "loss": 3.8446, + "step": 10390 + }, + { + "epoch": 2.22, + "learning_rate": 9.556621880998081e-05, + "loss": 3.908, + "step": 10395 + }, + { + "epoch": 2.22, + "learning_rate": 9.556408615909575e-05, + "loss": 3.8781, + "step": 10400 + }, + { + "epoch": 2.22, + "learning_rate": 9.556195350821072e-05, + "loss": 3.867, + "step": 10405 + }, + { + "epoch": 2.22, + "learning_rate": 9.555982085732567e-05, + "loss": 3.8401, + "step": 10410 + }, + { + "epoch": 2.22, + "learning_rate": 9.555768820644061e-05, + "loss": 3.9424, + "step": 10415 + }, + { + "epoch": 2.22, + "learning_rate": 9.555555555555557e-05, + "loss": 3.8786, + "step": 10420 + }, + { + "epoch": 2.22, + "learning_rate": 9.555342290467051e-05, + "loss": 3.8737, + "step": 10425 + }, + { + "epoch": 2.22, + "learning_rate": 9.555129025378546e-05, + "loss": 3.8781, + "step": 10430 + }, + { + "epoch": 2.23, + "learning_rate": 9.55491576029004e-05, + "loss": 3.7376, + "step": 10435 + }, + { + "epoch": 2.23, + "learning_rate": 9.554702495201536e-05, + "loss": 3.9315, + "step": 10440 + }, + { + "epoch": 2.23, + "learning_rate": 9.55448923011303e-05, + "loss": 3.8474, + "step": 10445 + }, + { + "epoch": 2.23, + "learning_rate": 9.554275965024526e-05, + "loss": 3.8621, + "step": 10450 + }, + { + "epoch": 2.23, + "learning_rate": 9.554062699936021e-05, + "loss": 3.9362, + "step": 10455 + }, + { + "epoch": 2.23, + "learning_rate": 9.553849434847517e-05, + "loss": 3.9791, + "step": 10460 + }, + { + "epoch": 2.23, + "learning_rate": 9.55363616975901e-05, + "loss": 3.8505, + "step": 10465 + }, + { + "epoch": 2.23, + "learning_rate": 9.553422904670506e-05, + "loss": 3.9466, + "step": 10470 + }, + { + "epoch": 2.23, + "learning_rate": 9.553209639582002e-05, + "loss": 3.9119, + "step": 10475 + }, + { + "epoch": 2.23, + "learning_rate": 9.552996374493496e-05, + "loss": 4.0029, + "step": 10480 + }, + { + "epoch": 2.24, + "learning_rate": 9.552783109404991e-05, + "loss": 3.926, + "step": 10485 + }, + { + "epoch": 2.24, + "learning_rate": 9.552569844316485e-05, + "loss": 3.7486, + "step": 10490 + }, + { + "epoch": 2.24, + "learning_rate": 9.552356579227981e-05, + "loss": 3.8645, + "step": 10495 + }, + { + "epoch": 2.24, + "learning_rate": 9.552143314139475e-05, + "loss": 3.8516, + "step": 10500 + }, + { + "epoch": 2.24, + "learning_rate": 9.551930049050972e-05, + "loss": 3.8728, + "step": 10505 + }, + { + "epoch": 2.24, + "learning_rate": 9.551716783962466e-05, + "loss": 3.82, + "step": 10510 + }, + { + "epoch": 2.24, + "learning_rate": 9.551503518873961e-05, + "loss": 3.9228, + "step": 10515 + }, + { + "epoch": 2.24, + "learning_rate": 9.551290253785455e-05, + "loss": 4.0067, + "step": 10520 + }, + { + "epoch": 2.24, + "learning_rate": 9.551076988696951e-05, + "loss": 3.9225, + "step": 10525 + }, + { + "epoch": 2.25, + "learning_rate": 9.550863723608445e-05, + "loss": 3.8593, + "step": 10530 + }, + { + "epoch": 2.25, + "learning_rate": 9.55065045851994e-05, + "loss": 3.8555, + "step": 10535 + }, + { + "epoch": 2.25, + "learning_rate": 9.550437193431436e-05, + "loss": 3.8449, + "step": 10540 + }, + { + "epoch": 2.25, + "learning_rate": 9.55022392834293e-05, + "loss": 3.865, + "step": 10545 + }, + { + "epoch": 2.25, + "learning_rate": 9.550010663254426e-05, + "loss": 3.8232, + "step": 10550 + }, + { + "epoch": 2.25, + "learning_rate": 9.549797398165921e-05, + "loss": 3.8967, + "step": 10555 + }, + { + "epoch": 2.25, + "learning_rate": 9.549584133077417e-05, + "loss": 3.7787, + "step": 10560 + }, + { + "epoch": 2.25, + "learning_rate": 9.54937086798891e-05, + "loss": 3.8804, + "step": 10565 + }, + { + "epoch": 2.25, + "learning_rate": 9.549157602900406e-05, + "loss": 3.8963, + "step": 10570 + }, + { + "epoch": 2.25, + "learning_rate": 9.5489443378119e-05, + "loss": 3.8402, + "step": 10575 + }, + { + "epoch": 2.26, + "learning_rate": 9.548731072723396e-05, + "loss": 3.9114, + "step": 10580 + }, + { + "epoch": 2.26, + "learning_rate": 9.54851780763489e-05, + "loss": 3.9136, + "step": 10585 + }, + { + "epoch": 2.26, + "learning_rate": 9.548304542546385e-05, + "loss": 3.8516, + "step": 10590 + }, + { + "epoch": 2.26, + "learning_rate": 9.54809127745788e-05, + "loss": 3.7779, + "step": 10595 + }, + { + "epoch": 2.26, + "learning_rate": 9.547878012369376e-05, + "loss": 3.8112, + "step": 10600 + }, + { + "epoch": 2.26, + "learning_rate": 9.547664747280872e-05, + "loss": 3.9328, + "step": 10605 + }, + { + "epoch": 2.26, + "learning_rate": 9.547451482192366e-05, + "loss": 3.7798, + "step": 10610 + }, + { + "epoch": 2.26, + "learning_rate": 9.547238217103861e-05, + "loss": 3.9559, + "step": 10615 + }, + { + "epoch": 2.26, + "learning_rate": 9.547024952015355e-05, + "loss": 3.9166, + "step": 10620 + }, + { + "epoch": 2.27, + "learning_rate": 9.546811686926851e-05, + "loss": 3.8668, + "step": 10625 + }, + { + "epoch": 2.27, + "learning_rate": 9.546598421838345e-05, + "loss": 3.856, + "step": 10630 + }, + { + "epoch": 2.27, + "learning_rate": 9.54638515674984e-05, + "loss": 3.9358, + "step": 10635 + }, + { + "epoch": 2.27, + "learning_rate": 9.546171891661335e-05, + "loss": 3.8494, + "step": 10640 + }, + { + "epoch": 2.27, + "learning_rate": 9.54595862657283e-05, + "loss": 3.8878, + "step": 10645 + }, + { + "epoch": 2.27, + "learning_rate": 9.545745361484326e-05, + "loss": 3.8773, + "step": 10650 + }, + { + "epoch": 2.27, + "learning_rate": 9.545532096395821e-05, + "loss": 3.9263, + "step": 10655 + }, + { + "epoch": 2.27, + "learning_rate": 9.545318831307315e-05, + "loss": 3.7892, + "step": 10660 + }, + { + "epoch": 2.27, + "learning_rate": 9.545105566218811e-05, + "loss": 3.7867, + "step": 10665 + }, + { + "epoch": 2.28, + "learning_rate": 9.544892301130306e-05, + "loss": 3.7862, + "step": 10670 + }, + { + "epoch": 2.28, + "learning_rate": 9.5446790360418e-05, + "loss": 3.7521, + "step": 10675 + }, + { + "epoch": 2.28, + "learning_rate": 9.544465770953296e-05, + "loss": 3.9254, + "step": 10680 + }, + { + "epoch": 2.28, + "learning_rate": 9.54425250586479e-05, + "loss": 3.8303, + "step": 10685 + }, + { + "epoch": 2.28, + "learning_rate": 9.544039240776285e-05, + "loss": 3.754, + "step": 10690 + }, + { + "epoch": 2.28, + "learning_rate": 9.543825975687781e-05, + "loss": 3.8379, + "step": 10695 + }, + { + "epoch": 2.28, + "learning_rate": 9.543612710599276e-05, + "loss": 3.8989, + "step": 10700 + }, + { + "epoch": 2.28, + "learning_rate": 9.54339944551077e-05, + "loss": 3.8993, + "step": 10705 + }, + { + "epoch": 2.28, + "learning_rate": 9.543186180422266e-05, + "loss": 3.8889, + "step": 10710 + }, + { + "epoch": 2.28, + "learning_rate": 9.54297291533376e-05, + "loss": 3.869, + "step": 10715 + }, + { + "epoch": 2.29, + "learning_rate": 9.542759650245255e-05, + "loss": 3.8044, + "step": 10720 + }, + { + "epoch": 2.29, + "learning_rate": 9.54254638515675e-05, + "loss": 3.8364, + "step": 10725 + }, + { + "epoch": 2.29, + "learning_rate": 9.542333120068245e-05, + "loss": 3.8113, + "step": 10730 + }, + { + "epoch": 2.29, + "learning_rate": 9.54211985497974e-05, + "loss": 3.8514, + "step": 10735 + }, + { + "epoch": 2.29, + "learning_rate": 9.541906589891235e-05, + "loss": 3.9185, + "step": 10740 + }, + { + "epoch": 2.29, + "learning_rate": 9.54169332480273e-05, + "loss": 3.978, + "step": 10745 + }, + { + "epoch": 2.29, + "learning_rate": 9.541480059714226e-05, + "loss": 3.8926, + "step": 10750 + }, + { + "epoch": 2.29, + "learning_rate": 9.541266794625721e-05, + "loss": 3.8531, + "step": 10755 + }, + { + "epoch": 2.29, + "learning_rate": 9.541053529537215e-05, + "loss": 3.864, + "step": 10760 + }, + { + "epoch": 2.3, + "learning_rate": 9.540840264448711e-05, + "loss": 3.7836, + "step": 10765 + }, + { + "epoch": 2.3, + "learning_rate": 9.540626999360205e-05, + "loss": 3.905, + "step": 10770 + }, + { + "epoch": 2.3, + "learning_rate": 9.5404137342717e-05, + "loss": 3.9142, + "step": 10775 + }, + { + "epoch": 2.3, + "learning_rate": 9.540200469183194e-05, + "loss": 3.832, + "step": 10780 + }, + { + "epoch": 2.3, + "learning_rate": 9.53998720409469e-05, + "loss": 3.8712, + "step": 10785 + }, + { + "epoch": 2.3, + "learning_rate": 9.539773939006184e-05, + "loss": 3.9275, + "step": 10790 + }, + { + "epoch": 2.3, + "learning_rate": 9.539560673917681e-05, + "loss": 3.8709, + "step": 10795 + }, + { + "epoch": 2.3, + "learning_rate": 9.539347408829176e-05, + "loss": 3.8757, + "step": 10800 + }, + { + "epoch": 2.3, + "learning_rate": 9.53913414374067e-05, + "loss": 3.8324, + "step": 10805 + }, + { + "epoch": 2.31, + "learning_rate": 9.538920878652166e-05, + "loss": 3.8187, + "step": 10810 + }, + { + "epoch": 2.31, + "learning_rate": 9.53870761356366e-05, + "loss": 3.8017, + "step": 10815 + }, + { + "epoch": 2.31, + "learning_rate": 9.538494348475155e-05, + "loss": 3.8841, + "step": 10820 + }, + { + "epoch": 2.31, + "learning_rate": 9.53828108338665e-05, + "loss": 3.8557, + "step": 10825 + }, + { + "epoch": 2.31, + "learning_rate": 9.538067818298145e-05, + "loss": 3.8736, + "step": 10830 + }, + { + "epoch": 2.31, + "learning_rate": 9.537854553209639e-05, + "loss": 3.8095, + "step": 10835 + }, + { + "epoch": 2.31, + "learning_rate": 9.537641288121135e-05, + "loss": 3.8223, + "step": 10840 + }, + { + "epoch": 2.31, + "learning_rate": 9.53742802303263e-05, + "loss": 3.7927, + "step": 10845 + }, + { + "epoch": 2.31, + "learning_rate": 9.537214757944126e-05, + "loss": 3.7873, + "step": 10850 + }, + { + "epoch": 2.31, + "learning_rate": 9.53700149285562e-05, + "loss": 3.9492, + "step": 10855 + }, + { + "epoch": 2.32, + "learning_rate": 9.536788227767115e-05, + "loss": 3.8324, + "step": 10860 + }, + { + "epoch": 2.32, + "learning_rate": 9.536574962678611e-05, + "loss": 3.9354, + "step": 10865 + }, + { + "epoch": 2.32, + "learning_rate": 9.536361697590105e-05, + "loss": 3.8118, + "step": 10870 + }, + { + "epoch": 2.32, + "learning_rate": 9.5361484325016e-05, + "loss": 3.8806, + "step": 10875 + }, + { + "epoch": 2.32, + "learning_rate": 9.535935167413094e-05, + "loss": 3.9292, + "step": 10880 + }, + { + "epoch": 2.32, + "learning_rate": 9.53572190232459e-05, + "loss": 3.8313, + "step": 10885 + }, + { + "epoch": 2.32, + "learning_rate": 9.535508637236085e-05, + "loss": 3.9351, + "step": 10890 + }, + { + "epoch": 2.32, + "learning_rate": 9.535295372147581e-05, + "loss": 3.7931, + "step": 10895 + }, + { + "epoch": 2.32, + "learning_rate": 9.535082107059075e-05, + "loss": 3.8909, + "step": 10900 + }, + { + "epoch": 2.33, + "learning_rate": 9.53486884197057e-05, + "loss": 3.8385, + "step": 10905 + }, + { + "epoch": 2.33, + "learning_rate": 9.534655576882065e-05, + "loss": 3.8528, + "step": 10910 + }, + { + "epoch": 2.33, + "learning_rate": 9.53444231179356e-05, + "loss": 3.9241, + "step": 10915 + }, + { + "epoch": 2.33, + "learning_rate": 9.534229046705054e-05, + "loss": 3.9389, + "step": 10920 + }, + { + "epoch": 2.33, + "learning_rate": 9.53401578161655e-05, + "loss": 3.9009, + "step": 10925 + }, + { + "epoch": 2.33, + "learning_rate": 9.533802516528045e-05, + "loss": 3.8599, + "step": 10930 + }, + { + "epoch": 2.33, + "learning_rate": 9.533589251439539e-05, + "loss": 3.7557, + "step": 10935 + }, + { + "epoch": 2.33, + "learning_rate": 9.533375986351035e-05, + "loss": 3.8791, + "step": 10940 + }, + { + "epoch": 2.33, + "learning_rate": 9.53316272126253e-05, + "loss": 3.784, + "step": 10945 + }, + { + "epoch": 2.33, + "learning_rate": 9.532949456174026e-05, + "loss": 3.8488, + "step": 10950 + }, + { + "epoch": 2.34, + "learning_rate": 9.53273619108552e-05, + "loss": 3.8906, + "step": 10955 + }, + { + "epoch": 2.34, + "learning_rate": 9.532522925997015e-05, + "loss": 3.9001, + "step": 10960 + }, + { + "epoch": 2.34, + "learning_rate": 9.53230966090851e-05, + "loss": 3.8156, + "step": 10965 + }, + { + "epoch": 2.34, + "learning_rate": 9.532096395820005e-05, + "loss": 3.9544, + "step": 10970 + }, + { + "epoch": 2.34, + "learning_rate": 9.531883130731499e-05, + "loss": 3.8833, + "step": 10975 + }, + { + "epoch": 2.34, + "learning_rate": 9.531669865642994e-05, + "loss": 3.8381, + "step": 10980 + }, + { + "epoch": 2.34, + "learning_rate": 9.531456600554489e-05, + "loss": 3.8852, + "step": 10985 + }, + { + "epoch": 2.34, + "learning_rate": 9.531243335465985e-05, + "loss": 3.8482, + "step": 10990 + }, + { + "epoch": 2.34, + "learning_rate": 9.531030070377481e-05, + "loss": 3.8442, + "step": 10995 + }, + { + "epoch": 2.35, + "learning_rate": 9.530816805288975e-05, + "loss": 3.9358, + "step": 11000 + }, + { + "epoch": 2.35, + "learning_rate": 9.53060354020047e-05, + "loss": 3.8005, + "step": 11005 + }, + { + "epoch": 2.35, + "learning_rate": 9.530390275111965e-05, + "loss": 3.8792, + "step": 11010 + }, + { + "epoch": 2.35, + "learning_rate": 9.53017701002346e-05, + "loss": 3.7995, + "step": 11015 + }, + { + "epoch": 2.35, + "learning_rate": 9.529963744934954e-05, + "loss": 3.8723, + "step": 11020 + }, + { + "epoch": 2.35, + "learning_rate": 9.52975047984645e-05, + "loss": 3.9429, + "step": 11025 + }, + { + "epoch": 2.35, + "learning_rate": 9.529537214757944e-05, + "loss": 3.9988, + "step": 11030 + }, + { + "epoch": 2.35, + "learning_rate": 9.529323949669439e-05, + "loss": 3.9746, + "step": 11035 + }, + { + "epoch": 2.35, + "learning_rate": 9.529110684580935e-05, + "loss": 3.9214, + "step": 11040 + }, + { + "epoch": 2.36, + "learning_rate": 9.52889741949243e-05, + "loss": 3.9161, + "step": 11045 + }, + { + "epoch": 2.36, + "learning_rate": 9.528684154403924e-05, + "loss": 3.8679, + "step": 11050 + }, + { + "epoch": 2.36, + "learning_rate": 9.52847088931542e-05, + "loss": 3.9297, + "step": 11055 + }, + { + "epoch": 2.36, + "learning_rate": 9.528257624226915e-05, + "loss": 3.8636, + "step": 11060 + }, + { + "epoch": 2.36, + "learning_rate": 9.52804435913841e-05, + "loss": 3.8815, + "step": 11065 + }, + { + "epoch": 2.36, + "learning_rate": 9.527831094049905e-05, + "loss": 3.8747, + "step": 11070 + }, + { + "epoch": 2.36, + "learning_rate": 9.527617828961399e-05, + "loss": 3.9002, + "step": 11075 + }, + { + "epoch": 2.36, + "learning_rate": 9.527404563872894e-05, + "loss": 3.8004, + "step": 11080 + }, + { + "epoch": 2.36, + "learning_rate": 9.52719129878439e-05, + "loss": 3.8331, + "step": 11085 + }, + { + "epoch": 2.36, + "learning_rate": 9.526978033695885e-05, + "loss": 3.9091, + "step": 11090 + }, + { + "epoch": 2.37, + "learning_rate": 9.52676476860738e-05, + "loss": 3.8626, + "step": 11095 + }, + { + "epoch": 2.37, + "learning_rate": 9.526551503518875e-05, + "loss": 3.7815, + "step": 11100 + }, + { + "epoch": 2.37, + "learning_rate": 9.526338238430369e-05, + "loss": 3.823, + "step": 11105 + }, + { + "epoch": 2.37, + "learning_rate": 9.526124973341865e-05, + "loss": 3.8166, + "step": 11110 + }, + { + "epoch": 2.37, + "learning_rate": 9.525911708253359e-05, + "loss": 3.8537, + "step": 11115 + }, + { + "epoch": 2.37, + "learning_rate": 9.525698443164854e-05, + "loss": 3.8056, + "step": 11120 + }, + { + "epoch": 2.37, + "learning_rate": 9.52548517807635e-05, + "loss": 3.8709, + "step": 11125 + }, + { + "epoch": 2.37, + "learning_rate": 9.525271912987844e-05, + "loss": 3.8565, + "step": 11130 + }, + { + "epoch": 2.37, + "learning_rate": 9.525058647899339e-05, + "loss": 3.8792, + "step": 11135 + }, + { + "epoch": 2.38, + "learning_rate": 9.524845382810835e-05, + "loss": 3.7237, + "step": 11140 + }, + { + "epoch": 2.38, + "learning_rate": 9.52463211772233e-05, + "loss": 3.8421, + "step": 11145 + }, + { + "epoch": 2.38, + "learning_rate": 9.524418852633824e-05, + "loss": 3.8637, + "step": 11150 + }, + { + "epoch": 2.38, + "learning_rate": 9.52420558754532e-05, + "loss": 3.8359, + "step": 11155 + }, + { + "epoch": 2.38, + "learning_rate": 9.523992322456814e-05, + "loss": 3.8814, + "step": 11160 + }, + { + "epoch": 2.38, + "learning_rate": 9.52377905736831e-05, + "loss": 3.8743, + "step": 11165 + }, + { + "epoch": 2.38, + "learning_rate": 9.523565792279803e-05, + "loss": 3.832, + "step": 11170 + }, + { + "epoch": 2.38, + "learning_rate": 9.523352527191299e-05, + "loss": 3.9288, + "step": 11175 + }, + { + "epoch": 2.38, + "learning_rate": 9.523139262102793e-05, + "loss": 3.8756, + "step": 11180 + }, + { + "epoch": 2.38, + "learning_rate": 9.52292599701429e-05, + "loss": 3.7986, + "step": 11185 + }, + { + "epoch": 2.39, + "learning_rate": 9.522712731925785e-05, + "loss": 3.9345, + "step": 11190 + }, + { + "epoch": 2.39, + "learning_rate": 9.52249946683728e-05, + "loss": 3.8711, + "step": 11195 + }, + { + "epoch": 2.39, + "learning_rate": 9.522286201748775e-05, + "loss": 3.7496, + "step": 11200 + }, + { + "epoch": 2.39, + "learning_rate": 9.522072936660269e-05, + "loss": 3.8577, + "step": 11205 + }, + { + "epoch": 2.39, + "learning_rate": 9.521859671571765e-05, + "loss": 3.8267, + "step": 11210 + }, + { + "epoch": 2.39, + "learning_rate": 9.521646406483259e-05, + "loss": 3.8628, + "step": 11215 + }, + { + "epoch": 2.39, + "learning_rate": 9.521433141394754e-05, + "loss": 3.7818, + "step": 11220 + }, + { + "epoch": 2.39, + "learning_rate": 9.521219876306248e-05, + "loss": 3.8626, + "step": 11225 + }, + { + "epoch": 2.39, + "learning_rate": 9.521006611217744e-05, + "loss": 3.7875, + "step": 11230 + }, + { + "epoch": 2.4, + "learning_rate": 9.520793346129239e-05, + "loss": 3.8996, + "step": 11235 + }, + { + "epoch": 2.4, + "learning_rate": 9.520580081040735e-05, + "loss": 3.922, + "step": 11240 + }, + { + "epoch": 2.4, + "learning_rate": 9.520366815952229e-05, + "loss": 3.9729, + "step": 11245 + }, + { + "epoch": 2.4, + "learning_rate": 9.520153550863724e-05, + "loss": 3.8814, + "step": 11250 + }, + { + "epoch": 2.4, + "learning_rate": 9.51994028577522e-05, + "loss": 3.8322, + "step": 11255 + }, + { + "epoch": 2.4, + "learning_rate": 9.519727020686714e-05, + "loss": 3.9059, + "step": 11260 + }, + { + "epoch": 2.4, + "learning_rate": 9.51951375559821e-05, + "loss": 3.8843, + "step": 11265 + }, + { + "epoch": 2.4, + "learning_rate": 9.519300490509703e-05, + "loss": 3.8554, + "step": 11270 + }, + { + "epoch": 2.4, + "learning_rate": 9.519087225421199e-05, + "loss": 3.917, + "step": 11275 + }, + { + "epoch": 2.41, + "learning_rate": 9.518873960332694e-05, + "loss": 3.7869, + "step": 11280 + }, + { + "epoch": 2.41, + "learning_rate": 9.51866069524419e-05, + "loss": 3.7851, + "step": 11285 + }, + { + "epoch": 2.41, + "learning_rate": 9.518447430155684e-05, + "loss": 3.8404, + "step": 11290 + }, + { + "epoch": 2.41, + "learning_rate": 9.51823416506718e-05, + "loss": 3.8633, + "step": 11295 + }, + { + "epoch": 2.41, + "learning_rate": 9.518020899978674e-05, + "loss": 3.8489, + "step": 11300 + }, + { + "epoch": 2.41, + "learning_rate": 9.517807634890169e-05, + "loss": 3.8636, + "step": 11305 + }, + { + "epoch": 2.41, + "learning_rate": 9.517594369801663e-05, + "loss": 3.9264, + "step": 11310 + }, + { + "epoch": 2.41, + "learning_rate": 9.517381104713159e-05, + "loss": 3.868, + "step": 11315 + }, + { + "epoch": 2.41, + "learning_rate": 9.517167839624654e-05, + "loss": 3.9043, + "step": 11320 + }, + { + "epoch": 2.41, + "learning_rate": 9.516954574536148e-05, + "loss": 3.8982, + "step": 11325 + }, + { + "epoch": 2.42, + "learning_rate": 9.516741309447644e-05, + "loss": 3.8716, + "step": 11330 + }, + { + "epoch": 2.42, + "learning_rate": 9.516528044359139e-05, + "loss": 3.8573, + "step": 11335 + }, + { + "epoch": 2.42, + "learning_rate": 9.516314779270635e-05, + "loss": 3.8715, + "step": 11340 + }, + { + "epoch": 2.42, + "learning_rate": 9.516101514182129e-05, + "loss": 3.9904, + "step": 11345 + }, + { + "epoch": 2.42, + "learning_rate": 9.515888249093624e-05, + "loss": 3.8789, + "step": 11350 + }, + { + "epoch": 2.42, + "learning_rate": 9.515674984005118e-05, + "loss": 3.8145, + "step": 11355 + }, + { + "epoch": 2.42, + "learning_rate": 9.515461718916614e-05, + "loss": 3.8247, + "step": 11360 + }, + { + "epoch": 2.42, + "learning_rate": 9.515248453828108e-05, + "loss": 3.9156, + "step": 11365 + }, + { + "epoch": 2.42, + "learning_rate": 9.515035188739603e-05, + "loss": 3.8972, + "step": 11370 + }, + { + "epoch": 2.43, + "learning_rate": 9.514821923651099e-05, + "loss": 3.8997, + "step": 11375 + }, + { + "epoch": 2.43, + "learning_rate": 9.514608658562594e-05, + "loss": 3.9253, + "step": 11380 + }, + { + "epoch": 2.43, + "learning_rate": 9.51439539347409e-05, + "loss": 3.8999, + "step": 11385 + }, + { + "epoch": 2.43, + "learning_rate": 9.514182128385584e-05, + "loss": 3.9728, + "step": 11390 + }, + { + "epoch": 2.43, + "learning_rate": 9.51396886329708e-05, + "loss": 3.8845, + "step": 11395 + }, + { + "epoch": 2.43, + "learning_rate": 9.513755598208574e-05, + "loss": 3.854, + "step": 11400 + }, + { + "epoch": 2.43, + "learning_rate": 9.513542333120069e-05, + "loss": 3.8744, + "step": 11405 + }, + { + "epoch": 2.43, + "learning_rate": 9.513329068031563e-05, + "loss": 3.8516, + "step": 11410 + }, + { + "epoch": 2.43, + "learning_rate": 9.513115802943059e-05, + "loss": 3.7825, + "step": 11415 + }, + { + "epoch": 2.44, + "learning_rate": 9.512902537854553e-05, + "loss": 3.8688, + "step": 11420 + }, + { + "epoch": 2.44, + "learning_rate": 9.512689272766048e-05, + "loss": 3.8947, + "step": 11425 + }, + { + "epoch": 2.44, + "learning_rate": 9.512476007677544e-05, + "loss": 3.8144, + "step": 11430 + }, + { + "epoch": 2.44, + "learning_rate": 9.512262742589039e-05, + "loss": 3.9057, + "step": 11435 + }, + { + "epoch": 2.44, + "learning_rate": 9.512049477500533e-05, + "loss": 3.9402, + "step": 11440 + }, + { + "epoch": 2.44, + "learning_rate": 9.511836212412029e-05, + "loss": 3.8584, + "step": 11445 + }, + { + "epoch": 2.44, + "learning_rate": 9.511622947323524e-05, + "loss": 3.9328, + "step": 11450 + }, + { + "epoch": 2.44, + "learning_rate": 9.511409682235018e-05, + "loss": 3.9059, + "step": 11455 + }, + { + "epoch": 2.44, + "learning_rate": 9.511196417146514e-05, + "loss": 3.887, + "step": 11460 + }, + { + "epoch": 2.44, + "learning_rate": 9.510983152058008e-05, + "loss": 3.8703, + "step": 11465 + }, + { + "epoch": 2.45, + "learning_rate": 9.510769886969503e-05, + "loss": 3.9565, + "step": 11470 + }, + { + "epoch": 2.45, + "learning_rate": 9.510556621880999e-05, + "loss": 3.9683, + "step": 11475 + }, + { + "epoch": 2.45, + "learning_rate": 9.510343356792494e-05, + "loss": 3.8323, + "step": 11480 + }, + { + "epoch": 2.45, + "learning_rate": 9.510130091703989e-05, + "loss": 3.9019, + "step": 11485 + }, + { + "epoch": 2.45, + "learning_rate": 9.509916826615484e-05, + "loss": 3.9326, + "step": 11490 + }, + { + "epoch": 2.45, + "learning_rate": 9.509703561526978e-05, + "loss": 3.8418, + "step": 11495 + }, + { + "epoch": 2.45, + "learning_rate": 9.509490296438474e-05, + "loss": 3.941, + "step": 11500 + }, + { + "epoch": 2.45, + "learning_rate": 9.509277031349968e-05, + "loss": 3.8944, + "step": 11505 + }, + { + "epoch": 2.45, + "learning_rate": 9.509063766261463e-05, + "loss": 3.8895, + "step": 11510 + }, + { + "epoch": 2.46, + "learning_rate": 9.508850501172959e-05, + "loss": 3.877, + "step": 11515 + }, + { + "epoch": 2.46, + "learning_rate": 9.508637236084453e-05, + "loss": 3.8945, + "step": 11520 + }, + { + "epoch": 2.46, + "learning_rate": 9.508423970995948e-05, + "loss": 3.9132, + "step": 11525 + }, + { + "epoch": 2.46, + "learning_rate": 9.508210705907444e-05, + "loss": 3.8479, + "step": 11530 + }, + { + "epoch": 2.46, + "learning_rate": 9.507997440818939e-05, + "loss": 3.9395, + "step": 11535 + }, + { + "epoch": 2.46, + "learning_rate": 9.507784175730433e-05, + "loss": 3.7716, + "step": 11540 + }, + { + "epoch": 2.46, + "learning_rate": 9.507570910641929e-05, + "loss": 4.0152, + "step": 11545 + }, + { + "epoch": 2.46, + "learning_rate": 9.507357645553423e-05, + "loss": 3.8792, + "step": 11550 + }, + { + "epoch": 2.46, + "learning_rate": 9.507144380464918e-05, + "loss": 3.8406, + "step": 11555 + }, + { + "epoch": 2.46, + "learning_rate": 9.506931115376413e-05, + "loss": 3.8779, + "step": 11560 + }, + { + "epoch": 2.47, + "learning_rate": 9.506717850287908e-05, + "loss": 3.8604, + "step": 11565 + }, + { + "epoch": 2.47, + "learning_rate": 9.506504585199403e-05, + "loss": 3.9058, + "step": 11570 + }, + { + "epoch": 2.47, + "learning_rate": 9.506291320110899e-05, + "loss": 3.8959, + "step": 11575 + }, + { + "epoch": 2.47, + "learning_rate": 9.506078055022394e-05, + "loss": 3.9086, + "step": 11580 + }, + { + "epoch": 2.47, + "learning_rate": 9.505864789933889e-05, + "loss": 3.8602, + "step": 11585 + }, + { + "epoch": 2.47, + "learning_rate": 9.505651524845384e-05, + "loss": 3.9418, + "step": 11590 + }, + { + "epoch": 2.47, + "learning_rate": 9.505438259756878e-05, + "loss": 3.8621, + "step": 11595 + }, + { + "epoch": 2.47, + "learning_rate": 9.505224994668374e-05, + "loss": 3.891, + "step": 11600 + }, + { + "epoch": 2.47, + "learning_rate": 9.505011729579868e-05, + "loss": 3.8971, + "step": 11605 + }, + { + "epoch": 2.48, + "learning_rate": 9.504798464491363e-05, + "loss": 3.8623, + "step": 11610 + }, + { + "epoch": 2.48, + "learning_rate": 9.504585199402857e-05, + "loss": 3.8798, + "step": 11615 + }, + { + "epoch": 2.48, + "learning_rate": 9.504371934314353e-05, + "loss": 3.8488, + "step": 11620 + }, + { + "epoch": 2.48, + "learning_rate": 9.504158669225848e-05, + "loss": 3.7583, + "step": 11625 + }, + { + "epoch": 2.48, + "learning_rate": 9.503945404137344e-05, + "loss": 3.833, + "step": 11630 + }, + { + "epoch": 2.48, + "learning_rate": 9.503732139048838e-05, + "loss": 3.8027, + "step": 11635 + }, + { + "epoch": 2.48, + "learning_rate": 9.503518873960333e-05, + "loss": 3.7856, + "step": 11640 + }, + { + "epoch": 2.48, + "learning_rate": 9.503305608871829e-05, + "loss": 3.818, + "step": 11645 + }, + { + "epoch": 2.48, + "learning_rate": 9.503092343783323e-05, + "loss": 3.9409, + "step": 11650 + }, + { + "epoch": 2.49, + "learning_rate": 9.502879078694818e-05, + "loss": 3.8591, + "step": 11655 + }, + { + "epoch": 2.49, + "learning_rate": 9.502665813606313e-05, + "loss": 3.861, + "step": 11660 + }, + { + "epoch": 2.49, + "learning_rate": 9.502452548517808e-05, + "loss": 4.0183, + "step": 11665 + }, + { + "epoch": 2.49, + "learning_rate": 9.502239283429303e-05, + "loss": 3.9115, + "step": 11670 + }, + { + "epoch": 2.49, + "learning_rate": 9.502026018340799e-05, + "loss": 3.8273, + "step": 11675 + }, + { + "epoch": 2.49, + "learning_rate": 9.501812753252293e-05, + "loss": 3.8538, + "step": 11680 + }, + { + "epoch": 2.49, + "learning_rate": 9.501599488163789e-05, + "loss": 3.7904, + "step": 11685 + }, + { + "epoch": 2.49, + "learning_rate": 9.501386223075283e-05, + "loss": 3.8011, + "step": 11690 + }, + { + "epoch": 2.49, + "learning_rate": 9.501172957986778e-05, + "loss": 3.7745, + "step": 11695 + }, + { + "epoch": 2.49, + "learning_rate": 9.500959692898272e-05, + "loss": 3.8577, + "step": 11700 + }, + { + "epoch": 2.5, + "learning_rate": 9.500746427809768e-05, + "loss": 3.8567, + "step": 11705 + }, + { + "epoch": 2.5, + "learning_rate": 9.500533162721263e-05, + "loss": 3.8914, + "step": 11710 + }, + { + "epoch": 2.5, + "learning_rate": 9.500319897632757e-05, + "loss": 3.8989, + "step": 11715 + }, + { + "epoch": 2.5, + "learning_rate": 9.500106632544253e-05, + "loss": 3.815, + "step": 11720 + }, + { + "epoch": 2.5, + "learning_rate": 9.499893367455748e-05, + "loss": 3.8976, + "step": 11725 + }, + { + "epoch": 2.5, + "learning_rate": 9.499680102367244e-05, + "loss": 3.9922, + "step": 11730 + }, + { + "epoch": 2.5, + "learning_rate": 9.499466837278738e-05, + "loss": 3.8147, + "step": 11735 + }, + { + "epoch": 2.5, + "learning_rate": 9.499253572190233e-05, + "loss": 3.8722, + "step": 11740 + }, + { + "epoch": 2.5, + "learning_rate": 9.499040307101727e-05, + "loss": 3.9046, + "step": 11745 + }, + { + "epoch": 2.51, + "learning_rate": 9.498827042013223e-05, + "loss": 3.9106, + "step": 11750 + }, + { + "epoch": 2.51, + "learning_rate": 9.498613776924717e-05, + "loss": 3.9781, + "step": 11755 + }, + { + "epoch": 2.51, + "learning_rate": 9.498400511836213e-05, + "loss": 3.7725, + "step": 11760 + }, + { + "epoch": 2.51, + "learning_rate": 9.498187246747708e-05, + "loss": 4.0173, + "step": 11765 + }, + { + "epoch": 2.51, + "learning_rate": 9.497973981659203e-05, + "loss": 3.8014, + "step": 11770 + }, + { + "epoch": 2.51, + "learning_rate": 9.497760716570699e-05, + "loss": 3.8209, + "step": 11775 + }, + { + "epoch": 2.51, + "learning_rate": 9.497547451482193e-05, + "loss": 3.7884, + "step": 11780 + }, + { + "epoch": 2.51, + "learning_rate": 9.497334186393689e-05, + "loss": 3.9114, + "step": 11785 + }, + { + "epoch": 2.51, + "learning_rate": 9.497120921305183e-05, + "loss": 3.8205, + "step": 11790 + }, + { + "epoch": 2.52, + "learning_rate": 9.496907656216678e-05, + "loss": 3.8869, + "step": 11795 + }, + { + "epoch": 2.52, + "learning_rate": 9.496694391128172e-05, + "loss": 3.847, + "step": 11800 + }, + { + "epoch": 2.52, + "learning_rate": 9.496481126039668e-05, + "loss": 3.9064, + "step": 11805 + }, + { + "epoch": 2.52, + "learning_rate": 9.496267860951162e-05, + "loss": 3.8412, + "step": 11810 + }, + { + "epoch": 2.52, + "learning_rate": 9.496054595862657e-05, + "loss": 3.882, + "step": 11815 + }, + { + "epoch": 2.52, + "learning_rate": 9.495841330774153e-05, + "loss": 3.9267, + "step": 11820 + }, + { + "epoch": 2.52, + "learning_rate": 9.495628065685648e-05, + "loss": 3.9251, + "step": 11825 + }, + { + "epoch": 2.52, + "learning_rate": 9.495414800597142e-05, + "loss": 3.8157, + "step": 11830 + }, + { + "epoch": 2.52, + "learning_rate": 9.495201535508638e-05, + "loss": 3.8826, + "step": 11835 + }, + { + "epoch": 2.52, + "learning_rate": 9.494988270420133e-05, + "loss": 3.7828, + "step": 11840 + }, + { + "epoch": 2.53, + "learning_rate": 9.494775005331627e-05, + "loss": 3.8646, + "step": 11845 + }, + { + "epoch": 2.53, + "learning_rate": 9.494561740243123e-05, + "loss": 3.9115, + "step": 11850 + }, + { + "epoch": 2.53, + "learning_rate": 9.494348475154617e-05, + "loss": 3.8406, + "step": 11855 + }, + { + "epoch": 2.53, + "learning_rate": 9.494135210066113e-05, + "loss": 3.8486, + "step": 11860 + }, + { + "epoch": 2.53, + "learning_rate": 9.493921944977608e-05, + "loss": 3.7279, + "step": 11865 + }, + { + "epoch": 2.53, + "learning_rate": 9.493708679889103e-05, + "loss": 3.8669, + "step": 11870 + }, + { + "epoch": 2.53, + "learning_rate": 9.493495414800598e-05, + "loss": 3.762, + "step": 11875 + }, + { + "epoch": 2.53, + "learning_rate": 9.493282149712093e-05, + "loss": 3.967, + "step": 11880 + }, + { + "epoch": 2.53, + "learning_rate": 9.493068884623587e-05, + "loss": 3.8016, + "step": 11885 + }, + { + "epoch": 2.54, + "learning_rate": 9.492855619535083e-05, + "loss": 3.7951, + "step": 11890 + }, + { + "epoch": 2.54, + "learning_rate": 9.492642354446577e-05, + "loss": 3.9575, + "step": 11895 + }, + { + "epoch": 2.54, + "learning_rate": 9.492429089358072e-05, + "loss": 3.8486, + "step": 11900 + }, + { + "epoch": 2.54, + "learning_rate": 9.492215824269568e-05, + "loss": 3.8549, + "step": 11905 + }, + { + "epoch": 2.54, + "learning_rate": 9.492002559181062e-05, + "loss": 3.7932, + "step": 11910 + }, + { + "epoch": 2.54, + "learning_rate": 9.491789294092557e-05, + "loss": 3.7255, + "step": 11915 + }, + { + "epoch": 2.54, + "learning_rate": 9.491576029004053e-05, + "loss": 3.8825, + "step": 11920 + }, + { + "epoch": 2.54, + "learning_rate": 9.491362763915548e-05, + "loss": 3.8665, + "step": 11925 + }, + { + "epoch": 2.54, + "learning_rate": 9.491149498827042e-05, + "loss": 3.9885, + "step": 11930 + }, + { + "epoch": 2.54, + "learning_rate": 9.490936233738538e-05, + "loss": 3.9467, + "step": 11935 + }, + { + "epoch": 2.55, + "learning_rate": 9.490722968650032e-05, + "loss": 3.9258, + "step": 11940 + }, + { + "epoch": 2.55, + "learning_rate": 9.490509703561527e-05, + "loss": 3.7383, + "step": 11945 + }, + { + "epoch": 2.55, + "learning_rate": 9.490296438473022e-05, + "loss": 3.8705, + "step": 11950 + }, + { + "epoch": 2.55, + "learning_rate": 9.490083173384517e-05, + "loss": 3.773, + "step": 11955 + }, + { + "epoch": 2.55, + "learning_rate": 9.489869908296013e-05, + "loss": 3.838, + "step": 11960 + }, + { + "epoch": 2.55, + "learning_rate": 9.489656643207508e-05, + "loss": 3.801, + "step": 11965 + }, + { + "epoch": 2.55, + "learning_rate": 9.489443378119003e-05, + "loss": 3.7859, + "step": 11970 + }, + { + "epoch": 2.55, + "learning_rate": 9.489230113030498e-05, + "loss": 3.9198, + "step": 11975 + }, + { + "epoch": 2.55, + "learning_rate": 9.489016847941993e-05, + "loss": 3.8493, + "step": 11980 + }, + { + "epoch": 2.56, + "learning_rate": 9.488803582853487e-05, + "loss": 3.888, + "step": 11985 + }, + { + "epoch": 2.56, + "learning_rate": 9.488590317764983e-05, + "loss": 3.8691, + "step": 11990 + }, + { + "epoch": 2.56, + "learning_rate": 9.488377052676477e-05, + "loss": 3.7743, + "step": 11995 + }, + { + "epoch": 2.56, + "learning_rate": 9.488163787587972e-05, + "loss": 3.9196, + "step": 12000 + }, + { + "epoch": 2.56, + "learning_rate": 9.487950522499466e-05, + "loss": 3.8054, + "step": 12005 + }, + { + "epoch": 2.56, + "learning_rate": 9.487737257410962e-05, + "loss": 3.8614, + "step": 12010 + }, + { + "epoch": 2.56, + "learning_rate": 9.487523992322457e-05, + "loss": 3.8772, + "step": 12015 + }, + { + "epoch": 2.56, + "learning_rate": 9.487310727233953e-05, + "loss": 3.7921, + "step": 12020 + }, + { + "epoch": 2.56, + "learning_rate": 9.487097462145447e-05, + "loss": 3.8524, + "step": 12025 + }, + { + "epoch": 2.57, + "learning_rate": 9.486884197056942e-05, + "loss": 3.8269, + "step": 12030 + }, + { + "epoch": 2.57, + "learning_rate": 9.486670931968438e-05, + "loss": 3.877, + "step": 12035 + }, + { + "epoch": 2.57, + "learning_rate": 9.486457666879932e-05, + "loss": 3.8248, + "step": 12040 + }, + { + "epoch": 2.57, + "learning_rate": 9.486244401791427e-05, + "loss": 3.8146, + "step": 12045 + }, + { + "epoch": 2.57, + "learning_rate": 9.486031136702922e-05, + "loss": 3.8214, + "step": 12050 + }, + { + "epoch": 2.57, + "learning_rate": 9.485817871614417e-05, + "loss": 3.8447, + "step": 12055 + }, + { + "epoch": 2.57, + "learning_rate": 9.485604606525913e-05, + "loss": 3.8957, + "step": 12060 + }, + { + "epoch": 2.57, + "learning_rate": 9.485391341437408e-05, + "loss": 3.81, + "step": 12065 + }, + { + "epoch": 2.57, + "learning_rate": 9.485178076348902e-05, + "loss": 3.8389, + "step": 12070 + }, + { + "epoch": 2.57, + "learning_rate": 9.484964811260398e-05, + "loss": 3.8421, + "step": 12075 + }, + { + "epoch": 2.58, + "learning_rate": 9.484751546171892e-05, + "loss": 3.8608, + "step": 12080 + }, + { + "epoch": 2.58, + "learning_rate": 9.484538281083387e-05, + "loss": 3.8778, + "step": 12085 + }, + { + "epoch": 2.58, + "learning_rate": 9.484325015994881e-05, + "loss": 3.8864, + "step": 12090 + }, + { + "epoch": 2.58, + "learning_rate": 9.484111750906377e-05, + "loss": 3.9188, + "step": 12095 + }, + { + "epoch": 2.58, + "learning_rate": 9.483898485817872e-05, + "loss": 3.9404, + "step": 12100 + }, + { + "epoch": 2.58, + "learning_rate": 9.483685220729366e-05, + "loss": 3.9068, + "step": 12105 + }, + { + "epoch": 2.58, + "learning_rate": 9.483471955640862e-05, + "loss": 3.8319, + "step": 12110 + }, + { + "epoch": 2.58, + "learning_rate": 9.483258690552357e-05, + "loss": 3.8308, + "step": 12115 + }, + { + "epoch": 2.58, + "learning_rate": 9.483045425463853e-05, + "loss": 3.8978, + "step": 12120 + }, + { + "epoch": 2.59, + "learning_rate": 9.482832160375347e-05, + "loss": 3.8876, + "step": 12125 + }, + { + "epoch": 2.59, + "learning_rate": 9.482618895286842e-05, + "loss": 3.8715, + "step": 12130 + }, + { + "epoch": 2.59, + "learning_rate": 9.482405630198337e-05, + "loss": 3.8699, + "step": 12135 + }, + { + "epoch": 2.59, + "learning_rate": 9.482192365109832e-05, + "loss": 3.7837, + "step": 12140 + }, + { + "epoch": 2.59, + "learning_rate": 9.481979100021326e-05, + "loss": 3.8165, + "step": 12145 + }, + { + "epoch": 2.59, + "learning_rate": 9.481765834932822e-05, + "loss": 3.8433, + "step": 12150 + }, + { + "epoch": 2.59, + "learning_rate": 9.481552569844317e-05, + "loss": 3.8599, + "step": 12155 + }, + { + "epoch": 2.59, + "learning_rate": 9.481339304755813e-05, + "loss": 3.8229, + "step": 12160 + }, + { + "epoch": 2.59, + "learning_rate": 9.481126039667308e-05, + "loss": 3.929, + "step": 12165 + }, + { + "epoch": 2.6, + "learning_rate": 9.480912774578802e-05, + "loss": 3.869, + "step": 12170 + }, + { + "epoch": 2.6, + "learning_rate": 9.480699509490298e-05, + "loss": 3.8685, + "step": 12175 + }, + { + "epoch": 2.6, + "learning_rate": 9.480486244401792e-05, + "loss": 3.9377, + "step": 12180 + }, + { + "epoch": 2.6, + "learning_rate": 9.480272979313287e-05, + "loss": 3.8776, + "step": 12185 + }, + { + "epoch": 2.6, + "learning_rate": 9.480059714224781e-05, + "loss": 3.8821, + "step": 12190 + }, + { + "epoch": 2.6, + "learning_rate": 9.479846449136277e-05, + "loss": 3.8149, + "step": 12195 + }, + { + "epoch": 2.6, + "learning_rate": 9.479633184047771e-05, + "loss": 3.8796, + "step": 12200 + }, + { + "epoch": 2.6, + "learning_rate": 9.479419918959266e-05, + "loss": 3.8825, + "step": 12205 + }, + { + "epoch": 2.6, + "learning_rate": 9.479206653870762e-05, + "loss": 3.8366, + "step": 12210 + }, + { + "epoch": 2.6, + "learning_rate": 9.478993388782257e-05, + "loss": 3.8682, + "step": 12215 + }, + { + "epoch": 2.61, + "learning_rate": 9.478780123693751e-05, + "loss": 4.0087, + "step": 12220 + }, + { + "epoch": 2.61, + "learning_rate": 9.478566858605247e-05, + "loss": 3.8197, + "step": 12225 + }, + { + "epoch": 2.61, + "learning_rate": 9.478353593516742e-05, + "loss": 3.96, + "step": 12230 + }, + { + "epoch": 2.61, + "learning_rate": 9.478140328428237e-05, + "loss": 3.7963, + "step": 12235 + }, + { + "epoch": 2.61, + "learning_rate": 9.477927063339732e-05, + "loss": 3.8428, + "step": 12240 + }, + { + "epoch": 2.61, + "learning_rate": 9.477713798251226e-05, + "loss": 3.8662, + "step": 12245 + }, + { + "epoch": 2.61, + "learning_rate": 9.477500533162722e-05, + "loss": 3.8656, + "step": 12250 + }, + { + "epoch": 2.61, + "learning_rate": 9.477287268074217e-05, + "loss": 3.8357, + "step": 12255 + }, + { + "epoch": 2.61, + "learning_rate": 9.477074002985713e-05, + "loss": 3.8721, + "step": 12260 + }, + { + "epoch": 2.62, + "learning_rate": 9.476860737897207e-05, + "loss": 3.8898, + "step": 12265 + }, + { + "epoch": 2.62, + "learning_rate": 9.476647472808702e-05, + "loss": 3.853, + "step": 12270 + }, + { + "epoch": 2.62, + "learning_rate": 9.476434207720196e-05, + "loss": 3.8396, + "step": 12275 + }, + { + "epoch": 2.62, + "learning_rate": 9.476220942631692e-05, + "loss": 3.7641, + "step": 12280 + }, + { + "epoch": 2.62, + "learning_rate": 9.476007677543186e-05, + "loss": 3.7911, + "step": 12285 + }, + { + "epoch": 2.62, + "learning_rate": 9.475794412454681e-05, + "loss": 3.8446, + "step": 12290 + }, + { + "epoch": 2.62, + "learning_rate": 9.475581147366177e-05, + "loss": 3.8506, + "step": 12295 + }, + { + "epoch": 2.62, + "learning_rate": 9.475367882277671e-05, + "loss": 3.87, + "step": 12300 + }, + { + "epoch": 2.62, + "learning_rate": 9.475154617189166e-05, + "loss": 3.7995, + "step": 12305 + }, + { + "epoch": 2.62, + "learning_rate": 9.474941352100662e-05, + "loss": 3.8378, + "step": 12310 + }, + { + "epoch": 2.63, + "learning_rate": 9.474728087012157e-05, + "loss": 3.8838, + "step": 12315 + }, + { + "epoch": 2.63, + "learning_rate": 9.474514821923651e-05, + "loss": 3.8849, + "step": 12320 + }, + { + "epoch": 2.63, + "learning_rate": 9.474301556835147e-05, + "loss": 3.8572, + "step": 12325 + }, + { + "epoch": 2.63, + "learning_rate": 9.474088291746641e-05, + "loss": 3.7967, + "step": 12330 + }, + { + "epoch": 2.63, + "learning_rate": 9.473875026658137e-05, + "loss": 3.9313, + "step": 12335 + }, + { + "epoch": 2.63, + "learning_rate": 9.47366176156963e-05, + "loss": 3.9456, + "step": 12340 + }, + { + "epoch": 2.63, + "learning_rate": 9.473448496481126e-05, + "loss": 3.7697, + "step": 12345 + }, + { + "epoch": 2.63, + "learning_rate": 9.473235231392622e-05, + "loss": 3.9275, + "step": 12350 + }, + { + "epoch": 2.63, + "learning_rate": 9.473021966304117e-05, + "loss": 3.8399, + "step": 12355 + }, + { + "epoch": 2.64, + "learning_rate": 9.472808701215613e-05, + "loss": 3.8394, + "step": 12360 + }, + { + "epoch": 2.64, + "learning_rate": 9.472595436127107e-05, + "loss": 3.8439, + "step": 12365 + }, + { + "epoch": 2.64, + "learning_rate": 9.472382171038602e-05, + "loss": 3.9485, + "step": 12370 + }, + { + "epoch": 2.64, + "learning_rate": 9.472168905950096e-05, + "loss": 3.9012, + "step": 12375 + }, + { + "epoch": 2.64, + "learning_rate": 9.471955640861592e-05, + "loss": 3.8532, + "step": 12380 + }, + { + "epoch": 2.64, + "learning_rate": 9.471742375773086e-05, + "loss": 3.8111, + "step": 12385 + }, + { + "epoch": 2.64, + "learning_rate": 9.471529110684581e-05, + "loss": 3.8193, + "step": 12390 + }, + { + "epoch": 2.64, + "learning_rate": 9.471315845596075e-05, + "loss": 3.7686, + "step": 12395 + }, + { + "epoch": 2.64, + "learning_rate": 9.471102580507571e-05, + "loss": 3.8529, + "step": 12400 + }, + { + "epoch": 2.65, + "learning_rate": 9.470889315419066e-05, + "loss": 3.9288, + "step": 12405 + }, + { + "epoch": 2.65, + "learning_rate": 9.470676050330562e-05, + "loss": 3.8933, + "step": 12410 + }, + { + "epoch": 2.65, + "learning_rate": 9.470462785242057e-05, + "loss": 3.9774, + "step": 12415 + }, + { + "epoch": 2.65, + "learning_rate": 9.470249520153551e-05, + "loss": 3.8566, + "step": 12420 + }, + { + "epoch": 2.65, + "learning_rate": 9.470036255065047e-05, + "loss": 3.8747, + "step": 12425 + }, + { + "epoch": 2.65, + "learning_rate": 9.469822989976541e-05, + "loss": 3.8786, + "step": 12430 + }, + { + "epoch": 2.65, + "learning_rate": 9.469609724888037e-05, + "loss": 3.8393, + "step": 12435 + }, + { + "epoch": 2.65, + "learning_rate": 9.46939645979953e-05, + "loss": 3.7922, + "step": 12440 + }, + { + "epoch": 2.65, + "learning_rate": 9.469183194711026e-05, + "loss": 3.7708, + "step": 12445 + }, + { + "epoch": 2.65, + "learning_rate": 9.468969929622522e-05, + "loss": 3.9003, + "step": 12450 + }, + { + "epoch": 2.66, + "learning_rate": 9.468756664534017e-05, + "loss": 3.8484, + "step": 12455 + }, + { + "epoch": 2.66, + "learning_rate": 9.468543399445511e-05, + "loss": 3.8923, + "step": 12460 + }, + { + "epoch": 2.66, + "learning_rate": 9.468330134357007e-05, + "loss": 3.8726, + "step": 12465 + }, + { + "epoch": 2.66, + "learning_rate": 9.468116869268501e-05, + "loss": 3.8075, + "step": 12470 + }, + { + "epoch": 2.66, + "learning_rate": 9.467903604179996e-05, + "loss": 3.8379, + "step": 12475 + }, + { + "epoch": 2.66, + "learning_rate": 9.467690339091492e-05, + "loss": 3.9067, + "step": 12480 + }, + { + "epoch": 2.66, + "learning_rate": 9.467477074002986e-05, + "loss": 3.8481, + "step": 12485 + }, + { + "epoch": 2.66, + "learning_rate": 9.467263808914481e-05, + "loss": 3.8343, + "step": 12490 + }, + { + "epoch": 2.66, + "learning_rate": 9.467050543825975e-05, + "loss": 3.8555, + "step": 12495 + }, + { + "epoch": 2.67, + "learning_rate": 9.466837278737471e-05, + "loss": 3.9263, + "step": 12500 + }, + { + "epoch": 2.67, + "learning_rate": 9.466624013648966e-05, + "loss": 3.8103, + "step": 12505 + }, + { + "epoch": 2.67, + "learning_rate": 9.466410748560462e-05, + "loss": 3.8817, + "step": 12510 + }, + { + "epoch": 2.67, + "learning_rate": 9.466197483471956e-05, + "loss": 3.7999, + "step": 12515 + }, + { + "epoch": 2.67, + "learning_rate": 9.465984218383451e-05, + "loss": 3.8179, + "step": 12520 + }, + { + "epoch": 2.67, + "learning_rate": 9.465770953294946e-05, + "loss": 3.8504, + "step": 12525 + }, + { + "epoch": 2.67, + "learning_rate": 9.465557688206441e-05, + "loss": 3.8828, + "step": 12530 + }, + { + "epoch": 2.67, + "learning_rate": 9.465344423117935e-05, + "loss": 3.8332, + "step": 12535 + }, + { + "epoch": 2.67, + "learning_rate": 9.46513115802943e-05, + "loss": 3.9158, + "step": 12540 + }, + { + "epoch": 2.67, + "learning_rate": 9.464917892940926e-05, + "loss": 3.8512, + "step": 12545 + }, + { + "epoch": 2.68, + "learning_rate": 9.464704627852422e-05, + "loss": 3.8978, + "step": 12550 + }, + { + "epoch": 2.68, + "learning_rate": 9.464491362763917e-05, + "loss": 3.8572, + "step": 12555 + }, + { + "epoch": 2.68, + "learning_rate": 9.464278097675411e-05, + "loss": 3.933, + "step": 12560 + }, + { + "epoch": 2.68, + "learning_rate": 9.464064832586907e-05, + "loss": 3.8698, + "step": 12565 + }, + { + "epoch": 2.68, + "learning_rate": 9.463851567498401e-05, + "loss": 3.7444, + "step": 12570 + }, + { + "epoch": 2.68, + "learning_rate": 9.463638302409896e-05, + "loss": 3.8093, + "step": 12575 + }, + { + "epoch": 2.68, + "learning_rate": 9.46342503732139e-05, + "loss": 3.9234, + "step": 12580 + }, + { + "epoch": 2.68, + "learning_rate": 9.463211772232886e-05, + "loss": 3.8823, + "step": 12585 + }, + { + "epoch": 2.68, + "learning_rate": 9.46299850714438e-05, + "loss": 3.9563, + "step": 12590 + }, + { + "epoch": 2.69, + "learning_rate": 9.462785242055875e-05, + "loss": 3.8849, + "step": 12595 + }, + { + "epoch": 2.69, + "learning_rate": 9.462571976967371e-05, + "loss": 3.8896, + "step": 12600 + }, + { + "epoch": 2.69, + "learning_rate": 9.462358711878866e-05, + "loss": 3.8194, + "step": 12605 + }, + { + "epoch": 2.69, + "learning_rate": 9.462145446790362e-05, + "loss": 3.8281, + "step": 12610 + }, + { + "epoch": 2.69, + "learning_rate": 9.461932181701856e-05, + "loss": 3.7781, + "step": 12615 + }, + { + "epoch": 2.69, + "learning_rate": 9.461718916613351e-05, + "loss": 3.8643, + "step": 12620 + }, + { + "epoch": 2.69, + "learning_rate": 9.461505651524846e-05, + "loss": 3.8265, + "step": 12625 + }, + { + "epoch": 2.69, + "learning_rate": 9.461292386436341e-05, + "loss": 3.8002, + "step": 12630 + }, + { + "epoch": 2.69, + "learning_rate": 9.461079121347835e-05, + "loss": 3.8869, + "step": 12635 + }, + { + "epoch": 2.7, + "learning_rate": 9.46086585625933e-05, + "loss": 3.8442, + "step": 12640 + }, + { + "epoch": 2.7, + "learning_rate": 9.460652591170826e-05, + "loss": 3.8418, + "step": 12645 + }, + { + "epoch": 2.7, + "learning_rate": 9.460439326082322e-05, + "loss": 3.7834, + "step": 12650 + }, + { + "epoch": 2.7, + "learning_rate": 9.460226060993816e-05, + "loss": 3.8669, + "step": 12655 + }, + { + "epoch": 2.7, + "learning_rate": 9.460012795905311e-05, + "loss": 3.7716, + "step": 12660 + }, + { + "epoch": 2.7, + "learning_rate": 9.459799530816805e-05, + "loss": 3.8248, + "step": 12665 + }, + { + "epoch": 2.7, + "learning_rate": 9.459586265728301e-05, + "loss": 3.8009, + "step": 12670 + }, + { + "epoch": 2.7, + "learning_rate": 9.459373000639796e-05, + "loss": 3.884, + "step": 12675 + }, + { + "epoch": 2.7, + "learning_rate": 9.45915973555129e-05, + "loss": 3.7295, + "step": 12680 + }, + { + "epoch": 2.7, + "learning_rate": 9.458946470462786e-05, + "loss": 3.9192, + "step": 12685 + }, + { + "epoch": 2.71, + "learning_rate": 9.45873320537428e-05, + "loss": 3.8366, + "step": 12690 + }, + { + "epoch": 2.71, + "learning_rate": 9.458519940285775e-05, + "loss": 3.8342, + "step": 12695 + }, + { + "epoch": 2.71, + "learning_rate": 9.458306675197271e-05, + "loss": 3.8637, + "step": 12700 + }, + { + "epoch": 2.71, + "learning_rate": 9.458093410108766e-05, + "loss": 3.7581, + "step": 12705 + }, + { + "epoch": 2.71, + "learning_rate": 9.45788014502026e-05, + "loss": 3.8351, + "step": 12710 + }, + { + "epoch": 2.71, + "learning_rate": 9.457666879931756e-05, + "loss": 3.8001, + "step": 12715 + }, + { + "epoch": 2.71, + "learning_rate": 9.45745361484325e-05, + "loss": 3.7676, + "step": 12720 + }, + { + "epoch": 2.71, + "learning_rate": 9.457240349754746e-05, + "loss": 3.8946, + "step": 12725 + }, + { + "epoch": 2.71, + "learning_rate": 9.45702708466624e-05, + "loss": 3.9437, + "step": 12730 + }, + { + "epoch": 2.72, + "learning_rate": 9.456813819577735e-05, + "loss": 3.7802, + "step": 12735 + }, + { + "epoch": 2.72, + "learning_rate": 9.45660055448923e-05, + "loss": 3.8232, + "step": 12740 + }, + { + "epoch": 2.72, + "learning_rate": 9.456387289400726e-05, + "loss": 3.7918, + "step": 12745 + }, + { + "epoch": 2.72, + "learning_rate": 9.456174024312222e-05, + "loss": 3.7892, + "step": 12750 + }, + { + "epoch": 2.72, + "learning_rate": 9.455960759223716e-05, + "loss": 3.9601, + "step": 12755 + }, + { + "epoch": 2.72, + "learning_rate": 9.455747494135211e-05, + "loss": 3.9499, + "step": 12760 + }, + { + "epoch": 2.72, + "learning_rate": 9.455534229046705e-05, + "loss": 3.886, + "step": 12765 + }, + { + "epoch": 2.72, + "learning_rate": 9.455320963958201e-05, + "loss": 3.8193, + "step": 12770 + }, + { + "epoch": 2.72, + "learning_rate": 9.455107698869695e-05, + "loss": 3.8418, + "step": 12775 + }, + { + "epoch": 2.73, + "learning_rate": 9.45489443378119e-05, + "loss": 3.7722, + "step": 12780 + }, + { + "epoch": 2.73, + "learning_rate": 9.454681168692684e-05, + "loss": 3.8784, + "step": 12785 + }, + { + "epoch": 2.73, + "learning_rate": 9.45446790360418e-05, + "loss": 3.8373, + "step": 12790 + }, + { + "epoch": 2.73, + "learning_rate": 9.454254638515675e-05, + "loss": 3.7727, + "step": 12795 + }, + { + "epoch": 2.73, + "learning_rate": 9.454041373427171e-05, + "loss": 3.8564, + "step": 12800 + }, + { + "epoch": 2.73, + "learning_rate": 9.453828108338666e-05, + "loss": 3.7502, + "step": 12805 + }, + { + "epoch": 2.73, + "learning_rate": 9.45361484325016e-05, + "loss": 3.8341, + "step": 12810 + }, + { + "epoch": 2.73, + "learning_rate": 9.453401578161656e-05, + "loss": 3.757, + "step": 12815 + }, + { + "epoch": 2.73, + "learning_rate": 9.45318831307315e-05, + "loss": 3.7438, + "step": 12820 + }, + { + "epoch": 2.73, + "learning_rate": 9.452975047984646e-05, + "loss": 3.7876, + "step": 12825 + }, + { + "epoch": 2.74, + "learning_rate": 9.45276178289614e-05, + "loss": 3.7533, + "step": 12830 + }, + { + "epoch": 2.74, + "learning_rate": 9.452548517807635e-05, + "loss": 3.7903, + "step": 12835 + }, + { + "epoch": 2.74, + "learning_rate": 9.45233525271913e-05, + "loss": 3.8345, + "step": 12840 + }, + { + "epoch": 2.74, + "learning_rate": 9.452121987630626e-05, + "loss": 3.8461, + "step": 12845 + }, + { + "epoch": 2.74, + "learning_rate": 9.45190872254212e-05, + "loss": 3.8457, + "step": 12850 + }, + { + "epoch": 2.74, + "learning_rate": 9.451695457453616e-05, + "loss": 3.8823, + "step": 12855 + }, + { + "epoch": 2.74, + "learning_rate": 9.45148219236511e-05, + "loss": 3.8906, + "step": 12860 + }, + { + "epoch": 2.74, + "learning_rate": 9.451268927276605e-05, + "loss": 3.8835, + "step": 12865 + }, + { + "epoch": 2.74, + "learning_rate": 9.451055662188101e-05, + "loss": 3.8094, + "step": 12870 + }, + { + "epoch": 2.75, + "learning_rate": 9.450842397099595e-05, + "loss": 3.9155, + "step": 12875 + }, + { + "epoch": 2.75, + "learning_rate": 9.45062913201109e-05, + "loss": 3.8129, + "step": 12880 + }, + { + "epoch": 2.75, + "learning_rate": 9.450415866922584e-05, + "loss": 3.7896, + "step": 12885 + }, + { + "epoch": 2.75, + "learning_rate": 9.45020260183408e-05, + "loss": 3.9116, + "step": 12890 + }, + { + "epoch": 2.75, + "learning_rate": 9.449989336745575e-05, + "loss": 3.785, + "step": 12895 + }, + { + "epoch": 2.75, + "learning_rate": 9.449776071657071e-05, + "loss": 3.8062, + "step": 12900 + }, + { + "epoch": 2.75, + "learning_rate": 9.449562806568565e-05, + "loss": 3.8623, + "step": 12905 + }, + { + "epoch": 2.75, + "learning_rate": 9.44934954148006e-05, + "loss": 3.9554, + "step": 12910 + }, + { + "epoch": 2.75, + "learning_rate": 9.449136276391555e-05, + "loss": 3.8695, + "step": 12915 + }, + { + "epoch": 2.75, + "learning_rate": 9.44892301130305e-05, + "loss": 3.872, + "step": 12920 + }, + { + "epoch": 2.76, + "learning_rate": 9.448709746214544e-05, + "loss": 3.7832, + "step": 12925 + }, + { + "epoch": 2.76, + "learning_rate": 9.44849648112604e-05, + "loss": 3.9063, + "step": 12930 + }, + { + "epoch": 2.76, + "learning_rate": 9.448283216037535e-05, + "loss": 3.8449, + "step": 12935 + }, + { + "epoch": 2.76, + "learning_rate": 9.44806995094903e-05, + "loss": 3.8684, + "step": 12940 + }, + { + "epoch": 2.76, + "learning_rate": 9.447856685860526e-05, + "loss": 3.8864, + "step": 12945 + }, + { + "epoch": 2.76, + "learning_rate": 9.44764342077202e-05, + "loss": 3.8493, + "step": 12950 + }, + { + "epoch": 2.76, + "learning_rate": 9.447430155683516e-05, + "loss": 3.7985, + "step": 12955 + }, + { + "epoch": 2.76, + "learning_rate": 9.44721689059501e-05, + "loss": 3.9547, + "step": 12960 + }, + { + "epoch": 2.76, + "learning_rate": 9.447003625506505e-05, + "loss": 3.8626, + "step": 12965 + }, + { + "epoch": 2.77, + "learning_rate": 9.446790360418e-05, + "loss": 3.7944, + "step": 12970 + }, + { + "epoch": 2.77, + "learning_rate": 9.446577095329495e-05, + "loss": 3.8526, + "step": 12975 + }, + { + "epoch": 2.77, + "learning_rate": 9.446363830240989e-05, + "loss": 3.8157, + "step": 12980 + }, + { + "epoch": 2.77, + "learning_rate": 9.446150565152484e-05, + "loss": 3.8383, + "step": 12985 + }, + { + "epoch": 2.77, + "learning_rate": 9.44593730006398e-05, + "loss": 3.8873, + "step": 12990 + }, + { + "epoch": 2.77, + "learning_rate": 9.445724034975475e-05, + "loss": 3.9132, + "step": 12995 + }, + { + "epoch": 2.77, + "learning_rate": 9.445510769886971e-05, + "loss": 3.7766, + "step": 13000 } ], "logging_steps": 5, - "max_steps": 1945, - "num_train_epochs": 5, + "max_steps": 234450, + "num_train_epochs": 50, "save_steps": 1000, - "total_flos": 1824768000000000.0, + "total_flos": 4.56192e+16, "trial_name": null, "trial_params": null }