{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.99979288919572, "eval_steps": 500, "global_step": 32589, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.5000000000000004e-07, "loss": 2.5996, "step": 10 }, { "epoch": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 2.6561, "step": 20 }, { "epoch": 0.0, "learning_rate": 7.5e-07, "loss": 2.5935, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 2.5662, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.25e-06, "loss": 2.6244, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.5e-06, "loss": 2.6111, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.7500000000000002e-06, "loss": 2.6025, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "loss": 2.5608, "step": 80 }, { "epoch": 0.01, "learning_rate": 2.25e-06, "loss": 2.5819, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.5e-06, "loss": 2.5695, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.7500000000000004e-06, "loss": 2.5394, "step": 110 }, { "epoch": 0.01, "learning_rate": 3e-06, "loss": 2.4985, "step": 120 }, { "epoch": 0.01, "learning_rate": 3.2500000000000002e-06, "loss": 2.5326, "step": 130 }, { "epoch": 0.01, "learning_rate": 3.5000000000000004e-06, "loss": 2.5102, "step": 140 }, { "epoch": 0.01, "learning_rate": 3.75e-06, "loss": 2.483, "step": 150 }, { "epoch": 0.01, "learning_rate": 4.000000000000001e-06, "loss": 2.453, "step": 160 }, { "epoch": 0.02, "learning_rate": 4.250000000000001e-06, "loss": 2.4671, "step": 170 }, { "epoch": 0.02, "learning_rate": 4.5e-06, "loss": 2.4468, "step": 180 }, { "epoch": 0.02, "learning_rate": 4.75e-06, "loss": 2.4797, "step": 190 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 2.4552, "step": 200 }, { "epoch": 0.02, "learning_rate": 5.25e-06, "loss": 2.3846, "step": 210 }, { "epoch": 0.02, "learning_rate": 5.500000000000001e-06, "loss": 2.3943, "step": 220 }, { "epoch": 0.02, "learning_rate": 5.750000000000001e-06, "loss": 2.4245, "step": 230 }, { "epoch": 0.02, "learning_rate": 6e-06, "loss": 2.431, "step": 240 }, { "epoch": 0.02, "learning_rate": 6.25e-06, "loss": 2.4355, "step": 250 }, { "epoch": 0.02, "learning_rate": 6.5000000000000004e-06, "loss": 2.4562, "step": 260 }, { "epoch": 0.02, "learning_rate": 6.750000000000001e-06, "loss": 2.4268, "step": 270 }, { "epoch": 0.03, "learning_rate": 7.000000000000001e-06, "loss": 2.4117, "step": 280 }, { "epoch": 0.03, "learning_rate": 7.25e-06, "loss": 2.3703, "step": 290 }, { "epoch": 0.03, "learning_rate": 7.5e-06, "loss": 2.3887, "step": 300 }, { "epoch": 0.03, "learning_rate": 7.75e-06, "loss": 2.3624, "step": 310 }, { "epoch": 0.03, "learning_rate": 8.000000000000001e-06, "loss": 2.3812, "step": 320 }, { "epoch": 0.03, "learning_rate": 8.25e-06, "loss": 2.3811, "step": 330 }, { "epoch": 0.03, "learning_rate": 8.500000000000002e-06, "loss": 2.4231, "step": 340 }, { "epoch": 0.03, "learning_rate": 8.75e-06, "loss": 2.3461, "step": 350 }, { "epoch": 0.03, "learning_rate": 9e-06, "loss": 2.3071, "step": 360 }, { "epoch": 0.03, "learning_rate": 9.25e-06, "loss": 2.3165, "step": 370 }, { "epoch": 0.03, "learning_rate": 9.5e-06, "loss": 2.3736, "step": 380 }, { "epoch": 0.04, "learning_rate": 9.750000000000002e-06, "loss": 2.3698, "step": 390 }, { "epoch": 0.04, "learning_rate": 1e-05, "loss": 2.3527, "step": 400 }, { "epoch": 0.04, "learning_rate": 1.025e-05, "loss": 2.3605, "step": 410 }, { "epoch": 0.04, "learning_rate": 1.05e-05, "loss": 2.3542, "step": 420 }, { "epoch": 0.04, "learning_rate": 1.075e-05, "loss": 2.2693, "step": 430 }, { "epoch": 0.04, "learning_rate": 1.1000000000000001e-05, "loss": 2.3021, "step": 440 }, { "epoch": 0.04, "learning_rate": 1.125e-05, "loss": 2.3607, "step": 450 }, { "epoch": 0.04, "learning_rate": 1.1500000000000002e-05, "loss": 2.3409, "step": 460 }, { "epoch": 0.04, "learning_rate": 1.175e-05, "loss": 2.329, "step": 470 }, { "epoch": 0.04, "learning_rate": 1.2e-05, "loss": 2.3437, "step": 480 }, { "epoch": 0.05, "learning_rate": 1.225e-05, "loss": 2.3313, "step": 490 }, { "epoch": 0.05, "learning_rate": 1.25e-05, "loss": 2.3172, "step": 500 }, { "epoch": 0.05, "learning_rate": 1.2750000000000002e-05, "loss": 2.3183, "step": 510 }, { "epoch": 0.05, "learning_rate": 1.3000000000000001e-05, "loss": 2.2978, "step": 520 }, { "epoch": 0.05, "learning_rate": 1.3250000000000002e-05, "loss": 2.3428, "step": 530 }, { "epoch": 0.05, "learning_rate": 1.3500000000000001e-05, "loss": 2.3157, "step": 540 }, { "epoch": 0.05, "learning_rate": 1.3750000000000002e-05, "loss": 2.3244, "step": 550 }, { "epoch": 0.05, "learning_rate": 1.4000000000000001e-05, "loss": 2.2608, "step": 560 }, { "epoch": 0.05, "learning_rate": 1.4249999999999999e-05, "loss": 2.2907, "step": 570 }, { "epoch": 0.05, "learning_rate": 1.45e-05, "loss": 2.2475, "step": 580 }, { "epoch": 0.05, "learning_rate": 1.475e-05, "loss": 2.3085, "step": 590 }, { "epoch": 0.06, "learning_rate": 1.5e-05, "loss": 2.2704, "step": 600 }, { "epoch": 0.06, "learning_rate": 1.525e-05, "loss": 2.2883, "step": 610 }, { "epoch": 0.06, "learning_rate": 1.55e-05, "loss": 2.301, "step": 620 }, { "epoch": 0.06, "learning_rate": 1.575e-05, "loss": 2.2919, "step": 630 }, { "epoch": 0.06, "learning_rate": 1.6000000000000003e-05, "loss": 2.2991, "step": 640 }, { "epoch": 0.06, "learning_rate": 1.6250000000000002e-05, "loss": 2.3189, "step": 650 }, { "epoch": 0.06, "learning_rate": 1.65e-05, "loss": 2.2357, "step": 660 }, { "epoch": 0.06, "learning_rate": 1.675e-05, "loss": 2.2548, "step": 670 }, { "epoch": 0.06, "learning_rate": 1.7000000000000003e-05, "loss": 2.3021, "step": 680 }, { "epoch": 0.06, "learning_rate": 1.725e-05, "loss": 2.312, "step": 690 }, { "epoch": 0.06, "learning_rate": 1.75e-05, "loss": 2.2714, "step": 700 }, { "epoch": 0.07, "learning_rate": 1.775e-05, "loss": 2.2572, "step": 710 }, { "epoch": 0.07, "learning_rate": 1.8e-05, "loss": 2.2421, "step": 720 }, { "epoch": 0.07, "learning_rate": 1.825e-05, "loss": 2.2531, "step": 730 }, { "epoch": 0.07, "learning_rate": 1.85e-05, "loss": 2.2163, "step": 740 }, { "epoch": 0.07, "learning_rate": 1.8750000000000002e-05, "loss": 2.2661, "step": 750 }, { "epoch": 0.07, "learning_rate": 1.9e-05, "loss": 2.2466, "step": 760 }, { "epoch": 0.07, "learning_rate": 1.925e-05, "loss": 2.2549, "step": 770 }, { "epoch": 0.07, "learning_rate": 1.9500000000000003e-05, "loss": 2.2366, "step": 780 }, { "epoch": 0.07, "learning_rate": 1.9750000000000002e-05, "loss": 2.1994, "step": 790 }, { "epoch": 0.07, "learning_rate": 2e-05, "loss": 2.2505, "step": 800 }, { "epoch": 0.07, "learning_rate": 2.025e-05, "loss": 2.2686, "step": 810 }, { "epoch": 0.08, "learning_rate": 2.05e-05, "loss": 2.2334, "step": 820 }, { "epoch": 0.08, "learning_rate": 2.075e-05, "loss": 2.216, "step": 830 }, { "epoch": 0.08, "learning_rate": 2.1e-05, "loss": 2.2154, "step": 840 }, { "epoch": 0.08, "learning_rate": 2.125e-05, "loss": 2.2336, "step": 850 }, { "epoch": 0.08, "learning_rate": 2.15e-05, "loss": 2.2361, "step": 860 }, { "epoch": 0.08, "learning_rate": 2.175e-05, "loss": 2.2963, "step": 870 }, { "epoch": 0.08, "learning_rate": 2.2000000000000003e-05, "loss": 2.1888, "step": 880 }, { "epoch": 0.08, "learning_rate": 2.2250000000000002e-05, "loss": 2.2079, "step": 890 }, { "epoch": 0.08, "learning_rate": 2.25e-05, "loss": 2.2039, "step": 900 }, { "epoch": 0.08, "learning_rate": 2.275e-05, "loss": 2.2239, "step": 910 }, { "epoch": 0.08, "learning_rate": 2.3000000000000003e-05, "loss": 2.2428, "step": 920 }, { "epoch": 0.09, "learning_rate": 2.3250000000000003e-05, "loss": 2.2524, "step": 930 }, { "epoch": 0.09, "learning_rate": 2.35e-05, "loss": 2.207, "step": 940 }, { "epoch": 0.09, "learning_rate": 2.375e-05, "loss": 2.229, "step": 950 }, { "epoch": 0.09, "learning_rate": 2.4e-05, "loss": 2.1913, "step": 960 }, { "epoch": 0.09, "learning_rate": 2.425e-05, "loss": 2.2341, "step": 970 }, { "epoch": 0.09, "learning_rate": 2.45e-05, "loss": 2.2347, "step": 980 }, { "epoch": 0.09, "learning_rate": 2.4750000000000002e-05, "loss": 2.2302, "step": 990 }, { "epoch": 0.09, "learning_rate": 2.5e-05, "loss": 2.1816, "step": 1000 }, { "epoch": 0.09, "learning_rate": 2.525e-05, "loss": 2.2063, "step": 1010 }, { "epoch": 0.09, "learning_rate": 2.5500000000000003e-05, "loss": 2.2406, "step": 1020 }, { "epoch": 0.09, "learning_rate": 2.5750000000000002e-05, "loss": 2.1993, "step": 1030 }, { "epoch": 0.1, "learning_rate": 2.6000000000000002e-05, "loss": 2.2402, "step": 1040 }, { "epoch": 0.1, "learning_rate": 2.625e-05, "loss": 2.1586, "step": 1050 }, { "epoch": 0.1, "learning_rate": 2.6500000000000004e-05, "loss": 2.2624, "step": 1060 }, { "epoch": 0.1, "learning_rate": 2.6750000000000003e-05, "loss": 2.2414, "step": 1070 }, { "epoch": 0.1, "learning_rate": 2.7000000000000002e-05, "loss": 2.2251, "step": 1080 }, { "epoch": 0.1, "learning_rate": 2.725e-05, "loss": 2.2162, "step": 1090 }, { "epoch": 0.1, "learning_rate": 2.7500000000000004e-05, "loss": 2.2135, "step": 1100 }, { "epoch": 0.1, "learning_rate": 2.7750000000000004e-05, "loss": 2.2018, "step": 1110 }, { "epoch": 0.1, "learning_rate": 2.8000000000000003e-05, "loss": 2.208, "step": 1120 }, { "epoch": 0.1, "learning_rate": 2.825e-05, "loss": 2.2407, "step": 1130 }, { "epoch": 0.1, "learning_rate": 2.8499999999999998e-05, "loss": 2.2079, "step": 1140 }, { "epoch": 0.11, "learning_rate": 2.8749999999999997e-05, "loss": 2.1825, "step": 1150 }, { "epoch": 0.11, "learning_rate": 2.9e-05, "loss": 2.1919, "step": 1160 }, { "epoch": 0.11, "learning_rate": 2.925e-05, "loss": 2.2067, "step": 1170 }, { "epoch": 0.11, "learning_rate": 2.95e-05, "loss": 2.2247, "step": 1180 }, { "epoch": 0.11, "learning_rate": 2.975e-05, "loss": 2.1795, "step": 1190 }, { "epoch": 0.11, "learning_rate": 3e-05, "loss": 2.1569, "step": 1200 }, { "epoch": 0.11, "learning_rate": 3.025e-05, "loss": 2.2106, "step": 1210 }, { "epoch": 0.11, "learning_rate": 3.05e-05, "loss": 2.1846, "step": 1220 }, { "epoch": 0.11, "learning_rate": 3.075e-05, "loss": 2.1926, "step": 1230 }, { "epoch": 0.11, "learning_rate": 3.1e-05, "loss": 2.2159, "step": 1240 }, { "epoch": 0.12, "learning_rate": 3.125e-05, "loss": 2.1878, "step": 1250 }, { "epoch": 0.12, "learning_rate": 3.15e-05, "loss": 2.1561, "step": 1260 }, { "epoch": 0.12, "learning_rate": 3.175e-05, "loss": 2.1986, "step": 1270 }, { "epoch": 0.12, "learning_rate": 3.2000000000000005e-05, "loss": 2.2018, "step": 1280 }, { "epoch": 0.12, "learning_rate": 3.2250000000000005e-05, "loss": 2.1701, "step": 1290 }, { "epoch": 0.12, "learning_rate": 3.2500000000000004e-05, "loss": 2.2654, "step": 1300 }, { "epoch": 0.12, "learning_rate": 3.275e-05, "loss": 2.1503, "step": 1310 }, { "epoch": 0.12, "learning_rate": 3.3e-05, "loss": 2.1932, "step": 1320 }, { "epoch": 0.12, "learning_rate": 3.325e-05, "loss": 2.1817, "step": 1330 }, { "epoch": 0.12, "learning_rate": 3.35e-05, "loss": 2.2429, "step": 1340 }, { "epoch": 0.12, "learning_rate": 3.375000000000001e-05, "loss": 2.1627, "step": 1350 }, { "epoch": 0.13, "learning_rate": 3.4000000000000007e-05, "loss": 2.1215, "step": 1360 }, { "epoch": 0.13, "learning_rate": 3.4250000000000006e-05, "loss": 2.2165, "step": 1370 }, { "epoch": 0.13, "learning_rate": 3.45e-05, "loss": 2.2696, "step": 1380 }, { "epoch": 0.13, "learning_rate": 3.475e-05, "loss": 2.1738, "step": 1390 }, { "epoch": 0.13, "learning_rate": 3.5e-05, "loss": 2.2379, "step": 1400 }, { "epoch": 0.13, "learning_rate": 3.525e-05, "loss": 2.2034, "step": 1410 }, { "epoch": 0.13, "learning_rate": 3.55e-05, "loss": 2.1831, "step": 1420 }, { "epoch": 0.13, "learning_rate": 3.575e-05, "loss": 2.1386, "step": 1430 }, { "epoch": 0.13, "learning_rate": 3.6e-05, "loss": 2.239, "step": 1440 }, { "epoch": 0.13, "learning_rate": 3.625e-05, "loss": 2.2001, "step": 1450 }, { "epoch": 0.13, "learning_rate": 3.65e-05, "loss": 2.202, "step": 1460 }, { "epoch": 0.14, "learning_rate": 3.675e-05, "loss": 2.1472, "step": 1470 }, { "epoch": 0.14, "learning_rate": 3.7e-05, "loss": 2.202, "step": 1480 }, { "epoch": 0.14, "learning_rate": 3.7250000000000004e-05, "loss": 2.1692, "step": 1490 }, { "epoch": 0.14, "learning_rate": 3.7500000000000003e-05, "loss": 2.1946, "step": 1500 }, { "epoch": 0.14, "learning_rate": 3.775e-05, "loss": 2.2032, "step": 1510 }, { "epoch": 0.14, "learning_rate": 3.8e-05, "loss": 2.1648, "step": 1520 }, { "epoch": 0.14, "learning_rate": 3.825e-05, "loss": 2.2834, "step": 1530 }, { "epoch": 0.14, "learning_rate": 3.85e-05, "loss": 2.1959, "step": 1540 }, { "epoch": 0.14, "learning_rate": 3.875e-05, "loss": 2.2103, "step": 1550 }, { "epoch": 0.14, "learning_rate": 3.9000000000000006e-05, "loss": 2.2377, "step": 1560 }, { "epoch": 0.14, "learning_rate": 3.9250000000000005e-05, "loss": 2.1761, "step": 1570 }, { "epoch": 0.15, "learning_rate": 3.9500000000000005e-05, "loss": 2.1726, "step": 1580 }, { "epoch": 0.15, "learning_rate": 3.9750000000000004e-05, "loss": 2.2014, "step": 1590 }, { "epoch": 0.15, "learning_rate": 4e-05, "loss": 2.2468, "step": 1600 }, { "epoch": 0.15, "learning_rate": 4.025e-05, "loss": 2.1827, "step": 1610 }, { "epoch": 0.15, "learning_rate": 4.05e-05, "loss": 2.1716, "step": 1620 }, { "epoch": 0.15, "learning_rate": 4.075e-05, "loss": 2.1918, "step": 1630 }, { "epoch": 0.15, "learning_rate": 4.1e-05, "loss": 2.1848, "step": 1640 }, { "epoch": 0.15, "learning_rate": 4.125e-05, "loss": 2.2133, "step": 1650 }, { "epoch": 0.15, "learning_rate": 4.15e-05, "loss": 2.1884, "step": 1660 }, { "epoch": 0.15, "learning_rate": 4.175e-05, "loss": 2.2345, "step": 1670 }, { "epoch": 0.15, "learning_rate": 4.2e-05, "loss": 2.169, "step": 1680 }, { "epoch": 0.16, "learning_rate": 4.2250000000000004e-05, "loss": 2.191, "step": 1690 }, { "epoch": 0.16, "learning_rate": 4.25e-05, "loss": 2.2457, "step": 1700 }, { "epoch": 0.16, "learning_rate": 4.275e-05, "loss": 2.1883, "step": 1710 }, { "epoch": 0.16, "learning_rate": 4.3e-05, "loss": 2.2064, "step": 1720 }, { "epoch": 0.16, "learning_rate": 4.325e-05, "loss": 2.1627, "step": 1730 }, { "epoch": 0.16, "learning_rate": 4.35e-05, "loss": 2.1973, "step": 1740 }, { "epoch": 0.16, "learning_rate": 4.375e-05, "loss": 2.2273, "step": 1750 }, { "epoch": 0.16, "learning_rate": 4.4000000000000006e-05, "loss": 2.1886, "step": 1760 }, { "epoch": 0.16, "learning_rate": 4.4250000000000005e-05, "loss": 2.2742, "step": 1770 }, { "epoch": 0.16, "learning_rate": 4.4500000000000004e-05, "loss": 2.2049, "step": 1780 }, { "epoch": 0.16, "learning_rate": 4.4750000000000004e-05, "loss": 2.2726, "step": 1790 }, { "epoch": 0.17, "learning_rate": 4.5e-05, "loss": 2.1619, "step": 1800 }, { "epoch": 0.17, "learning_rate": 4.525e-05, "loss": 2.1577, "step": 1810 }, { "epoch": 0.17, "learning_rate": 4.55e-05, "loss": 2.2098, "step": 1820 }, { "epoch": 0.17, "learning_rate": 4.575e-05, "loss": 2.1906, "step": 1830 }, { "epoch": 0.17, "learning_rate": 4.600000000000001e-05, "loss": 2.1568, "step": 1840 }, { "epoch": 0.17, "learning_rate": 4.6250000000000006e-05, "loss": 2.1792, "step": 1850 }, { "epoch": 0.17, "learning_rate": 4.6500000000000005e-05, "loss": 2.1768, "step": 1860 }, { "epoch": 0.17, "learning_rate": 4.6750000000000005e-05, "loss": 2.1965, "step": 1870 }, { "epoch": 0.17, "learning_rate": 4.7e-05, "loss": 2.1331, "step": 1880 }, { "epoch": 0.17, "learning_rate": 4.7249999999999997e-05, "loss": 2.2698, "step": 1890 }, { "epoch": 0.17, "learning_rate": 4.75e-05, "loss": 2.2036, "step": 1900 }, { "epoch": 0.18, "learning_rate": 4.775e-05, "loss": 2.1914, "step": 1910 }, { "epoch": 0.18, "learning_rate": 4.8e-05, "loss": 2.1904, "step": 1920 }, { "epoch": 0.18, "learning_rate": 4.825e-05, "loss": 2.2257, "step": 1930 }, { "epoch": 0.18, "learning_rate": 4.85e-05, "loss": 2.2195, "step": 1940 }, { "epoch": 0.18, "learning_rate": 4.875e-05, "loss": 2.1784, "step": 1950 }, { "epoch": 0.18, "learning_rate": 4.9e-05, "loss": 2.1663, "step": 1960 }, { "epoch": 0.18, "learning_rate": 4.9250000000000004e-05, "loss": 2.1949, "step": 1970 }, { "epoch": 0.18, "learning_rate": 4.9500000000000004e-05, "loss": 2.1924, "step": 1980 }, { "epoch": 0.18, "learning_rate": 4.975e-05, "loss": 2.1822, "step": 1990 }, { "epoch": 0.18, "learning_rate": 5e-05, "loss": 2.2388, "step": 2000 }, { "epoch": 0.19, "learning_rate": 4.999998681502954e-05, "loss": 2.1987, "step": 2010 }, { "epoch": 0.19, "learning_rate": 4.999994726013205e-05, "loss": 2.2016, "step": 2020 }, { "epoch": 0.19, "learning_rate": 4.999988133534927e-05, "loss": 2.2296, "step": 2030 }, { "epoch": 0.19, "learning_rate": 4.9999789040750726e-05, "loss": 2.2039, "step": 2040 }, { "epoch": 0.19, "learning_rate": 4.9999670376433774e-05, "loss": 2.1696, "step": 2050 }, { "epoch": 0.19, "learning_rate": 4.999952534252358e-05, "loss": 2.215, "step": 2060 }, { "epoch": 0.19, "learning_rate": 4.999935393917313e-05, "loss": 2.2708, "step": 2070 }, { "epoch": 0.19, "learning_rate": 4.999915616656321e-05, "loss": 2.2016, "step": 2080 }, { "epoch": 0.19, "learning_rate": 4.999893202490243e-05, "loss": 2.2359, "step": 2090 }, { "epoch": 0.19, "learning_rate": 4.999868151442723e-05, "loss": 2.2085, "step": 2100 }, { "epoch": 0.19, "learning_rate": 4.9998404635401836e-05, "loss": 2.2345, "step": 2110 }, { "epoch": 0.2, "learning_rate": 4.99981013881183e-05, "loss": 2.2376, "step": 2120 }, { "epoch": 0.2, "learning_rate": 4.999777177289648e-05, "loss": 2.2421, "step": 2130 }, { "epoch": 0.2, "learning_rate": 4.999741579008407e-05, "loss": 2.2049, "step": 2140 }, { "epoch": 0.2, "learning_rate": 4.9997033440056546e-05, "loss": 2.1839, "step": 2150 }, { "epoch": 0.2, "learning_rate": 4.999662472321721e-05, "loss": 2.214, "step": 2160 }, { "epoch": 0.2, "learning_rate": 4.9996189639997194e-05, "loss": 2.1728, "step": 2170 }, { "epoch": 0.2, "learning_rate": 4.99957281908554e-05, "loss": 2.1487, "step": 2180 }, { "epoch": 0.2, "learning_rate": 4.9995240376278566e-05, "loss": 2.2367, "step": 2190 }, { "epoch": 0.2, "learning_rate": 4.999472619678125e-05, "loss": 2.1542, "step": 2200 }, { "epoch": 0.2, "learning_rate": 4.99941856529058e-05, "loss": 2.208, "step": 2210 }, { "epoch": 0.2, "learning_rate": 4.999361874522238e-05, "loss": 2.1771, "step": 2220 }, { "epoch": 0.21, "learning_rate": 4.999302547432897e-05, "loss": 2.2336, "step": 2230 }, { "epoch": 0.21, "learning_rate": 4.999240584085134e-05, "loss": 2.1399, "step": 2240 }, { "epoch": 0.21, "learning_rate": 4.9991759845443086e-05, "loss": 2.2295, "step": 2250 }, { "epoch": 0.21, "learning_rate": 4.99910874887856e-05, "loss": 2.1966, "step": 2260 }, { "epoch": 0.21, "learning_rate": 4.9990388771588084e-05, "loss": 2.2049, "step": 2270 }, { "epoch": 0.21, "learning_rate": 4.998966369458754e-05, "loss": 2.1745, "step": 2280 }, { "epoch": 0.21, "learning_rate": 4.998891225854877e-05, "loss": 2.1274, "step": 2290 }, { "epoch": 0.21, "learning_rate": 4.99881344642644e-05, "loss": 2.197, "step": 2300 }, { "epoch": 0.21, "learning_rate": 4.998733031255485e-05, "loss": 2.171, "step": 2310 }, { "epoch": 0.21, "learning_rate": 4.998649980426832e-05, "loss": 2.2024, "step": 2320 }, { "epoch": 0.21, "learning_rate": 4.998564294028084e-05, "loss": 2.1485, "step": 2330 }, { "epoch": 0.22, "learning_rate": 4.9984759721496224e-05, "loss": 2.1765, "step": 2340 }, { "epoch": 0.22, "learning_rate": 4.998385014884608e-05, "loss": 2.1462, "step": 2350 }, { "epoch": 0.22, "learning_rate": 4.998291422328984e-05, "loss": 2.1808, "step": 2360 }, { "epoch": 0.22, "learning_rate": 4.998195194581471e-05, "loss": 2.223, "step": 2370 }, { "epoch": 0.22, "learning_rate": 4.99809633174357e-05, "loss": 2.1612, "step": 2380 }, { "epoch": 0.22, "learning_rate": 4.99799483391956e-05, "loss": 2.1479, "step": 2390 }, { "epoch": 0.22, "learning_rate": 4.9978907012165036e-05, "loss": 2.1605, "step": 2400 }, { "epoch": 0.22, "learning_rate": 4.9977839337442356e-05, "loss": 2.1372, "step": 2410 }, { "epoch": 0.22, "learning_rate": 4.9976745316153776e-05, "loss": 2.189, "step": 2420 }, { "epoch": 0.22, "learning_rate": 4.9975624949453246e-05, "loss": 2.1357, "step": 2430 }, { "epoch": 0.22, "learning_rate": 4.997447823852254e-05, "loss": 2.1359, "step": 2440 }, { "epoch": 0.23, "learning_rate": 4.9973305184571195e-05, "loss": 2.1375, "step": 2450 }, { "epoch": 0.23, "learning_rate": 4.997210578883655e-05, "loss": 2.1954, "step": 2460 }, { "epoch": 0.23, "learning_rate": 4.997088005258372e-05, "loss": 2.1948, "step": 2470 }, { "epoch": 0.23, "learning_rate": 4.996962797710563e-05, "loss": 2.1887, "step": 2480 }, { "epoch": 0.23, "learning_rate": 4.996834956372294e-05, "loss": 2.1216, "step": 2490 }, { "epoch": 0.23, "learning_rate": 4.9967044813784124e-05, "loss": 2.1703, "step": 2500 }, { "epoch": 0.23, "learning_rate": 4.996571372866543e-05, "loss": 2.2074, "step": 2510 }, { "epoch": 0.23, "learning_rate": 4.99643563097709e-05, "loss": 2.1699, "step": 2520 }, { "epoch": 0.23, "learning_rate": 4.996297255853231e-05, "loss": 2.2154, "step": 2530 }, { "epoch": 0.23, "learning_rate": 4.996156247640925e-05, "loss": 2.1514, "step": 2540 }, { "epoch": 0.23, "learning_rate": 4.996012606488908e-05, "loss": 2.1834, "step": 2550 }, { "epoch": 0.24, "learning_rate": 4.995866332548691e-05, "loss": 2.1979, "step": 2560 }, { "epoch": 0.24, "learning_rate": 4.995717425974564e-05, "loss": 2.1411, "step": 2570 }, { "epoch": 0.24, "learning_rate": 4.995565886923593e-05, "loss": 2.1839, "step": 2580 }, { "epoch": 0.24, "learning_rate": 4.995411715555621e-05, "loss": 2.1555, "step": 2590 }, { "epoch": 0.24, "learning_rate": 4.995254912033268e-05, "loss": 2.1551, "step": 2600 }, { "epoch": 0.24, "learning_rate": 4.995095476521929e-05, "loss": 2.1536, "step": 2610 }, { "epoch": 0.24, "learning_rate": 4.994933409189778e-05, "loss": 2.1189, "step": 2620 }, { "epoch": 0.24, "learning_rate": 4.994768710207762e-05, "loss": 2.2039, "step": 2630 }, { "epoch": 0.24, "learning_rate": 4.994601379749605e-05, "loss": 2.1846, "step": 2640 }, { "epoch": 0.24, "learning_rate": 4.994431417991807e-05, "loss": 2.1923, "step": 2650 }, { "epoch": 0.24, "learning_rate": 4.9942588251136445e-05, "loss": 2.1252, "step": 2660 }, { "epoch": 0.25, "learning_rate": 4.994083601297166e-05, "loss": 2.1784, "step": 2670 }, { "epoch": 0.25, "learning_rate": 4.993905746727198e-05, "loss": 2.1358, "step": 2680 }, { "epoch": 0.25, "learning_rate": 4.993725261591341e-05, "loss": 2.1357, "step": 2690 }, { "epoch": 0.25, "learning_rate": 4.993542146079971e-05, "loss": 2.0982, "step": 2700 }, { "epoch": 0.25, "learning_rate": 4.993356400386236e-05, "loss": 2.1236, "step": 2710 }, { "epoch": 0.25, "learning_rate": 4.993168024706063e-05, "loss": 2.2397, "step": 2720 }, { "epoch": 0.25, "learning_rate": 4.992977019238148e-05, "loss": 2.1677, "step": 2730 }, { "epoch": 0.25, "learning_rate": 4.9927833841839636e-05, "loss": 2.1817, "step": 2740 }, { "epoch": 0.25, "learning_rate": 4.9925871197477556e-05, "loss": 2.1726, "step": 2750 }, { "epoch": 0.25, "learning_rate": 4.992388226136544e-05, "loss": 2.1257, "step": 2760 }, { "epoch": 0.25, "learning_rate": 4.99218670356012e-05, "loss": 2.16, "step": 2770 }, { "epoch": 0.26, "learning_rate": 4.9919825522310505e-05, "loss": 2.1605, "step": 2780 }, { "epoch": 0.26, "learning_rate": 4.991775772364672e-05, "loss": 2.1732, "step": 2790 }, { "epoch": 0.26, "learning_rate": 4.9915663641790975e-05, "loss": 2.1598, "step": 2800 }, { "epoch": 0.26, "learning_rate": 4.9913543278952093e-05, "loss": 2.1661, "step": 2810 }, { "epoch": 0.26, "learning_rate": 4.9911396637366625e-05, "loss": 2.1079, "step": 2820 }, { "epoch": 0.26, "learning_rate": 4.990922371929885e-05, "loss": 2.1995, "step": 2830 }, { "epoch": 0.26, "learning_rate": 4.9907024527040746e-05, "loss": 2.1806, "step": 2840 }, { "epoch": 0.26, "learning_rate": 4.990479906291203e-05, "loss": 2.146, "step": 2850 }, { "epoch": 0.26, "learning_rate": 4.9902547329260105e-05, "loss": 2.1917, "step": 2860 }, { "epoch": 0.26, "learning_rate": 4.990026932846011e-05, "loss": 2.1145, "step": 2870 }, { "epoch": 0.27, "learning_rate": 4.9897965062914856e-05, "loss": 2.1512, "step": 2880 }, { "epoch": 0.27, "learning_rate": 4.989563453505488e-05, "loss": 2.1789, "step": 2890 }, { "epoch": 0.27, "learning_rate": 4.989327774733843e-05, "loss": 2.1492, "step": 2900 }, { "epoch": 0.27, "learning_rate": 4.9890894702251426e-05, "loss": 2.1642, "step": 2910 }, { "epoch": 0.27, "learning_rate": 4.988848540230751e-05, "loss": 2.171, "step": 2920 }, { "epoch": 0.27, "learning_rate": 4.988604985004799e-05, "loss": 2.1889, "step": 2930 }, { "epoch": 0.27, "learning_rate": 4.98835880480419e-05, "loss": 2.1344, "step": 2940 }, { "epoch": 0.27, "learning_rate": 4.988109999888593e-05, "loss": 2.1493, "step": 2950 }, { "epoch": 0.27, "learning_rate": 4.987858570520447e-05, "loss": 2.1232, "step": 2960 }, { "epoch": 0.27, "learning_rate": 4.98760451696496e-05, "loss": 2.1579, "step": 2970 }, { "epoch": 0.27, "learning_rate": 4.987347839490106e-05, "loss": 2.1448, "step": 2980 }, { "epoch": 0.28, "learning_rate": 4.987088538366628e-05, "loss": 2.1364, "step": 2990 }, { "epoch": 0.28, "learning_rate": 4.9868266138680366e-05, "loss": 2.1336, "step": 3000 }, { "epoch": 0.28, "learning_rate": 4.986562066270609e-05, "loss": 2.1686, "step": 3010 }, { "epoch": 0.28, "learning_rate": 4.9862948958533896e-05, "loss": 2.1615, "step": 3020 }, { "epoch": 0.28, "learning_rate": 4.986025102898188e-05, "loss": 2.1301, "step": 3030 }, { "epoch": 0.28, "learning_rate": 4.985752687689583e-05, "loss": 2.1842, "step": 3040 }, { "epoch": 0.28, "learning_rate": 4.985477650514916e-05, "loss": 2.0933, "step": 3050 }, { "epoch": 0.28, "learning_rate": 4.985199991664297e-05, "loss": 2.1257, "step": 3060 }, { "epoch": 0.28, "learning_rate": 4.984919711430598e-05, "loss": 2.109, "step": 3070 }, { "epoch": 0.28, "learning_rate": 4.98463681010946e-05, "loss": 2.1242, "step": 3080 }, { "epoch": 0.28, "learning_rate": 4.984351287999284e-05, "loss": 2.1459, "step": 3090 }, { "epoch": 0.29, "learning_rate": 4.984063145401241e-05, "loss": 2.121, "step": 3100 }, { "epoch": 0.29, "learning_rate": 4.983772382619261e-05, "loss": 2.1025, "step": 3110 }, { "epoch": 0.29, "learning_rate": 4.983478999960041e-05, "loss": 2.1247, "step": 3120 }, { "epoch": 0.29, "learning_rate": 4.9831829977330405e-05, "loss": 2.1086, "step": 3130 }, { "epoch": 0.29, "learning_rate": 4.982884376250481e-05, "loss": 2.1499, "step": 3140 }, { "epoch": 0.29, "learning_rate": 4.982583135827348e-05, "loss": 2.1761, "step": 3150 }, { "epoch": 0.29, "learning_rate": 4.98227927678139e-05, "loss": 2.1295, "step": 3160 }, { "epoch": 0.29, "learning_rate": 4.981972799433115e-05, "loss": 2.1109, "step": 3170 }, { "epoch": 0.29, "learning_rate": 4.9816637041057965e-05, "loss": 2.1533, "step": 3180 }, { "epoch": 0.29, "learning_rate": 4.981351991125467e-05, "loss": 2.1885, "step": 3190 }, { "epoch": 0.29, "learning_rate": 4.98103766082092e-05, "loss": 2.1577, "step": 3200 }, { "epoch": 0.3, "learning_rate": 4.980720713523711e-05, "loss": 2.1182, "step": 3210 }, { "epoch": 0.3, "learning_rate": 4.980401149568155e-05, "loss": 2.1276, "step": 3220 }, { "epoch": 0.3, "learning_rate": 4.980078969291327e-05, "loss": 2.156, "step": 3230 }, { "epoch": 0.3, "learning_rate": 4.979754173033063e-05, "loss": 2.0985, "step": 3240 }, { "epoch": 0.3, "learning_rate": 4.979426761135956e-05, "loss": 2.1673, "step": 3250 }, { "epoch": 0.3, "learning_rate": 4.97909673394536e-05, "loss": 2.1402, "step": 3260 }, { "epoch": 0.3, "learning_rate": 4.978764091809387e-05, "loss": 2.1441, "step": 3270 }, { "epoch": 0.3, "learning_rate": 4.9784288350789064e-05, "loss": 2.1406, "step": 3280 }, { "epoch": 0.3, "learning_rate": 4.978090964107548e-05, "loss": 2.1072, "step": 3290 }, { "epoch": 0.3, "learning_rate": 4.977750479251695e-05, "loss": 2.1438, "step": 3300 }, { "epoch": 0.3, "learning_rate": 4.977407380870492e-05, "loss": 2.1384, "step": 3310 }, { "epoch": 0.31, "learning_rate": 4.977061669325837e-05, "loss": 2.1293, "step": 3320 }, { "epoch": 0.31, "learning_rate": 4.9767133449823864e-05, "loss": 2.1121, "step": 3330 }, { "epoch": 0.31, "learning_rate": 4.976362408207551e-05, "loss": 2.1541, "step": 3340 }, { "epoch": 0.31, "learning_rate": 4.976008859371499e-05, "loss": 2.0867, "step": 3350 }, { "epoch": 0.31, "learning_rate": 4.9756526988471534e-05, "loss": 2.0785, "step": 3360 }, { "epoch": 0.31, "learning_rate": 4.97529392701019e-05, "loss": 2.1422, "step": 3370 }, { "epoch": 0.31, "learning_rate": 4.974932544239042e-05, "loss": 2.1225, "step": 3380 }, { "epoch": 0.31, "learning_rate": 4.9745685509148936e-05, "loss": 2.1567, "step": 3390 }, { "epoch": 0.31, "learning_rate": 4.974201947421685e-05, "loss": 2.112, "step": 3400 }, { "epoch": 0.31, "learning_rate": 4.973832734146109e-05, "loss": 2.0931, "step": 3410 }, { "epoch": 0.31, "learning_rate": 4.9734609114776096e-05, "loss": 2.0682, "step": 3420 }, { "epoch": 0.32, "learning_rate": 4.973086479808385e-05, "loss": 2.1965, "step": 3430 }, { "epoch": 0.32, "learning_rate": 4.972709439533386e-05, "loss": 2.1135, "step": 3440 }, { "epoch": 0.32, "learning_rate": 4.972329791050312e-05, "loss": 2.1633, "step": 3450 }, { "epoch": 0.32, "learning_rate": 4.9719475347596174e-05, "loss": 2.2322, "step": 3460 }, { "epoch": 0.32, "learning_rate": 4.971562671064504e-05, "loss": 2.1525, "step": 3470 }, { "epoch": 0.32, "learning_rate": 4.971175200370924e-05, "loss": 2.106, "step": 3480 }, { "epoch": 0.32, "learning_rate": 4.970785123087582e-05, "loss": 2.1747, "step": 3490 }, { "epoch": 0.32, "learning_rate": 4.97039243962593e-05, "loss": 2.1026, "step": 3500 }, { "epoch": 0.32, "learning_rate": 4.9699971504001716e-05, "loss": 2.1309, "step": 3510 }, { "epoch": 0.32, "learning_rate": 4.969599255827254e-05, "loss": 2.1428, "step": 3520 }, { "epoch": 0.32, "learning_rate": 4.9691987563268774e-05, "loss": 2.1619, "step": 3530 }, { "epoch": 0.33, "learning_rate": 4.968795652321486e-05, "loss": 2.189, "step": 3540 }, { "epoch": 0.33, "learning_rate": 4.9683899442362744e-05, "loss": 2.1222, "step": 3550 }, { "epoch": 0.33, "learning_rate": 4.9679816324991826e-05, "loss": 2.1128, "step": 3560 }, { "epoch": 0.33, "learning_rate": 4.967570717540896e-05, "loss": 2.169, "step": 3570 }, { "epoch": 0.33, "learning_rate": 4.967157199794848e-05, "loss": 2.0696, "step": 3580 }, { "epoch": 0.33, "learning_rate": 4.9667410796972133e-05, "loss": 2.155, "step": 3590 }, { "epoch": 0.33, "learning_rate": 4.966322357686918e-05, "loss": 2.1569, "step": 3600 }, { "epoch": 0.33, "learning_rate": 4.9659010342056266e-05, "loss": 2.1152, "step": 3610 }, { "epoch": 0.33, "learning_rate": 4.965477109697751e-05, "loss": 2.1587, "step": 3620 }, { "epoch": 0.33, "learning_rate": 4.9650505846104466e-05, "loss": 2.0674, "step": 3630 }, { "epoch": 0.34, "learning_rate": 4.964621459393609e-05, "loss": 2.0963, "step": 3640 }, { "epoch": 0.34, "learning_rate": 4.964189734499881e-05, "loss": 2.1112, "step": 3650 }, { "epoch": 0.34, "learning_rate": 4.963755410384643e-05, "loss": 2.0924, "step": 3660 }, { "epoch": 0.34, "learning_rate": 4.963318487506019e-05, "loss": 2.1413, "step": 3670 }, { "epoch": 0.34, "learning_rate": 4.962878966324875e-05, "loss": 2.1106, "step": 3680 }, { "epoch": 0.34, "learning_rate": 4.962436847304818e-05, "loss": 2.1188, "step": 3690 }, { "epoch": 0.34, "learning_rate": 4.961992130912192e-05, "loss": 2.1311, "step": 3700 }, { "epoch": 0.34, "learning_rate": 4.961544817616084e-05, "loss": 2.1232, "step": 3710 }, { "epoch": 0.34, "learning_rate": 4.9610949078883186e-05, "loss": 2.1166, "step": 3720 }, { "epoch": 0.34, "learning_rate": 4.960642402203459e-05, "loss": 2.1116, "step": 3730 }, { "epoch": 0.34, "learning_rate": 4.960187301038809e-05, "loss": 2.1006, "step": 3740 }, { "epoch": 0.35, "learning_rate": 4.959729604874406e-05, "loss": 2.1307, "step": 3750 }, { "epoch": 0.35, "learning_rate": 4.959269314193028e-05, "loss": 2.13, "step": 3760 }, { "epoch": 0.35, "learning_rate": 4.9588064294801884e-05, "loss": 2.1217, "step": 3770 }, { "epoch": 0.35, "learning_rate": 4.9583409512241374e-05, "loss": 2.0358, "step": 3780 }, { "epoch": 0.35, "learning_rate": 4.9578728799158594e-05, "loss": 2.0745, "step": 3790 }, { "epoch": 0.35, "learning_rate": 4.957402216049075e-05, "loss": 2.1031, "step": 3800 }, { "epoch": 0.35, "learning_rate": 4.95692896012024e-05, "loss": 2.0909, "step": 3810 }, { "epoch": 0.35, "learning_rate": 4.956453112628543e-05, "loss": 2.1465, "step": 3820 }, { "epoch": 0.35, "learning_rate": 4.955974674075907e-05, "loss": 2.0565, "step": 3830 }, { "epoch": 0.35, "learning_rate": 4.9554936449669884e-05, "loss": 2.0841, "step": 3840 }, { "epoch": 0.35, "learning_rate": 4.955010025809176e-05, "loss": 2.1026, "step": 3850 }, { "epoch": 0.36, "learning_rate": 4.9545238171125875e-05, "loss": 2.1488, "step": 3860 }, { "epoch": 0.36, "learning_rate": 4.954035019390078e-05, "loss": 2.0649, "step": 3870 }, { "epoch": 0.36, "learning_rate": 4.953543633157227e-05, "loss": 2.074, "step": 3880 }, { "epoch": 0.36, "learning_rate": 4.9530496589323504e-05, "loss": 2.1104, "step": 3890 }, { "epoch": 0.36, "learning_rate": 4.9525530972364896e-05, "loss": 2.0534, "step": 3900 }, { "epoch": 0.36, "learning_rate": 4.952053948593417e-05, "loss": 2.1115, "step": 3910 }, { "epoch": 0.36, "learning_rate": 4.951552213529633e-05, "loss": 2.1284, "step": 3920 }, { "epoch": 0.36, "learning_rate": 4.9510478925743666e-05, "loss": 2.1579, "step": 3930 }, { "epoch": 0.36, "learning_rate": 4.9505409862595755e-05, "loss": 2.1443, "step": 3940 }, { "epoch": 0.36, "learning_rate": 4.950031495119942e-05, "loss": 2.0533, "step": 3950 }, { "epoch": 0.36, "learning_rate": 4.949519419692876e-05, "loss": 2.0863, "step": 3960 }, { "epoch": 0.37, "learning_rate": 4.949004760518515e-05, "loss": 2.0886, "step": 3970 }, { "epoch": 0.37, "learning_rate": 4.948487518139719e-05, "loss": 2.1446, "step": 3980 }, { "epoch": 0.37, "learning_rate": 4.9479676931020744e-05, "loss": 2.1257, "step": 3990 }, { "epoch": 0.37, "learning_rate": 4.947445285953891e-05, "loss": 2.0812, "step": 4000 }, { "epoch": 0.37, "learning_rate": 4.946920297246203e-05, "loss": 2.1239, "step": 4010 }, { "epoch": 0.37, "learning_rate": 4.946392727532767e-05, "loss": 2.0931, "step": 4020 }, { "epoch": 0.37, "learning_rate": 4.945862577370064e-05, "loss": 2.0685, "step": 4030 }, { "epoch": 0.37, "learning_rate": 4.9453298473172924e-05, "loss": 2.0697, "step": 4040 }, { "epoch": 0.37, "learning_rate": 4.944794537936376e-05, "loss": 2.1232, "step": 4050 }, { "epoch": 0.37, "learning_rate": 4.944256649791958e-05, "loss": 2.0633, "step": 4060 }, { "epoch": 0.37, "learning_rate": 4.943716183451401e-05, "loss": 2.1236, "step": 4070 }, { "epoch": 0.38, "learning_rate": 4.943173139484788e-05, "loss": 2.0687, "step": 4080 }, { "epoch": 0.38, "learning_rate": 4.942627518464921e-05, "loss": 2.142, "step": 4090 }, { "epoch": 0.38, "learning_rate": 4.9420793209673174e-05, "loss": 2.1352, "step": 4100 }, { "epoch": 0.38, "learning_rate": 4.941528547570218e-05, "loss": 2.1581, "step": 4110 }, { "epoch": 0.38, "learning_rate": 4.9409751988545754e-05, "loss": 2.0829, "step": 4120 }, { "epoch": 0.38, "learning_rate": 4.9404192754040604e-05, "loss": 2.0949, "step": 4130 }, { "epoch": 0.38, "learning_rate": 4.939860777805061e-05, "loss": 2.1316, "step": 4140 }, { "epoch": 0.38, "learning_rate": 4.9392997066466775e-05, "loss": 2.0814, "step": 4150 }, { "epoch": 0.38, "learning_rate": 4.938736062520727e-05, "loss": 2.1062, "step": 4160 }, { "epoch": 0.38, "learning_rate": 4.9381698460217405e-05, "loss": 2.0182, "step": 4170 }, { "epoch": 0.38, "learning_rate": 4.9376010577469615e-05, "loss": 2.1192, "step": 4180 }, { "epoch": 0.39, "learning_rate": 4.937029698296347e-05, "loss": 2.13, "step": 4190 }, { "epoch": 0.39, "learning_rate": 4.9364557682725646e-05, "loss": 2.1034, "step": 4200 }, { "epoch": 0.39, "learning_rate": 4.935879268280995e-05, "loss": 2.1121, "step": 4210 }, { "epoch": 0.39, "learning_rate": 4.935300198929729e-05, "loss": 2.0715, "step": 4220 }, { "epoch": 0.39, "learning_rate": 4.9347185608295674e-05, "loss": 2.074, "step": 4230 }, { "epoch": 0.39, "learning_rate": 4.9341343545940204e-05, "loss": 2.1005, "step": 4240 }, { "epoch": 0.39, "learning_rate": 4.933547580839309e-05, "loss": 2.08, "step": 4250 }, { "epoch": 0.39, "learning_rate": 4.932958240184358e-05, "loss": 2.0833, "step": 4260 }, { "epoch": 0.39, "learning_rate": 4.9323663332508044e-05, "loss": 2.1219, "step": 4270 }, { "epoch": 0.39, "learning_rate": 4.9317718606629906e-05, "loss": 2.0737, "step": 4280 }, { "epoch": 0.39, "learning_rate": 4.931174823047964e-05, "loss": 2.1169, "step": 4290 }, { "epoch": 0.4, "learning_rate": 4.930575221035478e-05, "loss": 2.0806, "step": 4300 }, { "epoch": 0.4, "learning_rate": 4.929973055257992e-05, "loss": 2.0884, "step": 4310 }, { "epoch": 0.4, "learning_rate": 4.92936832635067e-05, "loss": 2.0731, "step": 4320 }, { "epoch": 0.4, "learning_rate": 4.9287610349513766e-05, "loss": 2.0991, "step": 4330 }, { "epoch": 0.4, "learning_rate": 4.928151181700683e-05, "loss": 2.0704, "step": 4340 }, { "epoch": 0.4, "learning_rate": 4.92753876724186e-05, "loss": 2.0926, "step": 4350 }, { "epoch": 0.4, "learning_rate": 4.926923792220881e-05, "loss": 2.0614, "step": 4360 }, { "epoch": 0.4, "learning_rate": 4.9263062572864214e-05, "loss": 2.0228, "step": 4370 }, { "epoch": 0.4, "learning_rate": 4.9256861630898544e-05, "loss": 2.0758, "step": 4380 }, { "epoch": 0.4, "learning_rate": 4.9250635102852536e-05, "loss": 2.0423, "step": 4390 }, { "epoch": 0.41, "learning_rate": 4.924438299529394e-05, "loss": 2.0883, "step": 4400 }, { "epoch": 0.41, "learning_rate": 4.9238105314817426e-05, "loss": 2.1273, "step": 4410 }, { "epoch": 0.41, "learning_rate": 4.923180206804471e-05, "loss": 2.0977, "step": 4420 }, { "epoch": 0.41, "learning_rate": 4.922547326162442e-05, "loss": 2.11, "step": 4430 }, { "epoch": 0.41, "learning_rate": 4.921911890223219e-05, "loss": 2.1392, "step": 4440 }, { "epoch": 0.41, "learning_rate": 4.921273899657055e-05, "loss": 2.102, "step": 4450 }, { "epoch": 0.41, "learning_rate": 4.920633355136904e-05, "loss": 2.1199, "step": 4460 }, { "epoch": 0.41, "learning_rate": 4.9199902573384095e-05, "loss": 2.0724, "step": 4470 }, { "epoch": 0.41, "learning_rate": 4.91934460693991e-05, "loss": 2.0901, "step": 4480 }, { "epoch": 0.41, "learning_rate": 4.918696404622435e-05, "loss": 2.1605, "step": 4490 }, { "epoch": 0.41, "learning_rate": 4.918045651069708e-05, "loss": 2.0648, "step": 4500 }, { "epoch": 0.42, "learning_rate": 4.917392346968142e-05, "loss": 2.0649, "step": 4510 }, { "epoch": 0.42, "learning_rate": 4.91673649300684e-05, "loss": 2.0794, "step": 4520 }, { "epoch": 0.42, "learning_rate": 4.916078089877596e-05, "loss": 2.0358, "step": 4530 }, { "epoch": 0.42, "learning_rate": 4.915417138274891e-05, "loss": 2.016, "step": 4540 }, { "epoch": 0.42, "learning_rate": 4.914753638895897e-05, "loss": 2.1051, "step": 4550 }, { "epoch": 0.42, "learning_rate": 4.914087592440469e-05, "loss": 2.0973, "step": 4560 }, { "epoch": 0.42, "learning_rate": 4.9134189996111543e-05, "loss": 2.1027, "step": 4570 }, { "epoch": 0.42, "learning_rate": 4.9127478611131805e-05, "loss": 2.067, "step": 4580 }, { "epoch": 0.42, "learning_rate": 4.912074177654464e-05, "loss": 2.0473, "step": 4590 }, { "epoch": 0.42, "learning_rate": 4.911397949945604e-05, "loss": 2.0913, "step": 4600 }, { "epoch": 0.42, "learning_rate": 4.910719178699885e-05, "loss": 2.0299, "step": 4610 }, { "epoch": 0.43, "learning_rate": 4.910037864633272e-05, "loss": 2.0801, "step": 4620 }, { "epoch": 0.43, "learning_rate": 4.909354008464414e-05, "loss": 2.0653, "step": 4630 }, { "epoch": 0.43, "learning_rate": 4.908667610914641e-05, "loss": 2.0744, "step": 4640 }, { "epoch": 0.43, "learning_rate": 4.907978672707964e-05, "loss": 2.0834, "step": 4650 }, { "epoch": 0.43, "learning_rate": 4.9072871945710717e-05, "loss": 2.0825, "step": 4660 }, { "epoch": 0.43, "learning_rate": 4.9065931772333354e-05, "loss": 2.0583, "step": 4670 }, { "epoch": 0.43, "learning_rate": 4.905896621426802e-05, "loss": 2.1035, "step": 4680 }, { "epoch": 0.43, "learning_rate": 4.905197527886197e-05, "loss": 2.0679, "step": 4690 }, { "epoch": 0.43, "learning_rate": 4.904495897348923e-05, "loss": 2.0986, "step": 4700 }, { "epoch": 0.43, "learning_rate": 4.903791730555058e-05, "loss": 2.0929, "step": 4710 }, { "epoch": 0.43, "learning_rate": 4.903085028247355e-05, "loss": 2.1453, "step": 4720 }, { "epoch": 0.44, "learning_rate": 4.902375791171243e-05, "loss": 2.0712, "step": 4730 }, { "epoch": 0.44, "learning_rate": 4.901664020074823e-05, "loss": 2.0741, "step": 4740 }, { "epoch": 0.44, "learning_rate": 4.9009497157088695e-05, "loss": 2.0169, "step": 4750 }, { "epoch": 0.44, "learning_rate": 4.9002328788268295e-05, "loss": 2.0796, "step": 4760 }, { "epoch": 0.44, "learning_rate": 4.899513510184819e-05, "loss": 2.0417, "step": 4770 }, { "epoch": 0.44, "learning_rate": 4.898791610541629e-05, "loss": 2.0768, "step": 4780 }, { "epoch": 0.44, "learning_rate": 4.8980671806587155e-05, "loss": 2.068, "step": 4790 }, { "epoch": 0.44, "learning_rate": 4.897340221300206e-05, "loss": 2.0589, "step": 4800 }, { "epoch": 0.44, "learning_rate": 4.896610733232896e-05, "loss": 2.0629, "step": 4810 }, { "epoch": 0.44, "learning_rate": 4.8958787172262474e-05, "loss": 2.0676, "step": 4820 }, { "epoch": 0.44, "learning_rate": 4.895144174052389e-05, "loss": 2.0978, "step": 4830 }, { "epoch": 0.45, "learning_rate": 4.894407104486116e-05, "loss": 2.1134, "step": 4840 }, { "epoch": 0.45, "learning_rate": 4.8936675093048867e-05, "loss": 2.0558, "step": 4850 }, { "epoch": 0.45, "learning_rate": 4.892925389288824e-05, "loss": 2.0676, "step": 4860 }, { "epoch": 0.45, "learning_rate": 4.8921807452207156e-05, "loss": 2.0756, "step": 4870 }, { "epoch": 0.45, "learning_rate": 4.8914335778860086e-05, "loss": 2.0837, "step": 4880 }, { "epoch": 0.45, "learning_rate": 4.890683888072814e-05, "loss": 2.0624, "step": 4890 }, { "epoch": 0.45, "learning_rate": 4.889931676571905e-05, "loss": 2.081, "step": 4900 }, { "epoch": 0.45, "learning_rate": 4.889176944176709e-05, "loss": 2.0217, "step": 4910 }, { "epoch": 0.45, "learning_rate": 4.888419691683318e-05, "loss": 2.0362, "step": 4920 }, { "epoch": 0.45, "learning_rate": 4.8876599198904806e-05, "loss": 1.9754, "step": 4930 }, { "epoch": 0.45, "learning_rate": 4.8868976295996006e-05, "loss": 2.1155, "step": 4940 }, { "epoch": 0.46, "learning_rate": 4.8861328216147415e-05, "loss": 2.0996, "step": 4950 }, { "epoch": 0.46, "learning_rate": 4.8853654967426196e-05, "loss": 2.075, "step": 4960 }, { "epoch": 0.46, "learning_rate": 4.884595655792609e-05, "loss": 2.0857, "step": 4970 }, { "epoch": 0.46, "learning_rate": 4.883823299576734e-05, "loss": 1.9819, "step": 4980 }, { "epoch": 0.46, "learning_rate": 4.883048428909676e-05, "loss": 2.0781, "step": 4990 }, { "epoch": 0.46, "learning_rate": 4.8822710446087655e-05, "loss": 2.0685, "step": 5000 }, { "epoch": 0.46, "learning_rate": 4.881491147493987e-05, "loss": 2.0944, "step": 5010 }, { "epoch": 0.46, "learning_rate": 4.880708738387972e-05, "loss": 2.066, "step": 5020 }, { "epoch": 0.46, "learning_rate": 4.879923818116007e-05, "loss": 2.1074, "step": 5030 }, { "epoch": 0.46, "learning_rate": 4.8791363875060204e-05, "loss": 2.0623, "step": 5040 }, { "epoch": 0.46, "learning_rate": 4.8783464473885945e-05, "loss": 2.051, "step": 5050 }, { "epoch": 0.47, "learning_rate": 4.877553998596955e-05, "loss": 2.0249, "step": 5060 }, { "epoch": 0.47, "learning_rate": 4.876759041966977e-05, "loss": 2.0144, "step": 5070 }, { "epoch": 0.47, "learning_rate": 4.875961578337177e-05, "loss": 2.0864, "step": 5080 }, { "epoch": 0.47, "learning_rate": 4.875161608548718e-05, "loss": 2.117, "step": 5090 }, { "epoch": 0.47, "learning_rate": 4.8743591334454063e-05, "loss": 2.0896, "step": 5100 }, { "epoch": 0.47, "learning_rate": 4.8735541538736915e-05, "loss": 2.095, "step": 5110 }, { "epoch": 0.47, "learning_rate": 4.872746670682663e-05, "loss": 2.0462, "step": 5120 }, { "epoch": 0.47, "learning_rate": 4.871936684724053e-05, "loss": 2.045, "step": 5130 }, { "epoch": 0.47, "learning_rate": 4.8711241968522335e-05, "loss": 2.0629, "step": 5140 }, { "epoch": 0.47, "learning_rate": 4.870309207924213e-05, "loss": 2.1329, "step": 5150 }, { "epoch": 0.47, "learning_rate": 4.869491718799641e-05, "loss": 2.0891, "step": 5160 }, { "epoch": 0.48, "learning_rate": 4.8686717303408024e-05, "loss": 2.0423, "step": 5170 }, { "epoch": 0.48, "learning_rate": 4.86784924341262e-05, "loss": 2.0797, "step": 5180 }, { "epoch": 0.48, "learning_rate": 4.8670242588826495e-05, "loss": 2.1249, "step": 5190 }, { "epoch": 0.48, "learning_rate": 4.8661967776210855e-05, "loss": 2.0409, "step": 5200 }, { "epoch": 0.48, "learning_rate": 4.86536680050075e-05, "loss": 2.1039, "step": 5210 }, { "epoch": 0.48, "learning_rate": 4.864534328397103e-05, "loss": 2.1059, "step": 5220 }, { "epoch": 0.48, "learning_rate": 4.863699362188234e-05, "loss": 2.0376, "step": 5230 }, { "epoch": 0.48, "learning_rate": 4.8628619027548624e-05, "loss": 2.0883, "step": 5240 }, { "epoch": 0.48, "learning_rate": 4.8620219509803386e-05, "loss": 2.0463, "step": 5250 }, { "epoch": 0.48, "learning_rate": 4.861179507750643e-05, "loss": 2.0796, "step": 5260 }, { "epoch": 0.49, "learning_rate": 4.860334573954381e-05, "loss": 2.0386, "step": 5270 }, { "epoch": 0.49, "learning_rate": 4.8594871504827874e-05, "loss": 2.0243, "step": 5280 }, { "epoch": 0.49, "learning_rate": 4.858637238229724e-05, "loss": 2.0798, "step": 5290 }, { "epoch": 0.49, "learning_rate": 4.857784838091673e-05, "loss": 2.0232, "step": 5300 }, { "epoch": 0.49, "learning_rate": 4.856929950967747e-05, "loss": 2.0282, "step": 5310 }, { "epoch": 0.49, "learning_rate": 4.8560725777596784e-05, "loss": 2.0654, "step": 5320 }, { "epoch": 0.49, "learning_rate": 4.855212719371821e-05, "loss": 2.0741, "step": 5330 }, { "epoch": 0.49, "learning_rate": 4.854350376711153e-05, "loss": 2.0829, "step": 5340 }, { "epoch": 0.49, "learning_rate": 4.85348555068727e-05, "loss": 2.0889, "step": 5350 }, { "epoch": 0.49, "learning_rate": 4.852618242212389e-05, "loss": 2.0612, "step": 5360 }, { "epoch": 0.49, "learning_rate": 4.8517484522013456e-05, "loss": 2.0253, "step": 5370 }, { "epoch": 0.5, "learning_rate": 4.850876181571592e-05, "loss": 2.0571, "step": 5380 }, { "epoch": 0.5, "learning_rate": 4.850001431243196e-05, "loss": 2.0878, "step": 5390 }, { "epoch": 0.5, "learning_rate": 4.849124202138844e-05, "loss": 2.0735, "step": 5400 }, { "epoch": 0.5, "learning_rate": 4.8482444951838335e-05, "loss": 2.0973, "step": 5410 }, { "epoch": 0.5, "learning_rate": 4.8473623113060785e-05, "loss": 2.0992, "step": 5420 }, { "epoch": 0.5, "learning_rate": 4.846477651436104e-05, "loss": 2.1126, "step": 5430 }, { "epoch": 0.5, "learning_rate": 4.845590516507047e-05, "loss": 2.0833, "step": 5440 }, { "epoch": 0.5, "learning_rate": 4.844700907454656e-05, "loss": 2.094, "step": 5450 }, { "epoch": 0.5, "learning_rate": 4.8438088252172876e-05, "loss": 2.0654, "step": 5460 }, { "epoch": 0.5, "learning_rate": 4.8429142707359086e-05, "loss": 2.0737, "step": 5470 }, { "epoch": 0.5, "learning_rate": 4.8420172449540935e-05, "loss": 2.082, "step": 5480 }, { "epoch": 0.51, "learning_rate": 4.8411177488180214e-05, "loss": 2.1008, "step": 5490 }, { "epoch": 0.51, "learning_rate": 4.8402157832764804e-05, "loss": 2.0081, "step": 5500 }, { "epoch": 0.51, "learning_rate": 4.8393113492808605e-05, "loss": 2.0616, "step": 5510 }, { "epoch": 0.51, "learning_rate": 4.838404447785157e-05, "loss": 2.0429, "step": 5520 }, { "epoch": 0.51, "learning_rate": 4.837495079745967e-05, "loss": 2.0703, "step": 5530 }, { "epoch": 0.51, "learning_rate": 4.83658324612249e-05, "loss": 2.0606, "step": 5540 }, { "epoch": 0.51, "learning_rate": 4.835668947876527e-05, "loss": 2.0481, "step": 5550 }, { "epoch": 0.51, "learning_rate": 4.834752185972476e-05, "loss": 2.0412, "step": 5560 }, { "epoch": 0.51, "learning_rate": 4.833832961377336e-05, "loss": 2.1465, "step": 5570 }, { "epoch": 0.51, "learning_rate": 4.832911275060703e-05, "loss": 2.0141, "step": 5580 }, { "epoch": 0.51, "learning_rate": 4.8319871279947694e-05, "loss": 2.0978, "step": 5590 }, { "epoch": 0.52, "learning_rate": 4.831060521154324e-05, "loss": 2.0936, "step": 5600 }, { "epoch": 0.52, "learning_rate": 4.8301314555167474e-05, "loss": 2.0648, "step": 5610 }, { "epoch": 0.52, "learning_rate": 4.829199932062019e-05, "loss": 2.0815, "step": 5620 }, { "epoch": 0.52, "learning_rate": 4.828265951772705e-05, "loss": 2.0842, "step": 5630 }, { "epoch": 0.52, "learning_rate": 4.827329515633966e-05, "loss": 2.0444, "step": 5640 }, { "epoch": 0.52, "learning_rate": 4.826390624633554e-05, "loss": 2.0528, "step": 5650 }, { "epoch": 0.52, "learning_rate": 4.825449279761808e-05, "loss": 2.0856, "step": 5660 }, { "epoch": 0.52, "learning_rate": 4.824505482011656e-05, "loss": 2.0136, "step": 5670 }, { "epoch": 0.52, "learning_rate": 4.8235592323786144e-05, "loss": 2.0499, "step": 5680 }, { "epoch": 0.52, "learning_rate": 4.822610531860785e-05, "loss": 2.0279, "step": 5690 }, { "epoch": 0.52, "learning_rate": 4.821659381458854e-05, "loss": 2.0135, "step": 5700 }, { "epoch": 0.53, "learning_rate": 4.820705782176094e-05, "loss": 2.0751, "step": 5710 }, { "epoch": 0.53, "learning_rate": 4.819749735018358e-05, "loss": 2.0757, "step": 5720 }, { "epoch": 0.53, "learning_rate": 4.8187912409940825e-05, "loss": 2.0975, "step": 5730 }, { "epoch": 0.53, "learning_rate": 4.8178303011142855e-05, "loss": 2.0728, "step": 5740 }, { "epoch": 0.53, "learning_rate": 4.8168669163925636e-05, "loss": 2.0712, "step": 5750 }, { "epoch": 0.53, "learning_rate": 4.8159010878450936e-05, "loss": 2.0569, "step": 5760 }, { "epoch": 0.53, "learning_rate": 4.814932816490627e-05, "loss": 2.0942, "step": 5770 }, { "epoch": 0.53, "learning_rate": 4.8139621033504964e-05, "loss": 2.0392, "step": 5780 }, { "epoch": 0.53, "learning_rate": 4.8129889494486055e-05, "loss": 2.0597, "step": 5790 }, { "epoch": 0.53, "learning_rate": 4.812013355811438e-05, "loss": 2.009, "step": 5800 }, { "epoch": 0.53, "learning_rate": 4.8110353234680437e-05, "loss": 2.0831, "step": 5810 }, { "epoch": 0.54, "learning_rate": 4.810054853450052e-05, "loss": 2.0138, "step": 5820 }, { "epoch": 0.54, "learning_rate": 4.80907194679166e-05, "loss": 2.0988, "step": 5830 }, { "epoch": 0.54, "learning_rate": 4.8080866045296336e-05, "loss": 2.0352, "step": 5840 }, { "epoch": 0.54, "learning_rate": 4.8070988277033105e-05, "loss": 2.0231, "step": 5850 }, { "epoch": 0.54, "learning_rate": 4.806108617354595e-05, "loss": 2.046, "step": 5860 }, { "epoch": 0.54, "learning_rate": 4.80511597452796e-05, "loss": 2.0531, "step": 5870 }, { "epoch": 0.54, "learning_rate": 4.8041209002704415e-05, "loss": 2.064, "step": 5880 }, { "epoch": 0.54, "learning_rate": 4.803123395631641e-05, "loss": 1.9861, "step": 5890 }, { "epoch": 0.54, "learning_rate": 4.8021234616637255e-05, "loss": 2.1135, "step": 5900 }, { "epoch": 0.54, "learning_rate": 4.801121099421422e-05, "loss": 2.0684, "step": 5910 }, { "epoch": 0.54, "learning_rate": 4.8001163099620205e-05, "loss": 2.066, "step": 5920 }, { "epoch": 0.55, "learning_rate": 4.79910909434537e-05, "loss": 2.0571, "step": 5930 }, { "epoch": 0.55, "learning_rate": 4.7980994536338786e-05, "loss": 2.02, "step": 5940 }, { "epoch": 0.55, "learning_rate": 4.797087388892514e-05, "loss": 2.0463, "step": 5950 }, { "epoch": 0.55, "learning_rate": 4.7960729011887995e-05, "loss": 2.0875, "step": 5960 }, { "epoch": 0.55, "learning_rate": 4.7950559915928136e-05, "loss": 2.0259, "step": 5970 }, { "epoch": 0.55, "learning_rate": 4.794036661177192e-05, "loss": 2.0556, "step": 5980 }, { "epoch": 0.55, "learning_rate": 4.7930149110171195e-05, "loss": 2.0344, "step": 5990 }, { "epoch": 0.55, "learning_rate": 4.791990742190338e-05, "loss": 2.0926, "step": 6000 }, { "epoch": 0.55, "learning_rate": 4.790964155777137e-05, "loss": 2.0821, "step": 6010 }, { "epoch": 0.55, "learning_rate": 4.789935152860357e-05, "loss": 2.1069, "step": 6020 }, { "epoch": 0.56, "learning_rate": 4.78890373452539e-05, "loss": 2.0471, "step": 6030 }, { "epoch": 0.56, "learning_rate": 4.787869901860171e-05, "loss": 2.0465, "step": 6040 }, { "epoch": 0.56, "learning_rate": 4.786833655955186e-05, "loss": 2.0406, "step": 6050 }, { "epoch": 0.56, "learning_rate": 4.785794997903464e-05, "loss": 1.9583, "step": 6060 }, { "epoch": 0.56, "learning_rate": 4.78475392880058e-05, "loss": 2.0239, "step": 6070 }, { "epoch": 0.56, "learning_rate": 4.7837104497446486e-05, "loss": 1.9777, "step": 6080 }, { "epoch": 0.56, "learning_rate": 4.782664561836332e-05, "loss": 2.0212, "step": 6090 }, { "epoch": 0.56, "learning_rate": 4.78161626617883e-05, "loss": 2.0583, "step": 6100 }, { "epoch": 0.56, "learning_rate": 4.780565563877881e-05, "loss": 2.0258, "step": 6110 }, { "epoch": 0.56, "learning_rate": 4.7795124560417637e-05, "loss": 2.0238, "step": 6120 }, { "epoch": 0.56, "learning_rate": 4.7784569437812945e-05, "loss": 2.0275, "step": 6130 }, { "epoch": 0.57, "learning_rate": 4.7773990282098244e-05, "loss": 2.0717, "step": 6140 }, { "epoch": 0.57, "learning_rate": 4.776338710443241e-05, "loss": 2.0396, "step": 6150 }, { "epoch": 0.57, "learning_rate": 4.775275991599964e-05, "loss": 2.078, "step": 6160 }, { "epoch": 0.57, "learning_rate": 4.774210872800947e-05, "loss": 2.0357, "step": 6170 }, { "epoch": 0.57, "learning_rate": 4.7731433551696755e-05, "loss": 2.0273, "step": 6180 }, { "epoch": 0.57, "learning_rate": 4.772073439832164e-05, "loss": 2.1097, "step": 6190 }, { "epoch": 0.57, "learning_rate": 4.7710011279169577e-05, "loss": 2.0619, "step": 6200 }, { "epoch": 0.57, "learning_rate": 4.769926420555126e-05, "loss": 2.0329, "step": 6210 }, { "epoch": 0.57, "learning_rate": 4.768849318880271e-05, "loss": 2.0276, "step": 6220 }, { "epoch": 0.57, "learning_rate": 4.767769824028515e-05, "loss": 1.9751, "step": 6230 }, { "epoch": 0.57, "learning_rate": 4.766687937138507e-05, "loss": 2.0295, "step": 6240 }, { "epoch": 0.58, "learning_rate": 4.765603659351419e-05, "loss": 2.0532, "step": 6250 }, { "epoch": 0.58, "learning_rate": 4.7645169918109445e-05, "loss": 2.0006, "step": 6260 }, { "epoch": 0.58, "learning_rate": 4.763427935663297e-05, "loss": 2.051, "step": 6270 }, { "epoch": 0.58, "learning_rate": 4.762336492057212e-05, "loss": 2.0895, "step": 6280 }, { "epoch": 0.58, "learning_rate": 4.761242662143941e-05, "loss": 2.0881, "step": 6290 }, { "epoch": 0.58, "learning_rate": 4.760146447077252e-05, "loss": 2.0006, "step": 6300 }, { "epoch": 0.58, "learning_rate": 4.759047848013432e-05, "loss": 2.0302, "step": 6310 }, { "epoch": 0.58, "learning_rate": 4.757946866111279e-05, "loss": 2.0431, "step": 6320 }, { "epoch": 0.58, "learning_rate": 4.756843502532108e-05, "loss": 2.0477, "step": 6330 }, { "epoch": 0.58, "learning_rate": 4.755737758439742e-05, "loss": 2.0249, "step": 6340 }, { "epoch": 0.58, "learning_rate": 4.754629635000519e-05, "loss": 2.0185, "step": 6350 }, { "epoch": 0.59, "learning_rate": 4.753519133383284e-05, "loss": 1.9887, "step": 6360 }, { "epoch": 0.59, "learning_rate": 4.752406254759393e-05, "loss": 2.0241, "step": 6370 }, { "epoch": 0.59, "learning_rate": 4.7512910003027056e-05, "loss": 2.08, "step": 6380 }, { "epoch": 0.59, "learning_rate": 4.750173371189591e-05, "loss": 2.0488, "step": 6390 }, { "epoch": 0.59, "learning_rate": 4.7490533685989215e-05, "loss": 2.0415, "step": 6400 }, { "epoch": 0.59, "learning_rate": 4.747930993712073e-05, "loss": 2.0632, "step": 6410 }, { "epoch": 0.59, "learning_rate": 4.746806247712924e-05, "loss": 2.0429, "step": 6420 }, { "epoch": 0.59, "learning_rate": 4.745679131787853e-05, "loss": 1.9876, "step": 6430 }, { "epoch": 0.59, "learning_rate": 4.7445496471257406e-05, "loss": 2.0407, "step": 6440 }, { "epoch": 0.59, "learning_rate": 4.7434177949179636e-05, "loss": 1.9981, "step": 6450 }, { "epoch": 0.59, "learning_rate": 4.7422835763583985e-05, "loss": 2.0273, "step": 6460 }, { "epoch": 0.6, "learning_rate": 4.741146992643415e-05, "loss": 2.0298, "step": 6470 }, { "epoch": 0.6, "learning_rate": 4.7400080449718786e-05, "loss": 2.0137, "step": 6480 }, { "epoch": 0.6, "learning_rate": 4.73886673454515e-05, "loss": 2.0542, "step": 6490 }, { "epoch": 0.6, "learning_rate": 4.7377230625670786e-05, "loss": 2.1057, "step": 6500 }, { "epoch": 0.6, "learning_rate": 4.736577030244009e-05, "loss": 2.1014, "step": 6510 }, { "epoch": 0.6, "learning_rate": 4.7354286387847737e-05, "loss": 2.0245, "step": 6520 }, { "epoch": 0.6, "learning_rate": 4.734277889400691e-05, "loss": 1.9993, "step": 6530 }, { "epoch": 0.6, "learning_rate": 4.7331247833055705e-05, "loss": 1.9951, "step": 6540 }, { "epoch": 0.6, "learning_rate": 4.731969321715705e-05, "loss": 2.0316, "step": 6550 }, { "epoch": 0.6, "learning_rate": 4.730811505849873e-05, "loss": 2.0185, "step": 6560 }, { "epoch": 0.6, "learning_rate": 4.7296513369293354e-05, "loss": 2.0514, "step": 6570 }, { "epoch": 0.61, "learning_rate": 4.7284888161778366e-05, "loss": 2.0528, "step": 6580 }, { "epoch": 0.61, "learning_rate": 4.7273239448215995e-05, "loss": 1.9955, "step": 6590 }, { "epoch": 0.61, "learning_rate": 4.726156724089328e-05, "loss": 2.0423, "step": 6600 }, { "epoch": 0.61, "learning_rate": 4.724987155212205e-05, "loss": 2.0128, "step": 6610 }, { "epoch": 0.61, "learning_rate": 4.7238152394238875e-05, "loss": 2.0617, "step": 6620 }, { "epoch": 0.61, "learning_rate": 4.722640977960511e-05, "loss": 2.0446, "step": 6630 }, { "epoch": 0.61, "learning_rate": 4.721464372060681e-05, "loss": 2.0264, "step": 6640 }, { "epoch": 0.61, "learning_rate": 4.720285422965482e-05, "loss": 2.0245, "step": 6650 }, { "epoch": 0.61, "learning_rate": 4.719104131918465e-05, "loss": 1.9999, "step": 6660 }, { "epoch": 0.61, "learning_rate": 4.717920500165652e-05, "loss": 2.0251, "step": 6670 }, { "epoch": 0.61, "learning_rate": 4.716734528955537e-05, "loss": 2.0177, "step": 6680 }, { "epoch": 0.62, "learning_rate": 4.7155462195390784e-05, "loss": 2.0193, "step": 6690 }, { "epoch": 0.62, "learning_rate": 4.714355573169703e-05, "loss": 2.0585, "step": 6700 }, { "epoch": 0.62, "learning_rate": 4.7131625911033005e-05, "loss": 2.034, "step": 6710 }, { "epoch": 0.62, "learning_rate": 4.711967274598227e-05, "loss": 2.0425, "step": 6720 }, { "epoch": 0.62, "learning_rate": 4.7107696249152985e-05, "loss": 2.0466, "step": 6730 }, { "epoch": 0.62, "learning_rate": 4.7095696433177934e-05, "loss": 2.0173, "step": 6740 }, { "epoch": 0.62, "learning_rate": 4.708367331071449e-05, "loss": 2.0801, "step": 6750 }, { "epoch": 0.62, "learning_rate": 4.707162689444463e-05, "loss": 1.9801, "step": 6760 }, { "epoch": 0.62, "learning_rate": 4.705955719707487e-05, "loss": 1.9949, "step": 6770 }, { "epoch": 0.62, "learning_rate": 4.70474642313363e-05, "loss": 2.0455, "step": 6780 }, { "epoch": 0.63, "learning_rate": 4.703534800998456e-05, "loss": 2.0497, "step": 6790 }, { "epoch": 0.63, "learning_rate": 4.7023208545799804e-05, "loss": 2.0692, "step": 6800 }, { "epoch": 0.63, "learning_rate": 4.70110458515867e-05, "loss": 2.0878, "step": 6810 }, { "epoch": 0.63, "learning_rate": 4.6998859940174455e-05, "loss": 1.9941, "step": 6820 }, { "epoch": 0.63, "learning_rate": 4.6986650824416726e-05, "loss": 2.021, "step": 6830 }, { "epoch": 0.63, "learning_rate": 4.6974418517191654e-05, "loss": 2.0433, "step": 6840 }, { "epoch": 0.63, "learning_rate": 4.696216303140185e-05, "loss": 2.0468, "step": 6850 }, { "epoch": 0.63, "learning_rate": 4.6949884379974385e-05, "loss": 2.0734, "step": 6860 }, { "epoch": 0.63, "learning_rate": 4.693758257586073e-05, "loss": 2.0342, "step": 6870 }, { "epoch": 0.63, "learning_rate": 4.6925257632036814e-05, "loss": 2.0269, "step": 6880 }, { "epoch": 0.63, "learning_rate": 4.691290956150296e-05, "loss": 2.0559, "step": 6890 }, { "epoch": 0.64, "learning_rate": 4.690053837728387e-05, "loss": 2.0001, "step": 6900 }, { "epoch": 0.64, "learning_rate": 4.688814409242865e-05, "loss": 2.0412, "step": 6910 }, { "epoch": 0.64, "learning_rate": 4.687572672001076e-05, "loss": 1.9941, "step": 6920 }, { "epoch": 0.64, "learning_rate": 4.6863286273128014e-05, "loss": 2.0358, "step": 6930 }, { "epoch": 0.64, "learning_rate": 4.6850822764902575e-05, "loss": 2.0464, "step": 6940 }, { "epoch": 0.64, "learning_rate": 4.683833620848091e-05, "loss": 2.0108, "step": 6950 }, { "epoch": 0.64, "learning_rate": 4.6825826617033815e-05, "loss": 2.0334, "step": 6960 }, { "epoch": 0.64, "learning_rate": 4.681329400375637e-05, "loss": 1.9189, "step": 6970 }, { "epoch": 0.64, "learning_rate": 4.6800738381867956e-05, "loss": 2.0715, "step": 6980 }, { "epoch": 0.64, "learning_rate": 4.678815976461221e-05, "loss": 1.9744, "step": 6990 }, { "epoch": 0.64, "learning_rate": 4.677555816525703e-05, "loss": 2.0562, "step": 7000 }, { "epoch": 0.65, "learning_rate": 4.676293359709454e-05, "loss": 2.0226, "step": 7010 }, { "epoch": 0.65, "learning_rate": 4.675028607344113e-05, "loss": 1.9664, "step": 7020 }, { "epoch": 0.65, "learning_rate": 4.673761560763735e-05, "loss": 1.9677, "step": 7030 }, { "epoch": 0.65, "learning_rate": 4.6724922213047994e-05, "loss": 2.0097, "step": 7040 }, { "epoch": 0.65, "learning_rate": 4.6712205903062025e-05, "loss": 2.0383, "step": 7050 }, { "epoch": 0.65, "learning_rate": 4.669946669109257e-05, "loss": 2.0114, "step": 7060 }, { "epoch": 0.65, "learning_rate": 4.668670459057692e-05, "loss": 2.0334, "step": 7070 }, { "epoch": 0.65, "learning_rate": 4.667391961497652e-05, "loss": 1.9776, "step": 7080 }, { "epoch": 0.65, "learning_rate": 4.6661111777776914e-05, "loss": 2.0283, "step": 7090 }, { "epoch": 0.65, "learning_rate": 4.664828109248779e-05, "loss": 2.0376, "step": 7100 }, { "epoch": 0.65, "learning_rate": 4.663542757264292e-05, "loss": 2.0047, "step": 7110 }, { "epoch": 0.66, "learning_rate": 4.662255123180017e-05, "loss": 1.9556, "step": 7120 }, { "epoch": 0.66, "learning_rate": 4.6609652083541475e-05, "loss": 2.0311, "step": 7130 }, { "epoch": 0.66, "learning_rate": 4.659673014147282e-05, "loss": 2.07, "step": 7140 }, { "epoch": 0.66, "learning_rate": 4.6583785419224244e-05, "loss": 2.0315, "step": 7150 }, { "epoch": 0.66, "learning_rate": 4.657081793044981e-05, "loss": 2.0158, "step": 7160 }, { "epoch": 0.66, "learning_rate": 4.6557827688827586e-05, "loss": 2.0799, "step": 7170 }, { "epoch": 0.66, "learning_rate": 4.6544814708059656e-05, "loss": 2.0128, "step": 7180 }, { "epoch": 0.66, "learning_rate": 4.653177900187208e-05, "loss": 1.9904, "step": 7190 }, { "epoch": 0.66, "learning_rate": 4.65187205840149e-05, "loss": 2.0234, "step": 7200 }, { "epoch": 0.66, "learning_rate": 4.650563946826208e-05, "loss": 2.0565, "step": 7210 }, { "epoch": 0.66, "learning_rate": 4.649253566841157e-05, "loss": 2.0534, "step": 7220 }, { "epoch": 0.67, "learning_rate": 4.6479409198285214e-05, "loss": 2.0716, "step": 7230 }, { "epoch": 0.67, "learning_rate": 4.64662600717288e-05, "loss": 2.0423, "step": 7240 }, { "epoch": 0.67, "learning_rate": 4.645308830261198e-05, "loss": 2.0522, "step": 7250 }, { "epoch": 0.67, "learning_rate": 4.643989390482831e-05, "loss": 2.0123, "step": 7260 }, { "epoch": 0.67, "learning_rate": 4.6426676892295205e-05, "loss": 2.0366, "step": 7270 }, { "epoch": 0.67, "learning_rate": 4.641343727895395e-05, "loss": 2.0244, "step": 7280 }, { "epoch": 0.67, "learning_rate": 4.640017507876965e-05, "loss": 2.073, "step": 7290 }, { "epoch": 0.67, "learning_rate": 4.638689030573124e-05, "loss": 1.9681, "step": 7300 }, { "epoch": 0.67, "learning_rate": 4.637358297385146e-05, "loss": 2.0428, "step": 7310 }, { "epoch": 0.67, "learning_rate": 4.636025309716688e-05, "loss": 2.0111, "step": 7320 }, { "epoch": 0.67, "learning_rate": 4.634690068973779e-05, "loss": 2.0024, "step": 7330 }, { "epoch": 0.68, "learning_rate": 4.63335257656483e-05, "loss": 2.0078, "step": 7340 }, { "epoch": 0.68, "learning_rate": 4.632012833900624e-05, "loss": 2.0397, "step": 7350 }, { "epoch": 0.68, "learning_rate": 4.630670842394319e-05, "loss": 2.0091, "step": 7360 }, { "epoch": 0.68, "learning_rate": 4.6293266034614435e-05, "loss": 2.035, "step": 7370 }, { "epoch": 0.68, "learning_rate": 4.6279801185198976e-05, "loss": 2.0606, "step": 7380 }, { "epoch": 0.68, "learning_rate": 4.626631388989951e-05, "loss": 2.0335, "step": 7390 }, { "epoch": 0.68, "learning_rate": 4.625280416294241e-05, "loss": 2.017, "step": 7400 }, { "epoch": 0.68, "learning_rate": 4.6239272018577695e-05, "loss": 2.0321, "step": 7410 }, { "epoch": 0.68, "learning_rate": 4.6225717471079034e-05, "loss": 2.0266, "step": 7420 }, { "epoch": 0.68, "learning_rate": 4.6212140534743745e-05, "loss": 2.0296, "step": 7430 }, { "epoch": 0.68, "learning_rate": 4.6198541223892736e-05, "loss": 1.9894, "step": 7440 }, { "epoch": 0.69, "learning_rate": 4.618491955287054e-05, "loss": 2.0047, "step": 7450 }, { "epoch": 0.69, "learning_rate": 4.6171275536045256e-05, "loss": 2.0122, "step": 7460 }, { "epoch": 0.69, "learning_rate": 4.6157609187808554e-05, "loss": 1.9681, "step": 7470 }, { "epoch": 0.69, "learning_rate": 4.614392052257568e-05, "loss": 2.0514, "step": 7480 }, { "epoch": 0.69, "learning_rate": 4.61302095547854e-05, "loss": 1.9684, "step": 7490 }, { "epoch": 0.69, "learning_rate": 4.61164762989e-05, "loss": 1.9397, "step": 7500 }, { "epoch": 0.69, "learning_rate": 4.610272076940531e-05, "loss": 2.0157, "step": 7510 }, { "epoch": 0.69, "learning_rate": 4.608894298081061e-05, "loss": 2.0127, "step": 7520 }, { "epoch": 0.69, "learning_rate": 4.607514294764868e-05, "loss": 2.0133, "step": 7530 }, { "epoch": 0.69, "learning_rate": 4.606132068447577e-05, "loss": 2.011, "step": 7540 }, { "epoch": 0.69, "learning_rate": 4.6047476205871565e-05, "loss": 2.0317, "step": 7550 }, { "epoch": 0.7, "learning_rate": 4.60336095264392e-05, "loss": 1.992, "step": 7560 }, { "epoch": 0.7, "learning_rate": 4.60197206608052e-05, "loss": 2.0512, "step": 7570 }, { "epoch": 0.7, "learning_rate": 4.6005809623619514e-05, "loss": 2.0243, "step": 7580 }, { "epoch": 0.7, "learning_rate": 4.599187642955547e-05, "loss": 1.9604, "step": 7590 }, { "epoch": 0.7, "learning_rate": 4.597792109330977e-05, "loss": 1.9937, "step": 7600 }, { "epoch": 0.7, "learning_rate": 4.596394362960247e-05, "loss": 2.0016, "step": 7610 }, { "epoch": 0.7, "learning_rate": 4.5949944053176965e-05, "loss": 1.963, "step": 7620 }, { "epoch": 0.7, "learning_rate": 4.5935922378799966e-05, "loss": 1.9858, "step": 7630 }, { "epoch": 0.7, "learning_rate": 4.5921878621261524e-05, "loss": 1.99, "step": 7640 }, { "epoch": 0.7, "learning_rate": 4.590781279537494e-05, "loss": 2.0357, "step": 7650 }, { "epoch": 0.71, "learning_rate": 4.5893724915976824e-05, "loss": 2.0423, "step": 7660 }, { "epoch": 0.71, "learning_rate": 4.587961499792705e-05, "loss": 2.0039, "step": 7670 }, { "epoch": 0.71, "learning_rate": 4.586548305610869e-05, "loss": 1.9643, "step": 7680 }, { "epoch": 0.71, "learning_rate": 4.5851329105428116e-05, "loss": 2.0193, "step": 7690 }, { "epoch": 0.71, "learning_rate": 4.583715316081487e-05, "loss": 1.958, "step": 7700 }, { "epoch": 0.71, "learning_rate": 4.58229552372217e-05, "loss": 1.9949, "step": 7710 }, { "epoch": 0.71, "learning_rate": 4.5808735349624555e-05, "loss": 2.0038, "step": 7720 }, { "epoch": 0.71, "learning_rate": 4.579449351302253e-05, "loss": 1.9937, "step": 7730 }, { "epoch": 0.71, "learning_rate": 4.578022974243789e-05, "loss": 2.0152, "step": 7740 }, { "epoch": 0.71, "learning_rate": 4.576594405291601e-05, "loss": 2.03, "step": 7750 }, { "epoch": 0.71, "learning_rate": 4.5751636459525405e-05, "loss": 1.9511, "step": 7760 }, { "epoch": 0.72, "learning_rate": 4.5737306977357704e-05, "loss": 2.0107, "step": 7770 }, { "epoch": 0.72, "learning_rate": 4.5722955621527595e-05, "loss": 2.0204, "step": 7780 }, { "epoch": 0.72, "learning_rate": 4.5708582407172873e-05, "loss": 2.0015, "step": 7790 }, { "epoch": 0.72, "learning_rate": 4.569418734945436e-05, "loss": 2.0171, "step": 7800 }, { "epoch": 0.72, "learning_rate": 4.567977046355592e-05, "loss": 2.0278, "step": 7810 }, { "epoch": 0.72, "learning_rate": 4.566533176468446e-05, "loss": 1.9671, "step": 7820 }, { "epoch": 0.72, "learning_rate": 4.5650871268069896e-05, "loss": 1.9951, "step": 7830 }, { "epoch": 0.72, "learning_rate": 4.56363889889651e-05, "loss": 2.0074, "step": 7840 }, { "epoch": 0.72, "learning_rate": 4.5621884942645966e-05, "loss": 1.9489, "step": 7850 }, { "epoch": 0.72, "learning_rate": 4.560735914441132e-05, "loss": 2.0193, "step": 7860 }, { "epoch": 0.72, "learning_rate": 4.559281160958294e-05, "loss": 2.0391, "step": 7870 }, { "epoch": 0.73, "learning_rate": 4.557824235350553e-05, "loss": 2.0138, "step": 7880 }, { "epoch": 0.73, "learning_rate": 4.556365139154672e-05, "loss": 1.9971, "step": 7890 }, { "epoch": 0.73, "learning_rate": 4.554903873909701e-05, "loss": 2.0419, "step": 7900 }, { "epoch": 0.73, "learning_rate": 4.5534404411569785e-05, "loss": 2.0033, "step": 7910 }, { "epoch": 0.73, "learning_rate": 4.5519748424401306e-05, "loss": 2.053, "step": 7920 }, { "epoch": 0.73, "learning_rate": 4.550507079305068e-05, "loss": 1.9766, "step": 7930 }, { "epoch": 0.73, "learning_rate": 4.5490371532999834e-05, "loss": 1.9309, "step": 7940 }, { "epoch": 0.73, "learning_rate": 4.547565065975351e-05, "loss": 2.0521, "step": 7950 }, { "epoch": 0.73, "learning_rate": 4.5460908188839245e-05, "loss": 2.0671, "step": 7960 }, { "epoch": 0.73, "learning_rate": 4.544614413580738e-05, "loss": 1.982, "step": 7970 }, { "epoch": 0.73, "learning_rate": 4.5431358516230984e-05, "loss": 1.9858, "step": 7980 }, { "epoch": 0.74, "learning_rate": 4.541655134570591e-05, "loss": 1.9876, "step": 7990 }, { "epoch": 0.74, "learning_rate": 4.540172263985071e-05, "loss": 1.9768, "step": 8000 }, { "epoch": 0.74, "learning_rate": 4.538687241430667e-05, "loss": 1.9949, "step": 8010 }, { "epoch": 0.74, "learning_rate": 4.5372000684737794e-05, "loss": 1.974, "step": 8020 }, { "epoch": 0.74, "learning_rate": 4.535710746683073e-05, "loss": 1.9996, "step": 8030 }, { "epoch": 0.74, "learning_rate": 4.53421927762948e-05, "loss": 1.9729, "step": 8040 }, { "epoch": 0.74, "learning_rate": 4.532725662886201e-05, "loss": 1.99, "step": 8050 }, { "epoch": 0.74, "learning_rate": 4.5312299040286954e-05, "loss": 1.9823, "step": 8060 }, { "epoch": 0.74, "learning_rate": 4.529732002634686e-05, "loss": 2.0368, "step": 8070 }, { "epoch": 0.74, "learning_rate": 4.5282319602841565e-05, "loss": 2.005, "step": 8080 }, { "epoch": 0.74, "learning_rate": 4.526729778559348e-05, "loss": 2.0487, "step": 8090 }, { "epoch": 0.75, "learning_rate": 4.525225459044758e-05, "loss": 2.0087, "step": 8100 }, { "epoch": 0.75, "learning_rate": 4.5237190033271394e-05, "loss": 2.0141, "step": 8110 }, { "epoch": 0.75, "learning_rate": 4.5222104129954966e-05, "loss": 1.966, "step": 8120 }, { "epoch": 0.75, "learning_rate": 4.52069968964109e-05, "loss": 1.9539, "step": 8130 }, { "epoch": 0.75, "learning_rate": 4.5191868348574254e-05, "loss": 2.0239, "step": 8140 }, { "epoch": 0.75, "learning_rate": 4.517671850240257e-05, "loss": 2.0225, "step": 8150 }, { "epoch": 0.75, "learning_rate": 4.5161547373875904e-05, "loss": 2.002, "step": 8160 }, { "epoch": 0.75, "learning_rate": 4.514635497899669e-05, "loss": 1.9996, "step": 8170 }, { "epoch": 0.75, "learning_rate": 4.513114133378986e-05, "loss": 1.9948, "step": 8180 }, { "epoch": 0.75, "learning_rate": 4.511590645430271e-05, "loss": 2.0151, "step": 8190 }, { "epoch": 0.75, "learning_rate": 4.510065035660497e-05, "loss": 2.038, "step": 8200 }, { "epoch": 0.76, "learning_rate": 4.508537305678872e-05, "loss": 1.9964, "step": 8210 }, { "epoch": 0.76, "learning_rate": 4.507007457096843e-05, "loss": 2.0488, "step": 8220 }, { "epoch": 0.76, "learning_rate": 4.5054754915280914e-05, "loss": 1.9307, "step": 8230 }, { "epoch": 0.76, "learning_rate": 4.5039414105885305e-05, "loss": 2.0249, "step": 8240 }, { "epoch": 0.76, "learning_rate": 4.5024052158963036e-05, "loss": 1.9777, "step": 8250 }, { "epoch": 0.76, "learning_rate": 4.500866909071787e-05, "loss": 2.0061, "step": 8260 }, { "epoch": 0.76, "learning_rate": 4.499326491737582e-05, "loss": 2.0268, "step": 8270 }, { "epoch": 0.76, "learning_rate": 4.4977839655185184e-05, "loss": 2.0718, "step": 8280 }, { "epoch": 0.76, "learning_rate": 4.496239332041648e-05, "loss": 2.0378, "step": 8290 }, { "epoch": 0.76, "learning_rate": 4.494692592936247e-05, "loss": 1.963, "step": 8300 }, { "epoch": 0.76, "learning_rate": 4.493143749833812e-05, "loss": 2.002, "step": 8310 }, { "epoch": 0.77, "learning_rate": 4.49159280436806e-05, "loss": 2.0434, "step": 8320 }, { "epoch": 0.77, "learning_rate": 4.4900397581749235e-05, "loss": 1.985, "step": 8330 }, { "epoch": 0.77, "learning_rate": 4.4884846128925516e-05, "loss": 1.9477, "step": 8340 }, { "epoch": 0.77, "learning_rate": 4.48692737016131e-05, "loss": 1.9751, "step": 8350 }, { "epoch": 0.77, "learning_rate": 4.4853680316237724e-05, "loss": 1.958, "step": 8360 }, { "epoch": 0.77, "learning_rate": 4.4838065989247256e-05, "loss": 2.0218, "step": 8370 }, { "epoch": 0.77, "learning_rate": 4.482243073711167e-05, "loss": 1.9647, "step": 8380 }, { "epoch": 0.77, "learning_rate": 4.4806774576322974e-05, "loss": 1.9985, "step": 8390 }, { "epoch": 0.77, "learning_rate": 4.4791097523395255e-05, "loss": 1.9432, "step": 8400 }, { "epoch": 0.77, "learning_rate": 4.4775399594864634e-05, "loss": 1.9799, "step": 8410 }, { "epoch": 0.78, "learning_rate": 4.475968080728924e-05, "loss": 1.9605, "step": 8420 }, { "epoch": 0.78, "learning_rate": 4.4743941177249236e-05, "loss": 1.9919, "step": 8430 }, { "epoch": 0.78, "learning_rate": 4.472818072134672e-05, "loss": 2.0162, "step": 8440 }, { "epoch": 0.78, "learning_rate": 4.471239945620579e-05, "loss": 1.9824, "step": 8450 }, { "epoch": 0.78, "learning_rate": 4.469659739847251e-05, "loss": 2.005, "step": 8460 }, { "epoch": 0.78, "learning_rate": 4.468077456481481e-05, "loss": 1.9907, "step": 8470 }, { "epoch": 0.78, "learning_rate": 4.466493097192262e-05, "loss": 2.0031, "step": 8480 }, { "epoch": 0.78, "learning_rate": 4.4649066636507705e-05, "loss": 2.0105, "step": 8490 }, { "epoch": 0.78, "learning_rate": 4.463318157530373e-05, "loss": 1.9901, "step": 8500 }, { "epoch": 0.78, "learning_rate": 4.4617275805066214e-05, "loss": 2.0154, "step": 8510 }, { "epoch": 0.78, "learning_rate": 4.460134934257254e-05, "loss": 2.057, "step": 8520 }, { "epoch": 0.79, "learning_rate": 4.458540220462188e-05, "loss": 2.0475, "step": 8530 }, { "epoch": 0.79, "learning_rate": 4.4569434408035266e-05, "loss": 2.0061, "step": 8540 }, { "epoch": 0.79, "learning_rate": 4.455344596965547e-05, "loss": 2.0327, "step": 8550 }, { "epoch": 0.79, "learning_rate": 4.4537436906347077e-05, "loss": 1.9802, "step": 8560 }, { "epoch": 0.79, "learning_rate": 4.45214072349964e-05, "loss": 1.9913, "step": 8570 }, { "epoch": 0.79, "learning_rate": 4.450535697251149e-05, "loss": 2.0786, "step": 8580 }, { "epoch": 0.79, "learning_rate": 4.448928613582214e-05, "loss": 2.0213, "step": 8590 }, { "epoch": 0.79, "learning_rate": 4.447319474187982e-05, "loss": 2.0207, "step": 8600 }, { "epoch": 0.79, "learning_rate": 4.44570828076577e-05, "loss": 2.0149, "step": 8610 }, { "epoch": 0.79, "learning_rate": 4.444095035015062e-05, "loss": 2.0085, "step": 8620 }, { "epoch": 0.79, "learning_rate": 4.442479738637504e-05, "loss": 2.0153, "step": 8630 }, { "epoch": 0.8, "learning_rate": 4.440862393336907e-05, "loss": 1.9833, "step": 8640 }, { "epoch": 0.8, "learning_rate": 4.439243000819244e-05, "loss": 1.9679, "step": 8650 }, { "epoch": 0.8, "learning_rate": 4.4376215627926454e-05, "loss": 1.9803, "step": 8660 }, { "epoch": 0.8, "learning_rate": 4.435998080967402e-05, "loss": 1.9728, "step": 8670 }, { "epoch": 0.8, "learning_rate": 4.434372557055956e-05, "loss": 1.966, "step": 8680 }, { "epoch": 0.8, "learning_rate": 4.4327449927729084e-05, "loss": 1.9858, "step": 8690 }, { "epoch": 0.8, "learning_rate": 4.431115389835009e-05, "loss": 2.0218, "step": 8700 }, { "epoch": 0.8, "learning_rate": 4.4294837499611585e-05, "loss": 2.0492, "step": 8710 }, { "epoch": 0.8, "learning_rate": 4.427850074872409e-05, "loss": 2.0344, "step": 8720 }, { "epoch": 0.8, "learning_rate": 4.426214366291955e-05, "loss": 1.9559, "step": 8730 }, { "epoch": 0.8, "learning_rate": 4.424576625945139e-05, "loss": 1.9953, "step": 8740 }, { "epoch": 0.81, "learning_rate": 4.4229368555594456e-05, "loss": 1.9274, "step": 8750 }, { "epoch": 0.81, "learning_rate": 4.4212950568645007e-05, "loss": 1.9799, "step": 8760 }, { "epoch": 0.81, "learning_rate": 4.41965123159207e-05, "loss": 1.8871, "step": 8770 }, { "epoch": 0.81, "learning_rate": 4.418005381476056e-05, "loss": 1.9561, "step": 8780 }, { "epoch": 0.81, "learning_rate": 4.416357508252497e-05, "loss": 1.9276, "step": 8790 }, { "epoch": 0.81, "learning_rate": 4.414707613659567e-05, "loss": 1.9694, "step": 8800 }, { "epoch": 0.81, "learning_rate": 4.413055699437569e-05, "loss": 2.0563, "step": 8810 }, { "epoch": 0.81, "learning_rate": 4.411401767328941e-05, "loss": 2.0011, "step": 8820 }, { "epoch": 0.81, "learning_rate": 4.409745819078245e-05, "loss": 1.985, "step": 8830 }, { "epoch": 0.81, "learning_rate": 4.4080878564321715e-05, "loss": 1.9735, "step": 8840 }, { "epoch": 0.81, "learning_rate": 4.4064278811395355e-05, "loss": 1.9848, "step": 8850 }, { "epoch": 0.82, "learning_rate": 4.4047658949512746e-05, "loss": 1.9985, "step": 8860 }, { "epoch": 0.82, "learning_rate": 4.403101899620449e-05, "loss": 2.0096, "step": 8870 }, { "epoch": 0.82, "learning_rate": 4.401435896902237e-05, "loss": 1.9221, "step": 8880 }, { "epoch": 0.82, "learning_rate": 4.399767888553933e-05, "loss": 1.9695, "step": 8890 }, { "epoch": 0.82, "learning_rate": 4.39809787633495e-05, "loss": 1.9793, "step": 8900 }, { "epoch": 0.82, "learning_rate": 4.396425862006811e-05, "loss": 1.9456, "step": 8910 }, { "epoch": 0.82, "learning_rate": 4.394751847333154e-05, "loss": 1.981, "step": 8920 }, { "epoch": 0.82, "learning_rate": 4.393075834079726e-05, "loss": 1.9553, "step": 8930 }, { "epoch": 0.82, "learning_rate": 4.391397824014381e-05, "loss": 1.9674, "step": 8940 }, { "epoch": 0.82, "learning_rate": 4.3897178189070795e-05, "loss": 1.9923, "step": 8950 }, { "epoch": 0.82, "learning_rate": 4.3880358205298885e-05, "loss": 2.0531, "step": 8960 }, { "epoch": 0.83, "learning_rate": 4.386351830656976e-05, "loss": 2.0096, "step": 8970 }, { "epoch": 0.83, "learning_rate": 4.384665851064609e-05, "loss": 1.9899, "step": 8980 }, { "epoch": 0.83, "learning_rate": 4.3829778835311554e-05, "loss": 2.0788, "step": 8990 }, { "epoch": 0.83, "learning_rate": 4.38128792983708e-05, "loss": 2.0363, "step": 9000 }, { "epoch": 0.83, "learning_rate": 4.3795959917649406e-05, "loss": 2.0095, "step": 9010 }, { "epoch": 0.83, "learning_rate": 4.377902071099391e-05, "loss": 1.9073, "step": 9020 }, { "epoch": 0.83, "learning_rate": 4.376206169627175e-05, "loss": 2.0195, "step": 9030 }, { "epoch": 0.83, "learning_rate": 4.374508289137123e-05, "loss": 1.9821, "step": 9040 }, { "epoch": 0.83, "learning_rate": 4.3728084314201575e-05, "loss": 1.9756, "step": 9050 }, { "epoch": 0.83, "learning_rate": 4.3711065982692836e-05, "loss": 1.9228, "step": 9060 }, { "epoch": 0.83, "learning_rate": 4.369402791479591e-05, "loss": 2.0036, "step": 9070 }, { "epoch": 0.84, "learning_rate": 4.367697012848251e-05, "loss": 2.0282, "step": 9080 }, { "epoch": 0.84, "learning_rate": 4.365989264174515e-05, "loss": 1.9656, "step": 9090 }, { "epoch": 0.84, "learning_rate": 4.364279547259713e-05, "loss": 1.9978, "step": 9100 }, { "epoch": 0.84, "learning_rate": 4.3625678639072486e-05, "loss": 1.9624, "step": 9110 }, { "epoch": 0.84, "learning_rate": 4.360854215922603e-05, "loss": 1.9732, "step": 9120 }, { "epoch": 0.84, "learning_rate": 4.359138605113328e-05, "loss": 1.9608, "step": 9130 }, { "epoch": 0.84, "learning_rate": 4.357421033289044e-05, "loss": 1.9927, "step": 9140 }, { "epoch": 0.84, "learning_rate": 4.3557015022614436e-05, "loss": 1.9079, "step": 9150 }, { "epoch": 0.84, "learning_rate": 4.353980013844283e-05, "loss": 1.974, "step": 9160 }, { "epoch": 0.84, "learning_rate": 4.352256569853385e-05, "loss": 1.9638, "step": 9170 }, { "epoch": 0.85, "learning_rate": 4.3505311721066336e-05, "loss": 1.9727, "step": 9180 }, { "epoch": 0.85, "learning_rate": 4.348803822423974e-05, "loss": 1.9985, "step": 9190 }, { "epoch": 0.85, "learning_rate": 4.347074522627411e-05, "loss": 2.0743, "step": 9200 }, { "epoch": 0.85, "learning_rate": 4.3453432745410075e-05, "loss": 1.9197, "step": 9210 }, { "epoch": 0.85, "learning_rate": 4.3436100799908766e-05, "loss": 1.9912, "step": 9220 }, { "epoch": 0.85, "learning_rate": 4.341874940805191e-05, "loss": 1.9436, "step": 9230 }, { "epoch": 0.85, "learning_rate": 4.3401378588141685e-05, "loss": 1.9904, "step": 9240 }, { "epoch": 0.85, "learning_rate": 4.338398835850081e-05, "loss": 1.9322, "step": 9250 }, { "epoch": 0.85, "learning_rate": 4.336657873747245e-05, "loss": 1.9586, "step": 9260 }, { "epoch": 0.85, "learning_rate": 4.334914974342025e-05, "loss": 2.0224, "step": 9270 }, { "epoch": 0.85, "learning_rate": 4.333170139472824e-05, "loss": 1.9757, "step": 9280 }, { "epoch": 0.86, "learning_rate": 4.331423370980093e-05, "loss": 2.0178, "step": 9290 }, { "epoch": 0.86, "learning_rate": 4.3296746707063166e-05, "loss": 1.9404, "step": 9300 }, { "epoch": 0.86, "learning_rate": 4.327924040496022e-05, "loss": 1.9508, "step": 9310 }, { "epoch": 0.86, "learning_rate": 4.326171482195768e-05, "loss": 2.0075, "step": 9320 }, { "epoch": 0.86, "learning_rate": 4.324416997654149e-05, "loss": 1.9732, "step": 9330 }, { "epoch": 0.86, "learning_rate": 4.322660588721792e-05, "loss": 2.005, "step": 9340 }, { "epoch": 0.86, "learning_rate": 4.320902257251353e-05, "loss": 1.9517, "step": 9350 }, { "epoch": 0.86, "learning_rate": 4.3191420050975164e-05, "loss": 1.9993, "step": 9360 }, { "epoch": 0.86, "learning_rate": 4.31737983411699e-05, "loss": 1.9019, "step": 9370 }, { "epoch": 0.86, "learning_rate": 4.3156157461685094e-05, "loss": 1.9932, "step": 9380 }, { "epoch": 0.86, "learning_rate": 4.31384974311283e-05, "loss": 1.9896, "step": 9390 }, { "epoch": 0.87, "learning_rate": 4.3120818268127275e-05, "loss": 2.0078, "step": 9400 }, { "epoch": 0.87, "learning_rate": 4.3103119991329946e-05, "loss": 2.0203, "step": 9410 }, { "epoch": 0.87, "learning_rate": 4.3085402619404435e-05, "loss": 1.97, "step": 9420 }, { "epoch": 0.87, "learning_rate": 4.306766617103898e-05, "loss": 1.9805, "step": 9430 }, { "epoch": 0.87, "learning_rate": 4.304991066494193e-05, "loss": 1.9643, "step": 9440 }, { "epoch": 0.87, "learning_rate": 4.303213611984177e-05, "loss": 1.9922, "step": 9450 }, { "epoch": 0.87, "learning_rate": 4.301434255448703e-05, "loss": 1.9574, "step": 9460 }, { "epoch": 0.87, "learning_rate": 4.2996529987646325e-05, "loss": 1.9624, "step": 9470 }, { "epoch": 0.87, "learning_rate": 4.297869843810832e-05, "loss": 1.9574, "step": 9480 }, { "epoch": 0.87, "learning_rate": 4.296084792468169e-05, "loss": 1.9716, "step": 9490 }, { "epoch": 0.87, "learning_rate": 4.29429784661951e-05, "loss": 2.0206, "step": 9500 }, { "epoch": 0.88, "learning_rate": 4.292509008149722e-05, "loss": 2.0262, "step": 9510 }, { "epoch": 0.88, "learning_rate": 4.290718278945669e-05, "loss": 2.0439, "step": 9520 }, { "epoch": 0.88, "learning_rate": 4.288925660896205e-05, "loss": 2.0096, "step": 9530 }, { "epoch": 0.88, "learning_rate": 4.287131155892181e-05, "loss": 1.9333, "step": 9540 }, { "epoch": 0.88, "learning_rate": 4.285334765826438e-05, "loss": 2.0326, "step": 9550 }, { "epoch": 0.88, "learning_rate": 4.283536492593801e-05, "loss": 2.0018, "step": 9560 }, { "epoch": 0.88, "learning_rate": 4.281736338091087e-05, "loss": 1.9743, "step": 9570 }, { "epoch": 0.88, "learning_rate": 4.2799343042170924e-05, "loss": 1.9123, "step": 9580 }, { "epoch": 0.88, "learning_rate": 4.2781303928726e-05, "loss": 1.9795, "step": 9590 }, { "epoch": 0.88, "learning_rate": 4.276324605960371e-05, "loss": 2.0061, "step": 9600 }, { "epoch": 0.88, "learning_rate": 4.2745169453851455e-05, "loss": 1.9398, "step": 9610 }, { "epoch": 0.89, "learning_rate": 4.272707413053638e-05, "loss": 1.996, "step": 9620 }, { "epoch": 0.89, "learning_rate": 4.27089601087454e-05, "loss": 1.9963, "step": 9630 }, { "epoch": 0.89, "learning_rate": 4.269082740758514e-05, "loss": 1.957, "step": 9640 }, { "epoch": 0.89, "learning_rate": 4.2672676046181934e-05, "loss": 1.9462, "step": 9650 }, { "epoch": 0.89, "learning_rate": 4.2654506043681793e-05, "loss": 1.9145, "step": 9660 }, { "epoch": 0.89, "learning_rate": 4.263631741925039e-05, "loss": 1.9613, "step": 9670 }, { "epoch": 0.89, "learning_rate": 4.261811019207306e-05, "loss": 1.9277, "step": 9680 }, { "epoch": 0.89, "learning_rate": 4.259988438135471e-05, "loss": 1.9809, "step": 9690 }, { "epoch": 0.89, "learning_rate": 4.258164000631991e-05, "loss": 1.9565, "step": 9700 }, { "epoch": 0.89, "learning_rate": 4.256337708621277e-05, "loss": 2.034, "step": 9710 }, { "epoch": 0.89, "learning_rate": 4.254509564029699e-05, "loss": 1.9525, "step": 9720 }, { "epoch": 0.9, "learning_rate": 4.252679568785577e-05, "loss": 1.991, "step": 9730 }, { "epoch": 0.9, "learning_rate": 4.250847724819189e-05, "loss": 1.9891, "step": 9740 }, { "epoch": 0.9, "learning_rate": 4.249014034062757e-05, "loss": 1.965, "step": 9750 }, { "epoch": 0.9, "learning_rate": 4.2471784984504536e-05, "loss": 1.9758, "step": 9760 }, { "epoch": 0.9, "learning_rate": 4.2453411199184e-05, "loss": 1.9499, "step": 9770 }, { "epoch": 0.9, "learning_rate": 4.243501900404656e-05, "loss": 2.0057, "step": 9780 }, { "epoch": 0.9, "learning_rate": 4.241660841849228e-05, "loss": 1.8928, "step": 9790 }, { "epoch": 0.9, "learning_rate": 4.239817946194059e-05, "loss": 1.951, "step": 9800 }, { "epoch": 0.9, "learning_rate": 4.237973215383031e-05, "loss": 1.9932, "step": 9810 }, { "epoch": 0.9, "learning_rate": 4.236126651361962e-05, "loss": 1.9615, "step": 9820 }, { "epoch": 0.9, "learning_rate": 4.2342782560786045e-05, "loss": 1.9803, "step": 9830 }, { "epoch": 0.91, "learning_rate": 4.2324280314826394e-05, "loss": 1.9421, "step": 9840 }, { "epoch": 0.91, "learning_rate": 4.23057597952568e-05, "loss": 1.9743, "step": 9850 }, { "epoch": 0.91, "learning_rate": 4.228722102161268e-05, "loss": 1.948, "step": 9860 }, { "epoch": 0.91, "learning_rate": 4.226866401344865e-05, "loss": 1.9615, "step": 9870 }, { "epoch": 0.91, "learning_rate": 4.2250088790338645e-05, "loss": 1.956, "step": 9880 }, { "epoch": 0.91, "learning_rate": 4.223149537187573e-05, "loss": 1.9901, "step": 9890 }, { "epoch": 0.91, "learning_rate": 4.221288377767222e-05, "loss": 1.913, "step": 9900 }, { "epoch": 0.91, "learning_rate": 4.219425402735957e-05, "loss": 1.9662, "step": 9910 }, { "epoch": 0.91, "learning_rate": 4.21756061405884e-05, "loss": 1.9381, "step": 9920 }, { "epoch": 0.91, "learning_rate": 4.215694013702847e-05, "loss": 2.0091, "step": 9930 }, { "epoch": 0.91, "learning_rate": 4.21382560363686e-05, "loss": 1.9216, "step": 9940 }, { "epoch": 0.92, "learning_rate": 4.2119553858316775e-05, "loss": 2.0321, "step": 9950 }, { "epoch": 0.92, "learning_rate": 4.2100833622599986e-05, "loss": 2.0119, "step": 9960 }, { "epoch": 0.92, "learning_rate": 4.2082095348964294e-05, "loss": 1.9482, "step": 9970 }, { "epoch": 0.92, "learning_rate": 4.20633390571748e-05, "loss": 1.9531, "step": 9980 }, { "epoch": 0.92, "learning_rate": 4.204456476701558e-05, "loss": 1.9764, "step": 9990 }, { "epoch": 0.92, "learning_rate": 4.202577249828972e-05, "loss": 1.9925, "step": 10000 }, { "epoch": 0.92, "learning_rate": 4.200696227081926e-05, "loss": 2.0129, "step": 10010 }, { "epoch": 0.92, "learning_rate": 4.1988134104445185e-05, "loss": 2.0088, "step": 10020 }, { "epoch": 0.92, "learning_rate": 4.1969288019027394e-05, "loss": 1.9775, "step": 10030 }, { "epoch": 0.92, "learning_rate": 4.19504240344447e-05, "loss": 1.9562, "step": 10040 }, { "epoch": 0.93, "learning_rate": 4.1931542170594785e-05, "loss": 1.9744, "step": 10050 }, { "epoch": 0.93, "learning_rate": 4.191264244739419e-05, "loss": 1.9773, "step": 10060 }, { "epoch": 0.93, "learning_rate": 4.189372488477831e-05, "loss": 1.953, "step": 10070 }, { "epoch": 0.93, "learning_rate": 4.187478950270134e-05, "loss": 1.9536, "step": 10080 }, { "epoch": 0.93, "learning_rate": 4.1855836321136264e-05, "loss": 1.9602, "step": 10090 }, { "epoch": 0.93, "learning_rate": 4.183686536007488e-05, "loss": 1.9688, "step": 10100 }, { "epoch": 0.93, "learning_rate": 4.181787663952769e-05, "loss": 1.9768, "step": 10110 }, { "epoch": 0.93, "learning_rate": 4.1798870179523966e-05, "loss": 1.9429, "step": 10120 }, { "epoch": 0.93, "learning_rate": 4.177984600011166e-05, "loss": 1.9749, "step": 10130 }, { "epoch": 0.93, "learning_rate": 4.176080412135745e-05, "loss": 1.9335, "step": 10140 }, { "epoch": 0.93, "learning_rate": 4.174174456334665e-05, "loss": 1.9702, "step": 10150 }, { "epoch": 0.94, "learning_rate": 4.172266734618325e-05, "loss": 1.9803, "step": 10160 }, { "epoch": 0.94, "learning_rate": 4.1703572489989846e-05, "loss": 1.9407, "step": 10170 }, { "epoch": 0.94, "learning_rate": 4.1684460014907645e-05, "loss": 1.9839, "step": 10180 }, { "epoch": 0.94, "learning_rate": 4.1665329941096446e-05, "loss": 1.9989, "step": 10190 }, { "epoch": 0.94, "learning_rate": 4.1646182288734604e-05, "loss": 1.9716, "step": 10200 }, { "epoch": 0.94, "learning_rate": 4.162701707801901e-05, "loss": 1.9428, "step": 10210 }, { "epoch": 0.94, "learning_rate": 4.16078343291651e-05, "loss": 1.9629, "step": 10220 }, { "epoch": 0.94, "learning_rate": 4.1588634062406775e-05, "loss": 1.9576, "step": 10230 }, { "epoch": 0.94, "learning_rate": 4.1569416297996436e-05, "loss": 1.9515, "step": 10240 }, { "epoch": 0.94, "learning_rate": 4.155018105620494e-05, "loss": 1.9403, "step": 10250 }, { "epoch": 0.94, "learning_rate": 4.1530928357321576e-05, "loss": 1.9719, "step": 10260 }, { "epoch": 0.95, "learning_rate": 4.151165822165404e-05, "loss": 1.8946, "step": 10270 }, { "epoch": 0.95, "learning_rate": 4.149237066952842e-05, "loss": 2.0095, "step": 10280 }, { "epoch": 0.95, "learning_rate": 4.14730657212892e-05, "loss": 1.9869, "step": 10290 }, { "epoch": 0.95, "learning_rate": 4.145374339729917e-05, "loss": 1.9533, "step": 10300 }, { "epoch": 0.95, "learning_rate": 4.143440371793949e-05, "loss": 1.974, "step": 10310 }, { "epoch": 0.95, "learning_rate": 4.141504670360959e-05, "loss": 1.908, "step": 10320 }, { "epoch": 0.95, "learning_rate": 4.139567237472722e-05, "loss": 2.0087, "step": 10330 }, { "epoch": 0.95, "learning_rate": 4.137628075172837e-05, "loss": 1.9157, "step": 10340 }, { "epoch": 0.95, "learning_rate": 4.135687185506728e-05, "loss": 1.9917, "step": 10350 }, { "epoch": 0.95, "learning_rate": 4.133744570521641e-05, "loss": 1.9598, "step": 10360 }, { "epoch": 0.95, "learning_rate": 4.1318002322666405e-05, "loss": 1.8927, "step": 10370 }, { "epoch": 0.96, "learning_rate": 4.129854172792611e-05, "loss": 1.9883, "step": 10380 }, { "epoch": 0.96, "learning_rate": 4.127906394152251e-05, "loss": 1.9964, "step": 10390 }, { "epoch": 0.96, "learning_rate": 4.125956898400074e-05, "loss": 1.9459, "step": 10400 }, { "epoch": 0.96, "learning_rate": 4.124005687592401e-05, "loss": 1.9531, "step": 10410 }, { "epoch": 0.96, "learning_rate": 4.1220527637873675e-05, "loss": 1.9402, "step": 10420 }, { "epoch": 0.96, "learning_rate": 4.120098129044911e-05, "loss": 1.9987, "step": 10430 }, { "epoch": 0.96, "learning_rate": 4.118141785426776e-05, "loss": 1.9301, "step": 10440 }, { "epoch": 0.96, "learning_rate": 4.116183734996509e-05, "loss": 1.9816, "step": 10450 }, { "epoch": 0.96, "learning_rate": 4.114223979819457e-05, "loss": 1.9888, "step": 10460 }, { "epoch": 0.96, "learning_rate": 4.112262521962766e-05, "loss": 1.8941, "step": 10470 }, { "epoch": 0.96, "learning_rate": 4.1102993634953756e-05, "loss": 1.9494, "step": 10480 }, { "epoch": 0.97, "learning_rate": 4.108334506488023e-05, "loss": 1.9668, "step": 10490 }, { "epoch": 0.97, "learning_rate": 4.1063679530132315e-05, "loss": 1.9267, "step": 10500 }, { "epoch": 0.97, "learning_rate": 4.10439970514532e-05, "loss": 1.9232, "step": 10510 }, { "epoch": 0.97, "learning_rate": 4.10242976496039e-05, "loss": 1.9192, "step": 10520 }, { "epoch": 0.97, "learning_rate": 4.1004581345363303e-05, "loss": 1.9017, "step": 10530 }, { "epoch": 0.97, "learning_rate": 4.0984848159528124e-05, "loss": 1.9454, "step": 10540 }, { "epoch": 0.97, "learning_rate": 4.096509811291287e-05, "loss": 1.945, "step": 10550 }, { "epoch": 0.97, "learning_rate": 4.094533122634985e-05, "loss": 1.9431, "step": 10560 }, { "epoch": 0.97, "learning_rate": 4.092554752068913e-05, "loss": 1.9573, "step": 10570 }, { "epoch": 0.97, "learning_rate": 4.0905747016798526e-05, "loss": 1.998, "step": 10580 }, { "epoch": 0.97, "learning_rate": 4.088592973556355e-05, "loss": 1.911, "step": 10590 }, { "epoch": 0.98, "learning_rate": 4.086609569788741e-05, "loss": 1.9949, "step": 10600 }, { "epoch": 0.98, "learning_rate": 4.0846244924691024e-05, "loss": 2.0162, "step": 10610 }, { "epoch": 0.98, "learning_rate": 4.082637743691294e-05, "loss": 1.9798, "step": 10620 }, { "epoch": 0.98, "learning_rate": 4.080649325550933e-05, "loss": 1.9578, "step": 10630 }, { "epoch": 0.98, "learning_rate": 4.0786592401453974e-05, "loss": 1.9503, "step": 10640 }, { "epoch": 0.98, "learning_rate": 4.076667489573826e-05, "loss": 1.9405, "step": 10650 }, { "epoch": 0.98, "learning_rate": 4.0746740759371126e-05, "loss": 1.9518, "step": 10660 }, { "epoch": 0.98, "learning_rate": 4.0726790013379045e-05, "loss": 1.9578, "step": 10670 }, { "epoch": 0.98, "learning_rate": 4.0706822678806025e-05, "loss": 1.9839, "step": 10680 }, { "epoch": 0.98, "learning_rate": 4.068683877671354e-05, "loss": 1.9775, "step": 10690 }, { "epoch": 0.98, "learning_rate": 4.066683832818059e-05, "loss": 1.9807, "step": 10700 }, { "epoch": 0.99, "learning_rate": 4.064682135430359e-05, "loss": 1.9304, "step": 10710 }, { "epoch": 0.99, "learning_rate": 4.062678787619639e-05, "loss": 1.9982, "step": 10720 }, { "epoch": 0.99, "learning_rate": 4.0606737914990265e-05, "loss": 1.953, "step": 10730 }, { "epoch": 0.99, "learning_rate": 4.058667149183387e-05, "loss": 1.9875, "step": 10740 }, { "epoch": 0.99, "learning_rate": 4.056658862789321e-05, "loss": 1.9056, "step": 10750 }, { "epoch": 0.99, "learning_rate": 4.054648934435165e-05, "loss": 1.9112, "step": 10760 }, { "epoch": 0.99, "learning_rate": 4.052637366240987e-05, "loss": 1.9296, "step": 10770 }, { "epoch": 0.99, "learning_rate": 4.050624160328583e-05, "loss": 1.9095, "step": 10780 }, { "epoch": 0.99, "learning_rate": 4.0486093188214786e-05, "loss": 1.9317, "step": 10790 }, { "epoch": 0.99, "learning_rate": 4.046592843844924e-05, "loss": 1.963, "step": 10800 }, { "epoch": 1.0, "learning_rate": 4.044574737525892e-05, "loss": 1.9589, "step": 10810 }, { "epoch": 1.0, "learning_rate": 4.042555001993077e-05, "loss": 1.9032, "step": 10820 }, { "epoch": 1.0, "learning_rate": 4.04053363937689e-05, "loss": 1.9798, "step": 10830 }, { "epoch": 1.0, "learning_rate": 4.0385106518094604e-05, "loss": 1.9335, "step": 10840 }, { "epoch": 1.0, "learning_rate": 4.036486041424631e-05, "loss": 1.947, "step": 10850 }, { "epoch": 1.0, "learning_rate": 4.0344598103579554e-05, "loss": 1.9384, "step": 10860 }, { "epoch": 1.0, "learning_rate": 4.032431960746697e-05, "loss": 1.8553, "step": 10870 }, { "epoch": 1.0, "learning_rate": 4.030402494729829e-05, "loss": 1.7901, "step": 10880 }, { "epoch": 1.0, "learning_rate": 4.0283714144480246e-05, "loss": 1.6902, "step": 10890 }, { "epoch": 1.0, "learning_rate": 4.0263387220436634e-05, "loss": 1.7611, "step": 10900 }, { "epoch": 1.0, "learning_rate": 4.0243044196608254e-05, "loss": 1.7317, "step": 10910 }, { "epoch": 1.01, "learning_rate": 4.022268509445287e-05, "loss": 1.7445, "step": 10920 }, { "epoch": 1.01, "learning_rate": 4.0202309935445214e-05, "loss": 1.7666, "step": 10930 }, { "epoch": 1.01, "learning_rate": 4.0181918741076964e-05, "loss": 1.7369, "step": 10940 }, { "epoch": 1.01, "learning_rate": 4.01615115328567e-05, "loss": 1.7564, "step": 10950 }, { "epoch": 1.01, "learning_rate": 4.014108833230989e-05, "loss": 1.8109, "step": 10960 }, { "epoch": 1.01, "learning_rate": 4.0120649160978894e-05, "loss": 1.7499, "step": 10970 }, { "epoch": 1.01, "learning_rate": 4.0100194040422875e-05, "loss": 1.6888, "step": 10980 }, { "epoch": 1.01, "learning_rate": 4.007972299221788e-05, "loss": 1.744, "step": 10990 }, { "epoch": 1.01, "learning_rate": 4.0059236037956694e-05, "loss": 1.6909, "step": 11000 }, { "epoch": 1.01, "learning_rate": 4.0038733199248926e-05, "loss": 1.7606, "step": 11010 }, { "epoch": 1.01, "learning_rate": 4.0018214497720904e-05, "loss": 1.7788, "step": 11020 }, { "epoch": 1.02, "learning_rate": 3.9997679955015724e-05, "loss": 1.7223, "step": 11030 }, { "epoch": 1.02, "learning_rate": 3.997712959279316e-05, "loss": 1.7223, "step": 11040 }, { "epoch": 1.02, "learning_rate": 3.995656343272969e-05, "loss": 1.7232, "step": 11050 }, { "epoch": 1.02, "learning_rate": 3.9935981496518455e-05, "loss": 1.7261, "step": 11060 }, { "epoch": 1.02, "learning_rate": 3.9915383805869234e-05, "loss": 1.7184, "step": 11070 }, { "epoch": 1.02, "learning_rate": 3.9894770382508404e-05, "loss": 1.735, "step": 11080 }, { "epoch": 1.02, "learning_rate": 3.987414124817898e-05, "loss": 1.7935, "step": 11090 }, { "epoch": 1.02, "learning_rate": 3.985349642464051e-05, "loss": 1.7636, "step": 11100 }, { "epoch": 1.02, "learning_rate": 3.983283593366911e-05, "loss": 1.7065, "step": 11110 }, { "epoch": 1.02, "learning_rate": 3.981215979705741e-05, "loss": 1.7261, "step": 11120 }, { "epoch": 1.02, "learning_rate": 3.9791468036614564e-05, "loss": 1.6855, "step": 11130 }, { "epoch": 1.03, "learning_rate": 3.977076067416617e-05, "loss": 1.7156, "step": 11140 }, { "epoch": 1.03, "learning_rate": 3.9750037731554335e-05, "loss": 1.696, "step": 11150 }, { "epoch": 1.03, "learning_rate": 3.9729299230637554e-05, "loss": 1.7236, "step": 11160 }, { "epoch": 1.03, "learning_rate": 3.970854519329073e-05, "loss": 1.7392, "step": 11170 }, { "epoch": 1.03, "learning_rate": 3.96877756414052e-05, "loss": 1.7161, "step": 11180 }, { "epoch": 1.03, "learning_rate": 3.966699059688863e-05, "loss": 1.721, "step": 11190 }, { "epoch": 1.03, "learning_rate": 3.9646190081665034e-05, "loss": 1.7553, "step": 11200 }, { "epoch": 1.03, "learning_rate": 3.9625374117674754e-05, "loss": 1.7701, "step": 11210 }, { "epoch": 1.03, "learning_rate": 3.96045427268744e-05, "loss": 1.7405, "step": 11220 }, { "epoch": 1.03, "learning_rate": 3.95836959312369e-05, "loss": 1.6592, "step": 11230 }, { "epoch": 1.03, "learning_rate": 3.956283375275138e-05, "loss": 1.679, "step": 11240 }, { "epoch": 1.04, "learning_rate": 3.9541956213423234e-05, "loss": 1.7957, "step": 11250 }, { "epoch": 1.04, "learning_rate": 3.9521063335274034e-05, "loss": 1.7162, "step": 11260 }, { "epoch": 1.04, "learning_rate": 3.950015514034153e-05, "loss": 1.7049, "step": 11270 }, { "epoch": 1.04, "learning_rate": 3.947923165067966e-05, "loss": 1.7736, "step": 11280 }, { "epoch": 1.04, "learning_rate": 3.945829288835845e-05, "loss": 1.7064, "step": 11290 }, { "epoch": 1.04, "learning_rate": 3.943733887546407e-05, "loss": 1.7371, "step": 11300 }, { "epoch": 1.04, "learning_rate": 3.941636963409875e-05, "loss": 1.7248, "step": 11310 }, { "epoch": 1.04, "learning_rate": 3.939538518638081e-05, "loss": 1.7147, "step": 11320 }, { "epoch": 1.04, "learning_rate": 3.9374385554444585e-05, "loss": 1.6338, "step": 11330 }, { "epoch": 1.04, "learning_rate": 3.9353370760440443e-05, "loss": 1.7662, "step": 11340 }, { "epoch": 1.04, "learning_rate": 3.933234082653474e-05, "loss": 1.7701, "step": 11350 }, { "epoch": 1.05, "learning_rate": 3.93112957749098e-05, "loss": 1.6648, "step": 11360 }, { "epoch": 1.05, "learning_rate": 3.9290235627763894e-05, "loss": 1.6865, "step": 11370 }, { "epoch": 1.05, "learning_rate": 3.926916040731121e-05, "loss": 1.7795, "step": 11380 }, { "epoch": 1.05, "learning_rate": 3.924807013578185e-05, "loss": 1.6948, "step": 11390 }, { "epoch": 1.05, "learning_rate": 3.9226964835421776e-05, "loss": 1.7114, "step": 11400 }, { "epoch": 1.05, "learning_rate": 3.92058445284928e-05, "loss": 1.6885, "step": 11410 }, { "epoch": 1.05, "learning_rate": 3.918470923727259e-05, "loss": 1.7148, "step": 11420 }, { "epoch": 1.05, "learning_rate": 3.91635589840546e-05, "loss": 1.6794, "step": 11430 }, { "epoch": 1.05, "learning_rate": 3.9142393791148045e-05, "loss": 1.7438, "step": 11440 }, { "epoch": 1.05, "learning_rate": 3.912121368087795e-05, "loss": 1.7616, "step": 11450 }, { "epoch": 1.05, "learning_rate": 3.9100018675585026e-05, "loss": 1.7036, "step": 11460 }, { "epoch": 1.06, "learning_rate": 3.907880879762572e-05, "loss": 1.7406, "step": 11470 }, { "epoch": 1.06, "learning_rate": 3.9057584069372154e-05, "loss": 1.6831, "step": 11480 }, { "epoch": 1.06, "learning_rate": 3.903634451321213e-05, "loss": 1.748, "step": 11490 }, { "epoch": 1.06, "learning_rate": 3.9015090151549086e-05, "loss": 1.7618, "step": 11500 }, { "epoch": 1.06, "learning_rate": 3.8993821006802064e-05, "loss": 1.7399, "step": 11510 }, { "epoch": 1.06, "learning_rate": 3.8972537101405715e-05, "loss": 1.7247, "step": 11520 }, { "epoch": 1.06, "learning_rate": 3.895123845781024e-05, "loss": 1.7591, "step": 11530 }, { "epoch": 1.06, "learning_rate": 3.892992509848141e-05, "loss": 1.7663, "step": 11540 }, { "epoch": 1.06, "learning_rate": 3.890859704590049e-05, "loss": 1.7002, "step": 11550 }, { "epoch": 1.06, "learning_rate": 3.888725432256428e-05, "loss": 1.7155, "step": 11560 }, { "epoch": 1.07, "learning_rate": 3.8865896950985024e-05, "loss": 1.7515, "step": 11570 }, { "epoch": 1.07, "learning_rate": 3.884452495369042e-05, "loss": 1.7961, "step": 11580 }, { "epoch": 1.07, "learning_rate": 3.882313835322362e-05, "loss": 1.7438, "step": 11590 }, { "epoch": 1.07, "learning_rate": 3.8801737172143135e-05, "loss": 1.7161, "step": 11600 }, { "epoch": 1.07, "learning_rate": 3.87803214330229e-05, "loss": 1.7438, "step": 11610 }, { "epoch": 1.07, "learning_rate": 3.875889115845218e-05, "loss": 1.7225, "step": 11620 }, { "epoch": 1.07, "learning_rate": 3.873744637103557e-05, "loss": 1.7508, "step": 11630 }, { "epoch": 1.07, "learning_rate": 3.871598709339299e-05, "loss": 1.7262, "step": 11640 }, { "epoch": 1.07, "learning_rate": 3.8694513348159634e-05, "loss": 1.7067, "step": 11650 }, { "epoch": 1.07, "learning_rate": 3.8673025157985965e-05, "loss": 1.7159, "step": 11660 }, { "epoch": 1.07, "learning_rate": 3.8651522545537656e-05, "loss": 1.8109, "step": 11670 }, { "epoch": 1.08, "learning_rate": 3.8630005533495626e-05, "loss": 1.7665, "step": 11680 }, { "epoch": 1.08, "learning_rate": 3.860847414455596e-05, "loss": 1.7357, "step": 11690 }, { "epoch": 1.08, "learning_rate": 3.8586928401429925e-05, "loss": 1.7369, "step": 11700 }, { "epoch": 1.08, "learning_rate": 3.856536832684392e-05, "loss": 1.7388, "step": 11710 }, { "epoch": 1.08, "learning_rate": 3.8543793943539466e-05, "loss": 1.7187, "step": 11720 }, { "epoch": 1.08, "learning_rate": 3.852220527427315e-05, "loss": 1.7128, "step": 11730 }, { "epoch": 1.08, "learning_rate": 3.8500602341816664e-05, "loss": 1.7041, "step": 11740 }, { "epoch": 1.08, "learning_rate": 3.847898516895673e-05, "loss": 1.7893, "step": 11750 }, { "epoch": 1.08, "learning_rate": 3.845735377849509e-05, "loss": 1.7412, "step": 11760 }, { "epoch": 1.08, "learning_rate": 3.843570819324848e-05, "loss": 1.7506, "step": 11770 }, { "epoch": 1.08, "learning_rate": 3.841404843604861e-05, "loss": 1.7656, "step": 11780 }, { "epoch": 1.09, "learning_rate": 3.839237452974215e-05, "loss": 1.7444, "step": 11790 }, { "epoch": 1.09, "learning_rate": 3.837068649719068e-05, "loss": 1.7171, "step": 11800 }, { "epoch": 1.09, "learning_rate": 3.834898436127069e-05, "loss": 1.722, "step": 11810 }, { "epoch": 1.09, "learning_rate": 3.832726814487353e-05, "loss": 1.6788, "step": 11820 }, { "epoch": 1.09, "learning_rate": 3.830553787090543e-05, "loss": 1.7502, "step": 11830 }, { "epoch": 1.09, "learning_rate": 3.8283793562287415e-05, "loss": 1.7741, "step": 11840 }, { "epoch": 1.09, "learning_rate": 3.8262035241955344e-05, "loss": 1.7264, "step": 11850 }, { "epoch": 1.09, "learning_rate": 3.824026293285983e-05, "loss": 1.7364, "step": 11860 }, { "epoch": 1.09, "learning_rate": 3.821847665796627e-05, "loss": 1.7407, "step": 11870 }, { "epoch": 1.09, "learning_rate": 3.819667644025477e-05, "loss": 1.6924, "step": 11880 }, { "epoch": 1.09, "learning_rate": 3.8174862302720125e-05, "loss": 1.7231, "step": 11890 }, { "epoch": 1.1, "learning_rate": 3.8153034268371864e-05, "loss": 1.711, "step": 11900 }, { "epoch": 1.1, "learning_rate": 3.8131192360234124e-05, "loss": 1.6637, "step": 11910 }, { "epoch": 1.1, "learning_rate": 3.8109336601345725e-05, "loss": 1.7604, "step": 11920 }, { "epoch": 1.1, "learning_rate": 3.8087467014760044e-05, "loss": 1.7419, "step": 11930 }, { "epoch": 1.1, "learning_rate": 3.806558362354509e-05, "loss": 1.6968, "step": 11940 }, { "epoch": 1.1, "learning_rate": 3.80436864507834e-05, "loss": 1.7373, "step": 11950 }, { "epoch": 1.1, "learning_rate": 3.802177551957206e-05, "loss": 1.7715, "step": 11960 }, { "epoch": 1.1, "learning_rate": 3.799985085302266e-05, "loss": 1.7565, "step": 11970 }, { "epoch": 1.1, "learning_rate": 3.797791247426131e-05, "loss": 1.7923, "step": 11980 }, { "epoch": 1.1, "learning_rate": 3.795596040642855e-05, "loss": 1.7154, "step": 11990 }, { "epoch": 1.1, "learning_rate": 3.793399467267935e-05, "loss": 1.7669, "step": 12000 }, { "epoch": 1.11, "learning_rate": 3.791201529618314e-05, "loss": 1.7995, "step": 12010 }, { "epoch": 1.11, "learning_rate": 3.789002230012372e-05, "loss": 1.6891, "step": 12020 }, { "epoch": 1.11, "learning_rate": 3.7868015707699224e-05, "loss": 1.7537, "step": 12030 }, { "epoch": 1.11, "learning_rate": 3.7845995542122157e-05, "loss": 1.7582, "step": 12040 }, { "epoch": 1.11, "learning_rate": 3.782396182661936e-05, "loss": 1.75, "step": 12050 }, { "epoch": 1.11, "learning_rate": 3.7801914584431917e-05, "loss": 1.7315, "step": 12060 }, { "epoch": 1.11, "learning_rate": 3.777985383881523e-05, "loss": 1.768, "step": 12070 }, { "epoch": 1.11, "learning_rate": 3.77577796130389e-05, "loss": 1.7497, "step": 12080 }, { "epoch": 1.11, "learning_rate": 3.7735691930386784e-05, "loss": 1.7822, "step": 12090 }, { "epoch": 1.11, "learning_rate": 3.771359081415692e-05, "loss": 1.7003, "step": 12100 }, { "epoch": 1.11, "learning_rate": 3.76914762876615e-05, "loss": 1.7043, "step": 12110 }, { "epoch": 1.12, "learning_rate": 3.7669348374226874e-05, "loss": 1.7755, "step": 12120 }, { "epoch": 1.12, "learning_rate": 3.764720709719353e-05, "loss": 1.7836, "step": 12130 }, { "epoch": 1.12, "learning_rate": 3.7625052479916015e-05, "loss": 1.7534, "step": 12140 }, { "epoch": 1.12, "learning_rate": 3.760288454576298e-05, "loss": 1.8054, "step": 12150 }, { "epoch": 1.12, "learning_rate": 3.758070331811711e-05, "loss": 1.7523, "step": 12160 }, { "epoch": 1.12, "learning_rate": 3.7558508820375104e-05, "loss": 1.7637, "step": 12170 }, { "epoch": 1.12, "learning_rate": 3.7536301075947655e-05, "loss": 1.7442, "step": 12180 }, { "epoch": 1.12, "learning_rate": 3.751408010825947e-05, "loss": 1.8079, "step": 12190 }, { "epoch": 1.12, "learning_rate": 3.749184594074915e-05, "loss": 1.6611, "step": 12200 }, { "epoch": 1.12, "learning_rate": 3.7469598596869246e-05, "loss": 1.7599, "step": 12210 }, { "epoch": 1.12, "learning_rate": 3.744733810008621e-05, "loss": 1.736, "step": 12220 }, { "epoch": 1.13, "learning_rate": 3.742506447388036e-05, "loss": 1.7419, "step": 12230 }, { "epoch": 1.13, "learning_rate": 3.7402777741745855e-05, "loss": 1.7393, "step": 12240 }, { "epoch": 1.13, "learning_rate": 3.738047792719069e-05, "loss": 1.7667, "step": 12250 }, { "epoch": 1.13, "learning_rate": 3.735816505373667e-05, "loss": 1.7477, "step": 12260 }, { "epoch": 1.13, "learning_rate": 3.733583914491935e-05, "loss": 1.6918, "step": 12270 }, { "epoch": 1.13, "learning_rate": 3.731350022428805e-05, "loss": 1.734, "step": 12280 }, { "epoch": 1.13, "learning_rate": 3.7291148315405814e-05, "loss": 1.7485, "step": 12290 }, { "epoch": 1.13, "learning_rate": 3.726878344184936e-05, "loss": 1.7433, "step": 12300 }, { "epoch": 1.13, "learning_rate": 3.7246405627209144e-05, "loss": 1.6982, "step": 12310 }, { "epoch": 1.13, "learning_rate": 3.722401489508919e-05, "loss": 1.7701, "step": 12320 }, { "epoch": 1.13, "learning_rate": 3.7201611269107216e-05, "loss": 1.7296, "step": 12330 }, { "epoch": 1.14, "learning_rate": 3.717919477289451e-05, "loss": 1.7485, "step": 12340 }, { "epoch": 1.14, "learning_rate": 3.715676543009593e-05, "loss": 1.7824, "step": 12350 }, { "epoch": 1.14, "learning_rate": 3.71343232643699e-05, "loss": 1.718, "step": 12360 }, { "epoch": 1.14, "learning_rate": 3.711186829938836e-05, "loss": 1.7495, "step": 12370 }, { "epoch": 1.14, "learning_rate": 3.708940055883676e-05, "loss": 1.7514, "step": 12380 }, { "epoch": 1.14, "learning_rate": 3.706692006641402e-05, "loss": 1.7232, "step": 12390 }, { "epoch": 1.14, "learning_rate": 3.7044426845832503e-05, "loss": 1.7434, "step": 12400 }, { "epoch": 1.14, "learning_rate": 3.702192092081801e-05, "loss": 1.7421, "step": 12410 }, { "epoch": 1.14, "learning_rate": 3.699940231510973e-05, "loss": 1.7124, "step": 12420 }, { "epoch": 1.14, "learning_rate": 3.697687105246025e-05, "loss": 1.6403, "step": 12430 }, { "epoch": 1.15, "learning_rate": 3.695432715663548e-05, "loss": 1.7148, "step": 12440 }, { "epoch": 1.15, "learning_rate": 3.693177065141467e-05, "loss": 1.7811, "step": 12450 }, { "epoch": 1.15, "learning_rate": 3.690920156059037e-05, "loss": 1.7317, "step": 12460 }, { "epoch": 1.15, "learning_rate": 3.6886619907968415e-05, "loss": 1.7576, "step": 12470 }, { "epoch": 1.15, "learning_rate": 3.686402571736785e-05, "loss": 1.7974, "step": 12480 }, { "epoch": 1.15, "learning_rate": 3.6841419012620996e-05, "loss": 1.7364, "step": 12490 }, { "epoch": 1.15, "learning_rate": 3.6818799817573356e-05, "loss": 1.7447, "step": 12500 }, { "epoch": 1.15, "learning_rate": 3.6796168156083585e-05, "loss": 1.7559, "step": 12510 }, { "epoch": 1.15, "learning_rate": 3.6773524052023525e-05, "loss": 1.7591, "step": 12520 }, { "epoch": 1.15, "learning_rate": 3.675086752927811e-05, "loss": 1.7323, "step": 12530 }, { "epoch": 1.15, "learning_rate": 3.672819861174539e-05, "loss": 1.7043, "step": 12540 }, { "epoch": 1.16, "learning_rate": 3.670551732333649e-05, "loss": 1.7451, "step": 12550 }, { "epoch": 1.16, "learning_rate": 3.668282368797558e-05, "loss": 1.7514, "step": 12560 }, { "epoch": 1.16, "learning_rate": 3.666011772959984e-05, "loss": 1.7177, "step": 12570 }, { "epoch": 1.16, "learning_rate": 3.6637399472159475e-05, "loss": 1.7111, "step": 12580 }, { "epoch": 1.16, "learning_rate": 3.661466893961764e-05, "loss": 1.7254, "step": 12590 }, { "epoch": 1.16, "learning_rate": 3.659192615595045e-05, "loss": 1.7595, "step": 12600 }, { "epoch": 1.16, "learning_rate": 3.6569171145146945e-05, "loss": 1.7658, "step": 12610 }, { "epoch": 1.16, "learning_rate": 3.654640393120906e-05, "loss": 1.7179, "step": 12620 }, { "epoch": 1.16, "learning_rate": 3.652362453815158e-05, "loss": 1.7306, "step": 12630 }, { "epoch": 1.16, "learning_rate": 3.650083299000217e-05, "loss": 1.765, "step": 12640 }, { "epoch": 1.16, "learning_rate": 3.6478029310801296e-05, "loss": 1.7208, "step": 12650 }, { "epoch": 1.17, "learning_rate": 3.6455213524602224e-05, "loss": 1.7199, "step": 12660 }, { "epoch": 1.17, "learning_rate": 3.6432385655471004e-05, "loss": 1.7401, "step": 12670 }, { "epoch": 1.17, "learning_rate": 3.64095457274864e-05, "loss": 1.759, "step": 12680 }, { "epoch": 1.17, "learning_rate": 3.638669376473992e-05, "loss": 1.7199, "step": 12690 }, { "epoch": 1.17, "learning_rate": 3.6363829791335766e-05, "loss": 1.7874, "step": 12700 }, { "epoch": 1.17, "learning_rate": 3.6340953831390805e-05, "loss": 1.7517, "step": 12710 }, { "epoch": 1.17, "learning_rate": 3.6318065909034524e-05, "loss": 1.7877, "step": 12720 }, { "epoch": 1.17, "learning_rate": 3.6295166048409076e-05, "loss": 1.7277, "step": 12730 }, { "epoch": 1.17, "learning_rate": 3.627225427366918e-05, "loss": 1.7579, "step": 12740 }, { "epoch": 1.17, "learning_rate": 3.624933060898209e-05, "loss": 1.7169, "step": 12750 }, { "epoch": 1.17, "learning_rate": 3.622639507852767e-05, "loss": 1.7919, "step": 12760 }, { "epoch": 1.18, "learning_rate": 3.6203447706498236e-05, "loss": 1.7341, "step": 12770 }, { "epoch": 1.18, "learning_rate": 3.618048851709864e-05, "loss": 1.7126, "step": 12780 }, { "epoch": 1.18, "learning_rate": 3.6157517534546176e-05, "loss": 1.8168, "step": 12790 }, { "epoch": 1.18, "learning_rate": 3.613453478307058e-05, "loss": 1.7444, "step": 12800 }, { "epoch": 1.18, "learning_rate": 3.6111540286914e-05, "loss": 1.7671, "step": 12810 }, { "epoch": 1.18, "learning_rate": 3.608853407033098e-05, "loss": 1.7162, "step": 12820 }, { "epoch": 1.18, "learning_rate": 3.606551615758843e-05, "loss": 1.7603, "step": 12830 }, { "epoch": 1.18, "learning_rate": 3.6042486572965585e-05, "loss": 1.7355, "step": 12840 }, { "epoch": 1.18, "learning_rate": 3.601944534075399e-05, "loss": 1.7453, "step": 12850 }, { "epoch": 1.18, "learning_rate": 3.599639248525749e-05, "loss": 1.7289, "step": 12860 }, { "epoch": 1.18, "learning_rate": 3.597332803079217e-05, "loss": 1.7713, "step": 12870 }, { "epoch": 1.19, "learning_rate": 3.595025200168638e-05, "loss": 1.7597, "step": 12880 }, { "epoch": 1.19, "learning_rate": 3.592716442228066e-05, "loss": 1.7228, "step": 12890 }, { "epoch": 1.19, "learning_rate": 3.590406531692771e-05, "loss": 1.7271, "step": 12900 }, { "epoch": 1.19, "learning_rate": 3.5880954709992434e-05, "loss": 1.7817, "step": 12910 }, { "epoch": 1.19, "learning_rate": 3.585783262585185e-05, "loss": 1.7906, "step": 12920 }, { "epoch": 1.19, "learning_rate": 3.583469908889506e-05, "loss": 1.7294, "step": 12930 }, { "epoch": 1.19, "learning_rate": 3.581155412352327e-05, "loss": 1.7469, "step": 12940 }, { "epoch": 1.19, "learning_rate": 3.578839775414974e-05, "loss": 1.7844, "step": 12950 }, { "epoch": 1.19, "learning_rate": 3.5765230005199755e-05, "loss": 1.7146, "step": 12960 }, { "epoch": 1.19, "learning_rate": 3.5742050901110595e-05, "loss": 1.6974, "step": 12970 }, { "epoch": 1.19, "learning_rate": 3.571886046633153e-05, "loss": 1.7459, "step": 12980 }, { "epoch": 1.2, "learning_rate": 3.569565872532376e-05, "loss": 1.7128, "step": 12990 }, { "epoch": 1.2, "learning_rate": 3.567244570256045e-05, "loss": 1.7845, "step": 13000 }, { "epoch": 1.2, "learning_rate": 3.5649221422526626e-05, "loss": 1.7128, "step": 13010 }, { "epoch": 1.2, "learning_rate": 3.5625985909719206e-05, "loss": 1.7432, "step": 13020 }, { "epoch": 1.2, "learning_rate": 3.560273918864696e-05, "loss": 1.7179, "step": 13030 }, { "epoch": 1.2, "learning_rate": 3.5579481283830476e-05, "loss": 1.7193, "step": 13040 }, { "epoch": 1.2, "learning_rate": 3.555621221980212e-05, "loss": 1.738, "step": 13050 }, { "epoch": 1.2, "learning_rate": 3.553293202110607e-05, "loss": 1.7414, "step": 13060 }, { "epoch": 1.2, "learning_rate": 3.55096407122982e-05, "loss": 1.7192, "step": 13070 }, { "epoch": 1.2, "learning_rate": 3.5486338317946146e-05, "loss": 1.7194, "step": 13080 }, { "epoch": 1.2, "learning_rate": 3.546302486262921e-05, "loss": 1.7501, "step": 13090 }, { "epoch": 1.21, "learning_rate": 3.543970037093838e-05, "loss": 1.7458, "step": 13100 }, { "epoch": 1.21, "learning_rate": 3.541636486747627e-05, "loss": 1.7757, "step": 13110 }, { "epoch": 1.21, "learning_rate": 3.5393018376857095e-05, "loss": 1.6973, "step": 13120 }, { "epoch": 1.21, "learning_rate": 3.536966092370671e-05, "loss": 1.7336, "step": 13130 }, { "epoch": 1.21, "learning_rate": 3.5346292532662465e-05, "loss": 1.6863, "step": 13140 }, { "epoch": 1.21, "learning_rate": 3.532291322837331e-05, "loss": 1.7283, "step": 13150 }, { "epoch": 1.21, "learning_rate": 3.529952303549968e-05, "loss": 1.7145, "step": 13160 }, { "epoch": 1.21, "learning_rate": 3.5276121978713485e-05, "loss": 1.7102, "step": 13170 }, { "epoch": 1.21, "learning_rate": 3.5252710082698106e-05, "loss": 1.7549, "step": 13180 }, { "epoch": 1.21, "learning_rate": 3.522928737214836e-05, "loss": 1.7406, "step": 13190 }, { "epoch": 1.22, "learning_rate": 3.520585387177046e-05, "loss": 1.6968, "step": 13200 }, { "epoch": 1.22, "learning_rate": 3.518240960628202e-05, "loss": 1.71, "step": 13210 }, { "epoch": 1.22, "learning_rate": 3.515895460041199e-05, "loss": 1.7541, "step": 13220 }, { "epoch": 1.22, "learning_rate": 3.513548887890064e-05, "loss": 1.7441, "step": 13230 }, { "epoch": 1.22, "learning_rate": 3.511201246649958e-05, "loss": 1.7309, "step": 13240 }, { "epoch": 1.22, "learning_rate": 3.5088525387971654e-05, "loss": 1.7774, "step": 13250 }, { "epoch": 1.22, "learning_rate": 3.506502766809099e-05, "loss": 1.7986, "step": 13260 }, { "epoch": 1.22, "learning_rate": 3.504151933164292e-05, "loss": 1.746, "step": 13270 }, { "epoch": 1.22, "learning_rate": 3.5018000403424e-05, "loss": 1.7003, "step": 13280 }, { "epoch": 1.22, "learning_rate": 3.499447090824191e-05, "loss": 1.7147, "step": 13290 }, { "epoch": 1.22, "learning_rate": 3.497093087091553e-05, "loss": 1.7916, "step": 13300 }, { "epoch": 1.23, "learning_rate": 3.494738031627483e-05, "loss": 1.7467, "step": 13310 }, { "epoch": 1.23, "learning_rate": 3.4923819269160865e-05, "loss": 1.708, "step": 13320 }, { "epoch": 1.23, "learning_rate": 3.4900247754425795e-05, "loss": 1.7802, "step": 13330 }, { "epoch": 1.23, "learning_rate": 3.487666579693279e-05, "loss": 1.7484, "step": 13340 }, { "epoch": 1.23, "learning_rate": 3.485307342155602e-05, "loss": 1.723, "step": 13350 }, { "epoch": 1.23, "learning_rate": 3.48294706531807e-05, "loss": 1.7259, "step": 13360 }, { "epoch": 1.23, "learning_rate": 3.4805857516702967e-05, "loss": 1.7603, "step": 13370 }, { "epoch": 1.23, "learning_rate": 3.478223403702989e-05, "loss": 1.7606, "step": 13380 }, { "epoch": 1.23, "learning_rate": 3.4758600239079464e-05, "loss": 1.7501, "step": 13390 }, { "epoch": 1.23, "learning_rate": 3.473495614778056e-05, "loss": 1.6901, "step": 13400 }, { "epoch": 1.23, "learning_rate": 3.471130178807292e-05, "loss": 1.7421, "step": 13410 }, { "epoch": 1.24, "learning_rate": 3.4687637184907093e-05, "loss": 1.7112, "step": 13420 }, { "epoch": 1.24, "learning_rate": 3.466396236324447e-05, "loss": 1.7596, "step": 13430 }, { "epoch": 1.24, "learning_rate": 3.464027734805716e-05, "loss": 1.7644, "step": 13440 }, { "epoch": 1.24, "learning_rate": 3.46165821643281e-05, "loss": 1.6836, "step": 13450 }, { "epoch": 1.24, "learning_rate": 3.459287683705089e-05, "loss": 1.7295, "step": 13460 }, { "epoch": 1.24, "learning_rate": 3.456916139122985e-05, "loss": 1.7405, "step": 13470 }, { "epoch": 1.24, "learning_rate": 3.4545435851879994e-05, "loss": 1.7845, "step": 13480 }, { "epoch": 1.24, "learning_rate": 3.452170024402696e-05, "loss": 1.6984, "step": 13490 }, { "epoch": 1.24, "learning_rate": 3.4497954592707e-05, "loss": 1.6895, "step": 13500 }, { "epoch": 1.24, "learning_rate": 3.447419892296698e-05, "loss": 1.7636, "step": 13510 }, { "epoch": 1.24, "learning_rate": 3.445043325986432e-05, "loss": 1.7865, "step": 13520 }, { "epoch": 1.25, "learning_rate": 3.4426657628466994e-05, "loss": 1.6962, "step": 13530 }, { "epoch": 1.25, "learning_rate": 3.440287205385347e-05, "loss": 1.7675, "step": 13540 }, { "epoch": 1.25, "learning_rate": 3.4379076561112726e-05, "loss": 1.7717, "step": 13550 }, { "epoch": 1.25, "learning_rate": 3.435527117534418e-05, "loss": 1.792, "step": 13560 }, { "epoch": 1.25, "learning_rate": 3.4331455921657714e-05, "loss": 1.7312, "step": 13570 }, { "epoch": 1.25, "learning_rate": 3.430763082517359e-05, "loss": 1.7415, "step": 13580 }, { "epoch": 1.25, "learning_rate": 3.428379591102245e-05, "loss": 1.7216, "step": 13590 }, { "epoch": 1.25, "learning_rate": 3.425995120434533e-05, "loss": 1.7235, "step": 13600 }, { "epoch": 1.25, "learning_rate": 3.423609673029356e-05, "loss": 1.7722, "step": 13610 }, { "epoch": 1.25, "learning_rate": 3.421223251402878e-05, "loss": 1.7393, "step": 13620 }, { "epoch": 1.25, "learning_rate": 3.4188358580722913e-05, "loss": 1.6786, "step": 13630 }, { "epoch": 1.26, "learning_rate": 3.4164474955558126e-05, "loss": 1.7354, "step": 13640 }, { "epoch": 1.26, "learning_rate": 3.414058166372681e-05, "loss": 1.822, "step": 13650 }, { "epoch": 1.26, "learning_rate": 3.411667873043156e-05, "loss": 1.7312, "step": 13660 }, { "epoch": 1.26, "learning_rate": 3.409276618088512e-05, "loss": 1.7716, "step": 13670 }, { "epoch": 1.26, "learning_rate": 3.4068844040310395e-05, "loss": 1.7486, "step": 13680 }, { "epoch": 1.26, "learning_rate": 3.404491233394041e-05, "loss": 1.7807, "step": 13690 }, { "epoch": 1.26, "learning_rate": 3.402097108701827e-05, "loss": 1.7438, "step": 13700 }, { "epoch": 1.26, "learning_rate": 3.399702032479713e-05, "loss": 1.7927, "step": 13710 }, { "epoch": 1.26, "learning_rate": 3.397306007254022e-05, "loss": 1.7255, "step": 13720 }, { "epoch": 1.26, "learning_rate": 3.3949090355520744e-05, "loss": 1.7312, "step": 13730 }, { "epoch": 1.26, "learning_rate": 3.3925111199021905e-05, "loss": 1.7336, "step": 13740 }, { "epoch": 1.27, "learning_rate": 3.390112262833687e-05, "loss": 1.68, "step": 13750 }, { "epoch": 1.27, "learning_rate": 3.387712466876872e-05, "loss": 1.7852, "step": 13760 }, { "epoch": 1.27, "learning_rate": 3.3853117345630435e-05, "loss": 1.7407, "step": 13770 }, { "epoch": 1.27, "learning_rate": 3.38291006842449e-05, "loss": 1.745, "step": 13780 }, { "epoch": 1.27, "learning_rate": 3.380507470994483e-05, "loss": 1.6988, "step": 13790 }, { "epoch": 1.27, "learning_rate": 3.3781039448072745e-05, "loss": 1.7042, "step": 13800 }, { "epoch": 1.27, "learning_rate": 3.375699492398101e-05, "loss": 1.7324, "step": 13810 }, { "epoch": 1.27, "learning_rate": 3.373294116303172e-05, "loss": 1.723, "step": 13820 }, { "epoch": 1.27, "learning_rate": 3.370887819059672e-05, "loss": 1.7229, "step": 13830 }, { "epoch": 1.27, "learning_rate": 3.368480603205759e-05, "loss": 1.7887, "step": 13840 }, { "epoch": 1.27, "learning_rate": 3.3660724712805574e-05, "loss": 1.7546, "step": 13850 }, { "epoch": 1.28, "learning_rate": 3.36366342582416e-05, "loss": 1.7024, "step": 13860 }, { "epoch": 1.28, "learning_rate": 3.361253469377621e-05, "loss": 1.7688, "step": 13870 }, { "epoch": 1.28, "learning_rate": 3.358842604482959e-05, "loss": 1.7706, "step": 13880 }, { "epoch": 1.28, "learning_rate": 3.356430833683145e-05, "loss": 1.7546, "step": 13890 }, { "epoch": 1.28, "learning_rate": 3.354018159522113e-05, "loss": 1.7531, "step": 13900 }, { "epoch": 1.28, "learning_rate": 3.3516045845447436e-05, "loss": 1.7406, "step": 13910 }, { "epoch": 1.28, "learning_rate": 3.349190111296871e-05, "loss": 1.7214, "step": 13920 }, { "epoch": 1.28, "learning_rate": 3.3467747423252755e-05, "loss": 1.6971, "step": 13930 }, { "epoch": 1.28, "learning_rate": 3.344358480177682e-05, "loss": 1.7491, "step": 13940 }, { "epoch": 1.28, "learning_rate": 3.341941327402759e-05, "loss": 1.7139, "step": 13950 }, { "epoch": 1.29, "learning_rate": 3.3395232865501134e-05, "loss": 1.7092, "step": 13960 }, { "epoch": 1.29, "learning_rate": 3.337104360170289e-05, "loss": 1.6899, "step": 13970 }, { "epoch": 1.29, "learning_rate": 3.3346845508147635e-05, "loss": 1.707, "step": 13980 }, { "epoch": 1.29, "learning_rate": 3.3322638610359456e-05, "loss": 1.7223, "step": 13990 }, { "epoch": 1.29, "learning_rate": 3.329842293387174e-05, "loss": 1.7089, "step": 14000 }, { "epoch": 1.29, "learning_rate": 3.3274198504227114e-05, "loss": 1.7615, "step": 14010 }, { "epoch": 1.29, "learning_rate": 3.324996534697747e-05, "loss": 1.7773, "step": 14020 }, { "epoch": 1.29, "learning_rate": 3.322572348768387e-05, "loss": 1.7031, "step": 14030 }, { "epoch": 1.29, "learning_rate": 3.3201472951916565e-05, "loss": 1.7099, "step": 14040 }, { "epoch": 1.29, "learning_rate": 3.317721376525497e-05, "loss": 1.7019, "step": 14050 }, { "epoch": 1.29, "learning_rate": 3.3152945953287615e-05, "loss": 1.69, "step": 14060 }, { "epoch": 1.3, "learning_rate": 3.312866954161214e-05, "loss": 1.7245, "step": 14070 }, { "epoch": 1.3, "learning_rate": 3.3104384555835235e-05, "loss": 1.722, "step": 14080 }, { "epoch": 1.3, "learning_rate": 3.308009102157265e-05, "loss": 1.7066, "step": 14090 }, { "epoch": 1.3, "learning_rate": 3.3055788964449146e-05, "loss": 1.7519, "step": 14100 }, { "epoch": 1.3, "learning_rate": 3.303147841009848e-05, "loss": 1.7335, "step": 14110 }, { "epoch": 1.3, "learning_rate": 3.300715938416337e-05, "loss": 1.7178, "step": 14120 }, { "epoch": 1.3, "learning_rate": 3.298283191229545e-05, "loss": 1.7545, "step": 14130 }, { "epoch": 1.3, "learning_rate": 3.295849602015529e-05, "loss": 1.7403, "step": 14140 }, { "epoch": 1.3, "learning_rate": 3.293415173341235e-05, "loss": 1.7352, "step": 14150 }, { "epoch": 1.3, "learning_rate": 3.29097990777449e-05, "loss": 1.7306, "step": 14160 }, { "epoch": 1.3, "learning_rate": 3.288543807884007e-05, "loss": 1.7165, "step": 14170 }, { "epoch": 1.31, "learning_rate": 3.2861068762393785e-05, "loss": 1.759, "step": 14180 }, { "epoch": 1.31, "learning_rate": 3.283669115411075e-05, "loss": 1.697, "step": 14190 }, { "epoch": 1.31, "learning_rate": 3.2812305279704406e-05, "loss": 1.6693, "step": 14200 }, { "epoch": 1.31, "learning_rate": 3.27879111648969e-05, "loss": 1.7212, "step": 14210 }, { "epoch": 1.31, "learning_rate": 3.276350883541911e-05, "loss": 1.7274, "step": 14220 }, { "epoch": 1.31, "learning_rate": 3.273909831701054e-05, "loss": 1.7398, "step": 14230 }, { "epoch": 1.31, "learning_rate": 3.2714679635419346e-05, "loss": 1.6926, "step": 14240 }, { "epoch": 1.31, "learning_rate": 3.2690252816402315e-05, "loss": 1.7001, "step": 14250 }, { "epoch": 1.31, "learning_rate": 3.266581788572478e-05, "loss": 1.6896, "step": 14260 }, { "epoch": 1.31, "learning_rate": 3.264137486916064e-05, "loss": 1.717, "step": 14270 }, { "epoch": 1.31, "learning_rate": 3.261692379249235e-05, "loss": 1.7356, "step": 14280 }, { "epoch": 1.32, "learning_rate": 3.2592464681510856e-05, "loss": 1.7755, "step": 14290 }, { "epoch": 1.32, "learning_rate": 3.256799756201554e-05, "loss": 1.7329, "step": 14300 }, { "epoch": 1.32, "learning_rate": 3.2543522459814286e-05, "loss": 1.7051, "step": 14310 }, { "epoch": 1.32, "learning_rate": 3.2519039400723364e-05, "loss": 1.7302, "step": 14320 }, { "epoch": 1.32, "learning_rate": 3.2494548410567444e-05, "loss": 1.726, "step": 14330 }, { "epoch": 1.32, "learning_rate": 3.2470049515179574e-05, "loss": 1.7719, "step": 14340 }, { "epoch": 1.32, "learning_rate": 3.244554274040112e-05, "loss": 1.7409, "step": 14350 }, { "epoch": 1.32, "learning_rate": 3.242102811208179e-05, "loss": 1.7287, "step": 14360 }, { "epoch": 1.32, "learning_rate": 3.239650565607953e-05, "loss": 1.7083, "step": 14370 }, { "epoch": 1.32, "learning_rate": 3.2371975398260586e-05, "loss": 1.6905, "step": 14380 }, { "epoch": 1.32, "learning_rate": 3.234743736449941e-05, "loss": 1.784, "step": 14390 }, { "epoch": 1.33, "learning_rate": 3.2322891580678664e-05, "loss": 1.7381, "step": 14400 }, { "epoch": 1.33, "learning_rate": 3.2298338072689175e-05, "loss": 1.7368, "step": 14410 }, { "epoch": 1.33, "learning_rate": 3.227377686642994e-05, "loss": 1.7461, "step": 14420 }, { "epoch": 1.33, "learning_rate": 3.224920798780805e-05, "loss": 1.7467, "step": 14430 }, { "epoch": 1.33, "learning_rate": 3.222463146273871e-05, "loss": 1.6928, "step": 14440 }, { "epoch": 1.33, "learning_rate": 3.220004731714517e-05, "loss": 1.8042, "step": 14450 }, { "epoch": 1.33, "learning_rate": 3.217545557695872e-05, "loss": 1.7326, "step": 14460 }, { "epoch": 1.33, "learning_rate": 3.21508562681187e-05, "loss": 1.6777, "step": 14470 }, { "epoch": 1.33, "learning_rate": 3.2126249416572374e-05, "loss": 1.7353, "step": 14480 }, { "epoch": 1.33, "learning_rate": 3.210163504827501e-05, "loss": 1.7877, "step": 14490 }, { "epoch": 1.33, "learning_rate": 3.2077013189189776e-05, "loss": 1.7386, "step": 14500 }, { "epoch": 1.34, "learning_rate": 3.2052383865287746e-05, "loss": 1.7633, "step": 14510 }, { "epoch": 1.34, "learning_rate": 3.202774710254788e-05, "loss": 1.7008, "step": 14520 }, { "epoch": 1.34, "learning_rate": 3.200310292695699e-05, "loss": 1.7251, "step": 14530 }, { "epoch": 1.34, "learning_rate": 3.1978451364509656e-05, "loss": 1.7456, "step": 14540 }, { "epoch": 1.34, "learning_rate": 3.195379244120833e-05, "loss": 1.7846, "step": 14550 }, { "epoch": 1.34, "learning_rate": 3.192912618306316e-05, "loss": 1.7146, "step": 14560 }, { "epoch": 1.34, "learning_rate": 3.190445261609207e-05, "loss": 1.7629, "step": 14570 }, { "epoch": 1.34, "learning_rate": 3.1879771766320675e-05, "loss": 1.7201, "step": 14580 }, { "epoch": 1.34, "learning_rate": 3.185508365978227e-05, "loss": 1.8089, "step": 14590 }, { "epoch": 1.34, "learning_rate": 3.1830388322517823e-05, "loss": 1.7033, "step": 14600 }, { "epoch": 1.34, "learning_rate": 3.180568578057591e-05, "loss": 1.7651, "step": 14610 }, { "epoch": 1.35, "learning_rate": 3.1780976060012725e-05, "loss": 1.7073, "step": 14620 }, { "epoch": 1.35, "learning_rate": 3.1756259186892e-05, "loss": 1.7383, "step": 14630 }, { "epoch": 1.35, "learning_rate": 3.1731535187285065e-05, "loss": 1.7289, "step": 14640 }, { "epoch": 1.35, "learning_rate": 3.1706804087270716e-05, "loss": 1.7578, "step": 14650 }, { "epoch": 1.35, "learning_rate": 3.168206591293525e-05, "loss": 1.7324, "step": 14660 }, { "epoch": 1.35, "learning_rate": 3.165732069037246e-05, "loss": 1.7191, "step": 14670 }, { "epoch": 1.35, "learning_rate": 3.1632568445683534e-05, "loss": 1.7847, "step": 14680 }, { "epoch": 1.35, "learning_rate": 3.1607809204977085e-05, "loss": 1.7358, "step": 14690 }, { "epoch": 1.35, "learning_rate": 3.158304299436909e-05, "loss": 1.7276, "step": 14700 }, { "epoch": 1.35, "learning_rate": 3.155826983998292e-05, "loss": 1.7575, "step": 14710 }, { "epoch": 1.35, "learning_rate": 3.153348976794921e-05, "loss": 1.7538, "step": 14720 }, { "epoch": 1.36, "learning_rate": 3.1508702804405924e-05, "loss": 1.7333, "step": 14730 }, { "epoch": 1.36, "learning_rate": 3.148390897549831e-05, "loss": 1.7258, "step": 14740 }, { "epoch": 1.36, "learning_rate": 3.145910830737882e-05, "loss": 1.7174, "step": 14750 }, { "epoch": 1.36, "learning_rate": 3.143430082620715e-05, "loss": 1.7286, "step": 14760 }, { "epoch": 1.36, "learning_rate": 3.140948655815017e-05, "loss": 1.7603, "step": 14770 }, { "epoch": 1.36, "learning_rate": 3.138466552938192e-05, "loss": 1.7802, "step": 14780 }, { "epoch": 1.36, "learning_rate": 3.135983776608355e-05, "loss": 1.7178, "step": 14790 }, { "epoch": 1.36, "learning_rate": 3.1335003294443335e-05, "loss": 1.7093, "step": 14800 }, { "epoch": 1.36, "learning_rate": 3.131016214065661e-05, "loss": 1.7499, "step": 14810 }, { "epoch": 1.36, "learning_rate": 3.128531433092577e-05, "loss": 1.7247, "step": 14820 }, { "epoch": 1.37, "learning_rate": 3.1260459891460236e-05, "loss": 1.6804, "step": 14830 }, { "epoch": 1.37, "learning_rate": 3.12355988484764e-05, "loss": 1.7403, "step": 14840 }, { "epoch": 1.37, "learning_rate": 3.121073122819762e-05, "loss": 1.7542, "step": 14850 }, { "epoch": 1.37, "learning_rate": 3.1185857056854225e-05, "loss": 1.744, "step": 14860 }, { "epoch": 1.37, "learning_rate": 3.116097636068342e-05, "loss": 1.7747, "step": 14870 }, { "epoch": 1.37, "learning_rate": 3.113608916592931e-05, "loss": 1.6977, "step": 14880 }, { "epoch": 1.37, "learning_rate": 3.111119549884284e-05, "loss": 1.7056, "step": 14890 }, { "epoch": 1.37, "learning_rate": 3.1086295385681796e-05, "loss": 1.6952, "step": 14900 }, { "epoch": 1.37, "learning_rate": 3.1061388852710766e-05, "loss": 1.753, "step": 14910 }, { "epoch": 1.37, "learning_rate": 3.10364759262011e-05, "loss": 1.6997, "step": 14920 }, { "epoch": 1.37, "learning_rate": 3.101155663243088e-05, "loss": 1.7508, "step": 14930 }, { "epoch": 1.38, "learning_rate": 3.098663099768494e-05, "loss": 1.7467, "step": 14940 }, { "epoch": 1.38, "learning_rate": 3.0961699048254765e-05, "loss": 1.6817, "step": 14950 }, { "epoch": 1.38, "learning_rate": 3.093676081043852e-05, "loss": 1.7479, "step": 14960 }, { "epoch": 1.38, "learning_rate": 3.0911816310540996e-05, "loss": 1.7656, "step": 14970 }, { "epoch": 1.38, "learning_rate": 3.0886865574873606e-05, "loss": 1.7962, "step": 14980 }, { "epoch": 1.38, "learning_rate": 3.086190862975431e-05, "loss": 1.767, "step": 14990 }, { "epoch": 1.38, "learning_rate": 3.083694550150765e-05, "loss": 1.7175, "step": 15000 }, { "epoch": 1.38, "learning_rate": 3.081197621646466e-05, "loss": 1.7412, "step": 15010 }, { "epoch": 1.38, "learning_rate": 3.078700080096289e-05, "loss": 1.71, "step": 15020 }, { "epoch": 1.38, "learning_rate": 3.076201928134635e-05, "loss": 1.6667, "step": 15030 }, { "epoch": 1.38, "learning_rate": 3.073703168396549e-05, "loss": 1.7637, "step": 15040 }, { "epoch": 1.39, "learning_rate": 3.071203803517716e-05, "loss": 1.7815, "step": 15050 }, { "epoch": 1.39, "learning_rate": 3.068703836134461e-05, "loss": 1.7336, "step": 15060 }, { "epoch": 1.39, "learning_rate": 3.066203268883744e-05, "loss": 1.6911, "step": 15070 }, { "epoch": 1.39, "learning_rate": 3.063702104403156e-05, "loss": 1.7461, "step": 15080 }, { "epoch": 1.39, "learning_rate": 3.061200345330921e-05, "loss": 1.735, "step": 15090 }, { "epoch": 1.39, "learning_rate": 3.058697994305887e-05, "loss": 1.7287, "step": 15100 }, { "epoch": 1.39, "learning_rate": 3.056195053967529e-05, "loss": 1.7733, "step": 15110 }, { "epoch": 1.39, "learning_rate": 3.053691526955943e-05, "loss": 1.7069, "step": 15120 }, { "epoch": 1.39, "learning_rate": 3.0511874159118426e-05, "loss": 1.7211, "step": 15130 }, { "epoch": 1.39, "learning_rate": 3.048682723476557e-05, "loss": 1.731, "step": 15140 }, { "epoch": 1.39, "learning_rate": 3.0461774522920322e-05, "loss": 1.6898, "step": 15150 }, { "epoch": 1.4, "learning_rate": 3.0436716050008212e-05, "loss": 1.7456, "step": 15160 }, { "epoch": 1.4, "learning_rate": 3.0411651842460855e-05, "loss": 1.7423, "step": 15170 }, { "epoch": 1.4, "learning_rate": 3.0386581926715925e-05, "loss": 1.7716, "step": 15180 }, { "epoch": 1.4, "learning_rate": 3.0361506329217106e-05, "loss": 1.7277, "step": 15190 }, { "epoch": 1.4, "learning_rate": 3.0336425076414073e-05, "loss": 1.7145, "step": 15200 }, { "epoch": 1.4, "learning_rate": 3.0311338194762483e-05, "loss": 1.7933, "step": 15210 }, { "epoch": 1.4, "learning_rate": 3.028624571072392e-05, "loss": 1.6865, "step": 15220 }, { "epoch": 1.4, "learning_rate": 3.0261147650765865e-05, "loss": 1.7139, "step": 15230 }, { "epoch": 1.4, "learning_rate": 3.0236044041361707e-05, "loss": 1.7164, "step": 15240 }, { "epoch": 1.4, "learning_rate": 3.021093490899066e-05, "loss": 1.7338, "step": 15250 }, { "epoch": 1.4, "learning_rate": 3.018582028013779e-05, "loss": 1.687, "step": 15260 }, { "epoch": 1.41, "learning_rate": 3.016070018129394e-05, "loss": 1.711, "step": 15270 }, { "epoch": 1.41, "learning_rate": 3.013557463895573e-05, "loss": 1.6712, "step": 15280 }, { "epoch": 1.41, "learning_rate": 3.011044367962552e-05, "loss": 1.7027, "step": 15290 }, { "epoch": 1.41, "learning_rate": 3.00853073298114e-05, "loss": 1.7073, "step": 15300 }, { "epoch": 1.41, "learning_rate": 3.0060165616027125e-05, "loss": 1.704, "step": 15310 }, { "epoch": 1.41, "learning_rate": 3.0035018564792112e-05, "loss": 1.7232, "step": 15320 }, { "epoch": 1.41, "learning_rate": 3.0009866202631415e-05, "loss": 1.7459, "step": 15330 }, { "epoch": 1.41, "learning_rate": 2.9984708556075686e-05, "loss": 1.71, "step": 15340 }, { "epoch": 1.41, "learning_rate": 2.995954565166115e-05, "loss": 1.731, "step": 15350 }, { "epoch": 1.41, "learning_rate": 2.993437751592958e-05, "loss": 1.7229, "step": 15360 }, { "epoch": 1.41, "learning_rate": 2.990920417542826e-05, "loss": 1.7186, "step": 15370 }, { "epoch": 1.42, "learning_rate": 2.9884025656709986e-05, "loss": 1.7262, "step": 15380 }, { "epoch": 1.42, "learning_rate": 2.9858841986332986e-05, "loss": 1.6911, "step": 15390 }, { "epoch": 1.42, "learning_rate": 2.9833653190860938e-05, "loss": 1.7087, "step": 15400 }, { "epoch": 1.42, "learning_rate": 2.9808459296862922e-05, "loss": 1.7176, "step": 15410 }, { "epoch": 1.42, "learning_rate": 2.9783260330913405e-05, "loss": 1.7566, "step": 15420 }, { "epoch": 1.42, "learning_rate": 2.9758056319592194e-05, "loss": 1.7135, "step": 15430 }, { "epoch": 1.42, "learning_rate": 2.9732847289484412e-05, "loss": 1.7569, "step": 15440 }, { "epoch": 1.42, "learning_rate": 2.97076332671805e-05, "loss": 1.7475, "step": 15450 }, { "epoch": 1.42, "learning_rate": 2.9682414279276145e-05, "loss": 1.7553, "step": 15460 }, { "epoch": 1.42, "learning_rate": 2.965719035237226e-05, "loss": 1.6646, "step": 15470 }, { "epoch": 1.42, "learning_rate": 2.963196151307501e-05, "loss": 1.7543, "step": 15480 }, { "epoch": 1.43, "learning_rate": 2.9606727787995692e-05, "loss": 1.7515, "step": 15490 }, { "epoch": 1.43, "learning_rate": 2.9581489203750785e-05, "loss": 1.6891, "step": 15500 }, { "epoch": 1.43, "learning_rate": 2.95562457869619e-05, "loss": 1.691, "step": 15510 }, { "epoch": 1.43, "learning_rate": 2.9530997564255725e-05, "loss": 1.7236, "step": 15520 }, { "epoch": 1.43, "learning_rate": 2.950574456226402e-05, "loss": 1.7513, "step": 15530 }, { "epoch": 1.43, "learning_rate": 2.9480486807623596e-05, "loss": 1.7559, "step": 15540 }, { "epoch": 1.43, "learning_rate": 2.9455224326976284e-05, "loss": 1.7337, "step": 15550 }, { "epoch": 1.43, "learning_rate": 2.9429957146968866e-05, "loss": 1.7253, "step": 15560 }, { "epoch": 1.43, "learning_rate": 2.9404685294253126e-05, "loss": 1.723, "step": 15570 }, { "epoch": 1.43, "learning_rate": 2.937940879548574e-05, "loss": 1.7266, "step": 15580 }, { "epoch": 1.44, "learning_rate": 2.935412767732831e-05, "loss": 1.7345, "step": 15590 }, { "epoch": 1.44, "learning_rate": 2.932884196644729e-05, "loss": 1.7453, "step": 15600 }, { "epoch": 1.44, "learning_rate": 2.9303551689513992e-05, "loss": 1.7445, "step": 15610 }, { "epoch": 1.44, "learning_rate": 2.927825687320454e-05, "loss": 1.719, "step": 15620 }, { "epoch": 1.44, "learning_rate": 2.925295754419985e-05, "loss": 1.6857, "step": 15630 }, { "epoch": 1.44, "learning_rate": 2.9227653729185595e-05, "loss": 1.7212, "step": 15640 }, { "epoch": 1.44, "learning_rate": 2.920234545485217e-05, "loss": 1.7511, "step": 15650 }, { "epoch": 1.44, "learning_rate": 2.9177032747894696e-05, "loss": 1.6968, "step": 15660 }, { "epoch": 1.44, "learning_rate": 2.9151715635012945e-05, "loss": 1.7, "step": 15670 }, { "epoch": 1.44, "learning_rate": 2.9126394142911344e-05, "loss": 1.7448, "step": 15680 }, { "epoch": 1.44, "learning_rate": 2.910106829829896e-05, "loss": 1.7463, "step": 15690 }, { "epoch": 1.45, "learning_rate": 2.907573812788943e-05, "loss": 1.6469, "step": 15700 }, { "epoch": 1.45, "learning_rate": 2.9050403658400936e-05, "loss": 1.719, "step": 15710 }, { "epoch": 1.45, "learning_rate": 2.902506491655625e-05, "loss": 1.7035, "step": 15720 }, { "epoch": 1.45, "learning_rate": 2.8999721929082596e-05, "loss": 1.7145, "step": 15730 }, { "epoch": 1.45, "learning_rate": 2.89743747227117e-05, "loss": 1.7141, "step": 15740 }, { "epoch": 1.45, "learning_rate": 2.8949023324179736e-05, "loss": 1.7618, "step": 15750 }, { "epoch": 1.45, "learning_rate": 2.892366776022731e-05, "loss": 1.7154, "step": 15760 }, { "epoch": 1.45, "learning_rate": 2.8898308057599388e-05, "loss": 1.7234, "step": 15770 }, { "epoch": 1.45, "learning_rate": 2.8872944243045347e-05, "loss": 1.7442, "step": 15780 }, { "epoch": 1.45, "learning_rate": 2.884757634331887e-05, "loss": 1.6925, "step": 15790 }, { "epoch": 1.45, "learning_rate": 2.8822204385177957e-05, "loss": 1.7423, "step": 15800 }, { "epoch": 1.46, "learning_rate": 2.8796828395384888e-05, "loss": 1.7066, "step": 15810 }, { "epoch": 1.46, "learning_rate": 2.8771448400706203e-05, "loss": 1.7806, "step": 15820 }, { "epoch": 1.46, "learning_rate": 2.874606442791265e-05, "loss": 1.711, "step": 15830 }, { "epoch": 1.46, "learning_rate": 2.8720676503779193e-05, "loss": 1.6829, "step": 15840 }, { "epoch": 1.46, "learning_rate": 2.8695284655084953e-05, "loss": 1.7651, "step": 15850 }, { "epoch": 1.46, "learning_rate": 2.8669888908613186e-05, "loss": 1.7062, "step": 15860 }, { "epoch": 1.46, "learning_rate": 2.8644489291151272e-05, "loss": 1.7309, "step": 15870 }, { "epoch": 1.46, "learning_rate": 2.8619085829490665e-05, "loss": 1.6659, "step": 15880 }, { "epoch": 1.46, "learning_rate": 2.859367855042687e-05, "loss": 1.7235, "step": 15890 }, { "epoch": 1.46, "learning_rate": 2.8568267480759443e-05, "loss": 1.7288, "step": 15900 }, { "epoch": 1.46, "learning_rate": 2.8542852647291905e-05, "loss": 1.7301, "step": 15910 }, { "epoch": 1.47, "learning_rate": 2.851743407683177e-05, "loss": 1.7226, "step": 15920 }, { "epoch": 1.47, "learning_rate": 2.8492011796190477e-05, "loss": 1.7121, "step": 15930 }, { "epoch": 1.47, "learning_rate": 2.8466585832183394e-05, "loss": 1.7098, "step": 15940 }, { "epoch": 1.47, "learning_rate": 2.844115621162976e-05, "loss": 1.7612, "step": 15950 }, { "epoch": 1.47, "learning_rate": 2.841572296135269e-05, "loss": 1.7225, "step": 15960 }, { "epoch": 1.47, "learning_rate": 2.8390286108179114e-05, "loss": 1.6771, "step": 15970 }, { "epoch": 1.47, "learning_rate": 2.8364845678939757e-05, "loss": 1.7074, "step": 15980 }, { "epoch": 1.47, "learning_rate": 2.833940170046913e-05, "loss": 1.6938, "step": 15990 }, { "epoch": 1.47, "learning_rate": 2.8313954199605487e-05, "loss": 1.6877, "step": 16000 }, { "epoch": 1.47, "learning_rate": 2.828850320319077e-05, "loss": 1.7357, "step": 16010 }, { "epoch": 1.47, "learning_rate": 2.826304873807065e-05, "loss": 1.7112, "step": 16020 }, { "epoch": 1.48, "learning_rate": 2.823759083109444e-05, "loss": 1.6702, "step": 16030 }, { "epoch": 1.48, "learning_rate": 2.8212129509115058e-05, "loss": 1.7644, "step": 16040 }, { "epoch": 1.48, "learning_rate": 2.818666479898907e-05, "loss": 1.7191, "step": 16050 }, { "epoch": 1.48, "learning_rate": 2.816119672757658e-05, "loss": 1.7206, "step": 16060 }, { "epoch": 1.48, "learning_rate": 2.8135725321741258e-05, "loss": 1.7208, "step": 16070 }, { "epoch": 1.48, "learning_rate": 2.8110250608350268e-05, "loss": 1.737, "step": 16080 }, { "epoch": 1.48, "learning_rate": 2.8084772614274286e-05, "loss": 1.6965, "step": 16090 }, { "epoch": 1.48, "learning_rate": 2.8059291366387435e-05, "loss": 1.6993, "step": 16100 }, { "epoch": 1.48, "learning_rate": 2.803380689156729e-05, "loss": 1.6881, "step": 16110 }, { "epoch": 1.48, "learning_rate": 2.8008319216694795e-05, "loss": 1.6999, "step": 16120 }, { "epoch": 1.48, "learning_rate": 2.7982828368654294e-05, "loss": 1.8017, "step": 16130 }, { "epoch": 1.49, "learning_rate": 2.795733437433349e-05, "loss": 1.6713, "step": 16140 }, { "epoch": 1.49, "learning_rate": 2.793183726062337e-05, "loss": 1.7431, "step": 16150 }, { "epoch": 1.49, "learning_rate": 2.7906337054418237e-05, "loss": 1.7157, "step": 16160 }, { "epoch": 1.49, "learning_rate": 2.7880833782615644e-05, "loss": 1.776, "step": 16170 }, { "epoch": 1.49, "learning_rate": 2.785532747211639e-05, "loss": 1.757, "step": 16180 }, { "epoch": 1.49, "learning_rate": 2.782981814982446e-05, "loss": 1.6998, "step": 16190 }, { "epoch": 1.49, "learning_rate": 2.7804305842647032e-05, "loss": 1.7445, "step": 16200 }, { "epoch": 1.49, "learning_rate": 2.7778790577494434e-05, "loss": 1.7155, "step": 16210 }, { "epoch": 1.49, "learning_rate": 2.7753272381280088e-05, "loss": 1.7086, "step": 16220 }, { "epoch": 1.49, "learning_rate": 2.772775128092055e-05, "loss": 1.6775, "step": 16230 }, { "epoch": 1.49, "learning_rate": 2.7702227303335405e-05, "loss": 1.7062, "step": 16240 }, { "epoch": 1.5, "learning_rate": 2.767670047544728e-05, "loss": 1.7533, "step": 16250 }, { "epoch": 1.5, "learning_rate": 2.7651170824181823e-05, "loss": 1.7234, "step": 16260 }, { "epoch": 1.5, "learning_rate": 2.7625638376467645e-05, "loss": 1.6802, "step": 16270 }, { "epoch": 1.5, "learning_rate": 2.7600103159236307e-05, "loss": 1.7223, "step": 16280 }, { "epoch": 1.5, "learning_rate": 2.757456519942231e-05, "loss": 1.7798, "step": 16290 }, { "epoch": 1.5, "learning_rate": 2.7549024523963013e-05, "loss": 1.726, "step": 16300 }, { "epoch": 1.5, "learning_rate": 2.7523481159798674e-05, "loss": 1.6668, "step": 16310 }, { "epoch": 1.5, "learning_rate": 2.7497935133872377e-05, "loss": 1.7482, "step": 16320 }, { "epoch": 1.5, "learning_rate": 2.7472386473129995e-05, "loss": 1.7278, "step": 16330 }, { "epoch": 1.5, "learning_rate": 2.744683520452021e-05, "loss": 1.704, "step": 16340 }, { "epoch": 1.51, "learning_rate": 2.742128135499443e-05, "loss": 1.6388, "step": 16350 }, { "epoch": 1.51, "learning_rate": 2.7395724951506802e-05, "loss": 1.7833, "step": 16360 }, { "epoch": 1.51, "learning_rate": 2.737016602101415e-05, "loss": 1.6731, "step": 16370 }, { "epoch": 1.51, "learning_rate": 2.734460459047598e-05, "loss": 1.7292, "step": 16380 }, { "epoch": 1.51, "learning_rate": 2.731904068685443e-05, "loss": 1.67, "step": 16390 }, { "epoch": 1.51, "learning_rate": 2.729347433711424e-05, "loss": 1.7081, "step": 16400 }, { "epoch": 1.51, "learning_rate": 2.726790556822274e-05, "loss": 1.6943, "step": 16410 }, { "epoch": 1.51, "learning_rate": 2.7242334407149806e-05, "loss": 1.695, "step": 16420 }, { "epoch": 1.51, "learning_rate": 2.721676088086783e-05, "loss": 1.7438, "step": 16430 }, { "epoch": 1.51, "learning_rate": 2.7191185016351728e-05, "loss": 1.6678, "step": 16440 }, { "epoch": 1.51, "learning_rate": 2.7165606840578844e-05, "loss": 1.7141, "step": 16450 }, { "epoch": 1.52, "learning_rate": 2.714002638052898e-05, "loss": 1.7647, "step": 16460 }, { "epoch": 1.52, "learning_rate": 2.7114443663184348e-05, "loss": 1.711, "step": 16470 }, { "epoch": 1.52, "learning_rate": 2.7088858715529536e-05, "loss": 1.6975, "step": 16480 }, { "epoch": 1.52, "learning_rate": 2.7063271564551484e-05, "loss": 1.7319, "step": 16490 }, { "epoch": 1.52, "learning_rate": 2.7037682237239466e-05, "loss": 1.6367, "step": 16500 }, { "epoch": 1.52, "learning_rate": 2.7012090760585033e-05, "loss": 1.7635, "step": 16510 }, { "epoch": 1.52, "learning_rate": 2.698649716158202e-05, "loss": 1.6587, "step": 16520 }, { "epoch": 1.52, "learning_rate": 2.6960901467226495e-05, "loss": 1.7169, "step": 16530 }, { "epoch": 1.52, "learning_rate": 2.6935303704516744e-05, "loss": 1.7173, "step": 16540 }, { "epoch": 1.52, "learning_rate": 2.690970390045321e-05, "loss": 1.7269, "step": 16550 }, { "epoch": 1.52, "learning_rate": 2.6884102082038513e-05, "loss": 1.7076, "step": 16560 }, { "epoch": 1.53, "learning_rate": 2.68584982762774e-05, "loss": 1.7035, "step": 16570 }, { "epoch": 1.53, "learning_rate": 2.683289251017669e-05, "loss": 1.7118, "step": 16580 }, { "epoch": 1.53, "learning_rate": 2.6807284810745293e-05, "loss": 1.7123, "step": 16590 }, { "epoch": 1.53, "learning_rate": 2.678167520499415e-05, "loss": 1.6857, "step": 16600 }, { "epoch": 1.53, "learning_rate": 2.6756063719936207e-05, "loss": 1.741, "step": 16610 }, { "epoch": 1.53, "learning_rate": 2.673045038258641e-05, "loss": 1.6781, "step": 16620 }, { "epoch": 1.53, "learning_rate": 2.6704835219961627e-05, "loss": 1.7362, "step": 16630 }, { "epoch": 1.53, "learning_rate": 2.6679218259080686e-05, "loss": 1.7229, "step": 16640 }, { "epoch": 1.53, "learning_rate": 2.6653599526964295e-05, "loss": 1.701, "step": 16650 }, { "epoch": 1.53, "learning_rate": 2.6627979050635032e-05, "loss": 1.6456, "step": 16660 }, { "epoch": 1.53, "learning_rate": 2.6602356857117306e-05, "loss": 1.7771, "step": 16670 }, { "epoch": 1.54, "learning_rate": 2.6576732973437358e-05, "loss": 1.6761, "step": 16680 }, { "epoch": 1.54, "learning_rate": 2.6551107426623194e-05, "loss": 1.7019, "step": 16690 }, { "epoch": 1.54, "learning_rate": 2.652548024370458e-05, "loss": 1.7189, "step": 16700 }, { "epoch": 1.54, "learning_rate": 2.6499851451713014e-05, "loss": 1.7097, "step": 16710 }, { "epoch": 1.54, "learning_rate": 2.6474221077681682e-05, "loss": 1.7236, "step": 16720 }, { "epoch": 1.54, "learning_rate": 2.6448589148645435e-05, "loss": 1.7174, "step": 16730 }, { "epoch": 1.54, "learning_rate": 2.6422955691640783e-05, "loss": 1.7043, "step": 16740 }, { "epoch": 1.54, "learning_rate": 2.6397320733705828e-05, "loss": 1.7441, "step": 16750 }, { "epoch": 1.54, "learning_rate": 2.6371684301880255e-05, "loss": 1.7219, "step": 16760 }, { "epoch": 1.54, "learning_rate": 2.6346046423205335e-05, "loss": 1.7381, "step": 16770 }, { "epoch": 1.54, "learning_rate": 2.632040712472382e-05, "loss": 1.705, "step": 16780 }, { "epoch": 1.55, "learning_rate": 2.6294766433479988e-05, "loss": 1.6977, "step": 16790 }, { "epoch": 1.55, "learning_rate": 2.6269124376519587e-05, "loss": 1.7216, "step": 16800 }, { "epoch": 1.55, "learning_rate": 2.6243480980889797e-05, "loss": 1.7175, "step": 16810 }, { "epoch": 1.55, "learning_rate": 2.6217836273639194e-05, "loss": 1.778, "step": 16820 }, { "epoch": 1.55, "learning_rate": 2.6192190281817774e-05, "loss": 1.7148, "step": 16830 }, { "epoch": 1.55, "learning_rate": 2.6166543032476865e-05, "loss": 1.7349, "step": 16840 }, { "epoch": 1.55, "learning_rate": 2.6140894552669116e-05, "loss": 1.7414, "step": 16850 }, { "epoch": 1.55, "learning_rate": 2.6115244869448492e-05, "loss": 1.6768, "step": 16860 }, { "epoch": 1.55, "learning_rate": 2.6089594009870216e-05, "loss": 1.738, "step": 16870 }, { "epoch": 1.55, "learning_rate": 2.606394200099075e-05, "loss": 1.7104, "step": 16880 }, { "epoch": 1.55, "learning_rate": 2.6038288869867778e-05, "loss": 1.7241, "step": 16890 }, { "epoch": 1.56, "learning_rate": 2.601263464356015e-05, "loss": 1.6887, "step": 16900 }, { "epoch": 1.56, "learning_rate": 2.5986979349127905e-05, "loss": 1.7028, "step": 16910 }, { "epoch": 1.56, "learning_rate": 2.5961323013632173e-05, "loss": 1.7373, "step": 16920 }, { "epoch": 1.56, "learning_rate": 2.593566566413519e-05, "loss": 1.7297, "step": 16930 }, { "epoch": 1.56, "learning_rate": 2.591000732770028e-05, "loss": 1.773, "step": 16940 }, { "epoch": 1.56, "learning_rate": 2.5884348031391793e-05, "loss": 1.6844, "step": 16950 }, { "epoch": 1.56, "learning_rate": 2.5858687802275087e-05, "loss": 1.7264, "step": 16960 }, { "epoch": 1.56, "learning_rate": 2.583302666741652e-05, "loss": 1.7276, "step": 16970 }, { "epoch": 1.56, "learning_rate": 2.5807364653883392e-05, "loss": 1.7384, "step": 16980 }, { "epoch": 1.56, "learning_rate": 2.5781701788743938e-05, "loss": 1.718, "step": 16990 }, { "epoch": 1.56, "learning_rate": 2.5756038099067282e-05, "loss": 1.7619, "step": 17000 }, { "epoch": 1.57, "learning_rate": 2.5730373611923425e-05, "loss": 1.7057, "step": 17010 }, { "epoch": 1.57, "learning_rate": 2.5704708354383206e-05, "loss": 1.7278, "step": 17020 }, { "epoch": 1.57, "learning_rate": 2.5679042353518278e-05, "loss": 1.7112, "step": 17030 }, { "epoch": 1.57, "learning_rate": 2.565337563640109e-05, "loss": 1.7244, "step": 17040 }, { "epoch": 1.57, "learning_rate": 2.5627708230104812e-05, "loss": 1.7042, "step": 17050 }, { "epoch": 1.57, "learning_rate": 2.5602040161703383e-05, "loss": 1.7414, "step": 17060 }, { "epoch": 1.57, "learning_rate": 2.5576371458271413e-05, "loss": 1.7025, "step": 17070 }, { "epoch": 1.57, "learning_rate": 2.5550702146884188e-05, "loss": 1.7495, "step": 17080 }, { "epoch": 1.57, "learning_rate": 2.552503225461764e-05, "loss": 1.6769, "step": 17090 }, { "epoch": 1.57, "learning_rate": 2.5499361808548315e-05, "loss": 1.6858, "step": 17100 }, { "epoch": 1.57, "learning_rate": 2.5473690835753327e-05, "loss": 1.6722, "step": 17110 }, { "epoch": 1.58, "learning_rate": 2.544801936331037e-05, "loss": 1.7015, "step": 17120 }, { "epoch": 1.58, "learning_rate": 2.5422347418297654e-05, "loss": 1.6725, "step": 17130 }, { "epoch": 1.58, "learning_rate": 2.539667502779387e-05, "loss": 1.6965, "step": 17140 }, { "epoch": 1.58, "learning_rate": 2.537100221887821e-05, "loss": 1.717, "step": 17150 }, { "epoch": 1.58, "learning_rate": 2.5345329018630282e-05, "loss": 1.7482, "step": 17160 }, { "epoch": 1.58, "learning_rate": 2.531965545413012e-05, "loss": 1.7158, "step": 17170 }, { "epoch": 1.58, "learning_rate": 2.529398155245814e-05, "loss": 1.7096, "step": 17180 }, { "epoch": 1.58, "learning_rate": 2.526830734069512e-05, "loss": 1.7355, "step": 17190 }, { "epoch": 1.58, "learning_rate": 2.5242632845922137e-05, "loss": 1.6415, "step": 17200 }, { "epoch": 1.58, "learning_rate": 2.521695809522061e-05, "loss": 1.7011, "step": 17210 }, { "epoch": 1.59, "learning_rate": 2.5191283115672202e-05, "loss": 1.6752, "step": 17220 }, { "epoch": 1.59, "learning_rate": 2.5165607934358803e-05, "loss": 1.7408, "step": 17230 }, { "epoch": 1.59, "learning_rate": 2.5139932578362557e-05, "loss": 1.7095, "step": 17240 }, { "epoch": 1.59, "learning_rate": 2.5114257074765755e-05, "loss": 1.7137, "step": 17250 }, { "epoch": 1.59, "learning_rate": 2.5088581450650866e-05, "loss": 1.6944, "step": 17260 }, { "epoch": 1.59, "learning_rate": 2.5062905733100467e-05, "loss": 1.6663, "step": 17270 }, { "epoch": 1.59, "learning_rate": 2.503722994919725e-05, "loss": 1.7517, "step": 17280 }, { "epoch": 1.59, "learning_rate": 2.501155412602396e-05, "loss": 1.6892, "step": 17290 }, { "epoch": 1.59, "learning_rate": 2.498587829066342e-05, "loss": 1.6721, "step": 17300 }, { "epoch": 1.59, "learning_rate": 2.4960202470198422e-05, "loss": 1.706, "step": 17310 }, { "epoch": 1.59, "learning_rate": 2.493452669171176e-05, "loss": 1.7191, "step": 17320 }, { "epoch": 1.6, "learning_rate": 2.4908850982286197e-05, "loss": 1.673, "step": 17330 }, { "epoch": 1.6, "learning_rate": 2.4883175369004385e-05, "loss": 1.6811, "step": 17340 }, { "epoch": 1.6, "learning_rate": 2.485749987894893e-05, "loss": 1.6978, "step": 17350 }, { "epoch": 1.6, "learning_rate": 2.483182453920227e-05, "loss": 1.7055, "step": 17360 }, { "epoch": 1.6, "learning_rate": 2.4806149376846686e-05, "loss": 1.7246, "step": 17370 }, { "epoch": 1.6, "learning_rate": 2.478047441896428e-05, "loss": 1.7207, "step": 17380 }, { "epoch": 1.6, "learning_rate": 2.475479969263694e-05, "loss": 1.6744, "step": 17390 }, { "epoch": 1.6, "learning_rate": 2.4729125224946298e-05, "loss": 1.6585, "step": 17400 }, { "epoch": 1.6, "learning_rate": 2.4703451042973733e-05, "loss": 1.65, "step": 17410 }, { "epoch": 1.6, "learning_rate": 2.467777717380031e-05, "loss": 1.6626, "step": 17420 }, { "epoch": 1.6, "learning_rate": 2.4652103644506762e-05, "loss": 1.7183, "step": 17430 }, { "epoch": 1.61, "learning_rate": 2.4626430482173465e-05, "loss": 1.6577, "step": 17440 }, { "epoch": 1.61, "learning_rate": 2.4600757713880414e-05, "loss": 1.6758, "step": 17450 }, { "epoch": 1.61, "learning_rate": 2.4575085366707177e-05, "loss": 1.6587, "step": 17460 }, { "epoch": 1.61, "learning_rate": 2.4549413467732896e-05, "loss": 1.6944, "step": 17470 }, { "epoch": 1.61, "learning_rate": 2.452374204403623e-05, "loss": 1.6933, "step": 17480 }, { "epoch": 1.61, "learning_rate": 2.4498071122695324e-05, "loss": 1.724, "step": 17490 }, { "epoch": 1.61, "learning_rate": 2.4472400730787816e-05, "loss": 1.7295, "step": 17500 }, { "epoch": 1.61, "learning_rate": 2.444673089539077e-05, "loss": 1.6825, "step": 17510 }, { "epoch": 1.61, "learning_rate": 2.442106164358066e-05, "loss": 1.7409, "step": 17520 }, { "epoch": 1.61, "learning_rate": 2.439539300243337e-05, "loss": 1.7246, "step": 17530 }, { "epoch": 1.61, "learning_rate": 2.436972499902411e-05, "loss": 1.6543, "step": 17540 }, { "epoch": 1.62, "learning_rate": 2.4344057660427434e-05, "loss": 1.7134, "step": 17550 }, { "epoch": 1.62, "learning_rate": 2.4318391013717183e-05, "loss": 1.6648, "step": 17560 }, { "epoch": 1.62, "learning_rate": 2.4292725085966486e-05, "loss": 1.6145, "step": 17570 }, { "epoch": 1.62, "learning_rate": 2.4267059904247683e-05, "loss": 1.6583, "step": 17580 }, { "epoch": 1.62, "learning_rate": 2.4241395495632372e-05, "loss": 1.7642, "step": 17590 }, { "epoch": 1.62, "learning_rate": 2.4215731887191302e-05, "loss": 1.6668, "step": 17600 }, { "epoch": 1.62, "learning_rate": 2.4190069105994382e-05, "loss": 1.6792, "step": 17610 }, { "epoch": 1.62, "learning_rate": 2.4164407179110654e-05, "loss": 1.6748, "step": 17620 }, { "epoch": 1.62, "learning_rate": 2.4138746133608262e-05, "loss": 1.7133, "step": 17630 }, { "epoch": 1.62, "learning_rate": 2.4113085996554402e-05, "loss": 1.6814, "step": 17640 }, { "epoch": 1.62, "learning_rate": 2.4087426795015346e-05, "loss": 1.6936, "step": 17650 }, { "epoch": 1.63, "learning_rate": 2.4061768556056354e-05, "loss": 1.729, "step": 17660 }, { "epoch": 1.63, "learning_rate": 2.4036111306741667e-05, "loss": 1.7259, "step": 17670 }, { "epoch": 1.63, "learning_rate": 2.40104550741345e-05, "loss": 1.6811, "step": 17680 }, { "epoch": 1.63, "learning_rate": 2.398479988529698e-05, "loss": 1.7067, "step": 17690 }, { "epoch": 1.63, "learning_rate": 2.395914576729014e-05, "loss": 1.6952, "step": 17700 }, { "epoch": 1.63, "learning_rate": 2.3933492747173887e-05, "loss": 1.6959, "step": 17710 }, { "epoch": 1.63, "learning_rate": 2.390784085200697e-05, "loss": 1.7126, "step": 17720 }, { "epoch": 1.63, "learning_rate": 2.3882190108846935e-05, "loss": 1.7263, "step": 17730 }, { "epoch": 1.63, "learning_rate": 2.3856540544750135e-05, "loss": 1.6958, "step": 17740 }, { "epoch": 1.63, "learning_rate": 2.3830892186771667e-05, "loss": 1.6466, "step": 17750 }, { "epoch": 1.63, "learning_rate": 2.3805245061965346e-05, "loss": 1.6786, "step": 17760 }, { "epoch": 1.64, "learning_rate": 2.3779599197383724e-05, "loss": 1.7202, "step": 17770 }, { "epoch": 1.64, "learning_rate": 2.375395462007798e-05, "loss": 1.7251, "step": 17780 }, { "epoch": 1.64, "learning_rate": 2.3728311357097962e-05, "loss": 1.7256, "step": 17790 }, { "epoch": 1.64, "learning_rate": 2.370266943549212e-05, "loss": 1.6486, "step": 17800 }, { "epoch": 1.64, "learning_rate": 2.367702888230749e-05, "loss": 1.7101, "step": 17810 }, { "epoch": 1.64, "learning_rate": 2.3651389724589664e-05, "loss": 1.7254, "step": 17820 }, { "epoch": 1.64, "learning_rate": 2.3625751989382776e-05, "loss": 1.6548, "step": 17830 }, { "epoch": 1.64, "learning_rate": 2.3600115703729442e-05, "loss": 1.6551, "step": 17840 }, { "epoch": 1.64, "learning_rate": 2.3574480894670756e-05, "loss": 1.7369, "step": 17850 }, { "epoch": 1.64, "learning_rate": 2.3548847589246255e-05, "loss": 1.7058, "step": 17860 }, { "epoch": 1.64, "learning_rate": 2.352321581449389e-05, "loss": 1.7669, "step": 17870 }, { "epoch": 1.65, "learning_rate": 2.349758559744998e-05, "loss": 1.7086, "step": 17880 }, { "epoch": 1.65, "learning_rate": 2.347195696514924e-05, "loss": 1.7345, "step": 17890 }, { "epoch": 1.65, "learning_rate": 2.3446329944624687e-05, "loss": 1.7157, "step": 17900 }, { "epoch": 1.65, "learning_rate": 2.3420704562907636e-05, "loss": 1.7007, "step": 17910 }, { "epoch": 1.65, "learning_rate": 2.339508084702768e-05, "loss": 1.692, "step": 17920 }, { "epoch": 1.65, "learning_rate": 2.3369458824012654e-05, "loss": 1.6696, "step": 17930 }, { "epoch": 1.65, "learning_rate": 2.3343838520888605e-05, "loss": 1.7023, "step": 17940 }, { "epoch": 1.65, "learning_rate": 2.3318219964679774e-05, "loss": 1.6239, "step": 17950 }, { "epoch": 1.65, "learning_rate": 2.329260318240855e-05, "loss": 1.6705, "step": 17960 }, { "epoch": 1.65, "learning_rate": 2.3266988201095457e-05, "loss": 1.6869, "step": 17970 }, { "epoch": 1.66, "learning_rate": 2.324137504775911e-05, "loss": 1.6903, "step": 17980 }, { "epoch": 1.66, "learning_rate": 2.3215763749416213e-05, "loss": 1.7285, "step": 17990 }, { "epoch": 1.66, "learning_rate": 2.319015433308148e-05, "loss": 1.7311, "step": 18000 }, { "epoch": 1.66, "learning_rate": 2.3164546825767698e-05, "loss": 1.6886, "step": 18010 }, { "epoch": 1.66, "learning_rate": 2.3138941254485582e-05, "loss": 1.697, "step": 18020 }, { "epoch": 1.66, "learning_rate": 2.3113337646243837e-05, "loss": 1.7463, "step": 18030 }, { "epoch": 1.66, "learning_rate": 2.3087736028049084e-05, "loss": 1.7168, "step": 18040 }, { "epoch": 1.66, "learning_rate": 2.3062136426905848e-05, "loss": 1.7221, "step": 18050 }, { "epoch": 1.66, "learning_rate": 2.3036538869816527e-05, "loss": 1.6498, "step": 18060 }, { "epoch": 1.66, "learning_rate": 2.301094338378137e-05, "loss": 1.6669, "step": 18070 }, { "epoch": 1.66, "learning_rate": 2.2985349995798434e-05, "loss": 1.6755, "step": 18080 }, { "epoch": 1.67, "learning_rate": 2.295975873286356e-05, "loss": 1.7197, "step": 18090 }, { "epoch": 1.67, "learning_rate": 2.2934169621970354e-05, "loss": 1.6722, "step": 18100 }, { "epoch": 1.67, "learning_rate": 2.2908582690110146e-05, "loss": 1.6518, "step": 18110 }, { "epoch": 1.67, "learning_rate": 2.288299796427197e-05, "loss": 1.6868, "step": 18120 }, { "epoch": 1.67, "learning_rate": 2.2857415471442547e-05, "loss": 1.7119, "step": 18130 }, { "epoch": 1.67, "learning_rate": 2.2831835238606223e-05, "loss": 1.6862, "step": 18140 }, { "epoch": 1.67, "learning_rate": 2.280625729274497e-05, "loss": 1.7168, "step": 18150 }, { "epoch": 1.67, "learning_rate": 2.2780681660838332e-05, "loss": 1.5838, "step": 18160 }, { "epoch": 1.67, "learning_rate": 2.275510836986344e-05, "loss": 1.7139, "step": 18170 }, { "epoch": 1.67, "learning_rate": 2.2729537446794926e-05, "loss": 1.6533, "step": 18180 }, { "epoch": 1.67, "learning_rate": 2.2703968918604955e-05, "loss": 1.6026, "step": 18190 }, { "epoch": 1.68, "learning_rate": 2.2678402812263144e-05, "loss": 1.7355, "step": 18200 }, { "epoch": 1.68, "learning_rate": 2.265283915473656e-05, "loss": 1.7138, "step": 18210 }, { "epoch": 1.68, "learning_rate": 2.2627277972989694e-05, "loss": 1.6687, "step": 18220 }, { "epoch": 1.68, "learning_rate": 2.2601719293984414e-05, "loss": 1.6976, "step": 18230 }, { "epoch": 1.68, "learning_rate": 2.2576163144679947e-05, "loss": 1.7127, "step": 18240 }, { "epoch": 1.68, "learning_rate": 2.2550609552032875e-05, "loss": 1.727, "step": 18250 }, { "epoch": 1.68, "learning_rate": 2.252505854299706e-05, "loss": 1.6607, "step": 18260 }, { "epoch": 1.68, "learning_rate": 2.2499510144523646e-05, "loss": 1.723, "step": 18270 }, { "epoch": 1.68, "learning_rate": 2.2473964383561025e-05, "loss": 1.6989, "step": 18280 }, { "epoch": 1.68, "learning_rate": 2.244842128705479e-05, "loss": 1.6502, "step": 18290 }, { "epoch": 1.68, "learning_rate": 2.2422880881947768e-05, "loss": 1.6881, "step": 18300 }, { "epoch": 1.69, "learning_rate": 2.2397343195179895e-05, "loss": 1.7273, "step": 18310 }, { "epoch": 1.69, "learning_rate": 2.2371808253688275e-05, "loss": 1.6922, "step": 18320 }, { "epoch": 1.69, "learning_rate": 2.23462760844071e-05, "loss": 1.6885, "step": 18330 }, { "epoch": 1.69, "learning_rate": 2.2320746714267638e-05, "loss": 1.6969, "step": 18340 }, { "epoch": 1.69, "learning_rate": 2.2295220170198207e-05, "loss": 1.6248, "step": 18350 }, { "epoch": 1.69, "learning_rate": 2.226969647912415e-05, "loss": 1.6399, "step": 18360 }, { "epoch": 1.69, "learning_rate": 2.22441756679678e-05, "loss": 1.7001, "step": 18370 }, { "epoch": 1.69, "learning_rate": 2.2218657763648442e-05, "loss": 1.617, "step": 18380 }, { "epoch": 1.69, "learning_rate": 2.2193142793082306e-05, "loss": 1.6754, "step": 18390 }, { "epoch": 1.69, "learning_rate": 2.2167630783182516e-05, "loss": 1.6961, "step": 18400 }, { "epoch": 1.69, "learning_rate": 2.2142121760859075e-05, "loss": 1.7072, "step": 18410 }, { "epoch": 1.7, "learning_rate": 2.211661575301886e-05, "loss": 1.7133, "step": 18420 }, { "epoch": 1.7, "learning_rate": 2.209111278656553e-05, "loss": 1.6345, "step": 18430 }, { "epoch": 1.7, "learning_rate": 2.206561288839956e-05, "loss": 1.6613, "step": 18440 }, { "epoch": 1.7, "learning_rate": 2.2040116085418187e-05, "loss": 1.6596, "step": 18450 }, { "epoch": 1.7, "learning_rate": 2.201462240451537e-05, "loss": 1.6695, "step": 18460 }, { "epoch": 1.7, "learning_rate": 2.1989131872581783e-05, "loss": 1.6223, "step": 18470 }, { "epoch": 1.7, "learning_rate": 2.1963644516504785e-05, "loss": 1.7186, "step": 18480 }, { "epoch": 1.7, "learning_rate": 2.193816036316838e-05, "loss": 1.6348, "step": 18490 }, { "epoch": 1.7, "learning_rate": 2.191267943945319e-05, "loss": 1.6987, "step": 18500 }, { "epoch": 1.7, "learning_rate": 2.1887201772236427e-05, "loss": 1.6693, "step": 18510 }, { "epoch": 1.7, "learning_rate": 2.1861727388391882e-05, "loss": 1.6475, "step": 18520 }, { "epoch": 1.71, "learning_rate": 2.183625631478986e-05, "loss": 1.7146, "step": 18530 }, { "epoch": 1.71, "learning_rate": 2.1810788578297215e-05, "loss": 1.6335, "step": 18540 }, { "epoch": 1.71, "learning_rate": 2.178532420577724e-05, "loss": 1.66, "step": 18550 }, { "epoch": 1.71, "learning_rate": 2.17598632240897e-05, "loss": 1.6871, "step": 18560 }, { "epoch": 1.71, "learning_rate": 2.1734405660090774e-05, "loss": 1.7064, "step": 18570 }, { "epoch": 1.71, "learning_rate": 2.1708951540633037e-05, "loss": 1.6917, "step": 18580 }, { "epoch": 1.71, "learning_rate": 2.168350089256544e-05, "loss": 1.6883, "step": 18590 }, { "epoch": 1.71, "learning_rate": 2.1658053742733264e-05, "loss": 1.7099, "step": 18600 }, { "epoch": 1.71, "learning_rate": 2.1632610117978107e-05, "loss": 1.6792, "step": 18610 }, { "epoch": 1.71, "learning_rate": 2.1607170045137844e-05, "loss": 1.7255, "step": 18620 }, { "epoch": 1.71, "learning_rate": 2.1581733551046595e-05, "loss": 1.6937, "step": 18630 }, { "epoch": 1.72, "learning_rate": 2.155630066253472e-05, "loss": 1.6393, "step": 18640 }, { "epoch": 1.72, "learning_rate": 2.153087140642876e-05, "loss": 1.6662, "step": 18650 }, { "epoch": 1.72, "learning_rate": 2.1505445809551457e-05, "loss": 1.6419, "step": 18660 }, { "epoch": 1.72, "learning_rate": 2.1480023898721653e-05, "loss": 1.7251, "step": 18670 }, { "epoch": 1.72, "learning_rate": 2.145460570075432e-05, "loss": 1.6772, "step": 18680 }, { "epoch": 1.72, "learning_rate": 2.142919124246052e-05, "loss": 1.6656, "step": 18690 }, { "epoch": 1.72, "learning_rate": 2.1403780550647366e-05, "loss": 1.7205, "step": 18700 }, { "epoch": 1.72, "learning_rate": 2.1378373652117972e-05, "loss": 1.6516, "step": 18710 }, { "epoch": 1.72, "learning_rate": 2.1352970573671503e-05, "loss": 1.7429, "step": 18720 }, { "epoch": 1.72, "learning_rate": 2.1327571342103058e-05, "loss": 1.6788, "step": 18730 }, { "epoch": 1.73, "learning_rate": 2.1302175984203684e-05, "loss": 1.6381, "step": 18740 }, { "epoch": 1.73, "learning_rate": 2.1276784526760343e-05, "loss": 1.6717, "step": 18750 }, { "epoch": 1.73, "learning_rate": 2.1251396996555886e-05, "loss": 1.6717, "step": 18760 }, { "epoch": 1.73, "learning_rate": 2.1226013420369016e-05, "loss": 1.6913, "step": 18770 }, { "epoch": 1.73, "learning_rate": 2.1200633824974277e-05, "loss": 1.6189, "step": 18780 }, { "epoch": 1.73, "learning_rate": 2.1175258237141997e-05, "loss": 1.6713, "step": 18790 }, { "epoch": 1.73, "learning_rate": 2.1149886683638297e-05, "loss": 1.7423, "step": 18800 }, { "epoch": 1.73, "learning_rate": 2.1124519191225023e-05, "loss": 1.6598, "step": 18810 }, { "epoch": 1.73, "learning_rate": 2.1099155786659752e-05, "loss": 1.6731, "step": 18820 }, { "epoch": 1.73, "learning_rate": 2.1073796496695726e-05, "loss": 1.6498, "step": 18830 }, { "epoch": 1.73, "learning_rate": 2.104844134808189e-05, "loss": 1.7319, "step": 18840 }, { "epoch": 1.74, "learning_rate": 2.102309036756278e-05, "loss": 1.6132, "step": 18850 }, { "epoch": 1.74, "learning_rate": 2.0997743581878558e-05, "loss": 1.6507, "step": 18860 }, { "epoch": 1.74, "learning_rate": 2.0972401017764946e-05, "loss": 1.6662, "step": 18870 }, { "epoch": 1.74, "learning_rate": 2.0947062701953226e-05, "loss": 1.6616, "step": 18880 }, { "epoch": 1.74, "learning_rate": 2.0921728661170188e-05, "loss": 1.7256, "step": 18890 }, { "epoch": 1.74, "learning_rate": 2.0896398922138122e-05, "loss": 1.6421, "step": 18900 }, { "epoch": 1.74, "learning_rate": 2.0871073511574778e-05, "loss": 1.7173, "step": 18910 }, { "epoch": 1.74, "learning_rate": 2.084575245619334e-05, "loss": 1.6635, "step": 18920 }, { "epoch": 1.74, "learning_rate": 2.0820435782702395e-05, "loss": 1.6579, "step": 18930 }, { "epoch": 1.74, "learning_rate": 2.0795123517805908e-05, "loss": 1.6619, "step": 18940 }, { "epoch": 1.74, "learning_rate": 2.076981568820319e-05, "loss": 1.6784, "step": 18950 }, { "epoch": 1.75, "learning_rate": 2.07445123205889e-05, "loss": 1.6808, "step": 18960 }, { "epoch": 1.75, "learning_rate": 2.0719213441652955e-05, "loss": 1.6669, "step": 18970 }, { "epoch": 1.75, "learning_rate": 2.0693919078080558e-05, "loss": 1.6346, "step": 18980 }, { "epoch": 1.75, "learning_rate": 2.0668629256552148e-05, "loss": 1.6569, "step": 18990 }, { "epoch": 1.75, "learning_rate": 2.064334400374336e-05, "loss": 1.6797, "step": 19000 }, { "epoch": 1.75, "learning_rate": 2.0618063346325017e-05, "loss": 1.6494, "step": 19010 }, { "epoch": 1.75, "learning_rate": 2.0592787310963106e-05, "loss": 1.6951, "step": 19020 }, { "epoch": 1.75, "learning_rate": 2.0567515924318727e-05, "loss": 1.6996, "step": 19030 }, { "epoch": 1.75, "learning_rate": 2.054224921304808e-05, "loss": 1.7069, "step": 19040 }, { "epoch": 1.75, "learning_rate": 2.051698720380243e-05, "loss": 1.6511, "step": 19050 }, { "epoch": 1.75, "learning_rate": 2.0491729923228082e-05, "loss": 1.6989, "step": 19060 }, { "epoch": 1.76, "learning_rate": 2.0466477397966347e-05, "loss": 1.6417, "step": 19070 }, { "epoch": 1.76, "learning_rate": 2.0441229654653553e-05, "loss": 1.6478, "step": 19080 }, { "epoch": 1.76, "learning_rate": 2.041598671992095e-05, "loss": 1.6916, "step": 19090 }, { "epoch": 1.76, "learning_rate": 2.0390748620394716e-05, "loss": 1.6337, "step": 19100 }, { "epoch": 1.76, "learning_rate": 2.0365515382695955e-05, "loss": 1.6721, "step": 19110 }, { "epoch": 1.76, "learning_rate": 2.034028703344061e-05, "loss": 1.6292, "step": 19120 }, { "epoch": 1.76, "learning_rate": 2.031506359923949e-05, "loss": 1.6424, "step": 19130 }, { "epoch": 1.76, "learning_rate": 2.0289845106698223e-05, "loss": 1.7019, "step": 19140 }, { "epoch": 1.76, "learning_rate": 2.0264631582417206e-05, "loss": 1.684, "step": 19150 }, { "epoch": 1.76, "learning_rate": 2.0239423052991605e-05, "loss": 1.6668, "step": 19160 }, { "epoch": 1.76, "learning_rate": 2.021421954501132e-05, "loss": 1.6679, "step": 19170 }, { "epoch": 1.77, "learning_rate": 2.018902108506095e-05, "loss": 1.6382, "step": 19180 }, { "epoch": 1.77, "learning_rate": 2.016382769971976e-05, "loss": 1.6105, "step": 19190 }, { "epoch": 1.77, "learning_rate": 2.01386394155617e-05, "loss": 1.6761, "step": 19200 }, { "epoch": 1.77, "learning_rate": 2.0113456259155293e-05, "loss": 1.6176, "step": 19210 }, { "epoch": 1.77, "learning_rate": 2.008827825706368e-05, "loss": 1.6534, "step": 19220 }, { "epoch": 1.77, "learning_rate": 2.006310543584456e-05, "loss": 1.6857, "step": 19230 }, { "epoch": 1.77, "learning_rate": 2.0037937822050156e-05, "loss": 1.6486, "step": 19240 }, { "epoch": 1.77, "learning_rate": 2.00127754422272e-05, "loss": 1.6794, "step": 19250 }, { "epoch": 1.77, "learning_rate": 1.9987618322916936e-05, "loss": 1.6648, "step": 19260 }, { "epoch": 1.77, "learning_rate": 1.9962466490655023e-05, "loss": 1.6852, "step": 19270 }, { "epoch": 1.77, "learning_rate": 1.9937319971971552e-05, "loss": 1.6741, "step": 19280 }, { "epoch": 1.78, "learning_rate": 1.9912178793391013e-05, "loss": 1.6804, "step": 19290 }, { "epoch": 1.78, "learning_rate": 1.9887042981432257e-05, "loss": 1.7168, "step": 19300 }, { "epoch": 1.78, "learning_rate": 1.9861912562608482e-05, "loss": 1.6666, "step": 19310 }, { "epoch": 1.78, "learning_rate": 1.9836787563427195e-05, "loss": 1.6937, "step": 19320 }, { "epoch": 1.78, "learning_rate": 1.9811668010390192e-05, "loss": 1.6545, "step": 19330 }, { "epoch": 1.78, "learning_rate": 1.9786553929993508e-05, "loss": 1.6361, "step": 19340 }, { "epoch": 1.78, "learning_rate": 1.9761445348727422e-05, "loss": 1.6781, "step": 19350 }, { "epoch": 1.78, "learning_rate": 1.9736342293076403e-05, "loss": 1.6574, "step": 19360 }, { "epoch": 1.78, "learning_rate": 1.9711244789519087e-05, "loss": 1.7006, "step": 19370 }, { "epoch": 1.78, "learning_rate": 1.9686152864528277e-05, "loss": 1.716, "step": 19380 }, { "epoch": 1.78, "learning_rate": 1.9661066544570876e-05, "loss": 1.71, "step": 19390 }, { "epoch": 1.79, "learning_rate": 1.9635985856107864e-05, "loss": 1.6952, "step": 19400 }, { "epoch": 1.79, "learning_rate": 1.96109108255943e-05, "loss": 1.6634, "step": 19410 }, { "epoch": 1.79, "learning_rate": 1.958584147947926e-05, "loss": 1.6683, "step": 19420 }, { "epoch": 1.79, "learning_rate": 1.9560777844205827e-05, "loss": 1.5973, "step": 19430 }, { "epoch": 1.79, "learning_rate": 1.953571994621108e-05, "loss": 1.7138, "step": 19440 }, { "epoch": 1.79, "learning_rate": 1.9510667811926023e-05, "loss": 1.7011, "step": 19450 }, { "epoch": 1.79, "learning_rate": 1.9485621467775588e-05, "loss": 1.6806, "step": 19460 }, { "epoch": 1.79, "learning_rate": 1.9460580940178596e-05, "loss": 1.674, "step": 19470 }, { "epoch": 1.79, "learning_rate": 1.943554625554774e-05, "loss": 1.6819, "step": 19480 }, { "epoch": 1.79, "learning_rate": 1.9410517440289535e-05, "loss": 1.6854, "step": 19490 }, { "epoch": 1.79, "learning_rate": 1.938549452080434e-05, "loss": 1.6975, "step": 19500 }, { "epoch": 1.8, "learning_rate": 1.9360477523486256e-05, "loss": 1.6087, "step": 19510 }, { "epoch": 1.8, "learning_rate": 1.9335466474723154e-05, "loss": 1.6403, "step": 19520 }, { "epoch": 1.8, "learning_rate": 1.9310461400896632e-05, "loss": 1.6303, "step": 19530 }, { "epoch": 1.8, "learning_rate": 1.9285462328381978e-05, "loss": 1.6629, "step": 19540 }, { "epoch": 1.8, "learning_rate": 1.9260469283548153e-05, "loss": 1.654, "step": 19550 }, { "epoch": 1.8, "learning_rate": 1.923548229275777e-05, "loss": 1.7324, "step": 19560 }, { "epoch": 1.8, "learning_rate": 1.9210501382367045e-05, "loss": 1.6442, "step": 19570 }, { "epoch": 1.8, "learning_rate": 1.9185526578725784e-05, "loss": 1.6777, "step": 19580 }, { "epoch": 1.8, "learning_rate": 1.916055790817735e-05, "loss": 1.6246, "step": 19590 }, { "epoch": 1.8, "learning_rate": 1.913559539705863e-05, "loss": 1.6959, "step": 19600 }, { "epoch": 1.81, "learning_rate": 1.9110639071700024e-05, "loss": 1.6743, "step": 19610 }, { "epoch": 1.81, "learning_rate": 1.9085688958425416e-05, "loss": 1.672, "step": 19620 }, { "epoch": 1.81, "learning_rate": 1.9060745083552125e-05, "loss": 1.668, "step": 19630 }, { "epoch": 1.81, "learning_rate": 1.903580747339088e-05, "loss": 1.6651, "step": 19640 }, { "epoch": 1.81, "learning_rate": 1.901087615424582e-05, "loss": 1.6712, "step": 19650 }, { "epoch": 1.81, "learning_rate": 1.8985951152414445e-05, "loss": 1.6458, "step": 19660 }, { "epoch": 1.81, "learning_rate": 1.8961032494187574e-05, "loss": 1.6897, "step": 19670 }, { "epoch": 1.81, "learning_rate": 1.8936120205849367e-05, "loss": 1.6139, "step": 19680 }, { "epoch": 1.81, "learning_rate": 1.8911214313677237e-05, "loss": 1.6794, "step": 19690 }, { "epoch": 1.81, "learning_rate": 1.8886314843941864e-05, "loss": 1.5615, "step": 19700 }, { "epoch": 1.81, "learning_rate": 1.8861421822907144e-05, "loss": 1.6718, "step": 19710 }, { "epoch": 1.82, "learning_rate": 1.8836535276830185e-05, "loss": 1.6843, "step": 19720 }, { "epoch": 1.82, "learning_rate": 1.8811655231961238e-05, "loss": 1.6223, "step": 19730 }, { "epoch": 1.82, "learning_rate": 1.8786781714543746e-05, "loss": 1.6008, "step": 19740 }, { "epoch": 1.82, "learning_rate": 1.8761914750814215e-05, "loss": 1.6355, "step": 19750 }, { "epoch": 1.82, "learning_rate": 1.8737054367002267e-05, "loss": 1.6959, "step": 19760 }, { "epoch": 1.82, "learning_rate": 1.8712200589330576e-05, "loss": 1.6611, "step": 19770 }, { "epoch": 1.82, "learning_rate": 1.8687353444014842e-05, "loss": 1.6096, "step": 19780 }, { "epoch": 1.82, "learning_rate": 1.866251295726377e-05, "loss": 1.6439, "step": 19790 }, { "epoch": 1.82, "learning_rate": 1.8637679155279068e-05, "loss": 1.7313, "step": 19800 }, { "epoch": 1.82, "learning_rate": 1.8612852064255358e-05, "loss": 1.6606, "step": 19810 }, { "epoch": 1.82, "learning_rate": 1.85880317103802e-05, "loss": 1.6576, "step": 19820 }, { "epoch": 1.83, "learning_rate": 1.8563218119834042e-05, "loss": 1.6584, "step": 19830 }, { "epoch": 1.83, "learning_rate": 1.8538411318790203e-05, "loss": 1.6709, "step": 19840 }, { "epoch": 1.83, "learning_rate": 1.8513611333414833e-05, "loss": 1.7167, "step": 19850 }, { "epoch": 1.83, "learning_rate": 1.84888181898669e-05, "loss": 1.6967, "step": 19860 }, { "epoch": 1.83, "learning_rate": 1.8464031914298164e-05, "loss": 1.6105, "step": 19870 }, { "epoch": 1.83, "learning_rate": 1.843925253285312e-05, "loss": 1.7035, "step": 19880 }, { "epoch": 1.83, "learning_rate": 1.8414480071669e-05, "loss": 1.6615, "step": 19890 }, { "epoch": 1.83, "learning_rate": 1.8389714556875736e-05, "loss": 1.6745, "step": 19900 }, { "epoch": 1.83, "learning_rate": 1.836495601459593e-05, "loss": 1.6604, "step": 19910 }, { "epoch": 1.83, "learning_rate": 1.8340204470944856e-05, "loss": 1.6873, "step": 19920 }, { "epoch": 1.83, "learning_rate": 1.8315459952030368e-05, "loss": 1.6102, "step": 19930 }, { "epoch": 1.84, "learning_rate": 1.8290722483952926e-05, "loss": 1.6882, "step": 19940 }, { "epoch": 1.84, "learning_rate": 1.8265992092805557e-05, "loss": 1.6486, "step": 19950 }, { "epoch": 1.84, "learning_rate": 1.8241268804673812e-05, "loss": 1.6355, "step": 19960 }, { "epoch": 1.84, "learning_rate": 1.8216552645635763e-05, "loss": 1.703, "step": 19970 }, { "epoch": 1.84, "learning_rate": 1.8191843641761958e-05, "loss": 1.6144, "step": 19980 }, { "epoch": 1.84, "learning_rate": 1.816714181911539e-05, "loss": 1.6748, "step": 19990 }, { "epoch": 1.84, "learning_rate": 1.8142447203751488e-05, "loss": 1.6349, "step": 20000 }, { "epoch": 1.84, "learning_rate": 1.8117759821718072e-05, "loss": 1.6769, "step": 20010 }, { "epoch": 1.84, "learning_rate": 1.8093079699055333e-05, "loss": 1.6327, "step": 20020 }, { "epoch": 1.84, "learning_rate": 1.80684068617958e-05, "loss": 1.6655, "step": 20030 }, { "epoch": 1.84, "learning_rate": 1.8043741335964336e-05, "loss": 1.6659, "step": 20040 }, { "epoch": 1.85, "learning_rate": 1.801908314757808e-05, "loss": 1.6349, "step": 20050 }, { "epoch": 1.85, "learning_rate": 1.799443232264642e-05, "loss": 1.6921, "step": 20060 }, { "epoch": 1.85, "learning_rate": 1.7969788887170995e-05, "loss": 1.6803, "step": 20070 }, { "epoch": 1.85, "learning_rate": 1.7945152867145644e-05, "loss": 1.6642, "step": 20080 }, { "epoch": 1.85, "learning_rate": 1.7920524288556367e-05, "loss": 1.6887, "step": 20090 }, { "epoch": 1.85, "learning_rate": 1.789590317738135e-05, "loss": 1.6534, "step": 20100 }, { "epoch": 1.85, "learning_rate": 1.7871289559590876e-05, "loss": 1.6639, "step": 20110 }, { "epoch": 1.85, "learning_rate": 1.784668346114733e-05, "loss": 1.6367, "step": 20120 }, { "epoch": 1.85, "learning_rate": 1.7822084908005167e-05, "loss": 1.6691, "step": 20130 }, { "epoch": 1.85, "learning_rate": 1.779749392611088e-05, "loss": 1.6708, "step": 20140 }, { "epoch": 1.85, "learning_rate": 1.7772910541402976e-05, "loss": 1.6146, "step": 20150 }, { "epoch": 1.86, "learning_rate": 1.7748334779811962e-05, "loss": 1.6423, "step": 20160 }, { "epoch": 1.86, "learning_rate": 1.772376666726029e-05, "loss": 1.6311, "step": 20170 }, { "epoch": 1.86, "learning_rate": 1.7699206229662343e-05, "loss": 1.6631, "step": 20180 }, { "epoch": 1.86, "learning_rate": 1.7674653492924413e-05, "loss": 1.6662, "step": 20190 }, { "epoch": 1.86, "learning_rate": 1.7650108482944672e-05, "loss": 1.6621, "step": 20200 }, { "epoch": 1.86, "learning_rate": 1.762557122561313e-05, "loss": 1.6509, "step": 20210 }, { "epoch": 1.86, "learning_rate": 1.760104174681164e-05, "loss": 1.6139, "step": 20220 }, { "epoch": 1.86, "learning_rate": 1.757652007241383e-05, "loss": 1.6286, "step": 20230 }, { "epoch": 1.86, "learning_rate": 1.755200622828511e-05, "loss": 1.6895, "step": 20240 }, { "epoch": 1.86, "learning_rate": 1.7527500240282625e-05, "loss": 1.5687, "step": 20250 }, { "epoch": 1.86, "learning_rate": 1.7503002134255224e-05, "loss": 1.6699, "step": 20260 }, { "epoch": 1.87, "learning_rate": 1.747851193604345e-05, "loss": 1.6942, "step": 20270 }, { "epoch": 1.87, "learning_rate": 1.7454029671479523e-05, "loss": 1.6288, "step": 20280 }, { "epoch": 1.87, "learning_rate": 1.742955536638727e-05, "loss": 1.6361, "step": 20290 }, { "epoch": 1.87, "learning_rate": 1.7405089046582123e-05, "loss": 1.6215, "step": 20300 }, { "epoch": 1.87, "learning_rate": 1.7380630737871108e-05, "loss": 1.6011, "step": 20310 }, { "epoch": 1.87, "learning_rate": 1.7356180466052788e-05, "loss": 1.6986, "step": 20320 }, { "epoch": 1.87, "learning_rate": 1.733173825691724e-05, "loss": 1.6367, "step": 20330 }, { "epoch": 1.87, "learning_rate": 1.7307304136246067e-05, "loss": 1.6509, "step": 20340 }, { "epoch": 1.87, "learning_rate": 1.7282878129812316e-05, "loss": 1.6771, "step": 20350 }, { "epoch": 1.87, "learning_rate": 1.7258460263380478e-05, "loss": 1.6348, "step": 20360 }, { "epoch": 1.88, "learning_rate": 1.7234050562706465e-05, "loss": 1.5845, "step": 20370 }, { "epoch": 1.88, "learning_rate": 1.7209649053537564e-05, "loss": 1.6711, "step": 20380 }, { "epoch": 1.88, "learning_rate": 1.7185255761612432e-05, "loss": 1.6807, "step": 20390 }, { "epoch": 1.88, "learning_rate": 1.7160870712661062e-05, "loss": 1.627, "step": 20400 }, { "epoch": 1.88, "learning_rate": 1.713649393240474e-05, "loss": 1.6133, "step": 20410 }, { "epoch": 1.88, "learning_rate": 1.7112125446556038e-05, "loss": 1.6617, "step": 20420 }, { "epoch": 1.88, "learning_rate": 1.7087765280818778e-05, "loss": 1.6365, "step": 20430 }, { "epoch": 1.88, "learning_rate": 1.7063413460888003e-05, "loss": 1.6828, "step": 20440 }, { "epoch": 1.88, "learning_rate": 1.7039070012449947e-05, "loss": 1.6291, "step": 20450 }, { "epoch": 1.88, "learning_rate": 1.701473496118204e-05, "loss": 1.6631, "step": 20460 }, { "epoch": 1.88, "learning_rate": 1.699040833275283e-05, "loss": 1.6744, "step": 20470 }, { "epoch": 1.89, "learning_rate": 1.6966090152821982e-05, "loss": 1.5781, "step": 20480 }, { "epoch": 1.89, "learning_rate": 1.694178044704026e-05, "loss": 1.6872, "step": 20490 }, { "epoch": 1.89, "learning_rate": 1.6917479241049478e-05, "loss": 1.6349, "step": 20500 }, { "epoch": 1.89, "learning_rate": 1.6893186560482492e-05, "loss": 1.6885, "step": 20510 }, { "epoch": 1.89, "learning_rate": 1.6868902430963173e-05, "loss": 1.6505, "step": 20520 }, { "epoch": 1.89, "learning_rate": 1.6844626878106357e-05, "loss": 1.6426, "step": 20530 }, { "epoch": 1.89, "learning_rate": 1.682035992751784e-05, "loss": 1.6292, "step": 20540 }, { "epoch": 1.89, "learning_rate": 1.679610160479434e-05, "loss": 1.6434, "step": 20550 }, { "epoch": 1.89, "learning_rate": 1.6771851935523486e-05, "loss": 1.6731, "step": 20560 }, { "epoch": 1.89, "learning_rate": 1.674761094528376e-05, "loss": 1.6292, "step": 20570 }, { "epoch": 1.89, "learning_rate": 1.6723378659644515e-05, "loss": 1.6762, "step": 20580 }, { "epoch": 1.9, "learning_rate": 1.6699155104165904e-05, "loss": 1.6727, "step": 20590 }, { "epoch": 1.9, "learning_rate": 1.667494030439888e-05, "loss": 1.6416, "step": 20600 }, { "epoch": 1.9, "learning_rate": 1.6650734285885145e-05, "loss": 1.6526, "step": 20610 }, { "epoch": 1.9, "learning_rate": 1.662653707415716e-05, "loss": 1.6688, "step": 20620 }, { "epoch": 1.9, "learning_rate": 1.660234869473808e-05, "loss": 1.6272, "step": 20630 }, { "epoch": 1.9, "learning_rate": 1.6578169173141756e-05, "loss": 1.6615, "step": 20640 }, { "epoch": 1.9, "learning_rate": 1.655399853487269e-05, "loss": 1.6249, "step": 20650 }, { "epoch": 1.9, "learning_rate": 1.6529836805426012e-05, "loss": 1.6695, "step": 20660 }, { "epoch": 1.9, "learning_rate": 1.650568401028746e-05, "loss": 1.6482, "step": 20670 }, { "epoch": 1.9, "learning_rate": 1.648154017493334e-05, "loss": 1.6266, "step": 20680 }, { "epoch": 1.9, "learning_rate": 1.6457405324830508e-05, "loss": 1.6883, "step": 20690 }, { "epoch": 1.91, "learning_rate": 1.6433279485436364e-05, "loss": 1.6747, "step": 20700 }, { "epoch": 1.91, "learning_rate": 1.6409162682198775e-05, "loss": 1.6006, "step": 20710 }, { "epoch": 1.91, "learning_rate": 1.6385054940556092e-05, "loss": 1.6925, "step": 20720 }, { "epoch": 1.91, "learning_rate": 1.6360956285937097e-05, "loss": 1.6679, "step": 20730 }, { "epoch": 1.91, "learning_rate": 1.6336866743761003e-05, "loss": 1.6915, "step": 20740 }, { "epoch": 1.91, "learning_rate": 1.631278633943739e-05, "loss": 1.6595, "step": 20750 }, { "epoch": 1.91, "learning_rate": 1.6288715098366224e-05, "loss": 1.6124, "step": 20760 }, { "epoch": 1.91, "learning_rate": 1.6264653045937794e-05, "loss": 1.6207, "step": 20770 }, { "epoch": 1.91, "learning_rate": 1.6240600207532686e-05, "loss": 1.6279, "step": 20780 }, { "epoch": 1.91, "learning_rate": 1.621655660852178e-05, "loss": 1.5784, "step": 20790 }, { "epoch": 1.91, "learning_rate": 1.6192522274266214e-05, "loss": 1.6515, "step": 20800 }, { "epoch": 1.92, "learning_rate": 1.6168497230117328e-05, "loss": 1.6594, "step": 20810 }, { "epoch": 1.92, "learning_rate": 1.6144481501416707e-05, "loss": 1.6321, "step": 20820 }, { "epoch": 1.92, "learning_rate": 1.6120475113496077e-05, "loss": 1.6808, "step": 20830 }, { "epoch": 1.92, "learning_rate": 1.6096478091677314e-05, "loss": 1.6697, "step": 20840 }, { "epoch": 1.92, "learning_rate": 1.607249046127242e-05, "loss": 1.6591, "step": 20850 }, { "epoch": 1.92, "learning_rate": 1.6048512247583495e-05, "loss": 1.629, "step": 20860 }, { "epoch": 1.92, "learning_rate": 1.6024543475902686e-05, "loss": 1.6712, "step": 20870 }, { "epoch": 1.92, "learning_rate": 1.6000584171512225e-05, "loss": 1.7041, "step": 20880 }, { "epoch": 1.92, "learning_rate": 1.5976634359684313e-05, "loss": 1.6399, "step": 20890 }, { "epoch": 1.92, "learning_rate": 1.595269406568116e-05, "loss": 1.629, "step": 20900 }, { "epoch": 1.92, "learning_rate": 1.5928763314754927e-05, "loss": 1.5882, "step": 20910 }, { "epoch": 1.93, "learning_rate": 1.590484213214772e-05, "loss": 1.6595, "step": 20920 }, { "epoch": 1.93, "learning_rate": 1.5880930543091526e-05, "loss": 1.6252, "step": 20930 }, { "epoch": 1.93, "learning_rate": 1.585702857280826e-05, "loss": 1.6354, "step": 20940 }, { "epoch": 1.93, "learning_rate": 1.583313624650965e-05, "loss": 1.6565, "step": 20950 }, { "epoch": 1.93, "learning_rate": 1.5809253589397267e-05, "loss": 1.6684, "step": 20960 }, { "epoch": 1.93, "learning_rate": 1.5785380626662484e-05, "loss": 1.6105, "step": 20970 }, { "epoch": 1.93, "learning_rate": 1.576151738348643e-05, "loss": 1.6705, "step": 20980 }, { "epoch": 1.93, "learning_rate": 1.5737663885040023e-05, "loss": 1.6981, "step": 20990 }, { "epoch": 1.93, "learning_rate": 1.5713820156483862e-05, "loss": 1.5712, "step": 21000 }, { "epoch": 1.93, "learning_rate": 1.5689986222968256e-05, "loss": 1.6362, "step": 21010 }, { "epoch": 1.93, "learning_rate": 1.5666162109633187e-05, "loss": 1.6304, "step": 21020 }, { "epoch": 1.94, "learning_rate": 1.5642347841608267e-05, "loss": 1.6838, "step": 21030 }, { "epoch": 1.94, "learning_rate": 1.561854344401273e-05, "loss": 1.652, "step": 21040 }, { "epoch": 1.94, "learning_rate": 1.5594748941955406e-05, "loss": 1.6314, "step": 21050 }, { "epoch": 1.94, "learning_rate": 1.5570964360534673e-05, "loss": 1.6703, "step": 21060 }, { "epoch": 1.94, "learning_rate": 1.5547189724838455e-05, "loss": 1.6599, "step": 21070 }, { "epoch": 1.94, "learning_rate": 1.5523425059944176e-05, "loss": 1.5975, "step": 21080 }, { "epoch": 1.94, "learning_rate": 1.5499670390918754e-05, "loss": 1.6161, "step": 21090 }, { "epoch": 1.94, "learning_rate": 1.5475925742818547e-05, "loss": 1.6174, "step": 21100 }, { "epoch": 1.94, "learning_rate": 1.545219114068937e-05, "loss": 1.5893, "step": 21110 }, { "epoch": 1.94, "learning_rate": 1.5428466609566417e-05, "loss": 1.6351, "step": 21120 }, { "epoch": 1.95, "learning_rate": 1.540475217447427e-05, "loss": 1.6589, "step": 21130 }, { "epoch": 1.95, "learning_rate": 1.5381047860426855e-05, "loss": 1.6384, "step": 21140 }, { "epoch": 1.95, "learning_rate": 1.5357353692427433e-05, "loss": 1.6534, "step": 21150 }, { "epoch": 1.95, "learning_rate": 1.533366969546854e-05, "loss": 1.6413, "step": 21160 }, { "epoch": 1.95, "learning_rate": 1.530999589453202e-05, "loss": 1.6548, "step": 21170 }, { "epoch": 1.95, "learning_rate": 1.5286332314588935e-05, "loss": 1.642, "step": 21180 }, { "epoch": 1.95, "learning_rate": 1.5262678980599572e-05, "loss": 1.644, "step": 21190 }, { "epoch": 1.95, "learning_rate": 1.5239035917513412e-05, "loss": 1.6382, "step": 21200 }, { "epoch": 1.95, "learning_rate": 1.5215403150269103e-05, "loss": 1.6721, "step": 21210 }, { "epoch": 1.95, "learning_rate": 1.5191780703794417e-05, "loss": 1.5831, "step": 21220 }, { "epoch": 1.95, "learning_rate": 1.5168168603006283e-05, "loss": 1.6083, "step": 21230 }, { "epoch": 1.96, "learning_rate": 1.5144566872810675e-05, "loss": 1.5992, "step": 21240 }, { "epoch": 1.96, "learning_rate": 1.5120975538102641e-05, "loss": 1.617, "step": 21250 }, { "epoch": 1.96, "learning_rate": 1.5097394623766268e-05, "loss": 1.6203, "step": 21260 }, { "epoch": 1.96, "learning_rate": 1.5073824154674645e-05, "loss": 1.6557, "step": 21270 }, { "epoch": 1.96, "learning_rate": 1.5050264155689847e-05, "loss": 1.6215, "step": 21280 }, { "epoch": 1.96, "learning_rate": 1.502671465166291e-05, "loss": 1.6724, "step": 21290 }, { "epoch": 1.96, "learning_rate": 1.5003175667433794e-05, "loss": 1.5907, "step": 21300 }, { "epoch": 1.96, "learning_rate": 1.4979647227831361e-05, "loss": 1.6073, "step": 21310 }, { "epoch": 1.96, "learning_rate": 1.495612935767336e-05, "loss": 1.6261, "step": 21320 }, { "epoch": 1.96, "learning_rate": 1.4932622081766376e-05, "loss": 1.6442, "step": 21330 }, { "epoch": 1.96, "learning_rate": 1.4909125424905824e-05, "loss": 1.6906, "step": 21340 }, { "epoch": 1.97, "learning_rate": 1.488563941187594e-05, "loss": 1.5918, "step": 21350 }, { "epoch": 1.97, "learning_rate": 1.4862164067449702e-05, "loss": 1.6099, "step": 21360 }, { "epoch": 1.97, "learning_rate": 1.4838699416388854e-05, "loss": 1.6243, "step": 21370 }, { "epoch": 1.97, "learning_rate": 1.4815245483443851e-05, "loss": 1.617, "step": 21380 }, { "epoch": 1.97, "learning_rate": 1.4791802293353849e-05, "loss": 1.6126, "step": 21390 }, { "epoch": 1.97, "learning_rate": 1.4768369870846654e-05, "loss": 1.6433, "step": 21400 }, { "epoch": 1.97, "learning_rate": 1.4744948240638757e-05, "loss": 1.5567, "step": 21410 }, { "epoch": 1.97, "learning_rate": 1.472153742743522e-05, "loss": 1.5798, "step": 21420 }, { "epoch": 1.97, "learning_rate": 1.469813745592972e-05, "loss": 1.6163, "step": 21430 }, { "epoch": 1.97, "learning_rate": 1.467474835080449e-05, "loss": 1.6171, "step": 21440 }, { "epoch": 1.97, "learning_rate": 1.46513701367303e-05, "loss": 1.6124, "step": 21450 }, { "epoch": 1.98, "learning_rate": 1.4628002838366433e-05, "loss": 1.6663, "step": 21460 }, { "epoch": 1.98, "learning_rate": 1.460464648036067e-05, "loss": 1.6675, "step": 21470 }, { "epoch": 1.98, "learning_rate": 1.458130108734923e-05, "loss": 1.5532, "step": 21480 }, { "epoch": 1.98, "learning_rate": 1.4557966683956798e-05, "loss": 1.6111, "step": 21490 }, { "epoch": 1.98, "learning_rate": 1.4534643294796425e-05, "loss": 1.6542, "step": 21500 }, { "epoch": 1.98, "learning_rate": 1.4511330944469581e-05, "loss": 1.6878, "step": 21510 }, { "epoch": 1.98, "learning_rate": 1.4488029657566065e-05, "loss": 1.648, "step": 21520 }, { "epoch": 1.98, "learning_rate": 1.4464739458664032e-05, "loss": 1.5542, "step": 21530 }, { "epoch": 1.98, "learning_rate": 1.444146037232993e-05, "loss": 1.6533, "step": 21540 }, { "epoch": 1.98, "learning_rate": 1.441819242311847e-05, "loss": 1.6182, "step": 21550 }, { "epoch": 1.98, "learning_rate": 1.4394935635572648e-05, "loss": 1.6499, "step": 21560 }, { "epoch": 1.99, "learning_rate": 1.437169003422365e-05, "loss": 1.6126, "step": 21570 }, { "epoch": 1.99, "learning_rate": 1.4348455643590892e-05, "loss": 1.692, "step": 21580 }, { "epoch": 1.99, "learning_rate": 1.432523248818195e-05, "loss": 1.5918, "step": 21590 }, { "epoch": 1.99, "learning_rate": 1.4302020592492568e-05, "loss": 1.6468, "step": 21600 }, { "epoch": 1.99, "learning_rate": 1.4278819981006578e-05, "loss": 1.6132, "step": 21610 }, { "epoch": 1.99, "learning_rate": 1.4255630678195947e-05, "loss": 1.622, "step": 21620 }, { "epoch": 1.99, "learning_rate": 1.423245270852068e-05, "loss": 1.6468, "step": 21630 }, { "epoch": 1.99, "learning_rate": 1.4209286096428853e-05, "loss": 1.6314, "step": 21640 }, { "epoch": 1.99, "learning_rate": 1.4186130866356564e-05, "loss": 1.7057, "step": 21650 }, { "epoch": 1.99, "learning_rate": 1.4162987042727877e-05, "loss": 1.6238, "step": 21660 }, { "epoch": 1.99, "learning_rate": 1.413985464995486e-05, "loss": 1.6636, "step": 21670 }, { "epoch": 2.0, "learning_rate": 1.4116733712437486e-05, "loss": 1.5948, "step": 21680 }, { "epoch": 2.0, "learning_rate": 1.4093624254563686e-05, "loss": 1.615, "step": 21690 }, { "epoch": 2.0, "learning_rate": 1.4070526300709234e-05, "loss": 1.5786, "step": 21700 }, { "epoch": 2.0, "learning_rate": 1.4047439875237834e-05, "loss": 1.6517, "step": 21710 }, { "epoch": 2.0, "learning_rate": 1.4024365002500961e-05, "loss": 1.6168, "step": 21720 }, { "epoch": 2.0, "learning_rate": 1.4001301706837963e-05, "loss": 1.5787, "step": 21730 }, { "epoch": 2.0, "learning_rate": 1.3978250012575928e-05, "loss": 1.269, "step": 21740 }, { "epoch": 2.0, "learning_rate": 1.395520994402974e-05, "loss": 1.2194, "step": 21750 }, { "epoch": 2.0, "learning_rate": 1.3932181525502003e-05, "loss": 1.2786, "step": 21760 }, { "epoch": 2.0, "learning_rate": 1.3909164781283035e-05, "loss": 1.2438, "step": 21770 }, { "epoch": 2.0, "learning_rate": 1.3886159735650861e-05, "loss": 1.2679, "step": 21780 }, { "epoch": 2.01, "learning_rate": 1.3863166412871128e-05, "loss": 1.2673, "step": 21790 }, { "epoch": 2.01, "learning_rate": 1.3840184837197156e-05, "loss": 1.1908, "step": 21800 }, { "epoch": 2.01, "learning_rate": 1.381721503286984e-05, "loss": 1.2274, "step": 21810 }, { "epoch": 2.01, "learning_rate": 1.3794257024117684e-05, "loss": 1.333, "step": 21820 }, { "epoch": 2.01, "learning_rate": 1.3771310835156743e-05, "loss": 1.2429, "step": 21830 }, { "epoch": 2.01, "learning_rate": 1.3748376490190606e-05, "loss": 1.2077, "step": 21840 }, { "epoch": 2.01, "learning_rate": 1.3725454013410355e-05, "loss": 1.2057, "step": 21850 }, { "epoch": 2.01, "learning_rate": 1.370254342899458e-05, "loss": 1.2451, "step": 21860 }, { "epoch": 2.01, "learning_rate": 1.3679644761109289e-05, "loss": 1.1968, "step": 21870 }, { "epoch": 2.01, "learning_rate": 1.3656758033907957e-05, "loss": 1.2469, "step": 21880 }, { "epoch": 2.01, "learning_rate": 1.3633883271531462e-05, "loss": 1.2406, "step": 21890 }, { "epoch": 2.02, "learning_rate": 1.3611020498108018e-05, "loss": 1.3113, "step": 21900 }, { "epoch": 2.02, "learning_rate": 1.3588169737753256e-05, "loss": 1.2346, "step": 21910 }, { "epoch": 2.02, "learning_rate": 1.3565331014570082e-05, "loss": 1.2946, "step": 21920 }, { "epoch": 2.02, "learning_rate": 1.3542504352648738e-05, "loss": 1.2427, "step": 21930 }, { "epoch": 2.02, "learning_rate": 1.3519689776066718e-05, "loss": 1.2449, "step": 21940 }, { "epoch": 2.02, "learning_rate": 1.3496887308888812e-05, "loss": 1.243, "step": 21950 }, { "epoch": 2.02, "learning_rate": 1.347409697516698e-05, "loss": 1.1853, "step": 21960 }, { "epoch": 2.02, "learning_rate": 1.3451318798940438e-05, "loss": 1.2534, "step": 21970 }, { "epoch": 2.02, "learning_rate": 1.3428552804235528e-05, "loss": 1.2388, "step": 21980 }, { "epoch": 2.02, "learning_rate": 1.3405799015065784e-05, "loss": 1.2125, "step": 21990 }, { "epoch": 2.03, "learning_rate": 1.338305745543183e-05, "loss": 1.2498, "step": 22000 }, { "epoch": 2.03, "learning_rate": 1.3360328149321439e-05, "loss": 1.2782, "step": 22010 }, { "epoch": 2.03, "learning_rate": 1.33376111207094e-05, "loss": 1.2224, "step": 22020 }, { "epoch": 2.03, "learning_rate": 1.3314906393557608e-05, "loss": 1.2221, "step": 22030 }, { "epoch": 2.03, "learning_rate": 1.329221399181493e-05, "loss": 1.1893, "step": 22040 }, { "epoch": 2.03, "learning_rate": 1.326953393941728e-05, "loss": 1.2159, "step": 22050 }, { "epoch": 2.03, "learning_rate": 1.3246866260287504e-05, "loss": 1.2241, "step": 22060 }, { "epoch": 2.03, "learning_rate": 1.3224210978335424e-05, "loss": 1.2036, "step": 22070 }, { "epoch": 2.03, "learning_rate": 1.3201568117457791e-05, "loss": 1.174, "step": 22080 }, { "epoch": 2.03, "learning_rate": 1.317893770153822e-05, "loss": 1.2085, "step": 22090 }, { "epoch": 2.03, "learning_rate": 1.3156319754447235e-05, "loss": 1.1838, "step": 22100 }, { "epoch": 2.04, "learning_rate": 1.3133714300042183e-05, "loss": 1.2218, "step": 22110 }, { "epoch": 2.04, "learning_rate": 1.3111121362167244e-05, "loss": 1.2315, "step": 22120 }, { "epoch": 2.04, "learning_rate": 1.30885409646534e-05, "loss": 1.2593, "step": 22130 }, { "epoch": 2.04, "learning_rate": 1.3065973131318416e-05, "loss": 1.2441, "step": 22140 }, { "epoch": 2.04, "learning_rate": 1.3043417885966763e-05, "loss": 1.2401, "step": 22150 }, { "epoch": 2.04, "learning_rate": 1.302087525238968e-05, "loss": 1.2326, "step": 22160 }, { "epoch": 2.04, "learning_rate": 1.2998345254365068e-05, "loss": 1.2313, "step": 22170 }, { "epoch": 2.04, "learning_rate": 1.2975827915657521e-05, "loss": 1.16, "step": 22180 }, { "epoch": 2.04, "learning_rate": 1.2953323260018288e-05, "loss": 1.2549, "step": 22190 }, { "epoch": 2.04, "learning_rate": 1.2930831311185203e-05, "loss": 1.2841, "step": 22200 }, { "epoch": 2.04, "learning_rate": 1.2908352092882747e-05, "loss": 1.2568, "step": 22210 }, { "epoch": 2.05, "learning_rate": 1.288588562882192e-05, "loss": 1.2262, "step": 22220 }, { "epoch": 2.05, "learning_rate": 1.2863431942700319e-05, "loss": 1.2226, "step": 22230 }, { "epoch": 2.05, "learning_rate": 1.2840991058202007e-05, "loss": 1.28, "step": 22240 }, { "epoch": 2.05, "learning_rate": 1.2818562998997616e-05, "loss": 1.1831, "step": 22250 }, { "epoch": 2.05, "learning_rate": 1.2796147788744178e-05, "loss": 1.2386, "step": 22260 }, { "epoch": 2.05, "learning_rate": 1.2773745451085228e-05, "loss": 1.2604, "step": 22270 }, { "epoch": 2.05, "learning_rate": 1.2751356009650681e-05, "loss": 1.224, "step": 22280 }, { "epoch": 2.05, "learning_rate": 1.272897948805688e-05, "loss": 1.2317, "step": 22290 }, { "epoch": 2.05, "learning_rate": 1.2706615909906511e-05, "loss": 1.2278, "step": 22300 }, { "epoch": 2.05, "learning_rate": 1.2684265298788628e-05, "loss": 1.187, "step": 22310 }, { "epoch": 2.05, "learning_rate": 1.2661927678278621e-05, "loss": 1.2475, "step": 22320 }, { "epoch": 2.06, "learning_rate": 1.2639603071938135e-05, "loss": 1.1994, "step": 22330 }, { "epoch": 2.06, "learning_rate": 1.2617291503315131e-05, "loss": 1.209, "step": 22340 }, { "epoch": 2.06, "learning_rate": 1.2594992995943783e-05, "loss": 1.2287, "step": 22350 }, { "epoch": 2.06, "learning_rate": 1.2572707573344511e-05, "loss": 1.2271, "step": 22360 }, { "epoch": 2.06, "learning_rate": 1.2550435259023927e-05, "loss": 1.2455, "step": 22370 }, { "epoch": 2.06, "learning_rate": 1.2528176076474823e-05, "loss": 1.2617, "step": 22380 }, { "epoch": 2.06, "learning_rate": 1.2505930049176114e-05, "loss": 1.2189, "step": 22390 }, { "epoch": 2.06, "learning_rate": 1.2483697200592878e-05, "loss": 1.2554, "step": 22400 }, { "epoch": 2.06, "learning_rate": 1.2461477554176252e-05, "loss": 1.2343, "step": 22410 }, { "epoch": 2.06, "learning_rate": 1.2439271133363473e-05, "loss": 1.227, "step": 22420 }, { "epoch": 2.06, "learning_rate": 1.2417077961577833e-05, "loss": 1.2456, "step": 22430 }, { "epoch": 2.07, "learning_rate": 1.2394898062228619e-05, "loss": 1.23, "step": 22440 }, { "epoch": 2.07, "learning_rate": 1.237273145871115e-05, "loss": 1.2292, "step": 22450 }, { "epoch": 2.07, "learning_rate": 1.2350578174406696e-05, "loss": 1.2253, "step": 22460 }, { "epoch": 2.07, "learning_rate": 1.2328438232682505e-05, "loss": 1.2667, "step": 22470 }, { "epoch": 2.07, "learning_rate": 1.2306311656891702e-05, "loss": 1.2141, "step": 22480 }, { "epoch": 2.07, "learning_rate": 1.228419847037339e-05, "loss": 1.2333, "step": 22490 }, { "epoch": 2.07, "learning_rate": 1.2262098696452475e-05, "loss": 1.2244, "step": 22500 }, { "epoch": 2.07, "learning_rate": 1.224001235843977e-05, "loss": 1.238, "step": 22510 }, { "epoch": 2.07, "learning_rate": 1.2217939479631868e-05, "loss": 1.2138, "step": 22520 }, { "epoch": 2.07, "learning_rate": 1.219588008331121e-05, "loss": 1.2382, "step": 22530 }, { "epoch": 2.07, "learning_rate": 1.2173834192745964e-05, "loss": 1.1949, "step": 22540 }, { "epoch": 2.08, "learning_rate": 1.2151801831190127e-05, "loss": 1.1701, "step": 22550 }, { "epoch": 2.08, "learning_rate": 1.2129783021883347e-05, "loss": 1.1596, "step": 22560 }, { "epoch": 2.08, "learning_rate": 1.2107777788051036e-05, "loss": 1.2096, "step": 22570 }, { "epoch": 2.08, "learning_rate": 1.2085786152904242e-05, "loss": 1.2557, "step": 22580 }, { "epoch": 2.08, "learning_rate": 1.206380813963971e-05, "loss": 1.2033, "step": 22590 }, { "epoch": 2.08, "learning_rate": 1.2041843771439773e-05, "loss": 1.2267, "step": 22600 }, { "epoch": 2.08, "learning_rate": 1.201989307147241e-05, "loss": 1.2414, "step": 22610 }, { "epoch": 2.08, "learning_rate": 1.1997956062891172e-05, "loss": 1.2082, "step": 22620 }, { "epoch": 2.08, "learning_rate": 1.197603276883515e-05, "loss": 1.2258, "step": 22630 }, { "epoch": 2.08, "learning_rate": 1.1954123212428996e-05, "loss": 1.2031, "step": 22640 }, { "epoch": 2.08, "learning_rate": 1.1932227416782842e-05, "loss": 1.2287, "step": 22650 }, { "epoch": 2.09, "learning_rate": 1.1910345404992334e-05, "loss": 1.1498, "step": 22660 }, { "epoch": 2.09, "learning_rate": 1.1888477200138561e-05, "loss": 1.2816, "step": 22670 }, { "epoch": 2.09, "learning_rate": 1.1866622825288068e-05, "loss": 1.28, "step": 22680 }, { "epoch": 2.09, "learning_rate": 1.1844782303492777e-05, "loss": 1.2044, "step": 22690 }, { "epoch": 2.09, "learning_rate": 1.1822955657790036e-05, "loss": 1.2173, "step": 22700 }, { "epoch": 2.09, "learning_rate": 1.1801142911202523e-05, "loss": 1.2019, "step": 22710 }, { "epoch": 2.09, "learning_rate": 1.1779344086738277e-05, "loss": 1.2777, "step": 22720 }, { "epoch": 2.09, "learning_rate": 1.1757559207390664e-05, "loss": 1.306, "step": 22730 }, { "epoch": 2.09, "learning_rate": 1.1735788296138298e-05, "loss": 1.19, "step": 22740 }, { "epoch": 2.09, "learning_rate": 1.1714031375945102e-05, "loss": 1.2048, "step": 22750 }, { "epoch": 2.1, "learning_rate": 1.1692288469760209e-05, "loss": 1.2324, "step": 22760 }, { "epoch": 2.1, "learning_rate": 1.1670559600518005e-05, "loss": 1.1873, "step": 22770 }, { "epoch": 2.1, "learning_rate": 1.1648844791138017e-05, "loss": 1.211, "step": 22780 }, { "epoch": 2.1, "learning_rate": 1.1627144064525016e-05, "loss": 1.1985, "step": 22790 }, { "epoch": 2.1, "learning_rate": 1.1605457443568849e-05, "loss": 1.2435, "step": 22800 }, { "epoch": 2.1, "learning_rate": 1.158378495114453e-05, "loss": 1.1781, "step": 22810 }, { "epoch": 2.1, "learning_rate": 1.1562126610112133e-05, "loss": 1.2413, "step": 22820 }, { "epoch": 2.1, "learning_rate": 1.1540482443316844e-05, "loss": 1.188, "step": 22830 }, { "epoch": 2.1, "learning_rate": 1.1518852473588865e-05, "loss": 1.267, "step": 22840 }, { "epoch": 2.1, "learning_rate": 1.149723672374344e-05, "loss": 1.2549, "step": 22850 }, { "epoch": 2.1, "learning_rate": 1.147563521658082e-05, "loss": 1.2454, "step": 22860 }, { "epoch": 2.11, "learning_rate": 1.1454047974886212e-05, "loss": 1.2198, "step": 22870 }, { "epoch": 2.11, "learning_rate": 1.14324750214298e-05, "loss": 1.1975, "step": 22880 }, { "epoch": 2.11, "learning_rate": 1.141091637896667e-05, "loss": 1.1932, "step": 22890 }, { "epoch": 2.11, "learning_rate": 1.1389372070236834e-05, "loss": 1.2248, "step": 22900 }, { "epoch": 2.11, "learning_rate": 1.1367842117965174e-05, "loss": 1.1985, "step": 22910 }, { "epoch": 2.11, "learning_rate": 1.134632654486145e-05, "loss": 1.2165, "step": 22920 }, { "epoch": 2.11, "learning_rate": 1.1324825373620213e-05, "loss": 1.2247, "step": 22930 }, { "epoch": 2.11, "learning_rate": 1.130333862692087e-05, "loss": 1.1967, "step": 22940 }, { "epoch": 2.11, "learning_rate": 1.128186632742757e-05, "loss": 1.2648, "step": 22950 }, { "epoch": 2.11, "learning_rate": 1.1260408497789254e-05, "loss": 1.1766, "step": 22960 }, { "epoch": 2.11, "learning_rate": 1.1238965160639598e-05, "loss": 1.2575, "step": 22970 }, { "epoch": 2.12, "learning_rate": 1.1217536338596964e-05, "loss": 1.2335, "step": 22980 }, { "epoch": 2.12, "learning_rate": 1.1196122054264447e-05, "loss": 1.213, "step": 22990 }, { "epoch": 2.12, "learning_rate": 1.1174722330229759e-05, "loss": 1.2379, "step": 23000 }, { "epoch": 2.12, "learning_rate": 1.1153337189065296e-05, "loss": 1.2267, "step": 23010 }, { "epoch": 2.12, "learning_rate": 1.1131966653328033e-05, "loss": 1.1999, "step": 23020 }, { "epoch": 2.12, "learning_rate": 1.1110610745559593e-05, "loss": 1.2753, "step": 23030 }, { "epoch": 2.12, "learning_rate": 1.1089269488286111e-05, "loss": 1.2454, "step": 23040 }, { "epoch": 2.12, "learning_rate": 1.1067942904018311e-05, "loss": 1.1573, "step": 23050 }, { "epoch": 2.12, "learning_rate": 1.1046631015251411e-05, "loss": 1.2329, "step": 23060 }, { "epoch": 2.12, "learning_rate": 1.102533384446515e-05, "loss": 1.2428, "step": 23070 }, { "epoch": 2.12, "learning_rate": 1.1004051414123715e-05, "loss": 1.2629, "step": 23080 }, { "epoch": 2.13, "learning_rate": 1.0982783746675792e-05, "loss": 1.2036, "step": 23090 }, { "epoch": 2.13, "learning_rate": 1.0961530864554446e-05, "loss": 1.2114, "step": 23100 }, { "epoch": 2.13, "learning_rate": 1.0940292790177181e-05, "loss": 1.1725, "step": 23110 }, { "epoch": 2.13, "learning_rate": 1.091906954594585e-05, "loss": 1.2163, "step": 23120 }, { "epoch": 2.13, "learning_rate": 1.08978611542467e-05, "loss": 1.2038, "step": 23130 }, { "epoch": 2.13, "learning_rate": 1.0876667637450274e-05, "loss": 1.2107, "step": 23140 }, { "epoch": 2.13, "learning_rate": 1.0855489017911451e-05, "loss": 1.1872, "step": 23150 }, { "epoch": 2.13, "learning_rate": 1.0834325317969401e-05, "loss": 1.2082, "step": 23160 }, { "epoch": 2.13, "learning_rate": 1.0813176559947524e-05, "loss": 1.2076, "step": 23170 }, { "epoch": 2.13, "learning_rate": 1.0792042766153499e-05, "loss": 1.223, "step": 23180 }, { "epoch": 2.13, "learning_rate": 1.0770923958879184e-05, "loss": 1.2214, "step": 23190 }, { "epoch": 2.14, "learning_rate": 1.0749820160400654e-05, "loss": 1.2418, "step": 23200 }, { "epoch": 2.14, "learning_rate": 1.0728731392978147e-05, "loss": 1.2016, "step": 23210 }, { "epoch": 2.14, "learning_rate": 1.0707657678856056e-05, "loss": 1.2292, "step": 23220 }, { "epoch": 2.14, "learning_rate": 1.0686599040262865e-05, "loss": 1.2397, "step": 23230 }, { "epoch": 2.14, "learning_rate": 1.0665555499411195e-05, "loss": 1.1836, "step": 23240 }, { "epoch": 2.14, "learning_rate": 1.0644527078497704e-05, "loss": 1.2243, "step": 23250 }, { "epoch": 2.14, "learning_rate": 1.0623513799703131e-05, "loss": 1.2567, "step": 23260 }, { "epoch": 2.14, "learning_rate": 1.060251568519224e-05, "loss": 1.2382, "step": 23270 }, { "epoch": 2.14, "learning_rate": 1.0581532757113777e-05, "loss": 1.275, "step": 23280 }, { "epoch": 2.14, "learning_rate": 1.05605650376005e-05, "loss": 1.2396, "step": 23290 }, { "epoch": 2.14, "learning_rate": 1.0539612548769095e-05, "loss": 1.2497, "step": 23300 }, { "epoch": 2.15, "learning_rate": 1.0518675312720213e-05, "loss": 1.2343, "step": 23310 }, { "epoch": 2.15, "learning_rate": 1.0497753351538378e-05, "loss": 1.233, "step": 23320 }, { "epoch": 2.15, "learning_rate": 1.0476846687292058e-05, "loss": 1.2208, "step": 23330 }, { "epoch": 2.15, "learning_rate": 1.0455955342033525e-05, "loss": 1.2315, "step": 23340 }, { "epoch": 2.15, "learning_rate": 1.0435079337798942e-05, "loss": 1.1892, "step": 23350 }, { "epoch": 2.15, "learning_rate": 1.041421869660825e-05, "loss": 1.1891, "step": 23360 }, { "epoch": 2.15, "learning_rate": 1.0393373440465217e-05, "loss": 1.2606, "step": 23370 }, { "epoch": 2.15, "learning_rate": 1.0372543591357362e-05, "loss": 1.2056, "step": 23380 }, { "epoch": 2.15, "learning_rate": 1.0351729171255961e-05, "loss": 1.2375, "step": 23390 }, { "epoch": 2.15, "learning_rate": 1.0330930202116027e-05, "loss": 1.2055, "step": 23400 }, { "epoch": 2.15, "learning_rate": 1.0310146705876247e-05, "loss": 1.231, "step": 23410 }, { "epoch": 2.16, "learning_rate": 1.0289378704459018e-05, "loss": 1.2133, "step": 23420 }, { "epoch": 2.16, "learning_rate": 1.0268626219770367e-05, "loss": 1.2401, "step": 23430 }, { "epoch": 2.16, "learning_rate": 1.0247889273699968e-05, "loss": 1.2179, "step": 23440 }, { "epoch": 2.16, "learning_rate": 1.0227167888121105e-05, "loss": 1.2531, "step": 23450 }, { "epoch": 2.16, "learning_rate": 1.0206462084890659e-05, "loss": 1.2294, "step": 23460 }, { "epoch": 2.16, "learning_rate": 1.0185771885849037e-05, "loss": 1.2418, "step": 23470 }, { "epoch": 2.16, "learning_rate": 1.0165097312820237e-05, "loss": 1.2235, "step": 23480 }, { "epoch": 2.16, "learning_rate": 1.0144438387611733e-05, "loss": 1.2653, "step": 23490 }, { "epoch": 2.16, "learning_rate": 1.012379513201451e-05, "loss": 1.2144, "step": 23500 }, { "epoch": 2.16, "learning_rate": 1.0103167567803042e-05, "loss": 1.2254, "step": 23510 }, { "epoch": 2.16, "learning_rate": 1.0082555716735217e-05, "loss": 1.258, "step": 23520 }, { "epoch": 2.17, "learning_rate": 1.0061959600552381e-05, "loss": 1.2491, "step": 23530 }, { "epoch": 2.17, "learning_rate": 1.0041379240979253e-05, "loss": 1.2448, "step": 23540 }, { "epoch": 2.17, "learning_rate": 1.0020814659723965e-05, "loss": 1.1916, "step": 23550 }, { "epoch": 2.17, "learning_rate": 1.0000265878477964e-05, "loss": 1.2245, "step": 23560 }, { "epoch": 2.17, "learning_rate": 9.979732918916088e-06, "loss": 1.1913, "step": 23570 }, { "epoch": 2.17, "learning_rate": 9.959215802696428e-06, "loss": 1.2371, "step": 23580 }, { "epoch": 2.17, "learning_rate": 9.93871455146041e-06, "loss": 1.2436, "step": 23590 }, { "epoch": 2.17, "learning_rate": 9.918229186832687e-06, "loss": 1.2179, "step": 23600 }, { "epoch": 2.17, "learning_rate": 9.897759730421188e-06, "loss": 1.2842, "step": 23610 }, { "epoch": 2.17, "learning_rate": 9.877306203817024e-06, "loss": 1.1999, "step": 23620 }, { "epoch": 2.18, "learning_rate": 9.85686862859456e-06, "loss": 1.2034, "step": 23630 }, { "epoch": 2.18, "learning_rate": 9.836447026311277e-06, "loss": 1.198, "step": 23640 }, { "epoch": 2.18, "learning_rate": 9.816041418507852e-06, "loss": 1.2049, "step": 23650 }, { "epoch": 2.18, "learning_rate": 9.795651826708049e-06, "loss": 1.2607, "step": 23660 }, { "epoch": 2.18, "learning_rate": 9.775278272418773e-06, "loss": 1.3027, "step": 23670 }, { "epoch": 2.18, "learning_rate": 9.754920777130012e-06, "loss": 1.2005, "step": 23680 }, { "epoch": 2.18, "learning_rate": 9.734579362314783e-06, "loss": 1.2499, "step": 23690 }, { "epoch": 2.18, "learning_rate": 9.71425404942918e-06, "loss": 1.2686, "step": 23700 }, { "epoch": 2.18, "learning_rate": 9.693944859912282e-06, "loss": 1.2279, "step": 23710 }, { "epoch": 2.18, "learning_rate": 9.673651815186185e-06, "loss": 1.1879, "step": 23720 }, { "epoch": 2.18, "learning_rate": 9.653374936655923e-06, "loss": 1.2284, "step": 23730 }, { "epoch": 2.19, "learning_rate": 9.633114245709532e-06, "loss": 1.2245, "step": 23740 }, { "epoch": 2.19, "learning_rate": 9.612869763717917e-06, "loss": 1.1991, "step": 23750 }, { "epoch": 2.19, "learning_rate": 9.592641512034926e-06, "loss": 1.2504, "step": 23760 }, { "epoch": 2.19, "learning_rate": 9.572429511997252e-06, "loss": 1.2284, "step": 23770 }, { "epoch": 2.19, "learning_rate": 9.552233784924485e-06, "loss": 1.214, "step": 23780 }, { "epoch": 2.19, "learning_rate": 9.532054352119013e-06, "loss": 1.2302, "step": 23790 }, { "epoch": 2.19, "learning_rate": 9.511891234866061e-06, "loss": 1.2603, "step": 23800 }, { "epoch": 2.19, "learning_rate": 9.491744454433643e-06, "loss": 1.2802, "step": 23810 }, { "epoch": 2.19, "learning_rate": 9.471614032072526e-06, "loss": 1.2213, "step": 23820 }, { "epoch": 2.19, "learning_rate": 9.451499989016243e-06, "loss": 1.2328, "step": 23830 }, { "epoch": 2.19, "learning_rate": 9.431402346481024e-06, "loss": 1.2427, "step": 23840 }, { "epoch": 2.2, "learning_rate": 9.411321125665823e-06, "loss": 1.2757, "step": 23850 }, { "epoch": 2.2, "learning_rate": 9.391256347752264e-06, "loss": 1.2158, "step": 23860 }, { "epoch": 2.2, "learning_rate": 9.371208033904638e-06, "loss": 1.1752, "step": 23870 }, { "epoch": 2.2, "learning_rate": 9.35117620526984e-06, "loss": 1.2014, "step": 23880 }, { "epoch": 2.2, "learning_rate": 9.331160882977414e-06, "loss": 1.2267, "step": 23890 }, { "epoch": 2.2, "learning_rate": 9.311162088139453e-06, "loss": 1.2395, "step": 23900 }, { "epoch": 2.2, "learning_rate": 9.291179841850653e-06, "loss": 1.1755, "step": 23910 }, { "epoch": 2.2, "learning_rate": 9.271214165188247e-06, "loss": 1.1887, "step": 23920 }, { "epoch": 2.2, "learning_rate": 9.251265079211963e-06, "loss": 1.2211, "step": 23930 }, { "epoch": 2.2, "learning_rate": 9.231332604964072e-06, "loss": 1.2083, "step": 23940 }, { "epoch": 2.2, "learning_rate": 9.21141676346928e-06, "loss": 1.1712, "step": 23950 }, { "epoch": 2.21, "learning_rate": 9.191517575734789e-06, "loss": 1.1373, "step": 23960 }, { "epoch": 2.21, "learning_rate": 9.171635062750189e-06, "loss": 1.1735, "step": 23970 }, { "epoch": 2.21, "learning_rate": 9.15176924548754e-06, "loss": 1.1637, "step": 23980 }, { "epoch": 2.21, "learning_rate": 9.131920144901235e-06, "loss": 1.2065, "step": 23990 }, { "epoch": 2.21, "learning_rate": 9.112087781928075e-06, "loss": 1.2083, "step": 24000 }, { "epoch": 2.21, "learning_rate": 9.092272177487173e-06, "loss": 1.2037, "step": 24010 }, { "epoch": 2.21, "learning_rate": 9.07247335248e-06, "loss": 1.2406, "step": 24020 }, { "epoch": 2.21, "learning_rate": 9.052691327790291e-06, "loss": 1.2236, "step": 24030 }, { "epoch": 2.21, "learning_rate": 9.032926124284088e-06, "loss": 1.1981, "step": 24040 }, { "epoch": 2.21, "learning_rate": 9.013177762809687e-06, "loss": 1.207, "step": 24050 }, { "epoch": 2.21, "learning_rate": 8.993446264197603e-06, "loss": 1.2308, "step": 24060 }, { "epoch": 2.22, "learning_rate": 8.973731649260581e-06, "loss": 1.2314, "step": 24070 }, { "epoch": 2.22, "learning_rate": 8.954033938793544e-06, "loss": 1.1864, "step": 24080 }, { "epoch": 2.22, "learning_rate": 8.934353153573589e-06, "loss": 1.2115, "step": 24090 }, { "epoch": 2.22, "learning_rate": 8.914689314359969e-06, "loss": 1.2078, "step": 24100 }, { "epoch": 2.22, "learning_rate": 8.895042441894062e-06, "loss": 1.1674, "step": 24110 }, { "epoch": 2.22, "learning_rate": 8.87541255689932e-06, "loss": 1.1823, "step": 24120 }, { "epoch": 2.22, "learning_rate": 8.855799680081325e-06, "loss": 1.1865, "step": 24130 }, { "epoch": 2.22, "learning_rate": 8.836203832127671e-06, "loss": 1.2159, "step": 24140 }, { "epoch": 2.22, "learning_rate": 8.816625033708023e-06, "loss": 1.2496, "step": 24150 }, { "epoch": 2.22, "learning_rate": 8.797063305474048e-06, "loss": 1.2527, "step": 24160 }, { "epoch": 2.22, "learning_rate": 8.777518668059423e-06, "loss": 1.2384, "step": 24170 }, { "epoch": 2.23, "learning_rate": 8.757991142079767e-06, "loss": 1.2231, "step": 24180 }, { "epoch": 2.23, "learning_rate": 8.738480748132685e-06, "loss": 1.2133, "step": 24190 }, { "epoch": 2.23, "learning_rate": 8.718987506797676e-06, "loss": 1.2312, "step": 24200 }, { "epoch": 2.23, "learning_rate": 8.699511438636173e-06, "loss": 1.2078, "step": 24210 }, { "epoch": 2.23, "learning_rate": 8.680052564191501e-06, "loss": 1.1714, "step": 24220 }, { "epoch": 2.23, "learning_rate": 8.660610903988812e-06, "loss": 1.2044, "step": 24230 }, { "epoch": 2.23, "learning_rate": 8.64118647853514e-06, "loss": 1.1771, "step": 24240 }, { "epoch": 2.23, "learning_rate": 8.62177930831931e-06, "loss": 1.299, "step": 24250 }, { "epoch": 2.23, "learning_rate": 8.602389413811976e-06, "loss": 1.2249, "step": 24260 }, { "epoch": 2.23, "learning_rate": 8.583016815465525e-06, "loss": 1.1927, "step": 24270 }, { "epoch": 2.23, "learning_rate": 8.56366153371416e-06, "loss": 1.2191, "step": 24280 }, { "epoch": 2.24, "learning_rate": 8.544323588973765e-06, "loss": 1.1526, "step": 24290 }, { "epoch": 2.24, "learning_rate": 8.525003001641974e-06, "loss": 1.2553, "step": 24300 }, { "epoch": 2.24, "learning_rate": 8.505699792098081e-06, "loss": 1.2044, "step": 24310 }, { "epoch": 2.24, "learning_rate": 8.48641398070308e-06, "loss": 1.1687, "step": 24320 }, { "epoch": 2.24, "learning_rate": 8.467145587799585e-06, "loss": 1.2715, "step": 24330 }, { "epoch": 2.24, "learning_rate": 8.447894633711861e-06, "loss": 1.1767, "step": 24340 }, { "epoch": 2.24, "learning_rate": 8.428661138745772e-06, "loss": 1.2901, "step": 24350 }, { "epoch": 2.24, "learning_rate": 8.40944512318875e-06, "loss": 1.2337, "step": 24360 }, { "epoch": 2.24, "learning_rate": 8.39024660730982e-06, "loss": 1.2097, "step": 24370 }, { "epoch": 2.24, "learning_rate": 8.371065611359514e-06, "loss": 1.2513, "step": 24380 }, { "epoch": 2.25, "learning_rate": 8.351902155569907e-06, "loss": 1.2076, "step": 24390 }, { "epoch": 2.25, "learning_rate": 8.33275626015457e-06, "loss": 1.2331, "step": 24400 }, { "epoch": 2.25, "learning_rate": 8.313627945308556e-06, "loss": 1.2257, "step": 24410 }, { "epoch": 2.25, "learning_rate": 8.294517231208346e-06, "loss": 1.247, "step": 24420 }, { "epoch": 2.25, "learning_rate": 8.275424138011897e-06, "loss": 1.1883, "step": 24430 }, { "epoch": 2.25, "learning_rate": 8.256348685858543e-06, "loss": 1.2615, "step": 24440 }, { "epoch": 2.25, "learning_rate": 8.237290894869032e-06, "loss": 1.1696, "step": 24450 }, { "epoch": 2.25, "learning_rate": 8.218250785145482e-06, "loss": 1.1923, "step": 24460 }, { "epoch": 2.25, "learning_rate": 8.199228376771345e-06, "loss": 1.2326, "step": 24470 }, { "epoch": 2.25, "learning_rate": 8.180223689811426e-06, "loss": 1.2713, "step": 24480 }, { "epoch": 2.25, "learning_rate": 8.161236744311806e-06, "loss": 1.215, "step": 24490 }, { "epoch": 2.26, "learning_rate": 8.14226756029989e-06, "loss": 1.1923, "step": 24500 }, { "epoch": 2.26, "learning_rate": 8.123316157784299e-06, "loss": 1.2392, "step": 24510 }, { "epoch": 2.26, "learning_rate": 8.104382556754967e-06, "loss": 1.2, "step": 24520 }, { "epoch": 2.26, "learning_rate": 8.085466777182985e-06, "loss": 1.2062, "step": 24530 }, { "epoch": 2.26, "learning_rate": 8.066568839020682e-06, "loss": 1.2347, "step": 24540 }, { "epoch": 2.26, "learning_rate": 8.047688762201552e-06, "loss": 1.1611, "step": 24550 }, { "epoch": 2.26, "learning_rate": 8.028826566640269e-06, "loss": 1.167, "step": 24560 }, { "epoch": 2.26, "learning_rate": 8.00998227223261e-06, "loss": 1.2001, "step": 24570 }, { "epoch": 2.26, "learning_rate": 7.991155898855506e-06, "loss": 1.2053, "step": 24580 }, { "epoch": 2.26, "learning_rate": 7.972347466366981e-06, "loss": 1.2197, "step": 24590 }, { "epoch": 2.26, "learning_rate": 7.953556994606105e-06, "loss": 1.2369, "step": 24600 }, { "epoch": 2.27, "learning_rate": 7.93478450339304e-06, "loss": 1.2342, "step": 24610 }, { "epoch": 2.27, "learning_rate": 7.916030012528946e-06, "loss": 1.1892, "step": 24620 }, { "epoch": 2.27, "learning_rate": 7.897293541796033e-06, "loss": 1.2675, "step": 24630 }, { "epoch": 2.27, "learning_rate": 7.878575110957478e-06, "loss": 1.2369, "step": 24640 }, { "epoch": 2.27, "learning_rate": 7.859874739757447e-06, "loss": 1.2264, "step": 24650 }, { "epoch": 2.27, "learning_rate": 7.841192447921036e-06, "loss": 1.2176, "step": 24660 }, { "epoch": 2.27, "learning_rate": 7.822528255154293e-06, "loss": 1.2669, "step": 24670 }, { "epoch": 2.27, "learning_rate": 7.803882181144148e-06, "loss": 1.2297, "step": 24680 }, { "epoch": 2.27, "learning_rate": 7.785254245558449e-06, "loss": 1.2042, "step": 24690 }, { "epoch": 2.27, "learning_rate": 7.766644468045895e-06, "loss": 1.1728, "step": 24700 }, { "epoch": 2.27, "learning_rate": 7.748052868236042e-06, "loss": 1.2075, "step": 24710 }, { "epoch": 2.28, "learning_rate": 7.72947946573925e-06, "loss": 1.1917, "step": 24720 }, { "epoch": 2.28, "learning_rate": 7.710924280146723e-06, "loss": 1.2582, "step": 24730 }, { "epoch": 2.28, "learning_rate": 7.692387331030402e-06, "loss": 1.2238, "step": 24740 }, { "epoch": 2.28, "learning_rate": 7.673868637943027e-06, "loss": 1.2035, "step": 24750 }, { "epoch": 2.28, "learning_rate": 7.655368220418082e-06, "loss": 1.1993, "step": 24760 }, { "epoch": 2.28, "learning_rate": 7.636886097969748e-06, "loss": 1.2417, "step": 24770 }, { "epoch": 2.28, "learning_rate": 7.618422290092939e-06, "loss": 1.2155, "step": 24780 }, { "epoch": 2.28, "learning_rate": 7.599976816263219e-06, "loss": 1.2356, "step": 24790 }, { "epoch": 2.28, "learning_rate": 7.581549695936846e-06, "loss": 1.2195, "step": 24800 }, { "epoch": 2.28, "learning_rate": 7.563140948550682e-06, "loss": 1.2247, "step": 24810 }, { "epoch": 2.28, "learning_rate": 7.544750593522259e-06, "loss": 1.2184, "step": 24820 }, { "epoch": 2.29, "learning_rate": 7.526378650249658e-06, "loss": 1.2506, "step": 24830 }, { "epoch": 2.29, "learning_rate": 7.5080251381115765e-06, "loss": 1.2236, "step": 24840 }, { "epoch": 2.29, "learning_rate": 7.48969007646724e-06, "loss": 1.2417, "step": 24850 }, { "epoch": 2.29, "learning_rate": 7.471373484656444e-06, "loss": 1.2615, "step": 24860 }, { "epoch": 2.29, "learning_rate": 7.4530753819994694e-06, "loss": 1.2065, "step": 24870 }, { "epoch": 2.29, "learning_rate": 7.434795787797122e-06, "loss": 1.2741, "step": 24880 }, { "epoch": 2.29, "learning_rate": 7.416534721330678e-06, "loss": 1.2089, "step": 24890 }, { "epoch": 2.29, "learning_rate": 7.3982922018618585e-06, "loss": 1.229, "step": 24900 }, { "epoch": 2.29, "learning_rate": 7.38006824863284e-06, "loss": 1.22, "step": 24910 }, { "epoch": 2.29, "learning_rate": 7.361862880866191e-06, "loss": 1.1765, "step": 24920 }, { "epoch": 2.29, "learning_rate": 7.3436761177649e-06, "loss": 1.2886, "step": 24930 }, { "epoch": 2.3, "learning_rate": 7.325507978512316e-06, "loss": 1.1737, "step": 24940 }, { "epoch": 2.3, "learning_rate": 7.307358482272167e-06, "loss": 1.1766, "step": 24950 }, { "epoch": 2.3, "learning_rate": 7.289227648188473e-06, "loss": 1.2557, "step": 24960 }, { "epoch": 2.3, "learning_rate": 7.271115495385619e-06, "loss": 1.2273, "step": 24970 }, { "epoch": 2.3, "learning_rate": 7.253022042968238e-06, "loss": 1.2007, "step": 24980 }, { "epoch": 2.3, "learning_rate": 7.234947310021273e-06, "loss": 1.1589, "step": 24990 }, { "epoch": 2.3, "learning_rate": 7.2168913156099165e-06, "loss": 1.2412, "step": 25000 }, { "epoch": 2.3, "learning_rate": 7.198854078779574e-06, "loss": 1.2128, "step": 25010 }, { "epoch": 2.3, "learning_rate": 7.180835618555895e-06, "loss": 1.1683, "step": 25020 }, { "epoch": 2.3, "learning_rate": 7.162835953944694e-06, "loss": 1.1592, "step": 25030 }, { "epoch": 2.3, "learning_rate": 7.144855103931988e-06, "loss": 1.2242, "step": 25040 }, { "epoch": 2.31, "learning_rate": 7.126893087483913e-06, "loss": 1.2264, "step": 25050 }, { "epoch": 2.31, "learning_rate": 7.1089499235467926e-06, "loss": 1.2279, "step": 25060 }, { "epoch": 2.31, "learning_rate": 7.091025631047013e-06, "loss": 1.2155, "step": 25070 }, { "epoch": 2.31, "learning_rate": 7.0731202288910865e-06, "loss": 1.2255, "step": 25080 }, { "epoch": 2.31, "learning_rate": 7.055233735965578e-06, "loss": 1.2328, "step": 25090 }, { "epoch": 2.31, "learning_rate": 7.03736617113713e-06, "loss": 1.1384, "step": 25100 }, { "epoch": 2.31, "learning_rate": 7.019517553252391e-06, "loss": 1.2161, "step": 25110 }, { "epoch": 2.31, "learning_rate": 7.001687901138054e-06, "loss": 1.2541, "step": 25120 }, { "epoch": 2.31, "learning_rate": 6.9838772336008e-06, "loss": 1.2361, "step": 25130 }, { "epoch": 2.31, "learning_rate": 6.966085569427261e-06, "loss": 1.2191, "step": 25140 }, { "epoch": 2.32, "learning_rate": 6.948312927384057e-06, "loss": 1.2164, "step": 25150 }, { "epoch": 2.32, "learning_rate": 6.930559326217717e-06, "loss": 1.2281, "step": 25160 }, { "epoch": 2.32, "learning_rate": 6.912824784654703e-06, "loss": 1.2031, "step": 25170 }, { "epoch": 2.32, "learning_rate": 6.895109321401372e-06, "loss": 1.2545, "step": 25180 }, { "epoch": 2.32, "learning_rate": 6.8774129551439525e-06, "loss": 1.2304, "step": 25190 }, { "epoch": 2.32, "learning_rate": 6.85973570454852e-06, "loss": 1.2121, "step": 25200 }, { "epoch": 2.32, "learning_rate": 6.842077588261012e-06, "loss": 1.2579, "step": 25210 }, { "epoch": 2.32, "learning_rate": 6.824438624907151e-06, "loss": 1.203, "step": 25220 }, { "epoch": 2.32, "learning_rate": 6.806818833092479e-06, "loss": 1.2149, "step": 25230 }, { "epoch": 2.32, "learning_rate": 6.789218231402317e-06, "loss": 1.1856, "step": 25240 }, { "epoch": 2.32, "learning_rate": 6.771636838401741e-06, "loss": 1.2758, "step": 25250 }, { "epoch": 2.33, "learning_rate": 6.754074672635549e-06, "loss": 1.273, "step": 25260 }, { "epoch": 2.33, "learning_rate": 6.7365317526282865e-06, "loss": 1.1955, "step": 25270 }, { "epoch": 2.33, "learning_rate": 6.719008096884169e-06, "loss": 1.2347, "step": 25280 }, { "epoch": 2.33, "learning_rate": 6.701503723887115e-06, "loss": 1.2223, "step": 25290 }, { "epoch": 2.33, "learning_rate": 6.684018652100704e-06, "loss": 1.2417, "step": 25300 }, { "epoch": 2.33, "learning_rate": 6.666552899968134e-06, "loss": 1.1952, "step": 25310 }, { "epoch": 2.33, "learning_rate": 6.649106485912251e-06, "loss": 1.2326, "step": 25320 }, { "epoch": 2.33, "learning_rate": 6.63167942833548e-06, "loss": 1.2136, "step": 25330 }, { "epoch": 2.33, "learning_rate": 6.614271745619852e-06, "loss": 1.2411, "step": 25340 }, { "epoch": 2.33, "learning_rate": 6.596883456126929e-06, "loss": 1.1798, "step": 25350 }, { "epoch": 2.33, "learning_rate": 6.579514578197868e-06, "loss": 1.2398, "step": 25360 }, { "epoch": 2.34, "learning_rate": 6.562165130153294e-06, "loss": 1.2379, "step": 25370 }, { "epoch": 2.34, "learning_rate": 6.544835130293383e-06, "loss": 1.1994, "step": 25380 }, { "epoch": 2.34, "learning_rate": 6.527524596897763e-06, "loss": 1.2174, "step": 25390 }, { "epoch": 2.34, "learning_rate": 6.510233548225558e-06, "loss": 1.2389, "step": 25400 }, { "epoch": 2.34, "learning_rate": 6.49296200251531e-06, "loss": 1.2067, "step": 25410 }, { "epoch": 2.34, "learning_rate": 6.475709977985009e-06, "loss": 1.2179, "step": 25420 }, { "epoch": 2.34, "learning_rate": 6.458477492832063e-06, "loss": 1.245, "step": 25430 }, { "epoch": 2.34, "learning_rate": 6.441264565233238e-06, "loss": 1.159, "step": 25440 }, { "epoch": 2.34, "learning_rate": 6.424071213344704e-06, "loss": 1.1742, "step": 25450 }, { "epoch": 2.34, "learning_rate": 6.406897455301952e-06, "loss": 1.1713, "step": 25460 }, { "epoch": 2.34, "learning_rate": 6.389743309219834e-06, "loss": 1.2238, "step": 25470 }, { "epoch": 2.35, "learning_rate": 6.3726087931925e-06, "loss": 1.1619, "step": 25480 }, { "epoch": 2.35, "learning_rate": 6.3554939252934015e-06, "loss": 1.2556, "step": 25490 }, { "epoch": 2.35, "learning_rate": 6.3383987235752504e-06, "loss": 1.2084, "step": 25500 }, { "epoch": 2.35, "learning_rate": 6.321323206070037e-06, "loss": 1.1804, "step": 25510 }, { "epoch": 2.35, "learning_rate": 6.304267390788965e-06, "loss": 1.1585, "step": 25520 }, { "epoch": 2.35, "learning_rate": 6.28723129572247e-06, "loss": 1.2288, "step": 25530 }, { "epoch": 2.35, "learning_rate": 6.270214938840199e-06, "loss": 1.2176, "step": 25540 }, { "epoch": 2.35, "learning_rate": 6.253218338090947e-06, "loss": 1.2079, "step": 25550 }, { "epoch": 2.35, "learning_rate": 6.236241511402699e-06, "loss": 1.2361, "step": 25560 }, { "epoch": 2.35, "learning_rate": 6.219284476682563e-06, "loss": 1.21, "step": 25570 }, { "epoch": 2.35, "learning_rate": 6.2023472518167925e-06, "loss": 1.1788, "step": 25580 }, { "epoch": 2.36, "learning_rate": 6.1854298546707105e-06, "loss": 1.192, "step": 25590 }, { "epoch": 2.36, "learning_rate": 6.168532303088775e-06, "loss": 1.218, "step": 25600 }, { "epoch": 2.36, "learning_rate": 6.1516546148944615e-06, "loss": 1.2521, "step": 25610 }, { "epoch": 2.36, "learning_rate": 6.134796807890331e-06, "loss": 1.2152, "step": 25620 }, { "epoch": 2.36, "learning_rate": 6.117958899857945e-06, "loss": 1.2392, "step": 25630 }, { "epoch": 2.36, "learning_rate": 6.1011409085579004e-06, "loss": 1.2188, "step": 25640 }, { "epoch": 2.36, "learning_rate": 6.0843428517297655e-06, "loss": 1.1926, "step": 25650 }, { "epoch": 2.36, "learning_rate": 6.067564747092094e-06, "loss": 1.1761, "step": 25660 }, { "epoch": 2.36, "learning_rate": 6.0508066123424e-06, "loss": 1.1358, "step": 25670 }, { "epoch": 2.36, "learning_rate": 6.034068465157108e-06, "loss": 1.2062, "step": 25680 }, { "epoch": 2.36, "learning_rate": 6.017350323191595e-06, "loss": 1.2252, "step": 25690 }, { "epoch": 2.37, "learning_rate": 6.000652204080099e-06, "loss": 1.1887, "step": 25700 }, { "epoch": 2.37, "learning_rate": 5.983974125435762e-06, "loss": 1.1966, "step": 25710 }, { "epoch": 2.37, "learning_rate": 5.9673161048505875e-06, "loss": 1.1873, "step": 25720 }, { "epoch": 2.37, "learning_rate": 5.95067815989542e-06, "loss": 1.1841, "step": 25730 }, { "epoch": 2.37, "learning_rate": 5.9340603081199095e-06, "loss": 1.2534, "step": 25740 }, { "epoch": 2.37, "learning_rate": 5.917462567052545e-06, "loss": 1.2114, "step": 25750 }, { "epoch": 2.37, "learning_rate": 5.90088495420057e-06, "loss": 1.158, "step": 25760 }, { "epoch": 2.37, "learning_rate": 5.884327487050012e-06, "loss": 1.1811, "step": 25770 }, { "epoch": 2.37, "learning_rate": 5.867790183065655e-06, "loss": 1.2538, "step": 25780 }, { "epoch": 2.37, "learning_rate": 5.851273059691012e-06, "loss": 1.2804, "step": 25790 }, { "epoch": 2.37, "learning_rate": 5.834776134348291e-06, "loss": 1.2433, "step": 25800 }, { "epoch": 2.38, "learning_rate": 5.818299424438428e-06, "loss": 1.2214, "step": 25810 }, { "epoch": 2.38, "learning_rate": 5.8018429473409995e-06, "loss": 1.2027, "step": 25820 }, { "epoch": 2.38, "learning_rate": 5.785406720414269e-06, "loss": 1.1806, "step": 25830 }, { "epoch": 2.38, "learning_rate": 5.768990760995133e-06, "loss": 1.1438, "step": 25840 }, { "epoch": 2.38, "learning_rate": 5.752595086399096e-06, "loss": 1.2284, "step": 25850 }, { "epoch": 2.38, "learning_rate": 5.73621971392029e-06, "loss": 1.2255, "step": 25860 }, { "epoch": 2.38, "learning_rate": 5.719864660831403e-06, "loss": 1.1849, "step": 25870 }, { "epoch": 2.38, "learning_rate": 5.703529944383723e-06, "loss": 1.243, "step": 25880 }, { "epoch": 2.38, "learning_rate": 5.687215581807048e-06, "loss": 1.2097, "step": 25890 }, { "epoch": 2.38, "learning_rate": 5.670921590309758e-06, "loss": 1.1817, "step": 25900 }, { "epoch": 2.38, "learning_rate": 5.654647987078696e-06, "loss": 1.2192, "step": 25910 }, { "epoch": 2.39, "learning_rate": 5.6383947892792334e-06, "loss": 1.2026, "step": 25920 }, { "epoch": 2.39, "learning_rate": 5.622162014055191e-06, "loss": 1.2409, "step": 25930 }, { "epoch": 2.39, "learning_rate": 5.605949678528874e-06, "loss": 1.2385, "step": 25940 }, { "epoch": 2.39, "learning_rate": 5.5897577998010045e-06, "loss": 1.138, "step": 25950 }, { "epoch": 2.39, "learning_rate": 5.573586394950742e-06, "loss": 1.2289, "step": 25960 }, { "epoch": 2.39, "learning_rate": 5.557435481035658e-06, "loss": 1.2537, "step": 25970 }, { "epoch": 2.39, "learning_rate": 5.541305075091679e-06, "loss": 1.2298, "step": 25980 }, { "epoch": 2.39, "learning_rate": 5.525195194133134e-06, "loss": 1.2479, "step": 25990 }, { "epoch": 2.39, "learning_rate": 5.509105855152677e-06, "loss": 1.2148, "step": 26000 }, { "epoch": 2.39, "learning_rate": 5.49303707512131e-06, "loss": 1.2071, "step": 26010 }, { "epoch": 2.4, "learning_rate": 5.4769888709883425e-06, "loss": 1.1897, "step": 26020 }, { "epoch": 2.4, "learning_rate": 5.460961259681388e-06, "loss": 1.2098, "step": 26030 }, { "epoch": 2.4, "learning_rate": 5.444954258106325e-06, "loss": 1.2361, "step": 26040 }, { "epoch": 2.4, "learning_rate": 5.42896788314731e-06, "loss": 1.2338, "step": 26050 }, { "epoch": 2.4, "learning_rate": 5.413002151666721e-06, "loss": 1.2276, "step": 26060 }, { "epoch": 2.4, "learning_rate": 5.39705708050518e-06, "loss": 1.1595, "step": 26070 }, { "epoch": 2.4, "learning_rate": 5.381132686481519e-06, "loss": 1.2244, "step": 26080 }, { "epoch": 2.4, "learning_rate": 5.365228986392734e-06, "loss": 1.2506, "step": 26090 }, { "epoch": 2.4, "learning_rate": 5.34934599701403e-06, "loss": 1.1824, "step": 26100 }, { "epoch": 2.4, "learning_rate": 5.333483735098727e-06, "loss": 1.2216, "step": 26110 }, { "epoch": 2.4, "learning_rate": 5.317642217378319e-06, "loss": 1.2072, "step": 26120 }, { "epoch": 2.41, "learning_rate": 5.301821460562376e-06, "loss": 1.2363, "step": 26130 }, { "epoch": 2.41, "learning_rate": 5.2860214813386296e-06, "loss": 1.1966, "step": 26140 }, { "epoch": 2.41, "learning_rate": 5.270242296372835e-06, "loss": 1.2067, "step": 26150 }, { "epoch": 2.41, "learning_rate": 5.254483922308853e-06, "loss": 1.1887, "step": 26160 }, { "epoch": 2.41, "learning_rate": 5.238746375768566e-06, "loss": 1.2311, "step": 26170 }, { "epoch": 2.41, "learning_rate": 5.223029673351918e-06, "loss": 1.1872, "step": 26180 }, { "epoch": 2.41, "learning_rate": 5.2073338316368295e-06, "loss": 1.1954, "step": 26190 }, { "epoch": 2.41, "learning_rate": 5.19165886717925e-06, "loss": 1.2046, "step": 26200 }, { "epoch": 2.41, "learning_rate": 5.1760047965131005e-06, "loss": 1.1914, "step": 26210 }, { "epoch": 2.41, "learning_rate": 5.1603716361502406e-06, "loss": 1.1714, "step": 26220 }, { "epoch": 2.41, "learning_rate": 5.14475940258051e-06, "loss": 1.2284, "step": 26230 }, { "epoch": 2.42, "learning_rate": 5.129168112271637e-06, "loss": 1.1919, "step": 26240 }, { "epoch": 2.42, "learning_rate": 5.1135977816692856e-06, "loss": 1.1776, "step": 26250 }, { "epoch": 2.42, "learning_rate": 5.098048427197008e-06, "loss": 1.2503, "step": 26260 }, { "epoch": 2.42, "learning_rate": 5.082520065256227e-06, "loss": 1.201, "step": 26270 }, { "epoch": 2.42, "learning_rate": 5.067012712226216e-06, "loss": 1.1843, "step": 26280 }, { "epoch": 2.42, "learning_rate": 5.0515263844641e-06, "loss": 1.2094, "step": 26290 }, { "epoch": 2.42, "learning_rate": 5.036061098304812e-06, "loss": 1.2745, "step": 26300 }, { "epoch": 2.42, "learning_rate": 5.020616870061107e-06, "loss": 1.2234, "step": 26310 }, { "epoch": 2.42, "learning_rate": 5.005193716023515e-06, "loss": 1.2375, "step": 26320 }, { "epoch": 2.42, "learning_rate": 4.989791652460355e-06, "loss": 1.2322, "step": 26330 }, { "epoch": 2.42, "learning_rate": 4.974410695617673e-06, "loss": 1.2678, "step": 26340 }, { "epoch": 2.43, "learning_rate": 4.959050861719275e-06, "loss": 1.2022, "step": 26350 }, { "epoch": 2.43, "learning_rate": 4.943712166966669e-06, "loss": 1.2102, "step": 26360 }, { "epoch": 2.43, "learning_rate": 4.9283946275390765e-06, "loss": 1.2362, "step": 26370 }, { "epoch": 2.43, "learning_rate": 4.913098259593413e-06, "loss": 1.2029, "step": 26380 }, { "epoch": 2.43, "learning_rate": 4.897823079264236e-06, "loss": 1.2296, "step": 26390 }, { "epoch": 2.43, "learning_rate": 4.882569102663781e-06, "loss": 1.2391, "step": 26400 }, { "epoch": 2.43, "learning_rate": 4.867336345881895e-06, "loss": 1.2591, "step": 26410 }, { "epoch": 2.43, "learning_rate": 4.85212482498606e-06, "loss": 1.2007, "step": 26420 }, { "epoch": 2.43, "learning_rate": 4.836934556021355e-06, "loss": 1.2211, "step": 26430 }, { "epoch": 2.43, "learning_rate": 4.82176555501044e-06, "loss": 1.2053, "step": 26440 }, { "epoch": 2.43, "learning_rate": 4.806617837953536e-06, "loss": 1.1713, "step": 26450 }, { "epoch": 2.44, "learning_rate": 4.791491420828428e-06, "loss": 1.2284, "step": 26460 }, { "epoch": 2.44, "learning_rate": 4.776386319590412e-06, "loss": 1.2159, "step": 26470 }, { "epoch": 2.44, "learning_rate": 4.761302550172319e-06, "loss": 1.1463, "step": 26480 }, { "epoch": 2.44, "learning_rate": 4.74624012848448e-06, "loss": 1.2125, "step": 26490 }, { "epoch": 2.44, "learning_rate": 4.731199070414694e-06, "loss": 1.1595, "step": 26500 }, { "epoch": 2.44, "learning_rate": 4.7161793918282374e-06, "loss": 1.2313, "step": 26510 }, { "epoch": 2.44, "learning_rate": 4.701181108567826e-06, "loss": 1.1493, "step": 26520 }, { "epoch": 2.44, "learning_rate": 4.686204236453626e-06, "loss": 1.1896, "step": 26530 }, { "epoch": 2.44, "learning_rate": 4.671248791283183e-06, "loss": 1.2132, "step": 26540 }, { "epoch": 2.44, "learning_rate": 4.656314788831492e-06, "loss": 1.2173, "step": 26550 }, { "epoch": 2.44, "learning_rate": 4.64140224485089e-06, "loss": 1.2067, "step": 26560 }, { "epoch": 2.45, "learning_rate": 4.626511175071099e-06, "loss": 1.1786, "step": 26570 }, { "epoch": 2.45, "learning_rate": 4.611641595199173e-06, "loss": 1.2066, "step": 26580 }, { "epoch": 2.45, "learning_rate": 4.596793520919526e-06, "loss": 1.2441, "step": 26590 }, { "epoch": 2.45, "learning_rate": 4.581966967893856e-06, "loss": 1.2152, "step": 26600 }, { "epoch": 2.45, "learning_rate": 4.567161951761182e-06, "loss": 1.221, "step": 26610 }, { "epoch": 2.45, "learning_rate": 4.552378488137807e-06, "loss": 1.2148, "step": 26620 }, { "epoch": 2.45, "learning_rate": 4.53761659261728e-06, "loss": 1.1904, "step": 26630 }, { "epoch": 2.45, "learning_rate": 4.52287628077043e-06, "loss": 1.2509, "step": 26640 }, { "epoch": 2.45, "learning_rate": 4.508157568145285e-06, "loss": 1.2018, "step": 26650 }, { "epoch": 2.45, "learning_rate": 4.493460470267119e-06, "loss": 1.2174, "step": 26660 }, { "epoch": 2.45, "learning_rate": 4.478785002638394e-06, "loss": 1.1808, "step": 26670 }, { "epoch": 2.46, "learning_rate": 4.464131180738768e-06, "loss": 1.2039, "step": 26680 }, { "epoch": 2.46, "learning_rate": 4.449499020025039e-06, "loss": 1.2171, "step": 26690 }, { "epoch": 2.46, "learning_rate": 4.434888535931192e-06, "loss": 1.2214, "step": 26700 }, { "epoch": 2.46, "learning_rate": 4.42029974386832e-06, "loss": 1.2732, "step": 26710 }, { "epoch": 2.46, "learning_rate": 4.405732659224648e-06, "loss": 1.2228, "step": 26720 }, { "epoch": 2.46, "learning_rate": 4.391187297365515e-06, "loss": 1.2624, "step": 26730 }, { "epoch": 2.46, "learning_rate": 4.376663673633316e-06, "loss": 1.1639, "step": 26740 }, { "epoch": 2.46, "learning_rate": 4.36216180334755e-06, "loss": 1.1382, "step": 26750 }, { "epoch": 2.46, "learning_rate": 4.347681701804745e-06, "loss": 1.2351, "step": 26760 }, { "epoch": 2.46, "learning_rate": 4.333223384278487e-06, "loss": 1.1664, "step": 26770 }, { "epoch": 2.47, "learning_rate": 4.318786866019356e-06, "loss": 1.1857, "step": 26780 }, { "epoch": 2.47, "learning_rate": 4.304372162254985e-06, "loss": 1.2027, "step": 26790 }, { "epoch": 2.47, "learning_rate": 4.2899792881899495e-06, "loss": 1.2222, "step": 26800 }, { "epoch": 2.47, "learning_rate": 4.275608259005834e-06, "loss": 1.1844, "step": 26810 }, { "epoch": 2.47, "learning_rate": 4.26125908986115e-06, "loss": 1.1695, "step": 26820 }, { "epoch": 2.47, "learning_rate": 4.246931795891379e-06, "loss": 1.1979, "step": 26830 }, { "epoch": 2.47, "learning_rate": 4.232626392208902e-06, "loss": 1.1793, "step": 26840 }, { "epoch": 2.47, "learning_rate": 4.218342893903049e-06, "loss": 1.1703, "step": 26850 }, { "epoch": 2.47, "learning_rate": 4.204081316040001e-06, "loss": 1.22, "step": 26860 }, { "epoch": 2.47, "learning_rate": 4.189841673662847e-06, "loss": 1.1726, "step": 26870 }, { "epoch": 2.47, "learning_rate": 4.175623981791518e-06, "loss": 1.2165, "step": 26880 }, { "epoch": 2.48, "learning_rate": 4.161428255422814e-06, "loss": 1.1631, "step": 26890 }, { "epoch": 2.48, "learning_rate": 4.14725450953034e-06, "loss": 1.1741, "step": 26900 }, { "epoch": 2.48, "learning_rate": 4.133102759064539e-06, "loss": 1.169, "step": 26910 }, { "epoch": 2.48, "learning_rate": 4.118973018952646e-06, "loss": 1.2116, "step": 26920 }, { "epoch": 2.48, "learning_rate": 4.104865304098668e-06, "loss": 1.1948, "step": 26930 }, { "epoch": 2.48, "learning_rate": 4.090779629383401e-06, "loss": 1.1897, "step": 26940 }, { "epoch": 2.48, "learning_rate": 4.076716009664366e-06, "loss": 1.1897, "step": 26950 }, { "epoch": 2.48, "learning_rate": 4.062674459775847e-06, "loss": 1.1672, "step": 26960 }, { "epoch": 2.48, "learning_rate": 4.048654994528836e-06, "loss": 1.2235, "step": 26970 }, { "epoch": 2.48, "learning_rate": 4.034657628711039e-06, "loss": 1.1545, "step": 26980 }, { "epoch": 2.48, "learning_rate": 4.020682377086829e-06, "loss": 1.2166, "step": 26990 }, { "epoch": 2.49, "learning_rate": 4.006729254397279e-06, "loss": 1.1383, "step": 27000 }, { "epoch": 2.49, "learning_rate": 3.992798275360105e-06, "loss": 1.2158, "step": 27010 }, { "epoch": 2.49, "learning_rate": 3.978889454669671e-06, "loss": 1.1325, "step": 27020 }, { "epoch": 2.49, "learning_rate": 3.965002806996976e-06, "loss": 1.204, "step": 27030 }, { "epoch": 2.49, "learning_rate": 3.951138346989611e-06, "loss": 1.1595, "step": 27040 }, { "epoch": 2.49, "learning_rate": 3.937296089271788e-06, "loss": 1.2002, "step": 27050 }, { "epoch": 2.49, "learning_rate": 3.923476048444275e-06, "loss": 1.2319, "step": 27060 }, { "epoch": 2.49, "learning_rate": 3.90967823908443e-06, "loss": 1.1922, "step": 27070 }, { "epoch": 2.49, "learning_rate": 3.895902675746133e-06, "loss": 1.185, "step": 27080 }, { "epoch": 2.49, "learning_rate": 3.8821493729598395e-06, "loss": 1.1847, "step": 27090 }, { "epoch": 2.49, "learning_rate": 3.868418345232483e-06, "loss": 1.2013, "step": 27100 }, { "epoch": 2.5, "learning_rate": 3.85470960704753e-06, "loss": 1.186, "step": 27110 }, { "epoch": 2.5, "learning_rate": 3.841023172864913e-06, "loss": 1.1713, "step": 27120 }, { "epoch": 2.5, "learning_rate": 3.8273590571210664e-06, "loss": 1.1691, "step": 27130 }, { "epoch": 2.5, "learning_rate": 3.8137172742288478e-06, "loss": 1.23, "step": 27140 }, { "epoch": 2.5, "learning_rate": 3.8000978385775903e-06, "loss": 1.1863, "step": 27150 }, { "epoch": 2.5, "learning_rate": 3.7865007645330437e-06, "loss": 1.1926, "step": 27160 }, { "epoch": 2.5, "learning_rate": 3.7729260664373627e-06, "loss": 1.1845, "step": 27170 }, { "epoch": 2.5, "learning_rate": 3.759373758609114e-06, "loss": 1.1876, "step": 27180 }, { "epoch": 2.5, "learning_rate": 3.7458438553432285e-06, "loss": 1.2309, "step": 27190 }, { "epoch": 2.5, "learning_rate": 3.7323363709110244e-06, "loss": 1.151, "step": 27200 }, { "epoch": 2.5, "learning_rate": 3.7188513195601655e-06, "loss": 1.1694, "step": 27210 }, { "epoch": 2.51, "learning_rate": 3.7053887155146542e-06, "loss": 1.1857, "step": 27220 }, { "epoch": 2.51, "learning_rate": 3.6919485729748017e-06, "loss": 1.2149, "step": 27230 }, { "epoch": 2.51, "learning_rate": 3.6785309061172528e-06, "loss": 1.1707, "step": 27240 }, { "epoch": 2.51, "learning_rate": 3.665135729094921e-06, "loss": 1.1843, "step": 27250 }, { "epoch": 2.51, "learning_rate": 3.651763056037008e-06, "loss": 1.2071, "step": 27260 }, { "epoch": 2.51, "learning_rate": 3.6384129010489846e-06, "loss": 1.1205, "step": 27270 }, { "epoch": 2.51, "learning_rate": 3.625085278212553e-06, "loss": 1.1514, "step": 27280 }, { "epoch": 2.51, "learning_rate": 3.611780201585671e-06, "loss": 1.1854, "step": 27290 }, { "epoch": 2.51, "learning_rate": 3.59849768520249e-06, "loss": 1.1839, "step": 27300 }, { "epoch": 2.51, "learning_rate": 3.585237743073386e-06, "loss": 1.2443, "step": 27310 }, { "epoch": 2.51, "learning_rate": 3.5720003891848957e-06, "loss": 1.2026, "step": 27320 }, { "epoch": 2.52, "learning_rate": 3.5587856374997787e-06, "loss": 1.201, "step": 27330 }, { "epoch": 2.52, "learning_rate": 3.5455935019569008e-06, "loss": 1.215, "step": 27340 }, { "epoch": 2.52, "learning_rate": 3.532423996471307e-06, "loss": 1.2232, "step": 27350 }, { "epoch": 2.52, "learning_rate": 3.5192771349341543e-06, "loss": 1.2152, "step": 27360 }, { "epoch": 2.52, "learning_rate": 3.506152931212725e-06, "loss": 1.1677, "step": 27370 }, { "epoch": 2.52, "learning_rate": 3.493051399150388e-06, "loss": 1.2094, "step": 27380 }, { "epoch": 2.52, "learning_rate": 3.4799725525666266e-06, "loss": 1.23, "step": 27390 }, { "epoch": 2.52, "learning_rate": 3.4669164052569617e-06, "loss": 1.2071, "step": 27400 }, { "epoch": 2.52, "learning_rate": 3.453882970993e-06, "loss": 1.2523, "step": 27410 }, { "epoch": 2.52, "learning_rate": 3.440872263522363e-06, "loss": 1.2297, "step": 27420 }, { "epoch": 2.52, "learning_rate": 3.427884296568726e-06, "loss": 1.2056, "step": 27430 }, { "epoch": 2.53, "learning_rate": 3.414919083831755e-06, "loss": 1.1987, "step": 27440 }, { "epoch": 2.53, "learning_rate": 3.401976638987134e-06, "loss": 1.2467, "step": 27450 }, { "epoch": 2.53, "learning_rate": 3.389056975686522e-06, "loss": 1.2355, "step": 27460 }, { "epoch": 2.53, "learning_rate": 3.3761601075575465e-06, "loss": 1.21, "step": 27470 }, { "epoch": 2.53, "learning_rate": 3.363286048203798e-06, "loss": 1.1901, "step": 27480 }, { "epoch": 2.53, "learning_rate": 3.3504348112047947e-06, "loss": 1.1724, "step": 27490 }, { "epoch": 2.53, "learning_rate": 3.337606410115998e-06, "loss": 1.1775, "step": 27500 }, { "epoch": 2.53, "learning_rate": 3.3248008584687767e-06, "loss": 1.1554, "step": 27510 }, { "epoch": 2.53, "learning_rate": 3.3120181697703976e-06, "loss": 1.1952, "step": 27520 }, { "epoch": 2.53, "learning_rate": 3.299258357504001e-06, "loss": 1.2248, "step": 27530 }, { "epoch": 2.54, "learning_rate": 3.286521435128617e-06, "loss": 1.1875, "step": 27540 }, { "epoch": 2.54, "learning_rate": 3.273807416079114e-06, "loss": 1.2243, "step": 27550 }, { "epoch": 2.54, "learning_rate": 3.2611163137662105e-06, "loss": 1.2165, "step": 27560 }, { "epoch": 2.54, "learning_rate": 3.24844814157646e-06, "loss": 1.243, "step": 27570 }, { "epoch": 2.54, "learning_rate": 3.235802912872207e-06, "loss": 1.2465, "step": 27580 }, { "epoch": 2.54, "learning_rate": 3.2231806409916215e-06, "loss": 1.2078, "step": 27590 }, { "epoch": 2.54, "learning_rate": 3.2105813392486334e-06, "loss": 1.1769, "step": 27600 }, { "epoch": 2.54, "learning_rate": 3.1980050209329675e-06, "loss": 1.1853, "step": 27610 }, { "epoch": 2.54, "learning_rate": 3.1854516993100798e-06, "loss": 1.1801, "step": 27620 }, { "epoch": 2.54, "learning_rate": 3.1729213876212084e-06, "loss": 1.2374, "step": 27630 }, { "epoch": 2.54, "learning_rate": 3.1604140990832725e-06, "loss": 1.1938, "step": 27640 }, { "epoch": 2.55, "learning_rate": 3.14792984688895e-06, "loss": 1.2643, "step": 27650 }, { "epoch": 2.55, "learning_rate": 3.135468644206582e-06, "loss": 1.1813, "step": 27660 }, { "epoch": 2.55, "learning_rate": 3.1230305041802326e-06, "loss": 1.2001, "step": 27670 }, { "epoch": 2.55, "learning_rate": 3.1106154399296095e-06, "loss": 1.2164, "step": 27680 }, { "epoch": 2.55, "learning_rate": 3.0982234645500986e-06, "loss": 1.1972, "step": 27690 }, { "epoch": 2.55, "learning_rate": 3.08585459111273e-06, "loss": 1.1964, "step": 27700 }, { "epoch": 2.55, "learning_rate": 3.073508832664154e-06, "loss": 1.2009, "step": 27710 }, { "epoch": 2.55, "learning_rate": 3.061186202226654e-06, "loss": 1.2299, "step": 27720 }, { "epoch": 2.55, "learning_rate": 3.0488867127981073e-06, "loss": 1.2007, "step": 27730 }, { "epoch": 2.55, "learning_rate": 3.0366103773519868e-06, "loss": 1.1948, "step": 27740 }, { "epoch": 2.55, "learning_rate": 3.0243572088373446e-06, "loss": 1.1785, "step": 27750 }, { "epoch": 2.56, "learning_rate": 3.0121272201787955e-06, "loss": 1.2156, "step": 27760 }, { "epoch": 2.56, "learning_rate": 2.9999204242764976e-06, "loss": 1.1987, "step": 27770 }, { "epoch": 2.56, "learning_rate": 2.987736834006155e-06, "loss": 1.1796, "step": 27780 }, { "epoch": 2.56, "learning_rate": 2.9755764622189846e-06, "loss": 1.1707, "step": 27790 }, { "epoch": 2.56, "learning_rate": 2.9634393217417206e-06, "loss": 1.142, "step": 27800 }, { "epoch": 2.56, "learning_rate": 2.9513254253765945e-06, "loss": 1.1923, "step": 27810 }, { "epoch": 2.56, "learning_rate": 2.939234785901307e-06, "loss": 1.2249, "step": 27820 }, { "epoch": 2.56, "learning_rate": 2.927167416069043e-06, "loss": 1.1328, "step": 27830 }, { "epoch": 2.56, "learning_rate": 2.9151233286084324e-06, "loss": 1.1791, "step": 27840 }, { "epoch": 2.56, "learning_rate": 2.9031025362235537e-06, "loss": 1.1947, "step": 27850 }, { "epoch": 2.56, "learning_rate": 2.8911050515938954e-06, "loss": 1.2373, "step": 27860 }, { "epoch": 2.57, "learning_rate": 2.879130887374401e-06, "loss": 1.1818, "step": 27870 }, { "epoch": 2.57, "learning_rate": 2.8671800561953706e-06, "loss": 1.1515, "step": 27880 }, { "epoch": 2.57, "learning_rate": 2.8552525706625256e-06, "loss": 1.2142, "step": 27890 }, { "epoch": 2.57, "learning_rate": 2.8433484433569384e-06, "loss": 1.2131, "step": 27900 }, { "epoch": 2.57, "learning_rate": 2.8314676868350654e-06, "loss": 1.2032, "step": 27910 }, { "epoch": 2.57, "learning_rate": 2.8196103136286845e-06, "loss": 1.1593, "step": 27920 }, { "epoch": 2.57, "learning_rate": 2.8077763362449464e-06, "loss": 1.1954, "step": 27930 }, { "epoch": 2.57, "learning_rate": 2.7959657671662836e-06, "loss": 1.2057, "step": 27940 }, { "epoch": 2.57, "learning_rate": 2.784178618850472e-06, "loss": 1.1612, "step": 27950 }, { "epoch": 2.57, "learning_rate": 2.7724149037305527e-06, "loss": 1.1211, "step": 27960 }, { "epoch": 2.57, "learning_rate": 2.760674634214877e-06, "loss": 1.1194, "step": 27970 }, { "epoch": 2.58, "learning_rate": 2.7489578226870453e-06, "loss": 1.2213, "step": 27980 }, { "epoch": 2.58, "learning_rate": 2.7372644815059246e-06, "loss": 1.1751, "step": 27990 }, { "epoch": 2.58, "learning_rate": 2.7255946230056284e-06, "loss": 1.2255, "step": 28000 }, { "epoch": 2.58, "learning_rate": 2.7139482594954887e-06, "loss": 1.1974, "step": 28010 }, { "epoch": 2.58, "learning_rate": 2.70232540326007e-06, "loss": 1.1569, "step": 28020 }, { "epoch": 2.58, "learning_rate": 2.6907260665591237e-06, "loss": 1.1929, "step": 28030 }, { "epoch": 2.58, "learning_rate": 2.6791502616276082e-06, "loss": 1.2049, "step": 28040 }, { "epoch": 2.58, "learning_rate": 2.6675980006756558e-06, "loss": 1.2045, "step": 28050 }, { "epoch": 2.58, "learning_rate": 2.6560692958885693e-06, "loss": 1.1523, "step": 28060 }, { "epoch": 2.58, "learning_rate": 2.644564159426788e-06, "loss": 1.1713, "step": 28070 }, { "epoch": 2.58, "learning_rate": 2.6330826034259127e-06, "loss": 1.1345, "step": 28080 }, { "epoch": 2.59, "learning_rate": 2.6216246399966505e-06, "loss": 1.1627, "step": 28090 }, { "epoch": 2.59, "learning_rate": 2.6101902812248417e-06, "loss": 1.202, "step": 28100 }, { "epoch": 2.59, "learning_rate": 2.5987795391714255e-06, "loss": 1.1545, "step": 28110 }, { "epoch": 2.59, "learning_rate": 2.5873924258724147e-06, "loss": 1.2147, "step": 28120 }, { "epoch": 2.59, "learning_rate": 2.5760289533389215e-06, "loss": 1.1749, "step": 28130 }, { "epoch": 2.59, "learning_rate": 2.5646891335570966e-06, "loss": 1.2141, "step": 28140 }, { "epoch": 2.59, "learning_rate": 2.5533729784881667e-06, "loss": 1.19, "step": 28150 }, { "epoch": 2.59, "learning_rate": 2.5420805000683697e-06, "loss": 1.173, "step": 28160 }, { "epoch": 2.59, "learning_rate": 2.530811710209008e-06, "loss": 1.1873, "step": 28170 }, { "epoch": 2.59, "learning_rate": 2.519566620796354e-06, "loss": 1.1993, "step": 28180 }, { "epoch": 2.59, "learning_rate": 2.5083452436917177e-06, "loss": 1.191, "step": 28190 }, { "epoch": 2.6, "learning_rate": 2.497147590731369e-06, "loss": 1.1884, "step": 28200 }, { "epoch": 2.6, "learning_rate": 2.4859736737265714e-06, "loss": 1.161, "step": 28210 }, { "epoch": 2.6, "learning_rate": 2.4748235044635415e-06, "loss": 1.201, "step": 28220 }, { "epoch": 2.6, "learning_rate": 2.463697094703457e-06, "loss": 1.1892, "step": 28230 }, { "epoch": 2.6, "learning_rate": 2.452594456182428e-06, "loss": 1.1956, "step": 28240 }, { "epoch": 2.6, "learning_rate": 2.441515600611488e-06, "loss": 1.1413, "step": 28250 }, { "epoch": 2.6, "learning_rate": 2.4304605396765934e-06, "loss": 1.1542, "step": 28260 }, { "epoch": 2.6, "learning_rate": 2.4194292850385852e-06, "loss": 1.1977, "step": 28270 }, { "epoch": 2.6, "learning_rate": 2.4084218483332144e-06, "loss": 1.1746, "step": 28280 }, { "epoch": 2.6, "learning_rate": 2.3974382411710962e-06, "loss": 1.163, "step": 28290 }, { "epoch": 2.6, "learning_rate": 2.3864784751377186e-06, "loss": 1.2037, "step": 28300 }, { "epoch": 2.61, "learning_rate": 2.375542561793409e-06, "loss": 1.1697, "step": 28310 }, { "epoch": 2.61, "learning_rate": 2.364630512673349e-06, "loss": 1.2154, "step": 28320 }, { "epoch": 2.61, "learning_rate": 2.3537423392875376e-06, "loss": 1.2078, "step": 28330 }, { "epoch": 2.61, "learning_rate": 2.3428780531207924e-06, "loss": 1.1488, "step": 28340 }, { "epoch": 2.61, "learning_rate": 2.3320376656327497e-06, "loss": 1.1855, "step": 28350 }, { "epoch": 2.61, "learning_rate": 2.3212211882578077e-06, "loss": 1.163, "step": 28360 }, { "epoch": 2.61, "learning_rate": 2.310428632405176e-06, "loss": 1.2428, "step": 28370 }, { "epoch": 2.61, "learning_rate": 2.2996600094588037e-06, "loss": 1.2595, "step": 28380 }, { "epoch": 2.61, "learning_rate": 2.2889153307774193e-06, "loss": 1.2455, "step": 28390 }, { "epoch": 2.61, "learning_rate": 2.2781946076944727e-06, "loss": 1.2011, "step": 28400 }, { "epoch": 2.62, "learning_rate": 2.2674978515181743e-06, "loss": 1.1563, "step": 28410 }, { "epoch": 2.62, "learning_rate": 2.2568250735314237e-06, "loss": 1.1688, "step": 28420 }, { "epoch": 2.62, "learning_rate": 2.2461762849918495e-06, "loss": 1.2487, "step": 28430 }, { "epoch": 2.62, "learning_rate": 2.235551497131763e-06, "loss": 1.1671, "step": 28440 }, { "epoch": 2.62, "learning_rate": 2.22495072115817e-06, "loss": 1.1833, "step": 28450 }, { "epoch": 2.62, "learning_rate": 2.214373968252734e-06, "loss": 1.1801, "step": 28460 }, { "epoch": 2.62, "learning_rate": 2.2038212495718026e-06, "loss": 1.1967, "step": 28470 }, { "epoch": 2.62, "learning_rate": 2.19329257624635e-06, "loss": 1.2236, "step": 28480 }, { "epoch": 2.62, "learning_rate": 2.1827879593820044e-06, "loss": 1.2091, "step": 28490 }, { "epoch": 2.62, "learning_rate": 2.172307410058999e-06, "loss": 1.1736, "step": 28500 }, { "epoch": 2.62, "learning_rate": 2.161850939332202e-06, "loss": 1.1917, "step": 28510 }, { "epoch": 2.63, "learning_rate": 2.1514185582310694e-06, "loss": 1.2084, "step": 28520 }, { "epoch": 2.63, "learning_rate": 2.1410102777596503e-06, "loss": 1.2356, "step": 28530 }, { "epoch": 2.63, "learning_rate": 2.130626108896583e-06, "loss": 1.144, "step": 28540 }, { "epoch": 2.63, "learning_rate": 2.1202660625950527e-06, "loss": 1.2438, "step": 28550 }, { "epoch": 2.63, "learning_rate": 2.109930149782821e-06, "loss": 1.1833, "step": 28560 }, { "epoch": 2.63, "learning_rate": 2.0996183813621784e-06, "loss": 1.1737, "step": 28570 }, { "epoch": 2.63, "learning_rate": 2.0893307682099543e-06, "loss": 1.1638, "step": 28580 }, { "epoch": 2.63, "learning_rate": 2.0790673211775026e-06, "loss": 1.1755, "step": 28590 }, { "epoch": 2.63, "learning_rate": 2.068828051090685e-06, "loss": 1.2055, "step": 28600 }, { "epoch": 2.63, "learning_rate": 2.0586129687498513e-06, "loss": 1.2122, "step": 28610 }, { "epoch": 2.63, "learning_rate": 2.0484220849298548e-06, "loss": 1.2074, "step": 28620 }, { "epoch": 2.64, "learning_rate": 2.0382554103800076e-06, "loss": 1.2601, "step": 28630 }, { "epoch": 2.64, "learning_rate": 2.0281129558240963e-06, "loss": 1.169, "step": 28640 }, { "epoch": 2.64, "learning_rate": 2.0179947319603656e-06, "loss": 1.1724, "step": 28650 }, { "epoch": 2.64, "learning_rate": 2.0079007494614804e-06, "loss": 1.1817, "step": 28660 }, { "epoch": 2.64, "learning_rate": 1.9978310189745637e-06, "loss": 1.1449, "step": 28670 }, { "epoch": 2.64, "learning_rate": 1.9877855511211325e-06, "loss": 1.2412, "step": 28680 }, { "epoch": 2.64, "learning_rate": 1.9777643564971302e-06, "loss": 1.1597, "step": 28690 }, { "epoch": 2.64, "learning_rate": 1.96776744567288e-06, "loss": 1.1549, "step": 28700 }, { "epoch": 2.64, "learning_rate": 1.9577948291931103e-06, "loss": 1.223, "step": 28710 }, { "epoch": 2.64, "learning_rate": 1.947846517576907e-06, "loss": 1.1874, "step": 28720 }, { "epoch": 2.64, "learning_rate": 1.937922521317731e-06, "loss": 1.2007, "step": 28730 }, { "epoch": 2.65, "learning_rate": 1.9280228508833818e-06, "loss": 1.1798, "step": 28740 }, { "epoch": 2.65, "learning_rate": 1.9181475167160206e-06, "loss": 1.2094, "step": 28750 }, { "epoch": 2.65, "learning_rate": 1.908296529232112e-06, "loss": 1.1684, "step": 28760 }, { "epoch": 2.65, "learning_rate": 1.898469898822461e-06, "loss": 1.2038, "step": 28770 }, { "epoch": 2.65, "learning_rate": 1.8886676358521787e-06, "loss": 1.2081, "step": 28780 }, { "epoch": 2.65, "learning_rate": 1.8788897506606618e-06, "loss": 1.1801, "step": 28790 }, { "epoch": 2.65, "learning_rate": 1.8691362535616097e-06, "loss": 1.2101, "step": 28800 }, { "epoch": 2.65, "learning_rate": 1.8594071548429736e-06, "loss": 1.1516, "step": 28810 }, { "epoch": 2.65, "learning_rate": 1.8497024647669948e-06, "loss": 1.2125, "step": 28820 }, { "epoch": 2.65, "learning_rate": 1.8400221935701539e-06, "loss": 1.1581, "step": 28830 }, { "epoch": 2.65, "learning_rate": 1.8303663514631836e-06, "loss": 1.2095, "step": 28840 }, { "epoch": 2.66, "learning_rate": 1.8207349486310338e-06, "loss": 1.1507, "step": 28850 }, { "epoch": 2.66, "learning_rate": 1.8111279952328946e-06, "loss": 1.1364, "step": 28860 }, { "epoch": 2.66, "learning_rate": 1.801545501402152e-06, "loss": 1.245, "step": 28870 }, { "epoch": 2.66, "learning_rate": 1.7919874772463957e-06, "loss": 1.2474, "step": 28880 }, { "epoch": 2.66, "learning_rate": 1.7824539328474165e-06, "loss": 1.1656, "step": 28890 }, { "epoch": 2.66, "learning_rate": 1.7729448782611652e-06, "loss": 1.2162, "step": 28900 }, { "epoch": 2.66, "learning_rate": 1.7634603235177765e-06, "loss": 1.2703, "step": 28910 }, { "epoch": 2.66, "learning_rate": 1.7540002786215286e-06, "loss": 1.1471, "step": 28920 }, { "epoch": 2.66, "learning_rate": 1.7445647535508646e-06, "loss": 1.1894, "step": 28930 }, { "epoch": 2.66, "learning_rate": 1.7351537582583399e-06, "loss": 1.2465, "step": 28940 }, { "epoch": 2.66, "learning_rate": 1.7257673026706695e-06, "loss": 1.1865, "step": 28950 }, { "epoch": 2.67, "learning_rate": 1.7164053966886474e-06, "loss": 1.2328, "step": 28960 }, { "epoch": 2.67, "learning_rate": 1.7070680501872022e-06, "loss": 1.2049, "step": 28970 }, { "epoch": 2.67, "learning_rate": 1.6977552730153333e-06, "loss": 1.2396, "step": 28980 }, { "epoch": 2.67, "learning_rate": 1.6884670749961474e-06, "loss": 1.2064, "step": 28990 }, { "epoch": 2.67, "learning_rate": 1.6792034659268013e-06, "loss": 1.1882, "step": 29000 }, { "epoch": 2.67, "learning_rate": 1.6699644555785405e-06, "loss": 1.2412, "step": 29010 }, { "epoch": 2.67, "learning_rate": 1.6607500536966442e-06, "loss": 1.189, "step": 29020 }, { "epoch": 2.67, "learning_rate": 1.6515602700004463e-06, "loss": 1.1894, "step": 29030 }, { "epoch": 2.67, "learning_rate": 1.6423951141833011e-06, "loss": 1.2445, "step": 29040 }, { "epoch": 2.67, "learning_rate": 1.6332545959126033e-06, "loss": 1.1802, "step": 29050 }, { "epoch": 2.67, "learning_rate": 1.6241387248297407e-06, "loss": 1.2062, "step": 29060 }, { "epoch": 2.68, "learning_rate": 1.6150475105501189e-06, "loss": 1.1678, "step": 29070 }, { "epoch": 2.68, "learning_rate": 1.6059809626631283e-06, "loss": 1.2091, "step": 29080 }, { "epoch": 2.68, "learning_rate": 1.5969390907321385e-06, "loss": 1.1789, "step": 29090 }, { "epoch": 2.68, "learning_rate": 1.587921904294501e-06, "loss": 1.1884, "step": 29100 }, { "epoch": 2.68, "learning_rate": 1.57892941286151e-06, "loss": 1.1645, "step": 29110 }, { "epoch": 2.68, "learning_rate": 1.5699616259184425e-06, "loss": 1.1931, "step": 29120 }, { "epoch": 2.68, "learning_rate": 1.561018552924484e-06, "loss": 1.1644, "step": 29130 }, { "epoch": 2.68, "learning_rate": 1.5521002033127785e-06, "loss": 1.1928, "step": 29140 }, { "epoch": 2.68, "learning_rate": 1.5432065864903699e-06, "loss": 1.2172, "step": 29150 }, { "epoch": 2.68, "learning_rate": 1.5343377118382297e-06, "loss": 1.2062, "step": 29160 }, { "epoch": 2.69, "learning_rate": 1.5254935887112215e-06, "loss": 1.2131, "step": 29170 }, { "epoch": 2.69, "learning_rate": 1.5166742264381112e-06, "loss": 1.1929, "step": 29180 }, { "epoch": 2.69, "learning_rate": 1.5078796343215407e-06, "loss": 1.1246, "step": 29190 }, { "epoch": 2.69, "learning_rate": 1.4991098216380173e-06, "loss": 1.173, "step": 29200 }, { "epoch": 2.69, "learning_rate": 1.4903647976379304e-06, "loss": 1.153, "step": 29210 }, { "epoch": 2.69, "learning_rate": 1.4816445715454985e-06, "loss": 1.1432, "step": 29220 }, { "epoch": 2.69, "learning_rate": 1.472949152558803e-06, "loss": 1.1573, "step": 29230 }, { "epoch": 2.69, "learning_rate": 1.4642785498497514e-06, "loss": 1.1876, "step": 29240 }, { "epoch": 2.69, "learning_rate": 1.455632772564075e-06, "loss": 1.1671, "step": 29250 }, { "epoch": 2.69, "learning_rate": 1.4470118298213125e-06, "loss": 1.2323, "step": 29260 }, { "epoch": 2.69, "learning_rate": 1.438415730714826e-06, "loss": 1.1345, "step": 29270 }, { "epoch": 2.7, "learning_rate": 1.4298444843117486e-06, "loss": 1.1706, "step": 29280 }, { "epoch": 2.7, "learning_rate": 1.421298099653015e-06, "loss": 1.1947, "step": 29290 }, { "epoch": 2.7, "learning_rate": 1.4127765857533364e-06, "loss": 1.1415, "step": 29300 }, { "epoch": 2.7, "learning_rate": 1.4042799516011756e-06, "loss": 1.1626, "step": 29310 }, { "epoch": 2.7, "learning_rate": 1.3958082061587746e-06, "loss": 1.1762, "step": 29320 }, { "epoch": 2.7, "learning_rate": 1.3873613583620992e-06, "loss": 1.1987, "step": 29330 }, { "epoch": 2.7, "learning_rate": 1.3789394171208724e-06, "loss": 1.1447, "step": 29340 }, { "epoch": 2.7, "learning_rate": 1.3705423913185301e-06, "loss": 1.1702, "step": 29350 }, { "epoch": 2.7, "learning_rate": 1.3621702898122423e-06, "loss": 1.1423, "step": 29360 }, { "epoch": 2.7, "learning_rate": 1.353823121432879e-06, "loss": 1.1842, "step": 29370 }, { "epoch": 2.7, "learning_rate": 1.345500894985019e-06, "loss": 1.1869, "step": 29380 }, { "epoch": 2.71, "learning_rate": 1.3372036192469156e-06, "loss": 1.1974, "step": 29390 }, { "epoch": 2.71, "learning_rate": 1.3289313029705292e-06, "loss": 1.1816, "step": 29400 }, { "epoch": 2.71, "learning_rate": 1.3206839548814658e-06, "loss": 1.172, "step": 29410 }, { "epoch": 2.71, "learning_rate": 1.3124615836790172e-06, "loss": 1.2005, "step": 29420 }, { "epoch": 2.71, "learning_rate": 1.3042641980361203e-06, "loss": 1.1541, "step": 29430 }, { "epoch": 2.71, "learning_rate": 1.296091806599356e-06, "loss": 1.2044, "step": 29440 }, { "epoch": 2.71, "learning_rate": 1.2879444179889483e-06, "loss": 1.2278, "step": 29450 }, { "epoch": 2.71, "learning_rate": 1.2798220407987365e-06, "loss": 1.1717, "step": 29460 }, { "epoch": 2.71, "learning_rate": 1.2717246835961872e-06, "loss": 1.1609, "step": 29470 }, { "epoch": 2.71, "learning_rate": 1.2636523549223734e-06, "loss": 1.1758, "step": 29480 }, { "epoch": 2.71, "learning_rate": 1.2556050632919758e-06, "loss": 1.2386, "step": 29490 }, { "epoch": 2.72, "learning_rate": 1.2475828171932463e-06, "loss": 1.195, "step": 29500 }, { "epoch": 2.72, "learning_rate": 1.239585625088041e-06, "loss": 1.2101, "step": 29510 }, { "epoch": 2.72, "learning_rate": 1.2316134954117736e-06, "loss": 1.1982, "step": 29520 }, { "epoch": 2.72, "learning_rate": 1.2236664365734284e-06, "loss": 1.1614, "step": 29530 }, { "epoch": 2.72, "learning_rate": 1.2157444569555448e-06, "loss": 1.2051, "step": 29540 }, { "epoch": 2.72, "learning_rate": 1.2078475649142135e-06, "loss": 1.2084, "step": 29550 }, { "epoch": 2.72, "learning_rate": 1.1999757687790464e-06, "loss": 1.2515, "step": 29560 }, { "epoch": 2.72, "learning_rate": 1.1921290768532074e-06, "loss": 1.1465, "step": 29570 }, { "epoch": 2.72, "learning_rate": 1.1843074974133589e-06, "loss": 1.1425, "step": 29580 }, { "epoch": 2.72, "learning_rate": 1.1765110387096907e-06, "loss": 1.1452, "step": 29590 }, { "epoch": 2.72, "learning_rate": 1.168739708965888e-06, "loss": 1.2001, "step": 29600 }, { "epoch": 2.73, "learning_rate": 1.1609935163791275e-06, "loss": 1.2247, "step": 29610 }, { "epoch": 2.73, "learning_rate": 1.1532724691200786e-06, "loss": 1.2385, "step": 29620 }, { "epoch": 2.73, "learning_rate": 1.1455765753328824e-06, "loss": 1.1708, "step": 29630 }, { "epoch": 2.73, "learning_rate": 1.137905843135148e-06, "loss": 1.2111, "step": 29640 }, { "epoch": 2.73, "learning_rate": 1.1302602806179452e-06, "loss": 1.2377, "step": 29650 }, { "epoch": 2.73, "learning_rate": 1.1226398958458006e-06, "loss": 1.1754, "step": 29660 }, { "epoch": 2.73, "learning_rate": 1.1150446968566735e-06, "loss": 1.1708, "step": 29670 }, { "epoch": 2.73, "learning_rate": 1.1074746916619638e-06, "loss": 1.1955, "step": 29680 }, { "epoch": 2.73, "learning_rate": 1.0999298882464926e-06, "loss": 1.2318, "step": 29690 }, { "epoch": 2.73, "learning_rate": 1.092410294568505e-06, "loss": 1.1745, "step": 29700 }, { "epoch": 2.73, "learning_rate": 1.0849159185596452e-06, "loss": 1.2007, "step": 29710 }, { "epoch": 2.74, "learning_rate": 1.077446768124965e-06, "loss": 1.1702, "step": 29720 }, { "epoch": 2.74, "learning_rate": 1.0700028511429122e-06, "loss": 1.197, "step": 29730 }, { "epoch": 2.74, "learning_rate": 1.0625841754653032e-06, "loss": 1.2003, "step": 29740 }, { "epoch": 2.74, "learning_rate": 1.0551907489173479e-06, "loss": 1.2285, "step": 29750 }, { "epoch": 2.74, "learning_rate": 1.047822579297611e-06, "loss": 1.1665, "step": 29760 }, { "epoch": 2.74, "learning_rate": 1.0404796743780198e-06, "loss": 1.15, "step": 29770 }, { "epoch": 2.74, "learning_rate": 1.033162041903854e-06, "loss": 1.1507, "step": 29780 }, { "epoch": 2.74, "learning_rate": 1.025869689593742e-06, "loss": 1.1699, "step": 29790 }, { "epoch": 2.74, "learning_rate": 1.018602625139628e-06, "loss": 1.1893, "step": 29800 }, { "epoch": 2.74, "learning_rate": 1.0113608562068027e-06, "loss": 1.1751, "step": 29810 }, { "epoch": 2.74, "learning_rate": 1.0041443904338616e-06, "loss": 1.1923, "step": 29820 }, { "epoch": 2.75, "learning_rate": 9.969532354327182e-07, "loss": 1.1734, "step": 29830 }, { "epoch": 2.75, "learning_rate": 9.897873987885886e-07, "loss": 1.1529, "step": 29840 }, { "epoch": 2.75, "learning_rate": 9.826468880599737e-07, "loss": 1.1846, "step": 29850 }, { "epoch": 2.75, "learning_rate": 9.755317107786739e-07, "loss": 1.1704, "step": 29860 }, { "epoch": 2.75, "learning_rate": 9.684418744497575e-07, "loss": 1.1699, "step": 29870 }, { "epoch": 2.75, "learning_rate": 9.613773865515706e-07, "loss": 1.1859, "step": 29880 }, { "epoch": 2.75, "learning_rate": 9.543382545357083e-07, "loss": 1.1523, "step": 29890 }, { "epoch": 2.75, "learning_rate": 9.473244858270474e-07, "loss": 1.1704, "step": 29900 }, { "epoch": 2.75, "learning_rate": 9.403360878236788e-07, "loss": 1.1564, "step": 29910 }, { "epoch": 2.75, "learning_rate": 9.333730678969588e-07, "loss": 1.1316, "step": 29920 }, { "epoch": 2.76, "learning_rate": 9.264354333914594e-07, "loss": 1.1753, "step": 29930 }, { "epoch": 2.76, "learning_rate": 9.195231916249824e-07, "loss": 1.1973, "step": 29940 }, { "epoch": 2.76, "learning_rate": 9.126363498885404e-07, "loss": 1.1977, "step": 29950 }, { "epoch": 2.76, "learning_rate": 9.057749154463641e-07, "loss": 1.1886, "step": 29960 }, { "epoch": 2.76, "learning_rate": 8.989388955358752e-07, "loss": 1.2136, "step": 29970 }, { "epoch": 2.76, "learning_rate": 8.921282973676892e-07, "loss": 1.1874, "step": 29980 }, { "epoch": 2.76, "learning_rate": 8.853431281256153e-07, "loss": 1.191, "step": 29990 }, { "epoch": 2.76, "learning_rate": 8.785833949666256e-07, "loss": 1.1935, "step": 30000 }, { "epoch": 2.76, "learning_rate": 8.718491050208749e-07, "loss": 1.1942, "step": 30010 }, { "epoch": 2.76, "learning_rate": 8.651402653916757e-07, "loss": 1.1558, "step": 30020 }, { "epoch": 2.76, "learning_rate": 8.584568831555006e-07, "loss": 1.1452, "step": 30030 }, { "epoch": 2.77, "learning_rate": 8.517989653619579e-07, "loss": 1.2503, "step": 30040 }, { "epoch": 2.77, "learning_rate": 8.451665190338132e-07, "loss": 1.188, "step": 30050 }, { "epoch": 2.77, "learning_rate": 8.385595511669425e-07, "loss": 1.1953, "step": 30060 }, { "epoch": 2.77, "learning_rate": 8.319780687303686e-07, "loss": 1.1444, "step": 30070 }, { "epoch": 2.77, "learning_rate": 8.254220786662215e-07, "loss": 1.2139, "step": 30080 }, { "epoch": 2.77, "learning_rate": 8.188915878897474e-07, "loss": 1.1827, "step": 30090 }, { "epoch": 2.77, "learning_rate": 8.123866032892863e-07, "loss": 1.1878, "step": 30100 }, { "epoch": 2.77, "learning_rate": 8.059071317262857e-07, "loss": 1.1536, "step": 30110 }, { "epoch": 2.77, "learning_rate": 7.994531800352728e-07, "loss": 1.1642, "step": 30120 }, { "epoch": 2.77, "learning_rate": 7.930247550238607e-07, "loss": 1.1925, "step": 30130 }, { "epoch": 2.77, "learning_rate": 7.866218634727446e-07, "loss": 1.1918, "step": 30140 }, { "epoch": 2.78, "learning_rate": 7.802445121356694e-07, "loss": 1.2419, "step": 30150 }, { "epoch": 2.78, "learning_rate": 7.73892707739457e-07, "loss": 1.203, "step": 30160 }, { "epoch": 2.78, "learning_rate": 7.675664569839703e-07, "loss": 1.2009, "step": 30170 }, { "epoch": 2.78, "learning_rate": 7.612657665421297e-07, "loss": 1.1767, "step": 30180 }, { "epoch": 2.78, "learning_rate": 7.549906430598774e-07, "loss": 1.2129, "step": 30190 }, { "epoch": 2.78, "learning_rate": 7.487410931562161e-07, "loss": 1.178, "step": 30200 }, { "epoch": 2.78, "learning_rate": 7.42517123423142e-07, "loss": 1.1378, "step": 30210 }, { "epoch": 2.78, "learning_rate": 7.363187404256927e-07, "loss": 1.1679, "step": 30220 }, { "epoch": 2.78, "learning_rate": 7.301459507018993e-07, "loss": 1.2023, "step": 30230 }, { "epoch": 2.78, "learning_rate": 7.239987607628146e-07, "loss": 1.2225, "step": 30240 }, { "epoch": 2.78, "learning_rate": 7.178771770924714e-07, "loss": 1.1841, "step": 30250 }, { "epoch": 2.79, "learning_rate": 7.117812061479073e-07, "loss": 1.1829, "step": 30260 }, { "epoch": 2.79, "learning_rate": 7.057108543591401e-07, "loss": 1.121, "step": 30270 }, { "epoch": 2.79, "learning_rate": 6.996661281291561e-07, "loss": 1.181, "step": 30280 }, { "epoch": 2.79, "learning_rate": 6.936470338339246e-07, "loss": 1.1791, "step": 30290 }, { "epoch": 2.79, "learning_rate": 6.876535778223641e-07, "loss": 1.1574, "step": 30300 }, { "epoch": 2.79, "learning_rate": 6.816857664163651e-07, "loss": 1.1593, "step": 30310 }, { "epoch": 2.79, "learning_rate": 6.757436059107558e-07, "loss": 1.1523, "step": 30320 }, { "epoch": 2.79, "learning_rate": 6.6982710257332e-07, "loss": 1.1528, "step": 30330 }, { "epoch": 2.79, "learning_rate": 6.639362626447653e-07, "loss": 1.1742, "step": 30340 }, { "epoch": 2.79, "learning_rate": 6.580710923387384e-07, "loss": 1.1925, "step": 30350 }, { "epoch": 2.79, "learning_rate": 6.522315978418042e-07, "loss": 1.1597, "step": 30360 }, { "epoch": 2.8, "learning_rate": 6.464177853134495e-07, "loss": 1.1804, "step": 30370 }, { "epoch": 2.8, "learning_rate": 6.406296608860746e-07, "loss": 1.1657, "step": 30380 }, { "epoch": 2.8, "learning_rate": 6.348672306649706e-07, "loss": 1.2313, "step": 30390 }, { "epoch": 2.8, "learning_rate": 6.291305007283421e-07, "loss": 1.1491, "step": 30400 }, { "epoch": 2.8, "learning_rate": 6.234194771272766e-07, "loss": 1.2229, "step": 30410 }, { "epoch": 2.8, "learning_rate": 6.177341658857472e-07, "loss": 1.1793, "step": 30420 }, { "epoch": 2.8, "learning_rate": 6.120745730006038e-07, "loss": 1.1703, "step": 30430 }, { "epoch": 2.8, "learning_rate": 6.064407044415799e-07, "loss": 1.1931, "step": 30440 }, { "epoch": 2.8, "learning_rate": 6.008325661512576e-07, "loss": 1.1383, "step": 30450 }, { "epoch": 2.8, "learning_rate": 5.952501640450969e-07, "loss": 1.1678, "step": 30460 }, { "epoch": 2.8, "learning_rate": 5.896935040113932e-07, "loss": 1.1798, "step": 30470 }, { "epoch": 2.81, "learning_rate": 5.841625919113053e-07, "loss": 1.1756, "step": 30480 }, { "epoch": 2.81, "learning_rate": 5.786574335788248e-07, "loss": 1.2003, "step": 30490 }, { "epoch": 2.81, "learning_rate": 5.73178034820776e-07, "loss": 1.2125, "step": 30500 }, { "epoch": 2.81, "learning_rate": 5.677244014168193e-07, "loss": 1.1794, "step": 30510 }, { "epoch": 2.81, "learning_rate": 5.622965391194335e-07, "loss": 1.1925, "step": 30520 }, { "epoch": 2.81, "learning_rate": 5.568944536539166e-07, "loss": 1.2096, "step": 30530 }, { "epoch": 2.81, "learning_rate": 5.515181507183719e-07, "loss": 1.1587, "step": 30540 }, { "epoch": 2.81, "learning_rate": 5.461676359837131e-07, "loss": 1.2294, "step": 30550 }, { "epoch": 2.81, "learning_rate": 5.408429150936506e-07, "loss": 1.2235, "step": 30560 }, { "epoch": 2.81, "learning_rate": 5.355439936646889e-07, "loss": 1.1715, "step": 30570 }, { "epoch": 2.81, "learning_rate": 5.302708772861125e-07, "loss": 1.1636, "step": 30580 }, { "epoch": 2.82, "learning_rate": 5.250235715200002e-07, "loss": 1.1825, "step": 30590 }, { "epoch": 2.82, "learning_rate": 5.198020819011884e-07, "loss": 1.1696, "step": 30600 }, { "epoch": 2.82, "learning_rate": 5.146064139372991e-07, "loss": 1.143, "step": 30610 }, { "epoch": 2.82, "learning_rate": 5.094365731087097e-07, "loss": 1.2064, "step": 30620 }, { "epoch": 2.82, "learning_rate": 5.042925648685553e-07, "loss": 1.178, "step": 30630 }, { "epoch": 2.82, "learning_rate": 4.99174394642718e-07, "loss": 1.1453, "step": 30640 }, { "epoch": 2.82, "learning_rate": 4.94082067829843e-07, "loss": 1.1901, "step": 30650 }, { "epoch": 2.82, "learning_rate": 4.890155898012949e-07, "loss": 1.2526, "step": 30660 }, { "epoch": 2.82, "learning_rate": 4.83974965901185e-07, "loss": 1.1501, "step": 30670 }, { "epoch": 2.82, "learning_rate": 4.789602014463574e-07, "loss": 1.1574, "step": 30680 }, { "epoch": 2.82, "learning_rate": 4.7397130172636417e-07, "loss": 1.1837, "step": 30690 }, { "epoch": 2.83, "learning_rate": 4.690082720034933e-07, "loss": 1.1855, "step": 30700 }, { "epoch": 2.83, "learning_rate": 4.6407111751273215e-07, "loss": 1.1698, "step": 30710 }, { "epoch": 2.83, "learning_rate": 4.5915984346178167e-07, "loss": 1.1623, "step": 30720 }, { "epoch": 2.83, "learning_rate": 4.5427445503103684e-07, "loss": 1.1644, "step": 30730 }, { "epoch": 2.83, "learning_rate": 4.4941495737360064e-07, "loss": 1.16, "step": 30740 }, { "epoch": 2.83, "learning_rate": 4.445813556152589e-07, "loss": 1.209, "step": 30750 }, { "epoch": 2.83, "learning_rate": 4.3977365485448064e-07, "loss": 1.1869, "step": 30760 }, { "epoch": 2.83, "learning_rate": 4.3499186016242034e-07, "loss": 1.2258, "step": 30770 }, { "epoch": 2.83, "learning_rate": 4.302359765829017e-07, "loss": 1.2072, "step": 30780 }, { "epoch": 2.83, "learning_rate": 4.255060091324148e-07, "loss": 1.1974, "step": 30790 }, { "epoch": 2.84, "learning_rate": 4.2080196280012694e-07, "loss": 1.2127, "step": 30800 }, { "epoch": 2.84, "learning_rate": 4.1612384254784975e-07, "loss": 1.2128, "step": 30810 }, { "epoch": 2.84, "learning_rate": 4.1147165331005544e-07, "loss": 1.1999, "step": 30820 }, { "epoch": 2.84, "learning_rate": 4.0684539999386315e-07, "loss": 1.1426, "step": 30830 }, { "epoch": 2.84, "learning_rate": 4.0224508747903065e-07, "loss": 1.1462, "step": 30840 }, { "epoch": 2.84, "learning_rate": 3.9767072061795697e-07, "loss": 1.1812, "step": 30850 }, { "epoch": 2.84, "learning_rate": 3.9312230423567406e-07, "loss": 1.1651, "step": 30860 }, { "epoch": 2.84, "learning_rate": 3.885998431298443e-07, "loss": 1.1858, "step": 30870 }, { "epoch": 2.84, "learning_rate": 3.8410334207074626e-07, "loss": 1.1574, "step": 30880 }, { "epoch": 2.84, "learning_rate": 3.796328058012777e-07, "loss": 1.2134, "step": 30890 }, { "epoch": 2.84, "learning_rate": 3.751882390369499e-07, "loss": 1.1526, "step": 30900 }, { "epoch": 2.85, "learning_rate": 3.7076964646588217e-07, "loss": 1.2087, "step": 30910 }, { "epoch": 2.85, "learning_rate": 3.663770327487964e-07, "loss": 1.2097, "step": 30920 }, { "epoch": 2.85, "learning_rate": 3.620104025190113e-07, "loss": 1.2087, "step": 30930 }, { "epoch": 2.85, "learning_rate": 3.576697603824342e-07, "loss": 1.1741, "step": 30940 }, { "epoch": 2.85, "learning_rate": 3.533551109175692e-07, "loss": 1.2135, "step": 30950 }, { "epoch": 2.85, "learning_rate": 3.490664586754955e-07, "loss": 1.2072, "step": 30960 }, { "epoch": 2.85, "learning_rate": 3.448038081798721e-07, "loss": 1.1783, "step": 30970 }, { "epoch": 2.85, "learning_rate": 3.4056716392693866e-07, "loss": 1.1557, "step": 30980 }, { "epoch": 2.85, "learning_rate": 3.3635653038548987e-07, "loss": 1.1743, "step": 30990 }, { "epoch": 2.85, "learning_rate": 3.32171911996898e-07, "loss": 1.1879, "step": 31000 }, { "epoch": 2.85, "learning_rate": 3.280133131750851e-07, "loss": 1.23, "step": 31010 }, { "epoch": 2.86, "learning_rate": 3.238807383065312e-07, "loss": 1.2049, "step": 31020 }, { "epoch": 2.86, "learning_rate": 3.1977419175026893e-07, "loss": 1.1504, "step": 31030 }, { "epoch": 2.86, "learning_rate": 3.1569367783787217e-07, "loss": 1.21, "step": 31040 }, { "epoch": 2.86, "learning_rate": 3.11639200873462e-07, "loss": 1.1816, "step": 31050 }, { "epoch": 2.86, "learning_rate": 3.0761076513368124e-07, "loss": 1.1709, "step": 31060 }, { "epoch": 2.86, "learning_rate": 3.036083748677254e-07, "loss": 1.1568, "step": 31070 }, { "epoch": 2.86, "learning_rate": 2.9963203429729804e-07, "loss": 1.148, "step": 31080 }, { "epoch": 2.86, "learning_rate": 2.956817476166357e-07, "loss": 1.2025, "step": 31090 }, { "epoch": 2.86, "learning_rate": 2.9175751899249424e-07, "loss": 1.1857, "step": 31100 }, { "epoch": 2.86, "learning_rate": 2.878593525641404e-07, "loss": 1.1769, "step": 31110 }, { "epoch": 2.86, "learning_rate": 2.83987252443349e-07, "loss": 1.2128, "step": 31120 }, { "epoch": 2.87, "learning_rate": 2.801412227144029e-07, "loss": 1.2016, "step": 31130 }, { "epoch": 2.87, "learning_rate": 2.763212674340848e-07, "loss": 1.2001, "step": 31140 }, { "epoch": 2.87, "learning_rate": 2.725273906316744e-07, "loss": 1.213, "step": 31150 }, { "epoch": 2.87, "learning_rate": 2.687595963089429e-07, "loss": 1.218, "step": 31160 }, { "epoch": 2.87, "learning_rate": 2.650178884401555e-07, "loss": 1.1584, "step": 31170 }, { "epoch": 2.87, "learning_rate": 2.613022709720525e-07, "loss": 1.1917, "step": 31180 }, { "epoch": 2.87, "learning_rate": 2.5761274782385993e-07, "loss": 1.1917, "step": 31190 }, { "epoch": 2.87, "learning_rate": 2.539493228872758e-07, "loss": 1.2023, "step": 31200 }, { "epoch": 2.87, "learning_rate": 2.5031200002647303e-07, "loss": 1.1989, "step": 31210 }, { "epoch": 2.87, "learning_rate": 2.4670078307809653e-07, "loss": 1.2185, "step": 31220 }, { "epoch": 2.87, "learning_rate": 2.4311567585124096e-07, "loss": 1.1663, "step": 31230 }, { "epoch": 2.88, "learning_rate": 2.3955668212747585e-07, "loss": 1.1963, "step": 31240 }, { "epoch": 2.88, "learning_rate": 2.3602380566081218e-07, "loss": 1.1347, "step": 31250 }, { "epoch": 2.88, "learning_rate": 2.3251705017773018e-07, "loss": 1.1496, "step": 31260 }, { "epoch": 2.88, "learning_rate": 2.2903641937713493e-07, "loss": 1.1481, "step": 31270 }, { "epoch": 2.88, "learning_rate": 2.2558191693039798e-07, "loss": 1.1253, "step": 31280 }, { "epoch": 2.88, "learning_rate": 2.2215354648131293e-07, "loss": 1.1277, "step": 31290 }, { "epoch": 2.88, "learning_rate": 2.1875131164612318e-07, "loss": 1.1803, "step": 31300 }, { "epoch": 2.88, "learning_rate": 2.1537521601349143e-07, "loss": 1.1931, "step": 31310 }, { "epoch": 2.88, "learning_rate": 2.1202526314452187e-07, "loss": 1.1787, "step": 31320 }, { "epoch": 2.88, "learning_rate": 2.0870145657273244e-07, "loss": 1.1559, "step": 31330 }, { "epoch": 2.88, "learning_rate": 2.0540379980406865e-07, "loss": 1.1681, "step": 31340 }, { "epoch": 2.89, "learning_rate": 2.0213229631688978e-07, "loss": 1.1344, "step": 31350 }, { "epoch": 2.89, "learning_rate": 1.9888694956196885e-07, "loss": 1.2073, "step": 31360 }, { "epoch": 2.89, "learning_rate": 1.9566776296249255e-07, "loss": 1.165, "step": 31370 }, { "epoch": 2.89, "learning_rate": 1.924747399140503e-07, "loss": 1.1996, "step": 31380 }, { "epoch": 2.89, "learning_rate": 1.8930788378463403e-07, "loss": 1.2119, "step": 31390 }, { "epoch": 2.89, "learning_rate": 1.8616719791463843e-07, "loss": 1.237, "step": 31400 }, { "epoch": 2.89, "learning_rate": 1.830526856168524e-07, "loss": 1.1729, "step": 31410 }, { "epoch": 2.89, "learning_rate": 1.7996435017645087e-07, "loss": 1.1802, "step": 31420 }, { "epoch": 2.89, "learning_rate": 1.7690219485100857e-07, "loss": 1.1901, "step": 31430 }, { "epoch": 2.89, "learning_rate": 1.7386622287047515e-07, "loss": 1.2365, "step": 31440 }, { "epoch": 2.89, "learning_rate": 1.7085643743718904e-07, "loss": 1.1602, "step": 31450 }, { "epoch": 2.9, "learning_rate": 1.6787284172586347e-07, "loss": 1.1898, "step": 31460 }, { "epoch": 2.9, "learning_rate": 1.6491543888359218e-07, "loss": 1.2016, "step": 31470 }, { "epoch": 2.9, "learning_rate": 1.619842320298326e-07, "loss": 1.1705, "step": 31480 }, { "epoch": 2.9, "learning_rate": 1.590792242564143e-07, "loss": 1.1652, "step": 31490 }, { "epoch": 2.9, "learning_rate": 1.562004186275362e-07, "loss": 1.1628, "step": 31500 }, { "epoch": 2.9, "learning_rate": 1.5334781817974985e-07, "loss": 1.1771, "step": 31510 }, { "epoch": 2.9, "learning_rate": 1.5052142592197893e-07, "loss": 1.2274, "step": 31520 }, { "epoch": 2.9, "learning_rate": 1.4772124483548867e-07, "loss": 1.1896, "step": 31530 }, { "epoch": 2.9, "learning_rate": 1.4494727787391082e-07, "loss": 1.1933, "step": 31540 }, { "epoch": 2.9, "learning_rate": 1.4219952796321045e-07, "loss": 1.1322, "step": 31550 }, { "epoch": 2.91, "learning_rate": 1.3947799800171357e-07, "loss": 1.1756, "step": 31560 }, { "epoch": 2.91, "learning_rate": 1.367826908600822e-07, "loss": 1.1851, "step": 31570 }, { "epoch": 2.91, "learning_rate": 1.3411360938132e-07, "loss": 1.1965, "step": 31580 }, { "epoch": 2.91, "learning_rate": 1.3147075638076655e-07, "loss": 1.1798, "step": 31590 }, { "epoch": 2.91, "learning_rate": 1.2885413464609752e-07, "loss": 1.1894, "step": 31600 }, { "epoch": 2.91, "learning_rate": 1.262637469373218e-07, "loss": 1.2342, "step": 31610 }, { "epoch": 2.91, "learning_rate": 1.236995959867704e-07, "loss": 1.1555, "step": 31620 }, { "epoch": 2.91, "learning_rate": 1.2116168449910757e-07, "loss": 1.171, "step": 31630 }, { "epoch": 2.91, "learning_rate": 1.1865001515131147e-07, "loss": 1.186, "step": 31640 }, { "epoch": 2.91, "learning_rate": 1.1616459059269058e-07, "loss": 1.179, "step": 31650 }, { "epoch": 2.91, "learning_rate": 1.1370541344485896e-07, "loss": 1.1479, "step": 31660 }, { "epoch": 2.92, "learning_rate": 1.1127248630175835e-07, "loss": 1.1796, "step": 31670 }, { "epoch": 2.92, "learning_rate": 1.0886581172962484e-07, "loss": 1.1849, "step": 31680 }, { "epoch": 2.92, "learning_rate": 1.0648539226702225e-07, "loss": 1.178, "step": 31690 }, { "epoch": 2.92, "learning_rate": 1.0413123042480322e-07, "loss": 1.2134, "step": 31700 }, { "epoch": 2.92, "learning_rate": 1.0180332868614251e-07, "loss": 1.1342, "step": 31710 }, { "epoch": 2.92, "learning_rate": 9.95016895064954e-08, "loss": 1.1457, "step": 31720 }, { "epoch": 2.92, "learning_rate": 9.722631531362825e-08, "loss": 1.1748, "step": 31730 }, { "epoch": 2.92, "learning_rate": 9.49772085076045e-08, "loss": 1.1262, "step": 31740 }, { "epoch": 2.92, "learning_rate": 9.275437146077092e-08, "loss": 1.1884, "step": 31750 }, { "epoch": 2.92, "learning_rate": 9.055780651777701e-08, "loss": 1.2045, "step": 31760 }, { "epoch": 2.92, "learning_rate": 8.838751599554718e-08, "loss": 1.1944, "step": 31770 }, { "epoch": 2.93, "learning_rate": 8.624350218330579e-08, "loss": 1.1743, "step": 31780 }, { "epoch": 2.93, "learning_rate": 8.412576734254662e-08, "loss": 1.1435, "step": 31790 }, { "epoch": 2.93, "learning_rate": 8.203431370705505e-08, "loss": 1.2222, "step": 31800 }, { "epoch": 2.93, "learning_rate": 7.99691434828942e-08, "loss": 1.1817, "step": 31810 }, { "epoch": 2.93, "learning_rate": 7.793025884839656e-08, "loss": 1.2112, "step": 31820 }, { "epoch": 2.93, "learning_rate": 7.591766195417238e-08, "loss": 1.191, "step": 31830 }, { "epoch": 2.93, "learning_rate": 7.393135492310687e-08, "loss": 1.1668, "step": 31840 }, { "epoch": 2.93, "learning_rate": 7.197133985035188e-08, "loss": 1.1988, "step": 31850 }, { "epoch": 2.93, "learning_rate": 7.003761880332304e-08, "loss": 1.1904, "step": 31860 }, { "epoch": 2.93, "learning_rate": 6.813019382171104e-08, "loss": 1.1933, "step": 31870 }, { "epoch": 2.93, "learning_rate": 6.624906691745647e-08, "loss": 1.2039, "step": 31880 }, { "epoch": 2.94, "learning_rate": 6.439424007477212e-08, "loss": 1.2355, "step": 31890 }, { "epoch": 2.94, "learning_rate": 6.256571525012355e-08, "loss": 1.1987, "step": 31900 }, { "epoch": 2.94, "learning_rate": 6.07634943722346e-08, "loss": 1.1511, "step": 31910 }, { "epoch": 2.94, "learning_rate": 5.898757934208188e-08, "loss": 1.1641, "step": 31920 }, { "epoch": 2.94, "learning_rate": 5.723797203289749e-08, "loss": 1.1389, "step": 31930 }, { "epoch": 2.94, "learning_rate": 5.551467429016632e-08, "loss": 1.2206, "step": 31940 }, { "epoch": 2.94, "learning_rate": 5.38176879316149e-08, "loss": 1.1872, "step": 31950 }, { "epoch": 2.94, "learning_rate": 5.2147014747219724e-08, "loss": 1.1345, "step": 31960 }, { "epoch": 2.94, "learning_rate": 5.050265649920727e-08, "loss": 1.1794, "step": 31970 }, { "epoch": 2.94, "learning_rate": 4.888461492203733e-08, "loss": 1.1617, "step": 31980 }, { "epoch": 2.94, "learning_rate": 4.7292891722419684e-08, "loss": 1.1856, "step": 31990 }, { "epoch": 2.95, "learning_rate": 4.5727488579300206e-08, "loss": 1.1704, "step": 32000 }, { "epoch": 2.95, "learning_rate": 4.418840714386363e-08, "loss": 1.192, "step": 32010 }, { "epoch": 2.95, "learning_rate": 4.267564903952526e-08, "loss": 1.1736, "step": 32020 }, { "epoch": 2.95, "learning_rate": 4.118921586194202e-08, "loss": 1.2089, "step": 32030 }, { "epoch": 2.95, "learning_rate": 3.972910917899864e-08, "loss": 1.1934, "step": 32040 }, { "epoch": 2.95, "learning_rate": 3.8295330530813134e-08, "loss": 1.2196, "step": 32050 }, { "epoch": 2.95, "learning_rate": 3.688788142973132e-08, "loss": 1.2014, "step": 32060 }, { "epoch": 2.95, "learning_rate": 3.5506763360326766e-08, "loss": 1.1506, "step": 32070 }, { "epoch": 2.95, "learning_rate": 3.4151977779400824e-08, "loss": 1.1659, "step": 32080 }, { "epoch": 2.95, "learning_rate": 3.282352611597705e-08, "loss": 1.1821, "step": 32090 }, { "epoch": 2.95, "learning_rate": 3.152140977130402e-08, "loss": 1.1615, "step": 32100 }, { "epoch": 2.96, "learning_rate": 3.0245630118852506e-08, "loss": 1.1182, "step": 32110 }, { "epoch": 2.96, "learning_rate": 2.8996188504307188e-08, "loss": 1.2013, "step": 32120 }, { "epoch": 2.96, "learning_rate": 2.777308624557773e-08, "loss": 1.1643, "step": 32130 }, { "epoch": 2.96, "learning_rate": 2.6576324632793248e-08, "loss": 1.2097, "step": 32140 }, { "epoch": 2.96, "learning_rate": 2.5405904928291202e-08, "loss": 1.2058, "step": 32150 }, { "epoch": 2.96, "learning_rate": 2.426182836662849e-08, "loss": 1.1771, "step": 32160 }, { "epoch": 2.96, "learning_rate": 2.3144096154575912e-08, "loss": 1.1334, "step": 32170 }, { "epoch": 2.96, "learning_rate": 2.205270947110982e-08, "loss": 1.1261, "step": 32180 }, { "epoch": 2.96, "learning_rate": 2.0987669467428805e-08, "loss": 1.1643, "step": 32190 }, { "epoch": 2.96, "learning_rate": 1.9948977266931456e-08, "loss": 1.1845, "step": 32200 }, { "epoch": 2.96, "learning_rate": 1.893663396522749e-08, "loss": 1.1448, "step": 32210 }, { "epoch": 2.97, "learning_rate": 1.795064063013496e-08, "loss": 1.1851, "step": 32220 }, { "epoch": 2.97, "learning_rate": 1.6990998301677497e-08, "loss": 1.1839, "step": 32230 }, { "epoch": 2.97, "learning_rate": 1.6057707992081506e-08, "loss": 1.2001, "step": 32240 }, { "epoch": 2.97, "learning_rate": 1.515077068578452e-08, "loss": 1.1827, "step": 32250 }, { "epoch": 2.97, "learning_rate": 1.4270187339415763e-08, "loss": 1.2388, "step": 32260 }, { "epoch": 2.97, "learning_rate": 1.3415958881815572e-08, "loss": 1.1603, "step": 32270 }, { "epoch": 2.97, "learning_rate": 1.2588086214021521e-08, "loss": 1.2087, "step": 32280 }, { "epoch": 2.97, "learning_rate": 1.1786570209271208e-08, "loss": 1.1928, "step": 32290 }, { "epoch": 2.97, "learning_rate": 1.1011411713005015e-08, "loss": 1.1758, "step": 32300 }, { "epoch": 2.97, "learning_rate": 1.0262611542852241e-08, "loss": 1.1695, "step": 32310 }, { "epoch": 2.98, "learning_rate": 9.5401704886533e-09, "loss": 1.1517, "step": 32320 }, { "epoch": 2.98, "learning_rate": 8.844089312429193e-09, "loss": 1.1963, "step": 32330 }, { "epoch": 2.98, "learning_rate": 8.174368748409267e-09, "loss": 1.1967, "step": 32340 }, { "epoch": 2.98, "learning_rate": 7.531009503014552e-09, "loss": 1.2463, "step": 32350 }, { "epoch": 2.98, "learning_rate": 6.914012254854996e-09, "loss": 1.2455, "step": 32360 }, { "epoch": 2.98, "learning_rate": 6.323377654740559e-09, "loss": 1.1091, "step": 32370 }, { "epoch": 2.98, "learning_rate": 5.7591063256701164e-09, "loss": 1.1483, "step": 32380 }, { "epoch": 2.98, "learning_rate": 5.221198862837007e-09, "loss": 1.1936, "step": 32390 }, { "epoch": 2.98, "learning_rate": 4.709655833620707e-09, "loss": 1.1787, "step": 32400 }, { "epoch": 2.98, "learning_rate": 4.2244777776007104e-09, "loss": 1.173, "step": 32410 }, { "epoch": 2.98, "learning_rate": 3.76566520653987e-09, "loss": 1.1684, "step": 32420 }, { "epoch": 2.99, "learning_rate": 3.3332186043927296e-09, "loss": 1.2057, "step": 32430 }, { "epoch": 2.99, "learning_rate": 2.9271384272999693e-09, "loss": 1.1744, "step": 32440 }, { "epoch": 2.99, "learning_rate": 2.54742510359951e-09, "loss": 1.1942, "step": 32450 }, { "epoch": 2.99, "learning_rate": 2.194079033807084e-09, "loss": 1.1966, "step": 32460 }, { "epoch": 2.99, "learning_rate": 1.8671005906328865e-09, "loss": 1.2219, "step": 32470 }, { "epoch": 2.99, "learning_rate": 1.5664901189760273e-09, "loss": 1.1946, "step": 32480 }, { "epoch": 2.99, "learning_rate": 1.2922479359162022e-09, "loss": 1.1696, "step": 32490 }, { "epoch": 2.99, "learning_rate": 1.04437433072202e-09, "loss": 1.2083, "step": 32500 }, { "epoch": 2.99, "learning_rate": 8.228695648537788e-10, "loss": 1.1433, "step": 32510 }, { "epoch": 2.99, "learning_rate": 6.277338719523629e-10, "loss": 1.1747, "step": 32520 }, { "epoch": 2.99, "learning_rate": 4.5896745784757e-10, "loss": 1.1694, "step": 32530 }, { "epoch": 3.0, "learning_rate": 3.165705005525599e-10, "loss": 1.1802, "step": 32540 }, { "epoch": 3.0, "learning_rate": 2.0054315026662994e-10, "loss": 1.1825, "step": 32550 }, { "epoch": 3.0, "learning_rate": 1.1088552937799091e-10, "loss": 1.2358, "step": 32560 }, { "epoch": 3.0, "learning_rate": 4.759773245543997e-11, "loss": 1.1818, "step": 32570 }, { "epoch": 3.0, "learning_rate": 1.0679826253912062e-11, "loss": 1.2134, "step": 32580 }, { "epoch": 3.0, "step": 32589, "total_flos": 2.9444667270974407e+19, "train_loss": 1.664952250687868, "train_runtime": 336340.7542, "train_samples_per_second": 1.55, "train_steps_per_second": 0.097 } ], "logging_steps": 10, "max_steps": 32589, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 2.9444667270974407e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }