{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8933333333333333, "global_step": 14200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.999111111111111e-05, "loss": 2.4048, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.9982222222222224e-05, "loss": 1.8775, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.9973333333333334e-05, "loss": 1.8612, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.9964444444444447e-05, "loss": 1.846, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.9955555555555557e-05, "loss": 1.7451, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.9946666666666667e-05, "loss": 1.7533, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.993777777777778e-05, "loss": 1.7204, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.992888888888889e-05, "loss": 1.6441, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.9920000000000002e-05, "loss": 1.6775, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.9911111111111112e-05, "loss": 1.6084, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.9902222222222222e-05, "loss": 1.6458, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.9893333333333335e-05, "loss": 1.7025, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.9884444444444445e-05, "loss": 1.6242, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.9875555555555558e-05, "loss": 1.6565, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.9866666666666667e-05, "loss": 1.5933, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.985777777777778e-05, "loss": 1.6074, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.984888888888889e-05, "loss": 1.4749, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.9840000000000003e-05, "loss": 1.6277, "step": 180 }, { "epoch": 0.03, "learning_rate": 1.9831111111111113e-05, "loss": 1.584, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.9822222222222226e-05, "loss": 1.6242, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.9813333333333336e-05, "loss": 1.5364, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.9804444444444445e-05, "loss": 1.558, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.979555555555556e-05, "loss": 1.4482, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.9786666666666668e-05, "loss": 1.4228, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.977777777777778e-05, "loss": 1.4842, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.976888888888889e-05, "loss": 1.4997, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.976e-05, "loss": 1.5658, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.9751111111111114e-05, "loss": 1.5769, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.9742222222222223e-05, "loss": 1.4129, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.9733333333333336e-05, "loss": 1.4415, "step": 300 }, { "epoch": 0.04, "learning_rate": 1.9724444444444446e-05, "loss": 1.5418, "step": 310 }, { "epoch": 0.04, "learning_rate": 1.9715555555555556e-05, "loss": 1.5264, "step": 320 }, { "epoch": 0.04, "learning_rate": 1.970666666666667e-05, "loss": 1.5169, "step": 330 }, { "epoch": 0.05, "learning_rate": 1.969777777777778e-05, "loss": 1.4987, "step": 340 }, { "epoch": 0.05, "learning_rate": 1.968888888888889e-05, "loss": 1.4791, "step": 350 }, { "epoch": 0.05, "learning_rate": 1.968e-05, "loss": 1.4609, "step": 360 }, { "epoch": 0.05, "learning_rate": 1.967111111111111e-05, "loss": 1.5073, "step": 370 }, { "epoch": 0.05, "learning_rate": 1.9662222222222224e-05, "loss": 1.3887, "step": 380 }, { "epoch": 0.05, "learning_rate": 1.9653333333333334e-05, "loss": 1.4547, "step": 390 }, { "epoch": 0.05, "learning_rate": 1.9644444444444447e-05, "loss": 1.3708, "step": 400 }, { "epoch": 0.05, "learning_rate": 1.9635555555555557e-05, "loss": 1.372, "step": 410 }, { "epoch": 0.06, "learning_rate": 1.9626666666666666e-05, "loss": 1.3724, "step": 420 }, { "epoch": 0.06, "learning_rate": 1.961777777777778e-05, "loss": 1.4475, "step": 430 }, { "epoch": 0.06, "learning_rate": 1.960888888888889e-05, "loss": 1.4078, "step": 440 }, { "epoch": 0.06, "learning_rate": 1.9600000000000002e-05, "loss": 1.3645, "step": 450 }, { "epoch": 0.06, "learning_rate": 1.9591111111111112e-05, "loss": 1.3766, "step": 460 }, { "epoch": 0.06, "learning_rate": 1.9582222222222225e-05, "loss": 1.547, "step": 470 }, { "epoch": 0.06, "learning_rate": 1.9573333333333335e-05, "loss": 1.4228, "step": 480 }, { "epoch": 0.07, "learning_rate": 1.9564444444444444e-05, "loss": 1.3612, "step": 490 }, { "epoch": 0.07, "learning_rate": 1.9555555555555557e-05, "loss": 1.4907, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.954666666666667e-05, "loss": 1.4219, "step": 510 }, { "epoch": 0.07, "learning_rate": 1.953777777777778e-05, "loss": 1.428, "step": 520 }, { "epoch": 0.07, "learning_rate": 1.952888888888889e-05, "loss": 1.4665, "step": 530 }, { "epoch": 0.07, "learning_rate": 1.9520000000000003e-05, "loss": 1.4952, "step": 540 }, { "epoch": 0.07, "learning_rate": 1.9511111111111113e-05, "loss": 1.4559, "step": 550 }, { "epoch": 0.07, "learning_rate": 1.9502222222222226e-05, "loss": 1.3358, "step": 560 }, { "epoch": 0.08, "learning_rate": 1.9493333333333335e-05, "loss": 1.3978, "step": 570 }, { "epoch": 0.08, "learning_rate": 1.9484444444444445e-05, "loss": 1.4329, "step": 580 }, { "epoch": 0.08, "learning_rate": 1.9475555555555558e-05, "loss": 1.4256, "step": 590 }, { "epoch": 0.08, "learning_rate": 1.9466666666666668e-05, "loss": 1.4073, "step": 600 }, { "epoch": 0.08, "learning_rate": 1.945777777777778e-05, "loss": 1.3364, "step": 610 }, { "epoch": 0.08, "learning_rate": 1.944888888888889e-05, "loss": 1.3648, "step": 620 }, { "epoch": 0.08, "learning_rate": 1.944e-05, "loss": 1.3115, "step": 630 }, { "epoch": 0.09, "learning_rate": 1.9431111111111113e-05, "loss": 1.3724, "step": 640 }, { "epoch": 0.09, "learning_rate": 1.9422222222222223e-05, "loss": 1.3438, "step": 650 }, { "epoch": 0.09, "learning_rate": 1.9413333333333336e-05, "loss": 1.4041, "step": 660 }, { "epoch": 0.09, "learning_rate": 1.9404444444444446e-05, "loss": 1.3519, "step": 670 }, { "epoch": 0.09, "learning_rate": 1.9395555555555555e-05, "loss": 1.3564, "step": 680 }, { "epoch": 0.09, "learning_rate": 1.938666666666667e-05, "loss": 1.3786, "step": 690 }, { "epoch": 0.09, "learning_rate": 1.9377777777777778e-05, "loss": 1.3111, "step": 700 }, { "epoch": 0.09, "learning_rate": 1.936888888888889e-05, "loss": 1.3665, "step": 710 }, { "epoch": 0.1, "learning_rate": 1.936e-05, "loss": 1.3987, "step": 720 }, { "epoch": 0.1, "learning_rate": 1.935111111111111e-05, "loss": 1.3155, "step": 730 }, { "epoch": 0.1, "learning_rate": 1.9342222222222224e-05, "loss": 1.3128, "step": 740 }, { "epoch": 0.1, "learning_rate": 1.9333333333333333e-05, "loss": 1.3127, "step": 750 }, { "epoch": 0.1, "learning_rate": 1.9324444444444447e-05, "loss": 1.2801, "step": 760 }, { "epoch": 0.1, "learning_rate": 1.9315555555555556e-05, "loss": 1.3582, "step": 770 }, { "epoch": 0.1, "learning_rate": 1.930666666666667e-05, "loss": 1.3537, "step": 780 }, { "epoch": 0.11, "learning_rate": 1.929777777777778e-05, "loss": 1.2926, "step": 790 }, { "epoch": 0.11, "learning_rate": 1.928888888888889e-05, "loss": 1.2753, "step": 800 }, { "epoch": 0.11, "learning_rate": 1.9280000000000002e-05, "loss": 1.3155, "step": 810 }, { "epoch": 0.11, "learning_rate": 1.9271111111111115e-05, "loss": 1.3894, "step": 820 }, { "epoch": 0.11, "learning_rate": 1.9262222222222225e-05, "loss": 1.2848, "step": 830 }, { "epoch": 0.11, "learning_rate": 1.9253333333333334e-05, "loss": 1.3126, "step": 840 }, { "epoch": 0.11, "learning_rate": 1.9244444444444444e-05, "loss": 1.33, "step": 850 }, { "epoch": 0.11, "learning_rate": 1.9235555555555557e-05, "loss": 1.2493, "step": 860 }, { "epoch": 0.12, "learning_rate": 1.922666666666667e-05, "loss": 1.2834, "step": 870 }, { "epoch": 0.12, "learning_rate": 1.921777777777778e-05, "loss": 1.3542, "step": 880 }, { "epoch": 0.12, "learning_rate": 1.920888888888889e-05, "loss": 1.2733, "step": 890 }, { "epoch": 0.12, "learning_rate": 1.9200000000000003e-05, "loss": 1.3271, "step": 900 }, { "epoch": 0.12, "learning_rate": 1.9191111111111112e-05, "loss": 1.4075, "step": 910 }, { "epoch": 0.12, "learning_rate": 1.9182222222222225e-05, "loss": 1.2204, "step": 920 }, { "epoch": 0.12, "learning_rate": 1.9173333333333335e-05, "loss": 1.3184, "step": 930 }, { "epoch": 0.13, "learning_rate": 1.9164444444444445e-05, "loss": 1.3577, "step": 940 }, { "epoch": 0.13, "learning_rate": 1.9155555555555558e-05, "loss": 1.2539, "step": 950 }, { "epoch": 0.13, "learning_rate": 1.9146666666666667e-05, "loss": 1.3455, "step": 960 }, { "epoch": 0.13, "learning_rate": 1.913777777777778e-05, "loss": 1.3104, "step": 970 }, { "epoch": 0.13, "learning_rate": 1.912888888888889e-05, "loss": 1.3331, "step": 980 }, { "epoch": 0.13, "learning_rate": 1.912e-05, "loss": 1.2342, "step": 990 }, { "epoch": 0.13, "learning_rate": 1.9111111111111113e-05, "loss": 1.3409, "step": 1000 }, { "epoch": 0.13, "learning_rate": 1.9102222222222223e-05, "loss": 1.2472, "step": 1010 }, { "epoch": 0.14, "learning_rate": 1.9093333333333336e-05, "loss": 1.1771, "step": 1020 }, { "epoch": 0.14, "learning_rate": 1.9084444444444445e-05, "loss": 1.3049, "step": 1030 }, { "epoch": 0.14, "learning_rate": 1.9075555555555555e-05, "loss": 1.2137, "step": 1040 }, { "epoch": 0.14, "learning_rate": 1.9066666666666668e-05, "loss": 1.4173, "step": 1050 }, { "epoch": 0.14, "learning_rate": 1.9057777777777778e-05, "loss": 1.2551, "step": 1060 }, { "epoch": 0.14, "learning_rate": 1.904888888888889e-05, "loss": 1.2043, "step": 1070 }, { "epoch": 0.14, "learning_rate": 1.904e-05, "loss": 1.2418, "step": 1080 }, { "epoch": 0.15, "learning_rate": 1.9031111111111114e-05, "loss": 1.2922, "step": 1090 }, { "epoch": 0.15, "learning_rate": 1.9022222222222223e-05, "loss": 1.2136, "step": 1100 }, { "epoch": 0.15, "learning_rate": 1.9013333333333333e-05, "loss": 1.2552, "step": 1110 }, { "epoch": 0.15, "learning_rate": 1.9004444444444446e-05, "loss": 1.2499, "step": 1120 }, { "epoch": 0.15, "learning_rate": 1.899555555555556e-05, "loss": 1.2998, "step": 1130 }, { "epoch": 0.15, "learning_rate": 1.898666666666667e-05, "loss": 1.2775, "step": 1140 }, { "epoch": 0.15, "learning_rate": 1.897777777777778e-05, "loss": 1.3368, "step": 1150 }, { "epoch": 0.15, "learning_rate": 1.896888888888889e-05, "loss": 1.2595, "step": 1160 }, { "epoch": 0.16, "learning_rate": 1.896e-05, "loss": 1.3476, "step": 1170 }, { "epoch": 0.16, "learning_rate": 1.8951111111111115e-05, "loss": 1.2566, "step": 1180 }, { "epoch": 0.16, "learning_rate": 1.8942222222222224e-05, "loss": 1.2813, "step": 1190 }, { "epoch": 0.16, "learning_rate": 1.8933333333333334e-05, "loss": 1.2985, "step": 1200 }, { "epoch": 0.16, "learning_rate": 1.8924444444444447e-05, "loss": 1.2264, "step": 1210 }, { "epoch": 0.16, "learning_rate": 1.8915555555555557e-05, "loss": 1.2706, "step": 1220 }, { "epoch": 0.16, "learning_rate": 1.890666666666667e-05, "loss": 1.1898, "step": 1230 }, { "epoch": 0.17, "learning_rate": 1.889777777777778e-05, "loss": 1.2292, "step": 1240 }, { "epoch": 0.17, "learning_rate": 1.888888888888889e-05, "loss": 1.2652, "step": 1250 }, { "epoch": 0.17, "learning_rate": 1.8880000000000002e-05, "loss": 1.2528, "step": 1260 }, { "epoch": 0.17, "learning_rate": 1.8871111111111112e-05, "loss": 1.271, "step": 1270 }, { "epoch": 0.17, "learning_rate": 1.8862222222222225e-05, "loss": 1.1702, "step": 1280 }, { "epoch": 0.17, "learning_rate": 1.8853333333333335e-05, "loss": 1.1904, "step": 1290 }, { "epoch": 0.17, "learning_rate": 1.8844444444444444e-05, "loss": 1.3074, "step": 1300 }, { "epoch": 0.17, "learning_rate": 1.8835555555555557e-05, "loss": 1.2295, "step": 1310 }, { "epoch": 0.18, "learning_rate": 1.8826666666666667e-05, "loss": 1.1498, "step": 1320 }, { "epoch": 0.18, "learning_rate": 1.881777777777778e-05, "loss": 1.3108, "step": 1330 }, { "epoch": 0.18, "learning_rate": 1.880888888888889e-05, "loss": 1.2615, "step": 1340 }, { "epoch": 0.18, "learning_rate": 1.88e-05, "loss": 1.1936, "step": 1350 }, { "epoch": 0.18, "learning_rate": 1.8791111111111113e-05, "loss": 1.2588, "step": 1360 }, { "epoch": 0.18, "learning_rate": 1.8782222222222222e-05, "loss": 1.2175, "step": 1370 }, { "epoch": 0.18, "learning_rate": 1.8773333333333335e-05, "loss": 1.3057, "step": 1380 }, { "epoch": 0.19, "learning_rate": 1.8764444444444445e-05, "loss": 1.3412, "step": 1390 }, { "epoch": 0.19, "learning_rate": 1.8755555555555558e-05, "loss": 1.148, "step": 1400 }, { "epoch": 0.19, "learning_rate": 1.8746666666666668e-05, "loss": 1.2396, "step": 1410 }, { "epoch": 0.19, "learning_rate": 1.8737777777777778e-05, "loss": 1.2624, "step": 1420 }, { "epoch": 0.19, "learning_rate": 1.872888888888889e-05, "loss": 1.1955, "step": 1430 }, { "epoch": 0.19, "learning_rate": 1.8720000000000004e-05, "loss": 1.1798, "step": 1440 }, { "epoch": 0.19, "learning_rate": 1.8711111111111113e-05, "loss": 1.2366, "step": 1450 }, { "epoch": 0.19, "learning_rate": 1.8702222222222223e-05, "loss": 1.1596, "step": 1460 }, { "epoch": 0.2, "learning_rate": 1.8693333333333333e-05, "loss": 1.2025, "step": 1470 }, { "epoch": 0.2, "learning_rate": 1.8684444444444446e-05, "loss": 1.1361, "step": 1480 }, { "epoch": 0.2, "learning_rate": 1.867555555555556e-05, "loss": 1.221, "step": 1490 }, { "epoch": 0.2, "learning_rate": 1.866666666666667e-05, "loss": 1.1684, "step": 1500 }, { "epoch": 0.2, "learning_rate": 1.865777777777778e-05, "loss": 1.2633, "step": 1510 }, { "epoch": 0.2, "learning_rate": 1.8648888888888888e-05, "loss": 1.2157, "step": 1520 }, { "epoch": 0.2, "learning_rate": 1.864e-05, "loss": 1.2302, "step": 1530 }, { "epoch": 0.21, "learning_rate": 1.8631111111111114e-05, "loss": 1.1223, "step": 1540 }, { "epoch": 0.21, "learning_rate": 1.8622222222222224e-05, "loss": 1.131, "step": 1550 }, { "epoch": 0.21, "learning_rate": 1.8613333333333334e-05, "loss": 1.1905, "step": 1560 }, { "epoch": 0.21, "learning_rate": 1.8604444444444447e-05, "loss": 1.2618, "step": 1570 }, { "epoch": 0.21, "learning_rate": 1.8595555555555556e-05, "loss": 1.1637, "step": 1580 }, { "epoch": 0.21, "learning_rate": 1.858666666666667e-05, "loss": 1.2449, "step": 1590 }, { "epoch": 0.21, "learning_rate": 1.857777777777778e-05, "loss": 1.246, "step": 1600 }, { "epoch": 0.21, "learning_rate": 1.856888888888889e-05, "loss": 1.2406, "step": 1610 }, { "epoch": 0.22, "learning_rate": 1.8560000000000002e-05, "loss": 1.2184, "step": 1620 }, { "epoch": 0.22, "learning_rate": 1.855111111111111e-05, "loss": 1.1665, "step": 1630 }, { "epoch": 0.22, "learning_rate": 1.8542222222222225e-05, "loss": 1.1181, "step": 1640 }, { "epoch": 0.22, "learning_rate": 1.8533333333333334e-05, "loss": 1.1796, "step": 1650 }, { "epoch": 0.22, "learning_rate": 1.8524444444444444e-05, "loss": 1.121, "step": 1660 }, { "epoch": 0.22, "learning_rate": 1.8515555555555557e-05, "loss": 1.1749, "step": 1670 }, { "epoch": 0.22, "learning_rate": 1.8506666666666667e-05, "loss": 1.1692, "step": 1680 }, { "epoch": 0.23, "learning_rate": 1.849777777777778e-05, "loss": 1.136, "step": 1690 }, { "epoch": 0.23, "learning_rate": 1.848888888888889e-05, "loss": 1.1391, "step": 1700 }, { "epoch": 0.23, "learning_rate": 1.8480000000000003e-05, "loss": 1.2016, "step": 1710 }, { "epoch": 0.23, "learning_rate": 1.8471111111111112e-05, "loss": 1.1369, "step": 1720 }, { "epoch": 0.23, "learning_rate": 1.8462222222222222e-05, "loss": 1.1447, "step": 1730 }, { "epoch": 0.23, "learning_rate": 1.8453333333333335e-05, "loss": 1.1595, "step": 1740 }, { "epoch": 0.23, "learning_rate": 1.8444444444444448e-05, "loss": 1.1653, "step": 1750 }, { "epoch": 0.23, "learning_rate": 1.8435555555555558e-05, "loss": 1.2229, "step": 1760 }, { "epoch": 0.24, "learning_rate": 1.8426666666666668e-05, "loss": 1.1258, "step": 1770 }, { "epoch": 0.24, "learning_rate": 1.8417777777777777e-05, "loss": 1.1931, "step": 1780 }, { "epoch": 0.24, "learning_rate": 1.840888888888889e-05, "loss": 1.1667, "step": 1790 }, { "epoch": 0.24, "learning_rate": 1.8400000000000003e-05, "loss": 1.101, "step": 1800 }, { "epoch": 0.24, "learning_rate": 1.8391111111111113e-05, "loss": 1.1117, "step": 1810 }, { "epoch": 0.24, "learning_rate": 1.8382222222222223e-05, "loss": 1.1763, "step": 1820 }, { "epoch": 0.24, "learning_rate": 1.8373333333333332e-05, "loss": 1.1924, "step": 1830 }, { "epoch": 0.25, "learning_rate": 1.8364444444444446e-05, "loss": 1.1929, "step": 1840 }, { "epoch": 0.25, "learning_rate": 1.835555555555556e-05, "loss": 1.1407, "step": 1850 }, { "epoch": 0.25, "learning_rate": 1.834666666666667e-05, "loss": 1.1641, "step": 1860 }, { "epoch": 0.25, "learning_rate": 1.8337777777777778e-05, "loss": 1.2749, "step": 1870 }, { "epoch": 0.25, "learning_rate": 1.832888888888889e-05, "loss": 1.2064, "step": 1880 }, { "epoch": 0.25, "learning_rate": 1.832e-05, "loss": 1.1516, "step": 1890 }, { "epoch": 0.25, "learning_rate": 1.8311111111111114e-05, "loss": 1.0882, "step": 1900 }, { "epoch": 0.25, "learning_rate": 1.8302222222222224e-05, "loss": 1.2308, "step": 1910 }, { "epoch": 0.26, "learning_rate": 1.8293333333333333e-05, "loss": 1.1474, "step": 1920 }, { "epoch": 0.26, "learning_rate": 1.8284444444444446e-05, "loss": 1.2042, "step": 1930 }, { "epoch": 0.26, "learning_rate": 1.8275555555555556e-05, "loss": 1.2158, "step": 1940 }, { "epoch": 0.26, "learning_rate": 1.826666666666667e-05, "loss": 1.2252, "step": 1950 }, { "epoch": 0.26, "learning_rate": 1.825777777777778e-05, "loss": 1.2847, "step": 1960 }, { "epoch": 0.26, "learning_rate": 1.824888888888889e-05, "loss": 1.1319, "step": 1970 }, { "epoch": 0.26, "learning_rate": 1.824e-05, "loss": 1.1339, "step": 1980 }, { "epoch": 0.27, "learning_rate": 1.823111111111111e-05, "loss": 1.1259, "step": 1990 }, { "epoch": 0.27, "learning_rate": 1.8222222222222224e-05, "loss": 1.1442, "step": 2000 }, { "epoch": 0.27, "learning_rate": 1.8213333333333334e-05, "loss": 1.1424, "step": 2010 }, { "epoch": 0.27, "learning_rate": 1.8204444444444447e-05, "loss": 1.1209, "step": 2020 }, { "epoch": 0.27, "learning_rate": 1.8195555555555557e-05, "loss": 1.1215, "step": 2030 }, { "epoch": 0.27, "learning_rate": 1.8186666666666666e-05, "loss": 1.1604, "step": 2040 }, { "epoch": 0.27, "learning_rate": 1.817777777777778e-05, "loss": 1.1118, "step": 2050 }, { "epoch": 0.27, "learning_rate": 1.8168888888888893e-05, "loss": 1.0966, "step": 2060 }, { "epoch": 0.28, "learning_rate": 1.8160000000000002e-05, "loss": 1.1598, "step": 2070 }, { "epoch": 0.28, "learning_rate": 1.8151111111111112e-05, "loss": 1.1438, "step": 2080 }, { "epoch": 0.28, "learning_rate": 1.814222222222222e-05, "loss": 1.1719, "step": 2090 }, { "epoch": 0.28, "learning_rate": 1.8133333333333335e-05, "loss": 1.1146, "step": 2100 }, { "epoch": 0.28, "learning_rate": 1.8124444444444448e-05, "loss": 1.1549, "step": 2110 }, { "epoch": 0.28, "learning_rate": 1.8115555555555558e-05, "loss": 1.1294, "step": 2120 }, { "epoch": 0.28, "learning_rate": 1.8106666666666667e-05, "loss": 1.0684, "step": 2130 }, { "epoch": 0.29, "learning_rate": 1.8097777777777777e-05, "loss": 1.1102, "step": 2140 }, { "epoch": 0.29, "learning_rate": 1.808888888888889e-05, "loss": 1.1921, "step": 2150 }, { "epoch": 0.29, "learning_rate": 1.8080000000000003e-05, "loss": 1.0957, "step": 2160 }, { "epoch": 0.29, "learning_rate": 1.8071111111111113e-05, "loss": 1.2035, "step": 2170 }, { "epoch": 0.29, "learning_rate": 1.8062222222222222e-05, "loss": 1.1848, "step": 2180 }, { "epoch": 0.29, "learning_rate": 1.8053333333333332e-05, "loss": 1.1267, "step": 2190 }, { "epoch": 0.29, "learning_rate": 1.8044444444444445e-05, "loss": 1.0981, "step": 2200 }, { "epoch": 0.29, "learning_rate": 1.803555555555556e-05, "loss": 1.1, "step": 2210 }, { "epoch": 0.3, "learning_rate": 1.8026666666666668e-05, "loss": 1.0529, "step": 2220 }, { "epoch": 0.3, "learning_rate": 1.8017777777777778e-05, "loss": 1.0171, "step": 2230 }, { "epoch": 0.3, "learning_rate": 1.800888888888889e-05, "loss": 1.1594, "step": 2240 }, { "epoch": 0.3, "learning_rate": 1.8e-05, "loss": 1.0629, "step": 2250 }, { "epoch": 0.3, "learning_rate": 1.7991111111111114e-05, "loss": 1.1984, "step": 2260 }, { "epoch": 0.3, "learning_rate": 1.7982222222222223e-05, "loss": 1.1947, "step": 2270 }, { "epoch": 0.3, "learning_rate": 1.7973333333333333e-05, "loss": 1.156, "step": 2280 }, { "epoch": 0.31, "learning_rate": 1.7964444444444446e-05, "loss": 1.1862, "step": 2290 }, { "epoch": 0.31, "learning_rate": 1.7955555555555556e-05, "loss": 1.0595, "step": 2300 }, { "epoch": 0.31, "learning_rate": 1.794666666666667e-05, "loss": 1.0802, "step": 2310 }, { "epoch": 0.31, "learning_rate": 1.793777777777778e-05, "loss": 1.0912, "step": 2320 }, { "epoch": 0.31, "learning_rate": 1.792888888888889e-05, "loss": 1.0775, "step": 2330 }, { "epoch": 0.31, "learning_rate": 1.792e-05, "loss": 1.048, "step": 2340 }, { "epoch": 0.31, "learning_rate": 1.791111111111111e-05, "loss": 1.1577, "step": 2350 }, { "epoch": 0.31, "learning_rate": 1.7902222222222224e-05, "loss": 1.1265, "step": 2360 }, { "epoch": 0.32, "learning_rate": 1.7893333333333337e-05, "loss": 1.1948, "step": 2370 }, { "epoch": 0.32, "learning_rate": 1.7884444444444447e-05, "loss": 1.031, "step": 2380 }, { "epoch": 0.32, "learning_rate": 1.7875555555555556e-05, "loss": 1.1495, "step": 2390 }, { "epoch": 0.32, "learning_rate": 1.7866666666666666e-05, "loss": 1.0832, "step": 2400 }, { "epoch": 0.32, "learning_rate": 1.785777777777778e-05, "loss": 1.1345, "step": 2410 }, { "epoch": 0.32, "learning_rate": 1.7848888888888892e-05, "loss": 1.0734, "step": 2420 }, { "epoch": 0.32, "learning_rate": 1.7840000000000002e-05, "loss": 1.1869, "step": 2430 }, { "epoch": 0.33, "learning_rate": 1.783111111111111e-05, "loss": 1.1339, "step": 2440 }, { "epoch": 0.33, "learning_rate": 1.782222222222222e-05, "loss": 1.1147, "step": 2450 }, { "epoch": 0.33, "learning_rate": 1.7813333333333334e-05, "loss": 1.1347, "step": 2460 }, { "epoch": 0.33, "learning_rate": 1.7804444444444448e-05, "loss": 1.116, "step": 2470 }, { "epoch": 0.33, "learning_rate": 1.7795555555555557e-05, "loss": 1.0951, "step": 2480 }, { "epoch": 0.33, "learning_rate": 1.7786666666666667e-05, "loss": 1.0371, "step": 2490 }, { "epoch": 0.33, "learning_rate": 1.7777777777777777e-05, "loss": 1.1412, "step": 2500 }, { "epoch": 0.33, "learning_rate": 1.776888888888889e-05, "loss": 1.0725, "step": 2510 }, { "epoch": 0.34, "learning_rate": 1.7760000000000003e-05, "loss": 1.106, "step": 2520 }, { "epoch": 0.34, "learning_rate": 1.7751111111111112e-05, "loss": 1.1332, "step": 2530 }, { "epoch": 0.34, "learning_rate": 1.7742222222222222e-05, "loss": 1.1655, "step": 2540 }, { "epoch": 0.34, "learning_rate": 1.7733333333333335e-05, "loss": 1.0724, "step": 2550 }, { "epoch": 0.34, "learning_rate": 1.7724444444444445e-05, "loss": 1.1051, "step": 2560 }, { "epoch": 0.34, "learning_rate": 1.7715555555555558e-05, "loss": 1.0878, "step": 2570 }, { "epoch": 0.34, "learning_rate": 1.7706666666666668e-05, "loss": 1.1064, "step": 2580 }, { "epoch": 0.35, "learning_rate": 1.7697777777777777e-05, "loss": 1.0038, "step": 2590 }, { "epoch": 0.35, "learning_rate": 1.768888888888889e-05, "loss": 1.1117, "step": 2600 }, { "epoch": 0.35, "learning_rate": 1.768e-05, "loss": 1.0629, "step": 2610 }, { "epoch": 0.35, "learning_rate": 1.7671111111111113e-05, "loss": 1.1432, "step": 2620 }, { "epoch": 0.35, "learning_rate": 1.7662222222222223e-05, "loss": 1.0908, "step": 2630 }, { "epoch": 0.35, "learning_rate": 1.7653333333333336e-05, "loss": 1.1247, "step": 2640 }, { "epoch": 0.35, "learning_rate": 1.7644444444444446e-05, "loss": 1.0317, "step": 2650 }, { "epoch": 0.35, "learning_rate": 1.7635555555555555e-05, "loss": 1.0392, "step": 2660 }, { "epoch": 0.36, "learning_rate": 1.762666666666667e-05, "loss": 1.0568, "step": 2670 }, { "epoch": 0.36, "learning_rate": 1.761777777777778e-05, "loss": 1.1636, "step": 2680 }, { "epoch": 0.36, "learning_rate": 1.760888888888889e-05, "loss": 1.1749, "step": 2690 }, { "epoch": 0.36, "learning_rate": 1.76e-05, "loss": 1.0609, "step": 2700 }, { "epoch": 0.36, "learning_rate": 1.759111111111111e-05, "loss": 1.0416, "step": 2710 }, { "epoch": 0.36, "learning_rate": 1.7582222222222224e-05, "loss": 1.0575, "step": 2720 }, { "epoch": 0.36, "learning_rate": 1.7573333333333337e-05, "loss": 1.1435, "step": 2730 }, { "epoch": 0.37, "learning_rate": 1.7564444444444446e-05, "loss": 1.1143, "step": 2740 }, { "epoch": 0.37, "learning_rate": 1.7555555555555556e-05, "loss": 1.0495, "step": 2750 }, { "epoch": 0.37, "learning_rate": 1.7546666666666666e-05, "loss": 1.1068, "step": 2760 }, { "epoch": 0.37, "learning_rate": 1.753777777777778e-05, "loss": 1.0669, "step": 2770 }, { "epoch": 0.37, "learning_rate": 1.7528888888888892e-05, "loss": 1.1336, "step": 2780 }, { "epoch": 0.37, "learning_rate": 1.752e-05, "loss": 1.0577, "step": 2790 }, { "epoch": 0.37, "learning_rate": 1.751111111111111e-05, "loss": 1.1104, "step": 2800 }, { "epoch": 0.37, "learning_rate": 1.750222222222222e-05, "loss": 1.0981, "step": 2810 }, { "epoch": 0.38, "learning_rate": 1.7493333333333334e-05, "loss": 1.1212, "step": 2820 }, { "epoch": 0.38, "learning_rate": 1.7484444444444447e-05, "loss": 1.1093, "step": 2830 }, { "epoch": 0.38, "learning_rate": 1.7475555555555557e-05, "loss": 1.0886, "step": 2840 }, { "epoch": 0.38, "learning_rate": 1.7466666666666667e-05, "loss": 1.0578, "step": 2850 }, { "epoch": 0.38, "learning_rate": 1.745777777777778e-05, "loss": 1.0253, "step": 2860 }, { "epoch": 0.38, "learning_rate": 1.744888888888889e-05, "loss": 1.2101, "step": 2870 }, { "epoch": 0.38, "learning_rate": 1.7440000000000002e-05, "loss": 1.1014, "step": 2880 }, { "epoch": 0.39, "learning_rate": 1.7431111111111112e-05, "loss": 1.0862, "step": 2890 }, { "epoch": 0.39, "learning_rate": 1.7422222222222222e-05, "loss": 1.1147, "step": 2900 }, { "epoch": 0.39, "learning_rate": 1.7413333333333335e-05, "loss": 1.1526, "step": 2910 }, { "epoch": 0.39, "learning_rate": 1.7404444444444445e-05, "loss": 0.9954, "step": 2920 }, { "epoch": 0.39, "learning_rate": 1.7395555555555558e-05, "loss": 1.0464, "step": 2930 }, { "epoch": 0.39, "learning_rate": 1.7386666666666667e-05, "loss": 1.0722, "step": 2940 }, { "epoch": 0.39, "learning_rate": 1.737777777777778e-05, "loss": 1.1072, "step": 2950 }, { "epoch": 0.39, "learning_rate": 1.736888888888889e-05, "loss": 1.1118, "step": 2960 }, { "epoch": 0.4, "learning_rate": 1.736e-05, "loss": 1.115, "step": 2970 }, { "epoch": 0.4, "learning_rate": 1.7351111111111113e-05, "loss": 1.0415, "step": 2980 }, { "epoch": 0.4, "learning_rate": 1.7342222222222226e-05, "loss": 1.0142, "step": 2990 }, { "epoch": 0.4, "learning_rate": 1.7333333333333336e-05, "loss": 1.1325, "step": 3000 }, { "epoch": 0.4, "learning_rate": 1.7324444444444445e-05, "loss": 1.0434, "step": 3010 }, { "epoch": 0.4, "learning_rate": 1.7315555555555555e-05, "loss": 0.9897, "step": 3020 }, { "epoch": 0.4, "learning_rate": 1.7306666666666668e-05, "loss": 1.0997, "step": 3030 }, { "epoch": 0.41, "learning_rate": 1.729777777777778e-05, "loss": 1.0362, "step": 3040 }, { "epoch": 0.41, "learning_rate": 1.728888888888889e-05, "loss": 1.0217, "step": 3050 }, { "epoch": 0.41, "learning_rate": 1.728e-05, "loss": 1.0983, "step": 3060 }, { "epoch": 0.41, "learning_rate": 1.727111111111111e-05, "loss": 1.0657, "step": 3070 }, { "epoch": 0.41, "learning_rate": 1.7262222222222223e-05, "loss": 1.0473, "step": 3080 }, { "epoch": 0.41, "learning_rate": 1.7253333333333336e-05, "loss": 1.0808, "step": 3090 }, { "epoch": 0.41, "learning_rate": 1.7244444444444446e-05, "loss": 1.077, "step": 3100 }, { "epoch": 0.41, "learning_rate": 1.7235555555555556e-05, "loss": 1.1284, "step": 3110 }, { "epoch": 0.42, "learning_rate": 1.7226666666666665e-05, "loss": 1.0361, "step": 3120 }, { "epoch": 0.42, "learning_rate": 1.721777777777778e-05, "loss": 1.071, "step": 3130 }, { "epoch": 0.42, "learning_rate": 1.720888888888889e-05, "loss": 1.0644, "step": 3140 }, { "epoch": 0.42, "learning_rate": 1.72e-05, "loss": 1.0926, "step": 3150 }, { "epoch": 0.42, "learning_rate": 1.719111111111111e-05, "loss": 1.1191, "step": 3160 }, { "epoch": 0.42, "learning_rate": 1.7182222222222224e-05, "loss": 1.0015, "step": 3170 }, { "epoch": 0.42, "learning_rate": 1.7173333333333334e-05, "loss": 1.019, "step": 3180 }, { "epoch": 0.43, "learning_rate": 1.7164444444444447e-05, "loss": 1.0915, "step": 3190 }, { "epoch": 0.43, "learning_rate": 1.7155555555555557e-05, "loss": 1.0894, "step": 3200 }, { "epoch": 0.43, "learning_rate": 1.7146666666666666e-05, "loss": 1.0791, "step": 3210 }, { "epoch": 0.43, "learning_rate": 1.713777777777778e-05, "loss": 1.0386, "step": 3220 }, { "epoch": 0.43, "learning_rate": 1.712888888888889e-05, "loss": 1.0754, "step": 3230 }, { "epoch": 0.43, "learning_rate": 1.7120000000000002e-05, "loss": 1.0645, "step": 3240 }, { "epoch": 0.43, "learning_rate": 1.7111111111111112e-05, "loss": 0.9963, "step": 3250 }, { "epoch": 0.43, "learning_rate": 1.7102222222222225e-05, "loss": 1.021, "step": 3260 }, { "epoch": 0.44, "learning_rate": 1.7093333333333335e-05, "loss": 0.9769, "step": 3270 }, { "epoch": 0.44, "learning_rate": 1.7084444444444444e-05, "loss": 1.0322, "step": 3280 }, { "epoch": 0.44, "learning_rate": 1.7075555555555557e-05, "loss": 1.1114, "step": 3290 }, { "epoch": 0.44, "learning_rate": 1.706666666666667e-05, "loss": 1.0605, "step": 3300 }, { "epoch": 0.44, "learning_rate": 1.705777777777778e-05, "loss": 1.0789, "step": 3310 }, { "epoch": 0.44, "learning_rate": 1.704888888888889e-05, "loss": 1.0943, "step": 3320 }, { "epoch": 0.44, "learning_rate": 1.704e-05, "loss": 1.0243, "step": 3330 }, { "epoch": 0.45, "learning_rate": 1.7031111111111113e-05, "loss": 1.0203, "step": 3340 }, { "epoch": 0.45, "learning_rate": 1.7022222222222226e-05, "loss": 1.0241, "step": 3350 }, { "epoch": 0.45, "learning_rate": 1.7013333333333335e-05, "loss": 1.0587, "step": 3360 }, { "epoch": 0.45, "learning_rate": 1.7004444444444445e-05, "loss": 0.9933, "step": 3370 }, { "epoch": 0.45, "learning_rate": 1.6995555555555555e-05, "loss": 1.0356, "step": 3380 }, { "epoch": 0.45, "learning_rate": 1.6986666666666668e-05, "loss": 1.0042, "step": 3390 }, { "epoch": 0.45, "learning_rate": 1.697777777777778e-05, "loss": 1.0549, "step": 3400 }, { "epoch": 0.45, "learning_rate": 1.696888888888889e-05, "loss": 1.0179, "step": 3410 }, { "epoch": 0.46, "learning_rate": 1.696e-05, "loss": 1.0327, "step": 3420 }, { "epoch": 0.46, "learning_rate": 1.695111111111111e-05, "loss": 1.039, "step": 3430 }, { "epoch": 0.46, "learning_rate": 1.6942222222222223e-05, "loss": 0.9851, "step": 3440 }, { "epoch": 0.46, "learning_rate": 1.6933333333333336e-05, "loss": 1.0751, "step": 3450 }, { "epoch": 0.46, "learning_rate": 1.6924444444444446e-05, "loss": 1.0844, "step": 3460 }, { "epoch": 0.46, "learning_rate": 1.6915555555555555e-05, "loss": 1.0434, "step": 3470 }, { "epoch": 0.46, "learning_rate": 1.690666666666667e-05, "loss": 1.0278, "step": 3480 }, { "epoch": 0.47, "learning_rate": 1.6897777777777778e-05, "loss": 0.9855, "step": 3490 }, { "epoch": 0.47, "learning_rate": 1.688888888888889e-05, "loss": 1.0691, "step": 3500 }, { "epoch": 0.47, "learning_rate": 1.688e-05, "loss": 1.0254, "step": 3510 }, { "epoch": 0.47, "learning_rate": 1.687111111111111e-05, "loss": 1.0265, "step": 3520 }, { "epoch": 0.47, "learning_rate": 1.6862222222222224e-05, "loss": 1.0081, "step": 3530 }, { "epoch": 0.47, "learning_rate": 1.6853333333333333e-05, "loss": 0.9902, "step": 3540 }, { "epoch": 0.47, "learning_rate": 1.6844444444444447e-05, "loss": 1.0496, "step": 3550 }, { "epoch": 0.47, "learning_rate": 1.6835555555555556e-05, "loss": 1.092, "step": 3560 }, { "epoch": 0.48, "learning_rate": 1.682666666666667e-05, "loss": 1.0257, "step": 3570 }, { "epoch": 0.48, "learning_rate": 1.681777777777778e-05, "loss": 0.9694, "step": 3580 }, { "epoch": 0.48, "learning_rate": 1.680888888888889e-05, "loss": 1.0551, "step": 3590 }, { "epoch": 0.48, "learning_rate": 1.6800000000000002e-05, "loss": 1.0301, "step": 3600 }, { "epoch": 0.48, "learning_rate": 1.6791111111111115e-05, "loss": 1.0309, "step": 3610 }, { "epoch": 0.48, "learning_rate": 1.6782222222222225e-05, "loss": 0.9837, "step": 3620 }, { "epoch": 0.48, "learning_rate": 1.6773333333333334e-05, "loss": 1.0675, "step": 3630 }, { "epoch": 0.49, "learning_rate": 1.6764444444444444e-05, "loss": 1.0518, "step": 3640 }, { "epoch": 0.49, "learning_rate": 1.6755555555555557e-05, "loss": 0.9905, "step": 3650 }, { "epoch": 0.49, "learning_rate": 1.674666666666667e-05, "loss": 0.972, "step": 3660 }, { "epoch": 0.49, "learning_rate": 1.673777777777778e-05, "loss": 0.9871, "step": 3670 }, { "epoch": 0.49, "learning_rate": 1.672888888888889e-05, "loss": 1.0209, "step": 3680 }, { "epoch": 0.49, "learning_rate": 1.672e-05, "loss": 1.0496, "step": 3690 }, { "epoch": 0.49, "learning_rate": 1.6711111111111112e-05, "loss": 1.0903, "step": 3700 }, { "epoch": 0.49, "learning_rate": 1.6702222222222225e-05, "loss": 1.0351, "step": 3710 }, { "epoch": 0.5, "learning_rate": 1.6693333333333335e-05, "loss": 1.0321, "step": 3720 }, { "epoch": 0.5, "learning_rate": 1.6684444444444445e-05, "loss": 1.0223, "step": 3730 }, { "epoch": 0.5, "learning_rate": 1.6675555555555554e-05, "loss": 1.0591, "step": 3740 }, { "epoch": 0.5, "learning_rate": 1.6666666666666667e-05, "loss": 1.005, "step": 3750 }, { "epoch": 0.5, "learning_rate": 1.665777777777778e-05, "loss": 1.0344, "step": 3760 }, { "epoch": 0.5, "learning_rate": 1.664888888888889e-05, "loss": 1.0726, "step": 3770 }, { "epoch": 0.5, "learning_rate": 1.664e-05, "loss": 1.0353, "step": 3780 }, { "epoch": 0.51, "learning_rate": 1.6631111111111113e-05, "loss": 1.0594, "step": 3790 }, { "epoch": 0.51, "learning_rate": 1.6622222222222223e-05, "loss": 1.0401, "step": 3800 }, { "epoch": 0.51, "learning_rate": 1.6613333333333336e-05, "loss": 0.987, "step": 3810 }, { "epoch": 0.51, "learning_rate": 1.6604444444444445e-05, "loss": 1.0044, "step": 3820 }, { "epoch": 0.51, "learning_rate": 1.6595555555555555e-05, "loss": 0.9659, "step": 3830 }, { "epoch": 0.51, "learning_rate": 1.6586666666666668e-05, "loss": 0.984, "step": 3840 }, { "epoch": 0.51, "learning_rate": 1.6577777777777778e-05, "loss": 0.9561, "step": 3850 }, { "epoch": 0.51, "learning_rate": 1.656888888888889e-05, "loss": 1.0197, "step": 3860 }, { "epoch": 0.52, "learning_rate": 1.656e-05, "loss": 1.0082, "step": 3870 }, { "epoch": 0.52, "learning_rate": 1.6551111111111114e-05, "loss": 0.9926, "step": 3880 }, { "epoch": 0.52, "learning_rate": 1.6542222222222223e-05, "loss": 1.1347, "step": 3890 }, { "epoch": 0.52, "learning_rate": 1.6533333333333333e-05, "loss": 0.9225, "step": 3900 }, { "epoch": 0.52, "learning_rate": 1.6524444444444446e-05, "loss": 1.0511, "step": 3910 }, { "epoch": 0.52, "learning_rate": 1.651555555555556e-05, "loss": 0.9633, "step": 3920 }, { "epoch": 0.52, "learning_rate": 1.650666666666667e-05, "loss": 1.0121, "step": 3930 }, { "epoch": 0.53, "learning_rate": 1.649777777777778e-05, "loss": 1.0054, "step": 3940 }, { "epoch": 0.53, "learning_rate": 1.648888888888889e-05, "loss": 0.9862, "step": 3950 }, { "epoch": 0.53, "learning_rate": 1.648e-05, "loss": 1.0617, "step": 3960 }, { "epoch": 0.53, "learning_rate": 1.6471111111111115e-05, "loss": 1.0535, "step": 3970 }, { "epoch": 0.53, "learning_rate": 1.6462222222222224e-05, "loss": 0.994, "step": 3980 }, { "epoch": 0.53, "learning_rate": 1.6453333333333334e-05, "loss": 1.0628, "step": 3990 }, { "epoch": 0.53, "learning_rate": 1.6444444444444444e-05, "loss": 0.9894, "step": 4000 }, { "epoch": 0.53, "learning_rate": 1.6435555555555557e-05, "loss": 1.0635, "step": 4010 }, { "epoch": 0.54, "learning_rate": 1.642666666666667e-05, "loss": 1.0418, "step": 4020 }, { "epoch": 0.54, "learning_rate": 1.641777777777778e-05, "loss": 0.9938, "step": 4030 }, { "epoch": 0.54, "learning_rate": 1.640888888888889e-05, "loss": 0.9737, "step": 4040 }, { "epoch": 0.54, "learning_rate": 1.64e-05, "loss": 0.9671, "step": 4050 }, { "epoch": 0.54, "learning_rate": 1.6391111111111112e-05, "loss": 0.9937, "step": 4060 }, { "epoch": 0.54, "learning_rate": 1.6382222222222225e-05, "loss": 0.9477, "step": 4070 }, { "epoch": 0.54, "learning_rate": 1.6373333333333335e-05, "loss": 1.1009, "step": 4080 }, { "epoch": 0.55, "learning_rate": 1.6364444444444444e-05, "loss": 0.9271, "step": 4090 }, { "epoch": 0.55, "learning_rate": 1.6355555555555557e-05, "loss": 0.9725, "step": 4100 }, { "epoch": 0.55, "learning_rate": 1.6346666666666667e-05, "loss": 1.018, "step": 4110 }, { "epoch": 0.55, "learning_rate": 1.633777777777778e-05, "loss": 1.0247, "step": 4120 }, { "epoch": 0.55, "learning_rate": 1.632888888888889e-05, "loss": 0.9936, "step": 4130 }, { "epoch": 0.55, "learning_rate": 1.632e-05, "loss": 0.9915, "step": 4140 }, { "epoch": 0.55, "learning_rate": 1.6311111111111113e-05, "loss": 0.9977, "step": 4150 }, { "epoch": 0.55, "learning_rate": 1.6302222222222222e-05, "loss": 0.9974, "step": 4160 }, { "epoch": 0.56, "learning_rate": 1.6293333333333335e-05, "loss": 0.9561, "step": 4170 }, { "epoch": 0.56, "learning_rate": 1.6284444444444445e-05, "loss": 0.9646, "step": 4180 }, { "epoch": 0.56, "learning_rate": 1.6275555555555558e-05, "loss": 1.0278, "step": 4190 }, { "epoch": 0.56, "learning_rate": 1.6266666666666668e-05, "loss": 1.051, "step": 4200 }, { "epoch": 0.56, "learning_rate": 1.6257777777777778e-05, "loss": 0.9725, "step": 4210 }, { "epoch": 0.56, "learning_rate": 1.624888888888889e-05, "loss": 0.9907, "step": 4220 }, { "epoch": 0.56, "learning_rate": 1.6240000000000004e-05, "loss": 1.0095, "step": 4230 }, { "epoch": 0.57, "learning_rate": 1.6231111111111113e-05, "loss": 1.0398, "step": 4240 }, { "epoch": 0.57, "learning_rate": 1.6222222222222223e-05, "loss": 0.9242, "step": 4250 }, { "epoch": 0.57, "learning_rate": 1.6213333333333333e-05, "loss": 0.971, "step": 4260 }, { "epoch": 0.57, "learning_rate": 1.6204444444444446e-05, "loss": 0.9973, "step": 4270 }, { "epoch": 0.57, "learning_rate": 1.619555555555556e-05, "loss": 0.952, "step": 4280 }, { "epoch": 0.57, "learning_rate": 1.618666666666667e-05, "loss": 0.9942, "step": 4290 }, { "epoch": 0.57, "learning_rate": 1.617777777777778e-05, "loss": 0.9642, "step": 4300 }, { "epoch": 0.57, "learning_rate": 1.6168888888888888e-05, "loss": 0.9615, "step": 4310 }, { "epoch": 0.58, "learning_rate": 1.616e-05, "loss": 0.9425, "step": 4320 }, { "epoch": 0.58, "learning_rate": 1.6151111111111114e-05, "loss": 0.9787, "step": 4330 }, { "epoch": 0.58, "learning_rate": 1.6142222222222224e-05, "loss": 0.9459, "step": 4340 }, { "epoch": 0.58, "learning_rate": 1.6133333333333334e-05, "loss": 0.9945, "step": 4350 }, { "epoch": 0.58, "learning_rate": 1.6124444444444443e-05, "loss": 1.0766, "step": 4360 }, { "epoch": 0.58, "learning_rate": 1.6115555555555556e-05, "loss": 0.9697, "step": 4370 }, { "epoch": 0.58, "learning_rate": 1.610666666666667e-05, "loss": 0.9975, "step": 4380 }, { "epoch": 0.59, "learning_rate": 1.609777777777778e-05, "loss": 1.0221, "step": 4390 }, { "epoch": 0.59, "learning_rate": 1.608888888888889e-05, "loss": 0.9796, "step": 4400 }, { "epoch": 0.59, "learning_rate": 1.6080000000000002e-05, "loss": 0.9725, "step": 4410 }, { "epoch": 0.59, "learning_rate": 1.607111111111111e-05, "loss": 0.9555, "step": 4420 }, { "epoch": 0.59, "learning_rate": 1.6062222222222225e-05, "loss": 1.0233, "step": 4430 }, { "epoch": 0.59, "learning_rate": 1.6053333333333334e-05, "loss": 0.9116, "step": 4440 }, { "epoch": 0.59, "learning_rate": 1.6044444444444444e-05, "loss": 0.9576, "step": 4450 }, { "epoch": 0.59, "learning_rate": 1.6035555555555557e-05, "loss": 1.0401, "step": 4460 }, { "epoch": 0.6, "learning_rate": 1.6026666666666667e-05, "loss": 0.9826, "step": 4470 }, { "epoch": 0.6, "learning_rate": 1.601777777777778e-05, "loss": 0.9551, "step": 4480 }, { "epoch": 0.6, "learning_rate": 1.600888888888889e-05, "loss": 0.9195, "step": 4490 }, { "epoch": 0.6, "learning_rate": 1.6000000000000003e-05, "loss": 0.936, "step": 4500 }, { "epoch": 0.6, "learning_rate": 1.5991111111111112e-05, "loss": 0.9904, "step": 4510 }, { "epoch": 0.6, "learning_rate": 1.5982222222222222e-05, "loss": 1.0146, "step": 4520 }, { "epoch": 0.6, "learning_rate": 1.5973333333333335e-05, "loss": 0.9256, "step": 4530 }, { "epoch": 0.61, "learning_rate": 1.5964444444444448e-05, "loss": 0.9898, "step": 4540 }, { "epoch": 0.61, "learning_rate": 1.5955555555555558e-05, "loss": 1.0003, "step": 4550 }, { "epoch": 0.61, "learning_rate": 1.5946666666666668e-05, "loss": 0.9904, "step": 4560 }, { "epoch": 0.61, "learning_rate": 1.5937777777777777e-05, "loss": 1.0455, "step": 4570 }, { "epoch": 0.61, "learning_rate": 1.592888888888889e-05, "loss": 1.0103, "step": 4580 }, { "epoch": 0.61, "learning_rate": 1.5920000000000003e-05, "loss": 0.9051, "step": 4590 }, { "epoch": 0.61, "learning_rate": 1.5911111111111113e-05, "loss": 0.9406, "step": 4600 }, { "epoch": 0.61, "learning_rate": 1.5902222222222223e-05, "loss": 0.9872, "step": 4610 }, { "epoch": 0.62, "learning_rate": 1.5893333333333333e-05, "loss": 0.9281, "step": 4620 }, { "epoch": 0.62, "learning_rate": 1.5884444444444446e-05, "loss": 0.9992, "step": 4630 }, { "epoch": 0.62, "learning_rate": 1.587555555555556e-05, "loss": 0.9596, "step": 4640 }, { "epoch": 0.62, "learning_rate": 1.586666666666667e-05, "loss": 0.9157, "step": 4650 }, { "epoch": 0.62, "learning_rate": 1.5857777777777778e-05, "loss": 0.9478, "step": 4660 }, { "epoch": 0.62, "learning_rate": 1.5848888888888888e-05, "loss": 0.903, "step": 4670 }, { "epoch": 0.62, "learning_rate": 1.584e-05, "loss": 0.9383, "step": 4680 }, { "epoch": 0.63, "learning_rate": 1.5831111111111114e-05, "loss": 0.9767, "step": 4690 }, { "epoch": 0.63, "learning_rate": 1.5822222222222224e-05, "loss": 0.9972, "step": 4700 }, { "epoch": 0.63, "learning_rate": 1.5813333333333333e-05, "loss": 0.9159, "step": 4710 }, { "epoch": 0.63, "learning_rate": 1.5804444444444446e-05, "loss": 0.9773, "step": 4720 }, { "epoch": 0.63, "learning_rate": 1.5795555555555556e-05, "loss": 0.907, "step": 4730 }, { "epoch": 0.63, "learning_rate": 1.578666666666667e-05, "loss": 0.9322, "step": 4740 }, { "epoch": 0.63, "learning_rate": 1.577777777777778e-05, "loss": 0.9645, "step": 4750 }, { "epoch": 0.63, "learning_rate": 1.576888888888889e-05, "loss": 0.9977, "step": 4760 }, { "epoch": 0.64, "learning_rate": 1.576e-05, "loss": 0.9669, "step": 4770 }, { "epoch": 0.64, "learning_rate": 1.575111111111111e-05, "loss": 0.9661, "step": 4780 }, { "epoch": 0.64, "learning_rate": 1.5742222222222224e-05, "loss": 1.0359, "step": 4790 }, { "epoch": 0.64, "learning_rate": 1.5733333333333334e-05, "loss": 0.9274, "step": 4800 }, { "epoch": 0.64, "learning_rate": 1.5724444444444447e-05, "loss": 0.9323, "step": 4810 }, { "epoch": 0.64, "learning_rate": 1.5715555555555557e-05, "loss": 0.9501, "step": 4820 }, { "epoch": 0.64, "learning_rate": 1.5706666666666666e-05, "loss": 1.0361, "step": 4830 }, { "epoch": 0.65, "learning_rate": 1.569777777777778e-05, "loss": 0.9884, "step": 4840 }, { "epoch": 0.65, "learning_rate": 1.5688888888888893e-05, "loss": 0.9659, "step": 4850 }, { "epoch": 0.65, "learning_rate": 1.5680000000000002e-05, "loss": 0.9623, "step": 4860 }, { "epoch": 0.65, "learning_rate": 1.5671111111111112e-05, "loss": 0.9322, "step": 4870 }, { "epoch": 0.65, "learning_rate": 1.5662222222222222e-05, "loss": 0.9616, "step": 4880 }, { "epoch": 0.65, "learning_rate": 1.5653333333333335e-05, "loss": 0.9586, "step": 4890 }, { "epoch": 0.65, "learning_rate": 1.5644444444444448e-05, "loss": 0.9288, "step": 4900 }, { "epoch": 0.65, "learning_rate": 1.5635555555555558e-05, "loss": 0.9597, "step": 4910 }, { "epoch": 0.66, "learning_rate": 1.5626666666666667e-05, "loss": 0.9326, "step": 4920 }, { "epoch": 0.66, "learning_rate": 1.5617777777777777e-05, "loss": 0.9742, "step": 4930 }, { "epoch": 0.66, "learning_rate": 1.560888888888889e-05, "loss": 0.9202, "step": 4940 }, { "epoch": 0.66, "learning_rate": 1.5600000000000003e-05, "loss": 0.9748, "step": 4950 }, { "epoch": 0.66, "learning_rate": 1.5591111111111113e-05, "loss": 1.0312, "step": 4960 }, { "epoch": 0.66, "learning_rate": 1.5582222222222222e-05, "loss": 0.8969, "step": 4970 }, { "epoch": 0.66, "learning_rate": 1.5573333333333332e-05, "loss": 0.9671, "step": 4980 }, { "epoch": 0.67, "learning_rate": 1.5564444444444445e-05, "loss": 0.9489, "step": 4990 }, { "epoch": 0.67, "learning_rate": 1.555555555555556e-05, "loss": 0.9591, "step": 5000 }, { "epoch": 0.67, "learning_rate": 1.5546666666666668e-05, "loss": 0.8998, "step": 5010 }, { "epoch": 0.67, "learning_rate": 1.5537777777777778e-05, "loss": 0.9953, "step": 5020 }, { "epoch": 0.67, "learning_rate": 1.552888888888889e-05, "loss": 0.8992, "step": 5030 }, { "epoch": 0.67, "learning_rate": 1.552e-05, "loss": 0.9277, "step": 5040 }, { "epoch": 0.67, "learning_rate": 1.5511111111111114e-05, "loss": 0.9116, "step": 5050 }, { "epoch": 0.67, "learning_rate": 1.5502222222222223e-05, "loss": 0.9418, "step": 5060 }, { "epoch": 0.68, "learning_rate": 1.5493333333333333e-05, "loss": 1.0156, "step": 5070 }, { "epoch": 0.68, "learning_rate": 1.5484444444444446e-05, "loss": 0.8966, "step": 5080 }, { "epoch": 0.68, "learning_rate": 1.5475555555555556e-05, "loss": 0.9672, "step": 5090 }, { "epoch": 0.68, "learning_rate": 1.546666666666667e-05, "loss": 0.9586, "step": 5100 }, { "epoch": 0.68, "learning_rate": 1.545777777777778e-05, "loss": 0.9494, "step": 5110 }, { "epoch": 0.68, "learning_rate": 1.544888888888889e-05, "loss": 1.0093, "step": 5120 }, { "epoch": 0.68, "learning_rate": 1.544e-05, "loss": 0.9218, "step": 5130 }, { "epoch": 0.69, "learning_rate": 1.543111111111111e-05, "loss": 0.9788, "step": 5140 }, { "epoch": 0.69, "learning_rate": 1.5422222222222224e-05, "loss": 1.0103, "step": 5150 }, { "epoch": 0.69, "learning_rate": 1.5413333333333337e-05, "loss": 0.9994, "step": 5160 }, { "epoch": 0.69, "learning_rate": 1.5404444444444447e-05, "loss": 1.0622, "step": 5170 }, { "epoch": 0.69, "learning_rate": 1.5395555555555556e-05, "loss": 0.9772, "step": 5180 }, { "epoch": 0.69, "learning_rate": 1.5386666666666666e-05, "loss": 1.0067, "step": 5190 }, { "epoch": 0.69, "learning_rate": 1.537777777777778e-05, "loss": 0.9603, "step": 5200 }, { "epoch": 0.69, "learning_rate": 1.5368888888888892e-05, "loss": 0.9853, "step": 5210 }, { "epoch": 0.7, "learning_rate": 1.5360000000000002e-05, "loss": 0.9613, "step": 5220 }, { "epoch": 0.7, "learning_rate": 1.5351111111111112e-05, "loss": 0.9236, "step": 5230 }, { "epoch": 0.7, "learning_rate": 1.534222222222222e-05, "loss": 0.9349, "step": 5240 }, { "epoch": 0.7, "learning_rate": 1.5333333333333334e-05, "loss": 0.9557, "step": 5250 }, { "epoch": 0.7, "learning_rate": 1.5324444444444448e-05, "loss": 0.9329, "step": 5260 }, { "epoch": 0.7, "learning_rate": 1.5315555555555557e-05, "loss": 0.9509, "step": 5270 }, { "epoch": 0.7, "learning_rate": 1.5306666666666667e-05, "loss": 0.8865, "step": 5280 }, { "epoch": 0.71, "learning_rate": 1.5297777777777777e-05, "loss": 0.991, "step": 5290 }, { "epoch": 0.71, "learning_rate": 1.528888888888889e-05, "loss": 0.8983, "step": 5300 }, { "epoch": 0.71, "learning_rate": 1.5280000000000003e-05, "loss": 0.9348, "step": 5310 }, { "epoch": 0.71, "learning_rate": 1.5271111111111112e-05, "loss": 0.9497, "step": 5320 }, { "epoch": 0.71, "learning_rate": 1.5262222222222222e-05, "loss": 0.9707, "step": 5330 }, { "epoch": 0.71, "learning_rate": 1.5253333333333335e-05, "loss": 0.9372, "step": 5340 }, { "epoch": 0.71, "learning_rate": 1.5244444444444447e-05, "loss": 0.8714, "step": 5350 }, { "epoch": 0.71, "learning_rate": 1.5235555555555556e-05, "loss": 0.9135, "step": 5360 }, { "epoch": 0.72, "learning_rate": 1.5226666666666668e-05, "loss": 0.9346, "step": 5370 }, { "epoch": 0.72, "learning_rate": 1.5217777777777777e-05, "loss": 0.965, "step": 5380 }, { "epoch": 0.72, "learning_rate": 1.520888888888889e-05, "loss": 0.8985, "step": 5390 }, { "epoch": 0.72, "learning_rate": 1.5200000000000002e-05, "loss": 0.8954, "step": 5400 }, { "epoch": 0.72, "learning_rate": 1.5191111111111112e-05, "loss": 0.8545, "step": 5410 }, { "epoch": 0.72, "learning_rate": 1.5182222222222223e-05, "loss": 0.9676, "step": 5420 }, { "epoch": 0.72, "learning_rate": 1.5173333333333336e-05, "loss": 0.9198, "step": 5430 }, { "epoch": 0.73, "learning_rate": 1.5164444444444446e-05, "loss": 0.9798, "step": 5440 }, { "epoch": 0.73, "learning_rate": 1.5155555555555557e-05, "loss": 0.9875, "step": 5450 }, { "epoch": 0.73, "learning_rate": 1.5146666666666667e-05, "loss": 0.9161, "step": 5460 }, { "epoch": 0.73, "learning_rate": 1.513777777777778e-05, "loss": 0.8654, "step": 5470 }, { "epoch": 0.73, "learning_rate": 1.5128888888888891e-05, "loss": 1.0018, "step": 5480 }, { "epoch": 0.73, "learning_rate": 1.5120000000000001e-05, "loss": 0.8823, "step": 5490 }, { "epoch": 0.73, "learning_rate": 1.5111111111111112e-05, "loss": 0.9539, "step": 5500 }, { "epoch": 0.73, "learning_rate": 1.5102222222222222e-05, "loss": 0.9626, "step": 5510 }, { "epoch": 0.74, "learning_rate": 1.5093333333333335e-05, "loss": 0.9232, "step": 5520 }, { "epoch": 0.74, "learning_rate": 1.5084444444444446e-05, "loss": 0.8346, "step": 5530 }, { "epoch": 0.74, "learning_rate": 1.5075555555555556e-05, "loss": 1.0124, "step": 5540 }, { "epoch": 0.74, "learning_rate": 1.5066666666666668e-05, "loss": 0.9096, "step": 5550 }, { "epoch": 0.74, "learning_rate": 1.505777777777778e-05, "loss": 0.9815, "step": 5560 }, { "epoch": 0.74, "learning_rate": 1.504888888888889e-05, "loss": 0.8835, "step": 5570 }, { "epoch": 0.74, "learning_rate": 1.5040000000000002e-05, "loss": 0.9604, "step": 5580 }, { "epoch": 0.75, "learning_rate": 1.5031111111111111e-05, "loss": 0.9602, "step": 5590 }, { "epoch": 0.75, "learning_rate": 1.5022222222222223e-05, "loss": 0.9536, "step": 5600 }, { "epoch": 0.75, "learning_rate": 1.5013333333333336e-05, "loss": 0.9988, "step": 5610 }, { "epoch": 0.75, "learning_rate": 1.5004444444444446e-05, "loss": 0.9481, "step": 5620 }, { "epoch": 0.75, "learning_rate": 1.4995555555555557e-05, "loss": 0.898, "step": 5630 }, { "epoch": 0.75, "learning_rate": 1.4986666666666667e-05, "loss": 0.8502, "step": 5640 }, { "epoch": 0.75, "learning_rate": 1.497777777777778e-05, "loss": 0.9179, "step": 5650 }, { "epoch": 0.75, "learning_rate": 1.4968888888888891e-05, "loss": 0.8196, "step": 5660 }, { "epoch": 0.76, "learning_rate": 1.496e-05, "loss": 0.9092, "step": 5670 }, { "epoch": 0.76, "learning_rate": 1.4951111111111112e-05, "loss": 0.8639, "step": 5680 }, { "epoch": 0.76, "learning_rate": 1.4942222222222222e-05, "loss": 0.9076, "step": 5690 }, { "epoch": 0.76, "learning_rate": 1.4933333333333335e-05, "loss": 0.8628, "step": 5700 }, { "epoch": 0.76, "learning_rate": 1.4924444444444446e-05, "loss": 0.9502, "step": 5710 }, { "epoch": 0.76, "learning_rate": 1.4915555555555556e-05, "loss": 1.0408, "step": 5720 }, { "epoch": 0.76, "learning_rate": 1.4906666666666667e-05, "loss": 0.855, "step": 5730 }, { "epoch": 0.77, "learning_rate": 1.489777777777778e-05, "loss": 0.9414, "step": 5740 }, { "epoch": 0.77, "learning_rate": 1.488888888888889e-05, "loss": 0.9403, "step": 5750 }, { "epoch": 0.77, "learning_rate": 1.4880000000000002e-05, "loss": 0.8758, "step": 5760 }, { "epoch": 0.77, "learning_rate": 1.4871111111111111e-05, "loss": 0.9094, "step": 5770 }, { "epoch": 0.77, "learning_rate": 1.4862222222222223e-05, "loss": 0.8756, "step": 5780 }, { "epoch": 0.77, "learning_rate": 1.4853333333333336e-05, "loss": 0.8516, "step": 5790 }, { "epoch": 0.77, "learning_rate": 1.4844444444444445e-05, "loss": 0.9689, "step": 5800 }, { "epoch": 0.77, "learning_rate": 1.4835555555555557e-05, "loss": 0.9742, "step": 5810 }, { "epoch": 0.78, "learning_rate": 1.4826666666666666e-05, "loss": 0.9089, "step": 5820 }, { "epoch": 0.78, "learning_rate": 1.481777777777778e-05, "loss": 0.9663, "step": 5830 }, { "epoch": 0.78, "learning_rate": 1.4808888888888891e-05, "loss": 0.8227, "step": 5840 }, { "epoch": 0.78, "learning_rate": 1.48e-05, "loss": 0.8964, "step": 5850 }, { "epoch": 0.78, "learning_rate": 1.4791111111111112e-05, "loss": 0.8939, "step": 5860 }, { "epoch": 0.78, "learning_rate": 1.4782222222222225e-05, "loss": 0.9705, "step": 5870 }, { "epoch": 0.78, "learning_rate": 1.4773333333333335e-05, "loss": 0.8779, "step": 5880 }, { "epoch": 0.79, "learning_rate": 1.4764444444444446e-05, "loss": 0.9388, "step": 5890 }, { "epoch": 0.79, "learning_rate": 1.4755555555555556e-05, "loss": 0.94, "step": 5900 }, { "epoch": 0.79, "learning_rate": 1.4746666666666667e-05, "loss": 0.8964, "step": 5910 }, { "epoch": 0.79, "learning_rate": 1.473777777777778e-05, "loss": 0.908, "step": 5920 }, { "epoch": 0.79, "learning_rate": 1.472888888888889e-05, "loss": 0.8959, "step": 5930 }, { "epoch": 0.79, "learning_rate": 1.4720000000000001e-05, "loss": 0.8989, "step": 5940 }, { "epoch": 0.79, "learning_rate": 1.4711111111111111e-05, "loss": 0.8716, "step": 5950 }, { "epoch": 0.79, "learning_rate": 1.4702222222222224e-05, "loss": 0.832, "step": 5960 }, { "epoch": 0.8, "learning_rate": 1.4693333333333336e-05, "loss": 0.8614, "step": 5970 }, { "epoch": 0.8, "learning_rate": 1.4684444444444445e-05, "loss": 0.892, "step": 5980 }, { "epoch": 0.8, "learning_rate": 1.4675555555555557e-05, "loss": 0.844, "step": 5990 }, { "epoch": 0.8, "learning_rate": 1.4666666666666666e-05, "loss": 0.9293, "step": 6000 }, { "epoch": 0.8, "learning_rate": 1.465777777777778e-05, "loss": 0.839, "step": 6010 }, { "epoch": 0.8, "learning_rate": 1.464888888888889e-05, "loss": 0.856, "step": 6020 }, { "epoch": 0.8, "learning_rate": 1.464e-05, "loss": 0.8213, "step": 6030 }, { "epoch": 0.81, "learning_rate": 1.4631111111111112e-05, "loss": 0.9093, "step": 6040 }, { "epoch": 0.81, "learning_rate": 1.4622222222222225e-05, "loss": 0.8882, "step": 6050 }, { "epoch": 0.81, "learning_rate": 1.4613333333333335e-05, "loss": 0.9232, "step": 6060 }, { "epoch": 0.81, "learning_rate": 1.4604444444444446e-05, "loss": 0.9056, "step": 6070 }, { "epoch": 0.81, "learning_rate": 1.4595555555555556e-05, "loss": 0.855, "step": 6080 }, { "epoch": 0.81, "learning_rate": 1.4586666666666667e-05, "loss": 0.9247, "step": 6090 }, { "epoch": 0.81, "learning_rate": 1.457777777777778e-05, "loss": 0.9013, "step": 6100 }, { "epoch": 0.81, "learning_rate": 1.456888888888889e-05, "loss": 0.8943, "step": 6110 }, { "epoch": 0.82, "learning_rate": 1.4560000000000001e-05, "loss": 0.8485, "step": 6120 }, { "epoch": 0.82, "learning_rate": 1.4551111111111111e-05, "loss": 0.8636, "step": 6130 }, { "epoch": 0.82, "learning_rate": 1.4542222222222224e-05, "loss": 0.9246, "step": 6140 }, { "epoch": 0.82, "learning_rate": 1.4533333333333335e-05, "loss": 0.9055, "step": 6150 }, { "epoch": 0.82, "learning_rate": 1.4524444444444445e-05, "loss": 0.8709, "step": 6160 }, { "epoch": 0.82, "learning_rate": 1.4515555555555556e-05, "loss": 0.9288, "step": 6170 }, { "epoch": 0.82, "learning_rate": 1.450666666666667e-05, "loss": 0.896, "step": 6180 }, { "epoch": 0.83, "learning_rate": 1.449777777777778e-05, "loss": 0.9383, "step": 6190 }, { "epoch": 0.83, "learning_rate": 1.448888888888889e-05, "loss": 0.8979, "step": 6200 }, { "epoch": 0.83, "learning_rate": 1.448e-05, "loss": 0.8791, "step": 6210 }, { "epoch": 0.83, "learning_rate": 1.4471111111111112e-05, "loss": 0.8206, "step": 6220 }, { "epoch": 0.83, "learning_rate": 1.4462222222222225e-05, "loss": 0.8948, "step": 6230 }, { "epoch": 0.83, "learning_rate": 1.4453333333333334e-05, "loss": 0.9486, "step": 6240 }, { "epoch": 0.83, "learning_rate": 1.4444444444444446e-05, "loss": 0.9089, "step": 6250 }, { "epoch": 0.83, "learning_rate": 1.4435555555555556e-05, "loss": 0.9173, "step": 6260 }, { "epoch": 0.84, "learning_rate": 1.4426666666666669e-05, "loss": 0.9715, "step": 6270 }, { "epoch": 0.84, "learning_rate": 1.441777777777778e-05, "loss": 0.8725, "step": 6280 }, { "epoch": 0.84, "learning_rate": 1.440888888888889e-05, "loss": 0.905, "step": 6290 }, { "epoch": 0.84, "learning_rate": 1.4400000000000001e-05, "loss": 0.8985, "step": 6300 }, { "epoch": 0.84, "learning_rate": 1.439111111111111e-05, "loss": 0.9293, "step": 6310 }, { "epoch": 0.84, "learning_rate": 1.4382222222222224e-05, "loss": 0.9075, "step": 6320 }, { "epoch": 0.84, "learning_rate": 1.4373333333333335e-05, "loss": 0.8554, "step": 6330 }, { "epoch": 0.85, "learning_rate": 1.4364444444444445e-05, "loss": 0.9562, "step": 6340 }, { "epoch": 0.85, "learning_rate": 1.4355555555555556e-05, "loss": 0.8829, "step": 6350 }, { "epoch": 0.85, "learning_rate": 1.434666666666667e-05, "loss": 0.926, "step": 6360 }, { "epoch": 0.85, "learning_rate": 1.4337777777777779e-05, "loss": 0.9242, "step": 6370 }, { "epoch": 0.85, "learning_rate": 1.432888888888889e-05, "loss": 0.9388, "step": 6380 }, { "epoch": 0.85, "learning_rate": 1.432e-05, "loss": 0.9885, "step": 6390 }, { "epoch": 0.85, "learning_rate": 1.4311111111111111e-05, "loss": 0.9199, "step": 6400 }, { "epoch": 0.85, "learning_rate": 1.4302222222222225e-05, "loss": 0.8749, "step": 6410 }, { "epoch": 0.86, "learning_rate": 1.4293333333333334e-05, "loss": 0.8322, "step": 6420 }, { "epoch": 0.86, "learning_rate": 1.4284444444444446e-05, "loss": 0.9338, "step": 6430 }, { "epoch": 0.86, "learning_rate": 1.4275555555555555e-05, "loss": 0.8999, "step": 6440 }, { "epoch": 0.86, "learning_rate": 1.4266666666666668e-05, "loss": 0.8232, "step": 6450 }, { "epoch": 0.86, "learning_rate": 1.425777777777778e-05, "loss": 0.8343, "step": 6460 }, { "epoch": 0.86, "learning_rate": 1.424888888888889e-05, "loss": 0.8645, "step": 6470 }, { "epoch": 0.86, "learning_rate": 1.4240000000000001e-05, "loss": 0.9297, "step": 6480 }, { "epoch": 0.87, "learning_rate": 1.4231111111111114e-05, "loss": 0.8599, "step": 6490 }, { "epoch": 0.87, "learning_rate": 1.4222222222222224e-05, "loss": 0.9592, "step": 6500 }, { "epoch": 0.87, "learning_rate": 1.4213333333333335e-05, "loss": 0.8855, "step": 6510 }, { "epoch": 0.87, "learning_rate": 1.4204444444444445e-05, "loss": 0.9007, "step": 6520 }, { "epoch": 0.87, "learning_rate": 1.4195555555555556e-05, "loss": 0.8942, "step": 6530 }, { "epoch": 0.87, "learning_rate": 1.418666666666667e-05, "loss": 0.8361, "step": 6540 }, { "epoch": 0.87, "learning_rate": 1.4177777777777779e-05, "loss": 0.8866, "step": 6550 }, { "epoch": 0.87, "learning_rate": 1.416888888888889e-05, "loss": 0.8302, "step": 6560 }, { "epoch": 0.88, "learning_rate": 1.416e-05, "loss": 0.8951, "step": 6570 }, { "epoch": 0.88, "learning_rate": 1.4151111111111113e-05, "loss": 0.8963, "step": 6580 }, { "epoch": 0.88, "learning_rate": 1.4142222222222224e-05, "loss": 0.874, "step": 6590 }, { "epoch": 0.88, "learning_rate": 1.4133333333333334e-05, "loss": 0.9074, "step": 6600 }, { "epoch": 0.88, "learning_rate": 1.4124444444444445e-05, "loss": 0.8651, "step": 6610 }, { "epoch": 0.88, "learning_rate": 1.4115555555555555e-05, "loss": 0.8497, "step": 6620 }, { "epoch": 0.88, "learning_rate": 1.4106666666666668e-05, "loss": 0.8779, "step": 6630 }, { "epoch": 0.89, "learning_rate": 1.409777777777778e-05, "loss": 0.9283, "step": 6640 }, { "epoch": 0.89, "learning_rate": 1.408888888888889e-05, "loss": 0.959, "step": 6650 }, { "epoch": 0.89, "learning_rate": 1.408e-05, "loss": 0.9327, "step": 6660 }, { "epoch": 0.89, "learning_rate": 1.4071111111111114e-05, "loss": 0.9255, "step": 6670 }, { "epoch": 0.89, "learning_rate": 1.4062222222222223e-05, "loss": 0.9078, "step": 6680 }, { "epoch": 0.89, "learning_rate": 1.4053333333333335e-05, "loss": 0.8844, "step": 6690 }, { "epoch": 0.89, "learning_rate": 1.4044444444444445e-05, "loss": 0.8804, "step": 6700 }, { "epoch": 0.89, "learning_rate": 1.4035555555555556e-05, "loss": 0.8899, "step": 6710 }, { "epoch": 0.9, "learning_rate": 1.4026666666666669e-05, "loss": 0.7667, "step": 6720 }, { "epoch": 0.9, "learning_rate": 1.4017777777777779e-05, "loss": 0.8689, "step": 6730 }, { "epoch": 0.9, "learning_rate": 1.400888888888889e-05, "loss": 0.8819, "step": 6740 }, { "epoch": 0.9, "learning_rate": 1.4e-05, "loss": 0.9365, "step": 6750 }, { "epoch": 0.9, "learning_rate": 1.3991111111111113e-05, "loss": 0.8558, "step": 6760 }, { "epoch": 0.9, "learning_rate": 1.3982222222222224e-05, "loss": 0.8704, "step": 6770 }, { "epoch": 0.9, "learning_rate": 1.3973333333333334e-05, "loss": 0.8887, "step": 6780 }, { "epoch": 0.91, "learning_rate": 1.3964444444444445e-05, "loss": 0.9008, "step": 6790 }, { "epoch": 0.91, "learning_rate": 1.3955555555555558e-05, "loss": 0.922, "step": 6800 }, { "epoch": 0.91, "learning_rate": 1.3946666666666668e-05, "loss": 0.9465, "step": 6810 }, { "epoch": 0.91, "learning_rate": 1.393777777777778e-05, "loss": 0.8523, "step": 6820 }, { "epoch": 0.91, "learning_rate": 1.392888888888889e-05, "loss": 0.8503, "step": 6830 }, { "epoch": 0.91, "learning_rate": 1.392e-05, "loss": 0.8935, "step": 6840 }, { "epoch": 0.91, "learning_rate": 1.3911111111111114e-05, "loss": 0.8598, "step": 6850 }, { "epoch": 0.91, "learning_rate": 1.3902222222222223e-05, "loss": 0.9195, "step": 6860 }, { "epoch": 0.92, "learning_rate": 1.3893333333333335e-05, "loss": 0.8922, "step": 6870 }, { "epoch": 0.92, "learning_rate": 1.3884444444444444e-05, "loss": 0.882, "step": 6880 }, { "epoch": 0.92, "learning_rate": 1.3875555555555557e-05, "loss": 0.8444, "step": 6890 }, { "epoch": 0.92, "learning_rate": 1.3866666666666669e-05, "loss": 0.9444, "step": 6900 }, { "epoch": 0.92, "learning_rate": 1.3857777777777779e-05, "loss": 0.8691, "step": 6910 }, { "epoch": 0.92, "learning_rate": 1.384888888888889e-05, "loss": 0.9562, "step": 6920 }, { "epoch": 0.92, "learning_rate": 1.384e-05, "loss": 0.9621, "step": 6930 }, { "epoch": 0.93, "learning_rate": 1.3831111111111113e-05, "loss": 0.9059, "step": 6940 }, { "epoch": 0.93, "learning_rate": 1.3822222222222224e-05, "loss": 0.8453, "step": 6950 }, { "epoch": 0.93, "learning_rate": 1.3813333333333334e-05, "loss": 0.907, "step": 6960 }, { "epoch": 0.93, "learning_rate": 1.3804444444444445e-05, "loss": 0.8586, "step": 6970 }, { "epoch": 0.93, "learning_rate": 1.3795555555555558e-05, "loss": 0.8168, "step": 6980 }, { "epoch": 0.93, "learning_rate": 1.3786666666666668e-05, "loss": 0.8572, "step": 6990 }, { "epoch": 0.93, "learning_rate": 1.377777777777778e-05, "loss": 0.8161, "step": 7000 }, { "epoch": 0.93, "learning_rate": 1.3768888888888889e-05, "loss": 0.869, "step": 7010 }, { "epoch": 0.94, "learning_rate": 1.376e-05, "loss": 0.8421, "step": 7020 }, { "epoch": 0.94, "learning_rate": 1.3751111111111113e-05, "loss": 0.8431, "step": 7030 }, { "epoch": 0.94, "learning_rate": 1.3742222222222223e-05, "loss": 0.8565, "step": 7040 }, { "epoch": 0.94, "learning_rate": 1.3733333333333335e-05, "loss": 0.8588, "step": 7050 }, { "epoch": 0.94, "learning_rate": 1.3724444444444444e-05, "loss": 0.9456, "step": 7060 }, { "epoch": 0.94, "learning_rate": 1.3715555555555557e-05, "loss": 0.9185, "step": 7070 }, { "epoch": 0.94, "learning_rate": 1.3706666666666669e-05, "loss": 0.8514, "step": 7080 }, { "epoch": 0.95, "learning_rate": 1.3697777777777778e-05, "loss": 0.8622, "step": 7090 }, { "epoch": 0.95, "learning_rate": 1.368888888888889e-05, "loss": 0.8476, "step": 7100 }, { "epoch": 0.95, "learning_rate": 1.3680000000000003e-05, "loss": 0.8, "step": 7110 }, { "epoch": 0.95, "learning_rate": 1.3671111111111113e-05, "loss": 0.7997, "step": 7120 }, { "epoch": 0.95, "learning_rate": 1.3662222222222224e-05, "loss": 0.8403, "step": 7130 }, { "epoch": 0.95, "learning_rate": 1.3653333333333334e-05, "loss": 0.83, "step": 7140 }, { "epoch": 0.95, "learning_rate": 1.3644444444444445e-05, "loss": 0.817, "step": 7150 }, { "epoch": 0.95, "learning_rate": 1.3635555555555558e-05, "loss": 0.8979, "step": 7160 }, { "epoch": 0.96, "learning_rate": 1.3626666666666668e-05, "loss": 0.873, "step": 7170 }, { "epoch": 0.96, "learning_rate": 1.361777777777778e-05, "loss": 0.8684, "step": 7180 }, { "epoch": 0.96, "learning_rate": 1.3608888888888889e-05, "loss": 0.8954, "step": 7190 }, { "epoch": 0.96, "learning_rate": 1.3600000000000002e-05, "loss": 0.8569, "step": 7200 }, { "epoch": 0.96, "learning_rate": 1.3591111111111113e-05, "loss": 0.8912, "step": 7210 }, { "epoch": 0.96, "learning_rate": 1.3582222222222223e-05, "loss": 0.8192, "step": 7220 }, { "epoch": 0.96, "learning_rate": 1.3573333333333334e-05, "loss": 0.8306, "step": 7230 }, { "epoch": 0.97, "learning_rate": 1.3564444444444444e-05, "loss": 0.9216, "step": 7240 }, { "epoch": 0.97, "learning_rate": 1.3555555555555557e-05, "loss": 0.866, "step": 7250 }, { "epoch": 0.97, "learning_rate": 1.3546666666666669e-05, "loss": 0.8389, "step": 7260 }, { "epoch": 0.97, "learning_rate": 1.3537777777777778e-05, "loss": 0.8611, "step": 7270 }, { "epoch": 0.97, "learning_rate": 1.352888888888889e-05, "loss": 0.8438, "step": 7280 }, { "epoch": 0.97, "learning_rate": 1.3520000000000003e-05, "loss": 0.8522, "step": 7290 }, { "epoch": 0.97, "learning_rate": 1.3511111111111112e-05, "loss": 0.759, "step": 7300 }, { "epoch": 0.97, "learning_rate": 1.3502222222222224e-05, "loss": 0.8135, "step": 7310 }, { "epoch": 0.98, "learning_rate": 1.3493333333333333e-05, "loss": 0.8795, "step": 7320 }, { "epoch": 0.98, "learning_rate": 1.3484444444444445e-05, "loss": 0.8669, "step": 7330 }, { "epoch": 0.98, "learning_rate": 1.3475555555555558e-05, "loss": 0.9327, "step": 7340 }, { "epoch": 0.98, "learning_rate": 1.3466666666666668e-05, "loss": 0.9265, "step": 7350 }, { "epoch": 0.98, "learning_rate": 1.3457777777777779e-05, "loss": 0.876, "step": 7360 }, { "epoch": 0.98, "learning_rate": 1.3448888888888889e-05, "loss": 0.8412, "step": 7370 }, { "epoch": 0.98, "learning_rate": 1.3440000000000002e-05, "loss": 0.8775, "step": 7380 }, { "epoch": 0.99, "learning_rate": 1.3431111111111113e-05, "loss": 0.8599, "step": 7390 }, { "epoch": 0.99, "learning_rate": 1.3422222222222223e-05, "loss": 0.9101, "step": 7400 }, { "epoch": 0.99, "learning_rate": 1.3413333333333334e-05, "loss": 0.8497, "step": 7410 }, { "epoch": 0.99, "learning_rate": 1.3404444444444447e-05, "loss": 0.8807, "step": 7420 }, { "epoch": 0.99, "learning_rate": 1.3395555555555557e-05, "loss": 0.8847, "step": 7430 }, { "epoch": 0.99, "learning_rate": 1.3386666666666668e-05, "loss": 0.8731, "step": 7440 }, { "epoch": 0.99, "learning_rate": 1.3377777777777778e-05, "loss": 0.85, "step": 7450 }, { "epoch": 0.99, "learning_rate": 1.336888888888889e-05, "loss": 0.836, "step": 7460 }, { "epoch": 1.0, "learning_rate": 1.3360000000000003e-05, "loss": 0.8322, "step": 7470 }, { "epoch": 1.0, "learning_rate": 1.3351111111111112e-05, "loss": 0.8306, "step": 7480 }, { "epoch": 1.0, "learning_rate": 1.3342222222222224e-05, "loss": 0.8685, "step": 7490 }, { "epoch": 1.0, "learning_rate": 1.3333333333333333e-05, "loss": 0.8619, "step": 7500 }, { "epoch": 1.0, "learning_rate": 1.3324444444444446e-05, "loss": 0.7652, "step": 7510 }, { "epoch": 1.0, "learning_rate": 1.3315555555555558e-05, "loss": 0.8104, "step": 7520 }, { "epoch": 1.0, "learning_rate": 1.3306666666666667e-05, "loss": 0.7767, "step": 7530 }, { "epoch": 1.01, "learning_rate": 1.3297777777777779e-05, "loss": 0.8019, "step": 7540 }, { "epoch": 1.01, "learning_rate": 1.3288888888888889e-05, "loss": 0.8629, "step": 7550 }, { "epoch": 1.01, "learning_rate": 1.3280000000000002e-05, "loss": 0.7541, "step": 7560 }, { "epoch": 1.01, "learning_rate": 1.3271111111111113e-05, "loss": 0.7929, "step": 7570 }, { "epoch": 1.01, "learning_rate": 1.3262222222222223e-05, "loss": 0.8354, "step": 7580 }, { "epoch": 1.01, "learning_rate": 1.3253333333333334e-05, "loss": 0.8083, "step": 7590 }, { "epoch": 1.01, "learning_rate": 1.3244444444444447e-05, "loss": 0.7403, "step": 7600 }, { "epoch": 1.01, "learning_rate": 1.3235555555555557e-05, "loss": 0.7754, "step": 7610 }, { "epoch": 1.02, "learning_rate": 1.3226666666666668e-05, "loss": 0.8386, "step": 7620 }, { "epoch": 1.02, "learning_rate": 1.3217777777777778e-05, "loss": 0.7527, "step": 7630 }, { "epoch": 1.02, "learning_rate": 1.320888888888889e-05, "loss": 0.7943, "step": 7640 }, { "epoch": 1.02, "learning_rate": 1.3200000000000002e-05, "loss": 0.8167, "step": 7650 }, { "epoch": 1.02, "learning_rate": 1.3191111111111112e-05, "loss": 0.8685, "step": 7660 }, { "epoch": 1.02, "learning_rate": 1.3182222222222223e-05, "loss": 0.8758, "step": 7670 }, { "epoch": 1.02, "learning_rate": 1.3173333333333333e-05, "loss": 0.8335, "step": 7680 }, { "epoch": 1.03, "learning_rate": 1.3164444444444446e-05, "loss": 0.8248, "step": 7690 }, { "epoch": 1.03, "learning_rate": 1.3155555555555558e-05, "loss": 0.8254, "step": 7700 }, { "epoch": 1.03, "learning_rate": 1.3146666666666667e-05, "loss": 0.8562, "step": 7710 }, { "epoch": 1.03, "learning_rate": 1.3137777777777779e-05, "loss": 0.8274, "step": 7720 }, { "epoch": 1.03, "learning_rate": 1.3128888888888892e-05, "loss": 0.7922, "step": 7730 }, { "epoch": 1.03, "learning_rate": 1.3120000000000001e-05, "loss": 0.7828, "step": 7740 }, { "epoch": 1.03, "learning_rate": 1.3111111111111113e-05, "loss": 0.8065, "step": 7750 }, { "epoch": 1.03, "learning_rate": 1.3102222222222223e-05, "loss": 0.8148, "step": 7760 }, { "epoch": 1.04, "learning_rate": 1.3093333333333334e-05, "loss": 0.7478, "step": 7770 }, { "epoch": 1.04, "learning_rate": 1.3084444444444447e-05, "loss": 0.776, "step": 7780 }, { "epoch": 1.04, "learning_rate": 1.3075555555555557e-05, "loss": 0.8066, "step": 7790 }, { "epoch": 1.04, "learning_rate": 1.3066666666666668e-05, "loss": 0.7365, "step": 7800 }, { "epoch": 1.04, "learning_rate": 1.3057777777777778e-05, "loss": 0.8159, "step": 7810 }, { "epoch": 1.04, "learning_rate": 1.304888888888889e-05, "loss": 0.8735, "step": 7820 }, { "epoch": 1.04, "learning_rate": 1.3040000000000002e-05, "loss": 0.7734, "step": 7830 }, { "epoch": 1.05, "learning_rate": 1.3031111111111112e-05, "loss": 0.877, "step": 7840 }, { "epoch": 1.05, "learning_rate": 1.3022222222222223e-05, "loss": 0.8253, "step": 7850 }, { "epoch": 1.05, "learning_rate": 1.3013333333333333e-05, "loss": 0.7839, "step": 7860 }, { "epoch": 1.05, "learning_rate": 1.3004444444444446e-05, "loss": 0.7516, "step": 7870 }, { "epoch": 1.05, "learning_rate": 1.2995555555555557e-05, "loss": 0.8081, "step": 7880 }, { "epoch": 1.05, "learning_rate": 1.2986666666666667e-05, "loss": 0.8645, "step": 7890 }, { "epoch": 1.05, "learning_rate": 1.2977777777777779e-05, "loss": 0.8403, "step": 7900 }, { "epoch": 1.05, "learning_rate": 1.2968888888888892e-05, "loss": 0.904, "step": 7910 }, { "epoch": 1.06, "learning_rate": 1.2960000000000001e-05, "loss": 0.7961, "step": 7920 }, { "epoch": 1.06, "learning_rate": 1.2951111111111113e-05, "loss": 0.7917, "step": 7930 }, { "epoch": 1.06, "learning_rate": 1.2942222222222222e-05, "loss": 0.8858, "step": 7940 }, { "epoch": 1.06, "learning_rate": 1.2933333333333334e-05, "loss": 0.81, "step": 7950 }, { "epoch": 1.06, "learning_rate": 1.2924444444444447e-05, "loss": 0.7789, "step": 7960 }, { "epoch": 1.06, "learning_rate": 1.2915555555555557e-05, "loss": 0.814, "step": 7970 }, { "epoch": 1.06, "learning_rate": 1.2906666666666668e-05, "loss": 0.8832, "step": 7980 }, { "epoch": 1.07, "learning_rate": 1.2897777777777778e-05, "loss": 0.7147, "step": 7990 }, { "epoch": 1.07, "learning_rate": 1.288888888888889e-05, "loss": 0.8188, "step": 8000 }, { "epoch": 1.07, "learning_rate": 1.2880000000000002e-05, "loss": 0.7606, "step": 8010 }, { "epoch": 1.07, "learning_rate": 1.2871111111111112e-05, "loss": 0.8368, "step": 8020 }, { "epoch": 1.07, "learning_rate": 1.2862222222222223e-05, "loss": 0.8728, "step": 8030 }, { "epoch": 1.07, "learning_rate": 1.2853333333333336e-05, "loss": 0.8504, "step": 8040 }, { "epoch": 1.07, "learning_rate": 1.2844444444444446e-05, "loss": 0.7784, "step": 8050 }, { "epoch": 1.07, "learning_rate": 1.2835555555555557e-05, "loss": 0.7533, "step": 8060 }, { "epoch": 1.08, "learning_rate": 1.2826666666666667e-05, "loss": 0.7817, "step": 8070 }, { "epoch": 1.08, "learning_rate": 1.2817777777777778e-05, "loss": 0.7948, "step": 8080 }, { "epoch": 1.08, "learning_rate": 1.2808888888888891e-05, "loss": 0.8063, "step": 8090 }, { "epoch": 1.08, "learning_rate": 1.2800000000000001e-05, "loss": 0.7936, "step": 8100 }, { "epoch": 1.08, "learning_rate": 1.2791111111111112e-05, "loss": 0.8074, "step": 8110 }, { "epoch": 1.08, "learning_rate": 1.2782222222222222e-05, "loss": 0.8113, "step": 8120 }, { "epoch": 1.08, "learning_rate": 1.2773333333333335e-05, "loss": 0.7884, "step": 8130 }, { "epoch": 1.09, "learning_rate": 1.2764444444444447e-05, "loss": 0.8021, "step": 8140 }, { "epoch": 1.09, "learning_rate": 1.2755555555555556e-05, "loss": 0.7884, "step": 8150 }, { "epoch": 1.09, "learning_rate": 1.2746666666666668e-05, "loss": 0.8074, "step": 8160 }, { "epoch": 1.09, "learning_rate": 1.2737777777777777e-05, "loss": 0.8423, "step": 8170 }, { "epoch": 1.09, "learning_rate": 1.272888888888889e-05, "loss": 0.8459, "step": 8180 }, { "epoch": 1.09, "learning_rate": 1.2720000000000002e-05, "loss": 0.7375, "step": 8190 }, { "epoch": 1.09, "learning_rate": 1.2711111111111112e-05, "loss": 0.8253, "step": 8200 }, { "epoch": 1.09, "learning_rate": 1.2702222222222223e-05, "loss": 0.7408, "step": 8210 }, { "epoch": 1.1, "learning_rate": 1.2693333333333336e-05, "loss": 0.8176, "step": 8220 }, { "epoch": 1.1, "learning_rate": 1.2684444444444446e-05, "loss": 0.8467, "step": 8230 }, { "epoch": 1.1, "learning_rate": 1.2675555555555557e-05, "loss": 0.7997, "step": 8240 }, { "epoch": 1.1, "learning_rate": 1.2666666666666667e-05, "loss": 0.8435, "step": 8250 }, { "epoch": 1.1, "learning_rate": 1.2657777777777778e-05, "loss": 0.6854, "step": 8260 }, { "epoch": 1.1, "learning_rate": 1.2648888888888891e-05, "loss": 0.8151, "step": 8270 }, { "epoch": 1.1, "learning_rate": 1.2640000000000001e-05, "loss": 0.8691, "step": 8280 }, { "epoch": 1.11, "learning_rate": 1.2631111111111112e-05, "loss": 0.7453, "step": 8290 }, { "epoch": 1.11, "learning_rate": 1.2622222222222222e-05, "loss": 0.8242, "step": 8300 }, { "epoch": 1.11, "learning_rate": 1.2613333333333335e-05, "loss": 0.7078, "step": 8310 }, { "epoch": 1.11, "learning_rate": 1.2604444444444446e-05, "loss": 0.7312, "step": 8320 }, { "epoch": 1.11, "learning_rate": 1.2595555555555556e-05, "loss": 0.7741, "step": 8330 }, { "epoch": 1.11, "learning_rate": 1.2586666666666668e-05, "loss": 0.7878, "step": 8340 }, { "epoch": 1.11, "learning_rate": 1.257777777777778e-05, "loss": 0.7667, "step": 8350 }, { "epoch": 1.11, "learning_rate": 1.256888888888889e-05, "loss": 0.8244, "step": 8360 }, { "epoch": 1.12, "learning_rate": 1.2560000000000002e-05, "loss": 0.8027, "step": 8370 }, { "epoch": 1.12, "learning_rate": 1.2551111111111111e-05, "loss": 0.7907, "step": 8380 }, { "epoch": 1.12, "learning_rate": 1.2542222222222223e-05, "loss": 0.77, "step": 8390 }, { "epoch": 1.12, "learning_rate": 1.2533333333333336e-05, "loss": 0.7589, "step": 8400 }, { "epoch": 1.12, "learning_rate": 1.2524444444444446e-05, "loss": 0.7713, "step": 8410 }, { "epoch": 1.12, "learning_rate": 1.2515555555555557e-05, "loss": 0.8287, "step": 8420 }, { "epoch": 1.12, "learning_rate": 1.2506666666666667e-05, "loss": 0.861, "step": 8430 }, { "epoch": 1.13, "learning_rate": 1.249777777777778e-05, "loss": 0.8335, "step": 8440 }, { "epoch": 1.13, "learning_rate": 1.2488888888888891e-05, "loss": 0.8153, "step": 8450 }, { "epoch": 1.13, "learning_rate": 1.248e-05, "loss": 0.8117, "step": 8460 }, { "epoch": 1.13, "learning_rate": 1.2471111111111112e-05, "loss": 0.7395, "step": 8470 }, { "epoch": 1.13, "learning_rate": 1.2462222222222222e-05, "loss": 0.7969, "step": 8480 }, { "epoch": 1.13, "learning_rate": 1.2453333333333335e-05, "loss": 0.7664, "step": 8490 }, { "epoch": 1.13, "learning_rate": 1.2444444444444446e-05, "loss": 0.788, "step": 8500 }, { "epoch": 1.13, "learning_rate": 1.2435555555555556e-05, "loss": 0.7665, "step": 8510 }, { "epoch": 1.14, "learning_rate": 1.2426666666666667e-05, "loss": 0.7799, "step": 8520 }, { "epoch": 1.14, "learning_rate": 1.241777777777778e-05, "loss": 0.7451, "step": 8530 }, { "epoch": 1.14, "learning_rate": 1.240888888888889e-05, "loss": 0.7908, "step": 8540 }, { "epoch": 1.14, "learning_rate": 1.2400000000000002e-05, "loss": 0.7834, "step": 8550 }, { "epoch": 1.14, "learning_rate": 1.2391111111111111e-05, "loss": 0.8276, "step": 8560 }, { "epoch": 1.14, "learning_rate": 1.2382222222222223e-05, "loss": 0.7491, "step": 8570 }, { "epoch": 1.14, "learning_rate": 1.2373333333333336e-05, "loss": 0.8073, "step": 8580 }, { "epoch": 1.15, "learning_rate": 1.2364444444444445e-05, "loss": 0.7062, "step": 8590 }, { "epoch": 1.15, "learning_rate": 1.2355555555555557e-05, "loss": 0.8316, "step": 8600 }, { "epoch": 1.15, "learning_rate": 1.2346666666666666e-05, "loss": 0.8742, "step": 8610 }, { "epoch": 1.15, "learning_rate": 1.233777777777778e-05, "loss": 0.8407, "step": 8620 }, { "epoch": 1.15, "learning_rate": 1.2328888888888891e-05, "loss": 0.8387, "step": 8630 }, { "epoch": 1.15, "learning_rate": 1.232e-05, "loss": 0.7871, "step": 8640 }, { "epoch": 1.15, "learning_rate": 1.2311111111111112e-05, "loss": 0.8154, "step": 8650 }, { "epoch": 1.15, "learning_rate": 1.2302222222222225e-05, "loss": 0.8002, "step": 8660 }, { "epoch": 1.16, "learning_rate": 1.2293333333333335e-05, "loss": 0.7993, "step": 8670 }, { "epoch": 1.16, "learning_rate": 1.2284444444444446e-05, "loss": 0.7214, "step": 8680 }, { "epoch": 1.16, "learning_rate": 1.2275555555555556e-05, "loss": 0.7596, "step": 8690 }, { "epoch": 1.16, "learning_rate": 1.2266666666666667e-05, "loss": 0.7683, "step": 8700 }, { "epoch": 1.16, "learning_rate": 1.225777777777778e-05, "loss": 0.7787, "step": 8710 }, { "epoch": 1.16, "learning_rate": 1.224888888888889e-05, "loss": 0.8293, "step": 8720 }, { "epoch": 1.16, "learning_rate": 1.2240000000000001e-05, "loss": 0.8355, "step": 8730 }, { "epoch": 1.17, "learning_rate": 1.2231111111111111e-05, "loss": 0.8168, "step": 8740 }, { "epoch": 1.17, "learning_rate": 1.2222222222222224e-05, "loss": 0.8321, "step": 8750 }, { "epoch": 1.17, "learning_rate": 1.2213333333333336e-05, "loss": 0.7942, "step": 8760 }, { "epoch": 1.17, "learning_rate": 1.2204444444444445e-05, "loss": 0.7354, "step": 8770 }, { "epoch": 1.17, "learning_rate": 1.2195555555555557e-05, "loss": 0.7821, "step": 8780 }, { "epoch": 1.17, "learning_rate": 1.2186666666666666e-05, "loss": 0.8045, "step": 8790 }, { "epoch": 1.17, "learning_rate": 1.217777777777778e-05, "loss": 0.7925, "step": 8800 }, { "epoch": 1.17, "learning_rate": 1.216888888888889e-05, "loss": 0.7675, "step": 8810 }, { "epoch": 1.18, "learning_rate": 1.216e-05, "loss": 0.8388, "step": 8820 }, { "epoch": 1.18, "learning_rate": 1.2151111111111112e-05, "loss": 0.7763, "step": 8830 }, { "epoch": 1.18, "learning_rate": 1.2142222222222225e-05, "loss": 0.725, "step": 8840 }, { "epoch": 1.18, "learning_rate": 1.2133333333333335e-05, "loss": 0.7409, "step": 8850 }, { "epoch": 1.18, "learning_rate": 1.2124444444444446e-05, "loss": 0.7972, "step": 8860 }, { "epoch": 1.18, "learning_rate": 1.2115555555555556e-05, "loss": 0.7559, "step": 8870 }, { "epoch": 1.18, "learning_rate": 1.2106666666666667e-05, "loss": 0.7916, "step": 8880 }, { "epoch": 1.19, "learning_rate": 1.209777777777778e-05, "loss": 0.8488, "step": 8890 }, { "epoch": 1.19, "learning_rate": 1.208888888888889e-05, "loss": 0.7573, "step": 8900 }, { "epoch": 1.19, "learning_rate": 1.2080000000000001e-05, "loss": 0.8603, "step": 8910 }, { "epoch": 1.19, "learning_rate": 1.2071111111111111e-05, "loss": 0.7329, "step": 8920 }, { "epoch": 1.19, "learning_rate": 1.2062222222222224e-05, "loss": 0.7988, "step": 8930 }, { "epoch": 1.19, "learning_rate": 1.2053333333333335e-05, "loss": 0.6915, "step": 8940 }, { "epoch": 1.19, "learning_rate": 1.2044444444444445e-05, "loss": 0.7726, "step": 8950 }, { "epoch": 1.19, "learning_rate": 1.2035555555555556e-05, "loss": 0.7653, "step": 8960 }, { "epoch": 1.2, "learning_rate": 1.202666666666667e-05, "loss": 0.7435, "step": 8970 }, { "epoch": 1.2, "learning_rate": 1.201777777777778e-05, "loss": 0.7668, "step": 8980 }, { "epoch": 1.2, "learning_rate": 1.200888888888889e-05, "loss": 0.7759, "step": 8990 }, { "epoch": 1.2, "learning_rate": 1.2e-05, "loss": 0.7711, "step": 9000 }, { "epoch": 1.2, "learning_rate": 1.1991111111111112e-05, "loss": 0.7968, "step": 9010 }, { "epoch": 1.2, "learning_rate": 1.1982222222222225e-05, "loss": 0.7736, "step": 9020 }, { "epoch": 1.2, "learning_rate": 1.1973333333333334e-05, "loss": 0.7461, "step": 9030 }, { "epoch": 1.21, "learning_rate": 1.1964444444444446e-05, "loss": 0.7298, "step": 9040 }, { "epoch": 1.21, "learning_rate": 1.1955555555555556e-05, "loss": 0.7281, "step": 9050 }, { "epoch": 1.21, "learning_rate": 1.1946666666666669e-05, "loss": 0.7728, "step": 9060 }, { "epoch": 1.21, "learning_rate": 1.193777777777778e-05, "loss": 0.8506, "step": 9070 }, { "epoch": 1.21, "learning_rate": 1.192888888888889e-05, "loss": 0.8115, "step": 9080 }, { "epoch": 1.21, "learning_rate": 1.1920000000000001e-05, "loss": 0.7715, "step": 9090 }, { "epoch": 1.21, "learning_rate": 1.191111111111111e-05, "loss": 0.8234, "step": 9100 }, { "epoch": 1.21, "learning_rate": 1.1902222222222224e-05, "loss": 0.7911, "step": 9110 }, { "epoch": 1.22, "learning_rate": 1.1893333333333335e-05, "loss": 0.7786, "step": 9120 }, { "epoch": 1.22, "learning_rate": 1.1884444444444445e-05, "loss": 0.846, "step": 9130 }, { "epoch": 1.22, "learning_rate": 1.1875555555555556e-05, "loss": 0.7619, "step": 9140 }, { "epoch": 1.22, "learning_rate": 1.186666666666667e-05, "loss": 0.7318, "step": 9150 }, { "epoch": 1.22, "learning_rate": 1.1857777777777779e-05, "loss": 0.8328, "step": 9160 }, { "epoch": 1.22, "learning_rate": 1.184888888888889e-05, "loss": 0.7974, "step": 9170 }, { "epoch": 1.22, "learning_rate": 1.184e-05, "loss": 0.8137, "step": 9180 }, { "epoch": 1.23, "learning_rate": 1.1831111111111112e-05, "loss": 0.8265, "step": 9190 }, { "epoch": 1.23, "learning_rate": 1.1822222222222225e-05, "loss": 0.7497, "step": 9200 }, { "epoch": 1.23, "learning_rate": 1.1813333333333334e-05, "loss": 0.7859, "step": 9210 }, { "epoch": 1.23, "learning_rate": 1.1804444444444446e-05, "loss": 0.7464, "step": 9220 }, { "epoch": 1.23, "learning_rate": 1.1795555555555555e-05, "loss": 0.7343, "step": 9230 }, { "epoch": 1.23, "learning_rate": 1.1786666666666668e-05, "loss": 0.744, "step": 9240 }, { "epoch": 1.23, "learning_rate": 1.177777777777778e-05, "loss": 0.914, "step": 9250 }, { "epoch": 1.23, "learning_rate": 1.176888888888889e-05, "loss": 0.8342, "step": 9260 }, { "epoch": 1.24, "learning_rate": 1.1760000000000001e-05, "loss": 0.7114, "step": 9270 }, { "epoch": 1.24, "learning_rate": 1.1751111111111112e-05, "loss": 0.7673, "step": 9280 }, { "epoch": 1.24, "learning_rate": 1.1742222222222224e-05, "loss": 0.786, "step": 9290 }, { "epoch": 1.24, "learning_rate": 1.1733333333333335e-05, "loss": 0.7374, "step": 9300 }, { "epoch": 1.24, "learning_rate": 1.1724444444444445e-05, "loss": 0.7658, "step": 9310 }, { "epoch": 1.24, "learning_rate": 1.1715555555555556e-05, "loss": 0.7388, "step": 9320 }, { "epoch": 1.24, "learning_rate": 1.170666666666667e-05, "loss": 0.7143, "step": 9330 }, { "epoch": 1.25, "learning_rate": 1.1697777777777779e-05, "loss": 0.7885, "step": 9340 }, { "epoch": 1.25, "learning_rate": 1.168888888888889e-05, "loss": 0.76, "step": 9350 }, { "epoch": 1.25, "learning_rate": 1.168e-05, "loss": 0.788, "step": 9360 }, { "epoch": 1.25, "learning_rate": 1.1671111111111113e-05, "loss": 0.7285, "step": 9370 }, { "epoch": 1.25, "learning_rate": 1.1662222222222224e-05, "loss": 0.7672, "step": 9380 }, { "epoch": 1.25, "learning_rate": 1.1653333333333334e-05, "loss": 0.7399, "step": 9390 }, { "epoch": 1.25, "learning_rate": 1.1644444444444446e-05, "loss": 0.8254, "step": 9400 }, { "epoch": 1.25, "learning_rate": 1.1635555555555555e-05, "loss": 0.746, "step": 9410 }, { "epoch": 1.26, "learning_rate": 1.1626666666666668e-05, "loss": 0.8151, "step": 9420 }, { "epoch": 1.26, "learning_rate": 1.161777777777778e-05, "loss": 0.7813, "step": 9430 }, { "epoch": 1.26, "learning_rate": 1.160888888888889e-05, "loss": 0.7144, "step": 9440 }, { "epoch": 1.26, "learning_rate": 1.16e-05, "loss": 0.7607, "step": 9450 }, { "epoch": 1.26, "learning_rate": 1.1591111111111114e-05, "loss": 0.8035, "step": 9460 }, { "epoch": 1.26, "learning_rate": 1.1582222222222224e-05, "loss": 0.7826, "step": 9470 }, { "epoch": 1.26, "learning_rate": 1.1573333333333335e-05, "loss": 0.7327, "step": 9480 }, { "epoch": 1.27, "learning_rate": 1.1564444444444445e-05, "loss": 0.7162, "step": 9490 }, { "epoch": 1.27, "learning_rate": 1.1555555555555556e-05, "loss": 0.8058, "step": 9500 }, { "epoch": 1.27, "learning_rate": 1.1546666666666669e-05, "loss": 0.7683, "step": 9510 }, { "epoch": 1.27, "learning_rate": 1.1537777777777779e-05, "loss": 0.8565, "step": 9520 }, { "epoch": 1.27, "learning_rate": 1.152888888888889e-05, "loss": 0.7249, "step": 9530 }, { "epoch": 1.27, "learning_rate": 1.152e-05, "loss": 0.7367, "step": 9540 }, { "epoch": 1.27, "learning_rate": 1.1511111111111113e-05, "loss": 0.7976, "step": 9550 }, { "epoch": 1.27, "learning_rate": 1.1502222222222224e-05, "loss": 0.808, "step": 9560 }, { "epoch": 1.28, "learning_rate": 1.1493333333333334e-05, "loss": 0.7246, "step": 9570 }, { "epoch": 1.28, "learning_rate": 1.1484444444444445e-05, "loss": 0.7858, "step": 9580 }, { "epoch": 1.28, "learning_rate": 1.1475555555555557e-05, "loss": 0.7846, "step": 9590 }, { "epoch": 1.28, "learning_rate": 1.1466666666666668e-05, "loss": 0.7237, "step": 9600 }, { "epoch": 1.28, "learning_rate": 1.145777777777778e-05, "loss": 0.8154, "step": 9610 }, { "epoch": 1.28, "learning_rate": 1.144888888888889e-05, "loss": 0.8533, "step": 9620 }, { "epoch": 1.28, "learning_rate": 1.144e-05, "loss": 0.7323, "step": 9630 }, { "epoch": 1.29, "learning_rate": 1.1431111111111114e-05, "loss": 0.8197, "step": 9640 }, { "epoch": 1.29, "learning_rate": 1.1422222222222223e-05, "loss": 0.7628, "step": 9650 }, { "epoch": 1.29, "learning_rate": 1.1413333333333335e-05, "loss": 0.7413, "step": 9660 }, { "epoch": 1.29, "learning_rate": 1.1404444444444444e-05, "loss": 0.7718, "step": 9670 }, { "epoch": 1.29, "learning_rate": 1.1395555555555558e-05, "loss": 0.7067, "step": 9680 }, { "epoch": 1.29, "learning_rate": 1.1386666666666669e-05, "loss": 0.8018, "step": 9690 }, { "epoch": 1.29, "learning_rate": 1.1377777777777779e-05, "loss": 0.7511, "step": 9700 }, { "epoch": 1.29, "learning_rate": 1.136888888888889e-05, "loss": 0.7979, "step": 9710 }, { "epoch": 1.3, "learning_rate": 1.136e-05, "loss": 0.7846, "step": 9720 }, { "epoch": 1.3, "learning_rate": 1.1351111111111113e-05, "loss": 0.7348, "step": 9730 }, { "epoch": 1.3, "learning_rate": 1.1342222222222224e-05, "loss": 0.7523, "step": 9740 }, { "epoch": 1.3, "learning_rate": 1.1333333333333334e-05, "loss": 0.8074, "step": 9750 }, { "epoch": 1.3, "learning_rate": 1.1324444444444445e-05, "loss": 0.7263, "step": 9760 }, { "epoch": 1.3, "learning_rate": 1.1315555555555557e-05, "loss": 0.8393, "step": 9770 }, { "epoch": 1.3, "learning_rate": 1.1306666666666668e-05, "loss": 0.7898, "step": 9780 }, { "epoch": 1.31, "learning_rate": 1.129777777777778e-05, "loss": 0.8025, "step": 9790 }, { "epoch": 1.31, "learning_rate": 1.1288888888888889e-05, "loss": 0.7455, "step": 9800 }, { "epoch": 1.31, "learning_rate": 1.128e-05, "loss": 0.7876, "step": 9810 }, { "epoch": 1.31, "learning_rate": 1.1271111111111113e-05, "loss": 0.7323, "step": 9820 }, { "epoch": 1.31, "learning_rate": 1.1262222222222223e-05, "loss": 0.7671, "step": 9830 }, { "epoch": 1.31, "learning_rate": 1.1253333333333335e-05, "loss": 0.8148, "step": 9840 }, { "epoch": 1.31, "learning_rate": 1.1244444444444444e-05, "loss": 0.7532, "step": 9850 }, { "epoch": 1.31, "learning_rate": 1.1235555555555557e-05, "loss": 0.7501, "step": 9860 }, { "epoch": 1.32, "learning_rate": 1.1226666666666669e-05, "loss": 0.7711, "step": 9870 }, { "epoch": 1.32, "learning_rate": 1.1217777777777778e-05, "loss": 0.7606, "step": 9880 }, { "epoch": 1.32, "learning_rate": 1.120888888888889e-05, "loss": 0.7166, "step": 9890 }, { "epoch": 1.32, "learning_rate": 1.1200000000000001e-05, "loss": 0.7462, "step": 9900 }, { "epoch": 1.32, "learning_rate": 1.1191111111111113e-05, "loss": 0.7213, "step": 9910 }, { "epoch": 1.32, "learning_rate": 1.1182222222222224e-05, "loss": 0.7864, "step": 9920 }, { "epoch": 1.32, "learning_rate": 1.1173333333333334e-05, "loss": 0.7229, "step": 9930 }, { "epoch": 1.33, "learning_rate": 1.1164444444444445e-05, "loss": 0.7744, "step": 9940 }, { "epoch": 1.33, "learning_rate": 1.1155555555555556e-05, "loss": 0.7246, "step": 9950 }, { "epoch": 1.33, "learning_rate": 1.1146666666666668e-05, "loss": 0.7975, "step": 9960 }, { "epoch": 1.33, "learning_rate": 1.113777777777778e-05, "loss": 0.7355, "step": 9970 }, { "epoch": 1.33, "learning_rate": 1.1128888888888889e-05, "loss": 0.8357, "step": 9980 }, { "epoch": 1.33, "learning_rate": 1.1120000000000002e-05, "loss": 0.7834, "step": 9990 }, { "epoch": 1.33, "learning_rate": 1.1111111111111113e-05, "loss": 0.8293, "step": 10000 }, { "epoch": 1.33, "learning_rate": 1.1102222222222223e-05, "loss": 0.7226, "step": 10010 }, { "epoch": 1.34, "learning_rate": 1.1093333333333334e-05, "loss": 0.8126, "step": 10020 }, { "epoch": 1.34, "learning_rate": 1.1084444444444444e-05, "loss": 0.8374, "step": 10030 }, { "epoch": 1.34, "learning_rate": 1.1075555555555557e-05, "loss": 0.786, "step": 10040 }, { "epoch": 1.34, "learning_rate": 1.1066666666666669e-05, "loss": 0.8358, "step": 10050 }, { "epoch": 1.34, "learning_rate": 1.1057777777777778e-05, "loss": 0.8163, "step": 10060 }, { "epoch": 1.34, "learning_rate": 1.104888888888889e-05, "loss": 0.7775, "step": 10070 }, { "epoch": 1.34, "learning_rate": 1.1040000000000001e-05, "loss": 0.7185, "step": 10080 }, { "epoch": 1.35, "learning_rate": 1.1031111111111112e-05, "loss": 0.758, "step": 10090 }, { "epoch": 1.35, "learning_rate": 1.1022222222222224e-05, "loss": 0.7644, "step": 10100 }, { "epoch": 1.35, "learning_rate": 1.1013333333333333e-05, "loss": 0.7676, "step": 10110 }, { "epoch": 1.35, "learning_rate": 1.1004444444444445e-05, "loss": 0.6974, "step": 10120 }, { "epoch": 1.35, "learning_rate": 1.0995555555555556e-05, "loss": 0.7701, "step": 10130 }, { "epoch": 1.35, "learning_rate": 1.0986666666666668e-05, "loss": 0.7988, "step": 10140 }, { "epoch": 1.35, "learning_rate": 1.0977777777777779e-05, "loss": 0.7663, "step": 10150 }, { "epoch": 1.35, "learning_rate": 1.0968888888888889e-05, "loss": 0.7213, "step": 10160 }, { "epoch": 1.36, "learning_rate": 1.0960000000000002e-05, "loss": 0.8264, "step": 10170 }, { "epoch": 1.36, "learning_rate": 1.0951111111111113e-05, "loss": 0.7377, "step": 10180 }, { "epoch": 1.36, "learning_rate": 1.0942222222222223e-05, "loss": 0.7266, "step": 10190 }, { "epoch": 1.36, "learning_rate": 1.0933333333333334e-05, "loss": 0.8125, "step": 10200 }, { "epoch": 1.36, "learning_rate": 1.0924444444444446e-05, "loss": 0.7755, "step": 10210 }, { "epoch": 1.36, "learning_rate": 1.0915555555555557e-05, "loss": 0.773, "step": 10220 }, { "epoch": 1.36, "learning_rate": 1.0906666666666668e-05, "loss": 0.7298, "step": 10230 }, { "epoch": 1.37, "learning_rate": 1.0897777777777778e-05, "loss": 0.7758, "step": 10240 }, { "epoch": 1.37, "learning_rate": 1.088888888888889e-05, "loss": 0.742, "step": 10250 }, { "epoch": 1.37, "learning_rate": 1.0880000000000001e-05, "loss": 0.736, "step": 10260 }, { "epoch": 1.37, "learning_rate": 1.0871111111111112e-05, "loss": 0.7613, "step": 10270 }, { "epoch": 1.37, "learning_rate": 1.0862222222222224e-05, "loss": 0.7463, "step": 10280 }, { "epoch": 1.37, "learning_rate": 1.0853333333333333e-05, "loss": 0.721, "step": 10290 }, { "epoch": 1.37, "learning_rate": 1.0844444444444446e-05, "loss": 0.7498, "step": 10300 }, { "epoch": 1.37, "learning_rate": 1.0835555555555556e-05, "loss": 0.7986, "step": 10310 }, { "epoch": 1.38, "learning_rate": 1.0826666666666667e-05, "loss": 0.808, "step": 10320 }, { "epoch": 1.38, "learning_rate": 1.0817777777777779e-05, "loss": 0.7459, "step": 10330 }, { "epoch": 1.38, "learning_rate": 1.0808888888888889e-05, "loss": 0.7394, "step": 10340 }, { "epoch": 1.38, "learning_rate": 1.0800000000000002e-05, "loss": 0.7931, "step": 10350 }, { "epoch": 1.38, "learning_rate": 1.0791111111111113e-05, "loss": 0.7159, "step": 10360 }, { "epoch": 1.38, "learning_rate": 1.0782222222222223e-05, "loss": 0.7146, "step": 10370 }, { "epoch": 1.38, "learning_rate": 1.0773333333333334e-05, "loss": 0.7166, "step": 10380 }, { "epoch": 1.39, "learning_rate": 1.0764444444444445e-05, "loss": 0.747, "step": 10390 }, { "epoch": 1.39, "learning_rate": 1.0755555555555557e-05, "loss": 0.7531, "step": 10400 }, { "epoch": 1.39, "learning_rate": 1.0746666666666668e-05, "loss": 0.7861, "step": 10410 }, { "epoch": 1.39, "learning_rate": 1.0737777777777778e-05, "loss": 0.7888, "step": 10420 }, { "epoch": 1.39, "learning_rate": 1.072888888888889e-05, "loss": 0.6578, "step": 10430 }, { "epoch": 1.39, "learning_rate": 1.072e-05, "loss": 0.7542, "step": 10440 }, { "epoch": 1.39, "learning_rate": 1.0711111111111112e-05, "loss": 0.7091, "step": 10450 }, { "epoch": 1.39, "learning_rate": 1.0702222222222223e-05, "loss": 0.7238, "step": 10460 }, { "epoch": 1.4, "learning_rate": 1.0693333333333333e-05, "loss": 0.6912, "step": 10470 }, { "epoch": 1.4, "learning_rate": 1.0684444444444446e-05, "loss": 0.7583, "step": 10480 }, { "epoch": 1.4, "learning_rate": 1.0675555555555558e-05, "loss": 0.7198, "step": 10490 }, { "epoch": 1.4, "learning_rate": 1.0666666666666667e-05, "loss": 0.7406, "step": 10500 }, { "epoch": 1.4, "learning_rate": 1.0657777777777779e-05, "loss": 0.7321, "step": 10510 }, { "epoch": 1.4, "learning_rate": 1.064888888888889e-05, "loss": 0.7009, "step": 10520 }, { "epoch": 1.4, "learning_rate": 1.0640000000000001e-05, "loss": 0.7136, "step": 10530 }, { "epoch": 1.41, "learning_rate": 1.0631111111111113e-05, "loss": 0.7258, "step": 10540 }, { "epoch": 1.41, "learning_rate": 1.0622222222222223e-05, "loss": 0.6963, "step": 10550 }, { "epoch": 1.41, "learning_rate": 1.0613333333333334e-05, "loss": 0.7559, "step": 10560 }, { "epoch": 1.41, "learning_rate": 1.0604444444444445e-05, "loss": 0.7619, "step": 10570 }, { "epoch": 1.41, "learning_rate": 1.0595555555555557e-05, "loss": 0.7853, "step": 10580 }, { "epoch": 1.41, "learning_rate": 1.0586666666666668e-05, "loss": 0.795, "step": 10590 }, { "epoch": 1.41, "learning_rate": 1.0577777777777778e-05, "loss": 0.6995, "step": 10600 }, { "epoch": 1.41, "learning_rate": 1.0568888888888891e-05, "loss": 0.7825, "step": 10610 }, { "epoch": 1.42, "learning_rate": 1.056e-05, "loss": 0.7539, "step": 10620 }, { "epoch": 1.42, "learning_rate": 1.0551111111111112e-05, "loss": 0.7691, "step": 10630 }, { "epoch": 1.42, "learning_rate": 1.0542222222222223e-05, "loss": 0.7297, "step": 10640 }, { "epoch": 1.42, "learning_rate": 1.0533333333333333e-05, "loss": 0.6817, "step": 10650 }, { "epoch": 1.42, "learning_rate": 1.0524444444444446e-05, "loss": 0.7288, "step": 10660 }, { "epoch": 1.42, "learning_rate": 1.0515555555555557e-05, "loss": 0.7351, "step": 10670 }, { "epoch": 1.42, "learning_rate": 1.0506666666666667e-05, "loss": 0.701, "step": 10680 }, { "epoch": 1.43, "learning_rate": 1.0497777777777779e-05, "loss": 0.7694, "step": 10690 }, { "epoch": 1.43, "learning_rate": 1.048888888888889e-05, "loss": 0.779, "step": 10700 }, { "epoch": 1.43, "learning_rate": 1.0480000000000001e-05, "loss": 0.7777, "step": 10710 }, { "epoch": 1.43, "learning_rate": 1.0471111111111113e-05, "loss": 0.7087, "step": 10720 }, { "epoch": 1.43, "learning_rate": 1.0462222222222222e-05, "loss": 0.726, "step": 10730 }, { "epoch": 1.43, "learning_rate": 1.0453333333333334e-05, "loss": 0.7361, "step": 10740 }, { "epoch": 1.43, "learning_rate": 1.0444444444444445e-05, "loss": 0.7797, "step": 10750 }, { "epoch": 1.43, "learning_rate": 1.0435555555555557e-05, "loss": 0.7506, "step": 10760 }, { "epoch": 1.44, "learning_rate": 1.0426666666666668e-05, "loss": 0.7607, "step": 10770 }, { "epoch": 1.44, "learning_rate": 1.0417777777777778e-05, "loss": 0.7281, "step": 10780 }, { "epoch": 1.44, "learning_rate": 1.040888888888889e-05, "loss": 0.8274, "step": 10790 }, { "epoch": 1.44, "learning_rate": 1.04e-05, "loss": 0.7909, "step": 10800 }, { "epoch": 1.44, "learning_rate": 1.0391111111111112e-05, "loss": 0.7153, "step": 10810 }, { "epoch": 1.44, "learning_rate": 1.0382222222222223e-05, "loss": 0.7852, "step": 10820 }, { "epoch": 1.44, "learning_rate": 1.0373333333333335e-05, "loss": 0.7045, "step": 10830 }, { "epoch": 1.45, "learning_rate": 1.0364444444444446e-05, "loss": 0.8618, "step": 10840 }, { "epoch": 1.45, "learning_rate": 1.0355555555555557e-05, "loss": 0.7632, "step": 10850 }, { "epoch": 1.45, "learning_rate": 1.0346666666666667e-05, "loss": 0.7132, "step": 10860 }, { "epoch": 1.45, "learning_rate": 1.0337777777777778e-05, "loss": 0.6889, "step": 10870 }, { "epoch": 1.45, "learning_rate": 1.032888888888889e-05, "loss": 0.737, "step": 10880 }, { "epoch": 1.45, "learning_rate": 1.0320000000000001e-05, "loss": 0.6662, "step": 10890 }, { "epoch": 1.45, "learning_rate": 1.0311111111111113e-05, "loss": 0.6746, "step": 10900 }, { "epoch": 1.45, "learning_rate": 1.0302222222222222e-05, "loss": 0.6366, "step": 10910 }, { "epoch": 1.46, "learning_rate": 1.0293333333333335e-05, "loss": 0.6941, "step": 10920 }, { "epoch": 1.46, "learning_rate": 1.0284444444444445e-05, "loss": 0.7469, "step": 10930 }, { "epoch": 1.46, "learning_rate": 1.0275555555555556e-05, "loss": 0.7361, "step": 10940 }, { "epoch": 1.46, "learning_rate": 1.0266666666666668e-05, "loss": 0.7217, "step": 10950 }, { "epoch": 1.46, "learning_rate": 1.0257777777777777e-05, "loss": 0.6962, "step": 10960 }, { "epoch": 1.46, "learning_rate": 1.024888888888889e-05, "loss": 0.7235, "step": 10970 }, { "epoch": 1.46, "learning_rate": 1.024e-05, "loss": 0.7203, "step": 10980 }, { "epoch": 1.47, "learning_rate": 1.0231111111111112e-05, "loss": 0.7356, "step": 10990 }, { "epoch": 1.47, "learning_rate": 1.0222222222222223e-05, "loss": 0.6177, "step": 11000 }, { "epoch": 1.47, "learning_rate": 1.0213333333333334e-05, "loss": 0.7063, "step": 11010 }, { "epoch": 1.47, "learning_rate": 1.0204444444444446e-05, "loss": 0.7497, "step": 11020 }, { "epoch": 1.47, "learning_rate": 1.0195555555555557e-05, "loss": 0.7654, "step": 11030 }, { "epoch": 1.47, "learning_rate": 1.0186666666666667e-05, "loss": 0.7089, "step": 11040 }, { "epoch": 1.47, "learning_rate": 1.0177777777777778e-05, "loss": 0.7678, "step": 11050 }, { "epoch": 1.47, "learning_rate": 1.016888888888889e-05, "loss": 0.7753, "step": 11060 }, { "epoch": 1.48, "learning_rate": 1.0160000000000001e-05, "loss": 0.731, "step": 11070 }, { "epoch": 1.48, "learning_rate": 1.0151111111111112e-05, "loss": 0.7266, "step": 11080 }, { "epoch": 1.48, "learning_rate": 1.0142222222222222e-05, "loss": 0.7556, "step": 11090 }, { "epoch": 1.48, "learning_rate": 1.0133333333333335e-05, "loss": 0.6988, "step": 11100 }, { "epoch": 1.48, "learning_rate": 1.0124444444444445e-05, "loss": 0.7614, "step": 11110 }, { "epoch": 1.48, "learning_rate": 1.0115555555555556e-05, "loss": 0.7615, "step": 11120 }, { "epoch": 1.48, "learning_rate": 1.0106666666666668e-05, "loss": 0.7595, "step": 11130 }, { "epoch": 1.49, "learning_rate": 1.0097777777777779e-05, "loss": 0.7555, "step": 11140 }, { "epoch": 1.49, "learning_rate": 1.008888888888889e-05, "loss": 0.759, "step": 11150 }, { "epoch": 1.49, "learning_rate": 1.008e-05, "loss": 0.7215, "step": 11160 }, { "epoch": 1.49, "learning_rate": 1.0071111111111111e-05, "loss": 0.736, "step": 11170 }, { "epoch": 1.49, "learning_rate": 1.0062222222222223e-05, "loss": 0.7113, "step": 11180 }, { "epoch": 1.49, "learning_rate": 1.0053333333333334e-05, "loss": 0.6735, "step": 11190 }, { "epoch": 1.49, "learning_rate": 1.0044444444444446e-05, "loss": 0.6848, "step": 11200 }, { "epoch": 1.49, "learning_rate": 1.0035555555555557e-05, "loss": 0.7385, "step": 11210 }, { "epoch": 1.5, "learning_rate": 1.0026666666666667e-05, "loss": 0.6983, "step": 11220 }, { "epoch": 1.5, "learning_rate": 1.001777777777778e-05, "loss": 0.7155, "step": 11230 }, { "epoch": 1.5, "learning_rate": 1.000888888888889e-05, "loss": 0.789, "step": 11240 }, { "epoch": 1.5, "learning_rate": 1e-05, "loss": 0.6871, "step": 11250 }, { "epoch": 1.5, "learning_rate": 9.991111111111112e-06, "loss": 0.7541, "step": 11260 }, { "epoch": 1.5, "learning_rate": 9.982222222222224e-06, "loss": 0.6912, "step": 11270 }, { "epoch": 1.5, "learning_rate": 9.973333333333333e-06, "loss": 0.7459, "step": 11280 }, { "epoch": 1.51, "learning_rate": 9.964444444444445e-06, "loss": 0.7243, "step": 11290 }, { "epoch": 1.51, "learning_rate": 9.955555555555556e-06, "loss": 0.7236, "step": 11300 }, { "epoch": 1.51, "learning_rate": 9.946666666666667e-06, "loss": 0.6962, "step": 11310 }, { "epoch": 1.51, "learning_rate": 9.937777777777779e-06, "loss": 0.7803, "step": 11320 }, { "epoch": 1.51, "learning_rate": 9.92888888888889e-06, "loss": 0.7795, "step": 11330 }, { "epoch": 1.51, "learning_rate": 9.920000000000002e-06, "loss": 0.7613, "step": 11340 }, { "epoch": 1.51, "learning_rate": 9.911111111111113e-06, "loss": 0.726, "step": 11350 }, { "epoch": 1.51, "learning_rate": 9.902222222222223e-06, "loss": 0.7561, "step": 11360 }, { "epoch": 1.52, "learning_rate": 9.893333333333334e-06, "loss": 0.712, "step": 11370 }, { "epoch": 1.52, "learning_rate": 9.884444444444445e-06, "loss": 0.7257, "step": 11380 }, { "epoch": 1.52, "learning_rate": 9.875555555555557e-06, "loss": 0.7219, "step": 11390 }, { "epoch": 1.52, "learning_rate": 9.866666666666668e-06, "loss": 0.803, "step": 11400 }, { "epoch": 1.52, "learning_rate": 9.857777777777778e-06, "loss": 0.6742, "step": 11410 }, { "epoch": 1.52, "learning_rate": 9.84888888888889e-06, "loss": 0.6409, "step": 11420 }, { "epoch": 1.52, "learning_rate": 9.84e-06, "loss": 0.6738, "step": 11430 }, { "epoch": 1.53, "learning_rate": 9.831111111111112e-06, "loss": 0.6947, "step": 11440 }, { "epoch": 1.53, "learning_rate": 9.822222222222223e-06, "loss": 0.7449, "step": 11450 }, { "epoch": 1.53, "learning_rate": 9.813333333333333e-06, "loss": 0.726, "step": 11460 }, { "epoch": 1.53, "learning_rate": 9.804444444444444e-06, "loss": 0.7434, "step": 11470 }, { "epoch": 1.53, "learning_rate": 9.795555555555556e-06, "loss": 0.6992, "step": 11480 }, { "epoch": 1.53, "learning_rate": 9.786666666666667e-06, "loss": 0.7759, "step": 11490 }, { "epoch": 1.53, "learning_rate": 9.777777777777779e-06, "loss": 0.769, "step": 11500 }, { "epoch": 1.53, "learning_rate": 9.76888888888889e-06, "loss": 0.7542, "step": 11510 }, { "epoch": 1.54, "learning_rate": 9.760000000000001e-06, "loss": 0.7031, "step": 11520 }, { "epoch": 1.54, "learning_rate": 9.751111111111113e-06, "loss": 0.6945, "step": 11530 }, { "epoch": 1.54, "learning_rate": 9.742222222222222e-06, "loss": 0.7452, "step": 11540 }, { "epoch": 1.54, "learning_rate": 9.733333333333334e-06, "loss": 0.766, "step": 11550 }, { "epoch": 1.54, "learning_rate": 9.724444444444445e-06, "loss": 0.7297, "step": 11560 }, { "epoch": 1.54, "learning_rate": 9.715555555555557e-06, "loss": 0.7682, "step": 11570 }, { "epoch": 1.54, "learning_rate": 9.706666666666668e-06, "loss": 0.7813, "step": 11580 }, { "epoch": 1.55, "learning_rate": 9.697777777777778e-06, "loss": 0.7848, "step": 11590 }, { "epoch": 1.55, "learning_rate": 9.688888888888889e-06, "loss": 0.7779, "step": 11600 }, { "epoch": 1.55, "learning_rate": 9.68e-06, "loss": 0.7297, "step": 11610 }, { "epoch": 1.55, "learning_rate": 9.671111111111112e-06, "loss": 0.6557, "step": 11620 }, { "epoch": 1.55, "learning_rate": 9.662222222222223e-06, "loss": 0.6723, "step": 11630 }, { "epoch": 1.55, "learning_rate": 9.653333333333335e-06, "loss": 0.7018, "step": 11640 }, { "epoch": 1.55, "learning_rate": 9.644444444444444e-06, "loss": 0.7004, "step": 11650 }, { "epoch": 1.55, "learning_rate": 9.635555555555557e-06, "loss": 0.6964, "step": 11660 }, { "epoch": 1.56, "learning_rate": 9.626666666666667e-06, "loss": 0.7217, "step": 11670 }, { "epoch": 1.56, "learning_rate": 9.617777777777778e-06, "loss": 0.6883, "step": 11680 }, { "epoch": 1.56, "learning_rate": 9.60888888888889e-06, "loss": 0.6794, "step": 11690 }, { "epoch": 1.56, "learning_rate": 9.600000000000001e-06, "loss": 0.8018, "step": 11700 }, { "epoch": 1.56, "learning_rate": 9.591111111111113e-06, "loss": 0.7603, "step": 11710 }, { "epoch": 1.56, "learning_rate": 9.582222222222222e-06, "loss": 0.7066, "step": 11720 }, { "epoch": 1.56, "learning_rate": 9.573333333333334e-06, "loss": 0.7601, "step": 11730 }, { "epoch": 1.57, "learning_rate": 9.564444444444445e-06, "loss": 0.6663, "step": 11740 }, { "epoch": 1.57, "learning_rate": 9.555555555555556e-06, "loss": 0.7425, "step": 11750 }, { "epoch": 1.57, "learning_rate": 9.546666666666668e-06, "loss": 0.7277, "step": 11760 }, { "epoch": 1.57, "learning_rate": 9.537777777777778e-06, "loss": 0.646, "step": 11770 }, { "epoch": 1.57, "learning_rate": 9.528888888888889e-06, "loss": 0.7021, "step": 11780 }, { "epoch": 1.57, "learning_rate": 9.52e-06, "loss": 0.6693, "step": 11790 }, { "epoch": 1.57, "learning_rate": 9.511111111111112e-06, "loss": 0.8546, "step": 11800 }, { "epoch": 1.57, "learning_rate": 9.502222222222223e-06, "loss": 0.7618, "step": 11810 }, { "epoch": 1.58, "learning_rate": 9.493333333333334e-06, "loss": 0.7135, "step": 11820 }, { "epoch": 1.58, "learning_rate": 9.484444444444444e-06, "loss": 0.6883, "step": 11830 }, { "epoch": 1.58, "learning_rate": 9.475555555555557e-06, "loss": 0.6747, "step": 11840 }, { "epoch": 1.58, "learning_rate": 9.466666666666667e-06, "loss": 0.7313, "step": 11850 }, { "epoch": 1.58, "learning_rate": 9.457777777777778e-06, "loss": 0.688, "step": 11860 }, { "epoch": 1.58, "learning_rate": 9.44888888888889e-06, "loss": 0.7064, "step": 11870 }, { "epoch": 1.58, "learning_rate": 9.440000000000001e-06, "loss": 0.7938, "step": 11880 }, { "epoch": 1.59, "learning_rate": 9.431111111111112e-06, "loss": 0.7476, "step": 11890 }, { "epoch": 1.59, "learning_rate": 9.422222222222222e-06, "loss": 0.7319, "step": 11900 }, { "epoch": 1.59, "learning_rate": 9.413333333333334e-06, "loss": 0.6986, "step": 11910 }, { "epoch": 1.59, "learning_rate": 9.404444444444445e-06, "loss": 0.7206, "step": 11920 }, { "epoch": 1.59, "learning_rate": 9.395555555555556e-06, "loss": 0.7049, "step": 11930 }, { "epoch": 1.59, "learning_rate": 9.386666666666668e-06, "loss": 0.7671, "step": 11940 }, { "epoch": 1.59, "learning_rate": 9.377777777777779e-06, "loss": 0.6748, "step": 11950 }, { "epoch": 1.59, "learning_rate": 9.368888888888889e-06, "loss": 0.7162, "step": 11960 }, { "epoch": 1.6, "learning_rate": 9.360000000000002e-06, "loss": 0.7036, "step": 11970 }, { "epoch": 1.6, "learning_rate": 9.351111111111112e-06, "loss": 0.7187, "step": 11980 }, { "epoch": 1.6, "learning_rate": 9.342222222222223e-06, "loss": 0.7259, "step": 11990 }, { "epoch": 1.6, "learning_rate": 9.333333333333334e-06, "loss": 0.6994, "step": 12000 }, { "epoch": 1.6, "learning_rate": 9.324444444444444e-06, "loss": 0.7618, "step": 12010 }, { "epoch": 1.6, "learning_rate": 9.315555555555557e-06, "loss": 0.7888, "step": 12020 }, { "epoch": 1.6, "learning_rate": 9.306666666666667e-06, "loss": 0.697, "step": 12030 }, { "epoch": 1.61, "learning_rate": 9.297777777777778e-06, "loss": 0.7245, "step": 12040 }, { "epoch": 1.61, "learning_rate": 9.28888888888889e-06, "loss": 0.7089, "step": 12050 }, { "epoch": 1.61, "learning_rate": 9.280000000000001e-06, "loss": 0.7039, "step": 12060 }, { "epoch": 1.61, "learning_rate": 9.271111111111112e-06, "loss": 0.6872, "step": 12070 }, { "epoch": 1.61, "learning_rate": 9.262222222222222e-06, "loss": 0.7602, "step": 12080 }, { "epoch": 1.61, "learning_rate": 9.253333333333333e-06, "loss": 0.71, "step": 12090 }, { "epoch": 1.61, "learning_rate": 9.244444444444445e-06, "loss": 0.7005, "step": 12100 }, { "epoch": 1.61, "learning_rate": 9.235555555555556e-06, "loss": 0.7184, "step": 12110 }, { "epoch": 1.62, "learning_rate": 9.226666666666668e-06, "loss": 0.6709, "step": 12120 }, { "epoch": 1.62, "learning_rate": 9.217777777777779e-06, "loss": 0.7453, "step": 12130 }, { "epoch": 1.62, "learning_rate": 9.208888888888889e-06, "loss": 0.6611, "step": 12140 }, { "epoch": 1.62, "learning_rate": 9.200000000000002e-06, "loss": 0.6782, "step": 12150 }, { "epoch": 1.62, "learning_rate": 9.191111111111111e-06, "loss": 0.756, "step": 12160 }, { "epoch": 1.62, "learning_rate": 9.182222222222223e-06, "loss": 0.7242, "step": 12170 }, { "epoch": 1.62, "learning_rate": 9.173333333333334e-06, "loss": 0.7422, "step": 12180 }, { "epoch": 1.63, "learning_rate": 9.164444444444446e-06, "loss": 0.6587, "step": 12190 }, { "epoch": 1.63, "learning_rate": 9.155555555555557e-06, "loss": 0.6848, "step": 12200 }, { "epoch": 1.63, "learning_rate": 9.146666666666667e-06, "loss": 0.7194, "step": 12210 }, { "epoch": 1.63, "learning_rate": 9.137777777777778e-06, "loss": 0.6904, "step": 12220 }, { "epoch": 1.63, "learning_rate": 9.12888888888889e-06, "loss": 0.7084, "step": 12230 }, { "epoch": 1.63, "learning_rate": 9.12e-06, "loss": 0.7074, "step": 12240 }, { "epoch": 1.63, "learning_rate": 9.111111111111112e-06, "loss": 0.7263, "step": 12250 }, { "epoch": 1.63, "learning_rate": 9.102222222222224e-06, "loss": 0.7782, "step": 12260 }, { "epoch": 1.64, "learning_rate": 9.093333333333333e-06, "loss": 0.7199, "step": 12270 }, { "epoch": 1.64, "learning_rate": 9.084444444444446e-06, "loss": 0.7075, "step": 12280 }, { "epoch": 1.64, "learning_rate": 9.075555555555556e-06, "loss": 0.7458, "step": 12290 }, { "epoch": 1.64, "learning_rate": 9.066666666666667e-06, "loss": 0.6965, "step": 12300 }, { "epoch": 1.64, "learning_rate": 9.057777777777779e-06, "loss": 0.7635, "step": 12310 }, { "epoch": 1.64, "learning_rate": 9.048888888888888e-06, "loss": 0.7366, "step": 12320 }, { "epoch": 1.64, "learning_rate": 9.040000000000002e-06, "loss": 0.7603, "step": 12330 }, { "epoch": 1.65, "learning_rate": 9.031111111111111e-06, "loss": 0.7134, "step": 12340 }, { "epoch": 1.65, "learning_rate": 9.022222222222223e-06, "loss": 0.6368, "step": 12350 }, { "epoch": 1.65, "learning_rate": 9.013333333333334e-06, "loss": 0.7192, "step": 12360 }, { "epoch": 1.65, "learning_rate": 9.004444444444445e-06, "loss": 0.8004, "step": 12370 }, { "epoch": 1.65, "learning_rate": 8.995555555555557e-06, "loss": 0.6733, "step": 12380 }, { "epoch": 1.65, "learning_rate": 8.986666666666666e-06, "loss": 0.7525, "step": 12390 }, { "epoch": 1.65, "learning_rate": 8.977777777777778e-06, "loss": 0.7409, "step": 12400 }, { "epoch": 1.65, "learning_rate": 8.96888888888889e-06, "loss": 0.6567, "step": 12410 }, { "epoch": 1.66, "learning_rate": 8.96e-06, "loss": 0.7242, "step": 12420 }, { "epoch": 1.66, "learning_rate": 8.951111111111112e-06, "loss": 0.6818, "step": 12430 }, { "epoch": 1.66, "learning_rate": 8.942222222222223e-06, "loss": 0.7027, "step": 12440 }, { "epoch": 1.66, "learning_rate": 8.933333333333333e-06, "loss": 0.6762, "step": 12450 }, { "epoch": 1.66, "learning_rate": 8.924444444444446e-06, "loss": 0.7043, "step": 12460 }, { "epoch": 1.66, "learning_rate": 8.915555555555556e-06, "loss": 0.6918, "step": 12470 }, { "epoch": 1.66, "learning_rate": 8.906666666666667e-06, "loss": 0.6995, "step": 12480 }, { "epoch": 1.67, "learning_rate": 8.897777777777779e-06, "loss": 0.6593, "step": 12490 }, { "epoch": 1.67, "learning_rate": 8.888888888888888e-06, "loss": 0.6886, "step": 12500 }, { "epoch": 1.67, "learning_rate": 8.880000000000001e-06, "loss": 0.6795, "step": 12510 }, { "epoch": 1.67, "learning_rate": 8.871111111111111e-06, "loss": 0.6816, "step": 12520 }, { "epoch": 1.67, "learning_rate": 8.862222222222222e-06, "loss": 0.6703, "step": 12530 }, { "epoch": 1.67, "learning_rate": 8.853333333333334e-06, "loss": 0.7687, "step": 12540 }, { "epoch": 1.67, "learning_rate": 8.844444444444445e-06, "loss": 0.7018, "step": 12550 }, { "epoch": 1.67, "learning_rate": 8.835555555555557e-06, "loss": 0.6887, "step": 12560 }, { "epoch": 1.68, "learning_rate": 8.826666666666668e-06, "loss": 0.7376, "step": 12570 }, { "epoch": 1.68, "learning_rate": 8.817777777777778e-06, "loss": 0.6635, "step": 12580 }, { "epoch": 1.68, "learning_rate": 8.80888888888889e-06, "loss": 0.6776, "step": 12590 }, { "epoch": 1.68, "learning_rate": 8.8e-06, "loss": 0.7321, "step": 12600 }, { "epoch": 1.68, "learning_rate": 8.791111111111112e-06, "loss": 0.6588, "step": 12610 }, { "epoch": 1.68, "learning_rate": 8.782222222222223e-06, "loss": 0.7106, "step": 12620 }, { "epoch": 1.68, "learning_rate": 8.773333333333333e-06, "loss": 0.7018, "step": 12630 }, { "epoch": 1.69, "learning_rate": 8.764444444444446e-06, "loss": 0.7487, "step": 12640 }, { "epoch": 1.69, "learning_rate": 8.755555555555556e-06, "loss": 0.7104, "step": 12650 }, { "epoch": 1.69, "learning_rate": 8.746666666666667e-06, "loss": 0.7269, "step": 12660 }, { "epoch": 1.69, "learning_rate": 8.737777777777778e-06, "loss": 0.7153, "step": 12670 }, { "epoch": 1.69, "learning_rate": 8.72888888888889e-06, "loss": 0.7051, "step": 12680 }, { "epoch": 1.69, "learning_rate": 8.720000000000001e-06, "loss": 0.7838, "step": 12690 }, { "epoch": 1.69, "learning_rate": 8.711111111111111e-06, "loss": 0.712, "step": 12700 }, { "epoch": 1.69, "learning_rate": 8.702222222222222e-06, "loss": 0.6718, "step": 12710 }, { "epoch": 1.7, "learning_rate": 8.693333333333334e-06, "loss": 0.7334, "step": 12720 }, { "epoch": 1.7, "learning_rate": 8.684444444444445e-06, "loss": 0.7287, "step": 12730 }, { "epoch": 1.7, "learning_rate": 8.675555555555556e-06, "loss": 0.7179, "step": 12740 }, { "epoch": 1.7, "learning_rate": 8.666666666666668e-06, "loss": 0.7112, "step": 12750 }, { "epoch": 1.7, "learning_rate": 8.657777777777778e-06, "loss": 0.7557, "step": 12760 }, { "epoch": 1.7, "learning_rate": 8.64888888888889e-06, "loss": 0.6514, "step": 12770 }, { "epoch": 1.7, "learning_rate": 8.64e-06, "loss": 0.716, "step": 12780 }, { "epoch": 1.71, "learning_rate": 8.631111111111112e-06, "loss": 0.7039, "step": 12790 }, { "epoch": 1.71, "learning_rate": 8.622222222222223e-06, "loss": 0.6395, "step": 12800 }, { "epoch": 1.71, "learning_rate": 8.613333333333333e-06, "loss": 0.713, "step": 12810 }, { "epoch": 1.71, "learning_rate": 8.604444444444446e-06, "loss": 0.7074, "step": 12820 }, { "epoch": 1.71, "learning_rate": 8.595555555555556e-06, "loss": 0.672, "step": 12830 }, { "epoch": 1.71, "learning_rate": 8.586666666666667e-06, "loss": 0.7612, "step": 12840 }, { "epoch": 1.71, "learning_rate": 8.577777777777778e-06, "loss": 0.7159, "step": 12850 }, { "epoch": 1.71, "learning_rate": 8.56888888888889e-06, "loss": 0.6793, "step": 12860 }, { "epoch": 1.72, "learning_rate": 8.560000000000001e-06, "loss": 0.6674, "step": 12870 }, { "epoch": 1.72, "learning_rate": 8.551111111111112e-06, "loss": 0.6803, "step": 12880 }, { "epoch": 1.72, "learning_rate": 8.542222222222222e-06, "loss": 0.6837, "step": 12890 }, { "epoch": 1.72, "learning_rate": 8.533333333333335e-06, "loss": 0.7543, "step": 12900 }, { "epoch": 1.72, "learning_rate": 8.524444444444445e-06, "loss": 0.6482, "step": 12910 }, { "epoch": 1.72, "learning_rate": 8.515555555555556e-06, "loss": 0.64, "step": 12920 }, { "epoch": 1.72, "learning_rate": 8.506666666666668e-06, "loss": 0.6771, "step": 12930 }, { "epoch": 1.73, "learning_rate": 8.497777777777777e-06, "loss": 0.6631, "step": 12940 }, { "epoch": 1.73, "learning_rate": 8.48888888888889e-06, "loss": 0.7043, "step": 12950 }, { "epoch": 1.73, "learning_rate": 8.48e-06, "loss": 0.7257, "step": 12960 }, { "epoch": 1.73, "learning_rate": 8.471111111111112e-06, "loss": 0.7321, "step": 12970 }, { "epoch": 1.73, "learning_rate": 8.462222222222223e-06, "loss": 0.6808, "step": 12980 }, { "epoch": 1.73, "learning_rate": 8.453333333333334e-06, "loss": 0.7341, "step": 12990 }, { "epoch": 1.73, "learning_rate": 8.444444444444446e-06, "loss": 0.6642, "step": 13000 }, { "epoch": 1.73, "learning_rate": 8.435555555555555e-06, "loss": 0.6515, "step": 13010 }, { "epoch": 1.74, "learning_rate": 8.426666666666667e-06, "loss": 0.6671, "step": 13020 }, { "epoch": 1.74, "learning_rate": 8.417777777777778e-06, "loss": 0.7053, "step": 13030 }, { "epoch": 1.74, "learning_rate": 8.40888888888889e-06, "loss": 0.6431, "step": 13040 }, { "epoch": 1.74, "learning_rate": 8.400000000000001e-06, "loss": 0.693, "step": 13050 }, { "epoch": 1.74, "learning_rate": 8.391111111111112e-06, "loss": 0.7597, "step": 13060 }, { "epoch": 1.74, "learning_rate": 8.382222222222222e-06, "loss": 0.7125, "step": 13070 }, { "epoch": 1.74, "learning_rate": 8.373333333333335e-06, "loss": 0.5933, "step": 13080 }, { "epoch": 1.75, "learning_rate": 8.364444444444445e-06, "loss": 0.6834, "step": 13090 }, { "epoch": 1.75, "learning_rate": 8.355555555555556e-06, "loss": 0.6817, "step": 13100 }, { "epoch": 1.75, "learning_rate": 8.346666666666668e-06, "loss": 0.7137, "step": 13110 }, { "epoch": 1.75, "learning_rate": 8.337777777777777e-06, "loss": 0.7056, "step": 13120 }, { "epoch": 1.75, "learning_rate": 8.32888888888889e-06, "loss": 0.6912, "step": 13130 }, { "epoch": 1.75, "learning_rate": 8.32e-06, "loss": 0.7214, "step": 13140 }, { "epoch": 1.75, "learning_rate": 8.311111111111111e-06, "loss": 0.7583, "step": 13150 }, { "epoch": 1.75, "learning_rate": 8.302222222222223e-06, "loss": 0.6606, "step": 13160 }, { "epoch": 1.76, "learning_rate": 8.293333333333334e-06, "loss": 0.7134, "step": 13170 }, { "epoch": 1.76, "learning_rate": 8.284444444444446e-06, "loss": 0.6941, "step": 13180 }, { "epoch": 1.76, "learning_rate": 8.275555555555557e-06, "loss": 0.7008, "step": 13190 }, { "epoch": 1.76, "learning_rate": 8.266666666666667e-06, "loss": 0.732, "step": 13200 }, { "epoch": 1.76, "learning_rate": 8.25777777777778e-06, "loss": 0.6883, "step": 13210 }, { "epoch": 1.76, "learning_rate": 8.24888888888889e-06, "loss": 0.7943, "step": 13220 }, { "epoch": 1.76, "learning_rate": 8.24e-06, "loss": 0.6992, "step": 13230 }, { "epoch": 1.77, "learning_rate": 8.231111111111112e-06, "loss": 0.6463, "step": 13240 }, { "epoch": 1.77, "learning_rate": 8.222222222222222e-06, "loss": 0.7039, "step": 13250 }, { "epoch": 1.77, "learning_rate": 8.213333333333335e-06, "loss": 0.7103, "step": 13260 }, { "epoch": 1.77, "learning_rate": 8.204444444444445e-06, "loss": 0.6533, "step": 13270 }, { "epoch": 1.77, "learning_rate": 8.195555555555556e-06, "loss": 0.7102, "step": 13280 }, { "epoch": 1.77, "learning_rate": 8.186666666666667e-06, "loss": 0.6318, "step": 13290 }, { "epoch": 1.77, "learning_rate": 8.177777777777779e-06, "loss": 0.6831, "step": 13300 }, { "epoch": 1.77, "learning_rate": 8.16888888888889e-06, "loss": 0.6734, "step": 13310 }, { "epoch": 1.78, "learning_rate": 8.16e-06, "loss": 0.6934, "step": 13320 }, { "epoch": 1.78, "learning_rate": 8.151111111111111e-06, "loss": 0.6982, "step": 13330 }, { "epoch": 1.78, "learning_rate": 8.142222222222223e-06, "loss": 0.7029, "step": 13340 }, { "epoch": 1.78, "learning_rate": 8.133333333333334e-06, "loss": 0.6543, "step": 13350 }, { "epoch": 1.78, "learning_rate": 8.124444444444445e-06, "loss": 0.6207, "step": 13360 }, { "epoch": 1.78, "learning_rate": 8.115555555555557e-06, "loss": 0.6856, "step": 13370 }, { "epoch": 1.78, "learning_rate": 8.106666666666666e-06, "loss": 0.731, "step": 13380 }, { "epoch": 1.79, "learning_rate": 8.09777777777778e-06, "loss": 0.6858, "step": 13390 }, { "epoch": 1.79, "learning_rate": 8.08888888888889e-06, "loss": 0.6667, "step": 13400 }, { "epoch": 1.79, "learning_rate": 8.08e-06, "loss": 0.701, "step": 13410 }, { "epoch": 1.79, "learning_rate": 8.071111111111112e-06, "loss": 0.708, "step": 13420 }, { "epoch": 1.79, "learning_rate": 8.062222222222222e-06, "loss": 0.6504, "step": 13430 }, { "epoch": 1.79, "learning_rate": 8.053333333333335e-06, "loss": 0.6561, "step": 13440 }, { "epoch": 1.79, "learning_rate": 8.044444444444444e-06, "loss": 0.6682, "step": 13450 }, { "epoch": 1.79, "learning_rate": 8.035555555555556e-06, "loss": 0.71, "step": 13460 }, { "epoch": 1.8, "learning_rate": 8.026666666666667e-06, "loss": 0.7156, "step": 13470 }, { "epoch": 1.8, "learning_rate": 8.017777777777779e-06, "loss": 0.6964, "step": 13480 }, { "epoch": 1.8, "learning_rate": 8.00888888888889e-06, "loss": 0.6996, "step": 13490 }, { "epoch": 1.8, "learning_rate": 8.000000000000001e-06, "loss": 0.7139, "step": 13500 }, { "epoch": 1.8, "learning_rate": 7.991111111111111e-06, "loss": 0.76, "step": 13510 }, { "epoch": 1.8, "learning_rate": 7.982222222222224e-06, "loss": 0.6734, "step": 13520 }, { "epoch": 1.8, "learning_rate": 7.973333333333334e-06, "loss": 0.723, "step": 13530 }, { "epoch": 1.81, "learning_rate": 7.964444444444445e-06, "loss": 0.7115, "step": 13540 }, { "epoch": 1.81, "learning_rate": 7.955555555555557e-06, "loss": 0.6493, "step": 13550 }, { "epoch": 1.81, "learning_rate": 7.946666666666666e-06, "loss": 0.6918, "step": 13560 }, { "epoch": 1.81, "learning_rate": 7.93777777777778e-06, "loss": 0.6698, "step": 13570 }, { "epoch": 1.81, "learning_rate": 7.928888888888889e-06, "loss": 0.7333, "step": 13580 }, { "epoch": 1.81, "learning_rate": 7.92e-06, "loss": 0.6791, "step": 13590 }, { "epoch": 1.81, "learning_rate": 7.911111111111112e-06, "loss": 0.6802, "step": 13600 }, { "epoch": 1.81, "learning_rate": 7.902222222222223e-06, "loss": 0.7073, "step": 13610 }, { "epoch": 1.82, "learning_rate": 7.893333333333335e-06, "loss": 0.6823, "step": 13620 }, { "epoch": 1.82, "learning_rate": 7.884444444444444e-06, "loss": 0.6737, "step": 13630 }, { "epoch": 1.82, "learning_rate": 7.875555555555556e-06, "loss": 0.6729, "step": 13640 }, { "epoch": 1.82, "learning_rate": 7.866666666666667e-06, "loss": 0.5965, "step": 13650 }, { "epoch": 1.82, "learning_rate": 7.857777777777778e-06, "loss": 0.6568, "step": 13660 }, { "epoch": 1.82, "learning_rate": 7.84888888888889e-06, "loss": 0.624, "step": 13670 }, { "epoch": 1.82, "learning_rate": 7.840000000000001e-06, "loss": 0.6822, "step": 13680 }, { "epoch": 1.83, "learning_rate": 7.831111111111111e-06, "loss": 0.7094, "step": 13690 }, { "epoch": 1.83, "learning_rate": 7.822222222222224e-06, "loss": 0.6948, "step": 13700 }, { "epoch": 1.83, "learning_rate": 7.813333333333334e-06, "loss": 0.6947, "step": 13710 }, { "epoch": 1.83, "learning_rate": 7.804444444444445e-06, "loss": 0.637, "step": 13720 }, { "epoch": 1.83, "learning_rate": 7.795555555555556e-06, "loss": 0.7252, "step": 13730 }, { "epoch": 1.83, "learning_rate": 7.786666666666666e-06, "loss": 0.656, "step": 13740 }, { "epoch": 1.83, "learning_rate": 7.77777777777778e-06, "loss": 0.6919, "step": 13750 }, { "epoch": 1.83, "learning_rate": 7.768888888888889e-06, "loss": 0.6921, "step": 13760 }, { "epoch": 1.84, "learning_rate": 7.76e-06, "loss": 0.7302, "step": 13770 }, { "epoch": 1.84, "learning_rate": 7.751111111111112e-06, "loss": 0.7288, "step": 13780 }, { "epoch": 1.84, "learning_rate": 7.742222222222223e-06, "loss": 0.6818, "step": 13790 }, { "epoch": 1.84, "learning_rate": 7.733333333333334e-06, "loss": 0.6658, "step": 13800 }, { "epoch": 1.84, "learning_rate": 7.724444444444446e-06, "loss": 0.7007, "step": 13810 }, { "epoch": 1.84, "learning_rate": 7.715555555555555e-06, "loss": 0.6918, "step": 13820 }, { "epoch": 1.84, "learning_rate": 7.706666666666669e-06, "loss": 0.6665, "step": 13830 }, { "epoch": 1.85, "learning_rate": 7.697777777777778e-06, "loss": 0.7375, "step": 13840 }, { "epoch": 1.85, "learning_rate": 7.68888888888889e-06, "loss": 0.6759, "step": 13850 }, { "epoch": 1.85, "learning_rate": 7.680000000000001e-06, "loss": 0.6705, "step": 13860 }, { "epoch": 1.85, "learning_rate": 7.67111111111111e-06, "loss": 0.7336, "step": 13870 }, { "epoch": 1.85, "learning_rate": 7.662222222222224e-06, "loss": 0.726, "step": 13880 }, { "epoch": 1.85, "learning_rate": 7.653333333333333e-06, "loss": 0.7052, "step": 13890 }, { "epoch": 1.85, "learning_rate": 7.644444444444445e-06, "loss": 0.7233, "step": 13900 }, { "epoch": 1.85, "learning_rate": 7.635555555555556e-06, "loss": 0.6375, "step": 13910 }, { "epoch": 1.86, "learning_rate": 7.626666666666668e-06, "loss": 0.6844, "step": 13920 }, { "epoch": 1.86, "learning_rate": 7.617777777777778e-06, "loss": 0.7097, "step": 13930 }, { "epoch": 1.86, "learning_rate": 7.608888888888889e-06, "loss": 0.6683, "step": 13940 }, { "epoch": 1.86, "learning_rate": 7.600000000000001e-06, "loss": 0.6622, "step": 13950 }, { "epoch": 1.86, "learning_rate": 7.5911111111111115e-06, "loss": 0.65, "step": 13960 }, { "epoch": 1.86, "learning_rate": 7.582222222222223e-06, "loss": 0.6841, "step": 13970 }, { "epoch": 1.86, "learning_rate": 7.573333333333333e-06, "loss": 0.6802, "step": 13980 }, { "epoch": 1.87, "learning_rate": 7.564444444444446e-06, "loss": 0.6763, "step": 13990 }, { "epoch": 1.87, "learning_rate": 7.555555555555556e-06, "loss": 0.6184, "step": 14000 }, { "epoch": 1.87, "learning_rate": 7.5466666666666675e-06, "loss": 0.6764, "step": 14010 }, { "epoch": 1.87, "learning_rate": 7.537777777777778e-06, "loss": 0.6801, "step": 14020 }, { "epoch": 1.87, "learning_rate": 7.52888888888889e-06, "loss": 0.6836, "step": 14030 }, { "epoch": 1.87, "learning_rate": 7.520000000000001e-06, "loss": 0.6634, "step": 14040 }, { "epoch": 1.87, "learning_rate": 7.511111111111111e-06, "loss": 0.6955, "step": 14050 }, { "epoch": 1.87, "learning_rate": 7.502222222222223e-06, "loss": 0.6495, "step": 14060 }, { "epoch": 1.88, "learning_rate": 7.493333333333333e-06, "loss": 0.7518, "step": 14070 }, { "epoch": 1.88, "learning_rate": 7.4844444444444455e-06, "loss": 0.6673, "step": 14080 }, { "epoch": 1.88, "learning_rate": 7.475555555555556e-06, "loss": 0.606, "step": 14090 }, { "epoch": 1.88, "learning_rate": 7.4666666666666675e-06, "loss": 0.6734, "step": 14100 }, { "epoch": 1.88, "learning_rate": 7.457777777777778e-06, "loss": 0.652, "step": 14110 }, { "epoch": 1.88, "learning_rate": 7.44888888888889e-06, "loss": 0.668, "step": 14120 }, { "epoch": 1.88, "learning_rate": 7.440000000000001e-06, "loss": 0.7122, "step": 14130 }, { "epoch": 1.89, "learning_rate": 7.431111111111111e-06, "loss": 0.6522, "step": 14140 }, { "epoch": 1.89, "learning_rate": 7.422222222222223e-06, "loss": 0.6663, "step": 14150 }, { "epoch": 1.89, "learning_rate": 7.413333333333333e-06, "loss": 0.6331, "step": 14160 }, { "epoch": 1.89, "learning_rate": 7.4044444444444455e-06, "loss": 0.6312, "step": 14170 }, { "epoch": 1.89, "learning_rate": 7.395555555555556e-06, "loss": 0.7119, "step": 14180 }, { "epoch": 1.89, "learning_rate": 7.386666666666667e-06, "loss": 0.7851, "step": 14190 }, { "epoch": 1.89, "learning_rate": 7.377777777777778e-06, "loss": 0.6033, "step": 14200 } ], "max_steps": 22500, "num_train_epochs": 3, "total_flos": 4.623791925392179e+16, "trial_name": null, "trial_params": null }