{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 2500, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9999999999999996e-07, "loss": 10.5991, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.5e-07, "loss": 10.4404, "step": 20 }, { "epoch": 0.0, "learning_rate": 7e-07, "loss": 10.4079, "step": 30 }, { "epoch": 0.0, "learning_rate": 9.499999999999999e-07, "loss": 10.3433, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.2e-06, "loss": 10.2894, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.4499999999999999e-06, "loss": 10.2258, "step": 60 }, { "epoch": 0.0, "learning_rate": 1.7e-06, "loss": 10.1618, "step": 70 }, { "epoch": 0.0, "learning_rate": 1.9499999999999995e-06, "loss": 10.0841, "step": 80 }, { "epoch": 0.0, "learning_rate": 2.1999999999999997e-06, "loss": 10.0074, "step": 90 }, { "epoch": 0.0, "learning_rate": 2.4499999999999994e-06, "loss": 9.9546, "step": 100 }, { "epoch": 0.0, "learning_rate": 2.6999999999999996e-06, "loss": 9.8959, "step": 110 }, { "epoch": 0.0, "learning_rate": 2.9499999999999997e-06, "loss": 9.8438, "step": 120 }, { "epoch": 0.0, "learning_rate": 3.1999999999999994e-06, "loss": 9.7693, "step": 130 }, { "epoch": 0.0, "learning_rate": 3.4499999999999996e-06, "loss": 9.6931, "step": 140 }, { "epoch": 0.0, "learning_rate": 3.6999999999999997e-06, "loss": 9.6457, "step": 150 }, { "epoch": 0.0, "learning_rate": 3.9499999999999995e-06, "loss": 9.5779, "step": 160 }, { "epoch": 0.0, "learning_rate": 4.2e-06, "loss": 9.5133, "step": 170 }, { "epoch": 0.0, "learning_rate": 4.45e-06, "loss": 9.4415, "step": 180 }, { "epoch": 0.0, "learning_rate": 4.699999999999999e-06, "loss": 9.3703, "step": 190 }, { "epoch": 0.0, "learning_rate": 4.95e-06, "loss": 9.2958, "step": 200 }, { "epoch": 0.0, "learning_rate": 5.199999999999999e-06, "loss": 9.2097, "step": 210 }, { "epoch": 0.0, "learning_rate": 5.4499999999999995e-06, "loss": 9.1562, "step": 220 }, { "epoch": 0.0, "learning_rate": 5.7e-06, "loss": 9.0652, "step": 230 }, { "epoch": 0.0, "learning_rate": 5.95e-06, "loss": 9.0147, "step": 240 }, { "epoch": 0.0, "learning_rate": 6.199999999999999e-06, "loss": 8.917, "step": 250 }, { "epoch": 0.0, "learning_rate": 6.449999999999999e-06, "loss": 8.86, "step": 260 }, { "epoch": 0.0, "learning_rate": 6.699999999999999e-06, "loss": 8.7804, "step": 270 }, { "epoch": 0.0, "learning_rate": 6.949999999999999e-06, "loss": 8.6979, "step": 280 }, { "epoch": 0.0, "learning_rate": 7.2e-06, "loss": 8.6123, "step": 290 }, { "epoch": 0.0, "learning_rate": 7.449999999999999e-06, "loss": 8.5626, "step": 300 }, { "epoch": 0.0, "learning_rate": 7.699999999999999e-06, "loss": 8.4648, "step": 310 }, { "epoch": 0.0, "learning_rate": 7.949999999999998e-06, "loss": 8.4083, "step": 320 }, { "epoch": 0.0, "learning_rate": 8.2e-06, "loss": 8.3222, "step": 330 }, { "epoch": 0.0, "learning_rate": 8.449999999999999e-06, "loss": 8.2542, "step": 340 }, { "epoch": 0.0, "learning_rate": 8.7e-06, "loss": 8.1903, "step": 350 }, { "epoch": 0.0, "learning_rate": 8.949999999999999e-06, "loss": 8.1185, "step": 360 }, { "epoch": 0.0, "learning_rate": 9.199999999999998e-06, "loss": 8.0568, "step": 370 }, { "epoch": 0.0, "learning_rate": 9.45e-06, "loss": 7.9776, "step": 380 }, { "epoch": 0.0, "learning_rate": 9.699999999999999e-06, "loss": 7.9201, "step": 390 }, { "epoch": 0.0, "learning_rate": 9.949999999999998e-06, "loss": 7.84, "step": 400 }, { "epoch": 0.0, "learning_rate": 1.02e-05, "loss": 7.788, "step": 410 }, { "epoch": 0.0, "learning_rate": 1.0425e-05, "loss": 7.708, "step": 420 }, { "epoch": 0.0, "learning_rate": 1.0675e-05, "loss": 7.6345, "step": 430 }, { "epoch": 0.0, "learning_rate": 1.0925e-05, "loss": 7.5975, "step": 440 }, { "epoch": 0.0, "learning_rate": 1.1174999999999999e-05, "loss": 7.5093, "step": 450 }, { "epoch": 0.0, "learning_rate": 1.1424999999999998e-05, "loss": 7.4443, "step": 460 }, { "epoch": 0.0, "learning_rate": 1.1675e-05, "loss": 7.3872, "step": 470 }, { "epoch": 0.0, "learning_rate": 1.1924999999999998e-05, "loss": 7.3158, "step": 480 }, { "epoch": 0.0, "learning_rate": 1.2175e-05, "loss": 7.2681, "step": 490 }, { "epoch": 0.01, "learning_rate": 1.2424999999999999e-05, "loss": 7.2233, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.2675e-05, "loss": 7.1814, "step": 510 }, { "epoch": 0.01, "learning_rate": 1.2924999999999999e-05, "loss": 7.1009, "step": 520 }, { "epoch": 0.01, "learning_rate": 1.3174999999999998e-05, "loss": 7.0456, "step": 530 }, { "epoch": 0.01, "learning_rate": 1.3424999999999998e-05, "loss": 7.0051, "step": 540 }, { "epoch": 0.01, "learning_rate": 1.3674999999999997e-05, "loss": 6.9287, "step": 550 }, { "epoch": 0.01, "learning_rate": 1.3925e-05, "loss": 6.858, "step": 560 }, { "epoch": 0.01, "learning_rate": 1.4174999999999999e-05, "loss": 6.8178, "step": 570 }, { "epoch": 0.01, "learning_rate": 1.4424999999999998e-05, "loss": 6.8199, "step": 580 }, { "epoch": 0.01, "learning_rate": 1.4674999999999998e-05, "loss": 6.7664, "step": 590 }, { "epoch": 0.01, "learning_rate": 1.4925e-05, "loss": 6.6982, "step": 600 }, { "epoch": 0.01, "learning_rate": 1.5175e-05, "loss": 6.6649, "step": 610 }, { "epoch": 0.01, "learning_rate": 1.5425e-05, "loss": 6.6193, "step": 620 }, { "epoch": 0.01, "learning_rate": 1.5674999999999998e-05, "loss": 6.5672, "step": 630 }, { "epoch": 0.01, "learning_rate": 1.5925e-05, "loss": 6.5679, "step": 640 }, { "epoch": 0.01, "learning_rate": 1.6175e-05, "loss": 6.5134, "step": 650 }, { "epoch": 0.01, "learning_rate": 1.6425e-05, "loss": 6.4569, "step": 660 }, { "epoch": 0.01, "learning_rate": 1.6675e-05, "loss": 6.4107, "step": 670 }, { "epoch": 0.01, "learning_rate": 1.6924999999999998e-05, "loss": 6.396, "step": 680 }, { "epoch": 0.01, "learning_rate": 1.7175e-05, "loss": 6.3522, "step": 690 }, { "epoch": 0.01, "learning_rate": 1.74e-05, "loss": 6.3442, "step": 700 }, { "epoch": 0.01, "learning_rate": 1.765e-05, "loss": 6.3664, "step": 710 }, { "epoch": 0.01, "learning_rate": 1.7899999999999998e-05, "loss": 6.2891, "step": 720 }, { "epoch": 0.01, "learning_rate": 1.8149999999999997e-05, "loss": 6.2448, "step": 730 }, { "epoch": 0.01, "learning_rate": 1.8399999999999997e-05, "loss": 6.1858, "step": 740 }, { "epoch": 0.01, "learning_rate": 1.865e-05, "loss": 6.1695, "step": 750 }, { "epoch": 0.01, "learning_rate": 1.89e-05, "loss": 6.1665, "step": 760 }, { "epoch": 0.01, "learning_rate": 1.915e-05, "loss": 6.0991, "step": 770 }, { "epoch": 0.01, "learning_rate": 1.9399999999999997e-05, "loss": 6.08, "step": 780 }, { "epoch": 0.01, "learning_rate": 1.965e-05, "loss": 6.0533, "step": 790 }, { "epoch": 0.01, "learning_rate": 1.9899999999999996e-05, "loss": 6.0263, "step": 800 }, { "epoch": 0.01, "learning_rate": 2.015e-05, "loss": 5.9932, "step": 810 }, { "epoch": 0.01, "learning_rate": 2.04e-05, "loss": 5.962, "step": 820 }, { "epoch": 0.01, "learning_rate": 2.0649999999999997e-05, "loss": 5.9409, "step": 830 }, { "epoch": 0.01, "learning_rate": 2.09e-05, "loss": 5.919, "step": 840 }, { "epoch": 0.01, "learning_rate": 2.1149999999999996e-05, "loss": 5.9305, "step": 850 }, { "epoch": 0.01, "learning_rate": 2.14e-05, "loss": 5.9071, "step": 860 }, { "epoch": 0.01, "learning_rate": 2.1649999999999998e-05, "loss": 5.8489, "step": 870 }, { "epoch": 0.01, "learning_rate": 2.1899999999999997e-05, "loss": 5.8171, "step": 880 }, { "epoch": 0.01, "learning_rate": 2.215e-05, "loss": 5.7521, "step": 890 }, { "epoch": 0.01, "learning_rate": 2.24e-05, "loss": 5.748, "step": 900 }, { "epoch": 0.01, "learning_rate": 2.2649999999999998e-05, "loss": 5.7184, "step": 910 }, { "epoch": 0.01, "learning_rate": 2.2899999999999998e-05, "loss": 5.7007, "step": 920 }, { "epoch": 0.01, "learning_rate": 2.3149999999999997e-05, "loss": 5.7113, "step": 930 }, { "epoch": 0.01, "learning_rate": 2.34e-05, "loss": 5.6782, "step": 940 }, { "epoch": 0.01, "learning_rate": 2.365e-05, "loss": 5.6706, "step": 950 }, { "epoch": 0.01, "learning_rate": 2.3899999999999998e-05, "loss": 5.6403, "step": 960 }, { "epoch": 0.01, "learning_rate": 2.4149999999999997e-05, "loss": 5.6065, "step": 970 }, { "epoch": 0.01, "learning_rate": 2.4399999999999997e-05, "loss": 5.5808, "step": 980 }, { "epoch": 0.01, "learning_rate": 2.4649999999999996e-05, "loss": 5.5483, "step": 990 }, { "epoch": 0.01, "learning_rate": 2.49e-05, "loss": 5.5431, "step": 1000 }, { "epoch": 0.01, "learning_rate": 2.5149999999999998e-05, "loss": 5.5711, "step": 1010 }, { "epoch": 0.01, "learning_rate": 2.5399999999999997e-05, "loss": 5.5249, "step": 1020 }, { "epoch": 0.01, "learning_rate": 2.565e-05, "loss": 5.483, "step": 1030 }, { "epoch": 0.01, "learning_rate": 2.5899999999999996e-05, "loss": 5.4368, "step": 1040 }, { "epoch": 0.01, "learning_rate": 2.615e-05, "loss": 5.4181, "step": 1050 }, { "epoch": 0.01, "learning_rate": 2.6399999999999995e-05, "loss": 5.4194, "step": 1060 }, { "epoch": 0.01, "learning_rate": 2.6649999999999997e-05, "loss": 5.3955, "step": 1070 }, { "epoch": 0.01, "learning_rate": 2.69e-05, "loss": 5.4054, "step": 1080 }, { "epoch": 0.01, "learning_rate": 2.7149999999999996e-05, "loss": 5.356, "step": 1090 }, { "epoch": 0.01, "learning_rate": 2.74e-05, "loss": 5.3373, "step": 1100 }, { "epoch": 0.01, "learning_rate": 2.7649999999999994e-05, "loss": 5.289, "step": 1110 }, { "epoch": 0.01, "learning_rate": 2.7899999999999997e-05, "loss": 5.3229, "step": 1120 }, { "epoch": 0.01, "learning_rate": 2.815e-05, "loss": 5.2827, "step": 1130 }, { "epoch": 0.01, "learning_rate": 2.8399999999999996e-05, "loss": 5.2503, "step": 1140 }, { "epoch": 0.01, "learning_rate": 2.865e-05, "loss": 5.179, "step": 1150 }, { "epoch": 0.01, "learning_rate": 2.89e-05, "loss": 5.1685, "step": 1160 }, { "epoch": 0.01, "learning_rate": 2.9149999999999997e-05, "loss": 5.2519, "step": 1170 }, { "epoch": 0.01, "learning_rate": 2.94e-05, "loss": 5.2074, "step": 1180 }, { "epoch": 0.01, "learning_rate": 2.9649999999999996e-05, "loss": 5.172, "step": 1190 }, { "epoch": 0.01, "learning_rate": 2.99e-05, "loss": 5.1738, "step": 1200 }, { "epoch": 0.01, "learning_rate": 3.0149999999999998e-05, "loss": 5.1721, "step": 1210 }, { "epoch": 0.01, "learning_rate": 3.0399999999999997e-05, "loss": 5.1373, "step": 1220 }, { "epoch": 0.01, "learning_rate": 3.065e-05, "loss": 5.0675, "step": 1230 }, { "epoch": 0.01, "learning_rate": 3.09e-05, "loss": 5.0579, "step": 1240 }, { "epoch": 0.01, "learning_rate": 3.115e-05, "loss": 4.9938, "step": 1250 }, { "epoch": 0.01, "learning_rate": 3.14e-05, "loss": 5.0288, "step": 1260 }, { "epoch": 0.01, "learning_rate": 3.165e-05, "loss": 5.0438, "step": 1270 }, { "epoch": 0.01, "learning_rate": 3.1899999999999996e-05, "loss": 4.9984, "step": 1280 }, { "epoch": 0.01, "learning_rate": 3.2149999999999995e-05, "loss": 5.0142, "step": 1290 }, { "epoch": 0.01, "learning_rate": 3.2399999999999995e-05, "loss": 4.9647, "step": 1300 }, { "epoch": 0.01, "learning_rate": 3.265e-05, "loss": 4.9983, "step": 1310 }, { "epoch": 0.01, "learning_rate": 3.289999999999999e-05, "loss": 4.9675, "step": 1320 }, { "epoch": 0.01, "learning_rate": 3.315e-05, "loss": 4.9787, "step": 1330 }, { "epoch": 0.01, "learning_rate": 3.34e-05, "loss": 4.9638, "step": 1340 }, { "epoch": 0.01, "learning_rate": 3.365e-05, "loss": 4.8791, "step": 1350 }, { "epoch": 0.01, "learning_rate": 3.39e-05, "loss": 4.8835, "step": 1360 }, { "epoch": 0.01, "learning_rate": 3.4149999999999997e-05, "loss": 4.8899, "step": 1370 }, { "epoch": 0.01, "learning_rate": 3.4399999999999996e-05, "loss": 4.8351, "step": 1380 }, { "epoch": 0.01, "learning_rate": 3.465e-05, "loss": 4.8162, "step": 1390 }, { "epoch": 0.01, "learning_rate": 3.4899999999999995e-05, "loss": 4.8588, "step": 1400 }, { "epoch": 0.01, "learning_rate": 3.515e-05, "loss": 4.8019, "step": 1410 }, { "epoch": 0.01, "learning_rate": 3.539999999999999e-05, "loss": 4.8557, "step": 1420 }, { "epoch": 0.01, "learning_rate": 3.565e-05, "loss": 4.8064, "step": 1430 }, { "epoch": 0.01, "learning_rate": 3.59e-05, "loss": 4.7612, "step": 1440 }, { "epoch": 0.01, "learning_rate": 3.615e-05, "loss": 4.7071, "step": 1450 }, { "epoch": 0.01, "learning_rate": 3.64e-05, "loss": 4.727, "step": 1460 }, { "epoch": 0.01, "learning_rate": 3.6649999999999996e-05, "loss": 4.686, "step": 1470 }, { "epoch": 0.01, "learning_rate": 3.6899999999999996e-05, "loss": 4.6832, "step": 1480 }, { "epoch": 0.01, "learning_rate": 3.7149999999999995e-05, "loss": 4.6862, "step": 1490 }, { "epoch": 0.01, "learning_rate": 3.7399999999999994e-05, "loss": 4.6609, "step": 1500 }, { "epoch": 0.02, "learning_rate": 3.7649999999999994e-05, "loss": 4.6903, "step": 1510 }, { "epoch": 0.02, "learning_rate": 3.789999999999999e-05, "loss": 4.6529, "step": 1520 }, { "epoch": 0.02, "learning_rate": 3.815e-05, "loss": 4.6384, "step": 1530 }, { "epoch": 0.02, "learning_rate": 3.84e-05, "loss": 4.6419, "step": 1540 }, { "epoch": 0.02, "learning_rate": 3.865e-05, "loss": 4.5833, "step": 1550 }, { "epoch": 0.02, "learning_rate": 3.8900000000000004e-05, "loss": 4.5477, "step": 1560 }, { "epoch": 0.02, "learning_rate": 3.9149999999999996e-05, "loss": 4.6188, "step": 1570 }, { "epoch": 0.02, "learning_rate": 3.9399999999999995e-05, "loss": 4.5747, "step": 1580 }, { "epoch": 0.02, "learning_rate": 3.9649999999999995e-05, "loss": 4.4917, "step": 1590 }, { "epoch": 0.02, "learning_rate": 3.99e-05, "loss": 4.4842, "step": 1600 }, { "epoch": 0.02, "learning_rate": 4.014999999999999e-05, "loss": 4.5239, "step": 1610 }, { "epoch": 0.02, "learning_rate": 4.039999999999999e-05, "loss": 4.4849, "step": 1620 }, { "epoch": 0.02, "learning_rate": 4.065e-05, "loss": 4.4855, "step": 1630 }, { "epoch": 0.02, "learning_rate": 4.09e-05, "loss": 4.4695, "step": 1640 }, { "epoch": 0.02, "learning_rate": 4.115e-05, "loss": 4.462, "step": 1650 }, { "epoch": 0.02, "learning_rate": 4.14e-05, "loss": 4.4728, "step": 1660 }, { "epoch": 0.02, "learning_rate": 4.1649999999999996e-05, "loss": 4.4426, "step": 1670 }, { "epoch": 0.02, "learning_rate": 4.1899999999999995e-05, "loss": 4.4123, "step": 1680 }, { "epoch": 0.02, "learning_rate": 4.215e-05, "loss": 4.4026, "step": 1690 }, { "epoch": 0.02, "learning_rate": 4.24e-05, "loss": 4.4942, "step": 1700 }, { "epoch": 0.02, "learning_rate": 4.264999999999999e-05, "loss": 4.4393, "step": 1710 }, { "epoch": 0.02, "learning_rate": 4.289999999999999e-05, "loss": 4.3781, "step": 1720 }, { "epoch": 0.02, "learning_rate": 4.315e-05, "loss": 4.36, "step": 1730 }, { "epoch": 0.02, "learning_rate": 4.34e-05, "loss": 4.4202, "step": 1740 }, { "epoch": 0.02, "learning_rate": 4.364999999999999e-05, "loss": 4.4418, "step": 1750 }, { "epoch": 0.02, "learning_rate": 4.3899999999999996e-05, "loss": 4.3932, "step": 1760 }, { "epoch": 0.02, "learning_rate": 4.4149999999999996e-05, "loss": 4.3306, "step": 1770 }, { "epoch": 0.02, "learning_rate": 4.4399999999999995e-05, "loss": 4.2984, "step": 1780 }, { "epoch": 0.02, "learning_rate": 4.465e-05, "loss": 4.2832, "step": 1790 }, { "epoch": 0.02, "learning_rate": 4.49e-05, "loss": 4.2793, "step": 1800 }, { "epoch": 0.02, "learning_rate": 4.514999999999999e-05, "loss": 4.2818, "step": 1810 }, { "epoch": 0.02, "learning_rate": 4.539999999999999e-05, "loss": 4.2509, "step": 1820 }, { "epoch": 0.02, "learning_rate": 4.565e-05, "loss": 4.2453, "step": 1830 }, { "epoch": 0.02, "learning_rate": 4.59e-05, "loss": 4.1921, "step": 1840 }, { "epoch": 0.02, "learning_rate": 4.614999999999999e-05, "loss": 4.2396, "step": 1850 }, { "epoch": 0.02, "learning_rate": 4.6399999999999996e-05, "loss": 4.2299, "step": 1860 }, { "epoch": 0.02, "learning_rate": 4.6649999999999996e-05, "loss": 4.192, "step": 1870 }, { "epoch": 0.02, "learning_rate": 4.6899999999999995e-05, "loss": 4.2031, "step": 1880 }, { "epoch": 0.02, "learning_rate": 4.715e-05, "loss": 4.2628, "step": 1890 }, { "epoch": 0.02, "learning_rate": 4.7399999999999993e-05, "loss": 4.2162, "step": 1900 }, { "epoch": 0.02, "learning_rate": 4.764999999999999e-05, "loss": 4.1488, "step": 1910 }, { "epoch": 0.02, "learning_rate": 4.79e-05, "loss": 4.1782, "step": 1920 }, { "epoch": 0.02, "learning_rate": 4.815e-05, "loss": 4.164, "step": 1930 }, { "epoch": 0.02, "learning_rate": 4.84e-05, "loss": 4.1491, "step": 1940 }, { "epoch": 0.02, "learning_rate": 4.864999999999999e-05, "loss": 4.1645, "step": 1950 }, { "epoch": 0.02, "learning_rate": 4.8899999999999996e-05, "loss": 4.1327, "step": 1960 }, { "epoch": 0.02, "learning_rate": 4.9149999999999995e-05, "loss": 4.1126, "step": 1970 }, { "epoch": 0.02, "learning_rate": 4.9399999999999995e-05, "loss": 4.0764, "step": 1980 }, { "epoch": 0.02, "learning_rate": 4.965e-05, "loss": 4.1517, "step": 1990 }, { "epoch": 0.02, "learning_rate": 4.989999999999999e-05, "loss": 4.1227, "step": 2000 }, { "epoch": 0.02, "learning_rate": 5.014999999999999e-05, "loss": 4.0753, "step": 2010 }, { "epoch": 0.02, "learning_rate": 5.04e-05, "loss": 4.0627, "step": 2020 }, { "epoch": 0.02, "learning_rate": 5.065e-05, "loss": 4.0252, "step": 2030 }, { "epoch": 0.02, "learning_rate": 5.089999999999999e-05, "loss": 4.0361, "step": 2040 }, { "epoch": 0.02, "learning_rate": 5.1149999999999996e-05, "loss": 4.0053, "step": 2050 }, { "epoch": 0.02, "learning_rate": 5.1399999999999996e-05, "loss": 4.0252, "step": 2060 }, { "epoch": 0.02, "learning_rate": 5.1649999999999995e-05, "loss": 4.001, "step": 2070 }, { "epoch": 0.02, "learning_rate": 5.1899999999999994e-05, "loss": 3.9855, "step": 2080 }, { "epoch": 0.02, "learning_rate": 5.215e-05, "loss": 3.9878, "step": 2090 }, { "epoch": 0.02, "learning_rate": 5.239999999999999e-05, "loss": 3.989, "step": 2100 }, { "epoch": 0.02, "learning_rate": 5.264999999999999e-05, "loss": 3.9635, "step": 2110 }, { "epoch": 0.02, "learning_rate": 5.29e-05, "loss": 4.0035, "step": 2120 }, { "epoch": 0.02, "learning_rate": 5.315e-05, "loss": 3.9413, "step": 2130 }, { "epoch": 0.02, "learning_rate": 5.339999999999999e-05, "loss": 3.8845, "step": 2140 }, { "epoch": 0.02, "learning_rate": 5.3649999999999996e-05, "loss": 3.8886, "step": 2150 }, { "epoch": 0.02, "learning_rate": 5.3899999999999996e-05, "loss": 3.9756, "step": 2160 }, { "epoch": 0.02, "learning_rate": 5.4149999999999995e-05, "loss": 3.9235, "step": 2170 }, { "epoch": 0.02, "learning_rate": 5.44e-05, "loss": 3.9015, "step": 2180 }, { "epoch": 0.02, "learning_rate": 5.4649999999999993e-05, "loss": 3.9052, "step": 2190 }, { "epoch": 0.02, "learning_rate": 5.489999999999999e-05, "loss": 3.9044, "step": 2200 }, { "epoch": 0.02, "learning_rate": 5.514999999999999e-05, "loss": 3.8846, "step": 2210 }, { "epoch": 0.02, "learning_rate": 5.54e-05, "loss": 3.902, "step": 2220 }, { "epoch": 0.02, "learning_rate": 5.565e-05, "loss": 3.8615, "step": 2230 }, { "epoch": 0.02, "learning_rate": 5.589999999999999e-05, "loss": 3.8604, "step": 2240 }, { "epoch": 0.02, "learning_rate": 5.6149999999999996e-05, "loss": 3.8167, "step": 2250 }, { "epoch": 0.02, "learning_rate": 5.6399999999999995e-05, "loss": 3.819, "step": 2260 }, { "epoch": 0.02, "learning_rate": 5.6649999999999995e-05, "loss": 3.7915, "step": 2270 }, { "epoch": 0.02, "learning_rate": 5.69e-05, "loss": 3.7781, "step": 2280 }, { "epoch": 0.02, "learning_rate": 5.714999999999999e-05, "loss": 3.7887, "step": 2290 }, { "epoch": 0.02, "learning_rate": 5.739999999999999e-05, "loss": 3.8007, "step": 2300 }, { "epoch": 0.02, "learning_rate": 5.765e-05, "loss": 3.7612, "step": 2310 }, { "epoch": 0.02, "learning_rate": 5.79e-05, "loss": 3.75, "step": 2320 }, { "epoch": 0.02, "learning_rate": 5.814999999999999e-05, "loss": 3.7403, "step": 2330 }, { "epoch": 0.02, "learning_rate": 5.839999999999999e-05, "loss": 3.7463, "step": 2340 }, { "epoch": 0.02, "learning_rate": 5.8649999999999996e-05, "loss": 3.7643, "step": 2350 }, { "epoch": 0.02, "learning_rate": 5.8899999999999995e-05, "loss": 3.7407, "step": 2360 }, { "epoch": 0.02, "learning_rate": 5.9149999999999994e-05, "loss": 3.7601, "step": 2370 }, { "epoch": 0.02, "learning_rate": 5.94e-05, "loss": 3.6923, "step": 2380 }, { "epoch": 0.02, "learning_rate": 5.964999999999999e-05, "loss": 3.7234, "step": 2390 }, { "epoch": 0.02, "learning_rate": 5.989999999999999e-05, "loss": 3.6752, "step": 2400 }, { "epoch": 0.02, "learning_rate": 6.015e-05, "loss": 3.6437, "step": 2410 }, { "epoch": 0.02, "learning_rate": 6.04e-05, "loss": 3.6805, "step": 2420 }, { "epoch": 0.02, "learning_rate": 6.064999999999999e-05, "loss": 3.7198, "step": 2430 }, { "epoch": 0.02, "learning_rate": 6.0899999999999996e-05, "loss": 3.6588, "step": 2440 }, { "epoch": 0.02, "learning_rate": 6.115e-05, "loss": 3.6113, "step": 2450 }, { "epoch": 0.02, "learning_rate": 6.139999999999999e-05, "loss": 3.6444, "step": 2460 }, { "epoch": 0.02, "learning_rate": 6.165e-05, "loss": 3.6489, "step": 2470 }, { "epoch": 0.02, "learning_rate": 6.19e-05, "loss": 3.5805, "step": 2480 }, { "epoch": 0.02, "learning_rate": 6.214999999999999e-05, "loss": 3.5343, "step": 2490 }, { "epoch": 0.03, "learning_rate": 6.239999999999999e-05, "loss": 3.5642, "step": 2500 }, { "epoch": 0.03, "eval_accuracy": 0.4383129848262597, "eval_loss": 3.9296875, "eval_runtime": 97.6812, "eval_samples_per_second": 818.99, "eval_steps_per_second": 1.607, "step": 2500 }, { "epoch": 0.03, "learning_rate": 6.264999999999999e-05, "loss": 3.5807, "step": 2510 }, { "epoch": 0.03, "learning_rate": 6.29e-05, "loss": 3.5057, "step": 2520 }, { "epoch": 0.03, "learning_rate": 6.314999999999999e-05, "loss": 3.5416, "step": 2530 }, { "epoch": 0.03, "learning_rate": 6.34e-05, "loss": 3.5582, "step": 2540 }, { "epoch": 0.03, "learning_rate": 6.365e-05, "loss": 3.5261, "step": 2550 }, { "epoch": 0.03, "learning_rate": 6.39e-05, "loss": 3.5442, "step": 2560 }, { "epoch": 0.03, "learning_rate": 6.414999999999999e-05, "loss": 3.5676, "step": 2570 }, { "epoch": 0.03, "learning_rate": 6.44e-05, "loss": 3.514, "step": 2580 }, { "epoch": 0.03, "learning_rate": 6.465e-05, "loss": 3.5178, "step": 2590 }, { "epoch": 0.03, "learning_rate": 6.489999999999999e-05, "loss": 3.4848, "step": 2600 }, { "epoch": 0.03, "learning_rate": 6.515e-05, "loss": 3.4749, "step": 2610 }, { "epoch": 0.03, "learning_rate": 6.539999999999999e-05, "loss": 3.4313, "step": 2620 }, { "epoch": 0.03, "learning_rate": 6.565e-05, "loss": 3.473, "step": 2630 }, { "epoch": 0.03, "learning_rate": 6.59e-05, "loss": 3.4574, "step": 2640 }, { "epoch": 0.03, "learning_rate": 6.615e-05, "loss": 3.4329, "step": 2650 }, { "epoch": 0.03, "learning_rate": 6.639999999999999e-05, "loss": 3.4348, "step": 2660 }, { "epoch": 0.03, "learning_rate": 6.665e-05, "loss": 3.4441, "step": 2670 }, { "epoch": 0.03, "learning_rate": 6.69e-05, "loss": 3.4589, "step": 2680 }, { "epoch": 0.03, "learning_rate": 6.714999999999999e-05, "loss": 3.4277, "step": 2690 }, { "epoch": 0.03, "learning_rate": 6.739999999999998e-05, "loss": 3.4026, "step": 2700 }, { "epoch": 0.03, "learning_rate": 6.764999999999999e-05, "loss": 3.3649, "step": 2710 }, { "epoch": 0.03, "learning_rate": 6.79e-05, "loss": 3.356, "step": 2720 }, { "epoch": 0.03, "learning_rate": 6.814999999999999e-05, "loss": 3.3409, "step": 2730 }, { "epoch": 0.03, "learning_rate": 6.84e-05, "loss": 3.3401, "step": 2740 }, { "epoch": 0.03, "learning_rate": 6.864999999999999e-05, "loss": 3.3325, "step": 2750 }, { "epoch": 0.03, "learning_rate": 6.89e-05, "loss": 3.3165, "step": 2760 }, { "epoch": 0.03, "learning_rate": 6.915e-05, "loss": 3.3381, "step": 2770 }, { "epoch": 0.03, "learning_rate": 6.939999999999999e-05, "loss": 3.3134, "step": 2780 }, { "epoch": 0.03, "learning_rate": 6.964999999999999e-05, "loss": 3.3009, "step": 2790 }, { "epoch": 0.03, "learning_rate": 6.989999999999999e-05, "loss": 3.289, "step": 2800 }, { "epoch": 0.03, "learning_rate": 7.015e-05, "loss": 3.2767, "step": 2810 }, { "epoch": 0.03, "learning_rate": 7.039999999999999e-05, "loss": 3.2677, "step": 2820 }, { "epoch": 0.03, "learning_rate": 7.065e-05, "loss": 3.2375, "step": 2830 }, { "epoch": 0.03, "learning_rate": 7.09e-05, "loss": 3.2508, "step": 2840 }, { "epoch": 0.03, "learning_rate": 7.115e-05, "loss": 3.2772, "step": 2850 }, { "epoch": 0.03, "learning_rate": 7.139999999999999e-05, "loss": 3.1986, "step": 2860 }, { "epoch": 0.03, "learning_rate": 7.165e-05, "loss": 3.2352, "step": 2870 }, { "epoch": 0.03, "learning_rate": 7.19e-05, "loss": 3.2138, "step": 2880 }, { "epoch": 0.03, "learning_rate": 7.214999999999999e-05, "loss": 3.207, "step": 2890 }, { "epoch": 0.03, "learning_rate": 7.24e-05, "loss": 3.189, "step": 2900 }, { "epoch": 0.03, "learning_rate": 7.264999999999999e-05, "loss": 3.129, "step": 2910 }, { "epoch": 0.03, "learning_rate": 7.29e-05, "loss": 3.149, "step": 2920 }, { "epoch": 0.03, "learning_rate": 7.315e-05, "loss": 3.167, "step": 2930 }, { "epoch": 0.03, "learning_rate": 7.34e-05, "loss": 3.1163, "step": 2940 }, { "epoch": 0.03, "learning_rate": 7.364999999999999e-05, "loss": 3.1226, "step": 2950 }, { "epoch": 0.03, "learning_rate": 7.39e-05, "loss": 3.08, "step": 2960 }, { "epoch": 0.03, "learning_rate": 7.415e-05, "loss": 3.1136, "step": 2970 }, { "epoch": 0.03, "learning_rate": 7.439999999999999e-05, "loss": 3.0943, "step": 2980 }, { "epoch": 0.03, "learning_rate": 7.464999999999998e-05, "loss": 3.0379, "step": 2990 }, { "epoch": 0.03, "learning_rate": 7.489999999999999e-05, "loss": 3.0346, "step": 3000 }, { "epoch": 0.03, "learning_rate": 7.515e-05, "loss": 3.0365, "step": 3010 }, { "epoch": 0.03, "learning_rate": 7.54e-05, "loss": 3.0473, "step": 3020 }, { "epoch": 0.03, "learning_rate": 7.564999999999998e-05, "loss": 3.0086, "step": 3030 }, { "epoch": 0.03, "learning_rate": 7.589999999999999e-05, "loss": 3.0474, "step": 3040 }, { "epoch": 0.03, "learning_rate": 7.615e-05, "loss": 3.0107, "step": 3050 }, { "epoch": 0.03, "learning_rate": 7.639999999999999e-05, "loss": 2.9755, "step": 3060 }, { "epoch": 0.03, "learning_rate": 7.664999999999999e-05, "loss": 2.9917, "step": 3070 }, { "epoch": 0.03, "learning_rate": 7.69e-05, "loss": 2.952, "step": 3080 }, { "epoch": 0.03, "learning_rate": 7.714999999999999e-05, "loss": 2.9553, "step": 3090 }, { "epoch": 0.03, "learning_rate": 7.74e-05, "loss": 2.9484, "step": 3100 }, { "epoch": 0.03, "learning_rate": 7.765e-05, "loss": 3.0178, "step": 3110 }, { "epoch": 0.03, "learning_rate": 7.79e-05, "loss": 2.9621, "step": 3120 }, { "epoch": 0.03, "learning_rate": 7.815e-05, "loss": 2.9268, "step": 3130 }, { "epoch": 0.03, "learning_rate": 7.839999999999998e-05, "loss": 2.9683, "step": 3140 }, { "epoch": 0.03, "learning_rate": 7.864999999999999e-05, "loss": 2.9026, "step": 3150 }, { "epoch": 0.03, "learning_rate": 7.89e-05, "loss": 2.8892, "step": 3160 }, { "epoch": 0.03, "learning_rate": 7.914999999999999e-05, "loss": 2.8845, "step": 3170 }, { "epoch": 0.03, "learning_rate": 7.939999999999999e-05, "loss": 2.8826, "step": 3180 }, { "epoch": 0.03, "learning_rate": 7.965e-05, "loss": 2.8888, "step": 3190 }, { "epoch": 0.03, "learning_rate": 7.989999999999999e-05, "loss": 2.8068, "step": 3200 }, { "epoch": 0.03, "learning_rate": 8.015e-05, "loss": 2.8483, "step": 3210 }, { "epoch": 0.03, "learning_rate": 8.04e-05, "loss": 2.8422, "step": 3220 }, { "epoch": 0.03, "learning_rate": 8.064999999999998e-05, "loss": 2.8175, "step": 3230 }, { "epoch": 0.03, "learning_rate": 8.089999999999999e-05, "loss": 2.8425, "step": 3240 }, { "epoch": 0.03, "learning_rate": 8.115e-05, "loss": 2.826, "step": 3250 }, { "epoch": 0.03, "learning_rate": 8.139999999999999e-05, "loss": 2.7938, "step": 3260 }, { "epoch": 0.03, "learning_rate": 8.164999999999999e-05, "loss": 2.7908, "step": 3270 }, { "epoch": 0.03, "learning_rate": 8.19e-05, "loss": 2.7734, "step": 3280 }, { "epoch": 0.03, "learning_rate": 8.214999999999999e-05, "loss": 2.8051, "step": 3290 }, { "epoch": 0.03, "learning_rate": 8.24e-05, "loss": 2.7474, "step": 3300 }, { "epoch": 0.03, "learning_rate": 8.265e-05, "loss": 2.7743, "step": 3310 }, { "epoch": 0.03, "learning_rate": 8.289999999999998e-05, "loss": 2.7415, "step": 3320 }, { "epoch": 0.03, "learning_rate": 8.314999999999999e-05, "loss": 2.7359, "step": 3330 }, { "epoch": 0.03, "learning_rate": 8.34e-05, "loss": 2.7306, "step": 3340 }, { "epoch": 0.03, "learning_rate": 8.364999999999999e-05, "loss": 2.7382, "step": 3350 }, { "epoch": 0.03, "learning_rate": 8.389999999999999e-05, "loss": 2.7887, "step": 3360 }, { "epoch": 0.03, "learning_rate": 8.415e-05, "loss": 2.7417, "step": 3370 }, { "epoch": 0.03, "learning_rate": 8.439999999999999e-05, "loss": 2.7036, "step": 3380 }, { "epoch": 0.03, "learning_rate": 8.465e-05, "loss": 2.6724, "step": 3390 }, { "epoch": 0.03, "learning_rate": 8.489999999999999e-05, "loss": 2.6508, "step": 3400 }, { "epoch": 0.03, "learning_rate": 8.515e-05, "loss": 2.6735, "step": 3410 }, { "epoch": 0.03, "learning_rate": 8.54e-05, "loss": 2.652, "step": 3420 }, { "epoch": 0.03, "learning_rate": 8.564999999999998e-05, "loss": 2.6687, "step": 3430 }, { "epoch": 0.03, "learning_rate": 8.589999999999999e-05, "loss": 2.6581, "step": 3440 }, { "epoch": 0.03, "learning_rate": 8.615e-05, "loss": 2.6346, "step": 3450 }, { "epoch": 0.03, "learning_rate": 8.639999999999999e-05, "loss": 2.6429, "step": 3460 }, { "epoch": 0.03, "learning_rate": 8.664999999999999e-05, "loss": 2.6137, "step": 3470 }, { "epoch": 0.03, "learning_rate": 8.69e-05, "loss": 2.6249, "step": 3480 }, { "epoch": 0.03, "learning_rate": 8.714999999999999e-05, "loss": 2.6009, "step": 3490 }, { "epoch": 0.04, "learning_rate": 8.74e-05, "loss": 2.6004, "step": 3500 }, { "epoch": 0.04, "learning_rate": 8.765e-05, "loss": 2.5755, "step": 3510 }, { "epoch": 0.04, "learning_rate": 8.789999999999998e-05, "loss": 2.5508, "step": 3520 }, { "epoch": 0.04, "learning_rate": 8.814999999999999e-05, "loss": 2.5676, "step": 3530 }, { "epoch": 0.04, "learning_rate": 8.84e-05, "loss": 2.5596, "step": 3540 }, { "epoch": 0.04, "learning_rate": 8.864999999999999e-05, "loss": 2.5754, "step": 3550 }, { "epoch": 0.04, "learning_rate": 8.889999999999999e-05, "loss": 2.5696, "step": 3560 }, { "epoch": 0.04, "learning_rate": 8.915e-05, "loss": 2.542, "step": 3570 }, { "epoch": 0.04, "learning_rate": 8.939999999999999e-05, "loss": 2.5561, "step": 3580 }, { "epoch": 0.04, "learning_rate": 8.965e-05, "loss": 2.5059, "step": 3590 }, { "epoch": 0.04, "learning_rate": 8.99e-05, "loss": 2.5288, "step": 3600 }, { "epoch": 0.04, "learning_rate": 9.014999999999998e-05, "loss": 2.5039, "step": 3610 }, { "epoch": 0.04, "learning_rate": 9.039999999999999e-05, "loss": 2.5243, "step": 3620 }, { "epoch": 0.04, "learning_rate": 9.064999999999998e-05, "loss": 2.4772, "step": 3630 }, { "epoch": 0.04, "learning_rate": 9.089999999999999e-05, "loss": 2.4543, "step": 3640 }, { "epoch": 0.04, "learning_rate": 9.114999999999999e-05, "loss": 2.4743, "step": 3650 }, { "epoch": 0.04, "learning_rate": 9.139999999999999e-05, "loss": 2.488, "step": 3660 }, { "epoch": 0.04, "learning_rate": 9.164999999999999e-05, "loss": 2.4918, "step": 3670 }, { "epoch": 0.04, "learning_rate": 9.19e-05, "loss": 2.4866, "step": 3680 }, { "epoch": 0.04, "learning_rate": 9.214999999999999e-05, "loss": 2.4649, "step": 3690 }, { "epoch": 0.04, "learning_rate": 9.24e-05, "loss": 2.4613, "step": 3700 }, { "epoch": 0.04, "learning_rate": 9.265e-05, "loss": 2.4337, "step": 3710 }, { "epoch": 0.04, "learning_rate": 9.289999999999998e-05, "loss": 2.4073, "step": 3720 }, { "epoch": 0.04, "learning_rate": 9.314999999999999e-05, "loss": 2.4314, "step": 3730 }, { "epoch": 0.04, "learning_rate": 9.34e-05, "loss": 2.4311, "step": 3740 }, { "epoch": 0.04, "learning_rate": 9.364999999999999e-05, "loss": 2.4553, "step": 3750 }, { "epoch": 0.04, "learning_rate": 9.389999999999999e-05, "loss": 2.4242, "step": 3760 }, { "epoch": 0.04, "learning_rate": 9.415e-05, "loss": 2.3964, "step": 3770 }, { "epoch": 0.04, "learning_rate": 9.439999999999999e-05, "loss": 2.3865, "step": 3780 }, { "epoch": 0.04, "learning_rate": 9.465e-05, "loss": 2.4134, "step": 3790 }, { "epoch": 0.04, "learning_rate": 9.49e-05, "loss": 2.3987, "step": 3800 }, { "epoch": 0.04, "learning_rate": 9.514999999999998e-05, "loss": 2.3879, "step": 3810 }, { "epoch": 0.04, "learning_rate": 9.539999999999999e-05, "loss": 2.3892, "step": 3820 }, { "epoch": 0.04, "learning_rate": 9.565e-05, "loss": 2.3583, "step": 3830 }, { "epoch": 0.04, "learning_rate": 9.589999999999999e-05, "loss": 2.3354, "step": 3840 }, { "epoch": 0.04, "learning_rate": 9.614999999999999e-05, "loss": 2.3411, "step": 3850 }, { "epoch": 0.04, "learning_rate": 9.64e-05, "loss": 2.3319, "step": 3860 }, { "epoch": 0.04, "learning_rate": 9.664999999999999e-05, "loss": 2.339, "step": 3870 }, { "epoch": 0.04, "learning_rate": 9.69e-05, "loss": 2.3017, "step": 3880 }, { "epoch": 0.04, "learning_rate": 9.714999999999999e-05, "loss": 2.3514, "step": 3890 }, { "epoch": 0.04, "learning_rate": 9.74e-05, "loss": 2.3212, "step": 3900 }, { "epoch": 0.04, "learning_rate": 9.764999999999999e-05, "loss": 2.3247, "step": 3910 }, { "epoch": 0.04, "learning_rate": 9.789999999999998e-05, "loss": 2.3187, "step": 3920 }, { "epoch": 0.04, "learning_rate": 9.814999999999999e-05, "loss": 2.302, "step": 3930 }, { "epoch": 0.04, "learning_rate": 9.839999999999999e-05, "loss": 2.3278, "step": 3940 }, { "epoch": 0.04, "learning_rate": 9.864999999999999e-05, "loss": 2.321, "step": 3950 }, { "epoch": 0.04, "learning_rate": 9.889999999999999e-05, "loss": 2.2785, "step": 3960 }, { "epoch": 0.04, "learning_rate": 9.915e-05, "loss": 2.2715, "step": 3970 }, { "epoch": 0.04, "learning_rate": 9.939999999999999e-05, "loss": 2.2599, "step": 3980 }, { "epoch": 0.04, "learning_rate": 9.965e-05, "loss": 2.2777, "step": 3990 }, { "epoch": 0.04, "learning_rate": 9.99e-05, "loss": 2.2513, "step": 4000 }, { "epoch": 0.04, "learning_rate": 0.00010014999999999998, "loss": 2.2255, "step": 4010 }, { "epoch": 0.04, "learning_rate": 0.00010039999999999999, "loss": 2.2582, "step": 4020 }, { "epoch": 0.04, "learning_rate": 0.00010065, "loss": 2.2871, "step": 4030 }, { "epoch": 0.04, "learning_rate": 0.00010089999999999999, "loss": 2.2631, "step": 4040 }, { "epoch": 0.04, "learning_rate": 0.00010114999999999999, "loss": 2.2364, "step": 4050 }, { "epoch": 0.04, "learning_rate": 0.0001014, "loss": 2.2495, "step": 4060 }, { "epoch": 0.04, "learning_rate": 0.00010164999999999999, "loss": 2.218, "step": 4070 }, { "epoch": 0.04, "learning_rate": 0.0001019, "loss": 2.2098, "step": 4080 }, { "epoch": 0.04, "learning_rate": 0.00010215, "loss": 2.2088, "step": 4090 }, { "epoch": 0.04, "learning_rate": 0.00010239999999999998, "loss": 2.1903, "step": 4100 }, { "epoch": 0.04, "learning_rate": 0.00010264999999999999, "loss": 2.2031, "step": 4110 }, { "epoch": 0.04, "learning_rate": 0.0001029, "loss": 2.1972, "step": 4120 }, { "epoch": 0.04, "learning_rate": 0.00010314999999999999, "loss": 2.1614, "step": 4130 }, { "epoch": 0.04, "learning_rate": 0.00010339999999999999, "loss": 2.1714, "step": 4140 }, { "epoch": 0.04, "learning_rate": 0.00010364999999999999, "loss": 2.1924, "step": 4150 }, { "epoch": 0.04, "learning_rate": 0.00010389999999999999, "loss": 2.1667, "step": 4160 }, { "epoch": 0.04, "learning_rate": 0.00010415, "loss": 2.1696, "step": 4170 }, { "epoch": 0.04, "learning_rate": 0.00010439999999999999, "loss": 2.1879, "step": 4180 }, { "epoch": 0.04, "learning_rate": 0.00010465, "loss": 2.1699, "step": 4190 }, { "epoch": 0.04, "learning_rate": 0.0001049, "loss": 2.1627, "step": 4200 }, { "epoch": 0.04, "learning_rate": 0.00010514999999999998, "loss": 2.1355, "step": 4210 }, { "epoch": 0.04, "learning_rate": 0.00010539999999999999, "loss": 2.1718, "step": 4220 }, { "epoch": 0.04, "learning_rate": 0.00010564999999999999, "loss": 2.1131, "step": 4230 }, { "epoch": 0.04, "learning_rate": 0.00010589999999999999, "loss": 2.099, "step": 4240 }, { "epoch": 0.04, "learning_rate": 0.00010614999999999999, "loss": 2.138, "step": 4250 }, { "epoch": 0.04, "learning_rate": 0.0001064, "loss": 2.1346, "step": 4260 }, { "epoch": 0.04, "learning_rate": 0.00010664999999999999, "loss": 2.1164, "step": 4270 }, { "epoch": 0.04, "learning_rate": 0.0001069, "loss": 2.1027, "step": 4280 }, { "epoch": 0.04, "learning_rate": 0.00010715, "loss": 2.1362, "step": 4290 }, { "epoch": 0.04, "learning_rate": 0.00010739999999999998, "loss": 2.1061, "step": 4300 }, { "epoch": 0.04, "learning_rate": 0.00010764999999999999, "loss": 2.097, "step": 4310 }, { "epoch": 0.04, "learning_rate": 0.0001079, "loss": 2.0938, "step": 4320 }, { "epoch": 0.04, "learning_rate": 0.00010814999999999999, "loss": 2.081, "step": 4330 }, { "epoch": 0.04, "learning_rate": 0.00010839999999999999, "loss": 2.0956, "step": 4340 }, { "epoch": 0.04, "learning_rate": 0.00010865, "loss": 2.0812, "step": 4350 }, { "epoch": 0.04, "learning_rate": 0.00010889999999999999, "loss": 2.0977, "step": 4360 }, { "epoch": 0.04, "learning_rate": 0.00010915, "loss": 2.0734, "step": 4370 }, { "epoch": 0.04, "learning_rate": 0.00010939999999999998, "loss": 2.0834, "step": 4380 }, { "epoch": 0.04, "learning_rate": 0.00010964999999999998, "loss": 2.0577, "step": 4390 }, { "epoch": 0.04, "learning_rate": 0.00010989999999999999, "loss": 2.0629, "step": 4400 }, { "epoch": 0.04, "learning_rate": 0.00011014999999999998, "loss": 2.0315, "step": 4410 }, { "epoch": 0.04, "learning_rate": 0.00011039999999999999, "loss": 2.0288, "step": 4420 }, { "epoch": 0.04, "learning_rate": 0.00011064999999999999, "loss": 2.0043, "step": 4430 }, { "epoch": 0.04, "learning_rate": 0.00011089999999999999, "loss": 2.0263, "step": 4440 }, { "epoch": 0.04, "learning_rate": 0.00011114999999999999, "loss": 2.0275, "step": 4450 }, { "epoch": 0.04, "learning_rate": 0.0001114, "loss": 2.0508, "step": 4460 }, { "epoch": 0.04, "learning_rate": 0.00011164999999999999, "loss": 2.0482, "step": 4470 }, { "epoch": 0.04, "learning_rate": 0.0001119, "loss": 2.0245, "step": 4480 }, { "epoch": 0.04, "learning_rate": 0.00011215, "loss": 2.0276, "step": 4490 }, { "epoch": 0.04, "learning_rate": 0.00011239999999999998, "loss": 2.0157, "step": 4500 }, { "epoch": 0.05, "learning_rate": 0.00011264999999999999, "loss": 2.0158, "step": 4510 }, { "epoch": 0.05, "learning_rate": 0.00011289999999999999, "loss": 1.9956, "step": 4520 }, { "epoch": 0.05, "learning_rate": 0.00011314999999999999, "loss": 2.0053, "step": 4530 }, { "epoch": 0.05, "learning_rate": 0.00011339999999999999, "loss": 1.9749, "step": 4540 }, { "epoch": 0.05, "learning_rate": 0.00011365, "loss": 1.9494, "step": 4550 }, { "epoch": 0.05, "learning_rate": 0.00011389999999999999, "loss": 1.9653, "step": 4560 }, { "epoch": 0.05, "learning_rate": 0.00011415, "loss": 1.9768, "step": 4570 }, { "epoch": 0.05, "learning_rate": 0.0001144, "loss": 1.9863, "step": 4580 }, { "epoch": 0.05, "learning_rate": 0.00011464999999999998, "loss": 1.9441, "step": 4590 }, { "epoch": 0.05, "learning_rate": 0.00011489999999999999, "loss": 1.9764, "step": 4600 }, { "epoch": 0.05, "learning_rate": 0.00011515, "loss": 1.9818, "step": 4610 }, { "epoch": 0.05, "learning_rate": 0.00011539999999999999, "loss": 1.9608, "step": 4620 }, { "epoch": 0.05, "learning_rate": 0.00011564999999999999, "loss": 1.9741, "step": 4630 }, { "epoch": 0.05, "learning_rate": 0.00011589999999999998, "loss": 1.9437, "step": 4640 }, { "epoch": 0.05, "learning_rate": 0.00011614999999999999, "loss": 1.9227, "step": 4650 }, { "epoch": 0.05, "learning_rate": 0.0001164, "loss": 1.9289, "step": 4660 }, { "epoch": 0.05, "learning_rate": 0.00011664999999999998, "loss": 1.9195, "step": 4670 }, { "epoch": 0.05, "learning_rate": 0.00011689999999999998, "loss": 1.9182, "step": 4680 }, { "epoch": 0.05, "learning_rate": 0.00011714999999999999, "loss": 1.8692, "step": 4690 }, { "epoch": 0.05, "learning_rate": 0.00011739999999999998, "loss": 1.909, "step": 4700 }, { "epoch": 0.05, "learning_rate": 0.00011762499999999999, "loss": 1.931, "step": 4710 }, { "epoch": 0.05, "learning_rate": 0.00011787499999999999, "loss": 1.8903, "step": 4720 }, { "epoch": 0.05, "learning_rate": 0.00011812499999999998, "loss": 1.9148, "step": 4730 }, { "epoch": 0.05, "learning_rate": 0.00011837499999999999, "loss": 1.9032, "step": 4740 }, { "epoch": 0.05, "learning_rate": 0.000118625, "loss": 1.8977, "step": 4750 }, { "epoch": 0.05, "learning_rate": 0.00011887499999999999, "loss": 1.8749, "step": 4760 }, { "epoch": 0.05, "learning_rate": 0.000119125, "loss": 1.8801, "step": 4770 }, { "epoch": 0.05, "learning_rate": 0.00011937499999999999, "loss": 1.8636, "step": 4780 }, { "epoch": 0.05, "learning_rate": 0.000119625, "loss": 1.8592, "step": 4790 }, { "epoch": 0.05, "learning_rate": 0.000119875, "loss": 1.8598, "step": 4800 }, { "epoch": 0.05, "learning_rate": 0.00012012499999999998, "loss": 1.8642, "step": 4810 }, { "epoch": 0.05, "learning_rate": 0.00012037499999999999, "loss": 1.8885, "step": 4820 }, { "epoch": 0.05, "learning_rate": 0.00012062499999999999, "loss": 1.8736, "step": 4830 }, { "epoch": 0.05, "learning_rate": 0.00012087499999999998, "loss": 1.8383, "step": 4840 }, { "epoch": 0.05, "learning_rate": 0.00012112499999999999, "loss": 1.861, "step": 4850 }, { "epoch": 0.05, "learning_rate": 0.000121375, "loss": 1.8416, "step": 4860 }, { "epoch": 0.05, "learning_rate": 0.00012162499999999999, "loss": 1.8399, "step": 4870 }, { "epoch": 0.05, "learning_rate": 0.000121875, "loss": 1.8497, "step": 4880 }, { "epoch": 0.05, "learning_rate": 0.000122125, "loss": 1.8374, "step": 4890 }, { "epoch": 0.05, "learning_rate": 0.00012237499999999998, "loss": 1.8468, "step": 4900 }, { "epoch": 0.05, "learning_rate": 0.00012262499999999999, "loss": 1.814, "step": 4910 }, { "epoch": 0.05, "learning_rate": 0.000122875, "loss": 1.8219, "step": 4920 }, { "epoch": 0.05, "learning_rate": 0.000123125, "loss": 1.8261, "step": 4930 }, { "epoch": 0.05, "learning_rate": 0.000123375, "loss": 1.8272, "step": 4940 }, { "epoch": 0.05, "learning_rate": 0.00012362499999999998, "loss": 1.822, "step": 4950 }, { "epoch": 0.05, "learning_rate": 0.000123875, "loss": 1.8071, "step": 4960 }, { "epoch": 0.05, "learning_rate": 0.000124125, "loss": 1.825, "step": 4970 }, { "epoch": 0.05, "learning_rate": 0.000124375, "loss": 1.8145, "step": 4980 }, { "epoch": 0.05, "learning_rate": 0.00012462499999999998, "loss": 1.8154, "step": 4990 }, { "epoch": 0.05, "learning_rate": 0.00012487499999999999, "loss": 1.7897, "step": 5000 }, { "epoch": 0.05, "eval_accuracy": 0.6654779469588099, "eval_loss": 2.021484375, "eval_runtime": 98.4067, "eval_samples_per_second": 812.953, "eval_steps_per_second": 1.595, "step": 5000 }, { "epoch": 0.05, "learning_rate": 0.000125125, "loss": 1.7843, "step": 5010 }, { "epoch": 0.05, "learning_rate": 0.000125375, "loss": 1.7975, "step": 5020 }, { "epoch": 0.05, "learning_rate": 0.000125625, "loss": 1.7791, "step": 5030 }, { "epoch": 0.05, "learning_rate": 0.00012587499999999998, "loss": 1.8006, "step": 5040 }, { "epoch": 0.05, "learning_rate": 0.000126125, "loss": 1.7863, "step": 5050 }, { "epoch": 0.05, "learning_rate": 0.000126375, "loss": 1.7956, "step": 5060 }, { "epoch": 0.05, "learning_rate": 0.00012662499999999997, "loss": 1.7842, "step": 5070 }, { "epoch": 0.05, "learning_rate": 0.00012687499999999998, "loss": 1.7654, "step": 5080 }, { "epoch": 0.05, "learning_rate": 0.000127125, "loss": 1.7536, "step": 5090 }, { "epoch": 0.05, "learning_rate": 0.000127375, "loss": 1.7645, "step": 5100 }, { "epoch": 0.05, "learning_rate": 0.000127625, "loss": 1.7458, "step": 5110 }, { "epoch": 0.05, "learning_rate": 0.000127875, "loss": 1.7508, "step": 5120 }, { "epoch": 0.05, "learning_rate": 0.00012812499999999998, "loss": 1.7588, "step": 5130 }, { "epoch": 0.05, "learning_rate": 0.000128375, "loss": 1.7645, "step": 5140 }, { "epoch": 0.05, "learning_rate": 0.000128625, "loss": 1.7593, "step": 5150 }, { "epoch": 0.05, "learning_rate": 0.00012887499999999997, "loss": 1.7307, "step": 5160 }, { "epoch": 0.05, "learning_rate": 0.00012912499999999998, "loss": 1.7578, "step": 5170 }, { "epoch": 0.05, "learning_rate": 0.000129375, "loss": 1.7509, "step": 5180 }, { "epoch": 0.05, "learning_rate": 0.000129625, "loss": 1.7421, "step": 5190 }, { "epoch": 0.05, "learning_rate": 0.000129875, "loss": 1.7378, "step": 5200 }, { "epoch": 0.05, "learning_rate": 0.000130125, "loss": 1.7253, "step": 5210 }, { "epoch": 0.05, "learning_rate": 0.00013037499999999998, "loss": 1.7089, "step": 5220 }, { "epoch": 0.05, "learning_rate": 0.000130625, "loss": 1.6957, "step": 5230 }, { "epoch": 0.05, "learning_rate": 0.000130875, "loss": 1.6984, "step": 5240 }, { "epoch": 0.05, "learning_rate": 0.00013112499999999998, "loss": 1.7067, "step": 5250 }, { "epoch": 0.05, "learning_rate": 0.00013137499999999998, "loss": 1.7153, "step": 5260 }, { "epoch": 0.05, "learning_rate": 0.000131625, "loss": 1.7003, "step": 5270 }, { "epoch": 0.05, "learning_rate": 0.000131875, "loss": 1.7009, "step": 5280 }, { "epoch": 0.05, "learning_rate": 0.000132125, "loss": 1.7018, "step": 5290 }, { "epoch": 0.05, "learning_rate": 0.00013237499999999998, "loss": 1.7049, "step": 5300 }, { "epoch": 0.05, "learning_rate": 0.00013262499999999998, "loss": 1.6905, "step": 5310 }, { "epoch": 0.05, "learning_rate": 0.000132875, "loss": 1.6874, "step": 5320 }, { "epoch": 0.05, "learning_rate": 0.00013312499999999997, "loss": 1.6798, "step": 5330 }, { "epoch": 0.05, "learning_rate": 0.00013337499999999998, "loss": 1.681, "step": 5340 }, { "epoch": 0.05, "learning_rate": 0.00013362499999999998, "loss": 1.6508, "step": 5350 }, { "epoch": 0.05, "learning_rate": 0.000133875, "loss": 1.6663, "step": 5360 }, { "epoch": 0.05, "learning_rate": 0.000134125, "loss": 1.6725, "step": 5370 }, { "epoch": 0.05, "learning_rate": 0.000134375, "loss": 1.6529, "step": 5380 }, { "epoch": 0.05, "learning_rate": 0.00013462499999999998, "loss": 1.6667, "step": 5390 }, { "epoch": 0.05, "learning_rate": 0.00013487499999999999, "loss": 1.6653, "step": 5400 }, { "epoch": 0.05, "learning_rate": 0.000135125, "loss": 1.6569, "step": 5410 }, { "epoch": 0.05, "learning_rate": 0.00013537499999999997, "loss": 1.667, "step": 5420 }, { "epoch": 0.05, "learning_rate": 0.00013562499999999998, "loss": 1.6484, "step": 5430 }, { "epoch": 0.05, "learning_rate": 0.00013587499999999998, "loss": 1.621, "step": 5440 }, { "epoch": 0.05, "learning_rate": 0.000136125, "loss": 1.6436, "step": 5450 }, { "epoch": 0.05, "learning_rate": 0.000136375, "loss": 1.633, "step": 5460 }, { "epoch": 0.05, "learning_rate": 0.000136625, "loss": 1.6295, "step": 5470 }, { "epoch": 0.05, "learning_rate": 0.00013687499999999998, "loss": 1.628, "step": 5480 }, { "epoch": 0.05, "learning_rate": 0.00013712499999999999, "loss": 1.6232, "step": 5490 }, { "epoch": 0.06, "learning_rate": 0.000137375, "loss": 1.6255, "step": 5500 }, { "epoch": 0.06, "learning_rate": 0.000137625, "loss": 1.6432, "step": 5510 }, { "epoch": 0.06, "learning_rate": 0.000137875, "loss": 1.6279, "step": 5520 }, { "epoch": 0.06, "learning_rate": 0.00013812499999999998, "loss": 1.6173, "step": 5530 }, { "epoch": 0.06, "learning_rate": 0.000138375, "loss": 1.614, "step": 5540 }, { "epoch": 0.06, "learning_rate": 0.000138625, "loss": 1.613, "step": 5550 }, { "epoch": 0.06, "learning_rate": 0.00013887499999999997, "loss": 1.6166, "step": 5560 }, { "epoch": 0.06, "learning_rate": 0.00013912499999999998, "loss": 1.6158, "step": 5570 }, { "epoch": 0.06, "learning_rate": 0.00013937499999999999, "loss": 1.6133, "step": 5580 }, { "epoch": 0.06, "learning_rate": 0.000139625, "loss": 1.5946, "step": 5590 }, { "epoch": 0.06, "learning_rate": 0.000139875, "loss": 1.6178, "step": 5600 }, { "epoch": 0.06, "learning_rate": 0.000140125, "loss": 1.6214, "step": 5610 }, { "epoch": 0.06, "learning_rate": 0.00014037499999999998, "loss": 1.6187, "step": 5620 }, { "epoch": 0.06, "learning_rate": 0.000140625, "loss": 1.6119, "step": 5630 }, { "epoch": 0.06, "learning_rate": 0.000140875, "loss": 1.5932, "step": 5640 }, { "epoch": 0.06, "learning_rate": 0.00014112499999999997, "loss": 1.6099, "step": 5650 }, { "epoch": 0.06, "learning_rate": 0.00014137499999999998, "loss": 1.5841, "step": 5660 }, { "epoch": 0.06, "learning_rate": 0.000141625, "loss": 1.5941, "step": 5670 }, { "epoch": 0.06, "learning_rate": 0.000141875, "loss": 1.5714, "step": 5680 }, { "epoch": 0.06, "learning_rate": 0.000142125, "loss": 1.5596, "step": 5690 }, { "epoch": 0.06, "learning_rate": 0.000142375, "loss": 1.5825, "step": 5700 }, { "epoch": 0.06, "learning_rate": 0.0001426, "loss": 1.5837, "step": 5710 }, { "epoch": 0.06, "learning_rate": 0.00014285, "loss": 1.5929, "step": 5720 }, { "epoch": 0.06, "learning_rate": 0.00014309999999999998, "loss": 1.5859, "step": 5730 }, { "epoch": 0.06, "learning_rate": 0.00014335, "loss": 1.5909, "step": 5740 }, { "epoch": 0.06, "learning_rate": 0.0001436, "loss": 1.5944, "step": 5750 }, { "epoch": 0.06, "learning_rate": 0.00014384999999999997, "loss": 1.5592, "step": 5760 }, { "epoch": 0.06, "learning_rate": 0.00014409999999999998, "loss": 1.5771, "step": 5770 }, { "epoch": 0.06, "learning_rate": 0.00014434999999999999, "loss": 1.5739, "step": 5780 }, { "epoch": 0.06, "learning_rate": 0.0001446, "loss": 1.5645, "step": 5790 }, { "epoch": 0.06, "learning_rate": 0.00014485, "loss": 1.5484, "step": 5800 }, { "epoch": 0.06, "learning_rate": 0.0001451, "loss": 1.5657, "step": 5810 }, { "epoch": 0.06, "learning_rate": 0.00014534999999999998, "loss": 1.5617, "step": 5820 }, { "epoch": 0.06, "learning_rate": 0.0001456, "loss": 1.5707, "step": 5830 }, { "epoch": 0.06, "learning_rate": 0.00014585, "loss": 1.5512, "step": 5840 }, { "epoch": 0.06, "learning_rate": 0.00014609999999999997, "loss": 1.5669, "step": 5850 }, { "epoch": 0.06, "learning_rate": 0.00014634999999999998, "loss": 1.5415, "step": 5860 }, { "epoch": 0.06, "learning_rate": 0.00014659999999999999, "loss": 1.5404, "step": 5870 }, { "epoch": 0.06, "learning_rate": 0.00014685, "loss": 1.515, "step": 5880 }, { "epoch": 0.06, "learning_rate": 0.0001471, "loss": 1.5422, "step": 5890 }, { "epoch": 0.06, "learning_rate": 0.00014734999999999998, "loss": 1.496, "step": 5900 }, { "epoch": 0.06, "learning_rate": 0.00014759999999999998, "loss": 1.4912, "step": 5910 }, { "epoch": 0.06, "learning_rate": 0.00014785, "loss": 1.5192, "step": 5920 }, { "epoch": 0.06, "learning_rate": 0.00014809999999999997, "loss": 1.5174, "step": 5930 }, { "epoch": 0.06, "learning_rate": 0.00014834999999999997, "loss": 1.5201, "step": 5940 }, { "epoch": 0.06, "learning_rate": 0.00014859999999999998, "loss": 1.5123, "step": 5950 }, { "epoch": 0.06, "learning_rate": 0.00014884999999999999, "loss": 1.5244, "step": 5960 }, { "epoch": 0.06, "learning_rate": 0.0001491, "loss": 1.5144, "step": 5970 }, { "epoch": 0.06, "learning_rate": 0.00014935, "loss": 1.514, "step": 5980 }, { "epoch": 0.06, "learning_rate": 0.00014959999999999998, "loss": 1.4987, "step": 5990 }, { "epoch": 0.06, "learning_rate": 0.00014984999999999998, "loss": 1.5266, "step": 6000 }, { "epoch": 0.06, "learning_rate": 0.0001501, "loss": 1.5074, "step": 6010 }, { "epoch": 0.06, "learning_rate": 0.00015034999999999997, "loss": 1.4996, "step": 6020 }, { "epoch": 0.06, "learning_rate": 0.00015059999999999997, "loss": 1.5075, "step": 6030 }, { "epoch": 0.06, "learning_rate": 0.00015084999999999998, "loss": 1.4872, "step": 6040 }, { "epoch": 0.06, "learning_rate": 0.0001511, "loss": 1.5025, "step": 6050 }, { "epoch": 0.06, "learning_rate": 0.00015134999999999997, "loss": 1.4719, "step": 6060 }, { "epoch": 0.06, "learning_rate": 0.00015159999999999997, "loss": 1.502, "step": 6070 }, { "epoch": 0.06, "learning_rate": 0.00015184999999999998, "loss": 1.5157, "step": 6080 }, { "epoch": 0.06, "learning_rate": 0.00015209999999999998, "loss": 1.4907, "step": 6090 }, { "epoch": 0.06, "learning_rate": 0.00015235, "loss": 1.4747, "step": 6100 }, { "epoch": 0.06, "learning_rate": 0.0001526, "loss": 1.4808, "step": 6110 }, { "epoch": 0.06, "learning_rate": 0.00015284999999999997, "loss": 1.4805, "step": 6120 }, { "epoch": 0.06, "learning_rate": 0.00015309999999999998, "loss": 1.4615, "step": 6130 }, { "epoch": 0.06, "learning_rate": 0.00015335, "loss": 1.4887, "step": 6140 }, { "epoch": 0.06, "learning_rate": 0.0001536, "loss": 1.453, "step": 6150 }, { "epoch": 0.06, "learning_rate": 0.00015385, "loss": 1.463, "step": 6160 }, { "epoch": 0.06, "learning_rate": 0.0001541, "loss": 1.4306, "step": 6170 }, { "epoch": 0.06, "learning_rate": 0.00015434999999999998, "loss": 1.469, "step": 6180 }, { "epoch": 0.06, "learning_rate": 0.0001546, "loss": 1.4942, "step": 6190 }, { "epoch": 0.06, "learning_rate": 0.00015485, "loss": 1.4675, "step": 6200 }, { "epoch": 0.06, "learning_rate": 0.0001551, "loss": 1.4585, "step": 6210 }, { "epoch": 0.06, "learning_rate": 0.00015535, "loss": 1.467, "step": 6220 }, { "epoch": 0.06, "learning_rate": 0.00015560000000000001, "loss": 1.4841, "step": 6230 }, { "epoch": 0.06, "learning_rate": 0.00015584999999999997, "loss": 1.4545, "step": 6240 }, { "epoch": 0.06, "learning_rate": 0.00015609999999999997, "loss": 1.4503, "step": 6250 }, { "epoch": 0.06, "learning_rate": 0.00015634999999999998, "loss": 1.447, "step": 6260 }, { "epoch": 0.06, "learning_rate": 0.00015659999999999998, "loss": 1.4531, "step": 6270 }, { "epoch": 0.06, "learning_rate": 0.00015685, "loss": 1.4416, "step": 6280 }, { "epoch": 0.06, "learning_rate": 0.00015709999999999997, "loss": 1.4706, "step": 6290 }, { "epoch": 0.06, "learning_rate": 0.00015734999999999998, "loss": 1.4469, "step": 6300 }, { "epoch": 0.06, "learning_rate": 0.00015759999999999998, "loss": 1.4523, "step": 6310 }, { "epoch": 0.06, "learning_rate": 0.00015785, "loss": 1.4449, "step": 6320 }, { "epoch": 0.06, "learning_rate": 0.0001581, "loss": 1.4422, "step": 6330 }, { "epoch": 0.06, "learning_rate": 0.00015835, "loss": 1.4359, "step": 6340 }, { "epoch": 0.06, "learning_rate": 0.00015859999999999998, "loss": 1.4429, "step": 6350 }, { "epoch": 0.06, "learning_rate": 0.00015884999999999999, "loss": 1.4271, "step": 6360 }, { "epoch": 0.06, "learning_rate": 0.0001591, "loss": 1.4198, "step": 6370 }, { "epoch": 0.06, "learning_rate": 0.00015935, "loss": 1.4261, "step": 6380 }, { "epoch": 0.06, "learning_rate": 0.0001596, "loss": 1.4375, "step": 6390 }, { "epoch": 0.06, "learning_rate": 0.00015985, "loss": 1.4319, "step": 6400 }, { "epoch": 0.06, "learning_rate": 0.00016009999999999996, "loss": 1.4446, "step": 6410 }, { "epoch": 0.06, "learning_rate": 0.00016034999999999997, "loss": 1.4272, "step": 6420 }, { "epoch": 0.06, "learning_rate": 0.00016059999999999997, "loss": 1.4311, "step": 6430 }, { "epoch": 0.06, "learning_rate": 0.00016084999999999998, "loss": 1.4489, "step": 6440 }, { "epoch": 0.06, "learning_rate": 0.00016109999999999999, "loss": 1.4271, "step": 6450 }, { "epoch": 0.06, "learning_rate": 0.00016135, "loss": 1.4196, "step": 6460 }, { "epoch": 0.06, "learning_rate": 0.00016159999999999997, "loss": 1.3898, "step": 6470 }, { "epoch": 0.06, "learning_rate": 0.00016184999999999998, "loss": 1.4023, "step": 6480 }, { "epoch": 0.06, "learning_rate": 0.00016209999999999998, "loss": 1.4047, "step": 6490 }, { "epoch": 0.07, "learning_rate": 0.00016235, "loss": 1.3904, "step": 6500 }, { "epoch": 0.07, "learning_rate": 0.0001626, "loss": 1.4102, "step": 6510 }, { "epoch": 0.07, "learning_rate": 0.00016284999999999997, "loss": 1.3983, "step": 6520 }, { "epoch": 0.07, "learning_rate": 0.00016309999999999998, "loss": 1.4131, "step": 6530 }, { "epoch": 0.07, "learning_rate": 0.00016334999999999999, "loss": 1.4131, "step": 6540 }, { "epoch": 0.07, "learning_rate": 0.0001636, "loss": 1.4097, "step": 6550 }, { "epoch": 0.07, "learning_rate": 0.00016385, "loss": 1.3955, "step": 6560 }, { "epoch": 0.07, "learning_rate": 0.0001641, "loss": 1.3888, "step": 6570 }, { "epoch": 0.07, "learning_rate": 0.00016434999999999998, "loss": 1.3908, "step": 6580 }, { "epoch": 0.07, "learning_rate": 0.0001646, "loss": 1.3946, "step": 6590 }, { "epoch": 0.07, "learning_rate": 0.00016485, "loss": 1.3861, "step": 6600 }, { "epoch": 0.07, "learning_rate": 0.00016509999999999997, "loss": 1.389, "step": 6610 }, { "epoch": 0.07, "learning_rate": 0.00016534999999999998, "loss": 1.3817, "step": 6620 }, { "epoch": 0.07, "learning_rate": 0.0001656, "loss": 1.3813, "step": 6630 }, { "epoch": 0.07, "learning_rate": 0.00016584999999999997, "loss": 1.402, "step": 6640 }, { "epoch": 0.07, "learning_rate": 0.00016609999999999997, "loss": 1.3934, "step": 6650 }, { "epoch": 0.07, "learning_rate": 0.00016634999999999998, "loss": 1.3689, "step": 6660 }, { "epoch": 0.07, "learning_rate": 0.00016659999999999998, "loss": 1.3701, "step": 6670 }, { "epoch": 0.07, "learning_rate": 0.00016685, "loss": 1.365, "step": 6680 }, { "epoch": 0.07, "learning_rate": 0.0001671, "loss": 1.3718, "step": 6690 }, { "epoch": 0.07, "learning_rate": 0.00016734999999999997, "loss": 1.3546, "step": 6700 }, { "epoch": 0.07, "learning_rate": 0.000167575, "loss": 1.3649, "step": 6710 }, { "epoch": 0.07, "learning_rate": 0.000167825, "loss": 1.3595, "step": 6720 }, { "epoch": 0.07, "learning_rate": 0.000168075, "loss": 1.3644, "step": 6730 }, { "epoch": 0.07, "learning_rate": 0.000168325, "loss": 1.3749, "step": 6740 }, { "epoch": 0.07, "learning_rate": 0.00016857499999999996, "loss": 1.3693, "step": 6750 }, { "epoch": 0.07, "learning_rate": 0.00016882499999999997, "loss": 1.3468, "step": 6760 }, { "epoch": 0.07, "learning_rate": 0.00016907499999999998, "loss": 1.3447, "step": 6770 }, { "epoch": 0.07, "learning_rate": 0.00016932499999999998, "loss": 1.3418, "step": 6780 }, { "epoch": 0.07, "learning_rate": 0.000169575, "loss": 1.3477, "step": 6790 }, { "epoch": 0.07, "learning_rate": 0.000169825, "loss": 1.3519, "step": 6800 }, { "epoch": 0.07, "learning_rate": 0.00017007499999999997, "loss": 1.3188, "step": 6810 }, { "epoch": 0.07, "learning_rate": 0.00017032499999999998, "loss": 1.3433, "step": 6820 }, { "epoch": 0.07, "learning_rate": 0.00017057499999999999, "loss": 1.3405, "step": 6830 }, { "epoch": 0.07, "learning_rate": 0.000170825, "loss": 1.3375, "step": 6840 }, { "epoch": 0.07, "learning_rate": 0.000171075, "loss": 1.3299, "step": 6850 }, { "epoch": 0.07, "learning_rate": 0.000171325, "loss": 1.3495, "step": 6860 }, { "epoch": 0.07, "learning_rate": 0.00017157499999999998, "loss": 1.3485, "step": 6870 }, { "epoch": 0.07, "learning_rate": 0.000171825, "loss": 1.3383, "step": 6880 }, { "epoch": 0.07, "learning_rate": 0.000172075, "loss": 1.3346, "step": 6890 }, { "epoch": 0.07, "learning_rate": 0.000172325, "loss": 1.3392, "step": 6900 }, { "epoch": 0.07, "learning_rate": 0.000172575, "loss": 1.3336, "step": 6910 }, { "epoch": 0.07, "learning_rate": 0.00017282499999999996, "loss": 1.3228, "step": 6920 }, { "epoch": 0.07, "learning_rate": 0.00017307499999999996, "loss": 1.3324, "step": 6930 }, { "epoch": 0.07, "learning_rate": 0.00017332499999999997, "loss": 1.3237, "step": 6940 }, { "epoch": 0.07, "learning_rate": 0.00017357499999999998, "loss": 1.3331, "step": 6950 }, { "epoch": 0.07, "learning_rate": 0.00017382499999999998, "loss": 1.3495, "step": 6960 }, { "epoch": 0.07, "learning_rate": 0.000174075, "loss": 1.3498, "step": 6970 }, { "epoch": 0.07, "learning_rate": 0.00017432499999999997, "loss": 1.3428, "step": 6980 }, { "epoch": 0.07, "learning_rate": 0.00017457499999999997, "loss": 1.3347, "step": 6990 }, { "epoch": 0.07, "learning_rate": 0.00017482499999999998, "loss": 1.3182, "step": 7000 }, { "epoch": 0.07, "learning_rate": 0.000175075, "loss": 1.3255, "step": 7010 }, { "epoch": 0.07, "learning_rate": 0.000175325, "loss": 1.3057, "step": 7020 }, { "epoch": 0.07, "learning_rate": 0.000175575, "loss": 1.3166, "step": 7030 }, { "epoch": 0.07, "learning_rate": 0.00017582499999999998, "loss": 1.3268, "step": 7040 }, { "epoch": 0.07, "learning_rate": 0.00017607499999999998, "loss": 1.3121, "step": 7050 }, { "epoch": 0.07, "learning_rate": 0.000176325, "loss": 1.301, "step": 7060 }, { "epoch": 0.07, "learning_rate": 0.000176575, "loss": 1.3167, "step": 7070 }, { "epoch": 0.07, "learning_rate": 0.000176825, "loss": 1.3102, "step": 7080 }, { "epoch": 0.07, "learning_rate": 0.000177075, "loss": 1.329, "step": 7090 }, { "epoch": 0.07, "learning_rate": 0.00017732499999999996, "loss": 1.3241, "step": 7100 }, { "epoch": 0.07, "learning_rate": 0.00017757499999999997, "loss": 1.3216, "step": 7110 }, { "epoch": 0.07, "learning_rate": 0.00017782499999999997, "loss": 1.3041, "step": 7120 }, { "epoch": 0.07, "learning_rate": 0.00017807499999999998, "loss": 1.2956, "step": 7130 }, { "epoch": 0.07, "learning_rate": 0.00017832499999999998, "loss": 1.3219, "step": 7140 }, { "epoch": 0.07, "learning_rate": 0.00017857499999999996, "loss": 1.2878, "step": 7150 }, { "epoch": 0.07, "learning_rate": 0.00017882499999999997, "loss": 1.2961, "step": 7160 }, { "epoch": 0.07, "learning_rate": 0.00017907499999999998, "loss": 1.2876, "step": 7170 }, { "epoch": 0.07, "learning_rate": 0.00017932499999999998, "loss": 1.3088, "step": 7180 }, { "epoch": 0.07, "learning_rate": 0.000179575, "loss": 1.296, "step": 7190 }, { "epoch": 0.07, "learning_rate": 0.000179825, "loss": 1.3052, "step": 7200 }, { "epoch": 0.07, "learning_rate": 0.00018007499999999997, "loss": 1.3145, "step": 7210 }, { "epoch": 0.07, "learning_rate": 0.00018032499999999998, "loss": 1.2777, "step": 7220 }, { "epoch": 0.07, "learning_rate": 0.00018057499999999998, "loss": 1.2916, "step": 7230 }, { "epoch": 0.07, "learning_rate": 0.000180825, "loss": 1.2826, "step": 7240 }, { "epoch": 0.07, "learning_rate": 0.000181075, "loss": 1.2731, "step": 7250 }, { "epoch": 0.07, "learning_rate": 0.000181325, "loss": 1.2718, "step": 7260 }, { "epoch": 0.07, "learning_rate": 0.00018157499999999998, "loss": 1.2597, "step": 7270 }, { "epoch": 0.07, "learning_rate": 0.000181825, "loss": 1.2753, "step": 7280 }, { "epoch": 0.07, "learning_rate": 0.000182075, "loss": 1.2755, "step": 7290 }, { "epoch": 0.07, "learning_rate": 0.000182325, "loss": 1.2735, "step": 7300 }, { "epoch": 0.07, "learning_rate": 0.000182575, "loss": 1.269, "step": 7310 }, { "epoch": 0.07, "learning_rate": 0.000182825, "loss": 1.2967, "step": 7320 }, { "epoch": 0.07, "learning_rate": 0.00018307499999999996, "loss": 1.283, "step": 7330 }, { "epoch": 0.07, "learning_rate": 0.00018332499999999997, "loss": 1.2773, "step": 7340 }, { "epoch": 0.07, "learning_rate": 0.00018357499999999998, "loss": 1.2836, "step": 7350 }, { "epoch": 0.07, "learning_rate": 0.00018382499999999998, "loss": 1.272, "step": 7360 }, { "epoch": 0.07, "learning_rate": 0.000184075, "loss": 1.2588, "step": 7370 }, { "epoch": 0.07, "learning_rate": 0.000184325, "loss": 1.2535, "step": 7380 }, { "epoch": 0.07, "learning_rate": 0.00018457499999999997, "loss": 1.2585, "step": 7390 }, { "epoch": 0.07, "learning_rate": 0.00018482499999999998, "loss": 1.2544, "step": 7400 }, { "epoch": 0.07, "learning_rate": 0.00018507499999999999, "loss": 1.236, "step": 7410 }, { "epoch": 0.07, "learning_rate": 0.000185325, "loss": 1.26, "step": 7420 }, { "epoch": 0.07, "learning_rate": 0.000185575, "loss": 1.2725, "step": 7430 }, { "epoch": 0.07, "learning_rate": 0.00018582499999999998, "loss": 1.2621, "step": 7440 }, { "epoch": 0.07, "learning_rate": 0.00018607499999999998, "loss": 1.2566, "step": 7450 }, { "epoch": 0.07, "learning_rate": 0.000186325, "loss": 1.2642, "step": 7460 }, { "epoch": 0.07, "learning_rate": 0.000186575, "loss": 1.2521, "step": 7470 }, { "epoch": 0.07, "learning_rate": 0.000186825, "loss": 1.2418, "step": 7480 }, { "epoch": 0.07, "learning_rate": 0.000187075, "loss": 1.2386, "step": 7490 }, { "epoch": 0.07, "learning_rate": 0.00018732499999999996, "loss": 1.2496, "step": 7500 }, { "epoch": 0.07, "eval_accuracy": 0.7488450887376084, "eval_loss": 1.373046875, "eval_runtime": 97.1795, "eval_samples_per_second": 823.219, "eval_steps_per_second": 1.616, "step": 7500 }, { "epoch": 0.08, "learning_rate": 0.00018757499999999997, "loss": 1.2656, "step": 7510 }, { "epoch": 0.08, "learning_rate": 0.00018782499999999997, "loss": 1.2877, "step": 7520 }, { "epoch": 0.08, "learning_rate": 0.00018807499999999998, "loss": 1.2559, "step": 7530 }, { "epoch": 0.08, "learning_rate": 0.00018832499999999998, "loss": 1.289, "step": 7540 }, { "epoch": 0.08, "learning_rate": 0.000188575, "loss": 1.2772, "step": 7550 }, { "epoch": 0.08, "learning_rate": 0.00018882499999999997, "loss": 1.2847, "step": 7560 }, { "epoch": 0.08, "learning_rate": 0.00018907499999999997, "loss": 1.2564, "step": 7570 }, { "epoch": 0.08, "learning_rate": 0.00018932499999999998, "loss": 1.276, "step": 7580 }, { "epoch": 0.08, "learning_rate": 0.000189575, "loss": 1.2677, "step": 7590 }, { "epoch": 0.08, "learning_rate": 0.000189825, "loss": 1.2607, "step": 7600 }, { "epoch": 0.08, "learning_rate": 0.000190075, "loss": 1.2511, "step": 7610 }, { "epoch": 0.08, "learning_rate": 0.00019032499999999998, "loss": 1.2928, "step": 7620 }, { "epoch": 0.08, "learning_rate": 0.00019057499999999998, "loss": 1.2494, "step": 7630 }, { "epoch": 0.08, "learning_rate": 0.000190825, "loss": 1.2637, "step": 7640 }, { "epoch": 0.08, "learning_rate": 0.000191075, "loss": 1.2602, "step": 7650 }, { "epoch": 0.08, "learning_rate": 0.000191325, "loss": 1.2572, "step": 7660 }, { "epoch": 0.08, "learning_rate": 0.00019157499999999995, "loss": 1.2685, "step": 7670 }, { "epoch": 0.08, "learning_rate": 0.00019182499999999996, "loss": 1.2509, "step": 7680 }, { "epoch": 0.08, "learning_rate": 0.00019207499999999997, "loss": 1.2515, "step": 7690 }, { "epoch": 0.08, "learning_rate": 0.00019232499999999997, "loss": 1.2698, "step": 7700 }, { "epoch": 0.08, "learning_rate": 0.00019255, "loss": 1.2357, "step": 7710 }, { "epoch": 0.08, "learning_rate": 0.0001928, "loss": 1.2434, "step": 7720 }, { "epoch": 0.08, "learning_rate": 0.00019304999999999998, "loss": 1.2472, "step": 7730 }, { "epoch": 0.08, "learning_rate": 0.00019329999999999998, "loss": 1.2485, "step": 7740 }, { "epoch": 0.08, "learning_rate": 0.00019355, "loss": 1.2525, "step": 7750 }, { "epoch": 0.08, "learning_rate": 0.0001938, "loss": 1.2505, "step": 7760 }, { "epoch": 0.08, "learning_rate": 0.00019405, "loss": 1.2413, "step": 7770 }, { "epoch": 0.08, "learning_rate": 0.00019429999999999998, "loss": 1.2561, "step": 7780 }, { "epoch": 0.08, "learning_rate": 0.00019454999999999999, "loss": 1.2504, "step": 7790 }, { "epoch": 0.08, "learning_rate": 0.0001948, "loss": 1.2537, "step": 7800 }, { "epoch": 0.08, "learning_rate": 0.00019504999999999997, "loss": 1.2451, "step": 7810 }, { "epoch": 0.08, "learning_rate": 0.00019529999999999998, "loss": 1.2397, "step": 7820 }, { "epoch": 0.08, "learning_rate": 0.00019554999999999998, "loss": 1.2262, "step": 7830 }, { "epoch": 0.08, "learning_rate": 0.00019579999999999996, "loss": 1.2322, "step": 7840 }, { "epoch": 0.08, "learning_rate": 0.00019604999999999997, "loss": 1.2229, "step": 7850 }, { "epoch": 0.08, "learning_rate": 0.00019629999999999997, "loss": 1.2553, "step": 7860 }, { "epoch": 0.08, "learning_rate": 0.00019654999999999998, "loss": 1.2222, "step": 7870 }, { "epoch": 0.08, "learning_rate": 0.00019679999999999999, "loss": 1.2358, "step": 7880 }, { "epoch": 0.08, "learning_rate": 0.00019705, "loss": 1.2192, "step": 7890 }, { "epoch": 0.08, "learning_rate": 0.00019729999999999997, "loss": 1.2226, "step": 7900 }, { "epoch": 0.08, "learning_rate": 0.00019754999999999998, "loss": 1.2095, "step": 7910 }, { "epoch": 0.08, "learning_rate": 0.00019779999999999998, "loss": 1.2142, "step": 7920 }, { "epoch": 0.08, "learning_rate": 0.00019805, "loss": 1.216, "step": 7930 }, { "epoch": 0.08, "learning_rate": 0.0001983, "loss": 1.2335, "step": 7940 }, { "epoch": 0.08, "learning_rate": 0.00019855, "loss": 1.2481, "step": 7950 }, { "epoch": 0.08, "learning_rate": 0.00019879999999999998, "loss": 1.2291, "step": 7960 }, { "epoch": 0.08, "learning_rate": 0.00019905, "loss": 1.2168, "step": 7970 }, { "epoch": 0.08, "learning_rate": 0.0001993, "loss": 1.2517, "step": 7980 }, { "epoch": 0.08, "learning_rate": 0.00019955, "loss": 1.2335, "step": 7990 }, { "epoch": 0.08, "learning_rate": 0.0001998, "loss": 1.2429, "step": 8000 }, { "epoch": 0.08, "learning_rate": 0.00020004999999999996, "loss": 1.2454, "step": 8010 }, { "epoch": 0.08, "learning_rate": 0.00020029999999999996, "loss": 1.2418, "step": 8020 }, { "epoch": 0.08, "learning_rate": 0.00020054999999999997, "loss": 1.2321, "step": 8030 }, { "epoch": 0.08, "learning_rate": 0.00020079999999999997, "loss": 1.2369, "step": 8040 }, { "epoch": 0.08, "learning_rate": 0.00020104999999999998, "loss": 1.2323, "step": 8050 }, { "epoch": 0.08, "learning_rate": 0.0002013, "loss": 1.2338, "step": 8060 }, { "epoch": 0.08, "learning_rate": 0.00020154999999999997, "loss": 1.2371, "step": 8070 }, { "epoch": 0.08, "learning_rate": 0.00020179999999999997, "loss": 1.2068, "step": 8080 }, { "epoch": 0.08, "learning_rate": 0.00020204999999999998, "loss": 1.2246, "step": 8090 }, { "epoch": 0.08, "learning_rate": 0.00020229999999999998, "loss": 1.2138, "step": 8100 }, { "epoch": 0.08, "learning_rate": 0.00020255, "loss": 1.2173, "step": 8110 }, { "epoch": 0.08, "learning_rate": 0.0002028, "loss": 1.2031, "step": 8120 }, { "epoch": 0.08, "learning_rate": 0.00020304999999999998, "loss": 1.222, "step": 8130 }, { "epoch": 0.08, "learning_rate": 0.00020329999999999998, "loss": 1.212, "step": 8140 }, { "epoch": 0.08, "learning_rate": 0.00020355, "loss": 1.2242, "step": 8150 }, { "epoch": 0.08, "learning_rate": 0.0002038, "loss": 1.2087, "step": 8160 }, { "epoch": 0.08, "learning_rate": 0.00020405, "loss": 1.216, "step": 8170 }, { "epoch": 0.08, "learning_rate": 0.0002043, "loss": 1.2334, "step": 8180 }, { "epoch": 0.08, "learning_rate": 0.00020454999999999996, "loss": 1.2154, "step": 8190 }, { "epoch": 0.08, "learning_rate": 0.00020479999999999996, "loss": 1.2098, "step": 8200 }, { "epoch": 0.08, "learning_rate": 0.00020504999999999997, "loss": 1.1788, "step": 8210 }, { "epoch": 0.08, "learning_rate": 0.00020529999999999998, "loss": 1.1994, "step": 8220 }, { "epoch": 0.08, "learning_rate": 0.00020554999999999998, "loss": 1.2037, "step": 8230 }, { "epoch": 0.08, "learning_rate": 0.0002058, "loss": 1.2082, "step": 8240 }, { "epoch": 0.08, "learning_rate": 0.00020604999999999997, "loss": 1.2125, "step": 8250 }, { "epoch": 0.08, "learning_rate": 0.00020629999999999997, "loss": 1.1954, "step": 8260 }, { "epoch": 0.08, "learning_rate": 0.00020654999999999998, "loss": 1.2014, "step": 8270 }, { "epoch": 0.08, "learning_rate": 0.00020679999999999999, "loss": 1.2146, "step": 8280 }, { "epoch": 0.08, "learning_rate": 0.00020705, "loss": 1.2013, "step": 8290 }, { "epoch": 0.08, "learning_rate": 0.00020729999999999997, "loss": 1.2174, "step": 8300 }, { "epoch": 0.08, "learning_rate": 0.00020754999999999998, "loss": 1.2043, "step": 8310 }, { "epoch": 0.08, "learning_rate": 0.00020779999999999998, "loss": 1.1967, "step": 8320 }, { "epoch": 0.08, "learning_rate": 0.00020805, "loss": 1.1849, "step": 8330 }, { "epoch": 0.08, "learning_rate": 0.0002083, "loss": 1.1729, "step": 8340 }, { "epoch": 0.08, "learning_rate": 0.00020855, "loss": 1.1918, "step": 8350 }, { "epoch": 0.08, "learning_rate": 0.00020879999999999998, "loss": 1.1862, "step": 8360 }, { "epoch": 0.08, "learning_rate": 0.00020904999999999999, "loss": 1.1838, "step": 8370 }, { "epoch": 0.08, "learning_rate": 0.0002093, "loss": 1.1633, "step": 8380 }, { "epoch": 0.08, "learning_rate": 0.00020955, "loss": 1.175, "step": 8390 }, { "epoch": 0.08, "learning_rate": 0.0002098, "loss": 1.1767, "step": 8400 }, { "epoch": 0.08, "learning_rate": 0.00021004999999999998, "loss": 1.184, "step": 8410 }, { "epoch": 0.08, "learning_rate": 0.00021029999999999996, "loss": 1.1896, "step": 8420 }, { "epoch": 0.08, "learning_rate": 0.00021054999999999997, "loss": 1.2001, "step": 8430 }, { "epoch": 0.08, "learning_rate": 0.00021079999999999997, "loss": 1.1948, "step": 8440 }, { "epoch": 0.08, "learning_rate": 0.00021104999999999998, "loss": 1.1864, "step": 8450 }, { "epoch": 0.08, "learning_rate": 0.00021129999999999999, "loss": 1.1713, "step": 8460 }, { "epoch": 0.08, "learning_rate": 0.00021155, "loss": 1.1674, "step": 8470 }, { "epoch": 0.08, "learning_rate": 0.00021179999999999997, "loss": 1.1793, "step": 8480 }, { "epoch": 0.08, "learning_rate": 0.00021204999999999998, "loss": 1.1904, "step": 8490 }, { "epoch": 0.09, "learning_rate": 0.00021229999999999998, "loss": 1.1735, "step": 8500 }, { "epoch": 0.09, "learning_rate": 0.00021255, "loss": 1.1594, "step": 8510 }, { "epoch": 0.09, "learning_rate": 0.0002128, "loss": 1.1885, "step": 8520 }, { "epoch": 0.09, "learning_rate": 0.00021304999999999997, "loss": 1.1641, "step": 8530 }, { "epoch": 0.09, "learning_rate": 0.00021329999999999998, "loss": 1.1783, "step": 8540 }, { "epoch": 0.09, "learning_rate": 0.00021355, "loss": 1.1782, "step": 8550 }, { "epoch": 0.09, "learning_rate": 0.0002138, "loss": 1.1854, "step": 8560 }, { "epoch": 0.09, "learning_rate": 0.00021405, "loss": 1.1445, "step": 8570 }, { "epoch": 0.09, "learning_rate": 0.0002143, "loss": 1.1713, "step": 8580 }, { "epoch": 0.09, "learning_rate": 0.00021454999999999996, "loss": 1.1643, "step": 8590 }, { "epoch": 0.09, "learning_rate": 0.00021479999999999996, "loss": 1.1834, "step": 8600 }, { "epoch": 0.09, "learning_rate": 0.00021504999999999997, "loss": 1.1736, "step": 8610 }, { "epoch": 0.09, "learning_rate": 0.00021529999999999997, "loss": 1.1749, "step": 8620 }, { "epoch": 0.09, "learning_rate": 0.00021554999999999998, "loss": 1.1803, "step": 8630 }, { "epoch": 0.09, "learning_rate": 0.0002158, "loss": 1.1583, "step": 8640 }, { "epoch": 0.09, "learning_rate": 0.00021604999999999997, "loss": 1.1511, "step": 8650 }, { "epoch": 0.09, "learning_rate": 0.00021629999999999997, "loss": 1.1896, "step": 8660 }, { "epoch": 0.09, "learning_rate": 0.00021654999999999998, "loss": 1.1664, "step": 8670 }, { "epoch": 0.09, "learning_rate": 0.00021679999999999998, "loss": 1.1734, "step": 8680 }, { "epoch": 0.09, "learning_rate": 0.00021705, "loss": 1.1613, "step": 8690 }, { "epoch": 0.09, "learning_rate": 0.0002173, "loss": 1.1422, "step": 8700 }, { "epoch": 0.09, "learning_rate": 0.00021752499999999996, "loss": 1.1381, "step": 8710 }, { "epoch": 0.09, "learning_rate": 0.00021777499999999997, "loss": 1.1172, "step": 8720 }, { "epoch": 0.09, "learning_rate": 0.00021802499999999997, "loss": 1.131, "step": 8730 }, { "epoch": 0.09, "learning_rate": 0.00021827499999999998, "loss": 1.1344, "step": 8740 }, { "epoch": 0.09, "learning_rate": 0.00021852499999999999, "loss": 1.1266, "step": 8750 }, { "epoch": 0.09, "learning_rate": 0.00021877499999999996, "loss": 1.1072, "step": 8760 }, { "epoch": 0.09, "learning_rate": 0.00021902499999999997, "loss": 1.1199, "step": 8770 }, { "epoch": 0.09, "learning_rate": 0.00021927499999999998, "loss": 1.1395, "step": 8780 }, { "epoch": 0.09, "learning_rate": 0.00021952499999999998, "loss": 1.1471, "step": 8790 }, { "epoch": 0.09, "learning_rate": 0.000219775, "loss": 1.1389, "step": 8800 }, { "epoch": 0.09, "learning_rate": 0.000220025, "loss": 1.1376, "step": 8810 }, { "epoch": 0.09, "learning_rate": 0.00022027499999999997, "loss": 1.1415, "step": 8820 }, { "epoch": 0.09, "learning_rate": 0.00022052499999999998, "loss": 1.158, "step": 8830 }, { "epoch": 0.09, "learning_rate": 0.00022077499999999999, "loss": 1.1549, "step": 8840 }, { "epoch": 0.09, "learning_rate": 0.000221025, "loss": 1.1279, "step": 8850 }, { "epoch": 0.09, "learning_rate": 0.000221275, "loss": 1.1354, "step": 8860 }, { "epoch": 0.09, "learning_rate": 0.000221525, "loss": 1.1481, "step": 8870 }, { "epoch": 0.09, "learning_rate": 0.00022177499999999996, "loss": 1.1304, "step": 8880 }, { "epoch": 0.09, "learning_rate": 0.00022202499999999996, "loss": 1.1206, "step": 8890 }, { "epoch": 0.09, "learning_rate": 0.00022227499999999997, "loss": 1.1287, "step": 8900 }, { "epoch": 0.09, "learning_rate": 0.00022252499999999997, "loss": 1.1263, "step": 8910 }, { "epoch": 0.09, "learning_rate": 0.00022277499999999998, "loss": 1.1377, "step": 8920 }, { "epoch": 0.09, "learning_rate": 0.00022302499999999996, "loss": 1.1323, "step": 8930 }, { "epoch": 0.09, "learning_rate": 0.00022327499999999997, "loss": 1.1399, "step": 8940 }, { "epoch": 0.09, "learning_rate": 0.00022352499999999997, "loss": 1.1158, "step": 8950 }, { "epoch": 0.09, "learning_rate": 0.00022377499999999998, "loss": 1.119, "step": 8960 }, { "epoch": 0.09, "learning_rate": 0.00022402499999999998, "loss": 1.1351, "step": 8970 }, { "epoch": 0.09, "learning_rate": 0.000224275, "loss": 1.1323, "step": 8980 }, { "epoch": 0.09, "learning_rate": 0.00022452499999999997, "loss": 1.128, "step": 8990 }, { "epoch": 0.09, "learning_rate": 0.00022477499999999997, "loss": 1.1033, "step": 9000 }, { "epoch": 0.09, "learning_rate": 0.00022502499999999998, "loss": 1.1259, "step": 9010 }, { "epoch": 0.09, "learning_rate": 0.000225275, "loss": 1.1012, "step": 9020 }, { "epoch": 0.09, "learning_rate": 0.000225525, "loss": 1.1288, "step": 9030 }, { "epoch": 0.09, "learning_rate": 0.000225775, "loss": 1.1358, "step": 9040 }, { "epoch": 0.09, "learning_rate": 0.00022602499999999998, "loss": 1.1197, "step": 9050 }, { "epoch": 0.09, "learning_rate": 0.00022627499999999998, "loss": 1.1229, "step": 9060 }, { "epoch": 0.09, "learning_rate": 0.000226525, "loss": 1.1221, "step": 9070 }, { "epoch": 0.09, "learning_rate": 0.000226775, "loss": 1.1059, "step": 9080 }, { "epoch": 0.09, "learning_rate": 0.000227025, "loss": 1.1354, "step": 9090 }, { "epoch": 0.09, "learning_rate": 0.000227275, "loss": 1.1306, "step": 9100 }, { "epoch": 0.09, "learning_rate": 0.00022752499999999996, "loss": 1.1264, "step": 9110 }, { "epoch": 0.09, "learning_rate": 0.00022777499999999997, "loss": 1.1259, "step": 9120 }, { "epoch": 0.09, "learning_rate": 0.00022802499999999997, "loss": 1.1077, "step": 9130 }, { "epoch": 0.09, "learning_rate": 0.00022827499999999998, "loss": 1.1097, "step": 9140 }, { "epoch": 0.09, "learning_rate": 0.00022852499999999998, "loss": 1.1277, "step": 9150 }, { "epoch": 0.09, "learning_rate": 0.00022877499999999996, "loss": 1.1337, "step": 9160 }, { "epoch": 0.09, "learning_rate": 0.00022902499999999997, "loss": 1.1278, "step": 9170 }, { "epoch": 0.09, "learning_rate": 0.00022927499999999998, "loss": 1.1213, "step": 9180 }, { "epoch": 0.09, "learning_rate": 0.00022952499999999998, "loss": 1.1104, "step": 9190 }, { "epoch": 0.09, "learning_rate": 0.000229775, "loss": 1.0953, "step": 9200 }, { "epoch": 0.09, "learning_rate": 0.000230025, "loss": 1.1256, "step": 9210 }, { "epoch": 0.09, "learning_rate": 0.00023027499999999997, "loss": 1.1117, "step": 9220 }, { "epoch": 0.09, "learning_rate": 0.00023052499999999998, "loss": 1.1406, "step": 9230 }, { "epoch": 0.09, "learning_rate": 0.00023077499999999999, "loss": 1.1138, "step": 9240 }, { "epoch": 0.09, "learning_rate": 0.000231025, "loss": 1.1336, "step": 9250 }, { "epoch": 0.09, "learning_rate": 0.000231275, "loss": 1.1208, "step": 9260 }, { "epoch": 0.09, "learning_rate": 0.000231525, "loss": 1.1263, "step": 9270 }, { "epoch": 0.09, "learning_rate": 0.00023177499999999996, "loss": 1.1306, "step": 9280 }, { "epoch": 0.09, "learning_rate": 0.00023202499999999996, "loss": 1.1261, "step": 9290 }, { "epoch": 0.09, "learning_rate": 0.00023227499999999997, "loss": 1.1191, "step": 9300 }, { "epoch": 0.09, "learning_rate": 0.00023252499999999997, "loss": 1.1023, "step": 9310 }, { "epoch": 0.09, "learning_rate": 0.00023277499999999998, "loss": 1.0853, "step": 9320 }, { "epoch": 0.09, "learning_rate": 0.00023302499999999999, "loss": 1.1104, "step": 9330 }, { "epoch": 0.09, "learning_rate": 0.00023327499999999996, "loss": 1.1077, "step": 9340 }, { "epoch": 0.09, "learning_rate": 0.00023352499999999997, "loss": 1.1262, "step": 9350 }, { "epoch": 0.09, "learning_rate": 0.00023377499999999998, "loss": 1.125, "step": 9360 }, { "epoch": 0.09, "learning_rate": 0.00023402499999999998, "loss": 1.1072, "step": 9370 }, { "epoch": 0.09, "learning_rate": 0.000234275, "loss": 1.11, "step": 9380 }, { "epoch": 0.09, "learning_rate": 0.00023452499999999997, "loss": 1.0846, "step": 9390 }, { "epoch": 0.09, "learning_rate": 0.00023477499999999997, "loss": 1.0895, "step": 9400 }, { "epoch": 0.09, "learning_rate": 0.00023502499999999998, "loss": 1.0858, "step": 9410 }, { "epoch": 0.09, "learning_rate": 0.00023527499999999999, "loss": 1.0886, "step": 9420 }, { "epoch": 0.09, "learning_rate": 0.000235525, "loss": 1.0858, "step": 9430 }, { "epoch": 0.09, "learning_rate": 0.000235775, "loss": 1.0684, "step": 9440 }, { "epoch": 0.09, "learning_rate": 0.00023602499999999998, "loss": 1.0701, "step": 9450 }, { "epoch": 0.09, "learning_rate": 0.00023627499999999998, "loss": 1.0703, "step": 9460 }, { "epoch": 0.09, "learning_rate": 0.00023652499999999996, "loss": 1.0928, "step": 9470 }, { "epoch": 0.09, "learning_rate": 0.00023677499999999997, "loss": 1.0823, "step": 9480 }, { "epoch": 0.09, "learning_rate": 0.00023702499999999997, "loss": 1.0958, "step": 9490 }, { "epoch": 0.1, "learning_rate": 0.00023727499999999998, "loss": 1.0629, "step": 9500 }, { "epoch": 0.1, "learning_rate": 0.00023752499999999996, "loss": 1.0654, "step": 9510 }, { "epoch": 0.1, "learning_rate": 0.00023777499999999997, "loss": 1.063, "step": 9520 }, { "epoch": 0.1, "learning_rate": 0.00023802499999999997, "loss": 1.0727, "step": 9530 }, { "epoch": 0.1, "learning_rate": 0.00023827499999999998, "loss": 1.0725, "step": 9540 }, { "epoch": 0.1, "learning_rate": 0.00023852499999999998, "loss": 1.0759, "step": 9550 }, { "epoch": 0.1, "learning_rate": 0.000238775, "loss": 1.0493, "step": 9560 }, { "epoch": 0.1, "learning_rate": 0.00023902499999999997, "loss": 1.0511, "step": 9570 }, { "epoch": 0.1, "learning_rate": 0.00023927499999999997, "loss": 1.0333, "step": 9580 }, { "epoch": 0.1, "learning_rate": 0.00023952499999999998, "loss": 1.0718, "step": 9590 }, { "epoch": 0.1, "learning_rate": 0.000239775, "loss": 1.0631, "step": 9600 }, { "epoch": 0.1, "learning_rate": 0.000240025, "loss": 1.0662, "step": 9610 }, { "epoch": 0.1, "learning_rate": 0.000240275, "loss": 1.0533, "step": 9620 }, { "epoch": 0.1, "learning_rate": 0.00024052499999999998, "loss": 1.0558, "step": 9630 }, { "epoch": 0.1, "learning_rate": 0.00024077499999999998, "loss": 1.0694, "step": 9640 }, { "epoch": 0.1, "learning_rate": 0.000241025, "loss": 1.1064, "step": 9650 }, { "epoch": 0.1, "learning_rate": 0.000241275, "loss": 1.0828, "step": 9660 }, { "epoch": 0.1, "learning_rate": 0.000241525, "loss": 1.0639, "step": 9670 }, { "epoch": 0.1, "learning_rate": 0.00024177499999999995, "loss": 1.0705, "step": 9680 }, { "epoch": 0.1, "learning_rate": 0.00024202499999999996, "loss": 1.0866, "step": 9690 }, { "epoch": 0.1, "learning_rate": 0.00024227499999999997, "loss": 1.0813, "step": 9700 }, { "epoch": 0.1, "learning_rate": 0.00024252499999999997, "loss": 1.081, "step": 9710 }, { "epoch": 0.1, "learning_rate": 0.00024275, "loss": 1.103, "step": 9720 }, { "epoch": 0.1, "learning_rate": 0.000243, "loss": 1.0906, "step": 9730 }, { "epoch": 0.1, "learning_rate": 0.00024324999999999998, "loss": 1.0728, "step": 9740 }, { "epoch": 0.1, "learning_rate": 0.00024349999999999998, "loss": 1.0843, "step": 9750 }, { "epoch": 0.1, "learning_rate": 0.00024375, "loss": 1.0672, "step": 9760 }, { "epoch": 0.1, "learning_rate": 0.000244, "loss": 1.0846, "step": 9770 }, { "epoch": 0.1, "learning_rate": 0.00024425, "loss": 1.0973, "step": 9780 }, { "epoch": 0.1, "learning_rate": 0.0002445, "loss": 1.0933, "step": 9790 }, { "epoch": 0.1, "learning_rate": 0.00024474999999999996, "loss": 1.0767, "step": 9800 }, { "epoch": 0.1, "learning_rate": 0.000245, "loss": 1.0875, "step": 9810 }, { "epoch": 0.1, "learning_rate": 0.00024524999999999997, "loss": 1.0775, "step": 9820 }, { "epoch": 0.1, "learning_rate": 0.0002455, "loss": 1.0833, "step": 9830 }, { "epoch": 0.1, "learning_rate": 0.00024575, "loss": 1.0829, "step": 9840 }, { "epoch": 0.1, "learning_rate": 0.00024599999999999996, "loss": 1.0804, "step": 9850 }, { "epoch": 0.1, "learning_rate": 0.00024625, "loss": 1.0755, "step": 9860 }, { "epoch": 0.1, "learning_rate": 0.0002465, "loss": 1.0638, "step": 9870 }, { "epoch": 0.1, "learning_rate": 0.00024675, "loss": 1.0426, "step": 9880 }, { "epoch": 0.1, "learning_rate": 0.000247, "loss": 1.0552, "step": 9890 }, { "epoch": 0.1, "learning_rate": 0.00024724999999999997, "loss": 1.0444, "step": 9900 }, { "epoch": 0.1, "learning_rate": 0.00024749999999999994, "loss": 1.07, "step": 9910 }, { "epoch": 0.1, "learning_rate": 0.00024775, "loss": 1.078, "step": 9920 }, { "epoch": 0.1, "learning_rate": 0.00024799999999999996, "loss": 1.0591, "step": 9930 }, { "epoch": 0.1, "learning_rate": 0.00024825, "loss": 1.0609, "step": 9940 }, { "epoch": 0.1, "learning_rate": 0.00024849999999999997, "loss": 1.0778, "step": 9950 }, { "epoch": 0.1, "learning_rate": 0.00024875, "loss": 1.0898, "step": 9960 }, { "epoch": 0.1, "learning_rate": 0.000249, "loss": 1.0609, "step": 9970 }, { "epoch": 0.1, "learning_rate": 0.00024924999999999996, "loss": 1.0747, "step": 9980 }, { "epoch": 0.1, "learning_rate": 0.0002495, "loss": 1.0724, "step": 9990 }, { "epoch": 0.1, "learning_rate": 0.00024974999999999997, "loss": 1.0477, "step": 10000 }, { "epoch": 0.1, "eval_accuracy": 0.7842036631094624, "eval_loss": 1.115234375, "eval_runtime": 96.9888, "eval_samples_per_second": 824.838, "eval_steps_per_second": 1.619, "step": 10000 }, { "epoch": 0.1, "learning_rate": 0.00025, "loss": 1.0264, "step": 10010 }, { "epoch": 0.1, "learning_rate": 0.00025025, "loss": 1.0462, "step": 10020 }, { "epoch": 0.1, "learning_rate": 0.00025049999999999996, "loss": 1.0428, "step": 10030 }, { "epoch": 0.1, "learning_rate": 0.00025075, "loss": 1.0627, "step": 10040 }, { "epoch": 0.1, "learning_rate": 0.000251, "loss": 1.049, "step": 10050 }, { "epoch": 0.1, "learning_rate": 0.00025125, "loss": 1.0465, "step": 10060 }, { "epoch": 0.1, "learning_rate": 0.0002515, "loss": 1.0318, "step": 10070 }, { "epoch": 0.1, "learning_rate": 0.00025174999999999997, "loss": 1.0378, "step": 10080 }, { "epoch": 0.1, "learning_rate": 0.00025199999999999995, "loss": 1.0406, "step": 10090 }, { "epoch": 0.1, "learning_rate": 0.00025225, "loss": 1.0562, "step": 10100 }, { "epoch": 0.1, "learning_rate": 0.00025249999999999996, "loss": 1.0432, "step": 10110 }, { "epoch": 0.1, "learning_rate": 0.00025275, "loss": 1.0308, "step": 10120 }, { "epoch": 0.1, "learning_rate": 0.00025299999999999997, "loss": 1.0178, "step": 10130 }, { "epoch": 0.1, "learning_rate": 0.00025324999999999995, "loss": 1.0565, "step": 10140 }, { "epoch": 0.1, "learning_rate": 0.0002535, "loss": 1.0464, "step": 10150 }, { "epoch": 0.1, "learning_rate": 0.00025374999999999996, "loss": 1.0578, "step": 10160 }, { "epoch": 0.1, "learning_rate": 0.000254, "loss": 1.0467, "step": 10170 }, { "epoch": 0.1, "learning_rate": 0.00025425, "loss": 1.0362, "step": 10180 }, { "epoch": 0.1, "learning_rate": 0.0002545, "loss": 1.0415, "step": 10190 }, { "epoch": 0.1, "learning_rate": 0.00025475, "loss": 1.0466, "step": 10200 }, { "epoch": 0.1, "learning_rate": 0.00025499999999999996, "loss": 1.0281, "step": 10210 }, { "epoch": 0.1, "learning_rate": 0.00025525, "loss": 1.0507, "step": 10220 }, { "epoch": 0.1, "learning_rate": 0.0002555, "loss": 1.0262, "step": 10230 }, { "epoch": 0.1, "learning_rate": 0.00025575, "loss": 1.0221, "step": 10240 }, { "epoch": 0.1, "learning_rate": 0.000256, "loss": 1.0043, "step": 10250 }, { "epoch": 0.1, "learning_rate": 0.00025624999999999997, "loss": 1.0257, "step": 10260 }, { "epoch": 0.1, "learning_rate": 0.00025649999999999995, "loss": 1.0277, "step": 10270 }, { "epoch": 0.1, "learning_rate": 0.00025675, "loss": 1.0356, "step": 10280 }, { "epoch": 0.1, "learning_rate": 0.00025699999999999996, "loss": 1.0249, "step": 10290 }, { "epoch": 0.1, "learning_rate": 0.00025725, "loss": 1.0259, "step": 10300 }, { "epoch": 0.1, "learning_rate": 0.00025749999999999997, "loss": 0.9973, "step": 10310 }, { "epoch": 0.1, "learning_rate": 0.00025774999999999995, "loss": 1.0064, "step": 10320 }, { "epoch": 0.1, "learning_rate": 0.000258, "loss": 1.0049, "step": 10330 }, { "epoch": 0.1, "learning_rate": 0.00025824999999999996, "loss": 1.0566, "step": 10340 }, { "epoch": 0.1, "learning_rate": 0.0002585, "loss": 1.0659, "step": 10350 }, { "epoch": 0.1, "learning_rate": 0.00025875, "loss": 1.0536, "step": 10360 }, { "epoch": 0.1, "learning_rate": 0.00025899999999999995, "loss": 1.0511, "step": 10370 }, { "epoch": 0.1, "learning_rate": 0.00025925, "loss": 1.0522, "step": 10380 }, { "epoch": 0.1, "learning_rate": 0.00025949999999999997, "loss": 1.0431, "step": 10390 }, { "epoch": 0.1, "learning_rate": 0.00025975, "loss": 1.0517, "step": 10400 }, { "epoch": 0.1, "learning_rate": 0.00026, "loss": 1.0646, "step": 10410 }, { "epoch": 0.1, "learning_rate": 0.00026025, "loss": 1.0405, "step": 10420 }, { "epoch": 0.1, "learning_rate": 0.0002605, "loss": 1.0417, "step": 10430 }, { "epoch": 0.1, "learning_rate": 0.00026074999999999997, "loss": 1.0508, "step": 10440 }, { "epoch": 0.1, "learning_rate": 0.000261, "loss": 1.0434, "step": 10450 }, { "epoch": 0.1, "learning_rate": 0.00026125, "loss": 1.0584, "step": 10460 }, { "epoch": 0.1, "learning_rate": 0.0002615, "loss": 1.069, "step": 10470 }, { "epoch": 0.1, "learning_rate": 0.00026175, "loss": 1.0468, "step": 10480 }, { "epoch": 0.1, "learning_rate": 0.00026199999999999997, "loss": 1.0446, "step": 10490 }, { "epoch": 0.1, "learning_rate": 0.00026224999999999995, "loss": 1.0502, "step": 10500 }, { "epoch": 0.11, "learning_rate": 0.0002625, "loss": 1.0355, "step": 10510 }, { "epoch": 0.11, "learning_rate": 0.00026274999999999996, "loss": 1.0702, "step": 10520 }, { "epoch": 0.11, "learning_rate": 0.000263, "loss": 1.0659, "step": 10530 }, { "epoch": 0.11, "learning_rate": 0.00026325, "loss": 1.0434, "step": 10540 }, { "epoch": 0.11, "learning_rate": 0.00026349999999999995, "loss": 1.0538, "step": 10550 }, { "epoch": 0.11, "learning_rate": 0.00026375, "loss": 1.0319, "step": 10560 }, { "epoch": 0.11, "learning_rate": 0.00026399999999999997, "loss": 1.0367, "step": 10570 }, { "epoch": 0.11, "learning_rate": 0.00026425, "loss": 1.0461, "step": 10580 }, { "epoch": 0.11, "learning_rate": 0.0002645, "loss": 1.0409, "step": 10590 }, { "epoch": 0.11, "learning_rate": 0.00026474999999999996, "loss": 1.0372, "step": 10600 }, { "epoch": 0.11, "learning_rate": 0.000265, "loss": 1.048, "step": 10610 }, { "epoch": 0.11, "learning_rate": 0.00026524999999999997, "loss": 1.0323, "step": 10620 }, { "epoch": 0.11, "learning_rate": 0.0002655, "loss": 1.0342, "step": 10630 }, { "epoch": 0.11, "learning_rate": 0.00026575, "loss": 1.0125, "step": 10640 }, { "epoch": 0.11, "learning_rate": 0.000266, "loss": 1.0315, "step": 10650 }, { "epoch": 0.11, "learning_rate": 0.00026624999999999994, "loss": 1.0279, "step": 10660 }, { "epoch": 0.11, "learning_rate": 0.00026649999999999997, "loss": 1.0302, "step": 10670 }, { "epoch": 0.11, "learning_rate": 0.00026674999999999995, "loss": 1.0211, "step": 10680 }, { "epoch": 0.11, "learning_rate": 0.000267, "loss": 1.0223, "step": 10690 }, { "epoch": 0.11, "learning_rate": 0.00026724999999999996, "loss": 1.0169, "step": 10700 }, { "epoch": 0.11, "learning_rate": 0.0002675, "loss": 1.036, "step": 10710 }, { "epoch": 0.11, "learning_rate": 0.00026772499999999996, "loss": 1.0318, "step": 10720 }, { "epoch": 0.11, "learning_rate": 0.000267975, "loss": 1.0206, "step": 10730 }, { "epoch": 0.11, "learning_rate": 0.000268225, "loss": 0.999, "step": 10740 }, { "epoch": 0.11, "learning_rate": 0.000268475, "loss": 0.9801, "step": 10750 }, { "epoch": 0.11, "learning_rate": 0.000268725, "loss": 0.9834, "step": 10760 }, { "epoch": 0.11, "learning_rate": 0.00026897499999999996, "loss": 1.0261, "step": 10770 }, { "epoch": 0.11, "learning_rate": 0.00026922499999999994, "loss": 1.0063, "step": 10780 }, { "epoch": 0.11, "learning_rate": 0.000269475, "loss": 1.0153, "step": 10790 }, { "epoch": 0.11, "learning_rate": 0.00026972499999999996, "loss": 0.9967, "step": 10800 }, { "epoch": 0.11, "learning_rate": 0.000269975, "loss": 0.9976, "step": 10810 }, { "epoch": 0.11, "learning_rate": 0.00027022499999999997, "loss": 1.0015, "step": 10820 }, { "epoch": 0.11, "learning_rate": 0.00027047499999999995, "loss": 1.0215, "step": 10830 }, { "epoch": 0.11, "learning_rate": 0.000270725, "loss": 1.0095, "step": 10840 }, { "epoch": 0.11, "learning_rate": 0.00027097499999999996, "loss": 1.0341, "step": 10850 }, { "epoch": 0.11, "learning_rate": 0.000271225, "loss": 1.0016, "step": 10860 }, { "epoch": 0.11, "learning_rate": 0.00027147499999999997, "loss": 1.0226, "step": 10870 }, { "epoch": 0.11, "learning_rate": 0.000271725, "loss": 0.9989, "step": 10880 }, { "epoch": 0.11, "learning_rate": 0.000271975, "loss": 1.006, "step": 10890 }, { "epoch": 0.11, "learning_rate": 0.00027222499999999996, "loss": 0.9861, "step": 10900 }, { "epoch": 0.11, "learning_rate": 0.000272475, "loss": 1.0227, "step": 10910 }, { "epoch": 0.11, "learning_rate": 0.000272725, "loss": 0.9842, "step": 10920 }, { "epoch": 0.11, "learning_rate": 0.000272975, "loss": 0.9882, "step": 10930 }, { "epoch": 0.11, "learning_rate": 0.000273225, "loss": 0.9849, "step": 10940 }, { "epoch": 0.11, "learning_rate": 0.00027347499999999997, "loss": 1.003, "step": 10950 }, { "epoch": 0.11, "learning_rate": 0.000273725, "loss": 1.0014, "step": 10960 }, { "epoch": 0.11, "learning_rate": 0.000273975, "loss": 1.0029, "step": 10970 }, { "epoch": 0.11, "learning_rate": 0.00027422499999999996, "loss": 1.0124, "step": 10980 }, { "epoch": 0.11, "learning_rate": 0.000274475, "loss": 1.0175, "step": 10990 }, { "epoch": 0.11, "learning_rate": 0.00027472499999999997, "loss": 0.9947, "step": 11000 }, { "epoch": 0.11, "learning_rate": 0.00027497499999999995, "loss": 0.9935, "step": 11010 }, { "epoch": 0.11, "learning_rate": 0.000275225, "loss": 1.0061, "step": 11020 }, { "epoch": 0.11, "learning_rate": 0.00027547499999999996, "loss": 1.0129, "step": 11030 }, { "epoch": 0.11, "learning_rate": 0.000275725, "loss": 1.0046, "step": 11040 }, { "epoch": 0.11, "learning_rate": 0.00027597499999999997, "loss": 0.9996, "step": 11050 }, { "epoch": 0.11, "learning_rate": 0.00027622499999999995, "loss": 0.9892, "step": 11060 }, { "epoch": 0.11, "learning_rate": 0.000276475, "loss": 0.9932, "step": 11070 }, { "epoch": 0.11, "learning_rate": 0.00027672499999999996, "loss": 1.021, "step": 11080 }, { "epoch": 0.11, "learning_rate": 0.000276975, "loss": 1.0211, "step": 11090 }, { "epoch": 0.11, "learning_rate": 0.000277225, "loss": 1.0129, "step": 11100 }, { "epoch": 0.11, "learning_rate": 0.000277475, "loss": 0.9703, "step": 11110 }, { "epoch": 0.11, "learning_rate": 0.000277725, "loss": 0.9861, "step": 11120 }, { "epoch": 0.11, "learning_rate": 0.00027797499999999997, "loss": 0.9778, "step": 11130 }, { "epoch": 0.11, "learning_rate": 0.000278225, "loss": 0.9914, "step": 11140 }, { "epoch": 0.11, "learning_rate": 0.000278475, "loss": 0.9898, "step": 11150 }, { "epoch": 0.11, "learning_rate": 0.000278725, "loss": 1.0094, "step": 11160 }, { "epoch": 0.11, "learning_rate": 0.00027897499999999994, "loss": 0.9753, "step": 11170 }, { "epoch": 0.11, "learning_rate": 0.00027922499999999997, "loss": 0.9997, "step": 11180 }, { "epoch": 0.11, "learning_rate": 0.00027947499999999995, "loss": 0.9668, "step": 11190 }, { "epoch": 0.11, "learning_rate": 0.000279725, "loss": 1.0121, "step": 11200 }, { "epoch": 0.11, "learning_rate": 0.00027997499999999996, "loss": 1.0003, "step": 11210 }, { "epoch": 0.11, "learning_rate": 0.000280225, "loss": 1.0057, "step": 11220 }, { "epoch": 0.11, "learning_rate": 0.000280475, "loss": 1.0012, "step": 11230 }, { "epoch": 0.11, "learning_rate": 0.00028072499999999995, "loss": 0.9905, "step": 11240 }, { "epoch": 0.11, "learning_rate": 0.000280975, "loss": 0.9816, "step": 11250 }, { "epoch": 0.11, "learning_rate": 0.00028122499999999996, "loss": 1.0025, "step": 11260 }, { "epoch": 0.11, "learning_rate": 0.000281475, "loss": 0.9853, "step": 11270 }, { "epoch": 0.11, "learning_rate": 0.000281725, "loss": 0.9964, "step": 11280 }, { "epoch": 0.11, "learning_rate": 0.00028197499999999996, "loss": 0.9941, "step": 11290 }, { "epoch": 0.11, "learning_rate": 0.000282225, "loss": 0.9888, "step": 11300 }, { "epoch": 0.11, "learning_rate": 0.00028247499999999997, "loss": 0.9934, "step": 11310 }, { "epoch": 0.11, "learning_rate": 0.000282725, "loss": 1.0022, "step": 11320 }, { "epoch": 0.11, "learning_rate": 0.000282975, "loss": 0.9831, "step": 11330 }, { "epoch": 0.11, "learning_rate": 0.000283225, "loss": 1.0045, "step": 11340 }, { "epoch": 0.11, "learning_rate": 0.00028347499999999994, "loss": 0.9923, "step": 11350 }, { "epoch": 0.11, "learning_rate": 0.00028372499999999997, "loss": 1.0049, "step": 11360 }, { "epoch": 0.11, "learning_rate": 0.00028397499999999995, "loss": 0.9778, "step": 11370 }, { "epoch": 0.11, "learning_rate": 0.000284225, "loss": 0.9896, "step": 11380 }, { "epoch": 0.11, "learning_rate": 0.00028447499999999996, "loss": 0.9914, "step": 11390 }, { "epoch": 0.11, "learning_rate": 0.00028472499999999994, "loss": 1.005, "step": 11400 }, { "epoch": 0.11, "learning_rate": 0.000284975, "loss": 1.0014, "step": 11410 }, { "epoch": 0.11, "learning_rate": 0.00028522499999999995, "loss": 1.0103, "step": 11420 }, { "epoch": 0.11, "learning_rate": 0.000285475, "loss": 0.9762, "step": 11430 }, { "epoch": 0.11, "learning_rate": 0.00028572499999999997, "loss": 1.002, "step": 11440 }, { "epoch": 0.11, "learning_rate": 0.000285975, "loss": 0.9845, "step": 11450 }, { "epoch": 0.11, "learning_rate": 0.000286225, "loss": 0.9944, "step": 11460 }, { "epoch": 0.11, "learning_rate": 0.00028647499999999996, "loss": 0.9813, "step": 11470 }, { "epoch": 0.11, "learning_rate": 0.000286725, "loss": 0.9881, "step": 11480 }, { "epoch": 0.11, "learning_rate": 0.00028697499999999997, "loss": 0.9478, "step": 11490 }, { "epoch": 0.12, "learning_rate": 0.000287225, "loss": 0.976, "step": 11500 }, { "epoch": 0.12, "learning_rate": 0.000287475, "loss": 0.9837, "step": 11510 }, { "epoch": 0.12, "learning_rate": 0.00028772499999999996, "loss": 1.0044, "step": 11520 }, { "epoch": 0.12, "learning_rate": 0.000287975, "loss": 0.9704, "step": 11530 }, { "epoch": 0.12, "learning_rate": 0.00028822499999999997, "loss": 0.9819, "step": 11540 }, { "epoch": 0.12, "learning_rate": 0.000288475, "loss": 0.9807, "step": 11550 }, { "epoch": 0.12, "learning_rate": 0.000288725, "loss": 0.9934, "step": 11560 }, { "epoch": 0.12, "learning_rate": 0.00028897499999999996, "loss": 0.9772, "step": 11570 }, { "epoch": 0.12, "learning_rate": 0.00028922499999999994, "loss": 0.9869, "step": 11580 }, { "epoch": 0.12, "learning_rate": 0.000289475, "loss": 1.0183, "step": 11590 }, { "epoch": 0.12, "learning_rate": 0.00028972499999999995, "loss": 1.0031, "step": 11600 }, { "epoch": 0.12, "learning_rate": 0.000289975, "loss": 1.005, "step": 11610 }, { "epoch": 0.12, "learning_rate": 0.00029022499999999997, "loss": 0.9948, "step": 11620 }, { "epoch": 0.12, "learning_rate": 0.000290475, "loss": 1.0125, "step": 11630 }, { "epoch": 0.12, "learning_rate": 0.000290725, "loss": 1.0066, "step": 11640 }, { "epoch": 0.12, "learning_rate": 0.00029097499999999996, "loss": 1.0183, "step": 11650 }, { "epoch": 0.12, "learning_rate": 0.000291225, "loss": 1.0012, "step": 11660 }, { "epoch": 0.12, "learning_rate": 0.00029147499999999997, "loss": 0.9977, "step": 11670 }, { "epoch": 0.12, "learning_rate": 0.000291725, "loss": 0.991, "step": 11680 }, { "epoch": 0.12, "learning_rate": 0.000291975, "loss": 0.996, "step": 11690 }, { "epoch": 0.12, "learning_rate": 0.00029222499999999996, "loss": 1.0012, "step": 11700 }, { "epoch": 0.12, "learning_rate": 0.000292475, "loss": 1.0169, "step": 11710 }, { "epoch": 0.12, "learning_rate": 0.00029269999999999996, "loss": 1.0089, "step": 11720 }, { "epoch": 0.12, "learning_rate": 0.00029295, "loss": 0.9878, "step": 11730 }, { "epoch": 0.12, "learning_rate": 0.00029319999999999997, "loss": 0.9738, "step": 11740 }, { "epoch": 0.12, "learning_rate": 0.00029344999999999995, "loss": 1.0032, "step": 11750 }, { "epoch": 0.12, "learning_rate": 0.0002937, "loss": 0.994, "step": 11760 }, { "epoch": 0.12, "learning_rate": 0.00029394999999999996, "loss": 1.005, "step": 11770 }, { "epoch": 0.12, "learning_rate": 0.0002942, "loss": 0.9683, "step": 11780 }, { "epoch": 0.12, "learning_rate": 0.00029445, "loss": 0.983, "step": 11790 }, { "epoch": 0.12, "learning_rate": 0.00029469999999999995, "loss": 0.9743, "step": 11800 }, { "epoch": 0.12, "learning_rate": 0.00029495, "loss": 0.9866, "step": 11810 }, { "epoch": 0.12, "learning_rate": 0.00029519999999999997, "loss": 0.9976, "step": 11820 }, { "epoch": 0.12, "learning_rate": 0.00029545, "loss": 1.0009, "step": 11830 }, { "epoch": 0.12, "learning_rate": 0.0002957, "loss": 1.0055, "step": 11840 }, { "epoch": 0.12, "learning_rate": 0.00029595, "loss": 0.9906, "step": 11850 }, { "epoch": 0.12, "learning_rate": 0.00029619999999999994, "loss": 0.9845, "step": 11860 }, { "epoch": 0.12, "learning_rate": 0.00029644999999999997, "loss": 0.9647, "step": 11870 }, { "epoch": 0.12, "learning_rate": 0.00029669999999999995, "loss": 1.0115, "step": 11880 }, { "epoch": 0.12, "learning_rate": 0.00029695, "loss": 1.0021, "step": 11890 }, { "epoch": 0.12, "learning_rate": 0.00029719999999999996, "loss": 1.0008, "step": 11900 }, { "epoch": 0.12, "learning_rate": 0.00029745, "loss": 0.9766, "step": 11910 }, { "epoch": 0.12, "learning_rate": 0.00029769999999999997, "loss": 0.966, "step": 11920 }, { "epoch": 0.12, "learning_rate": 0.00029794999999999995, "loss": 0.991, "step": 11930 }, { "epoch": 0.12, "learning_rate": 0.0002982, "loss": 0.9696, "step": 11940 }, { "epoch": 0.12, "learning_rate": 0.00029844999999999996, "loss": 0.9896, "step": 11950 }, { "epoch": 0.12, "learning_rate": 0.0002987, "loss": 0.9878, "step": 11960 }, { "epoch": 0.12, "learning_rate": 0.00029895, "loss": 0.9877, "step": 11970 }, { "epoch": 0.12, "learning_rate": 0.00029919999999999995, "loss": 0.9899, "step": 11980 }, { "epoch": 0.12, "learning_rate": 0.00029945, "loss": 0.9921, "step": 11990 }, { "epoch": 0.12, "learning_rate": 0.00029969999999999997, "loss": 0.9874, "step": 12000 }, { "epoch": 0.12, "learning_rate": 0.00029995, "loss": 1.0079, "step": 12010 }, { "epoch": 0.12, "learning_rate": 0.0003002, "loss": 0.9987, "step": 12020 }, { "epoch": 0.12, "learning_rate": 0.00030045, "loss": 0.9862, "step": 12030 }, { "epoch": 0.12, "learning_rate": 0.00030069999999999994, "loss": 0.9898, "step": 12040 }, { "epoch": 0.12, "learning_rate": 0.00030095, "loss": 0.9914, "step": 12050 }, { "epoch": 0.12, "learning_rate": 0.00030119999999999995, "loss": 0.9903, "step": 12060 }, { "epoch": 0.12, "learning_rate": 0.00030144999999999993, "loss": 0.9978, "step": 12070 }, { "epoch": 0.12, "learning_rate": 0.00030169999999999996, "loss": 0.9808, "step": 12080 }, { "epoch": 0.12, "learning_rate": 0.00030194999999999994, "loss": 1.0049, "step": 12090 }, { "epoch": 0.12, "learning_rate": 0.0003022, "loss": 0.9987, "step": 12100 }, { "epoch": 0.12, "learning_rate": 0.00030244999999999995, "loss": 0.9894, "step": 12110 }, { "epoch": 0.12, "learning_rate": 0.00030269999999999993, "loss": 0.9732, "step": 12120 }, { "epoch": 0.12, "learning_rate": 0.00030294999999999996, "loss": 0.9937, "step": 12130 }, { "epoch": 0.12, "learning_rate": 0.00030319999999999994, "loss": 0.994, "step": 12140 }, { "epoch": 0.12, "learning_rate": 0.00030345, "loss": 1.0007, "step": 12150 }, { "epoch": 0.12, "learning_rate": 0.00030369999999999996, "loss": 0.989, "step": 12160 }, { "epoch": 0.12, "learning_rate": 0.00030395, "loss": 0.9978, "step": 12170 }, { "epoch": 0.12, "learning_rate": 0.00030419999999999997, "loss": 0.993, "step": 12180 }, { "epoch": 0.12, "learning_rate": 0.00030444999999999995, "loss": 0.9995, "step": 12190 }, { "epoch": 0.12, "learning_rate": 0.0003047, "loss": 0.9951, "step": 12200 }, { "epoch": 0.12, "learning_rate": 0.00030494999999999996, "loss": 0.9982, "step": 12210 }, { "epoch": 0.12, "learning_rate": 0.0003052, "loss": 1.0111, "step": 12220 }, { "epoch": 0.12, "learning_rate": 0.00030544999999999997, "loss": 0.9741, "step": 12230 }, { "epoch": 0.12, "learning_rate": 0.00030569999999999995, "loss": 0.9882, "step": 12240 }, { "epoch": 0.12, "learning_rate": 0.00030595, "loss": 0.9801, "step": 12250 }, { "epoch": 0.12, "learning_rate": 0.00030619999999999996, "loss": 0.9705, "step": 12260 }, { "epoch": 0.12, "learning_rate": 0.00030645, "loss": 0.9911, "step": 12270 }, { "epoch": 0.12, "learning_rate": 0.0003067, "loss": 0.9778, "step": 12280 }, { "epoch": 0.12, "learning_rate": 0.00030694999999999995, "loss": 0.9778, "step": 12290 }, { "epoch": 0.12, "learning_rate": 0.0003072, "loss": 0.9886, "step": 12300 }, { "epoch": 0.12, "learning_rate": 0.00030744999999999997, "loss": 0.9936, "step": 12310 }, { "epoch": 0.12, "learning_rate": 0.0003077, "loss": 0.9837, "step": 12320 }, { "epoch": 0.12, "learning_rate": 0.00030795, "loss": 0.9883, "step": 12330 }, { "epoch": 0.12, "learning_rate": 0.0003082, "loss": 0.9718, "step": 12340 }, { "epoch": 0.12, "learning_rate": 0.00030845, "loss": 0.9745, "step": 12350 }, { "epoch": 0.12, "learning_rate": 0.00030869999999999997, "loss": 0.9696, "step": 12360 }, { "epoch": 0.12, "learning_rate": 0.00030895, "loss": 0.9832, "step": 12370 }, { "epoch": 0.12, "learning_rate": 0.0003092, "loss": 0.9874, "step": 12380 }, { "epoch": 0.12, "learning_rate": 0.00030945, "loss": 0.9961, "step": 12390 }, { "epoch": 0.12, "learning_rate": 0.0003097, "loss": 0.9864, "step": 12400 }, { "epoch": 0.12, "learning_rate": 0.0003099499999999999, "loss": 0.967, "step": 12410 }, { "epoch": 0.12, "learning_rate": 0.0003102, "loss": 0.9945, "step": 12420 }, { "epoch": 0.12, "learning_rate": 0.00031044999999999993, "loss": 0.9778, "step": 12430 }, { "epoch": 0.12, "learning_rate": 0.0003107, "loss": 0.9881, "step": 12440 }, { "epoch": 0.12, "learning_rate": 0.00031094999999999994, "loss": 0.9843, "step": 12450 }, { "epoch": 0.12, "learning_rate": 0.00031120000000000003, "loss": 0.9742, "step": 12460 }, { "epoch": 0.12, "learning_rate": 0.00031144999999999995, "loss": 0.9726, "step": 12470 }, { "epoch": 0.12, "learning_rate": 0.00031169999999999993, "loss": 0.9565, "step": 12480 }, { "epoch": 0.12, "learning_rate": 0.00031194999999999997, "loss": 0.9647, "step": 12490 }, { "epoch": 0.12, "learning_rate": 0.00031219999999999995, "loss": 0.9817, "step": 12500 }, { "epoch": 0.12, "eval_accuracy": 0.7950684202995898, "eval_loss": 1.0478515625, "eval_runtime": 97.1269, "eval_samples_per_second": 823.665, "eval_steps_per_second": 1.616, "step": 12500 }, { "epoch": 0.13, "learning_rate": 0.00031245, "loss": 0.9518, "step": 12510 }, { "epoch": 0.13, "learning_rate": 0.00031269999999999996, "loss": 0.9495, "step": 12520 }, { "epoch": 0.13, "learning_rate": 0.00031294999999999994, "loss": 0.9507, "step": 12530 }, { "epoch": 0.13, "learning_rate": 0.00031319999999999997, "loss": 0.9361, "step": 12540 }, { "epoch": 0.13, "learning_rate": 0.00031344999999999995, "loss": 0.944, "step": 12550 }, { "epoch": 0.13, "learning_rate": 0.0003137, "loss": 0.9476, "step": 12560 }, { "epoch": 0.13, "learning_rate": 0.00031394999999999996, "loss": 0.9425, "step": 12570 }, { "epoch": 0.13, "learning_rate": 0.00031419999999999994, "loss": 0.9681, "step": 12580 }, { "epoch": 0.13, "learning_rate": 0.00031444999999999997, "loss": 0.9618, "step": 12590 }, { "epoch": 0.13, "learning_rate": 0.00031469999999999995, "loss": 0.9591, "step": 12600 }, { "epoch": 0.13, "learning_rate": 0.00031495, "loss": 0.9782, "step": 12610 }, { "epoch": 0.13, "learning_rate": 0.00031519999999999996, "loss": 0.9584, "step": 12620 }, { "epoch": 0.13, "learning_rate": 0.00031545, "loss": 0.9911, "step": 12630 }, { "epoch": 0.13, "learning_rate": 0.0003157, "loss": 0.9893, "step": 12640 }, { "epoch": 0.13, "learning_rate": 0.00031594999999999995, "loss": 0.9754, "step": 12650 }, { "epoch": 0.13, "learning_rate": 0.0003162, "loss": 0.9664, "step": 12660 }, { "epoch": 0.13, "learning_rate": 0.00031644999999999997, "loss": 0.9677, "step": 12670 }, { "epoch": 0.13, "learning_rate": 0.0003167, "loss": 0.9835, "step": 12680 }, { "epoch": 0.13, "learning_rate": 0.00031695, "loss": 0.9739, "step": 12690 }, { "epoch": 0.13, "learning_rate": 0.00031719999999999996, "loss": 0.9776, "step": 12700 }, { "epoch": 0.13, "learning_rate": 0.00031745, "loss": 0.9723, "step": 12710 }, { "epoch": 0.13, "learning_rate": 0.000317675, "loss": 0.9647, "step": 12720 }, { "epoch": 0.13, "learning_rate": 0.000317925, "loss": 0.9681, "step": 12730 }, { "epoch": 0.13, "learning_rate": 0.000318175, "loss": 0.9747, "step": 12740 }, { "epoch": 0.13, "learning_rate": 0.000318425, "loss": 0.9683, "step": 12750 }, { "epoch": 0.13, "learning_rate": 0.0003186749999999999, "loss": 0.9854, "step": 12760 }, { "epoch": 0.13, "learning_rate": 0.00031892499999999996, "loss": 0.9822, "step": 12770 }, { "epoch": 0.13, "learning_rate": 0.00031917499999999994, "loss": 0.9719, "step": 12780 }, { "epoch": 0.13, "learning_rate": 0.00031942499999999997, "loss": 0.9675, "step": 12790 }, { "epoch": 0.13, "learning_rate": 0.00031967499999999995, "loss": 0.9678, "step": 12800 }, { "epoch": 0.13, "learning_rate": 0.00031992499999999993, "loss": 0.9665, "step": 12810 }, { "epoch": 0.13, "learning_rate": 0.00032017499999999996, "loss": 0.9547, "step": 12820 }, { "epoch": 0.13, "learning_rate": 0.00032042499999999994, "loss": 0.9615, "step": 12830 }, { "epoch": 0.13, "learning_rate": 0.000320675, "loss": 0.9436, "step": 12840 }, { "epoch": 0.13, "learning_rate": 0.00032092499999999995, "loss": 0.985, "step": 12850 }, { "epoch": 0.13, "learning_rate": 0.000321175, "loss": 0.9761, "step": 12860 }, { "epoch": 0.13, "learning_rate": 0.00032142499999999997, "loss": 0.9892, "step": 12870 }, { "epoch": 0.13, "learning_rate": 0.00032167499999999994, "loss": 0.9831, "step": 12880 }, { "epoch": 0.13, "learning_rate": 0.000321925, "loss": 0.978, "step": 12890 }, { "epoch": 0.13, "learning_rate": 0.00032217499999999996, "loss": 0.9744, "step": 12900 }, { "epoch": 0.13, "learning_rate": 0.000322425, "loss": 0.9768, "step": 12910 }, { "epoch": 0.13, "learning_rate": 0.00032267499999999997, "loss": 0.9772, "step": 12920 }, { "epoch": 0.13, "learning_rate": 0.00032292499999999995, "loss": 0.9737, "step": 12930 }, { "epoch": 0.13, "learning_rate": 0.000323175, "loss": 0.9604, "step": 12940 }, { "epoch": 0.13, "learning_rate": 0.00032342499999999996, "loss": 0.9649, "step": 12950 }, { "epoch": 0.13, "learning_rate": 0.000323675, "loss": 0.9837, "step": 12960 }, { "epoch": 0.13, "learning_rate": 0.00032392499999999997, "loss": 0.9745, "step": 12970 }, { "epoch": 0.13, "learning_rate": 0.00032417499999999995, "loss": 0.9844, "step": 12980 }, { "epoch": 0.13, "learning_rate": 0.000324425, "loss": 0.9779, "step": 12990 }, { "epoch": 0.13, "learning_rate": 0.00032467499999999996, "loss": 0.9783, "step": 13000 }, { "epoch": 0.13, "learning_rate": 0.000324925, "loss": 0.9821, "step": 13010 }, { "epoch": 0.13, "learning_rate": 0.000325175, "loss": 0.9589, "step": 13020 }, { "epoch": 0.13, "learning_rate": 0.000325425, "loss": 0.977, "step": 13030 }, { "epoch": 0.13, "learning_rate": 0.000325675, "loss": 0.9504, "step": 13040 }, { "epoch": 0.13, "learning_rate": 0.00032592499999999997, "loss": 0.9621, "step": 13050 }, { "epoch": 0.13, "learning_rate": 0.000326175, "loss": 0.9669, "step": 13060 }, { "epoch": 0.13, "learning_rate": 0.000326425, "loss": 0.9712, "step": 13070 }, { "epoch": 0.13, "learning_rate": 0.000326675, "loss": 0.9772, "step": 13080 }, { "epoch": 0.13, "learning_rate": 0.000326925, "loss": 0.969, "step": 13090 }, { "epoch": 0.13, "learning_rate": 0.0003271749999999999, "loss": 0.9666, "step": 13100 }, { "epoch": 0.13, "learning_rate": 0.000327425, "loss": 0.9537, "step": 13110 }, { "epoch": 0.13, "learning_rate": 0.00032767499999999993, "loss": 0.9664, "step": 13120 }, { "epoch": 0.13, "learning_rate": 0.000327925, "loss": 0.9789, "step": 13130 }, { "epoch": 0.13, "learning_rate": 0.00032817499999999994, "loss": 0.9689, "step": 13140 }, { "epoch": 0.13, "learning_rate": 0.0003284249999999999, "loss": 0.9689, "step": 13150 }, { "epoch": 0.13, "learning_rate": 0.00032867499999999995, "loss": 0.9678, "step": 13160 }, { "epoch": 0.13, "learning_rate": 0.00032892499999999993, "loss": 0.968, "step": 13170 }, { "epoch": 0.13, "learning_rate": 0.00032917499999999996, "loss": 0.952, "step": 13180 }, { "epoch": 0.13, "learning_rate": 0.00032942499999999994, "loss": 0.9559, "step": 13190 }, { "epoch": 0.13, "learning_rate": 0.000329675, "loss": 0.963, "step": 13200 }, { "epoch": 0.13, "learning_rate": 0.00032992499999999996, "loss": 0.9352, "step": 13210 }, { "epoch": 0.13, "learning_rate": 0.00033017499999999993, "loss": 0.9508, "step": 13220 }, { "epoch": 0.13, "learning_rate": 0.00033042499999999997, "loss": 0.9283, "step": 13230 }, { "epoch": 0.13, "learning_rate": 0.00033067499999999995, "loss": 0.9382, "step": 13240 }, { "epoch": 0.13, "learning_rate": 0.000330925, "loss": 0.944, "step": 13250 }, { "epoch": 0.13, "learning_rate": 0.00033117499999999996, "loss": 0.951, "step": 13260 }, { "epoch": 0.13, "learning_rate": 0.00033142499999999994, "loss": 0.9331, "step": 13270 }, { "epoch": 0.13, "learning_rate": 0.00033167499999999997, "loss": 0.9377, "step": 13280 }, { "epoch": 0.13, "learning_rate": 0.00033192499999999995, "loss": 0.934, "step": 13290 }, { "epoch": 0.13, "learning_rate": 0.000332175, "loss": 0.9317, "step": 13300 }, { "epoch": 0.13, "learning_rate": 0.00033242499999999996, "loss": 0.929, "step": 13310 }, { "epoch": 0.13, "learning_rate": 0.000332675, "loss": 0.9474, "step": 13320 }, { "epoch": 0.13, "learning_rate": 0.000332925, "loss": 0.9454, "step": 13330 }, { "epoch": 0.13, "learning_rate": 0.00033317499999999995, "loss": 0.9484, "step": 13340 }, { "epoch": 0.13, "learning_rate": 0.000333425, "loss": 0.9346, "step": 13350 }, { "epoch": 0.13, "learning_rate": 0.00033367499999999997, "loss": 0.9379, "step": 13360 }, { "epoch": 0.13, "learning_rate": 0.000333925, "loss": 0.93, "step": 13370 }, { "epoch": 0.13, "learning_rate": 0.000334175, "loss": 0.9487, "step": 13380 }, { "epoch": 0.13, "learning_rate": 0.00033442499999999996, "loss": 0.9495, "step": 13390 }, { "epoch": 0.13, "learning_rate": 0.000334675, "loss": 0.9382, "step": 13400 }, { "epoch": 0.13, "learning_rate": 0.00033492499999999997, "loss": 0.9315, "step": 13410 }, { "epoch": 0.13, "learning_rate": 0.000335175, "loss": 0.9374, "step": 13420 }, { "epoch": 0.13, "learning_rate": 0.000335425, "loss": 0.9448, "step": 13430 }, { "epoch": 0.13, "learning_rate": 0.00033567499999999996, "loss": 0.9493, "step": 13440 }, { "epoch": 0.13, "learning_rate": 0.000335925, "loss": 0.9409, "step": 13450 }, { "epoch": 0.13, "learning_rate": 0.00033617499999999997, "loss": 0.9346, "step": 13460 }, { "epoch": 0.13, "learning_rate": 0.000336425, "loss": 0.9601, "step": 13470 }, { "epoch": 0.13, "learning_rate": 0.000336675, "loss": 0.9639, "step": 13480 }, { "epoch": 0.13, "learning_rate": 0.000336925, "loss": 0.9514, "step": 13490 }, { "epoch": 0.14, "learning_rate": 0.000337175, "loss": 0.974, "step": 13500 }, { "epoch": 0.14, "learning_rate": 0.0003374249999999999, "loss": 0.9714, "step": 13510 }, { "epoch": 0.14, "learning_rate": 0.000337675, "loss": 0.9557, "step": 13520 }, { "epoch": 0.14, "learning_rate": 0.00033792499999999993, "loss": 0.9642, "step": 13530 }, { "epoch": 0.14, "learning_rate": 0.000338175, "loss": 0.957, "step": 13540 }, { "epoch": 0.14, "learning_rate": 0.00033842499999999995, "loss": 0.9521, "step": 13550 }, { "epoch": 0.14, "learning_rate": 0.0003386749999999999, "loss": 0.9619, "step": 13560 }, { "epoch": 0.14, "learning_rate": 0.00033892499999999996, "loss": 0.9743, "step": 13570 }, { "epoch": 0.14, "learning_rate": 0.00033917499999999994, "loss": 0.9548, "step": 13580 }, { "epoch": 0.14, "learning_rate": 0.00033942499999999997, "loss": 0.9571, "step": 13590 }, { "epoch": 0.14, "learning_rate": 0.00033967499999999995, "loss": 0.9536, "step": 13600 }, { "epoch": 0.14, "learning_rate": 0.000339925, "loss": 0.9645, "step": 13610 }, { "epoch": 0.14, "learning_rate": 0.00034017499999999996, "loss": 0.9431, "step": 13620 }, { "epoch": 0.14, "learning_rate": 0.00034042499999999994, "loss": 0.9596, "step": 13630 }, { "epoch": 0.14, "learning_rate": 0.000340675, "loss": 0.9659, "step": 13640 }, { "epoch": 0.14, "learning_rate": 0.00034092499999999995, "loss": 0.9651, "step": 13650 }, { "epoch": 0.14, "learning_rate": 0.000341175, "loss": 0.9674, "step": 13660 }, { "epoch": 0.14, "learning_rate": 0.00034142499999999996, "loss": 0.9548, "step": 13670 }, { "epoch": 0.14, "learning_rate": 0.00034167499999999994, "loss": 0.9649, "step": 13680 }, { "epoch": 0.14, "learning_rate": 0.000341925, "loss": 0.9571, "step": 13690 }, { "epoch": 0.14, "learning_rate": 0.00034217499999999996, "loss": 0.9592, "step": 13700 }, { "epoch": 0.14, "learning_rate": 0.000342425, "loss": 0.9577, "step": 13710 }, { "epoch": 0.14, "learning_rate": 0.00034265, "loss": 0.9515, "step": 13720 }, { "epoch": 0.14, "learning_rate": 0.0003429, "loss": 0.9422, "step": 13730 }, { "epoch": 0.14, "learning_rate": 0.00034314999999999997, "loss": 0.9448, "step": 13740 }, { "epoch": 0.14, "learning_rate": 0.0003434, "loss": 0.9529, "step": 13750 }, { "epoch": 0.14, "learning_rate": 0.00034365, "loss": 0.9589, "step": 13760 }, { "epoch": 0.14, "learning_rate": 0.0003439, "loss": 0.9651, "step": 13770 }, { "epoch": 0.14, "learning_rate": 0.00034415, "loss": 0.9682, "step": 13780 }, { "epoch": 0.14, "learning_rate": 0.00034439999999999997, "loss": 0.942, "step": 13790 }, { "epoch": 0.14, "learning_rate": 0.00034465, "loss": 0.9512, "step": 13800 }, { "epoch": 0.14, "learning_rate": 0.0003449, "loss": 0.9686, "step": 13810 }, { "epoch": 0.14, "learning_rate": 0.00034515, "loss": 0.9516, "step": 13820 }, { "epoch": 0.14, "learning_rate": 0.00034539999999999994, "loss": 0.9463, "step": 13830 }, { "epoch": 0.14, "learning_rate": 0.0003456499999999999, "loss": 0.943, "step": 13840 }, { "epoch": 0.14, "learning_rate": 0.00034589999999999995, "loss": 0.9457, "step": 13850 }, { "epoch": 0.14, "learning_rate": 0.00034614999999999993, "loss": 0.9424, "step": 13860 }, { "epoch": 0.14, "learning_rate": 0.00034639999999999996, "loss": 0.9498, "step": 13870 }, { "epoch": 0.14, "learning_rate": 0.00034664999999999994, "loss": 0.9644, "step": 13880 }, { "epoch": 0.14, "learning_rate": 0.0003469, "loss": 0.9507, "step": 13890 }, { "epoch": 0.14, "learning_rate": 0.00034714999999999995, "loss": 0.9474, "step": 13900 }, { "epoch": 0.14, "learning_rate": 0.00034739999999999993, "loss": 0.9442, "step": 13910 }, { "epoch": 0.14, "learning_rate": 0.00034764999999999997, "loss": 0.9538, "step": 13920 }, { "epoch": 0.14, "learning_rate": 0.00034789999999999995, "loss": 0.9647, "step": 13930 }, { "epoch": 0.14, "learning_rate": 0.00034815, "loss": 0.9311, "step": 13940 }, { "epoch": 0.14, "learning_rate": 0.00034839999999999996, "loss": 0.9468, "step": 13950 }, { "epoch": 0.14, "learning_rate": 0.00034864999999999994, "loss": 0.9446, "step": 13960 }, { "epoch": 0.14, "learning_rate": 0.00034889999999999997, "loss": 0.9407, "step": 13970 }, { "epoch": 0.14, "learning_rate": 0.00034914999999999995, "loss": 0.928, "step": 13980 }, { "epoch": 0.14, "learning_rate": 0.0003494, "loss": 0.9465, "step": 13990 }, { "epoch": 0.14, "learning_rate": 0.00034964999999999996, "loss": 0.9496, "step": 14000 }, { "epoch": 0.14, "learning_rate": 0.0003499, "loss": 0.9511, "step": 14010 }, { "epoch": 0.14, "learning_rate": 0.00035015, "loss": 0.9594, "step": 14020 }, { "epoch": 0.14, "learning_rate": 0.00035039999999999995, "loss": 0.9473, "step": 14030 }, { "epoch": 0.14, "learning_rate": 0.00035065, "loss": 0.9374, "step": 14040 }, { "epoch": 0.14, "learning_rate": 0.00035089999999999996, "loss": 0.9394, "step": 14050 }, { "epoch": 0.14, "learning_rate": 0.00035115, "loss": 0.9547, "step": 14060 }, { "epoch": 0.14, "learning_rate": 0.0003514, "loss": 0.9557, "step": 14070 }, { "epoch": 0.14, "learning_rate": 0.00035164999999999996, "loss": 0.9297, "step": 14080 }, { "epoch": 0.14, "learning_rate": 0.0003519, "loss": 0.9475, "step": 14090 }, { "epoch": 0.14, "learning_rate": 0.00035214999999999997, "loss": 0.9497, "step": 14100 }, { "epoch": 0.14, "learning_rate": 0.0003524, "loss": 0.9467, "step": 14110 }, { "epoch": 0.14, "learning_rate": 0.00035265, "loss": 0.9632, "step": 14120 }, { "epoch": 0.14, "learning_rate": 0.00035289999999999996, "loss": 0.9579, "step": 14130 }, { "epoch": 0.14, "learning_rate": 0.00035315, "loss": 0.9547, "step": 14140 }, { "epoch": 0.14, "learning_rate": 0.00035339999999999997, "loss": 0.941, "step": 14150 }, { "epoch": 0.14, "learning_rate": 0.00035365, "loss": 0.9354, "step": 14160 }, { "epoch": 0.14, "learning_rate": 0.0003539, "loss": 0.9401, "step": 14170 }, { "epoch": 0.14, "learning_rate": 0.00035415, "loss": 0.9526, "step": 14180 }, { "epoch": 0.14, "learning_rate": 0.0003544, "loss": 0.9453, "step": 14190 }, { "epoch": 0.14, "learning_rate": 0.0003546499999999999, "loss": 0.9356, "step": 14200 }, { "epoch": 0.14, "learning_rate": 0.0003549, "loss": 0.9381, "step": 14210 }, { "epoch": 0.14, "learning_rate": 0.00035514999999999993, "loss": 0.944, "step": 14220 }, { "epoch": 0.14, "learning_rate": 0.0003554, "loss": 0.9448, "step": 14230 }, { "epoch": 0.14, "learning_rate": 0.00035564999999999994, "loss": 0.9566, "step": 14240 }, { "epoch": 0.14, "learning_rate": 0.0003558999999999999, "loss": 0.9356, "step": 14250 }, { "epoch": 0.14, "learning_rate": 0.00035614999999999996, "loss": 0.9481, "step": 14260 }, { "epoch": 0.14, "learning_rate": 0.00035639999999999994, "loss": 0.9392, "step": 14270 }, { "epoch": 0.14, "learning_rate": 0.00035664999999999997, "loss": 0.9335, "step": 14280 }, { "epoch": 0.14, "learning_rate": 0.00035689999999999995, "loss": 0.9419, "step": 14290 }, { "epoch": 0.14, "learning_rate": 0.0003571499999999999, "loss": 0.9458, "step": 14300 }, { "epoch": 0.14, "learning_rate": 0.00035739999999999996, "loss": 0.949, "step": 14310 }, { "epoch": 0.14, "learning_rate": 0.00035764999999999994, "loss": 0.9428, "step": 14320 }, { "epoch": 0.14, "learning_rate": 0.00035789999999999997, "loss": 0.9429, "step": 14330 }, { "epoch": 0.14, "learning_rate": 0.00035814999999999995, "loss": 0.9408, "step": 14340 }, { "epoch": 0.14, "learning_rate": 0.0003584, "loss": 0.94, "step": 14350 }, { "epoch": 0.14, "learning_rate": 0.00035864999999999996, "loss": 0.9442, "step": 14360 }, { "epoch": 0.14, "learning_rate": 0.00035889999999999994, "loss": 0.9475, "step": 14370 }, { "epoch": 0.14, "learning_rate": 0.00035915, "loss": 0.939, "step": 14380 }, { "epoch": 0.14, "learning_rate": 0.00035939999999999995, "loss": 0.9331, "step": 14390 }, { "epoch": 0.14, "learning_rate": 0.00035965, "loss": 0.9342, "step": 14400 }, { "epoch": 0.14, "learning_rate": 0.00035989999999999997, "loss": 0.9157, "step": 14410 }, { "epoch": 0.14, "learning_rate": 0.00036014999999999995, "loss": 0.9288, "step": 14420 }, { "epoch": 0.14, "learning_rate": 0.0003604, "loss": 0.9442, "step": 14430 }, { "epoch": 0.14, "learning_rate": 0.00036064999999999996, "loss": 0.9333, "step": 14440 }, { "epoch": 0.14, "learning_rate": 0.0003609, "loss": 0.9477, "step": 14450 }, { "epoch": 0.14, "learning_rate": 0.00036114999999999997, "loss": 0.9564, "step": 14460 }, { "epoch": 0.14, "learning_rate": 0.0003614, "loss": 0.9514, "step": 14470 }, { "epoch": 0.14, "learning_rate": 0.00036165, "loss": 0.9478, "step": 14480 }, { "epoch": 0.14, "learning_rate": 0.00036189999999999996, "loss": 0.9499, "step": 14490 }, { "epoch": 0.14, "learning_rate": 0.00036215, "loss": 0.9363, "step": 14500 }, { "epoch": 0.15, "learning_rate": 0.00036239999999999997, "loss": 0.9307, "step": 14510 }, { "epoch": 0.15, "learning_rate": 0.00036265, "loss": 0.933, "step": 14520 }, { "epoch": 0.15, "learning_rate": 0.0003629, "loss": 0.9388, "step": 14530 }, { "epoch": 0.15, "learning_rate": 0.00036314999999999996, "loss": 0.9317, "step": 14540 }, { "epoch": 0.15, "learning_rate": 0.0003634, "loss": 0.9299, "step": 14550 }, { "epoch": 0.15, "learning_rate": 0.00036365, "loss": 0.9437, "step": 14560 }, { "epoch": 0.15, "learning_rate": 0.0003639, "loss": 0.9454, "step": 14570 }, { "epoch": 0.15, "learning_rate": 0.00036415, "loss": 0.9416, "step": 14580 }, { "epoch": 0.15, "learning_rate": 0.0003643999999999999, "loss": 0.9183, "step": 14590 }, { "epoch": 0.15, "learning_rate": 0.00036465, "loss": 0.9327, "step": 14600 }, { "epoch": 0.15, "learning_rate": 0.0003648999999999999, "loss": 0.922, "step": 14610 }, { "epoch": 0.15, "learning_rate": 0.00036515, "loss": 0.9334, "step": 14620 }, { "epoch": 0.15, "learning_rate": 0.00036539999999999994, "loss": 0.9428, "step": 14630 }, { "epoch": 0.15, "learning_rate": 0.00036565, "loss": 0.9437, "step": 14640 }, { "epoch": 0.15, "learning_rate": 0.00036589999999999995, "loss": 0.9216, "step": 14650 }, { "epoch": 0.15, "learning_rate": 0.00036614999999999993, "loss": 0.9294, "step": 14660 }, { "epoch": 0.15, "learning_rate": 0.00036639999999999996, "loss": 0.9188, "step": 14670 }, { "epoch": 0.15, "learning_rate": 0.00036664999999999994, "loss": 0.9329, "step": 14680 }, { "epoch": 0.15, "learning_rate": 0.0003669, "loss": 0.9372, "step": 14690 }, { "epoch": 0.15, "learning_rate": 0.00036714999999999995, "loss": 0.9343, "step": 14700 }, { "epoch": 0.15, "learning_rate": 0.00036739999999999993, "loss": 0.9351, "step": 14710 }, { "epoch": 0.15, "learning_rate": 0.00036764999999999996, "loss": 0.9063, "step": 14720 }, { "epoch": 0.15, "learning_rate": 0.000367875, "loss": 0.9074, "step": 14730 }, { "epoch": 0.15, "learning_rate": 0.00036812499999999996, "loss": 0.9242, "step": 14740 }, { "epoch": 0.15, "learning_rate": 0.000368375, "loss": 0.9145, "step": 14750 }, { "epoch": 0.15, "learning_rate": 0.000368625, "loss": 0.9093, "step": 14760 }, { "epoch": 0.15, "learning_rate": 0.00036887499999999995, "loss": 0.9172, "step": 14770 }, { "epoch": 0.15, "learning_rate": 0.000369125, "loss": 0.9219, "step": 14780 }, { "epoch": 0.15, "learning_rate": 0.00036937499999999997, "loss": 0.9111, "step": 14790 }, { "epoch": 0.15, "learning_rate": 0.000369625, "loss": 0.9313, "step": 14800 }, { "epoch": 0.15, "learning_rate": 0.000369875, "loss": 0.9135, "step": 14810 }, { "epoch": 0.15, "learning_rate": 0.00037012499999999996, "loss": 0.931, "step": 14820 }, { "epoch": 0.15, "learning_rate": 0.000370375, "loss": 0.9043, "step": 14830 }, { "epoch": 0.15, "learning_rate": 0.00037062499999999997, "loss": 0.9024, "step": 14840 }, { "epoch": 0.15, "learning_rate": 0.000370875, "loss": 0.9054, "step": 14850 }, { "epoch": 0.15, "learning_rate": 0.000371125, "loss": 0.9101, "step": 14860 }, { "epoch": 0.15, "learning_rate": 0.000371375, "loss": 0.9087, "step": 14870 }, { "epoch": 0.15, "learning_rate": 0.000371625, "loss": 0.9163, "step": 14880 }, { "epoch": 0.15, "learning_rate": 0.0003718749999999999, "loss": 0.9187, "step": 14890 }, { "epoch": 0.15, "learning_rate": 0.000372125, "loss": 0.9218, "step": 14900 }, { "epoch": 0.15, "learning_rate": 0.00037237499999999993, "loss": 0.9167, "step": 14910 }, { "epoch": 0.15, "learning_rate": 0.000372625, "loss": 0.9154, "step": 14920 }, { "epoch": 0.15, "learning_rate": 0.00037287499999999994, "loss": 0.9129, "step": 14930 }, { "epoch": 0.15, "learning_rate": 0.0003731249999999999, "loss": 0.9134, "step": 14940 }, { "epoch": 0.15, "learning_rate": 0.00037337499999999995, "loss": 0.914, "step": 14950 }, { "epoch": 0.15, "learning_rate": 0.00037362499999999993, "loss": 0.9167, "step": 14960 }, { "epoch": 0.15, "learning_rate": 0.00037387499999999997, "loss": 0.9252, "step": 14970 }, { "epoch": 0.15, "learning_rate": 0.00037412499999999995, "loss": 0.9329, "step": 14980 }, { "epoch": 0.15, "learning_rate": 0.0003743749999999999, "loss": 0.949, "step": 14990 }, { "epoch": 0.15, "learning_rate": 0.00037462499999999996, "loss": 0.9463, "step": 15000 }, { "epoch": 0.15, "eval_accuracy": 0.811620901381853, "eval_loss": 0.94189453125, "eval_runtime": 97.6826, "eval_samples_per_second": 818.979, "eval_steps_per_second": 1.607, "step": 15000 }, { "epoch": 0.15, "learning_rate": 0.00037487499999999994, "loss": 0.9369, "step": 15010 }, { "epoch": 0.15, "learning_rate": 0.00037512499999999997, "loss": 0.9289, "step": 15020 }, { "epoch": 0.15, "learning_rate": 0.00037537499999999995, "loss": 0.9494, "step": 15030 }, { "epoch": 0.15, "learning_rate": 0.000375625, "loss": 0.9298, "step": 15040 }, { "epoch": 0.15, "learning_rate": 0.00037587499999999996, "loss": 0.9263, "step": 15050 }, { "epoch": 0.15, "learning_rate": 0.00037612499999999994, "loss": 0.9053, "step": 15060 }, { "epoch": 0.15, "learning_rate": 0.000376375, "loss": 0.8997, "step": 15070 }, { "epoch": 0.15, "learning_rate": 0.00037662499999999995, "loss": 0.8866, "step": 15080 }, { "epoch": 0.15, "learning_rate": 0.000376875, "loss": 0.9064, "step": 15090 }, { "epoch": 0.15, "learning_rate": 0.00037712499999999996, "loss": 0.9229, "step": 15100 }, { "epoch": 0.15, "learning_rate": 0.00037737499999999994, "loss": 0.928, "step": 15110 }, { "epoch": 0.15, "learning_rate": 0.000377625, "loss": 0.9462, "step": 15120 }, { "epoch": 0.15, "learning_rate": 0.00037787499999999996, "loss": 0.9331, "step": 15130 }, { "epoch": 0.15, "learning_rate": 0.000378125, "loss": 0.9253, "step": 15140 }, { "epoch": 0.15, "learning_rate": 0.00037837499999999997, "loss": 0.9153, "step": 15150 }, { "epoch": 0.15, "learning_rate": 0.00037862499999999995, "loss": 0.93, "step": 15160 }, { "epoch": 0.15, "learning_rate": 0.000378875, "loss": 0.9232, "step": 15170 }, { "epoch": 0.15, "learning_rate": 0.00037912499999999996, "loss": 0.9408, "step": 15180 }, { "epoch": 0.15, "learning_rate": 0.000379375, "loss": 0.9373, "step": 15190 }, { "epoch": 0.15, "learning_rate": 0.00037962499999999997, "loss": 0.9201, "step": 15200 }, { "epoch": 0.15, "learning_rate": 0.000379875, "loss": 0.9188, "step": 15210 }, { "epoch": 0.15, "learning_rate": 0.000380125, "loss": 0.9333, "step": 15220 }, { "epoch": 0.15, "learning_rate": 0.00038037499999999996, "loss": 0.9171, "step": 15230 }, { "epoch": 0.15, "learning_rate": 0.000380625, "loss": 0.924, "step": 15240 }, { "epoch": 0.15, "learning_rate": 0.000380875, "loss": 0.938, "step": 15250 }, { "epoch": 0.15, "learning_rate": 0.000381125, "loss": 0.9111, "step": 15260 }, { "epoch": 0.15, "learning_rate": 0.000381375, "loss": 0.9052, "step": 15270 }, { "epoch": 0.15, "learning_rate": 0.0003816249999999999, "loss": 0.9045, "step": 15280 }, { "epoch": 0.15, "learning_rate": 0.000381875, "loss": 0.9103, "step": 15290 }, { "epoch": 0.15, "learning_rate": 0.0003821249999999999, "loss": 0.8965, "step": 15300 }, { "epoch": 0.15, "learning_rate": 0.000382375, "loss": 0.9055, "step": 15310 }, { "epoch": 0.15, "learning_rate": 0.00038262499999999994, "loss": 0.885, "step": 15320 }, { "epoch": 0.15, "learning_rate": 0.000382875, "loss": 0.8948, "step": 15330 }, { "epoch": 0.15, "learning_rate": 0.00038312499999999995, "loss": 0.8965, "step": 15340 }, { "epoch": 0.15, "learning_rate": 0.0003833749999999999, "loss": 0.909, "step": 15350 }, { "epoch": 0.15, "learning_rate": 0.00038362499999999996, "loss": 0.9016, "step": 15360 }, { "epoch": 0.15, "learning_rate": 0.00038387499999999994, "loss": 0.8971, "step": 15370 }, { "epoch": 0.15, "learning_rate": 0.00038412499999999997, "loss": 0.8991, "step": 15380 }, { "epoch": 0.15, "learning_rate": 0.00038437499999999995, "loss": 0.9154, "step": 15390 }, { "epoch": 0.15, "learning_rate": 0.00038462499999999993, "loss": 0.8933, "step": 15400 }, { "epoch": 0.15, "learning_rate": 0.00038487499999999996, "loss": 0.9045, "step": 15410 }, { "epoch": 0.15, "learning_rate": 0.00038512499999999994, "loss": 0.9009, "step": 15420 }, { "epoch": 0.15, "learning_rate": 0.000385375, "loss": 0.9093, "step": 15430 }, { "epoch": 0.15, "learning_rate": 0.00038562499999999995, "loss": 0.8835, "step": 15440 }, { "epoch": 0.15, "learning_rate": 0.00038587499999999993, "loss": 0.9153, "step": 15450 }, { "epoch": 0.15, "learning_rate": 0.00038612499999999997, "loss": 0.888, "step": 15460 }, { "epoch": 0.15, "learning_rate": 0.00038637499999999995, "loss": 0.8992, "step": 15470 }, { "epoch": 0.15, "learning_rate": 0.000386625, "loss": 0.9111, "step": 15480 }, { "epoch": 0.15, "learning_rate": 0.00038687499999999996, "loss": 0.9072, "step": 15490 }, { "epoch": 0.15, "learning_rate": 0.000387125, "loss": 0.8989, "step": 15500 }, { "epoch": 0.16, "learning_rate": 0.00038737499999999997, "loss": 0.9184, "step": 15510 }, { "epoch": 0.16, "learning_rate": 0.00038762499999999995, "loss": 0.9062, "step": 15520 }, { "epoch": 0.16, "learning_rate": 0.000387875, "loss": 0.8942, "step": 15530 }, { "epoch": 0.16, "learning_rate": 0.00038812499999999996, "loss": 0.8903, "step": 15540 }, { "epoch": 0.16, "learning_rate": 0.000388375, "loss": 0.9002, "step": 15550 }, { "epoch": 0.16, "learning_rate": 0.000388625, "loss": 0.8949, "step": 15560 }, { "epoch": 0.16, "learning_rate": 0.00038887499999999995, "loss": 0.9059, "step": 15570 }, { "epoch": 0.16, "learning_rate": 0.000389125, "loss": 0.8925, "step": 15580 }, { "epoch": 0.16, "learning_rate": 0.00038937499999999996, "loss": 0.9097, "step": 15590 }, { "epoch": 0.16, "learning_rate": 0.000389625, "loss": 0.902, "step": 15600 }, { "epoch": 0.16, "learning_rate": 0.000389875, "loss": 0.9065, "step": 15610 }, { "epoch": 0.16, "learning_rate": 0.000390125, "loss": 0.9019, "step": 15620 }, { "epoch": 0.16, "learning_rate": 0.000390375, "loss": 0.9129, "step": 15630 }, { "epoch": 0.16, "learning_rate": 0.00039062499999999997, "loss": 0.9301, "step": 15640 }, { "epoch": 0.16, "learning_rate": 0.000390875, "loss": 0.9264, "step": 15650 }, { "epoch": 0.16, "learning_rate": 0.000391125, "loss": 0.9174, "step": 15660 }, { "epoch": 0.16, "learning_rate": 0.000391375, "loss": 0.9326, "step": 15670 }, { "epoch": 0.16, "learning_rate": 0.000391625, "loss": 0.9315, "step": 15680 }, { "epoch": 0.16, "learning_rate": 0.0003918749999999999, "loss": 0.9348, "step": 15690 }, { "epoch": 0.16, "learning_rate": 0.000392125, "loss": 0.9141, "step": 15700 }, { "epoch": 0.16, "learning_rate": 0.00039237499999999993, "loss": 0.9214, "step": 15710 }, { "epoch": 0.16, "learning_rate": 0.000392625, "loss": 0.9206, "step": 15720 }, { "epoch": 0.16, "learning_rate": 0.00039285, "loss": 0.9257, "step": 15730 }, { "epoch": 0.16, "learning_rate": 0.00039309999999999996, "loss": 0.9082, "step": 15740 }, { "epoch": 0.16, "learning_rate": 0.00039334999999999994, "loss": 0.9235, "step": 15750 }, { "epoch": 0.16, "learning_rate": 0.00039359999999999997, "loss": 0.9195, "step": 15760 }, { "epoch": 0.16, "learning_rate": 0.00039384999999999995, "loss": 0.9134, "step": 15770 }, { "epoch": 0.16, "learning_rate": 0.0003941, "loss": 0.9079, "step": 15780 }, { "epoch": 0.16, "learning_rate": 0.00039434999999999996, "loss": 0.9062, "step": 15790 }, { "epoch": 0.16, "learning_rate": 0.00039459999999999994, "loss": 0.9015, "step": 15800 }, { "epoch": 0.16, "learning_rate": 0.00039485, "loss": 0.905, "step": 15810 }, { "epoch": 0.16, "learning_rate": 0.00039509999999999995, "loss": 0.917, "step": 15820 }, { "epoch": 0.16, "learning_rate": 0.00039535, "loss": 0.9032, "step": 15830 }, { "epoch": 0.16, "learning_rate": 0.00039559999999999997, "loss": 0.9049, "step": 15840 }, { "epoch": 0.16, "learning_rate": 0.00039584999999999995, "loss": 0.9183, "step": 15850 }, { "epoch": 0.16, "learning_rate": 0.0003961, "loss": 0.9155, "step": 15860 }, { "epoch": 0.16, "learning_rate": 0.00039634999999999996, "loss": 0.9321, "step": 15870 }, { "epoch": 0.16, "learning_rate": 0.0003966, "loss": 0.9194, "step": 15880 }, { "epoch": 0.16, "learning_rate": 0.00039684999999999997, "loss": 0.9276, "step": 15890 }, { "epoch": 0.16, "learning_rate": 0.0003971, "loss": 0.9179, "step": 15900 }, { "epoch": 0.16, "learning_rate": 0.00039735, "loss": 0.9153, "step": 15910 }, { "epoch": 0.16, "learning_rate": 0.00039759999999999996, "loss": 0.9091, "step": 15920 }, { "epoch": 0.16, "learning_rate": 0.00039785, "loss": 0.8992, "step": 15930 }, { "epoch": 0.16, "learning_rate": 0.0003981, "loss": 0.9248, "step": 15940 }, { "epoch": 0.16, "learning_rate": 0.00039835, "loss": 0.8834, "step": 15950 }, { "epoch": 0.16, "learning_rate": 0.0003986, "loss": 0.8917, "step": 15960 }, { "epoch": 0.16, "learning_rate": 0.0003988499999999999, "loss": 0.9172, "step": 15970 }, { "epoch": 0.16, "learning_rate": 0.0003991, "loss": 0.9126, "step": 15980 }, { "epoch": 0.16, "learning_rate": 0.0003993499999999999, "loss": 0.9144, "step": 15990 }, { "epoch": 0.16, "learning_rate": 0.0003996, "loss": 0.927, "step": 16000 }, { "epoch": 0.16, "learning_rate": 0.00039984999999999993, "loss": 0.9193, "step": 16010 }, { "epoch": 0.16, "learning_rate": 0.0004000999999999999, "loss": 0.9001, "step": 16020 }, { "epoch": 0.16, "learning_rate": 0.00040034999999999995, "loss": 0.9074, "step": 16030 }, { "epoch": 0.16, "learning_rate": 0.0004005999999999999, "loss": 0.8888, "step": 16040 }, { "epoch": 0.16, "learning_rate": 0.00040084999999999996, "loss": 0.9051, "step": 16050 }, { "epoch": 0.16, "learning_rate": 0.00040109999999999994, "loss": 0.886, "step": 16060 }, { "epoch": 0.16, "learning_rate": 0.00040134999999999997, "loss": 0.8988, "step": 16070 }, { "epoch": 0.16, "learning_rate": 0.00040159999999999995, "loss": 0.8789, "step": 16080 }, { "epoch": 0.16, "learning_rate": 0.00040184999999999993, "loss": 0.8828, "step": 16090 }, { "epoch": 0.16, "learning_rate": 0.00040209999999999996, "loss": 0.8723, "step": 16100 }, { "epoch": 0.16, "learning_rate": 0.00040234999999999994, "loss": 0.9006, "step": 16110 }, { "epoch": 0.16, "learning_rate": 0.0004026, "loss": 0.8891, "step": 16120 }, { "epoch": 0.16, "learning_rate": 0.00040284999999999995, "loss": 0.8801, "step": 16130 }, { "epoch": 0.16, "learning_rate": 0.00040309999999999993, "loss": 0.8827, "step": 16140 }, { "epoch": 0.16, "learning_rate": 0.00040334999999999997, "loss": 0.8866, "step": 16150 }, { "epoch": 0.16, "learning_rate": 0.00040359999999999994, "loss": 0.8691, "step": 16160 }, { "epoch": 0.16, "learning_rate": 0.00040385, "loss": 0.8749, "step": 16170 }, { "epoch": 0.16, "learning_rate": 0.00040409999999999996, "loss": 0.8902, "step": 16180 }, { "epoch": 0.16, "learning_rate": 0.00040435, "loss": 0.9015, "step": 16190 }, { "epoch": 0.16, "learning_rate": 0.00040459999999999997, "loss": 0.9031, "step": 16200 }, { "epoch": 0.16, "learning_rate": 0.00040484999999999995, "loss": 0.9151, "step": 16210 }, { "epoch": 0.16, "learning_rate": 0.0004051, "loss": 0.9132, "step": 16220 }, { "epoch": 0.16, "learning_rate": 0.00040534999999999996, "loss": 0.9079, "step": 16230 }, { "epoch": 0.16, "learning_rate": 0.0004056, "loss": 0.918, "step": 16240 }, { "epoch": 0.16, "learning_rate": 0.00040584999999999997, "loss": 0.9121, "step": 16250 }, { "epoch": 0.16, "learning_rate": 0.00040609999999999995, "loss": 0.9106, "step": 16260 }, { "epoch": 0.16, "learning_rate": 0.00040635, "loss": 0.9031, "step": 16270 }, { "epoch": 0.16, "learning_rate": 0.00040659999999999996, "loss": 0.8931, "step": 16280 }, { "epoch": 0.16, "learning_rate": 0.00040685, "loss": 0.8999, "step": 16290 }, { "epoch": 0.16, "learning_rate": 0.0004071, "loss": 0.9123, "step": 16300 }, { "epoch": 0.16, "learning_rate": 0.00040734999999999995, "loss": 0.9201, "step": 16310 }, { "epoch": 0.16, "learning_rate": 0.0004076, "loss": 0.9105, "step": 16320 }, { "epoch": 0.16, "learning_rate": 0.00040784999999999997, "loss": 0.9038, "step": 16330 }, { "epoch": 0.16, "learning_rate": 0.0004081, "loss": 0.8916, "step": 16340 }, { "epoch": 0.16, "learning_rate": 0.00040835, "loss": 0.9051, "step": 16350 }, { "epoch": 0.16, "learning_rate": 0.0004086, "loss": 0.9035, "step": 16360 }, { "epoch": 0.16, "learning_rate": 0.00040885, "loss": 0.9139, "step": 16370 }, { "epoch": 0.16, "learning_rate": 0.0004090999999999999, "loss": 0.9228, "step": 16380 }, { "epoch": 0.16, "learning_rate": 0.00040935, "loss": 0.9001, "step": 16390 }, { "epoch": 0.16, "learning_rate": 0.00040959999999999993, "loss": 0.8981, "step": 16400 }, { "epoch": 0.16, "learning_rate": 0.00040985, "loss": 0.8796, "step": 16410 }, { "epoch": 0.16, "learning_rate": 0.00041009999999999994, "loss": 0.8982, "step": 16420 }, { "epoch": 0.16, "learning_rate": 0.0004103499999999999, "loss": 0.9092, "step": 16430 }, { "epoch": 0.16, "learning_rate": 0.00041059999999999995, "loss": 0.9078, "step": 16440 }, { "epoch": 0.16, "learning_rate": 0.00041084999999999993, "loss": 0.9136, "step": 16450 }, { "epoch": 0.16, "learning_rate": 0.00041109999999999996, "loss": 0.8812, "step": 16460 }, { "epoch": 0.16, "learning_rate": 0.00041134999999999994, "loss": 0.8807, "step": 16470 }, { "epoch": 0.16, "learning_rate": 0.0004116, "loss": 0.893, "step": 16480 }, { "epoch": 0.16, "learning_rate": 0.00041184999999999995, "loss": 0.8879, "step": 16490 }, { "epoch": 0.17, "learning_rate": 0.00041209999999999993, "loss": 0.8825, "step": 16500 }, { "epoch": 0.17, "learning_rate": 0.00041234999999999997, "loss": 0.8709, "step": 16510 }, { "epoch": 0.17, "learning_rate": 0.00041259999999999995, "loss": 0.8962, "step": 16520 }, { "epoch": 0.17, "learning_rate": 0.00041285, "loss": 0.8799, "step": 16530 }, { "epoch": 0.17, "learning_rate": 0.00041309999999999996, "loss": 0.9039, "step": 16540 }, { "epoch": 0.17, "learning_rate": 0.00041334999999999994, "loss": 0.8811, "step": 16550 }, { "epoch": 0.17, "learning_rate": 0.00041359999999999997, "loss": 0.8934, "step": 16560 }, { "epoch": 0.17, "learning_rate": 0.00041384999999999995, "loss": 0.8987, "step": 16570 }, { "epoch": 0.17, "learning_rate": 0.0004141, "loss": 0.8806, "step": 16580 }, { "epoch": 0.17, "learning_rate": 0.00041434999999999996, "loss": 0.8867, "step": 16590 }, { "epoch": 0.17, "learning_rate": 0.00041459999999999994, "loss": 0.8716, "step": 16600 }, { "epoch": 0.17, "learning_rate": 0.00041485, "loss": 0.8798, "step": 16610 }, { "epoch": 0.17, "learning_rate": 0.00041509999999999995, "loss": 0.8875, "step": 16620 }, { "epoch": 0.17, "learning_rate": 0.00041535, "loss": 0.8752, "step": 16630 }, { "epoch": 0.17, "learning_rate": 0.00041559999999999996, "loss": 0.8721, "step": 16640 }, { "epoch": 0.17, "learning_rate": 0.00041585, "loss": 0.8783, "step": 16650 }, { "epoch": 0.17, "learning_rate": 0.0004161, "loss": 0.8941, "step": 16660 }, { "epoch": 0.17, "learning_rate": 0.00041634999999999996, "loss": 0.9065, "step": 16670 }, { "epoch": 0.17, "learning_rate": 0.0004166, "loss": 0.8996, "step": 16680 }, { "epoch": 0.17, "learning_rate": 0.00041684999999999997, "loss": 0.9105, "step": 16690 }, { "epoch": 0.17, "learning_rate": 0.0004171, "loss": 0.9004, "step": 16700 }, { "epoch": 0.17, "learning_rate": 0.00041735, "loss": 0.8908, "step": 16710 }, { "epoch": 0.17, "learning_rate": 0.00041759999999999996, "loss": 0.8875, "step": 16720 }, { "epoch": 0.17, "learning_rate": 0.0004178249999999999, "loss": 0.8938, "step": 16730 }, { "epoch": 0.17, "learning_rate": 0.00041807499999999996, "loss": 0.8937, "step": 16740 }, { "epoch": 0.17, "learning_rate": 0.00041832499999999994, "loss": 0.9222, "step": 16750 }, { "epoch": 0.17, "learning_rate": 0.00041857499999999997, "loss": 0.905, "step": 16760 }, { "epoch": 0.17, "learning_rate": 0.00041882499999999995, "loss": 0.9112, "step": 16770 }, { "epoch": 0.17, "learning_rate": 0.00041907499999999993, "loss": 0.875, "step": 16780 }, { "epoch": 0.17, "learning_rate": 0.00041932499999999996, "loss": 0.9051, "step": 16790 }, { "epoch": 0.17, "learning_rate": 0.00041957499999999994, "loss": 0.8955, "step": 16800 }, { "epoch": 0.17, "learning_rate": 0.00041982499999999997, "loss": 0.8845, "step": 16810 }, { "epoch": 0.17, "learning_rate": 0.00042007499999999995, "loss": 0.8922, "step": 16820 }, { "epoch": 0.17, "learning_rate": 0.00042032499999999993, "loss": 0.8942, "step": 16830 }, { "epoch": 0.17, "learning_rate": 0.00042057499999999996, "loss": 0.8848, "step": 16840 }, { "epoch": 0.17, "learning_rate": 0.00042082499999999994, "loss": 0.8988, "step": 16850 }, { "epoch": 0.17, "learning_rate": 0.000421075, "loss": 0.9023, "step": 16860 }, { "epoch": 0.17, "learning_rate": 0.00042132499999999995, "loss": 0.8887, "step": 16870 }, { "epoch": 0.17, "learning_rate": 0.000421575, "loss": 0.8788, "step": 16880 }, { "epoch": 0.17, "learning_rate": 0.00042182499999999997, "loss": 0.9045, "step": 16890 }, { "epoch": 0.17, "learning_rate": 0.00042207499999999995, "loss": 0.8972, "step": 16900 }, { "epoch": 0.17, "learning_rate": 0.000422325, "loss": 0.8951, "step": 16910 }, { "epoch": 0.17, "learning_rate": 0.00042257499999999996, "loss": 0.9044, "step": 16920 }, { "epoch": 0.17, "learning_rate": 0.000422825, "loss": 0.919, "step": 16930 }, { "epoch": 0.17, "learning_rate": 0.00042307499999999997, "loss": 0.9027, "step": 16940 }, { "epoch": 0.17, "learning_rate": 0.00042332499999999995, "loss": 0.9085, "step": 16950 }, { "epoch": 0.17, "learning_rate": 0.000423575, "loss": 0.9036, "step": 16960 }, { "epoch": 0.17, "learning_rate": 0.00042382499999999996, "loss": 0.8956, "step": 16970 }, { "epoch": 0.17, "learning_rate": 0.000424075, "loss": 0.8858, "step": 16980 }, { "epoch": 0.17, "learning_rate": 0.000424325, "loss": 0.8867, "step": 16990 }, { "epoch": 0.17, "learning_rate": 0.00042457499999999995, "loss": 0.8947, "step": 17000 }, { "epoch": 0.17, "learning_rate": 0.000424825, "loss": 0.8837, "step": 17010 }, { "epoch": 0.17, "learning_rate": 0.00042507499999999996, "loss": 0.8845, "step": 17020 }, { "epoch": 0.17, "learning_rate": 0.000425325, "loss": 0.9002, "step": 17030 }, { "epoch": 0.17, "learning_rate": 0.000425575, "loss": 0.903, "step": 17040 }, { "epoch": 0.17, "learning_rate": 0.000425825, "loss": 0.9118, "step": 17050 }, { "epoch": 0.17, "learning_rate": 0.000426075, "loss": 0.8913, "step": 17060 }, { "epoch": 0.17, "learning_rate": 0.0004263249999999999, "loss": 0.8945, "step": 17070 }, { "epoch": 0.17, "learning_rate": 0.000426575, "loss": 0.886, "step": 17080 }, { "epoch": 0.17, "learning_rate": 0.0004268249999999999, "loss": 0.8845, "step": 17090 }, { "epoch": 0.17, "learning_rate": 0.000427075, "loss": 0.882, "step": 17100 }, { "epoch": 0.17, "learning_rate": 0.00042732499999999994, "loss": 0.901, "step": 17110 }, { "epoch": 0.17, "learning_rate": 0.0004275749999999999, "loss": 0.8849, "step": 17120 }, { "epoch": 0.17, "learning_rate": 0.00042782499999999995, "loss": 0.8931, "step": 17130 }, { "epoch": 0.17, "learning_rate": 0.00042807499999999993, "loss": 0.886, "step": 17140 }, { "epoch": 0.17, "learning_rate": 0.00042832499999999996, "loss": 0.8738, "step": 17150 }, { "epoch": 0.17, "learning_rate": 0.00042857499999999994, "loss": 0.8694, "step": 17160 }, { "epoch": 0.17, "learning_rate": 0.0004288249999999999, "loss": 0.8888, "step": 17170 }, { "epoch": 0.17, "learning_rate": 0.00042907499999999995, "loss": 0.8771, "step": 17180 }, { "epoch": 0.17, "learning_rate": 0.00042932499999999993, "loss": 0.8846, "step": 17190 }, { "epoch": 0.17, "learning_rate": 0.00042957499999999997, "loss": 0.8768, "step": 17200 }, { "epoch": 0.17, "learning_rate": 0.00042982499999999994, "loss": 0.8796, "step": 17210 }, { "epoch": 0.17, "learning_rate": 0.000430075, "loss": 0.8849, "step": 17220 }, { "epoch": 0.17, "learning_rate": 0.00043032499999999996, "loss": 0.9074, "step": 17230 }, { "epoch": 0.17, "learning_rate": 0.00043057499999999994, "loss": 0.8949, "step": 17240 }, { "epoch": 0.17, "learning_rate": 0.00043082499999999997, "loss": 0.8993, "step": 17250 }, { "epoch": 0.17, "learning_rate": 0.00043107499999999995, "loss": 0.9037, "step": 17260 }, { "epoch": 0.17, "learning_rate": 0.000431325, "loss": 0.888, "step": 17270 }, { "epoch": 0.17, "learning_rate": 0.00043157499999999996, "loss": 0.8889, "step": 17280 }, { "epoch": 0.17, "learning_rate": 0.00043182499999999994, "loss": 0.8938, "step": 17290 }, { "epoch": 0.17, "learning_rate": 0.00043207499999999997, "loss": 0.8967, "step": 17300 }, { "epoch": 0.17, "learning_rate": 0.00043232499999999995, "loss": 0.8824, "step": 17310 }, { "epoch": 0.17, "learning_rate": 0.000432575, "loss": 0.8823, "step": 17320 }, { "epoch": 0.17, "learning_rate": 0.00043282499999999996, "loss": 0.9003, "step": 17330 }, { "epoch": 0.17, "learning_rate": 0.000433075, "loss": 0.9115, "step": 17340 }, { "epoch": 0.17, "learning_rate": 0.000433325, "loss": 0.8907, "step": 17350 }, { "epoch": 0.17, "learning_rate": 0.00043357499999999995, "loss": 0.9, "step": 17360 }, { "epoch": 0.17, "learning_rate": 0.000433825, "loss": 0.8947, "step": 17370 }, { "epoch": 0.17, "learning_rate": 0.00043407499999999997, "loss": 0.8867, "step": 17380 }, { "epoch": 0.17, "learning_rate": 0.000434325, "loss": 0.8783, "step": 17390 }, { "epoch": 0.17, "learning_rate": 0.000434575, "loss": 0.8778, "step": 17400 }, { "epoch": 0.17, "learning_rate": 0.00043482499999999996, "loss": 0.8951, "step": 17410 }, { "epoch": 0.17, "learning_rate": 0.000435075, "loss": 0.8697, "step": 17420 }, { "epoch": 0.17, "learning_rate": 0.00043532499999999997, "loss": 0.887, "step": 17430 }, { "epoch": 0.17, "learning_rate": 0.000435575, "loss": 0.8831, "step": 17440 }, { "epoch": 0.17, "learning_rate": 0.000435825, "loss": 0.8838, "step": 17450 }, { "epoch": 0.17, "learning_rate": 0.0004360749999999999, "loss": 0.8913, "step": 17460 }, { "epoch": 0.17, "learning_rate": 0.000436325, "loss": 0.8844, "step": 17470 }, { "epoch": 0.17, "learning_rate": 0.0004365749999999999, "loss": 0.8904, "step": 17480 }, { "epoch": 0.17, "learning_rate": 0.000436825, "loss": 0.893, "step": 17490 }, { "epoch": 0.17, "learning_rate": 0.00043707499999999993, "loss": 0.8983, "step": 17500 }, { "epoch": 0.17, "eval_accuracy": 0.8182743179218973, "eval_loss": 0.91259765625, "eval_runtime": 97.2312, "eval_samples_per_second": 822.781, "eval_steps_per_second": 1.615, "step": 17500 }, { "epoch": 0.18, "learning_rate": 0.000437325, "loss": 0.8869, "step": 17510 }, { "epoch": 0.18, "learning_rate": 0.00043757499999999994, "loss": 0.892, "step": 17520 }, { "epoch": 0.18, "learning_rate": 0.0004378249999999999, "loss": 0.8886, "step": 17530 }, { "epoch": 0.18, "learning_rate": 0.00043807499999999996, "loss": 0.8904, "step": 17540 }, { "epoch": 0.18, "learning_rate": 0.00043832499999999993, "loss": 0.8751, "step": 17550 }, { "epoch": 0.18, "learning_rate": 0.00043857499999999997, "loss": 0.8874, "step": 17560 }, { "epoch": 0.18, "learning_rate": 0.00043882499999999995, "loss": 0.8813, "step": 17570 }, { "epoch": 0.18, "learning_rate": 0.0004390749999999999, "loss": 0.8831, "step": 17580 }, { "epoch": 0.18, "learning_rate": 0.00043932499999999996, "loss": 0.8802, "step": 17590 }, { "epoch": 0.18, "learning_rate": 0.00043957499999999994, "loss": 0.8797, "step": 17600 }, { "epoch": 0.18, "learning_rate": 0.00043982499999999997, "loss": 0.8671, "step": 17610 }, { "epoch": 0.18, "learning_rate": 0.00044007499999999995, "loss": 0.8755, "step": 17620 }, { "epoch": 0.18, "learning_rate": 0.000440325, "loss": 0.8715, "step": 17630 }, { "epoch": 0.18, "learning_rate": 0.00044057499999999996, "loss": 0.8605, "step": 17640 }, { "epoch": 0.18, "learning_rate": 0.00044082499999999994, "loss": 0.859, "step": 17650 }, { "epoch": 0.18, "learning_rate": 0.000441075, "loss": 0.8716, "step": 17660 }, { "epoch": 0.18, "learning_rate": 0.00044132499999999995, "loss": 0.8661, "step": 17670 }, { "epoch": 0.18, "learning_rate": 0.000441575, "loss": 0.8838, "step": 17680 }, { "epoch": 0.18, "learning_rate": 0.00044182499999999997, "loss": 0.8765, "step": 17690 }, { "epoch": 0.18, "learning_rate": 0.00044207499999999994, "loss": 0.866, "step": 17700 }, { "epoch": 0.18, "learning_rate": 0.000442325, "loss": 0.8841, "step": 17710 }, { "epoch": 0.18, "learning_rate": 0.00044257499999999996, "loss": 0.8905, "step": 17720 }, { "epoch": 0.18, "learning_rate": 0.0004428, "loss": 0.8865, "step": 17730 }, { "epoch": 0.18, "learning_rate": 0.00044305, "loss": 0.8793, "step": 17740 }, { "epoch": 0.18, "learning_rate": 0.0004433, "loss": 0.8794, "step": 17750 }, { "epoch": 0.18, "learning_rate": 0.0004435499999999999, "loss": 0.875, "step": 17760 }, { "epoch": 0.18, "learning_rate": 0.0004438, "loss": 0.8675, "step": 17770 }, { "epoch": 0.18, "learning_rate": 0.0004440499999999999, "loss": 0.8694, "step": 17780 }, { "epoch": 0.18, "learning_rate": 0.0004443, "loss": 0.8795, "step": 17790 }, { "epoch": 0.18, "learning_rate": 0.00044454999999999994, "loss": 0.8764, "step": 17800 }, { "epoch": 0.18, "learning_rate": 0.0004447999999999999, "loss": 0.8657, "step": 17810 }, { "epoch": 0.18, "learning_rate": 0.00044504999999999995, "loss": 0.8679, "step": 17820 }, { "epoch": 0.18, "learning_rate": 0.00044529999999999993, "loss": 0.8732, "step": 17830 }, { "epoch": 0.18, "learning_rate": 0.00044554999999999996, "loss": 0.8791, "step": 17840 }, { "epoch": 0.18, "learning_rate": 0.00044579999999999994, "loss": 0.8636, "step": 17850 }, { "epoch": 0.18, "learning_rate": 0.0004460499999999999, "loss": 0.882, "step": 17860 }, { "epoch": 0.18, "learning_rate": 0.00044629999999999995, "loss": 0.8777, "step": 17870 }, { "epoch": 0.18, "learning_rate": 0.00044654999999999993, "loss": 0.8735, "step": 17880 }, { "epoch": 0.18, "learning_rate": 0.00044679999999999996, "loss": 0.8776, "step": 17890 }, { "epoch": 0.18, "learning_rate": 0.00044704999999999994, "loss": 0.891, "step": 17900 }, { "epoch": 0.18, "learning_rate": 0.0004473, "loss": 0.9082, "step": 17910 }, { "epoch": 0.18, "learning_rate": 0.00044754999999999996, "loss": 0.9046, "step": 17920 }, { "epoch": 0.18, "learning_rate": 0.00044779999999999993, "loss": 0.8918, "step": 17930 }, { "epoch": 0.18, "learning_rate": 0.00044804999999999997, "loss": 0.8849, "step": 17940 }, { "epoch": 0.18, "learning_rate": 0.00044829999999999995, "loss": 0.8936, "step": 17950 }, { "epoch": 0.18, "learning_rate": 0.00044855, "loss": 0.8865, "step": 17960 }, { "epoch": 0.18, "learning_rate": 0.00044879999999999996, "loss": 0.8825, "step": 17970 }, { "epoch": 0.18, "learning_rate": 0.00044904999999999994, "loss": 0.8978, "step": 17980 }, { "epoch": 0.18, "learning_rate": 0.00044929999999999997, "loss": 0.8707, "step": 17990 }, { "epoch": 0.18, "learning_rate": 0.00044954999999999995, "loss": 0.8807, "step": 18000 }, { "epoch": 0.18, "learning_rate": 0.0004498, "loss": 0.8942, "step": 18010 }, { "epoch": 0.18, "learning_rate": 0.00045004999999999996, "loss": 0.8926, "step": 18020 }, { "epoch": 0.18, "learning_rate": 0.00045029999999999994, "loss": 0.8825, "step": 18030 }, { "epoch": 0.18, "learning_rate": 0.00045055, "loss": 0.8993, "step": 18040 }, { "epoch": 0.18, "learning_rate": 0.00045079999999999995, "loss": 0.896, "step": 18050 }, { "epoch": 0.18, "learning_rate": 0.00045105, "loss": 0.8884, "step": 18060 }, { "epoch": 0.18, "learning_rate": 0.00045129999999999997, "loss": 0.8755, "step": 18070 }, { "epoch": 0.18, "learning_rate": 0.00045155, "loss": 0.8749, "step": 18080 }, { "epoch": 0.18, "learning_rate": 0.0004518, "loss": 0.9011, "step": 18090 }, { "epoch": 0.18, "learning_rate": 0.00045204999999999996, "loss": 0.8811, "step": 18100 }, { "epoch": 0.18, "learning_rate": 0.0004523, "loss": 0.8883, "step": 18110 }, { "epoch": 0.18, "learning_rate": 0.00045254999999999997, "loss": 0.8898, "step": 18120 }, { "epoch": 0.18, "learning_rate": 0.0004528, "loss": 0.8912, "step": 18130 }, { "epoch": 0.18, "learning_rate": 0.00045305, "loss": 0.8892, "step": 18140 }, { "epoch": 0.18, "learning_rate": 0.0004532999999999999, "loss": 0.8652, "step": 18150 }, { "epoch": 0.18, "learning_rate": 0.00045355, "loss": 0.8985, "step": 18160 }, { "epoch": 0.18, "learning_rate": 0.0004537999999999999, "loss": 0.8885, "step": 18170 }, { "epoch": 0.18, "learning_rate": 0.00045405, "loss": 0.8844, "step": 18180 }, { "epoch": 0.18, "learning_rate": 0.00045429999999999993, "loss": 0.8754, "step": 18190 }, { "epoch": 0.18, "learning_rate": 0.00045455, "loss": 0.8633, "step": 18200 }, { "epoch": 0.18, "learning_rate": 0.00045479999999999994, "loss": 0.8808, "step": 18210 }, { "epoch": 0.18, "learning_rate": 0.0004550499999999999, "loss": 0.8859, "step": 18220 }, { "epoch": 0.18, "learning_rate": 0.00045529999999999995, "loss": 0.8836, "step": 18230 }, { "epoch": 0.18, "learning_rate": 0.00045554999999999993, "loss": 0.885, "step": 18240 }, { "epoch": 0.18, "learning_rate": 0.00045579999999999997, "loss": 0.8675, "step": 18250 }, { "epoch": 0.18, "learning_rate": 0.00045604999999999995, "loss": 0.8838, "step": 18260 }, { "epoch": 0.18, "learning_rate": 0.0004562999999999999, "loss": 0.8973, "step": 18270 }, { "epoch": 0.18, "learning_rate": 0.00045654999999999996, "loss": 0.8845, "step": 18280 }, { "epoch": 0.18, "learning_rate": 0.00045679999999999994, "loss": 0.8809, "step": 18290 }, { "epoch": 0.18, "learning_rate": 0.00045704999999999997, "loss": 0.8767, "step": 18300 }, { "epoch": 0.18, "learning_rate": 0.00045729999999999995, "loss": 0.8878, "step": 18310 }, { "epoch": 0.18, "learning_rate": 0.00045754999999999993, "loss": 0.8706, "step": 18320 }, { "epoch": 0.18, "learning_rate": 0.00045779999999999996, "loss": 0.884, "step": 18330 }, { "epoch": 0.18, "learning_rate": 0.00045804999999999994, "loss": 0.8678, "step": 18340 }, { "epoch": 0.18, "learning_rate": 0.00045829999999999997, "loss": 0.8785, "step": 18350 }, { "epoch": 0.18, "learning_rate": 0.00045854999999999995, "loss": 0.8933, "step": 18360 }, { "epoch": 0.18, "learning_rate": 0.0004588, "loss": 0.8826, "step": 18370 }, { "epoch": 0.18, "learning_rate": 0.00045904999999999996, "loss": 0.8625, "step": 18380 }, { "epoch": 0.18, "learning_rate": 0.00045929999999999994, "loss": 0.8767, "step": 18390 }, { "epoch": 0.18, "learning_rate": 0.00045955, "loss": 0.8611, "step": 18400 }, { "epoch": 0.18, "learning_rate": 0.00045979999999999995, "loss": 0.8732, "step": 18410 }, { "epoch": 0.18, "learning_rate": 0.00046005, "loss": 0.8704, "step": 18420 }, { "epoch": 0.18, "learning_rate": 0.00046029999999999997, "loss": 0.8753, "step": 18430 }, { "epoch": 0.18, "learning_rate": 0.00046054999999999995, "loss": 0.8651, "step": 18440 }, { "epoch": 0.18, "learning_rate": 0.0004608, "loss": 0.8572, "step": 18450 }, { "epoch": 0.18, "learning_rate": 0.00046104999999999996, "loss": 0.85, "step": 18460 }, { "epoch": 0.18, "learning_rate": 0.0004613, "loss": 0.8867, "step": 18470 }, { "epoch": 0.18, "learning_rate": 0.00046154999999999997, "loss": 0.8704, "step": 18480 }, { "epoch": 0.18, "learning_rate": 0.0004618, "loss": 0.8707, "step": 18490 }, { "epoch": 0.18, "learning_rate": 0.00046205, "loss": 0.8768, "step": 18500 }, { "epoch": 0.19, "learning_rate": 0.00046229999999999996, "loss": 0.8776, "step": 18510 }, { "epoch": 0.19, "learning_rate": 0.00046255, "loss": 0.8515, "step": 18520 }, { "epoch": 0.19, "learning_rate": 0.0004628, "loss": 0.8539, "step": 18530 }, { "epoch": 0.19, "learning_rate": 0.00046305, "loss": 0.8578, "step": 18540 }, { "epoch": 0.19, "learning_rate": 0.0004633, "loss": 0.8909, "step": 18550 }, { "epoch": 0.19, "learning_rate": 0.0004635499999999999, "loss": 0.8641, "step": 18560 }, { "epoch": 0.19, "learning_rate": 0.0004638, "loss": 0.8626, "step": 18570 }, { "epoch": 0.19, "learning_rate": 0.0004640499999999999, "loss": 0.8804, "step": 18580 }, { "epoch": 0.19, "learning_rate": 0.0004643, "loss": 0.8824, "step": 18590 }, { "epoch": 0.19, "learning_rate": 0.00046454999999999993, "loss": 0.88, "step": 18600 }, { "epoch": 0.19, "learning_rate": 0.0004647999999999999, "loss": 0.8648, "step": 18610 }, { "epoch": 0.19, "learning_rate": 0.00046504999999999995, "loss": 0.8716, "step": 18620 }, { "epoch": 0.19, "learning_rate": 0.0004652999999999999, "loss": 0.871, "step": 18630 }, { "epoch": 0.19, "learning_rate": 0.00046554999999999996, "loss": 0.884, "step": 18640 }, { "epoch": 0.19, "learning_rate": 0.00046579999999999994, "loss": 0.8656, "step": 18650 }, { "epoch": 0.19, "learning_rate": 0.00046604999999999997, "loss": 0.8872, "step": 18660 }, { "epoch": 0.19, "learning_rate": 0.00046629999999999995, "loss": 0.8771, "step": 18670 }, { "epoch": 0.19, "learning_rate": 0.00046654999999999993, "loss": 0.8846, "step": 18680 }, { "epoch": 0.19, "learning_rate": 0.00046679999999999996, "loss": 0.8677, "step": 18690 }, { "epoch": 0.19, "learning_rate": 0.00046704999999999994, "loss": 0.8832, "step": 18700 }, { "epoch": 0.19, "learning_rate": 0.0004673, "loss": 0.8815, "step": 18710 }, { "epoch": 0.19, "learning_rate": 0.00046754999999999995, "loss": 0.8752, "step": 18720 }, { "epoch": 0.19, "learning_rate": 0.00046777499999999997, "loss": 0.8859, "step": 18730 }, { "epoch": 0.19, "learning_rate": 0.00046802499999999995, "loss": 0.8812, "step": 18740 }, { "epoch": 0.19, "learning_rate": 0.000468275, "loss": 0.8743, "step": 18750 }, { "epoch": 0.19, "learning_rate": 0.00046852499999999996, "loss": 0.8647, "step": 18760 }, { "epoch": 0.19, "learning_rate": 0.000468775, "loss": 0.8854, "step": 18770 }, { "epoch": 0.19, "learning_rate": 0.000469025, "loss": 0.8724, "step": 18780 }, { "epoch": 0.19, "learning_rate": 0.00046927499999999995, "loss": 0.8696, "step": 18790 }, { "epoch": 0.19, "learning_rate": 0.000469525, "loss": 0.8873, "step": 18800 }, { "epoch": 0.19, "learning_rate": 0.00046977499999999997, "loss": 0.8876, "step": 18810 }, { "epoch": 0.19, "learning_rate": 0.000470025, "loss": 0.874, "step": 18820 }, { "epoch": 0.19, "learning_rate": 0.000470275, "loss": 0.8747, "step": 18830 }, { "epoch": 0.19, "learning_rate": 0.0004705249999999999, "loss": 0.8735, "step": 18840 }, { "epoch": 0.19, "learning_rate": 0.000470775, "loss": 0.8863, "step": 18850 }, { "epoch": 0.19, "learning_rate": 0.0004710249999999999, "loss": 0.8821, "step": 18860 }, { "epoch": 0.19, "learning_rate": 0.000471275, "loss": 0.8747, "step": 18870 }, { "epoch": 0.19, "learning_rate": 0.00047152499999999993, "loss": 0.8709, "step": 18880 }, { "epoch": 0.19, "learning_rate": 0.000471775, "loss": 0.8837, "step": 18890 }, { "epoch": 0.19, "learning_rate": 0.00047202499999999994, "loss": 0.8673, "step": 18900 }, { "epoch": 0.19, "learning_rate": 0.0004722749999999999, "loss": 0.8712, "step": 18910 }, { "epoch": 0.19, "learning_rate": 0.00047252499999999995, "loss": 0.8723, "step": 18920 }, { "epoch": 0.19, "learning_rate": 0.00047277499999999993, "loss": 0.8574, "step": 18930 }, { "epoch": 0.19, "learning_rate": 0.00047302499999999996, "loss": 0.8789, "step": 18940 }, { "epoch": 0.19, "learning_rate": 0.00047327499999999994, "loss": 0.8872, "step": 18950 }, { "epoch": 0.19, "learning_rate": 0.0004735249999999999, "loss": 0.8802, "step": 18960 }, { "epoch": 0.19, "learning_rate": 0.00047377499999999996, "loss": 0.8921, "step": 18970 }, { "epoch": 0.19, "learning_rate": 0.00047402499999999993, "loss": 0.8751, "step": 18980 }, { "epoch": 0.19, "learning_rate": 0.00047427499999999997, "loss": 0.8665, "step": 18990 }, { "epoch": 0.19, "learning_rate": 0.00047452499999999995, "loss": 0.8721, "step": 19000 }, { "epoch": 0.19, "learning_rate": 0.0004747749999999999, "loss": 0.8839, "step": 19010 }, { "epoch": 0.19, "learning_rate": 0.00047502499999999996, "loss": 0.8676, "step": 19020 }, { "epoch": 0.19, "learning_rate": 0.00047527499999999994, "loss": 0.8704, "step": 19030 }, { "epoch": 0.19, "learning_rate": 0.00047552499999999997, "loss": 0.8782, "step": 19040 }, { "epoch": 0.19, "learning_rate": 0.00047577499999999995, "loss": 0.8763, "step": 19050 }, { "epoch": 0.19, "learning_rate": 0.000476025, "loss": 0.8463, "step": 19060 }, { "epoch": 0.19, "learning_rate": 0.00047627499999999996, "loss": 0.8617, "step": 19070 }, { "epoch": 0.19, "learning_rate": 0.00047652499999999994, "loss": 0.8501, "step": 19080 }, { "epoch": 0.19, "learning_rate": 0.000476775, "loss": 0.8741, "step": 19090 }, { "epoch": 0.19, "learning_rate": 0.00047702499999999995, "loss": 0.876, "step": 19100 }, { "epoch": 0.19, "learning_rate": 0.000477275, "loss": 0.8645, "step": 19110 }, { "epoch": 0.19, "learning_rate": 0.00047752499999999997, "loss": 0.8666, "step": 19120 }, { "epoch": 0.19, "learning_rate": 0.00047777499999999994, "loss": 0.869, "step": 19130 }, { "epoch": 0.19, "learning_rate": 0.000478025, "loss": 0.8542, "step": 19140 }, { "epoch": 0.19, "learning_rate": 0.00047827499999999996, "loss": 0.8722, "step": 19150 }, { "epoch": 0.19, "learning_rate": 0.000478525, "loss": 0.8885, "step": 19160 }, { "epoch": 0.19, "learning_rate": 0.00047877499999999997, "loss": 0.876, "step": 19170 }, { "epoch": 0.19, "learning_rate": 0.00047902499999999995, "loss": 0.8765, "step": 19180 }, { "epoch": 0.19, "learning_rate": 0.000479275, "loss": 0.8642, "step": 19190 }, { "epoch": 0.19, "learning_rate": 0.00047952499999999996, "loss": 0.8772, "step": 19200 }, { "epoch": 0.19, "learning_rate": 0.000479775, "loss": 0.8533, "step": 19210 }, { "epoch": 0.19, "learning_rate": 0.00048002499999999997, "loss": 0.8791, "step": 19220 }, { "epoch": 0.19, "learning_rate": 0.000480275, "loss": 0.8693, "step": 19230 }, { "epoch": 0.19, "learning_rate": 0.000480525, "loss": 0.871, "step": 19240 }, { "epoch": 0.19, "learning_rate": 0.0004807749999999999, "loss": 0.8712, "step": 19250 }, { "epoch": 0.19, "learning_rate": 0.000481025, "loss": 0.8774, "step": 19260 }, { "epoch": 0.19, "learning_rate": 0.0004812749999999999, "loss": 0.8788, "step": 19270 }, { "epoch": 0.19, "learning_rate": 0.000481525, "loss": 0.873, "step": 19280 }, { "epoch": 0.19, "learning_rate": 0.00048177499999999993, "loss": 0.8738, "step": 19290 }, { "epoch": 0.19, "learning_rate": 0.0004820249999999999, "loss": 0.8689, "step": 19300 }, { "epoch": 0.19, "learning_rate": 0.00048227499999999995, "loss": 0.8711, "step": 19310 }, { "epoch": 0.19, "learning_rate": 0.0004825249999999999, "loss": 0.8706, "step": 19320 }, { "epoch": 0.19, "learning_rate": 0.00048277499999999996, "loss": 0.8562, "step": 19330 }, { "epoch": 0.19, "learning_rate": 0.00048302499999999994, "loss": 0.8691, "step": 19340 }, { "epoch": 0.19, "learning_rate": 0.00048327499999999997, "loss": 0.8735, "step": 19350 }, { "epoch": 0.19, "learning_rate": 0.00048352499999999995, "loss": 0.8635, "step": 19360 }, { "epoch": 0.19, "learning_rate": 0.00048377499999999993, "loss": 0.855, "step": 19370 }, { "epoch": 0.19, "learning_rate": 0.00048402499999999996, "loss": 0.8673, "step": 19380 }, { "epoch": 0.19, "learning_rate": 0.00048427499999999994, "loss": 0.8831, "step": 19390 }, { "epoch": 0.19, "learning_rate": 0.000484525, "loss": 0.8582, "step": 19400 }, { "epoch": 0.19, "learning_rate": 0.00048477499999999995, "loss": 0.8718, "step": 19410 }, { "epoch": 0.19, "learning_rate": 0.00048502499999999993, "loss": 0.8744, "step": 19420 }, { "epoch": 0.19, "learning_rate": 0.00048527499999999996, "loss": 0.8592, "step": 19430 }, { "epoch": 0.19, "learning_rate": 0.00048552499999999994, "loss": 0.8467, "step": 19440 }, { "epoch": 0.19, "learning_rate": 0.000485775, "loss": 0.8335, "step": 19450 }, { "epoch": 0.19, "learning_rate": 0.00048602499999999996, "loss": 0.8515, "step": 19460 }, { "epoch": 0.19, "learning_rate": 0.00048627499999999993, "loss": 0.8433, "step": 19470 }, { "epoch": 0.19, "learning_rate": 0.00048652499999999997, "loss": 0.85, "step": 19480 }, { "epoch": 0.19, "learning_rate": 0.00048677499999999995, "loss": 0.8501, "step": 19490 }, { "epoch": 0.2, "learning_rate": 0.000487025, "loss": 0.8527, "step": 19500 }, { "epoch": 0.2, "learning_rate": 0.00048727499999999996, "loss": 0.8431, "step": 19510 }, { "epoch": 0.2, "learning_rate": 0.000487525, "loss": 0.8531, "step": 19520 }, { "epoch": 0.2, "learning_rate": 0.00048777499999999997, "loss": 0.8515, "step": 19530 }, { "epoch": 0.2, "learning_rate": 0.00048802499999999995, "loss": 0.8508, "step": 19540 }, { "epoch": 0.2, "learning_rate": 0.000488275, "loss": 0.846, "step": 19550 }, { "epoch": 0.2, "learning_rate": 0.000488525, "loss": 0.8413, "step": 19560 }, { "epoch": 0.2, "learning_rate": 0.000488775, "loss": 0.8532, "step": 19570 }, { "epoch": 0.2, "learning_rate": 0.0004890249999999999, "loss": 0.8424, "step": 19580 }, { "epoch": 0.2, "learning_rate": 0.000489275, "loss": 0.8488, "step": 19590 }, { "epoch": 0.2, "learning_rate": 0.000489525, "loss": 0.8475, "step": 19600 }, { "epoch": 0.2, "learning_rate": 0.0004897749999999999, "loss": 0.8432, "step": 19610 }, { "epoch": 0.2, "learning_rate": 0.0004900249999999999, "loss": 0.8494, "step": 19620 }, { "epoch": 0.2, "learning_rate": 0.000490275, "loss": 0.849, "step": 19630 }, { "epoch": 0.2, "learning_rate": 0.000490525, "loss": 0.8531, "step": 19640 }, { "epoch": 0.2, "learning_rate": 0.0004907749999999999, "loss": 0.8527, "step": 19650 }, { "epoch": 0.2, "learning_rate": 0.000491025, "loss": 0.8762, "step": 19660 }, { "epoch": 0.2, "learning_rate": 0.000491275, "loss": 0.867, "step": 19670 }, { "epoch": 0.2, "learning_rate": 0.0004915249999999999, "loss": 0.8551, "step": 19680 }, { "epoch": 0.2, "learning_rate": 0.000491775, "loss": 0.854, "step": 19690 }, { "epoch": 0.2, "learning_rate": 0.000492025, "loss": 0.868, "step": 19700 }, { "epoch": 0.2, "learning_rate": 0.0004922749999999999, "loss": 0.8572, "step": 19710 }, { "epoch": 0.2, "learning_rate": 0.000492525, "loss": 0.863, "step": 19720 }, { "epoch": 0.2, "learning_rate": 0.000492775, "loss": 0.8684, "step": 19730 }, { "epoch": 0.2, "learning_rate": 0.000493, "loss": 0.8623, "step": 19740 }, { "epoch": 0.2, "learning_rate": 0.00049325, "loss": 0.8543, "step": 19750 }, { "epoch": 0.2, "learning_rate": 0.0004935, "loss": 0.8525, "step": 19760 }, { "epoch": 0.2, "learning_rate": 0.0004937499999999999, "loss": 0.8636, "step": 19770 }, { "epoch": 0.2, "learning_rate": 0.000494, "loss": 0.8801, "step": 19780 }, { "epoch": 0.2, "learning_rate": 0.0004942499999999999, "loss": 0.8639, "step": 19790 }, { "epoch": 0.2, "learning_rate": 0.0004944999999999999, "loss": 0.8695, "step": 19800 }, { "epoch": 0.2, "learning_rate": 0.00049475, "loss": 0.8659, "step": 19810 }, { "epoch": 0.2, "learning_rate": 0.0004949999999999999, "loss": 0.864, "step": 19820 }, { "epoch": 0.2, "learning_rate": 0.0004952499999999999, "loss": 0.8424, "step": 19830 }, { "epoch": 0.2, "learning_rate": 0.0004955, "loss": 0.8659, "step": 19840 }, { "epoch": 0.2, "learning_rate": 0.00049575, "loss": 0.8535, "step": 19850 }, { "epoch": 0.2, "learning_rate": 0.0004959999999999999, "loss": 0.871, "step": 19860 }, { "epoch": 0.2, "learning_rate": 0.00049625, "loss": 0.8599, "step": 19870 }, { "epoch": 0.2, "learning_rate": 0.0004965, "loss": 0.849, "step": 19880 }, { "epoch": 0.2, "learning_rate": 0.0004967499999999999, "loss": 0.8578, "step": 19890 }, { "epoch": 0.2, "learning_rate": 0.0004969999999999999, "loss": 0.8619, "step": 19900 }, { "epoch": 0.2, "learning_rate": 0.00049725, "loss": 0.8659, "step": 19910 }, { "epoch": 0.2, "learning_rate": 0.0004975, "loss": 0.866, "step": 19920 }, { "epoch": 0.2, "learning_rate": 0.0004977499999999999, "loss": 0.8616, "step": 19930 }, { "epoch": 0.2, "learning_rate": 0.000498, "loss": 0.8478, "step": 19940 }, { "epoch": 0.2, "learning_rate": 0.00049825, "loss": 0.8596, "step": 19950 }, { "epoch": 0.2, "learning_rate": 0.0004984999999999999, "loss": 0.862, "step": 19960 }, { "epoch": 0.2, "learning_rate": 0.00049875, "loss": 0.8678, "step": 19970 }, { "epoch": 0.2, "learning_rate": 0.000499, "loss": 0.879, "step": 19980 }, { "epoch": 0.2, "learning_rate": 0.0004992499999999999, "loss": 0.8744, "step": 19990 }, { "epoch": 0.2, "learning_rate": 0.0004994999999999999, "loss": 0.8751, "step": 20000 }, { "epoch": 0.2, "eval_accuracy": 0.8198587930184829, "eval_loss": 0.89892578125, "eval_runtime": 96.5681, "eval_samples_per_second": 828.431, "eval_steps_per_second": 1.626, "step": 20000 }, { "epoch": 0.2, "learning_rate": 0.00049975, "loss": 0.8638, "step": 20010 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 0.8699, "step": 20020 }, { "epoch": 0.2, "learning_rate": 0.0005002499999999999, "loss": 0.8509, "step": 20030 }, { "epoch": 0.2, "learning_rate": 0.0005005, "loss": 0.853, "step": 20040 }, { "epoch": 0.2, "learning_rate": 0.00050075, "loss": 0.8575, "step": 20050 }, { "epoch": 0.2, "learning_rate": 0.0005009999999999999, "loss": 0.8483, "step": 20060 }, { "epoch": 0.2, "learning_rate": 0.00050125, "loss": 0.8648, "step": 20070 }, { "epoch": 0.2, "learning_rate": 0.0005015, "loss": 0.8515, "step": 20080 }, { "epoch": 0.2, "learning_rate": 0.00050175, "loss": 0.8699, "step": 20090 }, { "epoch": 0.2, "learning_rate": 0.000502, "loss": 0.8394, "step": 20100 }, { "epoch": 0.2, "learning_rate": 0.00050225, "loss": 0.8705, "step": 20110 }, { "epoch": 0.2, "learning_rate": 0.0005025, "loss": 0.8578, "step": 20120 }, { "epoch": 0.2, "learning_rate": 0.0005027499999999999, "loss": 0.8525, "step": 20130 }, { "epoch": 0.2, "learning_rate": 0.000503, "loss": 0.8574, "step": 20140 }, { "epoch": 0.2, "learning_rate": 0.0005032499999999999, "loss": 0.8574, "step": 20150 }, { "epoch": 0.2, "learning_rate": 0.0005034999999999999, "loss": 0.8503, "step": 20160 }, { "epoch": 0.2, "learning_rate": 0.00050375, "loss": 0.859, "step": 20170 }, { "epoch": 0.2, "learning_rate": 0.0005039999999999999, "loss": 0.8582, "step": 20180 }, { "epoch": 0.2, "learning_rate": 0.0005042499999999999, "loss": 0.8535, "step": 20190 }, { "epoch": 0.2, "learning_rate": 0.0005045, "loss": 0.8333, "step": 20200 }, { "epoch": 0.2, "learning_rate": 0.00050475, "loss": 0.8377, "step": 20210 }, { "epoch": 0.2, "learning_rate": 0.0005049999999999999, "loss": 0.8509, "step": 20220 }, { "epoch": 0.2, "learning_rate": 0.00050525, "loss": 0.8661, "step": 20230 }, { "epoch": 0.2, "learning_rate": 0.0005055, "loss": 0.8696, "step": 20240 }, { "epoch": 0.2, "learning_rate": 0.0005057499999999999, "loss": 0.8537, "step": 20250 }, { "epoch": 0.2, "learning_rate": 0.0005059999999999999, "loss": 0.8625, "step": 20260 }, { "epoch": 0.2, "learning_rate": 0.00050625, "loss": 0.859, "step": 20270 }, { "epoch": 0.2, "learning_rate": 0.0005064999999999999, "loss": 0.8617, "step": 20280 }, { "epoch": 0.2, "learning_rate": 0.0005067499999999999, "loss": 0.8424, "step": 20290 }, { "epoch": 0.2, "learning_rate": 0.000507, "loss": 0.8521, "step": 20300 }, { "epoch": 0.2, "learning_rate": 0.00050725, "loss": 0.8474, "step": 20310 }, { "epoch": 0.2, "learning_rate": 0.0005074999999999999, "loss": 0.8544, "step": 20320 }, { "epoch": 0.2, "learning_rate": 0.00050775, "loss": 0.8454, "step": 20330 }, { "epoch": 0.2, "learning_rate": 0.000508, "loss": 0.8555, "step": 20340 }, { "epoch": 0.2, "learning_rate": 0.0005082499999999999, "loss": 0.8385, "step": 20350 }, { "epoch": 0.2, "learning_rate": 0.0005085, "loss": 0.8507, "step": 20360 }, { "epoch": 0.2, "learning_rate": 0.00050875, "loss": 0.8535, "step": 20370 }, { "epoch": 0.2, "learning_rate": 0.000509, "loss": 0.8636, "step": 20380 }, { "epoch": 0.2, "learning_rate": 0.0005092499999999999, "loss": 0.8672, "step": 20390 }, { "epoch": 0.2, "learning_rate": 0.0005095, "loss": 0.8736, "step": 20400 }, { "epoch": 0.2, "learning_rate": 0.00050975, "loss": 0.8626, "step": 20410 }, { "epoch": 0.2, "learning_rate": 0.0005099999999999999, "loss": 0.8581, "step": 20420 }, { "epoch": 0.2, "learning_rate": 0.00051025, "loss": 0.8369, "step": 20430 }, { "epoch": 0.2, "learning_rate": 0.0005105, "loss": 0.8421, "step": 20440 }, { "epoch": 0.2, "learning_rate": 0.0005107499999999999, "loss": 0.8572, "step": 20450 }, { "epoch": 0.2, "learning_rate": 0.000511, "loss": 0.8611, "step": 20460 }, { "epoch": 0.2, "learning_rate": 0.00051125, "loss": 0.849, "step": 20470 }, { "epoch": 0.2, "learning_rate": 0.0005115, "loss": 0.8553, "step": 20480 }, { "epoch": 0.2, "learning_rate": 0.0005117499999999999, "loss": 0.8542, "step": 20490 }, { "epoch": 0.2, "learning_rate": 0.000512, "loss": 0.851, "step": 20500 }, { "epoch": 0.21, "learning_rate": 0.00051225, "loss": 0.8633, "step": 20510 }, { "epoch": 0.21, "learning_rate": 0.0005124999999999999, "loss": 0.8568, "step": 20520 }, { "epoch": 0.21, "learning_rate": 0.00051275, "loss": 0.8648, "step": 20530 }, { "epoch": 0.21, "learning_rate": 0.0005129999999999999, "loss": 0.8609, "step": 20540 }, { "epoch": 0.21, "learning_rate": 0.00051325, "loss": 0.872, "step": 20550 }, { "epoch": 0.21, "learning_rate": 0.0005135, "loss": 0.8543, "step": 20560 }, { "epoch": 0.21, "learning_rate": 0.0005137499999999999, "loss": 0.8456, "step": 20570 }, { "epoch": 0.21, "learning_rate": 0.0005139999999999999, "loss": 0.8625, "step": 20580 }, { "epoch": 0.21, "learning_rate": 0.00051425, "loss": 0.863, "step": 20590 }, { "epoch": 0.21, "learning_rate": 0.0005145, "loss": 0.8661, "step": 20600 }, { "epoch": 0.21, "learning_rate": 0.0005147499999999999, "loss": 0.853, "step": 20610 }, { "epoch": 0.21, "learning_rate": 0.0005149999999999999, "loss": 0.8355, "step": 20620 }, { "epoch": 0.21, "learning_rate": 0.00051525, "loss": 0.8516, "step": 20630 }, { "epoch": 0.21, "learning_rate": 0.0005154999999999999, "loss": 0.8582, "step": 20640 }, { "epoch": 0.21, "learning_rate": 0.0005157499999999999, "loss": 0.8616, "step": 20650 }, { "epoch": 0.21, "learning_rate": 0.000516, "loss": 0.872, "step": 20660 }, { "epoch": 0.21, "learning_rate": 0.00051625, "loss": 0.8662, "step": 20670 }, { "epoch": 0.21, "learning_rate": 0.0005164999999999999, "loss": 0.8629, "step": 20680 }, { "epoch": 0.21, "learning_rate": 0.00051675, "loss": 0.8667, "step": 20690 }, { "epoch": 0.21, "learning_rate": 0.000517, "loss": 0.8714, "step": 20700 }, { "epoch": 0.21, "learning_rate": 0.0005172499999999999, "loss": 0.8544, "step": 20710 }, { "epoch": 0.21, "learning_rate": 0.0005175, "loss": 0.858, "step": 20720 }, { "epoch": 0.21, "learning_rate": 0.00051775, "loss": 0.8394, "step": 20730 }, { "epoch": 0.21, "learning_rate": 0.000517975, "loss": 0.8487, "step": 20740 }, { "epoch": 0.21, "learning_rate": 0.000518225, "loss": 0.8566, "step": 20750 }, { "epoch": 0.21, "learning_rate": 0.000518475, "loss": 0.8522, "step": 20760 }, { "epoch": 0.21, "learning_rate": 0.0005187249999999999, "loss": 0.8654, "step": 20770 }, { "epoch": 0.21, "learning_rate": 0.000518975, "loss": 0.8526, "step": 20780 }, { "epoch": 0.21, "learning_rate": 0.000519225, "loss": 0.8596, "step": 20790 }, { "epoch": 0.21, "learning_rate": 0.0005194749999999999, "loss": 0.8721, "step": 20800 }, { "epoch": 0.21, "learning_rate": 0.000519725, "loss": 0.8496, "step": 20810 }, { "epoch": 0.21, "learning_rate": 0.000519975, "loss": 0.8524, "step": 20820 }, { "epoch": 0.21, "learning_rate": 0.000520225, "loss": 0.8407, "step": 20830 }, { "epoch": 0.21, "learning_rate": 0.000520475, "loss": 0.842, "step": 20840 }, { "epoch": 0.21, "learning_rate": 0.0005207249999999999, "loss": 0.8428, "step": 20850 }, { "epoch": 0.21, "learning_rate": 0.000520975, "loss": 0.8262, "step": 20860 }, { "epoch": 0.21, "learning_rate": 0.0005212249999999999, "loss": 0.8374, "step": 20870 }, { "epoch": 0.21, "learning_rate": 0.000521475, "loss": 0.8292, "step": 20880 }, { "epoch": 0.21, "learning_rate": 0.0005217249999999999, "loss": 0.8493, "step": 20890 }, { "epoch": 0.21, "learning_rate": 0.0005219749999999999, "loss": 0.8546, "step": 20900 }, { "epoch": 0.21, "learning_rate": 0.000522225, "loss": 0.8577, "step": 20910 }, { "epoch": 0.21, "learning_rate": 0.0005224749999999999, "loss": 0.8526, "step": 20920 }, { "epoch": 0.21, "learning_rate": 0.0005227249999999999, "loss": 0.8755, "step": 20930 }, { "epoch": 0.21, "learning_rate": 0.000522975, "loss": 0.8588, "step": 20940 }, { "epoch": 0.21, "learning_rate": 0.000523225, "loss": 0.8551, "step": 20950 }, { "epoch": 0.21, "learning_rate": 0.0005234749999999999, "loss": 0.8439, "step": 20960 }, { "epoch": 0.21, "learning_rate": 0.000523725, "loss": 0.8404, "step": 20970 }, { "epoch": 0.21, "learning_rate": 0.000523975, "loss": 0.8425, "step": 20980 }, { "epoch": 0.21, "learning_rate": 0.0005242249999999999, "loss": 0.8488, "step": 20990 }, { "epoch": 0.21, "learning_rate": 0.0005244749999999999, "loss": 0.8544, "step": 21000 }, { "epoch": 0.21, "learning_rate": 0.000524725, "loss": 0.8515, "step": 21010 }, { "epoch": 0.21, "learning_rate": 0.0005249749999999999, "loss": 0.8592, "step": 21020 }, { "epoch": 0.21, "learning_rate": 0.0005252249999999999, "loss": 0.8461, "step": 21030 }, { "epoch": 0.21, "learning_rate": 0.000525475, "loss": 0.8671, "step": 21040 }, { "epoch": 0.21, "learning_rate": 0.000525725, "loss": 0.848, "step": 21050 }, { "epoch": 0.21, "learning_rate": 0.0005259749999999999, "loss": 0.8458, "step": 21060 }, { "epoch": 0.21, "learning_rate": 0.000526225, "loss": 0.8492, "step": 21070 }, { "epoch": 0.21, "learning_rate": 0.000526475, "loss": 0.8559, "step": 21080 }, { "epoch": 0.21, "learning_rate": 0.0005267249999999999, "loss": 0.8533, "step": 21090 }, { "epoch": 0.21, "learning_rate": 0.000526975, "loss": 0.8399, "step": 21100 }, { "epoch": 0.21, "learning_rate": 0.000527225, "loss": 0.839, "step": 21110 }, { "epoch": 0.21, "learning_rate": 0.000527475, "loss": 0.8347, "step": 21120 }, { "epoch": 0.21, "learning_rate": 0.0005277249999999999, "loss": 0.8422, "step": 21130 }, { "epoch": 0.21, "learning_rate": 0.000527975, "loss": 0.8471, "step": 21140 }, { "epoch": 0.21, "learning_rate": 0.000528225, "loss": 0.8427, "step": 21150 }, { "epoch": 0.21, "learning_rate": 0.0005284749999999999, "loss": 0.8431, "step": 21160 }, { "epoch": 0.21, "learning_rate": 0.000528725, "loss": 0.8579, "step": 21170 }, { "epoch": 0.21, "learning_rate": 0.000528975, "loss": 0.8445, "step": 21180 }, { "epoch": 0.21, "learning_rate": 0.0005292249999999999, "loss": 0.8808, "step": 21190 }, { "epoch": 0.21, "learning_rate": 0.000529475, "loss": 0.8514, "step": 21200 }, { "epoch": 0.21, "learning_rate": 0.0005297249999999999, "loss": 0.8644, "step": 21210 }, { "epoch": 0.21, "learning_rate": 0.000529975, "loss": 0.8643, "step": 21220 }, { "epoch": 0.21, "learning_rate": 0.0005302249999999999, "loss": 0.8652, "step": 21230 }, { "epoch": 0.21, "learning_rate": 0.000530475, "loss": 0.8588, "step": 21240 }, { "epoch": 0.21, "learning_rate": 0.0005307249999999999, "loss": 0.8664, "step": 21250 }, { "epoch": 0.21, "learning_rate": 0.0005309749999999999, "loss": 0.8559, "step": 21260 }, { "epoch": 0.21, "learning_rate": 0.000531225, "loss": 0.8528, "step": 21270 }, { "epoch": 0.21, "learning_rate": 0.0005314749999999999, "loss": 0.8701, "step": 21280 }, { "epoch": 0.21, "learning_rate": 0.0005317249999999999, "loss": 0.8587, "step": 21290 }, { "epoch": 0.21, "learning_rate": 0.000531975, "loss": 0.8486, "step": 21300 }, { "epoch": 0.21, "learning_rate": 0.0005322249999999999, "loss": 0.834, "step": 21310 }, { "epoch": 0.21, "learning_rate": 0.0005324749999999999, "loss": 0.8394, "step": 21320 }, { "epoch": 0.21, "learning_rate": 0.000532725, "loss": 0.8362, "step": 21330 }, { "epoch": 0.21, "learning_rate": 0.000532975, "loss": 0.8399, "step": 21340 }, { "epoch": 0.21, "learning_rate": 0.0005332249999999999, "loss": 0.8561, "step": 21350 }, { "epoch": 0.21, "learning_rate": 0.0005334749999999999, "loss": 0.8599, "step": 21360 }, { "epoch": 0.21, "learning_rate": 0.000533725, "loss": 0.8555, "step": 21370 }, { "epoch": 0.21, "learning_rate": 0.0005339749999999999, "loss": 0.8606, "step": 21380 }, { "epoch": 0.21, "learning_rate": 0.0005342249999999999, "loss": 0.8653, "step": 21390 }, { "epoch": 0.21, "learning_rate": 0.000534475, "loss": 0.8586, "step": 21400 }, { "epoch": 0.21, "learning_rate": 0.000534725, "loss": 0.8409, "step": 21410 }, { "epoch": 0.21, "learning_rate": 0.0005349749999999999, "loss": 0.8412, "step": 21420 }, { "epoch": 0.21, "learning_rate": 0.000535225, "loss": 0.8464, "step": 21430 }, { "epoch": 0.21, "learning_rate": 0.000535475, "loss": 0.8684, "step": 21440 }, { "epoch": 0.21, "learning_rate": 0.0005357249999999999, "loss": 0.8533, "step": 21450 }, { "epoch": 0.21, "learning_rate": 0.000535975, "loss": 0.8652, "step": 21460 }, { "epoch": 0.21, "learning_rate": 0.000536225, "loss": 0.8394, "step": 21470 }, { "epoch": 0.21, "learning_rate": 0.0005364749999999999, "loss": 0.8395, "step": 21480 }, { "epoch": 0.21, "learning_rate": 0.0005367249999999999, "loss": 0.8405, "step": 21490 }, { "epoch": 0.21, "learning_rate": 0.000536975, "loss": 0.861, "step": 21500 }, { "epoch": 0.22, "learning_rate": 0.000537225, "loss": 0.8605, "step": 21510 }, { "epoch": 0.22, "learning_rate": 0.0005374749999999999, "loss": 0.8449, "step": 21520 }, { "epoch": 0.22, "learning_rate": 0.000537725, "loss": 0.8417, "step": 21530 }, { "epoch": 0.22, "learning_rate": 0.000537975, "loss": 0.842, "step": 21540 }, { "epoch": 0.22, "learning_rate": 0.0005382249999999999, "loss": 0.8467, "step": 21550 }, { "epoch": 0.22, "learning_rate": 0.000538475, "loss": 0.8455, "step": 21560 }, { "epoch": 0.22, "learning_rate": 0.000538725, "loss": 0.8414, "step": 21570 }, { "epoch": 0.22, "learning_rate": 0.000538975, "loss": 0.8568, "step": 21580 }, { "epoch": 0.22, "learning_rate": 0.000539225, "loss": 0.85, "step": 21590 }, { "epoch": 0.22, "learning_rate": 0.0005394749999999999, "loss": 0.8696, "step": 21600 }, { "epoch": 0.22, "learning_rate": 0.000539725, "loss": 0.8536, "step": 21610 }, { "epoch": 0.22, "learning_rate": 0.0005399749999999999, "loss": 0.8483, "step": 21620 }, { "epoch": 0.22, "learning_rate": 0.000540225, "loss": 0.8525, "step": 21630 }, { "epoch": 0.22, "learning_rate": 0.0005404749999999999, "loss": 0.8526, "step": 21640 }, { "epoch": 0.22, "learning_rate": 0.0005407249999999999, "loss": 0.8366, "step": 21650 }, { "epoch": 0.22, "learning_rate": 0.000540975, "loss": 0.8595, "step": 21660 }, { "epoch": 0.22, "learning_rate": 0.0005412249999999999, "loss": 0.84, "step": 21670 }, { "epoch": 0.22, "learning_rate": 0.0005414749999999999, "loss": 0.8594, "step": 21680 }, { "epoch": 0.22, "learning_rate": 0.000541725, "loss": 0.8491, "step": 21690 }, { "epoch": 0.22, "learning_rate": 0.000541975, "loss": 0.8567, "step": 21700 }, { "epoch": 0.22, "learning_rate": 0.0005422249999999999, "loss": 0.8567, "step": 21710 }, { "epoch": 0.22, "learning_rate": 0.0005424749999999999, "loss": 0.8429, "step": 21720 }, { "epoch": 0.22, "learning_rate": 0.000542725, "loss": 0.8601, "step": 21730 }, { "epoch": 0.22, "learning_rate": 0.0005429499999999999, "loss": 0.8466, "step": 21740 }, { "epoch": 0.22, "learning_rate": 0.0005432, "loss": 0.8704, "step": 21750 }, { "epoch": 0.22, "learning_rate": 0.00054345, "loss": 0.851, "step": 21760 }, { "epoch": 0.22, "learning_rate": 0.0005436999999999999, "loss": 0.8732, "step": 21770 }, { "epoch": 0.22, "learning_rate": 0.00054395, "loss": 0.8686, "step": 21780 }, { "epoch": 0.22, "learning_rate": 0.0005442, "loss": 0.852, "step": 21790 }, { "epoch": 0.22, "learning_rate": 0.0005444499999999999, "loss": 0.8505, "step": 21800 }, { "epoch": 0.22, "learning_rate": 0.0005447, "loss": 0.8207, "step": 21810 }, { "epoch": 0.22, "learning_rate": 0.00054495, "loss": 0.8405, "step": 21820 }, { "epoch": 0.22, "learning_rate": 0.0005451999999999999, "loss": 0.8289, "step": 21830 }, { "epoch": 0.22, "learning_rate": 0.00054545, "loss": 0.8277, "step": 21840 }, { "epoch": 0.22, "learning_rate": 0.0005457, "loss": 0.848, "step": 21850 }, { "epoch": 0.22, "learning_rate": 0.00054595, "loss": 0.8432, "step": 21860 }, { "epoch": 0.22, "learning_rate": 0.0005461999999999999, "loss": 0.8451, "step": 21870 }, { "epoch": 0.22, "learning_rate": 0.00054645, "loss": 0.8518, "step": 21880 }, { "epoch": 0.22, "learning_rate": 0.0005467, "loss": 0.8537, "step": 21890 }, { "epoch": 0.22, "learning_rate": 0.0005469499999999999, "loss": 0.8512, "step": 21900 }, { "epoch": 0.22, "learning_rate": 0.0005472, "loss": 0.8544, "step": 21910 }, { "epoch": 0.22, "learning_rate": 0.00054745, "loss": 0.8408, "step": 21920 }, { "epoch": 0.22, "learning_rate": 0.0005477, "loss": 0.8433, "step": 21930 }, { "epoch": 0.22, "learning_rate": 0.00054795, "loss": 0.8379, "step": 21940 }, { "epoch": 0.22, "learning_rate": 0.0005481999999999999, "loss": 0.8502, "step": 21950 }, { "epoch": 0.22, "learning_rate": 0.0005484499999999999, "loss": 0.8576, "step": 21960 }, { "epoch": 0.22, "learning_rate": 0.0005487, "loss": 0.8571, "step": 21970 }, { "epoch": 0.22, "learning_rate": 0.00054895, "loss": 0.8501, "step": 21980 }, { "epoch": 0.22, "learning_rate": 0.0005491999999999999, "loss": 0.8382, "step": 21990 }, { "epoch": 0.22, "learning_rate": 0.0005494499999999999, "loss": 0.8386, "step": 22000 }, { "epoch": 0.22, "learning_rate": 0.0005497, "loss": 0.8449, "step": 22010 }, { "epoch": 0.22, "learning_rate": 0.0005499499999999999, "loss": 0.855, "step": 22020 }, { "epoch": 0.22, "learning_rate": 0.0005501999999999999, "loss": 0.8483, "step": 22030 }, { "epoch": 0.22, "learning_rate": 0.00055045, "loss": 0.8802, "step": 22040 }, { "epoch": 0.22, "learning_rate": 0.0005506999999999999, "loss": 0.8487, "step": 22050 }, { "epoch": 0.22, "learning_rate": 0.0005509499999999999, "loss": 0.8665, "step": 22060 }, { "epoch": 0.22, "learning_rate": 0.0005512, "loss": 0.8622, "step": 22070 }, { "epoch": 0.22, "learning_rate": 0.00055145, "loss": 0.8407, "step": 22080 }, { "epoch": 0.22, "learning_rate": 0.0005516999999999999, "loss": 0.8573, "step": 22090 }, { "epoch": 0.22, "learning_rate": 0.0005519499999999999, "loss": 0.8576, "step": 22100 }, { "epoch": 0.22, "learning_rate": 0.0005522, "loss": 0.8431, "step": 22110 }, { "epoch": 0.22, "learning_rate": 0.0005524499999999999, "loss": 0.8333, "step": 22120 }, { "epoch": 0.22, "learning_rate": 0.0005526999999999999, "loss": 0.8526, "step": 22130 }, { "epoch": 0.22, "learning_rate": 0.00055295, "loss": 0.8583, "step": 22140 }, { "epoch": 0.22, "learning_rate": 0.0005532, "loss": 0.8526, "step": 22150 }, { "epoch": 0.22, "learning_rate": 0.0005534499999999999, "loss": 0.8519, "step": 22160 }, { "epoch": 0.22, "learning_rate": 0.0005537, "loss": 0.8788, "step": 22170 }, { "epoch": 0.22, "learning_rate": 0.00055395, "loss": 0.8524, "step": 22180 }, { "epoch": 0.22, "learning_rate": 0.0005541999999999999, "loss": 0.8541, "step": 22190 }, { "epoch": 0.22, "learning_rate": 0.00055445, "loss": 0.8464, "step": 22200 }, { "epoch": 0.22, "learning_rate": 0.0005547, "loss": 0.8489, "step": 22210 }, { "epoch": 0.22, "learning_rate": 0.00055495, "loss": 0.84, "step": 22220 }, { "epoch": 0.22, "learning_rate": 0.0005551999999999999, "loss": 0.8484, "step": 22230 }, { "epoch": 0.22, "learning_rate": 0.00055545, "loss": 0.8545, "step": 22240 }, { "epoch": 0.22, "learning_rate": 0.0005557, "loss": 0.8498, "step": 22250 }, { "epoch": 0.22, "learning_rate": 0.0005559499999999999, "loss": 0.8379, "step": 22260 }, { "epoch": 0.22, "learning_rate": 0.0005562, "loss": 0.8475, "step": 22270 }, { "epoch": 0.22, "learning_rate": 0.00055645, "loss": 0.8605, "step": 22280 }, { "epoch": 0.22, "learning_rate": 0.0005566999999999999, "loss": 0.8661, "step": 22290 }, { "epoch": 0.22, "learning_rate": 0.00055695, "loss": 0.8457, "step": 22300 }, { "epoch": 0.22, "learning_rate": 0.0005571999999999999, "loss": 0.8488, "step": 22310 }, { "epoch": 0.22, "learning_rate": 0.00055745, "loss": 0.8541, "step": 22320 }, { "epoch": 0.22, "learning_rate": 0.0005577, "loss": 0.8565, "step": 22330 }, { "epoch": 0.22, "learning_rate": 0.0005579499999999999, "loss": 0.8472, "step": 22340 }, { "epoch": 0.22, "learning_rate": 0.0005581999999999999, "loss": 0.8344, "step": 22350 }, { "epoch": 0.22, "learning_rate": 0.0005584499999999999, "loss": 0.8216, "step": 22360 }, { "epoch": 0.22, "learning_rate": 0.0005587, "loss": 0.8391, "step": 22370 }, { "epoch": 0.22, "learning_rate": 0.0005589499999999999, "loss": 0.8371, "step": 22380 }, { "epoch": 0.22, "learning_rate": 0.0005591999999999999, "loss": 0.85, "step": 22390 }, { "epoch": 0.22, "learning_rate": 0.00055945, "loss": 0.8419, "step": 22400 }, { "epoch": 0.22, "learning_rate": 0.0005596999999999999, "loss": 0.8556, "step": 22410 }, { "epoch": 0.22, "learning_rate": 0.0005599499999999999, "loss": 0.8554, "step": 22420 }, { "epoch": 0.22, "learning_rate": 0.0005602, "loss": 0.8672, "step": 22430 }, { "epoch": 0.22, "learning_rate": 0.00056045, "loss": 0.8626, "step": 22440 }, { "epoch": 0.22, "learning_rate": 0.0005606999999999999, "loss": 0.8509, "step": 22450 }, { "epoch": 0.22, "learning_rate": 0.00056095, "loss": 0.8573, "step": 22460 }, { "epoch": 0.22, "learning_rate": 0.0005612, "loss": 0.8384, "step": 22470 }, { "epoch": 0.22, "learning_rate": 0.0005614499999999999, "loss": 0.8403, "step": 22480 }, { "epoch": 0.22, "learning_rate": 0.0005616999999999999, "loss": 0.8321, "step": 22490 }, { "epoch": 0.23, "learning_rate": 0.00056195, "loss": 0.8331, "step": 22500 }, { "epoch": 0.23, "eval_accuracy": 0.8251687860262823, "eval_loss": 0.86767578125, "eval_runtime": 96.699, "eval_samples_per_second": 827.309, "eval_steps_per_second": 1.624, "step": 22500 }, { "epoch": 0.23, "learning_rate": 0.0005622, "loss": 0.8412, "step": 22510 }, { "epoch": 0.23, "learning_rate": 0.0005624499999999999, "loss": 0.8565, "step": 22520 }, { "epoch": 0.23, "learning_rate": 0.0005627, "loss": 0.8348, "step": 22530 }, { "epoch": 0.23, "learning_rate": 0.00056295, "loss": 0.8422, "step": 22540 }, { "epoch": 0.23, "learning_rate": 0.0005631999999999999, "loss": 0.8546, "step": 22550 }, { "epoch": 0.23, "learning_rate": 0.00056345, "loss": 0.8493, "step": 22560 }, { "epoch": 0.23, "learning_rate": 0.0005637, "loss": 0.8426, "step": 22570 }, { "epoch": 0.23, "learning_rate": 0.0005639499999999999, "loss": 0.8362, "step": 22580 }, { "epoch": 0.23, "learning_rate": 0.0005641999999999999, "loss": 0.8515, "step": 22590 }, { "epoch": 0.23, "learning_rate": 0.00056445, "loss": 0.8461, "step": 22600 }, { "epoch": 0.23, "learning_rate": 0.0005647, "loss": 0.8419, "step": 22610 }, { "epoch": 0.23, "learning_rate": 0.0005649499999999999, "loss": 0.8442, "step": 22620 }, { "epoch": 0.23, "learning_rate": 0.0005652, "loss": 0.8306, "step": 22630 }, { "epoch": 0.23, "learning_rate": 0.00056545, "loss": 0.8336, "step": 22640 }, { "epoch": 0.23, "learning_rate": 0.0005656999999999999, "loss": 0.8601, "step": 22650 }, { "epoch": 0.23, "learning_rate": 0.00056595, "loss": 0.8297, "step": 22660 }, { "epoch": 0.23, "learning_rate": 0.0005662, "loss": 0.8361, "step": 22670 }, { "epoch": 0.23, "learning_rate": 0.00056645, "loss": 0.8433, "step": 22680 }, { "epoch": 0.23, "learning_rate": 0.0005667, "loss": 0.85, "step": 22690 }, { "epoch": 0.23, "learning_rate": 0.0005669499999999999, "loss": 0.8516, "step": 22700 }, { "epoch": 0.23, "learning_rate": 0.0005672, "loss": 0.8673, "step": 22710 }, { "epoch": 0.23, "learning_rate": 0.0005674499999999999, "loss": 0.858, "step": 22720 }, { "epoch": 0.23, "learning_rate": 0.0005677, "loss": 0.8526, "step": 22730 }, { "epoch": 0.23, "learning_rate": 0.0005679249999999999, "loss": 0.8419, "step": 22740 }, { "epoch": 0.23, "learning_rate": 0.000568175, "loss": 0.8424, "step": 22750 }, { "epoch": 0.23, "learning_rate": 0.0005684249999999999, "loss": 0.8331, "step": 22760 }, { "epoch": 0.23, "learning_rate": 0.0005686749999999999, "loss": 0.8234, "step": 22770 }, { "epoch": 0.23, "learning_rate": 0.000568925, "loss": 0.8343, "step": 22780 }, { "epoch": 0.23, "learning_rate": 0.000569175, "loss": 0.8336, "step": 22790 }, { "epoch": 0.23, "learning_rate": 0.0005694249999999999, "loss": 0.8394, "step": 22800 }, { "epoch": 0.23, "learning_rate": 0.000569675, "loss": 0.8365, "step": 22810 }, { "epoch": 0.23, "learning_rate": 0.000569925, "loss": 0.8417, "step": 22820 }, { "epoch": 0.23, "learning_rate": 0.0005701749999999999, "loss": 0.8479, "step": 22830 }, { "epoch": 0.23, "learning_rate": 0.000570425, "loss": 0.8533, "step": 22840 }, { "epoch": 0.23, "learning_rate": 0.000570675, "loss": 0.8259, "step": 22850 }, { "epoch": 0.23, "learning_rate": 0.0005709249999999999, "loss": 0.8621, "step": 22860 }, { "epoch": 0.23, "learning_rate": 0.0005711749999999999, "loss": 0.8413, "step": 22870 }, { "epoch": 0.23, "learning_rate": 0.000571425, "loss": 0.8336, "step": 22880 }, { "epoch": 0.23, "learning_rate": 0.000571675, "loss": 0.8262, "step": 22890 }, { "epoch": 0.23, "learning_rate": 0.0005719249999999999, "loss": 0.8366, "step": 22900 }, { "epoch": 0.23, "learning_rate": 0.000572175, "loss": 0.8295, "step": 22910 }, { "epoch": 0.23, "learning_rate": 0.000572425, "loss": 0.8373, "step": 22920 }, { "epoch": 0.23, "learning_rate": 0.0005726749999999999, "loss": 0.8466, "step": 22930 }, { "epoch": 0.23, "learning_rate": 0.000572925, "loss": 0.8341, "step": 22940 }, { "epoch": 0.23, "learning_rate": 0.000573175, "loss": 0.8429, "step": 22950 }, { "epoch": 0.23, "learning_rate": 0.000573425, "loss": 0.8465, "step": 22960 }, { "epoch": 0.23, "learning_rate": 0.0005736749999999999, "loss": 0.8499, "step": 22970 }, { "epoch": 0.23, "learning_rate": 0.000573925, "loss": 0.8308, "step": 22980 }, { "epoch": 0.23, "learning_rate": 0.000574175, "loss": 0.8424, "step": 22990 }, { "epoch": 0.23, "learning_rate": 0.0005744249999999999, "loss": 0.8312, "step": 23000 }, { "epoch": 0.23, "learning_rate": 0.000574675, "loss": 0.8453, "step": 23010 }, { "epoch": 0.23, "learning_rate": 0.0005749249999999999, "loss": 0.8476, "step": 23020 }, { "epoch": 0.23, "learning_rate": 0.0005751749999999999, "loss": 0.8329, "step": 23030 }, { "epoch": 0.23, "learning_rate": 0.000575425, "loss": 0.8365, "step": 23040 }, { "epoch": 0.23, "learning_rate": 0.0005756749999999999, "loss": 0.8487, "step": 23050 }, { "epoch": 0.23, "learning_rate": 0.0005759249999999999, "loss": 0.8407, "step": 23060 }, { "epoch": 0.23, "learning_rate": 0.000576175, "loss": 0.8449, "step": 23070 }, { "epoch": 0.23, "learning_rate": 0.000576425, "loss": 0.8644, "step": 23080 }, { "epoch": 0.23, "learning_rate": 0.0005766749999999999, "loss": 0.8564, "step": 23090 }, { "epoch": 0.23, "learning_rate": 0.0005769249999999999, "loss": 0.8385, "step": 23100 }, { "epoch": 0.23, "learning_rate": 0.000577175, "loss": 0.8285, "step": 23110 }, { "epoch": 0.23, "learning_rate": 0.0005774249999999999, "loss": 0.8194, "step": 23120 }, { "epoch": 0.23, "learning_rate": 0.0005776749999999999, "loss": 0.814, "step": 23130 }, { "epoch": 0.23, "learning_rate": 0.000577925, "loss": 0.8261, "step": 23140 }, { "epoch": 0.23, "learning_rate": 0.0005781749999999999, "loss": 0.8141, "step": 23150 }, { "epoch": 0.23, "learning_rate": 0.0005784249999999999, "loss": 0.8089, "step": 23160 }, { "epoch": 0.23, "learning_rate": 0.000578675, "loss": 0.8003, "step": 23170 }, { "epoch": 0.23, "learning_rate": 0.000578925, "loss": 0.8148, "step": 23180 }, { "epoch": 0.23, "learning_rate": 0.0005791749999999999, "loss": 0.814, "step": 23190 }, { "epoch": 0.23, "learning_rate": 0.000579425, "loss": 0.8206, "step": 23200 }, { "epoch": 0.23, "learning_rate": 0.000579675, "loss": 0.7938, "step": 23210 }, { "epoch": 0.23, "learning_rate": 0.0005799249999999999, "loss": 0.8371, "step": 23220 }, { "epoch": 0.23, "learning_rate": 0.0005801749999999999, "loss": 0.8515, "step": 23230 }, { "epoch": 0.23, "learning_rate": 0.000580425, "loss": 0.853, "step": 23240 }, { "epoch": 0.23, "learning_rate": 0.000580675, "loss": 0.8462, "step": 23250 }, { "epoch": 0.23, "learning_rate": 0.0005809249999999999, "loss": 0.8354, "step": 23260 }, { "epoch": 0.23, "learning_rate": 0.000581175, "loss": 0.8352, "step": 23270 }, { "epoch": 0.23, "learning_rate": 0.000581425, "loss": 0.8431, "step": 23280 }, { "epoch": 0.23, "learning_rate": 0.0005816749999999999, "loss": 0.8469, "step": 23290 }, { "epoch": 0.23, "learning_rate": 0.000581925, "loss": 0.8279, "step": 23300 }, { "epoch": 0.23, "learning_rate": 0.000582175, "loss": 0.8322, "step": 23310 }, { "epoch": 0.23, "learning_rate": 0.0005824249999999999, "loss": 0.8279, "step": 23320 }, { "epoch": 0.23, "learning_rate": 0.000582675, "loss": 0.8376, "step": 23330 }, { "epoch": 0.23, "learning_rate": 0.000582925, "loss": 0.8371, "step": 23340 }, { "epoch": 0.23, "learning_rate": 0.000583175, "loss": 0.8421, "step": 23350 }, { "epoch": 0.23, "learning_rate": 0.0005834249999999999, "loss": 0.8337, "step": 23360 }, { "epoch": 0.23, "learning_rate": 0.000583675, "loss": 0.8473, "step": 23370 }, { "epoch": 0.23, "learning_rate": 0.000583925, "loss": 0.8464, "step": 23380 }, { "epoch": 0.23, "learning_rate": 0.0005841749999999999, "loss": 0.8401, "step": 23390 }, { "epoch": 0.23, "learning_rate": 0.000584425, "loss": 0.849, "step": 23400 }, { "epoch": 0.23, "learning_rate": 0.0005846749999999999, "loss": 0.8351, "step": 23410 }, { "epoch": 0.23, "learning_rate": 0.000584925, "loss": 0.8496, "step": 23420 }, { "epoch": 0.23, "learning_rate": 0.000585175, "loss": 0.8413, "step": 23430 }, { "epoch": 0.23, "learning_rate": 0.0005854249999999999, "loss": 0.8396, "step": 23440 }, { "epoch": 0.23, "learning_rate": 0.0005856749999999999, "loss": 0.8265, "step": 23450 }, { "epoch": 0.23, "learning_rate": 0.0005859249999999999, "loss": 0.8421, "step": 23460 }, { "epoch": 0.23, "learning_rate": 0.000586175, "loss": 0.8418, "step": 23470 }, { "epoch": 0.23, "learning_rate": 0.0005864249999999999, "loss": 0.8253, "step": 23480 }, { "epoch": 0.23, "learning_rate": 0.0005866749999999999, "loss": 0.7979, "step": 23490 }, { "epoch": 0.23, "learning_rate": 0.000586925, "loss": 0.823, "step": 23500 }, { "epoch": 0.24, "learning_rate": 0.0005871749999999999, "loss": 0.839, "step": 23510 }, { "epoch": 0.24, "learning_rate": 0.0005874249999999999, "loss": 0.8571, "step": 23520 }, { "epoch": 0.24, "learning_rate": 0.000587675, "loss": 0.8338, "step": 23530 }, { "epoch": 0.24, "learning_rate": 0.000587925, "loss": 0.8443, "step": 23540 }, { "epoch": 0.24, "learning_rate": 0.0005881749999999999, "loss": 0.8499, "step": 23550 }, { "epoch": 0.24, "learning_rate": 0.000588425, "loss": 0.8371, "step": 23560 }, { "epoch": 0.24, "learning_rate": 0.000588675, "loss": 0.833, "step": 23570 }, { "epoch": 0.24, "learning_rate": 0.0005889249999999999, "loss": 0.8266, "step": 23580 }, { "epoch": 0.24, "learning_rate": 0.0005891749999999999, "loss": 0.8274, "step": 23590 }, { "epoch": 0.24, "learning_rate": 0.000589425, "loss": 0.8249, "step": 23600 }, { "epoch": 0.24, "learning_rate": 0.0005896749999999999, "loss": 0.8405, "step": 23610 }, { "epoch": 0.24, "learning_rate": 0.0005899249999999999, "loss": 0.8365, "step": 23620 }, { "epoch": 0.24, "learning_rate": 0.000590175, "loss": 0.8367, "step": 23630 }, { "epoch": 0.24, "learning_rate": 0.000590425, "loss": 0.8378, "step": 23640 }, { "epoch": 0.24, "learning_rate": 0.0005906749999999999, "loss": 0.8411, "step": 23650 }, { "epoch": 0.24, "learning_rate": 0.000590925, "loss": 0.8432, "step": 23660 }, { "epoch": 0.24, "learning_rate": 0.000591175, "loss": 0.8525, "step": 23670 }, { "epoch": 0.24, "learning_rate": 0.0005914249999999999, "loss": 0.8394, "step": 23680 }, { "epoch": 0.24, "learning_rate": 0.000591675, "loss": 0.8471, "step": 23690 }, { "epoch": 0.24, "learning_rate": 0.000591925, "loss": 0.8306, "step": 23700 }, { "epoch": 0.24, "learning_rate": 0.000592175, "loss": 0.8147, "step": 23710 }, { "epoch": 0.24, "learning_rate": 0.0005924249999999999, "loss": 0.8235, "step": 23720 }, { "epoch": 0.24, "learning_rate": 0.000592675, "loss": 0.8362, "step": 23730 }, { "epoch": 0.24, "learning_rate": 0.0005928999999999999, "loss": 0.8339, "step": 23740 }, { "epoch": 0.24, "learning_rate": 0.00059315, "loss": 0.839, "step": 23750 }, { "epoch": 0.24, "learning_rate": 0.0005933999999999999, "loss": 0.8156, "step": 23760 }, { "epoch": 0.24, "learning_rate": 0.0005936499999999999, "loss": 0.8529, "step": 23770 }, { "epoch": 0.24, "learning_rate": 0.0005939, "loss": 0.8476, "step": 23780 }, { "epoch": 0.24, "learning_rate": 0.0005941499999999999, "loss": 0.8443, "step": 23790 }, { "epoch": 0.24, "learning_rate": 0.0005943999999999999, "loss": 0.8345, "step": 23800 }, { "epoch": 0.24, "learning_rate": 0.00059465, "loss": 0.8323, "step": 23810 }, { "epoch": 0.24, "learning_rate": 0.0005949, "loss": 0.835, "step": 23820 }, { "epoch": 0.24, "learning_rate": 0.0005951499999999999, "loss": 0.8255, "step": 23830 }, { "epoch": 0.24, "learning_rate": 0.0005953999999999999, "loss": 0.8313, "step": 23840 }, { "epoch": 0.24, "learning_rate": 0.00059565, "loss": 0.8261, "step": 23850 }, { "epoch": 0.24, "learning_rate": 0.0005958999999999999, "loss": 0.8401, "step": 23860 }, { "epoch": 0.24, "learning_rate": 0.0005961499999999999, "loss": 0.8233, "step": 23870 }, { "epoch": 0.24, "learning_rate": 0.0005964, "loss": 0.8306, "step": 23880 }, { "epoch": 0.24, "learning_rate": 0.0005966499999999999, "loss": 0.843, "step": 23890 }, { "epoch": 0.24, "learning_rate": 0.0005968999999999999, "loss": 0.8355, "step": 23900 }, { "epoch": 0.24, "learning_rate": 0.00059715, "loss": 0.8333, "step": 23910 }, { "epoch": 0.24, "learning_rate": 0.0005974, "loss": 0.8079, "step": 23920 }, { "epoch": 0.24, "learning_rate": 0.0005976499999999999, "loss": 0.8175, "step": 23930 }, { "epoch": 0.24, "learning_rate": 0.0005979, "loss": 0.8355, "step": 23940 }, { "epoch": 0.24, "learning_rate": 0.00059815, "loss": 0.8416, "step": 23950 }, { "epoch": 0.24, "learning_rate": 0.0005983999999999999, "loss": 0.8322, "step": 23960 }, { "epoch": 0.24, "learning_rate": 0.0005986499999999999, "loss": 0.839, "step": 23970 }, { "epoch": 0.24, "learning_rate": 0.0005989, "loss": 0.8322, "step": 23980 }, { "epoch": 0.24, "learning_rate": 0.00059915, "loss": 0.8334, "step": 23990 }, { "epoch": 0.24, "learning_rate": 0.0005993999999999999, "loss": 0.8202, "step": 24000 }, { "epoch": 0.24, "learning_rate": 0.00059965, "loss": 0.8244, "step": 24010 }, { "epoch": 0.24, "learning_rate": 0.0005999, "loss": 0.8304, "step": 24020 }, { "epoch": 0.24, "learning_rate": 0.0005999526315789473, "loss": 0.8353, "step": 24030 }, { "epoch": 0.24, "learning_rate": 0.0005998736842105262, "loss": 0.8389, "step": 24040 }, { "epoch": 0.24, "learning_rate": 0.0005997947368421052, "loss": 0.8423, "step": 24050 }, { "epoch": 0.24, "learning_rate": 0.0005997157894736841, "loss": 0.8378, "step": 24060 }, { "epoch": 0.24, "learning_rate": 0.0005996368421052631, "loss": 0.8273, "step": 24070 }, { "epoch": 0.24, "learning_rate": 0.000599557894736842, "loss": 0.8392, "step": 24080 }, { "epoch": 0.24, "learning_rate": 0.000599478947368421, "loss": 0.8372, "step": 24090 }, { "epoch": 0.24, "learning_rate": 0.0005993999999999999, "loss": 0.8344, "step": 24100 }, { "epoch": 0.24, "learning_rate": 0.0005993210526315789, "loss": 0.8248, "step": 24110 }, { "epoch": 0.24, "learning_rate": 0.0005992421052631578, "loss": 0.8391, "step": 24120 }, { "epoch": 0.24, "learning_rate": 0.0005991631578947368, "loss": 0.8254, "step": 24130 }, { "epoch": 0.24, "learning_rate": 0.0005990842105263157, "loss": 0.8272, "step": 24140 }, { "epoch": 0.24, "learning_rate": 0.0005990052631578947, "loss": 0.8237, "step": 24150 }, { "epoch": 0.24, "learning_rate": 0.0005989263157894736, "loss": 0.8403, "step": 24160 }, { "epoch": 0.24, "learning_rate": 0.0005988473684210526, "loss": 0.8436, "step": 24170 }, { "epoch": 0.24, "learning_rate": 0.0005987684210526315, "loss": 0.8277, "step": 24180 }, { "epoch": 0.24, "learning_rate": 0.0005986894736842105, "loss": 0.8273, "step": 24190 }, { "epoch": 0.24, "learning_rate": 0.0005986105263157894, "loss": 0.8324, "step": 24200 }, { "epoch": 0.24, "learning_rate": 0.0005985315789473684, "loss": 0.8262, "step": 24210 }, { "epoch": 0.24, "learning_rate": 0.0005984526315789473, "loss": 0.8249, "step": 24220 }, { "epoch": 0.24, "learning_rate": 0.0005983736842105263, "loss": 0.8301, "step": 24230 }, { "epoch": 0.24, "learning_rate": 0.0005982947368421052, "loss": 0.8368, "step": 24240 }, { "epoch": 0.24, "learning_rate": 0.0005982157894736842, "loss": 0.8256, "step": 24250 }, { "epoch": 0.24, "learning_rate": 0.0005981368421052631, "loss": 0.8332, "step": 24260 }, { "epoch": 0.24, "learning_rate": 0.000598057894736842, "loss": 0.8273, "step": 24270 }, { "epoch": 0.24, "learning_rate": 0.000597978947368421, "loss": 0.823, "step": 24280 }, { "epoch": 0.24, "learning_rate": 0.0005979, "loss": 0.8095, "step": 24290 }, { "epoch": 0.24, "learning_rate": 0.0005978210526315789, "loss": 0.8352, "step": 24300 }, { "epoch": 0.24, "learning_rate": 0.0005977421052631578, "loss": 0.8351, "step": 24310 }, { "epoch": 0.24, "learning_rate": 0.0005976631578947368, "loss": 0.8267, "step": 24320 }, { "epoch": 0.24, "learning_rate": 0.0005975842105263157, "loss": 0.8318, "step": 24330 }, { "epoch": 0.24, "learning_rate": 0.0005975052631578947, "loss": 0.8272, "step": 24340 }, { "epoch": 0.24, "learning_rate": 0.0005974263157894736, "loss": 0.8129, "step": 24350 }, { "epoch": 0.24, "learning_rate": 0.0005973473684210526, "loss": 0.8201, "step": 24360 }, { "epoch": 0.24, "learning_rate": 0.0005972684210526315, "loss": 0.8211, "step": 24370 }, { "epoch": 0.24, "learning_rate": 0.0005971894736842105, "loss": 0.8217, "step": 24380 }, { "epoch": 0.24, "learning_rate": 0.0005971105263157894, "loss": 0.8114, "step": 24390 }, { "epoch": 0.24, "learning_rate": 0.0005970315789473684, "loss": 0.8189, "step": 24400 }, { "epoch": 0.24, "learning_rate": 0.0005969526315789473, "loss": 0.8383, "step": 24410 }, { "epoch": 0.24, "learning_rate": 0.0005968736842105263, "loss": 0.8187, "step": 24420 }, { "epoch": 0.24, "learning_rate": 0.0005967947368421052, "loss": 0.8175, "step": 24430 }, { "epoch": 0.24, "learning_rate": 0.0005967157894736842, "loss": 0.8105, "step": 24440 }, { "epoch": 0.24, "learning_rate": 0.0005966368421052631, "loss": 0.8205, "step": 24450 }, { "epoch": 0.24, "learning_rate": 0.0005965578947368421, "loss": 0.8291, "step": 24460 }, { "epoch": 0.24, "learning_rate": 0.000596478947368421, "loss": 0.8141, "step": 24470 }, { "epoch": 0.24, "learning_rate": 0.0005964, "loss": 0.8309, "step": 24480 }, { "epoch": 0.24, "learning_rate": 0.0005963210526315789, "loss": 0.8155, "step": 24490 }, { "epoch": 0.24, "learning_rate": 0.0005962421052631579, "loss": 0.8303, "step": 24500 }, { "epoch": 0.25, "learning_rate": 0.0005961631578947368, "loss": 0.8319, "step": 24510 }, { "epoch": 0.25, "learning_rate": 0.0005960842105263158, "loss": 0.8229, "step": 24520 }, { "epoch": 0.25, "learning_rate": 0.0005960052631578947, "loss": 0.8236, "step": 24530 }, { "epoch": 0.25, "learning_rate": 0.0005959263157894737, "loss": 0.8219, "step": 24540 }, { "epoch": 0.25, "learning_rate": 0.0005958473684210526, "loss": 0.8101, "step": 24550 }, { "epoch": 0.25, "learning_rate": 0.0005957684210526314, "loss": 0.8133, "step": 24560 }, { "epoch": 0.25, "learning_rate": 0.0005956894736842105, "loss": 0.8224, "step": 24570 }, { "epoch": 0.25, "learning_rate": 0.0005956105263157895, "loss": 0.8266, "step": 24580 }, { "epoch": 0.25, "learning_rate": 0.0005955315789473684, "loss": 0.8204, "step": 24590 }, { "epoch": 0.25, "learning_rate": 0.0005954526315789473, "loss": 0.8312, "step": 24600 }, { "epoch": 0.25, "learning_rate": 0.0005953736842105263, "loss": 0.8187, "step": 24610 }, { "epoch": 0.25, "learning_rate": 0.0005952947368421052, "loss": 0.8262, "step": 24620 }, { "epoch": 0.25, "learning_rate": 0.0005952157894736841, "loss": 0.8141, "step": 24630 }, { "epoch": 0.25, "learning_rate": 0.0005951368421052631, "loss": 0.8192, "step": 24640 }, { "epoch": 0.25, "learning_rate": 0.000595057894736842, "loss": 0.8178, "step": 24650 }, { "epoch": 0.25, "learning_rate": 0.000594978947368421, "loss": 0.8326, "step": 24660 }, { "epoch": 0.25, "learning_rate": 0.0005949, "loss": 0.8171, "step": 24670 }, { "epoch": 0.25, "learning_rate": 0.0005948210526315789, "loss": 0.8305, "step": 24680 }, { "epoch": 0.25, "learning_rate": 0.0005947421052631579, "loss": 0.8214, "step": 24690 }, { "epoch": 0.25, "learning_rate": 0.0005946631578947367, "loss": 0.8161, "step": 24700 }, { "epoch": 0.25, "learning_rate": 0.0005945842105263158, "loss": 0.8281, "step": 24710 }, { "epoch": 0.25, "learning_rate": 0.0005945052631578946, "loss": 0.8173, "step": 24720 }, { "epoch": 0.25, "learning_rate": 0.0005944263157894737, "loss": 0.819, "step": 24730 }, { "epoch": 0.25, "learning_rate": 0.0005943473684210525, "loss": 0.8153, "step": 24740 }, { "epoch": 0.25, "learning_rate": 0.0005942763157894737, "loss": 0.8132, "step": 24750 }, { "epoch": 0.25, "learning_rate": 0.0005941973684210525, "loss": 0.8105, "step": 24760 }, { "epoch": 0.25, "learning_rate": 0.0005941184210526316, "loss": 0.8286, "step": 24770 }, { "epoch": 0.25, "learning_rate": 0.0005940394736842104, "loss": 0.8345, "step": 24780 }, { "epoch": 0.25, "learning_rate": 0.0005939605263157894, "loss": 0.8198, "step": 24790 }, { "epoch": 0.25, "learning_rate": 0.0005938815789473683, "loss": 0.8296, "step": 24800 }, { "epoch": 0.25, "learning_rate": 0.0005938026315789473, "loss": 0.8112, "step": 24810 }, { "epoch": 0.25, "learning_rate": 0.0005937236842105263, "loss": 0.8217, "step": 24820 }, { "epoch": 0.25, "learning_rate": 0.0005936447368421052, "loss": 0.8239, "step": 24830 }, { "epoch": 0.25, "learning_rate": 0.0005935657894736842, "loss": 0.8473, "step": 24840 }, { "epoch": 0.25, "learning_rate": 0.0005934868421052631, "loss": 0.8215, "step": 24850 }, { "epoch": 0.25, "learning_rate": 0.000593407894736842, "loss": 0.798, "step": 24860 }, { "epoch": 0.25, "learning_rate": 0.000593328947368421, "loss": 0.8289, "step": 24870 }, { "epoch": 0.25, "learning_rate": 0.0005932499999999999, "loss": 0.8461, "step": 24880 }, { "epoch": 0.25, "learning_rate": 0.0005931710526315789, "loss": 0.8384, "step": 24890 }, { "epoch": 0.25, "learning_rate": 0.0005930921052631578, "loss": 0.8234, "step": 24900 }, { "epoch": 0.25, "learning_rate": 0.0005930131578947369, "loss": 0.8142, "step": 24910 }, { "epoch": 0.25, "learning_rate": 0.0005929342105263157, "loss": 0.8214, "step": 24920 }, { "epoch": 0.25, "learning_rate": 0.0005928552631578947, "loss": 0.8247, "step": 24930 }, { "epoch": 0.25, "learning_rate": 0.0005927763157894736, "loss": 0.8213, "step": 24940 }, { "epoch": 0.25, "learning_rate": 0.0005926973684210526, "loss": 0.8267, "step": 24950 }, { "epoch": 0.25, "learning_rate": 0.0005926184210526315, "loss": 0.8101, "step": 24960 }, { "epoch": 0.25, "learning_rate": 0.0005925394736842105, "loss": 0.8124, "step": 24970 }, { "epoch": 0.25, "learning_rate": 0.0005924605263157894, "loss": 0.8121, "step": 24980 }, { "epoch": 0.25, "learning_rate": 0.0005923815789473684, "loss": 0.8292, "step": 24990 }, { "epoch": 0.25, "learning_rate": 0.0005923026315789473, "loss": 0.8129, "step": 25000 }, { "epoch": 0.25, "eval_accuracy": 0.8254310925043975, "eval_loss": 0.86767578125, "eval_runtime": 96.7476, "eval_samples_per_second": 826.894, "eval_steps_per_second": 1.623, "step": 25000 }, { "epoch": 0.25, "learning_rate": 0.0005922236842105262, "loss": 0.8243, "step": 25010 }, { "epoch": 0.25, "learning_rate": 0.0005921447368421052, "loss": 0.8351, "step": 25020 }, { "epoch": 0.25, "learning_rate": 0.0005920657894736841, "loss": 0.8311, "step": 25030 }, { "epoch": 0.25, "learning_rate": 0.0005919868421052631, "loss": 0.8154, "step": 25040 }, { "epoch": 0.25, "learning_rate": 0.000591907894736842, "loss": 0.8111, "step": 25050 }, { "epoch": 0.25, "learning_rate": 0.000591828947368421, "loss": 0.8188, "step": 25060 }, { "epoch": 0.25, "learning_rate": 0.0005917499999999999, "loss": 0.8169, "step": 25070 }, { "epoch": 0.25, "learning_rate": 0.0005916710526315789, "loss": 0.8221, "step": 25080 }, { "epoch": 0.25, "learning_rate": 0.0005915921052631578, "loss": 0.8105, "step": 25090 }, { "epoch": 0.25, "learning_rate": 0.0005915131578947368, "loss": 0.8269, "step": 25100 }, { "epoch": 0.25, "learning_rate": 0.0005914342105263157, "loss": 0.8161, "step": 25110 }, { "epoch": 0.25, "learning_rate": 0.0005913552631578947, "loss": 0.815, "step": 25120 }, { "epoch": 0.25, "learning_rate": 0.0005912763157894736, "loss": 0.8187, "step": 25130 }, { "epoch": 0.25, "learning_rate": 0.0005911973684210526, "loss": 0.8193, "step": 25140 }, { "epoch": 0.25, "learning_rate": 0.0005911184210526315, "loss": 0.8331, "step": 25150 }, { "epoch": 0.25, "learning_rate": 0.0005910394736842105, "loss": 0.818, "step": 25160 }, { "epoch": 0.25, "learning_rate": 0.0005909605263157894, "loss": 0.8096, "step": 25170 }, { "epoch": 0.25, "learning_rate": 0.0005908815789473684, "loss": 0.7797, "step": 25180 }, { "epoch": 0.25, "learning_rate": 0.0005908026315789473, "loss": 0.8, "step": 25190 }, { "epoch": 0.25, "learning_rate": 0.0005907236842105263, "loss": 0.7881, "step": 25200 }, { "epoch": 0.25, "learning_rate": 0.0005906447368421052, "loss": 0.7861, "step": 25210 }, { "epoch": 0.25, "learning_rate": 0.0005905657894736842, "loss": 0.793, "step": 25220 }, { "epoch": 0.25, "learning_rate": 0.0005904868421052631, "loss": 0.8112, "step": 25230 }, { "epoch": 0.25, "learning_rate": 0.0005904078947368421, "loss": 0.8275, "step": 25240 }, { "epoch": 0.25, "learning_rate": 0.000590328947368421, "loss": 0.8055, "step": 25250 }, { "epoch": 0.25, "learning_rate": 0.00059025, "loss": 0.8217, "step": 25260 }, { "epoch": 0.25, "learning_rate": 0.0005901710526315789, "loss": 0.8035, "step": 25270 }, { "epoch": 0.25, "learning_rate": 0.0005900921052631579, "loss": 0.8117, "step": 25280 }, { "epoch": 0.25, "learning_rate": 0.0005900131578947368, "loss": 0.8194, "step": 25290 }, { "epoch": 0.25, "learning_rate": 0.0005899342105263157, "loss": 0.8228, "step": 25300 }, { "epoch": 0.25, "learning_rate": 0.0005898552631578947, "loss": 0.7978, "step": 25310 }, { "epoch": 0.25, "learning_rate": 0.0005897763157894736, "loss": 0.8268, "step": 25320 }, { "epoch": 0.25, "learning_rate": 0.0005896973684210526, "loss": 0.807, "step": 25330 }, { "epoch": 0.25, "learning_rate": 0.0005896184210526315, "loss": 0.81, "step": 25340 }, { "epoch": 0.25, "learning_rate": 0.0005895394736842105, "loss": 0.8109, "step": 25350 }, { "epoch": 0.25, "learning_rate": 0.0005894605263157894, "loss": 0.8241, "step": 25360 }, { "epoch": 0.25, "learning_rate": 0.0005893815789473684, "loss": 0.8234, "step": 25370 }, { "epoch": 0.25, "learning_rate": 0.0005893026315789473, "loss": 0.8096, "step": 25380 }, { "epoch": 0.25, "learning_rate": 0.0005892236842105263, "loss": 0.8234, "step": 25390 }, { "epoch": 0.25, "learning_rate": 0.0005891447368421052, "loss": 0.8193, "step": 25400 }, { "epoch": 0.25, "learning_rate": 0.0005890657894736842, "loss": 0.8166, "step": 25410 }, { "epoch": 0.25, "learning_rate": 0.0005889868421052631, "loss": 0.821, "step": 25420 }, { "epoch": 0.25, "learning_rate": 0.0005889078947368421, "loss": 0.8112, "step": 25430 }, { "epoch": 0.25, "learning_rate": 0.000588828947368421, "loss": 0.8002, "step": 25440 }, { "epoch": 0.25, "learning_rate": 0.00058875, "loss": 0.8111, "step": 25450 }, { "epoch": 0.25, "learning_rate": 0.0005886710526315789, "loss": 0.8138, "step": 25460 }, { "epoch": 0.25, "learning_rate": 0.0005885921052631579, "loss": 0.8053, "step": 25470 }, { "epoch": 0.25, "learning_rate": 0.0005885131578947368, "loss": 0.795, "step": 25480 }, { "epoch": 0.25, "learning_rate": 0.0005884342105263158, "loss": 0.8374, "step": 25490 }, { "epoch": 0.26, "learning_rate": 0.0005883552631578947, "loss": 0.8137, "step": 25500 }, { "epoch": 0.26, "learning_rate": 0.0005882763157894737, "loss": 0.8247, "step": 25510 }, { "epoch": 0.26, "learning_rate": 0.0005881973684210526, "loss": 0.8148, "step": 25520 }, { "epoch": 0.26, "learning_rate": 0.0005881184210526316, "loss": 0.8125, "step": 25530 }, { "epoch": 0.26, "learning_rate": 0.0005880394736842104, "loss": 0.8205, "step": 25540 }, { "epoch": 0.26, "learning_rate": 0.0005879605263157895, "loss": 0.8077, "step": 25550 }, { "epoch": 0.26, "learning_rate": 0.0005878815789473683, "loss": 0.8192, "step": 25560 }, { "epoch": 0.26, "learning_rate": 0.0005878026315789474, "loss": 0.8213, "step": 25570 }, { "epoch": 0.26, "learning_rate": 0.0005877236842105263, "loss": 0.8145, "step": 25580 }, { "epoch": 0.26, "learning_rate": 0.0005876447368421052, "loss": 0.8032, "step": 25590 }, { "epoch": 0.26, "learning_rate": 0.0005875657894736842, "loss": 0.8245, "step": 25600 }, { "epoch": 0.26, "learning_rate": 0.000587486842105263, "loss": 0.8164, "step": 25610 }, { "epoch": 0.26, "learning_rate": 0.0005874078947368421, "loss": 0.7993, "step": 25620 }, { "epoch": 0.26, "learning_rate": 0.0005873289473684209, "loss": 0.8149, "step": 25630 }, { "epoch": 0.26, "learning_rate": 0.00058725, "loss": 0.8116, "step": 25640 }, { "epoch": 0.26, "learning_rate": 0.0005871710526315788, "loss": 0.8087, "step": 25650 }, { "epoch": 0.26, "learning_rate": 0.0005870921052631579, "loss": 0.8107, "step": 25660 }, { "epoch": 0.26, "learning_rate": 0.0005870131578947368, "loss": 0.807, "step": 25670 }, { "epoch": 0.26, "learning_rate": 0.0005869342105263158, "loss": 0.8366, "step": 25680 }, { "epoch": 0.26, "learning_rate": 0.0005868552631578947, "loss": 0.8207, "step": 25690 }, { "epoch": 0.26, "learning_rate": 0.0005867763157894736, "loss": 0.8152, "step": 25700 }, { "epoch": 0.26, "learning_rate": 0.0005866973684210526, "loss": 0.8138, "step": 25710 }, { "epoch": 0.26, "learning_rate": 0.0005866184210526315, "loss": 0.819, "step": 25720 }, { "epoch": 0.26, "learning_rate": 0.0005865394736842105, "loss": 0.8066, "step": 25730 }, { "epoch": 0.26, "learning_rate": 0.0005864605263157894, "loss": 0.8157, "step": 25740 }, { "epoch": 0.26, "learning_rate": 0.0005863894736842105, "loss": 0.8029, "step": 25750 }, { "epoch": 0.26, "learning_rate": 0.0005863105263157894, "loss": 0.8109, "step": 25760 }, { "epoch": 0.26, "learning_rate": 0.0005862315789473683, "loss": 0.825, "step": 25770 }, { "epoch": 0.26, "learning_rate": 0.0005861526315789473, "loss": 0.8045, "step": 25780 }, { "epoch": 0.26, "learning_rate": 0.0005860736842105262, "loss": 0.8127, "step": 25790 }, { "epoch": 0.26, "learning_rate": 0.0005859947368421052, "loss": 0.8105, "step": 25800 }, { "epoch": 0.26, "learning_rate": 0.0005859157894736841, "loss": 0.8017, "step": 25810 }, { "epoch": 0.26, "learning_rate": 0.0005858368421052632, "loss": 0.8192, "step": 25820 }, { "epoch": 0.26, "learning_rate": 0.000585757894736842, "loss": 0.8236, "step": 25830 }, { "epoch": 0.26, "learning_rate": 0.000585678947368421, "loss": 0.8142, "step": 25840 }, { "epoch": 0.26, "learning_rate": 0.0005855999999999999, "loss": 0.8061, "step": 25850 }, { "epoch": 0.26, "learning_rate": 0.0005855210526315789, "loss": 0.8054, "step": 25860 }, { "epoch": 0.26, "learning_rate": 0.0005854421052631578, "loss": 0.8058, "step": 25870 }, { "epoch": 0.26, "learning_rate": 0.0005853631578947368, "loss": 0.8033, "step": 25880 }, { "epoch": 0.26, "learning_rate": 0.0005852842105263157, "loss": 0.8201, "step": 25890 }, { "epoch": 0.26, "learning_rate": 0.0005852052631578947, "loss": 0.8065, "step": 25900 }, { "epoch": 0.26, "learning_rate": 0.0005851263157894736, "loss": 0.8104, "step": 25910 }, { "epoch": 0.26, "learning_rate": 0.0005850473684210526, "loss": 0.7849, "step": 25920 }, { "epoch": 0.26, "learning_rate": 0.0005849684210526315, "loss": 0.7978, "step": 25930 }, { "epoch": 0.26, "learning_rate": 0.0005848894736842105, "loss": 0.812, "step": 25940 }, { "epoch": 0.26, "learning_rate": 0.0005848105263157894, "loss": 0.8091, "step": 25950 }, { "epoch": 0.26, "learning_rate": 0.0005847315789473684, "loss": 0.8145, "step": 25960 }, { "epoch": 0.26, "learning_rate": 0.0005846526315789473, "loss": 0.8113, "step": 25970 }, { "epoch": 0.26, "learning_rate": 0.0005845736842105263, "loss": 0.8041, "step": 25980 }, { "epoch": 0.26, "learning_rate": 0.0005844947368421052, "loss": 0.8074, "step": 25990 }, { "epoch": 0.26, "learning_rate": 0.0005844157894736841, "loss": 0.805, "step": 26000 }, { "epoch": 0.26, "learning_rate": 0.0005843368421052631, "loss": 0.8021, "step": 26010 }, { "epoch": 0.26, "learning_rate": 0.000584257894736842, "loss": 0.8067, "step": 26020 }, { "epoch": 0.26, "learning_rate": 0.000584178947368421, "loss": 0.8017, "step": 26030 }, { "epoch": 0.26, "learning_rate": 0.0005840999999999999, "loss": 0.8088, "step": 26040 }, { "epoch": 0.26, "learning_rate": 0.0005840210526315789, "loss": 0.8108, "step": 26050 }, { "epoch": 0.26, "learning_rate": 0.0005839421052631578, "loss": 0.7941, "step": 26060 }, { "epoch": 0.26, "learning_rate": 0.0005838631578947368, "loss": 0.8195, "step": 26070 }, { "epoch": 0.26, "learning_rate": 0.0005837842105263157, "loss": 0.8182, "step": 26080 }, { "epoch": 0.26, "learning_rate": 0.0005837052631578947, "loss": 0.8138, "step": 26090 }, { "epoch": 0.26, "learning_rate": 0.0005836263157894736, "loss": 0.8126, "step": 26100 }, { "epoch": 0.26, "learning_rate": 0.0005835473684210526, "loss": 0.817, "step": 26110 }, { "epoch": 0.26, "learning_rate": 0.0005834684210526315, "loss": 0.81, "step": 26120 }, { "epoch": 0.26, "learning_rate": 0.0005833894736842105, "loss": 0.7905, "step": 26130 }, { "epoch": 0.26, "learning_rate": 0.0005833105263157894, "loss": 0.8001, "step": 26140 }, { "epoch": 0.26, "learning_rate": 0.0005832315789473684, "loss": 0.8134, "step": 26150 }, { "epoch": 0.26, "learning_rate": 0.0005831526315789473, "loss": 0.8116, "step": 26160 }, { "epoch": 0.26, "learning_rate": 0.0005830736842105263, "loss": 0.7971, "step": 26170 }, { "epoch": 0.26, "learning_rate": 0.0005829947368421052, "loss": 0.795, "step": 26180 }, { "epoch": 0.26, "learning_rate": 0.0005829157894736842, "loss": 0.8128, "step": 26190 }, { "epoch": 0.26, "learning_rate": 0.0005828368421052631, "loss": 0.8144, "step": 26200 }, { "epoch": 0.26, "learning_rate": 0.0005827578947368421, "loss": 0.8022, "step": 26210 }, { "epoch": 0.26, "learning_rate": 0.000582678947368421, "loss": 0.8091, "step": 26220 }, { "epoch": 0.26, "learning_rate": 0.0005826, "loss": 0.7954, "step": 26230 }, { "epoch": 0.26, "learning_rate": 0.0005825210526315789, "loss": 0.797, "step": 26240 }, { "epoch": 0.26, "learning_rate": 0.0005824421052631579, "loss": 0.8183, "step": 26250 }, { "epoch": 0.26, "learning_rate": 0.0005823631578947368, "loss": 0.8078, "step": 26260 }, { "epoch": 0.26, "learning_rate": 0.0005822842105263158, "loss": 0.8063, "step": 26270 }, { "epoch": 0.26, "learning_rate": 0.0005822052631578947, "loss": 0.7925, "step": 26280 }, { "epoch": 0.26, "learning_rate": 0.0005821263157894736, "loss": 0.7894, "step": 26290 }, { "epoch": 0.26, "learning_rate": 0.0005820473684210526, "loss": 0.7882, "step": 26300 }, { "epoch": 0.26, "learning_rate": 0.0005819684210526315, "loss": 0.7932, "step": 26310 }, { "epoch": 0.26, "learning_rate": 0.0005818894736842105, "loss": 0.7961, "step": 26320 }, { "epoch": 0.26, "learning_rate": 0.0005818105263157894, "loss": 0.7923, "step": 26330 }, { "epoch": 0.26, "learning_rate": 0.0005817315789473684, "loss": 0.7974, "step": 26340 }, { "epoch": 0.26, "learning_rate": 0.0005816526315789473, "loss": 0.798, "step": 26350 }, { "epoch": 0.26, "learning_rate": 0.0005815736842105263, "loss": 0.792, "step": 26360 }, { "epoch": 0.26, "learning_rate": 0.0005814947368421052, "loss": 0.8103, "step": 26370 }, { "epoch": 0.26, "learning_rate": 0.0005814157894736842, "loss": 0.8009, "step": 26380 }, { "epoch": 0.26, "learning_rate": 0.0005813368421052631, "loss": 0.8062, "step": 26390 }, { "epoch": 0.26, "learning_rate": 0.0005812578947368421, "loss": 0.8107, "step": 26400 }, { "epoch": 0.26, "learning_rate": 0.000581178947368421, "loss": 0.814, "step": 26410 }, { "epoch": 0.26, "learning_rate": 0.0005811, "loss": 0.8003, "step": 26420 }, { "epoch": 0.26, "learning_rate": 0.0005810210526315789, "loss": 0.7893, "step": 26430 }, { "epoch": 0.26, "learning_rate": 0.0005809421052631579, "loss": 0.7695, "step": 26440 }, { "epoch": 0.26, "learning_rate": 0.0005808631578947367, "loss": 0.7856, "step": 26450 }, { "epoch": 0.26, "learning_rate": 0.0005807842105263158, "loss": 0.7692, "step": 26460 }, { "epoch": 0.26, "learning_rate": 0.0005807052631578947, "loss": 0.7626, "step": 26470 }, { "epoch": 0.26, "learning_rate": 0.0005806263157894737, "loss": 0.7926, "step": 26480 }, { "epoch": 0.26, "learning_rate": 0.0005805473684210526, "loss": 0.8006, "step": 26490 }, { "epoch": 0.27, "learning_rate": 0.0005804684210526316, "loss": 0.802, "step": 26500 }, { "epoch": 0.27, "learning_rate": 0.0005803894736842105, "loss": 0.8092, "step": 26510 }, { "epoch": 0.27, "learning_rate": 0.0005803105263157894, "loss": 0.8018, "step": 26520 }, { "epoch": 0.27, "learning_rate": 0.0005802315789473684, "loss": 0.7875, "step": 26530 }, { "epoch": 0.27, "learning_rate": 0.0005801526315789472, "loss": 0.8163, "step": 26540 }, { "epoch": 0.27, "learning_rate": 0.0005800736842105263, "loss": 0.7923, "step": 26550 }, { "epoch": 0.27, "learning_rate": 0.0005799947368421053, "loss": 0.8019, "step": 26560 }, { "epoch": 0.27, "learning_rate": 0.0005799157894736842, "loss": 0.8018, "step": 26570 }, { "epoch": 0.27, "learning_rate": 0.0005798368421052631, "loss": 0.7946, "step": 26580 }, { "epoch": 0.27, "learning_rate": 0.0005797578947368421, "loss": 0.7982, "step": 26590 }, { "epoch": 0.27, "learning_rate": 0.000579678947368421, "loss": 0.7962, "step": 26600 }, { "epoch": 0.27, "learning_rate": 0.0005795999999999999, "loss": 0.797, "step": 26610 }, { "epoch": 0.27, "learning_rate": 0.0005795210526315789, "loss": 0.7993, "step": 26620 }, { "epoch": 0.27, "learning_rate": 0.0005794421052631578, "loss": 0.7906, "step": 26630 }, { "epoch": 0.27, "learning_rate": 0.0005793631578947368, "loss": 0.8111, "step": 26640 }, { "epoch": 0.27, "learning_rate": 0.0005792842105263157, "loss": 0.8141, "step": 26650 }, { "epoch": 0.27, "learning_rate": 0.0005792052631578947, "loss": 0.8079, "step": 26660 }, { "epoch": 0.27, "learning_rate": 0.0005791263157894737, "loss": 0.7938, "step": 26670 }, { "epoch": 0.27, "learning_rate": 0.0005790473684210525, "loss": 0.8108, "step": 26680 }, { "epoch": 0.27, "learning_rate": 0.0005789684210526316, "loss": 0.8085, "step": 26690 }, { "epoch": 0.27, "learning_rate": 0.0005788894736842104, "loss": 0.7985, "step": 26700 }, { "epoch": 0.27, "learning_rate": 0.0005788105263157895, "loss": 0.7647, "step": 26710 }, { "epoch": 0.27, "learning_rate": 0.0005787315789473683, "loss": 0.8048, "step": 26720 }, { "epoch": 0.27, "learning_rate": 0.0005786526315789474, "loss": 0.7926, "step": 26730 }, { "epoch": 0.27, "learning_rate": 0.0005785736842105262, "loss": 0.8004, "step": 26740 }, { "epoch": 0.27, "learning_rate": 0.0005785026315789473, "loss": 0.7965, "step": 26750 }, { "epoch": 0.27, "learning_rate": 0.0005784236842105262, "loss": 0.8007, "step": 26760 }, { "epoch": 0.27, "learning_rate": 0.0005783447368421052, "loss": 0.7922, "step": 26770 }, { "epoch": 0.27, "learning_rate": 0.0005782657894736841, "loss": 0.8026, "step": 26780 }, { "epoch": 0.27, "learning_rate": 0.0005781868421052631, "loss": 0.8007, "step": 26790 }, { "epoch": 0.27, "learning_rate": 0.000578107894736842, "loss": 0.7939, "step": 26800 }, { "epoch": 0.27, "learning_rate": 0.000578028947368421, "loss": 0.795, "step": 26810 }, { "epoch": 0.27, "learning_rate": 0.0005779499999999999, "loss": 0.7852, "step": 26820 }, { "epoch": 0.27, "learning_rate": 0.0005778710526315789, "loss": 0.7874, "step": 26830 }, { "epoch": 0.27, "learning_rate": 0.0005777921052631578, "loss": 0.7885, "step": 26840 }, { "epoch": 0.27, "learning_rate": 0.0005777131578947368, "loss": 0.7963, "step": 26850 }, { "epoch": 0.27, "learning_rate": 0.0005776342105263157, "loss": 0.795, "step": 26860 }, { "epoch": 0.27, "learning_rate": 0.0005775552631578947, "loss": 0.8115, "step": 26870 }, { "epoch": 0.27, "learning_rate": 0.0005774763157894736, "loss": 0.7849, "step": 26880 }, { "epoch": 0.27, "learning_rate": 0.0005773973684210526, "loss": 0.7789, "step": 26890 }, { "epoch": 0.27, "learning_rate": 0.0005773184210526315, "loss": 0.8073, "step": 26900 }, { "epoch": 0.27, "learning_rate": 0.0005772394736842105, "loss": 0.7947, "step": 26910 }, { "epoch": 0.27, "learning_rate": 0.0005771605263157894, "loss": 0.7839, "step": 26920 }, { "epoch": 0.27, "learning_rate": 0.0005770815789473684, "loss": 0.8056, "step": 26930 }, { "epoch": 0.27, "learning_rate": 0.0005770026315789473, "loss": 0.7972, "step": 26940 }, { "epoch": 0.27, "learning_rate": 0.0005769236842105263, "loss": 0.8008, "step": 26950 }, { "epoch": 0.27, "learning_rate": 0.0005768447368421052, "loss": 0.7911, "step": 26960 }, { "epoch": 0.27, "learning_rate": 0.0005767657894736842, "loss": 0.8002, "step": 26970 }, { "epoch": 0.27, "learning_rate": 0.0005766868421052631, "loss": 0.7812, "step": 26980 }, { "epoch": 0.27, "learning_rate": 0.000576607894736842, "loss": 0.7864, "step": 26990 }, { "epoch": 0.27, "learning_rate": 0.000576528947368421, "loss": 0.7809, "step": 27000 }, { "epoch": 0.27, "learning_rate": 0.0005764499999999999, "loss": 0.8083, "step": 27010 }, { "epoch": 0.27, "learning_rate": 0.0005763710526315789, "loss": 0.7935, "step": 27020 }, { "epoch": 0.27, "learning_rate": 0.0005762921052631578, "loss": 0.79, "step": 27030 }, { "epoch": 0.27, "learning_rate": 0.0005762131578947368, "loss": 0.7868, "step": 27040 }, { "epoch": 0.27, "learning_rate": 0.0005761342105263157, "loss": 0.7978, "step": 27050 }, { "epoch": 0.27, "learning_rate": 0.0005760552631578947, "loss": 0.7975, "step": 27060 }, { "epoch": 0.27, "learning_rate": 0.0005759763157894736, "loss": 0.7971, "step": 27070 }, { "epoch": 0.27, "learning_rate": 0.0005758973684210526, "loss": 0.7861, "step": 27080 }, { "epoch": 0.27, "learning_rate": 0.0005758184210526315, "loss": 0.7807, "step": 27090 }, { "epoch": 0.27, "learning_rate": 0.0005757394736842105, "loss": 0.7933, "step": 27100 }, { "epoch": 0.27, "learning_rate": 0.0005756605263157894, "loss": 0.8048, "step": 27110 }, { "epoch": 0.27, "learning_rate": 0.0005755815789473684, "loss": 0.7791, "step": 27120 }, { "epoch": 0.27, "learning_rate": 0.0005755026315789473, "loss": 0.7752, "step": 27130 }, { "epoch": 0.27, "learning_rate": 0.0005754236842105263, "loss": 0.7753, "step": 27140 }, { "epoch": 0.27, "learning_rate": 0.0005753447368421052, "loss": 0.7925, "step": 27150 }, { "epoch": 0.27, "learning_rate": 0.0005752657894736842, "loss": 0.7894, "step": 27160 }, { "epoch": 0.27, "learning_rate": 0.0005751868421052631, "loss": 0.7974, "step": 27170 }, { "epoch": 0.27, "learning_rate": 0.0005751078947368421, "loss": 0.7849, "step": 27180 }, { "epoch": 0.27, "learning_rate": 0.000575028947368421, "loss": 0.8053, "step": 27190 }, { "epoch": 0.27, "learning_rate": 0.00057495, "loss": 0.8107, "step": 27200 }, { "epoch": 0.27, "learning_rate": 0.0005748710526315789, "loss": 0.7912, "step": 27210 }, { "epoch": 0.27, "learning_rate": 0.0005747921052631579, "loss": 0.7863, "step": 27220 }, { "epoch": 0.27, "learning_rate": 0.0005747131578947368, "loss": 0.7913, "step": 27230 }, { "epoch": 0.27, "learning_rate": 0.0005746342105263158, "loss": 0.7951, "step": 27240 }, { "epoch": 0.27, "learning_rate": 0.0005745552631578947, "loss": 0.7872, "step": 27250 }, { "epoch": 0.27, "learning_rate": 0.0005744763157894737, "loss": 0.7988, "step": 27260 }, { "epoch": 0.27, "learning_rate": 0.0005743973684210526, "loss": 0.8092, "step": 27270 }, { "epoch": 0.27, "learning_rate": 0.0005743184210526315, "loss": 0.7866, "step": 27280 }, { "epoch": 0.27, "learning_rate": 0.0005742394736842105, "loss": 0.7957, "step": 27290 }, { "epoch": 0.27, "learning_rate": 0.0005741605263157894, "loss": 0.7757, "step": 27300 }, { "epoch": 0.27, "learning_rate": 0.0005740815789473684, "loss": 0.792, "step": 27310 }, { "epoch": 0.27, "learning_rate": 0.0005740026315789473, "loss": 0.7942, "step": 27320 }, { "epoch": 0.27, "learning_rate": 0.0005739236842105263, "loss": 0.7846, "step": 27330 }, { "epoch": 0.27, "learning_rate": 0.0005738447368421052, "loss": 0.7727, "step": 27340 }, { "epoch": 0.27, "learning_rate": 0.0005737657894736842, "loss": 0.7893, "step": 27350 }, { "epoch": 0.27, "learning_rate": 0.000573686842105263, "loss": 0.807, "step": 27360 }, { "epoch": 0.27, "learning_rate": 0.0005736078947368421, "loss": 0.8077, "step": 27370 }, { "epoch": 0.27, "learning_rate": 0.000573528947368421, "loss": 0.7967, "step": 27380 }, { "epoch": 0.27, "learning_rate": 0.00057345, "loss": 0.7939, "step": 27390 }, { "epoch": 0.27, "learning_rate": 0.0005733710526315789, "loss": 0.7984, "step": 27400 }, { "epoch": 0.27, "learning_rate": 0.0005732921052631579, "loss": 0.7908, "step": 27410 }, { "epoch": 0.27, "learning_rate": 0.0005732131578947368, "loss": 0.7929, "step": 27420 }, { "epoch": 0.27, "learning_rate": 0.0005731342105263157, "loss": 0.7869, "step": 27430 }, { "epoch": 0.27, "learning_rate": 0.0005730552631578947, "loss": 0.7889, "step": 27440 }, { "epoch": 0.27, "learning_rate": 0.0005729763157894736, "loss": 0.7958, "step": 27450 }, { "epoch": 0.27, "learning_rate": 0.0005728973684210526, "loss": 0.7759, "step": 27460 }, { "epoch": 0.27, "learning_rate": 0.0005728184210526316, "loss": 0.7919, "step": 27470 }, { "epoch": 0.27, "learning_rate": 0.0005727394736842105, "loss": 0.7901, "step": 27480 }, { "epoch": 0.27, "learning_rate": 0.0005726605263157895, "loss": 0.7976, "step": 27490 }, { "epoch": 0.28, "learning_rate": 0.0005725815789473684, "loss": 0.7826, "step": 27500 }, { "epoch": 0.28, "eval_accuracy": 0.832012190127962, "eval_loss": 0.82568359375, "eval_runtime": 96.9614, "eval_samples_per_second": 825.071, "eval_steps_per_second": 1.619, "step": 27500 }, { "epoch": 0.28, "learning_rate": 0.0005725026315789474, "loss": 0.7844, "step": 27510 }, { "epoch": 0.28, "learning_rate": 0.0005724236842105262, "loss": 0.7746, "step": 27520 }, { "epoch": 0.28, "learning_rate": 0.0005723447368421053, "loss": 0.7749, "step": 27530 }, { "epoch": 0.28, "learning_rate": 0.0005722657894736841, "loss": 0.7559, "step": 27540 }, { "epoch": 0.28, "learning_rate": 0.0005721868421052632, "loss": 0.7876, "step": 27550 }, { "epoch": 0.28, "learning_rate": 0.0005721078947368421, "loss": 0.7792, "step": 27560 }, { "epoch": 0.28, "learning_rate": 0.000572028947368421, "loss": 0.7734, "step": 27570 }, { "epoch": 0.28, "learning_rate": 0.00057195, "loss": 0.7701, "step": 27580 }, { "epoch": 0.28, "learning_rate": 0.0005718710526315788, "loss": 0.7835, "step": 27590 }, { "epoch": 0.28, "learning_rate": 0.0005717921052631579, "loss": 0.7688, "step": 27600 }, { "epoch": 0.28, "learning_rate": 0.0005717131578947367, "loss": 0.8105, "step": 27610 }, { "epoch": 0.28, "learning_rate": 0.0005716342105263158, "loss": 0.8011, "step": 27620 }, { "epoch": 0.28, "learning_rate": 0.0005715552631578946, "loss": 0.8044, "step": 27630 }, { "epoch": 0.28, "learning_rate": 0.0005714763157894737, "loss": 0.8107, "step": 27640 }, { "epoch": 0.28, "learning_rate": 0.0005713973684210525, "loss": 0.8012, "step": 27650 }, { "epoch": 0.28, "learning_rate": 0.0005713184210526315, "loss": 0.796, "step": 27660 }, { "epoch": 0.28, "learning_rate": 0.0005712394736842105, "loss": 0.7865, "step": 27670 }, { "epoch": 0.28, "learning_rate": 0.0005711605263157894, "loss": 0.7908, "step": 27680 }, { "epoch": 0.28, "learning_rate": 0.0005710815789473684, "loss": 0.8021, "step": 27690 }, { "epoch": 0.28, "learning_rate": 0.0005710026315789473, "loss": 0.7944, "step": 27700 }, { "epoch": 0.28, "learning_rate": 0.0005709236842105263, "loss": 0.7863, "step": 27710 }, { "epoch": 0.28, "learning_rate": 0.0005708447368421052, "loss": 0.7902, "step": 27720 }, { "epoch": 0.28, "learning_rate": 0.0005707657894736841, "loss": 0.7787, "step": 27730 }, { "epoch": 0.28, "learning_rate": 0.0005706868421052631, "loss": 0.7952, "step": 27740 }, { "epoch": 0.28, "learning_rate": 0.0005706157894736841, "loss": 0.7887, "step": 27750 }, { "epoch": 0.28, "learning_rate": 0.0005705368421052631, "loss": 0.7949, "step": 27760 }, { "epoch": 0.28, "learning_rate": 0.000570457894736842, "loss": 0.7929, "step": 27770 }, { "epoch": 0.28, "learning_rate": 0.000570378947368421, "loss": 0.802, "step": 27780 }, { "epoch": 0.28, "learning_rate": 0.0005702999999999999, "loss": 0.7668, "step": 27790 }, { "epoch": 0.28, "learning_rate": 0.0005702210526315789, "loss": 0.7895, "step": 27800 }, { "epoch": 0.28, "learning_rate": 0.0005701421052631578, "loss": 0.7788, "step": 27810 }, { "epoch": 0.28, "learning_rate": 0.0005700631578947368, "loss": 0.7903, "step": 27820 }, { "epoch": 0.28, "learning_rate": 0.0005699842105263157, "loss": 0.7973, "step": 27830 }, { "epoch": 0.28, "learning_rate": 0.0005699052631578947, "loss": 0.8102, "step": 27840 }, { "epoch": 0.28, "learning_rate": 0.0005698263157894736, "loss": 0.7954, "step": 27850 }, { "epoch": 0.28, "learning_rate": 0.0005697473684210526, "loss": 0.7891, "step": 27860 }, { "epoch": 0.28, "learning_rate": 0.0005696684210526315, "loss": 0.8007, "step": 27870 }, { "epoch": 0.28, "learning_rate": 0.0005695894736842105, "loss": 0.8003, "step": 27880 }, { "epoch": 0.28, "learning_rate": 0.0005695105263157894, "loss": 0.7952, "step": 27890 }, { "epoch": 0.28, "learning_rate": 0.0005694315789473684, "loss": 0.807, "step": 27900 }, { "epoch": 0.28, "learning_rate": 0.0005693526315789473, "loss": 0.7858, "step": 27910 }, { "epoch": 0.28, "learning_rate": 0.0005692736842105263, "loss": 0.7877, "step": 27920 }, { "epoch": 0.28, "learning_rate": 0.0005691947368421052, "loss": 0.7896, "step": 27930 }, { "epoch": 0.28, "learning_rate": 0.0005691157894736842, "loss": 0.7885, "step": 27940 }, { "epoch": 0.28, "learning_rate": 0.0005690368421052631, "loss": 0.7758, "step": 27950 }, { "epoch": 0.28, "learning_rate": 0.000568957894736842, "loss": 0.8041, "step": 27960 }, { "epoch": 0.28, "learning_rate": 0.000568878947368421, "loss": 0.7975, "step": 27970 }, { "epoch": 0.28, "learning_rate": 0.0005688, "loss": 0.789, "step": 27980 }, { "epoch": 0.28, "learning_rate": 0.0005687210526315789, "loss": 0.7913, "step": 27990 }, { "epoch": 0.28, "learning_rate": 0.0005686421052631578, "loss": 0.8035, "step": 28000 }, { "epoch": 0.28, "learning_rate": 0.0005685631578947368, "loss": 0.7875, "step": 28010 }, { "epoch": 0.28, "learning_rate": 0.0005684842105263157, "loss": 0.7854, "step": 28020 }, { "epoch": 0.28, "learning_rate": 0.0005684052631578947, "loss": 0.7866, "step": 28030 }, { "epoch": 0.28, "learning_rate": 0.0005683263157894736, "loss": 0.7918, "step": 28040 }, { "epoch": 0.28, "learning_rate": 0.0005682473684210526, "loss": 0.7825, "step": 28050 }, { "epoch": 0.28, "learning_rate": 0.0005681684210526315, "loss": 0.7777, "step": 28060 }, { "epoch": 0.28, "learning_rate": 0.0005680894736842105, "loss": 0.7947, "step": 28070 }, { "epoch": 0.28, "learning_rate": 0.0005680105263157894, "loss": 0.7992, "step": 28080 }, { "epoch": 0.28, "learning_rate": 0.0005679315789473684, "loss": 0.7978, "step": 28090 }, { "epoch": 0.28, "learning_rate": 0.0005678526315789473, "loss": 0.8152, "step": 28100 }, { "epoch": 0.28, "learning_rate": 0.0005677736842105263, "loss": 0.7904, "step": 28110 }, { "epoch": 0.28, "learning_rate": 0.0005676947368421052, "loss": 0.8009, "step": 28120 }, { "epoch": 0.28, "learning_rate": 0.0005676157894736842, "loss": 0.7671, "step": 28130 }, { "epoch": 0.28, "learning_rate": 0.0005675368421052631, "loss": 0.7616, "step": 28140 }, { "epoch": 0.28, "learning_rate": 0.0005674578947368421, "loss": 0.7662, "step": 28150 }, { "epoch": 0.28, "learning_rate": 0.000567378947368421, "loss": 0.7741, "step": 28160 }, { "epoch": 0.28, "learning_rate": 0.0005673, "loss": 0.7594, "step": 28170 }, { "epoch": 0.28, "learning_rate": 0.0005672210526315789, "loss": 0.7638, "step": 28180 }, { "epoch": 0.28, "learning_rate": 0.0005671421052631579, "loss": 0.7669, "step": 28190 }, { "epoch": 0.28, "learning_rate": 0.0005670631578947368, "loss": 0.7689, "step": 28200 }, { "epoch": 0.28, "learning_rate": 0.0005669842105263158, "loss": 0.7642, "step": 28210 }, { "epoch": 0.28, "learning_rate": 0.0005669052631578947, "loss": 0.7709, "step": 28220 }, { "epoch": 0.28, "learning_rate": 0.0005668263157894737, "loss": 0.7563, "step": 28230 }, { "epoch": 0.28, "learning_rate": 0.0005667473684210526, "loss": 0.7715, "step": 28240 }, { "epoch": 0.28, "learning_rate": 0.0005666684210526316, "loss": 0.7512, "step": 28250 }, { "epoch": 0.28, "learning_rate": 0.0005665894736842105, "loss": 0.7723, "step": 28260 }, { "epoch": 0.28, "learning_rate": 0.0005665105263157893, "loss": 0.7626, "step": 28270 }, { "epoch": 0.28, "learning_rate": 0.0005664315789473684, "loss": 0.7751, "step": 28280 }, { "epoch": 0.28, "learning_rate": 0.0005663526315789473, "loss": 0.7474, "step": 28290 }, { "epoch": 0.28, "learning_rate": 0.0005662736842105263, "loss": 0.7722, "step": 28300 }, { "epoch": 0.28, "learning_rate": 0.0005661947368421052, "loss": 0.7533, "step": 28310 }, { "epoch": 0.28, "learning_rate": 0.0005661157894736842, "loss": 0.7673, "step": 28320 }, { "epoch": 0.28, "learning_rate": 0.0005660368421052631, "loss": 0.7517, "step": 28330 }, { "epoch": 0.28, "learning_rate": 0.000565957894736842, "loss": 0.7431, "step": 28340 }, { "epoch": 0.28, "learning_rate": 0.000565878947368421, "loss": 0.7501, "step": 28350 }, { "epoch": 0.28, "learning_rate": 0.0005657999999999999, "loss": 0.7459, "step": 28360 }, { "epoch": 0.28, "learning_rate": 0.0005657210526315789, "loss": 0.7557, "step": 28370 }, { "epoch": 0.28, "learning_rate": 0.0005656421052631579, "loss": 0.7627, "step": 28380 }, { "epoch": 0.28, "learning_rate": 0.0005655631578947368, "loss": 0.7589, "step": 28390 }, { "epoch": 0.28, "learning_rate": 0.0005654842105263158, "loss": 0.7543, "step": 28400 }, { "epoch": 0.28, "learning_rate": 0.0005654052631578947, "loss": 0.7585, "step": 28410 }, { "epoch": 0.28, "learning_rate": 0.0005653263157894737, "loss": 0.7612, "step": 28420 }, { "epoch": 0.28, "learning_rate": 0.0005652473684210525, "loss": 0.7489, "step": 28430 }, { "epoch": 0.28, "learning_rate": 0.0005651684210526316, "loss": 0.7545, "step": 28440 }, { "epoch": 0.28, "learning_rate": 0.0005650894736842104, "loss": 0.7444, "step": 28450 }, { "epoch": 0.28, "learning_rate": 0.0005650105263157895, "loss": 0.751, "step": 28460 }, { "epoch": 0.28, "learning_rate": 0.0005649315789473684, "loss": 0.7676, "step": 28470 }, { "epoch": 0.28, "learning_rate": 0.0005648526315789474, "loss": 0.7867, "step": 28480 }, { "epoch": 0.28, "learning_rate": 0.0005647736842105263, "loss": 0.7744, "step": 28490 }, { "epoch": 0.28, "learning_rate": 0.0005646947368421052, "loss": 0.7803, "step": 28500 }, { "epoch": 0.29, "learning_rate": 0.0005646157894736842, "loss": 0.7884, "step": 28510 }, { "epoch": 0.29, "learning_rate": 0.000564536842105263, "loss": 0.7509, "step": 28520 }, { "epoch": 0.29, "learning_rate": 0.0005644578947368421, "loss": 0.7463, "step": 28530 }, { "epoch": 0.29, "learning_rate": 0.0005643789473684209, "loss": 0.7696, "step": 28540 }, { "epoch": 0.29, "learning_rate": 0.0005643, "loss": 0.7679, "step": 28550 }, { "epoch": 0.29, "learning_rate": 0.000564221052631579, "loss": 0.7665, "step": 28560 }, { "epoch": 0.29, "learning_rate": 0.0005641421052631578, "loss": 0.7845, "step": 28570 }, { "epoch": 0.29, "learning_rate": 0.0005640631578947368, "loss": 0.7717, "step": 28580 }, { "epoch": 0.29, "learning_rate": 0.0005639842105263157, "loss": 0.7749, "step": 28590 }, { "epoch": 0.29, "learning_rate": 0.0005639052631578947, "loss": 0.7643, "step": 28600 }, { "epoch": 0.29, "learning_rate": 0.0005638263157894736, "loss": 0.7721, "step": 28610 }, { "epoch": 0.29, "learning_rate": 0.0005637473684210526, "loss": 0.7806, "step": 28620 }, { "epoch": 0.29, "learning_rate": 0.0005636684210526315, "loss": 0.7703, "step": 28630 }, { "epoch": 0.29, "learning_rate": 0.0005635894736842104, "loss": 0.775, "step": 28640 }, { "epoch": 0.29, "learning_rate": 0.0005635105263157895, "loss": 0.7549, "step": 28650 }, { "epoch": 0.29, "learning_rate": 0.0005634315789473683, "loss": 0.7824, "step": 28660 }, { "epoch": 0.29, "learning_rate": 0.0005633526315789474, "loss": 0.777, "step": 28670 }, { "epoch": 0.29, "learning_rate": 0.0005632736842105262, "loss": 0.7699, "step": 28680 }, { "epoch": 0.29, "learning_rate": 0.0005631947368421053, "loss": 0.7834, "step": 28690 }, { "epoch": 0.29, "learning_rate": 0.0005631157894736841, "loss": 0.7771, "step": 28700 }, { "epoch": 0.29, "learning_rate": 0.0005630368421052631, "loss": 0.776, "step": 28710 }, { "epoch": 0.29, "learning_rate": 0.000562957894736842, "loss": 0.7718, "step": 28720 }, { "epoch": 0.29, "learning_rate": 0.000562878947368421, "loss": 0.7746, "step": 28730 }, { "epoch": 0.29, "learning_rate": 0.0005627999999999999, "loss": 0.7664, "step": 28740 }, { "epoch": 0.29, "learning_rate": 0.000562728947368421, "loss": 0.7783, "step": 28750 }, { "epoch": 0.29, "learning_rate": 0.0005626499999999999, "loss": 0.7738, "step": 28760 }, { "epoch": 0.29, "learning_rate": 0.0005625710526315789, "loss": 0.7636, "step": 28770 }, { "epoch": 0.29, "learning_rate": 0.0005624921052631578, "loss": 0.7785, "step": 28780 }, { "epoch": 0.29, "learning_rate": 0.0005624131578947368, "loss": 0.7855, "step": 28790 }, { "epoch": 0.29, "learning_rate": 0.0005623342105263157, "loss": 0.7823, "step": 28800 }, { "epoch": 0.29, "learning_rate": 0.0005622552631578947, "loss": 0.7698, "step": 28810 }, { "epoch": 0.29, "learning_rate": 0.0005621763157894736, "loss": 0.7731, "step": 28820 }, { "epoch": 0.29, "learning_rate": 0.0005620973684210526, "loss": 0.7695, "step": 28830 }, { "epoch": 0.29, "learning_rate": 0.0005620184210526315, "loss": 0.7962, "step": 28840 }, { "epoch": 0.29, "learning_rate": 0.0005619394736842105, "loss": 0.7916, "step": 28850 }, { "epoch": 0.29, "learning_rate": 0.0005618605263157894, "loss": 0.7721, "step": 28860 }, { "epoch": 0.29, "learning_rate": 0.0005617815789473684, "loss": 0.7727, "step": 28870 }, { "epoch": 0.29, "learning_rate": 0.0005617026315789473, "loss": 0.7596, "step": 28880 }, { "epoch": 0.29, "learning_rate": 0.0005616236842105263, "loss": 0.779, "step": 28890 }, { "epoch": 0.29, "learning_rate": 0.0005615447368421052, "loss": 0.7699, "step": 28900 }, { "epoch": 0.29, "learning_rate": 0.0005614657894736842, "loss": 0.7563, "step": 28910 }, { "epoch": 0.29, "learning_rate": 0.0005613868421052631, "loss": 0.7691, "step": 28920 }, { "epoch": 0.29, "learning_rate": 0.0005613078947368421, "loss": 0.762, "step": 28930 }, { "epoch": 0.29, "learning_rate": 0.000561228947368421, "loss": 0.758, "step": 28940 }, { "epoch": 0.29, "learning_rate": 0.00056115, "loss": 0.7634, "step": 28950 }, { "epoch": 0.29, "learning_rate": 0.0005610710526315789, "loss": 0.7748, "step": 28960 }, { "epoch": 0.29, "learning_rate": 0.0005609921052631578, "loss": 0.7597, "step": 28970 }, { "epoch": 0.29, "learning_rate": 0.0005609131578947368, "loss": 0.7689, "step": 28980 }, { "epoch": 0.29, "learning_rate": 0.0005608342105263157, "loss": 0.7684, "step": 28990 }, { "epoch": 0.29, "learning_rate": 0.0005607552631578947, "loss": 0.7795, "step": 29000 }, { "epoch": 0.29, "learning_rate": 0.0005606763157894736, "loss": 0.7696, "step": 29010 }, { "epoch": 0.29, "learning_rate": 0.0005605973684210526, "loss": 0.7731, "step": 29020 }, { "epoch": 0.29, "learning_rate": 0.0005605184210526315, "loss": 0.7873, "step": 29030 }, { "epoch": 0.29, "learning_rate": 0.0005604394736842105, "loss": 0.7711, "step": 29040 }, { "epoch": 0.29, "learning_rate": 0.0005603605263157894, "loss": 0.7722, "step": 29050 }, { "epoch": 0.29, "learning_rate": 0.0005602815789473684, "loss": 0.7827, "step": 29060 }, { "epoch": 0.29, "learning_rate": 0.0005602026315789473, "loss": 0.7647, "step": 29070 }, { "epoch": 0.29, "learning_rate": 0.0005601236842105263, "loss": 0.7693, "step": 29080 }, { "epoch": 0.29, "learning_rate": 0.0005600447368421052, "loss": 0.7714, "step": 29090 }, { "epoch": 0.29, "learning_rate": 0.0005599657894736842, "loss": 0.777, "step": 29100 }, { "epoch": 0.29, "learning_rate": 0.0005598868421052631, "loss": 0.7623, "step": 29110 }, { "epoch": 0.29, "learning_rate": 0.0005598078947368421, "loss": 0.7806, "step": 29120 }, { "epoch": 0.29, "learning_rate": 0.000559728947368421, "loss": 0.7622, "step": 29130 }, { "epoch": 0.29, "learning_rate": 0.00055965, "loss": 0.7731, "step": 29140 }, { "epoch": 0.29, "learning_rate": 0.0005595710526315789, "loss": 0.7647, "step": 29150 }, { "epoch": 0.29, "learning_rate": 0.0005594921052631579, "loss": 0.787, "step": 29160 }, { "epoch": 0.29, "learning_rate": 0.0005594131578947368, "loss": 0.7716, "step": 29170 }, { "epoch": 0.29, "learning_rate": 0.0005593342105263158, "loss": 0.7641, "step": 29180 }, { "epoch": 0.29, "learning_rate": 0.0005592552631578947, "loss": 0.7624, "step": 29190 }, { "epoch": 0.29, "learning_rate": 0.0005591763157894737, "loss": 0.7725, "step": 29200 }, { "epoch": 0.29, "learning_rate": 0.0005590973684210526, "loss": 0.7749, "step": 29210 }, { "epoch": 0.29, "learning_rate": 0.0005590184210526316, "loss": 0.7672, "step": 29220 }, { "epoch": 0.29, "learning_rate": 0.0005589394736842105, "loss": 0.7671, "step": 29230 }, { "epoch": 0.29, "learning_rate": 0.0005588605263157895, "loss": 0.7642, "step": 29240 }, { "epoch": 0.29, "learning_rate": 0.0005587815789473684, "loss": 0.7693, "step": 29250 }, { "epoch": 0.29, "learning_rate": 0.0005587026315789473, "loss": 0.7706, "step": 29260 }, { "epoch": 0.29, "learning_rate": 0.0005586236842105262, "loss": 0.7733, "step": 29270 }, { "epoch": 0.29, "learning_rate": 0.0005585447368421052, "loss": 0.7648, "step": 29280 }, { "epoch": 0.29, "learning_rate": 0.0005584657894736842, "loss": 0.7538, "step": 29290 }, { "epoch": 0.29, "learning_rate": 0.0005583868421052631, "loss": 0.7698, "step": 29300 }, { "epoch": 0.29, "learning_rate": 0.0005583078947368421, "loss": 0.7615, "step": 29310 }, { "epoch": 0.29, "learning_rate": 0.000558228947368421, "loss": 0.7724, "step": 29320 }, { "epoch": 0.29, "learning_rate": 0.00055815, "loss": 0.745, "step": 29330 }, { "epoch": 0.29, "learning_rate": 0.0005580710526315788, "loss": 0.7809, "step": 29340 }, { "epoch": 0.29, "learning_rate": 0.0005579921052631579, "loss": 0.7832, "step": 29350 }, { "epoch": 0.29, "learning_rate": 0.0005579131578947367, "loss": 0.7819, "step": 29360 }, { "epoch": 0.29, "learning_rate": 0.0005578342105263158, "loss": 0.7809, "step": 29370 }, { "epoch": 0.29, "learning_rate": 0.0005577552631578947, "loss": 0.7701, "step": 29380 }, { "epoch": 0.29, "learning_rate": 0.0005576763157894737, "loss": 0.7612, "step": 29390 }, { "epoch": 0.29, "learning_rate": 0.0005575973684210526, "loss": 0.7629, "step": 29400 }, { "epoch": 0.29, "learning_rate": 0.0005575184210526315, "loss": 0.7665, "step": 29410 }, { "epoch": 0.29, "learning_rate": 0.0005574394736842105, "loss": 0.7607, "step": 29420 }, { "epoch": 0.29, "learning_rate": 0.0005573605263157894, "loss": 0.7558, "step": 29430 }, { "epoch": 0.29, "learning_rate": 0.0005572815789473684, "loss": 0.7546, "step": 29440 }, { "epoch": 0.29, "learning_rate": 0.0005572026315789473, "loss": 0.7674, "step": 29450 }, { "epoch": 0.29, "learning_rate": 0.0005571236842105263, "loss": 0.7651, "step": 29460 }, { "epoch": 0.29, "learning_rate": 0.0005570447368421053, "loss": 0.7576, "step": 29470 }, { "epoch": 0.29, "learning_rate": 0.0005569657894736841, "loss": 0.7815, "step": 29480 }, { "epoch": 0.29, "learning_rate": 0.0005568868421052632, "loss": 0.7652, "step": 29490 }, { "epoch": 0.29, "learning_rate": 0.000556807894736842, "loss": 0.7596, "step": 29500 }, { "epoch": 0.3, "learning_rate": 0.0005567289473684211, "loss": 0.7656, "step": 29510 }, { "epoch": 0.3, "learning_rate": 0.0005566499999999999, "loss": 0.7722, "step": 29520 }, { "epoch": 0.3, "learning_rate": 0.000556571052631579, "loss": 0.7622, "step": 29530 }, { "epoch": 0.3, "learning_rate": 0.0005564921052631578, "loss": 0.783, "step": 29540 }, { "epoch": 0.3, "learning_rate": 0.0005564131578947367, "loss": 0.756, "step": 29550 }, { "epoch": 0.3, "learning_rate": 0.0005563342105263158, "loss": 0.7565, "step": 29560 }, { "epoch": 0.3, "learning_rate": 0.0005562552631578946, "loss": 0.7852, "step": 29570 }, { "epoch": 0.3, "learning_rate": 0.0005561763157894737, "loss": 0.764, "step": 29580 }, { "epoch": 0.3, "learning_rate": 0.0005560973684210525, "loss": 0.7621, "step": 29590 }, { "epoch": 0.3, "learning_rate": 0.0005560184210526316, "loss": 0.773, "step": 29600 }, { "epoch": 0.3, "learning_rate": 0.0005559394736842104, "loss": 0.7667, "step": 29610 }, { "epoch": 0.3, "learning_rate": 0.0005558605263157895, "loss": 0.7601, "step": 29620 }, { "epoch": 0.3, "learning_rate": 0.0005557815789473683, "loss": 0.7701, "step": 29630 }, { "epoch": 0.3, "learning_rate": 0.0005557026315789473, "loss": 0.7738, "step": 29640 }, { "epoch": 0.3, "learning_rate": 0.0005556236842105263, "loss": 0.7708, "step": 29650 }, { "epoch": 0.3, "learning_rate": 0.0005555447368421052, "loss": 0.7586, "step": 29660 }, { "epoch": 0.3, "learning_rate": 0.0005554657894736842, "loss": 0.7645, "step": 29670 }, { "epoch": 0.3, "learning_rate": 0.0005553868421052631, "loss": 0.7619, "step": 29680 }, { "epoch": 0.3, "learning_rate": 0.0005553078947368421, "loss": 0.7617, "step": 29690 }, { "epoch": 0.3, "learning_rate": 0.000555228947368421, "loss": 0.7491, "step": 29700 }, { "epoch": 0.3, "learning_rate": 0.0005551499999999999, "loss": 0.767, "step": 29710 }, { "epoch": 0.3, "learning_rate": 0.0005550710526315789, "loss": 0.7461, "step": 29720 }, { "epoch": 0.3, "learning_rate": 0.0005549921052631578, "loss": 0.7609, "step": 29730 }, { "epoch": 0.3, "learning_rate": 0.0005549131578947369, "loss": 0.748, "step": 29740 }, { "epoch": 0.3, "learning_rate": 0.0005548342105263157, "loss": 0.7701, "step": 29750 }, { "epoch": 0.3, "learning_rate": 0.0005547631578947368, "loss": 0.7538, "step": 29760 }, { "epoch": 0.3, "learning_rate": 0.0005546842105263157, "loss": 0.7573, "step": 29770 }, { "epoch": 0.3, "learning_rate": 0.0005546052631578947, "loss": 0.7668, "step": 29780 }, { "epoch": 0.3, "learning_rate": 0.0005545263157894736, "loss": 0.7655, "step": 29790 }, { "epoch": 0.3, "learning_rate": 0.0005544473684210526, "loss": 0.7486, "step": 29800 }, { "epoch": 0.3, "learning_rate": 0.0005543684210526315, "loss": 0.7578, "step": 29810 }, { "epoch": 0.3, "learning_rate": 0.0005542894736842105, "loss": 0.7572, "step": 29820 }, { "epoch": 0.3, "learning_rate": 0.0005542105263157894, "loss": 0.7479, "step": 29830 }, { "epoch": 0.3, "learning_rate": 0.0005541315789473684, "loss": 0.7673, "step": 29840 }, { "epoch": 0.3, "learning_rate": 0.0005540526315789473, "loss": 0.7573, "step": 29850 }, { "epoch": 0.3, "learning_rate": 0.0005539736842105263, "loss": 0.7462, "step": 29860 }, { "epoch": 0.3, "learning_rate": 0.0005538947368421052, "loss": 0.7546, "step": 29870 }, { "epoch": 0.3, "learning_rate": 0.0005538157894736842, "loss": 0.7494, "step": 29880 }, { "epoch": 0.3, "learning_rate": 0.0005537368421052631, "loss": 0.7548, "step": 29890 }, { "epoch": 0.3, "learning_rate": 0.0005536578947368421, "loss": 0.7572, "step": 29900 }, { "epoch": 0.3, "learning_rate": 0.000553578947368421, "loss": 0.7564, "step": 29910 }, { "epoch": 0.3, "learning_rate": 0.0005535, "loss": 0.7634, "step": 29920 }, { "epoch": 0.3, "learning_rate": 0.0005534210526315789, "loss": 0.7591, "step": 29930 }, { "epoch": 0.3, "learning_rate": 0.0005533421052631579, "loss": 0.7549, "step": 29940 }, { "epoch": 0.3, "learning_rate": 0.0005532631578947368, "loss": 0.7569, "step": 29950 }, { "epoch": 0.3, "learning_rate": 0.0005531842105263157, "loss": 0.7466, "step": 29960 }, { "epoch": 0.3, "learning_rate": 0.0005531052631578947, "loss": 0.7431, "step": 29970 }, { "epoch": 0.3, "learning_rate": 0.0005530263157894736, "loss": 0.7572, "step": 29980 }, { "epoch": 0.3, "learning_rate": 0.0005529473684210526, "loss": 0.7676, "step": 29990 }, { "epoch": 0.3, "learning_rate": 0.0005528684210526315, "loss": 0.7634, "step": 30000 }, { "epoch": 0.3, "eval_accuracy": 0.8433683834141076, "eval_loss": 0.76123046875, "eval_runtime": 96.8175, "eval_samples_per_second": 826.297, "eval_steps_per_second": 1.622, "step": 30000 }, { "epoch": 0.3, "learning_rate": 0.0005527894736842105, "loss": 0.74, "step": 30010 }, { "epoch": 0.3, "learning_rate": 0.0005527105263157894, "loss": 0.762, "step": 30020 }, { "epoch": 0.3, "learning_rate": 0.0005526315789473684, "loss": 0.7723, "step": 30030 }, { "epoch": 0.3, "learning_rate": 0.0005525526315789473, "loss": 0.7572, "step": 30040 }, { "epoch": 0.3, "learning_rate": 0.0005524736842105263, "loss": 0.7527, "step": 30050 }, { "epoch": 0.3, "learning_rate": 0.0005523947368421052, "loss": 0.7593, "step": 30060 }, { "epoch": 0.3, "learning_rate": 0.0005523157894736842, "loss": 0.7476, "step": 30070 }, { "epoch": 0.3, "learning_rate": 0.0005522368421052631, "loss": 0.7554, "step": 30080 }, { "epoch": 0.3, "learning_rate": 0.0005521578947368421, "loss": 0.7492, "step": 30090 }, { "epoch": 0.3, "learning_rate": 0.000552078947368421, "loss": 0.7597, "step": 30100 }, { "epoch": 0.3, "learning_rate": 0.000552, "loss": 0.7607, "step": 30110 }, { "epoch": 0.3, "learning_rate": 0.0005519210526315789, "loss": 0.7455, "step": 30120 }, { "epoch": 0.3, "learning_rate": 0.0005518421052631579, "loss": 0.7545, "step": 30130 }, { "epoch": 0.3, "learning_rate": 0.0005517631578947368, "loss": 0.7592, "step": 30140 }, { "epoch": 0.3, "learning_rate": 0.0005516842105263158, "loss": 0.7563, "step": 30150 }, { "epoch": 0.3, "learning_rate": 0.0005516052631578947, "loss": 0.7479, "step": 30160 }, { "epoch": 0.3, "learning_rate": 0.0005515263157894737, "loss": 0.7569, "step": 30170 }, { "epoch": 0.3, "learning_rate": 0.0005514473684210526, "loss": 0.7524, "step": 30180 }, { "epoch": 0.3, "learning_rate": 0.0005513684210526316, "loss": 0.7568, "step": 30190 }, { "epoch": 0.3, "learning_rate": 0.0005512894736842105, "loss": 0.7635, "step": 30200 }, { "epoch": 0.3, "learning_rate": 0.0005512105263157895, "loss": 0.7526, "step": 30210 }, { "epoch": 0.3, "learning_rate": 0.0005511315789473684, "loss": 0.7605, "step": 30220 }, { "epoch": 0.3, "learning_rate": 0.0005510526315789474, "loss": 0.7517, "step": 30230 }, { "epoch": 0.3, "learning_rate": 0.0005509736842105263, "loss": 0.7414, "step": 30240 }, { "epoch": 0.3, "learning_rate": 0.0005508947368421051, "loss": 0.7578, "step": 30250 }, { "epoch": 0.3, "learning_rate": 0.0005508157894736842, "loss": 0.7417, "step": 30260 }, { "epoch": 0.3, "learning_rate": 0.000550736842105263, "loss": 0.7437, "step": 30270 }, { "epoch": 0.3, "learning_rate": 0.0005506578947368421, "loss": 0.7469, "step": 30280 }, { "epoch": 0.3, "learning_rate": 0.000550578947368421, "loss": 0.7589, "step": 30290 }, { "epoch": 0.3, "learning_rate": 0.0005505, "loss": 0.7563, "step": 30300 }, { "epoch": 0.3, "learning_rate": 0.0005504210526315789, "loss": 0.7491, "step": 30310 }, { "epoch": 0.3, "learning_rate": 0.0005503421052631578, "loss": 0.7615, "step": 30320 }, { "epoch": 0.3, "learning_rate": 0.0005502631578947368, "loss": 0.7601, "step": 30330 }, { "epoch": 0.3, "learning_rate": 0.0005501842105263157, "loss": 0.7411, "step": 30340 }, { "epoch": 0.3, "learning_rate": 0.0005501052631578947, "loss": 0.7549, "step": 30350 }, { "epoch": 0.3, "learning_rate": 0.0005500263157894736, "loss": 0.7653, "step": 30360 }, { "epoch": 0.3, "learning_rate": 0.0005499473684210526, "loss": 0.7534, "step": 30370 }, { "epoch": 0.3, "learning_rate": 0.0005498684210526316, "loss": 0.754, "step": 30380 }, { "epoch": 0.3, "learning_rate": 0.0005497894736842104, "loss": 0.7536, "step": 30390 }, { "epoch": 0.3, "learning_rate": 0.0005497105263157895, "loss": 0.7557, "step": 30400 }, { "epoch": 0.3, "learning_rate": 0.0005496315789473683, "loss": 0.7542, "step": 30410 }, { "epoch": 0.3, "learning_rate": 0.0005495526315789474, "loss": 0.761, "step": 30420 }, { "epoch": 0.3, "learning_rate": 0.0005494736842105262, "loss": 0.7696, "step": 30430 }, { "epoch": 0.3, "learning_rate": 0.0005493947368421053, "loss": 0.752, "step": 30440 }, { "epoch": 0.3, "learning_rate": 0.0005493157894736841, "loss": 0.7548, "step": 30450 }, { "epoch": 0.3, "learning_rate": 0.0005492368421052631, "loss": 0.7614, "step": 30460 }, { "epoch": 0.3, "learning_rate": 0.0005491578947368421, "loss": 0.7621, "step": 30470 }, { "epoch": 0.3, "learning_rate": 0.000549078947368421, "loss": 0.7664, "step": 30480 }, { "epoch": 0.3, "learning_rate": 0.000549, "loss": 0.7525, "step": 30490 }, { "epoch": 0.3, "learning_rate": 0.0005489210526315788, "loss": 0.7453, "step": 30500 }, { "epoch": 0.31, "learning_rate": 0.0005488421052631579, "loss": 0.7522, "step": 30510 }, { "epoch": 0.31, "learning_rate": 0.0005487631578947367, "loss": 0.7564, "step": 30520 }, { "epoch": 0.31, "learning_rate": 0.0005486842105263158, "loss": 0.772, "step": 30530 }, { "epoch": 0.31, "learning_rate": 0.0005486052631578946, "loss": 0.7477, "step": 30540 }, { "epoch": 0.31, "learning_rate": 0.0005485263157894736, "loss": 0.7515, "step": 30550 }, { "epoch": 0.31, "learning_rate": 0.0005484473684210526, "loss": 0.7445, "step": 30560 }, { "epoch": 0.31, "learning_rate": 0.0005483684210526315, "loss": 0.7702, "step": 30570 }, { "epoch": 0.31, "learning_rate": 0.0005482894736842105, "loss": 0.758, "step": 30580 }, { "epoch": 0.31, "learning_rate": 0.0005482105263157894, "loss": 0.7574, "step": 30590 }, { "epoch": 0.31, "learning_rate": 0.0005481315789473684, "loss": 0.7539, "step": 30600 }, { "epoch": 0.31, "learning_rate": 0.0005480526315789473, "loss": 0.7551, "step": 30610 }, { "epoch": 0.31, "learning_rate": 0.0005479736842105262, "loss": 0.7581, "step": 30620 }, { "epoch": 0.31, "learning_rate": 0.0005478947368421052, "loss": 0.7581, "step": 30630 }, { "epoch": 0.31, "learning_rate": 0.0005478157894736841, "loss": 0.7569, "step": 30640 }, { "epoch": 0.31, "learning_rate": 0.0005477368421052632, "loss": 0.7576, "step": 30650 }, { "epoch": 0.31, "learning_rate": 0.000547657894736842, "loss": 0.7551, "step": 30660 }, { "epoch": 0.31, "learning_rate": 0.0005475789473684211, "loss": 0.7675, "step": 30670 }, { "epoch": 0.31, "learning_rate": 0.0005474999999999999, "loss": 0.7667, "step": 30680 }, { "epoch": 0.31, "learning_rate": 0.0005474210526315789, "loss": 0.7623, "step": 30690 }, { "epoch": 0.31, "learning_rate": 0.0005473421052631578, "loss": 0.7515, "step": 30700 }, { "epoch": 0.31, "learning_rate": 0.0005472631578947368, "loss": 0.7656, "step": 30710 }, { "epoch": 0.31, "learning_rate": 0.0005471842105263157, "loss": 0.7542, "step": 30720 }, { "epoch": 0.31, "learning_rate": 0.0005471052631578947, "loss": 0.7649, "step": 30730 }, { "epoch": 0.31, "learning_rate": 0.0005470263157894737, "loss": 0.7544, "step": 30740 }, { "epoch": 0.31, "learning_rate": 0.0005469473684210526, "loss": 0.7633, "step": 30750 }, { "epoch": 0.31, "learning_rate": 0.0005468763157894736, "loss": 0.7596, "step": 30760 }, { "epoch": 0.31, "learning_rate": 0.0005467973684210526, "loss": 0.7561, "step": 30770 }, { "epoch": 0.31, "learning_rate": 0.0005467184210526315, "loss": 0.7612, "step": 30780 }, { "epoch": 0.31, "learning_rate": 0.0005466394736842105, "loss": 0.7541, "step": 30790 }, { "epoch": 0.31, "learning_rate": 0.0005465605263157894, "loss": 0.7617, "step": 30800 }, { "epoch": 0.31, "learning_rate": 0.0005464815789473684, "loss": 0.749, "step": 30810 }, { "epoch": 0.31, "learning_rate": 0.0005464026315789473, "loss": 0.7634, "step": 30820 }, { "epoch": 0.31, "learning_rate": 0.0005463236842105263, "loss": 0.7516, "step": 30830 }, { "epoch": 0.31, "learning_rate": 0.0005462447368421052, "loss": 0.7469, "step": 30840 }, { "epoch": 0.31, "learning_rate": 0.0005461657894736842, "loss": 0.7483, "step": 30850 }, { "epoch": 0.31, "learning_rate": 0.0005460868421052631, "loss": 0.7579, "step": 30860 }, { "epoch": 0.31, "learning_rate": 0.0005460078947368421, "loss": 0.7577, "step": 30870 }, { "epoch": 0.31, "learning_rate": 0.000545928947368421, "loss": 0.7598, "step": 30880 }, { "epoch": 0.31, "learning_rate": 0.00054585, "loss": 0.7574, "step": 30890 }, { "epoch": 0.31, "learning_rate": 0.0005457710526315789, "loss": 0.7502, "step": 30900 }, { "epoch": 0.31, "learning_rate": 0.0005456921052631579, "loss": 0.7667, "step": 30910 }, { "epoch": 0.31, "learning_rate": 0.0005456131578947368, "loss": 0.7648, "step": 30920 }, { "epoch": 0.31, "learning_rate": 0.0005455342105263158, "loss": 0.7654, "step": 30930 }, { "epoch": 0.31, "learning_rate": 0.0005454552631578947, "loss": 0.7654, "step": 30940 }, { "epoch": 0.31, "learning_rate": 0.0005453763157894736, "loss": 0.7554, "step": 30950 }, { "epoch": 0.31, "learning_rate": 0.0005452973684210526, "loss": 0.7673, "step": 30960 }, { "epoch": 0.31, "learning_rate": 0.0005452184210526315, "loss": 0.7704, "step": 30970 }, { "epoch": 0.31, "learning_rate": 0.0005451394736842105, "loss": 0.7606, "step": 30980 }, { "epoch": 0.31, "learning_rate": 0.0005450605263157894, "loss": 0.7612, "step": 30990 }, { "epoch": 0.31, "learning_rate": 0.0005449815789473684, "loss": 0.7536, "step": 31000 }, { "epoch": 0.31, "learning_rate": 0.0005449026315789473, "loss": 0.7804, "step": 31010 }, { "epoch": 0.31, "learning_rate": 0.0005448236842105263, "loss": 0.7685, "step": 31020 }, { "epoch": 0.31, "learning_rate": 0.0005447447368421052, "loss": 0.7722, "step": 31030 }, { "epoch": 0.31, "learning_rate": 0.0005446657894736842, "loss": 0.7534, "step": 31040 }, { "epoch": 0.31, "learning_rate": 0.0005445868421052631, "loss": 0.7505, "step": 31050 }, { "epoch": 0.31, "learning_rate": 0.0005445078947368421, "loss": 0.7667, "step": 31060 }, { "epoch": 0.31, "learning_rate": 0.000544428947368421, "loss": 0.7583, "step": 31070 }, { "epoch": 0.31, "learning_rate": 0.00054435, "loss": 0.7462, "step": 31080 }, { "epoch": 0.31, "learning_rate": 0.0005442710526315789, "loss": 0.7504, "step": 31090 }, { "epoch": 0.31, "learning_rate": 0.0005441921052631579, "loss": 0.7445, "step": 31100 }, { "epoch": 0.31, "learning_rate": 0.0005441131578947368, "loss": 0.757, "step": 31110 }, { "epoch": 0.31, "learning_rate": 0.0005440342105263158, "loss": 0.7521, "step": 31120 }, { "epoch": 0.31, "learning_rate": 0.0005439552631578947, "loss": 0.7544, "step": 31130 }, { "epoch": 0.31, "learning_rate": 0.0005438763157894737, "loss": 0.776, "step": 31140 }, { "epoch": 0.31, "learning_rate": 0.0005437973684210526, "loss": 0.7558, "step": 31150 }, { "epoch": 0.31, "learning_rate": 0.0005437184210526315, "loss": 0.7536, "step": 31160 }, { "epoch": 0.31, "learning_rate": 0.0005436394736842105, "loss": 0.7672, "step": 31170 }, { "epoch": 0.31, "learning_rate": 0.0005435605263157895, "loss": 0.7524, "step": 31180 }, { "epoch": 0.31, "learning_rate": 0.0005434815789473684, "loss": 0.7593, "step": 31190 }, { "epoch": 0.31, "learning_rate": 0.0005434026315789474, "loss": 0.7449, "step": 31200 }, { "epoch": 0.31, "learning_rate": 0.0005433236842105263, "loss": 0.7637, "step": 31210 }, { "epoch": 0.31, "learning_rate": 0.0005432447368421053, "loss": 0.749, "step": 31220 }, { "epoch": 0.31, "learning_rate": 0.0005431657894736841, "loss": 0.7534, "step": 31230 }, { "epoch": 0.31, "learning_rate": 0.0005430868421052631, "loss": 0.7537, "step": 31240 }, { "epoch": 0.31, "learning_rate": 0.000543007894736842, "loss": 0.7719, "step": 31250 }, { "epoch": 0.31, "learning_rate": 0.000542928947368421, "loss": 0.7491, "step": 31260 }, { "epoch": 0.31, "learning_rate": 0.00054285, "loss": 0.7525, "step": 31270 }, { "epoch": 0.31, "learning_rate": 0.0005427710526315789, "loss": 0.7667, "step": 31280 }, { "epoch": 0.31, "learning_rate": 0.0005426921052631579, "loss": 0.7592, "step": 31290 }, { "epoch": 0.31, "learning_rate": 0.0005426131578947367, "loss": 0.7334, "step": 31300 }, { "epoch": 0.31, "learning_rate": 0.0005425342105263158, "loss": 0.7551, "step": 31310 }, { "epoch": 0.31, "learning_rate": 0.0005424552631578946, "loss": 0.7591, "step": 31320 }, { "epoch": 0.31, "learning_rate": 0.0005423763157894737, "loss": 0.7398, "step": 31330 }, { "epoch": 0.31, "learning_rate": 0.0005422973684210525, "loss": 0.7528, "step": 31340 }, { "epoch": 0.31, "learning_rate": 0.0005422184210526316, "loss": 0.7515, "step": 31350 }, { "epoch": 0.31, "learning_rate": 0.0005421394736842104, "loss": 0.7617, "step": 31360 }, { "epoch": 0.31, "learning_rate": 0.0005420605263157894, "loss": 0.7424, "step": 31370 }, { "epoch": 0.31, "learning_rate": 0.0005419815789473684, "loss": 0.7467, "step": 31380 }, { "epoch": 0.31, "learning_rate": 0.0005419026315789473, "loss": 0.752, "step": 31390 }, { "epoch": 0.31, "learning_rate": 0.0005418236842105263, "loss": 0.7533, "step": 31400 }, { "epoch": 0.31, "learning_rate": 0.0005417447368421052, "loss": 0.7586, "step": 31410 }, { "epoch": 0.31, "learning_rate": 0.0005416657894736842, "loss": 0.7534, "step": 31420 }, { "epoch": 0.31, "learning_rate": 0.0005415868421052631, "loss": 0.7456, "step": 31430 }, { "epoch": 0.31, "learning_rate": 0.0005415078947368421, "loss": 0.7484, "step": 31440 }, { "epoch": 0.31, "learning_rate": 0.000541428947368421, "loss": 0.7525, "step": 31450 }, { "epoch": 0.31, "learning_rate": 0.0005413499999999999, "loss": 0.75, "step": 31460 }, { "epoch": 0.31, "learning_rate": 0.000541271052631579, "loss": 0.7527, "step": 31470 }, { "epoch": 0.31, "learning_rate": 0.0005411921052631578, "loss": 0.7389, "step": 31480 }, { "epoch": 0.31, "learning_rate": 0.0005411131578947369, "loss": 0.7251, "step": 31490 }, { "epoch": 0.32, "learning_rate": 0.0005410342105263157, "loss": 0.7385, "step": 31500 }, { "epoch": 0.32, "learning_rate": 0.0005409552631578948, "loss": 0.7557, "step": 31510 }, { "epoch": 0.32, "learning_rate": 0.0005408763157894736, "loss": 0.7681, "step": 31520 }, { "epoch": 0.32, "learning_rate": 0.0005407973684210525, "loss": 0.752, "step": 31530 }, { "epoch": 0.32, "learning_rate": 0.0005407184210526315, "loss": 0.7445, "step": 31540 }, { "epoch": 0.32, "learning_rate": 0.0005406394736842104, "loss": 0.7544, "step": 31550 }, { "epoch": 0.32, "learning_rate": 0.0005405605263157895, "loss": 0.7519, "step": 31560 }, { "epoch": 0.32, "learning_rate": 0.0005404815789473683, "loss": 0.7554, "step": 31570 }, { "epoch": 0.32, "learning_rate": 0.0005404026315789474, "loss": 0.7552, "step": 31580 }, { "epoch": 0.32, "learning_rate": 0.0005403236842105262, "loss": 0.7531, "step": 31590 }, { "epoch": 0.32, "learning_rate": 0.0005402447368421052, "loss": 0.7452, "step": 31600 }, { "epoch": 0.32, "learning_rate": 0.0005401657894736841, "loss": 0.7409, "step": 31610 }, { "epoch": 0.32, "learning_rate": 0.0005400868421052631, "loss": 0.7496, "step": 31620 }, { "epoch": 0.32, "learning_rate": 0.000540007894736842, "loss": 0.7526, "step": 31630 }, { "epoch": 0.32, "learning_rate": 0.000539928947368421, "loss": 0.7584, "step": 31640 }, { "epoch": 0.32, "learning_rate": 0.00053985, "loss": 0.7541, "step": 31650 }, { "epoch": 0.32, "learning_rate": 0.0005397710526315789, "loss": 0.7505, "step": 31660 }, { "epoch": 0.32, "learning_rate": 0.0005396921052631578, "loss": 0.7549, "step": 31670 }, { "epoch": 0.32, "learning_rate": 0.0005396131578947368, "loss": 0.7518, "step": 31680 }, { "epoch": 0.32, "learning_rate": 0.0005395342105263157, "loss": 0.7668, "step": 31690 }, { "epoch": 0.32, "learning_rate": 0.0005394552631578947, "loss": 0.7565, "step": 31700 }, { "epoch": 0.32, "learning_rate": 0.0005393763157894736, "loss": 0.7698, "step": 31710 }, { "epoch": 0.32, "learning_rate": 0.0005392973684210526, "loss": 0.7571, "step": 31720 }, { "epoch": 0.32, "learning_rate": 0.0005392184210526315, "loss": 0.7514, "step": 31730 }, { "epoch": 0.32, "learning_rate": 0.0005391394736842105, "loss": 0.7492, "step": 31740 }, { "epoch": 0.32, "learning_rate": 0.0005390605263157894, "loss": 0.7502, "step": 31750 }, { "epoch": 0.32, "learning_rate": 0.0005389894736842105, "loss": 0.7524, "step": 31760 }, { "epoch": 0.32, "learning_rate": 0.0005389105263157894, "loss": 0.7314, "step": 31770 }, { "epoch": 0.32, "learning_rate": 0.0005388315789473684, "loss": 0.7403, "step": 31780 }, { "epoch": 0.32, "learning_rate": 0.0005387526315789473, "loss": 0.7702, "step": 31790 }, { "epoch": 0.32, "learning_rate": 0.0005386736842105263, "loss": 0.7652, "step": 31800 }, { "epoch": 0.32, "learning_rate": 0.0005385947368421052, "loss": 0.7406, "step": 31810 }, { "epoch": 0.32, "learning_rate": 0.0005385157894736842, "loss": 0.7541, "step": 31820 }, { "epoch": 0.32, "learning_rate": 0.0005384368421052631, "loss": 0.7522, "step": 31830 }, { "epoch": 0.32, "learning_rate": 0.0005383578947368421, "loss": 0.7502, "step": 31840 }, { "epoch": 0.32, "learning_rate": 0.000538278947368421, "loss": 0.7463, "step": 31850 }, { "epoch": 0.32, "learning_rate": 0.0005382, "loss": 0.7564, "step": 31860 }, { "epoch": 0.32, "learning_rate": 0.0005381210526315789, "loss": 0.7583, "step": 31870 }, { "epoch": 0.32, "learning_rate": 0.0005380421052631579, "loss": 0.7582, "step": 31880 }, { "epoch": 0.32, "learning_rate": 0.0005379631578947368, "loss": 0.7337, "step": 31890 }, { "epoch": 0.32, "learning_rate": 0.0005378842105263158, "loss": 0.7439, "step": 31900 }, { "epoch": 0.32, "learning_rate": 0.0005378052631578947, "loss": 0.7558, "step": 31910 }, { "epoch": 0.32, "learning_rate": 0.0005377263157894737, "loss": 0.7434, "step": 31920 }, { "epoch": 0.32, "learning_rate": 0.0005376473684210526, "loss": 0.7456, "step": 31930 }, { "epoch": 0.32, "learning_rate": 0.0005375684210526315, "loss": 0.7559, "step": 31940 }, { "epoch": 0.32, "learning_rate": 0.0005374894736842105, "loss": 0.7459, "step": 31950 }, { "epoch": 0.32, "learning_rate": 0.0005374105263157894, "loss": 0.7421, "step": 31960 }, { "epoch": 0.32, "learning_rate": 0.0005373315789473684, "loss": 0.7134, "step": 31970 }, { "epoch": 0.32, "learning_rate": 0.0005372526315789473, "loss": 0.737, "step": 31980 }, { "epoch": 0.32, "learning_rate": 0.0005371736842105263, "loss": 0.7193, "step": 31990 }, { "epoch": 0.32, "learning_rate": 0.0005370947368421052, "loss": 0.7333, "step": 32000 }, { "epoch": 0.32, "learning_rate": 0.0005370157894736842, "loss": 0.7228, "step": 32010 }, { "epoch": 0.32, "learning_rate": 0.0005369368421052631, "loss": 0.7304, "step": 32020 }, { "epoch": 0.32, "learning_rate": 0.0005368578947368421, "loss": 0.7186, "step": 32030 }, { "epoch": 0.32, "learning_rate": 0.000536778947368421, "loss": 0.7269, "step": 32040 }, { "epoch": 0.32, "learning_rate": 0.0005367, "loss": 0.7097, "step": 32050 }, { "epoch": 0.32, "learning_rate": 0.0005366210526315789, "loss": 0.73, "step": 32060 }, { "epoch": 0.32, "learning_rate": 0.0005365421052631578, "loss": 0.7304, "step": 32070 }, { "epoch": 0.32, "learning_rate": 0.0005364631578947368, "loss": 0.7259, "step": 32080 }, { "epoch": 0.32, "learning_rate": 0.0005363842105263158, "loss": 0.7218, "step": 32090 }, { "epoch": 0.32, "learning_rate": 0.0005363052631578947, "loss": 0.7385, "step": 32100 }, { "epoch": 0.32, "learning_rate": 0.0005362263157894737, "loss": 0.7354, "step": 32110 }, { "epoch": 0.32, "learning_rate": 0.0005361473684210526, "loss": 0.7346, "step": 32120 }, { "epoch": 0.32, "learning_rate": 0.0005360684210526316, "loss": 0.73, "step": 32130 }, { "epoch": 0.32, "learning_rate": 0.0005359894736842104, "loss": 0.728, "step": 32140 }, { "epoch": 0.32, "learning_rate": 0.0005359105263157895, "loss": 0.7234, "step": 32150 }, { "epoch": 0.32, "learning_rate": 0.0005358315789473683, "loss": 0.7322, "step": 32160 }, { "epoch": 0.32, "learning_rate": 0.0005357526315789474, "loss": 0.726, "step": 32170 }, { "epoch": 0.32, "learning_rate": 0.0005356736842105263, "loss": 0.7364, "step": 32180 }, { "epoch": 0.32, "learning_rate": 0.0005355947368421053, "loss": 0.7312, "step": 32190 }, { "epoch": 0.32, "learning_rate": 0.0005355157894736842, "loss": 0.7171, "step": 32200 }, { "epoch": 0.32, "learning_rate": 0.000535436842105263, "loss": 0.72, "step": 32210 }, { "epoch": 0.32, "learning_rate": 0.0005353578947368421, "loss": 0.7491, "step": 32220 }, { "epoch": 0.32, "learning_rate": 0.0005352789473684209, "loss": 0.7367, "step": 32230 }, { "epoch": 0.32, "learning_rate": 0.0005352, "loss": 0.742, "step": 32240 }, { "epoch": 0.32, "learning_rate": 0.0005351210526315788, "loss": 0.7544, "step": 32250 }, { "epoch": 0.32, "learning_rate": 0.0005350421052631579, "loss": 0.7528, "step": 32260 }, { "epoch": 0.32, "learning_rate": 0.0005349631578947368, "loss": 0.7569, "step": 32270 }, { "epoch": 0.32, "learning_rate": 0.0005348842105263157, "loss": 0.7343, "step": 32280 }, { "epoch": 0.32, "learning_rate": 0.0005348052631578947, "loss": 0.7507, "step": 32290 }, { "epoch": 0.32, "learning_rate": 0.0005347263157894736, "loss": 0.7515, "step": 32300 }, { "epoch": 0.32, "learning_rate": 0.0005346473684210526, "loss": 0.7291, "step": 32310 }, { "epoch": 0.32, "learning_rate": 0.0005345684210526315, "loss": 0.7468, "step": 32320 }, { "epoch": 0.32, "learning_rate": 0.0005344894736842105, "loss": 0.7384, "step": 32330 }, { "epoch": 0.32, "learning_rate": 0.0005344105263157894, "loss": 0.7359, "step": 32340 }, { "epoch": 0.32, "learning_rate": 0.0005343315789473684, "loss": 0.7395, "step": 32350 }, { "epoch": 0.32, "learning_rate": 0.0005342526315789474, "loss": 0.7398, "step": 32360 }, { "epoch": 0.32, "learning_rate": 0.0005341736842105262, "loss": 0.7363, "step": 32370 }, { "epoch": 0.32, "learning_rate": 0.0005340947368421053, "loss": 0.7574, "step": 32380 }, { "epoch": 0.32, "learning_rate": 0.0005340157894736841, "loss": 0.7408, "step": 32390 }, { "epoch": 0.32, "learning_rate": 0.0005339368421052632, "loss": 0.7352, "step": 32400 }, { "epoch": 0.32, "learning_rate": 0.000533857894736842, "loss": 0.7276, "step": 32410 }, { "epoch": 0.32, "learning_rate": 0.0005337789473684211, "loss": 0.7376, "step": 32420 }, { "epoch": 0.32, "learning_rate": 0.0005336999999999999, "loss": 0.7443, "step": 32430 }, { "epoch": 0.32, "learning_rate": 0.0005336210526315789, "loss": 0.7413, "step": 32440 }, { "epoch": 0.32, "learning_rate": 0.0005335421052631578, "loss": 0.7386, "step": 32450 }, { "epoch": 0.32, "learning_rate": 0.0005334631578947368, "loss": 0.732, "step": 32460 }, { "epoch": 0.32, "learning_rate": 0.0005333842105263158, "loss": 0.7392, "step": 32470 }, { "epoch": 0.32, "learning_rate": 0.0005333052631578946, "loss": 0.7499, "step": 32480 }, { "epoch": 0.32, "learning_rate": 0.0005332263157894737, "loss": 0.7445, "step": 32490 }, { "epoch": 0.33, "learning_rate": 0.0005331473684210525, "loss": 0.7359, "step": 32500 }, { "epoch": 0.33, "eval_accuracy": 0.8473949658152091, "eval_loss": 0.73388671875, "eval_runtime": 97.0409, "eval_samples_per_second": 824.395, "eval_steps_per_second": 1.618, "step": 32500 }, { "epoch": 0.33, "learning_rate": 0.0005330684210526315, "loss": 0.7473, "step": 32510 }, { "epoch": 0.33, "learning_rate": 0.0005329894736842104, "loss": 0.7474, "step": 32520 }, { "epoch": 0.33, "learning_rate": 0.0005329105263157894, "loss": 0.7436, "step": 32530 }, { "epoch": 0.33, "learning_rate": 0.0005328315789473683, "loss": 0.7425, "step": 32540 }, { "epoch": 0.33, "learning_rate": 0.0005327526315789473, "loss": 0.7468, "step": 32550 }, { "epoch": 0.33, "learning_rate": 0.0005326736842105263, "loss": 0.7484, "step": 32560 }, { "epoch": 0.33, "learning_rate": 0.0005325947368421052, "loss": 0.7538, "step": 32570 }, { "epoch": 0.33, "learning_rate": 0.0005325157894736841, "loss": 0.7434, "step": 32580 }, { "epoch": 0.33, "learning_rate": 0.0005324368421052631, "loss": 0.7416, "step": 32590 }, { "epoch": 0.33, "learning_rate": 0.000532357894736842, "loss": 0.7532, "step": 32600 }, { "epoch": 0.33, "learning_rate": 0.000532278947368421, "loss": 0.7541, "step": 32610 }, { "epoch": 0.33, "learning_rate": 0.0005321999999999999, "loss": 0.7343, "step": 32620 }, { "epoch": 0.33, "learning_rate": 0.0005321210526315789, "loss": 0.7493, "step": 32630 }, { "epoch": 0.33, "learning_rate": 0.0005320421052631578, "loss": 0.7483, "step": 32640 }, { "epoch": 0.33, "learning_rate": 0.0005319631578947368, "loss": 0.7399, "step": 32650 }, { "epoch": 0.33, "learning_rate": 0.0005318842105263157, "loss": 0.7524, "step": 32660 }, { "epoch": 0.33, "learning_rate": 0.0005318052631578947, "loss": 0.7564, "step": 32670 }, { "epoch": 0.33, "learning_rate": 0.0005317263157894736, "loss": 0.7478, "step": 32680 }, { "epoch": 0.33, "learning_rate": 0.0005316473684210526, "loss": 0.7527, "step": 32690 }, { "epoch": 0.33, "learning_rate": 0.0005315684210526315, "loss": 0.738, "step": 32700 }, { "epoch": 0.33, "learning_rate": 0.0005314894736842105, "loss": 0.7476, "step": 32710 }, { "epoch": 0.33, "learning_rate": 0.0005314105263157894, "loss": 0.7487, "step": 32720 }, { "epoch": 0.33, "learning_rate": 0.0005313315789473684, "loss": 0.7353, "step": 32730 }, { "epoch": 0.33, "learning_rate": 0.0005312526315789473, "loss": 0.7482, "step": 32740 }, { "epoch": 0.33, "learning_rate": 0.0005311736842105263, "loss": 0.751, "step": 32750 }, { "epoch": 0.33, "learning_rate": 0.0005311026315789473, "loss": 0.7522, "step": 32760 }, { "epoch": 0.33, "learning_rate": 0.0005310236842105263, "loss": 0.7365, "step": 32770 }, { "epoch": 0.33, "learning_rate": 0.0005309447368421052, "loss": 0.7514, "step": 32780 }, { "epoch": 0.33, "learning_rate": 0.0005308657894736842, "loss": 0.7545, "step": 32790 }, { "epoch": 0.33, "learning_rate": 0.0005307868421052631, "loss": 0.7517, "step": 32800 }, { "epoch": 0.33, "learning_rate": 0.0005307078947368421, "loss": 0.746, "step": 32810 }, { "epoch": 0.33, "learning_rate": 0.000530628947368421, "loss": 0.743, "step": 32820 }, { "epoch": 0.33, "learning_rate": 0.00053055, "loss": 0.7451, "step": 32830 }, { "epoch": 0.33, "learning_rate": 0.0005304710526315789, "loss": 0.7388, "step": 32840 }, { "epoch": 0.33, "learning_rate": 0.0005303921052631579, "loss": 0.7551, "step": 32850 }, { "epoch": 0.33, "learning_rate": 0.0005303131578947368, "loss": 0.7417, "step": 32860 }, { "epoch": 0.33, "learning_rate": 0.0005302342105263158, "loss": 0.75, "step": 32870 }, { "epoch": 0.33, "learning_rate": 0.0005301552631578947, "loss": 0.7354, "step": 32880 }, { "epoch": 0.33, "learning_rate": 0.0005300763157894737, "loss": 0.7264, "step": 32890 }, { "epoch": 0.33, "learning_rate": 0.0005299973684210526, "loss": 0.7326, "step": 32900 }, { "epoch": 0.33, "learning_rate": 0.0005299184210526316, "loss": 0.7509, "step": 32910 }, { "epoch": 0.33, "learning_rate": 0.0005298394736842105, "loss": 0.7294, "step": 32920 }, { "epoch": 0.33, "learning_rate": 0.0005297605263157894, "loss": 0.7486, "step": 32930 }, { "epoch": 0.33, "learning_rate": 0.0005296815789473684, "loss": 0.7352, "step": 32940 }, { "epoch": 0.33, "learning_rate": 0.0005296026315789473, "loss": 0.7414, "step": 32950 }, { "epoch": 0.33, "learning_rate": 0.0005295236842105263, "loss": 0.7458, "step": 32960 }, { "epoch": 0.33, "learning_rate": 0.0005294447368421052, "loss": 0.7415, "step": 32970 }, { "epoch": 0.33, "learning_rate": 0.0005293657894736841, "loss": 0.7358, "step": 32980 }, { "epoch": 0.33, "learning_rate": 0.0005292868421052631, "loss": 0.7365, "step": 32990 }, { "epoch": 0.33, "learning_rate": 0.0005292078947368421, "loss": 0.7359, "step": 33000 }, { "epoch": 0.33, "learning_rate": 0.000529128947368421, "loss": 0.7468, "step": 33010 }, { "epoch": 0.33, "learning_rate": 0.00052905, "loss": 0.742, "step": 33020 }, { "epoch": 0.33, "learning_rate": 0.0005289710526315789, "loss": 0.7374, "step": 33030 }, { "epoch": 0.33, "learning_rate": 0.0005288921052631579, "loss": 0.7457, "step": 33040 }, { "epoch": 0.33, "learning_rate": 0.0005288131578947367, "loss": 0.7384, "step": 33050 }, { "epoch": 0.33, "learning_rate": 0.0005287342105263158, "loss": 0.7441, "step": 33060 }, { "epoch": 0.33, "learning_rate": 0.0005286552631578946, "loss": 0.747, "step": 33070 }, { "epoch": 0.33, "learning_rate": 0.0005285763157894737, "loss": 0.7361, "step": 33080 }, { "epoch": 0.33, "learning_rate": 0.0005284973684210526, "loss": 0.7278, "step": 33090 }, { "epoch": 0.33, "learning_rate": 0.0005284184210526316, "loss": 0.7421, "step": 33100 }, { "epoch": 0.33, "learning_rate": 0.0005283394736842105, "loss": 0.7444, "step": 33110 }, { "epoch": 0.33, "learning_rate": 0.0005282605263157894, "loss": 0.7428, "step": 33120 }, { "epoch": 0.33, "learning_rate": 0.0005281815789473684, "loss": 0.7315, "step": 33130 }, { "epoch": 0.33, "learning_rate": 0.0005281026315789473, "loss": 0.7349, "step": 33140 }, { "epoch": 0.33, "learning_rate": 0.0005280236842105263, "loss": 0.7473, "step": 33150 }, { "epoch": 0.33, "learning_rate": 0.0005279447368421052, "loss": 0.7376, "step": 33160 }, { "epoch": 0.33, "learning_rate": 0.0005278657894736842, "loss": 0.7405, "step": 33170 }, { "epoch": 0.33, "learning_rate": 0.0005277868421052632, "loss": 0.7406, "step": 33180 }, { "epoch": 0.33, "learning_rate": 0.0005277078947368421, "loss": 0.7355, "step": 33190 }, { "epoch": 0.33, "learning_rate": 0.000527628947368421, "loss": 0.7402, "step": 33200 }, { "epoch": 0.33, "learning_rate": 0.0005275499999999999, "loss": 0.7363, "step": 33210 }, { "epoch": 0.33, "learning_rate": 0.000527471052631579, "loss": 0.7324, "step": 33220 }, { "epoch": 0.33, "learning_rate": 0.0005273921052631578, "loss": 0.7352, "step": 33230 }, { "epoch": 0.33, "learning_rate": 0.0005273131578947368, "loss": 0.7458, "step": 33240 }, { "epoch": 0.33, "learning_rate": 0.0005272342105263157, "loss": 0.743, "step": 33250 }, { "epoch": 0.33, "learning_rate": 0.0005271552631578947, "loss": 0.7454, "step": 33260 }, { "epoch": 0.33, "learning_rate": 0.0005270763157894737, "loss": 0.7403, "step": 33270 }, { "epoch": 0.33, "learning_rate": 0.0005269973684210525, "loss": 0.7582, "step": 33280 }, { "epoch": 0.33, "learning_rate": 0.0005269184210526316, "loss": 0.7467, "step": 33290 }, { "epoch": 0.33, "learning_rate": 0.0005268394736842104, "loss": 0.7244, "step": 33300 }, { "epoch": 0.33, "learning_rate": 0.0005267605263157895, "loss": 0.7226, "step": 33310 }, { "epoch": 0.33, "learning_rate": 0.0005266815789473683, "loss": 0.7257, "step": 33320 }, { "epoch": 0.33, "learning_rate": 0.0005266026315789474, "loss": 0.7287, "step": 33330 }, { "epoch": 0.33, "learning_rate": 0.0005265236842105262, "loss": 0.716, "step": 33340 }, { "epoch": 0.33, "learning_rate": 0.0005264447368421052, "loss": 0.7227, "step": 33350 }, { "epoch": 0.33, "learning_rate": 0.0005263657894736842, "loss": 0.746, "step": 33360 }, { "epoch": 0.33, "learning_rate": 0.0005262868421052631, "loss": 0.7369, "step": 33370 }, { "epoch": 0.33, "learning_rate": 0.0005262078947368421, "loss": 0.7387, "step": 33380 }, { "epoch": 0.33, "learning_rate": 0.000526128947368421, "loss": 0.7452, "step": 33390 }, { "epoch": 0.33, "learning_rate": 0.00052605, "loss": 0.7576, "step": 33400 }, { "epoch": 0.33, "learning_rate": 0.0005259710526315789, "loss": 0.7297, "step": 33410 }, { "epoch": 0.33, "learning_rate": 0.0005258921052631578, "loss": 0.7438, "step": 33420 }, { "epoch": 0.33, "learning_rate": 0.0005258131578947368, "loss": 0.7385, "step": 33430 }, { "epoch": 0.33, "learning_rate": 0.0005257342105263157, "loss": 0.737, "step": 33440 }, { "epoch": 0.33, "learning_rate": 0.0005256552631578948, "loss": 0.7463, "step": 33450 }, { "epoch": 0.33, "learning_rate": 0.0005255763157894736, "loss": 0.7375, "step": 33460 }, { "epoch": 0.33, "learning_rate": 0.0005254973684210527, "loss": 0.7349, "step": 33470 }, { "epoch": 0.33, "learning_rate": 0.0005254184210526315, "loss": 0.7489, "step": 33480 }, { "epoch": 0.33, "learning_rate": 0.0005253394736842104, "loss": 0.717, "step": 33490 }, { "epoch": 0.34, "learning_rate": 0.0005252605263157894, "loss": 0.7439, "step": 33500 }, { "epoch": 0.34, "learning_rate": 0.0005251815789473683, "loss": 0.7317, "step": 33510 }, { "epoch": 0.34, "learning_rate": 0.0005251026315789473, "loss": 0.7183, "step": 33520 }, { "epoch": 0.34, "learning_rate": 0.0005250236842105262, "loss": 0.7291, "step": 33530 }, { "epoch": 0.34, "learning_rate": 0.0005249447368421052, "loss": 0.7308, "step": 33540 }, { "epoch": 0.34, "learning_rate": 0.0005248657894736841, "loss": 0.732, "step": 33550 }, { "epoch": 0.34, "learning_rate": 0.0005247868421052632, "loss": 0.7272, "step": 33560 }, { "epoch": 0.34, "learning_rate": 0.000524707894736842, "loss": 0.714, "step": 33570 }, { "epoch": 0.34, "learning_rate": 0.000524628947368421, "loss": 0.7142, "step": 33580 }, { "epoch": 0.34, "learning_rate": 0.0005245499999999999, "loss": 0.7135, "step": 33590 }, { "epoch": 0.34, "learning_rate": 0.0005244710526315789, "loss": 0.7322, "step": 33600 }, { "epoch": 0.34, "learning_rate": 0.0005243921052631578, "loss": 0.7229, "step": 33610 }, { "epoch": 0.34, "learning_rate": 0.0005243131578947368, "loss": 0.7302, "step": 33620 }, { "epoch": 0.34, "learning_rate": 0.0005242342105263157, "loss": 0.7106, "step": 33630 }, { "epoch": 0.34, "learning_rate": 0.0005241552631578947, "loss": 0.7233, "step": 33640 }, { "epoch": 0.34, "learning_rate": 0.0005240763157894736, "loss": 0.7127, "step": 33650 }, { "epoch": 0.34, "learning_rate": 0.0005239973684210526, "loss": 0.7349, "step": 33660 }, { "epoch": 0.34, "learning_rate": 0.0005239184210526315, "loss": 0.7202, "step": 33670 }, { "epoch": 0.34, "learning_rate": 0.0005238394736842105, "loss": 0.7272, "step": 33680 }, { "epoch": 0.34, "learning_rate": 0.0005237605263157894, "loss": 0.7234, "step": 33690 }, { "epoch": 0.34, "learning_rate": 0.0005236815789473684, "loss": 0.7197, "step": 33700 }, { "epoch": 0.34, "learning_rate": 0.0005236026315789473, "loss": 0.7289, "step": 33710 }, { "epoch": 0.34, "learning_rate": 0.0005235236842105263, "loss": 0.7314, "step": 33720 }, { "epoch": 0.34, "learning_rate": 0.0005234447368421052, "loss": 0.7327, "step": 33730 }, { "epoch": 0.34, "learning_rate": 0.0005233657894736842, "loss": 0.7303, "step": 33740 }, { "epoch": 0.34, "learning_rate": 0.0005232868421052631, "loss": 0.7394, "step": 33750 }, { "epoch": 0.34, "learning_rate": 0.0005232157894736842, "loss": 0.738, "step": 33760 }, { "epoch": 0.34, "learning_rate": 0.0005231368421052631, "loss": 0.7384, "step": 33770 }, { "epoch": 0.34, "learning_rate": 0.0005230578947368421, "loss": 0.7299, "step": 33780 }, { "epoch": 0.34, "learning_rate": 0.000522978947368421, "loss": 0.7335, "step": 33790 }, { "epoch": 0.34, "learning_rate": 0.0005229, "loss": 0.7289, "step": 33800 }, { "epoch": 0.34, "learning_rate": 0.0005228210526315789, "loss": 0.7436, "step": 33810 }, { "epoch": 0.34, "learning_rate": 0.0005227421052631579, "loss": 0.7481, "step": 33820 }, { "epoch": 0.34, "learning_rate": 0.0005226631578947368, "loss": 0.7415, "step": 33830 }, { "epoch": 0.34, "learning_rate": 0.0005225842105263158, "loss": 0.7187, "step": 33840 }, { "epoch": 0.34, "learning_rate": 0.0005225052631578947, "loss": 0.7431, "step": 33850 }, { "epoch": 0.34, "learning_rate": 0.0005224263157894737, "loss": 0.7364, "step": 33860 }, { "epoch": 0.34, "learning_rate": 0.0005223473684210526, "loss": 0.7437, "step": 33870 }, { "epoch": 0.34, "learning_rate": 0.0005222684210526316, "loss": 0.7448, "step": 33880 }, { "epoch": 0.34, "learning_rate": 0.0005221894736842105, "loss": 0.7319, "step": 33890 }, { "epoch": 0.34, "learning_rate": 0.0005221105263157895, "loss": 0.7297, "step": 33900 }, { "epoch": 0.34, "learning_rate": 0.0005220315789473684, "loss": 0.7319, "step": 33910 }, { "epoch": 0.34, "learning_rate": 0.0005219526315789474, "loss": 0.7375, "step": 33920 }, { "epoch": 0.34, "learning_rate": 0.0005218736842105263, "loss": 0.7316, "step": 33930 }, { "epoch": 0.34, "learning_rate": 0.0005217947368421052, "loss": 0.7477, "step": 33940 }, { "epoch": 0.34, "learning_rate": 0.0005217157894736842, "loss": 0.7337, "step": 33950 }, { "epoch": 0.34, "learning_rate": 0.000521636842105263, "loss": 0.7414, "step": 33960 }, { "epoch": 0.34, "learning_rate": 0.0005215578947368421, "loss": 0.7522, "step": 33970 }, { "epoch": 0.34, "learning_rate": 0.0005214789473684209, "loss": 0.7463, "step": 33980 }, { "epoch": 0.34, "learning_rate": 0.0005214, "loss": 0.7378, "step": 33990 }, { "epoch": 0.34, "learning_rate": 0.0005213210526315789, "loss": 0.7408, "step": 34000 }, { "epoch": 0.34, "learning_rate": 0.0005212421052631579, "loss": 0.7547, "step": 34010 }, { "epoch": 0.34, "learning_rate": 0.0005211631578947368, "loss": 0.742, "step": 34020 }, { "epoch": 0.34, "learning_rate": 0.0005210842105263157, "loss": 0.7354, "step": 34030 }, { "epoch": 0.34, "learning_rate": 0.0005210052631578947, "loss": 0.7217, "step": 34040 }, { "epoch": 0.34, "learning_rate": 0.0005209263157894736, "loss": 0.7308, "step": 34050 }, { "epoch": 0.34, "learning_rate": 0.0005208473684210526, "loss": 0.7299, "step": 34060 }, { "epoch": 0.34, "learning_rate": 0.0005207684210526315, "loss": 0.7335, "step": 34070 }, { "epoch": 0.34, "learning_rate": 0.0005206894736842105, "loss": 0.7334, "step": 34080 }, { "epoch": 0.34, "learning_rate": 0.0005206105263157895, "loss": 0.7333, "step": 34090 }, { "epoch": 0.34, "learning_rate": 0.0005205315789473684, "loss": 0.7233, "step": 34100 }, { "epoch": 0.34, "learning_rate": 0.0005204526315789474, "loss": 0.7271, "step": 34110 }, { "epoch": 0.34, "learning_rate": 0.0005203736842105262, "loss": 0.7459, "step": 34120 }, { "epoch": 0.34, "learning_rate": 0.0005202947368421053, "loss": 0.7334, "step": 34130 }, { "epoch": 0.34, "learning_rate": 0.0005202157894736841, "loss": 0.7251, "step": 34140 }, { "epoch": 0.34, "learning_rate": 0.0005201368421052632, "loss": 0.7363, "step": 34150 }, { "epoch": 0.34, "learning_rate": 0.000520057894736842, "loss": 0.7462, "step": 34160 }, { "epoch": 0.34, "learning_rate": 0.0005199789473684211, "loss": 0.7539, "step": 34170 }, { "epoch": 0.34, "learning_rate": 0.0005199, "loss": 0.7338, "step": 34180 }, { "epoch": 0.34, "learning_rate": 0.0005198210526315788, "loss": 0.7268, "step": 34190 }, { "epoch": 0.34, "learning_rate": 0.0005197421052631579, "loss": 0.7311, "step": 34200 }, { "epoch": 0.34, "learning_rate": 0.0005196631578947367, "loss": 0.7339, "step": 34210 }, { "epoch": 0.34, "learning_rate": 0.0005195842105263158, "loss": 0.7382, "step": 34220 }, { "epoch": 0.34, "learning_rate": 0.0005195052631578946, "loss": 0.7358, "step": 34230 }, { "epoch": 0.34, "learning_rate": 0.0005194263157894737, "loss": 0.7483, "step": 34240 }, { "epoch": 0.34, "learning_rate": 0.0005193473684210525, "loss": 0.7288, "step": 34250 }, { "epoch": 0.34, "learning_rate": 0.0005192684210526315, "loss": 0.7244, "step": 34260 }, { "epoch": 0.34, "learning_rate": 0.0005191894736842105, "loss": 0.7238, "step": 34270 }, { "epoch": 0.34, "learning_rate": 0.0005191105263157894, "loss": 0.7276, "step": 34280 }, { "epoch": 0.34, "learning_rate": 0.0005190315789473684, "loss": 0.7254, "step": 34290 }, { "epoch": 0.34, "learning_rate": 0.0005189526315789473, "loss": 0.7365, "step": 34300 }, { "epoch": 0.34, "learning_rate": 0.0005188736842105263, "loss": 0.7291, "step": 34310 }, { "epoch": 0.34, "learning_rate": 0.0005187947368421052, "loss": 0.7361, "step": 34320 }, { "epoch": 0.34, "learning_rate": 0.0005187157894736841, "loss": 0.7284, "step": 34330 }, { "epoch": 0.34, "learning_rate": 0.0005186368421052631, "loss": 0.735, "step": 34340 }, { "epoch": 0.34, "learning_rate": 0.000518557894736842, "loss": 0.7269, "step": 34350 }, { "epoch": 0.34, "learning_rate": 0.0005184789473684211, "loss": 0.7336, "step": 34360 }, { "epoch": 0.34, "learning_rate": 0.0005183999999999999, "loss": 0.7274, "step": 34370 }, { "epoch": 0.34, "learning_rate": 0.000518321052631579, "loss": 0.7402, "step": 34380 }, { "epoch": 0.34, "learning_rate": 0.0005182421052631578, "loss": 0.7573, "step": 34390 }, { "epoch": 0.34, "learning_rate": 0.0005181631578947368, "loss": 0.7416, "step": 34400 }, { "epoch": 0.34, "learning_rate": 0.0005180842105263157, "loss": 0.7312, "step": 34410 }, { "epoch": 0.34, "learning_rate": 0.0005180052631578947, "loss": 0.7386, "step": 34420 }, { "epoch": 0.34, "learning_rate": 0.0005179263157894736, "loss": 0.7324, "step": 34430 }, { "epoch": 0.34, "learning_rate": 0.0005178473684210526, "loss": 0.7301, "step": 34440 }, { "epoch": 0.34, "learning_rate": 0.0005177684210526316, "loss": 0.7395, "step": 34450 }, { "epoch": 0.34, "learning_rate": 0.0005176894736842105, "loss": 0.7325, "step": 34460 }, { "epoch": 0.34, "learning_rate": 0.0005176105263157895, "loss": 0.7335, "step": 34470 }, { "epoch": 0.34, "learning_rate": 0.0005175315789473683, "loss": 0.7278, "step": 34480 }, { "epoch": 0.34, "learning_rate": 0.0005174526315789473, "loss": 0.7283, "step": 34490 }, { "epoch": 0.34, "learning_rate": 0.0005173736842105262, "loss": 0.7152, "step": 34500 }, { "epoch": 0.35, "learning_rate": 0.0005172947368421052, "loss": 0.7211, "step": 34510 }, { "epoch": 0.35, "learning_rate": 0.0005172157894736841, "loss": 0.7264, "step": 34520 }, { "epoch": 0.35, "learning_rate": 0.0005171368421052631, "loss": 0.7357, "step": 34530 }, { "epoch": 0.35, "learning_rate": 0.0005170578947368421, "loss": 0.724, "step": 34540 }, { "epoch": 0.35, "learning_rate": 0.000516978947368421, "loss": 0.7043, "step": 34550 }, { "epoch": 0.35, "learning_rate": 0.0005168999999999999, "loss": 0.7032, "step": 34560 }, { "epoch": 0.35, "learning_rate": 0.0005168210526315789, "loss": 0.7127, "step": 34570 }, { "epoch": 0.35, "learning_rate": 0.0005167421052631578, "loss": 0.7052, "step": 34580 }, { "epoch": 0.35, "learning_rate": 0.0005166631578947368, "loss": 0.7144, "step": 34590 }, { "epoch": 0.35, "learning_rate": 0.0005165842105263157, "loss": 0.7111, "step": 34600 }, { "epoch": 0.35, "learning_rate": 0.0005165052631578947, "loss": 0.7303, "step": 34610 }, { "epoch": 0.35, "learning_rate": 0.0005164263157894736, "loss": 0.7421, "step": 34620 }, { "epoch": 0.35, "learning_rate": 0.0005163473684210526, "loss": 0.7404, "step": 34630 }, { "epoch": 0.35, "learning_rate": 0.0005162684210526315, "loss": 0.7299, "step": 34640 }, { "epoch": 0.35, "learning_rate": 0.0005161894736842105, "loss": 0.7335, "step": 34650 }, { "epoch": 0.35, "learning_rate": 0.0005161105263157894, "loss": 0.7416, "step": 34660 }, { "epoch": 0.35, "learning_rate": 0.0005160315789473684, "loss": 0.7242, "step": 34670 }, { "epoch": 0.35, "learning_rate": 0.0005159526315789473, "loss": 0.7375, "step": 34680 }, { "epoch": 0.35, "learning_rate": 0.0005158736842105263, "loss": 0.7385, "step": 34690 }, { "epoch": 0.35, "learning_rate": 0.0005157947368421052, "loss": 0.7338, "step": 34700 }, { "epoch": 0.35, "learning_rate": 0.0005157157894736842, "loss": 0.7183, "step": 34710 }, { "epoch": 0.35, "learning_rate": 0.0005156368421052631, "loss": 0.7195, "step": 34720 }, { "epoch": 0.35, "learning_rate": 0.0005155578947368421, "loss": 0.729, "step": 34730 }, { "epoch": 0.35, "learning_rate": 0.000515478947368421, "loss": 0.7371, "step": 34740 }, { "epoch": 0.35, "learning_rate": 0.0005154, "loss": 0.7364, "step": 34750 }, { "epoch": 0.35, "learning_rate": 0.0005153210526315789, "loss": 0.7323, "step": 34760 }, { "epoch": 0.35, "learning_rate": 0.00051525, "loss": 0.7454, "step": 34770 }, { "epoch": 0.35, "learning_rate": 0.0005151710526315789, "loss": 0.7339, "step": 34780 }, { "epoch": 0.35, "learning_rate": 0.0005150921052631579, "loss": 0.7392, "step": 34790 }, { "epoch": 0.35, "learning_rate": 0.0005150131578947368, "loss": 0.7508, "step": 34800 }, { "epoch": 0.35, "learning_rate": 0.0005149342105263158, "loss": 0.7465, "step": 34810 }, { "epoch": 0.35, "learning_rate": 0.0005148552631578947, "loss": 0.7408, "step": 34820 }, { "epoch": 0.35, "learning_rate": 0.0005147763157894737, "loss": 0.7322, "step": 34830 }, { "epoch": 0.35, "learning_rate": 0.0005146973684210526, "loss": 0.7364, "step": 34840 }, { "epoch": 0.35, "learning_rate": 0.0005146184210526316, "loss": 0.7434, "step": 34850 }, { "epoch": 0.35, "learning_rate": 0.0005145394736842105, "loss": 0.7299, "step": 34860 }, { "epoch": 0.35, "learning_rate": 0.0005144605263157893, "loss": 0.7336, "step": 34870 }, { "epoch": 0.35, "learning_rate": 0.0005143815789473684, "loss": 0.748, "step": 34880 }, { "epoch": 0.35, "learning_rate": 0.0005143026315789474, "loss": 0.7367, "step": 34890 }, { "epoch": 0.35, "learning_rate": 0.0005142236842105263, "loss": 0.7221, "step": 34900 }, { "epoch": 0.35, "learning_rate": 0.0005141447368421053, "loss": 0.7281, "step": 34910 }, { "epoch": 0.35, "learning_rate": 0.0005140657894736842, "loss": 0.7425, "step": 34920 }, { "epoch": 0.35, "learning_rate": 0.0005139868421052631, "loss": 0.7206, "step": 34930 }, { "epoch": 0.35, "learning_rate": 0.000513907894736842, "loss": 0.7306, "step": 34940 }, { "epoch": 0.35, "learning_rate": 0.000513828947368421, "loss": 0.7212, "step": 34950 }, { "epoch": 0.35, "learning_rate": 0.0005137499999999999, "loss": 0.7291, "step": 34960 }, { "epoch": 0.35, "learning_rate": 0.0005136710526315789, "loss": 0.7288, "step": 34970 }, { "epoch": 0.35, "learning_rate": 0.0005135921052631579, "loss": 0.7239, "step": 34980 }, { "epoch": 0.35, "learning_rate": 0.0005135131578947368, "loss": 0.713, "step": 34990 }, { "epoch": 0.35, "learning_rate": 0.0005134342105263158, "loss": 0.7263, "step": 35000 }, { "epoch": 0.35, "eval_accuracy": 0.8494970666829337, "eval_loss": 0.72412109375, "eval_runtime": 96.4579, "eval_samples_per_second": 829.378, "eval_steps_per_second": 1.628, "step": 35000 }, { "epoch": 0.35, "learning_rate": 0.0005133552631578947, "loss": 0.7198, "step": 35010 }, { "epoch": 0.35, "learning_rate": 0.0005132763157894737, "loss": 0.7359, "step": 35020 }, { "epoch": 0.35, "learning_rate": 0.0005132052631578946, "loss": 0.7295, "step": 35030 }, { "epoch": 0.35, "learning_rate": 0.0005131263157894736, "loss": 0.7355, "step": 35040 }, { "epoch": 0.35, "learning_rate": 0.0005130473684210525, "loss": 0.7266, "step": 35050 }, { "epoch": 0.35, "learning_rate": 0.0005129684210526316, "loss": 0.7277, "step": 35060 }, { "epoch": 0.35, "learning_rate": 0.0005128894736842104, "loss": 0.7344, "step": 35070 }, { "epoch": 0.35, "learning_rate": 0.0005128105263157895, "loss": 0.716, "step": 35080 }, { "epoch": 0.35, "learning_rate": 0.0005127315789473683, "loss": 0.7256, "step": 35090 }, { "epoch": 0.35, "learning_rate": 0.0005126526315789473, "loss": 0.7139, "step": 35100 }, { "epoch": 0.35, "learning_rate": 0.0005125736842105262, "loss": 0.7295, "step": 35110 }, { "epoch": 0.35, "learning_rate": 0.0005124947368421052, "loss": 0.7233, "step": 35120 }, { "epoch": 0.35, "learning_rate": 0.0005124157894736841, "loss": 0.7247, "step": 35130 }, { "epoch": 0.35, "learning_rate": 0.0005123368421052631, "loss": 0.7187, "step": 35140 }, { "epoch": 0.35, "learning_rate": 0.0005122578947368421, "loss": 0.73, "step": 35150 }, { "epoch": 0.35, "learning_rate": 0.000512178947368421, "loss": 0.7323, "step": 35160 }, { "epoch": 0.35, "learning_rate": 0.0005120999999999999, "loss": 0.725, "step": 35170 }, { "epoch": 0.35, "learning_rate": 0.0005120210526315789, "loss": 0.7245, "step": 35180 }, { "epoch": 0.35, "learning_rate": 0.0005119421052631578, "loss": 0.7183, "step": 35190 }, { "epoch": 0.35, "learning_rate": 0.0005118631578947368, "loss": 0.7255, "step": 35200 }, { "epoch": 0.35, "learning_rate": 0.0005117842105263157, "loss": 0.7156, "step": 35210 }, { "epoch": 0.35, "learning_rate": 0.0005117052631578947, "loss": 0.7228, "step": 35220 }, { "epoch": 0.35, "learning_rate": 0.0005116263157894736, "loss": 0.7315, "step": 35230 }, { "epoch": 0.35, "learning_rate": 0.0005115473684210526, "loss": 0.7284, "step": 35240 }, { "epoch": 0.35, "learning_rate": 0.0005114684210526315, "loss": 0.7303, "step": 35250 }, { "epoch": 0.35, "learning_rate": 0.0005113894736842105, "loss": 0.7175, "step": 35260 }, { "epoch": 0.35, "learning_rate": 0.0005113105263157894, "loss": 0.7213, "step": 35270 }, { "epoch": 0.35, "learning_rate": 0.0005112315789473684, "loss": 0.7116, "step": 35280 }, { "epoch": 0.35, "learning_rate": 0.0005111526315789473, "loss": 0.7201, "step": 35290 }, { "epoch": 0.35, "learning_rate": 0.0005110736842105263, "loss": 0.7214, "step": 35300 }, { "epoch": 0.35, "learning_rate": 0.0005109947368421052, "loss": 0.7244, "step": 35310 }, { "epoch": 0.35, "learning_rate": 0.0005109157894736841, "loss": 0.7074, "step": 35320 }, { "epoch": 0.35, "learning_rate": 0.0005108368421052631, "loss": 0.7161, "step": 35330 }, { "epoch": 0.35, "learning_rate": 0.000510757894736842, "loss": 0.7144, "step": 35340 }, { "epoch": 0.35, "learning_rate": 0.000510678947368421, "loss": 0.7091, "step": 35350 }, { "epoch": 0.35, "learning_rate": 0.0005105999999999999, "loss": 0.7014, "step": 35360 }, { "epoch": 0.35, "learning_rate": 0.0005105210526315789, "loss": 0.7294, "step": 35370 }, { "epoch": 0.35, "learning_rate": 0.0005104421052631578, "loss": 0.7478, "step": 35380 }, { "epoch": 0.35, "learning_rate": 0.0005103631578947368, "loss": 0.7266, "step": 35390 }, { "epoch": 0.35, "learning_rate": 0.0005102842105263157, "loss": 0.7205, "step": 35400 }, { "epoch": 0.35, "learning_rate": 0.0005102052631578947, "loss": 0.7177, "step": 35410 }, { "epoch": 0.35, "learning_rate": 0.0005101263157894736, "loss": 0.7212, "step": 35420 }, { "epoch": 0.35, "learning_rate": 0.0005100473684210526, "loss": 0.7242, "step": 35430 }, { "epoch": 0.35, "learning_rate": 0.0005099684210526315, "loss": 0.723, "step": 35440 }, { "epoch": 0.35, "learning_rate": 0.0005098894736842105, "loss": 0.7216, "step": 35450 }, { "epoch": 0.35, "learning_rate": 0.0005098105263157894, "loss": 0.7143, "step": 35460 }, { "epoch": 0.35, "learning_rate": 0.0005097315789473684, "loss": 0.7294, "step": 35470 }, { "epoch": 0.35, "learning_rate": 0.0005096526315789473, "loss": 0.7084, "step": 35480 }, { "epoch": 0.35, "learning_rate": 0.0005095736842105263, "loss": 0.7262, "step": 35490 }, { "epoch": 0.35, "learning_rate": 0.0005094947368421052, "loss": 0.7139, "step": 35500 }, { "epoch": 0.36, "learning_rate": 0.0005094157894736842, "loss": 0.7152, "step": 35510 }, { "epoch": 0.36, "learning_rate": 0.0005093368421052631, "loss": 0.7184, "step": 35520 }, { "epoch": 0.36, "learning_rate": 0.0005092578947368421, "loss": 0.7286, "step": 35530 }, { "epoch": 0.36, "learning_rate": 0.000509178947368421, "loss": 0.7231, "step": 35540 }, { "epoch": 0.36, "learning_rate": 0.0005091, "loss": 0.7326, "step": 35550 }, { "epoch": 0.36, "learning_rate": 0.0005090210526315789, "loss": 0.7314, "step": 35560 }, { "epoch": 0.36, "learning_rate": 0.0005089421052631579, "loss": 0.7175, "step": 35570 }, { "epoch": 0.36, "learning_rate": 0.0005088631578947368, "loss": 0.7295, "step": 35580 }, { "epoch": 0.36, "learning_rate": 0.0005087842105263158, "loss": 0.7197, "step": 35590 }, { "epoch": 0.36, "learning_rate": 0.0005087052631578947, "loss": 0.7279, "step": 35600 }, { "epoch": 0.36, "learning_rate": 0.0005086263157894737, "loss": 0.7315, "step": 35610 }, { "epoch": 0.36, "learning_rate": 0.0005085473684210526, "loss": 0.7144, "step": 35620 }, { "epoch": 0.36, "learning_rate": 0.0005084684210526315, "loss": 0.7167, "step": 35630 }, { "epoch": 0.36, "learning_rate": 0.0005083894736842105, "loss": 0.7193, "step": 35640 }, { "epoch": 0.36, "learning_rate": 0.0005083105263157894, "loss": 0.728, "step": 35650 }, { "epoch": 0.36, "learning_rate": 0.0005082315789473684, "loss": 0.7328, "step": 35660 }, { "epoch": 0.36, "learning_rate": 0.0005081526315789473, "loss": 0.7275, "step": 35670 }, { "epoch": 0.36, "learning_rate": 0.0005080736842105263, "loss": 0.7131, "step": 35680 }, { "epoch": 0.36, "learning_rate": 0.0005079947368421052, "loss": 0.7281, "step": 35690 }, { "epoch": 0.36, "learning_rate": 0.0005079157894736842, "loss": 0.739, "step": 35700 }, { "epoch": 0.36, "learning_rate": 0.0005078368421052631, "loss": 0.7187, "step": 35710 }, { "epoch": 0.36, "learning_rate": 0.0005077578947368421, "loss": 0.7069, "step": 35720 }, { "epoch": 0.36, "learning_rate": 0.000507678947368421, "loss": 0.709, "step": 35730 }, { "epoch": 0.36, "learning_rate": 0.0005076, "loss": 0.7044, "step": 35740 }, { "epoch": 0.36, "learning_rate": 0.0005075210526315789, "loss": 0.7058, "step": 35750 }, { "epoch": 0.36, "learning_rate": 0.0005074421052631579, "loss": 0.7104, "step": 35760 }, { "epoch": 0.36, "learning_rate": 0.0005073631578947368, "loss": 0.7264, "step": 35770 }, { "epoch": 0.36, "learning_rate": 0.0005072842105263157, "loss": 0.7214, "step": 35780 }, { "epoch": 0.36, "learning_rate": 0.0005072052631578947, "loss": 0.7252, "step": 35790 }, { "epoch": 0.36, "learning_rate": 0.0005071263157894737, "loss": 0.7139, "step": 35800 }, { "epoch": 0.36, "learning_rate": 0.0005070473684210526, "loss": 0.7245, "step": 35810 }, { "epoch": 0.36, "learning_rate": 0.0005069684210526316, "loss": 0.7257, "step": 35820 }, { "epoch": 0.36, "learning_rate": 0.0005068894736842105, "loss": 0.732, "step": 35830 }, { "epoch": 0.36, "learning_rate": 0.0005068105263157895, "loss": 0.7318, "step": 35840 }, { "epoch": 0.36, "learning_rate": 0.0005067315789473683, "loss": 0.7307, "step": 35850 }, { "epoch": 0.36, "learning_rate": 0.0005066526315789474, "loss": 0.7198, "step": 35860 }, { "epoch": 0.36, "learning_rate": 0.0005065736842105262, "loss": 0.7259, "step": 35870 }, { "epoch": 0.36, "learning_rate": 0.0005064947368421053, "loss": 0.7207, "step": 35880 }, { "epoch": 0.36, "learning_rate": 0.0005064157894736842, "loss": 0.7214, "step": 35890 }, { "epoch": 0.36, "learning_rate": 0.0005063368421052632, "loss": 0.716, "step": 35900 }, { "epoch": 0.36, "learning_rate": 0.0005062578947368421, "loss": 0.7169, "step": 35910 }, { "epoch": 0.36, "learning_rate": 0.000506178947368421, "loss": 0.7143, "step": 35920 }, { "epoch": 0.36, "learning_rate": 0.0005061, "loss": 0.7174, "step": 35930 }, { "epoch": 0.36, "learning_rate": 0.0005060210526315788, "loss": 0.7246, "step": 35940 }, { "epoch": 0.36, "learning_rate": 0.0005059421052631579, "loss": 0.7279, "step": 35950 }, { "epoch": 0.36, "learning_rate": 0.0005058631578947367, "loss": 0.7095, "step": 35960 }, { "epoch": 0.36, "learning_rate": 0.0005057842105263158, "loss": 0.7167, "step": 35970 }, { "epoch": 0.36, "learning_rate": 0.0005057052631578947, "loss": 0.7285, "step": 35980 }, { "epoch": 0.36, "learning_rate": 0.0005056263157894737, "loss": 0.7328, "step": 35990 }, { "epoch": 0.36, "learning_rate": 0.0005055473684210526, "loss": 0.7097, "step": 36000 }, { "epoch": 0.36, "learning_rate": 0.0005054684210526315, "loss": 0.7217, "step": 36010 }, { "epoch": 0.36, "learning_rate": 0.0005053894736842105, "loss": 0.7156, "step": 36020 }, { "epoch": 0.36, "learning_rate": 0.0005053105263157894, "loss": 0.7147, "step": 36030 }, { "epoch": 0.36, "learning_rate": 0.0005052315789473684, "loss": 0.7142, "step": 36040 }, { "epoch": 0.36, "learning_rate": 0.0005051526315789473, "loss": 0.7222, "step": 36050 }, { "epoch": 0.36, "learning_rate": 0.0005050736842105263, "loss": 0.7305, "step": 36060 }, { "epoch": 0.36, "learning_rate": 0.0005049947368421053, "loss": 0.7179, "step": 36070 }, { "epoch": 0.36, "learning_rate": 0.0005049157894736841, "loss": 0.7175, "step": 36080 }, { "epoch": 0.36, "learning_rate": 0.0005048368421052632, "loss": 0.7293, "step": 36090 }, { "epoch": 0.36, "learning_rate": 0.000504757894736842, "loss": 0.718, "step": 36100 }, { "epoch": 0.36, "learning_rate": 0.0005046789473684211, "loss": 0.7118, "step": 36110 }, { "epoch": 0.36, "learning_rate": 0.0005045999999999999, "loss": 0.7249, "step": 36120 }, { "epoch": 0.36, "learning_rate": 0.000504521052631579, "loss": 0.718, "step": 36130 }, { "epoch": 0.36, "learning_rate": 0.0005044421052631578, "loss": 0.7196, "step": 36140 }, { "epoch": 0.36, "learning_rate": 0.0005043631578947368, "loss": 0.722, "step": 36150 }, { "epoch": 0.36, "learning_rate": 0.0005042842105263157, "loss": 0.7193, "step": 36160 }, { "epoch": 0.36, "learning_rate": 0.0005042052631578946, "loss": 0.7274, "step": 36170 }, { "epoch": 0.36, "learning_rate": 0.0005041263157894737, "loss": 0.7426, "step": 36180 }, { "epoch": 0.36, "learning_rate": 0.0005040473684210525, "loss": 0.7385, "step": 36190 }, { "epoch": 0.36, "learning_rate": 0.0005039684210526316, "loss": 0.7281, "step": 36200 }, { "epoch": 0.36, "learning_rate": 0.0005038894736842104, "loss": 0.7254, "step": 36210 }, { "epoch": 0.36, "learning_rate": 0.0005038105263157894, "loss": 0.7201, "step": 36220 }, { "epoch": 0.36, "learning_rate": 0.0005037315789473683, "loss": 0.7271, "step": 36230 }, { "epoch": 0.36, "learning_rate": 0.0005036526315789473, "loss": 0.712, "step": 36240 }, { "epoch": 0.36, "learning_rate": 0.0005035736842105262, "loss": 0.7131, "step": 36250 }, { "epoch": 0.36, "learning_rate": 0.0005034947368421052, "loss": 0.7257, "step": 36260 }, { "epoch": 0.36, "learning_rate": 0.0005034157894736842, "loss": 0.7171, "step": 36270 }, { "epoch": 0.36, "learning_rate": 0.0005033368421052631, "loss": 0.7261, "step": 36280 }, { "epoch": 0.36, "learning_rate": 0.0005032578947368421, "loss": 0.7113, "step": 36290 }, { "epoch": 0.36, "learning_rate": 0.000503178947368421, "loss": 0.7236, "step": 36300 }, { "epoch": 0.36, "learning_rate": 0.0005030999999999999, "loss": 0.7233, "step": 36310 }, { "epoch": 0.36, "learning_rate": 0.0005030210526315789, "loss": 0.7248, "step": 36320 }, { "epoch": 0.36, "learning_rate": 0.0005029421052631578, "loss": 0.7149, "step": 36330 }, { "epoch": 0.36, "learning_rate": 0.0005028631578947368, "loss": 0.7202, "step": 36340 }, { "epoch": 0.36, "learning_rate": 0.0005027842105263157, "loss": 0.7303, "step": 36350 }, { "epoch": 0.36, "learning_rate": 0.0005027052631578948, "loss": 0.7318, "step": 36360 }, { "epoch": 0.36, "learning_rate": 0.0005026263157894736, "loss": 0.7295, "step": 36370 }, { "epoch": 0.36, "learning_rate": 0.0005025473684210526, "loss": 0.7153, "step": 36380 }, { "epoch": 0.36, "learning_rate": 0.0005024684210526315, "loss": 0.7242, "step": 36390 }, { "epoch": 0.36, "learning_rate": 0.0005023894736842105, "loss": 0.7082, "step": 36400 }, { "epoch": 0.36, "learning_rate": 0.0005023105263157894, "loss": 0.7298, "step": 36410 }, { "epoch": 0.36, "learning_rate": 0.0005022315789473684, "loss": 0.7359, "step": 36420 }, { "epoch": 0.36, "learning_rate": 0.0005021526315789473, "loss": 0.7232, "step": 36430 }, { "epoch": 0.36, "learning_rate": 0.0005020736842105263, "loss": 0.7138, "step": 36440 }, { "epoch": 0.36, "learning_rate": 0.0005019947368421052, "loss": 0.7274, "step": 36450 }, { "epoch": 0.36, "learning_rate": 0.0005019157894736841, "loss": 0.7177, "step": 36460 }, { "epoch": 0.36, "learning_rate": 0.0005018368421052631, "loss": 0.7288, "step": 36470 }, { "epoch": 0.36, "learning_rate": 0.000501757894736842, "loss": 0.7133, "step": 36480 }, { "epoch": 0.36, "learning_rate": 0.000501678947368421, "loss": 0.7078, "step": 36490 }, { "epoch": 0.36, "learning_rate": 0.0005015999999999999, "loss": 0.7188, "step": 36500 }, { "epoch": 0.37, "learning_rate": 0.0005015210526315789, "loss": 0.7166, "step": 36510 }, { "epoch": 0.37, "learning_rate": 0.0005014421052631578, "loss": 0.7206, "step": 36520 }, { "epoch": 0.37, "learning_rate": 0.0005013631578947368, "loss": 0.7089, "step": 36530 }, { "epoch": 0.37, "learning_rate": 0.0005012842105263157, "loss": 0.7096, "step": 36540 }, { "epoch": 0.37, "learning_rate": 0.0005012052631578947, "loss": 0.7141, "step": 36550 }, { "epoch": 0.37, "learning_rate": 0.0005011263157894736, "loss": 0.7171, "step": 36560 }, { "epoch": 0.37, "learning_rate": 0.0005010473684210526, "loss": 0.7153, "step": 36570 }, { "epoch": 0.37, "learning_rate": 0.0005009684210526315, "loss": 0.7071, "step": 36580 }, { "epoch": 0.37, "learning_rate": 0.0005008894736842105, "loss": 0.7147, "step": 36590 }, { "epoch": 0.37, "learning_rate": 0.0005008105263157894, "loss": 0.7072, "step": 36600 }, { "epoch": 0.37, "learning_rate": 0.0005007315789473684, "loss": 0.7211, "step": 36610 }, { "epoch": 0.37, "learning_rate": 0.0005006526315789473, "loss": 0.7277, "step": 36620 }, { "epoch": 0.37, "learning_rate": 0.0005005736842105263, "loss": 0.721, "step": 36630 }, { "epoch": 0.37, "learning_rate": 0.0005004947368421052, "loss": 0.713, "step": 36640 }, { "epoch": 0.37, "learning_rate": 0.0005004157894736842, "loss": 0.7116, "step": 36650 }, { "epoch": 0.37, "learning_rate": 0.0005003368421052631, "loss": 0.7118, "step": 36660 }, { "epoch": 0.37, "learning_rate": 0.0005002578947368421, "loss": 0.7117, "step": 36670 }, { "epoch": 0.37, "learning_rate": 0.000500178947368421, "loss": 0.7096, "step": 36680 }, { "epoch": 0.37, "learning_rate": 0.0005001, "loss": 0.7168, "step": 36690 }, { "epoch": 0.37, "learning_rate": 0.0005000210526315789, "loss": 0.7151, "step": 36700 }, { "epoch": 0.37, "learning_rate": 0.0004999421052631579, "loss": 0.699, "step": 36710 }, { "epoch": 0.37, "learning_rate": 0.0004998631578947368, "loss": 0.7075, "step": 36720 }, { "epoch": 0.37, "learning_rate": 0.0004997842105263158, "loss": 0.7075, "step": 36730 }, { "epoch": 0.37, "learning_rate": 0.0004997052631578947, "loss": 0.7169, "step": 36740 }, { "epoch": 0.37, "learning_rate": 0.0004996263157894736, "loss": 0.7234, "step": 36750 }, { "epoch": 0.37, "learning_rate": 0.0004995473684210526, "loss": 0.7185, "step": 36760 }, { "epoch": 0.37, "learning_rate": 0.0004994684210526315, "loss": 0.7203, "step": 36770 }, { "epoch": 0.37, "learning_rate": 0.0004993894736842105, "loss": 0.7247, "step": 36780 }, { "epoch": 0.37, "learning_rate": 0.0004993105263157894, "loss": 0.7275, "step": 36790 }, { "epoch": 0.37, "learning_rate": 0.0004992315789473684, "loss": 0.7052, "step": 36800 }, { "epoch": 0.37, "learning_rate": 0.0004991526315789473, "loss": 0.7142, "step": 36810 }, { "epoch": 0.37, "learning_rate": 0.0004990736842105263, "loss": 0.7164, "step": 36820 }, { "epoch": 0.37, "learning_rate": 0.0004989947368421052, "loss": 0.7231, "step": 36830 }, { "epoch": 0.37, "learning_rate": 0.0004989157894736842, "loss": 0.7199, "step": 36840 }, { "epoch": 0.37, "learning_rate": 0.0004988368421052631, "loss": 0.7085, "step": 36850 }, { "epoch": 0.37, "learning_rate": 0.0004987578947368421, "loss": 0.7109, "step": 36860 }, { "epoch": 0.37, "learning_rate": 0.000498678947368421, "loss": 0.7202, "step": 36870 }, { "epoch": 0.37, "learning_rate": 0.0004986, "loss": 0.7232, "step": 36880 }, { "epoch": 0.37, "learning_rate": 0.0004985210526315789, "loss": 0.7137, "step": 36890 }, { "epoch": 0.37, "learning_rate": 0.0004984421052631579, "loss": 0.721, "step": 36900 }, { "epoch": 0.37, "learning_rate": 0.0004983631578947368, "loss": 0.7191, "step": 36910 }, { "epoch": 0.37, "learning_rate": 0.0004982842105263158, "loss": 0.7214, "step": 36920 }, { "epoch": 0.37, "learning_rate": 0.0004982052631578947, "loss": 0.7221, "step": 36930 }, { "epoch": 0.37, "learning_rate": 0.0004981263157894737, "loss": 0.7255, "step": 36940 }, { "epoch": 0.37, "learning_rate": 0.0004980473684210526, "loss": 0.7214, "step": 36950 }, { "epoch": 0.37, "learning_rate": 0.0004979684210526316, "loss": 0.7181, "step": 36960 }, { "epoch": 0.37, "learning_rate": 0.0004978894736842105, "loss": 0.7225, "step": 36970 }, { "epoch": 0.37, "learning_rate": 0.0004978105263157895, "loss": 0.7284, "step": 36980 }, { "epoch": 0.37, "learning_rate": 0.0004977315789473683, "loss": 0.7201, "step": 36990 }, { "epoch": 0.37, "learning_rate": 0.0004976526315789474, "loss": 0.727, "step": 37000 }, { "epoch": 0.37, "learning_rate": 0.0004975736842105263, "loss": 0.7146, "step": 37010 }, { "epoch": 0.37, "learning_rate": 0.0004974947368421053, "loss": 0.7201, "step": 37020 }, { "epoch": 0.37, "learning_rate": 0.0004974236842105262, "loss": 0.7216, "step": 37030 }, { "epoch": 0.37, "learning_rate": 0.0004973447368421053, "loss": 0.7165, "step": 37040 }, { "epoch": 0.37, "learning_rate": 0.0004972657894736841, "loss": 0.7176, "step": 37050 }, { "epoch": 0.37, "learning_rate": 0.0004971868421052631, "loss": 0.7259, "step": 37060 }, { "epoch": 0.37, "learning_rate": 0.0004971078947368421, "loss": 0.7075, "step": 37070 }, { "epoch": 0.37, "learning_rate": 0.000497028947368421, "loss": 0.7061, "step": 37080 }, { "epoch": 0.37, "learning_rate": 0.00049695, "loss": 0.7284, "step": 37090 }, { "epoch": 0.37, "learning_rate": 0.0004968710526315789, "loss": 0.7247, "step": 37100 }, { "epoch": 0.37, "learning_rate": 0.0004967921052631579, "loss": 0.7131, "step": 37110 }, { "epoch": 0.37, "learning_rate": 0.0004967131578947368, "loss": 0.7106, "step": 37120 }, { "epoch": 0.37, "learning_rate": 0.0004966342105263158, "loss": 0.7251, "step": 37130 }, { "epoch": 0.37, "learning_rate": 0.0004965552631578947, "loss": 0.7151, "step": 37140 }, { "epoch": 0.37, "learning_rate": 0.0004964763157894736, "loss": 0.7147, "step": 37150 }, { "epoch": 0.37, "learning_rate": 0.0004963973684210525, "loss": 0.7136, "step": 37160 }, { "epoch": 0.37, "learning_rate": 0.0004963184210526315, "loss": 0.6948, "step": 37170 }, { "epoch": 0.37, "learning_rate": 0.0004962394736842106, "loss": 0.7092, "step": 37180 }, { "epoch": 0.37, "learning_rate": 0.0004961605263157894, "loss": 0.7282, "step": 37190 }, { "epoch": 0.37, "learning_rate": 0.0004960815789473684, "loss": 0.7155, "step": 37200 }, { "epoch": 0.37, "learning_rate": 0.0004960026315789473, "loss": 0.7101, "step": 37210 }, { "epoch": 0.37, "learning_rate": 0.0004959236842105262, "loss": 0.7093, "step": 37220 }, { "epoch": 0.37, "learning_rate": 0.0004958447368421052, "loss": 0.695, "step": 37230 }, { "epoch": 0.37, "learning_rate": 0.0004957657894736841, "loss": 0.7034, "step": 37240 }, { "epoch": 0.37, "learning_rate": 0.0004956868421052631, "loss": 0.6962, "step": 37250 }, { "epoch": 0.37, "learning_rate": 0.000495607894736842, "loss": 0.697, "step": 37260 }, { "epoch": 0.37, "learning_rate": 0.0004955289473684211, "loss": 0.6937, "step": 37270 }, { "epoch": 0.37, "learning_rate": 0.0004954499999999999, "loss": 0.7066, "step": 37280 }, { "epoch": 0.37, "learning_rate": 0.0004953710526315789, "loss": 0.6947, "step": 37290 }, { "epoch": 0.37, "learning_rate": 0.0004952921052631578, "loss": 0.6934, "step": 37300 }, { "epoch": 0.37, "learning_rate": 0.0004952131578947368, "loss": 0.7065, "step": 37310 }, { "epoch": 0.37, "learning_rate": 0.0004951342105263157, "loss": 0.6958, "step": 37320 }, { "epoch": 0.37, "learning_rate": 0.0004950552631578947, "loss": 0.6939, "step": 37330 }, { "epoch": 0.37, "learning_rate": 0.0004949763157894736, "loss": 0.7098, "step": 37340 }, { "epoch": 0.37, "learning_rate": 0.0004948973684210526, "loss": 0.6964, "step": 37350 }, { "epoch": 0.37, "learning_rate": 0.0004948184210526315, "loss": 0.7114, "step": 37360 }, { "epoch": 0.37, "learning_rate": 0.0004947394736842105, "loss": 0.699, "step": 37370 }, { "epoch": 0.37, "learning_rate": 0.0004946605263157894, "loss": 0.7035, "step": 37380 }, { "epoch": 0.37, "learning_rate": 0.0004945815789473684, "loss": 0.7026, "step": 37390 }, { "epoch": 0.37, "learning_rate": 0.0004945026315789473, "loss": 0.6915, "step": 37400 }, { "epoch": 0.37, "learning_rate": 0.0004944236842105263, "loss": 0.7018, "step": 37410 }, { "epoch": 0.37, "learning_rate": 0.0004943447368421052, "loss": 0.6961, "step": 37420 }, { "epoch": 0.37, "learning_rate": 0.0004942657894736842, "loss": 0.7012, "step": 37430 }, { "epoch": 0.37, "learning_rate": 0.0004941868421052631, "loss": 0.6963, "step": 37440 }, { "epoch": 0.37, "learning_rate": 0.000494107894736842, "loss": 0.6939, "step": 37450 }, { "epoch": 0.37, "learning_rate": 0.000494028947368421, "loss": 0.7096, "step": 37460 }, { "epoch": 0.37, "learning_rate": 0.0004939499999999999, "loss": 0.7065, "step": 37470 }, { "epoch": 0.37, "learning_rate": 0.0004938710526315789, "loss": 0.7188, "step": 37480 }, { "epoch": 0.37, "learning_rate": 0.0004937921052631578, "loss": 0.7202, "step": 37490 }, { "epoch": 0.38, "learning_rate": 0.0004937131578947368, "loss": 0.719, "step": 37500 }, { "epoch": 0.38, "eval_accuracy": 0.8522274619295023, "eval_loss": 0.70361328125, "eval_runtime": 97.0969, "eval_samples_per_second": 823.919, "eval_steps_per_second": 1.617, "step": 37500 }, { "epoch": 0.38, "learning_rate": 0.0004936342105263157, "loss": 0.7156, "step": 37510 }, { "epoch": 0.38, "learning_rate": 0.0004935552631578947, "loss": 0.7196, "step": 37520 }, { "epoch": 0.38, "learning_rate": 0.0004934763157894736, "loss": 0.715, "step": 37530 }, { "epoch": 0.38, "learning_rate": 0.0004933973684210526, "loss": 0.7078, "step": 37540 }, { "epoch": 0.38, "learning_rate": 0.0004933184210526315, "loss": 0.7027, "step": 37550 }, { "epoch": 0.38, "learning_rate": 0.0004932394736842105, "loss": 0.7141, "step": 37560 }, { "epoch": 0.38, "learning_rate": 0.0004931605263157894, "loss": 0.7137, "step": 37570 }, { "epoch": 0.38, "learning_rate": 0.0004930815789473684, "loss": 0.7156, "step": 37580 }, { "epoch": 0.38, "learning_rate": 0.0004930026315789473, "loss": 0.7046, "step": 37590 }, { "epoch": 0.38, "learning_rate": 0.0004929236842105263, "loss": 0.7017, "step": 37600 }, { "epoch": 0.38, "learning_rate": 0.0004928447368421052, "loss": 0.7096, "step": 37610 }, { "epoch": 0.38, "learning_rate": 0.0004927657894736842, "loss": 0.7229, "step": 37620 }, { "epoch": 0.38, "learning_rate": 0.0004926868421052631, "loss": 0.7186, "step": 37630 }, { "epoch": 0.38, "learning_rate": 0.0004926078947368421, "loss": 0.7103, "step": 37640 }, { "epoch": 0.38, "learning_rate": 0.000492528947368421, "loss": 0.7069, "step": 37650 }, { "epoch": 0.38, "learning_rate": 0.00049245, "loss": 0.7328, "step": 37660 }, { "epoch": 0.38, "learning_rate": 0.0004923710526315789, "loss": 0.7067, "step": 37670 }, { "epoch": 0.38, "learning_rate": 0.0004922921052631579, "loss": 0.7214, "step": 37680 }, { "epoch": 0.38, "learning_rate": 0.0004922131578947368, "loss": 0.7246, "step": 37690 }, { "epoch": 0.38, "learning_rate": 0.0004921342105263158, "loss": 0.72, "step": 37700 }, { "epoch": 0.38, "learning_rate": 0.0004920552631578947, "loss": 0.7203, "step": 37710 }, { "epoch": 0.38, "learning_rate": 0.0004919763157894737, "loss": 0.7188, "step": 37720 }, { "epoch": 0.38, "learning_rate": 0.0004918973684210526, "loss": 0.7234, "step": 37730 }, { "epoch": 0.38, "learning_rate": 0.0004918184210526315, "loss": 0.7165, "step": 37740 }, { "epoch": 0.38, "learning_rate": 0.0004917394736842105, "loss": 0.7195, "step": 37750 }, { "epoch": 0.38, "learning_rate": 0.0004916605263157894, "loss": 0.7046, "step": 37760 }, { "epoch": 0.38, "learning_rate": 0.0004915815789473684, "loss": 0.7145, "step": 37770 }, { "epoch": 0.38, "learning_rate": 0.0004915026315789473, "loss": 0.7107, "step": 37780 }, { "epoch": 0.38, "learning_rate": 0.0004914236842105263, "loss": 0.7143, "step": 37790 }, { "epoch": 0.38, "learning_rate": 0.0004913447368421052, "loss": 0.7283, "step": 37800 }, { "epoch": 0.38, "learning_rate": 0.0004912657894736842, "loss": 0.7108, "step": 37810 }, { "epoch": 0.38, "learning_rate": 0.0004911868421052631, "loss": 0.699, "step": 37820 }, { "epoch": 0.38, "learning_rate": 0.0004911078947368421, "loss": 0.7078, "step": 37830 }, { "epoch": 0.38, "learning_rate": 0.000491028947368421, "loss": 0.7112, "step": 37840 }, { "epoch": 0.38, "learning_rate": 0.00049095, "loss": 0.713, "step": 37850 }, { "epoch": 0.38, "learning_rate": 0.0004908710526315789, "loss": 0.7074, "step": 37860 }, { "epoch": 0.38, "learning_rate": 0.0004907921052631579, "loss": 0.7112, "step": 37870 }, { "epoch": 0.38, "learning_rate": 0.0004907131578947368, "loss": 0.7033, "step": 37880 }, { "epoch": 0.38, "learning_rate": 0.0004906342105263158, "loss": 0.7322, "step": 37890 }, { "epoch": 0.38, "learning_rate": 0.0004905552631578946, "loss": 0.7089, "step": 37900 }, { "epoch": 0.38, "learning_rate": 0.0004904763157894737, "loss": 0.7081, "step": 37910 }, { "epoch": 0.38, "learning_rate": 0.0004903973684210526, "loss": 0.7105, "step": 37920 }, { "epoch": 0.38, "learning_rate": 0.0004903184210526316, "loss": 0.7295, "step": 37930 }, { "epoch": 0.38, "learning_rate": 0.0004902394736842105, "loss": 0.6959, "step": 37940 }, { "epoch": 0.38, "learning_rate": 0.0004901605263157895, "loss": 0.7188, "step": 37950 }, { "epoch": 0.38, "learning_rate": 0.0004900815789473684, "loss": 0.7233, "step": 37960 }, { "epoch": 0.38, "learning_rate": 0.0004900026315789473, "loss": 0.7068, "step": 37970 }, { "epoch": 0.38, "learning_rate": 0.0004899236842105263, "loss": 0.7028, "step": 37980 }, { "epoch": 0.38, "learning_rate": 0.0004898447368421051, "loss": 0.7088, "step": 37990 }, { "epoch": 0.38, "learning_rate": 0.0004897657894736842, "loss": 0.7146, "step": 38000 }, { "epoch": 0.38, "learning_rate": 0.0004896868421052632, "loss": 0.7026, "step": 38010 }, { "epoch": 0.38, "learning_rate": 0.0004896078947368421, "loss": 0.6985, "step": 38020 }, { "epoch": 0.38, "learning_rate": 0.000489528947368421, "loss": 0.7207, "step": 38030 }, { "epoch": 0.38, "learning_rate": 0.0004894578947368421, "loss": 0.7109, "step": 38040 }, { "epoch": 0.38, "learning_rate": 0.000489378947368421, "loss": 0.7072, "step": 38050 }, { "epoch": 0.38, "learning_rate": 0.0004892999999999999, "loss": 0.7085, "step": 38060 }, { "epoch": 0.38, "learning_rate": 0.000489221052631579, "loss": 0.6987, "step": 38070 }, { "epoch": 0.38, "learning_rate": 0.0004891421052631578, "loss": 0.6996, "step": 38080 }, { "epoch": 0.38, "learning_rate": 0.0004890631578947369, "loss": 0.6952, "step": 38090 }, { "epoch": 0.38, "learning_rate": 0.0004889842105263157, "loss": 0.7141, "step": 38100 }, { "epoch": 0.38, "learning_rate": 0.0004889052631578948, "loss": 0.6997, "step": 38110 }, { "epoch": 0.38, "learning_rate": 0.0004888263157894736, "loss": 0.6953, "step": 38120 }, { "epoch": 0.38, "learning_rate": 0.0004887473684210526, "loss": 0.7028, "step": 38130 }, { "epoch": 0.38, "learning_rate": 0.0004886684210526315, "loss": 0.7287, "step": 38140 }, { "epoch": 0.38, "learning_rate": 0.0004885894736842104, "loss": 0.7033, "step": 38150 }, { "epoch": 0.38, "learning_rate": 0.0004885105263157895, "loss": 0.6955, "step": 38160 }, { "epoch": 0.38, "learning_rate": 0.0004884315789473683, "loss": 0.7003, "step": 38170 }, { "epoch": 0.38, "learning_rate": 0.0004883526315789474, "loss": 0.6893, "step": 38180 }, { "epoch": 0.38, "learning_rate": 0.00048827368421052624, "loss": 0.7037, "step": 38190 }, { "epoch": 0.38, "learning_rate": 0.00048819473684210524, "loss": 0.723, "step": 38200 }, { "epoch": 0.38, "learning_rate": 0.00048811578947368414, "loss": 0.6862, "step": 38210 }, { "epoch": 0.38, "learning_rate": 0.00048803684210526314, "loss": 0.7082, "step": 38220 }, { "epoch": 0.38, "learning_rate": 0.00048795789473684203, "loss": 0.7009, "step": 38230 }, { "epoch": 0.38, "learning_rate": 0.00048787894736842104, "loss": 0.7058, "step": 38240 }, { "epoch": 0.38, "learning_rate": 0.00048779999999999993, "loss": 0.7091, "step": 38250 }, { "epoch": 0.38, "learning_rate": 0.0004877210526315789, "loss": 0.7238, "step": 38260 }, { "epoch": 0.38, "learning_rate": 0.0004876421052631579, "loss": 0.696, "step": 38270 }, { "epoch": 0.38, "learning_rate": 0.0004875631578947368, "loss": 0.7026, "step": 38280 }, { "epoch": 0.38, "learning_rate": 0.0004874842105263158, "loss": 0.7143, "step": 38290 }, { "epoch": 0.38, "learning_rate": 0.00048740526315789467, "loss": 0.7052, "step": 38300 }, { "epoch": 0.38, "learning_rate": 0.0004873263157894737, "loss": 0.7143, "step": 38310 }, { "epoch": 0.38, "learning_rate": 0.00048724736842105257, "loss": 0.7069, "step": 38320 }, { "epoch": 0.38, "learning_rate": 0.0004871684210526315, "loss": 0.71, "step": 38330 }, { "epoch": 0.38, "learning_rate": 0.00048708947368421047, "loss": 0.695, "step": 38340 }, { "epoch": 0.38, "learning_rate": 0.0004870105263157894, "loss": 0.7147, "step": 38350 }, { "epoch": 0.38, "learning_rate": 0.0004869315789473684, "loss": 0.6984, "step": 38360 }, { "epoch": 0.38, "learning_rate": 0.0004868526315789473, "loss": 0.6981, "step": 38370 }, { "epoch": 0.38, "learning_rate": 0.0004867736842105263, "loss": 0.708, "step": 38380 }, { "epoch": 0.38, "learning_rate": 0.0004866947368421052, "loss": 0.6915, "step": 38390 }, { "epoch": 0.38, "learning_rate": 0.00048661578947368416, "loss": 0.6967, "step": 38400 }, { "epoch": 0.38, "learning_rate": 0.0004865368421052631, "loss": 0.7076, "step": 38410 }, { "epoch": 0.38, "learning_rate": 0.00048645789473684205, "loss": 0.7114, "step": 38420 }, { "epoch": 0.38, "learning_rate": 0.000486378947368421, "loss": 0.7053, "step": 38430 }, { "epoch": 0.38, "learning_rate": 0.00048629999999999995, "loss": 0.6946, "step": 38440 }, { "epoch": 0.38, "learning_rate": 0.00048622105263157895, "loss": 0.6925, "step": 38450 }, { "epoch": 0.38, "learning_rate": 0.00048614210526315785, "loss": 0.6931, "step": 38460 }, { "epoch": 0.38, "learning_rate": 0.00048606315789473685, "loss": 0.7136, "step": 38470 }, { "epoch": 0.38, "learning_rate": 0.00048598421052631574, "loss": 0.7081, "step": 38480 }, { "epoch": 0.38, "learning_rate": 0.0004859052631578947, "loss": 0.6948, "step": 38490 }, { "epoch": 0.39, "learning_rate": 0.00048582631578947364, "loss": 0.6915, "step": 38500 }, { "epoch": 0.39, "learning_rate": 0.0004857473684210526, "loss": 0.6917, "step": 38510 }, { "epoch": 0.39, "learning_rate": 0.0004856684210526315, "loss": 0.6973, "step": 38520 }, { "epoch": 0.39, "learning_rate": 0.0004855894736842105, "loss": 0.7004, "step": 38530 }, { "epoch": 0.39, "learning_rate": 0.0004855105263157895, "loss": 0.6954, "step": 38540 }, { "epoch": 0.39, "learning_rate": 0.0004854315789473684, "loss": 0.6859, "step": 38550 }, { "epoch": 0.39, "learning_rate": 0.00048535263157894733, "loss": 0.6932, "step": 38560 }, { "epoch": 0.39, "learning_rate": 0.0004852736842105263, "loss": 0.6987, "step": 38570 }, { "epoch": 0.39, "learning_rate": 0.0004851947368421052, "loss": 0.7112, "step": 38580 }, { "epoch": 0.39, "learning_rate": 0.0004851157894736842, "loss": 0.6718, "step": 38590 }, { "epoch": 0.39, "learning_rate": 0.0004850368421052631, "loss": 0.7012, "step": 38600 }, { "epoch": 0.39, "learning_rate": 0.000484957894736842, "loss": 0.7003, "step": 38610 }, { "epoch": 0.39, "learning_rate": 0.000484878947368421, "loss": 0.6851, "step": 38620 }, { "epoch": 0.39, "learning_rate": 0.00048479999999999997, "loss": 0.6972, "step": 38630 }, { "epoch": 0.39, "learning_rate": 0.0004847210526315789, "loss": 0.7078, "step": 38640 }, { "epoch": 0.39, "learning_rate": 0.00048464210526315786, "loss": 0.7038, "step": 38650 }, { "epoch": 0.39, "learning_rate": 0.0004845631578947368, "loss": 0.6966, "step": 38660 }, { "epoch": 0.39, "learning_rate": 0.00048448421052631576, "loss": 0.7014, "step": 38670 }, { "epoch": 0.39, "learning_rate": 0.00048440526315789465, "loss": 0.7019, "step": 38680 }, { "epoch": 0.39, "learning_rate": 0.00048432631578947366, "loss": 0.7057, "step": 38690 }, { "epoch": 0.39, "learning_rate": 0.00048424736842105255, "loss": 0.6955, "step": 38700 }, { "epoch": 0.39, "learning_rate": 0.00048416842105263155, "loss": 0.6929, "step": 38710 }, { "epoch": 0.39, "learning_rate": 0.0004840894736842105, "loss": 0.696, "step": 38720 }, { "epoch": 0.39, "learning_rate": 0.00048401052631578945, "loss": 0.6963, "step": 38730 }, { "epoch": 0.39, "learning_rate": 0.0004839315789473684, "loss": 0.6944, "step": 38740 }, { "epoch": 0.39, "learning_rate": 0.0004838526315789473, "loss": 0.7043, "step": 38750 }, { "epoch": 0.39, "learning_rate": 0.0004837736842105263, "loss": 0.6921, "step": 38760 }, { "epoch": 0.39, "learning_rate": 0.0004836947368421052, "loss": 0.6853, "step": 38770 }, { "epoch": 0.39, "learning_rate": 0.0004836157894736842, "loss": 0.6994, "step": 38780 }, { "epoch": 0.39, "learning_rate": 0.0004835368421052631, "loss": 0.6953, "step": 38790 }, { "epoch": 0.39, "learning_rate": 0.0004834578947368421, "loss": 0.6904, "step": 38800 }, { "epoch": 0.39, "learning_rate": 0.00048337894736842104, "loss": 0.6889, "step": 38810 }, { "epoch": 0.39, "learning_rate": 0.00048329999999999993, "loss": 0.6786, "step": 38820 }, { "epoch": 0.39, "learning_rate": 0.00048322105263157893, "loss": 0.6897, "step": 38830 }, { "epoch": 0.39, "learning_rate": 0.0004831421052631578, "loss": 0.6931, "step": 38840 }, { "epoch": 0.39, "learning_rate": 0.00048306315789473683, "loss": 0.6884, "step": 38850 }, { "epoch": 0.39, "learning_rate": 0.0004829842105263157, "loss": 0.6975, "step": 38860 }, { "epoch": 0.39, "learning_rate": 0.0004829052631578947, "loss": 0.7129, "step": 38870 }, { "epoch": 0.39, "learning_rate": 0.0004828263157894736, "loss": 0.7159, "step": 38880 }, { "epoch": 0.39, "learning_rate": 0.00048274736842105257, "loss": 0.6956, "step": 38890 }, { "epoch": 0.39, "learning_rate": 0.0004826684210526315, "loss": 0.6964, "step": 38900 }, { "epoch": 0.39, "learning_rate": 0.00048258947368421046, "loss": 0.7002, "step": 38910 }, { "epoch": 0.39, "learning_rate": 0.00048251052631578947, "loss": 0.6982, "step": 38920 }, { "epoch": 0.39, "learning_rate": 0.00048243157894736836, "loss": 0.7088, "step": 38930 }, { "epoch": 0.39, "learning_rate": 0.00048235263157894736, "loss": 0.7087, "step": 38940 }, { "epoch": 0.39, "learning_rate": 0.00048227368421052626, "loss": 0.7199, "step": 38950 }, { "epoch": 0.39, "learning_rate": 0.00048219473684210526, "loss": 0.7218, "step": 38960 }, { "epoch": 0.39, "learning_rate": 0.00048211578947368415, "loss": 0.6993, "step": 38970 }, { "epoch": 0.39, "learning_rate": 0.0004820368421052631, "loss": 0.6952, "step": 38980 }, { "epoch": 0.39, "learning_rate": 0.00048195789473684205, "loss": 0.6993, "step": 38990 }, { "epoch": 0.39, "learning_rate": 0.000481878947368421, "loss": 0.6955, "step": 39000 }, { "epoch": 0.39, "learning_rate": 0.0004818, "loss": 0.6888, "step": 39010 }, { "epoch": 0.39, "learning_rate": 0.0004817210526315789, "loss": 0.6949, "step": 39020 }, { "epoch": 0.39, "learning_rate": 0.0004816421052631579, "loss": 0.689, "step": 39030 }, { "epoch": 0.39, "learning_rate": 0.0004815710526315789, "loss": 0.7001, "step": 39040 }, { "epoch": 0.39, "learning_rate": 0.0004814921052631578, "loss": 0.6921, "step": 39050 }, { "epoch": 0.39, "learning_rate": 0.0004814131578947368, "loss": 0.6863, "step": 39060 }, { "epoch": 0.39, "learning_rate": 0.00048133421052631576, "loss": 0.6886, "step": 39070 }, { "epoch": 0.39, "learning_rate": 0.0004812552631578947, "loss": 0.6878, "step": 39080 }, { "epoch": 0.39, "learning_rate": 0.00048117631578947366, "loss": 0.6889, "step": 39090 }, { "epoch": 0.39, "learning_rate": 0.00048109736842105255, "loss": 0.6952, "step": 39100 }, { "epoch": 0.39, "learning_rate": 0.00048101842105263156, "loss": 0.7014, "step": 39110 }, { "epoch": 0.39, "learning_rate": 0.00048093947368421045, "loss": 0.687, "step": 39120 }, { "epoch": 0.39, "learning_rate": 0.00048086052631578945, "loss": 0.6916, "step": 39130 }, { "epoch": 0.39, "learning_rate": 0.00048078157894736835, "loss": 0.6913, "step": 39140 }, { "epoch": 0.39, "learning_rate": 0.00048070263157894735, "loss": 0.6941, "step": 39150 }, { "epoch": 0.39, "learning_rate": 0.0004806236842105263, "loss": 0.6897, "step": 39160 }, { "epoch": 0.39, "learning_rate": 0.0004805447368421052, "loss": 0.6934, "step": 39170 }, { "epoch": 0.39, "learning_rate": 0.0004804657894736842, "loss": 0.6852, "step": 39180 }, { "epoch": 0.39, "learning_rate": 0.0004803868421052631, "loss": 0.6965, "step": 39190 }, { "epoch": 0.39, "learning_rate": 0.0004803078947368421, "loss": 0.6887, "step": 39200 }, { "epoch": 0.39, "learning_rate": 0.000480228947368421, "loss": 0.6916, "step": 39210 }, { "epoch": 0.39, "learning_rate": 0.00048015, "loss": 0.6985, "step": 39220 }, { "epoch": 0.39, "learning_rate": 0.0004800710526315789, "loss": 0.6974, "step": 39230 }, { "epoch": 0.39, "learning_rate": 0.00047999210526315783, "loss": 0.6817, "step": 39240 }, { "epoch": 0.39, "learning_rate": 0.00047991315789473683, "loss": 0.6742, "step": 39250 }, { "epoch": 0.39, "learning_rate": 0.00047983421052631573, "loss": 0.681, "step": 39260 }, { "epoch": 0.39, "learning_rate": 0.00047975526315789473, "loss": 0.6815, "step": 39270 }, { "epoch": 0.39, "learning_rate": 0.0004796763157894736, "loss": 0.674, "step": 39280 }, { "epoch": 0.39, "learning_rate": 0.0004795973684210526, "loss": 0.6891, "step": 39290 }, { "epoch": 0.39, "learning_rate": 0.0004795184210526315, "loss": 0.6903, "step": 39300 }, { "epoch": 0.39, "learning_rate": 0.00047943947368421047, "loss": 0.6884, "step": 39310 }, { "epoch": 0.39, "learning_rate": 0.0004793605263157894, "loss": 0.7042, "step": 39320 }, { "epoch": 0.39, "learning_rate": 0.00047928157894736837, "loss": 0.6875, "step": 39330 }, { "epoch": 0.39, "learning_rate": 0.0004792026315789473, "loss": 0.6812, "step": 39340 }, { "epoch": 0.39, "learning_rate": 0.00047912368421052626, "loss": 0.6878, "step": 39350 }, { "epoch": 0.39, "learning_rate": 0.00047904473684210526, "loss": 0.6839, "step": 39360 }, { "epoch": 0.39, "learning_rate": 0.00047896578947368416, "loss": 0.7026, "step": 39370 }, { "epoch": 0.39, "learning_rate": 0.00047888684210526316, "loss": 0.6946, "step": 39380 }, { "epoch": 0.39, "learning_rate": 0.00047880789473684206, "loss": 0.7005, "step": 39390 }, { "epoch": 0.39, "learning_rate": 0.000478728947368421, "loss": 0.7001, "step": 39400 }, { "epoch": 0.39, "learning_rate": 0.00047864999999999995, "loss": 0.7012, "step": 39410 }, { "epoch": 0.39, "learning_rate": 0.0004785710526315789, "loss": 0.7053, "step": 39420 }, { "epoch": 0.39, "learning_rate": 0.0004784921052631578, "loss": 0.7048, "step": 39430 }, { "epoch": 0.39, "learning_rate": 0.0004784131578947368, "loss": 0.7034, "step": 39440 }, { "epoch": 0.39, "learning_rate": 0.0004783342105263158, "loss": 0.7107, "step": 39450 }, { "epoch": 0.39, "learning_rate": 0.0004782552631578947, "loss": 0.7069, "step": 39460 }, { "epoch": 0.39, "learning_rate": 0.00047817631578947364, "loss": 0.6973, "step": 39470 }, { "epoch": 0.39, "learning_rate": 0.0004780973684210526, "loss": 0.7031, "step": 39480 }, { "epoch": 0.39, "learning_rate": 0.00047801842105263154, "loss": 0.7056, "step": 39490 }, { "epoch": 0.4, "learning_rate": 0.0004779394736842105, "loss": 0.691, "step": 39500 }, { "epoch": 0.4, "learning_rate": 0.00047786052631578944, "loss": 0.6971, "step": 39510 }, { "epoch": 0.4, "learning_rate": 0.00047778157894736833, "loss": 0.699, "step": 39520 }, { "epoch": 0.4, "learning_rate": 0.00047770263157894733, "loss": 0.7114, "step": 39530 }, { "epoch": 0.4, "learning_rate": 0.0004776236842105263, "loss": 0.7069, "step": 39540 }, { "epoch": 0.4, "learning_rate": 0.00047754473684210523, "loss": 0.6815, "step": 39550 }, { "epoch": 0.4, "learning_rate": 0.0004774657894736842, "loss": 0.7017, "step": 39560 }, { "epoch": 0.4, "learning_rate": 0.0004773868421052631, "loss": 0.7047, "step": 39570 }, { "epoch": 0.4, "learning_rate": 0.0004773078947368421, "loss": 0.6874, "step": 39580 }, { "epoch": 0.4, "learning_rate": 0.00047722894736842097, "loss": 0.7093, "step": 39590 }, { "epoch": 0.4, "learning_rate": 0.00047714999999999997, "loss": 0.695, "step": 39600 }, { "epoch": 0.4, "learning_rate": 0.00047707105263157886, "loss": 0.7006, "step": 39610 }, { "epoch": 0.4, "learning_rate": 0.00047699210526315787, "loss": 0.713, "step": 39620 }, { "epoch": 0.4, "learning_rate": 0.0004769131578947368, "loss": 0.7064, "step": 39630 }, { "epoch": 0.4, "learning_rate": 0.00047683421052631576, "loss": 0.7092, "step": 39640 }, { "epoch": 0.4, "learning_rate": 0.0004767552631578947, "loss": 0.708, "step": 39650 }, { "epoch": 0.4, "learning_rate": 0.0004766763157894736, "loss": 0.6989, "step": 39660 }, { "epoch": 0.4, "learning_rate": 0.0004765973684210526, "loss": 0.7091, "step": 39670 }, { "epoch": 0.4, "learning_rate": 0.0004765184210526315, "loss": 0.6891, "step": 39680 }, { "epoch": 0.4, "learning_rate": 0.0004764394736842105, "loss": 0.701, "step": 39690 }, { "epoch": 0.4, "learning_rate": 0.0004763605263157894, "loss": 0.71, "step": 39700 }, { "epoch": 0.4, "learning_rate": 0.0004762815789473684, "loss": 0.6988, "step": 39710 }, { "epoch": 0.4, "learning_rate": 0.00047620263157894735, "loss": 0.6897, "step": 39720 }, { "epoch": 0.4, "learning_rate": 0.00047612368421052624, "loss": 0.6949, "step": 39730 }, { "epoch": 0.4, "learning_rate": 0.00047604473684210525, "loss": 0.6938, "step": 39740 }, { "epoch": 0.4, "learning_rate": 0.00047596578947368414, "loss": 0.6925, "step": 39750 }, { "epoch": 0.4, "learning_rate": 0.00047588684210526314, "loss": 0.705, "step": 39760 }, { "epoch": 0.4, "learning_rate": 0.00047580789473684204, "loss": 0.6964, "step": 39770 }, { "epoch": 0.4, "learning_rate": 0.00047572894736842104, "loss": 0.6825, "step": 39780 }, { "epoch": 0.4, "learning_rate": 0.00047564999999999993, "loss": 0.6854, "step": 39790 }, { "epoch": 0.4, "learning_rate": 0.0004755710526315789, "loss": 0.7001, "step": 39800 }, { "epoch": 0.4, "learning_rate": 0.0004754921052631579, "loss": 0.6817, "step": 39810 }, { "epoch": 0.4, "learning_rate": 0.0004754131578947368, "loss": 0.6928, "step": 39820 }, { "epoch": 0.4, "learning_rate": 0.0004753342105263158, "loss": 0.6887, "step": 39830 }, { "epoch": 0.4, "learning_rate": 0.0004752552631578947, "loss": 0.6972, "step": 39840 }, { "epoch": 0.4, "learning_rate": 0.0004751763157894737, "loss": 0.6946, "step": 39850 }, { "epoch": 0.4, "learning_rate": 0.00047509736842105257, "loss": 0.6966, "step": 39860 }, { "epoch": 0.4, "learning_rate": 0.0004750184210526316, "loss": 0.7063, "step": 39870 }, { "epoch": 0.4, "learning_rate": 0.00047493947368421047, "loss": 0.6945, "step": 39880 }, { "epoch": 0.4, "learning_rate": 0.0004748605263157894, "loss": 0.6974, "step": 39890 }, { "epoch": 0.4, "learning_rate": 0.00047478157894736836, "loss": 0.6954, "step": 39900 }, { "epoch": 0.4, "learning_rate": 0.0004747026315789473, "loss": 0.6986, "step": 39910 }, { "epoch": 0.4, "learning_rate": 0.0004746236842105263, "loss": 0.7061, "step": 39920 }, { "epoch": 0.4, "learning_rate": 0.0004745447368421052, "loss": 0.6927, "step": 39930 }, { "epoch": 0.4, "learning_rate": 0.0004744657894736842, "loss": 0.6977, "step": 39940 }, { "epoch": 0.4, "learning_rate": 0.0004743868421052631, "loss": 0.6994, "step": 39950 }, { "epoch": 0.4, "learning_rate": 0.00047430789473684205, "loss": 0.6969, "step": 39960 }, { "epoch": 0.4, "learning_rate": 0.000474228947368421, "loss": 0.707, "step": 39970 }, { "epoch": 0.4, "learning_rate": 0.00047414999999999995, "loss": 0.6954, "step": 39980 }, { "epoch": 0.4, "learning_rate": 0.0004740710526315789, "loss": 0.7007, "step": 39990 }, { "epoch": 0.4, "learning_rate": 0.00047399210526315785, "loss": 0.6871, "step": 40000 }, { "epoch": 0.4, "eval_accuracy": 0.8555457696762774, "eval_loss": 0.6865234375, "eval_runtime": 97.6539, "eval_samples_per_second": 819.22, "eval_steps_per_second": 1.608, "step": 40000 }, { "epoch": 0.4, "learning_rate": 0.00047391315789473685, "loss": 0.6924, "step": 40010 }, { "epoch": 0.4, "learning_rate": 0.00047383421052631574, "loss": 0.6849, "step": 40020 }, { "epoch": 0.4, "learning_rate": 0.0004737552631578947, "loss": 0.6881, "step": 40030 }, { "epoch": 0.4, "learning_rate": 0.00047368421052631577, "loss": 0.6896, "step": 40040 }, { "epoch": 0.4, "learning_rate": 0.00047360526315789466, "loss": 0.6836, "step": 40050 }, { "epoch": 0.4, "learning_rate": 0.00047352631578947366, "loss": 0.6836, "step": 40060 }, { "epoch": 0.4, "learning_rate": 0.0004734473684210526, "loss": 0.6805, "step": 40070 }, { "epoch": 0.4, "learning_rate": 0.0004733684210526315, "loss": 0.6913, "step": 40080 }, { "epoch": 0.4, "learning_rate": 0.0004732894736842105, "loss": 0.6908, "step": 40090 }, { "epoch": 0.4, "learning_rate": 0.0004732105263157894, "loss": 0.6953, "step": 40100 }, { "epoch": 0.4, "learning_rate": 0.0004731315789473684, "loss": 0.6889, "step": 40110 }, { "epoch": 0.4, "learning_rate": 0.0004730526315789473, "loss": 0.6899, "step": 40120 }, { "epoch": 0.4, "learning_rate": 0.0004729736842105263, "loss": 0.7003, "step": 40130 }, { "epoch": 0.4, "learning_rate": 0.0004728947368421052, "loss": 0.6891, "step": 40140 }, { "epoch": 0.4, "learning_rate": 0.00047281578947368415, "loss": 0.6883, "step": 40150 }, { "epoch": 0.4, "learning_rate": 0.00047273684210526315, "loss": 0.6824, "step": 40160 }, { "epoch": 0.4, "learning_rate": 0.00047265789473684204, "loss": 0.6874, "step": 40170 }, { "epoch": 0.4, "learning_rate": 0.00047257894736842104, "loss": 0.6972, "step": 40180 }, { "epoch": 0.4, "learning_rate": 0.00047249999999999994, "loss": 0.6928, "step": 40190 }, { "epoch": 0.4, "learning_rate": 0.00047242105263157894, "loss": 0.6907, "step": 40200 }, { "epoch": 0.4, "learning_rate": 0.00047234210526315783, "loss": 0.6792, "step": 40210 }, { "epoch": 0.4, "learning_rate": 0.0004722631578947368, "loss": 0.702, "step": 40220 }, { "epoch": 0.4, "learning_rate": 0.00047218421052631573, "loss": 0.6839, "step": 40230 }, { "epoch": 0.4, "learning_rate": 0.0004721052631578947, "loss": 0.6884, "step": 40240 }, { "epoch": 0.4, "learning_rate": 0.0004720263157894737, "loss": 0.6862, "step": 40250 }, { "epoch": 0.4, "learning_rate": 0.0004719473684210526, "loss": 0.6907, "step": 40260 }, { "epoch": 0.4, "learning_rate": 0.0004718684210526316, "loss": 0.6965, "step": 40270 }, { "epoch": 0.4, "learning_rate": 0.0004717894736842105, "loss": 0.6958, "step": 40280 }, { "epoch": 0.4, "learning_rate": 0.0004717105263157895, "loss": 0.6888, "step": 40290 }, { "epoch": 0.4, "learning_rate": 0.00047163157894736837, "loss": 0.682, "step": 40300 }, { "epoch": 0.4, "learning_rate": 0.0004715526315789473, "loss": 0.6933, "step": 40310 }, { "epoch": 0.4, "learning_rate": 0.00047147368421052627, "loss": 0.6881, "step": 40320 }, { "epoch": 0.4, "learning_rate": 0.0004713947368421052, "loss": 0.6933, "step": 40330 }, { "epoch": 0.4, "learning_rate": 0.0004713157894736842, "loss": 0.6976, "step": 40340 }, { "epoch": 0.4, "learning_rate": 0.0004712368421052631, "loss": 0.6975, "step": 40350 }, { "epoch": 0.4, "learning_rate": 0.0004711578947368421, "loss": 0.6913, "step": 40360 }, { "epoch": 0.4, "learning_rate": 0.000471078947368421, "loss": 0.6992, "step": 40370 }, { "epoch": 0.4, "learning_rate": 0.00047099999999999996, "loss": 0.6916, "step": 40380 }, { "epoch": 0.4, "learning_rate": 0.0004709210526315789, "loss": 0.6828, "step": 40390 }, { "epoch": 0.4, "learning_rate": 0.00047084210526315785, "loss": 0.6956, "step": 40400 }, { "epoch": 0.4, "learning_rate": 0.0004707631578947368, "loss": 0.6947, "step": 40410 }, { "epoch": 0.4, "learning_rate": 0.00047068421052631575, "loss": 0.6874, "step": 40420 }, { "epoch": 0.4, "learning_rate": 0.00047060526315789464, "loss": 0.6968, "step": 40430 }, { "epoch": 0.4, "learning_rate": 0.00047052631578947365, "loss": 0.7027, "step": 40440 }, { "epoch": 0.4, "learning_rate": 0.0004704473684210526, "loss": 0.7011, "step": 40450 }, { "epoch": 0.4, "learning_rate": 0.00047036842105263154, "loss": 0.7095, "step": 40460 }, { "epoch": 0.4, "learning_rate": 0.0004702894736842105, "loss": 0.694, "step": 40470 }, { "epoch": 0.4, "learning_rate": 0.00047021052631578944, "loss": 0.6841, "step": 40480 }, { "epoch": 0.4, "learning_rate": 0.0004701315789473684, "loss": 0.6947, "step": 40490 }, { "epoch": 0.41, "learning_rate": 0.0004700526315789473, "loss": 0.6903, "step": 40500 }, { "epoch": 0.41, "learning_rate": 0.0004699736842105263, "loss": 0.6838, "step": 40510 }, { "epoch": 0.41, "learning_rate": 0.0004698947368421052, "loss": 0.6863, "step": 40520 }, { "epoch": 0.41, "learning_rate": 0.0004698157894736842, "loss": 0.6855, "step": 40530 }, { "epoch": 0.41, "learning_rate": 0.00046973684210526313, "loss": 0.6874, "step": 40540 }, { "epoch": 0.41, "learning_rate": 0.0004696578947368421, "loss": 0.7044, "step": 40550 }, { "epoch": 0.41, "learning_rate": 0.000469578947368421, "loss": 0.6961, "step": 40560 }, { "epoch": 0.41, "learning_rate": 0.0004694999999999999, "loss": 0.695, "step": 40570 }, { "epoch": 0.41, "learning_rate": 0.0004694210526315789, "loss": 0.6925, "step": 40580 }, { "epoch": 0.41, "learning_rate": 0.0004693421052631578, "loss": 0.6871, "step": 40590 }, { "epoch": 0.41, "learning_rate": 0.0004692631578947368, "loss": 0.6961, "step": 40600 }, { "epoch": 0.41, "learning_rate": 0.0004691842105263157, "loss": 0.6917, "step": 40610 }, { "epoch": 0.41, "learning_rate": 0.0004691052631578947, "loss": 0.7046, "step": 40620 }, { "epoch": 0.41, "learning_rate": 0.00046902631578947366, "loss": 0.6964, "step": 40630 }, { "epoch": 0.41, "learning_rate": 0.00046894736842105256, "loss": 0.7025, "step": 40640 }, { "epoch": 0.41, "learning_rate": 0.00046886842105263156, "loss": 0.697, "step": 40650 }, { "epoch": 0.41, "learning_rate": 0.00046878947368421045, "loss": 0.7004, "step": 40660 }, { "epoch": 0.41, "learning_rate": 0.00046871052631578946, "loss": 0.6905, "step": 40670 }, { "epoch": 0.41, "learning_rate": 0.00046863157894736835, "loss": 0.6974, "step": 40680 }, { "epoch": 0.41, "learning_rate": 0.00046855263157894735, "loss": 0.6892, "step": 40690 }, { "epoch": 0.41, "learning_rate": 0.00046847368421052625, "loss": 0.703, "step": 40700 }, { "epoch": 0.41, "learning_rate": 0.0004683947368421052, "loss": 0.6959, "step": 40710 }, { "epoch": 0.41, "learning_rate": 0.0004683157894736842, "loss": 0.7007, "step": 40720 }, { "epoch": 0.41, "learning_rate": 0.0004682368421052631, "loss": 0.7028, "step": 40730 }, { "epoch": 0.41, "learning_rate": 0.0004681578947368421, "loss": 0.6976, "step": 40740 }, { "epoch": 0.41, "learning_rate": 0.000468078947368421, "loss": 0.6759, "step": 40750 }, { "epoch": 0.41, "learning_rate": 0.000468, "loss": 0.687, "step": 40760 }, { "epoch": 0.41, "learning_rate": 0.0004679210526315789, "loss": 0.691, "step": 40770 }, { "epoch": 0.41, "learning_rate": 0.0004678421052631579, "loss": 0.7051, "step": 40780 }, { "epoch": 0.41, "learning_rate": 0.0004677631578947368, "loss": 0.6868, "step": 40790 }, { "epoch": 0.41, "learning_rate": 0.00046768421052631573, "loss": 0.6897, "step": 40800 }, { "epoch": 0.41, "learning_rate": 0.00046760526315789473, "loss": 0.6961, "step": 40810 }, { "epoch": 0.41, "learning_rate": 0.00046752631578947363, "loss": 0.6947, "step": 40820 }, { "epoch": 0.41, "learning_rate": 0.00046744736842105263, "loss": 0.6867, "step": 40830 }, { "epoch": 0.41, "learning_rate": 0.0004673684210526315, "loss": 0.7068, "step": 40840 }, { "epoch": 0.41, "learning_rate": 0.0004672894736842105, "loss": 0.6843, "step": 40850 }, { "epoch": 0.41, "learning_rate": 0.0004672105263157894, "loss": 0.6921, "step": 40860 }, { "epoch": 0.41, "learning_rate": 0.00046713157894736837, "loss": 0.6965, "step": 40870 }, { "epoch": 0.41, "learning_rate": 0.0004670526315789473, "loss": 0.6805, "step": 40880 }, { "epoch": 0.41, "learning_rate": 0.00046697368421052627, "loss": 0.6899, "step": 40890 }, { "epoch": 0.41, "learning_rate": 0.00046689473684210527, "loss": 0.7008, "step": 40900 }, { "epoch": 0.41, "learning_rate": 0.00046681578947368416, "loss": 0.6849, "step": 40910 }, { "epoch": 0.41, "learning_rate": 0.00046673684210526316, "loss": 0.6882, "step": 40920 }, { "epoch": 0.41, "learning_rate": 0.00046665789473684206, "loss": 0.6899, "step": 40930 }, { "epoch": 0.41, "learning_rate": 0.000466578947368421, "loss": 0.6999, "step": 40940 }, { "epoch": 0.41, "learning_rate": 0.00046649999999999996, "loss": 0.6869, "step": 40950 }, { "epoch": 0.41, "learning_rate": 0.0004664210526315789, "loss": 0.6805, "step": 40960 }, { "epoch": 0.41, "learning_rate": 0.00046634210526315785, "loss": 0.7013, "step": 40970 }, { "epoch": 0.41, "learning_rate": 0.0004662631578947368, "loss": 0.7048, "step": 40980 }, { "epoch": 0.41, "learning_rate": 0.0004661842105263157, "loss": 0.6954, "step": 40990 }, { "epoch": 0.41, "learning_rate": 0.0004661052631578947, "loss": 0.6995, "step": 41000 }, { "epoch": 0.41, "learning_rate": 0.00046602631578947364, "loss": 0.6981, "step": 41010 }, { "epoch": 0.41, "learning_rate": 0.0004659473684210526, "loss": 0.6892, "step": 41020 }, { "epoch": 0.41, "learning_rate": 0.00046586842105263154, "loss": 0.7067, "step": 41030 }, { "epoch": 0.41, "learning_rate": 0.0004657973684210526, "loss": 0.7051, "step": 41040 }, { "epoch": 0.41, "learning_rate": 0.0004657184210526315, "loss": 0.6936, "step": 41050 }, { "epoch": 0.41, "learning_rate": 0.00046563947368421046, "loss": 0.6929, "step": 41060 }, { "epoch": 0.41, "learning_rate": 0.00046556052631578946, "loss": 0.6958, "step": 41070 }, { "epoch": 0.41, "learning_rate": 0.00046548157894736836, "loss": 0.689, "step": 41080 }, { "epoch": 0.41, "learning_rate": 0.00046540263157894736, "loss": 0.687, "step": 41090 }, { "epoch": 0.41, "learning_rate": 0.00046532368421052625, "loss": 0.6948, "step": 41100 }, { "epoch": 0.41, "learning_rate": 0.00046524473684210525, "loss": 0.6971, "step": 41110 }, { "epoch": 0.41, "learning_rate": 0.00046516578947368415, "loss": 0.7012, "step": 41120 }, { "epoch": 0.41, "learning_rate": 0.00046508684210526315, "loss": 0.6793, "step": 41130 }, { "epoch": 0.41, "learning_rate": 0.00046500789473684205, "loss": 0.6915, "step": 41140 }, { "epoch": 0.41, "learning_rate": 0.000464928947368421, "loss": 0.6676, "step": 41150 }, { "epoch": 0.41, "learning_rate": 0.00046485, "loss": 0.6703, "step": 41160 }, { "epoch": 0.41, "learning_rate": 0.0004647710526315789, "loss": 0.6636, "step": 41170 }, { "epoch": 0.41, "learning_rate": 0.0004646921052631579, "loss": 0.6597, "step": 41180 }, { "epoch": 0.41, "learning_rate": 0.0004646131578947368, "loss": 0.6886, "step": 41190 }, { "epoch": 0.41, "learning_rate": 0.0004645342105263158, "loss": 0.6793, "step": 41200 }, { "epoch": 0.41, "learning_rate": 0.0004644552631578947, "loss": 0.69, "step": 41210 }, { "epoch": 0.41, "learning_rate": 0.00046437631578947363, "loss": 0.6747, "step": 41220 }, { "epoch": 0.41, "learning_rate": 0.0004642973684210526, "loss": 0.686, "step": 41230 }, { "epoch": 0.41, "learning_rate": 0.00046421842105263153, "loss": 0.6928, "step": 41240 }, { "epoch": 0.41, "learning_rate": 0.00046413947368421053, "loss": 0.7013, "step": 41250 }, { "epoch": 0.41, "learning_rate": 0.0004640605263157894, "loss": 0.695, "step": 41260 }, { "epoch": 0.41, "learning_rate": 0.00046398157894736843, "loss": 0.6906, "step": 41270 }, { "epoch": 0.41, "learning_rate": 0.0004639026315789473, "loss": 0.7013, "step": 41280 }, { "epoch": 0.41, "learning_rate": 0.00046382368421052627, "loss": 0.6922, "step": 41290 }, { "epoch": 0.41, "learning_rate": 0.0004637447368421052, "loss": 0.7055, "step": 41300 }, { "epoch": 0.41, "learning_rate": 0.00046366578947368417, "loss": 0.7107, "step": 41310 }, { "epoch": 0.41, "learning_rate": 0.0004635868421052631, "loss": 0.6923, "step": 41320 }, { "epoch": 0.41, "learning_rate": 0.00046350789473684206, "loss": 0.7092, "step": 41330 }, { "epoch": 0.41, "learning_rate": 0.00046342894736842107, "loss": 0.6952, "step": 41340 }, { "epoch": 0.41, "learning_rate": 0.00046334999999999996, "loss": 0.6947, "step": 41350 }, { "epoch": 0.41, "learning_rate": 0.0004632710526315789, "loss": 0.682, "step": 41360 }, { "epoch": 0.41, "learning_rate": 0.00046319210526315786, "loss": 0.6881, "step": 41370 }, { "epoch": 0.41, "learning_rate": 0.0004631131578947368, "loss": 0.6981, "step": 41380 }, { "epoch": 0.41, "learning_rate": 0.00046303421052631575, "loss": 0.6984, "step": 41390 }, { "epoch": 0.41, "learning_rate": 0.0004629552631578947, "loss": 0.6828, "step": 41400 }, { "epoch": 0.41, "learning_rate": 0.0004628763157894736, "loss": 0.6994, "step": 41410 }, { "epoch": 0.41, "learning_rate": 0.0004627973684210526, "loss": 0.7045, "step": 41420 }, { "epoch": 0.41, "learning_rate": 0.00046271842105263155, "loss": 0.6925, "step": 41430 }, { "epoch": 0.41, "learning_rate": 0.0004626394736842105, "loss": 0.6868, "step": 41440 }, { "epoch": 0.41, "learning_rate": 0.00046256052631578944, "loss": 0.6992, "step": 41450 }, { "epoch": 0.41, "learning_rate": 0.0004624815789473684, "loss": 0.7059, "step": 41460 }, { "epoch": 0.41, "learning_rate": 0.00046240263157894734, "loss": 0.7006, "step": 41470 }, { "epoch": 0.41, "learning_rate": 0.00046232368421052623, "loss": 0.7029, "step": 41480 }, { "epoch": 0.41, "learning_rate": 0.00046224473684210524, "loss": 0.6921, "step": 41490 }, { "epoch": 0.41, "learning_rate": 0.00046216578947368413, "loss": 0.6974, "step": 41500 }, { "epoch": 0.42, "learning_rate": 0.00046208684210526313, "loss": 0.7081, "step": 41510 }, { "epoch": 0.42, "learning_rate": 0.000462007894736842, "loss": 0.6967, "step": 41520 }, { "epoch": 0.42, "learning_rate": 0.00046192894736842103, "loss": 0.6858, "step": 41530 }, { "epoch": 0.42, "learning_rate": 0.00046185, "loss": 0.6899, "step": 41540 }, { "epoch": 0.42, "learning_rate": 0.00046177105263157887, "loss": 0.7043, "step": 41550 }, { "epoch": 0.42, "learning_rate": 0.0004616921052631579, "loss": 0.7068, "step": 41560 }, { "epoch": 0.42, "learning_rate": 0.00046161315789473677, "loss": 0.6884, "step": 41570 }, { "epoch": 0.42, "learning_rate": 0.00046153421052631577, "loss": 0.6911, "step": 41580 }, { "epoch": 0.42, "learning_rate": 0.00046145526315789466, "loss": 0.6755, "step": 41590 }, { "epoch": 0.42, "learning_rate": 0.00046137631578947367, "loss": 0.6848, "step": 41600 }, { "epoch": 0.42, "learning_rate": 0.00046129736842105256, "loss": 0.6875, "step": 41610 }, { "epoch": 0.42, "learning_rate": 0.00046121842105263156, "loss": 0.6835, "step": 41620 }, { "epoch": 0.42, "learning_rate": 0.0004611394736842105, "loss": 0.6804, "step": 41630 }, { "epoch": 0.42, "learning_rate": 0.0004610605263157894, "loss": 0.7054, "step": 41640 }, { "epoch": 0.42, "learning_rate": 0.0004609815789473684, "loss": 0.7074, "step": 41650 }, { "epoch": 0.42, "learning_rate": 0.0004609026315789473, "loss": 0.7067, "step": 41660 }, { "epoch": 0.42, "learning_rate": 0.0004608236842105263, "loss": 0.689, "step": 41670 }, { "epoch": 0.42, "learning_rate": 0.0004607447368421052, "loss": 0.6833, "step": 41680 }, { "epoch": 0.42, "learning_rate": 0.0004606657894736842, "loss": 0.694, "step": 41690 }, { "epoch": 0.42, "learning_rate": 0.0004605868421052631, "loss": 0.6868, "step": 41700 }, { "epoch": 0.42, "learning_rate": 0.00046050789473684204, "loss": 0.6911, "step": 41710 }, { "epoch": 0.42, "learning_rate": 0.00046042894736842105, "loss": 0.6926, "step": 41720 }, { "epoch": 0.42, "learning_rate": 0.00046034999999999994, "loss": 0.7007, "step": 41730 }, { "epoch": 0.42, "learning_rate": 0.00046027105263157894, "loss": 0.6976, "step": 41740 }, { "epoch": 0.42, "learning_rate": 0.00046019210526315784, "loss": 0.6939, "step": 41750 }, { "epoch": 0.42, "learning_rate": 0.00046011315789473684, "loss": 0.6798, "step": 41760 }, { "epoch": 0.42, "learning_rate": 0.00046003421052631573, "loss": 0.6686, "step": 41770 }, { "epoch": 0.42, "learning_rate": 0.0004599552631578947, "loss": 0.6657, "step": 41780 }, { "epoch": 0.42, "learning_rate": 0.00045987631578947363, "loss": 0.6722, "step": 41790 }, { "epoch": 0.42, "learning_rate": 0.0004597973684210526, "loss": 0.667, "step": 41800 }, { "epoch": 0.42, "learning_rate": 0.0004597184210526316, "loss": 0.702, "step": 41810 }, { "epoch": 0.42, "learning_rate": 0.0004596394736842105, "loss": 0.6961, "step": 41820 }, { "epoch": 0.42, "learning_rate": 0.0004595605263157895, "loss": 0.7083, "step": 41830 }, { "epoch": 0.42, "learning_rate": 0.00045948157894736837, "loss": 0.7035, "step": 41840 }, { "epoch": 0.42, "learning_rate": 0.0004594026315789473, "loss": 0.6927, "step": 41850 }, { "epoch": 0.42, "learning_rate": 0.00045932368421052627, "loss": 0.7025, "step": 41860 }, { "epoch": 0.42, "learning_rate": 0.0004592447368421052, "loss": 0.7172, "step": 41870 }, { "epoch": 0.42, "learning_rate": 0.00045916578947368417, "loss": 0.689, "step": 41880 }, { "epoch": 0.42, "learning_rate": 0.0004590868421052631, "loss": 0.7104, "step": 41890 }, { "epoch": 0.42, "learning_rate": 0.0004590078947368421, "loss": 0.6763, "step": 41900 }, { "epoch": 0.42, "learning_rate": 0.000458928947368421, "loss": 0.6951, "step": 41910 }, { "epoch": 0.42, "learning_rate": 0.00045884999999999996, "loss": 0.6861, "step": 41920 }, { "epoch": 0.42, "learning_rate": 0.0004587710526315789, "loss": 0.6988, "step": 41930 }, { "epoch": 0.42, "learning_rate": 0.00045869210526315786, "loss": 0.705, "step": 41940 }, { "epoch": 0.42, "learning_rate": 0.0004586131578947368, "loss": 0.6976, "step": 41950 }, { "epoch": 0.42, "learning_rate": 0.00045853421052631575, "loss": 0.6956, "step": 41960 }, { "epoch": 0.42, "learning_rate": 0.00045845526315789465, "loss": 0.7138, "step": 41970 }, { "epoch": 0.42, "learning_rate": 0.00045837631578947365, "loss": 0.6899, "step": 41980 }, { "epoch": 0.42, "learning_rate": 0.00045829736842105265, "loss": 0.6924, "step": 41990 }, { "epoch": 0.42, "learning_rate": 0.00045821842105263155, "loss": 0.6994, "step": 42000 }, { "epoch": 0.42, "learning_rate": 0.0004581394736842105, "loss": 0.6916, "step": 42010 }, { "epoch": 0.42, "learning_rate": 0.00045806052631578944, "loss": 0.6946, "step": 42020 }, { "epoch": 0.42, "learning_rate": 0.0004579815789473684, "loss": 0.6987, "step": 42030 }, { "epoch": 0.42, "learning_rate": 0.00045791052631578947, "loss": 0.6817, "step": 42040 }, { "epoch": 0.42, "learning_rate": 0.00045783157894736836, "loss": 0.6914, "step": 42050 }, { "epoch": 0.42, "learning_rate": 0.0004577526315789473, "loss": 0.6875, "step": 42060 }, { "epoch": 0.42, "learning_rate": 0.0004576736842105263, "loss": 0.6995, "step": 42070 }, { "epoch": 0.42, "learning_rate": 0.0004575947368421052, "loss": 0.6953, "step": 42080 }, { "epoch": 0.42, "learning_rate": 0.0004575157894736842, "loss": 0.6829, "step": 42090 }, { "epoch": 0.42, "learning_rate": 0.0004574368421052631, "loss": 0.6764, "step": 42100 }, { "epoch": 0.42, "learning_rate": 0.0004573578947368421, "loss": 0.6876, "step": 42110 }, { "epoch": 0.42, "learning_rate": 0.000457278947368421, "loss": 0.705, "step": 42120 }, { "epoch": 0.42, "learning_rate": 0.00045719999999999995, "loss": 0.6856, "step": 42130 }, { "epoch": 0.42, "learning_rate": 0.0004571210526315789, "loss": 0.6913, "step": 42140 }, { "epoch": 0.42, "learning_rate": 0.00045704210526315784, "loss": 0.6964, "step": 42150 }, { "epoch": 0.42, "learning_rate": 0.00045696315789473684, "loss": 0.6934, "step": 42160 }, { "epoch": 0.42, "learning_rate": 0.00045688421052631574, "loss": 0.6899, "step": 42170 }, { "epoch": 0.42, "learning_rate": 0.00045680526315789474, "loss": 0.693, "step": 42180 }, { "epoch": 0.42, "learning_rate": 0.00045672631578947364, "loss": 0.684, "step": 42190 }, { "epoch": 0.42, "learning_rate": 0.0004566473684210526, "loss": 0.6944, "step": 42200 }, { "epoch": 0.42, "learning_rate": 0.00045656842105263153, "loss": 0.6775, "step": 42210 }, { "epoch": 0.42, "learning_rate": 0.0004564894736842105, "loss": 0.6865, "step": 42220 }, { "epoch": 0.42, "learning_rate": 0.00045641052631578943, "loss": 0.6876, "step": 42230 }, { "epoch": 0.42, "learning_rate": 0.0004563315789473684, "loss": 0.6855, "step": 42240 }, { "epoch": 0.42, "learning_rate": 0.0004562526315789474, "loss": 0.6881, "step": 42250 }, { "epoch": 0.42, "learning_rate": 0.0004561736842105263, "loss": 0.6939, "step": 42260 }, { "epoch": 0.42, "learning_rate": 0.0004560947368421052, "loss": 0.6921, "step": 42270 }, { "epoch": 0.42, "learning_rate": 0.00045601578947368417, "loss": 0.6904, "step": 42280 }, { "epoch": 0.42, "learning_rate": 0.0004559368421052631, "loss": 0.6913, "step": 42290 }, { "epoch": 0.42, "learning_rate": 0.00045585789473684207, "loss": 0.6889, "step": 42300 }, { "epoch": 0.42, "learning_rate": 0.000455778947368421, "loss": 0.675, "step": 42310 }, { "epoch": 0.42, "learning_rate": 0.0004556999999999999, "loss": 0.6869, "step": 42320 }, { "epoch": 0.42, "learning_rate": 0.0004556210526315789, "loss": 0.6844, "step": 42330 }, { "epoch": 0.42, "learning_rate": 0.00045554210526315786, "loss": 0.6827, "step": 42340 }, { "epoch": 0.42, "learning_rate": 0.0004554631578947368, "loss": 0.6916, "step": 42350 }, { "epoch": 0.42, "learning_rate": 0.00045538421052631576, "loss": 0.6821, "step": 42360 }, { "epoch": 0.42, "learning_rate": 0.0004553052631578947, "loss": 0.6998, "step": 42370 }, { "epoch": 0.42, "learning_rate": 0.00045522631578947365, "loss": 0.6859, "step": 42380 }, { "epoch": 0.42, "learning_rate": 0.00045514736842105255, "loss": 0.6947, "step": 42390 }, { "epoch": 0.42, "learning_rate": 0.00045506842105263155, "loss": 0.685, "step": 42400 }, { "epoch": 0.42, "learning_rate": 0.00045498947368421044, "loss": 0.6817, "step": 42410 }, { "epoch": 0.42, "learning_rate": 0.00045491052631578945, "loss": 0.6796, "step": 42420 }, { "epoch": 0.42, "learning_rate": 0.0004548315789473684, "loss": 0.6856, "step": 42430 }, { "epoch": 0.42, "learning_rate": 0.00045475263157894734, "loss": 0.699, "step": 42440 }, { "epoch": 0.42, "learning_rate": 0.0004546736842105263, "loss": 0.6971, "step": 42450 }, { "epoch": 0.42, "learning_rate": 0.0004545947368421052, "loss": 0.6875, "step": 42460 }, { "epoch": 0.42, "learning_rate": 0.0004545157894736842, "loss": 0.6906, "step": 42470 }, { "epoch": 0.42, "learning_rate": 0.0004544368421052631, "loss": 0.6809, "step": 42480 }, { "epoch": 0.42, "learning_rate": 0.0004543578947368421, "loss": 0.7047, "step": 42490 }, { "epoch": 0.42, "learning_rate": 0.000454278947368421, "loss": 0.6838, "step": 42500 }, { "epoch": 0.42, "eval_accuracy": 0.8575258035447281, "eval_loss": 0.677734375, "eval_runtime": 96.7246, "eval_samples_per_second": 827.09, "eval_steps_per_second": 1.623, "step": 42500 }, { "epoch": 0.43, "learning_rate": 0.0004542, "loss": 0.6802, "step": 42510 }, { "epoch": 0.43, "learning_rate": 0.0004541210526315789, "loss": 0.6877, "step": 42520 }, { "epoch": 0.43, "learning_rate": 0.0004540421052631579, "loss": 0.6861, "step": 42530 }, { "epoch": 0.43, "learning_rate": 0.0004539631578947368, "loss": 0.6852, "step": 42540 }, { "epoch": 0.43, "learning_rate": 0.0004538842105263157, "loss": 0.6799, "step": 42550 }, { "epoch": 0.43, "learning_rate": 0.0004538052631578947, "loss": 0.6797, "step": 42560 }, { "epoch": 0.43, "learning_rate": 0.0004537263157894736, "loss": 0.6812, "step": 42570 }, { "epoch": 0.43, "learning_rate": 0.0004536473684210526, "loss": 0.6601, "step": 42580 }, { "epoch": 0.43, "learning_rate": 0.0004535684210526315, "loss": 0.675, "step": 42590 }, { "epoch": 0.43, "learning_rate": 0.0004534894736842105, "loss": 0.6841, "step": 42600 }, { "epoch": 0.43, "learning_rate": 0.0004534105263157894, "loss": 0.6829, "step": 42610 }, { "epoch": 0.43, "learning_rate": 0.00045333157894736836, "loss": 0.6653, "step": 42620 }, { "epoch": 0.43, "learning_rate": 0.00045325263157894736, "loss": 0.6829, "step": 42630 }, { "epoch": 0.43, "learning_rate": 0.00045317368421052626, "loss": 0.6849, "step": 42640 }, { "epoch": 0.43, "learning_rate": 0.00045309473684210526, "loss": 0.6735, "step": 42650 }, { "epoch": 0.43, "learning_rate": 0.00045301578947368415, "loss": 0.6897, "step": 42660 }, { "epoch": 0.43, "learning_rate": 0.00045293684210526315, "loss": 0.6926, "step": 42670 }, { "epoch": 0.43, "learning_rate": 0.00045285789473684205, "loss": 0.6934, "step": 42680 }, { "epoch": 0.43, "learning_rate": 0.000452778947368421, "loss": 0.6712, "step": 42690 }, { "epoch": 0.43, "learning_rate": 0.00045269999999999994, "loss": 0.6946, "step": 42700 }, { "epoch": 0.43, "learning_rate": 0.0004526210526315789, "loss": 0.673, "step": 42710 }, { "epoch": 0.43, "learning_rate": 0.0004525421052631579, "loss": 0.6845, "step": 42720 }, { "epoch": 0.43, "learning_rate": 0.0004524631578947368, "loss": 0.6793, "step": 42730 }, { "epoch": 0.43, "learning_rate": 0.0004523842105263158, "loss": 0.6748, "step": 42740 }, { "epoch": 0.43, "learning_rate": 0.0004523052631578947, "loss": 0.6857, "step": 42750 }, { "epoch": 0.43, "learning_rate": 0.00045222631578947363, "loss": 0.6747, "step": 42760 }, { "epoch": 0.43, "learning_rate": 0.0004521473684210526, "loss": 0.6798, "step": 42770 }, { "epoch": 0.43, "learning_rate": 0.00045206842105263153, "loss": 0.6589, "step": 42780 }, { "epoch": 0.43, "learning_rate": 0.0004519894736842105, "loss": 0.6727, "step": 42790 }, { "epoch": 0.43, "learning_rate": 0.00045191052631578943, "loss": 0.6831, "step": 42800 }, { "epoch": 0.43, "learning_rate": 0.00045183157894736843, "loss": 0.6783, "step": 42810 }, { "epoch": 0.43, "learning_rate": 0.0004517526315789473, "loss": 0.6891, "step": 42820 }, { "epoch": 0.43, "learning_rate": 0.00045167368421052627, "loss": 0.6791, "step": 42830 }, { "epoch": 0.43, "learning_rate": 0.0004515947368421052, "loss": 0.674, "step": 42840 }, { "epoch": 0.43, "learning_rate": 0.00045151578947368417, "loss": 0.6715, "step": 42850 }, { "epoch": 0.43, "learning_rate": 0.0004514368421052631, "loss": 0.6802, "step": 42860 }, { "epoch": 0.43, "learning_rate": 0.00045135789473684207, "loss": 0.6719, "step": 42870 }, { "epoch": 0.43, "learning_rate": 0.00045127894736842096, "loss": 0.6785, "step": 42880 }, { "epoch": 0.43, "learning_rate": 0.00045119999999999996, "loss": 0.6824, "step": 42890 }, { "epoch": 0.43, "learning_rate": 0.00045112105263157897, "loss": 0.6907, "step": 42900 }, { "epoch": 0.43, "learning_rate": 0.00045104210526315786, "loss": 0.6926, "step": 42910 }, { "epoch": 0.43, "learning_rate": 0.0004509631578947368, "loss": 0.6813, "step": 42920 }, { "epoch": 0.43, "learning_rate": 0.00045088421052631576, "loss": 0.6878, "step": 42930 }, { "epoch": 0.43, "learning_rate": 0.0004508052631578947, "loss": 0.6756, "step": 42940 }, { "epoch": 0.43, "learning_rate": 0.0004507263157894736, "loss": 0.6811, "step": 42950 }, { "epoch": 0.43, "learning_rate": 0.0004506473684210526, "loss": 0.6798, "step": 42960 }, { "epoch": 0.43, "learning_rate": 0.0004505684210526315, "loss": 0.6847, "step": 42970 }, { "epoch": 0.43, "learning_rate": 0.0004504894736842105, "loss": 0.6842, "step": 42980 }, { "epoch": 0.43, "learning_rate": 0.00045041052631578945, "loss": 0.6751, "step": 42990 }, { "epoch": 0.43, "learning_rate": 0.0004503315789473684, "loss": 0.6784, "step": 43000 }, { "epoch": 0.43, "learning_rate": 0.00045025263157894734, "loss": 0.6883, "step": 43010 }, { "epoch": 0.43, "learning_rate": 0.0004501736842105263, "loss": 0.676, "step": 43020 }, { "epoch": 0.43, "learning_rate": 0.00045009473684210524, "loss": 0.6871, "step": 43030 }, { "epoch": 0.43, "learning_rate": 0.00045001578947368413, "loss": 0.683, "step": 43040 }, { "epoch": 0.43, "learning_rate": 0.0004499447368421052, "loss": 0.6733, "step": 43050 }, { "epoch": 0.43, "learning_rate": 0.00044986578947368416, "loss": 0.684, "step": 43060 }, { "epoch": 0.43, "learning_rate": 0.00044978684210526316, "loss": 0.6839, "step": 43070 }, { "epoch": 0.43, "learning_rate": 0.00044970789473684205, "loss": 0.6992, "step": 43080 }, { "epoch": 0.43, "learning_rate": 0.00044962894736842106, "loss": 0.6915, "step": 43090 }, { "epoch": 0.43, "learning_rate": 0.00044954999999999995, "loss": 0.6869, "step": 43100 }, { "epoch": 0.43, "learning_rate": 0.0004494710526315789, "loss": 0.6782, "step": 43110 }, { "epoch": 0.43, "learning_rate": 0.00044939210526315785, "loss": 0.6847, "step": 43120 }, { "epoch": 0.43, "learning_rate": 0.0004493131578947368, "loss": 0.6886, "step": 43130 }, { "epoch": 0.43, "learning_rate": 0.00044923421052631574, "loss": 0.6848, "step": 43140 }, { "epoch": 0.43, "learning_rate": 0.0004491552631578947, "loss": 0.6659, "step": 43150 }, { "epoch": 0.43, "learning_rate": 0.0004490763157894737, "loss": 0.686, "step": 43160 }, { "epoch": 0.43, "learning_rate": 0.0004489973684210526, "loss": 0.678, "step": 43170 }, { "epoch": 0.43, "learning_rate": 0.00044891842105263154, "loss": 0.6871, "step": 43180 }, { "epoch": 0.43, "learning_rate": 0.0004488394736842105, "loss": 0.6864, "step": 43190 }, { "epoch": 0.43, "learning_rate": 0.00044876052631578943, "loss": 0.6782, "step": 43200 }, { "epoch": 0.43, "learning_rate": 0.0004486815789473684, "loss": 0.6772, "step": 43210 }, { "epoch": 0.43, "learning_rate": 0.00044860263157894733, "loss": 0.6797, "step": 43220 }, { "epoch": 0.43, "learning_rate": 0.0004485236842105262, "loss": 0.6743, "step": 43230 }, { "epoch": 0.43, "learning_rate": 0.0004484447368421052, "loss": 0.6811, "step": 43240 }, { "epoch": 0.43, "learning_rate": 0.0004483657894736842, "loss": 0.6851, "step": 43250 }, { "epoch": 0.43, "learning_rate": 0.0004482868421052631, "loss": 0.6763, "step": 43260 }, { "epoch": 0.43, "learning_rate": 0.00044820789473684207, "loss": 0.6883, "step": 43270 }, { "epoch": 0.43, "learning_rate": 0.000448128947368421, "loss": 0.6758, "step": 43280 }, { "epoch": 0.43, "learning_rate": 0.00044804999999999997, "loss": 0.6941, "step": 43290 }, { "epoch": 0.43, "learning_rate": 0.00044797105263157886, "loss": 0.6786, "step": 43300 }, { "epoch": 0.43, "learning_rate": 0.00044789210526315786, "loss": 0.6783, "step": 43310 }, { "epoch": 0.43, "learning_rate": 0.00044781315789473676, "loss": 0.6819, "step": 43320 }, { "epoch": 0.43, "learning_rate": 0.00044773421052631576, "loss": 0.6831, "step": 43330 }, { "epoch": 0.43, "learning_rate": 0.0004476552631578947, "loss": 0.6831, "step": 43340 }, { "epoch": 0.43, "learning_rate": 0.00044757631578947366, "loss": 0.6886, "step": 43350 }, { "epoch": 0.43, "learning_rate": 0.0004474973684210526, "loss": 0.6761, "step": 43360 }, { "epoch": 0.43, "learning_rate": 0.0004474184210526315, "loss": 0.6773, "step": 43370 }, { "epoch": 0.43, "learning_rate": 0.0004473394736842105, "loss": 0.684, "step": 43380 }, { "epoch": 0.43, "learning_rate": 0.0004472605263157894, "loss": 0.6911, "step": 43390 }, { "epoch": 0.43, "learning_rate": 0.0004471815789473684, "loss": 0.6846, "step": 43400 }, { "epoch": 0.43, "learning_rate": 0.0004471026315789473, "loss": 0.6824, "step": 43410 }, { "epoch": 0.43, "learning_rate": 0.0004470236842105263, "loss": 0.679, "step": 43420 }, { "epoch": 0.43, "learning_rate": 0.00044694473684210524, "loss": 0.6858, "step": 43430 }, { "epoch": 0.43, "learning_rate": 0.0004468657894736842, "loss": 0.6865, "step": 43440 }, { "epoch": 0.43, "learning_rate": 0.00044678684210526314, "loss": 0.6831, "step": 43450 }, { "epoch": 0.43, "learning_rate": 0.00044670789473684203, "loss": 0.6761, "step": 43460 }, { "epoch": 0.43, "learning_rate": 0.00044662894736842104, "loss": 0.6728, "step": 43470 }, { "epoch": 0.43, "learning_rate": 0.00044654999999999993, "loss": 0.6763, "step": 43480 }, { "epoch": 0.43, "learning_rate": 0.00044647105263157893, "loss": 0.6937, "step": 43490 }, { "epoch": 0.43, "learning_rate": 0.00044639210526315783, "loss": 0.6782, "step": 43500 }, { "epoch": 0.44, "learning_rate": 0.00044631315789473683, "loss": 0.6802, "step": 43510 }, { "epoch": 0.44, "learning_rate": 0.0004462342105263158, "loss": 0.6843, "step": 43520 }, { "epoch": 0.44, "learning_rate": 0.00044615526315789467, "loss": 0.6806, "step": 43530 }, { "epoch": 0.44, "learning_rate": 0.0004460763157894737, "loss": 0.6702, "step": 43540 }, { "epoch": 0.44, "learning_rate": 0.00044599736842105257, "loss": 0.673, "step": 43550 }, { "epoch": 0.44, "learning_rate": 0.00044591842105263157, "loss": 0.6804, "step": 43560 }, { "epoch": 0.44, "learning_rate": 0.00044583947368421047, "loss": 0.6717, "step": 43570 }, { "epoch": 0.44, "learning_rate": 0.00044576052631578947, "loss": 0.6729, "step": 43580 }, { "epoch": 0.44, "learning_rate": 0.00044568157894736836, "loss": 0.6647, "step": 43590 }, { "epoch": 0.44, "learning_rate": 0.0004456026315789473, "loss": 0.666, "step": 43600 }, { "epoch": 0.44, "learning_rate": 0.00044552368421052626, "loss": 0.6766, "step": 43610 }, { "epoch": 0.44, "learning_rate": 0.0004454447368421052, "loss": 0.6738, "step": 43620 }, { "epoch": 0.44, "learning_rate": 0.0004453657894736842, "loss": 0.6589, "step": 43630 }, { "epoch": 0.44, "learning_rate": 0.0004452868421052631, "loss": 0.6726, "step": 43640 }, { "epoch": 0.44, "learning_rate": 0.0004452078947368421, "loss": 0.6659, "step": 43650 }, { "epoch": 0.44, "learning_rate": 0.000445128947368421, "loss": 0.6782, "step": 43660 }, { "epoch": 0.44, "learning_rate": 0.00044504999999999995, "loss": 0.6578, "step": 43670 }, { "epoch": 0.44, "learning_rate": 0.0004449710526315789, "loss": 0.6786, "step": 43680 }, { "epoch": 0.44, "learning_rate": 0.00044489210526315785, "loss": 0.6603, "step": 43690 }, { "epoch": 0.44, "learning_rate": 0.0004448131578947368, "loss": 0.655, "step": 43700 }, { "epoch": 0.44, "learning_rate": 0.00044473421052631574, "loss": 0.6451, "step": 43710 }, { "epoch": 0.44, "learning_rate": 0.00044465526315789474, "loss": 0.6692, "step": 43720 }, { "epoch": 0.44, "learning_rate": 0.00044457631578947364, "loss": 0.6575, "step": 43730 }, { "epoch": 0.44, "learning_rate": 0.0004444973684210526, "loss": 0.6682, "step": 43740 }, { "epoch": 0.44, "learning_rate": 0.00044441842105263154, "loss": 0.6701, "step": 43750 }, { "epoch": 0.44, "learning_rate": 0.0004443394736842105, "loss": 0.681, "step": 43760 }, { "epoch": 0.44, "learning_rate": 0.00044426052631578943, "loss": 0.6744, "step": 43770 }, { "epoch": 0.44, "learning_rate": 0.0004441815789473684, "loss": 0.6838, "step": 43780 }, { "epoch": 0.44, "learning_rate": 0.0004441026315789473, "loss": 0.6789, "step": 43790 }, { "epoch": 0.44, "learning_rate": 0.0004440236842105263, "loss": 0.6821, "step": 43800 }, { "epoch": 0.44, "learning_rate": 0.0004439447368421053, "loss": 0.6697, "step": 43810 }, { "epoch": 0.44, "learning_rate": 0.0004438657894736842, "loss": 0.6769, "step": 43820 }, { "epoch": 0.44, "learning_rate": 0.0004437868421052631, "loss": 0.6616, "step": 43830 }, { "epoch": 0.44, "learning_rate": 0.00044370789473684207, "loss": 0.6701, "step": 43840 }, { "epoch": 0.44, "learning_rate": 0.0004436368421052631, "loss": 0.6669, "step": 43850 }, { "epoch": 0.44, "learning_rate": 0.0004435578947368421, "loss": 0.6583, "step": 43860 }, { "epoch": 0.44, "learning_rate": 0.00044347894736842104, "loss": 0.6521, "step": 43870 }, { "epoch": 0.44, "learning_rate": 0.00044339999999999994, "loss": 0.6759, "step": 43880 }, { "epoch": 0.44, "learning_rate": 0.00044332105263157894, "loss": 0.6624, "step": 43890 }, { "epoch": 0.44, "learning_rate": 0.00044324210526315783, "loss": 0.6608, "step": 43900 }, { "epoch": 0.44, "learning_rate": 0.00044316315789473683, "loss": 0.6528, "step": 43910 }, { "epoch": 0.44, "learning_rate": 0.00044308421052631573, "loss": 0.6748, "step": 43920 }, { "epoch": 0.44, "learning_rate": 0.00044300526315789473, "loss": 0.6503, "step": 43930 }, { "epoch": 0.44, "learning_rate": 0.0004429263157894736, "loss": 0.6682, "step": 43940 }, { "epoch": 0.44, "learning_rate": 0.0004428473684210526, "loss": 0.6718, "step": 43950 }, { "epoch": 0.44, "learning_rate": 0.0004427684210526316, "loss": 0.6745, "step": 43960 }, { "epoch": 0.44, "learning_rate": 0.00044268947368421047, "loss": 0.6619, "step": 43970 }, { "epoch": 0.44, "learning_rate": 0.0004426105263157895, "loss": 0.6703, "step": 43980 }, { "epoch": 0.44, "learning_rate": 0.00044253157894736837, "loss": 0.6618, "step": 43990 }, { "epoch": 0.44, "learning_rate": 0.00044245263157894737, "loss": 0.6658, "step": 44000 }, { "epoch": 0.44, "learning_rate": 0.00044237368421052626, "loss": 0.6844, "step": 44010 }, { "epoch": 0.44, "learning_rate": 0.0004422947368421052, "loss": 0.6885, "step": 44020 }, { "epoch": 0.44, "learning_rate": 0.00044221578947368416, "loss": 0.6581, "step": 44030 }, { "epoch": 0.44, "learning_rate": 0.0004421368421052631, "loss": 0.6635, "step": 44040 }, { "epoch": 0.44, "learning_rate": 0.0004420578947368421, "loss": 0.6656, "step": 44050 }, { "epoch": 0.44, "learning_rate": 0.000441978947368421, "loss": 0.6661, "step": 44060 }, { "epoch": 0.44, "learning_rate": 0.0004419, "loss": 0.6651, "step": 44070 }, { "epoch": 0.44, "learning_rate": 0.0004418210526315789, "loss": 0.6614, "step": 44080 }, { "epoch": 0.44, "learning_rate": 0.00044174210526315785, "loss": 0.6768, "step": 44090 }, { "epoch": 0.44, "learning_rate": 0.0004416631578947368, "loss": 0.6879, "step": 44100 }, { "epoch": 0.44, "learning_rate": 0.00044158421052631575, "loss": 0.6754, "step": 44110 }, { "epoch": 0.44, "learning_rate": 0.0004415052631578947, "loss": 0.6872, "step": 44120 }, { "epoch": 0.44, "learning_rate": 0.00044142631578947364, "loss": 0.6782, "step": 44130 }, { "epoch": 0.44, "learning_rate": 0.00044134736842105254, "loss": 0.6851, "step": 44140 }, { "epoch": 0.44, "learning_rate": 0.00044126842105263154, "loss": 0.6729, "step": 44150 }, { "epoch": 0.44, "learning_rate": 0.0004411894736842105, "loss": 0.691, "step": 44160 }, { "epoch": 0.44, "learning_rate": 0.00044111052631578944, "loss": 0.6858, "step": 44170 }, { "epoch": 0.44, "learning_rate": 0.0004410315789473684, "loss": 0.6821, "step": 44180 }, { "epoch": 0.44, "learning_rate": 0.00044095263157894733, "loss": 0.6695, "step": 44190 }, { "epoch": 0.44, "learning_rate": 0.0004408736842105263, "loss": 0.6793, "step": 44200 }, { "epoch": 0.44, "learning_rate": 0.0004407947368421052, "loss": 0.6671, "step": 44210 }, { "epoch": 0.44, "learning_rate": 0.0004407157894736842, "loss": 0.6732, "step": 44220 }, { "epoch": 0.44, "learning_rate": 0.00044063684210526307, "loss": 0.6715, "step": 44230 }, { "epoch": 0.44, "learning_rate": 0.0004405578947368421, "loss": 0.6761, "step": 44240 }, { "epoch": 0.44, "learning_rate": 0.000440478947368421, "loss": 0.6714, "step": 44250 }, { "epoch": 0.44, "learning_rate": 0.00044039999999999997, "loss": 0.6802, "step": 44260 }, { "epoch": 0.44, "learning_rate": 0.0004403210526315789, "loss": 0.6855, "step": 44270 }, { "epoch": 0.44, "learning_rate": 0.0004402421052631578, "loss": 0.6826, "step": 44280 }, { "epoch": 0.44, "learning_rate": 0.0004401631578947368, "loss": 0.6783, "step": 44290 }, { "epoch": 0.44, "learning_rate": 0.0004400842105263157, "loss": 0.6795, "step": 44300 }, { "epoch": 0.44, "learning_rate": 0.0004400052631578947, "loss": 0.6872, "step": 44310 }, { "epoch": 0.44, "learning_rate": 0.0004399263157894736, "loss": 0.6884, "step": 44320 }, { "epoch": 0.44, "learning_rate": 0.0004398473684210526, "loss": 0.6806, "step": 44330 }, { "epoch": 0.44, "learning_rate": 0.00043976842105263156, "loss": 0.679, "step": 44340 }, { "epoch": 0.44, "learning_rate": 0.0004396894736842105, "loss": 0.6755, "step": 44350 }, { "epoch": 0.44, "learning_rate": 0.00043961052631578945, "loss": 0.692, "step": 44360 }, { "epoch": 0.44, "learning_rate": 0.00043953157894736835, "loss": 0.6801, "step": 44370 }, { "epoch": 0.44, "learning_rate": 0.00043945263157894735, "loss": 0.6728, "step": 44380 }, { "epoch": 0.44, "learning_rate": 0.00043937368421052624, "loss": 0.675, "step": 44390 }, { "epoch": 0.44, "learning_rate": 0.00043929473684210525, "loss": 0.669, "step": 44400 }, { "epoch": 0.44, "learning_rate": 0.00043921578947368414, "loss": 0.6866, "step": 44410 }, { "epoch": 0.44, "learning_rate": 0.00043913684210526314, "loss": 0.6831, "step": 44420 }, { "epoch": 0.44, "learning_rate": 0.0004390578947368421, "loss": 0.6796, "step": 44430 }, { "epoch": 0.44, "learning_rate": 0.000438978947368421, "loss": 0.69, "step": 44440 }, { "epoch": 0.44, "learning_rate": 0.0004389, "loss": 0.6708, "step": 44450 }, { "epoch": 0.44, "learning_rate": 0.0004388210526315789, "loss": 0.6892, "step": 44460 }, { "epoch": 0.44, "learning_rate": 0.0004387421052631579, "loss": 0.6883, "step": 44470 }, { "epoch": 0.44, "learning_rate": 0.0004386631578947368, "loss": 0.6838, "step": 44480 }, { "epoch": 0.44, "learning_rate": 0.0004385842105263158, "loss": 0.6874, "step": 44490 }, { "epoch": 0.45, "learning_rate": 0.0004385052631578947, "loss": 0.7018, "step": 44500 }, { "epoch": 0.45, "learning_rate": 0.0004384263157894736, "loss": 0.6789, "step": 44510 }, { "epoch": 0.45, "learning_rate": 0.0004383473684210526, "loss": 0.6831, "step": 44520 }, { "epoch": 0.45, "learning_rate": 0.0004382684210526315, "loss": 0.6841, "step": 44530 }, { "epoch": 0.45, "learning_rate": 0.0004381894736842105, "loss": 0.6891, "step": 44540 }, { "epoch": 0.45, "learning_rate": 0.0004381105263157894, "loss": 0.676, "step": 44550 }, { "epoch": 0.45, "learning_rate": 0.0004380315789473684, "loss": 0.6773, "step": 44560 }, { "epoch": 0.45, "learning_rate": 0.0004379526315789473, "loss": 0.6804, "step": 44570 }, { "epoch": 0.45, "learning_rate": 0.00043787368421052626, "loss": 0.6738, "step": 44580 }, { "epoch": 0.45, "learning_rate": 0.0004377947368421052, "loss": 0.6774, "step": 44590 }, { "epoch": 0.45, "learning_rate": 0.00043771578947368416, "loss": 0.6737, "step": 44600 }, { "epoch": 0.45, "learning_rate": 0.00043763684210526316, "loss": 0.6809, "step": 44610 }, { "epoch": 0.45, "learning_rate": 0.00043755789473684206, "loss": 0.6816, "step": 44620 }, { "epoch": 0.45, "learning_rate": 0.00043747894736842106, "loss": 0.6745, "step": 44630 }, { "epoch": 0.45, "learning_rate": 0.00043739999999999995, "loss": 0.6779, "step": 44640 }, { "epoch": 0.45, "learning_rate": 0.0004373210526315789, "loss": 0.6817, "step": 44650 }, { "epoch": 0.45, "learning_rate": 0.00043724210526315785, "loss": 0.6768, "step": 44660 }, { "epoch": 0.45, "learning_rate": 0.0004371631578947368, "loss": 0.6703, "step": 44670 }, { "epoch": 0.45, "learning_rate": 0.00043708421052631575, "loss": 0.674, "step": 44680 }, { "epoch": 0.45, "learning_rate": 0.0004370052631578947, "loss": 0.6787, "step": 44690 }, { "epoch": 0.45, "learning_rate": 0.0004369263157894736, "loss": 0.6882, "step": 44700 }, { "epoch": 0.45, "learning_rate": 0.0004368473684210526, "loss": 0.6762, "step": 44710 }, { "epoch": 0.45, "learning_rate": 0.0004367684210526316, "loss": 0.6687, "step": 44720 }, { "epoch": 0.45, "learning_rate": 0.0004366894736842105, "loss": 0.6868, "step": 44730 }, { "epoch": 0.45, "learning_rate": 0.00043661052631578944, "loss": 0.6633, "step": 44740 }, { "epoch": 0.45, "learning_rate": 0.0004365315789473684, "loss": 0.6732, "step": 44750 }, { "epoch": 0.45, "learning_rate": 0.00043645263157894733, "loss": 0.6595, "step": 44760 }, { "epoch": 0.45, "learning_rate": 0.0004363736842105262, "loss": 0.6707, "step": 44770 }, { "epoch": 0.45, "learning_rate": 0.00043629473684210523, "loss": 0.6553, "step": 44780 }, { "epoch": 0.45, "learning_rate": 0.0004362157894736841, "loss": 0.6675, "step": 44790 }, { "epoch": 0.45, "learning_rate": 0.0004361368421052631, "loss": 0.6593, "step": 44800 }, { "epoch": 0.45, "learning_rate": 0.0004360578947368421, "loss": 0.6639, "step": 44810 }, { "epoch": 0.45, "learning_rate": 0.000435978947368421, "loss": 0.6538, "step": 44820 }, { "epoch": 0.45, "learning_rate": 0.00043589999999999997, "loss": 0.6666, "step": 44830 }, { "epoch": 0.45, "learning_rate": 0.0004358210526315789, "loss": 0.6583, "step": 44840 }, { "epoch": 0.45, "learning_rate": 0.00043574210526315787, "loss": 0.6791, "step": 44850 }, { "epoch": 0.45, "learning_rate": 0.00043566315789473676, "loss": 0.6504, "step": 44860 }, { "epoch": 0.45, "learning_rate": 0.00043558421052631576, "loss": 0.6677, "step": 44870 }, { "epoch": 0.45, "learning_rate": 0.00043550526315789466, "loss": 0.651, "step": 44880 }, { "epoch": 0.45, "learning_rate": 0.00043542631578947366, "loss": 0.6632, "step": 44890 }, { "epoch": 0.45, "learning_rate": 0.0004353473684210526, "loss": 0.6563, "step": 44900 }, { "epoch": 0.45, "learning_rate": 0.00043526842105263156, "loss": 0.6656, "step": 44910 }, { "epoch": 0.45, "learning_rate": 0.0004351894736842105, "loss": 0.6628, "step": 44920 }, { "epoch": 0.45, "learning_rate": 0.0004351105263157894, "loss": 0.6647, "step": 44930 }, { "epoch": 0.45, "learning_rate": 0.0004350315789473684, "loss": 0.6707, "step": 44940 }, { "epoch": 0.45, "learning_rate": 0.0004349526315789473, "loss": 0.675, "step": 44950 }, { "epoch": 0.45, "learning_rate": 0.0004348736842105263, "loss": 0.6905, "step": 44960 }, { "epoch": 0.45, "learning_rate": 0.0004347947368421052, "loss": 0.6732, "step": 44970 }, { "epoch": 0.45, "learning_rate": 0.0004347157894736842, "loss": 0.6757, "step": 44980 }, { "epoch": 0.45, "learning_rate": 0.00043463684210526314, "loss": 0.685, "step": 44990 }, { "epoch": 0.45, "learning_rate": 0.00043455789473684204, "loss": 0.6752, "step": 45000 }, { "epoch": 0.45, "eval_accuracy": 0.8599255154050108, "eval_loss": 0.6640625, "eval_runtime": 97.1461, "eval_samples_per_second": 823.502, "eval_steps_per_second": 1.616, "step": 45000 }, { "epoch": 0.45, "learning_rate": 0.00043447894736842104, "loss": 0.6799, "step": 45010 }, { "epoch": 0.45, "learning_rate": 0.00043439999999999993, "loss": 0.6723, "step": 45020 }, { "epoch": 0.45, "learning_rate": 0.00043432105263157894, "loss": 0.6637, "step": 45030 }, { "epoch": 0.45, "learning_rate": 0.00043424210526315783, "loss": 0.6702, "step": 45040 }, { "epoch": 0.45, "learning_rate": 0.00043416315789473683, "loss": 0.6607, "step": 45050 }, { "epoch": 0.45, "learning_rate": 0.0004340842105263157, "loss": 0.6635, "step": 45060 }, { "epoch": 0.45, "learning_rate": 0.0004340052631578947, "loss": 0.675, "step": 45070 }, { "epoch": 0.45, "learning_rate": 0.0004339263157894737, "loss": 0.6741, "step": 45080 }, { "epoch": 0.45, "learning_rate": 0.00043384736842105257, "loss": 0.6663, "step": 45090 }, { "epoch": 0.45, "learning_rate": 0.0004337684210526316, "loss": 0.6532, "step": 45100 }, { "epoch": 0.45, "learning_rate": 0.00043368947368421047, "loss": 0.6647, "step": 45110 }, { "epoch": 0.45, "learning_rate": 0.00043361052631578947, "loss": 0.6682, "step": 45120 }, { "epoch": 0.45, "learning_rate": 0.00043353157894736837, "loss": 0.658, "step": 45130 }, { "epoch": 0.45, "learning_rate": 0.0004334526315789473, "loss": 0.6703, "step": 45140 }, { "epoch": 0.45, "learning_rate": 0.00043337368421052626, "loss": 0.6698, "step": 45150 }, { "epoch": 0.45, "learning_rate": 0.0004332947368421052, "loss": 0.6698, "step": 45160 }, { "epoch": 0.45, "learning_rate": 0.0004332157894736842, "loss": 0.6666, "step": 45170 }, { "epoch": 0.45, "learning_rate": 0.0004331368421052631, "loss": 0.6747, "step": 45180 }, { "epoch": 0.45, "learning_rate": 0.0004330578947368421, "loss": 0.6839, "step": 45190 }, { "epoch": 0.45, "learning_rate": 0.000432978947368421, "loss": 0.6637, "step": 45200 }, { "epoch": 0.45, "learning_rate": 0.0004329, "loss": 0.6625, "step": 45210 }, { "epoch": 0.45, "learning_rate": 0.0004328210526315789, "loss": 0.6749, "step": 45220 }, { "epoch": 0.45, "learning_rate": 0.00043274210526315785, "loss": 0.6785, "step": 45230 }, { "epoch": 0.45, "learning_rate": 0.0004326631578947368, "loss": 0.6743, "step": 45240 }, { "epoch": 0.45, "learning_rate": 0.00043258421052631574, "loss": 0.6759, "step": 45250 }, { "epoch": 0.45, "learning_rate": 0.00043250526315789464, "loss": 0.6694, "step": 45260 }, { "epoch": 0.45, "learning_rate": 0.00043242631578947364, "loss": 0.6657, "step": 45270 }, { "epoch": 0.45, "learning_rate": 0.00043234736842105264, "loss": 0.6585, "step": 45280 }, { "epoch": 0.45, "learning_rate": 0.00043226842105263154, "loss": 0.668, "step": 45290 }, { "epoch": 0.45, "learning_rate": 0.0004321894736842105, "loss": 0.6783, "step": 45300 }, { "epoch": 0.45, "learning_rate": 0.00043211052631578943, "loss": 0.6774, "step": 45310 }, { "epoch": 0.45, "learning_rate": 0.0004320315789473684, "loss": 0.674, "step": 45320 }, { "epoch": 0.45, "learning_rate": 0.00043195263157894733, "loss": 0.6705, "step": 45330 }, { "epoch": 0.45, "learning_rate": 0.0004318736842105263, "loss": 0.6672, "step": 45340 }, { "epoch": 0.45, "learning_rate": 0.0004317947368421052, "loss": 0.6842, "step": 45350 }, { "epoch": 0.45, "learning_rate": 0.0004317157894736842, "loss": 0.6807, "step": 45360 }, { "epoch": 0.45, "learning_rate": 0.0004316368421052631, "loss": 0.6782, "step": 45370 }, { "epoch": 0.45, "learning_rate": 0.00043155789473684207, "loss": 0.6739, "step": 45380 }, { "epoch": 0.45, "learning_rate": 0.000431478947368421, "loss": 0.6886, "step": 45390 }, { "epoch": 0.45, "learning_rate": 0.00043139999999999997, "loss": 0.6809, "step": 45400 }, { "epoch": 0.45, "learning_rate": 0.0004313210526315789, "loss": 0.667, "step": 45410 }, { "epoch": 0.45, "learning_rate": 0.0004312421052631578, "loss": 0.6836, "step": 45420 }, { "epoch": 0.45, "learning_rate": 0.0004311631578947368, "loss": 0.6772, "step": 45430 }, { "epoch": 0.45, "learning_rate": 0.0004310842105263157, "loss": 0.6763, "step": 45440 }, { "epoch": 0.45, "learning_rate": 0.0004310052631578947, "loss": 0.6701, "step": 45450 }, { "epoch": 0.45, "learning_rate": 0.00043092631578947366, "loss": 0.6785, "step": 45460 }, { "epoch": 0.45, "learning_rate": 0.0004308473684210526, "loss": 0.6789, "step": 45470 }, { "epoch": 0.45, "learning_rate": 0.00043076842105263156, "loss": 0.6808, "step": 45480 }, { "epoch": 0.45, "learning_rate": 0.00043068947368421045, "loss": 0.6843, "step": 45490 }, { "epoch": 0.46, "learning_rate": 0.00043061052631578945, "loss": 0.6633, "step": 45500 }, { "epoch": 0.46, "learning_rate": 0.00043053157894736835, "loss": 0.6578, "step": 45510 }, { "epoch": 0.46, "learning_rate": 0.00043045263157894735, "loss": 0.6717, "step": 45520 }, { "epoch": 0.46, "learning_rate": 0.00043037368421052624, "loss": 0.669, "step": 45530 }, { "epoch": 0.46, "learning_rate": 0.00043029473684210525, "loss": 0.6861, "step": 45540 }, { "epoch": 0.46, "learning_rate": 0.0004302157894736842, "loss": 0.6764, "step": 45550 }, { "epoch": 0.46, "learning_rate": 0.0004301368421052631, "loss": 0.6799, "step": 45560 }, { "epoch": 0.46, "learning_rate": 0.0004300578947368421, "loss": 0.6657, "step": 45570 }, { "epoch": 0.46, "learning_rate": 0.000429978947368421, "loss": 0.6633, "step": 45580 }, { "epoch": 0.46, "learning_rate": 0.0004299, "loss": 0.6667, "step": 45590 }, { "epoch": 0.46, "learning_rate": 0.0004298210526315789, "loss": 0.6721, "step": 45600 }, { "epoch": 0.46, "learning_rate": 0.0004297421052631579, "loss": 0.6818, "step": 45610 }, { "epoch": 0.46, "learning_rate": 0.0004296631578947368, "loss": 0.6824, "step": 45620 }, { "epoch": 0.46, "learning_rate": 0.0004295842105263157, "loss": 0.6818, "step": 45630 }, { "epoch": 0.46, "learning_rate": 0.00042950526315789473, "loss": 0.6684, "step": 45640 }, { "epoch": 0.46, "learning_rate": 0.00042943421052631575, "loss": 0.6763, "step": 45650 }, { "epoch": 0.46, "learning_rate": 0.0004293552631578947, "loss": 0.6786, "step": 45660 }, { "epoch": 0.46, "learning_rate": 0.00042927631578947365, "loss": 0.6848, "step": 45670 }, { "epoch": 0.46, "learning_rate": 0.0004291973684210526, "loss": 0.6851, "step": 45680 }, { "epoch": 0.46, "learning_rate": 0.00042911842105263154, "loss": 0.6897, "step": 45690 }, { "epoch": 0.46, "learning_rate": 0.00042903947368421055, "loss": 0.6916, "step": 45700 }, { "epoch": 0.46, "learning_rate": 0.00042896052631578944, "loss": 0.6804, "step": 45710 }, { "epoch": 0.46, "learning_rate": 0.0004288815789473684, "loss": 0.6782, "step": 45720 }, { "epoch": 0.46, "learning_rate": 0.00042880263157894734, "loss": 0.6748, "step": 45730 }, { "epoch": 0.46, "learning_rate": 0.0004287236842105263, "loss": 0.6799, "step": 45740 }, { "epoch": 0.46, "learning_rate": 0.00042864473684210523, "loss": 0.6882, "step": 45750 }, { "epoch": 0.46, "learning_rate": 0.0004285657894736842, "loss": 0.6796, "step": 45760 }, { "epoch": 0.46, "learning_rate": 0.0004284868421052631, "loss": 0.6627, "step": 45770 }, { "epoch": 0.46, "learning_rate": 0.0004284078947368421, "loss": 0.6646, "step": 45780 }, { "epoch": 0.46, "learning_rate": 0.00042832894736842097, "loss": 0.6738, "step": 45790 }, { "epoch": 0.46, "learning_rate": 0.00042825, "loss": 0.6697, "step": 45800 }, { "epoch": 0.46, "learning_rate": 0.0004281710526315789, "loss": 0.6758, "step": 45810 }, { "epoch": 0.46, "learning_rate": 0.00042809210526315787, "loss": 0.6837, "step": 45820 }, { "epoch": 0.46, "learning_rate": 0.0004280131578947368, "loss": 0.6833, "step": 45830 }, { "epoch": 0.46, "learning_rate": 0.0004279342105263157, "loss": 0.6777, "step": 45840 }, { "epoch": 0.46, "learning_rate": 0.0004278552631578947, "loss": 0.6806, "step": 45850 }, { "epoch": 0.46, "learning_rate": 0.0004277763157894736, "loss": 0.6843, "step": 45860 }, { "epoch": 0.46, "learning_rate": 0.0004276973684210526, "loss": 0.6858, "step": 45870 }, { "epoch": 0.46, "learning_rate": 0.0004276184210526315, "loss": 0.6962, "step": 45880 }, { "epoch": 0.46, "learning_rate": 0.0004275394736842105, "loss": 0.6877, "step": 45890 }, { "epoch": 0.46, "learning_rate": 0.00042746052631578946, "loss": 0.684, "step": 45900 }, { "epoch": 0.46, "learning_rate": 0.00042738157894736835, "loss": 0.6886, "step": 45910 }, { "epoch": 0.46, "learning_rate": 0.00042730263157894735, "loss": 0.6726, "step": 45920 }, { "epoch": 0.46, "learning_rate": 0.00042722368421052625, "loss": 0.6592, "step": 45930 }, { "epoch": 0.46, "learning_rate": 0.00042714473684210525, "loss": 0.657, "step": 45940 }, { "epoch": 0.46, "learning_rate": 0.00042706578947368414, "loss": 0.6604, "step": 45950 }, { "epoch": 0.46, "learning_rate": 0.00042698684210526315, "loss": 0.6607, "step": 45960 }, { "epoch": 0.46, "learning_rate": 0.00042690789473684204, "loss": 0.6795, "step": 45970 }, { "epoch": 0.46, "learning_rate": 0.000426828947368421, "loss": 0.6574, "step": 45980 }, { "epoch": 0.46, "learning_rate": 0.00042675, "loss": 0.6729, "step": 45990 }, { "epoch": 0.46, "learning_rate": 0.0004266710526315789, "loss": 0.6701, "step": 46000 }, { "epoch": 0.46, "learning_rate": 0.0004265921052631579, "loss": 0.6795, "step": 46010 }, { "epoch": 0.46, "learning_rate": 0.0004265131578947368, "loss": 0.6854, "step": 46020 }, { "epoch": 0.46, "learning_rate": 0.0004264342105263158, "loss": 0.6835, "step": 46030 }, { "epoch": 0.46, "learning_rate": 0.0004263552631578947, "loss": 0.6778, "step": 46040 }, { "epoch": 0.46, "learning_rate": 0.0004262763157894737, "loss": 0.6796, "step": 46050 }, { "epoch": 0.46, "learning_rate": 0.0004261973684210526, "loss": 0.6714, "step": 46060 }, { "epoch": 0.46, "learning_rate": 0.0004261184210526315, "loss": 0.6824, "step": 46070 }, { "epoch": 0.46, "learning_rate": 0.0004260394736842105, "loss": 0.6705, "step": 46080 }, { "epoch": 0.46, "learning_rate": 0.0004259605263157894, "loss": 0.6843, "step": 46090 }, { "epoch": 0.46, "learning_rate": 0.0004258815789473684, "loss": 0.6873, "step": 46100 }, { "epoch": 0.46, "learning_rate": 0.0004258026315789473, "loss": 0.6683, "step": 46110 }, { "epoch": 0.46, "learning_rate": 0.0004257236842105263, "loss": 0.662, "step": 46120 }, { "epoch": 0.46, "learning_rate": 0.0004256447368421052, "loss": 0.6848, "step": 46130 }, { "epoch": 0.46, "learning_rate": 0.00042556578947368416, "loss": 0.6905, "step": 46140 }, { "epoch": 0.46, "learning_rate": 0.0004254868421052631, "loss": 0.6866, "step": 46150 }, { "epoch": 0.46, "learning_rate": 0.00042540789473684206, "loss": 0.6808, "step": 46160 }, { "epoch": 0.46, "learning_rate": 0.00042532894736842106, "loss": 0.6693, "step": 46170 }, { "epoch": 0.46, "learning_rate": 0.00042524999999999996, "loss": 0.6554, "step": 46180 }, { "epoch": 0.46, "learning_rate": 0.00042517105263157896, "loss": 0.6523, "step": 46190 }, { "epoch": 0.46, "learning_rate": 0.00042509210526315785, "loss": 0.6718, "step": 46200 }, { "epoch": 0.46, "learning_rate": 0.0004250131578947368, "loss": 0.6625, "step": 46210 }, { "epoch": 0.46, "learning_rate": 0.00042493421052631575, "loss": 0.6728, "step": 46220 }, { "epoch": 0.46, "learning_rate": 0.0004248552631578947, "loss": 0.6786, "step": 46230 }, { "epoch": 0.46, "learning_rate": 0.00042477631578947365, "loss": 0.6808, "step": 46240 }, { "epoch": 0.46, "learning_rate": 0.0004246973684210526, "loss": 0.6762, "step": 46250 }, { "epoch": 0.46, "learning_rate": 0.0004246184210526316, "loss": 0.6669, "step": 46260 }, { "epoch": 0.46, "learning_rate": 0.0004245394736842105, "loss": 0.6685, "step": 46270 }, { "epoch": 0.46, "learning_rate": 0.00042446052631578944, "loss": 0.6691, "step": 46280 }, { "epoch": 0.46, "learning_rate": 0.0004243815789473684, "loss": 0.6511, "step": 46290 }, { "epoch": 0.46, "learning_rate": 0.00042430263157894733, "loss": 0.6455, "step": 46300 }, { "epoch": 0.46, "learning_rate": 0.0004242236842105263, "loss": 0.6656, "step": 46310 }, { "epoch": 0.46, "learning_rate": 0.00042414473684210523, "loss": 0.6635, "step": 46320 }, { "epoch": 0.46, "learning_rate": 0.0004240657894736841, "loss": 0.6599, "step": 46330 }, { "epoch": 0.46, "learning_rate": 0.00042398684210526313, "loss": 0.6679, "step": 46340 }, { "epoch": 0.46, "learning_rate": 0.000423907894736842, "loss": 0.6538, "step": 46350 }, { "epoch": 0.46, "learning_rate": 0.000423828947368421, "loss": 0.6692, "step": 46360 }, { "epoch": 0.46, "learning_rate": 0.00042375, "loss": 0.6859, "step": 46370 }, { "epoch": 0.46, "learning_rate": 0.0004236710526315789, "loss": 0.679, "step": 46380 }, { "epoch": 0.46, "learning_rate": 0.00042359210526315787, "loss": 0.6792, "step": 46390 }, { "epoch": 0.46, "learning_rate": 0.00042351315789473676, "loss": 0.6799, "step": 46400 }, { "epoch": 0.46, "learning_rate": 0.00042343421052631577, "loss": 0.6776, "step": 46410 }, { "epoch": 0.46, "learning_rate": 0.00042335526315789466, "loss": 0.6638, "step": 46420 }, { "epoch": 0.46, "learning_rate": 0.00042327631578947366, "loss": 0.6603, "step": 46430 }, { "epoch": 0.46, "learning_rate": 0.00042319736842105256, "loss": 0.6629, "step": 46440 }, { "epoch": 0.46, "learning_rate": 0.00042311842105263156, "loss": 0.6683, "step": 46450 }, { "epoch": 0.46, "learning_rate": 0.0004230394736842105, "loss": 0.6739, "step": 46460 }, { "epoch": 0.46, "learning_rate": 0.0004229605263157894, "loss": 0.6617, "step": 46470 }, { "epoch": 0.46, "learning_rate": 0.0004228815789473684, "loss": 0.6792, "step": 46480 }, { "epoch": 0.46, "learning_rate": 0.0004228026315789473, "loss": 0.6733, "step": 46490 }, { "epoch": 0.47, "learning_rate": 0.0004227236842105263, "loss": 0.6617, "step": 46500 }, { "epoch": 0.47, "learning_rate": 0.0004226447368421052, "loss": 0.6792, "step": 46510 }, { "epoch": 0.47, "learning_rate": 0.0004225657894736842, "loss": 0.6797, "step": 46520 }, { "epoch": 0.47, "learning_rate": 0.0004224868421052631, "loss": 0.6739, "step": 46530 }, { "epoch": 0.47, "learning_rate": 0.0004224078947368421, "loss": 0.6763, "step": 46540 }, { "epoch": 0.47, "learning_rate": 0.00042232894736842104, "loss": 0.6887, "step": 46550 }, { "epoch": 0.47, "learning_rate": 0.00042224999999999994, "loss": 0.6853, "step": 46560 }, { "epoch": 0.47, "learning_rate": 0.00042217105263157894, "loss": 0.665, "step": 46570 }, { "epoch": 0.47, "learning_rate": 0.00042209210526315783, "loss": 0.6908, "step": 46580 }, { "epoch": 0.47, "learning_rate": 0.00042201315789473684, "loss": 0.6696, "step": 46590 }, { "epoch": 0.47, "learning_rate": 0.00042193421052631573, "loss": 0.6524, "step": 46600 }, { "epoch": 0.47, "learning_rate": 0.00042185526315789473, "loss": 0.6785, "step": 46610 }, { "epoch": 0.47, "learning_rate": 0.0004217763157894736, "loss": 0.6727, "step": 46620 }, { "epoch": 0.47, "learning_rate": 0.0004216973684210526, "loss": 0.6731, "step": 46630 }, { "epoch": 0.47, "learning_rate": 0.0004216184210526316, "loss": 0.6638, "step": 46640 }, { "epoch": 0.47, "learning_rate": 0.00042153947368421047, "loss": 0.6859, "step": 46650 }, { "epoch": 0.47, "learning_rate": 0.0004214605263157895, "loss": 0.6735, "step": 46660 }, { "epoch": 0.47, "learning_rate": 0.00042138157894736837, "loss": 0.6754, "step": 46670 }, { "epoch": 0.47, "learning_rate": 0.00042130263157894737, "loss": 0.6737, "step": 46680 }, { "epoch": 0.47, "learning_rate": 0.00042122368421052626, "loss": 0.6727, "step": 46690 }, { "epoch": 0.47, "learning_rate": 0.0004211447368421052, "loss": 0.6797, "step": 46700 }, { "epoch": 0.47, "learning_rate": 0.00042106578947368416, "loss": 0.6881, "step": 46710 }, { "epoch": 0.47, "learning_rate": 0.0004209868421052631, "loss": 0.6768, "step": 46720 }, { "epoch": 0.47, "learning_rate": 0.0004209078947368421, "loss": 0.6828, "step": 46730 }, { "epoch": 0.47, "learning_rate": 0.000420828947368421, "loss": 0.6682, "step": 46740 }, { "epoch": 0.47, "learning_rate": 0.00042075, "loss": 0.6918, "step": 46750 }, { "epoch": 0.47, "learning_rate": 0.0004206710526315789, "loss": 0.6486, "step": 46760 }, { "epoch": 0.47, "learning_rate": 0.00042059210526315785, "loss": 0.6764, "step": 46770 }, { "epoch": 0.47, "learning_rate": 0.0004205131578947368, "loss": 0.6808, "step": 46780 }, { "epoch": 0.47, "learning_rate": 0.00042043421052631575, "loss": 0.6777, "step": 46790 }, { "epoch": 0.47, "learning_rate": 0.0004203552631578947, "loss": 0.6736, "step": 46800 }, { "epoch": 0.47, "learning_rate": 0.00042027631578947364, "loss": 0.6823, "step": 46810 }, { "epoch": 0.47, "learning_rate": 0.00042019736842105265, "loss": 0.6693, "step": 46820 }, { "epoch": 0.47, "learning_rate": 0.00042012631578947367, "loss": 0.6722, "step": 46830 }, { "epoch": 0.47, "learning_rate": 0.00042004736842105256, "loss": 0.675, "step": 46840 }, { "epoch": 0.47, "learning_rate": 0.00041996842105263156, "loss": 0.6845, "step": 46850 }, { "epoch": 0.47, "learning_rate": 0.00041988947368421046, "loss": 0.6852, "step": 46860 }, { "epoch": 0.47, "learning_rate": 0.00041981052631578946, "loss": 0.6726, "step": 46870 }, { "epoch": 0.47, "learning_rate": 0.00041973157894736835, "loss": 0.6752, "step": 46880 }, { "epoch": 0.47, "learning_rate": 0.0004196526315789473, "loss": 0.6716, "step": 46890 }, { "epoch": 0.47, "learning_rate": 0.0004195736842105263, "loss": 0.6745, "step": 46900 }, { "epoch": 0.47, "learning_rate": 0.0004194947368421052, "loss": 0.6786, "step": 46910 }, { "epoch": 0.47, "learning_rate": 0.0004194157894736842, "loss": 0.671, "step": 46920 }, { "epoch": 0.47, "learning_rate": 0.0004193368421052631, "loss": 0.6709, "step": 46930 }, { "epoch": 0.47, "learning_rate": 0.0004192578947368421, "loss": 0.6724, "step": 46940 }, { "epoch": 0.47, "learning_rate": 0.000419178947368421, "loss": 0.6708, "step": 46950 }, { "epoch": 0.47, "learning_rate": 0.0004191, "loss": 0.6692, "step": 46960 }, { "epoch": 0.47, "learning_rate": 0.0004190210526315789, "loss": 0.6705, "step": 46970 }, { "epoch": 0.47, "learning_rate": 0.00041894210526315784, "loss": 0.6702, "step": 46980 }, { "epoch": 0.47, "learning_rate": 0.00041886315789473684, "loss": 0.6726, "step": 46990 }, { "epoch": 0.47, "learning_rate": 0.00041878421052631573, "loss": 0.6681, "step": 47000 }, { "epoch": 0.47, "learning_rate": 0.00041870526315789474, "loss": 0.679, "step": 47010 }, { "epoch": 0.47, "learning_rate": 0.00041862631578947363, "loss": 0.6913, "step": 47020 }, { "epoch": 0.47, "learning_rate": 0.00041854736842105263, "loss": 0.6748, "step": 47030 }, { "epoch": 0.47, "learning_rate": 0.00041846842105263153, "loss": 0.6694, "step": 47040 }, { "epoch": 0.47, "learning_rate": 0.0004183894736842105, "loss": 0.6742, "step": 47050 }, { "epoch": 0.47, "learning_rate": 0.0004183105263157894, "loss": 0.6874, "step": 47060 }, { "epoch": 0.47, "learning_rate": 0.00041823157894736837, "loss": 0.6867, "step": 47070 }, { "epoch": 0.47, "learning_rate": 0.0004181526315789474, "loss": 0.6798, "step": 47080 }, { "epoch": 0.47, "learning_rate": 0.00041807368421052627, "loss": 0.6688, "step": 47090 }, { "epoch": 0.47, "learning_rate": 0.00041799473684210527, "loss": 0.6667, "step": 47100 }, { "epoch": 0.47, "learning_rate": 0.00041791578947368417, "loss": 0.6644, "step": 47110 }, { "epoch": 0.47, "learning_rate": 0.0004178368421052631, "loss": 0.6694, "step": 47120 }, { "epoch": 0.47, "learning_rate": 0.00041775789473684206, "loss": 0.675, "step": 47130 }, { "epoch": 0.47, "learning_rate": 0.000417678947368421, "loss": 0.6737, "step": 47140 }, { "epoch": 0.47, "learning_rate": 0.00041759999999999996, "loss": 0.6563, "step": 47150 }, { "epoch": 0.47, "learning_rate": 0.0004175210526315789, "loss": 0.6591, "step": 47160 }, { "epoch": 0.47, "learning_rate": 0.0004174421052631579, "loss": 0.6632, "step": 47170 }, { "epoch": 0.47, "learning_rate": 0.0004173631578947368, "loss": 0.663, "step": 47180 }, { "epoch": 0.47, "learning_rate": 0.00041728421052631575, "loss": 0.6693, "step": 47190 }, { "epoch": 0.47, "learning_rate": 0.0004172052631578947, "loss": 0.6683, "step": 47200 }, { "epoch": 0.47, "learning_rate": 0.00041712631578947365, "loss": 0.6762, "step": 47210 }, { "epoch": 0.47, "learning_rate": 0.0004170473684210526, "loss": 0.6671, "step": 47220 }, { "epoch": 0.47, "learning_rate": 0.00041696842105263155, "loss": 0.6758, "step": 47230 }, { "epoch": 0.47, "learning_rate": 0.00041688947368421044, "loss": 0.6842, "step": 47240 }, { "epoch": 0.47, "learning_rate": 0.00041681052631578944, "loss": 0.6697, "step": 47250 }, { "epoch": 0.47, "learning_rate": 0.0004167315789473684, "loss": 0.6771, "step": 47260 }, { "epoch": 0.47, "learning_rate": 0.00041665263157894734, "loss": 0.6809, "step": 47270 }, { "epoch": 0.47, "learning_rate": 0.0004165736842105263, "loss": 0.681, "step": 47280 }, { "epoch": 0.47, "learning_rate": 0.00041649473684210524, "loss": 0.6644, "step": 47290 }, { "epoch": 0.47, "learning_rate": 0.0004164157894736842, "loss": 0.6846, "step": 47300 }, { "epoch": 0.47, "learning_rate": 0.0004163368421052631, "loss": 0.6707, "step": 47310 }, { "epoch": 0.47, "learning_rate": 0.0004162578947368421, "loss": 0.6552, "step": 47320 }, { "epoch": 0.47, "learning_rate": 0.000416178947368421, "loss": 0.6606, "step": 47330 }, { "epoch": 0.47, "learning_rate": 0.0004161, "loss": 0.6753, "step": 47340 }, { "epoch": 0.47, "learning_rate": 0.0004160210526315789, "loss": 0.6682, "step": 47350 }, { "epoch": 0.47, "learning_rate": 0.0004159421052631579, "loss": 0.6725, "step": 47360 }, { "epoch": 0.47, "learning_rate": 0.0004158631578947368, "loss": 0.666, "step": 47370 }, { "epoch": 0.47, "learning_rate": 0.0004157842105263157, "loss": 0.6741, "step": 47380 }, { "epoch": 0.47, "learning_rate": 0.0004157052631578947, "loss": 0.6695, "step": 47390 }, { "epoch": 0.47, "learning_rate": 0.0004156263157894736, "loss": 0.6607, "step": 47400 }, { "epoch": 0.47, "learning_rate": 0.0004155473684210526, "loss": 0.6789, "step": 47410 }, { "epoch": 0.47, "learning_rate": 0.0004154684210526315, "loss": 0.6655, "step": 47420 }, { "epoch": 0.47, "learning_rate": 0.0004153894736842105, "loss": 0.6711, "step": 47430 }, { "epoch": 0.47, "learning_rate": 0.0004153105263157894, "loss": 0.6638, "step": 47440 }, { "epoch": 0.47, "learning_rate": 0.0004152315789473684, "loss": 0.6634, "step": 47450 }, { "epoch": 0.47, "learning_rate": 0.00041515263157894736, "loss": 0.6739, "step": 47460 }, { "epoch": 0.47, "learning_rate": 0.00041507368421052625, "loss": 0.6712, "step": 47470 }, { "epoch": 0.47, "learning_rate": 0.00041499473684210525, "loss": 0.6556, "step": 47480 }, { "epoch": 0.47, "learning_rate": 0.00041491578947368415, "loss": 0.6629, "step": 47490 }, { "epoch": 0.47, "learning_rate": 0.00041483684210526315, "loss": 0.6603, "step": 47500 }, { "epoch": 0.47, "eval_accuracy": 0.8611843722171862, "eval_loss": 0.65625, "eval_runtime": 97.2871, "eval_samples_per_second": 822.308, "eval_steps_per_second": 1.614, "step": 47500 }, { "epoch": 0.48, "learning_rate": 0.00041475789473684204, "loss": 0.6665, "step": 47510 }, { "epoch": 0.48, "learning_rate": 0.00041467894736842105, "loss": 0.6721, "step": 47520 }, { "epoch": 0.48, "learning_rate": 0.00041459999999999994, "loss": 0.6617, "step": 47530 }, { "epoch": 0.48, "learning_rate": 0.0004145210526315789, "loss": 0.662, "step": 47540 }, { "epoch": 0.48, "learning_rate": 0.0004144421052631579, "loss": 0.6685, "step": 47550 }, { "epoch": 0.48, "learning_rate": 0.0004143631578947368, "loss": 0.665, "step": 47560 }, { "epoch": 0.48, "learning_rate": 0.0004142842105263158, "loss": 0.6763, "step": 47570 }, { "epoch": 0.48, "learning_rate": 0.0004142052631578947, "loss": 0.6729, "step": 47580 }, { "epoch": 0.48, "learning_rate": 0.0004141263157894737, "loss": 0.6688, "step": 47590 }, { "epoch": 0.48, "learning_rate": 0.0004140473684210526, "loss": 0.6775, "step": 47600 }, { "epoch": 0.48, "learning_rate": 0.0004139684210526315, "loss": 0.6608, "step": 47610 }, { "epoch": 0.48, "learning_rate": 0.0004138894736842105, "loss": 0.6663, "step": 47620 }, { "epoch": 0.48, "learning_rate": 0.0004138105263157894, "loss": 0.681, "step": 47630 }, { "epoch": 0.48, "learning_rate": 0.0004137315789473684, "loss": 0.6751, "step": 47640 }, { "epoch": 0.48, "learning_rate": 0.0004136526315789473, "loss": 0.6745, "step": 47650 }, { "epoch": 0.48, "learning_rate": 0.0004135736842105263, "loss": 0.6808, "step": 47660 }, { "epoch": 0.48, "learning_rate": 0.0004134947368421052, "loss": 0.6837, "step": 47670 }, { "epoch": 0.48, "learning_rate": 0.00041341578947368416, "loss": 0.6747, "step": 47680 }, { "epoch": 0.48, "learning_rate": 0.0004133368421052631, "loss": 0.6863, "step": 47690 }, { "epoch": 0.48, "learning_rate": 0.00041325789473684206, "loss": 0.6741, "step": 47700 }, { "epoch": 0.48, "learning_rate": 0.000413178947368421, "loss": 0.6686, "step": 47710 }, { "epoch": 0.48, "learning_rate": 0.00041309999999999996, "loss": 0.6508, "step": 47720 }, { "epoch": 0.48, "learning_rate": 0.00041302105263157896, "loss": 0.6655, "step": 47730 }, { "epoch": 0.48, "learning_rate": 0.00041294210526315785, "loss": 0.6534, "step": 47740 }, { "epoch": 0.48, "learning_rate": 0.0004128631578947368, "loss": 0.6614, "step": 47750 }, { "epoch": 0.48, "learning_rate": 0.00041278421052631575, "loss": 0.6596, "step": 47760 }, { "epoch": 0.48, "learning_rate": 0.0004127052631578947, "loss": 0.6649, "step": 47770 }, { "epoch": 0.48, "learning_rate": 0.00041262631578947365, "loss": 0.6662, "step": 47780 }, { "epoch": 0.48, "learning_rate": 0.0004125473684210526, "loss": 0.6594, "step": 47790 }, { "epoch": 0.48, "learning_rate": 0.0004124684210526315, "loss": 0.6673, "step": 47800 }, { "epoch": 0.48, "learning_rate": 0.0004123894736842105, "loss": 0.6615, "step": 47810 }, { "epoch": 0.48, "learning_rate": 0.0004123105263157895, "loss": 0.6646, "step": 47820 }, { "epoch": 0.48, "learning_rate": 0.0004122315789473684, "loss": 0.6659, "step": 47830 }, { "epoch": 0.48, "learning_rate": 0.00041215263157894734, "loss": 0.6614, "step": 47840 }, { "epoch": 0.48, "learning_rate": 0.0004120736842105263, "loss": 0.6622, "step": 47850 }, { "epoch": 0.48, "learning_rate": 0.00041199473684210523, "loss": 0.6761, "step": 47860 }, { "epoch": 0.48, "learning_rate": 0.00041191578947368413, "loss": 0.6659, "step": 47870 }, { "epoch": 0.48, "learning_rate": 0.00041183684210526313, "loss": 0.6695, "step": 47880 }, { "epoch": 0.48, "learning_rate": 0.000411757894736842, "loss": 0.6684, "step": 47890 }, { "epoch": 0.48, "learning_rate": 0.00041167894736842103, "loss": 0.6677, "step": 47900 }, { "epoch": 0.48, "learning_rate": 0.0004116, "loss": 0.6579, "step": 47910 }, { "epoch": 0.48, "learning_rate": 0.0004115210526315789, "loss": 0.6563, "step": 47920 }, { "epoch": 0.48, "learning_rate": 0.00041144210526315787, "loss": 0.6725, "step": 47930 }, { "epoch": 0.48, "learning_rate": 0.0004113631578947368, "loss": 0.6655, "step": 47940 }, { "epoch": 0.48, "learning_rate": 0.00041128421052631577, "loss": 0.658, "step": 47950 }, { "epoch": 0.48, "learning_rate": 0.00041120526315789466, "loss": 0.683, "step": 47960 }, { "epoch": 0.48, "learning_rate": 0.00041112631578947367, "loss": 0.6687, "step": 47970 }, { "epoch": 0.48, "learning_rate": 0.00041104736842105256, "loss": 0.6673, "step": 47980 }, { "epoch": 0.48, "learning_rate": 0.00041096842105263156, "loss": 0.6745, "step": 47990 }, { "epoch": 0.48, "learning_rate": 0.00041088947368421046, "loss": 0.6717, "step": 48000 }, { "epoch": 0.48, "learning_rate": 0.00041081052631578946, "loss": 0.6718, "step": 48010 }, { "epoch": 0.48, "learning_rate": 0.0004107315789473684, "loss": 0.6721, "step": 48020 }, { "epoch": 0.48, "learning_rate": 0.0004106526315789473, "loss": 0.6806, "step": 48030 }, { "epoch": 0.48, "learning_rate": 0.0004105736842105263, "loss": 0.677, "step": 48040 }, { "epoch": 0.48, "learning_rate": 0.0004104947368421052, "loss": 0.6742, "step": 48050 }, { "epoch": 0.48, "learning_rate": 0.0004104157894736842, "loss": 0.6732, "step": 48060 }, { "epoch": 0.48, "learning_rate": 0.0004103368421052631, "loss": 0.6678, "step": 48070 }, { "epoch": 0.48, "learning_rate": 0.0004102578947368421, "loss": 0.6677, "step": 48080 }, { "epoch": 0.48, "learning_rate": 0.000410178947368421, "loss": 0.6641, "step": 48090 }, { "epoch": 0.48, "learning_rate": 0.00041009999999999994, "loss": 0.68, "step": 48100 }, { "epoch": 0.48, "learning_rate": 0.00041002105263157894, "loss": 0.656, "step": 48110 }, { "epoch": 0.48, "learning_rate": 0.00040994210526315784, "loss": 0.6642, "step": 48120 }, { "epoch": 0.48, "learning_rate": 0.00040986315789473684, "loss": 0.6736, "step": 48130 }, { "epoch": 0.48, "learning_rate": 0.00040978421052631573, "loss": 0.6775, "step": 48140 }, { "epoch": 0.48, "learning_rate": 0.00040970526315789474, "loss": 0.6898, "step": 48150 }, { "epoch": 0.48, "learning_rate": 0.00040962631578947363, "loss": 0.6685, "step": 48160 }, { "epoch": 0.48, "learning_rate": 0.0004095473684210526, "loss": 0.6727, "step": 48170 }, { "epoch": 0.48, "learning_rate": 0.0004094684210526315, "loss": 0.6671, "step": 48180 }, { "epoch": 0.48, "learning_rate": 0.0004093894736842105, "loss": 0.6635, "step": 48190 }, { "epoch": 0.48, "learning_rate": 0.0004093105263157895, "loss": 0.6664, "step": 48200 }, { "epoch": 0.48, "learning_rate": 0.00040923157894736837, "loss": 0.6647, "step": 48210 }, { "epoch": 0.48, "learning_rate": 0.0004091526315789474, "loss": 0.6597, "step": 48220 }, { "epoch": 0.48, "learning_rate": 0.00040907368421052627, "loss": 0.6663, "step": 48230 }, { "epoch": 0.48, "learning_rate": 0.0004089947368421052, "loss": 0.6622, "step": 48240 }, { "epoch": 0.48, "learning_rate": 0.00040891578947368416, "loss": 0.6684, "step": 48250 }, { "epoch": 0.48, "learning_rate": 0.0004088368421052631, "loss": 0.6645, "step": 48260 }, { "epoch": 0.48, "learning_rate": 0.00040875789473684206, "loss": 0.6604, "step": 48270 }, { "epoch": 0.48, "learning_rate": 0.000408678947368421, "loss": 0.6632, "step": 48280 }, { "epoch": 0.48, "learning_rate": 0.0004086, "loss": 0.6635, "step": 48290 }, { "epoch": 0.48, "learning_rate": 0.0004085210526315789, "loss": 0.6639, "step": 48300 }, { "epoch": 0.48, "learning_rate": 0.0004084421052631579, "loss": 0.6704, "step": 48310 }, { "epoch": 0.48, "learning_rate": 0.0004083631578947368, "loss": 0.6704, "step": 48320 }, { "epoch": 0.48, "learning_rate": 0.00040828421052631575, "loss": 0.6678, "step": 48330 }, { "epoch": 0.48, "learning_rate": 0.0004082052631578947, "loss": 0.6555, "step": 48340 }, { "epoch": 0.48, "learning_rate": 0.00040812631578947365, "loss": 0.6691, "step": 48350 }, { "epoch": 0.48, "learning_rate": 0.00040804736842105254, "loss": 0.6684, "step": 48360 }, { "epoch": 0.48, "learning_rate": 0.00040796842105263154, "loss": 0.6829, "step": 48370 }, { "epoch": 0.48, "learning_rate": 0.00040788947368421055, "loss": 0.6842, "step": 48380 }, { "epoch": 0.48, "learning_rate": 0.00040781052631578944, "loss": 0.6686, "step": 48390 }, { "epoch": 0.48, "learning_rate": 0.0004077315789473684, "loss": 0.6714, "step": 48400 }, { "epoch": 0.48, "learning_rate": 0.00040765263157894734, "loss": 0.6656, "step": 48410 }, { "epoch": 0.48, "learning_rate": 0.0004075736842105263, "loss": 0.6594, "step": 48420 }, { "epoch": 0.48, "learning_rate": 0.00040749473684210523, "loss": 0.6602, "step": 48430 }, { "epoch": 0.48, "learning_rate": 0.0004074157894736842, "loss": 0.6531, "step": 48440 }, { "epoch": 0.48, "learning_rate": 0.0004073368421052631, "loss": 0.6682, "step": 48450 }, { "epoch": 0.48, "learning_rate": 0.0004072578947368421, "loss": 0.6538, "step": 48460 }, { "epoch": 0.48, "learning_rate": 0.000407178947368421, "loss": 0.6633, "step": 48470 }, { "epoch": 0.48, "learning_rate": 0.0004071, "loss": 0.6666, "step": 48480 }, { "epoch": 0.48, "learning_rate": 0.0004070210526315789, "loss": 0.677, "step": 48490 }, { "epoch": 0.48, "learning_rate": 0.00040694210526315787, "loss": 0.6798, "step": 48500 }, { "epoch": 0.49, "learning_rate": 0.0004068631578947368, "loss": 0.6707, "step": 48510 }, { "epoch": 0.49, "learning_rate": 0.0004067842105263157, "loss": 0.6508, "step": 48520 }, { "epoch": 0.49, "learning_rate": 0.0004067052631578947, "loss": 0.6545, "step": 48530 }, { "epoch": 0.49, "learning_rate": 0.0004066263157894736, "loss": 0.6535, "step": 48540 }, { "epoch": 0.49, "learning_rate": 0.0004065473684210526, "loss": 0.655, "step": 48550 }, { "epoch": 0.49, "learning_rate": 0.00040646842105263156, "loss": 0.6534, "step": 48560 }, { "epoch": 0.49, "learning_rate": 0.0004063894736842105, "loss": 0.6458, "step": 48570 }, { "epoch": 0.49, "learning_rate": 0.00040631052631578946, "loss": 0.6362, "step": 48580 }, { "epoch": 0.49, "learning_rate": 0.00040623157894736835, "loss": 0.6534, "step": 48590 }, { "epoch": 0.49, "learning_rate": 0.00040615263157894735, "loss": 0.6359, "step": 48600 }, { "epoch": 0.49, "learning_rate": 0.00040607368421052625, "loss": 0.6409, "step": 48610 }, { "epoch": 0.49, "learning_rate": 0.00040599473684210525, "loss": 0.6255, "step": 48620 }, { "epoch": 0.49, "learning_rate": 0.00040591578947368415, "loss": 0.6389, "step": 48630 }, { "epoch": 0.49, "learning_rate": 0.00040583684210526315, "loss": 0.6409, "step": 48640 }, { "epoch": 0.49, "learning_rate": 0.00040575789473684204, "loss": 0.6467, "step": 48650 }, { "epoch": 0.49, "learning_rate": 0.000405678947368421, "loss": 0.6451, "step": 48660 }, { "epoch": 0.49, "learning_rate": 0.0004056, "loss": 0.6524, "step": 48670 }, { "epoch": 0.49, "learning_rate": 0.0004055210526315789, "loss": 0.6358, "step": 48680 }, { "epoch": 0.49, "learning_rate": 0.0004054421052631579, "loss": 0.6621, "step": 48690 }, { "epoch": 0.49, "learning_rate": 0.0004053631578947368, "loss": 0.6634, "step": 48700 }, { "epoch": 0.49, "learning_rate": 0.0004052842105263158, "loss": 0.6623, "step": 48710 }, { "epoch": 0.49, "learning_rate": 0.0004052052631578947, "loss": 0.674, "step": 48720 }, { "epoch": 0.49, "learning_rate": 0.0004051342105263158, "loss": 0.6669, "step": 48730 }, { "epoch": 0.49, "learning_rate": 0.0004050552631578947, "loss": 0.6658, "step": 48740 }, { "epoch": 0.49, "learning_rate": 0.00040497631578947365, "loss": 0.666, "step": 48750 }, { "epoch": 0.49, "learning_rate": 0.0004048973684210526, "loss": 0.67, "step": 48760 }, { "epoch": 0.49, "learning_rate": 0.00040481842105263155, "loss": 0.6487, "step": 48770 }, { "epoch": 0.49, "learning_rate": 0.00040473947368421044, "loss": 0.6497, "step": 48780 }, { "epoch": 0.49, "learning_rate": 0.00040466052631578944, "loss": 0.6421, "step": 48790 }, { "epoch": 0.49, "learning_rate": 0.00040458157894736834, "loss": 0.6627, "step": 48800 }, { "epoch": 0.49, "learning_rate": 0.00040450263157894734, "loss": 0.6718, "step": 48810 }, { "epoch": 0.49, "learning_rate": 0.0004044236842105263, "loss": 0.6636, "step": 48820 }, { "epoch": 0.49, "learning_rate": 0.00040434473684210524, "loss": 0.6705, "step": 48830 }, { "epoch": 0.49, "learning_rate": 0.0004042657894736842, "loss": 0.6773, "step": 48840 }, { "epoch": 0.49, "learning_rate": 0.00040418684210526313, "loss": 0.6702, "step": 48850 }, { "epoch": 0.49, "learning_rate": 0.0004041078947368421, "loss": 0.658, "step": 48860 }, { "epoch": 0.49, "learning_rate": 0.000404028947368421, "loss": 0.6717, "step": 48870 }, { "epoch": 0.49, "learning_rate": 0.00040395, "loss": 0.6848, "step": 48880 }, { "epoch": 0.49, "learning_rate": 0.0004038710526315789, "loss": 0.6769, "step": 48890 }, { "epoch": 0.49, "learning_rate": 0.0004037921052631579, "loss": 0.6567, "step": 48900 }, { "epoch": 0.49, "learning_rate": 0.0004037131578947368, "loss": 0.6604, "step": 48910 }, { "epoch": 0.49, "learning_rate": 0.00040363421052631577, "loss": 0.6588, "step": 48920 }, { "epoch": 0.49, "learning_rate": 0.0004035552631578947, "loss": 0.6665, "step": 48930 }, { "epoch": 0.49, "learning_rate": 0.0004034763157894736, "loss": 0.6531, "step": 48940 }, { "epoch": 0.49, "learning_rate": 0.0004033973684210526, "loss": 0.6615, "step": 48950 }, { "epoch": 0.49, "learning_rate": 0.0004033184210526315, "loss": 0.6544, "step": 48960 }, { "epoch": 0.49, "learning_rate": 0.0004032394736842105, "loss": 0.6442, "step": 48970 }, { "epoch": 0.49, "learning_rate": 0.0004031605263157894, "loss": 0.6489, "step": 48980 }, { "epoch": 0.49, "learning_rate": 0.0004030815789473684, "loss": 0.6303, "step": 48990 }, { "epoch": 0.49, "learning_rate": 0.00040300263157894736, "loss": 0.6541, "step": 49000 }, { "epoch": 0.49, "learning_rate": 0.00040292368421052625, "loss": 0.6591, "step": 49010 }, { "epoch": 0.49, "learning_rate": 0.00040284473684210526, "loss": 0.6793, "step": 49020 }, { "epoch": 0.49, "learning_rate": 0.00040276578947368415, "loss": 0.6784, "step": 49030 }, { "epoch": 0.49, "learning_rate": 0.00040268684210526315, "loss": 0.658, "step": 49040 }, { "epoch": 0.49, "learning_rate": 0.00040260789473684205, "loss": 0.6489, "step": 49050 }, { "epoch": 0.49, "learning_rate": 0.00040252894736842105, "loss": 0.6641, "step": 49060 }, { "epoch": 0.49, "learning_rate": 0.00040244999999999994, "loss": 0.6475, "step": 49070 }, { "epoch": 0.49, "learning_rate": 0.0004023710526315789, "loss": 0.6513, "step": 49080 }, { "epoch": 0.49, "learning_rate": 0.00040229210526315784, "loss": 0.6385, "step": 49090 }, { "epoch": 0.49, "learning_rate": 0.0004022131578947368, "loss": 0.6425, "step": 49100 }, { "epoch": 0.49, "learning_rate": 0.0004021342105263158, "loss": 0.6633, "step": 49110 }, { "epoch": 0.49, "learning_rate": 0.0004020552631578947, "loss": 0.6402, "step": 49120 }, { "epoch": 0.49, "learning_rate": 0.0004019763157894737, "loss": 0.644, "step": 49130 }, { "epoch": 0.49, "learning_rate": 0.0004018973684210526, "loss": 0.665, "step": 49140 }, { "epoch": 0.49, "learning_rate": 0.00040181842105263153, "loss": 0.6602, "step": 49150 }, { "epoch": 0.49, "learning_rate": 0.0004017394736842105, "loss": 0.6461, "step": 49160 }, { "epoch": 0.49, "learning_rate": 0.0004016605263157894, "loss": 0.6612, "step": 49170 }, { "epoch": 0.49, "learning_rate": 0.0004015815789473684, "loss": 0.6735, "step": 49180 }, { "epoch": 0.49, "learning_rate": 0.0004015026315789473, "loss": 0.6647, "step": 49190 }, { "epoch": 0.49, "learning_rate": 0.0004014236842105263, "loss": 0.6665, "step": 49200 }, { "epoch": 0.49, "learning_rate": 0.0004013447368421052, "loss": 0.6698, "step": 49210 }, { "epoch": 0.49, "learning_rate": 0.0004012657894736842, "loss": 0.6686, "step": 49220 }, { "epoch": 0.49, "learning_rate": 0.0004011868421052631, "loss": 0.6732, "step": 49230 }, { "epoch": 0.49, "learning_rate": 0.00040110789473684206, "loss": 0.6715, "step": 49240 }, { "epoch": 0.49, "learning_rate": 0.000401028947368421, "loss": 0.6625, "step": 49250 }, { "epoch": 0.49, "learning_rate": 0.00040094999999999996, "loss": 0.6672, "step": 49260 }, { "epoch": 0.49, "learning_rate": 0.00040087105263157886, "loss": 0.6581, "step": 49270 }, { "epoch": 0.49, "learning_rate": 0.00040079210526315786, "loss": 0.6664, "step": 49280 }, { "epoch": 0.49, "learning_rate": 0.00040071315789473686, "loss": 0.6461, "step": 49290 }, { "epoch": 0.49, "learning_rate": 0.00040063421052631575, "loss": 0.6499, "step": 49300 }, { "epoch": 0.49, "learning_rate": 0.0004005552631578947, "loss": 0.6513, "step": 49310 }, { "epoch": 0.49, "learning_rate": 0.00040047631578947365, "loss": 0.65, "step": 49320 }, { "epoch": 0.49, "learning_rate": 0.0004003973684210526, "loss": 0.6534, "step": 49330 }, { "epoch": 0.49, "learning_rate": 0.00040031842105263155, "loss": 0.6588, "step": 49340 }, { "epoch": 0.49, "learning_rate": 0.0004002394736842105, "loss": 0.6739, "step": 49350 }, { "epoch": 0.49, "learning_rate": 0.0004001605263157894, "loss": 0.667, "step": 49360 }, { "epoch": 0.49, "learning_rate": 0.0004000815789473684, "loss": 0.6504, "step": 49370 }, { "epoch": 0.49, "learning_rate": 0.00040000263157894734, "loss": 0.6691, "step": 49380 }, { "epoch": 0.49, "learning_rate": 0.0003999236842105263, "loss": 0.66, "step": 49390 }, { "epoch": 0.49, "learning_rate": 0.00039984473684210524, "loss": 0.6655, "step": 49400 }, { "epoch": 0.49, "learning_rate": 0.0003997657894736842, "loss": 0.6522, "step": 49410 }, { "epoch": 0.49, "learning_rate": 0.00039968684210526313, "loss": 0.6568, "step": 49420 }, { "epoch": 0.49, "learning_rate": 0.00039960789473684203, "loss": 0.6421, "step": 49430 }, { "epoch": 0.49, "learning_rate": 0.00039952894736842103, "loss": 0.6648, "step": 49440 }, { "epoch": 0.49, "learning_rate": 0.0003994499999999999, "loss": 0.6596, "step": 49450 }, { "epoch": 0.49, "learning_rate": 0.0003993710526315789, "loss": 0.6566, "step": 49460 }, { "epoch": 0.49, "learning_rate": 0.0003992921052631579, "loss": 0.6488, "step": 49470 }, { "epoch": 0.49, "learning_rate": 0.0003992131578947368, "loss": 0.6351, "step": 49480 }, { "epoch": 0.49, "learning_rate": 0.00039913421052631577, "loss": 0.6388, "step": 49490 }, { "epoch": 0.49, "learning_rate": 0.00039905526315789467, "loss": 0.6648, "step": 49500 }, { "epoch": 0.5, "learning_rate": 0.00039897631578947367, "loss": 0.6592, "step": 49510 }, { "epoch": 0.5, "learning_rate": 0.00039889736842105256, "loss": 0.6631, "step": 49520 }, { "epoch": 0.5, "learning_rate": 0.00039881842105263157, "loss": 0.663, "step": 49530 }, { "epoch": 0.5, "learning_rate": 0.00039873947368421046, "loss": 0.6561, "step": 49540 }, { "epoch": 0.5, "learning_rate": 0.00039866052631578946, "loss": 0.6616, "step": 49550 }, { "epoch": 0.5, "learning_rate": 0.0003985815789473684, "loss": 0.6621, "step": 49560 }, { "epoch": 0.5, "learning_rate": 0.0003985026315789473, "loss": 0.6695, "step": 49570 }, { "epoch": 0.5, "learning_rate": 0.0003984236842105263, "loss": 0.6611, "step": 49580 }, { "epoch": 0.5, "learning_rate": 0.0003983447368421052, "loss": 0.6498, "step": 49590 }, { "epoch": 0.5, "learning_rate": 0.0003982657894736842, "loss": 0.6701, "step": 49600 }, { "epoch": 0.5, "learning_rate": 0.0003981868421052631, "loss": 0.6551, "step": 49610 }, { "epoch": 0.5, "learning_rate": 0.0003981078947368421, "loss": 0.6552, "step": 49620 }, { "epoch": 0.5, "learning_rate": 0.000398028947368421, "loss": 0.6551, "step": 49630 }, { "epoch": 0.5, "learning_rate": 0.00039794999999999994, "loss": 0.6551, "step": 49640 }, { "epoch": 0.5, "learning_rate": 0.0003978710526315789, "loss": 0.6727, "step": 49650 }, { "epoch": 0.5, "learning_rate": 0.00039779210526315784, "loss": 0.6609, "step": 49660 }, { "epoch": 0.5, "learning_rate": 0.00039771315789473684, "loss": 0.6655, "step": 49670 }, { "epoch": 0.5, "learning_rate": 0.00039763421052631574, "loss": 0.659, "step": 49680 }, { "epoch": 0.5, "learning_rate": 0.00039755526315789474, "loss": 0.6646, "step": 49690 }, { "epoch": 0.5, "learning_rate": 0.00039747631578947363, "loss": 0.6639, "step": 49700 }, { "epoch": 0.5, "learning_rate": 0.00039739736842105263, "loss": 0.6631, "step": 49710 }, { "epoch": 0.5, "learning_rate": 0.00039731842105263153, "loss": 0.6661, "step": 49720 }, { "epoch": 0.5, "learning_rate": 0.0003972394736842105, "loss": 0.6596, "step": 49730 }, { "epoch": 0.5, "learning_rate": 0.0003971605263157894, "loss": 0.6568, "step": 49740 }, { "epoch": 0.5, "learning_rate": 0.0003970815789473684, "loss": 0.6572, "step": 49750 }, { "epoch": 0.5, "learning_rate": 0.0003970026315789474, "loss": 0.6661, "step": 49760 }, { "epoch": 0.5, "learning_rate": 0.00039692368421052627, "loss": 0.6573, "step": 49770 }, { "epoch": 0.5, "learning_rate": 0.00039684473684210527, "loss": 0.6563, "step": 49780 }, { "epoch": 0.5, "learning_rate": 0.00039676578947368417, "loss": 0.6427, "step": 49790 }, { "epoch": 0.5, "learning_rate": 0.0003966868421052631, "loss": 0.6553, "step": 49800 }, { "epoch": 0.5, "learning_rate": 0.00039660789473684206, "loss": 0.6391, "step": 49810 }, { "epoch": 0.5, "learning_rate": 0.000396528947368421, "loss": 0.6505, "step": 49820 }, { "epoch": 0.5, "learning_rate": 0.00039644999999999996, "loss": 0.6568, "step": 49830 }, { "epoch": 0.5, "learning_rate": 0.0003963710526315789, "loss": 0.6554, "step": 49840 }, { "epoch": 0.5, "learning_rate": 0.0003962921052631579, "loss": 0.6532, "step": 49850 }, { "epoch": 0.5, "learning_rate": 0.0003962131578947368, "loss": 0.6553, "step": 49860 }, { "epoch": 0.5, "learning_rate": 0.00039613421052631575, "loss": 0.6671, "step": 49870 }, { "epoch": 0.5, "learning_rate": 0.0003960552631578947, "loss": 0.6608, "step": 49880 }, { "epoch": 0.5, "learning_rate": 0.00039597631578947365, "loss": 0.6586, "step": 49890 }, { "epoch": 0.5, "learning_rate": 0.0003958973684210526, "loss": 0.6551, "step": 49900 }, { "epoch": 0.5, "learning_rate": 0.00039581842105263155, "loss": 0.6553, "step": 49910 }, { "epoch": 0.5, "learning_rate": 0.00039573947368421044, "loss": 0.6585, "step": 49920 }, { "epoch": 0.5, "learning_rate": 0.00039566052631578944, "loss": 0.6604, "step": 49930 }, { "epoch": 0.5, "learning_rate": 0.0003955815789473684, "loss": 0.6602, "step": 49940 }, { "epoch": 0.5, "learning_rate": 0.00039550263157894734, "loss": 0.6503, "step": 49950 }, { "epoch": 0.5, "learning_rate": 0.0003954236842105263, "loss": 0.6615, "step": 49960 }, { "epoch": 0.5, "learning_rate": 0.00039534473684210524, "loss": 0.6618, "step": 49970 }, { "epoch": 0.5, "learning_rate": 0.0003952657894736842, "loss": 0.6545, "step": 49980 }, { "epoch": 0.5, "learning_rate": 0.0003951868421052631, "loss": 0.6545, "step": 49990 }, { "epoch": 0.5, "learning_rate": 0.0003951078947368421, "loss": 0.6467, "step": 50000 }, { "epoch": 0.5, "eval_accuracy": 0.8627790655085218, "eval_loss": 0.6474609375, "eval_runtime": 97.2319, "eval_samples_per_second": 822.775, "eval_steps_per_second": 1.615, "step": 50000 }, { "epoch": 0.5, "learning_rate": 0.000395028947368421, "loss": 0.6478, "step": 50010 }, { "epoch": 0.5, "learning_rate": 0.00039495, "loss": 0.6406, "step": 50020 }, { "epoch": 0.5, "learning_rate": 0.0003948710526315789, "loss": 0.6548, "step": 50030 }, { "epoch": 0.5, "learning_rate": 0.0003947921052631579, "loss": 0.6629, "step": 50040 }, { "epoch": 0.5, "learning_rate": 0.0003947131578947368, "loss": 0.6648, "step": 50050 }, { "epoch": 0.5, "learning_rate": 0.0003946342105263157, "loss": 0.6518, "step": 50060 }, { "epoch": 0.5, "learning_rate": 0.0003945552631578947, "loss": 0.6682, "step": 50070 }, { "epoch": 0.5, "learning_rate": 0.0003944763157894736, "loss": 0.6668, "step": 50080 }, { "epoch": 0.5, "learning_rate": 0.0003943973684210526, "loss": 0.6572, "step": 50090 }, { "epoch": 0.5, "learning_rate": 0.0003943184210526315, "loss": 0.6682, "step": 50100 }, { "epoch": 0.5, "learning_rate": 0.0003942394736842105, "loss": 0.6581, "step": 50110 }, { "epoch": 0.5, "learning_rate": 0.00039416052631578946, "loss": 0.644, "step": 50120 }, { "epoch": 0.5, "learning_rate": 0.00039408157894736835, "loss": 0.6578, "step": 50130 }, { "epoch": 0.5, "learning_rate": 0.00039400263157894736, "loss": 0.6699, "step": 50140 }, { "epoch": 0.5, "learning_rate": 0.00039392368421052625, "loss": 0.6599, "step": 50150 }, { "epoch": 0.5, "learning_rate": 0.00039384473684210525, "loss": 0.6607, "step": 50160 }, { "epoch": 0.5, "learning_rate": 0.00039376578947368415, "loss": 0.6576, "step": 50170 }, { "epoch": 0.5, "learning_rate": 0.00039368684210526315, "loss": 0.6477, "step": 50180 }, { "epoch": 0.5, "learning_rate": 0.00039360789473684204, "loss": 0.6685, "step": 50190 }, { "epoch": 0.5, "learning_rate": 0.00039352894736842105, "loss": 0.6593, "step": 50200 }, { "epoch": 0.5, "learning_rate": 0.00039345, "loss": 0.6451, "step": 50210 }, { "epoch": 0.5, "learning_rate": 0.0003933710526315789, "loss": 0.6577, "step": 50220 }, { "epoch": 0.5, "learning_rate": 0.0003932921052631579, "loss": 0.6486, "step": 50230 }, { "epoch": 0.5, "learning_rate": 0.0003932131578947368, "loss": 0.6487, "step": 50240 }, { "epoch": 0.5, "learning_rate": 0.0003931342105263158, "loss": 0.6562, "step": 50250 }, { "epoch": 0.5, "learning_rate": 0.0003930552631578947, "loss": 0.6566, "step": 50260 }, { "epoch": 0.5, "learning_rate": 0.0003929763157894737, "loss": 0.6451, "step": 50270 }, { "epoch": 0.5, "learning_rate": 0.0003928973684210526, "loss": 0.6526, "step": 50280 }, { "epoch": 0.5, "learning_rate": 0.00039281842105263153, "loss": 0.65, "step": 50290 }, { "epoch": 0.5, "learning_rate": 0.0003927394736842105, "loss": 0.6615, "step": 50300 }, { "epoch": 0.5, "learning_rate": 0.0003926605263157894, "loss": 0.6484, "step": 50310 }, { "epoch": 0.5, "learning_rate": 0.0003925815789473684, "loss": 0.6562, "step": 50320 }, { "epoch": 0.5, "learning_rate": 0.0003925026315789473, "loss": 0.6456, "step": 50330 }, { "epoch": 0.5, "learning_rate": 0.0003924236842105263, "loss": 0.6401, "step": 50340 }, { "epoch": 0.5, "learning_rate": 0.0003923447368421052, "loss": 0.6466, "step": 50350 }, { "epoch": 0.5, "learning_rate": 0.00039226578947368417, "loss": 0.656, "step": 50360 }, { "epoch": 0.5, "learning_rate": 0.0003921868421052631, "loss": 0.6617, "step": 50370 }, { "epoch": 0.5, "learning_rate": 0.00039210789473684206, "loss": 0.6593, "step": 50380 }, { "epoch": 0.5, "learning_rate": 0.000392028947368421, "loss": 0.6499, "step": 50390 }, { "epoch": 0.5, "learning_rate": 0.00039194999999999996, "loss": 0.6606, "step": 50400 }, { "epoch": 0.5, "learning_rate": 0.000391878947368421, "loss": 0.6769, "step": 50410 }, { "epoch": 0.5, "learning_rate": 0.0003918, "loss": 0.6655, "step": 50420 }, { "epoch": 0.5, "learning_rate": 0.0003917210526315789, "loss": 0.6597, "step": 50430 }, { "epoch": 0.5, "learning_rate": 0.0003916421052631579, "loss": 0.6629, "step": 50440 }, { "epoch": 0.5, "learning_rate": 0.0003915631578947368, "loss": 0.6632, "step": 50450 }, { "epoch": 0.5, "learning_rate": 0.0003914842105263158, "loss": 0.6738, "step": 50460 }, { "epoch": 0.5, "learning_rate": 0.0003914052631578947, "loss": 0.6579, "step": 50470 }, { "epoch": 0.5, "learning_rate": 0.0003913263157894736, "loss": 0.6583, "step": 50480 }, { "epoch": 0.5, "learning_rate": 0.0003912473684210526, "loss": 0.6537, "step": 50490 }, { "epoch": 0.51, "learning_rate": 0.0003911684210526315, "loss": 0.6484, "step": 50500 }, { "epoch": 0.51, "learning_rate": 0.0003910894736842105, "loss": 0.657, "step": 50510 }, { "epoch": 0.51, "learning_rate": 0.0003910105263157894, "loss": 0.6687, "step": 50520 }, { "epoch": 0.51, "learning_rate": 0.0003909315789473684, "loss": 0.6594, "step": 50530 }, { "epoch": 0.51, "learning_rate": 0.0003908526315789473, "loss": 0.6434, "step": 50540 }, { "epoch": 0.51, "learning_rate": 0.00039077368421052626, "loss": 0.6427, "step": 50550 }, { "epoch": 0.51, "learning_rate": 0.00039069473684210526, "loss": 0.6594, "step": 50560 }, { "epoch": 0.51, "learning_rate": 0.00039061578947368415, "loss": 0.6624, "step": 50570 }, { "epoch": 0.51, "learning_rate": 0.00039053684210526316, "loss": 0.6478, "step": 50580 }, { "epoch": 0.51, "learning_rate": 0.00039045789473684205, "loss": 0.6563, "step": 50590 }, { "epoch": 0.51, "learning_rate": 0.00039037894736842105, "loss": 0.6623, "step": 50600 }, { "epoch": 0.51, "learning_rate": 0.00039029999999999995, "loss": 0.645, "step": 50610 }, { "epoch": 0.51, "learning_rate": 0.00039022105263157895, "loss": 0.6657, "step": 50620 }, { "epoch": 0.51, "learning_rate": 0.00039014210526315784, "loss": 0.6548, "step": 50630 }, { "epoch": 0.51, "learning_rate": 0.0003900631578947368, "loss": 0.6546, "step": 50640 }, { "epoch": 0.51, "learning_rate": 0.0003899842105263158, "loss": 0.65, "step": 50650 }, { "epoch": 0.51, "learning_rate": 0.0003899052631578947, "loss": 0.6426, "step": 50660 }, { "epoch": 0.51, "learning_rate": 0.0003898263157894737, "loss": 0.6462, "step": 50670 }, { "epoch": 0.51, "learning_rate": 0.0003897473684210526, "loss": 0.647, "step": 50680 }, { "epoch": 0.51, "learning_rate": 0.0003896684210526316, "loss": 0.6356, "step": 50690 }, { "epoch": 0.51, "learning_rate": 0.0003895894736842105, "loss": 0.6479, "step": 50700 }, { "epoch": 0.51, "learning_rate": 0.00038951052631578943, "loss": 0.6553, "step": 50710 }, { "epoch": 0.51, "learning_rate": 0.0003894315789473684, "loss": 0.6536, "step": 50720 }, { "epoch": 0.51, "learning_rate": 0.0003893526315789473, "loss": 0.6348, "step": 50730 }, { "epoch": 0.51, "learning_rate": 0.0003892736842105263, "loss": 0.639, "step": 50740 }, { "epoch": 0.51, "learning_rate": 0.0003891947368421052, "loss": 0.6415, "step": 50750 }, { "epoch": 0.51, "learning_rate": 0.0003891157894736842, "loss": 0.6507, "step": 50760 }, { "epoch": 0.51, "learning_rate": 0.0003890368421052631, "loss": 0.6582, "step": 50770 }, { "epoch": 0.51, "learning_rate": 0.00038895789473684207, "loss": 0.6578, "step": 50780 }, { "epoch": 0.51, "learning_rate": 0.000388878947368421, "loss": 0.6626, "step": 50790 }, { "epoch": 0.51, "learning_rate": 0.00038879999999999996, "loss": 0.6634, "step": 50800 }, { "epoch": 0.51, "learning_rate": 0.0003887210526315789, "loss": 0.6446, "step": 50810 }, { "epoch": 0.51, "learning_rate": 0.00038864210526315786, "loss": 0.6516, "step": 50820 }, { "epoch": 0.51, "learning_rate": 0.00038856315789473675, "loss": 0.6617, "step": 50830 }, { "epoch": 0.51, "learning_rate": 0.00038848421052631576, "loss": 0.65, "step": 50840 }, { "epoch": 0.51, "learning_rate": 0.0003884052631578947, "loss": 0.6599, "step": 50850 }, { "epoch": 0.51, "learning_rate": 0.00038832631578947365, "loss": 0.6413, "step": 50860 }, { "epoch": 0.51, "learning_rate": 0.0003882473684210526, "loss": 0.6392, "step": 50870 }, { "epoch": 0.51, "learning_rate": 0.00038816842105263155, "loss": 0.6411, "step": 50880 }, { "epoch": 0.51, "learning_rate": 0.0003880894736842105, "loss": 0.648, "step": 50890 }, { "epoch": 0.51, "learning_rate": 0.0003880105263157894, "loss": 0.6573, "step": 50900 }, { "epoch": 0.51, "learning_rate": 0.0003879315789473684, "loss": 0.6687, "step": 50910 }, { "epoch": 0.51, "learning_rate": 0.0003878526315789473, "loss": 0.6561, "step": 50920 }, { "epoch": 0.51, "learning_rate": 0.0003877736842105263, "loss": 0.6528, "step": 50930 }, { "epoch": 0.51, "learning_rate": 0.00038769473684210524, "loss": 0.6566, "step": 50940 }, { "epoch": 0.51, "learning_rate": 0.0003876157894736842, "loss": 0.6565, "step": 50950 }, { "epoch": 0.51, "learning_rate": 0.00038753684210526314, "loss": 0.655, "step": 50960 }, { "epoch": 0.51, "learning_rate": 0.00038745789473684203, "loss": 0.6358, "step": 50970 }, { "epoch": 0.51, "learning_rate": 0.00038737894736842103, "loss": 0.6547, "step": 50980 }, { "epoch": 0.51, "learning_rate": 0.00038729999999999993, "loss": 0.6545, "step": 50990 }, { "epoch": 0.51, "learning_rate": 0.00038722105263157893, "loss": 0.641, "step": 51000 }, { "epoch": 0.51, "learning_rate": 0.0003871421052631578, "loss": 0.6476, "step": 51010 }, { "epoch": 0.51, "learning_rate": 0.0003870631578947368, "loss": 0.6519, "step": 51020 }, { "epoch": 0.51, "learning_rate": 0.0003869842105263158, "loss": 0.6684, "step": 51030 }, { "epoch": 0.51, "learning_rate": 0.0003869052631578947, "loss": 0.64, "step": 51040 }, { "epoch": 0.51, "learning_rate": 0.00038682631578947367, "loss": 0.6486, "step": 51050 }, { "epoch": 0.51, "learning_rate": 0.00038674736842105257, "loss": 0.6557, "step": 51060 }, { "epoch": 0.51, "learning_rate": 0.00038666842105263157, "loss": 0.6658, "step": 51070 }, { "epoch": 0.51, "learning_rate": 0.00038658947368421046, "loss": 0.6544, "step": 51080 }, { "epoch": 0.51, "learning_rate": 0.00038651052631578946, "loss": 0.6711, "step": 51090 }, { "epoch": 0.51, "learning_rate": 0.00038643157894736836, "loss": 0.6641, "step": 51100 }, { "epoch": 0.51, "learning_rate": 0.00038635263157894736, "loss": 0.6559, "step": 51110 }, { "epoch": 0.51, "learning_rate": 0.0003862736842105263, "loss": 0.6534, "step": 51120 }, { "epoch": 0.51, "learning_rate": 0.0003861947368421052, "loss": 0.6714, "step": 51130 }, { "epoch": 0.51, "learning_rate": 0.0003861157894736842, "loss": 0.6672, "step": 51140 }, { "epoch": 0.51, "learning_rate": 0.0003860368421052631, "loss": 0.6625, "step": 51150 }, { "epoch": 0.51, "learning_rate": 0.0003859578947368421, "loss": 0.6477, "step": 51160 }, { "epoch": 0.51, "learning_rate": 0.000385878947368421, "loss": 0.6416, "step": 51170 }, { "epoch": 0.51, "learning_rate": 0.0003858, "loss": 0.6352, "step": 51180 }, { "epoch": 0.51, "learning_rate": 0.0003857210526315789, "loss": 0.6331, "step": 51190 }, { "epoch": 0.51, "learning_rate": 0.00038564210526315784, "loss": 0.644, "step": 51200 }, { "epoch": 0.51, "learning_rate": 0.00038556315789473684, "loss": 0.6553, "step": 51210 }, { "epoch": 0.51, "learning_rate": 0.00038548421052631574, "loss": 0.655, "step": 51220 }, { "epoch": 0.51, "learning_rate": 0.00038540526315789474, "loss": 0.6516, "step": 51230 }, { "epoch": 0.51, "learning_rate": 0.00038532631578947363, "loss": 0.658, "step": 51240 }, { "epoch": 0.51, "learning_rate": 0.00038524736842105264, "loss": 0.6545, "step": 51250 }, { "epoch": 0.51, "learning_rate": 0.00038516842105263153, "loss": 0.6655, "step": 51260 }, { "epoch": 0.51, "learning_rate": 0.0003850894736842105, "loss": 0.66, "step": 51270 }, { "epoch": 0.51, "learning_rate": 0.00038501052631578943, "loss": 0.6617, "step": 51280 }, { "epoch": 0.51, "learning_rate": 0.0003849315789473684, "loss": 0.6322, "step": 51290 }, { "epoch": 0.51, "learning_rate": 0.0003848526315789474, "loss": 0.6551, "step": 51300 }, { "epoch": 0.51, "learning_rate": 0.0003847736842105263, "loss": 0.6413, "step": 51310 }, { "epoch": 0.51, "learning_rate": 0.0003846947368421053, "loss": 0.6444, "step": 51320 }, { "epoch": 0.51, "learning_rate": 0.00038461578947368417, "loss": 0.6408, "step": 51330 }, { "epoch": 0.51, "learning_rate": 0.0003845368421052631, "loss": 0.643, "step": 51340 }, { "epoch": 0.51, "learning_rate": 0.00038445789473684207, "loss": 0.6361, "step": 51350 }, { "epoch": 0.51, "learning_rate": 0.000384378947368421, "loss": 0.6386, "step": 51360 }, { "epoch": 0.51, "learning_rate": 0.00038429999999999996, "loss": 0.6516, "step": 51370 }, { "epoch": 0.51, "learning_rate": 0.0003842210526315789, "loss": 0.6461, "step": 51380 }, { "epoch": 0.51, "learning_rate": 0.0003841421052631578, "loss": 0.6445, "step": 51390 }, { "epoch": 0.51, "learning_rate": 0.0003840631578947368, "loss": 0.652, "step": 51400 }, { "epoch": 0.51, "learning_rate": 0.00038398421052631576, "loss": 0.646, "step": 51410 }, { "epoch": 0.51, "learning_rate": 0.0003839052631578947, "loss": 0.6523, "step": 51420 }, { "epoch": 0.51, "learning_rate": 0.00038382631578947365, "loss": 0.651, "step": 51430 }, { "epoch": 0.51, "learning_rate": 0.0003837473684210526, "loss": 0.6525, "step": 51440 }, { "epoch": 0.51, "learning_rate": 0.00038366842105263155, "loss": 0.6591, "step": 51450 }, { "epoch": 0.51, "learning_rate": 0.00038358947368421044, "loss": 0.6548, "step": 51460 }, { "epoch": 0.51, "learning_rate": 0.00038351052631578945, "loss": 0.6574, "step": 51470 }, { "epoch": 0.51, "learning_rate": 0.00038343157894736834, "loss": 0.6645, "step": 51480 }, { "epoch": 0.51, "learning_rate": 0.00038335263157894734, "loss": 0.6542, "step": 51490 }, { "epoch": 0.52, "learning_rate": 0.0003832736842105263, "loss": 0.6521, "step": 51500 }, { "epoch": 0.52, "learning_rate": 0.00038319473684210524, "loss": 0.6574, "step": 51510 }, { "epoch": 0.52, "learning_rate": 0.0003831157894736842, "loss": 0.6604, "step": 51520 }, { "epoch": 0.52, "learning_rate": 0.00038303684210526314, "loss": 0.6465, "step": 51530 }, { "epoch": 0.52, "learning_rate": 0.0003829578947368421, "loss": 0.6578, "step": 51540 }, { "epoch": 0.52, "learning_rate": 0.000382878947368421, "loss": 0.6521, "step": 51550 }, { "epoch": 0.52, "learning_rate": 0.0003828, "loss": 0.6559, "step": 51560 }, { "epoch": 0.52, "learning_rate": 0.0003827210526315789, "loss": 0.6621, "step": 51570 }, { "epoch": 0.52, "learning_rate": 0.0003826421052631579, "loss": 0.6728, "step": 51580 }, { "epoch": 0.52, "learning_rate": 0.0003825631578947368, "loss": 0.663, "step": 51590 }, { "epoch": 0.52, "learning_rate": 0.0003824842105263158, "loss": 0.6561, "step": 51600 }, { "epoch": 0.52, "learning_rate": 0.0003824052631578947, "loss": 0.653, "step": 51610 }, { "epoch": 0.52, "learning_rate": 0.0003823263157894736, "loss": 0.6648, "step": 51620 }, { "epoch": 0.52, "learning_rate": 0.0003822473684210526, "loss": 0.6565, "step": 51630 }, { "epoch": 0.52, "learning_rate": 0.0003821684210526315, "loss": 0.672, "step": 51640 }, { "epoch": 0.52, "learning_rate": 0.0003820894736842105, "loss": 0.6618, "step": 51650 }, { "epoch": 0.52, "learning_rate": 0.0003820105263157894, "loss": 0.666, "step": 51660 }, { "epoch": 0.52, "learning_rate": 0.0003819315789473684, "loss": 0.6613, "step": 51670 }, { "epoch": 0.52, "learning_rate": 0.00038185263157894736, "loss": 0.6443, "step": 51680 }, { "epoch": 0.52, "learning_rate": 0.00038177368421052625, "loss": 0.6462, "step": 51690 }, { "epoch": 0.52, "learning_rate": 0.00038169473684210526, "loss": 0.6637, "step": 51700 }, { "epoch": 0.52, "learning_rate": 0.00038161578947368415, "loss": 0.6504, "step": 51710 }, { "epoch": 0.52, "learning_rate": 0.00038153684210526315, "loss": 0.6587, "step": 51720 }, { "epoch": 0.52, "learning_rate": 0.00038145789473684205, "loss": 0.6591, "step": 51730 }, { "epoch": 0.52, "learning_rate": 0.00038137894736842105, "loss": 0.6515, "step": 51740 }, { "epoch": 0.52, "learning_rate": 0.00038129999999999994, "loss": 0.6455, "step": 51750 }, { "epoch": 0.52, "learning_rate": 0.0003812210526315789, "loss": 0.6511, "step": 51760 }, { "epoch": 0.52, "learning_rate": 0.0003811421052631579, "loss": 0.6576, "step": 51770 }, { "epoch": 0.52, "learning_rate": 0.0003810631578947368, "loss": 0.6549, "step": 51780 }, { "epoch": 0.52, "learning_rate": 0.0003809842105263158, "loss": 0.6584, "step": 51790 }, { "epoch": 0.52, "learning_rate": 0.0003809052631578947, "loss": 0.6557, "step": 51800 }, { "epoch": 0.52, "learning_rate": 0.0003808263157894737, "loss": 0.6624, "step": 51810 }, { "epoch": 0.52, "learning_rate": 0.0003807473684210526, "loss": 0.6569, "step": 51820 }, { "epoch": 0.52, "learning_rate": 0.00038066842105263153, "loss": 0.6588, "step": 51830 }, { "epoch": 0.52, "learning_rate": 0.0003805894736842105, "loss": 0.6647, "step": 51840 }, { "epoch": 0.52, "learning_rate": 0.00038051052631578943, "loss": 0.6738, "step": 51850 }, { "epoch": 0.52, "learning_rate": 0.00038043157894736843, "loss": 0.6485, "step": 51860 }, { "epoch": 0.52, "learning_rate": 0.0003803526315789473, "loss": 0.6389, "step": 51870 }, { "epoch": 0.52, "learning_rate": 0.0003802736842105263, "loss": 0.6487, "step": 51880 }, { "epoch": 0.52, "learning_rate": 0.0003801947368421052, "loss": 0.6574, "step": 51890 }, { "epoch": 0.52, "learning_rate": 0.0003801157894736842, "loss": 0.6427, "step": 51900 }, { "epoch": 0.52, "learning_rate": 0.0003800368421052631, "loss": 0.6439, "step": 51910 }, { "epoch": 0.52, "learning_rate": 0.00037995789473684207, "loss": 0.6496, "step": 51920 }, { "epoch": 0.52, "learning_rate": 0.000379878947368421, "loss": 0.6539, "step": 51930 }, { "epoch": 0.52, "learning_rate": 0.00037979999999999996, "loss": 0.6414, "step": 51940 }, { "epoch": 0.52, "learning_rate": 0.00037972105263157886, "loss": 0.6469, "step": 51950 }, { "epoch": 0.52, "learning_rate": 0.00037964210526315786, "loss": 0.6624, "step": 51960 }, { "epoch": 0.52, "learning_rate": 0.00037956315789473686, "loss": 0.6523, "step": 51970 }, { "epoch": 0.52, "learning_rate": 0.00037948421052631576, "loss": 0.6602, "step": 51980 }, { "epoch": 0.52, "learning_rate": 0.0003794052631578947, "loss": 0.6606, "step": 51990 }, { "epoch": 0.52, "learning_rate": 0.00037932631578947365, "loss": 0.6581, "step": 52000 }, { "epoch": 0.52, "learning_rate": 0.0003792473684210526, "loss": 0.661, "step": 52010 }, { "epoch": 0.52, "learning_rate": 0.00037916842105263155, "loss": 0.6523, "step": 52020 }, { "epoch": 0.52, "learning_rate": 0.0003790894736842105, "loss": 0.6457, "step": 52030 }, { "epoch": 0.52, "learning_rate": 0.0003790105263157894, "loss": 0.6389, "step": 52040 }, { "epoch": 0.52, "learning_rate": 0.0003789315789473684, "loss": 0.6602, "step": 52050 }, { "epoch": 0.52, "learning_rate": 0.00037885263157894734, "loss": 0.6364, "step": 52060 }, { "epoch": 0.52, "learning_rate": 0.0003787736842105263, "loss": 0.6482, "step": 52070 }, { "epoch": 0.52, "learning_rate": 0.00037869473684210524, "loss": 0.6579, "step": 52080 }, { "epoch": 0.52, "learning_rate": 0.0003786157894736842, "loss": 0.6532, "step": 52090 }, { "epoch": 0.52, "learning_rate": 0.00037853684210526313, "loss": 0.6534, "step": 52100 }, { "epoch": 0.52, "learning_rate": 0.00037845789473684203, "loss": 0.6596, "step": 52110 }, { "epoch": 0.52, "learning_rate": 0.00037837894736842103, "loss": 0.6409, "step": 52120 }, { "epoch": 0.52, "learning_rate": 0.00037830789473684205, "loss": 0.6608, "step": 52130 }, { "epoch": 0.52, "learning_rate": 0.00037822894736842105, "loss": 0.6557, "step": 52140 }, { "epoch": 0.52, "learning_rate": 0.00037814999999999995, "loss": 0.6693, "step": 52150 }, { "epoch": 0.52, "learning_rate": 0.00037807105263157895, "loss": 0.6585, "step": 52160 }, { "epoch": 0.52, "learning_rate": 0.00037799210526315785, "loss": 0.6476, "step": 52170 }, { "epoch": 0.52, "learning_rate": 0.0003779131578947368, "loss": 0.6557, "step": 52180 }, { "epoch": 0.52, "learning_rate": 0.00037783421052631574, "loss": 0.66, "step": 52190 }, { "epoch": 0.52, "learning_rate": 0.0003777552631578947, "loss": 0.653, "step": 52200 }, { "epoch": 0.52, "learning_rate": 0.0003776763157894737, "loss": 0.6686, "step": 52210 }, { "epoch": 0.52, "learning_rate": 0.0003775973684210526, "loss": 0.6333, "step": 52220 }, { "epoch": 0.52, "learning_rate": 0.0003775184210526316, "loss": 0.6293, "step": 52230 }, { "epoch": 0.52, "learning_rate": 0.0003774394736842105, "loss": 0.6291, "step": 52240 }, { "epoch": 0.52, "learning_rate": 0.00037736052631578943, "loss": 0.6429, "step": 52250 }, { "epoch": 0.52, "learning_rate": 0.0003772815789473684, "loss": 0.6422, "step": 52260 }, { "epoch": 0.52, "learning_rate": 0.00037720263157894733, "loss": 0.634, "step": 52270 }, { "epoch": 0.52, "learning_rate": 0.0003771236842105263, "loss": 0.6351, "step": 52280 }, { "epoch": 0.52, "learning_rate": 0.0003770447368421052, "loss": 0.6347, "step": 52290 }, { "epoch": 0.52, "learning_rate": 0.00037696578947368423, "loss": 0.631, "step": 52300 }, { "epoch": 0.52, "learning_rate": 0.0003768868421052631, "loss": 0.6298, "step": 52310 }, { "epoch": 0.52, "learning_rate": 0.0003768078947368421, "loss": 0.642, "step": 52320 }, { "epoch": 0.52, "learning_rate": 0.000376728947368421, "loss": 0.6384, "step": 52330 }, { "epoch": 0.52, "learning_rate": 0.00037664999999999997, "loss": 0.6417, "step": 52340 }, { "epoch": 0.52, "learning_rate": 0.0003765710526315789, "loss": 0.6363, "step": 52350 }, { "epoch": 0.52, "learning_rate": 0.00037649210526315786, "loss": 0.6414, "step": 52360 }, { "epoch": 0.52, "learning_rate": 0.00037641315789473676, "loss": 0.6366, "step": 52370 }, { "epoch": 0.52, "learning_rate": 0.00037633421052631576, "loss": 0.6137, "step": 52380 }, { "epoch": 0.52, "learning_rate": 0.00037625526315789465, "loss": 0.6338, "step": 52390 }, { "epoch": 0.52, "learning_rate": 0.00037617631578947366, "loss": 0.6376, "step": 52400 }, { "epoch": 0.52, "learning_rate": 0.0003760973684210526, "loss": 0.6323, "step": 52410 }, { "epoch": 0.52, "learning_rate": 0.00037601842105263155, "loss": 0.6304, "step": 52420 }, { "epoch": 0.52, "learning_rate": 0.0003759394736842105, "loss": 0.6433, "step": 52430 }, { "epoch": 0.52, "learning_rate": 0.00037586052631578945, "loss": 0.6372, "step": 52440 }, { "epoch": 0.52, "learning_rate": 0.0003757815789473684, "loss": 0.6331, "step": 52450 }, { "epoch": 0.52, "learning_rate": 0.0003757026315789473, "loss": 0.6432, "step": 52460 }, { "epoch": 0.52, "learning_rate": 0.0003756236842105263, "loss": 0.655, "step": 52470 }, { "epoch": 0.52, "learning_rate": 0.0003755447368421052, "loss": 0.6527, "step": 52480 }, { "epoch": 0.52, "learning_rate": 0.0003754657894736842, "loss": 0.6574, "step": 52490 }, { "epoch": 0.53, "learning_rate": 0.00037538684210526314, "loss": 0.665, "step": 52500 }, { "epoch": 0.53, "eval_accuracy": 0.8649771990754928, "eval_loss": 0.6337890625, "eval_runtime": 97.2311, "eval_samples_per_second": 822.782, "eval_steps_per_second": 1.615, "step": 52500 }, { "epoch": 0.53, "learning_rate": 0.0003753078947368421, "loss": 0.6597, "step": 52510 }, { "epoch": 0.53, "learning_rate": 0.00037522894736842104, "loss": 0.6546, "step": 52520 }, { "epoch": 0.53, "learning_rate": 0.00037514999999999993, "loss": 0.6526, "step": 52530 }, { "epoch": 0.53, "learning_rate": 0.00037507105263157893, "loss": 0.6492, "step": 52540 }, { "epoch": 0.53, "learning_rate": 0.0003749921052631578, "loss": 0.6548, "step": 52550 }, { "epoch": 0.53, "learning_rate": 0.00037491315789473683, "loss": 0.6473, "step": 52560 }, { "epoch": 0.53, "learning_rate": 0.0003748342105263157, "loss": 0.6521, "step": 52570 }, { "epoch": 0.53, "learning_rate": 0.0003747552631578947, "loss": 0.6439, "step": 52580 }, { "epoch": 0.53, "learning_rate": 0.0003746763157894737, "loss": 0.6484, "step": 52590 }, { "epoch": 0.53, "learning_rate": 0.00037459736842105257, "loss": 0.653, "step": 52600 }, { "epoch": 0.53, "learning_rate": 0.00037451842105263157, "loss": 0.6508, "step": 52610 }, { "epoch": 0.53, "learning_rate": 0.00037443947368421046, "loss": 0.6658, "step": 52620 }, { "epoch": 0.53, "learning_rate": 0.00037436052631578947, "loss": 0.6619, "step": 52630 }, { "epoch": 0.53, "learning_rate": 0.00037428157894736836, "loss": 0.6419, "step": 52640 }, { "epoch": 0.53, "learning_rate": 0.00037420263157894736, "loss": 0.6504, "step": 52650 }, { "epoch": 0.53, "learning_rate": 0.00037412368421052626, "loss": 0.66, "step": 52660 }, { "epoch": 0.53, "learning_rate": 0.0003740447368421052, "loss": 0.6504, "step": 52670 }, { "epoch": 0.53, "learning_rate": 0.0003739657894736842, "loss": 0.6578, "step": 52680 }, { "epoch": 0.53, "learning_rate": 0.0003738868421052631, "loss": 0.6597, "step": 52690 }, { "epoch": 0.53, "learning_rate": 0.0003738078947368421, "loss": 0.6533, "step": 52700 }, { "epoch": 0.53, "learning_rate": 0.000373728947368421, "loss": 0.662, "step": 52710 }, { "epoch": 0.53, "learning_rate": 0.00037365, "loss": 0.6571, "step": 52720 }, { "epoch": 0.53, "learning_rate": 0.0003735710526315789, "loss": 0.6505, "step": 52730 }, { "epoch": 0.53, "learning_rate": 0.00037349210526315784, "loss": 0.6408, "step": 52740 }, { "epoch": 0.53, "learning_rate": 0.0003734131578947368, "loss": 0.6477, "step": 52750 }, { "epoch": 0.53, "learning_rate": 0.00037333421052631574, "loss": 0.6433, "step": 52760 }, { "epoch": 0.53, "learning_rate": 0.00037325526315789474, "loss": 0.661, "step": 52770 }, { "epoch": 0.53, "learning_rate": 0.00037317631578947364, "loss": 0.6455, "step": 52780 }, { "epoch": 0.53, "learning_rate": 0.00037309736842105264, "loss": 0.6468, "step": 52790 }, { "epoch": 0.53, "learning_rate": 0.00037301842105263153, "loss": 0.6506, "step": 52800 }, { "epoch": 0.53, "learning_rate": 0.00037293947368421054, "loss": 0.6513, "step": 52810 }, { "epoch": 0.53, "learning_rate": 0.00037286052631578943, "loss": 0.651, "step": 52820 }, { "epoch": 0.53, "learning_rate": 0.0003727815789473684, "loss": 0.6478, "step": 52830 }, { "epoch": 0.53, "learning_rate": 0.00037270263157894733, "loss": 0.6523, "step": 52840 }, { "epoch": 0.53, "learning_rate": 0.0003726236842105263, "loss": 0.6505, "step": 52850 }, { "epoch": 0.53, "learning_rate": 0.0003725447368421053, "loss": 0.6493, "step": 52860 }, { "epoch": 0.53, "learning_rate": 0.00037246578947368417, "loss": 0.6512, "step": 52870 }, { "epoch": 0.53, "learning_rate": 0.0003723868421052632, "loss": 0.6598, "step": 52880 }, { "epoch": 0.53, "learning_rate": 0.00037230789473684207, "loss": 0.6508, "step": 52890 }, { "epoch": 0.53, "learning_rate": 0.000372228947368421, "loss": 0.6491, "step": 52900 }, { "epoch": 0.53, "learning_rate": 0.00037214999999999997, "loss": 0.6432, "step": 52910 }, { "epoch": 0.53, "learning_rate": 0.0003720710526315789, "loss": 0.6409, "step": 52920 }, { "epoch": 0.53, "learning_rate": 0.00037199210526315786, "loss": 0.6495, "step": 52930 }, { "epoch": 0.53, "learning_rate": 0.0003719131578947368, "loss": 0.6614, "step": 52940 }, { "epoch": 0.53, "learning_rate": 0.0003718342105263158, "loss": 0.6442, "step": 52950 }, { "epoch": 0.53, "learning_rate": 0.0003717552631578947, "loss": 0.6475, "step": 52960 }, { "epoch": 0.53, "learning_rate": 0.00037167631578947366, "loss": 0.6479, "step": 52970 }, { "epoch": 0.53, "learning_rate": 0.0003715973684210526, "loss": 0.6461, "step": 52980 }, { "epoch": 0.53, "learning_rate": 0.00037151842105263155, "loss": 0.6597, "step": 52990 }, { "epoch": 0.53, "learning_rate": 0.0003714394736842105, "loss": 0.6391, "step": 53000 }, { "epoch": 0.53, "learning_rate": 0.00037136052631578945, "loss": 0.662, "step": 53010 }, { "epoch": 0.53, "learning_rate": 0.00037128157894736834, "loss": 0.657, "step": 53020 }, { "epoch": 0.53, "learning_rate": 0.00037120263157894735, "loss": 0.6601, "step": 53030 }, { "epoch": 0.53, "learning_rate": 0.00037112368421052624, "loss": 0.6527, "step": 53040 }, { "epoch": 0.53, "learning_rate": 0.00037104473684210524, "loss": 0.6575, "step": 53050 }, { "epoch": 0.53, "learning_rate": 0.0003709657894736842, "loss": 0.6459, "step": 53060 }, { "epoch": 0.53, "learning_rate": 0.00037088684210526314, "loss": 0.6423, "step": 53070 }, { "epoch": 0.53, "learning_rate": 0.0003708078947368421, "loss": 0.6438, "step": 53080 }, { "epoch": 0.53, "learning_rate": 0.000370728947368421, "loss": 0.6432, "step": 53090 }, { "epoch": 0.53, "learning_rate": 0.00037065, "loss": 0.6415, "step": 53100 }, { "epoch": 0.53, "learning_rate": 0.0003705710526315789, "loss": 0.6409, "step": 53110 }, { "epoch": 0.53, "learning_rate": 0.0003704921052631579, "loss": 0.6501, "step": 53120 }, { "epoch": 0.53, "learning_rate": 0.0003704131578947368, "loss": 0.6508, "step": 53130 }, { "epoch": 0.53, "learning_rate": 0.0003703342105263158, "loss": 0.6429, "step": 53140 }, { "epoch": 0.53, "learning_rate": 0.0003702552631578947, "loss": 0.6593, "step": 53150 }, { "epoch": 0.53, "learning_rate": 0.0003701763157894736, "loss": 0.6582, "step": 53160 }, { "epoch": 0.53, "learning_rate": 0.0003700973684210526, "loss": 0.6425, "step": 53170 }, { "epoch": 0.53, "learning_rate": 0.0003700184210526315, "loss": 0.6582, "step": 53180 }, { "epoch": 0.53, "learning_rate": 0.0003699394736842105, "loss": 0.6554, "step": 53190 }, { "epoch": 0.53, "learning_rate": 0.0003698605263157894, "loss": 0.6587, "step": 53200 }, { "epoch": 0.53, "learning_rate": 0.00036978947368421054, "loss": 0.6533, "step": 53210 }, { "epoch": 0.53, "learning_rate": 0.00036971052631578944, "loss": 0.656, "step": 53220 }, { "epoch": 0.53, "learning_rate": 0.00036963157894736844, "loss": 0.6619, "step": 53230 }, { "epoch": 0.53, "learning_rate": 0.00036955263157894733, "loss": 0.6465, "step": 53240 }, { "epoch": 0.53, "learning_rate": 0.0003694736842105263, "loss": 0.6526, "step": 53250 }, { "epoch": 0.53, "learning_rate": 0.00036939473684210523, "loss": 0.6582, "step": 53260 }, { "epoch": 0.53, "learning_rate": 0.0003693157894736842, "loss": 0.6478, "step": 53270 }, { "epoch": 0.53, "learning_rate": 0.00036923684210526307, "loss": 0.6473, "step": 53280 }, { "epoch": 0.53, "learning_rate": 0.0003691578947368421, "loss": 0.6559, "step": 53290 }, { "epoch": 0.53, "learning_rate": 0.0003690789473684211, "loss": 0.6522, "step": 53300 }, { "epoch": 0.53, "learning_rate": 0.00036899999999999997, "loss": 0.6699, "step": 53310 }, { "epoch": 0.53, "learning_rate": 0.0003689210526315789, "loss": 0.6507, "step": 53320 }, { "epoch": 0.53, "learning_rate": 0.00036884210526315787, "loss": 0.6577, "step": 53330 }, { "epoch": 0.53, "learning_rate": 0.0003687631578947368, "loss": 0.6501, "step": 53340 }, { "epoch": 0.53, "learning_rate": 0.00036868421052631576, "loss": 0.6583, "step": 53350 }, { "epoch": 0.53, "learning_rate": 0.0003686052631578947, "loss": 0.6705, "step": 53360 }, { "epoch": 0.53, "learning_rate": 0.0003685263157894736, "loss": 0.6437, "step": 53370 }, { "epoch": 0.53, "learning_rate": 0.0003684473684210526, "loss": 0.6594, "step": 53380 }, { "epoch": 0.53, "learning_rate": 0.00036836842105263156, "loss": 0.669, "step": 53390 }, { "epoch": 0.53, "learning_rate": 0.0003682894736842105, "loss": 0.6464, "step": 53400 }, { "epoch": 0.53, "learning_rate": 0.00036821052631578945, "loss": 0.6407, "step": 53410 }, { "epoch": 0.53, "learning_rate": 0.0003681315789473684, "loss": 0.638, "step": 53420 }, { "epoch": 0.53, "learning_rate": 0.00036805263157894735, "loss": 0.6348, "step": 53430 }, { "epoch": 0.53, "learning_rate": 0.00036797368421052624, "loss": 0.6266, "step": 53440 }, { "epoch": 0.53, "learning_rate": 0.00036789473684210525, "loss": 0.6391, "step": 53450 }, { "epoch": 0.53, "learning_rate": 0.00036781578947368414, "loss": 0.6362, "step": 53460 }, { "epoch": 0.53, "learning_rate": 0.00036773684210526314, "loss": 0.6494, "step": 53470 }, { "epoch": 0.53, "learning_rate": 0.00036765789473684204, "loss": 0.6467, "step": 53480 }, { "epoch": 0.53, "learning_rate": 0.00036757894736842104, "loss": 0.6497, "step": 53490 }, { "epoch": 0.54, "learning_rate": 0.0003675, "loss": 0.6531, "step": 53500 }, { "epoch": 0.54, "learning_rate": 0.0003674210526315789, "loss": 0.6574, "step": 53510 }, { "epoch": 0.54, "learning_rate": 0.0003673421052631579, "loss": 0.6492, "step": 53520 }, { "epoch": 0.54, "learning_rate": 0.0003672631578947368, "loss": 0.6602, "step": 53530 }, { "epoch": 0.54, "learning_rate": 0.0003671842105263158, "loss": 0.6606, "step": 53540 }, { "epoch": 0.54, "learning_rate": 0.0003671052631578947, "loss": 0.6547, "step": 53550 }, { "epoch": 0.54, "learning_rate": 0.0003670263157894737, "loss": 0.6544, "step": 53560 }, { "epoch": 0.54, "learning_rate": 0.00036694736842105257, "loss": 0.6526, "step": 53570 }, { "epoch": 0.54, "learning_rate": 0.0003668684210526315, "loss": 0.6398, "step": 53580 }, { "epoch": 0.54, "learning_rate": 0.0003667894736842105, "loss": 0.6504, "step": 53590 }, { "epoch": 0.54, "learning_rate": 0.0003667105263157894, "loss": 0.6406, "step": 53600 }, { "epoch": 0.54, "learning_rate": 0.0003666315789473684, "loss": 0.6389, "step": 53610 }, { "epoch": 0.54, "learning_rate": 0.0003665526315789473, "loss": 0.6466, "step": 53620 }, { "epoch": 0.54, "learning_rate": 0.0003664736842105263, "loss": 0.6452, "step": 53630 }, { "epoch": 0.54, "learning_rate": 0.0003663947368421052, "loss": 0.6412, "step": 53640 }, { "epoch": 0.54, "learning_rate": 0.00036631578947368416, "loss": 0.6413, "step": 53650 }, { "epoch": 0.54, "learning_rate": 0.0003662368421052631, "loss": 0.6475, "step": 53660 }, { "epoch": 0.54, "learning_rate": 0.00036615789473684206, "loss": 0.6469, "step": 53670 }, { "epoch": 0.54, "learning_rate": 0.00036607894736842106, "loss": 0.6475, "step": 53680 }, { "epoch": 0.54, "learning_rate": 0.00036599999999999995, "loss": 0.6378, "step": 53690 }, { "epoch": 0.54, "learning_rate": 0.00036592105263157895, "loss": 0.6466, "step": 53700 }, { "epoch": 0.54, "learning_rate": 0.00036584210526315785, "loss": 0.6557, "step": 53710 }, { "epoch": 0.54, "learning_rate": 0.00036576315789473685, "loss": 0.64, "step": 53720 }, { "epoch": 0.54, "learning_rate": 0.00036568421052631574, "loss": 0.6391, "step": 53730 }, { "epoch": 0.54, "learning_rate": 0.0003656052631578947, "loss": 0.6496, "step": 53740 }, { "epoch": 0.54, "learning_rate": 0.00036552631578947364, "loss": 0.6429, "step": 53750 }, { "epoch": 0.54, "learning_rate": 0.0003654473684210526, "loss": 0.6265, "step": 53760 }, { "epoch": 0.54, "learning_rate": 0.0003653684210526316, "loss": 0.6269, "step": 53770 }, { "epoch": 0.54, "learning_rate": 0.0003652894736842105, "loss": 0.6271, "step": 53780 }, { "epoch": 0.54, "learning_rate": 0.0003652105263157895, "loss": 0.6333, "step": 53790 }, { "epoch": 0.54, "learning_rate": 0.0003651315789473684, "loss": 0.6269, "step": 53800 }, { "epoch": 0.54, "learning_rate": 0.00036505263157894733, "loss": 0.6305, "step": 53810 }, { "epoch": 0.54, "learning_rate": 0.0003649736842105263, "loss": 0.6251, "step": 53820 }, { "epoch": 0.54, "learning_rate": 0.00036489473684210523, "loss": 0.6266, "step": 53830 }, { "epoch": 0.54, "learning_rate": 0.0003648157894736842, "loss": 0.6291, "step": 53840 }, { "epoch": 0.54, "learning_rate": 0.0003647368421052631, "loss": 0.6326, "step": 53850 }, { "epoch": 0.54, "learning_rate": 0.0003646578947368421, "loss": 0.6255, "step": 53860 }, { "epoch": 0.54, "learning_rate": 0.000364578947368421, "loss": 0.6416, "step": 53870 }, { "epoch": 0.54, "learning_rate": 0.00036449999999999997, "loss": 0.6301, "step": 53880 }, { "epoch": 0.54, "learning_rate": 0.0003644210526315789, "loss": 0.6356, "step": 53890 }, { "epoch": 0.54, "learning_rate": 0.00036434210526315787, "loss": 0.6396, "step": 53900 }, { "epoch": 0.54, "learning_rate": 0.0003642631578947368, "loss": 0.6397, "step": 53910 }, { "epoch": 0.54, "learning_rate": 0.00036418421052631576, "loss": 0.624, "step": 53920 }, { "epoch": 0.54, "learning_rate": 0.00036410526315789466, "loss": 0.6294, "step": 53930 }, { "epoch": 0.54, "learning_rate": 0.00036402631578947366, "loss": 0.6254, "step": 53940 }, { "epoch": 0.54, "learning_rate": 0.0003639473684210526, "loss": 0.6404, "step": 53950 }, { "epoch": 0.54, "learning_rate": 0.00036386842105263156, "loss": 0.6266, "step": 53960 }, { "epoch": 0.54, "learning_rate": 0.0003637894736842105, "loss": 0.6206, "step": 53970 }, { "epoch": 0.54, "learning_rate": 0.00036371052631578945, "loss": 0.6368, "step": 53980 }, { "epoch": 0.54, "learning_rate": 0.0003636315789473684, "loss": 0.6368, "step": 53990 }, { "epoch": 0.54, "learning_rate": 0.0003635526315789473, "loss": 0.6188, "step": 54000 }, { "epoch": 0.54, "learning_rate": 0.0003634736842105263, "loss": 0.6486, "step": 54010 }, { "epoch": 0.54, "learning_rate": 0.0003633947368421052, "loss": 0.6524, "step": 54020 }, { "epoch": 0.54, "learning_rate": 0.0003633157894736842, "loss": 0.652, "step": 54030 }, { "epoch": 0.54, "learning_rate": 0.0003632368421052631, "loss": 0.6692, "step": 54040 }, { "epoch": 0.54, "learning_rate": 0.0003631578947368421, "loss": 0.6525, "step": 54050 }, { "epoch": 0.54, "learning_rate": 0.00036307894736842104, "loss": 0.66, "step": 54060 }, { "epoch": 0.54, "learning_rate": 0.00036299999999999993, "loss": 0.6666, "step": 54070 }, { "epoch": 0.54, "learning_rate": 0.00036292105263157894, "loss": 0.6658, "step": 54080 }, { "epoch": 0.54, "learning_rate": 0.00036284210526315783, "loss": 0.6607, "step": 54090 }, { "epoch": 0.54, "learning_rate": 0.00036276315789473683, "loss": 0.644, "step": 54100 }, { "epoch": 0.54, "learning_rate": 0.0003626842105263157, "loss": 0.6426, "step": 54110 }, { "epoch": 0.54, "learning_rate": 0.00036260526315789473, "loss": 0.6392, "step": 54120 }, { "epoch": 0.54, "learning_rate": 0.0003625263157894736, "loss": 0.6323, "step": 54130 }, { "epoch": 0.54, "learning_rate": 0.00036244736842105257, "loss": 0.6458, "step": 54140 }, { "epoch": 0.54, "learning_rate": 0.0003623684210526316, "loss": 0.6532, "step": 54150 }, { "epoch": 0.54, "learning_rate": 0.00036228947368421047, "loss": 0.6577, "step": 54160 }, { "epoch": 0.54, "learning_rate": 0.00036221052631578947, "loss": 0.643, "step": 54170 }, { "epoch": 0.54, "learning_rate": 0.00036213157894736836, "loss": 0.6336, "step": 54180 }, { "epoch": 0.54, "learning_rate": 0.00036205263157894737, "loss": 0.6455, "step": 54190 }, { "epoch": 0.54, "learning_rate": 0.00036197368421052626, "loss": 0.6565, "step": 54200 }, { "epoch": 0.54, "learning_rate": 0.00036189473684210526, "loss": 0.6515, "step": 54210 }, { "epoch": 0.54, "learning_rate": 0.00036181578947368416, "loss": 0.6487, "step": 54220 }, { "epoch": 0.54, "learning_rate": 0.0003617368421052631, "loss": 0.6596, "step": 54230 }, { "epoch": 0.54, "learning_rate": 0.0003616578947368421, "loss": 0.6494, "step": 54240 }, { "epoch": 0.54, "learning_rate": 0.000361578947368421, "loss": 0.6481, "step": 54250 }, { "epoch": 0.54, "learning_rate": 0.0003615, "loss": 0.6374, "step": 54260 }, { "epoch": 0.54, "learning_rate": 0.0003614210526315789, "loss": 0.6385, "step": 54270 }, { "epoch": 0.54, "learning_rate": 0.0003613421052631579, "loss": 0.6185, "step": 54280 }, { "epoch": 0.54, "learning_rate": 0.0003612631578947368, "loss": 0.6492, "step": 54290 }, { "epoch": 0.54, "learning_rate": 0.00036118421052631574, "loss": 0.6324, "step": 54300 }, { "epoch": 0.54, "learning_rate": 0.0003611052631578947, "loss": 0.6396, "step": 54310 }, { "epoch": 0.54, "learning_rate": 0.00036102631578947364, "loss": 0.6416, "step": 54320 }, { "epoch": 0.54, "learning_rate": 0.00036094736842105264, "loss": 0.6386, "step": 54330 }, { "epoch": 0.54, "learning_rate": 0.00036086842105263154, "loss": 0.6363, "step": 54340 }, { "epoch": 0.54, "learning_rate": 0.00036078947368421054, "loss": 0.6421, "step": 54350 }, { "epoch": 0.54, "learning_rate": 0.00036071052631578943, "loss": 0.6554, "step": 54360 }, { "epoch": 0.54, "learning_rate": 0.0003606315789473684, "loss": 0.6553, "step": 54370 }, { "epoch": 0.54, "learning_rate": 0.00036055263157894733, "loss": 0.6402, "step": 54380 }, { "epoch": 0.54, "learning_rate": 0.0003604736842105263, "loss": 0.6519, "step": 54390 }, { "epoch": 0.54, "learning_rate": 0.0003603947368421052, "loss": 0.657, "step": 54400 }, { "epoch": 0.54, "learning_rate": 0.0003603157894736842, "loss": 0.6611, "step": 54410 }, { "epoch": 0.54, "learning_rate": 0.0003602368421052632, "loss": 0.6669, "step": 54420 }, { "epoch": 0.54, "learning_rate": 0.00036015789473684207, "loss": 0.6622, "step": 54430 }, { "epoch": 0.54, "learning_rate": 0.000360078947368421, "loss": 0.6494, "step": 54440 }, { "epoch": 0.54, "learning_rate": 0.00035999999999999997, "loss": 0.6571, "step": 54450 }, { "epoch": 0.54, "learning_rate": 0.0003599210526315789, "loss": 0.6397, "step": 54460 }, { "epoch": 0.54, "learning_rate": 0.00035984210526315787, "loss": 0.6506, "step": 54470 }, { "epoch": 0.54, "learning_rate": 0.0003597631578947368, "loss": 0.6398, "step": 54480 }, { "epoch": 0.54, "learning_rate": 0.0003596842105263157, "loss": 0.6269, "step": 54490 }, { "epoch": 0.55, "learning_rate": 0.0003596052631578947, "loss": 0.6423, "step": 54500 }, { "epoch": 0.55, "learning_rate": 0.00035952631578947366, "loss": 0.6452, "step": 54510 }, { "epoch": 0.55, "learning_rate": 0.00035945526315789473, "loss": 0.644, "step": 54520 }, { "epoch": 0.55, "learning_rate": 0.00035937631578947363, "loss": 0.6432, "step": 54530 }, { "epoch": 0.55, "learning_rate": 0.00035929736842105263, "loss": 0.6419, "step": 54540 }, { "epoch": 0.55, "learning_rate": 0.0003592184210526315, "loss": 0.6584, "step": 54550 }, { "epoch": 0.55, "learning_rate": 0.00035913947368421047, "loss": 0.649, "step": 54560 }, { "epoch": 0.55, "learning_rate": 0.0003590605263157894, "loss": 0.6497, "step": 54570 }, { "epoch": 0.55, "learning_rate": 0.00035898157894736837, "loss": 0.6494, "step": 54580 }, { "epoch": 0.55, "learning_rate": 0.00035890263157894737, "loss": 0.6523, "step": 54590 }, { "epoch": 0.55, "learning_rate": 0.00035882368421052627, "loss": 0.6405, "step": 54600 }, { "epoch": 0.55, "learning_rate": 0.00035874473684210527, "loss": 0.6463, "step": 54610 }, { "epoch": 0.55, "learning_rate": 0.00035866578947368416, "loss": 0.6429, "step": 54620 }, { "epoch": 0.55, "learning_rate": 0.00035858684210526316, "loss": 0.6452, "step": 54630 }, { "epoch": 0.55, "learning_rate": 0.00035850789473684206, "loss": 0.639, "step": 54640 }, { "epoch": 0.55, "learning_rate": 0.000358428947368421, "loss": 0.6324, "step": 54650 }, { "epoch": 0.55, "learning_rate": 0.00035834999999999996, "loss": 0.6429, "step": 54660 }, { "epoch": 0.55, "learning_rate": 0.0003582710526315789, "loss": 0.6392, "step": 54670 }, { "epoch": 0.55, "learning_rate": 0.0003581921052631579, "loss": 0.6248, "step": 54680 }, { "epoch": 0.55, "learning_rate": 0.0003581131578947368, "loss": 0.6205, "step": 54690 }, { "epoch": 0.55, "learning_rate": 0.0003580342105263158, "loss": 0.6234, "step": 54700 }, { "epoch": 0.55, "learning_rate": 0.0003579552631578947, "loss": 0.6364, "step": 54710 }, { "epoch": 0.55, "learning_rate": 0.00035787631578947365, "loss": 0.6511, "step": 54720 }, { "epoch": 0.55, "learning_rate": 0.0003577973684210526, "loss": 0.6481, "step": 54730 }, { "epoch": 0.55, "learning_rate": 0.00035771842105263154, "loss": 0.6538, "step": 54740 }, { "epoch": 0.55, "learning_rate": 0.0003576394736842105, "loss": 0.6412, "step": 54750 }, { "epoch": 0.55, "learning_rate": 0.00035756052631578944, "loss": 0.6432, "step": 54760 }, { "epoch": 0.55, "learning_rate": 0.00035748157894736844, "loss": 0.656, "step": 54770 }, { "epoch": 0.55, "learning_rate": 0.00035740263157894734, "loss": 0.6436, "step": 54780 }, { "epoch": 0.55, "learning_rate": 0.0003573236842105263, "loss": 0.6444, "step": 54790 }, { "epoch": 0.55, "learning_rate": 0.00035724473684210523, "loss": 0.636, "step": 54800 }, { "epoch": 0.55, "learning_rate": 0.0003571657894736842, "loss": 0.6347, "step": 54810 }, { "epoch": 0.55, "learning_rate": 0.00035708684210526313, "loss": 0.6237, "step": 54820 }, { "epoch": 0.55, "learning_rate": 0.0003570078947368421, "loss": 0.6384, "step": 54830 }, { "epoch": 0.55, "learning_rate": 0.00035692894736842097, "loss": 0.6369, "step": 54840 }, { "epoch": 0.55, "learning_rate": 0.00035685, "loss": 0.649, "step": 54850 }, { "epoch": 0.55, "learning_rate": 0.0003567710526315789, "loss": 0.6448, "step": 54860 }, { "epoch": 0.55, "learning_rate": 0.00035669210526315787, "loss": 0.6391, "step": 54870 }, { "epoch": 0.55, "learning_rate": 0.0003566131578947368, "loss": 0.6459, "step": 54880 }, { "epoch": 0.55, "learning_rate": 0.00035653421052631577, "loss": 0.6546, "step": 54890 }, { "epoch": 0.55, "learning_rate": 0.0003564552631578947, "loss": 0.6441, "step": 54900 }, { "epoch": 0.55, "learning_rate": 0.0003563763157894736, "loss": 0.6526, "step": 54910 }, { "epoch": 0.55, "learning_rate": 0.0003562973684210526, "loss": 0.6456, "step": 54920 }, { "epoch": 0.55, "learning_rate": 0.0003562184210526315, "loss": 0.6433, "step": 54930 }, { "epoch": 0.55, "learning_rate": 0.0003561394736842105, "loss": 0.6512, "step": 54940 }, { "epoch": 0.55, "learning_rate": 0.00035606052631578946, "loss": 0.6423, "step": 54950 }, { "epoch": 0.55, "learning_rate": 0.0003559815789473684, "loss": 0.6436, "step": 54960 }, { "epoch": 0.55, "learning_rate": 0.00035590263157894735, "loss": 0.6341, "step": 54970 }, { "epoch": 0.55, "learning_rate": 0.00035582368421052625, "loss": 0.6416, "step": 54980 }, { "epoch": 0.55, "learning_rate": 0.00035574473684210525, "loss": 0.6439, "step": 54990 }, { "epoch": 0.55, "learning_rate": 0.00035566578947368414, "loss": 0.625, "step": 55000 }, { "epoch": 0.55, "eval_accuracy": 0.8664423590057567, "eval_loss": 0.62548828125, "eval_runtime": 97.6676, "eval_samples_per_second": 819.104, "eval_steps_per_second": 1.607, "step": 55000 }, { "epoch": 0.55, "learning_rate": 0.00035558684210526315, "loss": 0.644, "step": 55010 }, { "epoch": 0.55, "learning_rate": 0.00035550789473684204, "loss": 0.6439, "step": 55020 }, { "epoch": 0.55, "learning_rate": 0.00035542894736842104, "loss": 0.646, "step": 55030 }, { "epoch": 0.55, "learning_rate": 0.00035535, "loss": 0.6523, "step": 55040 }, { "epoch": 0.55, "learning_rate": 0.0003552710526315789, "loss": 0.6389, "step": 55050 }, { "epoch": 0.55, "learning_rate": 0.0003551921052631579, "loss": 0.6624, "step": 55060 }, { "epoch": 0.55, "learning_rate": 0.0003551131578947368, "loss": 0.6395, "step": 55070 }, { "epoch": 0.55, "learning_rate": 0.0003550342105263158, "loss": 0.644, "step": 55080 }, { "epoch": 0.55, "learning_rate": 0.0003549552631578947, "loss": 0.6422, "step": 55090 }, { "epoch": 0.55, "learning_rate": 0.0003548763157894737, "loss": 0.6481, "step": 55100 }, { "epoch": 0.55, "learning_rate": 0.0003547973684210526, "loss": 0.6472, "step": 55110 }, { "epoch": 0.55, "learning_rate": 0.0003547184210526316, "loss": 0.649, "step": 55120 }, { "epoch": 0.55, "learning_rate": 0.00035463947368421047, "loss": 0.6428, "step": 55130 }, { "epoch": 0.55, "learning_rate": 0.0003545605263157894, "loss": 0.6383, "step": 55140 }, { "epoch": 0.55, "learning_rate": 0.0003544815789473684, "loss": 0.6402, "step": 55150 }, { "epoch": 0.55, "learning_rate": 0.0003544026315789473, "loss": 0.6516, "step": 55160 }, { "epoch": 0.55, "learning_rate": 0.0003543236842105263, "loss": 0.6429, "step": 55170 }, { "epoch": 0.55, "learning_rate": 0.0003542447368421052, "loss": 0.6501, "step": 55180 }, { "epoch": 0.55, "learning_rate": 0.0003541657894736842, "loss": 0.647, "step": 55190 }, { "epoch": 0.55, "learning_rate": 0.0003540868421052631, "loss": 0.659, "step": 55200 }, { "epoch": 0.55, "learning_rate": 0.00035400789473684206, "loss": 0.6417, "step": 55210 }, { "epoch": 0.55, "learning_rate": 0.000353928947368421, "loss": 0.6509, "step": 55220 }, { "epoch": 0.55, "learning_rate": 0.00035384999999999995, "loss": 0.6471, "step": 55230 }, { "epoch": 0.55, "learning_rate": 0.00035377105263157896, "loss": 0.6479, "step": 55240 }, { "epoch": 0.55, "learning_rate": 0.00035369210526315785, "loss": 0.6385, "step": 55250 }, { "epoch": 0.55, "learning_rate": 0.00035361315789473685, "loss": 0.6525, "step": 55260 }, { "epoch": 0.55, "learning_rate": 0.00035353421052631575, "loss": 0.6489, "step": 55270 }, { "epoch": 0.55, "learning_rate": 0.0003534552631578947, "loss": 0.6523, "step": 55280 }, { "epoch": 0.55, "learning_rate": 0.00035337631578947364, "loss": 0.657, "step": 55290 }, { "epoch": 0.55, "learning_rate": 0.0003532973684210526, "loss": 0.6315, "step": 55300 }, { "epoch": 0.55, "learning_rate": 0.00035321842105263154, "loss": 0.6426, "step": 55310 }, { "epoch": 0.55, "learning_rate": 0.0003531394736842105, "loss": 0.6449, "step": 55320 }, { "epoch": 0.55, "learning_rate": 0.0003530605263157895, "loss": 0.6538, "step": 55330 }, { "epoch": 0.55, "learning_rate": 0.0003529815789473684, "loss": 0.6431, "step": 55340 }, { "epoch": 0.55, "learning_rate": 0.00035290263157894733, "loss": 0.6439, "step": 55350 }, { "epoch": 0.55, "learning_rate": 0.0003528236842105263, "loss": 0.6624, "step": 55360 }, { "epoch": 0.55, "learning_rate": 0.00035274473684210523, "loss": 0.6426, "step": 55370 }, { "epoch": 0.55, "learning_rate": 0.0003526657894736842, "loss": 0.6441, "step": 55380 }, { "epoch": 0.55, "learning_rate": 0.00035258684210526313, "loss": 0.6522, "step": 55390 }, { "epoch": 0.55, "learning_rate": 0.000352507894736842, "loss": 0.6543, "step": 55400 }, { "epoch": 0.55, "learning_rate": 0.000352428947368421, "loss": 0.6452, "step": 55410 }, { "epoch": 0.55, "learning_rate": 0.00035234999999999997, "loss": 0.6691, "step": 55420 }, { "epoch": 0.55, "learning_rate": 0.0003522710526315789, "loss": 0.6552, "step": 55430 }, { "epoch": 0.55, "learning_rate": 0.00035219210526315787, "loss": 0.6545, "step": 55440 }, { "epoch": 0.55, "learning_rate": 0.0003521131578947368, "loss": 0.6435, "step": 55450 }, { "epoch": 0.55, "learning_rate": 0.00035203421052631577, "loss": 0.6458, "step": 55460 }, { "epoch": 0.55, "learning_rate": 0.00035195526315789466, "loss": 0.6472, "step": 55470 }, { "epoch": 0.55, "learning_rate": 0.00035187631578947366, "loss": 0.6469, "step": 55480 }, { "epoch": 0.55, "learning_rate": 0.00035179736842105256, "loss": 0.6351, "step": 55490 }, { "epoch": 0.56, "learning_rate": 0.00035171842105263156, "loss": 0.6439, "step": 55500 }, { "epoch": 0.56, "learning_rate": 0.0003516394736842105, "loss": 0.6513, "step": 55510 }, { "epoch": 0.56, "learning_rate": 0.00035156052631578946, "loss": 0.6383, "step": 55520 }, { "epoch": 0.56, "learning_rate": 0.0003514815789473684, "loss": 0.6356, "step": 55530 }, { "epoch": 0.56, "learning_rate": 0.0003514026315789473, "loss": 0.6444, "step": 55540 }, { "epoch": 0.56, "learning_rate": 0.0003513236842105263, "loss": 0.6421, "step": 55550 }, { "epoch": 0.56, "learning_rate": 0.0003512447368421052, "loss": 0.6535, "step": 55560 }, { "epoch": 0.56, "learning_rate": 0.0003511657894736842, "loss": 0.6477, "step": 55570 }, { "epoch": 0.56, "learning_rate": 0.0003510868421052631, "loss": 0.6432, "step": 55580 }, { "epoch": 0.56, "learning_rate": 0.0003510078947368421, "loss": 0.6337, "step": 55590 }, { "epoch": 0.56, "learning_rate": 0.00035092894736842104, "loss": 0.6365, "step": 55600 }, { "epoch": 0.56, "learning_rate": 0.00035085, "loss": 0.6457, "step": 55610 }, { "epoch": 0.56, "learning_rate": 0.00035077105263157894, "loss": 0.6544, "step": 55620 }, { "epoch": 0.56, "learning_rate": 0.00035069210526315783, "loss": 0.6516, "step": 55630 }, { "epoch": 0.56, "learning_rate": 0.00035061315789473683, "loss": 0.6408, "step": 55640 }, { "epoch": 0.56, "learning_rate": 0.00035053421052631573, "loss": 0.6473, "step": 55650 }, { "epoch": 0.56, "learning_rate": 0.00035045526315789473, "loss": 0.6427, "step": 55660 }, { "epoch": 0.56, "learning_rate": 0.0003503763157894736, "loss": 0.6376, "step": 55670 }, { "epoch": 0.56, "learning_rate": 0.00035029736842105263, "loss": 0.6378, "step": 55680 }, { "epoch": 0.56, "learning_rate": 0.0003502184210526316, "loss": 0.6464, "step": 55690 }, { "epoch": 0.56, "learning_rate": 0.00035013947368421047, "loss": 0.6271, "step": 55700 }, { "epoch": 0.56, "learning_rate": 0.0003500605263157895, "loss": 0.6344, "step": 55710 }, { "epoch": 0.56, "learning_rate": 0.00034998157894736837, "loss": 0.648, "step": 55720 }, { "epoch": 0.56, "learning_rate": 0.00034990263157894737, "loss": 0.643, "step": 55730 }, { "epoch": 0.56, "learning_rate": 0.00034982368421052626, "loss": 0.631, "step": 55740 }, { "epoch": 0.56, "learning_rate": 0.00034974473684210527, "loss": 0.6502, "step": 55750 }, { "epoch": 0.56, "learning_rate": 0.00034966578947368416, "loss": 0.6507, "step": 55760 }, { "epoch": 0.56, "learning_rate": 0.0003495868421052631, "loss": 0.6426, "step": 55770 }, { "epoch": 0.56, "learning_rate": 0.00034950789473684206, "loss": 0.6424, "step": 55780 }, { "epoch": 0.56, "learning_rate": 0.000349428947368421, "loss": 0.6392, "step": 55790 }, { "epoch": 0.56, "learning_rate": 0.00034935, "loss": 0.6421, "step": 55800 }, { "epoch": 0.56, "learning_rate": 0.0003492710526315789, "loss": 0.6324, "step": 55810 }, { "epoch": 0.56, "learning_rate": 0.0003491921052631579, "loss": 0.6444, "step": 55820 }, { "epoch": 0.56, "learning_rate": 0.0003491131578947368, "loss": 0.6447, "step": 55830 }, { "epoch": 0.56, "learning_rate": 0.00034903421052631575, "loss": 0.6435, "step": 55840 }, { "epoch": 0.56, "learning_rate": 0.0003489552631578947, "loss": 0.644, "step": 55850 }, { "epoch": 0.56, "learning_rate": 0.00034887631578947364, "loss": 0.646, "step": 55860 }, { "epoch": 0.56, "learning_rate": 0.0003487973684210526, "loss": 0.6475, "step": 55870 }, { "epoch": 0.56, "learning_rate": 0.00034871842105263154, "loss": 0.6389, "step": 55880 }, { "epoch": 0.56, "learning_rate": 0.00034863947368421054, "loss": 0.6506, "step": 55890 }, { "epoch": 0.56, "learning_rate": 0.00034856052631578944, "loss": 0.6348, "step": 55900 }, { "epoch": 0.56, "learning_rate": 0.0003484815789473684, "loss": 0.6432, "step": 55910 }, { "epoch": 0.56, "learning_rate": 0.00034840263157894733, "loss": 0.6391, "step": 55920 }, { "epoch": 0.56, "learning_rate": 0.0003483236842105263, "loss": 0.6329, "step": 55930 }, { "epoch": 0.56, "learning_rate": 0.00034824473684210523, "loss": 0.6301, "step": 55940 }, { "epoch": 0.56, "learning_rate": 0.0003481657894736842, "loss": 0.6469, "step": 55950 }, { "epoch": 0.56, "learning_rate": 0.00034808684210526307, "loss": 0.6357, "step": 55960 }, { "epoch": 0.56, "learning_rate": 0.0003480078947368421, "loss": 0.6221, "step": 55970 }, { "epoch": 0.56, "learning_rate": 0.0003479289473684211, "loss": 0.6339, "step": 55980 }, { "epoch": 0.56, "learning_rate": 0.00034784999999999997, "loss": 0.6337, "step": 55990 }, { "epoch": 0.56, "learning_rate": 0.0003477710526315789, "loss": 0.6332, "step": 56000 }, { "epoch": 0.56, "learning_rate": 0.00034769210526315787, "loss": 0.6302, "step": 56010 }, { "epoch": 0.56, "learning_rate": 0.0003476131578947368, "loss": 0.6371, "step": 56020 }, { "epoch": 0.56, "learning_rate": 0.0003475342105263157, "loss": 0.6307, "step": 56030 }, { "epoch": 0.56, "learning_rate": 0.0003474552631578947, "loss": 0.639, "step": 56040 }, { "epoch": 0.56, "learning_rate": 0.0003473763157894736, "loss": 0.6372, "step": 56050 }, { "epoch": 0.56, "learning_rate": 0.0003472973684210526, "loss": 0.6372, "step": 56060 }, { "epoch": 0.56, "learning_rate": 0.00034721842105263156, "loss": 0.63, "step": 56070 }, { "epoch": 0.56, "learning_rate": 0.0003471394736842105, "loss": 0.632, "step": 56080 }, { "epoch": 0.56, "learning_rate": 0.00034706052631578945, "loss": 0.6431, "step": 56090 }, { "epoch": 0.56, "learning_rate": 0.0003469815789473684, "loss": 0.6313, "step": 56100 }, { "epoch": 0.56, "learning_rate": 0.00034690263157894735, "loss": 0.6261, "step": 56110 }, { "epoch": 0.56, "learning_rate": 0.00034682368421052625, "loss": 0.6483, "step": 56120 }, { "epoch": 0.56, "learning_rate": 0.00034674473684210525, "loss": 0.6419, "step": 56130 }, { "epoch": 0.56, "learning_rate": 0.00034666578947368414, "loss": 0.6399, "step": 56140 }, { "epoch": 0.56, "learning_rate": 0.00034658684210526314, "loss": 0.6251, "step": 56150 }, { "epoch": 0.56, "learning_rate": 0.0003465078947368421, "loss": 0.6423, "step": 56160 }, { "epoch": 0.56, "learning_rate": 0.00034642894736842104, "loss": 0.6186, "step": 56170 }, { "epoch": 0.56, "learning_rate": 0.00034635, "loss": 0.641, "step": 56180 }, { "epoch": 0.56, "learning_rate": 0.0003462710526315789, "loss": 0.6452, "step": 56190 }, { "epoch": 0.56, "learning_rate": 0.0003461921052631579, "loss": 0.6445, "step": 56200 }, { "epoch": 0.56, "learning_rate": 0.0003461131578947368, "loss": 0.6446, "step": 56210 }, { "epoch": 0.56, "learning_rate": 0.0003460342105263158, "loss": 0.629, "step": 56220 }, { "epoch": 0.56, "learning_rate": 0.0003459552631578947, "loss": 0.644, "step": 56230 }, { "epoch": 0.56, "learning_rate": 0.0003458763157894737, "loss": 0.6362, "step": 56240 }, { "epoch": 0.56, "learning_rate": 0.00034579736842105263, "loss": 0.6241, "step": 56250 }, { "epoch": 0.56, "learning_rate": 0.0003457184210526315, "loss": 0.6325, "step": 56260 }, { "epoch": 0.56, "learning_rate": 0.0003456394736842105, "loss": 0.6376, "step": 56270 }, { "epoch": 0.56, "learning_rate": 0.0003455605263157894, "loss": 0.6394, "step": 56280 }, { "epoch": 0.56, "learning_rate": 0.0003454815789473684, "loss": 0.6292, "step": 56290 }, { "epoch": 0.56, "learning_rate": 0.0003454026315789473, "loss": 0.6353, "step": 56300 }, { "epoch": 0.56, "learning_rate": 0.0003453236842105263, "loss": 0.6216, "step": 56310 }, { "epoch": 0.56, "learning_rate": 0.0003452447368421052, "loss": 0.6399, "step": 56320 }, { "epoch": 0.56, "learning_rate": 0.00034516578947368416, "loss": 0.6352, "step": 56330 }, { "epoch": 0.56, "learning_rate": 0.0003450868421052631, "loss": 0.6526, "step": 56340 }, { "epoch": 0.56, "learning_rate": 0.0003450157894736842, "loss": 0.6381, "step": 56350 }, { "epoch": 0.56, "learning_rate": 0.00034493684210526313, "loss": 0.6281, "step": 56360 }, { "epoch": 0.56, "learning_rate": 0.0003448578947368421, "loss": 0.6319, "step": 56370 }, { "epoch": 0.56, "learning_rate": 0.000344778947368421, "loss": 0.6304, "step": 56380 }, { "epoch": 0.56, "learning_rate": 0.0003447, "loss": 0.6291, "step": 56390 }, { "epoch": 0.56, "learning_rate": 0.00034462105263157887, "loss": 0.6208, "step": 56400 }, { "epoch": 0.56, "learning_rate": 0.00034454210526315787, "loss": 0.6203, "step": 56410 }, { "epoch": 0.56, "learning_rate": 0.0003444631578947368, "loss": 0.6251, "step": 56420 }, { "epoch": 0.56, "learning_rate": 0.00034438421052631577, "loss": 0.6147, "step": 56430 }, { "epoch": 0.56, "learning_rate": 0.0003443052631578947, "loss": 0.6122, "step": 56440 }, { "epoch": 0.56, "learning_rate": 0.00034422631578947367, "loss": 0.6184, "step": 56450 }, { "epoch": 0.56, "learning_rate": 0.0003441473684210526, "loss": 0.6262, "step": 56460 }, { "epoch": 0.56, "learning_rate": 0.0003440684210526315, "loss": 0.6301, "step": 56470 }, { "epoch": 0.56, "learning_rate": 0.0003439894736842105, "loss": 0.61, "step": 56480 }, { "epoch": 0.56, "learning_rate": 0.0003439105263157894, "loss": 0.6155, "step": 56490 }, { "epoch": 0.56, "learning_rate": 0.0003438315789473684, "loss": 0.6194, "step": 56500 }, { "epoch": 0.57, "learning_rate": 0.00034375263157894736, "loss": 0.6331, "step": 56510 }, { "epoch": 0.57, "learning_rate": 0.0003436736842105263, "loss": 0.6275, "step": 56520 }, { "epoch": 0.57, "learning_rate": 0.00034359473684210525, "loss": 0.6382, "step": 56530 }, { "epoch": 0.57, "learning_rate": 0.00034351578947368415, "loss": 0.61, "step": 56540 }, { "epoch": 0.57, "learning_rate": 0.00034343684210526315, "loss": 0.6213, "step": 56550 }, { "epoch": 0.57, "learning_rate": 0.00034335789473684204, "loss": 0.6242, "step": 56560 }, { "epoch": 0.57, "learning_rate": 0.00034327894736842105, "loss": 0.6303, "step": 56570 }, { "epoch": 0.57, "learning_rate": 0.00034319999999999994, "loss": 0.6167, "step": 56580 }, { "epoch": 0.57, "learning_rate": 0.00034312105263157894, "loss": 0.6226, "step": 56590 }, { "epoch": 0.57, "learning_rate": 0.0003430421052631579, "loss": 0.6151, "step": 56600 }, { "epoch": 0.57, "learning_rate": 0.0003429631578947368, "loss": 0.6189, "step": 56610 }, { "epoch": 0.57, "learning_rate": 0.0003428842105263158, "loss": 0.6126, "step": 56620 }, { "epoch": 0.57, "learning_rate": 0.0003428052631578947, "loss": 0.6307, "step": 56630 }, { "epoch": 0.57, "learning_rate": 0.0003427263157894737, "loss": 0.6203, "step": 56640 }, { "epoch": 0.57, "learning_rate": 0.0003426473684210526, "loss": 0.6186, "step": 56650 }, { "epoch": 0.57, "learning_rate": 0.0003425684210526316, "loss": 0.6192, "step": 56660 }, { "epoch": 0.57, "learning_rate": 0.0003424894736842105, "loss": 0.6345, "step": 56670 }, { "epoch": 0.57, "learning_rate": 0.0003424105263157894, "loss": 0.6189, "step": 56680 }, { "epoch": 0.57, "learning_rate": 0.0003423315789473684, "loss": 0.6174, "step": 56690 }, { "epoch": 0.57, "learning_rate": 0.0003422526315789473, "loss": 0.6186, "step": 56700 }, { "epoch": 0.57, "learning_rate": 0.0003421736842105263, "loss": 0.6288, "step": 56710 }, { "epoch": 0.57, "learning_rate": 0.0003420947368421052, "loss": 0.61, "step": 56720 }, { "epoch": 0.57, "learning_rate": 0.0003420157894736842, "loss": 0.6311, "step": 56730 }, { "epoch": 0.57, "learning_rate": 0.0003419368421052631, "loss": 0.6098, "step": 56740 }, { "epoch": 0.57, "learning_rate": 0.00034185789473684206, "loss": 0.6229, "step": 56750 }, { "epoch": 0.57, "learning_rate": 0.000341778947368421, "loss": 0.6144, "step": 56760 }, { "epoch": 0.57, "learning_rate": 0.00034169999999999996, "loss": 0.6202, "step": 56770 }, { "epoch": 0.57, "learning_rate": 0.0003416210526315789, "loss": 0.6269, "step": 56780 }, { "epoch": 0.57, "learning_rate": 0.00034154210526315785, "loss": 0.6207, "step": 56790 }, { "epoch": 0.57, "learning_rate": 0.00034146315789473686, "loss": 0.6011, "step": 56800 }, { "epoch": 0.57, "learning_rate": 0.00034138421052631575, "loss": 0.6114, "step": 56810 }, { "epoch": 0.57, "learning_rate": 0.00034130526315789475, "loss": 0.6172, "step": 56820 }, { "epoch": 0.57, "learning_rate": 0.00034122631578947365, "loss": 0.619, "step": 56830 }, { "epoch": 0.57, "learning_rate": 0.0003411473684210526, "loss": 0.6259, "step": 56840 }, { "epoch": 0.57, "learning_rate": 0.00034106842105263154, "loss": 0.6353, "step": 56850 }, { "epoch": 0.57, "learning_rate": 0.0003409894736842105, "loss": 0.6145, "step": 56860 }, { "epoch": 0.57, "learning_rate": 0.0003409105263157894, "loss": 0.6306, "step": 56870 }, { "epoch": 0.57, "learning_rate": 0.0003408315789473684, "loss": 0.6264, "step": 56880 }, { "epoch": 0.57, "learning_rate": 0.0003407526315789474, "loss": 0.636, "step": 56890 }, { "epoch": 0.57, "learning_rate": 0.0003406736842105263, "loss": 0.6438, "step": 56900 }, { "epoch": 0.57, "learning_rate": 0.00034059473684210523, "loss": 0.6417, "step": 56910 }, { "epoch": 0.57, "learning_rate": 0.0003405157894736842, "loss": 0.6303, "step": 56920 }, { "epoch": 0.57, "learning_rate": 0.00034043684210526313, "loss": 0.6296, "step": 56930 }, { "epoch": 0.57, "learning_rate": 0.0003403578947368421, "loss": 0.6394, "step": 56940 }, { "epoch": 0.57, "learning_rate": 0.000340278947368421, "loss": 0.6436, "step": 56950 }, { "epoch": 0.57, "learning_rate": 0.0003401999999999999, "loss": 0.6471, "step": 56960 }, { "epoch": 0.57, "learning_rate": 0.0003401210526315789, "loss": 0.645, "step": 56970 }, { "epoch": 0.57, "learning_rate": 0.00034004210526315787, "loss": 0.6309, "step": 56980 }, { "epoch": 0.57, "learning_rate": 0.0003399631578947368, "loss": 0.6183, "step": 56990 }, { "epoch": 0.57, "learning_rate": 0.00033988421052631577, "loss": 0.6312, "step": 57000 }, { "epoch": 0.57, "learning_rate": 0.0003398052631578947, "loss": 0.6371, "step": 57010 }, { "epoch": 0.57, "learning_rate": 0.00033972631578947366, "loss": 0.6384, "step": 57020 }, { "epoch": 0.57, "learning_rate": 0.00033964736842105256, "loss": 0.6484, "step": 57030 }, { "epoch": 0.57, "learning_rate": 0.00033956842105263156, "loss": 0.6298, "step": 57040 }, { "epoch": 0.57, "learning_rate": 0.00033948947368421046, "loss": 0.6307, "step": 57050 }, { "epoch": 0.57, "learning_rate": 0.00033941052631578946, "loss": 0.6307, "step": 57060 }, { "epoch": 0.57, "learning_rate": 0.0003393315789473684, "loss": 0.6244, "step": 57070 }, { "epoch": 0.57, "learning_rate": 0.00033925263157894735, "loss": 0.631, "step": 57080 }, { "epoch": 0.57, "learning_rate": 0.0003391736842105263, "loss": 0.638, "step": 57090 }, { "epoch": 0.57, "learning_rate": 0.0003390947368421052, "loss": 0.6357, "step": 57100 }, { "epoch": 0.57, "learning_rate": 0.0003390157894736842, "loss": 0.6357, "step": 57110 }, { "epoch": 0.57, "learning_rate": 0.0003389368421052631, "loss": 0.6268, "step": 57120 }, { "epoch": 0.57, "learning_rate": 0.0003388578947368421, "loss": 0.6304, "step": 57130 }, { "epoch": 0.57, "learning_rate": 0.000338778947368421, "loss": 0.6377, "step": 57140 }, { "epoch": 0.57, "learning_rate": 0.0003387, "loss": 0.6362, "step": 57150 }, { "epoch": 0.57, "learning_rate": 0.00033862105263157894, "loss": 0.6273, "step": 57160 }, { "epoch": 0.57, "learning_rate": 0.00033854210526315784, "loss": 0.6422, "step": 57170 }, { "epoch": 0.57, "learning_rate": 0.00033846315789473684, "loss": 0.6449, "step": 57180 }, { "epoch": 0.57, "learning_rate": 0.00033838421052631573, "loss": 0.6322, "step": 57190 }, { "epoch": 0.57, "learning_rate": 0.00033830526315789473, "loss": 0.6326, "step": 57200 }, { "epoch": 0.57, "learning_rate": 0.00033822631578947363, "loss": 0.6288, "step": 57210 }, { "epoch": 0.57, "learning_rate": 0.00033814736842105263, "loss": 0.6461, "step": 57220 }, { "epoch": 0.57, "learning_rate": 0.0003380684210526315, "loss": 0.637, "step": 57230 }, { "epoch": 0.57, "learning_rate": 0.0003379894736842105, "loss": 0.637, "step": 57240 }, { "epoch": 0.57, "learning_rate": 0.0003379105263157895, "loss": 0.6544, "step": 57250 }, { "epoch": 0.57, "learning_rate": 0.00033783157894736837, "loss": 0.6205, "step": 57260 }, { "epoch": 0.57, "learning_rate": 0.00033775263157894737, "loss": 0.6239, "step": 57270 }, { "epoch": 0.57, "learning_rate": 0.00033767368421052627, "loss": 0.6405, "step": 57280 }, { "epoch": 0.57, "learning_rate": 0.00033759473684210527, "loss": 0.6491, "step": 57290 }, { "epoch": 0.57, "learning_rate": 0.00033751578947368416, "loss": 0.636, "step": 57300 }, { "epoch": 0.57, "learning_rate": 0.00033743684210526317, "loss": 0.633, "step": 57310 }, { "epoch": 0.57, "learning_rate": 0.00033735789473684206, "loss": 0.6305, "step": 57320 }, { "epoch": 0.57, "learning_rate": 0.000337278947368421, "loss": 0.634, "step": 57330 }, { "epoch": 0.57, "learning_rate": 0.0003372, "loss": 0.6291, "step": 57340 }, { "epoch": 0.57, "learning_rate": 0.0003371210526315789, "loss": 0.6426, "step": 57350 }, { "epoch": 0.57, "learning_rate": 0.0003370421052631579, "loss": 0.6482, "step": 57360 }, { "epoch": 0.57, "learning_rate": 0.0003369631578947368, "loss": 0.6401, "step": 57370 }, { "epoch": 0.57, "learning_rate": 0.0003368842105263158, "loss": 0.6438, "step": 57380 }, { "epoch": 0.57, "learning_rate": 0.0003368052631578947, "loss": 0.6434, "step": 57390 }, { "epoch": 0.57, "learning_rate": 0.00033672631578947365, "loss": 0.6377, "step": 57400 }, { "epoch": 0.57, "learning_rate": 0.0003366473684210526, "loss": 0.6433, "step": 57410 }, { "epoch": 0.57, "learning_rate": 0.00033656842105263154, "loss": 0.6356, "step": 57420 }, { "epoch": 0.57, "learning_rate": 0.0003364894736842105, "loss": 0.6279, "step": 57430 }, { "epoch": 0.57, "learning_rate": 0.00033641052631578944, "loss": 0.634, "step": 57440 }, { "epoch": 0.57, "learning_rate": 0.00033633157894736844, "loss": 0.6353, "step": 57450 }, { "epoch": 0.57, "learning_rate": 0.00033625263157894734, "loss": 0.6357, "step": 57460 }, { "epoch": 0.57, "learning_rate": 0.0003361736842105263, "loss": 0.6419, "step": 57470 }, { "epoch": 0.57, "learning_rate": 0.00033609473684210523, "loss": 0.6388, "step": 57480 }, { "epoch": 0.57, "learning_rate": 0.0003360157894736842, "loss": 0.6252, "step": 57490 }, { "epoch": 0.57, "learning_rate": 0.00033593684210526313, "loss": 0.6428, "step": 57500 }, { "epoch": 0.57, "eval_accuracy": 0.8676777163075269, "eval_loss": 0.61767578125, "eval_runtime": 96.8045, "eval_samples_per_second": 826.408, "eval_steps_per_second": 1.622, "step": 57500 }, { "epoch": 0.58, "learning_rate": 0.0003358578947368421, "loss": 0.6304, "step": 57510 }, { "epoch": 0.58, "learning_rate": 0.00033577894736842097, "loss": 0.6299, "step": 57520 }, { "epoch": 0.58, "learning_rate": 0.0003357, "loss": 0.6365, "step": 57530 }, { "epoch": 0.58, "learning_rate": 0.0003356210526315789, "loss": 0.6266, "step": 57540 }, { "epoch": 0.58, "learning_rate": 0.00033554210526315787, "loss": 0.6205, "step": 57550 }, { "epoch": 0.58, "learning_rate": 0.0003354631578947368, "loss": 0.6284, "step": 57560 }, { "epoch": 0.58, "learning_rate": 0.00033538421052631577, "loss": 0.6286, "step": 57570 }, { "epoch": 0.58, "learning_rate": 0.0003353052631578947, "loss": 0.6389, "step": 57580 }, { "epoch": 0.58, "learning_rate": 0.0003352263157894736, "loss": 0.6443, "step": 57590 }, { "epoch": 0.58, "learning_rate": 0.0003351473684210526, "loss": 0.6421, "step": 57600 }, { "epoch": 0.58, "learning_rate": 0.0003350684210526315, "loss": 0.6325, "step": 57610 }, { "epoch": 0.58, "learning_rate": 0.0003349894736842105, "loss": 0.6377, "step": 57620 }, { "epoch": 0.58, "learning_rate": 0.00033491052631578946, "loss": 0.6283, "step": 57630 }, { "epoch": 0.58, "learning_rate": 0.0003348315789473684, "loss": 0.6312, "step": 57640 }, { "epoch": 0.58, "learning_rate": 0.00033475263157894735, "loss": 0.6421, "step": 57650 }, { "epoch": 0.58, "learning_rate": 0.00033467368421052625, "loss": 0.6334, "step": 57660 }, { "epoch": 0.58, "learning_rate": 0.00033459473684210525, "loss": 0.6284, "step": 57670 }, { "epoch": 0.58, "learning_rate": 0.00033451578947368414, "loss": 0.6297, "step": 57680 }, { "epoch": 0.58, "learning_rate": 0.00033443684210526315, "loss": 0.6359, "step": 57690 }, { "epoch": 0.58, "learning_rate": 0.00033435789473684204, "loss": 0.6484, "step": 57700 }, { "epoch": 0.58, "learning_rate": 0.00033427894736842104, "loss": 0.6333, "step": 57710 }, { "epoch": 0.58, "learning_rate": 0.0003342, "loss": 0.6478, "step": 57720 }, { "epoch": 0.58, "learning_rate": 0.0003341210526315789, "loss": 0.6257, "step": 57730 }, { "epoch": 0.58, "learning_rate": 0.0003340421052631579, "loss": 0.6177, "step": 57740 }, { "epoch": 0.58, "learning_rate": 0.0003339631578947368, "loss": 0.6136, "step": 57750 }, { "epoch": 0.58, "learning_rate": 0.0003338842105263158, "loss": 0.6088, "step": 57760 }, { "epoch": 0.58, "learning_rate": 0.0003338052631578947, "loss": 0.6189, "step": 57770 }, { "epoch": 0.58, "learning_rate": 0.0003337263157894737, "loss": 0.6258, "step": 57780 }, { "epoch": 0.58, "learning_rate": 0.0003336473684210526, "loss": 0.623, "step": 57790 }, { "epoch": 0.58, "learning_rate": 0.0003335684210526316, "loss": 0.6204, "step": 57800 }, { "epoch": 0.58, "learning_rate": 0.0003334894736842105, "loss": 0.6175, "step": 57810 }, { "epoch": 0.58, "learning_rate": 0.0003334105263157894, "loss": 0.6306, "step": 57820 }, { "epoch": 0.58, "learning_rate": 0.0003333315789473684, "loss": 0.641, "step": 57830 }, { "epoch": 0.58, "learning_rate": 0.0003332526315789473, "loss": 0.6432, "step": 57840 }, { "epoch": 0.58, "learning_rate": 0.0003331736842105263, "loss": 0.6333, "step": 57850 }, { "epoch": 0.58, "learning_rate": 0.0003330947368421052, "loss": 0.6337, "step": 57860 }, { "epoch": 0.58, "learning_rate": 0.0003330157894736842, "loss": 0.6331, "step": 57870 }, { "epoch": 0.58, "learning_rate": 0.0003329368421052631, "loss": 0.6361, "step": 57880 }, { "epoch": 0.58, "learning_rate": 0.00033285789473684206, "loss": 0.6286, "step": 57890 }, { "epoch": 0.58, "learning_rate": 0.00033277894736842106, "loss": 0.6331, "step": 57900 }, { "epoch": 0.58, "learning_rate": 0.00033269999999999996, "loss": 0.6294, "step": 57910 }, { "epoch": 0.58, "learning_rate": 0.00033262105263157896, "loss": 0.6184, "step": 57920 }, { "epoch": 0.58, "learning_rate": 0.00033254210526315785, "loss": 0.6229, "step": 57930 }, { "epoch": 0.58, "learning_rate": 0.00033246315789473685, "loss": 0.6278, "step": 57940 }, { "epoch": 0.58, "learning_rate": 0.00033238421052631575, "loss": 0.6325, "step": 57950 }, { "epoch": 0.58, "learning_rate": 0.0003323052631578947, "loss": 0.6175, "step": 57960 }, { "epoch": 0.58, "learning_rate": 0.00033222631578947365, "loss": 0.6318, "step": 57970 }, { "epoch": 0.58, "learning_rate": 0.0003321473684210526, "loss": 0.6412, "step": 57980 }, { "epoch": 0.58, "learning_rate": 0.00033206842105263154, "loss": 0.6204, "step": 57990 }, { "epoch": 0.58, "learning_rate": 0.0003319894736842105, "loss": 0.6178, "step": 58000 }, { "epoch": 0.58, "learning_rate": 0.0003319105263157895, "loss": 0.631, "step": 58010 }, { "epoch": 0.58, "learning_rate": 0.0003318315789473684, "loss": 0.6335, "step": 58020 }, { "epoch": 0.58, "learning_rate": 0.00033175263157894734, "loss": 0.629, "step": 58030 }, { "epoch": 0.58, "learning_rate": 0.0003316736842105263, "loss": 0.6225, "step": 58040 }, { "epoch": 0.58, "learning_rate": 0.00033159473684210523, "loss": 0.6384, "step": 58050 }, { "epoch": 0.58, "learning_rate": 0.0003315157894736842, "loss": 0.6271, "step": 58060 }, { "epoch": 0.58, "learning_rate": 0.00033143684210526313, "loss": 0.6272, "step": 58070 }, { "epoch": 0.58, "learning_rate": 0.000331357894736842, "loss": 0.6344, "step": 58080 }, { "epoch": 0.58, "learning_rate": 0.000331278947368421, "loss": 0.6287, "step": 58090 }, { "epoch": 0.58, "learning_rate": 0.0003312, "loss": 0.6213, "step": 58100 }, { "epoch": 0.58, "learning_rate": 0.0003311210526315789, "loss": 0.6273, "step": 58110 }, { "epoch": 0.58, "learning_rate": 0.00033104210526315787, "loss": 0.63, "step": 58120 }, { "epoch": 0.58, "learning_rate": 0.0003309631578947368, "loss": 0.6309, "step": 58130 }, { "epoch": 0.58, "learning_rate": 0.00033088421052631577, "loss": 0.6349, "step": 58140 }, { "epoch": 0.58, "learning_rate": 0.00033080526315789466, "loss": 0.6352, "step": 58150 }, { "epoch": 0.58, "learning_rate": 0.00033072631578947366, "loss": 0.6297, "step": 58160 }, { "epoch": 0.58, "learning_rate": 0.00033064736842105256, "loss": 0.6297, "step": 58170 }, { "epoch": 0.58, "learning_rate": 0.00033056842105263156, "loss": 0.6383, "step": 58180 }, { "epoch": 0.58, "learning_rate": 0.0003304894736842105, "loss": 0.6287, "step": 58190 }, { "epoch": 0.58, "learning_rate": 0.00033041052631578946, "loss": 0.6298, "step": 58200 }, { "epoch": 0.58, "learning_rate": 0.0003303315789473684, "loss": 0.6346, "step": 58210 }, { "epoch": 0.58, "learning_rate": 0.0003302526315789473, "loss": 0.6338, "step": 58220 }, { "epoch": 0.58, "learning_rate": 0.0003301736842105263, "loss": 0.6466, "step": 58230 }, { "epoch": 0.58, "learning_rate": 0.0003300947368421052, "loss": 0.6371, "step": 58240 }, { "epoch": 0.58, "learning_rate": 0.0003300157894736842, "loss": 0.6309, "step": 58250 }, { "epoch": 0.58, "learning_rate": 0.0003299368421052631, "loss": 0.6383, "step": 58260 }, { "epoch": 0.58, "learning_rate": 0.0003298578947368421, "loss": 0.6353, "step": 58270 }, { "epoch": 0.58, "learning_rate": 0.00032977894736842104, "loss": 0.6262, "step": 58280 }, { "epoch": 0.58, "learning_rate": 0.0003297, "loss": 0.6326, "step": 58290 }, { "epoch": 0.58, "learning_rate": 0.00032962105263157894, "loss": 0.6334, "step": 58300 }, { "epoch": 0.58, "learning_rate": 0.00032954210526315783, "loss": 0.6404, "step": 58310 }, { "epoch": 0.58, "learning_rate": 0.00032946315789473684, "loss": 0.6286, "step": 58320 }, { "epoch": 0.58, "learning_rate": 0.00032938421052631573, "loss": 0.6268, "step": 58330 }, { "epoch": 0.58, "learning_rate": 0.00032930526315789473, "loss": 0.6331, "step": 58340 }, { "epoch": 0.58, "learning_rate": 0.00032923421052631575, "loss": 0.6318, "step": 58350 }, { "epoch": 0.58, "learning_rate": 0.00032915526315789476, "loss": 0.6397, "step": 58360 }, { "epoch": 0.58, "learning_rate": 0.00032907631578947365, "loss": 0.6216, "step": 58370 }, { "epoch": 0.58, "learning_rate": 0.0003289973684210526, "loss": 0.6413, "step": 58380 }, { "epoch": 0.58, "learning_rate": 0.00032891842105263155, "loss": 0.6298, "step": 58390 }, { "epoch": 0.58, "learning_rate": 0.0003288394736842105, "loss": 0.6345, "step": 58400 }, { "epoch": 0.58, "learning_rate": 0.00032876052631578944, "loss": 0.6322, "step": 58410 }, { "epoch": 0.58, "learning_rate": 0.0003286815789473684, "loss": 0.638, "step": 58420 }, { "epoch": 0.58, "learning_rate": 0.0003286026315789474, "loss": 0.6288, "step": 58430 }, { "epoch": 0.58, "learning_rate": 0.0003285236842105263, "loss": 0.6297, "step": 58440 }, { "epoch": 0.58, "learning_rate": 0.00032844473684210524, "loss": 0.6264, "step": 58450 }, { "epoch": 0.58, "learning_rate": 0.0003283657894736842, "loss": 0.6328, "step": 58460 }, { "epoch": 0.58, "learning_rate": 0.00032828684210526313, "loss": 0.6424, "step": 58470 }, { "epoch": 0.58, "learning_rate": 0.0003282078947368421, "loss": 0.6358, "step": 58480 }, { "epoch": 0.58, "learning_rate": 0.00032812894736842103, "loss": 0.6158, "step": 58490 }, { "epoch": 0.58, "learning_rate": 0.0003280499999999999, "loss": 0.6232, "step": 58500 }, { "epoch": 0.59, "learning_rate": 0.0003279710526315789, "loss": 0.6338, "step": 58510 }, { "epoch": 0.59, "learning_rate": 0.0003278921052631578, "loss": 0.6393, "step": 58520 }, { "epoch": 0.59, "learning_rate": 0.0003278131578947368, "loss": 0.6383, "step": 58530 }, { "epoch": 0.59, "learning_rate": 0.00032773421052631577, "loss": 0.6386, "step": 58540 }, { "epoch": 0.59, "learning_rate": 0.0003276552631578947, "loss": 0.6394, "step": 58550 }, { "epoch": 0.59, "learning_rate": 0.00032757631578947367, "loss": 0.6503, "step": 58560 }, { "epoch": 0.59, "learning_rate": 0.00032749736842105256, "loss": 0.6437, "step": 58570 }, { "epoch": 0.59, "learning_rate": 0.00032741842105263156, "loss": 0.6389, "step": 58580 }, { "epoch": 0.59, "learning_rate": 0.0003273473684210526, "loss": 0.6397, "step": 58590 }, { "epoch": 0.59, "learning_rate": 0.0003272684210526316, "loss": 0.6371, "step": 58600 }, { "epoch": 0.59, "learning_rate": 0.0003271894736842105, "loss": 0.6372, "step": 58610 }, { "epoch": 0.59, "learning_rate": 0.0003271105263157895, "loss": 0.6464, "step": 58620 }, { "epoch": 0.59, "learning_rate": 0.0003270315789473684, "loss": 0.6482, "step": 58630 }, { "epoch": 0.59, "learning_rate": 0.0003269526315789474, "loss": 0.6394, "step": 58640 }, { "epoch": 0.59, "learning_rate": 0.0003268736842105263, "loss": 0.6346, "step": 58650 }, { "epoch": 0.59, "learning_rate": 0.0003267947368421052, "loss": 0.6476, "step": 58660 }, { "epoch": 0.59, "learning_rate": 0.00032671578947368417, "loss": 0.6379, "step": 58670 }, { "epoch": 0.59, "learning_rate": 0.0003266368421052631, "loss": 0.6384, "step": 58680 }, { "epoch": 0.59, "learning_rate": 0.0003265578947368421, "loss": 0.6229, "step": 58690 }, { "epoch": 0.59, "learning_rate": 0.000326478947368421, "loss": 0.6252, "step": 58700 }, { "epoch": 0.59, "learning_rate": 0.0003264, "loss": 0.6342, "step": 58710 }, { "epoch": 0.59, "learning_rate": 0.0003263210526315789, "loss": 0.6275, "step": 58720 }, { "epoch": 0.59, "learning_rate": 0.00032624210526315786, "loss": 0.6402, "step": 58730 }, { "epoch": 0.59, "learning_rate": 0.0003261631578947368, "loss": 0.6292, "step": 58740 }, { "epoch": 0.59, "learning_rate": 0.00032608421052631576, "loss": 0.6141, "step": 58750 }, { "epoch": 0.59, "learning_rate": 0.0003260052631578947, "loss": 0.6227, "step": 58760 }, { "epoch": 0.59, "learning_rate": 0.00032592631578947365, "loss": 0.636, "step": 58770 }, { "epoch": 0.59, "learning_rate": 0.00032584736842105266, "loss": 0.6358, "step": 58780 }, { "epoch": 0.59, "learning_rate": 0.00032576842105263155, "loss": 0.6214, "step": 58790 }, { "epoch": 0.59, "learning_rate": 0.0003256894736842105, "loss": 0.6314, "step": 58800 }, { "epoch": 0.59, "learning_rate": 0.00032561052631578945, "loss": 0.6404, "step": 58810 }, { "epoch": 0.59, "learning_rate": 0.0003255315789473684, "loss": 0.6314, "step": 58820 }, { "epoch": 0.59, "learning_rate": 0.00032545263157894734, "loss": 0.6287, "step": 58830 }, { "epoch": 0.59, "learning_rate": 0.0003253736842105263, "loss": 0.635, "step": 58840 }, { "epoch": 0.59, "learning_rate": 0.0003252947368421052, "loss": 0.632, "step": 58850 }, { "epoch": 0.59, "learning_rate": 0.0003252157894736842, "loss": 0.6224, "step": 58860 }, { "epoch": 0.59, "learning_rate": 0.00032513684210526314, "loss": 0.6334, "step": 58870 }, { "epoch": 0.59, "learning_rate": 0.0003250578947368421, "loss": 0.6179, "step": 58880 }, { "epoch": 0.59, "learning_rate": 0.00032497894736842103, "loss": 0.6306, "step": 58890 }, { "epoch": 0.59, "learning_rate": 0.0003249, "loss": 0.6364, "step": 58900 }, { "epoch": 0.59, "learning_rate": 0.00032482105263157893, "loss": 0.6397, "step": 58910 }, { "epoch": 0.59, "learning_rate": 0.0003247421052631578, "loss": 0.6311, "step": 58920 }, { "epoch": 0.59, "learning_rate": 0.00032466315789473683, "loss": 0.6396, "step": 58930 }, { "epoch": 0.59, "learning_rate": 0.0003245842105263157, "loss": 0.6295, "step": 58940 }, { "epoch": 0.59, "learning_rate": 0.0003245052631578947, "loss": 0.6268, "step": 58950 }, { "epoch": 0.59, "learning_rate": 0.0003244263157894736, "loss": 0.6296, "step": 58960 }, { "epoch": 0.59, "learning_rate": 0.0003243473684210526, "loss": 0.6345, "step": 58970 }, { "epoch": 0.59, "learning_rate": 0.00032426842105263157, "loss": 0.6227, "step": 58980 }, { "epoch": 0.59, "learning_rate": 0.00032418947368421046, "loss": 0.6243, "step": 58990 }, { "epoch": 0.59, "learning_rate": 0.00032411052631578947, "loss": 0.6189, "step": 59000 }, { "epoch": 0.59, "learning_rate": 0.00032403157894736836, "loss": 0.6153, "step": 59010 }, { "epoch": 0.59, "learning_rate": 0.00032395263157894736, "loss": 0.6217, "step": 59020 }, { "epoch": 0.59, "learning_rate": 0.00032387368421052626, "loss": 0.6358, "step": 59030 }, { "epoch": 0.59, "learning_rate": 0.00032379473684210526, "loss": 0.6353, "step": 59040 }, { "epoch": 0.59, "learning_rate": 0.00032371578947368415, "loss": 0.6306, "step": 59050 }, { "epoch": 0.59, "learning_rate": 0.0003236368421052631, "loss": 0.627, "step": 59060 }, { "epoch": 0.59, "learning_rate": 0.0003235578947368421, "loss": 0.6333, "step": 59070 }, { "epoch": 0.59, "learning_rate": 0.000323478947368421, "loss": 0.6257, "step": 59080 }, { "epoch": 0.59, "learning_rate": 0.0003234, "loss": 0.6248, "step": 59090 }, { "epoch": 0.59, "learning_rate": 0.0003233210526315789, "loss": 0.6364, "step": 59100 }, { "epoch": 0.59, "learning_rate": 0.0003232421052631579, "loss": 0.6307, "step": 59110 }, { "epoch": 0.59, "learning_rate": 0.0003231631578947368, "loss": 0.622, "step": 59120 }, { "epoch": 0.59, "learning_rate": 0.0003230842105263158, "loss": 0.6147, "step": 59130 }, { "epoch": 0.59, "learning_rate": 0.0003230052631578947, "loss": 0.6204, "step": 59140 }, { "epoch": 0.59, "learning_rate": 0.00032292631578947364, "loss": 0.6294, "step": 59150 }, { "epoch": 0.59, "learning_rate": 0.00032284736842105264, "loss": 0.6309, "step": 59160 }, { "epoch": 0.59, "learning_rate": 0.00032276842105263153, "loss": 0.6282, "step": 59170 }, { "epoch": 0.59, "learning_rate": 0.00032268947368421054, "loss": 0.6147, "step": 59180 }, { "epoch": 0.59, "learning_rate": 0.00032261052631578943, "loss": 0.6206, "step": 59190 }, { "epoch": 0.59, "learning_rate": 0.00032253157894736843, "loss": 0.6234, "step": 59200 }, { "epoch": 0.59, "learning_rate": 0.0003224526315789473, "loss": 0.6262, "step": 59210 }, { "epoch": 0.59, "learning_rate": 0.0003223736842105263, "loss": 0.6348, "step": 59220 }, { "epoch": 0.59, "learning_rate": 0.0003222947368421052, "loss": 0.6257, "step": 59230 }, { "epoch": 0.59, "learning_rate": 0.00032221578947368417, "loss": 0.6284, "step": 59240 }, { "epoch": 0.59, "learning_rate": 0.0003221368421052632, "loss": 0.6237, "step": 59250 }, { "epoch": 0.59, "learning_rate": 0.00032205789473684207, "loss": 0.6259, "step": 59260 }, { "epoch": 0.59, "learning_rate": 0.00032197894736842107, "loss": 0.6308, "step": 59270 }, { "epoch": 0.59, "learning_rate": 0.00032189999999999996, "loss": 0.6322, "step": 59280 }, { "epoch": 0.59, "learning_rate": 0.0003218210526315789, "loss": 0.6329, "step": 59290 }, { "epoch": 0.59, "learning_rate": 0.00032174210526315786, "loss": 0.6196, "step": 59300 }, { "epoch": 0.59, "learning_rate": 0.0003216631578947368, "loss": 0.619, "step": 59310 }, { "epoch": 0.59, "learning_rate": 0.00032158421052631576, "loss": 0.6221, "step": 59320 }, { "epoch": 0.59, "learning_rate": 0.0003215052631578947, "loss": 0.6213, "step": 59330 }, { "epoch": 0.59, "learning_rate": 0.0003214263157894737, "loss": 0.6261, "step": 59340 }, { "epoch": 0.59, "learning_rate": 0.0003213473684210526, "loss": 0.6266, "step": 59350 }, { "epoch": 0.59, "learning_rate": 0.00032126842105263155, "loss": 0.6212, "step": 59360 }, { "epoch": 0.59, "learning_rate": 0.0003211894736842105, "loss": 0.6213, "step": 59370 }, { "epoch": 0.59, "learning_rate": 0.00032111052631578945, "loss": 0.6246, "step": 59380 }, { "epoch": 0.59, "learning_rate": 0.0003210315789473684, "loss": 0.6276, "step": 59390 }, { "epoch": 0.59, "learning_rate": 0.00032095263157894734, "loss": 0.6341, "step": 59400 }, { "epoch": 0.59, "learning_rate": 0.00032087368421052624, "loss": 0.6288, "step": 59410 }, { "epoch": 0.59, "learning_rate": 0.00032079473684210524, "loss": 0.6212, "step": 59420 }, { "epoch": 0.59, "learning_rate": 0.0003207157894736842, "loss": 0.633, "step": 59430 }, { "epoch": 0.59, "learning_rate": 0.00032063684210526314, "loss": 0.6285, "step": 59440 }, { "epoch": 0.59, "learning_rate": 0.0003205578947368421, "loss": 0.6212, "step": 59450 }, { "epoch": 0.59, "learning_rate": 0.00032047894736842103, "loss": 0.6343, "step": 59460 }, { "epoch": 0.59, "learning_rate": 0.0003204, "loss": 0.6328, "step": 59470 }, { "epoch": 0.59, "learning_rate": 0.0003203210526315789, "loss": 0.6272, "step": 59480 }, { "epoch": 0.59, "learning_rate": 0.0003202421052631579, "loss": 0.6278, "step": 59490 }, { "epoch": 0.59, "learning_rate": 0.00032016315789473677, "loss": 0.6287, "step": 59500 }, { "epoch": 0.6, "learning_rate": 0.0003200842105263158, "loss": 0.6277, "step": 59510 }, { "epoch": 0.6, "learning_rate": 0.00032000526315789467, "loss": 0.6342, "step": 59520 }, { "epoch": 0.6, "learning_rate": 0.00031992631578947367, "loss": 0.6318, "step": 59530 }, { "epoch": 0.6, "learning_rate": 0.0003198473684210526, "loss": 0.6406, "step": 59540 }, { "epoch": 0.6, "learning_rate": 0.0003197684210526315, "loss": 0.6313, "step": 59550 }, { "epoch": 0.6, "learning_rate": 0.0003196894736842105, "loss": 0.6356, "step": 59560 }, { "epoch": 0.6, "learning_rate": 0.0003196105263157894, "loss": 0.6193, "step": 59570 }, { "epoch": 0.6, "learning_rate": 0.0003195315789473684, "loss": 0.6304, "step": 59580 }, { "epoch": 0.6, "learning_rate": 0.0003194526315789473, "loss": 0.6199, "step": 59590 }, { "epoch": 0.6, "learning_rate": 0.0003193736842105263, "loss": 0.6353, "step": 59600 }, { "epoch": 0.6, "learning_rate": 0.0003192947368421052, "loss": 0.613, "step": 59610 }, { "epoch": 0.6, "learning_rate": 0.0003192157894736842, "loss": 0.6261, "step": 59620 }, { "epoch": 0.6, "learning_rate": 0.00031913684210526315, "loss": 0.6297, "step": 59630 }, { "epoch": 0.6, "learning_rate": 0.00031905789473684205, "loss": 0.6254, "step": 59640 }, { "epoch": 0.6, "learning_rate": 0.00031897894736842105, "loss": 0.6246, "step": 59650 }, { "epoch": 0.6, "learning_rate": 0.00031889999999999995, "loss": 0.6192, "step": 59660 }, { "epoch": 0.6, "learning_rate": 0.00031882105263157895, "loss": 0.6128, "step": 59670 }, { "epoch": 0.6, "learning_rate": 0.00031874210526315784, "loss": 0.6194, "step": 59680 }, { "epoch": 0.6, "learning_rate": 0.00031866315789473684, "loss": 0.6397, "step": 59690 }, { "epoch": 0.6, "learning_rate": 0.00031858421052631574, "loss": 0.635, "step": 59700 }, { "epoch": 0.6, "learning_rate": 0.0003185052631578947, "loss": 0.6295, "step": 59710 }, { "epoch": 0.6, "learning_rate": 0.0003184263157894737, "loss": 0.6401, "step": 59720 }, { "epoch": 0.6, "learning_rate": 0.0003183473684210526, "loss": 0.6183, "step": 59730 }, { "epoch": 0.6, "learning_rate": 0.00031827631578947366, "loss": 0.6335, "step": 59740 }, { "epoch": 0.6, "learning_rate": 0.0003181973684210526, "loss": 0.6204, "step": 59750 }, { "epoch": 0.6, "learning_rate": 0.0003181184210526315, "loss": 0.616, "step": 59760 }, { "epoch": 0.6, "learning_rate": 0.0003180394736842105, "loss": 0.6197, "step": 59770 }, { "epoch": 0.6, "learning_rate": 0.00031796052631578945, "loss": 0.6293, "step": 59780 }, { "epoch": 0.6, "learning_rate": 0.0003178815789473684, "loss": 0.6145, "step": 59790 }, { "epoch": 0.6, "learning_rate": 0.00031780263157894735, "loss": 0.6298, "step": 59800 }, { "epoch": 0.6, "learning_rate": 0.0003177236842105263, "loss": 0.6333, "step": 59810 }, { "epoch": 0.6, "learning_rate": 0.00031764473684210524, "loss": 0.6434, "step": 59820 }, { "epoch": 0.6, "learning_rate": 0.00031756578947368414, "loss": 0.6417, "step": 59830 }, { "epoch": 0.6, "learning_rate": 0.00031748684210526314, "loss": 0.637, "step": 59840 }, { "epoch": 0.6, "learning_rate": 0.00031740789473684204, "loss": 0.6227, "step": 59850 }, { "epoch": 0.6, "learning_rate": 0.00031732894736842104, "loss": 0.6152, "step": 59860 }, { "epoch": 0.6, "learning_rate": 0.00031725, "loss": 0.633, "step": 59870 }, { "epoch": 0.6, "learning_rate": 0.00031717105263157893, "loss": 0.6391, "step": 59880 }, { "epoch": 0.6, "learning_rate": 0.0003170921052631579, "loss": 0.623, "step": 59890 }, { "epoch": 0.6, "learning_rate": 0.0003170131578947368, "loss": 0.6313, "step": 59900 }, { "epoch": 0.6, "learning_rate": 0.0003169342105263158, "loss": 0.6346, "step": 59910 }, { "epoch": 0.6, "learning_rate": 0.0003168552631578947, "loss": 0.6354, "step": 59920 }, { "epoch": 0.6, "learning_rate": 0.0003167763157894737, "loss": 0.6216, "step": 59930 }, { "epoch": 0.6, "learning_rate": 0.00031669736842105257, "loss": 0.6221, "step": 59940 }, { "epoch": 0.6, "learning_rate": 0.00031661842105263157, "loss": 0.613, "step": 59950 }, { "epoch": 0.6, "learning_rate": 0.0003165394736842105, "loss": 0.6269, "step": 59960 }, { "epoch": 0.6, "learning_rate": 0.0003164605263157894, "loss": 0.6301, "step": 59970 }, { "epoch": 0.6, "learning_rate": 0.0003163815789473684, "loss": 0.6229, "step": 59980 }, { "epoch": 0.6, "learning_rate": 0.0003163026315789473, "loss": 0.6208, "step": 59990 }, { "epoch": 0.6, "learning_rate": 0.0003162236842105263, "loss": 0.6275, "step": 60000 }, { "epoch": 0.6, "eval_accuracy": 0.8692195892541361, "eval_loss": 0.61181640625, "eval_runtime": 97.6328, "eval_samples_per_second": 819.396, "eval_steps_per_second": 1.608, "step": 60000 }, { "epoch": 0.6, "learning_rate": 0.0003161447368421052, "loss": 0.632, "step": 60010 }, { "epoch": 0.6, "learning_rate": 0.0003160657894736842, "loss": 0.637, "step": 60020 }, { "epoch": 0.6, "learning_rate": 0.0003159868421052631, "loss": 0.6263, "step": 60030 }, { "epoch": 0.6, "learning_rate": 0.0003159078947368421, "loss": 0.6303, "step": 60040 }, { "epoch": 0.6, "learning_rate": 0.000315828947368421, "loss": 0.6338, "step": 60050 }, { "epoch": 0.6, "learning_rate": 0.00031574999999999995, "loss": 0.6278, "step": 60060 }, { "epoch": 0.6, "learning_rate": 0.00031567105263157895, "loss": 0.6364, "step": 60070 }, { "epoch": 0.6, "learning_rate": 0.00031559210526315785, "loss": 0.6261, "step": 60080 }, { "epoch": 0.6, "learning_rate": 0.00031551315789473685, "loss": 0.6224, "step": 60090 }, { "epoch": 0.6, "learning_rate": 0.00031543421052631574, "loss": 0.6252, "step": 60100 }, { "epoch": 0.6, "learning_rate": 0.00031535526315789475, "loss": 0.6245, "step": 60110 }, { "epoch": 0.6, "learning_rate": 0.00031527631578947364, "loss": 0.6357, "step": 60120 }, { "epoch": 0.6, "learning_rate": 0.0003151973684210526, "loss": 0.6277, "step": 60130 }, { "epoch": 0.6, "learning_rate": 0.00031511842105263154, "loss": 0.6235, "step": 60140 }, { "epoch": 0.6, "learning_rate": 0.0003150394736842105, "loss": 0.6275, "step": 60150 }, { "epoch": 0.6, "learning_rate": 0.0003149605263157895, "loss": 0.625, "step": 60160 }, { "epoch": 0.6, "learning_rate": 0.0003148815789473684, "loss": 0.6379, "step": 60170 }, { "epoch": 0.6, "learning_rate": 0.0003148026315789474, "loss": 0.6249, "step": 60180 }, { "epoch": 0.6, "learning_rate": 0.0003147236842105263, "loss": 0.633, "step": 60190 }, { "epoch": 0.6, "learning_rate": 0.0003146447368421052, "loss": 0.6364, "step": 60200 }, { "epoch": 0.6, "learning_rate": 0.0003145657894736842, "loss": 0.6279, "step": 60210 }, { "epoch": 0.6, "learning_rate": 0.0003144868421052631, "loss": 0.6266, "step": 60220 }, { "epoch": 0.6, "learning_rate": 0.00031440789473684207, "loss": 0.6171, "step": 60230 }, { "epoch": 0.6, "learning_rate": 0.000314328947368421, "loss": 0.6098, "step": 60240 }, { "epoch": 0.6, "learning_rate": 0.00031425, "loss": 0.6407, "step": 60250 }, { "epoch": 0.6, "learning_rate": 0.0003141710526315789, "loss": 0.627, "step": 60260 }, { "epoch": 0.6, "learning_rate": 0.00031409210526315786, "loss": 0.6257, "step": 60270 }, { "epoch": 0.6, "learning_rate": 0.0003140131578947368, "loss": 0.618, "step": 60280 }, { "epoch": 0.6, "learning_rate": 0.00031393421052631576, "loss": 0.6326, "step": 60290 }, { "epoch": 0.6, "learning_rate": 0.0003138552631578947, "loss": 0.634, "step": 60300 }, { "epoch": 0.6, "learning_rate": 0.00031377631578947366, "loss": 0.6198, "step": 60310 }, { "epoch": 0.6, "learning_rate": 0.00031369736842105255, "loss": 0.6212, "step": 60320 }, { "epoch": 0.6, "learning_rate": 0.00031361842105263155, "loss": 0.6295, "step": 60330 }, { "epoch": 0.6, "learning_rate": 0.0003135394736842105, "loss": 0.6275, "step": 60340 }, { "epoch": 0.6, "learning_rate": 0.00031346052631578945, "loss": 0.6225, "step": 60350 }, { "epoch": 0.6, "learning_rate": 0.0003133815789473684, "loss": 0.6308, "step": 60360 }, { "epoch": 0.6, "learning_rate": 0.00031330263157894735, "loss": 0.6313, "step": 60370 }, { "epoch": 0.6, "learning_rate": 0.0003132236842105263, "loss": 0.6352, "step": 60380 }, { "epoch": 0.6, "learning_rate": 0.0003131447368421052, "loss": 0.6164, "step": 60390 }, { "epoch": 0.6, "learning_rate": 0.0003130657894736842, "loss": 0.623, "step": 60400 }, { "epoch": 0.6, "learning_rate": 0.0003129868421052631, "loss": 0.6265, "step": 60410 }, { "epoch": 0.6, "learning_rate": 0.0003129078947368421, "loss": 0.6195, "step": 60420 }, { "epoch": 0.6, "learning_rate": 0.00031282894736842104, "loss": 0.6345, "step": 60430 }, { "epoch": 0.6, "learning_rate": 0.00031275, "loss": 0.6402, "step": 60440 }, { "epoch": 0.6, "learning_rate": 0.00031267105263157893, "loss": 0.6309, "step": 60450 }, { "epoch": 0.6, "learning_rate": 0.00031259210526315783, "loss": 0.609, "step": 60460 }, { "epoch": 0.6, "learning_rate": 0.00031251315789473683, "loss": 0.6225, "step": 60470 }, { "epoch": 0.6, "learning_rate": 0.0003124342105263157, "loss": 0.6204, "step": 60480 }, { "epoch": 0.6, "learning_rate": 0.0003123552631578947, "loss": 0.6171, "step": 60490 }, { "epoch": 0.6, "learning_rate": 0.0003122763157894736, "loss": 0.6381, "step": 60500 }, { "epoch": 0.61, "learning_rate": 0.0003121973684210526, "loss": 0.6226, "step": 60510 }, { "epoch": 0.61, "learning_rate": 0.00031211842105263157, "loss": 0.6345, "step": 60520 }, { "epoch": 0.61, "learning_rate": 0.0003120394736842105, "loss": 0.6231, "step": 60530 }, { "epoch": 0.61, "learning_rate": 0.00031196052631578947, "loss": 0.6288, "step": 60540 }, { "epoch": 0.61, "learning_rate": 0.00031188157894736836, "loss": 0.6224, "step": 60550 }, { "epoch": 0.61, "learning_rate": 0.00031180263157894737, "loss": 0.6392, "step": 60560 }, { "epoch": 0.61, "learning_rate": 0.00031172368421052626, "loss": 0.6262, "step": 60570 }, { "epoch": 0.61, "learning_rate": 0.00031164473684210526, "loss": 0.6266, "step": 60580 }, { "epoch": 0.61, "learning_rate": 0.00031156578947368416, "loss": 0.6389, "step": 60590 }, { "epoch": 0.61, "learning_rate": 0.00031148684210526316, "loss": 0.64, "step": 60600 }, { "epoch": 0.61, "learning_rate": 0.00031140789473684205, "loss": 0.6286, "step": 60610 }, { "epoch": 0.61, "learning_rate": 0.000311328947368421, "loss": 0.6186, "step": 60620 }, { "epoch": 0.61, "learning_rate": 0.00031125, "loss": 0.6166, "step": 60630 }, { "epoch": 0.61, "learning_rate": 0.0003111710526315789, "loss": 0.6251, "step": 60640 }, { "epoch": 0.61, "learning_rate": 0.0003110921052631579, "loss": 0.6116, "step": 60650 }, { "epoch": 0.61, "learning_rate": 0.0003110131578947368, "loss": 0.6275, "step": 60660 }, { "epoch": 0.61, "learning_rate": 0.0003109342105263158, "loss": 0.6184, "step": 60670 }, { "epoch": 0.61, "learning_rate": 0.0003108552631578947, "loss": 0.6228, "step": 60680 }, { "epoch": 0.61, "learning_rate": 0.00031077631578947364, "loss": 0.613, "step": 60690 }, { "epoch": 0.61, "learning_rate": 0.0003106973684210526, "loss": 0.6364, "step": 60700 }, { "epoch": 0.61, "learning_rate": 0.00031061842105263154, "loss": 0.6166, "step": 60710 }, { "epoch": 0.61, "learning_rate": 0.00031053947368421054, "loss": 0.6254, "step": 60720 }, { "epoch": 0.61, "learning_rate": 0.00031046052631578943, "loss": 0.6301, "step": 60730 }, { "epoch": 0.61, "learning_rate": 0.00031038157894736843, "loss": 0.6314, "step": 60740 }, { "epoch": 0.61, "learning_rate": 0.00031030263157894733, "loss": 0.6388, "step": 60750 }, { "epoch": 0.61, "learning_rate": 0.0003102236842105263, "loss": 0.6364, "step": 60760 }, { "epoch": 0.61, "learning_rate": 0.0003101447368421052, "loss": 0.6293, "step": 60770 }, { "epoch": 0.61, "learning_rate": 0.0003100657894736842, "loss": 0.6234, "step": 60780 }, { "epoch": 0.61, "learning_rate": 0.0003099868421052631, "loss": 0.6111, "step": 60790 }, { "epoch": 0.61, "learning_rate": 0.00030990789473684207, "loss": 0.6188, "step": 60800 }, { "epoch": 0.61, "learning_rate": 0.00030982894736842107, "loss": 0.6154, "step": 60810 }, { "epoch": 0.61, "learning_rate": 0.00030974999999999997, "loss": 0.6339, "step": 60820 }, { "epoch": 0.61, "learning_rate": 0.0003096710526315789, "loss": 0.6194, "step": 60830 }, { "epoch": 0.61, "learning_rate": 0.00030959210526315786, "loss": 0.6195, "step": 60840 }, { "epoch": 0.61, "learning_rate": 0.0003095131578947368, "loss": 0.6186, "step": 60850 }, { "epoch": 0.61, "learning_rate": 0.00030943421052631576, "loss": 0.6165, "step": 60860 }, { "epoch": 0.61, "learning_rate": 0.0003093552631578947, "loss": 0.6314, "step": 60870 }, { "epoch": 0.61, "learning_rate": 0.0003092763157894736, "loss": 0.6307, "step": 60880 }, { "epoch": 0.61, "learning_rate": 0.0003091973684210526, "loss": 0.6341, "step": 60890 }, { "epoch": 0.61, "learning_rate": 0.0003091184210526316, "loss": 0.6156, "step": 60900 }, { "epoch": 0.61, "learning_rate": 0.0003090394736842105, "loss": 0.6256, "step": 60910 }, { "epoch": 0.61, "learning_rate": 0.00030896052631578945, "loss": 0.63, "step": 60920 }, { "epoch": 0.61, "learning_rate": 0.0003088815789473684, "loss": 0.632, "step": 60930 }, { "epoch": 0.61, "learning_rate": 0.00030880263157894735, "loss": 0.6306, "step": 60940 }, { "epoch": 0.61, "learning_rate": 0.00030872368421052624, "loss": 0.6324, "step": 60950 }, { "epoch": 0.61, "learning_rate": 0.00030864473684210524, "loss": 0.6291, "step": 60960 }, { "epoch": 0.61, "learning_rate": 0.00030856578947368414, "loss": 0.6204, "step": 60970 }, { "epoch": 0.61, "learning_rate": 0.00030848684210526314, "loss": 0.6199, "step": 60980 }, { "epoch": 0.61, "learning_rate": 0.0003084078947368421, "loss": 0.6295, "step": 60990 }, { "epoch": 0.61, "learning_rate": 0.00030832894736842104, "loss": 0.6233, "step": 61000 }, { "epoch": 0.61, "learning_rate": 0.00030825, "loss": 0.6229, "step": 61010 }, { "epoch": 0.61, "learning_rate": 0.00030817105263157893, "loss": 0.6079, "step": 61020 }, { "epoch": 0.61, "learning_rate": 0.0003080921052631579, "loss": 0.6135, "step": 61030 }, { "epoch": 0.61, "learning_rate": 0.0003080131578947368, "loss": 0.6028, "step": 61040 }, { "epoch": 0.61, "learning_rate": 0.0003079342105263158, "loss": 0.6226, "step": 61050 }, { "epoch": 0.61, "learning_rate": 0.00030785526315789467, "loss": 0.6309, "step": 61060 }, { "epoch": 0.61, "learning_rate": 0.0003077763157894737, "loss": 0.6358, "step": 61070 }, { "epoch": 0.61, "learning_rate": 0.0003076973684210526, "loss": 0.6278, "step": 61080 }, { "epoch": 0.61, "learning_rate": 0.00030761842105263157, "loss": 0.6392, "step": 61090 }, { "epoch": 0.61, "learning_rate": 0.0003075394736842105, "loss": 0.6313, "step": 61100 }, { "epoch": 0.61, "learning_rate": 0.0003074605263157894, "loss": 0.6166, "step": 61110 }, { "epoch": 0.61, "learning_rate": 0.0003073815789473684, "loss": 0.6287, "step": 61120 }, { "epoch": 0.61, "learning_rate": 0.0003073026315789473, "loss": 0.6237, "step": 61130 }, { "epoch": 0.61, "learning_rate": 0.0003072236842105263, "loss": 0.6303, "step": 61140 }, { "epoch": 0.61, "learning_rate": 0.00030715263157894733, "loss": 0.6295, "step": 61150 }, { "epoch": 0.61, "learning_rate": 0.00030707368421052634, "loss": 0.6245, "step": 61160 }, { "epoch": 0.61, "learning_rate": 0.00030699473684210523, "loss": 0.6352, "step": 61170 }, { "epoch": 0.61, "learning_rate": 0.0003069157894736842, "loss": 0.6223, "step": 61180 }, { "epoch": 0.61, "learning_rate": 0.0003068368421052631, "loss": 0.6271, "step": 61190 }, { "epoch": 0.61, "learning_rate": 0.0003067578947368421, "loss": 0.6274, "step": 61200 }, { "epoch": 0.61, "learning_rate": 0.000306678947368421, "loss": 0.6205, "step": 61210 }, { "epoch": 0.61, "learning_rate": 0.00030659999999999997, "loss": 0.6145, "step": 61220 }, { "epoch": 0.61, "learning_rate": 0.00030652105263157887, "loss": 0.6268, "step": 61230 }, { "epoch": 0.61, "learning_rate": 0.00030644210526315787, "loss": 0.6302, "step": 61240 }, { "epoch": 0.61, "learning_rate": 0.00030636315789473687, "loss": 0.6325, "step": 61250 }, { "epoch": 0.61, "learning_rate": 0.00030628421052631576, "loss": 0.6452, "step": 61260 }, { "epoch": 0.61, "learning_rate": 0.0003062052631578947, "loss": 0.6215, "step": 61270 }, { "epoch": 0.61, "learning_rate": 0.00030612631578947366, "loss": 0.6305, "step": 61280 }, { "epoch": 0.61, "learning_rate": 0.0003060473684210526, "loss": 0.6314, "step": 61290 }, { "epoch": 0.61, "learning_rate": 0.0003059684210526315, "loss": 0.6411, "step": 61300 }, { "epoch": 0.61, "learning_rate": 0.0003058894736842105, "loss": 0.639, "step": 61310 }, { "epoch": 0.61, "learning_rate": 0.0003058105263157894, "loss": 0.6268, "step": 61320 }, { "epoch": 0.61, "learning_rate": 0.0003057315789473684, "loss": 0.6194, "step": 61330 }, { "epoch": 0.61, "learning_rate": 0.00030565263157894735, "loss": 0.6243, "step": 61340 }, { "epoch": 0.61, "learning_rate": 0.0003055736842105263, "loss": 0.6305, "step": 61350 }, { "epoch": 0.61, "learning_rate": 0.00030549473684210525, "loss": 0.6338, "step": 61360 }, { "epoch": 0.61, "learning_rate": 0.0003054157894736842, "loss": 0.6212, "step": 61370 }, { "epoch": 0.61, "learning_rate": 0.00030533684210526314, "loss": 0.622, "step": 61380 }, { "epoch": 0.61, "learning_rate": 0.00030525789473684204, "loss": 0.6349, "step": 61390 }, { "epoch": 0.61, "learning_rate": 0.00030517894736842104, "loss": 0.6355, "step": 61400 }, { "epoch": 0.61, "learning_rate": 0.00030509999999999994, "loss": 0.6128, "step": 61410 }, { "epoch": 0.61, "learning_rate": 0.00030502105263157894, "loss": 0.6311, "step": 61420 }, { "epoch": 0.61, "learning_rate": 0.0003049421052631579, "loss": 0.6299, "step": 61430 }, { "epoch": 0.61, "learning_rate": 0.00030486315789473683, "loss": 0.6296, "step": 61440 }, { "epoch": 0.61, "learning_rate": 0.0003047842105263158, "loss": 0.6288, "step": 61450 }, { "epoch": 0.61, "learning_rate": 0.0003047052631578947, "loss": 0.6322, "step": 61460 }, { "epoch": 0.61, "learning_rate": 0.0003046263157894737, "loss": 0.642, "step": 61470 }, { "epoch": 0.61, "learning_rate": 0.0003045473684210526, "loss": 0.6117, "step": 61480 }, { "epoch": 0.61, "learning_rate": 0.0003044684210526316, "loss": 0.6149, "step": 61490 }, { "epoch": 0.61, "learning_rate": 0.00030438947368421047, "loss": 0.6307, "step": 61500 }, { "epoch": 0.62, "learning_rate": 0.00030431052631578947, "loss": 0.6317, "step": 61510 }, { "epoch": 0.62, "learning_rate": 0.0003042315789473684, "loss": 0.6253, "step": 61520 }, { "epoch": 0.62, "learning_rate": 0.0003041526315789473, "loss": 0.6289, "step": 61530 }, { "epoch": 0.62, "learning_rate": 0.0003040736842105263, "loss": 0.6247, "step": 61540 }, { "epoch": 0.62, "learning_rate": 0.0003039947368421052, "loss": 0.6246, "step": 61550 }, { "epoch": 0.62, "learning_rate": 0.0003039157894736842, "loss": 0.6224, "step": 61560 }, { "epoch": 0.62, "learning_rate": 0.0003038368421052631, "loss": 0.6232, "step": 61570 }, { "epoch": 0.62, "learning_rate": 0.0003037578947368421, "loss": 0.632, "step": 61580 }, { "epoch": 0.62, "learning_rate": 0.000303678947368421, "loss": 0.6228, "step": 61590 }, { "epoch": 0.62, "learning_rate": 0.00030359999999999995, "loss": 0.6365, "step": 61600 }, { "epoch": 0.62, "learning_rate": 0.00030352105263157896, "loss": 0.632, "step": 61610 }, { "epoch": 0.62, "learning_rate": 0.00030344210526315785, "loss": 0.6316, "step": 61620 }, { "epoch": 0.62, "learning_rate": 0.00030336315789473685, "loss": 0.639, "step": 61630 }, { "epoch": 0.62, "learning_rate": 0.00030328421052631575, "loss": 0.6287, "step": 61640 }, { "epoch": 0.62, "learning_rate": 0.00030320526315789475, "loss": 0.622, "step": 61650 }, { "epoch": 0.62, "learning_rate": 0.00030312631578947364, "loss": 0.6274, "step": 61660 }, { "epoch": 0.62, "learning_rate": 0.0003030473684210526, "loss": 0.6117, "step": 61670 }, { "epoch": 0.62, "learning_rate": 0.00030296842105263154, "loss": 0.6234, "step": 61680 }, { "epoch": 0.62, "learning_rate": 0.0003028894736842105, "loss": 0.6216, "step": 61690 }, { "epoch": 0.62, "learning_rate": 0.00030281052631578944, "loss": 0.6374, "step": 61700 }, { "epoch": 0.62, "learning_rate": 0.0003027315789473684, "loss": 0.6377, "step": 61710 }, { "epoch": 0.62, "learning_rate": 0.0003026526315789474, "loss": 0.6358, "step": 61720 }, { "epoch": 0.62, "learning_rate": 0.0003025736842105263, "loss": 0.6196, "step": 61730 }, { "epoch": 0.62, "learning_rate": 0.0003024947368421053, "loss": 0.6232, "step": 61740 }, { "epoch": 0.62, "learning_rate": 0.0003024157894736842, "loss": 0.6274, "step": 61750 }, { "epoch": 0.62, "learning_rate": 0.0003023368421052631, "loss": 0.6398, "step": 61760 }, { "epoch": 0.62, "learning_rate": 0.0003022578947368421, "loss": 0.6381, "step": 61770 }, { "epoch": 0.62, "learning_rate": 0.000302178947368421, "loss": 0.6327, "step": 61780 }, { "epoch": 0.62, "learning_rate": 0.0003020999999999999, "loss": 0.621, "step": 61790 }, { "epoch": 0.62, "learning_rate": 0.0003020210526315789, "loss": 0.6335, "step": 61800 }, { "epoch": 0.62, "learning_rate": 0.0003019421052631579, "loss": 0.6233, "step": 61810 }, { "epoch": 0.62, "learning_rate": 0.0003018631578947368, "loss": 0.629, "step": 61820 }, { "epoch": 0.62, "learning_rate": 0.00030178421052631576, "loss": 0.6417, "step": 61830 }, { "epoch": 0.62, "learning_rate": 0.0003017052631578947, "loss": 0.627, "step": 61840 }, { "epoch": 0.62, "learning_rate": 0.00030162631578947366, "loss": 0.6304, "step": 61850 }, { "epoch": 0.62, "learning_rate": 0.0003015473684210526, "loss": 0.6376, "step": 61860 }, { "epoch": 0.62, "learning_rate": 0.00030146842105263156, "loss": 0.6327, "step": 61870 }, { "epoch": 0.62, "learning_rate": 0.00030138947368421045, "loss": 0.619, "step": 61880 }, { "epoch": 0.62, "learning_rate": 0.00030131052631578945, "loss": 0.6299, "step": 61890 }, { "epoch": 0.62, "learning_rate": 0.0003012315789473684, "loss": 0.631, "step": 61900 }, { "epoch": 0.62, "learning_rate": 0.00030115263157894735, "loss": 0.6322, "step": 61910 }, { "epoch": 0.62, "learning_rate": 0.0003010736842105263, "loss": 0.6164, "step": 61920 }, { "epoch": 0.62, "learning_rate": 0.00030099473684210525, "loss": 0.621, "step": 61930 }, { "epoch": 0.62, "learning_rate": 0.0003009157894736842, "loss": 0.631, "step": 61940 }, { "epoch": 0.62, "learning_rate": 0.0003008368421052631, "loss": 0.6438, "step": 61950 }, { "epoch": 0.62, "learning_rate": 0.0003007578947368421, "loss": 0.6347, "step": 61960 }, { "epoch": 0.62, "learning_rate": 0.000300678947368421, "loss": 0.634, "step": 61970 }, { "epoch": 0.62, "learning_rate": 0.0003006, "loss": 0.6197, "step": 61980 }, { "epoch": 0.62, "learning_rate": 0.00030052105263157894, "loss": 0.5923, "step": 61990 }, { "epoch": 0.62, "learning_rate": 0.0003004421052631579, "loss": 0.6237, "step": 62000 }, { "epoch": 0.62, "learning_rate": 0.00030036315789473683, "loss": 0.6013, "step": 62010 }, { "epoch": 0.62, "learning_rate": 0.00030028421052631573, "loss": 0.6049, "step": 62020 }, { "epoch": 0.62, "learning_rate": 0.00030020526315789473, "loss": 0.5971, "step": 62030 }, { "epoch": 0.62, "learning_rate": 0.0003001263157894736, "loss": 0.6033, "step": 62040 }, { "epoch": 0.62, "learning_rate": 0.0003000473684210526, "loss": 0.6055, "step": 62050 }, { "epoch": 0.62, "learning_rate": 0.0002999684210526316, "loss": 0.6101, "step": 62060 }, { "epoch": 0.62, "learning_rate": 0.0002998894736842105, "loss": 0.6141, "step": 62070 }, { "epoch": 0.62, "learning_rate": 0.00029981052631578947, "loss": 0.6111, "step": 62080 }, { "epoch": 0.62, "learning_rate": 0.00029973157894736837, "loss": 0.6125, "step": 62090 }, { "epoch": 0.62, "learning_rate": 0.0002996526315789473, "loss": 0.6139, "step": 62100 }, { "epoch": 0.62, "learning_rate": 0.00029957368421052626, "loss": 0.5965, "step": 62110 }, { "epoch": 0.62, "learning_rate": 0.0002994947368421052, "loss": 0.6188, "step": 62120 }, { "epoch": 0.62, "learning_rate": 0.0002994157894736842, "loss": 0.6104, "step": 62130 }, { "epoch": 0.62, "learning_rate": 0.00029933684210526316, "loss": 0.6224, "step": 62140 }, { "epoch": 0.62, "learning_rate": 0.0002992578947368421, "loss": 0.6328, "step": 62150 }, { "epoch": 0.62, "learning_rate": 0.000299178947368421, "loss": 0.6341, "step": 62160 }, { "epoch": 0.62, "learning_rate": 0.00029909999999999995, "loss": 0.6263, "step": 62170 }, { "epoch": 0.62, "learning_rate": 0.0002990210526315789, "loss": 0.6255, "step": 62180 }, { "epoch": 0.62, "learning_rate": 0.00029894210526315785, "loss": 0.6184, "step": 62190 }, { "epoch": 0.62, "learning_rate": 0.0002988631578947368, "loss": 0.6249, "step": 62200 }, { "epoch": 0.62, "learning_rate": 0.00029878421052631575, "loss": 0.6208, "step": 62210 }, { "epoch": 0.62, "learning_rate": 0.00029870526315789475, "loss": 0.6298, "step": 62220 }, { "epoch": 0.62, "learning_rate": 0.0002986263157894737, "loss": 0.621, "step": 62230 }, { "epoch": 0.62, "learning_rate": 0.0002985473684210526, "loss": 0.6203, "step": 62240 }, { "epoch": 0.62, "learning_rate": 0.00029846842105263154, "loss": 0.6323, "step": 62250 }, { "epoch": 0.62, "learning_rate": 0.0002983894736842105, "loss": 0.6298, "step": 62260 }, { "epoch": 0.62, "learning_rate": 0.00029831052631578943, "loss": 0.6397, "step": 62270 }, { "epoch": 0.62, "learning_rate": 0.0002982315789473684, "loss": 0.6296, "step": 62280 }, { "epoch": 0.62, "learning_rate": 0.00029815263157894733, "loss": 0.6239, "step": 62290 }, { "epoch": 0.62, "learning_rate": 0.0002980736842105263, "loss": 0.6316, "step": 62300 }, { "epoch": 0.62, "learning_rate": 0.00029799473684210523, "loss": 0.6312, "step": 62310 }, { "epoch": 0.62, "learning_rate": 0.0002979157894736842, "loss": 0.6491, "step": 62320 }, { "epoch": 0.62, "learning_rate": 0.0002978368421052631, "loss": 0.6374, "step": 62330 }, { "epoch": 0.62, "learning_rate": 0.0002977578947368421, "loss": 0.6282, "step": 62340 }, { "epoch": 0.62, "learning_rate": 0.000297678947368421, "loss": 0.6291, "step": 62350 }, { "epoch": 0.62, "learning_rate": 0.00029759999999999997, "loss": 0.6272, "step": 62360 }, { "epoch": 0.62, "learning_rate": 0.0002975210526315789, "loss": 0.6113, "step": 62370 }, { "epoch": 0.62, "learning_rate": 0.00029744210526315787, "loss": 0.6234, "step": 62380 }, { "epoch": 0.62, "learning_rate": 0.0002973631578947368, "loss": 0.6311, "step": 62390 }, { "epoch": 0.62, "learning_rate": 0.00029728421052631576, "loss": 0.6298, "step": 62400 }, { "epoch": 0.62, "learning_rate": 0.0002972052631578947, "loss": 0.6361, "step": 62410 }, { "epoch": 0.62, "learning_rate": 0.00029712631578947366, "loss": 0.6314, "step": 62420 }, { "epoch": 0.62, "learning_rate": 0.0002970473684210526, "loss": 0.6183, "step": 62430 }, { "epoch": 0.62, "learning_rate": 0.00029696842105263156, "loss": 0.6201, "step": 62440 }, { "epoch": 0.62, "learning_rate": 0.0002968894736842105, "loss": 0.6338, "step": 62450 }, { "epoch": 0.62, "learning_rate": 0.00029681052631578945, "loss": 0.6498, "step": 62460 }, { "epoch": 0.62, "learning_rate": 0.0002967315789473684, "loss": 0.6313, "step": 62470 }, { "epoch": 0.62, "learning_rate": 0.00029665263157894735, "loss": 0.616, "step": 62480 }, { "epoch": 0.62, "learning_rate": 0.0002965736842105263, "loss": 0.6071, "step": 62490 }, { "epoch": 0.62, "learning_rate": 0.00029649473684210525, "loss": 0.6063, "step": 62500 }, { "epoch": 0.62, "eval_accuracy": 0.8706084189495512, "eval_loss": 0.60107421875, "eval_runtime": 97.0491, "eval_samples_per_second": 824.325, "eval_steps_per_second": 1.618, "step": 62500 }, { "epoch": 0.63, "learning_rate": 0.0002964157894736842, "loss": 0.5934, "step": 62510 }, { "epoch": 0.63, "learning_rate": 0.00029633684210526314, "loss": 0.6107, "step": 62520 }, { "epoch": 0.63, "learning_rate": 0.0002962578947368421, "loss": 0.6027, "step": 62530 }, { "epoch": 0.63, "learning_rate": 0.00029617894736842104, "loss": 0.6043, "step": 62540 }, { "epoch": 0.63, "learning_rate": 0.0002961, "loss": 0.602, "step": 62550 }, { "epoch": 0.63, "learning_rate": 0.00029602105263157894, "loss": 0.6035, "step": 62560 }, { "epoch": 0.63, "learning_rate": 0.0002959421052631579, "loss": 0.6028, "step": 62570 }, { "epoch": 0.63, "learning_rate": 0.0002958631578947368, "loss": 0.6066, "step": 62580 }, { "epoch": 0.63, "learning_rate": 0.0002957842105263158, "loss": 0.5998, "step": 62590 }, { "epoch": 0.63, "learning_rate": 0.00029570526315789473, "loss": 0.6178, "step": 62600 }, { "epoch": 0.63, "learning_rate": 0.0002956263157894737, "loss": 0.619, "step": 62610 }, { "epoch": 0.63, "learning_rate": 0.0002955473684210526, "loss": 0.6166, "step": 62620 }, { "epoch": 0.63, "learning_rate": 0.0002954684210526316, "loss": 0.6262, "step": 62630 }, { "epoch": 0.63, "learning_rate": 0.0002953894736842105, "loss": 0.632, "step": 62640 }, { "epoch": 0.63, "learning_rate": 0.0002953105263157894, "loss": 0.6159, "step": 62650 }, { "epoch": 0.63, "learning_rate": 0.00029523157894736836, "loss": 0.6267, "step": 62660 }, { "epoch": 0.63, "learning_rate": 0.0002951526315789473, "loss": 0.632, "step": 62670 }, { "epoch": 0.63, "learning_rate": 0.00029507368421052626, "loss": 0.6263, "step": 62680 }, { "epoch": 0.63, "learning_rate": 0.00029499473684210526, "loss": 0.6209, "step": 62690 }, { "epoch": 0.63, "learning_rate": 0.0002949157894736842, "loss": 0.6242, "step": 62700 }, { "epoch": 0.63, "learning_rate": 0.00029483684210526316, "loss": 0.6247, "step": 62710 }, { "epoch": 0.63, "learning_rate": 0.0002947578947368421, "loss": 0.6175, "step": 62720 }, { "epoch": 0.63, "learning_rate": 0.000294678947368421, "loss": 0.6334, "step": 62730 }, { "epoch": 0.63, "learning_rate": 0.00029459999999999995, "loss": 0.6237, "step": 62740 }, { "epoch": 0.63, "learning_rate": 0.0002945210526315789, "loss": 0.6233, "step": 62750 }, { "epoch": 0.63, "learning_rate": 0.00029444210526315785, "loss": 0.6153, "step": 62760 }, { "epoch": 0.63, "learning_rate": 0.0002943631578947368, "loss": 0.6262, "step": 62770 }, { "epoch": 0.63, "learning_rate": 0.0002942842105263158, "loss": 0.6317, "step": 62780 }, { "epoch": 0.63, "learning_rate": 0.00029420526315789475, "loss": 0.6091, "step": 62790 }, { "epoch": 0.63, "learning_rate": 0.00029412631578947364, "loss": 0.6173, "step": 62800 }, { "epoch": 0.63, "learning_rate": 0.0002940473684210526, "loss": 0.6332, "step": 62810 }, { "epoch": 0.63, "learning_rate": 0.00029396842105263154, "loss": 0.6217, "step": 62820 }, { "epoch": 0.63, "learning_rate": 0.0002938894736842105, "loss": 0.623, "step": 62830 }, { "epoch": 0.63, "learning_rate": 0.00029381052631578943, "loss": 0.6223, "step": 62840 }, { "epoch": 0.63, "learning_rate": 0.0002937315789473684, "loss": 0.6251, "step": 62850 }, { "epoch": 0.63, "learning_rate": 0.00029365263157894733, "loss": 0.6207, "step": 62860 }, { "epoch": 0.63, "learning_rate": 0.0002935736842105263, "loss": 0.6127, "step": 62870 }, { "epoch": 0.63, "learning_rate": 0.00029349473684210523, "loss": 0.6234, "step": 62880 }, { "epoch": 0.63, "learning_rate": 0.0002934157894736842, "loss": 0.6109, "step": 62890 }, { "epoch": 0.63, "learning_rate": 0.0002933368421052631, "loss": 0.6243, "step": 62900 }, { "epoch": 0.63, "learning_rate": 0.00029325789473684207, "loss": 0.6196, "step": 62910 }, { "epoch": 0.63, "learning_rate": 0.000293178947368421, "loss": 0.6146, "step": 62920 }, { "epoch": 0.63, "learning_rate": 0.00029309999999999997, "loss": 0.6201, "step": 62930 }, { "epoch": 0.63, "learning_rate": 0.0002930210526315789, "loss": 0.6172, "step": 62940 }, { "epoch": 0.63, "learning_rate": 0.00029294210526315787, "loss": 0.6087, "step": 62950 }, { "epoch": 0.63, "learning_rate": 0.0002928631578947368, "loss": 0.6242, "step": 62960 }, { "epoch": 0.63, "learning_rate": 0.00029278421052631576, "loss": 0.6154, "step": 62970 }, { "epoch": 0.63, "learning_rate": 0.00029271315789473684, "loss": 0.6189, "step": 62980 }, { "epoch": 0.63, "learning_rate": 0.0002926342105263158, "loss": 0.6134, "step": 62990 }, { "epoch": 0.63, "learning_rate": 0.0002925552631578947, "loss": 0.6109, "step": 63000 }, { "epoch": 0.63, "learning_rate": 0.00029247631578947363, "loss": 0.6258, "step": 63010 }, { "epoch": 0.63, "learning_rate": 0.0002923973684210526, "loss": 0.6197, "step": 63020 }, { "epoch": 0.63, "learning_rate": 0.0002923184210526316, "loss": 0.615, "step": 63030 }, { "epoch": 0.63, "learning_rate": 0.0002922394736842105, "loss": 0.6197, "step": 63040 }, { "epoch": 0.63, "learning_rate": 0.0002921605263157895, "loss": 0.6159, "step": 63050 }, { "epoch": 0.63, "learning_rate": 0.0002920815789473684, "loss": 0.6103, "step": 63060 }, { "epoch": 0.63, "learning_rate": 0.0002920026315789473, "loss": 0.607, "step": 63070 }, { "epoch": 0.63, "learning_rate": 0.00029192368421052627, "loss": 0.6194, "step": 63080 }, { "epoch": 0.63, "learning_rate": 0.0002918447368421052, "loss": 0.6207, "step": 63090 }, { "epoch": 0.63, "learning_rate": 0.00029176578947368416, "loss": 0.6235, "step": 63100 }, { "epoch": 0.63, "learning_rate": 0.0002916868421052631, "loss": 0.6125, "step": 63110 }, { "epoch": 0.63, "learning_rate": 0.0002916078947368421, "loss": 0.6209, "step": 63120 }, { "epoch": 0.63, "learning_rate": 0.00029152894736842106, "loss": 0.6281, "step": 63130 }, { "epoch": 0.63, "learning_rate": 0.00029145, "loss": 0.608, "step": 63140 }, { "epoch": 0.63, "learning_rate": 0.0002913710526315789, "loss": 0.6118, "step": 63150 }, { "epoch": 0.63, "learning_rate": 0.00029129210526315785, "loss": 0.6085, "step": 63160 }, { "epoch": 0.63, "learning_rate": 0.0002912131578947368, "loss": 0.6079, "step": 63170 }, { "epoch": 0.63, "learning_rate": 0.00029113421052631575, "loss": 0.6188, "step": 63180 }, { "epoch": 0.63, "learning_rate": 0.0002910552631578947, "loss": 0.6259, "step": 63190 }, { "epoch": 0.63, "learning_rate": 0.00029097631578947365, "loss": 0.6247, "step": 63200 }, { "epoch": 0.63, "learning_rate": 0.0002908973684210526, "loss": 0.6283, "step": 63210 }, { "epoch": 0.63, "learning_rate": 0.00029081842105263154, "loss": 0.6154, "step": 63220 }, { "epoch": 0.63, "learning_rate": 0.0002907394736842105, "loss": 0.6136, "step": 63230 }, { "epoch": 0.63, "learning_rate": 0.00029066052631578944, "loss": 0.6131, "step": 63240 }, { "epoch": 0.63, "learning_rate": 0.0002905815789473684, "loss": 0.6044, "step": 63250 }, { "epoch": 0.63, "learning_rate": 0.00029050263157894734, "loss": 0.6216, "step": 63260 }, { "epoch": 0.63, "learning_rate": 0.0002904236842105263, "loss": 0.6183, "step": 63270 }, { "epoch": 0.63, "learning_rate": 0.00029034473684210523, "loss": 0.6204, "step": 63280 }, { "epoch": 0.63, "learning_rate": 0.0002902657894736842, "loss": 0.6173, "step": 63290 }, { "epoch": 0.63, "learning_rate": 0.00029018684210526313, "loss": 0.613, "step": 63300 }, { "epoch": 0.63, "learning_rate": 0.0002901078947368421, "loss": 0.6297, "step": 63310 }, { "epoch": 0.63, "learning_rate": 0.000290028947368421, "loss": 0.6177, "step": 63320 }, { "epoch": 0.63, "learning_rate": 0.00028995, "loss": 0.6246, "step": 63330 }, { "epoch": 0.63, "learning_rate": 0.0002898710526315789, "loss": 0.6265, "step": 63340 }, { "epoch": 0.63, "learning_rate": 0.00028979210526315787, "loss": 0.635, "step": 63350 }, { "epoch": 0.63, "learning_rate": 0.0002897131578947368, "loss": 0.6188, "step": 63360 }, { "epoch": 0.63, "learning_rate": 0.00028963421052631577, "loss": 0.618, "step": 63370 }, { "epoch": 0.63, "learning_rate": 0.0002895552631578947, "loss": 0.6167, "step": 63380 }, { "epoch": 0.63, "learning_rate": 0.00028947631578947366, "loss": 0.6207, "step": 63390 }, { "epoch": 0.63, "learning_rate": 0.0002893973684210526, "loss": 0.6235, "step": 63400 }, { "epoch": 0.63, "learning_rate": 0.00028931842105263156, "loss": 0.6006, "step": 63410 }, { "epoch": 0.63, "learning_rate": 0.0002892394736842105, "loss": 0.6155, "step": 63420 }, { "epoch": 0.63, "learning_rate": 0.00028916052631578946, "loss": 0.6181, "step": 63430 }, { "epoch": 0.63, "learning_rate": 0.0002890815789473684, "loss": 0.6073, "step": 63440 }, { "epoch": 0.63, "learning_rate": 0.00028900263157894735, "loss": 0.6012, "step": 63450 }, { "epoch": 0.63, "learning_rate": 0.0002889236842105263, "loss": 0.6, "step": 63460 }, { "epoch": 0.63, "learning_rate": 0.00028884473684210525, "loss": 0.6143, "step": 63470 }, { "epoch": 0.63, "learning_rate": 0.0002887657894736842, "loss": 0.6202, "step": 63480 }, { "epoch": 0.63, "learning_rate": 0.0002886868421052631, "loss": 0.6072, "step": 63490 }, { "epoch": 0.64, "learning_rate": 0.0002886078947368421, "loss": 0.6031, "step": 63500 }, { "epoch": 0.64, "learning_rate": 0.00028852894736842104, "loss": 0.623, "step": 63510 }, { "epoch": 0.64, "learning_rate": 0.00028845, "loss": 0.6119, "step": 63520 }, { "epoch": 0.64, "learning_rate": 0.00028837105263157894, "loss": 0.6141, "step": 63530 }, { "epoch": 0.64, "learning_rate": 0.0002882921052631579, "loss": 0.607, "step": 63540 }, { "epoch": 0.64, "learning_rate": 0.00028821315789473684, "loss": 0.6221, "step": 63550 }, { "epoch": 0.64, "learning_rate": 0.00028813421052631573, "loss": 0.623, "step": 63560 }, { "epoch": 0.64, "learning_rate": 0.0002880552631578947, "loss": 0.6202, "step": 63570 }, { "epoch": 0.64, "learning_rate": 0.0002879763157894736, "loss": 0.6273, "step": 63580 }, { "epoch": 0.64, "learning_rate": 0.00028789736842105263, "loss": 0.6149, "step": 63590 }, { "epoch": 0.64, "learning_rate": 0.0002878184210526316, "loss": 0.6185, "step": 63600 }, { "epoch": 0.64, "learning_rate": 0.0002877394736842105, "loss": 0.5978, "step": 63610 }, { "epoch": 0.64, "learning_rate": 0.0002876605263157895, "loss": 0.614, "step": 63620 }, { "epoch": 0.64, "learning_rate": 0.0002875815789473684, "loss": 0.6211, "step": 63630 }, { "epoch": 0.64, "learning_rate": 0.0002875026315789473, "loss": 0.6186, "step": 63640 }, { "epoch": 0.64, "learning_rate": 0.00028742368421052626, "loss": 0.6133, "step": 63650 }, { "epoch": 0.64, "learning_rate": 0.0002873447368421052, "loss": 0.6148, "step": 63660 }, { "epoch": 0.64, "learning_rate": 0.00028726578947368416, "loss": 0.6075, "step": 63670 }, { "epoch": 0.64, "learning_rate": 0.00028718684210526316, "loss": 0.6113, "step": 63680 }, { "epoch": 0.64, "learning_rate": 0.0002871078947368421, "loss": 0.6121, "step": 63690 }, { "epoch": 0.64, "learning_rate": 0.00028702894736842106, "loss": 0.6223, "step": 63700 }, { "epoch": 0.64, "learning_rate": 0.00028694999999999995, "loss": 0.6098, "step": 63710 }, { "epoch": 0.64, "learning_rate": 0.0002868710526315789, "loss": 0.6202, "step": 63720 }, { "epoch": 0.64, "learning_rate": 0.00028679210526315785, "loss": 0.6097, "step": 63730 }, { "epoch": 0.64, "learning_rate": 0.0002867131578947368, "loss": 0.6047, "step": 63740 }, { "epoch": 0.64, "learning_rate": 0.00028663421052631575, "loss": 0.609, "step": 63750 }, { "epoch": 0.64, "learning_rate": 0.0002865552631578947, "loss": 0.6166, "step": 63760 }, { "epoch": 0.64, "learning_rate": 0.00028647631578947364, "loss": 0.6015, "step": 63770 }, { "epoch": 0.64, "learning_rate": 0.0002863973684210526, "loss": 0.6228, "step": 63780 }, { "epoch": 0.64, "learning_rate": 0.00028631842105263154, "loss": 0.6104, "step": 63790 }, { "epoch": 0.64, "learning_rate": 0.0002862394736842105, "loss": 0.6093, "step": 63800 }, { "epoch": 0.64, "learning_rate": 0.00028616052631578944, "loss": 0.6078, "step": 63810 }, { "epoch": 0.64, "learning_rate": 0.0002860815789473684, "loss": 0.6127, "step": 63820 }, { "epoch": 0.64, "learning_rate": 0.00028600263157894733, "loss": 0.6215, "step": 63830 }, { "epoch": 0.64, "learning_rate": 0.0002859236842105263, "loss": 0.6187, "step": 63840 }, { "epoch": 0.64, "learning_rate": 0.00028584473684210523, "loss": 0.6207, "step": 63850 }, { "epoch": 0.64, "learning_rate": 0.0002857657894736842, "loss": 0.6077, "step": 63860 }, { "epoch": 0.64, "learning_rate": 0.00028568684210526313, "loss": 0.6114, "step": 63870 }, { "epoch": 0.64, "learning_rate": 0.0002856078947368421, "loss": 0.6312, "step": 63880 }, { "epoch": 0.64, "learning_rate": 0.000285528947368421, "loss": 0.6231, "step": 63890 }, { "epoch": 0.64, "learning_rate": 0.00028544999999999997, "loss": 0.6297, "step": 63900 }, { "epoch": 0.64, "learning_rate": 0.0002853710526315789, "loss": 0.6151, "step": 63910 }, { "epoch": 0.64, "learning_rate": 0.00028529210526315787, "loss": 0.612, "step": 63920 }, { "epoch": 0.64, "learning_rate": 0.0002852131578947368, "loss": 0.6184, "step": 63930 }, { "epoch": 0.64, "learning_rate": 0.00028513421052631577, "loss": 0.6172, "step": 63940 }, { "epoch": 0.64, "learning_rate": 0.0002850552631578947, "loss": 0.6244, "step": 63950 }, { "epoch": 0.64, "learning_rate": 0.00028497631578947366, "loss": 0.6144, "step": 63960 }, { "epoch": 0.64, "learning_rate": 0.0002848973684210526, "loss": 0.6161, "step": 63970 }, { "epoch": 0.64, "learning_rate": 0.00028481842105263156, "loss": 0.6147, "step": 63980 }, { "epoch": 0.64, "learning_rate": 0.0002847394736842105, "loss": 0.6128, "step": 63990 }, { "epoch": 0.64, "learning_rate": 0.00028466052631578946, "loss": 0.6253, "step": 64000 }, { "epoch": 0.64, "learning_rate": 0.0002845815789473684, "loss": 0.6227, "step": 64010 }, { "epoch": 0.64, "learning_rate": 0.00028450263157894735, "loss": 0.6294, "step": 64020 }, { "epoch": 0.64, "learning_rate": 0.0002844236842105263, "loss": 0.6138, "step": 64030 }, { "epoch": 0.64, "learning_rate": 0.00028434473684210525, "loss": 0.6185, "step": 64040 }, { "epoch": 0.64, "learning_rate": 0.00028426578947368414, "loss": 0.6142, "step": 64050 }, { "epoch": 0.64, "learning_rate": 0.00028418684210526315, "loss": 0.6274, "step": 64060 }, { "epoch": 0.64, "learning_rate": 0.0002841078947368421, "loss": 0.6248, "step": 64070 }, { "epoch": 0.64, "learning_rate": 0.00028402894736842104, "loss": 0.617, "step": 64080 }, { "epoch": 0.64, "learning_rate": 0.00028395, "loss": 0.6273, "step": 64090 }, { "epoch": 0.64, "learning_rate": 0.00028387105263157894, "loss": 0.6357, "step": 64100 }, { "epoch": 0.64, "learning_rate": 0.0002837921052631579, "loss": 0.6099, "step": 64110 }, { "epoch": 0.64, "learning_rate": 0.00028371315789473684, "loss": 0.6208, "step": 64120 }, { "epoch": 0.64, "learning_rate": 0.00028363421052631573, "loss": 0.6179, "step": 64130 }, { "epoch": 0.64, "learning_rate": 0.0002835552631578947, "loss": 0.6301, "step": 64140 }, { "epoch": 0.64, "learning_rate": 0.0002834763157894737, "loss": 0.6158, "step": 64150 }, { "epoch": 0.64, "learning_rate": 0.00028339736842105263, "loss": 0.627, "step": 64160 }, { "epoch": 0.64, "learning_rate": 0.0002833184210526316, "loss": 0.6273, "step": 64170 }, { "epoch": 0.64, "learning_rate": 0.0002832394736842105, "loss": 0.6132, "step": 64180 }, { "epoch": 0.64, "learning_rate": 0.0002831605263157895, "loss": 0.6196, "step": 64190 }, { "epoch": 0.64, "learning_rate": 0.00028308157894736837, "loss": 0.6202, "step": 64200 }, { "epoch": 0.64, "learning_rate": 0.0002830026315789473, "loss": 0.6246, "step": 64210 }, { "epoch": 0.64, "learning_rate": 0.00028292368421052626, "loss": 0.623, "step": 64220 }, { "epoch": 0.64, "learning_rate": 0.0002828447368421052, "loss": 0.6155, "step": 64230 }, { "epoch": 0.64, "learning_rate": 0.0002827657894736842, "loss": 0.6124, "step": 64240 }, { "epoch": 0.64, "learning_rate": 0.00028268684210526316, "loss": 0.6046, "step": 64250 }, { "epoch": 0.64, "learning_rate": 0.0002826078947368421, "loss": 0.6129, "step": 64260 }, { "epoch": 0.64, "learning_rate": 0.00028252894736842106, "loss": 0.6272, "step": 64270 }, { "epoch": 0.64, "learning_rate": 0.00028244999999999995, "loss": 0.6276, "step": 64280 }, { "epoch": 0.64, "learning_rate": 0.0002823710526315789, "loss": 0.6191, "step": 64290 }, { "epoch": 0.64, "learning_rate": 0.00028229210526315785, "loss": 0.5869, "step": 64300 }, { "epoch": 0.64, "learning_rate": 0.0002822131578947368, "loss": 0.6192, "step": 64310 }, { "epoch": 0.64, "learning_rate": 0.00028213421052631575, "loss": 0.6201, "step": 64320 }, { "epoch": 0.64, "learning_rate": 0.00028205526315789475, "loss": 0.603, "step": 64330 }, { "epoch": 0.64, "learning_rate": 0.0002819763157894737, "loss": 0.586, "step": 64340 }, { "epoch": 0.64, "learning_rate": 0.0002818973684210526, "loss": 0.6202, "step": 64350 }, { "epoch": 0.64, "learning_rate": 0.00028181842105263154, "loss": 0.6076, "step": 64360 }, { "epoch": 0.64, "learning_rate": 0.0002817394736842105, "loss": 0.6187, "step": 64370 }, { "epoch": 0.64, "learning_rate": 0.00028166052631578944, "loss": 0.6214, "step": 64380 }, { "epoch": 0.64, "learning_rate": 0.0002815815789473684, "loss": 0.6157, "step": 64390 }, { "epoch": 0.64, "learning_rate": 0.00028150263157894733, "loss": 0.6171, "step": 64400 }, { "epoch": 0.64, "learning_rate": 0.0002814236842105263, "loss": 0.6168, "step": 64410 }, { "epoch": 0.64, "learning_rate": 0.00028134473684210523, "loss": 0.6155, "step": 64420 }, { "epoch": 0.64, "learning_rate": 0.0002812657894736842, "loss": 0.6266, "step": 64430 }, { "epoch": 0.64, "learning_rate": 0.0002811868421052631, "loss": 0.6188, "step": 64440 }, { "epoch": 0.64, "learning_rate": 0.0002811078947368421, "loss": 0.6228, "step": 64450 }, { "epoch": 0.64, "learning_rate": 0.000281028947368421, "loss": 0.6205, "step": 64460 }, { "epoch": 0.64, "learning_rate": 0.00028094999999999997, "loss": 0.6278, "step": 64470 }, { "epoch": 0.64, "learning_rate": 0.0002808710526315789, "loss": 0.6114, "step": 64480 }, { "epoch": 0.64, "learning_rate": 0.00028079210526315787, "loss": 0.6115, "step": 64490 }, { "epoch": 0.65, "learning_rate": 0.0002807131578947368, "loss": 0.6262, "step": 64500 }, { "epoch": 0.65, "learning_rate": 0.00028063421052631576, "loss": 0.6117, "step": 64510 }, { "epoch": 0.65, "learning_rate": 0.0002805552631578947, "loss": 0.6196, "step": 64520 }, { "epoch": 0.65, "learning_rate": 0.00028047631578947366, "loss": 0.6172, "step": 64530 }, { "epoch": 0.65, "learning_rate": 0.0002803973684210526, "loss": 0.6063, "step": 64540 }, { "epoch": 0.65, "learning_rate": 0.00028031842105263156, "loss": 0.6109, "step": 64550 }, { "epoch": 0.65, "learning_rate": 0.0002802394736842105, "loss": 0.6074, "step": 64560 }, { "epoch": 0.65, "learning_rate": 0.00028016052631578945, "loss": 0.6105, "step": 64570 }, { "epoch": 0.65, "learning_rate": 0.0002800815789473684, "loss": 0.6199, "step": 64580 }, { "epoch": 0.65, "learning_rate": 0.00028000263157894735, "loss": 0.6157, "step": 64590 }, { "epoch": 0.65, "learning_rate": 0.0002799236842105263, "loss": 0.6174, "step": 64600 }, { "epoch": 0.65, "learning_rate": 0.00027984473684210525, "loss": 0.6083, "step": 64610 }, { "epoch": 0.65, "learning_rate": 0.0002797657894736842, "loss": 0.6119, "step": 64620 }, { "epoch": 0.65, "learning_rate": 0.00027968684210526314, "loss": 0.6272, "step": 64630 }, { "epoch": 0.65, "learning_rate": 0.0002796078947368421, "loss": 0.6078, "step": 64640 }, { "epoch": 0.65, "learning_rate": 0.00027952894736842104, "loss": 0.6267, "step": 64650 }, { "epoch": 0.65, "learning_rate": 0.00027945, "loss": 0.622, "step": 64660 }, { "epoch": 0.65, "learning_rate": 0.00027937105263157894, "loss": 0.6205, "step": 64670 }, { "epoch": 0.65, "learning_rate": 0.0002792921052631579, "loss": 0.6193, "step": 64680 }, { "epoch": 0.65, "learning_rate": 0.0002792131578947368, "loss": 0.6246, "step": 64690 }, { "epoch": 0.65, "learning_rate": 0.00027913421052631573, "loss": 0.627, "step": 64700 }, { "epoch": 0.65, "learning_rate": 0.00027905526315789473, "loss": 0.627, "step": 64710 }, { "epoch": 0.65, "learning_rate": 0.0002789763157894737, "loss": 0.6091, "step": 64720 }, { "epoch": 0.65, "learning_rate": 0.00027889736842105263, "loss": 0.6127, "step": 64730 }, { "epoch": 0.65, "learning_rate": 0.0002788184210526316, "loss": 0.6177, "step": 64740 }, { "epoch": 0.65, "learning_rate": 0.0002787394736842105, "loss": 0.6183, "step": 64750 }, { "epoch": 0.65, "learning_rate": 0.00027866052631578947, "loss": 0.6197, "step": 64760 }, { "epoch": 0.65, "learning_rate": 0.00027858157894736837, "loss": 0.6205, "step": 64770 }, { "epoch": 0.65, "learning_rate": 0.0002785026315789473, "loss": 0.6174, "step": 64780 }, { "epoch": 0.65, "learning_rate": 0.00027842368421052626, "loss": 0.621, "step": 64790 }, { "epoch": 0.65, "learning_rate": 0.00027834473684210527, "loss": 0.6157, "step": 64800 }, { "epoch": 0.65, "learning_rate": 0.0002782657894736842, "loss": 0.6121, "step": 64810 }, { "epoch": 0.65, "learning_rate": 0.00027818684210526316, "loss": 0.6165, "step": 64820 }, { "epoch": 0.65, "learning_rate": 0.0002781078947368421, "loss": 0.6229, "step": 64830 }, { "epoch": 0.65, "learning_rate": 0.000278028947368421, "loss": 0.6208, "step": 64840 }, { "epoch": 0.65, "learning_rate": 0.00027794999999999995, "loss": 0.6162, "step": 64850 }, { "epoch": 0.65, "learning_rate": 0.0002778710526315789, "loss": 0.6046, "step": 64860 }, { "epoch": 0.65, "learning_rate": 0.00027779210526315785, "loss": 0.6047, "step": 64870 }, { "epoch": 0.65, "learning_rate": 0.0002777131578947368, "loss": 0.6118, "step": 64880 }, { "epoch": 0.65, "learning_rate": 0.0002776342105263158, "loss": 0.6132, "step": 64890 }, { "epoch": 0.65, "learning_rate": 0.00027755526315789475, "loss": 0.6199, "step": 64900 }, { "epoch": 0.65, "learning_rate": 0.00027747631578947364, "loss": 0.6303, "step": 64910 }, { "epoch": 0.65, "learning_rate": 0.0002773973684210526, "loss": 0.6183, "step": 64920 }, { "epoch": 0.65, "learning_rate": 0.00027731842105263154, "loss": 0.612, "step": 64930 }, { "epoch": 0.65, "learning_rate": 0.0002772394736842105, "loss": 0.6118, "step": 64940 }, { "epoch": 0.65, "learning_rate": 0.00027716052631578944, "loss": 0.6252, "step": 64950 }, { "epoch": 0.65, "learning_rate": 0.0002770815789473684, "loss": 0.6116, "step": 64960 }, { "epoch": 0.65, "learning_rate": 0.00027700263157894733, "loss": 0.6268, "step": 64970 }, { "epoch": 0.65, "learning_rate": 0.0002769236842105263, "loss": 0.6126, "step": 64980 }, { "epoch": 0.65, "learning_rate": 0.00027685263157894736, "loss": 0.6084, "step": 64990 }, { "epoch": 0.65, "learning_rate": 0.0002767736842105263, "loss": 0.6029, "step": 65000 }, { "epoch": 0.65, "eval_accuracy": 0.8712975328502192, "eval_loss": 0.59521484375, "eval_runtime": 97.298, "eval_samples_per_second": 822.217, "eval_steps_per_second": 1.614, "step": 65000 }, { "epoch": 0.65, "learning_rate": 0.00027669473684210525, "loss": 0.6275, "step": 65010 }, { "epoch": 0.65, "learning_rate": 0.0002766157894736842, "loss": 0.6172, "step": 65020 }, { "epoch": 0.65, "learning_rate": 0.00027653684210526315, "loss": 0.637, "step": 65030 }, { "epoch": 0.65, "learning_rate": 0.00027645789473684204, "loss": 0.6185, "step": 65040 }, { "epoch": 0.65, "learning_rate": 0.00027637894736842105, "loss": 0.6182, "step": 65050 }, { "epoch": 0.65, "learning_rate": 0.0002763, "loss": 0.6237, "step": 65060 }, { "epoch": 0.65, "learning_rate": 0.00027622105263157894, "loss": 0.6106, "step": 65070 }, { "epoch": 0.65, "learning_rate": 0.0002761421052631579, "loss": 0.6254, "step": 65080 }, { "epoch": 0.65, "learning_rate": 0.00027606315789473684, "loss": 0.6164, "step": 65090 }, { "epoch": 0.65, "learning_rate": 0.0002759842105263158, "loss": 0.6195, "step": 65100 }, { "epoch": 0.65, "learning_rate": 0.0002759131578947368, "loss": 0.6106, "step": 65110 }, { "epoch": 0.65, "learning_rate": 0.00027583421052631576, "loss": 0.6329, "step": 65120 }, { "epoch": 0.65, "learning_rate": 0.0002757552631578947, "loss": 0.6166, "step": 65130 }, { "epoch": 0.65, "learning_rate": 0.00027567631578947365, "loss": 0.6229, "step": 65140 }, { "epoch": 0.65, "learning_rate": 0.0002755973684210526, "loss": 0.6177, "step": 65150 }, { "epoch": 0.65, "learning_rate": 0.00027551842105263155, "loss": 0.629, "step": 65160 }, { "epoch": 0.65, "learning_rate": 0.0002754394736842105, "loss": 0.6317, "step": 65170 }, { "epoch": 0.65, "learning_rate": 0.00027536052631578945, "loss": 0.6117, "step": 65180 }, { "epoch": 0.65, "learning_rate": 0.0002752815789473684, "loss": 0.6122, "step": 65190 }, { "epoch": 0.65, "learning_rate": 0.00027520263157894734, "loss": 0.6091, "step": 65200 }, { "epoch": 0.65, "learning_rate": 0.0002751236842105263, "loss": 0.6138, "step": 65210 }, { "epoch": 0.65, "learning_rate": 0.00027504473684210524, "loss": 0.6181, "step": 65220 }, { "epoch": 0.65, "learning_rate": 0.0002749657894736842, "loss": 0.6213, "step": 65230 }, { "epoch": 0.65, "learning_rate": 0.00027488684210526314, "loss": 0.6174, "step": 65240 }, { "epoch": 0.65, "learning_rate": 0.0002748078947368421, "loss": 0.6162, "step": 65250 }, { "epoch": 0.65, "learning_rate": 0.00027472894736842103, "loss": 0.6247, "step": 65260 }, { "epoch": 0.65, "learning_rate": 0.00027465, "loss": 0.6168, "step": 65270 }, { "epoch": 0.65, "learning_rate": 0.00027457105263157893, "loss": 0.6246, "step": 65280 }, { "epoch": 0.65, "learning_rate": 0.0002744921052631579, "loss": 0.6223, "step": 65290 }, { "epoch": 0.65, "learning_rate": 0.0002744131578947368, "loss": 0.6196, "step": 65300 }, { "epoch": 0.65, "learning_rate": 0.0002743342105263158, "loss": 0.6196, "step": 65310 }, { "epoch": 0.65, "learning_rate": 0.0002742552631578947, "loss": 0.6128, "step": 65320 }, { "epoch": 0.65, "learning_rate": 0.00027417631578947367, "loss": 0.6262, "step": 65330 }, { "epoch": 0.65, "learning_rate": 0.0002740973684210526, "loss": 0.6173, "step": 65340 }, { "epoch": 0.65, "learning_rate": 0.00027401842105263157, "loss": 0.6169, "step": 65350 }, { "epoch": 0.65, "learning_rate": 0.0002739394736842105, "loss": 0.6241, "step": 65360 }, { "epoch": 0.65, "learning_rate": 0.00027386052631578946, "loss": 0.6195, "step": 65370 }, { "epoch": 0.65, "learning_rate": 0.00027378157894736836, "loss": 0.6215, "step": 65380 }, { "epoch": 0.65, "learning_rate": 0.0002737026315789473, "loss": 0.62, "step": 65390 }, { "epoch": 0.65, "learning_rate": 0.0002736236842105263, "loss": 0.6165, "step": 65400 }, { "epoch": 0.65, "learning_rate": 0.00027354473684210526, "loss": 0.6089, "step": 65410 }, { "epoch": 0.65, "learning_rate": 0.0002734657894736842, "loss": 0.6126, "step": 65420 }, { "epoch": 0.65, "learning_rate": 0.00027338684210526315, "loss": 0.6049, "step": 65430 }, { "epoch": 0.65, "learning_rate": 0.0002733078947368421, "loss": 0.6083, "step": 65440 }, { "epoch": 0.65, "learning_rate": 0.00027322894736842105, "loss": 0.6204, "step": 65450 }, { "epoch": 0.65, "learning_rate": 0.00027314999999999994, "loss": 0.6178, "step": 65460 }, { "epoch": 0.65, "learning_rate": 0.0002730710526315789, "loss": 0.6159, "step": 65470 }, { "epoch": 0.65, "learning_rate": 0.00027299210526315784, "loss": 0.6279, "step": 65480 }, { "epoch": 0.65, "learning_rate": 0.00027291315789473684, "loss": 0.6057, "step": 65490 }, { "epoch": 0.66, "learning_rate": 0.0002728342105263158, "loss": 0.6107, "step": 65500 }, { "epoch": 0.66, "learning_rate": 0.00027275526315789474, "loss": 0.6107, "step": 65510 }, { "epoch": 0.66, "learning_rate": 0.0002726763157894737, "loss": 0.6217, "step": 65520 }, { "epoch": 0.66, "learning_rate": 0.0002725973684210526, "loss": 0.6232, "step": 65530 }, { "epoch": 0.66, "learning_rate": 0.00027251842105263153, "loss": 0.6323, "step": 65540 }, { "epoch": 0.66, "learning_rate": 0.0002724394736842105, "loss": 0.6193, "step": 65550 }, { "epoch": 0.66, "learning_rate": 0.00027236052631578943, "loss": 0.6173, "step": 65560 }, { "epoch": 0.66, "learning_rate": 0.0002722815789473684, "loss": 0.6192, "step": 65570 }, { "epoch": 0.66, "learning_rate": 0.0002722026315789474, "loss": 0.6125, "step": 65580 }, { "epoch": 0.66, "learning_rate": 0.0002721236842105263, "loss": 0.6193, "step": 65590 }, { "epoch": 0.66, "learning_rate": 0.0002720447368421053, "loss": 0.6226, "step": 65600 }, { "epoch": 0.66, "learning_rate": 0.00027196578947368417, "loss": 0.6216, "step": 65610 }, { "epoch": 0.66, "learning_rate": 0.0002718868421052631, "loss": 0.614, "step": 65620 }, { "epoch": 0.66, "learning_rate": 0.00027180789473684207, "loss": 0.6163, "step": 65630 }, { "epoch": 0.66, "learning_rate": 0.000271728947368421, "loss": 0.622, "step": 65640 }, { "epoch": 0.66, "learning_rate": 0.00027164999999999996, "loss": 0.6128, "step": 65650 }, { "epoch": 0.66, "learning_rate": 0.0002715710526315789, "loss": 0.6167, "step": 65660 }, { "epoch": 0.66, "learning_rate": 0.00027149210526315786, "loss": 0.6153, "step": 65670 }, { "epoch": 0.66, "learning_rate": 0.0002714131578947368, "loss": 0.6106, "step": 65680 }, { "epoch": 0.66, "learning_rate": 0.00027133421052631576, "loss": 0.6077, "step": 65690 }, { "epoch": 0.66, "learning_rate": 0.0002712552631578947, "loss": 0.618, "step": 65700 }, { "epoch": 0.66, "learning_rate": 0.00027117631578947365, "loss": 0.6081, "step": 65710 }, { "epoch": 0.66, "learning_rate": 0.0002710973684210526, "loss": 0.627, "step": 65720 }, { "epoch": 0.66, "learning_rate": 0.00027101842105263155, "loss": 0.6084, "step": 65730 }, { "epoch": 0.66, "learning_rate": 0.0002709394736842105, "loss": 0.6128, "step": 65740 }, { "epoch": 0.66, "learning_rate": 0.00027086052631578945, "loss": 0.6034, "step": 65750 }, { "epoch": 0.66, "learning_rate": 0.0002707815789473684, "loss": 0.6198, "step": 65760 }, { "epoch": 0.66, "learning_rate": 0.00027070263157894734, "loss": 0.6161, "step": 65770 }, { "epoch": 0.66, "learning_rate": 0.0002706236842105263, "loss": 0.6204, "step": 65780 }, { "epoch": 0.66, "learning_rate": 0.00027054473684210524, "loss": 0.6147, "step": 65790 }, { "epoch": 0.66, "learning_rate": 0.0002704657894736842, "loss": 0.6048, "step": 65800 }, { "epoch": 0.66, "learning_rate": 0.00027038684210526314, "loss": 0.6058, "step": 65810 }, { "epoch": 0.66, "learning_rate": 0.0002703078947368421, "loss": 0.6189, "step": 65820 }, { "epoch": 0.66, "learning_rate": 0.00027022894736842103, "loss": 0.6136, "step": 65830 }, { "epoch": 0.66, "learning_rate": 0.00027015, "loss": 0.6153, "step": 65840 }, { "epoch": 0.66, "learning_rate": 0.00027007105263157893, "loss": 0.6257, "step": 65850 }, { "epoch": 0.66, "learning_rate": 0.0002699921052631579, "loss": 0.6279, "step": 65860 }, { "epoch": 0.66, "learning_rate": 0.0002699131578947368, "loss": 0.6163, "step": 65870 }, { "epoch": 0.66, "learning_rate": 0.0002698342105263158, "loss": 0.6127, "step": 65880 }, { "epoch": 0.66, "learning_rate": 0.0002697552631578947, "loss": 0.6032, "step": 65890 }, { "epoch": 0.66, "learning_rate": 0.00026967631578947367, "loss": 0.6238, "step": 65900 }, { "epoch": 0.66, "learning_rate": 0.0002695973684210526, "loss": 0.6083, "step": 65910 }, { "epoch": 0.66, "learning_rate": 0.00026951842105263157, "loss": 0.6041, "step": 65920 }, { "epoch": 0.66, "learning_rate": 0.0002694394736842105, "loss": 0.6112, "step": 65930 }, { "epoch": 0.66, "learning_rate": 0.00026936052631578946, "loss": 0.6085, "step": 65940 }, { "epoch": 0.66, "learning_rate": 0.00026928157894736836, "loss": 0.6115, "step": 65950 }, { "epoch": 0.66, "learning_rate": 0.00026920263157894736, "loss": 0.6172, "step": 65960 }, { "epoch": 0.66, "learning_rate": 0.0002691236842105263, "loss": 0.6136, "step": 65970 }, { "epoch": 0.66, "learning_rate": 0.00026904473684210526, "loss": 0.6114, "step": 65980 }, { "epoch": 0.66, "learning_rate": 0.0002689657894736842, "loss": 0.606, "step": 65990 }, { "epoch": 0.66, "learning_rate": 0.00026888684210526315, "loss": 0.6049, "step": 66000 }, { "epoch": 0.66, "learning_rate": 0.0002688078947368421, "loss": 0.6122, "step": 66010 }, { "epoch": 0.66, "learning_rate": 0.000268728947368421, "loss": 0.613, "step": 66020 }, { "epoch": 0.66, "learning_rate": 0.00026864999999999994, "loss": 0.6181, "step": 66030 }, { "epoch": 0.66, "learning_rate": 0.0002685710526315789, "loss": 0.6076, "step": 66040 }, { "epoch": 0.66, "learning_rate": 0.0002684921052631579, "loss": 0.6094, "step": 66050 }, { "epoch": 0.66, "learning_rate": 0.00026841315789473684, "loss": 0.6061, "step": 66060 }, { "epoch": 0.66, "learning_rate": 0.0002683342105263158, "loss": 0.6089, "step": 66070 }, { "epoch": 0.66, "learning_rate": 0.00026825526315789474, "loss": 0.6199, "step": 66080 }, { "epoch": 0.66, "learning_rate": 0.0002681763157894737, "loss": 0.6147, "step": 66090 }, { "epoch": 0.66, "learning_rate": 0.0002680973684210526, "loss": 0.6188, "step": 66100 }, { "epoch": 0.66, "learning_rate": 0.00026801842105263153, "loss": 0.6109, "step": 66110 }, { "epoch": 0.66, "learning_rate": 0.0002679394736842105, "loss": 0.6093, "step": 66120 }, { "epoch": 0.66, "learning_rate": 0.0002678605263157894, "loss": 0.617, "step": 66130 }, { "epoch": 0.66, "learning_rate": 0.00026778157894736843, "loss": 0.6152, "step": 66140 }, { "epoch": 0.66, "learning_rate": 0.0002677026315789474, "loss": 0.6077, "step": 66150 }, { "epoch": 0.66, "learning_rate": 0.0002676236842105263, "loss": 0.6152, "step": 66160 }, { "epoch": 0.66, "learning_rate": 0.0002675447368421052, "loss": 0.6165, "step": 66170 }, { "epoch": 0.66, "learning_rate": 0.00026746578947368417, "loss": 0.6205, "step": 66180 }, { "epoch": 0.66, "learning_rate": 0.0002673868421052631, "loss": 0.6121, "step": 66190 }, { "epoch": 0.66, "learning_rate": 0.00026730789473684206, "loss": 0.6138, "step": 66200 }, { "epoch": 0.66, "learning_rate": 0.000267228947368421, "loss": 0.6088, "step": 66210 }, { "epoch": 0.66, "learning_rate": 0.00026714999999999996, "loss": 0.6114, "step": 66220 }, { "epoch": 0.66, "learning_rate": 0.0002670710526315789, "loss": 0.6249, "step": 66230 }, { "epoch": 0.66, "learning_rate": 0.00026699210526315786, "loss": 0.6134, "step": 66240 }, { "epoch": 0.66, "learning_rate": 0.0002669131578947368, "loss": 0.6137, "step": 66250 }, { "epoch": 0.66, "learning_rate": 0.00026683421052631575, "loss": 0.6173, "step": 66260 }, { "epoch": 0.66, "learning_rate": 0.0002667552631578947, "loss": 0.6057, "step": 66270 }, { "epoch": 0.66, "learning_rate": 0.00026667631578947365, "loss": 0.6091, "step": 66280 }, { "epoch": 0.66, "learning_rate": 0.0002665973684210526, "loss": 0.6195, "step": 66290 }, { "epoch": 0.66, "learning_rate": 0.00026651842105263155, "loss": 0.6172, "step": 66300 }, { "epoch": 0.66, "learning_rate": 0.0002664394736842105, "loss": 0.6113, "step": 66310 }, { "epoch": 0.66, "learning_rate": 0.00026636052631578944, "loss": 0.6148, "step": 66320 }, { "epoch": 0.66, "learning_rate": 0.0002662815789473684, "loss": 0.605, "step": 66330 }, { "epoch": 0.66, "learning_rate": 0.00026620263157894734, "loss": 0.6161, "step": 66340 }, { "epoch": 0.66, "learning_rate": 0.0002661236842105263, "loss": 0.6172, "step": 66350 }, { "epoch": 0.66, "learning_rate": 0.00026604473684210524, "loss": 0.6171, "step": 66360 }, { "epoch": 0.66, "learning_rate": 0.0002659657894736842, "loss": 0.609, "step": 66370 }, { "epoch": 0.66, "learning_rate": 0.00026588684210526313, "loss": 0.6061, "step": 66380 }, { "epoch": 0.66, "learning_rate": 0.0002658078947368421, "loss": 0.5975, "step": 66390 }, { "epoch": 0.66, "learning_rate": 0.00026572894736842103, "loss": 0.6149, "step": 66400 }, { "epoch": 0.66, "learning_rate": 0.00026565, "loss": 0.623, "step": 66410 }, { "epoch": 0.66, "learning_rate": 0.00026557105263157893, "loss": 0.607, "step": 66420 }, { "epoch": 0.66, "learning_rate": 0.0002654921052631579, "loss": 0.6125, "step": 66430 }, { "epoch": 0.66, "learning_rate": 0.0002654131578947368, "loss": 0.6125, "step": 66440 }, { "epoch": 0.66, "learning_rate": 0.00026533421052631577, "loss": 0.5879, "step": 66450 }, { "epoch": 0.66, "learning_rate": 0.0002652552631578947, "loss": 0.6151, "step": 66460 }, { "epoch": 0.66, "learning_rate": 0.00026517631578947367, "loss": 0.6237, "step": 66470 }, { "epoch": 0.66, "learning_rate": 0.0002650973684210526, "loss": 0.6168, "step": 66480 }, { "epoch": 0.66, "learning_rate": 0.00026501842105263157, "loss": 0.6005, "step": 66490 }, { "epoch": 0.67, "learning_rate": 0.0002649394736842105, "loss": 0.6086, "step": 66500 }, { "epoch": 0.67, "learning_rate": 0.0002648605263157894, "loss": 0.6099, "step": 66510 }, { "epoch": 0.67, "learning_rate": 0.0002647815789473684, "loss": 0.6189, "step": 66520 }, { "epoch": 0.67, "learning_rate": 0.00026470263157894736, "loss": 0.6127, "step": 66530 }, { "epoch": 0.67, "learning_rate": 0.0002646236842105263, "loss": 0.6078, "step": 66540 }, { "epoch": 0.67, "learning_rate": 0.00026454473684210526, "loss": 0.6057, "step": 66550 }, { "epoch": 0.67, "learning_rate": 0.0002644657894736842, "loss": 0.6142, "step": 66560 }, { "epoch": 0.67, "learning_rate": 0.00026438684210526315, "loss": 0.6087, "step": 66570 }, { "epoch": 0.67, "learning_rate": 0.0002643078947368421, "loss": 0.6079, "step": 66580 }, { "epoch": 0.67, "learning_rate": 0.000264228947368421, "loss": 0.5868, "step": 66590 }, { "epoch": 0.67, "learning_rate": 0.00026414999999999994, "loss": 0.5994, "step": 66600 }, { "epoch": 0.67, "learning_rate": 0.00026407105263157895, "loss": 0.5836, "step": 66610 }, { "epoch": 0.67, "learning_rate": 0.0002639921052631579, "loss": 0.6064, "step": 66620 }, { "epoch": 0.67, "learning_rate": 0.0002639210526315789, "loss": 0.6007, "step": 66630 }, { "epoch": 0.67, "learning_rate": 0.00026384210526315786, "loss": 0.5975, "step": 66640 }, { "epoch": 0.67, "learning_rate": 0.0002637631578947368, "loss": 0.5916, "step": 66650 }, { "epoch": 0.67, "learning_rate": 0.00026368421052631576, "loss": 0.6027, "step": 66660 }, { "epoch": 0.67, "learning_rate": 0.0002636052631578947, "loss": 0.6034, "step": 66670 }, { "epoch": 0.67, "learning_rate": 0.00026352631578947366, "loss": 0.6089, "step": 66680 }, { "epoch": 0.67, "learning_rate": 0.0002634473684210526, "loss": 0.5924, "step": 66690 }, { "epoch": 0.67, "learning_rate": 0.00026336842105263155, "loss": 0.5966, "step": 66700 }, { "epoch": 0.67, "learning_rate": 0.0002632894736842105, "loss": 0.59, "step": 66710 }, { "epoch": 0.67, "learning_rate": 0.00026321052631578945, "loss": 0.5982, "step": 66720 }, { "epoch": 0.67, "learning_rate": 0.0002631315789473684, "loss": 0.5976, "step": 66730 }, { "epoch": 0.67, "learning_rate": 0.00026305263157894735, "loss": 0.608, "step": 66740 }, { "epoch": 0.67, "learning_rate": 0.0002629736842105263, "loss": 0.6017, "step": 66750 }, { "epoch": 0.67, "learning_rate": 0.00026289473684210524, "loss": 0.5987, "step": 66760 }, { "epoch": 0.67, "learning_rate": 0.0002628157894736842, "loss": 0.5861, "step": 66770 }, { "epoch": 0.67, "learning_rate": 0.00026273684210526314, "loss": 0.5971, "step": 66780 }, { "epoch": 0.67, "learning_rate": 0.0002626578947368421, "loss": 0.5958, "step": 66790 }, { "epoch": 0.67, "learning_rate": 0.00026257894736842104, "loss": 0.596, "step": 66800 }, { "epoch": 0.67, "learning_rate": 0.0002625, "loss": 0.5842, "step": 66810 }, { "epoch": 0.67, "learning_rate": 0.00026242105263157893, "loss": 0.5935, "step": 66820 }, { "epoch": 0.67, "learning_rate": 0.0002623421052631579, "loss": 0.5913, "step": 66830 }, { "epoch": 0.67, "learning_rate": 0.00026226315789473683, "loss": 0.6111, "step": 66840 }, { "epoch": 0.67, "learning_rate": 0.0002621842105263158, "loss": 0.6131, "step": 66850 }, { "epoch": 0.67, "learning_rate": 0.0002621052631578947, "loss": 0.6299, "step": 66860 }, { "epoch": 0.67, "learning_rate": 0.0002620263157894737, "loss": 0.6226, "step": 66870 }, { "epoch": 0.67, "learning_rate": 0.0002619473684210526, "loss": 0.5992, "step": 66880 }, { "epoch": 0.67, "learning_rate": 0.00026186842105263157, "loss": 0.6188, "step": 66890 }, { "epoch": 0.67, "learning_rate": 0.0002617894736842105, "loss": 0.6228, "step": 66900 }, { "epoch": 0.67, "learning_rate": 0.00026171052631578947, "loss": 0.6228, "step": 66910 }, { "epoch": 0.67, "learning_rate": 0.0002616315789473684, "loss": 0.622, "step": 66920 }, { "epoch": 0.67, "learning_rate": 0.0002615526315789473, "loss": 0.6124, "step": 66930 }, { "epoch": 0.67, "learning_rate": 0.00026147368421052626, "loss": 0.6236, "step": 66940 }, { "epoch": 0.67, "learning_rate": 0.00026139473684210526, "loss": 0.6076, "step": 66950 }, { "epoch": 0.67, "learning_rate": 0.0002613157894736842, "loss": 0.6119, "step": 66960 }, { "epoch": 0.67, "learning_rate": 0.00026123684210526316, "loss": 0.6109, "step": 66970 }, { "epoch": 0.67, "learning_rate": 0.0002611578947368421, "loss": 0.618, "step": 66980 }, { "epoch": 0.67, "learning_rate": 0.00026107894736842105, "loss": 0.6172, "step": 66990 }, { "epoch": 0.67, "learning_rate": 0.000261, "loss": 0.6186, "step": 67000 }, { "epoch": 0.67, "learning_rate": 0.0002609210526315789, "loss": 0.6134, "step": 67010 }, { "epoch": 0.67, "learning_rate": 0.00026084210526315784, "loss": 0.6014, "step": 67020 }, { "epoch": 0.67, "learning_rate": 0.0002607631578947368, "loss": 0.6121, "step": 67030 }, { "epoch": 0.67, "learning_rate": 0.00026068421052631574, "loss": 0.6131, "step": 67040 }, { "epoch": 0.67, "learning_rate": 0.00026060526315789474, "loss": 0.6212, "step": 67050 }, { "epoch": 0.67, "learning_rate": 0.0002605263157894737, "loss": 0.6212, "step": 67060 }, { "epoch": 0.67, "learning_rate": 0.00026044736842105264, "loss": 0.6098, "step": 67070 }, { "epoch": 0.67, "learning_rate": 0.00026036842105263153, "loss": 0.6035, "step": 67080 }, { "epoch": 0.67, "learning_rate": 0.0002602894736842105, "loss": 0.5951, "step": 67090 }, { "epoch": 0.67, "learning_rate": 0.00026021052631578943, "loss": 0.5969, "step": 67100 }, { "epoch": 0.67, "learning_rate": 0.0002601315789473684, "loss": 0.5876, "step": 67110 }, { "epoch": 0.67, "learning_rate": 0.0002600526315789473, "loss": 0.6134, "step": 67120 }, { "epoch": 0.67, "learning_rate": 0.0002599736842105263, "loss": 0.6154, "step": 67130 }, { "epoch": 0.67, "learning_rate": 0.0002598947368421053, "loss": 0.6118, "step": 67140 }, { "epoch": 0.67, "learning_rate": 0.0002598157894736842, "loss": 0.6091, "step": 67150 }, { "epoch": 0.67, "learning_rate": 0.0002597368421052631, "loss": 0.6192, "step": 67160 }, { "epoch": 0.67, "learning_rate": 0.00025965789473684207, "loss": 0.6088, "step": 67170 }, { "epoch": 0.67, "learning_rate": 0.000259578947368421, "loss": 0.6051, "step": 67180 }, { "epoch": 0.67, "learning_rate": 0.00025949999999999997, "loss": 0.6081, "step": 67190 }, { "epoch": 0.67, "learning_rate": 0.0002594210526315789, "loss": 0.6192, "step": 67200 }, { "epoch": 0.67, "learning_rate": 0.00025934210526315786, "loss": 0.6138, "step": 67210 }, { "epoch": 0.67, "learning_rate": 0.0002592631578947368, "loss": 0.6073, "step": 67220 }, { "epoch": 0.67, "learning_rate": 0.00025918421052631576, "loss": 0.6092, "step": 67230 }, { "epoch": 0.67, "learning_rate": 0.0002591052631578947, "loss": 0.6033, "step": 67240 }, { "epoch": 0.67, "learning_rate": 0.00025902631578947365, "loss": 0.5994, "step": 67250 }, { "epoch": 0.67, "learning_rate": 0.0002589473684210526, "loss": 0.6172, "step": 67260 }, { "epoch": 0.67, "learning_rate": 0.00025886842105263155, "loss": 0.6103, "step": 67270 }, { "epoch": 0.67, "learning_rate": 0.0002587894736842105, "loss": 0.6128, "step": 67280 }, { "epoch": 0.67, "learning_rate": 0.00025871052631578945, "loss": 0.6112, "step": 67290 }, { "epoch": 0.67, "learning_rate": 0.0002586315789473684, "loss": 0.6225, "step": 67300 }, { "epoch": 0.67, "learning_rate": 0.00025855263157894734, "loss": 0.6106, "step": 67310 }, { "epoch": 0.67, "learning_rate": 0.0002584736842105263, "loss": 0.6076, "step": 67320 }, { "epoch": 0.67, "learning_rate": 0.00025839473684210524, "loss": 0.61, "step": 67330 }, { "epoch": 0.67, "learning_rate": 0.0002583157894736842, "loss": 0.6145, "step": 67340 }, { "epoch": 0.67, "learning_rate": 0.00025823684210526314, "loss": 0.5988, "step": 67350 }, { "epoch": 0.67, "learning_rate": 0.0002581578947368421, "loss": 0.6034, "step": 67360 }, { "epoch": 0.67, "learning_rate": 0.00025807894736842103, "loss": 0.6168, "step": 67370 }, { "epoch": 0.67, "learning_rate": 0.000258, "loss": 0.6039, "step": 67380 }, { "epoch": 0.67, "learning_rate": 0.00025792105263157893, "loss": 0.6013, "step": 67390 }, { "epoch": 0.67, "learning_rate": 0.0002578421052631579, "loss": 0.6092, "step": 67400 }, { "epoch": 0.67, "learning_rate": 0.00025776315789473683, "loss": 0.6197, "step": 67410 }, { "epoch": 0.67, "learning_rate": 0.0002576842105263158, "loss": 0.6197, "step": 67420 }, { "epoch": 0.67, "learning_rate": 0.0002576052631578947, "loss": 0.6105, "step": 67430 }, { "epoch": 0.67, "learning_rate": 0.00025752631578947367, "loss": 0.6155, "step": 67440 }, { "epoch": 0.67, "learning_rate": 0.0002574473684210526, "loss": 0.6026, "step": 67450 }, { "epoch": 0.67, "learning_rate": 0.00025736842105263157, "loss": 0.6058, "step": 67460 }, { "epoch": 0.67, "learning_rate": 0.0002572894736842105, "loss": 0.6007, "step": 67470 }, { "epoch": 0.67, "learning_rate": 0.00025721052631578947, "loss": 0.6056, "step": 67480 }, { "epoch": 0.67, "learning_rate": 0.0002571315789473684, "loss": 0.6256, "step": 67490 }, { "epoch": 0.68, "learning_rate": 0.0002570526315789473, "loss": 0.615, "step": 67500 }, { "epoch": 0.68, "eval_accuracy": 0.8707361782979045, "eval_loss": 0.5947265625, "eval_runtime": 97.3705, "eval_samples_per_second": 821.604, "eval_steps_per_second": 1.612, "step": 67500 }, { "epoch": 0.68, "learning_rate": 0.0002569736842105263, "loss": 0.6038, "step": 67510 }, { "epoch": 0.68, "learning_rate": 0.00025689473684210526, "loss": 0.6058, "step": 67520 }, { "epoch": 0.68, "learning_rate": 0.0002568157894736842, "loss": 0.6052, "step": 67530 }, { "epoch": 0.68, "learning_rate": 0.00025673684210526316, "loss": 0.6114, "step": 67540 }, { "epoch": 0.68, "learning_rate": 0.0002566578947368421, "loss": 0.606, "step": 67550 }, { "epoch": 0.68, "learning_rate": 0.00025657894736842105, "loss": 0.6142, "step": 67560 }, { "epoch": 0.68, "learning_rate": 0.00025649999999999995, "loss": 0.6073, "step": 67570 }, { "epoch": 0.68, "learning_rate": 0.0002564210526315789, "loss": 0.6219, "step": 67580 }, { "epoch": 0.68, "learning_rate": 0.00025634210526315784, "loss": 0.6162, "step": 67590 }, { "epoch": 0.68, "learning_rate": 0.0002562631578947368, "loss": 0.6199, "step": 67600 }, { "epoch": 0.68, "learning_rate": 0.0002561842105263158, "loss": 0.6224, "step": 67610 }, { "epoch": 0.68, "learning_rate": 0.00025610526315789474, "loss": 0.6154, "step": 67620 }, { "epoch": 0.68, "learning_rate": 0.0002560263157894737, "loss": 0.6133, "step": 67630 }, { "epoch": 0.68, "learning_rate": 0.00025594736842105264, "loss": 0.6166, "step": 67640 }, { "epoch": 0.68, "learning_rate": 0.00025586842105263153, "loss": 0.6177, "step": 67650 }, { "epoch": 0.68, "learning_rate": 0.0002557894736842105, "loss": 0.607, "step": 67660 }, { "epoch": 0.68, "learning_rate": 0.00025571052631578943, "loss": 0.6188, "step": 67670 }, { "epoch": 0.68, "learning_rate": 0.0002556315789473684, "loss": 0.6005, "step": 67680 }, { "epoch": 0.68, "learning_rate": 0.0002555526315789473, "loss": 0.6192, "step": 67690 }, { "epoch": 0.68, "learning_rate": 0.00025547368421052633, "loss": 0.614, "step": 67700 }, { "epoch": 0.68, "learning_rate": 0.0002553947368421053, "loss": 0.6191, "step": 67710 }, { "epoch": 0.68, "learning_rate": 0.00025531578947368417, "loss": 0.6169, "step": 67720 }, { "epoch": 0.68, "learning_rate": 0.0002552368421052631, "loss": 0.6028, "step": 67730 }, { "epoch": 0.68, "learning_rate": 0.00025515789473684207, "loss": 0.6043, "step": 67740 }, { "epoch": 0.68, "learning_rate": 0.000255078947368421, "loss": 0.6143, "step": 67750 }, { "epoch": 0.68, "learning_rate": 0.00025499999999999996, "loss": 0.6207, "step": 67760 }, { "epoch": 0.68, "learning_rate": 0.0002549210526315789, "loss": 0.6236, "step": 67770 }, { "epoch": 0.68, "learning_rate": 0.00025484210526315786, "loss": 0.6054, "step": 67780 }, { "epoch": 0.68, "learning_rate": 0.0002547631578947368, "loss": 0.5998, "step": 67790 }, { "epoch": 0.68, "learning_rate": 0.00025468421052631576, "loss": 0.6129, "step": 67800 }, { "epoch": 0.68, "learning_rate": 0.0002546052631578947, "loss": 0.6191, "step": 67810 }, { "epoch": 0.68, "learning_rate": 0.00025452631578947365, "loss": 0.627, "step": 67820 }, { "epoch": 0.68, "learning_rate": 0.0002544473684210526, "loss": 0.6209, "step": 67830 }, { "epoch": 0.68, "learning_rate": 0.00025436842105263155, "loss": 0.6149, "step": 67840 }, { "epoch": 0.68, "learning_rate": 0.0002542894736842105, "loss": 0.6252, "step": 67850 }, { "epoch": 0.68, "learning_rate": 0.00025421052631578945, "loss": 0.6138, "step": 67860 }, { "epoch": 0.68, "learning_rate": 0.0002541315789473684, "loss": 0.6198, "step": 67870 }, { "epoch": 0.68, "learning_rate": 0.00025405263157894734, "loss": 0.6133, "step": 67880 }, { "epoch": 0.68, "learning_rate": 0.0002539736842105263, "loss": 0.62, "step": 67890 }, { "epoch": 0.68, "learning_rate": 0.00025389473684210524, "loss": 0.6147, "step": 67900 }, { "epoch": 0.68, "learning_rate": 0.0002538157894736842, "loss": 0.6097, "step": 67910 }, { "epoch": 0.68, "learning_rate": 0.00025373684210526314, "loss": 0.6146, "step": 67920 }, { "epoch": 0.68, "learning_rate": 0.00025366578947368416, "loss": 0.6189, "step": 67930 }, { "epoch": 0.68, "learning_rate": 0.0002535868421052631, "loss": 0.6189, "step": 67940 }, { "epoch": 0.68, "learning_rate": 0.0002535078947368421, "loss": 0.6292, "step": 67950 }, { "epoch": 0.68, "learning_rate": 0.00025342894736842106, "loss": 0.6081, "step": 67960 }, { "epoch": 0.68, "learning_rate": 0.00025335, "loss": 0.6154, "step": 67970 }, { "epoch": 0.68, "learning_rate": 0.00025327105263157895, "loss": 0.6009, "step": 67980 }, { "epoch": 0.68, "learning_rate": 0.00025319210526315785, "loss": 0.6056, "step": 67990 }, { "epoch": 0.68, "learning_rate": 0.0002531131578947368, "loss": 0.6165, "step": 68000 }, { "epoch": 0.68, "learning_rate": 0.00025303421052631574, "loss": 0.6294, "step": 68010 }, { "epoch": 0.68, "learning_rate": 0.0002529552631578947, "loss": 0.6219, "step": 68020 }, { "epoch": 0.68, "learning_rate": 0.00025287631578947364, "loss": 0.6237, "step": 68030 }, { "epoch": 0.68, "learning_rate": 0.0002527973684210526, "loss": 0.6049, "step": 68040 }, { "epoch": 0.68, "learning_rate": 0.0002527184210526316, "loss": 0.6195, "step": 68050 }, { "epoch": 0.68, "learning_rate": 0.00025263947368421054, "loss": 0.6178, "step": 68060 }, { "epoch": 0.68, "learning_rate": 0.00025256052631578943, "loss": 0.6155, "step": 68070 }, { "epoch": 0.68, "learning_rate": 0.0002524815789473684, "loss": 0.6085, "step": 68080 }, { "epoch": 0.68, "learning_rate": 0.00025240263157894733, "loss": 0.6197, "step": 68090 }, { "epoch": 0.68, "learning_rate": 0.0002523236842105263, "loss": 0.61, "step": 68100 }, { "epoch": 0.68, "learning_rate": 0.00025224473684210523, "loss": 0.6132, "step": 68110 }, { "epoch": 0.68, "learning_rate": 0.0002521657894736842, "loss": 0.6193, "step": 68120 }, { "epoch": 0.68, "learning_rate": 0.0002520868421052631, "loss": 0.604, "step": 68130 }, { "epoch": 0.68, "learning_rate": 0.00025200789473684207, "loss": 0.6106, "step": 68140 }, { "epoch": 0.68, "learning_rate": 0.000251928947368421, "loss": 0.6148, "step": 68150 }, { "epoch": 0.68, "learning_rate": 0.00025184999999999997, "loss": 0.5906, "step": 68160 }, { "epoch": 0.68, "learning_rate": 0.0002517710526315789, "loss": 0.6127, "step": 68170 }, { "epoch": 0.68, "learning_rate": 0.00025169210526315787, "loss": 0.6092, "step": 68180 }, { "epoch": 0.68, "learning_rate": 0.0002516131578947368, "loss": 0.6262, "step": 68190 }, { "epoch": 0.68, "learning_rate": 0.00025153421052631576, "loss": 0.618, "step": 68200 }, { "epoch": 0.68, "learning_rate": 0.0002514552631578947, "loss": 0.6237, "step": 68210 }, { "epoch": 0.68, "learning_rate": 0.00025137631578947366, "loss": 0.6087, "step": 68220 }, { "epoch": 0.68, "learning_rate": 0.0002512973684210526, "loss": 0.6156, "step": 68230 }, { "epoch": 0.68, "learning_rate": 0.00025121842105263156, "loss": 0.6084, "step": 68240 }, { "epoch": 0.68, "learning_rate": 0.0002511394736842105, "loss": 0.6167, "step": 68250 }, { "epoch": 0.68, "learning_rate": 0.00025106052631578945, "loss": 0.616, "step": 68260 }, { "epoch": 0.68, "learning_rate": 0.0002509815789473684, "loss": 0.6084, "step": 68270 }, { "epoch": 0.68, "learning_rate": 0.00025090263157894735, "loss": 0.6154, "step": 68280 }, { "epoch": 0.68, "learning_rate": 0.0002508236842105263, "loss": 0.6127, "step": 68290 }, { "epoch": 0.68, "learning_rate": 0.00025074473684210525, "loss": 0.6065, "step": 68300 }, { "epoch": 0.68, "learning_rate": 0.0002506657894736842, "loss": 0.6116, "step": 68310 }, { "epoch": 0.68, "learning_rate": 0.00025058684210526314, "loss": 0.6053, "step": 68320 }, { "epoch": 0.68, "learning_rate": 0.0002505078947368421, "loss": 0.6185, "step": 68330 }, { "epoch": 0.68, "learning_rate": 0.00025042894736842104, "loss": 0.6165, "step": 68340 }, { "epoch": 0.68, "learning_rate": 0.00025035, "loss": 0.6125, "step": 68350 }, { "epoch": 0.68, "learning_rate": 0.00025027105263157893, "loss": 0.6134, "step": 68360 }, { "epoch": 0.68, "learning_rate": 0.0002501921052631579, "loss": 0.6174, "step": 68370 }, { "epoch": 0.68, "learning_rate": 0.00025011315789473683, "loss": 0.6202, "step": 68380 }, { "epoch": 0.68, "learning_rate": 0.0002500342105263158, "loss": 0.6204, "step": 68390 }, { "epoch": 0.68, "learning_rate": 0.00024995526315789473, "loss": 0.6053, "step": 68400 }, { "epoch": 0.68, "learning_rate": 0.0002498763157894736, "loss": 0.6128, "step": 68410 }, { "epoch": 0.68, "learning_rate": 0.0002497973684210526, "loss": 0.6161, "step": 68420 }, { "epoch": 0.68, "learning_rate": 0.0002497184210526316, "loss": 0.6068, "step": 68430 }, { "epoch": 0.68, "learning_rate": 0.0002496394736842105, "loss": 0.6023, "step": 68440 }, { "epoch": 0.68, "learning_rate": 0.00024956052631578947, "loss": 0.6008, "step": 68450 }, { "epoch": 0.68, "learning_rate": 0.0002494815789473684, "loss": 0.6074, "step": 68460 }, { "epoch": 0.68, "learning_rate": 0.00024940263157894737, "loss": 0.5798, "step": 68470 }, { "epoch": 0.68, "learning_rate": 0.00024932368421052626, "loss": 0.603, "step": 68480 }, { "epoch": 0.68, "learning_rate": 0.0002492447368421052, "loss": 0.6085, "step": 68490 }, { "epoch": 0.69, "learning_rate": 0.00024916578947368416, "loss": 0.6058, "step": 68500 }, { "epoch": 0.69, "learning_rate": 0.00024908684210526316, "loss": 0.6086, "step": 68510 }, { "epoch": 0.69, "learning_rate": 0.0002490078947368421, "loss": 0.6189, "step": 68520 }, { "epoch": 0.69, "learning_rate": 0.00024892894736842106, "loss": 0.609, "step": 68530 }, { "epoch": 0.69, "learning_rate": 0.00024885, "loss": 0.6023, "step": 68540 }, { "epoch": 0.69, "learning_rate": 0.00024877105263157895, "loss": 0.6105, "step": 68550 }, { "epoch": 0.69, "learning_rate": 0.00024869210526315785, "loss": 0.6157, "step": 68560 }, { "epoch": 0.69, "learning_rate": 0.0002486131578947368, "loss": 0.6098, "step": 68570 }, { "epoch": 0.69, "learning_rate": 0.00024853421052631574, "loss": 0.6157, "step": 68580 }, { "epoch": 0.69, "learning_rate": 0.0002484552631578947, "loss": 0.6039, "step": 68590 }, { "epoch": 0.69, "learning_rate": 0.0002483763157894737, "loss": 0.6003, "step": 68600 }, { "epoch": 0.69, "learning_rate": 0.00024829736842105264, "loss": 0.597, "step": 68610 }, { "epoch": 0.69, "learning_rate": 0.0002482184210526316, "loss": 0.6065, "step": 68620 }, { "epoch": 0.69, "learning_rate": 0.0002481394736842105, "loss": 0.6053, "step": 68630 }, { "epoch": 0.69, "learning_rate": 0.00024806052631578943, "loss": 0.612, "step": 68640 }, { "epoch": 0.69, "learning_rate": 0.0002479815789473684, "loss": 0.6069, "step": 68650 }, { "epoch": 0.69, "learning_rate": 0.00024790263157894733, "loss": 0.6088, "step": 68660 }, { "epoch": 0.69, "learning_rate": 0.0002478236842105263, "loss": 0.6081, "step": 68670 }, { "epoch": 0.69, "learning_rate": 0.0002477447368421052, "loss": 0.6058, "step": 68680 }, { "epoch": 0.69, "learning_rate": 0.0002476657894736842, "loss": 0.6156, "step": 68690 }, { "epoch": 0.69, "learning_rate": 0.0002475868421052632, "loss": 0.6117, "step": 68700 }, { "epoch": 0.69, "learning_rate": 0.00024750789473684207, "loss": 0.6123, "step": 68710 }, { "epoch": 0.69, "learning_rate": 0.000247428947368421, "loss": 0.611, "step": 68720 }, { "epoch": 0.69, "learning_rate": 0.00024734999999999997, "loss": 0.6127, "step": 68730 }, { "epoch": 0.69, "learning_rate": 0.0002472710526315789, "loss": 0.6079, "step": 68740 }, { "epoch": 0.69, "learning_rate": 0.00024719210526315786, "loss": 0.6086, "step": 68750 }, { "epoch": 0.69, "learning_rate": 0.0002471131578947368, "loss": 0.6038, "step": 68760 }, { "epoch": 0.69, "learning_rate": 0.00024703421052631576, "loss": 0.6027, "step": 68770 }, { "epoch": 0.69, "learning_rate": 0.0002469552631578947, "loss": 0.604, "step": 68780 }, { "epoch": 0.69, "learning_rate": 0.00024687631578947366, "loss": 0.6101, "step": 68790 }, { "epoch": 0.69, "learning_rate": 0.0002467973684210526, "loss": 0.5935, "step": 68800 }, { "epoch": 0.69, "learning_rate": 0.00024671842105263155, "loss": 0.613, "step": 68810 }, { "epoch": 0.69, "learning_rate": 0.0002466394736842105, "loss": 0.6102, "step": 68820 }, { "epoch": 0.69, "learning_rate": 0.00024656052631578945, "loss": 0.6134, "step": 68830 }, { "epoch": 0.69, "learning_rate": 0.0002464815789473684, "loss": 0.6165, "step": 68840 }, { "epoch": 0.69, "learning_rate": 0.00024640263157894735, "loss": 0.6066, "step": 68850 }, { "epoch": 0.69, "learning_rate": 0.0002463236842105263, "loss": 0.5881, "step": 68860 }, { "epoch": 0.69, "learning_rate": 0.00024624473684210524, "loss": 0.6132, "step": 68870 }, { "epoch": 0.69, "learning_rate": 0.0002461657894736842, "loss": 0.6242, "step": 68880 }, { "epoch": 0.69, "learning_rate": 0.00024608684210526314, "loss": 0.6087, "step": 68890 }, { "epoch": 0.69, "learning_rate": 0.0002460078947368421, "loss": 0.5976, "step": 68900 }, { "epoch": 0.69, "learning_rate": 0.00024592894736842104, "loss": 0.5987, "step": 68910 }, { "epoch": 0.69, "learning_rate": 0.00024585, "loss": 0.6161, "step": 68920 }, { "epoch": 0.69, "learning_rate": 0.00024577105263157893, "loss": 0.6109, "step": 68930 }, { "epoch": 0.69, "learning_rate": 0.0002456921052631579, "loss": 0.6189, "step": 68940 }, { "epoch": 0.69, "learning_rate": 0.00024561315789473683, "loss": 0.6157, "step": 68950 }, { "epoch": 0.69, "learning_rate": 0.0002455342105263158, "loss": 0.6059, "step": 68960 }, { "epoch": 0.69, "learning_rate": 0.0002454552631578947, "loss": 0.6033, "step": 68970 }, { "epoch": 0.69, "learning_rate": 0.0002453763157894737, "loss": 0.6102, "step": 68980 }, { "epoch": 0.69, "learning_rate": 0.0002452973684210526, "loss": 0.6003, "step": 68990 }, { "epoch": 0.69, "learning_rate": 0.00024521842105263157, "loss": 0.6109, "step": 69000 }, { "epoch": 0.69, "learning_rate": 0.0002451394736842105, "loss": 0.604, "step": 69010 }, { "epoch": 0.69, "learning_rate": 0.00024506052631578947, "loss": 0.6056, "step": 69020 }, { "epoch": 0.69, "learning_rate": 0.0002449815789473684, "loss": 0.6088, "step": 69030 }, { "epoch": 0.69, "learning_rate": 0.00024490263157894737, "loss": 0.5902, "step": 69040 }, { "epoch": 0.69, "learning_rate": 0.00024482368421052626, "loss": 0.6112, "step": 69050 }, { "epoch": 0.69, "learning_rate": 0.0002447447368421052, "loss": 0.611, "step": 69060 }, { "epoch": 0.69, "learning_rate": 0.0002446657894736842, "loss": 0.6094, "step": 69070 }, { "epoch": 0.69, "learning_rate": 0.00024458684210526316, "loss": 0.6069, "step": 69080 }, { "epoch": 0.69, "learning_rate": 0.0002445078947368421, "loss": 0.6074, "step": 69090 }, { "epoch": 0.69, "learning_rate": 0.00024442894736842106, "loss": 0.6131, "step": 69100 }, { "epoch": 0.69, "learning_rate": 0.00024435, "loss": 0.5869, "step": 69110 }, { "epoch": 0.69, "learning_rate": 0.0002442710526315789, "loss": 0.6104, "step": 69120 }, { "epoch": 0.69, "learning_rate": 0.00024419210526315785, "loss": 0.6049, "step": 69130 }, { "epoch": 0.69, "learning_rate": 0.00024411315789473682, "loss": 0.6167, "step": 69140 }, { "epoch": 0.69, "learning_rate": 0.00024403421052631574, "loss": 0.6033, "step": 69150 }, { "epoch": 0.69, "learning_rate": 0.00024395526315789474, "loss": 0.6039, "step": 69160 }, { "epoch": 0.69, "learning_rate": 0.00024387631578947367, "loss": 0.5829, "step": 69170 }, { "epoch": 0.69, "learning_rate": 0.00024379736842105261, "loss": 0.5959, "step": 69180 }, { "epoch": 0.69, "learning_rate": 0.00024371842105263156, "loss": 0.6027, "step": 69190 }, { "epoch": 0.69, "learning_rate": 0.0002436394736842105, "loss": 0.6057, "step": 69200 }, { "epoch": 0.69, "learning_rate": 0.00024356052631578946, "loss": 0.5997, "step": 69210 }, { "epoch": 0.69, "learning_rate": 0.0002434815789473684, "loss": 0.6002, "step": 69220 }, { "epoch": 0.69, "learning_rate": 0.00024340263157894733, "loss": 0.5976, "step": 69230 }, { "epoch": 0.69, "learning_rate": 0.00024332368421052628, "loss": 0.607, "step": 69240 }, { "epoch": 0.69, "learning_rate": 0.00024324473684210523, "loss": 0.6073, "step": 69250 }, { "epoch": 0.69, "learning_rate": 0.0002431657894736842, "loss": 0.5946, "step": 69260 }, { "epoch": 0.69, "learning_rate": 0.00024308684210526315, "loss": 0.6051, "step": 69270 }, { "epoch": 0.69, "learning_rate": 0.0002430078947368421, "loss": 0.598, "step": 69280 }, { "epoch": 0.69, "learning_rate": 0.00024292894736842105, "loss": 0.602, "step": 69290 }, { "epoch": 0.69, "learning_rate": 0.00024284999999999997, "loss": 0.5844, "step": 69300 }, { "epoch": 0.69, "learning_rate": 0.00024277105263157892, "loss": 0.6098, "step": 69310 }, { "epoch": 0.69, "learning_rate": 0.00024269210526315786, "loss": 0.6077, "step": 69320 }, { "epoch": 0.69, "learning_rate": 0.0002426131578947368, "loss": 0.6136, "step": 69330 }, { "epoch": 0.69, "learning_rate": 0.00024253421052631576, "loss": 0.6044, "step": 69340 }, { "epoch": 0.69, "learning_rate": 0.00024245526315789474, "loss": 0.6037, "step": 69350 }, { "epoch": 0.69, "learning_rate": 0.00024237631578947368, "loss": 0.6013, "step": 69360 }, { "epoch": 0.69, "learning_rate": 0.0002422973684210526, "loss": 0.5935, "step": 69370 }, { "epoch": 0.69, "learning_rate": 0.00024221842105263155, "loss": 0.6034, "step": 69380 }, { "epoch": 0.69, "learning_rate": 0.0002421394736842105, "loss": 0.6045, "step": 69390 }, { "epoch": 0.69, "learning_rate": 0.00024206052631578945, "loss": 0.5987, "step": 69400 }, { "epoch": 0.69, "learning_rate": 0.0002419815789473684, "loss": 0.6052, "step": 69410 }, { "epoch": 0.69, "learning_rate": 0.00024190263157894735, "loss": 0.6117, "step": 69420 }, { "epoch": 0.69, "learning_rate": 0.00024182368421052627, "loss": 0.5942, "step": 69430 }, { "epoch": 0.69, "learning_rate": 0.00024174473684210527, "loss": 0.5926, "step": 69440 }, { "epoch": 0.69, "learning_rate": 0.0002416657894736842, "loss": 0.605, "step": 69450 }, { "epoch": 0.69, "learning_rate": 0.00024158684210526314, "loss": 0.5998, "step": 69460 }, { "epoch": 0.69, "learning_rate": 0.0002415078947368421, "loss": 0.6041, "step": 69470 }, { "epoch": 0.69, "learning_rate": 0.00024142894736842104, "loss": 0.5895, "step": 69480 }, { "epoch": 0.69, "learning_rate": 0.00024134999999999998, "loss": 0.5974, "step": 69490 }, { "epoch": 0.69, "learning_rate": 0.00024127105263157893, "loss": 0.5932, "step": 69500 }, { "epoch": 0.7, "learning_rate": 0.00024119210526315785, "loss": 0.6043, "step": 69510 }, { "epoch": 0.7, "learning_rate": 0.0002411131578947368, "loss": 0.605, "step": 69520 }, { "epoch": 0.7, "learning_rate": 0.00024103421052631575, "loss": 0.6021, "step": 69530 }, { "epoch": 0.7, "learning_rate": 0.00024095526315789473, "loss": 0.5918, "step": 69540 }, { "epoch": 0.7, "learning_rate": 0.00024087631578947367, "loss": 0.5874, "step": 69550 }, { "epoch": 0.7, "learning_rate": 0.00024079736842105262, "loss": 0.5916, "step": 69560 }, { "epoch": 0.7, "learning_rate": 0.00024071842105263157, "loss": 0.6072, "step": 69570 }, { "epoch": 0.7, "learning_rate": 0.0002406394736842105, "loss": 0.5992, "step": 69580 }, { "epoch": 0.7, "learning_rate": 0.00024056052631578944, "loss": 0.5925, "step": 69590 }, { "epoch": 0.7, "learning_rate": 0.0002404815789473684, "loss": 0.5935, "step": 69600 }, { "epoch": 0.7, "learning_rate": 0.00024040263157894734, "loss": 0.5945, "step": 69610 }, { "epoch": 0.7, "learning_rate": 0.00024032368421052629, "loss": 0.6028, "step": 69620 }, { "epoch": 0.7, "learning_rate": 0.00024024473684210526, "loss": 0.5978, "step": 69630 }, { "epoch": 0.7, "learning_rate": 0.0002401657894736842, "loss": 0.6066, "step": 69640 }, { "epoch": 0.7, "learning_rate": 0.00024008684210526316, "loss": 0.5957, "step": 69650 }, { "epoch": 0.7, "learning_rate": 0.00024000789473684208, "loss": 0.5925, "step": 69660 }, { "epoch": 0.7, "learning_rate": 0.00023992894736842103, "loss": 0.592, "step": 69670 }, { "epoch": 0.7, "learning_rate": 0.00023984999999999998, "loss": 0.5931, "step": 69680 }, { "epoch": 0.7, "learning_rate": 0.00023977105263157892, "loss": 0.607, "step": 69690 }, { "epoch": 0.7, "learning_rate": 0.00023969210526315787, "loss": 0.5953, "step": 69700 }, { "epoch": 0.7, "learning_rate": 0.00023961315789473682, "loss": 0.5925, "step": 69710 }, { "epoch": 0.7, "learning_rate": 0.0002395342105263158, "loss": 0.6155, "step": 69720 }, { "epoch": 0.7, "learning_rate": 0.00023945526315789472, "loss": 0.6088, "step": 69730 }, { "epoch": 0.7, "learning_rate": 0.00023937631578947367, "loss": 0.5937, "step": 69740 }, { "epoch": 0.7, "learning_rate": 0.0002392973684210526, "loss": 0.6061, "step": 69750 }, { "epoch": 0.7, "learning_rate": 0.00023921842105263156, "loss": 0.6011, "step": 69760 }, { "epoch": 0.7, "learning_rate": 0.0002391394736842105, "loss": 0.6094, "step": 69770 }, { "epoch": 0.7, "learning_rate": 0.00023906052631578946, "loss": 0.6065, "step": 69780 }, { "epoch": 0.7, "learning_rate": 0.00023898157894736838, "loss": 0.5998, "step": 69790 }, { "epoch": 0.7, "learning_rate": 0.00023890263157894733, "loss": 0.6013, "step": 69800 }, { "epoch": 0.7, "learning_rate": 0.00023882368421052628, "loss": 0.6027, "step": 69810 }, { "epoch": 0.7, "learning_rate": 0.00023874473684210525, "loss": 0.6078, "step": 69820 }, { "epoch": 0.7, "learning_rate": 0.0002386657894736842, "loss": 0.6176, "step": 69830 }, { "epoch": 0.7, "learning_rate": 0.00023858684210526315, "loss": 0.6084, "step": 69840 }, { "epoch": 0.7, "learning_rate": 0.0002385078947368421, "loss": 0.605, "step": 69850 }, { "epoch": 0.7, "learning_rate": 0.00023842894736842102, "loss": 0.6058, "step": 69860 }, { "epoch": 0.7, "learning_rate": 0.00023834999999999997, "loss": 0.5942, "step": 69870 }, { "epoch": 0.7, "learning_rate": 0.00023827105263157891, "loss": 0.5972, "step": 69880 }, { "epoch": 0.7, "learning_rate": 0.00023819210526315786, "loss": 0.5919, "step": 69890 }, { "epoch": 0.7, "learning_rate": 0.0002381131578947368, "loss": 0.5953, "step": 69900 }, { "epoch": 0.7, "learning_rate": 0.00023803421052631579, "loss": 0.5834, "step": 69910 }, { "epoch": 0.7, "learning_rate": 0.00023795526315789473, "loss": 0.5894, "step": 69920 }, { "epoch": 0.7, "learning_rate": 0.00023787631578947368, "loss": 0.5811, "step": 69930 }, { "epoch": 0.7, "learning_rate": 0.0002378052631578947, "loss": 0.6117, "step": 69940 }, { "epoch": 0.7, "learning_rate": 0.00023772631578947365, "loss": 0.6008, "step": 69950 }, { "epoch": 0.7, "learning_rate": 0.0002376473684210526, "loss": 0.6081, "step": 69960 }, { "epoch": 0.7, "learning_rate": 0.00023756842105263158, "loss": 0.6041, "step": 69970 }, { "epoch": 0.7, "learning_rate": 0.00023748947368421052, "loss": 0.6165, "step": 69980 }, { "epoch": 0.7, "learning_rate": 0.00023741052631578947, "loss": 0.5945, "step": 69990 }, { "epoch": 0.7, "learning_rate": 0.0002373315789473684, "loss": 0.6078, "step": 70000 }, { "epoch": 0.7, "eval_accuracy": 0.8734435544718635, "eval_loss": 0.583984375, "eval_runtime": 98.2973, "eval_samples_per_second": 813.857, "eval_steps_per_second": 1.597, "step": 70000 }, { "epoch": 0.7, "learning_rate": 0.00023725263157894734, "loss": 0.5939, "step": 70010 }, { "epoch": 0.7, "learning_rate": 0.0002371736842105263, "loss": 0.6039, "step": 70020 }, { "epoch": 0.7, "learning_rate": 0.00023709473684210524, "loss": 0.5968, "step": 70030 }, { "epoch": 0.7, "learning_rate": 0.0002370157894736842, "loss": 0.5757, "step": 70040 }, { "epoch": 0.7, "learning_rate": 0.00023693684210526314, "loss": 0.5833, "step": 70050 }, { "epoch": 0.7, "learning_rate": 0.00023685789473684206, "loss": 0.5848, "step": 70060 }, { "epoch": 0.7, "learning_rate": 0.00023677894736842106, "loss": 0.6055, "step": 70070 }, { "epoch": 0.7, "learning_rate": 0.00023669999999999998, "loss": 0.6161, "step": 70080 }, { "epoch": 0.7, "learning_rate": 0.00023662105263157893, "loss": 0.5991, "step": 70090 }, { "epoch": 0.7, "learning_rate": 0.00023654210526315788, "loss": 0.6033, "step": 70100 }, { "epoch": 0.7, "learning_rate": 0.00023646315789473682, "loss": 0.6065, "step": 70110 }, { "epoch": 0.7, "learning_rate": 0.00023638421052631577, "loss": 0.6057, "step": 70120 }, { "epoch": 0.7, "learning_rate": 0.00023630526315789472, "loss": 0.6078, "step": 70130 }, { "epoch": 0.7, "learning_rate": 0.00023622631578947364, "loss": 0.6102, "step": 70140 }, { "epoch": 0.7, "learning_rate": 0.0002361473684210526, "loss": 0.6025, "step": 70150 }, { "epoch": 0.7, "learning_rate": 0.00023606842105263157, "loss": 0.5977, "step": 70160 }, { "epoch": 0.7, "learning_rate": 0.00023598947368421051, "loss": 0.6034, "step": 70170 }, { "epoch": 0.7, "learning_rate": 0.00023591052631578946, "loss": 0.6022, "step": 70180 }, { "epoch": 0.7, "learning_rate": 0.0002358315789473684, "loss": 0.6029, "step": 70190 }, { "epoch": 0.7, "learning_rate": 0.00023575263157894736, "loss": 0.5929, "step": 70200 }, { "epoch": 0.7, "learning_rate": 0.00023567368421052628, "loss": 0.6114, "step": 70210 }, { "epoch": 0.7, "learning_rate": 0.00023559473684210523, "loss": 0.6118, "step": 70220 }, { "epoch": 0.7, "learning_rate": 0.00023551578947368418, "loss": 0.6014, "step": 70230 }, { "epoch": 0.7, "learning_rate": 0.00023543684210526313, "loss": 0.595, "step": 70240 }, { "epoch": 0.7, "learning_rate": 0.0002353578947368421, "loss": 0.5955, "step": 70250 }, { "epoch": 0.7, "learning_rate": 0.00023527894736842105, "loss": 0.6039, "step": 70260 }, { "epoch": 0.7, "learning_rate": 0.0002352, "loss": 0.6257, "step": 70270 }, { "epoch": 0.7, "learning_rate": 0.00023512105263157892, "loss": 0.6124, "step": 70280 }, { "epoch": 0.7, "learning_rate": 0.00023504210526315787, "loss": 0.6083, "step": 70290 }, { "epoch": 0.7, "learning_rate": 0.00023496315789473682, "loss": 0.6003, "step": 70300 }, { "epoch": 0.7, "learning_rate": 0.00023488421052631576, "loss": 0.6006, "step": 70310 }, { "epoch": 0.7, "learning_rate": 0.0002348052631578947, "loss": 0.5982, "step": 70320 }, { "epoch": 0.7, "learning_rate": 0.00023472631578947366, "loss": 0.6025, "step": 70330 }, { "epoch": 0.7, "learning_rate": 0.00023464736842105258, "loss": 0.6044, "step": 70340 }, { "epoch": 0.7, "learning_rate": 0.00023456842105263158, "loss": 0.5941, "step": 70350 }, { "epoch": 0.7, "learning_rate": 0.0002344894736842105, "loss": 0.597, "step": 70360 }, { "epoch": 0.7, "learning_rate": 0.00023441052631578945, "loss": 0.5882, "step": 70370 }, { "epoch": 0.7, "learning_rate": 0.0002343315789473684, "loss": 0.5973, "step": 70380 }, { "epoch": 0.7, "learning_rate": 0.00023425263157894735, "loss": 0.5996, "step": 70390 }, { "epoch": 0.7, "learning_rate": 0.0002341736842105263, "loss": 0.6067, "step": 70400 }, { "epoch": 0.7, "learning_rate": 0.00023409473684210525, "loss": 0.6058, "step": 70410 }, { "epoch": 0.7, "learning_rate": 0.00023401578947368417, "loss": 0.6062, "step": 70420 }, { "epoch": 0.7, "learning_rate": 0.00023393684210526312, "loss": 0.6052, "step": 70430 }, { "epoch": 0.7, "learning_rate": 0.0002338578947368421, "loss": 0.5923, "step": 70440 }, { "epoch": 0.7, "learning_rate": 0.00023377894736842104, "loss": 0.6033, "step": 70450 }, { "epoch": 0.7, "learning_rate": 0.0002337, "loss": 0.6068, "step": 70460 }, { "epoch": 0.7, "learning_rate": 0.00023362105263157894, "loss": 0.5843, "step": 70470 }, { "epoch": 0.7, "learning_rate": 0.00023354210526315789, "loss": 0.5965, "step": 70480 }, { "epoch": 0.7, "learning_rate": 0.0002334631578947368, "loss": 0.5903, "step": 70490 }, { "epoch": 0.7, "learning_rate": 0.00023338421052631575, "loss": 0.6136, "step": 70500 }, { "epoch": 0.71, "learning_rate": 0.0002333052631578947, "loss": 0.6037, "step": 70510 }, { "epoch": 0.71, "learning_rate": 0.00023322631578947365, "loss": 0.613, "step": 70520 }, { "epoch": 0.71, "learning_rate": 0.00023314736842105263, "loss": 0.6103, "step": 70530 }, { "epoch": 0.71, "learning_rate": 0.00023306842105263157, "loss": 0.5961, "step": 70540 }, { "epoch": 0.71, "learning_rate": 0.00023298947368421052, "loss": 0.6017, "step": 70550 }, { "epoch": 0.71, "learning_rate": 0.00023291052631578947, "loss": 0.6015, "step": 70560 }, { "epoch": 0.71, "learning_rate": 0.0002328315789473684, "loss": 0.6025, "step": 70570 }, { "epoch": 0.71, "learning_rate": 0.00023275263157894734, "loss": 0.5949, "step": 70580 }, { "epoch": 0.71, "learning_rate": 0.0002326736842105263, "loss": 0.6025, "step": 70590 }, { "epoch": 0.71, "learning_rate": 0.00023259473684210524, "loss": 0.6016, "step": 70600 }, { "epoch": 0.71, "learning_rate": 0.00023251578947368419, "loss": 0.5956, "step": 70610 }, { "epoch": 0.71, "learning_rate": 0.00023243684210526313, "loss": 0.597, "step": 70620 }, { "epoch": 0.71, "learning_rate": 0.0002323578947368421, "loss": 0.6034, "step": 70630 }, { "epoch": 0.71, "learning_rate": 0.00023227894736842103, "loss": 0.5926, "step": 70640 }, { "epoch": 0.71, "learning_rate": 0.00023219999999999998, "loss": 0.5916, "step": 70650 }, { "epoch": 0.71, "learning_rate": 0.00023212105263157893, "loss": 0.5967, "step": 70660 }, { "epoch": 0.71, "learning_rate": 0.00023204210526315788, "loss": 0.6008, "step": 70670 }, { "epoch": 0.71, "learning_rate": 0.00023196315789473682, "loss": 0.5852, "step": 70680 }, { "epoch": 0.71, "learning_rate": 0.00023188421052631577, "loss": 0.5923, "step": 70690 }, { "epoch": 0.71, "learning_rate": 0.0002318052631578947, "loss": 0.5933, "step": 70700 }, { "epoch": 0.71, "learning_rate": 0.00023172631578947364, "loss": 0.6051, "step": 70710 }, { "epoch": 0.71, "learning_rate": 0.00023164736842105262, "loss": 0.5979, "step": 70720 }, { "epoch": 0.71, "learning_rate": 0.00023156842105263157, "loss": 0.5987, "step": 70730 }, { "epoch": 0.71, "learning_rate": 0.00023148947368421051, "loss": 0.5959, "step": 70740 }, { "epoch": 0.71, "learning_rate": 0.00023141052631578946, "loss": 0.5904, "step": 70750 }, { "epoch": 0.71, "learning_rate": 0.0002313315789473684, "loss": 0.6042, "step": 70760 }, { "epoch": 0.71, "learning_rate": 0.00023125263157894733, "loss": 0.6005, "step": 70770 }, { "epoch": 0.71, "learning_rate": 0.00023117368421052628, "loss": 0.6, "step": 70780 }, { "epoch": 0.71, "learning_rate": 0.00023109473684210523, "loss": 0.5935, "step": 70790 }, { "epoch": 0.71, "learning_rate": 0.00023101578947368418, "loss": 0.608, "step": 70800 }, { "epoch": 0.71, "learning_rate": 0.00023093684210526315, "loss": 0.6092, "step": 70810 }, { "epoch": 0.71, "learning_rate": 0.0002308578947368421, "loss": 0.5978, "step": 70820 }, { "epoch": 0.71, "learning_rate": 0.00023077894736842105, "loss": 0.6018, "step": 70830 }, { "epoch": 0.71, "learning_rate": 0.0002307, "loss": 0.6036, "step": 70840 }, { "epoch": 0.71, "learning_rate": 0.00023062105263157892, "loss": 0.6035, "step": 70850 }, { "epoch": 0.71, "learning_rate": 0.00023054210526315787, "loss": 0.5986, "step": 70860 }, { "epoch": 0.71, "learning_rate": 0.00023046315789473681, "loss": 0.6031, "step": 70870 }, { "epoch": 0.71, "learning_rate": 0.00023038421052631576, "loss": 0.5977, "step": 70880 }, { "epoch": 0.71, "learning_rate": 0.0002303052631578947, "loss": 0.6025, "step": 70890 }, { "epoch": 0.71, "learning_rate": 0.00023022631578947366, "loss": 0.594, "step": 70900 }, { "epoch": 0.71, "learning_rate": 0.00023014736842105263, "loss": 0.614, "step": 70910 }, { "epoch": 0.71, "learning_rate": 0.00023006842105263156, "loss": 0.5989, "step": 70920 }, { "epoch": 0.71, "learning_rate": 0.0002299894736842105, "loss": 0.595, "step": 70930 }, { "epoch": 0.71, "learning_rate": 0.00022991842105263155, "loss": 0.5981, "step": 70940 }, { "epoch": 0.71, "learning_rate": 0.0002298394736842105, "loss": 0.5895, "step": 70950 }, { "epoch": 0.71, "learning_rate": 0.00022976052631578945, "loss": 0.6078, "step": 70960 }, { "epoch": 0.71, "learning_rate": 0.00022968157894736842, "loss": 0.6083, "step": 70970 }, { "epoch": 0.71, "learning_rate": 0.00022960263157894737, "loss": 0.6098, "step": 70980 }, { "epoch": 0.71, "learning_rate": 0.0002295236842105263, "loss": 0.5887, "step": 70990 }, { "epoch": 0.71, "learning_rate": 0.00022944473684210524, "loss": 0.5875, "step": 71000 }, { "epoch": 0.71, "learning_rate": 0.0002293657894736842, "loss": 0.6078, "step": 71010 }, { "epoch": 0.71, "learning_rate": 0.00022928684210526314, "loss": 0.5973, "step": 71020 }, { "epoch": 0.71, "learning_rate": 0.0002292078947368421, "loss": 0.6166, "step": 71030 }, { "epoch": 0.71, "learning_rate": 0.00022912894736842104, "loss": 0.6056, "step": 71040 }, { "epoch": 0.71, "learning_rate": 0.00022904999999999996, "loss": 0.5935, "step": 71050 }, { "epoch": 0.71, "learning_rate": 0.0002289710526315789, "loss": 0.5915, "step": 71060 }, { "epoch": 0.71, "learning_rate": 0.00022889210526315788, "loss": 0.6014, "step": 71070 }, { "epoch": 0.71, "learning_rate": 0.00022881315789473683, "loss": 0.601, "step": 71080 }, { "epoch": 0.71, "learning_rate": 0.00022873421052631578, "loss": 0.6017, "step": 71090 }, { "epoch": 0.71, "learning_rate": 0.00022865526315789473, "loss": 0.6092, "step": 71100 }, { "epoch": 0.71, "learning_rate": 0.00022857631578947367, "loss": 0.6009, "step": 71110 }, { "epoch": 0.71, "learning_rate": 0.0002284973684210526, "loss": 0.5998, "step": 71120 }, { "epoch": 0.71, "learning_rate": 0.00022841842105263154, "loss": 0.5892, "step": 71130 }, { "epoch": 0.71, "learning_rate": 0.0002283394736842105, "loss": 0.5822, "step": 71140 }, { "epoch": 0.71, "learning_rate": 0.00022826052631578944, "loss": 0.5975, "step": 71150 }, { "epoch": 0.71, "learning_rate": 0.00022818157894736842, "loss": 0.617, "step": 71160 }, { "epoch": 0.71, "learning_rate": 0.00022810263157894736, "loss": 0.5986, "step": 71170 }, { "epoch": 0.71, "learning_rate": 0.0002280236842105263, "loss": 0.5952, "step": 71180 }, { "epoch": 0.71, "learning_rate": 0.00022794473684210526, "loss": 0.6105, "step": 71190 }, { "epoch": 0.71, "learning_rate": 0.00022786578947368418, "loss": 0.6043, "step": 71200 }, { "epoch": 0.71, "learning_rate": 0.00022778684210526313, "loss": 0.6116, "step": 71210 }, { "epoch": 0.71, "learning_rate": 0.00022770789473684208, "loss": 0.6148, "step": 71220 }, { "epoch": 0.71, "learning_rate": 0.00022762894736842103, "loss": 0.611, "step": 71230 }, { "epoch": 0.71, "learning_rate": 0.00022754999999999997, "loss": 0.6016, "step": 71240 }, { "epoch": 0.71, "learning_rate": 0.00022747105263157895, "loss": 0.6035, "step": 71250 }, { "epoch": 0.71, "learning_rate": 0.0002273921052631579, "loss": 0.5876, "step": 71260 }, { "epoch": 0.71, "learning_rate": 0.00022731315789473682, "loss": 0.5937, "step": 71270 }, { "epoch": 0.71, "learning_rate": 0.00022723421052631577, "loss": 0.5963, "step": 71280 }, { "epoch": 0.71, "learning_rate": 0.00022715526315789472, "loss": 0.5965, "step": 71290 }, { "epoch": 0.71, "learning_rate": 0.00022707631578947366, "loss": 0.5937, "step": 71300 }, { "epoch": 0.71, "learning_rate": 0.0002269973684210526, "loss": 0.5938, "step": 71310 }, { "epoch": 0.71, "learning_rate": 0.00022691842105263156, "loss": 0.5797, "step": 71320 }, { "epoch": 0.71, "learning_rate": 0.00022683947368421048, "loss": 0.5936, "step": 71330 }, { "epoch": 0.71, "learning_rate": 0.00022676052631578946, "loss": 0.5935, "step": 71340 }, { "epoch": 0.71, "learning_rate": 0.0002266815789473684, "loss": 0.5929, "step": 71350 }, { "epoch": 0.71, "learning_rate": 0.00022660263157894735, "loss": 0.5987, "step": 71360 }, { "epoch": 0.71, "learning_rate": 0.0002265236842105263, "loss": 0.5836, "step": 71370 }, { "epoch": 0.71, "learning_rate": 0.00022644473684210525, "loss": 0.576, "step": 71380 }, { "epoch": 0.71, "learning_rate": 0.0002263657894736842, "loss": 0.5896, "step": 71390 }, { "epoch": 0.71, "learning_rate": 0.00022628684210526312, "loss": 0.5827, "step": 71400 }, { "epoch": 0.71, "learning_rate": 0.00022620789473684207, "loss": 0.593, "step": 71410 }, { "epoch": 0.71, "learning_rate": 0.00022612894736842102, "loss": 0.5809, "step": 71420 }, { "epoch": 0.71, "learning_rate": 0.00022604999999999997, "loss": 0.5905, "step": 71430 }, { "epoch": 0.71, "learning_rate": 0.00022597105263157894, "loss": 0.5885, "step": 71440 }, { "epoch": 0.71, "learning_rate": 0.0002258921052631579, "loss": 0.5924, "step": 71450 }, { "epoch": 0.71, "learning_rate": 0.00022581315789473684, "loss": 0.5871, "step": 71460 }, { "epoch": 0.71, "learning_rate": 0.00022573421052631579, "loss": 0.5978, "step": 71470 }, { "epoch": 0.71, "learning_rate": 0.0002256552631578947, "loss": 0.5868, "step": 71480 }, { "epoch": 0.71, "learning_rate": 0.00022557631578947365, "loss": 0.596, "step": 71490 }, { "epoch": 0.71, "learning_rate": 0.0002254973684210526, "loss": 0.5952, "step": 71500 }, { "epoch": 0.72, "learning_rate": 0.00022541842105263155, "loss": 0.5963, "step": 71510 }, { "epoch": 0.72, "learning_rate": 0.0002253394736842105, "loss": 0.6128, "step": 71520 }, { "epoch": 0.72, "learning_rate": 0.00022526052631578948, "loss": 0.6038, "step": 71530 }, { "epoch": 0.72, "learning_rate": 0.00022518157894736842, "loss": 0.6014, "step": 71540 }, { "epoch": 0.72, "learning_rate": 0.00022510263157894734, "loss": 0.6037, "step": 71550 }, { "epoch": 0.72, "learning_rate": 0.0002250236842105263, "loss": 0.6062, "step": 71560 }, { "epoch": 0.72, "learning_rate": 0.00022494473684210524, "loss": 0.5995, "step": 71570 }, { "epoch": 0.72, "learning_rate": 0.0002248657894736842, "loss": 0.6024, "step": 71580 }, { "epoch": 0.72, "learning_rate": 0.00022478684210526314, "loss": 0.599, "step": 71590 }, { "epoch": 0.72, "learning_rate": 0.00022470789473684209, "loss": 0.6094, "step": 71600 }, { "epoch": 0.72, "learning_rate": 0.000224628947368421, "loss": 0.6053, "step": 71610 }, { "epoch": 0.72, "learning_rate": 0.00022455, "loss": 0.5928, "step": 71620 }, { "epoch": 0.72, "learning_rate": 0.00022447105263157893, "loss": 0.6068, "step": 71630 }, { "epoch": 0.72, "learning_rate": 0.00022439210526315788, "loss": 0.602, "step": 71640 }, { "epoch": 0.72, "learning_rate": 0.00022431315789473683, "loss": 0.6049, "step": 71650 }, { "epoch": 0.72, "learning_rate": 0.00022423421052631578, "loss": 0.6079, "step": 71660 }, { "epoch": 0.72, "learning_rate": 0.00022415526315789472, "loss": 0.5928, "step": 71670 }, { "epoch": 0.72, "learning_rate": 0.00022407631578947367, "loss": 0.5994, "step": 71680 }, { "epoch": 0.72, "learning_rate": 0.0002239973684210526, "loss": 0.597, "step": 71690 }, { "epoch": 0.72, "learning_rate": 0.00022391842105263154, "loss": 0.6096, "step": 71700 }, { "epoch": 0.72, "learning_rate": 0.0002238394736842105, "loss": 0.6095, "step": 71710 }, { "epoch": 0.72, "learning_rate": 0.00022376052631578947, "loss": 0.6015, "step": 71720 }, { "epoch": 0.72, "learning_rate": 0.00022368157894736841, "loss": 0.6118, "step": 71730 }, { "epoch": 0.72, "learning_rate": 0.00022360263157894736, "loss": 0.6141, "step": 71740 }, { "epoch": 0.72, "learning_rate": 0.0002235236842105263, "loss": 0.6042, "step": 71750 }, { "epoch": 0.72, "learning_rate": 0.00022344473684210523, "loss": 0.608, "step": 71760 }, { "epoch": 0.72, "learning_rate": 0.00022336578947368418, "loss": 0.6134, "step": 71770 }, { "epoch": 0.72, "learning_rate": 0.00022328684210526313, "loss": 0.6184, "step": 71780 }, { "epoch": 0.72, "learning_rate": 0.00022320789473684208, "loss": 0.6086, "step": 71790 }, { "epoch": 0.72, "learning_rate": 0.00022312894736842103, "loss": 0.6171, "step": 71800 }, { "epoch": 0.72, "learning_rate": 0.00022305, "loss": 0.6145, "step": 71810 }, { "epoch": 0.72, "learning_rate": 0.00022297105263157895, "loss": 0.6011, "step": 71820 }, { "epoch": 0.72, "learning_rate": 0.00022289210526315787, "loss": 0.6098, "step": 71830 }, { "epoch": 0.72, "learning_rate": 0.00022281315789473682, "loss": 0.6053, "step": 71840 }, { "epoch": 0.72, "learning_rate": 0.00022273421052631577, "loss": 0.6126, "step": 71850 }, { "epoch": 0.72, "learning_rate": 0.00022265526315789472, "loss": 0.5937, "step": 71860 }, { "epoch": 0.72, "learning_rate": 0.00022257631578947366, "loss": 0.6048, "step": 71870 }, { "epoch": 0.72, "learning_rate": 0.0002224973684210526, "loss": 0.6046, "step": 71880 }, { "epoch": 0.72, "learning_rate": 0.00022241842105263153, "loss": 0.6002, "step": 71890 }, { "epoch": 0.72, "learning_rate": 0.00022233947368421054, "loss": 0.5936, "step": 71900 }, { "epoch": 0.72, "learning_rate": 0.00022226052631578946, "loss": 0.6053, "step": 71910 }, { "epoch": 0.72, "learning_rate": 0.0002221815789473684, "loss": 0.5948, "step": 71920 }, { "epoch": 0.72, "learning_rate": 0.00022210263157894735, "loss": 0.5856, "step": 71930 }, { "epoch": 0.72, "learning_rate": 0.0002220315789473684, "loss": 0.5845, "step": 71940 }, { "epoch": 0.72, "learning_rate": 0.00022195263157894735, "loss": 0.5855, "step": 71950 }, { "epoch": 0.72, "learning_rate": 0.00022187368421052627, "loss": 0.5879, "step": 71960 }, { "epoch": 0.72, "learning_rate": 0.00022179473684210525, "loss": 0.582, "step": 71970 }, { "epoch": 0.72, "learning_rate": 0.0002217157894736842, "loss": 0.5874, "step": 71980 }, { "epoch": 0.72, "learning_rate": 0.00022163684210526314, "loss": 0.5876, "step": 71990 }, { "epoch": 0.72, "learning_rate": 0.0002215578947368421, "loss": 0.5806, "step": 72000 }, { "epoch": 0.72, "learning_rate": 0.00022147894736842104, "loss": 0.5831, "step": 72010 }, { "epoch": 0.72, "learning_rate": 0.0002214, "loss": 0.58, "step": 72020 }, { "epoch": 0.72, "learning_rate": 0.0002213210526315789, "loss": 0.5838, "step": 72030 }, { "epoch": 0.72, "learning_rate": 0.00022124210526315786, "loss": 0.5866, "step": 72040 }, { "epoch": 0.72, "learning_rate": 0.0002211631578947368, "loss": 0.5803, "step": 72050 }, { "epoch": 0.72, "learning_rate": 0.00022108421052631578, "loss": 0.5848, "step": 72060 }, { "epoch": 0.72, "learning_rate": 0.00022100526315789473, "loss": 0.5816, "step": 72070 }, { "epoch": 0.72, "learning_rate": 0.00022092631578947368, "loss": 0.5852, "step": 72080 }, { "epoch": 0.72, "learning_rate": 0.00022084736842105263, "loss": 0.5868, "step": 72090 }, { "epoch": 0.72, "learning_rate": 0.00022076842105263157, "loss": 0.5966, "step": 72100 }, { "epoch": 0.72, "learning_rate": 0.0002206894736842105, "loss": 0.5947, "step": 72110 }, { "epoch": 0.72, "learning_rate": 0.00022061052631578944, "loss": 0.5872, "step": 72120 }, { "epoch": 0.72, "learning_rate": 0.0002205315789473684, "loss": 0.5674, "step": 72130 }, { "epoch": 0.72, "learning_rate": 0.00022045263157894734, "loss": 0.5797, "step": 72140 }, { "epoch": 0.72, "learning_rate": 0.0002203736842105263, "loss": 0.6056, "step": 72150 }, { "epoch": 0.72, "learning_rate": 0.00022029473684210526, "loss": 0.5937, "step": 72160 }, { "epoch": 0.72, "learning_rate": 0.0002202157894736842, "loss": 0.5895, "step": 72170 }, { "epoch": 0.72, "learning_rate": 0.00022013684210526313, "loss": 0.5959, "step": 72180 }, { "epoch": 0.72, "learning_rate": 0.00022005789473684208, "loss": 0.5931, "step": 72190 }, { "epoch": 0.72, "learning_rate": 0.00021997894736842103, "loss": 0.5929, "step": 72200 }, { "epoch": 0.72, "learning_rate": 0.00021989999999999998, "loss": 0.586, "step": 72210 }, { "epoch": 0.72, "learning_rate": 0.00021982105263157893, "loss": 0.5836, "step": 72220 }, { "epoch": 0.72, "learning_rate": 0.00021974210526315787, "loss": 0.6077, "step": 72230 }, { "epoch": 0.72, "learning_rate": 0.0002196631578947368, "loss": 0.5922, "step": 72240 }, { "epoch": 0.72, "learning_rate": 0.00021958421052631577, "loss": 0.597, "step": 72250 }, { "epoch": 0.72, "learning_rate": 0.00021950526315789472, "loss": 0.5902, "step": 72260 }, { "epoch": 0.72, "learning_rate": 0.00021942631578947367, "loss": 0.5983, "step": 72270 }, { "epoch": 0.72, "learning_rate": 0.00021934736842105262, "loss": 0.5861, "step": 72280 }, { "epoch": 0.72, "learning_rate": 0.00021926842105263156, "loss": 0.6096, "step": 72290 }, { "epoch": 0.72, "learning_rate": 0.0002191894736842105, "loss": 0.6019, "step": 72300 }, { "epoch": 0.72, "learning_rate": 0.00021911052631578943, "loss": 0.6009, "step": 72310 }, { "epoch": 0.72, "learning_rate": 0.00021903157894736838, "loss": 0.5955, "step": 72320 }, { "epoch": 0.72, "learning_rate": 0.00021895263157894733, "loss": 0.5925, "step": 72330 }, { "epoch": 0.72, "learning_rate": 0.0002188736842105263, "loss": 0.6106, "step": 72340 }, { "epoch": 0.72, "learning_rate": 0.00021879473684210525, "loss": 0.6014, "step": 72350 }, { "epoch": 0.72, "learning_rate": 0.0002187157894736842, "loss": 0.5975, "step": 72360 }, { "epoch": 0.72, "learning_rate": 0.00021863684210526315, "loss": 0.595, "step": 72370 }, { "epoch": 0.72, "learning_rate": 0.0002185578947368421, "loss": 0.5984, "step": 72380 }, { "epoch": 0.72, "learning_rate": 0.00021847894736842102, "loss": 0.5999, "step": 72390 }, { "epoch": 0.72, "learning_rate": 0.00021839999999999997, "loss": 0.5956, "step": 72400 }, { "epoch": 0.72, "learning_rate": 0.00021832105263157892, "loss": 0.5924, "step": 72410 }, { "epoch": 0.72, "learning_rate": 0.00021824210526315787, "loss": 0.5907, "step": 72420 }, { "epoch": 0.72, "learning_rate": 0.00021816315789473681, "loss": 0.5842, "step": 72430 }, { "epoch": 0.72, "learning_rate": 0.0002180842105263158, "loss": 0.5978, "step": 72440 }, { "epoch": 0.72, "learning_rate": 0.00021800526315789474, "loss": 0.5884, "step": 72450 }, { "epoch": 0.72, "learning_rate": 0.00021792631578947366, "loss": 0.5898, "step": 72460 }, { "epoch": 0.72, "learning_rate": 0.0002178473684210526, "loss": 0.5941, "step": 72470 }, { "epoch": 0.72, "learning_rate": 0.00021776842105263156, "loss": 0.6033, "step": 72480 }, { "epoch": 0.72, "learning_rate": 0.0002176894736842105, "loss": 0.6142, "step": 72490 }, { "epoch": 0.72, "learning_rate": 0.00021761052631578945, "loss": 0.5965, "step": 72500 }, { "epoch": 0.72, "eval_accuracy": 0.8749469840503773, "eval_loss": 0.576171875, "eval_runtime": 99.6694, "eval_samples_per_second": 802.653, "eval_steps_per_second": 1.575, "step": 72500 }, { "epoch": 0.73, "learning_rate": 0.0002175315789473684, "loss": 0.5961, "step": 72510 }, { "epoch": 0.73, "learning_rate": 0.00021745263157894732, "loss": 0.6001, "step": 72520 }, { "epoch": 0.73, "learning_rate": 0.00021737368421052632, "loss": 0.6151, "step": 72530 }, { "epoch": 0.73, "learning_rate": 0.00021729473684210525, "loss": 0.6152, "step": 72540 }, { "epoch": 0.73, "learning_rate": 0.0002172157894736842, "loss": 0.5974, "step": 72550 }, { "epoch": 0.73, "learning_rate": 0.00021713684210526314, "loss": 0.6027, "step": 72560 }, { "epoch": 0.73, "learning_rate": 0.0002170578947368421, "loss": 0.6109, "step": 72570 }, { "epoch": 0.73, "learning_rate": 0.00021697894736842104, "loss": 0.5968, "step": 72580 }, { "epoch": 0.73, "learning_rate": 0.0002169, "loss": 0.6037, "step": 72590 }, { "epoch": 0.73, "learning_rate": 0.0002168210526315789, "loss": 0.5886, "step": 72600 }, { "epoch": 0.73, "learning_rate": 0.00021674210526315786, "loss": 0.5981, "step": 72610 }, { "epoch": 0.73, "learning_rate": 0.00021666315789473683, "loss": 0.6046, "step": 72620 }, { "epoch": 0.73, "learning_rate": 0.00021658421052631578, "loss": 0.5896, "step": 72630 }, { "epoch": 0.73, "learning_rate": 0.00021650526315789473, "loss": 0.5975, "step": 72640 }, { "epoch": 0.73, "learning_rate": 0.00021642631578947368, "loss": 0.5955, "step": 72650 }, { "epoch": 0.73, "learning_rate": 0.00021634736842105262, "loss": 0.592, "step": 72660 }, { "epoch": 0.73, "learning_rate": 0.00021626842105263155, "loss": 0.6062, "step": 72670 }, { "epoch": 0.73, "learning_rate": 0.0002161894736842105, "loss": 0.596, "step": 72680 }, { "epoch": 0.73, "learning_rate": 0.00021611052631578944, "loss": 0.6005, "step": 72690 }, { "epoch": 0.73, "learning_rate": 0.0002160315789473684, "loss": 0.6064, "step": 72700 }, { "epoch": 0.73, "learning_rate": 0.00021595263157894737, "loss": 0.5907, "step": 72710 }, { "epoch": 0.73, "learning_rate": 0.00021587368421052631, "loss": 0.5921, "step": 72720 }, { "epoch": 0.73, "learning_rate": 0.00021579473684210526, "loss": 0.6098, "step": 72730 }, { "epoch": 0.73, "learning_rate": 0.00021571578947368418, "loss": 0.5913, "step": 72740 }, { "epoch": 0.73, "learning_rate": 0.00021563684210526313, "loss": 0.5979, "step": 72750 }, { "epoch": 0.73, "learning_rate": 0.00021555789473684208, "loss": 0.6038, "step": 72760 }, { "epoch": 0.73, "learning_rate": 0.00021547894736842103, "loss": 0.6031, "step": 72770 }, { "epoch": 0.73, "learning_rate": 0.00021539999999999998, "loss": 0.6046, "step": 72780 }, { "epoch": 0.73, "learning_rate": 0.00021532105263157893, "loss": 0.6048, "step": 72790 }, { "epoch": 0.73, "learning_rate": 0.00021524210526315785, "loss": 0.5917, "step": 72800 }, { "epoch": 0.73, "learning_rate": 0.00021516315789473685, "loss": 0.5912, "step": 72810 }, { "epoch": 0.73, "learning_rate": 0.00021508421052631577, "loss": 0.6047, "step": 72820 }, { "epoch": 0.73, "learning_rate": 0.00021500526315789472, "loss": 0.5936, "step": 72830 }, { "epoch": 0.73, "learning_rate": 0.00021492631578947367, "loss": 0.5931, "step": 72840 }, { "epoch": 0.73, "learning_rate": 0.00021484736842105262, "loss": 0.5977, "step": 72850 }, { "epoch": 0.73, "learning_rate": 0.00021476842105263156, "loss": 0.6, "step": 72860 }, { "epoch": 0.73, "learning_rate": 0.0002146894736842105, "loss": 0.5975, "step": 72870 }, { "epoch": 0.73, "learning_rate": 0.00021461052631578943, "loss": 0.5967, "step": 72880 }, { "epoch": 0.73, "learning_rate": 0.00021453157894736838, "loss": 0.6096, "step": 72890 }, { "epoch": 0.73, "learning_rate": 0.00021445263157894736, "loss": 0.5931, "step": 72900 }, { "epoch": 0.73, "learning_rate": 0.0002143736842105263, "loss": 0.5879, "step": 72910 }, { "epoch": 0.73, "learning_rate": 0.00021429473684210525, "loss": 0.5894, "step": 72920 }, { "epoch": 0.73, "learning_rate": 0.0002142157894736842, "loss": 0.5986, "step": 72930 }, { "epoch": 0.73, "learning_rate": 0.00021414473684210522, "loss": 0.585, "step": 72940 }, { "epoch": 0.73, "learning_rate": 0.00021406578947368417, "loss": 0.5892, "step": 72950 }, { "epoch": 0.73, "learning_rate": 0.00021398684210526312, "loss": 0.5951, "step": 72960 }, { "epoch": 0.73, "learning_rate": 0.0002139078947368421, "loss": 0.596, "step": 72970 }, { "epoch": 0.73, "learning_rate": 0.00021382894736842104, "loss": 0.596, "step": 72980 }, { "epoch": 0.73, "learning_rate": 0.00021375, "loss": 0.5977, "step": 72990 }, { "epoch": 0.73, "learning_rate": 0.00021367105263157894, "loss": 0.593, "step": 73000 }, { "epoch": 0.73, "learning_rate": 0.0002135921052631579, "loss": 0.5901, "step": 73010 }, { "epoch": 0.73, "learning_rate": 0.0002135131578947368, "loss": 0.5919, "step": 73020 }, { "epoch": 0.73, "learning_rate": 0.00021343421052631576, "loss": 0.5984, "step": 73030 }, { "epoch": 0.73, "learning_rate": 0.0002133552631578947, "loss": 0.597, "step": 73040 }, { "epoch": 0.73, "learning_rate": 0.00021327631578947365, "loss": 0.6008, "step": 73050 }, { "epoch": 0.73, "learning_rate": 0.00021319736842105263, "loss": 0.5879, "step": 73060 }, { "epoch": 0.73, "learning_rate": 0.00021311842105263158, "loss": 0.5941, "step": 73070 }, { "epoch": 0.73, "learning_rate": 0.00021303947368421053, "loss": 0.588, "step": 73080 }, { "epoch": 0.73, "learning_rate": 0.00021296052631578945, "loss": 0.6014, "step": 73090 }, { "epoch": 0.73, "learning_rate": 0.0002128815789473684, "loss": 0.6001, "step": 73100 }, { "epoch": 0.73, "learning_rate": 0.00021280263157894734, "loss": 0.5957, "step": 73110 }, { "epoch": 0.73, "learning_rate": 0.0002127236842105263, "loss": 0.5876, "step": 73120 }, { "epoch": 0.73, "learning_rate": 0.00021264473684210524, "loss": 0.59, "step": 73130 }, { "epoch": 0.73, "learning_rate": 0.0002125657894736842, "loss": 0.6049, "step": 73140 }, { "epoch": 0.73, "learning_rate": 0.00021248684210526316, "loss": 0.597, "step": 73150 }, { "epoch": 0.73, "learning_rate": 0.0002124078947368421, "loss": 0.604, "step": 73160 }, { "epoch": 0.73, "learning_rate": 0.00021232894736842103, "loss": 0.6002, "step": 73170 }, { "epoch": 0.73, "learning_rate": 0.00021224999999999998, "loss": 0.5974, "step": 73180 }, { "epoch": 0.73, "learning_rate": 0.00021217105263157893, "loss": 0.5976, "step": 73190 }, { "epoch": 0.73, "learning_rate": 0.00021209210526315788, "loss": 0.5933, "step": 73200 }, { "epoch": 0.73, "learning_rate": 0.00021201315789473683, "loss": 0.5826, "step": 73210 }, { "epoch": 0.73, "learning_rate": 0.00021193421052631578, "loss": 0.5848, "step": 73220 }, { "epoch": 0.73, "learning_rate": 0.0002118552631578947, "loss": 0.5892, "step": 73230 }, { "epoch": 0.73, "learning_rate": 0.00021177631578947364, "loss": 0.5816, "step": 73240 }, { "epoch": 0.73, "learning_rate": 0.00021169736842105262, "loss": 0.5683, "step": 73250 }, { "epoch": 0.73, "learning_rate": 0.00021161842105263157, "loss": 0.5969, "step": 73260 }, { "epoch": 0.73, "learning_rate": 0.00021154736842105262, "loss": 0.5983, "step": 73270 }, { "epoch": 0.73, "learning_rate": 0.00021146842105263156, "loss": 0.5935, "step": 73280 }, { "epoch": 0.73, "learning_rate": 0.00021138947368421049, "loss": 0.6056, "step": 73290 }, { "epoch": 0.73, "learning_rate": 0.00021131052631578943, "loss": 0.5983, "step": 73300 }, { "epoch": 0.73, "learning_rate": 0.0002112315789473684, "loss": 0.6023, "step": 73310 }, { "epoch": 0.73, "learning_rate": 0.00021115263157894736, "loss": 0.591, "step": 73320 }, { "epoch": 0.73, "learning_rate": 0.0002110736842105263, "loss": 0.5914, "step": 73330 }, { "epoch": 0.73, "learning_rate": 0.00021099473684210525, "loss": 0.5944, "step": 73340 }, { "epoch": 0.73, "learning_rate": 0.0002109157894736842, "loss": 0.5988, "step": 73350 }, { "epoch": 0.73, "learning_rate": 0.00021083684210526312, "loss": 0.587, "step": 73360 }, { "epoch": 0.73, "learning_rate": 0.00021075789473684207, "loss": 0.5785, "step": 73370 }, { "epoch": 0.73, "learning_rate": 0.00021067894736842102, "loss": 0.6024, "step": 73380 }, { "epoch": 0.73, "learning_rate": 0.00021059999999999997, "loss": 0.5973, "step": 73390 }, { "epoch": 0.73, "learning_rate": 0.00021052105263157892, "loss": 0.5994, "step": 73400 }, { "epoch": 0.73, "learning_rate": 0.0002104421052631579, "loss": 0.5964, "step": 73410 }, { "epoch": 0.73, "learning_rate": 0.00021036315789473684, "loss": 0.5854, "step": 73420 }, { "epoch": 0.73, "learning_rate": 0.0002102842105263158, "loss": 0.5947, "step": 73430 }, { "epoch": 0.73, "learning_rate": 0.0002102052631578947, "loss": 0.5925, "step": 73440 }, { "epoch": 0.73, "learning_rate": 0.00021012631578947366, "loss": 0.5833, "step": 73450 }, { "epoch": 0.73, "learning_rate": 0.0002100473684210526, "loss": 0.5844, "step": 73460 }, { "epoch": 0.73, "learning_rate": 0.00020996842105263156, "loss": 0.5915, "step": 73470 }, { "epoch": 0.73, "learning_rate": 0.0002098894736842105, "loss": 0.5919, "step": 73480 }, { "epoch": 0.73, "learning_rate": 0.00020981052631578945, "loss": 0.5949, "step": 73490 }, { "epoch": 0.73, "learning_rate": 0.00020973157894736843, "loss": 0.5937, "step": 73500 }, { "epoch": 0.74, "learning_rate": 0.00020965263157894735, "loss": 0.5894, "step": 73510 }, { "epoch": 0.74, "learning_rate": 0.0002095736842105263, "loss": 0.5986, "step": 73520 }, { "epoch": 0.74, "learning_rate": 0.00020949473684210525, "loss": 0.5838, "step": 73530 }, { "epoch": 0.74, "learning_rate": 0.0002094157894736842, "loss": 0.5831, "step": 73540 }, { "epoch": 0.74, "learning_rate": 0.00020933684210526314, "loss": 0.5878, "step": 73550 }, { "epoch": 0.74, "learning_rate": 0.0002092578947368421, "loss": 0.5914, "step": 73560 }, { "epoch": 0.74, "learning_rate": 0.000209178947368421, "loss": 0.5783, "step": 73570 }, { "epoch": 0.74, "learning_rate": 0.00020909999999999996, "loss": 0.599, "step": 73580 }, { "epoch": 0.74, "learning_rate": 0.00020902105263157894, "loss": 0.59, "step": 73590 }, { "epoch": 0.74, "learning_rate": 0.00020894210526315788, "loss": 0.5932, "step": 73600 }, { "epoch": 0.74, "learning_rate": 0.00020886315789473683, "loss": 0.5887, "step": 73610 }, { "epoch": 0.74, "learning_rate": 0.00020878421052631578, "loss": 0.5971, "step": 73620 }, { "epoch": 0.74, "learning_rate": 0.00020870526315789473, "loss": 0.5933, "step": 73630 }, { "epoch": 0.74, "learning_rate": 0.00020862631578947368, "loss": 0.5931, "step": 73640 }, { "epoch": 0.74, "learning_rate": 0.0002085473684210526, "loss": 0.5976, "step": 73650 }, { "epoch": 0.74, "learning_rate": 0.00020846842105263155, "loss": 0.5878, "step": 73660 }, { "epoch": 0.74, "learning_rate": 0.0002083894736842105, "loss": 0.5901, "step": 73670 }, { "epoch": 0.74, "learning_rate": 0.00020831052631578944, "loss": 0.5919, "step": 73680 }, { "epoch": 0.74, "learning_rate": 0.00020823157894736842, "loss": 0.6052, "step": 73690 }, { "epoch": 0.74, "learning_rate": 0.00020815263157894737, "loss": 0.5924, "step": 73700 }, { "epoch": 0.74, "learning_rate": 0.00020807368421052631, "loss": 0.5885, "step": 73710 }, { "epoch": 0.74, "learning_rate": 0.00020799473684210524, "loss": 0.596, "step": 73720 }, { "epoch": 0.74, "learning_rate": 0.00020791578947368418, "loss": 0.5957, "step": 73730 }, { "epoch": 0.74, "learning_rate": 0.00020783684210526313, "loss": 0.5959, "step": 73740 }, { "epoch": 0.74, "learning_rate": 0.00020775789473684208, "loss": 0.5915, "step": 73750 }, { "epoch": 0.74, "learning_rate": 0.00020767894736842103, "loss": 0.5849, "step": 73760 }, { "epoch": 0.74, "learning_rate": 0.00020759999999999998, "loss": 0.6014, "step": 73770 }, { "epoch": 0.74, "learning_rate": 0.00020752105263157895, "loss": 0.5915, "step": 73780 }, { "epoch": 0.74, "learning_rate": 0.00020744210526315787, "loss": 0.5988, "step": 73790 }, { "epoch": 0.74, "learning_rate": 0.00020736315789473682, "loss": 0.5941, "step": 73800 }, { "epoch": 0.74, "learning_rate": 0.00020728421052631577, "loss": 0.5902, "step": 73810 }, { "epoch": 0.74, "learning_rate": 0.00020720526315789472, "loss": 0.5847, "step": 73820 }, { "epoch": 0.74, "learning_rate": 0.00020712631578947367, "loss": 0.5897, "step": 73830 }, { "epoch": 0.74, "learning_rate": 0.00020704736842105262, "loss": 0.5903, "step": 73840 }, { "epoch": 0.74, "learning_rate": 0.00020696842105263154, "loss": 0.592, "step": 73850 }, { "epoch": 0.74, "learning_rate": 0.00020688947368421049, "loss": 0.5881, "step": 73860 }, { "epoch": 0.74, "learning_rate": 0.00020681052631578946, "loss": 0.5812, "step": 73870 }, { "epoch": 0.74, "learning_rate": 0.0002067315789473684, "loss": 0.5876, "step": 73880 }, { "epoch": 0.74, "learning_rate": 0.00020665263157894736, "loss": 0.5995, "step": 73890 }, { "epoch": 0.74, "learning_rate": 0.0002065736842105263, "loss": 0.5869, "step": 73900 }, { "epoch": 0.74, "learning_rate": 0.00020649473684210525, "loss": 0.5843, "step": 73910 }, { "epoch": 0.74, "learning_rate": 0.0002064157894736842, "loss": 0.5949, "step": 73920 }, { "epoch": 0.74, "learning_rate": 0.00020633684210526312, "loss": 0.6077, "step": 73930 }, { "epoch": 0.74, "learning_rate": 0.00020625789473684207, "loss": 0.5957, "step": 73940 }, { "epoch": 0.74, "learning_rate": 0.00020617894736842102, "loss": 0.589, "step": 73950 }, { "epoch": 0.74, "learning_rate": 0.0002061, "loss": 0.6004, "step": 73960 }, { "epoch": 0.74, "learning_rate": 0.00020602105263157894, "loss": 0.596, "step": 73970 }, { "epoch": 0.74, "learning_rate": 0.0002059421052631579, "loss": 0.5931, "step": 73980 }, { "epoch": 0.74, "learning_rate": 0.00020586315789473684, "loss": 0.5855, "step": 73990 }, { "epoch": 0.74, "learning_rate": 0.00020578421052631576, "loss": 0.5928, "step": 74000 }, { "epoch": 0.74, "learning_rate": 0.0002057052631578947, "loss": 0.5871, "step": 74010 }, { "epoch": 0.74, "learning_rate": 0.00020562631578947366, "loss": 0.597, "step": 74020 }, { "epoch": 0.74, "learning_rate": 0.0002055473684210526, "loss": 0.5806, "step": 74030 }, { "epoch": 0.74, "learning_rate": 0.00020546842105263155, "loss": 0.5913, "step": 74040 }, { "epoch": 0.74, "learning_rate": 0.0002053894736842105, "loss": 0.587, "step": 74050 }, { "epoch": 0.74, "learning_rate": 0.00020531052631578948, "loss": 0.5958, "step": 74060 }, { "epoch": 0.74, "learning_rate": 0.00020523157894736843, "loss": 0.5976, "step": 74070 }, { "epoch": 0.74, "learning_rate": 0.00020515263157894735, "loss": 0.5926, "step": 74080 }, { "epoch": 0.74, "learning_rate": 0.0002050736842105263, "loss": 0.5866, "step": 74090 }, { "epoch": 0.74, "learning_rate": 0.00020499473684210524, "loss": 0.5865, "step": 74100 }, { "epoch": 0.74, "learning_rate": 0.0002049157894736842, "loss": 0.596, "step": 74110 }, { "epoch": 0.74, "learning_rate": 0.00020483684210526314, "loss": 0.5871, "step": 74120 }, { "epoch": 0.74, "learning_rate": 0.0002047578947368421, "loss": 0.5887, "step": 74130 }, { "epoch": 0.74, "learning_rate": 0.000204678947368421, "loss": 0.58, "step": 74140 }, { "epoch": 0.74, "learning_rate": 0.00020459999999999999, "loss": 0.5958, "step": 74150 }, { "epoch": 0.74, "learning_rate": 0.00020452105263157893, "loss": 0.5887, "step": 74160 }, { "epoch": 0.74, "learning_rate": 0.00020444210526315788, "loss": 0.6021, "step": 74170 }, { "epoch": 0.74, "learning_rate": 0.00020436315789473683, "loss": 0.5973, "step": 74180 }, { "epoch": 0.74, "learning_rate": 0.00020428421052631578, "loss": 0.5917, "step": 74190 }, { "epoch": 0.74, "learning_rate": 0.00020420526315789473, "loss": 0.5885, "step": 74200 }, { "epoch": 0.74, "learning_rate": 0.00020412631578947365, "loss": 0.5923, "step": 74210 }, { "epoch": 0.74, "learning_rate": 0.0002040473684210526, "loss": 0.5991, "step": 74220 }, { "epoch": 0.74, "learning_rate": 0.00020396842105263155, "loss": 0.594, "step": 74230 }, { "epoch": 0.74, "learning_rate": 0.00020388947368421052, "loss": 0.5846, "step": 74240 }, { "epoch": 0.74, "learning_rate": 0.00020381052631578947, "loss": 0.5927, "step": 74250 }, { "epoch": 0.74, "learning_rate": 0.00020373157894736842, "loss": 0.5902, "step": 74260 }, { "epoch": 0.74, "learning_rate": 0.00020365263157894737, "loss": 0.5959, "step": 74270 }, { "epoch": 0.74, "learning_rate": 0.0002035736842105263, "loss": 0.595, "step": 74280 }, { "epoch": 0.74, "learning_rate": 0.00020349473684210524, "loss": 0.5999, "step": 74290 }, { "epoch": 0.74, "learning_rate": 0.00020341578947368418, "loss": 0.5981, "step": 74300 }, { "epoch": 0.74, "learning_rate": 0.00020333684210526313, "loss": 0.5802, "step": 74310 }, { "epoch": 0.74, "learning_rate": 0.00020325789473684208, "loss": 0.5898, "step": 74320 }, { "epoch": 0.74, "learning_rate": 0.00020317894736842103, "loss": 0.5907, "step": 74330 }, { "epoch": 0.74, "learning_rate": 0.0002031, "loss": 0.5945, "step": 74340 }, { "epoch": 0.74, "learning_rate": 0.00020302105263157895, "loss": 0.5891, "step": 74350 }, { "epoch": 0.74, "learning_rate": 0.00020294210526315787, "loss": 0.5939, "step": 74360 }, { "epoch": 0.74, "learning_rate": 0.00020286315789473682, "loss": 0.5917, "step": 74370 }, { "epoch": 0.74, "learning_rate": 0.00020278421052631577, "loss": 0.6046, "step": 74380 }, { "epoch": 0.74, "learning_rate": 0.00020270526315789472, "loss": 0.5886, "step": 74390 }, { "epoch": 0.74, "learning_rate": 0.00020262631578947367, "loss": 0.5927, "step": 74400 }, { "epoch": 0.74, "learning_rate": 0.00020254736842105261, "loss": 0.5799, "step": 74410 }, { "epoch": 0.74, "learning_rate": 0.00020246842105263154, "loss": 0.5988, "step": 74420 }, { "epoch": 0.74, "learning_rate": 0.0002023894736842105, "loss": 0.5747, "step": 74430 }, { "epoch": 0.74, "learning_rate": 0.00020231052631578946, "loss": 0.6025, "step": 74440 }, { "epoch": 0.74, "learning_rate": 0.0002022315789473684, "loss": 0.5961, "step": 74450 }, { "epoch": 0.74, "learning_rate": 0.00020215263157894736, "loss": 0.6067, "step": 74460 }, { "epoch": 0.74, "learning_rate": 0.0002020736842105263, "loss": 0.5855, "step": 74470 }, { "epoch": 0.74, "learning_rate": 0.00020199473684210525, "loss": 0.603, "step": 74480 }, { "epoch": 0.74, "learning_rate": 0.00020191578947368417, "loss": 0.5925, "step": 74490 }, { "epoch": 0.74, "learning_rate": 0.00020183684210526312, "loss": 0.5881, "step": 74500 }, { "epoch": 0.75, "learning_rate": 0.00020175789473684207, "loss": 0.5895, "step": 74510 }, { "epoch": 0.75, "learning_rate": 0.00020167894736842105, "loss": 0.5814, "step": 74520 }, { "epoch": 0.75, "learning_rate": 0.0002016, "loss": 0.5903, "step": 74530 }, { "epoch": 0.75, "learning_rate": 0.00020152105263157894, "loss": 0.5922, "step": 74540 }, { "epoch": 0.75, "learning_rate": 0.0002014421052631579, "loss": 0.5976, "step": 74550 }, { "epoch": 0.75, "learning_rate": 0.00020136315789473684, "loss": 0.5997, "step": 74560 }, { "epoch": 0.75, "learning_rate": 0.00020128421052631576, "loss": 0.5949, "step": 74570 }, { "epoch": 0.75, "learning_rate": 0.0002012052631578947, "loss": 0.5946, "step": 74580 }, { "epoch": 0.75, "learning_rate": 0.00020112631578947366, "loss": 0.5893, "step": 74590 }, { "epoch": 0.75, "learning_rate": 0.0002010473684210526, "loss": 0.5897, "step": 74600 }, { "epoch": 0.75, "learning_rate": 0.00020096842105263155, "loss": 0.5852, "step": 74610 }, { "epoch": 0.75, "learning_rate": 0.00020088947368421053, "loss": 0.5922, "step": 74620 }, { "epoch": 0.75, "learning_rate": 0.00020081052631578948, "loss": 0.5941, "step": 74630 }, { "epoch": 0.75, "learning_rate": 0.0002007315789473684, "loss": 0.5982, "step": 74640 }, { "epoch": 0.75, "learning_rate": 0.00020065263157894735, "loss": 0.5922, "step": 74650 }, { "epoch": 0.75, "learning_rate": 0.0002005736842105263, "loss": 0.5886, "step": 74660 }, { "epoch": 0.75, "learning_rate": 0.00020049473684210524, "loss": 0.5966, "step": 74670 }, { "epoch": 0.75, "learning_rate": 0.0002004157894736842, "loss": 0.5947, "step": 74680 }, { "epoch": 0.75, "learning_rate": 0.00020033684210526314, "loss": 0.601, "step": 74690 }, { "epoch": 0.75, "learning_rate": 0.00020025789473684206, "loss": 0.5969, "step": 74700 }, { "epoch": 0.75, "learning_rate": 0.00020017894736842104, "loss": 0.593, "step": 74710 }, { "epoch": 0.75, "learning_rate": 0.00020009999999999998, "loss": 0.5953, "step": 74720 }, { "epoch": 0.75, "learning_rate": 0.00020002105263157893, "loss": 0.5841, "step": 74730 }, { "epoch": 0.75, "learning_rate": 0.00019994210526315788, "loss": 0.5884, "step": 74740 }, { "epoch": 0.75, "learning_rate": 0.00019986315789473683, "loss": 0.5938, "step": 74750 }, { "epoch": 0.75, "learning_rate": 0.00019978421052631578, "loss": 0.588, "step": 74760 }, { "epoch": 0.75, "learning_rate": 0.0001997052631578947, "loss": 0.597, "step": 74770 }, { "epoch": 0.75, "learning_rate": 0.00019962631578947365, "loss": 0.602, "step": 74780 }, { "epoch": 0.75, "learning_rate": 0.0001995473684210526, "loss": 0.6009, "step": 74790 }, { "epoch": 0.75, "learning_rate": 0.00019946842105263157, "loss": 0.5925, "step": 74800 }, { "epoch": 0.75, "learning_rate": 0.00019938947368421052, "loss": 0.6012, "step": 74810 }, { "epoch": 0.75, "learning_rate": 0.00019931052631578947, "loss": 0.6001, "step": 74820 }, { "epoch": 0.75, "learning_rate": 0.00019923157894736842, "loss": 0.5994, "step": 74830 }, { "epoch": 0.75, "learning_rate": 0.00019915263157894736, "loss": 0.5961, "step": 74840 }, { "epoch": 0.75, "learning_rate": 0.00019907368421052629, "loss": 0.6031, "step": 74850 }, { "epoch": 0.75, "learning_rate": 0.00019899473684210523, "loss": 0.5944, "step": 74860 }, { "epoch": 0.75, "learning_rate": 0.00019891578947368418, "loss": 0.5957, "step": 74870 }, { "epoch": 0.75, "learning_rate": 0.00019883684210526313, "loss": 0.5954, "step": 74880 }, { "epoch": 0.75, "learning_rate": 0.00019875789473684208, "loss": 0.5925, "step": 74890 }, { "epoch": 0.75, "learning_rate": 0.00019867894736842105, "loss": 0.5936, "step": 74900 }, { "epoch": 0.75, "learning_rate": 0.0001986, "loss": 0.6004, "step": 74910 }, { "epoch": 0.75, "learning_rate": 0.00019852105263157892, "loss": 0.5987, "step": 74920 }, { "epoch": 0.75, "learning_rate": 0.00019844210526315787, "loss": 0.5971, "step": 74930 }, { "epoch": 0.75, "learning_rate": 0.00019836315789473682, "loss": 0.5983, "step": 74940 }, { "epoch": 0.75, "learning_rate": 0.00019828421052631577, "loss": 0.5889, "step": 74950 }, { "epoch": 0.75, "learning_rate": 0.00019820526315789472, "loss": 0.5969, "step": 74960 }, { "epoch": 0.75, "learning_rate": 0.00019812631578947367, "loss": 0.6017, "step": 74970 }, { "epoch": 0.75, "learning_rate": 0.0001980473684210526, "loss": 0.5875, "step": 74980 }, { "epoch": 0.75, "learning_rate": 0.0001979684210526316, "loss": 0.5982, "step": 74990 }, { "epoch": 0.75, "learning_rate": 0.0001978894736842105, "loss": 0.5837, "step": 75000 }, { "epoch": 0.75, "eval_accuracy": 0.8770046977807257, "eval_loss": 0.56396484375, "eval_runtime": 98.5907, "eval_samples_per_second": 811.436, "eval_steps_per_second": 1.592, "step": 75000 }, { "epoch": 0.75, "learning_rate": 0.00019781052631578946, "loss": 0.5881, "step": 75010 }, { "epoch": 0.75, "learning_rate": 0.0001977315789473684, "loss": 0.5946, "step": 75020 }, { "epoch": 0.75, "learning_rate": 0.00019765263157894736, "loss": 0.588, "step": 75030 }, { "epoch": 0.75, "learning_rate": 0.0001975736842105263, "loss": 0.5886, "step": 75040 }, { "epoch": 0.75, "learning_rate": 0.00019749473684210525, "loss": 0.5792, "step": 75050 }, { "epoch": 0.75, "learning_rate": 0.00019741578947368417, "loss": 0.5923, "step": 75060 }, { "epoch": 0.75, "learning_rate": 0.00019733684210526312, "loss": 0.6011, "step": 75070 }, { "epoch": 0.75, "learning_rate": 0.0001972578947368421, "loss": 0.5931, "step": 75080 }, { "epoch": 0.75, "learning_rate": 0.00019717894736842105, "loss": 0.5948, "step": 75090 }, { "epoch": 0.75, "learning_rate": 0.0001971, "loss": 0.5909, "step": 75100 }, { "epoch": 0.75, "learning_rate": 0.00019702105263157894, "loss": 0.5982, "step": 75110 }, { "epoch": 0.75, "learning_rate": 0.0001969421052631579, "loss": 0.5944, "step": 75120 }, { "epoch": 0.75, "learning_rate": 0.0001968631578947368, "loss": 0.5918, "step": 75130 }, { "epoch": 0.75, "learning_rate": 0.00019678421052631576, "loss": 0.5985, "step": 75140 }, { "epoch": 0.75, "learning_rate": 0.0001967052631578947, "loss": 0.5968, "step": 75150 }, { "epoch": 0.75, "learning_rate": 0.00019662631578947366, "loss": 0.5939, "step": 75160 }, { "epoch": 0.75, "learning_rate": 0.0001965473684210526, "loss": 0.5976, "step": 75170 }, { "epoch": 0.75, "learning_rate": 0.00019646842105263158, "loss": 0.5988, "step": 75180 }, { "epoch": 0.75, "learning_rate": 0.00019638947368421053, "loss": 0.6072, "step": 75190 }, { "epoch": 0.75, "learning_rate": 0.00019631052631578945, "loss": 0.5909, "step": 75200 }, { "epoch": 0.75, "learning_rate": 0.0001962315789473684, "loss": 0.5811, "step": 75210 }, { "epoch": 0.75, "learning_rate": 0.00019615263157894735, "loss": 0.5932, "step": 75220 }, { "epoch": 0.75, "learning_rate": 0.0001960736842105263, "loss": 0.5838, "step": 75230 }, { "epoch": 0.75, "learning_rate": 0.00019599473684210524, "loss": 0.5789, "step": 75240 }, { "epoch": 0.75, "learning_rate": 0.0001959157894736842, "loss": 0.5852, "step": 75250 }, { "epoch": 0.75, "learning_rate": 0.0001958368421052631, "loss": 0.5922, "step": 75260 }, { "epoch": 0.75, "learning_rate": 0.00019575789473684211, "loss": 0.5971, "step": 75270 }, { "epoch": 0.75, "learning_rate": 0.00019568684210526314, "loss": 0.59, "step": 75280 }, { "epoch": 0.75, "learning_rate": 0.00019560789473684208, "loss": 0.5934, "step": 75290 }, { "epoch": 0.75, "learning_rate": 0.00019552894736842103, "loss": 0.5961, "step": 75300 }, { "epoch": 0.75, "learning_rate": 0.00019544999999999998, "loss": 0.5928, "step": 75310 }, { "epoch": 0.75, "learning_rate": 0.00019537105263157893, "loss": 0.5838, "step": 75320 }, { "epoch": 0.75, "learning_rate": 0.0001952921052631579, "loss": 0.5978, "step": 75330 }, { "epoch": 0.75, "learning_rate": 0.00019521315789473683, "loss": 0.5994, "step": 75340 }, { "epoch": 0.75, "learning_rate": 0.00019513421052631577, "loss": 0.5816, "step": 75350 }, { "epoch": 0.75, "learning_rate": 0.00019505526315789472, "loss": 0.6051, "step": 75360 }, { "epoch": 0.75, "learning_rate": 0.00019497631578947367, "loss": 0.5852, "step": 75370 }, { "epoch": 0.75, "learning_rate": 0.00019489736842105262, "loss": 0.5833, "step": 75380 }, { "epoch": 0.75, "learning_rate": 0.00019481842105263157, "loss": 0.5901, "step": 75390 }, { "epoch": 0.75, "learning_rate": 0.0001947394736842105, "loss": 0.5961, "step": 75400 }, { "epoch": 0.75, "learning_rate": 0.00019466052631578944, "loss": 0.5979, "step": 75410 }, { "epoch": 0.75, "learning_rate": 0.00019458157894736838, "loss": 0.5892, "step": 75420 }, { "epoch": 0.75, "learning_rate": 0.00019450263157894736, "loss": 0.6031, "step": 75430 }, { "epoch": 0.75, "learning_rate": 0.0001944236842105263, "loss": 0.592, "step": 75440 }, { "epoch": 0.75, "learning_rate": 0.00019434473684210526, "loss": 0.6006, "step": 75450 }, { "epoch": 0.75, "learning_rate": 0.0001942657894736842, "loss": 0.5834, "step": 75460 }, { "epoch": 0.75, "learning_rate": 0.00019418684210526315, "loss": 0.5959, "step": 75470 }, { "epoch": 0.75, "learning_rate": 0.00019410789473684207, "loss": 0.5937, "step": 75480 }, { "epoch": 0.75, "learning_rate": 0.00019402894736842102, "loss": 0.5794, "step": 75490 }, { "epoch": 0.76, "learning_rate": 0.00019394999999999997, "loss": 0.589, "step": 75500 }, { "epoch": 0.76, "learning_rate": 0.00019387105263157892, "loss": 0.5825, "step": 75510 }, { "epoch": 0.76, "learning_rate": 0.0001937921052631579, "loss": 0.5892, "step": 75520 }, { "epoch": 0.76, "learning_rate": 0.00019371315789473684, "loss": 0.5842, "step": 75530 }, { "epoch": 0.76, "learning_rate": 0.0001936342105263158, "loss": 0.5992, "step": 75540 }, { "epoch": 0.76, "learning_rate": 0.0001935552631578947, "loss": 0.5851, "step": 75550 }, { "epoch": 0.76, "learning_rate": 0.00019347631578947366, "loss": 0.5842, "step": 75560 }, { "epoch": 0.76, "learning_rate": 0.0001933973684210526, "loss": 0.5936, "step": 75570 }, { "epoch": 0.76, "learning_rate": 0.00019331842105263156, "loss": 0.58, "step": 75580 }, { "epoch": 0.76, "learning_rate": 0.0001932394736842105, "loss": 0.5877, "step": 75590 }, { "epoch": 0.76, "learning_rate": 0.00019316052631578945, "loss": 0.5899, "step": 75600 }, { "epoch": 0.76, "learning_rate": 0.00019308157894736843, "loss": 0.5859, "step": 75610 }, { "epoch": 0.76, "learning_rate": 0.00019300263157894738, "loss": 0.5906, "step": 75620 }, { "epoch": 0.76, "learning_rate": 0.0001929236842105263, "loss": 0.6013, "step": 75630 }, { "epoch": 0.76, "learning_rate": 0.00019284473684210525, "loss": 0.5922, "step": 75640 }, { "epoch": 0.76, "learning_rate": 0.0001927657894736842, "loss": 0.593, "step": 75650 }, { "epoch": 0.76, "learning_rate": 0.00019268684210526314, "loss": 0.5846, "step": 75660 }, { "epoch": 0.76, "learning_rate": 0.0001926078947368421, "loss": 0.6047, "step": 75670 }, { "epoch": 0.76, "learning_rate": 0.00019252894736842104, "loss": 0.5999, "step": 75680 }, { "epoch": 0.76, "learning_rate": 0.00019244999999999996, "loss": 0.5929, "step": 75690 }, { "epoch": 0.76, "learning_rate": 0.0001923710526315789, "loss": 0.5971, "step": 75700 }, { "epoch": 0.76, "learning_rate": 0.00019229210526315789, "loss": 0.5924, "step": 75710 }, { "epoch": 0.76, "learning_rate": 0.00019221315789473683, "loss": 0.5932, "step": 75720 }, { "epoch": 0.76, "learning_rate": 0.00019213421052631578, "loss": 0.5953, "step": 75730 }, { "epoch": 0.76, "learning_rate": 0.00019205526315789473, "loss": 0.5933, "step": 75740 }, { "epoch": 0.76, "learning_rate": 0.00019197631578947368, "loss": 0.5838, "step": 75750 }, { "epoch": 0.76, "learning_rate": 0.0001918973684210526, "loss": 0.5949, "step": 75760 }, { "epoch": 0.76, "learning_rate": 0.00019181842105263155, "loss": 0.583, "step": 75770 }, { "epoch": 0.76, "learning_rate": 0.0001917394736842105, "loss": 0.5799, "step": 75780 }, { "epoch": 0.76, "learning_rate": 0.00019166052631578944, "loss": 0.5821, "step": 75790 }, { "epoch": 0.76, "learning_rate": 0.00019158157894736842, "loss": 0.582, "step": 75800 }, { "epoch": 0.76, "learning_rate": 0.00019150263157894737, "loss": 0.5926, "step": 75810 }, { "epoch": 0.76, "learning_rate": 0.00019142368421052632, "loss": 0.5814, "step": 75820 }, { "epoch": 0.76, "learning_rate": 0.00019134473684210524, "loss": 0.5775, "step": 75830 }, { "epoch": 0.76, "learning_rate": 0.00019126578947368419, "loss": 0.589, "step": 75840 }, { "epoch": 0.76, "learning_rate": 0.00019118684210526313, "loss": 0.5776, "step": 75850 }, { "epoch": 0.76, "learning_rate": 0.00019110789473684208, "loss": 0.5853, "step": 75860 }, { "epoch": 0.76, "learning_rate": 0.00019102894736842103, "loss": 0.5883, "step": 75870 }, { "epoch": 0.76, "learning_rate": 0.00019094999999999998, "loss": 0.5909, "step": 75880 }, { "epoch": 0.76, "learning_rate": 0.00019087105263157895, "loss": 0.592, "step": 75890 }, { "epoch": 0.76, "learning_rate": 0.0001907921052631579, "loss": 0.5967, "step": 75900 }, { "epoch": 0.76, "learning_rate": 0.00019071315789473682, "loss": 0.5933, "step": 75910 }, { "epoch": 0.76, "learning_rate": 0.00019063421052631577, "loss": 0.5856, "step": 75920 }, { "epoch": 0.76, "learning_rate": 0.00019055526315789472, "loss": 0.5865, "step": 75930 }, { "epoch": 0.76, "learning_rate": 0.00019047631578947367, "loss": 0.5849, "step": 75940 }, { "epoch": 0.76, "learning_rate": 0.00019039736842105262, "loss": 0.5971, "step": 75950 }, { "epoch": 0.76, "learning_rate": 0.00019031842105263157, "loss": 0.5906, "step": 75960 }, { "epoch": 0.76, "learning_rate": 0.0001902394736842105, "loss": 0.5767, "step": 75970 }, { "epoch": 0.76, "learning_rate": 0.00019016052631578944, "loss": 0.5784, "step": 75980 }, { "epoch": 0.76, "learning_rate": 0.0001900815789473684, "loss": 0.5832, "step": 75990 }, { "epoch": 0.76, "learning_rate": 0.00019000263157894736, "loss": 0.5943, "step": 76000 }, { "epoch": 0.76, "learning_rate": 0.0001899236842105263, "loss": 0.5917, "step": 76010 }, { "epoch": 0.76, "learning_rate": 0.00018984473684210526, "loss": 0.596, "step": 76020 }, { "epoch": 0.76, "learning_rate": 0.0001897657894736842, "loss": 0.588, "step": 76030 }, { "epoch": 0.76, "learning_rate": 0.00018968684210526313, "loss": 0.6016, "step": 76040 }, { "epoch": 0.76, "learning_rate": 0.00018960789473684207, "loss": 0.5879, "step": 76050 }, { "epoch": 0.76, "learning_rate": 0.00018952894736842102, "loss": 0.5827, "step": 76060 }, { "epoch": 0.76, "learning_rate": 0.00018944999999999997, "loss": 0.5805, "step": 76070 }, { "epoch": 0.76, "learning_rate": 0.00018937105263157895, "loss": 0.5955, "step": 76080 }, { "epoch": 0.76, "learning_rate": 0.0001892921052631579, "loss": 0.5977, "step": 76090 }, { "epoch": 0.76, "learning_rate": 0.00018921315789473684, "loss": 0.5988, "step": 76100 }, { "epoch": 0.76, "learning_rate": 0.0001891342105263158, "loss": 0.5972, "step": 76110 }, { "epoch": 0.76, "learning_rate": 0.0001890552631578947, "loss": 0.5906, "step": 76120 }, { "epoch": 0.76, "learning_rate": 0.00018897631578947366, "loss": 0.5982, "step": 76130 }, { "epoch": 0.76, "learning_rate": 0.0001888973684210526, "loss": 0.5872, "step": 76140 }, { "epoch": 0.76, "learning_rate": 0.00018881842105263156, "loss": 0.5906, "step": 76150 }, { "epoch": 0.76, "learning_rate": 0.0001887394736842105, "loss": 0.6073, "step": 76160 }, { "epoch": 0.76, "learning_rate": 0.00018866052631578948, "loss": 0.6051, "step": 76170 }, { "epoch": 0.76, "learning_rate": 0.00018858157894736843, "loss": 0.5927, "step": 76180 }, { "epoch": 0.76, "learning_rate": 0.00018850263157894735, "loss": 0.5896, "step": 76190 }, { "epoch": 0.76, "learning_rate": 0.0001884236842105263, "loss": 0.586, "step": 76200 }, { "epoch": 0.76, "learning_rate": 0.00018834473684210525, "loss": 0.5846, "step": 76210 }, { "epoch": 0.76, "learning_rate": 0.0001882657894736842, "loss": 0.5976, "step": 76220 }, { "epoch": 0.76, "learning_rate": 0.00018818684210526314, "loss": 0.5955, "step": 76230 }, { "epoch": 0.76, "learning_rate": 0.0001881078947368421, "loss": 0.5932, "step": 76240 }, { "epoch": 0.76, "learning_rate": 0.000188028947368421, "loss": 0.593, "step": 76250 }, { "epoch": 0.76, "learning_rate": 0.00018794999999999996, "loss": 0.5816, "step": 76260 }, { "epoch": 0.76, "learning_rate": 0.00018787105263157894, "loss": 0.5801, "step": 76270 }, { "epoch": 0.76, "learning_rate": 0.00018779999999999998, "loss": 0.5974, "step": 76280 }, { "epoch": 0.76, "learning_rate": 0.00018772105263157893, "loss": 0.5982, "step": 76290 }, { "epoch": 0.76, "learning_rate": 0.00018764210526315788, "loss": 0.5994, "step": 76300 }, { "epoch": 0.76, "learning_rate": 0.0001875631578947368, "loss": 0.5968, "step": 76310 }, { "epoch": 0.76, "learning_rate": 0.00018748421052631575, "loss": 0.6025, "step": 76320 }, { "epoch": 0.76, "learning_rate": 0.00018740526315789473, "loss": 0.5866, "step": 76330 }, { "epoch": 0.76, "learning_rate": 0.00018732631578947367, "loss": 0.5938, "step": 76340 }, { "epoch": 0.76, "learning_rate": 0.00018724736842105262, "loss": 0.5912, "step": 76350 }, { "epoch": 0.76, "learning_rate": 0.00018716842105263157, "loss": 0.5797, "step": 76360 }, { "epoch": 0.76, "learning_rate": 0.00018708947368421052, "loss": 0.6013, "step": 76370 }, { "epoch": 0.76, "learning_rate": 0.00018701052631578947, "loss": 0.5921, "step": 76380 }, { "epoch": 0.76, "learning_rate": 0.0001869315789473684, "loss": 0.5994, "step": 76390 }, { "epoch": 0.76, "learning_rate": 0.00018685263157894734, "loss": 0.5708, "step": 76400 }, { "epoch": 0.76, "learning_rate": 0.00018677368421052628, "loss": 0.5767, "step": 76410 }, { "epoch": 0.76, "learning_rate": 0.00018669473684210523, "loss": 0.5678, "step": 76420 }, { "epoch": 0.76, "learning_rate": 0.0001866157894736842, "loss": 0.5777, "step": 76430 }, { "epoch": 0.76, "learning_rate": 0.00018653684210526316, "loss": 0.5798, "step": 76440 }, { "epoch": 0.76, "learning_rate": 0.0001864578947368421, "loss": 0.5749, "step": 76450 }, { "epoch": 0.76, "learning_rate": 0.00018637894736842103, "loss": 0.5691, "step": 76460 }, { "epoch": 0.76, "learning_rate": 0.00018629999999999997, "loss": 0.5552, "step": 76470 }, { "epoch": 0.76, "learning_rate": 0.00018622105263157892, "loss": 0.5727, "step": 76480 }, { "epoch": 0.76, "learning_rate": 0.00018614210526315787, "loss": 0.5828, "step": 76490 }, { "epoch": 0.77, "learning_rate": 0.00018606315789473682, "loss": 0.5758, "step": 76500 }, { "epoch": 0.77, "learning_rate": 0.00018598421052631577, "loss": 0.5768, "step": 76510 }, { "epoch": 0.77, "learning_rate": 0.00018590526315789474, "loss": 0.5644, "step": 76520 }, { "epoch": 0.77, "learning_rate": 0.0001858263157894737, "loss": 0.5651, "step": 76530 }, { "epoch": 0.77, "learning_rate": 0.0001857473684210526, "loss": 0.5793, "step": 76540 }, { "epoch": 0.77, "learning_rate": 0.00018566842105263156, "loss": 0.5897, "step": 76550 }, { "epoch": 0.77, "learning_rate": 0.0001855894736842105, "loss": 0.5769, "step": 76560 }, { "epoch": 0.77, "learning_rate": 0.00018551052631578946, "loss": 0.5866, "step": 76570 }, { "epoch": 0.77, "learning_rate": 0.0001854315789473684, "loss": 0.5854, "step": 76580 }, { "epoch": 0.77, "learning_rate": 0.00018535263157894735, "loss": 0.5828, "step": 76590 }, { "epoch": 0.77, "learning_rate": 0.00018527368421052628, "loss": 0.581, "step": 76600 }, { "epoch": 0.77, "learning_rate": 0.00018519473684210525, "loss": 0.57, "step": 76610 }, { "epoch": 0.77, "learning_rate": 0.0001851157894736842, "loss": 0.5655, "step": 76620 }, { "epoch": 0.77, "learning_rate": 0.00018503684210526315, "loss": 0.5806, "step": 76630 }, { "epoch": 0.77, "learning_rate": 0.0001849578947368421, "loss": 0.5757, "step": 76640 }, { "epoch": 0.77, "learning_rate": 0.00018488684210526314, "loss": 0.5872, "step": 76650 }, { "epoch": 0.77, "learning_rate": 0.00018480789473684207, "loss": 0.5703, "step": 76660 }, { "epoch": 0.77, "learning_rate": 0.000184728947368421, "loss": 0.5723, "step": 76670 }, { "epoch": 0.77, "learning_rate": 0.00018465, "loss": 0.5638, "step": 76680 }, { "epoch": 0.77, "learning_rate": 0.00018457105263157894, "loss": 0.5819, "step": 76690 }, { "epoch": 0.77, "learning_rate": 0.00018449210526315789, "loss": 0.5739, "step": 76700 }, { "epoch": 0.77, "learning_rate": 0.00018441315789473683, "loss": 0.5862, "step": 76710 }, { "epoch": 0.77, "learning_rate": 0.00018433421052631578, "loss": 0.5625, "step": 76720 }, { "epoch": 0.77, "learning_rate": 0.00018425526315789473, "loss": 0.5555, "step": 76730 }, { "epoch": 0.77, "learning_rate": 0.00018417631578947365, "loss": 0.5683, "step": 76740 }, { "epoch": 0.77, "learning_rate": 0.0001840973684210526, "loss": 0.5669, "step": 76750 }, { "epoch": 0.77, "learning_rate": 0.00018401842105263155, "loss": 0.5604, "step": 76760 }, { "epoch": 0.77, "learning_rate": 0.00018393947368421052, "loss": 0.5773, "step": 76770 }, { "epoch": 0.77, "learning_rate": 0.00018386052631578947, "loss": 0.5661, "step": 76780 }, { "epoch": 0.77, "learning_rate": 0.00018378157894736842, "loss": 0.5645, "step": 76790 }, { "epoch": 0.77, "learning_rate": 0.00018370263157894737, "loss": 0.5782, "step": 76800 }, { "epoch": 0.77, "learning_rate": 0.0001836236842105263, "loss": 0.5892, "step": 76810 }, { "epoch": 0.77, "learning_rate": 0.00018354473684210524, "loss": 0.5789, "step": 76820 }, { "epoch": 0.77, "learning_rate": 0.00018346578947368419, "loss": 0.5755, "step": 76830 }, { "epoch": 0.77, "learning_rate": 0.00018338684210526313, "loss": 0.5721, "step": 76840 }, { "epoch": 0.77, "learning_rate": 0.00018330789473684208, "loss": 0.5785, "step": 76850 }, { "epoch": 0.77, "learning_rate": 0.00018322894736842106, "loss": 0.5665, "step": 76860 }, { "epoch": 0.77, "learning_rate": 0.00018315, "loss": 0.567, "step": 76870 }, { "epoch": 0.77, "learning_rate": 0.00018307105263157893, "loss": 0.5833, "step": 76880 }, { "epoch": 0.77, "learning_rate": 0.00018299210526315788, "loss": 0.596, "step": 76890 }, { "epoch": 0.77, "learning_rate": 0.00018291315789473682, "loss": 0.5872, "step": 76900 }, { "epoch": 0.77, "learning_rate": 0.00018283421052631577, "loss": 0.5898, "step": 76910 }, { "epoch": 0.77, "learning_rate": 0.00018275526315789472, "loss": 0.5931, "step": 76920 }, { "epoch": 0.77, "learning_rate": 0.00018267631578947367, "loss": 0.592, "step": 76930 }, { "epoch": 0.77, "learning_rate": 0.0001825973684210526, "loss": 0.5917, "step": 76940 }, { "epoch": 0.77, "learning_rate": 0.00018251842105263154, "loss": 0.5895, "step": 76950 }, { "epoch": 0.77, "learning_rate": 0.00018243947368421051, "loss": 0.5849, "step": 76960 }, { "epoch": 0.77, "learning_rate": 0.00018236052631578946, "loss": 0.5931, "step": 76970 }, { "epoch": 0.77, "learning_rate": 0.0001822815789473684, "loss": 0.5846, "step": 76980 }, { "epoch": 0.77, "learning_rate": 0.00018220263157894736, "loss": 0.5949, "step": 76990 }, { "epoch": 0.77, "learning_rate": 0.0001821236842105263, "loss": 0.5936, "step": 77000 }, { "epoch": 0.77, "learning_rate": 0.00018204473684210526, "loss": 0.5891, "step": 77010 }, { "epoch": 0.77, "learning_rate": 0.00018196578947368418, "loss": 0.5882, "step": 77020 }, { "epoch": 0.77, "learning_rate": 0.00018188684210526313, "loss": 0.5777, "step": 77030 }, { "epoch": 0.77, "learning_rate": 0.00018180789473684207, "loss": 0.5873, "step": 77040 }, { "epoch": 0.77, "learning_rate": 0.00018172894736842105, "loss": 0.5893, "step": 77050 }, { "epoch": 0.77, "learning_rate": 0.00018165, "loss": 0.5855, "step": 77060 }, { "epoch": 0.77, "learning_rate": 0.00018157105263157895, "loss": 0.5977, "step": 77070 }, { "epoch": 0.77, "learning_rate": 0.0001814921052631579, "loss": 0.6028, "step": 77080 }, { "epoch": 0.77, "learning_rate": 0.00018141315789473682, "loss": 0.5925, "step": 77090 }, { "epoch": 0.77, "learning_rate": 0.00018133421052631576, "loss": 0.5958, "step": 77100 }, { "epoch": 0.77, "learning_rate": 0.0001812552631578947, "loss": 0.5835, "step": 77110 }, { "epoch": 0.77, "learning_rate": 0.00018117631578947366, "loss": 0.5854, "step": 77120 }, { "epoch": 0.77, "learning_rate": 0.0001810973684210526, "loss": 0.5894, "step": 77130 }, { "epoch": 0.77, "learning_rate": 0.00018101842105263158, "loss": 0.5928, "step": 77140 }, { "epoch": 0.77, "learning_rate": 0.00018093947368421053, "loss": 0.5896, "step": 77150 }, { "epoch": 0.77, "learning_rate": 0.00018086052631578948, "loss": 0.5805, "step": 77160 }, { "epoch": 0.77, "learning_rate": 0.0001807815789473684, "loss": 0.5839, "step": 77170 }, { "epoch": 0.77, "learning_rate": 0.00018070263157894735, "loss": 0.5863, "step": 77180 }, { "epoch": 0.77, "learning_rate": 0.0001806236842105263, "loss": 0.5859, "step": 77190 }, { "epoch": 0.77, "learning_rate": 0.00018054473684210525, "loss": 0.5914, "step": 77200 }, { "epoch": 0.77, "learning_rate": 0.0001804657894736842, "loss": 0.5905, "step": 77210 }, { "epoch": 0.77, "learning_rate": 0.00018038684210526314, "loss": 0.5948, "step": 77220 }, { "epoch": 0.77, "learning_rate": 0.00018030789473684206, "loss": 0.5882, "step": 77230 }, { "epoch": 0.77, "learning_rate": 0.00018022894736842104, "loss": 0.59, "step": 77240 }, { "epoch": 0.77, "learning_rate": 0.00018015, "loss": 0.5804, "step": 77250 }, { "epoch": 0.77, "learning_rate": 0.00018007105263157894, "loss": 0.5853, "step": 77260 }, { "epoch": 0.77, "learning_rate": 0.00017999210526315788, "loss": 0.5858, "step": 77270 }, { "epoch": 0.77, "learning_rate": 0.00017991315789473683, "loss": 0.6019, "step": 77280 }, { "epoch": 0.77, "learning_rate": 0.00017983421052631578, "loss": 0.599, "step": 77290 }, { "epoch": 0.77, "learning_rate": 0.0001797552631578947, "loss": 0.5872, "step": 77300 }, { "epoch": 0.77, "learning_rate": 0.00017967631578947365, "loss": 0.5993, "step": 77310 }, { "epoch": 0.77, "learning_rate": 0.0001795973684210526, "loss": 0.6082, "step": 77320 }, { "epoch": 0.77, "learning_rate": 0.00017951842105263157, "loss": 0.5967, "step": 77330 }, { "epoch": 0.77, "learning_rate": 0.00017943947368421052, "loss": 0.5995, "step": 77340 }, { "epoch": 0.77, "learning_rate": 0.00017936052631578947, "loss": 0.5943, "step": 77350 }, { "epoch": 0.77, "learning_rate": 0.00017928157894736842, "loss": 0.5915, "step": 77360 }, { "epoch": 0.77, "learning_rate": 0.00017920263157894734, "loss": 0.5712, "step": 77370 }, { "epoch": 0.77, "learning_rate": 0.0001791236842105263, "loss": 0.5805, "step": 77380 }, { "epoch": 0.77, "learning_rate": 0.00017904473684210524, "loss": 0.5757, "step": 77390 }, { "epoch": 0.77, "learning_rate": 0.00017896578947368419, "loss": 0.5926, "step": 77400 }, { "epoch": 0.77, "learning_rate": 0.00017888684210526313, "loss": 0.5889, "step": 77410 }, { "epoch": 0.77, "learning_rate": 0.0001788078947368421, "loss": 0.5812, "step": 77420 }, { "epoch": 0.77, "learning_rate": 0.00017872894736842106, "loss": 0.5989, "step": 77430 }, { "epoch": 0.77, "learning_rate": 0.00017865, "loss": 0.5755, "step": 77440 }, { "epoch": 0.77, "learning_rate": 0.00017857105263157893, "loss": 0.5941, "step": 77450 }, { "epoch": 0.77, "learning_rate": 0.00017849210526315788, "loss": 0.5814, "step": 77460 }, { "epoch": 0.77, "learning_rate": 0.00017841315789473682, "loss": 0.5839, "step": 77470 }, { "epoch": 0.77, "learning_rate": 0.00017833421052631577, "loss": 0.5837, "step": 77480 }, { "epoch": 0.77, "learning_rate": 0.00017825526315789472, "loss": 0.5764, "step": 77490 }, { "epoch": 0.78, "learning_rate": 0.00017817631578947367, "loss": 0.5866, "step": 77500 }, { "epoch": 0.78, "eval_accuracy": 0.8773601876392698, "eval_loss": 0.5615234375, "eval_runtime": 98.3407, "eval_samples_per_second": 813.499, "eval_steps_per_second": 1.596, "step": 77500 }, { "epoch": 0.78, "learning_rate": 0.0001780973684210526, "loss": 0.5687, "step": 77510 }, { "epoch": 0.78, "learning_rate": 0.00017801842105263156, "loss": 0.5813, "step": 77520 }, { "epoch": 0.78, "learning_rate": 0.0001779394736842105, "loss": 0.5739, "step": 77530 }, { "epoch": 0.78, "learning_rate": 0.00017786052631578946, "loss": 0.5825, "step": 77540 }, { "epoch": 0.78, "learning_rate": 0.0001777815789473684, "loss": 0.5699, "step": 77550 }, { "epoch": 0.78, "learning_rate": 0.00017770263157894736, "loss": 0.5796, "step": 77560 }, { "epoch": 0.78, "learning_rate": 0.0001776236842105263, "loss": 0.571, "step": 77570 }, { "epoch": 0.78, "learning_rate": 0.00017754473684210523, "loss": 0.5849, "step": 77580 }, { "epoch": 0.78, "learning_rate": 0.00017746578947368418, "loss": 0.5826, "step": 77590 }, { "epoch": 0.78, "learning_rate": 0.00017738684210526312, "loss": 0.6047, "step": 77600 }, { "epoch": 0.78, "learning_rate": 0.0001773078947368421, "loss": 0.5951, "step": 77610 }, { "epoch": 0.78, "learning_rate": 0.00017722894736842105, "loss": 0.5876, "step": 77620 }, { "epoch": 0.78, "learning_rate": 0.00017715, "loss": 0.5915, "step": 77630 }, { "epoch": 0.78, "learning_rate": 0.00017707105263157894, "loss": 0.5894, "step": 77640 }, { "epoch": 0.78, "learning_rate": 0.0001769921052631579, "loss": 0.5891, "step": 77650 }, { "epoch": 0.78, "learning_rate": 0.00017691315789473681, "loss": 0.5863, "step": 77660 }, { "epoch": 0.78, "learning_rate": 0.00017683421052631576, "loss": 0.5891, "step": 77670 }, { "epoch": 0.78, "learning_rate": 0.0001767552631578947, "loss": 0.5976, "step": 77680 }, { "epoch": 0.78, "learning_rate": 0.00017667631578947366, "loss": 0.5871, "step": 77690 }, { "epoch": 0.78, "learning_rate": 0.00017659736842105263, "loss": 0.596, "step": 77700 }, { "epoch": 0.78, "learning_rate": 0.00017651842105263158, "loss": 0.592, "step": 77710 }, { "epoch": 0.78, "learning_rate": 0.00017643947368421053, "loss": 0.6012, "step": 77720 }, { "epoch": 0.78, "learning_rate": 0.00017636052631578945, "loss": 0.5855, "step": 77730 }, { "epoch": 0.78, "learning_rate": 0.0001762815789473684, "loss": 0.5898, "step": 77740 }, { "epoch": 0.78, "learning_rate": 0.00017620263157894735, "loss": 0.6017, "step": 77750 }, { "epoch": 0.78, "learning_rate": 0.0001761236842105263, "loss": 0.5926, "step": 77760 }, { "epoch": 0.78, "learning_rate": 0.00017604473684210525, "loss": 0.591, "step": 77770 }, { "epoch": 0.78, "learning_rate": 0.0001759657894736842, "loss": 0.596, "step": 77780 }, { "epoch": 0.78, "learning_rate": 0.00017588684210526311, "loss": 0.5855, "step": 77790 }, { "epoch": 0.78, "learning_rate": 0.0001758078947368421, "loss": 0.5889, "step": 77800 }, { "epoch": 0.78, "learning_rate": 0.00017572894736842104, "loss": 0.5921, "step": 77810 }, { "epoch": 0.78, "learning_rate": 0.00017565, "loss": 0.5874, "step": 77820 }, { "epoch": 0.78, "learning_rate": 0.00017557105263157894, "loss": 0.5985, "step": 77830 }, { "epoch": 0.78, "learning_rate": 0.00017549210526315788, "loss": 0.5736, "step": 77840 }, { "epoch": 0.78, "learning_rate": 0.00017541315789473683, "loss": 0.5707, "step": 77850 }, { "epoch": 0.78, "learning_rate": 0.00017533421052631575, "loss": 0.5861, "step": 77860 }, { "epoch": 0.78, "learning_rate": 0.0001752552631578947, "loss": 0.5843, "step": 77870 }, { "epoch": 0.78, "learning_rate": 0.00017517631578947365, "loss": 0.5825, "step": 77880 }, { "epoch": 0.78, "learning_rate": 0.00017509736842105263, "loss": 0.5816, "step": 77890 }, { "epoch": 0.78, "learning_rate": 0.00017501842105263157, "loss": 0.5691, "step": 77900 }, { "epoch": 0.78, "learning_rate": 0.00017493947368421052, "loss": 0.5769, "step": 77910 }, { "epoch": 0.78, "learning_rate": 0.00017486052631578947, "loss": 0.5875, "step": 77920 }, { "epoch": 0.78, "learning_rate": 0.00017478157894736842, "loss": 0.5881, "step": 77930 }, { "epoch": 0.78, "learning_rate": 0.00017470263157894734, "loss": 0.5793, "step": 77940 }, { "epoch": 0.78, "learning_rate": 0.0001746236842105263, "loss": 0.5803, "step": 77950 }, { "epoch": 0.78, "learning_rate": 0.00017454473684210524, "loss": 0.5789, "step": 77960 }, { "epoch": 0.78, "learning_rate": 0.00017446578947368418, "loss": 0.5912, "step": 77970 }, { "epoch": 0.78, "learning_rate": 0.00017438684210526316, "loss": 0.5939, "step": 77980 }, { "epoch": 0.78, "learning_rate": 0.0001743078947368421, "loss": 0.5964, "step": 77990 }, { "epoch": 0.78, "learning_rate": 0.00017422894736842106, "loss": 0.5815, "step": 78000 }, { "epoch": 0.78, "learning_rate": 0.00017414999999999998, "loss": 0.5832, "step": 78010 }, { "epoch": 0.78, "learning_rate": 0.00017407105263157893, "loss": 0.5843, "step": 78020 }, { "epoch": 0.78, "learning_rate": 0.00017399210526315787, "loss": 0.5763, "step": 78030 }, { "epoch": 0.78, "learning_rate": 0.00017391315789473682, "loss": 0.5813, "step": 78040 }, { "epoch": 0.78, "learning_rate": 0.00017383421052631577, "loss": 0.5774, "step": 78050 }, { "epoch": 0.78, "learning_rate": 0.00017375526315789472, "loss": 0.5784, "step": 78060 }, { "epoch": 0.78, "learning_rate": 0.0001736763157894737, "loss": 0.5669, "step": 78070 }, { "epoch": 0.78, "learning_rate": 0.00017359736842105264, "loss": 0.5702, "step": 78080 }, { "epoch": 0.78, "learning_rate": 0.00017351842105263156, "loss": 0.5785, "step": 78090 }, { "epoch": 0.78, "learning_rate": 0.0001734394736842105, "loss": 0.5729, "step": 78100 }, { "epoch": 0.78, "learning_rate": 0.00017336052631578946, "loss": 0.5699, "step": 78110 }, { "epoch": 0.78, "learning_rate": 0.0001732815789473684, "loss": 0.578, "step": 78120 }, { "epoch": 0.78, "learning_rate": 0.00017320263157894736, "loss": 0.5723, "step": 78130 }, { "epoch": 0.78, "learning_rate": 0.0001731236842105263, "loss": 0.5908, "step": 78140 }, { "epoch": 0.78, "learning_rate": 0.00017304473684210523, "loss": 0.5829, "step": 78150 }, { "epoch": 0.78, "learning_rate": 0.00017296578947368417, "loss": 0.59, "step": 78160 }, { "epoch": 0.78, "learning_rate": 0.00017288684210526315, "loss": 0.5888, "step": 78170 }, { "epoch": 0.78, "learning_rate": 0.0001728078947368421, "loss": 0.5903, "step": 78180 }, { "epoch": 0.78, "learning_rate": 0.00017272894736842105, "loss": 0.5908, "step": 78190 }, { "epoch": 0.78, "learning_rate": 0.00017265, "loss": 0.5988, "step": 78200 }, { "epoch": 0.78, "learning_rate": 0.00017257105263157894, "loss": 0.5943, "step": 78210 }, { "epoch": 0.78, "learning_rate": 0.00017249210526315786, "loss": 0.5881, "step": 78220 }, { "epoch": 0.78, "learning_rate": 0.0001724131578947368, "loss": 0.5965, "step": 78230 }, { "epoch": 0.78, "learning_rate": 0.00017233421052631576, "loss": 0.598, "step": 78240 }, { "epoch": 0.78, "learning_rate": 0.0001722552631578947, "loss": 0.5954, "step": 78250 }, { "epoch": 0.78, "learning_rate": 0.00017217631578947369, "loss": 0.6063, "step": 78260 }, { "epoch": 0.78, "learning_rate": 0.00017209736842105263, "loss": 0.607, "step": 78270 }, { "epoch": 0.78, "learning_rate": 0.00017201842105263158, "loss": 0.5848, "step": 78280 }, { "epoch": 0.78, "learning_rate": 0.0001719394736842105, "loss": 0.5862, "step": 78290 }, { "epoch": 0.78, "learning_rate": 0.00017186052631578945, "loss": 0.5925, "step": 78300 }, { "epoch": 0.78, "learning_rate": 0.0001717815789473684, "loss": 0.5859, "step": 78310 }, { "epoch": 0.78, "learning_rate": 0.00017170263157894735, "loss": 0.5928, "step": 78320 }, { "epoch": 0.78, "learning_rate": 0.0001716236842105263, "loss": 0.5863, "step": 78330 }, { "epoch": 0.78, "learning_rate": 0.00017154473684210524, "loss": 0.5849, "step": 78340 }, { "epoch": 0.78, "learning_rate": 0.00017146578947368422, "loss": 0.5821, "step": 78350 }, { "epoch": 0.78, "learning_rate": 0.00017138684210526317, "loss": 0.5849, "step": 78360 }, { "epoch": 0.78, "learning_rate": 0.0001713078947368421, "loss": 0.5799, "step": 78370 }, { "epoch": 0.78, "learning_rate": 0.00017122894736842104, "loss": 0.5936, "step": 78380 }, { "epoch": 0.78, "learning_rate": 0.00017114999999999999, "loss": 0.5874, "step": 78390 }, { "epoch": 0.78, "learning_rate": 0.00017107105263157893, "loss": 0.5931, "step": 78400 }, { "epoch": 0.78, "learning_rate": 0.00017099210526315788, "loss": 0.5942, "step": 78410 }, { "epoch": 0.78, "learning_rate": 0.00017091315789473683, "loss": 0.5835, "step": 78420 }, { "epoch": 0.78, "learning_rate": 0.00017083421052631575, "loss": 0.585, "step": 78430 }, { "epoch": 0.78, "learning_rate": 0.0001707552631578947, "loss": 0.5985, "step": 78440 }, { "epoch": 0.78, "learning_rate": 0.00017067631578947368, "loss": 0.5995, "step": 78450 }, { "epoch": 0.78, "learning_rate": 0.00017059736842105262, "loss": 0.5992, "step": 78460 }, { "epoch": 0.78, "learning_rate": 0.00017051842105263157, "loss": 0.5808, "step": 78470 }, { "epoch": 0.78, "learning_rate": 0.00017043947368421052, "loss": 0.5961, "step": 78480 }, { "epoch": 0.78, "learning_rate": 0.00017036052631578947, "loss": 0.5751, "step": 78490 }, { "epoch": 0.79, "learning_rate": 0.0001702815789473684, "loss": 0.5802, "step": 78500 }, { "epoch": 0.79, "learning_rate": 0.00017020263157894734, "loss": 0.5793, "step": 78510 }, { "epoch": 0.79, "learning_rate": 0.0001701236842105263, "loss": 0.5904, "step": 78520 }, { "epoch": 0.79, "learning_rate": 0.00017004473684210524, "loss": 0.5702, "step": 78530 }, { "epoch": 0.79, "learning_rate": 0.0001699657894736842, "loss": 0.5715, "step": 78540 }, { "epoch": 0.79, "learning_rate": 0.00016988684210526316, "loss": 0.5723, "step": 78550 }, { "epoch": 0.79, "learning_rate": 0.0001698078947368421, "loss": 0.5778, "step": 78560 }, { "epoch": 0.79, "learning_rate": 0.00016973684210526313, "loss": 0.5969, "step": 78570 }, { "epoch": 0.79, "learning_rate": 0.00016965789473684208, "loss": 0.6002, "step": 78580 }, { "epoch": 0.79, "learning_rate": 0.00016957894736842102, "loss": 0.5946, "step": 78590 }, { "epoch": 0.79, "learning_rate": 0.00016949999999999997, "loss": 0.5937, "step": 78600 }, { "epoch": 0.79, "learning_rate": 0.00016942105263157895, "loss": 0.5941, "step": 78610 }, { "epoch": 0.79, "learning_rate": 0.0001693421052631579, "loss": 0.611, "step": 78620 }, { "epoch": 0.79, "learning_rate": 0.00016926315789473684, "loss": 0.5932, "step": 78630 }, { "epoch": 0.79, "learning_rate": 0.00016918421052631577, "loss": 0.6037, "step": 78640 }, { "epoch": 0.79, "learning_rate": 0.00016910526315789471, "loss": 0.596, "step": 78650 }, { "epoch": 0.79, "learning_rate": 0.00016902631578947366, "loss": 0.5977, "step": 78660 }, { "epoch": 0.79, "learning_rate": 0.0001689473684210526, "loss": 0.6065, "step": 78670 }, { "epoch": 0.79, "learning_rate": 0.00016886842105263156, "loss": 0.6014, "step": 78680 }, { "epoch": 0.79, "learning_rate": 0.0001687894736842105, "loss": 0.5962, "step": 78690 }, { "epoch": 0.79, "learning_rate": 0.00016871052631578948, "loss": 0.5967, "step": 78700 }, { "epoch": 0.79, "learning_rate": 0.0001686315789473684, "loss": 0.5962, "step": 78710 }, { "epoch": 0.79, "learning_rate": 0.00016855263157894735, "loss": 0.5877, "step": 78720 }, { "epoch": 0.79, "learning_rate": 0.0001684736842105263, "loss": 0.5971, "step": 78730 }, { "epoch": 0.79, "learning_rate": 0.00016839473684210525, "loss": 0.5903, "step": 78740 }, { "epoch": 0.79, "learning_rate": 0.0001683157894736842, "loss": 0.5888, "step": 78750 }, { "epoch": 0.79, "learning_rate": 0.00016823684210526315, "loss": 0.5827, "step": 78760 }, { "epoch": 0.79, "learning_rate": 0.00016815789473684207, "loss": 0.581, "step": 78770 }, { "epoch": 0.79, "learning_rate": 0.00016807894736842102, "loss": 0.574, "step": 78780 }, { "epoch": 0.79, "learning_rate": 0.000168, "loss": 0.5862, "step": 78790 }, { "epoch": 0.79, "learning_rate": 0.00016792105263157894, "loss": 0.5846, "step": 78800 }, { "epoch": 0.79, "learning_rate": 0.0001678421052631579, "loss": 0.595, "step": 78810 }, { "epoch": 0.79, "learning_rate": 0.00016776315789473684, "loss": 0.5791, "step": 78820 }, { "epoch": 0.79, "learning_rate": 0.00016768421052631578, "loss": 0.5823, "step": 78830 }, { "epoch": 0.79, "learning_rate": 0.00016760526315789473, "loss": 0.5918, "step": 78840 }, { "epoch": 0.79, "learning_rate": 0.00016752631578947365, "loss": 0.5902, "step": 78850 }, { "epoch": 0.79, "learning_rate": 0.0001674473684210526, "loss": 0.5868, "step": 78860 }, { "epoch": 0.79, "learning_rate": 0.00016736842105263155, "loss": 0.5845, "step": 78870 }, { "epoch": 0.79, "learning_rate": 0.0001672894736842105, "loss": 0.5821, "step": 78880 }, { "epoch": 0.79, "learning_rate": 0.00016721052631578947, "loss": 0.5929, "step": 78890 }, { "epoch": 0.79, "learning_rate": 0.00016713157894736842, "loss": 0.5958, "step": 78900 }, { "epoch": 0.79, "learning_rate": 0.00016705263157894737, "loss": 0.5871, "step": 78910 }, { "epoch": 0.79, "learning_rate": 0.0001669736842105263, "loss": 0.5838, "step": 78920 }, { "epoch": 0.79, "learning_rate": 0.00016689473684210524, "loss": 0.5899, "step": 78930 }, { "epoch": 0.79, "learning_rate": 0.0001668157894736842, "loss": 0.5921, "step": 78940 }, { "epoch": 0.79, "learning_rate": 0.00016673684210526314, "loss": 0.5951, "step": 78950 }, { "epoch": 0.79, "learning_rate": 0.00016665789473684208, "loss": 0.5961, "step": 78960 }, { "epoch": 0.79, "learning_rate": 0.00016657894736842103, "loss": 0.5973, "step": 78970 }, { "epoch": 0.79, "learning_rate": 0.0001665, "loss": 0.5763, "step": 78980 }, { "epoch": 0.79, "learning_rate": 0.00016642105263157896, "loss": 0.5862, "step": 78990 }, { "epoch": 0.79, "learning_rate": 0.00016634210526315788, "loss": 0.592, "step": 79000 }, { "epoch": 0.79, "learning_rate": 0.00016626315789473683, "loss": 0.5803, "step": 79010 }, { "epoch": 0.79, "learning_rate": 0.00016618421052631577, "loss": 0.5817, "step": 79020 }, { "epoch": 0.79, "learning_rate": 0.00016610526315789472, "loss": 0.5859, "step": 79030 }, { "epoch": 0.79, "learning_rate": 0.00016602631578947367, "loss": 0.5815, "step": 79040 }, { "epoch": 0.79, "learning_rate": 0.00016594736842105262, "loss": 0.5921, "step": 79050 }, { "epoch": 0.79, "learning_rate": 0.00016586842105263154, "loss": 0.5914, "step": 79060 }, { "epoch": 0.79, "learning_rate": 0.00016578947368421052, "loss": 0.58, "step": 79070 }, { "epoch": 0.79, "learning_rate": 0.00016571052631578946, "loss": 0.5861, "step": 79080 }, { "epoch": 0.79, "learning_rate": 0.0001656315789473684, "loss": 0.5866, "step": 79090 }, { "epoch": 0.79, "learning_rate": 0.00016555263157894736, "loss": 0.5887, "step": 79100 }, { "epoch": 0.79, "learning_rate": 0.0001654736842105263, "loss": 0.5906, "step": 79110 }, { "epoch": 0.79, "learning_rate": 0.00016539473684210526, "loss": 0.5837, "step": 79120 }, { "epoch": 0.79, "learning_rate": 0.00016531578947368418, "loss": 0.585, "step": 79130 }, { "epoch": 0.79, "learning_rate": 0.00016523684210526313, "loss": 0.5758, "step": 79140 }, { "epoch": 0.79, "learning_rate": 0.00016515789473684208, "loss": 0.5805, "step": 79150 }, { "epoch": 0.79, "learning_rate": 0.00016507894736842102, "loss": 0.5879, "step": 79160 }, { "epoch": 0.79, "learning_rate": 0.000165, "loss": 0.5835, "step": 79170 }, { "epoch": 0.79, "learning_rate": 0.00016492105263157895, "loss": 0.5816, "step": 79180 }, { "epoch": 0.79, "learning_rate": 0.0001648421052631579, "loss": 0.5812, "step": 79190 }, { "epoch": 0.79, "learning_rate": 0.00016476315789473682, "loss": 0.5868, "step": 79200 }, { "epoch": 0.79, "learning_rate": 0.00016468421052631577, "loss": 0.5846, "step": 79210 }, { "epoch": 0.79, "learning_rate": 0.0001646052631578947, "loss": 0.6016, "step": 79220 }, { "epoch": 0.79, "learning_rate": 0.00016452631578947366, "loss": 0.5815, "step": 79230 }, { "epoch": 0.79, "learning_rate": 0.0001644473684210526, "loss": 0.5742, "step": 79240 }, { "epoch": 0.79, "learning_rate": 0.00016436842105263156, "loss": 0.5834, "step": 79250 }, { "epoch": 0.79, "learning_rate": 0.00016428947368421053, "loss": 0.5736, "step": 79260 }, { "epoch": 0.79, "learning_rate": 0.00016421052631578948, "loss": 0.5911, "step": 79270 }, { "epoch": 0.79, "learning_rate": 0.0001641315789473684, "loss": 0.5774, "step": 79280 }, { "epoch": 0.79, "learning_rate": 0.00016405263157894735, "loss": 0.5825, "step": 79290 }, { "epoch": 0.79, "learning_rate": 0.0001639736842105263, "loss": 0.5693, "step": 79300 }, { "epoch": 0.79, "learning_rate": 0.00016389473684210525, "loss": 0.561, "step": 79310 }, { "epoch": 0.79, "learning_rate": 0.0001638157894736842, "loss": 0.573, "step": 79320 }, { "epoch": 0.79, "learning_rate": 0.00016373684210526314, "loss": 0.5787, "step": 79330 }, { "epoch": 0.79, "learning_rate": 0.00016365789473684207, "loss": 0.5856, "step": 79340 }, { "epoch": 0.79, "learning_rate": 0.00016357894736842104, "loss": 0.5764, "step": 79350 }, { "epoch": 0.79, "learning_rate": 0.0001635, "loss": 0.569, "step": 79360 }, { "epoch": 0.79, "learning_rate": 0.00016342105263157894, "loss": 0.5721, "step": 79370 }, { "epoch": 0.79, "learning_rate": 0.00016334210526315789, "loss": 0.5951, "step": 79380 }, { "epoch": 0.79, "learning_rate": 0.00016326315789473683, "loss": 0.5956, "step": 79390 }, { "epoch": 0.79, "learning_rate": 0.00016318421052631578, "loss": 0.5996, "step": 79400 }, { "epoch": 0.79, "learning_rate": 0.0001631052631578947, "loss": 0.5883, "step": 79410 }, { "epoch": 0.79, "learning_rate": 0.00016302631578947365, "loss": 0.5969, "step": 79420 }, { "epoch": 0.79, "learning_rate": 0.0001629473684210526, "loss": 0.5874, "step": 79430 }, { "epoch": 0.79, "learning_rate": 0.00016286842105263155, "loss": 0.589, "step": 79440 }, { "epoch": 0.79, "learning_rate": 0.00016278947368421052, "loss": 0.588, "step": 79450 }, { "epoch": 0.79, "learning_rate": 0.00016271052631578947, "loss": 0.5926, "step": 79460 }, { "epoch": 0.79, "learning_rate": 0.00016263157894736842, "loss": 0.5965, "step": 79470 }, { "epoch": 0.79, "learning_rate": 0.00016255263157894737, "loss": 0.5897, "step": 79480 }, { "epoch": 0.79, "learning_rate": 0.0001624736842105263, "loss": 0.5836, "step": 79490 }, { "epoch": 0.8, "learning_rate": 0.00016239473684210524, "loss": 0.58, "step": 79500 }, { "epoch": 0.8, "learning_rate": 0.0001623157894736842, "loss": 0.581, "step": 79510 }, { "epoch": 0.8, "learning_rate": 0.00016223684210526314, "loss": 0.5848, "step": 79520 }, { "epoch": 0.8, "learning_rate": 0.00016215789473684208, "loss": 0.5796, "step": 79530 }, { "epoch": 0.8, "learning_rate": 0.00016207894736842106, "loss": 0.5901, "step": 79540 }, { "epoch": 0.8, "learning_rate": 0.000162, "loss": 0.5849, "step": 79550 }, { "epoch": 0.8, "learning_rate": 0.00016192105263157893, "loss": 0.5809, "step": 79560 }, { "epoch": 0.8, "learning_rate": 0.00016184210526315788, "loss": 0.5734, "step": 79570 }, { "epoch": 0.8, "learning_rate": 0.00016176315789473683, "loss": 0.5786, "step": 79580 }, { "epoch": 0.8, "learning_rate": 0.00016168421052631577, "loss": 0.5781, "step": 79590 }, { "epoch": 0.8, "learning_rate": 0.00016160526315789472, "loss": 0.5922, "step": 79600 }, { "epoch": 0.8, "learning_rate": 0.00016152631578947367, "loss": 0.5848, "step": 79610 }, { "epoch": 0.8, "learning_rate": 0.0001614473684210526, "loss": 0.5804, "step": 79620 }, { "epoch": 0.8, "learning_rate": 0.00016136842105263157, "loss": 0.5825, "step": 79630 }, { "epoch": 0.8, "learning_rate": 0.00016128947368421052, "loss": 0.5732, "step": 79640 }, { "epoch": 0.8, "learning_rate": 0.00016121052631578946, "loss": 0.5813, "step": 79650 }, { "epoch": 0.8, "learning_rate": 0.0001611315789473684, "loss": 0.5909, "step": 79660 }, { "epoch": 0.8, "learning_rate": 0.00016105263157894736, "loss": 0.5927, "step": 79670 }, { "epoch": 0.8, "learning_rate": 0.0001609736842105263, "loss": 0.5889, "step": 79680 }, { "epoch": 0.8, "learning_rate": 0.00016089473684210523, "loss": 0.5828, "step": 79690 }, { "epoch": 0.8, "learning_rate": 0.00016081578947368418, "loss": 0.595, "step": 79700 }, { "epoch": 0.8, "learning_rate": 0.00016073684210526313, "loss": 0.5848, "step": 79710 }, { "epoch": 0.8, "learning_rate": 0.0001606578947368421, "loss": 0.5866, "step": 79720 }, { "epoch": 0.8, "learning_rate": 0.00016057894736842105, "loss": 0.5735, "step": 79730 }, { "epoch": 0.8, "learning_rate": 0.0001605, "loss": 0.5765, "step": 79740 }, { "epoch": 0.8, "learning_rate": 0.00016042105263157895, "loss": 0.5838, "step": 79750 }, { "epoch": 0.8, "learning_rate": 0.0001603421052631579, "loss": 0.5802, "step": 79760 }, { "epoch": 0.8, "learning_rate": 0.00016026315789473682, "loss": 0.5899, "step": 79770 }, { "epoch": 0.8, "learning_rate": 0.00016018421052631576, "loss": 0.5889, "step": 79780 }, { "epoch": 0.8, "learning_rate": 0.0001601052631578947, "loss": 0.5812, "step": 79790 }, { "epoch": 0.8, "learning_rate": 0.00016002631578947366, "loss": 0.5699, "step": 79800 }, { "epoch": 0.8, "learning_rate": 0.0001599473684210526, "loss": 0.5774, "step": 79810 }, { "epoch": 0.8, "learning_rate": 0.00015986842105263158, "loss": 0.5809, "step": 79820 }, { "epoch": 0.8, "learning_rate": 0.00015978947368421053, "loss": 0.5825, "step": 79830 }, { "epoch": 0.8, "learning_rate": 0.00015971052631578945, "loss": 0.5816, "step": 79840 }, { "epoch": 0.8, "learning_rate": 0.0001596315789473684, "loss": 0.5878, "step": 79850 }, { "epoch": 0.8, "learning_rate": 0.00015955263157894735, "loss": 0.586, "step": 79860 }, { "epoch": 0.8, "learning_rate": 0.0001594736842105263, "loss": 0.5965, "step": 79870 }, { "epoch": 0.8, "learning_rate": 0.00015939473684210525, "loss": 0.5867, "step": 79880 }, { "epoch": 0.8, "learning_rate": 0.0001593157894736842, "loss": 0.5934, "step": 79890 }, { "epoch": 0.8, "learning_rate": 0.00015923684210526312, "loss": 0.5853, "step": 79900 }, { "epoch": 0.8, "learning_rate": 0.00015915789473684212, "loss": 0.5937, "step": 79910 }, { "epoch": 0.8, "learning_rate": 0.00015907894736842104, "loss": 0.5861, "step": 79920 }, { "epoch": 0.8, "learning_rate": 0.000159, "loss": 0.5847, "step": 79930 }, { "epoch": 0.8, "learning_rate": 0.00015892105263157894, "loss": 0.5819, "step": 79940 }, { "epoch": 0.8, "learning_rate": 0.00015884210526315789, "loss": 0.5847, "step": 79950 }, { "epoch": 0.8, "learning_rate": 0.00015876315789473683, "loss": 0.5844, "step": 79960 }, { "epoch": 0.8, "learning_rate": 0.00015868421052631578, "loss": 0.5868, "step": 79970 }, { "epoch": 0.8, "learning_rate": 0.0001586052631578947, "loss": 0.5931, "step": 79980 }, { "epoch": 0.8, "learning_rate": 0.00015852631578947365, "loss": 0.5832, "step": 79990 }, { "epoch": 0.8, "learning_rate": 0.00015844736842105263, "loss": 0.5835, "step": 80000 }, { "epoch": 0.8, "eval_accuracy": 0.8784877003135946, "eval_loss": 0.55615234375, "eval_runtime": 99.7857, "eval_samples_per_second": 801.718, "eval_steps_per_second": 1.573, "step": 80000 }, { "epoch": 0.8, "learning_rate": 0.00015836842105263158, "loss": 0.5931, "step": 80010 }, { "epoch": 0.8, "learning_rate": 0.00015828947368421052, "loss": 0.5987, "step": 80020 }, { "epoch": 0.8, "learning_rate": 0.00015821052631578947, "loss": 0.5891, "step": 80030 }, { "epoch": 0.8, "learning_rate": 0.00015813157894736842, "loss": 0.5803, "step": 80040 }, { "epoch": 0.8, "learning_rate": 0.00015805263157894734, "loss": 0.5794, "step": 80050 }, { "epoch": 0.8, "learning_rate": 0.0001579736842105263, "loss": 0.5788, "step": 80060 }, { "epoch": 0.8, "learning_rate": 0.00015789473684210524, "loss": 0.5733, "step": 80070 }, { "epoch": 0.8, "learning_rate": 0.00015781578947368419, "loss": 0.5641, "step": 80080 }, { "epoch": 0.8, "learning_rate": 0.00015773684210526313, "loss": 0.5627, "step": 80090 }, { "epoch": 0.8, "learning_rate": 0.0001576578947368421, "loss": 0.5655, "step": 80100 }, { "epoch": 0.8, "learning_rate": 0.00015757894736842106, "loss": 0.5681, "step": 80110 }, { "epoch": 0.8, "learning_rate": 0.00015749999999999998, "loss": 0.5754, "step": 80120 }, { "epoch": 0.8, "learning_rate": 0.00015742105263157893, "loss": 0.5619, "step": 80130 }, { "epoch": 0.8, "learning_rate": 0.00015734210526315788, "loss": 0.5702, "step": 80140 }, { "epoch": 0.8, "learning_rate": 0.00015726315789473682, "loss": 0.574, "step": 80150 }, { "epoch": 0.8, "learning_rate": 0.00015718421052631577, "loss": 0.5703, "step": 80160 }, { "epoch": 0.8, "learning_rate": 0.00015710526315789472, "loss": 0.5702, "step": 80170 }, { "epoch": 0.8, "learning_rate": 0.00015702631578947364, "loss": 0.5722, "step": 80180 }, { "epoch": 0.8, "learning_rate": 0.00015694736842105264, "loss": 0.5736, "step": 80190 }, { "epoch": 0.8, "learning_rate": 0.00015686842105263157, "loss": 0.5659, "step": 80200 }, { "epoch": 0.8, "learning_rate": 0.00015678947368421051, "loss": 0.5753, "step": 80210 }, { "epoch": 0.8, "learning_rate": 0.00015671052631578946, "loss": 0.5607, "step": 80220 }, { "epoch": 0.8, "learning_rate": 0.0001566315789473684, "loss": 0.5664, "step": 80230 }, { "epoch": 0.8, "learning_rate": 0.00015655263157894736, "loss": 0.5535, "step": 80240 }, { "epoch": 0.8, "learning_rate": 0.0001564736842105263, "loss": 0.5607, "step": 80250 }, { "epoch": 0.8, "learning_rate": 0.00015639473684210523, "loss": 0.5691, "step": 80260 }, { "epoch": 0.8, "learning_rate": 0.00015631578947368418, "loss": 0.563, "step": 80270 }, { "epoch": 0.8, "learning_rate": 0.00015623684210526315, "loss": 0.56, "step": 80280 }, { "epoch": 0.8, "learning_rate": 0.0001561578947368421, "loss": 0.5626, "step": 80290 }, { "epoch": 0.8, "learning_rate": 0.00015607894736842105, "loss": 0.5638, "step": 80300 }, { "epoch": 0.8, "learning_rate": 0.000156, "loss": 0.5645, "step": 80310 }, { "epoch": 0.8, "learning_rate": 0.00015592105263157895, "loss": 0.5762, "step": 80320 }, { "epoch": 0.8, "learning_rate": 0.00015584210526315787, "loss": 0.5436, "step": 80330 }, { "epoch": 0.8, "learning_rate": 0.00015576315789473681, "loss": 0.5765, "step": 80340 }, { "epoch": 0.8, "learning_rate": 0.00015568421052631576, "loss": 0.5858, "step": 80350 }, { "epoch": 0.8, "learning_rate": 0.0001556052631578947, "loss": 0.5911, "step": 80360 }, { "epoch": 0.8, "learning_rate": 0.00015552631578947366, "loss": 0.5866, "step": 80370 }, { "epoch": 0.8, "learning_rate": 0.00015544736842105264, "loss": 0.5814, "step": 80380 }, { "epoch": 0.8, "learning_rate": 0.00015536842105263158, "loss": 0.583, "step": 80390 }, { "epoch": 0.8, "learning_rate": 0.00015528947368421053, "loss": 0.5753, "step": 80400 }, { "epoch": 0.8, "learning_rate": 0.00015521052631578945, "loss": 0.595, "step": 80410 }, { "epoch": 0.8, "learning_rate": 0.0001551315789473684, "loss": 0.5942, "step": 80420 }, { "epoch": 0.8, "learning_rate": 0.00015505263157894735, "loss": 0.5923, "step": 80430 }, { "epoch": 0.8, "learning_rate": 0.0001549736842105263, "loss": 0.5812, "step": 80440 }, { "epoch": 0.8, "learning_rate": 0.00015489473684210525, "loss": 0.5761, "step": 80450 }, { "epoch": 0.8, "learning_rate": 0.0001548157894736842, "loss": 0.5892, "step": 80460 }, { "epoch": 0.8, "learning_rate": 0.00015473684210526317, "loss": 0.583, "step": 80470 }, { "epoch": 0.8, "learning_rate": 0.0001546578947368421, "loss": 0.5818, "step": 80480 }, { "epoch": 0.8, "learning_rate": 0.00015457894736842104, "loss": 0.5782, "step": 80490 }, { "epoch": 0.81, "learning_rate": 0.0001545, "loss": 0.5796, "step": 80500 }, { "epoch": 0.81, "learning_rate": 0.00015442105263157894, "loss": 0.5846, "step": 80510 }, { "epoch": 0.81, "learning_rate": 0.00015434210526315788, "loss": 0.5836, "step": 80520 }, { "epoch": 0.81, "learning_rate": 0.00015426315789473683, "loss": 0.5852, "step": 80530 }, { "epoch": 0.81, "learning_rate": 0.00015418421052631575, "loss": 0.5989, "step": 80540 }, { "epoch": 0.81, "learning_rate": 0.0001541052631578947, "loss": 0.5874, "step": 80550 }, { "epoch": 0.81, "learning_rate": 0.00015402631578947368, "loss": 0.5817, "step": 80560 }, { "epoch": 0.81, "learning_rate": 0.00015394736842105263, "loss": 0.5643, "step": 80570 }, { "epoch": 0.81, "learning_rate": 0.00015387631578947367, "loss": 0.5793, "step": 80580 }, { "epoch": 0.81, "learning_rate": 0.00015379736842105262, "loss": 0.5642, "step": 80590 }, { "epoch": 0.81, "learning_rate": 0.00015371842105263157, "loss": 0.5814, "step": 80600 }, { "epoch": 0.81, "learning_rate": 0.0001536394736842105, "loss": 0.5886, "step": 80610 }, { "epoch": 0.81, "learning_rate": 0.00015356052631578944, "loss": 0.5893, "step": 80620 }, { "epoch": 0.81, "learning_rate": 0.00015348157894736842, "loss": 0.5857, "step": 80630 }, { "epoch": 0.81, "learning_rate": 0.00015340263157894736, "loss": 0.5738, "step": 80640 }, { "epoch": 0.81, "learning_rate": 0.0001533236842105263, "loss": 0.5745, "step": 80650 }, { "epoch": 0.81, "learning_rate": 0.00015324473684210526, "loss": 0.5835, "step": 80660 }, { "epoch": 0.81, "learning_rate": 0.0001531657894736842, "loss": 0.5711, "step": 80670 }, { "epoch": 0.81, "learning_rate": 0.00015308684210526313, "loss": 0.5938, "step": 80680 }, { "epoch": 0.81, "learning_rate": 0.00015300789473684208, "loss": 0.5784, "step": 80690 }, { "epoch": 0.81, "learning_rate": 0.00015292894736842103, "loss": 0.5649, "step": 80700 }, { "epoch": 0.81, "learning_rate": 0.00015284999999999997, "loss": 0.5775, "step": 80710 }, { "epoch": 0.81, "learning_rate": 0.00015277105263157895, "loss": 0.5788, "step": 80720 }, { "epoch": 0.81, "learning_rate": 0.0001526921052631579, "loss": 0.5825, "step": 80730 }, { "epoch": 0.81, "learning_rate": 0.00015261315789473685, "loss": 0.5768, "step": 80740 }, { "epoch": 0.81, "learning_rate": 0.00015253421052631577, "loss": 0.5765, "step": 80750 }, { "epoch": 0.81, "learning_rate": 0.00015245526315789472, "loss": 0.5839, "step": 80760 }, { "epoch": 0.81, "learning_rate": 0.00015237631578947366, "loss": 0.5694, "step": 80770 }, { "epoch": 0.81, "learning_rate": 0.0001522973684210526, "loss": 0.5798, "step": 80780 }, { "epoch": 0.81, "learning_rate": 0.00015221842105263156, "loss": 0.5916, "step": 80790 }, { "epoch": 0.81, "learning_rate": 0.0001521394736842105, "loss": 0.5865, "step": 80800 }, { "epoch": 0.81, "learning_rate": 0.00015206052631578943, "loss": 0.585, "step": 80810 }, { "epoch": 0.81, "learning_rate": 0.00015198157894736843, "loss": 0.5772, "step": 80820 }, { "epoch": 0.81, "learning_rate": 0.00015190263157894735, "loss": 0.5854, "step": 80830 }, { "epoch": 0.81, "learning_rate": 0.0001518236842105263, "loss": 0.5884, "step": 80840 }, { "epoch": 0.81, "learning_rate": 0.00015174473684210525, "loss": 0.5866, "step": 80850 }, { "epoch": 0.81, "learning_rate": 0.0001516657894736842, "loss": 0.5776, "step": 80860 }, { "epoch": 0.81, "learning_rate": 0.00015158684210526315, "loss": 0.5877, "step": 80870 }, { "epoch": 0.81, "learning_rate": 0.0001515078947368421, "loss": 0.5977, "step": 80880 }, { "epoch": 0.81, "learning_rate": 0.00015142894736842102, "loss": 0.5751, "step": 80890 }, { "epoch": 0.81, "learning_rate": 0.00015134999999999997, "loss": 0.5772, "step": 80900 }, { "epoch": 0.81, "learning_rate": 0.00015127105263157894, "loss": 0.5785, "step": 80910 }, { "epoch": 0.81, "learning_rate": 0.0001511921052631579, "loss": 0.5769, "step": 80920 }, { "epoch": 0.81, "learning_rate": 0.00015111315789473684, "loss": 0.5816, "step": 80930 }, { "epoch": 0.81, "learning_rate": 0.00015103421052631579, "loss": 0.5778, "step": 80940 }, { "epoch": 0.81, "learning_rate": 0.00015095526315789473, "loss": 0.5827, "step": 80950 }, { "epoch": 0.81, "learning_rate": 0.00015087631578947366, "loss": 0.5702, "step": 80960 }, { "epoch": 0.81, "learning_rate": 0.0001507973684210526, "loss": 0.5725, "step": 80970 }, { "epoch": 0.81, "learning_rate": 0.00015071842105263155, "loss": 0.5662, "step": 80980 }, { "epoch": 0.81, "learning_rate": 0.0001506394736842105, "loss": 0.5715, "step": 80990 }, { "epoch": 0.81, "learning_rate": 0.00015056052631578948, "loss": 0.571, "step": 81000 }, { "epoch": 0.81, "learning_rate": 0.00015048157894736842, "loss": 0.5752, "step": 81010 }, { "epoch": 0.81, "learning_rate": 0.00015040263157894737, "loss": 0.5633, "step": 81020 }, { "epoch": 0.81, "learning_rate": 0.00015032368421052632, "loss": 0.5713, "step": 81030 }, { "epoch": 0.81, "learning_rate": 0.00015024473684210524, "loss": 0.5765, "step": 81040 }, { "epoch": 0.81, "learning_rate": 0.0001501657894736842, "loss": 0.5673, "step": 81050 }, { "epoch": 0.81, "learning_rate": 0.00015008684210526314, "loss": 0.5729, "step": 81060 }, { "epoch": 0.81, "learning_rate": 0.0001500078947368421, "loss": 0.5759, "step": 81070 }, { "epoch": 0.81, "learning_rate": 0.00014992894736842103, "loss": 0.5741, "step": 81080 }, { "epoch": 0.81, "learning_rate": 0.00014984999999999998, "loss": 0.5722, "step": 81090 }, { "epoch": 0.81, "learning_rate": 0.00014977105263157893, "loss": 0.5785, "step": 81100 }, { "epoch": 0.81, "learning_rate": 0.00014969210526315788, "loss": 0.5802, "step": 81110 }, { "epoch": 0.81, "learning_rate": 0.00014961315789473683, "loss": 0.5773, "step": 81120 }, { "epoch": 0.81, "learning_rate": 0.00014953421052631578, "loss": 0.5748, "step": 81130 }, { "epoch": 0.81, "learning_rate": 0.00014945526315789472, "loss": 0.5515, "step": 81140 }, { "epoch": 0.81, "learning_rate": 0.00014937631578947367, "loss": 0.5761, "step": 81150 }, { "epoch": 0.81, "learning_rate": 0.00014929736842105262, "loss": 0.5603, "step": 81160 }, { "epoch": 0.81, "learning_rate": 0.00014921842105263157, "loss": 0.5695, "step": 81170 }, { "epoch": 0.81, "learning_rate": 0.00014913947368421052, "loss": 0.5781, "step": 81180 }, { "epoch": 0.81, "learning_rate": 0.00014906052631578947, "loss": 0.5679, "step": 81190 }, { "epoch": 0.81, "learning_rate": 0.00014898157894736841, "loss": 0.5697, "step": 81200 }, { "epoch": 0.81, "learning_rate": 0.00014890263157894736, "loss": 0.5576, "step": 81210 }, { "epoch": 0.81, "learning_rate": 0.0001488236842105263, "loss": 0.5711, "step": 81220 }, { "epoch": 0.81, "learning_rate": 0.00014874473684210526, "loss": 0.5682, "step": 81230 }, { "epoch": 0.81, "learning_rate": 0.00014866578947368418, "loss": 0.5714, "step": 81240 }, { "epoch": 0.81, "learning_rate": 0.00014858684210526313, "loss": 0.5631, "step": 81250 }, { "epoch": 0.81, "learning_rate": 0.0001485078947368421, "loss": 0.5921, "step": 81260 }, { "epoch": 0.81, "learning_rate": 0.00014842894736842105, "loss": 0.5783, "step": 81270 }, { "epoch": 0.81, "learning_rate": 0.00014834999999999997, "loss": 0.5733, "step": 81280 }, { "epoch": 0.81, "learning_rate": 0.00014827105263157892, "loss": 0.5809, "step": 81290 }, { "epoch": 0.81, "learning_rate": 0.0001481921052631579, "loss": 0.5854, "step": 81300 }, { "epoch": 0.81, "learning_rate": 0.00014811315789473685, "loss": 0.5747, "step": 81310 }, { "epoch": 0.81, "learning_rate": 0.00014803421052631577, "loss": 0.5947, "step": 81320 }, { "epoch": 0.81, "learning_rate": 0.00014795526315789472, "loss": 0.5814, "step": 81330 }, { "epoch": 0.81, "learning_rate": 0.00014787631578947366, "loss": 0.5882, "step": 81340 }, { "epoch": 0.81, "learning_rate": 0.0001477973684210526, "loss": 0.574, "step": 81350 }, { "epoch": 0.81, "learning_rate": 0.00014771842105263156, "loss": 0.5781, "step": 81360 }, { "epoch": 0.81, "learning_rate": 0.0001476394736842105, "loss": 0.5737, "step": 81370 }, { "epoch": 0.81, "learning_rate": 0.00014756052631578946, "loss": 0.577, "step": 81380 }, { "epoch": 0.81, "learning_rate": 0.0001474815789473684, "loss": 0.5697, "step": 81390 }, { "epoch": 0.81, "learning_rate": 0.00014740263157894735, "loss": 0.5641, "step": 81400 }, { "epoch": 0.81, "learning_rate": 0.0001473236842105263, "loss": 0.5671, "step": 81410 }, { "epoch": 0.81, "learning_rate": 0.00014724473684210525, "loss": 0.5739, "step": 81420 }, { "epoch": 0.81, "learning_rate": 0.0001471657894736842, "loss": 0.5815, "step": 81430 }, { "epoch": 0.81, "learning_rate": 0.00014708684210526315, "loss": 0.5678, "step": 81440 }, { "epoch": 0.81, "learning_rate": 0.0001470078947368421, "loss": 0.5733, "step": 81450 }, { "epoch": 0.81, "learning_rate": 0.00014692894736842104, "loss": 0.5691, "step": 81460 }, { "epoch": 0.81, "learning_rate": 0.00014685, "loss": 0.5652, "step": 81470 }, { "epoch": 0.81, "learning_rate": 0.00014677105263157894, "loss": 0.578, "step": 81480 }, { "epoch": 0.81, "learning_rate": 0.0001466921052631579, "loss": 0.571, "step": 81490 }, { "epoch": 0.81, "learning_rate": 0.00014661315789473684, "loss": 0.5736, "step": 81500 }, { "epoch": 0.82, "learning_rate": 0.00014653421052631578, "loss": 0.5844, "step": 81510 }, { "epoch": 0.82, "learning_rate": 0.00014645526315789473, "loss": 0.5876, "step": 81520 }, { "epoch": 0.82, "learning_rate": 0.00014637631578947365, "loss": 0.5787, "step": 81530 }, { "epoch": 0.82, "learning_rate": 0.00014629736842105263, "loss": 0.5773, "step": 81540 }, { "epoch": 0.82, "learning_rate": 0.00014621842105263158, "loss": 0.5797, "step": 81550 }, { "epoch": 0.82, "learning_rate": 0.0001461394736842105, "loss": 0.5792, "step": 81560 }, { "epoch": 0.82, "learning_rate": 0.00014606052631578945, "loss": 0.5893, "step": 81570 }, { "epoch": 0.82, "learning_rate": 0.00014598947368421052, "loss": 0.5873, "step": 81580 }, { "epoch": 0.82, "learning_rate": 0.00014591052631578944, "loss": 0.5808, "step": 81590 }, { "epoch": 0.82, "learning_rate": 0.00014583157894736842, "loss": 0.5913, "step": 81600 }, { "epoch": 0.82, "learning_rate": 0.00014575263157894737, "loss": 0.5545, "step": 81610 }, { "epoch": 0.82, "learning_rate": 0.00014567368421052632, "loss": 0.557, "step": 81620 }, { "epoch": 0.82, "learning_rate": 0.00014559473684210524, "loss": 0.5605, "step": 81630 }, { "epoch": 0.82, "learning_rate": 0.00014551578947368419, "loss": 0.5748, "step": 81640 }, { "epoch": 0.82, "learning_rate": 0.00014543684210526316, "loss": 0.5755, "step": 81650 }, { "epoch": 0.82, "learning_rate": 0.00014535789473684208, "loss": 0.582, "step": 81660 }, { "epoch": 0.82, "learning_rate": 0.00014527894736842103, "loss": 0.5609, "step": 81670 }, { "epoch": 0.82, "learning_rate": 0.00014519999999999998, "loss": 0.5772, "step": 81680 }, { "epoch": 0.82, "learning_rate": 0.00014512105263157893, "loss": 0.5669, "step": 81690 }, { "epoch": 0.82, "learning_rate": 0.00014504210526315788, "loss": 0.5783, "step": 81700 }, { "epoch": 0.82, "learning_rate": 0.00014496315789473682, "loss": 0.5776, "step": 81710 }, { "epoch": 0.82, "learning_rate": 0.00014488421052631577, "loss": 0.5697, "step": 81720 }, { "epoch": 0.82, "learning_rate": 0.00014480526315789472, "loss": 0.5707, "step": 81730 }, { "epoch": 0.82, "learning_rate": 0.00014472631578947367, "loss": 0.5693, "step": 81740 }, { "epoch": 0.82, "learning_rate": 0.00014464736842105262, "loss": 0.5816, "step": 81750 }, { "epoch": 0.82, "learning_rate": 0.00014456842105263157, "loss": 0.5844, "step": 81760 }, { "epoch": 0.82, "learning_rate": 0.0001444894736842105, "loss": 0.574, "step": 81770 }, { "epoch": 0.82, "learning_rate": 0.00014441052631578946, "loss": 0.5768, "step": 81780 }, { "epoch": 0.82, "learning_rate": 0.0001443315789473684, "loss": 0.5779, "step": 81790 }, { "epoch": 0.82, "learning_rate": 0.00014425263157894736, "loss": 0.5867, "step": 81800 }, { "epoch": 0.82, "learning_rate": 0.0001441736842105263, "loss": 0.5844, "step": 81810 }, { "epoch": 0.82, "learning_rate": 0.00014409473684210525, "loss": 0.5804, "step": 81820 }, { "epoch": 0.82, "learning_rate": 0.0001440157894736842, "loss": 0.5758, "step": 81830 }, { "epoch": 0.82, "learning_rate": 0.00014393684210526315, "loss": 0.5703, "step": 81840 }, { "epoch": 0.82, "learning_rate": 0.0001438578947368421, "loss": 0.5617, "step": 81850 }, { "epoch": 0.82, "learning_rate": 0.00014377894736842105, "loss": 0.5782, "step": 81860 }, { "epoch": 0.82, "learning_rate": 0.00014369999999999997, "loss": 0.5749, "step": 81870 }, { "epoch": 0.82, "learning_rate": 0.00014362105263157894, "loss": 0.5667, "step": 81880 }, { "epoch": 0.82, "learning_rate": 0.0001435421052631579, "loss": 0.5723, "step": 81890 }, { "epoch": 0.82, "learning_rate": 0.00014346315789473684, "loss": 0.5713, "step": 81900 }, { "epoch": 0.82, "learning_rate": 0.00014338421052631576, "loss": 0.5775, "step": 81910 }, { "epoch": 0.82, "learning_rate": 0.0001433052631578947, "loss": 0.5775, "step": 81920 }, { "epoch": 0.82, "learning_rate": 0.00014322631578947369, "loss": 0.5758, "step": 81930 }, { "epoch": 0.82, "learning_rate": 0.00014314736842105263, "loss": 0.5675, "step": 81940 }, { "epoch": 0.82, "learning_rate": 0.00014306842105263156, "loss": 0.5707, "step": 81950 }, { "epoch": 0.82, "learning_rate": 0.0001429894736842105, "loss": 0.5756, "step": 81960 }, { "epoch": 0.82, "learning_rate": 0.00014291052631578945, "loss": 0.5799, "step": 81970 }, { "epoch": 0.82, "learning_rate": 0.0001428315789473684, "loss": 0.5749, "step": 81980 }, { "epoch": 0.82, "learning_rate": 0.00014275263157894735, "loss": 0.5787, "step": 81990 }, { "epoch": 0.82, "learning_rate": 0.0001426736842105263, "loss": 0.5879, "step": 82000 }, { "epoch": 0.82, "learning_rate": 0.00014259473684210525, "loss": 0.5665, "step": 82010 }, { "epoch": 0.82, "learning_rate": 0.0001425157894736842, "loss": 0.5831, "step": 82020 }, { "epoch": 0.82, "learning_rate": 0.00014243684210526314, "loss": 0.5762, "step": 82030 }, { "epoch": 0.82, "learning_rate": 0.0001423578947368421, "loss": 0.5659, "step": 82040 }, { "epoch": 0.82, "learning_rate": 0.00014227894736842104, "loss": 0.5767, "step": 82050 }, { "epoch": 0.82, "learning_rate": 0.0001422, "loss": 0.5791, "step": 82060 }, { "epoch": 0.82, "learning_rate": 0.00014212105263157894, "loss": 0.5824, "step": 82070 }, { "epoch": 0.82, "learning_rate": 0.00014204210526315788, "loss": 0.5663, "step": 82080 }, { "epoch": 0.82, "learning_rate": 0.00014196315789473683, "loss": 0.5722, "step": 82090 }, { "epoch": 0.82, "learning_rate": 0.00014188421052631578, "loss": 0.5732, "step": 82100 }, { "epoch": 0.82, "learning_rate": 0.00014180526315789473, "loss": 0.5631, "step": 82110 }, { "epoch": 0.82, "learning_rate": 0.00014172631578947368, "loss": 0.5808, "step": 82120 }, { "epoch": 0.82, "learning_rate": 0.00014164736842105263, "loss": 0.5758, "step": 82130 }, { "epoch": 0.82, "learning_rate": 0.00014156842105263157, "loss": 0.5772, "step": 82140 }, { "epoch": 0.82, "learning_rate": 0.0001414894736842105, "loss": 0.5568, "step": 82150 }, { "epoch": 0.82, "learning_rate": 0.00014141052631578947, "loss": 0.5579, "step": 82160 }, { "epoch": 0.82, "learning_rate": 0.00014133157894736842, "loss": 0.557, "step": 82170 }, { "epoch": 0.82, "learning_rate": 0.00014125263157894737, "loss": 0.5525, "step": 82180 }, { "epoch": 0.82, "learning_rate": 0.0001411736842105263, "loss": 0.5627, "step": 82190 }, { "epoch": 0.82, "learning_rate": 0.00014109473684210524, "loss": 0.5533, "step": 82200 }, { "epoch": 0.82, "learning_rate": 0.0001410157894736842, "loss": 0.5594, "step": 82210 }, { "epoch": 0.82, "learning_rate": 0.00014093684210526316, "loss": 0.5646, "step": 82220 }, { "epoch": 0.82, "learning_rate": 0.00014085789473684208, "loss": 0.5733, "step": 82230 }, { "epoch": 0.82, "learning_rate": 0.00014077894736842103, "loss": 0.5598, "step": 82240 }, { "epoch": 0.82, "learning_rate": 0.00014069999999999998, "loss": 0.5533, "step": 82250 }, { "epoch": 0.82, "learning_rate": 0.00014062105263157895, "loss": 0.5643, "step": 82260 }, { "epoch": 0.82, "learning_rate": 0.00014054210526315787, "loss": 0.5852, "step": 82270 }, { "epoch": 0.82, "learning_rate": 0.00014046315789473682, "loss": 0.5951, "step": 82280 }, { "epoch": 0.82, "learning_rate": 0.00014038421052631577, "loss": 0.5833, "step": 82290 }, { "epoch": 0.82, "learning_rate": 0.00014030526315789472, "loss": 0.5765, "step": 82300 }, { "epoch": 0.82, "learning_rate": 0.00014022631578947367, "loss": 0.569, "step": 82310 }, { "epoch": 0.82, "learning_rate": 0.00014014736842105262, "loss": 0.5765, "step": 82320 }, { "epoch": 0.82, "learning_rate": 0.00014006842105263156, "loss": 0.5693, "step": 82330 }, { "epoch": 0.82, "learning_rate": 0.0001399894736842105, "loss": 0.5802, "step": 82340 }, { "epoch": 0.82, "learning_rate": 0.00013991052631578946, "loss": 0.5775, "step": 82350 }, { "epoch": 0.82, "learning_rate": 0.0001398315789473684, "loss": 0.5754, "step": 82360 }, { "epoch": 0.82, "learning_rate": 0.00013975263157894736, "loss": 0.5753, "step": 82370 }, { "epoch": 0.82, "learning_rate": 0.0001396736842105263, "loss": 0.5813, "step": 82380 }, { "epoch": 0.82, "learning_rate": 0.00013959473684210525, "loss": 0.5823, "step": 82390 }, { "epoch": 0.82, "learning_rate": 0.0001395157894736842, "loss": 0.5783, "step": 82400 }, { "epoch": 0.82, "learning_rate": 0.00013943684210526315, "loss": 0.5688, "step": 82410 }, { "epoch": 0.82, "learning_rate": 0.0001393578947368421, "loss": 0.584, "step": 82420 }, { "epoch": 0.82, "learning_rate": 0.00013927894736842105, "loss": 0.5766, "step": 82430 }, { "epoch": 0.82, "learning_rate": 0.0001392, "loss": 0.5804, "step": 82440 }, { "epoch": 0.82, "learning_rate": 0.00013912105263157894, "loss": 0.587, "step": 82450 }, { "epoch": 0.82, "learning_rate": 0.0001390421052631579, "loss": 0.58, "step": 82460 }, { "epoch": 0.82, "learning_rate": 0.0001389631578947368, "loss": 0.5762, "step": 82470 }, { "epoch": 0.82, "learning_rate": 0.00013888421052631576, "loss": 0.5972, "step": 82480 }, { "epoch": 0.82, "learning_rate": 0.00013880526315789474, "loss": 0.5743, "step": 82490 }, { "epoch": 0.82, "learning_rate": 0.00013872631578947369, "loss": 0.5889, "step": 82500 }, { "epoch": 0.82, "eval_accuracy": 0.8794982706220447, "eval_loss": 0.5517578125, "eval_runtime": 100.2401, "eval_samples_per_second": 798.084, "eval_steps_per_second": 1.566, "step": 82500 }, { "epoch": 0.83, "learning_rate": 0.0001386473684210526, "loss": 0.5757, "step": 82510 }, { "epoch": 0.83, "learning_rate": 0.00013856842105263155, "loss": 0.5756, "step": 82520 }, { "epoch": 0.83, "learning_rate": 0.00013848947368421053, "loss": 0.5738, "step": 82530 }, { "epoch": 0.83, "learning_rate": 0.00013841052631578948, "loss": 0.5746, "step": 82540 }, { "epoch": 0.83, "learning_rate": 0.00013833947368421053, "loss": 0.5783, "step": 82550 }, { "epoch": 0.83, "learning_rate": 0.00013826052631578947, "loss": 0.5797, "step": 82560 }, { "epoch": 0.83, "learning_rate": 0.00013818157894736842, "loss": 0.5635, "step": 82570 }, { "epoch": 0.83, "learning_rate": 0.00013810263157894734, "loss": 0.5661, "step": 82580 }, { "epoch": 0.83, "learning_rate": 0.0001380236842105263, "loss": 0.5643, "step": 82590 }, { "epoch": 0.83, "learning_rate": 0.00013794473684210527, "loss": 0.5672, "step": 82600 }, { "epoch": 0.83, "learning_rate": 0.0001378657894736842, "loss": 0.5687, "step": 82610 }, { "epoch": 0.83, "learning_rate": 0.00013778684210526314, "loss": 0.5608, "step": 82620 }, { "epoch": 0.83, "learning_rate": 0.00013770789473684209, "loss": 0.5723, "step": 82630 }, { "epoch": 0.83, "learning_rate": 0.00013762894736842103, "loss": 0.5653, "step": 82640 }, { "epoch": 0.83, "learning_rate": 0.00013754999999999998, "loss": 0.5741, "step": 82650 }, { "epoch": 0.83, "learning_rate": 0.00013747105263157893, "loss": 0.5806, "step": 82660 }, { "epoch": 0.83, "learning_rate": 0.00013739210526315788, "loss": 0.5871, "step": 82670 }, { "epoch": 0.83, "learning_rate": 0.00013731315789473683, "loss": 0.5728, "step": 82680 }, { "epoch": 0.83, "learning_rate": 0.00013723421052631578, "loss": 0.5813, "step": 82690 }, { "epoch": 0.83, "learning_rate": 0.00013715526315789472, "loss": 0.589, "step": 82700 }, { "epoch": 0.83, "learning_rate": 0.00013707631578947367, "loss": 0.5815, "step": 82710 }, { "epoch": 0.83, "learning_rate": 0.00013699736842105262, "loss": 0.5947, "step": 82720 }, { "epoch": 0.83, "learning_rate": 0.00013691842105263157, "loss": 0.575, "step": 82730 }, { "epoch": 0.83, "learning_rate": 0.00013683947368421052, "loss": 0.5756, "step": 82740 }, { "epoch": 0.83, "learning_rate": 0.00013676052631578947, "loss": 0.5771, "step": 82750 }, { "epoch": 0.83, "learning_rate": 0.00013668157894736841, "loss": 0.5708, "step": 82760 }, { "epoch": 0.83, "learning_rate": 0.00013660263157894736, "loss": 0.5732, "step": 82770 }, { "epoch": 0.83, "learning_rate": 0.00013652368421052628, "loss": 0.5662, "step": 82780 }, { "epoch": 0.83, "learning_rate": 0.00013644473684210526, "loss": 0.5629, "step": 82790 }, { "epoch": 0.83, "learning_rate": 0.0001363657894736842, "loss": 0.5633, "step": 82800 }, { "epoch": 0.83, "learning_rate": 0.00013628684210526316, "loss": 0.5677, "step": 82810 }, { "epoch": 0.83, "learning_rate": 0.00013620789473684208, "loss": 0.5756, "step": 82820 }, { "epoch": 0.83, "learning_rate": 0.00013612894736842105, "loss": 0.5683, "step": 82830 }, { "epoch": 0.83, "learning_rate": 0.00013605, "loss": 0.5652, "step": 82840 }, { "epoch": 0.83, "learning_rate": 0.00013597105263157895, "loss": 0.5726, "step": 82850 }, { "epoch": 0.83, "learning_rate": 0.00013589210526315787, "loss": 0.5673, "step": 82860 }, { "epoch": 0.83, "learning_rate": 0.00013581315789473682, "loss": 0.5751, "step": 82870 }, { "epoch": 0.83, "learning_rate": 0.0001357342105263158, "loss": 0.5849, "step": 82880 }, { "epoch": 0.83, "learning_rate": 0.00013565526315789471, "loss": 0.5781, "step": 82890 }, { "epoch": 0.83, "learning_rate": 0.00013557631578947366, "loss": 0.5906, "step": 82900 }, { "epoch": 0.83, "learning_rate": 0.0001354973684210526, "loss": 0.5825, "step": 82910 }, { "epoch": 0.83, "learning_rate": 0.00013541842105263156, "loss": 0.5814, "step": 82920 }, { "epoch": 0.83, "learning_rate": 0.0001353394736842105, "loss": 0.5805, "step": 82930 }, { "epoch": 0.83, "learning_rate": 0.00013526052631578946, "loss": 0.59, "step": 82940 }, { "epoch": 0.83, "learning_rate": 0.0001351815789473684, "loss": 0.5801, "step": 82950 }, { "epoch": 0.83, "learning_rate": 0.00013510263157894735, "loss": 0.5804, "step": 82960 }, { "epoch": 0.83, "learning_rate": 0.0001350236842105263, "loss": 0.5688, "step": 82970 }, { "epoch": 0.83, "learning_rate": 0.00013494473684210525, "loss": 0.5784, "step": 82980 }, { "epoch": 0.83, "learning_rate": 0.0001348657894736842, "loss": 0.5782, "step": 82990 }, { "epoch": 0.83, "learning_rate": 0.00013478684210526315, "loss": 0.5714, "step": 83000 }, { "epoch": 0.83, "learning_rate": 0.0001347078947368421, "loss": 0.5778, "step": 83010 }, { "epoch": 0.83, "learning_rate": 0.00013462894736842104, "loss": 0.5644, "step": 83020 }, { "epoch": 0.83, "learning_rate": 0.00013455, "loss": 0.5734, "step": 83030 }, { "epoch": 0.83, "learning_rate": 0.00013447105263157894, "loss": 0.5741, "step": 83040 }, { "epoch": 0.83, "learning_rate": 0.0001343921052631579, "loss": 0.571, "step": 83050 }, { "epoch": 0.83, "learning_rate": 0.00013431315789473684, "loss": 0.57, "step": 83060 }, { "epoch": 0.83, "learning_rate": 0.00013423421052631578, "loss": 0.5812, "step": 83070 }, { "epoch": 0.83, "learning_rate": 0.00013415526315789473, "loss": 0.5815, "step": 83080 }, { "epoch": 0.83, "learning_rate": 0.00013407631578947368, "loss": 0.5782, "step": 83090 }, { "epoch": 0.83, "learning_rate": 0.0001339973684210526, "loss": 0.5726, "step": 83100 }, { "epoch": 0.83, "learning_rate": 0.00013391842105263158, "loss": 0.5714, "step": 83110 }, { "epoch": 0.83, "learning_rate": 0.00013383947368421053, "loss": 0.5716, "step": 83120 }, { "epoch": 0.83, "learning_rate": 0.00013376052631578947, "loss": 0.5585, "step": 83130 }, { "epoch": 0.83, "learning_rate": 0.0001336815789473684, "loss": 0.5718, "step": 83140 }, { "epoch": 0.83, "learning_rate": 0.00013360263157894734, "loss": 0.5602, "step": 83150 }, { "epoch": 0.83, "learning_rate": 0.00013352368421052632, "loss": 0.5593, "step": 83160 }, { "epoch": 0.83, "learning_rate": 0.00013344473684210527, "loss": 0.5625, "step": 83170 }, { "epoch": 0.83, "learning_rate": 0.0001333657894736842, "loss": 0.572, "step": 83180 }, { "epoch": 0.83, "learning_rate": 0.00013328684210526314, "loss": 0.5735, "step": 83190 }, { "epoch": 0.83, "learning_rate": 0.00013320789473684208, "loss": 0.5645, "step": 83200 }, { "epoch": 0.83, "learning_rate": 0.00013312894736842103, "loss": 0.5663, "step": 83210 }, { "epoch": 0.83, "learning_rate": 0.00013304999999999998, "loss": 0.5636, "step": 83220 }, { "epoch": 0.83, "learning_rate": 0.00013297105263157893, "loss": 0.5556, "step": 83230 }, { "epoch": 0.83, "learning_rate": 0.00013289210526315788, "loss": 0.5663, "step": 83240 }, { "epoch": 0.83, "learning_rate": 0.00013281315789473683, "loss": 0.5591, "step": 83250 }, { "epoch": 0.83, "learning_rate": 0.00013273421052631577, "loss": 0.5604, "step": 83260 }, { "epoch": 0.83, "learning_rate": 0.00013265526315789472, "loss": 0.5677, "step": 83270 }, { "epoch": 0.83, "learning_rate": 0.00013257631578947367, "loss": 0.5581, "step": 83280 }, { "epoch": 0.83, "learning_rate": 0.00013249736842105262, "loss": 0.5548, "step": 83290 }, { "epoch": 0.83, "learning_rate": 0.00013241842105263157, "loss": 0.5691, "step": 83300 }, { "epoch": 0.83, "learning_rate": 0.00013233947368421052, "loss": 0.5538, "step": 83310 }, { "epoch": 0.83, "learning_rate": 0.00013226052631578946, "loss": 0.563, "step": 83320 }, { "epoch": 0.83, "learning_rate": 0.0001321815789473684, "loss": 0.5574, "step": 83330 }, { "epoch": 0.83, "learning_rate": 0.00013210263157894736, "loss": 0.5488, "step": 83340 }, { "epoch": 0.83, "learning_rate": 0.0001320236842105263, "loss": 0.5604, "step": 83350 }, { "epoch": 0.83, "learning_rate": 0.00013194473684210526, "loss": 0.562, "step": 83360 }, { "epoch": 0.83, "learning_rate": 0.0001318657894736842, "loss": 0.5502, "step": 83370 }, { "epoch": 0.83, "learning_rate": 0.00013178684210526313, "loss": 0.5579, "step": 83380 }, { "epoch": 0.83, "learning_rate": 0.0001317078947368421, "loss": 0.5547, "step": 83390 }, { "epoch": 0.83, "learning_rate": 0.00013162894736842105, "loss": 0.5577, "step": 83400 }, { "epoch": 0.83, "learning_rate": 0.00013155, "loss": 0.5585, "step": 83410 }, { "epoch": 0.83, "learning_rate": 0.00013147105263157892, "loss": 0.5655, "step": 83420 }, { "epoch": 0.83, "learning_rate": 0.00013139210526315787, "loss": 0.5482, "step": 83430 }, { "epoch": 0.83, "learning_rate": 0.00013131315789473684, "loss": 0.5582, "step": 83440 }, { "epoch": 0.83, "learning_rate": 0.0001312342105263158, "loss": 0.5532, "step": 83450 }, { "epoch": 0.83, "learning_rate": 0.00013115526315789471, "loss": 0.5531, "step": 83460 }, { "epoch": 0.83, "learning_rate": 0.00013107631578947366, "loss": 0.5593, "step": 83470 }, { "epoch": 0.83, "learning_rate": 0.0001309973684210526, "loss": 0.5784, "step": 83480 }, { "epoch": 0.83, "learning_rate": 0.00013091842105263159, "loss": 0.5901, "step": 83490 }, { "epoch": 0.83, "learning_rate": 0.0001308394736842105, "loss": 0.5839, "step": 83500 }, { "epoch": 0.84, "learning_rate": 0.00013076052631578946, "loss": 0.5809, "step": 83510 }, { "epoch": 0.84, "learning_rate": 0.0001306815789473684, "loss": 0.5645, "step": 83520 }, { "epoch": 0.84, "learning_rate": 0.00013060263157894735, "loss": 0.557, "step": 83530 }, { "epoch": 0.84, "learning_rate": 0.0001305236842105263, "loss": 0.5608, "step": 83540 }, { "epoch": 0.84, "learning_rate": 0.00013044473684210525, "loss": 0.5725, "step": 83550 }, { "epoch": 0.84, "learning_rate": 0.0001303657894736842, "loss": 0.5722, "step": 83560 }, { "epoch": 0.84, "learning_rate": 0.00013028684210526314, "loss": 0.5826, "step": 83570 }, { "epoch": 0.84, "learning_rate": 0.0001302078947368421, "loss": 0.5803, "step": 83580 }, { "epoch": 0.84, "learning_rate": 0.00013012894736842104, "loss": 0.5769, "step": 83590 }, { "epoch": 0.84, "learning_rate": 0.00013005, "loss": 0.5756, "step": 83600 }, { "epoch": 0.84, "learning_rate": 0.00012997105263157894, "loss": 0.5828, "step": 83610 }, { "epoch": 0.84, "learning_rate": 0.00012989210526315789, "loss": 0.5682, "step": 83620 }, { "epoch": 0.84, "learning_rate": 0.00012981315789473683, "loss": 0.5652, "step": 83630 }, { "epoch": 0.84, "learning_rate": 0.00012973421052631578, "loss": 0.5668, "step": 83640 }, { "epoch": 0.84, "learning_rate": 0.00012965526315789473, "loss": 0.5774, "step": 83650 }, { "epoch": 0.84, "learning_rate": 0.00012957631578947368, "loss": 0.5789, "step": 83660 }, { "epoch": 0.84, "learning_rate": 0.00012949736842105263, "loss": 0.5753, "step": 83670 }, { "epoch": 0.84, "learning_rate": 0.00012941842105263158, "loss": 0.5741, "step": 83680 }, { "epoch": 0.84, "learning_rate": 0.00012933947368421052, "loss": 0.577, "step": 83690 }, { "epoch": 0.84, "learning_rate": 0.00012926052631578945, "loss": 0.5776, "step": 83700 }, { "epoch": 0.84, "learning_rate": 0.0001291815789473684, "loss": 0.581, "step": 83710 }, { "epoch": 0.84, "learning_rate": 0.00012910263157894737, "loss": 0.5839, "step": 83720 }, { "epoch": 0.84, "learning_rate": 0.00012902368421052632, "loss": 0.5858, "step": 83730 }, { "epoch": 0.84, "learning_rate": 0.00012894473684210524, "loss": 0.5822, "step": 83740 }, { "epoch": 0.84, "learning_rate": 0.0001288657894736842, "loss": 0.5686, "step": 83750 }, { "epoch": 0.84, "learning_rate": 0.00012878684210526314, "loss": 0.5732, "step": 83760 }, { "epoch": 0.84, "learning_rate": 0.0001287078947368421, "loss": 0.5713, "step": 83770 }, { "epoch": 0.84, "learning_rate": 0.00012862894736842103, "loss": 0.5744, "step": 83780 }, { "epoch": 0.84, "learning_rate": 0.00012854999999999998, "loss": 0.5802, "step": 83790 }, { "epoch": 0.84, "learning_rate": 0.00012847105263157893, "loss": 0.5658, "step": 83800 }, { "epoch": 0.84, "learning_rate": 0.00012839210526315788, "loss": 0.5678, "step": 83810 }, { "epoch": 0.84, "learning_rate": 0.00012831315789473683, "loss": 0.5731, "step": 83820 }, { "epoch": 0.84, "learning_rate": 0.00012823421052631577, "loss": 0.567, "step": 83830 }, { "epoch": 0.84, "learning_rate": 0.00012815526315789472, "loss": 0.5827, "step": 83840 }, { "epoch": 0.84, "learning_rate": 0.00012807631578947367, "loss": 0.5555, "step": 83850 }, { "epoch": 0.84, "learning_rate": 0.00012799736842105262, "loss": 0.5572, "step": 83860 }, { "epoch": 0.84, "learning_rate": 0.00012791842105263157, "loss": 0.5613, "step": 83870 }, { "epoch": 0.84, "learning_rate": 0.00012783947368421052, "loss": 0.5544, "step": 83880 }, { "epoch": 0.84, "learning_rate": 0.00012776052631578946, "loss": 0.5537, "step": 83890 }, { "epoch": 0.84, "learning_rate": 0.0001276815789473684, "loss": 0.5513, "step": 83900 }, { "epoch": 0.84, "learning_rate": 0.00012760263157894736, "loss": 0.5503, "step": 83910 }, { "epoch": 0.84, "learning_rate": 0.0001275236842105263, "loss": 0.5586, "step": 83920 }, { "epoch": 0.84, "learning_rate": 0.00012744473684210526, "loss": 0.5536, "step": 83930 }, { "epoch": 0.84, "learning_rate": 0.0001273657894736842, "loss": 0.559, "step": 83940 }, { "epoch": 0.84, "learning_rate": 0.00012728684210526315, "loss": 0.5508, "step": 83950 }, { "epoch": 0.84, "learning_rate": 0.0001272078947368421, "loss": 0.5663, "step": 83960 }, { "epoch": 0.84, "learning_rate": 0.00012712894736842105, "loss": 0.5546, "step": 83970 }, { "epoch": 0.84, "learning_rate": 0.00012705, "loss": 0.5634, "step": 83980 }, { "epoch": 0.84, "learning_rate": 0.00012697105263157892, "loss": 0.551, "step": 83990 }, { "epoch": 0.84, "learning_rate": 0.0001268921052631579, "loss": 0.563, "step": 84000 }, { "epoch": 0.84, "learning_rate": 0.00012681315789473684, "loss": 0.5495, "step": 84010 }, { "epoch": 0.84, "learning_rate": 0.00012673421052631576, "loss": 0.5614, "step": 84020 }, { "epoch": 0.84, "learning_rate": 0.0001266552631578947, "loss": 0.5684, "step": 84030 }, { "epoch": 0.84, "learning_rate": 0.0001265763157894737, "loss": 0.5754, "step": 84040 }, { "epoch": 0.84, "learning_rate": 0.00012649736842105264, "loss": 0.5728, "step": 84050 }, { "epoch": 0.84, "learning_rate": 0.00012641842105263156, "loss": 0.577, "step": 84060 }, { "epoch": 0.84, "learning_rate": 0.0001263394736842105, "loss": 0.5716, "step": 84070 }, { "epoch": 0.84, "learning_rate": 0.00012626052631578945, "loss": 0.5776, "step": 84080 }, { "epoch": 0.84, "learning_rate": 0.00012618157894736843, "loss": 0.5699, "step": 84090 }, { "epoch": 0.84, "learning_rate": 0.00012610263157894735, "loss": 0.5577, "step": 84100 }, { "epoch": 0.84, "learning_rate": 0.0001260236842105263, "loss": 0.5633, "step": 84110 }, { "epoch": 0.84, "learning_rate": 0.00012594473684210525, "loss": 0.5601, "step": 84120 }, { "epoch": 0.84, "learning_rate": 0.0001258657894736842, "loss": 0.5728, "step": 84130 }, { "epoch": 0.84, "learning_rate": 0.00012578684210526314, "loss": 0.5721, "step": 84140 }, { "epoch": 0.84, "learning_rate": 0.0001257078947368421, "loss": 0.5712, "step": 84150 }, { "epoch": 0.84, "learning_rate": 0.00012562894736842104, "loss": 0.5693, "step": 84160 }, { "epoch": 0.84, "learning_rate": 0.00012555, "loss": 0.5685, "step": 84170 }, { "epoch": 0.84, "learning_rate": 0.00012547105263157894, "loss": 0.5685, "step": 84180 }, { "epoch": 0.84, "learning_rate": 0.00012539210526315789, "loss": 0.5807, "step": 84190 }, { "epoch": 0.84, "learning_rate": 0.00012531315789473683, "loss": 0.5684, "step": 84200 }, { "epoch": 0.84, "learning_rate": 0.00012523421052631578, "loss": 0.5761, "step": 84210 }, { "epoch": 0.84, "learning_rate": 0.00012515526315789473, "loss": 0.5613, "step": 84220 }, { "epoch": 0.84, "learning_rate": 0.00012507631578947368, "loss": 0.5687, "step": 84230 }, { "epoch": 0.84, "learning_rate": 0.00012499736842105263, "loss": 0.5559, "step": 84240 }, { "epoch": 0.84, "learning_rate": 0.00012491842105263158, "loss": 0.5647, "step": 84250 }, { "epoch": 0.84, "learning_rate": 0.00012483947368421052, "loss": 0.5591, "step": 84260 }, { "epoch": 0.84, "learning_rate": 0.00012476052631578944, "loss": 0.5701, "step": 84270 }, { "epoch": 0.84, "learning_rate": 0.00012468157894736842, "loss": 0.5585, "step": 84280 }, { "epoch": 0.84, "learning_rate": 0.00012460263157894737, "loss": 0.5514, "step": 84290 }, { "epoch": 0.84, "learning_rate": 0.00012452368421052632, "loss": 0.5582, "step": 84300 }, { "epoch": 0.84, "learning_rate": 0.00012444473684210524, "loss": 0.562, "step": 84310 }, { "epoch": 0.84, "learning_rate": 0.0001243657894736842, "loss": 0.564, "step": 84320 }, { "epoch": 0.84, "learning_rate": 0.00012428684210526316, "loss": 0.5697, "step": 84330 }, { "epoch": 0.84, "learning_rate": 0.00012420789473684208, "loss": 0.5632, "step": 84340 }, { "epoch": 0.84, "learning_rate": 0.00012412894736842103, "loss": 0.5635, "step": 84350 }, { "epoch": 0.84, "learning_rate": 0.00012404999999999998, "loss": 0.5632, "step": 84360 }, { "epoch": 0.84, "learning_rate": 0.00012397105263157895, "loss": 0.5672, "step": 84370 }, { "epoch": 0.84, "learning_rate": 0.00012389210526315788, "loss": 0.5793, "step": 84380 }, { "epoch": 0.84, "learning_rate": 0.00012381315789473682, "loss": 0.5808, "step": 84390 }, { "epoch": 0.84, "learning_rate": 0.00012373421052631577, "loss": 0.564, "step": 84400 }, { "epoch": 0.84, "learning_rate": 0.00012365526315789472, "loss": 0.5755, "step": 84410 }, { "epoch": 0.84, "learning_rate": 0.00012357631578947367, "loss": 0.5771, "step": 84420 }, { "epoch": 0.84, "learning_rate": 0.00012349736842105262, "loss": 0.5823, "step": 84430 }, { "epoch": 0.84, "learning_rate": 0.00012341842105263157, "loss": 0.5742, "step": 84440 }, { "epoch": 0.84, "learning_rate": 0.00012333947368421051, "loss": 0.5693, "step": 84450 }, { "epoch": 0.84, "learning_rate": 0.00012326052631578946, "loss": 0.567, "step": 84460 }, { "epoch": 0.84, "learning_rate": 0.0001231815789473684, "loss": 0.5529, "step": 84470 }, { "epoch": 0.84, "learning_rate": 0.00012310263157894736, "loss": 0.5767, "step": 84480 }, { "epoch": 0.84, "learning_rate": 0.0001230236842105263, "loss": 0.5642, "step": 84490 }, { "epoch": 0.84, "learning_rate": 0.00012294473684210526, "loss": 0.5659, "step": 84500 }, { "epoch": 0.85, "learning_rate": 0.0001228657894736842, "loss": 0.559, "step": 84510 }, { "epoch": 0.85, "learning_rate": 0.00012278684210526315, "loss": 0.5606, "step": 84520 }, { "epoch": 0.85, "learning_rate": 0.0001227078947368421, "loss": 0.5623, "step": 84530 }, { "epoch": 0.85, "learning_rate": 0.00012262894736842105, "loss": 0.5589, "step": 84540 }, { "epoch": 0.85, "learning_rate": 0.0001225578947368421, "loss": 0.5546, "step": 84550 }, { "epoch": 0.85, "learning_rate": 0.00012247894736842105, "loss": 0.5621, "step": 84560 }, { "epoch": 0.85, "learning_rate": 0.0001224, "loss": 0.5675, "step": 84570 }, { "epoch": 0.85, "learning_rate": 0.00012232105263157894, "loss": 0.5709, "step": 84580 }, { "epoch": 0.85, "learning_rate": 0.0001222421052631579, "loss": 0.5649, "step": 84590 }, { "epoch": 0.85, "learning_rate": 0.00012216315789473684, "loss": 0.5662, "step": 84600 }, { "epoch": 0.85, "learning_rate": 0.00012208421052631576, "loss": 0.567, "step": 84610 }, { "epoch": 0.85, "learning_rate": 0.00012200526315789474, "loss": 0.574, "step": 84620 }, { "epoch": 0.85, "learning_rate": 0.00012192631578947368, "loss": 0.5738, "step": 84630 }, { "epoch": 0.85, "learning_rate": 0.00012184736842105262, "loss": 0.5599, "step": 84640 }, { "epoch": 0.85, "learning_rate": 0.00012176842105263157, "loss": 0.573, "step": 84650 }, { "epoch": 0.85, "learning_rate": 0.00012168947368421051, "loss": 0.5676, "step": 84660 }, { "epoch": 0.85, "learning_rate": 0.00012161052631578946, "loss": 0.576, "step": 84670 }, { "epoch": 0.85, "learning_rate": 0.00012153157894736841, "loss": 0.5811, "step": 84680 }, { "epoch": 0.85, "learning_rate": 0.00012145263157894736, "loss": 0.5674, "step": 84690 }, { "epoch": 0.85, "learning_rate": 0.0001213736842105263, "loss": 0.5682, "step": 84700 }, { "epoch": 0.85, "learning_rate": 0.00012129473684210524, "loss": 0.5699, "step": 84710 }, { "epoch": 0.85, "learning_rate": 0.0001212157894736842, "loss": 0.5615, "step": 84720 }, { "epoch": 0.85, "learning_rate": 0.00012113684210526315, "loss": 0.5685, "step": 84730 }, { "epoch": 0.85, "learning_rate": 0.00012105789473684209, "loss": 0.5693, "step": 84740 }, { "epoch": 0.85, "learning_rate": 0.00012097894736842104, "loss": 0.5577, "step": 84750 }, { "epoch": 0.85, "learning_rate": 0.0001209, "loss": 0.5568, "step": 84760 }, { "epoch": 0.85, "learning_rate": 0.00012082105263157895, "loss": 0.5626, "step": 84770 }, { "epoch": 0.85, "learning_rate": 0.00012074210526315788, "loss": 0.556, "step": 84780 }, { "epoch": 0.85, "learning_rate": 0.00012066315789473683, "loss": 0.5557, "step": 84790 }, { "epoch": 0.85, "learning_rate": 0.00012058421052631578, "loss": 0.5754, "step": 84800 }, { "epoch": 0.85, "learning_rate": 0.00012050526315789473, "loss": 0.5728, "step": 84810 }, { "epoch": 0.85, "learning_rate": 0.00012042631578947367, "loss": 0.5641, "step": 84820 }, { "epoch": 0.85, "learning_rate": 0.00012034736842105262, "loss": 0.5645, "step": 84830 }, { "epoch": 0.85, "learning_rate": 0.00012026842105263157, "loss": 0.5695, "step": 84840 }, { "epoch": 0.85, "learning_rate": 0.0001201894736842105, "loss": 0.5729, "step": 84850 }, { "epoch": 0.85, "learning_rate": 0.00012011052631578947, "loss": 0.5728, "step": 84860 }, { "epoch": 0.85, "learning_rate": 0.00012003157894736842, "loss": 0.5681, "step": 84870 }, { "epoch": 0.85, "learning_rate": 0.00011995263157894735, "loss": 0.5715, "step": 84880 }, { "epoch": 0.85, "learning_rate": 0.0001198736842105263, "loss": 0.5862, "step": 84890 }, { "epoch": 0.85, "learning_rate": 0.00011979473684210526, "loss": 0.5762, "step": 84900 }, { "epoch": 0.85, "learning_rate": 0.00011971578947368421, "loss": 0.5818, "step": 84910 }, { "epoch": 0.85, "learning_rate": 0.00011963684210526314, "loss": 0.5682, "step": 84920 }, { "epoch": 0.85, "learning_rate": 0.00011955789473684209, "loss": 0.5733, "step": 84930 }, { "epoch": 0.85, "learning_rate": 0.00011947894736842104, "loss": 0.5591, "step": 84940 }, { "epoch": 0.85, "learning_rate": 0.0001194, "loss": 0.5695, "step": 84950 }, { "epoch": 0.85, "learning_rate": 0.00011932105263157894, "loss": 0.5686, "step": 84960 }, { "epoch": 0.85, "learning_rate": 0.00011924210526315789, "loss": 0.5588, "step": 84970 }, { "epoch": 0.85, "learning_rate": 0.00011916315789473683, "loss": 0.5544, "step": 84980 }, { "epoch": 0.85, "learning_rate": 0.00011908421052631577, "loss": 0.5651, "step": 84990 }, { "epoch": 0.85, "learning_rate": 0.00011900526315789473, "loss": 0.5688, "step": 85000 }, { "epoch": 0.85, "eval_accuracy": 0.8806242189602479, "eval_loss": 0.54638671875, "eval_runtime": 98.7484, "eval_samples_per_second": 810.14, "eval_steps_per_second": 1.59, "step": 85000 }, { "epoch": 0.85, "learning_rate": 0.00011892631578947368, "loss": 0.5437, "step": 85010 }, { "epoch": 0.85, "learning_rate": 0.00011884736842105261, "loss": 0.5461, "step": 85020 }, { "epoch": 0.85, "learning_rate": 0.00011876842105263156, "loss": 0.5477, "step": 85030 }, { "epoch": 0.85, "learning_rate": 0.00011868947368421052, "loss": 0.5339, "step": 85040 }, { "epoch": 0.85, "learning_rate": 0.00011861052631578947, "loss": 0.5559, "step": 85050 }, { "epoch": 0.85, "learning_rate": 0.0001185315789473684, "loss": 0.5535, "step": 85060 }, { "epoch": 0.85, "learning_rate": 0.00011845263157894735, "loss": 0.5652, "step": 85070 }, { "epoch": 0.85, "learning_rate": 0.0001183736842105263, "loss": 0.5791, "step": 85080 }, { "epoch": 0.85, "learning_rate": 0.00011829473684210526, "loss": 0.5687, "step": 85090 }, { "epoch": 0.85, "learning_rate": 0.0001182157894736842, "loss": 0.5531, "step": 85100 }, { "epoch": 0.85, "learning_rate": 0.00011813684210526315, "loss": 0.5756, "step": 85110 }, { "epoch": 0.85, "learning_rate": 0.0001180578947368421, "loss": 0.577, "step": 85120 }, { "epoch": 0.85, "learning_rate": 0.00011797894736842103, "loss": 0.5682, "step": 85130 }, { "epoch": 0.85, "learning_rate": 0.00011789999999999999, "loss": 0.5811, "step": 85140 }, { "epoch": 0.85, "learning_rate": 0.00011782105263157894, "loss": 0.5629, "step": 85150 }, { "epoch": 0.85, "learning_rate": 0.00011774210526315788, "loss": 0.5632, "step": 85160 }, { "epoch": 0.85, "learning_rate": 0.00011766315789473682, "loss": 0.5655, "step": 85170 }, { "epoch": 0.85, "learning_rate": 0.00011758421052631579, "loss": 0.5734, "step": 85180 }, { "epoch": 0.85, "learning_rate": 0.00011750526315789473, "loss": 0.5627, "step": 85190 }, { "epoch": 0.85, "learning_rate": 0.00011742631578947367, "loss": 0.5543, "step": 85200 }, { "epoch": 0.85, "learning_rate": 0.00011734736842105262, "loss": 0.5608, "step": 85210 }, { "epoch": 0.85, "learning_rate": 0.00011726842105263157, "loss": 0.5596, "step": 85220 }, { "epoch": 0.85, "learning_rate": 0.00011718947368421053, "loss": 0.5672, "step": 85230 }, { "epoch": 0.85, "learning_rate": 0.00011711052631578946, "loss": 0.5619, "step": 85240 }, { "epoch": 0.85, "learning_rate": 0.00011703157894736841, "loss": 0.5518, "step": 85250 }, { "epoch": 0.85, "learning_rate": 0.00011695263157894736, "loss": 0.5648, "step": 85260 }, { "epoch": 0.85, "learning_rate": 0.00011687368421052632, "loss": 0.5722, "step": 85270 }, { "epoch": 0.85, "learning_rate": 0.00011679473684210526, "loss": 0.5628, "step": 85280 }, { "epoch": 0.85, "learning_rate": 0.0001167157894736842, "loss": 0.573, "step": 85290 }, { "epoch": 0.85, "learning_rate": 0.00011663684210526315, "loss": 0.5702, "step": 85300 }, { "epoch": 0.85, "learning_rate": 0.00011655789473684209, "loss": 0.5746, "step": 85310 }, { "epoch": 0.85, "learning_rate": 0.00011647894736842105, "loss": 0.5577, "step": 85320 }, { "epoch": 0.85, "learning_rate": 0.0001164, "loss": 0.5549, "step": 85330 }, { "epoch": 0.85, "learning_rate": 0.00011632105263157893, "loss": 0.5478, "step": 85340 }, { "epoch": 0.85, "learning_rate": 0.00011624210526315788, "loss": 0.567, "step": 85350 }, { "epoch": 0.85, "learning_rate": 0.00011616315789473683, "loss": 0.5715, "step": 85360 }, { "epoch": 0.85, "learning_rate": 0.00011608421052631579, "loss": 0.5782, "step": 85370 }, { "epoch": 0.85, "learning_rate": 0.00011600526315789472, "loss": 0.5762, "step": 85380 }, { "epoch": 0.85, "learning_rate": 0.00011592631578947367, "loss": 0.5778, "step": 85390 }, { "epoch": 0.85, "learning_rate": 0.00011584736842105262, "loss": 0.5692, "step": 85400 }, { "epoch": 0.85, "learning_rate": 0.00011576842105263158, "loss": 0.5604, "step": 85410 }, { "epoch": 0.85, "learning_rate": 0.00011568947368421052, "loss": 0.5752, "step": 85420 }, { "epoch": 0.85, "learning_rate": 0.00011561052631578947, "loss": 0.5626, "step": 85430 }, { "epoch": 0.85, "learning_rate": 0.00011553157894736841, "loss": 0.5703, "step": 85440 }, { "epoch": 0.85, "learning_rate": 0.00011545263157894735, "loss": 0.5747, "step": 85450 }, { "epoch": 0.85, "learning_rate": 0.00011537368421052631, "loss": 0.5706, "step": 85460 }, { "epoch": 0.85, "learning_rate": 0.00011529473684210526, "loss": 0.5577, "step": 85470 }, { "epoch": 0.85, "learning_rate": 0.0001152157894736842, "loss": 0.56, "step": 85480 }, { "epoch": 0.85, "learning_rate": 0.00011513684210526314, "loss": 0.5659, "step": 85490 }, { "epoch": 0.85, "learning_rate": 0.00011505789473684209, "loss": 0.5639, "step": 85500 }, { "epoch": 0.86, "learning_rate": 0.00011497894736842105, "loss": 0.5692, "step": 85510 }, { "epoch": 0.86, "learning_rate": 0.00011489999999999999, "loss": 0.5699, "step": 85520 }, { "epoch": 0.86, "learning_rate": 0.00011482105263157894, "loss": 0.5704, "step": 85530 }, { "epoch": 0.86, "learning_rate": 0.00011474210526315788, "loss": 0.5697, "step": 85540 }, { "epoch": 0.86, "learning_rate": 0.00011466315789473685, "loss": 0.572, "step": 85550 }, { "epoch": 0.86, "learning_rate": 0.00011459210526315788, "loss": 0.5686, "step": 85560 }, { "epoch": 0.86, "learning_rate": 0.00011451315789473684, "loss": 0.5621, "step": 85570 }, { "epoch": 0.86, "learning_rate": 0.00011443421052631578, "loss": 0.5729, "step": 85580 }, { "epoch": 0.86, "learning_rate": 0.00011435526315789473, "loss": 0.5724, "step": 85590 }, { "epoch": 0.86, "learning_rate": 0.00011427631578947367, "loss": 0.5769, "step": 85600 }, { "epoch": 0.86, "learning_rate": 0.00011419736842105262, "loss": 0.5802, "step": 85610 }, { "epoch": 0.86, "learning_rate": 0.00011411842105263157, "loss": 0.5811, "step": 85620 }, { "epoch": 0.86, "learning_rate": 0.00011403947368421052, "loss": 0.5598, "step": 85630 }, { "epoch": 0.86, "learning_rate": 0.00011396052631578947, "loss": 0.5693, "step": 85640 }, { "epoch": 0.86, "learning_rate": 0.0001138815789473684, "loss": 0.5682, "step": 85650 }, { "epoch": 0.86, "learning_rate": 0.00011380263157894735, "loss": 0.5734, "step": 85660 }, { "epoch": 0.86, "learning_rate": 0.00011372368421052631, "loss": 0.5715, "step": 85670 }, { "epoch": 0.86, "learning_rate": 0.00011364473684210526, "loss": 0.5658, "step": 85680 }, { "epoch": 0.86, "learning_rate": 0.0001135657894736842, "loss": 0.5767, "step": 85690 }, { "epoch": 0.86, "learning_rate": 0.00011348684210526314, "loss": 0.5685, "step": 85700 }, { "epoch": 0.86, "learning_rate": 0.0001134078947368421, "loss": 0.5574, "step": 85710 }, { "epoch": 0.86, "learning_rate": 0.00011332894736842105, "loss": 0.5676, "step": 85720 }, { "epoch": 0.86, "learning_rate": 0.00011324999999999999, "loss": 0.5559, "step": 85730 }, { "epoch": 0.86, "learning_rate": 0.00011317105263157894, "loss": 0.5546, "step": 85740 }, { "epoch": 0.86, "learning_rate": 0.00011309210526315788, "loss": 0.554, "step": 85750 }, { "epoch": 0.86, "learning_rate": 0.00011301315789473683, "loss": 0.5602, "step": 85760 }, { "epoch": 0.86, "learning_rate": 0.00011293421052631578, "loss": 0.5702, "step": 85770 }, { "epoch": 0.86, "learning_rate": 0.00011285526315789473, "loss": 0.5716, "step": 85780 }, { "epoch": 0.86, "learning_rate": 0.00011277631578947366, "loss": 0.5761, "step": 85790 }, { "epoch": 0.86, "learning_rate": 0.00011269736842105261, "loss": 0.5572, "step": 85800 }, { "epoch": 0.86, "learning_rate": 0.00011261842105263157, "loss": 0.5822, "step": 85810 }, { "epoch": 0.86, "learning_rate": 0.00011253947368421052, "loss": 0.5724, "step": 85820 }, { "epoch": 0.86, "learning_rate": 0.00011246052631578946, "loss": 0.5678, "step": 85830 }, { "epoch": 0.86, "learning_rate": 0.0001123815789473684, "loss": 0.5632, "step": 85840 }, { "epoch": 0.86, "learning_rate": 0.00011230263157894737, "loss": 0.5662, "step": 85850 }, { "epoch": 0.86, "learning_rate": 0.00011222368421052632, "loss": 0.5786, "step": 85860 }, { "epoch": 0.86, "learning_rate": 0.00011214473684210525, "loss": 0.5667, "step": 85870 }, { "epoch": 0.86, "learning_rate": 0.0001120657894736842, "loss": 0.5742, "step": 85880 }, { "epoch": 0.86, "learning_rate": 0.00011198684210526315, "loss": 0.5646, "step": 85890 }, { "epoch": 0.86, "learning_rate": 0.0001119078947368421, "loss": 0.5775, "step": 85900 }, { "epoch": 0.86, "learning_rate": 0.00011182894736842104, "loss": 0.5587, "step": 85910 }, { "epoch": 0.86, "learning_rate": 0.00011174999999999999, "loss": 0.5676, "step": 85920 }, { "epoch": 0.86, "learning_rate": 0.00011167105263157893, "loss": 0.5542, "step": 85930 }, { "epoch": 0.86, "learning_rate": 0.00011159210526315788, "loss": 0.5562, "step": 85940 }, { "epoch": 0.86, "learning_rate": 0.00011151315789473684, "loss": 0.555, "step": 85950 }, { "epoch": 0.86, "learning_rate": 0.00011143421052631579, "loss": 0.5691, "step": 85960 }, { "epoch": 0.86, "learning_rate": 0.00011135526315789472, "loss": 0.5806, "step": 85970 }, { "epoch": 0.86, "learning_rate": 0.00011127631578947367, "loss": 0.5658, "step": 85980 }, { "epoch": 0.86, "learning_rate": 0.00011119736842105263, "loss": 0.5802, "step": 85990 }, { "epoch": 0.86, "learning_rate": 0.00011111842105263158, "loss": 0.5685, "step": 86000 }, { "epoch": 0.86, "learning_rate": 0.00011103947368421051, "loss": 0.5868, "step": 86010 }, { "epoch": 0.86, "learning_rate": 0.00011096052631578946, "loss": 0.5774, "step": 86020 }, { "epoch": 0.86, "learning_rate": 0.00011088157894736841, "loss": 0.5792, "step": 86030 }, { "epoch": 0.86, "learning_rate": 0.00011080263157894736, "loss": 0.572, "step": 86040 }, { "epoch": 0.86, "learning_rate": 0.0001107236842105263, "loss": 0.5763, "step": 86050 }, { "epoch": 0.86, "learning_rate": 0.00011064473684210525, "loss": 0.5744, "step": 86060 }, { "epoch": 0.86, "learning_rate": 0.0001105657894736842, "loss": 0.5807, "step": 86070 }, { "epoch": 0.86, "learning_rate": 0.00011048684210526314, "loss": 0.578, "step": 86080 }, { "epoch": 0.86, "learning_rate": 0.0001104078947368421, "loss": 0.5676, "step": 86090 }, { "epoch": 0.86, "learning_rate": 0.00011032894736842105, "loss": 0.5795, "step": 86100 }, { "epoch": 0.86, "learning_rate": 0.00011024999999999998, "loss": 0.5555, "step": 86110 }, { "epoch": 0.86, "learning_rate": 0.00011017105263157893, "loss": 0.5618, "step": 86120 }, { "epoch": 0.86, "learning_rate": 0.00011009210526315789, "loss": 0.572, "step": 86130 }, { "epoch": 0.86, "learning_rate": 0.00011001315789473684, "loss": 0.5662, "step": 86140 }, { "epoch": 0.86, "learning_rate": 0.00010993421052631578, "loss": 0.5739, "step": 86150 }, { "epoch": 0.86, "learning_rate": 0.00010985526315789472, "loss": 0.5603, "step": 86160 }, { "epoch": 0.86, "learning_rate": 0.00010977631578947367, "loss": 0.5634, "step": 86170 }, { "epoch": 0.86, "learning_rate": 0.00010969736842105263, "loss": 0.5684, "step": 86180 }, { "epoch": 0.86, "learning_rate": 0.00010961842105263157, "loss": 0.5711, "step": 86190 }, { "epoch": 0.86, "learning_rate": 0.00010953947368421052, "loss": 0.5731, "step": 86200 }, { "epoch": 0.86, "learning_rate": 0.00010946052631578947, "loss": 0.5663, "step": 86210 }, { "epoch": 0.86, "learning_rate": 0.0001093815789473684, "loss": 0.569, "step": 86220 }, { "epoch": 0.86, "learning_rate": 0.00010930263157894736, "loss": 0.566, "step": 86230 }, { "epoch": 0.86, "learning_rate": 0.00010922368421052631, "loss": 0.5642, "step": 86240 }, { "epoch": 0.86, "learning_rate": 0.00010914473684210525, "loss": 0.5614, "step": 86250 }, { "epoch": 0.86, "learning_rate": 0.0001090657894736842, "loss": 0.554, "step": 86260 }, { "epoch": 0.86, "learning_rate": 0.00010898684210526316, "loss": 0.5498, "step": 86270 }, { "epoch": 0.86, "learning_rate": 0.0001089078947368421, "loss": 0.5544, "step": 86280 }, { "epoch": 0.86, "learning_rate": 0.00010882894736842104, "loss": 0.5617, "step": 86290 }, { "epoch": 0.86, "learning_rate": 0.00010874999999999999, "loss": 0.5575, "step": 86300 }, { "epoch": 0.86, "learning_rate": 0.00010867105263157894, "loss": 0.5585, "step": 86310 }, { "epoch": 0.86, "learning_rate": 0.0001085921052631579, "loss": 0.5603, "step": 86320 }, { "epoch": 0.86, "learning_rate": 0.00010851315789473683, "loss": 0.5498, "step": 86330 }, { "epoch": 0.86, "learning_rate": 0.00010843421052631578, "loss": 0.5574, "step": 86340 }, { "epoch": 0.86, "learning_rate": 0.00010835526315789473, "loss": 0.5768, "step": 86350 }, { "epoch": 0.86, "learning_rate": 0.00010827631578947366, "loss": 0.5804, "step": 86360 }, { "epoch": 0.86, "learning_rate": 0.00010819736842105263, "loss": 0.5593, "step": 86370 }, { "epoch": 0.86, "learning_rate": 0.00010811842105263157, "loss": 0.5625, "step": 86380 }, { "epoch": 0.86, "learning_rate": 0.00010803947368421051, "loss": 0.5592, "step": 86390 }, { "epoch": 0.86, "learning_rate": 0.00010796052631578946, "loss": 0.5645, "step": 86400 }, { "epoch": 0.86, "learning_rate": 0.00010788157894736842, "loss": 0.5598, "step": 86410 }, { "epoch": 0.86, "learning_rate": 0.00010780263157894737, "loss": 0.567, "step": 86420 }, { "epoch": 0.86, "learning_rate": 0.0001077236842105263, "loss": 0.5729, "step": 86430 }, { "epoch": 0.86, "learning_rate": 0.00010764473684210525, "loss": 0.5636, "step": 86440 }, { "epoch": 0.86, "learning_rate": 0.0001075657894736842, "loss": 0.5656, "step": 86450 }, { "epoch": 0.86, "learning_rate": 0.00010748684210526316, "loss": 0.571, "step": 86460 }, { "epoch": 0.86, "learning_rate": 0.0001074078947368421, "loss": 0.5764, "step": 86470 }, { "epoch": 0.86, "learning_rate": 0.00010732894736842104, "loss": 0.5652, "step": 86480 }, { "epoch": 0.86, "learning_rate": 0.00010724999999999999, "loss": 0.5716, "step": 86490 }, { "epoch": 0.86, "learning_rate": 0.00010717105263157893, "loss": 0.572, "step": 86500 }, { "epoch": 0.87, "learning_rate": 0.00010709210526315789, "loss": 0.5799, "step": 86510 }, { "epoch": 0.87, "learning_rate": 0.00010701315789473684, "loss": 0.5555, "step": 86520 }, { "epoch": 0.87, "learning_rate": 0.00010693421052631578, "loss": 0.568, "step": 86530 }, { "epoch": 0.87, "learning_rate": 0.00010685526315789472, "loss": 0.5589, "step": 86540 }, { "epoch": 0.87, "learning_rate": 0.00010677631578947368, "loss": 0.5774, "step": 86550 }, { "epoch": 0.87, "learning_rate": 0.00010670526315789472, "loss": 0.5726, "step": 86560 }, { "epoch": 0.87, "learning_rate": 0.00010662631578947368, "loss": 0.5817, "step": 86570 }, { "epoch": 0.87, "learning_rate": 0.00010654736842105263, "loss": 0.5636, "step": 86580 }, { "epoch": 0.87, "learning_rate": 0.00010646842105263157, "loss": 0.5581, "step": 86590 }, { "epoch": 0.87, "learning_rate": 0.00010638947368421051, "loss": 0.5577, "step": 86600 }, { "epoch": 0.87, "learning_rate": 0.00010631052631578946, "loss": 0.5483, "step": 86610 }, { "epoch": 0.87, "learning_rate": 0.00010623157894736842, "loss": 0.5685, "step": 86620 }, { "epoch": 0.87, "learning_rate": 0.00010615263157894737, "loss": 0.5518, "step": 86630 }, { "epoch": 0.87, "learning_rate": 0.0001060736842105263, "loss": 0.5655, "step": 86640 }, { "epoch": 0.87, "learning_rate": 0.00010599473684210525, "loss": 0.5541, "step": 86650 }, { "epoch": 0.87, "learning_rate": 0.00010591578947368421, "loss": 0.5508, "step": 86660 }, { "epoch": 0.87, "learning_rate": 0.00010583684210526315, "loss": 0.5664, "step": 86670 }, { "epoch": 0.87, "learning_rate": 0.0001057578947368421, "loss": 0.5632, "step": 86680 }, { "epoch": 0.87, "learning_rate": 0.00010567894736842104, "loss": 0.5748, "step": 86690 }, { "epoch": 0.87, "learning_rate": 0.00010559999999999998, "loss": 0.5573, "step": 86700 }, { "epoch": 0.87, "learning_rate": 0.00010552105263157894, "loss": 0.5541, "step": 86710 }, { "epoch": 0.87, "learning_rate": 0.00010544210526315789, "loss": 0.5653, "step": 86720 }, { "epoch": 0.87, "learning_rate": 0.00010536315789473684, "loss": 0.5746, "step": 86730 }, { "epoch": 0.87, "learning_rate": 0.00010528421052631577, "loss": 0.5615, "step": 86740 }, { "epoch": 0.87, "learning_rate": 0.00010520526315789472, "loss": 0.5608, "step": 86750 }, { "epoch": 0.87, "learning_rate": 0.00010512631578947368, "loss": 0.5663, "step": 86760 }, { "epoch": 0.87, "learning_rate": 0.00010504736842105263, "loss": 0.5796, "step": 86770 }, { "epoch": 0.87, "learning_rate": 0.00010496842105263156, "loss": 0.5762, "step": 86780 }, { "epoch": 0.87, "learning_rate": 0.00010488947368421051, "loss": 0.5784, "step": 86790 }, { "epoch": 0.87, "learning_rate": 0.00010481052631578947, "loss": 0.5815, "step": 86800 }, { "epoch": 0.87, "learning_rate": 0.00010473157894736841, "loss": 0.5618, "step": 86810 }, { "epoch": 0.87, "learning_rate": 0.00010465263157894736, "loss": 0.563, "step": 86820 }, { "epoch": 0.87, "learning_rate": 0.0001045736842105263, "loss": 0.5587, "step": 86830 }, { "epoch": 0.87, "learning_rate": 0.00010449473684210525, "loss": 0.5658, "step": 86840 }, { "epoch": 0.87, "learning_rate": 0.0001044157894736842, "loss": 0.5656, "step": 86850 }, { "epoch": 0.87, "learning_rate": 0.00010433684210526315, "loss": 0.5544, "step": 86860 }, { "epoch": 0.87, "learning_rate": 0.0001042578947368421, "loss": 0.5633, "step": 86870 }, { "epoch": 0.87, "learning_rate": 0.00010417894736842103, "loss": 0.5526, "step": 86880 }, { "epoch": 0.87, "learning_rate": 0.00010409999999999998, "loss": 0.5532, "step": 86890 }, { "epoch": 0.87, "learning_rate": 0.00010402105263157894, "loss": 0.5569, "step": 86900 }, { "epoch": 0.87, "learning_rate": 0.00010394210526315789, "loss": 0.5502, "step": 86910 }, { "epoch": 0.87, "learning_rate": 0.00010386315789473683, "loss": 0.557, "step": 86920 }, { "epoch": 0.87, "learning_rate": 0.00010378421052631578, "loss": 0.5648, "step": 86930 }, { "epoch": 0.87, "learning_rate": 0.00010370526315789474, "loss": 0.5772, "step": 86940 }, { "epoch": 0.87, "learning_rate": 0.00010362631578947369, "loss": 0.5599, "step": 86950 }, { "epoch": 0.87, "learning_rate": 0.00010354736842105262, "loss": 0.5584, "step": 86960 }, { "epoch": 0.87, "learning_rate": 0.00010346842105263157, "loss": 0.5611, "step": 86970 }, { "epoch": 0.87, "learning_rate": 0.00010338947368421052, "loss": 0.5718, "step": 86980 }, { "epoch": 0.87, "learning_rate": 0.00010331052631578947, "loss": 0.5668, "step": 86990 }, { "epoch": 0.87, "learning_rate": 0.00010323157894736841, "loss": 0.5624, "step": 87000 }, { "epoch": 0.87, "learning_rate": 0.00010315263157894736, "loss": 0.5586, "step": 87010 }, { "epoch": 0.87, "learning_rate": 0.0001030736842105263, "loss": 0.5577, "step": 87020 }, { "epoch": 0.87, "learning_rate": 0.00010299473684210525, "loss": 0.56, "step": 87030 }, { "epoch": 0.87, "learning_rate": 0.00010291578947368421, "loss": 0.5702, "step": 87040 }, { "epoch": 0.87, "learning_rate": 0.00010283684210526316, "loss": 0.565, "step": 87050 }, { "epoch": 0.87, "learning_rate": 0.00010275789473684209, "loss": 0.5633, "step": 87060 }, { "epoch": 0.87, "learning_rate": 0.00010267894736842104, "loss": 0.5588, "step": 87070 }, { "epoch": 0.87, "learning_rate": 0.0001026, "loss": 0.5518, "step": 87080 }, { "epoch": 0.87, "learning_rate": 0.00010252105263157895, "loss": 0.5547, "step": 87090 }, { "epoch": 0.87, "learning_rate": 0.00010244210526315788, "loss": 0.5612, "step": 87100 }, { "epoch": 0.87, "learning_rate": 0.00010236315789473683, "loss": 0.5587, "step": 87110 }, { "epoch": 0.87, "learning_rate": 0.00010228421052631578, "loss": 0.5513, "step": 87120 }, { "epoch": 0.87, "learning_rate": 0.00010220526315789473, "loss": 0.5647, "step": 87130 }, { "epoch": 0.87, "learning_rate": 0.00010212631578947368, "loss": 0.557, "step": 87140 }, { "epoch": 0.87, "learning_rate": 0.00010204736842105262, "loss": 0.5613, "step": 87150 }, { "epoch": 0.87, "learning_rate": 0.00010196842105263156, "loss": 0.5687, "step": 87160 }, { "epoch": 0.87, "learning_rate": 0.00010188947368421051, "loss": 0.5627, "step": 87170 }, { "epoch": 0.87, "learning_rate": 0.00010181052631578947, "loss": 0.5628, "step": 87180 }, { "epoch": 0.87, "learning_rate": 0.00010173157894736842, "loss": 0.5652, "step": 87190 }, { "epoch": 0.87, "learning_rate": 0.00010165263157894735, "loss": 0.5752, "step": 87200 }, { "epoch": 0.87, "learning_rate": 0.0001015736842105263, "loss": 0.561, "step": 87210 }, { "epoch": 0.87, "learning_rate": 0.00010149473684210526, "loss": 0.5793, "step": 87220 }, { "epoch": 0.87, "learning_rate": 0.00010141578947368421, "loss": 0.5658, "step": 87230 }, { "epoch": 0.87, "learning_rate": 0.00010133684210526315, "loss": 0.5796, "step": 87240 }, { "epoch": 0.87, "learning_rate": 0.0001012578947368421, "loss": 0.5635, "step": 87250 }, { "epoch": 0.87, "learning_rate": 0.00010117894736842104, "loss": 0.563, "step": 87260 }, { "epoch": 0.87, "learning_rate": 0.0001011, "loss": 0.5685, "step": 87270 }, { "epoch": 0.87, "learning_rate": 0.00010102105263157894, "loss": 0.5507, "step": 87280 }, { "epoch": 0.87, "learning_rate": 0.00010094210526315789, "loss": 0.5601, "step": 87290 }, { "epoch": 0.87, "learning_rate": 0.00010086315789473684, "loss": 0.553, "step": 87300 }, { "epoch": 0.87, "learning_rate": 0.00010078421052631577, "loss": 0.5715, "step": 87310 }, { "epoch": 0.87, "learning_rate": 0.00010070526315789473, "loss": 0.5684, "step": 87320 }, { "epoch": 0.87, "learning_rate": 0.00010062631578947368, "loss": 0.5795, "step": 87330 }, { "epoch": 0.87, "learning_rate": 0.00010054736842105262, "loss": 0.568, "step": 87340 }, { "epoch": 0.87, "learning_rate": 0.00010046842105263156, "loss": 0.5803, "step": 87350 }, { "epoch": 0.87, "learning_rate": 0.00010038947368421053, "loss": 0.5711, "step": 87360 }, { "epoch": 0.87, "learning_rate": 0.00010031052631578947, "loss": 0.5625, "step": 87370 }, { "epoch": 0.87, "learning_rate": 0.00010023157894736841, "loss": 0.5512, "step": 87380 }, { "epoch": 0.87, "learning_rate": 0.00010015263157894736, "loss": 0.5513, "step": 87390 }, { "epoch": 0.87, "learning_rate": 0.0001000736842105263, "loss": 0.5541, "step": 87400 }, { "epoch": 0.87, "learning_rate": 9.999473684210527e-05, "loss": 0.5676, "step": 87410 }, { "epoch": 0.87, "learning_rate": 9.99157894736842e-05, "loss": 0.5792, "step": 87420 }, { "epoch": 0.87, "learning_rate": 9.983684210526315e-05, "loss": 0.5725, "step": 87430 }, { "epoch": 0.87, "learning_rate": 9.97578947368421e-05, "loss": 0.5694, "step": 87440 }, { "epoch": 0.87, "learning_rate": 9.967894736842103e-05, "loss": 0.5583, "step": 87450 }, { "epoch": 0.87, "learning_rate": 9.96e-05, "loss": 0.5669, "step": 87460 }, { "epoch": 0.87, "learning_rate": 9.952105263157894e-05, "loss": 0.571, "step": 87470 }, { "epoch": 0.87, "learning_rate": 9.944210526315788e-05, "loss": 0.5622, "step": 87480 }, { "epoch": 0.87, "learning_rate": 9.936315789473683e-05, "loss": 0.5697, "step": 87490 }, { "epoch": 0.88, "learning_rate": 9.928421052631579e-05, "loss": 0.5629, "step": 87500 }, { "epoch": 0.88, "eval_accuracy": 0.8814306303350578, "eval_loss": 0.541015625, "eval_runtime": 97.7663, "eval_samples_per_second": 818.278, "eval_steps_per_second": 1.606, "step": 87500 }, { "epoch": 0.88, "learning_rate": 9.920526315789474e-05, "loss": 0.5606, "step": 87510 }, { "epoch": 0.88, "learning_rate": 9.912631578947367e-05, "loss": 0.5534, "step": 87520 }, { "epoch": 0.88, "learning_rate": 9.904736842105262e-05, "loss": 0.5601, "step": 87530 }, { "epoch": 0.88, "learning_rate": 9.896842105263157e-05, "loss": 0.5721, "step": 87540 }, { "epoch": 0.88, "learning_rate": 9.888947368421053e-05, "loss": 0.5701, "step": 87550 }, { "epoch": 0.88, "learning_rate": 9.881842105263156e-05, "loss": 0.5598, "step": 87560 }, { "epoch": 0.88, "learning_rate": 9.873947368421053e-05, "loss": 0.57, "step": 87570 }, { "epoch": 0.88, "learning_rate": 9.866052631578946e-05, "loss": 0.5681, "step": 87580 }, { "epoch": 0.88, "learning_rate": 9.858157894736841e-05, "loss": 0.5678, "step": 87590 }, { "epoch": 0.88, "learning_rate": 9.850263157894736e-05, "loss": 0.5571, "step": 87600 }, { "epoch": 0.88, "learning_rate": 9.84236842105263e-05, "loss": 0.5652, "step": 87610 }, { "epoch": 0.88, "learning_rate": 9.834473684210525e-05, "loss": 0.5726, "step": 87620 }, { "epoch": 0.88, "learning_rate": 9.82657894736842e-05, "loss": 0.565, "step": 87630 }, { "epoch": 0.88, "learning_rate": 9.818684210526315e-05, "loss": 0.5593, "step": 87640 }, { "epoch": 0.88, "learning_rate": 9.810789473684209e-05, "loss": 0.5588, "step": 87650 }, { "epoch": 0.88, "learning_rate": 9.802894736842105e-05, "loss": 0.5545, "step": 87660 }, { "epoch": 0.88, "learning_rate": 9.795e-05, "loss": 0.5685, "step": 87670 }, { "epoch": 0.88, "learning_rate": 9.787105263157894e-05, "loss": 0.5628, "step": 87680 }, { "epoch": 0.88, "learning_rate": 9.779210526315788e-05, "loss": 0.5589, "step": 87690 }, { "epoch": 0.88, "learning_rate": 9.771315789473683e-05, "loss": 0.5618, "step": 87700 }, { "epoch": 0.88, "learning_rate": 9.763421052631579e-05, "loss": 0.5652, "step": 87710 }, { "epoch": 0.88, "learning_rate": 9.755526315789474e-05, "loss": 0.5586, "step": 87720 }, { "epoch": 0.88, "learning_rate": 9.747631578947367e-05, "loss": 0.5566, "step": 87730 }, { "epoch": 0.88, "learning_rate": 9.739736842105262e-05, "loss": 0.5615, "step": 87740 }, { "epoch": 0.88, "learning_rate": 9.731842105263157e-05, "loss": 0.5643, "step": 87750 }, { "epoch": 0.88, "learning_rate": 9.723947368421052e-05, "loss": 0.5548, "step": 87760 }, { "epoch": 0.88, "learning_rate": 9.716052631578947e-05, "loss": 0.5805, "step": 87770 }, { "epoch": 0.88, "learning_rate": 9.708157894736841e-05, "loss": 0.5654, "step": 87780 }, { "epoch": 0.88, "learning_rate": 9.700263157894735e-05, "loss": 0.573, "step": 87790 }, { "epoch": 0.88, "learning_rate": 9.692368421052631e-05, "loss": 0.5512, "step": 87800 }, { "epoch": 0.88, "learning_rate": 9.684473684210526e-05, "loss": 0.5689, "step": 87810 }, { "epoch": 0.88, "learning_rate": 9.67657894736842e-05, "loss": 0.5723, "step": 87820 }, { "epoch": 0.88, "learning_rate": 9.668684210526314e-05, "loss": 0.5695, "step": 87830 }, { "epoch": 0.88, "learning_rate": 9.660789473684209e-05, "loss": 0.5572, "step": 87840 }, { "epoch": 0.88, "learning_rate": 9.652894736842105e-05, "loss": 0.5727, "step": 87850 }, { "epoch": 0.88, "learning_rate": 9.645e-05, "loss": 0.5558, "step": 87860 }, { "epoch": 0.88, "learning_rate": 9.637105263157893e-05, "loss": 0.5543, "step": 87870 }, { "epoch": 0.88, "learning_rate": 9.629210526315788e-05, "loss": 0.5576, "step": 87880 }, { "epoch": 0.88, "learning_rate": 9.621315789473684e-05, "loss": 0.5601, "step": 87890 }, { "epoch": 0.88, "learning_rate": 9.613421052631578e-05, "loss": 0.5651, "step": 87900 }, { "epoch": 0.88, "learning_rate": 9.605526315789473e-05, "loss": 0.5664, "step": 87910 }, { "epoch": 0.88, "learning_rate": 9.597631578947368e-05, "loss": 0.5646, "step": 87920 }, { "epoch": 0.88, "learning_rate": 9.589736842105261e-05, "loss": 0.5531, "step": 87930 }, { "epoch": 0.88, "learning_rate": 9.581842105263157e-05, "loss": 0.5607, "step": 87940 }, { "epoch": 0.88, "learning_rate": 9.573947368421052e-05, "loss": 0.5465, "step": 87950 }, { "epoch": 0.88, "learning_rate": 9.566052631578947e-05, "loss": 0.5573, "step": 87960 }, { "epoch": 0.88, "learning_rate": 9.55815789473684e-05, "loss": 0.5427, "step": 87970 }, { "epoch": 0.88, "learning_rate": 9.550263157894735e-05, "loss": 0.5453, "step": 87980 }, { "epoch": 0.88, "learning_rate": 9.542368421052631e-05, "loss": 0.5498, "step": 87990 }, { "epoch": 0.88, "learning_rate": 9.534473684210526e-05, "loss": 0.5599, "step": 88000 }, { "epoch": 0.88, "learning_rate": 9.52657894736842e-05, "loss": 0.5659, "step": 88010 }, { "epoch": 0.88, "learning_rate": 9.518684210526315e-05, "loss": 0.5546, "step": 88020 }, { "epoch": 0.88, "learning_rate": 9.510789473684211e-05, "loss": 0.5659, "step": 88030 }, { "epoch": 0.88, "learning_rate": 9.502894736842106e-05, "loss": 0.5566, "step": 88040 }, { "epoch": 0.88, "learning_rate": 9.494999999999999e-05, "loss": 0.5683, "step": 88050 }, { "epoch": 0.88, "learning_rate": 9.487105263157894e-05, "loss": 0.5522, "step": 88060 }, { "epoch": 0.88, "learning_rate": 9.479210526315789e-05, "loss": 0.5766, "step": 88070 }, { "epoch": 0.88, "learning_rate": 9.471315789473684e-05, "loss": 0.5714, "step": 88080 }, { "epoch": 0.88, "learning_rate": 9.463421052631578e-05, "loss": 0.5489, "step": 88090 }, { "epoch": 0.88, "learning_rate": 9.456315789473685e-05, "loss": 0.5552, "step": 88100 }, { "epoch": 0.88, "learning_rate": 9.448421052631578e-05, "loss": 0.5597, "step": 88110 }, { "epoch": 0.88, "learning_rate": 9.440526315789473e-05, "loss": 0.5621, "step": 88120 }, { "epoch": 0.88, "learning_rate": 9.432631578947368e-05, "loss": 0.5716, "step": 88130 }, { "epoch": 0.88, "learning_rate": 9.424736842105261e-05, "loss": 0.5607, "step": 88140 }, { "epoch": 0.88, "learning_rate": 9.416842105263157e-05, "loss": 0.5636, "step": 88150 }, { "epoch": 0.88, "learning_rate": 9.408947368421052e-05, "loss": 0.5482, "step": 88160 }, { "epoch": 0.88, "learning_rate": 9.401052631578947e-05, "loss": 0.5493, "step": 88170 }, { "epoch": 0.88, "learning_rate": 9.39315789473684e-05, "loss": 0.5473, "step": 88180 }, { "epoch": 0.88, "learning_rate": 9.385263157894737e-05, "loss": 0.5527, "step": 88190 }, { "epoch": 0.88, "learning_rate": 9.377368421052631e-05, "loss": 0.5507, "step": 88200 }, { "epoch": 0.88, "learning_rate": 9.369473684210525e-05, "loss": 0.5551, "step": 88210 }, { "epoch": 0.88, "learning_rate": 9.36157894736842e-05, "loss": 0.5569, "step": 88220 }, { "epoch": 0.88, "learning_rate": 9.353684210526315e-05, "loss": 0.5631, "step": 88230 }, { "epoch": 0.88, "learning_rate": 9.345789473684211e-05, "loss": 0.5582, "step": 88240 }, { "epoch": 0.88, "learning_rate": 9.337894736842104e-05, "loss": 0.5558, "step": 88250 }, { "epoch": 0.88, "learning_rate": 9.329999999999999e-05, "loss": 0.5542, "step": 88260 }, { "epoch": 0.88, "learning_rate": 9.322105263157894e-05, "loss": 0.5714, "step": 88270 }, { "epoch": 0.88, "learning_rate": 9.314210526315787e-05, "loss": 0.5604, "step": 88280 }, { "epoch": 0.88, "learning_rate": 9.306315789473684e-05, "loss": 0.5693, "step": 88290 }, { "epoch": 0.88, "learning_rate": 9.298421052631578e-05, "loss": 0.5553, "step": 88300 }, { "epoch": 0.88, "learning_rate": 9.290526315789473e-05, "loss": 0.5649, "step": 88310 }, { "epoch": 0.88, "learning_rate": 9.282631578947367e-05, "loss": 0.5605, "step": 88320 }, { "epoch": 0.88, "learning_rate": 9.274736842105263e-05, "loss": 0.5625, "step": 88330 }, { "epoch": 0.88, "learning_rate": 9.266842105263158e-05, "loss": 0.5614, "step": 88340 }, { "epoch": 0.88, "learning_rate": 9.258947368421051e-05, "loss": 0.5655, "step": 88350 }, { "epoch": 0.88, "learning_rate": 9.251052631578946e-05, "loss": 0.5665, "step": 88360 }, { "epoch": 0.88, "learning_rate": 9.243157894736841e-05, "loss": 0.5502, "step": 88370 }, { "epoch": 0.88, "learning_rate": 9.235263157894737e-05, "loss": 0.5712, "step": 88380 }, { "epoch": 0.88, "learning_rate": 9.22736842105263e-05, "loss": 0.5596, "step": 88390 }, { "epoch": 0.88, "learning_rate": 9.219473684210525e-05, "loss": 0.565, "step": 88400 }, { "epoch": 0.88, "learning_rate": 9.21157894736842e-05, "loss": 0.5525, "step": 88410 }, { "epoch": 0.88, "learning_rate": 9.203684210526314e-05, "loss": 0.561, "step": 88420 }, { "epoch": 0.88, "learning_rate": 9.19578947368421e-05, "loss": 0.5502, "step": 88430 }, { "epoch": 0.88, "learning_rate": 9.187894736842105e-05, "loss": 0.5437, "step": 88440 }, { "epoch": 0.88, "learning_rate": 9.18e-05, "loss": 0.5445, "step": 88450 }, { "epoch": 0.88, "learning_rate": 9.172105263157893e-05, "loss": 0.563, "step": 88460 }, { "epoch": 0.88, "learning_rate": 9.164210526315789e-05, "loss": 0.5559, "step": 88470 }, { "epoch": 0.88, "learning_rate": 9.156315789473684e-05, "loss": 0.5428, "step": 88480 }, { "epoch": 0.88, "learning_rate": 9.148421052631579e-05, "loss": 0.5585, "step": 88490 }, { "epoch": 0.89, "learning_rate": 9.140526315789472e-05, "loss": 0.555, "step": 88500 }, { "epoch": 0.89, "learning_rate": 9.132631578947367e-05, "loss": 0.5511, "step": 88510 }, { "epoch": 0.89, "learning_rate": 9.124736842105263e-05, "loss": 0.5623, "step": 88520 }, { "epoch": 0.89, "learning_rate": 9.116842105263157e-05, "loss": 0.5565, "step": 88530 }, { "epoch": 0.89, "learning_rate": 9.108947368421052e-05, "loss": 0.5556, "step": 88540 }, { "epoch": 0.89, "learning_rate": 9.101052631578946e-05, "loss": 0.5597, "step": 88550 }, { "epoch": 0.89, "learning_rate": 9.09315789473684e-05, "loss": 0.5525, "step": 88560 }, { "epoch": 0.89, "learning_rate": 9.085263157894736e-05, "loss": 0.5513, "step": 88570 }, { "epoch": 0.89, "learning_rate": 9.077368421052631e-05, "loss": 0.5645, "step": 88580 }, { "epoch": 0.89, "learning_rate": 9.069473684210526e-05, "loss": 0.565, "step": 88590 }, { "epoch": 0.89, "learning_rate": 9.061578947368419e-05, "loss": 0.5631, "step": 88600 }, { "epoch": 0.89, "learning_rate": 9.053684210526315e-05, "loss": 0.5731, "step": 88610 }, { "epoch": 0.89, "learning_rate": 9.04578947368421e-05, "loss": 0.5634, "step": 88620 }, { "epoch": 0.89, "learning_rate": 9.037894736842105e-05, "loss": 0.5574, "step": 88630 }, { "epoch": 0.89, "learning_rate": 9.029999999999999e-05, "loss": 0.5612, "step": 88640 }, { "epoch": 0.89, "learning_rate": 9.022105263157893e-05, "loss": 0.5519, "step": 88650 }, { "epoch": 0.89, "learning_rate": 9.01421052631579e-05, "loss": 0.5633, "step": 88660 }, { "epoch": 0.89, "learning_rate": 9.006315789473683e-05, "loss": 0.5493, "step": 88670 }, { "epoch": 0.89, "learning_rate": 8.998421052631578e-05, "loss": 0.5427, "step": 88680 }, { "epoch": 0.89, "learning_rate": 8.990526315789473e-05, "loss": 0.5537, "step": 88690 }, { "epoch": 0.89, "learning_rate": 8.982631578947366e-05, "loss": 0.5618, "step": 88700 }, { "epoch": 0.89, "learning_rate": 8.974736842105262e-05, "loss": 0.5594, "step": 88710 }, { "epoch": 0.89, "learning_rate": 8.966842105263157e-05, "loss": 0.5613, "step": 88720 }, { "epoch": 0.89, "learning_rate": 8.958947368421052e-05, "loss": 0.5563, "step": 88730 }, { "epoch": 0.89, "learning_rate": 8.951052631578946e-05, "loss": 0.558, "step": 88740 }, { "epoch": 0.89, "learning_rate": 8.943157894736842e-05, "loss": 0.5611, "step": 88750 }, { "epoch": 0.89, "learning_rate": 8.935263157894737e-05, "loss": 0.5707, "step": 88760 }, { "epoch": 0.89, "learning_rate": 8.927368421052631e-05, "loss": 0.5711, "step": 88770 }, { "epoch": 0.89, "learning_rate": 8.919473684210525e-05, "loss": 0.5677, "step": 88780 }, { "epoch": 0.89, "learning_rate": 8.91157894736842e-05, "loss": 0.5558, "step": 88790 }, { "epoch": 0.89, "learning_rate": 8.903684210526316e-05, "loss": 0.5675, "step": 88800 }, { "epoch": 0.89, "learning_rate": 8.895789473684211e-05, "loss": 0.5635, "step": 88810 }, { "epoch": 0.89, "learning_rate": 8.887894736842104e-05, "loss": 0.5631, "step": 88820 }, { "epoch": 0.89, "learning_rate": 8.879999999999999e-05, "loss": 0.5617, "step": 88830 }, { "epoch": 0.89, "learning_rate": 8.872105263157894e-05, "loss": 0.5618, "step": 88840 }, { "epoch": 0.89, "learning_rate": 8.864210526315789e-05, "loss": 0.5515, "step": 88850 }, { "epoch": 0.89, "learning_rate": 8.856315789473683e-05, "loss": 0.5603, "step": 88860 }, { "epoch": 0.89, "learning_rate": 8.848421052631578e-05, "loss": 0.555, "step": 88870 }, { "epoch": 0.89, "learning_rate": 8.840526315789472e-05, "loss": 0.5655, "step": 88880 }, { "epoch": 0.89, "learning_rate": 8.832631578947368e-05, "loss": 0.544, "step": 88890 }, { "epoch": 0.89, "learning_rate": 8.824736842105263e-05, "loss": 0.5488, "step": 88900 }, { "epoch": 0.89, "learning_rate": 8.816842105263158e-05, "loss": 0.5457, "step": 88910 }, { "epoch": 0.89, "learning_rate": 8.808947368421051e-05, "loss": 0.5529, "step": 88920 }, { "epoch": 0.89, "learning_rate": 8.801052631578946e-05, "loss": 0.545, "step": 88930 }, { "epoch": 0.89, "learning_rate": 8.793157894736842e-05, "loss": 0.5531, "step": 88940 }, { "epoch": 0.89, "learning_rate": 8.785263157894737e-05, "loss": 0.5613, "step": 88950 }, { "epoch": 0.89, "learning_rate": 8.77736842105263e-05, "loss": 0.5542, "step": 88960 }, { "epoch": 0.89, "learning_rate": 8.769473684210525e-05, "loss": 0.5573, "step": 88970 }, { "epoch": 0.89, "learning_rate": 8.76157894736842e-05, "loss": 0.5621, "step": 88980 }, { "epoch": 0.89, "learning_rate": 8.753684210526315e-05, "loss": 0.5609, "step": 88990 }, { "epoch": 0.89, "learning_rate": 8.74578947368421e-05, "loss": 0.5511, "step": 89000 }, { "epoch": 0.89, "learning_rate": 8.737894736842105e-05, "loss": 0.551, "step": 89010 }, { "epoch": 0.89, "learning_rate": 8.729999999999998e-05, "loss": 0.5672, "step": 89020 }, { "epoch": 0.89, "learning_rate": 8.722105263157894e-05, "loss": 0.5635, "step": 89030 }, { "epoch": 0.89, "learning_rate": 8.714210526315789e-05, "loss": 0.5622, "step": 89040 }, { "epoch": 0.89, "learning_rate": 8.706315789473684e-05, "loss": 0.5481, "step": 89050 }, { "epoch": 0.89, "learning_rate": 8.698421052631577e-05, "loss": 0.5586, "step": 89060 }, { "epoch": 0.89, "learning_rate": 8.690526315789472e-05, "loss": 0.5593, "step": 89070 }, { "epoch": 0.89, "learning_rate": 8.682631578947368e-05, "loss": 0.5607, "step": 89080 }, { "epoch": 0.89, "learning_rate": 8.674736842105263e-05, "loss": 0.5487, "step": 89090 }, { "epoch": 0.89, "learning_rate": 8.666842105263157e-05, "loss": 0.5549, "step": 89100 }, { "epoch": 0.89, "learning_rate": 8.658947368421052e-05, "loss": 0.5402, "step": 89110 }, { "epoch": 0.89, "learning_rate": 8.651052631578946e-05, "loss": 0.5462, "step": 89120 }, { "epoch": 0.89, "learning_rate": 8.643157894736841e-05, "loss": 0.5393, "step": 89130 }, { "epoch": 0.89, "learning_rate": 8.635263157894736e-05, "loss": 0.5485, "step": 89140 }, { "epoch": 0.89, "learning_rate": 8.627368421052631e-05, "loss": 0.5348, "step": 89150 }, { "epoch": 0.89, "learning_rate": 8.619473684210524e-05, "loss": 0.5457, "step": 89160 }, { "epoch": 0.89, "learning_rate": 8.61157894736842e-05, "loss": 0.5443, "step": 89170 }, { "epoch": 0.89, "learning_rate": 8.603684210526315e-05, "loss": 0.5416, "step": 89180 }, { "epoch": 0.89, "learning_rate": 8.59578947368421e-05, "loss": 0.5547, "step": 89190 }, { "epoch": 0.89, "learning_rate": 8.587894736842104e-05, "loss": 0.5567, "step": 89200 }, { "epoch": 0.89, "learning_rate": 8.579999999999998e-05, "loss": 0.5465, "step": 89210 }, { "epoch": 0.89, "learning_rate": 8.572105263157895e-05, "loss": 0.5436, "step": 89220 }, { "epoch": 0.89, "learning_rate": 8.56421052631579e-05, "loss": 0.5476, "step": 89230 }, { "epoch": 0.89, "learning_rate": 8.556315789473683e-05, "loss": 0.5521, "step": 89240 }, { "epoch": 0.89, "learning_rate": 8.548421052631578e-05, "loss": 0.5449, "step": 89250 }, { "epoch": 0.89, "learning_rate": 8.540526315789473e-05, "loss": 0.5518, "step": 89260 }, { "epoch": 0.89, "learning_rate": 8.532631578947369e-05, "loss": 0.5415, "step": 89270 }, { "epoch": 0.89, "learning_rate": 8.524736842105262e-05, "loss": 0.5642, "step": 89280 }, { "epoch": 0.89, "learning_rate": 8.516842105263157e-05, "loss": 0.5591, "step": 89290 }, { "epoch": 0.89, "learning_rate": 8.508947368421052e-05, "loss": 0.5477, "step": 89300 }, { "epoch": 0.89, "learning_rate": 8.501052631578947e-05, "loss": 0.5412, "step": 89310 }, { "epoch": 0.89, "learning_rate": 8.493157894736842e-05, "loss": 0.557, "step": 89320 }, { "epoch": 0.89, "learning_rate": 8.485263157894736e-05, "loss": 0.5437, "step": 89330 }, { "epoch": 0.89, "learning_rate": 8.47736842105263e-05, "loss": 0.5496, "step": 89340 }, { "epoch": 0.89, "learning_rate": 8.469473684210525e-05, "loss": 0.5582, "step": 89350 }, { "epoch": 0.89, "learning_rate": 8.461578947368421e-05, "loss": 0.5559, "step": 89360 }, { "epoch": 0.89, "learning_rate": 8.453684210526316e-05, "loss": 0.5559, "step": 89370 }, { "epoch": 0.89, "learning_rate": 8.445789473684209e-05, "loss": 0.5458, "step": 89380 }, { "epoch": 0.89, "learning_rate": 8.437894736842104e-05, "loss": 0.5466, "step": 89390 }, { "epoch": 0.89, "learning_rate": 8.43e-05, "loss": 0.5434, "step": 89400 }, { "epoch": 0.89, "learning_rate": 8.422105263157895e-05, "loss": 0.5571, "step": 89410 }, { "epoch": 0.89, "learning_rate": 8.414210526315789e-05, "loss": 0.5477, "step": 89420 }, { "epoch": 0.89, "learning_rate": 8.406315789473683e-05, "loss": 0.5559, "step": 89430 }, { "epoch": 0.89, "learning_rate": 8.398421052631578e-05, "loss": 0.5469, "step": 89440 }, { "epoch": 0.89, "learning_rate": 8.390526315789473e-05, "loss": 0.5594, "step": 89450 }, { "epoch": 0.89, "learning_rate": 8.382631578947368e-05, "loss": 0.5549, "step": 89460 }, { "epoch": 0.89, "learning_rate": 8.374736842105263e-05, "loss": 0.5616, "step": 89470 }, { "epoch": 0.89, "learning_rate": 8.366842105263156e-05, "loss": 0.5534, "step": 89480 }, { "epoch": 0.89, "learning_rate": 8.358947368421051e-05, "loss": 0.5546, "step": 89490 }, { "epoch": 0.9, "learning_rate": 8.351052631578947e-05, "loss": 0.5519, "step": 89500 }, { "epoch": 0.9, "learning_rate": 8.343157894736842e-05, "loss": 0.5491, "step": 89510 }, { "epoch": 0.9, "learning_rate": 8.335263157894736e-05, "loss": 0.5394, "step": 89520 }, { "epoch": 0.9, "learning_rate": 8.32736842105263e-05, "loss": 0.5497, "step": 89530 }, { "epoch": 0.9, "learning_rate": 8.319473684210527e-05, "loss": 0.56, "step": 89540 }, { "epoch": 0.9, "learning_rate": 8.311578947368421e-05, "loss": 0.5481, "step": 89550 }, { "epoch": 0.9, "learning_rate": 8.303684210526315e-05, "loss": 0.5543, "step": 89560 }, { "epoch": 0.9, "learning_rate": 8.29578947368421e-05, "loss": 0.5625, "step": 89570 }, { "epoch": 0.9, "learning_rate": 8.287894736842104e-05, "loss": 0.5411, "step": 89580 }, { "epoch": 0.9, "learning_rate": 8.28e-05, "loss": 0.5489, "step": 89590 }, { "epoch": 0.9, "learning_rate": 8.272105263157894e-05, "loss": 0.542, "step": 89600 }, { "epoch": 0.9, "learning_rate": 8.264210526315789e-05, "loss": 0.5521, "step": 89610 }, { "epoch": 0.9, "learning_rate": 8.256315789473682e-05, "loss": 0.5438, "step": 89620 }, { "epoch": 0.9, "learning_rate": 8.248421052631577e-05, "loss": 0.5569, "step": 89630 }, { "epoch": 0.9, "learning_rate": 8.240526315789473e-05, "loss": 0.5563, "step": 89640 }, { "epoch": 0.9, "learning_rate": 8.232631578947368e-05, "loss": 0.5735, "step": 89650 }, { "epoch": 0.9, "learning_rate": 8.224736842105262e-05, "loss": 0.5714, "step": 89660 }, { "epoch": 0.9, "learning_rate": 8.216842105263157e-05, "loss": 0.5744, "step": 89670 }, { "epoch": 0.9, "learning_rate": 8.208947368421053e-05, "loss": 0.5674, "step": 89680 }, { "epoch": 0.9, "learning_rate": 8.201052631578948e-05, "loss": 0.5549, "step": 89690 }, { "epoch": 0.9, "learning_rate": 8.193157894736841e-05, "loss": 0.5675, "step": 89700 }, { "epoch": 0.9, "learning_rate": 8.185263157894736e-05, "loss": 0.5662, "step": 89710 }, { "epoch": 0.9, "learning_rate": 8.177368421052631e-05, "loss": 0.5685, "step": 89720 }, { "epoch": 0.9, "learning_rate": 8.169473684210527e-05, "loss": 0.5617, "step": 89730 }, { "epoch": 0.9, "learning_rate": 8.16157894736842e-05, "loss": 0.5657, "step": 89740 }, { "epoch": 0.9, "learning_rate": 8.153684210526315e-05, "loss": 0.5598, "step": 89750 }, { "epoch": 0.9, "learning_rate": 8.14578947368421e-05, "loss": 0.5597, "step": 89760 }, { "epoch": 0.9, "learning_rate": 8.137894736842104e-05, "loss": 0.5574, "step": 89770 }, { "epoch": 0.9, "learning_rate": 8.13e-05, "loss": 0.5665, "step": 89780 }, { "epoch": 0.9, "learning_rate": 8.122105263157895e-05, "loss": 0.568, "step": 89790 }, { "epoch": 0.9, "learning_rate": 8.114210526315788e-05, "loss": 0.5616, "step": 89800 }, { "epoch": 0.9, "learning_rate": 8.106315789473683e-05, "loss": 0.5512, "step": 89810 }, { "epoch": 0.9, "learning_rate": 8.098421052631579e-05, "loss": 0.5602, "step": 89820 }, { "epoch": 0.9, "learning_rate": 8.090526315789474e-05, "loss": 0.5565, "step": 89830 }, { "epoch": 0.9, "learning_rate": 8.082631578947367e-05, "loss": 0.5572, "step": 89840 }, { "epoch": 0.9, "learning_rate": 8.074736842105262e-05, "loss": 0.5627, "step": 89850 }, { "epoch": 0.9, "learning_rate": 8.066842105263157e-05, "loss": 0.5689, "step": 89860 }, { "epoch": 0.9, "learning_rate": 8.058947368421053e-05, "loss": 0.5642, "step": 89870 }, { "epoch": 0.9, "learning_rate": 8.051052631578947e-05, "loss": 0.5434, "step": 89880 }, { "epoch": 0.9, "learning_rate": 8.043157894736842e-05, "loss": 0.5498, "step": 89890 }, { "epoch": 0.9, "learning_rate": 8.035263157894736e-05, "loss": 0.5351, "step": 89900 }, { "epoch": 0.9, "learning_rate": 8.02736842105263e-05, "loss": 0.5417, "step": 89910 }, { "epoch": 0.9, "learning_rate": 8.019473684210526e-05, "loss": 0.5542, "step": 89920 }, { "epoch": 0.9, "learning_rate": 8.011578947368421e-05, "loss": 0.5592, "step": 89930 }, { "epoch": 0.9, "learning_rate": 8.003684210526314e-05, "loss": 0.5733, "step": 89940 }, { "epoch": 0.9, "learning_rate": 7.995789473684209e-05, "loss": 0.571, "step": 89950 }, { "epoch": 0.9, "learning_rate": 7.987894736842105e-05, "loss": 0.5549, "step": 89960 }, { "epoch": 0.9, "learning_rate": 7.98e-05, "loss": 0.5582, "step": 89970 }, { "epoch": 0.9, "learning_rate": 7.972105263157894e-05, "loss": 0.564, "step": 89980 }, { "epoch": 0.9, "learning_rate": 7.964210526315788e-05, "loss": 0.565, "step": 89990 }, { "epoch": 0.9, "learning_rate": 7.956315789473683e-05, "loss": 0.5724, "step": 90000 }, { "epoch": 0.9, "eval_accuracy": 0.8820988856910535, "eval_loss": 0.537109375, "eval_runtime": 97.2134, "eval_samples_per_second": 822.932, "eval_steps_per_second": 1.615, "step": 90000 }, { "epoch": 0.9, "learning_rate": 7.94842105263158e-05, "loss": 0.5633, "step": 90010 }, { "epoch": 0.9, "learning_rate": 7.940526315789473e-05, "loss": 0.555, "step": 90020 }, { "epoch": 0.9, "learning_rate": 7.932631578947368e-05, "loss": 0.5656, "step": 90030 }, { "epoch": 0.9, "learning_rate": 7.924736842105263e-05, "loss": 0.5636, "step": 90040 }, { "epoch": 0.9, "learning_rate": 7.916842105263156e-05, "loss": 0.5523, "step": 90050 }, { "epoch": 0.9, "learning_rate": 7.908947368421052e-05, "loss": 0.5567, "step": 90060 }, { "epoch": 0.9, "learning_rate": 7.901052631578947e-05, "loss": 0.5561, "step": 90070 }, { "epoch": 0.9, "learning_rate": 7.89315789473684e-05, "loss": 0.5568, "step": 90080 }, { "epoch": 0.9, "learning_rate": 7.885263157894735e-05, "loss": 0.5597, "step": 90090 }, { "epoch": 0.9, "learning_rate": 7.878157894736842e-05, "loss": 0.5579, "step": 90100 }, { "epoch": 0.9, "learning_rate": 7.870263157894735e-05, "loss": 0.5622, "step": 90110 }, { "epoch": 0.9, "learning_rate": 7.862368421052631e-05, "loss": 0.5563, "step": 90120 }, { "epoch": 0.9, "learning_rate": 7.854473684210526e-05, "loss": 0.5649, "step": 90130 }, { "epoch": 0.9, "learning_rate": 7.846578947368421e-05, "loss": 0.5568, "step": 90140 }, { "epoch": 0.9, "learning_rate": 7.838684210526314e-05, "loss": 0.5674, "step": 90150 }, { "epoch": 0.9, "learning_rate": 7.830789473684209e-05, "loss": 0.5641, "step": 90160 }, { "epoch": 0.9, "learning_rate": 7.822894736842105e-05, "loss": 0.5598, "step": 90170 }, { "epoch": 0.9, "learning_rate": 7.815e-05, "loss": 0.567, "step": 90180 }, { "epoch": 0.9, "learning_rate": 7.807105263157894e-05, "loss": 0.5678, "step": 90190 }, { "epoch": 0.9, "learning_rate": 7.799210526315789e-05, "loss": 0.5584, "step": 90200 }, { "epoch": 0.9, "learning_rate": 7.791315789473683e-05, "loss": 0.566, "step": 90210 }, { "epoch": 0.9, "learning_rate": 7.783421052631578e-05, "loss": 0.5538, "step": 90220 }, { "epoch": 0.9, "learning_rate": 7.775526315789473e-05, "loss": 0.5639, "step": 90230 }, { "epoch": 0.9, "learning_rate": 7.767631578947368e-05, "loss": 0.5665, "step": 90240 }, { "epoch": 0.9, "learning_rate": 7.759736842105261e-05, "loss": 0.5691, "step": 90250 }, { "epoch": 0.9, "learning_rate": 7.751842105263158e-05, "loss": 0.5702, "step": 90260 }, { "epoch": 0.9, "learning_rate": 7.743947368421052e-05, "loss": 0.5608, "step": 90270 }, { "epoch": 0.9, "learning_rate": 7.736052631578947e-05, "loss": 0.5412, "step": 90280 }, { "epoch": 0.9, "learning_rate": 7.72815789473684e-05, "loss": 0.554, "step": 90290 }, { "epoch": 0.9, "learning_rate": 7.720263157894735e-05, "loss": 0.5615, "step": 90300 }, { "epoch": 0.9, "learning_rate": 7.712368421052632e-05, "loss": 0.5645, "step": 90310 }, { "epoch": 0.9, "learning_rate": 7.704473684210526e-05, "loss": 0.5545, "step": 90320 }, { "epoch": 0.9, "learning_rate": 7.69657894736842e-05, "loss": 0.5475, "step": 90330 }, { "epoch": 0.9, "learning_rate": 7.688684210526315e-05, "loss": 0.5529, "step": 90340 }, { "epoch": 0.9, "learning_rate": 7.68078947368421e-05, "loss": 0.5619, "step": 90350 }, { "epoch": 0.9, "learning_rate": 7.672894736842104e-05, "loss": 0.5584, "step": 90360 }, { "epoch": 0.9, "learning_rate": 7.664999999999999e-05, "loss": 0.5619, "step": 90370 }, { "epoch": 0.9, "learning_rate": 7.657105263157894e-05, "loss": 0.5522, "step": 90380 }, { "epoch": 0.9, "learning_rate": 7.649210526315788e-05, "loss": 0.5677, "step": 90390 }, { "epoch": 0.9, "learning_rate": 7.641315789473684e-05, "loss": 0.5508, "step": 90400 }, { "epoch": 0.9, "learning_rate": 7.633421052631579e-05, "loss": 0.5749, "step": 90410 }, { "epoch": 0.9, "learning_rate": 7.625526315789473e-05, "loss": 0.5545, "step": 90420 }, { "epoch": 0.9, "learning_rate": 7.617631578947367e-05, "loss": 0.5567, "step": 90430 }, { "epoch": 0.9, "learning_rate": 7.609736842105262e-05, "loss": 0.5543, "step": 90440 }, { "epoch": 0.9, "learning_rate": 7.601842105263158e-05, "loss": 0.5609, "step": 90450 }, { "epoch": 0.9, "learning_rate": 7.593947368421053e-05, "loss": 0.5603, "step": 90460 }, { "epoch": 0.9, "learning_rate": 7.586052631578946e-05, "loss": 0.5612, "step": 90470 }, { "epoch": 0.9, "learning_rate": 7.578157894736841e-05, "loss": 0.5648, "step": 90480 }, { "epoch": 0.9, "learning_rate": 7.570263157894736e-05, "loss": 0.5653, "step": 90490 }, { "epoch": 0.91, "learning_rate": 7.562368421052632e-05, "loss": 0.5616, "step": 90500 }, { "epoch": 0.91, "learning_rate": 7.554473684210526e-05, "loss": 0.5541, "step": 90510 }, { "epoch": 0.91, "learning_rate": 7.54657894736842e-05, "loss": 0.563, "step": 90520 }, { "epoch": 0.91, "learning_rate": 7.538684210526315e-05, "loss": 0.5585, "step": 90530 }, { "epoch": 0.91, "learning_rate": 7.53078947368421e-05, "loss": 0.544, "step": 90540 }, { "epoch": 0.91, "learning_rate": 7.522894736842105e-05, "loss": 0.5487, "step": 90550 }, { "epoch": 0.91, "learning_rate": 7.515e-05, "loss": 0.5548, "step": 90560 }, { "epoch": 0.91, "learning_rate": 7.507105263157893e-05, "loss": 0.5547, "step": 90570 }, { "epoch": 0.91, "learning_rate": 7.49921052631579e-05, "loss": 0.5555, "step": 90580 }, { "epoch": 0.91, "learning_rate": 7.491315789473683e-05, "loss": 0.5652, "step": 90590 }, { "epoch": 0.91, "learning_rate": 7.483421052631579e-05, "loss": 0.5609, "step": 90600 }, { "epoch": 0.91, "learning_rate": 7.475526315789473e-05, "loss": 0.5549, "step": 90610 }, { "epoch": 0.91, "learning_rate": 7.467631578947369e-05, "loss": 0.5575, "step": 90620 }, { "epoch": 0.91, "learning_rate": 7.459736842105262e-05, "loss": 0.5562, "step": 90630 }, { "epoch": 0.91, "learning_rate": 7.451842105263157e-05, "loss": 0.5562, "step": 90640 }, { "epoch": 0.91, "learning_rate": 7.443947368421052e-05, "loss": 0.5541, "step": 90650 }, { "epoch": 0.91, "learning_rate": 7.436052631578947e-05, "loss": 0.5454, "step": 90660 }, { "epoch": 0.91, "learning_rate": 7.428157894736841e-05, "loss": 0.5686, "step": 90670 }, { "epoch": 0.91, "learning_rate": 7.420263157894736e-05, "loss": 0.5682, "step": 90680 }, { "epoch": 0.91, "learning_rate": 7.412368421052631e-05, "loss": 0.5579, "step": 90690 }, { "epoch": 0.91, "learning_rate": 7.404473684210526e-05, "loss": 0.562, "step": 90700 }, { "epoch": 0.91, "learning_rate": 7.39657894736842e-05, "loss": 0.5628, "step": 90710 }, { "epoch": 0.91, "learning_rate": 7.388684210526316e-05, "loss": 0.5559, "step": 90720 }, { "epoch": 0.91, "learning_rate": 7.380789473684209e-05, "loss": 0.5576, "step": 90730 }, { "epoch": 0.91, "learning_rate": 7.372894736842105e-05, "loss": 0.5532, "step": 90740 }, { "epoch": 0.91, "learning_rate": 7.364999999999999e-05, "loss": 0.5491, "step": 90750 }, { "epoch": 0.91, "learning_rate": 7.357105263157895e-05, "loss": 0.5621, "step": 90760 }, { "epoch": 0.91, "learning_rate": 7.349210526315788e-05, "loss": 0.5491, "step": 90770 }, { "epoch": 0.91, "learning_rate": 7.341315789473683e-05, "loss": 0.5474, "step": 90780 }, { "epoch": 0.91, "learning_rate": 7.333421052631578e-05, "loss": 0.561, "step": 90790 }, { "epoch": 0.91, "learning_rate": 7.325526315789473e-05, "loss": 0.5569, "step": 90800 }, { "epoch": 0.91, "learning_rate": 7.317631578947368e-05, "loss": 0.5555, "step": 90810 }, { "epoch": 0.91, "learning_rate": 7.309736842105263e-05, "loss": 0.5637, "step": 90820 }, { "epoch": 0.91, "learning_rate": 7.301842105263157e-05, "loss": 0.5755, "step": 90830 }, { "epoch": 0.91, "learning_rate": 7.293947368421052e-05, "loss": 0.5626, "step": 90840 }, { "epoch": 0.91, "learning_rate": 7.286052631578946e-05, "loss": 0.5753, "step": 90850 }, { "epoch": 0.91, "learning_rate": 7.278157894736842e-05, "loss": 0.5672, "step": 90860 }, { "epoch": 0.91, "learning_rate": 7.270263157894735e-05, "loss": 0.5644, "step": 90870 }, { "epoch": 0.91, "learning_rate": 7.262368421052632e-05, "loss": 0.5543, "step": 90880 }, { "epoch": 0.91, "learning_rate": 7.254473684210525e-05, "loss": 0.5625, "step": 90890 }, { "epoch": 0.91, "learning_rate": 7.246578947368421e-05, "loss": 0.5621, "step": 90900 }, { "epoch": 0.91, "learning_rate": 7.238684210526315e-05, "loss": 0.565, "step": 90910 }, { "epoch": 0.91, "learning_rate": 7.23078947368421e-05, "loss": 0.5587, "step": 90920 }, { "epoch": 0.91, "learning_rate": 7.222894736842104e-05, "loss": 0.5577, "step": 90930 }, { "epoch": 0.91, "learning_rate": 7.214999999999999e-05, "loss": 0.5575, "step": 90940 }, { "epoch": 0.91, "learning_rate": 7.207105263157894e-05, "loss": 0.5588, "step": 90950 }, { "epoch": 0.91, "learning_rate": 7.199210526315789e-05, "loss": 0.5574, "step": 90960 }, { "epoch": 0.91, "learning_rate": 7.191315789473684e-05, "loss": 0.5623, "step": 90970 }, { "epoch": 0.91, "learning_rate": 7.183421052631579e-05, "loss": 0.5494, "step": 90980 }, { "epoch": 0.91, "learning_rate": 7.175526315789473e-05, "loss": 0.5557, "step": 90990 }, { "epoch": 0.91, "learning_rate": 7.167631578947368e-05, "loss": 0.5554, "step": 91000 }, { "epoch": 0.91, "learning_rate": 7.159736842105262e-05, "loss": 0.5635, "step": 91010 }, { "epoch": 0.91, "learning_rate": 7.151842105263158e-05, "loss": 0.5585, "step": 91020 }, { "epoch": 0.91, "learning_rate": 7.143947368421051e-05, "loss": 0.5526, "step": 91030 }, { "epoch": 0.91, "learning_rate": 7.136052631578947e-05, "loss": 0.5539, "step": 91040 }, { "epoch": 0.91, "learning_rate": 7.128157894736841e-05, "loss": 0.5575, "step": 91050 }, { "epoch": 0.91, "learning_rate": 7.120263157894736e-05, "loss": 0.5483, "step": 91060 }, { "epoch": 0.91, "learning_rate": 7.11236842105263e-05, "loss": 0.5647, "step": 91070 }, { "epoch": 0.91, "learning_rate": 7.104473684210525e-05, "loss": 0.5564, "step": 91080 }, { "epoch": 0.91, "learning_rate": 7.09657894736842e-05, "loss": 0.5727, "step": 91090 }, { "epoch": 0.91, "learning_rate": 7.089473684210525e-05, "loss": 0.5502, "step": 91100 }, { "epoch": 0.91, "learning_rate": 7.081578947368421e-05, "loss": 0.5648, "step": 91110 }, { "epoch": 0.91, "learning_rate": 7.073684210526315e-05, "loss": 0.5551, "step": 91120 }, { "epoch": 0.91, "learning_rate": 7.06578947368421e-05, "loss": 0.5649, "step": 91130 }, { "epoch": 0.91, "learning_rate": 7.057894736842104e-05, "loss": 0.5595, "step": 91140 }, { "epoch": 0.91, "learning_rate": 7.049999999999999e-05, "loss": 0.5592, "step": 91150 }, { "epoch": 0.91, "learning_rate": 7.042105263157894e-05, "loss": 0.5569, "step": 91160 }, { "epoch": 0.91, "learning_rate": 7.034210526315789e-05, "loss": 0.5577, "step": 91170 }, { "epoch": 0.91, "learning_rate": 7.026315789473684e-05, "loss": 0.567, "step": 91180 }, { "epoch": 0.91, "learning_rate": 7.018421052631579e-05, "loss": 0.5574, "step": 91190 }, { "epoch": 0.91, "learning_rate": 7.010526315789473e-05, "loss": 0.5472, "step": 91200 }, { "epoch": 0.91, "learning_rate": 7.002631578947368e-05, "loss": 0.5459, "step": 91210 }, { "epoch": 0.91, "learning_rate": 6.994736842105262e-05, "loss": 0.5495, "step": 91220 }, { "epoch": 0.91, "learning_rate": 6.986842105263158e-05, "loss": 0.5771, "step": 91230 }, { "epoch": 0.91, "learning_rate": 6.978947368421051e-05, "loss": 0.5726, "step": 91240 }, { "epoch": 0.91, "learning_rate": 6.971052631578948e-05, "loss": 0.5685, "step": 91250 }, { "epoch": 0.91, "learning_rate": 6.963157894736841e-05, "loss": 0.568, "step": 91260 }, { "epoch": 0.91, "learning_rate": 6.955263157894737e-05, "loss": 0.5633, "step": 91270 }, { "epoch": 0.91, "learning_rate": 6.947368421052631e-05, "loss": 0.544, "step": 91280 }, { "epoch": 0.91, "learning_rate": 6.939473684210526e-05, "loss": 0.5422, "step": 91290 }, { "epoch": 0.91, "learning_rate": 6.93157894736842e-05, "loss": 0.5397, "step": 91300 }, { "epoch": 0.91, "learning_rate": 6.923684210526315e-05, "loss": 0.565, "step": 91310 }, { "epoch": 0.91, "learning_rate": 6.91578947368421e-05, "loss": 0.5302, "step": 91320 }, { "epoch": 0.91, "learning_rate": 6.907894736842105e-05, "loss": 0.5405, "step": 91330 }, { "epoch": 0.91, "learning_rate": 6.9e-05, "loss": 0.5276, "step": 91340 }, { "epoch": 0.91, "learning_rate": 6.892105263157894e-05, "loss": 0.5401, "step": 91350 }, { "epoch": 0.91, "learning_rate": 6.884210526315788e-05, "loss": 0.5361, "step": 91360 }, { "epoch": 0.91, "learning_rate": 6.876315789473684e-05, "loss": 0.5328, "step": 91370 }, { "epoch": 0.91, "learning_rate": 6.868421052631578e-05, "loss": 0.5281, "step": 91380 }, { "epoch": 0.91, "learning_rate": 6.860526315789474e-05, "loss": 0.5363, "step": 91390 }, { "epoch": 0.91, "learning_rate": 6.852631578947367e-05, "loss": 0.5341, "step": 91400 }, { "epoch": 0.91, "learning_rate": 6.844736842105263e-05, "loss": 0.545, "step": 91410 }, { "epoch": 0.91, "learning_rate": 6.836842105263157e-05, "loss": 0.5369, "step": 91420 }, { "epoch": 0.91, "learning_rate": 6.828947368421052e-05, "loss": 0.5564, "step": 91430 }, { "epoch": 0.91, "learning_rate": 6.821052631578947e-05, "loss": 0.5442, "step": 91440 }, { "epoch": 0.91, "learning_rate": 6.813157894736841e-05, "loss": 0.5697, "step": 91450 }, { "epoch": 0.91, "learning_rate": 6.805263157894736e-05, "loss": 0.5594, "step": 91460 }, { "epoch": 0.91, "learning_rate": 6.797368421052631e-05, "loss": 0.5676, "step": 91470 }, { "epoch": 0.91, "learning_rate": 6.789473684210526e-05, "loss": 0.561, "step": 91480 }, { "epoch": 0.91, "learning_rate": 6.781578947368421e-05, "loss": 0.5583, "step": 91490 }, { "epoch": 0.92, "learning_rate": 6.773684210526316e-05, "loss": 0.5535, "step": 91500 }, { "epoch": 0.92, "learning_rate": 6.76578947368421e-05, "loss": 0.5649, "step": 91510 }, { "epoch": 0.92, "learning_rate": 6.757894736842104e-05, "loss": 0.559, "step": 91520 }, { "epoch": 0.92, "learning_rate": 6.75e-05, "loss": 0.5572, "step": 91530 }, { "epoch": 0.92, "learning_rate": 6.742105263157894e-05, "loss": 0.5607, "step": 91540 }, { "epoch": 0.92, "learning_rate": 6.73421052631579e-05, "loss": 0.5493, "step": 91550 }, { "epoch": 0.92, "learning_rate": 6.726315789473683e-05, "loss": 0.5643, "step": 91560 }, { "epoch": 0.92, "learning_rate": 6.71842105263158e-05, "loss": 0.5615, "step": 91570 }, { "epoch": 0.92, "learning_rate": 6.710526315789473e-05, "loss": 0.5482, "step": 91580 }, { "epoch": 0.92, "learning_rate": 6.702631578947368e-05, "loss": 0.5503, "step": 91590 }, { "epoch": 0.92, "learning_rate": 6.694736842105263e-05, "loss": 0.5373, "step": 91600 }, { "epoch": 0.92, "learning_rate": 6.686842105263157e-05, "loss": 0.538, "step": 91610 }, { "epoch": 0.92, "learning_rate": 6.678947368421052e-05, "loss": 0.541, "step": 91620 }, { "epoch": 0.92, "learning_rate": 6.671052631578947e-05, "loss": 0.5373, "step": 91630 }, { "epoch": 0.92, "learning_rate": 6.663157894736842e-05, "loss": 0.5468, "step": 91640 }, { "epoch": 0.92, "learning_rate": 6.655263157894737e-05, "loss": 0.5584, "step": 91650 }, { "epoch": 0.92, "learning_rate": 6.64736842105263e-05, "loss": 0.5783, "step": 91660 }, { "epoch": 0.92, "learning_rate": 6.639473684210526e-05, "loss": 0.5676, "step": 91670 }, { "epoch": 0.92, "learning_rate": 6.63157894736842e-05, "loss": 0.5649, "step": 91680 }, { "epoch": 0.92, "learning_rate": 6.623684210526316e-05, "loss": 0.5692, "step": 91690 }, { "epoch": 0.92, "learning_rate": 6.61578947368421e-05, "loss": 0.573, "step": 91700 }, { "epoch": 0.92, "learning_rate": 6.607894736842106e-05, "loss": 0.5864, "step": 91710 }, { "epoch": 0.92, "learning_rate": 6.599999999999999e-05, "loss": 0.5886, "step": 91720 }, { "epoch": 0.92, "learning_rate": 6.592105263157894e-05, "loss": 0.5783, "step": 91730 }, { "epoch": 0.92, "learning_rate": 6.584210526315789e-05, "loss": 0.5594, "step": 91740 }, { "epoch": 0.92, "learning_rate": 6.576315789473684e-05, "loss": 0.5523, "step": 91750 }, { "epoch": 0.92, "learning_rate": 6.568421052631578e-05, "loss": 0.5319, "step": 91760 }, { "epoch": 0.92, "learning_rate": 6.560526315789473e-05, "loss": 0.5527, "step": 91770 }, { "epoch": 0.92, "learning_rate": 6.552631578947368e-05, "loss": 0.545, "step": 91780 }, { "epoch": 0.92, "learning_rate": 6.544736842105263e-05, "loss": 0.5667, "step": 91790 }, { "epoch": 0.92, "learning_rate": 6.536842105263156e-05, "loss": 0.5477, "step": 91800 }, { "epoch": 0.92, "learning_rate": 6.528947368421053e-05, "loss": 0.5548, "step": 91810 }, { "epoch": 0.92, "learning_rate": 6.521052631578946e-05, "loss": 0.5453, "step": 91820 }, { "epoch": 0.92, "learning_rate": 6.513157894736842e-05, "loss": 0.5416, "step": 91830 }, { "epoch": 0.92, "learning_rate": 6.505263157894736e-05, "loss": 0.5347, "step": 91840 }, { "epoch": 0.92, "learning_rate": 6.497368421052632e-05, "loss": 0.5378, "step": 91850 }, { "epoch": 0.92, "learning_rate": 6.489473684210525e-05, "loss": 0.5349, "step": 91860 }, { "epoch": 0.92, "learning_rate": 6.48157894736842e-05, "loss": 0.5472, "step": 91870 }, { "epoch": 0.92, "learning_rate": 6.473684210526315e-05, "loss": 0.5486, "step": 91880 }, { "epoch": 0.92, "learning_rate": 6.46578947368421e-05, "loss": 0.5664, "step": 91890 }, { "epoch": 0.92, "learning_rate": 6.457894736842105e-05, "loss": 0.5623, "step": 91900 }, { "epoch": 0.92, "learning_rate": 6.45e-05, "loss": 0.5685, "step": 91910 }, { "epoch": 0.92, "learning_rate": 6.442105263157894e-05, "loss": 0.5764, "step": 91920 }, { "epoch": 0.92, "learning_rate": 6.434210526315789e-05, "loss": 0.5662, "step": 91930 }, { "epoch": 0.92, "learning_rate": 6.426315789473683e-05, "loss": 0.5745, "step": 91940 }, { "epoch": 0.92, "learning_rate": 6.418421052631579e-05, "loss": 0.5882, "step": 91950 }, { "epoch": 0.92, "learning_rate": 6.410526315789472e-05, "loss": 0.5799, "step": 91960 }, { "epoch": 0.92, "learning_rate": 6.402631578947369e-05, "loss": 0.5643, "step": 91970 }, { "epoch": 0.92, "learning_rate": 6.394736842105262e-05, "loss": 0.5662, "step": 91980 }, { "epoch": 0.92, "learning_rate": 6.386842105263158e-05, "loss": 0.5588, "step": 91990 }, { "epoch": 0.92, "learning_rate": 6.378947368421052e-05, "loss": 0.5582, "step": 92000 }, { "epoch": 0.92, "learning_rate": 6.371052631578947e-05, "loss": 0.5585, "step": 92010 }, { "epoch": 0.92, "learning_rate": 6.363157894736841e-05, "loss": 0.5563, "step": 92020 }, { "epoch": 0.92, "learning_rate": 6.355263157894736e-05, "loss": 0.5533, "step": 92030 }, { "epoch": 0.92, "learning_rate": 6.347368421052631e-05, "loss": 0.559, "step": 92040 }, { "epoch": 0.92, "learning_rate": 6.339473684210526e-05, "loss": 0.5579, "step": 92050 }, { "epoch": 0.92, "learning_rate": 6.33157894736842e-05, "loss": 0.5512, "step": 92060 }, { "epoch": 0.92, "learning_rate": 6.323684210526315e-05, "loss": 0.5438, "step": 92070 }, { "epoch": 0.92, "learning_rate": 6.315789473684209e-05, "loss": 0.539, "step": 92080 }, { "epoch": 0.92, "learning_rate": 6.307894736842105e-05, "loss": 0.548, "step": 92090 }, { "epoch": 0.92, "learning_rate": 6.30078947368421e-05, "loss": 0.5482, "step": 92100 }, { "epoch": 0.92, "learning_rate": 6.292894736842105e-05, "loss": 0.5486, "step": 92110 }, { "epoch": 0.92, "learning_rate": 6.285e-05, "loss": 0.5571, "step": 92120 }, { "epoch": 0.92, "learning_rate": 6.277105263157894e-05, "loss": 0.5558, "step": 92130 }, { "epoch": 0.92, "learning_rate": 6.269210526315789e-05, "loss": 0.5622, "step": 92140 }, { "epoch": 0.92, "learning_rate": 6.261315789473684e-05, "loss": 0.5607, "step": 92150 }, { "epoch": 0.92, "learning_rate": 6.253421052631579e-05, "loss": 0.5745, "step": 92160 }, { "epoch": 0.92, "learning_rate": 6.245526315789472e-05, "loss": 0.5646, "step": 92170 }, { "epoch": 0.92, "learning_rate": 6.237631578947369e-05, "loss": 0.5684, "step": 92180 }, { "epoch": 0.92, "learning_rate": 6.229736842105262e-05, "loss": 0.5744, "step": 92190 }, { "epoch": 0.92, "learning_rate": 6.221842105263157e-05, "loss": 0.5742, "step": 92200 }, { "epoch": 0.92, "learning_rate": 6.213947368421052e-05, "loss": 0.5668, "step": 92210 }, { "epoch": 0.92, "learning_rate": 6.206052631578947e-05, "loss": 0.5487, "step": 92220 }, { "epoch": 0.92, "learning_rate": 6.198157894736841e-05, "loss": 0.5567, "step": 92230 }, { "epoch": 0.92, "learning_rate": 6.190263157894736e-05, "loss": 0.5486, "step": 92240 }, { "epoch": 0.92, "learning_rate": 6.182368421052631e-05, "loss": 0.5625, "step": 92250 }, { "epoch": 0.92, "learning_rate": 6.174473684210526e-05, "loss": 0.5562, "step": 92260 }, { "epoch": 0.92, "learning_rate": 6.166578947368421e-05, "loss": 0.549, "step": 92270 }, { "epoch": 0.92, "learning_rate": 6.158684210526316e-05, "loss": 0.5432, "step": 92280 }, { "epoch": 0.92, "learning_rate": 6.15078947368421e-05, "loss": 0.5462, "step": 92290 }, { "epoch": 0.92, "learning_rate": 6.142894736842105e-05, "loss": 0.5472, "step": 92300 }, { "epoch": 0.92, "learning_rate": 6.134999999999999e-05, "loss": 0.5388, "step": 92310 }, { "epoch": 0.92, "learning_rate": 6.127105263157895e-05, "loss": 0.535, "step": 92320 }, { "epoch": 0.92, "learning_rate": 6.119210526315788e-05, "loss": 0.5383, "step": 92330 }, { "epoch": 0.92, "learning_rate": 6.111315789473685e-05, "loss": 0.5431, "step": 92340 }, { "epoch": 0.92, "learning_rate": 6.103421052631578e-05, "loss": 0.5502, "step": 92350 }, { "epoch": 0.92, "learning_rate": 6.0955263157894735e-05, "loss": 0.5704, "step": 92360 }, { "epoch": 0.92, "learning_rate": 6.087631578947368e-05, "loss": 0.5768, "step": 92370 }, { "epoch": 0.92, "learning_rate": 6.0797368421052625e-05, "loss": 0.576, "step": 92380 }, { "epoch": 0.92, "learning_rate": 6.071842105263157e-05, "loss": 0.581, "step": 92390 }, { "epoch": 0.92, "learning_rate": 6.063947368421052e-05, "loss": 0.5734, "step": 92400 }, { "epoch": 0.92, "learning_rate": 6.056052631578947e-05, "loss": 0.5678, "step": 92410 }, { "epoch": 0.92, "learning_rate": 6.048157894736841e-05, "loss": 0.5702, "step": 92420 }, { "epoch": 0.92, "learning_rate": 6.0402631578947367e-05, "loss": 0.5698, "step": 92430 }, { "epoch": 0.92, "learning_rate": 6.032368421052631e-05, "loss": 0.579, "step": 92440 }, { "epoch": 0.92, "learning_rate": 6.0244736842105256e-05, "loss": 0.5601, "step": 92450 }, { "epoch": 0.92, "learning_rate": 6.0165789473684205e-05, "loss": 0.5581, "step": 92460 }, { "epoch": 0.92, "learning_rate": 6.008684210526315e-05, "loss": 0.562, "step": 92470 }, { "epoch": 0.92, "learning_rate": 6.00078947368421e-05, "loss": 0.5585, "step": 92480 }, { "epoch": 0.92, "learning_rate": 5.992894736842104e-05, "loss": 0.5743, "step": 92490 }, { "epoch": 0.93, "learning_rate": 5.985e-05, "loss": 0.5482, "step": 92500 }, { "epoch": 0.93, "eval_accuracy": 0.8834968490752123, "eval_loss": 0.52978515625, "eval_runtime": 97.1569, "eval_samples_per_second": 823.411, "eval_steps_per_second": 1.616, "step": 92500 }, { "epoch": 0.93, "learning_rate": 5.977105263157894e-05, "loss": 0.5555, "step": 92510 }, { "epoch": 0.93, "learning_rate": 5.969210526315789e-05, "loss": 0.5454, "step": 92520 }, { "epoch": 0.93, "learning_rate": 5.9613157894736836e-05, "loss": 0.5387, "step": 92530 }, { "epoch": 0.93, "learning_rate": 5.9534210526315784e-05, "loss": 0.5603, "step": 92540 }, { "epoch": 0.93, "learning_rate": 5.945526315789473e-05, "loss": 0.5328, "step": 92550 }, { "epoch": 0.93, "learning_rate": 5.9376315789473674e-05, "loss": 0.5441, "step": 92560 }, { "epoch": 0.93, "learning_rate": 5.929736842105263e-05, "loss": 0.557, "step": 92570 }, { "epoch": 0.93, "learning_rate": 5.921842105263157e-05, "loss": 0.5579, "step": 92580 }, { "epoch": 0.93, "learning_rate": 5.913947368421052e-05, "loss": 0.5615, "step": 92590 }, { "epoch": 0.93, "learning_rate": 5.906052631578947e-05, "loss": 0.5585, "step": 92600 }, { "epoch": 0.93, "learning_rate": 5.8981578947368416e-05, "loss": 0.5563, "step": 92610 }, { "epoch": 0.93, "learning_rate": 5.8902631578947364e-05, "loss": 0.5681, "step": 92620 }, { "epoch": 0.93, "learning_rate": 5.882368421052631e-05, "loss": 0.5568, "step": 92630 }, { "epoch": 0.93, "learning_rate": 5.874473684210526e-05, "loss": 0.5636, "step": 92640 }, { "epoch": 0.93, "learning_rate": 5.86657894736842e-05, "loss": 0.552, "step": 92650 }, { "epoch": 0.93, "learning_rate": 5.858684210526315e-05, "loss": 0.5579, "step": 92660 }, { "epoch": 0.93, "learning_rate": 5.85078947368421e-05, "loss": 0.5406, "step": 92670 }, { "epoch": 0.93, "learning_rate": 5.842894736842105e-05, "loss": 0.5514, "step": 92680 }, { "epoch": 0.93, "learning_rate": 5.8349999999999995e-05, "loss": 0.5416, "step": 92690 }, { "epoch": 0.93, "learning_rate": 5.8271052631578944e-05, "loss": 0.5424, "step": 92700 }, { "epoch": 0.93, "learning_rate": 5.819210526315789e-05, "loss": 0.5331, "step": 92710 }, { "epoch": 0.93, "learning_rate": 5.8113157894736833e-05, "loss": 0.545, "step": 92720 }, { "epoch": 0.93, "learning_rate": 5.803421052631578e-05, "loss": 0.5257, "step": 92730 }, { "epoch": 0.93, "learning_rate": 5.795526315789473e-05, "loss": 0.5363, "step": 92740 }, { "epoch": 0.93, "learning_rate": 5.787631578947368e-05, "loss": 0.5345, "step": 92750 }, { "epoch": 0.93, "learning_rate": 5.779736842105263e-05, "loss": 0.5373, "step": 92760 }, { "epoch": 0.93, "learning_rate": 5.7718421052631575e-05, "loss": 0.5339, "step": 92770 }, { "epoch": 0.93, "learning_rate": 5.763947368421052e-05, "loss": 0.5269, "step": 92780 }, { "epoch": 0.93, "learning_rate": 5.7560526315789465e-05, "loss": 0.5483, "step": 92790 }, { "epoch": 0.93, "learning_rate": 5.748157894736842e-05, "loss": 0.5536, "step": 92800 }, { "epoch": 0.93, "learning_rate": 5.740263157894736e-05, "loss": 0.5591, "step": 92810 }, { "epoch": 0.93, "learning_rate": 5.732368421052631e-05, "loss": 0.5613, "step": 92820 }, { "epoch": 0.93, "learning_rate": 5.724473684210526e-05, "loss": 0.562, "step": 92830 }, { "epoch": 0.93, "learning_rate": 5.7165789473684206e-05, "loss": 0.5623, "step": 92840 }, { "epoch": 0.93, "learning_rate": 5.7086842105263155e-05, "loss": 0.5591, "step": 92850 }, { "epoch": 0.93, "learning_rate": 5.70078947368421e-05, "loss": 0.5741, "step": 92860 }, { "epoch": 0.93, "learning_rate": 5.692894736842105e-05, "loss": 0.5545, "step": 92870 }, { "epoch": 0.93, "learning_rate": 5.684999999999999e-05, "loss": 0.5488, "step": 92880 }, { "epoch": 0.93, "learning_rate": 5.677105263157894e-05, "loss": 0.5598, "step": 92890 }, { "epoch": 0.93, "learning_rate": 5.669210526315789e-05, "loss": 0.5502, "step": 92900 }, { "epoch": 0.93, "learning_rate": 5.661315789473684e-05, "loss": 0.5513, "step": 92910 }, { "epoch": 0.93, "learning_rate": 5.6534210526315786e-05, "loss": 0.5383, "step": 92920 }, { "epoch": 0.93, "learning_rate": 5.6455263157894734e-05, "loss": 0.5458, "step": 92930 }, { "epoch": 0.93, "learning_rate": 5.637631578947368e-05, "loss": 0.536, "step": 92940 }, { "epoch": 0.93, "learning_rate": 5.6297368421052624e-05, "loss": 0.5503, "step": 92950 }, { "epoch": 0.93, "learning_rate": 5.621842105263157e-05, "loss": 0.5452, "step": 92960 }, { "epoch": 0.93, "learning_rate": 5.613947368421052e-05, "loss": 0.5285, "step": 92970 }, { "epoch": 0.93, "learning_rate": 5.606052631578947e-05, "loss": 0.5303, "step": 92980 }, { "epoch": 0.93, "learning_rate": 5.598157894736842e-05, "loss": 0.5272, "step": 92990 }, { "epoch": 0.93, "learning_rate": 5.5902631578947366e-05, "loss": 0.5252, "step": 93000 }, { "epoch": 0.93, "learning_rate": 5.5823684210526314e-05, "loss": 0.5261, "step": 93010 }, { "epoch": 0.93, "learning_rate": 5.5744736842105255e-05, "loss": 0.5248, "step": 93020 }, { "epoch": 0.93, "learning_rate": 5.5665789473684204e-05, "loss": 0.53, "step": 93030 }, { "epoch": 0.93, "learning_rate": 5.558684210526315e-05, "loss": 0.5336, "step": 93040 }, { "epoch": 0.93, "learning_rate": 5.55078947368421e-05, "loss": 0.5329, "step": 93050 }, { "epoch": 0.93, "learning_rate": 5.542894736842105e-05, "loss": 0.5422, "step": 93060 }, { "epoch": 0.93, "learning_rate": 5.535e-05, "loss": 0.5743, "step": 93070 }, { "epoch": 0.93, "learning_rate": 5.5271052631578945e-05, "loss": 0.5587, "step": 93080 }, { "epoch": 0.93, "learning_rate": 5.5192105263157894e-05, "loss": 0.5608, "step": 93090 }, { "epoch": 0.93, "learning_rate": 5.5113157894736835e-05, "loss": 0.5682, "step": 93100 }, { "epoch": 0.93, "learning_rate": 5.504210526315789e-05, "loss": 0.5697, "step": 93110 }, { "epoch": 0.93, "learning_rate": 5.496315789473683e-05, "loss": 0.567, "step": 93120 }, { "epoch": 0.93, "learning_rate": 5.4884210526315786e-05, "loss": 0.5714, "step": 93130 }, { "epoch": 0.93, "learning_rate": 5.480526315789473e-05, "loss": 0.5616, "step": 93140 }, { "epoch": 0.93, "learning_rate": 5.472631578947368e-05, "loss": 0.5684, "step": 93150 }, { "epoch": 0.93, "learning_rate": 5.4647368421052625e-05, "loss": 0.559, "step": 93160 }, { "epoch": 0.93, "learning_rate": 5.456842105263158e-05, "loss": 0.5566, "step": 93170 }, { "epoch": 0.93, "learning_rate": 5.448947368421052e-05, "loss": 0.5645, "step": 93180 }, { "epoch": 0.93, "learning_rate": 5.441052631578946e-05, "loss": 0.5635, "step": 93190 }, { "epoch": 0.93, "learning_rate": 5.433157894736842e-05, "loss": 0.5472, "step": 93200 }, { "epoch": 0.93, "learning_rate": 5.425263157894736e-05, "loss": 0.5437, "step": 93210 }, { "epoch": 0.93, "learning_rate": 5.4173684210526314e-05, "loss": 0.5467, "step": 93220 }, { "epoch": 0.93, "learning_rate": 5.4094736842105256e-05, "loss": 0.5473, "step": 93230 }, { "epoch": 0.93, "learning_rate": 5.401578947368421e-05, "loss": 0.5332, "step": 93240 }, { "epoch": 0.93, "learning_rate": 5.393684210526315e-05, "loss": 0.5336, "step": 93250 }, { "epoch": 0.93, "learning_rate": 5.3857894736842094e-05, "loss": 0.5374, "step": 93260 }, { "epoch": 0.93, "learning_rate": 5.377894736842105e-05, "loss": 0.5484, "step": 93270 }, { "epoch": 0.93, "learning_rate": 5.369999999999999e-05, "loss": 0.5483, "step": 93280 }, { "epoch": 0.93, "learning_rate": 5.3621052631578946e-05, "loss": 0.5549, "step": 93290 }, { "epoch": 0.93, "learning_rate": 5.354210526315789e-05, "loss": 0.5658, "step": 93300 }, { "epoch": 0.93, "learning_rate": 5.346315789473684e-05, "loss": 0.5748, "step": 93310 }, { "epoch": 0.93, "learning_rate": 5.3384210526315784e-05, "loss": 0.5751, "step": 93320 }, { "epoch": 0.93, "learning_rate": 5.3305263157894725e-05, "loss": 0.5793, "step": 93330 }, { "epoch": 0.93, "learning_rate": 5.322631578947368e-05, "loss": 0.5721, "step": 93340 }, { "epoch": 0.93, "learning_rate": 5.314736842105262e-05, "loss": 0.576, "step": 93350 }, { "epoch": 0.93, "learning_rate": 5.306842105263158e-05, "loss": 0.5655, "step": 93360 }, { "epoch": 0.93, "learning_rate": 5.298947368421052e-05, "loss": 0.5695, "step": 93370 }, { "epoch": 0.93, "learning_rate": 5.2910526315789474e-05, "loss": 0.5617, "step": 93380 }, { "epoch": 0.93, "learning_rate": 5.2831578947368415e-05, "loss": 0.5572, "step": 93390 }, { "epoch": 0.93, "learning_rate": 5.2752631578947364e-05, "loss": 0.5574, "step": 93400 }, { "epoch": 0.93, "learning_rate": 5.267368421052631e-05, "loss": 0.5526, "step": 93410 }, { "epoch": 0.93, "learning_rate": 5.259473684210525e-05, "loss": 0.5534, "step": 93420 }, { "epoch": 0.93, "learning_rate": 5.251578947368421e-05, "loss": 0.551, "step": 93430 }, { "epoch": 0.93, "learning_rate": 5.243684210526315e-05, "loss": 0.5488, "step": 93440 }, { "epoch": 0.93, "learning_rate": 5.2357894736842105e-05, "loss": 0.5515, "step": 93450 }, { "epoch": 0.93, "learning_rate": 5.2278947368421047e-05, "loss": 0.5414, "step": 93460 }, { "epoch": 0.93, "learning_rate": 5.2199999999999995e-05, "loss": 0.5343, "step": 93470 }, { "epoch": 0.93, "learning_rate": 5.212105263157894e-05, "loss": 0.5388, "step": 93480 }, { "epoch": 0.93, "learning_rate": 5.2042105263157885e-05, "loss": 0.5437, "step": 93490 }, { "epoch": 0.94, "learning_rate": 5.196315789473684e-05, "loss": 0.5417, "step": 93500 }, { "epoch": 0.94, "learning_rate": 5.188421052631578e-05, "loss": 0.5515, "step": 93510 }, { "epoch": 0.94, "learning_rate": 5.1805263157894736e-05, "loss": 0.5587, "step": 93520 }, { "epoch": 0.94, "learning_rate": 5.172631578947368e-05, "loss": 0.5602, "step": 93530 }, { "epoch": 0.94, "learning_rate": 5.1647368421052626e-05, "loss": 0.5655, "step": 93540 }, { "epoch": 0.94, "learning_rate": 5.1568421052631575e-05, "loss": 0.5536, "step": 93550 }, { "epoch": 0.94, "learning_rate": 5.1489473684210516e-05, "loss": 0.5767, "step": 93560 }, { "epoch": 0.94, "learning_rate": 5.141052631578947e-05, "loss": 0.5697, "step": 93570 }, { "epoch": 0.94, "learning_rate": 5.133157894736841e-05, "loss": 0.5656, "step": 93580 }, { "epoch": 0.94, "learning_rate": 5.125263157894737e-05, "loss": 0.5627, "step": 93590 }, { "epoch": 0.94, "learning_rate": 5.117368421052631e-05, "loss": 0.5727, "step": 93600 }, { "epoch": 0.94, "learning_rate": 5.109473684210526e-05, "loss": 0.5581, "step": 93610 }, { "epoch": 0.94, "learning_rate": 5.1015789473684206e-05, "loss": 0.5502, "step": 93620 }, { "epoch": 0.94, "learning_rate": 5.0936842105263154e-05, "loss": 0.5505, "step": 93630 }, { "epoch": 0.94, "learning_rate": 5.08578947368421e-05, "loss": 0.5572, "step": 93640 }, { "epoch": 0.94, "learning_rate": 5.0778947368421044e-05, "loss": 0.5467, "step": 93650 }, { "epoch": 0.94, "learning_rate": 5.07e-05, "loss": 0.5507, "step": 93660 }, { "epoch": 0.94, "learning_rate": 5.062105263157894e-05, "loss": 0.5508, "step": 93670 }, { "epoch": 0.94, "learning_rate": 5.054210526315789e-05, "loss": 0.5531, "step": 93680 }, { "epoch": 0.94, "learning_rate": 5.046315789473684e-05, "loss": 0.5446, "step": 93690 }, { "epoch": 0.94, "learning_rate": 5.0384210526315786e-05, "loss": 0.5452, "step": 93700 }, { "epoch": 0.94, "learning_rate": 5.0305263157894734e-05, "loss": 0.5435, "step": 93710 }, { "epoch": 0.94, "learning_rate": 5.0226315789473675e-05, "loss": 0.5439, "step": 93720 }, { "epoch": 0.94, "learning_rate": 5.014736842105263e-05, "loss": 0.5391, "step": 93730 }, { "epoch": 0.94, "learning_rate": 5.006842105263157e-05, "loss": 0.5426, "step": 93740 }, { "epoch": 0.94, "learning_rate": 4.998947368421052e-05, "loss": 0.5356, "step": 93750 }, { "epoch": 0.94, "learning_rate": 4.991052631578947e-05, "loss": 0.5665, "step": 93760 }, { "epoch": 0.94, "learning_rate": 4.983157894736842e-05, "loss": 0.5653, "step": 93770 }, { "epoch": 0.94, "learning_rate": 4.9752631578947365e-05, "loss": 0.5651, "step": 93780 }, { "epoch": 0.94, "learning_rate": 4.967368421052631e-05, "loss": 0.5734, "step": 93790 }, { "epoch": 0.94, "learning_rate": 4.959473684210526e-05, "loss": 0.5607, "step": 93800 }, { "epoch": 0.94, "learning_rate": 4.95157894736842e-05, "loss": 0.5729, "step": 93810 }, { "epoch": 0.94, "learning_rate": 4.943684210526315e-05, "loss": 0.5705, "step": 93820 }, { "epoch": 0.94, "learning_rate": 4.93578947368421e-05, "loss": 0.564, "step": 93830 }, { "epoch": 0.94, "learning_rate": 4.927894736842105e-05, "loss": 0.5598, "step": 93840 }, { "epoch": 0.94, "learning_rate": 4.9199999999999997e-05, "loss": 0.551, "step": 93850 }, { "epoch": 0.94, "learning_rate": 4.9121052631578945e-05, "loss": 0.5414, "step": 93860 }, { "epoch": 0.94, "learning_rate": 4.904210526315789e-05, "loss": 0.5508, "step": 93870 }, { "epoch": 0.94, "learning_rate": 4.8963157894736835e-05, "loss": 0.5563, "step": 93880 }, { "epoch": 0.94, "learning_rate": 4.888421052631578e-05, "loss": 0.5493, "step": 93890 }, { "epoch": 0.94, "learning_rate": 4.880526315789473e-05, "loss": 0.5347, "step": 93900 }, { "epoch": 0.94, "learning_rate": 4.872631578947368e-05, "loss": 0.5435, "step": 93910 }, { "epoch": 0.94, "learning_rate": 4.864736842105263e-05, "loss": 0.5397, "step": 93920 }, { "epoch": 0.94, "learning_rate": 4.8568421052631576e-05, "loss": 0.5361, "step": 93930 }, { "epoch": 0.94, "learning_rate": 4.8489473684210524e-05, "loss": 0.533, "step": 93940 }, { "epoch": 0.94, "learning_rate": 4.8410526315789466e-05, "loss": 0.5266, "step": 93950 }, { "epoch": 0.94, "learning_rate": 4.8331578947368414e-05, "loss": 0.5381, "step": 93960 }, { "epoch": 0.94, "learning_rate": 4.825263157894736e-05, "loss": 0.5453, "step": 93970 }, { "epoch": 0.94, "learning_rate": 4.817368421052631e-05, "loss": 0.5353, "step": 93980 }, { "epoch": 0.94, "learning_rate": 4.809473684210526e-05, "loss": 0.5374, "step": 93990 }, { "epoch": 0.94, "learning_rate": 4.801578947368421e-05, "loss": 0.5461, "step": 94000 }, { "epoch": 0.94, "learning_rate": 4.7936842105263156e-05, "loss": 0.5481, "step": 94010 }, { "epoch": 0.94, "learning_rate": 4.78578947368421e-05, "loss": 0.5603, "step": 94020 }, { "epoch": 0.94, "learning_rate": 4.7778947368421046e-05, "loss": 0.5625, "step": 94030 }, { "epoch": 0.94, "learning_rate": 4.7699999999999994e-05, "loss": 0.568, "step": 94040 }, { "epoch": 0.94, "learning_rate": 4.762105263157894e-05, "loss": 0.5599, "step": 94050 }, { "epoch": 0.94, "learning_rate": 4.754210526315789e-05, "loss": 0.5612, "step": 94060 }, { "epoch": 0.94, "learning_rate": 4.746315789473684e-05, "loss": 0.5454, "step": 94070 }, { "epoch": 0.94, "learning_rate": 4.738421052631579e-05, "loss": 0.5487, "step": 94080 }, { "epoch": 0.94, "learning_rate": 4.7305263157894735e-05, "loss": 0.5448, "step": 94090 }, { "epoch": 0.94, "learning_rate": 4.722631578947368e-05, "loss": 0.5491, "step": 94100 }, { "epoch": 0.94, "learning_rate": 4.715526315789473e-05, "loss": 0.5526, "step": 94110 }, { "epoch": 0.94, "learning_rate": 4.707631578947369e-05, "loss": 0.5447, "step": 94120 }, { "epoch": 0.94, "learning_rate": 4.699736842105263e-05, "loss": 0.5488, "step": 94130 }, { "epoch": 0.94, "learning_rate": 4.691842105263157e-05, "loss": 0.5453, "step": 94140 }, { "epoch": 0.94, "learning_rate": 4.6839473684210525e-05, "loss": 0.5392, "step": 94150 }, { "epoch": 0.94, "learning_rate": 4.6760526315789467e-05, "loss": 0.5412, "step": 94160 }, { "epoch": 0.94, "learning_rate": 4.668157894736842e-05, "loss": 0.5335, "step": 94170 }, { "epoch": 0.94, "learning_rate": 4.660263157894736e-05, "loss": 0.5324, "step": 94180 }, { "epoch": 0.94, "learning_rate": 4.652368421052632e-05, "loss": 0.5441, "step": 94190 }, { "epoch": 0.94, "learning_rate": 4.644473684210526e-05, "loss": 0.5295, "step": 94200 }, { "epoch": 0.94, "learning_rate": 4.63657894736842e-05, "loss": 0.5475, "step": 94210 }, { "epoch": 0.94, "learning_rate": 4.6286842105263156e-05, "loss": 0.5501, "step": 94220 }, { "epoch": 0.94, "learning_rate": 4.62078947368421e-05, "loss": 0.567, "step": 94230 }, { "epoch": 0.94, "learning_rate": 4.612894736842105e-05, "loss": 0.5567, "step": 94240 }, { "epoch": 0.94, "learning_rate": 4.6049999999999994e-05, "loss": 0.5674, "step": 94250 }, { "epoch": 0.94, "learning_rate": 4.597105263157895e-05, "loss": 0.5685, "step": 94260 }, { "epoch": 0.94, "learning_rate": 4.589210526315789e-05, "loss": 0.5618, "step": 94270 }, { "epoch": 0.94, "learning_rate": 4.581315789473683e-05, "loss": 0.5669, "step": 94280 }, { "epoch": 0.94, "learning_rate": 4.573421052631579e-05, "loss": 0.5627, "step": 94290 }, { "epoch": 0.94, "learning_rate": 4.565526315789473e-05, "loss": 0.5577, "step": 94300 }, { "epoch": 0.94, "learning_rate": 4.5576315789473684e-05, "loss": 0.5517, "step": 94310 }, { "epoch": 0.94, "learning_rate": 4.5497368421052626e-05, "loss": 0.5455, "step": 94320 }, { "epoch": 0.94, "learning_rate": 4.541842105263158e-05, "loss": 0.5478, "step": 94330 }, { "epoch": 0.94, "learning_rate": 4.533947368421052e-05, "loss": 0.5443, "step": 94340 }, { "epoch": 0.94, "learning_rate": 4.5260526315789464e-05, "loss": 0.5539, "step": 94350 }, { "epoch": 0.94, "learning_rate": 4.518157894736842e-05, "loss": 0.5513, "step": 94360 }, { "epoch": 0.94, "learning_rate": 4.510263157894736e-05, "loss": 0.5428, "step": 94370 }, { "epoch": 0.94, "learning_rate": 4.5023684210526316e-05, "loss": 0.5497, "step": 94380 }, { "epoch": 0.94, "learning_rate": 4.494473684210526e-05, "loss": 0.5517, "step": 94390 }, { "epoch": 0.94, "learning_rate": 4.486578947368421e-05, "loss": 0.5433, "step": 94400 }, { "epoch": 0.94, "learning_rate": 4.4786842105263154e-05, "loss": 0.5405, "step": 94410 }, { "epoch": 0.94, "learning_rate": 4.4707894736842095e-05, "loss": 0.5429, "step": 94420 }, { "epoch": 0.94, "learning_rate": 4.462894736842105e-05, "loss": 0.5556, "step": 94430 }, { "epoch": 0.94, "learning_rate": 4.454999999999999e-05, "loss": 0.5511, "step": 94440 }, { "epoch": 0.94, "learning_rate": 4.447105263157895e-05, "loss": 0.5604, "step": 94450 }, { "epoch": 0.94, "learning_rate": 4.439210526315789e-05, "loss": 0.5699, "step": 94460 }, { "epoch": 0.94, "learning_rate": 4.4313157894736844e-05, "loss": 0.5701, "step": 94470 }, { "epoch": 0.94, "learning_rate": 4.4234210526315785e-05, "loss": 0.5673, "step": 94480 }, { "epoch": 0.94, "learning_rate": 4.415526315789473e-05, "loss": 0.5775, "step": 94490 }, { "epoch": 0.94, "learning_rate": 4.407631578947368e-05, "loss": 0.5747, "step": 94500 }, { "epoch": 0.95, "learning_rate": 4.399736842105262e-05, "loss": 0.5858, "step": 94510 }, { "epoch": 0.95, "learning_rate": 4.391842105263158e-05, "loss": 0.5853, "step": 94520 }, { "epoch": 0.95, "learning_rate": 4.383947368421052e-05, "loss": 0.5728, "step": 94530 }, { "epoch": 0.95, "learning_rate": 4.3760526315789475e-05, "loss": 0.5671, "step": 94540 }, { "epoch": 0.95, "learning_rate": 4.3681578947368416e-05, "loss": 0.5611, "step": 94550 }, { "epoch": 0.95, "learning_rate": 4.360263157894736e-05, "loss": 0.5633, "step": 94560 }, { "epoch": 0.95, "learning_rate": 4.352368421052631e-05, "loss": 0.5577, "step": 94570 }, { "epoch": 0.95, "learning_rate": 4.3444736842105255e-05, "loss": 0.5653, "step": 94580 }, { "epoch": 0.95, "learning_rate": 4.336578947368421e-05, "loss": 0.5482, "step": 94590 }, { "epoch": 0.95, "learning_rate": 4.328684210526315e-05, "loss": 0.5551, "step": 94600 }, { "epoch": 0.95, "learning_rate": 4.3207894736842106e-05, "loss": 0.5538, "step": 94610 }, { "epoch": 0.95, "learning_rate": 4.312894736842105e-05, "loss": 0.5382, "step": 94620 }, { "epoch": 0.95, "learning_rate": 4.3049999999999996e-05, "loss": 0.5542, "step": 94630 }, { "epoch": 0.95, "learning_rate": 4.2971052631578944e-05, "loss": 0.5521, "step": 94640 }, { "epoch": 0.95, "learning_rate": 4.2892105263157886e-05, "loss": 0.5469, "step": 94650 }, { "epoch": 0.95, "learning_rate": 4.281315789473684e-05, "loss": 0.5476, "step": 94660 }, { "epoch": 0.95, "learning_rate": 4.273421052631578e-05, "loss": 0.5429, "step": 94670 }, { "epoch": 0.95, "learning_rate": 4.265526315789474e-05, "loss": 0.5527, "step": 94680 }, { "epoch": 0.95, "learning_rate": 4.257631578947368e-05, "loss": 0.5632, "step": 94690 }, { "epoch": 0.95, "learning_rate": 4.249736842105263e-05, "loss": 0.5661, "step": 94700 }, { "epoch": 0.95, "learning_rate": 4.2418421052631576e-05, "loss": 0.5854, "step": 94710 }, { "epoch": 0.95, "learning_rate": 4.233947368421052e-05, "loss": 0.5721, "step": 94720 }, { "epoch": 0.95, "learning_rate": 4.226052631578947e-05, "loss": 0.5783, "step": 94730 }, { "epoch": 0.95, "learning_rate": 4.2181578947368414e-05, "loss": 0.5672, "step": 94740 }, { "epoch": 0.95, "learning_rate": 4.210263157894737e-05, "loss": 0.572, "step": 94750 }, { "epoch": 0.95, "learning_rate": 4.202368421052631e-05, "loss": 0.5643, "step": 94760 }, { "epoch": 0.95, "learning_rate": 4.194473684210526e-05, "loss": 0.5759, "step": 94770 }, { "epoch": 0.95, "learning_rate": 4.186578947368421e-05, "loss": 0.5706, "step": 94780 }, { "epoch": 0.95, "learning_rate": 4.178684210526315e-05, "loss": 0.5665, "step": 94790 }, { "epoch": 0.95, "learning_rate": 4.1707894736842104e-05, "loss": 0.5499, "step": 94800 }, { "epoch": 0.95, "learning_rate": 4.1628947368421045e-05, "loss": 0.548, "step": 94810 }, { "epoch": 0.95, "learning_rate": 4.155e-05, "loss": 0.5503, "step": 94820 }, { "epoch": 0.95, "learning_rate": 4.147105263157894e-05, "loss": 0.558, "step": 94830 }, { "epoch": 0.95, "learning_rate": 4.139210526315789e-05, "loss": 0.5583, "step": 94840 }, { "epoch": 0.95, "learning_rate": 4.131315789473684e-05, "loss": 0.548, "step": 94850 }, { "epoch": 0.95, "learning_rate": 4.123421052631579e-05, "loss": 0.5449, "step": 94860 }, { "epoch": 0.95, "learning_rate": 4.1155263157894735e-05, "loss": 0.5324, "step": 94870 }, { "epoch": 0.95, "learning_rate": 4.1076315789473677e-05, "loss": 0.5373, "step": 94880 }, { "epoch": 0.95, "learning_rate": 4.099736842105263e-05, "loss": 0.5574, "step": 94890 }, { "epoch": 0.95, "learning_rate": 4.091842105263157e-05, "loss": 0.5446, "step": 94900 }, { "epoch": 0.95, "learning_rate": 4.083947368421052e-05, "loss": 0.5565, "step": 94910 }, { "epoch": 0.95, "learning_rate": 4.076052631578947e-05, "loss": 0.5674, "step": 94920 }, { "epoch": 0.95, "learning_rate": 4.068157894736842e-05, "loss": 0.5634, "step": 94930 }, { "epoch": 0.95, "learning_rate": 4.0602631578947366e-05, "loss": 0.5778, "step": 94940 }, { "epoch": 0.95, "learning_rate": 4.052368421052631e-05, "loss": 0.5761, "step": 94950 }, { "epoch": 0.95, "learning_rate": 4.044473684210526e-05, "loss": 0.571, "step": 94960 }, { "epoch": 0.95, "learning_rate": 4.0365789473684205e-05, "loss": 0.5681, "step": 94970 }, { "epoch": 0.95, "learning_rate": 4.028684210526315e-05, "loss": 0.5841, "step": 94980 }, { "epoch": 0.95, "learning_rate": 4.02078947368421e-05, "loss": 0.5761, "step": 94990 }, { "epoch": 0.95, "learning_rate": 4.012894736842105e-05, "loss": 0.5829, "step": 95000 }, { "epoch": 0.95, "eval_accuracy": 0.8839367508449035, "eval_loss": 0.52685546875, "eval_runtime": 98.3189, "eval_samples_per_second": 813.679, "eval_steps_per_second": 1.597, "step": 95000 }, { "epoch": 0.95, "learning_rate": 4.005e-05, "loss": 0.5568, "step": 95010 }, { "epoch": 0.95, "learning_rate": 3.997105263157894e-05, "loss": 0.5498, "step": 95020 }, { "epoch": 0.95, "learning_rate": 3.9892105263157894e-05, "loss": 0.5406, "step": 95030 }, { "epoch": 0.95, "learning_rate": 3.9813157894736836e-05, "loss": 0.5532, "step": 95040 }, { "epoch": 0.95, "learning_rate": 3.9734210526315784e-05, "loss": 0.5551, "step": 95050 }, { "epoch": 0.95, "learning_rate": 3.965526315789473e-05, "loss": 0.5595, "step": 95060 }, { "epoch": 0.95, "learning_rate": 3.957631578947368e-05, "loss": 0.5469, "step": 95070 }, { "epoch": 0.95, "learning_rate": 3.949736842105263e-05, "loss": 0.5454, "step": 95080 }, { "epoch": 0.95, "learning_rate": 3.941842105263158e-05, "loss": 0.5446, "step": 95090 }, { "epoch": 0.95, "learning_rate": 3.9339473684210526e-05, "loss": 0.535, "step": 95100 }, { "epoch": 0.95, "learning_rate": 3.9268421052631574e-05, "loss": 0.536, "step": 95110 }, { "epoch": 0.95, "learning_rate": 3.918947368421053e-05, "loss": 0.5379, "step": 95120 }, { "epoch": 0.95, "learning_rate": 3.911052631578947e-05, "loss": 0.5307, "step": 95130 }, { "epoch": 0.95, "learning_rate": 3.903157894736841e-05, "loss": 0.5353, "step": 95140 }, { "epoch": 0.95, "learning_rate": 3.895263157894737e-05, "loss": 0.5446, "step": 95150 }, { "epoch": 0.95, "learning_rate": 3.887368421052631e-05, "loss": 0.5689, "step": 95160 }, { "epoch": 0.95, "learning_rate": 3.8794736842105264e-05, "loss": 0.568, "step": 95170 }, { "epoch": 0.95, "learning_rate": 3.8715789473684205e-05, "loss": 0.5719, "step": 95180 }, { "epoch": 0.95, "learning_rate": 3.863684210526316e-05, "loss": 0.5674, "step": 95190 }, { "epoch": 0.95, "learning_rate": 3.85578947368421e-05, "loss": 0.5561, "step": 95200 }, { "epoch": 0.95, "learning_rate": 3.847894736842104e-05, "loss": 0.5712, "step": 95210 }, { "epoch": 0.95, "learning_rate": 3.84e-05, "loss": 0.5603, "step": 95220 }, { "epoch": 0.95, "learning_rate": 3.832105263157894e-05, "loss": 0.5641, "step": 95230 }, { "epoch": 0.95, "learning_rate": 3.8242105263157895e-05, "loss": 0.5608, "step": 95240 }, { "epoch": 0.95, "learning_rate": 3.8163157894736836e-05, "loss": 0.5598, "step": 95250 }, { "epoch": 0.95, "learning_rate": 3.808421052631579e-05, "loss": 0.5671, "step": 95260 }, { "epoch": 0.95, "learning_rate": 3.800526315789473e-05, "loss": 0.5663, "step": 95270 }, { "epoch": 0.95, "learning_rate": 3.7926315789473675e-05, "loss": 0.5518, "step": 95280 }, { "epoch": 0.95, "learning_rate": 3.784736842105263e-05, "loss": 0.5732, "step": 95290 }, { "epoch": 0.95, "learning_rate": 3.776842105263157e-05, "loss": 0.5565, "step": 95300 }, { "epoch": 0.95, "learning_rate": 3.7689473684210526e-05, "loss": 0.5531, "step": 95310 }, { "epoch": 0.95, "learning_rate": 3.761052631578947e-05, "loss": 0.5512, "step": 95320 }, { "epoch": 0.95, "learning_rate": 3.753157894736842e-05, "loss": 0.5591, "step": 95330 }, { "epoch": 0.95, "learning_rate": 3.7452631578947364e-05, "loss": 0.553, "step": 95340 }, { "epoch": 0.95, "learning_rate": 3.737368421052631e-05, "loss": 0.5402, "step": 95350 }, { "epoch": 0.95, "learning_rate": 3.729473684210526e-05, "loss": 0.5408, "step": 95360 }, { "epoch": 0.95, "learning_rate": 3.721578947368421e-05, "loss": 0.5403, "step": 95370 }, { "epoch": 0.95, "learning_rate": 3.713684210526316e-05, "loss": 0.5423, "step": 95380 }, { "epoch": 0.95, "learning_rate": 3.70578947368421e-05, "loss": 0.5352, "step": 95390 }, { "epoch": 0.95, "learning_rate": 3.697894736842105e-05, "loss": 0.5481, "step": 95400 }, { "epoch": 0.95, "learning_rate": 3.6899999999999996e-05, "loss": 0.5533, "step": 95410 }, { "epoch": 0.95, "learning_rate": 3.6821052631578944e-05, "loss": 0.548, "step": 95420 }, { "epoch": 0.95, "learning_rate": 3.674210526315789e-05, "loss": 0.5597, "step": 95430 }, { "epoch": 0.95, "learning_rate": 3.666315789473684e-05, "loss": 0.5572, "step": 95440 }, { "epoch": 0.95, "learning_rate": 3.658421052631579e-05, "loss": 0.5442, "step": 95450 }, { "epoch": 0.95, "learning_rate": 3.650526315789473e-05, "loss": 0.554, "step": 95460 }, { "epoch": 0.95, "learning_rate": 3.642631578947368e-05, "loss": 0.5457, "step": 95470 }, { "epoch": 0.95, "learning_rate": 3.634736842105263e-05, "loss": 0.5552, "step": 95480 }, { "epoch": 0.95, "learning_rate": 3.6268421052631575e-05, "loss": 0.5455, "step": 95490 }, { "epoch": 0.95, "learning_rate": 3.6189473684210524e-05, "loss": 0.5406, "step": 95500 }, { "epoch": 0.96, "learning_rate": 3.611052631578947e-05, "loss": 0.5361, "step": 95510 }, { "epoch": 0.96, "learning_rate": 3.603157894736842e-05, "loss": 0.5373, "step": 95520 }, { "epoch": 0.96, "learning_rate": 3.595263157894736e-05, "loss": 0.5197, "step": 95530 }, { "epoch": 0.96, "learning_rate": 3.587368421052631e-05, "loss": 0.5248, "step": 95540 }, { "epoch": 0.96, "learning_rate": 3.579473684210526e-05, "loss": 0.5302, "step": 95550 }, { "epoch": 0.96, "learning_rate": 3.571578947368421e-05, "loss": 0.5375, "step": 95560 }, { "epoch": 0.96, "learning_rate": 3.5636842105263155e-05, "loss": 0.5219, "step": 95570 }, { "epoch": 0.96, "learning_rate": 3.55578947368421e-05, "loss": 0.5273, "step": 95580 }, { "epoch": 0.96, "learning_rate": 3.547894736842105e-05, "loss": 0.5231, "step": 95590 }, { "epoch": 0.96, "learning_rate": 3.539999999999999e-05, "loss": 0.5261, "step": 95600 }, { "epoch": 0.96, "learning_rate": 3.532105263157894e-05, "loss": 0.5414, "step": 95610 }, { "epoch": 0.96, "learning_rate": 3.524210526315789e-05, "loss": 0.539, "step": 95620 }, { "epoch": 0.96, "learning_rate": 3.516315789473684e-05, "loss": 0.5431, "step": 95630 }, { "epoch": 0.96, "learning_rate": 3.5084210526315786e-05, "loss": 0.5294, "step": 95640 }, { "epoch": 0.96, "learning_rate": 3.5005263157894735e-05, "loss": 0.5412, "step": 95650 }, { "epoch": 0.96, "learning_rate": 3.492631578947368e-05, "loss": 0.5516, "step": 95660 }, { "epoch": 0.96, "learning_rate": 3.4847368421052624e-05, "loss": 0.5552, "step": 95670 }, { "epoch": 0.96, "learning_rate": 3.476842105263157e-05, "loss": 0.55, "step": 95680 }, { "epoch": 0.96, "learning_rate": 3.468947368421052e-05, "loss": 0.5607, "step": 95690 }, { "epoch": 0.96, "learning_rate": 3.461052631578947e-05, "loss": 0.5547, "step": 95700 }, { "epoch": 0.96, "learning_rate": 3.453157894736842e-05, "loss": 0.5573, "step": 95710 }, { "epoch": 0.96, "learning_rate": 3.4452631578947366e-05, "loss": 0.5617, "step": 95720 }, { "epoch": 0.96, "learning_rate": 3.4373684210526314e-05, "loss": 0.565, "step": 95730 }, { "epoch": 0.96, "learning_rate": 3.4294736842105256e-05, "loss": 0.5638, "step": 95740 }, { "epoch": 0.96, "learning_rate": 3.4215789473684204e-05, "loss": 0.5517, "step": 95750 }, { "epoch": 0.96, "learning_rate": 3.413684210526315e-05, "loss": 0.5676, "step": 95760 }, { "epoch": 0.96, "learning_rate": 3.40578947368421e-05, "loss": 0.5414, "step": 95770 }, { "epoch": 0.96, "learning_rate": 3.397894736842105e-05, "loss": 0.5531, "step": 95780 }, { "epoch": 0.96, "learning_rate": 3.39e-05, "loss": 0.5515, "step": 95790 }, { "epoch": 0.96, "learning_rate": 3.3821052631578946e-05, "loss": 0.5547, "step": 95800 }, { "epoch": 0.96, "learning_rate": 3.374210526315789e-05, "loss": 0.5481, "step": 95810 }, { "epoch": 0.96, "learning_rate": 3.3663157894736835e-05, "loss": 0.5464, "step": 95820 }, { "epoch": 0.96, "learning_rate": 3.3584210526315784e-05, "loss": 0.5296, "step": 95830 }, { "epoch": 0.96, "learning_rate": 3.350526315789473e-05, "loss": 0.5566, "step": 95840 }, { "epoch": 0.96, "learning_rate": 3.342631578947368e-05, "loss": 0.5197, "step": 95850 }, { "epoch": 0.96, "learning_rate": 3.334736842105263e-05, "loss": 0.5252, "step": 95860 }, { "epoch": 0.96, "learning_rate": 3.326842105263158e-05, "loss": 0.5438, "step": 95870 }, { "epoch": 0.96, "learning_rate": 3.318947368421052e-05, "loss": 0.5394, "step": 95880 }, { "epoch": 0.96, "learning_rate": 3.311052631578947e-05, "loss": 0.5398, "step": 95890 }, { "epoch": 0.96, "learning_rate": 3.3031578947368415e-05, "loss": 0.5438, "step": 95900 }, { "epoch": 0.96, "learning_rate": 3.295263157894736e-05, "loss": 0.5639, "step": 95910 }, { "epoch": 0.96, "learning_rate": 3.287368421052631e-05, "loss": 0.5493, "step": 95920 }, { "epoch": 0.96, "learning_rate": 3.279473684210526e-05, "loss": 0.5642, "step": 95930 }, { "epoch": 0.96, "learning_rate": 3.271578947368421e-05, "loss": 0.5573, "step": 95940 }, { "epoch": 0.96, "learning_rate": 3.2636842105263157e-05, "loss": 0.5567, "step": 95950 }, { "epoch": 0.96, "learning_rate": 3.25578947368421e-05, "loss": 0.5554, "step": 95960 }, { "epoch": 0.96, "learning_rate": 3.2478947368421046e-05, "loss": 0.5513, "step": 95970 }, { "epoch": 0.96, "learning_rate": 3.2399999999999995e-05, "loss": 0.5468, "step": 95980 }, { "epoch": 0.96, "learning_rate": 3.232105263157894e-05, "loss": 0.5452, "step": 95990 }, { "epoch": 0.96, "learning_rate": 3.224210526315789e-05, "loss": 0.5277, "step": 96000 }, { "epoch": 0.96, "learning_rate": 3.216315789473684e-05, "loss": 0.5449, "step": 96010 }, { "epoch": 0.96, "learning_rate": 3.208421052631579e-05, "loss": 0.5374, "step": 96020 }, { "epoch": 0.96, "learning_rate": 3.200526315789473e-05, "loss": 0.5532, "step": 96030 }, { "epoch": 0.96, "learning_rate": 3.192631578947368e-05, "loss": 0.5255, "step": 96040 }, { "epoch": 0.96, "learning_rate": 3.1847368421052626e-05, "loss": 0.5336, "step": 96050 }, { "epoch": 0.96, "learning_rate": 3.1768421052631574e-05, "loss": 0.5327, "step": 96060 }, { "epoch": 0.96, "learning_rate": 3.168947368421052e-05, "loss": 0.53, "step": 96070 }, { "epoch": 0.96, "learning_rate": 3.161052631578947e-05, "loss": 0.5198, "step": 96080 }, { "epoch": 0.96, "learning_rate": 3.153157894736842e-05, "loss": 0.5226, "step": 96090 }, { "epoch": 0.96, "learning_rate": 3.145263157894736e-05, "loss": 0.5224, "step": 96100 }, { "epoch": 0.96, "learning_rate": 3.138157894736842e-05, "loss": 0.5325, "step": 96110 }, { "epoch": 0.96, "learning_rate": 3.1302631578947364e-05, "loss": 0.5368, "step": 96120 }, { "epoch": 0.96, "learning_rate": 3.122368421052631e-05, "loss": 0.5431, "step": 96130 }, { "epoch": 0.96, "learning_rate": 3.114473684210526e-05, "loss": 0.5476, "step": 96140 }, { "epoch": 0.96, "learning_rate": 3.106578947368421e-05, "loss": 0.5619, "step": 96150 }, { "epoch": 0.96, "learning_rate": 3.098684210526316e-05, "loss": 0.5504, "step": 96160 }, { "epoch": 0.96, "learning_rate": 3.0907894736842105e-05, "loss": 0.5671, "step": 96170 }, { "epoch": 0.96, "learning_rate": 3.0828947368421054e-05, "loss": 0.5477, "step": 96180 }, { "epoch": 0.96, "learning_rate": 3.0749999999999995e-05, "loss": 0.5622, "step": 96190 }, { "epoch": 0.96, "learning_rate": 3.0671052631578944e-05, "loss": 0.5505, "step": 96200 }, { "epoch": 0.96, "learning_rate": 3.059210526315789e-05, "loss": 0.5486, "step": 96210 }, { "epoch": 0.96, "learning_rate": 3.051315789473684e-05, "loss": 0.5309, "step": 96220 }, { "epoch": 0.96, "learning_rate": 3.043421052631579e-05, "loss": 0.5347, "step": 96230 }, { "epoch": 0.96, "learning_rate": 3.0355263157894737e-05, "loss": 0.5366, "step": 96240 }, { "epoch": 0.96, "learning_rate": 3.0276315789473682e-05, "loss": 0.5322, "step": 96250 }, { "epoch": 0.96, "learning_rate": 3.0197368421052627e-05, "loss": 0.5508, "step": 96260 }, { "epoch": 0.96, "learning_rate": 3.0118421052631575e-05, "loss": 0.5386, "step": 96270 }, { "epoch": 0.96, "learning_rate": 3.0039473684210523e-05, "loss": 0.5458, "step": 96280 }, { "epoch": 0.96, "learning_rate": 2.996052631578947e-05, "loss": 0.5432, "step": 96290 }, { "epoch": 0.96, "learning_rate": 2.988157894736842e-05, "loss": 0.5317, "step": 96300 }, { "epoch": 0.96, "learning_rate": 2.9802631578947368e-05, "loss": 0.5331, "step": 96310 }, { "epoch": 0.96, "learning_rate": 2.9723684210526316e-05, "loss": 0.5273, "step": 96320 }, { "epoch": 0.96, "learning_rate": 2.9644736842105258e-05, "loss": 0.5326, "step": 96330 }, { "epoch": 0.96, "learning_rate": 2.9565789473684206e-05, "loss": 0.5359, "step": 96340 }, { "epoch": 0.96, "learning_rate": 2.9486842105263155e-05, "loss": 0.5455, "step": 96350 }, { "epoch": 0.96, "learning_rate": 2.9407894736842103e-05, "loss": 0.5487, "step": 96360 }, { "epoch": 0.96, "learning_rate": 2.932894736842105e-05, "loss": 0.5657, "step": 96370 }, { "epoch": 0.96, "learning_rate": 2.925e-05, "loss": 0.5638, "step": 96380 }, { "epoch": 0.96, "learning_rate": 2.9171052631578948e-05, "loss": 0.5571, "step": 96390 }, { "epoch": 0.96, "learning_rate": 2.909210526315789e-05, "loss": 0.5652, "step": 96400 }, { "epoch": 0.96, "learning_rate": 2.9013157894736838e-05, "loss": 0.5728, "step": 96410 }, { "epoch": 0.96, "learning_rate": 2.8934210526315786e-05, "loss": 0.5647, "step": 96420 }, { "epoch": 0.96, "learning_rate": 2.8855263157894734e-05, "loss": 0.5567, "step": 96430 }, { "epoch": 0.96, "learning_rate": 2.8776315789473683e-05, "loss": 0.558, "step": 96440 }, { "epoch": 0.96, "learning_rate": 2.869736842105263e-05, "loss": 0.5515, "step": 96450 }, { "epoch": 0.96, "learning_rate": 2.861842105263158e-05, "loss": 0.5398, "step": 96460 }, { "epoch": 0.96, "learning_rate": 2.853947368421052e-05, "loss": 0.5497, "step": 96470 }, { "epoch": 0.96, "learning_rate": 2.846052631578947e-05, "loss": 0.5523, "step": 96480 }, { "epoch": 0.96, "learning_rate": 2.8381578947368417e-05, "loss": 0.557, "step": 96490 }, { "epoch": 0.96, "learning_rate": 2.8302631578947366e-05, "loss": 0.5566, "step": 96500 }, { "epoch": 0.97, "learning_rate": 2.8223684210526314e-05, "loss": 0.5379, "step": 96510 }, { "epoch": 0.97, "learning_rate": 2.8144736842105262e-05, "loss": 0.5407, "step": 96520 }, { "epoch": 0.97, "learning_rate": 2.806578947368421e-05, "loss": 0.5258, "step": 96530 }, { "epoch": 0.97, "learning_rate": 2.798684210526316e-05, "loss": 0.54, "step": 96540 }, { "epoch": 0.97, "learning_rate": 2.79078947368421e-05, "loss": 0.524, "step": 96550 }, { "epoch": 0.97, "learning_rate": 2.782894736842105e-05, "loss": 0.5204, "step": 96560 }, { "epoch": 0.97, "learning_rate": 2.7749999999999997e-05, "loss": 0.5338, "step": 96570 }, { "epoch": 0.97, "learning_rate": 2.7671052631578945e-05, "loss": 0.541, "step": 96580 }, { "epoch": 0.97, "learning_rate": 2.7592105263157893e-05, "loss": 0.5475, "step": 96590 }, { "epoch": 0.97, "learning_rate": 2.7513157894736842e-05, "loss": 0.5553, "step": 96600 }, { "epoch": 0.97, "learning_rate": 2.743421052631579e-05, "loss": 0.565, "step": 96610 }, { "epoch": 0.97, "learning_rate": 2.735526315789473e-05, "loss": 0.5593, "step": 96620 }, { "epoch": 0.97, "learning_rate": 2.727631578947368e-05, "loss": 0.5644, "step": 96630 }, { "epoch": 0.97, "learning_rate": 2.7197368421052628e-05, "loss": 0.571, "step": 96640 }, { "epoch": 0.97, "learning_rate": 2.7118421052631577e-05, "loss": 0.569, "step": 96650 }, { "epoch": 0.97, "learning_rate": 2.7039473684210525e-05, "loss": 0.5711, "step": 96660 }, { "epoch": 0.97, "learning_rate": 2.6960526315789473e-05, "loss": 0.5648, "step": 96670 }, { "epoch": 0.97, "learning_rate": 2.688157894736842e-05, "loss": 0.5571, "step": 96680 }, { "epoch": 0.97, "learning_rate": 2.6802631578947363e-05, "loss": 0.5504, "step": 96690 }, { "epoch": 0.97, "learning_rate": 2.672368421052631e-05, "loss": 0.5558, "step": 96700 }, { "epoch": 0.97, "learning_rate": 2.664473684210526e-05, "loss": 0.5563, "step": 96710 }, { "epoch": 0.97, "learning_rate": 2.6565789473684208e-05, "loss": 0.5604, "step": 96720 }, { "epoch": 0.97, "learning_rate": 2.6486842105263156e-05, "loss": 0.5428, "step": 96730 }, { "epoch": 0.97, "learning_rate": 2.6407894736842104e-05, "loss": 0.5404, "step": 96740 }, { "epoch": 0.97, "learning_rate": 2.6328947368421053e-05, "loss": 0.5365, "step": 96750 }, { "epoch": 0.97, "learning_rate": 2.6249999999999998e-05, "loss": 0.5401, "step": 96760 }, { "epoch": 0.97, "learning_rate": 2.6171052631578943e-05, "loss": 0.5342, "step": 96770 }, { "epoch": 0.97, "learning_rate": 2.609210526315789e-05, "loss": 0.5354, "step": 96780 }, { "epoch": 0.97, "learning_rate": 2.601315789473684e-05, "loss": 0.5306, "step": 96790 }, { "epoch": 0.97, "learning_rate": 2.5934210526315788e-05, "loss": 0.536, "step": 96800 }, { "epoch": 0.97, "learning_rate": 2.5855263157894736e-05, "loss": 0.5567, "step": 96810 }, { "epoch": 0.97, "learning_rate": 2.5776315789473684e-05, "loss": 0.5455, "step": 96820 }, { "epoch": 0.97, "learning_rate": 2.569736842105263e-05, "loss": 0.5507, "step": 96830 }, { "epoch": 0.97, "learning_rate": 2.5618421052631574e-05, "loss": 0.5625, "step": 96840 }, { "epoch": 0.97, "learning_rate": 2.5539473684210522e-05, "loss": 0.5549, "step": 96850 }, { "epoch": 0.97, "learning_rate": 2.546052631578947e-05, "loss": 0.5499, "step": 96860 }, { "epoch": 0.97, "learning_rate": 2.538157894736842e-05, "loss": 0.5542, "step": 96870 }, { "epoch": 0.97, "learning_rate": 2.5302631578947367e-05, "loss": 0.5479, "step": 96880 }, { "epoch": 0.97, "learning_rate": 2.5223684210526315e-05, "loss": 0.5515, "step": 96890 }, { "epoch": 0.97, "learning_rate": 2.514473684210526e-05, "loss": 0.5341, "step": 96900 }, { "epoch": 0.97, "learning_rate": 2.506578947368421e-05, "loss": 0.5378, "step": 96910 }, { "epoch": 0.97, "learning_rate": 2.4986842105263154e-05, "loss": 0.5312, "step": 96920 }, { "epoch": 0.97, "learning_rate": 2.4907894736842102e-05, "loss": 0.5169, "step": 96930 }, { "epoch": 0.97, "learning_rate": 2.482894736842105e-05, "loss": 0.5368, "step": 96940 }, { "epoch": 0.97, "learning_rate": 2.475e-05, "loss": 0.5416, "step": 96950 }, { "epoch": 0.97, "learning_rate": 2.4671052631578947e-05, "loss": 0.5266, "step": 96960 }, { "epoch": 0.97, "learning_rate": 2.4592105263157892e-05, "loss": 0.5371, "step": 96970 }, { "epoch": 0.97, "learning_rate": 2.451315789473684e-05, "loss": 0.547, "step": 96980 }, { "epoch": 0.97, "learning_rate": 2.443421052631579e-05, "loss": 0.5425, "step": 96990 }, { "epoch": 0.97, "learning_rate": 2.4355263157894733e-05, "loss": 0.5337, "step": 97000 }, { "epoch": 0.97, "learning_rate": 2.427631578947368e-05, "loss": 0.5488, "step": 97010 }, { "epoch": 0.97, "learning_rate": 2.419736842105263e-05, "loss": 0.5378, "step": 97020 }, { "epoch": 0.97, "learning_rate": 2.4118421052631578e-05, "loss": 0.5393, "step": 97030 }, { "epoch": 0.97, "learning_rate": 2.4039473684210523e-05, "loss": 0.5527, "step": 97040 }, { "epoch": 0.97, "learning_rate": 2.396052631578947e-05, "loss": 0.5498, "step": 97050 }, { "epoch": 0.97, "learning_rate": 2.388157894736842e-05, "loss": 0.5658, "step": 97060 }, { "epoch": 0.97, "learning_rate": 2.3802631578947365e-05, "loss": 0.561, "step": 97070 }, { "epoch": 0.97, "learning_rate": 2.3723684210526313e-05, "loss": 0.5657, "step": 97080 }, { "epoch": 0.97, "learning_rate": 2.364473684210526e-05, "loss": 0.5672, "step": 97090 }, { "epoch": 0.97, "learning_rate": 2.356578947368421e-05, "loss": 0.5773, "step": 97100 }, { "epoch": 0.97, "learning_rate": 2.349473684210526e-05, "loss": 0.5737, "step": 97110 }, { "epoch": 0.97, "learning_rate": 2.341578947368421e-05, "loss": 0.5672, "step": 97120 }, { "epoch": 0.97, "learning_rate": 2.3336842105263154e-05, "loss": 0.5636, "step": 97130 }, { "epoch": 0.97, "learning_rate": 2.3257894736842102e-05, "loss": 0.5599, "step": 97140 }, { "epoch": 0.97, "learning_rate": 2.317894736842105e-05, "loss": 0.5634, "step": 97150 }, { "epoch": 0.97, "learning_rate": 2.31e-05, "loss": 0.5527, "step": 97160 }, { "epoch": 0.97, "learning_rate": 2.3021052631578944e-05, "loss": 0.5462, "step": 97170 }, { "epoch": 0.97, "learning_rate": 2.2942105263157892e-05, "loss": 0.5676, "step": 97180 }, { "epoch": 0.97, "learning_rate": 2.286315789473684e-05, "loss": 0.5544, "step": 97190 }, { "epoch": 0.97, "learning_rate": 2.278421052631579e-05, "loss": 0.5497, "step": 97200 }, { "epoch": 0.97, "learning_rate": 2.2705263157894734e-05, "loss": 0.5432, "step": 97210 }, { "epoch": 0.97, "learning_rate": 2.2626315789473682e-05, "loss": 0.5352, "step": 97220 }, { "epoch": 0.97, "learning_rate": 2.254736842105263e-05, "loss": 0.5462, "step": 97230 }, { "epoch": 0.97, "learning_rate": 2.246842105263158e-05, "loss": 0.5296, "step": 97240 }, { "epoch": 0.97, "learning_rate": 2.2389473684210524e-05, "loss": 0.5315, "step": 97250 }, { "epoch": 0.97, "learning_rate": 2.2310526315789472e-05, "loss": 0.5399, "step": 97260 }, { "epoch": 0.97, "learning_rate": 2.223157894736842e-05, "loss": 0.542, "step": 97270 }, { "epoch": 0.97, "learning_rate": 2.2152631578947365e-05, "loss": 0.5405, "step": 97280 }, { "epoch": 0.97, "learning_rate": 2.2073684210526313e-05, "loss": 0.5418, "step": 97290 }, { "epoch": 0.97, "learning_rate": 2.1994736842105262e-05, "loss": 0.5631, "step": 97300 }, { "epoch": 0.97, "learning_rate": 2.191578947368421e-05, "loss": 0.5695, "step": 97310 }, { "epoch": 0.97, "learning_rate": 2.183684210526316e-05, "loss": 0.5689, "step": 97320 }, { "epoch": 0.97, "learning_rate": 2.1757894736842103e-05, "loss": 0.5672, "step": 97330 }, { "epoch": 0.97, "learning_rate": 2.167894736842105e-05, "loss": 0.5668, "step": 97340 }, { "epoch": 0.97, "learning_rate": 2.1599999999999996e-05, "loss": 0.5717, "step": 97350 }, { "epoch": 0.97, "learning_rate": 2.1521052631578945e-05, "loss": 0.5578, "step": 97360 }, { "epoch": 0.97, "learning_rate": 2.1442105263157893e-05, "loss": 0.5658, "step": 97370 }, { "epoch": 0.97, "learning_rate": 2.136315789473684e-05, "loss": 0.5531, "step": 97380 }, { "epoch": 0.97, "learning_rate": 2.128421052631579e-05, "loss": 0.5455, "step": 97390 }, { "epoch": 0.97, "learning_rate": 2.1205263157894735e-05, "loss": 0.5528, "step": 97400 }, { "epoch": 0.97, "learning_rate": 2.1126315789473683e-05, "loss": 0.5488, "step": 97410 }, { "epoch": 0.97, "learning_rate": 2.1047368421052628e-05, "loss": 0.5441, "step": 97420 }, { "epoch": 0.97, "learning_rate": 2.0968421052631576e-05, "loss": 0.5538, "step": 97430 }, { "epoch": 0.97, "learning_rate": 2.0889473684210524e-05, "loss": 0.5341, "step": 97440 }, { "epoch": 0.97, "learning_rate": 2.0810526315789473e-05, "loss": 0.5429, "step": 97450 }, { "epoch": 0.97, "learning_rate": 2.073157894736842e-05, "loss": 0.5401, "step": 97460 }, { "epoch": 0.97, "learning_rate": 2.065263157894737e-05, "loss": 0.5352, "step": 97470 }, { "epoch": 0.97, "learning_rate": 2.0573684210526314e-05, "loss": 0.5236, "step": 97480 }, { "epoch": 0.97, "learning_rate": 2.049473684210526e-05, "loss": 0.5251, "step": 97490 }, { "epoch": 0.97, "learning_rate": 2.0415789473684207e-05, "loss": 0.5369, "step": 97500 }, { "epoch": 0.97, "eval_accuracy": 0.8848131360716049, "eval_loss": 0.5224609375, "eval_runtime": 97.6227, "eval_samples_per_second": 819.482, "eval_steps_per_second": 1.608, "step": 97500 }, { "epoch": 0.98, "learning_rate": 2.0336842105263156e-05, "loss": 0.5494, "step": 97510 }, { "epoch": 0.98, "learning_rate": 2.0257894736842104e-05, "loss": 0.5459, "step": 97520 }, { "epoch": 0.98, "learning_rate": 2.0178947368421052e-05, "loss": 0.5491, "step": 97530 }, { "epoch": 0.98, "learning_rate": 2.01e-05, "loss": 0.5605, "step": 97540 }, { "epoch": 0.98, "learning_rate": 2.002105263157895e-05, "loss": 0.5561, "step": 97550 }, { "epoch": 0.98, "learning_rate": 1.994210526315789e-05, "loss": 0.5638, "step": 97560 }, { "epoch": 0.98, "learning_rate": 1.986315789473684e-05, "loss": 0.5807, "step": 97570 }, { "epoch": 0.98, "learning_rate": 1.9784210526315787e-05, "loss": 0.5669, "step": 97580 }, { "epoch": 0.98, "learning_rate": 1.9705263157894735e-05, "loss": 0.568, "step": 97590 }, { "epoch": 0.98, "learning_rate": 1.9626315789473684e-05, "loss": 0.5584, "step": 97600 }, { "epoch": 0.98, "learning_rate": 1.9547368421052632e-05, "loss": 0.5545, "step": 97610 }, { "epoch": 0.98, "learning_rate": 1.946842105263158e-05, "loss": 0.5602, "step": 97620 }, { "epoch": 0.98, "learning_rate": 1.9389473684210522e-05, "loss": 0.5527, "step": 97630 }, { "epoch": 0.98, "learning_rate": 1.931052631578947e-05, "loss": 0.5406, "step": 97640 }, { "epoch": 0.98, "learning_rate": 1.923157894736842e-05, "loss": 0.5477, "step": 97650 }, { "epoch": 0.98, "learning_rate": 1.9152631578947367e-05, "loss": 0.5427, "step": 97660 }, { "epoch": 0.98, "learning_rate": 1.9073684210526315e-05, "loss": 0.547, "step": 97670 }, { "epoch": 0.98, "learning_rate": 1.8994736842105263e-05, "loss": 0.5386, "step": 97680 }, { "epoch": 0.98, "learning_rate": 1.891578947368421e-05, "loss": 0.546, "step": 97690 }, { "epoch": 0.98, "learning_rate": 1.8836842105263153e-05, "loss": 0.5366, "step": 97700 }, { "epoch": 0.98, "learning_rate": 1.87578947368421e-05, "loss": 0.546, "step": 97710 }, { "epoch": 0.98, "learning_rate": 1.867894736842105e-05, "loss": 0.5417, "step": 97720 }, { "epoch": 0.98, "learning_rate": 1.8599999999999998e-05, "loss": 0.5293, "step": 97730 }, { "epoch": 0.98, "learning_rate": 1.8521052631578946e-05, "loss": 0.5449, "step": 97740 }, { "epoch": 0.98, "learning_rate": 1.844210526315789e-05, "loss": 0.5507, "step": 97750 }, { "epoch": 0.98, "learning_rate": 1.836315789473684e-05, "loss": 0.5582, "step": 97760 }, { "epoch": 0.98, "learning_rate": 1.8284210526315788e-05, "loss": 0.5668, "step": 97770 }, { "epoch": 0.98, "learning_rate": 1.8205263157894736e-05, "loss": 0.5599, "step": 97780 }, { "epoch": 0.98, "learning_rate": 1.812631578947368e-05, "loss": 0.5694, "step": 97790 }, { "epoch": 0.98, "learning_rate": 1.804736842105263e-05, "loss": 0.5687, "step": 97800 }, { "epoch": 0.98, "learning_rate": 1.7968421052631578e-05, "loss": 0.5715, "step": 97810 }, { "epoch": 0.98, "learning_rate": 1.7889473684210526e-05, "loss": 0.5682, "step": 97820 }, { "epoch": 0.98, "learning_rate": 1.781052631578947e-05, "loss": 0.5604, "step": 97830 }, { "epoch": 0.98, "learning_rate": 1.773157894736842e-05, "loss": 0.5544, "step": 97840 }, { "epoch": 0.98, "learning_rate": 1.7652631578947368e-05, "loss": 0.562, "step": 97850 }, { "epoch": 0.98, "learning_rate": 1.7573684210526312e-05, "loss": 0.5514, "step": 97860 }, { "epoch": 0.98, "learning_rate": 1.749473684210526e-05, "loss": 0.5468, "step": 97870 }, { "epoch": 0.98, "learning_rate": 1.741578947368421e-05, "loss": 0.5509, "step": 97880 }, { "epoch": 0.98, "learning_rate": 1.7336842105263157e-05, "loss": 0.5626, "step": 97890 }, { "epoch": 0.98, "learning_rate": 1.7257894736842102e-05, "loss": 0.5478, "step": 97900 }, { "epoch": 0.98, "learning_rate": 1.717894736842105e-05, "loss": 0.5339, "step": 97910 }, { "epoch": 0.98, "learning_rate": 1.71e-05, "loss": 0.5395, "step": 97920 }, { "epoch": 0.98, "learning_rate": 1.7021052631578944e-05, "loss": 0.5329, "step": 97930 }, { "epoch": 0.98, "learning_rate": 1.6942105263157892e-05, "loss": 0.5265, "step": 97940 }, { "epoch": 0.98, "learning_rate": 1.686315789473684e-05, "loss": 0.5336, "step": 97950 }, { "epoch": 0.98, "learning_rate": 1.678421052631579e-05, "loss": 0.5251, "step": 97960 }, { "epoch": 0.98, "learning_rate": 1.6705263157894734e-05, "loss": 0.5393, "step": 97970 }, { "epoch": 0.98, "learning_rate": 1.6626315789473682e-05, "loss": 0.5467, "step": 97980 }, { "epoch": 0.98, "learning_rate": 1.654736842105263e-05, "loss": 0.5554, "step": 97990 }, { "epoch": 0.98, "learning_rate": 1.6468421052631575e-05, "loss": 0.5688, "step": 98000 }, { "epoch": 0.98, "learning_rate": 1.6389473684210523e-05, "loss": 0.5638, "step": 98010 }, { "epoch": 0.98, "learning_rate": 1.6310526315789472e-05, "loss": 0.5665, "step": 98020 }, { "epoch": 0.98, "learning_rate": 1.623157894736842e-05, "loss": 0.5707, "step": 98030 }, { "epoch": 0.98, "learning_rate": 1.6152631578947365e-05, "loss": 0.5603, "step": 98040 }, { "epoch": 0.98, "learning_rate": 1.6073684210526313e-05, "loss": 0.5643, "step": 98050 }, { "epoch": 0.98, "learning_rate": 1.599473684210526e-05, "loss": 0.5695, "step": 98060 }, { "epoch": 0.98, "learning_rate": 1.5915789473684207e-05, "loss": 0.5653, "step": 98070 }, { "epoch": 0.98, "learning_rate": 1.5836842105263155e-05, "loss": 0.5578, "step": 98080 }, { "epoch": 0.98, "learning_rate": 1.5757894736842103e-05, "loss": 0.5433, "step": 98090 }, { "epoch": 0.98, "learning_rate": 1.567894736842105e-05, "loss": 0.5342, "step": 98100 }, { "epoch": 0.98, "learning_rate": 1.5599999999999996e-05, "loss": 0.5509, "step": 98110 }, { "epoch": 0.98, "learning_rate": 1.552894736842105e-05, "loss": 0.5387, "step": 98120 }, { "epoch": 0.98, "learning_rate": 1.545e-05, "loss": 0.5577, "step": 98130 }, { "epoch": 0.98, "learning_rate": 1.5371052631578944e-05, "loss": 0.5427, "step": 98140 }, { "epoch": 0.98, "learning_rate": 1.5292105263157893e-05, "loss": 0.5401, "step": 98150 }, { "epoch": 0.98, "learning_rate": 1.5213157894736841e-05, "loss": 0.5331, "step": 98160 }, { "epoch": 0.98, "learning_rate": 1.5134210526315788e-05, "loss": 0.5166, "step": 98170 }, { "epoch": 0.98, "learning_rate": 1.5055263157894736e-05, "loss": 0.5319, "step": 98180 }, { "epoch": 0.98, "learning_rate": 1.4976315789473684e-05, "loss": 0.5298, "step": 98190 }, { "epoch": 0.98, "learning_rate": 1.4897368421052629e-05, "loss": 0.5225, "step": 98200 }, { "epoch": 0.98, "learning_rate": 1.4818421052631577e-05, "loss": 0.5472, "step": 98210 }, { "epoch": 0.98, "learning_rate": 1.4739473684210526e-05, "loss": 0.5575, "step": 98220 }, { "epoch": 0.98, "learning_rate": 1.4660526315789474e-05, "loss": 0.5471, "step": 98230 }, { "epoch": 0.98, "learning_rate": 1.4581578947368419e-05, "loss": 0.565, "step": 98240 }, { "epoch": 0.98, "learning_rate": 1.4502631578947367e-05, "loss": 0.5484, "step": 98250 }, { "epoch": 0.98, "learning_rate": 1.4423684210526316e-05, "loss": 0.5682, "step": 98260 }, { "epoch": 0.98, "learning_rate": 1.434473684210526e-05, "loss": 0.5639, "step": 98270 }, { "epoch": 0.98, "learning_rate": 1.4265789473684209e-05, "loss": 0.5628, "step": 98280 }, { "epoch": 0.98, "learning_rate": 1.4186842105263157e-05, "loss": 0.5645, "step": 98290 }, { "epoch": 0.98, "learning_rate": 1.4107894736842105e-05, "loss": 0.558, "step": 98300 }, { "epoch": 0.98, "learning_rate": 1.402894736842105e-05, "loss": 0.5468, "step": 98310 }, { "epoch": 0.98, "learning_rate": 1.3949999999999999e-05, "loss": 0.555, "step": 98320 }, { "epoch": 0.98, "learning_rate": 1.3871052631578947e-05, "loss": 0.5515, "step": 98330 }, { "epoch": 0.98, "learning_rate": 1.3792105263157892e-05, "loss": 0.5408, "step": 98340 }, { "epoch": 0.98, "learning_rate": 1.371315789473684e-05, "loss": 0.5386, "step": 98350 }, { "epoch": 0.98, "learning_rate": 1.3634210526315788e-05, "loss": 0.54, "step": 98360 }, { "epoch": 0.98, "learning_rate": 1.3555263157894737e-05, "loss": 0.5423, "step": 98370 }, { "epoch": 0.98, "learning_rate": 1.3476315789473682e-05, "loss": 0.5279, "step": 98380 }, { "epoch": 0.98, "learning_rate": 1.339736842105263e-05, "loss": 0.537, "step": 98390 }, { "epoch": 0.98, "learning_rate": 1.3318421052631578e-05, "loss": 0.536, "step": 98400 }, { "epoch": 0.98, "learning_rate": 1.3239473684210527e-05, "loss": 0.5422, "step": 98410 }, { "epoch": 0.98, "learning_rate": 1.3160526315789471e-05, "loss": 0.544, "step": 98420 }, { "epoch": 0.98, "learning_rate": 1.308157894736842e-05, "loss": 0.5422, "step": 98430 }, { "epoch": 0.98, "learning_rate": 1.3002631578947368e-05, "loss": 0.5319, "step": 98440 }, { "epoch": 0.98, "learning_rate": 1.2923684210526315e-05, "loss": 0.5468, "step": 98450 }, { "epoch": 0.98, "learning_rate": 1.2844736842105261e-05, "loss": 0.5449, "step": 98460 }, { "epoch": 0.98, "learning_rate": 1.276578947368421e-05, "loss": 0.5569, "step": 98470 }, { "epoch": 0.98, "learning_rate": 1.2686842105263158e-05, "loss": 0.5599, "step": 98480 }, { "epoch": 0.98, "learning_rate": 1.2607894736842104e-05, "loss": 0.5608, "step": 98490 }, { "epoch": 0.98, "learning_rate": 1.2528947368421051e-05, "loss": 0.5747, "step": 98500 }, { "epoch": 0.99, "learning_rate": 1.245e-05, "loss": 0.5713, "step": 98510 }, { "epoch": 0.99, "learning_rate": 1.2371052631578946e-05, "loss": 0.5587, "step": 98520 }, { "epoch": 0.99, "learning_rate": 1.2292105263157893e-05, "loss": 0.568, "step": 98530 }, { "epoch": 0.99, "learning_rate": 1.2213157894736841e-05, "loss": 0.5496, "step": 98540 }, { "epoch": 0.99, "learning_rate": 1.213421052631579e-05, "loss": 0.5667, "step": 98550 }, { "epoch": 0.99, "learning_rate": 1.2055263157894736e-05, "loss": 0.5402, "step": 98560 }, { "epoch": 0.99, "learning_rate": 1.1976315789473682e-05, "loss": 0.5294, "step": 98570 }, { "epoch": 0.99, "learning_rate": 1.189736842105263e-05, "loss": 0.5306, "step": 98580 }, { "epoch": 0.99, "learning_rate": 1.1818421052631577e-05, "loss": 0.5317, "step": 98590 }, { "epoch": 0.99, "learning_rate": 1.1739473684210526e-05, "loss": 0.5424, "step": 98600 }, { "epoch": 0.99, "learning_rate": 1.1660526315789472e-05, "loss": 0.5175, "step": 98610 }, { "epoch": 0.99, "learning_rate": 1.158157894736842e-05, "loss": 0.5112, "step": 98620 }, { "epoch": 0.99, "learning_rate": 1.1502631578947367e-05, "loss": 0.5215, "step": 98630 }, { "epoch": 0.99, "learning_rate": 1.1423684210526315e-05, "loss": 0.5233, "step": 98640 }, { "epoch": 0.99, "learning_rate": 1.1344736842105262e-05, "loss": 0.5117, "step": 98650 }, { "epoch": 0.99, "learning_rate": 1.1265789473684209e-05, "loss": 0.5186, "step": 98660 }, { "epoch": 0.99, "learning_rate": 1.1186842105263157e-05, "loss": 0.5214, "step": 98670 }, { "epoch": 0.99, "learning_rate": 1.1107894736842105e-05, "loss": 0.5307, "step": 98680 }, { "epoch": 0.99, "learning_rate": 1.1028947368421052e-05, "loss": 0.5245, "step": 98690 }, { "epoch": 0.99, "learning_rate": 1.0949999999999998e-05, "loss": 0.5476, "step": 98700 }, { "epoch": 0.99, "learning_rate": 1.0871052631578947e-05, "loss": 0.5443, "step": 98710 }, { "epoch": 0.99, "learning_rate": 1.0792105263157895e-05, "loss": 0.5479, "step": 98720 }, { "epoch": 0.99, "learning_rate": 1.071315789473684e-05, "loss": 0.5547, "step": 98730 }, { "epoch": 0.99, "learning_rate": 1.0634210526315788e-05, "loss": 0.5645, "step": 98740 }, { "epoch": 0.99, "learning_rate": 1.0555263157894737e-05, "loss": 0.5748, "step": 98750 }, { "epoch": 0.99, "learning_rate": 1.0476315789473683e-05, "loss": 0.5618, "step": 98760 }, { "epoch": 0.99, "learning_rate": 1.039736842105263e-05, "loss": 0.5506, "step": 98770 }, { "epoch": 0.99, "learning_rate": 1.0318421052631578e-05, "loss": 0.529, "step": 98780 }, { "epoch": 0.99, "learning_rate": 1.0239473684210526e-05, "loss": 0.536, "step": 98790 }, { "epoch": 0.99, "learning_rate": 1.0160526315789473e-05, "loss": 0.5472, "step": 98800 }, { "epoch": 0.99, "learning_rate": 1.008157894736842e-05, "loss": 0.5439, "step": 98810 }, { "epoch": 0.99, "learning_rate": 1.0002631578947368e-05, "loss": 0.5529, "step": 98820 }, { "epoch": 0.99, "learning_rate": 9.923684210526316e-06, "loss": 0.5439, "step": 98830 }, { "epoch": 0.99, "learning_rate": 9.844736842105261e-06, "loss": 0.533, "step": 98840 }, { "epoch": 0.99, "learning_rate": 9.76578947368421e-06, "loss": 0.5391, "step": 98850 }, { "epoch": 0.99, "learning_rate": 9.686842105263158e-06, "loss": 0.5274, "step": 98860 }, { "epoch": 0.99, "learning_rate": 9.607894736842106e-06, "loss": 0.5355, "step": 98870 }, { "epoch": 0.99, "learning_rate": 9.528947368421051e-06, "loss": 0.5272, "step": 98880 }, { "epoch": 0.99, "learning_rate": 9.45e-06, "loss": 0.5291, "step": 98890 }, { "epoch": 0.99, "learning_rate": 9.371052631578946e-06, "loss": 0.5353, "step": 98900 }, { "epoch": 0.99, "learning_rate": 9.292105263157894e-06, "loss": 0.5427, "step": 98910 }, { "epoch": 0.99, "learning_rate": 9.21315789473684e-06, "loss": 0.5431, "step": 98920 }, { "epoch": 0.99, "learning_rate": 9.134210526315789e-06, "loss": 0.5431, "step": 98930 }, { "epoch": 0.99, "learning_rate": 9.055263157894736e-06, "loss": 0.5569, "step": 98940 }, { "epoch": 0.99, "learning_rate": 8.976315789473684e-06, "loss": 0.563, "step": 98950 }, { "epoch": 0.99, "learning_rate": 8.89736842105263e-06, "loss": 0.5641, "step": 98960 }, { "epoch": 0.99, "learning_rate": 8.818421052631577e-06, "loss": 0.5705, "step": 98970 }, { "epoch": 0.99, "learning_rate": 8.74736842105263e-06, "loss": 0.5701, "step": 98980 }, { "epoch": 0.99, "learning_rate": 8.668421052631579e-06, "loss": 0.5524, "step": 98990 }, { "epoch": 0.99, "learning_rate": 8.589473684210525e-06, "loss": 0.5472, "step": 99000 }, { "epoch": 0.99, "learning_rate": 8.510526315789472e-06, "loss": 0.5477, "step": 99010 }, { "epoch": 0.99, "learning_rate": 8.43157894736842e-06, "loss": 0.5383, "step": 99020 }, { "epoch": 0.99, "learning_rate": 8.352631578947367e-06, "loss": 0.5362, "step": 99030 }, { "epoch": 0.99, "learning_rate": 8.273684210526315e-06, "loss": 0.5428, "step": 99040 }, { "epoch": 0.99, "learning_rate": 8.194736842105262e-06, "loss": 0.5357, "step": 99050 }, { "epoch": 0.99, "learning_rate": 8.11578947368421e-06, "loss": 0.5365, "step": 99060 }, { "epoch": 0.99, "learning_rate": 8.036842105263157e-06, "loss": 0.5419, "step": 99070 }, { "epoch": 0.99, "learning_rate": 7.957894736842103e-06, "loss": 0.5353, "step": 99080 }, { "epoch": 0.99, "learning_rate": 7.878947368421052e-06, "loss": 0.5401, "step": 99090 }, { "epoch": 0.99, "learning_rate": 7.799999999999998e-06, "loss": 0.5318, "step": 99100 }, { "epoch": 0.99, "learning_rate": 7.721052631578946e-06, "loss": 0.5423, "step": 99110 }, { "epoch": 0.99, "learning_rate": 7.642105263157893e-06, "loss": 0.5223, "step": 99120 }, { "epoch": 0.99, "learning_rate": 7.563157894736842e-06, "loss": 0.5314, "step": 99130 }, { "epoch": 0.99, "learning_rate": 7.484210526315789e-06, "loss": 0.5375, "step": 99140 }, { "epoch": 0.99, "learning_rate": 7.405263157894736e-06, "loss": 0.5475, "step": 99150 }, { "epoch": 0.99, "learning_rate": 7.326315789473684e-06, "loss": 0.5481, "step": 99160 }, { "epoch": 0.99, "learning_rate": 7.24736842105263e-06, "loss": 0.5519, "step": 99170 }, { "epoch": 0.99, "learning_rate": 7.168421052631579e-06, "loss": 0.5675, "step": 99180 }, { "epoch": 0.99, "learning_rate": 7.089473684210525e-06, "loss": 0.5612, "step": 99190 }, { "epoch": 0.99, "learning_rate": 7.0105263157894736e-06, "loss": 0.5572, "step": 99200 }, { "epoch": 0.99, "learning_rate": 6.93157894736842e-06, "loss": 0.5683, "step": 99210 }, { "epoch": 0.99, "learning_rate": 6.8526315789473685e-06, "loss": 0.5542, "step": 99220 }, { "epoch": 0.99, "learning_rate": 6.773684210526315e-06, "loss": 0.5614, "step": 99230 }, { "epoch": 0.99, "learning_rate": 6.694736842105262e-06, "loss": 0.5456, "step": 99240 }, { "epoch": 0.99, "learning_rate": 6.61578947368421e-06, "loss": 0.5442, "step": 99250 }, { "epoch": 0.99, "learning_rate": 6.536842105263157e-06, "loss": 0.5416, "step": 99260 }, { "epoch": 0.99, "learning_rate": 6.457894736842105e-06, "loss": 0.5463, "step": 99270 }, { "epoch": 0.99, "learning_rate": 6.3789473684210515e-06, "loss": 0.5546, "step": 99280 }, { "epoch": 0.99, "learning_rate": 6.3e-06, "loss": 0.5339, "step": 99290 }, { "epoch": 0.99, "learning_rate": 6.2210526315789464e-06, "loss": 0.5405, "step": 99300 }, { "epoch": 0.99, "learning_rate": 6.142105263157894e-06, "loss": 0.533, "step": 99310 }, { "epoch": 0.99, "learning_rate": 6.063157894736841e-06, "loss": 0.5315, "step": 99320 }, { "epoch": 0.99, "learning_rate": 5.984210526315789e-06, "loss": 0.541, "step": 99330 }, { "epoch": 0.99, "learning_rate": 5.905263157894736e-06, "loss": 0.5386, "step": 99340 }, { "epoch": 0.99, "learning_rate": 5.826315789473684e-06, "loss": 0.5231, "step": 99350 }, { "epoch": 0.99, "learning_rate": 5.747368421052631e-06, "loss": 0.5433, "step": 99360 }, { "epoch": 0.99, "learning_rate": 5.668421052631579e-06, "loss": 0.5502, "step": 99370 }, { "epoch": 0.99, "learning_rate": 5.589473684210526e-06, "loss": 0.5589, "step": 99380 }, { "epoch": 0.99, "learning_rate": 5.5105263157894735e-06, "loss": 0.5584, "step": 99390 }, { "epoch": 0.99, "learning_rate": 5.43157894736842e-06, "loss": 0.5622, "step": 99400 }, { "epoch": 0.99, "learning_rate": 5.3526315789473684e-06, "loss": 0.562, "step": 99410 }, { "epoch": 0.99, "learning_rate": 5.273684210526315e-06, "loss": 0.572, "step": 99420 }, { "epoch": 0.99, "learning_rate": 5.194736842105263e-06, "loss": 0.5653, "step": 99430 }, { "epoch": 0.99, "learning_rate": 5.11578947368421e-06, "loss": 0.5621, "step": 99440 }, { "epoch": 0.99, "learning_rate": 5.036842105263158e-06, "loss": 0.5488, "step": 99450 }, { "epoch": 0.99, "learning_rate": 4.957894736842105e-06, "loss": 0.5379, "step": 99460 }, { "epoch": 0.99, "learning_rate": 4.8789473684210515e-06, "loss": 0.5541, "step": 99470 }, { "epoch": 0.99, "learning_rate": 4.8e-06, "loss": 0.548, "step": 99480 }, { "epoch": 0.99, "learning_rate": 4.721052631578946e-06, "loss": 0.5427, "step": 99490 }, { "epoch": 0.99, "learning_rate": 4.642105263157895e-06, "loss": 0.5592, "step": 99500 }, { "epoch": 1.0, "learning_rate": 4.563157894736841e-06, "loss": 0.5521, "step": 99510 }, { "epoch": 1.0, "learning_rate": 4.484210526315789e-06, "loss": 0.5467, "step": 99520 }, { "epoch": 1.0, "learning_rate": 4.405263157894736e-06, "loss": 0.5407, "step": 99530 }, { "epoch": 1.0, "learning_rate": 4.326315789473684e-06, "loss": 0.5413, "step": 99540 }, { "epoch": 1.0, "learning_rate": 4.247368421052631e-06, "loss": 0.5337, "step": 99550 }, { "epoch": 1.0, "learning_rate": 4.168421052631579e-06, "loss": 0.5315, "step": 99560 }, { "epoch": 1.0, "learning_rate": 4.089473684210526e-06, "loss": 0.5279, "step": 99570 }, { "epoch": 1.0, "learning_rate": 4.0105263157894735e-06, "loss": 0.5294, "step": 99580 }, { "epoch": 1.0, "learning_rate": 3.93157894736842e-06, "loss": 0.5355, "step": 99590 }, { "epoch": 1.0, "learning_rate": 3.8526315789473676e-06, "loss": 0.5406, "step": 99600 }, { "epoch": 1.0, "learning_rate": 3.7736842105263154e-06, "loss": 0.5498, "step": 99610 }, { "epoch": 1.0, "learning_rate": 3.694736842105263e-06, "loss": 0.55, "step": 99620 }, { "epoch": 1.0, "learning_rate": 3.6157894736842103e-06, "loss": 0.5627, "step": 99630 }, { "epoch": 1.0, "learning_rate": 3.536842105263158e-06, "loss": 0.5577, "step": 99640 }, { "epoch": 1.0, "learning_rate": 3.4578947368421053e-06, "loss": 0.5669, "step": 99650 }, { "epoch": 1.0, "learning_rate": 3.378947368421052e-06, "loss": 0.5615, "step": 99660 }, { "epoch": 1.0, "learning_rate": 3.2999999999999993e-06, "loss": 0.5618, "step": 99670 }, { "epoch": 1.0, "learning_rate": 3.2210526315789468e-06, "loss": 0.551, "step": 99680 }, { "epoch": 1.0, "learning_rate": 3.1421052631578942e-06, "loss": 0.5513, "step": 99690 }, { "epoch": 1.0, "learning_rate": 3.0631578947368417e-06, "loss": 0.5531, "step": 99700 }, { "epoch": 1.0, "learning_rate": 2.984210526315789e-06, "loss": 0.5431, "step": 99710 }, { "epoch": 1.0, "learning_rate": 2.9052631578947366e-06, "loss": 0.537, "step": 99720 }, { "epoch": 1.0, "learning_rate": 2.826315789473684e-06, "loss": 0.5366, "step": 99730 }, { "epoch": 1.0, "learning_rate": 2.747368421052631e-06, "loss": 0.5352, "step": 99740 }, { "epoch": 1.0, "learning_rate": 2.6684210526315785e-06, "loss": 0.5337, "step": 99750 }, { "epoch": 1.0, "learning_rate": 2.589473684210526e-06, "loss": 0.5264, "step": 99760 }, { "epoch": 1.0, "learning_rate": 2.5105263157894735e-06, "loss": 0.5306, "step": 99770 }, { "epoch": 1.0, "learning_rate": 2.431578947368421e-06, "loss": 0.537, "step": 99780 }, { "epoch": 1.0, "learning_rate": 2.3526315789473684e-06, "loss": 0.5183, "step": 99790 }, { "epoch": 1.0, "learning_rate": 2.2736842105263154e-06, "loss": 0.525, "step": 99800 }, { "epoch": 1.0, "learning_rate": 2.194736842105263e-06, "loss": 0.5179, "step": 99810 }, { "epoch": 1.0, "learning_rate": 2.1157894736842103e-06, "loss": 0.525, "step": 99820 }, { "epoch": 1.0, "learning_rate": 2.0368421052631578e-06, "loss": 0.5342, "step": 99830 }, { "epoch": 1.0, "learning_rate": 1.957894736842105e-06, "loss": 0.553, "step": 99840 }, { "epoch": 1.0, "learning_rate": 1.8789473684210525e-06, "loss": 0.5481, "step": 99850 }, { "epoch": 1.0, "learning_rate": 1.8e-06, "loss": 0.5443, "step": 99860 }, { "epoch": 1.0, "learning_rate": 1.7210526315789474e-06, "loss": 0.5509, "step": 99870 }, { "epoch": 1.0, "learning_rate": 1.6421052631578944e-06, "loss": 0.5453, "step": 99880 }, { "epoch": 1.0, "learning_rate": 1.5631578947368419e-06, "loss": 0.5505, "step": 99890 }, { "epoch": 1.0, "learning_rate": 1.4842105263157893e-06, "loss": 0.5574, "step": 99900 }, { "epoch": 1.0, "learning_rate": 1.4052631578947368e-06, "loss": 0.5573, "step": 99910 }, { "epoch": 1.0, "learning_rate": 1.326315789473684e-06, "loss": 0.553, "step": 99920 }, { "epoch": 1.0, "learning_rate": 1.2473684210526315e-06, "loss": 0.5504, "step": 99930 }, { "epoch": 1.0, "learning_rate": 1.168421052631579e-06, "loss": 0.5388, "step": 99940 }, { "epoch": 1.0, "learning_rate": 1.0894736842105262e-06, "loss": 0.534, "step": 99950 }, { "epoch": 1.0, "learning_rate": 1.0105263157894736e-06, "loss": 0.5417, "step": 99960 }, { "epoch": 1.0, "learning_rate": 9.31578947368421e-07, "loss": 0.5396, "step": 99970 }, { "epoch": 1.0, "learning_rate": 8.526315789473684e-07, "loss": 0.5378, "step": 99980 }, { "epoch": 1.0, "learning_rate": 7.736842105263157e-07, "loss": 0.5274, "step": 99990 }, { "epoch": 1.0, "learning_rate": 6.947368421052631e-07, "loss": 0.532, "step": 100000 }, { "epoch": 1.0, "eval_accuracy": 0.8854931609881757, "eval_loss": 0.51953125, "eval_runtime": 99.2784, "eval_samples_per_second": 805.815, "eval_steps_per_second": 1.581, "step": 100000 }, { "epoch": 1.0, "step": 100000, "total_flos": 3.58736203874304e+19, "train_loss": 0.8820991159725189, "train_runtime": 81002.545, "train_samples_per_second": 1264.158, "train_steps_per_second": 1.235 } ], "logging_steps": 10, "max_steps": 100000, "num_train_epochs": 9223372036854775807, "save_steps": 2500, "total_flos": 3.58736203874304e+19, "trial_name": null, "trial_params": null }