{ "best_metric": 1.806269645690918, "best_model_checkpoint": "gpt_alpaca_gpt4/checkpoint-31880", "epoch": 10.0, "global_step": 31880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.996863237139272e-06, "loss": 72.7405, "step": 10 }, { "epoch": 0.01, "learning_rate": 9.993726474278545e-06, "loss": 18.2176, "step": 20 }, { "epoch": 0.01, "learning_rate": 9.990589711417818e-06, "loss": 5.3223, "step": 30 }, { "epoch": 0.01, "learning_rate": 9.98745294855709e-06, "loss": 4.4019, "step": 40 }, { "epoch": 0.02, "learning_rate": 9.984316185696362e-06, "loss": 3.6758, "step": 50 }, { "epoch": 0.02, "learning_rate": 9.981179422835635e-06, "loss": 3.4823, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.978042659974908e-06, "loss": 3.024, "step": 70 }, { "epoch": 0.03, "learning_rate": 9.974905897114179e-06, "loss": 2.9245, "step": 80 }, { "epoch": 0.03, "learning_rate": 9.971769134253452e-06, "loss": 2.7949, "step": 90 }, { "epoch": 0.03, "learning_rate": 9.968632371392723e-06, "loss": 2.7182, "step": 100 }, { "epoch": 0.03, "learning_rate": 9.965495608531996e-06, "loss": 2.7099, "step": 110 }, { "epoch": 0.04, "learning_rate": 9.962358845671269e-06, "loss": 2.565, "step": 120 }, { "epoch": 0.04, "learning_rate": 9.95922208281054e-06, "loss": 2.5437, "step": 130 }, { "epoch": 0.04, "learning_rate": 9.956085319949813e-06, "loss": 2.4238, "step": 140 }, { "epoch": 0.05, "learning_rate": 9.952948557089086e-06, "loss": 2.538, "step": 150 }, { "epoch": 0.05, "learning_rate": 9.949811794228357e-06, "loss": 2.4926, "step": 160 }, { "epoch": 0.05, "learning_rate": 9.94667503136763e-06, "loss": 2.4414, "step": 170 }, { "epoch": 0.06, "learning_rate": 9.943538268506901e-06, "loss": 2.4165, "step": 180 }, { "epoch": 0.06, "learning_rate": 9.940401505646174e-06, "loss": 2.4311, "step": 190 }, { "epoch": 0.06, "learning_rate": 9.937264742785447e-06, "loss": 2.461, "step": 200 }, { "epoch": 0.07, "learning_rate": 9.934127979924718e-06, "loss": 2.4626, "step": 210 }, { "epoch": 0.07, "learning_rate": 9.930991217063991e-06, "loss": 2.4015, "step": 220 }, { "epoch": 0.07, "learning_rate": 9.927854454203262e-06, "loss": 2.4421, "step": 230 }, { "epoch": 0.08, "learning_rate": 9.924717691342535e-06, "loss": 2.3519, "step": 240 }, { "epoch": 0.08, "learning_rate": 9.921580928481808e-06, "loss": 2.4021, "step": 250 }, { "epoch": 0.08, "learning_rate": 9.918444165621079e-06, "loss": 2.363, "step": 260 }, { "epoch": 0.08, "learning_rate": 9.915307402760352e-06, "loss": 2.3165, "step": 270 }, { "epoch": 0.09, "learning_rate": 9.912170639899625e-06, "loss": 2.3224, "step": 280 }, { "epoch": 0.09, "learning_rate": 9.909033877038898e-06, "loss": 2.4254, "step": 290 }, { "epoch": 0.09, "learning_rate": 9.905897114178169e-06, "loss": 2.3419, "step": 300 }, { "epoch": 0.1, "learning_rate": 9.90276035131744e-06, "loss": 2.3501, "step": 310 }, { "epoch": 0.1, "learning_rate": 9.899623588456713e-06, "loss": 2.3411, "step": 320 }, { "epoch": 0.1, "learning_rate": 9.896486825595986e-06, "loss": 2.3303, "step": 330 }, { "epoch": 0.11, "learning_rate": 9.893350062735259e-06, "loss": 2.342, "step": 340 }, { "epoch": 0.11, "learning_rate": 9.89021329987453e-06, "loss": 2.3027, "step": 350 }, { "epoch": 0.11, "learning_rate": 9.887076537013803e-06, "loss": 2.3274, "step": 360 }, { "epoch": 0.12, "learning_rate": 9.883939774153076e-06, "loss": 2.3229, "step": 370 }, { "epoch": 0.12, "learning_rate": 9.880803011292347e-06, "loss": 2.273, "step": 380 }, { "epoch": 0.12, "learning_rate": 9.87766624843162e-06, "loss": 2.3415, "step": 390 }, { "epoch": 0.13, "learning_rate": 9.874529485570891e-06, "loss": 2.2101, "step": 400 }, { "epoch": 0.13, "learning_rate": 9.871392722710164e-06, "loss": 2.3486, "step": 410 }, { "epoch": 0.13, "learning_rate": 9.868255959849437e-06, "loss": 2.2376, "step": 420 }, { "epoch": 0.13, "learning_rate": 9.865119196988708e-06, "loss": 2.1976, "step": 430 }, { "epoch": 0.14, "learning_rate": 9.861982434127981e-06, "loss": 2.2299, "step": 440 }, { "epoch": 0.14, "learning_rate": 9.858845671267254e-06, "loss": 2.2861, "step": 450 }, { "epoch": 0.14, "learning_rate": 9.855708908406525e-06, "loss": 2.2025, "step": 460 }, { "epoch": 0.15, "learning_rate": 9.852572145545798e-06, "loss": 2.2542, "step": 470 }, { "epoch": 0.15, "learning_rate": 9.849435382685069e-06, "loss": 2.3063, "step": 480 }, { "epoch": 0.15, "learning_rate": 9.846298619824342e-06, "loss": 2.2572, "step": 490 }, { "epoch": 0.16, "learning_rate": 9.843161856963615e-06, "loss": 2.1989, "step": 500 }, { "epoch": 0.16, "learning_rate": 9.840025094102886e-06, "loss": 2.2597, "step": 510 }, { "epoch": 0.16, "learning_rate": 9.836888331242159e-06, "loss": 2.2688, "step": 520 }, { "epoch": 0.17, "learning_rate": 9.833751568381432e-06, "loss": 2.2682, "step": 530 }, { "epoch": 0.17, "learning_rate": 9.830614805520703e-06, "loss": 2.2296, "step": 540 }, { "epoch": 0.17, "learning_rate": 9.827478042659976e-06, "loss": 2.3093, "step": 550 }, { "epoch": 0.18, "learning_rate": 9.824341279799247e-06, "loss": 2.2688, "step": 560 }, { "epoch": 0.18, "learning_rate": 9.82120451693852e-06, "loss": 2.195, "step": 570 }, { "epoch": 0.18, "learning_rate": 9.818067754077793e-06, "loss": 2.1817, "step": 580 }, { "epoch": 0.19, "learning_rate": 9.814930991217064e-06, "loss": 2.3147, "step": 590 }, { "epoch": 0.19, "learning_rate": 9.811794228356337e-06, "loss": 2.2445, "step": 600 }, { "epoch": 0.19, "learning_rate": 9.808657465495608e-06, "loss": 2.2774, "step": 610 }, { "epoch": 0.19, "learning_rate": 9.805520702634881e-06, "loss": 2.107, "step": 620 }, { "epoch": 0.2, "learning_rate": 9.802383939774154e-06, "loss": 2.284, "step": 630 }, { "epoch": 0.2, "learning_rate": 9.799247176913425e-06, "loss": 2.2508, "step": 640 }, { "epoch": 0.2, "learning_rate": 9.796110414052698e-06, "loss": 2.2765, "step": 650 }, { "epoch": 0.21, "learning_rate": 9.792973651191971e-06, "loss": 2.3185, "step": 660 }, { "epoch": 0.21, "learning_rate": 9.789836888331244e-06, "loss": 2.2105, "step": 670 }, { "epoch": 0.21, "learning_rate": 9.786700125470515e-06, "loss": 2.1659, "step": 680 }, { "epoch": 0.22, "learning_rate": 9.783563362609786e-06, "loss": 2.189, "step": 690 }, { "epoch": 0.22, "learning_rate": 9.78042659974906e-06, "loss": 2.223, "step": 700 }, { "epoch": 0.22, "learning_rate": 9.777289836888332e-06, "loss": 2.2449, "step": 710 }, { "epoch": 0.23, "learning_rate": 9.774153074027605e-06, "loss": 2.2252, "step": 720 }, { "epoch": 0.23, "learning_rate": 9.771016311166876e-06, "loss": 2.1512, "step": 730 }, { "epoch": 0.23, "learning_rate": 9.767879548306149e-06, "loss": 2.1801, "step": 740 }, { "epoch": 0.24, "learning_rate": 9.764742785445422e-06, "loss": 2.2061, "step": 750 }, { "epoch": 0.24, "learning_rate": 9.761606022584693e-06, "loss": 2.2141, "step": 760 }, { "epoch": 0.24, "learning_rate": 9.758469259723966e-06, "loss": 2.1549, "step": 770 }, { "epoch": 0.24, "learning_rate": 9.755332496863237e-06, "loss": 2.2603, "step": 780 }, { "epoch": 0.25, "learning_rate": 9.75219573400251e-06, "loss": 2.2523, "step": 790 }, { "epoch": 0.25, "learning_rate": 9.749058971141783e-06, "loss": 2.1641, "step": 800 }, { "epoch": 0.25, "learning_rate": 9.745922208281054e-06, "loss": 2.1834, "step": 810 }, { "epoch": 0.26, "learning_rate": 9.742785445420327e-06, "loss": 2.2233, "step": 820 }, { "epoch": 0.26, "learning_rate": 9.7396486825596e-06, "loss": 2.1291, "step": 830 }, { "epoch": 0.26, "learning_rate": 9.736511919698871e-06, "loss": 2.1742, "step": 840 }, { "epoch": 0.27, "learning_rate": 9.733375156838144e-06, "loss": 2.2465, "step": 850 }, { "epoch": 0.27, "learning_rate": 9.730238393977415e-06, "loss": 2.1936, "step": 860 }, { "epoch": 0.27, "learning_rate": 9.727101631116688e-06, "loss": 2.1433, "step": 870 }, { "epoch": 0.28, "learning_rate": 9.723964868255961e-06, "loss": 2.1827, "step": 880 }, { "epoch": 0.28, "learning_rate": 9.720828105395232e-06, "loss": 2.2212, "step": 890 }, { "epoch": 0.28, "learning_rate": 9.717691342534505e-06, "loss": 2.1738, "step": 900 }, { "epoch": 0.29, "learning_rate": 9.714554579673778e-06, "loss": 2.13, "step": 910 }, { "epoch": 0.29, "learning_rate": 9.711417816813051e-06, "loss": 2.2494, "step": 920 }, { "epoch": 0.29, "learning_rate": 9.708281053952322e-06, "loss": 2.1904, "step": 930 }, { "epoch": 0.29, "learning_rate": 9.705144291091593e-06, "loss": 2.2029, "step": 940 }, { "epoch": 0.3, "learning_rate": 9.702007528230866e-06, "loss": 2.1897, "step": 950 }, { "epoch": 0.3, "learning_rate": 9.69887076537014e-06, "loss": 2.2508, "step": 960 }, { "epoch": 0.3, "learning_rate": 9.695734002509412e-06, "loss": 2.1216, "step": 970 }, { "epoch": 0.31, "learning_rate": 9.692597239648683e-06, "loss": 2.2157, "step": 980 }, { "epoch": 0.31, "learning_rate": 9.689460476787954e-06, "loss": 2.2398, "step": 990 }, { "epoch": 0.31, "learning_rate": 9.686323713927227e-06, "loss": 2.2389, "step": 1000 }, { "epoch": 0.32, "learning_rate": 9.6831869510665e-06, "loss": 2.1701, "step": 1010 }, { "epoch": 0.32, "learning_rate": 9.680050188205773e-06, "loss": 2.0708, "step": 1020 }, { "epoch": 0.32, "learning_rate": 9.676913425345044e-06, "loss": 2.1543, "step": 1030 }, { "epoch": 0.33, "learning_rate": 9.673776662484317e-06, "loss": 2.1717, "step": 1040 }, { "epoch": 0.33, "learning_rate": 9.67063989962359e-06, "loss": 2.2078, "step": 1050 }, { "epoch": 0.33, "learning_rate": 9.667503136762861e-06, "loss": 2.2218, "step": 1060 }, { "epoch": 0.34, "learning_rate": 9.664366373902134e-06, "loss": 2.177, "step": 1070 }, { "epoch": 0.34, "learning_rate": 9.661229611041405e-06, "loss": 2.1876, "step": 1080 }, { "epoch": 0.34, "learning_rate": 9.658092848180678e-06, "loss": 2.2374, "step": 1090 }, { "epoch": 0.35, "learning_rate": 9.654956085319951e-06, "loss": 2.1146, "step": 1100 }, { "epoch": 0.35, "learning_rate": 9.651819322459222e-06, "loss": 2.2132, "step": 1110 }, { "epoch": 0.35, "learning_rate": 9.648682559598495e-06, "loss": 2.1513, "step": 1120 }, { "epoch": 0.35, "learning_rate": 9.645545796737768e-06, "loss": 2.2177, "step": 1130 }, { "epoch": 0.36, "learning_rate": 9.64240903387704e-06, "loss": 2.1884, "step": 1140 }, { "epoch": 0.36, "learning_rate": 9.639272271016312e-06, "loss": 2.1925, "step": 1150 }, { "epoch": 0.36, "learning_rate": 9.636135508155583e-06, "loss": 2.2103, "step": 1160 }, { "epoch": 0.37, "learning_rate": 9.632998745294856e-06, "loss": 2.2355, "step": 1170 }, { "epoch": 0.37, "learning_rate": 9.62986198243413e-06, "loss": 2.1594, "step": 1180 }, { "epoch": 0.37, "learning_rate": 9.6267252195734e-06, "loss": 2.1985, "step": 1190 }, { "epoch": 0.38, "learning_rate": 9.623588456712673e-06, "loss": 2.1001, "step": 1200 }, { "epoch": 0.38, "learning_rate": 9.620451693851946e-06, "loss": 2.1757, "step": 1210 }, { "epoch": 0.38, "learning_rate": 9.617314930991219e-06, "loss": 2.2362, "step": 1220 }, { "epoch": 0.39, "learning_rate": 9.61417816813049e-06, "loss": 2.1813, "step": 1230 }, { "epoch": 0.39, "learning_rate": 9.611041405269761e-06, "loss": 2.1573, "step": 1240 }, { "epoch": 0.39, "learning_rate": 9.607904642409034e-06, "loss": 2.2222, "step": 1250 }, { "epoch": 0.4, "learning_rate": 9.604767879548307e-06, "loss": 2.1565, "step": 1260 }, { "epoch": 0.4, "learning_rate": 9.60163111668758e-06, "loss": 2.1788, "step": 1270 }, { "epoch": 0.4, "learning_rate": 9.598494353826851e-06, "loss": 2.1476, "step": 1280 }, { "epoch": 0.4, "learning_rate": 9.595357590966123e-06, "loss": 2.1585, "step": 1290 }, { "epoch": 0.41, "learning_rate": 9.592220828105397e-06, "loss": 2.1576, "step": 1300 }, { "epoch": 0.41, "learning_rate": 9.589084065244668e-06, "loss": 2.1962, "step": 1310 }, { "epoch": 0.41, "learning_rate": 9.585947302383941e-06, "loss": 2.1579, "step": 1320 }, { "epoch": 0.42, "learning_rate": 9.582810539523212e-06, "loss": 2.1679, "step": 1330 }, { "epoch": 0.42, "learning_rate": 9.579673776662485e-06, "loss": 2.0721, "step": 1340 }, { "epoch": 0.42, "learning_rate": 9.576537013801758e-06, "loss": 2.1763, "step": 1350 }, { "epoch": 0.43, "learning_rate": 9.57340025094103e-06, "loss": 2.1384, "step": 1360 }, { "epoch": 0.43, "learning_rate": 9.5702634880803e-06, "loss": 2.1536, "step": 1370 }, { "epoch": 0.43, "learning_rate": 9.567126725219574e-06, "loss": 2.1527, "step": 1380 }, { "epoch": 0.44, "learning_rate": 9.563989962358846e-06, "loss": 2.1347, "step": 1390 }, { "epoch": 0.44, "learning_rate": 9.56085319949812e-06, "loss": 2.0246, "step": 1400 }, { "epoch": 0.44, "learning_rate": 9.55771643663739e-06, "loss": 2.1478, "step": 1410 }, { "epoch": 0.45, "learning_rate": 9.554579673776663e-06, "loss": 2.1347, "step": 1420 }, { "epoch": 0.45, "learning_rate": 9.551442910915936e-06, "loss": 2.109, "step": 1430 }, { "epoch": 0.45, "learning_rate": 9.548306148055207e-06, "loss": 2.1055, "step": 1440 }, { "epoch": 0.45, "learning_rate": 9.54516938519448e-06, "loss": 2.1315, "step": 1450 }, { "epoch": 0.46, "learning_rate": 9.542032622333752e-06, "loss": 2.0917, "step": 1460 }, { "epoch": 0.46, "learning_rate": 9.538895859473024e-06, "loss": 2.0579, "step": 1470 }, { "epoch": 0.46, "learning_rate": 9.535759096612297e-06, "loss": 2.0573, "step": 1480 }, { "epoch": 0.47, "learning_rate": 9.532622333751569e-06, "loss": 2.1078, "step": 1490 }, { "epoch": 0.47, "learning_rate": 9.529485570890841e-06, "loss": 2.1503, "step": 1500 }, { "epoch": 0.47, "learning_rate": 9.526348808030114e-06, "loss": 2.0821, "step": 1510 }, { "epoch": 0.48, "learning_rate": 9.523212045169386e-06, "loss": 2.1639, "step": 1520 }, { "epoch": 0.48, "learning_rate": 9.520075282308658e-06, "loss": 2.1036, "step": 1530 }, { "epoch": 0.48, "learning_rate": 9.51693851944793e-06, "loss": 2.0536, "step": 1540 }, { "epoch": 0.49, "learning_rate": 9.513801756587203e-06, "loss": 2.1113, "step": 1550 }, { "epoch": 0.49, "learning_rate": 9.510664993726475e-06, "loss": 2.1774, "step": 1560 }, { "epoch": 0.49, "learning_rate": 9.507528230865747e-06, "loss": 2.0827, "step": 1570 }, { "epoch": 0.5, "learning_rate": 9.50439146800502e-06, "loss": 2.1706, "step": 1580 }, { "epoch": 0.5, "learning_rate": 9.501254705144292e-06, "loss": 2.1416, "step": 1590 }, { "epoch": 0.5, "learning_rate": 9.498117942283565e-06, "loss": 2.0689, "step": 1600 }, { "epoch": 0.51, "learning_rate": 9.494981179422836e-06, "loss": 2.1223, "step": 1610 }, { "epoch": 0.51, "learning_rate": 9.491844416562108e-06, "loss": 2.1743, "step": 1620 }, { "epoch": 0.51, "learning_rate": 9.48870765370138e-06, "loss": 2.1426, "step": 1630 }, { "epoch": 0.51, "learning_rate": 9.485570890840653e-06, "loss": 2.0827, "step": 1640 }, { "epoch": 0.52, "learning_rate": 9.482434127979926e-06, "loss": 2.0918, "step": 1650 }, { "epoch": 0.52, "learning_rate": 9.479297365119198e-06, "loss": 2.1753, "step": 1660 }, { "epoch": 0.52, "learning_rate": 9.476160602258469e-06, "loss": 2.1543, "step": 1670 }, { "epoch": 0.53, "learning_rate": 9.473023839397743e-06, "loss": 2.1697, "step": 1680 }, { "epoch": 0.53, "learning_rate": 9.469887076537015e-06, "loss": 2.1075, "step": 1690 }, { "epoch": 0.53, "learning_rate": 9.466750313676287e-06, "loss": 2.1636, "step": 1700 }, { "epoch": 0.54, "learning_rate": 9.463613550815559e-06, "loss": 2.219, "step": 1710 }, { "epoch": 0.54, "learning_rate": 9.460476787954832e-06, "loss": 2.1835, "step": 1720 }, { "epoch": 0.54, "learning_rate": 9.457340025094104e-06, "loss": 2.1288, "step": 1730 }, { "epoch": 0.55, "learning_rate": 9.454203262233376e-06, "loss": 2.218, "step": 1740 }, { "epoch": 0.55, "learning_rate": 9.451066499372648e-06, "loss": 2.19, "step": 1750 }, { "epoch": 0.55, "learning_rate": 9.44792973651192e-06, "loss": 2.0468, "step": 1760 }, { "epoch": 0.56, "learning_rate": 9.444792973651193e-06, "loss": 2.0883, "step": 1770 }, { "epoch": 0.56, "learning_rate": 9.441656210790465e-06, "loss": 2.1965, "step": 1780 }, { "epoch": 0.56, "learning_rate": 9.438519447929737e-06, "loss": 2.1823, "step": 1790 }, { "epoch": 0.56, "learning_rate": 9.43538268506901e-06, "loss": 2.1347, "step": 1800 }, { "epoch": 0.57, "learning_rate": 9.432245922208282e-06, "loss": 2.1487, "step": 1810 }, { "epoch": 0.57, "learning_rate": 9.429109159347554e-06, "loss": 2.1439, "step": 1820 }, { "epoch": 0.57, "learning_rate": 9.425972396486827e-06, "loss": 2.0782, "step": 1830 }, { "epoch": 0.58, "learning_rate": 9.422835633626098e-06, "loss": 2.1454, "step": 1840 }, { "epoch": 0.58, "learning_rate": 9.41969887076537e-06, "loss": 2.1088, "step": 1850 }, { "epoch": 0.58, "learning_rate": 9.416562107904644e-06, "loss": 2.1606, "step": 1860 }, { "epoch": 0.59, "learning_rate": 9.413425345043915e-06, "loss": 2.1161, "step": 1870 }, { "epoch": 0.59, "learning_rate": 9.410288582183188e-06, "loss": 2.0404, "step": 1880 }, { "epoch": 0.59, "learning_rate": 9.40715181932246e-06, "loss": 2.0949, "step": 1890 }, { "epoch": 0.6, "learning_rate": 9.404015056461733e-06, "loss": 2.1427, "step": 1900 }, { "epoch": 0.6, "learning_rate": 9.400878293601005e-06, "loss": 2.2431, "step": 1910 }, { "epoch": 0.6, "learning_rate": 9.397741530740276e-06, "loss": 2.1508, "step": 1920 }, { "epoch": 0.61, "learning_rate": 9.394604767879549e-06, "loss": 2.1498, "step": 1930 }, { "epoch": 0.61, "learning_rate": 9.391468005018822e-06, "loss": 2.1357, "step": 1940 }, { "epoch": 0.61, "learning_rate": 9.388331242158094e-06, "loss": 2.122, "step": 1950 }, { "epoch": 0.61, "learning_rate": 9.385194479297366e-06, "loss": 2.1007, "step": 1960 }, { "epoch": 0.62, "learning_rate": 9.382057716436639e-06, "loss": 2.1127, "step": 1970 }, { "epoch": 0.62, "learning_rate": 9.378920953575911e-06, "loss": 2.061, "step": 1980 }, { "epoch": 0.62, "learning_rate": 9.375784190715183e-06, "loss": 2.1258, "step": 1990 }, { "epoch": 0.63, "learning_rate": 9.372647427854456e-06, "loss": 2.1015, "step": 2000 }, { "epoch": 0.63, "learning_rate": 9.369510664993727e-06, "loss": 2.103, "step": 2010 }, { "epoch": 0.63, "learning_rate": 9.366373902133e-06, "loss": 2.1019, "step": 2020 }, { "epoch": 0.64, "learning_rate": 9.363237139272273e-06, "loss": 2.0751, "step": 2030 }, { "epoch": 0.64, "learning_rate": 9.360100376411544e-06, "loss": 2.1114, "step": 2040 }, { "epoch": 0.64, "learning_rate": 9.356963613550817e-06, "loss": 2.1199, "step": 2050 }, { "epoch": 0.65, "learning_rate": 9.35382685069009e-06, "loss": 2.1365, "step": 2060 }, { "epoch": 0.65, "learning_rate": 9.35069008782936e-06, "loss": 2.1187, "step": 2070 }, { "epoch": 0.65, "learning_rate": 9.347553324968634e-06, "loss": 2.0958, "step": 2080 }, { "epoch": 0.66, "learning_rate": 9.344416562107905e-06, "loss": 2.0829, "step": 2090 }, { "epoch": 0.66, "learning_rate": 9.341279799247178e-06, "loss": 2.1963, "step": 2100 }, { "epoch": 0.66, "learning_rate": 9.33814303638645e-06, "loss": 2.0786, "step": 2110 }, { "epoch": 0.66, "learning_rate": 9.335006273525722e-06, "loss": 2.1495, "step": 2120 }, { "epoch": 0.67, "learning_rate": 9.331869510664995e-06, "loss": 2.0716, "step": 2130 }, { "epoch": 0.67, "learning_rate": 9.328732747804266e-06, "loss": 2.1562, "step": 2140 }, { "epoch": 0.67, "learning_rate": 9.325595984943539e-06, "loss": 2.148, "step": 2150 }, { "epoch": 0.68, "learning_rate": 9.322459222082812e-06, "loss": 2.0868, "step": 2160 }, { "epoch": 0.68, "learning_rate": 9.319322459222083e-06, "loss": 2.0862, "step": 2170 }, { "epoch": 0.68, "learning_rate": 9.316185696361356e-06, "loss": 2.019, "step": 2180 }, { "epoch": 0.69, "learning_rate": 9.313048933500629e-06, "loss": 2.1371, "step": 2190 }, { "epoch": 0.69, "learning_rate": 9.309912170639902e-06, "loss": 2.1355, "step": 2200 }, { "epoch": 0.69, "learning_rate": 9.306775407779173e-06, "loss": 2.0676, "step": 2210 }, { "epoch": 0.7, "learning_rate": 9.303638644918444e-06, "loss": 2.0913, "step": 2220 }, { "epoch": 0.7, "learning_rate": 9.300501882057717e-06, "loss": 2.0628, "step": 2230 }, { "epoch": 0.7, "learning_rate": 9.29736511919699e-06, "loss": 2.1189, "step": 2240 }, { "epoch": 0.71, "learning_rate": 9.294228356336261e-06, "loss": 2.0403, "step": 2250 }, { "epoch": 0.71, "learning_rate": 9.291091593475534e-06, "loss": 2.1222, "step": 2260 }, { "epoch": 0.71, "learning_rate": 9.287954830614807e-06, "loss": 2.1159, "step": 2270 }, { "epoch": 0.72, "learning_rate": 9.28481806775408e-06, "loss": 2.0344, "step": 2280 }, { "epoch": 0.72, "learning_rate": 9.28168130489335e-06, "loss": 2.079, "step": 2290 }, { "epoch": 0.72, "learning_rate": 9.278544542032622e-06, "loss": 2.1272, "step": 2300 }, { "epoch": 0.72, "learning_rate": 9.275407779171895e-06, "loss": 2.1058, "step": 2310 }, { "epoch": 0.73, "learning_rate": 9.272271016311168e-06, "loss": 2.1079, "step": 2320 }, { "epoch": 0.73, "learning_rate": 9.26913425345044e-06, "loss": 2.1314, "step": 2330 }, { "epoch": 0.73, "learning_rate": 9.265997490589712e-06, "loss": 2.0434, "step": 2340 }, { "epoch": 0.74, "learning_rate": 9.262860727728985e-06, "loss": 2.1497, "step": 2350 }, { "epoch": 0.74, "learning_rate": 9.259723964868258e-06, "loss": 2.0986, "step": 2360 }, { "epoch": 0.74, "learning_rate": 9.256587202007529e-06, "loss": 2.1581, "step": 2370 }, { "epoch": 0.75, "learning_rate": 9.253450439146802e-06, "loss": 2.0301, "step": 2380 }, { "epoch": 0.75, "learning_rate": 9.250313676286073e-06, "loss": 2.0729, "step": 2390 }, { "epoch": 0.75, "learning_rate": 9.247176913425346e-06, "loss": 2.0853, "step": 2400 }, { "epoch": 0.76, "learning_rate": 9.244040150564619e-06, "loss": 2.0289, "step": 2410 }, { "epoch": 0.76, "learning_rate": 9.24090338770389e-06, "loss": 2.0674, "step": 2420 }, { "epoch": 0.76, "learning_rate": 9.237766624843163e-06, "loss": 2.1135, "step": 2430 }, { "epoch": 0.77, "learning_rate": 9.234629861982434e-06, "loss": 2.0942, "step": 2440 }, { "epoch": 0.77, "learning_rate": 9.231493099121707e-06, "loss": 2.0733, "step": 2450 }, { "epoch": 0.77, "learning_rate": 9.22835633626098e-06, "loss": 2.0782, "step": 2460 }, { "epoch": 0.77, "learning_rate": 9.225219573400251e-06, "loss": 2.0589, "step": 2470 }, { "epoch": 0.78, "learning_rate": 9.222082810539524e-06, "loss": 2.114, "step": 2480 }, { "epoch": 0.78, "learning_rate": 9.218946047678797e-06, "loss": 2.1313, "step": 2490 }, { "epoch": 0.78, "learning_rate": 9.215809284818068e-06, "loss": 2.1386, "step": 2500 }, { "epoch": 0.79, "learning_rate": 9.21267252195734e-06, "loss": 2.0737, "step": 2510 }, { "epoch": 0.79, "learning_rate": 9.209535759096612e-06, "loss": 2.0333, "step": 2520 }, { "epoch": 0.79, "learning_rate": 9.206398996235885e-06, "loss": 2.1316, "step": 2530 }, { "epoch": 0.8, "learning_rate": 9.203262233375158e-06, "loss": 2.1812, "step": 2540 }, { "epoch": 0.8, "learning_rate": 9.200125470514429e-06, "loss": 2.1846, "step": 2550 }, { "epoch": 0.8, "learning_rate": 9.196988707653702e-06, "loss": 1.9971, "step": 2560 }, { "epoch": 0.81, "learning_rate": 9.193851944792975e-06, "loss": 2.0572, "step": 2570 }, { "epoch": 0.81, "learning_rate": 9.190715181932248e-06, "loss": 2.0798, "step": 2580 }, { "epoch": 0.81, "learning_rate": 9.187578419071519e-06, "loss": 2.1017, "step": 2590 }, { "epoch": 0.82, "learning_rate": 9.18444165621079e-06, "loss": 2.062, "step": 2600 }, { "epoch": 0.82, "learning_rate": 9.181304893350063e-06, "loss": 2.0854, "step": 2610 }, { "epoch": 0.82, "learning_rate": 9.178168130489336e-06, "loss": 2.0734, "step": 2620 }, { "epoch": 0.82, "learning_rate": 9.175031367628609e-06, "loss": 2.1468, "step": 2630 }, { "epoch": 0.83, "learning_rate": 9.17189460476788e-06, "loss": 2.0528, "step": 2640 }, { "epoch": 0.83, "learning_rate": 9.168757841907153e-06, "loss": 2.0246, "step": 2650 }, { "epoch": 0.83, "learning_rate": 9.165621079046426e-06, "loss": 2.0171, "step": 2660 }, { "epoch": 0.84, "learning_rate": 9.162484316185697e-06, "loss": 2.0079, "step": 2670 }, { "epoch": 0.84, "learning_rate": 9.15934755332497e-06, "loss": 2.0205, "step": 2680 }, { "epoch": 0.84, "learning_rate": 9.156210790464241e-06, "loss": 2.021, "step": 2690 }, { "epoch": 0.85, "learning_rate": 9.153074027603514e-06, "loss": 2.1194, "step": 2700 }, { "epoch": 0.85, "learning_rate": 9.149937264742787e-06, "loss": 2.1149, "step": 2710 }, { "epoch": 0.85, "learning_rate": 9.146800501882058e-06, "loss": 2.0479, "step": 2720 }, { "epoch": 0.86, "learning_rate": 9.143663739021331e-06, "loss": 2.0321, "step": 2730 }, { "epoch": 0.86, "learning_rate": 9.140526976160604e-06, "loss": 2.0622, "step": 2740 }, { "epoch": 0.86, "learning_rate": 9.137390213299875e-06, "loss": 2.0104, "step": 2750 }, { "epoch": 0.87, "learning_rate": 9.134253450439148e-06, "loss": 2.0323, "step": 2760 }, { "epoch": 0.87, "learning_rate": 9.131116687578419e-06, "loss": 2.0314, "step": 2770 }, { "epoch": 0.87, "learning_rate": 9.127979924717692e-06, "loss": 2.0728, "step": 2780 }, { "epoch": 0.88, "learning_rate": 9.124843161856965e-06, "loss": 1.9149, "step": 2790 }, { "epoch": 0.88, "learning_rate": 9.121706398996236e-06, "loss": 2.1232, "step": 2800 }, { "epoch": 0.88, "learning_rate": 9.118569636135509e-06, "loss": 2.1028, "step": 2810 }, { "epoch": 0.88, "learning_rate": 9.11543287327478e-06, "loss": 2.1481, "step": 2820 }, { "epoch": 0.89, "learning_rate": 9.112296110414055e-06, "loss": 2.0296, "step": 2830 }, { "epoch": 0.89, "learning_rate": 9.109159347553326e-06, "loss": 2.108, "step": 2840 }, { "epoch": 0.89, "learning_rate": 9.106022584692597e-06, "loss": 2.0785, "step": 2850 }, { "epoch": 0.9, "learning_rate": 9.10288582183187e-06, "loss": 2.0647, "step": 2860 }, { "epoch": 0.9, "learning_rate": 9.099749058971143e-06, "loss": 2.0573, "step": 2870 }, { "epoch": 0.9, "learning_rate": 9.096612296110416e-06, "loss": 2.008, "step": 2880 }, { "epoch": 0.91, "learning_rate": 9.093475533249687e-06, "loss": 2.1762, "step": 2890 }, { "epoch": 0.91, "learning_rate": 9.090338770388958e-06, "loss": 2.0266, "step": 2900 }, { "epoch": 0.91, "learning_rate": 9.087202007528231e-06, "loss": 1.9967, "step": 2910 }, { "epoch": 0.92, "learning_rate": 9.084065244667504e-06, "loss": 2.1213, "step": 2920 }, { "epoch": 0.92, "learning_rate": 9.080928481806777e-06, "loss": 2.0734, "step": 2930 }, { "epoch": 0.92, "learning_rate": 9.077791718946048e-06, "loss": 2.0931, "step": 2940 }, { "epoch": 0.93, "learning_rate": 9.074654956085321e-06, "loss": 2.1039, "step": 2950 }, { "epoch": 0.93, "learning_rate": 9.071518193224594e-06, "loss": 2.031, "step": 2960 }, { "epoch": 0.93, "learning_rate": 9.068381430363865e-06, "loss": 2.1349, "step": 2970 }, { "epoch": 0.93, "learning_rate": 9.065244667503138e-06, "loss": 2.1053, "step": 2980 }, { "epoch": 0.94, "learning_rate": 9.06210790464241e-06, "loss": 2.1441, "step": 2990 }, { "epoch": 0.94, "learning_rate": 9.058971141781682e-06, "loss": 2.1022, "step": 3000 }, { "epoch": 0.94, "learning_rate": 9.055834378920955e-06, "loss": 2.0802, "step": 3010 }, { "epoch": 0.95, "learning_rate": 9.052697616060226e-06, "loss": 2.1117, "step": 3020 }, { "epoch": 0.95, "learning_rate": 9.049560853199499e-06, "loss": 2.1069, "step": 3030 }, { "epoch": 0.95, "learning_rate": 9.046424090338772e-06, "loss": 2.0744, "step": 3040 }, { "epoch": 0.96, "learning_rate": 9.043287327478043e-06, "loss": 2.0427, "step": 3050 }, { "epoch": 0.96, "learning_rate": 9.040150564617316e-06, "loss": 2.0614, "step": 3060 }, { "epoch": 0.96, "learning_rate": 9.037013801756587e-06, "loss": 2.1204, "step": 3070 }, { "epoch": 0.97, "learning_rate": 9.03387703889586e-06, "loss": 2.0268, "step": 3080 }, { "epoch": 0.97, "learning_rate": 9.030740276035133e-06, "loss": 2.0976, "step": 3090 }, { "epoch": 0.97, "learning_rate": 9.027603513174404e-06, "loss": 2.0615, "step": 3100 }, { "epoch": 0.98, "learning_rate": 9.024466750313677e-06, "loss": 2.0721, "step": 3110 }, { "epoch": 0.98, "learning_rate": 9.02132998745295e-06, "loss": 2.0986, "step": 3120 }, { "epoch": 0.98, "learning_rate": 9.018193224592221e-06, "loss": 2.0805, "step": 3130 }, { "epoch": 0.98, "learning_rate": 9.015056461731494e-06, "loss": 2.0576, "step": 3140 }, { "epoch": 0.99, "learning_rate": 9.011919698870765e-06, "loss": 2.0693, "step": 3150 }, { "epoch": 0.99, "learning_rate": 9.008782936010038e-06, "loss": 2.0951, "step": 3160 }, { "epoch": 0.99, "learning_rate": 9.005646173149311e-06, "loss": 2.067, "step": 3170 }, { "epoch": 1.0, "learning_rate": 9.002509410288582e-06, "loss": 2.0476, "step": 3180 }, { "epoch": 1.0, "eval_loss": 1.9344778060913086, "eval_runtime": 13.6121, "eval_samples_per_second": 73.464, "eval_steps_per_second": 4.628, "step": 3188 }, { "epoch": 1.0, "learning_rate": 8.999372647427855e-06, "loss": 2.0991, "step": 3190 }, { "epoch": 1.0, "learning_rate": 8.996235884567126e-06, "loss": 2.1577, "step": 3200 }, { "epoch": 1.01, "learning_rate": 8.993099121706401e-06, "loss": 2.0382, "step": 3210 }, { "epoch": 1.01, "learning_rate": 8.989962358845672e-06, "loss": 2.0758, "step": 3220 }, { "epoch": 1.01, "learning_rate": 8.986825595984943e-06, "loss": 2.0445, "step": 3230 }, { "epoch": 1.02, "learning_rate": 8.983688833124216e-06, "loss": 1.9499, "step": 3240 }, { "epoch": 1.02, "learning_rate": 8.980552070263489e-06, "loss": 2.0883, "step": 3250 }, { "epoch": 1.02, "learning_rate": 8.977415307402762e-06, "loss": 2.1311, "step": 3260 }, { "epoch": 1.03, "learning_rate": 8.974278544542033e-06, "loss": 2.0249, "step": 3270 }, { "epoch": 1.03, "learning_rate": 8.971141781681304e-06, "loss": 2.0753, "step": 3280 }, { "epoch": 1.03, "learning_rate": 8.968005018820577e-06, "loss": 2.0507, "step": 3290 }, { "epoch": 1.04, "learning_rate": 8.96486825595985e-06, "loss": 2.0353, "step": 3300 }, { "epoch": 1.04, "learning_rate": 8.961731493099123e-06, "loss": 2.0154, "step": 3310 }, { "epoch": 1.04, "learning_rate": 8.958594730238394e-06, "loss": 2.035, "step": 3320 }, { "epoch": 1.04, "learning_rate": 8.955457967377667e-06, "loss": 2.0578, "step": 3330 }, { "epoch": 1.05, "learning_rate": 8.95232120451694e-06, "loss": 1.9501, "step": 3340 }, { "epoch": 1.05, "learning_rate": 8.949184441656211e-06, "loss": 1.9773, "step": 3350 }, { "epoch": 1.05, "learning_rate": 8.946047678795484e-06, "loss": 2.0441, "step": 3360 }, { "epoch": 1.06, "learning_rate": 8.942910915934755e-06, "loss": 2.1017, "step": 3370 }, { "epoch": 1.06, "learning_rate": 8.939774153074028e-06, "loss": 2.0324, "step": 3380 }, { "epoch": 1.06, "learning_rate": 8.936637390213301e-06, "loss": 2.1506, "step": 3390 }, { "epoch": 1.07, "learning_rate": 8.933500627352572e-06, "loss": 2.0552, "step": 3400 }, { "epoch": 1.07, "learning_rate": 8.930363864491845e-06, "loss": 2.031, "step": 3410 }, { "epoch": 1.07, "learning_rate": 8.927227101631118e-06, "loss": 1.9732, "step": 3420 }, { "epoch": 1.08, "learning_rate": 8.92409033877039e-06, "loss": 2.02, "step": 3430 }, { "epoch": 1.08, "learning_rate": 8.920953575909662e-06, "loss": 2.0123, "step": 3440 }, { "epoch": 1.08, "learning_rate": 8.917816813048933e-06, "loss": 2.0242, "step": 3450 }, { "epoch": 1.09, "learning_rate": 8.914680050188206e-06, "loss": 2.0093, "step": 3460 }, { "epoch": 1.09, "learning_rate": 8.91154328732748e-06, "loss": 2.0639, "step": 3470 }, { "epoch": 1.09, "learning_rate": 8.90840652446675e-06, "loss": 2.0396, "step": 3480 }, { "epoch": 1.09, "learning_rate": 8.905269761606023e-06, "loss": 2.0431, "step": 3490 }, { "epoch": 1.1, "learning_rate": 8.902132998745296e-06, "loss": 1.9602, "step": 3500 }, { "epoch": 1.1, "learning_rate": 8.898996235884569e-06, "loss": 2.0483, "step": 3510 }, { "epoch": 1.1, "learning_rate": 8.89585947302384e-06, "loss": 2.0904, "step": 3520 }, { "epoch": 1.11, "learning_rate": 8.892722710163111e-06, "loss": 2.0676, "step": 3530 }, { "epoch": 1.11, "learning_rate": 8.889585947302384e-06, "loss": 2.095, "step": 3540 }, { "epoch": 1.11, "learning_rate": 8.886449184441657e-06, "loss": 2.0368, "step": 3550 }, { "epoch": 1.12, "learning_rate": 8.88331242158093e-06, "loss": 1.9903, "step": 3560 }, { "epoch": 1.12, "learning_rate": 8.880175658720201e-06, "loss": 2.0505, "step": 3570 }, { "epoch": 1.12, "learning_rate": 8.877038895859473e-06, "loss": 2.0756, "step": 3580 }, { "epoch": 1.13, "learning_rate": 8.873902132998745e-06, "loss": 2.0298, "step": 3590 }, { "epoch": 1.13, "learning_rate": 8.870765370138018e-06, "loss": 2.1276, "step": 3600 }, { "epoch": 1.13, "learning_rate": 8.867628607277291e-06, "loss": 1.9594, "step": 3610 }, { "epoch": 1.14, "learning_rate": 8.864491844416562e-06, "loss": 2.047, "step": 3620 }, { "epoch": 1.14, "learning_rate": 8.861355081555835e-06, "loss": 2.056, "step": 3630 }, { "epoch": 1.14, "learning_rate": 8.858218318695108e-06, "loss": 2.0638, "step": 3640 }, { "epoch": 1.14, "learning_rate": 8.85508155583438e-06, "loss": 2.0316, "step": 3650 }, { "epoch": 1.15, "learning_rate": 8.851944792973652e-06, "loss": 2.0345, "step": 3660 }, { "epoch": 1.15, "learning_rate": 8.848808030112923e-06, "loss": 2.0186, "step": 3670 }, { "epoch": 1.15, "learning_rate": 8.845671267252196e-06, "loss": 2.0418, "step": 3680 }, { "epoch": 1.16, "learning_rate": 8.84253450439147e-06, "loss": 1.9689, "step": 3690 }, { "epoch": 1.16, "learning_rate": 8.83939774153074e-06, "loss": 2.0088, "step": 3700 }, { "epoch": 1.16, "learning_rate": 8.836260978670013e-06, "loss": 2.0619, "step": 3710 }, { "epoch": 1.17, "learning_rate": 8.833124215809286e-06, "loss": 2.0637, "step": 3720 }, { "epoch": 1.17, "learning_rate": 8.829987452948557e-06, "loss": 1.9639, "step": 3730 }, { "epoch": 1.17, "learning_rate": 8.82685069008783e-06, "loss": 2.02, "step": 3740 }, { "epoch": 1.18, "learning_rate": 8.823713927227102e-06, "loss": 1.9772, "step": 3750 }, { "epoch": 1.18, "learning_rate": 8.820577164366374e-06, "loss": 1.9963, "step": 3760 }, { "epoch": 1.18, "learning_rate": 8.817440401505647e-06, "loss": 2.0384, "step": 3770 }, { "epoch": 1.19, "learning_rate": 8.814303638644919e-06, "loss": 2.1394, "step": 3780 }, { "epoch": 1.19, "learning_rate": 8.811166875784191e-06, "loss": 2.0293, "step": 3790 }, { "epoch": 1.19, "learning_rate": 8.808030112923464e-06, "loss": 2.0535, "step": 3800 }, { "epoch": 1.2, "learning_rate": 8.804893350062737e-06, "loss": 2.0553, "step": 3810 }, { "epoch": 1.2, "learning_rate": 8.801756587202008e-06, "loss": 1.9466, "step": 3820 }, { "epoch": 1.2, "learning_rate": 8.79861982434128e-06, "loss": 2.099, "step": 3830 }, { "epoch": 1.2, "learning_rate": 8.795483061480552e-06, "loss": 2.0236, "step": 3840 }, { "epoch": 1.21, "learning_rate": 8.792346298619825e-06, "loss": 1.9915, "step": 3850 }, { "epoch": 1.21, "learning_rate": 8.789209535759098e-06, "loss": 2.07, "step": 3860 }, { "epoch": 1.21, "learning_rate": 8.78607277289837e-06, "loss": 2.0757, "step": 3870 }, { "epoch": 1.22, "learning_rate": 8.782936010037642e-06, "loss": 2.0571, "step": 3880 }, { "epoch": 1.22, "learning_rate": 8.779799247176915e-06, "loss": 2.0173, "step": 3890 }, { "epoch": 1.22, "learning_rate": 8.776662484316186e-06, "loss": 2.014, "step": 3900 }, { "epoch": 1.23, "learning_rate": 8.77352572145546e-06, "loss": 2.0263, "step": 3910 }, { "epoch": 1.23, "learning_rate": 8.77038895859473e-06, "loss": 2.024, "step": 3920 }, { "epoch": 1.23, "learning_rate": 8.767252195734003e-06, "loss": 2.0025, "step": 3930 }, { "epoch": 1.24, "learning_rate": 8.764115432873276e-06, "loss": 2.0337, "step": 3940 }, { "epoch": 1.24, "learning_rate": 8.760978670012547e-06, "loss": 2.0432, "step": 3950 }, { "epoch": 1.24, "learning_rate": 8.75784190715182e-06, "loss": 2.0928, "step": 3960 }, { "epoch": 1.25, "learning_rate": 8.754705144291092e-06, "loss": 2.0108, "step": 3970 }, { "epoch": 1.25, "learning_rate": 8.751568381430364e-06, "loss": 2.0603, "step": 3980 }, { "epoch": 1.25, "learning_rate": 8.748431618569637e-06, "loss": 2.1245, "step": 3990 }, { "epoch": 1.25, "learning_rate": 8.745294855708909e-06, "loss": 2.0799, "step": 4000 }, { "epoch": 1.26, "learning_rate": 8.742158092848181e-06, "loss": 2.083, "step": 4010 }, { "epoch": 1.26, "learning_rate": 8.739021329987454e-06, "loss": 1.9249, "step": 4020 }, { "epoch": 1.26, "learning_rate": 8.735884567126726e-06, "loss": 2.0568, "step": 4030 }, { "epoch": 1.27, "learning_rate": 8.732747804265998e-06, "loss": 2.037, "step": 4040 }, { "epoch": 1.27, "learning_rate": 8.72961104140527e-06, "loss": 2.0338, "step": 4050 }, { "epoch": 1.27, "learning_rate": 8.726474278544543e-06, "loss": 1.9625, "step": 4060 }, { "epoch": 1.28, "learning_rate": 8.723337515683815e-06, "loss": 2.0356, "step": 4070 }, { "epoch": 1.28, "learning_rate": 8.720200752823087e-06, "loss": 2.0143, "step": 4080 }, { "epoch": 1.28, "learning_rate": 8.71706398996236e-06, "loss": 2.0188, "step": 4090 }, { "epoch": 1.29, "learning_rate": 8.713927227101632e-06, "loss": 1.9693, "step": 4100 }, { "epoch": 1.29, "learning_rate": 8.710790464240904e-06, "loss": 1.9619, "step": 4110 }, { "epoch": 1.29, "learning_rate": 8.707653701380176e-06, "loss": 2.012, "step": 4120 }, { "epoch": 1.3, "learning_rate": 8.704516938519448e-06, "loss": 2.0252, "step": 4130 }, { "epoch": 1.3, "learning_rate": 8.70138017565872e-06, "loss": 2.0667, "step": 4140 }, { "epoch": 1.3, "learning_rate": 8.698243412797993e-06, "loss": 1.9636, "step": 4150 }, { "epoch": 1.3, "learning_rate": 8.695106649937265e-06, "loss": 2.0088, "step": 4160 }, { "epoch": 1.31, "learning_rate": 8.691969887076538e-06, "loss": 2.0431, "step": 4170 }, { "epoch": 1.31, "learning_rate": 8.68883312421581e-06, "loss": 1.9949, "step": 4180 }, { "epoch": 1.31, "learning_rate": 8.685696361355083e-06, "loss": 2.0387, "step": 4190 }, { "epoch": 1.32, "learning_rate": 8.682559598494355e-06, "loss": 2.0097, "step": 4200 }, { "epoch": 1.32, "learning_rate": 8.679422835633626e-06, "loss": 1.9744, "step": 4210 }, { "epoch": 1.32, "learning_rate": 8.676286072772899e-06, "loss": 2.0095, "step": 4220 }, { "epoch": 1.33, "learning_rate": 8.673149309912172e-06, "loss": 2.0816, "step": 4230 }, { "epoch": 1.33, "learning_rate": 8.670012547051444e-06, "loss": 2.0112, "step": 4240 }, { "epoch": 1.33, "learning_rate": 8.666875784190716e-06, "loss": 1.9396, "step": 4250 }, { "epoch": 1.34, "learning_rate": 8.663739021329989e-06, "loss": 2.0894, "step": 4260 }, { "epoch": 1.34, "learning_rate": 8.660602258469261e-06, "loss": 2.0172, "step": 4270 }, { "epoch": 1.34, "learning_rate": 8.657465495608533e-06, "loss": 2.0465, "step": 4280 }, { "epoch": 1.35, "learning_rate": 8.654328732747805e-06, "loss": 1.9854, "step": 4290 }, { "epoch": 1.35, "learning_rate": 8.651191969887077e-06, "loss": 2.0557, "step": 4300 }, { "epoch": 1.35, "learning_rate": 8.64805520702635e-06, "loss": 2.0323, "step": 4310 }, { "epoch": 1.36, "learning_rate": 8.644918444165622e-06, "loss": 2.0219, "step": 4320 }, { "epoch": 1.36, "learning_rate": 8.641781681304894e-06, "loss": 2.0503, "step": 4330 }, { "epoch": 1.36, "learning_rate": 8.638644918444167e-06, "loss": 2.0392, "step": 4340 }, { "epoch": 1.36, "learning_rate": 8.635508155583438e-06, "loss": 2.0205, "step": 4350 }, { "epoch": 1.37, "learning_rate": 8.63237139272271e-06, "loss": 2.0776, "step": 4360 }, { "epoch": 1.37, "learning_rate": 8.629234629861984e-06, "loss": 2.0107, "step": 4370 }, { "epoch": 1.37, "learning_rate": 8.626097867001255e-06, "loss": 1.9607, "step": 4380 }, { "epoch": 1.38, "learning_rate": 8.622961104140528e-06, "loss": 2.0339, "step": 4390 }, { "epoch": 1.38, "learning_rate": 8.6198243412798e-06, "loss": 1.9847, "step": 4400 }, { "epoch": 1.38, "learning_rate": 8.616687578419072e-06, "loss": 1.9772, "step": 4410 }, { "epoch": 1.39, "learning_rate": 8.613550815558345e-06, "loss": 2.0, "step": 4420 }, { "epoch": 1.39, "learning_rate": 8.610414052697616e-06, "loss": 1.9789, "step": 4430 }, { "epoch": 1.39, "learning_rate": 8.607277289836889e-06, "loss": 2.021, "step": 4440 }, { "epoch": 1.4, "learning_rate": 8.604140526976162e-06, "loss": 2.0075, "step": 4450 }, { "epoch": 1.4, "learning_rate": 8.601003764115433e-06, "loss": 1.948, "step": 4460 }, { "epoch": 1.4, "learning_rate": 8.597867001254706e-06, "loss": 1.9574, "step": 4470 }, { "epoch": 1.41, "learning_rate": 8.594730238393979e-06, "loss": 1.9524, "step": 4480 }, { "epoch": 1.41, "learning_rate": 8.591593475533251e-06, "loss": 2.0068, "step": 4490 }, { "epoch": 1.41, "learning_rate": 8.588456712672523e-06, "loss": 2.0517, "step": 4500 }, { "epoch": 1.41, "learning_rate": 8.585319949811794e-06, "loss": 2.0514, "step": 4510 }, { "epoch": 1.42, "learning_rate": 8.582183186951067e-06, "loss": 2.0359, "step": 4520 }, { "epoch": 1.42, "learning_rate": 8.57904642409034e-06, "loss": 2.0724, "step": 4530 }, { "epoch": 1.42, "learning_rate": 8.575909661229613e-06, "loss": 2.0501, "step": 4540 }, { "epoch": 1.43, "learning_rate": 8.572772898368884e-06, "loss": 1.9899, "step": 4550 }, { "epoch": 1.43, "learning_rate": 8.569636135508157e-06, "loss": 2.0427, "step": 4560 }, { "epoch": 1.43, "learning_rate": 8.56649937264743e-06, "loss": 2.0431, "step": 4570 }, { "epoch": 1.44, "learning_rate": 8.5633626097867e-06, "loss": 2.0224, "step": 4580 }, { "epoch": 1.44, "learning_rate": 8.560225846925974e-06, "loss": 2.0217, "step": 4590 }, { "epoch": 1.44, "learning_rate": 8.557089084065245e-06, "loss": 2.0551, "step": 4600 }, { "epoch": 1.45, "learning_rate": 8.553952321204518e-06, "loss": 2.0365, "step": 4610 }, { "epoch": 1.45, "learning_rate": 8.55081555834379e-06, "loss": 2.0334, "step": 4620 }, { "epoch": 1.45, "learning_rate": 8.547678795483062e-06, "loss": 2.0267, "step": 4630 }, { "epoch": 1.46, "learning_rate": 8.544542032622335e-06, "loss": 2.0201, "step": 4640 }, { "epoch": 1.46, "learning_rate": 8.541405269761608e-06, "loss": 1.9535, "step": 4650 }, { "epoch": 1.46, "learning_rate": 8.538268506900879e-06, "loss": 2.0361, "step": 4660 }, { "epoch": 1.46, "learning_rate": 8.535131744040152e-06, "loss": 2.0938, "step": 4670 }, { "epoch": 1.47, "learning_rate": 8.531994981179423e-06, "loss": 1.9426, "step": 4680 }, { "epoch": 1.47, "learning_rate": 8.528858218318696e-06, "loss": 2.0573, "step": 4690 }, { "epoch": 1.47, "learning_rate": 8.525721455457969e-06, "loss": 1.96, "step": 4700 }, { "epoch": 1.48, "learning_rate": 8.52258469259724e-06, "loss": 2.039, "step": 4710 }, { "epoch": 1.48, "learning_rate": 8.519447929736513e-06, "loss": 2.0648, "step": 4720 }, { "epoch": 1.48, "learning_rate": 8.516311166875784e-06, "loss": 1.9469, "step": 4730 }, { "epoch": 1.49, "learning_rate": 8.513174404015057e-06, "loss": 2.0733, "step": 4740 }, { "epoch": 1.49, "learning_rate": 8.51003764115433e-06, "loss": 2.0004, "step": 4750 }, { "epoch": 1.49, "learning_rate": 8.506900878293601e-06, "loss": 2.014, "step": 4760 }, { "epoch": 1.5, "learning_rate": 8.503764115432874e-06, "loss": 1.9856, "step": 4770 }, { "epoch": 1.5, "learning_rate": 8.500627352572147e-06, "loss": 2.0515, "step": 4780 }, { "epoch": 1.5, "learning_rate": 8.49749058971142e-06, "loss": 2.0236, "step": 4790 }, { "epoch": 1.51, "learning_rate": 8.49435382685069e-06, "loss": 2.0687, "step": 4800 }, { "epoch": 1.51, "learning_rate": 8.491217063989962e-06, "loss": 1.9949, "step": 4810 }, { "epoch": 1.51, "learning_rate": 8.488080301129235e-06, "loss": 2.0212, "step": 4820 }, { "epoch": 1.52, "learning_rate": 8.484943538268508e-06, "loss": 2.0332, "step": 4830 }, { "epoch": 1.52, "learning_rate": 8.48180677540778e-06, "loss": 2.0289, "step": 4840 }, { "epoch": 1.52, "learning_rate": 8.478670012547052e-06, "loss": 2.0191, "step": 4850 }, { "epoch": 1.52, "learning_rate": 8.475533249686325e-06, "loss": 1.9577, "step": 4860 }, { "epoch": 1.53, "learning_rate": 8.472396486825598e-06, "loss": 2.0939, "step": 4870 }, { "epoch": 1.53, "learning_rate": 8.469259723964869e-06, "loss": 1.9506, "step": 4880 }, { "epoch": 1.53, "learning_rate": 8.46612296110414e-06, "loss": 1.8801, "step": 4890 }, { "epoch": 1.54, "learning_rate": 8.462986198243413e-06, "loss": 2.0006, "step": 4900 }, { "epoch": 1.54, "learning_rate": 8.459849435382686e-06, "loss": 2.0214, "step": 4910 }, { "epoch": 1.54, "learning_rate": 8.456712672521959e-06, "loss": 2.0355, "step": 4920 }, { "epoch": 1.55, "learning_rate": 8.45357590966123e-06, "loss": 2.0364, "step": 4930 }, { "epoch": 1.55, "learning_rate": 8.450439146800503e-06, "loss": 1.9857, "step": 4940 }, { "epoch": 1.55, "learning_rate": 8.447302383939776e-06, "loss": 1.9923, "step": 4950 }, { "epoch": 1.56, "learning_rate": 8.444165621079047e-06, "loss": 2.0392, "step": 4960 }, { "epoch": 1.56, "learning_rate": 8.44102885821832e-06, "loss": 2.041, "step": 4970 }, { "epoch": 1.56, "learning_rate": 8.437892095357591e-06, "loss": 2.0334, "step": 4980 }, { "epoch": 1.57, "learning_rate": 8.434755332496864e-06, "loss": 1.9577, "step": 4990 }, { "epoch": 1.57, "learning_rate": 8.431618569636137e-06, "loss": 2.0012, "step": 5000 }, { "epoch": 1.57, "learning_rate": 8.428481806775408e-06, "loss": 2.0923, "step": 5010 }, { "epoch": 1.57, "learning_rate": 8.425345043914681e-06, "loss": 1.9933, "step": 5020 }, { "epoch": 1.58, "learning_rate": 8.422208281053954e-06, "loss": 2.0297, "step": 5030 }, { "epoch": 1.58, "learning_rate": 8.419071518193225e-06, "loss": 1.9755, "step": 5040 }, { "epoch": 1.58, "learning_rate": 8.415934755332498e-06, "loss": 2.0193, "step": 5050 }, { "epoch": 1.59, "learning_rate": 8.412797992471769e-06, "loss": 2.048, "step": 5060 }, { "epoch": 1.59, "learning_rate": 8.409661229611042e-06, "loss": 1.9316, "step": 5070 }, { "epoch": 1.59, "learning_rate": 8.406524466750315e-06, "loss": 2.0211, "step": 5080 }, { "epoch": 1.6, "learning_rate": 8.403387703889586e-06, "loss": 2.0684, "step": 5090 }, { "epoch": 1.6, "learning_rate": 8.400250941028859e-06, "loss": 1.9619, "step": 5100 }, { "epoch": 1.6, "learning_rate": 8.39711417816813e-06, "loss": 1.9671, "step": 5110 }, { "epoch": 1.61, "learning_rate": 8.393977415307403e-06, "loss": 1.9211, "step": 5120 }, { "epoch": 1.61, "learning_rate": 8.390840652446676e-06, "loss": 1.9531, "step": 5130 }, { "epoch": 1.61, "learning_rate": 8.387703889585947e-06, "loss": 2.0234, "step": 5140 }, { "epoch": 1.62, "learning_rate": 8.38456712672522e-06, "loss": 2.0263, "step": 5150 }, { "epoch": 1.62, "learning_rate": 8.381430363864493e-06, "loss": 2.038, "step": 5160 }, { "epoch": 1.62, "learning_rate": 8.378293601003766e-06, "loss": 2.0003, "step": 5170 }, { "epoch": 1.62, "learning_rate": 8.375156838143037e-06, "loss": 2.003, "step": 5180 }, { "epoch": 1.63, "learning_rate": 8.372020075282308e-06, "loss": 2.0148, "step": 5190 }, { "epoch": 1.63, "learning_rate": 8.368883312421581e-06, "loss": 2.0672, "step": 5200 }, { "epoch": 1.63, "learning_rate": 8.365746549560854e-06, "loss": 2.0141, "step": 5210 }, { "epoch": 1.64, "learning_rate": 8.362609786700127e-06, "loss": 1.9545, "step": 5220 }, { "epoch": 1.64, "learning_rate": 8.359473023839398e-06, "loss": 1.87, "step": 5230 }, { "epoch": 1.64, "learning_rate": 8.356336260978671e-06, "loss": 1.9519, "step": 5240 }, { "epoch": 1.65, "learning_rate": 8.353199498117944e-06, "loss": 2.0665, "step": 5250 }, { "epoch": 1.65, "learning_rate": 8.350062735257215e-06, "loss": 1.9662, "step": 5260 }, { "epoch": 1.65, "learning_rate": 8.346925972396488e-06, "loss": 1.9447, "step": 5270 }, { "epoch": 1.66, "learning_rate": 8.343789209535759e-06, "loss": 2.0348, "step": 5280 }, { "epoch": 1.66, "learning_rate": 8.340652446675032e-06, "loss": 2.1091, "step": 5290 }, { "epoch": 1.66, "learning_rate": 8.337515683814305e-06, "loss": 2.0061, "step": 5300 }, { "epoch": 1.67, "learning_rate": 8.334378920953576e-06, "loss": 1.9448, "step": 5310 }, { "epoch": 1.67, "learning_rate": 8.331242158092849e-06, "loss": 1.9846, "step": 5320 }, { "epoch": 1.67, "learning_rate": 8.328105395232122e-06, "loss": 2.0324, "step": 5330 }, { "epoch": 1.68, "learning_rate": 8.324968632371393e-06, "loss": 2.0742, "step": 5340 }, { "epoch": 1.68, "learning_rate": 8.321831869510666e-06, "loss": 2.0402, "step": 5350 }, { "epoch": 1.68, "learning_rate": 8.318695106649937e-06, "loss": 2.0432, "step": 5360 }, { "epoch": 1.68, "learning_rate": 8.31555834378921e-06, "loss": 2.0438, "step": 5370 }, { "epoch": 1.69, "learning_rate": 8.312421580928483e-06, "loss": 2.0219, "step": 5380 }, { "epoch": 1.69, "learning_rate": 8.309284818067754e-06, "loss": 1.9956, "step": 5390 }, { "epoch": 1.69, "learning_rate": 8.306148055207027e-06, "loss": 2.099, "step": 5400 }, { "epoch": 1.7, "learning_rate": 8.3030112923463e-06, "loss": 2.0049, "step": 5410 }, { "epoch": 1.7, "learning_rate": 8.299874529485573e-06, "loss": 2.0073, "step": 5420 }, { "epoch": 1.7, "learning_rate": 8.296737766624844e-06, "loss": 1.922, "step": 5430 }, { "epoch": 1.71, "learning_rate": 8.293601003764115e-06, "loss": 2.0012, "step": 5440 }, { "epoch": 1.71, "learning_rate": 8.290464240903388e-06, "loss": 1.9273, "step": 5450 }, { "epoch": 1.71, "learning_rate": 8.287327478042661e-06, "loss": 1.9773, "step": 5460 }, { "epoch": 1.72, "learning_rate": 8.284190715181934e-06, "loss": 1.9979, "step": 5470 }, { "epoch": 1.72, "learning_rate": 8.281053952321205e-06, "loss": 2.0172, "step": 5480 }, { "epoch": 1.72, "learning_rate": 8.277917189460476e-06, "loss": 2.0063, "step": 5490 }, { "epoch": 1.73, "learning_rate": 8.27478042659975e-06, "loss": 1.9983, "step": 5500 }, { "epoch": 1.73, "learning_rate": 8.271643663739022e-06, "loss": 1.9818, "step": 5510 }, { "epoch": 1.73, "learning_rate": 8.268506900878295e-06, "loss": 1.9305, "step": 5520 }, { "epoch": 1.73, "learning_rate": 8.265370138017566e-06, "loss": 2.0492, "step": 5530 }, { "epoch": 1.74, "learning_rate": 8.262233375156839e-06, "loss": 2.0331, "step": 5540 }, { "epoch": 1.74, "learning_rate": 8.259096612296112e-06, "loss": 2.0205, "step": 5550 }, { "epoch": 1.74, "learning_rate": 8.255959849435383e-06, "loss": 1.9663, "step": 5560 }, { "epoch": 1.75, "learning_rate": 8.252823086574656e-06, "loss": 2.0373, "step": 5570 }, { "epoch": 1.75, "learning_rate": 8.249686323713927e-06, "loss": 2.0176, "step": 5580 }, { "epoch": 1.75, "learning_rate": 8.2465495608532e-06, "loss": 1.9972, "step": 5590 }, { "epoch": 1.76, "learning_rate": 8.243412797992473e-06, "loss": 2.0384, "step": 5600 }, { "epoch": 1.76, "learning_rate": 8.240276035131744e-06, "loss": 2.0283, "step": 5610 }, { "epoch": 1.76, "learning_rate": 8.237139272271017e-06, "loss": 1.8957, "step": 5620 }, { "epoch": 1.77, "learning_rate": 8.23400250941029e-06, "loss": 2.0583, "step": 5630 }, { "epoch": 1.77, "learning_rate": 8.230865746549561e-06, "loss": 1.9984, "step": 5640 }, { "epoch": 1.77, "learning_rate": 8.227728983688834e-06, "loss": 2.0165, "step": 5650 }, { "epoch": 1.78, "learning_rate": 8.224592220828105e-06, "loss": 2.0825, "step": 5660 }, { "epoch": 1.78, "learning_rate": 8.221455457967378e-06, "loss": 1.9821, "step": 5670 }, { "epoch": 1.78, "learning_rate": 8.218318695106651e-06, "loss": 1.9673, "step": 5680 }, { "epoch": 1.78, "learning_rate": 8.215181932245922e-06, "loss": 1.9948, "step": 5690 }, { "epoch": 1.79, "learning_rate": 8.212045169385195e-06, "loss": 2.0304, "step": 5700 }, { "epoch": 1.79, "learning_rate": 8.208908406524468e-06, "loss": 2.0357, "step": 5710 }, { "epoch": 1.79, "learning_rate": 8.205771643663741e-06, "loss": 2.0254, "step": 5720 }, { "epoch": 1.8, "learning_rate": 8.202634880803012e-06, "loss": 2.0155, "step": 5730 }, { "epoch": 1.8, "learning_rate": 8.199498117942283e-06, "loss": 2.0461, "step": 5740 }, { "epoch": 1.8, "learning_rate": 8.196361355081556e-06, "loss": 1.9988, "step": 5750 }, { "epoch": 1.81, "learning_rate": 8.193224592220829e-06, "loss": 2.0348, "step": 5760 }, { "epoch": 1.81, "learning_rate": 8.1900878293601e-06, "loss": 1.979, "step": 5770 }, { "epoch": 1.81, "learning_rate": 8.186951066499373e-06, "loss": 2.0056, "step": 5780 }, { "epoch": 1.82, "learning_rate": 8.183814303638644e-06, "loss": 2.0097, "step": 5790 }, { "epoch": 1.82, "learning_rate": 8.180677540777919e-06, "loss": 2.0079, "step": 5800 }, { "epoch": 1.82, "learning_rate": 8.17754077791719e-06, "loss": 1.9262, "step": 5810 }, { "epoch": 1.83, "learning_rate": 8.174404015056461e-06, "loss": 1.9064, "step": 5820 }, { "epoch": 1.83, "learning_rate": 8.171267252195734e-06, "loss": 2.0339, "step": 5830 }, { "epoch": 1.83, "learning_rate": 8.168130489335007e-06, "loss": 1.9952, "step": 5840 }, { "epoch": 1.84, "learning_rate": 8.16499372647428e-06, "loss": 1.9242, "step": 5850 }, { "epoch": 1.84, "learning_rate": 8.161856963613551e-06, "loss": 1.9661, "step": 5860 }, { "epoch": 1.84, "learning_rate": 8.158720200752822e-06, "loss": 1.9482, "step": 5870 }, { "epoch": 1.84, "learning_rate": 8.155583437892095e-06, "loss": 1.927, "step": 5880 }, { "epoch": 1.85, "learning_rate": 8.152446675031368e-06, "loss": 1.9963, "step": 5890 }, { "epoch": 1.85, "learning_rate": 8.149309912170641e-06, "loss": 2.0093, "step": 5900 }, { "epoch": 1.85, "learning_rate": 8.146173149309912e-06, "loss": 2.0204, "step": 5910 }, { "epoch": 1.86, "learning_rate": 8.143036386449185e-06, "loss": 2.0233, "step": 5920 }, { "epoch": 1.86, "learning_rate": 8.139899623588458e-06, "loss": 2.0393, "step": 5930 }, { "epoch": 1.86, "learning_rate": 8.13676286072773e-06, "loss": 2.0474, "step": 5940 }, { "epoch": 1.87, "learning_rate": 8.133626097867002e-06, "loss": 1.9908, "step": 5950 }, { "epoch": 1.87, "learning_rate": 8.130489335006273e-06, "loss": 1.9855, "step": 5960 }, { "epoch": 1.87, "learning_rate": 8.127352572145546e-06, "loss": 1.9181, "step": 5970 }, { "epoch": 1.88, "learning_rate": 8.12421580928482e-06, "loss": 1.9918, "step": 5980 }, { "epoch": 1.88, "learning_rate": 8.12107904642409e-06, "loss": 1.9965, "step": 5990 }, { "epoch": 1.88, "learning_rate": 8.117942283563363e-06, "loss": 2.0341, "step": 6000 }, { "epoch": 1.89, "learning_rate": 8.114805520702636e-06, "loss": 2.0354, "step": 6010 }, { "epoch": 1.89, "learning_rate": 8.111668757841907e-06, "loss": 1.9341, "step": 6020 }, { "epoch": 1.89, "learning_rate": 8.10853199498118e-06, "loss": 2.0183, "step": 6030 }, { "epoch": 1.89, "learning_rate": 8.105395232120451e-06, "loss": 2.0165, "step": 6040 }, { "epoch": 1.9, "learning_rate": 8.102258469259724e-06, "loss": 1.9712, "step": 6050 }, { "epoch": 1.9, "learning_rate": 8.099121706398997e-06, "loss": 1.8994, "step": 6060 }, { "epoch": 1.9, "learning_rate": 8.095984943538268e-06, "loss": 1.9247, "step": 6070 }, { "epoch": 1.91, "learning_rate": 8.092848180677541e-06, "loss": 2.0145, "step": 6080 }, { "epoch": 1.91, "learning_rate": 8.089711417816814e-06, "loss": 2.0431, "step": 6090 }, { "epoch": 1.91, "learning_rate": 8.086574654956087e-06, "loss": 1.9666, "step": 6100 }, { "epoch": 1.92, "learning_rate": 8.083437892095358e-06, "loss": 1.9638, "step": 6110 }, { "epoch": 1.92, "learning_rate": 8.08030112923463e-06, "loss": 1.994, "step": 6120 }, { "epoch": 1.92, "learning_rate": 8.077164366373902e-06, "loss": 1.9206, "step": 6130 }, { "epoch": 1.93, "learning_rate": 8.074027603513175e-06, "loss": 1.9814, "step": 6140 }, { "epoch": 1.93, "learning_rate": 8.070890840652448e-06, "loss": 2.0413, "step": 6150 }, { "epoch": 1.93, "learning_rate": 8.06775407779172e-06, "loss": 2.0384, "step": 6160 }, { "epoch": 1.94, "learning_rate": 8.06461731493099e-06, "loss": 2.0274, "step": 6170 }, { "epoch": 1.94, "learning_rate": 8.061480552070265e-06, "loss": 1.9409, "step": 6180 }, { "epoch": 1.94, "learning_rate": 8.058343789209536e-06, "loss": 1.9933, "step": 6190 }, { "epoch": 1.94, "learning_rate": 8.05520702634881e-06, "loss": 2.0091, "step": 6200 }, { "epoch": 1.95, "learning_rate": 8.05207026348808e-06, "loss": 2.0404, "step": 6210 }, { "epoch": 1.95, "learning_rate": 8.048933500627353e-06, "loss": 1.9856, "step": 6220 }, { "epoch": 1.95, "learning_rate": 8.045796737766626e-06, "loss": 2.0037, "step": 6230 }, { "epoch": 1.96, "learning_rate": 8.042659974905897e-06, "loss": 1.9664, "step": 6240 }, { "epoch": 1.96, "learning_rate": 8.03952321204517e-06, "loss": 2.0269, "step": 6250 }, { "epoch": 1.96, "learning_rate": 8.036386449184442e-06, "loss": 2.0265, "step": 6260 }, { "epoch": 1.97, "learning_rate": 8.033249686323714e-06, "loss": 1.9277, "step": 6270 }, { "epoch": 1.97, "learning_rate": 8.030112923462987e-06, "loss": 1.9276, "step": 6280 }, { "epoch": 1.97, "learning_rate": 8.026976160602259e-06, "loss": 2.0135, "step": 6290 }, { "epoch": 1.98, "learning_rate": 8.023839397741531e-06, "loss": 2.0078, "step": 6300 }, { "epoch": 1.98, "learning_rate": 8.020702634880804e-06, "loss": 2.0469, "step": 6310 }, { "epoch": 1.98, "learning_rate": 8.017565872020076e-06, "loss": 2.0029, "step": 6320 }, { "epoch": 1.99, "learning_rate": 8.014429109159348e-06, "loss": 1.9948, "step": 6330 }, { "epoch": 1.99, "learning_rate": 8.01129234629862e-06, "loss": 1.9673, "step": 6340 }, { "epoch": 1.99, "learning_rate": 8.008155583437892e-06, "loss": 1.9532, "step": 6350 }, { "epoch": 1.99, "learning_rate": 8.005018820577165e-06, "loss": 1.9704, "step": 6360 }, { "epoch": 2.0, "learning_rate": 8.001882057716437e-06, "loss": 1.9536, "step": 6370 }, { "epoch": 2.0, "eval_loss": 1.885330319404602, "eval_runtime": 13.6115, "eval_samples_per_second": 73.467, "eval_steps_per_second": 4.628, "step": 6376 }, { "epoch": 2.0, "learning_rate": 7.99874529485571e-06, "loss": 1.9421, "step": 6380 }, { "epoch": 2.0, "learning_rate": 7.995608531994982e-06, "loss": 1.9968, "step": 6390 }, { "epoch": 2.01, "learning_rate": 7.992471769134255e-06, "loss": 1.9777, "step": 6400 }, { "epoch": 2.01, "learning_rate": 7.989335006273526e-06, "loss": 1.9875, "step": 6410 }, { "epoch": 2.01, "learning_rate": 7.986198243412798e-06, "loss": 2.008, "step": 6420 }, { "epoch": 2.02, "learning_rate": 7.98306148055207e-06, "loss": 1.9478, "step": 6430 }, { "epoch": 2.02, "learning_rate": 7.979924717691343e-06, "loss": 1.946, "step": 6440 }, { "epoch": 2.02, "learning_rate": 7.976787954830616e-06, "loss": 2.0113, "step": 6450 }, { "epoch": 2.03, "learning_rate": 7.973651191969888e-06, "loss": 1.9268, "step": 6460 }, { "epoch": 2.03, "learning_rate": 7.97051442910916e-06, "loss": 1.974, "step": 6470 }, { "epoch": 2.03, "learning_rate": 7.967377666248433e-06, "loss": 1.942, "step": 6480 }, { "epoch": 2.04, "learning_rate": 7.964240903387705e-06, "loss": 2.0326, "step": 6490 }, { "epoch": 2.04, "learning_rate": 7.961104140526977e-06, "loss": 1.9315, "step": 6500 }, { "epoch": 2.04, "learning_rate": 7.957967377666249e-06, "loss": 1.9985, "step": 6510 }, { "epoch": 2.05, "learning_rate": 7.954830614805521e-06, "loss": 1.9155, "step": 6520 }, { "epoch": 2.05, "learning_rate": 7.951693851944794e-06, "loss": 2.0259, "step": 6530 }, { "epoch": 2.05, "learning_rate": 7.948557089084066e-06, "loss": 1.9476, "step": 6540 }, { "epoch": 2.05, "learning_rate": 7.945420326223338e-06, "loss": 1.971, "step": 6550 }, { "epoch": 2.06, "learning_rate": 7.942283563362611e-06, "loss": 1.9633, "step": 6560 }, { "epoch": 2.06, "learning_rate": 7.939146800501883e-06, "loss": 1.9443, "step": 6570 }, { "epoch": 2.06, "learning_rate": 7.936010037641155e-06, "loss": 2.0084, "step": 6580 }, { "epoch": 2.07, "learning_rate": 7.932873274780427e-06, "loss": 1.9961, "step": 6590 }, { "epoch": 2.07, "learning_rate": 7.9297365119197e-06, "loss": 2.0168, "step": 6600 }, { "epoch": 2.07, "learning_rate": 7.926599749058972e-06, "loss": 1.9562, "step": 6610 }, { "epoch": 2.08, "learning_rate": 7.923462986198244e-06, "loss": 1.9232, "step": 6620 }, { "epoch": 2.08, "learning_rate": 7.920326223337517e-06, "loss": 1.9462, "step": 6630 }, { "epoch": 2.08, "learning_rate": 7.917189460476788e-06, "loss": 1.9134, "step": 6640 }, { "epoch": 2.09, "learning_rate": 7.91405269761606e-06, "loss": 1.9443, "step": 6650 }, { "epoch": 2.09, "learning_rate": 7.910915934755334e-06, "loss": 1.9693, "step": 6660 }, { "epoch": 2.09, "learning_rate": 7.907779171894605e-06, "loss": 1.9401, "step": 6670 }, { "epoch": 2.1, "learning_rate": 7.904642409033878e-06, "loss": 1.9176, "step": 6680 }, { "epoch": 2.1, "learning_rate": 7.90150564617315e-06, "loss": 2.0308, "step": 6690 }, { "epoch": 2.1, "learning_rate": 7.898368883312422e-06, "loss": 2.0137, "step": 6700 }, { "epoch": 2.1, "learning_rate": 7.895232120451695e-06, "loss": 1.9504, "step": 6710 }, { "epoch": 2.11, "learning_rate": 7.892095357590966e-06, "loss": 1.9489, "step": 6720 }, { "epoch": 2.11, "learning_rate": 7.888958594730239e-06, "loss": 1.9366, "step": 6730 }, { "epoch": 2.11, "learning_rate": 7.885821831869512e-06, "loss": 1.9421, "step": 6740 }, { "epoch": 2.12, "learning_rate": 7.882685069008783e-06, "loss": 1.9611, "step": 6750 }, { "epoch": 2.12, "learning_rate": 7.879548306148056e-06, "loss": 1.9346, "step": 6760 }, { "epoch": 2.12, "learning_rate": 7.876411543287329e-06, "loss": 1.9328, "step": 6770 }, { "epoch": 2.13, "learning_rate": 7.873274780426601e-06, "loss": 2.0131, "step": 6780 }, { "epoch": 2.13, "learning_rate": 7.870138017565873e-06, "loss": 1.9271, "step": 6790 }, { "epoch": 2.13, "learning_rate": 7.867001254705144e-06, "loss": 1.9835, "step": 6800 }, { "epoch": 2.14, "learning_rate": 7.863864491844417e-06, "loss": 2.045, "step": 6810 }, { "epoch": 2.14, "learning_rate": 7.86072772898369e-06, "loss": 1.975, "step": 6820 }, { "epoch": 2.14, "learning_rate": 7.857590966122963e-06, "loss": 1.9628, "step": 6830 }, { "epoch": 2.15, "learning_rate": 7.854454203262234e-06, "loss": 2.0207, "step": 6840 }, { "epoch": 2.15, "learning_rate": 7.851317440401507e-06, "loss": 2.0245, "step": 6850 }, { "epoch": 2.15, "learning_rate": 7.84818067754078e-06, "loss": 1.9582, "step": 6860 }, { "epoch": 2.15, "learning_rate": 7.84504391468005e-06, "loss": 1.9514, "step": 6870 }, { "epoch": 2.16, "learning_rate": 7.841907151819324e-06, "loss": 2.0367, "step": 6880 }, { "epoch": 2.16, "learning_rate": 7.838770388958595e-06, "loss": 1.9545, "step": 6890 }, { "epoch": 2.16, "learning_rate": 7.835633626097868e-06, "loss": 2.0572, "step": 6900 }, { "epoch": 2.17, "learning_rate": 7.83249686323714e-06, "loss": 1.9569, "step": 6910 }, { "epoch": 2.17, "learning_rate": 7.829360100376412e-06, "loss": 1.9457, "step": 6920 }, { "epoch": 2.17, "learning_rate": 7.826223337515685e-06, "loss": 1.9065, "step": 6930 }, { "epoch": 2.18, "learning_rate": 7.823086574654956e-06, "loss": 1.9493, "step": 6940 }, { "epoch": 2.18, "learning_rate": 7.819949811794229e-06, "loss": 1.9474, "step": 6950 }, { "epoch": 2.18, "learning_rate": 7.816813048933502e-06, "loss": 1.9068, "step": 6960 }, { "epoch": 2.19, "learning_rate": 7.813676286072773e-06, "loss": 1.9634, "step": 6970 }, { "epoch": 2.19, "learning_rate": 7.810539523212046e-06, "loss": 1.8905, "step": 6980 }, { "epoch": 2.19, "learning_rate": 7.807402760351319e-06, "loss": 1.9783, "step": 6990 }, { "epoch": 2.2, "learning_rate": 7.80426599749059e-06, "loss": 1.9273, "step": 7000 }, { "epoch": 2.2, "learning_rate": 7.801129234629863e-06, "loss": 1.9219, "step": 7010 }, { "epoch": 2.2, "learning_rate": 7.797992471769134e-06, "loss": 1.847, "step": 7020 }, { "epoch": 2.21, "learning_rate": 7.794855708908407e-06, "loss": 1.9704, "step": 7030 }, { "epoch": 2.21, "learning_rate": 7.79171894604768e-06, "loss": 1.9821, "step": 7040 }, { "epoch": 2.21, "learning_rate": 7.788582183186951e-06, "loss": 1.9342, "step": 7050 }, { "epoch": 2.21, "learning_rate": 7.785445420326224e-06, "loss": 2.0013, "step": 7060 }, { "epoch": 2.22, "learning_rate": 7.782308657465497e-06, "loss": 1.9475, "step": 7070 }, { "epoch": 2.22, "learning_rate": 7.77917189460477e-06, "loss": 1.9484, "step": 7080 }, { "epoch": 2.22, "learning_rate": 7.77603513174404e-06, "loss": 1.9431, "step": 7090 }, { "epoch": 2.23, "learning_rate": 7.772898368883312e-06, "loss": 1.9094, "step": 7100 }, { "epoch": 2.23, "learning_rate": 7.769761606022585e-06, "loss": 1.9403, "step": 7110 }, { "epoch": 2.23, "learning_rate": 7.766624843161858e-06, "loss": 2.0046, "step": 7120 }, { "epoch": 2.24, "learning_rate": 7.76348808030113e-06, "loss": 1.9858, "step": 7130 }, { "epoch": 2.24, "learning_rate": 7.760351317440402e-06, "loss": 2.0215, "step": 7140 }, { "epoch": 2.24, "learning_rate": 7.757214554579675e-06, "loss": 1.951, "step": 7150 }, { "epoch": 2.25, "learning_rate": 7.754077791718948e-06, "loss": 1.9801, "step": 7160 }, { "epoch": 2.25, "learning_rate": 7.750941028858219e-06, "loss": 1.9526, "step": 7170 }, { "epoch": 2.25, "learning_rate": 7.747804265997492e-06, "loss": 1.971, "step": 7180 }, { "epoch": 2.26, "learning_rate": 7.744667503136763e-06, "loss": 1.8804, "step": 7190 }, { "epoch": 2.26, "learning_rate": 7.741530740276036e-06, "loss": 2.0343, "step": 7200 }, { "epoch": 2.26, "learning_rate": 7.738393977415309e-06, "loss": 1.9925, "step": 7210 }, { "epoch": 2.26, "learning_rate": 7.73525721455458e-06, "loss": 1.9465, "step": 7220 }, { "epoch": 2.27, "learning_rate": 7.732120451693853e-06, "loss": 1.8588, "step": 7230 }, { "epoch": 2.27, "learning_rate": 7.728983688833126e-06, "loss": 1.9369, "step": 7240 }, { "epoch": 2.27, "learning_rate": 7.725846925972397e-06, "loss": 1.9037, "step": 7250 }, { "epoch": 2.28, "learning_rate": 7.72271016311167e-06, "loss": 1.9352, "step": 7260 }, { "epoch": 2.28, "learning_rate": 7.719573400250941e-06, "loss": 1.9501, "step": 7270 }, { "epoch": 2.28, "learning_rate": 7.716436637390214e-06, "loss": 1.9301, "step": 7280 }, { "epoch": 2.29, "learning_rate": 7.713299874529487e-06, "loss": 1.9778, "step": 7290 }, { "epoch": 2.29, "learning_rate": 7.710163111668758e-06, "loss": 1.9291, "step": 7300 }, { "epoch": 2.29, "learning_rate": 7.70702634880803e-06, "loss": 2.0028, "step": 7310 }, { "epoch": 2.3, "learning_rate": 7.703889585947302e-06, "loss": 1.9809, "step": 7320 }, { "epoch": 2.3, "learning_rate": 7.700752823086577e-06, "loss": 1.9964, "step": 7330 }, { "epoch": 2.3, "learning_rate": 7.697616060225848e-06, "loss": 1.8848, "step": 7340 }, { "epoch": 2.31, "learning_rate": 7.694479297365119e-06, "loss": 1.901, "step": 7350 }, { "epoch": 2.31, "learning_rate": 7.691342534504392e-06, "loss": 1.9539, "step": 7360 }, { "epoch": 2.31, "learning_rate": 7.688205771643665e-06, "loss": 1.9882, "step": 7370 }, { "epoch": 2.31, "learning_rate": 7.685069008782938e-06, "loss": 1.9577, "step": 7380 }, { "epoch": 2.32, "learning_rate": 7.681932245922209e-06, "loss": 1.9839, "step": 7390 }, { "epoch": 2.32, "learning_rate": 7.67879548306148e-06, "loss": 1.9388, "step": 7400 }, { "epoch": 2.32, "learning_rate": 7.675658720200753e-06, "loss": 1.9851, "step": 7410 }, { "epoch": 2.33, "learning_rate": 7.672521957340026e-06, "loss": 2.0086, "step": 7420 }, { "epoch": 2.33, "learning_rate": 7.669385194479299e-06, "loss": 1.9862, "step": 7430 }, { "epoch": 2.33, "learning_rate": 7.66624843161857e-06, "loss": 1.9839, "step": 7440 }, { "epoch": 2.34, "learning_rate": 7.663111668757843e-06, "loss": 1.9765, "step": 7450 }, { "epoch": 2.34, "learning_rate": 7.659974905897116e-06, "loss": 1.9471, "step": 7460 }, { "epoch": 2.34, "learning_rate": 7.656838143036387e-06, "loss": 1.9762, "step": 7470 }, { "epoch": 2.35, "learning_rate": 7.65370138017566e-06, "loss": 1.9488, "step": 7480 }, { "epoch": 2.35, "learning_rate": 7.650564617314931e-06, "loss": 1.8924, "step": 7490 }, { "epoch": 2.35, "learning_rate": 7.647427854454204e-06, "loss": 1.978, "step": 7500 }, { "epoch": 2.36, "learning_rate": 7.644291091593477e-06, "loss": 1.9168, "step": 7510 }, { "epoch": 2.36, "learning_rate": 7.641154328732748e-06, "loss": 1.9963, "step": 7520 }, { "epoch": 2.36, "learning_rate": 7.638017565872021e-06, "loss": 1.9229, "step": 7530 }, { "epoch": 2.37, "learning_rate": 7.634880803011294e-06, "loss": 1.9212, "step": 7540 }, { "epoch": 2.37, "learning_rate": 7.631744040150565e-06, "loss": 1.95, "step": 7550 }, { "epoch": 2.37, "learning_rate": 7.628607277289838e-06, "loss": 1.9406, "step": 7560 }, { "epoch": 2.37, "learning_rate": 7.62547051442911e-06, "loss": 2.0137, "step": 7570 }, { "epoch": 2.38, "learning_rate": 7.622333751568381e-06, "loss": 2.01, "step": 7580 }, { "epoch": 2.38, "learning_rate": 7.619196988707655e-06, "loss": 1.9069, "step": 7590 }, { "epoch": 2.38, "learning_rate": 7.616060225846926e-06, "loss": 1.988, "step": 7600 }, { "epoch": 2.39, "learning_rate": 7.612923462986199e-06, "loss": 1.9774, "step": 7610 }, { "epoch": 2.39, "learning_rate": 7.609786700125471e-06, "loss": 1.9948, "step": 7620 }, { "epoch": 2.39, "learning_rate": 7.606649937264743e-06, "loss": 1.9218, "step": 7630 }, { "epoch": 2.4, "learning_rate": 7.603513174404016e-06, "loss": 1.9851, "step": 7640 }, { "epoch": 2.4, "learning_rate": 7.600376411543288e-06, "loss": 1.9778, "step": 7650 }, { "epoch": 2.4, "learning_rate": 7.597239648682561e-06, "loss": 1.9702, "step": 7660 }, { "epoch": 2.41, "learning_rate": 7.594102885821832e-06, "loss": 1.8713, "step": 7670 }, { "epoch": 2.41, "learning_rate": 7.590966122961104e-06, "loss": 1.9425, "step": 7680 }, { "epoch": 2.41, "learning_rate": 7.587829360100377e-06, "loss": 1.9917, "step": 7690 }, { "epoch": 2.42, "learning_rate": 7.584692597239649e-06, "loss": 1.9039, "step": 7700 }, { "epoch": 2.42, "learning_rate": 7.581555834378922e-06, "loss": 1.939, "step": 7710 }, { "epoch": 2.42, "learning_rate": 7.578419071518194e-06, "loss": 1.9926, "step": 7720 }, { "epoch": 2.42, "learning_rate": 7.575282308657466e-06, "loss": 1.9994, "step": 7730 }, { "epoch": 2.43, "learning_rate": 7.572145545796739e-06, "loss": 1.9109, "step": 7740 }, { "epoch": 2.43, "learning_rate": 7.56900878293601e-06, "loss": 2.0109, "step": 7750 }, { "epoch": 2.43, "learning_rate": 7.565872020075283e-06, "loss": 1.9353, "step": 7760 }, { "epoch": 2.44, "learning_rate": 7.562735257214555e-06, "loss": 1.9251, "step": 7770 }, { "epoch": 2.44, "learning_rate": 7.559598494353827e-06, "loss": 1.9734, "step": 7780 }, { "epoch": 2.44, "learning_rate": 7.5564617314931e-06, "loss": 1.9337, "step": 7790 }, { "epoch": 2.45, "learning_rate": 7.553324968632372e-06, "loss": 1.9499, "step": 7800 }, { "epoch": 2.45, "learning_rate": 7.550188205771645e-06, "loss": 1.9695, "step": 7810 }, { "epoch": 2.45, "learning_rate": 7.547051442910916e-06, "loss": 1.9732, "step": 7820 }, { "epoch": 2.46, "learning_rate": 7.543914680050188e-06, "loss": 1.9596, "step": 7830 }, { "epoch": 2.46, "learning_rate": 7.540777917189461e-06, "loss": 1.9799, "step": 7840 }, { "epoch": 2.46, "learning_rate": 7.537641154328733e-06, "loss": 2.0665, "step": 7850 }, { "epoch": 2.47, "learning_rate": 7.534504391468006e-06, "loss": 2.0205, "step": 7860 }, { "epoch": 2.47, "learning_rate": 7.531367628607278e-06, "loss": 1.9653, "step": 7870 }, { "epoch": 2.47, "learning_rate": 7.52823086574655e-06, "loss": 1.9521, "step": 7880 }, { "epoch": 2.47, "learning_rate": 7.525094102885823e-06, "loss": 1.9296, "step": 7890 }, { "epoch": 2.48, "learning_rate": 7.521957340025094e-06, "loss": 1.9706, "step": 7900 }, { "epoch": 2.48, "learning_rate": 7.518820577164367e-06, "loss": 1.9166, "step": 7910 }, { "epoch": 2.48, "learning_rate": 7.515683814303639e-06, "loss": 1.9223, "step": 7920 }, { "epoch": 2.49, "learning_rate": 7.512547051442911e-06, "loss": 1.9756, "step": 7930 }, { "epoch": 2.49, "learning_rate": 7.509410288582184e-06, "loss": 1.9335, "step": 7940 }, { "epoch": 2.49, "learning_rate": 7.506273525721456e-06, "loss": 1.94, "step": 7950 }, { "epoch": 2.5, "learning_rate": 7.503136762860729e-06, "loss": 1.9802, "step": 7960 }, { "epoch": 2.5, "learning_rate": 7.500000000000001e-06, "loss": 1.9913, "step": 7970 }, { "epoch": 2.5, "learning_rate": 7.496863237139272e-06, "loss": 1.9385, "step": 7980 }, { "epoch": 2.51, "learning_rate": 7.493726474278545e-06, "loss": 1.9581, "step": 7990 }, { "epoch": 2.51, "learning_rate": 7.490589711417817e-06, "loss": 1.9955, "step": 8000 }, { "epoch": 2.51, "learning_rate": 7.48745294855709e-06, "loss": 1.9008, "step": 8010 }, { "epoch": 2.52, "learning_rate": 7.484316185696362e-06, "loss": 1.9286, "step": 8020 }, { "epoch": 2.52, "learning_rate": 7.481179422835634e-06, "loss": 1.9209, "step": 8030 }, { "epoch": 2.52, "learning_rate": 7.478042659974907e-06, "loss": 1.9612, "step": 8040 }, { "epoch": 2.53, "learning_rate": 7.474905897114178e-06, "loss": 2.0569, "step": 8050 }, { "epoch": 2.53, "learning_rate": 7.471769134253451e-06, "loss": 1.9281, "step": 8060 }, { "epoch": 2.53, "learning_rate": 7.468632371392723e-06, "loss": 1.8916, "step": 8070 }, { "epoch": 2.53, "learning_rate": 7.465495608531995e-06, "loss": 1.9725, "step": 8080 }, { "epoch": 2.54, "learning_rate": 7.462358845671268e-06, "loss": 1.9004, "step": 8090 }, { "epoch": 2.54, "learning_rate": 7.45922208281054e-06, "loss": 1.97, "step": 8100 }, { "epoch": 2.54, "learning_rate": 7.456085319949813e-06, "loss": 1.9328, "step": 8110 }, { "epoch": 2.55, "learning_rate": 7.452948557089085e-06, "loss": 1.9547, "step": 8120 }, { "epoch": 2.55, "learning_rate": 7.449811794228356e-06, "loss": 1.9816, "step": 8130 }, { "epoch": 2.55, "learning_rate": 7.446675031367629e-06, "loss": 2.0046, "step": 8140 }, { "epoch": 2.56, "learning_rate": 7.443538268506901e-06, "loss": 1.958, "step": 8150 }, { "epoch": 2.56, "learning_rate": 7.440401505646174e-06, "loss": 1.9852, "step": 8160 }, { "epoch": 2.56, "learning_rate": 7.437264742785446e-06, "loss": 1.9922, "step": 8170 }, { "epoch": 2.57, "learning_rate": 7.434127979924718e-06, "loss": 1.9294, "step": 8180 }, { "epoch": 2.57, "learning_rate": 7.430991217063991e-06, "loss": 1.9844, "step": 8190 }, { "epoch": 2.57, "learning_rate": 7.427854454203262e-06, "loss": 1.9652, "step": 8200 }, { "epoch": 2.58, "learning_rate": 7.424717691342535e-06, "loss": 1.9633, "step": 8210 }, { "epoch": 2.58, "learning_rate": 7.421580928481807e-06, "loss": 1.9338, "step": 8220 }, { "epoch": 2.58, "learning_rate": 7.418444165621079e-06, "loss": 1.9838, "step": 8230 }, { "epoch": 2.58, "learning_rate": 7.415307402760352e-06, "loss": 1.9171, "step": 8240 }, { "epoch": 2.59, "learning_rate": 7.412170639899624e-06, "loss": 2.0102, "step": 8250 }, { "epoch": 2.59, "learning_rate": 7.409033877038897e-06, "loss": 1.959, "step": 8260 }, { "epoch": 2.59, "learning_rate": 7.405897114178169e-06, "loss": 1.9088, "step": 8270 }, { "epoch": 2.6, "learning_rate": 7.40276035131744e-06, "loss": 1.9144, "step": 8280 }, { "epoch": 2.6, "learning_rate": 7.399623588456713e-06, "loss": 1.8979, "step": 8290 }, { "epoch": 2.6, "learning_rate": 7.396486825595985e-06, "loss": 2.0027, "step": 8300 }, { "epoch": 2.61, "learning_rate": 7.393350062735258e-06, "loss": 1.8945, "step": 8310 }, { "epoch": 2.61, "learning_rate": 7.39021329987453e-06, "loss": 2.0055, "step": 8320 }, { "epoch": 2.61, "learning_rate": 7.387076537013802e-06, "loss": 1.9817, "step": 8330 }, { "epoch": 2.62, "learning_rate": 7.383939774153075e-06, "loss": 1.9699, "step": 8340 }, { "epoch": 2.62, "learning_rate": 7.380803011292346e-06, "loss": 1.9523, "step": 8350 }, { "epoch": 2.62, "learning_rate": 7.37766624843162e-06, "loss": 1.9713, "step": 8360 }, { "epoch": 2.63, "learning_rate": 7.374529485570891e-06, "loss": 2.0491, "step": 8370 }, { "epoch": 2.63, "learning_rate": 7.371392722710163e-06, "loss": 1.9829, "step": 8380 }, { "epoch": 2.63, "learning_rate": 7.368255959849436e-06, "loss": 1.9455, "step": 8390 }, { "epoch": 2.63, "learning_rate": 7.365119196988708e-06, "loss": 2.0571, "step": 8400 }, { "epoch": 2.64, "learning_rate": 7.361982434127981e-06, "loss": 1.9815, "step": 8410 }, { "epoch": 2.64, "learning_rate": 7.358845671267253e-06, "loss": 1.9506, "step": 8420 }, { "epoch": 2.64, "learning_rate": 7.355708908406524e-06, "loss": 1.9344, "step": 8430 }, { "epoch": 2.65, "learning_rate": 7.352572145545797e-06, "loss": 2.0732, "step": 8440 }, { "epoch": 2.65, "learning_rate": 7.349435382685069e-06, "loss": 1.9703, "step": 8450 }, { "epoch": 2.65, "learning_rate": 7.346298619824341e-06, "loss": 1.9605, "step": 8460 }, { "epoch": 2.66, "learning_rate": 7.343161856963614e-06, "loss": 1.9911, "step": 8470 }, { "epoch": 2.66, "learning_rate": 7.340025094102886e-06, "loss": 1.8882, "step": 8480 }, { "epoch": 2.66, "learning_rate": 7.336888331242159e-06, "loss": 2.0295, "step": 8490 }, { "epoch": 2.67, "learning_rate": 7.333751568381431e-06, "loss": 1.9635, "step": 8500 }, { "epoch": 2.67, "learning_rate": 7.3306148055207025e-06, "loss": 1.9396, "step": 8510 }, { "epoch": 2.67, "learning_rate": 7.327478042659975e-06, "loss": 1.9473, "step": 8520 }, { "epoch": 2.68, "learning_rate": 7.324341279799247e-06, "loss": 1.9323, "step": 8530 }, { "epoch": 2.68, "learning_rate": 7.32120451693852e-06, "loss": 1.9396, "step": 8540 }, { "epoch": 2.68, "learning_rate": 7.318067754077792e-06, "loss": 1.9011, "step": 8550 }, { "epoch": 2.69, "learning_rate": 7.314930991217064e-06, "loss": 1.9256, "step": 8560 }, { "epoch": 2.69, "learning_rate": 7.311794228356337e-06, "loss": 1.9724, "step": 8570 }, { "epoch": 2.69, "learning_rate": 7.3086574654956085e-06, "loss": 1.9135, "step": 8580 }, { "epoch": 2.69, "learning_rate": 7.305520702634881e-06, "loss": 1.9444, "step": 8590 }, { "epoch": 2.7, "learning_rate": 7.302383939774153e-06, "loss": 1.8843, "step": 8600 }, { "epoch": 2.7, "learning_rate": 7.2992471769134255e-06, "loss": 1.9864, "step": 8610 }, { "epoch": 2.7, "learning_rate": 7.296110414052698e-06, "loss": 1.9368, "step": 8620 }, { "epoch": 2.71, "learning_rate": 7.29297365119197e-06, "loss": 1.9264, "step": 8630 }, { "epoch": 2.71, "learning_rate": 7.289836888331243e-06, "loss": 1.9519, "step": 8640 }, { "epoch": 2.71, "learning_rate": 7.286700125470515e-06, "loss": 1.9394, "step": 8650 }, { "epoch": 2.72, "learning_rate": 7.2835633626097865e-06, "loss": 1.9725, "step": 8660 }, { "epoch": 2.72, "learning_rate": 7.2804265997490594e-06, "loss": 1.945, "step": 8670 }, { "epoch": 2.72, "learning_rate": 7.2772898368883315e-06, "loss": 1.9679, "step": 8680 }, { "epoch": 2.73, "learning_rate": 7.274153074027604e-06, "loss": 1.9632, "step": 8690 }, { "epoch": 2.73, "learning_rate": 7.271016311166876e-06, "loss": 1.9299, "step": 8700 }, { "epoch": 2.73, "learning_rate": 7.2678795483061485e-06, "loss": 1.8859, "step": 8710 }, { "epoch": 2.74, "learning_rate": 7.264742785445421e-06, "loss": 1.9887, "step": 8720 }, { "epoch": 2.74, "learning_rate": 7.2616060225846925e-06, "loss": 1.9067, "step": 8730 }, { "epoch": 2.74, "learning_rate": 7.258469259723966e-06, "loss": 1.9445, "step": 8740 }, { "epoch": 2.74, "learning_rate": 7.2553324968632375e-06, "loss": 1.9635, "step": 8750 }, { "epoch": 2.75, "learning_rate": 7.2521957340025095e-06, "loss": 1.9772, "step": 8760 }, { "epoch": 2.75, "learning_rate": 7.249058971141782e-06, "loss": 1.9778, "step": 8770 }, { "epoch": 2.75, "learning_rate": 7.2459222082810545e-06, "loss": 1.9077, "step": 8780 }, { "epoch": 2.76, "learning_rate": 7.242785445420327e-06, "loss": 2.0154, "step": 8790 }, { "epoch": 2.76, "learning_rate": 7.239648682559599e-06, "loss": 1.9579, "step": 8800 }, { "epoch": 2.76, "learning_rate": 7.236511919698871e-06, "loss": 1.9892, "step": 8810 }, { "epoch": 2.77, "learning_rate": 7.2333751568381435e-06, "loss": 1.927, "step": 8820 }, { "epoch": 2.77, "learning_rate": 7.2302383939774155e-06, "loss": 1.964, "step": 8830 }, { "epoch": 2.77, "learning_rate": 7.2271016311166884e-06, "loss": 1.9346, "step": 8840 }, { "epoch": 2.78, "learning_rate": 7.2239648682559605e-06, "loss": 1.8882, "step": 8850 }, { "epoch": 2.78, "learning_rate": 7.2208281053952325e-06, "loss": 1.9195, "step": 8860 }, { "epoch": 2.78, "learning_rate": 7.217691342534505e-06, "loss": 1.949, "step": 8870 }, { "epoch": 2.79, "learning_rate": 7.2145545796737775e-06, "loss": 1.9805, "step": 8880 }, { "epoch": 2.79, "learning_rate": 7.21141781681305e-06, "loss": 1.962, "step": 8890 }, { "epoch": 2.79, "learning_rate": 7.2082810539523215e-06, "loss": 1.9169, "step": 8900 }, { "epoch": 2.79, "learning_rate": 7.205144291091594e-06, "loss": 2.0492, "step": 8910 }, { "epoch": 2.8, "learning_rate": 7.2020075282308665e-06, "loss": 2.0174, "step": 8920 }, { "epoch": 2.8, "learning_rate": 7.1988707653701385e-06, "loss": 1.969, "step": 8930 }, { "epoch": 2.8, "learning_rate": 7.195734002509411e-06, "loss": 1.9197, "step": 8940 }, { "epoch": 2.81, "learning_rate": 7.1925972396486835e-06, "loss": 2.0031, "step": 8950 }, { "epoch": 2.81, "learning_rate": 7.189460476787955e-06, "loss": 1.9558, "step": 8960 }, { "epoch": 2.81, "learning_rate": 7.1863237139272276e-06, "loss": 1.94, "step": 8970 }, { "epoch": 2.82, "learning_rate": 7.1831869510665e-06, "loss": 1.9337, "step": 8980 }, { "epoch": 2.82, "learning_rate": 7.1800501882057725e-06, "loss": 1.9076, "step": 8990 }, { "epoch": 2.82, "learning_rate": 7.1769134253450445e-06, "loss": 1.9973, "step": 9000 }, { "epoch": 2.83, "learning_rate": 7.173776662484317e-06, "loss": 2.0062, "step": 9010 }, { "epoch": 2.83, "learning_rate": 7.1706398996235895e-06, "loss": 1.9347, "step": 9020 }, { "epoch": 2.83, "learning_rate": 7.1675031367628615e-06, "loss": 1.9871, "step": 9030 }, { "epoch": 2.84, "learning_rate": 7.164366373902134e-06, "loss": 1.9448, "step": 9040 }, { "epoch": 2.84, "learning_rate": 7.161229611041406e-06, "loss": 1.9392, "step": 9050 }, { "epoch": 2.84, "learning_rate": 7.158092848180678e-06, "loss": 1.9694, "step": 9060 }, { "epoch": 2.85, "learning_rate": 7.1549560853199505e-06, "loss": 1.8943, "step": 9070 }, { "epoch": 2.85, "learning_rate": 7.151819322459223e-06, "loss": 1.9112, "step": 9080 }, { "epoch": 2.85, "learning_rate": 7.1486825595984955e-06, "loss": 1.971, "step": 9090 }, { "epoch": 2.85, "learning_rate": 7.1455457967377675e-06, "loss": 2.0105, "step": 9100 }, { "epoch": 2.86, "learning_rate": 7.142409033877039e-06, "loss": 1.9241, "step": 9110 }, { "epoch": 2.86, "learning_rate": 7.1392722710163125e-06, "loss": 2.0026, "step": 9120 }, { "epoch": 2.86, "learning_rate": 7.136135508155584e-06, "loss": 1.9557, "step": 9130 }, { "epoch": 2.87, "learning_rate": 7.1329987452948566e-06, "loss": 1.9448, "step": 9140 }, { "epoch": 2.87, "learning_rate": 7.129861982434129e-06, "loss": 1.8889, "step": 9150 }, { "epoch": 2.87, "learning_rate": 7.126725219573401e-06, "loss": 1.9239, "step": 9160 }, { "epoch": 2.88, "learning_rate": 7.1235884567126735e-06, "loss": 2.022, "step": 9170 }, { "epoch": 2.88, "learning_rate": 7.120451693851946e-06, "loss": 1.9578, "step": 9180 }, { "epoch": 2.88, "learning_rate": 7.1173149309912185e-06, "loss": 1.9896, "step": 9190 }, { "epoch": 2.89, "learning_rate": 7.11417816813049e-06, "loss": 1.9313, "step": 9200 }, { "epoch": 2.89, "learning_rate": 7.111041405269762e-06, "loss": 1.8818, "step": 9210 }, { "epoch": 2.89, "learning_rate": 7.107904642409035e-06, "loss": 2.0096, "step": 9220 }, { "epoch": 2.9, "learning_rate": 7.104767879548307e-06, "loss": 1.9768, "step": 9230 }, { "epoch": 2.9, "learning_rate": 7.1016311166875795e-06, "loss": 1.9801, "step": 9240 }, { "epoch": 2.9, "learning_rate": 7.098494353826852e-06, "loss": 1.8505, "step": 9250 }, { "epoch": 2.9, "learning_rate": 7.095357590966123e-06, "loss": 1.9086, "step": 9260 }, { "epoch": 2.91, "learning_rate": 7.0922208281053965e-06, "loss": 1.9544, "step": 9270 }, { "epoch": 2.91, "learning_rate": 7.089084065244668e-06, "loss": 1.8913, "step": 9280 }, { "epoch": 2.91, "learning_rate": 7.085947302383941e-06, "loss": 1.9611, "step": 9290 }, { "epoch": 2.92, "learning_rate": 7.082810539523213e-06, "loss": 1.9619, "step": 9300 }, { "epoch": 2.92, "learning_rate": 7.079673776662485e-06, "loss": 1.9245, "step": 9310 }, { "epoch": 2.92, "learning_rate": 7.076537013801758e-06, "loss": 1.9268, "step": 9320 }, { "epoch": 2.93, "learning_rate": 7.07340025094103e-06, "loss": 1.9609, "step": 9330 }, { "epoch": 2.93, "learning_rate": 7.070263488080301e-06, "loss": 1.9626, "step": 9340 }, { "epoch": 2.93, "learning_rate": 7.067126725219574e-06, "loss": 1.9686, "step": 9350 }, { "epoch": 2.94, "learning_rate": 7.063989962358846e-06, "loss": 1.9401, "step": 9360 }, { "epoch": 2.94, "learning_rate": 7.060853199498119e-06, "loss": 2.0358, "step": 9370 }, { "epoch": 2.94, "learning_rate": 7.057716436637391e-06, "loss": 1.9737, "step": 9380 }, { "epoch": 2.95, "learning_rate": 7.054579673776663e-06, "loss": 1.8853, "step": 9390 }, { "epoch": 2.95, "learning_rate": 7.051442910915936e-06, "loss": 1.899, "step": 9400 }, { "epoch": 2.95, "learning_rate": 7.048306148055208e-06, "loss": 1.9412, "step": 9410 }, { "epoch": 2.95, "learning_rate": 7.045169385194481e-06, "loss": 1.9403, "step": 9420 }, { "epoch": 2.96, "learning_rate": 7.042032622333752e-06, "loss": 1.9022, "step": 9430 }, { "epoch": 2.96, "learning_rate": 7.038895859473024e-06, "loss": 1.9292, "step": 9440 }, { "epoch": 2.96, "learning_rate": 7.035759096612297e-06, "loss": 1.8933, "step": 9450 }, { "epoch": 2.97, "learning_rate": 7.032622333751569e-06, "loss": 1.9615, "step": 9460 }, { "epoch": 2.97, "learning_rate": 7.029485570890842e-06, "loss": 1.9655, "step": 9470 }, { "epoch": 2.97, "learning_rate": 7.026348808030114e-06, "loss": 1.9946, "step": 9480 }, { "epoch": 2.98, "learning_rate": 7.023212045169385e-06, "loss": 2.0278, "step": 9490 }, { "epoch": 2.98, "learning_rate": 7.020075282308658e-06, "loss": 1.9942, "step": 9500 }, { "epoch": 2.98, "learning_rate": 7.01693851944793e-06, "loss": 1.8639, "step": 9510 }, { "epoch": 2.99, "learning_rate": 7.013801756587203e-06, "loss": 1.9608, "step": 9520 }, { "epoch": 2.99, "learning_rate": 7.010664993726475e-06, "loss": 1.976, "step": 9530 }, { "epoch": 2.99, "learning_rate": 7.007528230865747e-06, "loss": 1.9355, "step": 9540 }, { "epoch": 3.0, "learning_rate": 7.00439146800502e-06, "loss": 1.9438, "step": 9550 }, { "epoch": 3.0, "learning_rate": 7.001254705144292e-06, "loss": 1.9831, "step": 9560 }, { "epoch": 3.0, "eval_loss": 1.8584038019180298, "eval_runtime": 13.6198, "eval_samples_per_second": 73.423, "eval_steps_per_second": 4.626, "step": 9564 }, { "epoch": 3.0, "learning_rate": 6.998117942283565e-06, "loss": 1.9902, "step": 9570 }, { "epoch": 3.01, "learning_rate": 6.994981179422836e-06, "loss": 1.9514, "step": 9580 }, { "epoch": 3.01, "learning_rate": 6.991844416562108e-06, "loss": 1.9013, "step": 9590 }, { "epoch": 3.01, "learning_rate": 6.988707653701381e-06, "loss": 1.9126, "step": 9600 }, { "epoch": 3.01, "learning_rate": 6.985570890840653e-06, "loss": 1.8983, "step": 9610 }, { "epoch": 3.02, "learning_rate": 6.982434127979926e-06, "loss": 1.9678, "step": 9620 }, { "epoch": 3.02, "learning_rate": 6.979297365119198e-06, "loss": 1.9002, "step": 9630 }, { "epoch": 3.02, "learning_rate": 6.976160602258469e-06, "loss": 1.9332, "step": 9640 }, { "epoch": 3.03, "learning_rate": 6.973023839397743e-06, "loss": 1.936, "step": 9650 }, { "epoch": 3.03, "learning_rate": 6.969887076537014e-06, "loss": 1.8892, "step": 9660 }, { "epoch": 3.03, "learning_rate": 6.966750313676287e-06, "loss": 1.9125, "step": 9670 }, { "epoch": 3.04, "learning_rate": 6.963613550815559e-06, "loss": 1.954, "step": 9680 }, { "epoch": 3.04, "learning_rate": 6.960476787954831e-06, "loss": 1.8804, "step": 9690 }, { "epoch": 3.04, "learning_rate": 6.957340025094104e-06, "loss": 1.9061, "step": 9700 }, { "epoch": 3.05, "learning_rate": 6.954203262233376e-06, "loss": 1.9739, "step": 9710 }, { "epoch": 3.05, "learning_rate": 6.951066499372649e-06, "loss": 1.8721, "step": 9720 }, { "epoch": 3.05, "learning_rate": 6.94792973651192e-06, "loss": 1.893, "step": 9730 }, { "epoch": 3.06, "learning_rate": 6.944792973651192e-06, "loss": 1.9453, "step": 9740 }, { "epoch": 3.06, "learning_rate": 6.941656210790465e-06, "loss": 1.9334, "step": 9750 }, { "epoch": 3.06, "learning_rate": 6.938519447929737e-06, "loss": 1.9699, "step": 9760 }, { "epoch": 3.06, "learning_rate": 6.93538268506901e-06, "loss": 1.9684, "step": 9770 }, { "epoch": 3.07, "learning_rate": 6.932245922208282e-06, "loss": 1.9842, "step": 9780 }, { "epoch": 3.07, "learning_rate": 6.929109159347554e-06, "loss": 1.9021, "step": 9790 }, { "epoch": 3.07, "learning_rate": 6.925972396486827e-06, "loss": 1.9448, "step": 9800 }, { "epoch": 3.08, "learning_rate": 6.922835633626098e-06, "loss": 1.9272, "step": 9810 }, { "epoch": 3.08, "learning_rate": 6.919698870765371e-06, "loss": 1.9218, "step": 9820 }, { "epoch": 3.08, "learning_rate": 6.916562107904643e-06, "loss": 1.9197, "step": 9830 }, { "epoch": 3.09, "learning_rate": 6.913425345043915e-06, "loss": 1.9336, "step": 9840 }, { "epoch": 3.09, "learning_rate": 6.910288582183188e-06, "loss": 1.9579, "step": 9850 }, { "epoch": 3.09, "learning_rate": 6.90715181932246e-06, "loss": 1.8816, "step": 9860 }, { "epoch": 3.1, "learning_rate": 6.904015056461733e-06, "loss": 1.9185, "step": 9870 }, { "epoch": 3.1, "learning_rate": 6.900878293601004e-06, "loss": 2.0019, "step": 9880 }, { "epoch": 3.1, "learning_rate": 6.897741530740276e-06, "loss": 1.9087, "step": 9890 }, { "epoch": 3.11, "learning_rate": 6.894604767879549e-06, "loss": 1.9677, "step": 9900 }, { "epoch": 3.11, "learning_rate": 6.891468005018821e-06, "loss": 1.9059, "step": 9910 }, { "epoch": 3.11, "learning_rate": 6.888331242158094e-06, "loss": 1.8861, "step": 9920 }, { "epoch": 3.11, "learning_rate": 6.885194479297366e-06, "loss": 1.8885, "step": 9930 }, { "epoch": 3.12, "learning_rate": 6.882057716436638e-06, "loss": 1.9043, "step": 9940 }, { "epoch": 3.12, "learning_rate": 6.878920953575911e-06, "loss": 1.9341, "step": 9950 }, { "epoch": 3.12, "learning_rate": 6.875784190715182e-06, "loss": 1.8998, "step": 9960 }, { "epoch": 3.13, "learning_rate": 6.872647427854455e-06, "loss": 1.8727, "step": 9970 }, { "epoch": 3.13, "learning_rate": 6.869510664993727e-06, "loss": 1.8693, "step": 9980 }, { "epoch": 3.13, "learning_rate": 6.866373902132999e-06, "loss": 1.9166, "step": 9990 }, { "epoch": 3.14, "learning_rate": 6.863237139272272e-06, "loss": 1.8551, "step": 10000 }, { "epoch": 3.14, "learning_rate": 6.860100376411544e-06, "loss": 1.9196, "step": 10010 }, { "epoch": 3.14, "learning_rate": 6.856963613550817e-06, "loss": 1.97, "step": 10020 }, { "epoch": 3.15, "learning_rate": 6.853826850690089e-06, "loss": 1.9672, "step": 10030 }, { "epoch": 3.15, "learning_rate": 6.85069008782936e-06, "loss": 1.8929, "step": 10040 }, { "epoch": 3.15, "learning_rate": 6.847553324968633e-06, "loss": 1.9713, "step": 10050 }, { "epoch": 3.16, "learning_rate": 6.844416562107905e-06, "loss": 1.9029, "step": 10060 }, { "epoch": 3.16, "learning_rate": 6.841279799247178e-06, "loss": 1.9074, "step": 10070 }, { "epoch": 3.16, "learning_rate": 6.83814303638645e-06, "loss": 1.9178, "step": 10080 }, { "epoch": 3.16, "learning_rate": 6.835006273525722e-06, "loss": 1.9428, "step": 10090 }, { "epoch": 3.17, "learning_rate": 6.831869510664995e-06, "loss": 1.9178, "step": 10100 }, { "epoch": 3.17, "learning_rate": 6.828732747804266e-06, "loss": 1.8657, "step": 10110 }, { "epoch": 3.17, "learning_rate": 6.825595984943539e-06, "loss": 1.9652, "step": 10120 }, { "epoch": 3.18, "learning_rate": 6.822459222082811e-06, "loss": 1.8956, "step": 10130 }, { "epoch": 3.18, "learning_rate": 6.819322459222083e-06, "loss": 1.945, "step": 10140 }, { "epoch": 3.18, "learning_rate": 6.816185696361356e-06, "loss": 1.9497, "step": 10150 }, { "epoch": 3.19, "learning_rate": 6.813048933500628e-06, "loss": 1.9494, "step": 10160 }, { "epoch": 3.19, "learning_rate": 6.809912170639901e-06, "loss": 2.0337, "step": 10170 }, { "epoch": 3.19, "learning_rate": 6.806775407779173e-06, "loss": 1.8585, "step": 10180 }, { "epoch": 3.2, "learning_rate": 6.803638644918444e-06, "loss": 1.9345, "step": 10190 }, { "epoch": 3.2, "learning_rate": 6.800501882057717e-06, "loss": 1.867, "step": 10200 }, { "epoch": 3.2, "learning_rate": 6.797365119196989e-06, "loss": 1.9511, "step": 10210 }, { "epoch": 3.21, "learning_rate": 6.794228356336261e-06, "loss": 1.9295, "step": 10220 }, { "epoch": 3.21, "learning_rate": 6.791091593475534e-06, "loss": 1.8682, "step": 10230 }, { "epoch": 3.21, "learning_rate": 6.787954830614806e-06, "loss": 1.9623, "step": 10240 }, { "epoch": 3.22, "learning_rate": 6.784818067754079e-06, "loss": 1.8572, "step": 10250 }, { "epoch": 3.22, "learning_rate": 6.78168130489335e-06, "loss": 1.913, "step": 10260 }, { "epoch": 3.22, "learning_rate": 6.778544542032622e-06, "loss": 1.8812, "step": 10270 }, { "epoch": 3.22, "learning_rate": 6.775407779171895e-06, "loss": 1.9372, "step": 10280 }, { "epoch": 3.23, "learning_rate": 6.772271016311167e-06, "loss": 1.9093, "step": 10290 }, { "epoch": 3.23, "learning_rate": 6.76913425345044e-06, "loss": 1.9351, "step": 10300 }, { "epoch": 3.23, "learning_rate": 6.765997490589712e-06, "loss": 1.9689, "step": 10310 }, { "epoch": 3.24, "learning_rate": 6.762860727728984e-06, "loss": 1.9457, "step": 10320 }, { "epoch": 3.24, "learning_rate": 6.759723964868257e-06, "loss": 1.9288, "step": 10330 }, { "epoch": 3.24, "learning_rate": 6.756587202007528e-06, "loss": 1.9382, "step": 10340 }, { "epoch": 3.25, "learning_rate": 6.753450439146801e-06, "loss": 1.868, "step": 10350 }, { "epoch": 3.25, "learning_rate": 6.750313676286073e-06, "loss": 1.9053, "step": 10360 }, { "epoch": 3.25, "learning_rate": 6.747176913425345e-06, "loss": 1.9358, "step": 10370 }, { "epoch": 3.26, "learning_rate": 6.744040150564618e-06, "loss": 1.9296, "step": 10380 }, { "epoch": 3.26, "learning_rate": 6.74090338770389e-06, "loss": 1.9294, "step": 10390 }, { "epoch": 3.26, "learning_rate": 6.737766624843163e-06, "loss": 1.9203, "step": 10400 }, { "epoch": 3.27, "learning_rate": 6.734629861982434e-06, "loss": 1.9498, "step": 10410 }, { "epoch": 3.27, "learning_rate": 6.731493099121706e-06, "loss": 1.8721, "step": 10420 }, { "epoch": 3.27, "learning_rate": 6.728356336260979e-06, "loss": 1.9807, "step": 10430 }, { "epoch": 3.27, "learning_rate": 6.725219573400251e-06, "loss": 1.9952, "step": 10440 }, { "epoch": 3.28, "learning_rate": 6.722082810539524e-06, "loss": 1.8955, "step": 10450 }, { "epoch": 3.28, "learning_rate": 6.718946047678796e-06, "loss": 1.954, "step": 10460 }, { "epoch": 3.28, "learning_rate": 6.715809284818068e-06, "loss": 1.9439, "step": 10470 }, { "epoch": 3.29, "learning_rate": 6.712672521957341e-06, "loss": 1.9301, "step": 10480 }, { "epoch": 3.29, "learning_rate": 6.709535759096612e-06, "loss": 1.9255, "step": 10490 }, { "epoch": 3.29, "learning_rate": 6.706398996235885e-06, "loss": 1.8499, "step": 10500 }, { "epoch": 3.3, "learning_rate": 6.703262233375157e-06, "loss": 1.8126, "step": 10510 }, { "epoch": 3.3, "learning_rate": 6.700125470514429e-06, "loss": 1.9344, "step": 10520 }, { "epoch": 3.3, "learning_rate": 6.696988707653702e-06, "loss": 1.9274, "step": 10530 }, { "epoch": 3.31, "learning_rate": 6.693851944792974e-06, "loss": 1.8945, "step": 10540 }, { "epoch": 3.31, "learning_rate": 6.690715181932247e-06, "loss": 1.8958, "step": 10550 }, { "epoch": 3.31, "learning_rate": 6.687578419071519e-06, "loss": 1.9337, "step": 10560 }, { "epoch": 3.32, "learning_rate": 6.68444165621079e-06, "loss": 1.9253, "step": 10570 }, { "epoch": 3.32, "learning_rate": 6.681304893350063e-06, "loss": 1.9544, "step": 10580 }, { "epoch": 3.32, "learning_rate": 6.678168130489335e-06, "loss": 1.8968, "step": 10590 }, { "epoch": 3.32, "learning_rate": 6.675031367628608e-06, "loss": 1.9505, "step": 10600 }, { "epoch": 3.33, "learning_rate": 6.67189460476788e-06, "loss": 1.9339, "step": 10610 }, { "epoch": 3.33, "learning_rate": 6.668757841907152e-06, "loss": 2.0069, "step": 10620 }, { "epoch": 3.33, "learning_rate": 6.665621079046425e-06, "loss": 1.8384, "step": 10630 }, { "epoch": 3.34, "learning_rate": 6.662484316185696e-06, "loss": 1.981, "step": 10640 }, { "epoch": 3.34, "learning_rate": 6.659347553324969e-06, "loss": 1.9825, "step": 10650 }, { "epoch": 3.34, "learning_rate": 6.656210790464241e-06, "loss": 1.8938, "step": 10660 }, { "epoch": 3.35, "learning_rate": 6.653074027603513e-06, "loss": 1.89, "step": 10670 }, { "epoch": 3.35, "learning_rate": 6.649937264742786e-06, "loss": 1.9083, "step": 10680 }, { "epoch": 3.35, "learning_rate": 6.646800501882058e-06, "loss": 1.8732, "step": 10690 }, { "epoch": 3.36, "learning_rate": 6.643663739021331e-06, "loss": 1.89, "step": 10700 }, { "epoch": 3.36, "learning_rate": 6.640526976160603e-06, "loss": 1.9387, "step": 10710 }, { "epoch": 3.36, "learning_rate": 6.637390213299874e-06, "loss": 1.9319, "step": 10720 }, { "epoch": 3.37, "learning_rate": 6.634253450439147e-06, "loss": 1.9586, "step": 10730 }, { "epoch": 3.37, "learning_rate": 6.631116687578419e-06, "loss": 1.9244, "step": 10740 }, { "epoch": 3.37, "learning_rate": 6.627979924717692e-06, "loss": 1.9045, "step": 10750 }, { "epoch": 3.38, "learning_rate": 6.624843161856964e-06, "loss": 1.8757, "step": 10760 }, { "epoch": 3.38, "learning_rate": 6.621706398996236e-06, "loss": 1.8907, "step": 10770 }, { "epoch": 3.38, "learning_rate": 6.618569636135509e-06, "loss": 1.9341, "step": 10780 }, { "epoch": 3.38, "learning_rate": 6.61543287327478e-06, "loss": 1.9399, "step": 10790 }, { "epoch": 3.39, "learning_rate": 6.612296110414054e-06, "loss": 1.9653, "step": 10800 }, { "epoch": 3.39, "learning_rate": 6.609159347553325e-06, "loss": 1.9733, "step": 10810 }, { "epoch": 3.39, "learning_rate": 6.606022584692597e-06, "loss": 1.924, "step": 10820 }, { "epoch": 3.4, "learning_rate": 6.60288582183187e-06, "loss": 1.9161, "step": 10830 }, { "epoch": 3.4, "learning_rate": 6.599749058971142e-06, "loss": 1.9374, "step": 10840 }, { "epoch": 3.4, "learning_rate": 6.596612296110415e-06, "loss": 2.0109, "step": 10850 }, { "epoch": 3.41, "learning_rate": 6.593475533249687e-06, "loss": 1.8972, "step": 10860 }, { "epoch": 3.41, "learning_rate": 6.5903387703889584e-06, "loss": 1.8119, "step": 10870 }, { "epoch": 3.41, "learning_rate": 6.587202007528231e-06, "loss": 1.8996, "step": 10880 }, { "epoch": 3.42, "learning_rate": 6.584065244667503e-06, "loss": 2.019, "step": 10890 }, { "epoch": 3.42, "learning_rate": 6.580928481806776e-06, "loss": 1.9472, "step": 10900 }, { "epoch": 3.42, "learning_rate": 6.577791718946048e-06, "loss": 1.9423, "step": 10910 }, { "epoch": 3.43, "learning_rate": 6.57465495608532e-06, "loss": 1.9815, "step": 10920 }, { "epoch": 3.43, "learning_rate": 6.571518193224593e-06, "loss": 1.8826, "step": 10930 }, { "epoch": 3.43, "learning_rate": 6.568381430363865e-06, "loss": 1.8968, "step": 10940 }, { "epoch": 3.43, "learning_rate": 6.565244667503138e-06, "loss": 1.9269, "step": 10950 }, { "epoch": 3.44, "learning_rate": 6.562107904642409e-06, "loss": 1.8981, "step": 10960 }, { "epoch": 3.44, "learning_rate": 6.5589711417816814e-06, "loss": 1.9447, "step": 10970 }, { "epoch": 3.44, "learning_rate": 6.555834378920954e-06, "loss": 1.9025, "step": 10980 }, { "epoch": 3.45, "learning_rate": 6.552697616060226e-06, "loss": 1.8283, "step": 10990 }, { "epoch": 3.45, "learning_rate": 6.549560853199499e-06, "loss": 1.8838, "step": 11000 }, { "epoch": 3.45, "learning_rate": 6.546424090338771e-06, "loss": 1.9003, "step": 11010 }, { "epoch": 3.46, "learning_rate": 6.5432873274780425e-06, "loss": 1.9028, "step": 11020 }, { "epoch": 3.46, "learning_rate": 6.540150564617315e-06, "loss": 1.9489, "step": 11030 }, { "epoch": 3.46, "learning_rate": 6.5370138017565874e-06, "loss": 1.9402, "step": 11040 }, { "epoch": 3.47, "learning_rate": 6.53387703889586e-06, "loss": 1.8734, "step": 11050 }, { "epoch": 3.47, "learning_rate": 6.530740276035132e-06, "loss": 1.9259, "step": 11060 }, { "epoch": 3.47, "learning_rate": 6.527603513174404e-06, "loss": 1.8362, "step": 11070 }, { "epoch": 3.48, "learning_rate": 6.524466750313677e-06, "loss": 1.9451, "step": 11080 }, { "epoch": 3.48, "learning_rate": 6.521329987452949e-06, "loss": 1.8961, "step": 11090 }, { "epoch": 3.48, "learning_rate": 6.5181932245922206e-06, "loss": 1.9401, "step": 11100 }, { "epoch": 3.48, "learning_rate": 6.5150564617314934e-06, "loss": 1.9333, "step": 11110 }, { "epoch": 3.49, "learning_rate": 6.5119196988707655e-06, "loss": 1.8602, "step": 11120 }, { "epoch": 3.49, "learning_rate": 6.508782936010038e-06, "loss": 1.9425, "step": 11130 }, { "epoch": 3.49, "learning_rate": 6.5056461731493104e-06, "loss": 1.9593, "step": 11140 }, { "epoch": 3.5, "learning_rate": 6.5025094102885825e-06, "loss": 1.8614, "step": 11150 }, { "epoch": 3.5, "learning_rate": 6.499372647427855e-06, "loss": 1.8861, "step": 11160 }, { "epoch": 3.5, "learning_rate": 6.4962358845671266e-06, "loss": 1.9378, "step": 11170 }, { "epoch": 3.51, "learning_rate": 6.4930991217064e-06, "loss": 1.9192, "step": 11180 }, { "epoch": 3.51, "learning_rate": 6.4899623588456715e-06, "loss": 1.8886, "step": 11190 }, { "epoch": 3.51, "learning_rate": 6.4868255959849435e-06, "loss": 1.8916, "step": 11200 }, { "epoch": 3.52, "learning_rate": 6.4836888331242164e-06, "loss": 1.9126, "step": 11210 }, { "epoch": 3.52, "learning_rate": 6.4805520702634885e-06, "loss": 1.9063, "step": 11220 }, { "epoch": 3.52, "learning_rate": 6.477415307402761e-06, "loss": 1.9309, "step": 11230 }, { "epoch": 3.53, "learning_rate": 6.474278544542033e-06, "loss": 1.949, "step": 11240 }, { "epoch": 3.53, "learning_rate": 6.471141781681305e-06, "loss": 1.9189, "step": 11250 }, { "epoch": 3.53, "learning_rate": 6.4680050188205775e-06, "loss": 1.9416, "step": 11260 }, { "epoch": 3.54, "learning_rate": 6.4648682559598496e-06, "loss": 1.9865, "step": 11270 }, { "epoch": 3.54, "learning_rate": 6.4617314930991224e-06, "loss": 1.8799, "step": 11280 }, { "epoch": 3.54, "learning_rate": 6.4585947302383945e-06, "loss": 1.9771, "step": 11290 }, { "epoch": 3.54, "learning_rate": 6.4554579673776665e-06, "loss": 1.933, "step": 11300 }, { "epoch": 3.55, "learning_rate": 6.4523212045169394e-06, "loss": 1.889, "step": 11310 }, { "epoch": 3.55, "learning_rate": 6.4491844416562115e-06, "loss": 1.9768, "step": 11320 }, { "epoch": 3.55, "learning_rate": 6.446047678795484e-06, "loss": 1.8904, "step": 11330 }, { "epoch": 3.56, "learning_rate": 6.4429109159347556e-06, "loss": 1.988, "step": 11340 }, { "epoch": 3.56, "learning_rate": 6.439774153074028e-06, "loss": 1.9245, "step": 11350 }, { "epoch": 3.56, "learning_rate": 6.4366373902133005e-06, "loss": 2.0271, "step": 11360 }, { "epoch": 3.57, "learning_rate": 6.4335006273525725e-06, "loss": 1.9249, "step": 11370 }, { "epoch": 3.57, "learning_rate": 6.4303638644918454e-06, "loss": 1.9529, "step": 11380 }, { "epoch": 3.57, "learning_rate": 6.4272271016311175e-06, "loss": 1.9427, "step": 11390 }, { "epoch": 3.58, "learning_rate": 6.424090338770389e-06, "loss": 1.8737, "step": 11400 }, { "epoch": 3.58, "learning_rate": 6.4209535759096616e-06, "loss": 1.934, "step": 11410 }, { "epoch": 3.58, "learning_rate": 6.417816813048934e-06, "loss": 1.97, "step": 11420 }, { "epoch": 3.59, "learning_rate": 6.4146800501882065e-06, "loss": 1.8813, "step": 11430 }, { "epoch": 3.59, "learning_rate": 6.4115432873274786e-06, "loss": 1.9036, "step": 11440 }, { "epoch": 3.59, "learning_rate": 6.408406524466751e-06, "loss": 1.9235, "step": 11450 }, { "epoch": 3.59, "learning_rate": 6.4052697616060235e-06, "loss": 1.9351, "step": 11460 }, { "epoch": 3.6, "learning_rate": 6.4021329987452955e-06, "loss": 1.8937, "step": 11470 }, { "epoch": 3.6, "learning_rate": 6.3989962358845684e-06, "loss": 1.8795, "step": 11480 }, { "epoch": 3.6, "learning_rate": 6.39585947302384e-06, "loss": 1.9792, "step": 11490 }, { "epoch": 3.61, "learning_rate": 6.392722710163112e-06, "loss": 1.9934, "step": 11500 }, { "epoch": 3.61, "learning_rate": 6.3895859473023846e-06, "loss": 1.955, "step": 11510 }, { "epoch": 3.61, "learning_rate": 6.386449184441657e-06, "loss": 1.9538, "step": 11520 }, { "epoch": 3.62, "learning_rate": 6.3833124215809295e-06, "loss": 1.9228, "step": 11530 }, { "epoch": 3.62, "learning_rate": 6.3801756587202015e-06, "loss": 1.8933, "step": 11540 }, { "epoch": 3.62, "learning_rate": 6.377038895859473e-06, "loss": 1.8376, "step": 11550 }, { "epoch": 3.63, "learning_rate": 6.373902132998746e-06, "loss": 1.8814, "step": 11560 }, { "epoch": 3.63, "learning_rate": 6.370765370138018e-06, "loss": 1.9261, "step": 11570 }, { "epoch": 3.63, "learning_rate": 6.3676286072772906e-06, "loss": 1.9451, "step": 11580 }, { "epoch": 3.64, "learning_rate": 6.364491844416563e-06, "loss": 1.9204, "step": 11590 }, { "epoch": 3.64, "learning_rate": 6.361355081555835e-06, "loss": 1.9825, "step": 11600 }, { "epoch": 3.64, "learning_rate": 6.3582183186951076e-06, "loss": 1.9243, "step": 11610 }, { "epoch": 3.64, "learning_rate": 6.35508155583438e-06, "loss": 1.9351, "step": 11620 }, { "epoch": 3.65, "learning_rate": 6.3519447929736525e-06, "loss": 1.8751, "step": 11630 }, { "epoch": 3.65, "learning_rate": 6.348808030112924e-06, "loss": 1.8572, "step": 11640 }, { "epoch": 3.65, "learning_rate": 6.345671267252196e-06, "loss": 1.8854, "step": 11650 }, { "epoch": 3.66, "learning_rate": 6.342534504391469e-06, "loss": 1.8833, "step": 11660 }, { "epoch": 3.66, "learning_rate": 6.339397741530741e-06, "loss": 1.8723, "step": 11670 }, { "epoch": 3.66, "learning_rate": 6.3362609786700136e-06, "loss": 1.8794, "step": 11680 }, { "epoch": 3.67, "learning_rate": 6.333124215809286e-06, "loss": 1.9154, "step": 11690 }, { "epoch": 3.67, "learning_rate": 6.329987452948557e-06, "loss": 1.8708, "step": 11700 }, { "epoch": 3.67, "learning_rate": 6.3268506900878305e-06, "loss": 1.8722, "step": 11710 }, { "epoch": 3.68, "learning_rate": 6.323713927227102e-06, "loss": 1.9407, "step": 11720 }, { "epoch": 3.68, "learning_rate": 6.320577164366375e-06, "loss": 1.9523, "step": 11730 }, { "epoch": 3.68, "learning_rate": 6.317440401505647e-06, "loss": 1.9717, "step": 11740 }, { "epoch": 3.69, "learning_rate": 6.314303638644919e-06, "loss": 1.921, "step": 11750 }, { "epoch": 3.69, "learning_rate": 6.311166875784192e-06, "loss": 1.9393, "step": 11760 }, { "epoch": 3.69, "learning_rate": 6.308030112923464e-06, "loss": 1.9194, "step": 11770 }, { "epoch": 3.7, "learning_rate": 6.3048933500627365e-06, "loss": 1.9261, "step": 11780 }, { "epoch": 3.7, "learning_rate": 6.301756587202008e-06, "loss": 1.9923, "step": 11790 }, { "epoch": 3.7, "learning_rate": 6.29861982434128e-06, "loss": 1.9222, "step": 11800 }, { "epoch": 3.7, "learning_rate": 6.295483061480553e-06, "loss": 1.8534, "step": 11810 }, { "epoch": 3.71, "learning_rate": 6.292346298619825e-06, "loss": 1.845, "step": 11820 }, { "epoch": 3.71, "learning_rate": 6.289209535759098e-06, "loss": 1.9502, "step": 11830 }, { "epoch": 3.71, "learning_rate": 6.28607277289837e-06, "loss": 1.8621, "step": 11840 }, { "epoch": 3.72, "learning_rate": 6.282936010037642e-06, "loss": 1.9693, "step": 11850 }, { "epoch": 3.72, "learning_rate": 6.279799247176915e-06, "loss": 1.9975, "step": 11860 }, { "epoch": 3.72, "learning_rate": 6.276662484316186e-06, "loss": 1.9713, "step": 11870 }, { "epoch": 3.73, "learning_rate": 6.273525721455459e-06, "loss": 1.8751, "step": 11880 }, { "epoch": 3.73, "learning_rate": 6.270388958594731e-06, "loss": 1.9712, "step": 11890 }, { "epoch": 3.73, "learning_rate": 6.267252195734003e-06, "loss": 1.8879, "step": 11900 }, { "epoch": 3.74, "learning_rate": 6.264115432873276e-06, "loss": 1.9103, "step": 11910 }, { "epoch": 3.74, "learning_rate": 6.260978670012548e-06, "loss": 1.9438, "step": 11920 }, { "epoch": 3.74, "learning_rate": 6.257841907151821e-06, "loss": 1.8902, "step": 11930 }, { "epoch": 3.75, "learning_rate": 6.254705144291092e-06, "loss": 1.9667, "step": 11940 }, { "epoch": 3.75, "learning_rate": 6.251568381430364e-06, "loss": 1.9765, "step": 11950 }, { "epoch": 3.75, "learning_rate": 6.248431618569637e-06, "loss": 1.8914, "step": 11960 }, { "epoch": 3.75, "learning_rate": 6.245294855708909e-06, "loss": 1.9268, "step": 11970 }, { "epoch": 3.76, "learning_rate": 6.242158092848181e-06, "loss": 1.9264, "step": 11980 }, { "epoch": 3.76, "learning_rate": 6.239021329987454e-06, "loss": 1.8823, "step": 11990 }, { "epoch": 3.76, "learning_rate": 6.235884567126726e-06, "loss": 1.9149, "step": 12000 }, { "epoch": 3.77, "learning_rate": 6.232747804265999e-06, "loss": 1.8905, "step": 12010 }, { "epoch": 3.77, "learning_rate": 6.22961104140527e-06, "loss": 1.8442, "step": 12020 }, { "epoch": 3.77, "learning_rate": 6.226474278544542e-06, "loss": 1.9242, "step": 12030 }, { "epoch": 3.78, "learning_rate": 6.223337515683815e-06, "loss": 1.8998, "step": 12040 }, { "epoch": 3.78, "learning_rate": 6.220200752823087e-06, "loss": 1.8563, "step": 12050 }, { "epoch": 3.78, "learning_rate": 6.21706398996236e-06, "loss": 1.9342, "step": 12060 }, { "epoch": 3.79, "learning_rate": 6.213927227101632e-06, "loss": 1.9558, "step": 12070 }, { "epoch": 3.79, "learning_rate": 6.210790464240903e-06, "loss": 1.9036, "step": 12080 }, { "epoch": 3.79, "learning_rate": 6.207653701380177e-06, "loss": 2.0094, "step": 12090 }, { "epoch": 3.8, "learning_rate": 6.204516938519448e-06, "loss": 1.9639, "step": 12100 }, { "epoch": 3.8, "learning_rate": 6.201380175658721e-06, "loss": 1.9426, "step": 12110 }, { "epoch": 3.8, "learning_rate": 6.198243412797993e-06, "loss": 1.8982, "step": 12120 }, { "epoch": 3.8, "learning_rate": 6.195106649937265e-06, "loss": 1.9271, "step": 12130 }, { "epoch": 3.81, "learning_rate": 6.191969887076538e-06, "loss": 1.9468, "step": 12140 }, { "epoch": 3.81, "learning_rate": 6.18883312421581e-06, "loss": 1.7622, "step": 12150 }, { "epoch": 3.81, "learning_rate": 6.185696361355083e-06, "loss": 1.8988, "step": 12160 }, { "epoch": 3.82, "learning_rate": 6.182559598494354e-06, "loss": 1.8992, "step": 12170 }, { "epoch": 3.82, "learning_rate": 6.179422835633626e-06, "loss": 1.8374, "step": 12180 }, { "epoch": 3.82, "learning_rate": 6.176286072772899e-06, "loss": 1.9129, "step": 12190 }, { "epoch": 3.83, "learning_rate": 6.173149309912171e-06, "loss": 1.9074, "step": 12200 }, { "epoch": 3.83, "learning_rate": 6.170012547051444e-06, "loss": 1.8697, "step": 12210 }, { "epoch": 3.83, "learning_rate": 6.166875784190716e-06, "loss": 1.9257, "step": 12220 }, { "epoch": 3.84, "learning_rate": 6.163739021329988e-06, "loss": 1.9101, "step": 12230 }, { "epoch": 3.84, "learning_rate": 6.160602258469261e-06, "loss": 1.9156, "step": 12240 }, { "epoch": 3.84, "learning_rate": 6.157465495608532e-06, "loss": 1.9763, "step": 12250 }, { "epoch": 3.85, "learning_rate": 6.154328732747805e-06, "loss": 1.9401, "step": 12260 }, { "epoch": 3.85, "learning_rate": 6.151191969887077e-06, "loss": 1.8985, "step": 12270 }, { "epoch": 3.85, "learning_rate": 6.148055207026349e-06, "loss": 1.9099, "step": 12280 }, { "epoch": 3.86, "learning_rate": 6.144918444165622e-06, "loss": 1.9538, "step": 12290 }, { "epoch": 3.86, "learning_rate": 6.141781681304894e-06, "loss": 1.8808, "step": 12300 }, { "epoch": 3.86, "learning_rate": 6.138644918444167e-06, "loss": 1.9, "step": 12310 }, { "epoch": 3.86, "learning_rate": 6.135508155583438e-06, "loss": 1.8517, "step": 12320 }, { "epoch": 3.87, "learning_rate": 6.13237139272271e-06, "loss": 1.9548, "step": 12330 }, { "epoch": 3.87, "learning_rate": 6.129234629861983e-06, "loss": 1.9095, "step": 12340 }, { "epoch": 3.87, "learning_rate": 6.126097867001255e-06, "loss": 1.9689, "step": 12350 }, { "epoch": 3.88, "learning_rate": 6.122961104140528e-06, "loss": 1.9209, "step": 12360 }, { "epoch": 3.88, "learning_rate": 6.1198243412798e-06, "loss": 1.859, "step": 12370 }, { "epoch": 3.88, "learning_rate": 6.116687578419072e-06, "loss": 1.8979, "step": 12380 }, { "epoch": 3.89, "learning_rate": 6.113550815558345e-06, "loss": 1.929, "step": 12390 }, { "epoch": 3.89, "learning_rate": 6.110414052697616e-06, "loss": 1.9601, "step": 12400 }, { "epoch": 3.89, "learning_rate": 6.107277289836889e-06, "loss": 1.928, "step": 12410 }, { "epoch": 3.9, "learning_rate": 6.104140526976161e-06, "loss": 1.9434, "step": 12420 }, { "epoch": 3.9, "learning_rate": 6.101003764115433e-06, "loss": 1.8637, "step": 12430 }, { "epoch": 3.9, "learning_rate": 6.097867001254706e-06, "loss": 1.9568, "step": 12440 }, { "epoch": 3.91, "learning_rate": 6.094730238393978e-06, "loss": 1.9046, "step": 12450 }, { "epoch": 3.91, "learning_rate": 6.091593475533251e-06, "loss": 1.8826, "step": 12460 }, { "epoch": 3.91, "learning_rate": 6.088456712672523e-06, "loss": 1.8399, "step": 12470 }, { "epoch": 3.91, "learning_rate": 6.085319949811794e-06, "loss": 1.9354, "step": 12480 }, { "epoch": 3.92, "learning_rate": 6.082183186951067e-06, "loss": 1.9155, "step": 12490 }, { "epoch": 3.92, "learning_rate": 6.079046424090339e-06, "loss": 1.8871, "step": 12500 }, { "epoch": 3.92, "learning_rate": 6.075909661229612e-06, "loss": 1.8971, "step": 12510 }, { "epoch": 3.93, "learning_rate": 6.072772898368884e-06, "loss": 1.9115, "step": 12520 }, { "epoch": 3.93, "learning_rate": 6.069636135508156e-06, "loss": 1.8963, "step": 12530 }, { "epoch": 3.93, "learning_rate": 6.066499372647429e-06, "loss": 1.9291, "step": 12540 }, { "epoch": 3.94, "learning_rate": 6.0633626097867e-06, "loss": 1.9453, "step": 12550 }, { "epoch": 3.94, "learning_rate": 6.060225846925973e-06, "loss": 1.9084, "step": 12560 }, { "epoch": 3.94, "learning_rate": 6.057089084065245e-06, "loss": 1.9131, "step": 12570 }, { "epoch": 3.95, "learning_rate": 6.053952321204517e-06, "loss": 1.9279, "step": 12580 }, { "epoch": 3.95, "learning_rate": 6.05081555834379e-06, "loss": 1.8515, "step": 12590 }, { "epoch": 3.95, "learning_rate": 6.047678795483062e-06, "loss": 1.7713, "step": 12600 }, { "epoch": 3.96, "learning_rate": 6.044542032622335e-06, "loss": 1.9936, "step": 12610 }, { "epoch": 3.96, "learning_rate": 6.041405269761607e-06, "loss": 1.9475, "step": 12620 }, { "epoch": 3.96, "learning_rate": 6.038268506900878e-06, "loss": 1.8708, "step": 12630 }, { "epoch": 3.96, "learning_rate": 6.035131744040151e-06, "loss": 1.9751, "step": 12640 }, { "epoch": 3.97, "learning_rate": 6.031994981179423e-06, "loss": 1.908, "step": 12650 }, { "epoch": 3.97, "learning_rate": 6.028858218318696e-06, "loss": 1.8999, "step": 12660 }, { "epoch": 3.97, "learning_rate": 6.025721455457968e-06, "loss": 1.8607, "step": 12670 }, { "epoch": 3.98, "learning_rate": 6.02258469259724e-06, "loss": 1.921, "step": 12680 }, { "epoch": 3.98, "learning_rate": 6.019447929736513e-06, "loss": 1.8849, "step": 12690 }, { "epoch": 3.98, "learning_rate": 6.016311166875784e-06, "loss": 1.9265, "step": 12700 }, { "epoch": 3.99, "learning_rate": 6.013174404015057e-06, "loss": 1.9314, "step": 12710 }, { "epoch": 3.99, "learning_rate": 6.010037641154329e-06, "loss": 1.9201, "step": 12720 }, { "epoch": 3.99, "learning_rate": 6.006900878293601e-06, "loss": 1.9461, "step": 12730 }, { "epoch": 4.0, "learning_rate": 6.003764115432874e-06, "loss": 1.9463, "step": 12740 }, { "epoch": 4.0, "learning_rate": 6.000627352572146e-06, "loss": 1.8387, "step": 12750 }, { "epoch": 4.0, "eval_loss": 1.8398990631103516, "eval_runtime": 13.6302, "eval_samples_per_second": 73.366, "eval_steps_per_second": 4.622, "step": 12752 }, { "epoch": 4.0, "learning_rate": 5.997490589711419e-06, "loss": 1.8553, "step": 12760 }, { "epoch": 4.01, "learning_rate": 5.994353826850691e-06, "loss": 1.8496, "step": 12770 }, { "epoch": 4.01, "learning_rate": 5.991217063989962e-06, "loss": 1.9154, "step": 12780 }, { "epoch": 4.01, "learning_rate": 5.988080301129235e-06, "loss": 1.8631, "step": 12790 }, { "epoch": 4.02, "learning_rate": 5.984943538268507e-06, "loss": 1.9562, "step": 12800 }, { "epoch": 4.02, "learning_rate": 5.98180677540778e-06, "loss": 1.9106, "step": 12810 }, { "epoch": 4.02, "learning_rate": 5.978670012547052e-06, "loss": 1.9179, "step": 12820 }, { "epoch": 4.02, "learning_rate": 5.975533249686324e-06, "loss": 1.8616, "step": 12830 }, { "epoch": 4.03, "learning_rate": 5.972396486825597e-06, "loss": 1.903, "step": 12840 }, { "epoch": 4.03, "learning_rate": 5.969259723964868e-06, "loss": 1.959, "step": 12850 }, { "epoch": 4.03, "learning_rate": 5.96612296110414e-06, "loss": 1.8867, "step": 12860 }, { "epoch": 4.04, "learning_rate": 5.962986198243413e-06, "loss": 1.889, "step": 12870 }, { "epoch": 4.04, "learning_rate": 5.959849435382685e-06, "loss": 1.9415, "step": 12880 }, { "epoch": 4.04, "learning_rate": 5.956712672521958e-06, "loss": 1.9152, "step": 12890 }, { "epoch": 4.05, "learning_rate": 5.95357590966123e-06, "loss": 1.8811, "step": 12900 }, { "epoch": 4.05, "learning_rate": 5.950439146800502e-06, "loss": 1.8701, "step": 12910 }, { "epoch": 4.05, "learning_rate": 5.947302383939775e-06, "loss": 1.9534, "step": 12920 }, { "epoch": 4.06, "learning_rate": 5.944165621079046e-06, "loss": 1.8416, "step": 12930 }, { "epoch": 4.06, "learning_rate": 5.941028858218319e-06, "loss": 1.9234, "step": 12940 }, { "epoch": 4.06, "learning_rate": 5.937892095357591e-06, "loss": 1.953, "step": 12950 }, { "epoch": 4.07, "learning_rate": 5.934755332496863e-06, "loss": 1.8865, "step": 12960 }, { "epoch": 4.07, "learning_rate": 5.931618569636136e-06, "loss": 1.9868, "step": 12970 }, { "epoch": 4.07, "learning_rate": 5.928481806775408e-06, "loss": 1.9212, "step": 12980 }, { "epoch": 4.07, "learning_rate": 5.925345043914681e-06, "loss": 1.8667, "step": 12990 }, { "epoch": 4.08, "learning_rate": 5.922208281053953e-06, "loss": 1.9371, "step": 13000 }, { "epoch": 4.08, "learning_rate": 5.919071518193224e-06, "loss": 1.9271, "step": 13010 }, { "epoch": 4.08, "learning_rate": 5.915934755332497e-06, "loss": 1.9149, "step": 13020 }, { "epoch": 4.09, "learning_rate": 5.912797992471769e-06, "loss": 1.9657, "step": 13030 }, { "epoch": 4.09, "learning_rate": 5.909661229611042e-06, "loss": 1.8185, "step": 13040 }, { "epoch": 4.09, "learning_rate": 5.906524466750314e-06, "loss": 1.943, "step": 13050 }, { "epoch": 4.1, "learning_rate": 5.903387703889586e-06, "loss": 1.8899, "step": 13060 }, { "epoch": 4.1, "learning_rate": 5.900250941028859e-06, "loss": 1.8154, "step": 13070 }, { "epoch": 4.1, "learning_rate": 5.89711417816813e-06, "loss": 1.884, "step": 13080 }, { "epoch": 4.11, "learning_rate": 5.893977415307403e-06, "loss": 1.8621, "step": 13090 }, { "epoch": 4.11, "learning_rate": 5.890840652446675e-06, "loss": 1.8992, "step": 13100 }, { "epoch": 4.11, "learning_rate": 5.887703889585947e-06, "loss": 1.8549, "step": 13110 }, { "epoch": 4.12, "learning_rate": 5.88456712672522e-06, "loss": 1.8813, "step": 13120 }, { "epoch": 4.12, "learning_rate": 5.881430363864492e-06, "loss": 1.887, "step": 13130 }, { "epoch": 4.12, "learning_rate": 5.878293601003765e-06, "loss": 1.8874, "step": 13140 }, { "epoch": 4.12, "learning_rate": 5.875156838143037e-06, "loss": 1.9424, "step": 13150 }, { "epoch": 4.13, "learning_rate": 5.872020075282308e-06, "loss": 1.841, "step": 13160 }, { "epoch": 4.13, "learning_rate": 5.868883312421581e-06, "loss": 1.9826, "step": 13170 }, { "epoch": 4.13, "learning_rate": 5.865746549560853e-06, "loss": 1.9082, "step": 13180 }, { "epoch": 4.14, "learning_rate": 5.862609786700126e-06, "loss": 1.9529, "step": 13190 }, { "epoch": 4.14, "learning_rate": 5.859473023839398e-06, "loss": 1.8686, "step": 13200 }, { "epoch": 4.14, "learning_rate": 5.85633626097867e-06, "loss": 1.8908, "step": 13210 }, { "epoch": 4.15, "learning_rate": 5.853199498117943e-06, "loss": 1.8777, "step": 13220 }, { "epoch": 4.15, "learning_rate": 5.850062735257214e-06, "loss": 1.9514, "step": 13230 }, { "epoch": 4.15, "learning_rate": 5.846925972396488e-06, "loss": 1.8951, "step": 13240 }, { "epoch": 4.16, "learning_rate": 5.843789209535759e-06, "loss": 1.9109, "step": 13250 }, { "epoch": 4.16, "learning_rate": 5.840652446675031e-06, "loss": 1.9708, "step": 13260 }, { "epoch": 4.16, "learning_rate": 5.837515683814304e-06, "loss": 1.9357, "step": 13270 }, { "epoch": 4.17, "learning_rate": 5.834378920953576e-06, "loss": 1.9021, "step": 13280 }, { "epoch": 4.17, "learning_rate": 5.831242158092849e-06, "loss": 1.9638, "step": 13290 }, { "epoch": 4.17, "learning_rate": 5.828105395232121e-06, "loss": 1.8816, "step": 13300 }, { "epoch": 4.18, "learning_rate": 5.8249686323713925e-06, "loss": 1.8564, "step": 13310 }, { "epoch": 4.18, "learning_rate": 5.821831869510665e-06, "loss": 1.9092, "step": 13320 }, { "epoch": 4.18, "learning_rate": 5.818695106649937e-06, "loss": 1.9668, "step": 13330 }, { "epoch": 4.18, "learning_rate": 5.81555834378921e-06, "loss": 1.8914, "step": 13340 }, { "epoch": 4.19, "learning_rate": 5.812421580928482e-06, "loss": 1.924, "step": 13350 }, { "epoch": 4.19, "learning_rate": 5.809284818067754e-06, "loss": 1.8328, "step": 13360 }, { "epoch": 4.19, "learning_rate": 5.806148055207027e-06, "loss": 1.9482, "step": 13370 }, { "epoch": 4.2, "learning_rate": 5.803011292346299e-06, "loss": 1.8671, "step": 13380 }, { "epoch": 4.2, "learning_rate": 5.799874529485572e-06, "loss": 1.9224, "step": 13390 }, { "epoch": 4.2, "learning_rate": 5.796737766624843e-06, "loss": 1.927, "step": 13400 }, { "epoch": 4.21, "learning_rate": 5.7936010037641154e-06, "loss": 1.8817, "step": 13410 }, { "epoch": 4.21, "learning_rate": 5.790464240903388e-06, "loss": 1.8894, "step": 13420 }, { "epoch": 4.21, "learning_rate": 5.78732747804266e-06, "loss": 1.8203, "step": 13430 }, { "epoch": 4.22, "learning_rate": 5.784190715181933e-06, "loss": 1.8829, "step": 13440 }, { "epoch": 4.22, "learning_rate": 5.781053952321205e-06, "loss": 1.9016, "step": 13450 }, { "epoch": 4.22, "learning_rate": 5.7779171894604765e-06, "loss": 1.8105, "step": 13460 }, { "epoch": 4.23, "learning_rate": 5.774780426599749e-06, "loss": 1.9625, "step": 13470 }, { "epoch": 4.23, "learning_rate": 5.7716436637390215e-06, "loss": 1.9637, "step": 13480 }, { "epoch": 4.23, "learning_rate": 5.768506900878294e-06, "loss": 2.0088, "step": 13490 }, { "epoch": 4.23, "learning_rate": 5.765370138017566e-06, "loss": 1.8245, "step": 13500 }, { "epoch": 4.24, "learning_rate": 5.7622333751568384e-06, "loss": 1.9708, "step": 13510 }, { "epoch": 4.24, "learning_rate": 5.759096612296111e-06, "loss": 1.9308, "step": 13520 }, { "epoch": 4.24, "learning_rate": 5.755959849435383e-06, "loss": 1.8563, "step": 13530 }, { "epoch": 4.25, "learning_rate": 5.752823086574656e-06, "loss": 1.8595, "step": 13540 }, { "epoch": 4.25, "learning_rate": 5.7496863237139275e-06, "loss": 1.9206, "step": 13550 }, { "epoch": 4.25, "learning_rate": 5.7465495608531995e-06, "loss": 1.8673, "step": 13560 }, { "epoch": 4.26, "learning_rate": 5.743412797992472e-06, "loss": 1.9263, "step": 13570 }, { "epoch": 4.26, "learning_rate": 5.7402760351317444e-06, "loss": 1.9281, "step": 13580 }, { "epoch": 4.26, "learning_rate": 5.737139272271017e-06, "loss": 1.8446, "step": 13590 }, { "epoch": 4.27, "learning_rate": 5.734002509410289e-06, "loss": 1.9267, "step": 13600 }, { "epoch": 4.27, "learning_rate": 5.730865746549561e-06, "loss": 1.9336, "step": 13610 }, { "epoch": 4.27, "learning_rate": 5.727728983688834e-06, "loss": 1.765, "step": 13620 }, { "epoch": 4.28, "learning_rate": 5.7245922208281055e-06, "loss": 1.9527, "step": 13630 }, { "epoch": 4.28, "learning_rate": 5.721455457967378e-06, "loss": 1.9254, "step": 13640 }, { "epoch": 4.28, "learning_rate": 5.7183186951066505e-06, "loss": 1.8996, "step": 13650 }, { "epoch": 4.28, "learning_rate": 5.7151819322459225e-06, "loss": 1.8477, "step": 13660 }, { "epoch": 4.29, "learning_rate": 5.712045169385195e-06, "loss": 1.8041, "step": 13670 }, { "epoch": 4.29, "learning_rate": 5.7089084065244674e-06, "loss": 1.8327, "step": 13680 }, { "epoch": 4.29, "learning_rate": 5.70577164366374e-06, "loss": 1.8609, "step": 13690 }, { "epoch": 4.3, "learning_rate": 5.7026348808030115e-06, "loss": 1.9412, "step": 13700 }, { "epoch": 4.3, "learning_rate": 5.6994981179422836e-06, "loss": 1.8599, "step": 13710 }, { "epoch": 4.3, "learning_rate": 5.6963613550815565e-06, "loss": 1.9285, "step": 13720 }, { "epoch": 4.31, "learning_rate": 5.6932245922208285e-06, "loss": 1.8325, "step": 13730 }, { "epoch": 4.31, "learning_rate": 5.6900878293601006e-06, "loss": 1.9623, "step": 13740 }, { "epoch": 4.31, "learning_rate": 5.6869510664993734e-06, "loss": 1.8725, "step": 13750 }, { "epoch": 4.32, "learning_rate": 5.683814303638645e-06, "loss": 1.7946, "step": 13760 }, { "epoch": 4.32, "learning_rate": 5.680677540777918e-06, "loss": 2.0362, "step": 13770 }, { "epoch": 4.32, "learning_rate": 5.67754077791719e-06, "loss": 1.8396, "step": 13780 }, { "epoch": 4.33, "learning_rate": 5.674404015056462e-06, "loss": 1.9147, "step": 13790 }, { "epoch": 4.33, "learning_rate": 5.6712672521957345e-06, "loss": 1.9386, "step": 13800 }, { "epoch": 4.33, "learning_rate": 5.6681304893350066e-06, "loss": 1.8763, "step": 13810 }, { "epoch": 4.34, "learning_rate": 5.6649937264742795e-06, "loss": 1.8439, "step": 13820 }, { "epoch": 4.34, "learning_rate": 5.6618569636135515e-06, "loss": 1.9102, "step": 13830 }, { "epoch": 4.34, "learning_rate": 5.658720200752823e-06, "loss": 1.9619, "step": 13840 }, { "epoch": 4.34, "learning_rate": 5.655583437892096e-06, "loss": 1.879, "step": 13850 }, { "epoch": 4.35, "learning_rate": 5.652446675031368e-06, "loss": 1.9166, "step": 13860 }, { "epoch": 4.35, "learning_rate": 5.6493099121706405e-06, "loss": 1.809, "step": 13870 }, { "epoch": 4.35, "learning_rate": 5.6461731493099126e-06, "loss": 1.8136, "step": 13880 }, { "epoch": 4.36, "learning_rate": 5.643036386449185e-06, "loss": 1.8419, "step": 13890 }, { "epoch": 4.36, "learning_rate": 5.6398996235884575e-06, "loss": 1.9239, "step": 13900 }, { "epoch": 4.36, "learning_rate": 5.6367628607277296e-06, "loss": 1.9159, "step": 13910 }, { "epoch": 4.37, "learning_rate": 5.6336260978670024e-06, "loss": 1.8297, "step": 13920 }, { "epoch": 4.37, "learning_rate": 5.630489335006274e-06, "loss": 1.9141, "step": 13930 }, { "epoch": 4.37, "learning_rate": 5.627352572145546e-06, "loss": 1.8885, "step": 13940 }, { "epoch": 4.38, "learning_rate": 5.624215809284819e-06, "loss": 1.9794, "step": 13950 }, { "epoch": 4.38, "learning_rate": 5.621079046424091e-06, "loss": 1.9734, "step": 13960 }, { "epoch": 4.38, "learning_rate": 5.6179422835633635e-06, "loss": 1.8771, "step": 13970 }, { "epoch": 4.39, "learning_rate": 5.6148055207026356e-06, "loss": 1.873, "step": 13980 }, { "epoch": 4.39, "learning_rate": 5.611668757841907e-06, "loss": 1.9766, "step": 13990 }, { "epoch": 4.39, "learning_rate": 5.60853199498118e-06, "loss": 1.8123, "step": 14000 }, { "epoch": 4.39, "learning_rate": 5.605395232120452e-06, "loss": 1.873, "step": 14010 }, { "epoch": 4.4, "learning_rate": 5.602258469259725e-06, "loss": 1.8561, "step": 14020 }, { "epoch": 4.4, "learning_rate": 5.599121706398997e-06, "loss": 1.9332, "step": 14030 }, { "epoch": 4.4, "learning_rate": 5.595984943538269e-06, "loss": 1.897, "step": 14040 }, { "epoch": 4.41, "learning_rate": 5.5928481806775416e-06, "loss": 1.887, "step": 14050 }, { "epoch": 4.41, "learning_rate": 5.589711417816814e-06, "loss": 1.956, "step": 14060 }, { "epoch": 4.41, "learning_rate": 5.5865746549560865e-06, "loss": 1.9316, "step": 14070 }, { "epoch": 4.42, "learning_rate": 5.583437892095358e-06, "loss": 1.8974, "step": 14080 }, { "epoch": 4.42, "learning_rate": 5.58030112923463e-06, "loss": 1.9333, "step": 14090 }, { "epoch": 4.42, "learning_rate": 5.577164366373903e-06, "loss": 1.896, "step": 14100 }, { "epoch": 4.43, "learning_rate": 5.574027603513175e-06, "loss": 1.8135, "step": 14110 }, { "epoch": 4.43, "learning_rate": 5.5708908406524476e-06, "loss": 1.8435, "step": 14120 }, { "epoch": 4.43, "learning_rate": 5.56775407779172e-06, "loss": 1.8622, "step": 14130 }, { "epoch": 4.44, "learning_rate": 5.564617314930991e-06, "loss": 1.9146, "step": 14140 }, { "epoch": 4.44, "learning_rate": 5.5614805520702646e-06, "loss": 1.8822, "step": 14150 }, { "epoch": 4.44, "learning_rate": 5.558343789209536e-06, "loss": 1.9138, "step": 14160 }, { "epoch": 4.44, "learning_rate": 5.555207026348809e-06, "loss": 1.8841, "step": 14170 }, { "epoch": 4.45, "learning_rate": 5.552070263488081e-06, "loss": 1.9401, "step": 14180 }, { "epoch": 4.45, "learning_rate": 5.548933500627353e-06, "loss": 1.8915, "step": 14190 }, { "epoch": 4.45, "learning_rate": 5.545796737766626e-06, "loss": 1.8691, "step": 14200 }, { "epoch": 4.46, "learning_rate": 5.542659974905898e-06, "loss": 1.8973, "step": 14210 }, { "epoch": 4.46, "learning_rate": 5.5395232120451706e-06, "loss": 1.9184, "step": 14220 }, { "epoch": 4.46, "learning_rate": 5.536386449184442e-06, "loss": 1.913, "step": 14230 }, { "epoch": 4.47, "learning_rate": 5.533249686323714e-06, "loss": 1.8781, "step": 14240 }, { "epoch": 4.47, "learning_rate": 5.530112923462987e-06, "loss": 1.9734, "step": 14250 }, { "epoch": 4.47, "learning_rate": 5.526976160602259e-06, "loss": 1.8681, "step": 14260 }, { "epoch": 4.48, "learning_rate": 5.523839397741532e-06, "loss": 1.848, "step": 14270 }, { "epoch": 4.48, "learning_rate": 5.520702634880804e-06, "loss": 1.8625, "step": 14280 }, { "epoch": 4.48, "learning_rate": 5.517565872020076e-06, "loss": 1.9166, "step": 14290 }, { "epoch": 4.49, "learning_rate": 5.514429109159349e-06, "loss": 1.8398, "step": 14300 }, { "epoch": 4.49, "learning_rate": 5.51129234629862e-06, "loss": 1.867, "step": 14310 }, { "epoch": 4.49, "learning_rate": 5.508155583437893e-06, "loss": 1.9392, "step": 14320 }, { "epoch": 4.49, "learning_rate": 5.505018820577165e-06, "loss": 1.9181, "step": 14330 }, { "epoch": 4.5, "learning_rate": 5.501882057716437e-06, "loss": 1.904, "step": 14340 }, { "epoch": 4.5, "learning_rate": 5.49874529485571e-06, "loss": 1.8688, "step": 14350 }, { "epoch": 4.5, "learning_rate": 5.495608531994982e-06, "loss": 1.8458, "step": 14360 }, { "epoch": 4.51, "learning_rate": 5.492471769134255e-06, "loss": 1.8275, "step": 14370 }, { "epoch": 4.51, "learning_rate": 5.489335006273526e-06, "loss": 1.9024, "step": 14380 }, { "epoch": 4.51, "learning_rate": 5.486198243412798e-06, "loss": 1.8987, "step": 14390 }, { "epoch": 4.52, "learning_rate": 5.483061480552071e-06, "loss": 1.8981, "step": 14400 }, { "epoch": 4.52, "learning_rate": 5.479924717691343e-06, "loss": 1.9143, "step": 14410 }, { "epoch": 4.52, "learning_rate": 5.476787954830616e-06, "loss": 1.8899, "step": 14420 }, { "epoch": 4.53, "learning_rate": 5.473651191969888e-06, "loss": 1.8701, "step": 14430 }, { "epoch": 4.53, "learning_rate": 5.47051442910916e-06, "loss": 1.927, "step": 14440 }, { "epoch": 4.53, "learning_rate": 5.467377666248433e-06, "loss": 1.8568, "step": 14450 }, { "epoch": 4.54, "learning_rate": 5.464240903387704e-06, "loss": 1.9297, "step": 14460 }, { "epoch": 4.54, "learning_rate": 5.461104140526977e-06, "loss": 1.9036, "step": 14470 }, { "epoch": 4.54, "learning_rate": 5.457967377666249e-06, "loss": 1.8501, "step": 14480 }, { "epoch": 4.55, "learning_rate": 5.454830614805521e-06, "loss": 1.8453, "step": 14490 }, { "epoch": 4.55, "learning_rate": 5.451693851944794e-06, "loss": 1.8577, "step": 14500 }, { "epoch": 4.55, "learning_rate": 5.448557089084066e-06, "loss": 1.8766, "step": 14510 }, { "epoch": 4.55, "learning_rate": 5.445420326223339e-06, "loss": 1.9118, "step": 14520 }, { "epoch": 4.56, "learning_rate": 5.442283563362611e-06, "loss": 1.9961, "step": 14530 }, { "epoch": 4.56, "learning_rate": 5.439146800501882e-06, "loss": 1.8682, "step": 14540 }, { "epoch": 4.56, "learning_rate": 5.436010037641155e-06, "loss": 1.8594, "step": 14550 }, { "epoch": 4.57, "learning_rate": 5.432873274780427e-06, "loss": 1.861, "step": 14560 }, { "epoch": 4.57, "learning_rate": 5.4297365119197e-06, "loss": 1.9484, "step": 14570 }, { "epoch": 4.57, "learning_rate": 5.426599749058972e-06, "loss": 1.8979, "step": 14580 }, { "epoch": 4.58, "learning_rate": 5.423462986198244e-06, "loss": 1.8477, "step": 14590 }, { "epoch": 4.58, "learning_rate": 5.420326223337517e-06, "loss": 1.926, "step": 14600 }, { "epoch": 4.58, "learning_rate": 5.417189460476788e-06, "loss": 1.9283, "step": 14610 }, { "epoch": 4.59, "learning_rate": 5.41405269761606e-06, "loss": 1.8808, "step": 14620 }, { "epoch": 4.59, "learning_rate": 5.410915934755333e-06, "loss": 1.9147, "step": 14630 }, { "epoch": 4.59, "learning_rate": 5.407779171894605e-06, "loss": 1.8446, "step": 14640 }, { "epoch": 4.6, "learning_rate": 5.404642409033878e-06, "loss": 1.8838, "step": 14650 }, { "epoch": 4.6, "learning_rate": 5.40150564617315e-06, "loss": 1.9644, "step": 14660 }, { "epoch": 4.6, "learning_rate": 5.398368883312422e-06, "loss": 1.96, "step": 14670 }, { "epoch": 4.6, "learning_rate": 5.395232120451695e-06, "loss": 1.8574, "step": 14680 }, { "epoch": 4.61, "learning_rate": 5.392095357590966e-06, "loss": 1.8522, "step": 14690 }, { "epoch": 4.61, "learning_rate": 5.388958594730239e-06, "loss": 1.902, "step": 14700 }, { "epoch": 4.61, "learning_rate": 5.385821831869511e-06, "loss": 1.8718, "step": 14710 }, { "epoch": 4.62, "learning_rate": 5.382685069008783e-06, "loss": 1.8183, "step": 14720 }, { "epoch": 4.62, "learning_rate": 5.379548306148056e-06, "loss": 1.91, "step": 14730 }, { "epoch": 4.62, "learning_rate": 5.376411543287328e-06, "loss": 1.8759, "step": 14740 }, { "epoch": 4.63, "learning_rate": 5.373274780426601e-06, "loss": 1.9229, "step": 14750 }, { "epoch": 4.63, "learning_rate": 5.370138017565872e-06, "loss": 1.8429, "step": 14760 }, { "epoch": 4.63, "learning_rate": 5.367001254705144e-06, "loss": 1.9231, "step": 14770 }, { "epoch": 4.64, "learning_rate": 5.363864491844417e-06, "loss": 1.8604, "step": 14780 }, { "epoch": 4.64, "learning_rate": 5.360727728983689e-06, "loss": 1.8929, "step": 14790 }, { "epoch": 4.64, "learning_rate": 5.357590966122962e-06, "loss": 1.8052, "step": 14800 }, { "epoch": 4.65, "learning_rate": 5.354454203262234e-06, "loss": 1.9085, "step": 14810 }, { "epoch": 4.65, "learning_rate": 5.351317440401506e-06, "loss": 1.902, "step": 14820 }, { "epoch": 4.65, "learning_rate": 5.348180677540779e-06, "loss": 1.8918, "step": 14830 }, { "epoch": 4.65, "learning_rate": 5.34504391468005e-06, "loss": 1.883, "step": 14840 }, { "epoch": 4.66, "learning_rate": 5.341907151819323e-06, "loss": 1.8666, "step": 14850 }, { "epoch": 4.66, "learning_rate": 5.338770388958595e-06, "loss": 1.8527, "step": 14860 }, { "epoch": 4.66, "learning_rate": 5.335633626097867e-06, "loss": 1.8875, "step": 14870 }, { "epoch": 4.67, "learning_rate": 5.33249686323714e-06, "loss": 1.9765, "step": 14880 }, { "epoch": 4.67, "learning_rate": 5.329360100376412e-06, "loss": 1.7983, "step": 14890 }, { "epoch": 4.67, "learning_rate": 5.326223337515685e-06, "loss": 1.895, "step": 14900 }, { "epoch": 4.68, "learning_rate": 5.323086574654956e-06, "loss": 1.9591, "step": 14910 }, { "epoch": 4.68, "learning_rate": 5.319949811794228e-06, "loss": 1.9159, "step": 14920 }, { "epoch": 4.68, "learning_rate": 5.316813048933501e-06, "loss": 1.8443, "step": 14930 }, { "epoch": 4.69, "learning_rate": 5.313676286072773e-06, "loss": 1.9166, "step": 14940 }, { "epoch": 4.69, "learning_rate": 5.310539523212046e-06, "loss": 1.8883, "step": 14950 }, { "epoch": 4.69, "learning_rate": 5.307402760351318e-06, "loss": 1.9, "step": 14960 }, { "epoch": 4.7, "learning_rate": 5.30426599749059e-06, "loss": 1.7444, "step": 14970 }, { "epoch": 4.7, "learning_rate": 5.301129234629863e-06, "loss": 1.8834, "step": 14980 }, { "epoch": 4.7, "learning_rate": 5.297992471769134e-06, "loss": 1.8931, "step": 14990 }, { "epoch": 4.71, "learning_rate": 5.294855708908407e-06, "loss": 1.8867, "step": 15000 }, { "epoch": 4.71, "learning_rate": 5.291718946047679e-06, "loss": 1.8816, "step": 15010 }, { "epoch": 4.71, "learning_rate": 5.288582183186951e-06, "loss": 1.8777, "step": 15020 }, { "epoch": 4.71, "learning_rate": 5.285445420326224e-06, "loss": 1.9305, "step": 15030 }, { "epoch": 4.72, "learning_rate": 5.282308657465496e-06, "loss": 1.9274, "step": 15040 }, { "epoch": 4.72, "learning_rate": 5.279171894604769e-06, "loss": 1.8658, "step": 15050 }, { "epoch": 4.72, "learning_rate": 5.276035131744041e-06, "loss": 1.9291, "step": 15060 }, { "epoch": 4.73, "learning_rate": 5.272898368883312e-06, "loss": 1.8839, "step": 15070 }, { "epoch": 4.73, "learning_rate": 5.269761606022585e-06, "loss": 1.8327, "step": 15080 }, { "epoch": 4.73, "learning_rate": 5.266624843161857e-06, "loss": 1.8446, "step": 15090 }, { "epoch": 4.74, "learning_rate": 5.26348808030113e-06, "loss": 1.8522, "step": 15100 }, { "epoch": 4.74, "learning_rate": 5.260351317440402e-06, "loss": 1.7783, "step": 15110 }, { "epoch": 4.74, "learning_rate": 5.257214554579674e-06, "loss": 1.8502, "step": 15120 }, { "epoch": 4.75, "learning_rate": 5.254077791718947e-06, "loss": 1.8753, "step": 15130 }, { "epoch": 4.75, "learning_rate": 5.250941028858218e-06, "loss": 1.9161, "step": 15140 }, { "epoch": 4.75, "learning_rate": 5.247804265997491e-06, "loss": 1.9177, "step": 15150 }, { "epoch": 4.76, "learning_rate": 5.244667503136763e-06, "loss": 1.8746, "step": 15160 }, { "epoch": 4.76, "learning_rate": 5.241530740276035e-06, "loss": 1.9254, "step": 15170 }, { "epoch": 4.76, "learning_rate": 5.238393977415308e-06, "loss": 1.8629, "step": 15180 }, { "epoch": 4.76, "learning_rate": 5.23525721455458e-06, "loss": 1.9017, "step": 15190 }, { "epoch": 4.77, "learning_rate": 5.232120451693853e-06, "loss": 1.8562, "step": 15200 }, { "epoch": 4.77, "learning_rate": 5.228983688833125e-06, "loss": 1.9271, "step": 15210 }, { "epoch": 4.77, "learning_rate": 5.225846925972396e-06, "loss": 1.8436, "step": 15220 }, { "epoch": 4.78, "learning_rate": 5.222710163111669e-06, "loss": 1.9025, "step": 15230 }, { "epoch": 4.78, "learning_rate": 5.219573400250941e-06, "loss": 1.9184, "step": 15240 }, { "epoch": 4.78, "learning_rate": 5.216436637390214e-06, "loss": 1.9348, "step": 15250 }, { "epoch": 4.79, "learning_rate": 5.213299874529486e-06, "loss": 1.8559, "step": 15260 }, { "epoch": 4.79, "learning_rate": 5.210163111668758e-06, "loss": 1.833, "step": 15270 }, { "epoch": 4.79, "learning_rate": 5.207026348808031e-06, "loss": 1.8749, "step": 15280 }, { "epoch": 4.8, "learning_rate": 5.203889585947302e-06, "loss": 1.8664, "step": 15290 }, { "epoch": 4.8, "learning_rate": 5.200752823086576e-06, "loss": 1.9232, "step": 15300 }, { "epoch": 4.8, "learning_rate": 5.197616060225847e-06, "loss": 1.9121, "step": 15310 }, { "epoch": 4.81, "learning_rate": 5.194479297365119e-06, "loss": 1.9184, "step": 15320 }, { "epoch": 4.81, "learning_rate": 5.191342534504392e-06, "loss": 1.865, "step": 15330 }, { "epoch": 4.81, "learning_rate": 5.188205771643664e-06, "loss": 1.9039, "step": 15340 }, { "epoch": 4.81, "learning_rate": 5.185069008782937e-06, "loss": 1.8667, "step": 15350 }, { "epoch": 4.82, "learning_rate": 5.181932245922209e-06, "loss": 1.8866, "step": 15360 }, { "epoch": 4.82, "learning_rate": 5.17879548306148e-06, "loss": 1.8476, "step": 15370 }, { "epoch": 4.82, "learning_rate": 5.175658720200753e-06, "loss": 1.8192, "step": 15380 }, { "epoch": 4.83, "learning_rate": 5.172521957340025e-06, "loss": 1.8782, "step": 15390 }, { "epoch": 4.83, "learning_rate": 5.169385194479298e-06, "loss": 1.9038, "step": 15400 }, { "epoch": 4.83, "learning_rate": 5.16624843161857e-06, "loss": 1.9254, "step": 15410 }, { "epoch": 4.84, "learning_rate": 5.163111668757842e-06, "loss": 1.9146, "step": 15420 }, { "epoch": 4.84, "learning_rate": 5.159974905897115e-06, "loss": 1.8519, "step": 15430 }, { "epoch": 4.84, "learning_rate": 5.156838143036387e-06, "loss": 1.8355, "step": 15440 }, { "epoch": 4.85, "learning_rate": 5.15370138017566e-06, "loss": 1.8274, "step": 15450 }, { "epoch": 4.85, "learning_rate": 5.150564617314931e-06, "loss": 1.9244, "step": 15460 }, { "epoch": 4.85, "learning_rate": 5.147427854454203e-06, "loss": 1.912, "step": 15470 }, { "epoch": 4.86, "learning_rate": 5.144291091593476e-06, "loss": 1.9544, "step": 15480 }, { "epoch": 4.86, "learning_rate": 5.141154328732748e-06, "loss": 1.8746, "step": 15490 }, { "epoch": 4.86, "learning_rate": 5.13801756587202e-06, "loss": 1.906, "step": 15500 }, { "epoch": 4.87, "learning_rate": 5.134880803011293e-06, "loss": 1.9023, "step": 15510 }, { "epoch": 4.87, "learning_rate": 5.131744040150564e-06, "loss": 1.9178, "step": 15520 }, { "epoch": 4.87, "learning_rate": 5.128607277289837e-06, "loss": 1.8647, "step": 15530 }, { "epoch": 4.87, "learning_rate": 5.125470514429109e-06, "loss": 1.9327, "step": 15540 }, { "epoch": 4.88, "learning_rate": 5.122333751568381e-06, "loss": 1.8794, "step": 15550 }, { "epoch": 4.88, "learning_rate": 5.119196988707654e-06, "loss": 1.975, "step": 15560 }, { "epoch": 4.88, "learning_rate": 5.116060225846926e-06, "loss": 1.9128, "step": 15570 }, { "epoch": 4.89, "learning_rate": 5.112923462986199e-06, "loss": 1.9147, "step": 15580 }, { "epoch": 4.89, "learning_rate": 5.109786700125471e-06, "loss": 1.9159, "step": 15590 }, { "epoch": 4.89, "learning_rate": 5.106649937264742e-06, "loss": 1.9288, "step": 15600 }, { "epoch": 4.9, "learning_rate": 5.103513174404015e-06, "loss": 1.8773, "step": 15610 }, { "epoch": 4.9, "learning_rate": 5.100376411543287e-06, "loss": 1.8637, "step": 15620 }, { "epoch": 4.9, "learning_rate": 5.09723964868256e-06, "loss": 1.9168, "step": 15630 }, { "epoch": 4.91, "learning_rate": 5.094102885821832e-06, "loss": 1.8844, "step": 15640 }, { "epoch": 4.91, "learning_rate": 5.090966122961104e-06, "loss": 1.9208, "step": 15650 }, { "epoch": 4.91, "learning_rate": 5.087829360100377e-06, "loss": 1.8717, "step": 15660 }, { "epoch": 4.92, "learning_rate": 5.084692597239648e-06, "loss": 1.9348, "step": 15670 }, { "epoch": 4.92, "learning_rate": 5.081555834378922e-06, "loss": 1.8565, "step": 15680 }, { "epoch": 4.92, "learning_rate": 5.078419071518193e-06, "loss": 1.8659, "step": 15690 }, { "epoch": 4.92, "learning_rate": 5.075282308657465e-06, "loss": 1.8334, "step": 15700 }, { "epoch": 4.93, "learning_rate": 5.072145545796738e-06, "loss": 1.8489, "step": 15710 }, { "epoch": 4.93, "learning_rate": 5.06900878293601e-06, "loss": 1.8754, "step": 15720 }, { "epoch": 4.93, "learning_rate": 5.065872020075283e-06, "loss": 1.901, "step": 15730 }, { "epoch": 4.94, "learning_rate": 5.062735257214555e-06, "loss": 1.8595, "step": 15740 }, { "epoch": 4.94, "learning_rate": 5.0595984943538265e-06, "loss": 1.7944, "step": 15750 }, { "epoch": 4.94, "learning_rate": 5.056461731493099e-06, "loss": 1.8644, "step": 15760 }, { "epoch": 4.95, "learning_rate": 5.053324968632371e-06, "loss": 1.8543, "step": 15770 }, { "epoch": 4.95, "learning_rate": 5.050188205771644e-06, "loss": 1.8879, "step": 15780 }, { "epoch": 4.95, "learning_rate": 5.047051442910916e-06, "loss": 1.9381, "step": 15790 }, { "epoch": 4.96, "learning_rate": 5.043914680050188e-06, "loss": 1.9313, "step": 15800 }, { "epoch": 4.96, "learning_rate": 5.040777917189461e-06, "loss": 1.8585, "step": 15810 }, { "epoch": 4.96, "learning_rate": 5.037641154328733e-06, "loss": 1.9605, "step": 15820 }, { "epoch": 4.97, "learning_rate": 5.034504391468006e-06, "loss": 1.8209, "step": 15830 }, { "epoch": 4.97, "learning_rate": 5.031367628607277e-06, "loss": 1.7861, "step": 15840 }, { "epoch": 4.97, "learning_rate": 5.0282308657465495e-06, "loss": 1.8689, "step": 15850 }, { "epoch": 4.97, "learning_rate": 5.025094102885822e-06, "loss": 1.8678, "step": 15860 }, { "epoch": 4.98, "learning_rate": 5.021957340025094e-06, "loss": 1.981, "step": 15870 }, { "epoch": 4.98, "learning_rate": 5.018820577164367e-06, "loss": 1.769, "step": 15880 }, { "epoch": 4.98, "learning_rate": 5.015683814303639e-06, "loss": 1.8633, "step": 15890 }, { "epoch": 4.99, "learning_rate": 5.0125470514429105e-06, "loss": 2.0299, "step": 15900 }, { "epoch": 4.99, "learning_rate": 5.0094102885821834e-06, "loss": 1.9424, "step": 15910 }, { "epoch": 4.99, "learning_rate": 5.0062735257214555e-06, "loss": 1.9456, "step": 15920 }, { "epoch": 5.0, "learning_rate": 5.003136762860728e-06, "loss": 1.8269, "step": 15930 }, { "epoch": 5.0, "learning_rate": 5e-06, "loss": 1.8382, "step": 15940 }, { "epoch": 5.0, "eval_loss": 1.828749418258667, "eval_runtime": 13.6137, "eval_samples_per_second": 73.455, "eval_steps_per_second": 4.628, "step": 15940 }, { "epoch": 5.0, "learning_rate": 4.9968632371392725e-06, "loss": 1.9341, "step": 15950 }, { "epoch": 5.01, "learning_rate": 4.993726474278545e-06, "loss": 1.8757, "step": 15960 }, { "epoch": 5.01, "learning_rate": 4.990589711417817e-06, "loss": 1.8885, "step": 15970 }, { "epoch": 5.01, "learning_rate": 4.9874529485570894e-06, "loss": 1.806, "step": 15980 }, { "epoch": 5.02, "learning_rate": 4.9843161856963615e-06, "loss": 1.8617, "step": 15990 }, { "epoch": 5.02, "learning_rate": 4.981179422835634e-06, "loss": 1.8927, "step": 16000 }, { "epoch": 5.02, "learning_rate": 4.978042659974906e-06, "loss": 1.8202, "step": 16010 }, { "epoch": 5.03, "learning_rate": 4.9749058971141785e-06, "loss": 1.898, "step": 16020 }, { "epoch": 5.03, "learning_rate": 4.9717691342534505e-06, "loss": 1.8298, "step": 16030 }, { "epoch": 5.03, "learning_rate": 4.968632371392723e-06, "loss": 1.8598, "step": 16040 }, { "epoch": 5.03, "learning_rate": 4.9654956085319954e-06, "loss": 1.9222, "step": 16050 }, { "epoch": 5.04, "learning_rate": 4.9623588456712675e-06, "loss": 1.8679, "step": 16060 }, { "epoch": 5.04, "learning_rate": 4.9592220828105395e-06, "loss": 1.8874, "step": 16070 }, { "epoch": 5.04, "learning_rate": 4.9560853199498124e-06, "loss": 1.8677, "step": 16080 }, { "epoch": 5.05, "learning_rate": 4.9529485570890845e-06, "loss": 1.753, "step": 16090 }, { "epoch": 5.05, "learning_rate": 4.9498117942283565e-06, "loss": 1.759, "step": 16100 }, { "epoch": 5.05, "learning_rate": 4.946675031367629e-06, "loss": 1.8249, "step": 16110 }, { "epoch": 5.06, "learning_rate": 4.9435382685069015e-06, "loss": 1.9584, "step": 16120 }, { "epoch": 5.06, "learning_rate": 4.9404015056461735e-06, "loss": 1.9016, "step": 16130 }, { "epoch": 5.06, "learning_rate": 4.9372647427854455e-06, "loss": 1.7952, "step": 16140 }, { "epoch": 5.07, "learning_rate": 4.9341279799247184e-06, "loss": 1.8447, "step": 16150 }, { "epoch": 5.07, "learning_rate": 4.9309912170639905e-06, "loss": 1.8471, "step": 16160 }, { "epoch": 5.07, "learning_rate": 4.9278544542032625e-06, "loss": 1.7276, "step": 16170 }, { "epoch": 5.08, "learning_rate": 4.9247176913425346e-06, "loss": 1.7841, "step": 16180 }, { "epoch": 5.08, "learning_rate": 4.9215809284818075e-06, "loss": 1.8478, "step": 16190 }, { "epoch": 5.08, "learning_rate": 4.9184441656210795e-06, "loss": 1.9077, "step": 16200 }, { "epoch": 5.08, "learning_rate": 4.9153074027603516e-06, "loss": 1.9702, "step": 16210 }, { "epoch": 5.09, "learning_rate": 4.912170639899624e-06, "loss": 1.8798, "step": 16220 }, { "epoch": 5.09, "learning_rate": 4.9090338770388965e-06, "loss": 1.8828, "step": 16230 }, { "epoch": 5.09, "learning_rate": 4.9058971141781685e-06, "loss": 1.858, "step": 16240 }, { "epoch": 5.1, "learning_rate": 4.902760351317441e-06, "loss": 1.9143, "step": 16250 }, { "epoch": 5.1, "learning_rate": 4.899623588456713e-06, "loss": 1.845, "step": 16260 }, { "epoch": 5.1, "learning_rate": 4.8964868255959855e-06, "loss": 1.9097, "step": 16270 }, { "epoch": 5.11, "learning_rate": 4.8933500627352576e-06, "loss": 1.8443, "step": 16280 }, { "epoch": 5.11, "learning_rate": 4.89021329987453e-06, "loss": 1.8654, "step": 16290 }, { "epoch": 5.11, "learning_rate": 4.8870765370138025e-06, "loss": 1.9197, "step": 16300 }, { "epoch": 5.12, "learning_rate": 4.8839397741530745e-06, "loss": 1.8181, "step": 16310 }, { "epoch": 5.12, "learning_rate": 4.880803011292347e-06, "loss": 1.8374, "step": 16320 }, { "epoch": 5.12, "learning_rate": 4.877666248431619e-06, "loss": 1.7974, "step": 16330 }, { "epoch": 5.13, "learning_rate": 4.8745294855708915e-06, "loss": 1.8759, "step": 16340 }, { "epoch": 5.13, "learning_rate": 4.8713927227101636e-06, "loss": 1.8612, "step": 16350 }, { "epoch": 5.13, "learning_rate": 4.868255959849436e-06, "loss": 1.8389, "step": 16360 }, { "epoch": 5.13, "learning_rate": 4.865119196988708e-06, "loss": 1.8966, "step": 16370 }, { "epoch": 5.14, "learning_rate": 4.8619824341279805e-06, "loss": 1.9176, "step": 16380 }, { "epoch": 5.14, "learning_rate": 4.858845671267253e-06, "loss": 1.8387, "step": 16390 }, { "epoch": 5.14, "learning_rate": 4.8557089084065255e-06, "loss": 1.8397, "step": 16400 }, { "epoch": 5.15, "learning_rate": 4.852572145545797e-06, "loss": 1.8909, "step": 16410 }, { "epoch": 5.15, "learning_rate": 4.84943538268507e-06, "loss": 1.8981, "step": 16420 }, { "epoch": 5.15, "learning_rate": 4.846298619824342e-06, "loss": 1.8632, "step": 16430 }, { "epoch": 5.16, "learning_rate": 4.843161856963614e-06, "loss": 1.8874, "step": 16440 }, { "epoch": 5.16, "learning_rate": 4.8400250941028866e-06, "loss": 1.8557, "step": 16450 }, { "epoch": 5.16, "learning_rate": 4.836888331242159e-06, "loss": 1.8918, "step": 16460 }, { "epoch": 5.17, "learning_rate": 4.833751568381431e-06, "loss": 1.8779, "step": 16470 }, { "epoch": 5.17, "learning_rate": 4.830614805520703e-06, "loss": 1.8792, "step": 16480 }, { "epoch": 5.17, "learning_rate": 4.827478042659976e-06, "loss": 1.9252, "step": 16490 }, { "epoch": 5.18, "learning_rate": 4.824341279799248e-06, "loss": 1.7958, "step": 16500 }, { "epoch": 5.18, "learning_rate": 4.82120451693852e-06, "loss": 1.9067, "step": 16510 }, { "epoch": 5.18, "learning_rate": 4.818067754077792e-06, "loss": 1.7881, "step": 16520 }, { "epoch": 5.19, "learning_rate": 4.814930991217065e-06, "loss": 1.9106, "step": 16530 }, { "epoch": 5.19, "learning_rate": 4.811794228356337e-06, "loss": 1.8793, "step": 16540 }, { "epoch": 5.19, "learning_rate": 4.8086574654956095e-06, "loss": 1.9255, "step": 16550 }, { "epoch": 5.19, "learning_rate": 4.805520702634881e-06, "loss": 1.9354, "step": 16560 }, { "epoch": 5.2, "learning_rate": 4.802383939774154e-06, "loss": 1.9018, "step": 16570 }, { "epoch": 5.2, "learning_rate": 4.799247176913426e-06, "loss": 1.8821, "step": 16580 }, { "epoch": 5.2, "learning_rate": 4.7961104140526986e-06, "loss": 1.8658, "step": 16590 }, { "epoch": 5.21, "learning_rate": 4.792973651191971e-06, "loss": 1.8904, "step": 16600 }, { "epoch": 5.21, "learning_rate": 4.789836888331243e-06, "loss": 1.8478, "step": 16610 }, { "epoch": 5.21, "learning_rate": 4.786700125470515e-06, "loss": 1.8945, "step": 16620 }, { "epoch": 5.22, "learning_rate": 4.783563362609787e-06, "loss": 1.8028, "step": 16630 }, { "epoch": 5.22, "learning_rate": 4.78042659974906e-06, "loss": 1.8476, "step": 16640 }, { "epoch": 5.22, "learning_rate": 4.777289836888332e-06, "loss": 1.8735, "step": 16650 }, { "epoch": 5.23, "learning_rate": 4.774153074027604e-06, "loss": 1.9773, "step": 16660 }, { "epoch": 5.23, "learning_rate": 4.771016311166876e-06, "loss": 1.8794, "step": 16670 }, { "epoch": 5.23, "learning_rate": 4.767879548306149e-06, "loss": 1.8646, "step": 16680 }, { "epoch": 5.24, "learning_rate": 4.764742785445421e-06, "loss": 1.8607, "step": 16690 }, { "epoch": 5.24, "learning_rate": 4.761606022584693e-06, "loss": 1.9471, "step": 16700 }, { "epoch": 5.24, "learning_rate": 4.758469259723965e-06, "loss": 1.8419, "step": 16710 }, { "epoch": 5.24, "learning_rate": 4.755332496863238e-06, "loss": 1.8559, "step": 16720 }, { "epoch": 5.25, "learning_rate": 4.75219573400251e-06, "loss": 1.8327, "step": 16730 }, { "epoch": 5.25, "learning_rate": 4.749058971141783e-06, "loss": 1.9288, "step": 16740 }, { "epoch": 5.25, "learning_rate": 4.745922208281054e-06, "loss": 1.8511, "step": 16750 }, { "epoch": 5.26, "learning_rate": 4.742785445420327e-06, "loss": 1.9087, "step": 16760 }, { "epoch": 5.26, "learning_rate": 4.739648682559599e-06, "loss": 1.8333, "step": 16770 }, { "epoch": 5.26, "learning_rate": 4.736511919698872e-06, "loss": 1.8701, "step": 16780 }, { "epoch": 5.27, "learning_rate": 4.733375156838144e-06, "loss": 1.9376, "step": 16790 }, { "epoch": 5.27, "learning_rate": 4.730238393977416e-06, "loss": 1.848, "step": 16800 }, { "epoch": 5.27, "learning_rate": 4.727101631116688e-06, "loss": 1.7526, "step": 16810 }, { "epoch": 5.28, "learning_rate": 4.72396486825596e-06, "loss": 1.8326, "step": 16820 }, { "epoch": 5.28, "learning_rate": 4.720828105395233e-06, "loss": 1.8106, "step": 16830 }, { "epoch": 5.28, "learning_rate": 4.717691342534505e-06, "loss": 1.8723, "step": 16840 }, { "epoch": 5.29, "learning_rate": 4.714554579673777e-06, "loss": 1.8554, "step": 16850 }, { "epoch": 5.29, "learning_rate": 4.711417816813049e-06, "loss": 1.847, "step": 16860 }, { "epoch": 5.29, "learning_rate": 4.708281053952322e-06, "loss": 1.9274, "step": 16870 }, { "epoch": 5.29, "learning_rate": 4.705144291091594e-06, "loss": 1.8982, "step": 16880 }, { "epoch": 5.3, "learning_rate": 4.702007528230867e-06, "loss": 1.8529, "step": 16890 }, { "epoch": 5.3, "learning_rate": 4.698870765370138e-06, "loss": 1.853, "step": 16900 }, { "epoch": 5.3, "learning_rate": 4.695734002509411e-06, "loss": 1.8639, "step": 16910 }, { "epoch": 5.31, "learning_rate": 4.692597239648683e-06, "loss": 1.9326, "step": 16920 }, { "epoch": 5.31, "learning_rate": 4.689460476787956e-06, "loss": 1.9268, "step": 16930 }, { "epoch": 5.31, "learning_rate": 4.686323713927228e-06, "loss": 1.9351, "step": 16940 }, { "epoch": 5.32, "learning_rate": 4.6831869510665e-06, "loss": 1.9076, "step": 16950 }, { "epoch": 5.32, "learning_rate": 4.680050188205772e-06, "loss": 1.8779, "step": 16960 }, { "epoch": 5.32, "learning_rate": 4.676913425345045e-06, "loss": 1.8081, "step": 16970 }, { "epoch": 5.33, "learning_rate": 4.673776662484317e-06, "loss": 1.908, "step": 16980 }, { "epoch": 5.33, "learning_rate": 4.670639899623589e-06, "loss": 1.8247, "step": 16990 }, { "epoch": 5.33, "learning_rate": 4.667503136762861e-06, "loss": 1.8314, "step": 17000 }, { "epoch": 5.34, "learning_rate": 4.664366373902133e-06, "loss": 1.8752, "step": 17010 }, { "epoch": 5.34, "learning_rate": 4.661229611041406e-06, "loss": 1.8456, "step": 17020 }, { "epoch": 5.34, "learning_rate": 4.658092848180678e-06, "loss": 1.8795, "step": 17030 }, { "epoch": 5.35, "learning_rate": 4.654956085319951e-06, "loss": 1.8288, "step": 17040 }, { "epoch": 5.35, "learning_rate": 4.651819322459222e-06, "loss": 1.8917, "step": 17050 }, { "epoch": 5.35, "learning_rate": 4.648682559598495e-06, "loss": 1.8626, "step": 17060 }, { "epoch": 5.35, "learning_rate": 4.645545796737767e-06, "loss": 1.8603, "step": 17070 }, { "epoch": 5.36, "learning_rate": 4.64240903387704e-06, "loss": 1.8863, "step": 17080 }, { "epoch": 5.36, "learning_rate": 4.639272271016311e-06, "loss": 1.8566, "step": 17090 }, { "epoch": 5.36, "learning_rate": 4.636135508155584e-06, "loss": 1.9462, "step": 17100 }, { "epoch": 5.37, "learning_rate": 4.632998745294856e-06, "loss": 1.8992, "step": 17110 }, { "epoch": 5.37, "learning_rate": 4.629861982434129e-06, "loss": 1.8613, "step": 17120 }, { "epoch": 5.37, "learning_rate": 4.626725219573401e-06, "loss": 1.8436, "step": 17130 }, { "epoch": 5.38, "learning_rate": 4.623588456712673e-06, "loss": 1.9199, "step": 17140 }, { "epoch": 5.38, "learning_rate": 4.620451693851945e-06, "loss": 1.9195, "step": 17150 }, { "epoch": 5.38, "learning_rate": 4.617314930991217e-06, "loss": 1.8925, "step": 17160 }, { "epoch": 5.39, "learning_rate": 4.61417816813049e-06, "loss": 1.9009, "step": 17170 }, { "epoch": 5.39, "learning_rate": 4.611041405269762e-06, "loss": 1.8888, "step": 17180 }, { "epoch": 5.39, "learning_rate": 4.607904642409034e-06, "loss": 1.8742, "step": 17190 }, { "epoch": 5.4, "learning_rate": 4.604767879548306e-06, "loss": 1.8256, "step": 17200 }, { "epoch": 5.4, "learning_rate": 4.601631116687579e-06, "loss": 1.8244, "step": 17210 }, { "epoch": 5.4, "learning_rate": 4.598494353826851e-06, "loss": 1.8462, "step": 17220 }, { "epoch": 5.4, "learning_rate": 4.595357590966124e-06, "loss": 1.9451, "step": 17230 }, { "epoch": 5.41, "learning_rate": 4.592220828105395e-06, "loss": 1.8587, "step": 17240 }, { "epoch": 5.41, "learning_rate": 4.589084065244668e-06, "loss": 1.9033, "step": 17250 }, { "epoch": 5.41, "learning_rate": 4.58594730238394e-06, "loss": 1.8973, "step": 17260 }, { "epoch": 5.42, "learning_rate": 4.582810539523213e-06, "loss": 1.8485, "step": 17270 }, { "epoch": 5.42, "learning_rate": 4.579673776662485e-06, "loss": 1.8579, "step": 17280 }, { "epoch": 5.42, "learning_rate": 4.576537013801757e-06, "loss": 1.9269, "step": 17290 }, { "epoch": 5.43, "learning_rate": 4.573400250941029e-06, "loss": 1.8768, "step": 17300 }, { "epoch": 5.43, "learning_rate": 4.570263488080302e-06, "loss": 1.9166, "step": 17310 }, { "epoch": 5.43, "learning_rate": 4.567126725219574e-06, "loss": 1.871, "step": 17320 }, { "epoch": 5.44, "learning_rate": 4.563989962358846e-06, "loss": 1.8885, "step": 17330 }, { "epoch": 5.44, "learning_rate": 4.560853199498118e-06, "loss": 1.8639, "step": 17340 }, { "epoch": 5.44, "learning_rate": 4.55771643663739e-06, "loss": 1.8968, "step": 17350 }, { "epoch": 5.45, "learning_rate": 4.554579673776663e-06, "loss": 1.9012, "step": 17360 }, { "epoch": 5.45, "learning_rate": 4.551442910915935e-06, "loss": 1.8502, "step": 17370 }, { "epoch": 5.45, "learning_rate": 4.548306148055208e-06, "loss": 1.8242, "step": 17380 }, { "epoch": 5.45, "learning_rate": 4.545169385194479e-06, "loss": 1.9132, "step": 17390 }, { "epoch": 5.46, "learning_rate": 4.542032622333752e-06, "loss": 1.8413, "step": 17400 }, { "epoch": 5.46, "learning_rate": 4.538895859473024e-06, "loss": 1.9127, "step": 17410 }, { "epoch": 5.46, "learning_rate": 4.535759096612297e-06, "loss": 1.9078, "step": 17420 }, { "epoch": 5.47, "learning_rate": 4.532622333751569e-06, "loss": 1.9186, "step": 17430 }, { "epoch": 5.47, "learning_rate": 4.529485570890841e-06, "loss": 1.8833, "step": 17440 }, { "epoch": 5.47, "learning_rate": 4.526348808030113e-06, "loss": 1.8754, "step": 17450 }, { "epoch": 5.48, "learning_rate": 4.523212045169386e-06, "loss": 1.8861, "step": 17460 }, { "epoch": 5.48, "learning_rate": 4.520075282308658e-06, "loss": 1.7951, "step": 17470 }, { "epoch": 5.48, "learning_rate": 4.51693851944793e-06, "loss": 1.9778, "step": 17480 }, { "epoch": 5.49, "learning_rate": 4.513801756587202e-06, "loss": 1.8056, "step": 17490 }, { "epoch": 5.49, "learning_rate": 4.510664993726475e-06, "loss": 1.8797, "step": 17500 }, { "epoch": 5.49, "learning_rate": 4.507528230865747e-06, "loss": 1.8466, "step": 17510 }, { "epoch": 5.5, "learning_rate": 4.504391468005019e-06, "loss": 1.9108, "step": 17520 }, { "epoch": 5.5, "learning_rate": 4.501254705144291e-06, "loss": 1.9139, "step": 17530 }, { "epoch": 5.5, "learning_rate": 4.498117942283563e-06, "loss": 1.8835, "step": 17540 }, { "epoch": 5.51, "learning_rate": 4.494981179422836e-06, "loss": 1.8341, "step": 17550 }, { "epoch": 5.51, "learning_rate": 4.491844416562108e-06, "loss": 1.8345, "step": 17560 }, { "epoch": 5.51, "learning_rate": 4.488707653701381e-06, "loss": 1.8053, "step": 17570 }, { "epoch": 5.51, "learning_rate": 4.485570890840652e-06, "loss": 1.8901, "step": 17580 }, { "epoch": 5.52, "learning_rate": 4.482434127979925e-06, "loss": 1.882, "step": 17590 }, { "epoch": 5.52, "learning_rate": 4.479297365119197e-06, "loss": 1.8712, "step": 17600 }, { "epoch": 5.52, "learning_rate": 4.47616060225847e-06, "loss": 1.8634, "step": 17610 }, { "epoch": 5.53, "learning_rate": 4.473023839397742e-06, "loss": 1.8308, "step": 17620 }, { "epoch": 5.53, "learning_rate": 4.469887076537014e-06, "loss": 1.8628, "step": 17630 }, { "epoch": 5.53, "learning_rate": 4.466750313676286e-06, "loss": 1.87, "step": 17640 }, { "epoch": 5.54, "learning_rate": 4.463613550815559e-06, "loss": 1.8509, "step": 17650 }, { "epoch": 5.54, "learning_rate": 4.460476787954831e-06, "loss": 1.861, "step": 17660 }, { "epoch": 5.54, "learning_rate": 4.457340025094103e-06, "loss": 1.8706, "step": 17670 }, { "epoch": 5.55, "learning_rate": 4.454203262233375e-06, "loss": 1.9479, "step": 17680 }, { "epoch": 5.55, "learning_rate": 4.451066499372648e-06, "loss": 1.8876, "step": 17690 }, { "epoch": 5.55, "learning_rate": 4.44792973651192e-06, "loss": 1.8264, "step": 17700 }, { "epoch": 5.56, "learning_rate": 4.444792973651192e-06, "loss": 1.7968, "step": 17710 }, { "epoch": 5.56, "learning_rate": 4.441656210790465e-06, "loss": 1.8326, "step": 17720 }, { "epoch": 5.56, "learning_rate": 4.438519447929736e-06, "loss": 1.7976, "step": 17730 }, { "epoch": 5.56, "learning_rate": 4.435382685069009e-06, "loss": 1.8688, "step": 17740 }, { "epoch": 5.57, "learning_rate": 4.432245922208281e-06, "loss": 1.8763, "step": 17750 }, { "epoch": 5.57, "learning_rate": 4.429109159347554e-06, "loss": 1.872, "step": 17760 }, { "epoch": 5.57, "learning_rate": 4.425972396486826e-06, "loss": 1.8453, "step": 17770 }, { "epoch": 5.58, "learning_rate": 4.422835633626098e-06, "loss": 1.911, "step": 17780 }, { "epoch": 5.58, "learning_rate": 4.41969887076537e-06, "loss": 1.831, "step": 17790 }, { "epoch": 5.58, "learning_rate": 4.416562107904643e-06, "loss": 1.8414, "step": 17800 }, { "epoch": 5.59, "learning_rate": 4.413425345043915e-06, "loss": 1.8333, "step": 17810 }, { "epoch": 5.59, "learning_rate": 4.410288582183187e-06, "loss": 1.8801, "step": 17820 }, { "epoch": 5.59, "learning_rate": 4.407151819322459e-06, "loss": 1.9353, "step": 17830 }, { "epoch": 5.6, "learning_rate": 4.404015056461732e-06, "loss": 1.8287, "step": 17840 }, { "epoch": 5.6, "learning_rate": 4.400878293601004e-06, "loss": 1.8044, "step": 17850 }, { "epoch": 5.6, "learning_rate": 4.397741530740276e-06, "loss": 1.8164, "step": 17860 }, { "epoch": 5.61, "learning_rate": 4.394604767879549e-06, "loss": 1.8935, "step": 17870 }, { "epoch": 5.61, "learning_rate": 4.391468005018821e-06, "loss": 1.8232, "step": 17880 }, { "epoch": 5.61, "learning_rate": 4.388331242158093e-06, "loss": 1.8721, "step": 17890 }, { "epoch": 5.61, "learning_rate": 4.385194479297365e-06, "loss": 1.9139, "step": 17900 }, { "epoch": 5.62, "learning_rate": 4.382057716436638e-06, "loss": 1.9591, "step": 17910 }, { "epoch": 5.62, "learning_rate": 4.37892095357591e-06, "loss": 1.7527, "step": 17920 }, { "epoch": 5.62, "learning_rate": 4.375784190715182e-06, "loss": 1.841, "step": 17930 }, { "epoch": 5.63, "learning_rate": 4.372647427854454e-06, "loss": 1.7895, "step": 17940 }, { "epoch": 5.63, "learning_rate": 4.369510664993727e-06, "loss": 1.9204, "step": 17950 }, { "epoch": 5.63, "learning_rate": 4.366373902132999e-06, "loss": 1.8384, "step": 17960 }, { "epoch": 5.64, "learning_rate": 4.363237139272271e-06, "loss": 1.8726, "step": 17970 }, { "epoch": 5.64, "learning_rate": 4.360100376411543e-06, "loss": 1.9236, "step": 17980 }, { "epoch": 5.64, "learning_rate": 4.356963613550816e-06, "loss": 1.867, "step": 17990 }, { "epoch": 5.65, "learning_rate": 4.353826850690088e-06, "loss": 1.8962, "step": 18000 }, { "epoch": 5.65, "learning_rate": 4.35069008782936e-06, "loss": 1.8803, "step": 18010 }, { "epoch": 5.65, "learning_rate": 4.347553324968632e-06, "loss": 1.9137, "step": 18020 }, { "epoch": 5.66, "learning_rate": 4.344416562107905e-06, "loss": 1.8177, "step": 18030 }, { "epoch": 5.66, "learning_rate": 4.341279799247177e-06, "loss": 1.923, "step": 18040 }, { "epoch": 5.66, "learning_rate": 4.338143036386449e-06, "loss": 1.8752, "step": 18050 }, { "epoch": 5.66, "learning_rate": 4.335006273525722e-06, "loss": 1.7683, "step": 18060 }, { "epoch": 5.67, "learning_rate": 4.331869510664994e-06, "loss": 1.8277, "step": 18070 }, { "epoch": 5.67, "learning_rate": 4.328732747804266e-06, "loss": 1.8932, "step": 18080 }, { "epoch": 5.67, "learning_rate": 4.325595984943538e-06, "loss": 1.8611, "step": 18090 }, { "epoch": 5.68, "learning_rate": 4.322459222082811e-06, "loss": 1.9092, "step": 18100 }, { "epoch": 5.68, "learning_rate": 4.319322459222083e-06, "loss": 1.8887, "step": 18110 }, { "epoch": 5.68, "learning_rate": 4.316185696361355e-06, "loss": 1.8339, "step": 18120 }, { "epoch": 5.69, "learning_rate": 4.313048933500627e-06, "loss": 1.8809, "step": 18130 }, { "epoch": 5.69, "learning_rate": 4.3099121706399e-06, "loss": 1.838, "step": 18140 }, { "epoch": 5.69, "learning_rate": 4.306775407779172e-06, "loss": 1.9172, "step": 18150 }, { "epoch": 5.7, "learning_rate": 4.303638644918444e-06, "loss": 1.8116, "step": 18160 }, { "epoch": 5.7, "learning_rate": 4.300501882057716e-06, "loss": 1.9088, "step": 18170 }, { "epoch": 5.7, "learning_rate": 4.297365119196989e-06, "loss": 1.8633, "step": 18180 }, { "epoch": 5.71, "learning_rate": 4.294228356336261e-06, "loss": 1.8457, "step": 18190 }, { "epoch": 5.71, "learning_rate": 4.291091593475533e-06, "loss": 1.8625, "step": 18200 }, { "epoch": 5.71, "learning_rate": 4.287954830614806e-06, "loss": 1.895, "step": 18210 }, { "epoch": 5.72, "learning_rate": 4.284818067754078e-06, "loss": 1.8002, "step": 18220 }, { "epoch": 5.72, "learning_rate": 4.28168130489335e-06, "loss": 1.8445, "step": 18230 }, { "epoch": 5.72, "learning_rate": 4.278544542032622e-06, "loss": 1.8905, "step": 18240 }, { "epoch": 5.72, "learning_rate": 4.275407779171895e-06, "loss": 1.9096, "step": 18250 }, { "epoch": 5.73, "learning_rate": 4.272271016311167e-06, "loss": 1.9088, "step": 18260 }, { "epoch": 5.73, "learning_rate": 4.269134253450439e-06, "loss": 1.9363, "step": 18270 }, { "epoch": 5.73, "learning_rate": 4.2659974905897114e-06, "loss": 1.8302, "step": 18280 }, { "epoch": 5.74, "learning_rate": 4.262860727728984e-06, "loss": 1.9126, "step": 18290 }, { "epoch": 5.74, "learning_rate": 4.259723964868256e-06, "loss": 1.8292, "step": 18300 }, { "epoch": 5.74, "learning_rate": 4.256587202007528e-06, "loss": 1.8952, "step": 18310 }, { "epoch": 5.75, "learning_rate": 4.2534504391468005e-06, "loss": 1.8559, "step": 18320 }, { "epoch": 5.75, "learning_rate": 4.250313676286073e-06, "loss": 1.8406, "step": 18330 }, { "epoch": 5.75, "learning_rate": 4.247176913425345e-06, "loss": 1.8789, "step": 18340 }, { "epoch": 5.76, "learning_rate": 4.2440401505646174e-06, "loss": 1.9105, "step": 18350 }, { "epoch": 5.76, "learning_rate": 4.24090338770389e-06, "loss": 1.8775, "step": 18360 }, { "epoch": 5.76, "learning_rate": 4.237766624843162e-06, "loss": 1.8922, "step": 18370 }, { "epoch": 5.77, "learning_rate": 4.2346298619824344e-06, "loss": 1.8724, "step": 18380 }, { "epoch": 5.77, "learning_rate": 4.2314930991217065e-06, "loss": 1.8211, "step": 18390 }, { "epoch": 5.77, "learning_rate": 4.228356336260979e-06, "loss": 1.8813, "step": 18400 }, { "epoch": 5.77, "learning_rate": 4.225219573400251e-06, "loss": 1.8587, "step": 18410 }, { "epoch": 5.78, "learning_rate": 4.2220828105395235e-06, "loss": 1.8624, "step": 18420 }, { "epoch": 5.78, "learning_rate": 4.2189460476787955e-06, "loss": 1.8503, "step": 18430 }, { "epoch": 5.78, "learning_rate": 4.215809284818068e-06, "loss": 1.8128, "step": 18440 }, { "epoch": 5.79, "learning_rate": 4.2126725219573404e-06, "loss": 1.8921, "step": 18450 }, { "epoch": 5.79, "learning_rate": 4.2095357590966125e-06, "loss": 1.8751, "step": 18460 }, { "epoch": 5.79, "learning_rate": 4.2063989962358845e-06, "loss": 1.8635, "step": 18470 }, { "epoch": 5.8, "learning_rate": 4.203262233375157e-06, "loss": 1.8687, "step": 18480 }, { "epoch": 5.8, "learning_rate": 4.2001254705144295e-06, "loss": 1.9078, "step": 18490 }, { "epoch": 5.8, "learning_rate": 4.1969887076537015e-06, "loss": 1.8334, "step": 18500 }, { "epoch": 5.81, "learning_rate": 4.1938519447929736e-06, "loss": 1.8734, "step": 18510 }, { "epoch": 5.81, "learning_rate": 4.1907151819322464e-06, "loss": 1.9317, "step": 18520 }, { "epoch": 5.81, "learning_rate": 4.1875784190715185e-06, "loss": 1.8736, "step": 18530 }, { "epoch": 5.82, "learning_rate": 4.1844416562107905e-06, "loss": 1.9142, "step": 18540 }, { "epoch": 5.82, "learning_rate": 4.1813048933500634e-06, "loss": 1.8304, "step": 18550 }, { "epoch": 5.82, "learning_rate": 4.1781681304893355e-06, "loss": 1.8861, "step": 18560 }, { "epoch": 5.82, "learning_rate": 4.1750313676286075e-06, "loss": 1.8007, "step": 18570 }, { "epoch": 5.83, "learning_rate": 4.1718946047678796e-06, "loss": 1.9076, "step": 18580 }, { "epoch": 5.83, "learning_rate": 4.1687578419071525e-06, "loss": 1.7931, "step": 18590 }, { "epoch": 5.83, "learning_rate": 4.1656210790464245e-06, "loss": 1.9116, "step": 18600 }, { "epoch": 5.84, "learning_rate": 4.1624843161856965e-06, "loss": 1.8149, "step": 18610 }, { "epoch": 5.84, "learning_rate": 4.159347553324969e-06, "loss": 1.8394, "step": 18620 }, { "epoch": 5.84, "learning_rate": 4.1562107904642415e-06, "loss": 1.8453, "step": 18630 }, { "epoch": 5.85, "learning_rate": 4.1530740276035135e-06, "loss": 1.9149, "step": 18640 }, { "epoch": 5.85, "learning_rate": 4.149937264742786e-06, "loss": 1.865, "step": 18650 }, { "epoch": 5.85, "learning_rate": 4.146800501882058e-06, "loss": 1.9112, "step": 18660 }, { "epoch": 5.86, "learning_rate": 4.1436637390213305e-06, "loss": 1.8812, "step": 18670 }, { "epoch": 5.86, "learning_rate": 4.1405269761606026e-06, "loss": 1.8773, "step": 18680 }, { "epoch": 5.86, "learning_rate": 4.137390213299875e-06, "loss": 1.8407, "step": 18690 }, { "epoch": 5.87, "learning_rate": 4.1342534504391475e-06, "loss": 1.9239, "step": 18700 }, { "epoch": 5.87, "learning_rate": 4.1311166875784195e-06, "loss": 1.8716, "step": 18710 }, { "epoch": 5.87, "learning_rate": 4.127979924717692e-06, "loss": 1.9464, "step": 18720 }, { "epoch": 5.88, "learning_rate": 4.124843161856964e-06, "loss": 1.7955, "step": 18730 }, { "epoch": 5.88, "learning_rate": 4.1217063989962365e-06, "loss": 1.8431, "step": 18740 }, { "epoch": 5.88, "learning_rate": 4.1185696361355086e-06, "loss": 1.8306, "step": 18750 }, { "epoch": 5.88, "learning_rate": 4.115432873274781e-06, "loss": 1.8927, "step": 18760 }, { "epoch": 5.89, "learning_rate": 4.112296110414053e-06, "loss": 1.918, "step": 18770 }, { "epoch": 5.89, "learning_rate": 4.1091593475533255e-06, "loss": 1.8618, "step": 18780 }, { "epoch": 5.89, "learning_rate": 4.106022584692598e-06, "loss": 1.9345, "step": 18790 }, { "epoch": 5.9, "learning_rate": 4.1028858218318705e-06, "loss": 1.9022, "step": 18800 }, { "epoch": 5.9, "learning_rate": 4.099749058971142e-06, "loss": 1.8736, "step": 18810 }, { "epoch": 5.9, "learning_rate": 4.0966122961104146e-06, "loss": 1.8973, "step": 18820 }, { "epoch": 5.91, "learning_rate": 4.093475533249687e-06, "loss": 1.8944, "step": 18830 }, { "epoch": 5.91, "learning_rate": 4.0903387703889595e-06, "loss": 1.9158, "step": 18840 }, { "epoch": 5.91, "learning_rate": 4.087202007528231e-06, "loss": 1.8534, "step": 18850 }, { "epoch": 5.92, "learning_rate": 4.084065244667504e-06, "loss": 1.8687, "step": 18860 }, { "epoch": 5.92, "learning_rate": 4.080928481806776e-06, "loss": 1.9409, "step": 18870 }, { "epoch": 5.92, "learning_rate": 4.077791718946048e-06, "loss": 1.8839, "step": 18880 }, { "epoch": 5.93, "learning_rate": 4.0746549560853206e-06, "loss": 1.8978, "step": 18890 }, { "epoch": 5.93, "learning_rate": 4.071518193224593e-06, "loss": 1.8626, "step": 18900 }, { "epoch": 5.93, "learning_rate": 4.068381430363865e-06, "loss": 1.8504, "step": 18910 }, { "epoch": 5.93, "learning_rate": 4.065244667503137e-06, "loss": 1.8749, "step": 18920 }, { "epoch": 5.94, "learning_rate": 4.06210790464241e-06, "loss": 1.8506, "step": 18930 }, { "epoch": 5.94, "learning_rate": 4.058971141781682e-06, "loss": 1.931, "step": 18940 }, { "epoch": 5.94, "learning_rate": 4.055834378920954e-06, "loss": 1.8148, "step": 18950 }, { "epoch": 5.95, "learning_rate": 4.052697616060226e-06, "loss": 1.8824, "step": 18960 }, { "epoch": 5.95, "learning_rate": 4.049560853199499e-06, "loss": 1.8675, "step": 18970 }, { "epoch": 5.95, "learning_rate": 4.046424090338771e-06, "loss": 1.8647, "step": 18980 }, { "epoch": 5.96, "learning_rate": 4.0432873274780436e-06, "loss": 1.8516, "step": 18990 }, { "epoch": 5.96, "learning_rate": 4.040150564617315e-06, "loss": 1.8243, "step": 19000 }, { "epoch": 5.96, "learning_rate": 4.037013801756588e-06, "loss": 1.8079, "step": 19010 }, { "epoch": 5.97, "learning_rate": 4.03387703889586e-06, "loss": 1.9742, "step": 19020 }, { "epoch": 5.97, "learning_rate": 4.030740276035133e-06, "loss": 1.861, "step": 19030 }, { "epoch": 5.97, "learning_rate": 4.027603513174405e-06, "loss": 1.8048, "step": 19040 }, { "epoch": 5.98, "learning_rate": 4.024466750313677e-06, "loss": 1.825, "step": 19050 }, { "epoch": 5.98, "learning_rate": 4.021329987452949e-06, "loss": 1.903, "step": 19060 }, { "epoch": 5.98, "learning_rate": 4.018193224592221e-06, "loss": 1.8486, "step": 19070 }, { "epoch": 5.98, "learning_rate": 4.015056461731494e-06, "loss": 1.887, "step": 19080 }, { "epoch": 5.99, "learning_rate": 4.011919698870766e-06, "loss": 1.888, "step": 19090 }, { "epoch": 5.99, "learning_rate": 4.008782936010038e-06, "loss": 1.8812, "step": 19100 }, { "epoch": 5.99, "learning_rate": 4.00564617314931e-06, "loss": 1.8965, "step": 19110 }, { "epoch": 6.0, "learning_rate": 4.002509410288583e-06, "loss": 1.8517, "step": 19120 }, { "epoch": 6.0, "eval_loss": 1.820218563079834, "eval_runtime": 13.6104, "eval_samples_per_second": 73.473, "eval_steps_per_second": 4.629, "step": 19128 }, { "epoch": 6.0, "learning_rate": 3.999372647427855e-06, "loss": 1.8515, "step": 19130 }, { "epoch": 6.0, "learning_rate": 3.996235884567128e-06, "loss": 1.8202, "step": 19140 }, { "epoch": 6.01, "learning_rate": 3.993099121706399e-06, "loss": 1.8573, "step": 19150 }, { "epoch": 6.01, "learning_rate": 3.989962358845672e-06, "loss": 1.8828, "step": 19160 }, { "epoch": 6.01, "learning_rate": 3.986825595984944e-06, "loss": 1.872, "step": 19170 }, { "epoch": 6.02, "learning_rate": 3.983688833124217e-06, "loss": 1.8282, "step": 19180 }, { "epoch": 6.02, "learning_rate": 3.980552070263489e-06, "loss": 1.8519, "step": 19190 }, { "epoch": 6.02, "learning_rate": 3.977415307402761e-06, "loss": 1.9018, "step": 19200 }, { "epoch": 6.03, "learning_rate": 3.974278544542033e-06, "loss": 1.8212, "step": 19210 }, { "epoch": 6.03, "learning_rate": 3.971141781681306e-06, "loss": 1.8703, "step": 19220 }, { "epoch": 6.03, "learning_rate": 3.968005018820578e-06, "loss": 1.9376, "step": 19230 }, { "epoch": 6.04, "learning_rate": 3.96486825595985e-06, "loss": 1.7754, "step": 19240 }, { "epoch": 6.04, "learning_rate": 3.961731493099122e-06, "loss": 1.7899, "step": 19250 }, { "epoch": 6.04, "learning_rate": 3.958594730238394e-06, "loss": 1.8671, "step": 19260 }, { "epoch": 6.04, "learning_rate": 3.955457967377667e-06, "loss": 1.7892, "step": 19270 }, { "epoch": 6.05, "learning_rate": 3.952321204516939e-06, "loss": 1.8795, "step": 19280 }, { "epoch": 6.05, "learning_rate": 3.949184441656211e-06, "loss": 1.8955, "step": 19290 }, { "epoch": 6.05, "learning_rate": 3.946047678795483e-06, "loss": 1.8663, "step": 19300 }, { "epoch": 6.06, "learning_rate": 3.942910915934756e-06, "loss": 1.8066, "step": 19310 }, { "epoch": 6.06, "learning_rate": 3.939774153074028e-06, "loss": 1.8993, "step": 19320 }, { "epoch": 6.06, "learning_rate": 3.936637390213301e-06, "loss": 1.9433, "step": 19330 }, { "epoch": 6.07, "learning_rate": 3.933500627352572e-06, "loss": 1.8686, "step": 19340 }, { "epoch": 6.07, "learning_rate": 3.930363864491845e-06, "loss": 1.8731, "step": 19350 }, { "epoch": 6.07, "learning_rate": 3.927227101631117e-06, "loss": 1.8142, "step": 19360 }, { "epoch": 6.08, "learning_rate": 3.92409033877039e-06, "loss": 1.8284, "step": 19370 }, { "epoch": 6.08, "learning_rate": 3.920953575909662e-06, "loss": 1.8389, "step": 19380 }, { "epoch": 6.08, "learning_rate": 3.917816813048934e-06, "loss": 1.8045, "step": 19390 }, { "epoch": 6.09, "learning_rate": 3.914680050188206e-06, "loss": 1.9117, "step": 19400 }, { "epoch": 6.09, "learning_rate": 3.911543287327478e-06, "loss": 1.7794, "step": 19410 }, { "epoch": 6.09, "learning_rate": 3.908406524466751e-06, "loss": 1.7901, "step": 19420 }, { "epoch": 6.09, "learning_rate": 3.905269761606023e-06, "loss": 1.8203, "step": 19430 }, { "epoch": 6.1, "learning_rate": 3.902132998745295e-06, "loss": 1.8181, "step": 19440 }, { "epoch": 6.1, "learning_rate": 3.898996235884567e-06, "loss": 1.8661, "step": 19450 }, { "epoch": 6.1, "learning_rate": 3.89585947302384e-06, "loss": 1.838, "step": 19460 }, { "epoch": 6.11, "learning_rate": 3.892722710163112e-06, "loss": 1.8502, "step": 19470 }, { "epoch": 6.11, "learning_rate": 3.889585947302385e-06, "loss": 1.8204, "step": 19480 }, { "epoch": 6.11, "learning_rate": 3.886449184441656e-06, "loss": 1.8692, "step": 19490 }, { "epoch": 6.12, "learning_rate": 3.883312421580929e-06, "loss": 1.8647, "step": 19500 }, { "epoch": 6.12, "learning_rate": 3.880175658720201e-06, "loss": 1.8123, "step": 19510 }, { "epoch": 6.12, "learning_rate": 3.877038895859474e-06, "loss": 1.832, "step": 19520 }, { "epoch": 6.13, "learning_rate": 3.873902132998746e-06, "loss": 1.8847, "step": 19530 }, { "epoch": 6.13, "learning_rate": 3.870765370138018e-06, "loss": 1.8446, "step": 19540 }, { "epoch": 6.13, "learning_rate": 3.86762860727729e-06, "loss": 1.8965, "step": 19550 }, { "epoch": 6.14, "learning_rate": 3.864491844416563e-06, "loss": 1.873, "step": 19560 }, { "epoch": 6.14, "learning_rate": 3.861355081555835e-06, "loss": 1.8918, "step": 19570 }, { "epoch": 6.14, "learning_rate": 3.858218318695107e-06, "loss": 1.8685, "step": 19580 }, { "epoch": 6.14, "learning_rate": 3.855081555834379e-06, "loss": 1.794, "step": 19590 }, { "epoch": 6.15, "learning_rate": 3.851944792973651e-06, "loss": 1.8445, "step": 19600 }, { "epoch": 6.15, "learning_rate": 3.848808030112924e-06, "loss": 1.906, "step": 19610 }, { "epoch": 6.15, "learning_rate": 3.845671267252196e-06, "loss": 1.8826, "step": 19620 }, { "epoch": 6.16, "learning_rate": 3.842534504391469e-06, "loss": 1.7995, "step": 19630 }, { "epoch": 6.16, "learning_rate": 3.83939774153074e-06, "loss": 1.8395, "step": 19640 }, { "epoch": 6.16, "learning_rate": 3.836260978670013e-06, "loss": 1.7938, "step": 19650 }, { "epoch": 6.17, "learning_rate": 3.833124215809285e-06, "loss": 1.8405, "step": 19660 }, { "epoch": 6.17, "learning_rate": 3.829987452948558e-06, "loss": 1.8978, "step": 19670 }, { "epoch": 6.17, "learning_rate": 3.82685069008783e-06, "loss": 1.8358, "step": 19680 }, { "epoch": 6.18, "learning_rate": 3.823713927227102e-06, "loss": 1.8231, "step": 19690 }, { "epoch": 6.18, "learning_rate": 3.820577164366374e-06, "loss": 1.8856, "step": 19700 }, { "epoch": 6.18, "learning_rate": 3.817440401505647e-06, "loss": 1.833, "step": 19710 }, { "epoch": 6.19, "learning_rate": 3.814303638644919e-06, "loss": 1.7791, "step": 19720 }, { "epoch": 6.19, "learning_rate": 3.8111668757841906e-06, "loss": 1.8833, "step": 19730 }, { "epoch": 6.19, "learning_rate": 3.808030112923463e-06, "loss": 1.8569, "step": 19740 }, { "epoch": 6.2, "learning_rate": 3.8048933500627355e-06, "loss": 1.8558, "step": 19750 }, { "epoch": 6.2, "learning_rate": 3.801756587202008e-06, "loss": 1.7897, "step": 19760 }, { "epoch": 6.2, "learning_rate": 3.7986198243412804e-06, "loss": 1.7784, "step": 19770 }, { "epoch": 6.2, "learning_rate": 3.795483061480552e-06, "loss": 1.8789, "step": 19780 }, { "epoch": 6.21, "learning_rate": 3.7923462986198245e-06, "loss": 1.8205, "step": 19790 }, { "epoch": 6.21, "learning_rate": 3.789209535759097e-06, "loss": 1.7749, "step": 19800 }, { "epoch": 6.21, "learning_rate": 3.7860727728983695e-06, "loss": 1.896, "step": 19810 }, { "epoch": 6.22, "learning_rate": 3.7829360100376415e-06, "loss": 1.8102, "step": 19820 }, { "epoch": 6.22, "learning_rate": 3.7797992471769136e-06, "loss": 1.8213, "step": 19830 }, { "epoch": 6.22, "learning_rate": 3.776662484316186e-06, "loss": 1.8603, "step": 19840 }, { "epoch": 6.23, "learning_rate": 3.773525721455458e-06, "loss": 1.8553, "step": 19850 }, { "epoch": 6.23, "learning_rate": 3.7703889585947305e-06, "loss": 1.8214, "step": 19860 }, { "epoch": 6.23, "learning_rate": 3.767252195734003e-06, "loss": 1.8503, "step": 19870 }, { "epoch": 6.24, "learning_rate": 3.764115432873275e-06, "loss": 1.8402, "step": 19880 }, { "epoch": 6.24, "learning_rate": 3.760978670012547e-06, "loss": 1.8608, "step": 19890 }, { "epoch": 6.24, "learning_rate": 3.7578419071518196e-06, "loss": 1.8155, "step": 19900 }, { "epoch": 6.25, "learning_rate": 3.754705144291092e-06, "loss": 1.8343, "step": 19910 }, { "epoch": 6.25, "learning_rate": 3.7515683814303645e-06, "loss": 1.799, "step": 19920 }, { "epoch": 6.25, "learning_rate": 3.748431618569636e-06, "loss": 1.8569, "step": 19930 }, { "epoch": 6.25, "learning_rate": 3.7452948557089086e-06, "loss": 1.866, "step": 19940 }, { "epoch": 6.26, "learning_rate": 3.742158092848181e-06, "loss": 1.8655, "step": 19950 }, { "epoch": 6.26, "learning_rate": 3.7390213299874535e-06, "loss": 1.8749, "step": 19960 }, { "epoch": 6.26, "learning_rate": 3.7358845671267256e-06, "loss": 1.8648, "step": 19970 }, { "epoch": 6.27, "learning_rate": 3.7327478042659976e-06, "loss": 1.8451, "step": 19980 }, { "epoch": 6.27, "learning_rate": 3.72961104140527e-06, "loss": 1.851, "step": 19990 }, { "epoch": 6.27, "learning_rate": 3.7264742785445425e-06, "loss": 1.8748, "step": 20000 }, { "epoch": 6.28, "learning_rate": 3.7233375156838146e-06, "loss": 1.8593, "step": 20010 }, { "epoch": 6.28, "learning_rate": 3.720200752823087e-06, "loss": 1.8805, "step": 20020 }, { "epoch": 6.28, "learning_rate": 3.717063989962359e-06, "loss": 1.8956, "step": 20030 }, { "epoch": 6.29, "learning_rate": 3.713927227101631e-06, "loss": 1.7666, "step": 20040 }, { "epoch": 6.29, "learning_rate": 3.7107904642409036e-06, "loss": 1.8467, "step": 20050 }, { "epoch": 6.29, "learning_rate": 3.707653701380176e-06, "loss": 1.8006, "step": 20060 }, { "epoch": 6.3, "learning_rate": 3.7045169385194486e-06, "loss": 1.857, "step": 20070 }, { "epoch": 6.3, "learning_rate": 3.70138017565872e-06, "loss": 1.8936, "step": 20080 }, { "epoch": 6.3, "learning_rate": 3.6982434127979926e-06, "loss": 1.878, "step": 20090 }, { "epoch": 6.3, "learning_rate": 3.695106649937265e-06, "loss": 1.8468, "step": 20100 }, { "epoch": 6.31, "learning_rate": 3.6919698870765376e-06, "loss": 1.8623, "step": 20110 }, { "epoch": 6.31, "learning_rate": 3.68883312421581e-06, "loss": 1.8862, "step": 20120 }, { "epoch": 6.31, "learning_rate": 3.6856963613550817e-06, "loss": 1.919, "step": 20130 }, { "epoch": 6.32, "learning_rate": 3.682559598494354e-06, "loss": 1.8015, "step": 20140 }, { "epoch": 6.32, "learning_rate": 3.6794228356336266e-06, "loss": 1.9012, "step": 20150 }, { "epoch": 6.32, "learning_rate": 3.6762860727728987e-06, "loss": 1.7934, "step": 20160 }, { "epoch": 6.33, "learning_rate": 3.6731493099121707e-06, "loss": 1.8981, "step": 20170 }, { "epoch": 6.33, "learning_rate": 3.670012547051443e-06, "loss": 1.829, "step": 20180 }, { "epoch": 6.33, "learning_rate": 3.6668757841907156e-06, "loss": 1.881, "step": 20190 }, { "epoch": 6.34, "learning_rate": 3.6637390213299877e-06, "loss": 1.8448, "step": 20200 }, { "epoch": 6.34, "learning_rate": 3.66060225846926e-06, "loss": 1.9271, "step": 20210 }, { "epoch": 6.34, "learning_rate": 3.657465495608532e-06, "loss": 1.8187, "step": 20220 }, { "epoch": 6.35, "learning_rate": 3.6543287327478042e-06, "loss": 1.9268, "step": 20230 }, { "epoch": 6.35, "learning_rate": 3.6511919698870767e-06, "loss": 1.8618, "step": 20240 }, { "epoch": 6.35, "learning_rate": 3.648055207026349e-06, "loss": 1.9267, "step": 20250 }, { "epoch": 6.36, "learning_rate": 3.6449184441656216e-06, "loss": 1.8365, "step": 20260 }, { "epoch": 6.36, "learning_rate": 3.6417816813048933e-06, "loss": 1.872, "step": 20270 }, { "epoch": 6.36, "learning_rate": 3.6386449184441657e-06, "loss": 1.9315, "step": 20280 }, { "epoch": 6.36, "learning_rate": 3.635508155583438e-06, "loss": 1.8915, "step": 20290 }, { "epoch": 6.37, "learning_rate": 3.6323713927227107e-06, "loss": 1.8635, "step": 20300 }, { "epoch": 6.37, "learning_rate": 3.629234629861983e-06, "loss": 1.8777, "step": 20310 }, { "epoch": 6.37, "learning_rate": 3.6260978670012548e-06, "loss": 1.9139, "step": 20320 }, { "epoch": 6.38, "learning_rate": 3.6229611041405272e-06, "loss": 1.7894, "step": 20330 }, { "epoch": 6.38, "learning_rate": 3.6198243412797997e-06, "loss": 1.8212, "step": 20340 }, { "epoch": 6.38, "learning_rate": 3.6166875784190717e-06, "loss": 1.8739, "step": 20350 }, { "epoch": 6.39, "learning_rate": 3.6135508155583442e-06, "loss": 1.8713, "step": 20360 }, { "epoch": 6.39, "learning_rate": 3.6104140526976163e-06, "loss": 1.9083, "step": 20370 }, { "epoch": 6.39, "learning_rate": 3.6072772898368887e-06, "loss": 1.8237, "step": 20380 }, { "epoch": 6.4, "learning_rate": 3.6041405269761608e-06, "loss": 1.8525, "step": 20390 }, { "epoch": 6.4, "learning_rate": 3.6010037641154332e-06, "loss": 1.9006, "step": 20400 }, { "epoch": 6.4, "learning_rate": 3.5978670012547057e-06, "loss": 1.7635, "step": 20410 }, { "epoch": 6.41, "learning_rate": 3.5947302383939773e-06, "loss": 1.8394, "step": 20420 }, { "epoch": 6.41, "learning_rate": 3.59159347553325e-06, "loss": 1.8701, "step": 20430 }, { "epoch": 6.41, "learning_rate": 3.5884567126725223e-06, "loss": 1.8752, "step": 20440 }, { "epoch": 6.41, "learning_rate": 3.5853199498117947e-06, "loss": 1.8211, "step": 20450 }, { "epoch": 6.42, "learning_rate": 3.582183186951067e-06, "loss": 1.8772, "step": 20460 }, { "epoch": 6.42, "learning_rate": 3.579046424090339e-06, "loss": 1.849, "step": 20470 }, { "epoch": 6.42, "learning_rate": 3.5759096612296113e-06, "loss": 1.8903, "step": 20480 }, { "epoch": 6.43, "learning_rate": 3.5727728983688838e-06, "loss": 1.8642, "step": 20490 }, { "epoch": 6.43, "learning_rate": 3.5696361355081562e-06, "loss": 1.9196, "step": 20500 }, { "epoch": 6.43, "learning_rate": 3.5664993726474283e-06, "loss": 1.7683, "step": 20510 }, { "epoch": 6.44, "learning_rate": 3.5633626097867003e-06, "loss": 1.8828, "step": 20520 }, { "epoch": 6.44, "learning_rate": 3.560225846925973e-06, "loss": 1.828, "step": 20530 }, { "epoch": 6.44, "learning_rate": 3.557089084065245e-06, "loss": 1.8525, "step": 20540 }, { "epoch": 6.45, "learning_rate": 3.5539523212045173e-06, "loss": 1.8449, "step": 20550 }, { "epoch": 6.45, "learning_rate": 3.5508155583437898e-06, "loss": 1.8893, "step": 20560 }, { "epoch": 6.45, "learning_rate": 3.5476787954830614e-06, "loss": 1.854, "step": 20570 }, { "epoch": 6.46, "learning_rate": 3.544542032622334e-06, "loss": 1.8377, "step": 20580 }, { "epoch": 6.46, "learning_rate": 3.5414052697616063e-06, "loss": 1.813, "step": 20590 }, { "epoch": 6.46, "learning_rate": 3.538268506900879e-06, "loss": 1.8363, "step": 20600 }, { "epoch": 6.46, "learning_rate": 3.5351317440401504e-06, "loss": 1.8302, "step": 20610 }, { "epoch": 6.47, "learning_rate": 3.531994981179423e-06, "loss": 1.9071, "step": 20620 }, { "epoch": 6.47, "learning_rate": 3.5288582183186954e-06, "loss": 1.8615, "step": 20630 }, { "epoch": 6.47, "learning_rate": 3.525721455457968e-06, "loss": 1.8583, "step": 20640 }, { "epoch": 6.48, "learning_rate": 3.5225846925972403e-06, "loss": 1.8154, "step": 20650 }, { "epoch": 6.48, "learning_rate": 3.519447929736512e-06, "loss": 1.8424, "step": 20660 }, { "epoch": 6.48, "learning_rate": 3.5163111668757844e-06, "loss": 1.8462, "step": 20670 }, { "epoch": 6.49, "learning_rate": 3.513174404015057e-06, "loss": 1.9179, "step": 20680 }, { "epoch": 6.49, "learning_rate": 3.510037641154329e-06, "loss": 1.8476, "step": 20690 }, { "epoch": 6.49, "learning_rate": 3.5069008782936014e-06, "loss": 1.8685, "step": 20700 }, { "epoch": 6.5, "learning_rate": 3.5037641154328734e-06, "loss": 1.845, "step": 20710 }, { "epoch": 6.5, "learning_rate": 3.500627352572146e-06, "loss": 1.8744, "step": 20720 }, { "epoch": 6.5, "learning_rate": 3.497490589711418e-06, "loss": 1.9114, "step": 20730 }, { "epoch": 6.51, "learning_rate": 3.4943538268506904e-06, "loss": 1.8563, "step": 20740 }, { "epoch": 6.51, "learning_rate": 3.491217063989963e-06, "loss": 1.866, "step": 20750 }, { "epoch": 6.51, "learning_rate": 3.4880803011292345e-06, "loss": 1.939, "step": 20760 }, { "epoch": 6.52, "learning_rate": 3.484943538268507e-06, "loss": 1.8258, "step": 20770 }, { "epoch": 6.52, "learning_rate": 3.4818067754077794e-06, "loss": 1.903, "step": 20780 }, { "epoch": 6.52, "learning_rate": 3.478670012547052e-06, "loss": 1.8604, "step": 20790 }, { "epoch": 6.52, "learning_rate": 3.4755332496863244e-06, "loss": 1.7805, "step": 20800 }, { "epoch": 6.53, "learning_rate": 3.472396486825596e-06, "loss": 1.8821, "step": 20810 }, { "epoch": 6.53, "learning_rate": 3.4692597239648684e-06, "loss": 1.8822, "step": 20820 }, { "epoch": 6.53, "learning_rate": 3.466122961104141e-06, "loss": 1.8069, "step": 20830 }, { "epoch": 6.54, "learning_rate": 3.4629861982434134e-06, "loss": 1.8125, "step": 20840 }, { "epoch": 6.54, "learning_rate": 3.4598494353826854e-06, "loss": 1.9261, "step": 20850 }, { "epoch": 6.54, "learning_rate": 3.4567126725219575e-06, "loss": 1.7982, "step": 20860 }, { "epoch": 6.55, "learning_rate": 3.45357590966123e-06, "loss": 1.9647, "step": 20870 }, { "epoch": 6.55, "learning_rate": 3.450439146800502e-06, "loss": 1.7362, "step": 20880 }, { "epoch": 6.55, "learning_rate": 3.4473023839397745e-06, "loss": 1.828, "step": 20890 }, { "epoch": 6.56, "learning_rate": 3.444165621079047e-06, "loss": 1.8861, "step": 20900 }, { "epoch": 6.56, "learning_rate": 3.441028858218319e-06, "loss": 1.9139, "step": 20910 }, { "epoch": 6.56, "learning_rate": 3.437892095357591e-06, "loss": 1.866, "step": 20920 }, { "epoch": 6.57, "learning_rate": 3.4347553324968635e-06, "loss": 1.7982, "step": 20930 }, { "epoch": 6.57, "learning_rate": 3.431618569636136e-06, "loss": 1.884, "step": 20940 }, { "epoch": 6.57, "learning_rate": 3.4284818067754084e-06, "loss": 1.7966, "step": 20950 }, { "epoch": 6.57, "learning_rate": 3.42534504391468e-06, "loss": 1.9128, "step": 20960 }, { "epoch": 6.58, "learning_rate": 3.4222082810539525e-06, "loss": 1.8859, "step": 20970 }, { "epoch": 6.58, "learning_rate": 3.419071518193225e-06, "loss": 1.8191, "step": 20980 }, { "epoch": 6.58, "learning_rate": 3.4159347553324974e-06, "loss": 1.8634, "step": 20990 }, { "epoch": 6.59, "learning_rate": 3.4127979924717695e-06, "loss": 1.8503, "step": 21000 }, { "epoch": 6.59, "learning_rate": 3.4096612296110415e-06, "loss": 1.9214, "step": 21010 }, { "epoch": 6.59, "learning_rate": 3.406524466750314e-06, "loss": 1.9069, "step": 21020 }, { "epoch": 6.6, "learning_rate": 3.4033877038895865e-06, "loss": 1.8755, "step": 21030 }, { "epoch": 6.6, "learning_rate": 3.4002509410288585e-06, "loss": 1.899, "step": 21040 }, { "epoch": 6.6, "learning_rate": 3.3971141781681306e-06, "loss": 1.8116, "step": 21050 }, { "epoch": 6.61, "learning_rate": 3.393977415307403e-06, "loss": 1.838, "step": 21060 }, { "epoch": 6.61, "learning_rate": 3.390840652446675e-06, "loss": 1.7901, "step": 21070 }, { "epoch": 6.61, "learning_rate": 3.3877038895859475e-06, "loss": 1.7779, "step": 21080 }, { "epoch": 6.62, "learning_rate": 3.38456712672522e-06, "loss": 1.869, "step": 21090 }, { "epoch": 6.62, "learning_rate": 3.381430363864492e-06, "loss": 1.8738, "step": 21100 }, { "epoch": 6.62, "learning_rate": 3.378293601003764e-06, "loss": 1.8513, "step": 21110 }, { "epoch": 6.62, "learning_rate": 3.3751568381430366e-06, "loss": 1.8699, "step": 21120 }, { "epoch": 6.63, "learning_rate": 3.372020075282309e-06, "loss": 1.9343, "step": 21130 }, { "epoch": 6.63, "learning_rate": 3.3688833124215815e-06, "loss": 1.8008, "step": 21140 }, { "epoch": 6.63, "learning_rate": 3.365746549560853e-06, "loss": 1.8803, "step": 21150 }, { "epoch": 6.64, "learning_rate": 3.3626097867001256e-06, "loss": 1.8176, "step": 21160 }, { "epoch": 6.64, "learning_rate": 3.359473023839398e-06, "loss": 1.855, "step": 21170 }, { "epoch": 6.64, "learning_rate": 3.3563362609786705e-06, "loss": 1.8621, "step": 21180 }, { "epoch": 6.65, "learning_rate": 3.3531994981179426e-06, "loss": 1.8497, "step": 21190 }, { "epoch": 6.65, "learning_rate": 3.3500627352572146e-06, "loss": 1.7359, "step": 21200 }, { "epoch": 6.65, "learning_rate": 3.346925972396487e-06, "loss": 1.8435, "step": 21210 }, { "epoch": 6.66, "learning_rate": 3.3437892095357596e-06, "loss": 1.6913, "step": 21220 }, { "epoch": 6.66, "learning_rate": 3.3406524466750316e-06, "loss": 1.924, "step": 21230 }, { "epoch": 6.66, "learning_rate": 3.337515683814304e-06, "loss": 1.8733, "step": 21240 }, { "epoch": 6.67, "learning_rate": 3.334378920953576e-06, "loss": 1.8878, "step": 21250 }, { "epoch": 6.67, "learning_rate": 3.331242158092848e-06, "loss": 1.8516, "step": 21260 }, { "epoch": 6.67, "learning_rate": 3.3281053952321206e-06, "loss": 1.8499, "step": 21270 }, { "epoch": 6.68, "learning_rate": 3.324968632371393e-06, "loss": 1.8517, "step": 21280 }, { "epoch": 6.68, "learning_rate": 3.3218318695106656e-06, "loss": 1.8365, "step": 21290 }, { "epoch": 6.68, "learning_rate": 3.318695106649937e-06, "loss": 1.8175, "step": 21300 }, { "epoch": 6.68, "learning_rate": 3.3155583437892097e-06, "loss": 1.8422, "step": 21310 }, { "epoch": 6.69, "learning_rate": 3.312421580928482e-06, "loss": 1.7359, "step": 21320 }, { "epoch": 6.69, "learning_rate": 3.3092848180677546e-06, "loss": 1.8765, "step": 21330 }, { "epoch": 6.69, "learning_rate": 3.306148055207027e-06, "loss": 1.8147, "step": 21340 }, { "epoch": 6.7, "learning_rate": 3.3030112923462987e-06, "loss": 1.9332, "step": 21350 }, { "epoch": 6.7, "learning_rate": 3.299874529485571e-06, "loss": 1.8503, "step": 21360 }, { "epoch": 6.7, "learning_rate": 3.2967377666248436e-06, "loss": 1.9164, "step": 21370 }, { "epoch": 6.71, "learning_rate": 3.2936010037641157e-06, "loss": 1.8094, "step": 21380 }, { "epoch": 6.71, "learning_rate": 3.290464240903388e-06, "loss": 1.8952, "step": 21390 }, { "epoch": 6.71, "learning_rate": 3.28732747804266e-06, "loss": 1.9792, "step": 21400 }, { "epoch": 6.72, "learning_rate": 3.2841907151819326e-06, "loss": 1.8349, "step": 21410 }, { "epoch": 6.72, "learning_rate": 3.2810539523212047e-06, "loss": 1.8463, "step": 21420 }, { "epoch": 6.72, "learning_rate": 3.277917189460477e-06, "loss": 1.8696, "step": 21430 }, { "epoch": 6.73, "learning_rate": 3.2747804265997496e-06, "loss": 1.8152, "step": 21440 }, { "epoch": 6.73, "learning_rate": 3.2716436637390213e-06, "loss": 1.8882, "step": 21450 }, { "epoch": 6.73, "learning_rate": 3.2685069008782937e-06, "loss": 1.8587, "step": 21460 }, { "epoch": 6.73, "learning_rate": 3.265370138017566e-06, "loss": 1.8516, "step": 21470 }, { "epoch": 6.74, "learning_rate": 3.2622333751568387e-06, "loss": 1.84, "step": 21480 }, { "epoch": 6.74, "learning_rate": 3.2590966122961103e-06, "loss": 1.8502, "step": 21490 }, { "epoch": 6.74, "learning_rate": 3.2559598494353827e-06, "loss": 1.9171, "step": 21500 }, { "epoch": 6.75, "learning_rate": 3.2528230865746552e-06, "loss": 1.8936, "step": 21510 }, { "epoch": 6.75, "learning_rate": 3.2496863237139277e-06, "loss": 1.8184, "step": 21520 }, { "epoch": 6.75, "learning_rate": 3.2465495608532e-06, "loss": 1.852, "step": 21530 }, { "epoch": 6.76, "learning_rate": 3.2434127979924718e-06, "loss": 1.7877, "step": 21540 }, { "epoch": 6.76, "learning_rate": 3.2402760351317442e-06, "loss": 1.8432, "step": 21550 }, { "epoch": 6.76, "learning_rate": 3.2371392722710167e-06, "loss": 1.8481, "step": 21560 }, { "epoch": 6.77, "learning_rate": 3.2340025094102888e-06, "loss": 1.856, "step": 21570 }, { "epoch": 6.77, "learning_rate": 3.2308657465495612e-06, "loss": 1.8798, "step": 21580 }, { "epoch": 6.77, "learning_rate": 3.2277289836888333e-06, "loss": 1.8807, "step": 21590 }, { "epoch": 6.78, "learning_rate": 3.2245922208281057e-06, "loss": 1.8887, "step": 21600 }, { "epoch": 6.78, "learning_rate": 3.2214554579673778e-06, "loss": 1.8997, "step": 21610 }, { "epoch": 6.78, "learning_rate": 3.2183186951066502e-06, "loss": 1.8024, "step": 21620 }, { "epoch": 6.78, "learning_rate": 3.2151819322459227e-06, "loss": 1.8396, "step": 21630 }, { "epoch": 6.79, "learning_rate": 3.2120451693851943e-06, "loss": 1.8613, "step": 21640 }, { "epoch": 6.79, "learning_rate": 3.208908406524467e-06, "loss": 1.8002, "step": 21650 }, { "epoch": 6.79, "learning_rate": 3.2057716436637393e-06, "loss": 1.8375, "step": 21660 }, { "epoch": 6.8, "learning_rate": 3.2026348808030117e-06, "loss": 1.8506, "step": 21670 }, { "epoch": 6.8, "learning_rate": 3.1994981179422842e-06, "loss": 1.7801, "step": 21680 }, { "epoch": 6.8, "learning_rate": 3.196361355081556e-06, "loss": 1.8555, "step": 21690 }, { "epoch": 6.81, "learning_rate": 3.1932245922208283e-06, "loss": 1.9085, "step": 21700 }, { "epoch": 6.81, "learning_rate": 3.1900878293601008e-06, "loss": 1.8794, "step": 21710 }, { "epoch": 6.81, "learning_rate": 3.186951066499373e-06, "loss": 1.8402, "step": 21720 }, { "epoch": 6.82, "learning_rate": 3.1838143036386453e-06, "loss": 1.8715, "step": 21730 }, { "epoch": 6.82, "learning_rate": 3.1806775407779173e-06, "loss": 1.8458, "step": 21740 }, { "epoch": 6.82, "learning_rate": 3.17754077791719e-06, "loss": 1.866, "step": 21750 }, { "epoch": 6.83, "learning_rate": 3.174404015056462e-06, "loss": 1.8352, "step": 21760 }, { "epoch": 6.83, "learning_rate": 3.1712672521957343e-06, "loss": 1.927, "step": 21770 }, { "epoch": 6.83, "learning_rate": 3.1681304893350068e-06, "loss": 1.9368, "step": 21780 }, { "epoch": 6.84, "learning_rate": 3.1649937264742784e-06, "loss": 1.875, "step": 21790 }, { "epoch": 6.84, "learning_rate": 3.161856963613551e-06, "loss": 1.8245, "step": 21800 }, { "epoch": 6.84, "learning_rate": 3.1587202007528233e-06, "loss": 1.742, "step": 21810 }, { "epoch": 6.84, "learning_rate": 3.155583437892096e-06, "loss": 1.8363, "step": 21820 }, { "epoch": 6.85, "learning_rate": 3.1524466750313683e-06, "loss": 1.8633, "step": 21830 }, { "epoch": 6.85, "learning_rate": 3.14930991217064e-06, "loss": 1.8008, "step": 21840 }, { "epoch": 6.85, "learning_rate": 3.1461731493099124e-06, "loss": 1.8937, "step": 21850 }, { "epoch": 6.86, "learning_rate": 3.143036386449185e-06, "loss": 1.7756, "step": 21860 }, { "epoch": 6.86, "learning_rate": 3.1398996235884573e-06, "loss": 1.8713, "step": 21870 }, { "epoch": 6.86, "learning_rate": 3.1367628607277293e-06, "loss": 1.8834, "step": 21880 }, { "epoch": 6.87, "learning_rate": 3.1336260978670014e-06, "loss": 1.907, "step": 21890 }, { "epoch": 6.87, "learning_rate": 3.130489335006274e-06, "loss": 1.8408, "step": 21900 }, { "epoch": 6.87, "learning_rate": 3.127352572145546e-06, "loss": 1.8588, "step": 21910 }, { "epoch": 6.88, "learning_rate": 3.1242158092848184e-06, "loss": 1.8575, "step": 21920 }, { "epoch": 6.88, "learning_rate": 3.1210790464240904e-06, "loss": 1.834, "step": 21930 }, { "epoch": 6.88, "learning_rate": 3.117942283563363e-06, "loss": 1.8749, "step": 21940 }, { "epoch": 6.89, "learning_rate": 3.114805520702635e-06, "loss": 1.8892, "step": 21950 }, { "epoch": 6.89, "learning_rate": 3.1116687578419074e-06, "loss": 1.8717, "step": 21960 }, { "epoch": 6.89, "learning_rate": 3.10853199498118e-06, "loss": 1.8847, "step": 21970 }, { "epoch": 6.89, "learning_rate": 3.1053952321204515e-06, "loss": 1.9115, "step": 21980 }, { "epoch": 6.9, "learning_rate": 3.102258469259724e-06, "loss": 1.8327, "step": 21990 }, { "epoch": 6.9, "learning_rate": 3.0991217063989964e-06, "loss": 1.8495, "step": 22000 }, { "epoch": 6.9, "learning_rate": 3.095984943538269e-06, "loss": 1.9436, "step": 22010 }, { "epoch": 6.91, "learning_rate": 3.0928481806775414e-06, "loss": 1.8471, "step": 22020 }, { "epoch": 6.91, "learning_rate": 3.089711417816813e-06, "loss": 1.9117, "step": 22030 }, { "epoch": 6.91, "learning_rate": 3.0865746549560855e-06, "loss": 1.8174, "step": 22040 }, { "epoch": 6.92, "learning_rate": 3.083437892095358e-06, "loss": 1.8559, "step": 22050 }, { "epoch": 6.92, "learning_rate": 3.0803011292346304e-06, "loss": 1.8634, "step": 22060 }, { "epoch": 6.92, "learning_rate": 3.0771643663739024e-06, "loss": 1.9015, "step": 22070 }, { "epoch": 6.93, "learning_rate": 3.0740276035131745e-06, "loss": 1.7752, "step": 22080 }, { "epoch": 6.93, "learning_rate": 3.070890840652447e-06, "loss": 1.8868, "step": 22090 }, { "epoch": 6.93, "learning_rate": 3.067754077791719e-06, "loss": 1.9024, "step": 22100 }, { "epoch": 6.94, "learning_rate": 3.0646173149309915e-06, "loss": 1.8447, "step": 22110 }, { "epoch": 6.94, "learning_rate": 3.061480552070264e-06, "loss": 1.8881, "step": 22120 }, { "epoch": 6.94, "learning_rate": 3.058343789209536e-06, "loss": 1.8455, "step": 22130 }, { "epoch": 6.94, "learning_rate": 3.055207026348808e-06, "loss": 1.8524, "step": 22140 }, { "epoch": 6.95, "learning_rate": 3.0520702634880805e-06, "loss": 1.8238, "step": 22150 }, { "epoch": 6.95, "learning_rate": 3.048933500627353e-06, "loss": 1.8148, "step": 22160 }, { "epoch": 6.95, "learning_rate": 3.0457967377666254e-06, "loss": 1.8265, "step": 22170 }, { "epoch": 6.96, "learning_rate": 3.042659974905897e-06, "loss": 1.857, "step": 22180 }, { "epoch": 6.96, "learning_rate": 3.0395232120451695e-06, "loss": 1.8612, "step": 22190 }, { "epoch": 6.96, "learning_rate": 3.036386449184442e-06, "loss": 1.8471, "step": 22200 }, { "epoch": 6.97, "learning_rate": 3.0332496863237145e-06, "loss": 1.7689, "step": 22210 }, { "epoch": 6.97, "learning_rate": 3.0301129234629865e-06, "loss": 1.848, "step": 22220 }, { "epoch": 6.97, "learning_rate": 3.0269761606022585e-06, "loss": 1.8965, "step": 22230 }, { "epoch": 6.98, "learning_rate": 3.023839397741531e-06, "loss": 1.8563, "step": 22240 }, { "epoch": 6.98, "learning_rate": 3.0207026348808035e-06, "loss": 1.8156, "step": 22250 }, { "epoch": 6.98, "learning_rate": 3.0175658720200755e-06, "loss": 1.7559, "step": 22260 }, { "epoch": 6.99, "learning_rate": 3.014429109159348e-06, "loss": 1.8958, "step": 22270 }, { "epoch": 6.99, "learning_rate": 3.01129234629862e-06, "loss": 1.8257, "step": 22280 }, { "epoch": 6.99, "learning_rate": 3.008155583437892e-06, "loss": 1.8366, "step": 22290 }, { "epoch": 6.99, "learning_rate": 3.0050188205771646e-06, "loss": 1.884, "step": 22300 }, { "epoch": 7.0, "learning_rate": 3.001882057716437e-06, "loss": 1.8405, "step": 22310 }, { "epoch": 7.0, "eval_loss": 1.8138045072555542, "eval_runtime": 13.6048, "eval_samples_per_second": 73.504, "eval_steps_per_second": 4.631, "step": 22316 }, { "epoch": 7.0, "learning_rate": 2.9987452948557095e-06, "loss": 1.9038, "step": 22320 }, { "epoch": 7.0, "learning_rate": 2.995608531994981e-06, "loss": 1.8173, "step": 22330 }, { "epoch": 7.01, "learning_rate": 2.9924717691342536e-06, "loss": 1.7758, "step": 22340 }, { "epoch": 7.01, "learning_rate": 2.989335006273526e-06, "loss": 1.7838, "step": 22350 }, { "epoch": 7.01, "learning_rate": 2.9861982434127985e-06, "loss": 1.899, "step": 22360 }, { "epoch": 7.02, "learning_rate": 2.98306148055207e-06, "loss": 1.8519, "step": 22370 }, { "epoch": 7.02, "learning_rate": 2.9799247176913426e-06, "loss": 1.8812, "step": 22380 }, { "epoch": 7.02, "learning_rate": 2.976787954830615e-06, "loss": 1.8858, "step": 22390 }, { "epoch": 7.03, "learning_rate": 2.9736511919698875e-06, "loss": 1.8638, "step": 22400 }, { "epoch": 7.03, "learning_rate": 2.9705144291091596e-06, "loss": 1.9083, "step": 22410 }, { "epoch": 7.03, "learning_rate": 2.9673776662484316e-06, "loss": 1.837, "step": 22420 }, { "epoch": 7.04, "learning_rate": 2.964240903387704e-06, "loss": 1.835, "step": 22430 }, { "epoch": 7.04, "learning_rate": 2.9611041405269766e-06, "loss": 1.7929, "step": 22440 }, { "epoch": 7.04, "learning_rate": 2.9579673776662486e-06, "loss": 1.8362, "step": 22450 }, { "epoch": 7.05, "learning_rate": 2.954830614805521e-06, "loss": 1.9089, "step": 22460 }, { "epoch": 7.05, "learning_rate": 2.951693851944793e-06, "loss": 1.8951, "step": 22470 }, { "epoch": 7.05, "learning_rate": 2.948557089084065e-06, "loss": 1.8888, "step": 22480 }, { "epoch": 7.05, "learning_rate": 2.9454203262233376e-06, "loss": 1.8226, "step": 22490 }, { "epoch": 7.06, "learning_rate": 2.94228356336261e-06, "loss": 1.7939, "step": 22500 }, { "epoch": 7.06, "learning_rate": 2.9391468005018826e-06, "loss": 1.7777, "step": 22510 }, { "epoch": 7.06, "learning_rate": 2.936010037641154e-06, "loss": 1.8305, "step": 22520 }, { "epoch": 7.07, "learning_rate": 2.9328732747804267e-06, "loss": 1.8603, "step": 22530 }, { "epoch": 7.07, "learning_rate": 2.929736511919699e-06, "loss": 1.8084, "step": 22540 }, { "epoch": 7.07, "learning_rate": 2.9265997490589716e-06, "loss": 1.9212, "step": 22550 }, { "epoch": 7.08, "learning_rate": 2.923462986198244e-06, "loss": 1.7966, "step": 22560 }, { "epoch": 7.08, "learning_rate": 2.9203262233375157e-06, "loss": 1.7703, "step": 22570 }, { "epoch": 7.08, "learning_rate": 2.917189460476788e-06, "loss": 1.8393, "step": 22580 }, { "epoch": 7.09, "learning_rate": 2.9140526976160606e-06, "loss": 1.7991, "step": 22590 }, { "epoch": 7.09, "learning_rate": 2.9109159347553327e-06, "loss": 1.8828, "step": 22600 }, { "epoch": 7.09, "learning_rate": 2.907779171894605e-06, "loss": 1.8453, "step": 22610 }, { "epoch": 7.1, "learning_rate": 2.904642409033877e-06, "loss": 1.8254, "step": 22620 }, { "epoch": 7.1, "learning_rate": 2.9015056461731497e-06, "loss": 1.8666, "step": 22630 }, { "epoch": 7.1, "learning_rate": 2.8983688833124217e-06, "loss": 1.7819, "step": 22640 }, { "epoch": 7.1, "learning_rate": 2.895232120451694e-06, "loss": 1.8606, "step": 22650 }, { "epoch": 7.11, "learning_rate": 2.8920953575909666e-06, "loss": 1.8466, "step": 22660 }, { "epoch": 7.11, "learning_rate": 2.8889585947302383e-06, "loss": 1.7494, "step": 22670 }, { "epoch": 7.11, "learning_rate": 2.8858218318695107e-06, "loss": 1.9066, "step": 22680 }, { "epoch": 7.12, "learning_rate": 2.882685069008783e-06, "loss": 1.8383, "step": 22690 }, { "epoch": 7.12, "learning_rate": 2.8795483061480557e-06, "loss": 1.9013, "step": 22700 }, { "epoch": 7.12, "learning_rate": 2.876411543287328e-06, "loss": 1.8759, "step": 22710 }, { "epoch": 7.13, "learning_rate": 2.8732747804265998e-06, "loss": 1.835, "step": 22720 }, { "epoch": 7.13, "learning_rate": 2.8701380175658722e-06, "loss": 1.8592, "step": 22730 }, { "epoch": 7.13, "learning_rate": 2.8670012547051447e-06, "loss": 1.7547, "step": 22740 }, { "epoch": 7.14, "learning_rate": 2.863864491844417e-06, "loss": 1.8349, "step": 22750 }, { "epoch": 7.14, "learning_rate": 2.860727728983689e-06, "loss": 1.8603, "step": 22760 }, { "epoch": 7.14, "learning_rate": 2.8575909661229612e-06, "loss": 1.8041, "step": 22770 }, { "epoch": 7.15, "learning_rate": 2.8544542032622337e-06, "loss": 1.8681, "step": 22780 }, { "epoch": 7.15, "learning_rate": 2.8513174404015058e-06, "loss": 1.8604, "step": 22790 }, { "epoch": 7.15, "learning_rate": 2.8481806775407782e-06, "loss": 1.8059, "step": 22800 }, { "epoch": 7.15, "learning_rate": 2.8450439146800503e-06, "loss": 1.9027, "step": 22810 }, { "epoch": 7.16, "learning_rate": 2.8419071518193223e-06, "loss": 1.8495, "step": 22820 }, { "epoch": 7.16, "learning_rate": 2.838770388958595e-06, "loss": 1.8858, "step": 22830 }, { "epoch": 7.16, "learning_rate": 2.8356336260978673e-06, "loss": 1.8676, "step": 22840 }, { "epoch": 7.17, "learning_rate": 2.8324968632371397e-06, "loss": 1.7541, "step": 22850 }, { "epoch": 7.17, "learning_rate": 2.8293601003764113e-06, "loss": 1.8457, "step": 22860 }, { "epoch": 7.17, "learning_rate": 2.826223337515684e-06, "loss": 1.814, "step": 22870 }, { "epoch": 7.18, "learning_rate": 2.8230865746549563e-06, "loss": 1.7803, "step": 22880 }, { "epoch": 7.18, "learning_rate": 2.8199498117942288e-06, "loss": 1.8027, "step": 22890 }, { "epoch": 7.18, "learning_rate": 2.8168130489335012e-06, "loss": 1.8909, "step": 22900 }, { "epoch": 7.19, "learning_rate": 2.813676286072773e-06, "loss": 1.7367, "step": 22910 }, { "epoch": 7.19, "learning_rate": 2.8105395232120453e-06, "loss": 1.917, "step": 22920 }, { "epoch": 7.19, "learning_rate": 2.8074027603513178e-06, "loss": 1.7805, "step": 22930 }, { "epoch": 7.2, "learning_rate": 2.80426599749059e-06, "loss": 1.8529, "step": 22940 }, { "epoch": 7.2, "learning_rate": 2.8011292346298623e-06, "loss": 1.8119, "step": 22950 }, { "epoch": 7.2, "learning_rate": 2.7979924717691343e-06, "loss": 1.7894, "step": 22960 }, { "epoch": 7.21, "learning_rate": 2.794855708908407e-06, "loss": 1.8554, "step": 22970 }, { "epoch": 7.21, "learning_rate": 2.791718946047679e-06, "loss": 1.8412, "step": 22980 }, { "epoch": 7.21, "learning_rate": 2.7885821831869513e-06, "loss": 1.8466, "step": 22990 }, { "epoch": 7.21, "learning_rate": 2.7854454203262238e-06, "loss": 1.8696, "step": 23000 }, { "epoch": 7.22, "learning_rate": 2.7823086574654954e-06, "loss": 1.7816, "step": 23010 }, { "epoch": 7.22, "learning_rate": 2.779171894604768e-06, "loss": 1.7382, "step": 23020 }, { "epoch": 7.22, "learning_rate": 2.7760351317440403e-06, "loss": 1.8222, "step": 23030 }, { "epoch": 7.23, "learning_rate": 2.772898368883313e-06, "loss": 1.8955, "step": 23040 }, { "epoch": 7.23, "learning_rate": 2.7697616060225853e-06, "loss": 1.7663, "step": 23050 }, { "epoch": 7.23, "learning_rate": 2.766624843161857e-06, "loss": 1.8362, "step": 23060 }, { "epoch": 7.24, "learning_rate": 2.7634880803011294e-06, "loss": 1.9312, "step": 23070 }, { "epoch": 7.24, "learning_rate": 2.760351317440402e-06, "loss": 1.7954, "step": 23080 }, { "epoch": 7.24, "learning_rate": 2.7572145545796743e-06, "loss": 1.8945, "step": 23090 }, { "epoch": 7.25, "learning_rate": 2.7540777917189464e-06, "loss": 1.8134, "step": 23100 }, { "epoch": 7.25, "learning_rate": 2.7509410288582184e-06, "loss": 1.8199, "step": 23110 }, { "epoch": 7.25, "learning_rate": 2.747804265997491e-06, "loss": 1.8319, "step": 23120 }, { "epoch": 7.26, "learning_rate": 2.744667503136763e-06, "loss": 1.9164, "step": 23130 }, { "epoch": 7.26, "learning_rate": 2.7415307402760354e-06, "loss": 1.8681, "step": 23140 }, { "epoch": 7.26, "learning_rate": 2.738393977415308e-06, "loss": 1.867, "step": 23150 }, { "epoch": 7.26, "learning_rate": 2.73525721455458e-06, "loss": 1.8132, "step": 23160 }, { "epoch": 7.27, "learning_rate": 2.732120451693852e-06, "loss": 1.9204, "step": 23170 }, { "epoch": 7.27, "learning_rate": 2.7289836888331244e-06, "loss": 1.7829, "step": 23180 }, { "epoch": 7.27, "learning_rate": 2.725846925972397e-06, "loss": 1.8375, "step": 23190 }, { "epoch": 7.28, "learning_rate": 2.7227101631116693e-06, "loss": 1.7998, "step": 23200 }, { "epoch": 7.28, "learning_rate": 2.719573400250941e-06, "loss": 1.8404, "step": 23210 }, { "epoch": 7.28, "learning_rate": 2.7164366373902134e-06, "loss": 1.8477, "step": 23220 }, { "epoch": 7.29, "learning_rate": 2.713299874529486e-06, "loss": 1.8236, "step": 23230 }, { "epoch": 7.29, "learning_rate": 2.7101631116687584e-06, "loss": 1.8057, "step": 23240 }, { "epoch": 7.29, "learning_rate": 2.70702634880803e-06, "loss": 1.9069, "step": 23250 }, { "epoch": 7.3, "learning_rate": 2.7038895859473025e-06, "loss": 1.8121, "step": 23260 }, { "epoch": 7.3, "learning_rate": 2.700752823086575e-06, "loss": 1.8416, "step": 23270 }, { "epoch": 7.3, "learning_rate": 2.6976160602258474e-06, "loss": 1.8124, "step": 23280 }, { "epoch": 7.31, "learning_rate": 2.6944792973651194e-06, "loss": 1.811, "step": 23290 }, { "epoch": 7.31, "learning_rate": 2.6913425345043915e-06, "loss": 1.8152, "step": 23300 }, { "epoch": 7.31, "learning_rate": 2.688205771643664e-06, "loss": 1.8165, "step": 23310 }, { "epoch": 7.31, "learning_rate": 2.685069008782936e-06, "loss": 1.8534, "step": 23320 }, { "epoch": 7.32, "learning_rate": 2.6819322459222085e-06, "loss": 1.8197, "step": 23330 }, { "epoch": 7.32, "learning_rate": 2.678795483061481e-06, "loss": 1.8595, "step": 23340 }, { "epoch": 7.32, "learning_rate": 2.675658720200753e-06, "loss": 1.8172, "step": 23350 }, { "epoch": 7.33, "learning_rate": 2.672521957340025e-06, "loss": 1.8871, "step": 23360 }, { "epoch": 7.33, "learning_rate": 2.6693851944792975e-06, "loss": 1.8423, "step": 23370 }, { "epoch": 7.33, "learning_rate": 2.66624843161857e-06, "loss": 1.8303, "step": 23380 }, { "epoch": 7.34, "learning_rate": 2.6631116687578424e-06, "loss": 1.8854, "step": 23390 }, { "epoch": 7.34, "learning_rate": 2.659974905897114e-06, "loss": 1.8654, "step": 23400 }, { "epoch": 7.34, "learning_rate": 2.6568381430363865e-06, "loss": 1.7865, "step": 23410 }, { "epoch": 7.35, "learning_rate": 2.653701380175659e-06, "loss": 1.8362, "step": 23420 }, { "epoch": 7.35, "learning_rate": 2.6505646173149315e-06, "loss": 1.9038, "step": 23430 }, { "epoch": 7.35, "learning_rate": 2.6474278544542035e-06, "loss": 1.8733, "step": 23440 }, { "epoch": 7.36, "learning_rate": 2.6442910915934756e-06, "loss": 1.8038, "step": 23450 }, { "epoch": 7.36, "learning_rate": 2.641154328732748e-06, "loss": 1.9265, "step": 23460 }, { "epoch": 7.36, "learning_rate": 2.6380175658720205e-06, "loss": 1.8768, "step": 23470 }, { "epoch": 7.37, "learning_rate": 2.6348808030112925e-06, "loss": 1.827, "step": 23480 }, { "epoch": 7.37, "learning_rate": 2.631744040150565e-06, "loss": 1.7804, "step": 23490 }, { "epoch": 7.37, "learning_rate": 2.628607277289837e-06, "loss": 1.8359, "step": 23500 }, { "epoch": 7.37, "learning_rate": 2.625470514429109e-06, "loss": 1.8243, "step": 23510 }, { "epoch": 7.38, "learning_rate": 2.6223337515683816e-06, "loss": 1.8012, "step": 23520 }, { "epoch": 7.38, "learning_rate": 2.619196988707654e-06, "loss": 1.7882, "step": 23530 }, { "epoch": 7.38, "learning_rate": 2.6160602258469265e-06, "loss": 1.8081, "step": 23540 }, { "epoch": 7.39, "learning_rate": 2.612923462986198e-06, "loss": 1.9011, "step": 23550 }, { "epoch": 7.39, "learning_rate": 2.6097867001254706e-06, "loss": 1.847, "step": 23560 }, { "epoch": 7.39, "learning_rate": 2.606649937264743e-06, "loss": 1.8524, "step": 23570 }, { "epoch": 7.4, "learning_rate": 2.6035131744040155e-06, "loss": 1.8984, "step": 23580 }, { "epoch": 7.4, "learning_rate": 2.600376411543288e-06, "loss": 1.9214, "step": 23590 }, { "epoch": 7.4, "learning_rate": 2.5972396486825596e-06, "loss": 1.8319, "step": 23600 }, { "epoch": 7.41, "learning_rate": 2.594102885821832e-06, "loss": 1.8231, "step": 23610 }, { "epoch": 7.41, "learning_rate": 2.5909661229611045e-06, "loss": 1.8265, "step": 23620 }, { "epoch": 7.41, "learning_rate": 2.5878293601003766e-06, "loss": 1.8764, "step": 23630 }, { "epoch": 7.42, "learning_rate": 2.584692597239649e-06, "loss": 1.7521, "step": 23640 }, { "epoch": 7.42, "learning_rate": 2.581555834378921e-06, "loss": 1.8229, "step": 23650 }, { "epoch": 7.42, "learning_rate": 2.5784190715181936e-06, "loss": 1.902, "step": 23660 }, { "epoch": 7.42, "learning_rate": 2.5752823086574656e-06, "loss": 1.8338, "step": 23670 }, { "epoch": 7.43, "learning_rate": 2.572145545796738e-06, "loss": 1.8587, "step": 23680 }, { "epoch": 7.43, "learning_rate": 2.56900878293601e-06, "loss": 1.8247, "step": 23690 }, { "epoch": 7.43, "learning_rate": 2.565872020075282e-06, "loss": 1.8186, "step": 23700 }, { "epoch": 7.44, "learning_rate": 2.5627352572145546e-06, "loss": 1.8091, "step": 23710 }, { "epoch": 7.44, "learning_rate": 2.559598494353827e-06, "loss": 1.8887, "step": 23720 }, { "epoch": 7.44, "learning_rate": 2.5564617314930996e-06, "loss": 1.8898, "step": 23730 }, { "epoch": 7.45, "learning_rate": 2.553324968632371e-06, "loss": 1.8661, "step": 23740 }, { "epoch": 7.45, "learning_rate": 2.5501882057716437e-06, "loss": 1.7916, "step": 23750 }, { "epoch": 7.45, "learning_rate": 2.547051442910916e-06, "loss": 1.9033, "step": 23760 }, { "epoch": 7.46, "learning_rate": 2.5439146800501886e-06, "loss": 1.8425, "step": 23770 }, { "epoch": 7.46, "learning_rate": 2.540777917189461e-06, "loss": 1.9196, "step": 23780 }, { "epoch": 7.46, "learning_rate": 2.5376411543287327e-06, "loss": 1.865, "step": 23790 }, { "epoch": 7.47, "learning_rate": 2.534504391468005e-06, "loss": 1.8719, "step": 23800 }, { "epoch": 7.47, "learning_rate": 2.5313676286072776e-06, "loss": 1.8598, "step": 23810 }, { "epoch": 7.47, "learning_rate": 2.5282308657465497e-06, "loss": 1.8614, "step": 23820 }, { "epoch": 7.47, "learning_rate": 2.525094102885822e-06, "loss": 1.7822, "step": 23830 }, { "epoch": 7.48, "learning_rate": 2.521957340025094e-06, "loss": 1.8435, "step": 23840 }, { "epoch": 7.48, "learning_rate": 2.5188205771643667e-06, "loss": 1.8492, "step": 23850 }, { "epoch": 7.48, "learning_rate": 2.5156838143036387e-06, "loss": 1.7792, "step": 23860 }, { "epoch": 7.49, "learning_rate": 2.512547051442911e-06, "loss": 1.8483, "step": 23870 }, { "epoch": 7.49, "learning_rate": 2.5094102885821836e-06, "loss": 1.8582, "step": 23880 }, { "epoch": 7.49, "learning_rate": 2.5062735257214553e-06, "loss": 1.8717, "step": 23890 }, { "epoch": 7.5, "learning_rate": 2.5031367628607277e-06, "loss": 1.8539, "step": 23900 }, { "epoch": 7.5, "learning_rate": 2.5e-06, "loss": 1.776, "step": 23910 }, { "epoch": 7.5, "learning_rate": 2.4968632371392727e-06, "loss": 1.7718, "step": 23920 }, { "epoch": 7.51, "learning_rate": 2.4937264742785447e-06, "loss": 1.8526, "step": 23930 }, { "epoch": 7.51, "learning_rate": 2.490589711417817e-06, "loss": 1.8475, "step": 23940 }, { "epoch": 7.51, "learning_rate": 2.4874529485570892e-06, "loss": 1.926, "step": 23950 }, { "epoch": 7.52, "learning_rate": 2.4843161856963617e-06, "loss": 1.8306, "step": 23960 }, { "epoch": 7.52, "learning_rate": 2.4811794228356337e-06, "loss": 1.8626, "step": 23970 }, { "epoch": 7.52, "learning_rate": 2.4780426599749062e-06, "loss": 1.8465, "step": 23980 }, { "epoch": 7.53, "learning_rate": 2.4749058971141783e-06, "loss": 1.9154, "step": 23990 }, { "epoch": 7.53, "learning_rate": 2.4717691342534507e-06, "loss": 1.8894, "step": 24000 }, { "epoch": 7.53, "learning_rate": 2.4686323713927228e-06, "loss": 1.8561, "step": 24010 }, { "epoch": 7.53, "learning_rate": 2.4654956085319952e-06, "loss": 1.9377, "step": 24020 }, { "epoch": 7.54, "learning_rate": 2.4623588456712673e-06, "loss": 1.797, "step": 24030 }, { "epoch": 7.54, "learning_rate": 2.4592220828105398e-06, "loss": 1.8609, "step": 24040 }, { "epoch": 7.54, "learning_rate": 2.456085319949812e-06, "loss": 1.9067, "step": 24050 }, { "epoch": 7.55, "learning_rate": 2.4529485570890843e-06, "loss": 1.8253, "step": 24060 }, { "epoch": 7.55, "learning_rate": 2.4498117942283563e-06, "loss": 1.8497, "step": 24070 }, { "epoch": 7.55, "learning_rate": 2.4466750313676288e-06, "loss": 1.9389, "step": 24080 }, { "epoch": 7.56, "learning_rate": 2.4435382685069012e-06, "loss": 1.8728, "step": 24090 }, { "epoch": 7.56, "learning_rate": 2.4404015056461733e-06, "loss": 1.8859, "step": 24100 }, { "epoch": 7.56, "learning_rate": 2.4372647427854458e-06, "loss": 1.8464, "step": 24110 }, { "epoch": 7.57, "learning_rate": 2.434127979924718e-06, "loss": 1.874, "step": 24120 }, { "epoch": 7.57, "learning_rate": 2.4309912170639903e-06, "loss": 1.8951, "step": 24130 }, { "epoch": 7.57, "learning_rate": 2.4278544542032627e-06, "loss": 1.7477, "step": 24140 }, { "epoch": 7.58, "learning_rate": 2.424717691342535e-06, "loss": 1.9261, "step": 24150 }, { "epoch": 7.58, "learning_rate": 2.421580928481807e-06, "loss": 1.8659, "step": 24160 }, { "epoch": 7.58, "learning_rate": 2.4184441656210793e-06, "loss": 1.7717, "step": 24170 }, { "epoch": 7.58, "learning_rate": 2.4153074027603513e-06, "loss": 1.8292, "step": 24180 }, { "epoch": 7.59, "learning_rate": 2.412170639899624e-06, "loss": 1.824, "step": 24190 }, { "epoch": 7.59, "learning_rate": 2.409033877038896e-06, "loss": 1.8637, "step": 24200 }, { "epoch": 7.59, "learning_rate": 2.4058971141781683e-06, "loss": 1.7925, "step": 24210 }, { "epoch": 7.6, "learning_rate": 2.4027603513174404e-06, "loss": 1.8698, "step": 24220 }, { "epoch": 7.6, "learning_rate": 2.399623588456713e-06, "loss": 1.8401, "step": 24230 }, { "epoch": 7.6, "learning_rate": 2.3964868255959853e-06, "loss": 1.8329, "step": 24240 }, { "epoch": 7.61, "learning_rate": 2.3933500627352574e-06, "loss": 1.8431, "step": 24250 }, { "epoch": 7.61, "learning_rate": 2.39021329987453e-06, "loss": 1.8929, "step": 24260 }, { "epoch": 7.61, "learning_rate": 2.387076537013802e-06, "loss": 1.7704, "step": 24270 }, { "epoch": 7.62, "learning_rate": 2.3839397741530743e-06, "loss": 1.7222, "step": 24280 }, { "epoch": 7.62, "learning_rate": 2.3808030112923464e-06, "loss": 1.8664, "step": 24290 }, { "epoch": 7.62, "learning_rate": 2.377666248431619e-06, "loss": 1.8683, "step": 24300 }, { "epoch": 7.63, "learning_rate": 2.3745294855708913e-06, "loss": 1.8453, "step": 24310 }, { "epoch": 7.63, "learning_rate": 2.3713927227101634e-06, "loss": 1.8595, "step": 24320 }, { "epoch": 7.63, "learning_rate": 2.368255959849436e-06, "loss": 1.8028, "step": 24330 }, { "epoch": 7.63, "learning_rate": 2.365119196988708e-06, "loss": 1.8816, "step": 24340 }, { "epoch": 7.64, "learning_rate": 2.36198243412798e-06, "loss": 1.71, "step": 24350 }, { "epoch": 7.64, "learning_rate": 2.3588456712672524e-06, "loss": 1.8009, "step": 24360 }, { "epoch": 7.64, "learning_rate": 2.3557089084065244e-06, "loss": 1.7629, "step": 24370 }, { "epoch": 7.65, "learning_rate": 2.352572145545797e-06, "loss": 1.7828, "step": 24380 }, { "epoch": 7.65, "learning_rate": 2.349435382685069e-06, "loss": 1.88, "step": 24390 }, { "epoch": 7.65, "learning_rate": 2.3462986198243414e-06, "loss": 1.8083, "step": 24400 }, { "epoch": 7.66, "learning_rate": 2.343161856963614e-06, "loss": 1.7952, "step": 24410 }, { "epoch": 7.66, "learning_rate": 2.340025094102886e-06, "loss": 1.8013, "step": 24420 }, { "epoch": 7.66, "learning_rate": 2.3368883312421584e-06, "loss": 1.775, "step": 24430 }, { "epoch": 7.67, "learning_rate": 2.3337515683814304e-06, "loss": 1.8962, "step": 24440 }, { "epoch": 7.67, "learning_rate": 2.330614805520703e-06, "loss": 1.8714, "step": 24450 }, { "epoch": 7.67, "learning_rate": 2.3274780426599754e-06, "loss": 1.8892, "step": 24460 }, { "epoch": 7.68, "learning_rate": 2.3243412797992474e-06, "loss": 1.8778, "step": 24470 }, { "epoch": 7.68, "learning_rate": 2.32120451693852e-06, "loss": 1.9277, "step": 24480 }, { "epoch": 7.68, "learning_rate": 2.318067754077792e-06, "loss": 1.8388, "step": 24490 }, { "epoch": 7.69, "learning_rate": 2.3149309912170644e-06, "loss": 1.7903, "step": 24500 }, { "epoch": 7.69, "learning_rate": 2.3117942283563365e-06, "loss": 1.8229, "step": 24510 }, { "epoch": 7.69, "learning_rate": 2.3086574654956085e-06, "loss": 1.8884, "step": 24520 }, { "epoch": 7.69, "learning_rate": 2.305520702634881e-06, "loss": 1.8402, "step": 24530 }, { "epoch": 7.7, "learning_rate": 2.302383939774153e-06, "loss": 1.9046, "step": 24540 }, { "epoch": 7.7, "learning_rate": 2.2992471769134255e-06, "loss": 1.9163, "step": 24550 }, { "epoch": 7.7, "learning_rate": 2.2961104140526975e-06, "loss": 1.8306, "step": 24560 }, { "epoch": 7.71, "learning_rate": 2.29297365119197e-06, "loss": 1.8335, "step": 24570 }, { "epoch": 7.71, "learning_rate": 2.2898368883312425e-06, "loss": 1.8506, "step": 24580 }, { "epoch": 7.71, "learning_rate": 2.2867001254705145e-06, "loss": 1.9307, "step": 24590 }, { "epoch": 7.72, "learning_rate": 2.283563362609787e-06, "loss": 1.8544, "step": 24600 }, { "epoch": 7.72, "learning_rate": 2.280426599749059e-06, "loss": 1.7701, "step": 24610 }, { "epoch": 7.72, "learning_rate": 2.2772898368883315e-06, "loss": 1.8246, "step": 24620 }, { "epoch": 7.73, "learning_rate": 2.274153074027604e-06, "loss": 1.804, "step": 24630 }, { "epoch": 7.73, "learning_rate": 2.271016311166876e-06, "loss": 1.8989, "step": 24640 }, { "epoch": 7.73, "learning_rate": 2.2678795483061485e-06, "loss": 1.8234, "step": 24650 }, { "epoch": 7.74, "learning_rate": 2.2647427854454205e-06, "loss": 1.8747, "step": 24660 }, { "epoch": 7.74, "learning_rate": 2.261606022584693e-06, "loss": 1.8087, "step": 24670 }, { "epoch": 7.74, "learning_rate": 2.258469259723965e-06, "loss": 1.9062, "step": 24680 }, { "epoch": 7.74, "learning_rate": 2.2553324968632375e-06, "loss": 1.8029, "step": 24690 }, { "epoch": 7.75, "learning_rate": 2.2521957340025095e-06, "loss": 1.8597, "step": 24700 }, { "epoch": 7.75, "learning_rate": 2.2490589711417816e-06, "loss": 1.742, "step": 24710 }, { "epoch": 7.75, "learning_rate": 2.245922208281054e-06, "loss": 1.9272, "step": 24720 }, { "epoch": 7.76, "learning_rate": 2.242785445420326e-06, "loss": 1.7777, "step": 24730 }, { "epoch": 7.76, "learning_rate": 2.2396486825595986e-06, "loss": 1.8376, "step": 24740 }, { "epoch": 7.76, "learning_rate": 2.236511919698871e-06, "loss": 1.847, "step": 24750 }, { "epoch": 7.77, "learning_rate": 2.233375156838143e-06, "loss": 1.8417, "step": 24760 }, { "epoch": 7.77, "learning_rate": 2.2302383939774155e-06, "loss": 1.894, "step": 24770 }, { "epoch": 7.77, "learning_rate": 2.2271016311166876e-06, "loss": 1.7936, "step": 24780 }, { "epoch": 7.78, "learning_rate": 2.22396486825596e-06, "loss": 1.8742, "step": 24790 }, { "epoch": 7.78, "learning_rate": 2.2208281053952325e-06, "loss": 1.8119, "step": 24800 }, { "epoch": 7.78, "learning_rate": 2.2176913425345046e-06, "loss": 1.8426, "step": 24810 }, { "epoch": 7.79, "learning_rate": 2.214554579673777e-06, "loss": 1.9206, "step": 24820 }, { "epoch": 7.79, "learning_rate": 2.211417816813049e-06, "loss": 1.811, "step": 24830 }, { "epoch": 7.79, "learning_rate": 2.2082810539523216e-06, "loss": 1.8007, "step": 24840 }, { "epoch": 7.79, "learning_rate": 2.2051442910915936e-06, "loss": 1.8859, "step": 24850 }, { "epoch": 7.8, "learning_rate": 2.202007528230866e-06, "loss": 1.8268, "step": 24860 }, { "epoch": 7.8, "learning_rate": 2.198870765370138e-06, "loss": 1.852, "step": 24870 }, { "epoch": 7.8, "learning_rate": 2.1957340025094106e-06, "loss": 1.7632, "step": 24880 }, { "epoch": 7.81, "learning_rate": 2.1925972396486826e-06, "loss": 1.8561, "step": 24890 }, { "epoch": 7.81, "learning_rate": 2.189460476787955e-06, "loss": 1.817, "step": 24900 }, { "epoch": 7.81, "learning_rate": 2.186323713927227e-06, "loss": 1.792, "step": 24910 }, { "epoch": 7.82, "learning_rate": 2.1831869510664996e-06, "loss": 1.8024, "step": 24920 }, { "epoch": 7.82, "learning_rate": 2.1800501882057717e-06, "loss": 1.8756, "step": 24930 }, { "epoch": 7.82, "learning_rate": 2.176913425345044e-06, "loss": 1.8239, "step": 24940 }, { "epoch": 7.83, "learning_rate": 2.173776662484316e-06, "loss": 1.846, "step": 24950 }, { "epoch": 7.83, "learning_rate": 2.1706398996235886e-06, "loss": 1.8936, "step": 24960 }, { "epoch": 7.83, "learning_rate": 2.167503136762861e-06, "loss": 1.9536, "step": 24970 }, { "epoch": 7.84, "learning_rate": 2.164366373902133e-06, "loss": 1.8185, "step": 24980 }, { "epoch": 7.84, "learning_rate": 2.1612296110414056e-06, "loss": 1.8076, "step": 24990 }, { "epoch": 7.84, "learning_rate": 2.1580928481806777e-06, "loss": 1.9388, "step": 25000 }, { "epoch": 7.85, "learning_rate": 2.15495608531995e-06, "loss": 1.7815, "step": 25010 }, { "epoch": 7.85, "learning_rate": 2.151819322459222e-06, "loss": 1.8411, "step": 25020 }, { "epoch": 7.85, "learning_rate": 2.1486825595984946e-06, "loss": 1.858, "step": 25030 }, { "epoch": 7.85, "learning_rate": 2.1455457967377667e-06, "loss": 1.8114, "step": 25040 }, { "epoch": 7.86, "learning_rate": 2.142409033877039e-06, "loss": 1.8516, "step": 25050 }, { "epoch": 7.86, "learning_rate": 2.139272271016311e-06, "loss": 1.8683, "step": 25060 }, { "epoch": 7.86, "learning_rate": 2.1361355081555837e-06, "loss": 1.9183, "step": 25070 }, { "epoch": 7.87, "learning_rate": 2.1329987452948557e-06, "loss": 1.8695, "step": 25080 }, { "epoch": 7.87, "learning_rate": 2.129861982434128e-06, "loss": 1.8753, "step": 25090 }, { "epoch": 7.87, "learning_rate": 2.1267252195734002e-06, "loss": 1.8406, "step": 25100 }, { "epoch": 7.88, "learning_rate": 2.1235884567126727e-06, "loss": 1.8115, "step": 25110 }, { "epoch": 7.88, "learning_rate": 2.120451693851945e-06, "loss": 1.9119, "step": 25120 }, { "epoch": 7.88, "learning_rate": 2.1173149309912172e-06, "loss": 1.7686, "step": 25130 }, { "epoch": 7.89, "learning_rate": 2.1141781681304897e-06, "loss": 1.9034, "step": 25140 }, { "epoch": 7.89, "learning_rate": 2.1110414052697617e-06, "loss": 1.8498, "step": 25150 }, { "epoch": 7.89, "learning_rate": 2.107904642409034e-06, "loss": 1.8183, "step": 25160 }, { "epoch": 7.9, "learning_rate": 2.1047678795483062e-06, "loss": 1.8255, "step": 25170 }, { "epoch": 7.9, "learning_rate": 2.1016311166875787e-06, "loss": 1.8853, "step": 25180 }, { "epoch": 7.9, "learning_rate": 2.0984943538268508e-06, "loss": 1.8643, "step": 25190 }, { "epoch": 7.9, "learning_rate": 2.0953575909661232e-06, "loss": 1.8537, "step": 25200 }, { "epoch": 7.91, "learning_rate": 2.0922208281053953e-06, "loss": 1.8535, "step": 25210 }, { "epoch": 7.91, "learning_rate": 2.0890840652446677e-06, "loss": 1.8192, "step": 25220 }, { "epoch": 7.91, "learning_rate": 2.0859473023839398e-06, "loss": 1.9235, "step": 25230 }, { "epoch": 7.92, "learning_rate": 2.0828105395232122e-06, "loss": 1.7911, "step": 25240 }, { "epoch": 7.92, "learning_rate": 2.0796737766624843e-06, "loss": 1.8031, "step": 25250 }, { "epoch": 7.92, "learning_rate": 2.0765370138017568e-06, "loss": 1.8374, "step": 25260 }, { "epoch": 7.93, "learning_rate": 2.073400250941029e-06, "loss": 1.8302, "step": 25270 }, { "epoch": 7.93, "learning_rate": 2.0702634880803013e-06, "loss": 1.7694, "step": 25280 }, { "epoch": 7.93, "learning_rate": 2.0671267252195737e-06, "loss": 1.8412, "step": 25290 }, { "epoch": 7.94, "learning_rate": 2.063989962358846e-06, "loss": 1.8374, "step": 25300 }, { "epoch": 7.94, "learning_rate": 2.0608531994981183e-06, "loss": 1.8052, "step": 25310 }, { "epoch": 7.94, "learning_rate": 2.0577164366373903e-06, "loss": 1.826, "step": 25320 }, { "epoch": 7.95, "learning_rate": 2.0545796737766628e-06, "loss": 1.884, "step": 25330 }, { "epoch": 7.95, "learning_rate": 2.0514429109159352e-06, "loss": 1.8261, "step": 25340 }, { "epoch": 7.95, "learning_rate": 2.0483061480552073e-06, "loss": 1.8037, "step": 25350 }, { "epoch": 7.95, "learning_rate": 2.0451693851944798e-06, "loss": 1.8361, "step": 25360 }, { "epoch": 7.96, "learning_rate": 2.042032622333752e-06, "loss": 1.789, "step": 25370 }, { "epoch": 7.96, "learning_rate": 2.038895859473024e-06, "loss": 1.8791, "step": 25380 }, { "epoch": 7.96, "learning_rate": 2.0357590966122963e-06, "loss": 1.7095, "step": 25390 }, { "epoch": 7.97, "learning_rate": 2.0326223337515684e-06, "loss": 1.8753, "step": 25400 }, { "epoch": 7.97, "learning_rate": 2.029485570890841e-06, "loss": 1.6906, "step": 25410 }, { "epoch": 7.97, "learning_rate": 2.026348808030113e-06, "loss": 1.8425, "step": 25420 }, { "epoch": 7.98, "learning_rate": 2.0232120451693853e-06, "loss": 1.8265, "step": 25430 }, { "epoch": 7.98, "learning_rate": 2.0200752823086574e-06, "loss": 1.8445, "step": 25440 }, { "epoch": 7.98, "learning_rate": 2.01693851944793e-06, "loss": 1.901, "step": 25450 }, { "epoch": 7.99, "learning_rate": 2.0138017565872023e-06, "loss": 1.855, "step": 25460 }, { "epoch": 7.99, "learning_rate": 2.0106649937264744e-06, "loss": 1.8212, "step": 25470 }, { "epoch": 7.99, "learning_rate": 2.007528230865747e-06, "loss": 1.8864, "step": 25480 }, { "epoch": 8.0, "learning_rate": 2.004391468005019e-06, "loss": 1.8775, "step": 25490 }, { "epoch": 8.0, "learning_rate": 2.0012547051442913e-06, "loss": 1.8761, "step": 25500 }, { "epoch": 8.0, "eval_loss": 1.8085027933120728, "eval_runtime": 13.6113, "eval_samples_per_second": 73.468, "eval_steps_per_second": 4.628, "step": 25504 }, { "epoch": 8.0, "learning_rate": 1.998117942283564e-06, "loss": 1.8419, "step": 25510 }, { "epoch": 8.01, "learning_rate": 1.994981179422836e-06, "loss": 1.8554, "step": 25520 }, { "epoch": 8.01, "learning_rate": 1.9918444165621083e-06, "loss": 1.8783, "step": 25530 }, { "epoch": 8.01, "learning_rate": 1.9887076537013804e-06, "loss": 1.8445, "step": 25540 }, { "epoch": 8.01, "learning_rate": 1.985570890840653e-06, "loss": 1.8647, "step": 25550 }, { "epoch": 8.02, "learning_rate": 1.982434127979925e-06, "loss": 1.7844, "step": 25560 }, { "epoch": 8.02, "learning_rate": 1.979297365119197e-06, "loss": 1.8327, "step": 25570 }, { "epoch": 8.02, "learning_rate": 1.9761606022584694e-06, "loss": 1.8807, "step": 25580 }, { "epoch": 8.03, "learning_rate": 1.9730238393977414e-06, "loss": 1.8707, "step": 25590 }, { "epoch": 8.03, "learning_rate": 1.969887076537014e-06, "loss": 1.8531, "step": 25600 }, { "epoch": 8.03, "learning_rate": 1.966750313676286e-06, "loss": 1.8546, "step": 25610 }, { "epoch": 8.04, "learning_rate": 1.9636135508155584e-06, "loss": 1.8613, "step": 25620 }, { "epoch": 8.04, "learning_rate": 1.960476787954831e-06, "loss": 1.8682, "step": 25630 }, { "epoch": 8.04, "learning_rate": 1.957340025094103e-06, "loss": 1.8709, "step": 25640 }, { "epoch": 8.05, "learning_rate": 1.9542032622333754e-06, "loss": 1.8032, "step": 25650 }, { "epoch": 8.05, "learning_rate": 1.9510664993726475e-06, "loss": 1.8855, "step": 25660 }, { "epoch": 8.05, "learning_rate": 1.94792973651192e-06, "loss": 1.8255, "step": 25670 }, { "epoch": 8.06, "learning_rate": 1.9447929736511924e-06, "loss": 1.8782, "step": 25680 }, { "epoch": 8.06, "learning_rate": 1.9416562107904644e-06, "loss": 1.8759, "step": 25690 }, { "epoch": 8.06, "learning_rate": 1.938519447929737e-06, "loss": 1.8345, "step": 25700 }, { "epoch": 8.06, "learning_rate": 1.935382685069009e-06, "loss": 1.8346, "step": 25710 }, { "epoch": 8.07, "learning_rate": 1.9322459222082814e-06, "loss": 1.8408, "step": 25720 }, { "epoch": 8.07, "learning_rate": 1.9291091593475535e-06, "loss": 1.848, "step": 25730 }, { "epoch": 8.07, "learning_rate": 1.9259723964868255e-06, "loss": 1.8057, "step": 25740 }, { "epoch": 8.08, "learning_rate": 1.922835633626098e-06, "loss": 1.7701, "step": 25750 }, { "epoch": 8.08, "learning_rate": 1.91969887076537e-06, "loss": 1.8032, "step": 25760 }, { "epoch": 8.08, "learning_rate": 1.9165621079046425e-06, "loss": 1.8393, "step": 25770 }, { "epoch": 8.09, "learning_rate": 1.913425345043915e-06, "loss": 1.7831, "step": 25780 }, { "epoch": 8.09, "learning_rate": 1.910288582183187e-06, "loss": 1.7805, "step": 25790 }, { "epoch": 8.09, "learning_rate": 1.9071518193224595e-06, "loss": 1.8198, "step": 25800 }, { "epoch": 8.1, "learning_rate": 1.9040150564617315e-06, "loss": 1.7913, "step": 25810 }, { "epoch": 8.1, "learning_rate": 1.900878293601004e-06, "loss": 1.7361, "step": 25820 }, { "epoch": 8.1, "learning_rate": 1.897741530740276e-06, "loss": 1.8002, "step": 25830 }, { "epoch": 8.11, "learning_rate": 1.8946047678795485e-06, "loss": 1.8309, "step": 25840 }, { "epoch": 8.11, "learning_rate": 1.8914680050188208e-06, "loss": 1.8588, "step": 25850 }, { "epoch": 8.11, "learning_rate": 1.888331242158093e-06, "loss": 1.8685, "step": 25860 }, { "epoch": 8.11, "learning_rate": 1.8851944792973653e-06, "loss": 1.7893, "step": 25870 }, { "epoch": 8.12, "learning_rate": 1.8820577164366375e-06, "loss": 1.7893, "step": 25880 }, { "epoch": 8.12, "learning_rate": 1.8789209535759098e-06, "loss": 1.9111, "step": 25890 }, { "epoch": 8.12, "learning_rate": 1.8757841907151822e-06, "loss": 1.8303, "step": 25900 }, { "epoch": 8.13, "learning_rate": 1.8726474278544543e-06, "loss": 1.8459, "step": 25910 }, { "epoch": 8.13, "learning_rate": 1.8695106649937268e-06, "loss": 1.8117, "step": 25920 }, { "epoch": 8.13, "learning_rate": 1.8663739021329988e-06, "loss": 1.7602, "step": 25930 }, { "epoch": 8.14, "learning_rate": 1.8632371392722713e-06, "loss": 1.8715, "step": 25940 }, { "epoch": 8.14, "learning_rate": 1.8601003764115435e-06, "loss": 1.8285, "step": 25950 }, { "epoch": 8.14, "learning_rate": 1.8569636135508156e-06, "loss": 1.7996, "step": 25960 }, { "epoch": 8.15, "learning_rate": 1.853826850690088e-06, "loss": 1.8965, "step": 25970 }, { "epoch": 8.15, "learning_rate": 1.85069008782936e-06, "loss": 1.7853, "step": 25980 }, { "epoch": 8.15, "learning_rate": 1.8475533249686326e-06, "loss": 1.8037, "step": 25990 }, { "epoch": 8.16, "learning_rate": 1.844416562107905e-06, "loss": 1.786, "step": 26000 }, { "epoch": 8.16, "learning_rate": 1.841279799247177e-06, "loss": 1.9019, "step": 26010 }, { "epoch": 8.16, "learning_rate": 1.8381430363864493e-06, "loss": 1.9225, "step": 26020 }, { "epoch": 8.16, "learning_rate": 1.8350062735257216e-06, "loss": 1.8514, "step": 26030 }, { "epoch": 8.17, "learning_rate": 1.8318695106649938e-06, "loss": 1.8365, "step": 26040 }, { "epoch": 8.17, "learning_rate": 1.828732747804266e-06, "loss": 1.7828, "step": 26050 }, { "epoch": 8.17, "learning_rate": 1.8255959849435384e-06, "loss": 1.8321, "step": 26060 }, { "epoch": 8.18, "learning_rate": 1.8224592220828108e-06, "loss": 1.7532, "step": 26070 }, { "epoch": 8.18, "learning_rate": 1.8193224592220829e-06, "loss": 1.8928, "step": 26080 }, { "epoch": 8.18, "learning_rate": 1.8161856963613553e-06, "loss": 1.8561, "step": 26090 }, { "epoch": 8.19, "learning_rate": 1.8130489335006274e-06, "loss": 1.9077, "step": 26100 }, { "epoch": 8.19, "learning_rate": 1.8099121706398999e-06, "loss": 1.815, "step": 26110 }, { "epoch": 8.19, "learning_rate": 1.8067754077791721e-06, "loss": 1.7752, "step": 26120 }, { "epoch": 8.2, "learning_rate": 1.8036386449184444e-06, "loss": 1.8435, "step": 26130 }, { "epoch": 8.2, "learning_rate": 1.8005018820577166e-06, "loss": 1.8642, "step": 26140 }, { "epoch": 8.2, "learning_rate": 1.7973651191969887e-06, "loss": 1.807, "step": 26150 }, { "epoch": 8.21, "learning_rate": 1.7942283563362611e-06, "loss": 1.8056, "step": 26160 }, { "epoch": 8.21, "learning_rate": 1.7910915934755336e-06, "loss": 1.8102, "step": 26170 }, { "epoch": 8.21, "learning_rate": 1.7879548306148056e-06, "loss": 1.9075, "step": 26180 }, { "epoch": 8.22, "learning_rate": 1.7848180677540781e-06, "loss": 1.7885, "step": 26190 }, { "epoch": 8.22, "learning_rate": 1.7816813048933502e-06, "loss": 1.8633, "step": 26200 }, { "epoch": 8.22, "learning_rate": 1.7785445420326224e-06, "loss": 1.8132, "step": 26210 }, { "epoch": 8.22, "learning_rate": 1.7754077791718949e-06, "loss": 1.8301, "step": 26220 }, { "epoch": 8.23, "learning_rate": 1.772271016311167e-06, "loss": 1.8868, "step": 26230 }, { "epoch": 8.23, "learning_rate": 1.7691342534504394e-06, "loss": 1.8068, "step": 26240 }, { "epoch": 8.23, "learning_rate": 1.7659974905897114e-06, "loss": 1.816, "step": 26250 }, { "epoch": 8.24, "learning_rate": 1.762860727728984e-06, "loss": 1.7982, "step": 26260 }, { "epoch": 8.24, "learning_rate": 1.759723964868256e-06, "loss": 1.8405, "step": 26270 }, { "epoch": 8.24, "learning_rate": 1.7565872020075284e-06, "loss": 1.8382, "step": 26280 }, { "epoch": 8.25, "learning_rate": 1.7534504391468007e-06, "loss": 1.7793, "step": 26290 }, { "epoch": 8.25, "learning_rate": 1.750313676286073e-06, "loss": 1.8145, "step": 26300 }, { "epoch": 8.25, "learning_rate": 1.7471769134253452e-06, "loss": 1.8759, "step": 26310 }, { "epoch": 8.26, "learning_rate": 1.7440401505646172e-06, "loss": 1.8452, "step": 26320 }, { "epoch": 8.26, "learning_rate": 1.7409033877038897e-06, "loss": 1.8489, "step": 26330 }, { "epoch": 8.26, "learning_rate": 1.7377666248431622e-06, "loss": 1.8525, "step": 26340 }, { "epoch": 8.27, "learning_rate": 1.7346298619824342e-06, "loss": 1.8752, "step": 26350 }, { "epoch": 8.27, "learning_rate": 1.7314930991217067e-06, "loss": 1.8404, "step": 26360 }, { "epoch": 8.27, "learning_rate": 1.7283563362609787e-06, "loss": 1.8458, "step": 26370 }, { "epoch": 8.27, "learning_rate": 1.725219573400251e-06, "loss": 1.8859, "step": 26380 }, { "epoch": 8.28, "learning_rate": 1.7220828105395235e-06, "loss": 1.7915, "step": 26390 }, { "epoch": 8.28, "learning_rate": 1.7189460476787955e-06, "loss": 1.9167, "step": 26400 }, { "epoch": 8.28, "learning_rate": 1.715809284818068e-06, "loss": 1.8609, "step": 26410 }, { "epoch": 8.29, "learning_rate": 1.71267252195734e-06, "loss": 1.9143, "step": 26420 }, { "epoch": 8.29, "learning_rate": 1.7095357590966125e-06, "loss": 1.809, "step": 26430 }, { "epoch": 8.29, "learning_rate": 1.7063989962358847e-06, "loss": 1.7668, "step": 26440 }, { "epoch": 8.3, "learning_rate": 1.703262233375157e-06, "loss": 1.8096, "step": 26450 }, { "epoch": 8.3, "learning_rate": 1.7001254705144293e-06, "loss": 1.8033, "step": 26460 }, { "epoch": 8.3, "learning_rate": 1.6969887076537015e-06, "loss": 1.8294, "step": 26470 }, { "epoch": 8.31, "learning_rate": 1.6938519447929738e-06, "loss": 1.7282, "step": 26480 }, { "epoch": 8.31, "learning_rate": 1.690715181932246e-06, "loss": 1.8274, "step": 26490 }, { "epoch": 8.31, "learning_rate": 1.6875784190715183e-06, "loss": 1.811, "step": 26500 }, { "epoch": 8.32, "learning_rate": 1.6844416562107908e-06, "loss": 1.8093, "step": 26510 }, { "epoch": 8.32, "learning_rate": 1.6813048933500628e-06, "loss": 1.8854, "step": 26520 }, { "epoch": 8.32, "learning_rate": 1.6781681304893353e-06, "loss": 1.8327, "step": 26530 }, { "epoch": 8.32, "learning_rate": 1.6750313676286073e-06, "loss": 1.8446, "step": 26540 }, { "epoch": 8.33, "learning_rate": 1.6718946047678798e-06, "loss": 1.8623, "step": 26550 }, { "epoch": 8.33, "learning_rate": 1.668757841907152e-06, "loss": 1.8318, "step": 26560 }, { "epoch": 8.33, "learning_rate": 1.665621079046424e-06, "loss": 1.7852, "step": 26570 }, { "epoch": 8.34, "learning_rate": 1.6624843161856965e-06, "loss": 1.7953, "step": 26580 }, { "epoch": 8.34, "learning_rate": 1.6593475533249686e-06, "loss": 1.9024, "step": 26590 }, { "epoch": 8.34, "learning_rate": 1.656210790464241e-06, "loss": 1.838, "step": 26600 }, { "epoch": 8.35, "learning_rate": 1.6530740276035135e-06, "loss": 1.9028, "step": 26610 }, { "epoch": 8.35, "learning_rate": 1.6499372647427856e-06, "loss": 1.8269, "step": 26620 }, { "epoch": 8.35, "learning_rate": 1.6468005018820578e-06, "loss": 1.8538, "step": 26630 }, { "epoch": 8.36, "learning_rate": 1.64366373902133e-06, "loss": 1.8703, "step": 26640 }, { "epoch": 8.36, "learning_rate": 1.6405269761606023e-06, "loss": 1.769, "step": 26650 }, { "epoch": 8.36, "learning_rate": 1.6373902132998748e-06, "loss": 1.7672, "step": 26660 }, { "epoch": 8.37, "learning_rate": 1.6342534504391469e-06, "loss": 1.8547, "step": 26670 }, { "epoch": 8.37, "learning_rate": 1.6311166875784193e-06, "loss": 1.8411, "step": 26680 }, { "epoch": 8.37, "learning_rate": 1.6279799247176914e-06, "loss": 1.813, "step": 26690 }, { "epoch": 8.38, "learning_rate": 1.6248431618569638e-06, "loss": 1.9123, "step": 26700 }, { "epoch": 8.38, "learning_rate": 1.6217063989962359e-06, "loss": 1.844, "step": 26710 }, { "epoch": 8.38, "learning_rate": 1.6185696361355084e-06, "loss": 1.8704, "step": 26720 }, { "epoch": 8.38, "learning_rate": 1.6154328732747806e-06, "loss": 1.7696, "step": 26730 }, { "epoch": 8.39, "learning_rate": 1.6122961104140529e-06, "loss": 1.8558, "step": 26740 }, { "epoch": 8.39, "learning_rate": 1.6091593475533251e-06, "loss": 1.8155, "step": 26750 }, { "epoch": 8.39, "learning_rate": 1.6060225846925972e-06, "loss": 1.8761, "step": 26760 }, { "epoch": 8.4, "learning_rate": 1.6028858218318696e-06, "loss": 1.7798, "step": 26770 }, { "epoch": 8.4, "learning_rate": 1.5997490589711421e-06, "loss": 1.8079, "step": 26780 }, { "epoch": 8.4, "learning_rate": 1.5966122961104142e-06, "loss": 1.8161, "step": 26790 }, { "epoch": 8.41, "learning_rate": 1.5934755332496864e-06, "loss": 1.8797, "step": 26800 }, { "epoch": 8.41, "learning_rate": 1.5903387703889587e-06, "loss": 1.8012, "step": 26810 }, { "epoch": 8.41, "learning_rate": 1.587202007528231e-06, "loss": 1.865, "step": 26820 }, { "epoch": 8.42, "learning_rate": 1.5840652446675034e-06, "loss": 1.808, "step": 26830 }, { "epoch": 8.42, "learning_rate": 1.5809284818067754e-06, "loss": 1.8396, "step": 26840 }, { "epoch": 8.42, "learning_rate": 1.577791718946048e-06, "loss": 1.794, "step": 26850 }, { "epoch": 8.43, "learning_rate": 1.57465495608532e-06, "loss": 1.8262, "step": 26860 }, { "epoch": 8.43, "learning_rate": 1.5715181932245924e-06, "loss": 1.8224, "step": 26870 }, { "epoch": 8.43, "learning_rate": 1.5683814303638647e-06, "loss": 1.8647, "step": 26880 }, { "epoch": 8.43, "learning_rate": 1.565244667503137e-06, "loss": 1.8389, "step": 26890 }, { "epoch": 8.44, "learning_rate": 1.5621079046424092e-06, "loss": 1.8384, "step": 26900 }, { "epoch": 8.44, "learning_rate": 1.5589711417816814e-06, "loss": 1.8201, "step": 26910 }, { "epoch": 8.44, "learning_rate": 1.5558343789209537e-06, "loss": 1.8713, "step": 26920 }, { "epoch": 8.45, "learning_rate": 1.5526976160602257e-06, "loss": 1.7515, "step": 26930 }, { "epoch": 8.45, "learning_rate": 1.5495608531994982e-06, "loss": 1.832, "step": 26940 }, { "epoch": 8.45, "learning_rate": 1.5464240903387707e-06, "loss": 1.8487, "step": 26950 }, { "epoch": 8.46, "learning_rate": 1.5432873274780427e-06, "loss": 1.7924, "step": 26960 }, { "epoch": 8.46, "learning_rate": 1.5401505646173152e-06, "loss": 1.7845, "step": 26970 }, { "epoch": 8.46, "learning_rate": 1.5370138017565872e-06, "loss": 1.8839, "step": 26980 }, { "epoch": 8.47, "learning_rate": 1.5338770388958595e-06, "loss": 1.7541, "step": 26990 }, { "epoch": 8.47, "learning_rate": 1.530740276035132e-06, "loss": 1.7754, "step": 27000 }, { "epoch": 8.47, "learning_rate": 1.527603513174404e-06, "loss": 1.8529, "step": 27010 }, { "epoch": 8.48, "learning_rate": 1.5244667503136765e-06, "loss": 1.8047, "step": 27020 }, { "epoch": 8.48, "learning_rate": 1.5213299874529485e-06, "loss": 1.8239, "step": 27030 }, { "epoch": 8.48, "learning_rate": 1.518193224592221e-06, "loss": 1.8293, "step": 27040 }, { "epoch": 8.48, "learning_rate": 1.5150564617314932e-06, "loss": 1.8962, "step": 27050 }, { "epoch": 8.49, "learning_rate": 1.5119196988707655e-06, "loss": 1.803, "step": 27060 }, { "epoch": 8.49, "learning_rate": 1.5087829360100378e-06, "loss": 1.819, "step": 27070 }, { "epoch": 8.49, "learning_rate": 1.50564617314931e-06, "loss": 1.839, "step": 27080 }, { "epoch": 8.5, "learning_rate": 1.5025094102885823e-06, "loss": 1.8884, "step": 27090 }, { "epoch": 8.5, "learning_rate": 1.4993726474278547e-06, "loss": 1.9084, "step": 27100 }, { "epoch": 8.5, "learning_rate": 1.4962358845671268e-06, "loss": 1.8082, "step": 27110 }, { "epoch": 8.51, "learning_rate": 1.4930991217063993e-06, "loss": 1.7976, "step": 27120 }, { "epoch": 8.51, "learning_rate": 1.4899623588456713e-06, "loss": 1.815, "step": 27130 }, { "epoch": 8.51, "learning_rate": 1.4868255959849438e-06, "loss": 1.8745, "step": 27140 }, { "epoch": 8.52, "learning_rate": 1.4836888331242158e-06, "loss": 1.8226, "step": 27150 }, { "epoch": 8.52, "learning_rate": 1.4805520702634883e-06, "loss": 1.8518, "step": 27160 }, { "epoch": 8.52, "learning_rate": 1.4774153074027605e-06, "loss": 1.8528, "step": 27170 }, { "epoch": 8.53, "learning_rate": 1.4742785445420326e-06, "loss": 1.8632, "step": 27180 }, { "epoch": 8.53, "learning_rate": 1.471141781681305e-06, "loss": 1.7614, "step": 27190 }, { "epoch": 8.53, "learning_rate": 1.468005018820577e-06, "loss": 1.7805, "step": 27200 }, { "epoch": 8.54, "learning_rate": 1.4648682559598496e-06, "loss": 1.8009, "step": 27210 }, { "epoch": 8.54, "learning_rate": 1.461731493099122e-06, "loss": 1.8522, "step": 27220 }, { "epoch": 8.54, "learning_rate": 1.458594730238394e-06, "loss": 1.806, "step": 27230 }, { "epoch": 8.54, "learning_rate": 1.4554579673776663e-06, "loss": 1.8533, "step": 27240 }, { "epoch": 8.55, "learning_rate": 1.4523212045169386e-06, "loss": 1.9368, "step": 27250 }, { "epoch": 8.55, "learning_rate": 1.4491844416562109e-06, "loss": 1.8579, "step": 27260 }, { "epoch": 8.55, "learning_rate": 1.4460476787954833e-06, "loss": 1.8546, "step": 27270 }, { "epoch": 8.56, "learning_rate": 1.4429109159347554e-06, "loss": 1.7599, "step": 27280 }, { "epoch": 8.56, "learning_rate": 1.4397741530740278e-06, "loss": 1.9231, "step": 27290 }, { "epoch": 8.56, "learning_rate": 1.4366373902132999e-06, "loss": 1.8332, "step": 27300 }, { "epoch": 8.57, "learning_rate": 1.4335006273525723e-06, "loss": 1.7885, "step": 27310 }, { "epoch": 8.57, "learning_rate": 1.4303638644918446e-06, "loss": 1.849, "step": 27320 }, { "epoch": 8.57, "learning_rate": 1.4272271016311169e-06, "loss": 1.8045, "step": 27330 }, { "epoch": 8.58, "learning_rate": 1.4240903387703891e-06, "loss": 1.8928, "step": 27340 }, { "epoch": 8.58, "learning_rate": 1.4209535759096612e-06, "loss": 1.8827, "step": 27350 }, { "epoch": 8.58, "learning_rate": 1.4178168130489336e-06, "loss": 1.8549, "step": 27360 }, { "epoch": 8.59, "learning_rate": 1.4146800501882057e-06, "loss": 1.787, "step": 27370 }, { "epoch": 8.59, "learning_rate": 1.4115432873274781e-06, "loss": 1.8102, "step": 27380 }, { "epoch": 8.59, "learning_rate": 1.4084065244667506e-06, "loss": 1.8703, "step": 27390 }, { "epoch": 8.59, "learning_rate": 1.4052697616060227e-06, "loss": 1.8759, "step": 27400 }, { "epoch": 8.6, "learning_rate": 1.402132998745295e-06, "loss": 1.8059, "step": 27410 }, { "epoch": 8.6, "learning_rate": 1.3989962358845672e-06, "loss": 1.8641, "step": 27420 }, { "epoch": 8.6, "learning_rate": 1.3958594730238394e-06, "loss": 1.7733, "step": 27430 }, { "epoch": 8.61, "learning_rate": 1.3927227101631119e-06, "loss": 1.8804, "step": 27440 }, { "epoch": 8.61, "learning_rate": 1.389585947302384e-06, "loss": 1.8322, "step": 27450 }, { "epoch": 8.61, "learning_rate": 1.3864491844416564e-06, "loss": 1.8676, "step": 27460 }, { "epoch": 8.62, "learning_rate": 1.3833124215809285e-06, "loss": 1.7624, "step": 27470 }, { "epoch": 8.62, "learning_rate": 1.380175658720201e-06, "loss": 1.8463, "step": 27480 }, { "epoch": 8.62, "learning_rate": 1.3770388958594732e-06, "loss": 1.7873, "step": 27490 }, { "epoch": 8.63, "learning_rate": 1.3739021329987454e-06, "loss": 1.8458, "step": 27500 }, { "epoch": 8.63, "learning_rate": 1.3707653701380177e-06, "loss": 1.9638, "step": 27510 }, { "epoch": 8.63, "learning_rate": 1.36762860727729e-06, "loss": 1.8246, "step": 27520 }, { "epoch": 8.64, "learning_rate": 1.3644918444165622e-06, "loss": 1.7838, "step": 27530 }, { "epoch": 8.64, "learning_rate": 1.3613550815558347e-06, "loss": 1.859, "step": 27540 }, { "epoch": 8.64, "learning_rate": 1.3582183186951067e-06, "loss": 1.8282, "step": 27550 }, { "epoch": 8.64, "learning_rate": 1.3550815558343792e-06, "loss": 1.7744, "step": 27560 }, { "epoch": 8.65, "learning_rate": 1.3519447929736512e-06, "loss": 1.8345, "step": 27570 }, { "epoch": 8.65, "learning_rate": 1.3488080301129237e-06, "loss": 1.8246, "step": 27580 }, { "epoch": 8.65, "learning_rate": 1.3456712672521957e-06, "loss": 1.8078, "step": 27590 }, { "epoch": 8.66, "learning_rate": 1.342534504391468e-06, "loss": 1.8357, "step": 27600 }, { "epoch": 8.66, "learning_rate": 1.3393977415307405e-06, "loss": 1.8526, "step": 27610 }, { "epoch": 8.66, "learning_rate": 1.3362609786700125e-06, "loss": 1.8486, "step": 27620 }, { "epoch": 8.67, "learning_rate": 1.333124215809285e-06, "loss": 1.8583, "step": 27630 }, { "epoch": 8.67, "learning_rate": 1.329987452948557e-06, "loss": 1.8361, "step": 27640 }, { "epoch": 8.67, "learning_rate": 1.3268506900878295e-06, "loss": 1.8049, "step": 27650 }, { "epoch": 8.68, "learning_rate": 1.3237139272271018e-06, "loss": 1.8453, "step": 27660 }, { "epoch": 8.68, "learning_rate": 1.320577164366374e-06, "loss": 1.7916, "step": 27670 }, { "epoch": 8.68, "learning_rate": 1.3174404015056463e-06, "loss": 1.8771, "step": 27680 }, { "epoch": 8.69, "learning_rate": 1.3143036386449185e-06, "loss": 1.8396, "step": 27690 }, { "epoch": 8.69, "learning_rate": 1.3111668757841908e-06, "loss": 1.8477, "step": 27700 }, { "epoch": 8.69, "learning_rate": 1.3080301129234632e-06, "loss": 1.896, "step": 27710 }, { "epoch": 8.7, "learning_rate": 1.3048933500627353e-06, "loss": 1.8484, "step": 27720 }, { "epoch": 8.7, "learning_rate": 1.3017565872020078e-06, "loss": 1.8034, "step": 27730 }, { "epoch": 8.7, "learning_rate": 1.2986198243412798e-06, "loss": 1.8502, "step": 27740 }, { "epoch": 8.7, "learning_rate": 1.2954830614805523e-06, "loss": 1.8693, "step": 27750 }, { "epoch": 8.71, "learning_rate": 1.2923462986198245e-06, "loss": 1.8367, "step": 27760 }, { "epoch": 8.71, "learning_rate": 1.2892095357590968e-06, "loss": 1.849, "step": 27770 }, { "epoch": 8.71, "learning_rate": 1.286072772898369e-06, "loss": 1.8938, "step": 27780 }, { "epoch": 8.72, "learning_rate": 1.282936010037641e-06, "loss": 1.8266, "step": 27790 }, { "epoch": 8.72, "learning_rate": 1.2797992471769136e-06, "loss": 1.7925, "step": 27800 }, { "epoch": 8.72, "learning_rate": 1.2766624843161856e-06, "loss": 1.7811, "step": 27810 }, { "epoch": 8.73, "learning_rate": 1.273525721455458e-06, "loss": 1.9285, "step": 27820 }, { "epoch": 8.73, "learning_rate": 1.2703889585947305e-06, "loss": 1.8258, "step": 27830 }, { "epoch": 8.73, "learning_rate": 1.2672521957340026e-06, "loss": 1.8142, "step": 27840 }, { "epoch": 8.74, "learning_rate": 1.2641154328732748e-06, "loss": 1.7592, "step": 27850 }, { "epoch": 8.74, "learning_rate": 1.260978670012547e-06, "loss": 1.7749, "step": 27860 }, { "epoch": 8.74, "learning_rate": 1.2578419071518194e-06, "loss": 1.893, "step": 27870 }, { "epoch": 8.75, "learning_rate": 1.2547051442910918e-06, "loss": 1.7893, "step": 27880 }, { "epoch": 8.75, "learning_rate": 1.2515683814303639e-06, "loss": 1.8241, "step": 27890 }, { "epoch": 8.75, "learning_rate": 1.2484316185696363e-06, "loss": 1.8894, "step": 27900 }, { "epoch": 8.75, "learning_rate": 1.2452948557089086e-06, "loss": 1.8563, "step": 27910 }, { "epoch": 8.76, "learning_rate": 1.2421580928481808e-06, "loss": 1.8682, "step": 27920 }, { "epoch": 8.76, "learning_rate": 1.2390213299874531e-06, "loss": 1.8305, "step": 27930 }, { "epoch": 8.76, "learning_rate": 1.2358845671267254e-06, "loss": 1.7523, "step": 27940 }, { "epoch": 8.77, "learning_rate": 1.2327478042659976e-06, "loss": 1.7347, "step": 27950 }, { "epoch": 8.77, "learning_rate": 1.2296110414052699e-06, "loss": 1.8742, "step": 27960 }, { "epoch": 8.77, "learning_rate": 1.2264742785445421e-06, "loss": 1.8519, "step": 27970 }, { "epoch": 8.78, "learning_rate": 1.2233375156838144e-06, "loss": 1.8233, "step": 27980 }, { "epoch": 8.78, "learning_rate": 1.2202007528230866e-06, "loss": 1.8308, "step": 27990 }, { "epoch": 8.78, "learning_rate": 1.217063989962359e-06, "loss": 1.8121, "step": 28000 }, { "epoch": 8.79, "learning_rate": 1.2139272271016314e-06, "loss": 1.8179, "step": 28010 }, { "epoch": 8.79, "learning_rate": 1.2107904642409034e-06, "loss": 1.8859, "step": 28020 }, { "epoch": 8.79, "learning_rate": 1.2076537013801757e-06, "loss": 1.776, "step": 28030 }, { "epoch": 8.8, "learning_rate": 1.204516938519448e-06, "loss": 1.7907, "step": 28040 }, { "epoch": 8.8, "learning_rate": 1.2013801756587202e-06, "loss": 1.8682, "step": 28050 }, { "epoch": 8.8, "learning_rate": 1.1982434127979927e-06, "loss": 1.8239, "step": 28060 }, { "epoch": 8.8, "learning_rate": 1.195106649937265e-06, "loss": 1.8007, "step": 28070 }, { "epoch": 8.81, "learning_rate": 1.1919698870765372e-06, "loss": 1.7608, "step": 28080 }, { "epoch": 8.81, "learning_rate": 1.1888331242158094e-06, "loss": 1.8069, "step": 28090 }, { "epoch": 8.81, "learning_rate": 1.1856963613550817e-06, "loss": 1.8402, "step": 28100 }, { "epoch": 8.82, "learning_rate": 1.182559598494354e-06, "loss": 1.8986, "step": 28110 }, { "epoch": 8.82, "learning_rate": 1.1794228356336262e-06, "loss": 1.8938, "step": 28120 }, { "epoch": 8.82, "learning_rate": 1.1762860727728985e-06, "loss": 1.8132, "step": 28130 }, { "epoch": 8.83, "learning_rate": 1.1731493099121707e-06, "loss": 1.8571, "step": 28140 }, { "epoch": 8.83, "learning_rate": 1.170012547051443e-06, "loss": 1.8696, "step": 28150 }, { "epoch": 8.83, "learning_rate": 1.1668757841907152e-06, "loss": 1.8399, "step": 28160 }, { "epoch": 8.84, "learning_rate": 1.1637390213299877e-06, "loss": 1.806, "step": 28170 }, { "epoch": 8.84, "learning_rate": 1.16060225846926e-06, "loss": 1.884, "step": 28180 }, { "epoch": 8.84, "learning_rate": 1.1574654956085322e-06, "loss": 1.7557, "step": 28190 }, { "epoch": 8.85, "learning_rate": 1.1543287327478042e-06, "loss": 1.8812, "step": 28200 }, { "epoch": 8.85, "learning_rate": 1.1511919698870765e-06, "loss": 1.7912, "step": 28210 }, { "epoch": 8.85, "learning_rate": 1.1480552070263488e-06, "loss": 1.7655, "step": 28220 }, { "epoch": 8.86, "learning_rate": 1.1449184441656212e-06, "loss": 1.7835, "step": 28230 }, { "epoch": 8.86, "learning_rate": 1.1417816813048935e-06, "loss": 1.8024, "step": 28240 }, { "epoch": 8.86, "learning_rate": 1.1386449184441657e-06, "loss": 1.8642, "step": 28250 }, { "epoch": 8.86, "learning_rate": 1.135508155583438e-06, "loss": 1.8401, "step": 28260 }, { "epoch": 8.87, "learning_rate": 1.1323713927227103e-06, "loss": 1.7648, "step": 28270 }, { "epoch": 8.87, "learning_rate": 1.1292346298619825e-06, "loss": 1.8564, "step": 28280 }, { "epoch": 8.87, "learning_rate": 1.1260978670012548e-06, "loss": 1.864, "step": 28290 }, { "epoch": 8.88, "learning_rate": 1.122961104140527e-06, "loss": 1.888, "step": 28300 }, { "epoch": 8.88, "learning_rate": 1.1198243412797993e-06, "loss": 1.871, "step": 28310 }, { "epoch": 8.88, "learning_rate": 1.1166875784190715e-06, "loss": 1.8735, "step": 28320 }, { "epoch": 8.89, "learning_rate": 1.1135508155583438e-06, "loss": 1.8082, "step": 28330 }, { "epoch": 8.89, "learning_rate": 1.1104140526976163e-06, "loss": 1.8749, "step": 28340 }, { "epoch": 8.89, "learning_rate": 1.1072772898368885e-06, "loss": 1.7936, "step": 28350 }, { "epoch": 8.9, "learning_rate": 1.1041405269761608e-06, "loss": 1.8015, "step": 28360 }, { "epoch": 8.9, "learning_rate": 1.101003764115433e-06, "loss": 1.8876, "step": 28370 }, { "epoch": 8.9, "learning_rate": 1.0978670012547053e-06, "loss": 1.8649, "step": 28380 }, { "epoch": 8.91, "learning_rate": 1.0947302383939775e-06, "loss": 1.7942, "step": 28390 }, { "epoch": 8.91, "learning_rate": 1.0915934755332498e-06, "loss": 1.8252, "step": 28400 }, { "epoch": 8.91, "learning_rate": 1.088456712672522e-06, "loss": 1.8082, "step": 28410 }, { "epoch": 8.91, "learning_rate": 1.0853199498117943e-06, "loss": 1.8855, "step": 28420 }, { "epoch": 8.92, "learning_rate": 1.0821831869510666e-06, "loss": 1.8745, "step": 28430 }, { "epoch": 8.92, "learning_rate": 1.0790464240903388e-06, "loss": 1.856, "step": 28440 }, { "epoch": 8.92, "learning_rate": 1.075909661229611e-06, "loss": 1.7749, "step": 28450 }, { "epoch": 8.93, "learning_rate": 1.0727728983688833e-06, "loss": 1.7862, "step": 28460 }, { "epoch": 8.93, "learning_rate": 1.0696361355081556e-06, "loss": 1.811, "step": 28470 }, { "epoch": 8.93, "learning_rate": 1.0664993726474279e-06, "loss": 1.8381, "step": 28480 }, { "epoch": 8.94, "learning_rate": 1.0633626097867001e-06, "loss": 1.8144, "step": 28490 }, { "epoch": 8.94, "learning_rate": 1.0602258469259726e-06, "loss": 1.9025, "step": 28500 }, { "epoch": 8.94, "learning_rate": 1.0570890840652448e-06, "loss": 1.7983, "step": 28510 }, { "epoch": 8.95, "learning_rate": 1.053952321204517e-06, "loss": 1.8449, "step": 28520 }, { "epoch": 8.95, "learning_rate": 1.0508155583437894e-06, "loss": 1.8314, "step": 28530 }, { "epoch": 8.95, "learning_rate": 1.0476787954830616e-06, "loss": 1.8405, "step": 28540 }, { "epoch": 8.96, "learning_rate": 1.0445420326223339e-06, "loss": 1.8417, "step": 28550 }, { "epoch": 8.96, "learning_rate": 1.0414052697616061e-06, "loss": 1.7298, "step": 28560 }, { "epoch": 8.96, "learning_rate": 1.0382685069008784e-06, "loss": 1.919, "step": 28570 }, { "epoch": 8.96, "learning_rate": 1.0351317440401506e-06, "loss": 1.8295, "step": 28580 }, { "epoch": 8.97, "learning_rate": 1.031994981179423e-06, "loss": 1.8841, "step": 28590 }, { "epoch": 8.97, "learning_rate": 1.0288582183186952e-06, "loss": 1.8232, "step": 28600 }, { "epoch": 8.97, "learning_rate": 1.0257214554579676e-06, "loss": 1.7992, "step": 28610 }, { "epoch": 8.98, "learning_rate": 1.0225846925972399e-06, "loss": 1.8448, "step": 28620 }, { "epoch": 8.98, "learning_rate": 1.019447929736512e-06, "loss": 1.8227, "step": 28630 }, { "epoch": 8.98, "learning_rate": 1.0163111668757842e-06, "loss": 1.8837, "step": 28640 }, { "epoch": 8.99, "learning_rate": 1.0131744040150564e-06, "loss": 1.814, "step": 28650 }, { "epoch": 8.99, "learning_rate": 1.0100376411543287e-06, "loss": 1.8313, "step": 28660 }, { "epoch": 8.99, "learning_rate": 1.0069008782936012e-06, "loss": 1.7769, "step": 28670 }, { "epoch": 9.0, "learning_rate": 1.0037641154328734e-06, "loss": 1.8154, "step": 28680 }, { "epoch": 9.0, "learning_rate": 1.0006273525721457e-06, "loss": 1.786, "step": 28690 }, { "epoch": 9.0, "eval_loss": 1.8068885803222656, "eval_runtime": 13.6002, "eval_samples_per_second": 73.528, "eval_steps_per_second": 4.632, "step": 28692 }, { "epoch": 9.0, "learning_rate": 9.97490589711418e-07, "loss": 1.8434, "step": 28700 }, { "epoch": 9.01, "learning_rate": 9.943538268506902e-07, "loss": 1.7965, "step": 28710 }, { "epoch": 9.01, "learning_rate": 9.912170639899624e-07, "loss": 1.8304, "step": 28720 }, { "epoch": 9.01, "learning_rate": 9.880803011292347e-07, "loss": 1.8272, "step": 28730 }, { "epoch": 9.02, "learning_rate": 9.84943538268507e-07, "loss": 1.8509, "step": 28740 }, { "epoch": 9.02, "learning_rate": 9.818067754077792e-07, "loss": 1.8452, "step": 28750 }, { "epoch": 9.02, "learning_rate": 9.786700125470515e-07, "loss": 1.7812, "step": 28760 }, { "epoch": 9.02, "learning_rate": 9.755332496863237e-07, "loss": 1.8231, "step": 28770 }, { "epoch": 9.03, "learning_rate": 9.723964868255962e-07, "loss": 1.7942, "step": 28780 }, { "epoch": 9.03, "learning_rate": 9.692597239648685e-07, "loss": 1.8309, "step": 28790 }, { "epoch": 9.03, "learning_rate": 9.661229611041407e-07, "loss": 1.8246, "step": 28800 }, { "epoch": 9.04, "learning_rate": 9.629861982434128e-07, "loss": 1.8079, "step": 28810 }, { "epoch": 9.04, "learning_rate": 9.59849435382685e-07, "loss": 1.8027, "step": 28820 }, { "epoch": 9.04, "learning_rate": 9.567126725219575e-07, "loss": 1.834, "step": 28830 }, { "epoch": 9.05, "learning_rate": 9.535759096612297e-07, "loss": 1.8374, "step": 28840 }, { "epoch": 9.05, "learning_rate": 9.50439146800502e-07, "loss": 1.7601, "step": 28850 }, { "epoch": 9.05, "learning_rate": 9.473023839397742e-07, "loss": 1.803, "step": 28860 }, { "epoch": 9.06, "learning_rate": 9.441656210790465e-07, "loss": 1.7717, "step": 28870 }, { "epoch": 9.06, "learning_rate": 9.410288582183188e-07, "loss": 1.8937, "step": 28880 }, { "epoch": 9.06, "learning_rate": 9.378920953575911e-07, "loss": 1.8349, "step": 28890 }, { "epoch": 9.07, "learning_rate": 9.347553324968634e-07, "loss": 1.7933, "step": 28900 }, { "epoch": 9.07, "learning_rate": 9.316185696361356e-07, "loss": 1.8891, "step": 28910 }, { "epoch": 9.07, "learning_rate": 9.284818067754078e-07, "loss": 1.8436, "step": 28920 }, { "epoch": 9.07, "learning_rate": 9.2534504391468e-07, "loss": 1.8642, "step": 28930 }, { "epoch": 9.08, "learning_rate": 9.222082810539525e-07, "loss": 1.8881, "step": 28940 }, { "epoch": 9.08, "learning_rate": 9.190715181932247e-07, "loss": 1.8353, "step": 28950 }, { "epoch": 9.08, "learning_rate": 9.159347553324969e-07, "loss": 1.9411, "step": 28960 }, { "epoch": 9.09, "learning_rate": 9.127979924717692e-07, "loss": 1.7851, "step": 28970 }, { "epoch": 9.09, "learning_rate": 9.096612296110414e-07, "loss": 1.8456, "step": 28980 }, { "epoch": 9.09, "learning_rate": 9.065244667503137e-07, "loss": 1.8592, "step": 28990 }, { "epoch": 9.1, "learning_rate": 9.033877038895861e-07, "loss": 1.7884, "step": 29000 }, { "epoch": 9.1, "learning_rate": 9.002509410288583e-07, "loss": 1.9313, "step": 29010 }, { "epoch": 9.1, "learning_rate": 8.971141781681306e-07, "loss": 1.8029, "step": 29020 }, { "epoch": 9.11, "learning_rate": 8.939774153074028e-07, "loss": 1.7627, "step": 29030 }, { "epoch": 9.11, "learning_rate": 8.908406524466751e-07, "loss": 1.8337, "step": 29040 }, { "epoch": 9.11, "learning_rate": 8.877038895859474e-07, "loss": 1.8205, "step": 29050 }, { "epoch": 9.12, "learning_rate": 8.845671267252197e-07, "loss": 1.8569, "step": 29060 }, { "epoch": 9.12, "learning_rate": 8.81430363864492e-07, "loss": 1.8458, "step": 29070 }, { "epoch": 9.12, "learning_rate": 8.782936010037642e-07, "loss": 1.7912, "step": 29080 }, { "epoch": 9.12, "learning_rate": 8.751568381430365e-07, "loss": 1.8414, "step": 29090 }, { "epoch": 9.13, "learning_rate": 8.720200752823086e-07, "loss": 1.8249, "step": 29100 }, { "epoch": 9.13, "learning_rate": 8.688833124215811e-07, "loss": 1.8705, "step": 29110 }, { "epoch": 9.13, "learning_rate": 8.657465495608533e-07, "loss": 1.805, "step": 29120 }, { "epoch": 9.14, "learning_rate": 8.626097867001255e-07, "loss": 1.8525, "step": 29130 }, { "epoch": 9.14, "learning_rate": 8.594730238393978e-07, "loss": 1.9076, "step": 29140 }, { "epoch": 9.14, "learning_rate": 8.5633626097867e-07, "loss": 1.8028, "step": 29150 }, { "epoch": 9.15, "learning_rate": 8.531994981179424e-07, "loss": 1.795, "step": 29160 }, { "epoch": 9.15, "learning_rate": 8.500627352572146e-07, "loss": 1.8149, "step": 29170 }, { "epoch": 9.15, "learning_rate": 8.469259723964869e-07, "loss": 1.8113, "step": 29180 }, { "epoch": 9.16, "learning_rate": 8.437892095357591e-07, "loss": 1.749, "step": 29190 }, { "epoch": 9.16, "learning_rate": 8.406524466750314e-07, "loss": 1.8426, "step": 29200 }, { "epoch": 9.16, "learning_rate": 8.375156838143037e-07, "loss": 1.8539, "step": 29210 }, { "epoch": 9.17, "learning_rate": 8.34378920953576e-07, "loss": 1.795, "step": 29220 }, { "epoch": 9.17, "learning_rate": 8.312421580928483e-07, "loss": 1.8827, "step": 29230 }, { "epoch": 9.17, "learning_rate": 8.281053952321205e-07, "loss": 1.7929, "step": 29240 }, { "epoch": 9.18, "learning_rate": 8.249686323713928e-07, "loss": 1.822, "step": 29250 }, { "epoch": 9.18, "learning_rate": 8.21831869510665e-07, "loss": 1.8702, "step": 29260 }, { "epoch": 9.18, "learning_rate": 8.186951066499374e-07, "loss": 1.8072, "step": 29270 }, { "epoch": 9.18, "learning_rate": 8.155583437892097e-07, "loss": 1.9117, "step": 29280 }, { "epoch": 9.19, "learning_rate": 8.124215809284819e-07, "loss": 1.8963, "step": 29290 }, { "epoch": 9.19, "learning_rate": 8.092848180677542e-07, "loss": 1.908, "step": 29300 }, { "epoch": 9.19, "learning_rate": 8.061480552070264e-07, "loss": 1.7967, "step": 29310 }, { "epoch": 9.2, "learning_rate": 8.030112923462986e-07, "loss": 1.8093, "step": 29320 }, { "epoch": 9.2, "learning_rate": 7.998745294855711e-07, "loss": 1.8192, "step": 29330 }, { "epoch": 9.2, "learning_rate": 7.967377666248432e-07, "loss": 1.8415, "step": 29340 }, { "epoch": 9.21, "learning_rate": 7.936010037641155e-07, "loss": 1.7622, "step": 29350 }, { "epoch": 9.21, "learning_rate": 7.904642409033877e-07, "loss": 1.791, "step": 29360 }, { "epoch": 9.21, "learning_rate": 7.8732747804266e-07, "loss": 1.7661, "step": 29370 }, { "epoch": 9.22, "learning_rate": 7.841907151819323e-07, "loss": 1.7692, "step": 29380 }, { "epoch": 9.22, "learning_rate": 7.810539523212046e-07, "loss": 1.8309, "step": 29390 }, { "epoch": 9.22, "learning_rate": 7.779171894604768e-07, "loss": 1.9005, "step": 29400 }, { "epoch": 9.23, "learning_rate": 7.747804265997491e-07, "loss": 1.7942, "step": 29410 }, { "epoch": 9.23, "learning_rate": 7.716436637390214e-07, "loss": 1.8144, "step": 29420 }, { "epoch": 9.23, "learning_rate": 7.685069008782936e-07, "loss": 1.7952, "step": 29430 }, { "epoch": 9.23, "learning_rate": 7.65370138017566e-07, "loss": 1.8524, "step": 29440 }, { "epoch": 9.24, "learning_rate": 7.622333751568382e-07, "loss": 1.8202, "step": 29450 }, { "epoch": 9.24, "learning_rate": 7.590966122961105e-07, "loss": 1.8161, "step": 29460 }, { "epoch": 9.24, "learning_rate": 7.559598494353828e-07, "loss": 1.8541, "step": 29470 }, { "epoch": 9.25, "learning_rate": 7.52823086574655e-07, "loss": 1.849, "step": 29480 }, { "epoch": 9.25, "learning_rate": 7.496863237139274e-07, "loss": 1.8604, "step": 29490 }, { "epoch": 9.25, "learning_rate": 7.465495608531996e-07, "loss": 1.8133, "step": 29500 }, { "epoch": 9.26, "learning_rate": 7.434127979924719e-07, "loss": 1.86, "step": 29510 }, { "epoch": 9.26, "learning_rate": 7.402760351317441e-07, "loss": 1.7641, "step": 29520 }, { "epoch": 9.26, "learning_rate": 7.371392722710163e-07, "loss": 1.7825, "step": 29530 }, { "epoch": 9.27, "learning_rate": 7.340025094102885e-07, "loss": 1.7355, "step": 29540 }, { "epoch": 9.27, "learning_rate": 7.30865746549561e-07, "loss": 1.7702, "step": 29550 }, { "epoch": 9.27, "learning_rate": 7.277289836888332e-07, "loss": 1.8893, "step": 29560 }, { "epoch": 9.28, "learning_rate": 7.245922208281054e-07, "loss": 1.8506, "step": 29570 }, { "epoch": 9.28, "learning_rate": 7.214554579673777e-07, "loss": 1.8254, "step": 29580 }, { "epoch": 9.28, "learning_rate": 7.183186951066499e-07, "loss": 1.8266, "step": 29590 }, { "epoch": 9.28, "learning_rate": 7.151819322459223e-07, "loss": 1.8025, "step": 29600 }, { "epoch": 9.29, "learning_rate": 7.120451693851946e-07, "loss": 1.8061, "step": 29610 }, { "epoch": 9.29, "learning_rate": 7.089084065244668e-07, "loss": 1.9078, "step": 29620 }, { "epoch": 9.29, "learning_rate": 7.057716436637391e-07, "loss": 1.8518, "step": 29630 }, { "epoch": 9.3, "learning_rate": 7.026348808030113e-07, "loss": 1.8402, "step": 29640 }, { "epoch": 9.3, "learning_rate": 6.994981179422836e-07, "loss": 1.8586, "step": 29650 }, { "epoch": 9.3, "learning_rate": 6.963613550815559e-07, "loss": 1.8409, "step": 29660 }, { "epoch": 9.31, "learning_rate": 6.932245922208282e-07, "loss": 1.819, "step": 29670 }, { "epoch": 9.31, "learning_rate": 6.900878293601005e-07, "loss": 1.8325, "step": 29680 }, { "epoch": 9.31, "learning_rate": 6.869510664993727e-07, "loss": 1.8588, "step": 29690 }, { "epoch": 9.32, "learning_rate": 6.83814303638645e-07, "loss": 1.8201, "step": 29700 }, { "epoch": 9.32, "learning_rate": 6.806775407779173e-07, "loss": 1.8307, "step": 29710 }, { "epoch": 9.32, "learning_rate": 6.775407779171896e-07, "loss": 1.7905, "step": 29720 }, { "epoch": 9.33, "learning_rate": 6.744040150564618e-07, "loss": 1.8419, "step": 29730 }, { "epoch": 9.33, "learning_rate": 6.71267252195734e-07, "loss": 1.7966, "step": 29740 }, { "epoch": 9.33, "learning_rate": 6.681304893350063e-07, "loss": 1.7745, "step": 29750 }, { "epoch": 9.34, "learning_rate": 6.649937264742785e-07, "loss": 1.8687, "step": 29760 }, { "epoch": 9.34, "learning_rate": 6.618569636135509e-07, "loss": 1.8453, "step": 29770 }, { "epoch": 9.34, "learning_rate": 6.587202007528231e-07, "loss": 1.8108, "step": 29780 }, { "epoch": 9.34, "learning_rate": 6.555834378920954e-07, "loss": 1.8947, "step": 29790 }, { "epoch": 9.35, "learning_rate": 6.524466750313676e-07, "loss": 1.8328, "step": 29800 }, { "epoch": 9.35, "learning_rate": 6.493099121706399e-07, "loss": 1.7725, "step": 29810 }, { "epoch": 9.35, "learning_rate": 6.461731493099123e-07, "loss": 1.8631, "step": 29820 }, { "epoch": 9.36, "learning_rate": 6.430363864491845e-07, "loss": 1.7941, "step": 29830 }, { "epoch": 9.36, "learning_rate": 6.398996235884568e-07, "loss": 1.7644, "step": 29840 }, { "epoch": 9.36, "learning_rate": 6.36762860727729e-07, "loss": 1.8353, "step": 29850 }, { "epoch": 9.37, "learning_rate": 6.336260978670013e-07, "loss": 1.8568, "step": 29860 }, { "epoch": 9.37, "learning_rate": 6.304893350062735e-07, "loss": 1.8817, "step": 29870 }, { "epoch": 9.37, "learning_rate": 6.273525721455459e-07, "loss": 1.7775, "step": 29880 }, { "epoch": 9.38, "learning_rate": 6.242158092848182e-07, "loss": 1.9279, "step": 29890 }, { "epoch": 9.38, "learning_rate": 6.210790464240904e-07, "loss": 1.8116, "step": 29900 }, { "epoch": 9.38, "learning_rate": 6.179422835633627e-07, "loss": 1.8166, "step": 29910 }, { "epoch": 9.39, "learning_rate": 6.148055207026349e-07, "loss": 1.7839, "step": 29920 }, { "epoch": 9.39, "learning_rate": 6.116687578419072e-07, "loss": 1.8313, "step": 29930 }, { "epoch": 9.39, "learning_rate": 6.085319949811795e-07, "loss": 1.8809, "step": 29940 }, { "epoch": 9.39, "learning_rate": 6.053952321204517e-07, "loss": 1.8002, "step": 29950 }, { "epoch": 9.4, "learning_rate": 6.02258469259724e-07, "loss": 1.8235, "step": 29960 }, { "epoch": 9.4, "learning_rate": 5.991217063989963e-07, "loss": 1.7913, "step": 29970 }, { "epoch": 9.4, "learning_rate": 5.959849435382686e-07, "loss": 1.7681, "step": 29980 }, { "epoch": 9.41, "learning_rate": 5.928481806775408e-07, "loss": 1.8847, "step": 29990 }, { "epoch": 9.41, "learning_rate": 5.897114178168131e-07, "loss": 1.865, "step": 30000 }, { "epoch": 9.41, "learning_rate": 5.865746549560854e-07, "loss": 1.8295, "step": 30010 }, { "epoch": 9.42, "learning_rate": 5.834378920953576e-07, "loss": 1.8705, "step": 30020 }, { "epoch": 9.42, "learning_rate": 5.8030112923463e-07, "loss": 1.7746, "step": 30030 }, { "epoch": 9.42, "learning_rate": 5.771643663739021e-07, "loss": 1.8573, "step": 30040 }, { "epoch": 9.43, "learning_rate": 5.740276035131744e-07, "loss": 1.8142, "step": 30050 }, { "epoch": 9.43, "learning_rate": 5.708908406524467e-07, "loss": 1.8023, "step": 30060 }, { "epoch": 9.43, "learning_rate": 5.67754077791719e-07, "loss": 1.8147, "step": 30070 }, { "epoch": 9.44, "learning_rate": 5.646173149309913e-07, "loss": 1.799, "step": 30080 }, { "epoch": 9.44, "learning_rate": 5.614805520702635e-07, "loss": 1.8694, "step": 30090 }, { "epoch": 9.44, "learning_rate": 5.583437892095358e-07, "loss": 1.872, "step": 30100 }, { "epoch": 9.44, "learning_rate": 5.552070263488081e-07, "loss": 1.8582, "step": 30110 }, { "epoch": 9.45, "learning_rate": 5.520702634880804e-07, "loss": 1.7729, "step": 30120 }, { "epoch": 9.45, "learning_rate": 5.489335006273526e-07, "loss": 1.8739, "step": 30130 }, { "epoch": 9.45, "learning_rate": 5.457967377666249e-07, "loss": 1.85, "step": 30140 }, { "epoch": 9.46, "learning_rate": 5.426599749058972e-07, "loss": 1.8112, "step": 30150 }, { "epoch": 9.46, "learning_rate": 5.395232120451694e-07, "loss": 1.8687, "step": 30160 }, { "epoch": 9.46, "learning_rate": 5.363864491844417e-07, "loss": 1.8587, "step": 30170 }, { "epoch": 9.47, "learning_rate": 5.332496863237139e-07, "loss": 1.8573, "step": 30180 }, { "epoch": 9.47, "learning_rate": 5.301129234629863e-07, "loss": 1.7881, "step": 30190 }, { "epoch": 9.47, "learning_rate": 5.269761606022585e-07, "loss": 1.8975, "step": 30200 }, { "epoch": 9.48, "learning_rate": 5.238393977415308e-07, "loss": 1.8516, "step": 30210 }, { "epoch": 9.48, "learning_rate": 5.207026348808031e-07, "loss": 1.8825, "step": 30220 }, { "epoch": 9.48, "learning_rate": 5.175658720200753e-07, "loss": 1.7745, "step": 30230 }, { "epoch": 9.49, "learning_rate": 5.144291091593476e-07, "loss": 1.8163, "step": 30240 }, { "epoch": 9.49, "learning_rate": 5.112923462986199e-07, "loss": 1.8587, "step": 30250 }, { "epoch": 9.49, "learning_rate": 5.081555834378921e-07, "loss": 1.8959, "step": 30260 }, { "epoch": 9.49, "learning_rate": 5.050188205771643e-07, "loss": 1.8437, "step": 30270 }, { "epoch": 9.5, "learning_rate": 5.018820577164367e-07, "loss": 1.8326, "step": 30280 }, { "epoch": 9.5, "learning_rate": 4.98745294855709e-07, "loss": 1.8475, "step": 30290 }, { "epoch": 9.5, "learning_rate": 4.956085319949812e-07, "loss": 1.8396, "step": 30300 }, { "epoch": 9.51, "learning_rate": 4.924717691342535e-07, "loss": 1.8421, "step": 30310 }, { "epoch": 9.51, "learning_rate": 4.893350062735257e-07, "loss": 1.8836, "step": 30320 }, { "epoch": 9.51, "learning_rate": 4.861982434127981e-07, "loss": 1.8699, "step": 30330 }, { "epoch": 9.52, "learning_rate": 4.830614805520704e-07, "loss": 1.7754, "step": 30340 }, { "epoch": 9.52, "learning_rate": 4.799247176913425e-07, "loss": 1.7852, "step": 30350 }, { "epoch": 9.52, "learning_rate": 4.7678795483061487e-07, "loss": 1.829, "step": 30360 }, { "epoch": 9.53, "learning_rate": 4.736511919698871e-07, "loss": 1.8097, "step": 30370 }, { "epoch": 9.53, "learning_rate": 4.705144291091594e-07, "loss": 1.8907, "step": 30380 }, { "epoch": 9.53, "learning_rate": 4.673776662484317e-07, "loss": 1.8662, "step": 30390 }, { "epoch": 9.54, "learning_rate": 4.642409033877039e-07, "loss": 1.8575, "step": 30400 }, { "epoch": 9.54, "learning_rate": 4.6110414052697626e-07, "loss": 1.8372, "step": 30410 }, { "epoch": 9.54, "learning_rate": 4.5796737766624846e-07, "loss": 1.8453, "step": 30420 }, { "epoch": 9.55, "learning_rate": 4.548306148055207e-07, "loss": 1.875, "step": 30430 }, { "epoch": 9.55, "learning_rate": 4.5169385194479303e-07, "loss": 1.8458, "step": 30440 }, { "epoch": 9.55, "learning_rate": 4.485570890840653e-07, "loss": 1.7614, "step": 30450 }, { "epoch": 9.55, "learning_rate": 4.4542032622333754e-07, "loss": 1.8266, "step": 30460 }, { "epoch": 9.56, "learning_rate": 4.4228356336260985e-07, "loss": 1.8236, "step": 30470 }, { "epoch": 9.56, "learning_rate": 4.391468005018821e-07, "loss": 1.8841, "step": 30480 }, { "epoch": 9.56, "learning_rate": 4.360100376411543e-07, "loss": 1.8424, "step": 30490 }, { "epoch": 9.57, "learning_rate": 4.3287327478042667e-07, "loss": 1.8284, "step": 30500 }, { "epoch": 9.57, "learning_rate": 4.297365119196989e-07, "loss": 1.7975, "step": 30510 }, { "epoch": 9.57, "learning_rate": 4.265997490589712e-07, "loss": 1.86, "step": 30520 }, { "epoch": 9.58, "learning_rate": 4.2346298619824344e-07, "loss": 1.8916, "step": 30530 }, { "epoch": 9.58, "learning_rate": 4.203262233375157e-07, "loss": 1.8348, "step": 30540 }, { "epoch": 9.58, "learning_rate": 4.17189460476788e-07, "loss": 1.8673, "step": 30550 }, { "epoch": 9.59, "learning_rate": 4.1405269761606027e-07, "loss": 1.8117, "step": 30560 }, { "epoch": 9.59, "learning_rate": 4.109159347553325e-07, "loss": 1.8157, "step": 30570 }, { "epoch": 9.59, "learning_rate": 4.0777917189460483e-07, "loss": 1.8566, "step": 30580 }, { "epoch": 9.6, "learning_rate": 4.046424090338771e-07, "loss": 1.9022, "step": 30590 }, { "epoch": 9.6, "learning_rate": 4.015056461731493e-07, "loss": 1.8392, "step": 30600 }, { "epoch": 9.6, "learning_rate": 3.983688833124216e-07, "loss": 1.8338, "step": 30610 }, { "epoch": 9.6, "learning_rate": 3.9523212045169386e-07, "loss": 1.8496, "step": 30620 }, { "epoch": 9.61, "learning_rate": 3.9209535759096617e-07, "loss": 1.8472, "step": 30630 }, { "epoch": 9.61, "learning_rate": 3.889585947302384e-07, "loss": 1.8065, "step": 30640 }, { "epoch": 9.61, "learning_rate": 3.858218318695107e-07, "loss": 1.7734, "step": 30650 }, { "epoch": 9.62, "learning_rate": 3.82685069008783e-07, "loss": 1.8462, "step": 30660 }, { "epoch": 9.62, "learning_rate": 3.7954830614805525e-07, "loss": 1.8648, "step": 30670 }, { "epoch": 9.62, "learning_rate": 3.764115432873275e-07, "loss": 1.7626, "step": 30680 }, { "epoch": 9.63, "learning_rate": 3.732747804265998e-07, "loss": 1.8509, "step": 30690 }, { "epoch": 9.63, "learning_rate": 3.7013801756587207e-07, "loss": 1.8544, "step": 30700 }, { "epoch": 9.63, "learning_rate": 3.670012547051443e-07, "loss": 1.8313, "step": 30710 }, { "epoch": 9.64, "learning_rate": 3.638644918444166e-07, "loss": 1.8364, "step": 30720 }, { "epoch": 9.64, "learning_rate": 3.6072772898368884e-07, "loss": 1.833, "step": 30730 }, { "epoch": 9.64, "learning_rate": 3.5759096612296115e-07, "loss": 1.8429, "step": 30740 }, { "epoch": 9.65, "learning_rate": 3.544542032622334e-07, "loss": 1.807, "step": 30750 }, { "epoch": 9.65, "learning_rate": 3.5131744040150566e-07, "loss": 1.8482, "step": 30760 }, { "epoch": 9.65, "learning_rate": 3.4818067754077797e-07, "loss": 1.8544, "step": 30770 }, { "epoch": 9.65, "learning_rate": 3.4504391468005023e-07, "loss": 1.8451, "step": 30780 }, { "epoch": 9.66, "learning_rate": 3.419071518193225e-07, "loss": 1.9084, "step": 30790 }, { "epoch": 9.66, "learning_rate": 3.387703889585948e-07, "loss": 1.8419, "step": 30800 }, { "epoch": 9.66, "learning_rate": 3.35633626097867e-07, "loss": 1.7771, "step": 30810 }, { "epoch": 9.67, "learning_rate": 3.3249686323713926e-07, "loss": 1.7244, "step": 30820 }, { "epoch": 9.67, "learning_rate": 3.2936010037641157e-07, "loss": 1.8022, "step": 30830 }, { "epoch": 9.67, "learning_rate": 3.262233375156838e-07, "loss": 1.8439, "step": 30840 }, { "epoch": 9.68, "learning_rate": 3.2308657465495613e-07, "loss": 1.7798, "step": 30850 }, { "epoch": 9.68, "learning_rate": 3.199498117942284e-07, "loss": 1.8043, "step": 30860 }, { "epoch": 9.68, "learning_rate": 3.1681304893350065e-07, "loss": 1.7272, "step": 30870 }, { "epoch": 9.69, "learning_rate": 3.1367628607277296e-07, "loss": 1.8712, "step": 30880 }, { "epoch": 9.69, "learning_rate": 3.105395232120452e-07, "loss": 1.7728, "step": 30890 }, { "epoch": 9.69, "learning_rate": 3.0740276035131747e-07, "loss": 1.8384, "step": 30900 }, { "epoch": 9.7, "learning_rate": 3.042659974905897e-07, "loss": 1.8052, "step": 30910 }, { "epoch": 9.7, "learning_rate": 3.01129234629862e-07, "loss": 1.8385, "step": 30920 }, { "epoch": 9.7, "learning_rate": 2.979924717691343e-07, "loss": 1.9009, "step": 30930 }, { "epoch": 9.71, "learning_rate": 2.9485570890840655e-07, "loss": 1.8618, "step": 30940 }, { "epoch": 9.71, "learning_rate": 2.917189460476788e-07, "loss": 1.8397, "step": 30950 }, { "epoch": 9.71, "learning_rate": 2.8858218318695106e-07, "loss": 1.7626, "step": 30960 }, { "epoch": 9.71, "learning_rate": 2.8544542032622337e-07, "loss": 1.858, "step": 30970 }, { "epoch": 9.72, "learning_rate": 2.8230865746549563e-07, "loss": 1.8049, "step": 30980 }, { "epoch": 9.72, "learning_rate": 2.791718946047679e-07, "loss": 1.8149, "step": 30990 }, { "epoch": 9.72, "learning_rate": 2.760351317440402e-07, "loss": 1.8721, "step": 31000 }, { "epoch": 9.73, "learning_rate": 2.7289836888331245e-07, "loss": 1.8053, "step": 31010 }, { "epoch": 9.73, "learning_rate": 2.697616060225847e-07, "loss": 1.8912, "step": 31020 }, { "epoch": 9.73, "learning_rate": 2.6662484316185696e-07, "loss": 1.7795, "step": 31030 }, { "epoch": 9.74, "learning_rate": 2.634880803011293e-07, "loss": 1.794, "step": 31040 }, { "epoch": 9.74, "learning_rate": 2.6035131744040153e-07, "loss": 1.8035, "step": 31050 }, { "epoch": 9.74, "learning_rate": 2.572145545796738e-07, "loss": 1.8038, "step": 31060 }, { "epoch": 9.75, "learning_rate": 2.5407779171894604e-07, "loss": 1.8127, "step": 31070 }, { "epoch": 9.75, "learning_rate": 2.5094102885821835e-07, "loss": 1.7435, "step": 31080 }, { "epoch": 9.75, "learning_rate": 2.478042659974906e-07, "loss": 1.8577, "step": 31090 }, { "epoch": 9.76, "learning_rate": 2.4466750313676287e-07, "loss": 1.8321, "step": 31100 }, { "epoch": 9.76, "learning_rate": 2.415307402760352e-07, "loss": 1.8336, "step": 31110 }, { "epoch": 9.76, "learning_rate": 2.3839397741530743e-07, "loss": 1.8103, "step": 31120 }, { "epoch": 9.76, "learning_rate": 2.352572145545797e-07, "loss": 1.8046, "step": 31130 }, { "epoch": 9.77, "learning_rate": 2.3212045169385195e-07, "loss": 1.7768, "step": 31140 }, { "epoch": 9.77, "learning_rate": 2.2898368883312423e-07, "loss": 1.8558, "step": 31150 }, { "epoch": 9.77, "learning_rate": 2.2584692597239651e-07, "loss": 1.7743, "step": 31160 }, { "epoch": 9.78, "learning_rate": 2.2271016311166877e-07, "loss": 1.8035, "step": 31170 }, { "epoch": 9.78, "learning_rate": 2.1957340025094105e-07, "loss": 1.778, "step": 31180 }, { "epoch": 9.78, "learning_rate": 2.1643663739021334e-07, "loss": 1.7578, "step": 31190 }, { "epoch": 9.79, "learning_rate": 2.132998745294856e-07, "loss": 1.9089, "step": 31200 }, { "epoch": 9.79, "learning_rate": 2.1016311166875785e-07, "loss": 1.8688, "step": 31210 }, { "epoch": 9.79, "learning_rate": 2.0702634880803013e-07, "loss": 1.8124, "step": 31220 }, { "epoch": 9.8, "learning_rate": 2.0388958594730242e-07, "loss": 1.8198, "step": 31230 }, { "epoch": 9.8, "learning_rate": 2.0075282308657465e-07, "loss": 1.8393, "step": 31240 }, { "epoch": 9.8, "learning_rate": 1.9761606022584693e-07, "loss": 1.7781, "step": 31250 }, { "epoch": 9.81, "learning_rate": 1.944792973651192e-07, "loss": 1.8233, "step": 31260 }, { "epoch": 9.81, "learning_rate": 1.913425345043915e-07, "loss": 1.8858, "step": 31270 }, { "epoch": 9.81, "learning_rate": 1.8820577164366375e-07, "loss": 1.7169, "step": 31280 }, { "epoch": 9.81, "learning_rate": 1.8506900878293604e-07, "loss": 1.8674, "step": 31290 }, { "epoch": 9.82, "learning_rate": 1.819322459222083e-07, "loss": 1.8088, "step": 31300 }, { "epoch": 9.82, "learning_rate": 1.7879548306148058e-07, "loss": 1.7843, "step": 31310 }, { "epoch": 9.82, "learning_rate": 1.7565872020075283e-07, "loss": 1.8251, "step": 31320 }, { "epoch": 9.83, "learning_rate": 1.7252195734002512e-07, "loss": 1.9427, "step": 31330 }, { "epoch": 9.83, "learning_rate": 1.693851944792974e-07, "loss": 1.7929, "step": 31340 }, { "epoch": 9.83, "learning_rate": 1.6624843161856963e-07, "loss": 1.8276, "step": 31350 }, { "epoch": 9.84, "learning_rate": 1.631116687578419e-07, "loss": 1.8108, "step": 31360 }, { "epoch": 9.84, "learning_rate": 1.599749058971142e-07, "loss": 1.8391, "step": 31370 }, { "epoch": 9.84, "learning_rate": 1.5683814303638648e-07, "loss": 1.8235, "step": 31380 }, { "epoch": 9.85, "learning_rate": 1.5370138017565873e-07, "loss": 1.8547, "step": 31390 }, { "epoch": 9.85, "learning_rate": 1.50564617314931e-07, "loss": 1.8215, "step": 31400 }, { "epoch": 9.85, "learning_rate": 1.4742785445420327e-07, "loss": 1.7807, "step": 31410 }, { "epoch": 9.86, "learning_rate": 1.4429109159347553e-07, "loss": 1.8327, "step": 31420 }, { "epoch": 9.86, "learning_rate": 1.4115432873274781e-07, "loss": 1.741, "step": 31430 }, { "epoch": 9.86, "learning_rate": 1.380175658720201e-07, "loss": 1.7717, "step": 31440 }, { "epoch": 9.87, "learning_rate": 1.3488080301129235e-07, "loss": 1.853, "step": 31450 }, { "epoch": 9.87, "learning_rate": 1.3174404015056464e-07, "loss": 1.8866, "step": 31460 }, { "epoch": 9.87, "learning_rate": 1.286072772898369e-07, "loss": 1.7436, "step": 31470 }, { "epoch": 9.87, "learning_rate": 1.2547051442910918e-07, "loss": 1.7374, "step": 31480 }, { "epoch": 9.88, "learning_rate": 1.2233375156838143e-07, "loss": 1.8098, "step": 31490 }, { "epoch": 9.88, "learning_rate": 1.1919698870765372e-07, "loss": 1.7861, "step": 31500 }, { "epoch": 9.88, "learning_rate": 1.1606022584692597e-07, "loss": 1.8022, "step": 31510 }, { "epoch": 9.89, "learning_rate": 1.1292346298619826e-07, "loss": 1.8897, "step": 31520 }, { "epoch": 9.89, "learning_rate": 1.0978670012547053e-07, "loss": 1.7814, "step": 31530 }, { "epoch": 9.89, "learning_rate": 1.066499372647428e-07, "loss": 1.8239, "step": 31540 }, { "epoch": 9.9, "learning_rate": 1.0351317440401507e-07, "loss": 1.8212, "step": 31550 }, { "epoch": 9.9, "learning_rate": 1.0037641154328732e-07, "loss": 1.7385, "step": 31560 }, { "epoch": 9.9, "learning_rate": 9.72396486825596e-08, "loss": 1.8555, "step": 31570 }, { "epoch": 9.91, "learning_rate": 9.410288582183188e-08, "loss": 1.8566, "step": 31580 }, { "epoch": 9.91, "learning_rate": 9.096612296110415e-08, "loss": 1.822, "step": 31590 }, { "epoch": 9.91, "learning_rate": 8.782936010037642e-08, "loss": 1.8715, "step": 31600 }, { "epoch": 9.92, "learning_rate": 8.46925972396487e-08, "loss": 1.8113, "step": 31610 }, { "epoch": 9.92, "learning_rate": 8.155583437892096e-08, "loss": 1.832, "step": 31620 }, { "epoch": 9.92, "learning_rate": 7.841907151819324e-08, "loss": 1.7301, "step": 31630 }, { "epoch": 9.92, "learning_rate": 7.52823086574655e-08, "loss": 1.7224, "step": 31640 }, { "epoch": 9.93, "learning_rate": 7.214554579673777e-08, "loss": 1.8191, "step": 31650 }, { "epoch": 9.93, "learning_rate": 6.900878293601005e-08, "loss": 1.815, "step": 31660 }, { "epoch": 9.93, "learning_rate": 6.587202007528232e-08, "loss": 1.8129, "step": 31670 }, { "epoch": 9.94, "learning_rate": 6.273525721455459e-08, "loss": 1.8734, "step": 31680 }, { "epoch": 9.94, "learning_rate": 5.959849435382686e-08, "loss": 1.8497, "step": 31690 }, { "epoch": 9.94, "learning_rate": 5.646173149309913e-08, "loss": 1.7949, "step": 31700 }, { "epoch": 9.95, "learning_rate": 5.33249686323714e-08, "loss": 1.8628, "step": 31710 }, { "epoch": 9.95, "learning_rate": 5.018820577164366e-08, "loss": 1.7708, "step": 31720 }, { "epoch": 9.95, "learning_rate": 4.705144291091594e-08, "loss": 1.8327, "step": 31730 }, { "epoch": 9.96, "learning_rate": 4.391468005018821e-08, "loss": 1.9184, "step": 31740 }, { "epoch": 9.96, "learning_rate": 4.077791718946048e-08, "loss": 1.8494, "step": 31750 }, { "epoch": 9.96, "learning_rate": 3.764115432873275e-08, "loss": 1.7633, "step": 31760 }, { "epoch": 9.97, "learning_rate": 3.4504391468005024e-08, "loss": 1.7958, "step": 31770 }, { "epoch": 9.97, "learning_rate": 3.1367628607277294e-08, "loss": 1.7584, "step": 31780 }, { "epoch": 9.97, "learning_rate": 2.8230865746549564e-08, "loss": 1.8353, "step": 31790 }, { "epoch": 9.97, "learning_rate": 2.509410288582183e-08, "loss": 1.8028, "step": 31800 }, { "epoch": 9.98, "learning_rate": 2.1957340025094104e-08, "loss": 1.8179, "step": 31810 }, { "epoch": 9.98, "learning_rate": 1.8820577164366374e-08, "loss": 1.768, "step": 31820 }, { "epoch": 9.98, "learning_rate": 1.5683814303638647e-08, "loss": 1.8442, "step": 31830 }, { "epoch": 9.99, "learning_rate": 1.2547051442910915e-08, "loss": 1.8952, "step": 31840 }, { "epoch": 9.99, "learning_rate": 9.410288582183187e-09, "loss": 1.8367, "step": 31850 }, { "epoch": 9.99, "learning_rate": 6.273525721455458e-09, "loss": 1.8358, "step": 31860 }, { "epoch": 10.0, "learning_rate": 3.136762860727729e-09, "loss": 1.861, "step": 31870 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 1.8891, "step": 31880 }, { "epoch": 10.0, "eval_loss": 1.806269645690918, "eval_runtime": 13.6101, "eval_samples_per_second": 73.475, "eval_steps_per_second": 4.629, "step": 31880 } ], "max_steps": 31880, "num_train_epochs": 10, "total_flos": 1.2339389647872e+17, "trial_name": null, "trial_params": null }