{ "best_metric": null, "best_model_checkpoint": null, "epoch": 42.17566368661708, "global_step": 618000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 1e-05, "loss": 9.5383, "step": 1000 }, { "epoch": 0.14, "learning_rate": 2e-05, "loss": 4.7497, "step": 2000 }, { "epoch": 0.2, "learning_rate": 3e-05, "loss": 3.6415, "step": 3000 }, { "epoch": 0.27, "learning_rate": 4e-05, "loss": 3.1335, "step": 4000 }, { "epoch": 0.34, "learning_rate": 5e-05, "loss": 2.9182, "step": 5000 }, { "epoch": 0.41, "learning_rate": 6e-05, "loss": 2.7694, "step": 6000 }, { "epoch": 0.48, "learning_rate": 7e-05, "loss": 2.6519, "step": 7000 }, { "epoch": 0.55, "learning_rate": 8e-05, "loss": 2.5605, "step": 8000 }, { "epoch": 0.61, "learning_rate": 9e-05, "loss": 2.5034, "step": 9000 }, { "epoch": 0.68, "learning_rate": 0.0001, "loss": 2.4401, "step": 10000 }, { "epoch": 0.75, "learning_rate": 9.98989898989899e-05, "loss": 2.3808, "step": 11000 }, { "epoch": 0.82, "learning_rate": 9.97979797979798e-05, "loss": 2.331, "step": 12000 }, { "epoch": 0.89, "learning_rate": 9.96969696969697e-05, "loss": 2.2899, "step": 13000 }, { "epoch": 0.96, "learning_rate": 9.95959595959596e-05, "loss": 2.2701, "step": 14000 }, { "epoch": 1.02, "learning_rate": 9.94949494949495e-05, "loss": 2.2551, "step": 15000 }, { "epoch": 1.09, "learning_rate": 9.939393939393939e-05, "loss": 2.2178, "step": 16000 }, { "epoch": 1.16, "learning_rate": 9.92929292929293e-05, "loss": 2.2199, "step": 17000 }, { "epoch": 1.23, "learning_rate": 9.919191919191919e-05, "loss": 2.2129, "step": 18000 }, { "epoch": 1.3, "learning_rate": 9.909090909090911e-05, "loss": 2.2136, "step": 19000 }, { "epoch": 1.36, "learning_rate": 9.8989898989899e-05, "loss": 2.2057, "step": 20000 }, { "epoch": 1.43, "learning_rate": 9.888888888888889e-05, "loss": 2.2007, "step": 21000 }, { "epoch": 1.5, "learning_rate": 9.87878787878788e-05, "loss": 2.2019, "step": 22000 }, { "epoch": 1.57, "learning_rate": 9.868686868686869e-05, "loss": 2.2009, "step": 23000 }, { "epoch": 1.64, "learning_rate": 9.85858585858586e-05, "loss": 2.1896, "step": 24000 }, { "epoch": 1.71, "learning_rate": 9.848484848484849e-05, "loss": 2.1862, "step": 25000 }, { "epoch": 1.77, "learning_rate": 9.838383838383838e-05, "loss": 2.1872, "step": 26000 }, { "epoch": 1.84, "learning_rate": 9.828282828282829e-05, "loss": 2.1935, "step": 27000 }, { "epoch": 1.91, "learning_rate": 9.818181818181818e-05, "loss": 2.1856, "step": 28000 }, { "epoch": 1.98, "learning_rate": 9.808080808080809e-05, "loss": 2.1705, "step": 29000 }, { "epoch": 2.05, "learning_rate": 9.797979797979798e-05, "loss": 2.1708, "step": 30000 }, { "epoch": 2.12, "learning_rate": 9.787878787878789e-05, "loss": 2.1601, "step": 31000 }, { "epoch": 2.18, "learning_rate": 9.777777777777778e-05, "loss": 2.1639, "step": 32000 }, { "epoch": 2.25, "learning_rate": 9.767676767676767e-05, "loss": 2.1561, "step": 33000 }, { "epoch": 2.32, "learning_rate": 9.757575757575758e-05, "loss": 2.1528, "step": 34000 }, { "epoch": 2.39, "learning_rate": 9.747474747474747e-05, "loss": 2.1561, "step": 35000 }, { "epoch": 2.46, "learning_rate": 9.737373737373738e-05, "loss": 2.1663, "step": 36000 }, { "epoch": 2.53, "learning_rate": 9.727272727272728e-05, "loss": 2.152, "step": 37000 }, { "epoch": 2.59, "learning_rate": 9.717171717171718e-05, "loss": 2.1551, "step": 38000 }, { "epoch": 2.66, "learning_rate": 9.707070707070708e-05, "loss": 2.1453, "step": 39000 }, { "epoch": 2.73, "learning_rate": 9.696969696969698e-05, "loss": 2.1524, "step": 40000 }, { "epoch": 2.8, "learning_rate": 9.686868686868688e-05, "loss": 2.1523, "step": 41000 }, { "epoch": 2.87, "learning_rate": 9.676767676767677e-05, "loss": 2.1457, "step": 42000 }, { "epoch": 2.93, "learning_rate": 9.666666666666667e-05, "loss": 2.1527, "step": 43000 }, { "epoch": 3.0, "learning_rate": 9.656565656565657e-05, "loss": 2.1446, "step": 44000 }, { "epoch": 3.07, "learning_rate": 9.646464646464647e-05, "loss": 2.1267, "step": 45000 }, { "epoch": 3.14, "learning_rate": 9.636363636363637e-05, "loss": 2.122, "step": 46000 }, { "epoch": 3.21, "learning_rate": 9.626262626262627e-05, "loss": 2.126, "step": 47000 }, { "epoch": 3.28, "learning_rate": 9.616161616161616e-05, "loss": 2.1321, "step": 48000 }, { "epoch": 3.34, "learning_rate": 9.606060606060606e-05, "loss": 2.1251, "step": 49000 }, { "epoch": 3.41, "learning_rate": 9.595959595959596e-05, "loss": 2.1242, "step": 50000 }, { "epoch": 3.48, "learning_rate": 9.585858585858586e-05, "loss": 2.1297, "step": 51000 }, { "epoch": 3.55, "learning_rate": 9.575757575757576e-05, "loss": 2.1262, "step": 52000 }, { "epoch": 3.62, "learning_rate": 9.565656565656566e-05, "loss": 2.1166, "step": 53000 }, { "epoch": 3.69, "learning_rate": 9.555555555555557e-05, "loss": 2.128, "step": 54000 }, { "epoch": 3.75, "learning_rate": 9.545454545454546e-05, "loss": 2.1239, "step": 55000 }, { "epoch": 3.82, "learning_rate": 9.535353535353537e-05, "loss": 2.1275, "step": 56000 }, { "epoch": 3.89, "learning_rate": 9.525252525252526e-05, "loss": 2.1234, "step": 57000 }, { "epoch": 3.96, "learning_rate": 9.515151515151515e-05, "loss": 2.1243, "step": 58000 }, { "epoch": 4.03, "learning_rate": 9.505050505050506e-05, "loss": 2.1213, "step": 59000 }, { "epoch": 4.09, "learning_rate": 9.494949494949495e-05, "loss": 2.1085, "step": 60000 }, { "epoch": 4.16, "learning_rate": 9.484848484848486e-05, "loss": 2.0972, "step": 61000 }, { "epoch": 4.23, "learning_rate": 9.474747474747475e-05, "loss": 2.1019, "step": 62000 }, { "epoch": 4.3, "learning_rate": 9.464646464646464e-05, "loss": 2.1029, "step": 63000 }, { "epoch": 4.37, "learning_rate": 9.454545454545455e-05, "loss": 2.0965, "step": 64000 }, { "epoch": 4.44, "learning_rate": 9.444444444444444e-05, "loss": 2.0978, "step": 65000 }, { "epoch": 4.5, "learning_rate": 9.434343434343435e-05, "loss": 2.1097, "step": 66000 }, { "epoch": 4.57, "learning_rate": 9.424242424242424e-05, "loss": 2.0989, "step": 67000 }, { "epoch": 4.64, "learning_rate": 9.414141414141415e-05, "loss": 2.1079, "step": 68000 }, { "epoch": 4.71, "learning_rate": 9.404040404040404e-05, "loss": 2.1017, "step": 69000 }, { "epoch": 4.78, "learning_rate": 9.393939393939395e-05, "loss": 2.1007, "step": 70000 }, { "epoch": 4.85, "learning_rate": 9.383838383838385e-05, "loss": 2.1046, "step": 71000 }, { "epoch": 4.91, "learning_rate": 9.373737373737375e-05, "loss": 2.0981, "step": 72000 }, { "epoch": 4.98, "learning_rate": 9.363636363636364e-05, "loss": 2.1042, "step": 73000 }, { "epoch": 5.05, "learning_rate": 9.353535353535354e-05, "loss": 2.0911, "step": 74000 }, { "epoch": 5.12, "learning_rate": 9.343434343434344e-05, "loss": 2.0847, "step": 75000 }, { "epoch": 5.19, "learning_rate": 9.333333333333334e-05, "loss": 2.0833, "step": 76000 }, { "epoch": 5.25, "learning_rate": 9.323232323232324e-05, "loss": 2.0855, "step": 77000 }, { "epoch": 5.32, "learning_rate": 9.313131313131314e-05, "loss": 2.0893, "step": 78000 }, { "epoch": 5.39, "learning_rate": 9.303030303030303e-05, "loss": 2.0899, "step": 79000 }, { "epoch": 5.46, "learning_rate": 9.292929292929293e-05, "loss": 2.0869, "step": 80000 }, { "epoch": 5.53, "learning_rate": 9.282828282828283e-05, "loss": 2.085, "step": 81000 }, { "epoch": 5.6, "learning_rate": 9.272727272727273e-05, "loss": 2.0878, "step": 82000 }, { "epoch": 5.66, "learning_rate": 9.262626262626263e-05, "loss": 2.0877, "step": 83000 }, { "epoch": 5.73, "learning_rate": 9.252525252525253e-05, "loss": 2.0886, "step": 84000 }, { "epoch": 5.8, "learning_rate": 9.242424242424242e-05, "loss": 2.0939, "step": 85000 }, { "epoch": 5.87, "learning_rate": 9.232323232323232e-05, "loss": 2.0732, "step": 86000 }, { "epoch": 5.94, "learning_rate": 9.222222222222223e-05, "loss": 2.0847, "step": 87000 }, { "epoch": 6.01, "learning_rate": 9.212121212121214e-05, "loss": 2.0904, "step": 88000 }, { "epoch": 6.07, "learning_rate": 9.202020202020203e-05, "loss": 2.0627, "step": 89000 }, { "epoch": 6.14, "learning_rate": 9.191919191919192e-05, "loss": 2.0621, "step": 90000 }, { "epoch": 6.21, "learning_rate": 9.181818181818183e-05, "loss": 2.0659, "step": 91000 }, { "epoch": 6.28, "learning_rate": 9.171717171717172e-05, "loss": 2.067, "step": 92000 }, { "epoch": 6.35, "learning_rate": 9.161616161616163e-05, "loss": 2.0773, "step": 93000 }, { "epoch": 6.42, "learning_rate": 9.151515151515152e-05, "loss": 2.0734, "step": 94000 }, { "epoch": 6.48, "learning_rate": 9.141414141414141e-05, "loss": 2.0703, "step": 95000 }, { "epoch": 6.55, "learning_rate": 9.131313131313132e-05, "loss": 2.074, "step": 96000 }, { "epoch": 6.62, "learning_rate": 9.121212121212121e-05, "loss": 2.0704, "step": 97000 }, { "epoch": 6.69, "learning_rate": 9.111111111111112e-05, "loss": 2.0752, "step": 98000 }, { "epoch": 6.76, "learning_rate": 9.101010101010101e-05, "loss": 2.081, "step": 99000 }, { "epoch": 6.82, "learning_rate": 9.090909090909092e-05, "loss": 2.0663, "step": 100000 }, { "epoch": 6.89, "learning_rate": 9.080808080808081e-05, "loss": 2.08, "step": 101000 }, { "epoch": 6.96, "learning_rate": 9.07070707070707e-05, "loss": 2.0778, "step": 102000 }, { "epoch": 7.03, "learning_rate": 9.060606060606061e-05, "loss": 2.0575, "step": 103000 }, { "epoch": 7.1, "learning_rate": 9.050505050505052e-05, "loss": 2.0668, "step": 104000 }, { "epoch": 7.17, "learning_rate": 9.040404040404041e-05, "loss": 2.0648, "step": 105000 }, { "epoch": 7.23, "learning_rate": 9.030303030303031e-05, "loss": 2.0684, "step": 106000 }, { "epoch": 7.3, "learning_rate": 9.02020202020202e-05, "loss": 2.0587, "step": 107000 }, { "epoch": 7.37, "learning_rate": 9.010101010101011e-05, "loss": 2.0687, "step": 108000 }, { "epoch": 7.44, "learning_rate": 9e-05, "loss": 2.0673, "step": 109000 }, { "epoch": 7.51, "learning_rate": 8.98989898989899e-05, "loss": 2.0664, "step": 110000 }, { "epoch": 7.58, "learning_rate": 8.97979797979798e-05, "loss": 2.0621, "step": 111000 }, { "epoch": 7.64, "learning_rate": 8.96969696969697e-05, "loss": 2.0662, "step": 112000 }, { "epoch": 7.71, "learning_rate": 8.95959595959596e-05, "loss": 2.0629, "step": 113000 }, { "epoch": 7.78, "learning_rate": 8.94949494949495e-05, "loss": 2.067, "step": 114000 }, { "epoch": 7.85, "learning_rate": 8.93939393939394e-05, "loss": 2.0496, "step": 115000 }, { "epoch": 7.92, "learning_rate": 8.92929292929293e-05, "loss": 2.0551, "step": 116000 }, { "epoch": 7.98, "learning_rate": 8.919191919191919e-05, "loss": 2.0644, "step": 117000 }, { "epoch": 8.05, "learning_rate": 8.90909090909091e-05, "loss": 2.0443, "step": 118000 }, { "epoch": 8.12, "learning_rate": 8.898989898989899e-05, "loss": 2.0336, "step": 119000 }, { "epoch": 8.19, "learning_rate": 8.888888888888889e-05, "loss": 2.0566, "step": 120000 }, { "epoch": 8.26, "learning_rate": 8.87878787878788e-05, "loss": 2.0554, "step": 121000 }, { "epoch": 8.33, "learning_rate": 8.868686868686869e-05, "loss": 2.0694, "step": 122000 }, { "epoch": 8.39, "learning_rate": 8.85858585858586e-05, "loss": 2.0965, "step": 123000 }, { "epoch": 8.46, "learning_rate": 8.848484848484849e-05, "loss": 2.0782, "step": 124000 }, { "epoch": 8.53, "learning_rate": 8.83838383838384e-05, "loss": 2.0969, "step": 125000 }, { "epoch": 8.6, "learning_rate": 8.828282828282829e-05, "loss": 2.0904, "step": 126000 }, { "epoch": 8.67, "learning_rate": 8.818181818181818e-05, "loss": 2.0893, "step": 127000 }, { "epoch": 8.74, "learning_rate": 8.808080808080809e-05, "loss": 2.0651, "step": 128000 }, { "epoch": 8.8, "learning_rate": 8.797979797979798e-05, "loss": 2.0593, "step": 129000 }, { "epoch": 8.87, "learning_rate": 8.787878787878789e-05, "loss": 2.0756, "step": 130000 }, { "epoch": 8.94, "learning_rate": 8.777777777777778e-05, "loss": 2.0774, "step": 131000 }, { "epoch": 9.01, "learning_rate": 8.767676767676767e-05, "loss": 2.0747, "step": 132000 }, { "epoch": 9.08, "learning_rate": 8.757575757575758e-05, "loss": 2.0535, "step": 133000 }, { "epoch": 9.14, "learning_rate": 8.747474747474747e-05, "loss": 2.0424, "step": 134000 }, { "epoch": 9.21, "learning_rate": 8.737373737373738e-05, "loss": 2.0593, "step": 135000 }, { "epoch": 9.28, "learning_rate": 8.727272727272727e-05, "loss": 2.0917, "step": 136000 }, { "epoch": 9.35, "learning_rate": 8.717171717171718e-05, "loss": 2.0821, "step": 137000 }, { "epoch": 9.42, "learning_rate": 8.707070707070707e-05, "loss": 2.0826, "step": 138000 }, { "epoch": 9.49, "learning_rate": 8.696969696969698e-05, "loss": 2.0706, "step": 139000 }, { "epoch": 9.55, "learning_rate": 8.686868686868688e-05, "loss": 2.0645, "step": 140000 }, { "epoch": 9.62, "learning_rate": 8.676767676767678e-05, "loss": 2.0742, "step": 141000 }, { "epoch": 9.69, "learning_rate": 8.666666666666667e-05, "loss": 2.0539, "step": 142000 }, { "epoch": 9.76, "learning_rate": 8.656565656565657e-05, "loss": 2.0711, "step": 143000 }, { "epoch": 9.83, "learning_rate": 8.646464646464647e-05, "loss": 2.082, "step": 144000 }, { "epoch": 9.9, "learning_rate": 8.636363636363637e-05, "loss": 2.0967, "step": 145000 }, { "epoch": 9.96, "learning_rate": 8.626262626262627e-05, "loss": 2.0903, "step": 146000 }, { "epoch": 10.03, "learning_rate": 8.616161616161616e-05, "loss": 2.0641, "step": 147000 }, { "epoch": 10.1, "learning_rate": 8.606060606060606e-05, "loss": 2.0476, "step": 148000 }, { "epoch": 10.17, "learning_rate": 8.595959595959596e-05, "loss": 2.0659, "step": 149000 }, { "epoch": 10.24, "learning_rate": 8.585858585858586e-05, "loss": 2.0945, "step": 150000 }, { "epoch": 10.31, "learning_rate": 8.575757575757576e-05, "loss": 2.0726, "step": 151000 }, { "epoch": 10.37, "learning_rate": 8.565656565656566e-05, "loss": 2.0658, "step": 152000 }, { "epoch": 10.44, "learning_rate": 8.555555555555556e-05, "loss": 2.0801, "step": 153000 }, { "epoch": 10.51, "learning_rate": 8.545454545454545e-05, "loss": 2.1605, "step": 154000 }, { "epoch": 10.58, "learning_rate": 8.535353535353535e-05, "loss": 2.1355, "step": 155000 }, { "epoch": 10.65, "learning_rate": 8.525252525252526e-05, "loss": 2.1316, "step": 156000 }, { "epoch": 10.71, "learning_rate": 8.515151515151515e-05, "loss": 2.096, "step": 157000 }, { "epoch": 10.78, "learning_rate": 8.505050505050506e-05, "loss": 2.1297, "step": 158000 }, { "epoch": 10.85, "learning_rate": 8.494949494949495e-05, "loss": 2.1931, "step": 159000 }, { "epoch": 10.92, "learning_rate": 8.484848484848486e-05, "loss": 2.2619, "step": 160000 }, { "epoch": 10.99, "learning_rate": 8.474747474747475e-05, "loss": 2.419, "step": 161000 }, { "epoch": 11.06, "learning_rate": 8.464646464646466e-05, "loss": 2.1826, "step": 162000 }, { "epoch": 11.12, "learning_rate": 8.454545454545455e-05, "loss": 2.1488, "step": 163000 }, { "epoch": 11.19, "learning_rate": 8.444444444444444e-05, "loss": 2.15, "step": 164000 }, { "epoch": 11.26, "learning_rate": 8.434343434343435e-05, "loss": 2.123, "step": 165000 }, { "epoch": 11.33, "learning_rate": 8.424242424242424e-05, "loss": 2.1706, "step": 166000 }, { "epoch": 11.4, "learning_rate": 8.414141414141415e-05, "loss": 2.1742, "step": 167000 }, { "epoch": 11.47, "learning_rate": 8.404040404040404e-05, "loss": 2.1976, "step": 168000 }, { "epoch": 11.53, "learning_rate": 8.393939393939393e-05, "loss": 2.3259, "step": 169000 }, { "epoch": 11.6, "learning_rate": 8.383838383838384e-05, "loss": 2.1211, "step": 170000 }, { "epoch": 11.67, "learning_rate": 8.373737373737373e-05, "loss": 2.1042, "step": 171000 }, { "epoch": 11.74, "learning_rate": 8.363636363636364e-05, "loss": 2.0911, "step": 172000 }, { "epoch": 11.81, "learning_rate": 8.353535353535355e-05, "loss": 2.2741, "step": 173000 }, { "epoch": 11.87, "learning_rate": 8.343434343434344e-05, "loss": 2.5175, "step": 174000 }, { "epoch": 11.94, "learning_rate": 8.333333333333334e-05, "loss": 2.2128, "step": 175000 }, { "epoch": 12.01, "learning_rate": 8.323232323232324e-05, "loss": 2.2981, "step": 176000 }, { "epoch": 12.08, "learning_rate": 8.313131313131314e-05, "loss": 2.3141, "step": 177000 }, { "epoch": 12.15, "learning_rate": 8.303030303030304e-05, "loss": 2.5312, "step": 178000 }, { "epoch": 12.22, "learning_rate": 8.292929292929293e-05, "loss": 2.5787, "step": 179000 }, { "epoch": 12.28, "learning_rate": 8.282828282828283e-05, "loss": 2.5192, "step": 180000 }, { "epoch": 12.35, "learning_rate": 8.272727272727273e-05, "loss": 2.5287, "step": 181000 }, { "epoch": 12.42, "learning_rate": 8.262626262626263e-05, "loss": 2.3686, "step": 182000 }, { "epoch": 12.49, "learning_rate": 8.252525252525253e-05, "loss": 2.4153, "step": 183000 }, { "epoch": 12.56, "learning_rate": 8.242424242424243e-05, "loss": 2.3315, "step": 184000 }, { "epoch": 12.63, "learning_rate": 8.232323232323233e-05, "loss": 2.2995, "step": 185000 }, { "epoch": 12.69, "learning_rate": 8.222222222222222e-05, "loss": 2.2194, "step": 186000 }, { "epoch": 12.76, "learning_rate": 8.212121212121212e-05, "loss": 2.2759, "step": 187000 }, { "epoch": 12.83, "learning_rate": 8.202020202020202e-05, "loss": 2.2116, "step": 188000 }, { "epoch": 12.9, "learning_rate": 8.191919191919192e-05, "loss": 2.1894, "step": 189000 }, { "epoch": 12.97, "learning_rate": 8.181818181818183e-05, "loss": 2.275, "step": 190000 }, { "epoch": 13.03, "learning_rate": 8.171717171717172e-05, "loss": 2.2904, "step": 191000 }, { "epoch": 13.1, "learning_rate": 8.161616161616163e-05, "loss": 2.145, "step": 192000 }, { "epoch": 13.17, "learning_rate": 8.151515151515152e-05, "loss": 2.2841, "step": 193000 }, { "epoch": 13.24, "learning_rate": 8.141414141414141e-05, "loss": 2.2248, "step": 194000 }, { "epoch": 13.31, "learning_rate": 8.131313131313132e-05, "loss": 2.1465, "step": 195000 }, { "epoch": 13.38, "learning_rate": 8.121212121212121e-05, "loss": 2.2754, "step": 196000 }, { "epoch": 13.44, "learning_rate": 8.111111111111112e-05, "loss": 2.1961, "step": 197000 }, { "epoch": 13.51, "learning_rate": 8.101010101010101e-05, "loss": 2.1129, "step": 198000 }, { "epoch": 13.58, "learning_rate": 8.090909090909092e-05, "loss": 2.049, "step": 199000 }, { "epoch": 13.65, "learning_rate": 8.080808080808081e-05, "loss": 2.0999, "step": 200000 }, { "epoch": 13.72, "learning_rate": 8.07070707070707e-05, "loss": 2.0524, "step": 201000 }, { "epoch": 13.79, "learning_rate": 8.060606060606061e-05, "loss": 2.0746, "step": 202000 }, { "epoch": 13.85, "learning_rate": 8.05050505050505e-05, "loss": 2.0608, "step": 203000 }, { "epoch": 13.92, "learning_rate": 8.040404040404041e-05, "loss": 2.0636, "step": 204000 }, { "epoch": 13.99, "learning_rate": 8.03030303030303e-05, "loss": 2.1086, "step": 205000 }, { "epoch": 14.06, "learning_rate": 8.02020202020202e-05, "loss": 2.0534, "step": 206000 }, { "epoch": 14.13, "learning_rate": 8.010101010101011e-05, "loss": 2.0447, "step": 207000 }, { "epoch": 14.2, "learning_rate": 8e-05, "loss": 2.0554, "step": 208000 }, { "epoch": 14.26, "learning_rate": 7.989898989898991e-05, "loss": 2.0521, "step": 209000 }, { "epoch": 14.33, "learning_rate": 7.97979797979798e-05, "loss": 2.0783, "step": 210000 }, { "epoch": 14.4, "learning_rate": 7.96969696969697e-05, "loss": 2.0249, "step": 211000 }, { "epoch": 14.47, "learning_rate": 7.95959595959596e-05, "loss": 2.0453, "step": 212000 }, { "epoch": 14.54, "learning_rate": 7.94949494949495e-05, "loss": 2.0673, "step": 213000 }, { "epoch": 14.6, "learning_rate": 7.93939393939394e-05, "loss": 2.0899, "step": 214000 }, { "epoch": 14.67, "learning_rate": 7.92929292929293e-05, "loss": 2.0947, "step": 215000 }, { "epoch": 14.74, "learning_rate": 7.919191919191919e-05, "loss": 2.1157, "step": 216000 }, { "epoch": 14.81, "learning_rate": 7.90909090909091e-05, "loss": 2.1019, "step": 217000 }, { "epoch": 14.88, "learning_rate": 7.898989898989899e-05, "loss": 2.0353, "step": 218000 }, { "epoch": 14.95, "learning_rate": 7.88888888888889e-05, "loss": 2.0738, "step": 219000 }, { "epoch": 15.01, "learning_rate": 7.878787878787879e-05, "loss": 2.0294, "step": 220000 }, { "epoch": 15.08, "learning_rate": 7.868686868686869e-05, "loss": 2.0545, "step": 221000 }, { "epoch": 15.15, "learning_rate": 7.858585858585859e-05, "loss": 2.0275, "step": 222000 }, { "epoch": 15.22, "learning_rate": 7.848484848484848e-05, "loss": 2.0213, "step": 223000 }, { "epoch": 15.29, "learning_rate": 7.83838383838384e-05, "loss": 2.1165, "step": 224000 }, { "epoch": 15.36, "learning_rate": 7.828282828282829e-05, "loss": 2.2891, "step": 225000 }, { "epoch": 15.42, "learning_rate": 7.818181818181818e-05, "loss": 2.2343, "step": 226000 }, { "epoch": 15.49, "learning_rate": 7.808080808080809e-05, "loss": 2.1072, "step": 227000 }, { "epoch": 15.56, "learning_rate": 7.797979797979798e-05, "loss": 2.0385, "step": 228000 }, { "epoch": 15.63, "learning_rate": 7.787878787878789e-05, "loss": 2.0403, "step": 229000 }, { "epoch": 15.7, "learning_rate": 7.777777777777778e-05, "loss": 2.0622, "step": 230000 }, { "epoch": 15.76, "learning_rate": 7.767676767676769e-05, "loss": 2.064, "step": 231000 }, { "epoch": 15.83, "learning_rate": 7.757575757575758e-05, "loss": 2.0442, "step": 232000 }, { "epoch": 15.9, "learning_rate": 7.747474747474747e-05, "loss": 2.116, "step": 233000 }, { "epoch": 15.97, "learning_rate": 7.737373737373738e-05, "loss": 2.096, "step": 234000 }, { "epoch": 16.04, "learning_rate": 7.727272727272727e-05, "loss": 2.0111, "step": 235000 }, { "epoch": 16.11, "learning_rate": 7.717171717171718e-05, "loss": 2.0181, "step": 236000 }, { "epoch": 16.17, "learning_rate": 7.707070707070707e-05, "loss": 2.0254, "step": 237000 }, { "epoch": 16.24, "learning_rate": 7.696969696969696e-05, "loss": 2.0063, "step": 238000 }, { "epoch": 16.31, "learning_rate": 7.686868686868687e-05, "loss": 2.0242, "step": 239000 }, { "epoch": 16.38, "learning_rate": 7.676767676767676e-05, "loss": 2.0098, "step": 240000 }, { "epoch": 16.45, "learning_rate": 7.666666666666667e-05, "loss": 2.0107, "step": 241000 }, { "epoch": 16.52, "learning_rate": 7.656565656565658e-05, "loss": 1.9859, "step": 242000 }, { "epoch": 16.58, "learning_rate": 7.646464646464647e-05, "loss": 1.9935, "step": 243000 }, { "epoch": 16.65, "learning_rate": 7.636363636363637e-05, "loss": 1.9964, "step": 244000 }, { "epoch": 16.72, "learning_rate": 7.626262626262627e-05, "loss": 2.007, "step": 245000 }, { "epoch": 16.79, "learning_rate": 7.616161616161617e-05, "loss": 2.0081, "step": 246000 }, { "epoch": 16.86, "learning_rate": 7.606060606060607e-05, "loss": 1.9986, "step": 247000 }, { "epoch": 16.92, "learning_rate": 7.595959595959596e-05, "loss": 1.9793, "step": 248000 }, { "epoch": 16.99, "learning_rate": 7.585858585858586e-05, "loss": 1.9847, "step": 249000 }, { "epoch": 17.06, "learning_rate": 7.575757575757576e-05, "loss": 1.9761, "step": 250000 }, { "epoch": 17.13, "learning_rate": 7.565656565656566e-05, "loss": 1.9759, "step": 251000 }, { "epoch": 17.2, "learning_rate": 7.555555555555556e-05, "loss": 1.9798, "step": 252000 }, { "epoch": 17.27, "learning_rate": 7.545454545454545e-05, "loss": 1.9842, "step": 253000 }, { "epoch": 17.33, "learning_rate": 7.535353535353536e-05, "loss": 1.9833, "step": 254000 }, { "epoch": 17.4, "learning_rate": 7.525252525252525e-05, "loss": 1.9705, "step": 255000 }, { "epoch": 17.47, "learning_rate": 7.515151515151515e-05, "loss": 1.9836, "step": 256000 }, { "epoch": 17.54, "learning_rate": 7.505050505050505e-05, "loss": 1.9864, "step": 257000 }, { "epoch": 17.61, "learning_rate": 7.494949494949495e-05, "loss": 1.9785, "step": 258000 }, { "epoch": 17.68, "learning_rate": 7.484848484848486e-05, "loss": 1.9697, "step": 259000 }, { "epoch": 17.74, "learning_rate": 7.474747474747475e-05, "loss": 1.9712, "step": 260000 }, { "epoch": 17.81, "learning_rate": 7.464646464646466e-05, "loss": 1.9776, "step": 261000 }, { "epoch": 17.88, "learning_rate": 7.454545454545455e-05, "loss": 1.9699, "step": 262000 }, { "epoch": 17.95, "learning_rate": 7.444444444444444e-05, "loss": 1.9782, "step": 263000 }, { "epoch": 18.02, "learning_rate": 7.434343434343435e-05, "loss": 1.9702, "step": 264000 }, { "epoch": 18.09, "learning_rate": 7.424242424242424e-05, "loss": 1.9601, "step": 265000 }, { "epoch": 18.15, "learning_rate": 7.414141414141415e-05, "loss": 1.9608, "step": 266000 }, { "epoch": 18.22, "learning_rate": 7.404040404040404e-05, "loss": 1.9589, "step": 267000 }, { "epoch": 18.29, "learning_rate": 7.393939393939395e-05, "loss": 1.9528, "step": 268000 }, { "epoch": 18.36, "learning_rate": 7.383838383838384e-05, "loss": 1.9575, "step": 269000 }, { "epoch": 18.43, "learning_rate": 7.373737373737373e-05, "loss": 1.9953, "step": 270000 }, { "epoch": 18.49, "learning_rate": 7.363636363636364e-05, "loss": 1.9614, "step": 271000 }, { "epoch": 18.56, "learning_rate": 7.353535353535353e-05, "loss": 1.964, "step": 272000 }, { "epoch": 18.63, "learning_rate": 7.343434343434344e-05, "loss": 1.9651, "step": 273000 }, { "epoch": 18.7, "learning_rate": 7.333333333333333e-05, "loss": 1.9599, "step": 274000 }, { "epoch": 18.77, "learning_rate": 7.323232323232324e-05, "loss": 1.9755, "step": 275000 }, { "epoch": 18.84, "learning_rate": 7.313131313131314e-05, "loss": 1.9657, "step": 276000 }, { "epoch": 18.9, "learning_rate": 7.303030303030304e-05, "loss": 1.9657, "step": 277000 }, { "epoch": 18.97, "learning_rate": 7.292929292929293e-05, "loss": 1.9714, "step": 278000 }, { "epoch": 19.04, "learning_rate": 7.282828282828284e-05, "loss": 1.9643, "step": 279000 }, { "epoch": 19.11, "learning_rate": 7.272727272727273e-05, "loss": 1.9539, "step": 280000 }, { "epoch": 19.18, "learning_rate": 7.262626262626263e-05, "loss": 1.9581, "step": 281000 }, { "epoch": 19.25, "learning_rate": 7.252525252525253e-05, "loss": 1.9495, "step": 282000 }, { "epoch": 19.31, "learning_rate": 7.242424242424243e-05, "loss": 1.9487, "step": 283000 }, { "epoch": 19.38, "learning_rate": 7.232323232323233e-05, "loss": 1.9573, "step": 284000 }, { "epoch": 19.45, "learning_rate": 7.222222222222222e-05, "loss": 1.9594, "step": 285000 }, { "epoch": 19.52, "learning_rate": 7.212121212121213e-05, "loss": 1.9636, "step": 286000 }, { "epoch": 19.59, "learning_rate": 7.202020202020202e-05, "loss": 1.9613, "step": 287000 }, { "epoch": 19.65, "learning_rate": 7.191919191919192e-05, "loss": 1.9623, "step": 288000 }, { "epoch": 19.72, "learning_rate": 7.181818181818182e-05, "loss": 1.9493, "step": 289000 }, { "epoch": 19.79, "learning_rate": 7.171717171717171e-05, "loss": 1.9615, "step": 290000 }, { "epoch": 19.86, "learning_rate": 7.161616161616162e-05, "loss": 1.9681, "step": 291000 }, { "epoch": 19.93, "learning_rate": 7.151515151515152e-05, "loss": 1.962, "step": 292000 }, { "epoch": 20.0, "learning_rate": 7.141414141414143e-05, "loss": 1.9621, "step": 293000 }, { "epoch": 20.06, "learning_rate": 7.131313131313132e-05, "loss": 1.9574, "step": 294000 }, { "epoch": 20.13, "learning_rate": 7.121212121212121e-05, "loss": 1.9537, "step": 295000 }, { "epoch": 20.2, "learning_rate": 7.111111111111112e-05, "loss": 1.9443, "step": 296000 }, { "epoch": 20.27, "learning_rate": 7.101010101010101e-05, "loss": 1.9501, "step": 297000 }, { "epoch": 20.34, "learning_rate": 7.090909090909092e-05, "loss": 1.9529, "step": 298000 }, { "epoch": 20.41, "learning_rate": 7.080808080808081e-05, "loss": 1.9457, "step": 299000 }, { "epoch": 20.47, "learning_rate": 7.07070707070707e-05, "loss": 1.9563, "step": 300000 }, { "epoch": 20.54, "learning_rate": 7.060606060606061e-05, "loss": 1.9558, "step": 301000 }, { "epoch": 20.61, "learning_rate": 7.05050505050505e-05, "loss": 1.9429, "step": 302000 }, { "epoch": 20.68, "learning_rate": 7.040404040404041e-05, "loss": 1.9544, "step": 303000 }, { "epoch": 20.75, "learning_rate": 7.03030303030303e-05, "loss": 1.9502, "step": 304000 }, { "epoch": 20.81, "learning_rate": 7.020202020202021e-05, "loss": 1.9402, "step": 305000 }, { "epoch": 20.88, "learning_rate": 7.01010101010101e-05, "loss": 1.9467, "step": 306000 }, { "epoch": 20.95, "learning_rate": 7e-05, "loss": 1.9576, "step": 307000 }, { "epoch": 21.02, "learning_rate": 6.98989898989899e-05, "loss": 1.9497, "step": 308000 }, { "epoch": 21.09, "learning_rate": 6.97979797979798e-05, "loss": 1.9299, "step": 309000 }, { "epoch": 21.16, "learning_rate": 6.96969696969697e-05, "loss": 1.931, "step": 310000 }, { "epoch": 21.22, "learning_rate": 6.95959595959596e-05, "loss": 1.9313, "step": 311000 }, { "epoch": 21.29, "learning_rate": 6.94949494949495e-05, "loss": 1.9371, "step": 312000 }, { "epoch": 21.36, "learning_rate": 6.93939393939394e-05, "loss": 1.9374, "step": 313000 }, { "epoch": 21.43, "learning_rate": 6.92929292929293e-05, "loss": 1.9464, "step": 314000 }, { "epoch": 21.5, "learning_rate": 6.91919191919192e-05, "loss": 1.9375, "step": 315000 }, { "epoch": 21.57, "learning_rate": 6.90909090909091e-05, "loss": 1.9554, "step": 316000 }, { "epoch": 21.63, "learning_rate": 6.898989898989899e-05, "loss": 1.9501, "step": 317000 }, { "epoch": 21.7, "learning_rate": 6.88888888888889e-05, "loss": 1.9453, "step": 318000 }, { "epoch": 21.77, "learning_rate": 6.878787878787879e-05, "loss": 1.948, "step": 319000 }, { "epoch": 21.84, "learning_rate": 6.86868686868687e-05, "loss": 1.9421, "step": 320000 }, { "epoch": 21.91, "learning_rate": 6.858585858585859e-05, "loss": 1.9427, "step": 321000 }, { "epoch": 21.98, "learning_rate": 6.848484848484848e-05, "loss": 1.942, "step": 322000 }, { "epoch": 22.04, "learning_rate": 6.838383838383839e-05, "loss": 1.9254, "step": 323000 }, { "epoch": 22.11, "learning_rate": 6.828282828282828e-05, "loss": 1.9229, "step": 324000 }, { "epoch": 22.18, "learning_rate": 6.818181818181818e-05, "loss": 1.9383, "step": 325000 }, { "epoch": 22.25, "learning_rate": 6.808080808080809e-05, "loss": 1.93, "step": 326000 }, { "epoch": 22.32, "learning_rate": 6.797979797979798e-05, "loss": 1.9283, "step": 327000 }, { "epoch": 22.38, "learning_rate": 6.787878787878789e-05, "loss": 1.9212, "step": 328000 }, { "epoch": 22.45, "learning_rate": 6.777777777777778e-05, "loss": 1.9378, "step": 329000 }, { "epoch": 22.52, "learning_rate": 6.767676767676769e-05, "loss": 1.9261, "step": 330000 }, { "epoch": 22.59, "learning_rate": 6.757575757575758e-05, "loss": 1.9309, "step": 331000 }, { "epoch": 22.66, "learning_rate": 6.747474747474747e-05, "loss": 1.9382, "step": 332000 }, { "epoch": 22.73, "learning_rate": 6.737373737373738e-05, "loss": 1.9318, "step": 333000 }, { "epoch": 22.79, "learning_rate": 6.727272727272727e-05, "loss": 1.9335, "step": 334000 }, { "epoch": 22.86, "learning_rate": 6.717171717171718e-05, "loss": 1.9274, "step": 335000 }, { "epoch": 22.93, "learning_rate": 6.707070707070707e-05, "loss": 1.9596, "step": 336000 }, { "epoch": 23.0, "learning_rate": 6.696969696969696e-05, "loss": 1.958, "step": 337000 }, { "epoch": 23.07, "learning_rate": 6.686868686868687e-05, "loss": 1.9141, "step": 338000 }, { "epoch": 23.14, "learning_rate": 6.676767676767676e-05, "loss": 1.9159, "step": 339000 }, { "epoch": 23.2, "learning_rate": 6.666666666666667e-05, "loss": 1.9139, "step": 340000 }, { "epoch": 23.27, "learning_rate": 6.656565656565656e-05, "loss": 1.9245, "step": 341000 }, { "epoch": 23.34, "learning_rate": 6.646464646464647e-05, "loss": 1.9372, "step": 342000 }, { "epoch": 23.41, "learning_rate": 6.636363636363638e-05, "loss": 1.9581, "step": 343000 }, { "epoch": 23.48, "learning_rate": 6.626262626262627e-05, "loss": 1.9318, "step": 344000 }, { "epoch": 23.54, "learning_rate": 6.616161616161617e-05, "loss": 1.9198, "step": 345000 }, { "epoch": 23.61, "learning_rate": 6.606060606060607e-05, "loss": 1.9282, "step": 346000 }, { "epoch": 23.68, "learning_rate": 6.595959595959596e-05, "loss": 1.9436, "step": 347000 }, { "epoch": 23.75, "learning_rate": 6.585858585858587e-05, "loss": 1.9203, "step": 348000 }, { "epoch": 23.82, "learning_rate": 6.575757575757576e-05, "loss": 1.9366, "step": 349000 }, { "epoch": 23.89, "learning_rate": 6.565656565656566e-05, "loss": 1.9428, "step": 350000 }, { "epoch": 23.95, "learning_rate": 6.555555555555556e-05, "loss": 1.9318, "step": 351000 }, { "epoch": 24.02, "learning_rate": 6.545454545454546e-05, "loss": 1.9355, "step": 352000 }, { "epoch": 24.09, "learning_rate": 6.535353535353536e-05, "loss": 1.9145, "step": 353000 }, { "epoch": 24.16, "learning_rate": 6.525252525252525e-05, "loss": 1.9115, "step": 354000 }, { "epoch": 24.23, "learning_rate": 6.515151515151516e-05, "loss": 1.9213, "step": 355000 }, { "epoch": 24.3, "learning_rate": 6.505050505050505e-05, "loss": 1.9245, "step": 356000 }, { "epoch": 24.36, "learning_rate": 6.494949494949495e-05, "loss": 1.917, "step": 357000 }, { "epoch": 24.43, "learning_rate": 6.484848484848485e-05, "loss": 1.9185, "step": 358000 }, { "epoch": 24.5, "learning_rate": 6.474747474747474e-05, "loss": 1.9142, "step": 359000 }, { "epoch": 24.57, "learning_rate": 6.464646464646466e-05, "loss": 1.9252, "step": 360000 }, { "epoch": 24.64, "learning_rate": 6.454545454545455e-05, "loss": 1.9239, "step": 361000 }, { "epoch": 24.7, "learning_rate": 6.444444444444446e-05, "loss": 1.9295, "step": 362000 }, { "epoch": 24.77, "learning_rate": 6.434343434343435e-05, "loss": 1.9208, "step": 363000 }, { "epoch": 24.84, "learning_rate": 6.424242424242424e-05, "loss": 1.9248, "step": 364000 }, { "epoch": 24.91, "learning_rate": 6.414141414141415e-05, "loss": 1.9252, "step": 365000 }, { "epoch": 24.98, "learning_rate": 6.404040404040404e-05, "loss": 1.9157, "step": 366000 }, { "epoch": 25.05, "learning_rate": 6.393939393939395e-05, "loss": 1.9075, "step": 367000 }, { "epoch": 25.11, "learning_rate": 6.383838383838384e-05, "loss": 1.9123, "step": 368000 }, { "epoch": 25.18, "learning_rate": 6.373737373737373e-05, "loss": 1.9076, "step": 369000 }, { "epoch": 25.25, "learning_rate": 6.363636363636364e-05, "loss": 1.9021, "step": 370000 }, { "epoch": 25.32, "learning_rate": 6.353535353535353e-05, "loss": 1.908, "step": 371000 }, { "epoch": 25.39, "learning_rate": 6.343434343434344e-05, "loss": 1.9114, "step": 372000 }, { "epoch": 25.46, "learning_rate": 6.333333333333333e-05, "loss": 1.9109, "step": 373000 }, { "epoch": 25.52, "learning_rate": 6.323232323232323e-05, "loss": 1.9256, "step": 374000 }, { "epoch": 25.59, "learning_rate": 6.313131313131313e-05, "loss": 1.8997, "step": 375000 }, { "epoch": 25.66, "learning_rate": 6.303030303030302e-05, "loss": 1.9127, "step": 376000 }, { "epoch": 25.73, "learning_rate": 6.292929292929294e-05, "loss": 1.9078, "step": 377000 }, { "epoch": 25.8, "learning_rate": 6.282828282828284e-05, "loss": 1.9061, "step": 378000 }, { "epoch": 25.87, "learning_rate": 6.272727272727273e-05, "loss": 1.9052, "step": 379000 }, { "epoch": 25.93, "learning_rate": 6.262626262626264e-05, "loss": 1.9222, "step": 380000 }, { "epoch": 26.0, "learning_rate": 6.252525252525253e-05, "loss": 1.9265, "step": 381000 }, { "epoch": 26.07, "learning_rate": 6.242424242424243e-05, "loss": 1.8832, "step": 382000 }, { "epoch": 26.14, "learning_rate": 6.232323232323233e-05, "loss": 1.8892, "step": 383000 }, { "epoch": 26.21, "learning_rate": 6.222222222222222e-05, "loss": 1.9029, "step": 384000 }, { "epoch": 26.27, "learning_rate": 6.212121212121213e-05, "loss": 1.8877, "step": 385000 }, { "epoch": 26.34, "learning_rate": 6.202020202020202e-05, "loss": 1.8896, "step": 386000 }, { "epoch": 26.41, "learning_rate": 6.191919191919192e-05, "loss": 1.8923, "step": 387000 }, { "epoch": 26.48, "learning_rate": 6.181818181818182e-05, "loss": 1.893, "step": 388000 }, { "epoch": 26.55, "learning_rate": 6.171717171717172e-05, "loss": 1.8968, "step": 389000 }, { "epoch": 26.62, "learning_rate": 6.161616161616162e-05, "loss": 1.8984, "step": 390000 }, { "epoch": 26.68, "learning_rate": 6.151515151515151e-05, "loss": 1.8957, "step": 391000 }, { "epoch": 26.75, "learning_rate": 6.141414141414142e-05, "loss": 1.9199, "step": 392000 }, { "epoch": 26.82, "learning_rate": 6.131313131313131e-05, "loss": 1.9087, "step": 393000 }, { "epoch": 26.89, "learning_rate": 6.121212121212121e-05, "loss": 1.8965, "step": 394000 }, { "epoch": 26.96, "learning_rate": 6.111111111111112e-05, "loss": 1.9072, "step": 395000 }, { "epoch": 27.03, "learning_rate": 6.101010101010102e-05, "loss": 1.8876, "step": 396000 }, { "epoch": 27.09, "learning_rate": 6.090909090909091e-05, "loss": 1.8765, "step": 397000 }, { "epoch": 27.16, "learning_rate": 6.080808080808081e-05, "loss": 1.8807, "step": 398000 }, { "epoch": 27.23, "learning_rate": 6.070707070707071e-05, "loss": 1.8844, "step": 399000 }, { "epoch": 27.3, "learning_rate": 6.060606060606061e-05, "loss": 1.8854, "step": 400000 }, { "epoch": 27.37, "learning_rate": 6.050505050505051e-05, "loss": 1.8842, "step": 401000 }, { "epoch": 27.43, "learning_rate": 6.040404040404041e-05, "loss": 1.8819, "step": 402000 }, { "epoch": 27.5, "learning_rate": 6.03030303030303e-05, "loss": 1.8845, "step": 403000 }, { "epoch": 27.57, "learning_rate": 6.02020202020202e-05, "loss": 1.8937, "step": 404000 }, { "epoch": 27.64, "learning_rate": 6.01010101010101e-05, "loss": 1.8968, "step": 405000 }, { "epoch": 27.71, "learning_rate": 6e-05, "loss": 1.8977, "step": 406000 }, { "epoch": 27.78, "learning_rate": 5.98989898989899e-05, "loss": 1.9079, "step": 407000 }, { "epoch": 27.84, "learning_rate": 5.97979797979798e-05, "loss": 1.8881, "step": 408000 }, { "epoch": 27.91, "learning_rate": 5.969696969696969e-05, "loss": 1.8946, "step": 409000 }, { "epoch": 27.98, "learning_rate": 5.959595959595959e-05, "loss": 1.8977, "step": 410000 }, { "epoch": 28.05, "learning_rate": 5.949494949494949e-05, "loss": 1.8837, "step": 411000 }, { "epoch": 28.12, "learning_rate": 5.93939393939394e-05, "loss": 1.8735, "step": 412000 }, { "epoch": 28.19, "learning_rate": 5.92929292929293e-05, "loss": 1.8734, "step": 413000 }, { "epoch": 28.25, "learning_rate": 5.91919191919192e-05, "loss": 1.8736, "step": 414000 }, { "epoch": 28.32, "learning_rate": 5.90909090909091e-05, "loss": 1.876, "step": 415000 }, { "epoch": 28.39, "learning_rate": 5.8989898989898996e-05, "loss": 1.8764, "step": 416000 }, { "epoch": 28.46, "learning_rate": 5.8888888888888896e-05, "loss": 1.8784, "step": 417000 }, { "epoch": 28.53, "learning_rate": 5.878787878787879e-05, "loss": 1.8773, "step": 418000 }, { "epoch": 28.59, "learning_rate": 5.868686868686869e-05, "loss": 1.8866, "step": 419000 }, { "epoch": 28.66, "learning_rate": 5.858585858585859e-05, "loss": 1.8784, "step": 420000 }, { "epoch": 28.73, "learning_rate": 5.848484848484849e-05, "loss": 1.8731, "step": 421000 }, { "epoch": 28.8, "learning_rate": 5.8383838383838386e-05, "loss": 1.8827, "step": 422000 }, { "epoch": 28.87, "learning_rate": 5.8282828282828286e-05, "loss": 1.8921, "step": 423000 }, { "epoch": 28.94, "learning_rate": 5.818181818181818e-05, "loss": 1.8861, "step": 424000 }, { "epoch": 29.0, "learning_rate": 5.808080808080808e-05, "loss": 1.8838, "step": 425000 }, { "epoch": 29.07, "learning_rate": 5.797979797979798e-05, "loss": 1.8487, "step": 426000 }, { "epoch": 29.14, "learning_rate": 5.787878787878788e-05, "loss": 1.8684, "step": 427000 }, { "epoch": 29.21, "learning_rate": 5.7777777777777776e-05, "loss": 1.8671, "step": 428000 }, { "epoch": 29.28, "learning_rate": 5.767676767676768e-05, "loss": 1.8728, "step": 429000 }, { "epoch": 29.35, "learning_rate": 5.757575757575758e-05, "loss": 1.8634, "step": 430000 }, { "epoch": 29.41, "learning_rate": 5.747474747474748e-05, "loss": 1.8702, "step": 431000 }, { "epoch": 29.48, "learning_rate": 5.737373737373738e-05, "loss": 1.8626, "step": 432000 }, { "epoch": 29.55, "learning_rate": 5.727272727272728e-05, "loss": 1.8644, "step": 433000 }, { "epoch": 29.62, "learning_rate": 5.717171717171717e-05, "loss": 1.8648, "step": 434000 }, { "epoch": 29.69, "learning_rate": 5.707070707070707e-05, "loss": 1.8812, "step": 435000 }, { "epoch": 29.75, "learning_rate": 5.696969696969697e-05, "loss": 1.8792, "step": 436000 }, { "epoch": 29.82, "learning_rate": 5.686868686868687e-05, "loss": 1.8721, "step": 437000 }, { "epoch": 29.89, "learning_rate": 5.676767676767677e-05, "loss": 1.8738, "step": 438000 }, { "epoch": 29.96, "learning_rate": 5.666666666666667e-05, "loss": 1.8698, "step": 439000 }, { "epoch": 30.03, "learning_rate": 5.6565656565656563e-05, "loss": 1.8668, "step": 440000 }, { "epoch": 30.1, "learning_rate": 5.646464646464646e-05, "loss": 1.8679, "step": 441000 }, { "epoch": 30.16, "learning_rate": 5.636363636363636e-05, "loss": 1.8553, "step": 442000 }, { "epoch": 30.23, "learning_rate": 5.626262626262626e-05, "loss": 1.8619, "step": 443000 }, { "epoch": 30.3, "learning_rate": 5.616161616161616e-05, "loss": 1.8603, "step": 444000 }, { "epoch": 30.37, "learning_rate": 5.606060606060606e-05, "loss": 1.8522, "step": 445000 }, { "epoch": 30.44, "learning_rate": 5.595959595959597e-05, "loss": 1.8555, "step": 446000 }, { "epoch": 30.51, "learning_rate": 5.5858585858585867e-05, "loss": 1.8652, "step": 447000 }, { "epoch": 30.57, "learning_rate": 5.5757575757575766e-05, "loss": 1.869, "step": 448000 }, { "epoch": 30.64, "learning_rate": 5.5656565656565666e-05, "loss": 1.8724, "step": 449000 }, { "epoch": 30.71, "learning_rate": 5.555555555555556e-05, "loss": 1.872, "step": 450000 }, { "epoch": 30.78, "learning_rate": 5.545454545454546e-05, "loss": 1.8835, "step": 451000 }, { "epoch": 30.85, "learning_rate": 5.535353535353536e-05, "loss": 1.8809, "step": 452000 }, { "epoch": 30.92, "learning_rate": 5.525252525252526e-05, "loss": 1.8676, "step": 453000 }, { "epoch": 30.98, "learning_rate": 5.5151515151515156e-05, "loss": 1.8664, "step": 454000 }, { "epoch": 31.05, "learning_rate": 5.5050505050505056e-05, "loss": 1.8574, "step": 455000 }, { "epoch": 31.12, "learning_rate": 5.494949494949495e-05, "loss": 1.8459, "step": 456000 }, { "epoch": 31.19, "learning_rate": 5.484848484848485e-05, "loss": 1.8499, "step": 457000 }, { "epoch": 31.26, "learning_rate": 5.474747474747475e-05, "loss": 1.8599, "step": 458000 }, { "epoch": 31.32, "learning_rate": 5.464646464646465e-05, "loss": 1.8599, "step": 459000 }, { "epoch": 31.39, "learning_rate": 5.4545454545454546e-05, "loss": 1.8497, "step": 460000 }, { "epoch": 31.46, "learning_rate": 5.4444444444444446e-05, "loss": 1.8446, "step": 461000 }, { "epoch": 31.53, "learning_rate": 5.434343434343434e-05, "loss": 1.8597, "step": 462000 }, { "epoch": 31.6, "learning_rate": 5.424242424242425e-05, "loss": 1.8566, "step": 463000 }, { "epoch": 31.67, "learning_rate": 5.414141414141415e-05, "loss": 1.8522, "step": 464000 }, { "epoch": 31.73, "learning_rate": 5.4040404040404044e-05, "loss": 1.8561, "step": 465000 }, { "epoch": 31.8, "learning_rate": 5.393939393939394e-05, "loss": 1.8611, "step": 466000 }, { "epoch": 31.87, "learning_rate": 5.383838383838384e-05, "loss": 1.8597, "step": 467000 }, { "epoch": 31.94, "learning_rate": 5.373737373737374e-05, "loss": 1.8595, "step": 468000 }, { "epoch": 32.01, "learning_rate": 5.363636363636364e-05, "loss": 1.8521, "step": 469000 }, { "epoch": 32.08, "learning_rate": 5.353535353535354e-05, "loss": 1.8372, "step": 470000 }, { "epoch": 32.14, "learning_rate": 5.3434343434343434e-05, "loss": 1.8355, "step": 471000 }, { "epoch": 32.21, "learning_rate": 5.333333333333333e-05, "loss": 1.8346, "step": 472000 }, { "epoch": 32.28, "learning_rate": 5.323232323232323e-05, "loss": 1.8356, "step": 473000 }, { "epoch": 32.35, "learning_rate": 5.313131313131313e-05, "loss": 1.8456, "step": 474000 }, { "epoch": 32.42, "learning_rate": 5.303030303030303e-05, "loss": 1.8486, "step": 475000 }, { "epoch": 32.48, "learning_rate": 5.292929292929293e-05, "loss": 1.8491, "step": 476000 }, { "epoch": 32.55, "learning_rate": 5.2828282828282824e-05, "loss": 1.8537, "step": 477000 }, { "epoch": 32.62, "learning_rate": 5.272727272727272e-05, "loss": 1.8467, "step": 478000 }, { "epoch": 32.69, "learning_rate": 5.262626262626262e-05, "loss": 1.8423, "step": 479000 }, { "epoch": 32.76, "learning_rate": 5.2525252525252536e-05, "loss": 1.8391, "step": 480000 }, { "epoch": 32.83, "learning_rate": 5.242424242424243e-05, "loss": 1.8486, "step": 481000 }, { "epoch": 32.89, "learning_rate": 5.232323232323233e-05, "loss": 1.8546, "step": 482000 }, { "epoch": 32.96, "learning_rate": 5.222222222222223e-05, "loss": 1.8524, "step": 483000 }, { "epoch": 33.03, "learning_rate": 5.212121212121213e-05, "loss": 1.8406, "step": 484000 }, { "epoch": 33.1, "learning_rate": 5.2020202020202026e-05, "loss": 1.8378, "step": 485000 }, { "epoch": 33.17, "learning_rate": 5.1919191919191926e-05, "loss": 1.8405, "step": 486000 }, { "epoch": 33.24, "learning_rate": 5.181818181818182e-05, "loss": 1.8288, "step": 487000 }, { "epoch": 33.3, "learning_rate": 5.171717171717172e-05, "loss": 1.8366, "step": 488000 }, { "epoch": 33.37, "learning_rate": 5.161616161616162e-05, "loss": 1.8326, "step": 489000 }, { "epoch": 33.44, "learning_rate": 5.151515151515152e-05, "loss": 1.8352, "step": 490000 }, { "epoch": 33.51, "learning_rate": 5.1414141414141416e-05, "loss": 1.8486, "step": 491000 }, { "epoch": 33.58, "learning_rate": 5.1313131313131316e-05, "loss": 1.8439, "step": 492000 }, { "epoch": 33.64, "learning_rate": 5.121212121212121e-05, "loss": 1.835, "step": 493000 }, { "epoch": 33.71, "learning_rate": 5.111111111111111e-05, "loss": 1.8384, "step": 494000 }, { "epoch": 33.78, "learning_rate": 5.101010101010101e-05, "loss": 1.8336, "step": 495000 }, { "epoch": 33.85, "learning_rate": 5.090909090909091e-05, "loss": 1.8417, "step": 496000 }, { "epoch": 33.92, "learning_rate": 5.080808080808081e-05, "loss": 1.833, "step": 497000 }, { "epoch": 33.99, "learning_rate": 5.070707070707071e-05, "loss": 1.8349, "step": 498000 }, { "epoch": 34.05, "learning_rate": 5.060606060606061e-05, "loss": 1.8225, "step": 499000 }, { "epoch": 34.12, "learning_rate": 5.050505050505051e-05, "loss": 1.8171, "step": 500000 }, { "epoch": 34.19, "learning_rate": 5.040404040404041e-05, "loss": 1.8255, "step": 501000 }, { "epoch": 34.26, "learning_rate": 5.030303030303031e-05, "loss": 1.8315, "step": 502000 }, { "epoch": 34.33, "learning_rate": 5.0202020202020203e-05, "loss": 1.8147, "step": 503000 }, { "epoch": 34.4, "learning_rate": 5.01010101010101e-05, "loss": 1.8238, "step": 504000 }, { "epoch": 34.46, "learning_rate": 5e-05, "loss": 1.8204, "step": 505000 }, { "epoch": 34.53, "learning_rate": 4.98989898989899e-05, "loss": 1.8317, "step": 506000 }, { "epoch": 34.6, "learning_rate": 4.97979797979798e-05, "loss": 1.827, "step": 507000 }, { "epoch": 34.67, "learning_rate": 4.9696969696969694e-05, "loss": 1.8251, "step": 508000 }, { "epoch": 34.74, "learning_rate": 4.9595959595959594e-05, "loss": 1.8268, "step": 509000 }, { "epoch": 34.81, "learning_rate": 4.94949494949495e-05, "loss": 1.8315, "step": 510000 }, { "epoch": 34.87, "learning_rate": 4.93939393939394e-05, "loss": 1.8233, "step": 511000 }, { "epoch": 34.94, "learning_rate": 4.92929292929293e-05, "loss": 1.828, "step": 512000 }, { "epoch": 35.01, "learning_rate": 4.919191919191919e-05, "loss": 1.8284, "step": 513000 }, { "epoch": 35.08, "learning_rate": 4.909090909090909e-05, "loss": 1.8055, "step": 514000 }, { "epoch": 35.15, "learning_rate": 4.898989898989899e-05, "loss": 1.8103, "step": 515000 }, { "epoch": 35.21, "learning_rate": 4.888888888888889e-05, "loss": 1.8142, "step": 516000 }, { "epoch": 35.28, "learning_rate": 4.878787878787879e-05, "loss": 1.8117, "step": 517000 }, { "epoch": 35.35, "learning_rate": 4.868686868686869e-05, "loss": 1.8103, "step": 518000 }, { "epoch": 35.42, "learning_rate": 4.858585858585859e-05, "loss": 1.8143, "step": 519000 }, { "epoch": 35.49, "learning_rate": 4.848484848484849e-05, "loss": 1.8213, "step": 520000 }, { "epoch": 35.56, "learning_rate": 4.838383838383839e-05, "loss": 1.8182, "step": 521000 }, { "epoch": 35.62, "learning_rate": 4.828282828282829e-05, "loss": 1.8203, "step": 522000 }, { "epoch": 35.69, "learning_rate": 4.8181818181818186e-05, "loss": 1.8281, "step": 523000 }, { "epoch": 35.76, "learning_rate": 4.808080808080808e-05, "loss": 1.822, "step": 524000 }, { "epoch": 35.83, "learning_rate": 4.797979797979798e-05, "loss": 1.8278, "step": 525000 }, { "epoch": 35.9, "learning_rate": 4.787878787878788e-05, "loss": 1.8325, "step": 526000 }, { "epoch": 35.97, "learning_rate": 4.7777777777777784e-05, "loss": 1.8316, "step": 527000 }, { "epoch": 36.03, "learning_rate": 4.7676767676767684e-05, "loss": 1.8289, "step": 528000 }, { "epoch": 36.1, "learning_rate": 4.7575757575757576e-05, "loss": 1.8067, "step": 529000 }, { "epoch": 36.17, "learning_rate": 4.7474747474747476e-05, "loss": 1.807, "step": 530000 }, { "epoch": 36.24, "learning_rate": 4.7373737373737375e-05, "loss": 1.806, "step": 531000 }, { "epoch": 36.31, "learning_rate": 4.7272727272727275e-05, "loss": 1.8101, "step": 532000 }, { "epoch": 36.37, "learning_rate": 4.7171717171717174e-05, "loss": 1.8131, "step": 533000 }, { "epoch": 36.44, "learning_rate": 4.7070707070707074e-05, "loss": 1.8, "step": 534000 }, { "epoch": 36.51, "learning_rate": 4.696969696969697e-05, "loss": 1.812, "step": 535000 }, { "epoch": 36.58, "learning_rate": 4.686868686868687e-05, "loss": 1.816, "step": 536000 }, { "epoch": 36.65, "learning_rate": 4.676767676767677e-05, "loss": 1.8185, "step": 537000 }, { "epoch": 36.72, "learning_rate": 4.666666666666667e-05, "loss": 1.8251, "step": 538000 }, { "epoch": 36.78, "learning_rate": 4.656565656565657e-05, "loss": 1.8209, "step": 539000 }, { "epoch": 36.85, "learning_rate": 4.6464646464646464e-05, "loss": 1.8106, "step": 540000 }, { "epoch": 36.92, "learning_rate": 4.636363636363636e-05, "loss": 1.8164, "step": 541000 }, { "epoch": 36.99, "learning_rate": 4.626262626262626e-05, "loss": 1.8069, "step": 542000 }, { "epoch": 37.06, "learning_rate": 4.616161616161616e-05, "loss": 1.8001, "step": 543000 }, { "epoch": 37.13, "learning_rate": 4.606060606060607e-05, "loss": 1.8007, "step": 544000 }, { "epoch": 37.19, "learning_rate": 4.595959595959596e-05, "loss": 1.7957, "step": 545000 }, { "epoch": 37.26, "learning_rate": 4.585858585858586e-05, "loss": 1.7947, "step": 546000 }, { "epoch": 37.33, "learning_rate": 4.575757575757576e-05, "loss": 1.7996, "step": 547000 }, { "epoch": 37.4, "learning_rate": 4.565656565656566e-05, "loss": 1.8067, "step": 548000 }, { "epoch": 37.47, "learning_rate": 4.555555555555556e-05, "loss": 1.7941, "step": 549000 }, { "epoch": 37.53, "learning_rate": 4.545454545454546e-05, "loss": 1.8103, "step": 550000 }, { "epoch": 37.6, "learning_rate": 4.535353535353535e-05, "loss": 1.8167, "step": 551000 }, { "epoch": 37.67, "learning_rate": 4.525252525252526e-05, "loss": 1.7929, "step": 552000 }, { "epoch": 37.74, "learning_rate": 4.515151515151516e-05, "loss": 1.8093, "step": 553000 }, { "epoch": 37.81, "learning_rate": 4.5050505050505056e-05, "loss": 1.7967, "step": 554000 }, { "epoch": 37.88, "learning_rate": 4.494949494949495e-05, "loss": 1.8117, "step": 555000 }, { "epoch": 37.94, "learning_rate": 4.484848484848485e-05, "loss": 1.8075, "step": 556000 }, { "epoch": 38.01, "learning_rate": 4.474747474747475e-05, "loss": 1.8045, "step": 557000 }, { "epoch": 38.08, "learning_rate": 4.464646464646465e-05, "loss": 1.7865, "step": 558000 }, { "epoch": 38.15, "learning_rate": 4.454545454545455e-05, "loss": 1.7938, "step": 559000 }, { "epoch": 38.22, "learning_rate": 4.4444444444444447e-05, "loss": 1.7902, "step": 560000 }, { "epoch": 38.29, "learning_rate": 4.4343434343434346e-05, "loss": 1.7901, "step": 561000 }, { "epoch": 38.35, "learning_rate": 4.4242424242424246e-05, "loss": 1.7913, "step": 562000 }, { "epoch": 38.42, "learning_rate": 4.4141414141414145e-05, "loss": 1.7926, "step": 563000 }, { "epoch": 38.49, "learning_rate": 4.4040404040404044e-05, "loss": 1.7879, "step": 564000 }, { "epoch": 38.56, "learning_rate": 4.3939393939393944e-05, "loss": 1.8, "step": 565000 }, { "epoch": 38.63, "learning_rate": 4.383838383838384e-05, "loss": 1.7938, "step": 566000 }, { "epoch": 38.7, "learning_rate": 4.3737373737373736e-05, "loss": 1.8053, "step": 567000 }, { "epoch": 38.76, "learning_rate": 4.3636363636363636e-05, "loss": 1.7921, "step": 568000 }, { "epoch": 38.83, "learning_rate": 4.3535353535353535e-05, "loss": 1.7817, "step": 569000 }, { "epoch": 38.9, "learning_rate": 4.343434343434344e-05, "loss": 1.7984, "step": 570000 }, { "epoch": 38.97, "learning_rate": 4.3333333333333334e-05, "loss": 1.7954, "step": 571000 }, { "epoch": 39.04, "learning_rate": 4.3232323232323234e-05, "loss": 1.7833, "step": 572000 }, { "epoch": 39.1, "learning_rate": 4.313131313131313e-05, "loss": 1.7776, "step": 573000 }, { "epoch": 39.17, "learning_rate": 4.303030303030303e-05, "loss": 1.7852, "step": 574000 }, { "epoch": 39.24, "learning_rate": 4.292929292929293e-05, "loss": 1.7951, "step": 575000 }, { "epoch": 39.31, "learning_rate": 4.282828282828283e-05, "loss": 1.7839, "step": 576000 }, { "epoch": 39.38, "learning_rate": 4.2727272727272724e-05, "loss": 1.7821, "step": 577000 }, { "epoch": 39.45, "learning_rate": 4.262626262626263e-05, "loss": 1.783, "step": 578000 }, { "epoch": 39.51, "learning_rate": 4.252525252525253e-05, "loss": 1.7816, "step": 579000 }, { "epoch": 39.58, "learning_rate": 4.242424242424243e-05, "loss": 1.7924, "step": 580000 }, { "epoch": 39.65, "learning_rate": 4.232323232323233e-05, "loss": 1.7834, "step": 581000 }, { "epoch": 39.72, "learning_rate": 4.222222222222222e-05, "loss": 1.7972, "step": 582000 }, { "epoch": 39.79, "learning_rate": 4.212121212121212e-05, "loss": 1.7938, "step": 583000 }, { "epoch": 39.86, "learning_rate": 4.202020202020202e-05, "loss": 1.7803, "step": 584000 }, { "epoch": 39.92, "learning_rate": 4.191919191919192e-05, "loss": 1.7924, "step": 585000 }, { "epoch": 39.99, "learning_rate": 4.181818181818182e-05, "loss": 1.7996, "step": 586000 }, { "epoch": 40.06, "learning_rate": 4.171717171717172e-05, "loss": 1.77, "step": 587000 }, { "epoch": 40.13, "learning_rate": 4.161616161616162e-05, "loss": 1.7698, "step": 588000 }, { "epoch": 40.2, "learning_rate": 4.151515151515152e-05, "loss": 1.7667, "step": 589000 }, { "epoch": 40.26, "learning_rate": 4.141414141414142e-05, "loss": 1.7716, "step": 590000 }, { "epoch": 40.33, "learning_rate": 4.131313131313132e-05, "loss": 1.7766, "step": 591000 }, { "epoch": 40.4, "learning_rate": 4.1212121212121216e-05, "loss": 1.7693, "step": 592000 }, { "epoch": 40.47, "learning_rate": 4.111111111111111e-05, "loss": 1.777, "step": 593000 }, { "epoch": 40.54, "learning_rate": 4.101010101010101e-05, "loss": 1.7927, "step": 594000 }, { "epoch": 40.61, "learning_rate": 4.0909090909090915e-05, "loss": 1.7779, "step": 595000 }, { "epoch": 40.67, "learning_rate": 4.0808080808080814e-05, "loss": 1.7847, "step": 596000 }, { "epoch": 40.74, "learning_rate": 4.070707070707071e-05, "loss": 1.7704, "step": 597000 }, { "epoch": 40.81, "learning_rate": 4.0606060606060606e-05, "loss": 1.7834, "step": 598000 }, { "epoch": 40.88, "learning_rate": 4.0505050505050506e-05, "loss": 1.7867, "step": 599000 }, { "epoch": 40.95, "learning_rate": 4.0404040404040405e-05, "loss": 1.774, "step": 600000 }, { "epoch": 41.02, "learning_rate": 4.0303030303030305e-05, "loss": 1.7701, "step": 601000 }, { "epoch": 41.08, "learning_rate": 4.0202020202020204e-05, "loss": 1.7565, "step": 602000 }, { "epoch": 41.15, "learning_rate": 4.01010101010101e-05, "loss": 1.7608, "step": 603000 }, { "epoch": 41.22, "learning_rate": 4e-05, "loss": 1.762, "step": 604000 }, { "epoch": 41.29, "learning_rate": 3.98989898989899e-05, "loss": 1.7698, "step": 605000 }, { "epoch": 41.36, "learning_rate": 3.97979797979798e-05, "loss": 1.7702, "step": 606000 }, { "epoch": 41.42, "learning_rate": 3.96969696969697e-05, "loss": 1.7616, "step": 607000 }, { "epoch": 41.49, "learning_rate": 3.9595959595959594e-05, "loss": 1.7687, "step": 608000 }, { "epoch": 41.56, "learning_rate": 3.9494949494949494e-05, "loss": 1.7729, "step": 609000 }, { "epoch": 41.63, "learning_rate": 3.939393939393939e-05, "loss": 1.7644, "step": 610000 }, { "epoch": 41.7, "learning_rate": 3.929292929292929e-05, "loss": 1.7657, "step": 611000 }, { "epoch": 41.77, "learning_rate": 3.91919191919192e-05, "loss": 1.7771, "step": 612000 }, { "epoch": 41.83, "learning_rate": 3.909090909090909e-05, "loss": 1.7604, "step": 613000 }, { "epoch": 41.9, "learning_rate": 3.898989898989899e-05, "loss": 1.7766, "step": 614000 }, { "epoch": 41.97, "learning_rate": 3.888888888888889e-05, "loss": 1.7751, "step": 615000 }, { "epoch": 42.04, "learning_rate": 3.878787878787879e-05, "loss": 1.7577, "step": 616000 }, { "epoch": 42.11, "learning_rate": 3.868686868686869e-05, "loss": 1.7575, "step": 617000 }, { "epoch": 42.18, "learning_rate": 3.858585858585859e-05, "loss": 1.746, "step": 618000 } ], "max_steps": 1000000, "num_train_epochs": 69, "total_flos": 7.731601160277059e+17, "trial_name": null, "trial_params": null }