{ "best_metric": 2.4961869532998874e-13, "best_model_checkpoint": "./checkpoint-1000", "epoch": 2.999922845459455, "global_step": 9720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 11.5079, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 11.1203, "step": 2 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 10.6958, "step": 3 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 11.2693, "step": 4 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 11.6199, "step": 5 }, { "epoch": 0.0, "learning_rate": 6.569998523345044e-07, "loss": 10.6054, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.3139997046690089e-06, "loss": 10.9386, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.3139997046690089e-06, "loss": 10.9872, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.9709995570035133e-06, "loss": 10.8356, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.6279994093380178e-06, "loss": 11.1359, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.284999261672522e-06, "loss": 10.0616, "step": 11 }, { "epoch": 0.0, "learning_rate": 3.941999114007027e-06, "loss": 9.5432, "step": 12 }, { "epoch": 0.0, "learning_rate": 4.598998966341531e-06, "loss": 9.1687, "step": 13 }, { "epoch": 0.0, "learning_rate": 5.2559988186760355e-06, "loss": 9.1152, "step": 14 }, { "epoch": 0.0, "learning_rate": 5.9129986710105395e-06, "loss": 8.438, "step": 15 }, { "epoch": 0.0, "learning_rate": 6.569998523345044e-06, "loss": 8.3793, "step": 16 }, { "epoch": 0.01, "learning_rate": 7.2269983756795484e-06, "loss": 7.4295, "step": 17 }, { "epoch": 0.01, "learning_rate": 7.883998228014053e-06, "loss": 7.3944, "step": 18 }, { "epoch": 0.01, "learning_rate": 8.540998080348557e-06, "loss": 6.8893, "step": 19 }, { "epoch": 0.01, "learning_rate": 9.197997932683061e-06, "loss": 6.8123, "step": 20 }, { "epoch": 0.01, "learning_rate": 9.854997785017565e-06, "loss": 6.2127, "step": 21 }, { "epoch": 0.01, "learning_rate": 1.0511997637352071e-05, "loss": 6.5409, "step": 22 }, { "epoch": 0.01, "learning_rate": 1.1168997489686577e-05, "loss": 5.9446, "step": 23 }, { "epoch": 0.01, "learning_rate": 1.1825997342021079e-05, "loss": 5.9737, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.2482997194355583e-05, "loss": 5.7396, "step": 25 }, { "epoch": 0.01, "learning_rate": 1.3139997046690089e-05, "loss": 5.4264, "step": 26 }, { "epoch": 0.01, "learning_rate": 1.3796996899024595e-05, "loss": 5.2598, "step": 27 }, { "epoch": 0.01, "learning_rate": 1.4453996751359097e-05, "loss": 5.4661, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.5110996603693601e-05, "loss": 4.965, "step": 29 }, { "epoch": 0.01, "learning_rate": 1.5767996456028107e-05, "loss": 5.1495, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.642499630836261e-05, "loss": 5.2457, "step": 31 }, { "epoch": 0.01, "learning_rate": 1.7081996160697115e-05, "loss": 4.7293, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.773899601303162e-05, "loss": 4.7455, "step": 33 }, { "epoch": 0.01, "learning_rate": 1.8395995865366123e-05, "loss": 4.8807, "step": 34 }, { "epoch": 0.01, "learning_rate": 1.905299571770063e-05, "loss": 5.0133, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.970999557003513e-05, "loss": 4.5158, "step": 36 }, { "epoch": 0.01, "learning_rate": 2.0366995422369638e-05, "loss": 4.6027, "step": 37 }, { "epoch": 0.01, "learning_rate": 2.1023995274704142e-05, "loss": 4.5853, "step": 38 }, { "epoch": 0.01, "learning_rate": 2.1680995127038646e-05, "loss": 4.5911, "step": 39 }, { "epoch": 0.01, "learning_rate": 2.2337994979373154e-05, "loss": 4.432, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.2994994831707658e-05, "loss": 4.1745, "step": 41 }, { "epoch": 0.01, "learning_rate": 2.3651994684042158e-05, "loss": 4.3357, "step": 42 }, { "epoch": 0.01, "learning_rate": 2.4308994536376662e-05, "loss": 4.1965, "step": 43 }, { "epoch": 0.01, "learning_rate": 2.4965994388711166e-05, "loss": 4.11, "step": 44 }, { "epoch": 0.01, "learning_rate": 2.5622994241045674e-05, "loss": 4.0005, "step": 45 }, { "epoch": 0.01, "learning_rate": 2.6279994093380178e-05, "loss": 3.9979, "step": 46 }, { "epoch": 0.01, "learning_rate": 2.693699394571468e-05, "loss": 3.808, "step": 47 }, { "epoch": 0.01, "learning_rate": 2.759399379804919e-05, "loss": 3.5335, "step": 48 }, { "epoch": 0.02, "learning_rate": 2.8250993650383686e-05, "loss": 3.2427, "step": 49 }, { "epoch": 0.02, "learning_rate": 2.8907993502718194e-05, "loss": 3.2449, "step": 50 }, { "epoch": 0.02, "learning_rate": 2.9564993355052698e-05, "loss": 6.2136, "step": 51 }, { "epoch": 0.02, "learning_rate": 3.0221993207387202e-05, "loss": 5.8481, "step": 52 }, { "epoch": 0.02, "learning_rate": 3.0878993059721706e-05, "loss": 5.5996, "step": 53 }, { "epoch": 0.02, "learning_rate": 3.153599291205621e-05, "loss": 5.5013, "step": 54 }, { "epoch": 0.02, "learning_rate": 3.219299276439072e-05, "loss": 5.3008, "step": 55 }, { "epoch": 0.02, "learning_rate": 3.284999261672522e-05, "loss": 5.3777, "step": 56 }, { "epoch": 0.02, "learning_rate": 3.350699246905972e-05, "loss": 5.0772, "step": 57 }, { "epoch": 0.02, "learning_rate": 3.416399232139423e-05, "loss": 4.8636, "step": 58 }, { "epoch": 0.02, "learning_rate": 3.482099217372874e-05, "loss": 4.8621, "step": 59 }, { "epoch": 0.02, "learning_rate": 3.547799202606324e-05, "loss": 4.7417, "step": 60 }, { "epoch": 0.02, "learning_rate": 3.6134991878397745e-05, "loss": 4.8393, "step": 61 }, { "epoch": 0.02, "learning_rate": 3.6791991730732245e-05, "loss": 4.6815, "step": 62 }, { "epoch": 0.02, "learning_rate": 3.744899158306675e-05, "loss": 4.8777, "step": 63 }, { "epoch": 0.02, "learning_rate": 3.810599143540126e-05, "loss": 4.8993, "step": 64 }, { "epoch": 0.02, "learning_rate": 3.876299128773576e-05, "loss": 4.6594, "step": 65 }, { "epoch": 0.02, "learning_rate": 3.941999114007026e-05, "loss": 4.6317, "step": 66 }, { "epoch": 0.02, "learning_rate": 4.007699099240477e-05, "loss": 4.5564, "step": 67 }, { "epoch": 0.02, "learning_rate": 4.0733990844739276e-05, "loss": 4.5359, "step": 68 }, { "epoch": 0.02, "learning_rate": 4.139099069707378e-05, "loss": 4.6462, "step": 69 }, { "epoch": 0.02, "learning_rate": 4.2047990549408284e-05, "loss": 4.5479, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.270499040174279e-05, "loss": 4.6167, "step": 71 }, { "epoch": 0.02, "learning_rate": 4.336199025407729e-05, "loss": 4.4873, "step": 72 }, { "epoch": 0.02, "learning_rate": 4.40189901064118e-05, "loss": 4.5333, "step": 73 }, { "epoch": 0.02, "learning_rate": 4.467598995874631e-05, "loss": 4.4942, "step": 74 }, { "epoch": 0.02, "learning_rate": 4.533298981108081e-05, "loss": 4.5229, "step": 75 }, { "epoch": 0.02, "learning_rate": 4.5989989663415315e-05, "loss": 4.2522, "step": 76 }, { "epoch": 0.02, "learning_rate": 4.664698951574981e-05, "loss": 4.4598, "step": 77 }, { "epoch": 0.02, "learning_rate": 4.7303989368084316e-05, "loss": 4.3976, "step": 78 }, { "epoch": 0.02, "learning_rate": 4.796098922041882e-05, "loss": 4.3418, "step": 79 }, { "epoch": 0.02, "learning_rate": 4.8617989072753324e-05, "loss": 4.5355, "step": 80 }, { "epoch": 0.02, "learning_rate": 4.927498892508783e-05, "loss": 4.1474, "step": 81 }, { "epoch": 0.03, "learning_rate": 4.993198877742233e-05, "loss": 4.3487, "step": 82 }, { "epoch": 0.03, "learning_rate": 5.058898862975684e-05, "loss": 4.2385, "step": 83 }, { "epoch": 0.03, "learning_rate": 5.124598848209135e-05, "loss": 3.9705, "step": 84 }, { "epoch": 0.03, "learning_rate": 5.190298833442585e-05, "loss": 4.0977, "step": 85 }, { "epoch": 0.03, "learning_rate": 5.2559988186760355e-05, "loss": 4.3691, "step": 86 }, { "epoch": 0.03, "learning_rate": 5.321698803909486e-05, "loss": 4.2919, "step": 87 }, { "epoch": 0.03, "learning_rate": 5.387398789142936e-05, "loss": 4.1494, "step": 88 }, { "epoch": 0.03, "learning_rate": 5.453098774376387e-05, "loss": 4.0668, "step": 89 }, { "epoch": 0.03, "learning_rate": 5.518798759609838e-05, "loss": 4.1362, "step": 90 }, { "epoch": 0.03, "learning_rate": 5.584498744843288e-05, "loss": 4.0048, "step": 91 }, { "epoch": 0.03, "learning_rate": 5.650198730076737e-05, "loss": 4.0575, "step": 92 }, { "epoch": 0.03, "learning_rate": 5.715898715310188e-05, "loss": 3.9414, "step": 93 }, { "epoch": 0.03, "learning_rate": 5.781598700543639e-05, "loss": 3.5729, "step": 94 }, { "epoch": 0.03, "learning_rate": 5.847298685777089e-05, "loss": 3.8777, "step": 95 }, { "epoch": 0.03, "learning_rate": 5.9129986710105395e-05, "loss": 3.4894, "step": 96 }, { "epoch": 0.03, "learning_rate": 5.97869865624399e-05, "loss": 3.2646, "step": 97 }, { "epoch": 0.03, "learning_rate": 6.0443986414774403e-05, "loss": 3.1012, "step": 98 }, { "epoch": 0.03, "learning_rate": 6.110098626710891e-05, "loss": 3.3252, "step": 99 }, { "epoch": 0.03, "learning_rate": 6.175798611944341e-05, "loss": 3.0965, "step": 100 }, { "epoch": 0.03, "learning_rate": 6.241498597177793e-05, "loss": 6.1741, "step": 101 }, { "epoch": 0.03, "learning_rate": 6.307198582411243e-05, "loss": 5.7244, "step": 102 }, { "epoch": 0.03, "learning_rate": 6.372898567644693e-05, "loss": 5.2937, "step": 103 }, { "epoch": 0.03, "learning_rate": 6.438598552878144e-05, "loss": 4.9471, "step": 104 }, { "epoch": 0.03, "learning_rate": 6.504298538111594e-05, "loss": 4.8998, "step": 105 }, { "epoch": 0.03, "learning_rate": 6.569998523345044e-05, "loss": 5.0121, "step": 106 }, { "epoch": 0.03, "learning_rate": 6.635698508578496e-05, "loss": 4.7973, "step": 107 }, { "epoch": 0.03, "learning_rate": 6.701398493811944e-05, "loss": 4.7891, "step": 108 }, { "epoch": 0.03, "learning_rate": 6.767098479045396e-05, "loss": 4.7888, "step": 109 }, { "epoch": 0.03, "learning_rate": 6.832798464278846e-05, "loss": 4.8595, "step": 110 }, { "epoch": 0.03, "learning_rate": 6.898498449512296e-05, "loss": 4.4903, "step": 111 }, { "epoch": 0.03, "learning_rate": 6.964198434745747e-05, "loss": 4.6337, "step": 112 }, { "epoch": 0.03, "learning_rate": 7.029898419979197e-05, "loss": 4.4506, "step": 113 }, { "epoch": 0.04, "learning_rate": 7.095598405212647e-05, "loss": 4.526, "step": 114 }, { "epoch": 0.04, "learning_rate": 7.161298390446099e-05, "loss": 4.5671, "step": 115 }, { "epoch": 0.04, "learning_rate": 7.226998375679549e-05, "loss": 4.4214, "step": 116 }, { "epoch": 0.04, "learning_rate": 7.292698360912999e-05, "loss": 4.6995, "step": 117 }, { "epoch": 0.04, "learning_rate": 7.358398346146449e-05, "loss": 4.4702, "step": 118 }, { "epoch": 0.04, "learning_rate": 7.4240983313799e-05, "loss": 4.4675, "step": 119 }, { "epoch": 0.04, "learning_rate": 7.48979831661335e-05, "loss": 4.5423, "step": 120 }, { "epoch": 0.04, "learning_rate": 7.5554983018468e-05, "loss": 4.3751, "step": 121 }, { "epoch": 0.04, "learning_rate": 7.621198287080252e-05, "loss": 4.3965, "step": 122 }, { "epoch": 0.04, "learning_rate": 7.686898272313702e-05, "loss": 4.3742, "step": 123 }, { "epoch": 0.04, "learning_rate": 7.752598257547152e-05, "loss": 4.2151, "step": 124 }, { "epoch": 0.04, "learning_rate": 7.818298242780602e-05, "loss": 4.2806, "step": 125 }, { "epoch": 0.04, "learning_rate": 7.883998228014052e-05, "loss": 4.2132, "step": 126 }, { "epoch": 0.04, "learning_rate": 7.949698213247504e-05, "loss": 4.2403, "step": 127 }, { "epoch": 0.04, "learning_rate": 8.015398198480954e-05, "loss": 4.3122, "step": 128 }, { "epoch": 0.04, "learning_rate": 8.081098183714404e-05, "loss": 4.4731, "step": 129 }, { "epoch": 0.04, "learning_rate": 8.146798168947855e-05, "loss": 4.2146, "step": 130 }, { "epoch": 0.04, "learning_rate": 8.212498154181305e-05, "loss": 4.0394, "step": 131 }, { "epoch": 0.04, "learning_rate": 8.278198139414755e-05, "loss": 4.1229, "step": 132 }, { "epoch": 0.04, "learning_rate": 8.343898124648207e-05, "loss": 3.9938, "step": 133 }, { "epoch": 0.04, "learning_rate": 8.409598109881657e-05, "loss": 4.0056, "step": 134 }, { "epoch": 0.04, "learning_rate": 8.475298095115107e-05, "loss": 3.9828, "step": 135 }, { "epoch": 0.04, "learning_rate": 8.540998080348558e-05, "loss": 4.1734, "step": 136 }, { "epoch": 0.04, "learning_rate": 8.606698065582008e-05, "loss": 3.8288, "step": 137 }, { "epoch": 0.04, "learning_rate": 8.672398050815458e-05, "loss": 4.073, "step": 138 }, { "epoch": 0.04, "learning_rate": 8.73809803604891e-05, "loss": 3.9704, "step": 139 }, { "epoch": 0.04, "learning_rate": 8.80379802128236e-05, "loss": 3.773, "step": 140 }, { "epoch": 0.04, "learning_rate": 8.86949800651581e-05, "loss": 3.7891, "step": 141 }, { "epoch": 0.04, "learning_rate": 8.935197991749261e-05, "loss": 3.697, "step": 142 }, { "epoch": 0.04, "learning_rate": 9.000897976982711e-05, "loss": 3.7628, "step": 143 }, { "epoch": 0.04, "learning_rate": 9.066597962216162e-05, "loss": 3.7288, "step": 144 }, { "epoch": 0.04, "learning_rate": 9.132297947449613e-05, "loss": 3.7362, "step": 145 }, { "epoch": 0.05, "learning_rate": 9.197997932683063e-05, "loss": 3.6183, "step": 146 }, { "epoch": 0.05, "learning_rate": 9.263697917916512e-05, "loss": 3.4819, "step": 147 }, { "epoch": 0.05, "learning_rate": 9.329397903149962e-05, "loss": 3.4482, "step": 148 }, { "epoch": 0.05, "learning_rate": 9.395097888383412e-05, "loss": 3.0363, "step": 149 }, { "epoch": 0.05, "learning_rate": 9.460797873616863e-05, "loss": 2.9486, "step": 150 }, { "epoch": 0.05, "learning_rate": 9.526497858850313e-05, "loss": 6.0248, "step": 151 }, { "epoch": 0.05, "learning_rate": 9.592197844083763e-05, "loss": 5.5281, "step": 152 }, { "epoch": 0.05, "learning_rate": 9.657897829317215e-05, "loss": 5.1906, "step": 153 }, { "epoch": 0.05, "learning_rate": 9.723597814550665e-05, "loss": 5.0286, "step": 154 }, { "epoch": 0.05, "learning_rate": 9.789297799784115e-05, "loss": 4.7783, "step": 155 }, { "epoch": 0.05, "learning_rate": 9.854997785017566e-05, "loss": 4.5936, "step": 156 }, { "epoch": 0.05, "learning_rate": 9.920697770251016e-05, "loss": 4.7536, "step": 157 }, { "epoch": 0.05, "learning_rate": 9.986397755484466e-05, "loss": 4.5334, "step": 158 }, { "epoch": 0.05, "learning_rate": 0.00010052097740717918, "loss": 4.557, "step": 159 }, { "epoch": 0.05, "learning_rate": 0.00010117797725951368, "loss": 4.4231, "step": 160 }, { "epoch": 0.05, "learning_rate": 0.00010183497711184818, "loss": 4.3759, "step": 161 }, { "epoch": 0.05, "learning_rate": 0.0001024919769641827, "loss": 4.4743, "step": 162 }, { "epoch": 0.05, "learning_rate": 0.0001031489768165172, "loss": 4.4708, "step": 163 }, { "epoch": 0.05, "learning_rate": 0.0001038059766688517, "loss": 4.4006, "step": 164 }, { "epoch": 0.05, "learning_rate": 0.00010446297652118621, "loss": 4.5357, "step": 165 }, { "epoch": 0.05, "learning_rate": 0.00010511997637352071, "loss": 4.2262, "step": 166 }, { "epoch": 0.05, "learning_rate": 0.00010577697622585521, "loss": 4.6843, "step": 167 }, { "epoch": 0.05, "learning_rate": 0.00010643397607818973, "loss": 4.3731, "step": 168 }, { "epoch": 0.05, "learning_rate": 0.00010709097593052423, "loss": 4.247, "step": 169 }, { "epoch": 0.05, "learning_rate": 0.00010774797578285873, "loss": 4.2101, "step": 170 }, { "epoch": 0.05, "learning_rate": 0.00010840497563519324, "loss": 4.1394, "step": 171 }, { "epoch": 0.05, "learning_rate": 0.00010906197548752774, "loss": 4.2254, "step": 172 }, { "epoch": 0.05, "learning_rate": 0.00010971897533986224, "loss": 4.5508, "step": 173 }, { "epoch": 0.05, "learning_rate": 0.00011037597519219676, "loss": 4.0966, "step": 174 }, { "epoch": 0.05, "learning_rate": 0.00011103297504453126, "loss": 4.3946, "step": 175 }, { "epoch": 0.05, "learning_rate": 0.00011168997489686576, "loss": 4.1662, "step": 176 }, { "epoch": 0.05, "learning_rate": 0.00011234697474920027, "loss": 4.3428, "step": 177 }, { "epoch": 0.05, "learning_rate": 0.00011300397460153475, "loss": 4.3491, "step": 178 }, { "epoch": 0.06, "learning_rate": 0.00011366097445386926, "loss": 4.3456, "step": 179 }, { "epoch": 0.06, "learning_rate": 0.00011431797430620376, "loss": 4.0542, "step": 180 }, { "epoch": 0.06, "learning_rate": 0.00011497497415853826, "loss": 4.2489, "step": 181 }, { "epoch": 0.06, "learning_rate": 0.00011563197401087277, "loss": 3.9877, "step": 182 }, { "epoch": 0.06, "learning_rate": 0.00011628897386320728, "loss": 4.057, "step": 183 }, { "epoch": 0.06, "learning_rate": 0.00011694597371554178, "loss": 4.1179, "step": 184 }, { "epoch": 0.06, "learning_rate": 0.00011760297356787629, "loss": 4.1351, "step": 185 }, { "epoch": 0.06, "learning_rate": 0.00011825997342021079, "loss": 4.1048, "step": 186 }, { "epoch": 0.06, "learning_rate": 0.00011891697327254529, "loss": 4.0181, "step": 187 }, { "epoch": 0.06, "learning_rate": 0.0001195739731248798, "loss": 3.973, "step": 188 }, { "epoch": 0.06, "learning_rate": 0.0001202309729772143, "loss": 3.9068, "step": 189 }, { "epoch": 0.06, "learning_rate": 0.00012088797282954881, "loss": 3.7849, "step": 190 }, { "epoch": 0.06, "learning_rate": 0.00012154497268188332, "loss": 3.9339, "step": 191 }, { "epoch": 0.06, "learning_rate": 0.00012220197253421782, "loss": 3.6721, "step": 192 }, { "epoch": 0.06, "learning_rate": 0.00012285897238655232, "loss": 3.7949, "step": 193 }, { "epoch": 0.06, "learning_rate": 0.00012351597223888682, "loss": 3.3948, "step": 194 }, { "epoch": 0.06, "learning_rate": 0.00012417297209122132, "loss": 3.6419, "step": 195 }, { "epoch": 0.06, "learning_rate": 0.00012482997194355585, "loss": 3.228, "step": 196 }, { "epoch": 0.06, "learning_rate": 0.00012548697179589035, "loss": 3.2732, "step": 197 }, { "epoch": 0.06, "learning_rate": 0.00012614397164822485, "loss": 3.3839, "step": 198 }, { "epoch": 0.06, "learning_rate": 0.00012680097150055935, "loss": 2.9711, "step": 199 }, { "epoch": 0.06, "learning_rate": 0.00012745797135289385, "loss": 2.9525, "step": 200 }, { "epoch": 0.06, "learning_rate": 0.00012811497120522835, "loss": 6.2523, "step": 201 }, { "epoch": 0.06, "learning_rate": 0.00012877197105756288, "loss": 5.485, "step": 202 }, { "epoch": 0.06, "learning_rate": 0.00012942897090989738, "loss": 5.1387, "step": 203 }, { "epoch": 0.06, "learning_rate": 0.00013008597076223188, "loss": 4.9817, "step": 204 }, { "epoch": 0.06, "learning_rate": 0.00013074297061456638, "loss": 4.5802, "step": 205 }, { "epoch": 0.06, "learning_rate": 0.00013139997046690088, "loss": 4.6599, "step": 206 }, { "epoch": 0.06, "learning_rate": 0.00013205697031923539, "loss": 4.5162, "step": 207 }, { "epoch": 0.06, "learning_rate": 0.0001327139701715699, "loss": 4.5406, "step": 208 }, { "epoch": 0.06, "learning_rate": 0.00013337097002390441, "loss": 4.7411, "step": 209 }, { "epoch": 0.06, "learning_rate": 0.0001340279698762389, "loss": 4.3877, "step": 210 }, { "epoch": 0.07, "learning_rate": 0.0001346849697285734, "loss": 4.4414, "step": 211 }, { "epoch": 0.07, "learning_rate": 0.00013534196958090792, "loss": 4.4552, "step": 212 }, { "epoch": 0.07, "learning_rate": 0.00013599896943324242, "loss": 4.3745, "step": 213 }, { "epoch": 0.07, "learning_rate": 0.00013665596928557692, "loss": 4.4989, "step": 214 }, { "epoch": 0.07, "learning_rate": 0.00013731296913791142, "loss": 4.34, "step": 215 }, { "epoch": 0.07, "learning_rate": 0.00013796996899024592, "loss": 4.4169, "step": 216 }, { "epoch": 0.07, "learning_rate": 0.00013862696884258042, "loss": 4.2321, "step": 217 }, { "epoch": 0.07, "learning_rate": 0.00013928396869491495, "loss": 4.1761, "step": 218 }, { "epoch": 0.07, "learning_rate": 0.00013994096854724945, "loss": 4.2764, "step": 219 }, { "epoch": 0.07, "learning_rate": 0.00014059796839958395, "loss": 4.3849, "step": 220 }, { "epoch": 0.07, "learning_rate": 0.00014125496825191845, "loss": 4.4056, "step": 221 }, { "epoch": 0.07, "learning_rate": 0.00014191196810425295, "loss": 4.2634, "step": 222 }, { "epoch": 0.07, "learning_rate": 0.00014256896795658745, "loss": 4.5204, "step": 223 }, { "epoch": 0.07, "learning_rate": 0.00014322596780892198, "loss": 4.3385, "step": 224 }, { "epoch": 0.07, "learning_rate": 0.00014388296766125648, "loss": 4.3638, "step": 225 }, { "epoch": 0.07, "learning_rate": 0.00014453996751359098, "loss": 4.1867, "step": 226 }, { "epoch": 0.07, "learning_rate": 0.00014519696736592548, "loss": 4.4033, "step": 227 }, { "epoch": 0.07, "learning_rate": 0.00014585396721825998, "loss": 4.2058, "step": 228 }, { "epoch": 0.07, "learning_rate": 0.00014651096707059448, "loss": 4.313, "step": 229 }, { "epoch": 0.07, "learning_rate": 0.00014716796692292898, "loss": 4.2648, "step": 230 }, { "epoch": 0.07, "learning_rate": 0.0001478249667752635, "loss": 4.3322, "step": 231 }, { "epoch": 0.07, "learning_rate": 0.000148481966627598, "loss": 4.2088, "step": 232 }, { "epoch": 0.07, "learning_rate": 0.0001491389664799325, "loss": 4.3424, "step": 233 }, { "epoch": 0.07, "learning_rate": 0.000149795966332267, "loss": 3.9832, "step": 234 }, { "epoch": 0.07, "learning_rate": 0.0001504529661846015, "loss": 4.2187, "step": 235 }, { "epoch": 0.07, "learning_rate": 0.000151109966036936, "loss": 3.9592, "step": 236 }, { "epoch": 0.07, "learning_rate": 0.00015176696588927054, "loss": 4.0884, "step": 237 }, { "epoch": 0.07, "learning_rate": 0.00015242396574160504, "loss": 4.0517, "step": 238 }, { "epoch": 0.07, "learning_rate": 0.00015308096559393954, "loss": 4.1011, "step": 239 }, { "epoch": 0.07, "learning_rate": 0.00015373796544627404, "loss": 4.0743, "step": 240 }, { "epoch": 0.07, "learning_rate": 0.00015439496529860854, "loss": 3.7718, "step": 241 }, { "epoch": 0.07, "learning_rate": 0.00015505196515094304, "loss": 4.1137, "step": 242 }, { "epoch": 0.07, "learning_rate": 0.00015570896500327754, "loss": 3.8398, "step": 243 }, { "epoch": 0.08, "learning_rate": 0.00015636596485561204, "loss": 3.7475, "step": 244 }, { "epoch": 0.08, "learning_rate": 0.00015702296470794654, "loss": 3.3048, "step": 245 }, { "epoch": 0.08, "learning_rate": 0.00015767996456028105, "loss": 3.4871, "step": 246 }, { "epoch": 0.08, "learning_rate": 0.00015833696441261557, "loss": 3.4238, "step": 247 }, { "epoch": 0.08, "learning_rate": 0.00015899396426495007, "loss": 3.1184, "step": 248 }, { "epoch": 0.08, "learning_rate": 0.00015965096411728457, "loss": 2.9245, "step": 249 }, { "epoch": 0.08, "learning_rate": 0.00016030796396961908, "loss": 2.8914, "step": 250 }, { "epoch": 0.08, "learning_rate": 0.00016096496382195358, "loss": 5.7254, "step": 251 }, { "epoch": 0.08, "learning_rate": 0.00016162196367428808, "loss": 5.2385, "step": 252 }, { "epoch": 0.08, "learning_rate": 0.0001622789635266226, "loss": 4.9273, "step": 253 }, { "epoch": 0.08, "learning_rate": 0.0001629359633789571, "loss": 4.8504, "step": 254 }, { "epoch": 0.08, "learning_rate": 0.0001635929632312916, "loss": 4.7786, "step": 255 }, { "epoch": 0.08, "learning_rate": 0.0001642499630836261, "loss": 4.4117, "step": 256 }, { "epoch": 0.08, "learning_rate": 0.0001649069629359606, "loss": 4.6535, "step": 257 }, { "epoch": 0.08, "learning_rate": 0.0001655639627882951, "loss": 4.6439, "step": 258 }, { "epoch": 0.08, "learning_rate": 0.0001662209626406296, "loss": 4.4627, "step": 259 }, { "epoch": 0.08, "learning_rate": 0.00016687796249296414, "loss": 4.4111, "step": 260 }, { "epoch": 0.08, "learning_rate": 0.00016753496234529864, "loss": 4.338, "step": 261 }, { "epoch": 0.08, "learning_rate": 0.00016819196219763314, "loss": 4.4795, "step": 262 }, { "epoch": 0.08, "learning_rate": 0.00016884896204996764, "loss": 4.3954, "step": 263 }, { "epoch": 0.08, "learning_rate": 0.00016950596190230214, "loss": 4.517, "step": 264 }, { "epoch": 0.08, "learning_rate": 0.00017016296175463664, "loss": 4.4148, "step": 265 }, { "epoch": 0.08, "learning_rate": 0.00017081996160697117, "loss": 4.707, "step": 266 }, { "epoch": 0.08, "learning_rate": 0.00017147696145930567, "loss": 4.5414, "step": 267 }, { "epoch": 0.08, "learning_rate": 0.00017213396131164017, "loss": 4.38, "step": 268 }, { "epoch": 0.08, "learning_rate": 0.00017279096116397467, "loss": 4.6632, "step": 269 }, { "epoch": 0.08, "learning_rate": 0.00017344796101630917, "loss": 4.3069, "step": 270 }, { "epoch": 0.08, "learning_rate": 0.00017410496086864367, "loss": 4.4658, "step": 271 }, { "epoch": 0.08, "learning_rate": 0.0001747619607209782, "loss": 4.6071, "step": 272 }, { "epoch": 0.08, "learning_rate": 0.0001754189605733127, "loss": 4.6362, "step": 273 }, { "epoch": 0.08, "learning_rate": 0.0001760759604256472, "loss": 4.4037, "step": 274 }, { "epoch": 0.08, "learning_rate": 0.0001767329602779817, "loss": 4.2221, "step": 275 }, { "epoch": 0.09, "learning_rate": 0.0001773899601303162, "loss": 4.4658, "step": 276 }, { "epoch": 0.09, "learning_rate": 0.0001780469599826507, "loss": 4.3507, "step": 277 }, { "epoch": 0.09, "learning_rate": 0.00017870395983498523, "loss": 4.6712, "step": 278 }, { "epoch": 0.09, "learning_rate": 0.00017936095968731973, "loss": 4.4491, "step": 279 }, { "epoch": 0.09, "learning_rate": 0.00018001795953965423, "loss": 4.0762, "step": 280 }, { "epoch": 0.09, "learning_rate": 0.00018067495939198873, "loss": 4.2759, "step": 281 }, { "epoch": 0.09, "learning_rate": 0.00018133195924432323, "loss": 4.0968, "step": 282 }, { "epoch": 0.09, "learning_rate": 0.00018198895909665773, "loss": 4.2658, "step": 283 }, { "epoch": 0.09, "learning_rate": 0.00018264595894899226, "loss": 4.3617, "step": 284 }, { "epoch": 0.09, "learning_rate": 0.00018330295880132676, "loss": 3.9741, "step": 285 }, { "epoch": 0.09, "learning_rate": 0.00018395995865366126, "loss": 4.0995, "step": 286 }, { "epoch": 0.09, "learning_rate": 0.00018461695850599576, "loss": 3.9358, "step": 287 }, { "epoch": 0.09, "learning_rate": 0.00018527395835833023, "loss": 3.9463, "step": 288 }, { "epoch": 0.09, "learning_rate": 0.00018593095821066474, "loss": 4.2269, "step": 289 }, { "epoch": 0.09, "learning_rate": 0.00018658795806299924, "loss": 3.7091, "step": 290 }, { "epoch": 0.09, "learning_rate": 0.00018724495791533374, "loss": 3.7328, "step": 291 }, { "epoch": 0.09, "learning_rate": 0.00018790195776766824, "loss": 3.9004, "step": 292 }, { "epoch": 0.09, "learning_rate": 0.00018855895762000276, "loss": 3.4115, "step": 293 }, { "epoch": 0.09, "learning_rate": 0.00018921595747233727, "loss": 3.309, "step": 294 }, { "epoch": 0.09, "learning_rate": 0.00018987295732467177, "loss": 3.7864, "step": 295 }, { "epoch": 0.09, "learning_rate": 0.00019052995717700627, "loss": 3.2292, "step": 296 }, { "epoch": 0.09, "learning_rate": 0.00019118695702934077, "loss": 3.3521, "step": 297 }, { "epoch": 0.09, "learning_rate": 0.00019184395688167527, "loss": 3.4121, "step": 298 }, { "epoch": 0.09, "learning_rate": 0.0001925009567340098, "loss": 3.2202, "step": 299 }, { "epoch": 0.09, "learning_rate": 0.0001931579565863443, "loss": 2.961, "step": 300 }, { "epoch": 0.09, "learning_rate": 0.0001938149564386788, "loss": 5.8341, "step": 301 }, { "epoch": 0.09, "learning_rate": 0.0001944719562910133, "loss": 5.1676, "step": 302 }, { "epoch": 0.09, "learning_rate": 0.0001951289561433478, "loss": 5.1416, "step": 303 }, { "epoch": 0.09, "learning_rate": 0.0001957859559956823, "loss": 4.8569, "step": 304 }, { "epoch": 0.09, "learning_rate": 0.00019644295584801683, "loss": 4.7374, "step": 305 }, { "epoch": 0.09, "learning_rate": 0.00019709995570035133, "loss": 4.6583, "step": 306 }, { "epoch": 0.09, "learning_rate": 0.00019775695555268583, "loss": 4.838, "step": 307 }, { "epoch": 0.1, "learning_rate": 0.00019841395540502033, "loss": 4.5544, "step": 308 }, { "epoch": 0.1, "learning_rate": 0.00019907095525735483, "loss": 4.6132, "step": 309 }, { "epoch": 0.1, "learning_rate": 0.00019972795510968933, "loss": 4.2333, "step": 310 }, { "epoch": 0.1, "learning_rate": 0.00020038495496202386, "loss": 4.4807, "step": 311 }, { "epoch": 0.1, "learning_rate": 0.00020104195481435836, "loss": 4.4115, "step": 312 }, { "epoch": 0.1, "learning_rate": 0.00020169895466669286, "loss": 5.0106, "step": 313 }, { "epoch": 0.1, "learning_rate": 0.00020235595451902736, "loss": 4.5739, "step": 314 }, { "epoch": 0.1, "learning_rate": 0.00020301295437136186, "loss": 4.4143, "step": 315 }, { "epoch": 0.1, "learning_rate": 0.00020366995422369636, "loss": 4.5588, "step": 316 }, { "epoch": 0.1, "learning_rate": 0.0002043269540760309, "loss": 4.5803, "step": 317 }, { "epoch": 0.1, "learning_rate": 0.0002049839539283654, "loss": 4.6836, "step": 318 }, { "epoch": 0.1, "learning_rate": 0.0002056409537806999, "loss": 4.305, "step": 319 }, { "epoch": 0.1, "learning_rate": 0.0002062979536330344, "loss": 4.2923, "step": 320 }, { "epoch": 0.1, "learning_rate": 0.0002069549534853689, "loss": 4.4126, "step": 321 }, { "epoch": 0.1, "learning_rate": 0.0002076119533377034, "loss": 4.3654, "step": 322 }, { "epoch": 0.1, "learning_rate": 0.0002082689531900379, "loss": 4.4962, "step": 323 }, { "epoch": 0.1, "learning_rate": 0.00020892595304237242, "loss": 4.1178, "step": 324 }, { "epoch": 0.1, "learning_rate": 0.00020958295289470692, "loss": 4.4859, "step": 325 }, { "epoch": 0.1, "learning_rate": 0.00021023995274704142, "loss": 4.273, "step": 326 }, { "epoch": 0.1, "learning_rate": 0.00021089695259937592, "loss": 4.5106, "step": 327 }, { "epoch": 0.1, "learning_rate": 0.00021155395245171042, "loss": 4.048, "step": 328 }, { "epoch": 0.1, "learning_rate": 0.00021221095230404492, "loss": 4.2437, "step": 329 }, { "epoch": 0.1, "learning_rate": 0.00021286795215637945, "loss": 4.1273, "step": 330 }, { "epoch": 0.1, "learning_rate": 0.00021352495200871395, "loss": 4.3951, "step": 331 }, { "epoch": 0.1, "learning_rate": 0.00021418195186104845, "loss": 4.1273, "step": 332 }, { "epoch": 0.1, "learning_rate": 0.00021483895171338295, "loss": 4.1606, "step": 333 }, { "epoch": 0.1, "learning_rate": 0.00021549595156571745, "loss": 3.9064, "step": 334 }, { "epoch": 0.1, "learning_rate": 0.00021615295141805195, "loss": 4.1578, "step": 335 }, { "epoch": 0.1, "learning_rate": 0.00021680995127038648, "loss": 3.7668, "step": 336 }, { "epoch": 0.1, "learning_rate": 0.00021746695112272098, "loss": 4.0668, "step": 337 }, { "epoch": 0.1, "learning_rate": 0.00021812395097505548, "loss": 4.1243, "step": 338 }, { "epoch": 0.1, "learning_rate": 0.00021878095082738998, "loss": 3.9848, "step": 339 }, { "epoch": 0.1, "learning_rate": 0.00021943795067972448, "loss": 3.7768, "step": 340 }, { "epoch": 0.11, "learning_rate": 0.00022009495053205898, "loss": 3.7398, "step": 341 }, { "epoch": 0.11, "learning_rate": 0.0002207519503843935, "loss": 3.6799, "step": 342 }, { "epoch": 0.11, "learning_rate": 0.000221408950236728, "loss": 3.4966, "step": 343 }, { "epoch": 0.11, "learning_rate": 0.00022206595008906251, "loss": 3.2034, "step": 344 }, { "epoch": 0.11, "learning_rate": 0.00022272294994139701, "loss": 3.4896, "step": 345 }, { "epoch": 0.11, "learning_rate": 0.00022337994979373151, "loss": 3.4473, "step": 346 }, { "epoch": 0.11, "learning_rate": 0.00022403694964606602, "loss": 3.4358, "step": 347 }, { "epoch": 0.11, "learning_rate": 0.00022469394949840054, "loss": 3.371, "step": 348 }, { "epoch": 0.11, "learning_rate": 0.00022535094935073504, "loss": 3.3348, "step": 349 }, { "epoch": 0.11, "learning_rate": 0.0002260079492030695, "loss": 3.4836, "step": 350 }, { "epoch": 0.11, "learning_rate": 0.00022666494905540402, "loss": 7.9123, "step": 351 }, { "epoch": 0.11, "learning_rate": 0.00022732194890773852, "loss": 6.0544, "step": 352 }, { "epoch": 0.11, "learning_rate": 0.00022797894876007302, "loss": 5.1407, "step": 353 }, { "epoch": 0.11, "learning_rate": 0.00022863594861240752, "loss": 4.9978, "step": 354 }, { "epoch": 0.11, "learning_rate": 0.00022929294846474202, "loss": 4.8652, "step": 355 }, { "epoch": 0.11, "learning_rate": 0.00022994994831707652, "loss": 4.7007, "step": 356 }, { "epoch": 0.11, "learning_rate": 0.00023060694816941105, "loss": 4.4921, "step": 357 }, { "epoch": 0.11, "learning_rate": 0.00023126394802174555, "loss": 4.6645, "step": 358 }, { "epoch": 0.11, "learning_rate": 0.00023192094787408005, "loss": 4.4648, "step": 359 }, { "epoch": 0.11, "learning_rate": 0.00023257794772641455, "loss": 4.6883, "step": 360 }, { "epoch": 0.11, "learning_rate": 0.00023323494757874905, "loss": 4.5958, "step": 361 }, { "epoch": 0.11, "learning_rate": 0.00023389194743108355, "loss": 4.5218, "step": 362 }, { "epoch": 0.11, "learning_rate": 0.00023454894728341808, "loss": 4.6279, "step": 363 }, { "epoch": 0.11, "learning_rate": 0.00023520594713575258, "loss": 4.5532, "step": 364 }, { "epoch": 0.11, "learning_rate": 0.00023586294698808708, "loss": 4.5643, "step": 365 }, { "epoch": 0.11, "learning_rate": 0.00023651994684042158, "loss": 4.6094, "step": 366 }, { "epoch": 0.11, "learning_rate": 0.00023717694669275608, "loss": 4.3253, "step": 367 }, { "epoch": 0.11, "learning_rate": 0.00023783394654509058, "loss": 4.4929, "step": 368 }, { "epoch": 0.11, "learning_rate": 0.0002384909463974251, "loss": 4.4787, "step": 369 }, { "epoch": 0.11, "learning_rate": 0.0002391479462497596, "loss": 4.6377, "step": 370 }, { "epoch": 0.11, "learning_rate": 0.0002398049461020941, "loss": 4.3335, "step": 371 }, { "epoch": 0.11, "learning_rate": 0.0002404619459544286, "loss": 4.3176, "step": 372 }, { "epoch": 0.12, "learning_rate": 0.0002411189458067631, "loss": 4.8085, "step": 373 }, { "epoch": 0.12, "learning_rate": 0.00024177594565909761, "loss": 4.5685, "step": 374 }, { "epoch": 0.12, "learning_rate": 0.00024243294551143214, "loss": 4.4958, "step": 375 }, { "epoch": 0.12, "learning_rate": 0.00024308994536376664, "loss": 4.5047, "step": 376 }, { "epoch": 0.12, "learning_rate": 0.00024374694521610114, "loss": 4.2801, "step": 377 }, { "epoch": 0.12, "learning_rate": 0.00024440394506843564, "loss": 4.4224, "step": 378 }, { "epoch": 0.12, "learning_rate": 0.00024506094492077017, "loss": 4.3793, "step": 379 }, { "epoch": 0.12, "learning_rate": 0.00024571794477310464, "loss": 4.1101, "step": 380 }, { "epoch": 0.12, "learning_rate": 0.00024637494462543917, "loss": 4.2544, "step": 381 }, { "epoch": 0.12, "learning_rate": 0.00024703194447777365, "loss": 4.4169, "step": 382 }, { "epoch": 0.12, "learning_rate": 0.0002476889443301082, "loss": 4.295, "step": 383 }, { "epoch": 0.12, "learning_rate": 0.00024834594418244265, "loss": 4.5854, "step": 384 }, { "epoch": 0.12, "learning_rate": 0.0002490029440347772, "loss": 4.1853, "step": 385 }, { "epoch": 0.12, "learning_rate": 0.0002496599438871117, "loss": 4.1355, "step": 386 }, { "epoch": 0.12, "learning_rate": 0.0002503169437394462, "loss": 4.1311, "step": 387 }, { "epoch": 0.12, "learning_rate": 0.0002509739435917807, "loss": 4.1599, "step": 388 }, { "epoch": 0.12, "learning_rate": 0.0002516309434441152, "loss": 3.9209, "step": 389 }, { "epoch": 0.12, "learning_rate": 0.0002522879432964497, "loss": 3.866, "step": 390 }, { "epoch": 0.12, "learning_rate": 0.00025294494314878423, "loss": 3.6069, "step": 391 }, { "epoch": 0.12, "learning_rate": 0.0002536019430011187, "loss": 3.8343, "step": 392 }, { "epoch": 0.12, "learning_rate": 0.00025425894285345323, "loss": 3.8334, "step": 393 }, { "epoch": 0.12, "learning_rate": 0.0002549159427057877, "loss": 3.6018, "step": 394 }, { "epoch": 0.12, "learning_rate": 0.00025557294255812224, "loss": 3.5376, "step": 395 }, { "epoch": 0.12, "learning_rate": 0.0002562299424104567, "loss": 3.48, "step": 396 }, { "epoch": 0.12, "learning_rate": 0.00025688694226279124, "loss": 3.5055, "step": 397 }, { "epoch": 0.12, "learning_rate": 0.00025754394211512576, "loss": 3.2152, "step": 398 }, { "epoch": 0.12, "learning_rate": 0.00025820094196746024, "loss": 3.0883, "step": 399 }, { "epoch": 0.12, "learning_rate": 0.00025885794181979477, "loss": 3.0299, "step": 400 }, { "epoch": 0.12, "learning_rate": 0.00025951494167212924, "loss": 6.0321, "step": 401 }, { "epoch": 0.12, "learning_rate": 0.00026017194152446377, "loss": 5.3678, "step": 402 }, { "epoch": 0.12, "learning_rate": 0.0002608289413767983, "loss": 4.7082, "step": 403 }, { "epoch": 0.12, "learning_rate": 0.00026148594122913277, "loss": 4.8758, "step": 404 }, { "epoch": 0.12, "learning_rate": 0.0002621429410814673, "loss": 5.021, "step": 405 }, { "epoch": 0.13, "learning_rate": 0.00026279994093380177, "loss": 4.6093, "step": 406 }, { "epoch": 0.13, "learning_rate": 0.0002634569407861363, "loss": 4.705, "step": 407 }, { "epoch": 0.13, "learning_rate": 0.00026411394063847077, "loss": 4.652, "step": 408 }, { "epoch": 0.13, "learning_rate": 0.0002647709404908053, "loss": 4.554, "step": 409 }, { "epoch": 0.13, "learning_rate": 0.0002654279403431398, "loss": 4.4065, "step": 410 }, { "epoch": 0.13, "learning_rate": 0.0002660849401954743, "loss": 4.4372, "step": 411 }, { "epoch": 0.13, "learning_rate": 0.00026674194004780883, "loss": 4.3657, "step": 412 }, { "epoch": 0.13, "learning_rate": 0.0002673989399001433, "loss": 4.4588, "step": 413 }, { "epoch": 0.13, "learning_rate": 0.0002680559397524778, "loss": 4.3591, "step": 414 }, { "epoch": 0.13, "learning_rate": 0.0002687129396048123, "loss": 4.6762, "step": 415 }, { "epoch": 0.13, "learning_rate": 0.0002693699394571468, "loss": 4.4518, "step": 416 }, { "epoch": 0.13, "learning_rate": 0.0002700269393094813, "loss": 4.6972, "step": 417 }, { "epoch": 0.13, "learning_rate": 0.00027068393916181583, "loss": 4.4306, "step": 418 }, { "epoch": 0.13, "learning_rate": 0.0002713409390141503, "loss": 4.4664, "step": 419 }, { "epoch": 0.13, "learning_rate": 0.00027199793886648483, "loss": 4.4462, "step": 420 }, { "epoch": 0.13, "learning_rate": 0.0002726549387188193, "loss": 4.3648, "step": 421 }, { "epoch": 0.13, "learning_rate": 0.00027331193857115383, "loss": 4.3616, "step": 422 }, { "epoch": 0.13, "learning_rate": 0.0002739689384234883, "loss": 4.3607, "step": 423 }, { "epoch": 0.13, "learning_rate": 0.00027462593827582284, "loss": 4.1811, "step": 424 }, { "epoch": 0.13, "learning_rate": 0.00027528293812815736, "loss": 4.2743, "step": 425 }, { "epoch": 0.13, "learning_rate": 0.00027593993798049184, "loss": 3.996, "step": 426 }, { "epoch": 0.13, "learning_rate": 0.00027659693783282636, "loss": 4.3638, "step": 427 }, { "epoch": 0.13, "learning_rate": 0.00027725393768516084, "loss": 4.2065, "step": 428 }, { "epoch": 0.13, "learning_rate": 0.00027791093753749537, "loss": 4.3029, "step": 429 }, { "epoch": 0.13, "learning_rate": 0.0002785679373898299, "loss": 4.1737, "step": 430 }, { "epoch": 0.13, "learning_rate": 0.00027922493724216437, "loss": 4.1625, "step": 431 }, { "epoch": 0.13, "learning_rate": 0.0002798819370944989, "loss": 4.2028, "step": 432 }, { "epoch": 0.13, "learning_rate": 0.00028053893694683337, "loss": 4.2445, "step": 433 }, { "epoch": 0.13, "learning_rate": 0.0002811959367991679, "loss": 4.1621, "step": 434 }, { "epoch": 0.13, "learning_rate": 0.00028185293665150237, "loss": 4.2919, "step": 435 }, { "epoch": 0.13, "learning_rate": 0.0002825099365038369, "loss": 3.8902, "step": 436 }, { "epoch": 0.13, "learning_rate": 0.0002831669363561714, "loss": 4.1929, "step": 437 }, { "epoch": 0.14, "learning_rate": 0.0002838239362085059, "loss": 4.1263, "step": 438 }, { "epoch": 0.14, "learning_rate": 0.0002844809360608404, "loss": 4.0029, "step": 439 }, { "epoch": 0.14, "learning_rate": 0.0002851379359131749, "loss": 3.9629, "step": 440 }, { "epoch": 0.14, "learning_rate": 0.0002857949357655094, "loss": 3.9936, "step": 441 }, { "epoch": 0.14, "learning_rate": 0.00028645193561784395, "loss": 3.8461, "step": 442 }, { "epoch": 0.14, "learning_rate": 0.00028710893547017843, "loss": 3.7729, "step": 443 }, { "epoch": 0.14, "learning_rate": 0.00028776593532251296, "loss": 3.8678, "step": 444 }, { "epoch": 0.14, "learning_rate": 0.00028842293517484743, "loss": 3.8235, "step": 445 }, { "epoch": 0.14, "learning_rate": 0.00028907993502718196, "loss": 3.6168, "step": 446 }, { "epoch": 0.14, "learning_rate": 0.00028973693487951643, "loss": 4.0479, "step": 447 }, { "epoch": 0.14, "learning_rate": 0.00029039393473185096, "loss": 3.4743, "step": 448 }, { "epoch": 0.14, "learning_rate": 0.0002910509345841855, "loss": 3.3913, "step": 449 }, { "epoch": 0.14, "learning_rate": 0.00029170793443651996, "loss": 3.0779, "step": 450 }, { "epoch": 0.14, "learning_rate": 0.0002923649342888545, "loss": 6.6536, "step": 451 }, { "epoch": 0.14, "learning_rate": 0.00029302193414118896, "loss": 5.6577, "step": 452 }, { "epoch": 0.14, "learning_rate": 0.0002936789339935235, "loss": 5.1422, "step": 453 }, { "epoch": 0.14, "learning_rate": 0.00029433593384585796, "loss": 5.2125, "step": 454 }, { "epoch": 0.14, "learning_rate": 0.0002949929336981925, "loss": 4.9079, "step": 455 }, { "epoch": 0.14, "learning_rate": 0.000295649933550527, "loss": 4.9266, "step": 456 }, { "epoch": 0.14, "learning_rate": 0.0002963069334028615, "loss": 4.7769, "step": 457 }, { "epoch": 0.14, "learning_rate": 0.000296963933255196, "loss": 4.7966, "step": 458 }, { "epoch": 0.14, "learning_rate": 0.0002976209331075305, "loss": 4.7017, "step": 459 }, { "epoch": 0.14, "learning_rate": 0.000298277932959865, "loss": 4.6102, "step": 460 }, { "epoch": 0.14, "learning_rate": 0.00029893493281219955, "loss": 4.6471, "step": 461 }, { "epoch": 0.14, "learning_rate": 0.000299591932664534, "loss": 4.2794, "step": 462 }, { "epoch": 0.14, "learning_rate": 0.00030024893251686855, "loss": 4.8004, "step": 463 }, { "epoch": 0.14, "learning_rate": 0.000300905932369203, "loss": 4.6793, "step": 464 }, { "epoch": 0.14, "learning_rate": 0.00030156293222153755, "loss": 4.4926, "step": 465 }, { "epoch": 0.14, "learning_rate": 0.000302219932073872, "loss": 4.2163, "step": 466 }, { "epoch": 0.14, "learning_rate": 0.00030287693192620655, "loss": 4.5306, "step": 467 }, { "epoch": 0.14, "learning_rate": 0.0003035339317785411, "loss": 4.4579, "step": 468 }, { "epoch": 0.14, "learning_rate": 0.00030419093163087555, "loss": 4.4814, "step": 469 }, { "epoch": 0.15, "learning_rate": 0.0003048479314832101, "loss": 4.578, "step": 470 }, { "epoch": 0.15, "learning_rate": 0.00030550493133554455, "loss": 4.4622, "step": 471 }, { "epoch": 0.15, "learning_rate": 0.0003061619311878791, "loss": 4.4143, "step": 472 }, { "epoch": 0.15, "learning_rate": 0.0003068189310402136, "loss": 4.4382, "step": 473 }, { "epoch": 0.15, "learning_rate": 0.0003074759308925481, "loss": 4.4204, "step": 474 }, { "epoch": 0.15, "learning_rate": 0.00030813293074488256, "loss": 4.3629, "step": 475 }, { "epoch": 0.15, "learning_rate": 0.0003087899305972171, "loss": 4.4076, "step": 476 }, { "epoch": 0.15, "learning_rate": 0.00030944693044955156, "loss": 4.279, "step": 477 }, { "epoch": 0.15, "learning_rate": 0.0003101039303018861, "loss": 4.4649, "step": 478 }, { "epoch": 0.15, "learning_rate": 0.00031076093015422056, "loss": 4.2617, "step": 479 }, { "epoch": 0.15, "learning_rate": 0.0003114179300065551, "loss": 4.1989, "step": 480 }, { "epoch": 0.15, "learning_rate": 0.00031207492985888956, "loss": 4.1309, "step": 481 }, { "epoch": 0.15, "learning_rate": 0.0003127319297112241, "loss": 4.254, "step": 482 }, { "epoch": 0.15, "learning_rate": 0.0003133889295635586, "loss": 4.1839, "step": 483 }, { "epoch": 0.15, "learning_rate": 0.0003140459294158931, "loss": 4.1503, "step": 484 }, { "epoch": 0.15, "learning_rate": 0.0003147029292682276, "loss": 4.1915, "step": 485 }, { "epoch": 0.15, "learning_rate": 0.0003153599291205621, "loss": 4.2792, "step": 486 }, { "epoch": 0.15, "learning_rate": 0.0003160169289728966, "loss": 4.1449, "step": 487 }, { "epoch": 0.15, "learning_rate": 0.00031667392882523115, "loss": 4.0662, "step": 488 }, { "epoch": 0.15, "learning_rate": 0.0003173309286775656, "loss": 4.2186, "step": 489 }, { "epoch": 0.15, "learning_rate": 0.00031798792852990015, "loss": 4.0839, "step": 490 }, { "epoch": 0.15, "learning_rate": 0.0003186449283822346, "loss": 4.0271, "step": 491 }, { "epoch": 0.15, "learning_rate": 0.00031930192823456915, "loss": 3.5625, "step": 492 }, { "epoch": 0.15, "learning_rate": 0.0003199589280869036, "loss": 3.7838, "step": 493 }, { "epoch": 0.15, "learning_rate": 0.00032061592793923815, "loss": 3.8771, "step": 494 }, { "epoch": 0.15, "learning_rate": 0.0003212729277915727, "loss": 3.8744, "step": 495 }, { "epoch": 0.15, "learning_rate": 0.00032192992764390715, "loss": 3.3559, "step": 496 }, { "epoch": 0.15, "learning_rate": 0.0003225869274962417, "loss": 3.3418, "step": 497 }, { "epoch": 0.15, "learning_rate": 0.00032324392734857615, "loss": 3.363, "step": 498 }, { "epoch": 0.15, "learning_rate": 0.0003239009272009107, "loss": 3.0908, "step": 499 }, { "epoch": 0.15, "learning_rate": 0.0003245579270532452, "loss": 3.1046, "step": 500 }, { "epoch": 0.15, "eval_bleu": 0.0, "eval_loss": 5.527187824249268, "eval_runtime": 1371.8331, "eval_samples_per_second": 10.759, "eval_steps_per_second": 1.345, "step": 500 }, { "epoch": 0.15, "learning_rate": 0.0003252149269055797, "loss": 6.4631, "step": 501 }, { "epoch": 0.15, "learning_rate": 0.0003258719267579142, "loss": 5.9971, "step": 502 }, { "epoch": 0.16, "learning_rate": 0.0003265289266102487, "loss": 5.3966, "step": 503 }, { "epoch": 0.16, "learning_rate": 0.0003271859264625832, "loss": 5.2348, "step": 504 }, { "epoch": 0.16, "learning_rate": 0.0003278429263149177, "loss": 4.9756, "step": 505 }, { "epoch": 0.16, "learning_rate": 0.0003284999261672522, "loss": 4.8721, "step": 506 }, { "epoch": 0.16, "learning_rate": 0.000328464297108015, "loss": 5.1726, "step": 507 }, { "epoch": 0.16, "learning_rate": 0.0003284286680487778, "loss": 4.8913, "step": 508 }, { "epoch": 0.16, "learning_rate": 0.0003283930389895405, "loss": 4.8133, "step": 509 }, { "epoch": 0.16, "learning_rate": 0.00032835740993030334, "loss": 4.5986, "step": 510 }, { "epoch": 0.16, "learning_rate": 0.00032832178087106607, "loss": 4.7957, "step": 511 }, { "epoch": 0.16, "learning_rate": 0.00032828615181182885, "loss": 4.6052, "step": 512 }, { "epoch": 0.16, "learning_rate": 0.00032825052275259163, "loss": 4.611, "step": 513 }, { "epoch": 0.16, "learning_rate": 0.00032821489369335436, "loss": 4.6585, "step": 514 }, { "epoch": 0.16, "learning_rate": 0.00032817926463411714, "loss": 4.2312, "step": 515 }, { "epoch": 0.16, "learning_rate": 0.0003281436355748799, "loss": 4.5522, "step": 516 }, { "epoch": 0.16, "learning_rate": 0.0003281080065156427, "loss": 4.6756, "step": 517 }, { "epoch": 0.16, "learning_rate": 0.0003280723774564055, "loss": 4.3275, "step": 518 }, { "epoch": 0.16, "learning_rate": 0.0003280367483971682, "loss": 4.5071, "step": 519 }, { "epoch": 0.16, "learning_rate": 0.000328001119337931, "loss": 4.4637, "step": 520 }, { "epoch": 0.16, "learning_rate": 0.0003279654902786938, "loss": 4.5248, "step": 521 }, { "epoch": 0.16, "learning_rate": 0.00032792986121945656, "loss": 4.482, "step": 522 }, { "epoch": 0.16, "learning_rate": 0.00032789423216021934, "loss": 4.5743, "step": 523 }, { "epoch": 0.16, "learning_rate": 0.00032785860310098207, "loss": 4.7417, "step": 524 }, { "epoch": 0.16, "learning_rate": 0.00032782297404174485, "loss": 4.6575, "step": 525 }, { "epoch": 0.16, "learning_rate": 0.00032778734498250763, "loss": 4.5024, "step": 526 }, { "epoch": 0.16, "learning_rate": 0.0003277517159232704, "loss": 4.5208, "step": 527 }, { "epoch": 0.16, "learning_rate": 0.00032771608686403314, "loss": 4.4044, "step": 528 }, { "epoch": 0.16, "learning_rate": 0.0003276804578047959, "loss": 4.2717, "step": 529 }, { "epoch": 0.16, "learning_rate": 0.0003276448287455587, "loss": 4.3591, "step": 530 }, { "epoch": 0.16, "learning_rate": 0.0003276091996863215, "loss": 4.2841, "step": 531 }, { "epoch": 0.16, "learning_rate": 0.00032757357062708427, "loss": 4.0601, "step": 532 }, { "epoch": 0.16, "learning_rate": 0.000327537941567847, "loss": 4.2274, "step": 533 }, { "epoch": 0.16, "learning_rate": 0.00032750231250860983, "loss": 4.2085, "step": 534 }, { "epoch": 0.17, "learning_rate": 0.00032746668344937256, "loss": 3.9464, "step": 535 }, { "epoch": 0.17, "learning_rate": 0.00032743105439013534, "loss": 4.2287, "step": 536 }, { "epoch": 0.17, "learning_rate": 0.0003273954253308981, "loss": 4.4058, "step": 537 }, { "epoch": 0.17, "learning_rate": 0.00032735979627166085, "loss": 4.0904, "step": 538 }, { "epoch": 0.17, "learning_rate": 0.0003273241672124237, "loss": 3.8756, "step": 539 }, { "epoch": 0.17, "learning_rate": 0.0003272885381531864, "loss": 4.1717, "step": 540 }, { "epoch": 0.17, "learning_rate": 0.0003272529090939492, "loss": 3.8294, "step": 541 }, { "epoch": 0.17, "learning_rate": 0.000327217280034712, "loss": 3.8332, "step": 542 }, { "epoch": 0.17, "learning_rate": 0.0003271816509754747, "loss": 3.6963, "step": 543 }, { "epoch": 0.17, "learning_rate": 0.00032714602191623754, "loss": 3.752, "step": 544 }, { "epoch": 0.17, "learning_rate": 0.00032711039285700027, "loss": 3.6952, "step": 545 }, { "epoch": 0.17, "learning_rate": 0.00032707476379776305, "loss": 3.6828, "step": 546 }, { "epoch": 0.17, "learning_rate": 0.00032703913473852584, "loss": 3.2342, "step": 547 }, { "epoch": 0.17, "learning_rate": 0.00032700350567928856, "loss": 3.1144, "step": 548 }, { "epoch": 0.17, "learning_rate": 0.00032696787662005134, "loss": 3.1742, "step": 549 }, { "epoch": 0.17, "learning_rate": 0.0003269322475608141, "loss": 3.0925, "step": 550 }, { "epoch": 0.17, "learning_rate": 0.0003268966185015769, "loss": 6.8604, "step": 551 }, { "epoch": 0.17, "learning_rate": 0.0003268609894423397, "loss": 5.9804, "step": 552 }, { "epoch": 0.17, "learning_rate": 0.00032682536038310247, "loss": 5.3944, "step": 553 }, { "epoch": 0.17, "learning_rate": 0.0003267897313238652, "loss": 5.124, "step": 554 }, { "epoch": 0.17, "learning_rate": 0.000326754102264628, "loss": 4.8878, "step": 555 }, { "epoch": 0.17, "learning_rate": 0.00032671847320539076, "loss": 4.6555, "step": 556 }, { "epoch": 0.17, "learning_rate": 0.00032668284414615355, "loss": 4.8814, "step": 557 }, { "epoch": 0.17, "learning_rate": 0.00032664721508691633, "loss": 4.7107, "step": 558 }, { "epoch": 0.17, "learning_rate": 0.00032661158602767905, "loss": 4.7047, "step": 559 }, { "epoch": 0.17, "learning_rate": 0.00032657595696844184, "loss": 4.5857, "step": 560 }, { "epoch": 0.17, "learning_rate": 0.0003265403279092046, "loss": 4.7638, "step": 561 }, { "epoch": 0.17, "learning_rate": 0.00032650469884996735, "loss": 4.5761, "step": 562 }, { "epoch": 0.17, "learning_rate": 0.0003264690697907302, "loss": 4.6643, "step": 563 }, { "epoch": 0.17, "learning_rate": 0.0003264334407314929, "loss": 4.6397, "step": 564 }, { "epoch": 0.17, "learning_rate": 0.0003263978116722557, "loss": 4.5706, "step": 565 }, { "epoch": 0.17, "learning_rate": 0.0003263621826130185, "loss": 4.3672, "step": 566 }, { "epoch": 0.17, "learning_rate": 0.0003263265535537812, "loss": 4.5335, "step": 567 }, { "epoch": 0.18, "learning_rate": 0.00032629092449454404, "loss": 4.5029, "step": 568 }, { "epoch": 0.18, "learning_rate": 0.00032625529543530677, "loss": 4.679, "step": 569 }, { "epoch": 0.18, "learning_rate": 0.00032621966637606955, "loss": 4.6395, "step": 570 }, { "epoch": 0.18, "learning_rate": 0.00032618403731683233, "loss": 4.439, "step": 571 }, { "epoch": 0.18, "learning_rate": 0.00032614840825759506, "loss": 4.3253, "step": 572 }, { "epoch": 0.18, "learning_rate": 0.0003261127791983579, "loss": 4.4057, "step": 573 }, { "epoch": 0.18, "learning_rate": 0.0003260771501391206, "loss": 4.4629, "step": 574 }, { "epoch": 0.18, "learning_rate": 0.0003260415210798834, "loss": 4.286, "step": 575 }, { "epoch": 0.18, "learning_rate": 0.0003260058920206462, "loss": 4.2038, "step": 576 }, { "epoch": 0.18, "learning_rate": 0.00032597026296140897, "loss": 4.3508, "step": 577 }, { "epoch": 0.18, "learning_rate": 0.00032593463390217175, "loss": 4.4798, "step": 578 }, { "epoch": 0.18, "learning_rate": 0.0003258990048429345, "loss": 4.281, "step": 579 }, { "epoch": 0.18, "learning_rate": 0.00032586337578369726, "loss": 4.2855, "step": 580 }, { "epoch": 0.18, "learning_rate": 0.00032582774672446004, "loss": 4.3629, "step": 581 }, { "epoch": 0.18, "learning_rate": 0.0003257921176652228, "loss": 4.095, "step": 582 }, { "epoch": 0.18, "learning_rate": 0.00032575648860598555, "loss": 4.2636, "step": 583 }, { "epoch": 0.18, "learning_rate": 0.00032572085954674833, "loss": 4.3922, "step": 584 }, { "epoch": 0.18, "learning_rate": 0.0003256852304875111, "loss": 4.4354, "step": 585 }, { "epoch": 0.18, "learning_rate": 0.0003256496014282739, "loss": 4.2108, "step": 586 }, { "epoch": 0.18, "learning_rate": 0.0003256139723690367, "loss": 4.1252, "step": 587 }, { "epoch": 0.18, "learning_rate": 0.0003255783433097994, "loss": 4.2144, "step": 588 }, { "epoch": 0.18, "learning_rate": 0.0003255427142505622, "loss": 4.1965, "step": 589 }, { "epoch": 0.18, "learning_rate": 0.00032550708519132497, "loss": 3.8201, "step": 590 }, { "epoch": 0.18, "learning_rate": 0.00032547145613208775, "loss": 4.0785, "step": 591 }, { "epoch": 0.18, "learning_rate": 0.00032543582707285053, "loss": 3.647, "step": 592 }, { "epoch": 0.18, "learning_rate": 0.00032540019801361326, "loss": 3.8259, "step": 593 }, { "epoch": 0.18, "learning_rate": 0.00032536456895437604, "loss": 3.3992, "step": 594 }, { "epoch": 0.18, "learning_rate": 0.0003253289398951388, "loss": 3.8117, "step": 595 }, { "epoch": 0.18, "learning_rate": 0.0003252933108359016, "loss": 3.5423, "step": 596 }, { "epoch": 0.18, "learning_rate": 0.0003252576817766644, "loss": 3.2909, "step": 597 }, { "epoch": 0.18, "learning_rate": 0.0003252220527174271, "loss": 3.2371, "step": 598 }, { "epoch": 0.18, "learning_rate": 0.0003251864236581899, "loss": 3.1458, "step": 599 }, { "epoch": 0.19, "learning_rate": 0.0003251507945989527, "loss": 2.8437, "step": 600 }, { "epoch": 0.19, "learning_rate": 0.00032511516553971546, "loss": 6.1913, "step": 601 }, { "epoch": 0.19, "learning_rate": 0.00032507953648047824, "loss": 5.8923, "step": 602 }, { "epoch": 0.19, "learning_rate": 0.00032504390742124097, "loss": 5.2444, "step": 603 }, { "epoch": 0.19, "learning_rate": 0.00032500827836200375, "loss": 4.9218, "step": 604 }, { "epoch": 0.19, "learning_rate": 0.00032497264930276653, "loss": 4.584, "step": 605 }, { "epoch": 0.19, "learning_rate": 0.0003249370202435293, "loss": 4.8353, "step": 606 }, { "epoch": 0.19, "learning_rate": 0.0003249013911842921, "loss": 4.5922, "step": 607 }, { "epoch": 0.19, "learning_rate": 0.0003248657621250548, "loss": 4.8958, "step": 608 }, { "epoch": 0.19, "learning_rate": 0.0003248301330658176, "loss": 4.5731, "step": 609 }, { "epoch": 0.19, "learning_rate": 0.0003247945040065804, "loss": 4.4427, "step": 610 }, { "epoch": 0.19, "learning_rate": 0.00032475887494734317, "loss": 4.6072, "step": 611 }, { "epoch": 0.19, "learning_rate": 0.00032472324588810595, "loss": 4.644, "step": 612 }, { "epoch": 0.19, "learning_rate": 0.0003246876168288687, "loss": 4.4587, "step": 613 }, { "epoch": 0.19, "learning_rate": 0.00032465198776963146, "loss": 4.6023, "step": 614 }, { "epoch": 0.19, "learning_rate": 0.00032461635871039424, "loss": 4.522, "step": 615 }, { "epoch": 0.19, "learning_rate": 0.000324580729651157, "loss": 4.7412, "step": 616 }, { "epoch": 0.19, "learning_rate": 0.00032454510059191975, "loss": 4.4786, "step": 617 }, { "epoch": 0.19, "learning_rate": 0.00032450947153268254, "loss": 4.3927, "step": 618 }, { "epoch": 0.19, "learning_rate": 0.0003244738424734453, "loss": 4.3706, "step": 619 }, { "epoch": 0.19, "learning_rate": 0.0003244382134142081, "loss": 4.4966, "step": 620 }, { "epoch": 0.19, "learning_rate": 0.0003244025843549709, "loss": 4.6049, "step": 621 }, { "epoch": 0.19, "learning_rate": 0.0003243669552957336, "loss": 4.4932, "step": 622 }, { "epoch": 0.19, "learning_rate": 0.0003243313262364964, "loss": 4.3992, "step": 623 }, { "epoch": 0.19, "learning_rate": 0.00032429569717725917, "loss": 4.4008, "step": 624 }, { "epoch": 0.19, "learning_rate": 0.00032426006811802195, "loss": 4.5263, "step": 625 }, { "epoch": 0.19, "learning_rate": 0.00032422443905878474, "loss": 4.1794, "step": 626 }, { "epoch": 0.19, "learning_rate": 0.00032418880999954746, "loss": 4.5676, "step": 627 }, { "epoch": 0.19, "learning_rate": 0.0003241531809403103, "loss": 4.1694, "step": 628 }, { "epoch": 0.19, "learning_rate": 0.00032411755188107303, "loss": 4.3338, "step": 629 }, { "epoch": 0.19, "learning_rate": 0.0003240819228218358, "loss": 4.7834, "step": 630 }, { "epoch": 0.19, "learning_rate": 0.0003240462937625986, "loss": 4.6943, "step": 631 }, { "epoch": 0.2, "learning_rate": 0.0003240106647033613, "loss": 4.3402, "step": 632 }, { "epoch": 0.2, "learning_rate": 0.00032397503564412415, "loss": 4.2537, "step": 633 }, { "epoch": 0.2, "learning_rate": 0.0003239394065848869, "loss": 4.1278, "step": 634 }, { "epoch": 0.2, "learning_rate": 0.00032390377752564966, "loss": 3.9416, "step": 635 }, { "epoch": 0.2, "learning_rate": 0.00032386814846641245, "loss": 4.2324, "step": 636 }, { "epoch": 0.2, "learning_rate": 0.0003238325194071752, "loss": 4.0345, "step": 637 }, { "epoch": 0.2, "learning_rate": 0.000323796890347938, "loss": 4.0859, "step": 638 }, { "epoch": 0.2, "learning_rate": 0.00032376126128870074, "loss": 3.9759, "step": 639 }, { "epoch": 0.2, "learning_rate": 0.0003237256322294635, "loss": 3.9151, "step": 640 }, { "epoch": 0.2, "learning_rate": 0.0003236900031702263, "loss": 3.6031, "step": 641 }, { "epoch": 0.2, "learning_rate": 0.00032365437411098903, "loss": 3.9811, "step": 642 }, { "epoch": 0.2, "learning_rate": 0.0003236187450517518, "loss": 3.7555, "step": 643 }, { "epoch": 0.2, "learning_rate": 0.0003235831159925146, "loss": 3.6389, "step": 644 }, { "epoch": 0.2, "learning_rate": 0.0003235474869332774, "loss": 3.5065, "step": 645 }, { "epoch": 0.2, "learning_rate": 0.00032351185787404016, "loss": 3.4343, "step": 646 }, { "epoch": 0.2, "learning_rate": 0.00032347622881480294, "loss": 3.3052, "step": 647 }, { "epoch": 0.2, "learning_rate": 0.00032344059975556567, "loss": 3.3348, "step": 648 }, { "epoch": 0.2, "learning_rate": 0.00032340497069632845, "loss": 3.173, "step": 649 }, { "epoch": 0.2, "learning_rate": 0.00032336934163709123, "loss": 2.7573, "step": 650 }, { "epoch": 0.2, "learning_rate": 0.00032333371257785396, "loss": 6.2311, "step": 651 }, { "epoch": 0.2, "learning_rate": 0.0003232980835186168, "loss": 5.9324, "step": 652 }, { "epoch": 0.2, "learning_rate": 0.0003232624544593795, "loss": 5.4267, "step": 653 }, { "epoch": 0.2, "learning_rate": 0.0003232268254001423, "loss": 5.29, "step": 654 }, { "epoch": 0.2, "learning_rate": 0.0003231911963409051, "loss": 5.1996, "step": 655 }, { "epoch": 0.2, "learning_rate": 0.0003231555672816678, "loss": 4.7653, "step": 656 }, { "epoch": 0.2, "learning_rate": 0.00032311993822243065, "loss": 4.8765, "step": 657 }, { "epoch": 0.2, "learning_rate": 0.0003230843091631934, "loss": 4.8486, "step": 658 }, { "epoch": 0.2, "learning_rate": 0.00032304868010395616, "loss": 4.5627, "step": 659 }, { "epoch": 0.2, "learning_rate": 0.00032301305104471894, "loss": 4.7988, "step": 660 }, { "epoch": 0.2, "learning_rate": 0.00032297742198548167, "loss": 4.6356, "step": 661 }, { "epoch": 0.2, "learning_rate": 0.0003229417929262445, "loss": 4.5247, "step": 662 }, { "epoch": 0.2, "learning_rate": 0.00032290616386700723, "loss": 4.8718, "step": 663 }, { "epoch": 0.2, "learning_rate": 0.00032287053480777, "loss": 4.5409, "step": 664 }, { "epoch": 0.21, "learning_rate": 0.0003228349057485328, "loss": 4.4832, "step": 665 }, { "epoch": 0.21, "learning_rate": 0.0003227992766892955, "loss": 4.5016, "step": 666 }, { "epoch": 0.21, "learning_rate": 0.00032276364763005836, "loss": 4.7042, "step": 667 }, { "epoch": 0.21, "learning_rate": 0.0003227280185708211, "loss": 4.4653, "step": 668 }, { "epoch": 0.21, "learning_rate": 0.00032269238951158387, "loss": 4.3768, "step": 669 }, { "epoch": 0.21, "learning_rate": 0.00032265676045234665, "loss": 4.6229, "step": 670 }, { "epoch": 0.21, "learning_rate": 0.00032262113139310943, "loss": 4.5951, "step": 671 }, { "epoch": 0.21, "learning_rate": 0.0003225855023338722, "loss": 4.5205, "step": 672 }, { "epoch": 0.21, "learning_rate": 0.00032254987327463494, "loss": 4.4447, "step": 673 }, { "epoch": 0.21, "learning_rate": 0.0003225142442153977, "loss": 4.739, "step": 674 }, { "epoch": 0.21, "learning_rate": 0.0003224786151561605, "loss": 4.4509, "step": 675 }, { "epoch": 0.21, "learning_rate": 0.0003224429860969233, "loss": 4.7115, "step": 676 }, { "epoch": 0.21, "learning_rate": 0.000322407357037686, "loss": 4.3424, "step": 677 }, { "epoch": 0.21, "learning_rate": 0.0003223717279784488, "loss": 4.4603, "step": 678 }, { "epoch": 0.21, "learning_rate": 0.0003223360989192116, "loss": 4.614, "step": 679 }, { "epoch": 0.21, "learning_rate": 0.00032230046985997436, "loss": 4.4052, "step": 680 }, { "epoch": 0.21, "learning_rate": 0.00032226484080073714, "loss": 4.7224, "step": 681 }, { "epoch": 0.21, "learning_rate": 0.00032222921174149987, "loss": 4.4315, "step": 682 }, { "epoch": 0.21, "learning_rate": 0.00032219358268226265, "loss": 4.798, "step": 683 }, { "epoch": 0.21, "learning_rate": 0.00032215795362302543, "loss": 4.1718, "step": 684 }, { "epoch": 0.21, "learning_rate": 0.00032212232456378816, "loss": 4.2697, "step": 685 }, { "epoch": 0.21, "learning_rate": 0.000322086695504551, "loss": 4.4247, "step": 686 }, { "epoch": 0.21, "learning_rate": 0.0003220510664453137, "loss": 4.3495, "step": 687 }, { "epoch": 0.21, "learning_rate": 0.0003220154373860765, "loss": 4.084, "step": 688 }, { "epoch": 0.21, "learning_rate": 0.0003219798083268393, "loss": 4.0288, "step": 689 }, { "epoch": 0.21, "learning_rate": 0.00032194417926760207, "loss": 4.0901, "step": 690 }, { "epoch": 0.21, "learning_rate": 0.00032190855020836485, "loss": 4.0617, "step": 691 }, { "epoch": 0.21, "learning_rate": 0.0003218729211491276, "loss": 4.0171, "step": 692 }, { "epoch": 0.21, "learning_rate": 0.00032183729208989036, "loss": 4.438, "step": 693 }, { "epoch": 0.21, "learning_rate": 0.00032180166303065314, "loss": 3.9928, "step": 694 }, { "epoch": 0.21, "learning_rate": 0.0003217660339714159, "loss": 3.9308, "step": 695 }, { "epoch": 0.21, "learning_rate": 0.0003217304049121787, "loss": 3.8863, "step": 696 }, { "epoch": 0.22, "learning_rate": 0.00032169477585294144, "loss": 3.7087, "step": 697 }, { "epoch": 0.22, "learning_rate": 0.0003216591467937042, "loss": 3.6466, "step": 698 }, { "epoch": 0.22, "learning_rate": 0.000321623517734467, "loss": 3.4242, "step": 699 }, { "epoch": 0.22, "learning_rate": 0.0003215878886752298, "loss": 3.3662, "step": 700 }, { "epoch": 0.22, "learning_rate": 0.00032155225961599256, "loss": 6.47, "step": 701 }, { "epoch": 0.22, "learning_rate": 0.0003215166305567553, "loss": 6.4183, "step": 702 }, { "epoch": 0.22, "learning_rate": 0.0003214810014975181, "loss": 6.0522, "step": 703 }, { "epoch": 0.22, "learning_rate": 0.00032144537243828085, "loss": 5.6719, "step": 704 }, { "epoch": 0.22, "learning_rate": 0.00032140974337904364, "loss": 5.5593, "step": 705 }, { "epoch": 0.22, "learning_rate": 0.0003213741143198064, "loss": 5.3021, "step": 706 }, { "epoch": 0.22, "learning_rate": 0.00032133848526056915, "loss": 5.6657, "step": 707 }, { "epoch": 0.22, "learning_rate": 0.00032130285620133193, "loss": 4.9884, "step": 708 }, { "epoch": 0.22, "learning_rate": 0.0003212672271420947, "loss": 4.9615, "step": 709 }, { "epoch": 0.22, "learning_rate": 0.0003212315980828575, "loss": 4.8835, "step": 710 }, { "epoch": 0.22, "learning_rate": 0.0003211959690236202, "loss": 5.0906, "step": 711 }, { "epoch": 0.22, "learning_rate": 0.000321160339964383, "loss": 4.816, "step": 712 }, { "epoch": 0.22, "learning_rate": 0.0003211247109051458, "loss": 4.7756, "step": 713 }, { "epoch": 0.22, "learning_rate": 0.00032108908184590857, "loss": 5.1242, "step": 714 }, { "epoch": 0.22, "learning_rate": 0.00032105345278667135, "loss": 5.1941, "step": 715 }, { "epoch": 0.22, "learning_rate": 0.0003210178237274341, "loss": 4.8952, "step": 716 }, { "epoch": 0.22, "learning_rate": 0.00032098219466819686, "loss": 4.5833, "step": 717 }, { "epoch": 0.22, "learning_rate": 0.00032094656560895964, "loss": 4.9627, "step": 718 }, { "epoch": 0.22, "learning_rate": 0.0003209109365497224, "loss": 4.6861, "step": 719 }, { "epoch": 0.22, "learning_rate": 0.0003208753074904852, "loss": 4.806, "step": 720 }, { "epoch": 0.22, "learning_rate": 0.00032083967843124793, "loss": 4.6327, "step": 721 }, { "epoch": 0.22, "learning_rate": 0.00032080404937201077, "loss": 4.7109, "step": 722 }, { "epoch": 0.22, "learning_rate": 0.0003207684203127735, "loss": 4.7094, "step": 723 }, { "epoch": 0.22, "learning_rate": 0.0003207327912535363, "loss": 4.6903, "step": 724 }, { "epoch": 0.22, "learning_rate": 0.00032069716219429906, "loss": 4.7569, "step": 725 }, { "epoch": 0.22, "learning_rate": 0.0003206615331350618, "loss": 4.649, "step": 726 }, { "epoch": 0.22, "learning_rate": 0.0003206259040758246, "loss": 4.5057, "step": 727 }, { "epoch": 0.22, "learning_rate": 0.00032059027501658735, "loss": 4.5998, "step": 728 }, { "epoch": 0.22, "learning_rate": 0.00032055464595735013, "loss": 4.453, "step": 729 }, { "epoch": 0.23, "learning_rate": 0.0003205190168981129, "loss": 4.6019, "step": 730 }, { "epoch": 0.23, "learning_rate": 0.00032048338783887564, "loss": 4.4339, "step": 731 }, { "epoch": 0.23, "learning_rate": 0.0003204477587796384, "loss": 4.7953, "step": 732 }, { "epoch": 0.23, "learning_rate": 0.0003204121297204012, "loss": 4.2888, "step": 733 }, { "epoch": 0.23, "learning_rate": 0.000320376500661164, "loss": 4.5335, "step": 734 }, { "epoch": 0.23, "learning_rate": 0.00032034087160192677, "loss": 4.2448, "step": 735 }, { "epoch": 0.23, "learning_rate": 0.0003203052425426895, "loss": 4.3432, "step": 736 }, { "epoch": 0.23, "learning_rate": 0.0003202696134834523, "loss": 4.2657, "step": 737 }, { "epoch": 0.23, "learning_rate": 0.00032023398442421506, "loss": 4.2923, "step": 738 }, { "epoch": 0.23, "learning_rate": 0.00032019835536497784, "loss": 4.0112, "step": 739 }, { "epoch": 0.23, "learning_rate": 0.0003201627263057406, "loss": 4.4751, "step": 740 }, { "epoch": 0.23, "learning_rate": 0.0003201270972465034, "loss": 4.2462, "step": 741 }, { "epoch": 0.23, "learning_rate": 0.00032009146818726613, "loss": 4.0629, "step": 742 }, { "epoch": 0.23, "learning_rate": 0.0003200558391280289, "loss": 3.9804, "step": 743 }, { "epoch": 0.23, "learning_rate": 0.0003200202100687917, "loss": 3.7141, "step": 744 }, { "epoch": 0.23, "learning_rate": 0.0003199845810095544, "loss": 3.7757, "step": 745 }, { "epoch": 0.23, "learning_rate": 0.00031994895195031726, "loss": 3.5529, "step": 746 }, { "epoch": 0.23, "learning_rate": 0.00031991332289108, "loss": 3.6752, "step": 747 }, { "epoch": 0.23, "learning_rate": 0.00031987769383184277, "loss": 3.8655, "step": 748 }, { "epoch": 0.23, "learning_rate": 0.00031984206477260555, "loss": 3.4067, "step": 749 }, { "epoch": 0.23, "learning_rate": 0.0003198064357133683, "loss": 3.1073, "step": 750 }, { "epoch": 0.23, "learning_rate": 0.0003197708066541311, "loss": 6.4978, "step": 751 }, { "epoch": 0.23, "learning_rate": 0.00031973517759489384, "loss": 5.5928, "step": 752 }, { "epoch": 0.23, "learning_rate": 0.0003196995485356566, "loss": 5.4286, "step": 753 }, { "epoch": 0.23, "learning_rate": 0.0003196639194764194, "loss": 5.0086, "step": 754 }, { "epoch": 0.23, "learning_rate": 0.00031962829041718213, "loss": 5.1504, "step": 755 }, { "epoch": 0.23, "learning_rate": 0.00031959266135794497, "loss": 5.0985, "step": 756 }, { "epoch": 0.23, "learning_rate": 0.0003195570322987077, "loss": 5.0739, "step": 757 }, { "epoch": 0.23, "learning_rate": 0.0003195214032394705, "loss": 4.6801, "step": 758 }, { "epoch": 0.23, "learning_rate": 0.00031948577418023326, "loss": 4.8236, "step": 759 }, { "epoch": 0.23, "learning_rate": 0.000319450145120996, "loss": 4.9626, "step": 760 }, { "epoch": 0.23, "learning_rate": 0.0003194145160617588, "loss": 4.7853, "step": 761 }, { "epoch": 0.24, "learning_rate": 0.00031937888700252155, "loss": 4.7102, "step": 762 }, { "epoch": 0.24, "learning_rate": 0.00031934325794328433, "loss": 4.9914, "step": 763 }, { "epoch": 0.24, "learning_rate": 0.0003193076288840471, "loss": 4.9741, "step": 764 }, { "epoch": 0.24, "learning_rate": 0.0003192719998248099, "loss": 4.5619, "step": 765 }, { "epoch": 0.24, "learning_rate": 0.0003192363707655726, "loss": 4.8508, "step": 766 }, { "epoch": 0.24, "learning_rate": 0.0003192007417063354, "loss": 4.9575, "step": 767 }, { "epoch": 0.24, "learning_rate": 0.0003191651126470982, "loss": 4.8022, "step": 768 }, { "epoch": 0.24, "learning_rate": 0.00031912948358786097, "loss": 4.7418, "step": 769 }, { "epoch": 0.24, "learning_rate": 0.00031909385452862375, "loss": 4.8474, "step": 770 }, { "epoch": 0.24, "learning_rate": 0.0003190582254693865, "loss": 4.8716, "step": 771 }, { "epoch": 0.24, "learning_rate": 0.00031902259641014926, "loss": 4.6858, "step": 772 }, { "epoch": 0.24, "learning_rate": 0.00031898696735091205, "loss": 4.5442, "step": 773 }, { "epoch": 0.24, "learning_rate": 0.00031895133829167483, "loss": 4.4317, "step": 774 }, { "epoch": 0.24, "learning_rate": 0.0003189157092324376, "loss": 4.7171, "step": 775 }, { "epoch": 0.24, "learning_rate": 0.00031888008017320034, "loss": 4.7172, "step": 776 }, { "epoch": 0.24, "learning_rate": 0.0003188444511139631, "loss": 4.8377, "step": 777 }, { "epoch": 0.24, "learning_rate": 0.0003188088220547259, "loss": 4.6487, "step": 778 }, { "epoch": 0.24, "learning_rate": 0.00031877319299548863, "loss": 4.5178, "step": 779 }, { "epoch": 0.24, "learning_rate": 0.00031873756393625146, "loss": 4.541, "step": 780 }, { "epoch": 0.24, "learning_rate": 0.0003187019348770142, "loss": 4.5854, "step": 781 }, { "epoch": 0.24, "learning_rate": 0.000318666305817777, "loss": 4.6579, "step": 782 }, { "epoch": 0.24, "learning_rate": 0.00031863067675853976, "loss": 4.4028, "step": 783 }, { "epoch": 0.24, "learning_rate": 0.00031859504769930254, "loss": 4.3095, "step": 784 }, { "epoch": 0.24, "learning_rate": 0.0003185594186400653, "loss": 4.5231, "step": 785 }, { "epoch": 0.24, "learning_rate": 0.00031852378958082805, "loss": 4.2833, "step": 786 }, { "epoch": 0.24, "learning_rate": 0.00031848816052159083, "loss": 4.274, "step": 787 }, { "epoch": 0.24, "learning_rate": 0.0003184525314623536, "loss": 4.5985, "step": 788 }, { "epoch": 0.24, "learning_rate": 0.0003184169024031164, "loss": 5.0541, "step": 789 }, { "epoch": 0.24, "learning_rate": 0.0003183812733438792, "loss": 4.1923, "step": 790 }, { "epoch": 0.24, "learning_rate": 0.0003183456442846419, "loss": 4.1639, "step": 791 }, { "epoch": 0.24, "learning_rate": 0.0003183100152254047, "loss": 3.9991, "step": 792 }, { "epoch": 0.24, "learning_rate": 0.00031827438616616747, "loss": 4.0063, "step": 793 }, { "epoch": 0.25, "learning_rate": 0.00031823875710693025, "loss": 3.8333, "step": 794 }, { "epoch": 0.25, "learning_rate": 0.00031820312804769303, "loss": 4.1733, "step": 795 }, { "epoch": 0.25, "learning_rate": 0.00031816749898845576, "loss": 3.7356, "step": 796 }, { "epoch": 0.25, "learning_rate": 0.00031813186992921854, "loss": 3.8003, "step": 797 }, { "epoch": 0.25, "learning_rate": 0.0003180962408699813, "loss": 3.4117, "step": 798 }, { "epoch": 0.25, "learning_rate": 0.0003180606118107441, "loss": 3.2608, "step": 799 }, { "epoch": 0.25, "learning_rate": 0.00031802498275150683, "loss": 3.2906, "step": 800 }, { "epoch": 0.25, "learning_rate": 0.0003179893536922696, "loss": 6.6787, "step": 801 }, { "epoch": 0.25, "learning_rate": 0.0003179537246330324, "loss": 6.4735, "step": 802 }, { "epoch": 0.25, "learning_rate": 0.0003179180955737952, "loss": 5.8129, "step": 803 }, { "epoch": 0.25, "learning_rate": 0.00031788246651455796, "loss": 5.2917, "step": 804 }, { "epoch": 0.25, "learning_rate": 0.0003178468374553207, "loss": 5.4524, "step": 805 }, { "epoch": 0.25, "learning_rate": 0.00031781120839608347, "loss": 5.2113, "step": 806 }, { "epoch": 0.25, "learning_rate": 0.00031777557933684625, "loss": 5.1347, "step": 807 }, { "epoch": 0.25, "learning_rate": 0.00031773995027760903, "loss": 5.0582, "step": 808 }, { "epoch": 0.25, "learning_rate": 0.0003177043212183718, "loss": 5.0439, "step": 809 }, { "epoch": 0.25, "learning_rate": 0.00031766869215913454, "loss": 4.8616, "step": 810 }, { "epoch": 0.25, "learning_rate": 0.0003176330630998974, "loss": 4.4569, "step": 811 }, { "epoch": 0.25, "learning_rate": 0.0003175974340406601, "loss": 4.7055, "step": 812 }, { "epoch": 0.25, "learning_rate": 0.0003175618049814229, "loss": 4.9301, "step": 813 }, { "epoch": 0.25, "learning_rate": 0.00031752617592218567, "loss": 4.6795, "step": 814 }, { "epoch": 0.25, "learning_rate": 0.0003174905468629484, "loss": 4.866, "step": 815 }, { "epoch": 0.25, "learning_rate": 0.00031745491780371123, "loss": 4.6602, "step": 816 }, { "epoch": 0.25, "learning_rate": 0.00031741928874447396, "loss": 4.9522, "step": 817 }, { "epoch": 0.25, "learning_rate": 0.00031738365968523674, "loss": 5.0115, "step": 818 }, { "epoch": 0.25, "learning_rate": 0.0003173480306259995, "loss": 4.5117, "step": 819 }, { "epoch": 0.25, "learning_rate": 0.00031731240156676225, "loss": 4.6974, "step": 820 }, { "epoch": 0.25, "learning_rate": 0.0003172767725075251, "loss": 4.9883, "step": 821 }, { "epoch": 0.25, "learning_rate": 0.0003172411434482878, "loss": 4.4035, "step": 822 }, { "epoch": 0.25, "learning_rate": 0.0003172055143890506, "loss": 4.636, "step": 823 }, { "epoch": 0.25, "learning_rate": 0.0003171698853298134, "loss": 4.878, "step": 824 }, { "epoch": 0.25, "learning_rate": 0.0003171342562705761, "loss": 4.5198, "step": 825 }, { "epoch": 0.25, "learning_rate": 0.0003170986272113389, "loss": 4.4156, "step": 826 }, { "epoch": 0.26, "learning_rate": 0.00031706299815210167, "loss": 4.6267, "step": 827 }, { "epoch": 0.26, "learning_rate": 0.00031702736909286445, "loss": 4.778, "step": 828 }, { "epoch": 0.26, "learning_rate": 0.00031699174003362723, "loss": 4.5432, "step": 829 }, { "epoch": 0.26, "learning_rate": 0.00031695611097438996, "loss": 4.756, "step": 830 }, { "epoch": 0.26, "learning_rate": 0.00031692048191515274, "loss": 4.5705, "step": 831 }, { "epoch": 0.26, "learning_rate": 0.0003168848528559155, "loss": 4.5189, "step": 832 }, { "epoch": 0.26, "learning_rate": 0.0003168492237966783, "loss": 4.4518, "step": 833 }, { "epoch": 0.26, "learning_rate": 0.0003168135947374411, "loss": 4.33, "step": 834 }, { "epoch": 0.26, "learning_rate": 0.00031677796567820387, "loss": 4.3499, "step": 835 }, { "epoch": 0.26, "learning_rate": 0.0003167423366189666, "loss": 4.2432, "step": 836 }, { "epoch": 0.26, "learning_rate": 0.0003167067075597294, "loss": 4.5634, "step": 837 }, { "epoch": 0.26, "learning_rate": 0.00031667107850049216, "loss": 4.1849, "step": 838 }, { "epoch": 0.26, "learning_rate": 0.0003166354494412549, "loss": 4.1699, "step": 839 }, { "epoch": 0.26, "learning_rate": 0.0003165998203820177, "loss": 4.2506, "step": 840 }, { "epoch": 0.26, "learning_rate": 0.00031656419132278045, "loss": 4.2697, "step": 841 }, { "epoch": 0.26, "learning_rate": 0.00031652856226354324, "loss": 4.0719, "step": 842 }, { "epoch": 0.26, "learning_rate": 0.000316492933204306, "loss": 3.6326, "step": 843 }, { "epoch": 0.26, "learning_rate": 0.00031645730414506875, "loss": 3.5361, "step": 844 }, { "epoch": 0.26, "learning_rate": 0.0003164216750858316, "loss": 3.6797, "step": 845 }, { "epoch": 0.26, "learning_rate": 0.0003163860460265943, "loss": 3.7435, "step": 846 }, { "epoch": 0.26, "learning_rate": 0.0003163504169673571, "loss": 3.6399, "step": 847 }, { "epoch": 0.26, "learning_rate": 0.00031631478790811987, "loss": 3.6595, "step": 848 }, { "epoch": 0.26, "learning_rate": 0.0003162791588488826, "loss": 3.2975, "step": 849 }, { "epoch": 0.26, "learning_rate": 0.00031624352978964544, "loss": 3.052, "step": 850 }, { "epoch": 0.26, "learning_rate": 0.00031620790073040816, "loss": 6.6104, "step": 851 }, { "epoch": 0.26, "learning_rate": 0.00031617227167117095, "loss": 5.9987, "step": 852 }, { "epoch": 0.26, "learning_rate": 0.00031613664261193373, "loss": 5.9487, "step": 853 }, { "epoch": 0.26, "learning_rate": 0.00031610101355269646, "loss": 5.4157, "step": 854 }, { "epoch": 0.26, "learning_rate": 0.0003160653844934593, "loss": 5.2302, "step": 855 }, { "epoch": 0.26, "learning_rate": 0.000316029755434222, "loss": 5.1533, "step": 856 }, { "epoch": 0.26, "learning_rate": 0.0003159941263749848, "loss": 5.0236, "step": 857 }, { "epoch": 0.26, "learning_rate": 0.0003159584973157476, "loss": 4.9179, "step": 858 }, { "epoch": 0.27, "learning_rate": 0.00031592286825651036, "loss": 4.9565, "step": 859 }, { "epoch": 0.27, "learning_rate": 0.0003158872391972731, "loss": 4.7953, "step": 860 }, { "epoch": 0.27, "learning_rate": 0.0003158516101380359, "loss": 4.6231, "step": 861 }, { "epoch": 0.27, "learning_rate": 0.00031581598107879866, "loss": 4.9661, "step": 862 }, { "epoch": 0.27, "learning_rate": 0.00031578035201956144, "loss": 4.7249, "step": 863 }, { "epoch": 0.27, "learning_rate": 0.0003157447229603242, "loss": 4.8068, "step": 864 }, { "epoch": 0.27, "learning_rate": 0.00031570909390108695, "loss": 4.8051, "step": 865 }, { "epoch": 0.27, "learning_rate": 0.00031567346484184973, "loss": 4.8017, "step": 866 }, { "epoch": 0.27, "learning_rate": 0.0003156378357826125, "loss": 4.6119, "step": 867 }, { "epoch": 0.27, "learning_rate": 0.0003156022067233753, "loss": 4.7729, "step": 868 }, { "epoch": 0.27, "learning_rate": 0.0003155665776641381, "loss": 5.0054, "step": 869 }, { "epoch": 0.27, "learning_rate": 0.0003155309486049008, "loss": 4.8891, "step": 870 }, { "epoch": 0.27, "learning_rate": 0.0003154953195456636, "loss": 4.4381, "step": 871 }, { "epoch": 0.27, "learning_rate": 0.00031545969048642637, "loss": 4.5233, "step": 872 }, { "epoch": 0.27, "learning_rate": 0.0003154240614271891, "loss": 4.5199, "step": 873 }, { "epoch": 0.27, "learning_rate": 0.00031538843236795193, "loss": 5.0761, "step": 874 }, { "epoch": 0.27, "learning_rate": 0.00031535280330871466, "loss": 4.5819, "step": 875 }, { "epoch": 0.27, "learning_rate": 0.00031531717424947744, "loss": 4.7146, "step": 876 }, { "epoch": 0.27, "learning_rate": 0.0003152815451902402, "loss": 4.5888, "step": 877 }, { "epoch": 0.27, "learning_rate": 0.000315245916131003, "loss": 4.4594, "step": 878 }, { "epoch": 0.27, "learning_rate": 0.0003152102870717658, "loss": 4.5146, "step": 879 }, { "epoch": 0.27, "learning_rate": 0.0003151746580125285, "loss": 4.2043, "step": 880 }, { "epoch": 0.27, "learning_rate": 0.0003151390289532913, "loss": 4.3887, "step": 881 }, { "epoch": 0.27, "learning_rate": 0.0003151033998940541, "loss": 4.3032, "step": 882 }, { "epoch": 0.27, "learning_rate": 0.00031506777083481686, "loss": 4.313, "step": 883 }, { "epoch": 0.27, "learning_rate": 0.00031503214177557964, "loss": 4.2478, "step": 884 }, { "epoch": 0.27, "learning_rate": 0.00031499651271634237, "loss": 4.2393, "step": 885 }, { "epoch": 0.27, "learning_rate": 0.00031496088365710515, "loss": 3.9922, "step": 886 }, { "epoch": 0.27, "learning_rate": 0.00031492525459786793, "loss": 4.4679, "step": 887 }, { "epoch": 0.27, "learning_rate": 0.0003148896255386307, "loss": 4.2588, "step": 888 }, { "epoch": 0.27, "learning_rate": 0.0003148539964793935, "loss": 4.3304, "step": 889 }, { "epoch": 0.27, "learning_rate": 0.0003148183674201562, "loss": 4.0801, "step": 890 }, { "epoch": 0.27, "learning_rate": 0.000314782738360919, "loss": 4.0099, "step": 891 }, { "epoch": 0.28, "learning_rate": 0.0003147471093016818, "loss": 3.8702, "step": 892 }, { "epoch": 0.28, "learning_rate": 0.00031471148024244457, "loss": 4.2203, "step": 893 }, { "epoch": 0.28, "learning_rate": 0.0003146758511832073, "loss": 3.7047, "step": 894 }, { "epoch": 0.28, "learning_rate": 0.0003146402221239701, "loss": 3.8448, "step": 895 }, { "epoch": 0.28, "learning_rate": 0.00031460459306473286, "loss": 3.4714, "step": 896 }, { "epoch": 0.28, "learning_rate": 0.00031456896400549564, "loss": 3.2731, "step": 897 }, { "epoch": 0.28, "learning_rate": 0.0003145333349462584, "loss": 3.4538, "step": 898 }, { "epoch": 0.28, "learning_rate": 0.00031449770588702115, "loss": 3.4279, "step": 899 }, { "epoch": 0.28, "learning_rate": 0.00031446207682778393, "loss": 3.1156, "step": 900 }, { "epoch": 0.28, "learning_rate": 0.0003144264477685467, "loss": 6.2662, "step": 901 }, { "epoch": 0.28, "learning_rate": 0.0003143908187093095, "loss": 5.6305, "step": 902 }, { "epoch": 0.28, "learning_rate": 0.0003143551896500723, "loss": 5.3684, "step": 903 }, { "epoch": 0.28, "learning_rate": 0.000314319560590835, "loss": 5.3337, "step": 904 }, { "epoch": 0.28, "learning_rate": 0.00031428393153159784, "loss": 5.0379, "step": 905 }, { "epoch": 0.28, "learning_rate": 0.00031424830247236057, "loss": 4.8474, "step": 906 }, { "epoch": 0.28, "learning_rate": 0.00031421267341312335, "loss": 5.2066, "step": 907 }, { "epoch": 0.28, "learning_rate": 0.00031417704435388613, "loss": 4.9112, "step": 908 }, { "epoch": 0.28, "learning_rate": 0.00031414141529464886, "loss": 4.9829, "step": 909 }, { "epoch": 0.28, "learning_rate": 0.0003141057862354117, "loss": 4.7379, "step": 910 }, { "epoch": 0.28, "learning_rate": 0.0003140701571761744, "loss": 4.7751, "step": 911 }, { "epoch": 0.28, "learning_rate": 0.0003140345281169372, "loss": 4.7586, "step": 912 }, { "epoch": 0.28, "learning_rate": 0.0003139988990577, "loss": 4.8118, "step": 913 }, { "epoch": 0.28, "learning_rate": 0.0003139632699984627, "loss": 4.9603, "step": 914 }, { "epoch": 0.28, "learning_rate": 0.00031392764093922555, "loss": 5.0008, "step": 915 }, { "epoch": 0.28, "learning_rate": 0.0003138920118799883, "loss": 4.9402, "step": 916 }, { "epoch": 0.28, "learning_rate": 0.00031385638282075106, "loss": 4.6815, "step": 917 }, { "epoch": 0.28, "learning_rate": 0.00031382075376151385, "loss": 4.7787, "step": 918 }, { "epoch": 0.28, "learning_rate": 0.00031378512470227657, "loss": 4.7789, "step": 919 }, { "epoch": 0.28, "learning_rate": 0.00031374949564303935, "loss": 4.9569, "step": 920 }, { "epoch": 0.28, "learning_rate": 0.00031371386658380214, "loss": 4.8704, "step": 921 }, { "epoch": 0.28, "learning_rate": 0.0003136782375245649, "loss": 4.6449, "step": 922 }, { "epoch": 0.28, "learning_rate": 0.0003136426084653277, "loss": 4.6035, "step": 923 }, { "epoch": 0.29, "learning_rate": 0.00031360697940609043, "loss": 4.8499, "step": 924 }, { "epoch": 0.29, "learning_rate": 0.0003135713503468532, "loss": 4.6556, "step": 925 }, { "epoch": 0.29, "learning_rate": 0.000313535721287616, "loss": 4.73, "step": 926 }, { "epoch": 0.29, "learning_rate": 0.0003135000922283788, "loss": 4.7274, "step": 927 }, { "epoch": 0.29, "learning_rate": 0.0003134644631691415, "loss": 4.9487, "step": 928 }, { "epoch": 0.29, "learning_rate": 0.00031342883410990434, "loss": 4.3901, "step": 929 }, { "epoch": 0.29, "learning_rate": 0.00031339320505066706, "loss": 4.5403, "step": 930 }, { "epoch": 0.29, "learning_rate": 0.00031335757599142985, "loss": 4.2667, "step": 931 }, { "epoch": 0.29, "learning_rate": 0.00031332194693219263, "loss": 4.6732, "step": 932 }, { "epoch": 0.29, "learning_rate": 0.00031328631787295536, "loss": 4.5, "step": 933 }, { "epoch": 0.29, "learning_rate": 0.0003132506888137182, "loss": 4.7049, "step": 934 }, { "epoch": 0.29, "learning_rate": 0.0003132150597544809, "loss": 4.2075, "step": 935 }, { "epoch": 0.29, "learning_rate": 0.0003131794306952437, "loss": 4.3334, "step": 936 }, { "epoch": 0.29, "learning_rate": 0.0003131438016360065, "loss": 4.2938, "step": 937 }, { "epoch": 0.29, "learning_rate": 0.0003131081725767692, "loss": 4.3269, "step": 938 }, { "epoch": 0.29, "learning_rate": 0.00031307254351753205, "loss": 4.1757, "step": 939 }, { "epoch": 0.29, "learning_rate": 0.0003130369144582948, "loss": 4.0869, "step": 940 }, { "epoch": 0.29, "learning_rate": 0.00031300128539905756, "loss": 4.16, "step": 941 }, { "epoch": 0.29, "learning_rate": 0.00031296565633982034, "loss": 4.0857, "step": 942 }, { "epoch": 0.29, "learning_rate": 0.00031293002728058307, "loss": 3.875, "step": 943 }, { "epoch": 0.29, "learning_rate": 0.0003128943982213459, "loss": 4.0039, "step": 944 }, { "epoch": 0.29, "learning_rate": 0.00031285876916210863, "loss": 3.905, "step": 945 }, { "epoch": 0.29, "learning_rate": 0.0003128231401028714, "loss": 3.5325, "step": 946 }, { "epoch": 0.29, "learning_rate": 0.0003127875110436342, "loss": 3.5509, "step": 947 }, { "epoch": 0.29, "learning_rate": 0.000312751881984397, "loss": 3.6116, "step": 948 }, { "epoch": 0.29, "learning_rate": 0.00031271625292515976, "loss": 3.2935, "step": 949 }, { "epoch": 0.29, "learning_rate": 0.0003126806238659225, "loss": 3.1373, "step": 950 }, { "epoch": 0.29, "learning_rate": 0.00031264499480668527, "loss": 6.1515, "step": 951 }, { "epoch": 0.29, "learning_rate": 0.00031260936574744805, "loss": 5.8838, "step": 952 }, { "epoch": 0.29, "learning_rate": 0.00031257373668821083, "loss": 5.7219, "step": 953 }, { "epoch": 0.29, "learning_rate": 0.00031253810762897356, "loss": 5.3645, "step": 954 }, { "epoch": 0.29, "learning_rate": 0.00031250247856973634, "loss": 5.3646, "step": 955 }, { "epoch": 0.3, "learning_rate": 0.0003124668495104991, "loss": 5.2483, "step": 956 }, { "epoch": 0.3, "learning_rate": 0.0003124312204512619, "loss": 5.0023, "step": 957 }, { "epoch": 0.3, "learning_rate": 0.0003123955913920247, "loss": 5.2127, "step": 958 }, { "epoch": 0.3, "learning_rate": 0.0003123599623327874, "loss": 4.9758, "step": 959 }, { "epoch": 0.3, "learning_rate": 0.0003123243332735502, "loss": 4.6177, "step": 960 }, { "epoch": 0.3, "learning_rate": 0.000312288704214313, "loss": 4.5279, "step": 961 }, { "epoch": 0.3, "learning_rate": 0.0003122530751550757, "loss": 4.9342, "step": 962 }, { "epoch": 0.3, "learning_rate": 0.00031221744609583854, "loss": 4.8577, "step": 963 }, { "epoch": 0.3, "learning_rate": 0.00031218181703660127, "loss": 4.7622, "step": 964 }, { "epoch": 0.3, "learning_rate": 0.00031214618797736405, "loss": 4.8761, "step": 965 }, { "epoch": 0.3, "learning_rate": 0.00031211055891812683, "loss": 4.7664, "step": 966 }, { "epoch": 0.3, "learning_rate": 0.00031207492985888956, "loss": 4.9797, "step": 967 }, { "epoch": 0.3, "learning_rate": 0.0003120393007996524, "loss": 4.7389, "step": 968 }, { "epoch": 0.3, "learning_rate": 0.0003120036717404151, "loss": 4.7964, "step": 969 }, { "epoch": 0.3, "learning_rate": 0.0003119680426811779, "loss": 4.4982, "step": 970 }, { "epoch": 0.3, "learning_rate": 0.0003119324136219407, "loss": 4.4666, "step": 971 }, { "epoch": 0.3, "learning_rate": 0.00031189678456270347, "loss": 4.9018, "step": 972 }, { "epoch": 0.3, "learning_rate": 0.00031186115550346625, "loss": 4.6384, "step": 973 }, { "epoch": 0.3, "learning_rate": 0.000311825526444229, "loss": 4.6017, "step": 974 }, { "epoch": 0.3, "learning_rate": 0.00031178989738499176, "loss": 4.6073, "step": 975 }, { "epoch": 0.3, "learning_rate": 0.00031175426832575454, "loss": 4.5389, "step": 976 }, { "epoch": 0.3, "learning_rate": 0.0003117186392665173, "loss": 4.2345, "step": 977 }, { "epoch": 0.3, "learning_rate": 0.0003116830102072801, "loss": 4.5278, "step": 978 }, { "epoch": 0.3, "learning_rate": 0.00031164738114804283, "loss": 4.4742, "step": 979 }, { "epoch": 0.3, "learning_rate": 0.0003116117520888056, "loss": 4.583, "step": 980 }, { "epoch": 0.3, "learning_rate": 0.0003115761230295684, "loss": 4.5069, "step": 981 }, { "epoch": 0.3, "learning_rate": 0.0003115404939703312, "loss": 4.4788, "step": 982 }, { "epoch": 0.3, "learning_rate": 0.00031150486491109396, "loss": 4.5548, "step": 983 }, { "epoch": 0.3, "learning_rate": 0.0003114692358518567, "loss": 4.071, "step": 984 }, { "epoch": 0.3, "learning_rate": 0.00031143360679261947, "loss": 4.3909, "step": 985 }, { "epoch": 0.3, "learning_rate": 0.00031139797773338225, "loss": 4.1309, "step": 986 }, { "epoch": 0.3, "learning_rate": 0.00031136234867414504, "loss": 4.0651, "step": 987 }, { "epoch": 0.3, "learning_rate": 0.00031132671961490776, "loss": 3.8988, "step": 988 }, { "epoch": 0.31, "learning_rate": 0.00031129109055567055, "loss": 4.2998, "step": 989 }, { "epoch": 0.31, "learning_rate": 0.0003112554614964333, "loss": 3.8281, "step": 990 }, { "epoch": 0.31, "learning_rate": 0.0003112198324371961, "loss": 4.1739, "step": 991 }, { "epoch": 0.31, "learning_rate": 0.0003111842033779589, "loss": 4.2605, "step": 992 }, { "epoch": 0.31, "learning_rate": 0.0003111485743187216, "loss": 3.9964, "step": 993 }, { "epoch": 0.31, "learning_rate": 0.0003111129452594844, "loss": 3.8471, "step": 994 }, { "epoch": 0.31, "learning_rate": 0.0003110773162002472, "loss": 3.853, "step": 995 }, { "epoch": 0.31, "learning_rate": 0.00031104168714100996, "loss": 3.6185, "step": 996 }, { "epoch": 0.31, "learning_rate": 0.00031100605808177275, "loss": 3.3647, "step": 997 }, { "epoch": 0.31, "learning_rate": 0.0003109704290225355, "loss": 3.5623, "step": 998 }, { "epoch": 0.31, "learning_rate": 0.0003109347999632983, "loss": 3.0593, "step": 999 }, { "epoch": 0.31, "learning_rate": 0.00031089917090406104, "loss": 3.0735, "step": 1000 }, { "epoch": 0.31, "eval_bleu": 2.4961869532998874e-13, "eval_loss": 5.599357604980469, "eval_runtime": 2585.0388, "eval_samples_per_second": 5.71, "eval_steps_per_second": 0.714, "step": 1000 }, { "epoch": 0.31, "learning_rate": 0.0003108635418448238, "loss": 6.6653, "step": 1001 }, { "epoch": 0.31, "learning_rate": 0.0003108279127855866, "loss": 5.993, "step": 1002 }, { "epoch": 0.31, "learning_rate": 0.00031079228372634933, "loss": 5.2619, "step": 1003 }, { "epoch": 0.31, "learning_rate": 0.00031075665466711216, "loss": 5.315, "step": 1004 }, { "epoch": 0.31, "learning_rate": 0.0003107210256078749, "loss": 5.2509, "step": 1005 }, { "epoch": 0.31, "learning_rate": 0.0003106853965486377, "loss": 5.0751, "step": 1006 }, { "epoch": 0.31, "learning_rate": 0.00031064976748940046, "loss": 5.0152, "step": 1007 }, { "epoch": 0.31, "learning_rate": 0.0003106141384301632, "loss": 4.8883, "step": 1008 }, { "epoch": 0.31, "learning_rate": 0.00031057850937092597, "loss": 4.9054, "step": 1009 }, { "epoch": 0.31, "learning_rate": 0.00031054288031168875, "loss": 5.1369, "step": 1010 }, { "epoch": 0.31, "learning_rate": 0.00031050725125245153, "loss": 4.8128, "step": 1011 }, { "epoch": 0.31, "learning_rate": 0.0003104716221932143, "loss": 4.5203, "step": 1012 }, { "epoch": 0.31, "learning_rate": 0.00031043599313397704, "loss": 4.4711, "step": 1013 }, { "epoch": 0.31, "learning_rate": 0.0003104003640747398, "loss": 4.6929, "step": 1014 }, { "epoch": 0.31, "learning_rate": 0.0003103647350155026, "loss": 4.7625, "step": 1015 }, { "epoch": 0.31, "learning_rate": 0.0003103291059562654, "loss": 4.8906, "step": 1016 }, { "epoch": 0.31, "learning_rate": 0.00031029347689702817, "loss": 4.8319, "step": 1017 }, { "epoch": 0.31, "learning_rate": 0.0003102578478377909, "loss": 4.3231, "step": 1018 }, { "epoch": 0.31, "learning_rate": 0.0003102222187785537, "loss": 4.5903, "step": 1019 }, { "epoch": 0.31, "learning_rate": 0.00031018658971931646, "loss": 4.6172, "step": 1020 }, { "epoch": 0.32, "learning_rate": 0.00031015096066007924, "loss": 5.0053, "step": 1021 }, { "epoch": 0.32, "learning_rate": 0.00031011533160084197, "loss": 4.7422, "step": 1022 }, { "epoch": 0.32, "learning_rate": 0.0003100797025416048, "loss": 4.5341, "step": 1023 }, { "epoch": 0.32, "learning_rate": 0.00031004407348236753, "loss": 4.6354, "step": 1024 }, { "epoch": 0.32, "learning_rate": 0.0003100084444231303, "loss": 4.5176, "step": 1025 }, { "epoch": 0.32, "learning_rate": 0.0003099728153638931, "loss": 4.3475, "step": 1026 }, { "epoch": 0.32, "learning_rate": 0.0003099371863046558, "loss": 4.4205, "step": 1027 }, { "epoch": 0.32, "learning_rate": 0.00030990155724541866, "loss": 4.645, "step": 1028 }, { "epoch": 0.32, "learning_rate": 0.0003098659281861814, "loss": 4.6165, "step": 1029 }, { "epoch": 0.32, "learning_rate": 0.00030983029912694417, "loss": 4.5766, "step": 1030 }, { "epoch": 0.32, "learning_rate": 0.00030979467006770695, "loss": 4.4004, "step": 1031 }, { "epoch": 0.32, "learning_rate": 0.0003097590410084697, "loss": 4.6851, "step": 1032 }, { "epoch": 0.32, "learning_rate": 0.0003097234119492325, "loss": 4.4411, "step": 1033 }, { "epoch": 0.32, "learning_rate": 0.00030968778288999524, "loss": 4.406, "step": 1034 }, { "epoch": 0.32, "learning_rate": 0.000309652153830758, "loss": 4.3268, "step": 1035 }, { "epoch": 0.32, "learning_rate": 0.0003096165247715208, "loss": 5.0019, "step": 1036 }, { "epoch": 0.32, "learning_rate": 0.00030958089571228353, "loss": 4.3991, "step": 1037 }, { "epoch": 0.32, "learning_rate": 0.00030954526665304637, "loss": 4.2463, "step": 1038 }, { "epoch": 0.32, "learning_rate": 0.0003095096375938091, "loss": 4.3169, "step": 1039 }, { "epoch": 0.32, "learning_rate": 0.0003094740085345719, "loss": 4.1203, "step": 1040 }, { "epoch": 0.32, "learning_rate": 0.00030943837947533466, "loss": 4.3825, "step": 1041 }, { "epoch": 0.32, "learning_rate": 0.00030940275041609744, "loss": 4.0861, "step": 1042 }, { "epoch": 0.32, "learning_rate": 0.00030936712135686017, "loss": 4.1061, "step": 1043 }, { "epoch": 0.32, "learning_rate": 0.00030933149229762295, "loss": 3.6647, "step": 1044 }, { "epoch": 0.32, "learning_rate": 0.00030929586323838573, "loss": 3.6897, "step": 1045 }, { "epoch": 0.32, "learning_rate": 0.0003092602341791485, "loss": 3.6912, "step": 1046 }, { "epoch": 0.32, "learning_rate": 0.0003092246051199113, "loss": 3.6683, "step": 1047 }, { "epoch": 0.32, "learning_rate": 0.000309188976060674, "loss": 3.2097, "step": 1048 }, { "epoch": 0.32, "learning_rate": 0.0003091533470014368, "loss": 3.4005, "step": 1049 }, { "epoch": 0.32, "learning_rate": 0.0003091177179421996, "loss": 3.1207, "step": 1050 }, { "epoch": 0.32, "learning_rate": 0.00030908208888296237, "loss": 6.6301, "step": 1051 }, { "epoch": 0.32, "learning_rate": 0.00030904645982372515, "loss": 5.6808, "step": 1052 }, { "epoch": 0.32, "learning_rate": 0.0003090108307644879, "loss": 5.4759, "step": 1053 }, { "epoch": 0.33, "learning_rate": 0.00030897520170525066, "loss": 5.703, "step": 1054 }, { "epoch": 0.33, "learning_rate": 0.00030893957264601344, "loss": 5.3103, "step": 1055 }, { "epoch": 0.33, "learning_rate": 0.00030890394358677617, "loss": 5.0595, "step": 1056 }, { "epoch": 0.33, "learning_rate": 0.000308868314527539, "loss": 4.7962, "step": 1057 }, { "epoch": 0.33, "learning_rate": 0.00030883268546830174, "loss": 5.0354, "step": 1058 }, { "epoch": 0.33, "learning_rate": 0.0003087970564090645, "loss": 4.854, "step": 1059 }, { "epoch": 0.33, "learning_rate": 0.0003087614273498273, "loss": 4.6391, "step": 1060 }, { "epoch": 0.33, "learning_rate": 0.00030872579829059, "loss": 5.0223, "step": 1061 }, { "epoch": 0.33, "learning_rate": 0.00030869016923135286, "loss": 4.8927, "step": 1062 }, { "epoch": 0.33, "learning_rate": 0.0003086545401721156, "loss": 4.4353, "step": 1063 }, { "epoch": 0.33, "learning_rate": 0.00030861891111287837, "loss": 5.0053, "step": 1064 }, { "epoch": 0.33, "learning_rate": 0.00030858328205364115, "loss": 4.6891, "step": 1065 }, { "epoch": 0.33, "learning_rate": 0.00030854765299440394, "loss": 4.7675, "step": 1066 }, { "epoch": 0.33, "learning_rate": 0.0003085120239351667, "loss": 4.5319, "step": 1067 }, { "epoch": 0.33, "learning_rate": 0.00030847639487592945, "loss": 4.7498, "step": 1068 }, { "epoch": 0.33, "learning_rate": 0.00030844076581669223, "loss": 4.6845, "step": 1069 }, { "epoch": 0.33, "learning_rate": 0.000308405136757455, "loss": 4.4324, "step": 1070 }, { "epoch": 0.33, "learning_rate": 0.0003083695076982178, "loss": 4.9639, "step": 1071 }, { "epoch": 0.33, "learning_rate": 0.0003083338786389806, "loss": 4.58, "step": 1072 }, { "epoch": 0.33, "learning_rate": 0.0003082982495797433, "loss": 4.7139, "step": 1073 }, { "epoch": 0.33, "learning_rate": 0.0003082626205205061, "loss": 4.5709, "step": 1074 }, { "epoch": 0.33, "learning_rate": 0.00030822699146126886, "loss": 4.4785, "step": 1075 }, { "epoch": 0.33, "learning_rate": 0.00030819136240203165, "loss": 4.531, "step": 1076 }, { "epoch": 0.33, "learning_rate": 0.0003081557333427944, "loss": 4.3016, "step": 1077 }, { "epoch": 0.33, "learning_rate": 0.00030812010428355716, "loss": 4.5966, "step": 1078 }, { "epoch": 0.33, "learning_rate": 0.00030808447522431994, "loss": 4.2484, "step": 1079 }, { "epoch": 0.33, "learning_rate": 0.0003080488461650827, "loss": 4.741, "step": 1080 }, { "epoch": 0.33, "learning_rate": 0.0003080132171058455, "loss": 4.3773, "step": 1081 }, { "epoch": 0.33, "learning_rate": 0.00030797758804660823, "loss": 4.6006, "step": 1082 }, { "epoch": 0.33, "learning_rate": 0.000307941958987371, "loss": 4.3654, "step": 1083 }, { "epoch": 0.33, "learning_rate": 0.0003079063299281338, "loss": 4.1866, "step": 1084 }, { "epoch": 0.33, "learning_rate": 0.0003078707008688966, "loss": 4.2877, "step": 1085 }, { "epoch": 0.34, "learning_rate": 0.00030783507180965936, "loss": 4.6805, "step": 1086 }, { "epoch": 0.34, "learning_rate": 0.0003077994427504221, "loss": 4.3094, "step": 1087 }, { "epoch": 0.34, "learning_rate": 0.00030776381369118487, "loss": 4.1768, "step": 1088 }, { "epoch": 0.34, "learning_rate": 0.00030772818463194765, "loss": 4.1984, "step": 1089 }, { "epoch": 0.34, "learning_rate": 0.00030769255557271043, "loss": 4.3514, "step": 1090 }, { "epoch": 0.34, "learning_rate": 0.0003076569265134732, "loss": 4.148, "step": 1091 }, { "epoch": 0.34, "learning_rate": 0.00030762129745423594, "loss": 3.7951, "step": 1092 }, { "epoch": 0.34, "learning_rate": 0.0003075856683949988, "loss": 4.2785, "step": 1093 }, { "epoch": 0.34, "learning_rate": 0.0003075500393357615, "loss": 3.8842, "step": 1094 }, { "epoch": 0.34, "learning_rate": 0.0003075144102765243, "loss": 3.6878, "step": 1095 }, { "epoch": 0.34, "learning_rate": 0.00030747878121728707, "loss": 3.4125, "step": 1096 }, { "epoch": 0.34, "learning_rate": 0.0003074431521580498, "loss": 3.4207, "step": 1097 }, { "epoch": 0.34, "learning_rate": 0.00030740752309881263, "loss": 3.3905, "step": 1098 }, { "epoch": 0.34, "learning_rate": 0.00030737189403957536, "loss": 3.3195, "step": 1099 }, { "epoch": 0.34, "learning_rate": 0.00030733626498033814, "loss": 3.5056, "step": 1100 }, { "epoch": 0.34, "learning_rate": 0.0003073006359211009, "loss": 6.0964, "step": 1101 }, { "epoch": 0.34, "learning_rate": 0.00030726500686186365, "loss": 5.6064, "step": 1102 }, { "epoch": 0.34, "learning_rate": 0.00030722937780262643, "loss": 5.8648, "step": 1103 }, { "epoch": 0.34, "learning_rate": 0.0003071937487433892, "loss": 5.3246, "step": 1104 }, { "epoch": 0.34, "learning_rate": 0.000307158119684152, "loss": 4.8642, "step": 1105 }, { "epoch": 0.34, "learning_rate": 0.0003071224906249148, "loss": 5.1456, "step": 1106 }, { "epoch": 0.34, "learning_rate": 0.0003070868615656775, "loss": 5.1185, "step": 1107 }, { "epoch": 0.34, "learning_rate": 0.0003070512325064403, "loss": 5.0108, "step": 1108 }, { "epoch": 0.34, "learning_rate": 0.00030701560344720307, "loss": 4.745, "step": 1109 }, { "epoch": 0.34, "learning_rate": 0.00030697997438796585, "loss": 5.1636, "step": 1110 }, { "epoch": 0.34, "learning_rate": 0.00030694434532872863, "loss": 4.728, "step": 1111 }, { "epoch": 0.34, "learning_rate": 0.00030690871626949136, "loss": 4.7272, "step": 1112 }, { "epoch": 0.34, "learning_rate": 0.00030687308721025414, "loss": 4.8844, "step": 1113 }, { "epoch": 0.34, "learning_rate": 0.0003068374581510169, "loss": 4.8765, "step": 1114 }, { "epoch": 0.34, "learning_rate": 0.0003068018290917797, "loss": 4.8006, "step": 1115 }, { "epoch": 0.34, "learning_rate": 0.00030676620003254243, "loss": 4.6507, "step": 1116 }, { "epoch": 0.34, "learning_rate": 0.00030673057097330527, "loss": 4.4997, "step": 1117 }, { "epoch": 0.35, "learning_rate": 0.000306694941914068, "loss": 4.7253, "step": 1118 }, { "epoch": 0.35, "learning_rate": 0.0003066593128548308, "loss": 4.6503, "step": 1119 }, { "epoch": 0.35, "learning_rate": 0.00030662368379559356, "loss": 4.6227, "step": 1120 }, { "epoch": 0.35, "learning_rate": 0.0003065880547363563, "loss": 4.7162, "step": 1121 }, { "epoch": 0.35, "learning_rate": 0.0003065524256771191, "loss": 4.6373, "step": 1122 }, { "epoch": 0.35, "learning_rate": 0.00030651679661788185, "loss": 4.5818, "step": 1123 }, { "epoch": 0.35, "learning_rate": 0.00030648116755864463, "loss": 4.5671, "step": 1124 }, { "epoch": 0.35, "learning_rate": 0.0003064455384994074, "loss": 4.4879, "step": 1125 }, { "epoch": 0.35, "learning_rate": 0.00030640990944017014, "loss": 4.2044, "step": 1126 }, { "epoch": 0.35, "learning_rate": 0.000306374280380933, "loss": 4.6453, "step": 1127 }, { "epoch": 0.35, "learning_rate": 0.0003063386513216957, "loss": 4.4925, "step": 1128 }, { "epoch": 0.35, "learning_rate": 0.0003063030222624585, "loss": 4.4086, "step": 1129 }, { "epoch": 0.35, "learning_rate": 0.00030626739320322127, "loss": 4.7093, "step": 1130 }, { "epoch": 0.35, "learning_rate": 0.000306231764143984, "loss": 4.5648, "step": 1131 }, { "epoch": 0.35, "learning_rate": 0.00030619613508474684, "loss": 4.3671, "step": 1132 }, { "epoch": 0.35, "learning_rate": 0.00030616050602550956, "loss": 4.4897, "step": 1133 }, { "epoch": 0.35, "learning_rate": 0.00030612487696627234, "loss": 4.3607, "step": 1134 }, { "epoch": 0.35, "learning_rate": 0.0003060892479070351, "loss": 4.259, "step": 1135 }, { "epoch": 0.35, "learning_rate": 0.0003060536188477979, "loss": 4.4788, "step": 1136 }, { "epoch": 0.35, "learning_rate": 0.00030601798978856064, "loss": 3.9928, "step": 1137 }, { "epoch": 0.35, "learning_rate": 0.0003059823607293234, "loss": 3.9577, "step": 1138 }, { "epoch": 0.35, "learning_rate": 0.0003059467316700862, "loss": 4.0182, "step": 1139 }, { "epoch": 0.35, "learning_rate": 0.000305911102610849, "loss": 4.0482, "step": 1140 }, { "epoch": 0.35, "learning_rate": 0.00030587547355161176, "loss": 4.2836, "step": 1141 }, { "epoch": 0.35, "learning_rate": 0.0003058398444923745, "loss": 3.9542, "step": 1142 }, { "epoch": 0.35, "learning_rate": 0.0003058042154331373, "loss": 3.9558, "step": 1143 }, { "epoch": 0.35, "learning_rate": 0.00030576858637390006, "loss": 4.2283, "step": 1144 }, { "epoch": 0.35, "learning_rate": 0.00030573295731466284, "loss": 4.0596, "step": 1145 }, { "epoch": 0.35, "learning_rate": 0.0003056973282554256, "loss": 3.9759, "step": 1146 }, { "epoch": 0.35, "learning_rate": 0.00030566169919618835, "loss": 3.7101, "step": 1147 }, { "epoch": 0.35, "learning_rate": 0.00030562607013695113, "loss": 3.4012, "step": 1148 }, { "epoch": 0.35, "learning_rate": 0.0003055904410777139, "loss": 3.583, "step": 1149 }, { "epoch": 0.35, "learning_rate": 0.00030555481201847664, "loss": 3.093, "step": 1150 }, { "epoch": 0.36, "learning_rate": 0.0003055191829592395, "loss": 6.2242, "step": 1151 }, { "epoch": 0.36, "learning_rate": 0.0003054835539000022, "loss": 6.008, "step": 1152 }, { "epoch": 0.36, "learning_rate": 0.000305447924840765, "loss": 5.5076, "step": 1153 }, { "epoch": 0.36, "learning_rate": 0.00030541229578152777, "loss": 5.2832, "step": 1154 }, { "epoch": 0.36, "learning_rate": 0.0003053766667222905, "loss": 4.8887, "step": 1155 }, { "epoch": 0.36, "learning_rate": 0.00030534103766305333, "loss": 4.8274, "step": 1156 }, { "epoch": 0.36, "learning_rate": 0.00030530540860381606, "loss": 4.7412, "step": 1157 }, { "epoch": 0.36, "learning_rate": 0.00030526977954457884, "loss": 4.9053, "step": 1158 }, { "epoch": 0.36, "learning_rate": 0.0003052341504853416, "loss": 4.8952, "step": 1159 }, { "epoch": 0.36, "learning_rate": 0.0003051985214261044, "loss": 4.9197, "step": 1160 }, { "epoch": 0.36, "learning_rate": 0.0003051628923668672, "loss": 4.7081, "step": 1161 }, { "epoch": 0.36, "learning_rate": 0.0003051272633076299, "loss": 4.6475, "step": 1162 }, { "epoch": 0.36, "learning_rate": 0.0003050916342483927, "loss": 4.5716, "step": 1163 }, { "epoch": 0.36, "learning_rate": 0.0003050560051891555, "loss": 4.692, "step": 1164 }, { "epoch": 0.36, "learning_rate": 0.00030502037612991826, "loss": 4.4114, "step": 1165 }, { "epoch": 0.36, "learning_rate": 0.00030498474707068104, "loss": 4.7366, "step": 1166 }, { "epoch": 0.36, "learning_rate": 0.00030494911801144377, "loss": 4.5152, "step": 1167 }, { "epoch": 0.36, "learning_rate": 0.00030491348895220655, "loss": 4.5177, "step": 1168 }, { "epoch": 0.36, "learning_rate": 0.00030487785989296933, "loss": 4.7018, "step": 1169 }, { "epoch": 0.36, "learning_rate": 0.0003048422308337321, "loss": 4.7174, "step": 1170 }, { "epoch": 0.36, "learning_rate": 0.00030480660177449484, "loss": 4.2952, "step": 1171 }, { "epoch": 0.36, "learning_rate": 0.0003047709727152576, "loss": 4.527, "step": 1172 }, { "epoch": 0.36, "learning_rate": 0.0003047353436560204, "loss": 4.5972, "step": 1173 }, { "epoch": 0.36, "learning_rate": 0.0003046997145967832, "loss": 4.6169, "step": 1174 }, { "epoch": 0.36, "learning_rate": 0.00030466408553754597, "loss": 4.4527, "step": 1175 }, { "epoch": 0.36, "learning_rate": 0.0003046284564783087, "loss": 4.529, "step": 1176 }, { "epoch": 0.36, "learning_rate": 0.0003045928274190715, "loss": 4.5556, "step": 1177 }, { "epoch": 0.36, "learning_rate": 0.00030455719835983426, "loss": 4.3729, "step": 1178 }, { "epoch": 0.36, "learning_rate": 0.00030452156930059704, "loss": 4.4536, "step": 1179 }, { "epoch": 0.36, "learning_rate": 0.0003044859402413598, "loss": 4.1811, "step": 1180 }, { "epoch": 0.36, "learning_rate": 0.00030445031118212255, "loss": 4.7439, "step": 1181 }, { "epoch": 0.36, "learning_rate": 0.00030441468212288533, "loss": 4.3372, "step": 1182 }, { "epoch": 0.37, "learning_rate": 0.0003043790530636481, "loss": 4.0854, "step": 1183 }, { "epoch": 0.37, "learning_rate": 0.0003043434240044109, "loss": 4.413, "step": 1184 }, { "epoch": 0.37, "learning_rate": 0.0003043077949451737, "loss": 4.224, "step": 1185 }, { "epoch": 0.37, "learning_rate": 0.0003042721658859364, "loss": 4.1362, "step": 1186 }, { "epoch": 0.37, "learning_rate": 0.00030423653682669924, "loss": 4.2719, "step": 1187 }, { "epoch": 0.37, "learning_rate": 0.00030420090776746197, "loss": 4.0646, "step": 1188 }, { "epoch": 0.37, "learning_rate": 0.00030416527870822475, "loss": 4.3789, "step": 1189 }, { "epoch": 0.37, "learning_rate": 0.00030412964964898753, "loss": 3.9628, "step": 1190 }, { "epoch": 0.37, "learning_rate": 0.00030409402058975026, "loss": 4.2604, "step": 1191 }, { "epoch": 0.37, "learning_rate": 0.00030405839153051304, "loss": 3.632, "step": 1192 }, { "epoch": 0.37, "learning_rate": 0.0003040227624712758, "loss": 3.6616, "step": 1193 }, { "epoch": 0.37, "learning_rate": 0.0003039871334120386, "loss": 3.8303, "step": 1194 }, { "epoch": 0.37, "learning_rate": 0.0003039515043528014, "loss": 3.9684, "step": 1195 }, { "epoch": 0.37, "learning_rate": 0.0003039158752935641, "loss": 3.4594, "step": 1196 }, { "epoch": 0.37, "learning_rate": 0.0003038802462343269, "loss": 3.3668, "step": 1197 }, { "epoch": 0.37, "learning_rate": 0.0003038446171750897, "loss": 3.3288, "step": 1198 }, { "epoch": 0.37, "learning_rate": 0.00030380898811585246, "loss": 3.1448, "step": 1199 }, { "epoch": 0.37, "learning_rate": 0.00030377335905661524, "loss": 3.0514, "step": 1200 }, { "epoch": 0.37, "learning_rate": 0.00030373772999737797, "loss": 6.3367, "step": 1201 }, { "epoch": 0.37, "learning_rate": 0.00030370210093814075, "loss": 5.6882, "step": 1202 }, { "epoch": 0.37, "learning_rate": 0.00030366647187890354, "loss": 5.7425, "step": 1203 }, { "epoch": 0.37, "learning_rate": 0.0003036308428196663, "loss": 5.6542, "step": 1204 }, { "epoch": 0.37, "learning_rate": 0.00030359521376042904, "loss": 5.1333, "step": 1205 }, { "epoch": 0.37, "learning_rate": 0.0003035595847011919, "loss": 5.0806, "step": 1206 }, { "epoch": 0.37, "learning_rate": 0.0003035239556419546, "loss": 5.0461, "step": 1207 }, { "epoch": 0.37, "learning_rate": 0.0003034883265827174, "loss": 4.7644, "step": 1208 }, { "epoch": 0.37, "learning_rate": 0.00030345269752348017, "loss": 4.7942, "step": 1209 }, { "epoch": 0.37, "learning_rate": 0.0003034170684642429, "loss": 5.3676, "step": 1210 }, { "epoch": 0.37, "learning_rate": 0.00030338143940500574, "loss": 5.022, "step": 1211 }, { "epoch": 0.37, "learning_rate": 0.00030334581034576846, "loss": 4.7159, "step": 1212 }, { "epoch": 0.37, "learning_rate": 0.00030331018128653125, "loss": 4.8108, "step": 1213 }, { "epoch": 0.37, "learning_rate": 0.00030327455222729403, "loss": 4.8116, "step": 1214 }, { "epoch": 0.37, "learning_rate": 0.00030323892316805676, "loss": 4.435, "step": 1215 }, { "epoch": 0.38, "learning_rate": 0.0003032032941088196, "loss": 4.4862, "step": 1216 }, { "epoch": 0.38, "learning_rate": 0.0003031676650495823, "loss": 4.614, "step": 1217 }, { "epoch": 0.38, "learning_rate": 0.0003031320359903451, "loss": 4.6822, "step": 1218 }, { "epoch": 0.38, "learning_rate": 0.0003030964069311079, "loss": 4.4671, "step": 1219 }, { "epoch": 0.38, "learning_rate": 0.0003030607778718706, "loss": 4.4582, "step": 1220 }, { "epoch": 0.38, "learning_rate": 0.00030302514881263345, "loss": 4.7106, "step": 1221 }, { "epoch": 0.38, "learning_rate": 0.0003029895197533962, "loss": 4.9276, "step": 1222 }, { "epoch": 0.38, "learning_rate": 0.00030295389069415896, "loss": 4.485, "step": 1223 }, { "epoch": 0.38, "learning_rate": 0.00030291826163492174, "loss": 4.4922, "step": 1224 }, { "epoch": 0.38, "learning_rate": 0.00030288263257568447, "loss": 4.6685, "step": 1225 }, { "epoch": 0.38, "learning_rate": 0.0003028470035164473, "loss": 4.6698, "step": 1226 }, { "epoch": 0.38, "learning_rate": 0.00030281137445721003, "loss": 4.6098, "step": 1227 }, { "epoch": 0.38, "learning_rate": 0.0003027757453979728, "loss": 4.3242, "step": 1228 }, { "epoch": 0.38, "learning_rate": 0.0003027401163387356, "loss": 4.2578, "step": 1229 }, { "epoch": 0.38, "learning_rate": 0.0003027044872794984, "loss": 4.4822, "step": 1230 }, { "epoch": 0.38, "learning_rate": 0.0003026688582202611, "loss": 4.096, "step": 1231 }, { "epoch": 0.38, "learning_rate": 0.0003026332291610239, "loss": 4.5255, "step": 1232 }, { "epoch": 0.38, "learning_rate": 0.00030259760010178667, "loss": 4.0442, "step": 1233 }, { "epoch": 0.38, "learning_rate": 0.00030256197104254945, "loss": 4.2654, "step": 1234 }, { "epoch": 0.38, "learning_rate": 0.00030252634198331223, "loss": 4.4865, "step": 1235 }, { "epoch": 0.38, "learning_rate": 0.00030249071292407496, "loss": 4.1198, "step": 1236 }, { "epoch": 0.38, "learning_rate": 0.00030245508386483774, "loss": 4.1931, "step": 1237 }, { "epoch": 0.38, "learning_rate": 0.0003024194548056005, "loss": 4.5079, "step": 1238 }, { "epoch": 0.38, "learning_rate": 0.00030238382574636325, "loss": 3.8155, "step": 1239 }, { "epoch": 0.38, "learning_rate": 0.0003023481966871261, "loss": 3.8261, "step": 1240 }, { "epoch": 0.38, "learning_rate": 0.0003023125676278888, "loss": 4.0365, "step": 1241 }, { "epoch": 0.38, "learning_rate": 0.0003022769385686516, "loss": 4.0544, "step": 1242 }, { "epoch": 0.38, "learning_rate": 0.0003022413095094144, "loss": 4.0254, "step": 1243 }, { "epoch": 0.38, "learning_rate": 0.0003022056804501771, "loss": 3.801, "step": 1244 }, { "epoch": 0.38, "learning_rate": 0.00030217005139093994, "loss": 3.61, "step": 1245 }, { "epoch": 0.38, "learning_rate": 0.00030213442233170267, "loss": 3.4299, "step": 1246 }, { "epoch": 0.38, "learning_rate": 0.00030209879327246545, "loss": 3.747, "step": 1247 }, { "epoch": 0.39, "learning_rate": 0.00030206316421322823, "loss": 3.1419, "step": 1248 }, { "epoch": 0.39, "learning_rate": 0.000302027535153991, "loss": 3.211, "step": 1249 }, { "epoch": 0.39, "learning_rate": 0.0003019919060947538, "loss": 3.1184, "step": 1250 }, { "epoch": 0.39, "learning_rate": 0.0003019562770355165, "loss": 5.8578, "step": 1251 }, { "epoch": 0.39, "learning_rate": 0.0003019206479762793, "loss": 5.5349, "step": 1252 }, { "epoch": 0.39, "learning_rate": 0.0003018850189170421, "loss": 5.3148, "step": 1253 }, { "epoch": 0.39, "learning_rate": 0.00030184938985780487, "loss": 5.2813, "step": 1254 }, { "epoch": 0.39, "learning_rate": 0.00030181376079856765, "loss": 5.0839, "step": 1255 }, { "epoch": 0.39, "learning_rate": 0.0003017781317393304, "loss": 5.1541, "step": 1256 }, { "epoch": 0.39, "learning_rate": 0.00030174250268009316, "loss": 4.8838, "step": 1257 }, { "epoch": 0.39, "learning_rate": 0.00030170687362085594, "loss": 4.9196, "step": 1258 }, { "epoch": 0.39, "learning_rate": 0.0003016712445616187, "loss": 4.7085, "step": 1259 }, { "epoch": 0.39, "learning_rate": 0.0003016356155023815, "loss": 4.6361, "step": 1260 }, { "epoch": 0.39, "learning_rate": 0.00030159998644314423, "loss": 4.7814, "step": 1261 }, { "epoch": 0.39, "learning_rate": 0.000301564357383907, "loss": 4.7659, "step": 1262 }, { "epoch": 0.39, "learning_rate": 0.0003015287283246698, "loss": 4.5659, "step": 1263 }, { "epoch": 0.39, "learning_rate": 0.0003014930992654326, "loss": 4.5773, "step": 1264 }, { "epoch": 0.39, "learning_rate": 0.0003014574702061953, "loss": 4.7589, "step": 1265 }, { "epoch": 0.39, "learning_rate": 0.0003014218411469581, "loss": 4.8611, "step": 1266 }, { "epoch": 0.39, "learning_rate": 0.00030138621208772087, "loss": 4.3796, "step": 1267 }, { "epoch": 0.39, "learning_rate": 0.00030135058302848365, "loss": 4.6032, "step": 1268 }, { "epoch": 0.39, "learning_rate": 0.00030131495396924643, "loss": 4.847, "step": 1269 }, { "epoch": 0.39, "learning_rate": 0.00030127932491000916, "loss": 4.5729, "step": 1270 }, { "epoch": 0.39, "learning_rate": 0.00030124369585077194, "loss": 4.5536, "step": 1271 }, { "epoch": 0.39, "learning_rate": 0.0003012080667915347, "loss": 4.584, "step": 1272 }, { "epoch": 0.39, "learning_rate": 0.0003011724377322975, "loss": 4.7049, "step": 1273 }, { "epoch": 0.39, "learning_rate": 0.0003011368086730603, "loss": 4.8543, "step": 1274 }, { "epoch": 0.39, "learning_rate": 0.000301101179613823, "loss": 4.4413, "step": 1275 }, { "epoch": 0.39, "learning_rate": 0.0003010655505545858, "loss": 4.3694, "step": 1276 }, { "epoch": 0.39, "learning_rate": 0.0003010299214953486, "loss": 4.436, "step": 1277 }, { "epoch": 0.39, "learning_rate": 0.00030099429243611136, "loss": 4.2365, "step": 1278 }, { "epoch": 0.39, "learning_rate": 0.00030095866337687414, "loss": 4.292, "step": 1279 }, { "epoch": 0.4, "learning_rate": 0.00030092303431763687, "loss": 4.407, "step": 1280 }, { "epoch": 0.4, "learning_rate": 0.0003008874052583997, "loss": 4.5448, "step": 1281 }, { "epoch": 0.4, "learning_rate": 0.00030085177619916244, "loss": 4.5506, "step": 1282 }, { "epoch": 0.4, "learning_rate": 0.0003008161471399252, "loss": 4.2262, "step": 1283 }, { "epoch": 0.4, "learning_rate": 0.000300780518080688, "loss": 4.359, "step": 1284 }, { "epoch": 0.4, "learning_rate": 0.00030074488902145073, "loss": 4.3358, "step": 1285 }, { "epoch": 0.4, "learning_rate": 0.0003007092599622135, "loss": 4.217, "step": 1286 }, { "epoch": 0.4, "learning_rate": 0.0003006736309029763, "loss": 4.0225, "step": 1287 }, { "epoch": 0.4, "learning_rate": 0.0003006380018437391, "loss": 4.2107, "step": 1288 }, { "epoch": 0.4, "learning_rate": 0.00030060237278450186, "loss": 4.2255, "step": 1289 }, { "epoch": 0.4, "learning_rate": 0.0003005667437252646, "loss": 4.0149, "step": 1290 }, { "epoch": 0.4, "learning_rate": 0.00030053111466602736, "loss": 3.7913, "step": 1291 }, { "epoch": 0.4, "learning_rate": 0.00030049548560679015, "loss": 3.9904, "step": 1292 }, { "epoch": 0.4, "learning_rate": 0.00030045985654755293, "loss": 3.6373, "step": 1293 }, { "epoch": 0.4, "learning_rate": 0.0003004242274883157, "loss": 3.7541, "step": 1294 }, { "epoch": 0.4, "learning_rate": 0.00030038859842907844, "loss": 3.3555, "step": 1295 }, { "epoch": 0.4, "learning_rate": 0.0003003529693698412, "loss": 3.2242, "step": 1296 }, { "epoch": 0.4, "learning_rate": 0.000300317340310604, "loss": 3.3962, "step": 1297 }, { "epoch": 0.4, "learning_rate": 0.0003002817112513668, "loss": 3.0672, "step": 1298 }, { "epoch": 0.4, "learning_rate": 0.0003002460821921295, "loss": 3.2196, "step": 1299 }, { "epoch": 0.4, "learning_rate": 0.00030021045313289235, "loss": 2.8454, "step": 1300 }, { "epoch": 0.4, "learning_rate": 0.0003001748240736551, "loss": 6.1441, "step": 1301 }, { "epoch": 0.4, "learning_rate": 0.00030013919501441786, "loss": 5.9909, "step": 1302 }, { "epoch": 0.4, "learning_rate": 0.00030010356595518064, "loss": 5.3787, "step": 1303 }, { "epoch": 0.4, "learning_rate": 0.00030006793689594337, "loss": 5.157, "step": 1304 }, { "epoch": 0.4, "learning_rate": 0.0003000323078367062, "loss": 5.518, "step": 1305 }, { "epoch": 0.4, "learning_rate": 0.00029999667877746893, "loss": 4.9633, "step": 1306 }, { "epoch": 0.4, "learning_rate": 0.0002999610497182317, "loss": 5.153, "step": 1307 }, { "epoch": 0.4, "learning_rate": 0.0002999254206589945, "loss": 4.9611, "step": 1308 }, { "epoch": 0.4, "learning_rate": 0.0002998897915997572, "loss": 4.8557, "step": 1309 }, { "epoch": 0.4, "learning_rate": 0.00029985416254052006, "loss": 5.0678, "step": 1310 }, { "epoch": 0.4, "learning_rate": 0.0002998185334812828, "loss": 4.795, "step": 1311 }, { "epoch": 0.4, "learning_rate": 0.00029978290442204557, "loss": 5.0049, "step": 1312 }, { "epoch": 0.41, "learning_rate": 0.00029974727536280835, "loss": 4.8876, "step": 1313 }, { "epoch": 0.41, "learning_rate": 0.0002997116463035711, "loss": 4.782, "step": 1314 }, { "epoch": 0.41, "learning_rate": 0.0002996760172443339, "loss": 4.6578, "step": 1315 }, { "epoch": 0.41, "learning_rate": 0.00029964038818509664, "loss": 4.6495, "step": 1316 }, { "epoch": 0.41, "learning_rate": 0.0002996047591258594, "loss": 4.739, "step": 1317 }, { "epoch": 0.41, "learning_rate": 0.0002995691300666222, "loss": 4.5213, "step": 1318 }, { "epoch": 0.41, "learning_rate": 0.00029953350100738493, "loss": 4.8373, "step": 1319 }, { "epoch": 0.41, "learning_rate": 0.0002994978719481477, "loss": 4.698, "step": 1320 }, { "epoch": 0.41, "learning_rate": 0.0002994622428889105, "loss": 4.5739, "step": 1321 }, { "epoch": 0.41, "learning_rate": 0.0002994266138296733, "loss": 4.734, "step": 1322 }, { "epoch": 0.41, "learning_rate": 0.00029939098477043606, "loss": 4.44, "step": 1323 }, { "epoch": 0.41, "learning_rate": 0.00029935535571119884, "loss": 4.6595, "step": 1324 }, { "epoch": 0.41, "learning_rate": 0.00029931972665196157, "loss": 4.0913, "step": 1325 }, { "epoch": 0.41, "learning_rate": 0.00029928409759272435, "loss": 4.6594, "step": 1326 }, { "epoch": 0.41, "learning_rate": 0.00029924846853348713, "loss": 4.4297, "step": 1327 }, { "epoch": 0.41, "learning_rate": 0.0002992128394742499, "loss": 4.3579, "step": 1328 }, { "epoch": 0.41, "learning_rate": 0.0002991772104150127, "loss": 4.5808, "step": 1329 }, { "epoch": 0.41, "learning_rate": 0.0002991415813557754, "loss": 4.5049, "step": 1330 }, { "epoch": 0.41, "learning_rate": 0.0002991059522965382, "loss": 4.4733, "step": 1331 }, { "epoch": 0.41, "learning_rate": 0.000299070323237301, "loss": 4.2266, "step": 1332 }, { "epoch": 0.41, "learning_rate": 0.0002990346941780637, "loss": 4.3146, "step": 1333 }, { "epoch": 0.41, "learning_rate": 0.00029899906511882655, "loss": 4.5649, "step": 1334 }, { "epoch": 0.41, "learning_rate": 0.0002989634360595893, "loss": 4.3494, "step": 1335 }, { "epoch": 0.41, "learning_rate": 0.00029892780700035206, "loss": 4.1564, "step": 1336 }, { "epoch": 0.41, "learning_rate": 0.00029889217794111484, "loss": 4.3388, "step": 1337 }, { "epoch": 0.41, "learning_rate": 0.00029885654888187757, "loss": 3.8988, "step": 1338 }, { "epoch": 0.41, "learning_rate": 0.0002988209198226404, "loss": 4.0641, "step": 1339 }, { "epoch": 0.41, "learning_rate": 0.00029878529076340313, "loss": 3.8808, "step": 1340 }, { "epoch": 0.41, "learning_rate": 0.0002987496617041659, "loss": 4.0006, "step": 1341 }, { "epoch": 0.41, "learning_rate": 0.0002987140326449287, "loss": 3.9012, "step": 1342 }, { "epoch": 0.41, "learning_rate": 0.0002986784035856915, "loss": 3.9728, "step": 1343 }, { "epoch": 0.41, "learning_rate": 0.00029864277452645426, "loss": 3.7354, "step": 1344 }, { "epoch": 0.42, "learning_rate": 0.000298607145467217, "loss": 3.3499, "step": 1345 }, { "epoch": 0.42, "learning_rate": 0.00029857151640797977, "loss": 3.5908, "step": 1346 }, { "epoch": 0.42, "learning_rate": 0.00029853588734874255, "loss": 3.2084, "step": 1347 }, { "epoch": 0.42, "learning_rate": 0.00029850025828950534, "loss": 3.1736, "step": 1348 }, { "epoch": 0.42, "learning_rate": 0.0002984646292302681, "loss": 3.2985, "step": 1349 }, { "epoch": 0.42, "learning_rate": 0.00029842900017103084, "loss": 2.8575, "step": 1350 }, { "epoch": 0.42, "learning_rate": 0.0002983933711117936, "loss": 6.0847, "step": 1351 }, { "epoch": 0.42, "learning_rate": 0.0002983577420525564, "loss": 5.4779, "step": 1352 }, { "epoch": 0.42, "learning_rate": 0.0002983221129933192, "loss": 5.318, "step": 1353 }, { "epoch": 0.42, "learning_rate": 0.0002982864839340819, "loss": 5.1022, "step": 1354 }, { "epoch": 0.42, "learning_rate": 0.0002982508548748447, "loss": 5.3079, "step": 1355 }, { "epoch": 0.42, "learning_rate": 0.0002982152258156075, "loss": 5.0206, "step": 1356 }, { "epoch": 0.42, "learning_rate": 0.00029817959675637026, "loss": 4.474, "step": 1357 }, { "epoch": 0.42, "learning_rate": 0.00029814396769713305, "loss": 4.9875, "step": 1358 }, { "epoch": 0.42, "learning_rate": 0.0002981083386378958, "loss": 5.0414, "step": 1359 }, { "epoch": 0.42, "learning_rate": 0.00029807270957865856, "loss": 5.2205, "step": 1360 }, { "epoch": 0.42, "learning_rate": 0.00029803708051942134, "loss": 4.7262, "step": 1361 }, { "epoch": 0.42, "learning_rate": 0.0002980014514601841, "loss": 4.8278, "step": 1362 }, { "epoch": 0.42, "learning_rate": 0.0002979658224009469, "loss": 4.7, "step": 1363 }, { "epoch": 0.42, "learning_rate": 0.00029793019334170963, "loss": 4.7975, "step": 1364 }, { "epoch": 0.42, "learning_rate": 0.0002978945642824724, "loss": 4.6585, "step": 1365 }, { "epoch": 0.42, "learning_rate": 0.0002978589352232352, "loss": 4.4247, "step": 1366 }, { "epoch": 0.42, "learning_rate": 0.000297823306163998, "loss": 4.6945, "step": 1367 }, { "epoch": 0.42, "learning_rate": 0.00029778767710476076, "loss": 4.6252, "step": 1368 }, { "epoch": 0.42, "learning_rate": 0.0002977520480455235, "loss": 4.9368, "step": 1369 }, { "epoch": 0.42, "learning_rate": 0.00029771641898628627, "loss": 4.8987, "step": 1370 }, { "epoch": 0.42, "learning_rate": 0.00029768078992704905, "loss": 4.578, "step": 1371 }, { "epoch": 0.42, "learning_rate": 0.00029764516086781183, "loss": 4.5628, "step": 1372 }, { "epoch": 0.42, "learning_rate": 0.0002976095318085746, "loss": 4.6645, "step": 1373 }, { "epoch": 0.42, "learning_rate": 0.00029757390274933734, "loss": 4.4381, "step": 1374 }, { "epoch": 0.42, "learning_rate": 0.0002975382736901002, "loss": 4.2857, "step": 1375 }, { "epoch": 0.42, "learning_rate": 0.0002975026446308629, "loss": 4.3584, "step": 1376 }, { "epoch": 0.42, "learning_rate": 0.0002974670155716257, "loss": 4.4446, "step": 1377 }, { "epoch": 0.43, "learning_rate": 0.00029743138651238847, "loss": 4.4407, "step": 1378 }, { "epoch": 0.43, "learning_rate": 0.0002973957574531512, "loss": 4.2401, "step": 1379 }, { "epoch": 0.43, "learning_rate": 0.000297360128393914, "loss": 4.2348, "step": 1380 }, { "epoch": 0.43, "learning_rate": 0.00029732449933467676, "loss": 4.1892, "step": 1381 }, { "epoch": 0.43, "learning_rate": 0.00029728887027543954, "loss": 4.4059, "step": 1382 }, { "epoch": 0.43, "learning_rate": 0.0002972532412162023, "loss": 4.0896, "step": 1383 }, { "epoch": 0.43, "learning_rate": 0.00029721761215696505, "loss": 4.3106, "step": 1384 }, { "epoch": 0.43, "learning_rate": 0.00029718198309772783, "loss": 4.1299, "step": 1385 }, { "epoch": 0.43, "learning_rate": 0.0002971463540384906, "loss": 4.2921, "step": 1386 }, { "epoch": 0.43, "learning_rate": 0.0002971107249792534, "loss": 4.0942, "step": 1387 }, { "epoch": 0.43, "learning_rate": 0.0002970750959200161, "loss": 4.0125, "step": 1388 }, { "epoch": 0.43, "learning_rate": 0.0002970394668607789, "loss": 4.2825, "step": 1389 }, { "epoch": 0.43, "learning_rate": 0.0002970038378015417, "loss": 4.2438, "step": 1390 }, { "epoch": 0.43, "learning_rate": 0.00029696820874230447, "loss": 3.9249, "step": 1391 }, { "epoch": 0.43, "learning_rate": 0.00029693257968306725, "loss": 3.763, "step": 1392 }, { "epoch": 0.43, "learning_rate": 0.00029689695062383, "loss": 3.7987, "step": 1393 }, { "epoch": 0.43, "learning_rate": 0.0002968613215645928, "loss": 3.891, "step": 1394 }, { "epoch": 0.43, "learning_rate": 0.00029682569250535554, "loss": 3.824, "step": 1395 }, { "epoch": 0.43, "learning_rate": 0.0002967900634461183, "loss": 3.4456, "step": 1396 }, { "epoch": 0.43, "learning_rate": 0.0002967544343868811, "loss": 3.4597, "step": 1397 }, { "epoch": 0.43, "learning_rate": 0.00029671880532764383, "loss": 3.0946, "step": 1398 }, { "epoch": 0.43, "learning_rate": 0.00029668317626840667, "loss": 3.0734, "step": 1399 }, { "epoch": 0.43, "learning_rate": 0.0002966475472091694, "loss": 3.1359, "step": 1400 }, { "epoch": 0.43, "learning_rate": 0.0002966119181499322, "loss": 6.2601, "step": 1401 }, { "epoch": 0.43, "learning_rate": 0.00029657628909069496, "loss": 5.6439, "step": 1402 }, { "epoch": 0.43, "learning_rate": 0.0002965406600314577, "loss": 5.5004, "step": 1403 }, { "epoch": 0.43, "learning_rate": 0.0002965050309722205, "loss": 5.0289, "step": 1404 }, { "epoch": 0.43, "learning_rate": 0.00029646940191298325, "loss": 5.0701, "step": 1405 }, { "epoch": 0.43, "learning_rate": 0.00029643377285374603, "loss": 4.6878, "step": 1406 }, { "epoch": 0.43, "learning_rate": 0.0002963981437945088, "loss": 5.1698, "step": 1407 }, { "epoch": 0.43, "learning_rate": 0.00029636251473527154, "loss": 4.5904, "step": 1408 }, { "epoch": 0.43, "learning_rate": 0.0002963268856760344, "loss": 4.8074, "step": 1409 }, { "epoch": 0.44, "learning_rate": 0.0002962912566167971, "loss": 4.8577, "step": 1410 }, { "epoch": 0.44, "learning_rate": 0.0002962556275575599, "loss": 4.9662, "step": 1411 }, { "epoch": 0.44, "learning_rate": 0.00029621999849832267, "loss": 4.6517, "step": 1412 }, { "epoch": 0.44, "learning_rate": 0.0002961843694390854, "loss": 4.643, "step": 1413 }, { "epoch": 0.44, "learning_rate": 0.0002961487403798482, "loss": 4.8603, "step": 1414 }, { "epoch": 0.44, "learning_rate": 0.00029611311132061096, "loss": 4.6163, "step": 1415 }, { "epoch": 0.44, "learning_rate": 0.00029607748226137374, "loss": 4.5498, "step": 1416 }, { "epoch": 0.44, "learning_rate": 0.0002960418532021365, "loss": 4.5429, "step": 1417 }, { "epoch": 0.44, "learning_rate": 0.0002960062241428993, "loss": 4.6, "step": 1418 }, { "epoch": 0.44, "learning_rate": 0.00029597059508366204, "loss": 4.4403, "step": 1419 }, { "epoch": 0.44, "learning_rate": 0.0002959349660244248, "loss": 4.649, "step": 1420 }, { "epoch": 0.44, "learning_rate": 0.0002958993369651876, "loss": 4.5089, "step": 1421 }, { "epoch": 0.44, "learning_rate": 0.0002958637079059504, "loss": 4.7455, "step": 1422 }, { "epoch": 0.44, "learning_rate": 0.00029582807884671316, "loss": 4.6444, "step": 1423 }, { "epoch": 0.44, "learning_rate": 0.0002957924497874759, "loss": 4.5978, "step": 1424 }, { "epoch": 0.44, "learning_rate": 0.00029575682072823867, "loss": 4.6573, "step": 1425 }, { "epoch": 0.44, "learning_rate": 0.00029572119166900145, "loss": 4.6228, "step": 1426 }, { "epoch": 0.44, "learning_rate": 0.0002956855626097642, "loss": 4.5709, "step": 1427 }, { "epoch": 0.44, "learning_rate": 0.000295649933550527, "loss": 4.3087, "step": 1428 }, { "epoch": 0.44, "learning_rate": 0.00029561430449128975, "loss": 4.3838, "step": 1429 }, { "epoch": 0.44, "learning_rate": 0.00029557867543205253, "loss": 4.5056, "step": 1430 }, { "epoch": 0.44, "learning_rate": 0.0002955430463728153, "loss": 4.6182, "step": 1431 }, { "epoch": 0.44, "learning_rate": 0.00029550741731357804, "loss": 4.1732, "step": 1432 }, { "epoch": 0.44, "learning_rate": 0.0002954717882543409, "loss": 4.3924, "step": 1433 }, { "epoch": 0.44, "learning_rate": 0.0002954361591951036, "loss": 4.4977, "step": 1434 }, { "epoch": 0.44, "learning_rate": 0.0002954005301358664, "loss": 4.1768, "step": 1435 }, { "epoch": 0.44, "learning_rate": 0.00029536490107662916, "loss": 4.2573, "step": 1436 }, { "epoch": 0.44, "learning_rate": 0.00029532927201739195, "loss": 4.229, "step": 1437 }, { "epoch": 0.44, "learning_rate": 0.00029529364295815473, "loss": 4.0335, "step": 1438 }, { "epoch": 0.44, "learning_rate": 0.00029525801389891746, "loss": 3.9274, "step": 1439 }, { "epoch": 0.44, "learning_rate": 0.00029522238483968024, "loss": 4.2552, "step": 1440 }, { "epoch": 0.44, "learning_rate": 0.000295186755780443, "loss": 3.9059, "step": 1441 }, { "epoch": 0.45, "learning_rate": 0.0002951511267212058, "loss": 3.6717, "step": 1442 }, { "epoch": 0.45, "learning_rate": 0.0002951154976619686, "loss": 3.9661, "step": 1443 }, { "epoch": 0.45, "learning_rate": 0.0002950798686027313, "loss": 3.8879, "step": 1444 }, { "epoch": 0.45, "learning_rate": 0.0002950442395434941, "loss": 3.7356, "step": 1445 }, { "epoch": 0.45, "learning_rate": 0.0002950086104842569, "loss": 3.5681, "step": 1446 }, { "epoch": 0.45, "learning_rate": 0.00029497298142501966, "loss": 3.2617, "step": 1447 }, { "epoch": 0.45, "learning_rate": 0.0002949373523657824, "loss": 3.0631, "step": 1448 }, { "epoch": 0.45, "learning_rate": 0.00029490172330654517, "loss": 3.2543, "step": 1449 }, { "epoch": 0.45, "learning_rate": 0.00029486609424730795, "loss": 3.0812, "step": 1450 }, { "epoch": 0.45, "learning_rate": 0.00029483046518807073, "loss": 6.3088, "step": 1451 }, { "epoch": 0.45, "learning_rate": 0.0002947948361288335, "loss": 5.5911, "step": 1452 }, { "epoch": 0.45, "learning_rate": 0.00029475920706959624, "loss": 5.2747, "step": 1453 }, { "epoch": 0.45, "learning_rate": 0.000294723578010359, "loss": 5.2358, "step": 1454 }, { "epoch": 0.45, "learning_rate": 0.0002946879489511218, "loss": 4.873, "step": 1455 }, { "epoch": 0.45, "learning_rate": 0.0002946523198918846, "loss": 5.103, "step": 1456 }, { "epoch": 0.45, "learning_rate": 0.00029461669083264737, "loss": 5.0973, "step": 1457 }, { "epoch": 0.45, "learning_rate": 0.0002945810617734101, "loss": 4.6137, "step": 1458 }, { "epoch": 0.45, "learning_rate": 0.0002945454327141729, "loss": 4.5452, "step": 1459 }, { "epoch": 0.45, "learning_rate": 0.00029450980365493566, "loss": 4.8113, "step": 1460 }, { "epoch": 0.45, "learning_rate": 0.00029447417459569844, "loss": 4.831, "step": 1461 }, { "epoch": 0.45, "learning_rate": 0.0002944385455364612, "loss": 4.6025, "step": 1462 }, { "epoch": 0.45, "learning_rate": 0.00029440291647722395, "loss": 4.7336, "step": 1463 }, { "epoch": 0.45, "learning_rate": 0.0002943672874179868, "loss": 4.6847, "step": 1464 }, { "epoch": 0.45, "learning_rate": 0.0002943316583587495, "loss": 4.7134, "step": 1465 }, { "epoch": 0.45, "learning_rate": 0.0002942960292995123, "loss": 4.4742, "step": 1466 }, { "epoch": 0.45, "learning_rate": 0.0002942604002402751, "loss": 4.9778, "step": 1467 }, { "epoch": 0.45, "learning_rate": 0.0002942247711810378, "loss": 4.5412, "step": 1468 }, { "epoch": 0.45, "learning_rate": 0.0002941891421218006, "loss": 4.6059, "step": 1469 }, { "epoch": 0.45, "learning_rate": 0.00029415351306256337, "loss": 4.9422, "step": 1470 }, { "epoch": 0.45, "learning_rate": 0.00029411788400332615, "loss": 4.4398, "step": 1471 }, { "epoch": 0.45, "learning_rate": 0.00029408225494408893, "loss": 4.5057, "step": 1472 }, { "epoch": 0.45, "learning_rate": 0.00029404662588485166, "loss": 4.3073, "step": 1473 }, { "epoch": 0.45, "learning_rate": 0.00029401099682561444, "loss": 4.549, "step": 1474 }, { "epoch": 0.46, "learning_rate": 0.0002939753677663772, "loss": 4.6019, "step": 1475 }, { "epoch": 0.46, "learning_rate": 0.00029393973870714, "loss": 4.6149, "step": 1476 }, { "epoch": 0.46, "learning_rate": 0.0002939041096479028, "loss": 4.4029, "step": 1477 }, { "epoch": 0.46, "learning_rate": 0.0002938684805886655, "loss": 4.283, "step": 1478 }, { "epoch": 0.46, "learning_rate": 0.0002938328515294283, "loss": 4.2084, "step": 1479 }, { "epoch": 0.46, "learning_rate": 0.0002937972224701911, "loss": 4.1736, "step": 1480 }, { "epoch": 0.46, "learning_rate": 0.00029376159341095386, "loss": 4.3942, "step": 1481 }, { "epoch": 0.46, "learning_rate": 0.0002937259643517166, "loss": 4.6199, "step": 1482 }, { "epoch": 0.46, "learning_rate": 0.00029369033529247937, "loss": 4.1039, "step": 1483 }, { "epoch": 0.46, "learning_rate": 0.00029365470623324215, "loss": 4.3398, "step": 1484 }, { "epoch": 0.46, "learning_rate": 0.00029361907717400493, "loss": 4.4076, "step": 1485 }, { "epoch": 0.46, "learning_rate": 0.0002935834481147677, "loss": 4.0263, "step": 1486 }, { "epoch": 0.46, "learning_rate": 0.00029354781905553044, "loss": 4.2947, "step": 1487 }, { "epoch": 0.46, "learning_rate": 0.0002935121899962933, "loss": 3.9938, "step": 1488 }, { "epoch": 0.46, "learning_rate": 0.000293476560937056, "loss": 4.0327, "step": 1489 }, { "epoch": 0.46, "learning_rate": 0.0002934409318778188, "loss": 3.8864, "step": 1490 }, { "epoch": 0.46, "learning_rate": 0.00029340530281858157, "loss": 3.6926, "step": 1491 }, { "epoch": 0.46, "learning_rate": 0.0002933696737593443, "loss": 4.0948, "step": 1492 }, { "epoch": 0.46, "learning_rate": 0.00029333404470010713, "loss": 3.7913, "step": 1493 }, { "epoch": 0.46, "learning_rate": 0.00029329841564086986, "loss": 3.4333, "step": 1494 }, { "epoch": 0.46, "learning_rate": 0.00029326278658163264, "loss": 3.5553, "step": 1495 }, { "epoch": 0.46, "learning_rate": 0.0002932271575223954, "loss": 3.4108, "step": 1496 }, { "epoch": 0.46, "learning_rate": 0.00029319152846315815, "loss": 3.3711, "step": 1497 }, { "epoch": 0.46, "learning_rate": 0.000293155899403921, "loss": 3.1167, "step": 1498 }, { "epoch": 0.46, "learning_rate": 0.0002931202703446837, "loss": 3.1449, "step": 1499 }, { "epoch": 0.46, "learning_rate": 0.0002930846412854465, "loss": 2.9879, "step": 1500 }, { "epoch": 0.46, "eval_bleu": 0.0, "eval_loss": 5.218113899230957, "eval_runtime": 1382.8233, "eval_samples_per_second": 10.674, "eval_steps_per_second": 1.334, "step": 1500 }, { "epoch": 0.46, "learning_rate": 0.0002930490122262093, "loss": 5.9271, "step": 1501 }, { "epoch": 0.46, "learning_rate": 0.000293013383166972, "loss": 5.8332, "step": 1502 }, { "epoch": 0.46, "learning_rate": 0.0002929777541077348, "loss": 5.0913, "step": 1503 }, { "epoch": 0.46, "learning_rate": 0.0002929421250484976, "loss": 5.1893, "step": 1504 }, { "epoch": 0.46, "learning_rate": 0.00029290649598926035, "loss": 4.824, "step": 1505 }, { "epoch": 0.46, "learning_rate": 0.00029287086693002314, "loss": 5.0932, "step": 1506 }, { "epoch": 0.47, "learning_rate": 0.0002928352378707859, "loss": 4.6225, "step": 1507 }, { "epoch": 0.47, "learning_rate": 0.00029279960881154865, "loss": 5.0119, "step": 1508 }, { "epoch": 0.47, "learning_rate": 0.00029276397975231143, "loss": 4.5293, "step": 1509 }, { "epoch": 0.47, "learning_rate": 0.0002927283506930742, "loss": 4.4856, "step": 1510 }, { "epoch": 0.47, "learning_rate": 0.000292692721633837, "loss": 4.6485, "step": 1511 }, { "epoch": 0.47, "learning_rate": 0.0002926570925745998, "loss": 4.7876, "step": 1512 }, { "epoch": 0.47, "learning_rate": 0.0002926214635153625, "loss": 4.6034, "step": 1513 }, { "epoch": 0.47, "learning_rate": 0.0002925858344561253, "loss": 4.7034, "step": 1514 }, { "epoch": 0.47, "learning_rate": 0.00029255020539688807, "loss": 4.8002, "step": 1515 }, { "epoch": 0.47, "learning_rate": 0.0002925145763376508, "loss": 4.6793, "step": 1516 }, { "epoch": 0.47, "learning_rate": 0.00029247894727841363, "loss": 4.6064, "step": 1517 }, { "epoch": 0.47, "learning_rate": 0.00029244331821917636, "loss": 4.6033, "step": 1518 }, { "epoch": 0.47, "learning_rate": 0.00029240768915993914, "loss": 4.5903, "step": 1519 }, { "epoch": 0.47, "learning_rate": 0.0002923720601007019, "loss": 4.5567, "step": 1520 }, { "epoch": 0.47, "learning_rate": 0.00029233643104146465, "loss": 3.9217, "step": 1521 }, { "epoch": 0.47, "learning_rate": 0.0002923008019822275, "loss": 4.3524, "step": 1522 }, { "epoch": 0.47, "learning_rate": 0.0002922651729229902, "loss": 4.4129, "step": 1523 }, { "epoch": 0.47, "learning_rate": 0.000292229543863753, "loss": 4.4007, "step": 1524 }, { "epoch": 0.47, "learning_rate": 0.0002921939148045158, "loss": 4.3499, "step": 1525 }, { "epoch": 0.47, "learning_rate": 0.0002921582857452785, "loss": 4.3925, "step": 1526 }, { "epoch": 0.47, "learning_rate": 0.00029212265668604134, "loss": 4.5935, "step": 1527 }, { "epoch": 0.47, "learning_rate": 0.00029208702762680407, "loss": 4.3094, "step": 1528 }, { "epoch": 0.47, "learning_rate": 0.00029205139856756685, "loss": 4.3955, "step": 1529 }, { "epoch": 0.47, "learning_rate": 0.00029201576950832963, "loss": 4.1156, "step": 1530 }, { "epoch": 0.47, "learning_rate": 0.0002919801404490924, "loss": 4.2628, "step": 1531 }, { "epoch": 0.47, "learning_rate": 0.0002919445113898552, "loss": 4.333, "step": 1532 }, { "epoch": 0.47, "learning_rate": 0.0002919088823306179, "loss": 4.1443, "step": 1533 }, { "epoch": 0.47, "learning_rate": 0.0002918732532713807, "loss": 4.47, "step": 1534 }, { "epoch": 0.47, "learning_rate": 0.0002918376242121435, "loss": 4.2293, "step": 1535 }, { "epoch": 0.47, "learning_rate": 0.00029180199515290627, "loss": 3.9859, "step": 1536 }, { "epoch": 0.47, "learning_rate": 0.00029176636609366905, "loss": 4.0588, "step": 1537 }, { "epoch": 0.47, "learning_rate": 0.0002917307370344318, "loss": 3.9031, "step": 1538 }, { "epoch": 0.47, "learning_rate": 0.00029169510797519456, "loss": 4.1155, "step": 1539 }, { "epoch": 0.48, "learning_rate": 0.00029165947891595734, "loss": 3.6605, "step": 1540 }, { "epoch": 0.48, "learning_rate": 0.0002916238498567201, "loss": 3.9866, "step": 1541 }, { "epoch": 0.48, "learning_rate": 0.00029158822079748285, "loss": 4.113, "step": 1542 }, { "epoch": 0.48, "learning_rate": 0.00029155259173824563, "loss": 3.7653, "step": 1543 }, { "epoch": 0.48, "learning_rate": 0.0002915169626790084, "loss": 3.6642, "step": 1544 }, { "epoch": 0.48, "learning_rate": 0.0002914813336197712, "loss": 3.566, "step": 1545 }, { "epoch": 0.48, "learning_rate": 0.000291445704560534, "loss": 3.7636, "step": 1546 }, { "epoch": 0.48, "learning_rate": 0.0002914100755012967, "loss": 3.2914, "step": 1547 }, { "epoch": 0.48, "learning_rate": 0.0002913744464420595, "loss": 3.3236, "step": 1548 }, { "epoch": 0.48, "learning_rate": 0.00029133881738282227, "loss": 3.2093, "step": 1549 }, { "epoch": 0.48, "learning_rate": 0.000291303188323585, "loss": 2.8138, "step": 1550 }, { "epoch": 0.48, "learning_rate": 0.00029126755926434783, "loss": 6.0967, "step": 1551 }, { "epoch": 0.48, "learning_rate": 0.00029123193020511056, "loss": 5.6046, "step": 1552 }, { "epoch": 0.48, "learning_rate": 0.00029119630114587334, "loss": 5.2391, "step": 1553 }, { "epoch": 0.48, "learning_rate": 0.0002911606720866361, "loss": 5.0387, "step": 1554 }, { "epoch": 0.48, "learning_rate": 0.0002911250430273989, "loss": 4.8765, "step": 1555 }, { "epoch": 0.48, "learning_rate": 0.0002910894139681617, "loss": 5.0187, "step": 1556 }, { "epoch": 0.48, "learning_rate": 0.0002910537849089244, "loss": 4.8688, "step": 1557 }, { "epoch": 0.48, "learning_rate": 0.00029101815584968725, "loss": 4.7368, "step": 1558 }, { "epoch": 0.48, "learning_rate": 0.00029098252679045, "loss": 4.6912, "step": 1559 }, { "epoch": 0.48, "learning_rate": 0.00029094689773121276, "loss": 4.6317, "step": 1560 }, { "epoch": 0.48, "learning_rate": 0.00029091126867197554, "loss": 4.5266, "step": 1561 }, { "epoch": 0.48, "learning_rate": 0.00029087563961273827, "loss": 4.8036, "step": 1562 }, { "epoch": 0.48, "learning_rate": 0.00029084001055350105, "loss": 4.3639, "step": 1563 }, { "epoch": 0.48, "learning_rate": 0.00029080438149426383, "loss": 4.8885, "step": 1564 }, { "epoch": 0.48, "learning_rate": 0.0002907687524350266, "loss": 4.8304, "step": 1565 }, { "epoch": 0.48, "learning_rate": 0.0002907331233757894, "loss": 4.4184, "step": 1566 }, { "epoch": 0.48, "learning_rate": 0.0002906974943165521, "loss": 4.7267, "step": 1567 }, { "epoch": 0.48, "learning_rate": 0.0002906618652573149, "loss": 4.4361, "step": 1568 }, { "epoch": 0.48, "learning_rate": 0.0002906262361980777, "loss": 4.85, "step": 1569 }, { "epoch": 0.48, "learning_rate": 0.00029059060713884047, "loss": 4.6357, "step": 1570 }, { "epoch": 0.48, "learning_rate": 0.00029055497807960325, "loss": 4.4048, "step": 1571 }, { "epoch": 0.49, "learning_rate": 0.000290519349020366, "loss": 4.3945, "step": 1572 }, { "epoch": 0.49, "learning_rate": 0.00029048371996112876, "loss": 4.4739, "step": 1573 }, { "epoch": 0.49, "learning_rate": 0.00029044809090189155, "loss": 4.4761, "step": 1574 }, { "epoch": 0.49, "learning_rate": 0.00029041246184265433, "loss": 4.2886, "step": 1575 }, { "epoch": 0.49, "learning_rate": 0.00029037683278341705, "loss": 4.2753, "step": 1576 }, { "epoch": 0.49, "learning_rate": 0.00029034120372417984, "loss": 4.3819, "step": 1577 }, { "epoch": 0.49, "learning_rate": 0.0002903055746649426, "loss": 4.2209, "step": 1578 }, { "epoch": 0.49, "learning_rate": 0.0002902699456057054, "loss": 4.4748, "step": 1579 }, { "epoch": 0.49, "learning_rate": 0.0002902343165464682, "loss": 4.6274, "step": 1580 }, { "epoch": 0.49, "learning_rate": 0.0002901986874872309, "loss": 4.6299, "step": 1581 }, { "epoch": 0.49, "learning_rate": 0.00029016305842799375, "loss": 4.2846, "step": 1582 }, { "epoch": 0.49, "learning_rate": 0.0002901274293687565, "loss": 4.3461, "step": 1583 }, { "epoch": 0.49, "learning_rate": 0.00029009180030951926, "loss": 4.05, "step": 1584 }, { "epoch": 0.49, "learning_rate": 0.00029005617125028204, "loss": 4.0239, "step": 1585 }, { "epoch": 0.49, "learning_rate": 0.00029002054219104477, "loss": 4.0345, "step": 1586 }, { "epoch": 0.49, "learning_rate": 0.0002899849131318076, "loss": 3.9146, "step": 1587 }, { "epoch": 0.49, "learning_rate": 0.00028994928407257033, "loss": 4.1882, "step": 1588 }, { "epoch": 0.49, "learning_rate": 0.0002899136550133331, "loss": 3.9867, "step": 1589 }, { "epoch": 0.49, "learning_rate": 0.0002898780259540959, "loss": 4.0643, "step": 1590 }, { "epoch": 0.49, "learning_rate": 0.0002898423968948586, "loss": 3.8283, "step": 1591 }, { "epoch": 0.49, "learning_rate": 0.00028980676783562146, "loss": 3.8347, "step": 1592 }, { "epoch": 0.49, "learning_rate": 0.0002897711387763842, "loss": 3.8835, "step": 1593 }, { "epoch": 0.49, "learning_rate": 0.00028973550971714697, "loss": 3.613, "step": 1594 }, { "epoch": 0.49, "learning_rate": 0.00028969988065790975, "loss": 3.7456, "step": 1595 }, { "epoch": 0.49, "learning_rate": 0.0002896642515986725, "loss": 3.4203, "step": 1596 }, { "epoch": 0.49, "learning_rate": 0.00028962862253943526, "loss": 3.4784, "step": 1597 }, { "epoch": 0.49, "learning_rate": 0.00028959299348019804, "loss": 3.2668, "step": 1598 }, { "epoch": 0.49, "learning_rate": 0.0002895573644209608, "loss": 3.1252, "step": 1599 }, { "epoch": 0.49, "learning_rate": 0.0002895217353617236, "loss": 2.9936, "step": 1600 }, { "epoch": 0.49, "learning_rate": 0.0002894861063024864, "loss": 6.226, "step": 1601 }, { "epoch": 0.49, "learning_rate": 0.0002894504772432491, "loss": 5.4487, "step": 1602 }, { "epoch": 0.49, "learning_rate": 0.0002894148481840119, "loss": 5.1382, "step": 1603 }, { "epoch": 0.5, "learning_rate": 0.0002893792191247747, "loss": 5.2172, "step": 1604 }, { "epoch": 0.5, "learning_rate": 0.00028934359006553746, "loss": 5.1825, "step": 1605 }, { "epoch": 0.5, "learning_rate": 0.00028930796100630024, "loss": 4.8012, "step": 1606 }, { "epoch": 0.5, "learning_rate": 0.00028927233194706297, "loss": 4.7798, "step": 1607 }, { "epoch": 0.5, "learning_rate": 0.00028923670288782575, "loss": 4.6503, "step": 1608 }, { "epoch": 0.5, "learning_rate": 0.00028920107382858853, "loss": 4.6209, "step": 1609 }, { "epoch": 0.5, "learning_rate": 0.00028916544476935126, "loss": 4.8553, "step": 1610 }, { "epoch": 0.5, "learning_rate": 0.0002891298157101141, "loss": 4.4706, "step": 1611 }, { "epoch": 0.5, "learning_rate": 0.0002890941866508768, "loss": 4.6565, "step": 1612 }, { "epoch": 0.5, "learning_rate": 0.0002890585575916396, "loss": 4.6162, "step": 1613 }, { "epoch": 0.5, "learning_rate": 0.0002890229285324024, "loss": 4.7839, "step": 1614 }, { "epoch": 0.5, "learning_rate": 0.0002889872994731651, "loss": 4.62, "step": 1615 }, { "epoch": 0.5, "learning_rate": 0.00028895167041392795, "loss": 4.6899, "step": 1616 }, { "epoch": 0.5, "learning_rate": 0.0002889160413546907, "loss": 4.6458, "step": 1617 }, { "epoch": 0.5, "learning_rate": 0.00028888041229545346, "loss": 4.2545, "step": 1618 }, { "epoch": 0.5, "learning_rate": 0.00028884478323621624, "loss": 4.5741, "step": 1619 }, { "epoch": 0.5, "learning_rate": 0.00028880915417697897, "loss": 4.7165, "step": 1620 }, { "epoch": 0.5, "learning_rate": 0.0002887735251177418, "loss": 4.5225, "step": 1621 }, { "epoch": 0.5, "learning_rate": 0.00028873789605850453, "loss": 4.6099, "step": 1622 }, { "epoch": 0.5, "learning_rate": 0.0002887022669992673, "loss": 4.3643, "step": 1623 }, { "epoch": 0.5, "learning_rate": 0.0002886666379400301, "loss": 4.7348, "step": 1624 }, { "epoch": 0.5, "learning_rate": 0.0002886310088807929, "loss": 4.4335, "step": 1625 }, { "epoch": 0.5, "learning_rate": 0.00028859537982155566, "loss": 4.3101, "step": 1626 }, { "epoch": 0.5, "learning_rate": 0.0002885597507623184, "loss": 4.5908, "step": 1627 }, { "epoch": 0.5, "learning_rate": 0.00028852412170308117, "loss": 4.6177, "step": 1628 }, { "epoch": 0.5, "learning_rate": 0.00028848849264384395, "loss": 4.4698, "step": 1629 }, { "epoch": 0.5, "learning_rate": 0.00028845286358460673, "loss": 4.4572, "step": 1630 }, { "epoch": 0.5, "learning_rate": 0.00028841723452536946, "loss": 4.3128, "step": 1631 }, { "epoch": 0.5, "learning_rate": 0.00028838160546613224, "loss": 4.3521, "step": 1632 }, { "epoch": 0.5, "learning_rate": 0.000288345976406895, "loss": 4.317, "step": 1633 }, { "epoch": 0.5, "learning_rate": 0.0002883103473476578, "loss": 4.3642, "step": 1634 }, { "epoch": 0.5, "learning_rate": 0.0002882747182884206, "loss": 4.1254, "step": 1635 }, { "epoch": 0.5, "learning_rate": 0.0002882390892291833, "loss": 4.0972, "step": 1636 }, { "epoch": 0.51, "learning_rate": 0.0002882034601699461, "loss": 4.0258, "step": 1637 }, { "epoch": 0.51, "learning_rate": 0.0002881678311107089, "loss": 3.9691, "step": 1638 }, { "epoch": 0.51, "learning_rate": 0.00028813220205147166, "loss": 4.1063, "step": 1639 }, { "epoch": 0.51, "learning_rate": 0.00028809657299223444, "loss": 3.9501, "step": 1640 }, { "epoch": 0.51, "learning_rate": 0.00028806094393299717, "loss": 4.1064, "step": 1641 }, { "epoch": 0.51, "learning_rate": 0.00028802531487375995, "loss": 3.8949, "step": 1642 }, { "epoch": 0.51, "learning_rate": 0.00028798968581452274, "loss": 3.9081, "step": 1643 }, { "epoch": 0.51, "learning_rate": 0.0002879540567552855, "loss": 3.7898, "step": 1644 }, { "epoch": 0.51, "learning_rate": 0.0002879184276960483, "loss": 3.4469, "step": 1645 }, { "epoch": 0.51, "learning_rate": 0.00028788279863681103, "loss": 3.3873, "step": 1646 }, { "epoch": 0.51, "learning_rate": 0.0002878471695775738, "loss": 3.2614, "step": 1647 }, { "epoch": 0.51, "learning_rate": 0.0002878115405183366, "loss": 3.0555, "step": 1648 }, { "epoch": 0.51, "learning_rate": 0.00028777591145909937, "loss": 2.9158, "step": 1649 }, { "epoch": 0.51, "learning_rate": 0.00028774028239986215, "loss": 2.9319, "step": 1650 }, { "epoch": 0.51, "learning_rate": 0.0002877046533406249, "loss": 5.8169, "step": 1651 }, { "epoch": 0.51, "learning_rate": 0.0002876690242813877, "loss": 5.9246, "step": 1652 }, { "epoch": 0.51, "learning_rate": 0.00028763339522215045, "loss": 5.3571, "step": 1653 }, { "epoch": 0.51, "learning_rate": 0.00028759776616291323, "loss": 5.0096, "step": 1654 }, { "epoch": 0.51, "learning_rate": 0.000287562137103676, "loss": 5.0218, "step": 1655 }, { "epoch": 0.51, "learning_rate": 0.00028752650804443874, "loss": 4.7095, "step": 1656 }, { "epoch": 0.51, "learning_rate": 0.0002874908789852015, "loss": 4.9994, "step": 1657 }, { "epoch": 0.51, "learning_rate": 0.0002874552499259643, "loss": 4.7598, "step": 1658 }, { "epoch": 0.51, "learning_rate": 0.0002874196208667271, "loss": 4.6655, "step": 1659 }, { "epoch": 0.51, "learning_rate": 0.00028738399180748987, "loss": 4.5321, "step": 1660 }, { "epoch": 0.51, "learning_rate": 0.0002873483627482526, "loss": 4.6492, "step": 1661 }, { "epoch": 0.51, "learning_rate": 0.0002873127336890154, "loss": 4.8681, "step": 1662 }, { "epoch": 0.51, "learning_rate": 0.00028727710462977816, "loss": 4.5626, "step": 1663 }, { "epoch": 0.51, "learning_rate": 0.00028724147557054094, "loss": 4.5711, "step": 1664 }, { "epoch": 0.51, "learning_rate": 0.00028720584651130367, "loss": 4.7182, "step": 1665 }, { "epoch": 0.51, "learning_rate": 0.00028717021745206645, "loss": 4.6934, "step": 1666 }, { "epoch": 0.51, "learning_rate": 0.00028713458839282923, "loss": 4.875, "step": 1667 }, { "epoch": 0.51, "learning_rate": 0.000287098959333592, "loss": 4.2952, "step": 1668 }, { "epoch": 0.52, "learning_rate": 0.0002870633302743548, "loss": 4.6768, "step": 1669 }, { "epoch": 0.52, "learning_rate": 0.0002870277012151175, "loss": 4.4892, "step": 1670 }, { "epoch": 0.52, "learning_rate": 0.0002869920721558803, "loss": 4.7037, "step": 1671 }, { "epoch": 0.52, "learning_rate": 0.0002869564430966431, "loss": 4.2983, "step": 1672 }, { "epoch": 0.52, "learning_rate": 0.00028692081403740587, "loss": 4.5209, "step": 1673 }, { "epoch": 0.52, "learning_rate": 0.00028688518497816865, "loss": 4.6206, "step": 1674 }, { "epoch": 0.52, "learning_rate": 0.0002868495559189314, "loss": 4.2462, "step": 1675 }, { "epoch": 0.52, "learning_rate": 0.0002868139268596942, "loss": 4.2332, "step": 1676 }, { "epoch": 0.52, "learning_rate": 0.00028677829780045694, "loss": 4.5599, "step": 1677 }, { "epoch": 0.52, "learning_rate": 0.0002867426687412197, "loss": 4.6862, "step": 1678 }, { "epoch": 0.52, "learning_rate": 0.0002867070396819825, "loss": 4.1977, "step": 1679 }, { "epoch": 0.52, "learning_rate": 0.00028667141062274523, "loss": 4.4284, "step": 1680 }, { "epoch": 0.52, "learning_rate": 0.00028663578156350807, "loss": 4.7382, "step": 1681 }, { "epoch": 0.52, "learning_rate": 0.0002866001525042708, "loss": 4.3225, "step": 1682 }, { "epoch": 0.52, "learning_rate": 0.0002865645234450336, "loss": 4.1831, "step": 1683 }, { "epoch": 0.52, "learning_rate": 0.00028652889438579636, "loss": 4.2036, "step": 1684 }, { "epoch": 0.52, "learning_rate": 0.0002864932653265591, "loss": 4.1341, "step": 1685 }, { "epoch": 0.52, "learning_rate": 0.0002864576362673219, "loss": 4.1981, "step": 1686 }, { "epoch": 0.52, "learning_rate": 0.00028642200720808465, "loss": 4.2529, "step": 1687 }, { "epoch": 0.52, "learning_rate": 0.00028638637814884743, "loss": 3.9305, "step": 1688 }, { "epoch": 0.52, "learning_rate": 0.0002863507490896102, "loss": 3.8363, "step": 1689 }, { "epoch": 0.52, "learning_rate": 0.00028631512003037294, "loss": 3.8006, "step": 1690 }, { "epoch": 0.52, "learning_rate": 0.0002862794909711357, "loss": 3.9113, "step": 1691 }, { "epoch": 0.52, "learning_rate": 0.0002862438619118985, "loss": 3.8559, "step": 1692 }, { "epoch": 0.52, "learning_rate": 0.0002862082328526613, "loss": 3.8625, "step": 1693 }, { "epoch": 0.52, "learning_rate": 0.00028617260379342407, "loss": 3.671, "step": 1694 }, { "epoch": 0.52, "learning_rate": 0.00028613697473418685, "loss": 3.5207, "step": 1695 }, { "epoch": 0.52, "learning_rate": 0.0002861013456749496, "loss": 3.4143, "step": 1696 }, { "epoch": 0.52, "learning_rate": 0.00028606571661571236, "loss": 3.3294, "step": 1697 }, { "epoch": 0.52, "learning_rate": 0.00028603008755647514, "loss": 3.2698, "step": 1698 }, { "epoch": 0.52, "learning_rate": 0.00028599445849723787, "loss": 2.8946, "step": 1699 }, { "epoch": 0.52, "learning_rate": 0.0002859588294380007, "loss": 2.8252, "step": 1700 }, { "epoch": 0.52, "learning_rate": 0.00028592320037876343, "loss": 5.9559, "step": 1701 }, { "epoch": 0.53, "learning_rate": 0.0002858875713195262, "loss": 5.4915, "step": 1702 }, { "epoch": 0.53, "learning_rate": 0.000285851942260289, "loss": 5.3747, "step": 1703 }, { "epoch": 0.53, "learning_rate": 0.0002858163132010517, "loss": 5.3121, "step": 1704 }, { "epoch": 0.53, "learning_rate": 0.00028578068414181456, "loss": 4.8444, "step": 1705 }, { "epoch": 0.53, "learning_rate": 0.0002857450550825773, "loss": 5.0988, "step": 1706 }, { "epoch": 0.53, "learning_rate": 0.00028570942602334007, "loss": 4.8321, "step": 1707 }, { "epoch": 0.53, "learning_rate": 0.00028567379696410285, "loss": 4.4809, "step": 1708 }, { "epoch": 0.53, "learning_rate": 0.0002856381679048656, "loss": 4.5086, "step": 1709 }, { "epoch": 0.53, "learning_rate": 0.0002856025388456284, "loss": 4.7842, "step": 1710 }, { "epoch": 0.53, "learning_rate": 0.00028556690978639114, "loss": 4.7684, "step": 1711 }, { "epoch": 0.53, "learning_rate": 0.0002855312807271539, "loss": 4.5887, "step": 1712 }, { "epoch": 0.53, "learning_rate": 0.0002854956516679167, "loss": 4.672, "step": 1713 }, { "epoch": 0.53, "learning_rate": 0.00028546002260867944, "loss": 4.4023, "step": 1714 }, { "epoch": 0.53, "learning_rate": 0.00028542439354944227, "loss": 4.4104, "step": 1715 }, { "epoch": 0.53, "learning_rate": 0.000285388764490205, "loss": 4.6319, "step": 1716 }, { "epoch": 0.53, "learning_rate": 0.0002853531354309678, "loss": 4.5523, "step": 1717 }, { "epoch": 0.53, "learning_rate": 0.00028531750637173056, "loss": 4.7205, "step": 1718 }, { "epoch": 0.53, "learning_rate": 0.00028528187731249335, "loss": 4.4206, "step": 1719 }, { "epoch": 0.53, "learning_rate": 0.0002852462482532561, "loss": 4.7251, "step": 1720 }, { "epoch": 0.53, "learning_rate": 0.00028521061919401885, "loss": 4.491, "step": 1721 }, { "epoch": 0.53, "learning_rate": 0.00028517499013478164, "loss": 4.2801, "step": 1722 }, { "epoch": 0.53, "learning_rate": 0.0002851393610755444, "loss": 4.3607, "step": 1723 }, { "epoch": 0.53, "learning_rate": 0.0002851037320163072, "loss": 4.2176, "step": 1724 }, { "epoch": 0.53, "learning_rate": 0.00028506810295706993, "loss": 4.4153, "step": 1725 }, { "epoch": 0.53, "learning_rate": 0.0002850324738978327, "loss": 4.3146, "step": 1726 }, { "epoch": 0.53, "learning_rate": 0.0002849968448385955, "loss": 4.4574, "step": 1727 }, { "epoch": 0.53, "learning_rate": 0.0002849612157793583, "loss": 4.2592, "step": 1728 }, { "epoch": 0.53, "learning_rate": 0.00028492558672012106, "loss": 4.4472, "step": 1729 }, { "epoch": 0.53, "learning_rate": 0.0002848899576608838, "loss": 4.4364, "step": 1730 }, { "epoch": 0.53, "learning_rate": 0.00028485432860164656, "loss": 4.5569, "step": 1731 }, { "epoch": 0.53, "learning_rate": 0.00028481869954240935, "loss": 4.3802, "step": 1732 }, { "epoch": 0.53, "learning_rate": 0.00028478307048317213, "loss": 4.2796, "step": 1733 }, { "epoch": 0.54, "learning_rate": 0.0002847474414239349, "loss": 4.095, "step": 1734 }, { "epoch": 0.54, "learning_rate": 0.00028471181236469764, "loss": 4.1858, "step": 1735 }, { "epoch": 0.54, "learning_rate": 0.0002846761833054604, "loss": 4.1695, "step": 1736 }, { "epoch": 0.54, "learning_rate": 0.0002846405542462232, "loss": 4.0288, "step": 1737 }, { "epoch": 0.54, "learning_rate": 0.000284604925186986, "loss": 4.1276, "step": 1738 }, { "epoch": 0.54, "learning_rate": 0.00028456929612774877, "loss": 3.8841, "step": 1739 }, { "epoch": 0.54, "learning_rate": 0.0002845336670685115, "loss": 3.8218, "step": 1740 }, { "epoch": 0.54, "learning_rate": 0.0002844980380092743, "loss": 3.7556, "step": 1741 }, { "epoch": 0.54, "learning_rate": 0.00028446240895003706, "loss": 3.864, "step": 1742 }, { "epoch": 0.54, "learning_rate": 0.00028442677989079984, "loss": 3.846, "step": 1743 }, { "epoch": 0.54, "learning_rate": 0.0002843911508315626, "loss": 3.6336, "step": 1744 }, { "epoch": 0.54, "learning_rate": 0.00028435552177232535, "loss": 3.4695, "step": 1745 }, { "epoch": 0.54, "learning_rate": 0.00028431989271308813, "loss": 3.632, "step": 1746 }, { "epoch": 0.54, "learning_rate": 0.0002842842636538509, "loss": 3.4329, "step": 1747 }, { "epoch": 0.54, "learning_rate": 0.0002842486345946137, "loss": 3.6854, "step": 1748 }, { "epoch": 0.54, "learning_rate": 0.0002842130055353765, "loss": 3.0649, "step": 1749 }, { "epoch": 0.54, "learning_rate": 0.0002841773764761392, "loss": 2.9265, "step": 1750 }, { "epoch": 0.54, "learning_rate": 0.000284141747416902, "loss": 5.4338, "step": 1751 }, { "epoch": 0.54, "learning_rate": 0.00028410611835766477, "loss": 5.2903, "step": 1752 }, { "epoch": 0.54, "learning_rate": 0.00028407048929842755, "loss": 4.8795, "step": 1753 }, { "epoch": 0.54, "learning_rate": 0.00028403486023919033, "loss": 4.7638, "step": 1754 }, { "epoch": 0.54, "learning_rate": 0.00028399923117995306, "loss": 5.0412, "step": 1755 }, { "epoch": 0.54, "learning_rate": 0.00028396360212071584, "loss": 4.8736, "step": 1756 }, { "epoch": 0.54, "learning_rate": 0.0002839279730614786, "loss": 4.8018, "step": 1757 }, { "epoch": 0.54, "learning_rate": 0.0002838923440022414, "loss": 5.0769, "step": 1758 }, { "epoch": 0.54, "learning_rate": 0.00028385671494300413, "loss": 4.4701, "step": 1759 }, { "epoch": 0.54, "learning_rate": 0.0002838210858837669, "loss": 4.4708, "step": 1760 }, { "epoch": 0.54, "learning_rate": 0.0002837854568245297, "loss": 4.6064, "step": 1761 }, { "epoch": 0.54, "learning_rate": 0.0002837498277652925, "loss": 4.8933, "step": 1762 }, { "epoch": 0.54, "learning_rate": 0.00028371419870605526, "loss": 4.5085, "step": 1763 }, { "epoch": 0.54, "learning_rate": 0.000283678569646818, "loss": 4.4442, "step": 1764 }, { "epoch": 0.54, "learning_rate": 0.00028364294058758077, "loss": 4.8048, "step": 1765 }, { "epoch": 0.55, "learning_rate": 0.00028360731152834355, "loss": 4.7817, "step": 1766 }, { "epoch": 0.55, "learning_rate": 0.00028357168246910633, "loss": 4.5038, "step": 1767 }, { "epoch": 0.55, "learning_rate": 0.0002835360534098691, "loss": 4.5987, "step": 1768 }, { "epoch": 0.55, "learning_rate": 0.00028350042435063184, "loss": 4.8134, "step": 1769 }, { "epoch": 0.55, "learning_rate": 0.0002834647952913947, "loss": 4.5126, "step": 1770 }, { "epoch": 0.55, "learning_rate": 0.0002834291662321574, "loss": 4.2741, "step": 1771 }, { "epoch": 0.55, "learning_rate": 0.0002833935371729202, "loss": 4.3485, "step": 1772 }, { "epoch": 0.55, "learning_rate": 0.00028335790811368297, "loss": 4.3584, "step": 1773 }, { "epoch": 0.55, "learning_rate": 0.0002833222790544457, "loss": 4.4953, "step": 1774 }, { "epoch": 0.55, "learning_rate": 0.00028328664999520853, "loss": 4.3292, "step": 1775 }, { "epoch": 0.55, "learning_rate": 0.00028325102093597126, "loss": 4.5157, "step": 1776 }, { "epoch": 0.55, "learning_rate": 0.00028321539187673404, "loss": 4.122, "step": 1777 }, { "epoch": 0.55, "learning_rate": 0.0002831797628174968, "loss": 4.4656, "step": 1778 }, { "epoch": 0.55, "learning_rate": 0.00028314413375825955, "loss": 4.3855, "step": 1779 }, { "epoch": 0.55, "learning_rate": 0.00028310850469902233, "loss": 4.2659, "step": 1780 }, { "epoch": 0.55, "learning_rate": 0.0002830728756397851, "loss": 4.1749, "step": 1781 }, { "epoch": 0.55, "learning_rate": 0.0002830372465805479, "loss": 4.2162, "step": 1782 }, { "epoch": 0.55, "learning_rate": 0.0002830016175213107, "loss": 4.0078, "step": 1783 }, { "epoch": 0.55, "learning_rate": 0.0002829659884620734, "loss": 4.3881, "step": 1784 }, { "epoch": 0.55, "learning_rate": 0.0002829303594028362, "loss": 3.9709, "step": 1785 }, { "epoch": 0.55, "learning_rate": 0.00028289473034359897, "loss": 4.1335, "step": 1786 }, { "epoch": 0.55, "learning_rate": 0.00028285910128436175, "loss": 4.0052, "step": 1787 }, { "epoch": 0.55, "learning_rate": 0.00028282347222512454, "loss": 4.0536, "step": 1788 }, { "epoch": 0.55, "learning_rate": 0.0002827878431658873, "loss": 3.8297, "step": 1789 }, { "epoch": 0.55, "learning_rate": 0.00028275221410665005, "loss": 3.857, "step": 1790 }, { "epoch": 0.55, "learning_rate": 0.0002827165850474128, "loss": 3.471, "step": 1791 }, { "epoch": 0.55, "learning_rate": 0.0002826809559881756, "loss": 3.8361, "step": 1792 }, { "epoch": 0.55, "learning_rate": 0.00028264532692893834, "loss": 3.8958, "step": 1793 }, { "epoch": 0.55, "learning_rate": 0.00028260969786970117, "loss": 4.051, "step": 1794 }, { "epoch": 0.55, "learning_rate": 0.0002825740688104639, "loss": 3.5728, "step": 1795 }, { "epoch": 0.55, "learning_rate": 0.0002825384397512267, "loss": 3.3148, "step": 1796 }, { "epoch": 0.55, "learning_rate": 0.00028250281069198946, "loss": 3.6328, "step": 1797 }, { "epoch": 0.55, "learning_rate": 0.0002824671816327522, "loss": 3.0205, "step": 1798 }, { "epoch": 0.56, "learning_rate": 0.00028243155257351503, "loss": 3.0506, "step": 1799 }, { "epoch": 0.56, "learning_rate": 0.00028239592351427776, "loss": 3.0353, "step": 1800 }, { "epoch": 0.56, "learning_rate": 0.00028236029445504054, "loss": 6.1927, "step": 1801 }, { "epoch": 0.56, "learning_rate": 0.0002823246653958033, "loss": 5.62, "step": 1802 }, { "epoch": 0.56, "learning_rate": 0.00028228903633656605, "loss": 5.6133, "step": 1803 }, { "epoch": 0.56, "learning_rate": 0.0002822534072773289, "loss": 5.0318, "step": 1804 }, { "epoch": 0.56, "learning_rate": 0.0002822177782180916, "loss": 5.2845, "step": 1805 }, { "epoch": 0.56, "learning_rate": 0.0002821821491588544, "loss": 4.7732, "step": 1806 }, { "epoch": 0.56, "learning_rate": 0.0002821465200996172, "loss": 4.4498, "step": 1807 }, { "epoch": 0.56, "learning_rate": 0.0002821108910403799, "loss": 4.3548, "step": 1808 }, { "epoch": 0.56, "learning_rate": 0.00028207526198114274, "loss": 4.7838, "step": 1809 }, { "epoch": 0.56, "learning_rate": 0.00028203963292190547, "loss": 4.577, "step": 1810 }, { "epoch": 0.56, "learning_rate": 0.00028200400386266825, "loss": 4.9805, "step": 1811 }, { "epoch": 0.56, "learning_rate": 0.00028196837480343103, "loss": 4.6141, "step": 1812 }, { "epoch": 0.56, "learning_rate": 0.0002819327457441938, "loss": 4.6616, "step": 1813 }, { "epoch": 0.56, "learning_rate": 0.0002818971166849566, "loss": 4.3279, "step": 1814 }, { "epoch": 0.56, "learning_rate": 0.0002818614876257193, "loss": 4.5409, "step": 1815 }, { "epoch": 0.56, "learning_rate": 0.0002818258585664821, "loss": 4.384, "step": 1816 }, { "epoch": 0.56, "learning_rate": 0.0002817902295072449, "loss": 4.2928, "step": 1817 }, { "epoch": 0.56, "learning_rate": 0.00028175460044800767, "loss": 4.5664, "step": 1818 }, { "epoch": 0.56, "learning_rate": 0.0002817189713887704, "loss": 4.3505, "step": 1819 }, { "epoch": 0.56, "learning_rate": 0.0002816833423295332, "loss": 4.4975, "step": 1820 }, { "epoch": 0.56, "learning_rate": 0.00028164771327029596, "loss": 4.2909, "step": 1821 }, { "epoch": 0.56, "learning_rate": 0.00028161208421105874, "loss": 4.7649, "step": 1822 }, { "epoch": 0.56, "learning_rate": 0.0002815764551518215, "loss": 4.5508, "step": 1823 }, { "epoch": 0.56, "learning_rate": 0.00028154082609258425, "loss": 4.4209, "step": 1824 }, { "epoch": 0.56, "learning_rate": 0.00028150519703334703, "loss": 4.1213, "step": 1825 }, { "epoch": 0.56, "learning_rate": 0.0002814695679741098, "loss": 4.3874, "step": 1826 }, { "epoch": 0.56, "learning_rate": 0.00028143393891487254, "loss": 4.3513, "step": 1827 }, { "epoch": 0.56, "learning_rate": 0.0002813983098556354, "loss": 4.367, "step": 1828 }, { "epoch": 0.56, "learning_rate": 0.0002813626807963981, "loss": 4.416, "step": 1829 }, { "epoch": 0.56, "learning_rate": 0.0002813270517371609, "loss": 4.1103, "step": 1830 }, { "epoch": 0.57, "learning_rate": 0.00028129142267792367, "loss": 4.1808, "step": 1831 }, { "epoch": 0.57, "learning_rate": 0.00028125579361868645, "loss": 4.1928, "step": 1832 }, { "epoch": 0.57, "learning_rate": 0.00028122016455944923, "loss": 4.3737, "step": 1833 }, { "epoch": 0.57, "learning_rate": 0.00028118453550021196, "loss": 3.8465, "step": 1834 }, { "epoch": 0.57, "learning_rate": 0.00028114890644097474, "loss": 4.0205, "step": 1835 }, { "epoch": 0.57, "learning_rate": 0.0002811132773817375, "loss": 4.0079, "step": 1836 }, { "epoch": 0.57, "learning_rate": 0.0002810776483225003, "loss": 3.8324, "step": 1837 }, { "epoch": 0.57, "learning_rate": 0.0002810420192632631, "loss": 4.1645, "step": 1838 }, { "epoch": 0.57, "learning_rate": 0.0002810063902040258, "loss": 4.1409, "step": 1839 }, { "epoch": 0.57, "learning_rate": 0.0002809707611447886, "loss": 4.0916, "step": 1840 }, { "epoch": 0.57, "learning_rate": 0.0002809351320855514, "loss": 3.8341, "step": 1841 }, { "epoch": 0.57, "learning_rate": 0.00028089950302631416, "loss": 3.6373, "step": 1842 }, { "epoch": 0.57, "learning_rate": 0.00028086387396707694, "loss": 3.7619, "step": 1843 }, { "epoch": 0.57, "learning_rate": 0.00028082824490783967, "loss": 3.3177, "step": 1844 }, { "epoch": 0.57, "learning_rate": 0.00028079261584860245, "loss": 3.3486, "step": 1845 }, { "epoch": 0.57, "learning_rate": 0.00028075698678936523, "loss": 3.4016, "step": 1846 }, { "epoch": 0.57, "learning_rate": 0.000280721357730128, "loss": 3.4434, "step": 1847 }, { "epoch": 0.57, "learning_rate": 0.0002806857286708908, "loss": 3.2736, "step": 1848 }, { "epoch": 0.57, "learning_rate": 0.0002806500996116535, "loss": 2.9675, "step": 1849 }, { "epoch": 0.57, "learning_rate": 0.0002806144705524163, "loss": 2.4712, "step": 1850 }, { "epoch": 0.57, "learning_rate": 0.0002805788414931791, "loss": 5.931, "step": 1851 }, { "epoch": 0.57, "learning_rate": 0.00028054321243394187, "loss": 5.1531, "step": 1852 }, { "epoch": 0.57, "learning_rate": 0.0002805075833747046, "loss": 5.1907, "step": 1853 }, { "epoch": 0.57, "learning_rate": 0.0002804719543154674, "loss": 5.3324, "step": 1854 }, { "epoch": 0.57, "learning_rate": 0.00028043632525623016, "loss": 5.3411, "step": 1855 }, { "epoch": 0.57, "learning_rate": 0.00028040069619699294, "loss": 4.6327, "step": 1856 }, { "epoch": 0.57, "learning_rate": 0.0002803650671377557, "loss": 5.0004, "step": 1857 }, { "epoch": 0.57, "learning_rate": 0.00028032943807851845, "loss": 4.5061, "step": 1858 }, { "epoch": 0.57, "learning_rate": 0.0002802938090192813, "loss": 4.753, "step": 1859 }, { "epoch": 0.57, "learning_rate": 0.000280258179960044, "loss": 4.6296, "step": 1860 }, { "epoch": 0.57, "learning_rate": 0.0002802225509008068, "loss": 4.8117, "step": 1861 }, { "epoch": 0.57, "learning_rate": 0.0002801869218415696, "loss": 4.6888, "step": 1862 }, { "epoch": 0.57, "learning_rate": 0.0002801512927823323, "loss": 4.8442, "step": 1863 }, { "epoch": 0.58, "learning_rate": 0.00028011566372309514, "loss": 4.6244, "step": 1864 }, { "epoch": 0.58, "learning_rate": 0.00028008003466385787, "loss": 4.5149, "step": 1865 }, { "epoch": 0.58, "learning_rate": 0.00028004440560462065, "loss": 4.4641, "step": 1866 }, { "epoch": 0.58, "learning_rate": 0.00028000877654538344, "loss": 4.4827, "step": 1867 }, { "epoch": 0.58, "learning_rate": 0.00027997314748614616, "loss": 4.465, "step": 1868 }, { "epoch": 0.58, "learning_rate": 0.000279937518426909, "loss": 4.6763, "step": 1869 }, { "epoch": 0.58, "learning_rate": 0.00027990188936767173, "loss": 4.2837, "step": 1870 }, { "epoch": 0.58, "learning_rate": 0.0002798662603084345, "loss": 4.3393, "step": 1871 }, { "epoch": 0.58, "learning_rate": 0.0002798306312491973, "loss": 4.1916, "step": 1872 }, { "epoch": 0.58, "learning_rate": 0.00027979500218996, "loss": 4.1749, "step": 1873 }, { "epoch": 0.58, "learning_rate": 0.0002797593731307228, "loss": 4.2497, "step": 1874 }, { "epoch": 0.58, "learning_rate": 0.0002797237440714856, "loss": 4.1868, "step": 1875 }, { "epoch": 0.58, "learning_rate": 0.00027968811501224836, "loss": 4.3625, "step": 1876 }, { "epoch": 0.58, "learning_rate": 0.00027965248595301115, "loss": 4.5243, "step": 1877 }, { "epoch": 0.58, "learning_rate": 0.0002796168568937739, "loss": 4.2954, "step": 1878 }, { "epoch": 0.58, "learning_rate": 0.00027958122783453666, "loss": 4.2173, "step": 1879 }, { "epoch": 0.58, "learning_rate": 0.00027954559877529944, "loss": 4.4197, "step": 1880 }, { "epoch": 0.58, "learning_rate": 0.0002795099697160622, "loss": 4.3507, "step": 1881 }, { "epoch": 0.58, "learning_rate": 0.000279474340656825, "loss": 4.2776, "step": 1882 }, { "epoch": 0.58, "learning_rate": 0.0002794387115975878, "loss": 3.9773, "step": 1883 }, { "epoch": 0.58, "learning_rate": 0.0002794030825383505, "loss": 4.2877, "step": 1884 }, { "epoch": 0.58, "learning_rate": 0.0002793674534791133, "loss": 4.0587, "step": 1885 }, { "epoch": 0.58, "learning_rate": 0.0002793318244198761, "loss": 4.091, "step": 1886 }, { "epoch": 0.58, "learning_rate": 0.0002792961953606388, "loss": 3.9697, "step": 1887 }, { "epoch": 0.58, "learning_rate": 0.00027926056630140164, "loss": 3.8383, "step": 1888 }, { "epoch": 0.58, "learning_rate": 0.00027922493724216437, "loss": 4.0129, "step": 1889 }, { "epoch": 0.58, "learning_rate": 0.00027918930818292715, "loss": 4.0093, "step": 1890 }, { "epoch": 0.58, "learning_rate": 0.00027915367912368993, "loss": 3.5314, "step": 1891 }, { "epoch": 0.58, "learning_rate": 0.00027911805006445266, "loss": 3.5054, "step": 1892 }, { "epoch": 0.58, "learning_rate": 0.0002790824210052155, "loss": 3.6333, "step": 1893 }, { "epoch": 0.58, "learning_rate": 0.0002790467919459782, "loss": 3.7856, "step": 1894 }, { "epoch": 0.58, "learning_rate": 0.000279011162886741, "loss": 3.656, "step": 1895 }, { "epoch": 0.59, "learning_rate": 0.0002789755338275038, "loss": 3.6999, "step": 1896 }, { "epoch": 0.59, "learning_rate": 0.0002789399047682665, "loss": 3.6861, "step": 1897 }, { "epoch": 0.59, "learning_rate": 0.00027890427570902935, "loss": 3.0799, "step": 1898 }, { "epoch": 0.59, "learning_rate": 0.0002788686466497921, "loss": 2.9319, "step": 1899 }, { "epoch": 0.59, "learning_rate": 0.00027883301759055486, "loss": 2.8406, "step": 1900 }, { "epoch": 0.59, "learning_rate": 0.00027879738853131764, "loss": 6.0849, "step": 1901 }, { "epoch": 0.59, "learning_rate": 0.0002787617594720804, "loss": 5.3402, "step": 1902 }, { "epoch": 0.59, "learning_rate": 0.0002787261304128432, "loss": 5.1385, "step": 1903 }, { "epoch": 0.59, "learning_rate": 0.00027869050135360593, "loss": 5.1564, "step": 1904 }, { "epoch": 0.59, "learning_rate": 0.0002786548722943687, "loss": 4.997, "step": 1905 }, { "epoch": 0.59, "learning_rate": 0.0002786192432351315, "loss": 4.7909, "step": 1906 }, { "epoch": 0.59, "learning_rate": 0.0002785836141758943, "loss": 4.7124, "step": 1907 }, { "epoch": 0.59, "learning_rate": 0.000278547985116657, "loss": 4.9631, "step": 1908 }, { "epoch": 0.59, "learning_rate": 0.0002785123560574198, "loss": 4.9777, "step": 1909 }, { "epoch": 0.59, "learning_rate": 0.00027847672699818257, "loss": 4.6659, "step": 1910 }, { "epoch": 0.59, "learning_rate": 0.00027844109793894535, "loss": 4.7956, "step": 1911 }, { "epoch": 0.59, "learning_rate": 0.00027840546887970813, "loss": 4.7263, "step": 1912 }, { "epoch": 0.59, "learning_rate": 0.00027836983982047086, "loss": 4.4392, "step": 1913 }, { "epoch": 0.59, "learning_rate": 0.00027833421076123364, "loss": 4.6713, "step": 1914 }, { "epoch": 0.59, "learning_rate": 0.0002782985817019964, "loss": 4.5464, "step": 1915 }, { "epoch": 0.59, "learning_rate": 0.0002782629526427592, "loss": 4.6236, "step": 1916 }, { "epoch": 0.59, "learning_rate": 0.000278227323583522, "loss": 4.6247, "step": 1917 }, { "epoch": 0.59, "learning_rate": 0.0002781916945242847, "loss": 4.5863, "step": 1918 }, { "epoch": 0.59, "learning_rate": 0.0002781560654650475, "loss": 4.3881, "step": 1919 }, { "epoch": 0.59, "learning_rate": 0.0002781204364058103, "loss": 4.2146, "step": 1920 }, { "epoch": 0.59, "learning_rate": 0.000278084807346573, "loss": 4.3224, "step": 1921 }, { "epoch": 0.59, "learning_rate": 0.00027804917828733584, "loss": 4.2245, "step": 1922 }, { "epoch": 0.59, "learning_rate": 0.00027801354922809857, "loss": 4.3457, "step": 1923 }, { "epoch": 0.59, "learning_rate": 0.00027797792016886135, "loss": 4.0761, "step": 1924 }, { "epoch": 0.59, "learning_rate": 0.00027794229110962413, "loss": 4.5551, "step": 1925 }, { "epoch": 0.59, "learning_rate": 0.0002779066620503869, "loss": 4.2646, "step": 1926 }, { "epoch": 0.59, "learning_rate": 0.0002778710329911497, "loss": 4.3472, "step": 1927 }, { "epoch": 0.6, "learning_rate": 0.0002778354039319124, "loss": 4.4012, "step": 1928 }, { "epoch": 0.6, "learning_rate": 0.0002777997748726752, "loss": 4.2185, "step": 1929 }, { "epoch": 0.6, "learning_rate": 0.000277764145813438, "loss": 4.0906, "step": 1930 }, { "epoch": 0.6, "learning_rate": 0.00027772851675420077, "loss": 4.1106, "step": 1931 }, { "epoch": 0.6, "learning_rate": 0.00027769288769496355, "loss": 4.5432, "step": 1932 }, { "epoch": 0.6, "learning_rate": 0.0002776572586357263, "loss": 4.1548, "step": 1933 }, { "epoch": 0.6, "learning_rate": 0.00027762162957648906, "loss": 3.9916, "step": 1934 }, { "epoch": 0.6, "learning_rate": 0.00027758600051725184, "loss": 4.2435, "step": 1935 }, { "epoch": 0.6, "learning_rate": 0.0002775503714580146, "loss": 3.9455, "step": 1936 }, { "epoch": 0.6, "learning_rate": 0.0002775147423987774, "loss": 3.9697, "step": 1937 }, { "epoch": 0.6, "learning_rate": 0.00027747911333954014, "loss": 4.0098, "step": 1938 }, { "epoch": 0.6, "learning_rate": 0.0002774434842803029, "loss": 3.97, "step": 1939 }, { "epoch": 0.6, "learning_rate": 0.0002774078552210657, "loss": 3.9678, "step": 1940 }, { "epoch": 0.6, "learning_rate": 0.0002773722261618285, "loss": 3.6627, "step": 1941 }, { "epoch": 0.6, "learning_rate": 0.0002773365971025912, "loss": 3.7294, "step": 1942 }, { "epoch": 0.6, "learning_rate": 0.000277300968043354, "loss": 3.4384, "step": 1943 }, { "epoch": 0.6, "learning_rate": 0.0002772653389841168, "loss": 3.3939, "step": 1944 }, { "epoch": 0.6, "learning_rate": 0.00027722970992487956, "loss": 3.3806, "step": 1945 }, { "epoch": 0.6, "learning_rate": 0.00027719408086564234, "loss": 3.3158, "step": 1946 }, { "epoch": 0.6, "learning_rate": 0.00027715845180640506, "loss": 3.6413, "step": 1947 }, { "epoch": 0.6, "learning_rate": 0.00027712282274716785, "loss": 3.1799, "step": 1948 }, { "epoch": 0.6, "learning_rate": 0.00027708719368793063, "loss": 2.8909, "step": 1949 }, { "epoch": 0.6, "learning_rate": 0.0002770515646286934, "loss": 2.7417, "step": 1950 }, { "epoch": 0.6, "learning_rate": 0.0002770159355694562, "loss": 5.8252, "step": 1951 }, { "epoch": 0.6, "learning_rate": 0.0002769803065102189, "loss": 5.8696, "step": 1952 }, { "epoch": 0.6, "learning_rate": 0.00027694467745098176, "loss": 5.3137, "step": 1953 }, { "epoch": 0.6, "learning_rate": 0.0002769090483917445, "loss": 5.3433, "step": 1954 }, { "epoch": 0.6, "learning_rate": 0.00027687341933250727, "loss": 5.0567, "step": 1955 }, { "epoch": 0.6, "learning_rate": 0.00027683779027327005, "loss": 4.6707, "step": 1956 }, { "epoch": 0.6, "learning_rate": 0.0002768021612140328, "loss": 4.7928, "step": 1957 }, { "epoch": 0.6, "learning_rate": 0.0002767665321547956, "loss": 4.685, "step": 1958 }, { "epoch": 0.6, "learning_rate": 0.00027673090309555834, "loss": 4.8176, "step": 1959 }, { "epoch": 0.6, "learning_rate": 0.0002766952740363211, "loss": 4.766, "step": 1960 }, { "epoch": 0.61, "learning_rate": 0.0002766596449770839, "loss": 4.9201, "step": 1961 }, { "epoch": 0.61, "learning_rate": 0.00027662401591784663, "loss": 4.605, "step": 1962 }, { "epoch": 0.61, "learning_rate": 0.00027658838685860947, "loss": 4.8998, "step": 1963 }, { "epoch": 0.61, "learning_rate": 0.0002765527577993722, "loss": 4.6064, "step": 1964 }, { "epoch": 0.61, "learning_rate": 0.000276517128740135, "loss": 4.2127, "step": 1965 }, { "epoch": 0.61, "learning_rate": 0.00027648149968089776, "loss": 4.6782, "step": 1966 }, { "epoch": 0.61, "learning_rate": 0.0002764458706216605, "loss": 4.3776, "step": 1967 }, { "epoch": 0.61, "learning_rate": 0.00027641024156242327, "loss": 4.5418, "step": 1968 }, { "epoch": 0.61, "learning_rate": 0.00027637461250318605, "loss": 4.4247, "step": 1969 }, { "epoch": 0.61, "learning_rate": 0.00027633898344394883, "loss": 4.3267, "step": 1970 }, { "epoch": 0.61, "learning_rate": 0.0002763033543847116, "loss": 4.3468, "step": 1971 }, { "epoch": 0.61, "learning_rate": 0.00027626772532547434, "loss": 4.1767, "step": 1972 }, { "epoch": 0.61, "learning_rate": 0.0002762320962662371, "loss": 4.4511, "step": 1973 }, { "epoch": 0.61, "learning_rate": 0.0002761964672069999, "loss": 4.2063, "step": 1974 }, { "epoch": 0.61, "learning_rate": 0.0002761608381477627, "loss": 4.3169, "step": 1975 }, { "epoch": 0.61, "learning_rate": 0.0002761252090885254, "loss": 4.2372, "step": 1976 }, { "epoch": 0.61, "learning_rate": 0.00027608958002928825, "loss": 4.6001, "step": 1977 }, { "epoch": 0.61, "learning_rate": 0.000276053950970051, "loss": 4.5791, "step": 1978 }, { "epoch": 0.61, "learning_rate": 0.00027601832191081376, "loss": 4.2773, "step": 1979 }, { "epoch": 0.61, "learning_rate": 0.00027598269285157654, "loss": 4.4102, "step": 1980 }, { "epoch": 0.61, "learning_rate": 0.00027594706379233927, "loss": 4.3947, "step": 1981 }, { "epoch": 0.61, "learning_rate": 0.0002759114347331021, "loss": 4.3096, "step": 1982 }, { "epoch": 0.61, "learning_rate": 0.00027587580567386483, "loss": 4.0216, "step": 1983 }, { "epoch": 0.61, "learning_rate": 0.0002758401766146276, "loss": 4.1195, "step": 1984 }, { "epoch": 0.61, "learning_rate": 0.0002758045475553904, "loss": 3.9684, "step": 1985 }, { "epoch": 0.61, "learning_rate": 0.0002757689184961531, "loss": 3.7992, "step": 1986 }, { "epoch": 0.61, "learning_rate": 0.00027573328943691596, "loss": 3.9759, "step": 1987 }, { "epoch": 0.61, "learning_rate": 0.0002756976603776787, "loss": 4.179, "step": 1988 }, { "epoch": 0.61, "learning_rate": 0.00027566203131844147, "loss": 4.1401, "step": 1989 }, { "epoch": 0.61, "learning_rate": 0.00027562640225920425, "loss": 3.7779, "step": 1990 }, { "epoch": 0.61, "learning_rate": 0.000275590773199967, "loss": 3.8421, "step": 1991 }, { "epoch": 0.61, "learning_rate": 0.0002755551441407298, "loss": 3.5797, "step": 1992 }, { "epoch": 0.62, "learning_rate": 0.00027551951508149254, "loss": 3.5298, "step": 1993 }, { "epoch": 0.62, "learning_rate": 0.0002754838860222553, "loss": 3.4437, "step": 1994 }, { "epoch": 0.62, "learning_rate": 0.0002754482569630181, "loss": 3.303, "step": 1995 }, { "epoch": 0.62, "learning_rate": 0.0002754126279037809, "loss": 3.3195, "step": 1996 }, { "epoch": 0.62, "learning_rate": 0.00027537699884454367, "loss": 3.074, "step": 1997 }, { "epoch": 0.62, "learning_rate": 0.0002753413697853064, "loss": 3.0603, "step": 1998 }, { "epoch": 0.62, "learning_rate": 0.0002753057407260692, "loss": 3.0066, "step": 1999 }, { "epoch": 0.62, "learning_rate": 0.00027527011166683196, "loss": 2.9617, "step": 2000 }, { "epoch": 0.62, "eval_bleu": 0.0, "eval_loss": 4.966036796569824, "eval_runtime": 2574.9105, "eval_samples_per_second": 5.732, "eval_steps_per_second": 0.717, "step": 2000 }, { "epoch": 0.62, "learning_rate": 0.00027523448260759474, "loss": 6.1769, "step": 2001 }, { "epoch": 0.62, "learning_rate": 0.00027519885354835747, "loss": 5.6556, "step": 2002 }, { "epoch": 0.62, "learning_rate": 0.00027516322448912025, "loss": 5.3767, "step": 2003 }, { "epoch": 0.62, "learning_rate": 0.00027512759542988304, "loss": 4.9736, "step": 2004 }, { "epoch": 0.62, "learning_rate": 0.0002750919663706458, "loss": 4.7481, "step": 2005 }, { "epoch": 0.62, "learning_rate": 0.0002750563373114086, "loss": 4.8907, "step": 2006 }, { "epoch": 0.62, "learning_rate": 0.0002750207082521713, "loss": 4.5035, "step": 2007 }, { "epoch": 0.62, "learning_rate": 0.0002749850791929341, "loss": 4.7699, "step": 2008 }, { "epoch": 0.62, "learning_rate": 0.0002749494501336969, "loss": 4.631, "step": 2009 }, { "epoch": 0.62, "learning_rate": 0.00027491382107445967, "loss": 4.6193, "step": 2010 }, { "epoch": 0.62, "learning_rate": 0.00027487819201522245, "loss": 4.6941, "step": 2011 }, { "epoch": 0.62, "learning_rate": 0.0002748425629559852, "loss": 4.3448, "step": 2012 }, { "epoch": 0.62, "learning_rate": 0.00027480693389674796, "loss": 4.6623, "step": 2013 }, { "epoch": 0.62, "learning_rate": 0.00027477130483751075, "loss": 4.5536, "step": 2014 }, { "epoch": 0.62, "learning_rate": 0.0002747356757782735, "loss": 4.6681, "step": 2015 }, { "epoch": 0.62, "learning_rate": 0.0002747000467190363, "loss": 4.418, "step": 2016 }, { "epoch": 0.62, "learning_rate": 0.00027466441765979904, "loss": 4.3259, "step": 2017 }, { "epoch": 0.62, "learning_rate": 0.0002746287886005618, "loss": 4.3832, "step": 2018 }, { "epoch": 0.62, "learning_rate": 0.0002745931595413246, "loss": 4.3156, "step": 2019 }, { "epoch": 0.62, "learning_rate": 0.0002745575304820874, "loss": 4.4399, "step": 2020 }, { "epoch": 0.62, "learning_rate": 0.00027452190142285016, "loss": 4.6134, "step": 2021 }, { "epoch": 0.62, "learning_rate": 0.0002744862723636129, "loss": 4.3347, "step": 2022 }, { "epoch": 0.62, "learning_rate": 0.0002744506433043757, "loss": 4.5988, "step": 2023 }, { "epoch": 0.62, "learning_rate": 0.00027441501424513846, "loss": 4.4484, "step": 2024 }, { "epoch": 0.62, "learning_rate": 0.00027437938518590124, "loss": 4.3457, "step": 2025 }, { "epoch": 0.63, "learning_rate": 0.000274343756126664, "loss": 4.0325, "step": 2026 }, { "epoch": 0.63, "learning_rate": 0.00027430812706742675, "loss": 4.015, "step": 2027 }, { "epoch": 0.63, "learning_rate": 0.00027427249800818953, "loss": 4.2661, "step": 2028 }, { "epoch": 0.63, "learning_rate": 0.0002742368689489523, "loss": 4.2043, "step": 2029 }, { "epoch": 0.63, "learning_rate": 0.0002742012398897151, "loss": 4.1988, "step": 2030 }, { "epoch": 0.63, "learning_rate": 0.0002741656108304779, "loss": 4.0476, "step": 2031 }, { "epoch": 0.63, "learning_rate": 0.0002741299817712406, "loss": 4.0984, "step": 2032 }, { "epoch": 0.63, "learning_rate": 0.0002740943527120034, "loss": 4.0974, "step": 2033 }, { "epoch": 0.63, "learning_rate": 0.00027405872365276617, "loss": 4.0471, "step": 2034 }, { "epoch": 0.63, "learning_rate": 0.00027402309459352895, "loss": 3.9909, "step": 2035 }, { "epoch": 0.63, "learning_rate": 0.0002739874655342917, "loss": 4.0386, "step": 2036 }, { "epoch": 0.63, "learning_rate": 0.00027395183647505446, "loss": 3.9185, "step": 2037 }, { "epoch": 0.63, "learning_rate": 0.00027391620741581724, "loss": 3.7998, "step": 2038 }, { "epoch": 0.63, "learning_rate": 0.00027388057835658, "loss": 3.9185, "step": 2039 }, { "epoch": 0.63, "learning_rate": 0.0002738449492973428, "loss": 3.7441, "step": 2040 }, { "epoch": 0.63, "learning_rate": 0.00027380932023810553, "loss": 3.5783, "step": 2041 }, { "epoch": 0.63, "learning_rate": 0.0002737736911788683, "loss": 3.6455, "step": 2042 }, { "epoch": 0.63, "learning_rate": 0.0002737380621196311, "loss": 3.6768, "step": 2043 }, { "epoch": 0.63, "learning_rate": 0.0002737024330603939, "loss": 3.5912, "step": 2044 }, { "epoch": 0.63, "learning_rate": 0.00027366680400115666, "loss": 3.7213, "step": 2045 }, { "epoch": 0.63, "learning_rate": 0.0002736311749419194, "loss": 3.1424, "step": 2046 }, { "epoch": 0.63, "learning_rate": 0.0002735955458826822, "loss": 3.5562, "step": 2047 }, { "epoch": 0.63, "learning_rate": 0.00027355991682344495, "loss": 3.27, "step": 2048 }, { "epoch": 0.63, "learning_rate": 0.00027352428776420773, "loss": 2.9097, "step": 2049 }, { "epoch": 0.63, "learning_rate": 0.0002734886587049705, "loss": 2.9704, "step": 2050 }, { "epoch": 0.63, "learning_rate": 0.00027345302964573324, "loss": 5.5976, "step": 2051 }, { "epoch": 0.63, "learning_rate": 0.0002734174005864961, "loss": 5.1167, "step": 2052 }, { "epoch": 0.63, "learning_rate": 0.0002733817715272588, "loss": 5.0992, "step": 2053 }, { "epoch": 0.63, "learning_rate": 0.0002733461424680216, "loss": 4.8379, "step": 2054 }, { "epoch": 0.63, "learning_rate": 0.00027331051340878437, "loss": 4.8975, "step": 2055 }, { "epoch": 0.63, "learning_rate": 0.0002732748843495471, "loss": 4.4934, "step": 2056 }, { "epoch": 0.63, "learning_rate": 0.0002732392552903099, "loss": 4.6695, "step": 2057 }, { "epoch": 0.64, "learning_rate": 0.00027320362623107266, "loss": 4.893, "step": 2058 }, { "epoch": 0.64, "learning_rate": 0.00027316799717183544, "loss": 4.4798, "step": 2059 }, { "epoch": 0.64, "learning_rate": 0.0002731323681125982, "loss": 4.5974, "step": 2060 }, { "epoch": 0.64, "learning_rate": 0.00027309673905336095, "loss": 4.4924, "step": 2061 }, { "epoch": 0.64, "learning_rate": 0.00027306110999412373, "loss": 4.3781, "step": 2062 }, { "epoch": 0.64, "learning_rate": 0.0002730254809348865, "loss": 4.2143, "step": 2063 }, { "epoch": 0.64, "learning_rate": 0.0002729898518756493, "loss": 4.5611, "step": 2064 }, { "epoch": 0.64, "learning_rate": 0.0002729542228164121, "loss": 4.8, "step": 2065 }, { "epoch": 0.64, "learning_rate": 0.0002729185937571748, "loss": 4.4736, "step": 2066 }, { "epoch": 0.64, "learning_rate": 0.0002728829646979376, "loss": 4.7248, "step": 2067 }, { "epoch": 0.64, "learning_rate": 0.00027284733563870037, "loss": 4.694, "step": 2068 }, { "epoch": 0.64, "learning_rate": 0.00027281170657946315, "loss": 4.6907, "step": 2069 }, { "epoch": 0.64, "learning_rate": 0.0002727760775202259, "loss": 4.3891, "step": 2070 }, { "epoch": 0.64, "learning_rate": 0.0002727404484609887, "loss": 4.5203, "step": 2071 }, { "epoch": 0.64, "learning_rate": 0.00027270481940175144, "loss": 4.2891, "step": 2072 }, { "epoch": 0.64, "learning_rate": 0.0002726691903425142, "loss": 4.407, "step": 2073 }, { "epoch": 0.64, "learning_rate": 0.000272633561283277, "loss": 4.1381, "step": 2074 }, { "epoch": 0.64, "learning_rate": 0.00027259793222403974, "loss": 4.4153, "step": 2075 }, { "epoch": 0.64, "learning_rate": 0.00027256230316480257, "loss": 4.4035, "step": 2076 }, { "epoch": 0.64, "learning_rate": 0.0002725266741055653, "loss": 4.3938, "step": 2077 }, { "epoch": 0.64, "learning_rate": 0.0002724910450463281, "loss": 4.2848, "step": 2078 }, { "epoch": 0.64, "learning_rate": 0.00027245541598709086, "loss": 4.5552, "step": 2079 }, { "epoch": 0.64, "learning_rate": 0.0002724197869278536, "loss": 4.2738, "step": 2080 }, { "epoch": 0.64, "learning_rate": 0.0002723841578686164, "loss": 4.3799, "step": 2081 }, { "epoch": 0.64, "learning_rate": 0.00027234852880937915, "loss": 4.2782, "step": 2082 }, { "epoch": 0.64, "learning_rate": 0.00027231289975014194, "loss": 4.0154, "step": 2083 }, { "epoch": 0.64, "learning_rate": 0.0002722772706909047, "loss": 4.1351, "step": 2084 }, { "epoch": 0.64, "learning_rate": 0.00027224164163166745, "loss": 4.1901, "step": 2085 }, { "epoch": 0.64, "learning_rate": 0.0002722060125724303, "loss": 4.1203, "step": 2086 }, { "epoch": 0.64, "learning_rate": 0.000272170383513193, "loss": 3.8366, "step": 2087 }, { "epoch": 0.64, "learning_rate": 0.0002721347544539558, "loss": 3.8378, "step": 2088 }, { "epoch": 0.64, "learning_rate": 0.0002720991253947186, "loss": 4.0113, "step": 2089 }, { "epoch": 0.65, "learning_rate": 0.00027206349633548136, "loss": 4.1015, "step": 2090 }, { "epoch": 0.65, "learning_rate": 0.0002720278672762441, "loss": 3.9593, "step": 2091 }, { "epoch": 0.65, "learning_rate": 0.00027199223821700686, "loss": 3.5824, "step": 2092 }, { "epoch": 0.65, "learning_rate": 0.00027195660915776965, "loss": 3.7761, "step": 2093 }, { "epoch": 0.65, "learning_rate": 0.00027192098009853243, "loss": 3.7241, "step": 2094 }, { "epoch": 0.65, "learning_rate": 0.0002718853510392952, "loss": 3.4441, "step": 2095 }, { "epoch": 0.65, "learning_rate": 0.00027184972198005794, "loss": 3.165, "step": 2096 }, { "epoch": 0.65, "learning_rate": 0.0002718140929208207, "loss": 3.0505, "step": 2097 }, { "epoch": 0.65, "learning_rate": 0.0002717784638615835, "loss": 3.194, "step": 2098 }, { "epoch": 0.65, "learning_rate": 0.0002717428348023463, "loss": 2.8755, "step": 2099 }, { "epoch": 0.65, "learning_rate": 0.00027170720574310907, "loss": 2.7077, "step": 2100 }, { "epoch": 0.65, "learning_rate": 0.0002716715766838718, "loss": 5.6802, "step": 2101 }, { "epoch": 0.65, "learning_rate": 0.0002716359476246346, "loss": 5.4096, "step": 2102 }, { "epoch": 0.65, "learning_rate": 0.00027160031856539736, "loss": 5.338, "step": 2103 }, { "epoch": 0.65, "learning_rate": 0.0002715646895061601, "loss": 5.325, "step": 2104 }, { "epoch": 0.65, "learning_rate": 0.0002715290604469229, "loss": 5.099, "step": 2105 }, { "epoch": 0.65, "learning_rate": 0.00027149343138768565, "loss": 4.8985, "step": 2106 }, { "epoch": 0.65, "learning_rate": 0.00027145780232844843, "loss": 4.9076, "step": 2107 }, { "epoch": 0.65, "learning_rate": 0.0002714221732692112, "loss": 4.5141, "step": 2108 }, { "epoch": 0.65, "learning_rate": 0.00027138654420997394, "loss": 4.7797, "step": 2109 }, { "epoch": 0.65, "learning_rate": 0.0002713509151507368, "loss": 4.4077, "step": 2110 }, { "epoch": 0.65, "learning_rate": 0.0002713152860914995, "loss": 4.6801, "step": 2111 }, { "epoch": 0.65, "learning_rate": 0.0002712796570322623, "loss": 4.7318, "step": 2112 }, { "epoch": 0.65, "learning_rate": 0.00027124402797302507, "loss": 4.5695, "step": 2113 }, { "epoch": 0.65, "learning_rate": 0.00027120839891378785, "loss": 4.4002, "step": 2114 }, { "epoch": 0.65, "learning_rate": 0.00027117276985455063, "loss": 4.5413, "step": 2115 }, { "epoch": 0.65, "learning_rate": 0.00027113714079531336, "loss": 4.4605, "step": 2116 }, { "epoch": 0.65, "learning_rate": 0.00027110151173607614, "loss": 4.5996, "step": 2117 }, { "epoch": 0.65, "learning_rate": 0.0002710658826768389, "loss": 4.4574, "step": 2118 }, { "epoch": 0.65, "learning_rate": 0.0002710302536176017, "loss": 4.5211, "step": 2119 }, { "epoch": 0.65, "learning_rate": 0.0002709946245583645, "loss": 4.6674, "step": 2120 }, { "epoch": 0.65, "learning_rate": 0.0002709589954991272, "loss": 4.5588, "step": 2121 }, { "epoch": 0.65, "learning_rate": 0.00027092336643989, "loss": 4.7214, "step": 2122 }, { "epoch": 0.66, "learning_rate": 0.0002708877373806528, "loss": 4.336, "step": 2123 }, { "epoch": 0.66, "learning_rate": 0.00027085210832141556, "loss": 4.4832, "step": 2124 }, { "epoch": 0.66, "learning_rate": 0.00027081647926217834, "loss": 4.2008, "step": 2125 }, { "epoch": 0.66, "learning_rate": 0.00027078085020294107, "loss": 4.2849, "step": 2126 }, { "epoch": 0.66, "learning_rate": 0.00027074522114370385, "loss": 4.3505, "step": 2127 }, { "epoch": 0.66, "learning_rate": 0.00027070959208446663, "loss": 4.2519, "step": 2128 }, { "epoch": 0.66, "learning_rate": 0.0002706739630252294, "loss": 4.0948, "step": 2129 }, { "epoch": 0.66, "learning_rate": 0.00027063833396599214, "loss": 4.1339, "step": 2130 }, { "epoch": 0.66, "learning_rate": 0.0002706027049067549, "loss": 4.3383, "step": 2131 }, { "epoch": 0.66, "learning_rate": 0.0002705670758475177, "loss": 4.2567, "step": 2132 }, { "epoch": 0.66, "learning_rate": 0.0002705314467882805, "loss": 4.3303, "step": 2133 }, { "epoch": 0.66, "learning_rate": 0.00027049581772904327, "loss": 4.1213, "step": 2134 }, { "epoch": 0.66, "learning_rate": 0.000270460188669806, "loss": 3.9718, "step": 2135 }, { "epoch": 0.66, "learning_rate": 0.0002704245596105688, "loss": 4.0548, "step": 2136 }, { "epoch": 0.66, "learning_rate": 0.00027038893055133156, "loss": 4.0638, "step": 2137 }, { "epoch": 0.66, "learning_rate": 0.00027035330149209434, "loss": 3.8733, "step": 2138 }, { "epoch": 0.66, "learning_rate": 0.0002703176724328571, "loss": 3.7628, "step": 2139 }, { "epoch": 0.66, "learning_rate": 0.00027028204337361985, "loss": 3.8237, "step": 2140 }, { "epoch": 0.66, "learning_rate": 0.0002702464143143827, "loss": 3.6339, "step": 2141 }, { "epoch": 0.66, "learning_rate": 0.0002702107852551454, "loss": 3.7266, "step": 2142 }, { "epoch": 0.66, "learning_rate": 0.0002701751561959082, "loss": 3.5038, "step": 2143 }, { "epoch": 0.66, "learning_rate": 0.000270139527136671, "loss": 3.8761, "step": 2144 }, { "epoch": 0.66, "learning_rate": 0.0002701038980774337, "loss": 3.3854, "step": 2145 }, { "epoch": 0.66, "learning_rate": 0.00027006826901819654, "loss": 2.971, "step": 2146 }, { "epoch": 0.66, "learning_rate": 0.00027003263995895927, "loss": 3.2835, "step": 2147 }, { "epoch": 0.66, "learning_rate": 0.00026999701089972205, "loss": 3.215, "step": 2148 }, { "epoch": 0.66, "learning_rate": 0.00026996138184048484, "loss": 2.8101, "step": 2149 }, { "epoch": 0.66, "learning_rate": 0.00026992575278124756, "loss": 2.6127, "step": 2150 }, { "epoch": 0.66, "learning_rate": 0.00026989012372201034, "loss": 5.4214, "step": 2151 }, { "epoch": 0.66, "learning_rate": 0.0002698544946627731, "loss": 5.1861, "step": 2152 }, { "epoch": 0.66, "learning_rate": 0.0002698188656035359, "loss": 5.1411, "step": 2153 }, { "epoch": 0.66, "learning_rate": 0.0002697832365442987, "loss": 4.9494, "step": 2154 }, { "epoch": 0.67, "learning_rate": 0.0002697476074850614, "loss": 4.8036, "step": 2155 }, { "epoch": 0.67, "learning_rate": 0.0002697119784258242, "loss": 4.7427, "step": 2156 }, { "epoch": 0.67, "learning_rate": 0.000269676349366587, "loss": 5.1107, "step": 2157 }, { "epoch": 0.67, "learning_rate": 0.00026964072030734976, "loss": 5.0204, "step": 2158 }, { "epoch": 0.67, "learning_rate": 0.00026960509124811255, "loss": 4.4267, "step": 2159 }, { "epoch": 0.67, "learning_rate": 0.00026956946218887533, "loss": 4.8836, "step": 2160 }, { "epoch": 0.67, "learning_rate": 0.00026953383312963806, "loss": 4.404, "step": 2161 }, { "epoch": 0.67, "learning_rate": 0.00026949820407040084, "loss": 4.6778, "step": 2162 }, { "epoch": 0.67, "learning_rate": 0.0002694625750111636, "loss": 4.4312, "step": 2163 }, { "epoch": 0.67, "learning_rate": 0.00026942694595192635, "loss": 4.7257, "step": 2164 }, { "epoch": 0.67, "learning_rate": 0.0002693913168926892, "loss": 4.5319, "step": 2165 }, { "epoch": 0.67, "learning_rate": 0.0002693556878334519, "loss": 4.5441, "step": 2166 }, { "epoch": 0.67, "learning_rate": 0.0002693200587742147, "loss": 4.4688, "step": 2167 }, { "epoch": 0.67, "learning_rate": 0.0002692844297149775, "loss": 4.2892, "step": 2168 }, { "epoch": 0.67, "learning_rate": 0.0002692488006557402, "loss": 4.3047, "step": 2169 }, { "epoch": 0.67, "learning_rate": 0.00026921317159650304, "loss": 4.7218, "step": 2170 }, { "epoch": 0.67, "learning_rate": 0.00026917754253726577, "loss": 4.3161, "step": 2171 }, { "epoch": 0.67, "learning_rate": 0.00026914191347802855, "loss": 4.4661, "step": 2172 }, { "epoch": 0.67, "learning_rate": 0.00026910628441879133, "loss": 4.4211, "step": 2173 }, { "epoch": 0.67, "learning_rate": 0.00026907065535955406, "loss": 4.5731, "step": 2174 }, { "epoch": 0.67, "learning_rate": 0.0002690350263003169, "loss": 4.2675, "step": 2175 }, { "epoch": 0.67, "learning_rate": 0.0002689993972410796, "loss": 4.3119, "step": 2176 }, { "epoch": 0.67, "learning_rate": 0.0002689637681818424, "loss": 4.4024, "step": 2177 }, { "epoch": 0.67, "learning_rate": 0.0002689281391226052, "loss": 4.2476, "step": 2178 }, { "epoch": 0.67, "learning_rate": 0.0002688925100633679, "loss": 4.1693, "step": 2179 }, { "epoch": 0.67, "learning_rate": 0.00026885688100413075, "loss": 4.1901, "step": 2180 }, { "epoch": 0.67, "learning_rate": 0.0002688212519448935, "loss": 4.2276, "step": 2181 }, { "epoch": 0.67, "learning_rate": 0.00026878562288565626, "loss": 3.9802, "step": 2182 }, { "epoch": 0.67, "learning_rate": 0.00026874999382641904, "loss": 3.8448, "step": 2183 }, { "epoch": 0.67, "learning_rate": 0.0002687143647671818, "loss": 3.92, "step": 2184 }, { "epoch": 0.67, "learning_rate": 0.00026867873570794455, "loss": 4.1696, "step": 2185 }, { "epoch": 0.67, "learning_rate": 0.00026864310664870733, "loss": 4.2616, "step": 2186 }, { "epoch": 0.67, "learning_rate": 0.0002686074775894701, "loss": 4.0214, "step": 2187 }, { "epoch": 0.68, "learning_rate": 0.0002685718485302329, "loss": 4.1242, "step": 2188 }, { "epoch": 0.68, "learning_rate": 0.0002685362194709957, "loss": 3.8292, "step": 2189 }, { "epoch": 0.68, "learning_rate": 0.0002685005904117584, "loss": 3.8482, "step": 2190 }, { "epoch": 0.68, "learning_rate": 0.0002684649613525212, "loss": 3.6833, "step": 2191 }, { "epoch": 0.68, "learning_rate": 0.00026842933229328397, "loss": 3.6177, "step": 2192 }, { "epoch": 0.68, "learning_rate": 0.00026839370323404675, "loss": 3.1868, "step": 2193 }, { "epoch": 0.68, "learning_rate": 0.00026835807417480953, "loss": 3.5675, "step": 2194 }, { "epoch": 0.68, "learning_rate": 0.00026832244511557226, "loss": 3.4567, "step": 2195 }, { "epoch": 0.68, "learning_rate": 0.00026828681605633504, "loss": 3.2616, "step": 2196 }, { "epoch": 0.68, "learning_rate": 0.0002682511869970978, "loss": 3.1958, "step": 2197 }, { "epoch": 0.68, "learning_rate": 0.00026821555793786055, "loss": 3.01, "step": 2198 }, { "epoch": 0.68, "learning_rate": 0.0002681799288786234, "loss": 2.9395, "step": 2199 }, { "epoch": 0.68, "learning_rate": 0.0002681442998193861, "loss": 2.8252, "step": 2200 }, { "epoch": 0.68, "learning_rate": 0.0002681086707601489, "loss": 5.7191, "step": 2201 }, { "epoch": 0.68, "learning_rate": 0.0002680730417009117, "loss": 5.8459, "step": 2202 }, { "epoch": 0.68, "learning_rate": 0.0002680374126416744, "loss": 5.5128, "step": 2203 }, { "epoch": 0.68, "learning_rate": 0.00026800178358243724, "loss": 5.4134, "step": 2204 }, { "epoch": 0.68, "learning_rate": 0.00026796615452319997, "loss": 5.1334, "step": 2205 }, { "epoch": 0.68, "learning_rate": 0.00026793052546396275, "loss": 4.7954, "step": 2206 }, { "epoch": 0.68, "learning_rate": 0.00026789489640472553, "loss": 4.5874, "step": 2207 }, { "epoch": 0.68, "learning_rate": 0.0002678592673454883, "loss": 4.8741, "step": 2208 }, { "epoch": 0.68, "learning_rate": 0.0002678236382862511, "loss": 4.8996, "step": 2209 }, { "epoch": 0.68, "learning_rate": 0.0002677880092270138, "loss": 4.6359, "step": 2210 }, { "epoch": 0.68, "learning_rate": 0.0002677523801677766, "loss": 4.669, "step": 2211 }, { "epoch": 0.68, "learning_rate": 0.0002677167511085394, "loss": 4.497, "step": 2212 }, { "epoch": 0.68, "learning_rate": 0.00026768112204930217, "loss": 4.7719, "step": 2213 }, { "epoch": 0.68, "learning_rate": 0.00026764549299006495, "loss": 4.7294, "step": 2214 }, { "epoch": 0.68, "learning_rate": 0.0002676098639308277, "loss": 4.7212, "step": 2215 }, { "epoch": 0.68, "learning_rate": 0.00026757423487159046, "loss": 4.5129, "step": 2216 }, { "epoch": 0.68, "learning_rate": 0.00026753860581235324, "loss": 4.5215, "step": 2217 }, { "epoch": 0.68, "learning_rate": 0.000267502976753116, "loss": 4.4539, "step": 2218 }, { "epoch": 0.68, "learning_rate": 0.00026746734769387875, "loss": 4.1164, "step": 2219 }, { "epoch": 0.69, "learning_rate": 0.00026743171863464154, "loss": 4.2726, "step": 2220 }, { "epoch": 0.69, "learning_rate": 0.0002673960895754043, "loss": 4.3926, "step": 2221 }, { "epoch": 0.69, "learning_rate": 0.0002673604605161671, "loss": 4.5402, "step": 2222 }, { "epoch": 0.69, "learning_rate": 0.0002673248314569299, "loss": 4.6661, "step": 2223 }, { "epoch": 0.69, "learning_rate": 0.0002672892023976926, "loss": 4.3806, "step": 2224 }, { "epoch": 0.69, "learning_rate": 0.0002672535733384554, "loss": 4.3723, "step": 2225 }, { "epoch": 0.69, "learning_rate": 0.00026721794427921817, "loss": 4.3477, "step": 2226 }, { "epoch": 0.69, "learning_rate": 0.00026718231521998095, "loss": 4.3915, "step": 2227 }, { "epoch": 0.69, "learning_rate": 0.00026714668616074374, "loss": 4.4253, "step": 2228 }, { "epoch": 0.69, "learning_rate": 0.00026711105710150646, "loss": 4.0907, "step": 2229 }, { "epoch": 0.69, "learning_rate": 0.00026707542804226925, "loss": 4.1971, "step": 2230 }, { "epoch": 0.69, "learning_rate": 0.00026703979898303203, "loss": 4.3432, "step": 2231 }, { "epoch": 0.69, "learning_rate": 0.0002670041699237948, "loss": 4.2279, "step": 2232 }, { "epoch": 0.69, "learning_rate": 0.0002669685408645576, "loss": 4.2818, "step": 2233 }, { "epoch": 0.69, "learning_rate": 0.0002669329118053203, "loss": 4.1163, "step": 2234 }, { "epoch": 0.69, "learning_rate": 0.00026689728274608315, "loss": 4.1745, "step": 2235 }, { "epoch": 0.69, "learning_rate": 0.0002668616536868459, "loss": 4.1069, "step": 2236 }, { "epoch": 0.69, "learning_rate": 0.00026682602462760866, "loss": 3.875, "step": 2237 }, { "epoch": 0.69, "learning_rate": 0.00026679039556837145, "loss": 3.6572, "step": 2238 }, { "epoch": 0.69, "learning_rate": 0.0002667547665091342, "loss": 3.9336, "step": 2239 }, { "epoch": 0.69, "learning_rate": 0.000266719137449897, "loss": 4.0039, "step": 2240 }, { "epoch": 0.69, "learning_rate": 0.00026668350839065974, "loss": 4.0218, "step": 2241 }, { "epoch": 0.69, "learning_rate": 0.0002666478793314225, "loss": 3.8118, "step": 2242 }, { "epoch": 0.69, "learning_rate": 0.0002666122502721853, "loss": 3.5175, "step": 2243 }, { "epoch": 0.69, "learning_rate": 0.00026657662121294803, "loss": 3.8809, "step": 2244 }, { "epoch": 0.69, "learning_rate": 0.0002665409921537108, "loss": 3.3924, "step": 2245 }, { "epoch": 0.69, "learning_rate": 0.0002665053630944736, "loss": 3.1747, "step": 2246 }, { "epoch": 0.69, "learning_rate": 0.0002664697340352364, "loss": 3.3114, "step": 2247 }, { "epoch": 0.69, "learning_rate": 0.00026643410497599916, "loss": 3.09, "step": 2248 }, { "epoch": 0.69, "learning_rate": 0.0002663984759167619, "loss": 2.9028, "step": 2249 }, { "epoch": 0.69, "learning_rate": 0.00026636284685752467, "loss": 2.7627, "step": 2250 }, { "epoch": 0.69, "learning_rate": 0.00026632721779828745, "loss": 5.8523, "step": 2251 }, { "epoch": 0.7, "learning_rate": 0.00026629158873905023, "loss": 5.5851, "step": 2252 }, { "epoch": 0.7, "learning_rate": 0.00026625595967981296, "loss": 5.082, "step": 2253 }, { "epoch": 0.7, "learning_rate": 0.0002662203306205758, "loss": 5.3091, "step": 2254 }, { "epoch": 0.7, "learning_rate": 0.0002661847015613385, "loss": 4.8769, "step": 2255 }, { "epoch": 0.7, "learning_rate": 0.0002661490725021013, "loss": 4.5123, "step": 2256 }, { "epoch": 0.7, "learning_rate": 0.0002661134434428641, "loss": 4.4974, "step": 2257 }, { "epoch": 0.7, "learning_rate": 0.0002660778143836268, "loss": 4.5921, "step": 2258 }, { "epoch": 0.7, "learning_rate": 0.00026604218532438965, "loss": 4.5983, "step": 2259 }, { "epoch": 0.7, "learning_rate": 0.0002660065562651524, "loss": 4.7629, "step": 2260 }, { "epoch": 0.7, "learning_rate": 0.00026597092720591516, "loss": 4.587, "step": 2261 }, { "epoch": 0.7, "learning_rate": 0.00026593529814667794, "loss": 4.5702, "step": 2262 }, { "epoch": 0.7, "learning_rate": 0.00026589966908744067, "loss": 4.2578, "step": 2263 }, { "epoch": 0.7, "learning_rate": 0.0002658640400282035, "loss": 4.5776, "step": 2264 }, { "epoch": 0.7, "learning_rate": 0.00026582841096896623, "loss": 4.3071, "step": 2265 }, { "epoch": 0.7, "learning_rate": 0.000265792781909729, "loss": 4.5268, "step": 2266 }, { "epoch": 0.7, "learning_rate": 0.0002657571528504918, "loss": 4.6491, "step": 2267 }, { "epoch": 0.7, "learning_rate": 0.0002657215237912545, "loss": 4.4708, "step": 2268 }, { "epoch": 0.7, "learning_rate": 0.00026568589473201736, "loss": 4.331, "step": 2269 }, { "epoch": 0.7, "learning_rate": 0.0002656502656727801, "loss": 4.7472, "step": 2270 }, { "epoch": 0.7, "learning_rate": 0.00026561463661354287, "loss": 4.4351, "step": 2271 }, { "epoch": 0.7, "learning_rate": 0.00026557900755430565, "loss": 4.1651, "step": 2272 }, { "epoch": 0.7, "learning_rate": 0.0002655433784950684, "loss": 4.0908, "step": 2273 }, { "epoch": 0.7, "learning_rate": 0.0002655077494358312, "loss": 4.3298, "step": 2274 }, { "epoch": 0.7, "learning_rate": 0.00026547212037659394, "loss": 4.4526, "step": 2275 }, { "epoch": 0.7, "learning_rate": 0.0002654364913173567, "loss": 4.4924, "step": 2276 }, { "epoch": 0.7, "learning_rate": 0.0002654008622581195, "loss": 4.2038, "step": 2277 }, { "epoch": 0.7, "learning_rate": 0.0002653652331988823, "loss": 4.375, "step": 2278 }, { "epoch": 0.7, "learning_rate": 0.000265329604139645, "loss": 4.2648, "step": 2279 }, { "epoch": 0.7, "learning_rate": 0.0002652939750804078, "loss": 4.2919, "step": 2280 }, { "epoch": 0.7, "learning_rate": 0.0002652583460211706, "loss": 3.9786, "step": 2281 }, { "epoch": 0.7, "learning_rate": 0.00026522271696193336, "loss": 4.1125, "step": 2282 }, { "epoch": 0.7, "learning_rate": 0.00026518708790269614, "loss": 4.0368, "step": 2283 }, { "epoch": 0.7, "learning_rate": 0.00026515145884345887, "loss": 4.0796, "step": 2284 }, { "epoch": 0.71, "learning_rate": 0.00026511582978422165, "loss": 4.2385, "step": 2285 }, { "epoch": 0.71, "learning_rate": 0.00026508020072498443, "loss": 3.9758, "step": 2286 }, { "epoch": 0.71, "learning_rate": 0.00026504457166574716, "loss": 4.1498, "step": 2287 }, { "epoch": 0.71, "learning_rate": 0.00026500894260651, "loss": 4.0593, "step": 2288 }, { "epoch": 0.71, "learning_rate": 0.0002649733135472727, "loss": 3.5912, "step": 2289 }, { "epoch": 0.71, "learning_rate": 0.0002649376844880355, "loss": 3.7663, "step": 2290 }, { "epoch": 0.71, "learning_rate": 0.0002649020554287983, "loss": 3.6628, "step": 2291 }, { "epoch": 0.71, "learning_rate": 0.000264866426369561, "loss": 3.6855, "step": 2292 }, { "epoch": 0.71, "learning_rate": 0.00026483079731032385, "loss": 3.9472, "step": 2293 }, { "epoch": 0.71, "learning_rate": 0.0002647951682510866, "loss": 3.5427, "step": 2294 }, { "epoch": 0.71, "learning_rate": 0.00026475953919184936, "loss": 3.2918, "step": 2295 }, { "epoch": 0.71, "learning_rate": 0.00026472391013261214, "loss": 3.4156, "step": 2296 }, { "epoch": 0.71, "learning_rate": 0.0002646882810733749, "loss": 3.2335, "step": 2297 }, { "epoch": 0.71, "learning_rate": 0.0002646526520141377, "loss": 3.1201, "step": 2298 }, { "epoch": 0.71, "learning_rate": 0.00026461702295490044, "loss": 2.8926, "step": 2299 }, { "epoch": 0.71, "learning_rate": 0.0002645813938956632, "loss": 2.8963, "step": 2300 }, { "epoch": 0.71, "learning_rate": 0.000264545764836426, "loss": 5.5156, "step": 2301 }, { "epoch": 0.71, "learning_rate": 0.0002645101357771888, "loss": 5.4828, "step": 2302 }, { "epoch": 0.71, "learning_rate": 0.00026447450671795156, "loss": 4.8972, "step": 2303 }, { "epoch": 0.71, "learning_rate": 0.0002644388776587143, "loss": 4.7764, "step": 2304 }, { "epoch": 0.71, "learning_rate": 0.0002644032485994771, "loss": 4.6781, "step": 2305 }, { "epoch": 0.71, "learning_rate": 0.00026436761954023985, "loss": 4.5116, "step": 2306 }, { "epoch": 0.71, "learning_rate": 0.00026433199048100264, "loss": 4.7391, "step": 2307 }, { "epoch": 0.71, "learning_rate": 0.0002642963614217654, "loss": 4.7079, "step": 2308 }, { "epoch": 0.71, "learning_rate": 0.00026426073236252815, "loss": 4.6104, "step": 2309 }, { "epoch": 0.71, "learning_rate": 0.00026422510330329093, "loss": 4.5504, "step": 2310 }, { "epoch": 0.71, "learning_rate": 0.0002641894742440537, "loss": 4.7064, "step": 2311 }, { "epoch": 0.71, "learning_rate": 0.0002641538451848165, "loss": 4.544, "step": 2312 }, { "epoch": 0.71, "learning_rate": 0.0002641182161255792, "loss": 4.3515, "step": 2313 }, { "epoch": 0.71, "learning_rate": 0.000264082587066342, "loss": 4.3783, "step": 2314 }, { "epoch": 0.71, "learning_rate": 0.0002640469580071048, "loss": 4.6333, "step": 2315 }, { "epoch": 0.71, "learning_rate": 0.00026401132894786757, "loss": 4.5166, "step": 2316 }, { "epoch": 0.72, "learning_rate": 0.00026397569988863035, "loss": 4.4685, "step": 2317 }, { "epoch": 0.72, "learning_rate": 0.0002639400708293931, "loss": 4.5128, "step": 2318 }, { "epoch": 0.72, "learning_rate": 0.00026390444177015586, "loss": 4.6736, "step": 2319 }, { "epoch": 0.72, "learning_rate": 0.00026386881271091864, "loss": 4.3192, "step": 2320 }, { "epoch": 0.72, "learning_rate": 0.0002638331836516814, "loss": 4.5259, "step": 2321 }, { "epoch": 0.72, "learning_rate": 0.0002637975545924442, "loss": 4.4803, "step": 2322 }, { "epoch": 0.72, "learning_rate": 0.00026376192553320693, "loss": 4.4336, "step": 2323 }, { "epoch": 0.72, "learning_rate": 0.0002637262964739697, "loss": 4.2775, "step": 2324 }, { "epoch": 0.72, "learning_rate": 0.0002636906674147325, "loss": 4.4003, "step": 2325 }, { "epoch": 0.72, "learning_rate": 0.0002636550383554953, "loss": 4.3128, "step": 2326 }, { "epoch": 0.72, "learning_rate": 0.00026361940929625806, "loss": 4.1117, "step": 2327 }, { "epoch": 0.72, "learning_rate": 0.0002635837802370208, "loss": 4.1381, "step": 2328 }, { "epoch": 0.72, "learning_rate": 0.0002635481511777836, "loss": 4.188, "step": 2329 }, { "epoch": 0.72, "learning_rate": 0.00026351252211854635, "loss": 4.2737, "step": 2330 }, { "epoch": 0.72, "learning_rate": 0.00026347689305930913, "loss": 3.9289, "step": 2331 }, { "epoch": 0.72, "learning_rate": 0.0002634412640000719, "loss": 4.5693, "step": 2332 }, { "epoch": 0.72, "learning_rate": 0.00026340563494083464, "loss": 4.2732, "step": 2333 }, { "epoch": 0.72, "learning_rate": 0.0002633700058815974, "loss": 4.089, "step": 2334 }, { "epoch": 0.72, "learning_rate": 0.0002633343768223602, "loss": 4.2653, "step": 2335 }, { "epoch": 0.72, "learning_rate": 0.000263298747763123, "loss": 4.293, "step": 2336 }, { "epoch": 0.72, "learning_rate": 0.00026326311870388577, "loss": 4.1921, "step": 2337 }, { "epoch": 0.72, "learning_rate": 0.0002632274896446485, "loss": 3.8543, "step": 2338 }, { "epoch": 0.72, "learning_rate": 0.0002631918605854113, "loss": 3.7271, "step": 2339 }, { "epoch": 0.72, "learning_rate": 0.00026315623152617406, "loss": 4.192, "step": 2340 }, { "epoch": 0.72, "learning_rate": 0.00026312060246693684, "loss": 3.6994, "step": 2341 }, { "epoch": 0.72, "learning_rate": 0.0002630849734076996, "loss": 3.5377, "step": 2342 }, { "epoch": 0.72, "learning_rate": 0.00026304934434846235, "loss": 3.7842, "step": 2343 }, { "epoch": 0.72, "learning_rate": 0.00026301371528922513, "loss": 3.7985, "step": 2344 }, { "epoch": 0.72, "learning_rate": 0.0002629780862299879, "loss": 3.482, "step": 2345 }, { "epoch": 0.72, "learning_rate": 0.0002629424571707507, "loss": 3.3045, "step": 2346 }, { "epoch": 0.72, "learning_rate": 0.0002629068281115134, "loss": 3.4809, "step": 2347 }, { "epoch": 0.72, "learning_rate": 0.00026287119905227626, "loss": 3.0863, "step": 2348 }, { "epoch": 0.72, "learning_rate": 0.000262835569993039, "loss": 3.2275, "step": 2349 }, { "epoch": 0.73, "learning_rate": 0.00026279994093380177, "loss": 2.9093, "step": 2350 }, { "epoch": 0.73, "learning_rate": 0.00026276431187456455, "loss": 5.4127, "step": 2351 }, { "epoch": 0.73, "learning_rate": 0.0002627286828153273, "loss": 5.5323, "step": 2352 }, { "epoch": 0.73, "learning_rate": 0.0002626930537560901, "loss": 5.1589, "step": 2353 }, { "epoch": 0.73, "learning_rate": 0.00026265742469685284, "loss": 4.807, "step": 2354 }, { "epoch": 0.73, "learning_rate": 0.0002626217956376156, "loss": 4.8033, "step": 2355 }, { "epoch": 0.73, "learning_rate": 0.0002625861665783784, "loss": 4.5973, "step": 2356 }, { "epoch": 0.73, "learning_rate": 0.00026255053751914113, "loss": 4.8542, "step": 2357 }, { "epoch": 0.73, "learning_rate": 0.00026251490845990397, "loss": 4.3649, "step": 2358 }, { "epoch": 0.73, "learning_rate": 0.0002624792794006667, "loss": 4.8726, "step": 2359 }, { "epoch": 0.73, "learning_rate": 0.0002624436503414295, "loss": 4.5285, "step": 2360 }, { "epoch": 0.73, "learning_rate": 0.00026240802128219226, "loss": 4.6399, "step": 2361 }, { "epoch": 0.73, "learning_rate": 0.000262372392222955, "loss": 4.4844, "step": 2362 }, { "epoch": 0.73, "learning_rate": 0.0002623367631637178, "loss": 4.4909, "step": 2363 }, { "epoch": 0.73, "learning_rate": 0.00026230113410448055, "loss": 4.6307, "step": 2364 }, { "epoch": 0.73, "learning_rate": 0.00026226550504524334, "loss": 4.2898, "step": 2365 }, { "epoch": 0.73, "learning_rate": 0.0002622298759860061, "loss": 4.683, "step": 2366 }, { "epoch": 0.73, "learning_rate": 0.00026219424692676884, "loss": 4.4114, "step": 2367 }, { "epoch": 0.73, "learning_rate": 0.0002621586178675316, "loss": 4.5216, "step": 2368 }, { "epoch": 0.73, "learning_rate": 0.0002621229888082944, "loss": 4.3767, "step": 2369 }, { "epoch": 0.73, "learning_rate": 0.0002620873597490572, "loss": 4.1361, "step": 2370 }, { "epoch": 0.73, "learning_rate": 0.00026205173068981997, "loss": 4.6045, "step": 2371 }, { "epoch": 0.73, "learning_rate": 0.00026201610163058275, "loss": 4.31, "step": 2372 }, { "epoch": 0.73, "learning_rate": 0.0002619804725713455, "loss": 4.4309, "step": 2373 }, { "epoch": 0.73, "learning_rate": 0.00026194484351210826, "loss": 4.1245, "step": 2374 }, { "epoch": 0.73, "learning_rate": 0.00026190921445287105, "loss": 4.061, "step": 2375 }, { "epoch": 0.73, "learning_rate": 0.00026187358539363383, "loss": 4.1325, "step": 2376 }, { "epoch": 0.73, "learning_rate": 0.0002618379563343966, "loss": 4.4997, "step": 2377 }, { "epoch": 0.73, "learning_rate": 0.00026180232727515934, "loss": 4.3111, "step": 2378 }, { "epoch": 0.73, "learning_rate": 0.0002617666982159221, "loss": 4.225, "step": 2379 }, { "epoch": 0.73, "learning_rate": 0.0002617310691566849, "loss": 4.1639, "step": 2380 }, { "epoch": 0.73, "learning_rate": 0.00026169544009744763, "loss": 4.3238, "step": 2381 }, { "epoch": 0.74, "learning_rate": 0.00026165981103821046, "loss": 4.0697, "step": 2382 }, { "epoch": 0.74, "learning_rate": 0.0002616241819789732, "loss": 4.0639, "step": 2383 }, { "epoch": 0.74, "learning_rate": 0.000261588552919736, "loss": 4.0016, "step": 2384 }, { "epoch": 0.74, "learning_rate": 0.00026155292386049876, "loss": 4.0495, "step": 2385 }, { "epoch": 0.74, "learning_rate": 0.0002615172948012615, "loss": 3.8519, "step": 2386 }, { "epoch": 0.74, "learning_rate": 0.0002614816657420243, "loss": 3.7634, "step": 2387 }, { "epoch": 0.74, "learning_rate": 0.00026144603668278705, "loss": 4.1468, "step": 2388 }, { "epoch": 0.74, "learning_rate": 0.00026141040762354983, "loss": 3.5792, "step": 2389 }, { "epoch": 0.74, "learning_rate": 0.0002613747785643126, "loss": 3.6677, "step": 2390 }, { "epoch": 0.74, "learning_rate": 0.0002613391495050754, "loss": 3.9524, "step": 2391 }, { "epoch": 0.74, "learning_rate": 0.0002613035204458382, "loss": 3.5922, "step": 2392 }, { "epoch": 0.74, "learning_rate": 0.0002612678913866009, "loss": 3.6583, "step": 2393 }, { "epoch": 0.74, "learning_rate": 0.0002612322623273637, "loss": 3.3502, "step": 2394 }, { "epoch": 0.74, "learning_rate": 0.00026119663326812647, "loss": 3.4239, "step": 2395 }, { "epoch": 0.74, "learning_rate": 0.00026116100420888925, "loss": 3.2053, "step": 2396 }, { "epoch": 0.74, "learning_rate": 0.00026112537514965203, "loss": 3.2898, "step": 2397 }, { "epoch": 0.74, "learning_rate": 0.00026108974609041476, "loss": 3.1544, "step": 2398 }, { "epoch": 0.74, "learning_rate": 0.00026105411703117754, "loss": 2.9352, "step": 2399 }, { "epoch": 0.74, "learning_rate": 0.0002610184879719403, "loss": 2.8481, "step": 2400 }, { "epoch": 0.74, "learning_rate": 0.0002609828589127031, "loss": 5.6219, "step": 2401 }, { "epoch": 0.74, "learning_rate": 0.00026094722985346583, "loss": 5.5077, "step": 2402 }, { "epoch": 0.74, "learning_rate": 0.0002609116007942286, "loss": 4.78, "step": 2403 }, { "epoch": 0.74, "learning_rate": 0.0002608759717349914, "loss": 4.8887, "step": 2404 }, { "epoch": 0.74, "learning_rate": 0.0002608403426757542, "loss": 4.7734, "step": 2405 }, { "epoch": 0.74, "learning_rate": 0.00026080471361651696, "loss": 4.4328, "step": 2406 }, { "epoch": 0.74, "learning_rate": 0.0002607690845572797, "loss": 4.9363, "step": 2407 }, { "epoch": 0.74, "learning_rate": 0.00026073345549804247, "loss": 4.3731, "step": 2408 }, { "epoch": 0.74, "learning_rate": 0.00026069782643880525, "loss": 4.7249, "step": 2409 }, { "epoch": 0.74, "learning_rate": 0.00026066219737956803, "loss": 4.8923, "step": 2410 }, { "epoch": 0.74, "learning_rate": 0.0002606265683203308, "loss": 4.5366, "step": 2411 }, { "epoch": 0.74, "learning_rate": 0.00026059093926109354, "loss": 4.4079, "step": 2412 }, { "epoch": 0.74, "learning_rate": 0.0002605553102018563, "loss": 4.2715, "step": 2413 }, { "epoch": 0.75, "learning_rate": 0.0002605196811426191, "loss": 4.7195, "step": 2414 }, { "epoch": 0.75, "learning_rate": 0.0002604840520833819, "loss": 4.5711, "step": 2415 }, { "epoch": 0.75, "learning_rate": 0.00026044842302414467, "loss": 4.6974, "step": 2416 }, { "epoch": 0.75, "learning_rate": 0.0002604127939649074, "loss": 4.4431, "step": 2417 }, { "epoch": 0.75, "learning_rate": 0.00026037716490567023, "loss": 4.5794, "step": 2418 }, { "epoch": 0.75, "learning_rate": 0.00026034153584643296, "loss": 4.2757, "step": 2419 }, { "epoch": 0.75, "learning_rate": 0.00026030590678719574, "loss": 4.1613, "step": 2420 }, { "epoch": 0.75, "learning_rate": 0.0002602702777279585, "loss": 4.4725, "step": 2421 }, { "epoch": 0.75, "learning_rate": 0.00026023464866872125, "loss": 4.1682, "step": 2422 }, { "epoch": 0.75, "learning_rate": 0.0002601990196094841, "loss": 4.5955, "step": 2423 }, { "epoch": 0.75, "learning_rate": 0.0002601633905502468, "loss": 4.2187, "step": 2424 }, { "epoch": 0.75, "learning_rate": 0.0002601277614910096, "loss": 4.3871, "step": 2425 }, { "epoch": 0.75, "learning_rate": 0.0002600921324317724, "loss": 4.1902, "step": 2426 }, { "epoch": 0.75, "learning_rate": 0.0002600565033725351, "loss": 4.5625, "step": 2427 }, { "epoch": 0.75, "learning_rate": 0.0002600208743132979, "loss": 4.3426, "step": 2428 }, { "epoch": 0.75, "learning_rate": 0.00025998524525406067, "loss": 4.1461, "step": 2429 }, { "epoch": 0.75, "learning_rate": 0.00025994961619482345, "loss": 4.3465, "step": 2430 }, { "epoch": 0.75, "learning_rate": 0.00025991398713558623, "loss": 4.4043, "step": 2431 }, { "epoch": 0.75, "learning_rate": 0.00025987835807634896, "loss": 4.1648, "step": 2432 }, { "epoch": 0.75, "learning_rate": 0.00025984272901711174, "loss": 4.0564, "step": 2433 }, { "epoch": 0.75, "learning_rate": 0.0002598070999578745, "loss": 3.9525, "step": 2434 }, { "epoch": 0.75, "learning_rate": 0.0002597714708986373, "loss": 4.1194, "step": 2435 }, { "epoch": 0.75, "learning_rate": 0.0002597358418394001, "loss": 4.1319, "step": 2436 }, { "epoch": 0.75, "learning_rate": 0.0002597002127801628, "loss": 3.736, "step": 2437 }, { "epoch": 0.75, "learning_rate": 0.0002596645837209256, "loss": 3.9117, "step": 2438 }, { "epoch": 0.75, "learning_rate": 0.0002596289546616884, "loss": 3.7947, "step": 2439 }, { "epoch": 0.75, "learning_rate": 0.00025959332560245116, "loss": 3.6747, "step": 2440 }, { "epoch": 0.75, "learning_rate": 0.0002595576965432139, "loss": 3.6864, "step": 2441 }, { "epoch": 0.75, "learning_rate": 0.0002595220674839767, "loss": 3.696, "step": 2442 }, { "epoch": 0.75, "learning_rate": 0.00025948643842473945, "loss": 3.9071, "step": 2443 }, { "epoch": 0.75, "learning_rate": 0.00025945080936550224, "loss": 3.1786, "step": 2444 }, { "epoch": 0.75, "learning_rate": 0.000259415180306265, "loss": 3.7053, "step": 2445 }, { "epoch": 0.75, "learning_rate": 0.00025937955124702775, "loss": 3.2856, "step": 2446 }, { "epoch": 0.76, "learning_rate": 0.0002593439221877906, "loss": 3.3361, "step": 2447 }, { "epoch": 0.76, "learning_rate": 0.0002593082931285533, "loss": 2.7972, "step": 2448 }, { "epoch": 0.76, "learning_rate": 0.0002592726640693161, "loss": 2.8921, "step": 2449 }, { "epoch": 0.76, "learning_rate": 0.00025923703501007887, "loss": 2.8109, "step": 2450 }, { "epoch": 0.76, "learning_rate": 0.0002592014059508416, "loss": 5.58, "step": 2451 }, { "epoch": 0.76, "learning_rate": 0.00025916577689160444, "loss": 5.2312, "step": 2452 }, { "epoch": 0.76, "learning_rate": 0.00025913014783236716, "loss": 5.116, "step": 2453 }, { "epoch": 0.76, "learning_rate": 0.00025909451877312995, "loss": 4.7117, "step": 2454 }, { "epoch": 0.76, "learning_rate": 0.00025905888971389273, "loss": 4.8747, "step": 2455 }, { "epoch": 0.76, "learning_rate": 0.00025902326065465546, "loss": 4.5118, "step": 2456 }, { "epoch": 0.76, "learning_rate": 0.0002589876315954183, "loss": 4.5341, "step": 2457 }, { "epoch": 0.76, "learning_rate": 0.000258952002536181, "loss": 4.8384, "step": 2458 }, { "epoch": 0.76, "learning_rate": 0.0002589163734769438, "loss": 4.6472, "step": 2459 }, { "epoch": 0.76, "learning_rate": 0.0002588807444177066, "loss": 4.4225, "step": 2460 }, { "epoch": 0.76, "learning_rate": 0.0002588451153584693, "loss": 4.7712, "step": 2461 }, { "epoch": 0.76, "learning_rate": 0.0002588094862992321, "loss": 4.5142, "step": 2462 }, { "epoch": 0.76, "learning_rate": 0.0002587738572399949, "loss": 4.3298, "step": 2463 }, { "epoch": 0.76, "learning_rate": 0.00025873822818075766, "loss": 4.2856, "step": 2464 }, { "epoch": 0.76, "learning_rate": 0.00025870259912152044, "loss": 4.5749, "step": 2465 }, { "epoch": 0.76, "learning_rate": 0.0002586669700622832, "loss": 4.1792, "step": 2466 }, { "epoch": 0.76, "learning_rate": 0.00025863134100304595, "loss": 4.3868, "step": 2467 }, { "epoch": 0.76, "learning_rate": 0.00025859571194380873, "loss": 4.3901, "step": 2468 }, { "epoch": 0.76, "learning_rate": 0.0002585600828845715, "loss": 4.3862, "step": 2469 }, { "epoch": 0.76, "learning_rate": 0.0002585244538253343, "loss": 4.2981, "step": 2470 }, { "epoch": 0.76, "learning_rate": 0.0002584888247660971, "loss": 4.4714, "step": 2471 }, { "epoch": 0.76, "learning_rate": 0.0002584531957068598, "loss": 4.19, "step": 2472 }, { "epoch": 0.76, "learning_rate": 0.0002584175666476226, "loss": 4.2361, "step": 2473 }, { "epoch": 0.76, "learning_rate": 0.00025838193758838537, "loss": 4.4165, "step": 2474 }, { "epoch": 0.76, "learning_rate": 0.0002583463085291481, "loss": 4.0266, "step": 2475 }, { "epoch": 0.76, "learning_rate": 0.00025831067946991093, "loss": 4.2148, "step": 2476 }, { "epoch": 0.76, "learning_rate": 0.00025827505041067366, "loss": 4.0271, "step": 2477 }, { "epoch": 0.76, "learning_rate": 0.00025823942135143644, "loss": 4.1752, "step": 2478 }, { "epoch": 0.77, "learning_rate": 0.0002582037922921992, "loss": 4.2301, "step": 2479 }, { "epoch": 0.77, "learning_rate": 0.00025816816323296195, "loss": 3.8347, "step": 2480 }, { "epoch": 0.77, "learning_rate": 0.0002581325341737248, "loss": 3.8505, "step": 2481 }, { "epoch": 0.77, "learning_rate": 0.0002580969051144875, "loss": 4.0187, "step": 2482 }, { "epoch": 0.77, "learning_rate": 0.0002580612760552503, "loss": 4.025, "step": 2483 }, { "epoch": 0.77, "learning_rate": 0.0002580256469960131, "loss": 4.0884, "step": 2484 }, { "epoch": 0.77, "learning_rate": 0.00025799001793677586, "loss": 3.6315, "step": 2485 }, { "epoch": 0.77, "learning_rate": 0.00025795438887753864, "loss": 4.0414, "step": 2486 }, { "epoch": 0.77, "learning_rate": 0.00025791875981830137, "loss": 3.9985, "step": 2487 }, { "epoch": 0.77, "learning_rate": 0.00025788313075906415, "loss": 3.9457, "step": 2488 }, { "epoch": 0.77, "learning_rate": 0.00025784750169982693, "loss": 3.7088, "step": 2489 }, { "epoch": 0.77, "learning_rate": 0.0002578118726405897, "loss": 3.7256, "step": 2490 }, { "epoch": 0.77, "learning_rate": 0.0002577762435813525, "loss": 3.623, "step": 2491 }, { "epoch": 0.77, "learning_rate": 0.0002577406145221152, "loss": 3.5128, "step": 2492 }, { "epoch": 0.77, "learning_rate": 0.000257704985462878, "loss": 3.2699, "step": 2493 }, { "epoch": 0.77, "learning_rate": 0.0002576693564036408, "loss": 3.4199, "step": 2494 }, { "epoch": 0.77, "learning_rate": 0.00025763372734440357, "loss": 3.4769, "step": 2495 }, { "epoch": 0.77, "learning_rate": 0.0002575980982851663, "loss": 3.3512, "step": 2496 }, { "epoch": 0.77, "learning_rate": 0.0002575624692259291, "loss": 3.396, "step": 2497 }, { "epoch": 0.77, "learning_rate": 0.00025752684016669186, "loss": 3.0352, "step": 2498 }, { "epoch": 0.77, "learning_rate": 0.00025749121110745464, "loss": 2.7582, "step": 2499 }, { "epoch": 0.77, "learning_rate": 0.0002574555820482174, "loss": 2.8816, "step": 2500 }, { "epoch": 0.77, "eval_bleu": 0.0, "eval_loss": 4.629215240478516, "eval_runtime": 2570.0611, "eval_samples_per_second": 5.743, "eval_steps_per_second": 0.718, "step": 2500 }, { "epoch": 0.77, "learning_rate": 0.00025741995298898015, "loss": 5.5757, "step": 2501 }, { "epoch": 0.77, "learning_rate": 0.00025738432392974293, "loss": 5.2847, "step": 2502 }, { "epoch": 0.77, "learning_rate": 0.0002573486948705057, "loss": 4.9658, "step": 2503 }, { "epoch": 0.77, "learning_rate": 0.0002573130658112685, "loss": 4.9746, "step": 2504 }, { "epoch": 0.77, "learning_rate": 0.0002572774367520313, "loss": 4.713, "step": 2505 }, { "epoch": 0.77, "learning_rate": 0.000257241807692794, "loss": 4.5189, "step": 2506 }, { "epoch": 0.77, "learning_rate": 0.0002572061786335568, "loss": 4.5148, "step": 2507 }, { "epoch": 0.77, "learning_rate": 0.00025717054957431957, "loss": 4.728, "step": 2508 }, { "epoch": 0.77, "learning_rate": 0.00025713492051508235, "loss": 4.5271, "step": 2509 }, { "epoch": 0.77, "learning_rate": 0.00025709929145584513, "loss": 4.5733, "step": 2510 }, { "epoch": 0.77, "learning_rate": 0.00025706366239660786, "loss": 4.4387, "step": 2511 }, { "epoch": 0.78, "learning_rate": 0.0002570280333373707, "loss": 4.4914, "step": 2512 }, { "epoch": 0.78, "learning_rate": 0.0002570280333373707, "loss": 4.5071, "step": 2513 }, { "epoch": 0.78, "learning_rate": 0.0002569924042781334, "loss": 4.4377, "step": 2514 }, { "epoch": 0.78, "learning_rate": 0.0002569567752188962, "loss": 4.6628, "step": 2515 }, { "epoch": 0.78, "learning_rate": 0.000256921146159659, "loss": 4.5694, "step": 2516 }, { "epoch": 0.78, "learning_rate": 0.0002568855171004217, "loss": 4.5888, "step": 2517 }, { "epoch": 0.78, "learning_rate": 0.00025684988804118455, "loss": 4.0517, "step": 2518 }, { "epoch": 0.78, "learning_rate": 0.0002568142589819473, "loss": 4.5035, "step": 2519 }, { "epoch": 0.78, "learning_rate": 0.00025677862992271006, "loss": 4.4972, "step": 2520 }, { "epoch": 0.78, "learning_rate": 0.00025674300086347285, "loss": 4.1945, "step": 2521 }, { "epoch": 0.78, "learning_rate": 0.00025670737180423557, "loss": 4.0409, "step": 2522 }, { "epoch": 0.78, "learning_rate": 0.00025667174274499835, "loss": 4.4237, "step": 2523 }, { "epoch": 0.78, "learning_rate": 0.00025663611368576114, "loss": 4.3724, "step": 2524 }, { "epoch": 0.78, "learning_rate": 0.0002566004846265239, "loss": 4.3978, "step": 2525 }, { "epoch": 0.78, "learning_rate": 0.0002565648555672867, "loss": 4.2491, "step": 2526 }, { "epoch": 0.78, "learning_rate": 0.00025652922650804943, "loss": 4.2489, "step": 2527 }, { "epoch": 0.78, "learning_rate": 0.0002564935974488122, "loss": 4.2097, "step": 2528 }, { "epoch": 0.78, "learning_rate": 0.000256457968389575, "loss": 4.4035, "step": 2529 }, { "epoch": 0.78, "learning_rate": 0.0002564223393303378, "loss": 4.0259, "step": 2530 }, { "epoch": 0.78, "learning_rate": 0.0002563867102711005, "loss": 4.1706, "step": 2531 }, { "epoch": 0.78, "learning_rate": 0.0002563510812118633, "loss": 4.3027, "step": 2532 }, { "epoch": 0.78, "learning_rate": 0.00025631545215262607, "loss": 4.2495, "step": 2533 }, { "epoch": 0.78, "learning_rate": 0.00025627982309338885, "loss": 3.9484, "step": 2534 }, { "epoch": 0.78, "learning_rate": 0.00025624419403415163, "loss": 3.9936, "step": 2535 }, { "epoch": 0.78, "learning_rate": 0.00025620856497491436, "loss": 4.3274, "step": 2536 }, { "epoch": 0.78, "learning_rate": 0.0002561729359156772, "loss": 3.7381, "step": 2537 }, { "epoch": 0.78, "learning_rate": 0.0002561373068564399, "loss": 3.6598, "step": 2538 }, { "epoch": 0.78, "learning_rate": 0.0002561016777972027, "loss": 4.0983, "step": 2539 }, { "epoch": 0.78, "learning_rate": 0.0002560660487379655, "loss": 3.8014, "step": 2540 }, { "epoch": 0.78, "learning_rate": 0.0002560304196787282, "loss": 3.591, "step": 2541 }, { "epoch": 0.78, "learning_rate": 0.00025599479061949105, "loss": 3.5541, "step": 2542 }, { "epoch": 0.78, "learning_rate": 0.0002559591615602538, "loss": 3.2301, "step": 2543 }, { "epoch": 0.79, "learning_rate": 0.00025592353250101656, "loss": 3.415, "step": 2544 }, { "epoch": 0.79, "learning_rate": 0.00025588790344177934, "loss": 3.2344, "step": 2545 }, { "epoch": 0.79, "learning_rate": 0.00025585227438254207, "loss": 3.2152, "step": 2546 }, { "epoch": 0.79, "learning_rate": 0.0002558166453233049, "loss": 3.0609, "step": 2547 }, { "epoch": 0.79, "learning_rate": 0.00025578101626406763, "loss": 2.7601, "step": 2548 }, { "epoch": 0.79, "learning_rate": 0.0002557453872048304, "loss": 2.7428, "step": 2549 }, { "epoch": 0.79, "learning_rate": 0.0002557097581455932, "loss": 2.6817, "step": 2550 }, { "epoch": 0.79, "learning_rate": 0.0002556741290863559, "loss": 5.4404, "step": 2551 }, { "epoch": 0.79, "learning_rate": 0.00025563850002711876, "loss": 5.3606, "step": 2552 }, { "epoch": 0.79, "learning_rate": 0.0002556028709678815, "loss": 5.0698, "step": 2553 }, { "epoch": 0.79, "learning_rate": 0.00025556724190864427, "loss": 4.89, "step": 2554 }, { "epoch": 0.79, "learning_rate": 0.00025553161284940705, "loss": 4.6995, "step": 2555 }, { "epoch": 0.79, "learning_rate": 0.00025549598379016983, "loss": 4.6063, "step": 2556 }, { "epoch": 0.79, "learning_rate": 0.00025546035473093256, "loss": 4.436, "step": 2557 }, { "epoch": 0.79, "learning_rate": 0.00025542472567169534, "loss": 4.3352, "step": 2558 }, { "epoch": 0.79, "learning_rate": 0.0002553890966124581, "loss": 4.6661, "step": 2559 }, { "epoch": 0.79, "learning_rate": 0.0002553534675532209, "loss": 4.4041, "step": 2560 }, { "epoch": 0.79, "learning_rate": 0.0002553178384939837, "loss": 4.4643, "step": 2561 }, { "epoch": 0.79, "learning_rate": 0.0002552822094347464, "loss": 4.3703, "step": 2562 }, { "epoch": 0.79, "learning_rate": 0.0002552465803755092, "loss": 4.1509, "step": 2563 }, { "epoch": 0.79, "learning_rate": 0.000255210951316272, "loss": 4.3058, "step": 2564 }, { "epoch": 0.79, "learning_rate": 0.0002551753222570347, "loss": 4.3526, "step": 2565 }, { "epoch": 0.79, "learning_rate": 0.00025513969319779754, "loss": 4.3695, "step": 2566 }, { "epoch": 0.79, "learning_rate": 0.00025510406413856027, "loss": 4.2624, "step": 2567 }, { "epoch": 0.79, "learning_rate": 0.00025506843507932305, "loss": 4.2271, "step": 2568 }, { "epoch": 0.79, "learning_rate": 0.00025503280602008583, "loss": 4.1177, "step": 2569 }, { "epoch": 0.79, "learning_rate": 0.00025499717696084856, "loss": 4.1103, "step": 2570 }, { "epoch": 0.79, "learning_rate": 0.0002549615479016114, "loss": 4.1808, "step": 2571 }, { "epoch": 0.79, "learning_rate": 0.0002549259188423741, "loss": 4.1405, "step": 2572 }, { "epoch": 0.79, "learning_rate": 0.0002548902897831369, "loss": 4.2116, "step": 2573 }, { "epoch": 0.79, "learning_rate": 0.0002548546607238997, "loss": 4.1636, "step": 2574 }, { "epoch": 0.79, "learning_rate": 0.0002548190316646624, "loss": 3.8042, "step": 2575 }, { "epoch": 0.8, "learning_rate": 0.00025478340260542525, "loss": 3.956, "step": 2576 }, { "epoch": 0.8, "learning_rate": 0.000254747773546188, "loss": 3.878, "step": 2577 }, { "epoch": 0.8, "learning_rate": 0.00025471214448695076, "loss": 3.8237, "step": 2578 }, { "epoch": 0.8, "learning_rate": 0.00025467651542771354, "loss": 4.0869, "step": 2579 }, { "epoch": 0.8, "learning_rate": 0.0002546408863684763, "loss": 4.0401, "step": 2580 }, { "epoch": 0.8, "learning_rate": 0.0002546052573092391, "loss": 4.086, "step": 2581 }, { "epoch": 0.8, "learning_rate": 0.00025456962825000183, "loss": 3.8983, "step": 2582 }, { "epoch": 0.8, "learning_rate": 0.0002545339991907646, "loss": 3.7856, "step": 2583 }, { "epoch": 0.8, "learning_rate": 0.0002544983701315274, "loss": 3.7518, "step": 2584 }, { "epoch": 0.8, "learning_rate": 0.0002544627410722902, "loss": 3.6817, "step": 2585 }, { "epoch": 0.8, "learning_rate": 0.00025442711201305296, "loss": 3.6225, "step": 2586 }, { "epoch": 0.8, "learning_rate": 0.0002543914829538157, "loss": 3.6017, "step": 2587 }, { "epoch": 0.8, "learning_rate": 0.00025435585389457847, "loss": 3.4799, "step": 2588 }, { "epoch": 0.8, "learning_rate": 0.00025432022483534125, "loss": 3.5111, "step": 2589 }, { "epoch": 0.8, "learning_rate": 0.00025428459577610404, "loss": 3.4776, "step": 2590 }, { "epoch": 0.8, "learning_rate": 0.00025424896671686676, "loss": 3.2313, "step": 2591 }, { "epoch": 0.8, "learning_rate": 0.00025421333765762955, "loss": 3.6537, "step": 2592 }, { "epoch": 0.8, "learning_rate": 0.0002541777085983923, "loss": 3.3233, "step": 2593 }, { "epoch": 0.8, "learning_rate": 0.0002541420795391551, "loss": 3.2158, "step": 2594 }, { "epoch": 0.8, "learning_rate": 0.0002541064504799179, "loss": 3.0938, "step": 2595 }, { "epoch": 0.8, "learning_rate": 0.0002540708214206806, "loss": 3.0766, "step": 2596 }, { "epoch": 0.8, "learning_rate": 0.0002540351923614434, "loss": 2.8116, "step": 2597 }, { "epoch": 0.8, "learning_rate": 0.0002539995633022062, "loss": 2.9469, "step": 2598 }, { "epoch": 0.8, "learning_rate": 0.00025396393424296896, "loss": 2.6102, "step": 2599 }, { "epoch": 0.8, "learning_rate": 0.00025392830518373175, "loss": 2.4305, "step": 2600 }, { "epoch": 0.8, "learning_rate": 0.0002538926761244945, "loss": 5.0191, "step": 2601 }, { "epoch": 0.8, "learning_rate": 0.00025385704706525726, "loss": 4.8744, "step": 2602 }, { "epoch": 0.8, "learning_rate": 0.00025382141800602004, "loss": 4.711, "step": 2603 }, { "epoch": 0.8, "learning_rate": 0.0002537857889467828, "loss": 4.6278, "step": 2604 }, { "epoch": 0.8, "learning_rate": 0.0002537501598875456, "loss": 4.5719, "step": 2605 }, { "epoch": 0.8, "learning_rate": 0.00025371453082830833, "loss": 4.4946, "step": 2606 }, { "epoch": 0.8, "learning_rate": 0.00025367890176907116, "loss": 4.3692, "step": 2607 }, { "epoch": 0.8, "learning_rate": 0.0002536432727098339, "loss": 4.4509, "step": 2608 }, { "epoch": 0.81, "learning_rate": 0.0002536076436505967, "loss": 4.4362, "step": 2609 }, { "epoch": 0.81, "learning_rate": 0.00025357201459135946, "loss": 4.216, "step": 2610 }, { "epoch": 0.81, "learning_rate": 0.0002535363855321222, "loss": 4.0902, "step": 2611 }, { "epoch": 0.81, "learning_rate": 0.00025350075647288497, "loss": 4.3517, "step": 2612 }, { "epoch": 0.81, "learning_rate": 0.00025346512741364775, "loss": 4.3637, "step": 2613 }, { "epoch": 0.81, "learning_rate": 0.00025342949835441053, "loss": 4.1776, "step": 2614 }, { "epoch": 0.81, "learning_rate": 0.0002533938692951733, "loss": 4.0667, "step": 2615 }, { "epoch": 0.81, "learning_rate": 0.00025335824023593604, "loss": 4.2753, "step": 2616 }, { "epoch": 0.81, "learning_rate": 0.0002533226111766988, "loss": 4.0334, "step": 2617 }, { "epoch": 0.81, "learning_rate": 0.0002532869821174616, "loss": 3.9675, "step": 2618 }, { "epoch": 0.81, "learning_rate": 0.0002532513530582244, "loss": 4.0972, "step": 2619 }, { "epoch": 0.81, "learning_rate": 0.00025321572399898717, "loss": 4.1103, "step": 2620 }, { "epoch": 0.81, "learning_rate": 0.0002531800949397499, "loss": 4.2297, "step": 2621 }, { "epoch": 0.81, "learning_rate": 0.0002531444658805127, "loss": 4.0127, "step": 2622 }, { "epoch": 0.81, "learning_rate": 0.00025310883682127546, "loss": 4.1252, "step": 2623 }, { "epoch": 0.81, "learning_rate": 0.00025307320776203824, "loss": 3.9704, "step": 2624 }, { "epoch": 0.81, "learning_rate": 0.00025303757870280097, "loss": 3.9824, "step": 2625 }, { "epoch": 0.81, "learning_rate": 0.00025300194964356375, "loss": 4.1631, "step": 2626 }, { "epoch": 0.81, "learning_rate": 0.00025296632058432653, "loss": 3.9456, "step": 2627 }, { "epoch": 0.81, "learning_rate": 0.0002529306915250893, "loss": 3.9519, "step": 2628 }, { "epoch": 0.81, "learning_rate": 0.0002528950624658521, "loss": 3.8485, "step": 2629 }, { "epoch": 0.81, "learning_rate": 0.0002528594334066148, "loss": 3.9384, "step": 2630 }, { "epoch": 0.81, "learning_rate": 0.00025282380434737766, "loss": 3.6721, "step": 2631 }, { "epoch": 0.81, "learning_rate": 0.0002527881752881404, "loss": 3.9117, "step": 2632 }, { "epoch": 0.81, "learning_rate": 0.00025275254622890317, "loss": 4.018, "step": 2633 }, { "epoch": 0.81, "learning_rate": 0.00025271691716966595, "loss": 3.6935, "step": 2634 }, { "epoch": 0.81, "learning_rate": 0.0002526812881104287, "loss": 3.7908, "step": 2635 }, { "epoch": 0.81, "learning_rate": 0.0002526456590511915, "loss": 3.8382, "step": 2636 }, { "epoch": 0.81, "learning_rate": 0.00025261002999195424, "loss": 3.4631, "step": 2637 }, { "epoch": 0.81, "learning_rate": 0.000252574400932717, "loss": 3.4809, "step": 2638 }, { "epoch": 0.81, "learning_rate": 0.0002525387718734798, "loss": 3.549, "step": 2639 }, { "epoch": 0.81, "learning_rate": 0.00025250314281424253, "loss": 3.6252, "step": 2640 }, { "epoch": 0.82, "learning_rate": 0.00025246751375500537, "loss": 3.4157, "step": 2641 }, { "epoch": 0.82, "learning_rate": 0.0002524318846957681, "loss": 3.3236, "step": 2642 }, { "epoch": 0.82, "learning_rate": 0.0002523962556365309, "loss": 3.3568, "step": 2643 }, { "epoch": 0.82, "learning_rate": 0.00025236062657729366, "loss": 3.1308, "step": 2644 }, { "epoch": 0.82, "learning_rate": 0.0002523249975180564, "loss": 3.2226, "step": 2645 }, { "epoch": 0.82, "learning_rate": 0.00025228936845881917, "loss": 2.9759, "step": 2646 }, { "epoch": 0.82, "learning_rate": 0.00025225373939958195, "loss": 3.1991, "step": 2647 }, { "epoch": 0.82, "learning_rate": 0.00025221811034034473, "loss": 2.8763, "step": 2648 }, { "epoch": 0.82, "learning_rate": 0.0002521824812811075, "loss": 2.4978, "step": 2649 }, { "epoch": 0.82, "learning_rate": 0.0002521468522218703, "loss": 2.4495, "step": 2650 }, { "epoch": 0.82, "learning_rate": 0.000252111223162633, "loss": 5.3153, "step": 2651 }, { "epoch": 0.82, "learning_rate": 0.0002520755941033958, "loss": 4.8939, "step": 2652 }, { "epoch": 0.82, "learning_rate": 0.0002520399650441586, "loss": 4.7659, "step": 2653 }, { "epoch": 0.82, "learning_rate": 0.00025200433598492137, "loss": 4.7056, "step": 2654 }, { "epoch": 0.82, "learning_rate": 0.00025196870692568415, "loss": 4.5684, "step": 2655 }, { "epoch": 0.82, "learning_rate": 0.0002519330778664469, "loss": 4.3812, "step": 2656 }, { "epoch": 0.82, "learning_rate": 0.00025189744880720966, "loss": 4.4827, "step": 2657 }, { "epoch": 0.82, "learning_rate": 0.00025186181974797244, "loss": 4.2564, "step": 2658 }, { "epoch": 0.82, "learning_rate": 0.00025182619068873517, "loss": 4.5515, "step": 2659 }, { "epoch": 0.82, "learning_rate": 0.000251790561629498, "loss": 4.3191, "step": 2660 }, { "epoch": 0.82, "learning_rate": 0.00025175493257026074, "loss": 4.2825, "step": 2661 }, { "epoch": 0.82, "learning_rate": 0.0002517193035110235, "loss": 4.1944, "step": 2662 }, { "epoch": 0.82, "learning_rate": 0.0002516836744517863, "loss": 4.293, "step": 2663 }, { "epoch": 0.82, "learning_rate": 0.000251648045392549, "loss": 4.2297, "step": 2664 }, { "epoch": 0.82, "learning_rate": 0.00025161241633331186, "loss": 4.1846, "step": 2665 }, { "epoch": 0.82, "learning_rate": 0.0002515767872740746, "loss": 4.063, "step": 2666 }, { "epoch": 0.82, "learning_rate": 0.00025154115821483737, "loss": 4.3292, "step": 2667 }, { "epoch": 0.82, "learning_rate": 0.00025150552915560015, "loss": 4.0888, "step": 2668 }, { "epoch": 0.82, "learning_rate": 0.0002514699000963629, "loss": 4.1236, "step": 2669 }, { "epoch": 0.82, "learning_rate": 0.0002514342710371257, "loss": 4.0076, "step": 2670 }, { "epoch": 0.82, "learning_rate": 0.00025139864197788845, "loss": 3.8963, "step": 2671 }, { "epoch": 0.82, "learning_rate": 0.00025136301291865123, "loss": 3.9127, "step": 2672 }, { "epoch": 0.82, "learning_rate": 0.000251327383859414, "loss": 3.9635, "step": 2673 }, { "epoch": 0.83, "learning_rate": 0.0002512917548001768, "loss": 3.8568, "step": 2674 }, { "epoch": 0.83, "learning_rate": 0.0002512561257409396, "loss": 4.0404, "step": 2675 }, { "epoch": 0.83, "learning_rate": 0.0002512204966817023, "loss": 3.9645, "step": 2676 }, { "epoch": 0.83, "learning_rate": 0.0002511848676224651, "loss": 3.9574, "step": 2677 }, { "epoch": 0.83, "learning_rate": 0.00025114923856322786, "loss": 3.8694, "step": 2678 }, { "epoch": 0.83, "learning_rate": 0.00025111360950399065, "loss": 3.7471, "step": 2679 }, { "epoch": 0.83, "learning_rate": 0.0002510779804447534, "loss": 3.7963, "step": 2680 }, { "epoch": 0.83, "learning_rate": 0.00025104235138551616, "loss": 3.7481, "step": 2681 }, { "epoch": 0.83, "learning_rate": 0.00025100672232627894, "loss": 3.8706, "step": 2682 }, { "epoch": 0.83, "learning_rate": 0.0002509710932670417, "loss": 3.8604, "step": 2683 }, { "epoch": 0.83, "learning_rate": 0.0002509354642078045, "loss": 3.7611, "step": 2684 }, { "epoch": 0.83, "learning_rate": 0.00025089983514856723, "loss": 3.5409, "step": 2685 }, { "epoch": 0.83, "learning_rate": 0.00025086420608933, "loss": 3.7339, "step": 2686 }, { "epoch": 0.83, "learning_rate": 0.0002508285770300928, "loss": 3.6357, "step": 2687 }, { "epoch": 0.83, "learning_rate": 0.0002507929479708556, "loss": 3.605, "step": 2688 }, { "epoch": 0.83, "learning_rate": 0.00025075731891161836, "loss": 3.6572, "step": 2689 }, { "epoch": 0.83, "learning_rate": 0.0002507216898523811, "loss": 3.6468, "step": 2690 }, { "epoch": 0.83, "learning_rate": 0.00025068606079314387, "loss": 3.3461, "step": 2691 }, { "epoch": 0.83, "learning_rate": 0.00025065043173390665, "loss": 3.4089, "step": 2692 }, { "epoch": 0.83, "learning_rate": 0.00025061480267466943, "loss": 3.3006, "step": 2693 }, { "epoch": 0.83, "learning_rate": 0.0002505791736154322, "loss": 3.3784, "step": 2694 }, { "epoch": 0.83, "learning_rate": 0.00025054354455619494, "loss": 3.0976, "step": 2695 }, { "epoch": 0.83, "learning_rate": 0.0002505079154969577, "loss": 2.9745, "step": 2696 }, { "epoch": 0.83, "learning_rate": 0.0002504722864377205, "loss": 2.8536, "step": 2697 }, { "epoch": 0.83, "learning_rate": 0.0002504366573784833, "loss": 2.8292, "step": 2698 }, { "epoch": 0.83, "learning_rate": 0.00025040102831924607, "loss": 2.8176, "step": 2699 }, { "epoch": 0.83, "learning_rate": 0.0002503653992600088, "loss": 2.658, "step": 2700 }, { "epoch": 0.83, "learning_rate": 0.00025032977020077163, "loss": 5.4987, "step": 2701 }, { "epoch": 0.83, "learning_rate": 0.00025029414114153436, "loss": 5.2087, "step": 2702 }, { "epoch": 0.83, "learning_rate": 0.00025025851208229714, "loss": 4.7573, "step": 2703 }, { "epoch": 0.83, "learning_rate": 0.0002502228830230599, "loss": 4.4529, "step": 2704 }, { "epoch": 0.83, "learning_rate": 0.00025018725396382265, "loss": 4.5227, "step": 2705 }, { "epoch": 0.84, "learning_rate": 0.00025015162490458543, "loss": 4.4291, "step": 2706 }, { "epoch": 0.84, "learning_rate": 0.0002501159958453482, "loss": 4.3855, "step": 2707 }, { "epoch": 0.84, "learning_rate": 0.000250080366786111, "loss": 4.2062, "step": 2708 }, { "epoch": 0.84, "learning_rate": 0.0002500447377268738, "loss": 4.2823, "step": 2709 }, { "epoch": 0.84, "learning_rate": 0.0002500091086676365, "loss": 4.3326, "step": 2710 }, { "epoch": 0.84, "learning_rate": 0.0002499734796083993, "loss": 4.4894, "step": 2711 }, { "epoch": 0.84, "learning_rate": 0.00024993785054916207, "loss": 4.2602, "step": 2712 }, { "epoch": 0.84, "learning_rate": 0.00024990222148992485, "loss": 4.2127, "step": 2713 }, { "epoch": 0.84, "learning_rate": 0.00024986659243068763, "loss": 4.4705, "step": 2714 }, { "epoch": 0.84, "learning_rate": 0.00024983096337145036, "loss": 4.2217, "step": 2715 }, { "epoch": 0.84, "learning_rate": 0.00024979533431221314, "loss": 4.1255, "step": 2716 }, { "epoch": 0.84, "learning_rate": 0.0002497597052529759, "loss": 4.0255, "step": 2717 }, { "epoch": 0.84, "learning_rate": 0.0002497240761937387, "loss": 4.1824, "step": 2718 }, { "epoch": 0.84, "learning_rate": 0.00024968844713450143, "loss": 4.1604, "step": 2719 }, { "epoch": 0.84, "learning_rate": 0.0002496528180752642, "loss": 4.0386, "step": 2720 }, { "epoch": 0.84, "learning_rate": 0.000249617189016027, "loss": 4.2167, "step": 2721 }, { "epoch": 0.84, "learning_rate": 0.0002495815599567898, "loss": 3.8919, "step": 2722 }, { "epoch": 0.84, "learning_rate": 0.00024954593089755256, "loss": 4.1581, "step": 2723 }, { "epoch": 0.84, "learning_rate": 0.0002495103018383153, "loss": 4.0393, "step": 2724 }, { "epoch": 0.84, "learning_rate": 0.0002494746727790781, "loss": 3.9959, "step": 2725 }, { "epoch": 0.84, "learning_rate": 0.00024943904371984085, "loss": 3.947, "step": 2726 }, { "epoch": 0.84, "learning_rate": 0.00024940341466060363, "loss": 3.7699, "step": 2727 }, { "epoch": 0.84, "learning_rate": 0.0002493677856013664, "loss": 3.9608, "step": 2728 }, { "epoch": 0.84, "learning_rate": 0.00024933215654212914, "loss": 3.757, "step": 2729 }, { "epoch": 0.84, "learning_rate": 0.000249296527482892, "loss": 3.8792, "step": 2730 }, { "epoch": 0.84, "learning_rate": 0.0002492608984236547, "loss": 3.8096, "step": 2731 }, { "epoch": 0.84, "learning_rate": 0.0002492252693644175, "loss": 3.6003, "step": 2732 }, { "epoch": 0.84, "learning_rate": 0.00024918964030518027, "loss": 3.6477, "step": 2733 }, { "epoch": 0.84, "learning_rate": 0.000249154011245943, "loss": 3.6283, "step": 2734 }, { "epoch": 0.84, "learning_rate": 0.00024911838218670584, "loss": 3.656, "step": 2735 }, { "epoch": 0.84, "learning_rate": 0.00024908275312746856, "loss": 3.4448, "step": 2736 }, { "epoch": 0.84, "learning_rate": 0.00024904712406823134, "loss": 3.4105, "step": 2737 }, { "epoch": 0.84, "learning_rate": 0.0002490114950089941, "loss": 3.6094, "step": 2738 }, { "epoch": 0.85, "learning_rate": 0.00024897586594975685, "loss": 3.5526, "step": 2739 }, { "epoch": 0.85, "learning_rate": 0.00024894023689051964, "loss": 3.4898, "step": 2740 }, { "epoch": 0.85, "learning_rate": 0.0002489046078312824, "loss": 3.5501, "step": 2741 }, { "epoch": 0.85, "learning_rate": 0.0002488689787720452, "loss": 3.2843, "step": 2742 }, { "epoch": 0.85, "learning_rate": 0.000248833349712808, "loss": 3.2016, "step": 2743 }, { "epoch": 0.85, "learning_rate": 0.00024879772065357076, "loss": 3.3859, "step": 2744 }, { "epoch": 0.85, "learning_rate": 0.0002487620915943335, "loss": 3.0138, "step": 2745 }, { "epoch": 0.85, "learning_rate": 0.0002487264625350963, "loss": 3.0063, "step": 2746 }, { "epoch": 0.85, "learning_rate": 0.00024869083347585906, "loss": 2.8542, "step": 2747 }, { "epoch": 0.85, "learning_rate": 0.00024865520441662184, "loss": 2.5747, "step": 2748 }, { "epoch": 0.85, "learning_rate": 0.0002486195753573846, "loss": 2.7381, "step": 2749 }, { "epoch": 0.85, "learning_rate": 0.00024858394629814735, "loss": 2.2629, "step": 2750 }, { "epoch": 0.85, "learning_rate": 0.00024854831723891013, "loss": 5.754, "step": 2751 }, { "epoch": 0.85, "learning_rate": 0.0002485126881796729, "loss": 5.1719, "step": 2752 }, { "epoch": 0.85, "learning_rate": 0.00024847705912043564, "loss": 4.9049, "step": 2753 }, { "epoch": 0.85, "learning_rate": 0.0002484414300611985, "loss": 4.7782, "step": 2754 }, { "epoch": 0.85, "learning_rate": 0.0002484058010019612, "loss": 4.1999, "step": 2755 }, { "epoch": 0.85, "learning_rate": 0.000248370171942724, "loss": 4.5041, "step": 2756 }, { "epoch": 0.85, "learning_rate": 0.00024833454288348677, "loss": 4.2243, "step": 2757 }, { "epoch": 0.85, "learning_rate": 0.0002482989138242495, "loss": 4.5833, "step": 2758 }, { "epoch": 0.85, "learning_rate": 0.00024826328476501233, "loss": 4.3533, "step": 2759 }, { "epoch": 0.85, "learning_rate": 0.00024822765570577506, "loss": 4.2367, "step": 2760 }, { "epoch": 0.85, "learning_rate": 0.00024819202664653784, "loss": 4.1026, "step": 2761 }, { "epoch": 0.85, "learning_rate": 0.0002481563975873006, "loss": 4.1035, "step": 2762 }, { "epoch": 0.85, "learning_rate": 0.00024812076852806335, "loss": 4.2734, "step": 2763 }, { "epoch": 0.85, "learning_rate": 0.0002480851394688262, "loss": 4.1579, "step": 2764 }, { "epoch": 0.85, "learning_rate": 0.0002480495104095889, "loss": 4.272, "step": 2765 }, { "epoch": 0.85, "learning_rate": 0.0002480138813503517, "loss": 4.1484, "step": 2766 }, { "epoch": 0.85, "learning_rate": 0.0002479782522911145, "loss": 4.1021, "step": 2767 }, { "epoch": 0.85, "learning_rate": 0.00024794262323187726, "loss": 4.1433, "step": 2768 }, { "epoch": 0.85, "learning_rate": 0.00024790699417264004, "loss": 3.9402, "step": 2769 }, { "epoch": 0.85, "learning_rate": 0.00024787136511340277, "loss": 3.8692, "step": 2770 }, { "epoch": 0.86, "learning_rate": 0.00024783573605416555, "loss": 4.1394, "step": 2771 }, { "epoch": 0.86, "learning_rate": 0.00024780010699492833, "loss": 4.1031, "step": 2772 }, { "epoch": 0.86, "learning_rate": 0.0002477644779356911, "loss": 4.0076, "step": 2773 }, { "epoch": 0.86, "learning_rate": 0.00024772884887645384, "loss": 4.038, "step": 2774 }, { "epoch": 0.86, "learning_rate": 0.0002476932198172166, "loss": 3.9126, "step": 2775 }, { "epoch": 0.86, "learning_rate": 0.0002476575907579794, "loss": 4.2113, "step": 2776 }, { "epoch": 0.86, "learning_rate": 0.0002476219616987422, "loss": 3.83, "step": 2777 }, { "epoch": 0.86, "learning_rate": 0.00024758633263950497, "loss": 3.8967, "step": 2778 }, { "epoch": 0.86, "learning_rate": 0.0002475507035802677, "loss": 3.7935, "step": 2779 }, { "epoch": 0.86, "learning_rate": 0.0002475150745210305, "loss": 3.8635, "step": 2780 }, { "epoch": 0.86, "learning_rate": 0.00024747944546179326, "loss": 3.9863, "step": 2781 }, { "epoch": 0.86, "learning_rate": 0.00024744381640255604, "loss": 3.8237, "step": 2782 }, { "epoch": 0.86, "learning_rate": 0.0002474081873433188, "loss": 3.8387, "step": 2783 }, { "epoch": 0.86, "learning_rate": 0.00024737255828408155, "loss": 3.5427, "step": 2784 }, { "epoch": 0.86, "learning_rate": 0.00024733692922484433, "loss": 3.7273, "step": 2785 }, { "epoch": 0.86, "learning_rate": 0.0002473013001656071, "loss": 3.6756, "step": 2786 }, { "epoch": 0.86, "learning_rate": 0.0002472656711063699, "loss": 3.5526, "step": 2787 }, { "epoch": 0.86, "learning_rate": 0.0002472300420471327, "loss": 3.4313, "step": 2788 }, { "epoch": 0.86, "learning_rate": 0.0002471944129878954, "loss": 3.4677, "step": 2789 }, { "epoch": 0.86, "learning_rate": 0.0002471587839286582, "loss": 3.7668, "step": 2790 }, { "epoch": 0.86, "learning_rate": 0.00024712315486942097, "loss": 3.5568, "step": 2791 }, { "epoch": 0.86, "learning_rate": 0.00024708752581018375, "loss": 3.366, "step": 2792 }, { "epoch": 0.86, "learning_rate": 0.00024705189675094653, "loss": 3.2278, "step": 2793 }, { "epoch": 0.86, "learning_rate": 0.00024701626769170926, "loss": 3.282, "step": 2794 }, { "epoch": 0.86, "learning_rate": 0.00024698063863247204, "loss": 3.0045, "step": 2795 }, { "epoch": 0.86, "learning_rate": 0.0002469450095732348, "loss": 3.1054, "step": 2796 }, { "epoch": 0.86, "learning_rate": 0.0002469093805139976, "loss": 2.8257, "step": 2797 }, { "epoch": 0.86, "learning_rate": 0.0002468737514547604, "loss": 2.7304, "step": 2798 }, { "epoch": 0.86, "learning_rate": 0.0002468381223955231, "loss": 2.7284, "step": 2799 }, { "epoch": 0.86, "learning_rate": 0.0002468024933362859, "loss": 2.5429, "step": 2800 }, { "epoch": 0.86, "learning_rate": 0.0002467668642770487, "loss": 5.268, "step": 2801 }, { "epoch": 0.86, "learning_rate": 0.00024673123521781146, "loss": 5.0074, "step": 2802 }, { "epoch": 0.87, "learning_rate": 0.00024669560615857424, "loss": 4.8031, "step": 2803 }, { "epoch": 0.87, "learning_rate": 0.00024665997709933697, "loss": 4.6615, "step": 2804 }, { "epoch": 0.87, "learning_rate": 0.00024662434804009975, "loss": 4.4112, "step": 2805 }, { "epoch": 0.87, "learning_rate": 0.00024658871898086254, "loss": 4.3456, "step": 2806 }, { "epoch": 0.87, "learning_rate": 0.0002465530899216253, "loss": 4.6209, "step": 2807 }, { "epoch": 0.87, "learning_rate": 0.00024651746086238804, "loss": 4.3433, "step": 2808 }, { "epoch": 0.87, "learning_rate": 0.0002464818318031508, "loss": 4.3165, "step": 2809 }, { "epoch": 0.87, "learning_rate": 0.0002464462027439136, "loss": 4.2259, "step": 2810 }, { "epoch": 0.87, "learning_rate": 0.0002464105736846764, "loss": 4.3759, "step": 2811 }, { "epoch": 0.87, "learning_rate": 0.00024637494462543917, "loss": 4.3307, "step": 2812 }, { "epoch": 0.87, "learning_rate": 0.0002463393155662019, "loss": 4.2332, "step": 2813 }, { "epoch": 0.87, "learning_rate": 0.00024630368650696474, "loss": 3.9422, "step": 2814 }, { "epoch": 0.87, "learning_rate": 0.00024626805744772746, "loss": 4.4347, "step": 2815 }, { "epoch": 0.87, "learning_rate": 0.00024623242838849025, "loss": 4.3249, "step": 2816 }, { "epoch": 0.87, "learning_rate": 0.00024619679932925303, "loss": 4.3051, "step": 2817 }, { "epoch": 0.87, "learning_rate": 0.00024616117027001576, "loss": 4.4575, "step": 2818 }, { "epoch": 0.87, "learning_rate": 0.0002461255412107786, "loss": 4.0797, "step": 2819 }, { "epoch": 0.87, "learning_rate": 0.0002460899121515413, "loss": 4.2489, "step": 2820 }, { "epoch": 0.87, "learning_rate": 0.0002460542830923041, "loss": 3.9804, "step": 2821 }, { "epoch": 0.87, "learning_rate": 0.0002460186540330669, "loss": 4.0563, "step": 2822 }, { "epoch": 0.87, "learning_rate": 0.0002459830249738296, "loss": 4.2551, "step": 2823 }, { "epoch": 0.87, "learning_rate": 0.00024594739591459245, "loss": 3.922, "step": 2824 }, { "epoch": 0.87, "learning_rate": 0.0002459117668553552, "loss": 3.9128, "step": 2825 }, { "epoch": 0.87, "learning_rate": 0.00024587613779611796, "loss": 4.0141, "step": 2826 }, { "epoch": 0.87, "learning_rate": 0.00024584050873688074, "loss": 4.0312, "step": 2827 }, { "epoch": 0.87, "learning_rate": 0.00024580487967764347, "loss": 3.8548, "step": 2828 }, { "epoch": 0.87, "learning_rate": 0.0002457692506184063, "loss": 4.1262, "step": 2829 }, { "epoch": 0.87, "learning_rate": 0.00024573362155916903, "loss": 4.0388, "step": 2830 }, { "epoch": 0.87, "learning_rate": 0.0002456979924999318, "loss": 4.0011, "step": 2831 }, { "epoch": 0.87, "learning_rate": 0.0002456623634406946, "loss": 3.7304, "step": 2832 }, { "epoch": 0.87, "learning_rate": 0.0002456267343814573, "loss": 3.7532, "step": 2833 }, { "epoch": 0.87, "learning_rate": 0.0002455911053222201, "loss": 3.958, "step": 2834 }, { "epoch": 0.87, "learning_rate": 0.0002455554762629829, "loss": 3.7444, "step": 2835 }, { "epoch": 0.88, "learning_rate": 0.00024551984720374567, "loss": 3.6189, "step": 2836 }, { "epoch": 0.88, "learning_rate": 0.00024548421814450845, "loss": 3.5782, "step": 2837 }, { "epoch": 0.88, "learning_rate": 0.00024544858908527123, "loss": 3.6067, "step": 2838 }, { "epoch": 0.88, "learning_rate": 0.00024541296002603396, "loss": 3.692, "step": 2839 }, { "epoch": 0.88, "learning_rate": 0.00024537733096679674, "loss": 3.195, "step": 2840 }, { "epoch": 0.88, "learning_rate": 0.0002453417019075595, "loss": 3.5531, "step": 2841 }, { "epoch": 0.88, "learning_rate": 0.00024530607284832225, "loss": 3.3082, "step": 2842 }, { "epoch": 0.88, "learning_rate": 0.0002452704437890851, "loss": 3.3136, "step": 2843 }, { "epoch": 0.88, "learning_rate": 0.0002452348147298478, "loss": 3.0921, "step": 2844 }, { "epoch": 0.88, "learning_rate": 0.0002451991856706106, "loss": 3.2452, "step": 2845 }, { "epoch": 0.88, "learning_rate": 0.0002451635566113734, "loss": 2.9009, "step": 2846 }, { "epoch": 0.88, "learning_rate": 0.0002451279275521361, "loss": 2.8125, "step": 2847 }, { "epoch": 0.88, "learning_rate": 0.00024509229849289894, "loss": 2.7623, "step": 2848 }, { "epoch": 0.88, "learning_rate": 0.00024505666943366167, "loss": 2.6036, "step": 2849 }, { "epoch": 0.88, "learning_rate": 0.00024502104037442445, "loss": 2.5381, "step": 2850 }, { "epoch": 0.88, "learning_rate": 0.00024498541131518723, "loss": 5.3456, "step": 2851 }, { "epoch": 0.88, "learning_rate": 0.00024494978225594996, "loss": 4.9337, "step": 2852 }, { "epoch": 0.88, "learning_rate": 0.0002449141531967128, "loss": 5.0114, "step": 2853 }, { "epoch": 0.88, "learning_rate": 0.0002448785241374755, "loss": 4.6392, "step": 2854 }, { "epoch": 0.88, "learning_rate": 0.0002448428950782383, "loss": 4.422, "step": 2855 }, { "epoch": 0.88, "learning_rate": 0.0002448072660190011, "loss": 4.3948, "step": 2856 }, { "epoch": 0.88, "learning_rate": 0.00024477163695976387, "loss": 4.1018, "step": 2857 }, { "epoch": 0.88, "learning_rate": 0.00024473600790052665, "loss": 4.2018, "step": 2858 }, { "epoch": 0.88, "learning_rate": 0.0002447003788412894, "loss": 4.2994, "step": 2859 }, { "epoch": 0.88, "learning_rate": 0.00024466474978205216, "loss": 4.0564, "step": 2860 }, { "epoch": 0.88, "learning_rate": 0.00024462912072281494, "loss": 4.2178, "step": 2861 }, { "epoch": 0.88, "learning_rate": 0.0002445934916635777, "loss": 4.033, "step": 2862 }, { "epoch": 0.88, "learning_rate": 0.0002445578626043405, "loss": 4.2237, "step": 2863 }, { "epoch": 0.88, "learning_rate": 0.00024452223354510323, "loss": 4.0525, "step": 2864 }, { "epoch": 0.88, "learning_rate": 0.000244486604485866, "loss": 4.025, "step": 2865 }, { "epoch": 0.88, "learning_rate": 0.0002444509754266288, "loss": 4.192, "step": 2866 }, { "epoch": 0.88, "learning_rate": 0.0002444153463673916, "loss": 4.2727, "step": 2867 }, { "epoch": 0.89, "learning_rate": 0.0002443797173081543, "loss": 4.108, "step": 2868 }, { "epoch": 0.89, "learning_rate": 0.0002443440882489171, "loss": 4.0206, "step": 2869 }, { "epoch": 0.89, "learning_rate": 0.00024430845918967987, "loss": 3.9575, "step": 2870 }, { "epoch": 0.89, "learning_rate": 0.00024427283013044265, "loss": 3.7781, "step": 2871 }, { "epoch": 0.89, "learning_rate": 0.00024423720107120543, "loss": 3.9284, "step": 2872 }, { "epoch": 0.89, "learning_rate": 0.00024420157201196816, "loss": 3.9372, "step": 2873 }, { "epoch": 0.89, "learning_rate": 0.00024416594295273094, "loss": 4.002, "step": 2874 }, { "epoch": 0.89, "learning_rate": 0.00024413031389349373, "loss": 4.0255, "step": 2875 }, { "epoch": 0.89, "learning_rate": 0.00024409468483425648, "loss": 3.8838, "step": 2876 }, { "epoch": 0.89, "learning_rate": 0.00024405905577501926, "loss": 3.9523, "step": 2877 }, { "epoch": 0.89, "learning_rate": 0.00024402342671578202, "loss": 4.0318, "step": 2878 }, { "epoch": 0.89, "learning_rate": 0.00024398779765654483, "loss": 3.7941, "step": 2879 }, { "epoch": 0.89, "learning_rate": 0.00024395216859730758, "loss": 3.8414, "step": 2880 }, { "epoch": 0.89, "learning_rate": 0.00024391653953807034, "loss": 3.7683, "step": 2881 }, { "epoch": 0.89, "learning_rate": 0.00024388091047883312, "loss": 3.6342, "step": 2882 }, { "epoch": 0.89, "learning_rate": 0.0002438452814195959, "loss": 3.7049, "step": 2883 }, { "epoch": 0.89, "learning_rate": 0.00024380965236035868, "loss": 4.0264, "step": 2884 }, { "epoch": 0.89, "learning_rate": 0.00024377402330112144, "loss": 3.8285, "step": 2885 }, { "epoch": 0.89, "learning_rate": 0.0002437383942418842, "loss": 3.6939, "step": 2886 }, { "epoch": 0.89, "learning_rate": 0.000243702765182647, "loss": 3.4416, "step": 2887 }, { "epoch": 0.89, "learning_rate": 0.00024366713612340975, "loss": 3.5991, "step": 2888 }, { "epoch": 0.89, "learning_rate": 0.0002436315070641725, "loss": 3.426, "step": 2889 }, { "epoch": 0.89, "learning_rate": 0.0002435958780049353, "loss": 3.3307, "step": 2890 }, { "epoch": 0.89, "learning_rate": 0.00024356024894569805, "loss": 3.3516, "step": 2891 }, { "epoch": 0.89, "learning_rate": 0.00024352461988646086, "loss": 3.2901, "step": 2892 }, { "epoch": 0.89, "learning_rate": 0.0002434889908272236, "loss": 3.1694, "step": 2893 }, { "epoch": 0.89, "learning_rate": 0.00024345336176798636, "loss": 3.0758, "step": 2894 }, { "epoch": 0.89, "learning_rate": 0.00024341773270874915, "loss": 3.2264, "step": 2895 }, { "epoch": 0.89, "learning_rate": 0.0002433821036495119, "loss": 2.8788, "step": 2896 }, { "epoch": 0.89, "learning_rate": 0.0002433464745902747, "loss": 2.651, "step": 2897 }, { "epoch": 0.89, "learning_rate": 0.00024331084553103747, "loss": 2.6848, "step": 2898 }, { "epoch": 0.89, "learning_rate": 0.00024327521647180022, "loss": 2.4058, "step": 2899 }, { "epoch": 0.89, "learning_rate": 0.000243239587412563, "loss": 2.3926, "step": 2900 }, { "epoch": 0.9, "learning_rate": 0.00024320395835332576, "loss": 5.1485, "step": 2901 }, { "epoch": 0.9, "learning_rate": 0.00024316832929408854, "loss": 4.9246, "step": 2902 }, { "epoch": 0.9, "learning_rate": 0.00024313270023485132, "loss": 4.928, "step": 2903 }, { "epoch": 0.9, "learning_rate": 0.00024309707117561408, "loss": 4.4354, "step": 2904 }, { "epoch": 0.9, "learning_rate": 0.00024306144211637686, "loss": 4.2858, "step": 2905 }, { "epoch": 0.9, "learning_rate": 0.00024302581305713964, "loss": 4.5205, "step": 2906 }, { "epoch": 0.9, "learning_rate": 0.0002429901839979024, "loss": 4.2951, "step": 2907 }, { "epoch": 0.9, "learning_rate": 0.00024295455493866518, "loss": 4.4189, "step": 2908 }, { "epoch": 0.9, "learning_rate": 0.00024291892587942793, "loss": 4.3068, "step": 2909 }, { "epoch": 0.9, "learning_rate": 0.00024288329682019068, "loss": 4.3974, "step": 2910 }, { "epoch": 0.9, "learning_rate": 0.0002428476677609535, "loss": 4.1441, "step": 2911 }, { "epoch": 0.9, "learning_rate": 0.00024281203870171625, "loss": 4.2917, "step": 2912 }, { "epoch": 0.9, "learning_rate": 0.00024277640964247903, "loss": 4.1983, "step": 2913 }, { "epoch": 0.9, "learning_rate": 0.00024274078058324179, "loss": 3.9089, "step": 2914 }, { "epoch": 0.9, "learning_rate": 0.00024270515152400454, "loss": 4.1418, "step": 2915 }, { "epoch": 0.9, "learning_rate": 0.00024266952246476735, "loss": 4.0739, "step": 2916 }, { "epoch": 0.9, "learning_rate": 0.0002426338934055301, "loss": 4.0071, "step": 2917 }, { "epoch": 0.9, "learning_rate": 0.00024259826434629289, "loss": 4.0607, "step": 2918 }, { "epoch": 0.9, "learning_rate": 0.00024256263528705564, "loss": 4.0089, "step": 2919 }, { "epoch": 0.9, "learning_rate": 0.0002425270062278184, "loss": 4.0633, "step": 2920 }, { "epoch": 0.9, "learning_rate": 0.0002424913771685812, "loss": 3.916, "step": 2921 }, { "epoch": 0.9, "learning_rate": 0.00024245574810934396, "loss": 4.0718, "step": 2922 }, { "epoch": 0.9, "learning_rate": 0.00024242011905010671, "loss": 3.8374, "step": 2923 }, { "epoch": 0.9, "learning_rate": 0.0002423844899908695, "loss": 3.9778, "step": 2924 }, { "epoch": 0.9, "learning_rate": 0.00024234886093163225, "loss": 3.9684, "step": 2925 }, { "epoch": 0.9, "learning_rate": 0.00024231323187239506, "loss": 3.8807, "step": 2926 }, { "epoch": 0.9, "learning_rate": 0.00024227760281315781, "loss": 4.0067, "step": 2927 }, { "epoch": 0.9, "learning_rate": 0.00024224197375392057, "loss": 3.9332, "step": 2928 }, { "epoch": 0.9, "learning_rate": 0.00024220634469468335, "loss": 3.8465, "step": 2929 }, { "epoch": 0.9, "learning_rate": 0.00024217071563544613, "loss": 3.9522, "step": 2930 }, { "epoch": 0.9, "learning_rate": 0.00024213508657620891, "loss": 3.6859, "step": 2931 }, { "epoch": 0.9, "learning_rate": 0.00024209945751697167, "loss": 3.9702, "step": 2932 }, { "epoch": 0.91, "learning_rate": 0.00024206382845773442, "loss": 3.6527, "step": 2933 }, { "epoch": 0.91, "learning_rate": 0.00024202819939849723, "loss": 3.4784, "step": 2934 }, { "epoch": 0.91, "learning_rate": 0.00024199257033926, "loss": 3.6105, "step": 2935 }, { "epoch": 0.91, "learning_rate": 0.00024195694128002274, "loss": 3.6, "step": 2936 }, { "epoch": 0.91, "learning_rate": 0.00024192131222078552, "loss": 3.4741, "step": 2937 }, { "epoch": 0.91, "learning_rate": 0.00024188568316154828, "loss": 3.4962, "step": 2938 }, { "epoch": 0.91, "learning_rate": 0.0002418500541023111, "loss": 3.2246, "step": 2939 }, { "epoch": 0.91, "learning_rate": 0.00024181442504307384, "loss": 3.5243, "step": 2940 }, { "epoch": 0.91, "learning_rate": 0.0002417787959838366, "loss": 3.4914, "step": 2941 }, { "epoch": 0.91, "learning_rate": 0.00024174316692459938, "loss": 3.217, "step": 2942 }, { "epoch": 0.91, "learning_rate": 0.00024170753786536213, "loss": 3.3969, "step": 2943 }, { "epoch": 0.91, "learning_rate": 0.00024167190880612494, "loss": 3.1569, "step": 2944 }, { "epoch": 0.91, "learning_rate": 0.0002416362797468877, "loss": 3.0429, "step": 2945 }, { "epoch": 0.91, "learning_rate": 0.00024160065068765045, "loss": 2.9023, "step": 2946 }, { "epoch": 0.91, "learning_rate": 0.00024156502162841323, "loss": 2.8648, "step": 2947 }, { "epoch": 0.91, "learning_rate": 0.000241529392569176, "loss": 2.6455, "step": 2948 }, { "epoch": 0.91, "learning_rate": 0.00024149376350993877, "loss": 2.4866, "step": 2949 }, { "epoch": 0.91, "learning_rate": 0.00024145813445070155, "loss": 2.3031, "step": 2950 }, { "epoch": 0.91, "learning_rate": 0.0002414225053914643, "loss": 5.1586, "step": 2951 }, { "epoch": 0.91, "learning_rate": 0.0002413868763322271, "loss": 4.8623, "step": 2952 }, { "epoch": 0.91, "learning_rate": 0.00024135124727298987, "loss": 4.5682, "step": 2953 }, { "epoch": 0.91, "learning_rate": 0.00024131561821375263, "loss": 4.5075, "step": 2954 }, { "epoch": 0.91, "learning_rate": 0.0002412799891545154, "loss": 4.2716, "step": 2955 }, { "epoch": 0.91, "learning_rate": 0.00024124436009527816, "loss": 4.3472, "step": 2956 }, { "epoch": 0.91, "learning_rate": 0.00024120873103604092, "loss": 4.2731, "step": 2957 }, { "epoch": 0.91, "learning_rate": 0.00024117310197680373, "loss": 4.435, "step": 2958 }, { "epoch": 0.91, "learning_rate": 0.00024113747291756648, "loss": 4.1926, "step": 2959 }, { "epoch": 0.91, "learning_rate": 0.00024110184385832926, "loss": 4.2232, "step": 2960 }, { "epoch": 0.91, "learning_rate": 0.00024106621479909202, "loss": 4.2754, "step": 2961 }, { "epoch": 0.91, "learning_rate": 0.00024103058573985477, "loss": 4.2501, "step": 2962 }, { "epoch": 0.91, "learning_rate": 0.00024099495668061758, "loss": 3.9383, "step": 2963 }, { "epoch": 0.91, "learning_rate": 0.00024095932762138034, "loss": 3.8235, "step": 2964 }, { "epoch": 0.92, "learning_rate": 0.00024092369856214312, "loss": 4.0403, "step": 2965 }, { "epoch": 0.92, "learning_rate": 0.00024088806950290587, "loss": 4.1881, "step": 2966 }, { "epoch": 0.92, "learning_rate": 0.00024085244044366863, "loss": 3.8996, "step": 2967 }, { "epoch": 0.92, "learning_rate": 0.00024081681138443144, "loss": 4.2, "step": 2968 }, { "epoch": 0.92, "learning_rate": 0.0002407811823251942, "loss": 4.1397, "step": 2969 }, { "epoch": 0.92, "learning_rate": 0.00024074555326595695, "loss": 4.059, "step": 2970 }, { "epoch": 0.92, "learning_rate": 0.00024070992420671973, "loss": 4.17, "step": 2971 }, { "epoch": 0.92, "learning_rate": 0.00024067429514748248, "loss": 4.1393, "step": 2972 }, { "epoch": 0.92, "learning_rate": 0.0002406386660882453, "loss": 4.0688, "step": 2973 }, { "epoch": 0.92, "learning_rate": 0.00024060303702900805, "loss": 3.9898, "step": 2974 }, { "epoch": 0.92, "learning_rate": 0.0002405674079697708, "loss": 3.8472, "step": 2975 }, { "epoch": 0.92, "learning_rate": 0.00024053177891053358, "loss": 3.8999, "step": 2976 }, { "epoch": 0.92, "learning_rate": 0.00024049614985129637, "loss": 3.7652, "step": 2977 }, { "epoch": 0.92, "learning_rate": 0.00024046052079205915, "loss": 3.9337, "step": 2978 }, { "epoch": 0.92, "learning_rate": 0.0002404248917328219, "loss": 3.7308, "step": 2979 }, { "epoch": 0.92, "learning_rate": 0.00024038926267358466, "loss": 3.5359, "step": 2980 }, { "epoch": 0.92, "learning_rate": 0.00024035363361434747, "loss": 3.5994, "step": 2981 }, { "epoch": 0.92, "learning_rate": 0.00024031800455511022, "loss": 3.77, "step": 2982 }, { "epoch": 0.92, "learning_rate": 0.00024028237549587298, "loss": 3.5036, "step": 2983 }, { "epoch": 0.92, "learning_rate": 0.00024024674643663576, "loss": 3.8905, "step": 2984 }, { "epoch": 0.92, "learning_rate": 0.0002402111173773985, "loss": 3.5129, "step": 2985 }, { "epoch": 0.92, "learning_rate": 0.00024017548831816132, "loss": 3.6096, "step": 2986 }, { "epoch": 0.92, "learning_rate": 0.00024013985925892408, "loss": 3.4196, "step": 2987 }, { "epoch": 0.92, "learning_rate": 0.00024010423019968683, "loss": 3.5883, "step": 2988 }, { "epoch": 0.92, "learning_rate": 0.0002400686011404496, "loss": 3.3895, "step": 2989 }, { "epoch": 0.92, "learning_rate": 0.00024003297208121237, "loss": 3.1491, "step": 2990 }, { "epoch": 0.92, "learning_rate": 0.00023999734302197512, "loss": 3.0395, "step": 2991 }, { "epoch": 0.92, "learning_rate": 0.00023996171396273793, "loss": 3.311, "step": 2992 }, { "epoch": 0.92, "learning_rate": 0.00023992608490350069, "loss": 3.1326, "step": 2993 }, { "epoch": 0.92, "learning_rate": 0.00023989045584426347, "loss": 3.2458, "step": 2994 }, { "epoch": 0.92, "learning_rate": 0.00023985482678502622, "loss": 3.1501, "step": 2995 }, { "epoch": 0.92, "learning_rate": 0.000239819197725789, "loss": 3.0105, "step": 2996 }, { "epoch": 0.92, "learning_rate": 0.0002397835686665518, "loss": 2.6193, "step": 2997 }, { "epoch": 0.93, "learning_rate": 0.00023974793960731454, "loss": 2.7533, "step": 2998 }, { "epoch": 0.93, "learning_rate": 0.00023971231054807732, "loss": 2.8436, "step": 2999 }, { "epoch": 0.93, "learning_rate": 0.0002396766814888401, "loss": 2.5174, "step": 3000 }, { "epoch": 0.93, "eval_bleu": 0.0, "eval_loss": 4.649660110473633, "eval_runtime": 2573.2938, "eval_samples_per_second": 5.736, "eval_steps_per_second": 0.717, "step": 3000 }, { "epoch": 0.93, "learning_rate": 0.00023964105242960286, "loss": 5.1895, "step": 3001 }, { "epoch": 0.93, "learning_rate": 0.00023960542337036564, "loss": 5.0417, "step": 3002 }, { "epoch": 0.93, "learning_rate": 0.0002395697943111284, "loss": 4.9672, "step": 3003 }, { "epoch": 0.93, "learning_rate": 0.00023953416525189115, "loss": 4.8772, "step": 3004 }, { "epoch": 0.93, "learning_rate": 0.00023949853619265396, "loss": 4.5442, "step": 3005 }, { "epoch": 0.93, "learning_rate": 0.00023946290713341671, "loss": 4.2894, "step": 3006 }, { "epoch": 0.93, "learning_rate": 0.0002394272780741795, "loss": 4.2901, "step": 3007 }, { "epoch": 0.93, "learning_rate": 0.00023939164901494225, "loss": 4.3114, "step": 3008 }, { "epoch": 0.93, "learning_rate": 0.000239356019955705, "loss": 4.3435, "step": 3009 }, { "epoch": 0.93, "learning_rate": 0.00023932039089646782, "loss": 4.1441, "step": 3010 }, { "epoch": 0.93, "learning_rate": 0.00023928476183723057, "loss": 4.2567, "step": 3011 }, { "epoch": 0.93, "learning_rate": 0.00023924913277799335, "loss": 4.0212, "step": 3012 }, { "epoch": 0.93, "learning_rate": 0.0002392135037187561, "loss": 4.1907, "step": 3013 }, { "epoch": 0.93, "learning_rate": 0.00023917787465951886, "loss": 4.0466, "step": 3014 }, { "epoch": 0.93, "learning_rate": 0.00023914224560028167, "loss": 3.9375, "step": 3015 }, { "epoch": 0.93, "learning_rate": 0.00023910661654104443, "loss": 4.3079, "step": 3016 }, { "epoch": 0.93, "learning_rate": 0.00023907098748180718, "loss": 3.9988, "step": 3017 }, { "epoch": 0.93, "learning_rate": 0.00023903535842256996, "loss": 4.2982, "step": 3018 }, { "epoch": 0.93, "learning_rate": 0.00023899972936333272, "loss": 4.203, "step": 3019 }, { "epoch": 0.93, "learning_rate": 0.00023896410030409553, "loss": 3.9713, "step": 3020 }, { "epoch": 0.93, "learning_rate": 0.00023892847124485828, "loss": 3.9173, "step": 3021 }, { "epoch": 0.93, "learning_rate": 0.00023889284218562104, "loss": 4.0286, "step": 3022 }, { "epoch": 0.93, "learning_rate": 0.00023885721312638382, "loss": 3.9564, "step": 3023 }, { "epoch": 0.93, "learning_rate": 0.0002388215840671466, "loss": 4.1124, "step": 3024 }, { "epoch": 0.93, "learning_rate": 0.00023878595500790938, "loss": 3.9557, "step": 3025 }, { "epoch": 0.93, "learning_rate": 0.00023875032594867214, "loss": 4.0339, "step": 3026 }, { "epoch": 0.93, "learning_rate": 0.0002387146968894349, "loss": 3.8846, "step": 3027 }, { "epoch": 0.93, "learning_rate": 0.0002386790678301977, "loss": 3.9659, "step": 3028 }, { "epoch": 0.93, "learning_rate": 0.00023864343877096045, "loss": 4.0218, "step": 3029 }, { "epoch": 0.94, "learning_rate": 0.0002386078097117232, "loss": 3.7102, "step": 3030 }, { "epoch": 0.94, "learning_rate": 0.000238572180652486, "loss": 3.7506, "step": 3031 }, { "epoch": 0.94, "learning_rate": 0.00023853655159324875, "loss": 3.8144, "step": 3032 }, { "epoch": 0.94, "learning_rate": 0.00023850092253401155, "loss": 3.4837, "step": 3033 }, { "epoch": 0.94, "learning_rate": 0.0002384652934747743, "loss": 3.513, "step": 3034 }, { "epoch": 0.94, "learning_rate": 0.00023842966441553706, "loss": 3.5956, "step": 3035 }, { "epoch": 0.94, "learning_rate": 0.00023839403535629985, "loss": 3.565, "step": 3036 }, { "epoch": 0.94, "learning_rate": 0.0002383584062970626, "loss": 3.7052, "step": 3037 }, { "epoch": 0.94, "learning_rate": 0.00023832277723782536, "loss": 3.7947, "step": 3038 }, { "epoch": 0.94, "learning_rate": 0.00023828714817858816, "loss": 3.261, "step": 3039 }, { "epoch": 0.94, "learning_rate": 0.00023825151911935092, "loss": 3.2484, "step": 3040 }, { "epoch": 0.94, "learning_rate": 0.0002382158900601137, "loss": 3.5019, "step": 3041 }, { "epoch": 0.94, "learning_rate": 0.00023818026100087646, "loss": 3.2125, "step": 3042 }, { "epoch": 0.94, "learning_rate": 0.00023814463194163924, "loss": 3.0068, "step": 3043 }, { "epoch": 0.94, "learning_rate": 0.00023810900288240202, "loss": 2.9614, "step": 3044 }, { "epoch": 0.94, "learning_rate": 0.00023807337382316477, "loss": 3.0928, "step": 3045 }, { "epoch": 0.94, "learning_rate": 0.00023803774476392756, "loss": 2.995, "step": 3046 }, { "epoch": 0.94, "learning_rate": 0.00023800211570469034, "loss": 2.7878, "step": 3047 }, { "epoch": 0.94, "learning_rate": 0.0002379664866454531, "loss": 2.6381, "step": 3048 }, { "epoch": 0.94, "learning_rate": 0.00023793085758621587, "loss": 2.4016, "step": 3049 }, { "epoch": 0.94, "learning_rate": 0.00023789522852697863, "loss": 2.4339, "step": 3050 }, { "epoch": 0.94, "learning_rate": 0.00023785959946774138, "loss": 5.1597, "step": 3051 }, { "epoch": 0.94, "learning_rate": 0.0002378239704085042, "loss": 5.0598, "step": 3052 }, { "epoch": 0.94, "learning_rate": 0.00023778834134926695, "loss": 4.6554, "step": 3053 }, { "epoch": 0.94, "learning_rate": 0.00023775271229002973, "loss": 4.4459, "step": 3054 }, { "epoch": 0.94, "learning_rate": 0.00023771708323079248, "loss": 4.3191, "step": 3055 }, { "epoch": 0.94, "learning_rate": 0.00023768145417155524, "loss": 4.2706, "step": 3056 }, { "epoch": 0.94, "learning_rate": 0.00023764582511231805, "loss": 4.4143, "step": 3057 }, { "epoch": 0.94, "learning_rate": 0.0002376101960530808, "loss": 4.5541, "step": 3058 }, { "epoch": 0.94, "learning_rate": 0.00023757456699384359, "loss": 4.1362, "step": 3059 }, { "epoch": 0.94, "learning_rate": 0.00023753893793460634, "loss": 4.4401, "step": 3060 }, { "epoch": 0.94, "learning_rate": 0.0002375033088753691, "loss": 4.2569, "step": 3061 }, { "epoch": 0.94, "learning_rate": 0.0002374676798161319, "loss": 4.2379, "step": 3062 }, { "epoch": 0.95, "learning_rate": 0.00023743205075689466, "loss": 4.1407, "step": 3063 }, { "epoch": 0.95, "learning_rate": 0.0002373964216976574, "loss": 4.0396, "step": 3064 }, { "epoch": 0.95, "learning_rate": 0.0002373607926384202, "loss": 3.9111, "step": 3065 }, { "epoch": 0.95, "learning_rate": 0.00023732516357918295, "loss": 3.85, "step": 3066 }, { "epoch": 0.95, "learning_rate": 0.00023728953451994576, "loss": 3.9206, "step": 3067 }, { "epoch": 0.95, "learning_rate": 0.00023725390546070851, "loss": 4.0397, "step": 3068 }, { "epoch": 0.95, "learning_rate": 0.00023721827640147127, "loss": 3.9172, "step": 3069 }, { "epoch": 0.95, "learning_rate": 0.00023718264734223405, "loss": 4.1089, "step": 3070 }, { "epoch": 0.95, "learning_rate": 0.00023714701828299683, "loss": 4.2583, "step": 3071 }, { "epoch": 0.95, "learning_rate": 0.0002371113892237596, "loss": 4.0153, "step": 3072 }, { "epoch": 0.95, "learning_rate": 0.00023707576016452237, "loss": 3.9331, "step": 3073 }, { "epoch": 0.95, "learning_rate": 0.00023704013110528512, "loss": 3.9203, "step": 3074 }, { "epoch": 0.95, "learning_rate": 0.00023700450204604793, "loss": 4.0776, "step": 3075 }, { "epoch": 0.95, "learning_rate": 0.0002369688729868107, "loss": 3.8417, "step": 3076 }, { "epoch": 0.95, "learning_rate": 0.00023693324392757344, "loss": 3.9044, "step": 3077 }, { "epoch": 0.95, "learning_rate": 0.00023689761486833622, "loss": 3.8752, "step": 3078 }, { "epoch": 0.95, "learning_rate": 0.00023686198580909898, "loss": 3.7261, "step": 3079 }, { "epoch": 0.95, "learning_rate": 0.0002368263567498618, "loss": 3.9201, "step": 3080 }, { "epoch": 0.95, "learning_rate": 0.00023679072769062454, "loss": 3.8859, "step": 3081 }, { "epoch": 0.95, "learning_rate": 0.0002367550986313873, "loss": 3.8958, "step": 3082 }, { "epoch": 0.95, "learning_rate": 0.00023671946957215008, "loss": 3.8624, "step": 3083 }, { "epoch": 0.95, "learning_rate": 0.00023668384051291283, "loss": 3.7008, "step": 3084 }, { "epoch": 0.95, "learning_rate": 0.0002366482114536756, "loss": 3.4738, "step": 3085 }, { "epoch": 0.95, "learning_rate": 0.0002366125823944384, "loss": 3.6188, "step": 3086 }, { "epoch": 0.95, "learning_rate": 0.00023657695333520115, "loss": 3.5793, "step": 3087 }, { "epoch": 0.95, "learning_rate": 0.00023654132427596393, "loss": 3.4939, "step": 3088 }, { "epoch": 0.95, "learning_rate": 0.0002365056952167267, "loss": 3.1608, "step": 3089 }, { "epoch": 0.95, "learning_rate": 0.00023647006615748947, "loss": 3.2663, "step": 3090 }, { "epoch": 0.95, "learning_rate": 0.00023643443709825225, "loss": 3.3071, "step": 3091 }, { "epoch": 0.95, "learning_rate": 0.000236398808039015, "loss": 3.2175, "step": 3092 }, { "epoch": 0.95, "learning_rate": 0.0002363631789797778, "loss": 3.1249, "step": 3093 }, { "epoch": 0.95, "learning_rate": 0.00023632754992054057, "loss": 3.1137, "step": 3094 }, { "epoch": 0.96, "learning_rate": 0.00023629192086130333, "loss": 3.0907, "step": 3095 }, { "epoch": 0.96, "learning_rate": 0.0002362562918020661, "loss": 3.0262, "step": 3096 }, { "epoch": 0.96, "learning_rate": 0.00023622066274282886, "loss": 2.6885, "step": 3097 }, { "epoch": 0.96, "learning_rate": 0.00023618503368359162, "loss": 2.6656, "step": 3098 }, { "epoch": 0.96, "learning_rate": 0.00023614940462435443, "loss": 2.5574, "step": 3099 }, { "epoch": 0.96, "learning_rate": 0.00023611377556511718, "loss": 2.3727, "step": 3100 }, { "epoch": 0.96, "learning_rate": 0.00023607814650587996, "loss": 5.2261, "step": 3101 }, { "epoch": 0.96, "learning_rate": 0.00023604251744664272, "loss": 4.9689, "step": 3102 }, { "epoch": 0.96, "learning_rate": 0.00023600688838740547, "loss": 4.798, "step": 3103 }, { "epoch": 0.96, "learning_rate": 0.00023597125932816828, "loss": 4.779, "step": 3104 }, { "epoch": 0.96, "learning_rate": 0.00023593563026893104, "loss": 4.7496, "step": 3105 }, { "epoch": 0.96, "learning_rate": 0.0002359000012096938, "loss": 4.5507, "step": 3106 }, { "epoch": 0.96, "learning_rate": 0.00023586437215045657, "loss": 4.372, "step": 3107 }, { "epoch": 0.96, "learning_rate": 0.00023582874309121933, "loss": 4.1717, "step": 3108 }, { "epoch": 0.96, "learning_rate": 0.00023579311403198214, "loss": 4.2175, "step": 3109 }, { "epoch": 0.96, "learning_rate": 0.0002357574849727449, "loss": 3.9289, "step": 3110 }, { "epoch": 0.96, "learning_rate": 0.00023572185591350765, "loss": 4.1815, "step": 3111 }, { "epoch": 0.96, "learning_rate": 0.00023568622685427043, "loss": 4.0538, "step": 3112 }, { "epoch": 0.96, "learning_rate": 0.00023565059779503318, "loss": 4.1981, "step": 3113 }, { "epoch": 0.96, "learning_rate": 0.000235614968735796, "loss": 4.1601, "step": 3114 }, { "epoch": 0.96, "learning_rate": 0.00023557933967655875, "loss": 3.969, "step": 3115 }, { "epoch": 0.96, "learning_rate": 0.0002355437106173215, "loss": 4.1585, "step": 3116 }, { "epoch": 0.96, "learning_rate": 0.0002355080815580843, "loss": 4.1056, "step": 3117 }, { "epoch": 0.96, "learning_rate": 0.00023547245249884707, "loss": 4.1191, "step": 3118 }, { "epoch": 0.96, "learning_rate": 0.00023543682343960982, "loss": 3.9015, "step": 3119 }, { "epoch": 0.96, "learning_rate": 0.0002354011943803726, "loss": 4.2587, "step": 3120 }, { "epoch": 0.96, "learning_rate": 0.00023536556532113536, "loss": 3.9025, "step": 3121 }, { "epoch": 0.96, "learning_rate": 0.00023532993626189817, "loss": 3.8839, "step": 3122 }, { "epoch": 0.96, "learning_rate": 0.00023529430720266092, "loss": 3.9939, "step": 3123 }, { "epoch": 0.96, "learning_rate": 0.00023525867814342368, "loss": 3.9057, "step": 3124 }, { "epoch": 0.96, "learning_rate": 0.00023522304908418646, "loss": 3.8479, "step": 3125 }, { "epoch": 0.96, "learning_rate": 0.0002351874200249492, "loss": 4.1093, "step": 3126 }, { "epoch": 0.97, "learning_rate": 0.00023515179096571202, "loss": 3.9502, "step": 3127 }, { "epoch": 0.97, "learning_rate": 0.00023511616190647478, "loss": 3.8979, "step": 3128 }, { "epoch": 0.97, "learning_rate": 0.00023508053284723753, "loss": 3.9489, "step": 3129 }, { "epoch": 0.97, "learning_rate": 0.0002350449037880003, "loss": 3.6262, "step": 3130 }, { "epoch": 0.97, "learning_rate": 0.00023500927472876307, "loss": 3.7309, "step": 3131 }, { "epoch": 0.97, "learning_rate": 0.00023497364566952582, "loss": 3.6783, "step": 3132 }, { "epoch": 0.97, "learning_rate": 0.00023493801661028863, "loss": 3.5578, "step": 3133 }, { "epoch": 0.97, "learning_rate": 0.00023490238755105139, "loss": 3.8483, "step": 3134 }, { "epoch": 0.97, "learning_rate": 0.00023486675849181417, "loss": 3.5235, "step": 3135 }, { "epoch": 0.97, "learning_rate": 0.00023483112943257692, "loss": 3.4825, "step": 3136 }, { "epoch": 0.97, "learning_rate": 0.0002347955003733397, "loss": 3.5824, "step": 3137 }, { "epoch": 0.97, "learning_rate": 0.00023475987131410249, "loss": 3.3131, "step": 3138 }, { "epoch": 0.97, "learning_rate": 0.00023472424225486524, "loss": 3.5068, "step": 3139 }, { "epoch": 0.97, "learning_rate": 0.00023468861319562802, "loss": 3.4612, "step": 3140 }, { "epoch": 0.97, "learning_rate": 0.0002346529841363908, "loss": 3.4827, "step": 3141 }, { "epoch": 0.97, "learning_rate": 0.00023461735507715356, "loss": 3.2186, "step": 3142 }, { "epoch": 0.97, "learning_rate": 0.00023458172601791634, "loss": 3.1641, "step": 3143 }, { "epoch": 0.97, "learning_rate": 0.0002345460969586791, "loss": 3.1822, "step": 3144 }, { "epoch": 0.97, "learning_rate": 0.00023451046789944185, "loss": 2.5353, "step": 3145 }, { "epoch": 0.97, "learning_rate": 0.00023447483884020466, "loss": 2.9143, "step": 3146 }, { "epoch": 0.97, "learning_rate": 0.00023443920978096741, "loss": 2.6803, "step": 3147 }, { "epoch": 0.97, "learning_rate": 0.0002344035807217302, "loss": 2.5106, "step": 3148 }, { "epoch": 0.97, "learning_rate": 0.00023436795166249295, "loss": 2.3812, "step": 3149 }, { "epoch": 0.97, "learning_rate": 0.0002343323226032557, "loss": 2.4964, "step": 3150 }, { "epoch": 0.97, "learning_rate": 0.00023429669354401851, "loss": 5.3176, "step": 3151 }, { "epoch": 0.97, "learning_rate": 0.00023426106448478127, "loss": 4.8833, "step": 3152 }, { "epoch": 0.97, "learning_rate": 0.00023422543542554402, "loss": 4.6634, "step": 3153 }, { "epoch": 0.97, "learning_rate": 0.0002341898063663068, "loss": 4.4617, "step": 3154 }, { "epoch": 0.97, "learning_rate": 0.00023415417730706956, "loss": 4.2766, "step": 3155 }, { "epoch": 0.97, "learning_rate": 0.00023411854824783237, "loss": 4.5698, "step": 3156 }, { "epoch": 0.97, "learning_rate": 0.00023408291918859512, "loss": 4.1772, "step": 3157 }, { "epoch": 0.97, "learning_rate": 0.00023404729012935788, "loss": 4.3714, "step": 3158 }, { "epoch": 0.97, "learning_rate": 0.00023401166107012066, "loss": 4.4489, "step": 3159 }, { "epoch": 0.98, "learning_rate": 0.00023397603201088342, "loss": 4.196, "step": 3160 }, { "epoch": 0.98, "learning_rate": 0.00023394040295164623, "loss": 4.5322, "step": 3161 }, { "epoch": 0.98, "learning_rate": 0.00023390477389240898, "loss": 4.2698, "step": 3162 }, { "epoch": 0.98, "learning_rate": 0.00023386914483317173, "loss": 4.2684, "step": 3163 }, { "epoch": 0.98, "learning_rate": 0.00023383351577393454, "loss": 4.0842, "step": 3164 }, { "epoch": 0.98, "learning_rate": 0.0002337978867146973, "loss": 4.2798, "step": 3165 }, { "epoch": 0.98, "learning_rate": 0.00023376225765546005, "loss": 3.998, "step": 3166 }, { "epoch": 0.98, "learning_rate": 0.00023372662859622284, "loss": 4.0314, "step": 3167 }, { "epoch": 0.98, "learning_rate": 0.0002336909995369856, "loss": 3.9714, "step": 3168 }, { "epoch": 0.98, "learning_rate": 0.0002336553704777484, "loss": 3.9638, "step": 3169 }, { "epoch": 0.98, "learning_rate": 0.00023361974141851115, "loss": 4.1141, "step": 3170 }, { "epoch": 0.98, "learning_rate": 0.0002335841123592739, "loss": 3.9792, "step": 3171 }, { "epoch": 0.98, "learning_rate": 0.0002335484833000367, "loss": 3.7714, "step": 3172 }, { "epoch": 0.98, "learning_rate": 0.00023351285424079945, "loss": 3.8314, "step": 3173 }, { "epoch": 0.98, "learning_rate": 0.00023347722518156225, "loss": 3.8603, "step": 3174 }, { "epoch": 0.98, "learning_rate": 0.000233441596122325, "loss": 3.8265, "step": 3175 }, { "epoch": 0.98, "learning_rate": 0.00023340596706308776, "loss": 3.8483, "step": 3176 }, { "epoch": 0.98, "learning_rate": 0.00023337033800385055, "loss": 3.7276, "step": 3177 }, { "epoch": 0.98, "learning_rate": 0.0002333347089446133, "loss": 3.8043, "step": 3178 }, { "epoch": 0.98, "learning_rate": 0.00023329907988537605, "loss": 3.6952, "step": 3179 }, { "epoch": 0.98, "learning_rate": 0.00023326345082613886, "loss": 3.4846, "step": 3180 }, { "epoch": 0.98, "learning_rate": 0.00023322782176690162, "loss": 3.553, "step": 3181 }, { "epoch": 0.98, "learning_rate": 0.0002331921927076644, "loss": 3.6315, "step": 3182 }, { "epoch": 0.98, "learning_rate": 0.00023315656364842716, "loss": 3.5619, "step": 3183 }, { "epoch": 0.98, "learning_rate": 0.00023312093458918994, "loss": 3.771, "step": 3184 }, { "epoch": 0.98, "learning_rate": 0.00023308530552995272, "loss": 3.3283, "step": 3185 }, { "epoch": 0.98, "learning_rate": 0.00023304967647071547, "loss": 3.6697, "step": 3186 }, { "epoch": 0.98, "learning_rate": 0.00023301404741147823, "loss": 3.4642, "step": 3187 }, { "epoch": 0.98, "learning_rate": 0.00023297841835224104, "loss": 3.4893, "step": 3188 }, { "epoch": 0.98, "learning_rate": 0.0002329427892930038, "loss": 3.3099, "step": 3189 }, { "epoch": 0.98, "learning_rate": 0.00023290716023376657, "loss": 3.2631, "step": 3190 }, { "epoch": 0.98, "learning_rate": 0.00023287153117452933, "loss": 3.2937, "step": 3191 }, { "epoch": 0.99, "learning_rate": 0.00023283590211529208, "loss": 3.3957, "step": 3192 }, { "epoch": 0.99, "learning_rate": 0.0002328002730560549, "loss": 3.3246, "step": 3193 }, { "epoch": 0.99, "learning_rate": 0.00023276464399681765, "loss": 3.1247, "step": 3194 }, { "epoch": 0.99, "learning_rate": 0.00023272901493758043, "loss": 3.1037, "step": 3195 }, { "epoch": 0.99, "learning_rate": 0.00023269338587834318, "loss": 2.5896, "step": 3196 }, { "epoch": 0.99, "learning_rate": 0.00023265775681910594, "loss": 2.7472, "step": 3197 }, { "epoch": 0.99, "learning_rate": 0.00023262212775986875, "loss": 2.6087, "step": 3198 }, { "epoch": 0.99, "learning_rate": 0.0002325864987006315, "loss": 2.4268, "step": 3199 }, { "epoch": 0.99, "learning_rate": 0.00023255086964139426, "loss": 2.5054, "step": 3200 }, { "epoch": 0.99, "learning_rate": 0.00023251524058215704, "loss": 5.2924, "step": 3201 }, { "epoch": 0.99, "learning_rate": 0.0002324796115229198, "loss": 5.0185, "step": 3202 }, { "epoch": 0.99, "learning_rate": 0.0002324439824636826, "loss": 4.5099, "step": 3203 }, { "epoch": 0.99, "learning_rate": 0.00023240835340444536, "loss": 4.4024, "step": 3204 }, { "epoch": 0.99, "learning_rate": 0.0002323727243452081, "loss": 4.2863, "step": 3205 }, { "epoch": 0.99, "learning_rate": 0.0002323370952859709, "loss": 4.4508, "step": 3206 }, { "epoch": 0.99, "learning_rate": 0.00023230146622673368, "loss": 4.2674, "step": 3207 }, { "epoch": 0.99, "learning_rate": 0.00023226583716749646, "loss": 4.3007, "step": 3208 }, { "epoch": 0.99, "learning_rate": 0.0002322302081082592, "loss": 4.2449, "step": 3209 }, { "epoch": 0.99, "learning_rate": 0.00023219457904902197, "loss": 4.1617, "step": 3210 }, { "epoch": 0.99, "learning_rate": 0.00023215894998978478, "loss": 4.285, "step": 3211 }, { "epoch": 0.99, "learning_rate": 0.00023212332093054753, "loss": 3.9879, "step": 3212 }, { "epoch": 0.99, "learning_rate": 0.00023208769187131029, "loss": 3.9902, "step": 3213 }, { "epoch": 0.99, "learning_rate": 0.00023205206281207307, "loss": 3.9019, "step": 3214 }, { "epoch": 0.99, "learning_rate": 0.00023201643375283582, "loss": 3.9673, "step": 3215 }, { "epoch": 0.99, "learning_rate": 0.00023198080469359863, "loss": 4.1152, "step": 3216 }, { "epoch": 0.99, "learning_rate": 0.0002319451756343614, "loss": 3.9981, "step": 3217 }, { "epoch": 0.99, "learning_rate": 0.00023190954657512414, "loss": 4.0401, "step": 3218 }, { "epoch": 0.99, "learning_rate": 0.00023187391751588692, "loss": 3.907, "step": 3219 }, { "epoch": 0.99, "learning_rate": 0.00023183828845664968, "loss": 3.9421, "step": 3220 }, { "epoch": 0.99, "learning_rate": 0.0002318026593974125, "loss": 3.6716, "step": 3221 }, { "epoch": 0.99, "learning_rate": 0.00023176703033817524, "loss": 3.973, "step": 3222 }, { "epoch": 0.99, "learning_rate": 0.000231731401278938, "loss": 4.0262, "step": 3223 }, { "epoch": 0.99, "learning_rate": 0.00023169577221970078, "loss": 3.8582, "step": 3224 }, { "epoch": 1.0, "learning_rate": 0.00023166014316046353, "loss": 3.7747, "step": 3225 }, { "epoch": 1.0, "learning_rate": 0.0002316245141012263, "loss": 3.6881, "step": 3226 }, { "epoch": 1.0, "learning_rate": 0.0002315888850419891, "loss": 3.6077, "step": 3227 }, { "epoch": 1.0, "learning_rate": 0.00023155325598275185, "loss": 3.6868, "step": 3228 }, { "epoch": 1.0, "learning_rate": 0.00023151762692351463, "loss": 3.5107, "step": 3229 }, { "epoch": 1.0, "learning_rate": 0.0002314819978642774, "loss": 3.5508, "step": 3230 }, { "epoch": 1.0, "learning_rate": 0.00023144636880504017, "loss": 3.4857, "step": 3231 }, { "epoch": 1.0, "learning_rate": 0.00023141073974580295, "loss": 3.3828, "step": 3232 }, { "epoch": 1.0, "learning_rate": 0.0002313751106865657, "loss": 3.2626, "step": 3233 }, { "epoch": 1.0, "learning_rate": 0.00023133948162732846, "loss": 3.1632, "step": 3234 }, { "epoch": 1.0, "learning_rate": 0.00023130385256809127, "loss": 3.2648, "step": 3235 }, { "epoch": 1.0, "learning_rate": 0.00023126822350885403, "loss": 3.0365, "step": 3236 }, { "epoch": 1.0, "learning_rate": 0.0002312325944496168, "loss": 2.6895, "step": 3237 }, { "epoch": 1.0, "learning_rate": 0.00023119696539037956, "loss": 2.5704, "step": 3238 }, { "epoch": 1.0, "learning_rate": 0.00023116133633114232, "loss": 2.684, "step": 3239 }, { "epoch": 1.0, "learning_rate": 0.00023112570727190513, "loss": 2.3827, "step": 3240 }, { "epoch": 1.0, "learning_rate": 0.00023109007821266788, "loss": 5.4764, "step": 3241 }, { "epoch": 1.0, "learning_rate": 0.00023105444915343066, "loss": 4.8597, "step": 3242 }, { "epoch": 1.0, "learning_rate": 0.00023101882009419342, "loss": 4.5493, "step": 3243 }, { "epoch": 1.0, "learning_rate": 0.00023098319103495617, "loss": 4.6225, "step": 3244 }, { "epoch": 1.0, "learning_rate": 0.00023094756197571898, "loss": 4.2237, "step": 3245 }, { "epoch": 1.0, "learning_rate": 0.00023091193291648174, "loss": 4.1768, "step": 3246 }, { "epoch": 1.0, "learning_rate": 0.0002308763038572445, "loss": 3.7428, "step": 3247 }, { "epoch": 1.0, "learning_rate": 0.00023084067479800727, "loss": 4.2356, "step": 3248 }, { "epoch": 1.0, "learning_rate": 0.00023080504573877003, "loss": 3.9862, "step": 3249 }, { "epoch": 1.0, "learning_rate": 0.00023076941667953284, "loss": 3.7712, "step": 3250 }, { "epoch": 1.0, "learning_rate": 0.0002307337876202956, "loss": 3.8686, "step": 3251 }, { "epoch": 1.0, "learning_rate": 0.00023069815856105835, "loss": 3.8503, "step": 3252 }, { "epoch": 1.0, "learning_rate": 0.00023066252950182113, "loss": 3.9793, "step": 3253 }, { "epoch": 1.0, "learning_rate": 0.0002306269004425839, "loss": 3.7719, "step": 3254 }, { "epoch": 1.0, "learning_rate": 0.0002305912713833467, "loss": 3.7653, "step": 3255 }, { "epoch": 1.0, "learning_rate": 0.00023055564232410945, "loss": 3.7087, "step": 3256 }, { "epoch": 1.01, "learning_rate": 0.0002305200132648722, "loss": 3.7529, "step": 3257 }, { "epoch": 1.01, "learning_rate": 0.000230484384205635, "loss": 3.7963, "step": 3258 }, { "epoch": 1.01, "learning_rate": 0.00023044875514639776, "loss": 3.4675, "step": 3259 }, { "epoch": 1.01, "learning_rate": 0.00023041312608716052, "loss": 3.8207, "step": 3260 }, { "epoch": 1.01, "learning_rate": 0.0002303774970279233, "loss": 3.6712, "step": 3261 }, { "epoch": 1.01, "learning_rate": 0.00023034186796868606, "loss": 3.705, "step": 3262 }, { "epoch": 1.01, "learning_rate": 0.00023030623890944887, "loss": 3.7285, "step": 3263 }, { "epoch": 1.01, "learning_rate": 0.00023027060985021162, "loss": 3.8801, "step": 3264 }, { "epoch": 1.01, "learning_rate": 0.00023023498079097437, "loss": 3.5659, "step": 3265 }, { "epoch": 1.01, "learning_rate": 0.00023019935173173716, "loss": 3.8416, "step": 3266 }, { "epoch": 1.01, "learning_rate": 0.0002301637226724999, "loss": 3.5377, "step": 3267 }, { "epoch": 1.01, "learning_rate": 0.00023012809361326267, "loss": 3.5871, "step": 3268 }, { "epoch": 1.01, "learning_rate": 0.00023009246455402548, "loss": 3.7477, "step": 3269 }, { "epoch": 1.01, "learning_rate": 0.00023005683549478823, "loss": 3.7505, "step": 3270 }, { "epoch": 1.01, "learning_rate": 0.000230021206435551, "loss": 3.2975, "step": 3271 }, { "epoch": 1.01, "learning_rate": 0.00022998557737631377, "loss": 3.4778, "step": 3272 }, { "epoch": 1.01, "learning_rate": 0.00022994994831707652, "loss": 3.5677, "step": 3273 }, { "epoch": 1.01, "learning_rate": 0.00022991431925783933, "loss": 3.306, "step": 3274 }, { "epoch": 1.01, "learning_rate": 0.00022987869019860208, "loss": 3.3762, "step": 3275 }, { "epoch": 1.01, "learning_rate": 0.00022984306113936487, "loss": 3.231, "step": 3276 }, { "epoch": 1.01, "learning_rate": 0.00022980743208012762, "loss": 3.4824, "step": 3277 }, { "epoch": 1.01, "learning_rate": 0.0002297718030208904, "loss": 3.1722, "step": 3278 }, { "epoch": 1.01, "learning_rate": 0.00022973617396165319, "loss": 3.2316, "step": 3279 }, { "epoch": 1.01, "learning_rate": 0.00022970054490241594, "loss": 3.0759, "step": 3280 }, { "epoch": 1.01, "learning_rate": 0.0002296649158431787, "loss": 3.1859, "step": 3281 }, { "epoch": 1.01, "learning_rate": 0.0002296292867839415, "loss": 3.1602, "step": 3282 }, { "epoch": 1.01, "learning_rate": 0.00022959365772470426, "loss": 2.8832, "step": 3283 }, { "epoch": 1.01, "learning_rate": 0.00022955802866546704, "loss": 2.7656, "step": 3284 }, { "epoch": 1.01, "learning_rate": 0.0002295223996062298, "loss": 2.623, "step": 3285 }, { "epoch": 1.01, "learning_rate": 0.00022948677054699255, "loss": 2.897, "step": 3286 }, { "epoch": 1.01, "learning_rate": 0.00022945114148775536, "loss": 2.4938, "step": 3287 }, { "epoch": 1.01, "learning_rate": 0.00022941551242851811, "loss": 2.5806, "step": 3288 }, { "epoch": 1.02, "learning_rate": 0.0002293798833692809, "loss": 2.435, "step": 3289 }, { "epoch": 1.02, "learning_rate": 0.00022934425431004365, "loss": 2.1989, "step": 3290 }, { "epoch": 1.02, "learning_rate": 0.0002293086252508064, "loss": 5.2817, "step": 3291 }, { "epoch": 1.02, "learning_rate": 0.00022927299619156921, "loss": 4.8864, "step": 3292 }, { "epoch": 1.02, "learning_rate": 0.00022923736713233197, "loss": 4.7242, "step": 3293 }, { "epoch": 1.02, "learning_rate": 0.00022920173807309472, "loss": 4.5526, "step": 3294 }, { "epoch": 1.02, "learning_rate": 0.0002291661090138575, "loss": 4.1508, "step": 3295 }, { "epoch": 1.02, "learning_rate": 0.00022913047995462026, "loss": 3.9627, "step": 3296 }, { "epoch": 1.02, "learning_rate": 0.00022909485089538307, "loss": 4.1625, "step": 3297 }, { "epoch": 1.02, "learning_rate": 0.00022905922183614582, "loss": 3.95, "step": 3298 }, { "epoch": 1.02, "learning_rate": 0.00022902359277690858, "loss": 4.1361, "step": 3299 }, { "epoch": 1.02, "learning_rate": 0.00022898796371767136, "loss": 4.1307, "step": 3300 }, { "epoch": 1.02, "learning_rate": 0.00022895233465843414, "loss": 3.8352, "step": 3301 }, { "epoch": 1.02, "learning_rate": 0.0002289167055991969, "loss": 3.7877, "step": 3302 }, { "epoch": 1.02, "learning_rate": 0.00022888107653995968, "loss": 3.9865, "step": 3303 }, { "epoch": 1.02, "learning_rate": 0.00022884544748072243, "loss": 3.9345, "step": 3304 }, { "epoch": 1.02, "learning_rate": 0.00022880981842148524, "loss": 3.8008, "step": 3305 }, { "epoch": 1.02, "learning_rate": 0.000228774189362248, "loss": 3.6873, "step": 3306 }, { "epoch": 1.02, "learning_rate": 0.00022873856030301075, "loss": 3.9142, "step": 3307 }, { "epoch": 1.02, "learning_rate": 0.00022870293124377353, "loss": 3.6214, "step": 3308 }, { "epoch": 1.02, "learning_rate": 0.0002286673021845363, "loss": 3.6497, "step": 3309 }, { "epoch": 1.02, "learning_rate": 0.0002286316731252991, "loss": 3.7443, "step": 3310 }, { "epoch": 1.02, "learning_rate": 0.00022859604406606185, "loss": 3.6966, "step": 3311 }, { "epoch": 1.02, "learning_rate": 0.0002285604150068246, "loss": 3.7438, "step": 3312 }, { "epoch": 1.02, "learning_rate": 0.0002285247859475874, "loss": 3.5737, "step": 3313 }, { "epoch": 1.02, "learning_rate": 0.00022848915688835014, "loss": 3.9687, "step": 3314 }, { "epoch": 1.02, "learning_rate": 0.0002284535278291129, "loss": 3.4963, "step": 3315 }, { "epoch": 1.02, "learning_rate": 0.0002284178987698757, "loss": 3.6447, "step": 3316 }, { "epoch": 1.02, "learning_rate": 0.00022838226971063846, "loss": 3.5669, "step": 3317 }, { "epoch": 1.02, "learning_rate": 0.00022834664065140124, "loss": 3.4375, "step": 3318 }, { "epoch": 1.02, "learning_rate": 0.000228311011592164, "loss": 3.3905, "step": 3319 }, { "epoch": 1.02, "learning_rate": 0.00022827538253292675, "loss": 3.6282, "step": 3320 }, { "epoch": 1.02, "learning_rate": 0.00022823975347368956, "loss": 3.7218, "step": 3321 }, { "epoch": 1.03, "learning_rate": 0.00022820412441445232, "loss": 3.5181, "step": 3322 }, { "epoch": 1.03, "learning_rate": 0.0002281684953552151, "loss": 3.2657, "step": 3323 }, { "epoch": 1.03, "learning_rate": 0.00022813286629597785, "loss": 3.3909, "step": 3324 }, { "epoch": 1.03, "learning_rate": 0.00022809723723674064, "loss": 3.2919, "step": 3325 }, { "epoch": 1.03, "learning_rate": 0.00022806160817750342, "loss": 3.3143, "step": 3326 }, { "epoch": 1.03, "learning_rate": 0.00022802597911826617, "loss": 3.1186, "step": 3327 }, { "epoch": 1.03, "learning_rate": 0.00022799035005902893, "loss": 3.3313, "step": 3328 }, { "epoch": 1.03, "learning_rate": 0.00022795472099979174, "loss": 3.1917, "step": 3329 }, { "epoch": 1.03, "learning_rate": 0.0002279190919405545, "loss": 3.3493, "step": 3330 }, { "epoch": 1.03, "learning_rate": 0.00022788346288131727, "loss": 3.0516, "step": 3331 }, { "epoch": 1.03, "learning_rate": 0.00022784783382208003, "loss": 3.0342, "step": 3332 }, { "epoch": 1.03, "learning_rate": 0.00022781220476284278, "loss": 2.9794, "step": 3333 }, { "epoch": 1.03, "learning_rate": 0.0002277765757036056, "loss": 2.7809, "step": 3334 }, { "epoch": 1.03, "learning_rate": 0.00022774094664436835, "loss": 2.7251, "step": 3335 }, { "epoch": 1.03, "learning_rate": 0.00022770531758513113, "loss": 2.6732, "step": 3336 }, { "epoch": 1.03, "learning_rate": 0.00022766968852589388, "loss": 2.6377, "step": 3337 }, { "epoch": 1.03, "learning_rate": 0.00022763405946665664, "loss": 2.4325, "step": 3338 }, { "epoch": 1.03, "learning_rate": 0.00022759843040741945, "loss": 2.2867, "step": 3339 }, { "epoch": 1.03, "learning_rate": 0.0002275628013481822, "loss": 2.4385, "step": 3340 }, { "epoch": 1.03, "learning_rate": 0.00022752717228894496, "loss": 5.2534, "step": 3341 }, { "epoch": 1.03, "learning_rate": 0.00022749154322970774, "loss": 4.881, "step": 3342 }, { "epoch": 1.03, "learning_rate": 0.0002274559141704705, "loss": 4.6975, "step": 3343 }, { "epoch": 1.03, "learning_rate": 0.0002274202851112333, "loss": 4.5906, "step": 3344 }, { "epoch": 1.03, "learning_rate": 0.00022738465605199606, "loss": 4.2472, "step": 3345 }, { "epoch": 1.03, "learning_rate": 0.0002273490269927588, "loss": 4.2546, "step": 3346 }, { "epoch": 1.03, "learning_rate": 0.0002273133979335216, "loss": 3.9274, "step": 3347 }, { "epoch": 1.03, "learning_rate": 0.00022727776887428438, "loss": 4.1792, "step": 3348 }, { "epoch": 1.03, "learning_rate": 0.00022724213981504713, "loss": 4.0565, "step": 3349 }, { "epoch": 1.03, "learning_rate": 0.0002272065107558099, "loss": 3.8837, "step": 3350 }, { "epoch": 1.03, "learning_rate": 0.00022717088169657267, "loss": 4.0692, "step": 3351 }, { "epoch": 1.03, "learning_rate": 0.00022713525263733548, "loss": 3.8501, "step": 3352 }, { "epoch": 1.03, "learning_rate": 0.00022709962357809823, "loss": 3.8368, "step": 3353 }, { "epoch": 1.04, "learning_rate": 0.00022706399451886099, "loss": 4.0414, "step": 3354 }, { "epoch": 1.04, "learning_rate": 0.00022702836545962377, "loss": 3.9174, "step": 3355 }, { "epoch": 1.04, "learning_rate": 0.00022699273640038652, "loss": 3.7743, "step": 3356 }, { "epoch": 1.04, "learning_rate": 0.00022695710734114933, "loss": 3.6508, "step": 3357 }, { "epoch": 1.04, "learning_rate": 0.00022692147828191209, "loss": 4.017, "step": 3358 }, { "epoch": 1.04, "learning_rate": 0.00022688584922267484, "loss": 3.8742, "step": 3359 }, { "epoch": 1.04, "learning_rate": 0.00022685022016343762, "loss": 3.6234, "step": 3360 }, { "epoch": 1.04, "learning_rate": 0.00022681459110420038, "loss": 3.7314, "step": 3361 }, { "epoch": 1.04, "learning_rate": 0.00022677896204496313, "loss": 3.6142, "step": 3362 }, { "epoch": 1.04, "learning_rate": 0.00022674333298572594, "loss": 3.7584, "step": 3363 }, { "epoch": 1.04, "learning_rate": 0.0002267077039264887, "loss": 3.7109, "step": 3364 }, { "epoch": 1.04, "learning_rate": 0.00022667207486725148, "loss": 3.3795, "step": 3365 }, { "epoch": 1.04, "learning_rate": 0.00022663644580801423, "loss": 3.6356, "step": 3366 }, { "epoch": 1.04, "learning_rate": 0.000226600816748777, "loss": 3.5525, "step": 3367 }, { "epoch": 1.04, "learning_rate": 0.0002265651876895398, "loss": 3.5048, "step": 3368 }, { "epoch": 1.04, "learning_rate": 0.00022652955863030255, "loss": 3.3858, "step": 3369 }, { "epoch": 1.04, "learning_rate": 0.00022649392957106533, "loss": 3.589, "step": 3370 }, { "epoch": 1.04, "learning_rate": 0.0002264583005118281, "loss": 3.4631, "step": 3371 }, { "epoch": 1.04, "learning_rate": 0.00022642267145259087, "loss": 3.2303, "step": 3372 }, { "epoch": 1.04, "learning_rate": 0.00022638704239335365, "loss": 3.4989, "step": 3373 }, { "epoch": 1.04, "learning_rate": 0.0002263514133341164, "loss": 3.2227, "step": 3374 }, { "epoch": 1.04, "learning_rate": 0.00022631578427487916, "loss": 3.224, "step": 3375 }, { "epoch": 1.04, "learning_rate": 0.00022628015521564197, "loss": 3.4244, "step": 3376 }, { "epoch": 1.04, "learning_rate": 0.00022624452615640472, "loss": 3.1714, "step": 3377 }, { "epoch": 1.04, "learning_rate": 0.0002262088970971675, "loss": 3.1927, "step": 3378 }, { "epoch": 1.04, "learning_rate": 0.00022617326803793026, "loss": 3.2285, "step": 3379 }, { "epoch": 1.04, "learning_rate": 0.00022613763897869302, "loss": 3.211, "step": 3380 }, { "epoch": 1.04, "learning_rate": 0.00022610200991945583, "loss": 2.8586, "step": 3381 }, { "epoch": 1.04, "learning_rate": 0.00022606638086021858, "loss": 2.8186, "step": 3382 }, { "epoch": 1.04, "learning_rate": 0.00022603075180098133, "loss": 3.0257, "step": 3383 }, { "epoch": 1.04, "learning_rate": 0.00022599512274174412, "loss": 2.9242, "step": 3384 }, { "epoch": 1.04, "learning_rate": 0.00022595949368250687, "loss": 2.6077, "step": 3385 }, { "epoch": 1.05, "learning_rate": 0.00022592386462326968, "loss": 2.675, "step": 3386 }, { "epoch": 1.05, "learning_rate": 0.00022588823556403244, "loss": 2.3658, "step": 3387 }, { "epoch": 1.05, "learning_rate": 0.0002258526065047952, "loss": 2.507, "step": 3388 }, { "epoch": 1.05, "learning_rate": 0.00022581697744555797, "loss": 2.5374, "step": 3389 }, { "epoch": 1.05, "learning_rate": 0.00022578134838632073, "loss": 2.2848, "step": 3390 }, { "epoch": 1.05, "learning_rate": 0.00022574571932708354, "loss": 4.8547, "step": 3391 }, { "epoch": 1.05, "learning_rate": 0.0002257100902678463, "loss": 4.6589, "step": 3392 }, { "epoch": 1.05, "learning_rate": 0.00022567446120860905, "loss": 4.6273, "step": 3393 }, { "epoch": 1.05, "learning_rate": 0.00022563883214937183, "loss": 4.3349, "step": 3394 }, { "epoch": 1.05, "learning_rate": 0.0002256032030901346, "loss": 4.5368, "step": 3395 }, { "epoch": 1.05, "learning_rate": 0.00022556757403089736, "loss": 4.1337, "step": 3396 }, { "epoch": 1.05, "learning_rate": 0.00022553194497166015, "loss": 3.8984, "step": 3397 }, { "epoch": 1.05, "learning_rate": 0.0002254963159124229, "loss": 4.2195, "step": 3398 }, { "epoch": 1.05, "learning_rate": 0.0002254606868531857, "loss": 4.0098, "step": 3399 }, { "epoch": 1.05, "learning_rate": 0.00022542505779394846, "loss": 3.9825, "step": 3400 }, { "epoch": 1.05, "learning_rate": 0.00022538942873471122, "loss": 3.8282, "step": 3401 }, { "epoch": 1.05, "learning_rate": 0.000225353799675474, "loss": 3.9848, "step": 3402 }, { "epoch": 1.05, "learning_rate": 0.00022531817061623676, "loss": 3.7879, "step": 3403 }, { "epoch": 1.05, "learning_rate": 0.00022528254155699956, "loss": 4.1537, "step": 3404 }, { "epoch": 1.05, "learning_rate": 0.00022524691249776232, "loss": 3.9608, "step": 3405 }, { "epoch": 1.05, "learning_rate": 0.00022521128343852507, "loss": 3.8603, "step": 3406 }, { "epoch": 1.05, "learning_rate": 0.00022517565437928786, "loss": 3.8071, "step": 3407 }, { "epoch": 1.05, "learning_rate": 0.0002251400253200506, "loss": 3.566, "step": 3408 }, { "epoch": 1.05, "learning_rate": 0.00022510439626081337, "loss": 3.8155, "step": 3409 }, { "epoch": 1.05, "learning_rate": 0.00022506876720157617, "loss": 3.7392, "step": 3410 }, { "epoch": 1.05, "learning_rate": 0.00022503313814233893, "loss": 3.8503, "step": 3411 }, { "epoch": 1.05, "learning_rate": 0.0002249975090831017, "loss": 3.5335, "step": 3412 }, { "epoch": 1.05, "learning_rate": 0.00022496188002386447, "loss": 3.7815, "step": 3413 }, { "epoch": 1.05, "learning_rate": 0.00022492625096462722, "loss": 3.5028, "step": 3414 }, { "epoch": 1.05, "learning_rate": 0.00022489062190539003, "loss": 3.7152, "step": 3415 }, { "epoch": 1.05, "learning_rate": 0.00022485499284615278, "loss": 3.7448, "step": 3416 }, { "epoch": 1.05, "learning_rate": 0.00022481936378691557, "loss": 3.6014, "step": 3417 }, { "epoch": 1.05, "learning_rate": 0.00022478373472767832, "loss": 3.4361, "step": 3418 }, { "epoch": 1.06, "learning_rate": 0.0002247481056684411, "loss": 3.5516, "step": 3419 }, { "epoch": 1.06, "learning_rate": 0.00022471247660920388, "loss": 3.8486, "step": 3420 }, { "epoch": 1.06, "learning_rate": 0.00022467684754996664, "loss": 3.6489, "step": 3421 }, { "epoch": 1.06, "learning_rate": 0.0002246412184907294, "loss": 3.3495, "step": 3422 }, { "epoch": 1.06, "learning_rate": 0.0002246055894314922, "loss": 3.3214, "step": 3423 }, { "epoch": 1.06, "learning_rate": 0.00022456996037225496, "loss": 3.3547, "step": 3424 }, { "epoch": 1.06, "learning_rate": 0.00022453433131301774, "loss": 3.3462, "step": 3425 }, { "epoch": 1.06, "learning_rate": 0.0002244987022537805, "loss": 3.0726, "step": 3426 }, { "epoch": 1.06, "learning_rate": 0.00022446307319454325, "loss": 3.0833, "step": 3427 }, { "epoch": 1.06, "learning_rate": 0.00022442744413530606, "loss": 3.0179, "step": 3428 }, { "epoch": 1.06, "learning_rate": 0.0002243918150760688, "loss": 2.9077, "step": 3429 }, { "epoch": 1.06, "learning_rate": 0.00022435618601683157, "loss": 3.1785, "step": 3430 }, { "epoch": 1.06, "learning_rate": 0.00022432055695759435, "loss": 2.9056, "step": 3431 }, { "epoch": 1.06, "learning_rate": 0.0002242849278983571, "loss": 2.8467, "step": 3432 }, { "epoch": 1.06, "learning_rate": 0.00022424929883911991, "loss": 2.8522, "step": 3433 }, { "epoch": 1.06, "learning_rate": 0.00022421366977988267, "loss": 2.8692, "step": 3434 }, { "epoch": 1.06, "learning_rate": 0.00022417804072064542, "loss": 2.7796, "step": 3435 }, { "epoch": 1.06, "learning_rate": 0.0002241424116614082, "loss": 2.5265, "step": 3436 }, { "epoch": 1.06, "learning_rate": 0.00022410678260217096, "loss": 2.5337, "step": 3437 }, { "epoch": 1.06, "learning_rate": 0.00022407115354293377, "loss": 2.5023, "step": 3438 }, { "epoch": 1.06, "learning_rate": 0.00022403552448369652, "loss": 2.1777, "step": 3439 }, { "epoch": 1.06, "learning_rate": 0.00022399989542445928, "loss": 2.3044, "step": 3440 }, { "epoch": 1.06, "learning_rate": 0.00022396426636522206, "loss": 5.2354, "step": 3441 }, { "epoch": 1.06, "learning_rate": 0.00022392863730598484, "loss": 4.7865, "step": 3442 }, { "epoch": 1.06, "learning_rate": 0.0002238930082467476, "loss": 4.6137, "step": 3443 }, { "epoch": 1.06, "learning_rate": 0.00022385737918751038, "loss": 4.1235, "step": 3444 }, { "epoch": 1.06, "learning_rate": 0.00022382175012827313, "loss": 4.3777, "step": 3445 }, { "epoch": 1.06, "learning_rate": 0.00022378612106903594, "loss": 4.1836, "step": 3446 }, { "epoch": 1.06, "learning_rate": 0.0002237504920097987, "loss": 4.1246, "step": 3447 }, { "epoch": 1.06, "learning_rate": 0.00022371486295056145, "loss": 4.3068, "step": 3448 }, { "epoch": 1.06, "learning_rate": 0.00022367923389132423, "loss": 3.7803, "step": 3449 }, { "epoch": 1.06, "learning_rate": 0.000223643604832087, "loss": 4.0443, "step": 3450 }, { "epoch": 1.07, "learning_rate": 0.0002236079757728498, "loss": 3.7497, "step": 3451 }, { "epoch": 1.07, "learning_rate": 0.00022357234671361255, "loss": 3.8822, "step": 3452 }, { "epoch": 1.07, "learning_rate": 0.0002235367176543753, "loss": 3.8341, "step": 3453 }, { "epoch": 1.07, "learning_rate": 0.0002235010885951381, "loss": 3.8673, "step": 3454 }, { "epoch": 1.07, "learning_rate": 0.00022346545953590084, "loss": 3.9177, "step": 3455 }, { "epoch": 1.07, "learning_rate": 0.0002234298304766636, "loss": 3.7889, "step": 3456 }, { "epoch": 1.07, "learning_rate": 0.0002233942014174264, "loss": 3.7346, "step": 3457 }, { "epoch": 1.07, "learning_rate": 0.00022335857235818916, "loss": 3.8982, "step": 3458 }, { "epoch": 1.07, "learning_rate": 0.00022332294329895194, "loss": 3.8405, "step": 3459 }, { "epoch": 1.07, "learning_rate": 0.0002232873142397147, "loss": 3.8455, "step": 3460 }, { "epoch": 1.07, "learning_rate": 0.00022325168518047745, "loss": 3.5226, "step": 3461 }, { "epoch": 1.07, "learning_rate": 0.00022321605612124026, "loss": 3.571, "step": 3462 }, { "epoch": 1.07, "learning_rate": 0.00022318042706200302, "loss": 3.6178, "step": 3463 }, { "epoch": 1.07, "learning_rate": 0.00022314479800276577, "loss": 3.5389, "step": 3464 }, { "epoch": 1.07, "learning_rate": 0.00022310916894352858, "loss": 3.5795, "step": 3465 }, { "epoch": 1.07, "learning_rate": 0.00022307353988429134, "loss": 3.4307, "step": 3466 }, { "epoch": 1.07, "learning_rate": 0.00022303791082505412, "loss": 3.5843, "step": 3467 }, { "epoch": 1.07, "learning_rate": 0.00022300228176581687, "loss": 3.6868, "step": 3468 }, { "epoch": 1.07, "learning_rate": 0.00022296665270657963, "loss": 3.6785, "step": 3469 }, { "epoch": 1.07, "learning_rate": 0.00022293102364734244, "loss": 3.3745, "step": 3470 }, { "epoch": 1.07, "learning_rate": 0.0002228953945881052, "loss": 3.4477, "step": 3471 }, { "epoch": 1.07, "learning_rate": 0.00022285976552886797, "loss": 3.5856, "step": 3472 }, { "epoch": 1.07, "learning_rate": 0.00022282413646963073, "loss": 3.4279, "step": 3473 }, { "epoch": 1.07, "learning_rate": 0.00022278850741039348, "loss": 3.2228, "step": 3474 }, { "epoch": 1.07, "learning_rate": 0.0002227528783511563, "loss": 3.2735, "step": 3475 }, { "epoch": 1.07, "learning_rate": 0.00022271724929191905, "loss": 3.1802, "step": 3476 }, { "epoch": 1.07, "learning_rate": 0.0002226816202326818, "loss": 3.4058, "step": 3477 }, { "epoch": 1.07, "learning_rate": 0.00022264599117344458, "loss": 3.263, "step": 3478 }, { "epoch": 1.07, "learning_rate": 0.00022261036211420734, "loss": 3.035, "step": 3479 }, { "epoch": 1.07, "learning_rate": 0.00022257473305497015, "loss": 2.7861, "step": 3480 }, { "epoch": 1.07, "learning_rate": 0.0002225391039957329, "loss": 3.047, "step": 3481 }, { "epoch": 1.07, "learning_rate": 0.00022250347493649566, "loss": 2.8772, "step": 3482 }, { "epoch": 1.07, "learning_rate": 0.00022246784587725844, "loss": 3.0736, "step": 3483 }, { "epoch": 1.08, "learning_rate": 0.0002224322168180212, "loss": 2.5762, "step": 3484 }, { "epoch": 1.08, "learning_rate": 0.000222396587758784, "loss": 2.6045, "step": 3485 }, { "epoch": 1.08, "learning_rate": 0.00022236095869954676, "loss": 2.6508, "step": 3486 }, { "epoch": 1.08, "learning_rate": 0.0002223253296403095, "loss": 2.634, "step": 3487 }, { "epoch": 1.08, "learning_rate": 0.0002222897005810723, "loss": 2.4015, "step": 3488 }, { "epoch": 1.08, "learning_rate": 0.00022225407152183508, "loss": 2.2374, "step": 3489 }, { "epoch": 1.08, "learning_rate": 0.00022221844246259783, "loss": 2.1403, "step": 3490 }, { "epoch": 1.08, "learning_rate": 0.0002221828134033606, "loss": 4.9263, "step": 3491 }, { "epoch": 1.08, "learning_rate": 0.00022214718434412337, "loss": 4.5969, "step": 3492 }, { "epoch": 1.08, "learning_rate": 0.00022211155528488618, "loss": 4.6296, "step": 3493 }, { "epoch": 1.08, "learning_rate": 0.00022207592622564893, "loss": 4.4994, "step": 3494 }, { "epoch": 1.08, "learning_rate": 0.00022204029716641169, "loss": 4.1733, "step": 3495 }, { "epoch": 1.08, "learning_rate": 0.00022200466810717447, "loss": 4.2153, "step": 3496 }, { "epoch": 1.08, "learning_rate": 0.00022196903904793722, "loss": 4.0729, "step": 3497 }, { "epoch": 1.08, "learning_rate": 0.00022193340998869998, "loss": 4.1431, "step": 3498 }, { "epoch": 1.08, "learning_rate": 0.00022189778092946279, "loss": 4.0883, "step": 3499 }, { "epoch": 1.08, "learning_rate": 0.00022186215187022554, "loss": 4.1027, "step": 3500 }, { "epoch": 1.08, "eval_bleu": 0.0, "eval_loss": 4.329113960266113, "eval_runtime": 2566.7512, "eval_samples_per_second": 5.75, "eval_steps_per_second": 0.719, "step": 3500 }, { "epoch": 1.08, "learning_rate": 0.00022182652281098832, "loss": 3.7445, "step": 3501 }, { "epoch": 1.08, "learning_rate": 0.00022179089375175108, "loss": 3.8809, "step": 3502 }, { "epoch": 1.08, "learning_rate": 0.00022175526469251383, "loss": 4.0919, "step": 3503 }, { "epoch": 1.08, "learning_rate": 0.00022171963563327664, "loss": 3.8528, "step": 3504 }, { "epoch": 1.08, "learning_rate": 0.0002216840065740394, "loss": 3.934, "step": 3505 }, { "epoch": 1.08, "learning_rate": 0.00022164837751480218, "loss": 3.6634, "step": 3506 }, { "epoch": 1.08, "learning_rate": 0.00022161274845556493, "loss": 3.6396, "step": 3507 }, { "epoch": 1.08, "learning_rate": 0.0002215771193963277, "loss": 4.0179, "step": 3508 }, { "epoch": 1.08, "learning_rate": 0.0002215414903370905, "loss": 3.6498, "step": 3509 }, { "epoch": 1.08, "learning_rate": 0.00022150586127785325, "loss": 3.6283, "step": 3510 }, { "epoch": 1.08, "learning_rate": 0.000221470232218616, "loss": 3.5379, "step": 3511 }, { "epoch": 1.08, "learning_rate": 0.00022143460315937881, "loss": 3.5872, "step": 3512 }, { "epoch": 1.08, "learning_rate": 0.00022139897410014157, "loss": 3.5759, "step": 3513 }, { "epoch": 1.08, "learning_rate": 0.00022136334504090435, "loss": 3.5041, "step": 3514 }, { "epoch": 1.08, "learning_rate": 0.0002213277159816671, "loss": 3.3848, "step": 3515 }, { "epoch": 1.09, "learning_rate": 0.00022129208692242986, "loss": 3.5209, "step": 3516 }, { "epoch": 1.09, "learning_rate": 0.00022125645786319267, "loss": 3.6466, "step": 3517 }, { "epoch": 1.09, "learning_rate": 0.00022122082880395542, "loss": 3.3708, "step": 3518 }, { "epoch": 1.09, "learning_rate": 0.0002211851997447182, "loss": 3.3692, "step": 3519 }, { "epoch": 1.09, "learning_rate": 0.00022114957068548096, "loss": 3.7085, "step": 3520 }, { "epoch": 1.09, "learning_rate": 0.00022111394162624372, "loss": 3.418, "step": 3521 }, { "epoch": 1.09, "learning_rate": 0.00022107831256700652, "loss": 3.3103, "step": 3522 }, { "epoch": 1.09, "learning_rate": 0.00022104268350776928, "loss": 3.4353, "step": 3523 }, { "epoch": 1.09, "learning_rate": 0.00022100705444853203, "loss": 3.2997, "step": 3524 }, { "epoch": 1.09, "learning_rate": 0.00022097142538929482, "loss": 3.1203, "step": 3525 }, { "epoch": 1.09, "learning_rate": 0.00022093579633005757, "loss": 3.2134, "step": 3526 }, { "epoch": 1.09, "learning_rate": 0.00022090016727082038, "loss": 3.173, "step": 3527 }, { "epoch": 1.09, "learning_rate": 0.00022086453821158313, "loss": 3.2338, "step": 3528 }, { "epoch": 1.09, "learning_rate": 0.0002208289091523459, "loss": 3.1562, "step": 3529 }, { "epoch": 1.09, "learning_rate": 0.00022079328009310867, "loss": 3.417, "step": 3530 }, { "epoch": 1.09, "learning_rate": 0.00022075765103387143, "loss": 3.117, "step": 3531 }, { "epoch": 1.09, "learning_rate": 0.00022072202197463424, "loss": 2.6628, "step": 3532 }, { "epoch": 1.09, "learning_rate": 0.000220686392915397, "loss": 2.8636, "step": 3533 }, { "epoch": 1.09, "learning_rate": 0.00022065076385615974, "loss": 2.8624, "step": 3534 }, { "epoch": 1.09, "learning_rate": 0.00022061513479692253, "loss": 2.7263, "step": 3535 }, { "epoch": 1.09, "learning_rate": 0.0002205795057376853, "loss": 2.7002, "step": 3536 }, { "epoch": 1.09, "learning_rate": 0.00022054387667844806, "loss": 2.6403, "step": 3537 }, { "epoch": 1.09, "learning_rate": 0.00022050824761921085, "loss": 2.5853, "step": 3538 }, { "epoch": 1.09, "learning_rate": 0.0002204726185599736, "loss": 2.3399, "step": 3539 }, { "epoch": 1.09, "learning_rate": 0.0002204369895007364, "loss": 2.195, "step": 3540 }, { "epoch": 1.09, "learning_rate": 0.00022040136044149916, "loss": 5.1365, "step": 3541 }, { "epoch": 1.09, "learning_rate": 0.00022036573138226192, "loss": 4.8125, "step": 3542 }, { "epoch": 1.09, "learning_rate": 0.0002203301023230247, "loss": 4.2869, "step": 3543 }, { "epoch": 1.09, "learning_rate": 0.00022029447326378745, "loss": 4.5352, "step": 3544 }, { "epoch": 1.09, "learning_rate": 0.0002202588442045502, "loss": 4.0569, "step": 3545 }, { "epoch": 1.09, "learning_rate": 0.00022022321514531302, "loss": 4.0005, "step": 3546 }, { "epoch": 1.09, "learning_rate": 0.00022018758608607577, "loss": 4.0507, "step": 3547 }, { "epoch": 1.1, "learning_rate": 0.00022015195702683856, "loss": 3.8679, "step": 3548 }, { "epoch": 1.1, "learning_rate": 0.0002201163279676013, "loss": 3.9171, "step": 3549 }, { "epoch": 1.1, "learning_rate": 0.00022008069890836406, "loss": 3.9393, "step": 3550 }, { "epoch": 1.1, "learning_rate": 0.00022004506984912687, "loss": 4.0229, "step": 3551 }, { "epoch": 1.1, "learning_rate": 0.00022000944078988963, "loss": 3.8507, "step": 3552 }, { "epoch": 1.1, "learning_rate": 0.0002199738117306524, "loss": 3.8655, "step": 3553 }, { "epoch": 1.1, "learning_rate": 0.00021993818267141517, "loss": 3.7772, "step": 3554 }, { "epoch": 1.1, "learning_rate": 0.00021990255361217795, "loss": 3.7759, "step": 3555 }, { "epoch": 1.1, "learning_rate": 0.00021986692455294073, "loss": 3.8893, "step": 3556 }, { "epoch": 1.1, "learning_rate": 0.00021983129549370348, "loss": 3.6709, "step": 3557 }, { "epoch": 1.1, "learning_rate": 0.00021979566643446624, "loss": 3.9443, "step": 3558 }, { "epoch": 1.1, "learning_rate": 0.00021976003737522905, "loss": 3.8262, "step": 3559 }, { "epoch": 1.1, "learning_rate": 0.0002197244083159918, "loss": 3.87, "step": 3560 }, { "epoch": 1.1, "learning_rate": 0.00021968877925675458, "loss": 3.8376, "step": 3561 }, { "epoch": 1.1, "learning_rate": 0.00021965315019751734, "loss": 3.6727, "step": 3562 }, { "epoch": 1.1, "learning_rate": 0.0002196175211382801, "loss": 3.5641, "step": 3563 }, { "epoch": 1.1, "learning_rate": 0.0002195818920790429, "loss": 3.5567, "step": 3564 }, { "epoch": 1.1, "learning_rate": 0.00021954626301980566, "loss": 3.5487, "step": 3565 }, { "epoch": 1.1, "learning_rate": 0.00021951063396056844, "loss": 3.2647, "step": 3566 }, { "epoch": 1.1, "learning_rate": 0.0002194750049013312, "loss": 3.4869, "step": 3567 }, { "epoch": 1.1, "learning_rate": 0.00021943937584209395, "loss": 3.5559, "step": 3568 }, { "epoch": 1.1, "learning_rate": 0.00021940374678285676, "loss": 3.5276, "step": 3569 }, { "epoch": 1.1, "learning_rate": 0.0002193681177236195, "loss": 3.3603, "step": 3570 }, { "epoch": 1.1, "learning_rate": 0.00021933248866438227, "loss": 3.6102, "step": 3571 }, { "epoch": 1.1, "learning_rate": 0.00021929685960514505, "loss": 3.4474, "step": 3572 }, { "epoch": 1.1, "learning_rate": 0.0002192612305459078, "loss": 3.4479, "step": 3573 }, { "epoch": 1.1, "learning_rate": 0.0002192256014866706, "loss": 3.1307, "step": 3574 }, { "epoch": 1.1, "learning_rate": 0.00021918997242743337, "loss": 3.118, "step": 3575 }, { "epoch": 1.1, "learning_rate": 0.00021915434336819612, "loss": 3.3475, "step": 3576 }, { "epoch": 1.1, "learning_rate": 0.0002191187143089589, "loss": 3.2515, "step": 3577 }, { "epoch": 1.1, "learning_rate": 0.00021908308524972166, "loss": 3.197, "step": 3578 }, { "epoch": 1.1, "learning_rate": 0.00021904745619048444, "loss": 3.0067, "step": 3579 }, { "epoch": 1.1, "learning_rate": 0.00021901182713124722, "loss": 3.0633, "step": 3580 }, { "epoch": 1.11, "learning_rate": 0.00021897619807200998, "loss": 3.3697, "step": 3581 }, { "epoch": 1.11, "learning_rate": 0.00021894056901277276, "loss": 2.9279, "step": 3582 }, { "epoch": 1.11, "learning_rate": 0.00021890493995353554, "loss": 2.8304, "step": 3583 }, { "epoch": 1.11, "learning_rate": 0.0002188693108942983, "loss": 2.7378, "step": 3584 }, { "epoch": 1.11, "learning_rate": 0.00021883368183506108, "loss": 2.6224, "step": 3585 }, { "epoch": 1.11, "learning_rate": 0.00021879805277582383, "loss": 2.5158, "step": 3586 }, { "epoch": 1.11, "learning_rate": 0.00021876242371658664, "loss": 2.309, "step": 3587 }, { "epoch": 1.11, "learning_rate": 0.0002187267946573494, "loss": 2.3903, "step": 3588 }, { "epoch": 1.11, "learning_rate": 0.00021869116559811215, "loss": 2.3519, "step": 3589 }, { "epoch": 1.11, "learning_rate": 0.00021865553653887493, "loss": 2.3764, "step": 3590 }, { "epoch": 1.11, "learning_rate": 0.0002186199074796377, "loss": 4.9679, "step": 3591 }, { "epoch": 1.11, "learning_rate": 0.00021858427842040044, "loss": 4.4186, "step": 3592 }, { "epoch": 1.11, "learning_rate": 0.00021854864936116325, "loss": 4.5821, "step": 3593 }, { "epoch": 1.11, "learning_rate": 0.000218513020301926, "loss": 4.3527, "step": 3594 }, { "epoch": 1.11, "learning_rate": 0.0002184773912426888, "loss": 4.0442, "step": 3595 }, { "epoch": 1.11, "learning_rate": 0.00021844176218345154, "loss": 4.4095, "step": 3596 }, { "epoch": 1.11, "learning_rate": 0.0002184061331242143, "loss": 3.9542, "step": 3597 }, { "epoch": 1.11, "learning_rate": 0.0002183705040649771, "loss": 4.0053, "step": 3598 }, { "epoch": 1.11, "learning_rate": 0.00021833487500573986, "loss": 3.9912, "step": 3599 }, { "epoch": 1.11, "learning_rate": 0.00021829924594650264, "loss": 3.6964, "step": 3600 }, { "epoch": 1.11, "learning_rate": 0.0002182636168872654, "loss": 3.9628, "step": 3601 }, { "epoch": 1.11, "learning_rate": 0.00021822798782802818, "loss": 3.508, "step": 3602 }, { "epoch": 1.11, "learning_rate": 0.00021819235876879096, "loss": 3.7829, "step": 3603 }, { "epoch": 1.11, "learning_rate": 0.00021815672970955372, "loss": 3.8636, "step": 3604 }, { "epoch": 1.11, "learning_rate": 0.00021812110065031647, "loss": 3.5438, "step": 3605 }, { "epoch": 1.11, "learning_rate": 0.00021808547159107928, "loss": 3.9929, "step": 3606 }, { "epoch": 1.11, "learning_rate": 0.00021804984253184204, "loss": 3.6298, "step": 3607 }, { "epoch": 1.11, "learning_rate": 0.00021801421347260482, "loss": 4.0625, "step": 3608 }, { "epoch": 1.11, "learning_rate": 0.00021797858441336757, "loss": 3.8902, "step": 3609 }, { "epoch": 1.11, "learning_rate": 0.00021794295535413033, "loss": 3.854, "step": 3610 }, { "epoch": 1.11, "learning_rate": 0.00021790732629489314, "loss": 3.3604, "step": 3611 }, { "epoch": 1.11, "learning_rate": 0.0002178716972356559, "loss": 3.7369, "step": 3612 }, { "epoch": 1.12, "learning_rate": 0.00021783606817641865, "loss": 3.7086, "step": 3613 }, { "epoch": 1.12, "learning_rate": 0.00021780043911718143, "loss": 3.665, "step": 3614 }, { "epoch": 1.12, "learning_rate": 0.00021776481005794418, "loss": 3.7161, "step": 3615 }, { "epoch": 1.12, "learning_rate": 0.000217729180998707, "loss": 3.5851, "step": 3616 }, { "epoch": 1.12, "learning_rate": 0.00021769355193946975, "loss": 3.8927, "step": 3617 }, { "epoch": 1.12, "learning_rate": 0.0002176579228802325, "loss": 3.5339, "step": 3618 }, { "epoch": 1.12, "learning_rate": 0.00021762229382099528, "loss": 3.6542, "step": 3619 }, { "epoch": 1.12, "learning_rate": 0.00021758666476175804, "loss": 3.3814, "step": 3620 }, { "epoch": 1.12, "learning_rate": 0.00021755103570252085, "loss": 3.2715, "step": 3621 }, { "epoch": 1.12, "learning_rate": 0.0002175154066432836, "loss": 3.5746, "step": 3622 }, { "epoch": 1.12, "learning_rate": 0.00021747977758404636, "loss": 3.4965, "step": 3623 }, { "epoch": 1.12, "learning_rate": 0.00021744414852480914, "loss": 3.3232, "step": 3624 }, { "epoch": 1.12, "learning_rate": 0.0002174085194655719, "loss": 3.5453, "step": 3625 }, { "epoch": 1.12, "learning_rate": 0.00021737289040633467, "loss": 3.1705, "step": 3626 }, { "epoch": 1.12, "learning_rate": 0.00021733726134709746, "loss": 3.1225, "step": 3627 }, { "epoch": 1.12, "learning_rate": 0.0002173016322878602, "loss": 3.0805, "step": 3628 }, { "epoch": 1.12, "learning_rate": 0.000217266003228623, "loss": 3.0205, "step": 3629 }, { "epoch": 1.12, "learning_rate": 0.00021723037416938577, "loss": 3.0141, "step": 3630 }, { "epoch": 1.12, "learning_rate": 0.00021719474511014853, "loss": 3.0227, "step": 3631 }, { "epoch": 1.12, "learning_rate": 0.0002171591160509113, "loss": 3.1335, "step": 3632 }, { "epoch": 1.12, "learning_rate": 0.00021712348699167407, "loss": 2.8616, "step": 3633 }, { "epoch": 1.12, "learning_rate": 0.00021708785793243688, "loss": 2.7484, "step": 3634 }, { "epoch": 1.12, "learning_rate": 0.00021705222887319963, "loss": 2.7113, "step": 3635 }, { "epoch": 1.12, "learning_rate": 0.00021701659981396238, "loss": 2.4392, "step": 3636 }, { "epoch": 1.12, "learning_rate": 0.00021698097075472517, "loss": 2.5949, "step": 3637 }, { "epoch": 1.12, "learning_rate": 0.00021694534169548792, "loss": 2.4943, "step": 3638 }, { "epoch": 1.12, "learning_rate": 0.00021690971263625068, "loss": 2.2826, "step": 3639 }, { "epoch": 1.12, "learning_rate": 0.00021687408357701349, "loss": 2.2249, "step": 3640 }, { "epoch": 1.12, "learning_rate": 0.00021683845451777624, "loss": 5.0556, "step": 3641 }, { "epoch": 1.12, "learning_rate": 0.00021680282545853902, "loss": 4.9701, "step": 3642 }, { "epoch": 1.12, "learning_rate": 0.00021676719639930178, "loss": 4.3252, "step": 3643 }, { "epoch": 1.12, "learning_rate": 0.00021673156734006453, "loss": 4.265, "step": 3644 }, { "epoch": 1.12, "learning_rate": 0.00021669593828082734, "loss": 4.347, "step": 3645 }, { "epoch": 1.13, "learning_rate": 0.0002166603092215901, "loss": 4.2215, "step": 3646 }, { "epoch": 1.13, "learning_rate": 0.00021662468016235288, "loss": 4.1503, "step": 3647 }, { "epoch": 1.13, "learning_rate": 0.00021658905110311563, "loss": 4.1576, "step": 3648 }, { "epoch": 1.13, "learning_rate": 0.0002165534220438784, "loss": 4.1181, "step": 3649 }, { "epoch": 1.13, "learning_rate": 0.0002165177929846412, "loss": 3.8666, "step": 3650 }, { "epoch": 1.13, "learning_rate": 0.00021648216392540395, "loss": 4.0027, "step": 3651 }, { "epoch": 1.13, "learning_rate": 0.0002164465348661667, "loss": 3.9986, "step": 3652 }, { "epoch": 1.13, "learning_rate": 0.00021641090580692951, "loss": 3.8255, "step": 3653 }, { "epoch": 1.13, "learning_rate": 0.00021637527674769227, "loss": 3.8742, "step": 3654 }, { "epoch": 1.13, "learning_rate": 0.00021633964768845505, "loss": 3.7998, "step": 3655 }, { "epoch": 1.13, "learning_rate": 0.0002163040186292178, "loss": 3.9956, "step": 3656 }, { "epoch": 1.13, "learning_rate": 0.00021626838956998056, "loss": 3.7248, "step": 3657 }, { "epoch": 1.13, "learning_rate": 0.00021623276051074337, "loss": 3.7784, "step": 3658 }, { "epoch": 1.13, "learning_rate": 0.00021619713145150612, "loss": 3.9118, "step": 3659 }, { "epoch": 1.13, "learning_rate": 0.00021616150239226888, "loss": 3.6909, "step": 3660 }, { "epoch": 1.13, "learning_rate": 0.00021612587333303166, "loss": 3.6122, "step": 3661 }, { "epoch": 1.13, "learning_rate": 0.00021609024427379442, "loss": 3.4199, "step": 3662 }, { "epoch": 1.13, "learning_rate": 0.00021605461521455722, "loss": 3.8526, "step": 3663 }, { "epoch": 1.13, "learning_rate": 0.00021601898615531998, "loss": 3.5979, "step": 3664 }, { "epoch": 1.13, "learning_rate": 0.00021598335709608273, "loss": 3.5794, "step": 3665 }, { "epoch": 1.13, "learning_rate": 0.00021594772803684552, "loss": 3.5851, "step": 3666 }, { "epoch": 1.13, "learning_rate": 0.00021591209897760827, "loss": 3.5038, "step": 3667 }, { "epoch": 1.13, "learning_rate": 0.00021587646991837108, "loss": 3.7951, "step": 3668 }, { "epoch": 1.13, "learning_rate": 0.00021584084085913383, "loss": 3.3288, "step": 3669 }, { "epoch": 1.13, "learning_rate": 0.0002158052117998966, "loss": 3.475, "step": 3670 }, { "epoch": 1.13, "learning_rate": 0.00021576958274065937, "loss": 3.0766, "step": 3671 }, { "epoch": 1.13, "learning_rate": 0.00021573395368142213, "loss": 3.7131, "step": 3672 }, { "epoch": 1.13, "learning_rate": 0.0002156983246221849, "loss": 3.503, "step": 3673 }, { "epoch": 1.13, "learning_rate": 0.0002156626955629477, "loss": 3.4958, "step": 3674 }, { "epoch": 1.13, "learning_rate": 0.00021562706650371044, "loss": 3.3921, "step": 3675 }, { "epoch": 1.13, "learning_rate": 0.00021559143744447323, "loss": 3.2112, "step": 3676 }, { "epoch": 1.13, "learning_rate": 0.000215555808385236, "loss": 2.9011, "step": 3677 }, { "epoch": 1.14, "learning_rate": 0.00021552017932599876, "loss": 3.044, "step": 3678 }, { "epoch": 1.14, "learning_rate": 0.00021548455026676154, "loss": 2.8073, "step": 3679 }, { "epoch": 1.14, "learning_rate": 0.0002154489212075243, "loss": 2.99, "step": 3680 }, { "epoch": 1.14, "learning_rate": 0.0002154132921482871, "loss": 2.8782, "step": 3681 }, { "epoch": 1.14, "learning_rate": 0.00021537766308904986, "loss": 2.7559, "step": 3682 }, { "epoch": 1.14, "learning_rate": 0.00021534203402981262, "loss": 2.8149, "step": 3683 }, { "epoch": 1.14, "learning_rate": 0.0002153064049705754, "loss": 2.7567, "step": 3684 }, { "epoch": 1.14, "learning_rate": 0.00021527077591133815, "loss": 2.542, "step": 3685 }, { "epoch": 1.14, "learning_rate": 0.0002152351468521009, "loss": 2.6179, "step": 3686 }, { "epoch": 1.14, "learning_rate": 0.00021519951779286372, "loss": 2.5535, "step": 3687 }, { "epoch": 1.14, "learning_rate": 0.00021516388873362647, "loss": 2.3951, "step": 3688 }, { "epoch": 1.14, "learning_rate": 0.00021512825967438925, "loss": 2.3171, "step": 3689 }, { "epoch": 1.14, "learning_rate": 0.000215092630615152, "loss": 2.3259, "step": 3690 }, { "epoch": 1.14, "learning_rate": 0.00021505700155591476, "loss": 5.1075, "step": 3691 }, { "epoch": 1.14, "learning_rate": 0.00021502137249667757, "loss": 4.8097, "step": 3692 }, { "epoch": 1.14, "learning_rate": 0.00021498574343744033, "loss": 4.2654, "step": 3693 }, { "epoch": 1.14, "learning_rate": 0.00021495011437820308, "loss": 4.2753, "step": 3694 }, { "epoch": 1.14, "learning_rate": 0.00021491448531896586, "loss": 4.0222, "step": 3695 }, { "epoch": 1.14, "learning_rate": 0.00021487885625972865, "loss": 3.9405, "step": 3696 }, { "epoch": 1.14, "learning_rate": 0.00021484322720049143, "loss": 4.1678, "step": 3697 }, { "epoch": 1.14, "learning_rate": 0.00021480759814125418, "loss": 4.1229, "step": 3698 }, { "epoch": 1.14, "learning_rate": 0.00021477196908201694, "loss": 3.9476, "step": 3699 }, { "epoch": 1.14, "learning_rate": 0.00021473634002277975, "loss": 4.0646, "step": 3700 }, { "epoch": 1.14, "learning_rate": 0.0002147007109635425, "loss": 4.1456, "step": 3701 }, { "epoch": 1.14, "learning_rate": 0.00021466508190430528, "loss": 3.6841, "step": 3702 }, { "epoch": 1.14, "learning_rate": 0.00021462945284506804, "loss": 4.0765, "step": 3703 }, { "epoch": 1.14, "learning_rate": 0.0002145938237858308, "loss": 3.8705, "step": 3704 }, { "epoch": 1.14, "learning_rate": 0.0002145581947265936, "loss": 3.9964, "step": 3705 }, { "epoch": 1.14, "learning_rate": 0.00021452256566735636, "loss": 3.6951, "step": 3706 }, { "epoch": 1.14, "learning_rate": 0.0002144869366081191, "loss": 3.8308, "step": 3707 }, { "epoch": 1.14, "learning_rate": 0.0002144513075488819, "loss": 3.8429, "step": 3708 }, { "epoch": 1.14, "learning_rate": 0.00021441567848964465, "loss": 3.7294, "step": 3709 }, { "epoch": 1.15, "learning_rate": 0.00021438004943040746, "loss": 3.6664, "step": 3710 }, { "epoch": 1.15, "learning_rate": 0.0002143444203711702, "loss": 3.8397, "step": 3711 }, { "epoch": 1.15, "learning_rate": 0.00021430879131193297, "loss": 3.4751, "step": 3712 }, { "epoch": 1.15, "learning_rate": 0.00021427316225269575, "loss": 3.6625, "step": 3713 }, { "epoch": 1.15, "learning_rate": 0.0002142375331934585, "loss": 3.6257, "step": 3714 }, { "epoch": 1.15, "learning_rate": 0.0002142019041342213, "loss": 3.4867, "step": 3715 }, { "epoch": 1.15, "learning_rate": 0.00021416627507498407, "loss": 3.5768, "step": 3716 }, { "epoch": 1.15, "learning_rate": 0.00021413064601574682, "loss": 3.5201, "step": 3717 }, { "epoch": 1.15, "learning_rate": 0.0002140950169565096, "loss": 3.3912, "step": 3718 }, { "epoch": 1.15, "learning_rate": 0.00021405938789727236, "loss": 3.4237, "step": 3719 }, { "epoch": 1.15, "learning_rate": 0.00021402375883803514, "loss": 3.277, "step": 3720 }, { "epoch": 1.15, "learning_rate": 0.00021398812977879792, "loss": 3.4923, "step": 3721 }, { "epoch": 1.15, "learning_rate": 0.00021395250071956068, "loss": 3.2683, "step": 3722 }, { "epoch": 1.15, "learning_rate": 0.00021391687166032349, "loss": 3.5315, "step": 3723 }, { "epoch": 1.15, "learning_rate": 0.00021388124260108624, "loss": 3.4321, "step": 3724 }, { "epoch": 1.15, "learning_rate": 0.000213845613541849, "loss": 3.0989, "step": 3725 }, { "epoch": 1.15, "learning_rate": 0.00021380998448261178, "loss": 3.4685, "step": 3726 }, { "epoch": 1.15, "learning_rate": 0.00021377435542337453, "loss": 2.9722, "step": 3727 }, { "epoch": 1.15, "learning_rate": 0.00021373872636413734, "loss": 3.0785, "step": 3728 }, { "epoch": 1.15, "learning_rate": 0.0002137030973049001, "loss": 2.8634, "step": 3729 }, { "epoch": 1.15, "learning_rate": 0.00021366746824566285, "loss": 3.0073, "step": 3730 }, { "epoch": 1.15, "learning_rate": 0.00021363183918642563, "loss": 2.8957, "step": 3731 }, { "epoch": 1.15, "learning_rate": 0.0002135962101271884, "loss": 2.8998, "step": 3732 }, { "epoch": 1.15, "learning_rate": 0.00021356058106795114, "loss": 2.6544, "step": 3733 }, { "epoch": 1.15, "learning_rate": 0.00021352495200871395, "loss": 2.8827, "step": 3734 }, { "epoch": 1.15, "learning_rate": 0.0002134893229494767, "loss": 2.6104, "step": 3735 }, { "epoch": 1.15, "learning_rate": 0.0002134536938902395, "loss": 2.4351, "step": 3736 }, { "epoch": 1.15, "learning_rate": 0.00021341806483100224, "loss": 2.5664, "step": 3737 }, { "epoch": 1.15, "learning_rate": 0.000213382435771765, "loss": 2.3942, "step": 3738 }, { "epoch": 1.15, "learning_rate": 0.0002133468067125278, "loss": 2.3263, "step": 3739 }, { "epoch": 1.15, "learning_rate": 0.00021331117765329056, "loss": 2.1375, "step": 3740 }, { "epoch": 1.15, "learning_rate": 0.00021327554859405332, "loss": 4.9413, "step": 3741 }, { "epoch": 1.15, "learning_rate": 0.0002132399195348161, "loss": 4.6035, "step": 3742 }, { "epoch": 1.16, "learning_rate": 0.00021320429047557888, "loss": 4.5939, "step": 3743 }, { "epoch": 1.16, "learning_rate": 0.00021316866141634166, "loss": 4.3925, "step": 3744 }, { "epoch": 1.16, "learning_rate": 0.00021313303235710442, "loss": 4.4223, "step": 3745 }, { "epoch": 1.16, "learning_rate": 0.00021309740329786717, "loss": 3.9904, "step": 3746 }, { "epoch": 1.16, "learning_rate": 0.00021306177423862998, "loss": 4.1845, "step": 3747 }, { "epoch": 1.16, "learning_rate": 0.00021302614517939273, "loss": 4.065, "step": 3748 }, { "epoch": 1.16, "learning_rate": 0.00021299051612015552, "loss": 4.0944, "step": 3749 }, { "epoch": 1.16, "learning_rate": 0.00021295488706091827, "loss": 3.8699, "step": 3750 }, { "epoch": 1.16, "learning_rate": 0.00021291925800168103, "loss": 3.9307, "step": 3751 }, { "epoch": 1.16, "learning_rate": 0.00021288362894244384, "loss": 3.8036, "step": 3752 }, { "epoch": 1.16, "learning_rate": 0.0002128479998832066, "loss": 3.9143, "step": 3753 }, { "epoch": 1.16, "learning_rate": 0.00021281237082396934, "loss": 3.8288, "step": 3754 }, { "epoch": 1.16, "learning_rate": 0.00021277674176473213, "loss": 4.1583, "step": 3755 }, { "epoch": 1.16, "learning_rate": 0.00021274111270549488, "loss": 3.6806, "step": 3756 }, { "epoch": 1.16, "learning_rate": 0.0002127054836462577, "loss": 3.8103, "step": 3757 }, { "epoch": 1.16, "learning_rate": 0.00021266985458702045, "loss": 3.8431, "step": 3758 }, { "epoch": 1.16, "learning_rate": 0.0002126342255277832, "loss": 3.9103, "step": 3759 }, { "epoch": 1.16, "learning_rate": 0.00021259859646854598, "loss": 3.7451, "step": 3760 }, { "epoch": 1.16, "learning_rate": 0.00021256296740930874, "loss": 3.8477, "step": 3761 }, { "epoch": 1.16, "learning_rate": 0.00021252733835007155, "loss": 3.9205, "step": 3762 }, { "epoch": 1.16, "learning_rate": 0.0002124917092908343, "loss": 3.6388, "step": 3763 }, { "epoch": 1.16, "learning_rate": 0.00021245608023159706, "loss": 3.6408, "step": 3764 }, { "epoch": 1.16, "learning_rate": 0.00021242045117235984, "loss": 3.6903, "step": 3765 }, { "epoch": 1.16, "learning_rate": 0.0002123848221131226, "loss": 3.5758, "step": 3766 }, { "epoch": 1.16, "learning_rate": 0.00021234919305388537, "loss": 3.468, "step": 3767 }, { "epoch": 1.16, "learning_rate": 0.00021231356399464816, "loss": 3.546, "step": 3768 }, { "epoch": 1.16, "learning_rate": 0.0002122779349354109, "loss": 3.443, "step": 3769 }, { "epoch": 1.16, "learning_rate": 0.00021224230587617372, "loss": 3.4192, "step": 3770 }, { "epoch": 1.16, "learning_rate": 0.00021220667681693647, "loss": 3.3478, "step": 3771 }, { "epoch": 1.16, "learning_rate": 0.00021217104775769923, "loss": 3.3836, "step": 3772 }, { "epoch": 1.16, "learning_rate": 0.000212135418698462, "loss": 3.4257, "step": 3773 }, { "epoch": 1.16, "learning_rate": 0.00021209978963922477, "loss": 3.5594, "step": 3774 }, { "epoch": 1.17, "learning_rate": 0.00021206416057998752, "loss": 3.2849, "step": 3775 }, { "epoch": 1.17, "learning_rate": 0.00021202853152075033, "loss": 3.1643, "step": 3776 }, { "epoch": 1.17, "learning_rate": 0.00021199290246151308, "loss": 3.2061, "step": 3777 }, { "epoch": 1.17, "learning_rate": 0.00021195727340227587, "loss": 3.1215, "step": 3778 }, { "epoch": 1.17, "learning_rate": 0.00021192164434303862, "loss": 2.9831, "step": 3779 }, { "epoch": 1.17, "learning_rate": 0.00021188601528380138, "loss": 2.9483, "step": 3780 }, { "epoch": 1.17, "learning_rate": 0.00021185038622456418, "loss": 3.0607, "step": 3781 }, { "epoch": 1.17, "learning_rate": 0.00021181475716532694, "loss": 2.958, "step": 3782 }, { "epoch": 1.17, "learning_rate": 0.00021177912810608972, "loss": 2.9157, "step": 3783 }, { "epoch": 1.17, "learning_rate": 0.00021174349904685248, "loss": 2.597, "step": 3784 }, { "epoch": 1.17, "learning_rate": 0.00021170786998761523, "loss": 2.7457, "step": 3785 }, { "epoch": 1.17, "learning_rate": 0.00021167224092837804, "loss": 2.4981, "step": 3786 }, { "epoch": 1.17, "learning_rate": 0.0002116366118691408, "loss": 2.4557, "step": 3787 }, { "epoch": 1.17, "learning_rate": 0.00021160098280990355, "loss": 2.5203, "step": 3788 }, { "epoch": 1.17, "learning_rate": 0.00021156535375066633, "loss": 2.3898, "step": 3789 }, { "epoch": 1.17, "learning_rate": 0.0002115297246914291, "loss": 2.2686, "step": 3790 }, { "epoch": 1.17, "learning_rate": 0.0002114940956321919, "loss": 4.8405, "step": 3791 }, { "epoch": 1.17, "learning_rate": 0.00021145846657295465, "loss": 4.7778, "step": 3792 }, { "epoch": 1.17, "learning_rate": 0.0002114228375137174, "loss": 4.5258, "step": 3793 }, { "epoch": 1.17, "learning_rate": 0.0002113872084544802, "loss": 4.1723, "step": 3794 }, { "epoch": 1.17, "learning_rate": 0.00021135157939524297, "loss": 4.0683, "step": 3795 }, { "epoch": 1.17, "learning_rate": 0.00021131595033600575, "loss": 4.0587, "step": 3796 }, { "epoch": 1.17, "learning_rate": 0.0002112803212767685, "loss": 3.9326, "step": 3797 }, { "epoch": 1.17, "learning_rate": 0.00021124469221753126, "loss": 3.9666, "step": 3798 }, { "epoch": 1.17, "learning_rate": 0.00021120906315829407, "loss": 3.7685, "step": 3799 }, { "epoch": 1.17, "learning_rate": 0.00021117343409905682, "loss": 3.6005, "step": 3800 }, { "epoch": 1.17, "learning_rate": 0.00021113780503981958, "loss": 4.0159, "step": 3801 }, { "epoch": 1.17, "learning_rate": 0.00021110217598058236, "loss": 3.7804, "step": 3802 }, { "epoch": 1.17, "learning_rate": 0.00021106654692134511, "loss": 3.6372, "step": 3803 }, { "epoch": 1.17, "learning_rate": 0.00021103091786210792, "loss": 3.793, "step": 3804 }, { "epoch": 1.17, "learning_rate": 0.00021099528880287068, "loss": 3.9444, "step": 3805 }, { "epoch": 1.17, "learning_rate": 0.00021095965974363343, "loss": 3.8123, "step": 3806 }, { "epoch": 1.17, "learning_rate": 0.00021092403068439622, "loss": 3.4506, "step": 3807 }, { "epoch": 1.18, "learning_rate": 0.00021088840162515897, "loss": 3.643, "step": 3808 }, { "epoch": 1.18, "learning_rate": 0.00021085277256592172, "loss": 3.816, "step": 3809 }, { "epoch": 1.18, "learning_rate": 0.00021081714350668453, "loss": 3.7531, "step": 3810 }, { "epoch": 1.18, "learning_rate": 0.0002107815144474473, "loss": 3.67, "step": 3811 }, { "epoch": 1.18, "learning_rate": 0.00021074588538821007, "loss": 3.422, "step": 3812 }, { "epoch": 1.18, "learning_rate": 0.00021071025632897285, "loss": 3.6633, "step": 3813 }, { "epoch": 1.18, "learning_rate": 0.0002106746272697356, "loss": 3.4557, "step": 3814 }, { "epoch": 1.18, "learning_rate": 0.0002106389982104984, "loss": 3.6747, "step": 3815 }, { "epoch": 1.18, "learning_rate": 0.00021060336915126114, "loss": 3.3116, "step": 3816 }, { "epoch": 1.18, "learning_rate": 0.00021056774009202395, "loss": 3.5444, "step": 3817 }, { "epoch": 1.18, "learning_rate": 0.0002105321110327867, "loss": 3.4997, "step": 3818 }, { "epoch": 1.18, "learning_rate": 0.00021049648197354946, "loss": 3.6271, "step": 3819 }, { "epoch": 1.18, "learning_rate": 0.00021046085291431224, "loss": 3.3925, "step": 3820 }, { "epoch": 1.18, "learning_rate": 0.000210425223855075, "loss": 3.476, "step": 3821 }, { "epoch": 1.18, "learning_rate": 0.00021038959479583775, "loss": 3.3939, "step": 3822 }, { "epoch": 1.18, "learning_rate": 0.00021035396573660056, "loss": 3.3488, "step": 3823 }, { "epoch": 1.18, "learning_rate": 0.00021031833667736332, "loss": 3.1716, "step": 3824 }, { "epoch": 1.18, "learning_rate": 0.0002102827076181261, "loss": 3.1065, "step": 3825 }, { "epoch": 1.18, "learning_rate": 0.00021024707855888885, "loss": 3.0623, "step": 3826 }, { "epoch": 1.18, "learning_rate": 0.0002102114494996516, "loss": 3.1655, "step": 3827 }, { "epoch": 1.18, "learning_rate": 0.00021017582044041442, "loss": 2.9909, "step": 3828 }, { "epoch": 1.18, "learning_rate": 0.00021014019138117717, "loss": 2.9355, "step": 3829 }, { "epoch": 1.18, "learning_rate": 0.00021010456232193995, "loss": 3.0908, "step": 3830 }, { "epoch": 1.18, "learning_rate": 0.0002100689332627027, "loss": 2.9964, "step": 3831 }, { "epoch": 1.18, "learning_rate": 0.00021003330420346546, "loss": 3.048, "step": 3832 }, { "epoch": 1.18, "learning_rate": 0.00020999767514422827, "loss": 2.7156, "step": 3833 }, { "epoch": 1.18, "learning_rate": 0.00020996204608499103, "loss": 2.7042, "step": 3834 }, { "epoch": 1.18, "learning_rate": 0.00020992641702575378, "loss": 2.7597, "step": 3835 }, { "epoch": 1.18, "learning_rate": 0.00020989078796651656, "loss": 2.4959, "step": 3836 }, { "epoch": 1.18, "learning_rate": 0.00020985515890727935, "loss": 2.572, "step": 3837 }, { "epoch": 1.18, "learning_rate": 0.00020981952984804213, "loss": 2.3327, "step": 3838 }, { "epoch": 1.18, "learning_rate": 0.00020978390078880488, "loss": 2.3436, "step": 3839 }, { "epoch": 1.19, "learning_rate": 0.00020974827172956764, "loss": 2.111, "step": 3840 }, { "epoch": 1.19, "learning_rate": 0.00020971264267033045, "loss": 4.9518, "step": 3841 }, { "epoch": 1.19, "learning_rate": 0.0002096770136110932, "loss": 4.7616, "step": 3842 }, { "epoch": 1.19, "learning_rate": 0.00020964138455185598, "loss": 4.2814, "step": 3843 }, { "epoch": 1.19, "learning_rate": 0.00020960575549261874, "loss": 4.5324, "step": 3844 }, { "epoch": 1.19, "learning_rate": 0.0002095701264333815, "loss": 4.1269, "step": 3845 }, { "epoch": 1.19, "learning_rate": 0.0002095344973741443, "loss": 4.0965, "step": 3846 }, { "epoch": 1.19, "learning_rate": 0.00020949886831490706, "loss": 4.0684, "step": 3847 }, { "epoch": 1.19, "learning_rate": 0.0002094632392556698, "loss": 4.1811, "step": 3848 }, { "epoch": 1.19, "learning_rate": 0.0002094276101964326, "loss": 3.8931, "step": 3849 }, { "epoch": 1.19, "learning_rate": 0.00020939198113719535, "loss": 3.8323, "step": 3850 }, { "epoch": 1.19, "learning_rate": 0.00020935635207795816, "loss": 4.1512, "step": 3851 }, { "epoch": 1.19, "learning_rate": 0.0002093207230187209, "loss": 3.9295, "step": 3852 }, { "epoch": 1.19, "learning_rate": 0.00020928509395948367, "loss": 3.6747, "step": 3853 }, { "epoch": 1.19, "learning_rate": 0.00020924946490024645, "loss": 3.6895, "step": 3854 }, { "epoch": 1.19, "learning_rate": 0.0002092138358410092, "loss": 3.9206, "step": 3855 }, { "epoch": 1.19, "learning_rate": 0.00020917820678177196, "loss": 3.9048, "step": 3856 }, { "epoch": 1.19, "learning_rate": 0.00020914257772253477, "loss": 3.7613, "step": 3857 }, { "epoch": 1.19, "learning_rate": 0.00020910694866329752, "loss": 3.7803, "step": 3858 }, { "epoch": 1.19, "learning_rate": 0.0002090713196040603, "loss": 3.5204, "step": 3859 }, { "epoch": 1.19, "learning_rate": 0.00020903569054482309, "loss": 3.7355, "step": 3860 }, { "epoch": 1.19, "learning_rate": 0.00020900006148558584, "loss": 3.6747, "step": 3861 }, { "epoch": 1.19, "learning_rate": 0.00020896443242634862, "loss": 3.4319, "step": 3862 }, { "epoch": 1.19, "learning_rate": 0.00020892880336711138, "loss": 3.523, "step": 3863 }, { "epoch": 1.19, "learning_rate": 0.00020889317430787419, "loss": 3.4138, "step": 3864 }, { "epoch": 1.19, "learning_rate": 0.00020885754524863694, "loss": 3.5175, "step": 3865 }, { "epoch": 1.19, "learning_rate": 0.0002088219161893997, "loss": 3.5736, "step": 3866 }, { "epoch": 1.19, "learning_rate": 0.00020878628713016248, "loss": 3.2976, "step": 3867 }, { "epoch": 1.19, "learning_rate": 0.00020875065807092523, "loss": 3.6764, "step": 3868 }, { "epoch": 1.19, "learning_rate": 0.000208715029011688, "loss": 3.5368, "step": 3869 }, { "epoch": 1.19, "learning_rate": 0.0002086793999524508, "loss": 3.3701, "step": 3870 }, { "epoch": 1.19, "learning_rate": 0.00020864377089321355, "loss": 3.4867, "step": 3871 }, { "epoch": 1.2, "learning_rate": 0.00020860814183397633, "loss": 3.3768, "step": 3872 }, { "epoch": 1.2, "learning_rate": 0.0002085725127747391, "loss": 3.3416, "step": 3873 }, { "epoch": 1.2, "learning_rate": 0.00020853688371550184, "loss": 3.4187, "step": 3874 }, { "epoch": 1.2, "learning_rate": 0.00020850125465626465, "loss": 3.244, "step": 3875 }, { "epoch": 1.2, "learning_rate": 0.0002084656255970274, "loss": 3.2067, "step": 3876 }, { "epoch": 1.2, "learning_rate": 0.0002084299965377902, "loss": 3.2976, "step": 3877 }, { "epoch": 1.2, "learning_rate": 0.00020839436747855294, "loss": 3.1766, "step": 3878 }, { "epoch": 1.2, "learning_rate": 0.0002083587384193157, "loss": 3.2158, "step": 3879 }, { "epoch": 1.2, "learning_rate": 0.0002083231093600785, "loss": 2.9337, "step": 3880 }, { "epoch": 1.2, "learning_rate": 0.00020828748030084126, "loss": 2.9373, "step": 3881 }, { "epoch": 1.2, "learning_rate": 0.00020825185124160402, "loss": 2.8662, "step": 3882 }, { "epoch": 1.2, "learning_rate": 0.0002082162221823668, "loss": 2.8634, "step": 3883 }, { "epoch": 1.2, "learning_rate": 0.00020818059312312958, "loss": 2.753, "step": 3884 }, { "epoch": 1.2, "learning_rate": 0.00020814496406389236, "loss": 2.5606, "step": 3885 }, { "epoch": 1.2, "learning_rate": 0.00020810933500465512, "loss": 2.5708, "step": 3886 }, { "epoch": 1.2, "learning_rate": 0.00020807370594541787, "loss": 2.4448, "step": 3887 }, { "epoch": 1.2, "learning_rate": 0.00020803807688618068, "loss": 2.3447, "step": 3888 }, { "epoch": 1.2, "learning_rate": 0.00020800244782694343, "loss": 2.1252, "step": 3889 }, { "epoch": 1.2, "learning_rate": 0.0002079668187677062, "loss": 2.0913, "step": 3890 }, { "epoch": 1.2, "learning_rate": 0.00020793118970846897, "loss": 4.6698, "step": 3891 }, { "epoch": 1.2, "learning_rate": 0.00020789556064923173, "loss": 4.6599, "step": 3892 }, { "epoch": 1.2, "learning_rate": 0.00020785993158999453, "loss": 4.2264, "step": 3893 }, { "epoch": 1.2, "learning_rate": 0.0002078243025307573, "loss": 4.3761, "step": 3894 }, { "epoch": 1.2, "learning_rate": 0.00020778867347152004, "loss": 4.0745, "step": 3895 }, { "epoch": 1.2, "learning_rate": 0.00020775304441228283, "loss": 3.9686, "step": 3896 }, { "epoch": 1.2, "learning_rate": 0.00020771741535304558, "loss": 3.773, "step": 3897 }, { "epoch": 1.2, "learning_rate": 0.0002076817862938084, "loss": 3.8665, "step": 3898 }, { "epoch": 1.2, "learning_rate": 0.00020764615723457114, "loss": 3.7586, "step": 3899 }, { "epoch": 1.2, "learning_rate": 0.0002076105281753339, "loss": 3.9404, "step": 3900 }, { "epoch": 1.2, "learning_rate": 0.00020757489911609668, "loss": 4.0543, "step": 3901 }, { "epoch": 1.2, "learning_rate": 0.00020753927005685944, "loss": 4.041, "step": 3902 }, { "epoch": 1.2, "learning_rate": 0.00020750364099762222, "loss": 3.5808, "step": 3903 }, { "epoch": 1.2, "learning_rate": 0.000207468011938385, "loss": 3.5349, "step": 3904 }, { "epoch": 1.21, "learning_rate": 0.00020743238287914775, "loss": 3.6957, "step": 3905 }, { "epoch": 1.21, "learning_rate": 0.00020739675381991054, "loss": 3.6008, "step": 3906 }, { "epoch": 1.21, "learning_rate": 0.00020736112476067332, "loss": 3.7735, "step": 3907 }, { "epoch": 1.21, "learning_rate": 0.00020732549570143607, "loss": 3.5541, "step": 3908 }, { "epoch": 1.21, "learning_rate": 0.00020728986664219885, "loss": 3.6948, "step": 3909 }, { "epoch": 1.21, "learning_rate": 0.0002072542375829616, "loss": 3.6745, "step": 3910 }, { "epoch": 1.21, "learning_rate": 0.00020721860852372442, "loss": 3.5198, "step": 3911 }, { "epoch": 1.21, "learning_rate": 0.00020718297946448717, "loss": 3.6279, "step": 3912 }, { "epoch": 1.21, "learning_rate": 0.00020714735040524993, "loss": 3.4345, "step": 3913 }, { "epoch": 1.21, "learning_rate": 0.0002071117213460127, "loss": 3.4261, "step": 3914 }, { "epoch": 1.21, "learning_rate": 0.00020707609228677546, "loss": 3.5164, "step": 3915 }, { "epoch": 1.21, "learning_rate": 0.00020704046322753822, "loss": 3.5485, "step": 3916 }, { "epoch": 1.21, "learning_rate": 0.00020700483416830103, "loss": 3.7133, "step": 3917 }, { "epoch": 1.21, "learning_rate": 0.00020696920510906378, "loss": 3.5885, "step": 3918 }, { "epoch": 1.21, "learning_rate": 0.00020693357604982657, "loss": 3.3758, "step": 3919 }, { "epoch": 1.21, "learning_rate": 0.00020689794699058932, "loss": 3.2216, "step": 3920 }, { "epoch": 1.21, "learning_rate": 0.00020686231793135207, "loss": 3.4085, "step": 3921 }, { "epoch": 1.21, "learning_rate": 0.00020682668887211488, "loss": 3.4565, "step": 3922 }, { "epoch": 1.21, "learning_rate": 0.00020679105981287764, "loss": 3.0165, "step": 3923 }, { "epoch": 1.21, "learning_rate": 0.00020675543075364042, "loss": 3.3217, "step": 3924 }, { "epoch": 1.21, "learning_rate": 0.00020671980169440318, "loss": 3.1182, "step": 3925 }, { "epoch": 1.21, "learning_rate": 0.00020668417263516593, "loss": 3.234, "step": 3926 }, { "epoch": 1.21, "learning_rate": 0.00020664854357592874, "loss": 2.9308, "step": 3927 }, { "epoch": 1.21, "learning_rate": 0.0002066129145166915, "loss": 3.0146, "step": 3928 }, { "epoch": 1.21, "learning_rate": 0.00020657728545745425, "loss": 3.138, "step": 3929 }, { "epoch": 1.21, "learning_rate": 0.00020654165639821703, "loss": 2.8432, "step": 3930 }, { "epoch": 1.21, "learning_rate": 0.0002065060273389798, "loss": 2.9042, "step": 3931 }, { "epoch": 1.21, "learning_rate": 0.0002064703982797426, "loss": 2.7021, "step": 3932 }, { "epoch": 1.21, "learning_rate": 0.00020643476922050535, "loss": 2.7552, "step": 3933 }, { "epoch": 1.21, "learning_rate": 0.0002063991401612681, "loss": 2.5438, "step": 3934 }, { "epoch": 1.21, "learning_rate": 0.0002063635111020309, "loss": 2.4752, "step": 3935 }, { "epoch": 1.21, "learning_rate": 0.00020632788204279367, "loss": 2.5751, "step": 3936 }, { "epoch": 1.22, "learning_rate": 0.00020629225298355642, "loss": 2.3778, "step": 3937 }, { "epoch": 1.22, "learning_rate": 0.0002062566239243192, "loss": 2.3704, "step": 3938 }, { "epoch": 1.22, "learning_rate": 0.00020622099486508196, "loss": 2.1014, "step": 3939 }, { "epoch": 1.22, "learning_rate": 0.00020618536580584477, "loss": 2.3672, "step": 3940 }, { "epoch": 1.22, "learning_rate": 0.00020614973674660752, "loss": 4.9756, "step": 3941 }, { "epoch": 1.22, "learning_rate": 0.00020611410768737028, "loss": 4.7258, "step": 3942 }, { "epoch": 1.22, "learning_rate": 0.00020607847862813306, "loss": 4.5277, "step": 3943 }, { "epoch": 1.22, "learning_rate": 0.00020604284956889581, "loss": 4.2526, "step": 3944 }, { "epoch": 1.22, "learning_rate": 0.00020600722050965862, "loss": 4.0235, "step": 3945 }, { "epoch": 1.22, "learning_rate": 0.00020597159145042138, "loss": 4.1879, "step": 3946 }, { "epoch": 1.22, "learning_rate": 0.00020593596239118413, "loss": 3.9848, "step": 3947 }, { "epoch": 1.22, "learning_rate": 0.00020590033333194691, "loss": 3.7477, "step": 3948 }, { "epoch": 1.22, "learning_rate": 0.00020586470427270967, "loss": 3.7518, "step": 3949 }, { "epoch": 1.22, "learning_rate": 0.00020582907521347245, "loss": 4.0188, "step": 3950 }, { "epoch": 1.22, "learning_rate": 0.00020579344615423523, "loss": 4.0866, "step": 3951 }, { "epoch": 1.22, "learning_rate": 0.000205757817094998, "loss": 3.7769, "step": 3952 }, { "epoch": 1.22, "learning_rate": 0.00020572218803576077, "loss": 3.893, "step": 3953 }, { "epoch": 1.22, "learning_rate": 0.00020568655897652355, "loss": 3.876, "step": 3954 }, { "epoch": 1.22, "learning_rate": 0.0002056509299172863, "loss": 3.7795, "step": 3955 }, { "epoch": 1.22, "learning_rate": 0.0002056153008580491, "loss": 3.726, "step": 3956 }, { "epoch": 1.22, "learning_rate": 0.00020557967179881184, "loss": 3.7535, "step": 3957 }, { "epoch": 1.22, "learning_rate": 0.00020554404273957465, "loss": 3.7489, "step": 3958 }, { "epoch": 1.22, "learning_rate": 0.0002055084136803374, "loss": 3.7134, "step": 3959 }, { "epoch": 1.22, "learning_rate": 0.00020547278462110016, "loss": 3.4635, "step": 3960 }, { "epoch": 1.22, "learning_rate": 0.00020543715556186294, "loss": 3.6018, "step": 3961 }, { "epoch": 1.22, "learning_rate": 0.0002054015265026257, "loss": 3.4967, "step": 3962 }, { "epoch": 1.22, "learning_rate": 0.00020536589744338845, "loss": 3.642, "step": 3963 }, { "epoch": 1.22, "learning_rate": 0.00020533026838415126, "loss": 3.5785, "step": 3964 }, { "epoch": 1.22, "learning_rate": 0.00020529463932491402, "loss": 3.4768, "step": 3965 }, { "epoch": 1.22, "learning_rate": 0.0002052590102656768, "loss": 3.3572, "step": 3966 }, { "epoch": 1.22, "learning_rate": 0.00020522338120643955, "loss": 3.5165, "step": 3967 }, { "epoch": 1.22, "learning_rate": 0.0002051877521472023, "loss": 3.4371, "step": 3968 }, { "epoch": 1.22, "learning_rate": 0.00020515212308796512, "loss": 3.4396, "step": 3969 }, { "epoch": 1.23, "learning_rate": 0.00020511649402872787, "loss": 3.2334, "step": 3970 }, { "epoch": 1.23, "learning_rate": 0.00020508086496949063, "loss": 3.4137, "step": 3971 }, { "epoch": 1.23, "learning_rate": 0.0002050452359102534, "loss": 3.4548, "step": 3972 }, { "epoch": 1.23, "learning_rate": 0.00020500960685101616, "loss": 3.1184, "step": 3973 }, { "epoch": 1.23, "learning_rate": 0.00020497397779177897, "loss": 3.4517, "step": 3974 }, { "epoch": 1.23, "learning_rate": 0.00020493834873254173, "loss": 3.1708, "step": 3975 }, { "epoch": 1.23, "learning_rate": 0.00020490271967330448, "loss": 2.9798, "step": 3976 }, { "epoch": 1.23, "learning_rate": 0.00020486709061406726, "loss": 3.0701, "step": 3977 }, { "epoch": 1.23, "learning_rate": 0.00020483146155483005, "loss": 3.0844, "step": 3978 }, { "epoch": 1.23, "learning_rate": 0.00020479583249559283, "loss": 3.0765, "step": 3979 }, { "epoch": 1.23, "learning_rate": 0.00020476020343635558, "loss": 2.9867, "step": 3980 }, { "epoch": 1.23, "learning_rate": 0.00020472457437711834, "loss": 3.0396, "step": 3981 }, { "epoch": 1.23, "learning_rate": 0.00020468894531788115, "loss": 2.9261, "step": 3982 }, { "epoch": 1.23, "learning_rate": 0.0002046533162586439, "loss": 2.8977, "step": 3983 }, { "epoch": 1.23, "learning_rate": 0.00020461768719940666, "loss": 2.8621, "step": 3984 }, { "epoch": 1.23, "learning_rate": 0.00020458205814016944, "loss": 2.6248, "step": 3985 }, { "epoch": 1.23, "learning_rate": 0.0002045464290809322, "loss": 2.5816, "step": 3986 }, { "epoch": 1.23, "learning_rate": 0.000204510800021695, "loss": 2.5025, "step": 3987 }, { "epoch": 1.23, "learning_rate": 0.00020447517096245776, "loss": 2.2911, "step": 3988 }, { "epoch": 1.23, "learning_rate": 0.0002044395419032205, "loss": 2.2851, "step": 3989 }, { "epoch": 1.23, "learning_rate": 0.0002044039128439833, "loss": 1.9868, "step": 3990 }, { "epoch": 1.23, "learning_rate": 0.00020436828378474605, "loss": 4.8458, "step": 3991 }, { "epoch": 1.23, "learning_rate": 0.00020433265472550886, "loss": 4.4478, "step": 3992 }, { "epoch": 1.23, "learning_rate": 0.0002042970256662716, "loss": 4.4579, "step": 3993 }, { "epoch": 1.23, "learning_rate": 0.00020426139660703437, "loss": 4.2727, "step": 3994 }, { "epoch": 1.23, "learning_rate": 0.00020422576754779715, "loss": 4.3846, "step": 3995 }, { "epoch": 1.23, "learning_rate": 0.0002041901384885599, "loss": 4.1739, "step": 3996 }, { "epoch": 1.23, "learning_rate": 0.00020415450942932268, "loss": 4.2414, "step": 3997 }, { "epoch": 1.23, "learning_rate": 0.00020411888037008547, "loss": 3.9333, "step": 3998 }, { "epoch": 1.23, "learning_rate": 0.00020408325131084822, "loss": 3.6524, "step": 3999 }, { "epoch": 1.23, "learning_rate": 0.000204047622251611, "loss": 3.8403, "step": 4000 }, { "epoch": 1.23, "eval_bleu": 4.614693226090912e-15, "eval_loss": 4.254786491394043, "eval_runtime": 2559.2675, "eval_samples_per_second": 5.767, "eval_steps_per_second": 0.721, "step": 4000 }, { "epoch": 1.23, "learning_rate": 0.00020401199319237378, "loss": 3.9412, "step": 4001 }, { "epoch": 1.24, "learning_rate": 0.00020397636413313654, "loss": 3.9411, "step": 4002 }, { "epoch": 1.24, "learning_rate": 0.00020394073507389932, "loss": 3.9369, "step": 4003 }, { "epoch": 1.24, "learning_rate": 0.00020390510601466208, "loss": 3.7577, "step": 4004 }, { "epoch": 1.24, "learning_rate": 0.00020386947695542483, "loss": 3.7133, "step": 4005 }, { "epoch": 1.24, "learning_rate": 0.00020383384789618764, "loss": 3.5817, "step": 4006 }, { "epoch": 1.24, "learning_rate": 0.0002037982188369504, "loss": 3.7704, "step": 4007 }, { "epoch": 1.24, "learning_rate": 0.00020376258977771318, "loss": 3.6969, "step": 4008 }, { "epoch": 1.24, "learning_rate": 0.00020372696071847593, "loss": 3.5987, "step": 4009 }, { "epoch": 1.24, "learning_rate": 0.00020369133165923869, "loss": 3.5088, "step": 4010 }, { "epoch": 1.24, "learning_rate": 0.0002036557026000015, "loss": 3.5038, "step": 4011 }, { "epoch": 1.24, "learning_rate": 0.00020362007354076425, "loss": 3.608, "step": 4012 }, { "epoch": 1.24, "learning_rate": 0.00020358444448152703, "loss": 3.6325, "step": 4013 }, { "epoch": 1.24, "learning_rate": 0.00020354881542228979, "loss": 3.4694, "step": 4014 }, { "epoch": 1.24, "learning_rate": 0.00020351318636305254, "loss": 3.8725, "step": 4015 }, { "epoch": 1.24, "learning_rate": 0.00020347755730381535, "loss": 3.5531, "step": 4016 }, { "epoch": 1.24, "learning_rate": 0.0002034419282445781, "loss": 3.416, "step": 4017 }, { "epoch": 1.24, "learning_rate": 0.00020340629918534086, "loss": 3.6539, "step": 4018 }, { "epoch": 1.24, "learning_rate": 0.00020337067012610364, "loss": 3.5366, "step": 4019 }, { "epoch": 1.24, "learning_rate": 0.0002033350410668664, "loss": 3.3454, "step": 4020 }, { "epoch": 1.24, "learning_rate": 0.0002032994120076292, "loss": 3.2973, "step": 4021 }, { "epoch": 1.24, "learning_rate": 0.00020326378294839196, "loss": 3.2673, "step": 4022 }, { "epoch": 1.24, "learning_rate": 0.00020322815388915471, "loss": 3.5005, "step": 4023 }, { "epoch": 1.24, "learning_rate": 0.0002031925248299175, "loss": 3.1922, "step": 4024 }, { "epoch": 1.24, "learning_rate": 0.00020315689577068028, "loss": 3.2475, "step": 4025 }, { "epoch": 1.24, "learning_rate": 0.00020312126671144306, "loss": 2.9176, "step": 4026 }, { "epoch": 1.24, "learning_rate": 0.00020308563765220582, "loss": 3.1856, "step": 4027 }, { "epoch": 1.24, "learning_rate": 0.00020305000859296857, "loss": 2.9013, "step": 4028 }, { "epoch": 1.24, "learning_rate": 0.00020301437953373138, "loss": 2.9738, "step": 4029 }, { "epoch": 1.24, "learning_rate": 0.00020297875047449413, "loss": 3.222, "step": 4030 }, { "epoch": 1.24, "learning_rate": 0.0002029431214152569, "loss": 2.8784, "step": 4031 }, { "epoch": 1.24, "learning_rate": 0.00020290749235601967, "loss": 2.9746, "step": 4032 }, { "epoch": 1.24, "learning_rate": 0.00020287186329678243, "loss": 2.8643, "step": 4033 }, { "epoch": 1.25, "learning_rate": 0.00020283623423754523, "loss": 2.512, "step": 4034 }, { "epoch": 1.25, "learning_rate": 0.000202800605178308, "loss": 2.6302, "step": 4035 }, { "epoch": 1.25, "learning_rate": 0.00020276497611907074, "loss": 2.5145, "step": 4036 }, { "epoch": 1.25, "learning_rate": 0.00020272934705983353, "loss": 2.3529, "step": 4037 }, { "epoch": 1.25, "learning_rate": 0.00020269371800059628, "loss": 2.5676, "step": 4038 }, { "epoch": 1.25, "learning_rate": 0.0002026580889413591, "loss": 2.267, "step": 4039 }, { "epoch": 1.25, "learning_rate": 0.00020262245988212184, "loss": 2.1623, "step": 4040 }, { "epoch": 1.25, "learning_rate": 0.0002025868308228846, "loss": 5.1702, "step": 4041 }, { "epoch": 1.25, "learning_rate": 0.00020255120176364738, "loss": 4.637, "step": 4042 }, { "epoch": 1.25, "learning_rate": 0.00020251557270441014, "loss": 4.1834, "step": 4043 }, { "epoch": 1.25, "learning_rate": 0.00020247994364517292, "loss": 4.2682, "step": 4044 }, { "epoch": 1.25, "learning_rate": 0.0002024443145859357, "loss": 3.768, "step": 4045 }, { "epoch": 1.25, "learning_rate": 0.00020240868552669845, "loss": 4.3185, "step": 4046 }, { "epoch": 1.25, "learning_rate": 0.00020237305646746124, "loss": 4.0905, "step": 4047 }, { "epoch": 1.25, "learning_rate": 0.00020233742740822402, "loss": 3.8557, "step": 4048 }, { "epoch": 1.25, "learning_rate": 0.00020230179834898677, "loss": 3.7776, "step": 4049 }, { "epoch": 1.25, "learning_rate": 0.00020226616928974955, "loss": 3.9932, "step": 4050 }, { "epoch": 1.25, "learning_rate": 0.0002022305402305123, "loss": 3.7661, "step": 4051 }, { "epoch": 1.25, "learning_rate": 0.00020219491117127506, "loss": 3.6129, "step": 4052 }, { "epoch": 1.25, "learning_rate": 0.00020215928211203787, "loss": 3.7883, "step": 4053 }, { "epoch": 1.25, "learning_rate": 0.00020212365305280063, "loss": 3.5686, "step": 4054 }, { "epoch": 1.25, "learning_rate": 0.0002020880239935634, "loss": 3.8698, "step": 4055 }, { "epoch": 1.25, "learning_rate": 0.00020205239493432616, "loss": 3.9632, "step": 4056 }, { "epoch": 1.25, "learning_rate": 0.00020201676587508892, "loss": 3.5946, "step": 4057 }, { "epoch": 1.25, "learning_rate": 0.00020198113681585173, "loss": 3.7324, "step": 4058 }, { "epoch": 1.25, "learning_rate": 0.00020194550775661448, "loss": 3.6483, "step": 4059 }, { "epoch": 1.25, "learning_rate": 0.00020190987869737726, "loss": 3.592, "step": 4060 }, { "epoch": 1.25, "learning_rate": 0.00020187424963814002, "loss": 3.8618, "step": 4061 }, { "epoch": 1.25, "learning_rate": 0.00020183862057890277, "loss": 3.5915, "step": 4062 }, { "epoch": 1.25, "learning_rate": 0.00020180299151966558, "loss": 3.5862, "step": 4063 }, { "epoch": 1.25, "learning_rate": 0.00020176736246042834, "loss": 3.5308, "step": 4064 }, { "epoch": 1.25, "learning_rate": 0.0002017317334011911, "loss": 3.5936, "step": 4065 }, { "epoch": 1.25, "learning_rate": 0.00020169610434195387, "loss": 3.6883, "step": 4066 }, { "epoch": 1.26, "learning_rate": 0.00020166047528271663, "loss": 3.2618, "step": 4067 }, { "epoch": 1.26, "learning_rate": 0.00020162484622347944, "loss": 3.5509, "step": 4068 }, { "epoch": 1.26, "learning_rate": 0.0002015892171642422, "loss": 3.5532, "step": 4069 }, { "epoch": 1.26, "learning_rate": 0.00020155358810500495, "loss": 3.3666, "step": 4070 }, { "epoch": 1.26, "learning_rate": 0.00020151795904576773, "loss": 3.3565, "step": 4071 }, { "epoch": 1.26, "learning_rate": 0.0002014823299865305, "loss": 3.191, "step": 4072 }, { "epoch": 1.26, "learning_rate": 0.0002014467009272933, "loss": 3.2705, "step": 4073 }, { "epoch": 1.26, "learning_rate": 0.00020141107186805605, "loss": 3.1202, "step": 4074 }, { "epoch": 1.26, "learning_rate": 0.0002013754428088188, "loss": 3.2307, "step": 4075 }, { "epoch": 1.26, "learning_rate": 0.0002013398137495816, "loss": 3.0596, "step": 4076 }, { "epoch": 1.26, "learning_rate": 0.00020130418469034437, "loss": 3.2147, "step": 4077 }, { "epoch": 1.26, "learning_rate": 0.00020126855563110712, "loss": 2.9279, "step": 4078 }, { "epoch": 1.26, "learning_rate": 0.0002012329265718699, "loss": 2.9907, "step": 4079 }, { "epoch": 1.26, "learning_rate": 0.00020119729751263266, "loss": 2.9992, "step": 4080 }, { "epoch": 1.26, "learning_rate": 0.00020116166845339547, "loss": 2.8377, "step": 4081 }, { "epoch": 1.26, "learning_rate": 0.00020112603939415822, "loss": 2.9127, "step": 4082 }, { "epoch": 1.26, "learning_rate": 0.00020109041033492098, "loss": 2.8503, "step": 4083 }, { "epoch": 1.26, "learning_rate": 0.00020105478127568376, "loss": 2.6834, "step": 4084 }, { "epoch": 1.26, "learning_rate": 0.0002010191522164465, "loss": 2.6571, "step": 4085 }, { "epoch": 1.26, "learning_rate": 0.00020098352315720927, "loss": 2.4462, "step": 4086 }, { "epoch": 1.26, "learning_rate": 0.00020094789409797208, "loss": 2.3052, "step": 4087 }, { "epoch": 1.26, "learning_rate": 0.00020091226503873483, "loss": 2.2622, "step": 4088 }, { "epoch": 1.26, "learning_rate": 0.00020087663597949761, "loss": 2.2503, "step": 4089 }, { "epoch": 1.26, "learning_rate": 0.00020084100692026037, "loss": 2.109, "step": 4090 }, { "epoch": 1.26, "learning_rate": 0.00020080537786102315, "loss": 4.9508, "step": 4091 }, { "epoch": 1.26, "learning_rate": 0.00020076974880178593, "loss": 4.4312, "step": 4092 }, { "epoch": 1.26, "learning_rate": 0.0002007341197425487, "loss": 4.6276, "step": 4093 }, { "epoch": 1.26, "learning_rate": 0.00020069849068331147, "loss": 4.0516, "step": 4094 }, { "epoch": 1.26, "learning_rate": 0.00020066286162407425, "loss": 4.3796, "step": 4095 }, { "epoch": 1.26, "learning_rate": 0.000200627232564837, "loss": 3.8635, "step": 4096 }, { "epoch": 1.26, "learning_rate": 0.0002005916035055998, "loss": 4.0627, "step": 4097 }, { "epoch": 1.26, "learning_rate": 0.00020055597444636254, "loss": 4.1408, "step": 4098 }, { "epoch": 1.27, "learning_rate": 0.0002005203453871253, "loss": 4.1166, "step": 4099 }, { "epoch": 1.27, "learning_rate": 0.0002004847163278881, "loss": 4.1022, "step": 4100 }, { "epoch": 1.27, "learning_rate": 0.00020044908726865086, "loss": 3.7815, "step": 4101 }, { "epoch": 1.27, "learning_rate": 0.00020041345820941364, "loss": 4.1798, "step": 4102 }, { "epoch": 1.27, "learning_rate": 0.0002003778291501764, "loss": 3.8032, "step": 4103 }, { "epoch": 1.27, "learning_rate": 0.00020034220009093915, "loss": 3.7528, "step": 4104 }, { "epoch": 1.27, "learning_rate": 0.00020030657103170196, "loss": 3.8882, "step": 4105 }, { "epoch": 1.27, "learning_rate": 0.00020027094197246472, "loss": 3.9117, "step": 4106 }, { "epoch": 1.27, "learning_rate": 0.0002002353129132275, "loss": 3.817, "step": 4107 }, { "epoch": 1.27, "learning_rate": 0.00020019968385399025, "loss": 3.6919, "step": 4108 }, { "epoch": 1.27, "learning_rate": 0.000200164054794753, "loss": 3.5369, "step": 4109 }, { "epoch": 1.27, "learning_rate": 0.00020012842573551582, "loss": 3.5369, "step": 4110 }, { "epoch": 1.27, "learning_rate": 0.00020009279667627857, "loss": 3.5223, "step": 4111 }, { "epoch": 1.27, "learning_rate": 0.00020005716761704133, "loss": 3.6681, "step": 4112 }, { "epoch": 1.27, "learning_rate": 0.0002000215385578041, "loss": 3.6064, "step": 4113 }, { "epoch": 1.27, "learning_rate": 0.00019998590949856686, "loss": 3.4679, "step": 4114 }, { "epoch": 1.27, "learning_rate": 0.00019995028043932967, "loss": 3.4889, "step": 4115 }, { "epoch": 1.27, "learning_rate": 0.00019991465138009243, "loss": 3.2571, "step": 4116 }, { "epoch": 1.27, "learning_rate": 0.00019987902232085518, "loss": 3.5201, "step": 4117 }, { "epoch": 1.27, "learning_rate": 0.000199843393261618, "loss": 3.5836, "step": 4118 }, { "epoch": 1.27, "learning_rate": 0.00019980776420238074, "loss": 3.2441, "step": 4119 }, { "epoch": 1.27, "learning_rate": 0.00019977213514314353, "loss": 3.4903, "step": 4120 }, { "epoch": 1.27, "learning_rate": 0.00019973650608390628, "loss": 3.3805, "step": 4121 }, { "epoch": 1.27, "learning_rate": 0.00019970087702466904, "loss": 3.4315, "step": 4122 }, { "epoch": 1.27, "learning_rate": 0.00019966524796543185, "loss": 3.1959, "step": 4123 }, { "epoch": 1.27, "learning_rate": 0.0001996296189061946, "loss": 3.0789, "step": 4124 }, { "epoch": 1.27, "learning_rate": 0.00019959398984695735, "loss": 2.9943, "step": 4125 }, { "epoch": 1.27, "learning_rate": 0.00019955836078772014, "loss": 2.7903, "step": 4126 }, { "epoch": 1.27, "learning_rate": 0.0001995227317284829, "loss": 3.1347, "step": 4127 }, { "epoch": 1.27, "learning_rate": 0.0001994871026692457, "loss": 3.3276, "step": 4128 }, { "epoch": 1.27, "learning_rate": 0.00019945147361000846, "loss": 2.9905, "step": 4129 }, { "epoch": 1.27, "learning_rate": 0.0001994158445507712, "loss": 3.1775, "step": 4130 }, { "epoch": 1.27, "learning_rate": 0.000199380215491534, "loss": 2.668, "step": 4131 }, { "epoch": 1.28, "learning_rate": 0.00019934458643229675, "loss": 2.6774, "step": 4132 }, { "epoch": 1.28, "learning_rate": 0.0001993089573730595, "loss": 2.5979, "step": 4133 }, { "epoch": 1.28, "learning_rate": 0.0001992733283138223, "loss": 2.652, "step": 4134 }, { "epoch": 1.28, "learning_rate": 0.00019923769925458507, "loss": 2.4616, "step": 4135 }, { "epoch": 1.28, "learning_rate": 0.00019920207019534785, "loss": 2.5962, "step": 4136 }, { "epoch": 1.28, "learning_rate": 0.0001991664411361106, "loss": 2.1622, "step": 4137 }, { "epoch": 1.28, "learning_rate": 0.00019913081207687338, "loss": 2.2383, "step": 4138 }, { "epoch": 1.28, "learning_rate": 0.00019909518301763617, "loss": 2.1423, "step": 4139 }, { "epoch": 1.28, "learning_rate": 0.00019905955395839892, "loss": 1.9743, "step": 4140 }, { "epoch": 1.28, "learning_rate": 0.0001990239248991617, "loss": 4.7776, "step": 4141 }, { "epoch": 1.28, "learning_rate": 0.00019898829583992448, "loss": 4.7889, "step": 4142 }, { "epoch": 1.28, "learning_rate": 0.00019895266678068724, "loss": 4.5863, "step": 4143 }, { "epoch": 1.28, "learning_rate": 0.00019891703772145002, "loss": 4.2464, "step": 4144 }, { "epoch": 1.28, "learning_rate": 0.00019888140866221278, "loss": 4.1614, "step": 4145 }, { "epoch": 1.28, "learning_rate": 0.00019884577960297553, "loss": 3.9973, "step": 4146 }, { "epoch": 1.28, "learning_rate": 0.00019881015054373834, "loss": 4.1638, "step": 4147 }, { "epoch": 1.28, "learning_rate": 0.0001987745214845011, "loss": 3.9015, "step": 4148 }, { "epoch": 1.28, "learning_rate": 0.00019873889242526388, "loss": 4.0901, "step": 4149 }, { "epoch": 1.28, "learning_rate": 0.00019870326336602663, "loss": 4.0003, "step": 4150 }, { "epoch": 1.28, "learning_rate": 0.00019866763430678939, "loss": 3.538, "step": 4151 }, { "epoch": 1.28, "learning_rate": 0.0001986320052475522, "loss": 3.9631, "step": 4152 }, { "epoch": 1.28, "learning_rate": 0.00019859637618831495, "loss": 3.7521, "step": 4153 }, { "epoch": 1.28, "learning_rate": 0.00019856074712907773, "loss": 3.935, "step": 4154 }, { "epoch": 1.28, "learning_rate": 0.00019852511806984049, "loss": 4.082, "step": 4155 }, { "epoch": 1.28, "learning_rate": 0.00019848948901060324, "loss": 3.5419, "step": 4156 }, { "epoch": 1.28, "learning_rate": 0.00019845385995136605, "loss": 3.3202, "step": 4157 }, { "epoch": 1.28, "learning_rate": 0.0001984182308921288, "loss": 3.9738, "step": 4158 }, { "epoch": 1.28, "learning_rate": 0.00019838260183289156, "loss": 3.5651, "step": 4159 }, { "epoch": 1.28, "learning_rate": 0.00019834697277365434, "loss": 3.579, "step": 4160 }, { "epoch": 1.28, "learning_rate": 0.0001983113437144171, "loss": 3.7046, "step": 4161 }, { "epoch": 1.28, "learning_rate": 0.0001982757146551799, "loss": 3.6878, "step": 4162 }, { "epoch": 1.28, "learning_rate": 0.00019824008559594266, "loss": 3.4205, "step": 4163 }, { "epoch": 1.29, "learning_rate": 0.00019820445653670541, "loss": 3.6325, "step": 4164 }, { "epoch": 1.29, "learning_rate": 0.00019816882747746822, "loss": 3.6469, "step": 4165 }, { "epoch": 1.29, "learning_rate": 0.00019813319841823098, "loss": 3.6515, "step": 4166 }, { "epoch": 1.29, "learning_rate": 0.00019809756935899373, "loss": 3.518, "step": 4167 }, { "epoch": 1.29, "learning_rate": 0.00019806194029975651, "loss": 3.3475, "step": 4168 }, { "epoch": 1.29, "learning_rate": 0.00019802631124051927, "loss": 3.3057, "step": 4169 }, { "epoch": 1.29, "learning_rate": 0.00019799068218128208, "loss": 3.4445, "step": 4170 }, { "epoch": 1.29, "learning_rate": 0.00019795505312204483, "loss": 3.3243, "step": 4171 }, { "epoch": 1.29, "learning_rate": 0.0001979194240628076, "loss": 3.2831, "step": 4172 }, { "epoch": 1.29, "learning_rate": 0.00019788379500357037, "loss": 3.2324, "step": 4173 }, { "epoch": 1.29, "learning_rate": 0.00019784816594433312, "loss": 3.3951, "step": 4174 }, { "epoch": 1.29, "learning_rate": 0.00019781253688509593, "loss": 3.1669, "step": 4175 }, { "epoch": 1.29, "learning_rate": 0.0001977769078258587, "loss": 3.2009, "step": 4176 }, { "epoch": 1.29, "learning_rate": 0.00019774127876662144, "loss": 3.2058, "step": 4177 }, { "epoch": 1.29, "learning_rate": 0.00019770564970738422, "loss": 3.0035, "step": 4178 }, { "epoch": 1.29, "learning_rate": 0.00019767002064814698, "loss": 3.3009, "step": 4179 }, { "epoch": 1.29, "learning_rate": 0.00019763439158890973, "loss": 2.8675, "step": 4180 }, { "epoch": 1.29, "learning_rate": 0.00019759876252967254, "loss": 2.9386, "step": 4181 }, { "epoch": 1.29, "learning_rate": 0.0001975631334704353, "loss": 3.0342, "step": 4182 }, { "epoch": 1.29, "learning_rate": 0.00019752750441119808, "loss": 2.6496, "step": 4183 }, { "epoch": 1.29, "learning_rate": 0.00019749187535196083, "loss": 2.4077, "step": 4184 }, { "epoch": 1.29, "learning_rate": 0.00019745624629272362, "loss": 2.485, "step": 4185 }, { "epoch": 1.29, "learning_rate": 0.0001974206172334864, "loss": 2.3658, "step": 4186 }, { "epoch": 1.29, "learning_rate": 0.00019738498817424915, "loss": 2.4802, "step": 4187 }, { "epoch": 1.29, "learning_rate": 0.00019734935911501194, "loss": 2.336, "step": 4188 }, { "epoch": 1.29, "learning_rate": 0.00019731373005577472, "loss": 2.2165, "step": 4189 }, { "epoch": 1.29, "learning_rate": 0.00019727810099653747, "loss": 1.8735, "step": 4190 }, { "epoch": 1.29, "learning_rate": 0.00019724247193730025, "loss": 5.1235, "step": 4191 }, { "epoch": 1.29, "learning_rate": 0.000197206842878063, "loss": 4.5345, "step": 4192 }, { "epoch": 1.29, "learning_rate": 0.00019717121381882576, "loss": 4.2376, "step": 4193 }, { "epoch": 1.29, "learning_rate": 0.00019713558475958857, "loss": 4.1777, "step": 4194 }, { "epoch": 1.29, "learning_rate": 0.00019709995570035133, "loss": 4.0507, "step": 4195 }, { "epoch": 1.3, "learning_rate": 0.0001970643266411141, "loss": 3.9894, "step": 4196 }, { "epoch": 1.3, "learning_rate": 0.00019702869758187686, "loss": 3.9485, "step": 4197 }, { "epoch": 1.3, "learning_rate": 0.00019699306852263962, "loss": 4.0695, "step": 4198 }, { "epoch": 1.3, "learning_rate": 0.00019695743946340243, "loss": 3.974, "step": 4199 }, { "epoch": 1.3, "learning_rate": 0.00019692181040416518, "loss": 3.9045, "step": 4200 }, { "epoch": 1.3, "learning_rate": 0.00019688618134492794, "loss": 3.8913, "step": 4201 }, { "epoch": 1.3, "learning_rate": 0.00019685055228569072, "loss": 3.906, "step": 4202 }, { "epoch": 1.3, "learning_rate": 0.00019681492322645347, "loss": 3.8258, "step": 4203 }, { "epoch": 1.3, "learning_rate": 0.00019677929416721628, "loss": 3.7487, "step": 4204 }, { "epoch": 1.3, "learning_rate": 0.00019674366510797904, "loss": 3.8174, "step": 4205 }, { "epoch": 1.3, "learning_rate": 0.0001967080360487418, "loss": 3.6031, "step": 4206 }, { "epoch": 1.3, "learning_rate": 0.00019667240698950457, "loss": 3.8142, "step": 4207 }, { "epoch": 1.3, "learning_rate": 0.00019663677793026736, "loss": 3.56, "step": 4208 }, { "epoch": 1.3, "learning_rate": 0.00019660114887103014, "loss": 3.87, "step": 4209 }, { "epoch": 1.3, "learning_rate": 0.0001965655198117929, "loss": 3.5016, "step": 4210 }, { "epoch": 1.3, "learning_rate": 0.00019652989075255565, "loss": 3.7182, "step": 4211 }, { "epoch": 1.3, "learning_rate": 0.00019649426169331846, "loss": 3.5533, "step": 4212 }, { "epoch": 1.3, "learning_rate": 0.0001964586326340812, "loss": 3.5139, "step": 4213 }, { "epoch": 1.3, "learning_rate": 0.00019642300357484397, "loss": 3.468, "step": 4214 }, { "epoch": 1.3, "learning_rate": 0.00019638737451560675, "loss": 3.2553, "step": 4215 }, { "epoch": 1.3, "learning_rate": 0.0001963517454563695, "loss": 3.5977, "step": 4216 }, { "epoch": 1.3, "learning_rate": 0.0001963161163971323, "loss": 3.6332, "step": 4217 }, { "epoch": 1.3, "learning_rate": 0.00019628048733789507, "loss": 3.2914, "step": 4218 }, { "epoch": 1.3, "learning_rate": 0.00019624485827865782, "loss": 3.2852, "step": 4219 }, { "epoch": 1.3, "learning_rate": 0.0001962092292194206, "loss": 3.4892, "step": 4220 }, { "epoch": 1.3, "learning_rate": 0.00019617360016018336, "loss": 3.3141, "step": 4221 }, { "epoch": 1.3, "learning_rate": 0.00019613797110094617, "loss": 3.2893, "step": 4222 }, { "epoch": 1.3, "learning_rate": 0.00019610234204170892, "loss": 3.0638, "step": 4223 }, { "epoch": 1.3, "learning_rate": 0.00019606671298247168, "loss": 3.3924, "step": 4224 }, { "epoch": 1.3, "learning_rate": 0.00019603108392323446, "loss": 3.0383, "step": 4225 }, { "epoch": 1.3, "learning_rate": 0.0001959954548639972, "loss": 3.3122, "step": 4226 }, { "epoch": 1.3, "learning_rate": 0.00019595982580475997, "loss": 3.2657, "step": 4227 }, { "epoch": 1.3, "learning_rate": 0.00019592419674552278, "loss": 3.1933, "step": 4228 }, { "epoch": 1.31, "learning_rate": 0.00019588856768628553, "loss": 3.0377, "step": 4229 }, { "epoch": 1.31, "learning_rate": 0.0001958529386270483, "loss": 2.7775, "step": 4230 }, { "epoch": 1.31, "learning_rate": 0.00019581730956781107, "loss": 2.8229, "step": 4231 }, { "epoch": 1.31, "learning_rate": 0.00019578168050857385, "loss": 2.9715, "step": 4232 }, { "epoch": 1.31, "learning_rate": 0.00019574605144933663, "loss": 2.8702, "step": 4233 }, { "epoch": 1.31, "learning_rate": 0.0001957104223900994, "loss": 2.8079, "step": 4234 }, { "epoch": 1.31, "learning_rate": 0.00019567479333086217, "loss": 2.5521, "step": 4235 }, { "epoch": 1.31, "learning_rate": 0.00019563916427162495, "loss": 2.5671, "step": 4236 }, { "epoch": 1.31, "learning_rate": 0.0001956035352123877, "loss": 2.264, "step": 4237 }, { "epoch": 1.31, "learning_rate": 0.0001955679061531505, "loss": 2.3838, "step": 4238 }, { "epoch": 1.31, "learning_rate": 0.00019553227709391324, "loss": 2.0693, "step": 4239 }, { "epoch": 1.31, "learning_rate": 0.000195496648034676, "loss": 2.0695, "step": 4240 }, { "epoch": 1.31, "learning_rate": 0.0001954610189754388, "loss": 4.756, "step": 4241 }, { "epoch": 1.31, "learning_rate": 0.00019542538991620156, "loss": 4.3353, "step": 4242 }, { "epoch": 1.31, "learning_rate": 0.00019538976085696434, "loss": 4.3121, "step": 4243 }, { "epoch": 1.31, "learning_rate": 0.0001953541317977271, "loss": 4.0447, "step": 4244 }, { "epoch": 1.31, "learning_rate": 0.00019531850273848985, "loss": 4.1033, "step": 4245 }, { "epoch": 1.31, "learning_rate": 0.00019528287367925266, "loss": 3.9433, "step": 4246 }, { "epoch": 1.31, "learning_rate": 0.00019524724462001542, "loss": 4.0171, "step": 4247 }, { "epoch": 1.31, "learning_rate": 0.00019521161556077817, "loss": 3.6522, "step": 4248 }, { "epoch": 1.31, "learning_rate": 0.00019517598650154095, "loss": 3.839, "step": 4249 }, { "epoch": 1.31, "learning_rate": 0.0001951403574423037, "loss": 4.0754, "step": 4250 }, { "epoch": 1.31, "learning_rate": 0.00019510472838306652, "loss": 3.839, "step": 4251 }, { "epoch": 1.31, "learning_rate": 0.00019506909932382927, "loss": 3.8577, "step": 4252 }, { "epoch": 1.31, "learning_rate": 0.00019503347026459203, "loss": 3.5299, "step": 4253 }, { "epoch": 1.31, "learning_rate": 0.0001949978412053548, "loss": 3.8549, "step": 4254 }, { "epoch": 1.31, "learning_rate": 0.0001949622121461176, "loss": 3.8678, "step": 4255 }, { "epoch": 1.31, "learning_rate": 0.00019492658308688037, "loss": 3.7703, "step": 4256 }, { "epoch": 1.31, "learning_rate": 0.00019489095402764313, "loss": 3.4962, "step": 4257 }, { "epoch": 1.31, "learning_rate": 0.00019485532496840588, "loss": 3.6904, "step": 4258 }, { "epoch": 1.31, "learning_rate": 0.0001948196959091687, "loss": 3.7313, "step": 4259 }, { "epoch": 1.31, "learning_rate": 0.00019478406684993144, "loss": 3.6372, "step": 4260 }, { "epoch": 1.32, "learning_rate": 0.0001947484377906942, "loss": 3.6272, "step": 4261 }, { "epoch": 1.32, "learning_rate": 0.00019471280873145698, "loss": 3.4883, "step": 4262 }, { "epoch": 1.32, "learning_rate": 0.00019467717967221974, "loss": 3.834, "step": 4263 }, { "epoch": 1.32, "learning_rate": 0.00019464155061298254, "loss": 3.5524, "step": 4264 }, { "epoch": 1.32, "learning_rate": 0.0001946059215537453, "loss": 3.5772, "step": 4265 }, { "epoch": 1.32, "learning_rate": 0.00019457029249450805, "loss": 3.5734, "step": 4266 }, { "epoch": 1.32, "learning_rate": 0.00019453466343527084, "loss": 3.5523, "step": 4267 }, { "epoch": 1.32, "learning_rate": 0.0001944990343760336, "loss": 3.397, "step": 4268 }, { "epoch": 1.32, "learning_rate": 0.0001944634053167964, "loss": 3.4044, "step": 4269 }, { "epoch": 1.32, "learning_rate": 0.00019442777625755915, "loss": 3.5637, "step": 4270 }, { "epoch": 1.32, "learning_rate": 0.0001943921471983219, "loss": 3.3532, "step": 4271 }, { "epoch": 1.32, "learning_rate": 0.0001943565181390847, "loss": 3.4568, "step": 4272 }, { "epoch": 1.32, "learning_rate": 0.00019432088907984745, "loss": 3.1275, "step": 4273 }, { "epoch": 1.32, "learning_rate": 0.0001942852600206102, "loss": 3.4894, "step": 4274 }, { "epoch": 1.32, "learning_rate": 0.000194249630961373, "loss": 3.1959, "step": 4275 }, { "epoch": 1.32, "learning_rate": 0.00019421400190213576, "loss": 3.033, "step": 4276 }, { "epoch": 1.32, "learning_rate": 0.00019417837284289855, "loss": 3.1806, "step": 4277 }, { "epoch": 1.32, "learning_rate": 0.0001941427437836613, "loss": 2.8889, "step": 4278 }, { "epoch": 1.32, "learning_rate": 0.00019410711472442408, "loss": 3.0244, "step": 4279 }, { "epoch": 1.32, "learning_rate": 0.00019407148566518686, "loss": 2.7249, "step": 4280 }, { "epoch": 1.32, "learning_rate": 0.00019403585660594962, "loss": 2.9049, "step": 4281 }, { "epoch": 1.32, "learning_rate": 0.00019400022754671237, "loss": 3.1185, "step": 4282 }, { "epoch": 1.32, "learning_rate": 0.00019396459848747518, "loss": 2.83, "step": 4283 }, { "epoch": 1.32, "learning_rate": 0.00019392896942823794, "loss": 2.5706, "step": 4284 }, { "epoch": 1.32, "learning_rate": 0.00019389334036900072, "loss": 2.5303, "step": 4285 }, { "epoch": 1.32, "learning_rate": 0.00019385771130976347, "loss": 2.5937, "step": 4286 }, { "epoch": 1.32, "learning_rate": 0.00019382208225052623, "loss": 2.3114, "step": 4287 }, { "epoch": 1.32, "learning_rate": 0.00019378645319128904, "loss": 2.2517, "step": 4288 }, { "epoch": 1.32, "learning_rate": 0.0001937508241320518, "loss": 2.2919, "step": 4289 }, { "epoch": 1.32, "learning_rate": 0.00019371519507281458, "loss": 2.0103, "step": 4290 }, { "epoch": 1.32, "learning_rate": 0.00019367956601357733, "loss": 4.8257, "step": 4291 }, { "epoch": 1.32, "learning_rate": 0.00019364393695434008, "loss": 4.7632, "step": 4292 }, { "epoch": 1.32, "learning_rate": 0.0001936083078951029, "loss": 4.2425, "step": 4293 }, { "epoch": 1.33, "learning_rate": 0.00019357267883586565, "loss": 4.0317, "step": 4294 }, { "epoch": 1.33, "learning_rate": 0.0001935370497766284, "loss": 3.9432, "step": 4295 }, { "epoch": 1.33, "learning_rate": 0.00019350142071739119, "loss": 3.8889, "step": 4296 }, { "epoch": 1.33, "learning_rate": 0.00019346579165815394, "loss": 4.0436, "step": 4297 }, { "epoch": 1.33, "learning_rate": 0.00019343016259891675, "loss": 3.6109, "step": 4298 }, { "epoch": 1.33, "learning_rate": 0.0001933945335396795, "loss": 3.8944, "step": 4299 }, { "epoch": 1.33, "learning_rate": 0.00019335890448044226, "loss": 3.8379, "step": 4300 }, { "epoch": 1.33, "learning_rate": 0.00019332327542120504, "loss": 3.7243, "step": 4301 }, { "epoch": 1.33, "learning_rate": 0.00019328764636196782, "loss": 3.6167, "step": 4302 }, { "epoch": 1.33, "learning_rate": 0.0001932520173027306, "loss": 3.7305, "step": 4303 }, { "epoch": 1.33, "learning_rate": 0.00019321638824349336, "loss": 3.8325, "step": 4304 }, { "epoch": 1.33, "learning_rate": 0.00019318075918425611, "loss": 3.7357, "step": 4305 }, { "epoch": 1.33, "learning_rate": 0.00019314513012501892, "loss": 3.6141, "step": 4306 }, { "epoch": 1.33, "learning_rate": 0.00019310950106578168, "loss": 3.8405, "step": 4307 }, { "epoch": 1.33, "learning_rate": 0.00019307387200654443, "loss": 3.8167, "step": 4308 }, { "epoch": 1.33, "learning_rate": 0.00019303824294730721, "loss": 3.5087, "step": 4309 }, { "epoch": 1.33, "learning_rate": 0.00019300261388806997, "loss": 3.5746, "step": 4310 }, { "epoch": 1.33, "learning_rate": 0.00019296698482883278, "loss": 3.5271, "step": 4311 }, { "epoch": 1.33, "learning_rate": 0.00019293135576959553, "loss": 3.6927, "step": 4312 }, { "epoch": 1.33, "learning_rate": 0.0001928957267103583, "loss": 3.8038, "step": 4313 }, { "epoch": 1.33, "learning_rate": 0.00019286009765112107, "loss": 3.7544, "step": 4314 }, { "epoch": 1.33, "learning_rate": 0.00019282446859188382, "loss": 3.3028, "step": 4315 }, { "epoch": 1.33, "learning_rate": 0.00019278883953264663, "loss": 3.3647, "step": 4316 }, { "epoch": 1.33, "learning_rate": 0.0001927532104734094, "loss": 3.7141, "step": 4317 }, { "epoch": 1.33, "learning_rate": 0.00019271758141417214, "loss": 3.4735, "step": 4318 }, { "epoch": 1.33, "learning_rate": 0.00019268195235493492, "loss": 3.5988, "step": 4319 }, { "epoch": 1.33, "learning_rate": 0.00019264632329569768, "loss": 3.4519, "step": 4320 }, { "epoch": 1.33, "learning_rate": 0.00019261069423646043, "loss": 3.3051, "step": 4321 }, { "epoch": 1.33, "learning_rate": 0.00019257506517722324, "loss": 3.2725, "step": 4322 }, { "epoch": 1.33, "learning_rate": 0.000192539436117986, "loss": 3.3351, "step": 4323 }, { "epoch": 1.33, "learning_rate": 0.00019250380705874878, "loss": 3.1123, "step": 4324 }, { "epoch": 1.33, "learning_rate": 0.00019246817799951153, "loss": 3.0785, "step": 4325 }, { "epoch": 1.34, "learning_rate": 0.00019243254894027432, "loss": 3.2448, "step": 4326 }, { "epoch": 1.34, "learning_rate": 0.0001923969198810371, "loss": 2.9339, "step": 4327 }, { "epoch": 1.34, "learning_rate": 0.00019236129082179985, "loss": 2.7783, "step": 4328 }, { "epoch": 1.34, "learning_rate": 0.0001923256617625626, "loss": 2.9941, "step": 4329 }, { "epoch": 1.34, "learning_rate": 0.00019229003270332542, "loss": 2.7995, "step": 4330 }, { "epoch": 1.34, "learning_rate": 0.00019225440364408817, "loss": 2.9015, "step": 4331 }, { "epoch": 1.34, "learning_rate": 0.00019221877458485095, "loss": 2.8588, "step": 4332 }, { "epoch": 1.34, "learning_rate": 0.0001921831455256137, "loss": 2.6093, "step": 4333 }, { "epoch": 1.34, "learning_rate": 0.00019214751646637646, "loss": 2.6802, "step": 4334 }, { "epoch": 1.34, "learning_rate": 0.00019211188740713927, "loss": 2.783, "step": 4335 }, { "epoch": 1.34, "learning_rate": 0.00019207625834790203, "loss": 2.3952, "step": 4336 }, { "epoch": 1.34, "learning_rate": 0.0001920406292886648, "loss": 2.4198, "step": 4337 }, { "epoch": 1.34, "learning_rate": 0.00019200500022942756, "loss": 2.3446, "step": 4338 }, { "epoch": 1.34, "learning_rate": 0.00019196937117019032, "loss": 2.1385, "step": 4339 }, { "epoch": 1.34, "learning_rate": 0.00019193374211095313, "loss": 1.8476, "step": 4340 }, { "epoch": 1.34, "learning_rate": 0.00019189811305171588, "loss": 4.8288, "step": 4341 }, { "epoch": 1.34, "learning_rate": 0.00019186248399247864, "loss": 4.5279, "step": 4342 }, { "epoch": 1.34, "learning_rate": 0.00019182685493324142, "loss": 4.3287, "step": 4343 }, { "epoch": 1.34, "learning_rate": 0.00019179122587400417, "loss": 4.0691, "step": 4344 }, { "epoch": 1.34, "learning_rate": 0.00019175559681476698, "loss": 4.1254, "step": 4345 }, { "epoch": 1.34, "learning_rate": 0.00019171996775552974, "loss": 4.1111, "step": 4346 }, { "epoch": 1.34, "learning_rate": 0.0001916843386962925, "loss": 3.708, "step": 4347 }, { "epoch": 1.34, "learning_rate": 0.00019164870963705527, "loss": 3.9224, "step": 4348 }, { "epoch": 1.34, "learning_rate": 0.00019161308057781806, "loss": 4.02, "step": 4349 }, { "epoch": 1.34, "learning_rate": 0.00019157745151858084, "loss": 3.7059, "step": 4350 }, { "epoch": 1.34, "learning_rate": 0.0001915418224593436, "loss": 3.8375, "step": 4351 }, { "epoch": 1.34, "learning_rate": 0.00019150619340010635, "loss": 3.5442, "step": 4352 }, { "epoch": 1.34, "learning_rate": 0.00019147056434086916, "loss": 3.6605, "step": 4353 }, { "epoch": 1.34, "learning_rate": 0.0001914349352816319, "loss": 3.6316, "step": 4354 }, { "epoch": 1.34, "learning_rate": 0.00019139930622239467, "loss": 3.4278, "step": 4355 }, { "epoch": 1.34, "learning_rate": 0.00019136367716315745, "loss": 3.7944, "step": 4356 }, { "epoch": 1.34, "learning_rate": 0.0001913280481039202, "loss": 3.5659, "step": 4357 }, { "epoch": 1.35, "learning_rate": 0.000191292419044683, "loss": 3.7439, "step": 4358 }, { "epoch": 1.35, "learning_rate": 0.00019125678998544577, "loss": 3.5405, "step": 4359 }, { "epoch": 1.35, "learning_rate": 0.00019122116092620852, "loss": 3.6779, "step": 4360 }, { "epoch": 1.35, "learning_rate": 0.0001911855318669713, "loss": 3.6648, "step": 4361 }, { "epoch": 1.35, "learning_rate": 0.00019114990280773406, "loss": 3.5249, "step": 4362 }, { "epoch": 1.35, "learning_rate": 0.0001911142737484968, "loss": 3.4776, "step": 4363 }, { "epoch": 1.35, "learning_rate": 0.00019107864468925962, "loss": 3.4766, "step": 4364 }, { "epoch": 1.35, "learning_rate": 0.00019104301563002238, "loss": 3.3854, "step": 4365 }, { "epoch": 1.35, "learning_rate": 0.00019100738657078516, "loss": 3.4335, "step": 4366 }, { "epoch": 1.35, "learning_rate": 0.0001909717575115479, "loss": 3.3647, "step": 4367 }, { "epoch": 1.35, "learning_rate": 0.00019093612845231067, "loss": 3.1917, "step": 4368 }, { "epoch": 1.35, "learning_rate": 0.00019090049939307348, "loss": 3.4183, "step": 4369 }, { "epoch": 1.35, "learning_rate": 0.00019086487033383623, "loss": 3.3175, "step": 4370 }, { "epoch": 1.35, "learning_rate": 0.000190829241274599, "loss": 3.1259, "step": 4371 }, { "epoch": 1.35, "learning_rate": 0.00019079361221536177, "loss": 3.2634, "step": 4372 }, { "epoch": 1.35, "learning_rate": 0.00019075798315612455, "loss": 2.988, "step": 4373 }, { "epoch": 1.35, "learning_rate": 0.00019072235409688733, "loss": 3.272, "step": 4374 }, { "epoch": 1.35, "learning_rate": 0.00019068672503765009, "loss": 3.1217, "step": 4375 }, { "epoch": 1.35, "learning_rate": 0.00019065109597841284, "loss": 2.9683, "step": 4376 }, { "epoch": 1.35, "learning_rate": 0.00019061546691917565, "loss": 2.9318, "step": 4377 }, { "epoch": 1.35, "learning_rate": 0.0001905798378599384, "loss": 3.2093, "step": 4378 }, { "epoch": 1.35, "learning_rate": 0.00019054420880070119, "loss": 2.9556, "step": 4379 }, { "epoch": 1.35, "learning_rate": 0.00019050857974146394, "loss": 3.105, "step": 4380 }, { "epoch": 1.35, "learning_rate": 0.0001904729506822267, "loss": 3.0631, "step": 4381 }, { "epoch": 1.35, "learning_rate": 0.0001904373216229895, "loss": 3.0865, "step": 4382 }, { "epoch": 1.35, "learning_rate": 0.00019040169256375226, "loss": 2.6952, "step": 4383 }, { "epoch": 1.35, "learning_rate": 0.00019036606350451504, "loss": 2.6302, "step": 4384 }, { "epoch": 1.35, "learning_rate": 0.0001903304344452778, "loss": 2.7201, "step": 4385 }, { "epoch": 1.35, "learning_rate": 0.00019029480538604055, "loss": 2.6162, "step": 4386 }, { "epoch": 1.35, "learning_rate": 0.00019025917632680336, "loss": 2.3992, "step": 4387 }, { "epoch": 1.35, "learning_rate": 0.00019022354726756611, "loss": 2.2308, "step": 4388 }, { "epoch": 1.35, "learning_rate": 0.00019018791820832887, "loss": 2.1123, "step": 4389 }, { "epoch": 1.35, "learning_rate": 0.00019015228914909165, "loss": 1.9149, "step": 4390 }, { "epoch": 1.36, "learning_rate": 0.0001901166600898544, "loss": 4.8282, "step": 4391 }, { "epoch": 1.36, "learning_rate": 0.00019008103103061722, "loss": 4.3143, "step": 4392 }, { "epoch": 1.36, "learning_rate": 0.00019004540197137997, "loss": 4.3973, "step": 4393 }, { "epoch": 1.36, "learning_rate": 0.00019000977291214272, "loss": 4.3903, "step": 4394 }, { "epoch": 1.36, "learning_rate": 0.0001899741438529055, "loss": 4.2074, "step": 4395 }, { "epoch": 1.36, "learning_rate": 0.0001899385147936683, "loss": 4.0264, "step": 4396 }, { "epoch": 1.36, "learning_rate": 0.00018990288573443104, "loss": 3.833, "step": 4397 }, { "epoch": 1.36, "learning_rate": 0.00018986725667519383, "loss": 3.8743, "step": 4398 }, { "epoch": 1.36, "learning_rate": 0.00018983162761595658, "loss": 3.8749, "step": 4399 }, { "epoch": 1.36, "learning_rate": 0.0001897959985567194, "loss": 3.836, "step": 4400 }, { "epoch": 1.36, "learning_rate": 0.00018976036949748214, "loss": 3.7608, "step": 4401 }, { "epoch": 1.36, "learning_rate": 0.0001897247404382449, "loss": 3.8409, "step": 4402 }, { "epoch": 1.36, "learning_rate": 0.00018968911137900768, "loss": 4.0019, "step": 4403 }, { "epoch": 1.36, "learning_rate": 0.00018965348231977044, "loss": 4.0407, "step": 4404 }, { "epoch": 1.36, "learning_rate": 0.00018961785326053324, "loss": 3.7862, "step": 4405 }, { "epoch": 1.36, "learning_rate": 0.000189582224201296, "loss": 3.6244, "step": 4406 }, { "epoch": 1.36, "learning_rate": 0.00018954659514205875, "loss": 3.8824, "step": 4407 }, { "epoch": 1.36, "learning_rate": 0.00018951096608282154, "loss": 3.631, "step": 4408 }, { "epoch": 1.36, "learning_rate": 0.0001894753370235843, "loss": 3.7132, "step": 4409 }, { "epoch": 1.36, "learning_rate": 0.00018943970796434705, "loss": 3.6952, "step": 4410 }, { "epoch": 1.36, "learning_rate": 0.00018940407890510985, "loss": 3.4235, "step": 4411 }, { "epoch": 1.36, "learning_rate": 0.0001893684498458726, "loss": 3.3931, "step": 4412 }, { "epoch": 1.36, "learning_rate": 0.0001893328207866354, "loss": 3.5529, "step": 4413 }, { "epoch": 1.36, "learning_rate": 0.00018929719172739815, "loss": 3.7881, "step": 4414 }, { "epoch": 1.36, "learning_rate": 0.0001892615626681609, "loss": 3.3116, "step": 4415 }, { "epoch": 1.36, "learning_rate": 0.0001892259336089237, "loss": 3.3944, "step": 4416 }, { "epoch": 1.36, "learning_rate": 0.00018919030454968646, "loss": 3.6446, "step": 4417 }, { "epoch": 1.36, "learning_rate": 0.00018915467549044925, "loss": 3.3038, "step": 4418 }, { "epoch": 1.36, "learning_rate": 0.000189119046431212, "loss": 3.2889, "step": 4419 }, { "epoch": 1.36, "learning_rate": 0.00018908341737197478, "loss": 3.3144, "step": 4420 }, { "epoch": 1.36, "learning_rate": 0.00018904778831273756, "loss": 3.1515, "step": 4421 }, { "epoch": 1.36, "learning_rate": 0.00018901215925350032, "loss": 3.223, "step": 4422 }, { "epoch": 1.37, "learning_rate": 0.00018897653019426307, "loss": 3.2184, "step": 4423 }, { "epoch": 1.37, "learning_rate": 0.00018894090113502588, "loss": 3.3345, "step": 4424 }, { "epoch": 1.37, "learning_rate": 0.00018890527207578864, "loss": 3.0234, "step": 4425 }, { "epoch": 1.37, "learning_rate": 0.00018886964301655142, "loss": 2.9133, "step": 4426 }, { "epoch": 1.37, "learning_rate": 0.00018883401395731417, "loss": 3.0385, "step": 4427 }, { "epoch": 1.37, "learning_rate": 0.00018879838489807693, "loss": 3.0332, "step": 4428 }, { "epoch": 1.37, "learning_rate": 0.00018876275583883974, "loss": 2.9399, "step": 4429 }, { "epoch": 1.37, "learning_rate": 0.0001887271267796025, "loss": 2.8619, "step": 4430 }, { "epoch": 1.37, "learning_rate": 0.00018869149772036527, "loss": 2.9731, "step": 4431 }, { "epoch": 1.37, "learning_rate": 0.00018865586866112803, "loss": 2.7368, "step": 4432 }, { "epoch": 1.37, "learning_rate": 0.00018862023960189078, "loss": 2.8557, "step": 4433 }, { "epoch": 1.37, "learning_rate": 0.0001885846105426536, "loss": 2.3573, "step": 4434 }, { "epoch": 1.37, "learning_rate": 0.00018854898148341635, "loss": 2.7865, "step": 4435 }, { "epoch": 1.37, "learning_rate": 0.0001885133524241791, "loss": 2.4505, "step": 4436 }, { "epoch": 1.37, "learning_rate": 0.00018847772336494188, "loss": 2.4733, "step": 4437 }, { "epoch": 1.37, "learning_rate": 0.00018844209430570464, "loss": 2.1499, "step": 4438 }, { "epoch": 1.37, "learning_rate": 0.00018840646524646745, "loss": 2.1309, "step": 4439 }, { "epoch": 1.37, "learning_rate": 0.0001883708361872302, "loss": 1.9715, "step": 4440 }, { "epoch": 1.37, "learning_rate": 0.00018833520712799296, "loss": 5.1261, "step": 4441 }, { "epoch": 1.37, "learning_rate": 0.00018829957806875574, "loss": 4.448, "step": 4442 }, { "epoch": 1.37, "learning_rate": 0.00018826394900951852, "loss": 4.7683, "step": 4443 }, { "epoch": 1.37, "learning_rate": 0.00018822831995028128, "loss": 4.2514, "step": 4444 }, { "epoch": 1.37, "learning_rate": 0.00018819269089104406, "loss": 4.2426, "step": 4445 }, { "epoch": 1.37, "learning_rate": 0.0001881570618318068, "loss": 3.9375, "step": 4446 }, { "epoch": 1.37, "learning_rate": 0.00018812143277256962, "loss": 3.7905, "step": 4447 }, { "epoch": 1.37, "learning_rate": 0.00018808580371333238, "loss": 4.0176, "step": 4448 }, { "epoch": 1.37, "learning_rate": 0.00018805017465409513, "loss": 3.8078, "step": 4449 }, { "epoch": 1.37, "learning_rate": 0.0001880145455948579, "loss": 3.6778, "step": 4450 }, { "epoch": 1.37, "learning_rate": 0.00018797891653562067, "loss": 3.8909, "step": 4451 }, { "epoch": 1.37, "learning_rate": 0.00018794328747638348, "loss": 3.9963, "step": 4452 }, { "epoch": 1.37, "learning_rate": 0.00018790765841714623, "loss": 3.9153, "step": 4453 }, { "epoch": 1.37, "learning_rate": 0.000187872029357909, "loss": 3.8126, "step": 4454 }, { "epoch": 1.37, "learning_rate": 0.00018783640029867177, "loss": 3.6202, "step": 4455 }, { "epoch": 1.38, "learning_rate": 0.00018780077123943452, "loss": 3.6812, "step": 4456 }, { "epoch": 1.38, "learning_rate": 0.00018776514218019728, "loss": 3.7388, "step": 4457 }, { "epoch": 1.38, "learning_rate": 0.0001877295131209601, "loss": 3.5055, "step": 4458 }, { "epoch": 1.38, "learning_rate": 0.00018769388406172284, "loss": 3.7038, "step": 4459 }, { "epoch": 1.38, "learning_rate": 0.00018765825500248562, "loss": 3.8466, "step": 4460 }, { "epoch": 1.38, "learning_rate": 0.00018762262594324838, "loss": 3.6387, "step": 4461 }, { "epoch": 1.38, "learning_rate": 0.00018758699688401113, "loss": 3.5933, "step": 4462 }, { "epoch": 1.38, "learning_rate": 0.00018755136782477394, "loss": 3.5309, "step": 4463 }, { "epoch": 1.38, "learning_rate": 0.0001875157387655367, "loss": 3.5349, "step": 4464 }, { "epoch": 1.38, "learning_rate": 0.00018748010970629948, "loss": 3.4066, "step": 4465 }, { "epoch": 1.38, "learning_rate": 0.00018744448064706226, "loss": 3.3315, "step": 4466 }, { "epoch": 1.38, "learning_rate": 0.00018740885158782502, "loss": 3.5529, "step": 4467 }, { "epoch": 1.38, "learning_rate": 0.0001873732225285878, "loss": 3.4812, "step": 4468 }, { "epoch": 1.38, "learning_rate": 0.00018733759346935055, "loss": 3.3587, "step": 4469 }, { "epoch": 1.38, "learning_rate": 0.0001873019644101133, "loss": 3.3246, "step": 4470 }, { "epoch": 1.38, "learning_rate": 0.00018726633535087612, "loss": 3.3587, "step": 4471 }, { "epoch": 1.38, "learning_rate": 0.00018723070629163887, "loss": 3.1762, "step": 4472 }, { "epoch": 1.38, "learning_rate": 0.00018719507723240165, "loss": 3.011, "step": 4473 }, { "epoch": 1.38, "learning_rate": 0.0001871594481731644, "loss": 2.9766, "step": 4474 }, { "epoch": 1.38, "learning_rate": 0.00018712381911392716, "loss": 3.1962, "step": 4475 }, { "epoch": 1.38, "learning_rate": 0.00018708819005468997, "loss": 3.1711, "step": 4476 }, { "epoch": 1.38, "learning_rate": 0.00018705256099545273, "loss": 2.8417, "step": 4477 }, { "epoch": 1.38, "learning_rate": 0.00018701693193621548, "loss": 2.8981, "step": 4478 }, { "epoch": 1.38, "learning_rate": 0.00018698130287697826, "loss": 2.9314, "step": 4479 }, { "epoch": 1.38, "learning_rate": 0.00018694567381774102, "loss": 2.8706, "step": 4480 }, { "epoch": 1.38, "learning_rate": 0.00018691004475850383, "loss": 3.0429, "step": 4481 }, { "epoch": 1.38, "learning_rate": 0.00018687441569926658, "loss": 2.757, "step": 4482 }, { "epoch": 1.38, "learning_rate": 0.00018683878664002934, "loss": 2.6737, "step": 4483 }, { "epoch": 1.38, "learning_rate": 0.00018680315758079212, "loss": 2.771, "step": 4484 }, { "epoch": 1.38, "learning_rate": 0.00018676752852155487, "loss": 2.5789, "step": 4485 }, { "epoch": 1.38, "learning_rate": 0.00018673189946231768, "loss": 2.6008, "step": 4486 }, { "epoch": 1.38, "learning_rate": 0.00018669627040308044, "loss": 2.5054, "step": 4487 }, { "epoch": 1.39, "learning_rate": 0.0001866606413438432, "loss": 2.2605, "step": 4488 }, { "epoch": 1.39, "learning_rate": 0.00018662501228460597, "loss": 2.2089, "step": 4489 }, { "epoch": 1.39, "learning_rate": 0.00018658938322536875, "loss": 1.9788, "step": 4490 }, { "epoch": 1.39, "learning_rate": 0.0001865537541661315, "loss": 4.9339, "step": 4491 }, { "epoch": 1.39, "learning_rate": 0.0001865181251068943, "loss": 4.4801, "step": 4492 }, { "epoch": 1.39, "learning_rate": 0.00018648249604765705, "loss": 4.3051, "step": 4493 }, { "epoch": 1.39, "learning_rate": 0.00018644686698841986, "loss": 4.3707, "step": 4494 }, { "epoch": 1.39, "learning_rate": 0.0001864112379291826, "loss": 3.7899, "step": 4495 }, { "epoch": 1.39, "learning_rate": 0.00018637560886994536, "loss": 3.8678, "step": 4496 }, { "epoch": 1.39, "learning_rate": 0.00018633997981070815, "loss": 3.6558, "step": 4497 }, { "epoch": 1.39, "learning_rate": 0.0001863043507514709, "loss": 3.791, "step": 4498 }, { "epoch": 1.39, "learning_rate": 0.0001862687216922337, "loss": 3.891, "step": 4499 }, { "epoch": 1.39, "learning_rate": 0.00018623309263299647, "loss": 4.0396, "step": 4500 }, { "epoch": 1.39, "eval_bleu": 0.0, "eval_loss": 4.281228065490723, "eval_runtime": 2569.7097, "eval_samples_per_second": 5.744, "eval_steps_per_second": 0.718, "step": 4500 }, { "epoch": 1.39, "learning_rate": 0.00018619746357375922, "loss": 4.1089, "step": 4501 }, { "epoch": 1.39, "learning_rate": 0.000186161834514522, "loss": 3.625, "step": 4502 }, { "epoch": 1.39, "learning_rate": 0.00018612620545528476, "loss": 3.6791, "step": 4503 }, { "epoch": 1.39, "learning_rate": 0.0001860905763960475, "loss": 3.718, "step": 4504 }, { "epoch": 1.39, "learning_rate": 0.00018605494733681032, "loss": 3.9028, "step": 4505 }, { "epoch": 1.39, "learning_rate": 0.00018601931827757308, "loss": 3.7382, "step": 4506 }, { "epoch": 1.39, "learning_rate": 0.00018598368921833586, "loss": 3.6261, "step": 4507 }, { "epoch": 1.39, "learning_rate": 0.0001859480601590986, "loss": 3.6259, "step": 4508 }, { "epoch": 1.39, "learning_rate": 0.00018591243109986137, "loss": 3.6202, "step": 4509 }, { "epoch": 1.39, "learning_rate": 0.00018587680204062418, "loss": 3.6358, "step": 4510 }, { "epoch": 1.39, "learning_rate": 0.00018584117298138693, "loss": 3.8154, "step": 4511 }, { "epoch": 1.39, "learning_rate": 0.00018580554392214969, "loss": 3.8293, "step": 4512 }, { "epoch": 1.39, "learning_rate": 0.0001857699148629125, "loss": 3.7113, "step": 4513 }, { "epoch": 1.39, "learning_rate": 0.00018573428580367525, "loss": 3.2368, "step": 4514 }, { "epoch": 1.39, "learning_rate": 0.00018569865674443803, "loss": 3.6301, "step": 4515 }, { "epoch": 1.39, "learning_rate": 0.00018566302768520079, "loss": 3.3745, "step": 4516 }, { "epoch": 1.39, "learning_rate": 0.00018562739862596354, "loss": 3.5114, "step": 4517 }, { "epoch": 1.39, "learning_rate": 0.00018559176956672635, "loss": 3.3647, "step": 4518 }, { "epoch": 1.39, "learning_rate": 0.0001855561405074891, "loss": 3.5545, "step": 4519 }, { "epoch": 1.4, "learning_rate": 0.00018552051144825189, "loss": 3.3762, "step": 4520 }, { "epoch": 1.4, "learning_rate": 0.00018548488238901464, "loss": 3.4799, "step": 4521 }, { "epoch": 1.4, "learning_rate": 0.0001854492533297774, "loss": 3.6488, "step": 4522 }, { "epoch": 1.4, "learning_rate": 0.0001854136242705402, "loss": 3.2391, "step": 4523 }, { "epoch": 1.4, "learning_rate": 0.00018537799521130296, "loss": 3.1481, "step": 4524 }, { "epoch": 1.4, "learning_rate": 0.00018534236615206571, "loss": 3.0651, "step": 4525 }, { "epoch": 1.4, "learning_rate": 0.0001853067370928285, "loss": 3.1124, "step": 4526 }, { "epoch": 1.4, "learning_rate": 0.00018527110803359125, "loss": 3.3026, "step": 4527 }, { "epoch": 1.4, "learning_rate": 0.00018523547897435406, "loss": 3.2073, "step": 4528 }, { "epoch": 1.4, "learning_rate": 0.00018519984991511681, "loss": 3.3296, "step": 4529 }, { "epoch": 1.4, "learning_rate": 0.00018516422085587957, "loss": 3.242, "step": 4530 }, { "epoch": 1.4, "learning_rate": 0.00018512859179664235, "loss": 2.8497, "step": 4531 }, { "epoch": 1.4, "learning_rate": 0.0001850929627374051, "loss": 2.7863, "step": 4532 }, { "epoch": 1.4, "learning_rate": 0.00018505733367816791, "loss": 3.0001, "step": 4533 }, { "epoch": 1.4, "learning_rate": 0.00018502170461893067, "loss": 2.8538, "step": 4534 }, { "epoch": 1.4, "learning_rate": 0.00018498607555969342, "loss": 2.8208, "step": 4535 }, { "epoch": 1.4, "learning_rate": 0.0001849504465004562, "loss": 2.4614, "step": 4536 }, { "epoch": 1.4, "learning_rate": 0.000184914817441219, "loss": 2.2898, "step": 4537 }, { "epoch": 1.4, "learning_rate": 0.00018487918838198174, "loss": 2.4263, "step": 4538 }, { "epoch": 1.4, "learning_rate": 0.00018484355932274452, "loss": 2.3912, "step": 4539 }, { "epoch": 1.4, "learning_rate": 0.00018480793026350728, "loss": 2.2211, "step": 4540 }, { "epoch": 1.4, "learning_rate": 0.0001847723012042701, "loss": 4.6913, "step": 4541 }, { "epoch": 1.4, "learning_rate": 0.00018473667214503284, "loss": 4.5808, "step": 4542 }, { "epoch": 1.4, "learning_rate": 0.0001847010430857956, "loss": 4.3213, "step": 4543 }, { "epoch": 1.4, "learning_rate": 0.00018466541402655838, "loss": 4.3237, "step": 4544 }, { "epoch": 1.4, "learning_rate": 0.00018462978496732113, "loss": 4.1259, "step": 4545 }, { "epoch": 1.4, "learning_rate": 0.00018459415590808394, "loss": 3.7563, "step": 4546 }, { "epoch": 1.4, "learning_rate": 0.0001845585268488467, "loss": 3.8599, "step": 4547 }, { "epoch": 1.4, "learning_rate": 0.00018452289778960945, "loss": 3.8666, "step": 4548 }, { "epoch": 1.4, "learning_rate": 0.00018448726873037223, "loss": 3.9719, "step": 4549 }, { "epoch": 1.4, "learning_rate": 0.000184451639671135, "loss": 3.6684, "step": 4550 }, { "epoch": 1.4, "learning_rate": 0.00018441601061189774, "loss": 3.8653, "step": 4551 }, { "epoch": 1.4, "learning_rate": 0.00018438038155266055, "loss": 3.7699, "step": 4552 }, { "epoch": 1.41, "learning_rate": 0.0001843447524934233, "loss": 3.3755, "step": 4553 }, { "epoch": 1.41, "learning_rate": 0.0001843091234341861, "loss": 3.8257, "step": 4554 }, { "epoch": 1.41, "learning_rate": 0.00018427349437494884, "loss": 3.9191, "step": 4555 }, { "epoch": 1.41, "learning_rate": 0.00018423786531571163, "loss": 3.7407, "step": 4556 }, { "epoch": 1.41, "learning_rate": 0.0001842022362564744, "loss": 3.7211, "step": 4557 }, { "epoch": 1.41, "learning_rate": 0.00018416660719723716, "loss": 3.7524, "step": 4558 }, { "epoch": 1.41, "learning_rate": 0.00018413097813799992, "loss": 3.6465, "step": 4559 }, { "epoch": 1.41, "learning_rate": 0.00018409534907876273, "loss": 3.4391, "step": 4560 }, { "epoch": 1.41, "learning_rate": 0.00018405972001952548, "loss": 3.4938, "step": 4561 }, { "epoch": 1.41, "learning_rate": 0.00018402409096028826, "loss": 3.5418, "step": 4562 }, { "epoch": 1.41, "learning_rate": 0.00018398846190105102, "loss": 3.7986, "step": 4563 }, { "epoch": 1.41, "learning_rate": 0.00018395283284181377, "loss": 3.5064, "step": 4564 }, { "epoch": 1.41, "learning_rate": 0.00018391720378257658, "loss": 3.6342, "step": 4565 }, { "epoch": 1.41, "learning_rate": 0.00018388157472333934, "loss": 3.5529, "step": 4566 }, { "epoch": 1.41, "learning_rate": 0.00018384594566410212, "loss": 3.2995, "step": 4567 }, { "epoch": 1.41, "learning_rate": 0.00018381031660486487, "loss": 3.4198, "step": 4568 }, { "epoch": 1.41, "learning_rate": 0.00018377468754562763, "loss": 3.4463, "step": 4569 }, { "epoch": 1.41, "learning_rate": 0.00018373905848639044, "loss": 3.4324, "step": 4570 }, { "epoch": 1.41, "learning_rate": 0.0001837034294271532, "loss": 3.2894, "step": 4571 }, { "epoch": 1.41, "learning_rate": 0.00018366780036791595, "loss": 3.1692, "step": 4572 }, { "epoch": 1.41, "learning_rate": 0.00018363217130867873, "loss": 3.3645, "step": 4573 }, { "epoch": 1.41, "learning_rate": 0.00018359654224944148, "loss": 3.101, "step": 4574 }, { "epoch": 1.41, "learning_rate": 0.0001835609131902043, "loss": 3.2577, "step": 4575 }, { "epoch": 1.41, "learning_rate": 0.00018352528413096705, "loss": 2.9987, "step": 4576 }, { "epoch": 1.41, "learning_rate": 0.0001834896550717298, "loss": 2.8119, "step": 4577 }, { "epoch": 1.41, "learning_rate": 0.00018345402601249258, "loss": 2.8331, "step": 4578 }, { "epoch": 1.41, "learning_rate": 0.00018341839695325534, "loss": 3.0664, "step": 4579 }, { "epoch": 1.41, "learning_rate": 0.00018338276789401815, "loss": 2.943, "step": 4580 }, { "epoch": 1.41, "learning_rate": 0.0001833471388347809, "loss": 2.7136, "step": 4581 }, { "epoch": 1.41, "learning_rate": 0.00018331150977554366, "loss": 2.6434, "step": 4582 }, { "epoch": 1.41, "learning_rate": 0.00018327588071630644, "loss": 2.6797, "step": 4583 }, { "epoch": 1.41, "learning_rate": 0.00018324025165706922, "loss": 2.5416, "step": 4584 }, { "epoch": 1.42, "learning_rate": 0.00018320462259783198, "loss": 2.4623, "step": 4585 }, { "epoch": 1.42, "learning_rate": 0.00018316899353859476, "loss": 2.5722, "step": 4586 }, { "epoch": 1.42, "learning_rate": 0.0001831333644793575, "loss": 2.3202, "step": 4587 }, { "epoch": 1.42, "learning_rate": 0.00018309773542012032, "loss": 2.1332, "step": 4588 }, { "epoch": 1.42, "learning_rate": 0.00018306210636088308, "loss": 2.1581, "step": 4589 }, { "epoch": 1.42, "learning_rate": 0.00018302647730164583, "loss": 2.035, "step": 4590 }, { "epoch": 1.42, "learning_rate": 0.0001829908482424086, "loss": 4.4287, "step": 4591 }, { "epoch": 1.42, "learning_rate": 0.00018295521918317137, "loss": 4.4819, "step": 4592 }, { "epoch": 1.42, "learning_rate": 0.00018291959012393412, "loss": 4.3897, "step": 4593 }, { "epoch": 1.42, "learning_rate": 0.00018288396106469693, "loss": 4.1682, "step": 4594 }, { "epoch": 1.42, "learning_rate": 0.00018284833200545969, "loss": 3.9331, "step": 4595 }, { "epoch": 1.42, "learning_rate": 0.00018281270294622247, "loss": 4.2788, "step": 4596 }, { "epoch": 1.42, "learning_rate": 0.00018277707388698522, "loss": 3.7613, "step": 4597 }, { "epoch": 1.42, "learning_rate": 0.00018274144482774798, "loss": 4.033, "step": 4598 }, { "epoch": 1.42, "learning_rate": 0.0001827058157685108, "loss": 3.8745, "step": 4599 }, { "epoch": 1.42, "learning_rate": 0.00018267018670927354, "loss": 3.8303, "step": 4600 }, { "epoch": 1.42, "learning_rate": 0.00018263455765003632, "loss": 3.5588, "step": 4601 }, { "epoch": 1.42, "learning_rate": 0.00018259892859079908, "loss": 3.534, "step": 4602 }, { "epoch": 1.42, "learning_rate": 0.00018256329953156186, "loss": 3.6018, "step": 4603 }, { "epoch": 1.42, "learning_rate": 0.00018252767047232464, "loss": 3.6259, "step": 4604 }, { "epoch": 1.42, "learning_rate": 0.0001824920414130874, "loss": 3.5376, "step": 4605 }, { "epoch": 1.42, "learning_rate": 0.00018245641235385015, "loss": 3.4657, "step": 4606 }, { "epoch": 1.42, "learning_rate": 0.00018242078329461296, "loss": 3.6521, "step": 4607 }, { "epoch": 1.42, "learning_rate": 0.00018238515423537572, "loss": 3.5062, "step": 4608 }, { "epoch": 1.42, "learning_rate": 0.0001823495251761385, "loss": 3.6734, "step": 4609 }, { "epoch": 1.42, "learning_rate": 0.00018231389611690125, "loss": 3.5809, "step": 4610 }, { "epoch": 1.42, "learning_rate": 0.000182278267057664, "loss": 3.3967, "step": 4611 }, { "epoch": 1.42, "learning_rate": 0.00018224263799842682, "loss": 3.7291, "step": 4612 }, { "epoch": 1.42, "learning_rate": 0.00018220700893918957, "loss": 3.6, "step": 4613 }, { "epoch": 1.42, "learning_rate": 0.00018217137987995235, "loss": 3.6512, "step": 4614 }, { "epoch": 1.42, "learning_rate": 0.0001821357508207151, "loss": 3.4838, "step": 4615 }, { "epoch": 1.42, "learning_rate": 0.00018210012176147786, "loss": 3.2941, "step": 4616 }, { "epoch": 1.42, "learning_rate": 0.00018206449270224067, "loss": 3.4516, "step": 4617 }, { "epoch": 1.43, "learning_rate": 0.00018202886364300343, "loss": 3.3152, "step": 4618 }, { "epoch": 1.43, "learning_rate": 0.00018199323458376618, "loss": 3.6056, "step": 4619 }, { "epoch": 1.43, "learning_rate": 0.00018195760552452896, "loss": 3.4139, "step": 4620 }, { "epoch": 1.43, "learning_rate": 0.00018192197646529172, "loss": 3.3395, "step": 4621 }, { "epoch": 1.43, "learning_rate": 0.00018188634740605453, "loss": 3.333, "step": 4622 }, { "epoch": 1.43, "learning_rate": 0.00018185071834681728, "loss": 3.0048, "step": 4623 }, { "epoch": 1.43, "learning_rate": 0.00018181508928758004, "loss": 2.9755, "step": 4624 }, { "epoch": 1.43, "learning_rate": 0.00018177946022834282, "loss": 3.0569, "step": 4625 }, { "epoch": 1.43, "learning_rate": 0.00018174383116910557, "loss": 3.259, "step": 4626 }, { "epoch": 1.43, "learning_rate": 0.00018170820210986838, "loss": 2.9735, "step": 4627 }, { "epoch": 1.43, "learning_rate": 0.00018167257305063114, "loss": 2.9929, "step": 4628 }, { "epoch": 1.43, "learning_rate": 0.0001816369439913939, "loss": 2.9022, "step": 4629 }, { "epoch": 1.43, "learning_rate": 0.00018160131493215667, "loss": 2.8732, "step": 4630 }, { "epoch": 1.43, "learning_rate": 0.00018156568587291945, "loss": 2.6221, "step": 4631 }, { "epoch": 1.43, "learning_rate": 0.0001815300568136822, "loss": 2.675, "step": 4632 }, { "epoch": 1.43, "learning_rate": 0.000181494427754445, "loss": 2.6749, "step": 4633 }, { "epoch": 1.43, "learning_rate": 0.00018145879869520775, "loss": 2.6124, "step": 4634 }, { "epoch": 1.43, "learning_rate": 0.00018142316963597055, "loss": 2.6988, "step": 4635 }, { "epoch": 1.43, "learning_rate": 0.0001813875405767333, "loss": 2.6791, "step": 4636 }, { "epoch": 1.43, "learning_rate": 0.00018135191151749606, "loss": 2.3058, "step": 4637 }, { "epoch": 1.43, "learning_rate": 0.00018131628245825885, "loss": 2.1523, "step": 4638 }, { "epoch": 1.43, "learning_rate": 0.0001812806533990216, "loss": 1.9923, "step": 4639 }, { "epoch": 1.43, "learning_rate": 0.00018124502433978436, "loss": 2.1073, "step": 4640 }, { "epoch": 1.43, "learning_rate": 0.00018120939528054716, "loss": 4.6725, "step": 4641 }, { "epoch": 1.43, "learning_rate": 0.00018117376622130992, "loss": 4.3977, "step": 4642 }, { "epoch": 1.43, "learning_rate": 0.0001811381371620727, "loss": 4.2943, "step": 4643 }, { "epoch": 1.43, "learning_rate": 0.00018110250810283546, "loss": 4.1266, "step": 4644 }, { "epoch": 1.43, "learning_rate": 0.0001810668790435982, "loss": 4.0593, "step": 4645 }, { "epoch": 1.43, "learning_rate": 0.00018103124998436102, "loss": 3.9568, "step": 4646 }, { "epoch": 1.43, "learning_rate": 0.00018099562092512377, "loss": 3.945, "step": 4647 }, { "epoch": 1.43, "learning_rate": 0.00018095999186588656, "loss": 4.0293, "step": 4648 }, { "epoch": 1.43, "learning_rate": 0.0001809243628066493, "loss": 3.5411, "step": 4649 }, { "epoch": 1.44, "learning_rate": 0.0001808887337474121, "loss": 3.9185, "step": 4650 }, { "epoch": 1.44, "learning_rate": 0.00018085310468817487, "loss": 3.6484, "step": 4651 }, { "epoch": 1.44, "learning_rate": 0.00018081747562893763, "loss": 3.9226, "step": 4652 }, { "epoch": 1.44, "learning_rate": 0.00018078184656970038, "loss": 3.594, "step": 4653 }, { "epoch": 1.44, "learning_rate": 0.0001807462175104632, "loss": 3.5263, "step": 4654 }, { "epoch": 1.44, "learning_rate": 0.00018071058845122595, "loss": 3.9934, "step": 4655 }, { "epoch": 1.44, "learning_rate": 0.00018067495939198873, "loss": 3.5791, "step": 4656 }, { "epoch": 1.44, "learning_rate": 0.00018063933033275148, "loss": 3.592, "step": 4657 }, { "epoch": 1.44, "learning_rate": 0.00018060370127351424, "loss": 3.8117, "step": 4658 }, { "epoch": 1.44, "learning_rate": 0.00018056807221427705, "loss": 3.6178, "step": 4659 }, { "epoch": 1.44, "learning_rate": 0.0001805324431550398, "loss": 3.5588, "step": 4660 }, { "epoch": 1.44, "learning_rate": 0.00018049681409580259, "loss": 3.6615, "step": 4661 }, { "epoch": 1.44, "learning_rate": 0.00018046118503656534, "loss": 3.8338, "step": 4662 }, { "epoch": 1.44, "learning_rate": 0.0001804255559773281, "loss": 3.3399, "step": 4663 }, { "epoch": 1.44, "learning_rate": 0.0001803899269180909, "loss": 3.5183, "step": 4664 }, { "epoch": 1.44, "learning_rate": 0.00018035429785885366, "loss": 3.308, "step": 4665 }, { "epoch": 1.44, "learning_rate": 0.0001803186687996164, "loss": 3.314, "step": 4666 }, { "epoch": 1.44, "learning_rate": 0.0001802830397403792, "loss": 3.33, "step": 4667 }, { "epoch": 1.44, "learning_rate": 0.00018024741068114195, "loss": 3.2981, "step": 4668 }, { "epoch": 1.44, "learning_rate": 0.00018021178162190476, "loss": 3.3088, "step": 4669 }, { "epoch": 1.44, "learning_rate": 0.00018017615256266751, "loss": 3.4031, "step": 4670 }, { "epoch": 1.44, "learning_rate": 0.00018014052350343027, "loss": 3.2715, "step": 4671 }, { "epoch": 1.44, "learning_rate": 0.00018010489444419305, "loss": 3.4788, "step": 4672 }, { "epoch": 1.44, "learning_rate": 0.0001800692653849558, "loss": 3.242, "step": 4673 }, { "epoch": 1.44, "learning_rate": 0.0001800336363257186, "loss": 3.2025, "step": 4674 }, { "epoch": 1.44, "learning_rate": 0.00017999800726648137, "loss": 2.9685, "step": 4675 }, { "epoch": 1.44, "learning_rate": 0.00017996237820724412, "loss": 3.0026, "step": 4676 }, { "epoch": 1.44, "learning_rate": 0.0001799267491480069, "loss": 2.9887, "step": 4677 }, { "epoch": 1.44, "learning_rate": 0.0001798911200887697, "loss": 2.8831, "step": 4678 }, { "epoch": 1.44, "learning_rate": 0.00017985549102953244, "loss": 2.7915, "step": 4679 }, { "epoch": 1.44, "learning_rate": 0.00017981986197029522, "loss": 3.1384, "step": 4680 }, { "epoch": 1.44, "learning_rate": 0.00017978423291105798, "loss": 2.8363, "step": 4681 }, { "epoch": 1.45, "learning_rate": 0.0001797486038518208, "loss": 2.9632, "step": 4682 }, { "epoch": 1.45, "learning_rate": 0.00017971297479258354, "loss": 2.6556, "step": 4683 }, { "epoch": 1.45, "learning_rate": 0.0001796773457333463, "loss": 2.6813, "step": 4684 }, { "epoch": 1.45, "learning_rate": 0.00017964171667410908, "loss": 2.3847, "step": 4685 }, { "epoch": 1.45, "learning_rate": 0.00017960608761487183, "loss": 2.3975, "step": 4686 }, { "epoch": 1.45, "learning_rate": 0.0001795704585556346, "loss": 2.3641, "step": 4687 }, { "epoch": 1.45, "learning_rate": 0.0001795348294963974, "loss": 2.3751, "step": 4688 }, { "epoch": 1.45, "learning_rate": 0.00017949920043716015, "loss": 2.1873, "step": 4689 }, { "epoch": 1.45, "learning_rate": 0.00017946357137792293, "loss": 1.8901, "step": 4690 }, { "epoch": 1.45, "learning_rate": 0.0001794279423186857, "loss": 4.7918, "step": 4691 }, { "epoch": 1.45, "learning_rate": 0.00017939231325944844, "loss": 4.4306, "step": 4692 }, { "epoch": 1.45, "learning_rate": 0.00017935668420021125, "loss": 4.1826, "step": 4693 }, { "epoch": 1.45, "learning_rate": 0.000179321055140974, "loss": 4.145, "step": 4694 }, { "epoch": 1.45, "learning_rate": 0.0001792854260817368, "loss": 4.1038, "step": 4695 }, { "epoch": 1.45, "learning_rate": 0.00017924979702249954, "loss": 4.3339, "step": 4696 }, { "epoch": 1.45, "learning_rate": 0.00017921416796326233, "loss": 4.0556, "step": 4697 }, { "epoch": 1.45, "learning_rate": 0.0001791785389040251, "loss": 3.9273, "step": 4698 }, { "epoch": 1.45, "learning_rate": 0.00017914290984478786, "loss": 3.8608, "step": 4699 }, { "epoch": 1.45, "learning_rate": 0.00017910728078555062, "loss": 3.7859, "step": 4700 }, { "epoch": 1.45, "learning_rate": 0.00017907165172631343, "loss": 3.8096, "step": 4701 }, { "epoch": 1.45, "learning_rate": 0.00017903602266707618, "loss": 3.7621, "step": 4702 }, { "epoch": 1.45, "learning_rate": 0.00017900039360783896, "loss": 3.6431, "step": 4703 }, { "epoch": 1.45, "learning_rate": 0.00017896476454860172, "loss": 3.7445, "step": 4704 }, { "epoch": 1.45, "learning_rate": 0.00017892913548936447, "loss": 3.7266, "step": 4705 }, { "epoch": 1.45, "learning_rate": 0.00017889350643012728, "loss": 3.69, "step": 4706 }, { "epoch": 1.45, "learning_rate": 0.00017885787737089004, "loss": 3.4656, "step": 4707 }, { "epoch": 1.45, "learning_rate": 0.0001788222483116528, "loss": 3.6179, "step": 4708 }, { "epoch": 1.45, "learning_rate": 0.00017878661925241557, "loss": 3.3894, "step": 4709 }, { "epoch": 1.45, "learning_rate": 0.00017875099019317833, "loss": 3.4449, "step": 4710 }, { "epoch": 1.45, "learning_rate": 0.00017871536113394114, "loss": 3.7462, "step": 4711 }, { "epoch": 1.45, "learning_rate": 0.0001786797320747039, "loss": 3.5377, "step": 4712 }, { "epoch": 1.45, "learning_rate": 0.00017864410301546665, "loss": 3.4482, "step": 4713 }, { "epoch": 1.45, "learning_rate": 0.00017860847395622943, "loss": 3.493, "step": 4714 }, { "epoch": 1.46, "learning_rate": 0.00017857284489699218, "loss": 3.4481, "step": 4715 }, { "epoch": 1.46, "learning_rate": 0.000178537215837755, "loss": 3.4747, "step": 4716 }, { "epoch": 1.46, "learning_rate": 0.00017850158677851775, "loss": 3.1804, "step": 4717 }, { "epoch": 1.46, "learning_rate": 0.0001784659577192805, "loss": 3.475, "step": 4718 }, { "epoch": 1.46, "learning_rate": 0.00017843032866004328, "loss": 3.2605, "step": 4719 }, { "epoch": 1.46, "learning_rate": 0.00017839469960080604, "loss": 3.3107, "step": 4720 }, { "epoch": 1.46, "learning_rate": 0.00017835907054156882, "loss": 3.1157, "step": 4721 }, { "epoch": 1.46, "learning_rate": 0.0001783234414823316, "loss": 3.2522, "step": 4722 }, { "epoch": 1.46, "learning_rate": 0.00017828781242309436, "loss": 3.4637, "step": 4723 }, { "epoch": 1.46, "learning_rate": 0.00017825218336385717, "loss": 3.0153, "step": 4724 }, { "epoch": 1.46, "learning_rate": 0.00017821655430461992, "loss": 2.9652, "step": 4725 }, { "epoch": 1.46, "learning_rate": 0.00017818092524538268, "loss": 2.9703, "step": 4726 }, { "epoch": 1.46, "learning_rate": 0.00017814529618614546, "loss": 2.7454, "step": 4727 }, { "epoch": 1.46, "learning_rate": 0.0001781096671269082, "loss": 3.1581, "step": 4728 }, { "epoch": 1.46, "learning_rate": 0.00017807403806767102, "loss": 2.7166, "step": 4729 }, { "epoch": 1.46, "learning_rate": 0.00017803840900843378, "loss": 2.5271, "step": 4730 }, { "epoch": 1.46, "learning_rate": 0.00017800277994919653, "loss": 2.7026, "step": 4731 }, { "epoch": 1.46, "learning_rate": 0.0001779671508899593, "loss": 2.6339, "step": 4732 }, { "epoch": 1.46, "learning_rate": 0.00017793152183072207, "loss": 2.6924, "step": 4733 }, { "epoch": 1.46, "learning_rate": 0.00017789589277148482, "loss": 2.6907, "step": 4734 }, { "epoch": 1.46, "learning_rate": 0.00017786026371224763, "loss": 2.3836, "step": 4735 }, { "epoch": 1.46, "learning_rate": 0.00017782463465301039, "loss": 2.3973, "step": 4736 }, { "epoch": 1.46, "learning_rate": 0.00017778900559377317, "loss": 2.2537, "step": 4737 }, { "epoch": 1.46, "learning_rate": 0.00017775337653453592, "loss": 2.0545, "step": 4738 }, { "epoch": 1.46, "learning_rate": 0.00017771774747529868, "loss": 2.0958, "step": 4739 }, { "epoch": 1.46, "learning_rate": 0.00017768211841606149, "loss": 1.9499, "step": 4740 }, { "epoch": 1.46, "learning_rate": 0.00017764648935682424, "loss": 4.6222, "step": 4741 }, { "epoch": 1.46, "learning_rate": 0.00017761086029758702, "loss": 4.401, "step": 4742 }, { "epoch": 1.46, "learning_rate": 0.00017757523123834978, "loss": 4.271, "step": 4743 }, { "epoch": 1.46, "learning_rate": 0.00017753960217911256, "loss": 4.027, "step": 4744 }, { "epoch": 1.46, "learning_rate": 0.00017750397311987534, "loss": 4.2307, "step": 4745 }, { "epoch": 1.46, "learning_rate": 0.0001774683440606381, "loss": 4.0846, "step": 4746 }, { "epoch": 1.47, "learning_rate": 0.00017743271500140085, "loss": 4.0746, "step": 4747 }, { "epoch": 1.47, "learning_rate": 0.00017739708594216366, "loss": 3.677, "step": 4748 }, { "epoch": 1.47, "learning_rate": 0.00017736145688292641, "loss": 3.6718, "step": 4749 }, { "epoch": 1.47, "learning_rate": 0.0001773258278236892, "loss": 3.6752, "step": 4750 }, { "epoch": 1.47, "learning_rate": 0.00017729019876445195, "loss": 3.9156, "step": 4751 }, { "epoch": 1.47, "learning_rate": 0.0001772545697052147, "loss": 3.8832, "step": 4752 }, { "epoch": 1.47, "learning_rate": 0.00017721894064597751, "loss": 3.5671, "step": 4753 }, { "epoch": 1.47, "learning_rate": 0.00017718331158674027, "loss": 3.6431, "step": 4754 }, { "epoch": 1.47, "learning_rate": 0.00017714768252750302, "loss": 3.7322, "step": 4755 }, { "epoch": 1.47, "learning_rate": 0.0001771120534682658, "loss": 3.6364, "step": 4756 }, { "epoch": 1.47, "learning_rate": 0.00017707642440902856, "loss": 3.5702, "step": 4757 }, { "epoch": 1.47, "learning_rate": 0.00017704079534979137, "loss": 3.3503, "step": 4758 }, { "epoch": 1.47, "learning_rate": 0.00017700516629055412, "loss": 3.6083, "step": 4759 }, { "epoch": 1.47, "learning_rate": 0.00017696953723131688, "loss": 3.6549, "step": 4760 }, { "epoch": 1.47, "learning_rate": 0.00017693390817207966, "loss": 3.8239, "step": 4761 }, { "epoch": 1.47, "learning_rate": 0.00017689827911284242, "loss": 3.6375, "step": 4762 }, { "epoch": 1.47, "learning_rate": 0.00017686265005360523, "loss": 3.4365, "step": 4763 }, { "epoch": 1.47, "learning_rate": 0.00017682702099436798, "loss": 3.3429, "step": 4764 }, { "epoch": 1.47, "learning_rate": 0.00017679139193513073, "loss": 3.3905, "step": 4765 }, { "epoch": 1.47, "learning_rate": 0.00017675576287589352, "loss": 3.2386, "step": 4766 }, { "epoch": 1.47, "learning_rate": 0.00017672013381665627, "loss": 3.3342, "step": 4767 }, { "epoch": 1.47, "learning_rate": 0.00017668450475741905, "loss": 3.3042, "step": 4768 }, { "epoch": 1.47, "learning_rate": 0.00017664887569818184, "loss": 3.0734, "step": 4769 }, { "epoch": 1.47, "learning_rate": 0.0001766132466389446, "loss": 3.1943, "step": 4770 }, { "epoch": 1.47, "learning_rate": 0.0001765776175797074, "loss": 3.2742, "step": 4771 }, { "epoch": 1.47, "learning_rate": 0.00017654198852047015, "loss": 3.2435, "step": 4772 }, { "epoch": 1.47, "learning_rate": 0.0001765063594612329, "loss": 3.2274, "step": 4773 }, { "epoch": 1.47, "learning_rate": 0.0001764707304019957, "loss": 3.1006, "step": 4774 }, { "epoch": 1.47, "learning_rate": 0.00017643510134275845, "loss": 3.1299, "step": 4775 }, { "epoch": 1.47, "learning_rate": 0.00017639947228352125, "loss": 3.0492, "step": 4776 }, { "epoch": 1.47, "learning_rate": 0.000176363843224284, "loss": 2.9599, "step": 4777 }, { "epoch": 1.47, "learning_rate": 0.00017632821416504676, "loss": 2.943, "step": 4778 }, { "epoch": 1.47, "learning_rate": 0.00017629258510580955, "loss": 2.9126, "step": 4779 }, { "epoch": 1.48, "learning_rate": 0.0001762569560465723, "loss": 2.7192, "step": 4780 }, { "epoch": 1.48, "learning_rate": 0.00017622132698733506, "loss": 2.6527, "step": 4781 }, { "epoch": 1.48, "learning_rate": 0.00017618569792809786, "loss": 2.9702, "step": 4782 }, { "epoch": 1.48, "learning_rate": 0.00017615006886886062, "loss": 2.6954, "step": 4783 }, { "epoch": 1.48, "learning_rate": 0.0001761144398096234, "loss": 2.7093, "step": 4784 }, { "epoch": 1.48, "learning_rate": 0.00017607881075038616, "loss": 2.3574, "step": 4785 }, { "epoch": 1.48, "learning_rate": 0.0001760431816911489, "loss": 2.4928, "step": 4786 }, { "epoch": 1.48, "learning_rate": 0.00017600755263191172, "loss": 2.5376, "step": 4787 }, { "epoch": 1.48, "learning_rate": 0.00017597192357267447, "loss": 2.2693, "step": 4788 }, { "epoch": 1.48, "learning_rate": 0.00017593629451343723, "loss": 2.2265, "step": 4789 }, { "epoch": 1.48, "learning_rate": 0.0001759006654542, "loss": 1.8167, "step": 4790 }, { "epoch": 1.48, "learning_rate": 0.0001758650363949628, "loss": 4.4275, "step": 4791 }, { "epoch": 1.48, "learning_rate": 0.00017582940733572557, "loss": 4.6542, "step": 4792 }, { "epoch": 1.48, "learning_rate": 0.00017579377827648833, "loss": 4.1792, "step": 4793 }, { "epoch": 1.48, "learning_rate": 0.00017575814921725108, "loss": 4.284, "step": 4794 }, { "epoch": 1.48, "learning_rate": 0.0001757225201580139, "loss": 4.1344, "step": 4795 }, { "epoch": 1.48, "learning_rate": 0.00017568689109877665, "loss": 3.65, "step": 4796 }, { "epoch": 1.48, "learning_rate": 0.00017565126203953943, "loss": 3.8079, "step": 4797 }, { "epoch": 1.48, "learning_rate": 0.00017561563298030218, "loss": 3.926, "step": 4798 }, { "epoch": 1.48, "learning_rate": 0.00017558000392106494, "loss": 3.6262, "step": 4799 }, { "epoch": 1.48, "learning_rate": 0.00017554437486182775, "loss": 3.6807, "step": 4800 }, { "epoch": 1.48, "learning_rate": 0.0001755087458025905, "loss": 3.6051, "step": 4801 }, { "epoch": 1.48, "learning_rate": 0.00017547311674335326, "loss": 3.7835, "step": 4802 }, { "epoch": 1.48, "learning_rate": 0.00017543748768411604, "loss": 3.7049, "step": 4803 }, { "epoch": 1.48, "learning_rate": 0.0001754018586248788, "loss": 3.8645, "step": 4804 }, { "epoch": 1.48, "learning_rate": 0.0001753662295656416, "loss": 3.6909, "step": 4805 }, { "epoch": 1.48, "learning_rate": 0.00017533060050640436, "loss": 3.6918, "step": 4806 }, { "epoch": 1.48, "learning_rate": 0.0001752949714471671, "loss": 3.8078, "step": 4807 }, { "epoch": 1.48, "learning_rate": 0.0001752593423879299, "loss": 3.782, "step": 4808 }, { "epoch": 1.48, "learning_rate": 0.00017522371332869265, "loss": 3.5386, "step": 4809 }, { "epoch": 1.48, "learning_rate": 0.00017518808426945546, "loss": 3.5134, "step": 4810 }, { "epoch": 1.48, "learning_rate": 0.0001751524552102182, "loss": 3.6518, "step": 4811 }, { "epoch": 1.49, "learning_rate": 0.00017511682615098097, "loss": 3.6094, "step": 4812 }, { "epoch": 1.49, "learning_rate": 0.00017508119709174375, "loss": 3.345, "step": 4813 }, { "epoch": 1.49, "learning_rate": 0.00017504556803250653, "loss": 3.3999, "step": 4814 }, { "epoch": 1.49, "learning_rate": 0.00017500993897326929, "loss": 3.428, "step": 4815 }, { "epoch": 1.49, "learning_rate": 0.00017497430991403207, "loss": 3.6267, "step": 4816 }, { "epoch": 1.49, "learning_rate": 0.00017493868085479482, "loss": 3.2533, "step": 4817 }, { "epoch": 1.49, "learning_rate": 0.00017490305179555763, "loss": 3.4008, "step": 4818 }, { "epoch": 1.49, "learning_rate": 0.0001748674227363204, "loss": 3.3549, "step": 4819 }, { "epoch": 1.49, "learning_rate": 0.00017483179367708314, "loss": 3.4212, "step": 4820 }, { "epoch": 1.49, "learning_rate": 0.00017479616461784592, "loss": 3.3165, "step": 4821 }, { "epoch": 1.49, "learning_rate": 0.00017476053555860868, "loss": 3.1172, "step": 4822 }, { "epoch": 1.49, "learning_rate": 0.0001747249064993715, "loss": 3.1252, "step": 4823 }, { "epoch": 1.49, "learning_rate": 0.00017468927744013424, "loss": 3.1517, "step": 4824 }, { "epoch": 1.49, "learning_rate": 0.000174653648380897, "loss": 2.9612, "step": 4825 }, { "epoch": 1.49, "learning_rate": 0.00017461801932165978, "loss": 3.0524, "step": 4826 }, { "epoch": 1.49, "learning_rate": 0.00017458239026242253, "loss": 3.0204, "step": 4827 }, { "epoch": 1.49, "learning_rate": 0.0001745467612031853, "loss": 2.8512, "step": 4828 }, { "epoch": 1.49, "learning_rate": 0.0001745111321439481, "loss": 2.9428, "step": 4829 }, { "epoch": 1.49, "learning_rate": 0.00017447550308471085, "loss": 2.7993, "step": 4830 }, { "epoch": 1.49, "learning_rate": 0.00017443987402547363, "loss": 2.9659, "step": 4831 }, { "epoch": 1.49, "learning_rate": 0.0001744042449662364, "loss": 2.6619, "step": 4832 }, { "epoch": 1.49, "learning_rate": 0.00017436861590699914, "loss": 2.4531, "step": 4833 }, { "epoch": 1.49, "learning_rate": 0.00017433298684776195, "loss": 2.3437, "step": 4834 }, { "epoch": 1.49, "learning_rate": 0.0001742973577885247, "loss": 2.3828, "step": 4835 }, { "epoch": 1.49, "learning_rate": 0.00017426172872928746, "loss": 2.3757, "step": 4836 }, { "epoch": 1.49, "learning_rate": 0.00017422609967005024, "loss": 2.366, "step": 4837 }, { "epoch": 1.49, "learning_rate": 0.00017419047061081303, "loss": 2.2645, "step": 4838 }, { "epoch": 1.49, "learning_rate": 0.0001741548415515758, "loss": 1.9537, "step": 4839 }, { "epoch": 1.49, "learning_rate": 0.00017411921249233856, "loss": 1.9982, "step": 4840 }, { "epoch": 1.49, "learning_rate": 0.00017408358343310132, "loss": 4.7958, "step": 4841 }, { "epoch": 1.49, "learning_rate": 0.00017404795437386413, "loss": 4.2977, "step": 4842 }, { "epoch": 1.49, "learning_rate": 0.00017401232531462688, "loss": 4.4349, "step": 4843 }, { "epoch": 1.5, "learning_rate": 0.00017397669625538966, "loss": 4.1587, "step": 4844 }, { "epoch": 1.5, "learning_rate": 0.00017394106719615242, "loss": 4.0518, "step": 4845 }, { "epoch": 1.5, "learning_rate": 0.00017390543813691517, "loss": 3.724, "step": 4846 }, { "epoch": 1.5, "learning_rate": 0.00017386980907767798, "loss": 3.6911, "step": 4847 }, { "epoch": 1.5, "learning_rate": 0.00017383418001844074, "loss": 3.7774, "step": 4848 }, { "epoch": 1.5, "learning_rate": 0.0001737985509592035, "loss": 3.9569, "step": 4849 }, { "epoch": 1.5, "learning_rate": 0.00017376292189996627, "loss": 3.8986, "step": 4850 }, { "epoch": 1.5, "learning_rate": 0.00017372729284072903, "loss": 3.6954, "step": 4851 }, { "epoch": 1.5, "learning_rate": 0.00017369166378149184, "loss": 3.4838, "step": 4852 }, { "epoch": 1.5, "learning_rate": 0.0001736560347222546, "loss": 3.5859, "step": 4853 }, { "epoch": 1.5, "learning_rate": 0.00017362040566301735, "loss": 3.5272, "step": 4854 }, { "epoch": 1.5, "learning_rate": 0.00017358477660378013, "loss": 3.8501, "step": 4855 }, { "epoch": 1.5, "learning_rate": 0.00017354914754454288, "loss": 3.8488, "step": 4856 }, { "epoch": 1.5, "learning_rate": 0.0001735135184853057, "loss": 3.4773, "step": 4857 }, { "epoch": 1.5, "learning_rate": 0.00017347788942606845, "loss": 3.0753, "step": 4858 }, { "epoch": 1.5, "learning_rate": 0.0001734422603668312, "loss": 3.5781, "step": 4859 }, { "epoch": 1.5, "learning_rate": 0.00017340663130759398, "loss": 3.6138, "step": 4860 }, { "epoch": 1.5, "learning_rate": 0.00017337100224835676, "loss": 3.5067, "step": 4861 }, { "epoch": 1.5, "learning_rate": 0.00017333537318911952, "loss": 3.3507, "step": 4862 }, { "epoch": 1.5, "learning_rate": 0.0001732997441298823, "loss": 3.7711, "step": 4863 }, { "epoch": 1.5, "learning_rate": 0.00017326411507064506, "loss": 3.5116, "step": 4864 }, { "epoch": 1.5, "learning_rate": 0.00017322848601140787, "loss": 3.2464, "step": 4865 }, { "epoch": 1.5, "learning_rate": 0.00017319285695217062, "loss": 3.6144, "step": 4866 }, { "epoch": 1.5, "learning_rate": 0.00017315722789293337, "loss": 3.5795, "step": 4867 }, { "epoch": 1.5, "learning_rate": 0.00017312159883369616, "loss": 3.1394, "step": 4868 }, { "epoch": 1.5, "learning_rate": 0.0001730859697744589, "loss": 3.3071, "step": 4869 }, { "epoch": 1.5, "learning_rate": 0.00017305034071522167, "loss": 3.1332, "step": 4870 }, { "epoch": 1.5, "learning_rate": 0.00017301471165598448, "loss": 3.0444, "step": 4871 }, { "epoch": 1.5, "learning_rate": 0.00017297908259674723, "loss": 3.1078, "step": 4872 }, { "epoch": 1.5, "learning_rate": 0.00017294345353751, "loss": 3.1978, "step": 4873 }, { "epoch": 1.5, "learning_rate": 0.00017290782447827277, "loss": 3.2887, "step": 4874 }, { "epoch": 1.5, "learning_rate": 0.00017287219541903552, "loss": 3.3596, "step": 4875 }, { "epoch": 1.5, "learning_rate": 0.00017283656635979833, "loss": 3.1373, "step": 4876 }, { "epoch": 1.51, "learning_rate": 0.00017280093730056109, "loss": 2.9264, "step": 4877 }, { "epoch": 1.51, "learning_rate": 0.00017276530824132387, "loss": 2.8808, "step": 4878 }, { "epoch": 1.51, "learning_rate": 0.00017272967918208662, "loss": 2.81, "step": 4879 }, { "epoch": 1.51, "learning_rate": 0.00017269405012284938, "loss": 2.5874, "step": 4880 }, { "epoch": 1.51, "learning_rate": 0.00017265842106361219, "loss": 2.8685, "step": 4881 }, { "epoch": 1.51, "learning_rate": 0.00017262279200437494, "loss": 2.642, "step": 4882 }, { "epoch": 1.51, "learning_rate": 0.0001725871629451377, "loss": 2.4158, "step": 4883 }, { "epoch": 1.51, "learning_rate": 0.00017255153388590048, "loss": 2.2786, "step": 4884 }, { "epoch": 1.51, "learning_rate": 0.00017251590482666326, "loss": 2.2767, "step": 4885 }, { "epoch": 1.51, "learning_rate": 0.00017248027576742604, "loss": 2.1929, "step": 4886 }, { "epoch": 1.51, "learning_rate": 0.0001724446467081888, "loss": 2.321, "step": 4887 }, { "epoch": 1.51, "learning_rate": 0.00017240901764895155, "loss": 1.9975, "step": 4888 }, { "epoch": 1.51, "learning_rate": 0.00017237338858971436, "loss": 2.1176, "step": 4889 }, { "epoch": 1.51, "learning_rate": 0.00017233775953047711, "loss": 2.1125, "step": 4890 }, { "epoch": 1.51, "learning_rate": 0.0001723021304712399, "loss": 4.5649, "step": 4891 }, { "epoch": 1.51, "learning_rate": 0.00017226650141200265, "loss": 4.4347, "step": 4892 }, { "epoch": 1.51, "learning_rate": 0.0001722308723527654, "loss": 4.1302, "step": 4893 }, { "epoch": 1.51, "learning_rate": 0.00017219524329352821, "loss": 4.1306, "step": 4894 }, { "epoch": 1.51, "learning_rate": 0.00017215961423429097, "loss": 4.1397, "step": 4895 }, { "epoch": 1.51, "learning_rate": 0.00017212398517505372, "loss": 3.998, "step": 4896 }, { "epoch": 1.51, "learning_rate": 0.0001720883561158165, "loss": 3.9344, "step": 4897 }, { "epoch": 1.51, "learning_rate": 0.00017205272705657926, "loss": 4.0342, "step": 4898 }, { "epoch": 1.51, "learning_rate": 0.00017201709799734207, "loss": 3.9455, "step": 4899 }, { "epoch": 1.51, "learning_rate": 0.00017198146893810482, "loss": 3.5029, "step": 4900 }, { "epoch": 1.51, "learning_rate": 0.00017194583987886758, "loss": 3.7697, "step": 4901 }, { "epoch": 1.51, "learning_rate": 0.00017191021081963036, "loss": 3.8397, "step": 4902 }, { "epoch": 1.51, "learning_rate": 0.00017187458176039312, "loss": 3.6819, "step": 4903 }, { "epoch": 1.51, "learning_rate": 0.0001718389527011559, "loss": 3.7436, "step": 4904 }, { "epoch": 1.51, "learning_rate": 0.00017180332364191868, "loss": 3.7442, "step": 4905 }, { "epoch": 1.51, "learning_rate": 0.00017176769458268143, "loss": 3.5739, "step": 4906 }, { "epoch": 1.51, "learning_rate": 0.00017173206552344422, "loss": 3.7032, "step": 4907 }, { "epoch": 1.51, "learning_rate": 0.000171696436464207, "loss": 3.5674, "step": 4908 }, { "epoch": 1.52, "learning_rate": 0.00017166080740496975, "loss": 3.3831, "step": 4909 }, { "epoch": 1.52, "learning_rate": 0.00017162517834573253, "loss": 3.534, "step": 4910 }, { "epoch": 1.52, "learning_rate": 0.0001715895492864953, "loss": 3.4172, "step": 4911 }, { "epoch": 1.52, "learning_rate": 0.0001715539202272581, "loss": 3.4799, "step": 4912 }, { "epoch": 1.52, "learning_rate": 0.00017151829116802085, "loss": 3.5243, "step": 4913 }, { "epoch": 1.52, "learning_rate": 0.0001714826621087836, "loss": 3.3501, "step": 4914 }, { "epoch": 1.52, "learning_rate": 0.0001714470330495464, "loss": 3.4357, "step": 4915 }, { "epoch": 1.52, "learning_rate": 0.00017141140399030914, "loss": 3.3698, "step": 4916 }, { "epoch": 1.52, "learning_rate": 0.0001713757749310719, "loss": 3.3821, "step": 4917 }, { "epoch": 1.52, "learning_rate": 0.0001713401458718347, "loss": 3.1376, "step": 4918 }, { "epoch": 1.52, "learning_rate": 0.00017130451681259746, "loss": 3.41, "step": 4919 }, { "epoch": 1.52, "learning_rate": 0.00017126888775336024, "loss": 3.3301, "step": 4920 }, { "epoch": 1.52, "learning_rate": 0.000171233258694123, "loss": 3.0893, "step": 4921 }, { "epoch": 1.52, "learning_rate": 0.00017119762963488575, "loss": 3.0963, "step": 4922 }, { "epoch": 1.52, "learning_rate": 0.00017116200057564856, "loss": 2.9594, "step": 4923 }, { "epoch": 1.52, "learning_rate": 0.00017112637151641132, "loss": 2.8193, "step": 4924 }, { "epoch": 1.52, "learning_rate": 0.0001710907424571741, "loss": 3.2229, "step": 4925 }, { "epoch": 1.52, "learning_rate": 0.00017105511339793685, "loss": 3.1508, "step": 4926 }, { "epoch": 1.52, "learning_rate": 0.0001710194843386996, "loss": 3.1167, "step": 4927 }, { "epoch": 1.52, "learning_rate": 0.00017098385527946242, "loss": 2.8698, "step": 4928 }, { "epoch": 1.52, "learning_rate": 0.00017094822622022517, "loss": 2.9609, "step": 4929 }, { "epoch": 1.52, "learning_rate": 0.00017091259716098793, "loss": 2.8707, "step": 4930 }, { "epoch": 1.52, "learning_rate": 0.0001708769681017507, "loss": 2.6924, "step": 4931 }, { "epoch": 1.52, "learning_rate": 0.0001708413390425135, "loss": 2.7957, "step": 4932 }, { "epoch": 1.52, "learning_rate": 0.00017080570998327627, "loss": 2.4449, "step": 4933 }, { "epoch": 1.52, "learning_rate": 0.00017077008092403903, "loss": 2.3071, "step": 4934 }, { "epoch": 1.52, "learning_rate": 0.00017073445186480178, "loss": 2.5278, "step": 4935 }, { "epoch": 1.52, "learning_rate": 0.0001706988228055646, "loss": 2.3266, "step": 4936 }, { "epoch": 1.52, "learning_rate": 0.00017066319374632735, "loss": 2.1352, "step": 4937 }, { "epoch": 1.52, "learning_rate": 0.00017062756468709013, "loss": 2.1713, "step": 4938 }, { "epoch": 1.52, "learning_rate": 0.00017059193562785288, "loss": 1.9682, "step": 4939 }, { "epoch": 1.52, "learning_rate": 0.00017055630656861564, "loss": 2.0487, "step": 4940 }, { "epoch": 1.52, "learning_rate": 0.00017052067750937845, "loss": 4.6505, "step": 4941 }, { "epoch": 1.53, "learning_rate": 0.0001704850484501412, "loss": 4.4596, "step": 4942 }, { "epoch": 1.53, "learning_rate": 0.00017044941939090396, "loss": 3.9328, "step": 4943 }, { "epoch": 1.53, "learning_rate": 0.00017041379033166674, "loss": 3.8647, "step": 4944 }, { "epoch": 1.53, "learning_rate": 0.0001703781612724295, "loss": 3.9741, "step": 4945 }, { "epoch": 1.53, "learning_rate": 0.0001703425322131923, "loss": 3.7937, "step": 4946 }, { "epoch": 1.53, "learning_rate": 0.00017030690315395506, "loss": 3.498, "step": 4947 }, { "epoch": 1.53, "learning_rate": 0.0001702712740947178, "loss": 3.6309, "step": 4948 }, { "epoch": 1.53, "learning_rate": 0.0001702356450354806, "loss": 3.7367, "step": 4949 }, { "epoch": 1.53, "learning_rate": 0.00017020001597624335, "loss": 3.757, "step": 4950 }, { "epoch": 1.53, "learning_rate": 0.00017016438691700613, "loss": 3.8489, "step": 4951 }, { "epoch": 1.53, "learning_rate": 0.0001701287578577689, "loss": 3.7928, "step": 4952 }, { "epoch": 1.53, "learning_rate": 0.00017009312879853167, "loss": 3.6257, "step": 4953 }, { "epoch": 1.53, "learning_rate": 0.00017005749973929445, "loss": 3.5836, "step": 4954 }, { "epoch": 1.53, "learning_rate": 0.00017002187068005723, "loss": 3.5459, "step": 4955 }, { "epoch": 1.53, "learning_rate": 0.00016998624162081999, "loss": 3.819, "step": 4956 }, { "epoch": 1.53, "learning_rate": 0.00016995061256158277, "loss": 3.5434, "step": 4957 }, { "epoch": 1.53, "learning_rate": 0.00016991498350234552, "loss": 3.466, "step": 4958 }, { "epoch": 1.53, "learning_rate": 0.00016987935444310833, "loss": 3.7567, "step": 4959 }, { "epoch": 1.53, "learning_rate": 0.00016984372538387109, "loss": 3.6599, "step": 4960 }, { "epoch": 1.53, "learning_rate": 0.00016980809632463384, "loss": 3.326, "step": 4961 }, { "epoch": 1.53, "learning_rate": 0.00016977246726539662, "loss": 3.5011, "step": 4962 }, { "epoch": 1.53, "learning_rate": 0.00016973683820615938, "loss": 3.3324, "step": 4963 }, { "epoch": 1.53, "learning_rate": 0.00016970120914692213, "loss": 3.3067, "step": 4964 }, { "epoch": 1.53, "learning_rate": 0.00016966558008768494, "loss": 3.2229, "step": 4965 }, { "epoch": 1.53, "learning_rate": 0.0001696299510284477, "loss": 3.2735, "step": 4966 }, { "epoch": 1.53, "learning_rate": 0.00016959432196921048, "loss": 3.3442, "step": 4967 }, { "epoch": 1.53, "learning_rate": 0.00016955869290997323, "loss": 3.376, "step": 4968 }, { "epoch": 1.53, "learning_rate": 0.000169523063850736, "loss": 3.1062, "step": 4969 }, { "epoch": 1.53, "learning_rate": 0.0001694874347914988, "loss": 3.4486, "step": 4970 }, { "epoch": 1.53, "learning_rate": 0.00016945180573226155, "loss": 3.055, "step": 4971 }, { "epoch": 1.53, "learning_rate": 0.00016941617667302433, "loss": 3.3777, "step": 4972 }, { "epoch": 1.53, "learning_rate": 0.0001693805476137871, "loss": 2.9605, "step": 4973 }, { "epoch": 1.54, "learning_rate": 0.00016934491855454984, "loss": 3.0799, "step": 4974 }, { "epoch": 1.54, "learning_rate": 0.00016930928949531265, "loss": 3.101, "step": 4975 }, { "epoch": 1.54, "learning_rate": 0.0001692736604360754, "loss": 2.8785, "step": 4976 }, { "epoch": 1.54, "learning_rate": 0.00016923803137683816, "loss": 3.0342, "step": 4977 }, { "epoch": 1.54, "learning_rate": 0.00016920240231760094, "loss": 2.9606, "step": 4978 }, { "epoch": 1.54, "learning_rate": 0.00016916677325836373, "loss": 2.8398, "step": 4979 }, { "epoch": 1.54, "learning_rate": 0.0001691311441991265, "loss": 2.8264, "step": 4980 }, { "epoch": 1.54, "learning_rate": 0.00016909551513988926, "loss": 2.7263, "step": 4981 }, { "epoch": 1.54, "learning_rate": 0.00016905988608065202, "loss": 2.4636, "step": 4982 }, { "epoch": 1.54, "learning_rate": 0.00016902425702141483, "loss": 2.6144, "step": 4983 }, { "epoch": 1.54, "learning_rate": 0.00016898862796217758, "loss": 2.4867, "step": 4984 }, { "epoch": 1.54, "learning_rate": 0.00016895299890294033, "loss": 2.4298, "step": 4985 }, { "epoch": 1.54, "learning_rate": 0.00016891736984370312, "loss": 2.2732, "step": 4986 }, { "epoch": 1.54, "learning_rate": 0.00016888174078446587, "loss": 2.2657, "step": 4987 }, { "epoch": 1.54, "learning_rate": 0.00016884611172522868, "loss": 2.0809, "step": 4988 }, { "epoch": 1.54, "learning_rate": 0.00016881048266599144, "loss": 2.1756, "step": 4989 }, { "epoch": 1.54, "learning_rate": 0.0001687748536067542, "loss": 1.9941, "step": 4990 }, { "epoch": 1.54, "learning_rate": 0.00016873922454751697, "loss": 4.7083, "step": 4991 }, { "epoch": 1.54, "learning_rate": 0.00016870359548827973, "loss": 4.4278, "step": 4992 }, { "epoch": 1.54, "learning_rate": 0.00016866796642904254, "loss": 4.4681, "step": 4993 }, { "epoch": 1.54, "learning_rate": 0.0001686323373698053, "loss": 3.9955, "step": 4994 }, { "epoch": 1.54, "learning_rate": 0.00016859670831056805, "loss": 4.0113, "step": 4995 }, { "epoch": 1.54, "learning_rate": 0.00016856107925133083, "loss": 3.8935, "step": 4996 }, { "epoch": 1.54, "learning_rate": 0.00016852545019209358, "loss": 4.1576, "step": 4997 }, { "epoch": 1.54, "learning_rate": 0.00016848982113285636, "loss": 3.8884, "step": 4998 }, { "epoch": 1.54, "learning_rate": 0.00016845419207361915, "loss": 3.7954, "step": 4999 }, { "epoch": 1.54, "learning_rate": 0.0001684185630143819, "loss": 3.856, "step": 5000 }, { "epoch": 1.54, "eval_bleu": 2.2622740788922234e-13, "eval_loss": 4.170206069946289, "eval_runtime": 2580.4115, "eval_samples_per_second": 5.72, "eval_steps_per_second": 0.715, "step": 5000 }, { "epoch": 1.54, "learning_rate": 0.00016838293395514468, "loss": 3.5963, "step": 5001 }, { "epoch": 1.54, "learning_rate": 0.00016834730489590746, "loss": 3.9321, "step": 5002 }, { "epoch": 1.54, "learning_rate": 0.00016831167583667022, "loss": 3.8039, "step": 5003 }, { "epoch": 1.54, "learning_rate": 0.000168276046777433, "loss": 3.654, "step": 5004 }, { "epoch": 1.54, "learning_rate": 0.00016824041771819576, "loss": 3.6374, "step": 5005 }, { "epoch": 1.55, "learning_rate": 0.00016820478865895856, "loss": 3.4612, "step": 5006 }, { "epoch": 1.55, "learning_rate": 0.00016816915959972132, "loss": 3.3838, "step": 5007 }, { "epoch": 1.55, "learning_rate": 0.00016813353054048407, "loss": 3.5514, "step": 5008 }, { "epoch": 1.55, "learning_rate": 0.00016809790148124686, "loss": 3.5937, "step": 5009 }, { "epoch": 1.55, "learning_rate": 0.0001680622724220096, "loss": 3.7692, "step": 5010 }, { "epoch": 1.55, "learning_rate": 0.00016802664336277237, "loss": 3.8193, "step": 5011 }, { "epoch": 1.55, "learning_rate": 0.00016799101430353517, "loss": 3.7518, "step": 5012 }, { "epoch": 1.55, "learning_rate": 0.00016795538524429793, "loss": 3.4959, "step": 5013 }, { "epoch": 1.55, "learning_rate": 0.0001679197561850607, "loss": 3.6865, "step": 5014 }, { "epoch": 1.55, "learning_rate": 0.00016788412712582347, "loss": 3.5194, "step": 5015 }, { "epoch": 1.55, "learning_rate": 0.00016784849806658622, "loss": 3.6062, "step": 5016 }, { "epoch": 1.55, "learning_rate": 0.00016781286900734903, "loss": 3.0849, "step": 5017 }, { "epoch": 1.55, "learning_rate": 0.00016777723994811178, "loss": 3.3316, "step": 5018 }, { "epoch": 1.55, "learning_rate": 0.00016774161088887457, "loss": 3.5343, "step": 5019 }, { "epoch": 1.55, "learning_rate": 0.00016770598182963732, "loss": 3.1095, "step": 5020 }, { "epoch": 1.55, "learning_rate": 0.00016767035277040008, "loss": 3.2945, "step": 5021 }, { "epoch": 1.55, "learning_rate": 0.00016763472371116288, "loss": 3.0817, "step": 5022 }, { "epoch": 1.55, "learning_rate": 0.00016759909465192564, "loss": 3.0471, "step": 5023 }, { "epoch": 1.55, "learning_rate": 0.0001675634655926884, "loss": 2.9613, "step": 5024 }, { "epoch": 1.55, "learning_rate": 0.00016752783653345118, "loss": 3.1743, "step": 5025 }, { "epoch": 1.55, "learning_rate": 0.00016749220747421396, "loss": 3.0343, "step": 5026 }, { "epoch": 1.55, "learning_rate": 0.00016745657841497674, "loss": 2.9221, "step": 5027 }, { "epoch": 1.55, "learning_rate": 0.0001674209493557395, "loss": 2.9922, "step": 5028 }, { "epoch": 1.55, "learning_rate": 0.00016738532029650225, "loss": 2.8047, "step": 5029 }, { "epoch": 1.55, "learning_rate": 0.00016734969123726506, "loss": 2.7419, "step": 5030 }, { "epoch": 1.55, "learning_rate": 0.0001673140621780278, "loss": 2.9277, "step": 5031 }, { "epoch": 1.55, "learning_rate": 0.00016727843311879057, "loss": 3.0666, "step": 5032 }, { "epoch": 1.55, "learning_rate": 0.00016724280405955335, "loss": 2.6994, "step": 5033 }, { "epoch": 1.55, "learning_rate": 0.0001672071750003161, "loss": 2.4348, "step": 5034 }, { "epoch": 1.55, "learning_rate": 0.00016717154594107891, "loss": 2.4378, "step": 5035 }, { "epoch": 1.55, "learning_rate": 0.00016713591688184167, "loss": 2.4857, "step": 5036 }, { "epoch": 1.55, "learning_rate": 0.00016710028782260442, "loss": 2.2687, "step": 5037 }, { "epoch": 1.55, "learning_rate": 0.0001670646587633672, "loss": 2.0499, "step": 5038 }, { "epoch": 1.56, "learning_rate": 0.00016702902970412996, "loss": 2.1065, "step": 5039 }, { "epoch": 1.56, "learning_rate": 0.00016699340064489277, "loss": 2.0045, "step": 5040 }, { "epoch": 1.56, "learning_rate": 0.00016695777158565552, "loss": 4.7496, "step": 5041 }, { "epoch": 1.56, "learning_rate": 0.00016692214252641828, "loss": 4.3594, "step": 5042 }, { "epoch": 1.56, "learning_rate": 0.00016688651346718106, "loss": 4.255, "step": 5043 }, { "epoch": 1.56, "learning_rate": 0.00016685088440794382, "loss": 4.0346, "step": 5044 }, { "epoch": 1.56, "learning_rate": 0.0001668152553487066, "loss": 4.2133, "step": 5045 }, { "epoch": 1.56, "learning_rate": 0.00016677962628946938, "loss": 4.1195, "step": 5046 }, { "epoch": 1.56, "learning_rate": 0.00016674399723023213, "loss": 3.9856, "step": 5047 }, { "epoch": 1.56, "learning_rate": 0.00016670836817099492, "loss": 3.796, "step": 5048 }, { "epoch": 1.56, "learning_rate": 0.0001666727391117577, "loss": 3.9051, "step": 5049 }, { "epoch": 1.56, "learning_rate": 0.00016663711005252045, "loss": 3.7122, "step": 5050 }, { "epoch": 1.56, "learning_rate": 0.00016660148099328323, "loss": 3.6531, "step": 5051 }, { "epoch": 1.56, "learning_rate": 0.000166565851934046, "loss": 3.6407, "step": 5052 }, { "epoch": 1.56, "learning_rate": 0.0001665302228748088, "loss": 3.7328, "step": 5053 }, { "epoch": 1.56, "learning_rate": 0.00016649459381557155, "loss": 3.6517, "step": 5054 }, { "epoch": 1.56, "learning_rate": 0.0001664589647563343, "loss": 3.6565, "step": 5055 }, { "epoch": 1.56, "learning_rate": 0.0001664233356970971, "loss": 3.7824, "step": 5056 }, { "epoch": 1.56, "learning_rate": 0.00016638770663785984, "loss": 3.6188, "step": 5057 }, { "epoch": 1.56, "learning_rate": 0.0001663520775786226, "loss": 3.722, "step": 5058 }, { "epoch": 1.56, "learning_rate": 0.0001663164485193854, "loss": 3.4814, "step": 5059 }, { "epoch": 1.56, "learning_rate": 0.00016628081946014816, "loss": 3.7033, "step": 5060 }, { "epoch": 1.56, "learning_rate": 0.00016624519040091094, "loss": 3.5496, "step": 5061 }, { "epoch": 1.56, "learning_rate": 0.0001662095613416737, "loss": 3.5623, "step": 5062 }, { "epoch": 1.56, "learning_rate": 0.00016617393228243645, "loss": 3.5102, "step": 5063 }, { "epoch": 1.56, "learning_rate": 0.00016613830322319926, "loss": 3.5526, "step": 5064 }, { "epoch": 1.56, "learning_rate": 0.00016610267416396202, "loss": 3.3809, "step": 5065 }, { "epoch": 1.56, "learning_rate": 0.00016606704510472477, "loss": 3.2297, "step": 5066 }, { "epoch": 1.56, "learning_rate": 0.00016603141604548755, "loss": 3.1222, "step": 5067 }, { "epoch": 1.56, "learning_rate": 0.0001659957869862503, "loss": 3.159, "step": 5068 }, { "epoch": 1.56, "learning_rate": 0.00016596015792701312, "loss": 2.9245, "step": 5069 }, { "epoch": 1.56, "learning_rate": 0.00016592452886777587, "loss": 3.0366, "step": 5070 }, { "epoch": 1.57, "learning_rate": 0.00016588889980853863, "loss": 3.0104, "step": 5071 }, { "epoch": 1.57, "learning_rate": 0.0001658532707493014, "loss": 3.1461, "step": 5072 }, { "epoch": 1.57, "learning_rate": 0.0001658176416900642, "loss": 3.1299, "step": 5073 }, { "epoch": 1.57, "learning_rate": 0.00016578201263082697, "loss": 2.9052, "step": 5074 }, { "epoch": 1.57, "learning_rate": 0.00016574638357158973, "loss": 3.0537, "step": 5075 }, { "epoch": 1.57, "learning_rate": 0.00016571075451235248, "loss": 3.0296, "step": 5076 }, { "epoch": 1.57, "learning_rate": 0.0001656751254531153, "loss": 2.9194, "step": 5077 }, { "epoch": 1.57, "learning_rate": 0.00016563949639387805, "loss": 2.9633, "step": 5078 }, { "epoch": 1.57, "learning_rate": 0.0001656038673346408, "loss": 2.855, "step": 5079 }, { "epoch": 1.57, "learning_rate": 0.00016556823827540358, "loss": 2.708, "step": 5080 }, { "epoch": 1.57, "learning_rate": 0.00016553260921616634, "loss": 2.7977, "step": 5081 }, { "epoch": 1.57, "learning_rate": 0.00016549698015692915, "loss": 2.4751, "step": 5082 }, { "epoch": 1.57, "learning_rate": 0.0001654613510976919, "loss": 2.6037, "step": 5083 }, { "epoch": 1.57, "learning_rate": 0.00016542572203845466, "loss": 2.7135, "step": 5084 }, { "epoch": 1.57, "learning_rate": 0.00016539009297921744, "loss": 2.4936, "step": 5085 }, { "epoch": 1.57, "learning_rate": 0.0001653544639199802, "loss": 2.2641, "step": 5086 }, { "epoch": 1.57, "learning_rate": 0.000165318834860743, "loss": 2.2471, "step": 5087 }, { "epoch": 1.57, "learning_rate": 0.00016528320580150576, "loss": 2.1959, "step": 5088 }, { "epoch": 1.57, "learning_rate": 0.0001652475767422685, "loss": 1.9294, "step": 5089 }, { "epoch": 1.57, "learning_rate": 0.0001652119476830313, "loss": 2.0809, "step": 5090 }, { "epoch": 1.57, "learning_rate": 0.00016517631862379405, "loss": 4.5385, "step": 5091 }, { "epoch": 1.57, "learning_rate": 0.00016514068956455683, "loss": 4.1514, "step": 5092 }, { "epoch": 1.57, "learning_rate": 0.0001651050605053196, "loss": 4.2218, "step": 5093 }, { "epoch": 1.57, "learning_rate": 0.00016506943144608237, "loss": 4.0605, "step": 5094 }, { "epoch": 1.57, "learning_rate": 0.00016503380238684515, "loss": 4.1388, "step": 5095 }, { "epoch": 1.57, "learning_rate": 0.00016499817332760793, "loss": 3.6764, "step": 5096 }, { "epoch": 1.57, "learning_rate": 0.00016496254426837069, "loss": 4.0067, "step": 5097 }, { "epoch": 1.57, "learning_rate": 0.00016492691520913347, "loss": 3.8095, "step": 5098 }, { "epoch": 1.57, "learning_rate": 0.00016489128614989622, "loss": 3.9261, "step": 5099 }, { "epoch": 1.57, "learning_rate": 0.00016485565709065898, "loss": 3.6464, "step": 5100 }, { "epoch": 1.57, "learning_rate": 0.00016482002803142179, "loss": 3.7204, "step": 5101 }, { "epoch": 1.57, "learning_rate": 0.00016478439897218454, "loss": 3.4486, "step": 5102 }, { "epoch": 1.57, "learning_rate": 0.00016474876991294732, "loss": 3.8439, "step": 5103 }, { "epoch": 1.58, "learning_rate": 0.00016471314085371008, "loss": 3.8556, "step": 5104 }, { "epoch": 1.58, "learning_rate": 0.00016467751179447283, "loss": 3.6765, "step": 5105 }, { "epoch": 1.58, "learning_rate": 0.00016464188273523564, "loss": 3.7793, "step": 5106 }, { "epoch": 1.58, "learning_rate": 0.0001646062536759984, "loss": 3.6761, "step": 5107 }, { "epoch": 1.58, "learning_rate": 0.00016457062461676118, "loss": 3.4965, "step": 5108 }, { "epoch": 1.58, "learning_rate": 0.00016453499555752393, "loss": 3.4632, "step": 5109 }, { "epoch": 1.58, "learning_rate": 0.0001644993664982867, "loss": 3.5923, "step": 5110 }, { "epoch": 1.58, "learning_rate": 0.0001644637374390495, "loss": 3.5198, "step": 5111 }, { "epoch": 1.58, "learning_rate": 0.00016442810837981225, "loss": 3.6408, "step": 5112 }, { "epoch": 1.58, "learning_rate": 0.000164392479320575, "loss": 3.4747, "step": 5113 }, { "epoch": 1.58, "learning_rate": 0.0001643568502613378, "loss": 3.4336, "step": 5114 }, { "epoch": 1.58, "learning_rate": 0.00016432122120210054, "loss": 3.2525, "step": 5115 }, { "epoch": 1.58, "learning_rate": 0.00016428559214286335, "loss": 3.4363, "step": 5116 }, { "epoch": 1.58, "learning_rate": 0.0001642499630836261, "loss": 3.4483, "step": 5117 }, { "epoch": 1.58, "learning_rate": 0.0001642143340243889, "loss": 3.4264, "step": 5118 }, { "epoch": 1.58, "learning_rate": 0.00016417870496515167, "loss": 3.1427, "step": 5119 }, { "epoch": 1.58, "learning_rate": 0.00016414307590591442, "loss": 3.2216, "step": 5120 }, { "epoch": 1.58, "learning_rate": 0.00016410744684667718, "loss": 3.0903, "step": 5121 }, { "epoch": 1.58, "learning_rate": 0.00016407181778743996, "loss": 2.8815, "step": 5122 }, { "epoch": 1.58, "learning_rate": 0.00016403618872820274, "loss": 3.3242, "step": 5123 }, { "epoch": 1.58, "learning_rate": 0.0001640005596689655, "loss": 3.1474, "step": 5124 }, { "epoch": 1.58, "learning_rate": 0.00016396493060972828, "loss": 3.3035, "step": 5125 }, { "epoch": 1.58, "learning_rate": 0.00016392930155049103, "loss": 2.9555, "step": 5126 }, { "epoch": 1.58, "learning_rate": 0.00016389367249125382, "loss": 2.7503, "step": 5127 }, { "epoch": 1.58, "learning_rate": 0.00016385804343201657, "loss": 2.9028, "step": 5128 }, { "epoch": 1.58, "learning_rate": 0.00016382241437277935, "loss": 2.6267, "step": 5129 }, { "epoch": 1.58, "learning_rate": 0.00016378678531354213, "loss": 2.553, "step": 5130 }, { "epoch": 1.58, "learning_rate": 0.00016375115625430492, "loss": 2.7375, "step": 5131 }, { "epoch": 1.58, "learning_rate": 0.00016371552719506767, "loss": 2.5582, "step": 5132 }, { "epoch": 1.58, "learning_rate": 0.00016367989813583043, "loss": 2.5497, "step": 5133 }, { "epoch": 1.58, "learning_rate": 0.0001636442690765932, "loss": 2.927, "step": 5134 }, { "epoch": 1.58, "learning_rate": 0.000163608640017356, "loss": 2.5739, "step": 5135 }, { "epoch": 1.59, "learning_rate": 0.00016357301095811877, "loss": 2.7608, "step": 5136 }, { "epoch": 1.59, "learning_rate": 0.00016353738189888153, "loss": 2.3265, "step": 5137 }, { "epoch": 1.59, "learning_rate": 0.00016350175283964428, "loss": 2.4279, "step": 5138 }, { "epoch": 1.59, "learning_rate": 0.00016346612378040706, "loss": 2.7313, "step": 5139 }, { "epoch": 1.59, "learning_rate": 0.00016343049472116985, "loss": 2.083, "step": 5140 }, { "epoch": 1.59, "learning_rate": 0.0001633948656619326, "loss": 4.6043, "step": 5141 }, { "epoch": 1.59, "learning_rate": 0.00016335923660269538, "loss": 4.2703, "step": 5142 }, { "epoch": 1.59, "learning_rate": 0.00016332360754345816, "loss": 4.5211, "step": 5143 }, { "epoch": 1.59, "learning_rate": 0.00016328797848422092, "loss": 4.322, "step": 5144 }, { "epoch": 1.59, "learning_rate": 0.00016325234942498367, "loss": 4.1857, "step": 5145 }, { "epoch": 1.59, "learning_rate": 0.00016321672036574646, "loss": 4.2685, "step": 5146 }, { "epoch": 1.59, "learning_rate": 0.00016318109130650924, "loss": 3.942, "step": 5147 }, { "epoch": 1.59, "learning_rate": 0.00016314546224727202, "loss": 4.027, "step": 5148 }, { "epoch": 1.59, "learning_rate": 0.00016310983318803477, "loss": 4.0084, "step": 5149 }, { "epoch": 1.59, "learning_rate": 0.00016307420412879753, "loss": 3.8896, "step": 5150 }, { "epoch": 1.59, "learning_rate": 0.0001630385750695603, "loss": 3.9545, "step": 5151 }, { "epoch": 1.59, "learning_rate": 0.0001630029460103231, "loss": 4.0051, "step": 5152 }, { "epoch": 1.59, "learning_rate": 0.00016296731695108587, "loss": 4.0763, "step": 5153 }, { "epoch": 1.59, "learning_rate": 0.00016293168789184863, "loss": 3.9928, "step": 5154 }, { "epoch": 1.59, "learning_rate": 0.0001628960588326114, "loss": 4.1413, "step": 5155 }, { "epoch": 1.59, "learning_rate": 0.00016286042977337417, "loss": 3.6431, "step": 5156 }, { "epoch": 1.59, "learning_rate": 0.00016282480071413695, "loss": 3.6869, "step": 5157 }, { "epoch": 1.59, "learning_rate": 0.0001627891716548997, "loss": 3.6262, "step": 5158 }, { "epoch": 1.59, "learning_rate": 0.00016275354259566248, "loss": 3.5168, "step": 5159 }, { "epoch": 1.59, "learning_rate": 0.00016271791353642527, "loss": 3.5706, "step": 5160 }, { "epoch": 1.59, "learning_rate": 0.00016268228447718802, "loss": 3.466, "step": 5161 }, { "epoch": 1.59, "learning_rate": 0.0001626466554179508, "loss": 3.6046, "step": 5162 }, { "epoch": 1.59, "learning_rate": 0.00016261102635871356, "loss": 3.4636, "step": 5163 }, { "epoch": 1.59, "learning_rate": 0.00016257539729947634, "loss": 3.5983, "step": 5164 }, { "epoch": 1.59, "learning_rate": 0.00016253976824023912, "loss": 3.4472, "step": 5165 }, { "epoch": 1.59, "learning_rate": 0.00016250413918100188, "loss": 3.3257, "step": 5166 }, { "epoch": 1.59, "learning_rate": 0.00016246851012176466, "loss": 3.3518, "step": 5167 }, { "epoch": 1.6, "learning_rate": 0.0001624328810625274, "loss": 3.5599, "step": 5168 }, { "epoch": 1.6, "learning_rate": 0.0001623972520032902, "loss": 3.3584, "step": 5169 }, { "epoch": 1.6, "learning_rate": 0.00016236162294405298, "loss": 3.3488, "step": 5170 }, { "epoch": 1.6, "learning_rate": 0.00016232599388481573, "loss": 3.2606, "step": 5171 }, { "epoch": 1.6, "learning_rate": 0.0001622903648255785, "loss": 3.3869, "step": 5172 }, { "epoch": 1.6, "learning_rate": 0.00016225473576634127, "loss": 2.9113, "step": 5173 }, { "epoch": 1.6, "learning_rate": 0.00016221910670710405, "loss": 3.1682, "step": 5174 }, { "epoch": 1.6, "learning_rate": 0.0001621834776478668, "loss": 3.0919, "step": 5175 }, { "epoch": 1.6, "learning_rate": 0.00016214784858862959, "loss": 2.8764, "step": 5176 }, { "epoch": 1.6, "learning_rate": 0.00016211221952939237, "loss": 2.9131, "step": 5177 }, { "epoch": 1.6, "learning_rate": 0.00016207659047015515, "loss": 2.8004, "step": 5178 }, { "epoch": 1.6, "learning_rate": 0.0001620409614109179, "loss": 2.9883, "step": 5179 }, { "epoch": 1.6, "learning_rate": 0.00016200533235168066, "loss": 2.7768, "step": 5180 }, { "epoch": 1.6, "learning_rate": 0.00016196970329244344, "loss": 2.818, "step": 5181 }, { "epoch": 1.6, "learning_rate": 0.00016193407423320622, "loss": 2.2615, "step": 5182 }, { "epoch": 1.6, "learning_rate": 0.000161898445173969, "loss": 2.6329, "step": 5183 }, { "epoch": 1.6, "learning_rate": 0.00016186281611473176, "loss": 2.6819, "step": 5184 }, { "epoch": 1.6, "learning_rate": 0.00016182718705549451, "loss": 2.3662, "step": 5185 }, { "epoch": 1.6, "learning_rate": 0.0001617915579962573, "loss": 2.3583, "step": 5186 }, { "epoch": 1.6, "learning_rate": 0.00016175592893702008, "loss": 2.0906, "step": 5187 }, { "epoch": 1.6, "learning_rate": 0.00016172029987778283, "loss": 2.1285, "step": 5188 }, { "epoch": 1.6, "learning_rate": 0.00016168467081854561, "loss": 2.0296, "step": 5189 }, { "epoch": 1.6, "learning_rate": 0.0001616490417593084, "loss": 1.8773, "step": 5190 }, { "epoch": 1.6, "learning_rate": 0.00016161341270007115, "loss": 4.7715, "step": 5191 }, { "epoch": 1.6, "learning_rate": 0.0001615777836408339, "loss": 4.2783, "step": 5192 }, { "epoch": 1.6, "learning_rate": 0.0001615421545815967, "loss": 4.2123, "step": 5193 }, { "epoch": 1.6, "learning_rate": 0.00016150652552235947, "loss": 4.026, "step": 5194 }, { "epoch": 1.6, "learning_rate": 0.00016147089646312225, "loss": 3.7333, "step": 5195 }, { "epoch": 1.6, "learning_rate": 0.000161435267403885, "loss": 4.0494, "step": 5196 }, { "epoch": 1.6, "learning_rate": 0.00016139963834464776, "loss": 3.8554, "step": 5197 }, { "epoch": 1.6, "learning_rate": 0.00016136400928541054, "loss": 3.8574, "step": 5198 }, { "epoch": 1.6, "learning_rate": 0.00016132838022617333, "loss": 3.8806, "step": 5199 }, { "epoch": 1.6, "learning_rate": 0.0001612927511669361, "loss": 3.471, "step": 5200 }, { "epoch": 1.61, "learning_rate": 0.00016125712210769886, "loss": 3.7846, "step": 5201 }, { "epoch": 1.61, "learning_rate": 0.00016122149304846164, "loss": 3.7306, "step": 5202 }, { "epoch": 1.61, "learning_rate": 0.0001611858639892244, "loss": 3.554, "step": 5203 }, { "epoch": 1.61, "learning_rate": 0.00016115023492998718, "loss": 3.6427, "step": 5204 }, { "epoch": 1.61, "learning_rate": 0.00016111460587074994, "loss": 3.6213, "step": 5205 }, { "epoch": 1.61, "learning_rate": 0.00016107897681151272, "loss": 3.8517, "step": 5206 }, { "epoch": 1.61, "learning_rate": 0.0001610433477522755, "loss": 3.6802, "step": 5207 }, { "epoch": 1.61, "learning_rate": 0.00016100771869303825, "loss": 3.8714, "step": 5208 }, { "epoch": 1.61, "learning_rate": 0.00016097208963380104, "loss": 3.5008, "step": 5209 }, { "epoch": 1.61, "learning_rate": 0.0001609364605745638, "loss": 3.6019, "step": 5210 }, { "epoch": 1.61, "learning_rate": 0.00016090083151532657, "loss": 3.5131, "step": 5211 }, { "epoch": 1.61, "learning_rate": 0.00016086520245608935, "loss": 3.4922, "step": 5212 }, { "epoch": 1.61, "learning_rate": 0.0001608295733968521, "loss": 3.4151, "step": 5213 }, { "epoch": 1.61, "learning_rate": 0.0001607939443376149, "loss": 3.3909, "step": 5214 }, { "epoch": 1.61, "learning_rate": 0.00016075831527837765, "loss": 3.2531, "step": 5215 }, { "epoch": 1.61, "learning_rate": 0.00016072268621914043, "loss": 3.4263, "step": 5216 }, { "epoch": 1.61, "learning_rate": 0.0001606870571599032, "loss": 3.2036, "step": 5217 }, { "epoch": 1.61, "learning_rate": 0.00016065142810066596, "loss": 3.1855, "step": 5218 }, { "epoch": 1.61, "learning_rate": 0.00016061579904142875, "loss": 3.2582, "step": 5219 }, { "epoch": 1.61, "learning_rate": 0.0001605801699821915, "loss": 3.3939, "step": 5220 }, { "epoch": 1.61, "learning_rate": 0.00016054454092295428, "loss": 3.1356, "step": 5221 }, { "epoch": 1.61, "learning_rate": 0.00016050891186371704, "loss": 3.1504, "step": 5222 }, { "epoch": 1.61, "learning_rate": 0.00016047328280447982, "loss": 2.9691, "step": 5223 }, { "epoch": 1.61, "learning_rate": 0.0001604376537452426, "loss": 3.0669, "step": 5224 }, { "epoch": 1.61, "learning_rate": 0.00016040202468600538, "loss": 2.7848, "step": 5225 }, { "epoch": 1.61, "learning_rate": 0.00016036639562676814, "loss": 3.0582, "step": 5226 }, { "epoch": 1.61, "learning_rate": 0.0001603307665675309, "loss": 2.896, "step": 5227 }, { "epoch": 1.61, "learning_rate": 0.00016029513750829367, "loss": 2.9343, "step": 5228 }, { "epoch": 1.61, "learning_rate": 0.00016025950844905646, "loss": 2.8872, "step": 5229 }, { "epoch": 1.61, "learning_rate": 0.0001602238793898192, "loss": 2.664, "step": 5230 }, { "epoch": 1.61, "learning_rate": 0.000160188250330582, "loss": 2.779, "step": 5231 }, { "epoch": 1.61, "learning_rate": 0.00016015262127134475, "loss": 2.5402, "step": 5232 }, { "epoch": 1.62, "learning_rate": 0.00016011699221210753, "loss": 2.3321, "step": 5233 }, { "epoch": 1.62, "learning_rate": 0.0001600813631528703, "loss": 2.5587, "step": 5234 }, { "epoch": 1.62, "learning_rate": 0.00016004573409363307, "loss": 2.5617, "step": 5235 }, { "epoch": 1.62, "learning_rate": 0.00016001010503439585, "loss": 2.3759, "step": 5236 }, { "epoch": 1.62, "learning_rate": 0.00015997447597515863, "loss": 2.1283, "step": 5237 }, { "epoch": 1.62, "learning_rate": 0.00015993884691592138, "loss": 2.0047, "step": 5238 }, { "epoch": 1.62, "learning_rate": 0.00015990321785668414, "loss": 2.1114, "step": 5239 }, { "epoch": 1.62, "learning_rate": 0.00015986758879744692, "loss": 1.6458, "step": 5240 }, { "epoch": 1.62, "learning_rate": 0.0001598319597382097, "loss": 4.4028, "step": 5241 }, { "epoch": 1.62, "learning_rate": 0.00015979633067897249, "loss": 4.1807, "step": 5242 }, { "epoch": 1.62, "learning_rate": 0.00015976070161973524, "loss": 4.0226, "step": 5243 }, { "epoch": 1.62, "learning_rate": 0.000159725072560498, "loss": 4.0885, "step": 5244 }, { "epoch": 1.62, "learning_rate": 0.00015968944350126078, "loss": 4.0597, "step": 5245 }, { "epoch": 1.62, "learning_rate": 0.00015965381444202356, "loss": 3.9328, "step": 5246 }, { "epoch": 1.62, "learning_rate": 0.0001596181853827863, "loss": 3.9196, "step": 5247 }, { "epoch": 1.62, "learning_rate": 0.0001595825563235491, "loss": 3.7107, "step": 5248 }, { "epoch": 1.62, "learning_rate": 0.00015954692726431188, "loss": 3.7358, "step": 5249 }, { "epoch": 1.62, "learning_rate": 0.00015951129820507463, "loss": 3.5633, "step": 5250 }, { "epoch": 1.62, "learning_rate": 0.00015947566914583741, "loss": 3.6134, "step": 5251 }, { "epoch": 1.62, "learning_rate": 0.00015944004008660017, "loss": 3.8023, "step": 5252 }, { "epoch": 1.62, "learning_rate": 0.00015940441102736295, "loss": 3.564, "step": 5253 }, { "epoch": 1.62, "learning_rate": 0.00015936878196812573, "loss": 3.6418, "step": 5254 }, { "epoch": 1.62, "learning_rate": 0.0001593331529088885, "loss": 3.501, "step": 5255 }, { "epoch": 1.62, "learning_rate": 0.00015929752384965127, "loss": 3.7053, "step": 5256 }, { "epoch": 1.62, "learning_rate": 0.00015926189479041402, "loss": 3.7097, "step": 5257 }, { "epoch": 1.62, "learning_rate": 0.0001592262657311768, "loss": 3.5674, "step": 5258 }, { "epoch": 1.62, "learning_rate": 0.0001591906366719396, "loss": 3.4934, "step": 5259 }, { "epoch": 1.62, "learning_rate": 0.00015915500761270234, "loss": 3.4788, "step": 5260 }, { "epoch": 1.62, "learning_rate": 0.00015911937855346512, "loss": 3.512, "step": 5261 }, { "epoch": 1.62, "learning_rate": 0.00015908374949422788, "loss": 3.4634, "step": 5262 }, { "epoch": 1.62, "learning_rate": 0.00015904812043499066, "loss": 3.3822, "step": 5263 }, { "epoch": 1.62, "learning_rate": 0.00015901249137575342, "loss": 3.402, "step": 5264 }, { "epoch": 1.62, "learning_rate": 0.0001589768623165162, "loss": 3.4611, "step": 5265 }, { "epoch": 1.63, "learning_rate": 0.00015894123325727898, "loss": 3.3815, "step": 5266 }, { "epoch": 1.63, "learning_rate": 0.00015890560419804173, "loss": 3.4136, "step": 5267 }, { "epoch": 1.63, "learning_rate": 0.00015886997513880452, "loss": 3.422, "step": 5268 }, { "epoch": 1.63, "learning_rate": 0.00015883434607956727, "loss": 3.0335, "step": 5269 }, { "epoch": 1.63, "learning_rate": 0.00015879871702033005, "loss": 3.1037, "step": 5270 }, { "epoch": 1.63, "learning_rate": 0.00015876308796109283, "loss": 3.2943, "step": 5271 }, { "epoch": 1.63, "learning_rate": 0.00015872745890185562, "loss": 2.9649, "step": 5272 }, { "epoch": 1.63, "learning_rate": 0.00015869182984261837, "loss": 3.2643, "step": 5273 }, { "epoch": 1.63, "learning_rate": 0.00015865620078338113, "loss": 3.2008, "step": 5274 }, { "epoch": 1.63, "learning_rate": 0.0001586205717241439, "loss": 3.1336, "step": 5275 }, { "epoch": 1.63, "learning_rate": 0.0001585849426649067, "loss": 3.0109, "step": 5276 }, { "epoch": 1.63, "learning_rate": 0.00015854931360566944, "loss": 2.9037, "step": 5277 }, { "epoch": 1.63, "learning_rate": 0.00015851368454643223, "loss": 2.9982, "step": 5278 }, { "epoch": 1.63, "learning_rate": 0.00015847805548719498, "loss": 2.7704, "step": 5279 }, { "epoch": 1.63, "learning_rate": 0.00015844242642795776, "loss": 2.7598, "step": 5280 }, { "epoch": 1.63, "learning_rate": 0.00015840679736872054, "loss": 2.6462, "step": 5281 }, { "epoch": 1.63, "learning_rate": 0.0001583711683094833, "loss": 2.484, "step": 5282 }, { "epoch": 1.63, "learning_rate": 0.00015833553925024608, "loss": 2.3663, "step": 5283 }, { "epoch": 1.63, "learning_rate": 0.00015829991019100886, "loss": 2.4378, "step": 5284 }, { "epoch": 1.63, "learning_rate": 0.00015826428113177162, "loss": 2.4262, "step": 5285 }, { "epoch": 1.63, "learning_rate": 0.00015822865207253437, "loss": 2.0486, "step": 5286 }, { "epoch": 1.63, "learning_rate": 0.00015819302301329715, "loss": 2.0989, "step": 5287 }, { "epoch": 1.63, "learning_rate": 0.00015815739395405994, "loss": 2.0386, "step": 5288 }, { "epoch": 1.63, "learning_rate": 0.00015812176489482272, "loss": 2.0589, "step": 5289 }, { "epoch": 1.63, "learning_rate": 0.00015808613583558547, "loss": 1.8574, "step": 5290 }, { "epoch": 1.63, "learning_rate": 0.00015805050677634823, "loss": 4.5484, "step": 5291 }, { "epoch": 1.63, "learning_rate": 0.000158014877717111, "loss": 4.3412, "step": 5292 }, { "epoch": 1.63, "learning_rate": 0.0001579792486578738, "loss": 4.0992, "step": 5293 }, { "epoch": 1.63, "learning_rate": 0.00015794361959863655, "loss": 3.865, "step": 5294 }, { "epoch": 1.63, "learning_rate": 0.00015790799053939933, "loss": 4.1363, "step": 5295 }, { "epoch": 1.63, "learning_rate": 0.0001578723614801621, "loss": 3.836, "step": 5296 }, { "epoch": 1.63, "learning_rate": 0.00015783673242092486, "loss": 3.6615, "step": 5297 }, { "epoch": 1.64, "learning_rate": 0.00015780110336168765, "loss": 3.9645, "step": 5298 }, { "epoch": 1.64, "learning_rate": 0.0001577654743024504, "loss": 3.9112, "step": 5299 }, { "epoch": 1.64, "learning_rate": 0.00015772984524321318, "loss": 3.9274, "step": 5300 }, { "epoch": 1.64, "learning_rate": 0.00015769421618397597, "loss": 3.5643, "step": 5301 }, { "epoch": 1.64, "learning_rate": 0.00015765858712473872, "loss": 3.7087, "step": 5302 }, { "epoch": 1.64, "learning_rate": 0.0001576229580655015, "loss": 3.5157, "step": 5303 }, { "epoch": 1.64, "learning_rate": 0.00015758732900626426, "loss": 3.6347, "step": 5304 }, { "epoch": 1.64, "learning_rate": 0.00015755169994702704, "loss": 3.6884, "step": 5305 }, { "epoch": 1.64, "learning_rate": 0.00015751607088778982, "loss": 3.6845, "step": 5306 }, { "epoch": 1.64, "learning_rate": 0.00015748044182855258, "loss": 3.8435, "step": 5307 }, { "epoch": 1.64, "learning_rate": 0.00015744481276931536, "loss": 3.3399, "step": 5308 }, { "epoch": 1.64, "learning_rate": 0.0001574091837100781, "loss": 3.6724, "step": 5309 }, { "epoch": 1.64, "learning_rate": 0.0001573735546508409, "loss": 3.4578, "step": 5310 }, { "epoch": 1.64, "learning_rate": 0.00015733792559160365, "loss": 3.335, "step": 5311 }, { "epoch": 1.64, "learning_rate": 0.00015730229653236643, "loss": 3.4325, "step": 5312 }, { "epoch": 1.64, "learning_rate": 0.0001572666674731292, "loss": 3.5355, "step": 5313 }, { "epoch": 1.64, "learning_rate": 0.00015723103841389197, "loss": 3.3331, "step": 5314 }, { "epoch": 1.64, "learning_rate": 0.00015719540935465475, "loss": 3.4525, "step": 5315 }, { "epoch": 1.64, "learning_rate": 0.0001571597802954175, "loss": 3.4096, "step": 5316 }, { "epoch": 1.64, "learning_rate": 0.00015712415123618029, "loss": 3.4189, "step": 5317 }, { "epoch": 1.64, "learning_rate": 0.00015708852217694307, "loss": 3.3634, "step": 5318 }, { "epoch": 1.64, "learning_rate": 0.00015705289311770585, "loss": 3.2232, "step": 5319 }, { "epoch": 1.64, "learning_rate": 0.0001570172640584686, "loss": 3.1236, "step": 5320 }, { "epoch": 1.64, "learning_rate": 0.00015698163499923136, "loss": 2.9329, "step": 5321 }, { "epoch": 1.64, "learning_rate": 0.00015694600593999414, "loss": 2.8678, "step": 5322 }, { "epoch": 1.64, "learning_rate": 0.00015691037688075692, "loss": 2.9798, "step": 5323 }, { "epoch": 1.64, "learning_rate": 0.00015687474782151968, "loss": 3.1061, "step": 5324 }, { "epoch": 1.64, "learning_rate": 0.00015683911876228246, "loss": 2.9691, "step": 5325 }, { "epoch": 1.64, "learning_rate": 0.00015680348970304521, "loss": 3.0512, "step": 5326 }, { "epoch": 1.64, "learning_rate": 0.000156767860643808, "loss": 2.8258, "step": 5327 }, { "epoch": 1.64, "learning_rate": 0.00015673223158457075, "loss": 2.7082, "step": 5328 }, { "epoch": 1.64, "learning_rate": 0.00015669660252533353, "loss": 2.6516, "step": 5329 }, { "epoch": 1.65, "learning_rate": 0.00015666097346609631, "loss": 2.7729, "step": 5330 }, { "epoch": 1.65, "learning_rate": 0.0001566253444068591, "loss": 2.487, "step": 5331 }, { "epoch": 1.65, "learning_rate": 0.00015658971534762185, "loss": 2.8002, "step": 5332 }, { "epoch": 1.65, "learning_rate": 0.0001565540862883846, "loss": 2.4237, "step": 5333 }, { "epoch": 1.65, "learning_rate": 0.0001565184572291474, "loss": 2.5043, "step": 5334 }, { "epoch": 1.65, "learning_rate": 0.00015648282816991017, "loss": 2.248, "step": 5335 }, { "epoch": 1.65, "learning_rate": 0.00015644719911067295, "loss": 2.3324, "step": 5336 }, { "epoch": 1.65, "learning_rate": 0.0001564115700514357, "loss": 2.2492, "step": 5337 }, { "epoch": 1.65, "learning_rate": 0.0001563759409921985, "loss": 2.0222, "step": 5338 }, { "epoch": 1.65, "learning_rate": 0.00015634031193296124, "loss": 1.8288, "step": 5339 }, { "epoch": 1.65, "learning_rate": 0.00015630468287372402, "loss": 2.0684, "step": 5340 }, { "epoch": 1.65, "learning_rate": 0.00015626905381448678, "loss": 4.6978, "step": 5341 }, { "epoch": 1.65, "learning_rate": 0.00015623342475524956, "loss": 4.4149, "step": 5342 }, { "epoch": 1.65, "learning_rate": 0.00015619779569601234, "loss": 4.1851, "step": 5343 }, { "epoch": 1.65, "learning_rate": 0.0001561621666367751, "loss": 4.0687, "step": 5344 }, { "epoch": 1.65, "learning_rate": 0.00015612653757753785, "loss": 3.7786, "step": 5345 }, { "epoch": 1.65, "learning_rate": 0.00015609090851830063, "loss": 3.9241, "step": 5346 }, { "epoch": 1.65, "learning_rate": 0.00015605527945906342, "loss": 3.9158, "step": 5347 }, { "epoch": 1.65, "learning_rate": 0.0001560196503998262, "loss": 3.67, "step": 5348 }, { "epoch": 1.65, "learning_rate": 0.00015598402134058895, "loss": 3.9437, "step": 5349 }, { "epoch": 1.65, "learning_rate": 0.00015594839228135174, "loss": 3.6236, "step": 5350 }, { "epoch": 1.65, "learning_rate": 0.0001559127632221145, "loss": 3.4508, "step": 5351 }, { "epoch": 1.65, "learning_rate": 0.00015587713416287727, "loss": 3.7527, "step": 5352 }, { "epoch": 1.65, "learning_rate": 0.00015584150510364005, "loss": 3.5063, "step": 5353 }, { "epoch": 1.65, "learning_rate": 0.0001558058760444028, "loss": 3.5275, "step": 5354 }, { "epoch": 1.65, "learning_rate": 0.0001557702469851656, "loss": 3.5498, "step": 5355 }, { "epoch": 1.65, "learning_rate": 0.00015573461792592834, "loss": 3.6228, "step": 5356 }, { "epoch": 1.65, "learning_rate": 0.00015569898886669113, "loss": 3.3563, "step": 5357 }, { "epoch": 1.65, "learning_rate": 0.00015566335980745388, "loss": 3.612, "step": 5358 }, { "epoch": 1.65, "learning_rate": 0.00015562773074821666, "loss": 3.262, "step": 5359 }, { "epoch": 1.65, "learning_rate": 0.00015559210168897945, "loss": 3.4583, "step": 5360 }, { "epoch": 1.65, "learning_rate": 0.0001555564726297422, "loss": 3.5562, "step": 5361 }, { "epoch": 1.65, "learning_rate": 0.00015552084357050498, "loss": 3.5809, "step": 5362 }, { "epoch": 1.66, "learning_rate": 0.00015548521451126774, "loss": 3.4615, "step": 5363 }, { "epoch": 1.66, "learning_rate": 0.00015544958545203052, "loss": 3.3333, "step": 5364 }, { "epoch": 1.66, "learning_rate": 0.0001554139563927933, "loss": 3.6466, "step": 5365 }, { "epoch": 1.66, "learning_rate": 0.00015537832733355608, "loss": 3.3663, "step": 5366 }, { "epoch": 1.66, "learning_rate": 0.00015534269827431884, "loss": 3.2724, "step": 5367 }, { "epoch": 1.66, "learning_rate": 0.0001553070692150816, "loss": 3.3544, "step": 5368 }, { "epoch": 1.66, "learning_rate": 0.00015527144015584437, "loss": 3.1482, "step": 5369 }, { "epoch": 1.66, "learning_rate": 0.00015523581109660716, "loss": 3.4169, "step": 5370 }, { "epoch": 1.66, "learning_rate": 0.0001552001820373699, "loss": 3.0173, "step": 5371 }, { "epoch": 1.66, "learning_rate": 0.0001551645529781327, "loss": 3.2318, "step": 5372 }, { "epoch": 1.66, "learning_rate": 0.00015512892391889545, "loss": 2.9219, "step": 5373 }, { "epoch": 1.66, "learning_rate": 0.00015509329485965823, "loss": 3.0258, "step": 5374 }, { "epoch": 1.66, "learning_rate": 0.00015505766580042098, "loss": 2.98, "step": 5375 }, { "epoch": 1.66, "learning_rate": 0.00015502203674118377, "loss": 3.0908, "step": 5376 }, { "epoch": 1.66, "learning_rate": 0.00015498640768194655, "loss": 2.9656, "step": 5377 }, { "epoch": 1.66, "learning_rate": 0.00015495077862270933, "loss": 2.6355, "step": 5378 }, { "epoch": 1.66, "learning_rate": 0.00015491514956347208, "loss": 2.666, "step": 5379 }, { "epoch": 1.66, "learning_rate": 0.00015487952050423484, "loss": 2.6063, "step": 5380 }, { "epoch": 1.66, "learning_rate": 0.00015484389144499762, "loss": 2.6801, "step": 5381 }, { "epoch": 1.66, "learning_rate": 0.0001548082623857604, "loss": 2.7131, "step": 5382 }, { "epoch": 1.66, "learning_rate": 0.00015477263332652318, "loss": 2.4428, "step": 5383 }, { "epoch": 1.66, "learning_rate": 0.00015473700426728594, "loss": 2.4866, "step": 5384 }, { "epoch": 1.66, "learning_rate": 0.00015470137520804872, "loss": 2.3281, "step": 5385 }, { "epoch": 1.66, "learning_rate": 0.00015466574614881148, "loss": 2.2687, "step": 5386 }, { "epoch": 1.66, "learning_rate": 0.00015463011708957426, "loss": 2.1568, "step": 5387 }, { "epoch": 1.66, "learning_rate": 0.000154594488030337, "loss": 1.9512, "step": 5388 }, { "epoch": 1.66, "learning_rate": 0.0001545588589710998, "loss": 1.9971, "step": 5389 }, { "epoch": 1.66, "learning_rate": 0.00015452322991186258, "loss": 1.8034, "step": 5390 }, { "epoch": 1.66, "learning_rate": 0.00015448760085262533, "loss": 4.5996, "step": 5391 }, { "epoch": 1.66, "learning_rate": 0.00015445197179338809, "loss": 4.3325, "step": 5392 }, { "epoch": 1.66, "learning_rate": 0.00015441634273415087, "loss": 4.3082, "step": 5393 }, { "epoch": 1.66, "learning_rate": 0.00015438071367491365, "loss": 4.0709, "step": 5394 }, { "epoch": 1.67, "learning_rate": 0.00015434508461567643, "loss": 3.9002, "step": 5395 }, { "epoch": 1.67, "learning_rate": 0.00015430945555643919, "loss": 3.8123, "step": 5396 }, { "epoch": 1.67, "learning_rate": 0.00015427382649720197, "loss": 4.0367, "step": 5397 }, { "epoch": 1.67, "learning_rate": 0.00015423819743796472, "loss": 3.9584, "step": 5398 }, { "epoch": 1.67, "learning_rate": 0.0001542025683787275, "loss": 3.981, "step": 5399 }, { "epoch": 1.67, "learning_rate": 0.0001541669393194903, "loss": 3.7749, "step": 5400 }, { "epoch": 1.67, "learning_rate": 0.00015413131026025304, "loss": 3.6046, "step": 5401 }, { "epoch": 1.67, "learning_rate": 0.00015409568120101582, "loss": 3.6083, "step": 5402 }, { "epoch": 1.67, "learning_rate": 0.00015406005214177858, "loss": 3.6249, "step": 5403 }, { "epoch": 1.67, "learning_rate": 0.00015402442308254136, "loss": 3.6604, "step": 5404 }, { "epoch": 1.67, "learning_rate": 0.00015398879402330411, "loss": 3.6986, "step": 5405 }, { "epoch": 1.67, "learning_rate": 0.0001539531649640669, "loss": 3.5999, "step": 5406 }, { "epoch": 1.67, "learning_rate": 0.00015391753590482968, "loss": 3.3804, "step": 5407 }, { "epoch": 1.67, "learning_rate": 0.00015388190684559243, "loss": 3.8272, "step": 5408 }, { "epoch": 1.67, "learning_rate": 0.00015384627778635522, "loss": 3.3957, "step": 5409 }, { "epoch": 1.67, "learning_rate": 0.00015381064872711797, "loss": 3.4414, "step": 5410 }, { "epoch": 1.67, "learning_rate": 0.00015377501966788075, "loss": 3.393, "step": 5411 }, { "epoch": 1.67, "learning_rate": 0.00015373939060864353, "loss": 3.4137, "step": 5412 }, { "epoch": 1.67, "learning_rate": 0.00015370376154940632, "loss": 3.2973, "step": 5413 }, { "epoch": 1.67, "learning_rate": 0.00015366813249016907, "loss": 3.4753, "step": 5414 }, { "epoch": 1.67, "learning_rate": 0.00015363250343093183, "loss": 3.3918, "step": 5415 }, { "epoch": 1.67, "learning_rate": 0.0001535968743716946, "loss": 3.2406, "step": 5416 }, { "epoch": 1.67, "learning_rate": 0.0001535612453124574, "loss": 3.0821, "step": 5417 }, { "epoch": 1.67, "learning_rate": 0.00015352561625322014, "loss": 2.9332, "step": 5418 }, { "epoch": 1.67, "learning_rate": 0.00015348998719398293, "loss": 3.1528, "step": 5419 }, { "epoch": 1.67, "learning_rate": 0.00015345435813474568, "loss": 2.9657, "step": 5420 }, { "epoch": 1.67, "learning_rate": 0.00015341872907550846, "loss": 3.1445, "step": 5421 }, { "epoch": 1.67, "learning_rate": 0.00015338310001627122, "loss": 2.7626, "step": 5422 }, { "epoch": 1.67, "learning_rate": 0.000153347470957034, "loss": 3.0536, "step": 5423 }, { "epoch": 1.67, "learning_rate": 0.00015331184189779678, "loss": 2.7758, "step": 5424 }, { "epoch": 1.67, "learning_rate": 0.00015327621283855956, "loss": 2.69, "step": 5425 }, { "epoch": 1.67, "learning_rate": 0.00015324058377932232, "loss": 2.9515, "step": 5426 }, { "epoch": 1.67, "learning_rate": 0.00015320495472008507, "loss": 2.7783, "step": 5427 }, { "epoch": 1.68, "learning_rate": 0.00015316932566084785, "loss": 2.7437, "step": 5428 }, { "epoch": 1.68, "learning_rate": 0.00015313369660161064, "loss": 2.6291, "step": 5429 }, { "epoch": 1.68, "learning_rate": 0.00015309806754237342, "loss": 2.7039, "step": 5430 }, { "epoch": 1.68, "learning_rate": 0.00015306243848313617, "loss": 2.6126, "step": 5431 }, { "epoch": 1.68, "learning_rate": 0.00015302680942389895, "loss": 2.8178, "step": 5432 }, { "epoch": 1.68, "learning_rate": 0.0001529911803646617, "loss": 2.6131, "step": 5433 }, { "epoch": 1.68, "learning_rate": 0.0001529555513054245, "loss": 2.562, "step": 5434 }, { "epoch": 1.68, "learning_rate": 0.00015291992224618725, "loss": 2.3232, "step": 5435 }, { "epoch": 1.68, "learning_rate": 0.00015288429318695003, "loss": 2.191, "step": 5436 }, { "epoch": 1.68, "learning_rate": 0.0001528486641277128, "loss": 1.9938, "step": 5437 }, { "epoch": 1.68, "learning_rate": 0.00015281303506847556, "loss": 1.8411, "step": 5438 }, { "epoch": 1.68, "learning_rate": 0.00015277740600923832, "loss": 1.913, "step": 5439 }, { "epoch": 1.68, "learning_rate": 0.0001527417769500011, "loss": 1.8718, "step": 5440 }, { "epoch": 1.68, "learning_rate": 0.00015270614789076388, "loss": 4.6979, "step": 5441 }, { "epoch": 1.68, "learning_rate": 0.00015267051883152666, "loss": 4.1634, "step": 5442 }, { "epoch": 1.68, "learning_rate": 0.00015263488977228942, "loss": 4.0241, "step": 5443 }, { "epoch": 1.68, "learning_rate": 0.0001525992607130522, "loss": 3.9266, "step": 5444 }, { "epoch": 1.68, "learning_rate": 0.00015256363165381496, "loss": 3.9151, "step": 5445 }, { "epoch": 1.68, "learning_rate": 0.00015252800259457774, "loss": 3.7785, "step": 5446 }, { "epoch": 1.68, "learning_rate": 0.00015249237353534052, "loss": 3.7194, "step": 5447 }, { "epoch": 1.68, "learning_rate": 0.00015245674447610327, "loss": 3.811, "step": 5448 }, { "epoch": 1.68, "learning_rate": 0.00015242111541686606, "loss": 4.0665, "step": 5449 }, { "epoch": 1.68, "learning_rate": 0.0001523854863576288, "loss": 3.6027, "step": 5450 }, { "epoch": 1.68, "learning_rate": 0.0001523498572983916, "loss": 3.8156, "step": 5451 }, { "epoch": 1.68, "learning_rate": 0.00015231422823915435, "loss": 3.7478, "step": 5452 }, { "epoch": 1.68, "learning_rate": 0.00015227859917991713, "loss": 3.5301, "step": 5453 }, { "epoch": 1.68, "learning_rate": 0.0001522429701206799, "loss": 3.602, "step": 5454 }, { "epoch": 1.68, "learning_rate": 0.00015220734106144267, "loss": 3.6644, "step": 5455 }, { "epoch": 1.68, "learning_rate": 0.00015217171200220545, "loss": 3.751, "step": 5456 }, { "epoch": 1.68, "learning_rate": 0.0001521360829429682, "loss": 3.5989, "step": 5457 }, { "epoch": 1.68, "learning_rate": 0.00015210045388373098, "loss": 3.5263, "step": 5458 }, { "epoch": 1.68, "learning_rate": 0.00015206482482449377, "loss": 3.541, "step": 5459 }, { "epoch": 1.69, "learning_rate": 0.00015202919576525652, "loss": 3.3655, "step": 5460 }, { "epoch": 1.69, "learning_rate": 0.0001519935667060193, "loss": 3.2226, "step": 5461 }, { "epoch": 1.69, "learning_rate": 0.00015195793764678206, "loss": 3.4594, "step": 5462 }, { "epoch": 1.69, "learning_rate": 0.00015192230858754484, "loss": 3.3427, "step": 5463 }, { "epoch": 1.69, "learning_rate": 0.00015188667952830762, "loss": 3.4615, "step": 5464 }, { "epoch": 1.69, "learning_rate": 0.00015185105046907038, "loss": 3.2545, "step": 5465 }, { "epoch": 1.69, "learning_rate": 0.00015181542140983316, "loss": 3.311, "step": 5466 }, { "epoch": 1.69, "learning_rate": 0.00015177979235059594, "loss": 3.2136, "step": 5467 }, { "epoch": 1.69, "learning_rate": 0.0001517441632913587, "loss": 3.1311, "step": 5468 }, { "epoch": 1.69, "learning_rate": 0.00015170853423212145, "loss": 3.2447, "step": 5469 }, { "epoch": 1.69, "learning_rate": 0.00015167290517288423, "loss": 3.1598, "step": 5470 }, { "epoch": 1.69, "learning_rate": 0.00015163727611364701, "loss": 3.0128, "step": 5471 }, { "epoch": 1.69, "learning_rate": 0.0001516016470544098, "loss": 3.1044, "step": 5472 }, { "epoch": 1.69, "learning_rate": 0.00015156601799517255, "loss": 3.1648, "step": 5473 }, { "epoch": 1.69, "learning_rate": 0.0001515303889359353, "loss": 2.8662, "step": 5474 }, { "epoch": 1.69, "learning_rate": 0.0001514947598766981, "loss": 3.0589, "step": 5475 }, { "epoch": 1.69, "learning_rate": 0.00015145913081746087, "loss": 2.9643, "step": 5476 }, { "epoch": 1.69, "learning_rate": 0.00015142350175822365, "loss": 2.8947, "step": 5477 }, { "epoch": 1.69, "learning_rate": 0.0001513878726989864, "loss": 2.7112, "step": 5478 }, { "epoch": 1.69, "learning_rate": 0.0001513522436397492, "loss": 2.6601, "step": 5479 }, { "epoch": 1.69, "learning_rate": 0.00015131661458051194, "loss": 2.7519, "step": 5480 }, { "epoch": 1.69, "learning_rate": 0.00015128098552127472, "loss": 2.6266, "step": 5481 }, { "epoch": 1.69, "learning_rate": 0.00015124535646203748, "loss": 2.6025, "step": 5482 }, { "epoch": 1.69, "learning_rate": 0.00015120972740280026, "loss": 2.6708, "step": 5483 }, { "epoch": 1.69, "learning_rate": 0.00015117409834356304, "loss": 2.541, "step": 5484 }, { "epoch": 1.69, "learning_rate": 0.0001511384692843258, "loss": 2.1244, "step": 5485 }, { "epoch": 1.69, "learning_rate": 0.00015110284022508855, "loss": 2.1802, "step": 5486 }, { "epoch": 1.69, "learning_rate": 0.00015106721116585133, "loss": 2.3398, "step": 5487 }, { "epoch": 1.69, "learning_rate": 0.00015103158210661412, "loss": 2.3417, "step": 5488 }, { "epoch": 1.69, "learning_rate": 0.0001509959530473769, "loss": 1.9809, "step": 5489 }, { "epoch": 1.69, "learning_rate": 0.00015096032398813965, "loss": 1.8383, "step": 5490 }, { "epoch": 1.69, "learning_rate": 0.00015092469492890243, "loss": 4.4891, "step": 5491 }, { "epoch": 1.7, "learning_rate": 0.0001508890658696652, "loss": 4.4108, "step": 5492 }, { "epoch": 1.7, "learning_rate": 0.00015085343681042797, "loss": 4.0917, "step": 5493 }, { "epoch": 1.7, "learning_rate": 0.00015081780775119075, "loss": 3.9461, "step": 5494 }, { "epoch": 1.7, "learning_rate": 0.0001507821786919535, "loss": 3.8929, "step": 5495 }, { "epoch": 1.7, "learning_rate": 0.0001507465496327163, "loss": 3.8981, "step": 5496 }, { "epoch": 1.7, "learning_rate": 0.00015071092057347904, "loss": 3.6854, "step": 5497 }, { "epoch": 1.7, "learning_rate": 0.00015067529151424183, "loss": 3.6973, "step": 5498 }, { "epoch": 1.7, "learning_rate": 0.00015063966245500458, "loss": 3.8432, "step": 5499 }, { "epoch": 1.7, "learning_rate": 0.00015060403339576736, "loss": 3.6918, "step": 5500 }, { "epoch": 1.7, "eval_bleu": 4.030183230582677e-18, "eval_loss": 4.157788276672363, "eval_runtime": 2571.6238, "eval_samples_per_second": 5.74, "eval_steps_per_second": 0.717, "step": 5500 }, { "epoch": 1.7, "learning_rate": 0.00015056840433653014, "loss": 3.658, "step": 5501 }, { "epoch": 1.7, "learning_rate": 0.0001505327752772929, "loss": 3.8132, "step": 5502 }, { "epoch": 1.7, "learning_rate": 0.00015049714621805568, "loss": 3.8641, "step": 5503 }, { "epoch": 1.7, "learning_rate": 0.00015046151715881844, "loss": 3.5749, "step": 5504 }, { "epoch": 1.7, "learning_rate": 0.00015042588809958122, "loss": 3.663, "step": 5505 }, { "epoch": 1.7, "learning_rate": 0.000150390259040344, "loss": 3.4505, "step": 5506 }, { "epoch": 1.7, "learning_rate": 0.00015035462998110675, "loss": 3.6202, "step": 5507 }, { "epoch": 1.7, "learning_rate": 0.00015031900092186954, "loss": 3.5025, "step": 5508 }, { "epoch": 1.7, "learning_rate": 0.0001502833718626323, "loss": 3.5353, "step": 5509 }, { "epoch": 1.7, "learning_rate": 0.00015024774280339507, "loss": 3.6737, "step": 5510 }, { "epoch": 1.7, "learning_rate": 0.00015021211374415786, "loss": 3.267, "step": 5511 }, { "epoch": 1.7, "learning_rate": 0.0001501764846849206, "loss": 3.524, "step": 5512 }, { "epoch": 1.7, "learning_rate": 0.0001501408556256834, "loss": 3.4764, "step": 5513 }, { "epoch": 1.7, "learning_rate": 0.00015010522656644617, "loss": 3.3606, "step": 5514 }, { "epoch": 1.7, "learning_rate": 0.00015006959750720893, "loss": 3.147, "step": 5515 }, { "epoch": 1.7, "learning_rate": 0.00015003396844797168, "loss": 3.2406, "step": 5516 }, { "epoch": 1.7, "learning_rate": 0.00014999833938873447, "loss": 3.2133, "step": 5517 }, { "epoch": 1.7, "learning_rate": 0.00014996271032949725, "loss": 3.5627, "step": 5518 }, { "epoch": 1.7, "learning_rate": 0.00014992708127026003, "loss": 3.0251, "step": 5519 }, { "epoch": 1.7, "learning_rate": 0.00014989145221102278, "loss": 3.3646, "step": 5520 }, { "epoch": 1.7, "learning_rate": 0.00014985582315178554, "loss": 2.9825, "step": 5521 }, { "epoch": 1.7, "learning_rate": 0.00014982019409254832, "loss": 3.1127, "step": 5522 }, { "epoch": 1.7, "learning_rate": 0.0001497845650333111, "loss": 3.1705, "step": 5523 }, { "epoch": 1.7, "learning_rate": 0.00014974893597407386, "loss": 2.7239, "step": 5524 }, { "epoch": 1.71, "learning_rate": 0.00014971330691483664, "loss": 3.009, "step": 5525 }, { "epoch": 1.71, "learning_rate": 0.00014967767785559942, "loss": 2.9662, "step": 5526 }, { "epoch": 1.71, "learning_rate": 0.00014964204879636218, "loss": 2.8263, "step": 5527 }, { "epoch": 1.71, "learning_rate": 0.00014960641973712496, "loss": 2.8388, "step": 5528 }, { "epoch": 1.71, "learning_rate": 0.0001495707906778877, "loss": 2.529, "step": 5529 }, { "epoch": 1.71, "learning_rate": 0.0001495351616186505, "loss": 2.5943, "step": 5530 }, { "epoch": 1.71, "learning_rate": 0.00014949953255941328, "loss": 2.5313, "step": 5531 }, { "epoch": 1.71, "learning_rate": 0.00014946390350017603, "loss": 2.6059, "step": 5532 }, { "epoch": 1.71, "learning_rate": 0.00014942827444093879, "loss": 2.3711, "step": 5533 }, { "epoch": 1.71, "learning_rate": 0.00014939264538170157, "loss": 2.4221, "step": 5534 }, { "epoch": 1.71, "learning_rate": 0.00014935701632246435, "loss": 2.2722, "step": 5535 }, { "epoch": 1.71, "learning_rate": 0.00014932138726322713, "loss": 2.0282, "step": 5536 }, { "epoch": 1.71, "learning_rate": 0.00014928575820398989, "loss": 2.3052, "step": 5537 }, { "epoch": 1.71, "learning_rate": 0.00014925012914475267, "loss": 2.1528, "step": 5538 }, { "epoch": 1.71, "learning_rate": 0.00014921450008551542, "loss": 1.9045, "step": 5539 }, { "epoch": 1.71, "learning_rate": 0.0001491788710262782, "loss": 1.6845, "step": 5540 }, { "epoch": 1.71, "learning_rate": 0.00014914324196704096, "loss": 4.6296, "step": 5541 }, { "epoch": 1.71, "learning_rate": 0.00014910761290780374, "loss": 4.1702, "step": 5542 }, { "epoch": 1.71, "learning_rate": 0.00014907198384856652, "loss": 4.1163, "step": 5543 }, { "epoch": 1.71, "learning_rate": 0.00014903635478932928, "loss": 3.6316, "step": 5544 }, { "epoch": 1.71, "learning_rate": 0.00014900072573009206, "loss": 4.0896, "step": 5545 }, { "epoch": 1.71, "learning_rate": 0.00014896509667085481, "loss": 3.6407, "step": 5546 }, { "epoch": 1.71, "learning_rate": 0.0001489294676116176, "loss": 3.6821, "step": 5547 }, { "epoch": 1.71, "learning_rate": 0.00014889383855238038, "loss": 3.5323, "step": 5548 }, { "epoch": 1.71, "learning_rate": 0.00014885820949314313, "loss": 3.5287, "step": 5549 }, { "epoch": 1.71, "learning_rate": 0.00014882258043390591, "loss": 3.4893, "step": 5550 }, { "epoch": 1.71, "learning_rate": 0.00014878695137466867, "loss": 3.6535, "step": 5551 }, { "epoch": 1.71, "learning_rate": 0.00014875132231543145, "loss": 3.5942, "step": 5552 }, { "epoch": 1.71, "learning_rate": 0.00014871569325619423, "loss": 3.5964, "step": 5553 }, { "epoch": 1.71, "learning_rate": 0.000148680064196957, "loss": 3.5881, "step": 5554 }, { "epoch": 1.71, "learning_rate": 0.00014864443513771977, "loss": 3.5855, "step": 5555 }, { "epoch": 1.71, "learning_rate": 0.00014860880607848252, "loss": 3.4316, "step": 5556 }, { "epoch": 1.72, "learning_rate": 0.0001485731770192453, "loss": 3.3362, "step": 5557 }, { "epoch": 1.72, "learning_rate": 0.00014853754796000806, "loss": 3.7759, "step": 5558 }, { "epoch": 1.72, "learning_rate": 0.00014850191890077084, "loss": 3.7938, "step": 5559 }, { "epoch": 1.72, "learning_rate": 0.00014846628984153362, "loss": 3.365, "step": 5560 }, { "epoch": 1.72, "learning_rate": 0.0001484306607822964, "loss": 3.2844, "step": 5561 }, { "epoch": 1.72, "learning_rate": 0.00014839503172305916, "loss": 3.186, "step": 5562 }, { "epoch": 1.72, "learning_rate": 0.00014835940266382192, "loss": 3.3805, "step": 5563 }, { "epoch": 1.72, "learning_rate": 0.0001483237736045847, "loss": 3.4512, "step": 5564 }, { "epoch": 1.72, "learning_rate": 0.00014828814454534748, "loss": 3.5641, "step": 5565 }, { "epoch": 1.72, "learning_rate": 0.00014825251548611026, "loss": 3.2269, "step": 5566 }, { "epoch": 1.72, "learning_rate": 0.00014821688642687302, "loss": 3.0515, "step": 5567 }, { "epoch": 1.72, "learning_rate": 0.00014818125736763577, "loss": 3.4683, "step": 5568 }, { "epoch": 1.72, "learning_rate": 0.00014814562830839855, "loss": 3.1459, "step": 5569 }, { "epoch": 1.72, "learning_rate": 0.00014810999924916134, "loss": 3.0918, "step": 5570 }, { "epoch": 1.72, "learning_rate": 0.0001480743701899241, "loss": 2.9646, "step": 5571 }, { "epoch": 1.72, "learning_rate": 0.00014803874113068687, "loss": 3.2952, "step": 5572 }, { "epoch": 1.72, "learning_rate": 0.00014800311207144965, "loss": 3.0044, "step": 5573 }, { "epoch": 1.72, "learning_rate": 0.0001479674830122124, "loss": 2.9747, "step": 5574 }, { "epoch": 1.72, "learning_rate": 0.0001479318539529752, "loss": 2.8545, "step": 5575 }, { "epoch": 1.72, "learning_rate": 0.00014789622489373795, "loss": 2.8722, "step": 5576 }, { "epoch": 1.72, "learning_rate": 0.00014786059583450073, "loss": 2.6834, "step": 5577 }, { "epoch": 1.72, "learning_rate": 0.0001478249667752635, "loss": 2.6967, "step": 5578 }, { "epoch": 1.72, "learning_rate": 0.00014778933771602626, "loss": 2.6918, "step": 5579 }, { "epoch": 1.72, "learning_rate": 0.00014775370865678902, "loss": 2.7595, "step": 5580 }, { "epoch": 1.72, "learning_rate": 0.0001477180795975518, "loss": 2.6591, "step": 5581 }, { "epoch": 1.72, "learning_rate": 0.00014768245053831458, "loss": 2.6631, "step": 5582 }, { "epoch": 1.72, "learning_rate": 0.00014764682147907736, "loss": 2.2898, "step": 5583 }, { "epoch": 1.72, "learning_rate": 0.00014761119241984012, "loss": 2.4094, "step": 5584 }, { "epoch": 1.72, "learning_rate": 0.0001475755633606029, "loss": 2.1231, "step": 5585 }, { "epoch": 1.72, "learning_rate": 0.00014753993430136566, "loss": 2.0845, "step": 5586 }, { "epoch": 1.72, "learning_rate": 0.00014750430524212844, "loss": 2.0964, "step": 5587 }, { "epoch": 1.72, "learning_rate": 0.0001474686761828912, "loss": 1.9888, "step": 5588 }, { "epoch": 1.72, "learning_rate": 0.00014743304712365397, "loss": 1.837, "step": 5589 }, { "epoch": 1.73, "learning_rate": 0.00014739741806441676, "loss": 1.8832, "step": 5590 }, { "epoch": 1.73, "learning_rate": 0.0001473617890051795, "loss": 4.2848, "step": 5591 }, { "epoch": 1.73, "learning_rate": 0.0001473261599459423, "loss": 4.1003, "step": 5592 }, { "epoch": 1.73, "learning_rate": 0.00014729053088670505, "loss": 4.1547, "step": 5593 }, { "epoch": 1.73, "learning_rate": 0.00014725490182746783, "loss": 4.0653, "step": 5594 }, { "epoch": 1.73, "learning_rate": 0.0001472192727682306, "loss": 3.7567, "step": 5595 }, { "epoch": 1.73, "learning_rate": 0.0001471836437089934, "loss": 3.6083, "step": 5596 }, { "epoch": 1.73, "learning_rate": 0.00014714801464975615, "loss": 3.6028, "step": 5597 }, { "epoch": 1.73, "learning_rate": 0.0001471123855905189, "loss": 3.5621, "step": 5598 }, { "epoch": 1.73, "learning_rate": 0.00014707675653128168, "loss": 3.6638, "step": 5599 }, { "epoch": 1.73, "learning_rate": 0.00014704112747204447, "loss": 3.676, "step": 5600 }, { "epoch": 1.73, "learning_rate": 0.00014700549841280722, "loss": 3.6808, "step": 5601 }, { "epoch": 1.73, "learning_rate": 0.00014696986935357, "loss": 3.6699, "step": 5602 }, { "epoch": 1.73, "learning_rate": 0.00014693424029433276, "loss": 3.3798, "step": 5603 }, { "epoch": 1.73, "learning_rate": 0.00014689861123509554, "loss": 3.6955, "step": 5604 }, { "epoch": 1.73, "learning_rate": 0.0001468629821758583, "loss": 3.6643, "step": 5605 }, { "epoch": 1.73, "learning_rate": 0.00014682735311662108, "loss": 3.4743, "step": 5606 }, { "epoch": 1.73, "learning_rate": 0.00014679172405738386, "loss": 3.5675, "step": 5607 }, { "epoch": 1.73, "learning_rate": 0.00014675609499814664, "loss": 3.3213, "step": 5608 }, { "epoch": 1.73, "learning_rate": 0.0001467204659389094, "loss": 3.2201, "step": 5609 }, { "epoch": 1.73, "learning_rate": 0.00014668483687967215, "loss": 3.5433, "step": 5610 }, { "epoch": 1.73, "learning_rate": 0.00014664920782043493, "loss": 3.3921, "step": 5611 }, { "epoch": 1.73, "learning_rate": 0.0001466135787611977, "loss": 3.3963, "step": 5612 }, { "epoch": 1.73, "learning_rate": 0.0001465779497019605, "loss": 3.4627, "step": 5613 }, { "epoch": 1.73, "learning_rate": 0.00014654232064272325, "loss": 3.2099, "step": 5614 }, { "epoch": 1.73, "learning_rate": 0.000146506691583486, "loss": 3.3638, "step": 5615 }, { "epoch": 1.73, "learning_rate": 0.0001464710625242488, "loss": 3.3504, "step": 5616 }, { "epoch": 1.73, "learning_rate": 0.00014643543346501157, "loss": 3.3775, "step": 5617 }, { "epoch": 1.73, "learning_rate": 0.00014639980440577432, "loss": 3.2816, "step": 5618 }, { "epoch": 1.73, "learning_rate": 0.0001463641753465371, "loss": 3.1509, "step": 5619 }, { "epoch": 1.73, "learning_rate": 0.0001463285462872999, "loss": 2.9796, "step": 5620 }, { "epoch": 1.73, "learning_rate": 0.00014629291722806264, "loss": 3.0476, "step": 5621 }, { "epoch": 1.74, "learning_rate": 0.0001462572881688254, "loss": 2.975, "step": 5622 }, { "epoch": 1.74, "learning_rate": 0.00014622165910958818, "loss": 2.9154, "step": 5623 }, { "epoch": 1.74, "learning_rate": 0.00014618603005035096, "loss": 2.8408, "step": 5624 }, { "epoch": 1.74, "learning_rate": 0.00014615040099111374, "loss": 2.8701, "step": 5625 }, { "epoch": 1.74, "learning_rate": 0.0001461147719318765, "loss": 2.8103, "step": 5626 }, { "epoch": 1.74, "learning_rate": 0.00014607914287263925, "loss": 2.784, "step": 5627 }, { "epoch": 1.74, "learning_rate": 0.00014604351381340203, "loss": 2.7294, "step": 5628 }, { "epoch": 1.74, "learning_rate": 0.00014600788475416482, "loss": 2.7379, "step": 5629 }, { "epoch": 1.74, "learning_rate": 0.0001459722556949276, "loss": 2.3428, "step": 5630 }, { "epoch": 1.74, "learning_rate": 0.00014593662663569035, "loss": 2.4326, "step": 5631 }, { "epoch": 1.74, "learning_rate": 0.00014590099757645313, "loss": 2.3228, "step": 5632 }, { "epoch": 1.74, "learning_rate": 0.0001458653685172159, "loss": 2.4725, "step": 5633 }, { "epoch": 1.74, "learning_rate": 0.00014582973945797867, "loss": 2.2962, "step": 5634 }, { "epoch": 1.74, "learning_rate": 0.00014579411039874143, "loss": 2.4484, "step": 5635 }, { "epoch": 1.74, "learning_rate": 0.0001457584813395042, "loss": 2.2544, "step": 5636 }, { "epoch": 1.74, "learning_rate": 0.000145722852280267, "loss": 2.0403, "step": 5637 }, { "epoch": 1.74, "learning_rate": 0.00014568722322102974, "loss": 1.9084, "step": 5638 }, { "epoch": 1.74, "learning_rate": 0.0001456515941617925, "loss": 1.7714, "step": 5639 }, { "epoch": 1.74, "learning_rate": 0.00014561596510255528, "loss": 2.093, "step": 5640 }, { "epoch": 1.74, "learning_rate": 0.00014558033604331806, "loss": 4.4372, "step": 5641 }, { "epoch": 1.74, "learning_rate": 0.00014554470698408084, "loss": 4.3955, "step": 5642 }, { "epoch": 1.74, "learning_rate": 0.00014550907792484363, "loss": 4.0727, "step": 5643 }, { "epoch": 1.74, "learning_rate": 0.00014547344886560638, "loss": 4.1392, "step": 5644 }, { "epoch": 1.74, "learning_rate": 0.00014543781980636914, "loss": 3.7589, "step": 5645 }, { "epoch": 1.74, "learning_rate": 0.00014540219074713192, "loss": 3.8234, "step": 5646 }, { "epoch": 1.74, "learning_rate": 0.0001453665616878947, "loss": 3.7734, "step": 5647 }, { "epoch": 1.74, "learning_rate": 0.00014533093262865745, "loss": 3.6684, "step": 5648 }, { "epoch": 1.74, "learning_rate": 0.00014529530356942024, "loss": 3.6131, "step": 5649 }, { "epoch": 1.74, "learning_rate": 0.000145259674510183, "loss": 3.5179, "step": 5650 }, { "epoch": 1.74, "learning_rate": 0.00014522404545094577, "loss": 3.6042, "step": 5651 }, { "epoch": 1.74, "learning_rate": 0.00014518841639170853, "loss": 3.6474, "step": 5652 }, { "epoch": 1.74, "learning_rate": 0.0001451527873324713, "loss": 3.4946, "step": 5653 }, { "epoch": 1.75, "learning_rate": 0.0001451171582732341, "loss": 3.2698, "step": 5654 }, { "epoch": 1.75, "learning_rate": 0.00014508152921399687, "loss": 3.43, "step": 5655 }, { "epoch": 1.75, "learning_rate": 0.00014504590015475963, "loss": 3.4751, "step": 5656 }, { "epoch": 1.75, "learning_rate": 0.00014501027109552238, "loss": 3.5235, "step": 5657 }, { "epoch": 1.75, "learning_rate": 0.00014497464203628516, "loss": 3.4055, "step": 5658 }, { "epoch": 1.75, "learning_rate": 0.00014493901297704795, "loss": 3.3193, "step": 5659 }, { "epoch": 1.75, "learning_rate": 0.00014490338391781073, "loss": 3.3369, "step": 5660 }, { "epoch": 1.75, "learning_rate": 0.00014486775485857348, "loss": 3.2913, "step": 5661 }, { "epoch": 1.75, "learning_rate": 0.00014483212579933624, "loss": 3.2146, "step": 5662 }, { "epoch": 1.75, "learning_rate": 0.00014479649674009902, "loss": 3.5114, "step": 5663 }, { "epoch": 1.75, "learning_rate": 0.0001447608676808618, "loss": 3.2997, "step": 5664 }, { "epoch": 1.75, "learning_rate": 0.00014472523862162456, "loss": 3.1016, "step": 5665 }, { "epoch": 1.75, "learning_rate": 0.00014468960956238734, "loss": 3.4805, "step": 5666 }, { "epoch": 1.75, "learning_rate": 0.00014465398050315012, "loss": 3.4458, "step": 5667 }, { "epoch": 1.75, "learning_rate": 0.00014461835144391287, "loss": 2.9711, "step": 5668 }, { "epoch": 1.75, "learning_rate": 0.00014458272238467563, "loss": 3.1976, "step": 5669 }, { "epoch": 1.75, "learning_rate": 0.0001445470933254384, "loss": 3.0139, "step": 5670 }, { "epoch": 1.75, "learning_rate": 0.0001445114642662012, "loss": 3.0189, "step": 5671 }, { "epoch": 1.75, "learning_rate": 0.00014447583520696398, "loss": 2.8537, "step": 5672 }, { "epoch": 1.75, "learning_rate": 0.00014444020614772673, "loss": 2.9053, "step": 5673 }, { "epoch": 1.75, "learning_rate": 0.00014440457708848948, "loss": 2.8189, "step": 5674 }, { "epoch": 1.75, "learning_rate": 0.00014436894802925227, "loss": 2.8961, "step": 5675 }, { "epoch": 1.75, "learning_rate": 0.00014433331897001505, "loss": 3.1378, "step": 5676 }, { "epoch": 1.75, "learning_rate": 0.00014429768991077783, "loss": 2.8621, "step": 5677 }, { "epoch": 1.75, "learning_rate": 0.00014426206085154059, "loss": 2.6243, "step": 5678 }, { "epoch": 1.75, "learning_rate": 0.00014422643179230337, "loss": 2.9517, "step": 5679 }, { "epoch": 1.75, "learning_rate": 0.00014419080273306612, "loss": 2.736, "step": 5680 }, { "epoch": 1.75, "learning_rate": 0.0001441551736738289, "loss": 2.4369, "step": 5681 }, { "epoch": 1.75, "learning_rate": 0.00014411954461459166, "loss": 2.3669, "step": 5682 }, { "epoch": 1.75, "learning_rate": 0.00014408391555535444, "loss": 2.4263, "step": 5683 }, { "epoch": 1.75, "learning_rate": 0.00014404828649611722, "loss": 2.6093, "step": 5684 }, { "epoch": 1.75, "learning_rate": 0.00014401265743687998, "loss": 2.145, "step": 5685 }, { "epoch": 1.75, "learning_rate": 0.00014397702837764276, "loss": 2.211, "step": 5686 }, { "epoch": 1.76, "learning_rate": 0.00014394139931840551, "loss": 2.1429, "step": 5687 }, { "epoch": 1.76, "learning_rate": 0.0001439057702591683, "loss": 1.8354, "step": 5688 }, { "epoch": 1.76, "learning_rate": 0.00014387014119993108, "loss": 1.7859, "step": 5689 }, { "epoch": 1.76, "learning_rate": 0.00014383451214069386, "loss": 1.9359, "step": 5690 }, { "epoch": 1.76, "learning_rate": 0.00014379888308145661, "loss": 4.3598, "step": 5691 }, { "epoch": 1.76, "learning_rate": 0.00014376325402221937, "loss": 4.1434, "step": 5692 }, { "epoch": 1.76, "learning_rate": 0.00014372762496298215, "loss": 4.1539, "step": 5693 }, { "epoch": 1.76, "learning_rate": 0.00014369199590374493, "loss": 4.1399, "step": 5694 }, { "epoch": 1.76, "learning_rate": 0.0001436563668445077, "loss": 3.6838, "step": 5695 }, { "epoch": 1.76, "learning_rate": 0.00014362073778527047, "loss": 3.9645, "step": 5696 }, { "epoch": 1.76, "learning_rate": 0.00014358510872603322, "loss": 3.7597, "step": 5697 }, { "epoch": 1.76, "learning_rate": 0.000143549479666796, "loss": 3.6812, "step": 5698 }, { "epoch": 1.76, "learning_rate": 0.00014351385060755876, "loss": 3.8042, "step": 5699 }, { "epoch": 1.76, "learning_rate": 0.00014347822154832154, "loss": 3.4445, "step": 5700 }, { "epoch": 1.76, "learning_rate": 0.00014344259248908432, "loss": 3.6026, "step": 5701 }, { "epoch": 1.76, "learning_rate": 0.0001434069634298471, "loss": 3.5622, "step": 5702 }, { "epoch": 1.76, "learning_rate": 0.00014337133437060986, "loss": 3.6085, "step": 5703 }, { "epoch": 1.76, "learning_rate": 0.00014333570531137262, "loss": 3.2756, "step": 5704 }, { "epoch": 1.76, "learning_rate": 0.0001433000762521354, "loss": 3.4594, "step": 5705 }, { "epoch": 1.76, "learning_rate": 0.00014326444719289818, "loss": 3.6386, "step": 5706 }, { "epoch": 1.76, "learning_rate": 0.00014322881813366096, "loss": 3.5918, "step": 5707 }, { "epoch": 1.76, "learning_rate": 0.00014319318907442372, "loss": 3.3417, "step": 5708 }, { "epoch": 1.76, "learning_rate": 0.00014315756001518647, "loss": 3.6192, "step": 5709 }, { "epoch": 1.76, "learning_rate": 0.00014312193095594925, "loss": 3.4743, "step": 5710 }, { "epoch": 1.76, "learning_rate": 0.00014308630189671203, "loss": 3.3527, "step": 5711 }, { "epoch": 1.76, "learning_rate": 0.0001430506728374748, "loss": 3.3838, "step": 5712 }, { "epoch": 1.76, "learning_rate": 0.00014301504377823757, "loss": 3.3154, "step": 5713 }, { "epoch": 1.76, "learning_rate": 0.00014297941471900035, "loss": 3.3733, "step": 5714 }, { "epoch": 1.76, "learning_rate": 0.0001429437856597631, "loss": 3.4356, "step": 5715 }, { "epoch": 1.76, "learning_rate": 0.00014290815660052586, "loss": 3.2536, "step": 5716 }, { "epoch": 1.76, "learning_rate": 0.00014287252754128864, "loss": 3.2536, "step": 5717 }, { "epoch": 1.76, "learning_rate": 0.00014283689848205143, "loss": 3.2298, "step": 5718 }, { "epoch": 1.77, "learning_rate": 0.0001428012694228142, "loss": 3.0417, "step": 5719 }, { "epoch": 1.77, "learning_rate": 0.00014276564036357696, "loss": 3.17, "step": 5720 }, { "epoch": 1.77, "learning_rate": 0.00014273001130433972, "loss": 3.173, "step": 5721 }, { "epoch": 1.77, "learning_rate": 0.0001426943822451025, "loss": 3.0711, "step": 5722 }, { "epoch": 1.77, "learning_rate": 0.00014265875318586528, "loss": 3.0671, "step": 5723 }, { "epoch": 1.77, "learning_rate": 0.00014262312412662806, "loss": 2.843, "step": 5724 }, { "epoch": 1.77, "learning_rate": 0.00014258749506739082, "loss": 3.0564, "step": 5725 }, { "epoch": 1.77, "learning_rate": 0.0001425518660081536, "loss": 2.8392, "step": 5726 }, { "epoch": 1.77, "learning_rate": 0.00014251623694891635, "loss": 2.6445, "step": 5727 }, { "epoch": 1.77, "learning_rate": 0.00014248060788967914, "loss": 2.5914, "step": 5728 }, { "epoch": 1.77, "learning_rate": 0.0001424449788304419, "loss": 2.5418, "step": 5729 }, { "epoch": 1.77, "learning_rate": 0.00014240934977120467, "loss": 2.4745, "step": 5730 }, { "epoch": 1.77, "learning_rate": 0.00014237372071196746, "loss": 2.6439, "step": 5731 }, { "epoch": 1.77, "learning_rate": 0.0001423380916527302, "loss": 2.5404, "step": 5732 }, { "epoch": 1.77, "learning_rate": 0.000142302462593493, "loss": 2.3958, "step": 5733 }, { "epoch": 1.77, "learning_rate": 0.00014226683353425575, "loss": 2.4156, "step": 5734 }, { "epoch": 1.77, "learning_rate": 0.00014223120447501853, "loss": 2.3506, "step": 5735 }, { "epoch": 1.77, "learning_rate": 0.0001421955754157813, "loss": 2.2906, "step": 5736 }, { "epoch": 1.77, "learning_rate": 0.00014215994635654407, "loss": 2.1339, "step": 5737 }, { "epoch": 1.77, "learning_rate": 0.00014212431729730685, "loss": 2.0886, "step": 5738 }, { "epoch": 1.77, "learning_rate": 0.0001420886882380696, "loss": 1.9057, "step": 5739 }, { "epoch": 1.77, "learning_rate": 0.00014205305917883238, "loss": 1.8557, "step": 5740 }, { "epoch": 1.77, "learning_rate": 0.00014201743011959517, "loss": 4.5447, "step": 5741 }, { "epoch": 1.77, "learning_rate": 0.00014198180106035792, "loss": 4.255, "step": 5742 }, { "epoch": 1.77, "learning_rate": 0.0001419461720011207, "loss": 4.0219, "step": 5743 }, { "epoch": 1.77, "learning_rate": 0.00014191054294188346, "loss": 3.7892, "step": 5744 }, { "epoch": 1.77, "learning_rate": 0.00014187491388264624, "loss": 3.7616, "step": 5745 }, { "epoch": 1.77, "learning_rate": 0.000141839284823409, "loss": 3.8975, "step": 5746 }, { "epoch": 1.77, "learning_rate": 0.00014180365576417178, "loss": 3.7408, "step": 5747 }, { "epoch": 1.77, "learning_rate": 0.00014176802670493456, "loss": 3.6341, "step": 5748 }, { "epoch": 1.77, "learning_rate": 0.00014173239764569734, "loss": 3.8169, "step": 5749 }, { "epoch": 1.77, "learning_rate": 0.0001416967685864601, "loss": 3.3752, "step": 5750 }, { "epoch": 1.77, "learning_rate": 0.00014166113952722285, "loss": 3.4488, "step": 5751 }, { "epoch": 1.78, "learning_rate": 0.00014162551046798563, "loss": 3.632, "step": 5752 }, { "epoch": 1.78, "learning_rate": 0.0001415898814087484, "loss": 3.3617, "step": 5753 }, { "epoch": 1.78, "learning_rate": 0.00014155425234951117, "loss": 3.1134, "step": 5754 }, { "epoch": 1.78, "learning_rate": 0.00014151862329027395, "loss": 3.6366, "step": 5755 }, { "epoch": 1.78, "learning_rate": 0.0001414829942310367, "loss": 3.3295, "step": 5756 }, { "epoch": 1.78, "learning_rate": 0.00014144736517179949, "loss": 3.6436, "step": 5757 }, { "epoch": 1.78, "learning_rate": 0.00014141173611256227, "loss": 3.4852, "step": 5758 }, { "epoch": 1.78, "learning_rate": 0.00014137610705332502, "loss": 3.1439, "step": 5759 }, { "epoch": 1.78, "learning_rate": 0.0001413404779940878, "loss": 3.3862, "step": 5760 }, { "epoch": 1.78, "learning_rate": 0.00014130484893485059, "loss": 3.5979, "step": 5761 }, { "epoch": 1.78, "learning_rate": 0.00014126921987561334, "loss": 3.4132, "step": 5762 }, { "epoch": 1.78, "learning_rate": 0.0001412335908163761, "loss": 3.3857, "step": 5763 }, { "epoch": 1.78, "learning_rate": 0.00014119796175713888, "loss": 3.3123, "step": 5764 }, { "epoch": 1.78, "learning_rate": 0.00014116233269790166, "loss": 2.8581, "step": 5765 }, { "epoch": 1.78, "learning_rate": 0.00014112670363866444, "loss": 3.0842, "step": 5766 }, { "epoch": 1.78, "learning_rate": 0.0001410910745794272, "loss": 3.4408, "step": 5767 }, { "epoch": 1.78, "learning_rate": 0.00014105544552018995, "loss": 3.0885, "step": 5768 }, { "epoch": 1.78, "learning_rate": 0.00014101981646095273, "loss": 3.0074, "step": 5769 }, { "epoch": 1.78, "learning_rate": 0.00014098418740171551, "loss": 2.9398, "step": 5770 }, { "epoch": 1.78, "learning_rate": 0.0001409485583424783, "loss": 3.0733, "step": 5771 }, { "epoch": 1.78, "learning_rate": 0.00014091292928324105, "loss": 3.3067, "step": 5772 }, { "epoch": 1.78, "learning_rate": 0.00014087730022400383, "loss": 2.7651, "step": 5773 }, { "epoch": 1.78, "learning_rate": 0.0001408416711647666, "loss": 2.83, "step": 5774 }, { "epoch": 1.78, "learning_rate": 0.00014080604210552937, "loss": 2.8875, "step": 5775 }, { "epoch": 1.78, "learning_rate": 0.00014077041304629212, "loss": 2.6647, "step": 5776 }, { "epoch": 1.78, "learning_rate": 0.0001407347839870549, "loss": 2.8033, "step": 5777 }, { "epoch": 1.78, "learning_rate": 0.0001406991549278177, "loss": 2.7867, "step": 5778 }, { "epoch": 1.78, "learning_rate": 0.00014066352586858044, "loss": 2.5961, "step": 5779 }, { "epoch": 1.78, "learning_rate": 0.00014062789680934323, "loss": 2.6612, "step": 5780 }, { "epoch": 1.78, "learning_rate": 0.00014059226775010598, "loss": 2.438, "step": 5781 }, { "epoch": 1.78, "learning_rate": 0.00014055663869086876, "loss": 2.3715, "step": 5782 }, { "epoch": 1.78, "learning_rate": 0.00014052100963163154, "loss": 2.3758, "step": 5783 }, { "epoch": 1.79, "learning_rate": 0.0001404853805723943, "loss": 2.3585, "step": 5784 }, { "epoch": 1.79, "learning_rate": 0.00014044975151315708, "loss": 2.2134, "step": 5785 }, { "epoch": 1.79, "learning_rate": 0.00014041412245391984, "loss": 1.9247, "step": 5786 }, { "epoch": 1.79, "learning_rate": 0.00014037849339468262, "loss": 2.0851, "step": 5787 }, { "epoch": 1.79, "learning_rate": 0.0001403428643354454, "loss": 2.0736, "step": 5788 }, { "epoch": 1.79, "learning_rate": 0.00014030723527620815, "loss": 1.7418, "step": 5789 }, { "epoch": 1.79, "learning_rate": 0.00014027160621697094, "loss": 1.7725, "step": 5790 }, { "epoch": 1.79, "learning_rate": 0.0001402359771577337, "loss": 4.4998, "step": 5791 }, { "epoch": 1.79, "learning_rate": 0.00014020034809849647, "loss": 4.1821, "step": 5792 }, { "epoch": 1.79, "learning_rate": 0.00014016471903925923, "loss": 4.3068, "step": 5793 }, { "epoch": 1.79, "learning_rate": 0.000140129089980022, "loss": 3.7252, "step": 5794 }, { "epoch": 1.79, "learning_rate": 0.0001400934609207848, "loss": 4.0177, "step": 5795 }, { "epoch": 1.79, "learning_rate": 0.00014005783186154757, "loss": 3.7797, "step": 5796 }, { "epoch": 1.79, "learning_rate": 0.00014002220280231033, "loss": 3.7268, "step": 5797 }, { "epoch": 1.79, "learning_rate": 0.00013998657374307308, "loss": 3.7286, "step": 5798 }, { "epoch": 1.79, "learning_rate": 0.00013995094468383586, "loss": 3.5305, "step": 5799 }, { "epoch": 1.79, "learning_rate": 0.00013991531562459865, "loss": 3.6905, "step": 5800 }, { "epoch": 1.79, "learning_rate": 0.0001398796865653614, "loss": 3.4284, "step": 5801 }, { "epoch": 1.79, "learning_rate": 0.00013984405750612418, "loss": 3.9358, "step": 5802 }, { "epoch": 1.79, "learning_rate": 0.00013980842844688694, "loss": 3.5555, "step": 5803 }, { "epoch": 1.79, "learning_rate": 0.00013977279938764972, "loss": 3.5612, "step": 5804 }, { "epoch": 1.79, "learning_rate": 0.0001397371703284125, "loss": 3.3969, "step": 5805 }, { "epoch": 1.79, "learning_rate": 0.00013970154126917526, "loss": 3.5514, "step": 5806 }, { "epoch": 1.79, "learning_rate": 0.00013966591220993804, "loss": 3.5584, "step": 5807 }, { "epoch": 1.79, "learning_rate": 0.00013963028315070082, "loss": 3.2074, "step": 5808 }, { "epoch": 1.79, "learning_rate": 0.00013959465409146357, "loss": 3.3537, "step": 5809 }, { "epoch": 1.79, "learning_rate": 0.00013955902503222633, "loss": 3.4354, "step": 5810 }, { "epoch": 1.79, "learning_rate": 0.0001395233959729891, "loss": 3.2865, "step": 5811 }, { "epoch": 1.79, "learning_rate": 0.0001394877669137519, "loss": 3.3629, "step": 5812 }, { "epoch": 1.79, "learning_rate": 0.00013945213785451467, "loss": 3.3553, "step": 5813 }, { "epoch": 1.79, "learning_rate": 0.00013941650879527743, "loss": 3.5052, "step": 5814 }, { "epoch": 1.79, "learning_rate": 0.0001393808797360402, "loss": 3.3821, "step": 5815 }, { "epoch": 1.8, "learning_rate": 0.00013934525067680297, "loss": 3.2393, "step": 5816 }, { "epoch": 1.8, "learning_rate": 0.00013930962161756575, "loss": 3.5431, "step": 5817 }, { "epoch": 1.8, "learning_rate": 0.0001392739925583285, "loss": 2.9671, "step": 5818 }, { "epoch": 1.8, "learning_rate": 0.00013923836349909128, "loss": 2.9769, "step": 5819 }, { "epoch": 1.8, "learning_rate": 0.00013920273443985407, "loss": 2.9013, "step": 5820 }, { "epoch": 1.8, "learning_rate": 0.00013916710538061682, "loss": 3.0875, "step": 5821 }, { "epoch": 1.8, "learning_rate": 0.0001391314763213796, "loss": 3.0266, "step": 5822 }, { "epoch": 1.8, "learning_rate": 0.00013909584726214236, "loss": 2.9963, "step": 5823 }, { "epoch": 1.8, "learning_rate": 0.00013906021820290514, "loss": 2.8772, "step": 5824 }, { "epoch": 1.8, "learning_rate": 0.00013902458914366792, "loss": 2.8313, "step": 5825 }, { "epoch": 1.8, "learning_rate": 0.00013898896008443068, "loss": 2.8887, "step": 5826 }, { "epoch": 1.8, "learning_rate": 0.00013895333102519346, "loss": 2.6444, "step": 5827 }, { "epoch": 1.8, "learning_rate": 0.0001389177019659562, "loss": 2.7736, "step": 5828 }, { "epoch": 1.8, "learning_rate": 0.000138882072906719, "loss": 2.5794, "step": 5829 }, { "epoch": 1.8, "learning_rate": 0.00013884644384748178, "loss": 2.7664, "step": 5830 }, { "epoch": 1.8, "learning_rate": 0.00013881081478824453, "loss": 2.5491, "step": 5831 }, { "epoch": 1.8, "learning_rate": 0.0001387751857290073, "loss": 2.4192, "step": 5832 }, { "epoch": 1.8, "learning_rate": 0.00013873955666977007, "loss": 2.3881, "step": 5833 }, { "epoch": 1.8, "learning_rate": 0.00013870392761053285, "loss": 2.4427, "step": 5834 }, { "epoch": 1.8, "learning_rate": 0.0001386682985512956, "loss": 2.1865, "step": 5835 }, { "epoch": 1.8, "learning_rate": 0.0001386326694920584, "loss": 2.1334, "step": 5836 }, { "epoch": 1.8, "learning_rate": 0.00013859704043282117, "loss": 1.8397, "step": 5837 }, { "epoch": 1.8, "learning_rate": 0.00013856141137358392, "loss": 2.1014, "step": 5838 }, { "epoch": 1.8, "learning_rate": 0.0001385257823143467, "loss": 1.7367, "step": 5839 }, { "epoch": 1.8, "learning_rate": 0.00013849015325510946, "loss": 1.8255, "step": 5840 }, { "epoch": 1.8, "learning_rate": 0.00013845452419587224, "loss": 4.5626, "step": 5841 }, { "epoch": 1.8, "learning_rate": 0.00013841889513663502, "loss": 4.2866, "step": 5842 }, { "epoch": 1.8, "learning_rate": 0.0001383832660773978, "loss": 3.9008, "step": 5843 }, { "epoch": 1.8, "learning_rate": 0.00013834763701816056, "loss": 3.9697, "step": 5844 }, { "epoch": 1.8, "learning_rate": 0.00013831200795892332, "loss": 3.9319, "step": 5845 }, { "epoch": 1.8, "learning_rate": 0.0001382763788996861, "loss": 3.8111, "step": 5846 }, { "epoch": 1.8, "learning_rate": 0.00013824074984044888, "loss": 3.8537, "step": 5847 }, { "epoch": 1.8, "learning_rate": 0.00013820512078121163, "loss": 3.9585, "step": 5848 }, { "epoch": 1.81, "learning_rate": 0.00013816949172197442, "loss": 3.5808, "step": 5849 }, { "epoch": 1.81, "learning_rate": 0.00013813386266273717, "loss": 3.7264, "step": 5850 }, { "epoch": 1.81, "learning_rate": 0.00013809823360349995, "loss": 3.8275, "step": 5851 }, { "epoch": 1.81, "learning_rate": 0.0001380626045442627, "loss": 3.7473, "step": 5852 }, { "epoch": 1.81, "learning_rate": 0.0001380269754850255, "loss": 3.5695, "step": 5853 }, { "epoch": 1.81, "learning_rate": 0.00013799134642578827, "loss": 3.7141, "step": 5854 }, { "epoch": 1.81, "learning_rate": 0.00013795571736655105, "loss": 3.2946, "step": 5855 }, { "epoch": 1.81, "learning_rate": 0.0001379200883073138, "loss": 3.3726, "step": 5856 }, { "epoch": 1.81, "learning_rate": 0.00013788445924807656, "loss": 3.4202, "step": 5857 }, { "epoch": 1.81, "learning_rate": 0.00013784883018883934, "loss": 3.4304, "step": 5858 }, { "epoch": 1.81, "learning_rate": 0.00013781320112960213, "loss": 3.3203, "step": 5859 }, { "epoch": 1.81, "learning_rate": 0.0001377775720703649, "loss": 3.3563, "step": 5860 }, { "epoch": 1.81, "learning_rate": 0.00013774194301112766, "loss": 3.5916, "step": 5861 }, { "epoch": 1.81, "learning_rate": 0.00013770631395189044, "loss": 3.4161, "step": 5862 }, { "epoch": 1.81, "learning_rate": 0.0001376706848926532, "loss": 3.5122, "step": 5863 }, { "epoch": 1.81, "learning_rate": 0.00013763505583341598, "loss": 3.5159, "step": 5864 }, { "epoch": 1.81, "learning_rate": 0.00013759942677417874, "loss": 3.2272, "step": 5865 }, { "epoch": 1.81, "learning_rate": 0.00013756379771494152, "loss": 3.1416, "step": 5866 }, { "epoch": 1.81, "learning_rate": 0.0001375281686557043, "loss": 3.3573, "step": 5867 }, { "epoch": 1.81, "learning_rate": 0.00013749253959646705, "loss": 3.0445, "step": 5868 }, { "epoch": 1.81, "learning_rate": 0.00013745691053722984, "loss": 3.1516, "step": 5869 }, { "epoch": 1.81, "learning_rate": 0.0001374212814779926, "loss": 3.2233, "step": 5870 }, { "epoch": 1.81, "learning_rate": 0.00013738565241875537, "loss": 2.9618, "step": 5871 }, { "epoch": 1.81, "learning_rate": 0.00013735002335951815, "loss": 3.2353, "step": 5872 }, { "epoch": 1.81, "learning_rate": 0.0001373143943002809, "loss": 2.9632, "step": 5873 }, { "epoch": 1.81, "learning_rate": 0.0001372787652410437, "loss": 2.7811, "step": 5874 }, { "epoch": 1.81, "learning_rate": 0.00013724313618180645, "loss": 2.8286, "step": 5875 }, { "epoch": 1.81, "learning_rate": 0.00013720750712256923, "loss": 2.6761, "step": 5876 }, { "epoch": 1.81, "learning_rate": 0.000137171878063332, "loss": 2.7204, "step": 5877 }, { "epoch": 1.81, "learning_rate": 0.00013713624900409476, "loss": 2.4742, "step": 5878 }, { "epoch": 1.81, "learning_rate": 0.00013710061994485755, "loss": 2.4664, "step": 5879 }, { "epoch": 1.81, "learning_rate": 0.0001370649908856203, "loss": 2.6297, "step": 5880 }, { "epoch": 1.82, "learning_rate": 0.00013702936182638308, "loss": 2.5812, "step": 5881 }, { "epoch": 1.82, "learning_rate": 0.00013699373276714584, "loss": 2.5629, "step": 5882 }, { "epoch": 1.82, "learning_rate": 0.00013695810370790862, "loss": 2.3846, "step": 5883 }, { "epoch": 1.82, "learning_rate": 0.0001369224746486714, "loss": 2.4844, "step": 5884 }, { "epoch": 1.82, "learning_rate": 0.00013688684558943416, "loss": 2.202, "step": 5885 }, { "epoch": 1.82, "learning_rate": 0.00013685121653019694, "loss": 2.0192, "step": 5886 }, { "epoch": 1.82, "learning_rate": 0.0001368155874709597, "loss": 1.7672, "step": 5887 }, { "epoch": 1.82, "learning_rate": 0.00013677995841172248, "loss": 1.9306, "step": 5888 }, { "epoch": 1.82, "learning_rate": 0.00013674432935248526, "loss": 1.875, "step": 5889 }, { "epoch": 1.82, "learning_rate": 0.00013670870029324804, "loss": 1.868, "step": 5890 }, { "epoch": 1.82, "learning_rate": 0.0001366730712340108, "loss": 4.6997, "step": 5891 }, { "epoch": 1.82, "learning_rate": 0.00013663744217477355, "loss": 4.2344, "step": 5892 }, { "epoch": 1.82, "learning_rate": 0.00013660181311553633, "loss": 3.8964, "step": 5893 }, { "epoch": 1.82, "learning_rate": 0.0001365661840562991, "loss": 4.0173, "step": 5894 }, { "epoch": 1.82, "learning_rate": 0.00013653055499706187, "loss": 3.9137, "step": 5895 }, { "epoch": 1.82, "learning_rate": 0.00013649492593782465, "loss": 3.8561, "step": 5896 }, { "epoch": 1.82, "learning_rate": 0.0001364592968785874, "loss": 3.418, "step": 5897 }, { "epoch": 1.82, "learning_rate": 0.00013642366781935019, "loss": 3.4838, "step": 5898 }, { "epoch": 1.82, "learning_rate": 0.00013638803876011294, "loss": 3.5848, "step": 5899 }, { "epoch": 1.82, "learning_rate": 0.00013635240970087572, "loss": 3.6944, "step": 5900 }, { "epoch": 1.82, "learning_rate": 0.0001363167806416385, "loss": 3.3327, "step": 5901 }, { "epoch": 1.82, "learning_rate": 0.00013628115158240129, "loss": 3.6176, "step": 5902 }, { "epoch": 1.82, "learning_rate": 0.00013624552252316404, "loss": 3.8383, "step": 5903 }, { "epoch": 1.82, "learning_rate": 0.0001362098934639268, "loss": 3.5444, "step": 5904 }, { "epoch": 1.82, "learning_rate": 0.00013617426440468958, "loss": 3.5803, "step": 5905 }, { "epoch": 1.82, "learning_rate": 0.00013613863534545236, "loss": 3.4823, "step": 5906 }, { "epoch": 1.82, "learning_rate": 0.00013610300628621514, "loss": 3.5749, "step": 5907 }, { "epoch": 1.82, "learning_rate": 0.0001360673772269779, "loss": 3.0849, "step": 5908 }, { "epoch": 1.82, "learning_rate": 0.00013603174816774068, "loss": 3.3145, "step": 5909 }, { "epoch": 1.82, "learning_rate": 0.00013599611910850343, "loss": 3.2813, "step": 5910 }, { "epoch": 1.82, "learning_rate": 0.00013596049004926621, "loss": 3.387, "step": 5911 }, { "epoch": 1.82, "learning_rate": 0.00013592486099002897, "loss": 3.2134, "step": 5912 }, { "epoch": 1.82, "learning_rate": 0.00013588923193079175, "loss": 3.3901, "step": 5913 }, { "epoch": 1.83, "learning_rate": 0.00013585360287155453, "loss": 3.2539, "step": 5914 }, { "epoch": 1.83, "learning_rate": 0.0001358179738123173, "loss": 3.1583, "step": 5915 }, { "epoch": 1.83, "learning_rate": 0.00013578234475308004, "loss": 3.2365, "step": 5916 }, { "epoch": 1.83, "learning_rate": 0.00013574671569384282, "loss": 3.3301, "step": 5917 }, { "epoch": 1.83, "learning_rate": 0.0001357110866346056, "loss": 2.9674, "step": 5918 }, { "epoch": 1.83, "learning_rate": 0.0001356754575753684, "loss": 3.1886, "step": 5919 }, { "epoch": 1.83, "learning_rate": 0.00013563982851613114, "loss": 3.0777, "step": 5920 }, { "epoch": 1.83, "learning_rate": 0.00013560419945689392, "loss": 2.919, "step": 5921 }, { "epoch": 1.83, "learning_rate": 0.00013556857039765668, "loss": 2.9699, "step": 5922 }, { "epoch": 1.83, "learning_rate": 0.00013553294133841946, "loss": 3.2553, "step": 5923 }, { "epoch": 1.83, "learning_rate": 0.00013549731227918224, "loss": 3.0933, "step": 5924 }, { "epoch": 1.83, "learning_rate": 0.000135461683219945, "loss": 3.114, "step": 5925 }, { "epoch": 1.83, "learning_rate": 0.00013542605416070778, "loss": 2.7203, "step": 5926 }, { "epoch": 1.83, "learning_rate": 0.00013539042510147053, "loss": 2.6611, "step": 5927 }, { "epoch": 1.83, "learning_rate": 0.00013535479604223332, "loss": 2.577, "step": 5928 }, { "epoch": 1.83, "learning_rate": 0.00013531916698299607, "loss": 2.5996, "step": 5929 }, { "epoch": 1.83, "learning_rate": 0.00013528353792375885, "loss": 2.3903, "step": 5930 }, { "epoch": 1.83, "learning_rate": 0.00013524790886452163, "loss": 2.4562, "step": 5931 }, { "epoch": 1.83, "learning_rate": 0.0001352122798052844, "loss": 2.5713, "step": 5932 }, { "epoch": 1.83, "learning_rate": 0.00013517665074604717, "loss": 2.1677, "step": 5933 }, { "epoch": 1.83, "learning_rate": 0.00013514102168680993, "loss": 2.0905, "step": 5934 }, { "epoch": 1.83, "learning_rate": 0.0001351053926275727, "loss": 2.3372, "step": 5935 }, { "epoch": 1.83, "learning_rate": 0.0001350697635683355, "loss": 1.7569, "step": 5936 }, { "epoch": 1.83, "learning_rate": 0.00013503413450909827, "loss": 1.9582, "step": 5937 }, { "epoch": 1.83, "learning_rate": 0.00013499850544986103, "loss": 1.8115, "step": 5938 }, { "epoch": 1.83, "learning_rate": 0.00013496287639062378, "loss": 1.7901, "step": 5939 }, { "epoch": 1.83, "learning_rate": 0.00013492724733138656, "loss": 1.8771, "step": 5940 }, { "epoch": 1.83, "learning_rate": 0.00013489161827214935, "loss": 4.5518, "step": 5941 }, { "epoch": 1.83, "learning_rate": 0.0001348559892129121, "loss": 3.931, "step": 5942 }, { "epoch": 1.83, "learning_rate": 0.00013482036015367488, "loss": 4.075, "step": 5943 }, { "epoch": 1.83, "learning_rate": 0.00013478473109443766, "loss": 3.7665, "step": 5944 }, { "epoch": 1.83, "learning_rate": 0.00013474910203520042, "loss": 3.7563, "step": 5945 }, { "epoch": 1.84, "learning_rate": 0.00013471347297596317, "loss": 3.944, "step": 5946 }, { "epoch": 1.84, "learning_rate": 0.00013467784391672596, "loss": 3.8102, "step": 5947 }, { "epoch": 1.84, "learning_rate": 0.00013464221485748874, "loss": 3.936, "step": 5948 }, { "epoch": 1.84, "learning_rate": 0.00013460658579825152, "loss": 3.6858, "step": 5949 }, { "epoch": 1.84, "learning_rate": 0.00013457095673901427, "loss": 3.4338, "step": 5950 }, { "epoch": 1.84, "learning_rate": 0.00013453532767977703, "loss": 3.6434, "step": 5951 }, { "epoch": 1.84, "learning_rate": 0.0001344996986205398, "loss": 3.2834, "step": 5952 }, { "epoch": 1.84, "learning_rate": 0.0001344640695613026, "loss": 3.5688, "step": 5953 }, { "epoch": 1.84, "learning_rate": 0.00013442844050206537, "loss": 3.808, "step": 5954 }, { "epoch": 1.84, "learning_rate": 0.00013439281144282813, "loss": 3.3996, "step": 5955 }, { "epoch": 1.84, "learning_rate": 0.0001343571823835909, "loss": 3.5764, "step": 5956 }, { "epoch": 1.84, "learning_rate": 0.00013432155332435367, "loss": 3.5001, "step": 5957 }, { "epoch": 1.84, "learning_rate": 0.00013428592426511645, "loss": 3.4617, "step": 5958 }, { "epoch": 1.84, "learning_rate": 0.0001342502952058792, "loss": 3.2654, "step": 5959 }, { "epoch": 1.84, "learning_rate": 0.00013421466614664198, "loss": 3.2503, "step": 5960 }, { "epoch": 1.84, "learning_rate": 0.00013417903708740477, "loss": 3.3622, "step": 5961 }, { "epoch": 1.84, "learning_rate": 0.00013414340802816752, "loss": 3.295, "step": 5962 }, { "epoch": 1.84, "learning_rate": 0.00013410777896893028, "loss": 3.6142, "step": 5963 }, { "epoch": 1.84, "learning_rate": 0.00013407214990969306, "loss": 3.0737, "step": 5964 }, { "epoch": 1.84, "learning_rate": 0.00013403652085045584, "loss": 3.3459, "step": 5965 }, { "epoch": 1.84, "learning_rate": 0.00013400089179121862, "loss": 3.19, "step": 5966 }, { "epoch": 1.84, "learning_rate": 0.00013396526273198138, "loss": 3.0298, "step": 5967 }, { "epoch": 1.84, "learning_rate": 0.00013392963367274416, "loss": 3.1364, "step": 5968 }, { "epoch": 1.84, "learning_rate": 0.0001338940046135069, "loss": 3.1435, "step": 5969 }, { "epoch": 1.84, "learning_rate": 0.0001338583755542697, "loss": 3.1396, "step": 5970 }, { "epoch": 1.84, "learning_rate": 0.00013382274649503248, "loss": 3.03, "step": 5971 }, { "epoch": 1.84, "learning_rate": 0.00013378711743579523, "loss": 2.9322, "step": 5972 }, { "epoch": 1.84, "learning_rate": 0.000133751488376558, "loss": 2.5041, "step": 5973 }, { "epoch": 1.84, "learning_rate": 0.00013371585931732077, "loss": 2.8171, "step": 5974 }, { "epoch": 1.84, "learning_rate": 0.00013368023025808355, "loss": 3.1091, "step": 5975 }, { "epoch": 1.84, "learning_rate": 0.0001336446011988463, "loss": 2.5389, "step": 5976 }, { "epoch": 1.84, "learning_rate": 0.00013360897213960909, "loss": 2.9834, "step": 5977 }, { "epoch": 1.84, "learning_rate": 0.00013357334308037187, "loss": 2.4387, "step": 5978 }, { "epoch": 1.85, "learning_rate": 0.00013353771402113462, "loss": 2.5696, "step": 5979 }, { "epoch": 1.85, "learning_rate": 0.0001335020849618974, "loss": 2.6627, "step": 5980 }, { "epoch": 1.85, "learning_rate": 0.00013346645590266016, "loss": 2.67, "step": 5981 }, { "epoch": 1.85, "learning_rate": 0.00013343082684342294, "loss": 2.3773, "step": 5982 }, { "epoch": 1.85, "learning_rate": 0.00013339519778418572, "loss": 2.481, "step": 5983 }, { "epoch": 1.85, "learning_rate": 0.0001333595687249485, "loss": 2.1774, "step": 5984 }, { "epoch": 1.85, "learning_rate": 0.00013332393966571126, "loss": 2.2818, "step": 5985 }, { "epoch": 1.85, "learning_rate": 0.00013328831060647401, "loss": 1.9512, "step": 5986 }, { "epoch": 1.85, "learning_rate": 0.0001332526815472368, "loss": 2.0007, "step": 5987 }, { "epoch": 1.85, "learning_rate": 0.00013321705248799958, "loss": 1.9723, "step": 5988 }, { "epoch": 1.85, "learning_rate": 0.00013318142342876233, "loss": 1.7827, "step": 5989 }, { "epoch": 1.85, "learning_rate": 0.00013314579436952511, "loss": 1.7195, "step": 5990 }, { "epoch": 1.85, "learning_rate": 0.0001331101653102879, "loss": 4.4681, "step": 5991 }, { "epoch": 1.85, "learning_rate": 0.00013307453625105065, "loss": 3.9231, "step": 5992 }, { "epoch": 1.85, "learning_rate": 0.0001330389071918134, "loss": 3.9347, "step": 5993 }, { "epoch": 1.85, "learning_rate": 0.0001330032781325762, "loss": 3.8091, "step": 5994 }, { "epoch": 1.85, "learning_rate": 0.00013296764907333897, "loss": 3.7506, "step": 5995 }, { "epoch": 1.85, "learning_rate": 0.00013293202001410175, "loss": 3.6377, "step": 5996 }, { "epoch": 1.85, "learning_rate": 0.0001328963909548645, "loss": 3.6111, "step": 5997 }, { "epoch": 1.85, "learning_rate": 0.00013286076189562726, "loss": 3.7294, "step": 5998 }, { "epoch": 1.85, "learning_rate": 0.00013282513283639004, "loss": 3.6815, "step": 5999 }, { "epoch": 1.85, "learning_rate": 0.00013278950377715283, "loss": 3.4118, "step": 6000 }, { "epoch": 1.85, "eval_bleu": 2.4864461373906615e-13, "eval_loss": 4.131202220916748, "eval_runtime": 2568.1952, "eval_samples_per_second": 5.747, "eval_steps_per_second": 0.718, "step": 6000 }, { "epoch": 1.85, "learning_rate": 0.0001327538747179156, "loss": 3.752, "step": 6001 }, { "epoch": 1.85, "learning_rate": 0.00013271824565867836, "loss": 3.6127, "step": 6002 }, { "epoch": 1.85, "learning_rate": 0.00013268261659944114, "loss": 3.491, "step": 6003 }, { "epoch": 1.85, "learning_rate": 0.0001326469875402039, "loss": 3.5244, "step": 6004 }, { "epoch": 1.85, "learning_rate": 0.00013261135848096668, "loss": 3.5025, "step": 6005 }, { "epoch": 1.85, "learning_rate": 0.00013257572942172944, "loss": 3.48, "step": 6006 }, { "epoch": 1.85, "learning_rate": 0.00013254010036249222, "loss": 3.3622, "step": 6007 }, { "epoch": 1.85, "learning_rate": 0.000132504471303255, "loss": 3.4931, "step": 6008 }, { "epoch": 1.85, "learning_rate": 0.00013246884224401775, "loss": 3.3361, "step": 6009 }, { "epoch": 1.85, "learning_rate": 0.0001324332131847805, "loss": 3.3524, "step": 6010 }, { "epoch": 1.86, "learning_rate": 0.0001323975841255433, "loss": 3.3579, "step": 6011 }, { "epoch": 1.86, "learning_rate": 0.00013236195506630607, "loss": 3.586, "step": 6012 }, { "epoch": 1.86, "learning_rate": 0.00013232632600706885, "loss": 3.4122, "step": 6013 }, { "epoch": 1.86, "learning_rate": 0.0001322906969478316, "loss": 3.1571, "step": 6014 }, { "epoch": 1.86, "learning_rate": 0.0001322550678885944, "loss": 3.3911, "step": 6015 }, { "epoch": 1.86, "learning_rate": 0.00013221943882935715, "loss": 3.1801, "step": 6016 }, { "epoch": 1.86, "learning_rate": 0.00013218380977011993, "loss": 3.3695, "step": 6017 }, { "epoch": 1.86, "learning_rate": 0.0001321481807108827, "loss": 3.3284, "step": 6018 }, { "epoch": 1.86, "learning_rate": 0.00013211255165164546, "loss": 3.2316, "step": 6019 }, { "epoch": 1.86, "learning_rate": 0.00013207692259240825, "loss": 3.0599, "step": 6020 }, { "epoch": 1.86, "learning_rate": 0.000132041293533171, "loss": 2.9838, "step": 6021 }, { "epoch": 1.86, "learning_rate": 0.00013200566447393378, "loss": 3.1124, "step": 6022 }, { "epoch": 1.86, "learning_rate": 0.00013197003541469654, "loss": 3.1385, "step": 6023 }, { "epoch": 1.86, "learning_rate": 0.00013193440635545932, "loss": 2.6956, "step": 6024 }, { "epoch": 1.86, "learning_rate": 0.0001318987772962221, "loss": 2.7608, "step": 6025 }, { "epoch": 1.86, "learning_rate": 0.00013186314823698486, "loss": 2.8516, "step": 6026 }, { "epoch": 1.86, "learning_rate": 0.00013182751917774764, "loss": 2.6226, "step": 6027 }, { "epoch": 1.86, "learning_rate": 0.0001317918901185104, "loss": 2.6602, "step": 6028 }, { "epoch": 1.86, "learning_rate": 0.00013175626105927317, "loss": 2.8539, "step": 6029 }, { "epoch": 1.86, "learning_rate": 0.00013172063200003596, "loss": 2.7956, "step": 6030 }, { "epoch": 1.86, "learning_rate": 0.0001316850029407987, "loss": 2.4599, "step": 6031 }, { "epoch": 1.86, "learning_rate": 0.0001316493738815615, "loss": 2.4493, "step": 6032 }, { "epoch": 1.86, "learning_rate": 0.00013161374482232425, "loss": 2.1797, "step": 6033 }, { "epoch": 1.86, "learning_rate": 0.00013157811576308703, "loss": 2.2403, "step": 6034 }, { "epoch": 1.86, "learning_rate": 0.0001315424867038498, "loss": 2.1513, "step": 6035 }, { "epoch": 1.86, "learning_rate": 0.00013150685764461257, "loss": 2.0846, "step": 6036 }, { "epoch": 1.86, "learning_rate": 0.00013147122858537535, "loss": 1.853, "step": 6037 }, { "epoch": 1.86, "learning_rate": 0.00013143559952613813, "loss": 2.0322, "step": 6038 }, { "epoch": 1.86, "learning_rate": 0.00013139997046690088, "loss": 1.5869, "step": 6039 }, { "epoch": 1.86, "learning_rate": 0.00013136434140766364, "loss": 1.6679, "step": 6040 }, { "epoch": 1.86, "learning_rate": 0.00013132871234842642, "loss": 4.4113, "step": 6041 }, { "epoch": 1.86, "learning_rate": 0.0001312930832891892, "loss": 4.2237, "step": 6042 }, { "epoch": 1.87, "learning_rate": 0.00013125745422995199, "loss": 3.9464, "step": 6043 }, { "epoch": 1.87, "learning_rate": 0.00013122182517071474, "loss": 3.8926, "step": 6044 }, { "epoch": 1.87, "learning_rate": 0.0001311861961114775, "loss": 3.5047, "step": 6045 }, { "epoch": 1.87, "learning_rate": 0.00013115056705224028, "loss": 3.3792, "step": 6046 }, { "epoch": 1.87, "learning_rate": 0.00013111493799300306, "loss": 3.7744, "step": 6047 }, { "epoch": 1.87, "learning_rate": 0.0001310793089337658, "loss": 3.321, "step": 6048 }, { "epoch": 1.87, "learning_rate": 0.0001310436798745286, "loss": 3.672, "step": 6049 }, { "epoch": 1.87, "learning_rate": 0.00013100805081529138, "loss": 3.7048, "step": 6050 }, { "epoch": 1.87, "learning_rate": 0.00013097242175605413, "loss": 3.3416, "step": 6051 }, { "epoch": 1.87, "learning_rate": 0.00013093679269681691, "loss": 3.4552, "step": 6052 }, { "epoch": 1.87, "learning_rate": 0.00013090116363757967, "loss": 3.2367, "step": 6053 }, { "epoch": 1.87, "learning_rate": 0.00013086553457834245, "loss": 3.204, "step": 6054 }, { "epoch": 1.87, "learning_rate": 0.00013082990551910523, "loss": 3.4702, "step": 6055 }, { "epoch": 1.87, "learning_rate": 0.000130794276459868, "loss": 3.3258, "step": 6056 }, { "epoch": 1.87, "learning_rate": 0.00013075864740063074, "loss": 3.6332, "step": 6057 }, { "epoch": 1.87, "learning_rate": 0.00013072301834139352, "loss": 3.5916, "step": 6058 }, { "epoch": 1.87, "learning_rate": 0.0001306873892821563, "loss": 3.4383, "step": 6059 }, { "epoch": 1.87, "learning_rate": 0.0001306517602229191, "loss": 3.2962, "step": 6060 }, { "epoch": 1.87, "learning_rate": 0.00013061613116368184, "loss": 3.2363, "step": 6061 }, { "epoch": 1.87, "learning_rate": 0.00013058050210444462, "loss": 3.1746, "step": 6062 }, { "epoch": 1.87, "learning_rate": 0.00013054487304520738, "loss": 3.2743, "step": 6063 }, { "epoch": 1.87, "learning_rate": 0.00013050924398597016, "loss": 3.3248, "step": 6064 }, { "epoch": 1.87, "learning_rate": 0.00013047361492673292, "loss": 3.1668, "step": 6065 }, { "epoch": 1.87, "learning_rate": 0.0001304379858674957, "loss": 3.2781, "step": 6066 }, { "epoch": 1.87, "learning_rate": 0.00013040235680825848, "loss": 3.0551, "step": 6067 }, { "epoch": 1.87, "learning_rate": 0.00013036672774902123, "loss": 3.1617, "step": 6068 }, { "epoch": 1.87, "learning_rate": 0.00013033109868978402, "loss": 2.9368, "step": 6069 }, { "epoch": 1.87, "learning_rate": 0.00013029546963054677, "loss": 3.0594, "step": 6070 }, { "epoch": 1.87, "learning_rate": 0.00013025984057130955, "loss": 2.9856, "step": 6071 }, { "epoch": 1.87, "learning_rate": 0.00013022421151207233, "loss": 3.0231, "step": 6072 }, { "epoch": 1.87, "learning_rate": 0.00013018858245283512, "loss": 3.1744, "step": 6073 }, { "epoch": 1.87, "learning_rate": 0.00013015295339359787, "loss": 2.9685, "step": 6074 }, { "epoch": 1.87, "learning_rate": 0.00013011732433436063, "loss": 2.9049, "step": 6075 }, { "epoch": 1.88, "learning_rate": 0.0001300816952751234, "loss": 2.5497, "step": 6076 }, { "epoch": 1.88, "learning_rate": 0.0001300460662158862, "loss": 2.663, "step": 6077 }, { "epoch": 1.88, "learning_rate": 0.00013001043715664894, "loss": 2.6952, "step": 6078 }, { "epoch": 1.88, "learning_rate": 0.00012997480809741173, "loss": 2.6857, "step": 6079 }, { "epoch": 1.88, "learning_rate": 0.00012993917903817448, "loss": 2.2846, "step": 6080 }, { "epoch": 1.88, "learning_rate": 0.00012990354997893726, "loss": 2.3562, "step": 6081 }, { "epoch": 1.88, "learning_rate": 0.00012986792091970004, "loss": 2.4923, "step": 6082 }, { "epoch": 1.88, "learning_rate": 0.0001298322918604628, "loss": 2.3585, "step": 6083 }, { "epoch": 1.88, "learning_rate": 0.00012979666280122558, "loss": 2.308, "step": 6084 }, { "epoch": 1.88, "learning_rate": 0.00012976103374198836, "loss": 2.2434, "step": 6085 }, { "epoch": 1.88, "learning_rate": 0.00012972540468275112, "loss": 2.1674, "step": 6086 }, { "epoch": 1.88, "learning_rate": 0.00012968977562351387, "loss": 2.0708, "step": 6087 }, { "epoch": 1.88, "learning_rate": 0.00012965414656427665, "loss": 2.0111, "step": 6088 }, { "epoch": 1.88, "learning_rate": 0.00012961851750503944, "loss": 1.765, "step": 6089 }, { "epoch": 1.88, "learning_rate": 0.00012958288844580222, "loss": 1.7188, "step": 6090 }, { "epoch": 1.88, "learning_rate": 0.00012954725938656497, "loss": 4.4693, "step": 6091 }, { "epoch": 1.88, "learning_rate": 0.00012951163032732773, "loss": 4.0489, "step": 6092 }, { "epoch": 1.88, "learning_rate": 0.0001294760012680905, "loss": 3.7864, "step": 6093 }, { "epoch": 1.88, "learning_rate": 0.0001294403722088533, "loss": 3.9008, "step": 6094 }, { "epoch": 1.88, "learning_rate": 0.00012940474314961605, "loss": 3.721, "step": 6095 }, { "epoch": 1.88, "learning_rate": 0.00012936911409037883, "loss": 3.7364, "step": 6096 }, { "epoch": 1.88, "learning_rate": 0.0001293334850311416, "loss": 3.7179, "step": 6097 }, { "epoch": 1.88, "learning_rate": 0.00012929785597190436, "loss": 3.3805, "step": 6098 }, { "epoch": 1.88, "learning_rate": 0.00012926222691266715, "loss": 3.4667, "step": 6099 }, { "epoch": 1.88, "learning_rate": 0.0001292265978534299, "loss": 3.6643, "step": 6100 }, { "epoch": 1.88, "learning_rate": 0.00012919096879419268, "loss": 3.5647, "step": 6101 }, { "epoch": 1.88, "learning_rate": 0.00012915533973495547, "loss": 3.3146, "step": 6102 }, { "epoch": 1.88, "learning_rate": 0.00012911971067571822, "loss": 3.6588, "step": 6103 }, { "epoch": 1.88, "learning_rate": 0.00012908408161648097, "loss": 3.5325, "step": 6104 }, { "epoch": 1.88, "learning_rate": 0.00012904845255724376, "loss": 3.2702, "step": 6105 }, { "epoch": 1.88, "learning_rate": 0.00012901282349800654, "loss": 3.4937, "step": 6106 }, { "epoch": 1.88, "learning_rate": 0.00012897719443876932, "loss": 3.386, "step": 6107 }, { "epoch": 1.89, "learning_rate": 0.00012894156537953208, "loss": 3.5427, "step": 6108 }, { "epoch": 1.89, "learning_rate": 0.00012890593632029486, "loss": 3.4888, "step": 6109 }, { "epoch": 1.89, "learning_rate": 0.0001288703072610576, "loss": 3.2706, "step": 6110 }, { "epoch": 1.89, "learning_rate": 0.0001288346782018204, "loss": 3.2448, "step": 6111 }, { "epoch": 1.89, "learning_rate": 0.00012879904914258315, "loss": 3.2949, "step": 6112 }, { "epoch": 1.89, "learning_rate": 0.00012876342008334593, "loss": 3.4233, "step": 6113 }, { "epoch": 1.89, "learning_rate": 0.0001287277910241087, "loss": 3.1605, "step": 6114 }, { "epoch": 1.89, "learning_rate": 0.00012869216196487147, "loss": 3.4345, "step": 6115 }, { "epoch": 1.89, "learning_rate": 0.00012865653290563425, "loss": 3.1638, "step": 6116 }, { "epoch": 1.89, "learning_rate": 0.000128620903846397, "loss": 3.0649, "step": 6117 }, { "epoch": 1.89, "learning_rate": 0.00012858527478715979, "loss": 2.8955, "step": 6118 }, { "epoch": 1.89, "learning_rate": 0.00012854964572792257, "loss": 3.0884, "step": 6119 }, { "epoch": 1.89, "learning_rate": 0.00012851401666868535, "loss": 2.9642, "step": 6120 }, { "epoch": 1.89, "learning_rate": 0.0001284783876094481, "loss": 3.1684, "step": 6121 }, { "epoch": 1.89, "learning_rate": 0.00012844275855021086, "loss": 3.0019, "step": 6122 }, { "epoch": 1.89, "learning_rate": 0.00012840712949097364, "loss": 2.7471, "step": 6123 }, { "epoch": 1.89, "learning_rate": 0.00012837150043173642, "loss": 2.7786, "step": 6124 }, { "epoch": 1.89, "learning_rate": 0.00012833587137249918, "loss": 2.7105, "step": 6125 }, { "epoch": 1.89, "learning_rate": 0.00012830024231326196, "loss": 2.874, "step": 6126 }, { "epoch": 1.89, "learning_rate": 0.00012826461325402471, "loss": 2.5026, "step": 6127 }, { "epoch": 1.89, "learning_rate": 0.0001282289841947875, "loss": 2.6514, "step": 6128 }, { "epoch": 1.89, "learning_rate": 0.00012819335513555025, "loss": 2.536, "step": 6129 }, { "epoch": 1.89, "learning_rate": 0.00012815772607631303, "loss": 2.5392, "step": 6130 }, { "epoch": 1.89, "learning_rate": 0.00012812209701707581, "loss": 2.5288, "step": 6131 }, { "epoch": 1.89, "learning_rate": 0.0001280864679578386, "loss": 2.2261, "step": 6132 }, { "epoch": 1.89, "learning_rate": 0.00012805083889860135, "loss": 2.2867, "step": 6133 }, { "epoch": 1.89, "learning_rate": 0.0001280152098393641, "loss": 2.4738, "step": 6134 }, { "epoch": 1.89, "learning_rate": 0.0001279795807801269, "loss": 2.1634, "step": 6135 }, { "epoch": 1.89, "learning_rate": 0.00012794395172088967, "loss": 2.0825, "step": 6136 }, { "epoch": 1.89, "learning_rate": 0.00012790832266165245, "loss": 2.0493, "step": 6137 }, { "epoch": 1.89, "learning_rate": 0.0001278726936024152, "loss": 1.7895, "step": 6138 }, { "epoch": 1.89, "learning_rate": 0.00012783706454317796, "loss": 1.9103, "step": 6139 }, { "epoch": 1.89, "learning_rate": 0.00012780143548394074, "loss": 1.6702, "step": 6140 }, { "epoch": 1.9, "learning_rate": 0.00012776580642470352, "loss": 4.0141, "step": 6141 }, { "epoch": 1.9, "learning_rate": 0.00012773017736546628, "loss": 4.1807, "step": 6142 }, { "epoch": 1.9, "learning_rate": 0.00012769454830622906, "loss": 3.8057, "step": 6143 }, { "epoch": 1.9, "learning_rate": 0.00012765891924699184, "loss": 3.8518, "step": 6144 }, { "epoch": 1.9, "learning_rate": 0.0001276232901877546, "loss": 3.6612, "step": 6145 }, { "epoch": 1.9, "learning_rate": 0.00012758766112851735, "loss": 3.625, "step": 6146 }, { "epoch": 1.9, "learning_rate": 0.00012755203206928013, "loss": 3.589, "step": 6147 }, { "epoch": 1.9, "learning_rate": 0.00012751640301004292, "loss": 3.525, "step": 6148 }, { "epoch": 1.9, "learning_rate": 0.0001274807739508057, "loss": 3.5978, "step": 6149 }, { "epoch": 1.9, "learning_rate": 0.00012744514489156845, "loss": 3.7018, "step": 6150 }, { "epoch": 1.9, "learning_rate": 0.0001274095158323312, "loss": 3.3978, "step": 6151 }, { "epoch": 1.9, "learning_rate": 0.000127373886773094, "loss": 3.4101, "step": 6152 }, { "epoch": 1.9, "learning_rate": 0.00012733825771385677, "loss": 3.3752, "step": 6153 }, { "epoch": 1.9, "learning_rate": 0.00012730262865461955, "loss": 3.4096, "step": 6154 }, { "epoch": 1.9, "learning_rate": 0.0001272669995953823, "loss": 3.6248, "step": 6155 }, { "epoch": 1.9, "learning_rate": 0.0001272313705361451, "loss": 3.3092, "step": 6156 }, { "epoch": 1.9, "learning_rate": 0.00012719574147690784, "loss": 3.6158, "step": 6157 }, { "epoch": 1.9, "learning_rate": 0.00012716011241767063, "loss": 3.3156, "step": 6158 }, { "epoch": 1.9, "learning_rate": 0.00012712448335843338, "loss": 3.2952, "step": 6159 }, { "epoch": 1.9, "learning_rate": 0.00012708885429919616, "loss": 3.3187, "step": 6160 }, { "epoch": 1.9, "learning_rate": 0.00012705322523995895, "loss": 3.2348, "step": 6161 }, { "epoch": 1.9, "learning_rate": 0.0001270175961807217, "loss": 3.1555, "step": 6162 }, { "epoch": 1.9, "learning_rate": 0.00012698196712148448, "loss": 3.3545, "step": 6163 }, { "epoch": 1.9, "learning_rate": 0.00012694633806224724, "loss": 3.0691, "step": 6164 }, { "epoch": 1.9, "learning_rate": 0.00012691070900301002, "loss": 3.2594, "step": 6165 }, { "epoch": 1.9, "learning_rate": 0.0001268750799437728, "loss": 3.1374, "step": 6166 }, { "epoch": 1.9, "learning_rate": 0.00012683945088453558, "loss": 3.2788, "step": 6167 }, { "epoch": 1.9, "learning_rate": 0.00012680382182529834, "loss": 3.0143, "step": 6168 }, { "epoch": 1.9, "learning_rate": 0.0001267681927660611, "loss": 3.0504, "step": 6169 }, { "epoch": 1.9, "learning_rate": 0.00012673256370682387, "loss": 3.0275, "step": 6170 }, { "epoch": 1.9, "learning_rate": 0.00012669693464758666, "loss": 2.9656, "step": 6171 }, { "epoch": 1.9, "learning_rate": 0.0001266613055883494, "loss": 2.8567, "step": 6172 }, { "epoch": 1.91, "learning_rate": 0.0001266256765291122, "loss": 2.8394, "step": 6173 }, { "epoch": 1.91, "learning_rate": 0.00012659004746987495, "loss": 2.6523, "step": 6174 }, { "epoch": 1.91, "learning_rate": 0.00012655441841063773, "loss": 2.7721, "step": 6175 }, { "epoch": 1.91, "learning_rate": 0.00012651878935140048, "loss": 2.8439, "step": 6176 }, { "epoch": 1.91, "learning_rate": 0.00012648316029216327, "loss": 2.7845, "step": 6177 }, { "epoch": 1.91, "learning_rate": 0.00012644753123292605, "loss": 2.6195, "step": 6178 }, { "epoch": 1.91, "learning_rate": 0.00012641190217368883, "loss": 2.5627, "step": 6179 }, { "epoch": 1.91, "learning_rate": 0.00012637627311445158, "loss": 2.4224, "step": 6180 }, { "epoch": 1.91, "learning_rate": 0.00012634064405521434, "loss": 2.2148, "step": 6181 }, { "epoch": 1.91, "learning_rate": 0.00012630501499597712, "loss": 2.3441, "step": 6182 }, { "epoch": 1.91, "learning_rate": 0.0001262693859367399, "loss": 2.3608, "step": 6183 }, { "epoch": 1.91, "learning_rate": 0.00012623375687750268, "loss": 2.2942, "step": 6184 }, { "epoch": 1.91, "learning_rate": 0.00012619812781826544, "loss": 2.0802, "step": 6185 }, { "epoch": 1.91, "learning_rate": 0.0001261624987590282, "loss": 2.004, "step": 6186 }, { "epoch": 1.91, "learning_rate": 0.00012612686969979098, "loss": 1.914, "step": 6187 }, { "epoch": 1.91, "learning_rate": 0.00012609124064055376, "loss": 1.8443, "step": 6188 }, { "epoch": 1.91, "learning_rate": 0.0001260556115813165, "loss": 1.6837, "step": 6189 }, { "epoch": 1.91, "learning_rate": 0.0001260199825220793, "loss": 1.5332, "step": 6190 }, { "epoch": 1.91, "learning_rate": 0.00012598435346284208, "loss": 4.4366, "step": 6191 }, { "epoch": 1.91, "learning_rate": 0.00012594872440360483, "loss": 4.3487, "step": 6192 }, { "epoch": 1.91, "learning_rate": 0.00012591309534436759, "loss": 4.0833, "step": 6193 }, { "epoch": 1.91, "learning_rate": 0.00012587746628513037, "loss": 3.9538, "step": 6194 }, { "epoch": 1.91, "learning_rate": 0.00012584183722589315, "loss": 4.2508, "step": 6195 }, { "epoch": 1.91, "learning_rate": 0.00012580620816665593, "loss": 3.4193, "step": 6196 }, { "epoch": 1.91, "learning_rate": 0.00012577057910741869, "loss": 3.3557, "step": 6197 }, { "epoch": 1.91, "learning_rate": 0.00012573495004818144, "loss": 3.8474, "step": 6198 }, { "epoch": 1.91, "learning_rate": 0.00012569932098894422, "loss": 3.6939, "step": 6199 }, { "epoch": 1.91, "learning_rate": 0.000125663691929707, "loss": 3.5276, "step": 6200 }, { "epoch": 1.91, "learning_rate": 0.0001256280628704698, "loss": 3.5977, "step": 6201 }, { "epoch": 1.91, "learning_rate": 0.00012559243381123254, "loss": 3.5591, "step": 6202 }, { "epoch": 1.91, "learning_rate": 0.00012555680475199532, "loss": 3.4502, "step": 6203 }, { "epoch": 1.91, "learning_rate": 0.00012552117569275808, "loss": 3.4961, "step": 6204 }, { "epoch": 1.92, "learning_rate": 0.00012548554663352086, "loss": 3.2569, "step": 6205 }, { "epoch": 1.92, "learning_rate": 0.00012544991757428361, "loss": 3.1582, "step": 6206 }, { "epoch": 1.92, "learning_rate": 0.0001254142885150464, "loss": 3.3687, "step": 6207 }, { "epoch": 1.92, "learning_rate": 0.00012537865945580918, "loss": 3.2443, "step": 6208 }, { "epoch": 1.92, "learning_rate": 0.00012534303039657193, "loss": 3.1947, "step": 6209 }, { "epoch": 1.92, "learning_rate": 0.00012530740133733472, "loss": 3.34, "step": 6210 }, { "epoch": 1.92, "learning_rate": 0.00012527177227809747, "loss": 3.294, "step": 6211 }, { "epoch": 1.92, "learning_rate": 0.00012523614321886025, "loss": 3.2394, "step": 6212 }, { "epoch": 1.92, "learning_rate": 0.00012520051415962303, "loss": 3.3341, "step": 6213 }, { "epoch": 1.92, "learning_rate": 0.00012516488510038582, "loss": 3.2142, "step": 6214 }, { "epoch": 1.92, "learning_rate": 0.00012512925604114857, "loss": 3.3231, "step": 6215 }, { "epoch": 1.92, "learning_rate": 0.00012509362698191133, "loss": 3.2161, "step": 6216 }, { "epoch": 1.92, "learning_rate": 0.0001250579979226741, "loss": 3.025, "step": 6217 }, { "epoch": 1.92, "learning_rate": 0.0001250223688634369, "loss": 2.9952, "step": 6218 }, { "epoch": 1.92, "learning_rate": 0.00012498673980419964, "loss": 2.825, "step": 6219 }, { "epoch": 1.92, "learning_rate": 0.00012495111074496243, "loss": 2.8667, "step": 6220 }, { "epoch": 1.92, "learning_rate": 0.00012491548168572518, "loss": 2.9087, "step": 6221 }, { "epoch": 1.92, "learning_rate": 0.00012487985262648796, "loss": 2.9174, "step": 6222 }, { "epoch": 1.92, "learning_rate": 0.00012484422356725072, "loss": 3.13, "step": 6223 }, { "epoch": 1.92, "learning_rate": 0.0001248085945080135, "loss": 2.7399, "step": 6224 }, { "epoch": 1.92, "learning_rate": 0.00012477296544877628, "loss": 2.8083, "step": 6225 }, { "epoch": 1.92, "learning_rate": 0.00012473733638953906, "loss": 2.5095, "step": 6226 }, { "epoch": 1.92, "learning_rate": 0.00012470170733030182, "loss": 2.5915, "step": 6227 }, { "epoch": 1.92, "learning_rate": 0.00012466607827106457, "loss": 2.5622, "step": 6228 }, { "epoch": 1.92, "learning_rate": 0.00012463044921182735, "loss": 2.3931, "step": 6229 }, { "epoch": 1.92, "learning_rate": 0.00012459482015259014, "loss": 2.5797, "step": 6230 }, { "epoch": 1.92, "learning_rate": 0.00012455919109335292, "loss": 2.3015, "step": 6231 }, { "epoch": 1.92, "learning_rate": 0.00012452356203411567, "loss": 2.1765, "step": 6232 }, { "epoch": 1.92, "learning_rate": 0.00012448793297487843, "loss": 2.2767, "step": 6233 }, { "epoch": 1.92, "learning_rate": 0.0001244523039156412, "loss": 2.1792, "step": 6234 }, { "epoch": 1.92, "learning_rate": 0.000124416674856404, "loss": 2.038, "step": 6235 }, { "epoch": 1.92, "learning_rate": 0.00012438104579716675, "loss": 2.1239, "step": 6236 }, { "epoch": 1.92, "learning_rate": 0.00012434541673792953, "loss": 2.1532, "step": 6237 }, { "epoch": 1.93, "learning_rate": 0.0001243097876786923, "loss": 2.0321, "step": 6238 }, { "epoch": 1.93, "learning_rate": 0.00012427415861945506, "loss": 1.8024, "step": 6239 }, { "epoch": 1.93, "learning_rate": 0.00012423852956021782, "loss": 1.7173, "step": 6240 }, { "epoch": 1.93, "learning_rate": 0.0001242029005009806, "loss": 4.3351, "step": 6241 }, { "epoch": 1.93, "learning_rate": 0.00012416727144174338, "loss": 4.4694, "step": 6242 }, { "epoch": 1.93, "learning_rate": 0.00012413164238250616, "loss": 3.8428, "step": 6243 }, { "epoch": 1.93, "learning_rate": 0.00012409601332326892, "loss": 3.8765, "step": 6244 }, { "epoch": 1.93, "learning_rate": 0.00012406038426403167, "loss": 3.5356, "step": 6245 }, { "epoch": 1.93, "learning_rate": 0.00012402475520479446, "loss": 3.6469, "step": 6246 }, { "epoch": 1.93, "learning_rate": 0.00012398912614555724, "loss": 3.7448, "step": 6247 }, { "epoch": 1.93, "learning_rate": 0.00012395349708632002, "loss": 3.6421, "step": 6248 }, { "epoch": 1.93, "learning_rate": 0.00012391786802708277, "loss": 3.4599, "step": 6249 }, { "epoch": 1.93, "learning_rate": 0.00012388223896784556, "loss": 3.5939, "step": 6250 }, { "epoch": 1.93, "learning_rate": 0.0001238466099086083, "loss": 3.476, "step": 6251 }, { "epoch": 1.93, "learning_rate": 0.0001238109808493711, "loss": 3.7101, "step": 6252 }, { "epoch": 1.93, "learning_rate": 0.00012377535179013385, "loss": 3.5088, "step": 6253 }, { "epoch": 1.93, "learning_rate": 0.00012373972273089663, "loss": 3.4404, "step": 6254 }, { "epoch": 1.93, "learning_rate": 0.0001237040936716594, "loss": 3.167, "step": 6255 }, { "epoch": 1.93, "learning_rate": 0.00012366846461242217, "loss": 3.2573, "step": 6256 }, { "epoch": 1.93, "learning_rate": 0.00012363283555318495, "loss": 3.2855, "step": 6257 }, { "epoch": 1.93, "learning_rate": 0.0001235972064939477, "loss": 3.3245, "step": 6258 }, { "epoch": 1.93, "learning_rate": 0.00012356157743471048, "loss": 3.4611, "step": 6259 }, { "epoch": 1.93, "learning_rate": 0.00012352594837547327, "loss": 3.4554, "step": 6260 }, { "epoch": 1.93, "learning_rate": 0.00012349031931623602, "loss": 3.3117, "step": 6261 }, { "epoch": 1.93, "learning_rate": 0.0001234546902569988, "loss": 2.9961, "step": 6262 }, { "epoch": 1.93, "learning_rate": 0.00012341906119776156, "loss": 3.4245, "step": 6263 }, { "epoch": 1.93, "learning_rate": 0.00012338343213852434, "loss": 3.2503, "step": 6264 }, { "epoch": 1.93, "learning_rate": 0.00012334780307928712, "loss": 3.1815, "step": 6265 }, { "epoch": 1.93, "learning_rate": 0.00012331217402004988, "loss": 3.1395, "step": 6266 }, { "epoch": 1.93, "learning_rate": 0.00012327654496081266, "loss": 3.0552, "step": 6267 }, { "epoch": 1.93, "learning_rate": 0.0001232409159015754, "loss": 3.0211, "step": 6268 }, { "epoch": 1.93, "learning_rate": 0.0001232052868423382, "loss": 2.8067, "step": 6269 }, { "epoch": 1.94, "learning_rate": 0.00012316965778310095, "loss": 2.9134, "step": 6270 }, { "epoch": 1.94, "learning_rate": 0.00012313402872386373, "loss": 3.0687, "step": 6271 }, { "epoch": 1.94, "learning_rate": 0.00012309839966462651, "loss": 2.6641, "step": 6272 }, { "epoch": 1.94, "learning_rate": 0.0001230627706053893, "loss": 2.8185, "step": 6273 }, { "epoch": 1.94, "learning_rate": 0.00012302714154615205, "loss": 2.6145, "step": 6274 }, { "epoch": 1.94, "learning_rate": 0.0001229915124869148, "loss": 2.7394, "step": 6275 }, { "epoch": 1.94, "learning_rate": 0.0001229558834276776, "loss": 2.5418, "step": 6276 }, { "epoch": 1.94, "learning_rate": 0.00012292025436844037, "loss": 2.6461, "step": 6277 }, { "epoch": 1.94, "learning_rate": 0.00012288462530920315, "loss": 2.4767, "step": 6278 }, { "epoch": 1.94, "learning_rate": 0.0001228489962499659, "loss": 2.4668, "step": 6279 }, { "epoch": 1.94, "learning_rate": 0.00012281336719072866, "loss": 2.4523, "step": 6280 }, { "epoch": 1.94, "learning_rate": 0.00012277773813149144, "loss": 2.4706, "step": 6281 }, { "epoch": 1.94, "learning_rate": 0.00012274210907225422, "loss": 2.2497, "step": 6282 }, { "epoch": 1.94, "learning_rate": 0.00012270648001301698, "loss": 2.3645, "step": 6283 }, { "epoch": 1.94, "learning_rate": 0.00012267085095377976, "loss": 2.0245, "step": 6284 }, { "epoch": 1.94, "learning_rate": 0.00012263522189454254, "loss": 2.0714, "step": 6285 }, { "epoch": 1.94, "learning_rate": 0.0001225995928353053, "loss": 1.9947, "step": 6286 }, { "epoch": 1.94, "learning_rate": 0.00012256396377606805, "loss": 1.9098, "step": 6287 }, { "epoch": 1.94, "learning_rate": 0.00012252833471683083, "loss": 1.7932, "step": 6288 }, { "epoch": 1.94, "learning_rate": 0.00012249270565759362, "loss": 1.8808, "step": 6289 }, { "epoch": 1.94, "learning_rate": 0.0001224570765983564, "loss": 1.6662, "step": 6290 }, { "epoch": 1.94, "learning_rate": 0.00012242144753911915, "loss": 4.4508, "step": 6291 }, { "epoch": 1.94, "learning_rate": 0.00012238581847988193, "loss": 4.1852, "step": 6292 }, { "epoch": 1.94, "learning_rate": 0.0001223501894206447, "loss": 3.9447, "step": 6293 }, { "epoch": 1.94, "learning_rate": 0.00012231456036140747, "loss": 3.7443, "step": 6294 }, { "epoch": 1.94, "learning_rate": 0.00012227893130217025, "loss": 3.7074, "step": 6295 }, { "epoch": 1.94, "learning_rate": 0.000122243302242933, "loss": 3.5568, "step": 6296 }, { "epoch": 1.94, "learning_rate": 0.0001222076731836958, "loss": 3.8518, "step": 6297 }, { "epoch": 1.94, "learning_rate": 0.00012217204412445854, "loss": 3.5252, "step": 6298 }, { "epoch": 1.94, "learning_rate": 0.00012213641506522133, "loss": 3.6679, "step": 6299 }, { "epoch": 1.94, "learning_rate": 0.00012210078600598408, "loss": 3.2884, "step": 6300 }, { "epoch": 1.94, "learning_rate": 0.00012206515694674686, "loss": 3.7034, "step": 6301 }, { "epoch": 1.94, "learning_rate": 0.00012202952788750963, "loss": 3.5611, "step": 6302 }, { "epoch": 1.95, "learning_rate": 0.00012199389882827241, "loss": 3.4053, "step": 6303 }, { "epoch": 1.95, "learning_rate": 0.00012195826976903517, "loss": 3.3161, "step": 6304 }, { "epoch": 1.95, "learning_rate": 0.00012192264070979795, "loss": 3.6776, "step": 6305 }, { "epoch": 1.95, "learning_rate": 0.00012188701165056072, "loss": 3.2157, "step": 6306 }, { "epoch": 1.95, "learning_rate": 0.0001218513825913235, "loss": 3.2946, "step": 6307 }, { "epoch": 1.95, "learning_rate": 0.00012181575353208625, "loss": 3.2176, "step": 6308 }, { "epoch": 1.95, "learning_rate": 0.00012178012447284902, "loss": 3.2924, "step": 6309 }, { "epoch": 1.95, "learning_rate": 0.0001217444954136118, "loss": 3.2238, "step": 6310 }, { "epoch": 1.95, "learning_rate": 0.00012170886635437457, "loss": 3.1917, "step": 6311 }, { "epoch": 1.95, "learning_rate": 0.00012167323729513736, "loss": 3.0384, "step": 6312 }, { "epoch": 1.95, "learning_rate": 0.00012163760823590011, "loss": 3.2451, "step": 6313 }, { "epoch": 1.95, "learning_rate": 0.00012160197917666288, "loss": 3.2805, "step": 6314 }, { "epoch": 1.95, "learning_rate": 0.00012156635011742566, "loss": 3.2868, "step": 6315 }, { "epoch": 1.95, "learning_rate": 0.00012153072105818843, "loss": 3.2563, "step": 6316 }, { "epoch": 1.95, "learning_rate": 0.0001214950919989512, "loss": 3.2465, "step": 6317 }, { "epoch": 1.95, "learning_rate": 0.00012145946293971397, "loss": 3.1684, "step": 6318 }, { "epoch": 1.95, "learning_rate": 0.00012142383388047675, "loss": 3.0084, "step": 6319 }, { "epoch": 1.95, "learning_rate": 0.00012138820482123952, "loss": 2.7951, "step": 6320 }, { "epoch": 1.95, "learning_rate": 0.00012135257576200227, "loss": 3.0063, "step": 6321 }, { "epoch": 1.95, "learning_rate": 0.00012131694670276505, "loss": 2.7995, "step": 6322 }, { "epoch": 1.95, "learning_rate": 0.00012128131764352782, "loss": 2.8224, "step": 6323 }, { "epoch": 1.95, "learning_rate": 0.0001212456885842906, "loss": 2.9922, "step": 6324 }, { "epoch": 1.95, "learning_rate": 0.00012121005952505336, "loss": 2.6167, "step": 6325 }, { "epoch": 1.95, "learning_rate": 0.00012117443046581613, "loss": 2.6477, "step": 6326 }, { "epoch": 1.95, "learning_rate": 0.00012113880140657891, "loss": 2.5494, "step": 6327 }, { "epoch": 1.95, "learning_rate": 0.00012110317234734168, "loss": 2.4819, "step": 6328 }, { "epoch": 1.95, "learning_rate": 0.00012106754328810446, "loss": 2.2827, "step": 6329 }, { "epoch": 1.95, "learning_rate": 0.00012103191422886721, "loss": 2.4099, "step": 6330 }, { "epoch": 1.95, "learning_rate": 0.00012099628516963, "loss": 2.3359, "step": 6331 }, { "epoch": 1.95, "learning_rate": 0.00012096065611039276, "loss": 2.0893, "step": 6332 }, { "epoch": 1.95, "learning_rate": 0.00012092502705115554, "loss": 2.227, "step": 6333 }, { "epoch": 1.95, "learning_rate": 0.0001208893979919183, "loss": 2.2745, "step": 6334 }, { "epoch": 1.96, "learning_rate": 0.00012085376893268107, "loss": 2.0862, "step": 6335 }, { "epoch": 1.96, "learning_rate": 0.00012081813987344385, "loss": 2.0579, "step": 6336 }, { "epoch": 1.96, "learning_rate": 0.00012078251081420662, "loss": 1.8122, "step": 6337 }, { "epoch": 1.96, "learning_rate": 0.00012074688175496939, "loss": 1.8825, "step": 6338 }, { "epoch": 1.96, "learning_rate": 0.00012071125269573215, "loss": 1.7406, "step": 6339 }, { "epoch": 1.96, "learning_rate": 0.00012067562363649494, "loss": 1.6829, "step": 6340 }, { "epoch": 1.96, "learning_rate": 0.0001206399945772577, "loss": 4.3326, "step": 6341 }, { "epoch": 1.96, "learning_rate": 0.00012060436551802046, "loss": 3.9366, "step": 6342 }, { "epoch": 1.96, "learning_rate": 0.00012056873645878324, "loss": 4.0536, "step": 6343 }, { "epoch": 1.96, "learning_rate": 0.00012053310739954601, "loss": 3.856, "step": 6344 }, { "epoch": 1.96, "learning_rate": 0.00012049747834030879, "loss": 3.6989, "step": 6345 }, { "epoch": 1.96, "learning_rate": 0.00012046184928107156, "loss": 3.5423, "step": 6346 }, { "epoch": 1.96, "learning_rate": 0.00012042622022183431, "loss": 3.6103, "step": 6347 }, { "epoch": 1.96, "learning_rate": 0.0001203905911625971, "loss": 3.5651, "step": 6348 }, { "epoch": 1.96, "learning_rate": 0.00012035496210335986, "loss": 3.5381, "step": 6349 }, { "epoch": 1.96, "learning_rate": 0.00012031933304412265, "loss": 3.5106, "step": 6350 }, { "epoch": 1.96, "learning_rate": 0.0001202837039848854, "loss": 3.291, "step": 6351 }, { "epoch": 1.96, "learning_rate": 0.00012024807492564818, "loss": 3.4486, "step": 6352 }, { "epoch": 1.96, "learning_rate": 0.00012021244586641095, "loss": 3.3991, "step": 6353 }, { "epoch": 1.96, "learning_rate": 0.00012017681680717373, "loss": 3.3376, "step": 6354 }, { "epoch": 1.96, "learning_rate": 0.00012014118774793649, "loss": 3.1131, "step": 6355 }, { "epoch": 1.96, "learning_rate": 0.00012010555868869926, "loss": 3.3873, "step": 6356 }, { "epoch": 1.96, "learning_rate": 0.00012006992962946204, "loss": 3.3403, "step": 6357 }, { "epoch": 1.96, "learning_rate": 0.0001200343005702248, "loss": 3.3094, "step": 6358 }, { "epoch": 1.96, "learning_rate": 0.00011999867151098756, "loss": 3.6904, "step": 6359 }, { "epoch": 1.96, "learning_rate": 0.00011996304245175034, "loss": 3.2395, "step": 6360 }, { "epoch": 1.96, "learning_rate": 0.00011992741339251311, "loss": 3.2525, "step": 6361 }, { "epoch": 1.96, "learning_rate": 0.0001198917843332759, "loss": 3.1715, "step": 6362 }, { "epoch": 1.96, "learning_rate": 0.00011985615527403866, "loss": 3.1244, "step": 6363 }, { "epoch": 1.96, "learning_rate": 0.00011982052621480143, "loss": 2.9708, "step": 6364 }, { "epoch": 1.96, "learning_rate": 0.0001197848971555642, "loss": 3.0925, "step": 6365 }, { "epoch": 1.96, "learning_rate": 0.00011974926809632698, "loss": 3.1954, "step": 6366 }, { "epoch": 1.97, "learning_rate": 0.00011971363903708975, "loss": 3.2942, "step": 6367 }, { "epoch": 1.97, "learning_rate": 0.0001196780099778525, "loss": 3.1907, "step": 6368 }, { "epoch": 1.97, "learning_rate": 0.00011964238091861529, "loss": 2.9039, "step": 6369 }, { "epoch": 1.97, "learning_rate": 0.00011960675185937805, "loss": 2.7689, "step": 6370 }, { "epoch": 1.97, "learning_rate": 0.00011957112280014084, "loss": 3.0075, "step": 6371 }, { "epoch": 1.97, "learning_rate": 0.00011953549374090359, "loss": 3.208, "step": 6372 }, { "epoch": 1.97, "learning_rate": 0.00011949986468166636, "loss": 2.8777, "step": 6373 }, { "epoch": 1.97, "learning_rate": 0.00011946423562242914, "loss": 3.0454, "step": 6374 }, { "epoch": 1.97, "learning_rate": 0.00011942860656319191, "loss": 2.6165, "step": 6375 }, { "epoch": 1.97, "learning_rate": 0.00011939297750395469, "loss": 2.605, "step": 6376 }, { "epoch": 1.97, "learning_rate": 0.00011935734844471745, "loss": 2.5803, "step": 6377 }, { "epoch": 1.97, "learning_rate": 0.00011932171938548023, "loss": 2.5476, "step": 6378 }, { "epoch": 1.97, "learning_rate": 0.000119286090326243, "loss": 2.4307, "step": 6379 }, { "epoch": 1.97, "learning_rate": 0.00011925046126700578, "loss": 2.1848, "step": 6380 }, { "epoch": 1.97, "learning_rate": 0.00011921483220776853, "loss": 2.2606, "step": 6381 }, { "epoch": 1.97, "learning_rate": 0.0001191792031485313, "loss": 2.3911, "step": 6382 }, { "epoch": 1.97, "learning_rate": 0.00011914357408929408, "loss": 2.3356, "step": 6383 }, { "epoch": 1.97, "learning_rate": 0.00011910794503005685, "loss": 1.9967, "step": 6384 }, { "epoch": 1.97, "learning_rate": 0.00011907231597081962, "loss": 1.9493, "step": 6385 }, { "epoch": 1.97, "learning_rate": 0.00011903668691158239, "loss": 1.8802, "step": 6386 }, { "epoch": 1.97, "learning_rate": 0.00011900105785234517, "loss": 1.918, "step": 6387 }, { "epoch": 1.97, "learning_rate": 0.00011896542879310794, "loss": 1.7918, "step": 6388 }, { "epoch": 1.97, "learning_rate": 0.00011892979973387069, "loss": 1.7174, "step": 6389 }, { "epoch": 1.97, "learning_rate": 0.00011889417067463347, "loss": 1.7381, "step": 6390 }, { "epoch": 1.97, "learning_rate": 0.00011885854161539624, "loss": 4.4643, "step": 6391 }, { "epoch": 1.97, "learning_rate": 0.00011882291255615902, "loss": 4.0275, "step": 6392 }, { "epoch": 1.97, "learning_rate": 0.00011878728349692179, "loss": 3.8139, "step": 6393 }, { "epoch": 1.97, "learning_rate": 0.00011875165443768455, "loss": 3.567, "step": 6394 }, { "epoch": 1.97, "learning_rate": 0.00011871602537844733, "loss": 3.4271, "step": 6395 }, { "epoch": 1.97, "learning_rate": 0.0001186803963192101, "loss": 3.6323, "step": 6396 }, { "epoch": 1.97, "learning_rate": 0.00011864476725997288, "loss": 3.5337, "step": 6397 }, { "epoch": 1.97, "learning_rate": 0.00011860913820073563, "loss": 3.5572, "step": 6398 }, { "epoch": 1.97, "learning_rate": 0.00011857350914149842, "loss": 3.4327, "step": 6399 }, { "epoch": 1.98, "learning_rate": 0.00011853788008226118, "loss": 3.3467, "step": 6400 }, { "epoch": 1.98, "learning_rate": 0.00011850225102302397, "loss": 3.4823, "step": 6401 }, { "epoch": 1.98, "learning_rate": 0.00011846662196378672, "loss": 3.6176, "step": 6402 }, { "epoch": 1.98, "learning_rate": 0.00011843099290454949, "loss": 3.5742, "step": 6403 }, { "epoch": 1.98, "learning_rate": 0.00011839536384531227, "loss": 3.5294, "step": 6404 }, { "epoch": 1.98, "learning_rate": 0.00011835973478607504, "loss": 3.5319, "step": 6405 }, { "epoch": 1.98, "learning_rate": 0.0001183241057268378, "loss": 3.3953, "step": 6406 }, { "epoch": 1.98, "learning_rate": 0.00011828847666760058, "loss": 3.4968, "step": 6407 }, { "epoch": 1.98, "learning_rate": 0.00011825284760836334, "loss": 3.5302, "step": 6408 }, { "epoch": 1.98, "learning_rate": 0.00011821721854912613, "loss": 3.1863, "step": 6409 }, { "epoch": 1.98, "learning_rate": 0.0001181815894898889, "loss": 3.2545, "step": 6410 }, { "epoch": 1.98, "learning_rate": 0.00011814596043065166, "loss": 3.1285, "step": 6411 }, { "epoch": 1.98, "learning_rate": 0.00011811033137141443, "loss": 3.1186, "step": 6412 }, { "epoch": 1.98, "learning_rate": 0.00011807470231217721, "loss": 3.2511, "step": 6413 }, { "epoch": 1.98, "learning_rate": 0.00011803907325293998, "loss": 3.1158, "step": 6414 }, { "epoch": 1.98, "learning_rate": 0.00011800344419370274, "loss": 3.0221, "step": 6415 }, { "epoch": 1.98, "learning_rate": 0.00011796781513446552, "loss": 2.826, "step": 6416 }, { "epoch": 1.98, "learning_rate": 0.00011793218607522829, "loss": 3.192, "step": 6417 }, { "epoch": 1.98, "learning_rate": 0.00011789655701599107, "loss": 3.1183, "step": 6418 }, { "epoch": 1.98, "learning_rate": 0.00011786092795675382, "loss": 2.9161, "step": 6419 }, { "epoch": 1.98, "learning_rate": 0.00011782529889751659, "loss": 3.0305, "step": 6420 }, { "epoch": 1.98, "learning_rate": 0.00011778966983827937, "loss": 2.715, "step": 6421 }, { "epoch": 1.98, "learning_rate": 0.00011775404077904216, "loss": 2.791, "step": 6422 }, { "epoch": 1.98, "learning_rate": 0.00011771841171980491, "loss": 2.5531, "step": 6423 }, { "epoch": 1.98, "learning_rate": 0.00011768278266056768, "loss": 2.5369, "step": 6424 }, { "epoch": 1.98, "learning_rate": 0.00011764715360133046, "loss": 2.7432, "step": 6425 }, { "epoch": 1.98, "learning_rate": 0.00011761152454209323, "loss": 2.7252, "step": 6426 }, { "epoch": 1.98, "learning_rate": 0.00011757589548285601, "loss": 2.3303, "step": 6427 }, { "epoch": 1.98, "learning_rate": 0.00011754026642361877, "loss": 2.6895, "step": 6428 }, { "epoch": 1.98, "learning_rate": 0.00011750463736438153, "loss": 2.479, "step": 6429 }, { "epoch": 1.98, "learning_rate": 0.00011746900830514432, "loss": 2.4087, "step": 6430 }, { "epoch": 1.98, "learning_rate": 0.00011743337924590708, "loss": 2.558, "step": 6431 }, { "epoch": 1.99, "learning_rate": 0.00011739775018666985, "loss": 2.2772, "step": 6432 }, { "epoch": 1.99, "learning_rate": 0.00011736212112743262, "loss": 2.5903, "step": 6433 }, { "epoch": 1.99, "learning_rate": 0.0001173264920681954, "loss": 2.1338, "step": 6434 }, { "epoch": 1.99, "learning_rate": 0.00011729086300895817, "loss": 1.991, "step": 6435 }, { "epoch": 1.99, "learning_rate": 0.00011725523394972093, "loss": 2.0097, "step": 6436 }, { "epoch": 1.99, "learning_rate": 0.00011721960489048371, "loss": 1.8478, "step": 6437 }, { "epoch": 1.99, "learning_rate": 0.00011718397583124648, "loss": 1.8268, "step": 6438 }, { "epoch": 1.99, "learning_rate": 0.00011714834677200926, "loss": 1.7238, "step": 6439 }, { "epoch": 1.99, "learning_rate": 0.00011711271771277201, "loss": 1.6445, "step": 6440 }, { "epoch": 1.99, "learning_rate": 0.00011707708865353478, "loss": 4.2796, "step": 6441 }, { "epoch": 1.99, "learning_rate": 0.00011704145959429756, "loss": 4.29, "step": 6442 }, { "epoch": 1.99, "learning_rate": 0.00011700583053506033, "loss": 3.6304, "step": 6443 }, { "epoch": 1.99, "learning_rate": 0.00011697020147582311, "loss": 3.9167, "step": 6444 }, { "epoch": 1.99, "learning_rate": 0.00011693457241658587, "loss": 3.799, "step": 6445 }, { "epoch": 1.99, "learning_rate": 0.00011689894335734865, "loss": 3.4665, "step": 6446 }, { "epoch": 1.99, "learning_rate": 0.00011686331429811142, "loss": 3.2171, "step": 6447 }, { "epoch": 1.99, "learning_rate": 0.0001168276852388742, "loss": 3.6258, "step": 6448 }, { "epoch": 1.99, "learning_rate": 0.00011679205617963695, "loss": 3.6364, "step": 6449 }, { "epoch": 1.99, "learning_rate": 0.00011675642712039972, "loss": 3.463, "step": 6450 }, { "epoch": 1.99, "learning_rate": 0.0001167207980611625, "loss": 3.452, "step": 6451 }, { "epoch": 1.99, "learning_rate": 0.00011668516900192527, "loss": 3.1833, "step": 6452 }, { "epoch": 1.99, "learning_rate": 0.00011664953994268803, "loss": 3.4622, "step": 6453 }, { "epoch": 1.99, "learning_rate": 0.00011661391088345081, "loss": 3.4034, "step": 6454 }, { "epoch": 1.99, "learning_rate": 0.00011657828182421358, "loss": 3.5632, "step": 6455 }, { "epoch": 1.99, "learning_rate": 0.00011654265276497636, "loss": 3.2367, "step": 6456 }, { "epoch": 1.99, "learning_rate": 0.00011650702370573911, "loss": 3.208, "step": 6457 }, { "epoch": 1.99, "learning_rate": 0.0001164713946465019, "loss": 3.3104, "step": 6458 }, { "epoch": 1.99, "learning_rate": 0.00011643576558726466, "loss": 3.1702, "step": 6459 }, { "epoch": 1.99, "learning_rate": 0.00011640013652802745, "loss": 2.971, "step": 6460 }, { "epoch": 1.99, "learning_rate": 0.00011636450746879021, "loss": 3.0789, "step": 6461 }, { "epoch": 1.99, "learning_rate": 0.00011632887840955297, "loss": 3.0441, "step": 6462 }, { "epoch": 1.99, "learning_rate": 0.00011629324935031575, "loss": 2.9161, "step": 6463 }, { "epoch": 1.99, "learning_rate": 0.00011625762029107852, "loss": 3.1718, "step": 6464 }, { "epoch": 2.0, "learning_rate": 0.0001162219912318413, "loss": 2.9247, "step": 6465 }, { "epoch": 2.0, "learning_rate": 0.00011618636217260406, "loss": 3.0687, "step": 6466 }, { "epoch": 2.0, "learning_rate": 0.00011615073311336684, "loss": 2.7461, "step": 6467 }, { "epoch": 2.0, "learning_rate": 0.0001161151040541296, "loss": 2.7759, "step": 6468 }, { "epoch": 2.0, "learning_rate": 0.00011607947499489239, "loss": 2.814, "step": 6469 }, { "epoch": 2.0, "learning_rate": 0.00011604384593565514, "loss": 2.5327, "step": 6470 }, { "epoch": 2.0, "learning_rate": 0.00011600821687641791, "loss": 2.4799, "step": 6471 }, { "epoch": 2.0, "learning_rate": 0.0001159725878171807, "loss": 2.3533, "step": 6472 }, { "epoch": 2.0, "learning_rate": 0.00011593695875794346, "loss": 2.6088, "step": 6473 }, { "epoch": 2.0, "learning_rate": 0.00011590132969870624, "loss": 2.453, "step": 6474 }, { "epoch": 2.0, "learning_rate": 0.000115865700639469, "loss": 2.2397, "step": 6475 }, { "epoch": 2.0, "learning_rate": 0.00011583007158023177, "loss": 1.9031, "step": 6476 }, { "epoch": 2.0, "learning_rate": 0.00011579444252099455, "loss": 1.9986, "step": 6477 }, { "epoch": 2.0, "learning_rate": 0.00011575881346175732, "loss": 2.0547, "step": 6478 }, { "epoch": 2.0, "learning_rate": 0.00011572318440252009, "loss": 1.6775, "step": 6479 }, { "epoch": 2.0, "learning_rate": 0.00011568755534328285, "loss": 1.6999, "step": 6480 }, { "epoch": 2.0, "learning_rate": 0.00011565192628404564, "loss": 4.3112, "step": 6481 }, { "epoch": 2.0, "learning_rate": 0.0001156162972248084, "loss": 3.8602, "step": 6482 }, { "epoch": 2.0, "learning_rate": 0.00011558066816557116, "loss": 3.4041, "step": 6483 }, { "epoch": 2.0, "learning_rate": 0.00011554503910633394, "loss": 3.5769, "step": 6484 }, { "epoch": 2.0, "learning_rate": 0.00011550941004709671, "loss": 3.4436, "step": 6485 }, { "epoch": 2.0, "learning_rate": 0.00011547378098785949, "loss": 3.1102, "step": 6486 }, { "epoch": 2.0, "learning_rate": 0.00011543815192862225, "loss": 3.1508, "step": 6487 }, { "epoch": 2.0, "learning_rate": 0.00011540252286938501, "loss": 3.0366, "step": 6488 }, { "epoch": 2.0, "learning_rate": 0.0001153668938101478, "loss": 3.2328, "step": 6489 }, { "epoch": 2.0, "learning_rate": 0.00011533126475091056, "loss": 2.8792, "step": 6490 }, { "epoch": 2.0, "learning_rate": 0.00011529563569167335, "loss": 2.9948, "step": 6491 }, { "epoch": 2.0, "learning_rate": 0.0001152600066324361, "loss": 3.0081, "step": 6492 }, { "epoch": 2.0, "learning_rate": 0.00011522437757319888, "loss": 3.0525, "step": 6493 }, { "epoch": 2.0, "learning_rate": 0.00011518874851396165, "loss": 2.9947, "step": 6494 }, { "epoch": 2.0, "learning_rate": 0.00011515311945472443, "loss": 2.9529, "step": 6495 }, { "epoch": 2.0, "learning_rate": 0.00011511749039548719, "loss": 3.1256, "step": 6496 }, { "epoch": 2.01, "learning_rate": 0.00011508186133624996, "loss": 3.0644, "step": 6497 }, { "epoch": 2.01, "learning_rate": 0.00011504623227701274, "loss": 3.0221, "step": 6498 }, { "epoch": 2.01, "learning_rate": 0.0001150106032177755, "loss": 3.0206, "step": 6499 }, { "epoch": 2.01, "learning_rate": 0.00011497497415853826, "loss": 2.9148, "step": 6500 }, { "epoch": 2.01, "eval_bleu": 3.042772821821628e-14, "eval_loss": 4.028158187866211, "eval_runtime": 2574.1053, "eval_samples_per_second": 5.734, "eval_steps_per_second": 0.717, "step": 6500 }, { "epoch": 2.01, "learning_rate": 0.00011493934509930104, "loss": 2.8993, "step": 6501 }, { "epoch": 2.01, "learning_rate": 0.00011490371604006381, "loss": 3.095, "step": 6502 }, { "epoch": 2.01, "learning_rate": 0.00011486808698082659, "loss": 2.9025, "step": 6503 }, { "epoch": 2.01, "learning_rate": 0.00011483245792158935, "loss": 2.8447, "step": 6504 }, { "epoch": 2.01, "learning_rate": 0.00011479682886235213, "loss": 2.7538, "step": 6505 }, { "epoch": 2.01, "learning_rate": 0.0001147611998031149, "loss": 2.903, "step": 6506 }, { "epoch": 2.01, "learning_rate": 0.00011472557074387768, "loss": 2.5932, "step": 6507 }, { "epoch": 2.01, "learning_rate": 0.00011468994168464045, "loss": 2.5023, "step": 6508 }, { "epoch": 2.01, "learning_rate": 0.0001146543126254032, "loss": 2.524, "step": 6509 }, { "epoch": 2.01, "learning_rate": 0.00011461868356616598, "loss": 2.6866, "step": 6510 }, { "epoch": 2.01, "learning_rate": 0.00011458305450692875, "loss": 2.3969, "step": 6511 }, { "epoch": 2.01, "learning_rate": 0.00011454742544769153, "loss": 2.386, "step": 6512 }, { "epoch": 2.01, "learning_rate": 0.00011451179638845429, "loss": 2.5887, "step": 6513 }, { "epoch": 2.01, "learning_rate": 0.00011447616732921707, "loss": 2.1673, "step": 6514 }, { "epoch": 2.01, "learning_rate": 0.00011444053826997984, "loss": 2.2874, "step": 6515 }, { "epoch": 2.01, "learning_rate": 0.00011440490921074262, "loss": 2.2893, "step": 6516 }, { "epoch": 2.01, "learning_rate": 0.00011436928015150538, "loss": 2.3741, "step": 6517 }, { "epoch": 2.01, "learning_rate": 0.00011433365109226814, "loss": 2.1559, "step": 6518 }, { "epoch": 2.01, "learning_rate": 0.00011429802203303093, "loss": 2.0367, "step": 6519 }, { "epoch": 2.01, "learning_rate": 0.0001142623929737937, "loss": 2.0505, "step": 6520 }, { "epoch": 2.01, "learning_rate": 0.00011422676391455645, "loss": 1.7658, "step": 6521 }, { "epoch": 2.01, "learning_rate": 0.00011419113485531923, "loss": 1.963, "step": 6522 }, { "epoch": 2.01, "learning_rate": 0.000114155505796082, "loss": 2.0053, "step": 6523 }, { "epoch": 2.01, "learning_rate": 0.00011411987673684478, "loss": 1.7896, "step": 6524 }, { "epoch": 2.01, "learning_rate": 0.00011408424767760755, "loss": 1.8566, "step": 6525 }, { "epoch": 2.01, "learning_rate": 0.00011404861861837032, "loss": 1.6627, "step": 6526 }, { "epoch": 2.01, "learning_rate": 0.00011401298955913309, "loss": 1.5814, "step": 6527 }, { "epoch": 2.01, "learning_rate": 0.00011397736049989587, "loss": 1.5535, "step": 6528 }, { "epoch": 2.02, "learning_rate": 0.00011394173144065864, "loss": 1.5668, "step": 6529 }, { "epoch": 2.02, "learning_rate": 0.00011390610238142139, "loss": 1.5186, "step": 6530 }, { "epoch": 2.02, "learning_rate": 0.00011387047332218417, "loss": 4.0115, "step": 6531 }, { "epoch": 2.02, "learning_rate": 0.00011383484426294694, "loss": 3.9812, "step": 6532 }, { "epoch": 2.02, "learning_rate": 0.00011379921520370972, "loss": 3.6604, "step": 6533 }, { "epoch": 2.02, "learning_rate": 0.00011376358614447248, "loss": 3.5509, "step": 6534 }, { "epoch": 2.02, "learning_rate": 0.00011372795708523525, "loss": 3.4303, "step": 6535 }, { "epoch": 2.02, "learning_rate": 0.00011369232802599803, "loss": 3.4668, "step": 6536 }, { "epoch": 2.02, "learning_rate": 0.0001136566989667608, "loss": 3.3841, "step": 6537 }, { "epoch": 2.02, "learning_rate": 0.00011362106990752357, "loss": 3.0714, "step": 6538 }, { "epoch": 2.02, "learning_rate": 0.00011358544084828633, "loss": 3.2686, "step": 6539 }, { "epoch": 2.02, "learning_rate": 0.00011354981178904912, "loss": 3.3071, "step": 6540 }, { "epoch": 2.02, "learning_rate": 0.00011351418272981188, "loss": 3.1565, "step": 6541 }, { "epoch": 2.02, "learning_rate": 0.00011347855367057467, "loss": 3.0182, "step": 6542 }, { "epoch": 2.02, "learning_rate": 0.00011344292461133742, "loss": 3.2453, "step": 6543 }, { "epoch": 2.02, "learning_rate": 0.00011340729555210019, "loss": 2.9619, "step": 6544 }, { "epoch": 2.02, "learning_rate": 0.00011337166649286297, "loss": 3.0996, "step": 6545 }, { "epoch": 2.02, "learning_rate": 0.00011333603743362574, "loss": 2.9627, "step": 6546 }, { "epoch": 2.02, "learning_rate": 0.0001133004083743885, "loss": 2.9814, "step": 6547 }, { "epoch": 2.02, "learning_rate": 0.00011326477931515128, "loss": 3.0741, "step": 6548 }, { "epoch": 2.02, "learning_rate": 0.00011322915025591404, "loss": 2.9039, "step": 6549 }, { "epoch": 2.02, "learning_rate": 0.00011319352119667683, "loss": 2.9234, "step": 6550 }, { "epoch": 2.02, "learning_rate": 0.00011315789213743958, "loss": 2.7293, "step": 6551 }, { "epoch": 2.02, "learning_rate": 0.00011312226307820236, "loss": 2.8402, "step": 6552 }, { "epoch": 2.02, "learning_rate": 0.00011308663401896513, "loss": 2.7189, "step": 6553 }, { "epoch": 2.02, "learning_rate": 0.00011305100495972791, "loss": 2.723, "step": 6554 }, { "epoch": 2.02, "learning_rate": 0.00011301537590049067, "loss": 2.468, "step": 6555 }, { "epoch": 2.02, "learning_rate": 0.00011297974684125344, "loss": 2.6912, "step": 6556 }, { "epoch": 2.02, "learning_rate": 0.00011294411778201622, "loss": 2.645, "step": 6557 }, { "epoch": 2.02, "learning_rate": 0.00011290848872277899, "loss": 2.475, "step": 6558 }, { "epoch": 2.02, "learning_rate": 0.00011287285966354177, "loss": 2.5526, "step": 6559 }, { "epoch": 2.02, "learning_rate": 0.00011283723060430452, "loss": 2.6966, "step": 6560 }, { "epoch": 2.02, "learning_rate": 0.0001128016015450673, "loss": 2.4613, "step": 6561 }, { "epoch": 2.03, "learning_rate": 0.00011276597248583007, "loss": 2.5327, "step": 6562 }, { "epoch": 2.03, "learning_rate": 0.00011273034342659285, "loss": 2.4223, "step": 6563 }, { "epoch": 2.03, "learning_rate": 0.00011269471436735561, "loss": 2.2428, "step": 6564 }, { "epoch": 2.03, "learning_rate": 0.00011265908530811838, "loss": 2.5619, "step": 6565 }, { "epoch": 2.03, "learning_rate": 0.00011262345624888116, "loss": 2.1988, "step": 6566 }, { "epoch": 2.03, "learning_rate": 0.00011258782718964393, "loss": 2.1883, "step": 6567 }, { "epoch": 2.03, "learning_rate": 0.00011255219813040668, "loss": 2.0917, "step": 6568 }, { "epoch": 2.03, "learning_rate": 0.00011251656907116946, "loss": 2.0602, "step": 6569 }, { "epoch": 2.03, "learning_rate": 0.00011248094001193223, "loss": 2.2042, "step": 6570 }, { "epoch": 2.03, "learning_rate": 0.00011244531095269501, "loss": 1.9537, "step": 6571 }, { "epoch": 2.03, "learning_rate": 0.00011240968189345778, "loss": 1.8875, "step": 6572 }, { "epoch": 2.03, "learning_rate": 0.00011237405283422055, "loss": 1.9476, "step": 6573 }, { "epoch": 2.03, "learning_rate": 0.00011233842377498332, "loss": 1.7358, "step": 6574 }, { "epoch": 2.03, "learning_rate": 0.0001123027947157461, "loss": 1.8412, "step": 6575 }, { "epoch": 2.03, "learning_rate": 0.00011226716565650887, "loss": 1.7834, "step": 6576 }, { "epoch": 2.03, "learning_rate": 0.00011223153659727162, "loss": 1.7059, "step": 6577 }, { "epoch": 2.03, "learning_rate": 0.0001121959075380344, "loss": 1.6407, "step": 6578 }, { "epoch": 2.03, "learning_rate": 0.00011216027847879717, "loss": 1.3587, "step": 6579 }, { "epoch": 2.03, "learning_rate": 0.00011212464941955996, "loss": 1.4483, "step": 6580 }, { "epoch": 2.03, "learning_rate": 0.00011208902036032271, "loss": 4.1891, "step": 6581 }, { "epoch": 2.03, "learning_rate": 0.00011205339130108548, "loss": 3.6822, "step": 6582 }, { "epoch": 2.03, "learning_rate": 0.00011201776224184826, "loss": 3.8089, "step": 6583 }, { "epoch": 2.03, "learning_rate": 0.00011198213318261103, "loss": 3.3374, "step": 6584 }, { "epoch": 2.03, "learning_rate": 0.0001119465041233738, "loss": 3.2984, "step": 6585 }, { "epoch": 2.03, "learning_rate": 0.00011191087506413657, "loss": 3.1327, "step": 6586 }, { "epoch": 2.03, "learning_rate": 0.00011187524600489935, "loss": 3.3811, "step": 6587 }, { "epoch": 2.03, "learning_rate": 0.00011183961694566212, "loss": 3.2754, "step": 6588 }, { "epoch": 2.03, "learning_rate": 0.0001118039878864249, "loss": 3.4036, "step": 6589 }, { "epoch": 2.03, "learning_rate": 0.00011176835882718765, "loss": 3.2466, "step": 6590 }, { "epoch": 2.03, "learning_rate": 0.00011173272976795042, "loss": 3.0652, "step": 6591 }, { "epoch": 2.03, "learning_rate": 0.0001116971007087132, "loss": 2.925, "step": 6592 }, { "epoch": 2.03, "learning_rate": 0.00011166147164947597, "loss": 2.8919, "step": 6593 }, { "epoch": 2.04, "learning_rate": 0.00011162584259023873, "loss": 3.1966, "step": 6594 }, { "epoch": 2.04, "learning_rate": 0.00011159021353100151, "loss": 3.0383, "step": 6595 }, { "epoch": 2.04, "learning_rate": 0.00011155458447176429, "loss": 3.1049, "step": 6596 }, { "epoch": 2.04, "learning_rate": 0.00011151895541252706, "loss": 2.8841, "step": 6597 }, { "epoch": 2.04, "learning_rate": 0.00011148332635328981, "loss": 3.1001, "step": 6598 }, { "epoch": 2.04, "learning_rate": 0.0001114476972940526, "loss": 2.8866, "step": 6599 }, { "epoch": 2.04, "learning_rate": 0.00011141206823481536, "loss": 3.0198, "step": 6600 }, { "epoch": 2.04, "learning_rate": 0.00011137643917557815, "loss": 2.833, "step": 6601 }, { "epoch": 2.04, "learning_rate": 0.0001113408101163409, "loss": 2.7192, "step": 6602 }, { "epoch": 2.04, "learning_rate": 0.00011130518105710367, "loss": 2.8455, "step": 6603 }, { "epoch": 2.04, "learning_rate": 0.00011126955199786645, "loss": 2.5502, "step": 6604 }, { "epoch": 2.04, "learning_rate": 0.00011123392293862922, "loss": 2.9362, "step": 6605 }, { "epoch": 2.04, "learning_rate": 0.000111198293879392, "loss": 2.7703, "step": 6606 }, { "epoch": 2.04, "learning_rate": 0.00011116266482015476, "loss": 2.7916, "step": 6607 }, { "epoch": 2.04, "learning_rate": 0.00011112703576091754, "loss": 2.6947, "step": 6608 }, { "epoch": 2.04, "learning_rate": 0.0001110914067016803, "loss": 2.6207, "step": 6609 }, { "epoch": 2.04, "learning_rate": 0.00011105577764244309, "loss": 2.5449, "step": 6610 }, { "epoch": 2.04, "learning_rate": 0.00011102014858320584, "loss": 2.3841, "step": 6611 }, { "epoch": 2.04, "learning_rate": 0.00011098451952396861, "loss": 2.5461, "step": 6612 }, { "epoch": 2.04, "learning_rate": 0.00011094889046473139, "loss": 2.5012, "step": 6613 }, { "epoch": 2.04, "learning_rate": 0.00011091326140549416, "loss": 2.2376, "step": 6614 }, { "epoch": 2.04, "learning_rate": 0.00011087763234625692, "loss": 2.3564, "step": 6615 }, { "epoch": 2.04, "learning_rate": 0.0001108420032870197, "loss": 2.3199, "step": 6616 }, { "epoch": 2.04, "learning_rate": 0.00011080637422778247, "loss": 2.1498, "step": 6617 }, { "epoch": 2.04, "learning_rate": 0.00011077074516854525, "loss": 2.2694, "step": 6618 }, { "epoch": 2.04, "learning_rate": 0.000110735116109308, "loss": 2.1036, "step": 6619 }, { "epoch": 2.04, "learning_rate": 0.00011069948705007078, "loss": 1.9732, "step": 6620 }, { "epoch": 2.04, "learning_rate": 0.00011066385799083355, "loss": 2.1049, "step": 6621 }, { "epoch": 2.04, "learning_rate": 0.00011062822893159633, "loss": 1.8685, "step": 6622 }, { "epoch": 2.04, "learning_rate": 0.0001105925998723591, "loss": 1.585, "step": 6623 }, { "epoch": 2.04, "learning_rate": 0.00011055697081312186, "loss": 1.9859, "step": 6624 }, { "epoch": 2.04, "learning_rate": 0.00011052134175388464, "loss": 1.7531, "step": 6625 }, { "epoch": 2.05, "learning_rate": 0.00011048571269464741, "loss": 1.5564, "step": 6626 }, { "epoch": 2.05, "learning_rate": 0.00011045008363541019, "loss": 1.5823, "step": 6627 }, { "epoch": 2.05, "learning_rate": 0.00011041445457617294, "loss": 1.4735, "step": 6628 }, { "epoch": 2.05, "learning_rate": 0.00011037882551693571, "loss": 1.5498, "step": 6629 }, { "epoch": 2.05, "learning_rate": 0.0001103431964576985, "loss": 1.4144, "step": 6630 }, { "epoch": 2.05, "learning_rate": 0.00011030756739846126, "loss": 3.9211, "step": 6631 }, { "epoch": 2.05, "learning_rate": 0.00011027193833922403, "loss": 4.0421, "step": 6632 }, { "epoch": 2.05, "learning_rate": 0.0001102363092799868, "loss": 3.4149, "step": 6633 }, { "epoch": 2.05, "learning_rate": 0.00011020068022074958, "loss": 3.46, "step": 6634 }, { "epoch": 2.05, "learning_rate": 0.00011016505116151235, "loss": 3.4306, "step": 6635 }, { "epoch": 2.05, "learning_rate": 0.0001101294221022751, "loss": 3.3964, "step": 6636 }, { "epoch": 2.05, "learning_rate": 0.00011009379304303789, "loss": 3.133, "step": 6637 }, { "epoch": 2.05, "learning_rate": 0.00011005816398380066, "loss": 3.4635, "step": 6638 }, { "epoch": 2.05, "learning_rate": 0.00011002253492456344, "loss": 3.2331, "step": 6639 }, { "epoch": 2.05, "learning_rate": 0.0001099869058653262, "loss": 3.042, "step": 6640 }, { "epoch": 2.05, "learning_rate": 0.00010995127680608897, "loss": 2.8603, "step": 6641 }, { "epoch": 2.05, "learning_rate": 0.00010991564774685174, "loss": 3.3571, "step": 6642 }, { "epoch": 2.05, "learning_rate": 0.00010988001868761452, "loss": 3.0635, "step": 6643 }, { "epoch": 2.05, "learning_rate": 0.00010984438962837729, "loss": 2.9387, "step": 6644 }, { "epoch": 2.05, "learning_rate": 0.00010980876056914005, "loss": 3.0088, "step": 6645 }, { "epoch": 2.05, "learning_rate": 0.00010977313150990283, "loss": 3.2358, "step": 6646 }, { "epoch": 2.05, "learning_rate": 0.0001097375024506656, "loss": 2.7372, "step": 6647 }, { "epoch": 2.05, "learning_rate": 0.00010970187339142838, "loss": 2.8706, "step": 6648 }, { "epoch": 2.05, "learning_rate": 0.00010966624433219113, "loss": 2.9393, "step": 6649 }, { "epoch": 2.05, "learning_rate": 0.0001096306152729539, "loss": 2.9428, "step": 6650 }, { "epoch": 2.05, "learning_rate": 0.00010959498621371668, "loss": 2.706, "step": 6651 }, { "epoch": 2.05, "learning_rate": 0.00010955935715447945, "loss": 2.7872, "step": 6652 }, { "epoch": 2.05, "learning_rate": 0.00010952372809524222, "loss": 2.7953, "step": 6653 }, { "epoch": 2.05, "learning_rate": 0.00010948809903600499, "loss": 2.7283, "step": 6654 }, { "epoch": 2.05, "learning_rate": 0.00010945246997676777, "loss": 2.9031, "step": 6655 }, { "epoch": 2.05, "learning_rate": 0.00010941684091753054, "loss": 2.3934, "step": 6656 }, { "epoch": 2.05, "learning_rate": 0.00010938121185829332, "loss": 2.6522, "step": 6657 }, { "epoch": 2.05, "learning_rate": 0.00010934558279905608, "loss": 2.4981, "step": 6658 }, { "epoch": 2.06, "learning_rate": 0.00010930995373981884, "loss": 2.6997, "step": 6659 }, { "epoch": 2.06, "learning_rate": 0.00010927432468058163, "loss": 2.5212, "step": 6660 }, { "epoch": 2.06, "learning_rate": 0.0001092386956213444, "loss": 2.5728, "step": 6661 }, { "epoch": 2.06, "learning_rate": 0.00010920306656210715, "loss": 2.2189, "step": 6662 }, { "epoch": 2.06, "learning_rate": 0.00010916743750286993, "loss": 2.3778, "step": 6663 }, { "epoch": 2.06, "learning_rate": 0.0001091318084436327, "loss": 2.2217, "step": 6664 }, { "epoch": 2.06, "learning_rate": 0.00010909617938439548, "loss": 2.3179, "step": 6665 }, { "epoch": 2.06, "learning_rate": 0.00010906055032515824, "loss": 2.2392, "step": 6666 }, { "epoch": 2.06, "learning_rate": 0.00010902492126592102, "loss": 2.0508, "step": 6667 }, { "epoch": 2.06, "learning_rate": 0.00010898929220668379, "loss": 2.1421, "step": 6668 }, { "epoch": 2.06, "learning_rate": 0.00010895366314744657, "loss": 1.9896, "step": 6669 }, { "epoch": 2.06, "learning_rate": 0.00010891803408820932, "loss": 2.2937, "step": 6670 }, { "epoch": 2.06, "learning_rate": 0.00010888240502897209, "loss": 2.0295, "step": 6671 }, { "epoch": 2.06, "learning_rate": 0.00010884677596973487, "loss": 1.9067, "step": 6672 }, { "epoch": 2.06, "learning_rate": 0.00010881114691049764, "loss": 1.6967, "step": 6673 }, { "epoch": 2.06, "learning_rate": 0.00010877551785126042, "loss": 1.7408, "step": 6674 }, { "epoch": 2.06, "learning_rate": 0.00010873988879202318, "loss": 1.7335, "step": 6675 }, { "epoch": 2.06, "learning_rate": 0.00010870425973278595, "loss": 1.4996, "step": 6676 }, { "epoch": 2.06, "learning_rate": 0.00010866863067354873, "loss": 1.693, "step": 6677 }, { "epoch": 2.06, "learning_rate": 0.0001086330016143115, "loss": 1.4793, "step": 6678 }, { "epoch": 2.06, "learning_rate": 0.00010859737255507426, "loss": 1.5471, "step": 6679 }, { "epoch": 2.06, "learning_rate": 0.00010856174349583703, "loss": 1.4735, "step": 6680 }, { "epoch": 2.06, "learning_rate": 0.00010852611443659981, "loss": 4.2272, "step": 6681 }, { "epoch": 2.06, "learning_rate": 0.00010849048537736258, "loss": 3.7386, "step": 6682 }, { "epoch": 2.06, "learning_rate": 0.00010845485631812534, "loss": 3.7507, "step": 6683 }, { "epoch": 2.06, "learning_rate": 0.00010841922725888812, "loss": 3.4383, "step": 6684 }, { "epoch": 2.06, "learning_rate": 0.00010838359819965089, "loss": 3.4366, "step": 6685 }, { "epoch": 2.06, "learning_rate": 0.00010834796914041367, "loss": 3.2472, "step": 6686 }, { "epoch": 2.06, "learning_rate": 0.00010831234008117644, "loss": 3.3389, "step": 6687 }, { "epoch": 2.06, "learning_rate": 0.0001082767110219392, "loss": 3.3233, "step": 6688 }, { "epoch": 2.06, "learning_rate": 0.00010824108196270198, "loss": 3.594, "step": 6689 }, { "epoch": 2.06, "learning_rate": 0.00010820545290346476, "loss": 3.1586, "step": 6690 }, { "epoch": 2.07, "learning_rate": 0.00010816982384422753, "loss": 3.1905, "step": 6691 }, { "epoch": 2.07, "learning_rate": 0.00010813419478499028, "loss": 3.1702, "step": 6692 }, { "epoch": 2.07, "learning_rate": 0.00010809856572575306, "loss": 3.0376, "step": 6693 }, { "epoch": 2.07, "learning_rate": 0.00010806293666651583, "loss": 3.3847, "step": 6694 }, { "epoch": 2.07, "learning_rate": 0.00010802730760727861, "loss": 3.0483, "step": 6695 }, { "epoch": 2.07, "learning_rate": 0.00010799167854804137, "loss": 3.0372, "step": 6696 }, { "epoch": 2.07, "learning_rate": 0.00010795604948880414, "loss": 2.9742, "step": 6697 }, { "epoch": 2.07, "learning_rate": 0.00010792042042956692, "loss": 2.8438, "step": 6698 }, { "epoch": 2.07, "learning_rate": 0.00010788479137032969, "loss": 2.839, "step": 6699 }, { "epoch": 2.07, "learning_rate": 0.00010784916231109245, "loss": 2.82, "step": 6700 }, { "epoch": 2.07, "learning_rate": 0.00010781353325185522, "loss": 2.8822, "step": 6701 }, { "epoch": 2.07, "learning_rate": 0.000107777904192618, "loss": 2.9541, "step": 6702 }, { "epoch": 2.07, "learning_rate": 0.00010774227513338077, "loss": 2.8304, "step": 6703 }, { "epoch": 2.07, "learning_rate": 0.00010770664607414355, "loss": 2.699, "step": 6704 }, { "epoch": 2.07, "learning_rate": 0.00010767101701490631, "loss": 2.6638, "step": 6705 }, { "epoch": 2.07, "learning_rate": 0.00010763538795566908, "loss": 2.8597, "step": 6706 }, { "epoch": 2.07, "learning_rate": 0.00010759975889643186, "loss": 2.6487, "step": 6707 }, { "epoch": 2.07, "learning_rate": 0.00010756412983719463, "loss": 2.6656, "step": 6708 }, { "epoch": 2.07, "learning_rate": 0.00010752850077795738, "loss": 2.4464, "step": 6709 }, { "epoch": 2.07, "learning_rate": 0.00010749287171872016, "loss": 2.5298, "step": 6710 }, { "epoch": 2.07, "learning_rate": 0.00010745724265948293, "loss": 2.2329, "step": 6711 }, { "epoch": 2.07, "learning_rate": 0.00010742161360024571, "loss": 2.6153, "step": 6712 }, { "epoch": 2.07, "learning_rate": 0.00010738598454100847, "loss": 2.3679, "step": 6713 }, { "epoch": 2.07, "learning_rate": 0.00010735035548177125, "loss": 2.3106, "step": 6714 }, { "epoch": 2.07, "learning_rate": 0.00010731472642253402, "loss": 2.3148, "step": 6715 }, { "epoch": 2.07, "learning_rate": 0.0001072790973632968, "loss": 2.2709, "step": 6716 }, { "epoch": 2.07, "learning_rate": 0.00010724346830405956, "loss": 2.0408, "step": 6717 }, { "epoch": 2.07, "learning_rate": 0.00010720783924482232, "loss": 2.1912, "step": 6718 }, { "epoch": 2.07, "learning_rate": 0.0001071722101855851, "loss": 2.255, "step": 6719 }, { "epoch": 2.07, "learning_rate": 0.00010713658112634787, "loss": 2.0005, "step": 6720 }, { "epoch": 2.07, "learning_rate": 0.00010710095206711066, "loss": 2.1383, "step": 6721 }, { "epoch": 2.07, "learning_rate": 0.00010706532300787341, "loss": 1.9746, "step": 6722 }, { "epoch": 2.07, "learning_rate": 0.00010702969394863618, "loss": 1.8708, "step": 6723 }, { "epoch": 2.08, "learning_rate": 0.00010699406488939896, "loss": 2.0502, "step": 6724 }, { "epoch": 2.08, "learning_rate": 0.00010695843583016174, "loss": 1.8066, "step": 6725 }, { "epoch": 2.08, "learning_rate": 0.0001069228067709245, "loss": 1.7719, "step": 6726 }, { "epoch": 2.08, "learning_rate": 0.00010688717771168727, "loss": 1.6049, "step": 6727 }, { "epoch": 2.08, "learning_rate": 0.00010685154865245005, "loss": 1.676, "step": 6728 }, { "epoch": 2.08, "learning_rate": 0.00010681591959321282, "loss": 1.653, "step": 6729 }, { "epoch": 2.08, "learning_rate": 0.00010678029053397557, "loss": 1.6481, "step": 6730 }, { "epoch": 2.08, "learning_rate": 0.00010674466147473835, "loss": 4.2176, "step": 6731 }, { "epoch": 2.08, "learning_rate": 0.00010670903241550112, "loss": 3.9395, "step": 6732 }, { "epoch": 2.08, "learning_rate": 0.0001066734033562639, "loss": 3.414, "step": 6733 }, { "epoch": 2.08, "learning_rate": 0.00010663777429702666, "loss": 3.3841, "step": 6734 }, { "epoch": 2.08, "learning_rate": 0.00010660214523778944, "loss": 3.3388, "step": 6735 }, { "epoch": 2.08, "learning_rate": 0.00010656651617855221, "loss": 3.1991, "step": 6736 }, { "epoch": 2.08, "learning_rate": 0.00010653088711931499, "loss": 3.4822, "step": 6737 }, { "epoch": 2.08, "learning_rate": 0.00010649525806007776, "loss": 3.2995, "step": 6738 }, { "epoch": 2.08, "learning_rate": 0.00010645962900084051, "loss": 3.0478, "step": 6739 }, { "epoch": 2.08, "learning_rate": 0.0001064239999416033, "loss": 3.131, "step": 6740 }, { "epoch": 2.08, "learning_rate": 0.00010638837088236606, "loss": 3.1576, "step": 6741 }, { "epoch": 2.08, "learning_rate": 0.00010635274182312885, "loss": 2.9552, "step": 6742 }, { "epoch": 2.08, "learning_rate": 0.0001063171127638916, "loss": 2.9475, "step": 6743 }, { "epoch": 2.08, "learning_rate": 0.00010628148370465437, "loss": 3.049, "step": 6744 }, { "epoch": 2.08, "learning_rate": 0.00010624585464541715, "loss": 3.119, "step": 6745 }, { "epoch": 2.08, "learning_rate": 0.00010621022558617992, "loss": 3.0643, "step": 6746 }, { "epoch": 2.08, "learning_rate": 0.00010617459652694269, "loss": 3.1145, "step": 6747 }, { "epoch": 2.08, "learning_rate": 0.00010613896746770546, "loss": 3.0809, "step": 6748 }, { "epoch": 2.08, "learning_rate": 0.00010610333840846824, "loss": 3.2176, "step": 6749 }, { "epoch": 2.08, "learning_rate": 0.000106067709349231, "loss": 2.9402, "step": 6750 }, { "epoch": 2.08, "learning_rate": 0.00010603208028999376, "loss": 2.877, "step": 6751 }, { "epoch": 2.08, "learning_rate": 0.00010599645123075654, "loss": 2.6399, "step": 6752 }, { "epoch": 2.08, "learning_rate": 0.00010596082217151931, "loss": 2.6628, "step": 6753 }, { "epoch": 2.08, "learning_rate": 0.00010592519311228209, "loss": 2.9589, "step": 6754 }, { "epoch": 2.08, "learning_rate": 0.00010588956405304486, "loss": 2.4684, "step": 6755 }, { "epoch": 2.09, "learning_rate": 0.00010585393499380762, "loss": 2.5053, "step": 6756 }, { "epoch": 2.09, "learning_rate": 0.0001058183059345704, "loss": 2.7898, "step": 6757 }, { "epoch": 2.09, "learning_rate": 0.00010578267687533317, "loss": 2.6135, "step": 6758 }, { "epoch": 2.09, "learning_rate": 0.00010574704781609595, "loss": 2.3924, "step": 6759 }, { "epoch": 2.09, "learning_rate": 0.0001057114187568587, "loss": 2.5524, "step": 6760 }, { "epoch": 2.09, "learning_rate": 0.00010567578969762148, "loss": 2.3589, "step": 6761 }, { "epoch": 2.09, "learning_rate": 0.00010564016063838425, "loss": 2.5457, "step": 6762 }, { "epoch": 2.09, "learning_rate": 0.00010560453157914703, "loss": 2.3449, "step": 6763 }, { "epoch": 2.09, "learning_rate": 0.00010556890251990979, "loss": 2.2094, "step": 6764 }, { "epoch": 2.09, "learning_rate": 0.00010553327346067256, "loss": 2.3364, "step": 6765 }, { "epoch": 2.09, "learning_rate": 0.00010549764440143534, "loss": 2.1815, "step": 6766 }, { "epoch": 2.09, "learning_rate": 0.00010546201534219811, "loss": 2.1512, "step": 6767 }, { "epoch": 2.09, "learning_rate": 0.00010542638628296086, "loss": 2.0824, "step": 6768 }, { "epoch": 2.09, "learning_rate": 0.00010539075722372364, "loss": 1.8254, "step": 6769 }, { "epoch": 2.09, "learning_rate": 0.00010535512816448643, "loss": 1.986, "step": 6770 }, { "epoch": 2.09, "learning_rate": 0.0001053194991052492, "loss": 1.833, "step": 6771 }, { "epoch": 2.09, "learning_rate": 0.00010528387004601198, "loss": 1.8148, "step": 6772 }, { "epoch": 2.09, "learning_rate": 0.00010524824098677473, "loss": 1.813, "step": 6773 }, { "epoch": 2.09, "learning_rate": 0.0001052126119275375, "loss": 1.7249, "step": 6774 }, { "epoch": 2.09, "learning_rate": 0.00010517698286830028, "loss": 1.8822, "step": 6775 }, { "epoch": 2.09, "learning_rate": 0.00010514135380906305, "loss": 1.7373, "step": 6776 }, { "epoch": 2.09, "learning_rate": 0.0001051057247498258, "loss": 1.5375, "step": 6777 }, { "epoch": 2.09, "learning_rate": 0.00010507009569058859, "loss": 1.5699, "step": 6778 }, { "epoch": 2.09, "learning_rate": 0.00010503446663135135, "loss": 1.5355, "step": 6779 }, { "epoch": 2.09, "learning_rate": 0.00010499883757211414, "loss": 1.601, "step": 6780 }, { "epoch": 2.09, "learning_rate": 0.00010496320851287689, "loss": 4.027, "step": 6781 }, { "epoch": 2.09, "learning_rate": 0.00010492757945363967, "loss": 3.7195, "step": 6782 }, { "epoch": 2.09, "learning_rate": 0.00010489195039440244, "loss": 3.5488, "step": 6783 }, { "epoch": 2.09, "learning_rate": 0.00010485632133516522, "loss": 3.7329, "step": 6784 }, { "epoch": 2.09, "learning_rate": 0.00010482069227592799, "loss": 3.4978, "step": 6785 }, { "epoch": 2.09, "learning_rate": 0.00010478506321669075, "loss": 3.3975, "step": 6786 }, { "epoch": 2.09, "learning_rate": 0.00010474943415745353, "loss": 3.3864, "step": 6787 }, { "epoch": 2.1, "learning_rate": 0.0001047138050982163, "loss": 3.2962, "step": 6788 }, { "epoch": 2.1, "learning_rate": 0.00010467817603897908, "loss": 3.4337, "step": 6789 }, { "epoch": 2.1, "learning_rate": 0.00010464254697974183, "loss": 3.0863, "step": 6790 }, { "epoch": 2.1, "learning_rate": 0.0001046069179205046, "loss": 3.0575, "step": 6791 }, { "epoch": 2.1, "learning_rate": 0.00010457128886126738, "loss": 3.1208, "step": 6792 }, { "epoch": 2.1, "learning_rate": 0.00010453565980203015, "loss": 2.9817, "step": 6793 }, { "epoch": 2.1, "learning_rate": 0.00010450003074279292, "loss": 3.1266, "step": 6794 }, { "epoch": 2.1, "learning_rate": 0.00010446440168355569, "loss": 2.9504, "step": 6795 }, { "epoch": 2.1, "learning_rate": 0.00010442877262431847, "loss": 2.9294, "step": 6796 }, { "epoch": 2.1, "learning_rate": 0.00010439314356508124, "loss": 3.0287, "step": 6797 }, { "epoch": 2.1, "learning_rate": 0.000104357514505844, "loss": 3.0241, "step": 6798 }, { "epoch": 2.1, "learning_rate": 0.00010432188544660678, "loss": 2.9447, "step": 6799 }, { "epoch": 2.1, "learning_rate": 0.00010428625638736954, "loss": 2.8845, "step": 6800 }, { "epoch": 2.1, "learning_rate": 0.00010425062732813233, "loss": 2.8644, "step": 6801 }, { "epoch": 2.1, "learning_rate": 0.0001042149982688951, "loss": 2.526, "step": 6802 }, { "epoch": 2.1, "learning_rate": 0.00010417936920965785, "loss": 2.8357, "step": 6803 }, { "epoch": 2.1, "learning_rate": 0.00010414374015042063, "loss": 2.7722, "step": 6804 }, { "epoch": 2.1, "learning_rate": 0.0001041081110911834, "loss": 2.5179, "step": 6805 }, { "epoch": 2.1, "learning_rate": 0.00010407248203194618, "loss": 2.8025, "step": 6806 }, { "epoch": 2.1, "learning_rate": 0.00010403685297270894, "loss": 2.6135, "step": 6807 }, { "epoch": 2.1, "learning_rate": 0.00010400122391347172, "loss": 2.4911, "step": 6808 }, { "epoch": 2.1, "learning_rate": 0.00010396559485423449, "loss": 2.5968, "step": 6809 }, { "epoch": 2.1, "learning_rate": 0.00010392996579499727, "loss": 2.6267, "step": 6810 }, { "epoch": 2.1, "learning_rate": 0.00010389433673576002, "loss": 2.4699, "step": 6811 }, { "epoch": 2.1, "learning_rate": 0.00010385870767652279, "loss": 2.2511, "step": 6812 }, { "epoch": 2.1, "learning_rate": 0.00010382307861728557, "loss": 2.1994, "step": 6813 }, { "epoch": 2.1, "learning_rate": 0.00010378744955804834, "loss": 2.5834, "step": 6814 }, { "epoch": 2.1, "learning_rate": 0.00010375182049881111, "loss": 2.4132, "step": 6815 }, { "epoch": 2.1, "learning_rate": 0.00010371619143957388, "loss": 2.2885, "step": 6816 }, { "epoch": 2.1, "learning_rate": 0.00010368056238033666, "loss": 2.2937, "step": 6817 }, { "epoch": 2.1, "learning_rate": 0.00010364493332109943, "loss": 2.1916, "step": 6818 }, { "epoch": 2.1, "learning_rate": 0.00010360930426186221, "loss": 2.1889, "step": 6819 }, { "epoch": 2.1, "learning_rate": 0.00010357367520262496, "loss": 2.1748, "step": 6820 }, { "epoch": 2.11, "learning_rate": 0.00010353804614338773, "loss": 2.1198, "step": 6821 }, { "epoch": 2.11, "learning_rate": 0.00010350241708415051, "loss": 1.8255, "step": 6822 }, { "epoch": 2.11, "learning_rate": 0.00010346678802491328, "loss": 1.9655, "step": 6823 }, { "epoch": 2.11, "learning_rate": 0.00010343115896567604, "loss": 1.5755, "step": 6824 }, { "epoch": 2.11, "learning_rate": 0.00010339552990643882, "loss": 1.7794, "step": 6825 }, { "epoch": 2.11, "learning_rate": 0.00010335990084720159, "loss": 1.5916, "step": 6826 }, { "epoch": 2.11, "learning_rate": 0.00010332427178796437, "loss": 1.9121, "step": 6827 }, { "epoch": 2.11, "learning_rate": 0.00010328864272872712, "loss": 1.5924, "step": 6828 }, { "epoch": 2.11, "learning_rate": 0.0001032530136694899, "loss": 1.5807, "step": 6829 }, { "epoch": 2.11, "learning_rate": 0.00010321738461025267, "loss": 1.509, "step": 6830 }, { "epoch": 2.11, "learning_rate": 0.00010318175555101546, "loss": 4.0181, "step": 6831 }, { "epoch": 2.11, "learning_rate": 0.00010314612649177821, "loss": 3.7851, "step": 6832 }, { "epoch": 2.11, "learning_rate": 0.00010311049743254098, "loss": 3.8218, "step": 6833 }, { "epoch": 2.11, "learning_rate": 0.00010307486837330376, "loss": 3.3311, "step": 6834 }, { "epoch": 2.11, "learning_rate": 0.00010303923931406653, "loss": 3.4001, "step": 6835 }, { "epoch": 2.11, "learning_rate": 0.00010300361025482931, "loss": 3.1248, "step": 6836 }, { "epoch": 2.11, "learning_rate": 0.00010296798119559207, "loss": 3.3355, "step": 6837 }, { "epoch": 2.11, "learning_rate": 0.00010293235213635483, "loss": 3.2488, "step": 6838 }, { "epoch": 2.11, "learning_rate": 0.00010289672307711762, "loss": 3.2512, "step": 6839 }, { "epoch": 2.11, "learning_rate": 0.00010286109401788038, "loss": 3.1948, "step": 6840 }, { "epoch": 2.11, "learning_rate": 0.00010282546495864315, "loss": 2.8782, "step": 6841 }, { "epoch": 2.11, "learning_rate": 0.00010278983589940592, "loss": 3.1984, "step": 6842 }, { "epoch": 2.11, "learning_rate": 0.0001027542068401687, "loss": 3.305, "step": 6843 }, { "epoch": 2.11, "learning_rate": 0.00010271857778093147, "loss": 3.1312, "step": 6844 }, { "epoch": 2.11, "learning_rate": 0.00010268294872169423, "loss": 3.2467, "step": 6845 }, { "epoch": 2.11, "learning_rate": 0.00010264731966245701, "loss": 2.9008, "step": 6846 }, { "epoch": 2.11, "learning_rate": 0.00010261169060321978, "loss": 3.0515, "step": 6847 }, { "epoch": 2.11, "learning_rate": 0.00010257606154398256, "loss": 2.7849, "step": 6848 }, { "epoch": 2.11, "learning_rate": 0.00010254043248474531, "loss": 3.0263, "step": 6849 }, { "epoch": 2.11, "learning_rate": 0.00010250480342550808, "loss": 2.7956, "step": 6850 }, { "epoch": 2.11, "learning_rate": 0.00010246917436627086, "loss": 2.8083, "step": 6851 }, { "epoch": 2.11, "learning_rate": 0.00010243354530703363, "loss": 2.7364, "step": 6852 }, { "epoch": 2.12, "learning_rate": 0.00010239791624779641, "loss": 2.6416, "step": 6853 }, { "epoch": 2.12, "learning_rate": 0.00010236228718855917, "loss": 2.7854, "step": 6854 }, { "epoch": 2.12, "learning_rate": 0.00010232665812932195, "loss": 2.5164, "step": 6855 }, { "epoch": 2.12, "learning_rate": 0.00010229102907008472, "loss": 2.9212, "step": 6856 }, { "epoch": 2.12, "learning_rate": 0.0001022554000108475, "loss": 2.5381, "step": 6857 }, { "epoch": 2.12, "learning_rate": 0.00010221977095161026, "loss": 2.591, "step": 6858 }, { "epoch": 2.12, "learning_rate": 0.00010218414189237302, "loss": 2.487, "step": 6859 }, { "epoch": 2.12, "learning_rate": 0.0001021485128331358, "loss": 2.3172, "step": 6860 }, { "epoch": 2.12, "learning_rate": 0.00010211288377389857, "loss": 2.2826, "step": 6861 }, { "epoch": 2.12, "learning_rate": 0.00010207725471466134, "loss": 2.2676, "step": 6862 }, { "epoch": 2.12, "learning_rate": 0.00010204162565542411, "loss": 2.4021, "step": 6863 }, { "epoch": 2.12, "learning_rate": 0.00010200599659618689, "loss": 2.5505, "step": 6864 }, { "epoch": 2.12, "learning_rate": 0.00010197036753694966, "loss": 2.4505, "step": 6865 }, { "epoch": 2.12, "learning_rate": 0.00010193473847771242, "loss": 2.3745, "step": 6866 }, { "epoch": 2.12, "learning_rate": 0.0001018991094184752, "loss": 2.3471, "step": 6867 }, { "epoch": 2.12, "learning_rate": 0.00010186348035923797, "loss": 2.234, "step": 6868 }, { "epoch": 2.12, "learning_rate": 0.00010182785130000075, "loss": 2.0486, "step": 6869 }, { "epoch": 2.12, "learning_rate": 0.00010179222224076352, "loss": 1.8601, "step": 6870 }, { "epoch": 2.12, "learning_rate": 0.00010175659318152627, "loss": 1.9034, "step": 6871 }, { "epoch": 2.12, "learning_rate": 0.00010172096412228905, "loss": 2.0267, "step": 6872 }, { "epoch": 2.12, "learning_rate": 0.00010168533506305182, "loss": 1.793, "step": 6873 }, { "epoch": 2.12, "learning_rate": 0.0001016497060038146, "loss": 1.5908, "step": 6874 }, { "epoch": 2.12, "learning_rate": 0.00010161407694457736, "loss": 1.6571, "step": 6875 }, { "epoch": 2.12, "learning_rate": 0.00010157844788534014, "loss": 1.7025, "step": 6876 }, { "epoch": 2.12, "learning_rate": 0.00010154281882610291, "loss": 1.6723, "step": 6877 }, { "epoch": 2.12, "learning_rate": 0.00010150718976686569, "loss": 1.4563, "step": 6878 }, { "epoch": 2.12, "learning_rate": 0.00010147156070762844, "loss": 1.4884, "step": 6879 }, { "epoch": 2.12, "learning_rate": 0.00010143593164839121, "loss": 1.4144, "step": 6880 }, { "epoch": 2.12, "learning_rate": 0.000101400302589154, "loss": 4.1636, "step": 6881 }, { "epoch": 2.12, "learning_rate": 0.00010136467352991676, "loss": 3.9834, "step": 6882 }, { "epoch": 2.12, "learning_rate": 0.00010132904447067954, "loss": 3.6091, "step": 6883 }, { "epoch": 2.12, "learning_rate": 0.0001012934154114423, "loss": 3.3629, "step": 6884 }, { "epoch": 2.12, "learning_rate": 0.00010125778635220507, "loss": 3.2727, "step": 6885 }, { "epoch": 2.13, "learning_rate": 0.00010122215729296785, "loss": 3.1479, "step": 6886 }, { "epoch": 2.13, "learning_rate": 0.00010118652823373062, "loss": 3.3427, "step": 6887 }, { "epoch": 2.13, "learning_rate": 0.00010115089917449339, "loss": 3.1874, "step": 6888 }, { "epoch": 2.13, "learning_rate": 0.00010111527011525615, "loss": 3.0249, "step": 6889 }, { "epoch": 2.13, "learning_rate": 0.00010107964105601894, "loss": 3.2617, "step": 6890 }, { "epoch": 2.13, "learning_rate": 0.0001010440119967817, "loss": 3.2121, "step": 6891 }, { "epoch": 2.13, "learning_rate": 0.00010100838293754446, "loss": 3.2809, "step": 6892 }, { "epoch": 2.13, "learning_rate": 0.00010097275387830724, "loss": 3.024, "step": 6893 }, { "epoch": 2.13, "learning_rate": 0.00010093712481907001, "loss": 2.9289, "step": 6894 }, { "epoch": 2.13, "learning_rate": 0.00010090149575983279, "loss": 2.8105, "step": 6895 }, { "epoch": 2.13, "learning_rate": 0.00010086586670059555, "loss": 2.839, "step": 6896 }, { "epoch": 2.13, "learning_rate": 0.00010083023764135831, "loss": 2.9934, "step": 6897 }, { "epoch": 2.13, "learning_rate": 0.0001007946085821211, "loss": 2.8787, "step": 6898 }, { "epoch": 2.13, "learning_rate": 0.00010075897952288386, "loss": 2.992, "step": 6899 }, { "epoch": 2.13, "learning_rate": 0.00010072335046364665, "loss": 2.9649, "step": 6900 }, { "epoch": 2.13, "learning_rate": 0.0001006877214044094, "loss": 2.6845, "step": 6901 }, { "epoch": 2.13, "learning_rate": 0.00010065209234517218, "loss": 2.946, "step": 6902 }, { "epoch": 2.13, "learning_rate": 0.00010061646328593495, "loss": 2.7703, "step": 6903 }, { "epoch": 2.13, "learning_rate": 0.00010058083422669773, "loss": 2.7239, "step": 6904 }, { "epoch": 2.13, "learning_rate": 0.00010054520516746049, "loss": 2.6666, "step": 6905 }, { "epoch": 2.13, "learning_rate": 0.00010050957610822326, "loss": 2.5851, "step": 6906 }, { "epoch": 2.13, "learning_rate": 0.00010047394704898604, "loss": 2.704, "step": 6907 }, { "epoch": 2.13, "learning_rate": 0.00010043831798974881, "loss": 2.7292, "step": 6908 }, { "epoch": 2.13, "learning_rate": 0.00010040268893051158, "loss": 2.4195, "step": 6909 }, { "epoch": 2.13, "learning_rate": 0.00010036705987127434, "loss": 2.5844, "step": 6910 }, { "epoch": 2.13, "learning_rate": 0.00010033143081203713, "loss": 2.3988, "step": 6911 }, { "epoch": 2.13, "learning_rate": 0.0001002958017527999, "loss": 2.4287, "step": 6912 }, { "epoch": 2.13, "learning_rate": 0.00010026017269356265, "loss": 2.4358, "step": 6913 }, { "epoch": 2.13, "learning_rate": 0.00010022454363432543, "loss": 2.1797, "step": 6914 }, { "epoch": 2.13, "learning_rate": 0.0001001889145750882, "loss": 2.2145, "step": 6915 }, { "epoch": 2.13, "learning_rate": 0.00010015328551585098, "loss": 2.1095, "step": 6916 }, { "epoch": 2.13, "learning_rate": 0.00010011765645661375, "loss": 2.226, "step": 6917 }, { "epoch": 2.14, "learning_rate": 0.0001000820273973765, "loss": 2.0479, "step": 6918 }, { "epoch": 2.14, "learning_rate": 0.00010004639833813929, "loss": 2.0911, "step": 6919 }, { "epoch": 2.14, "learning_rate": 0.00010001076927890205, "loss": 2.132, "step": 6920 }, { "epoch": 2.14, "learning_rate": 9.997514021966484e-05, "loss": 1.938, "step": 6921 }, { "epoch": 2.14, "learning_rate": 9.993951116042759e-05, "loss": 1.904, "step": 6922 }, { "epoch": 2.14, "learning_rate": 9.990388210119037e-05, "loss": 1.7755, "step": 6923 }, { "epoch": 2.14, "learning_rate": 9.986825304195314e-05, "loss": 1.78, "step": 6924 }, { "epoch": 2.14, "learning_rate": 9.983262398271592e-05, "loss": 1.7125, "step": 6925 }, { "epoch": 2.14, "learning_rate": 9.979699492347868e-05, "loss": 1.6069, "step": 6926 }, { "epoch": 2.14, "learning_rate": 9.976136586424145e-05, "loss": 1.571, "step": 6927 }, { "epoch": 2.14, "learning_rate": 9.972573680500423e-05, "loss": 1.469, "step": 6928 }, { "epoch": 2.14, "learning_rate": 9.9690107745767e-05, "loss": 1.5851, "step": 6929 }, { "epoch": 2.14, "learning_rate": 9.965447868652975e-05, "loss": 1.4929, "step": 6930 }, { "epoch": 2.14, "learning_rate": 9.961884962729253e-05, "loss": 3.8439, "step": 6931 }, { "epoch": 2.14, "learning_rate": 9.95832205680553e-05, "loss": 3.9943, "step": 6932 }, { "epoch": 2.14, "learning_rate": 9.954759150881808e-05, "loss": 3.6159, "step": 6933 }, { "epoch": 2.14, "learning_rate": 9.951196244958085e-05, "loss": 3.079, "step": 6934 }, { "epoch": 2.14, "learning_rate": 9.947633339034362e-05, "loss": 3.3909, "step": 6935 }, { "epoch": 2.14, "learning_rate": 9.944070433110639e-05, "loss": 3.1971, "step": 6936 }, { "epoch": 2.14, "learning_rate": 9.940507527186917e-05, "loss": 2.9335, "step": 6937 }, { "epoch": 2.14, "learning_rate": 9.936944621263194e-05, "loss": 3.2814, "step": 6938 }, { "epoch": 2.14, "learning_rate": 9.933381715339469e-05, "loss": 3.1233, "step": 6939 }, { "epoch": 2.14, "learning_rate": 9.929818809415747e-05, "loss": 3.0175, "step": 6940 }, { "epoch": 2.14, "learning_rate": 9.926255903492024e-05, "loss": 3.1412, "step": 6941 }, { "epoch": 2.14, "learning_rate": 9.922692997568302e-05, "loss": 3.1114, "step": 6942 }, { "epoch": 2.14, "learning_rate": 9.919130091644578e-05, "loss": 3.1215, "step": 6943 }, { "epoch": 2.14, "learning_rate": 9.915567185720855e-05, "loss": 2.8719, "step": 6944 }, { "epoch": 2.14, "learning_rate": 9.912004279797133e-05, "loss": 2.8315, "step": 6945 }, { "epoch": 2.14, "learning_rate": 9.908441373873411e-05, "loss": 2.8352, "step": 6946 }, { "epoch": 2.14, "learning_rate": 9.904878467949687e-05, "loss": 2.8364, "step": 6947 }, { "epoch": 2.14, "learning_rate": 9.901315562025963e-05, "loss": 2.7274, "step": 6948 }, { "epoch": 2.14, "learning_rate": 9.897752656102242e-05, "loss": 3.0258, "step": 6949 }, { "epoch": 2.15, "learning_rate": 9.894189750178518e-05, "loss": 2.6869, "step": 6950 }, { "epoch": 2.15, "learning_rate": 9.890626844254797e-05, "loss": 2.7359, "step": 6951 }, { "epoch": 2.15, "learning_rate": 9.887063938331072e-05, "loss": 2.6575, "step": 6952 }, { "epoch": 2.15, "learning_rate": 9.883501032407349e-05, "loss": 2.5025, "step": 6953 }, { "epoch": 2.15, "learning_rate": 9.879938126483627e-05, "loss": 2.8654, "step": 6954 }, { "epoch": 2.15, "learning_rate": 9.876375220559904e-05, "loss": 2.8307, "step": 6955 }, { "epoch": 2.15, "learning_rate": 9.872812314636181e-05, "loss": 2.5592, "step": 6956 }, { "epoch": 2.15, "learning_rate": 9.869249408712458e-05, "loss": 2.4106, "step": 6957 }, { "epoch": 2.15, "learning_rate": 9.865686502788736e-05, "loss": 2.4343, "step": 6958 }, { "epoch": 2.15, "learning_rate": 9.862123596865013e-05, "loss": 2.782, "step": 6959 }, { "epoch": 2.15, "learning_rate": 9.858560690941288e-05, "loss": 2.5913, "step": 6960 }, { "epoch": 2.15, "learning_rate": 9.854997785017566e-05, "loss": 2.2276, "step": 6961 }, { "epoch": 2.15, "learning_rate": 9.851434879093843e-05, "loss": 2.5371, "step": 6962 }, { "epoch": 2.15, "learning_rate": 9.847871973170121e-05, "loss": 2.3627, "step": 6963 }, { "epoch": 2.15, "learning_rate": 9.844309067246397e-05, "loss": 2.1625, "step": 6964 }, { "epoch": 2.15, "learning_rate": 9.840746161322674e-05, "loss": 2.04, "step": 6965 }, { "epoch": 2.15, "learning_rate": 9.837183255398952e-05, "loss": 1.9796, "step": 6966 }, { "epoch": 2.15, "learning_rate": 9.833620349475229e-05, "loss": 2.169, "step": 6967 }, { "epoch": 2.15, "learning_rate": 9.830057443551507e-05, "loss": 2.2094, "step": 6968 }, { "epoch": 2.15, "learning_rate": 9.826494537627782e-05, "loss": 2.1764, "step": 6969 }, { "epoch": 2.15, "learning_rate": 9.82293163170406e-05, "loss": 2.2942, "step": 6970 }, { "epoch": 2.15, "learning_rate": 9.819368725780337e-05, "loss": 2.171, "step": 6971 }, { "epoch": 2.15, "learning_rate": 9.815805819856616e-05, "loss": 2.0024, "step": 6972 }, { "epoch": 2.15, "learning_rate": 9.812242913932891e-05, "loss": 1.7423, "step": 6973 }, { "epoch": 2.15, "learning_rate": 9.808680008009168e-05, "loss": 1.5334, "step": 6974 }, { "epoch": 2.15, "learning_rate": 9.805117102085446e-05, "loss": 1.7629, "step": 6975 }, { "epoch": 2.15, "learning_rate": 9.801554196161723e-05, "loss": 1.5446, "step": 6976 }, { "epoch": 2.15, "learning_rate": 9.797991290237998e-05, "loss": 1.855, "step": 6977 }, { "epoch": 2.15, "learning_rate": 9.794428384314277e-05, "loss": 1.4562, "step": 6978 }, { "epoch": 2.15, "learning_rate": 9.790865478390553e-05, "loss": 1.4098, "step": 6979 }, { "epoch": 2.15, "learning_rate": 9.787302572466832e-05, "loss": 1.4501, "step": 6980 }, { "epoch": 2.15, "learning_rate": 9.783739666543108e-05, "loss": 3.8114, "step": 6981 }, { "epoch": 2.15, "learning_rate": 9.780176760619385e-05, "loss": 3.8206, "step": 6982 }, { "epoch": 2.16, "learning_rate": 9.776613854695662e-05, "loss": 3.4749, "step": 6983 }, { "epoch": 2.16, "learning_rate": 9.77305094877194e-05, "loss": 3.2544, "step": 6984 }, { "epoch": 2.16, "learning_rate": 9.769488042848217e-05, "loss": 3.2258, "step": 6985 }, { "epoch": 2.16, "learning_rate": 9.765925136924493e-05, "loss": 3.2236, "step": 6986 }, { "epoch": 2.16, "learning_rate": 9.762362231000771e-05, "loss": 3.2548, "step": 6987 }, { "epoch": 2.16, "learning_rate": 9.758799325077048e-05, "loss": 3.1096, "step": 6988 }, { "epoch": 2.16, "learning_rate": 9.755236419153326e-05, "loss": 2.954, "step": 6989 }, { "epoch": 2.16, "learning_rate": 9.751673513229601e-05, "loss": 3.1953, "step": 6990 }, { "epoch": 2.16, "learning_rate": 9.74811060730588e-05, "loss": 3.1113, "step": 6991 }, { "epoch": 2.16, "learning_rate": 9.744547701382156e-05, "loss": 3.0404, "step": 6992 }, { "epoch": 2.16, "learning_rate": 9.740984795458434e-05, "loss": 3.0546, "step": 6993 }, { "epoch": 2.16, "learning_rate": 9.73742188953471e-05, "loss": 3.1705, "step": 6994 }, { "epoch": 2.16, "learning_rate": 9.733858983610987e-05, "loss": 2.8259, "step": 6995 }, { "epoch": 2.16, "learning_rate": 9.730296077687265e-05, "loss": 2.8109, "step": 6996 }, { "epoch": 2.16, "learning_rate": 9.726733171763542e-05, "loss": 3.0071, "step": 6997 }, { "epoch": 2.16, "learning_rate": 9.72317026583982e-05, "loss": 2.7021, "step": 6998 }, { "epoch": 2.16, "learning_rate": 9.719607359916095e-05, "loss": 2.801, "step": 6999 }, { "epoch": 2.16, "learning_rate": 9.716044453992372e-05, "loss": 2.6245, "step": 7000 }, { "epoch": 2.16, "eval_bleu": 2.8620835163254036e-20, "eval_loss": 4.03611946105957, "eval_runtime": 2538.6544, "eval_samples_per_second": 5.814, "eval_steps_per_second": 0.727, "step": 7000 }, { "epoch": 2.16, "learning_rate": 9.71248154806865e-05, "loss": 2.574, "step": 7001 }, { "epoch": 2.16, "learning_rate": 9.708918642144927e-05, "loss": 2.9205, "step": 7002 }, { "epoch": 2.16, "learning_rate": 9.705355736221204e-05, "loss": 2.6834, "step": 7003 }, { "epoch": 2.16, "learning_rate": 9.701792830297481e-05, "loss": 2.5536, "step": 7004 }, { "epoch": 2.16, "learning_rate": 9.698229924373759e-05, "loss": 2.4832, "step": 7005 }, { "epoch": 2.16, "learning_rate": 9.694667018450036e-05, "loss": 2.5602, "step": 7006 }, { "epoch": 2.16, "learning_rate": 9.691104112526311e-05, "loss": 2.6101, "step": 7007 }, { "epoch": 2.16, "learning_rate": 9.68754120660259e-05, "loss": 2.4503, "step": 7008 }, { "epoch": 2.16, "learning_rate": 9.683978300678867e-05, "loss": 2.3585, "step": 7009 }, { "epoch": 2.16, "learning_rate": 9.680415394755145e-05, "loss": 2.4902, "step": 7010 }, { "epoch": 2.16, "learning_rate": 9.67685248883142e-05, "loss": 2.2664, "step": 7011 }, { "epoch": 2.16, "learning_rate": 9.673289582907697e-05, "loss": 2.5169, "step": 7012 }, { "epoch": 2.16, "learning_rate": 9.669726676983975e-05, "loss": 2.1318, "step": 7013 }, { "epoch": 2.16, "learning_rate": 9.666163771060252e-05, "loss": 2.2824, "step": 7014 }, { "epoch": 2.17, "learning_rate": 9.66260086513653e-05, "loss": 2.3317, "step": 7015 }, { "epoch": 2.17, "learning_rate": 9.659037959212806e-05, "loss": 2.2483, "step": 7016 }, { "epoch": 2.17, "learning_rate": 9.655475053289084e-05, "loss": 1.7396, "step": 7017 }, { "epoch": 2.17, "learning_rate": 9.651912147365361e-05, "loss": 2.0274, "step": 7018 }, { "epoch": 2.17, "learning_rate": 9.648349241441639e-05, "loss": 1.9994, "step": 7019 }, { "epoch": 2.17, "learning_rate": 9.644786335517914e-05, "loss": 1.9376, "step": 7020 }, { "epoch": 2.17, "learning_rate": 9.641223429594191e-05, "loss": 1.8724, "step": 7021 }, { "epoch": 2.17, "learning_rate": 9.63766052367047e-05, "loss": 1.885, "step": 7022 }, { "epoch": 2.17, "learning_rate": 9.634097617746746e-05, "loss": 1.7585, "step": 7023 }, { "epoch": 2.17, "learning_rate": 9.630534711823022e-05, "loss": 1.6454, "step": 7024 }, { "epoch": 2.17, "learning_rate": 9.6269718058993e-05, "loss": 1.5186, "step": 7025 }, { "epoch": 2.17, "learning_rate": 9.623408899975577e-05, "loss": 1.6844, "step": 7026 }, { "epoch": 2.17, "learning_rate": 9.619845994051855e-05, "loss": 1.6363, "step": 7027 }, { "epoch": 2.17, "learning_rate": 9.61628308812813e-05, "loss": 1.4135, "step": 7028 }, { "epoch": 2.17, "learning_rate": 9.612720182204409e-05, "loss": 1.3912, "step": 7029 }, { "epoch": 2.17, "learning_rate": 9.609157276280685e-05, "loss": 1.5031, "step": 7030 }, { "epoch": 2.17, "learning_rate": 9.605594370356964e-05, "loss": 4.1439, "step": 7031 }, { "epoch": 2.17, "learning_rate": 9.60203146443324e-05, "loss": 4.0924, "step": 7032 }, { "epoch": 2.17, "learning_rate": 9.598468558509516e-05, "loss": 3.574, "step": 7033 }, { "epoch": 2.17, "learning_rate": 9.594905652585794e-05, "loss": 3.42, "step": 7034 }, { "epoch": 2.17, "learning_rate": 9.591342746662071e-05, "loss": 3.3048, "step": 7035 }, { "epoch": 2.17, "learning_rate": 9.587779840738349e-05, "loss": 3.0996, "step": 7036 }, { "epoch": 2.17, "learning_rate": 9.584216934814625e-05, "loss": 3.3798, "step": 7037 }, { "epoch": 2.17, "learning_rate": 9.580654028890903e-05, "loss": 3.1394, "step": 7038 }, { "epoch": 2.17, "learning_rate": 9.57709112296718e-05, "loss": 3.0501, "step": 7039 }, { "epoch": 2.17, "learning_rate": 9.573528217043458e-05, "loss": 2.9482, "step": 7040 }, { "epoch": 2.17, "learning_rate": 9.569965311119733e-05, "loss": 3.1753, "step": 7041 }, { "epoch": 2.17, "learning_rate": 9.56640240519601e-05, "loss": 3.0658, "step": 7042 }, { "epoch": 2.17, "learning_rate": 9.562839499272288e-05, "loss": 3.309, "step": 7043 }, { "epoch": 2.17, "learning_rate": 9.559276593348565e-05, "loss": 2.9948, "step": 7044 }, { "epoch": 2.17, "learning_rate": 9.55571368742484e-05, "loss": 2.9465, "step": 7045 }, { "epoch": 2.17, "learning_rate": 9.552150781501119e-05, "loss": 3.063, "step": 7046 }, { "epoch": 2.17, "learning_rate": 9.548587875577396e-05, "loss": 2.5341, "step": 7047 }, { "epoch": 2.18, "learning_rate": 9.545024969653674e-05, "loss": 2.8882, "step": 7048 }, { "epoch": 2.18, "learning_rate": 9.54146206372995e-05, "loss": 3.1003, "step": 7049 }, { "epoch": 2.18, "learning_rate": 9.537899157806227e-05, "loss": 2.7254, "step": 7050 }, { "epoch": 2.18, "learning_rate": 9.534336251882504e-05, "loss": 2.6725, "step": 7051 }, { "epoch": 2.18, "learning_rate": 9.530773345958782e-05, "loss": 2.5556, "step": 7052 }, { "epoch": 2.18, "learning_rate": 9.527210440035059e-05, "loss": 2.9176, "step": 7053 }, { "epoch": 2.18, "learning_rate": 9.523647534111335e-05, "loss": 2.6866, "step": 7054 }, { "epoch": 2.18, "learning_rate": 9.520084628187613e-05, "loss": 2.7528, "step": 7055 }, { "epoch": 2.18, "learning_rate": 9.51652172226389e-05, "loss": 2.7326, "step": 7056 }, { "epoch": 2.18, "learning_rate": 9.512958816340168e-05, "loss": 2.8559, "step": 7057 }, { "epoch": 2.18, "learning_rate": 9.509395910416443e-05, "loss": 2.437, "step": 7058 }, { "epoch": 2.18, "learning_rate": 9.50583300449272e-05, "loss": 2.3919, "step": 7059 }, { "epoch": 2.18, "learning_rate": 9.502270098568999e-05, "loss": 2.5903, "step": 7060 }, { "epoch": 2.18, "learning_rate": 9.498707192645275e-05, "loss": 2.2275, "step": 7061 }, { "epoch": 2.18, "learning_rate": 9.495144286721552e-05, "loss": 2.4596, "step": 7062 }, { "epoch": 2.18, "learning_rate": 9.491581380797829e-05, "loss": 2.2534, "step": 7063 }, { "epoch": 2.18, "learning_rate": 9.488018474874107e-05, "loss": 2.1276, "step": 7064 }, { "epoch": 2.18, "learning_rate": 9.484455568950384e-05, "loss": 2.2766, "step": 7065 }, { "epoch": 2.18, "learning_rate": 9.480892663026662e-05, "loss": 1.99, "step": 7066 }, { "epoch": 2.18, "learning_rate": 9.477329757102938e-05, "loss": 2.0476, "step": 7067 }, { "epoch": 2.18, "learning_rate": 9.473766851179215e-05, "loss": 1.9067, "step": 7068 }, { "epoch": 2.18, "learning_rate": 9.470203945255493e-05, "loss": 2.0632, "step": 7069 }, { "epoch": 2.18, "learning_rate": 9.46664103933177e-05, "loss": 2.0635, "step": 7070 }, { "epoch": 2.18, "learning_rate": 9.463078133408045e-05, "loss": 1.9559, "step": 7071 }, { "epoch": 2.18, "learning_rate": 9.459515227484323e-05, "loss": 1.9201, "step": 7072 }, { "epoch": 2.18, "learning_rate": 9.4559523215606e-05, "loss": 1.8794, "step": 7073 }, { "epoch": 2.18, "learning_rate": 9.452389415636878e-05, "loss": 1.8192, "step": 7074 }, { "epoch": 2.18, "learning_rate": 9.448826509713154e-05, "loss": 1.5874, "step": 7075 }, { "epoch": 2.18, "learning_rate": 9.445263603789432e-05, "loss": 1.7251, "step": 7076 }, { "epoch": 2.18, "learning_rate": 9.441700697865709e-05, "loss": 1.6698, "step": 7077 }, { "epoch": 2.18, "learning_rate": 9.438137791941987e-05, "loss": 1.4697, "step": 7078 }, { "epoch": 2.18, "learning_rate": 9.434574886018264e-05, "loss": 1.5252, "step": 7079 }, { "epoch": 2.19, "learning_rate": 9.431011980094539e-05, "loss": 1.3799, "step": 7080 }, { "epoch": 2.19, "learning_rate": 9.427449074170817e-05, "loss": 4.0793, "step": 7081 }, { "epoch": 2.19, "learning_rate": 9.423886168247094e-05, "loss": 3.9396, "step": 7082 }, { "epoch": 2.19, "learning_rate": 9.420323262323372e-05, "loss": 3.4548, "step": 7083 }, { "epoch": 2.19, "learning_rate": 9.416760356399648e-05, "loss": 3.5485, "step": 7084 }, { "epoch": 2.19, "learning_rate": 9.413197450475926e-05, "loss": 3.3483, "step": 7085 }, { "epoch": 2.19, "learning_rate": 9.409634544552203e-05, "loss": 3.2709, "step": 7086 }, { "epoch": 2.19, "learning_rate": 9.406071638628481e-05, "loss": 3.2314, "step": 7087 }, { "epoch": 2.19, "learning_rate": 9.402508732704757e-05, "loss": 3.0133, "step": 7088 }, { "epoch": 2.19, "learning_rate": 9.398945826781033e-05, "loss": 3.2818, "step": 7089 }, { "epoch": 2.19, "learning_rate": 9.395382920857312e-05, "loss": 3.3336, "step": 7090 }, { "epoch": 2.19, "learning_rate": 9.391820014933588e-05, "loss": 3.2512, "step": 7091 }, { "epoch": 2.19, "learning_rate": 9.388257109009864e-05, "loss": 2.8849, "step": 7092 }, { "epoch": 2.19, "learning_rate": 9.384694203086142e-05, "loss": 3.2229, "step": 7093 }, { "epoch": 2.19, "learning_rate": 9.381131297162419e-05, "loss": 2.6233, "step": 7094 }, { "epoch": 2.19, "learning_rate": 9.377568391238697e-05, "loss": 2.509, "step": 7095 }, { "epoch": 2.19, "learning_rate": 9.374005485314974e-05, "loss": 2.8394, "step": 7096 }, { "epoch": 2.19, "learning_rate": 9.370442579391251e-05, "loss": 2.847, "step": 7097 }, { "epoch": 2.19, "learning_rate": 9.366879673467528e-05, "loss": 2.8392, "step": 7098 }, { "epoch": 2.19, "learning_rate": 9.363316767543806e-05, "loss": 2.665, "step": 7099 }, { "epoch": 2.19, "learning_rate": 9.359753861620083e-05, "loss": 2.9042, "step": 7100 }, { "epoch": 2.19, "learning_rate": 9.356190955696358e-05, "loss": 2.772, "step": 7101 }, { "epoch": 2.19, "learning_rate": 9.352628049772636e-05, "loss": 2.9732, "step": 7102 }, { "epoch": 2.19, "learning_rate": 9.349065143848913e-05, "loss": 2.6224, "step": 7103 }, { "epoch": 2.19, "learning_rate": 9.345502237925191e-05, "loss": 2.9007, "step": 7104 }, { "epoch": 2.19, "learning_rate": 9.341939332001467e-05, "loss": 2.5735, "step": 7105 }, { "epoch": 2.19, "learning_rate": 9.338376426077744e-05, "loss": 2.5649, "step": 7106 }, { "epoch": 2.19, "learning_rate": 9.334813520154022e-05, "loss": 2.4071, "step": 7107 }, { "epoch": 2.19, "learning_rate": 9.331250614230299e-05, "loss": 2.4105, "step": 7108 }, { "epoch": 2.19, "learning_rate": 9.327687708306575e-05, "loss": 2.3269, "step": 7109 }, { "epoch": 2.19, "learning_rate": 9.324124802382852e-05, "loss": 2.0712, "step": 7110 }, { "epoch": 2.19, "learning_rate": 9.32056189645913e-05, "loss": 2.2462, "step": 7111 }, { "epoch": 2.2, "learning_rate": 9.316998990535407e-05, "loss": 2.1748, "step": 7112 }, { "epoch": 2.2, "learning_rate": 9.313436084611686e-05, "loss": 2.5728, "step": 7113 }, { "epoch": 2.2, "learning_rate": 9.309873178687961e-05, "loss": 2.3369, "step": 7114 }, { "epoch": 2.2, "learning_rate": 9.306310272764238e-05, "loss": 2.046, "step": 7115 }, { "epoch": 2.2, "learning_rate": 9.302747366840516e-05, "loss": 2.3248, "step": 7116 }, { "epoch": 2.2, "learning_rate": 9.299184460916793e-05, "loss": 2.0643, "step": 7117 }, { "epoch": 2.2, "learning_rate": 9.295621554993068e-05, "loss": 2.1423, "step": 7118 }, { "epoch": 2.2, "learning_rate": 9.292058649069347e-05, "loss": 2.1726, "step": 7119 }, { "epoch": 2.2, "learning_rate": 9.288495743145625e-05, "loss": 1.9508, "step": 7120 }, { "epoch": 2.2, "learning_rate": 9.284932837221902e-05, "loss": 2.0883, "step": 7121 }, { "epoch": 2.2, "learning_rate": 9.281369931298177e-05, "loss": 1.8155, "step": 7122 }, { "epoch": 2.2, "learning_rate": 9.277807025374455e-05, "loss": 1.9422, "step": 7123 }, { "epoch": 2.2, "learning_rate": 9.274244119450732e-05, "loss": 1.9246, "step": 7124 }, { "epoch": 2.2, "learning_rate": 9.27068121352701e-05, "loss": 1.8587, "step": 7125 }, { "epoch": 2.2, "learning_rate": 9.267118307603286e-05, "loss": 1.6912, "step": 7126 }, { "epoch": 2.2, "learning_rate": 9.263555401679563e-05, "loss": 1.5325, "step": 7127 }, { "epoch": 2.2, "learning_rate": 9.259992495755841e-05, "loss": 1.5973, "step": 7128 }, { "epoch": 2.2, "learning_rate": 9.256429589832118e-05, "loss": 1.3618, "step": 7129 }, { "epoch": 2.2, "learning_rate": 9.252866683908396e-05, "loss": 1.4086, "step": 7130 }, { "epoch": 2.2, "learning_rate": 9.249303777984671e-05, "loss": 4.0022, "step": 7131 }, { "epoch": 2.2, "learning_rate": 9.24574087206095e-05, "loss": 3.7553, "step": 7132 }, { "epoch": 2.2, "learning_rate": 9.242177966137226e-05, "loss": 3.1806, "step": 7133 }, { "epoch": 2.2, "learning_rate": 9.238615060213504e-05, "loss": 3.5333, "step": 7134 }, { "epoch": 2.2, "learning_rate": 9.23505215428978e-05, "loss": 3.2706, "step": 7135 }, { "epoch": 2.2, "learning_rate": 9.231489248366057e-05, "loss": 3.2139, "step": 7136 }, { "epoch": 2.2, "learning_rate": 9.227926342442335e-05, "loss": 3.103, "step": 7137 }, { "epoch": 2.2, "learning_rate": 9.224363436518612e-05, "loss": 3.3027, "step": 7138 }, { "epoch": 2.2, "learning_rate": 9.220800530594887e-05, "loss": 3.1915, "step": 7139 }, { "epoch": 2.2, "learning_rate": 9.217237624671165e-05, "loss": 3.0811, "step": 7140 }, { "epoch": 2.2, "learning_rate": 9.213674718747442e-05, "loss": 3.1187, "step": 7141 }, { "epoch": 2.2, "learning_rate": 9.21011181282372e-05, "loss": 2.8269, "step": 7142 }, { "epoch": 2.2, "learning_rate": 9.206548906899996e-05, "loss": 2.9461, "step": 7143 }, { "epoch": 2.2, "learning_rate": 9.202986000976274e-05, "loss": 3.0701, "step": 7144 }, { "epoch": 2.21, "learning_rate": 9.199423095052551e-05, "loss": 2.6662, "step": 7145 }, { "epoch": 2.21, "learning_rate": 9.195860189128829e-05, "loss": 3.0339, "step": 7146 }, { "epoch": 2.21, "learning_rate": 9.192297283205106e-05, "loss": 3.0879, "step": 7147 }, { "epoch": 2.21, "learning_rate": 9.188734377281381e-05, "loss": 2.7355, "step": 7148 }, { "epoch": 2.21, "learning_rate": 9.18517147135766e-05, "loss": 2.7228, "step": 7149 }, { "epoch": 2.21, "learning_rate": 9.181608565433936e-05, "loss": 3.1554, "step": 7150 }, { "epoch": 2.21, "learning_rate": 9.178045659510215e-05, "loss": 2.656, "step": 7151 }, { "epoch": 2.21, "learning_rate": 9.17448275358649e-05, "loss": 2.7158, "step": 7152 }, { "epoch": 2.21, "learning_rate": 9.170919847662767e-05, "loss": 2.8427, "step": 7153 }, { "epoch": 2.21, "learning_rate": 9.167356941739045e-05, "loss": 2.6793, "step": 7154 }, { "epoch": 2.21, "learning_rate": 9.163794035815322e-05, "loss": 2.5299, "step": 7155 }, { "epoch": 2.21, "learning_rate": 9.160231129891599e-05, "loss": 2.7473, "step": 7156 }, { "epoch": 2.21, "learning_rate": 9.156668223967876e-05, "loss": 2.6903, "step": 7157 }, { "epoch": 2.21, "learning_rate": 9.153105318044154e-05, "loss": 2.5411, "step": 7158 }, { "epoch": 2.21, "learning_rate": 9.14954241212043e-05, "loss": 2.4399, "step": 7159 }, { "epoch": 2.21, "learning_rate": 9.145979506196706e-05, "loss": 2.2822, "step": 7160 }, { "epoch": 2.21, "learning_rate": 9.142416600272984e-05, "loss": 2.4888, "step": 7161 }, { "epoch": 2.21, "learning_rate": 9.138853694349261e-05, "loss": 2.2486, "step": 7162 }, { "epoch": 2.21, "learning_rate": 9.13529078842554e-05, "loss": 2.2903, "step": 7163 }, { "epoch": 2.21, "learning_rate": 9.131727882501816e-05, "loss": 2.2899, "step": 7164 }, { "epoch": 2.21, "learning_rate": 9.128164976578093e-05, "loss": 2.2444, "step": 7165 }, { "epoch": 2.21, "learning_rate": 9.12460207065437e-05, "loss": 2.097, "step": 7166 }, { "epoch": 2.21, "learning_rate": 9.121039164730648e-05, "loss": 2.0552, "step": 7167 }, { "epoch": 2.21, "learning_rate": 9.117476258806925e-05, "loss": 2.032, "step": 7168 }, { "epoch": 2.21, "learning_rate": 9.1139133528832e-05, "loss": 2.1937, "step": 7169 }, { "epoch": 2.21, "learning_rate": 9.110350446959479e-05, "loss": 1.9071, "step": 7170 }, { "epoch": 2.21, "learning_rate": 9.106787541035755e-05, "loss": 1.8828, "step": 7171 }, { "epoch": 2.21, "learning_rate": 9.103224635112034e-05, "loss": 1.7573, "step": 7172 }, { "epoch": 2.21, "learning_rate": 9.099661729188309e-05, "loss": 1.829, "step": 7173 }, { "epoch": 2.21, "learning_rate": 9.096098823264586e-05, "loss": 1.5917, "step": 7174 }, { "epoch": 2.21, "learning_rate": 9.092535917340864e-05, "loss": 1.664, "step": 7175 }, { "epoch": 2.21, "learning_rate": 9.088973011417141e-05, "loss": 1.6731, "step": 7176 }, { "epoch": 2.22, "learning_rate": 9.085410105493419e-05, "loss": 1.3628, "step": 7177 }, { "epoch": 2.22, "learning_rate": 9.081847199569695e-05, "loss": 1.3825, "step": 7178 }, { "epoch": 2.22, "learning_rate": 9.078284293645973e-05, "loss": 1.344, "step": 7179 }, { "epoch": 2.22, "learning_rate": 9.07472138772225e-05, "loss": 1.4166, "step": 7180 }, { "epoch": 2.22, "learning_rate": 9.071158481798528e-05, "loss": 3.9118, "step": 7181 }, { "epoch": 2.22, "learning_rate": 9.067595575874803e-05, "loss": 3.8231, "step": 7182 }, { "epoch": 2.22, "learning_rate": 9.06403266995108e-05, "loss": 3.2446, "step": 7183 }, { "epoch": 2.22, "learning_rate": 9.060469764027358e-05, "loss": 3.4876, "step": 7184 }, { "epoch": 2.22, "learning_rate": 9.056906858103635e-05, "loss": 3.3315, "step": 7185 }, { "epoch": 2.22, "learning_rate": 9.05334395217991e-05, "loss": 3.005, "step": 7186 }, { "epoch": 2.22, "learning_rate": 9.049781046256189e-05, "loss": 3.4032, "step": 7187 }, { "epoch": 2.22, "learning_rate": 9.046218140332466e-05, "loss": 3.1122, "step": 7188 }, { "epoch": 2.22, "learning_rate": 9.042655234408744e-05, "loss": 3.0031, "step": 7189 }, { "epoch": 2.22, "learning_rate": 9.039092328485019e-05, "loss": 2.7823, "step": 7190 }, { "epoch": 2.22, "learning_rate": 9.035529422561297e-05, "loss": 3.2138, "step": 7191 }, { "epoch": 2.22, "learning_rate": 9.031966516637574e-05, "loss": 3.2384, "step": 7192 }, { "epoch": 2.22, "learning_rate": 9.028403610713852e-05, "loss": 3.061, "step": 7193 }, { "epoch": 2.22, "learning_rate": 9.024840704790129e-05, "loss": 2.6907, "step": 7194 }, { "epoch": 2.22, "learning_rate": 9.021277798866405e-05, "loss": 2.8913, "step": 7195 }, { "epoch": 2.22, "learning_rate": 9.017714892942683e-05, "loss": 3.0697, "step": 7196 }, { "epoch": 2.22, "learning_rate": 9.01415198701896e-05, "loss": 2.7372, "step": 7197 }, { "epoch": 2.22, "learning_rate": 9.010589081095238e-05, "loss": 2.8258, "step": 7198 }, { "epoch": 2.22, "learning_rate": 9.007026175171513e-05, "loss": 2.8758, "step": 7199 }, { "epoch": 2.22, "learning_rate": 9.00346326924779e-05, "loss": 2.758, "step": 7200 }, { "epoch": 2.22, "learning_rate": 8.999900363324068e-05, "loss": 2.7042, "step": 7201 }, { "epoch": 2.22, "learning_rate": 8.996337457400345e-05, "loss": 2.8882, "step": 7202 }, { "epoch": 2.22, "learning_rate": 8.992774551476622e-05, "loss": 2.7637, "step": 7203 }, { "epoch": 2.22, "learning_rate": 8.989211645552899e-05, "loss": 2.5469, "step": 7204 }, { "epoch": 2.22, "learning_rate": 8.985648739629177e-05, "loss": 2.5556, "step": 7205 }, { "epoch": 2.22, "learning_rate": 8.982085833705454e-05, "loss": 2.4803, "step": 7206 }, { "epoch": 2.22, "learning_rate": 8.97852292778173e-05, "loss": 2.4158, "step": 7207 }, { "epoch": 2.22, "learning_rate": 8.974960021858008e-05, "loss": 2.4123, "step": 7208 }, { "epoch": 2.22, "learning_rate": 8.971397115934284e-05, "loss": 2.4978, "step": 7209 }, { "epoch": 2.23, "learning_rate": 8.967834210010563e-05, "loss": 2.4013, "step": 7210 }, { "epoch": 2.23, "learning_rate": 8.96427130408684e-05, "loss": 2.1052, "step": 7211 }, { "epoch": 2.23, "learning_rate": 8.960708398163116e-05, "loss": 2.4701, "step": 7212 }, { "epoch": 2.23, "learning_rate": 8.957145492239393e-05, "loss": 2.3424, "step": 7213 }, { "epoch": 2.23, "learning_rate": 8.953582586315671e-05, "loss": 2.2205, "step": 7214 }, { "epoch": 2.23, "learning_rate": 8.950019680391948e-05, "loss": 2.4749, "step": 7215 }, { "epoch": 2.23, "learning_rate": 8.946456774468224e-05, "loss": 2.1082, "step": 7216 }, { "epoch": 2.23, "learning_rate": 8.942893868544502e-05, "loss": 2.2423, "step": 7217 }, { "epoch": 2.23, "learning_rate": 8.939330962620779e-05, "loss": 2.1538, "step": 7218 }, { "epoch": 2.23, "learning_rate": 8.935768056697057e-05, "loss": 2.0366, "step": 7219 }, { "epoch": 2.23, "learning_rate": 8.932205150773332e-05, "loss": 2.0335, "step": 7220 }, { "epoch": 2.23, "learning_rate": 8.928642244849609e-05, "loss": 1.9583, "step": 7221 }, { "epoch": 2.23, "learning_rate": 8.925079338925887e-05, "loss": 1.911, "step": 7222 }, { "epoch": 2.23, "learning_rate": 8.921516433002164e-05, "loss": 1.8713, "step": 7223 }, { "epoch": 2.23, "learning_rate": 8.917953527078441e-05, "loss": 1.7365, "step": 7224 }, { "epoch": 2.23, "learning_rate": 8.914390621154718e-05, "loss": 1.6285, "step": 7225 }, { "epoch": 2.23, "learning_rate": 8.910827715230996e-05, "loss": 1.5177, "step": 7226 }, { "epoch": 2.23, "learning_rate": 8.907264809307273e-05, "loss": 1.6017, "step": 7227 }, { "epoch": 2.23, "learning_rate": 8.903701903383551e-05, "loss": 1.4585, "step": 7228 }, { "epoch": 2.23, "learning_rate": 8.900138997459827e-05, "loss": 1.5473, "step": 7229 }, { "epoch": 2.23, "learning_rate": 8.896576091536103e-05, "loss": 1.4081, "step": 7230 }, { "epoch": 2.23, "learning_rate": 8.893013185612382e-05, "loss": 4.0931, "step": 7231 }, { "epoch": 2.23, "learning_rate": 8.889450279688658e-05, "loss": 3.8113, "step": 7232 }, { "epoch": 2.23, "learning_rate": 8.885887373764934e-05, "loss": 3.5583, "step": 7233 }, { "epoch": 2.23, "learning_rate": 8.882324467841212e-05, "loss": 3.4705, "step": 7234 }, { "epoch": 2.23, "learning_rate": 8.878761561917489e-05, "loss": 3.2775, "step": 7235 }, { "epoch": 2.23, "learning_rate": 8.875198655993767e-05, "loss": 3.144, "step": 7236 }, { "epoch": 2.23, "learning_rate": 8.871635750070043e-05, "loss": 2.9541, "step": 7237 }, { "epoch": 2.23, "learning_rate": 8.868072844146321e-05, "loss": 3.1904, "step": 7238 }, { "epoch": 2.23, "learning_rate": 8.864509938222598e-05, "loss": 2.912, "step": 7239 }, { "epoch": 2.23, "learning_rate": 8.860947032298876e-05, "loss": 3.1277, "step": 7240 }, { "epoch": 2.23, "learning_rate": 8.857384126375151e-05, "loss": 2.9082, "step": 7241 }, { "epoch": 2.24, "learning_rate": 8.853821220451428e-05, "loss": 2.9693, "step": 7242 }, { "epoch": 2.24, "learning_rate": 8.850258314527706e-05, "loss": 2.671, "step": 7243 }, { "epoch": 2.24, "learning_rate": 8.846695408603983e-05, "loss": 2.9176, "step": 7244 }, { "epoch": 2.24, "learning_rate": 8.843132502680261e-05, "loss": 2.978, "step": 7245 }, { "epoch": 2.24, "learning_rate": 8.839569596756537e-05, "loss": 3.0011, "step": 7246 }, { "epoch": 2.24, "learning_rate": 8.836006690832814e-05, "loss": 2.8607, "step": 7247 }, { "epoch": 2.24, "learning_rate": 8.832443784909092e-05, "loss": 2.8081, "step": 7248 }, { "epoch": 2.24, "learning_rate": 8.82888087898537e-05, "loss": 2.7689, "step": 7249 }, { "epoch": 2.24, "learning_rate": 8.825317973061645e-05, "loss": 2.7566, "step": 7250 }, { "epoch": 2.24, "learning_rate": 8.821755067137922e-05, "loss": 2.6587, "step": 7251 }, { "epoch": 2.24, "learning_rate": 8.8181921612142e-05, "loss": 2.6488, "step": 7252 }, { "epoch": 2.24, "learning_rate": 8.814629255290477e-05, "loss": 2.662, "step": 7253 }, { "epoch": 2.24, "learning_rate": 8.811066349366753e-05, "loss": 2.7426, "step": 7254 }, { "epoch": 2.24, "learning_rate": 8.807503443443031e-05, "loss": 2.4835, "step": 7255 }, { "epoch": 2.24, "learning_rate": 8.803940537519308e-05, "loss": 2.4472, "step": 7256 }, { "epoch": 2.24, "learning_rate": 8.800377631595586e-05, "loss": 2.7407, "step": 7257 }, { "epoch": 2.24, "learning_rate": 8.796814725671861e-05, "loss": 2.4041, "step": 7258 }, { "epoch": 2.24, "learning_rate": 8.79325181974814e-05, "loss": 2.3973, "step": 7259 }, { "epoch": 2.24, "learning_rate": 8.789688913824416e-05, "loss": 2.194, "step": 7260 }, { "epoch": 2.24, "learning_rate": 8.786126007900695e-05, "loss": 2.3359, "step": 7261 }, { "epoch": 2.24, "learning_rate": 8.782563101976971e-05, "loss": 2.1763, "step": 7262 }, { "epoch": 2.24, "learning_rate": 8.779000196053247e-05, "loss": 2.3156, "step": 7263 }, { "epoch": 2.24, "learning_rate": 8.775437290129525e-05, "loss": 2.0597, "step": 7264 }, { "epoch": 2.24, "learning_rate": 8.771874384205802e-05, "loss": 2.2585, "step": 7265 }, { "epoch": 2.24, "learning_rate": 8.76831147828208e-05, "loss": 2.1648, "step": 7266 }, { "epoch": 2.24, "learning_rate": 8.764748572358356e-05, "loss": 2.3241, "step": 7267 }, { "epoch": 2.24, "learning_rate": 8.761185666434632e-05, "loss": 1.9216, "step": 7268 }, { "epoch": 2.24, "learning_rate": 8.75762276051091e-05, "loss": 1.7388, "step": 7269 }, { "epoch": 2.24, "learning_rate": 8.754059854587187e-05, "loss": 1.8373, "step": 7270 }, { "epoch": 2.24, "learning_rate": 8.750496948663464e-05, "loss": 1.7429, "step": 7271 }, { "epoch": 2.24, "learning_rate": 8.746934042739741e-05, "loss": 1.8981, "step": 7272 }, { "epoch": 2.24, "learning_rate": 8.74337113681602e-05, "loss": 1.8534, "step": 7273 }, { "epoch": 2.25, "learning_rate": 8.739808230892296e-05, "loss": 1.7233, "step": 7274 }, { "epoch": 2.25, "learning_rate": 8.736245324968574e-05, "loss": 1.5557, "step": 7275 }, { "epoch": 2.25, "learning_rate": 8.73268241904485e-05, "loss": 1.6298, "step": 7276 }, { "epoch": 2.25, "learning_rate": 8.729119513121127e-05, "loss": 1.4592, "step": 7277 }, { "epoch": 2.25, "learning_rate": 8.725556607197405e-05, "loss": 1.4525, "step": 7278 }, { "epoch": 2.25, "learning_rate": 8.721993701273682e-05, "loss": 1.4594, "step": 7279 }, { "epoch": 2.25, "learning_rate": 8.718430795349957e-05, "loss": 1.4226, "step": 7280 }, { "epoch": 2.25, "learning_rate": 8.714867889426235e-05, "loss": 3.9869, "step": 7281 }, { "epoch": 2.25, "learning_rate": 8.711304983502512e-05, "loss": 3.4118, "step": 7282 }, { "epoch": 2.25, "learning_rate": 8.70774207757879e-05, "loss": 3.3932, "step": 7283 }, { "epoch": 2.25, "learning_rate": 8.704179171655066e-05, "loss": 3.4529, "step": 7284 }, { "epoch": 2.25, "learning_rate": 8.700616265731344e-05, "loss": 3.1607, "step": 7285 }, { "epoch": 2.25, "learning_rate": 8.697053359807621e-05, "loss": 3.1739, "step": 7286 }, { "epoch": 2.25, "learning_rate": 8.693490453883899e-05, "loss": 3.0788, "step": 7287 }, { "epoch": 2.25, "learning_rate": 8.689927547960175e-05, "loss": 2.7633, "step": 7288 }, { "epoch": 2.25, "learning_rate": 8.686364642036451e-05, "loss": 2.9515, "step": 7289 }, { "epoch": 2.25, "learning_rate": 8.68280173611273e-05, "loss": 3.1472, "step": 7290 }, { "epoch": 2.25, "learning_rate": 8.679238830189006e-05, "loss": 2.8283, "step": 7291 }, { "epoch": 2.25, "learning_rate": 8.675675924265285e-05, "loss": 2.9223, "step": 7292 }, { "epoch": 2.25, "learning_rate": 8.67211301834156e-05, "loss": 2.7886, "step": 7293 }, { "epoch": 2.25, "learning_rate": 8.668550112417838e-05, "loss": 3.0067, "step": 7294 }, { "epoch": 2.25, "learning_rate": 8.664987206494115e-05, "loss": 2.9514, "step": 7295 }, { "epoch": 2.25, "learning_rate": 8.661424300570393e-05, "loss": 2.8021, "step": 7296 }, { "epoch": 2.25, "learning_rate": 8.657861394646669e-05, "loss": 2.8767, "step": 7297 }, { "epoch": 2.25, "learning_rate": 8.654298488722946e-05, "loss": 3.0472, "step": 7298 }, { "epoch": 2.25, "learning_rate": 8.650735582799224e-05, "loss": 2.7621, "step": 7299 }, { "epoch": 2.25, "learning_rate": 8.6471726768755e-05, "loss": 2.8706, "step": 7300 }, { "epoch": 2.25, "learning_rate": 8.643609770951776e-05, "loss": 2.6389, "step": 7301 }, { "epoch": 2.25, "learning_rate": 8.640046865028054e-05, "loss": 2.4966, "step": 7302 }, { "epoch": 2.25, "learning_rate": 8.636483959104331e-05, "loss": 2.8713, "step": 7303 }, { "epoch": 2.25, "learning_rate": 8.632921053180609e-05, "loss": 2.5554, "step": 7304 }, { "epoch": 2.25, "learning_rate": 8.629358147256885e-05, "loss": 2.5989, "step": 7305 }, { "epoch": 2.25, "learning_rate": 8.625795241333163e-05, "loss": 2.5358, "step": 7306 }, { "epoch": 2.26, "learning_rate": 8.62223233540944e-05, "loss": 2.4453, "step": 7307 }, { "epoch": 2.26, "learning_rate": 8.618669429485718e-05, "loss": 2.4131, "step": 7308 }, { "epoch": 2.26, "learning_rate": 8.615106523561995e-05, "loss": 2.5333, "step": 7309 }, { "epoch": 2.26, "learning_rate": 8.61154361763827e-05, "loss": 2.2447, "step": 7310 }, { "epoch": 2.26, "learning_rate": 8.607980711714548e-05, "loss": 2.236, "step": 7311 }, { "epoch": 2.26, "learning_rate": 8.604417805790825e-05, "loss": 2.3854, "step": 7312 }, { "epoch": 2.26, "learning_rate": 8.600854899867103e-05, "loss": 2.6612, "step": 7313 }, { "epoch": 2.26, "learning_rate": 8.597291993943379e-05, "loss": 2.3226, "step": 7314 }, { "epoch": 2.26, "learning_rate": 8.593729088019656e-05, "loss": 1.947, "step": 7315 }, { "epoch": 2.26, "learning_rate": 8.590166182095934e-05, "loss": 2.2354, "step": 7316 }, { "epoch": 2.26, "learning_rate": 8.586603276172211e-05, "loss": 2.1071, "step": 7317 }, { "epoch": 2.26, "learning_rate": 8.583040370248488e-05, "loss": 1.9427, "step": 7318 }, { "epoch": 2.26, "learning_rate": 8.579477464324764e-05, "loss": 1.86, "step": 7319 }, { "epoch": 2.26, "learning_rate": 8.575914558401043e-05, "loss": 2.1141, "step": 7320 }, { "epoch": 2.26, "learning_rate": 8.57235165247732e-05, "loss": 1.7861, "step": 7321 }, { "epoch": 2.26, "learning_rate": 8.568788746553595e-05, "loss": 1.6747, "step": 7322 }, { "epoch": 2.26, "learning_rate": 8.565225840629873e-05, "loss": 1.5317, "step": 7323 }, { "epoch": 2.26, "learning_rate": 8.56166293470615e-05, "loss": 1.4805, "step": 7324 }, { "epoch": 2.26, "learning_rate": 8.558100028782428e-05, "loss": 1.8469, "step": 7325 }, { "epoch": 2.26, "learning_rate": 8.554537122858705e-05, "loss": 1.4073, "step": 7326 }, { "epoch": 2.26, "learning_rate": 8.55097421693498e-05, "loss": 1.6262, "step": 7327 }, { "epoch": 2.26, "learning_rate": 8.547411311011259e-05, "loss": 1.5072, "step": 7328 }, { "epoch": 2.26, "learning_rate": 8.543848405087536e-05, "loss": 1.3857, "step": 7329 }, { "epoch": 2.26, "learning_rate": 8.540285499163814e-05, "loss": 1.2527, "step": 7330 }, { "epoch": 2.26, "learning_rate": 8.536722593240089e-05, "loss": 4.0039, "step": 7331 }, { "epoch": 2.26, "learning_rate": 8.533159687316367e-05, "loss": 3.4839, "step": 7332 }, { "epoch": 2.26, "learning_rate": 8.529596781392644e-05, "loss": 3.5989, "step": 7333 }, { "epoch": 2.26, "learning_rate": 8.526033875468922e-05, "loss": 3.4113, "step": 7334 }, { "epoch": 2.26, "learning_rate": 8.522470969545198e-05, "loss": 3.1882, "step": 7335 }, { "epoch": 2.26, "learning_rate": 8.518908063621475e-05, "loss": 3.107, "step": 7336 }, { "epoch": 2.26, "learning_rate": 8.515345157697753e-05, "loss": 3.115, "step": 7337 }, { "epoch": 2.26, "learning_rate": 8.51178225177403e-05, "loss": 3.3886, "step": 7338 }, { "epoch": 2.27, "learning_rate": 8.508219345850307e-05, "loss": 2.9071, "step": 7339 }, { "epoch": 2.27, "learning_rate": 8.504656439926583e-05, "loss": 3.2551, "step": 7340 }, { "epoch": 2.27, "learning_rate": 8.501093534002862e-05, "loss": 2.6899, "step": 7341 }, { "epoch": 2.27, "learning_rate": 8.497530628079138e-05, "loss": 2.6294, "step": 7342 }, { "epoch": 2.27, "learning_rate": 8.493967722155417e-05, "loss": 2.9224, "step": 7343 }, { "epoch": 2.27, "learning_rate": 8.490404816231692e-05, "loss": 2.8362, "step": 7344 }, { "epoch": 2.27, "learning_rate": 8.486841910307969e-05, "loss": 2.7795, "step": 7345 }, { "epoch": 2.27, "learning_rate": 8.483279004384247e-05, "loss": 3.1438, "step": 7346 }, { "epoch": 2.27, "learning_rate": 8.479716098460524e-05, "loss": 2.8074, "step": 7347 }, { "epoch": 2.27, "learning_rate": 8.4761531925368e-05, "loss": 2.8382, "step": 7348 }, { "epoch": 2.27, "learning_rate": 8.472590286613078e-05, "loss": 2.5524, "step": 7349 }, { "epoch": 2.27, "learning_rate": 8.469027380689354e-05, "loss": 2.6243, "step": 7350 }, { "epoch": 2.27, "learning_rate": 8.465464474765633e-05, "loss": 2.7082, "step": 7351 }, { "epoch": 2.27, "learning_rate": 8.461901568841908e-05, "loss": 2.581, "step": 7352 }, { "epoch": 2.27, "learning_rate": 8.458338662918186e-05, "loss": 2.839, "step": 7353 }, { "epoch": 2.27, "learning_rate": 8.454775756994463e-05, "loss": 2.4519, "step": 7354 }, { "epoch": 2.27, "learning_rate": 8.451212851070741e-05, "loss": 2.6922, "step": 7355 }, { "epoch": 2.27, "learning_rate": 8.447649945147017e-05, "loss": 2.4649, "step": 7356 }, { "epoch": 2.27, "learning_rate": 8.444087039223294e-05, "loss": 2.6324, "step": 7357 }, { "epoch": 2.27, "learning_rate": 8.440524133299572e-05, "loss": 2.5245, "step": 7358 }, { "epoch": 2.27, "learning_rate": 8.436961227375849e-05, "loss": 2.3805, "step": 7359 }, { "epoch": 2.27, "learning_rate": 8.433398321452127e-05, "loss": 2.2849, "step": 7360 }, { "epoch": 2.27, "learning_rate": 8.429835415528402e-05, "loss": 2.45, "step": 7361 }, { "epoch": 2.27, "learning_rate": 8.426272509604679e-05, "loss": 2.3264, "step": 7362 }, { "epoch": 2.27, "learning_rate": 8.422709603680957e-05, "loss": 2.3016, "step": 7363 }, { "epoch": 2.27, "learning_rate": 8.419146697757234e-05, "loss": 2.2555, "step": 7364 }, { "epoch": 2.27, "learning_rate": 8.415583791833511e-05, "loss": 2.1756, "step": 7365 }, { "epoch": 2.27, "learning_rate": 8.412020885909788e-05, "loss": 1.9768, "step": 7366 }, { "epoch": 2.27, "learning_rate": 8.408457979986066e-05, "loss": 1.9893, "step": 7367 }, { "epoch": 2.27, "learning_rate": 8.404895074062343e-05, "loss": 1.945, "step": 7368 }, { "epoch": 2.27, "learning_rate": 8.401332168138618e-05, "loss": 1.8732, "step": 7369 }, { "epoch": 2.27, "learning_rate": 8.397769262214896e-05, "loss": 1.8894, "step": 7370 }, { "epoch": 2.27, "learning_rate": 8.394206356291173e-05, "loss": 2.0709, "step": 7371 }, { "epoch": 2.28, "learning_rate": 8.390643450367451e-05, "loss": 1.8056, "step": 7372 }, { "epoch": 2.28, "learning_rate": 8.387080544443728e-05, "loss": 1.7692, "step": 7373 }, { "epoch": 2.28, "learning_rate": 8.383517638520004e-05, "loss": 1.6711, "step": 7374 }, { "epoch": 2.28, "learning_rate": 8.379954732596282e-05, "loss": 1.5355, "step": 7375 }, { "epoch": 2.28, "learning_rate": 8.376391826672559e-05, "loss": 1.5167, "step": 7376 }, { "epoch": 2.28, "learning_rate": 8.372828920748837e-05, "loss": 1.3761, "step": 7377 }, { "epoch": 2.28, "learning_rate": 8.369266014825112e-05, "loss": 1.5712, "step": 7378 }, { "epoch": 2.28, "learning_rate": 8.36570310890139e-05, "loss": 1.383, "step": 7379 }, { "epoch": 2.28, "learning_rate": 8.362140202977667e-05, "loss": 1.3392, "step": 7380 }, { "epoch": 2.28, "learning_rate": 8.358577297053946e-05, "loss": 4.1555, "step": 7381 }, { "epoch": 2.28, "learning_rate": 8.355014391130221e-05, "loss": 3.5113, "step": 7382 }, { "epoch": 2.28, "learning_rate": 8.351451485206498e-05, "loss": 3.7362, "step": 7383 }, { "epoch": 2.28, "learning_rate": 8.347888579282776e-05, "loss": 3.3676, "step": 7384 }, { "epoch": 2.28, "learning_rate": 8.344325673359053e-05, "loss": 3.4055, "step": 7385 }, { "epoch": 2.28, "learning_rate": 8.34076276743533e-05, "loss": 3.2574, "step": 7386 }, { "epoch": 2.28, "learning_rate": 8.337199861511607e-05, "loss": 3.0435, "step": 7387 }, { "epoch": 2.28, "learning_rate": 8.333636955587885e-05, "loss": 3.2044, "step": 7388 }, { "epoch": 2.28, "learning_rate": 8.330074049664162e-05, "loss": 3.1019, "step": 7389 }, { "epoch": 2.28, "learning_rate": 8.32651114374044e-05, "loss": 3.0177, "step": 7390 }, { "epoch": 2.28, "learning_rate": 8.322948237816715e-05, "loss": 2.835, "step": 7391 }, { "epoch": 2.28, "learning_rate": 8.319385331892992e-05, "loss": 2.9252, "step": 7392 }, { "epoch": 2.28, "learning_rate": 8.31582242596927e-05, "loss": 2.8171, "step": 7393 }, { "epoch": 2.28, "learning_rate": 8.312259520045547e-05, "loss": 2.7037, "step": 7394 }, { "epoch": 2.28, "learning_rate": 8.308696614121823e-05, "loss": 3.0213, "step": 7395 }, { "epoch": 2.28, "learning_rate": 8.305133708198101e-05, "loss": 2.8775, "step": 7396 }, { "epoch": 2.28, "learning_rate": 8.301570802274378e-05, "loss": 2.7093, "step": 7397 }, { "epoch": 2.28, "learning_rate": 8.298007896350656e-05, "loss": 2.9679, "step": 7398 }, { "epoch": 2.28, "learning_rate": 8.294444990426931e-05, "loss": 2.7833, "step": 7399 }, { "epoch": 2.28, "learning_rate": 8.29088208450321e-05, "loss": 2.8266, "step": 7400 }, { "epoch": 2.28, "learning_rate": 8.287319178579486e-05, "loss": 2.68, "step": 7401 }, { "epoch": 2.28, "learning_rate": 8.283756272655765e-05, "loss": 2.5568, "step": 7402 }, { "epoch": 2.28, "learning_rate": 8.28019336673204e-05, "loss": 2.9066, "step": 7403 }, { "epoch": 2.29, "learning_rate": 8.276630460808317e-05, "loss": 2.3456, "step": 7404 }, { "epoch": 2.29, "learning_rate": 8.273067554884595e-05, "loss": 2.3838, "step": 7405 }, { "epoch": 2.29, "learning_rate": 8.269504648960872e-05, "loss": 2.5798, "step": 7406 }, { "epoch": 2.29, "learning_rate": 8.26594174303715e-05, "loss": 2.5223, "step": 7407 }, { "epoch": 2.29, "learning_rate": 8.262378837113426e-05, "loss": 2.4106, "step": 7408 }, { "epoch": 2.29, "learning_rate": 8.258815931189702e-05, "loss": 2.3584, "step": 7409 }, { "epoch": 2.29, "learning_rate": 8.25525302526598e-05, "loss": 2.1321, "step": 7410 }, { "epoch": 2.29, "learning_rate": 8.251690119342257e-05, "loss": 2.2294, "step": 7411 }, { "epoch": 2.29, "learning_rate": 8.248127213418534e-05, "loss": 2.4526, "step": 7412 }, { "epoch": 2.29, "learning_rate": 8.244564307494811e-05, "loss": 1.9779, "step": 7413 }, { "epoch": 2.29, "learning_rate": 8.241001401571089e-05, "loss": 1.9256, "step": 7414 }, { "epoch": 2.29, "learning_rate": 8.237438495647366e-05, "loss": 2.045, "step": 7415 }, { "epoch": 2.29, "learning_rate": 8.233875589723642e-05, "loss": 1.9307, "step": 7416 }, { "epoch": 2.29, "learning_rate": 8.23031268379992e-05, "loss": 1.8251, "step": 7417 }, { "epoch": 2.29, "learning_rate": 8.226749777876197e-05, "loss": 1.8073, "step": 7418 }, { "epoch": 2.29, "learning_rate": 8.223186871952475e-05, "loss": 2.0955, "step": 7419 }, { "epoch": 2.29, "learning_rate": 8.21962396602875e-05, "loss": 1.8342, "step": 7420 }, { "epoch": 2.29, "learning_rate": 8.216061060105027e-05, "loss": 2.1738, "step": 7421 }, { "epoch": 2.29, "learning_rate": 8.212498154181305e-05, "loss": 1.8269, "step": 7422 }, { "epoch": 2.29, "learning_rate": 8.208935248257583e-05, "loss": 1.6411, "step": 7423 }, { "epoch": 2.29, "learning_rate": 8.205372342333859e-05, "loss": 1.8276, "step": 7424 }, { "epoch": 2.29, "learning_rate": 8.201809436410137e-05, "loss": 1.689, "step": 7425 }, { "epoch": 2.29, "learning_rate": 8.198246530486414e-05, "loss": 1.2682, "step": 7426 }, { "epoch": 2.29, "learning_rate": 8.194683624562691e-05, "loss": 1.3829, "step": 7427 }, { "epoch": 2.29, "learning_rate": 8.191120718638968e-05, "loss": 1.411, "step": 7428 }, { "epoch": 2.29, "learning_rate": 8.187557812715246e-05, "loss": 1.3233, "step": 7429 }, { "epoch": 2.29, "learning_rate": 8.183994906791521e-05, "loss": 1.3459, "step": 7430 }, { "epoch": 2.29, "learning_rate": 8.1804320008678e-05, "loss": 3.9149, "step": 7431 }, { "epoch": 2.29, "learning_rate": 8.176869094944076e-05, "loss": 3.5717, "step": 7432 }, { "epoch": 2.29, "learning_rate": 8.173306189020353e-05, "loss": 3.6903, "step": 7433 }, { "epoch": 2.29, "learning_rate": 8.16974328309663e-05, "loss": 3.0654, "step": 7434 }, { "epoch": 2.29, "learning_rate": 8.166180377172908e-05, "loss": 3.2325, "step": 7435 }, { "epoch": 2.3, "learning_rate": 8.162617471249184e-05, "loss": 2.9976, "step": 7436 }, { "epoch": 2.3, "learning_rate": 8.159054565325462e-05, "loss": 3.003, "step": 7437 }, { "epoch": 2.3, "learning_rate": 8.155491659401739e-05, "loss": 2.9751, "step": 7438 }, { "epoch": 2.3, "learning_rate": 8.151928753478016e-05, "loss": 2.8654, "step": 7439 }, { "epoch": 2.3, "learning_rate": 8.148365847554294e-05, "loss": 2.9618, "step": 7440 }, { "epoch": 2.3, "learning_rate": 8.14480294163057e-05, "loss": 2.7485, "step": 7441 }, { "epoch": 2.3, "learning_rate": 8.141240035706847e-05, "loss": 2.8901, "step": 7442 }, { "epoch": 2.3, "learning_rate": 8.137677129783124e-05, "loss": 2.7413, "step": 7443 }, { "epoch": 2.3, "learning_rate": 8.134114223859401e-05, "loss": 2.9215, "step": 7444 }, { "epoch": 2.3, "learning_rate": 8.130551317935678e-05, "loss": 2.7968, "step": 7445 }, { "epoch": 2.3, "learning_rate": 8.126988412011956e-05, "loss": 3.1416, "step": 7446 }, { "epoch": 2.3, "learning_rate": 8.123425506088233e-05, "loss": 2.7434, "step": 7447 }, { "epoch": 2.3, "learning_rate": 8.11986260016451e-05, "loss": 2.6512, "step": 7448 }, { "epoch": 2.3, "learning_rate": 8.116299694240787e-05, "loss": 2.7086, "step": 7449 }, { "epoch": 2.3, "learning_rate": 8.112736788317063e-05, "loss": 2.5756, "step": 7450 }, { "epoch": 2.3, "learning_rate": 8.10917388239334e-05, "loss": 2.7522, "step": 7451 }, { "epoch": 2.3, "learning_rate": 8.105610976469618e-05, "loss": 2.8828, "step": 7452 }, { "epoch": 2.3, "learning_rate": 8.102048070545895e-05, "loss": 2.5501, "step": 7453 }, { "epoch": 2.3, "learning_rate": 8.098485164622172e-05, "loss": 2.5649, "step": 7454 }, { "epoch": 2.3, "learning_rate": 8.09492225869845e-05, "loss": 2.4903, "step": 7455 }, { "epoch": 2.3, "learning_rate": 8.091359352774726e-05, "loss": 2.483, "step": 7456 }, { "epoch": 2.3, "learning_rate": 8.087796446851004e-05, "loss": 2.5665, "step": 7457 }, { "epoch": 2.3, "learning_rate": 8.084233540927281e-05, "loss": 2.5078, "step": 7458 }, { "epoch": 2.3, "learning_rate": 8.080670635003558e-05, "loss": 2.4216, "step": 7459 }, { "epoch": 2.3, "learning_rate": 8.077107729079834e-05, "loss": 2.4534, "step": 7460 }, { "epoch": 2.3, "learning_rate": 8.073544823156113e-05, "loss": 2.2759, "step": 7461 }, { "epoch": 2.3, "learning_rate": 8.069981917232388e-05, "loss": 2.3877, "step": 7462 }, { "epoch": 2.3, "learning_rate": 8.066419011308666e-05, "loss": 2.3674, "step": 7463 }, { "epoch": 2.3, "learning_rate": 8.062856105384943e-05, "loss": 2.2703, "step": 7464 }, { "epoch": 2.3, "learning_rate": 8.05929319946122e-05, "loss": 2.2484, "step": 7465 }, { "epoch": 2.3, "learning_rate": 8.055730293537497e-05, "loss": 1.9137, "step": 7466 }, { "epoch": 2.3, "learning_rate": 8.052167387613775e-05, "loss": 2.0042, "step": 7467 }, { "epoch": 2.3, "learning_rate": 8.048604481690052e-05, "loss": 1.8161, "step": 7468 }, { "epoch": 2.31, "learning_rate": 8.045041575766329e-05, "loss": 1.7166, "step": 7469 }, { "epoch": 2.31, "learning_rate": 8.041478669842605e-05, "loss": 1.9269, "step": 7470 }, { "epoch": 2.31, "learning_rate": 8.037915763918882e-05, "loss": 1.9599, "step": 7471 }, { "epoch": 2.31, "learning_rate": 8.03435285799516e-05, "loss": 1.7545, "step": 7472 }, { "epoch": 2.31, "learning_rate": 8.030789952071437e-05, "loss": 1.6191, "step": 7473 }, { "epoch": 2.31, "learning_rate": 8.027227046147714e-05, "loss": 1.7029, "step": 7474 }, { "epoch": 2.31, "learning_rate": 8.023664140223991e-05, "loss": 1.4696, "step": 7475 }, { "epoch": 2.31, "learning_rate": 8.020101234300269e-05, "loss": 1.3825, "step": 7476 }, { "epoch": 2.31, "learning_rate": 8.016538328376545e-05, "loss": 1.6883, "step": 7477 }, { "epoch": 2.31, "learning_rate": 8.012975422452823e-05, "loss": 1.5452, "step": 7478 }, { "epoch": 2.31, "learning_rate": 8.0094125165291e-05, "loss": 1.2535, "step": 7479 }, { "epoch": 2.31, "learning_rate": 8.005849610605376e-05, "loss": 1.3116, "step": 7480 }, { "epoch": 2.31, "learning_rate": 8.002286704681653e-05, "loss": 3.9622, "step": 7481 }, { "epoch": 2.31, "learning_rate": 7.998723798757931e-05, "loss": 3.4976, "step": 7482 }, { "epoch": 2.31, "learning_rate": 7.995160892834207e-05, "loss": 3.4168, "step": 7483 }, { "epoch": 2.31, "learning_rate": 7.991597986910485e-05, "loss": 3.5935, "step": 7484 }, { "epoch": 2.31, "learning_rate": 7.988035080986762e-05, "loss": 2.8975, "step": 7485 }, { "epoch": 2.31, "learning_rate": 7.984472175063039e-05, "loss": 3.2137, "step": 7486 }, { "epoch": 2.31, "learning_rate": 7.980909269139316e-05, "loss": 2.8585, "step": 7487 }, { "epoch": 2.31, "learning_rate": 7.977346363215594e-05, "loss": 3.054, "step": 7488 }, { "epoch": 2.31, "learning_rate": 7.973783457291871e-05, "loss": 2.844, "step": 7489 }, { "epoch": 2.31, "learning_rate": 7.970220551368148e-05, "loss": 2.7525, "step": 7490 }, { "epoch": 2.31, "learning_rate": 7.966657645444424e-05, "loss": 2.9564, "step": 7491 }, { "epoch": 2.31, "learning_rate": 7.963094739520701e-05, "loss": 3.0422, "step": 7492 }, { "epoch": 2.31, "learning_rate": 7.95953183359698e-05, "loss": 3.0033, "step": 7493 }, { "epoch": 2.31, "learning_rate": 7.955968927673256e-05, "loss": 2.9347, "step": 7494 }, { "epoch": 2.31, "learning_rate": 7.952406021749533e-05, "loss": 2.7911, "step": 7495 }, { "epoch": 2.31, "learning_rate": 7.94884311582581e-05, "loss": 2.7826, "step": 7496 }, { "epoch": 2.31, "learning_rate": 7.945280209902087e-05, "loss": 2.7779, "step": 7497 }, { "epoch": 2.31, "learning_rate": 7.941717303978364e-05, "loss": 2.7568, "step": 7498 }, { "epoch": 2.31, "learning_rate": 7.938154398054642e-05, "loss": 2.8117, "step": 7499 }, { "epoch": 2.31, "learning_rate": 7.934591492130919e-05, "loss": 2.4924, "step": 7500 }, { "epoch": 2.31, "eval_bleu": 2.1186204163334704e-13, "eval_loss": 3.8835675716400146, "eval_runtime": 2570.3045, "eval_samples_per_second": 5.743, "eval_steps_per_second": 0.718, "step": 7500 }, { "epoch": 2.32, "learning_rate": 7.931028586207195e-05, "loss": 3.1026, "step": 7501 }, { "epoch": 2.32, "learning_rate": 7.927465680283472e-05, "loss": 2.5167, "step": 7502 }, { "epoch": 2.32, "learning_rate": 7.923902774359749e-05, "loss": 2.7082, "step": 7503 }, { "epoch": 2.32, "learning_rate": 7.920339868436027e-05, "loss": 2.4041, "step": 7504 }, { "epoch": 2.32, "learning_rate": 7.916776962512304e-05, "loss": 2.4252, "step": 7505 }, { "epoch": 2.32, "learning_rate": 7.913214056588581e-05, "loss": 2.3165, "step": 7506 }, { "epoch": 2.32, "learning_rate": 7.909651150664858e-05, "loss": 2.5555, "step": 7507 }, { "epoch": 2.32, "learning_rate": 7.906088244741136e-05, "loss": 2.3461, "step": 7508 }, { "epoch": 2.32, "learning_rate": 7.902525338817411e-05, "loss": 2.3082, "step": 7509 }, { "epoch": 2.32, "learning_rate": 7.89896243289369e-05, "loss": 2.6328, "step": 7510 }, { "epoch": 2.32, "learning_rate": 7.895399526969966e-05, "loss": 2.4581, "step": 7511 }, { "epoch": 2.32, "learning_rate": 7.891836621046243e-05, "loss": 2.2054, "step": 7512 }, { "epoch": 2.32, "learning_rate": 7.88827371512252e-05, "loss": 2.0143, "step": 7513 }, { "epoch": 2.32, "learning_rate": 7.884710809198798e-05, "loss": 2.1773, "step": 7514 }, { "epoch": 2.32, "learning_rate": 7.881147903275075e-05, "loss": 2.0321, "step": 7515 }, { "epoch": 2.32, "learning_rate": 7.877584997351352e-05, "loss": 1.9164, "step": 7516 }, { "epoch": 2.32, "learning_rate": 7.874022091427629e-05, "loss": 2.115, "step": 7517 }, { "epoch": 2.32, "learning_rate": 7.870459185503906e-05, "loss": 1.8817, "step": 7518 }, { "epoch": 2.32, "learning_rate": 7.866896279580182e-05, "loss": 1.858, "step": 7519 }, { "epoch": 2.32, "learning_rate": 7.86333337365646e-05, "loss": 1.7912, "step": 7520 }, { "epoch": 2.32, "learning_rate": 7.859770467732737e-05, "loss": 1.5856, "step": 7521 }, { "epoch": 2.32, "learning_rate": 7.856207561809014e-05, "loss": 1.7842, "step": 7522 }, { "epoch": 2.32, "learning_rate": 7.852644655885292e-05, "loss": 1.8922, "step": 7523 }, { "epoch": 2.32, "learning_rate": 7.849081749961568e-05, "loss": 1.735, "step": 7524 }, { "epoch": 2.32, "learning_rate": 7.845518844037846e-05, "loss": 1.5129, "step": 7525 }, { "epoch": 2.32, "learning_rate": 7.841955938114123e-05, "loss": 1.345, "step": 7526 }, { "epoch": 2.32, "learning_rate": 7.8383930321904e-05, "loss": 1.352, "step": 7527 }, { "epoch": 2.32, "learning_rate": 7.834830126266677e-05, "loss": 1.2819, "step": 7528 }, { "epoch": 2.32, "learning_rate": 7.831267220342955e-05, "loss": 1.3264, "step": 7529 }, { "epoch": 2.32, "learning_rate": 7.82770431441923e-05, "loss": 1.357, "step": 7530 }, { "epoch": 2.32, "learning_rate": 7.824141408495508e-05, "loss": 3.7947, "step": 7531 }, { "epoch": 2.32, "learning_rate": 7.820578502571785e-05, "loss": 3.7291, "step": 7532 }, { "epoch": 2.32, "learning_rate": 7.817015596648062e-05, "loss": 3.3865, "step": 7533 }, { "epoch": 2.33, "learning_rate": 7.813452690724339e-05, "loss": 3.3081, "step": 7534 }, { "epoch": 2.33, "learning_rate": 7.809889784800617e-05, "loss": 3.1611, "step": 7535 }, { "epoch": 2.33, "learning_rate": 7.806326878876893e-05, "loss": 2.9382, "step": 7536 }, { "epoch": 2.33, "learning_rate": 7.802763972953171e-05, "loss": 3.0917, "step": 7537 }, { "epoch": 2.33, "learning_rate": 7.799201067029448e-05, "loss": 3.0087, "step": 7538 }, { "epoch": 2.33, "learning_rate": 7.795638161105724e-05, "loss": 3.0693, "step": 7539 }, { "epoch": 2.33, "learning_rate": 7.792075255182003e-05, "loss": 2.8523, "step": 7540 }, { "epoch": 2.33, "learning_rate": 7.78851234925828e-05, "loss": 2.8185, "step": 7541 }, { "epoch": 2.33, "learning_rate": 7.784949443334556e-05, "loss": 3.1713, "step": 7542 }, { "epoch": 2.33, "learning_rate": 7.781386537410833e-05, "loss": 2.941, "step": 7543 }, { "epoch": 2.33, "learning_rate": 7.77782363148711e-05, "loss": 2.678, "step": 7544 }, { "epoch": 2.33, "learning_rate": 7.774260725563387e-05, "loss": 2.5384, "step": 7545 }, { "epoch": 2.33, "learning_rate": 7.770697819639665e-05, "loss": 2.5304, "step": 7546 }, { "epoch": 2.33, "learning_rate": 7.767134913715942e-05, "loss": 2.6411, "step": 7547 }, { "epoch": 2.33, "learning_rate": 7.763572007792219e-05, "loss": 2.7365, "step": 7548 }, { "epoch": 2.33, "learning_rate": 7.760009101868496e-05, "loss": 2.6854, "step": 7549 }, { "epoch": 2.33, "learning_rate": 7.756446195944772e-05, "loss": 2.8272, "step": 7550 }, { "epoch": 2.33, "learning_rate": 7.752883290021049e-05, "loss": 2.5442, "step": 7551 }, { "epoch": 2.33, "learning_rate": 7.749320384097327e-05, "loss": 2.4563, "step": 7552 }, { "epoch": 2.33, "learning_rate": 7.745757478173604e-05, "loss": 2.5408, "step": 7553 }, { "epoch": 2.33, "learning_rate": 7.742194572249881e-05, "loss": 2.5569, "step": 7554 }, { "epoch": 2.33, "learning_rate": 7.738631666326159e-05, "loss": 2.4231, "step": 7555 }, { "epoch": 2.33, "learning_rate": 7.735068760402436e-05, "loss": 2.3596, "step": 7556 }, { "epoch": 2.33, "learning_rate": 7.731505854478713e-05, "loss": 2.1234, "step": 7557 }, { "epoch": 2.33, "learning_rate": 7.72794294855499e-05, "loss": 2.3171, "step": 7558 }, { "epoch": 2.33, "learning_rate": 7.724380042631267e-05, "loss": 2.5613, "step": 7559 }, { "epoch": 2.33, "learning_rate": 7.720817136707543e-05, "loss": 2.3768, "step": 7560 }, { "epoch": 2.33, "learning_rate": 7.717254230783822e-05, "loss": 2.0545, "step": 7561 }, { "epoch": 2.33, "learning_rate": 7.713691324860098e-05, "loss": 2.2502, "step": 7562 }, { "epoch": 2.33, "learning_rate": 7.710128418936375e-05, "loss": 2.2215, "step": 7563 }, { "epoch": 2.33, "learning_rate": 7.706565513012652e-05, "loss": 2.0275, "step": 7564 }, { "epoch": 2.33, "learning_rate": 7.703002607088929e-05, "loss": 2.0886, "step": 7565 }, { "epoch": 2.34, "learning_rate": 7.699439701165206e-05, "loss": 1.8885, "step": 7566 }, { "epoch": 2.34, "learning_rate": 7.695876795241484e-05, "loss": 1.8684, "step": 7567 }, { "epoch": 2.34, "learning_rate": 7.692313889317761e-05, "loss": 2.0401, "step": 7568 }, { "epoch": 2.34, "learning_rate": 7.688750983394038e-05, "loss": 1.6281, "step": 7569 }, { "epoch": 2.34, "learning_rate": 7.685188077470316e-05, "loss": 1.6126, "step": 7570 }, { "epoch": 2.34, "learning_rate": 7.681625171546591e-05, "loss": 1.6012, "step": 7571 }, { "epoch": 2.34, "learning_rate": 7.67806226562287e-05, "loss": 1.8957, "step": 7572 }, { "epoch": 2.34, "learning_rate": 7.674499359699146e-05, "loss": 1.5524, "step": 7573 }, { "epoch": 2.34, "learning_rate": 7.670936453775423e-05, "loss": 1.5746, "step": 7574 }, { "epoch": 2.34, "learning_rate": 7.6673735478517e-05, "loss": 1.4824, "step": 7575 }, { "epoch": 2.34, "learning_rate": 7.663810641927978e-05, "loss": 1.3893, "step": 7576 }, { "epoch": 2.34, "learning_rate": 7.660247736004254e-05, "loss": 1.4933, "step": 7577 }, { "epoch": 2.34, "learning_rate": 7.656684830080532e-05, "loss": 1.2601, "step": 7578 }, { "epoch": 2.34, "learning_rate": 7.653121924156809e-05, "loss": 1.4801, "step": 7579 }, { "epoch": 2.34, "learning_rate": 7.649559018233085e-05, "loss": 1.4058, "step": 7580 }, { "epoch": 2.34, "learning_rate": 7.645996112309362e-05, "loss": 3.7893, "step": 7581 }, { "epoch": 2.34, "learning_rate": 7.64243320638564e-05, "loss": 3.3854, "step": 7582 }, { "epoch": 2.34, "learning_rate": 7.638870300461916e-05, "loss": 3.6847, "step": 7583 }, { "epoch": 2.34, "learning_rate": 7.635307394538194e-05, "loss": 3.3516, "step": 7584 }, { "epoch": 2.34, "learning_rate": 7.631744488614471e-05, "loss": 2.8726, "step": 7585 }, { "epoch": 2.34, "learning_rate": 7.628181582690748e-05, "loss": 2.9758, "step": 7586 }, { "epoch": 2.34, "learning_rate": 7.624618676767026e-05, "loss": 3.2309, "step": 7587 }, { "epoch": 2.34, "learning_rate": 7.621055770843303e-05, "loss": 2.7325, "step": 7588 }, { "epoch": 2.34, "learning_rate": 7.61749286491958e-05, "loss": 2.9371, "step": 7589 }, { "epoch": 2.34, "learning_rate": 7.613929958995856e-05, "loss": 2.9833, "step": 7590 }, { "epoch": 2.34, "learning_rate": 7.610367053072133e-05, "loss": 2.7978, "step": 7591 }, { "epoch": 2.34, "learning_rate": 7.60680414714841e-05, "loss": 2.7741, "step": 7592 }, { "epoch": 2.34, "learning_rate": 7.603241241224688e-05, "loss": 2.8195, "step": 7593 }, { "epoch": 2.34, "learning_rate": 7.599678335300965e-05, "loss": 2.7497, "step": 7594 }, { "epoch": 2.34, "learning_rate": 7.596115429377242e-05, "loss": 2.7129, "step": 7595 }, { "epoch": 2.34, "learning_rate": 7.592552523453519e-05, "loss": 2.5611, "step": 7596 }, { "epoch": 2.34, "learning_rate": 7.588989617529797e-05, "loss": 2.6992, "step": 7597 }, { "epoch": 2.35, "learning_rate": 7.585426711606073e-05, "loss": 2.6889, "step": 7598 }, { "epoch": 2.35, "learning_rate": 7.581863805682351e-05, "loss": 2.4659, "step": 7599 }, { "epoch": 2.35, "learning_rate": 7.578300899758628e-05, "loss": 2.5871, "step": 7600 }, { "epoch": 2.35, "learning_rate": 7.574737993834904e-05, "loss": 2.5718, "step": 7601 }, { "epoch": 2.35, "learning_rate": 7.571175087911183e-05, "loss": 2.4882, "step": 7602 }, { "epoch": 2.35, "learning_rate": 7.56761218198746e-05, "loss": 2.2575, "step": 7603 }, { "epoch": 2.35, "learning_rate": 7.564049276063736e-05, "loss": 2.6308, "step": 7604 }, { "epoch": 2.35, "learning_rate": 7.560486370140013e-05, "loss": 2.3178, "step": 7605 }, { "epoch": 2.35, "learning_rate": 7.55692346421629e-05, "loss": 2.4872, "step": 7606 }, { "epoch": 2.35, "learning_rate": 7.553360558292567e-05, "loss": 2.1628, "step": 7607 }, { "epoch": 2.35, "learning_rate": 7.549797652368845e-05, "loss": 2.392, "step": 7608 }, { "epoch": 2.35, "learning_rate": 7.546234746445122e-05, "loss": 2.3006, "step": 7609 }, { "epoch": 2.35, "learning_rate": 7.542671840521399e-05, "loss": 2.2781, "step": 7610 }, { "epoch": 2.35, "learning_rate": 7.539108934597675e-05, "loss": 2.2025, "step": 7611 }, { "epoch": 2.35, "learning_rate": 7.535546028673952e-05, "loss": 1.9751, "step": 7612 }, { "epoch": 2.35, "learning_rate": 7.531983122750229e-05, "loss": 2.0512, "step": 7613 }, { "epoch": 2.35, "learning_rate": 7.528420216826507e-05, "loss": 1.9669, "step": 7614 }, { "epoch": 2.35, "learning_rate": 7.524857310902784e-05, "loss": 2.0624, "step": 7615 }, { "epoch": 2.35, "learning_rate": 7.521294404979061e-05, "loss": 1.8951, "step": 7616 }, { "epoch": 2.35, "learning_rate": 7.517731499055338e-05, "loss": 1.9394, "step": 7617 }, { "epoch": 2.35, "learning_rate": 7.514168593131615e-05, "loss": 1.8421, "step": 7618 }, { "epoch": 2.35, "learning_rate": 7.510605687207893e-05, "loss": 1.739, "step": 7619 }, { "epoch": 2.35, "learning_rate": 7.50704278128417e-05, "loss": 1.9675, "step": 7620 }, { "epoch": 2.35, "learning_rate": 7.503479875360446e-05, "loss": 1.7248, "step": 7621 }, { "epoch": 2.35, "learning_rate": 7.499916969436723e-05, "loss": 1.8458, "step": 7622 }, { "epoch": 2.35, "learning_rate": 7.496354063513001e-05, "loss": 1.736, "step": 7623 }, { "epoch": 2.35, "learning_rate": 7.492791157589277e-05, "loss": 1.6196, "step": 7624 }, { "epoch": 2.35, "learning_rate": 7.489228251665555e-05, "loss": 1.6609, "step": 7625 }, { "epoch": 2.35, "learning_rate": 7.485665345741832e-05, "loss": 1.4505, "step": 7626 }, { "epoch": 2.35, "learning_rate": 7.482102439818109e-05, "loss": 1.4612, "step": 7627 }, { "epoch": 2.35, "learning_rate": 7.478539533894386e-05, "loss": 1.3866, "step": 7628 }, { "epoch": 2.35, "learning_rate": 7.474976627970664e-05, "loss": 1.2129, "step": 7629 }, { "epoch": 2.35, "learning_rate": 7.471413722046939e-05, "loss": 1.45, "step": 7630 }, { "epoch": 2.36, "learning_rate": 7.467850816123217e-05, "loss": 4.2251, "step": 7631 }, { "epoch": 2.36, "learning_rate": 7.464287910199494e-05, "loss": 3.6898, "step": 7632 }, { "epoch": 2.36, "learning_rate": 7.460725004275771e-05, "loss": 3.4566, "step": 7633 }, { "epoch": 2.36, "learning_rate": 7.457162098352048e-05, "loss": 2.9826, "step": 7634 }, { "epoch": 2.36, "learning_rate": 7.453599192428326e-05, "loss": 3.1194, "step": 7635 }, { "epoch": 2.36, "learning_rate": 7.450036286504603e-05, "loss": 2.9838, "step": 7636 }, { "epoch": 2.36, "learning_rate": 7.44647338058088e-05, "loss": 3.1525, "step": 7637 }, { "epoch": 2.36, "learning_rate": 7.442910474657157e-05, "loss": 3.0632, "step": 7638 }, { "epoch": 2.36, "learning_rate": 7.439347568733433e-05, "loss": 2.8537, "step": 7639 }, { "epoch": 2.36, "learning_rate": 7.435784662809712e-05, "loss": 2.9539, "step": 7640 }, { "epoch": 2.36, "learning_rate": 7.432221756885988e-05, "loss": 2.8468, "step": 7641 }, { "epoch": 2.36, "learning_rate": 7.428658850962265e-05, "loss": 2.8823, "step": 7642 }, { "epoch": 2.36, "learning_rate": 7.425095945038542e-05, "loss": 2.9821, "step": 7643 }, { "epoch": 2.36, "learning_rate": 7.42153303911482e-05, "loss": 2.8934, "step": 7644 }, { "epoch": 2.36, "learning_rate": 7.417970133191096e-05, "loss": 2.7269, "step": 7645 }, { "epoch": 2.36, "learning_rate": 7.414407227267374e-05, "loss": 2.6669, "step": 7646 }, { "epoch": 2.36, "learning_rate": 7.410844321343651e-05, "loss": 2.8512, "step": 7647 }, { "epoch": 2.36, "learning_rate": 7.407281415419928e-05, "loss": 2.7353, "step": 7648 }, { "epoch": 2.36, "learning_rate": 7.403718509496204e-05, "loss": 2.3502, "step": 7649 }, { "epoch": 2.36, "learning_rate": 7.400155603572483e-05, "loss": 2.5331, "step": 7650 }, { "epoch": 2.36, "learning_rate": 7.39659269764876e-05, "loss": 2.4242, "step": 7651 }, { "epoch": 2.36, "learning_rate": 7.393029791725036e-05, "loss": 2.6075, "step": 7652 }, { "epoch": 2.36, "learning_rate": 7.389466885801313e-05, "loss": 2.6051, "step": 7653 }, { "epoch": 2.36, "learning_rate": 7.38590397987759e-05, "loss": 2.6306, "step": 7654 }, { "epoch": 2.36, "learning_rate": 7.382341073953868e-05, "loss": 2.3518, "step": 7655 }, { "epoch": 2.36, "learning_rate": 7.378778168030145e-05, "loss": 2.3441, "step": 7656 }, { "epoch": 2.36, "learning_rate": 7.375215262106422e-05, "loss": 2.5802, "step": 7657 }, { "epoch": 2.36, "learning_rate": 7.371652356182699e-05, "loss": 2.2663, "step": 7658 }, { "epoch": 2.36, "learning_rate": 7.368089450258976e-05, "loss": 2.3043, "step": 7659 }, { "epoch": 2.36, "learning_rate": 7.364526544335252e-05, "loss": 2.3244, "step": 7660 }, { "epoch": 2.36, "learning_rate": 7.36096363841153e-05, "loss": 2.1115, "step": 7661 }, { "epoch": 2.36, "learning_rate": 7.357400732487807e-05, "loss": 2.093, "step": 7662 }, { "epoch": 2.37, "learning_rate": 7.353837826564084e-05, "loss": 2.2874, "step": 7663 }, { "epoch": 2.37, "learning_rate": 7.350274920640361e-05, "loss": 2.1176, "step": 7664 }, { "epoch": 2.37, "learning_rate": 7.346712014716638e-05, "loss": 1.9713, "step": 7665 }, { "epoch": 2.37, "learning_rate": 7.343149108792915e-05, "loss": 1.9171, "step": 7666 }, { "epoch": 2.37, "learning_rate": 7.339586202869193e-05, "loss": 1.9053, "step": 7667 }, { "epoch": 2.37, "learning_rate": 7.33602329694547e-05, "loss": 1.8139, "step": 7668 }, { "epoch": 2.37, "learning_rate": 7.332460391021747e-05, "loss": 1.8448, "step": 7669 }, { "epoch": 2.37, "learning_rate": 7.328897485098025e-05, "loss": 1.4769, "step": 7670 }, { "epoch": 2.37, "learning_rate": 7.3253345791743e-05, "loss": 1.7171, "step": 7671 }, { "epoch": 2.37, "learning_rate": 7.321771673250578e-05, "loss": 1.7301, "step": 7672 }, { "epoch": 2.37, "learning_rate": 7.318208767326855e-05, "loss": 1.8105, "step": 7673 }, { "epoch": 2.37, "learning_rate": 7.314645861403132e-05, "loss": 1.6341, "step": 7674 }, { "epoch": 2.37, "learning_rate": 7.311082955479409e-05, "loss": 1.6876, "step": 7675 }, { "epoch": 2.37, "learning_rate": 7.307520049555687e-05, "loss": 1.4963, "step": 7676 }, { "epoch": 2.37, "learning_rate": 7.303957143631963e-05, "loss": 1.3702, "step": 7677 }, { "epoch": 2.37, "learning_rate": 7.300394237708241e-05, "loss": 1.3357, "step": 7678 }, { "epoch": 2.37, "learning_rate": 7.296831331784518e-05, "loss": 1.3969, "step": 7679 }, { "epoch": 2.37, "learning_rate": 7.293268425860794e-05, "loss": 1.2642, "step": 7680 }, { "epoch": 2.37, "learning_rate": 7.289705519937071e-05, "loss": 3.4763, "step": 7681 }, { "epoch": 2.37, "learning_rate": 7.28614261401335e-05, "loss": 3.4817, "step": 7682 }, { "epoch": 2.37, "learning_rate": 7.282579708089625e-05, "loss": 3.551, "step": 7683 }, { "epoch": 2.37, "learning_rate": 7.279016802165903e-05, "loss": 3.1054, "step": 7684 }, { "epoch": 2.37, "learning_rate": 7.275453896242181e-05, "loss": 3.072, "step": 7685 }, { "epoch": 2.37, "learning_rate": 7.271890990318457e-05, "loss": 2.9524, "step": 7686 }, { "epoch": 2.37, "learning_rate": 7.268328084394735e-05, "loss": 3.0583, "step": 7687 }, { "epoch": 2.37, "learning_rate": 7.264765178471012e-05, "loss": 2.9368, "step": 7688 }, { "epoch": 2.37, "learning_rate": 7.261202272547289e-05, "loss": 3.1746, "step": 7689 }, { "epoch": 2.37, "learning_rate": 7.257639366623565e-05, "loss": 2.8037, "step": 7690 }, { "epoch": 2.37, "learning_rate": 7.254076460699844e-05, "loss": 2.8145, "step": 7691 }, { "epoch": 2.37, "learning_rate": 7.250513554776119e-05, "loss": 2.661, "step": 7692 }, { "epoch": 2.37, "learning_rate": 7.246950648852397e-05, "loss": 2.9017, "step": 7693 }, { "epoch": 2.37, "learning_rate": 7.243387742928674e-05, "loss": 2.6463, "step": 7694 }, { "epoch": 2.37, "learning_rate": 7.239824837004951e-05, "loss": 2.6542, "step": 7695 }, { "epoch": 2.38, "learning_rate": 7.236261931081228e-05, "loss": 2.7907, "step": 7696 }, { "epoch": 2.38, "learning_rate": 7.232699025157506e-05, "loss": 2.6997, "step": 7697 }, { "epoch": 2.38, "learning_rate": 7.229136119233781e-05, "loss": 2.4586, "step": 7698 }, { "epoch": 2.38, "learning_rate": 7.22557321331006e-05, "loss": 2.8381, "step": 7699 }, { "epoch": 2.38, "learning_rate": 7.222010307386336e-05, "loss": 2.5212, "step": 7700 }, { "epoch": 2.38, "learning_rate": 7.218447401462613e-05, "loss": 2.51, "step": 7701 }, { "epoch": 2.38, "learning_rate": 7.214884495538892e-05, "loss": 2.3454, "step": 7702 }, { "epoch": 2.38, "learning_rate": 7.211321589615168e-05, "loss": 2.3398, "step": 7703 }, { "epoch": 2.38, "learning_rate": 7.207758683691445e-05, "loss": 2.2402, "step": 7704 }, { "epoch": 2.38, "learning_rate": 7.204195777767722e-05, "loss": 2.457, "step": 7705 }, { "epoch": 2.38, "learning_rate": 7.200632871843999e-05, "loss": 2.6168, "step": 7706 }, { "epoch": 2.38, "learning_rate": 7.197069965920276e-05, "loss": 2.5826, "step": 7707 }, { "epoch": 2.38, "learning_rate": 7.193507059996554e-05, "loss": 2.2733, "step": 7708 }, { "epoch": 2.38, "learning_rate": 7.189944154072831e-05, "loss": 2.417, "step": 7709 }, { "epoch": 2.38, "learning_rate": 7.186381248149108e-05, "loss": 2.2342, "step": 7710 }, { "epoch": 2.38, "learning_rate": 7.182818342225384e-05, "loss": 2.0266, "step": 7711 }, { "epoch": 2.38, "learning_rate": 7.179255436301661e-05, "loss": 2.1959, "step": 7712 }, { "epoch": 2.38, "learning_rate": 7.175692530377938e-05, "loss": 2.1205, "step": 7713 }, { "epoch": 2.38, "learning_rate": 7.172129624454216e-05, "loss": 2.2253, "step": 7714 }, { "epoch": 2.38, "learning_rate": 7.168566718530493e-05, "loss": 1.8714, "step": 7715 }, { "epoch": 2.38, "learning_rate": 7.16500381260677e-05, "loss": 1.8876, "step": 7716 }, { "epoch": 2.38, "learning_rate": 7.161440906683048e-05, "loss": 1.9299, "step": 7717 }, { "epoch": 2.38, "learning_rate": 7.157878000759324e-05, "loss": 2.1508, "step": 7718 }, { "epoch": 2.38, "learning_rate": 7.154315094835602e-05, "loss": 1.8397, "step": 7719 }, { "epoch": 2.38, "learning_rate": 7.150752188911879e-05, "loss": 1.7628, "step": 7720 }, { "epoch": 2.38, "learning_rate": 7.147189282988155e-05, "loss": 1.7074, "step": 7721 }, { "epoch": 2.38, "learning_rate": 7.143626377064432e-05, "loss": 1.773, "step": 7722 }, { "epoch": 2.38, "learning_rate": 7.14006347114071e-05, "loss": 1.7621, "step": 7723 }, { "epoch": 2.38, "learning_rate": 7.136500565216986e-05, "loss": 1.5361, "step": 7724 }, { "epoch": 2.38, "learning_rate": 7.132937659293264e-05, "loss": 1.6136, "step": 7725 }, { "epoch": 2.38, "learning_rate": 7.129374753369541e-05, "loss": 1.5032, "step": 7726 }, { "epoch": 2.38, "learning_rate": 7.125811847445818e-05, "loss": 1.4019, "step": 7727 }, { "epoch": 2.39, "learning_rate": 7.122248941522095e-05, "loss": 1.4135, "step": 7728 }, { "epoch": 2.39, "learning_rate": 7.118686035598373e-05, "loss": 1.2504, "step": 7729 }, { "epoch": 2.39, "learning_rate": 7.11512312967465e-05, "loss": 1.1386, "step": 7730 }, { "epoch": 2.39, "learning_rate": 7.111560223750926e-05, "loss": 3.385, "step": 7731 }, { "epoch": 2.39, "learning_rate": 7.107997317827203e-05, "loss": 3.4787, "step": 7732 }, { "epoch": 2.39, "learning_rate": 7.10443441190348e-05, "loss": 3.3064, "step": 7733 }, { "epoch": 2.39, "learning_rate": 7.100871505979758e-05, "loss": 3.2023, "step": 7734 }, { "epoch": 2.39, "learning_rate": 7.097308600056035e-05, "loss": 2.8663, "step": 7735 }, { "epoch": 2.39, "learning_rate": 7.093745694132312e-05, "loss": 3.1788, "step": 7736 }, { "epoch": 2.39, "learning_rate": 7.090182788208589e-05, "loss": 3.1374, "step": 7737 }, { "epoch": 2.39, "learning_rate": 7.086619882284867e-05, "loss": 2.7899, "step": 7738 }, { "epoch": 2.39, "learning_rate": 7.083056976361142e-05, "loss": 2.8757, "step": 7739 }, { "epoch": 2.39, "learning_rate": 7.07949407043742e-05, "loss": 2.7851, "step": 7740 }, { "epoch": 2.39, "learning_rate": 7.075931164513697e-05, "loss": 2.6727, "step": 7741 }, { "epoch": 2.39, "learning_rate": 7.072368258589974e-05, "loss": 2.8675, "step": 7742 }, { "epoch": 2.39, "learning_rate": 7.068805352666251e-05, "loss": 2.6687, "step": 7743 }, { "epoch": 2.39, "learning_rate": 7.065242446742529e-05, "loss": 2.7614, "step": 7744 }, { "epoch": 2.39, "learning_rate": 7.061679540818805e-05, "loss": 2.5521, "step": 7745 }, { "epoch": 2.39, "learning_rate": 7.058116634895083e-05, "loss": 2.7908, "step": 7746 }, { "epoch": 2.39, "learning_rate": 7.05455372897136e-05, "loss": 2.4883, "step": 7747 }, { "epoch": 2.39, "learning_rate": 7.050990823047637e-05, "loss": 2.6457, "step": 7748 }, { "epoch": 2.39, "learning_rate": 7.047427917123915e-05, "loss": 2.8473, "step": 7749 }, { "epoch": 2.39, "learning_rate": 7.043865011200192e-05, "loss": 2.6216, "step": 7750 }, { "epoch": 2.39, "learning_rate": 7.040302105276468e-05, "loss": 2.4056, "step": 7751 }, { "epoch": 2.39, "learning_rate": 7.036739199352745e-05, "loss": 2.432, "step": 7752 }, { "epoch": 2.39, "learning_rate": 7.033176293429022e-05, "loss": 2.3974, "step": 7753 }, { "epoch": 2.39, "learning_rate": 7.029613387505299e-05, "loss": 2.4859, "step": 7754 }, { "epoch": 2.39, "learning_rate": 7.026050481581577e-05, "loss": 2.5073, "step": 7755 }, { "epoch": 2.39, "learning_rate": 7.022487575657854e-05, "loss": 2.5543, "step": 7756 }, { "epoch": 2.39, "learning_rate": 7.018924669734131e-05, "loss": 2.513, "step": 7757 }, { "epoch": 2.39, "learning_rate": 7.015361763810408e-05, "loss": 2.4267, "step": 7758 }, { "epoch": 2.39, "learning_rate": 7.011798857886685e-05, "loss": 2.4187, "step": 7759 }, { "epoch": 2.4, "learning_rate": 7.008235951962961e-05, "loss": 2.1179, "step": 7760 }, { "epoch": 2.4, "learning_rate": 7.00467304603924e-05, "loss": 2.1867, "step": 7761 }, { "epoch": 2.4, "learning_rate": 7.001110140115516e-05, "loss": 2.1019, "step": 7762 }, { "epoch": 2.4, "learning_rate": 6.997547234191793e-05, "loss": 2.0118, "step": 7763 }, { "epoch": 2.4, "learning_rate": 6.99398432826807e-05, "loss": 2.0783, "step": 7764 }, { "epoch": 2.4, "learning_rate": 6.990421422344347e-05, "loss": 1.7735, "step": 7765 }, { "epoch": 2.4, "learning_rate": 6.986858516420625e-05, "loss": 2.1662, "step": 7766 }, { "epoch": 2.4, "learning_rate": 6.983295610496902e-05, "loss": 1.8919, "step": 7767 }, { "epoch": 2.4, "learning_rate": 6.979732704573179e-05, "loss": 1.6556, "step": 7768 }, { "epoch": 2.4, "learning_rate": 6.976169798649456e-05, "loss": 1.9232, "step": 7769 }, { "epoch": 2.4, "learning_rate": 6.972606892725734e-05, "loss": 1.5576, "step": 7770 }, { "epoch": 2.4, "learning_rate": 6.96904398680201e-05, "loss": 1.7531, "step": 7771 }, { "epoch": 2.4, "learning_rate": 6.965481080878287e-05, "loss": 1.7591, "step": 7772 }, { "epoch": 2.4, "learning_rate": 6.961918174954564e-05, "loss": 1.6962, "step": 7773 }, { "epoch": 2.4, "learning_rate": 6.958355269030841e-05, "loss": 1.5536, "step": 7774 }, { "epoch": 2.4, "learning_rate": 6.954792363107118e-05, "loss": 1.5093, "step": 7775 }, { "epoch": 2.4, "learning_rate": 6.951229457183396e-05, "loss": 1.4562, "step": 7776 }, { "epoch": 2.4, "learning_rate": 6.947666551259673e-05, "loss": 1.3053, "step": 7777 }, { "epoch": 2.4, "learning_rate": 6.94410364533595e-05, "loss": 1.2665, "step": 7778 }, { "epoch": 2.4, "learning_rate": 6.940540739412227e-05, "loss": 1.2663, "step": 7779 }, { "epoch": 2.4, "learning_rate": 6.936977833488503e-05, "loss": 1.3505, "step": 7780 }, { "epoch": 2.4, "learning_rate": 6.93341492756478e-05, "loss": 3.7558, "step": 7781 }, { "epoch": 2.4, "learning_rate": 6.929852021641058e-05, "loss": 3.3249, "step": 7782 }, { "epoch": 2.4, "learning_rate": 6.926289115717335e-05, "loss": 3.1812, "step": 7783 }, { "epoch": 2.4, "learning_rate": 6.922726209793612e-05, "loss": 3.0523, "step": 7784 }, { "epoch": 2.4, "learning_rate": 6.91916330386989e-05, "loss": 2.816, "step": 7785 }, { "epoch": 2.4, "learning_rate": 6.915600397946166e-05, "loss": 2.897, "step": 7786 }, { "epoch": 2.4, "learning_rate": 6.912037492022444e-05, "loss": 2.9205, "step": 7787 }, { "epoch": 2.4, "learning_rate": 6.908474586098721e-05, "loss": 2.9991, "step": 7788 }, { "epoch": 2.4, "learning_rate": 6.904911680174998e-05, "loss": 3.0929, "step": 7789 }, { "epoch": 2.4, "learning_rate": 6.901348774251274e-05, "loss": 2.7561, "step": 7790 }, { "epoch": 2.4, "learning_rate": 6.897785868327553e-05, "loss": 2.6904, "step": 7791 }, { "epoch": 2.4, "learning_rate": 6.894222962403828e-05, "loss": 2.6866, "step": 7792 }, { "epoch": 2.41, "learning_rate": 6.890660056480106e-05, "loss": 2.8004, "step": 7793 }, { "epoch": 2.41, "learning_rate": 6.887097150556383e-05, "loss": 2.4816, "step": 7794 }, { "epoch": 2.41, "learning_rate": 6.88353424463266e-05, "loss": 2.7183, "step": 7795 }, { "epoch": 2.41, "learning_rate": 6.879971338708937e-05, "loss": 2.4381, "step": 7796 }, { "epoch": 2.41, "learning_rate": 6.876408432785215e-05, "loss": 2.6938, "step": 7797 }, { "epoch": 2.41, "learning_rate": 6.872845526861492e-05, "loss": 2.5151, "step": 7798 }, { "epoch": 2.41, "learning_rate": 6.869282620937769e-05, "loss": 2.509, "step": 7799 }, { "epoch": 2.41, "learning_rate": 6.865719715014045e-05, "loss": 2.3981, "step": 7800 }, { "epoch": 2.41, "learning_rate": 6.862156809090322e-05, "loss": 2.3659, "step": 7801 }, { "epoch": 2.41, "learning_rate": 6.8585939031666e-05, "loss": 2.4368, "step": 7802 }, { "epoch": 2.41, "learning_rate": 6.855030997242877e-05, "loss": 2.2669, "step": 7803 }, { "epoch": 2.41, "learning_rate": 6.851468091319154e-05, "loss": 2.3747, "step": 7804 }, { "epoch": 2.41, "learning_rate": 6.847905185395431e-05, "loss": 2.3811, "step": 7805 }, { "epoch": 2.41, "learning_rate": 6.844342279471708e-05, "loss": 2.257, "step": 7806 }, { "epoch": 2.41, "learning_rate": 6.840779373547985e-05, "loss": 2.4318, "step": 7807 }, { "epoch": 2.41, "learning_rate": 6.837216467624263e-05, "loss": 1.9572, "step": 7808 }, { "epoch": 2.41, "learning_rate": 6.83365356170054e-05, "loss": 2.0928, "step": 7809 }, { "epoch": 2.41, "learning_rate": 6.830090655776817e-05, "loss": 2.0709, "step": 7810 }, { "epoch": 2.41, "learning_rate": 6.826527749853093e-05, "loss": 2.0569, "step": 7811 }, { "epoch": 2.41, "learning_rate": 6.82296484392937e-05, "loss": 1.9202, "step": 7812 }, { "epoch": 2.41, "learning_rate": 6.819401938005647e-05, "loss": 1.7589, "step": 7813 }, { "epoch": 2.41, "learning_rate": 6.815839032081925e-05, "loss": 1.9078, "step": 7814 }, { "epoch": 2.41, "learning_rate": 6.812276126158202e-05, "loss": 1.6702, "step": 7815 }, { "epoch": 2.41, "learning_rate": 6.808713220234479e-05, "loss": 1.8967, "step": 7816 }, { "epoch": 2.41, "learning_rate": 6.805150314310757e-05, "loss": 2.0073, "step": 7817 }, { "epoch": 2.41, "learning_rate": 6.801587408387034e-05, "loss": 1.7825, "step": 7818 }, { "epoch": 2.41, "learning_rate": 6.798024502463311e-05, "loss": 1.7133, "step": 7819 }, { "epoch": 2.41, "learning_rate": 6.794461596539588e-05, "loss": 1.6444, "step": 7820 }, { "epoch": 2.41, "learning_rate": 6.790898690615864e-05, "loss": 1.4581, "step": 7821 }, { "epoch": 2.41, "learning_rate": 6.787335784692141e-05, "loss": 1.424, "step": 7822 }, { "epoch": 2.41, "learning_rate": 6.78377287876842e-05, "loss": 1.7781, "step": 7823 }, { "epoch": 2.41, "learning_rate": 6.780209972844696e-05, "loss": 1.4845, "step": 7824 }, { "epoch": 2.42, "learning_rate": 6.776647066920973e-05, "loss": 1.4216, "step": 7825 }, { "epoch": 2.42, "learning_rate": 6.77308416099725e-05, "loss": 1.5291, "step": 7826 }, { "epoch": 2.42, "learning_rate": 6.769521255073527e-05, "loss": 1.2039, "step": 7827 }, { "epoch": 2.42, "learning_rate": 6.765958349149804e-05, "loss": 1.3614, "step": 7828 }, { "epoch": 2.42, "learning_rate": 6.762395443226082e-05, "loss": 1.2885, "step": 7829 }, { "epoch": 2.42, "learning_rate": 6.758832537302359e-05, "loss": 1.1618, "step": 7830 }, { "epoch": 2.42, "learning_rate": 6.755269631378635e-05, "loss": 3.8549, "step": 7831 }, { "epoch": 2.42, "learning_rate": 6.751706725454914e-05, "loss": 3.1316, "step": 7832 }, { "epoch": 2.42, "learning_rate": 6.748143819531189e-05, "loss": 3.1912, "step": 7833 }, { "epoch": 2.42, "learning_rate": 6.744580913607467e-05, "loss": 3.0965, "step": 7834 }, { "epoch": 2.42, "learning_rate": 6.741018007683744e-05, "loss": 3.1449, "step": 7835 }, { "epoch": 2.42, "learning_rate": 6.737455101760021e-05, "loss": 2.8179, "step": 7836 }, { "epoch": 2.42, "learning_rate": 6.733892195836298e-05, "loss": 2.8559, "step": 7837 }, { "epoch": 2.42, "learning_rate": 6.730329289912576e-05, "loss": 2.8135, "step": 7838 }, { "epoch": 2.42, "learning_rate": 6.726766383988851e-05, "loss": 3.0382, "step": 7839 }, { "epoch": 2.42, "learning_rate": 6.72320347806513e-05, "loss": 2.9399, "step": 7840 }, { "epoch": 2.42, "learning_rate": 6.719640572141406e-05, "loss": 2.8862, "step": 7841 }, { "epoch": 2.42, "learning_rate": 6.716077666217683e-05, "loss": 2.7485, "step": 7842 }, { "epoch": 2.42, "learning_rate": 6.71251476029396e-05, "loss": 2.6385, "step": 7843 }, { "epoch": 2.42, "learning_rate": 6.708951854370238e-05, "loss": 2.7138, "step": 7844 }, { "epoch": 2.42, "learning_rate": 6.705388948446514e-05, "loss": 2.6547, "step": 7845 }, { "epoch": 2.42, "learning_rate": 6.701826042522792e-05, "loss": 2.7052, "step": 7846 }, { "epoch": 2.42, "learning_rate": 6.698263136599069e-05, "loss": 2.7174, "step": 7847 }, { "epoch": 2.42, "learning_rate": 6.694700230675346e-05, "loss": 2.8173, "step": 7848 }, { "epoch": 2.42, "learning_rate": 6.691137324751624e-05, "loss": 2.5625, "step": 7849 }, { "epoch": 2.42, "learning_rate": 6.6875744188279e-05, "loss": 2.6696, "step": 7850 }, { "epoch": 2.42, "learning_rate": 6.684011512904177e-05, "loss": 2.6878, "step": 7851 }, { "epoch": 2.42, "learning_rate": 6.680448606980454e-05, "loss": 2.3361, "step": 7852 }, { "epoch": 2.42, "learning_rate": 6.676885701056731e-05, "loss": 2.3596, "step": 7853 }, { "epoch": 2.42, "learning_rate": 6.673322795133008e-05, "loss": 2.4507, "step": 7854 }, { "epoch": 2.42, "learning_rate": 6.669759889209286e-05, "loss": 2.5421, "step": 7855 }, { "epoch": 2.42, "learning_rate": 6.666196983285563e-05, "loss": 2.0953, "step": 7856 }, { "epoch": 2.42, "learning_rate": 6.66263407736184e-05, "loss": 2.3242, "step": 7857 }, { "epoch": 2.43, "learning_rate": 6.659071171438117e-05, "loss": 2.2409, "step": 7858 }, { "epoch": 2.43, "learning_rate": 6.655508265514395e-05, "loss": 2.1697, "step": 7859 }, { "epoch": 2.43, "learning_rate": 6.65194535959067e-05, "loss": 2.1331, "step": 7860 }, { "epoch": 2.43, "learning_rate": 6.648382453666949e-05, "loss": 2.2246, "step": 7861 }, { "epoch": 2.43, "learning_rate": 6.644819547743225e-05, "loss": 1.7736, "step": 7862 }, { "epoch": 2.43, "learning_rate": 6.641256641819502e-05, "loss": 2.135, "step": 7863 }, { "epoch": 2.43, "learning_rate": 6.63769373589578e-05, "loss": 1.8572, "step": 7864 }, { "epoch": 2.43, "learning_rate": 6.634130829972057e-05, "loss": 1.9604, "step": 7865 }, { "epoch": 2.43, "learning_rate": 6.630567924048334e-05, "loss": 1.8674, "step": 7866 }, { "epoch": 2.43, "learning_rate": 6.627005018124611e-05, "loss": 1.9261, "step": 7867 }, { "epoch": 2.43, "learning_rate": 6.623442112200888e-05, "loss": 1.7326, "step": 7868 }, { "epoch": 2.43, "learning_rate": 6.619879206277165e-05, "loss": 1.7613, "step": 7869 }, { "epoch": 2.43, "learning_rate": 6.616316300353443e-05, "loss": 1.8381, "step": 7870 }, { "epoch": 2.43, "learning_rate": 6.61275339442972e-05, "loss": 1.7016, "step": 7871 }, { "epoch": 2.43, "learning_rate": 6.609190488505996e-05, "loss": 1.7409, "step": 7872 }, { "epoch": 2.43, "learning_rate": 6.605627582582273e-05, "loss": 1.4567, "step": 7873 }, { "epoch": 2.43, "learning_rate": 6.60206467665855e-05, "loss": 1.4967, "step": 7874 }, { "epoch": 2.43, "learning_rate": 6.598501770734827e-05, "loss": 1.4032, "step": 7875 }, { "epoch": 2.43, "learning_rate": 6.594938864811105e-05, "loss": 1.5433, "step": 7876 }, { "epoch": 2.43, "learning_rate": 6.591375958887382e-05, "loss": 1.2509, "step": 7877 }, { "epoch": 2.43, "learning_rate": 6.587813052963659e-05, "loss": 1.4249, "step": 7878 }, { "epoch": 2.43, "learning_rate": 6.584250147039936e-05, "loss": 1.2086, "step": 7879 }, { "epoch": 2.43, "learning_rate": 6.580687241116212e-05, "loss": 1.2866, "step": 7880 }, { "epoch": 2.43, "learning_rate": 6.57712433519249e-05, "loss": 4.0232, "step": 7881 }, { "epoch": 2.43, "learning_rate": 6.573561429268767e-05, "loss": 3.3427, "step": 7882 }, { "epoch": 2.43, "learning_rate": 6.569998523345044e-05, "loss": 3.3558, "step": 7883 }, { "epoch": 2.43, "learning_rate": 6.566435617421321e-05, "loss": 3.3439, "step": 7884 }, { "epoch": 2.43, "learning_rate": 6.562872711497599e-05, "loss": 2.969, "step": 7885 }, { "epoch": 2.43, "learning_rate": 6.559309805573875e-05, "loss": 3.0058, "step": 7886 }, { "epoch": 2.43, "learning_rate": 6.555746899650153e-05, "loss": 3.015, "step": 7887 }, { "epoch": 2.43, "learning_rate": 6.55218399372643e-05, "loss": 2.7362, "step": 7888 }, { "epoch": 2.43, "learning_rate": 6.548621087802707e-05, "loss": 2.8832, "step": 7889 }, { "epoch": 2.44, "learning_rate": 6.545058181878983e-05, "loss": 2.889, "step": 7890 }, { "epoch": 2.44, "learning_rate": 6.541495275955262e-05, "loss": 2.6142, "step": 7891 }, { "epoch": 2.44, "learning_rate": 6.537932370031537e-05, "loss": 2.8801, "step": 7892 }, { "epoch": 2.44, "learning_rate": 6.534369464107815e-05, "loss": 2.6102, "step": 7893 }, { "epoch": 2.44, "learning_rate": 6.530806558184092e-05, "loss": 2.567, "step": 7894 }, { "epoch": 2.44, "learning_rate": 6.527243652260369e-05, "loss": 2.7, "step": 7895 }, { "epoch": 2.44, "learning_rate": 6.523680746336646e-05, "loss": 2.526, "step": 7896 }, { "epoch": 2.44, "learning_rate": 6.520117840412924e-05, "loss": 2.6436, "step": 7897 }, { "epoch": 2.44, "learning_rate": 6.516554934489201e-05, "loss": 2.4397, "step": 7898 }, { "epoch": 2.44, "learning_rate": 6.512992028565478e-05, "loss": 2.4029, "step": 7899 }, { "epoch": 2.44, "learning_rate": 6.509429122641756e-05, "loss": 2.3413, "step": 7900 }, { "epoch": 2.44, "learning_rate": 6.505866216718031e-05, "loss": 2.445, "step": 7901 }, { "epoch": 2.44, "learning_rate": 6.50230331079431e-05, "loss": 2.439, "step": 7902 }, { "epoch": 2.44, "learning_rate": 6.498740404870586e-05, "loss": 2.4618, "step": 7903 }, { "epoch": 2.44, "learning_rate": 6.495177498946863e-05, "loss": 2.3954, "step": 7904 }, { "epoch": 2.44, "learning_rate": 6.49161459302314e-05, "loss": 2.4628, "step": 7905 }, { "epoch": 2.44, "learning_rate": 6.488051687099418e-05, "loss": 2.3536, "step": 7906 }, { "epoch": 2.44, "learning_rate": 6.484488781175694e-05, "loss": 2.3192, "step": 7907 }, { "epoch": 2.44, "learning_rate": 6.480925875251972e-05, "loss": 2.2436, "step": 7908 }, { "epoch": 2.44, "learning_rate": 6.477362969328249e-05, "loss": 2.0623, "step": 7909 }, { "epoch": 2.44, "learning_rate": 6.473800063404525e-05, "loss": 2.1135, "step": 7910 }, { "epoch": 2.44, "learning_rate": 6.470237157480802e-05, "loss": 2.1445, "step": 7911 }, { "epoch": 2.44, "learning_rate": 6.470237157480802e-05, "loss": 2.2949, "step": 7912 }, { "epoch": 2.44, "learning_rate": 6.46667425155708e-05, "loss": 2.1258, "step": 7913 }, { "epoch": 2.44, "learning_rate": 6.463111345633357e-05, "loss": 1.9946, "step": 7914 }, { "epoch": 2.44, "learning_rate": 6.459548439709634e-05, "loss": 1.7539, "step": 7915 }, { "epoch": 2.44, "learning_rate": 6.455985533785911e-05, "loss": 1.8515, "step": 7916 }, { "epoch": 2.44, "learning_rate": 6.452422627862188e-05, "loss": 1.9675, "step": 7917 }, { "epoch": 2.44, "learning_rate": 6.448859721938466e-05, "loss": 1.6075, "step": 7918 }, { "epoch": 2.44, "learning_rate": 6.445296816014743e-05, "loss": 1.7512, "step": 7919 }, { "epoch": 2.44, "learning_rate": 6.44173391009102e-05, "loss": 1.658, "step": 7920 }, { "epoch": 2.44, "learning_rate": 6.438171004167297e-05, "loss": 1.6142, "step": 7921 }, { "epoch": 2.45, "learning_rate": 6.434608098243573e-05, "loss": 1.6225, "step": 7922 }, { "epoch": 2.45, "learning_rate": 6.43104519231985e-05, "loss": 1.6056, "step": 7923 }, { "epoch": 2.45, "learning_rate": 6.427482286396128e-05, "loss": 1.4985, "step": 7924 }, { "epoch": 2.45, "learning_rate": 6.423919380472405e-05, "loss": 1.4567, "step": 7925 }, { "epoch": 2.45, "learning_rate": 6.420356474548682e-05, "loss": 1.2598, "step": 7926 }, { "epoch": 2.45, "learning_rate": 6.416793568624959e-05, "loss": 1.3144, "step": 7927 }, { "epoch": 2.45, "learning_rate": 6.413230662701236e-05, "loss": 1.2632, "step": 7928 }, { "epoch": 2.45, "learning_rate": 6.409667756777513e-05, "loss": 1.2334, "step": 7929 }, { "epoch": 2.45, "learning_rate": 6.406104850853791e-05, "loss": 1.2923, "step": 7930 }, { "epoch": 2.45, "learning_rate": 6.402541944930068e-05, "loss": 3.5951, "step": 7931 }, { "epoch": 2.45, "learning_rate": 6.398979039006344e-05, "loss": 3.7701, "step": 7932 }, { "epoch": 2.45, "learning_rate": 6.395416133082623e-05, "loss": 3.1551, "step": 7933 }, { "epoch": 2.45, "learning_rate": 6.391853227158898e-05, "loss": 3.2705, "step": 7934 }, { "epoch": 2.45, "learning_rate": 6.388290321235176e-05, "loss": 3.1958, "step": 7935 }, { "epoch": 2.45, "learning_rate": 6.384727415311453e-05, "loss": 3.0288, "step": 7936 }, { "epoch": 2.45, "learning_rate": 6.38116450938773e-05, "loss": 2.8311, "step": 7937 }, { "epoch": 2.45, "learning_rate": 6.377601603464007e-05, "loss": 2.7868, "step": 7938 }, { "epoch": 2.45, "learning_rate": 6.374038697540285e-05, "loss": 3.1898, "step": 7939 }, { "epoch": 2.45, "learning_rate": 6.37047579161656e-05, "loss": 2.8153, "step": 7940 }, { "epoch": 2.45, "learning_rate": 6.366912885692839e-05, "loss": 2.4941, "step": 7941 }, { "epoch": 2.45, "learning_rate": 6.363349979769115e-05, "loss": 2.7173, "step": 7942 }, { "epoch": 2.45, "learning_rate": 6.359787073845392e-05, "loss": 2.6117, "step": 7943 }, { "epoch": 2.45, "learning_rate": 6.356224167921669e-05, "loss": 2.6274, "step": 7944 }, { "epoch": 2.45, "learning_rate": 6.352661261997947e-05, "loss": 2.9317, "step": 7945 }, { "epoch": 2.45, "learning_rate": 6.349098356074224e-05, "loss": 2.6433, "step": 7946 }, { "epoch": 2.45, "learning_rate": 6.345535450150501e-05, "loss": 2.6089, "step": 7947 }, { "epoch": 2.45, "learning_rate": 6.341972544226779e-05, "loss": 2.6618, "step": 7948 }, { "epoch": 2.45, "learning_rate": 6.338409638303055e-05, "loss": 2.5497, "step": 7949 }, { "epoch": 2.45, "learning_rate": 6.334846732379333e-05, "loss": 2.4734, "step": 7950 }, { "epoch": 2.45, "learning_rate": 6.33128382645561e-05, "loss": 2.6527, "step": 7951 }, { "epoch": 2.45, "learning_rate": 6.327720920531886e-05, "loss": 2.3293, "step": 7952 }, { "epoch": 2.45, "learning_rate": 6.324158014608163e-05, "loss": 2.2725, "step": 7953 }, { "epoch": 2.45, "learning_rate": 6.320595108684441e-05, "loss": 2.1242, "step": 7954 }, { "epoch": 2.46, "learning_rate": 6.317032202760717e-05, "loss": 2.6352, "step": 7955 }, { "epoch": 2.46, "learning_rate": 6.313469296836995e-05, "loss": 2.1152, "step": 7956 }, { "epoch": 2.46, "learning_rate": 6.309906390913272e-05, "loss": 2.5651, "step": 7957 }, { "epoch": 2.46, "learning_rate": 6.306343484989549e-05, "loss": 2.3933, "step": 7958 }, { "epoch": 2.46, "learning_rate": 6.302780579065826e-05, "loss": 2.3546, "step": 7959 }, { "epoch": 2.46, "learning_rate": 6.299217673142104e-05, "loss": 2.2679, "step": 7960 }, { "epoch": 2.46, "learning_rate": 6.295654767218379e-05, "loss": 2.2121, "step": 7961 }, { "epoch": 2.46, "learning_rate": 6.292091861294657e-05, "loss": 2.0256, "step": 7962 }, { "epoch": 2.46, "learning_rate": 6.288528955370934e-05, "loss": 1.6857, "step": 7963 }, { "epoch": 2.46, "learning_rate": 6.284966049447211e-05, "loss": 2.0285, "step": 7964 }, { "epoch": 2.46, "learning_rate": 6.28140314352349e-05, "loss": 1.73, "step": 7965 }, { "epoch": 2.46, "learning_rate": 6.277840237599766e-05, "loss": 1.8257, "step": 7966 }, { "epoch": 2.46, "learning_rate": 6.274277331676043e-05, "loss": 1.7126, "step": 7967 }, { "epoch": 2.46, "learning_rate": 6.27071442575232e-05, "loss": 1.7914, "step": 7968 }, { "epoch": 2.46, "learning_rate": 6.267151519828597e-05, "loss": 1.8199, "step": 7969 }, { "epoch": 2.46, "learning_rate": 6.263588613904873e-05, "loss": 1.7294, "step": 7970 }, { "epoch": 2.46, "learning_rate": 6.260025707981152e-05, "loss": 1.403, "step": 7971 }, { "epoch": 2.46, "learning_rate": 6.256462802057429e-05, "loss": 1.495, "step": 7972 }, { "epoch": 2.46, "learning_rate": 6.252899896133705e-05, "loss": 1.4786, "step": 7973 }, { "epoch": 2.46, "learning_rate": 6.249336990209982e-05, "loss": 1.3949, "step": 7974 }, { "epoch": 2.46, "learning_rate": 6.245774084286259e-05, "loss": 1.5175, "step": 7975 }, { "epoch": 2.46, "learning_rate": 6.242211178362536e-05, "loss": 1.4641, "step": 7976 }, { "epoch": 2.46, "learning_rate": 6.238648272438814e-05, "loss": 1.3942, "step": 7977 }, { "epoch": 2.46, "learning_rate": 6.235085366515091e-05, "loss": 1.0813, "step": 7978 }, { "epoch": 2.46, "learning_rate": 6.231522460591368e-05, "loss": 1.22, "step": 7979 }, { "epoch": 2.46, "learning_rate": 6.227959554667646e-05, "loss": 1.3055, "step": 7980 }, { "epoch": 2.46, "learning_rate": 6.224396648743921e-05, "loss": 3.9451, "step": 7981 }, { "epoch": 2.46, "learning_rate": 6.2208337428202e-05, "loss": 3.5157, "step": 7982 }, { "epoch": 2.46, "learning_rate": 6.217270836896476e-05, "loss": 3.0303, "step": 7983 }, { "epoch": 2.46, "learning_rate": 6.213707930972753e-05, "loss": 2.9565, "step": 7984 }, { "epoch": 2.46, "learning_rate": 6.21014502504903e-05, "loss": 2.9541, "step": 7985 }, { "epoch": 2.46, "learning_rate": 6.206582119125308e-05, "loss": 2.8482, "step": 7986 }, { "epoch": 2.47, "learning_rate": 6.203019213201584e-05, "loss": 2.8123, "step": 7987 }, { "epoch": 2.47, "learning_rate": 6.199456307277862e-05, "loss": 2.6416, "step": 7988 }, { "epoch": 2.47, "learning_rate": 6.195893401354139e-05, "loss": 2.7831, "step": 7989 }, { "epoch": 2.47, "learning_rate": 6.192330495430416e-05, "loss": 2.7502, "step": 7990 }, { "epoch": 2.47, "learning_rate": 6.188767589506692e-05, "loss": 2.8059, "step": 7991 }, { "epoch": 2.47, "learning_rate": 6.18520468358297e-05, "loss": 2.711, "step": 7992 }, { "epoch": 2.47, "learning_rate": 6.181641777659247e-05, "loss": 2.6527, "step": 7993 }, { "epoch": 2.47, "learning_rate": 6.178078871735524e-05, "loss": 2.6607, "step": 7994 }, { "epoch": 2.47, "learning_rate": 6.174515965811801e-05, "loss": 2.7427, "step": 7995 }, { "epoch": 2.47, "learning_rate": 6.170953059888078e-05, "loss": 2.4373, "step": 7996 }, { "epoch": 2.47, "learning_rate": 6.167390153964356e-05, "loss": 2.4249, "step": 7997 }, { "epoch": 2.47, "learning_rate": 6.163827248040633e-05, "loss": 2.4883, "step": 7998 }, { "epoch": 2.47, "learning_rate": 6.16026434211691e-05, "loss": 2.4887, "step": 7999 }, { "epoch": 2.47, "learning_rate": 6.156701436193187e-05, "loss": 2.6693, "step": 8000 }, { "epoch": 2.47, "eval_bleu": 2.002877100137286e-15, "eval_loss": 3.558516025543213, "eval_runtime": 2573.2042, "eval_samples_per_second": 5.736, "eval_steps_per_second": 0.717, "step": 8000 }, { "epoch": 2.47, "learning_rate": 6.153138530269465e-05, "loss": 2.2599, "step": 8001 }, { "epoch": 2.47, "learning_rate": 6.14957562434574e-05, "loss": 2.3822, "step": 8002 }, { "epoch": 2.47, "learning_rate": 6.146012718422018e-05, "loss": 2.4467, "step": 8003 }, { "epoch": 2.47, "learning_rate": 6.142449812498295e-05, "loss": 2.5537, "step": 8004 }, { "epoch": 2.47, "learning_rate": 6.138886906574572e-05, "loss": 2.3505, "step": 8005 }, { "epoch": 2.47, "learning_rate": 6.138886906574572e-05, "loss": 2.2556, "step": 8006 }, { "epoch": 2.47, "learning_rate": 6.135324000650849e-05, "loss": 2.1347, "step": 8007 }, { "epoch": 2.47, "learning_rate": 6.131761094727127e-05, "loss": 2.2455, "step": 8008 }, { "epoch": 2.47, "learning_rate": 6.128198188803403e-05, "loss": 2.2675, "step": 8009 }, { "epoch": 2.47, "learning_rate": 6.124635282879681e-05, "loss": 2.1256, "step": 8010 }, { "epoch": 2.47, "learning_rate": 6.121072376955958e-05, "loss": 2.0208, "step": 8011 }, { "epoch": 2.47, "learning_rate": 6.117509471032234e-05, "loss": 2.1248, "step": 8012 }, { "epoch": 2.47, "learning_rate": 6.113946565108513e-05, "loss": 2.0277, "step": 8013 }, { "epoch": 2.47, "learning_rate": 6.11038365918479e-05, "loss": 1.9769, "step": 8014 }, { "epoch": 2.47, "learning_rate": 6.106820753261066e-05, "loss": 1.8413, "step": 8015 }, { "epoch": 2.47, "learning_rate": 6.103257847337343e-05, "loss": 1.976, "step": 8016 }, { "epoch": 2.47, "learning_rate": 6.0996949414136207e-05, "loss": 1.6732, "step": 8017 }, { "epoch": 2.47, "learning_rate": 6.0961320354898975e-05, "loss": 1.8202, "step": 8018 }, { "epoch": 2.47, "learning_rate": 6.092569129566175e-05, "loss": 1.6768, "step": 8019 }, { "epoch": 2.48, "learning_rate": 6.089006223642451e-05, "loss": 1.832, "step": 8020 }, { "epoch": 2.48, "learning_rate": 6.085443317718729e-05, "loss": 1.4765, "step": 8021 }, { "epoch": 2.48, "learning_rate": 6.0818804117950055e-05, "loss": 1.6575, "step": 8022 }, { "epoch": 2.48, "learning_rate": 6.078317505871283e-05, "loss": 1.7091, "step": 8023 }, { "epoch": 2.48, "learning_rate": 6.07475459994756e-05, "loss": 1.4353, "step": 8024 }, { "epoch": 2.48, "learning_rate": 6.0711916940238373e-05, "loss": 1.3867, "step": 8025 }, { "epoch": 2.48, "learning_rate": 6.0676287881001135e-05, "loss": 1.3543, "step": 8026 }, { "epoch": 2.48, "learning_rate": 6.064065882176391e-05, "loss": 1.4172, "step": 8027 }, { "epoch": 2.48, "learning_rate": 6.060502976252668e-05, "loss": 1.294, "step": 8028 }, { "epoch": 2.48, "learning_rate": 6.0569400703289454e-05, "loss": 1.2624, "step": 8029 }, { "epoch": 2.48, "learning_rate": 6.053377164405223e-05, "loss": 1.122, "step": 8030 }, { "epoch": 2.48, "learning_rate": 6.0498142584815e-05, "loss": 3.7671, "step": 8031 }, { "epoch": 2.48, "learning_rate": 6.046251352557777e-05, "loss": 3.3127, "step": 8032 }, { "epoch": 2.48, "learning_rate": 6.0426884466340534e-05, "loss": 3.3436, "step": 8033 }, { "epoch": 2.48, "learning_rate": 6.039125540710331e-05, "loss": 2.9302, "step": 8034 }, { "epoch": 2.48, "learning_rate": 6.035562634786608e-05, "loss": 2.8277, "step": 8035 }, { "epoch": 2.48, "learning_rate": 6.031999728862885e-05, "loss": 2.6586, "step": 8036 }, { "epoch": 2.48, "learning_rate": 6.028436822939162e-05, "loss": 2.6908, "step": 8037 }, { "epoch": 2.48, "learning_rate": 6.0248739170154396e-05, "loss": 2.8662, "step": 8038 }, { "epoch": 2.48, "learning_rate": 6.021311011091716e-05, "loss": 2.9442, "step": 8039 }, { "epoch": 2.48, "learning_rate": 6.017748105167993e-05, "loss": 2.6158, "step": 8040 }, { "epoch": 2.48, "learning_rate": 6.01418519924427e-05, "loss": 2.8784, "step": 8041 }, { "epoch": 2.48, "learning_rate": 6.0106222933205476e-05, "loss": 2.6543, "step": 8042 }, { "epoch": 2.48, "learning_rate": 6.0070593873968244e-05, "loss": 2.7793, "step": 8043 }, { "epoch": 2.48, "learning_rate": 6.003496481473102e-05, "loss": 2.6663, "step": 8044 }, { "epoch": 2.48, "learning_rate": 5.999933575549378e-05, "loss": 2.4359, "step": 8045 }, { "epoch": 2.48, "learning_rate": 5.9963706696256556e-05, "loss": 2.5488, "step": 8046 }, { "epoch": 2.48, "learning_rate": 5.992807763701933e-05, "loss": 2.5064, "step": 8047 }, { "epoch": 2.48, "learning_rate": 5.98924485777821e-05, "loss": 2.5299, "step": 8048 }, { "epoch": 2.48, "learning_rate": 5.9856819518544874e-05, "loss": 2.5013, "step": 8049 }, { "epoch": 2.48, "learning_rate": 5.982119045930764e-05, "loss": 2.3937, "step": 8050 }, { "epoch": 2.48, "learning_rate": 5.978556140007042e-05, "loss": 2.4979, "step": 8051 }, { "epoch": 2.49, "learning_rate": 5.974993234083318e-05, "loss": 2.2496, "step": 8052 }, { "epoch": 2.49, "learning_rate": 5.9714303281595954e-05, "loss": 2.4653, "step": 8053 }, { "epoch": 2.49, "learning_rate": 5.967867422235872e-05, "loss": 2.2077, "step": 8054 }, { "epoch": 2.49, "learning_rate": 5.96430451631215e-05, "loss": 2.2817, "step": 8055 }, { "epoch": 2.49, "learning_rate": 5.9607416103884266e-05, "loss": 2.3318, "step": 8056 }, { "epoch": 2.49, "learning_rate": 5.957178704464704e-05, "loss": 2.0848, "step": 8057 }, { "epoch": 2.49, "learning_rate": 5.953615798540981e-05, "loss": 2.1588, "step": 8058 }, { "epoch": 2.49, "learning_rate": 5.9500528926172585e-05, "loss": 2.2365, "step": 8059 }, { "epoch": 2.49, "learning_rate": 5.9464899866935346e-05, "loss": 2.132, "step": 8060 }, { "epoch": 2.49, "learning_rate": 5.942927080769812e-05, "loss": 1.9113, "step": 8061 }, { "epoch": 2.49, "learning_rate": 5.9393641748460896e-05, "loss": 1.9459, "step": 8062 }, { "epoch": 2.49, "learning_rate": 5.9358012689223665e-05, "loss": 1.7673, "step": 8063 }, { "epoch": 2.49, "learning_rate": 5.932238362998644e-05, "loss": 1.8131, "step": 8064 }, { "epoch": 2.49, "learning_rate": 5.928675457074921e-05, "loss": 2.0157, "step": 8065 }, { "epoch": 2.49, "learning_rate": 5.925112551151198e-05, "loss": 2.0441, "step": 8066 }, { "epoch": 2.49, "learning_rate": 5.9215496452274745e-05, "loss": 1.9258, "step": 8067 }, { "epoch": 2.49, "learning_rate": 5.917986739303752e-05, "loss": 1.6283, "step": 8068 }, { "epoch": 2.49, "learning_rate": 5.914423833380029e-05, "loss": 1.4977, "step": 8069 }, { "epoch": 2.49, "learning_rate": 5.910860927456306e-05, "loss": 1.53, "step": 8070 }, { "epoch": 2.49, "learning_rate": 5.907298021532583e-05, "loss": 1.5743, "step": 8071 }, { "epoch": 2.49, "learning_rate": 5.903735115608861e-05, "loss": 1.7177, "step": 8072 }, { "epoch": 2.49, "learning_rate": 5.900172209685137e-05, "loss": 1.6372, "step": 8073 }, { "epoch": 2.49, "learning_rate": 5.896609303761414e-05, "loss": 1.3698, "step": 8074 }, { "epoch": 2.49, "learning_rate": 5.893046397837691e-05, "loss": 1.5741, "step": 8075 }, { "epoch": 2.49, "learning_rate": 5.889483491913969e-05, "loss": 1.2633, "step": 8076 }, { "epoch": 2.49, "learning_rate": 5.8859205859902455e-05, "loss": 1.4312, "step": 8077 }, { "epoch": 2.49, "learning_rate": 5.882357680066523e-05, "loss": 1.2641, "step": 8078 }, { "epoch": 2.49, "learning_rate": 5.8787947741428005e-05, "loss": 1.1613, "step": 8079 }, { "epoch": 2.49, "learning_rate": 5.875231868219077e-05, "loss": 1.0927, "step": 8080 }, { "epoch": 2.49, "learning_rate": 5.871668962295354e-05, "loss": 3.7584, "step": 8081 }, { "epoch": 2.49, "learning_rate": 5.868106056371631e-05, "loss": 3.29, "step": 8082 }, { "epoch": 2.49, "learning_rate": 5.8645431504479085e-05, "loss": 2.88, "step": 8083 }, { "epoch": 2.5, "learning_rate": 5.8609802445241854e-05, "loss": 2.8912, "step": 8084 }, { "epoch": 2.5, "learning_rate": 5.857417338600463e-05, "loss": 2.9102, "step": 8085 }, { "epoch": 2.5, "learning_rate": 5.853854432676739e-05, "loss": 2.791, "step": 8086 }, { "epoch": 2.5, "learning_rate": 5.8502915267530165e-05, "loss": 2.8283, "step": 8087 }, { "epoch": 2.5, "learning_rate": 5.8467286208292934e-05, "loss": 2.812, "step": 8088 }, { "epoch": 2.5, "learning_rate": 5.843165714905571e-05, "loss": 2.6868, "step": 8089 }, { "epoch": 2.5, "learning_rate": 5.839602808981848e-05, "loss": 2.7071, "step": 8090 }, { "epoch": 2.5, "learning_rate": 5.836039903058125e-05, "loss": 2.6749, "step": 8091 }, { "epoch": 2.5, "learning_rate": 5.8324769971344014e-05, "loss": 2.324, "step": 8092 }, { "epoch": 2.5, "learning_rate": 5.828914091210679e-05, "loss": 2.4571, "step": 8093 }, { "epoch": 2.5, "learning_rate": 5.825351185286956e-05, "loss": 2.304, "step": 8094 }, { "epoch": 2.5, "learning_rate": 5.821788279363233e-05, "loss": 2.6561, "step": 8095 }, { "epoch": 2.5, "learning_rate": 5.818225373439511e-05, "loss": 2.5749, "step": 8096 }, { "epoch": 2.5, "learning_rate": 5.8146624675157876e-05, "loss": 2.606, "step": 8097 }, { "epoch": 2.5, "learning_rate": 5.811099561592065e-05, "loss": 2.2299, "step": 8098 }, { "epoch": 2.5, "learning_rate": 5.807536655668342e-05, "loss": 2.4148, "step": 8099 }, { "epoch": 2.5, "learning_rate": 5.8039737497446194e-05, "loss": 2.387, "step": 8100 }, { "epoch": 2.5, "learning_rate": 5.8004108438208956e-05, "loss": 2.4299, "step": 8101 }, { "epoch": 2.5, "learning_rate": 5.796847937897173e-05, "loss": 2.4114, "step": 8102 }, { "epoch": 2.5, "learning_rate": 5.79328503197345e-05, "loss": 2.4003, "step": 8103 }, { "epoch": 2.5, "learning_rate": 5.7897221260497274e-05, "loss": 2.279, "step": 8104 }, { "epoch": 2.5, "learning_rate": 5.786159220126004e-05, "loss": 2.1557, "step": 8105 }, { "epoch": 2.5, "learning_rate": 5.782596314202282e-05, "loss": 2.3724, "step": 8106 }, { "epoch": 2.5, "learning_rate": 5.779033408278558e-05, "loss": 2.009, "step": 8107 }, { "epoch": 2.5, "learning_rate": 5.7754705023548354e-05, "loss": 2.1518, "step": 8108 }, { "epoch": 2.5, "learning_rate": 5.771907596431112e-05, "loss": 2.1651, "step": 8109 }, { "epoch": 2.5, "learning_rate": 5.76834469050739e-05, "loss": 1.9886, "step": 8110 }, { "epoch": 2.5, "learning_rate": 5.764781784583667e-05, "loss": 2.0058, "step": 8111 }, { "epoch": 2.5, "learning_rate": 5.761218878659944e-05, "loss": 1.8827, "step": 8112 }, { "epoch": 2.5, "learning_rate": 5.7576559727362216e-05, "loss": 2.0571, "step": 8113 }, { "epoch": 2.5, "learning_rate": 5.754093066812498e-05, "loss": 1.7886, "step": 8114 }, { "epoch": 2.5, "learning_rate": 5.750530160888775e-05, "loss": 1.7758, "step": 8115 }, { "epoch": 2.5, "learning_rate": 5.746967254965052e-05, "loss": 1.6548, "step": 8116 }, { "epoch": 2.51, "learning_rate": 5.7434043490413296e-05, "loss": 1.7934, "step": 8117 }, { "epoch": 2.51, "learning_rate": 5.7398414431176065e-05, "loss": 1.8331, "step": 8118 }, { "epoch": 2.51, "learning_rate": 5.736278537193884e-05, "loss": 1.6385, "step": 8119 }, { "epoch": 2.51, "learning_rate": 5.73271563127016e-05, "loss": 1.4586, "step": 8120 }, { "epoch": 2.51, "learning_rate": 5.7291527253464376e-05, "loss": 1.3626, "step": 8121 }, { "epoch": 2.51, "learning_rate": 5.7255898194227145e-05, "loss": 1.6016, "step": 8122 }, { "epoch": 2.51, "learning_rate": 5.722026913498992e-05, "loss": 1.4789, "step": 8123 }, { "epoch": 2.51, "learning_rate": 5.718464007575269e-05, "loss": 1.2389, "step": 8124 }, { "epoch": 2.51, "learning_rate": 5.714901101651546e-05, "loss": 1.3368, "step": 8125 }, { "epoch": 2.51, "learning_rate": 5.7113381957278225e-05, "loss": 1.2305, "step": 8126 }, { "epoch": 2.51, "learning_rate": 5.7077752898041e-05, "loss": 1.147, "step": 8127 }, { "epoch": 2.51, "learning_rate": 5.7042123838803775e-05, "loss": 1.3414, "step": 8128 }, { "epoch": 2.51, "learning_rate": 5.700649477956654e-05, "loss": 1.1006, "step": 8129 }, { "epoch": 2.51, "learning_rate": 5.697086572032932e-05, "loss": 1.2604, "step": 8130 }, { "epoch": 2.51, "learning_rate": 5.693523666109209e-05, "loss": 3.5442, "step": 8131 }, { "epoch": 2.51, "learning_rate": 5.689960760185486e-05, "loss": 3.2533, "step": 8132 }, { "epoch": 2.51, "learning_rate": 5.686397854261762e-05, "loss": 2.9217, "step": 8133 }, { "epoch": 2.51, "learning_rate": 5.68283494833804e-05, "loss": 2.8576, "step": 8134 }, { "epoch": 2.51, "learning_rate": 5.679272042414317e-05, "loss": 2.6886, "step": 8135 }, { "epoch": 2.51, "learning_rate": 5.675709136490594e-05, "loss": 2.719, "step": 8136 }, { "epoch": 2.51, "learning_rate": 5.672146230566871e-05, "loss": 2.5422, "step": 8137 }, { "epoch": 2.51, "learning_rate": 5.6685833246431485e-05, "loss": 2.6128, "step": 8138 }, { "epoch": 2.51, "learning_rate": 5.665020418719425e-05, "loss": 2.7544, "step": 8139 }, { "epoch": 2.51, "learning_rate": 5.661457512795702e-05, "loss": 2.6845, "step": 8140 }, { "epoch": 2.51, "learning_rate": 5.657894606871979e-05, "loss": 2.7276, "step": 8141 }, { "epoch": 2.51, "learning_rate": 5.6543317009482565e-05, "loss": 2.5652, "step": 8142 }, { "epoch": 2.51, "learning_rate": 5.6507687950245334e-05, "loss": 2.5879, "step": 8143 }, { "epoch": 2.51, "learning_rate": 5.647205889100811e-05, "loss": 2.268, "step": 8144 }, { "epoch": 2.51, "learning_rate": 5.6436429831770884e-05, "loss": 2.3543, "step": 8145 }, { "epoch": 2.51, "learning_rate": 5.640080077253365e-05, "loss": 2.4159, "step": 8146 }, { "epoch": 2.51, "learning_rate": 5.636517171329643e-05, "loss": 2.698, "step": 8147 }, { "epoch": 2.51, "learning_rate": 5.632954265405919e-05, "loss": 2.5123, "step": 8148 }, { "epoch": 2.52, "learning_rate": 5.6293913594821964e-05, "loss": 2.4639, "step": 8149 }, { "epoch": 2.52, "learning_rate": 5.625828453558473e-05, "loss": 2.3406, "step": 8150 }, { "epoch": 2.52, "learning_rate": 5.622265547634751e-05, "loss": 2.4839, "step": 8151 }, { "epoch": 2.52, "learning_rate": 5.6187026417110276e-05, "loss": 2.3045, "step": 8152 }, { "epoch": 2.52, "learning_rate": 5.615139735787305e-05, "loss": 2.3945, "step": 8153 }, { "epoch": 2.52, "learning_rate": 5.611576829863581e-05, "loss": 2.3175, "step": 8154 }, { "epoch": 2.52, "learning_rate": 5.608013923939859e-05, "loss": 1.9502, "step": 8155 }, { "epoch": 2.52, "learning_rate": 5.6044510180161356e-05, "loss": 2.2553, "step": 8156 }, { "epoch": 2.52, "learning_rate": 5.600888112092413e-05, "loss": 2.2579, "step": 8157 }, { "epoch": 2.52, "learning_rate": 5.59732520616869e-05, "loss": 1.9157, "step": 8158 }, { "epoch": 2.52, "learning_rate": 5.5937623002449674e-05, "loss": 2.2055, "step": 8159 }, { "epoch": 2.52, "learning_rate": 5.590199394321245e-05, "loss": 1.9103, "step": 8160 }, { "epoch": 2.52, "learning_rate": 5.586636488397521e-05, "loss": 2.0723, "step": 8161 }, { "epoch": 2.52, "learning_rate": 5.5830735824737986e-05, "loss": 2.0191, "step": 8162 }, { "epoch": 2.52, "learning_rate": 5.5795106765500754e-05, "loss": 1.9351, "step": 8163 }, { "epoch": 2.52, "learning_rate": 5.575947770626353e-05, "loss": 1.7624, "step": 8164 }, { "epoch": 2.52, "learning_rate": 5.57238486470263e-05, "loss": 2.0485, "step": 8165 }, { "epoch": 2.52, "learning_rate": 5.568821958778907e-05, "loss": 1.8379, "step": 8166 }, { "epoch": 2.52, "learning_rate": 5.5652590528551834e-05, "loss": 1.9475, "step": 8167 }, { "epoch": 2.52, "learning_rate": 5.561696146931461e-05, "loss": 1.6924, "step": 8168 }, { "epoch": 2.52, "learning_rate": 5.558133241007738e-05, "loss": 1.6278, "step": 8169 }, { "epoch": 2.52, "learning_rate": 5.554570335084015e-05, "loss": 1.4579, "step": 8170 }, { "epoch": 2.52, "learning_rate": 5.551007429160292e-05, "loss": 1.4883, "step": 8171 }, { "epoch": 2.52, "learning_rate": 5.5474445232365696e-05, "loss": 1.5816, "step": 8172 }, { "epoch": 2.52, "learning_rate": 5.543881617312846e-05, "loss": 1.6799, "step": 8173 }, { "epoch": 2.52, "learning_rate": 5.540318711389123e-05, "loss": 1.4614, "step": 8174 }, { "epoch": 2.52, "learning_rate": 5.5367558054654e-05, "loss": 1.4349, "step": 8175 }, { "epoch": 2.52, "learning_rate": 5.5331928995416776e-05, "loss": 1.3221, "step": 8176 }, { "epoch": 2.52, "learning_rate": 5.529629993617955e-05, "loss": 1.2526, "step": 8177 }, { "epoch": 2.52, "learning_rate": 5.526067087694232e-05, "loss": 1.2352, "step": 8178 }, { "epoch": 2.52, "learning_rate": 5.5225041817705095e-05, "loss": 1.0473, "step": 8179 }, { "epoch": 2.52, "learning_rate": 5.5189412758467857e-05, "loss": 1.0403, "step": 8180 }, { "epoch": 2.52, "learning_rate": 5.515378369923063e-05, "loss": 3.7584, "step": 8181 }, { "epoch": 2.53, "learning_rate": 5.51181546399934e-05, "loss": 3.1954, "step": 8182 }, { "epoch": 2.53, "learning_rate": 5.5082525580756175e-05, "loss": 3.0258, "step": 8183 }, { "epoch": 2.53, "learning_rate": 5.504689652151894e-05, "loss": 3.0764, "step": 8184 }, { "epoch": 2.53, "learning_rate": 5.501126746228172e-05, "loss": 2.8416, "step": 8185 }, { "epoch": 2.53, "learning_rate": 5.497563840304449e-05, "loss": 3.0023, "step": 8186 }, { "epoch": 2.53, "learning_rate": 5.494000934380726e-05, "loss": 2.7052, "step": 8187 }, { "epoch": 2.53, "learning_rate": 5.4904380284570023e-05, "loss": 2.9448, "step": 8188 }, { "epoch": 2.53, "learning_rate": 5.48687512253328e-05, "loss": 3.0347, "step": 8189 }, { "epoch": 2.53, "learning_rate": 5.483312216609557e-05, "loss": 2.8951, "step": 8190 }, { "epoch": 2.53, "learning_rate": 5.479749310685834e-05, "loss": 2.5392, "step": 8191 }, { "epoch": 2.53, "learning_rate": 5.476186404762111e-05, "loss": 2.3996, "step": 8192 }, { "epoch": 2.53, "learning_rate": 5.4726234988383885e-05, "loss": 2.4283, "step": 8193 }, { "epoch": 2.53, "learning_rate": 5.469060592914666e-05, "loss": 2.2762, "step": 8194 }, { "epoch": 2.53, "learning_rate": 5.465497686990942e-05, "loss": 2.582, "step": 8195 }, { "epoch": 2.53, "learning_rate": 5.46193478106722e-05, "loss": 2.6537, "step": 8196 }, { "epoch": 2.53, "learning_rate": 5.4583718751434965e-05, "loss": 2.4001, "step": 8197 }, { "epoch": 2.53, "learning_rate": 5.454808969219774e-05, "loss": 2.3131, "step": 8198 }, { "epoch": 2.53, "learning_rate": 5.451246063296051e-05, "loss": 2.4533, "step": 8199 }, { "epoch": 2.53, "learning_rate": 5.4476831573723284e-05, "loss": 2.486, "step": 8200 }, { "epoch": 2.53, "learning_rate": 5.4441202514486046e-05, "loss": 2.0978, "step": 8201 }, { "epoch": 2.53, "learning_rate": 5.440557345524882e-05, "loss": 2.1464, "step": 8202 }, { "epoch": 2.53, "learning_rate": 5.436994439601159e-05, "loss": 2.1308, "step": 8203 }, { "epoch": 2.53, "learning_rate": 5.4334315336774364e-05, "loss": 2.1413, "step": 8204 }, { "epoch": 2.53, "learning_rate": 5.429868627753713e-05, "loss": 2.2564, "step": 8205 }, { "epoch": 2.53, "learning_rate": 5.426305721829991e-05, "loss": 1.9395, "step": 8206 }, { "epoch": 2.53, "learning_rate": 5.422742815906267e-05, "loss": 1.9924, "step": 8207 }, { "epoch": 2.53, "learning_rate": 5.4191799099825444e-05, "loss": 1.8784, "step": 8208 }, { "epoch": 2.53, "learning_rate": 5.415617004058822e-05, "loss": 2.0902, "step": 8209 }, { "epoch": 2.53, "learning_rate": 5.412054098135099e-05, "loss": 1.8776, "step": 8210 }, { "epoch": 2.53, "learning_rate": 5.408491192211376e-05, "loss": 1.8037, "step": 8211 }, { "epoch": 2.53, "learning_rate": 5.404928286287653e-05, "loss": 1.9831, "step": 8212 }, { "epoch": 2.53, "learning_rate": 5.4013653803639306e-05, "loss": 1.853, "step": 8213 }, { "epoch": 2.54, "learning_rate": 5.397802474440207e-05, "loss": 1.7513, "step": 8214 }, { "epoch": 2.54, "learning_rate": 5.394239568516484e-05, "loss": 2.029, "step": 8215 }, { "epoch": 2.54, "learning_rate": 5.390676662592761e-05, "loss": 1.7699, "step": 8216 }, { "epoch": 2.54, "learning_rate": 5.3871137566690386e-05, "loss": 1.7862, "step": 8217 }, { "epoch": 2.54, "learning_rate": 5.3835508507453154e-05, "loss": 1.8584, "step": 8218 }, { "epoch": 2.54, "learning_rate": 5.379987944821593e-05, "loss": 1.5326, "step": 8219 }, { "epoch": 2.54, "learning_rate": 5.376425038897869e-05, "loss": 1.5104, "step": 8220 }, { "epoch": 2.54, "learning_rate": 5.3728621329741466e-05, "loss": 1.5799, "step": 8221 }, { "epoch": 2.54, "learning_rate": 5.3692992270504235e-05, "loss": 1.4118, "step": 8222 }, { "epoch": 2.54, "learning_rate": 5.365736321126701e-05, "loss": 1.534, "step": 8223 }, { "epoch": 2.54, "learning_rate": 5.362173415202978e-05, "loss": 1.3795, "step": 8224 }, { "epoch": 2.54, "learning_rate": 5.358610509279255e-05, "loss": 1.3729, "step": 8225 }, { "epoch": 2.54, "learning_rate": 5.355047603355533e-05, "loss": 1.2419, "step": 8226 }, { "epoch": 2.54, "learning_rate": 5.351484697431809e-05, "loss": 1.2568, "step": 8227 }, { "epoch": 2.54, "learning_rate": 5.347921791508087e-05, "loss": 1.2249, "step": 8228 }, { "epoch": 2.54, "learning_rate": 5.344358885584363e-05, "loss": 0.9961, "step": 8229 }, { "epoch": 2.54, "learning_rate": 5.340795979660641e-05, "loss": 1.2348, "step": 8230 }, { "epoch": 2.54, "learning_rate": 5.3372330737369177e-05, "loss": 3.4172, "step": 8231 }, { "epoch": 2.54, "learning_rate": 5.333670167813195e-05, "loss": 3.0685, "step": 8232 }, { "epoch": 2.54, "learning_rate": 5.330107261889472e-05, "loss": 2.9726, "step": 8233 }, { "epoch": 2.54, "learning_rate": 5.3265443559657495e-05, "loss": 2.95, "step": 8234 }, { "epoch": 2.54, "learning_rate": 5.3229814500420257e-05, "loss": 3.1676, "step": 8235 }, { "epoch": 2.54, "learning_rate": 5.319418544118303e-05, "loss": 2.8175, "step": 8236 }, { "epoch": 2.54, "learning_rate": 5.31585563819458e-05, "loss": 2.6649, "step": 8237 }, { "epoch": 2.54, "learning_rate": 5.3122927322708575e-05, "loss": 2.6926, "step": 8238 }, { "epoch": 2.54, "learning_rate": 5.3087298263471343e-05, "loss": 2.7466, "step": 8239 }, { "epoch": 2.54, "learning_rate": 5.305166920423412e-05, "loss": 2.7711, "step": 8240 }, { "epoch": 2.54, "learning_rate": 5.301604014499688e-05, "loss": 2.6793, "step": 8241 }, { "epoch": 2.54, "learning_rate": 5.2980411085759655e-05, "loss": 2.6521, "step": 8242 }, { "epoch": 2.54, "learning_rate": 5.294478202652243e-05, "loss": 2.7193, "step": 8243 }, { "epoch": 2.54, "learning_rate": 5.29091529672852e-05, "loss": 2.5258, "step": 8244 }, { "epoch": 2.54, "learning_rate": 5.2873523908047974e-05, "loss": 2.4916, "step": 8245 }, { "epoch": 2.55, "learning_rate": 5.283789484881074e-05, "loss": 2.5156, "step": 8246 }, { "epoch": 2.55, "learning_rate": 5.280226578957352e-05, "loss": 2.4302, "step": 8247 }, { "epoch": 2.55, "learning_rate": 5.276663673033628e-05, "loss": 2.2268, "step": 8248 }, { "epoch": 2.55, "learning_rate": 5.2731007671099054e-05, "loss": 2.2694, "step": 8249 }, { "epoch": 2.55, "learning_rate": 5.269537861186182e-05, "loss": 2.4475, "step": 8250 }, { "epoch": 2.55, "learning_rate": 5.26597495526246e-05, "loss": 2.3569, "step": 8251 }, { "epoch": 2.55, "learning_rate": 5.2624120493387366e-05, "loss": 2.232, "step": 8252 }, { "epoch": 2.55, "learning_rate": 5.258849143415014e-05, "loss": 2.193, "step": 8253 }, { "epoch": 2.55, "learning_rate": 5.25528623749129e-05, "loss": 2.2134, "step": 8254 }, { "epoch": 2.55, "learning_rate": 5.251723331567568e-05, "loss": 2.2043, "step": 8255 }, { "epoch": 2.55, "learning_rate": 5.2481604256438446e-05, "loss": 2.1611, "step": 8256 }, { "epoch": 2.55, "learning_rate": 5.244597519720122e-05, "loss": 2.0997, "step": 8257 }, { "epoch": 2.55, "learning_rate": 5.2410346137963996e-05, "loss": 1.9993, "step": 8258 }, { "epoch": 2.55, "learning_rate": 5.2374717078726764e-05, "loss": 2.1637, "step": 8259 }, { "epoch": 2.55, "learning_rate": 5.233908801948954e-05, "loss": 2.0658, "step": 8260 }, { "epoch": 2.55, "learning_rate": 5.23034589602523e-05, "loss": 1.9571, "step": 8261 }, { "epoch": 2.55, "learning_rate": 5.2267829901015076e-05, "loss": 1.9184, "step": 8262 }, { "epoch": 2.55, "learning_rate": 5.2232200841777844e-05, "loss": 2.0527, "step": 8263 }, { "epoch": 2.55, "learning_rate": 5.219657178254062e-05, "loss": 1.7992, "step": 8264 }, { "epoch": 2.55, "learning_rate": 5.216094272330339e-05, "loss": 1.5691, "step": 8265 }, { "epoch": 2.55, "learning_rate": 5.212531366406616e-05, "loss": 1.3656, "step": 8266 }, { "epoch": 2.55, "learning_rate": 5.2089684604828924e-05, "loss": 1.876, "step": 8267 }, { "epoch": 2.55, "learning_rate": 5.20540555455917e-05, "loss": 1.6886, "step": 8268 }, { "epoch": 2.55, "learning_rate": 5.201842648635447e-05, "loss": 1.6744, "step": 8269 }, { "epoch": 2.55, "learning_rate": 5.198279742711724e-05, "loss": 1.5301, "step": 8270 }, { "epoch": 2.55, "learning_rate": 5.194716836788001e-05, "loss": 1.6009, "step": 8271 }, { "epoch": 2.55, "learning_rate": 5.1911539308642786e-05, "loss": 1.3767, "step": 8272 }, { "epoch": 2.55, "learning_rate": 5.1875910249405554e-05, "loss": 1.3455, "step": 8273 }, { "epoch": 2.55, "learning_rate": 5.184028119016833e-05, "loss": 1.3417, "step": 8274 }, { "epoch": 2.55, "learning_rate": 5.1804652130931105e-05, "loss": 1.2838, "step": 8275 }, { "epoch": 2.55, "learning_rate": 5.1769023071693866e-05, "loss": 1.2066, "step": 8276 }, { "epoch": 2.55, "learning_rate": 5.173339401245664e-05, "loss": 1.2598, "step": 8277 }, { "epoch": 2.55, "learning_rate": 5.169776495321941e-05, "loss": 1.414, "step": 8278 }, { "epoch": 2.56, "learning_rate": 5.1662135893982185e-05, "loss": 1.1093, "step": 8279 }, { "epoch": 2.56, "learning_rate": 5.162650683474495e-05, "loss": 1.1603, "step": 8280 }, { "epoch": 2.56, "learning_rate": 5.159087777550773e-05, "loss": 3.4933, "step": 8281 }, { "epoch": 2.56, "learning_rate": 5.155524871627049e-05, "loss": 3.0199, "step": 8282 }, { "epoch": 2.56, "learning_rate": 5.1519619657033265e-05, "loss": 3.0621, "step": 8283 }, { "epoch": 2.56, "learning_rate": 5.148399059779603e-05, "loss": 2.9217, "step": 8284 }, { "epoch": 2.56, "learning_rate": 5.144836153855881e-05, "loss": 2.6092, "step": 8285 }, { "epoch": 2.56, "learning_rate": 5.1412732479321577e-05, "loss": 2.6336, "step": 8286 }, { "epoch": 2.56, "learning_rate": 5.137710342008435e-05, "loss": 2.6181, "step": 8287 }, { "epoch": 2.56, "learning_rate": 5.134147436084711e-05, "loss": 2.836, "step": 8288 }, { "epoch": 2.56, "learning_rate": 5.130584530160989e-05, "loss": 2.5397, "step": 8289 }, { "epoch": 2.56, "learning_rate": 5.127021624237266e-05, "loss": 2.5716, "step": 8290 }, { "epoch": 2.56, "learning_rate": 5.123458718313543e-05, "loss": 2.4341, "step": 8291 }, { "epoch": 2.56, "learning_rate": 5.119895812389821e-05, "loss": 2.4736, "step": 8292 }, { "epoch": 2.56, "learning_rate": 5.1163329064660975e-05, "loss": 2.4941, "step": 8293 }, { "epoch": 2.56, "learning_rate": 5.112770000542375e-05, "loss": 2.3374, "step": 8294 }, { "epoch": 2.56, "learning_rate": 5.109207094618651e-05, "loss": 2.5927, "step": 8295 }, { "epoch": 2.56, "learning_rate": 5.105644188694929e-05, "loss": 2.3975, "step": 8296 }, { "epoch": 2.56, "learning_rate": 5.1020812827712055e-05, "loss": 2.4854, "step": 8297 }, { "epoch": 2.56, "learning_rate": 5.098518376847483e-05, "loss": 2.6614, "step": 8298 }, { "epoch": 2.56, "learning_rate": 5.09495547092376e-05, "loss": 2.3891, "step": 8299 }, { "epoch": 2.56, "learning_rate": 5.0913925650000374e-05, "loss": 2.2268, "step": 8300 }, { "epoch": 2.56, "learning_rate": 5.0878296590763135e-05, "loss": 2.1707, "step": 8301 }, { "epoch": 2.56, "learning_rate": 5.084266753152591e-05, "loss": 2.2187, "step": 8302 }, { "epoch": 2.56, "learning_rate": 5.080703847228868e-05, "loss": 2.1911, "step": 8303 }, { "epoch": 2.56, "learning_rate": 5.0771409413051454e-05, "loss": 2.2114, "step": 8304 }, { "epoch": 2.56, "learning_rate": 5.073578035381422e-05, "loss": 2.1526, "step": 8305 }, { "epoch": 2.56, "learning_rate": 5.0700151294577e-05, "loss": 2.3647, "step": 8306 }, { "epoch": 2.56, "learning_rate": 5.066452223533977e-05, "loss": 2.0714, "step": 8307 }, { "epoch": 2.56, "learning_rate": 5.0628893176102534e-05, "loss": 2.2013, "step": 8308 }, { "epoch": 2.56, "learning_rate": 5.059326411686531e-05, "loss": 2.1523, "step": 8309 }, { "epoch": 2.56, "learning_rate": 5.055763505762808e-05, "loss": 1.9394, "step": 8310 }, { "epoch": 2.57, "learning_rate": 5.052200599839085e-05, "loss": 1.7759, "step": 8311 }, { "epoch": 2.57, "learning_rate": 5.048637693915362e-05, "loss": 2.0797, "step": 8312 }, { "epoch": 2.57, "learning_rate": 5.0450747879916396e-05, "loss": 1.9336, "step": 8313 }, { "epoch": 2.57, "learning_rate": 5.041511882067916e-05, "loss": 1.8288, "step": 8314 }, { "epoch": 2.57, "learning_rate": 5.037948976144193e-05, "loss": 1.7948, "step": 8315 }, { "epoch": 2.57, "learning_rate": 5.03438607022047e-05, "loss": 1.8427, "step": 8316 }, { "epoch": 2.57, "learning_rate": 5.0308231642967476e-05, "loss": 1.5765, "step": 8317 }, { "epoch": 2.57, "learning_rate": 5.0272602583730244e-05, "loss": 1.6686, "step": 8318 }, { "epoch": 2.57, "learning_rate": 5.023697352449302e-05, "loss": 1.6521, "step": 8319 }, { "epoch": 2.57, "learning_rate": 5.020134446525579e-05, "loss": 1.4198, "step": 8320 }, { "epoch": 2.57, "learning_rate": 5.016571540601856e-05, "loss": 1.549, "step": 8321 }, { "epoch": 2.57, "learning_rate": 5.0130086346781324e-05, "loss": 1.6739, "step": 8322 }, { "epoch": 2.57, "learning_rate": 5.00944572875441e-05, "loss": 1.4012, "step": 8323 }, { "epoch": 2.57, "learning_rate": 5.0058828228306874e-05, "loss": 1.3636, "step": 8324 }, { "epoch": 2.57, "learning_rate": 5.002319916906964e-05, "loss": 1.3276, "step": 8325 }, { "epoch": 2.57, "learning_rate": 4.998757010983242e-05, "loss": 1.3004, "step": 8326 }, { "epoch": 2.57, "learning_rate": 4.9951941050595186e-05, "loss": 1.228, "step": 8327 }, { "epoch": 2.57, "learning_rate": 4.991631199135796e-05, "loss": 1.332, "step": 8328 }, { "epoch": 2.57, "learning_rate": 4.988068293212072e-05, "loss": 1.0974, "step": 8329 }, { "epoch": 2.57, "learning_rate": 4.98450538728835e-05, "loss": 1.1592, "step": 8330 }, { "epoch": 2.57, "learning_rate": 4.9809424813646266e-05, "loss": 3.5586, "step": 8331 }, { "epoch": 2.57, "learning_rate": 4.977379575440904e-05, "loss": 3.3555, "step": 8332 }, { "epoch": 2.57, "learning_rate": 4.973816669517181e-05, "loss": 3.032, "step": 8333 }, { "epoch": 2.57, "learning_rate": 4.9702537635934585e-05, "loss": 2.678, "step": 8334 }, { "epoch": 2.57, "learning_rate": 4.9666908576697346e-05, "loss": 2.9682, "step": 8335 }, { "epoch": 2.57, "learning_rate": 4.963127951746012e-05, "loss": 2.7617, "step": 8336 }, { "epoch": 2.57, "learning_rate": 4.959565045822289e-05, "loss": 2.6088, "step": 8337 }, { "epoch": 2.57, "learning_rate": 4.9560021398985665e-05, "loss": 2.3312, "step": 8338 }, { "epoch": 2.57, "learning_rate": 4.952439233974843e-05, "loss": 2.6546, "step": 8339 }, { "epoch": 2.57, "learning_rate": 4.948876328051121e-05, "loss": 2.5418, "step": 8340 }, { "epoch": 2.57, "learning_rate": 4.9453134221273983e-05, "loss": 2.452, "step": 8341 }, { "epoch": 2.57, "learning_rate": 4.9417505162036745e-05, "loss": 2.7838, "step": 8342 }, { "epoch": 2.57, "learning_rate": 4.938187610279952e-05, "loss": 2.3311, "step": 8343 }, { "epoch": 2.58, "learning_rate": 4.934624704356229e-05, "loss": 2.3479, "step": 8344 }, { "epoch": 2.58, "learning_rate": 4.9310617984325063e-05, "loss": 2.2318, "step": 8345 }, { "epoch": 2.58, "learning_rate": 4.927498892508783e-05, "loss": 2.4403, "step": 8346 }, { "epoch": 2.58, "learning_rate": 4.923935986585061e-05, "loss": 2.2913, "step": 8347 }, { "epoch": 2.58, "learning_rate": 4.920373080661337e-05, "loss": 2.4643, "step": 8348 }, { "epoch": 2.58, "learning_rate": 4.9168101747376144e-05, "loss": 2.242, "step": 8349 }, { "epoch": 2.58, "learning_rate": 4.913247268813891e-05, "loss": 2.4522, "step": 8350 }, { "epoch": 2.58, "learning_rate": 4.909684362890169e-05, "loss": 2.2363, "step": 8351 }, { "epoch": 2.58, "learning_rate": 4.9061214569664455e-05, "loss": 2.3315, "step": 8352 }, { "epoch": 2.58, "learning_rate": 4.902558551042723e-05, "loss": 2.2199, "step": 8353 }, { "epoch": 2.58, "learning_rate": 4.898995645118999e-05, "loss": 2.1899, "step": 8354 }, { "epoch": 2.58, "learning_rate": 4.895432739195277e-05, "loss": 1.9787, "step": 8355 }, { "epoch": 2.58, "learning_rate": 4.891869833271554e-05, "loss": 2.0953, "step": 8356 }, { "epoch": 2.58, "learning_rate": 4.888306927347831e-05, "loss": 2.0047, "step": 8357 }, { "epoch": 2.58, "learning_rate": 4.8847440214241086e-05, "loss": 2.0378, "step": 8358 }, { "epoch": 2.58, "learning_rate": 4.8811811155003854e-05, "loss": 1.9387, "step": 8359 }, { "epoch": 2.58, "learning_rate": 4.877618209576663e-05, "loss": 1.9875, "step": 8360 }, { "epoch": 2.58, "learning_rate": 4.87405530365294e-05, "loss": 1.9935, "step": 8361 }, { "epoch": 2.58, "learning_rate": 4.870492397729217e-05, "loss": 1.8747, "step": 8362 }, { "epoch": 2.58, "learning_rate": 4.8669294918054934e-05, "loss": 1.8184, "step": 8363 }, { "epoch": 2.58, "learning_rate": 4.863366585881771e-05, "loss": 1.8756, "step": 8364 }, { "epoch": 2.58, "learning_rate": 4.859803679958048e-05, "loss": 1.6471, "step": 8365 }, { "epoch": 2.58, "learning_rate": 4.856240774034325e-05, "loss": 1.6878, "step": 8366 }, { "epoch": 2.58, "learning_rate": 4.852677868110602e-05, "loss": 1.6078, "step": 8367 }, { "epoch": 2.58, "learning_rate": 4.8491149621868796e-05, "loss": 1.768, "step": 8368 }, { "epoch": 2.58, "learning_rate": 4.845552056263156e-05, "loss": 1.5816, "step": 8369 }, { "epoch": 2.58, "learning_rate": 4.841989150339433e-05, "loss": 1.6848, "step": 8370 }, { "epoch": 2.58, "learning_rate": 4.83842624441571e-05, "loss": 1.422, "step": 8371 }, { "epoch": 2.58, "learning_rate": 4.8348633384919876e-05, "loss": 1.4297, "step": 8372 }, { "epoch": 2.58, "learning_rate": 4.831300432568265e-05, "loss": 1.6117, "step": 8373 }, { "epoch": 2.58, "learning_rate": 4.827737526644542e-05, "loss": 1.3079, "step": 8374 }, { "epoch": 2.58, "learning_rate": 4.8241746207208194e-05, "loss": 1.1261, "step": 8375 }, { "epoch": 2.59, "learning_rate": 4.8206117147970956e-05, "loss": 1.1872, "step": 8376 }, { "epoch": 2.59, "learning_rate": 4.817048808873373e-05, "loss": 1.2389, "step": 8377 }, { "epoch": 2.59, "learning_rate": 4.81348590294965e-05, "loss": 1.1146, "step": 8378 }, { "epoch": 2.59, "learning_rate": 4.8099229970259275e-05, "loss": 1.0483, "step": 8379 }, { "epoch": 2.59, "learning_rate": 4.806360091102204e-05, "loss": 1.1466, "step": 8380 }, { "epoch": 2.59, "learning_rate": 4.802797185178482e-05, "loss": 3.173, "step": 8381 }, { "epoch": 2.59, "learning_rate": 4.799234279254758e-05, "loss": 2.7077, "step": 8382 }, { "epoch": 2.59, "learning_rate": 4.7956713733310355e-05, "loss": 3.0078, "step": 8383 }, { "epoch": 2.59, "learning_rate": 4.792108467407312e-05, "loss": 2.9462, "step": 8384 }, { "epoch": 2.59, "learning_rate": 4.78854556148359e-05, "loss": 2.752, "step": 8385 }, { "epoch": 2.59, "learning_rate": 4.7849826555598666e-05, "loss": 2.691, "step": 8386 }, { "epoch": 2.59, "learning_rate": 4.781419749636144e-05, "loss": 2.9445, "step": 8387 }, { "epoch": 2.59, "learning_rate": 4.77785684371242e-05, "loss": 2.5408, "step": 8388 }, { "epoch": 2.59, "learning_rate": 4.774293937788698e-05, "loss": 2.5275, "step": 8389 }, { "epoch": 2.59, "learning_rate": 4.770731031864975e-05, "loss": 2.4246, "step": 8390 }, { "epoch": 2.59, "learning_rate": 4.767168125941252e-05, "loss": 2.7441, "step": 8391 }, { "epoch": 2.59, "learning_rate": 4.7636052200175297e-05, "loss": 2.5259, "step": 8392 }, { "epoch": 2.59, "learning_rate": 4.7600423140938065e-05, "loss": 2.1714, "step": 8393 }, { "epoch": 2.59, "learning_rate": 4.756479408170084e-05, "loss": 2.502, "step": 8394 }, { "epoch": 2.59, "learning_rate": 4.75291650224636e-05, "loss": 2.425, "step": 8395 }, { "epoch": 2.59, "learning_rate": 4.749353596322638e-05, "loss": 2.2356, "step": 8396 }, { "epoch": 2.59, "learning_rate": 4.7457906903989145e-05, "loss": 2.1971, "step": 8397 }, { "epoch": 2.59, "learning_rate": 4.742227784475192e-05, "loss": 2.1961, "step": 8398 }, { "epoch": 2.59, "learning_rate": 4.738664878551469e-05, "loss": 2.1759, "step": 8399 }, { "epoch": 2.59, "learning_rate": 4.7351019726277464e-05, "loss": 1.8855, "step": 8400 }, { "epoch": 2.59, "learning_rate": 4.7315390667040225e-05, "loss": 2.3121, "step": 8401 }, { "epoch": 2.59, "learning_rate": 4.7279761607803e-05, "loss": 2.2639, "step": 8402 }, { "epoch": 2.59, "learning_rate": 4.724413254856577e-05, "loss": 2.1183, "step": 8403 }, { "epoch": 2.59, "learning_rate": 4.7208503489328544e-05, "loss": 2.0216, "step": 8404 }, { "epoch": 2.59, "learning_rate": 4.717287443009132e-05, "loss": 1.9545, "step": 8405 }, { "epoch": 2.59, "learning_rate": 4.713724537085409e-05, "loss": 2.0568, "step": 8406 }, { "epoch": 2.59, "learning_rate": 4.710161631161686e-05, "loss": 2.0345, "step": 8407 }, { "epoch": 2.6, "learning_rate": 4.706598725237963e-05, "loss": 1.8838, "step": 8408 }, { "epoch": 2.6, "learning_rate": 4.7030358193142406e-05, "loss": 1.879, "step": 8409 }, { "epoch": 2.6, "learning_rate": 4.699472913390517e-05, "loss": 2.1507, "step": 8410 }, { "epoch": 2.6, "learning_rate": 4.695910007466794e-05, "loss": 1.6642, "step": 8411 }, { "epoch": 2.6, "learning_rate": 4.692347101543071e-05, "loss": 1.7561, "step": 8412 }, { "epoch": 2.6, "learning_rate": 4.6887841956193486e-05, "loss": 1.4998, "step": 8413 }, { "epoch": 2.6, "learning_rate": 4.6852212896956254e-05, "loss": 1.8276, "step": 8414 }, { "epoch": 2.6, "learning_rate": 4.681658383771903e-05, "loss": 1.7421, "step": 8415 }, { "epoch": 2.6, "learning_rate": 4.678095477848179e-05, "loss": 1.4917, "step": 8416 }, { "epoch": 2.6, "learning_rate": 4.6745325719244566e-05, "loss": 1.5564, "step": 8417 }, { "epoch": 2.6, "learning_rate": 4.6709696660007334e-05, "loss": 1.5428, "step": 8418 }, { "epoch": 2.6, "learning_rate": 4.667406760077011e-05, "loss": 1.6164, "step": 8419 }, { "epoch": 2.6, "learning_rate": 4.663843854153288e-05, "loss": 1.5527, "step": 8420 }, { "epoch": 2.6, "learning_rate": 4.660280948229565e-05, "loss": 1.6131, "step": 8421 }, { "epoch": 2.6, "learning_rate": 4.656718042305843e-05, "loss": 1.3453, "step": 8422 }, { "epoch": 2.6, "learning_rate": 4.653155136382119e-05, "loss": 1.5347, "step": 8423 }, { "epoch": 2.6, "learning_rate": 4.6495922304583964e-05, "loss": 1.1795, "step": 8424 }, { "epoch": 2.6, "learning_rate": 4.646029324534673e-05, "loss": 1.2764, "step": 8425 }, { "epoch": 2.6, "learning_rate": 4.642466418610951e-05, "loss": 1.2242, "step": 8426 }, { "epoch": 2.6, "learning_rate": 4.6389035126872276e-05, "loss": 1.3614, "step": 8427 }, { "epoch": 2.6, "learning_rate": 4.635340606763505e-05, "loss": 1.0933, "step": 8428 }, { "epoch": 2.6, "learning_rate": 4.631777700839781e-05, "loss": 1.1188, "step": 8429 }, { "epoch": 2.6, "learning_rate": 4.628214794916059e-05, "loss": 1.0321, "step": 8430 }, { "epoch": 2.6, "learning_rate": 4.6246518889923356e-05, "loss": 3.4742, "step": 8431 }, { "epoch": 2.6, "learning_rate": 4.621088983068613e-05, "loss": 3.3373, "step": 8432 }, { "epoch": 2.6, "learning_rate": 4.61752607714489e-05, "loss": 2.643, "step": 8433 }, { "epoch": 2.6, "learning_rate": 4.6139631712211675e-05, "loss": 3.0011, "step": 8434 }, { "epoch": 2.6, "learning_rate": 4.6104002652974436e-05, "loss": 2.7243, "step": 8435 }, { "epoch": 2.6, "learning_rate": 4.606837359373721e-05, "loss": 2.7661, "step": 8436 }, { "epoch": 2.6, "learning_rate": 4.603274453449998e-05, "loss": 2.6334, "step": 8437 }, { "epoch": 2.6, "learning_rate": 4.5997115475262755e-05, "loss": 2.6395, "step": 8438 }, { "epoch": 2.6, "learning_rate": 4.596148641602553e-05, "loss": 2.5444, "step": 8439 }, { "epoch": 2.6, "learning_rate": 4.59258573567883e-05, "loss": 2.5461, "step": 8440 }, { "epoch": 2.61, "learning_rate": 4.589022829755107e-05, "loss": 2.4782, "step": 8441 }, { "epoch": 2.61, "learning_rate": 4.5854599238313835e-05, "loss": 2.2592, "step": 8442 }, { "epoch": 2.61, "learning_rate": 4.581897017907661e-05, "loss": 2.4776, "step": 8443 }, { "epoch": 2.61, "learning_rate": 4.578334111983938e-05, "loss": 2.1969, "step": 8444 }, { "epoch": 2.61, "learning_rate": 4.574771206060215e-05, "loss": 2.4991, "step": 8445 }, { "epoch": 2.61, "learning_rate": 4.571208300136492e-05, "loss": 2.4161, "step": 8446 }, { "epoch": 2.61, "learning_rate": 4.56764539421277e-05, "loss": 2.2196, "step": 8447 }, { "epoch": 2.61, "learning_rate": 4.5640824882890465e-05, "loss": 2.272, "step": 8448 }, { "epoch": 2.61, "learning_rate": 4.560519582365324e-05, "loss": 2.2166, "step": 8449 }, { "epoch": 2.61, "learning_rate": 4.5569566764416e-05, "loss": 2.217, "step": 8450 }, { "epoch": 2.61, "learning_rate": 4.553393770517878e-05, "loss": 2.3055, "step": 8451 }, { "epoch": 2.61, "learning_rate": 4.5498308645941545e-05, "loss": 2.1132, "step": 8452 }, { "epoch": 2.61, "learning_rate": 4.546267958670432e-05, "loss": 2.2798, "step": 8453 }, { "epoch": 2.61, "learning_rate": 4.5427050527467095e-05, "loss": 2.1686, "step": 8454 }, { "epoch": 2.61, "learning_rate": 4.5391421468229864e-05, "loss": 1.96, "step": 8455 }, { "epoch": 2.61, "learning_rate": 4.535579240899264e-05, "loss": 1.9831, "step": 8456 }, { "epoch": 2.61, "learning_rate": 4.53201633497554e-05, "loss": 1.8924, "step": 8457 }, { "epoch": 2.61, "learning_rate": 4.5284534290518175e-05, "loss": 2.4104, "step": 8458 }, { "epoch": 2.61, "learning_rate": 4.5248905231280944e-05, "loss": 1.9125, "step": 8459 }, { "epoch": 2.61, "learning_rate": 4.521327617204372e-05, "loss": 1.9806, "step": 8460 }, { "epoch": 2.61, "learning_rate": 4.517764711280649e-05, "loss": 2.0554, "step": 8461 }, { "epoch": 2.61, "learning_rate": 4.514201805356926e-05, "loss": 1.7737, "step": 8462 }, { "epoch": 2.61, "learning_rate": 4.5106388994332024e-05, "loss": 1.856, "step": 8463 }, { "epoch": 2.61, "learning_rate": 4.50707599350948e-05, "loss": 1.938, "step": 8464 }, { "epoch": 2.61, "learning_rate": 4.503513087585757e-05, "loss": 1.7618, "step": 8465 }, { "epoch": 2.61, "learning_rate": 4.499950181662034e-05, "loss": 1.7406, "step": 8466 }, { "epoch": 2.61, "learning_rate": 4.496387275738311e-05, "loss": 1.5867, "step": 8467 }, { "epoch": 2.61, "learning_rate": 4.4928243698145886e-05, "loss": 1.6939, "step": 8468 }, { "epoch": 2.61, "learning_rate": 4.489261463890865e-05, "loss": 1.4081, "step": 8469 }, { "epoch": 2.61, "learning_rate": 4.485698557967142e-05, "loss": 1.6938, "step": 8470 }, { "epoch": 2.61, "learning_rate": 4.48213565204342e-05, "loss": 1.4048, "step": 8471 }, { "epoch": 2.61, "learning_rate": 4.4785727461196966e-05, "loss": 1.4009, "step": 8472 }, { "epoch": 2.62, "learning_rate": 4.475009840195974e-05, "loss": 1.5236, "step": 8473 }, { "epoch": 2.62, "learning_rate": 4.471446934272251e-05, "loss": 1.2313, "step": 8474 }, { "epoch": 2.62, "learning_rate": 4.4678840283485284e-05, "loss": 1.1846, "step": 8475 }, { "epoch": 2.62, "learning_rate": 4.4643211224248046e-05, "loss": 1.2222, "step": 8476 }, { "epoch": 2.62, "learning_rate": 4.460758216501082e-05, "loss": 1.0356, "step": 8477 }, { "epoch": 2.62, "learning_rate": 4.457195310577359e-05, "loss": 1.0133, "step": 8478 }, { "epoch": 2.62, "learning_rate": 4.4536324046536364e-05, "loss": 1.0698, "step": 8479 }, { "epoch": 2.62, "learning_rate": 4.450069498729913e-05, "loss": 1.093, "step": 8480 }, { "epoch": 2.62, "learning_rate": 4.446506592806191e-05, "loss": 3.54, "step": 8481 }, { "epoch": 2.62, "learning_rate": 4.442943686882467e-05, "loss": 2.9909, "step": 8482 }, { "epoch": 2.62, "learning_rate": 4.4393807809587444e-05, "loss": 2.7814, "step": 8483 }, { "epoch": 2.62, "learning_rate": 4.435817875035021e-05, "loss": 2.9187, "step": 8484 }, { "epoch": 2.62, "learning_rate": 4.432254969111299e-05, "loss": 2.8055, "step": 8485 }, { "epoch": 2.62, "learning_rate": 4.4286920631875756e-05, "loss": 2.7927, "step": 8486 }, { "epoch": 2.62, "learning_rate": 4.425129157263853e-05, "loss": 2.6639, "step": 8487 }, { "epoch": 2.62, "learning_rate": 4.4215662513401306e-05, "loss": 2.5406, "step": 8488 }, { "epoch": 2.62, "learning_rate": 4.418003345416407e-05, "loss": 2.2983, "step": 8489 }, { "epoch": 2.62, "learning_rate": 4.414440439492685e-05, "loss": 2.8534, "step": 8490 }, { "epoch": 2.62, "learning_rate": 4.410877533568961e-05, "loss": 2.6029, "step": 8491 }, { "epoch": 2.62, "learning_rate": 4.4073146276452386e-05, "loss": 2.3614, "step": 8492 }, { "epoch": 2.62, "learning_rate": 4.4037517217215155e-05, "loss": 2.3908, "step": 8493 }, { "epoch": 2.62, "learning_rate": 4.400188815797793e-05, "loss": 2.331, "step": 8494 }, { "epoch": 2.62, "learning_rate": 4.39662590987407e-05, "loss": 2.3133, "step": 8495 }, { "epoch": 2.62, "learning_rate": 4.393063003950347e-05, "loss": 2.2488, "step": 8496 }, { "epoch": 2.62, "learning_rate": 4.3895000980266235e-05, "loss": 2.4253, "step": 8497 }, { "epoch": 2.62, "learning_rate": 4.385937192102901e-05, "loss": 2.3801, "step": 8498 }, { "epoch": 2.62, "learning_rate": 4.382374286179178e-05, "loss": 2.2543, "step": 8499 }, { "epoch": 2.62, "learning_rate": 4.378811380255455e-05, "loss": 2.0685, "step": 8500 }, { "epoch": 2.62, "eval_bleu": 2.20913340971076e-14, "eval_loss": 3.2877273559570312, "eval_runtime": 2579.3551, "eval_samples_per_second": 5.722, "eval_steps_per_second": 0.715, "step": 8500 }, { "epoch": 2.62, "learning_rate": 4.375248474331732e-05, "loss": 2.1469, "step": 8501 }, { "epoch": 2.62, "learning_rate": 4.37168556840801e-05, "loss": 2.2822, "step": 8502 }, { "epoch": 2.62, "learning_rate": 4.368122662484287e-05, "loss": 2.2191, "step": 8503 }, { "epoch": 2.62, "learning_rate": 4.364559756560563e-05, "loss": 2.4262, "step": 8504 }, { "epoch": 2.62, "learning_rate": 4.360996850636841e-05, "loss": 2.1317, "step": 8505 }, { "epoch": 2.63, "learning_rate": 4.357433944713118e-05, "loss": 1.9848, "step": 8506 }, { "epoch": 2.63, "learning_rate": 4.353871038789395e-05, "loss": 1.9256, "step": 8507 }, { "epoch": 2.63, "learning_rate": 4.350308132865672e-05, "loss": 2.0429, "step": 8508 }, { "epoch": 2.63, "learning_rate": 4.3467452269419495e-05, "loss": 2.0093, "step": 8509 }, { "epoch": 2.63, "learning_rate": 4.343182321018226e-05, "loss": 1.9739, "step": 8510 }, { "epoch": 2.63, "learning_rate": 4.339619415094503e-05, "loss": 1.8503, "step": 8511 }, { "epoch": 2.63, "learning_rate": 4.33605650917078e-05, "loss": 1.9031, "step": 8512 }, { "epoch": 2.63, "learning_rate": 4.3324936032470575e-05, "loss": 1.676, "step": 8513 }, { "epoch": 2.63, "learning_rate": 4.3289306973233344e-05, "loss": 1.9112, "step": 8514 }, { "epoch": 2.63, "learning_rate": 4.325367791399612e-05, "loss": 1.7589, "step": 8515 }, { "epoch": 2.63, "learning_rate": 4.321804885475888e-05, "loss": 1.6799, "step": 8516 }, { "epoch": 2.63, "learning_rate": 4.3182419795521655e-05, "loss": 1.7056, "step": 8517 }, { "epoch": 2.63, "learning_rate": 4.3146790736284424e-05, "loss": 1.55, "step": 8518 }, { "epoch": 2.63, "learning_rate": 4.31111616770472e-05, "loss": 1.3344, "step": 8519 }, { "epoch": 2.63, "learning_rate": 4.3075532617809974e-05, "loss": 1.5643, "step": 8520 }, { "epoch": 2.63, "learning_rate": 4.303990355857274e-05, "loss": 1.382, "step": 8521 }, { "epoch": 2.63, "learning_rate": 4.300427449933552e-05, "loss": 1.569, "step": 8522 }, { "epoch": 2.63, "learning_rate": 4.296864544009828e-05, "loss": 1.3835, "step": 8523 }, { "epoch": 2.63, "learning_rate": 4.2933016380861054e-05, "loss": 1.6066, "step": 8524 }, { "epoch": 2.63, "learning_rate": 4.289738732162382e-05, "loss": 1.2078, "step": 8525 }, { "epoch": 2.63, "learning_rate": 4.28617582623866e-05, "loss": 1.1748, "step": 8526 }, { "epoch": 2.63, "learning_rate": 4.2826129203149366e-05, "loss": 1.0292, "step": 8527 }, { "epoch": 2.63, "learning_rate": 4.279050014391214e-05, "loss": 1.1066, "step": 8528 }, { "epoch": 2.63, "learning_rate": 4.27548710846749e-05, "loss": 0.9119, "step": 8529 }, { "epoch": 2.63, "learning_rate": 4.271924202543768e-05, "loss": 1.0693, "step": 8530 }, { "epoch": 2.63, "learning_rate": 4.2683612966200446e-05, "loss": 3.2263, "step": 8531 }, { "epoch": 2.63, "learning_rate": 4.264798390696322e-05, "loss": 3.5073, "step": 8532 }, { "epoch": 2.63, "learning_rate": 4.261235484772599e-05, "loss": 2.7969, "step": 8533 }, { "epoch": 2.63, "learning_rate": 4.2576725788488764e-05, "loss": 2.5947, "step": 8534 }, { "epoch": 2.63, "learning_rate": 4.254109672925153e-05, "loss": 2.6302, "step": 8535 }, { "epoch": 2.63, "learning_rate": 4.250546767001431e-05, "loss": 2.7069, "step": 8536 }, { "epoch": 2.63, "learning_rate": 4.246983861077708e-05, "loss": 2.3792, "step": 8537 }, { "epoch": 2.64, "learning_rate": 4.2434209551539844e-05, "loss": 2.3323, "step": 8538 }, { "epoch": 2.64, "learning_rate": 4.239858049230262e-05, "loss": 2.5842, "step": 8539 }, { "epoch": 2.64, "learning_rate": 4.236295143306539e-05, "loss": 2.4951, "step": 8540 }, { "epoch": 2.64, "learning_rate": 4.232732237382816e-05, "loss": 2.4822, "step": 8541 }, { "epoch": 2.64, "learning_rate": 4.229169331459093e-05, "loss": 2.4839, "step": 8542 }, { "epoch": 2.64, "learning_rate": 4.2256064255353706e-05, "loss": 2.3317, "step": 8543 }, { "epoch": 2.64, "learning_rate": 4.222043519611647e-05, "loss": 2.6171, "step": 8544 }, { "epoch": 2.64, "learning_rate": 4.218480613687924e-05, "loss": 2.3279, "step": 8545 }, { "epoch": 2.64, "learning_rate": 4.214917707764201e-05, "loss": 2.1871, "step": 8546 }, { "epoch": 2.64, "learning_rate": 4.2113548018404786e-05, "loss": 2.3625, "step": 8547 }, { "epoch": 2.64, "learning_rate": 4.2077918959167555e-05, "loss": 2.1307, "step": 8548 }, { "epoch": 2.64, "learning_rate": 4.204228989993033e-05, "loss": 2.2423, "step": 8549 }, { "epoch": 2.64, "learning_rate": 4.200666084069309e-05, "loss": 2.3798, "step": 8550 }, { "epoch": 2.64, "learning_rate": 4.1971031781455866e-05, "loss": 2.0154, "step": 8551 }, { "epoch": 2.64, "learning_rate": 4.193540272221864e-05, "loss": 2.1338, "step": 8552 }, { "epoch": 2.64, "learning_rate": 4.189977366298141e-05, "loss": 2.046, "step": 8553 }, { "epoch": 2.64, "learning_rate": 4.1864144603744185e-05, "loss": 2.1768, "step": 8554 }, { "epoch": 2.64, "learning_rate": 4.182851554450695e-05, "loss": 2.0919, "step": 8555 }, { "epoch": 2.64, "learning_rate": 4.179288648526973e-05, "loss": 2.1003, "step": 8556 }, { "epoch": 2.64, "learning_rate": 4.175725742603249e-05, "loss": 2.0332, "step": 8557 }, { "epoch": 2.64, "learning_rate": 4.1721628366795265e-05, "loss": 2.0287, "step": 8558 }, { "epoch": 2.64, "learning_rate": 4.1685999307558033e-05, "loss": 1.8779, "step": 8559 }, { "epoch": 2.64, "learning_rate": 4.165037024832081e-05, "loss": 1.7839, "step": 8560 }, { "epoch": 2.64, "learning_rate": 4.161474118908358e-05, "loss": 1.7274, "step": 8561 }, { "epoch": 2.64, "learning_rate": 4.157911212984635e-05, "loss": 1.7322, "step": 8562 }, { "epoch": 2.64, "learning_rate": 4.1543483070609113e-05, "loss": 1.8227, "step": 8563 }, { "epoch": 2.64, "learning_rate": 4.150785401137189e-05, "loss": 1.5665, "step": 8564 }, { "epoch": 2.64, "learning_rate": 4.147222495213466e-05, "loss": 1.7331, "step": 8565 }, { "epoch": 2.64, "learning_rate": 4.143659589289743e-05, "loss": 1.5922, "step": 8566 }, { "epoch": 2.64, "learning_rate": 4.14009668336602e-05, "loss": 1.9765, "step": 8567 }, { "epoch": 2.64, "learning_rate": 4.1365337774422975e-05, "loss": 1.5426, "step": 8568 }, { "epoch": 2.64, "learning_rate": 4.132970871518575e-05, "loss": 1.6497, "step": 8569 }, { "epoch": 2.65, "learning_rate": 4.129407965594851e-05, "loss": 1.6288, "step": 8570 }, { "epoch": 2.65, "learning_rate": 4.125845059671129e-05, "loss": 1.5125, "step": 8571 }, { "epoch": 2.65, "learning_rate": 4.1222821537474055e-05, "loss": 1.3643, "step": 8572 }, { "epoch": 2.65, "learning_rate": 4.118719247823683e-05, "loss": 1.3482, "step": 8573 }, { "epoch": 2.65, "learning_rate": 4.11515634189996e-05, "loss": 1.393, "step": 8574 }, { "epoch": 2.65, "learning_rate": 4.1115934359762374e-05, "loss": 1.2808, "step": 8575 }, { "epoch": 2.65, "learning_rate": 4.1080305300525136e-05, "loss": 1.1576, "step": 8576 }, { "epoch": 2.65, "learning_rate": 4.104467624128792e-05, "loss": 1.0982, "step": 8577 }, { "epoch": 2.65, "learning_rate": 4.1009047182050686e-05, "loss": 0.8979, "step": 8578 }, { "epoch": 2.65, "learning_rate": 4.0973418122813454e-05, "loss": 1.0367, "step": 8579 }, { "epoch": 2.65, "learning_rate": 4.093778906357623e-05, "loss": 1.1712, "step": 8580 }, { "epoch": 2.65, "learning_rate": 4.0902160004339e-05, "loss": 3.4911, "step": 8581 }, { "epoch": 2.65, "learning_rate": 4.0866530945101766e-05, "loss": 2.8177, "step": 8582 }, { "epoch": 2.65, "learning_rate": 4.083090188586454e-05, "loss": 2.8718, "step": 8583 }, { "epoch": 2.65, "learning_rate": 4.079527282662731e-05, "loss": 2.6964, "step": 8584 }, { "epoch": 2.65, "learning_rate": 4.075964376739008e-05, "loss": 2.6821, "step": 8585 }, { "epoch": 2.65, "learning_rate": 4.072401470815285e-05, "loss": 2.4918, "step": 8586 }, { "epoch": 2.65, "learning_rate": 4.068838564891562e-05, "loss": 2.6305, "step": 8587 }, { "epoch": 2.65, "learning_rate": 4.065275658967839e-05, "loss": 2.6828, "step": 8588 }, { "epoch": 2.65, "learning_rate": 4.0617127530441164e-05, "loss": 2.6812, "step": 8589 }, { "epoch": 2.65, "learning_rate": 4.058149847120393e-05, "loss": 2.4479, "step": 8590 }, { "epoch": 2.65, "learning_rate": 4.05458694119667e-05, "loss": 2.3508, "step": 8591 }, { "epoch": 2.65, "learning_rate": 4.0510240352729476e-05, "loss": 2.5319, "step": 8592 }, { "epoch": 2.65, "learning_rate": 4.047461129349225e-05, "loss": 2.3875, "step": 8593 }, { "epoch": 2.65, "learning_rate": 4.043898223425502e-05, "loss": 2.441, "step": 8594 }, { "epoch": 2.65, "learning_rate": 4.040335317501779e-05, "loss": 2.2016, "step": 8595 }, { "epoch": 2.65, "learning_rate": 4.036772411578056e-05, "loss": 2.3764, "step": 8596 }, { "epoch": 2.65, "learning_rate": 4.033209505654333e-05, "loss": 2.2361, "step": 8597 }, { "epoch": 2.65, "learning_rate": 4.02964659973061e-05, "loss": 2.3448, "step": 8598 }, { "epoch": 2.65, "learning_rate": 4.0260836938068875e-05, "loss": 2.1709, "step": 8599 }, { "epoch": 2.65, "learning_rate": 4.022520787883164e-05, "loss": 2.2646, "step": 8600 }, { "epoch": 2.65, "learning_rate": 4.018957881959441e-05, "loss": 2.2957, "step": 8601 }, { "epoch": 2.65, "learning_rate": 4.0153949760357186e-05, "loss": 1.9544, "step": 8602 }, { "epoch": 2.66, "learning_rate": 4.0118320701119955e-05, "loss": 2.0882, "step": 8603 }, { "epoch": 2.66, "learning_rate": 4.008269164188272e-05, "loss": 1.9899, "step": 8604 }, { "epoch": 2.66, "learning_rate": 4.00470625826455e-05, "loss": 2.1356, "step": 8605 }, { "epoch": 2.66, "learning_rate": 4.0011433523408267e-05, "loss": 2.0048, "step": 8606 }, { "epoch": 2.66, "learning_rate": 3.9975804464171035e-05, "loss": 1.9664, "step": 8607 }, { "epoch": 2.66, "learning_rate": 3.994017540493381e-05, "loss": 2.0805, "step": 8608 }, { "epoch": 2.66, "learning_rate": 3.990454634569658e-05, "loss": 1.9445, "step": 8609 }, { "epoch": 2.66, "learning_rate": 3.9868917286459353e-05, "loss": 1.8846, "step": 8610 }, { "epoch": 2.66, "learning_rate": 3.983328822722212e-05, "loss": 2.2493, "step": 8611 }, { "epoch": 2.66, "learning_rate": 3.97976591679849e-05, "loss": 1.6035, "step": 8612 }, { "epoch": 2.66, "learning_rate": 3.9762030108747665e-05, "loss": 1.7305, "step": 8613 }, { "epoch": 2.66, "learning_rate": 3.9726401049510433e-05, "loss": 1.6426, "step": 8614 }, { "epoch": 2.66, "learning_rate": 3.969077199027321e-05, "loss": 1.478, "step": 8615 }, { "epoch": 2.66, "learning_rate": 3.965514293103598e-05, "loss": 1.512, "step": 8616 }, { "epoch": 2.66, "learning_rate": 3.9619513871798745e-05, "loss": 1.5429, "step": 8617 }, { "epoch": 2.66, "learning_rate": 3.958388481256152e-05, "loss": 1.5241, "step": 8618 }, { "epoch": 2.66, "learning_rate": 3.954825575332429e-05, "loss": 1.4434, "step": 8619 }, { "epoch": 2.66, "learning_rate": 3.951262669408706e-05, "loss": 1.4638, "step": 8620 }, { "epoch": 2.66, "learning_rate": 3.947699763484983e-05, "loss": 1.385, "step": 8621 }, { "epoch": 2.66, "learning_rate": 3.94413685756126e-05, "loss": 1.5409, "step": 8622 }, { "epoch": 2.66, "learning_rate": 3.9405739516375375e-05, "loss": 1.3256, "step": 8623 }, { "epoch": 2.66, "learning_rate": 3.9370110457138144e-05, "loss": 1.1654, "step": 8624 }, { "epoch": 2.66, "learning_rate": 3.933448139790091e-05, "loss": 1.3536, "step": 8625 }, { "epoch": 2.66, "learning_rate": 3.929885233866369e-05, "loss": 1.1143, "step": 8626 }, { "epoch": 2.66, "learning_rate": 3.926322327942646e-05, "loss": 1.119, "step": 8627 }, { "epoch": 2.66, "learning_rate": 3.922759422018923e-05, "loss": 1.0633, "step": 8628 }, { "epoch": 2.66, "learning_rate": 3.9191965160952e-05, "loss": 1.0743, "step": 8629 }, { "epoch": 2.66, "learning_rate": 3.9156336101714774e-05, "loss": 0.8956, "step": 8630 }, { "epoch": 2.66, "learning_rate": 3.912070704247754e-05, "loss": 3.2413, "step": 8631 }, { "epoch": 2.66, "learning_rate": 3.908507798324031e-05, "loss": 3.0011, "step": 8632 }, { "epoch": 2.66, "learning_rate": 3.9049448924003086e-05, "loss": 2.8318, "step": 8633 }, { "epoch": 2.66, "learning_rate": 3.9013819864765854e-05, "loss": 2.594, "step": 8634 }, { "epoch": 2.67, "learning_rate": 3.897819080552862e-05, "loss": 2.4316, "step": 8635 }, { "epoch": 2.67, "learning_rate": 3.89425617462914e-05, "loss": 2.3443, "step": 8636 }, { "epoch": 2.67, "learning_rate": 3.8906932687054166e-05, "loss": 2.6312, "step": 8637 }, { "epoch": 2.67, "learning_rate": 3.8871303627816934e-05, "loss": 2.4433, "step": 8638 }, { "epoch": 2.67, "learning_rate": 3.883567456857971e-05, "loss": 2.3638, "step": 8639 }, { "epoch": 2.67, "learning_rate": 3.880004550934248e-05, "loss": 2.2736, "step": 8640 }, { "epoch": 2.67, "learning_rate": 3.8764416450105246e-05, "loss": 2.5882, "step": 8641 }, { "epoch": 2.67, "learning_rate": 3.872878739086802e-05, "loss": 2.2664, "step": 8642 }, { "epoch": 2.67, "learning_rate": 3.8693158331630796e-05, "loss": 2.3464, "step": 8643 }, { "epoch": 2.67, "learning_rate": 3.8657529272393564e-05, "loss": 2.165, "step": 8644 }, { "epoch": 2.67, "learning_rate": 3.862190021315633e-05, "loss": 2.3288, "step": 8645 }, { "epoch": 2.67, "learning_rate": 3.858627115391911e-05, "loss": 2.5067, "step": 8646 }, { "epoch": 2.67, "learning_rate": 3.8550642094681876e-05, "loss": 2.283, "step": 8647 }, { "epoch": 2.67, "learning_rate": 3.8515013035444645e-05, "loss": 2.3993, "step": 8648 }, { "epoch": 2.67, "learning_rate": 3.847938397620742e-05, "loss": 2.0414, "step": 8649 }, { "epoch": 2.67, "learning_rate": 3.844375491697019e-05, "loss": 2.1921, "step": 8650 }, { "epoch": 2.67, "learning_rate": 3.8408125857732956e-05, "loss": 2.2321, "step": 8651 }, { "epoch": 2.67, "learning_rate": 3.837249679849573e-05, "loss": 1.8299, "step": 8652 }, { "epoch": 2.67, "learning_rate": 3.83368677392585e-05, "loss": 1.9531, "step": 8653 }, { "epoch": 2.67, "learning_rate": 3.830123868002127e-05, "loss": 2.0732, "step": 8654 }, { "epoch": 2.67, "learning_rate": 3.826560962078404e-05, "loss": 2.221, "step": 8655 }, { "epoch": 2.67, "learning_rate": 3.822998056154681e-05, "loss": 2.0567, "step": 8656 }, { "epoch": 2.67, "learning_rate": 3.819435150230958e-05, "loss": 1.9991, "step": 8657 }, { "epoch": 2.67, "learning_rate": 3.8158722443072355e-05, "loss": 1.7683, "step": 8658 }, { "epoch": 2.67, "learning_rate": 3.812309338383513e-05, "loss": 1.9542, "step": 8659 }, { "epoch": 2.67, "learning_rate": 3.80874643245979e-05, "loss": 1.9438, "step": 8660 }, { "epoch": 2.67, "learning_rate": 3.8051835265360667e-05, "loss": 1.7889, "step": 8661 }, { "epoch": 2.67, "learning_rate": 3.801620620612344e-05, "loss": 1.6625, "step": 8662 }, { "epoch": 2.67, "learning_rate": 3.798057714688621e-05, "loss": 1.7258, "step": 8663 }, { "epoch": 2.67, "learning_rate": 3.7944948087648985e-05, "loss": 1.6512, "step": 8664 }, { "epoch": 2.67, "learning_rate": 3.7909319028411753e-05, "loss": 1.8556, "step": 8665 }, { "epoch": 2.67, "learning_rate": 3.787368996917452e-05, "loss": 1.5633, "step": 8666 }, { "epoch": 2.67, "learning_rate": 3.78380609099373e-05, "loss": 1.4296, "step": 8667 }, { "epoch": 2.68, "learning_rate": 3.7802431850700065e-05, "loss": 1.5353, "step": 8668 }, { "epoch": 2.68, "learning_rate": 3.7766802791462834e-05, "loss": 1.4823, "step": 8669 }, { "epoch": 2.68, "learning_rate": 3.773117373222561e-05, "loss": 1.3965, "step": 8670 }, { "epoch": 2.68, "learning_rate": 3.769554467298838e-05, "loss": 1.2066, "step": 8671 }, { "epoch": 2.68, "learning_rate": 3.7659915613751145e-05, "loss": 1.5283, "step": 8672 }, { "epoch": 2.68, "learning_rate": 3.762428655451392e-05, "loss": 1.3302, "step": 8673 }, { "epoch": 2.68, "learning_rate": 3.758865749527669e-05, "loss": 1.215, "step": 8674 }, { "epoch": 2.68, "learning_rate": 3.7553028436039464e-05, "loss": 1.1867, "step": 8675 }, { "epoch": 2.68, "learning_rate": 3.751739937680223e-05, "loss": 1.0024, "step": 8676 }, { "epoch": 2.68, "learning_rate": 3.748177031756501e-05, "loss": 1.0517, "step": 8677 }, { "epoch": 2.68, "learning_rate": 3.7446141258327776e-05, "loss": 1.1019, "step": 8678 }, { "epoch": 2.68, "learning_rate": 3.7410512199090544e-05, "loss": 1.0939, "step": 8679 }, { "epoch": 2.68, "learning_rate": 3.737488313985332e-05, "loss": 1.3092, "step": 8680 }, { "epoch": 2.68, "learning_rate": 3.733925408061609e-05, "loss": 3.203, "step": 8681 }, { "epoch": 2.68, "learning_rate": 3.7303625021378856e-05, "loss": 3.1734, "step": 8682 }, { "epoch": 2.68, "learning_rate": 3.726799596214163e-05, "loss": 2.9, "step": 8683 }, { "epoch": 2.68, "learning_rate": 3.72323669029044e-05, "loss": 2.9804, "step": 8684 }, { "epoch": 2.68, "learning_rate": 3.719673784366717e-05, "loss": 2.6025, "step": 8685 }, { "epoch": 2.68, "learning_rate": 3.716110878442994e-05, "loss": 2.4717, "step": 8686 }, { "epoch": 2.68, "learning_rate": 3.712547972519271e-05, "loss": 2.3171, "step": 8687 }, { "epoch": 2.68, "learning_rate": 3.708985066595548e-05, "loss": 2.5881, "step": 8688 }, { "epoch": 2.68, "learning_rate": 3.7054221606718254e-05, "loss": 2.6308, "step": 8689 }, { "epoch": 2.68, "learning_rate": 3.701859254748102e-05, "loss": 2.3887, "step": 8690 }, { "epoch": 2.68, "learning_rate": 3.69829634882438e-05, "loss": 2.1645, "step": 8691 }, { "epoch": 2.68, "learning_rate": 3.6947334429006566e-05, "loss": 2.4104, "step": 8692 }, { "epoch": 2.68, "learning_rate": 3.691170536976934e-05, "loss": 2.2669, "step": 8693 }, { "epoch": 2.68, "learning_rate": 3.687607631053211e-05, "loss": 2.256, "step": 8694 }, { "epoch": 2.68, "learning_rate": 3.684044725129488e-05, "loss": 2.2489, "step": 8695 }, { "epoch": 2.68, "learning_rate": 3.680481819205765e-05, "loss": 1.9817, "step": 8696 }, { "epoch": 2.68, "learning_rate": 3.676918913282042e-05, "loss": 2.3393, "step": 8697 }, { "epoch": 2.68, "learning_rate": 3.673356007358319e-05, "loss": 2.1278, "step": 8698 }, { "epoch": 2.68, "learning_rate": 3.6697931014345965e-05, "loss": 2.058, "step": 8699 }, { "epoch": 2.69, "learning_rate": 3.666230195510873e-05, "loss": 2.2174, "step": 8700 }, { "epoch": 2.69, "learning_rate": 3.66266728958715e-05, "loss": 2.3576, "step": 8701 }, { "epoch": 2.69, "learning_rate": 3.6591043836634276e-05, "loss": 2.2886, "step": 8702 }, { "epoch": 2.69, "learning_rate": 3.6555414777397045e-05, "loss": 2.0598, "step": 8703 }, { "epoch": 2.69, "learning_rate": 3.651978571815981e-05, "loss": 2.2779, "step": 8704 }, { "epoch": 2.69, "learning_rate": 3.648415665892259e-05, "loss": 1.9708, "step": 8705 }, { "epoch": 2.69, "learning_rate": 3.6448527599685356e-05, "loss": 2.1095, "step": 8706 }, { "epoch": 2.69, "learning_rate": 3.6412898540448125e-05, "loss": 1.995, "step": 8707 }, { "epoch": 2.69, "learning_rate": 3.6377269481210907e-05, "loss": 1.7666, "step": 8708 }, { "epoch": 2.69, "learning_rate": 3.6341640421973675e-05, "loss": 1.7425, "step": 8709 }, { "epoch": 2.69, "learning_rate": 3.630601136273644e-05, "loss": 1.7809, "step": 8710 }, { "epoch": 2.69, "learning_rate": 3.627038230349922e-05, "loss": 1.53, "step": 8711 }, { "epoch": 2.69, "learning_rate": 3.6234753244261987e-05, "loss": 1.8267, "step": 8712 }, { "epoch": 2.69, "learning_rate": 3.6199124185024755e-05, "loss": 1.7729, "step": 8713 }, { "epoch": 2.69, "learning_rate": 3.616349512578753e-05, "loss": 1.8739, "step": 8714 }, { "epoch": 2.69, "learning_rate": 3.61278660665503e-05, "loss": 1.7379, "step": 8715 }, { "epoch": 2.69, "learning_rate": 3.609223700731307e-05, "loss": 1.5983, "step": 8716 }, { "epoch": 2.69, "learning_rate": 3.605660794807584e-05, "loss": 1.4337, "step": 8717 }, { "epoch": 2.69, "learning_rate": 3.602097888883861e-05, "loss": 1.3407, "step": 8718 }, { "epoch": 2.69, "learning_rate": 3.598534982960138e-05, "loss": 1.4168, "step": 8719 }, { "epoch": 2.69, "learning_rate": 3.5949720770364153e-05, "loss": 1.4175, "step": 8720 }, { "epoch": 2.69, "learning_rate": 3.591409171112692e-05, "loss": 1.4496, "step": 8721 }, { "epoch": 2.69, "learning_rate": 3.587846265188969e-05, "loss": 1.2093, "step": 8722 }, { "epoch": 2.69, "learning_rate": 3.5842833592652465e-05, "loss": 1.3046, "step": 8723 }, { "epoch": 2.69, "learning_rate": 3.580720453341524e-05, "loss": 1.1475, "step": 8724 }, { "epoch": 2.69, "learning_rate": 3.577157547417801e-05, "loss": 1.3085, "step": 8725 }, { "epoch": 2.69, "learning_rate": 3.573594641494078e-05, "loss": 1.2364, "step": 8726 }, { "epoch": 2.69, "learning_rate": 3.570031735570355e-05, "loss": 1.1155, "step": 8727 }, { "epoch": 2.69, "learning_rate": 3.566468829646632e-05, "loss": 1.1294, "step": 8728 }, { "epoch": 2.69, "learning_rate": 3.562905923722909e-05, "loss": 1.0927, "step": 8729 }, { "epoch": 2.69, "learning_rate": 3.5593430177991864e-05, "loss": 1.1637, "step": 8730 }, { "epoch": 2.69, "learning_rate": 3.555780111875463e-05, "loss": 3.0995, "step": 8731 }, { "epoch": 2.7, "learning_rate": 3.55221720595174e-05, "loss": 2.9785, "step": 8732 }, { "epoch": 2.7, "learning_rate": 3.5486543000280176e-05, "loss": 3.1387, "step": 8733 }, { "epoch": 2.7, "learning_rate": 3.5450913941042944e-05, "loss": 2.8221, "step": 8734 }, { "epoch": 2.7, "learning_rate": 3.541528488180571e-05, "loss": 2.586, "step": 8735 }, { "epoch": 2.7, "learning_rate": 3.537965582256849e-05, "loss": 2.4397, "step": 8736 }, { "epoch": 2.7, "learning_rate": 3.5344026763331256e-05, "loss": 2.4182, "step": 8737 }, { "epoch": 2.7, "learning_rate": 3.5308397704094024e-05, "loss": 2.2567, "step": 8738 }, { "epoch": 2.7, "learning_rate": 3.52727686448568e-05, "loss": 2.6196, "step": 8739 }, { "epoch": 2.7, "learning_rate": 3.5237139585619574e-05, "loss": 2.5208, "step": 8740 }, { "epoch": 2.7, "learning_rate": 3.520151052638234e-05, "loss": 2.3853, "step": 8741 }, { "epoch": 2.7, "learning_rate": 3.516588146714511e-05, "loss": 2.6301, "step": 8742 }, { "epoch": 2.7, "learning_rate": 3.5130252407907886e-05, "loss": 2.3486, "step": 8743 }, { "epoch": 2.7, "learning_rate": 3.5094623348670654e-05, "loss": 2.1794, "step": 8744 }, { "epoch": 2.7, "learning_rate": 3.505899428943342e-05, "loss": 2.1705, "step": 8745 }, { "epoch": 2.7, "learning_rate": 3.50233652301962e-05, "loss": 2.3571, "step": 8746 }, { "epoch": 2.7, "learning_rate": 3.4987736170958966e-05, "loss": 2.0542, "step": 8747 }, { "epoch": 2.7, "learning_rate": 3.4952107111721734e-05, "loss": 2.0125, "step": 8748 }, { "epoch": 2.7, "learning_rate": 3.491647805248451e-05, "loss": 2.2302, "step": 8749 }, { "epoch": 2.7, "learning_rate": 3.488084899324728e-05, "loss": 2.2169, "step": 8750 }, { "epoch": 2.7, "learning_rate": 3.484521993401005e-05, "loss": 1.9865, "step": 8751 }, { "epoch": 2.7, "learning_rate": 3.480959087477282e-05, "loss": 1.9271, "step": 8752 }, { "epoch": 2.7, "learning_rate": 3.477396181553559e-05, "loss": 2.08, "step": 8753 }, { "epoch": 2.7, "learning_rate": 3.4738332756298365e-05, "loss": 2.1886, "step": 8754 }, { "epoch": 2.7, "learning_rate": 3.470270369706113e-05, "loss": 2.1434, "step": 8755 }, { "epoch": 2.7, "learning_rate": 3.46670746378239e-05, "loss": 2.1595, "step": 8756 }, { "epoch": 2.7, "learning_rate": 3.4631445578586676e-05, "loss": 2.1558, "step": 8757 }, { "epoch": 2.7, "learning_rate": 3.459581651934945e-05, "loss": 1.9609, "step": 8758 }, { "epoch": 2.7, "learning_rate": 3.456018746011222e-05, "loss": 1.9519, "step": 8759 }, { "epoch": 2.7, "learning_rate": 3.452455840087499e-05, "loss": 1.9261, "step": 8760 }, { "epoch": 2.7, "learning_rate": 3.448892934163776e-05, "loss": 1.8133, "step": 8761 }, { "epoch": 2.7, "learning_rate": 3.445330028240053e-05, "loss": 1.7639, "step": 8762 }, { "epoch": 2.7, "learning_rate": 3.44176712231633e-05, "loss": 1.7807, "step": 8763 }, { "epoch": 2.7, "learning_rate": 3.4382042163926075e-05, "loss": 1.6519, "step": 8764 }, { "epoch": 2.71, "learning_rate": 3.434641310468884e-05, "loss": 1.7491, "step": 8765 }, { "epoch": 2.71, "learning_rate": 3.431078404545161e-05, "loss": 1.4836, "step": 8766 }, { "epoch": 2.71, "learning_rate": 3.427515498621439e-05, "loss": 1.4353, "step": 8767 }, { "epoch": 2.71, "learning_rate": 3.4239525926977155e-05, "loss": 1.4934, "step": 8768 }, { "epoch": 2.71, "learning_rate": 3.420389686773992e-05, "loss": 1.3197, "step": 8769 }, { "epoch": 2.71, "learning_rate": 3.41682678085027e-05, "loss": 1.6196, "step": 8770 }, { "epoch": 2.71, "learning_rate": 3.413263874926547e-05, "loss": 1.2623, "step": 8771 }, { "epoch": 2.71, "learning_rate": 3.4097009690028235e-05, "loss": 1.5216, "step": 8772 }, { "epoch": 2.71, "learning_rate": 3.406138063079101e-05, "loss": 1.2342, "step": 8773 }, { "epoch": 2.71, "learning_rate": 3.4025751571553785e-05, "loss": 1.4132, "step": 8774 }, { "epoch": 2.71, "learning_rate": 3.3990122512316554e-05, "loss": 1.1421, "step": 8775 }, { "epoch": 2.71, "learning_rate": 3.395449345307932e-05, "loss": 1.1525, "step": 8776 }, { "epoch": 2.71, "learning_rate": 3.39188643938421e-05, "loss": 1.0395, "step": 8777 }, { "epoch": 2.71, "learning_rate": 3.3883235334604865e-05, "loss": 1.1098, "step": 8778 }, { "epoch": 2.71, "learning_rate": 3.3847606275367634e-05, "loss": 1.0651, "step": 8779 }, { "epoch": 2.71, "learning_rate": 3.381197721613041e-05, "loss": 1.0496, "step": 8780 }, { "epoch": 2.71, "learning_rate": 3.377634815689318e-05, "loss": 3.3162, "step": 8781 }, { "epoch": 2.71, "learning_rate": 3.3740719097655945e-05, "loss": 3.0573, "step": 8782 }, { "epoch": 2.71, "learning_rate": 3.370509003841872e-05, "loss": 2.6484, "step": 8783 }, { "epoch": 2.71, "learning_rate": 3.366946097918149e-05, "loss": 2.5218, "step": 8784 }, { "epoch": 2.71, "learning_rate": 3.363383191994426e-05, "loss": 2.5642, "step": 8785 }, { "epoch": 2.71, "learning_rate": 3.359820286070703e-05, "loss": 2.6789, "step": 8786 }, { "epoch": 2.71, "learning_rate": 3.35625738014698e-05, "loss": 2.136, "step": 8787 }, { "epoch": 2.71, "learning_rate": 3.352694474223257e-05, "loss": 2.5443, "step": 8788 }, { "epoch": 2.71, "learning_rate": 3.3491315682995344e-05, "loss": 2.4994, "step": 8789 }, { "epoch": 2.71, "learning_rate": 3.345568662375812e-05, "loss": 2.2999, "step": 8790 }, { "epoch": 2.71, "learning_rate": 3.342005756452089e-05, "loss": 2.2913, "step": 8791 }, { "epoch": 2.71, "learning_rate": 3.3384428505283656e-05, "loss": 2.3712, "step": 8792 }, { "epoch": 2.71, "learning_rate": 3.334879944604643e-05, "loss": 1.8845, "step": 8793 }, { "epoch": 2.71, "learning_rate": 3.33131703868092e-05, "loss": 2.2713, "step": 8794 }, { "epoch": 2.71, "learning_rate": 3.3277541327571974e-05, "loss": 2.4481, "step": 8795 }, { "epoch": 2.71, "learning_rate": 3.324191226833474e-05, "loss": 2.4337, "step": 8796 }, { "epoch": 2.72, "learning_rate": 3.320628320909751e-05, "loss": 2.0293, "step": 8797 }, { "epoch": 2.72, "learning_rate": 3.3170654149860286e-05, "loss": 2.1357, "step": 8798 }, { "epoch": 2.72, "learning_rate": 3.3135025090623054e-05, "loss": 2.0468, "step": 8799 }, { "epoch": 2.72, "learning_rate": 3.309939603138582e-05, "loss": 2.0657, "step": 8800 }, { "epoch": 2.72, "learning_rate": 3.30637669721486e-05, "loss": 2.0085, "step": 8801 }, { "epoch": 2.72, "learning_rate": 3.3028137912911366e-05, "loss": 2.1713, "step": 8802 }, { "epoch": 2.72, "learning_rate": 3.2992508853674134e-05, "loss": 1.851, "step": 8803 }, { "epoch": 2.72, "learning_rate": 3.295687979443691e-05, "loss": 1.842, "step": 8804 }, { "epoch": 2.72, "learning_rate": 3.292125073519968e-05, "loss": 2.1869, "step": 8805 }, { "epoch": 2.72, "learning_rate": 3.288562167596245e-05, "loss": 1.9077, "step": 8806 }, { "epoch": 2.72, "learning_rate": 3.284999261672522e-05, "loss": 1.8607, "step": 8807 }, { "epoch": 2.72, "learning_rate": 3.2814363557487996e-05, "loss": 1.5588, "step": 8808 }, { "epoch": 2.72, "learning_rate": 3.2778734498250765e-05, "loss": 1.7249, "step": 8809 }, { "epoch": 2.72, "learning_rate": 3.274310543901353e-05, "loss": 1.9512, "step": 8810 }, { "epoch": 2.72, "learning_rate": 3.270747637977631e-05, "loss": 1.8065, "step": 8811 }, { "epoch": 2.72, "learning_rate": 3.2671847320539076e-05, "loss": 1.9297, "step": 8812 }, { "epoch": 2.72, "learning_rate": 3.2636218261301845e-05, "loss": 1.8698, "step": 8813 }, { "epoch": 2.72, "learning_rate": 3.260058920206462e-05, "loss": 1.663, "step": 8814 }, { "epoch": 2.72, "learning_rate": 3.256496014282739e-05, "loss": 1.7202, "step": 8815 }, { "epoch": 2.72, "learning_rate": 3.2529331083590156e-05, "loss": 1.4414, "step": 8816 }, { "epoch": 2.72, "learning_rate": 3.249370202435293e-05, "loss": 1.2541, "step": 8817 }, { "epoch": 2.72, "learning_rate": 3.24580729651157e-05, "loss": 1.3554, "step": 8818 }, { "epoch": 2.72, "learning_rate": 3.242244390587847e-05, "loss": 1.4526, "step": 8819 }, { "epoch": 2.72, "learning_rate": 3.238681484664124e-05, "loss": 1.3007, "step": 8820 }, { "epoch": 2.72, "learning_rate": 3.235118578740401e-05, "loss": 1.4615, "step": 8821 }, { "epoch": 2.72, "learning_rate": 3.231555672816679e-05, "loss": 1.4956, "step": 8822 }, { "epoch": 2.72, "learning_rate": 3.2279927668929555e-05, "loss": 1.0949, "step": 8823 }, { "epoch": 2.72, "learning_rate": 3.224429860969233e-05, "loss": 1.258, "step": 8824 }, { "epoch": 2.72, "learning_rate": 3.22086695504551e-05, "loss": 1.3014, "step": 8825 }, { "epoch": 2.72, "learning_rate": 3.217304049121787e-05, "loss": 1.0525, "step": 8826 }, { "epoch": 2.72, "learning_rate": 3.213741143198064e-05, "loss": 1.0314, "step": 8827 }, { "epoch": 2.72, "learning_rate": 3.210178237274341e-05, "loss": 1.0685, "step": 8828 }, { "epoch": 2.72, "learning_rate": 3.206615331350618e-05, "loss": 1.0276, "step": 8829 }, { "epoch": 2.73, "learning_rate": 3.2030524254268954e-05, "loss": 0.8688, "step": 8830 }, { "epoch": 2.73, "learning_rate": 3.199489519503172e-05, "loss": 3.5084, "step": 8831 }, { "epoch": 2.73, "learning_rate": 3.195926613579449e-05, "loss": 3.0941, "step": 8832 }, { "epoch": 2.73, "learning_rate": 3.1923637076557265e-05, "loss": 2.9308, "step": 8833 }, { "epoch": 2.73, "learning_rate": 3.1888008017320034e-05, "loss": 2.3758, "step": 8834 }, { "epoch": 2.73, "learning_rate": 3.18523789580828e-05, "loss": 2.3274, "step": 8835 }, { "epoch": 2.73, "learning_rate": 3.181674989884558e-05, "loss": 2.3781, "step": 8836 }, { "epoch": 2.73, "learning_rate": 3.1781120839608345e-05, "loss": 2.5207, "step": 8837 }, { "epoch": 2.73, "learning_rate": 3.174549178037112e-05, "loss": 2.2698, "step": 8838 }, { "epoch": 2.73, "learning_rate": 3.1709862721133896e-05, "loss": 2.2565, "step": 8839 }, { "epoch": 2.73, "learning_rate": 3.1674233661896664e-05, "loss": 2.4983, "step": 8840 }, { "epoch": 2.73, "learning_rate": 3.163860460265943e-05, "loss": 2.6029, "step": 8841 }, { "epoch": 2.73, "learning_rate": 3.160297554342221e-05, "loss": 2.3531, "step": 8842 }, { "epoch": 2.73, "learning_rate": 3.1567346484184976e-05, "loss": 2.1889, "step": 8843 }, { "epoch": 2.73, "learning_rate": 3.1531717424947744e-05, "loss": 1.9821, "step": 8844 }, { "epoch": 2.73, "learning_rate": 3.149608836571052e-05, "loss": 2.2924, "step": 8845 }, { "epoch": 2.73, "learning_rate": 3.146045930647329e-05, "loss": 2.2335, "step": 8846 }, { "epoch": 2.73, "learning_rate": 3.1424830247236056e-05, "loss": 2.038, "step": 8847 }, { "epoch": 2.73, "learning_rate": 3.138920118799883e-05, "loss": 2.1488, "step": 8848 }, { "epoch": 2.73, "learning_rate": 3.13535721287616e-05, "loss": 2.1255, "step": 8849 }, { "epoch": 2.73, "learning_rate": 3.131794306952437e-05, "loss": 2.1363, "step": 8850 }, { "epoch": 2.73, "learning_rate": 3.128231401028714e-05, "loss": 2.1867, "step": 8851 }, { "epoch": 2.73, "learning_rate": 3.124668495104991e-05, "loss": 1.86, "step": 8852 }, { "epoch": 2.73, "learning_rate": 3.121105589181268e-05, "loss": 2.199, "step": 8853 }, { "epoch": 2.73, "learning_rate": 3.1175426832575454e-05, "loss": 2.0914, "step": 8854 }, { "epoch": 2.73, "learning_rate": 3.113979777333823e-05, "loss": 1.7708, "step": 8855 }, { "epoch": 2.73, "learning_rate": 3.1104168714101e-05, "loss": 1.6936, "step": 8856 }, { "epoch": 2.73, "learning_rate": 3.1068539654863766e-05, "loss": 1.8469, "step": 8857 }, { "epoch": 2.73, "learning_rate": 3.103291059562654e-05, "loss": 2.1752, "step": 8858 }, { "epoch": 2.73, "learning_rate": 3.099728153638931e-05, "loss": 1.9092, "step": 8859 }, { "epoch": 2.73, "learning_rate": 3.096165247715208e-05, "loss": 1.7292, "step": 8860 }, { "epoch": 2.73, "learning_rate": 3.092602341791485e-05, "loss": 1.8697, "step": 8861 }, { "epoch": 2.74, "learning_rate": 3.089039435867762e-05, "loss": 1.7217, "step": 8862 }, { "epoch": 2.74, "learning_rate": 3.085476529944039e-05, "loss": 1.904, "step": 8863 }, { "epoch": 2.74, "learning_rate": 3.0819136240203165e-05, "loss": 1.4469, "step": 8864 }, { "epoch": 2.74, "learning_rate": 3.078350718096593e-05, "loss": 1.72, "step": 8865 }, { "epoch": 2.74, "learning_rate": 3.07478781217287e-05, "loss": 1.7053, "step": 8866 }, { "epoch": 2.74, "learning_rate": 3.0712249062491476e-05, "loss": 1.5843, "step": 8867 }, { "epoch": 2.74, "learning_rate": 3.0676620003254245e-05, "loss": 1.7706, "step": 8868 }, { "epoch": 2.74, "learning_rate": 3.064099094401701e-05, "loss": 1.3637, "step": 8869 }, { "epoch": 2.74, "learning_rate": 3.060536188477979e-05, "loss": 1.4612, "step": 8870 }, { "epoch": 2.74, "learning_rate": 3.056973282554256e-05, "loss": 1.3208, "step": 8871 }, { "epoch": 2.74, "learning_rate": 3.053410376630533e-05, "loss": 1.2085, "step": 8872 }, { "epoch": 2.74, "learning_rate": 3.0498474707068103e-05, "loss": 1.2714, "step": 8873 }, { "epoch": 2.74, "learning_rate": 3.0462845647830875e-05, "loss": 1.2285, "step": 8874 }, { "epoch": 2.74, "learning_rate": 3.0427216588593643e-05, "loss": 1.0194, "step": 8875 }, { "epoch": 2.74, "learning_rate": 3.0391587529356415e-05, "loss": 1.2047, "step": 8876 }, { "epoch": 2.74, "learning_rate": 3.0355958470119187e-05, "loss": 1.0737, "step": 8877 }, { "epoch": 2.74, "learning_rate": 3.0320329410881955e-05, "loss": 0.9116, "step": 8878 }, { "epoch": 2.74, "learning_rate": 3.0284700351644727e-05, "loss": 1.0016, "step": 8879 }, { "epoch": 2.74, "learning_rate": 3.02490712924075e-05, "loss": 0.9452, "step": 8880 }, { "epoch": 2.74, "learning_rate": 3.0213442233170267e-05, "loss": 3.3411, "step": 8881 }, { "epoch": 2.74, "learning_rate": 3.017781317393304e-05, "loss": 2.9053, "step": 8882 }, { "epoch": 2.74, "learning_rate": 3.014218411469581e-05, "loss": 2.7845, "step": 8883 }, { "epoch": 2.74, "learning_rate": 3.010655505545858e-05, "loss": 2.7785, "step": 8884 }, { "epoch": 2.74, "learning_rate": 3.007092599622135e-05, "loss": 2.7756, "step": 8885 }, { "epoch": 2.74, "learning_rate": 3.0035296936984122e-05, "loss": 2.5861, "step": 8886 }, { "epoch": 2.74, "learning_rate": 2.999966787774689e-05, "loss": 2.4832, "step": 8887 }, { "epoch": 2.74, "learning_rate": 2.9964038818509665e-05, "loss": 2.5987, "step": 8888 }, { "epoch": 2.74, "learning_rate": 2.9928409759272437e-05, "loss": 2.2145, "step": 8889 }, { "epoch": 2.74, "learning_rate": 2.989278070003521e-05, "loss": 2.1036, "step": 8890 }, { "epoch": 2.74, "learning_rate": 2.9857151640797977e-05, "loss": 2.1703, "step": 8891 }, { "epoch": 2.74, "learning_rate": 2.982152258156075e-05, "loss": 2.4701, "step": 8892 }, { "epoch": 2.74, "learning_rate": 2.978589352232352e-05, "loss": 2.2195, "step": 8893 }, { "epoch": 2.75, "learning_rate": 2.9750264463086292e-05, "loss": 2.2235, "step": 8894 }, { "epoch": 2.75, "learning_rate": 2.971463540384906e-05, "loss": 2.2967, "step": 8895 }, { "epoch": 2.75, "learning_rate": 2.9679006344611832e-05, "loss": 2.3609, "step": 8896 }, { "epoch": 2.75, "learning_rate": 2.9643377285374604e-05, "loss": 2.1932, "step": 8897 }, { "epoch": 2.75, "learning_rate": 2.9607748226137372e-05, "loss": 2.1367, "step": 8898 }, { "epoch": 2.75, "learning_rate": 2.9572119166900144e-05, "loss": 2.0415, "step": 8899 }, { "epoch": 2.75, "learning_rate": 2.9536490107662916e-05, "loss": 2.208, "step": 8900 }, { "epoch": 2.75, "learning_rate": 2.9500861048425684e-05, "loss": 2.0407, "step": 8901 }, { "epoch": 2.75, "learning_rate": 2.9465231989188456e-05, "loss": 2.103, "step": 8902 }, { "epoch": 2.75, "learning_rate": 2.9429602929951228e-05, "loss": 1.7616, "step": 8903 }, { "epoch": 2.75, "learning_rate": 2.9393973870714003e-05, "loss": 2.1302, "step": 8904 }, { "epoch": 2.75, "learning_rate": 2.935834481147677e-05, "loss": 1.898, "step": 8905 }, { "epoch": 2.75, "learning_rate": 2.9322715752239543e-05, "loss": 2.0871, "step": 8906 }, { "epoch": 2.75, "learning_rate": 2.9287086693002314e-05, "loss": 2.0918, "step": 8907 }, { "epoch": 2.75, "learning_rate": 2.9251457633765083e-05, "loss": 1.7868, "step": 8908 }, { "epoch": 2.75, "learning_rate": 2.9215828574527854e-05, "loss": 1.9503, "step": 8909 }, { "epoch": 2.75, "learning_rate": 2.9180199515290626e-05, "loss": 1.6819, "step": 8910 }, { "epoch": 2.75, "learning_rate": 2.9144570456053394e-05, "loss": 1.8408, "step": 8911 }, { "epoch": 2.75, "learning_rate": 2.9108941396816166e-05, "loss": 1.697, "step": 8912 }, { "epoch": 2.75, "learning_rate": 2.9073312337578938e-05, "loss": 1.4994, "step": 8913 }, { "epoch": 2.75, "learning_rate": 2.903768327834171e-05, "loss": 1.5205, "step": 8914 }, { "epoch": 2.75, "learning_rate": 2.9002054219104478e-05, "loss": 1.6618, "step": 8915 }, { "epoch": 2.75, "learning_rate": 2.896642515986725e-05, "loss": 1.7515, "step": 8916 }, { "epoch": 2.75, "learning_rate": 2.893079610063002e-05, "loss": 1.3434, "step": 8917 }, { "epoch": 2.75, "learning_rate": 2.889516704139279e-05, "loss": 1.5827, "step": 8918 }, { "epoch": 2.75, "learning_rate": 2.885953798215556e-05, "loss": 1.5304, "step": 8919 }, { "epoch": 2.75, "learning_rate": 2.8823908922918336e-05, "loss": 1.3532, "step": 8920 }, { "epoch": 2.75, "learning_rate": 2.8788279863681108e-05, "loss": 1.2369, "step": 8921 }, { "epoch": 2.75, "learning_rate": 2.8752650804443876e-05, "loss": 1.362, "step": 8922 }, { "epoch": 2.75, "learning_rate": 2.8717021745206648e-05, "loss": 1.1346, "step": 8923 }, { "epoch": 2.75, "learning_rate": 2.868139268596942e-05, "loss": 1.126, "step": 8924 }, { "epoch": 2.75, "learning_rate": 2.8645763626732188e-05, "loss": 1.0173, "step": 8925 }, { "epoch": 2.75, "learning_rate": 2.861013456749496e-05, "loss": 1.0653, "step": 8926 }, { "epoch": 2.76, "learning_rate": 2.857450550825773e-05, "loss": 1.1413, "step": 8927 }, { "epoch": 2.76, "learning_rate": 2.85388764490205e-05, "loss": 1.0217, "step": 8928 }, { "epoch": 2.76, "learning_rate": 2.850324738978327e-05, "loss": 0.969, "step": 8929 }, { "epoch": 2.76, "learning_rate": 2.8467618330546043e-05, "loss": 1.019, "step": 8930 }, { "epoch": 2.76, "learning_rate": 2.843198927130881e-05, "loss": 3.2161, "step": 8931 }, { "epoch": 2.76, "learning_rate": 2.8396360212071583e-05, "loss": 2.8417, "step": 8932 }, { "epoch": 2.76, "learning_rate": 2.8360731152834355e-05, "loss": 2.7811, "step": 8933 }, { "epoch": 2.76, "learning_rate": 2.8325102093597123e-05, "loss": 2.3102, "step": 8934 }, { "epoch": 2.76, "learning_rate": 2.8289473034359895e-05, "loss": 2.5295, "step": 8935 }, { "epoch": 2.76, "learning_rate": 2.8253843975122667e-05, "loss": 2.4044, "step": 8936 }, { "epoch": 2.76, "learning_rate": 2.8218214915885442e-05, "loss": 2.3383, "step": 8937 }, { "epoch": 2.76, "learning_rate": 2.8182585856648214e-05, "loss": 2.3592, "step": 8938 }, { "epoch": 2.76, "learning_rate": 2.8146956797410982e-05, "loss": 2.1102, "step": 8939 }, { "epoch": 2.76, "learning_rate": 2.8111327738173754e-05, "loss": 2.359, "step": 8940 }, { "epoch": 2.76, "learning_rate": 2.8075698678936525e-05, "loss": 2.3723, "step": 8941 }, { "epoch": 2.76, "learning_rate": 2.8040069619699294e-05, "loss": 2.2799, "step": 8942 }, { "epoch": 2.76, "learning_rate": 2.8004440560462065e-05, "loss": 2.269, "step": 8943 }, { "epoch": 2.76, "learning_rate": 2.7968811501224837e-05, "loss": 2.3846, "step": 8944 }, { "epoch": 2.76, "learning_rate": 2.7933182441987605e-05, "loss": 2.1949, "step": 8945 }, { "epoch": 2.76, "learning_rate": 2.7897553382750377e-05, "loss": 2.1183, "step": 8946 }, { "epoch": 2.76, "learning_rate": 2.786192432351315e-05, "loss": 2.0441, "step": 8947 }, { "epoch": 2.76, "learning_rate": 2.7826295264275917e-05, "loss": 2.221, "step": 8948 }, { "epoch": 2.76, "learning_rate": 2.779066620503869e-05, "loss": 1.7355, "step": 8949 }, { "epoch": 2.76, "learning_rate": 2.775503714580146e-05, "loss": 2.0264, "step": 8950 }, { "epoch": 2.76, "learning_rate": 2.771940808656423e-05, "loss": 1.9366, "step": 8951 }, { "epoch": 2.76, "learning_rate": 2.7683779027327e-05, "loss": 2.0758, "step": 8952 }, { "epoch": 2.76, "learning_rate": 2.7648149968089776e-05, "loss": 2.0584, "step": 8953 }, { "epoch": 2.76, "learning_rate": 2.7612520908852547e-05, "loss": 1.8181, "step": 8954 }, { "epoch": 2.76, "learning_rate": 2.7576891849615316e-05, "loss": 1.9546, "step": 8955 }, { "epoch": 2.76, "learning_rate": 2.7541262790378088e-05, "loss": 1.9036, "step": 8956 }, { "epoch": 2.76, "learning_rate": 2.750563373114086e-05, "loss": 2.015, "step": 8957 }, { "epoch": 2.76, "learning_rate": 2.747000467190363e-05, "loss": 1.9219, "step": 8958 }, { "epoch": 2.77, "learning_rate": 2.74343756126664e-05, "loss": 1.7293, "step": 8959 }, { "epoch": 2.77, "learning_rate": 2.739874655342917e-05, "loss": 1.7931, "step": 8960 }, { "epoch": 2.77, "learning_rate": 2.7363117494191943e-05, "loss": 1.755, "step": 8961 }, { "epoch": 2.77, "learning_rate": 2.732748843495471e-05, "loss": 1.7506, "step": 8962 }, { "epoch": 2.77, "learning_rate": 2.7291859375717483e-05, "loss": 2.0728, "step": 8963 }, { "epoch": 2.77, "learning_rate": 2.7256230316480254e-05, "loss": 1.5895, "step": 8964 }, { "epoch": 2.77, "learning_rate": 2.7220601257243023e-05, "loss": 1.6548, "step": 8965 }, { "epoch": 2.77, "learning_rate": 2.7184972198005794e-05, "loss": 1.4509, "step": 8966 }, { "epoch": 2.77, "learning_rate": 2.7149343138768566e-05, "loss": 1.6684, "step": 8967 }, { "epoch": 2.77, "learning_rate": 2.7113714079531335e-05, "loss": 1.4958, "step": 8968 }, { "epoch": 2.77, "learning_rate": 2.707808502029411e-05, "loss": 1.3203, "step": 8969 }, { "epoch": 2.77, "learning_rate": 2.704245596105688e-05, "loss": 1.416, "step": 8970 }, { "epoch": 2.77, "learning_rate": 2.7006826901819653e-05, "loss": 1.4314, "step": 8971 }, { "epoch": 2.77, "learning_rate": 2.697119784258242e-05, "loss": 1.4236, "step": 8972 }, { "epoch": 2.77, "learning_rate": 2.6935568783345193e-05, "loss": 1.2881, "step": 8973 }, { "epoch": 2.77, "learning_rate": 2.6899939724107965e-05, "loss": 1.2562, "step": 8974 }, { "epoch": 2.77, "learning_rate": 2.6864310664870733e-05, "loss": 0.9651, "step": 8975 }, { "epoch": 2.77, "learning_rate": 2.6828681605633505e-05, "loss": 1.0363, "step": 8976 }, { "epoch": 2.77, "learning_rate": 2.6793052546396277e-05, "loss": 1.0563, "step": 8977 }, { "epoch": 2.77, "learning_rate": 2.6757423487159045e-05, "loss": 1.0939, "step": 8978 }, { "epoch": 2.77, "learning_rate": 2.6721794427921817e-05, "loss": 0.8473, "step": 8979 }, { "epoch": 2.77, "learning_rate": 2.6686165368684588e-05, "loss": 1.0253, "step": 8980 }, { "epoch": 2.77, "learning_rate": 2.665053630944736e-05, "loss": 3.0819, "step": 8981 }, { "epoch": 2.77, "learning_rate": 2.6614907250210128e-05, "loss": 2.7862, "step": 8982 }, { "epoch": 2.77, "learning_rate": 2.65792781909729e-05, "loss": 3.0111, "step": 8983 }, { "epoch": 2.77, "learning_rate": 2.6543649131735672e-05, "loss": 2.7109, "step": 8984 }, { "epoch": 2.77, "learning_rate": 2.650802007249844e-05, "loss": 2.61, "step": 8985 }, { "epoch": 2.77, "learning_rate": 2.6472391013261215e-05, "loss": 2.0676, "step": 8986 }, { "epoch": 2.77, "learning_rate": 2.6436761954023987e-05, "loss": 2.7454, "step": 8987 }, { "epoch": 2.77, "learning_rate": 2.640113289478676e-05, "loss": 2.092, "step": 8988 }, { "epoch": 2.77, "learning_rate": 2.6365503835549527e-05, "loss": 2.4148, "step": 8989 }, { "epoch": 2.77, "learning_rate": 2.63298747763123e-05, "loss": 2.1129, "step": 8990 }, { "epoch": 2.77, "learning_rate": 2.629424571707507e-05, "loss": 2.2946, "step": 8991 }, { "epoch": 2.78, "learning_rate": 2.625861665783784e-05, "loss": 2.2556, "step": 8992 }, { "epoch": 2.78, "learning_rate": 2.622298759860061e-05, "loss": 2.2588, "step": 8993 }, { "epoch": 2.78, "learning_rate": 2.6187358539363382e-05, "loss": 2.053, "step": 8994 }, { "epoch": 2.78, "learning_rate": 2.615172948012615e-05, "loss": 2.323, "step": 8995 }, { "epoch": 2.78, "learning_rate": 2.6116100420888922e-05, "loss": 2.0555, "step": 8996 }, { "epoch": 2.78, "learning_rate": 2.6080471361651694e-05, "loss": 1.8683, "step": 8997 }, { "epoch": 2.78, "learning_rate": 2.6044842302414462e-05, "loss": 2.2882, "step": 8998 }, { "epoch": 2.78, "learning_rate": 2.6009213243177234e-05, "loss": 2.1629, "step": 8999 }, { "epoch": 2.78, "learning_rate": 2.5973584183940006e-05, "loss": 2.1419, "step": 9000 }, { "epoch": 2.78, "eval_bleu": 3.7749272962721704e-15, "eval_loss": 3.1467533111572266, "eval_runtime": 2628.7198, "eval_samples_per_second": 5.615, "eval_steps_per_second": 0.702, "step": 9000 }, { "epoch": 2.78, "learning_rate": 2.5937955124702777e-05, "loss": 1.97, "step": 9001 }, { "epoch": 2.78, "learning_rate": 2.5902326065465552e-05, "loss": 2.1375, "step": 9002 }, { "epoch": 2.78, "learning_rate": 2.586669700622832e-05, "loss": 2.2439, "step": 9003 }, { "epoch": 2.78, "learning_rate": 2.5831067946991092e-05, "loss": 1.6859, "step": 9004 }, { "epoch": 2.78, "learning_rate": 2.5795438887753864e-05, "loss": 2.096, "step": 9005 }, { "epoch": 2.78, "learning_rate": 2.5759809828516632e-05, "loss": 1.8178, "step": 9006 }, { "epoch": 2.78, "learning_rate": 2.5724180769279404e-05, "loss": 1.8452, "step": 9007 }, { "epoch": 2.78, "learning_rate": 2.5688551710042176e-05, "loss": 1.6782, "step": 9008 }, { "epoch": 2.78, "learning_rate": 2.5652922650804944e-05, "loss": 1.7384, "step": 9009 }, { "epoch": 2.78, "learning_rate": 2.5617293591567716e-05, "loss": 1.7038, "step": 9010 }, { "epoch": 2.78, "learning_rate": 2.5581664532330488e-05, "loss": 1.8304, "step": 9011 }, { "epoch": 2.78, "learning_rate": 2.5546035473093256e-05, "loss": 1.6061, "step": 9012 }, { "epoch": 2.78, "learning_rate": 2.5510406413856028e-05, "loss": 1.8001, "step": 9013 }, { "epoch": 2.78, "learning_rate": 2.54747773546188e-05, "loss": 1.6911, "step": 9014 }, { "epoch": 2.78, "learning_rate": 2.5439148295381568e-05, "loss": 1.3874, "step": 9015 }, { "epoch": 2.78, "learning_rate": 2.540351923614434e-05, "loss": 1.3692, "step": 9016 }, { "epoch": 2.78, "learning_rate": 2.536789017690711e-05, "loss": 1.4226, "step": 9017 }, { "epoch": 2.78, "learning_rate": 2.5332261117669886e-05, "loss": 1.3455, "step": 9018 }, { "epoch": 2.78, "learning_rate": 2.5296632058432654e-05, "loss": 1.2716, "step": 9019 }, { "epoch": 2.78, "learning_rate": 2.5261002999195426e-05, "loss": 1.5093, "step": 9020 }, { "epoch": 2.78, "learning_rate": 2.5225373939958198e-05, "loss": 1.3959, "step": 9021 }, { "epoch": 2.78, "learning_rate": 2.5189744880720966e-05, "loss": 1.3418, "step": 9022 }, { "epoch": 2.78, "learning_rate": 2.5154115821483738e-05, "loss": 1.1889, "step": 9023 }, { "epoch": 2.79, "learning_rate": 2.511848676224651e-05, "loss": 1.1683, "step": 9024 }, { "epoch": 2.79, "learning_rate": 2.508285770300928e-05, "loss": 1.1083, "step": 9025 }, { "epoch": 2.79, "learning_rate": 2.504722864377205e-05, "loss": 1.0367, "step": 9026 }, { "epoch": 2.79, "learning_rate": 2.501159958453482e-05, "loss": 1.1692, "step": 9027 }, { "epoch": 2.79, "learning_rate": 2.4975970525297593e-05, "loss": 0.9068, "step": 9028 }, { "epoch": 2.79, "learning_rate": 2.494034146606036e-05, "loss": 1.0479, "step": 9029 }, { "epoch": 2.79, "learning_rate": 2.4904712406823133e-05, "loss": 1.0714, "step": 9030 }, { "epoch": 2.79, "learning_rate": 2.4869083347585905e-05, "loss": 3.3258, "step": 9031 }, { "epoch": 2.79, "learning_rate": 2.4833454288348673e-05, "loss": 2.8721, "step": 9032 }, { "epoch": 2.79, "learning_rate": 2.4797825229111445e-05, "loss": 2.6033, "step": 9033 }, { "epoch": 2.79, "learning_rate": 2.4762196169874217e-05, "loss": 2.5923, "step": 9034 }, { "epoch": 2.79, "learning_rate": 2.4726567110636992e-05, "loss": 2.4301, "step": 9035 }, { "epoch": 2.79, "learning_rate": 2.469093805139976e-05, "loss": 2.4921, "step": 9036 }, { "epoch": 2.79, "learning_rate": 2.4655308992162532e-05, "loss": 2.5695, "step": 9037 }, { "epoch": 2.79, "learning_rate": 2.4619679932925303e-05, "loss": 2.4802, "step": 9038 }, { "epoch": 2.79, "learning_rate": 2.4584050873688072e-05, "loss": 2.1631, "step": 9039 }, { "epoch": 2.79, "learning_rate": 2.4548421814450843e-05, "loss": 2.2621, "step": 9040 }, { "epoch": 2.79, "learning_rate": 2.4512792755213615e-05, "loss": 2.3666, "step": 9041 }, { "epoch": 2.79, "learning_rate": 2.4477163695976384e-05, "loss": 2.185, "step": 9042 }, { "epoch": 2.79, "learning_rate": 2.4441534636739155e-05, "loss": 2.1612, "step": 9043 }, { "epoch": 2.79, "learning_rate": 2.4405905577501927e-05, "loss": 2.1222, "step": 9044 }, { "epoch": 2.79, "learning_rate": 2.43702765182647e-05, "loss": 2.5561, "step": 9045 }, { "epoch": 2.79, "learning_rate": 2.4334647459027467e-05, "loss": 2.2493, "step": 9046 }, { "epoch": 2.79, "learning_rate": 2.429901839979024e-05, "loss": 2.28, "step": 9047 }, { "epoch": 2.79, "learning_rate": 2.426338934055301e-05, "loss": 1.9667, "step": 9048 }, { "epoch": 2.79, "learning_rate": 2.422776028131578e-05, "loss": 1.9692, "step": 9049 }, { "epoch": 2.79, "learning_rate": 2.419213122207855e-05, "loss": 2.1087, "step": 9050 }, { "epoch": 2.79, "learning_rate": 2.4156502162841326e-05, "loss": 1.8012, "step": 9051 }, { "epoch": 2.79, "learning_rate": 2.4120873103604097e-05, "loss": 1.9767, "step": 9052 }, { "epoch": 2.79, "learning_rate": 2.4085244044366866e-05, "loss": 1.9178, "step": 9053 }, { "epoch": 2.79, "learning_rate": 2.4049614985129637e-05, "loss": 1.8851, "step": 9054 }, { "epoch": 2.79, "learning_rate": 2.401398592589241e-05, "loss": 1.9495, "step": 9055 }, { "epoch": 2.8, "learning_rate": 2.3978356866655177e-05, "loss": 2.0378, "step": 9056 }, { "epoch": 2.8, "learning_rate": 2.394272780741795e-05, "loss": 1.8832, "step": 9057 }, { "epoch": 2.8, "learning_rate": 2.390709874818072e-05, "loss": 1.7432, "step": 9058 }, { "epoch": 2.8, "learning_rate": 2.387146968894349e-05, "loss": 1.838, "step": 9059 }, { "epoch": 2.8, "learning_rate": 2.383584062970626e-05, "loss": 1.818, "step": 9060 }, { "epoch": 2.8, "learning_rate": 2.3800211570469032e-05, "loss": 1.6792, "step": 9061 }, { "epoch": 2.8, "learning_rate": 2.37645825112318e-05, "loss": 1.775, "step": 9062 }, { "epoch": 2.8, "learning_rate": 2.3728953451994572e-05, "loss": 1.6518, "step": 9063 }, { "epoch": 2.8, "learning_rate": 2.3693324392757344e-05, "loss": 1.2475, "step": 9064 }, { "epoch": 2.8, "learning_rate": 2.3657695333520113e-05, "loss": 1.6546, "step": 9065 }, { "epoch": 2.8, "learning_rate": 2.3622066274282884e-05, "loss": 1.5611, "step": 9066 }, { "epoch": 2.8, "learning_rate": 2.358643721504566e-05, "loss": 1.5995, "step": 9067 }, { "epoch": 2.8, "learning_rate": 2.355080815580843e-05, "loss": 1.316, "step": 9068 }, { "epoch": 2.8, "learning_rate": 2.3515179096571203e-05, "loss": 1.1295, "step": 9069 }, { "epoch": 2.8, "learning_rate": 2.347955003733397e-05, "loss": 1.2791, "step": 9070 }, { "epoch": 2.8, "learning_rate": 2.3443920978096743e-05, "loss": 1.2844, "step": 9071 }, { "epoch": 2.8, "learning_rate": 2.3408291918859515e-05, "loss": 1.1046, "step": 9072 }, { "epoch": 2.8, "learning_rate": 2.3372662859622283e-05, "loss": 1.1296, "step": 9073 }, { "epoch": 2.8, "learning_rate": 2.3337033800385055e-05, "loss": 1.0497, "step": 9074 }, { "epoch": 2.8, "learning_rate": 2.3301404741147826e-05, "loss": 1.4272, "step": 9075 }, { "epoch": 2.8, "learning_rate": 2.3265775681910595e-05, "loss": 1.1721, "step": 9076 }, { "epoch": 2.8, "learning_rate": 2.3230146622673366e-05, "loss": 1.0827, "step": 9077 }, { "epoch": 2.8, "learning_rate": 2.3194517563436138e-05, "loss": 1.0484, "step": 9078 }, { "epoch": 2.8, "learning_rate": 2.3158888504198906e-05, "loss": 0.8893, "step": 9079 }, { "epoch": 2.8, "learning_rate": 2.3123259444961678e-05, "loss": 1.1005, "step": 9080 }, { "epoch": 2.8, "learning_rate": 2.308763038572445e-05, "loss": 3.1303, "step": 9081 }, { "epoch": 2.8, "learning_rate": 2.3052001326487218e-05, "loss": 2.8084, "step": 9082 }, { "epoch": 2.8, "learning_rate": 2.301637226724999e-05, "loss": 2.5695, "step": 9083 }, { "epoch": 2.8, "learning_rate": 2.2980743208012765e-05, "loss": 2.5344, "step": 9084 }, { "epoch": 2.8, "learning_rate": 2.2945114148775537e-05, "loss": 2.677, "step": 9085 }, { "epoch": 2.8, "learning_rate": 2.2909485089538305e-05, "loss": 2.6768, "step": 9086 }, { "epoch": 2.8, "learning_rate": 2.2873856030301077e-05, "loss": 2.2205, "step": 9087 }, { "epoch": 2.8, "learning_rate": 2.283822697106385e-05, "loss": 2.2422, "step": 9088 }, { "epoch": 2.81, "learning_rate": 2.280259791182662e-05, "loss": 2.4955, "step": 9089 }, { "epoch": 2.81, "learning_rate": 2.276696885258939e-05, "loss": 1.9918, "step": 9090 }, { "epoch": 2.81, "learning_rate": 2.273133979335216e-05, "loss": 2.1013, "step": 9091 }, { "epoch": 2.81, "learning_rate": 2.2695710734114932e-05, "loss": 2.6919, "step": 9092 }, { "epoch": 2.81, "learning_rate": 2.26600816748777e-05, "loss": 2.2238, "step": 9093 }, { "epoch": 2.81, "learning_rate": 2.2624452615640472e-05, "loss": 2.1374, "step": 9094 }, { "epoch": 2.81, "learning_rate": 2.2588823556403244e-05, "loss": 2.2546, "step": 9095 }, { "epoch": 2.81, "learning_rate": 2.2553194497166012e-05, "loss": 2.4077, "step": 9096 }, { "epoch": 2.81, "learning_rate": 2.2517565437928784e-05, "loss": 2.2144, "step": 9097 }, { "epoch": 2.81, "learning_rate": 2.2481936378691555e-05, "loss": 2.4087, "step": 9098 }, { "epoch": 2.81, "learning_rate": 2.2446307319454324e-05, "loss": 2.1558, "step": 9099 }, { "epoch": 2.81, "learning_rate": 2.24106782602171e-05, "loss": 1.9806, "step": 9100 }, { "epoch": 2.81, "learning_rate": 2.237504920097987e-05, "loss": 1.867, "step": 9101 }, { "epoch": 2.81, "learning_rate": 2.2339420141742642e-05, "loss": 1.9735, "step": 9102 }, { "epoch": 2.81, "learning_rate": 2.230379108250541e-05, "loss": 2.0857, "step": 9103 }, { "epoch": 2.81, "learning_rate": 2.2268162023268182e-05, "loss": 1.9958, "step": 9104 }, { "epoch": 2.81, "learning_rate": 2.2232532964030954e-05, "loss": 1.8067, "step": 9105 }, { "epoch": 2.81, "learning_rate": 2.2196903904793722e-05, "loss": 1.9471, "step": 9106 }, { "epoch": 2.81, "learning_rate": 2.2161274845556494e-05, "loss": 1.7782, "step": 9107 }, { "epoch": 2.81, "learning_rate": 2.2125645786319266e-05, "loss": 1.7547, "step": 9108 }, { "epoch": 2.81, "learning_rate": 2.2090016727082034e-05, "loss": 1.7654, "step": 9109 }, { "epoch": 2.81, "learning_rate": 2.2054387667844806e-05, "loss": 1.7527, "step": 9110 }, { "epoch": 2.81, "learning_rate": 2.2018758608607577e-05, "loss": 1.6685, "step": 9111 }, { "epoch": 2.81, "learning_rate": 2.198312954937035e-05, "loss": 1.9333, "step": 9112 }, { "epoch": 2.81, "learning_rate": 2.1947500490133117e-05, "loss": 1.5426, "step": 9113 }, { "epoch": 2.81, "learning_rate": 2.191187143089589e-05, "loss": 1.5614, "step": 9114 }, { "epoch": 2.81, "learning_rate": 2.187624237165866e-05, "loss": 1.5731, "step": 9115 }, { "epoch": 2.81, "learning_rate": 2.1840613312421436e-05, "loss": 1.4687, "step": 9116 }, { "epoch": 2.81, "learning_rate": 2.1804984253184204e-05, "loss": 1.3526, "step": 9117 }, { "epoch": 2.81, "learning_rate": 2.1769355193946976e-05, "loss": 1.5758, "step": 9118 }, { "epoch": 2.81, "learning_rate": 2.1733726134709748e-05, "loss": 1.2259, "step": 9119 }, { "epoch": 2.81, "learning_rate": 2.1698097075472516e-05, "loss": 1.2875, "step": 9120 }, { "epoch": 2.82, "learning_rate": 2.1662468016235288e-05, "loss": 1.3442, "step": 9121 }, { "epoch": 2.82, "learning_rate": 2.162683895699806e-05, "loss": 1.0998, "step": 9122 }, { "epoch": 2.82, "learning_rate": 2.1591209897760828e-05, "loss": 1.3629, "step": 9123 }, { "epoch": 2.82, "learning_rate": 2.15555808385236e-05, "loss": 1.035, "step": 9124 }, { "epoch": 2.82, "learning_rate": 2.151995177928637e-05, "loss": 1.1485, "step": 9125 }, { "epoch": 2.82, "learning_rate": 2.148432272004914e-05, "loss": 1.0344, "step": 9126 }, { "epoch": 2.82, "learning_rate": 2.144869366081191e-05, "loss": 1.0704, "step": 9127 }, { "epoch": 2.82, "learning_rate": 2.1413064601574683e-05, "loss": 0.916, "step": 9128 }, { "epoch": 2.82, "learning_rate": 2.137743554233745e-05, "loss": 1.0182, "step": 9129 }, { "epoch": 2.82, "learning_rate": 2.1341806483100223e-05, "loss": 1.0006, "step": 9130 }, { "epoch": 2.82, "learning_rate": 2.1306177423862995e-05, "loss": 3.3531, "step": 9131 }, { "epoch": 2.82, "learning_rate": 2.1270548364625766e-05, "loss": 3.0445, "step": 9132 }, { "epoch": 2.82, "learning_rate": 2.123491930538854e-05, "loss": 2.4459, "step": 9133 }, { "epoch": 2.82, "learning_rate": 2.119929024615131e-05, "loss": 2.5358, "step": 9134 }, { "epoch": 2.82, "learning_rate": 2.116366118691408e-05, "loss": 2.1953, "step": 9135 }, { "epoch": 2.82, "learning_rate": 2.1128032127676853e-05, "loss": 2.3559, "step": 9136 }, { "epoch": 2.82, "learning_rate": 2.109240306843962e-05, "loss": 2.4563, "step": 9137 }, { "epoch": 2.82, "learning_rate": 2.1056774009202393e-05, "loss": 2.3574, "step": 9138 }, { "epoch": 2.82, "learning_rate": 2.1021144949965165e-05, "loss": 2.0433, "step": 9139 }, { "epoch": 2.82, "learning_rate": 2.0985515890727933e-05, "loss": 2.3527, "step": 9140 }, { "epoch": 2.82, "learning_rate": 2.0949886831490705e-05, "loss": 2.3369, "step": 9141 }, { "epoch": 2.82, "learning_rate": 2.0914257772253477e-05, "loss": 2.4, "step": 9142 }, { "epoch": 2.82, "learning_rate": 2.0878628713016245e-05, "loss": 2.0807, "step": 9143 }, { "epoch": 2.82, "learning_rate": 2.0842999653779017e-05, "loss": 2.3883, "step": 9144 }, { "epoch": 2.82, "learning_rate": 2.080737059454179e-05, "loss": 2.362, "step": 9145 }, { "epoch": 2.82, "learning_rate": 2.0771741535304557e-05, "loss": 2.0861, "step": 9146 }, { "epoch": 2.82, "learning_rate": 2.073611247606733e-05, "loss": 2.0139, "step": 9147 }, { "epoch": 2.82, "learning_rate": 2.07004834168301e-05, "loss": 2.0064, "step": 9148 }, { "epoch": 2.82, "learning_rate": 2.0664854357592875e-05, "loss": 2.1708, "step": 9149 }, { "epoch": 2.82, "learning_rate": 2.0629225298355644e-05, "loss": 2.0394, "step": 9150 }, { "epoch": 2.82, "learning_rate": 2.0593596239118415e-05, "loss": 1.9574, "step": 9151 }, { "epoch": 2.82, "learning_rate": 2.0557967179881187e-05, "loss": 1.8588, "step": 9152 }, { "epoch": 2.82, "learning_rate": 2.052233812064396e-05, "loss": 2.0818, "step": 9153 }, { "epoch": 2.83, "learning_rate": 2.0486709061406727e-05, "loss": 1.9408, "step": 9154 }, { "epoch": 2.83, "learning_rate": 2.04510800021695e-05, "loss": 1.9927, "step": 9155 }, { "epoch": 2.83, "learning_rate": 2.041545094293227e-05, "loss": 1.9435, "step": 9156 }, { "epoch": 2.83, "learning_rate": 2.037982188369504e-05, "loss": 2.0422, "step": 9157 }, { "epoch": 2.83, "learning_rate": 2.034419282445781e-05, "loss": 1.8605, "step": 9158 }, { "epoch": 2.83, "learning_rate": 2.0308563765220582e-05, "loss": 1.8695, "step": 9159 }, { "epoch": 2.83, "learning_rate": 2.027293470598335e-05, "loss": 1.5394, "step": 9160 }, { "epoch": 2.83, "learning_rate": 2.0237305646746126e-05, "loss": 1.7766, "step": 9161 }, { "epoch": 2.83, "learning_rate": 2.0201676587508894e-05, "loss": 1.7102, "step": 9162 }, { "epoch": 2.83, "learning_rate": 2.0166047528271666e-05, "loss": 1.5642, "step": 9163 }, { "epoch": 2.83, "learning_rate": 2.0130418469034437e-05, "loss": 1.7443, "step": 9164 }, { "epoch": 2.83, "learning_rate": 2.0094789409797206e-05, "loss": 1.4618, "step": 9165 }, { "epoch": 2.83, "learning_rate": 2.0059160350559977e-05, "loss": 1.3652, "step": 9166 }, { "epoch": 2.83, "learning_rate": 2.002353129132275e-05, "loss": 1.4272, "step": 9167 }, { "epoch": 2.83, "learning_rate": 1.9987902232085517e-05, "loss": 1.4589, "step": 9168 }, { "epoch": 2.83, "learning_rate": 1.995227317284829e-05, "loss": 1.2943, "step": 9169 }, { "epoch": 2.83, "learning_rate": 1.991664411361106e-05, "loss": 1.4461, "step": 9170 }, { "epoch": 2.83, "learning_rate": 1.9881015054373833e-05, "loss": 1.4581, "step": 9171 }, { "epoch": 2.83, "learning_rate": 1.9845385995136604e-05, "loss": 1.3286, "step": 9172 }, { "epoch": 2.83, "learning_rate": 1.9809756935899373e-05, "loss": 1.2587, "step": 9173 }, { "epoch": 2.83, "learning_rate": 1.9774127876662144e-05, "loss": 1.0284, "step": 9174 }, { "epoch": 2.83, "learning_rate": 1.9738498817424916e-05, "loss": 1.1601, "step": 9175 }, { "epoch": 2.83, "learning_rate": 1.9702869758187688e-05, "loss": 1.2744, "step": 9176 }, { "epoch": 2.83, "learning_rate": 1.9667240698950456e-05, "loss": 1.0123, "step": 9177 }, { "epoch": 2.83, "learning_rate": 1.963161163971323e-05, "loss": 1.1021, "step": 9178 }, { "epoch": 2.83, "learning_rate": 1.9595982580476e-05, "loss": 0.915, "step": 9179 }, { "epoch": 2.83, "learning_rate": 1.956035352123877e-05, "loss": 0.9215, "step": 9180 }, { "epoch": 2.83, "learning_rate": 1.9524724462001543e-05, "loss": 3.1756, "step": 9181 }, { "epoch": 2.83, "learning_rate": 1.948909540276431e-05, "loss": 2.6713, "step": 9182 }, { "epoch": 2.83, "learning_rate": 1.9453466343527083e-05, "loss": 2.7708, "step": 9183 }, { "epoch": 2.83, "learning_rate": 1.9417837284289855e-05, "loss": 2.4908, "step": 9184 }, { "epoch": 2.83, "learning_rate": 1.9382208225052623e-05, "loss": 2.5566, "step": 9185 }, { "epoch": 2.84, "learning_rate": 1.9346579165815398e-05, "loss": 2.3188, "step": 9186 }, { "epoch": 2.84, "learning_rate": 1.9310950106578166e-05, "loss": 2.6201, "step": 9187 }, { "epoch": 2.84, "learning_rate": 1.9275321047340938e-05, "loss": 2.2284, "step": 9188 }, { "epoch": 2.84, "learning_rate": 1.923969198810371e-05, "loss": 2.4743, "step": 9189 }, { "epoch": 2.84, "learning_rate": 1.9204062928866478e-05, "loss": 2.0805, "step": 9190 }, { "epoch": 2.84, "learning_rate": 1.916843386962925e-05, "loss": 2.1274, "step": 9191 }, { "epoch": 2.84, "learning_rate": 1.913280481039202e-05, "loss": 2.1963, "step": 9192 }, { "epoch": 2.84, "learning_rate": 1.909717575115479e-05, "loss": 2.2713, "step": 9193 }, { "epoch": 2.84, "learning_rate": 1.9061546691917565e-05, "loss": 2.211, "step": 9194 }, { "epoch": 2.84, "learning_rate": 1.9025917632680333e-05, "loss": 2.1541, "step": 9195 }, { "epoch": 2.84, "learning_rate": 1.8990288573443105e-05, "loss": 2.1459, "step": 9196 }, { "epoch": 2.84, "learning_rate": 1.8954659514205877e-05, "loss": 1.9152, "step": 9197 }, { "epoch": 2.84, "learning_rate": 1.891903045496865e-05, "loss": 1.9448, "step": 9198 }, { "epoch": 2.84, "learning_rate": 1.8883401395731417e-05, "loss": 1.888, "step": 9199 }, { "epoch": 2.84, "learning_rate": 1.884777233649419e-05, "loss": 2.0033, "step": 9200 }, { "epoch": 2.84, "learning_rate": 1.881214327725696e-05, "loss": 1.8299, "step": 9201 }, { "epoch": 2.84, "learning_rate": 1.8776514218019732e-05, "loss": 1.7124, "step": 9202 }, { "epoch": 2.84, "learning_rate": 1.8740885158782504e-05, "loss": 1.8388, "step": 9203 }, { "epoch": 2.84, "learning_rate": 1.8705256099545272e-05, "loss": 1.9312, "step": 9204 }, { "epoch": 2.84, "learning_rate": 1.8669627040308044e-05, "loss": 1.7445, "step": 9205 }, { "epoch": 2.84, "learning_rate": 1.8633997981070815e-05, "loss": 1.9263, "step": 9206 }, { "epoch": 2.84, "learning_rate": 1.8598368921833584e-05, "loss": 1.7701, "step": 9207 }, { "epoch": 2.84, "learning_rate": 1.8562739862596355e-05, "loss": 1.6873, "step": 9208 }, { "epoch": 2.84, "learning_rate": 1.8527110803359127e-05, "loss": 1.8336, "step": 9209 }, { "epoch": 2.84, "learning_rate": 1.84914817441219e-05, "loss": 1.609, "step": 9210 }, { "epoch": 2.84, "learning_rate": 1.845585268488467e-05, "loss": 1.8059, "step": 9211 }, { "epoch": 2.84, "learning_rate": 1.842022362564744e-05, "loss": 1.7066, "step": 9212 }, { "epoch": 2.84, "learning_rate": 1.838459456641021e-05, "loss": 1.7682, "step": 9213 }, { "epoch": 2.84, "learning_rate": 1.8348965507172982e-05, "loss": 1.4516, "step": 9214 }, { "epoch": 2.84, "learning_rate": 1.831333644793575e-05, "loss": 1.6452, "step": 9215 }, { "epoch": 2.84, "learning_rate": 1.8277707388698522e-05, "loss": 1.3835, "step": 9216 }, { "epoch": 2.84, "learning_rate": 1.8242078329461294e-05, "loss": 1.4088, "step": 9217 }, { "epoch": 2.84, "learning_rate": 1.8206449270224062e-05, "loss": 1.3198, "step": 9218 }, { "epoch": 2.85, "learning_rate": 1.8170820210986837e-05, "loss": 1.4678, "step": 9219 }, { "epoch": 2.85, "learning_rate": 1.813519115174961e-05, "loss": 1.4052, "step": 9220 }, { "epoch": 2.85, "learning_rate": 1.8099562092512377e-05, "loss": 1.3398, "step": 9221 }, { "epoch": 2.85, "learning_rate": 1.806393303327515e-05, "loss": 1.2748, "step": 9222 }, { "epoch": 2.85, "learning_rate": 1.802830397403792e-05, "loss": 1.1996, "step": 9223 }, { "epoch": 2.85, "learning_rate": 1.799267491480069e-05, "loss": 1.2748, "step": 9224 }, { "epoch": 2.85, "learning_rate": 1.795704585556346e-05, "loss": 1.0661, "step": 9225 }, { "epoch": 2.85, "learning_rate": 1.7921416796326233e-05, "loss": 1.1277, "step": 9226 }, { "epoch": 2.85, "learning_rate": 1.7885787737089004e-05, "loss": 0.9891, "step": 9227 }, { "epoch": 2.85, "learning_rate": 1.7850158677851776e-05, "loss": 1.0338, "step": 9228 }, { "epoch": 2.85, "learning_rate": 1.7814529618614544e-05, "loss": 0.9677, "step": 9229 }, { "epoch": 2.85, "learning_rate": 1.7778900559377316e-05, "loss": 0.937, "step": 9230 }, { "epoch": 2.85, "learning_rate": 1.7743271500140088e-05, "loss": 3.4525, "step": 9231 }, { "epoch": 2.85, "learning_rate": 1.7707642440902856e-05, "loss": 2.9205, "step": 9232 }, { "epoch": 2.85, "learning_rate": 1.7672013381665628e-05, "loss": 2.8542, "step": 9233 }, { "epoch": 2.85, "learning_rate": 1.76363843224284e-05, "loss": 2.453, "step": 9234 }, { "epoch": 2.85, "learning_rate": 1.760075526319117e-05, "loss": 2.4218, "step": 9235 }, { "epoch": 2.85, "learning_rate": 1.7565126203953943e-05, "loss": 2.3871, "step": 9236 }, { "epoch": 2.85, "learning_rate": 1.752949714471671e-05, "loss": 2.375, "step": 9237 }, { "epoch": 2.85, "learning_rate": 1.7493868085479483e-05, "loss": 2.4762, "step": 9238 }, { "epoch": 2.85, "learning_rate": 1.7458239026242255e-05, "loss": 2.6032, "step": 9239 }, { "epoch": 2.85, "learning_rate": 1.7422609967005026e-05, "loss": 2.0634, "step": 9240 }, { "epoch": 2.85, "learning_rate": 1.7386980907767795e-05, "loss": 2.6034, "step": 9241 }, { "epoch": 2.85, "learning_rate": 1.7351351848530566e-05, "loss": 2.0552, "step": 9242 }, { "epoch": 2.85, "learning_rate": 1.7315722789293338e-05, "loss": 2.2523, "step": 9243 }, { "epoch": 2.85, "learning_rate": 1.728009373005611e-05, "loss": 2.1175, "step": 9244 }, { "epoch": 2.85, "learning_rate": 1.724446467081888e-05, "loss": 2.2848, "step": 9245 }, { "epoch": 2.85, "learning_rate": 1.720883561158165e-05, "loss": 1.9653, "step": 9246 }, { "epoch": 2.85, "learning_rate": 1.717320655234442e-05, "loss": 2.2814, "step": 9247 }, { "epoch": 2.85, "learning_rate": 1.7137577493107193e-05, "loss": 1.8074, "step": 9248 }, { "epoch": 2.85, "learning_rate": 1.710194843386996e-05, "loss": 1.9985, "step": 9249 }, { "epoch": 2.85, "learning_rate": 1.7066319374632733e-05, "loss": 2.271, "step": 9250 }, { "epoch": 2.86, "learning_rate": 1.7030690315395505e-05, "loss": 2.0514, "step": 9251 }, { "epoch": 2.86, "learning_rate": 1.6995061256158277e-05, "loss": 2.0182, "step": 9252 }, { "epoch": 2.86, "learning_rate": 1.695943219692105e-05, "loss": 1.961, "step": 9253 }, { "epoch": 2.86, "learning_rate": 1.6923803137683817e-05, "loss": 1.9745, "step": 9254 }, { "epoch": 2.86, "learning_rate": 1.688817407844659e-05, "loss": 1.9143, "step": 9255 }, { "epoch": 2.86, "learning_rate": 1.685254501920936e-05, "loss": 1.6308, "step": 9256 }, { "epoch": 2.86, "learning_rate": 1.681691595997213e-05, "loss": 1.4813, "step": 9257 }, { "epoch": 2.86, "learning_rate": 1.67812869007349e-05, "loss": 1.7538, "step": 9258 }, { "epoch": 2.86, "learning_rate": 1.6745657841497672e-05, "loss": 1.8964, "step": 9259 }, { "epoch": 2.86, "learning_rate": 1.6710028782260444e-05, "loss": 1.7032, "step": 9260 }, { "epoch": 2.86, "learning_rate": 1.6674399723023215e-05, "loss": 1.6049, "step": 9261 }, { "epoch": 2.86, "learning_rate": 1.6638770663785987e-05, "loss": 1.7524, "step": 9262 }, { "epoch": 2.86, "learning_rate": 1.6603141604548755e-05, "loss": 1.8346, "step": 9263 }, { "epoch": 2.86, "learning_rate": 1.6567512545311527e-05, "loss": 1.5288, "step": 9264 }, { "epoch": 2.86, "learning_rate": 1.65318834860743e-05, "loss": 1.4837, "step": 9265 }, { "epoch": 2.86, "learning_rate": 1.6496254426837067e-05, "loss": 1.3777, "step": 9266 }, { "epoch": 2.86, "learning_rate": 1.646062536759984e-05, "loss": 1.4537, "step": 9267 }, { "epoch": 2.86, "learning_rate": 1.642499630836261e-05, "loss": 1.1861, "step": 9268 }, { "epoch": 2.86, "learning_rate": 1.6389367249125382e-05, "loss": 1.3106, "step": 9269 }, { "epoch": 2.86, "learning_rate": 1.6353738189888154e-05, "loss": 1.1824, "step": 9270 }, { "epoch": 2.86, "learning_rate": 1.6318109130650922e-05, "loss": 1.2435, "step": 9271 }, { "epoch": 2.86, "learning_rate": 1.6282480071413694e-05, "loss": 1.1592, "step": 9272 }, { "epoch": 2.86, "learning_rate": 1.6246851012176466e-05, "loss": 1.2488, "step": 9273 }, { "epoch": 2.86, "learning_rate": 1.6211221952939234e-05, "loss": 1.1027, "step": 9274 }, { "epoch": 2.86, "learning_rate": 1.6175592893702006e-05, "loss": 1.157, "step": 9275 }, { "epoch": 2.86, "learning_rate": 1.6139963834464778e-05, "loss": 1.0814, "step": 9276 }, { "epoch": 2.86, "learning_rate": 1.610433477522755e-05, "loss": 1.0278, "step": 9277 }, { "epoch": 2.86, "learning_rate": 1.606870571599032e-05, "loss": 1.0059, "step": 9278 }, { "epoch": 2.86, "learning_rate": 1.603307665675309e-05, "loss": 1.0831, "step": 9279 }, { "epoch": 2.86, "learning_rate": 1.599744759751586e-05, "loss": 1.0278, "step": 9280 }, { "epoch": 2.86, "learning_rate": 1.5961818538278633e-05, "loss": 2.7525, "step": 9281 }, { "epoch": 2.86, "learning_rate": 1.59261894790414e-05, "loss": 3.1347, "step": 9282 }, { "epoch": 2.87, "learning_rate": 1.5890560419804173e-05, "loss": 2.7701, "step": 9283 }, { "epoch": 2.87, "learning_rate": 1.5854931360566948e-05, "loss": 2.5865, "step": 9284 }, { "epoch": 2.87, "learning_rate": 1.5819302301329716e-05, "loss": 2.2108, "step": 9285 }, { "epoch": 2.87, "learning_rate": 1.5783673242092488e-05, "loss": 2.0711, "step": 9286 }, { "epoch": 2.87, "learning_rate": 1.574804418285526e-05, "loss": 2.3002, "step": 9287 }, { "epoch": 2.87, "learning_rate": 1.5712415123618028e-05, "loss": 2.2201, "step": 9288 }, { "epoch": 2.87, "learning_rate": 1.56767860643808e-05, "loss": 2.3435, "step": 9289 }, { "epoch": 2.87, "learning_rate": 1.564115700514357e-05, "loss": 2.282, "step": 9290 }, { "epoch": 2.87, "learning_rate": 1.560552794590634e-05, "loss": 2.0258, "step": 9291 }, { "epoch": 2.87, "learning_rate": 1.5569898886669115e-05, "loss": 2.3217, "step": 9292 }, { "epoch": 2.87, "learning_rate": 1.5534269827431883e-05, "loss": 2.2276, "step": 9293 }, { "epoch": 2.87, "learning_rate": 1.5498640768194655e-05, "loss": 2.1246, "step": 9294 }, { "epoch": 2.87, "learning_rate": 1.5463011708957426e-05, "loss": 2.1234, "step": 9295 }, { "epoch": 2.87, "learning_rate": 1.5427382649720195e-05, "loss": 2.1468, "step": 9296 }, { "epoch": 2.87, "learning_rate": 1.5391753590482966e-05, "loss": 1.939, "step": 9297 }, { "epoch": 2.87, "learning_rate": 1.5356124531245738e-05, "loss": 2.08, "step": 9298 }, { "epoch": 2.87, "learning_rate": 1.5320495472008507e-05, "loss": 2.0662, "step": 9299 }, { "epoch": 2.87, "learning_rate": 1.528486641277128e-05, "loss": 1.9498, "step": 9300 }, { "epoch": 2.87, "learning_rate": 1.5249237353534052e-05, "loss": 2.1597, "step": 9301 }, { "epoch": 2.87, "learning_rate": 1.5213608294296822e-05, "loss": 1.9647, "step": 9302 }, { "epoch": 2.87, "learning_rate": 1.5177979235059593e-05, "loss": 1.8979, "step": 9303 }, { "epoch": 2.87, "learning_rate": 1.5142350175822363e-05, "loss": 1.7903, "step": 9304 }, { "epoch": 2.87, "learning_rate": 1.5106721116585133e-05, "loss": 1.73, "step": 9305 }, { "epoch": 2.87, "learning_rate": 1.5071092057347905e-05, "loss": 1.8519, "step": 9306 }, { "epoch": 2.87, "learning_rate": 1.5035462998110675e-05, "loss": 1.9056, "step": 9307 }, { "epoch": 2.87, "learning_rate": 1.4999833938873445e-05, "loss": 1.7979, "step": 9308 }, { "epoch": 2.87, "learning_rate": 1.4964204879636219e-05, "loss": 1.4864, "step": 9309 }, { "epoch": 2.87, "learning_rate": 1.4928575820398989e-05, "loss": 1.7691, "step": 9310 }, { "epoch": 2.87, "learning_rate": 1.489294676116176e-05, "loss": 1.5696, "step": 9311 }, { "epoch": 2.87, "learning_rate": 1.485731770192453e-05, "loss": 1.6051, "step": 9312 }, { "epoch": 2.87, "learning_rate": 1.4821688642687302e-05, "loss": 1.4115, "step": 9313 }, { "epoch": 2.87, "learning_rate": 1.4786059583450072e-05, "loss": 1.4722, "step": 9314 }, { "epoch": 2.87, "learning_rate": 1.4750430524212842e-05, "loss": 1.4686, "step": 9315 }, { "epoch": 2.88, "learning_rate": 1.4714801464975614e-05, "loss": 1.3013, "step": 9316 }, { "epoch": 2.88, "learning_rate": 1.4679172405738385e-05, "loss": 1.6424, "step": 9317 }, { "epoch": 2.88, "learning_rate": 1.4643543346501157e-05, "loss": 1.4999, "step": 9318 }, { "epoch": 2.88, "learning_rate": 1.4607914287263927e-05, "loss": 1.2951, "step": 9319 }, { "epoch": 2.88, "learning_rate": 1.4572285228026697e-05, "loss": 1.2951, "step": 9320 }, { "epoch": 2.88, "learning_rate": 1.4536656168789469e-05, "loss": 1.2499, "step": 9321 }, { "epoch": 2.88, "learning_rate": 1.4501027109552239e-05, "loss": 1.3529, "step": 9322 }, { "epoch": 2.88, "learning_rate": 1.446539805031501e-05, "loss": 1.2637, "step": 9323 }, { "epoch": 2.88, "learning_rate": 1.442976899107778e-05, "loss": 1.0397, "step": 9324 }, { "epoch": 2.88, "learning_rate": 1.4394139931840554e-05, "loss": 1.2979, "step": 9325 }, { "epoch": 2.88, "learning_rate": 1.4358510872603324e-05, "loss": 1.1258, "step": 9326 }, { "epoch": 2.88, "learning_rate": 1.4322881813366094e-05, "loss": 0.988, "step": 9327 }, { "epoch": 2.88, "learning_rate": 1.4287252754128866e-05, "loss": 1.1183, "step": 9328 }, { "epoch": 2.88, "learning_rate": 1.4251623694891636e-05, "loss": 1.0278, "step": 9329 }, { "epoch": 2.88, "learning_rate": 1.4215994635654406e-05, "loss": 0.882, "step": 9330 }, { "epoch": 2.88, "learning_rate": 1.4180365576417178e-05, "loss": 3.0492, "step": 9331 }, { "epoch": 2.88, "learning_rate": 1.4144736517179948e-05, "loss": 2.7018, "step": 9332 }, { "epoch": 2.88, "learning_rate": 1.4109107457942721e-05, "loss": 2.6609, "step": 9333 }, { "epoch": 2.88, "learning_rate": 1.4073478398705491e-05, "loss": 2.6763, "step": 9334 }, { "epoch": 2.88, "learning_rate": 1.4037849339468263e-05, "loss": 2.3435, "step": 9335 }, { "epoch": 2.88, "learning_rate": 1.4002220280231033e-05, "loss": 2.589, "step": 9336 }, { "epoch": 2.88, "learning_rate": 1.3966591220993803e-05, "loss": 2.3427, "step": 9337 }, { "epoch": 2.88, "learning_rate": 1.3930962161756574e-05, "loss": 2.2953, "step": 9338 }, { "epoch": 2.88, "learning_rate": 1.3895333102519344e-05, "loss": 2.4515, "step": 9339 }, { "epoch": 2.88, "learning_rate": 1.3859704043282114e-05, "loss": 2.2911, "step": 9340 }, { "epoch": 2.88, "learning_rate": 1.3824074984044888e-05, "loss": 2.2437, "step": 9341 }, { "epoch": 2.88, "learning_rate": 1.3788445924807658e-05, "loss": 2.2096, "step": 9342 }, { "epoch": 2.88, "learning_rate": 1.375281686557043e-05, "loss": 2.075, "step": 9343 }, { "epoch": 2.88, "learning_rate": 1.37171878063332e-05, "loss": 2.2111, "step": 9344 }, { "epoch": 2.88, "learning_rate": 1.3681558747095971e-05, "loss": 2.0948, "step": 9345 }, { "epoch": 2.88, "learning_rate": 1.3645929687858741e-05, "loss": 1.9966, "step": 9346 }, { "epoch": 2.88, "learning_rate": 1.3610300628621511e-05, "loss": 2.1935, "step": 9347 }, { "epoch": 2.89, "learning_rate": 1.3574671569384283e-05, "loss": 2.0304, "step": 9348 }, { "epoch": 2.89, "learning_rate": 1.3539042510147055e-05, "loss": 2.0299, "step": 9349 }, { "epoch": 2.89, "learning_rate": 1.3503413450909827e-05, "loss": 1.9996, "step": 9350 }, { "epoch": 2.89, "learning_rate": 1.3467784391672597e-05, "loss": 1.9886, "step": 9351 }, { "epoch": 2.89, "learning_rate": 1.3432155332435367e-05, "loss": 2.1139, "step": 9352 }, { "epoch": 2.89, "learning_rate": 1.3396526273198138e-05, "loss": 1.8984, "step": 9353 }, { "epoch": 2.89, "learning_rate": 1.3360897213960908e-05, "loss": 1.9278, "step": 9354 }, { "epoch": 2.89, "learning_rate": 1.332526815472368e-05, "loss": 1.9168, "step": 9355 }, { "epoch": 2.89, "learning_rate": 1.328963909548645e-05, "loss": 1.8398, "step": 9356 }, { "epoch": 2.89, "learning_rate": 1.325401003624922e-05, "loss": 1.9367, "step": 9357 }, { "epoch": 2.89, "learning_rate": 1.3218380977011993e-05, "loss": 1.6776, "step": 9358 }, { "epoch": 2.89, "learning_rate": 1.3182751917774763e-05, "loss": 1.4912, "step": 9359 }, { "epoch": 2.89, "learning_rate": 1.3147122858537535e-05, "loss": 1.475, "step": 9360 }, { "epoch": 2.89, "learning_rate": 1.3111493799300305e-05, "loss": 1.888, "step": 9361 }, { "epoch": 2.89, "learning_rate": 1.3075864740063075e-05, "loss": 1.7385, "step": 9362 }, { "epoch": 2.89, "learning_rate": 1.3040235680825847e-05, "loss": 1.5282, "step": 9363 }, { "epoch": 2.89, "learning_rate": 1.3004606621588617e-05, "loss": 1.5744, "step": 9364 }, { "epoch": 2.89, "learning_rate": 1.2968977562351389e-05, "loss": 1.5786, "step": 9365 }, { "epoch": 2.89, "learning_rate": 1.293334850311416e-05, "loss": 1.4349, "step": 9366 }, { "epoch": 2.89, "learning_rate": 1.2897719443876932e-05, "loss": 1.4073, "step": 9367 }, { "epoch": 2.89, "learning_rate": 1.2862090384639702e-05, "loss": 1.146, "step": 9368 }, { "epoch": 2.89, "learning_rate": 1.2826461325402472e-05, "loss": 1.4762, "step": 9369 }, { "epoch": 2.89, "learning_rate": 1.2790832266165244e-05, "loss": 1.2287, "step": 9370 }, { "epoch": 2.89, "learning_rate": 1.2755203206928014e-05, "loss": 1.2606, "step": 9371 }, { "epoch": 2.89, "learning_rate": 1.2719574147690784e-05, "loss": 1.0997, "step": 9372 }, { "epoch": 2.89, "learning_rate": 1.2683945088453556e-05, "loss": 1.2352, "step": 9373 }, { "epoch": 2.89, "learning_rate": 1.2648316029216327e-05, "loss": 1.0461, "step": 9374 }, { "epoch": 2.89, "learning_rate": 1.2612686969979099e-05, "loss": 0.9814, "step": 9375 }, { "epoch": 2.89, "learning_rate": 1.2577057910741869e-05, "loss": 1.1068, "step": 9376 }, { "epoch": 2.89, "learning_rate": 1.254142885150464e-05, "loss": 0.9725, "step": 9377 }, { "epoch": 2.89, "learning_rate": 1.250579979226741e-05, "loss": 1.0271, "step": 9378 }, { "epoch": 2.89, "learning_rate": 1.247017073303018e-05, "loss": 0.8541, "step": 9379 }, { "epoch": 2.89, "learning_rate": 1.2434541673792952e-05, "loss": 0.95, "step": 9380 }, { "epoch": 2.9, "learning_rate": 1.2398912614555722e-05, "loss": 3.1451, "step": 9381 }, { "epoch": 2.9, "learning_rate": 1.2363283555318496e-05, "loss": 2.7918, "step": 9382 }, { "epoch": 2.9, "learning_rate": 1.2327654496081266e-05, "loss": 2.5621, "step": 9383 }, { "epoch": 2.9, "learning_rate": 1.2292025436844036e-05, "loss": 2.5334, "step": 9384 }, { "epoch": 2.9, "learning_rate": 1.2256396377606808e-05, "loss": 2.2924, "step": 9385 }, { "epoch": 2.9, "learning_rate": 1.2220767318369578e-05, "loss": 2.544, "step": 9386 }, { "epoch": 2.9, "learning_rate": 1.218513825913235e-05, "loss": 2.4469, "step": 9387 }, { "epoch": 2.9, "learning_rate": 1.214950919989512e-05, "loss": 2.1778, "step": 9388 }, { "epoch": 2.9, "learning_rate": 1.211388014065789e-05, "loss": 2.2828, "step": 9389 }, { "epoch": 2.9, "learning_rate": 1.2078251081420663e-05, "loss": 2.2468, "step": 9390 }, { "epoch": 2.9, "learning_rate": 1.2042622022183433e-05, "loss": 2.098, "step": 9391 }, { "epoch": 2.9, "learning_rate": 1.2006992962946204e-05, "loss": 1.9191, "step": 9392 }, { "epoch": 2.9, "learning_rate": 1.1971363903708975e-05, "loss": 2.1173, "step": 9393 }, { "epoch": 2.9, "learning_rate": 1.1935734844471745e-05, "loss": 2.1061, "step": 9394 }, { "epoch": 2.9, "learning_rate": 1.1900105785234516e-05, "loss": 1.9389, "step": 9395 }, { "epoch": 2.9, "learning_rate": 1.1864476725997286e-05, "loss": 2.1359, "step": 9396 }, { "epoch": 2.9, "learning_rate": 1.1828847666760056e-05, "loss": 2.4053, "step": 9397 }, { "epoch": 2.9, "learning_rate": 1.179321860752283e-05, "loss": 2.0414, "step": 9398 }, { "epoch": 2.9, "learning_rate": 1.1757589548285601e-05, "loss": 2.3455, "step": 9399 }, { "epoch": 2.9, "learning_rate": 1.1721960489048371e-05, "loss": 1.9381, "step": 9400 }, { "epoch": 2.9, "learning_rate": 1.1686331429811141e-05, "loss": 1.8706, "step": 9401 }, { "epoch": 2.9, "learning_rate": 1.1650702370573913e-05, "loss": 2.0346, "step": 9402 }, { "epoch": 2.9, "learning_rate": 1.1615073311336683e-05, "loss": 1.8787, "step": 9403 }, { "epoch": 2.9, "learning_rate": 1.1579444252099453e-05, "loss": 1.7475, "step": 9404 }, { "epoch": 2.9, "learning_rate": 1.1543815192862225e-05, "loss": 1.8621, "step": 9405 }, { "epoch": 2.9, "learning_rate": 1.1508186133624995e-05, "loss": 1.6786, "step": 9406 }, { "epoch": 2.9, "learning_rate": 1.1472557074387768e-05, "loss": 1.7458, "step": 9407 }, { "epoch": 2.9, "learning_rate": 1.1436928015150538e-05, "loss": 1.822, "step": 9408 }, { "epoch": 2.9, "learning_rate": 1.140129895591331e-05, "loss": 1.6836, "step": 9409 }, { "epoch": 2.9, "learning_rate": 1.136566989667608e-05, "loss": 1.9154, "step": 9410 }, { "epoch": 2.9, "learning_rate": 1.133004083743885e-05, "loss": 2.0012, "step": 9411 }, { "epoch": 2.9, "learning_rate": 1.1294411778201622e-05, "loss": 1.5844, "step": 9412 }, { "epoch": 2.91, "learning_rate": 1.1258782718964392e-05, "loss": 1.3809, "step": 9413 }, { "epoch": 2.91, "learning_rate": 1.1223153659727162e-05, "loss": 1.5041, "step": 9414 }, { "epoch": 2.91, "learning_rate": 1.1187524600489935e-05, "loss": 1.3799, "step": 9415 }, { "epoch": 2.91, "learning_rate": 1.1151895541252705e-05, "loss": 1.3562, "step": 9416 }, { "epoch": 2.91, "learning_rate": 1.1116266482015477e-05, "loss": 1.4116, "step": 9417 }, { "epoch": 2.91, "learning_rate": 1.1080637422778247e-05, "loss": 1.4143, "step": 9418 }, { "epoch": 2.91, "learning_rate": 1.1045008363541017e-05, "loss": 1.3605, "step": 9419 }, { "epoch": 2.91, "learning_rate": 1.1009379304303789e-05, "loss": 1.4236, "step": 9420 }, { "epoch": 2.91, "learning_rate": 1.0973750245066559e-05, "loss": 1.3578, "step": 9421 }, { "epoch": 2.91, "learning_rate": 1.093812118582933e-05, "loss": 1.0433, "step": 9422 }, { "epoch": 2.91, "learning_rate": 1.0902492126592102e-05, "loss": 1.3, "step": 9423 }, { "epoch": 2.91, "learning_rate": 1.0866863067354874e-05, "loss": 1.2402, "step": 9424 }, { "epoch": 2.91, "learning_rate": 1.0831234008117644e-05, "loss": 1.0894, "step": 9425 }, { "epoch": 2.91, "learning_rate": 1.0795604948880414e-05, "loss": 1.0451, "step": 9426 }, { "epoch": 2.91, "learning_rate": 1.0759975889643186e-05, "loss": 0.987, "step": 9427 }, { "epoch": 2.91, "learning_rate": 1.0724346830405956e-05, "loss": 1.0492, "step": 9428 }, { "epoch": 2.91, "learning_rate": 1.0688717771168726e-05, "loss": 0.8768, "step": 9429 }, { "epoch": 2.91, "learning_rate": 1.0653088711931497e-05, "loss": 0.9437, "step": 9430 }, { "epoch": 2.91, "learning_rate": 1.061745965269427e-05, "loss": 3.0649, "step": 9431 }, { "epoch": 2.91, "learning_rate": 1.058183059345704e-05, "loss": 2.8014, "step": 9432 }, { "epoch": 2.91, "learning_rate": 1.054620153421981e-05, "loss": 2.5883, "step": 9433 }, { "epoch": 2.91, "learning_rate": 1.0510572474982582e-05, "loss": 2.6257, "step": 9434 }, { "epoch": 2.91, "learning_rate": 1.0474943415745352e-05, "loss": 2.6101, "step": 9435 }, { "epoch": 2.91, "learning_rate": 1.0439314356508122e-05, "loss": 2.2227, "step": 9436 }, { "epoch": 2.91, "learning_rate": 1.0403685297270894e-05, "loss": 2.5429, "step": 9437 }, { "epoch": 2.91, "learning_rate": 1.0368056238033664e-05, "loss": 2.2053, "step": 9438 }, { "epoch": 2.91, "learning_rate": 1.0332427178796438e-05, "loss": 2.0374, "step": 9439 }, { "epoch": 2.91, "learning_rate": 1.0296798119559208e-05, "loss": 2.0571, "step": 9440 }, { "epoch": 2.91, "learning_rate": 1.026116906032198e-05, "loss": 2.3049, "step": 9441 }, { "epoch": 2.91, "learning_rate": 1.022554000108475e-05, "loss": 2.296, "step": 9442 }, { "epoch": 2.91, "learning_rate": 1.018991094184752e-05, "loss": 2.0852, "step": 9443 }, { "epoch": 2.91, "learning_rate": 1.0154281882610291e-05, "loss": 2.061, "step": 9444 }, { "epoch": 2.92, "learning_rate": 1.0118652823373063e-05, "loss": 1.8929, "step": 9445 }, { "epoch": 2.92, "learning_rate": 1.0083023764135833e-05, "loss": 2.0834, "step": 9446 }, { "epoch": 2.92, "learning_rate": 1.0047394704898603e-05, "loss": 2.2027, "step": 9447 }, { "epoch": 2.92, "learning_rate": 1.0011765645661375e-05, "loss": 2.0275, "step": 9448 }, { "epoch": 2.92, "learning_rate": 9.976136586424145e-06, "loss": 2.2227, "step": 9449 }, { "epoch": 2.92, "learning_rate": 9.940507527186916e-06, "loss": 1.9459, "step": 9450 }, { "epoch": 2.92, "learning_rate": 9.904878467949686e-06, "loss": 2.1818, "step": 9451 }, { "epoch": 2.92, "learning_rate": 9.869249408712458e-06, "loss": 1.8763, "step": 9452 }, { "epoch": 2.92, "learning_rate": 9.833620349475228e-06, "loss": 1.9124, "step": 9453 }, { "epoch": 2.92, "learning_rate": 9.797991290238e-06, "loss": 2.0718, "step": 9454 }, { "epoch": 2.92, "learning_rate": 9.762362231000771e-06, "loss": 1.8463, "step": 9455 }, { "epoch": 2.92, "learning_rate": 9.726733171763541e-06, "loss": 1.6763, "step": 9456 }, { "epoch": 2.92, "learning_rate": 9.691104112526311e-06, "loss": 1.696, "step": 9457 }, { "epoch": 2.92, "learning_rate": 9.655475053289083e-06, "loss": 1.7865, "step": 9458 }, { "epoch": 2.92, "learning_rate": 9.619845994051855e-06, "loss": 1.9251, "step": 9459 }, { "epoch": 2.92, "learning_rate": 9.584216934814625e-06, "loss": 1.8367, "step": 9460 }, { "epoch": 2.92, "learning_rate": 9.548587875577395e-06, "loss": 1.7728, "step": 9461 }, { "epoch": 2.92, "learning_rate": 9.512958816340167e-06, "loss": 1.6027, "step": 9462 }, { "epoch": 2.92, "learning_rate": 9.477329757102938e-06, "loss": 1.799, "step": 9463 }, { "epoch": 2.92, "learning_rate": 9.441700697865708e-06, "loss": 1.6403, "step": 9464 }, { "epoch": 2.92, "learning_rate": 9.40607163862848e-06, "loss": 1.4852, "step": 9465 }, { "epoch": 2.92, "learning_rate": 9.370442579391252e-06, "loss": 1.4173, "step": 9466 }, { "epoch": 2.92, "learning_rate": 9.334813520154022e-06, "loss": 1.5533, "step": 9467 }, { "epoch": 2.92, "learning_rate": 9.299184460916792e-06, "loss": 1.3633, "step": 9468 }, { "epoch": 2.92, "learning_rate": 9.263555401679564e-06, "loss": 1.3083, "step": 9469 }, { "epoch": 2.92, "learning_rate": 9.227926342442335e-06, "loss": 1.308, "step": 9470 }, { "epoch": 2.92, "learning_rate": 9.192297283205105e-06, "loss": 1.0692, "step": 9471 }, { "epoch": 2.92, "learning_rate": 9.156668223967875e-06, "loss": 1.1021, "step": 9472 }, { "epoch": 2.92, "learning_rate": 9.121039164730647e-06, "loss": 1.2118, "step": 9473 }, { "epoch": 2.92, "learning_rate": 9.085410105493419e-06, "loss": 1.1548, "step": 9474 }, { "epoch": 2.92, "learning_rate": 9.049781046256189e-06, "loss": 1.1081, "step": 9475 }, { "epoch": 2.92, "learning_rate": 9.01415198701896e-06, "loss": 1.1504, "step": 9476 }, { "epoch": 2.92, "learning_rate": 8.97852292778173e-06, "loss": 0.9251, "step": 9477 }, { "epoch": 2.93, "learning_rate": 8.942893868544502e-06, "loss": 0.991, "step": 9478 }, { "epoch": 2.93, "learning_rate": 8.907264809307272e-06, "loss": 0.9884, "step": 9479 }, { "epoch": 2.93, "learning_rate": 8.871635750070044e-06, "loss": 0.9667, "step": 9480 }, { "epoch": 2.93, "learning_rate": 8.836006690832814e-06, "loss": 2.8228, "step": 9481 }, { "epoch": 2.93, "learning_rate": 8.800377631595586e-06, "loss": 3.1934, "step": 9482 }, { "epoch": 2.93, "learning_rate": 8.764748572358356e-06, "loss": 2.6028, "step": 9483 }, { "epoch": 2.93, "learning_rate": 8.729119513121127e-06, "loss": 2.3047, "step": 9484 }, { "epoch": 2.93, "learning_rate": 8.693490453883897e-06, "loss": 2.1804, "step": 9485 }, { "epoch": 2.93, "learning_rate": 8.657861394646669e-06, "loss": 2.2493, "step": 9486 }, { "epoch": 2.93, "learning_rate": 8.62223233540944e-06, "loss": 2.4625, "step": 9487 }, { "epoch": 2.93, "learning_rate": 8.58660327617221e-06, "loss": 2.3951, "step": 9488 }, { "epoch": 2.93, "learning_rate": 8.55097421693498e-06, "loss": 2.2439, "step": 9489 }, { "epoch": 2.93, "learning_rate": 8.515345157697753e-06, "loss": 2.2117, "step": 9490 }, { "epoch": 2.93, "learning_rate": 8.479716098460524e-06, "loss": 2.1491, "step": 9491 }, { "epoch": 2.93, "learning_rate": 8.444087039223294e-06, "loss": 1.9351, "step": 9492 }, { "epoch": 2.93, "learning_rate": 8.408457979986064e-06, "loss": 1.7992, "step": 9493 }, { "epoch": 2.93, "learning_rate": 8.372828920748836e-06, "loss": 2.0094, "step": 9494 }, { "epoch": 2.93, "learning_rate": 8.337199861511608e-06, "loss": 1.8854, "step": 9495 }, { "epoch": 2.93, "learning_rate": 8.301570802274378e-06, "loss": 1.9184, "step": 9496 }, { "epoch": 2.93, "learning_rate": 8.26594174303715e-06, "loss": 1.9433, "step": 9497 }, { "epoch": 2.93, "learning_rate": 8.23031268379992e-06, "loss": 1.7687, "step": 9498 }, { "epoch": 2.93, "learning_rate": 8.194683624562691e-06, "loss": 2.0878, "step": 9499 }, { "epoch": 2.93, "learning_rate": 8.159054565325461e-06, "loss": 1.9175, "step": 9500 }, { "epoch": 2.93, "eval_bleu": 1.5827445667544166e-18, "eval_loss": 3.0510671138763428, "eval_runtime": 2619.9089, "eval_samples_per_second": 5.634, "eval_steps_per_second": 0.704, "step": 9500 }, { "epoch": 2.93, "learning_rate": 8.123425506088233e-06, "loss": 1.9421, "step": 9501 }, { "epoch": 2.93, "learning_rate": 8.087796446851003e-06, "loss": 1.6852, "step": 9502 }, { "epoch": 2.93, "learning_rate": 8.052167387613775e-06, "loss": 1.9491, "step": 9503 }, { "epoch": 2.93, "learning_rate": 8.016538328376545e-06, "loss": 1.9421, "step": 9504 }, { "epoch": 2.93, "learning_rate": 7.980909269139316e-06, "loss": 1.9617, "step": 9505 }, { "epoch": 2.93, "learning_rate": 7.945280209902086e-06, "loss": 1.8439, "step": 9506 }, { "epoch": 2.93, "learning_rate": 7.909651150664858e-06, "loss": 1.6839, "step": 9507 }, { "epoch": 2.93, "learning_rate": 7.87402209142763e-06, "loss": 1.639, "step": 9508 }, { "epoch": 2.93, "learning_rate": 7.8383930321904e-06, "loss": 1.5324, "step": 9509 }, { "epoch": 2.94, "learning_rate": 7.80276397295317e-06, "loss": 1.7009, "step": 9510 }, { "epoch": 2.94, "learning_rate": 7.767134913715942e-06, "loss": 1.4478, "step": 9511 }, { "epoch": 2.94, "learning_rate": 7.731505854478713e-06, "loss": 1.6354, "step": 9512 }, { "epoch": 2.94, "learning_rate": 7.695876795241483e-06, "loss": 1.532, "step": 9513 }, { "epoch": 2.94, "learning_rate": 7.660247736004253e-06, "loss": 1.4991, "step": 9514 }, { "epoch": 2.94, "learning_rate": 7.624618676767026e-06, "loss": 1.5178, "step": 9515 }, { "epoch": 2.94, "learning_rate": 7.588989617529797e-06, "loss": 1.5689, "step": 9516 }, { "epoch": 2.94, "learning_rate": 7.553360558292567e-06, "loss": 1.5031, "step": 9517 }, { "epoch": 2.94, "learning_rate": 7.5177314990553376e-06, "loss": 1.6065, "step": 9518 }, { "epoch": 2.94, "learning_rate": 7.482102439818109e-06, "loss": 1.5157, "step": 9519 }, { "epoch": 2.94, "learning_rate": 7.44647338058088e-06, "loss": 1.2304, "step": 9520 }, { "epoch": 2.94, "learning_rate": 7.410844321343651e-06, "loss": 1.1117, "step": 9521 }, { "epoch": 2.94, "learning_rate": 7.375215262106421e-06, "loss": 1.3179, "step": 9522 }, { "epoch": 2.94, "learning_rate": 7.339586202869193e-06, "loss": 1.1632, "step": 9523 }, { "epoch": 2.94, "learning_rate": 7.303957143631964e-06, "loss": 0.9703, "step": 9524 }, { "epoch": 2.94, "learning_rate": 7.2683280843947345e-06, "loss": 1.1412, "step": 9525 }, { "epoch": 2.94, "learning_rate": 7.232699025157505e-06, "loss": 0.8973, "step": 9526 }, { "epoch": 2.94, "learning_rate": 7.197069965920277e-06, "loss": 0.927, "step": 9527 }, { "epoch": 2.94, "learning_rate": 7.161440906683047e-06, "loss": 1.0374, "step": 9528 }, { "epoch": 2.94, "learning_rate": 7.125811847445818e-06, "loss": 0.9249, "step": 9529 }, { "epoch": 2.94, "learning_rate": 7.090182788208589e-06, "loss": 1.0137, "step": 9530 }, { "epoch": 2.94, "learning_rate": 7.0545537289713605e-06, "loss": 3.1992, "step": 9531 }, { "epoch": 2.94, "learning_rate": 7.018924669734131e-06, "loss": 2.955, "step": 9532 }, { "epoch": 2.94, "learning_rate": 6.983295610496901e-06, "loss": 2.4761, "step": 9533 }, { "epoch": 2.94, "learning_rate": 6.947666551259672e-06, "loss": 2.6519, "step": 9534 }, { "epoch": 2.94, "learning_rate": 6.912037492022444e-06, "loss": 2.3562, "step": 9535 }, { "epoch": 2.94, "learning_rate": 6.876408432785215e-06, "loss": 2.318, "step": 9536 }, { "epoch": 2.94, "learning_rate": 6.840779373547986e-06, "loss": 2.2666, "step": 9537 }, { "epoch": 2.94, "learning_rate": 6.805150314310756e-06, "loss": 2.4338, "step": 9538 }, { "epoch": 2.94, "learning_rate": 6.769521255073527e-06, "loss": 1.9336, "step": 9539 }, { "epoch": 2.94, "learning_rate": 6.733892195836298e-06, "loss": 2.0247, "step": 9540 }, { "epoch": 2.94, "learning_rate": 6.698263136599069e-06, "loss": 2.2633, "step": 9541 }, { "epoch": 2.94, "learning_rate": 6.66263407736184e-06, "loss": 2.2944, "step": 9542 }, { "epoch": 2.95, "learning_rate": 6.62700501812461e-06, "loss": 2.0059, "step": 9543 }, { "epoch": 2.95, "learning_rate": 6.591375958887382e-06, "loss": 1.8876, "step": 9544 }, { "epoch": 2.95, "learning_rate": 6.555746899650153e-06, "loss": 2.0158, "step": 9545 }, { "epoch": 2.95, "learning_rate": 6.5201178404129234e-06, "loss": 1.8958, "step": 9546 }, { "epoch": 2.95, "learning_rate": 6.484488781175694e-06, "loss": 1.8154, "step": 9547 }, { "epoch": 2.95, "learning_rate": 6.448859721938466e-06, "loss": 2.0031, "step": 9548 }, { "epoch": 2.95, "learning_rate": 6.413230662701236e-06, "loss": 1.9046, "step": 9549 }, { "epoch": 2.95, "learning_rate": 6.377601603464007e-06, "loss": 1.7662, "step": 9550 }, { "epoch": 2.95, "learning_rate": 6.341972544226778e-06, "loss": 1.8266, "step": 9551 }, { "epoch": 2.95, "learning_rate": 6.3063434849895495e-06, "loss": 1.9874, "step": 9552 }, { "epoch": 2.95, "learning_rate": 6.27071442575232e-06, "loss": 1.9894, "step": 9553 }, { "epoch": 2.95, "learning_rate": 6.23508536651509e-06, "loss": 1.9142, "step": 9554 }, { "epoch": 2.95, "learning_rate": 6.199456307277861e-06, "loss": 1.9071, "step": 9555 }, { "epoch": 2.95, "learning_rate": 6.163827248040633e-06, "loss": 1.5023, "step": 9556 }, { "epoch": 2.95, "learning_rate": 6.128198188803404e-06, "loss": 1.9704, "step": 9557 }, { "epoch": 2.95, "learning_rate": 6.092569129566175e-06, "loss": 1.711, "step": 9558 }, { "epoch": 2.95, "learning_rate": 6.056940070328945e-06, "loss": 1.494, "step": 9559 }, { "epoch": 2.95, "learning_rate": 6.021311011091716e-06, "loss": 1.5974, "step": 9560 }, { "epoch": 2.95, "learning_rate": 5.985681951854487e-06, "loss": 1.3329, "step": 9561 }, { "epoch": 2.95, "learning_rate": 5.950052892617258e-06, "loss": 1.7576, "step": 9562 }, { "epoch": 2.95, "learning_rate": 5.914423833380028e-06, "loss": 1.4196, "step": 9563 }, { "epoch": 2.95, "learning_rate": 5.878794774142801e-06, "loss": 1.5928, "step": 9564 }, { "epoch": 2.95, "learning_rate": 5.843165714905571e-06, "loss": 1.5253, "step": 9565 }, { "epoch": 2.95, "learning_rate": 5.8075366556683416e-06, "loss": 1.2918, "step": 9566 }, { "epoch": 2.95, "learning_rate": 5.7719075964311124e-06, "loss": 1.3825, "step": 9567 }, { "epoch": 2.95, "learning_rate": 5.736278537193884e-06, "loss": 1.2231, "step": 9568 }, { "epoch": 2.95, "learning_rate": 5.700649477956655e-06, "loss": 1.3448, "step": 9569 }, { "epoch": 2.95, "learning_rate": 5.665020418719425e-06, "loss": 1.2047, "step": 9570 }, { "epoch": 2.95, "learning_rate": 5.629391359482196e-06, "loss": 1.2959, "step": 9571 }, { "epoch": 2.95, "learning_rate": 5.593762300244968e-06, "loss": 1.1479, "step": 9572 }, { "epoch": 2.95, "learning_rate": 5.5581332410077385e-06, "loss": 1.0769, "step": 9573 }, { "epoch": 2.95, "learning_rate": 5.5225041817705085e-06, "loss": 1.005, "step": 9574 }, { "epoch": 2.96, "learning_rate": 5.486875122533279e-06, "loss": 0.9161, "step": 9575 }, { "epoch": 2.96, "learning_rate": 5.451246063296051e-06, "loss": 1.1845, "step": 9576 }, { "epoch": 2.96, "learning_rate": 5.415617004058822e-06, "loss": 1.1245, "step": 9577 }, { "epoch": 2.96, "learning_rate": 5.379987944821593e-06, "loss": 1.0074, "step": 9578 }, { "epoch": 2.96, "learning_rate": 5.344358885584363e-06, "loss": 0.777, "step": 9579 }, { "epoch": 2.96, "learning_rate": 5.308729826347135e-06, "loss": 0.9761, "step": 9580 }, { "epoch": 2.96, "learning_rate": 5.273100767109905e-06, "loss": 3.0781, "step": 9581 }, { "epoch": 2.96, "learning_rate": 5.237471707872676e-06, "loss": 2.7149, "step": 9582 }, { "epoch": 2.96, "learning_rate": 5.201842648635447e-06, "loss": 2.4177, "step": 9583 }, { "epoch": 2.96, "learning_rate": 5.166213589398219e-06, "loss": 2.3557, "step": 9584 }, { "epoch": 2.96, "learning_rate": 5.13058453016099e-06, "loss": 2.3639, "step": 9585 }, { "epoch": 2.96, "learning_rate": 5.09495547092376e-06, "loss": 2.3823, "step": 9586 }, { "epoch": 2.96, "learning_rate": 5.059326411686531e-06, "loss": 2.4537, "step": 9587 }, { "epoch": 2.96, "learning_rate": 5.023697352449301e-06, "loss": 2.3316, "step": 9588 }, { "epoch": 2.96, "learning_rate": 4.988068293212072e-06, "loss": 2.0534, "step": 9589 }, { "epoch": 2.96, "learning_rate": 4.952439233974843e-06, "loss": 2.3118, "step": 9590 }, { "epoch": 2.96, "learning_rate": 4.916810174737614e-06, "loss": 2.2839, "step": 9591 }, { "epoch": 2.96, "learning_rate": 4.881181115500386e-06, "loss": 2.0061, "step": 9592 }, { "epoch": 2.96, "learning_rate": 4.845552056263156e-06, "loss": 2.0694, "step": 9593 }, { "epoch": 2.96, "learning_rate": 4.8099229970259275e-06, "loss": 2.2367, "step": 9594 }, { "epoch": 2.96, "learning_rate": 4.7742939377886975e-06, "loss": 2.2238, "step": 9595 }, { "epoch": 2.96, "learning_rate": 4.738664878551469e-06, "loss": 2.1866, "step": 9596 }, { "epoch": 2.96, "learning_rate": 4.70303581931424e-06, "loss": 2.189, "step": 9597 }, { "epoch": 2.96, "learning_rate": 4.667406760077011e-06, "loss": 1.9049, "step": 9598 }, { "epoch": 2.96, "learning_rate": 4.631777700839782e-06, "loss": 1.9036, "step": 9599 }, { "epoch": 2.96, "learning_rate": 4.596148641602553e-06, "loss": 2.2067, "step": 9600 }, { "epoch": 2.96, "learning_rate": 4.5605195823653235e-06, "loss": 1.7287, "step": 9601 }, { "epoch": 2.96, "learning_rate": 4.524890523128094e-06, "loss": 1.8241, "step": 9602 }, { "epoch": 2.96, "learning_rate": 4.489261463890865e-06, "loss": 2.1436, "step": 9603 }, { "epoch": 2.96, "learning_rate": 4.453632404653636e-06, "loss": 1.7473, "step": 9604 }, { "epoch": 2.96, "learning_rate": 4.418003345416407e-06, "loss": 1.5794, "step": 9605 }, { "epoch": 2.96, "learning_rate": 4.382374286179178e-06, "loss": 1.7757, "step": 9606 }, { "epoch": 2.97, "learning_rate": 4.346745226941949e-06, "loss": 1.7814, "step": 9607 }, { "epoch": 2.97, "learning_rate": 4.31111616770472e-06, "loss": 1.9176, "step": 9608 }, { "epoch": 2.97, "learning_rate": 4.27548710846749e-06, "loss": 1.5518, "step": 9609 }, { "epoch": 2.97, "learning_rate": 4.239858049230262e-06, "loss": 1.6683, "step": 9610 }, { "epoch": 2.97, "learning_rate": 4.204228989993032e-06, "loss": 1.6852, "step": 9611 }, { "epoch": 2.97, "learning_rate": 4.168599930755804e-06, "loss": 1.5382, "step": 9612 }, { "epoch": 2.97, "learning_rate": 4.132970871518575e-06, "loss": 1.5865, "step": 9613 }, { "epoch": 2.97, "learning_rate": 4.0973418122813456e-06, "loss": 1.5392, "step": 9614 }, { "epoch": 2.97, "learning_rate": 4.0617127530441164e-06, "loss": 1.6195, "step": 9615 }, { "epoch": 2.97, "learning_rate": 4.026083693806887e-06, "loss": 1.2314, "step": 9616 }, { "epoch": 2.97, "learning_rate": 3.990454634569658e-06, "loss": 1.4378, "step": 9617 }, { "epoch": 2.97, "learning_rate": 3.954825575332429e-06, "loss": 1.3567, "step": 9618 }, { "epoch": 2.97, "learning_rate": 3.9191965160952e-06, "loss": 1.2381, "step": 9619 }, { "epoch": 2.97, "learning_rate": 3.883567456857971e-06, "loss": 1.1308, "step": 9620 }, { "epoch": 2.97, "learning_rate": 3.847938397620742e-06, "loss": 1.4152, "step": 9621 }, { "epoch": 2.97, "learning_rate": 3.812309338383513e-06, "loss": 1.4504, "step": 9622 }, { "epoch": 2.97, "learning_rate": 3.7766802791462834e-06, "loss": 1.0987, "step": 9623 }, { "epoch": 2.97, "learning_rate": 3.7410512199090546e-06, "loss": 1.221, "step": 9624 }, { "epoch": 2.97, "learning_rate": 3.7054221606718255e-06, "loss": 1.0713, "step": 9625 }, { "epoch": 2.97, "learning_rate": 3.6697931014345964e-06, "loss": 1.0441, "step": 9626 }, { "epoch": 2.97, "learning_rate": 3.6341640421973672e-06, "loss": 0.8949, "step": 9627 }, { "epoch": 2.97, "learning_rate": 3.5985349829601385e-06, "loss": 0.9717, "step": 9628 }, { "epoch": 2.97, "learning_rate": 3.562905923722909e-06, "loss": 0.8657, "step": 9629 }, { "epoch": 2.97, "learning_rate": 3.5272768644856802e-06, "loss": 0.836, "step": 9630 }, { "epoch": 2.97, "learning_rate": 3.4916478052484507e-06, "loss": 3.2072, "step": 9631 }, { "epoch": 2.97, "learning_rate": 3.456018746011222e-06, "loss": 2.8631, "step": 9632 }, { "epoch": 2.97, "learning_rate": 3.420389686773993e-06, "loss": 2.7104, "step": 9633 }, { "epoch": 2.97, "learning_rate": 3.3847606275367637e-06, "loss": 2.6306, "step": 9634 }, { "epoch": 2.97, "learning_rate": 3.3491315682995346e-06, "loss": 2.8402, "step": 9635 }, { "epoch": 2.97, "learning_rate": 3.313502509062305e-06, "loss": 2.2221, "step": 9636 }, { "epoch": 2.97, "learning_rate": 3.2778734498250763e-06, "loss": 2.3995, "step": 9637 }, { "epoch": 2.97, "learning_rate": 3.242244390587847e-06, "loss": 2.1021, "step": 9638 }, { "epoch": 2.97, "learning_rate": 3.206615331350618e-06, "loss": 2.1753, "step": 9639 }, { "epoch": 2.98, "learning_rate": 3.170986272113389e-06, "loss": 2.2605, "step": 9640 }, { "epoch": 2.98, "learning_rate": 3.13535721287616e-06, "loss": 2.2511, "step": 9641 }, { "epoch": 2.98, "learning_rate": 3.0997281536389306e-06, "loss": 2.2129, "step": 9642 }, { "epoch": 2.98, "learning_rate": 3.064099094401702e-06, "loss": 2.0257, "step": 9643 }, { "epoch": 2.98, "learning_rate": 3.0284700351644723e-06, "loss": 2.0224, "step": 9644 }, { "epoch": 2.98, "learning_rate": 2.9928409759272436e-06, "loss": 1.9464, "step": 9645 }, { "epoch": 2.98, "learning_rate": 2.957211916690014e-06, "loss": 1.921, "step": 9646 }, { "epoch": 2.98, "learning_rate": 2.9215828574527854e-06, "loss": 1.9435, "step": 9647 }, { "epoch": 2.98, "learning_rate": 2.8859537982155562e-06, "loss": 2.2296, "step": 9648 }, { "epoch": 2.98, "learning_rate": 2.8503247389783275e-06, "loss": 1.7179, "step": 9649 }, { "epoch": 2.98, "learning_rate": 2.814695679741098e-06, "loss": 2.0163, "step": 9650 }, { "epoch": 2.98, "learning_rate": 2.7790666205038692e-06, "loss": 1.8746, "step": 9651 }, { "epoch": 2.98, "learning_rate": 2.7434375612666397e-06, "loss": 2.1264, "step": 9652 }, { "epoch": 2.98, "learning_rate": 2.707808502029411e-06, "loss": 1.6816, "step": 9653 }, { "epoch": 2.98, "learning_rate": 2.6721794427921814e-06, "loss": 1.9046, "step": 9654 }, { "epoch": 2.98, "learning_rate": 2.6365503835549527e-06, "loss": 1.7281, "step": 9655 }, { "epoch": 2.98, "learning_rate": 2.6009213243177236e-06, "loss": 1.7914, "step": 9656 }, { "epoch": 2.98, "learning_rate": 2.565292265080495e-06, "loss": 1.5076, "step": 9657 }, { "epoch": 2.98, "learning_rate": 2.5296632058432657e-06, "loss": 1.726, "step": 9658 }, { "epoch": 2.98, "learning_rate": 2.494034146606036e-06, "loss": 1.8808, "step": 9659 }, { "epoch": 2.98, "learning_rate": 2.458405087368807e-06, "loss": 1.754, "step": 9660 }, { "epoch": 2.98, "learning_rate": 2.422776028131578e-06, "loss": 1.7944, "step": 9661 }, { "epoch": 2.98, "learning_rate": 2.3871469688943487e-06, "loss": 1.672, "step": 9662 }, { "epoch": 2.98, "learning_rate": 2.35151790965712e-06, "loss": 1.3362, "step": 9663 }, { "epoch": 2.98, "learning_rate": 2.315888850419891e-06, "loss": 1.6027, "step": 9664 }, { "epoch": 2.98, "learning_rate": 2.2802597911826617e-06, "loss": 1.46, "step": 9665 }, { "epoch": 2.98, "learning_rate": 2.2446307319454326e-06, "loss": 1.4726, "step": 9666 }, { "epoch": 2.98, "learning_rate": 2.2090016727082035e-06, "loss": 1.4147, "step": 9667 }, { "epoch": 2.98, "learning_rate": 2.1733726134709743e-06, "loss": 1.3789, "step": 9668 }, { "epoch": 2.98, "learning_rate": 2.137743554233745e-06, "loss": 1.4164, "step": 9669 }, { "epoch": 2.98, "learning_rate": 2.102114494996516e-06, "loss": 1.2731, "step": 9670 }, { "epoch": 2.98, "learning_rate": 2.0664854357592874e-06, "loss": 1.1165, "step": 9671 }, { "epoch": 2.99, "learning_rate": 2.0308563765220582e-06, "loss": 1.0977, "step": 9672 }, { "epoch": 2.99, "learning_rate": 1.995227317284829e-06, "loss": 1.0438, "step": 9673 }, { "epoch": 2.99, "learning_rate": 1.9595982580476e-06, "loss": 1.0962, "step": 9674 }, { "epoch": 2.99, "learning_rate": 1.923969198810371e-06, "loss": 1.0614, "step": 9675 }, { "epoch": 2.99, "learning_rate": 1.8883401395731417e-06, "loss": 0.9073, "step": 9676 }, { "epoch": 2.99, "learning_rate": 1.8527110803359128e-06, "loss": 0.9881, "step": 9677 }, { "epoch": 2.99, "learning_rate": 1.8170820210986836e-06, "loss": 0.9171, "step": 9678 }, { "epoch": 2.99, "learning_rate": 1.7814529618614545e-06, "loss": 1.0174, "step": 9679 }, { "epoch": 2.99, "learning_rate": 1.7458239026242253e-06, "loss": 0.9757, "step": 9680 }, { "epoch": 2.99, "learning_rate": 1.7101948433869964e-06, "loss": 3.1418, "step": 9681 }, { "epoch": 2.99, "learning_rate": 1.6745657841497673e-06, "loss": 2.6657, "step": 9682 }, { "epoch": 2.99, "learning_rate": 1.6389367249125381e-06, "loss": 2.3569, "step": 9683 }, { "epoch": 2.99, "learning_rate": 1.603307665675309e-06, "loss": 2.6663, "step": 9684 }, { "epoch": 2.99, "learning_rate": 1.56767860643808e-06, "loss": 2.4925, "step": 9685 }, { "epoch": 2.99, "learning_rate": 1.532049547200851e-06, "loss": 2.1699, "step": 9686 }, { "epoch": 2.99, "learning_rate": 1.4964204879636218e-06, "loss": 2.2108, "step": 9687 }, { "epoch": 2.99, "learning_rate": 1.4607914287263927e-06, "loss": 2.2998, "step": 9688 }, { "epoch": 2.99, "learning_rate": 1.4251623694891638e-06, "loss": 2.2607, "step": 9689 }, { "epoch": 2.99, "learning_rate": 1.3895333102519346e-06, "loss": 2.1735, "step": 9690 }, { "epoch": 2.99, "learning_rate": 1.3539042510147055e-06, "loss": 2.2706, "step": 9691 }, { "epoch": 2.99, "learning_rate": 1.3182751917774763e-06, "loss": 1.9037, "step": 9692 }, { "epoch": 2.99, "learning_rate": 1.2826461325402474e-06, "loss": 1.8137, "step": 9693 }, { "epoch": 2.99, "learning_rate": 1.247017073303018e-06, "loss": 2.0697, "step": 9694 }, { "epoch": 2.99, "learning_rate": 1.211388014065789e-06, "loss": 2.0527, "step": 9695 }, { "epoch": 2.99, "learning_rate": 1.17575895482856e-06, "loss": 1.8727, "step": 9696 }, { "epoch": 2.99, "learning_rate": 1.1401298955913309e-06, "loss": 2.0334, "step": 9697 }, { "epoch": 2.99, "learning_rate": 1.1045008363541017e-06, "loss": 2.0171, "step": 9698 }, { "epoch": 2.99, "learning_rate": 1.0688717771168726e-06, "loss": 1.896, "step": 9699 }, { "epoch": 2.99, "learning_rate": 1.0332427178796437e-06, "loss": 1.8235, "step": 9700 }, { "epoch": 2.99, "learning_rate": 9.976136586424145e-07, "loss": 1.7797, "step": 9701 }, { "epoch": 2.99, "learning_rate": 9.619845994051854e-07, "loss": 2.0114, "step": 9702 }, { "epoch": 2.99, "learning_rate": 9.263555401679564e-07, "loss": 1.636, "step": 9703 }, { "epoch": 2.99, "learning_rate": 8.907264809307272e-07, "loss": 1.653, "step": 9704 }, { "epoch": 3.0, "learning_rate": 8.550974216934982e-07, "loss": 1.5532, "step": 9705 }, { "epoch": 3.0, "learning_rate": 8.194683624562691e-07, "loss": 1.6229, "step": 9706 }, { "epoch": 3.0, "learning_rate": 7.8383930321904e-07, "loss": 1.5868, "step": 9707 }, { "epoch": 3.0, "learning_rate": 7.482102439818109e-07, "loss": 1.5558, "step": 9708 }, { "epoch": 3.0, "learning_rate": 7.125811847445819e-07, "loss": 1.3796, "step": 9709 }, { "epoch": 3.0, "learning_rate": 6.769521255073527e-07, "loss": 1.3825, "step": 9710 }, { "epoch": 3.0, "learning_rate": 6.413230662701237e-07, "loss": 1.3212, "step": 9711 }, { "epoch": 3.0, "learning_rate": 6.056940070328945e-07, "loss": 1.2322, "step": 9712 }, { "epoch": 3.0, "learning_rate": 5.700649477956654e-07, "loss": 1.1678, "step": 9713 }, { "epoch": 3.0, "learning_rate": 5.344358885584363e-07, "loss": 1.2295, "step": 9714 }, { "epoch": 3.0, "learning_rate": 4.988068293212073e-07, "loss": 0.932, "step": 9715 }, { "epoch": 3.0, "learning_rate": 4.631777700839782e-07, "loss": 1.0625, "step": 9716 }, { "epoch": 3.0, "learning_rate": 4.275487108467491e-07, "loss": 0.994, "step": 9717 }, { "epoch": 3.0, "learning_rate": 3.9191965160952e-07, "loss": 1.0231, "step": 9718 }, { "epoch": 3.0, "learning_rate": 3.5629059237229094e-07, "loss": 0.9733, "step": 9719 }, { "epoch": 3.0, "learning_rate": 3.2066153313506185e-07, "loss": 0.9251, "step": 9720 }, { "epoch": 3.0, "step": 9720, "total_flos": 0.0, "train_loss": 3.260483605049765, "train_runtime": 93795.5595, "train_samples_per_second": 6.633, "train_steps_per_second": 0.104 }, { "epoch": 3.0, "step": 9720, "total_flos": 0.0, "train_loss": 0.0, "train_runtime": 15.9016, "train_samples_per_second": 39123.207, "train_steps_per_second": 611.259 }, { "epoch": 3.0, "step": 9720, "total_flos": 0.0, "train_loss": 0.0, "train_runtime": 8.7447, "train_samples_per_second": 71142.69, "train_steps_per_second": 1111.529 }, { "epoch": 3.0, "step": 9720, "total_flos": 0.0, "train_loss": 0.0, "train_runtime": 8.4854, "train_samples_per_second": 73316.525, "train_steps_per_second": 1145.493 } ], "max_steps": 9720, "num_train_epochs": 3, "total_flos": 0.0, "trial_name": null, "trial_params": null }