{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9996966939642099, "eval_steps": 500, "global_step": 3296, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0101010101010103e-06, "loss": 1.8633, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.0202020202020206e-06, "loss": 1.9231, "step": 2 }, { "epoch": 0.0, "learning_rate": 3.0303030303030305e-06, "loss": 1.9055, "step": 3 }, { "epoch": 0.0, "learning_rate": 4.040404040404041e-06, "loss": 1.9633, "step": 4 }, { "epoch": 0.0, "learning_rate": 5.050505050505051e-06, "loss": 1.8808, "step": 5 }, { "epoch": 0.0, "learning_rate": 6.060606060606061e-06, "loss": 1.909, "step": 6 }, { "epoch": 0.0, "learning_rate": 7.0707070707070704e-06, "loss": 1.8316, "step": 7 }, { "epoch": 0.0, "learning_rate": 8.080808080808082e-06, "loss": 1.9861, "step": 8 }, { "epoch": 0.0, "learning_rate": 9.090909090909091e-06, "loss": 1.9275, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.0101010101010101e-05, "loss": 1.9124, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.1111111111111112e-05, "loss": 1.8939, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.2121212121212122e-05, "loss": 2.0385, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.3131313131313134e-05, "loss": 2.0347, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.4141414141414141e-05, "loss": 1.8868, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.5151515151515153e-05, "loss": 1.8613, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.6161616161616165e-05, "loss": 1.8848, "step": 16 }, { "epoch": 0.01, "learning_rate": 1.7171717171717173e-05, "loss": 1.9092, "step": 17 }, { "epoch": 0.01, "learning_rate": 1.8181818181818182e-05, "loss": 1.6739, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.919191919191919e-05, "loss": 1.988, "step": 19 }, { "epoch": 0.01, "learning_rate": 2.0202020202020203e-05, "loss": 1.8289, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.1212121212121215e-05, "loss": 1.7232, "step": 21 }, { "epoch": 0.01, "learning_rate": 2.2222222222222223e-05, "loss": 1.8513, "step": 22 }, { "epoch": 0.01, "learning_rate": 2.3232323232323232e-05, "loss": 2.014, "step": 23 }, { "epoch": 0.01, "learning_rate": 2.4242424242424244e-05, "loss": 1.8007, "step": 24 }, { "epoch": 0.01, "learning_rate": 2.5252525252525256e-05, "loss": 1.9294, "step": 25 }, { "epoch": 0.01, "learning_rate": 2.6262626262626268e-05, "loss": 1.9203, "step": 26 }, { "epoch": 0.01, "learning_rate": 2.7272727272727273e-05, "loss": 1.9148, "step": 27 }, { "epoch": 0.01, "learning_rate": 2.8282828282828282e-05, "loss": 1.8395, "step": 28 }, { "epoch": 0.01, "learning_rate": 2.9292929292929294e-05, "loss": 1.835, "step": 29 }, { "epoch": 0.01, "learning_rate": 3.0303030303030306e-05, "loss": 1.7653, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.131313131313132e-05, "loss": 1.6854, "step": 31 }, { "epoch": 0.01, "learning_rate": 3.232323232323233e-05, "loss": 1.88, "step": 32 }, { "epoch": 0.01, "learning_rate": 3.3333333333333335e-05, "loss": 1.7785, "step": 33 }, { "epoch": 0.01, "learning_rate": 3.434343434343435e-05, "loss": 1.752, "step": 34 }, { "epoch": 0.01, "learning_rate": 3.535353535353535e-05, "loss": 1.7532, "step": 35 }, { "epoch": 0.01, "learning_rate": 3.6363636363636364e-05, "loss": 1.7821, "step": 36 }, { "epoch": 0.01, "learning_rate": 3.7373737373737376e-05, "loss": 1.8982, "step": 37 }, { "epoch": 0.01, "learning_rate": 3.838383838383838e-05, "loss": 1.6377, "step": 38 }, { "epoch": 0.01, "learning_rate": 3.939393939393939e-05, "loss": 1.7507, "step": 39 }, { "epoch": 0.01, "learning_rate": 4.0404040404040405e-05, "loss": 1.7987, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.141414141414142e-05, "loss": 1.8028, "step": 41 }, { "epoch": 0.01, "learning_rate": 4.242424242424243e-05, "loss": 1.6686, "step": 42 }, { "epoch": 0.01, "learning_rate": 4.343434343434344e-05, "loss": 1.6383, "step": 43 }, { "epoch": 0.01, "learning_rate": 4.4444444444444447e-05, "loss": 1.6653, "step": 44 }, { "epoch": 0.01, "learning_rate": 4.545454545454546e-05, "loss": 1.6997, "step": 45 }, { "epoch": 0.01, "learning_rate": 4.6464646464646464e-05, "loss": 1.6495, "step": 46 }, { "epoch": 0.01, "learning_rate": 4.7474747474747476e-05, "loss": 1.5897, "step": 47 }, { "epoch": 0.01, "learning_rate": 4.848484848484849e-05, "loss": 1.6288, "step": 48 }, { "epoch": 0.01, "learning_rate": 4.94949494949495e-05, "loss": 1.6632, "step": 49 }, { "epoch": 0.02, "learning_rate": 5.050505050505051e-05, "loss": 1.6594, "step": 50 }, { "epoch": 0.02, "learning_rate": 5.151515151515152e-05, "loss": 1.6156, "step": 51 }, { "epoch": 0.02, "learning_rate": 5.2525252525252536e-05, "loss": 1.5707, "step": 52 }, { "epoch": 0.02, "learning_rate": 5.353535353535354e-05, "loss": 1.5728, "step": 53 }, { "epoch": 0.02, "learning_rate": 5.4545454545454546e-05, "loss": 1.5207, "step": 54 }, { "epoch": 0.02, "learning_rate": 5.555555555555556e-05, "loss": 1.5462, "step": 55 }, { "epoch": 0.02, "learning_rate": 5.6565656565656563e-05, "loss": 1.6467, "step": 56 }, { "epoch": 0.02, "learning_rate": 5.757575757575758e-05, "loss": 1.5504, "step": 57 }, { "epoch": 0.02, "learning_rate": 5.858585858585859e-05, "loss": 1.459, "step": 58 }, { "epoch": 0.02, "learning_rate": 5.959595959595959e-05, "loss": 1.5515, "step": 59 }, { "epoch": 0.02, "learning_rate": 6.060606060606061e-05, "loss": 1.5928, "step": 60 }, { "epoch": 0.02, "learning_rate": 6.161616161616162e-05, "loss": 1.5198, "step": 61 }, { "epoch": 0.02, "learning_rate": 6.262626262626264e-05, "loss": 1.4452, "step": 62 }, { "epoch": 0.02, "learning_rate": 6.363636363636364e-05, "loss": 1.5087, "step": 63 }, { "epoch": 0.02, "learning_rate": 6.464646464646466e-05, "loss": 1.4231, "step": 64 }, { "epoch": 0.02, "learning_rate": 6.565656565656566e-05, "loss": 1.5227, "step": 65 }, { "epoch": 0.02, "learning_rate": 6.666666666666667e-05, "loss": 1.3923, "step": 66 }, { "epoch": 0.02, "learning_rate": 6.767676767676769e-05, "loss": 1.5338, "step": 67 }, { "epoch": 0.02, "learning_rate": 6.86868686868687e-05, "loss": 1.4982, "step": 68 }, { "epoch": 0.02, "learning_rate": 6.96969696969697e-05, "loss": 1.3882, "step": 69 }, { "epoch": 0.02, "learning_rate": 7.07070707070707e-05, "loss": 1.4886, "step": 70 }, { "epoch": 0.02, "learning_rate": 7.171717171717171e-05, "loss": 1.423, "step": 71 }, { "epoch": 0.02, "learning_rate": 7.272727272727273e-05, "loss": 1.3788, "step": 72 }, { "epoch": 0.02, "learning_rate": 7.373737373737373e-05, "loss": 1.4167, "step": 73 }, { "epoch": 0.02, "learning_rate": 7.474747474747475e-05, "loss": 1.3068, "step": 74 }, { "epoch": 0.02, "learning_rate": 7.575757575757576e-05, "loss": 1.3894, "step": 75 }, { "epoch": 0.02, "learning_rate": 7.676767676767676e-05, "loss": 1.3856, "step": 76 }, { "epoch": 0.02, "learning_rate": 7.777777777777778e-05, "loss": 1.3203, "step": 77 }, { "epoch": 0.02, "learning_rate": 7.878787878787879e-05, "loss": 1.3499, "step": 78 }, { "epoch": 0.02, "learning_rate": 7.97979797979798e-05, "loss": 1.4018, "step": 79 }, { "epoch": 0.02, "learning_rate": 8.080808080808081e-05, "loss": 1.3253, "step": 80 }, { "epoch": 0.02, "learning_rate": 8.181818181818183e-05, "loss": 1.3263, "step": 81 }, { "epoch": 0.02, "learning_rate": 8.282828282828283e-05, "loss": 1.264, "step": 82 }, { "epoch": 0.03, "learning_rate": 8.383838383838384e-05, "loss": 1.2527, "step": 83 }, { "epoch": 0.03, "learning_rate": 8.484848484848486e-05, "loss": 1.2749, "step": 84 }, { "epoch": 0.03, "learning_rate": 8.585858585858586e-05, "loss": 1.2999, "step": 85 }, { "epoch": 0.03, "learning_rate": 8.686868686868688e-05, "loss": 1.2821, "step": 86 }, { "epoch": 0.03, "learning_rate": 8.787878787878789e-05, "loss": 1.2289, "step": 87 }, { "epoch": 0.03, "learning_rate": 8.888888888888889e-05, "loss": 1.2415, "step": 88 }, { "epoch": 0.03, "learning_rate": 8.98989898989899e-05, "loss": 1.1864, "step": 89 }, { "epoch": 0.03, "learning_rate": 9.090909090909092e-05, "loss": 1.2288, "step": 90 }, { "epoch": 0.03, "learning_rate": 9.191919191919192e-05, "loss": 1.2178, "step": 91 }, { "epoch": 0.03, "learning_rate": 9.292929292929293e-05, "loss": 1.2828, "step": 92 }, { "epoch": 0.03, "learning_rate": 9.393939393939395e-05, "loss": 1.1603, "step": 93 }, { "epoch": 0.03, "learning_rate": 9.494949494949495e-05, "loss": 1.1286, "step": 94 }, { "epoch": 0.03, "learning_rate": 9.595959595959596e-05, "loss": 1.1438, "step": 95 }, { "epoch": 0.03, "learning_rate": 9.696969696969698e-05, "loss": 1.1002, "step": 96 }, { "epoch": 0.03, "learning_rate": 9.797979797979798e-05, "loss": 1.1301, "step": 97 }, { "epoch": 0.03, "learning_rate": 9.8989898989899e-05, "loss": 1.1358, "step": 98 }, { "epoch": 0.03, "learning_rate": 0.0001, "loss": 1.1064, "step": 99 }, { "epoch": 0.03, "learning_rate": 9.999997587414017e-05, "loss": 1.1574, "step": 100 }, { "epoch": 0.03, "learning_rate": 9.999990349658396e-05, "loss": 1.0915, "step": 101 }, { "epoch": 0.03, "learning_rate": 9.99997828674012e-05, "loss": 1.0702, "step": 102 }, { "epoch": 0.03, "learning_rate": 9.999961398670833e-05, "loss": 1.1018, "step": 103 }, { "epoch": 0.03, "learning_rate": 9.99993968546683e-05, "loss": 1.0827, "step": 104 }, { "epoch": 0.03, "learning_rate": 9.999913147149067e-05, "loss": 1.121, "step": 105 }, { "epoch": 0.03, "learning_rate": 9.999881783743153e-05, "loss": 1.054, "step": 106 }, { "epoch": 0.03, "learning_rate": 9.999845595279355e-05, "loss": 1.0972, "step": 107 }, { "epoch": 0.03, "learning_rate": 9.999804581792595e-05, "loss": 1.0195, "step": 108 }, { "epoch": 0.03, "learning_rate": 9.999758743322457e-05, "loss": 1.1207, "step": 109 }, { "epoch": 0.03, "learning_rate": 9.99970807991317e-05, "loss": 1.0777, "step": 110 }, { "epoch": 0.03, "learning_rate": 9.99965259161363e-05, "loss": 1.0646, "step": 111 }, { "epoch": 0.03, "learning_rate": 9.999592278477388e-05, "loss": 1.0128, "step": 112 }, { "epoch": 0.03, "learning_rate": 9.999527140562641e-05, "loss": 1.0578, "step": 113 }, { "epoch": 0.03, "learning_rate": 9.999457177932254e-05, "loss": 0.9787, "step": 114 }, { "epoch": 0.03, "learning_rate": 9.999382390653743e-05, "loss": 1.0226, "step": 115 }, { "epoch": 0.04, "learning_rate": 9.99930277879928e-05, "loss": 0.9807, "step": 116 }, { "epoch": 0.04, "learning_rate": 9.999218342445693e-05, "loss": 0.9283, "step": 117 }, { "epoch": 0.04, "learning_rate": 9.999129081674464e-05, "loss": 1.0021, "step": 118 }, { "epoch": 0.04, "learning_rate": 9.999034996571737e-05, "loss": 0.9764, "step": 119 }, { "epoch": 0.04, "learning_rate": 9.998936087228303e-05, "loss": 0.9609, "step": 120 }, { "epoch": 0.04, "learning_rate": 9.998832353739615e-05, "loss": 1.0195, "step": 121 }, { "epoch": 0.04, "learning_rate": 9.99872379620578e-05, "loss": 1.0218, "step": 122 }, { "epoch": 0.04, "learning_rate": 9.99861041473156e-05, "loss": 0.93, "step": 123 }, { "epoch": 0.04, "learning_rate": 9.99849220942637e-05, "loss": 0.9552, "step": 124 }, { "epoch": 0.04, "learning_rate": 9.998369180404283e-05, "loss": 1.0074, "step": 125 }, { "epoch": 0.04, "learning_rate": 9.998241327784026e-05, "loss": 0.9349, "step": 126 }, { "epoch": 0.04, "learning_rate": 9.998108651688982e-05, "loss": 0.9677, "step": 127 }, { "epoch": 0.04, "learning_rate": 9.997971152247188e-05, "loss": 0.9649, "step": 128 }, { "epoch": 0.04, "learning_rate": 9.997828829591336e-05, "loss": 0.9551, "step": 129 }, { "epoch": 0.04, "learning_rate": 9.99768168385877e-05, "loss": 0.9622, "step": 130 }, { "epoch": 0.04, "learning_rate": 9.997529715191494e-05, "loss": 0.9214, "step": 131 }, { "epoch": 0.04, "learning_rate": 9.997372923736159e-05, "loss": 0.9168, "step": 132 }, { "epoch": 0.04, "learning_rate": 9.997211309644079e-05, "loss": 0.8886, "step": 133 }, { "epoch": 0.04, "learning_rate": 9.997044873071213e-05, "loss": 0.9652, "step": 134 }, { "epoch": 0.04, "learning_rate": 9.99687361417818e-05, "loss": 0.9154, "step": 135 }, { "epoch": 0.04, "learning_rate": 9.996697533130251e-05, "loss": 0.953, "step": 136 }, { "epoch": 0.04, "learning_rate": 9.996516630097348e-05, "loss": 0.9304, "step": 137 }, { "epoch": 0.04, "learning_rate": 9.99633090525405e-05, "loss": 0.8705, "step": 138 }, { "epoch": 0.04, "learning_rate": 9.99614035877959e-05, "loss": 0.8915, "step": 139 }, { "epoch": 0.04, "learning_rate": 9.995944990857849e-05, "loss": 0.914, "step": 140 }, { "epoch": 0.04, "learning_rate": 9.995744801677364e-05, "loss": 0.8831, "step": 141 }, { "epoch": 0.04, "learning_rate": 9.995539791431326e-05, "loss": 0.8859, "step": 142 }, { "epoch": 0.04, "learning_rate": 9.995329960317576e-05, "loss": 0.8997, "step": 143 }, { "epoch": 0.04, "learning_rate": 9.995115308538609e-05, "loss": 0.9294, "step": 144 }, { "epoch": 0.04, "learning_rate": 9.99489583630157e-05, "loss": 0.9135, "step": 145 }, { "epoch": 0.04, "learning_rate": 9.994671543818258e-05, "loss": 0.923, "step": 146 }, { "epoch": 0.04, "learning_rate": 9.994442431305124e-05, "loss": 0.9217, "step": 147 }, { "epoch": 0.04, "learning_rate": 9.994208498983266e-05, "loss": 0.8998, "step": 148 }, { "epoch": 0.05, "learning_rate": 9.993969747078442e-05, "loss": 0.9356, "step": 149 }, { "epoch": 0.05, "learning_rate": 9.993726175821051e-05, "loss": 0.9598, "step": 150 }, { "epoch": 0.05, "learning_rate": 9.99347778544615e-05, "loss": 0.9072, "step": 151 }, { "epoch": 0.05, "learning_rate": 9.993224576193444e-05, "loss": 0.8612, "step": 152 }, { "epoch": 0.05, "learning_rate": 9.992966548307289e-05, "loss": 0.8972, "step": 153 }, { "epoch": 0.05, "learning_rate": 9.99270370203669e-05, "loss": 0.9117, "step": 154 }, { "epoch": 0.05, "learning_rate": 9.992436037635303e-05, "loss": 0.8922, "step": 155 }, { "epoch": 0.05, "learning_rate": 9.992163555361432e-05, "loss": 0.8702, "step": 156 }, { "epoch": 0.05, "learning_rate": 9.991886255478033e-05, "loss": 0.8854, "step": 157 }, { "epoch": 0.05, "learning_rate": 9.991604138252711e-05, "loss": 0.9209, "step": 158 }, { "epoch": 0.05, "learning_rate": 9.991317203957717e-05, "loss": 0.8657, "step": 159 }, { "epoch": 0.05, "learning_rate": 9.991025452869956e-05, "loss": 0.9012, "step": 160 }, { "epoch": 0.05, "learning_rate": 9.990728885270973e-05, "loss": 0.9007, "step": 161 }, { "epoch": 0.05, "learning_rate": 9.990427501446968e-05, "loss": 0.9369, "step": 162 }, { "epoch": 0.05, "learning_rate": 9.990121301688787e-05, "loss": 0.8571, "step": 163 }, { "epoch": 0.05, "learning_rate": 9.989810286291923e-05, "loss": 0.8261, "step": 164 }, { "epoch": 0.05, "learning_rate": 9.989494455556517e-05, "loss": 0.897, "step": 165 }, { "epoch": 0.05, "learning_rate": 9.989173809787356e-05, "loss": 0.8262, "step": 166 }, { "epoch": 0.05, "learning_rate": 9.988848349293874e-05, "loss": 0.8745, "step": 167 }, { "epoch": 0.05, "learning_rate": 9.988518074390152e-05, "loss": 0.8966, "step": 168 }, { "epoch": 0.05, "learning_rate": 9.988182985394916e-05, "loss": 0.8715, "step": 169 }, { "epoch": 0.05, "learning_rate": 9.98784308263154e-05, "loss": 0.9112, "step": 170 }, { "epoch": 0.05, "learning_rate": 9.987498366428041e-05, "loss": 0.8587, "step": 171 }, { "epoch": 0.05, "learning_rate": 9.98714883711708e-05, "loss": 0.8667, "step": 172 }, { "epoch": 0.05, "learning_rate": 9.986794495035968e-05, "loss": 0.9306, "step": 173 }, { "epoch": 0.05, "learning_rate": 9.986435340526656e-05, "loss": 0.9066, "step": 174 }, { "epoch": 0.05, "learning_rate": 9.986071373935741e-05, "loss": 0.8688, "step": 175 }, { "epoch": 0.05, "learning_rate": 9.985702595614461e-05, "loss": 0.8881, "step": 176 }, { "epoch": 0.05, "learning_rate": 9.985329005918702e-05, "loss": 0.8239, "step": 177 }, { "epoch": 0.05, "learning_rate": 9.984950605208992e-05, "loss": 0.8853, "step": 178 }, { "epoch": 0.05, "learning_rate": 9.984567393850497e-05, "loss": 0.8967, "step": 179 }, { "epoch": 0.05, "learning_rate": 9.984179372213032e-05, "loss": 0.8991, "step": 180 }, { "epoch": 0.05, "learning_rate": 9.983786540671051e-05, "loss": 0.8484, "step": 181 }, { "epoch": 0.06, "learning_rate": 9.983388899603647e-05, "loss": 0.9386, "step": 182 }, { "epoch": 0.06, "learning_rate": 9.982986449394562e-05, "loss": 0.8621, "step": 183 }, { "epoch": 0.06, "learning_rate": 9.982579190432171e-05, "loss": 0.8335, "step": 184 }, { "epoch": 0.06, "learning_rate": 9.982167123109495e-05, "loss": 0.8654, "step": 185 }, { "epoch": 0.06, "learning_rate": 9.981750247824191e-05, "loss": 0.8947, "step": 186 }, { "epoch": 0.06, "learning_rate": 9.981328564978558e-05, "loss": 0.8813, "step": 187 }, { "epoch": 0.06, "learning_rate": 9.980902074979536e-05, "loss": 0.9296, "step": 188 }, { "epoch": 0.06, "learning_rate": 9.980470778238704e-05, "loss": 0.8604, "step": 189 }, { "epoch": 0.06, "learning_rate": 9.980034675172274e-05, "loss": 0.7984, "step": 190 }, { "epoch": 0.06, "learning_rate": 9.979593766201103e-05, "loss": 0.8715, "step": 191 }, { "epoch": 0.06, "learning_rate": 9.979148051750684e-05, "loss": 0.8005, "step": 192 }, { "epoch": 0.06, "learning_rate": 9.978697532251144e-05, "loss": 0.8274, "step": 193 }, { "epoch": 0.06, "learning_rate": 9.978242208137251e-05, "loss": 0.8353, "step": 194 }, { "epoch": 0.06, "learning_rate": 9.977782079848413e-05, "loss": 0.8827, "step": 195 }, { "epoch": 0.06, "learning_rate": 9.977317147828662e-05, "loss": 0.8565, "step": 196 }, { "epoch": 0.06, "learning_rate": 9.976847412526678e-05, "loss": 0.8938, "step": 197 }, { "epoch": 0.06, "learning_rate": 9.97637287439577e-05, "loss": 0.8553, "step": 198 }, { "epoch": 0.06, "learning_rate": 9.975893533893885e-05, "loss": 0.8808, "step": 199 }, { "epoch": 0.06, "learning_rate": 9.975409391483601e-05, "loss": 0.8052, "step": 200 }, { "epoch": 0.06, "learning_rate": 9.974920447632134e-05, "loss": 0.862, "step": 201 }, { "epoch": 0.06, "learning_rate": 9.974426702811332e-05, "loss": 0.9213, "step": 202 }, { "epoch": 0.06, "learning_rate": 9.973928157497674e-05, "loss": 0.9066, "step": 203 }, { "epoch": 0.06, "learning_rate": 9.973424812172274e-05, "loss": 0.9066, "step": 204 }, { "epoch": 0.06, "learning_rate": 9.972916667320878e-05, "loss": 0.8308, "step": 205 }, { "epoch": 0.06, "learning_rate": 9.972403723433863e-05, "loss": 0.8676, "step": 206 }, { "epoch": 0.06, "learning_rate": 9.971885981006238e-05, "loss": 0.8148, "step": 207 }, { "epoch": 0.06, "learning_rate": 9.971363440537642e-05, "loss": 0.8266, "step": 208 }, { "epoch": 0.06, "learning_rate": 9.970836102532343e-05, "loss": 0.9138, "step": 209 }, { "epoch": 0.06, "learning_rate": 9.970303967499242e-05, "loss": 0.8945, "step": 210 }, { "epoch": 0.06, "learning_rate": 9.969767035951867e-05, "loss": 0.9139, "step": 211 }, { "epoch": 0.06, "learning_rate": 9.969225308408377e-05, "loss": 0.8462, "step": 212 }, { "epoch": 0.06, "learning_rate": 9.968678785391554e-05, "loss": 0.8476, "step": 213 }, { "epoch": 0.06, "learning_rate": 9.968127467428817e-05, "loss": 0.8655, "step": 214 }, { "epoch": 0.07, "learning_rate": 9.9675713550522e-05, "loss": 0.8433, "step": 215 }, { "epoch": 0.07, "learning_rate": 9.967010448798375e-05, "loss": 0.8414, "step": 216 }, { "epoch": 0.07, "learning_rate": 9.966444749208637e-05, "loss": 0.8439, "step": 217 }, { "epoch": 0.07, "learning_rate": 9.965874256828902e-05, "loss": 0.8494, "step": 218 }, { "epoch": 0.07, "learning_rate": 9.965298972209715e-05, "loss": 0.8762, "step": 219 }, { "epoch": 0.07, "learning_rate": 9.96471889590625e-05, "loss": 0.8305, "step": 220 }, { "epoch": 0.07, "learning_rate": 9.964134028478295e-05, "loss": 0.9019, "step": 221 }, { "epoch": 0.07, "learning_rate": 9.96354437049027e-05, "loss": 0.8089, "step": 222 }, { "epoch": 0.07, "learning_rate": 9.962949922511215e-05, "loss": 0.8128, "step": 223 }, { "epoch": 0.07, "learning_rate": 9.962350685114793e-05, "loss": 0.865, "step": 224 }, { "epoch": 0.07, "learning_rate": 9.961746658879288e-05, "loss": 0.8541, "step": 225 }, { "epoch": 0.07, "learning_rate": 9.961137844387604e-05, "loss": 0.8338, "step": 226 }, { "epoch": 0.07, "learning_rate": 9.960524242227273e-05, "loss": 0.825, "step": 227 }, { "epoch": 0.07, "learning_rate": 9.959905852990439e-05, "loss": 0.8614, "step": 228 }, { "epoch": 0.07, "learning_rate": 9.95928267727387e-05, "loss": 0.8463, "step": 229 }, { "epoch": 0.07, "learning_rate": 9.95865471567895e-05, "loss": 0.8341, "step": 230 }, { "epoch": 0.07, "learning_rate": 9.958021968811684e-05, "loss": 0.8335, "step": 231 }, { "epoch": 0.07, "learning_rate": 9.957384437282698e-05, "loss": 0.8685, "step": 232 }, { "epoch": 0.07, "learning_rate": 9.956742121707225e-05, "loss": 0.8414, "step": 233 }, { "epoch": 0.07, "learning_rate": 9.95609502270513e-05, "loss": 0.878, "step": 234 }, { "epoch": 0.07, "learning_rate": 9.955443140900879e-05, "loss": 0.8478, "step": 235 }, { "epoch": 0.07, "learning_rate": 9.954786476923565e-05, "loss": 0.8425, "step": 236 }, { "epoch": 0.07, "learning_rate": 9.954125031406887e-05, "loss": 0.7972, "step": 237 }, { "epoch": 0.07, "learning_rate": 9.953458804989166e-05, "loss": 0.8586, "step": 238 }, { "epoch": 0.07, "learning_rate": 9.952787798313332e-05, "loss": 0.8062, "step": 239 }, { "epoch": 0.07, "learning_rate": 9.95211201202693e-05, "loss": 0.8854, "step": 240 }, { "epoch": 0.07, "learning_rate": 9.951431446782118e-05, "loss": 0.8047, "step": 241 }, { "epoch": 0.07, "learning_rate": 9.950746103235663e-05, "loss": 0.862, "step": 242 }, { "epoch": 0.07, "learning_rate": 9.950055982048946e-05, "loss": 0.8918, "step": 243 }, { "epoch": 0.07, "learning_rate": 9.949361083887956e-05, "loss": 0.8227, "step": 244 }, { "epoch": 0.07, "learning_rate": 9.948661409423296e-05, "loss": 0.8926, "step": 245 }, { "epoch": 0.07, "learning_rate": 9.947956959330177e-05, "loss": 0.7969, "step": 246 }, { "epoch": 0.07, "learning_rate": 9.947247734288415e-05, "loss": 0.8154, "step": 247 }, { "epoch": 0.08, "learning_rate": 9.946533734982436e-05, "loss": 0.8333, "step": 248 }, { "epoch": 0.08, "learning_rate": 9.945814962101275e-05, "loss": 0.8129, "step": 249 }, { "epoch": 0.08, "learning_rate": 9.945091416338573e-05, "loss": 0.8292, "step": 250 }, { "epoch": 0.08, "learning_rate": 9.944363098392576e-05, "loss": 0.7903, "step": 251 }, { "epoch": 0.08, "learning_rate": 9.943630008966136e-05, "loss": 0.8701, "step": 252 }, { "epoch": 0.08, "learning_rate": 9.94289214876671e-05, "loss": 0.8327, "step": 253 }, { "epoch": 0.08, "learning_rate": 9.942149518506358e-05, "loss": 0.7888, "step": 254 }, { "epoch": 0.08, "learning_rate": 9.941402118901744e-05, "loss": 0.8497, "step": 255 }, { "epoch": 0.08, "learning_rate": 9.940649950674132e-05, "loss": 0.7952, "step": 256 }, { "epoch": 0.08, "learning_rate": 9.939893014549395e-05, "loss": 0.8586, "step": 257 }, { "epoch": 0.08, "learning_rate": 9.939131311257998e-05, "loss": 0.8365, "step": 258 }, { "epoch": 0.08, "learning_rate": 9.938364841535013e-05, "loss": 0.8792, "step": 259 }, { "epoch": 0.08, "learning_rate": 9.93759360612011e-05, "loss": 0.8545, "step": 260 }, { "epoch": 0.08, "learning_rate": 9.936817605757556e-05, "loss": 0.862, "step": 261 }, { "epoch": 0.08, "learning_rate": 9.93603684119622e-05, "loss": 0.808, "step": 262 }, { "epoch": 0.08, "learning_rate": 9.935251313189564e-05, "loss": 0.8498, "step": 263 }, { "epoch": 0.08, "learning_rate": 9.934461022495653e-05, "loss": 0.8571, "step": 264 }, { "epoch": 0.08, "learning_rate": 9.933665969877141e-05, "loss": 0.8342, "step": 265 }, { "epoch": 0.08, "learning_rate": 9.932866156101285e-05, "loss": 0.8221, "step": 266 }, { "epoch": 0.08, "learning_rate": 9.932061581939929e-05, "loss": 0.8076, "step": 267 }, { "epoch": 0.08, "learning_rate": 9.931252248169518e-05, "loss": 0.8206, "step": 268 }, { "epoch": 0.08, "learning_rate": 9.930438155571085e-05, "loss": 0.8417, "step": 269 }, { "epoch": 0.08, "learning_rate": 9.929619304930257e-05, "loss": 0.8215, "step": 270 }, { "epoch": 0.08, "learning_rate": 9.928795697037255e-05, "loss": 0.8542, "step": 271 }, { "epoch": 0.08, "learning_rate": 9.927967332686887e-05, "loss": 0.8751, "step": 272 }, { "epoch": 0.08, "learning_rate": 9.927134212678553e-05, "loss": 0.8039, "step": 273 }, { "epoch": 0.08, "learning_rate": 9.926296337816244e-05, "loss": 0.8084, "step": 274 }, { "epoch": 0.08, "learning_rate": 9.925453708908537e-05, "loss": 0.8174, "step": 275 }, { "epoch": 0.08, "learning_rate": 9.924606326768599e-05, "loss": 0.8475, "step": 276 }, { "epoch": 0.08, "learning_rate": 9.923754192214183e-05, "loss": 0.8549, "step": 277 }, { "epoch": 0.08, "learning_rate": 9.922897306067627e-05, "loss": 0.8582, "step": 278 }, { "epoch": 0.08, "learning_rate": 9.922035669155853e-05, "loss": 0.8711, "step": 279 }, { "epoch": 0.08, "learning_rate": 9.921169282310376e-05, "loss": 0.7717, "step": 280 }, { "epoch": 0.09, "learning_rate": 9.920298146367286e-05, "loss": 0.7766, "step": 281 }, { "epoch": 0.09, "learning_rate": 9.919422262167259e-05, "loss": 0.8002, "step": 282 }, { "epoch": 0.09, "learning_rate": 9.918541630555555e-05, "loss": 0.7913, "step": 283 }, { "epoch": 0.09, "learning_rate": 9.91765625238201e-05, "loss": 0.8766, "step": 284 }, { "epoch": 0.09, "learning_rate": 9.91676612850105e-05, "loss": 0.8225, "step": 285 }, { "epoch": 0.09, "learning_rate": 9.91587125977167e-05, "loss": 0.8257, "step": 286 }, { "epoch": 0.09, "learning_rate": 9.914971647057451e-05, "loss": 0.8004, "step": 287 }, { "epoch": 0.09, "learning_rate": 9.914067291226552e-05, "loss": 0.8651, "step": 288 }, { "epoch": 0.09, "learning_rate": 9.913158193151706e-05, "loss": 0.8117, "step": 289 }, { "epoch": 0.09, "learning_rate": 9.912244353710223e-05, "loss": 0.8008, "step": 290 }, { "epoch": 0.09, "learning_rate": 9.91132577378399e-05, "loss": 0.8045, "step": 291 }, { "epoch": 0.09, "learning_rate": 9.910402454259469e-05, "loss": 0.7756, "step": 292 }, { "epoch": 0.09, "learning_rate": 9.909474396027695e-05, "loss": 0.8608, "step": 293 }, { "epoch": 0.09, "learning_rate": 9.908541599984276e-05, "loss": 0.8222, "step": 294 }, { "epoch": 0.09, "learning_rate": 9.90760406702939e-05, "loss": 0.8111, "step": 295 }, { "epoch": 0.09, "learning_rate": 9.906661798067793e-05, "loss": 0.7815, "step": 296 }, { "epoch": 0.09, "learning_rate": 9.905714794008802e-05, "loss": 0.8152, "step": 297 }, { "epoch": 0.09, "learning_rate": 9.904763055766313e-05, "loss": 0.8113, "step": 298 }, { "epoch": 0.09, "learning_rate": 9.903806584258785e-05, "loss": 0.8457, "step": 299 }, { "epoch": 0.09, "learning_rate": 9.902845380409244e-05, "loss": 0.8044, "step": 300 }, { "epoch": 0.09, "learning_rate": 9.901879445145286e-05, "loss": 0.8398, "step": 301 }, { "epoch": 0.09, "learning_rate": 9.900908779399069e-05, "loss": 0.7989, "step": 302 }, { "epoch": 0.09, "learning_rate": 9.899933384107325e-05, "loss": 0.8122, "step": 303 }, { "epoch": 0.09, "learning_rate": 9.898953260211338e-05, "loss": 0.8245, "step": 304 }, { "epoch": 0.09, "learning_rate": 9.897968408656966e-05, "loss": 0.8112, "step": 305 }, { "epoch": 0.09, "learning_rate": 9.896978830394622e-05, "loss": 0.8362, "step": 306 }, { "epoch": 0.09, "learning_rate": 9.895984526379281e-05, "loss": 0.7912, "step": 307 }, { "epoch": 0.09, "learning_rate": 9.894985497570485e-05, "loss": 0.8625, "step": 308 }, { "epoch": 0.09, "learning_rate": 9.893981744932327e-05, "loss": 0.8365, "step": 309 }, { "epoch": 0.09, "learning_rate": 9.892973269433468e-05, "loss": 0.7978, "step": 310 }, { "epoch": 0.09, "learning_rate": 9.891960072047116e-05, "loss": 0.7931, "step": 311 }, { "epoch": 0.09, "learning_rate": 9.890942153751045e-05, "loss": 0.8402, "step": 312 }, { "epoch": 0.09, "learning_rate": 9.889919515527579e-05, "loss": 0.8311, "step": 313 }, { "epoch": 0.1, "learning_rate": 9.8888921583636e-05, "loss": 0.8027, "step": 314 }, { "epoch": 0.1, "learning_rate": 9.887860083250544e-05, "loss": 0.8155, "step": 315 }, { "epoch": 0.1, "learning_rate": 9.886823291184399e-05, "loss": 0.8547, "step": 316 }, { "epoch": 0.1, "learning_rate": 9.885781783165702e-05, "loss": 0.7892, "step": 317 }, { "epoch": 0.1, "learning_rate": 9.884735560199547e-05, "loss": 0.7884, "step": 318 }, { "epoch": 0.1, "learning_rate": 9.883684623295573e-05, "loss": 0.8595, "step": 319 }, { "epoch": 0.1, "learning_rate": 9.882628973467972e-05, "loss": 0.8205, "step": 320 }, { "epoch": 0.1, "learning_rate": 9.881568611735482e-05, "loss": 0.8294, "step": 321 }, { "epoch": 0.1, "learning_rate": 9.880503539121389e-05, "loss": 0.7895, "step": 322 }, { "epoch": 0.1, "learning_rate": 9.879433756653523e-05, "loss": 0.8313, "step": 323 }, { "epoch": 0.1, "learning_rate": 9.878359265364263e-05, "loss": 0.8443, "step": 324 }, { "epoch": 0.1, "learning_rate": 9.877280066290529e-05, "loss": 0.7923, "step": 325 }, { "epoch": 0.1, "learning_rate": 9.876196160473784e-05, "loss": 0.7969, "step": 326 }, { "epoch": 0.1, "learning_rate": 9.875107548960036e-05, "loss": 0.8497, "step": 327 }, { "epoch": 0.1, "learning_rate": 9.874014232799832e-05, "loss": 0.8572, "step": 328 }, { "epoch": 0.1, "learning_rate": 9.872916213048262e-05, "loss": 0.844, "step": 329 }, { "epoch": 0.1, "learning_rate": 9.871813490764949e-05, "loss": 0.7983, "step": 330 }, { "epoch": 0.1, "learning_rate": 9.87070606701406e-05, "loss": 0.7798, "step": 331 }, { "epoch": 0.1, "learning_rate": 9.869593942864295e-05, "loss": 0.8218, "step": 332 }, { "epoch": 0.1, "learning_rate": 9.868477119388896e-05, "loss": 0.8599, "step": 333 }, { "epoch": 0.1, "learning_rate": 9.867355597665632e-05, "loss": 0.813, "step": 334 }, { "epoch": 0.1, "learning_rate": 9.866229378776813e-05, "loss": 0.7974, "step": 335 }, { "epoch": 0.1, "learning_rate": 9.865098463809277e-05, "loss": 0.7912, "step": 336 }, { "epoch": 0.1, "learning_rate": 9.863962853854398e-05, "loss": 0.8535, "step": 337 }, { "epoch": 0.1, "learning_rate": 9.862822550008074e-05, "loss": 0.833, "step": 338 }, { "epoch": 0.1, "learning_rate": 9.861677553370745e-05, "loss": 0.7942, "step": 339 }, { "epoch": 0.1, "learning_rate": 9.860527865047365e-05, "loss": 0.8421, "step": 340 }, { "epoch": 0.1, "learning_rate": 9.859373486147428e-05, "loss": 0.8181, "step": 341 }, { "epoch": 0.1, "learning_rate": 9.858214417784943e-05, "loss": 0.7724, "step": 342 }, { "epoch": 0.1, "learning_rate": 9.857050661078457e-05, "loss": 0.8403, "step": 343 }, { "epoch": 0.1, "learning_rate": 9.855882217151033e-05, "loss": 0.8235, "step": 344 }, { "epoch": 0.1, "learning_rate": 9.85470908713026e-05, "loss": 0.7841, "step": 345 }, { "epoch": 0.1, "learning_rate": 9.853531272148249e-05, "loss": 0.8222, "step": 346 }, { "epoch": 0.11, "learning_rate": 9.852348773341631e-05, "loss": 0.7938, "step": 347 }, { "epoch": 0.11, "learning_rate": 9.851161591851556e-05, "loss": 0.7754, "step": 348 }, { "epoch": 0.11, "learning_rate": 9.8499697288237e-05, "loss": 0.7871, "step": 349 }, { "epoch": 0.11, "learning_rate": 9.848773185408248e-05, "loss": 0.802, "step": 350 }, { "epoch": 0.11, "learning_rate": 9.847571962759907e-05, "loss": 0.8189, "step": 351 }, { "epoch": 0.11, "learning_rate": 9.8463660620379e-05, "loss": 0.8888, "step": 352 }, { "epoch": 0.11, "learning_rate": 9.845155484405957e-05, "loss": 0.8528, "step": 353 }, { "epoch": 0.11, "learning_rate": 9.843940231032333e-05, "loss": 0.8805, "step": 354 }, { "epoch": 0.11, "learning_rate": 9.842720303089785e-05, "loss": 0.8067, "step": 355 }, { "epoch": 0.11, "learning_rate": 9.84149570175559e-05, "loss": 0.8087, "step": 356 }, { "epoch": 0.11, "learning_rate": 9.840266428211525e-05, "loss": 0.828, "step": 357 }, { "epoch": 0.11, "learning_rate": 9.839032483643886e-05, "loss": 0.8389, "step": 358 }, { "epoch": 0.11, "learning_rate": 9.837793869243468e-05, "loss": 0.8183, "step": 359 }, { "epoch": 0.11, "learning_rate": 9.83655058620558e-05, "loss": 0.7756, "step": 360 }, { "epoch": 0.11, "learning_rate": 9.835302635730032e-05, "loss": 0.7641, "step": 361 }, { "epoch": 0.11, "learning_rate": 9.834050019021138e-05, "loss": 0.8207, "step": 362 }, { "epoch": 0.11, "learning_rate": 9.832792737287715e-05, "loss": 0.8389, "step": 363 }, { "epoch": 0.11, "learning_rate": 9.831530791743085e-05, "loss": 0.8007, "step": 364 }, { "epoch": 0.11, "learning_rate": 9.83026418360507e-05, "loss": 0.8259, "step": 365 }, { "epoch": 0.11, "learning_rate": 9.82899291409599e-05, "loss": 0.7886, "step": 366 }, { "epoch": 0.11, "learning_rate": 9.82771698444266e-05, "loss": 0.7822, "step": 367 }, { "epoch": 0.11, "learning_rate": 9.8264363958764e-05, "loss": 0.8176, "step": 368 }, { "epoch": 0.11, "learning_rate": 9.825151149633021e-05, "loss": 0.7527, "step": 369 }, { "epoch": 0.11, "learning_rate": 9.82386124695283e-05, "loss": 0.8518, "step": 370 }, { "epoch": 0.11, "learning_rate": 9.822566689080628e-05, "loss": 0.8215, "step": 371 }, { "epoch": 0.11, "learning_rate": 9.821267477265705e-05, "loss": 0.8283, "step": 372 }, { "epoch": 0.11, "learning_rate": 9.819963612761849e-05, "loss": 0.8052, "step": 373 }, { "epoch": 0.11, "learning_rate": 9.818655096827331e-05, "loss": 0.8534, "step": 374 }, { "epoch": 0.11, "learning_rate": 9.817341930724914e-05, "loss": 0.8423, "step": 375 }, { "epoch": 0.11, "learning_rate": 9.816024115721851e-05, "loss": 0.8708, "step": 376 }, { "epoch": 0.11, "learning_rate": 9.814701653089878e-05, "loss": 0.8201, "step": 377 }, { "epoch": 0.11, "learning_rate": 9.813374544105216e-05, "loss": 0.8461, "step": 378 }, { "epoch": 0.11, "learning_rate": 9.81204279004857e-05, "loss": 0.8174, "step": 379 }, { "epoch": 0.12, "learning_rate": 9.81070639220513e-05, "loss": 0.7612, "step": 380 }, { "epoch": 0.12, "learning_rate": 9.809365351864565e-05, "loss": 0.825, "step": 381 }, { "epoch": 0.12, "learning_rate": 9.808019670321028e-05, "loss": 0.802, "step": 382 }, { "epoch": 0.12, "learning_rate": 9.806669348873143e-05, "loss": 0.8196, "step": 383 }, { "epoch": 0.12, "learning_rate": 9.805314388824018e-05, "loss": 0.7596, "step": 384 }, { "epoch": 0.12, "learning_rate": 9.803954791481239e-05, "loss": 0.8339, "step": 385 }, { "epoch": 0.12, "learning_rate": 9.802590558156862e-05, "loss": 0.8097, "step": 386 }, { "epoch": 0.12, "learning_rate": 9.801221690167419e-05, "loss": 0.8369, "step": 387 }, { "epoch": 0.12, "learning_rate": 9.799848188833916e-05, "loss": 0.8019, "step": 388 }, { "epoch": 0.12, "learning_rate": 9.798470055481827e-05, "loss": 0.8105, "step": 389 }, { "epoch": 0.12, "learning_rate": 9.7970872914411e-05, "loss": 0.7788, "step": 390 }, { "epoch": 0.12, "learning_rate": 9.795699898046149e-05, "loss": 0.8249, "step": 391 }, { "epoch": 0.12, "learning_rate": 9.794307876635856e-05, "loss": 0.8265, "step": 392 }, { "epoch": 0.12, "learning_rate": 9.792911228553569e-05, "loss": 0.7772, "step": 393 }, { "epoch": 0.12, "learning_rate": 9.791509955147104e-05, "loss": 0.8092, "step": 394 }, { "epoch": 0.12, "learning_rate": 9.790104057768737e-05, "loss": 0.7881, "step": 395 }, { "epoch": 0.12, "learning_rate": 9.788693537775204e-05, "loss": 0.739, "step": 396 }, { "epoch": 0.12, "learning_rate": 9.787278396527711e-05, "loss": 0.8652, "step": 397 }, { "epoch": 0.12, "learning_rate": 9.785858635391914e-05, "loss": 0.7778, "step": 398 }, { "epoch": 0.12, "learning_rate": 9.78443425573793e-05, "loss": 0.8355, "step": 399 }, { "epoch": 0.12, "learning_rate": 9.78300525894034e-05, "loss": 0.771, "step": 400 }, { "epoch": 0.12, "learning_rate": 9.78157164637817e-05, "loss": 0.8541, "step": 401 }, { "epoch": 0.12, "learning_rate": 9.780133419434908e-05, "loss": 0.7998, "step": 402 }, { "epoch": 0.12, "learning_rate": 9.77869057949849e-05, "loss": 0.7897, "step": 403 }, { "epoch": 0.12, "learning_rate": 9.777243127961311e-05, "loss": 0.7979, "step": 404 }, { "epoch": 0.12, "learning_rate": 9.775791066220205e-05, "loss": 0.7813, "step": 405 }, { "epoch": 0.12, "learning_rate": 9.774334395676467e-05, "loss": 0.8385, "step": 406 }, { "epoch": 0.12, "learning_rate": 9.772873117735831e-05, "loss": 0.8235, "step": 407 }, { "epoch": 0.12, "learning_rate": 9.771407233808482e-05, "loss": 0.8155, "step": 408 }, { "epoch": 0.12, "learning_rate": 9.769936745309047e-05, "loss": 0.8562, "step": 409 }, { "epoch": 0.12, "learning_rate": 9.7684616536566e-05, "loss": 0.76, "step": 410 }, { "epoch": 0.12, "learning_rate": 9.766981960274653e-05, "loss": 0.816, "step": 411 }, { "epoch": 0.12, "learning_rate": 9.765497666591163e-05, "loss": 0.7813, "step": 412 }, { "epoch": 0.13, "learning_rate": 9.764008774038522e-05, "loss": 0.7408, "step": 413 }, { "epoch": 0.13, "learning_rate": 9.762515284053567e-05, "loss": 0.7957, "step": 414 }, { "epoch": 0.13, "learning_rate": 9.761017198077562e-05, "loss": 0.8077, "step": 415 }, { "epoch": 0.13, "learning_rate": 9.759514517556214e-05, "loss": 0.7814, "step": 416 }, { "epoch": 0.13, "learning_rate": 9.758007243939661e-05, "loss": 0.8237, "step": 417 }, { "epoch": 0.13, "learning_rate": 9.756495378682474e-05, "loss": 0.786, "step": 418 }, { "epoch": 0.13, "learning_rate": 9.754978923243655e-05, "loss": 0.8505, "step": 419 }, { "epoch": 0.13, "learning_rate": 9.753457879086635e-05, "loss": 0.7588, "step": 420 }, { "epoch": 0.13, "learning_rate": 9.751932247679276e-05, "loss": 0.7857, "step": 421 }, { "epoch": 0.13, "learning_rate": 9.750402030493863e-05, "loss": 0.8137, "step": 422 }, { "epoch": 0.13, "learning_rate": 9.748867229007108e-05, "loss": 0.7931, "step": 423 }, { "epoch": 0.13, "learning_rate": 9.747327844700147e-05, "loss": 0.811, "step": 424 }, { "epoch": 0.13, "learning_rate": 9.745783879058541e-05, "loss": 0.8273, "step": 425 }, { "epoch": 0.13, "learning_rate": 9.744235333572268e-05, "loss": 0.8292, "step": 426 }, { "epoch": 0.13, "learning_rate": 9.742682209735727e-05, "loss": 0.8166, "step": 427 }, { "epoch": 0.13, "learning_rate": 9.741124509047739e-05, "loss": 0.8142, "step": 428 }, { "epoch": 0.13, "learning_rate": 9.739562233011536e-05, "loss": 0.8379, "step": 429 }, { "epoch": 0.13, "learning_rate": 9.737995383134769e-05, "loss": 0.7926, "step": 430 }, { "epoch": 0.13, "learning_rate": 9.736423960929502e-05, "loss": 0.787, "step": 431 }, { "epoch": 0.13, "learning_rate": 9.734847967912211e-05, "loss": 0.8295, "step": 432 }, { "epoch": 0.13, "learning_rate": 9.733267405603784e-05, "loss": 0.7942, "step": 433 }, { "epoch": 0.13, "learning_rate": 9.731682275529518e-05, "loss": 0.8645, "step": 434 }, { "epoch": 0.13, "learning_rate": 9.730092579219119e-05, "loss": 0.8234, "step": 435 }, { "epoch": 0.13, "learning_rate": 9.728498318206696e-05, "loss": 0.7758, "step": 436 }, { "epoch": 0.13, "learning_rate": 9.726899494030768e-05, "loss": 0.832, "step": 437 }, { "epoch": 0.13, "learning_rate": 9.725296108234254e-05, "loss": 0.7695, "step": 438 }, { "epoch": 0.13, "learning_rate": 9.723688162364478e-05, "loss": 0.8174, "step": 439 }, { "epoch": 0.13, "learning_rate": 9.72207565797316e-05, "loss": 0.8599, "step": 440 }, { "epoch": 0.13, "learning_rate": 9.720458596616426e-05, "loss": 0.8357, "step": 441 }, { "epoch": 0.13, "learning_rate": 9.718836979854794e-05, "loss": 0.7929, "step": 442 }, { "epoch": 0.13, "learning_rate": 9.717210809253179e-05, "loss": 0.814, "step": 443 }, { "epoch": 0.13, "learning_rate": 9.715580086380893e-05, "loss": 0.8126, "step": 444 }, { "epoch": 0.13, "learning_rate": 9.71394481281164e-05, "loss": 0.8015, "step": 445 }, { "epoch": 0.14, "learning_rate": 9.712304990123513e-05, "loss": 0.7913, "step": 446 }, { "epoch": 0.14, "learning_rate": 9.710660619899e-05, "loss": 0.8303, "step": 447 }, { "epoch": 0.14, "learning_rate": 9.709011703724973e-05, "loss": 0.822, "step": 448 }, { "epoch": 0.14, "learning_rate": 9.707358243192694e-05, "loss": 0.8148, "step": 449 }, { "epoch": 0.14, "learning_rate": 9.705700239897809e-05, "loss": 0.7697, "step": 450 }, { "epoch": 0.14, "learning_rate": 9.704037695440346e-05, "loss": 0.8205, "step": 451 }, { "epoch": 0.14, "learning_rate": 9.702370611424721e-05, "loss": 0.7867, "step": 452 }, { "epoch": 0.14, "learning_rate": 9.700698989459727e-05, "loss": 0.8471, "step": 453 }, { "epoch": 0.14, "learning_rate": 9.699022831158533e-05, "loss": 0.7667, "step": 454 }, { "epoch": 0.14, "learning_rate": 9.697342138138695e-05, "loss": 0.7735, "step": 455 }, { "epoch": 0.14, "learning_rate": 9.695656912022133e-05, "loss": 0.7845, "step": 456 }, { "epoch": 0.14, "learning_rate": 9.693967154435154e-05, "loss": 0.8263, "step": 457 }, { "epoch": 0.14, "learning_rate": 9.692272867008429e-05, "loss": 0.7872, "step": 458 }, { "epoch": 0.14, "learning_rate": 9.690574051377006e-05, "loss": 0.8182, "step": 459 }, { "epoch": 0.14, "learning_rate": 9.688870709180298e-05, "loss": 0.8639, "step": 460 }, { "epoch": 0.14, "learning_rate": 9.68716284206209e-05, "loss": 0.8861, "step": 461 }, { "epoch": 0.14, "learning_rate": 9.685450451670531e-05, "loss": 0.9063, "step": 462 }, { "epoch": 0.14, "learning_rate": 9.683733539658139e-05, "loss": 0.7893, "step": 463 }, { "epoch": 0.14, "learning_rate": 9.682012107681792e-05, "loss": 0.8024, "step": 464 }, { "epoch": 0.14, "learning_rate": 9.680286157402733e-05, "loss": 0.775, "step": 465 }, { "epoch": 0.14, "learning_rate": 9.67855569048656e-05, "loss": 0.8473, "step": 466 }, { "epoch": 0.14, "learning_rate": 9.676820708603234e-05, "loss": 0.788, "step": 467 }, { "epoch": 0.14, "learning_rate": 9.675081213427076e-05, "loss": 0.8134, "step": 468 }, { "epoch": 0.14, "learning_rate": 9.673337206636753e-05, "loss": 0.733, "step": 469 }, { "epoch": 0.14, "learning_rate": 9.671588689915293e-05, "loss": 0.8235, "step": 470 }, { "epoch": 0.14, "learning_rate": 9.669835664950077e-05, "loss": 0.7862, "step": 471 }, { "epoch": 0.14, "learning_rate": 9.668078133432834e-05, "loss": 0.8668, "step": 472 }, { "epoch": 0.14, "learning_rate": 9.66631609705964e-05, "loss": 0.7417, "step": 473 }, { "epoch": 0.14, "learning_rate": 9.664549557530924e-05, "loss": 0.81, "step": 474 }, { "epoch": 0.14, "learning_rate": 9.662778516551455e-05, "loss": 0.793, "step": 475 }, { "epoch": 0.14, "learning_rate": 9.661002975830349e-05, "loss": 0.8067, "step": 476 }, { "epoch": 0.14, "learning_rate": 9.659222937081065e-05, "loss": 0.849, "step": 477 }, { "epoch": 0.14, "learning_rate": 9.6574384020214e-05, "loss": 0.8304, "step": 478 }, { "epoch": 0.15, "learning_rate": 9.655649372373491e-05, "loss": 0.8202, "step": 479 }, { "epoch": 0.15, "learning_rate": 9.653855849863815e-05, "loss": 0.797, "step": 480 }, { "epoch": 0.15, "learning_rate": 9.652057836223182e-05, "loss": 0.8395, "step": 481 }, { "epoch": 0.15, "learning_rate": 9.650255333186739e-05, "loss": 0.8004, "step": 482 }, { "epoch": 0.15, "learning_rate": 9.648448342493963e-05, "loss": 0.804, "step": 483 }, { "epoch": 0.15, "learning_rate": 9.646636865888659e-05, "loss": 0.7811, "step": 484 }, { "epoch": 0.15, "learning_rate": 9.644820905118966e-05, "loss": 0.7926, "step": 485 }, { "epoch": 0.15, "learning_rate": 9.643000461937348e-05, "loss": 0.8227, "step": 486 }, { "epoch": 0.15, "learning_rate": 9.641175538100598e-05, "loss": 0.8404, "step": 487 }, { "epoch": 0.15, "learning_rate": 9.639346135369827e-05, "loss": 0.7609, "step": 488 }, { "epoch": 0.15, "learning_rate": 9.637512255510475e-05, "loss": 0.8462, "step": 489 }, { "epoch": 0.15, "learning_rate": 9.635673900292295e-05, "loss": 0.7827, "step": 490 }, { "epoch": 0.15, "learning_rate": 9.633831071489366e-05, "loss": 0.8072, "step": 491 }, { "epoch": 0.15, "learning_rate": 9.631983770880079e-05, "loss": 0.8394, "step": 492 }, { "epoch": 0.15, "learning_rate": 9.630132000247145e-05, "loss": 0.7893, "step": 493 }, { "epoch": 0.15, "learning_rate": 9.628275761377584e-05, "loss": 0.7784, "step": 494 }, { "epoch": 0.15, "learning_rate": 9.626415056062732e-05, "loss": 0.8091, "step": 495 }, { "epoch": 0.15, "learning_rate": 9.624549886098235e-05, "loss": 0.8208, "step": 496 }, { "epoch": 0.15, "learning_rate": 9.622680253284042e-05, "loss": 0.7964, "step": 497 }, { "epoch": 0.15, "learning_rate": 9.620806159424416e-05, "loss": 0.8144, "step": 498 }, { "epoch": 0.15, "learning_rate": 9.618927606327922e-05, "loss": 0.8335, "step": 499 }, { "epoch": 0.15, "learning_rate": 9.617044595807427e-05, "loss": 0.813, "step": 500 }, { "epoch": 0.15, "learning_rate": 9.615157129680103e-05, "loss": 0.8256, "step": 501 }, { "epoch": 0.15, "learning_rate": 9.613265209767417e-05, "loss": 0.8098, "step": 502 }, { "epoch": 0.15, "learning_rate": 9.611368837895138e-05, "loss": 0.7957, "step": 503 }, { "epoch": 0.15, "learning_rate": 9.60946801589333e-05, "loss": 0.76, "step": 504 }, { "epoch": 0.15, "learning_rate": 9.607562745596352e-05, "loss": 0.8081, "step": 505 }, { "epoch": 0.15, "learning_rate": 9.605653028842856e-05, "loss": 0.7616, "step": 506 }, { "epoch": 0.15, "learning_rate": 9.603738867475783e-05, "loss": 0.8285, "step": 507 }, { "epoch": 0.15, "learning_rate": 9.601820263342365e-05, "loss": 0.8447, "step": 508 }, { "epoch": 0.15, "learning_rate": 9.599897218294122e-05, "loss": 0.7539, "step": 509 }, { "epoch": 0.15, "learning_rate": 9.597969734186856e-05, "loss": 0.8244, "step": 510 }, { "epoch": 0.15, "learning_rate": 9.596037812880658e-05, "loss": 0.7804, "step": 511 }, { "epoch": 0.16, "learning_rate": 9.594101456239898e-05, "loss": 0.833, "step": 512 }, { "epoch": 0.16, "learning_rate": 9.592160666133226e-05, "loss": 0.8357, "step": 513 }, { "epoch": 0.16, "learning_rate": 9.590215444433573e-05, "loss": 0.7717, "step": 514 }, { "epoch": 0.16, "learning_rate": 9.58826579301814e-05, "loss": 0.7954, "step": 515 }, { "epoch": 0.16, "learning_rate": 9.586311713768413e-05, "loss": 0.7219, "step": 516 }, { "epoch": 0.16, "learning_rate": 9.584353208570145e-05, "loss": 0.7928, "step": 517 }, { "epoch": 0.16, "learning_rate": 9.582390279313358e-05, "loss": 0.7794, "step": 518 }, { "epoch": 0.16, "learning_rate": 9.580422927892348e-05, "loss": 0.714, "step": 519 }, { "epoch": 0.16, "learning_rate": 9.578451156205677e-05, "loss": 0.8217, "step": 520 }, { "epoch": 0.16, "learning_rate": 9.576474966156172e-05, "loss": 0.8005, "step": 521 }, { "epoch": 0.16, "learning_rate": 9.574494359650925e-05, "loss": 0.7574, "step": 522 }, { "epoch": 0.16, "learning_rate": 9.57250933860129e-05, "loss": 0.775, "step": 523 }, { "epoch": 0.16, "learning_rate": 9.570519904922877e-05, "loss": 0.7875, "step": 524 }, { "epoch": 0.16, "learning_rate": 9.568526060535562e-05, "loss": 0.7957, "step": 525 }, { "epoch": 0.16, "learning_rate": 9.566527807363473e-05, "loss": 0.8152, "step": 526 }, { "epoch": 0.16, "learning_rate": 9.56452514733499e-05, "loss": 0.7544, "step": 527 }, { "epoch": 0.16, "learning_rate": 9.56251808238275e-05, "loss": 0.8074, "step": 528 }, { "epoch": 0.16, "learning_rate": 9.560506614443642e-05, "loss": 0.8095, "step": 529 }, { "epoch": 0.16, "learning_rate": 9.558490745458799e-05, "loss": 0.7528, "step": 530 }, { "epoch": 0.16, "learning_rate": 9.556470477373607e-05, "loss": 0.8464, "step": 531 }, { "epoch": 0.16, "learning_rate": 9.554445812137691e-05, "loss": 0.8216, "step": 532 }, { "epoch": 0.16, "learning_rate": 9.552416751704924e-05, "loss": 0.7816, "step": 533 }, { "epoch": 0.16, "learning_rate": 9.550383298033419e-05, "loss": 0.7983, "step": 534 }, { "epoch": 0.16, "learning_rate": 9.548345453085528e-05, "loss": 0.803, "step": 535 }, { "epoch": 0.16, "learning_rate": 9.546303218827843e-05, "loss": 0.7938, "step": 536 }, { "epoch": 0.16, "learning_rate": 9.544256597231189e-05, "loss": 0.789, "step": 537 }, { "epoch": 0.16, "learning_rate": 9.542205590270626e-05, "loss": 0.7721, "step": 538 }, { "epoch": 0.16, "learning_rate": 9.540150199925448e-05, "loss": 0.7842, "step": 539 }, { "epoch": 0.16, "learning_rate": 9.538090428179177e-05, "loss": 0.8263, "step": 540 }, { "epoch": 0.16, "learning_rate": 9.536026277019561e-05, "loss": 0.796, "step": 541 }, { "epoch": 0.16, "learning_rate": 9.53395774843858e-05, "loss": 0.8364, "step": 542 }, { "epoch": 0.16, "learning_rate": 9.531884844432433e-05, "loss": 0.8681, "step": 543 }, { "epoch": 0.16, "learning_rate": 9.529807567001544e-05, "loss": 0.8174, "step": 544 }, { "epoch": 0.17, "learning_rate": 9.527725918150558e-05, "loss": 0.781, "step": 545 }, { "epoch": 0.17, "learning_rate": 9.52563989988834e-05, "loss": 0.756, "step": 546 }, { "epoch": 0.17, "learning_rate": 9.523549514227965e-05, "loss": 0.7461, "step": 547 }, { "epoch": 0.17, "learning_rate": 9.521454763186729e-05, "loss": 0.8017, "step": 548 }, { "epoch": 0.17, "learning_rate": 9.519355648786139e-05, "loss": 0.827, "step": 549 }, { "epoch": 0.17, "learning_rate": 9.517252173051911e-05, "loss": 0.76, "step": 550 }, { "epoch": 0.17, "learning_rate": 9.515144338013974e-05, "loss": 0.7764, "step": 551 }, { "epoch": 0.17, "learning_rate": 9.513032145706461e-05, "loss": 0.7756, "step": 552 }, { "epoch": 0.17, "learning_rate": 9.510915598167709e-05, "loss": 0.7873, "step": 553 }, { "epoch": 0.17, "learning_rate": 9.508794697440257e-05, "loss": 0.7737, "step": 554 }, { "epoch": 0.17, "learning_rate": 9.506669445570853e-05, "loss": 0.7966, "step": 555 }, { "epoch": 0.17, "learning_rate": 9.504539844610431e-05, "loss": 0.8085, "step": 556 }, { "epoch": 0.17, "learning_rate": 9.502405896614137e-05, "loss": 0.7768, "step": 557 }, { "epoch": 0.17, "learning_rate": 9.500267603641298e-05, "loss": 0.8034, "step": 558 }, { "epoch": 0.17, "learning_rate": 9.498124967755442e-05, "loss": 0.8102, "step": 559 }, { "epoch": 0.17, "learning_rate": 9.495977991024287e-05, "loss": 0.8266, "step": 560 }, { "epoch": 0.17, "learning_rate": 9.493826675519739e-05, "loss": 0.7868, "step": 561 }, { "epoch": 0.17, "learning_rate": 9.491671023317893e-05, "loss": 0.781, "step": 562 }, { "epoch": 0.17, "learning_rate": 9.489511036499025e-05, "loss": 0.7741, "step": 563 }, { "epoch": 0.17, "learning_rate": 9.487346717147598e-05, "loss": 0.8392, "step": 564 }, { "epoch": 0.17, "learning_rate": 9.485178067352253e-05, "loss": 0.8576, "step": 565 }, { "epoch": 0.17, "learning_rate": 9.483005089205814e-05, "loss": 0.8266, "step": 566 }, { "epoch": 0.17, "learning_rate": 9.480827784805278e-05, "loss": 0.7591, "step": 567 }, { "epoch": 0.17, "learning_rate": 9.47864615625182e-05, "loss": 0.8255, "step": 568 }, { "epoch": 0.17, "learning_rate": 9.476460205650785e-05, "loss": 0.869, "step": 569 }, { "epoch": 0.17, "learning_rate": 9.474269935111693e-05, "loss": 0.8244, "step": 570 }, { "epoch": 0.17, "learning_rate": 9.472075346748226e-05, "loss": 0.8356, "step": 571 }, { "epoch": 0.17, "learning_rate": 9.46987644267824e-05, "loss": 0.7821, "step": 572 }, { "epoch": 0.17, "learning_rate": 9.467673225023755e-05, "loss": 0.764, "step": 573 }, { "epoch": 0.17, "learning_rate": 9.465465695910949e-05, "loss": 0.7927, "step": 574 }, { "epoch": 0.17, "learning_rate": 9.463253857470164e-05, "loss": 0.8544, "step": 575 }, { "epoch": 0.17, "learning_rate": 9.4610377118359e-05, "loss": 0.7739, "step": 576 }, { "epoch": 0.18, "learning_rate": 9.458817261146817e-05, "loss": 0.7898, "step": 577 }, { "epoch": 0.18, "learning_rate": 9.456592507545721e-05, "loss": 0.7947, "step": 578 }, { "epoch": 0.18, "learning_rate": 9.454363453179578e-05, "loss": 0.8427, "step": 579 }, { "epoch": 0.18, "learning_rate": 9.452130100199503e-05, "loss": 0.8136, "step": 580 }, { "epoch": 0.18, "learning_rate": 9.449892450760758e-05, "loss": 0.7874, "step": 581 }, { "epoch": 0.18, "learning_rate": 9.447650507022751e-05, "loss": 0.7904, "step": 582 }, { "epoch": 0.18, "learning_rate": 9.445404271149036e-05, "loss": 0.7292, "step": 583 }, { "epoch": 0.18, "learning_rate": 9.443153745307307e-05, "loss": 0.7534, "step": 584 }, { "epoch": 0.18, "learning_rate": 9.440898931669399e-05, "loss": 0.7257, "step": 585 }, { "epoch": 0.18, "learning_rate": 9.438639832411284e-05, "loss": 0.8543, "step": 586 }, { "epoch": 0.18, "learning_rate": 9.436376449713073e-05, "loss": 0.7718, "step": 587 }, { "epoch": 0.18, "learning_rate": 9.434108785759006e-05, "loss": 0.8165, "step": 588 }, { "epoch": 0.18, "learning_rate": 9.431836842737456e-05, "loss": 0.7546, "step": 589 }, { "epoch": 0.18, "learning_rate": 9.429560622840927e-05, "loss": 0.7939, "step": 590 }, { "epoch": 0.18, "learning_rate": 9.42728012826605e-05, "loss": 0.805, "step": 591 }, { "epoch": 0.18, "learning_rate": 9.42499536121358e-05, "loss": 0.8023, "step": 592 }, { "epoch": 0.18, "learning_rate": 9.422706323888397e-05, "loss": 0.7785, "step": 593 }, { "epoch": 0.18, "learning_rate": 9.420413018499501e-05, "loss": 0.7923, "step": 594 }, { "epoch": 0.18, "learning_rate": 9.418115447260007e-05, "loss": 0.8118, "step": 595 }, { "epoch": 0.18, "learning_rate": 9.415813612387155e-05, "loss": 0.817, "step": 596 }, { "epoch": 0.18, "learning_rate": 9.41350751610229e-05, "loss": 0.7503, "step": 597 }, { "epoch": 0.18, "learning_rate": 9.41119716063088e-05, "loss": 0.8003, "step": 598 }, { "epoch": 0.18, "learning_rate": 9.408882548202494e-05, "loss": 0.786, "step": 599 }, { "epoch": 0.18, "learning_rate": 9.406563681050811e-05, "loss": 0.7764, "step": 600 }, { "epoch": 0.18, "learning_rate": 9.40424056141362e-05, "loss": 0.838, "step": 601 }, { "epoch": 0.18, "learning_rate": 9.401913191532812e-05, "loss": 0.8095, "step": 602 }, { "epoch": 0.18, "learning_rate": 9.399581573654375e-05, "loss": 0.7854, "step": 603 }, { "epoch": 0.18, "learning_rate": 9.397245710028406e-05, "loss": 0.7896, "step": 604 }, { "epoch": 0.18, "learning_rate": 9.39490560290909e-05, "loss": 0.8408, "step": 605 }, { "epoch": 0.18, "learning_rate": 9.392561254554713e-05, "loss": 0.8373, "step": 606 }, { "epoch": 0.18, "learning_rate": 9.390212667227649e-05, "loss": 0.8207, "step": 607 }, { "epoch": 0.18, "learning_rate": 9.387859843194369e-05, "loss": 0.738, "step": 608 }, { "epoch": 0.18, "learning_rate": 9.385502784725425e-05, "loss": 0.8365, "step": 609 }, { "epoch": 0.19, "learning_rate": 9.383141494095463e-05, "loss": 0.8155, "step": 610 }, { "epoch": 0.19, "learning_rate": 9.380775973583208e-05, "loss": 0.7783, "step": 611 }, { "epoch": 0.19, "learning_rate": 9.37840622547147e-05, "loss": 0.8031, "step": 612 }, { "epoch": 0.19, "learning_rate": 9.376032252047136e-05, "loss": 0.7761, "step": 613 }, { "epoch": 0.19, "learning_rate": 9.373654055601173e-05, "loss": 0.7578, "step": 614 }, { "epoch": 0.19, "learning_rate": 9.37127163842862e-05, "loss": 0.7997, "step": 615 }, { "epoch": 0.19, "learning_rate": 9.368885002828596e-05, "loss": 0.8301, "step": 616 }, { "epoch": 0.19, "learning_rate": 9.366494151104284e-05, "loss": 0.8342, "step": 617 }, { "epoch": 0.19, "learning_rate": 9.364099085562936e-05, "loss": 0.7615, "step": 618 }, { "epoch": 0.19, "learning_rate": 9.361699808515876e-05, "loss": 0.7676, "step": 619 }, { "epoch": 0.19, "learning_rate": 9.359296322278485e-05, "loss": 0.779, "step": 620 }, { "epoch": 0.19, "learning_rate": 9.356888629170215e-05, "loss": 0.7913, "step": 621 }, { "epoch": 0.19, "learning_rate": 9.354476731514569e-05, "loss": 0.7762, "step": 622 }, { "epoch": 0.19, "learning_rate": 9.352060631639114e-05, "loss": 0.7518, "step": 623 }, { "epoch": 0.19, "learning_rate": 9.349640331875467e-05, "loss": 0.7616, "step": 624 }, { "epoch": 0.19, "learning_rate": 9.3472158345593e-05, "loss": 0.7669, "step": 625 }, { "epoch": 0.19, "learning_rate": 9.344787142030338e-05, "loss": 0.8012, "step": 626 }, { "epoch": 0.19, "learning_rate": 9.342354256632352e-05, "loss": 0.8343, "step": 627 }, { "epoch": 0.19, "learning_rate": 9.33991718071316e-05, "loss": 0.8018, "step": 628 }, { "epoch": 0.19, "learning_rate": 9.337475916624626e-05, "loss": 0.8081, "step": 629 }, { "epoch": 0.19, "learning_rate": 9.335030466722651e-05, "loss": 0.8386, "step": 630 }, { "epoch": 0.19, "learning_rate": 9.33258083336718e-05, "loss": 0.8052, "step": 631 }, { "epoch": 0.19, "learning_rate": 9.330127018922194e-05, "loss": 0.778, "step": 632 }, { "epoch": 0.19, "learning_rate": 9.327669025755706e-05, "loss": 0.7628, "step": 633 }, { "epoch": 0.19, "learning_rate": 9.325206856239767e-05, "loss": 0.774, "step": 634 }, { "epoch": 0.19, "learning_rate": 9.322740512750452e-05, "loss": 0.7823, "step": 635 }, { "epoch": 0.19, "learning_rate": 9.320269997667869e-05, "loss": 0.781, "step": 636 }, { "epoch": 0.19, "learning_rate": 9.31779531337615e-05, "loss": 0.7532, "step": 637 }, { "epoch": 0.19, "learning_rate": 9.31531646226345e-05, "loss": 0.8052, "step": 638 }, { "epoch": 0.19, "learning_rate": 9.312833446721947e-05, "loss": 0.7474, "step": 639 }, { "epoch": 0.19, "learning_rate": 9.310346269147833e-05, "loss": 0.8268, "step": 640 }, { "epoch": 0.19, "learning_rate": 9.307854931941325e-05, "loss": 0.812, "step": 641 }, { "epoch": 0.19, "learning_rate": 9.305359437506645e-05, "loss": 0.8046, "step": 642 }, { "epoch": 0.2, "learning_rate": 9.302859788252033e-05, "loss": 0.7873, "step": 643 }, { "epoch": 0.2, "learning_rate": 9.300355986589735e-05, "loss": 0.8082, "step": 644 }, { "epoch": 0.2, "learning_rate": 9.297848034936006e-05, "loss": 0.8341, "step": 645 }, { "epoch": 0.2, "learning_rate": 9.295335935711107e-05, "loss": 0.8036, "step": 646 }, { "epoch": 0.2, "learning_rate": 9.292819691339298e-05, "loss": 0.7595, "step": 647 }, { "epoch": 0.2, "learning_rate": 9.290299304248844e-05, "loss": 0.7731, "step": 648 }, { "epoch": 0.2, "learning_rate": 9.287774776872003e-05, "loss": 0.8299, "step": 649 }, { "epoch": 0.2, "learning_rate": 9.285246111645032e-05, "loss": 0.819, "step": 650 }, { "epoch": 0.2, "learning_rate": 9.282713311008179e-05, "loss": 0.8346, "step": 651 }, { "epoch": 0.2, "learning_rate": 9.280176377405685e-05, "loss": 0.7809, "step": 652 }, { "epoch": 0.2, "learning_rate": 9.277635313285777e-05, "loss": 0.7709, "step": 653 }, { "epoch": 0.2, "learning_rate": 9.275090121100669e-05, "loss": 0.7666, "step": 654 }, { "epoch": 0.2, "learning_rate": 9.272540803306562e-05, "loss": 0.7593, "step": 655 }, { "epoch": 0.2, "learning_rate": 9.26998736236363e-05, "loss": 0.7752, "step": 656 }, { "epoch": 0.2, "learning_rate": 9.267429800736037e-05, "loss": 0.8072, "step": 657 }, { "epoch": 0.2, "learning_rate": 9.264868120891912e-05, "loss": 0.7584, "step": 658 }, { "epoch": 0.2, "learning_rate": 9.262302325303369e-05, "loss": 0.8471, "step": 659 }, { "epoch": 0.2, "learning_rate": 9.259732416446489e-05, "loss": 0.8202, "step": 660 }, { "epoch": 0.2, "learning_rate": 9.257158396801319e-05, "loss": 0.744, "step": 661 }, { "epoch": 0.2, "learning_rate": 9.254580268851878e-05, "loss": 0.8121, "step": 662 }, { "epoch": 0.2, "learning_rate": 9.25199803508615e-05, "loss": 0.8145, "step": 663 }, { "epoch": 0.2, "learning_rate": 9.249411697996078e-05, "loss": 0.7514, "step": 664 }, { "epoch": 0.2, "learning_rate": 9.246821260077564e-05, "loss": 0.7883, "step": 665 }, { "epoch": 0.2, "learning_rate": 9.244226723830473e-05, "loss": 0.7791, "step": 666 }, { "epoch": 0.2, "learning_rate": 9.241628091758621e-05, "loss": 0.7899, "step": 667 }, { "epoch": 0.2, "learning_rate": 9.239025366369775e-05, "loss": 0.794, "step": 668 }, { "epoch": 0.2, "learning_rate": 9.236418550175659e-05, "loss": 0.8575, "step": 669 }, { "epoch": 0.2, "learning_rate": 9.233807645691936e-05, "loss": 0.7692, "step": 670 }, { "epoch": 0.2, "learning_rate": 9.231192655438221e-05, "loss": 0.721, "step": 671 }, { "epoch": 0.2, "learning_rate": 9.228573581938067e-05, "loss": 0.728, "step": 672 }, { "epoch": 0.2, "learning_rate": 9.225950427718975e-05, "loss": 0.7699, "step": 673 }, { "epoch": 0.2, "learning_rate": 9.223323195312374e-05, "loss": 0.7734, "step": 674 }, { "epoch": 0.2, "learning_rate": 9.220691887253635e-05, "loss": 0.7817, "step": 675 }, { "epoch": 0.21, "learning_rate": 9.21805650608206e-05, "loss": 0.836, "step": 676 }, { "epoch": 0.21, "learning_rate": 9.215417054340888e-05, "loss": 0.8002, "step": 677 }, { "epoch": 0.21, "learning_rate": 9.212773534577272e-05, "loss": 0.8112, "step": 678 }, { "epoch": 0.21, "learning_rate": 9.210125949342306e-05, "loss": 0.819, "step": 679 }, { "epoch": 0.21, "learning_rate": 9.207474301190999e-05, "loss": 0.7735, "step": 680 }, { "epoch": 0.21, "learning_rate": 9.204818592682282e-05, "loss": 0.7893, "step": 681 }, { "epoch": 0.21, "learning_rate": 9.202158826379005e-05, "loss": 0.7598, "step": 682 }, { "epoch": 0.21, "learning_rate": 9.199495004847935e-05, "loss": 0.7901, "step": 683 }, { "epoch": 0.21, "learning_rate": 9.19682713065975e-05, "loss": 0.8066, "step": 684 }, { "epoch": 0.21, "learning_rate": 9.194155206389042e-05, "loss": 0.8423, "step": 685 }, { "epoch": 0.21, "learning_rate": 9.191479234614308e-05, "loss": 0.7715, "step": 686 }, { "epoch": 0.21, "learning_rate": 9.188799217917955e-05, "loss": 0.7957, "step": 687 }, { "epoch": 0.21, "learning_rate": 9.186115158886289e-05, "loss": 0.779, "step": 688 }, { "epoch": 0.21, "learning_rate": 9.183427060109522e-05, "loss": 0.8037, "step": 689 }, { "epoch": 0.21, "learning_rate": 9.180734924181758e-05, "loss": 0.8032, "step": 690 }, { "epoch": 0.21, "learning_rate": 9.178038753701004e-05, "loss": 0.8421, "step": 691 }, { "epoch": 0.21, "learning_rate": 9.175338551269155e-05, "loss": 0.861, "step": 692 }, { "epoch": 0.21, "learning_rate": 9.172634319492002e-05, "loss": 0.7872, "step": 693 }, { "epoch": 0.21, "learning_rate": 9.169926060979219e-05, "loss": 0.8411, "step": 694 }, { "epoch": 0.21, "learning_rate": 9.16721377834437e-05, "loss": 0.8107, "step": 695 }, { "epoch": 0.21, "learning_rate": 9.164497474204899e-05, "loss": 0.8022, "step": 696 }, { "epoch": 0.21, "learning_rate": 9.161777151182136e-05, "loss": 0.7906, "step": 697 }, { "epoch": 0.21, "learning_rate": 9.159052811901286e-05, "loss": 0.811, "step": 698 }, { "epoch": 0.21, "learning_rate": 9.156324458991427e-05, "loss": 0.7425, "step": 699 }, { "epoch": 0.21, "learning_rate": 9.153592095085517e-05, "loss": 0.7856, "step": 700 }, { "epoch": 0.21, "learning_rate": 9.150855722820377e-05, "loss": 0.7915, "step": 701 }, { "epoch": 0.21, "learning_rate": 9.148115344836705e-05, "loss": 0.8064, "step": 702 }, { "epoch": 0.21, "learning_rate": 9.145370963779057e-05, "loss": 0.832, "step": 703 }, { "epoch": 0.21, "learning_rate": 9.142622582295856e-05, "loss": 0.7789, "step": 704 }, { "epoch": 0.21, "learning_rate": 9.139870203039384e-05, "loss": 0.7977, "step": 705 }, { "epoch": 0.21, "learning_rate": 9.137113828665783e-05, "loss": 0.7872, "step": 706 }, { "epoch": 0.21, "learning_rate": 9.134353461835048e-05, "loss": 0.7962, "step": 707 }, { "epoch": 0.21, "learning_rate": 9.131589105211029e-05, "loss": 0.7988, "step": 708 }, { "epoch": 0.22, "learning_rate": 9.128820761461423e-05, "loss": 0.7949, "step": 709 }, { "epoch": 0.22, "learning_rate": 9.126048433257779e-05, "loss": 0.7514, "step": 710 }, { "epoch": 0.22, "learning_rate": 9.12327212327549e-05, "loss": 0.8216, "step": 711 }, { "epoch": 0.22, "learning_rate": 9.120491834193787e-05, "loss": 0.7108, "step": 712 }, { "epoch": 0.22, "learning_rate": 9.117707568695749e-05, "loss": 0.7858, "step": 713 }, { "epoch": 0.22, "learning_rate": 9.114919329468282e-05, "loss": 0.7574, "step": 714 }, { "epoch": 0.22, "learning_rate": 9.112127119202138e-05, "loss": 0.7344, "step": 715 }, { "epoch": 0.22, "learning_rate": 9.109330940591895e-05, "loss": 0.8208, "step": 716 }, { "epoch": 0.22, "learning_rate": 9.106530796335962e-05, "loss": 0.7271, "step": 717 }, { "epoch": 0.22, "learning_rate": 9.103726689136571e-05, "loss": 0.7608, "step": 718 }, { "epoch": 0.22, "learning_rate": 9.100918621699786e-05, "loss": 0.8096, "step": 719 }, { "epoch": 0.22, "learning_rate": 9.098106596735484e-05, "loss": 0.8107, "step": 720 }, { "epoch": 0.22, "learning_rate": 9.095290616957372e-05, "loss": 0.8264, "step": 721 }, { "epoch": 0.22, "learning_rate": 9.092470685082963e-05, "loss": 0.7889, "step": 722 }, { "epoch": 0.22, "learning_rate": 9.089646803833589e-05, "loss": 0.7991, "step": 723 }, { "epoch": 0.22, "learning_rate": 9.086818975934392e-05, "loss": 0.8078, "step": 724 }, { "epoch": 0.22, "learning_rate": 9.083987204114326e-05, "loss": 0.7966, "step": 725 }, { "epoch": 0.22, "learning_rate": 9.081151491106144e-05, "loss": 0.7337, "step": 726 }, { "epoch": 0.22, "learning_rate": 9.07831183964641e-05, "loss": 0.7079, "step": 727 }, { "epoch": 0.22, "learning_rate": 9.075468252475486e-05, "loss": 0.797, "step": 728 }, { "epoch": 0.22, "learning_rate": 9.072620732337526e-05, "loss": 0.783, "step": 729 }, { "epoch": 0.22, "learning_rate": 9.06976928198049e-05, "loss": 0.7765, "step": 730 }, { "epoch": 0.22, "learning_rate": 9.066913904156125e-05, "loss": 0.8062, "step": 731 }, { "epoch": 0.22, "learning_rate": 9.064054601619966e-05, "loss": 0.7892, "step": 732 }, { "epoch": 0.22, "learning_rate": 9.061191377131341e-05, "loss": 0.7938, "step": 733 }, { "epoch": 0.22, "learning_rate": 9.05832423345336e-05, "loss": 0.9122, "step": 734 }, { "epoch": 0.22, "learning_rate": 9.055453173352913e-05, "loss": 0.7596, "step": 735 }, { "epoch": 0.22, "learning_rate": 9.052578199600675e-05, "loss": 0.7501, "step": 736 }, { "epoch": 0.22, "learning_rate": 9.04969931497109e-05, "loss": 0.7765, "step": 737 }, { "epoch": 0.22, "learning_rate": 9.046816522242385e-05, "loss": 0.8002, "step": 738 }, { "epoch": 0.22, "learning_rate": 9.043929824196553e-05, "loss": 0.7867, "step": 739 }, { "epoch": 0.22, "learning_rate": 9.041039223619358e-05, "loss": 0.7618, "step": 740 }, { "epoch": 0.22, "learning_rate": 9.038144723300326e-05, "loss": 0.7879, "step": 741 }, { "epoch": 0.23, "learning_rate": 9.03524632603275e-05, "loss": 0.7226, "step": 742 }, { "epoch": 0.23, "learning_rate": 9.032344034613684e-05, "loss": 0.7644, "step": 743 }, { "epoch": 0.23, "learning_rate": 9.02943785184394e-05, "loss": 0.8251, "step": 744 }, { "epoch": 0.23, "learning_rate": 9.026527780528085e-05, "loss": 0.8507, "step": 745 }, { "epoch": 0.23, "learning_rate": 9.023613823474432e-05, "loss": 0.8242, "step": 746 }, { "epoch": 0.23, "learning_rate": 9.020695983495057e-05, "loss": 0.772, "step": 747 }, { "epoch": 0.23, "learning_rate": 9.017774263405771e-05, "loss": 0.7635, "step": 748 }, { "epoch": 0.23, "learning_rate": 9.014848666026138e-05, "loss": 0.8365, "step": 749 }, { "epoch": 0.23, "learning_rate": 9.011919194179458e-05, "loss": 0.7516, "step": 750 }, { "epoch": 0.23, "learning_rate": 9.008985850692772e-05, "loss": 0.7904, "step": 751 }, { "epoch": 0.23, "learning_rate": 9.006048638396858e-05, "loss": 0.7895, "step": 752 }, { "epoch": 0.23, "learning_rate": 9.003107560126226e-05, "loss": 0.7481, "step": 753 }, { "epoch": 0.23, "learning_rate": 9.000162618719119e-05, "loss": 0.778, "step": 754 }, { "epoch": 0.23, "learning_rate": 8.997213817017507e-05, "loss": 0.7557, "step": 755 }, { "epoch": 0.23, "learning_rate": 8.994261157867083e-05, "loss": 0.7607, "step": 756 }, { "epoch": 0.23, "learning_rate": 8.991304644117265e-05, "loss": 0.7673, "step": 757 }, { "epoch": 0.23, "learning_rate": 8.988344278621192e-05, "loss": 0.7309, "step": 758 }, { "epoch": 0.23, "learning_rate": 8.985380064235719e-05, "loss": 0.7733, "step": 759 }, { "epoch": 0.23, "learning_rate": 8.982412003821412e-05, "loss": 0.8118, "step": 760 }, { "epoch": 0.23, "learning_rate": 8.979440100242555e-05, "loss": 0.7748, "step": 761 }, { "epoch": 0.23, "learning_rate": 8.976464356367134e-05, "loss": 0.8216, "step": 762 }, { "epoch": 0.23, "learning_rate": 8.973484775066844e-05, "loss": 0.7978, "step": 763 }, { "epoch": 0.23, "learning_rate": 8.970501359217086e-05, "loss": 0.8272, "step": 764 }, { "epoch": 0.23, "learning_rate": 8.967514111696958e-05, "loss": 0.7373, "step": 765 }, { "epoch": 0.23, "learning_rate": 8.964523035389255e-05, "loss": 0.8176, "step": 766 }, { "epoch": 0.23, "learning_rate": 8.961528133180471e-05, "loss": 0.7803, "step": 767 }, { "epoch": 0.23, "learning_rate": 8.958529407960788e-05, "loss": 0.8807, "step": 768 }, { "epoch": 0.23, "learning_rate": 8.955526862624079e-05, "loss": 0.7478, "step": 769 }, { "epoch": 0.23, "learning_rate": 8.952520500067905e-05, "loss": 0.7342, "step": 770 }, { "epoch": 0.23, "learning_rate": 8.949510323193507e-05, "loss": 0.7255, "step": 771 }, { "epoch": 0.23, "learning_rate": 8.946496334905811e-05, "loss": 0.8448, "step": 772 }, { "epoch": 0.23, "learning_rate": 8.943478538113419e-05, "loss": 0.7624, "step": 773 }, { "epoch": 0.23, "learning_rate": 8.940456935728608e-05, "loss": 0.8022, "step": 774 }, { "epoch": 0.24, "learning_rate": 8.937431530667328e-05, "loss": 0.752, "step": 775 }, { "epoch": 0.24, "learning_rate": 8.9344023258492e-05, "loss": 0.8036, "step": 776 }, { "epoch": 0.24, "learning_rate": 8.931369324197511e-05, "loss": 0.7534, "step": 777 }, { "epoch": 0.24, "learning_rate": 8.928332528639212e-05, "loss": 0.7786, "step": 778 }, { "epoch": 0.24, "learning_rate": 8.925291942104915e-05, "loss": 0.7484, "step": 779 }, { "epoch": 0.24, "learning_rate": 8.922247567528888e-05, "loss": 0.7701, "step": 780 }, { "epoch": 0.24, "learning_rate": 8.919199407849059e-05, "loss": 0.7852, "step": 781 }, { "epoch": 0.24, "learning_rate": 8.916147466007009e-05, "loss": 0.8092, "step": 782 }, { "epoch": 0.24, "learning_rate": 8.913091744947965e-05, "loss": 0.7697, "step": 783 }, { "epoch": 0.24, "learning_rate": 8.9100322476208e-05, "loss": 0.7945, "step": 784 }, { "epoch": 0.24, "learning_rate": 8.906968976978041e-05, "loss": 0.7905, "step": 785 }, { "epoch": 0.24, "learning_rate": 8.903901935975845e-05, "loss": 0.7999, "step": 786 }, { "epoch": 0.24, "learning_rate": 8.900831127574011e-05, "loss": 0.8145, "step": 787 }, { "epoch": 0.24, "learning_rate": 8.897756554735977e-05, "loss": 0.7433, "step": 788 }, { "epoch": 0.24, "learning_rate": 8.89467822042881e-05, "loss": 0.7503, "step": 789 }, { "epoch": 0.24, "learning_rate": 8.89159612762321e-05, "loss": 0.7492, "step": 790 }, { "epoch": 0.24, "learning_rate": 8.888510279293503e-05, "loss": 0.7612, "step": 791 }, { "epoch": 0.24, "learning_rate": 8.885420678417637e-05, "loss": 0.8071, "step": 792 }, { "epoch": 0.24, "learning_rate": 8.882327327977185e-05, "loss": 0.7692, "step": 793 }, { "epoch": 0.24, "learning_rate": 8.879230230957334e-05, "loss": 0.8508, "step": 794 }, { "epoch": 0.24, "learning_rate": 8.876129390346892e-05, "loss": 0.811, "step": 795 }, { "epoch": 0.24, "learning_rate": 8.873024809138272e-05, "loss": 0.7929, "step": 796 }, { "epoch": 0.24, "learning_rate": 8.869916490327509e-05, "loss": 0.8009, "step": 797 }, { "epoch": 0.24, "learning_rate": 8.866804436914232e-05, "loss": 0.7721, "step": 798 }, { "epoch": 0.24, "learning_rate": 8.86368865190168e-05, "loss": 0.7667, "step": 799 }, { "epoch": 0.24, "learning_rate": 8.860569138296696e-05, "loss": 0.7897, "step": 800 }, { "epoch": 0.24, "learning_rate": 8.857445899109715e-05, "loss": 0.7218, "step": 801 }, { "epoch": 0.24, "learning_rate": 8.854318937354771e-05, "loss": 0.8015, "step": 802 }, { "epoch": 0.24, "learning_rate": 8.85118825604949e-05, "loss": 0.8021, "step": 803 }, { "epoch": 0.24, "learning_rate": 8.848053858215086e-05, "loss": 0.8146, "step": 804 }, { "epoch": 0.24, "learning_rate": 8.844915746876362e-05, "loss": 0.8232, "step": 805 }, { "epoch": 0.24, "learning_rate": 8.841773925061702e-05, "loss": 0.7741, "step": 806 }, { "epoch": 0.24, "learning_rate": 8.838628395803074e-05, "loss": 0.7441, "step": 807 }, { "epoch": 0.25, "learning_rate": 8.835479162136022e-05, "loss": 0.7712, "step": 808 }, { "epoch": 0.25, "learning_rate": 8.832326227099662e-05, "loss": 0.7828, "step": 809 }, { "epoch": 0.25, "learning_rate": 8.829169593736687e-05, "loss": 0.8007, "step": 810 }, { "epoch": 0.25, "learning_rate": 8.826009265093355e-05, "loss": 0.7761, "step": 811 }, { "epoch": 0.25, "learning_rate": 8.822845244219495e-05, "loss": 0.7913, "step": 812 }, { "epoch": 0.25, "learning_rate": 8.819677534168493e-05, "loss": 0.7578, "step": 813 }, { "epoch": 0.25, "learning_rate": 8.8165061379973e-05, "loss": 0.8385, "step": 814 }, { "epoch": 0.25, "learning_rate": 8.813331058766421e-05, "loss": 0.7884, "step": 815 }, { "epoch": 0.25, "learning_rate": 8.810152299539917e-05, "loss": 0.8459, "step": 816 }, { "epoch": 0.25, "learning_rate": 8.806969863385402e-05, "loss": 0.7599, "step": 817 }, { "epoch": 0.25, "learning_rate": 8.803783753374032e-05, "loss": 0.7552, "step": 818 }, { "epoch": 0.25, "learning_rate": 8.800593972580516e-05, "loss": 0.7739, "step": 819 }, { "epoch": 0.25, "learning_rate": 8.797400524083101e-05, "loss": 0.7955, "step": 820 }, { "epoch": 0.25, "learning_rate": 8.794203410963576e-05, "loss": 0.7679, "step": 821 }, { "epoch": 0.25, "learning_rate": 8.791002636307264e-05, "loss": 0.7901, "step": 822 }, { "epoch": 0.25, "learning_rate": 8.787798203203024e-05, "loss": 0.7644, "step": 823 }, { "epoch": 0.25, "learning_rate": 8.78459011474324e-05, "loss": 0.8375, "step": 824 }, { "epoch": 0.25, "learning_rate": 8.781378374023834e-05, "loss": 0.7739, "step": 825 }, { "epoch": 0.25, "learning_rate": 8.778162984144242e-05, "loss": 0.744, "step": 826 }, { "epoch": 0.25, "learning_rate": 8.774943948207426e-05, "loss": 0.7758, "step": 827 }, { "epoch": 0.25, "learning_rate": 8.771721269319868e-05, "loss": 0.8418, "step": 828 }, { "epoch": 0.25, "learning_rate": 8.768494950591562e-05, "loss": 0.836, "step": 829 }, { "epoch": 0.25, "learning_rate": 8.765264995136018e-05, "loss": 0.7909, "step": 830 }, { "epoch": 0.25, "learning_rate": 8.762031406070255e-05, "loss": 0.759, "step": 831 }, { "epoch": 0.25, "learning_rate": 8.758794186514795e-05, "loss": 0.7717, "step": 832 }, { "epoch": 0.25, "learning_rate": 8.755553339593667e-05, "loss": 0.7677, "step": 833 }, { "epoch": 0.25, "learning_rate": 8.752308868434403e-05, "loss": 0.762, "step": 834 }, { "epoch": 0.25, "learning_rate": 8.749060776168023e-05, "loss": 0.714, "step": 835 }, { "epoch": 0.25, "learning_rate": 8.745809065929054e-05, "loss": 0.766, "step": 836 }, { "epoch": 0.25, "learning_rate": 8.742553740855506e-05, "loss": 0.7802, "step": 837 }, { "epoch": 0.25, "learning_rate": 8.739294804088877e-05, "loss": 0.7617, "step": 838 }, { "epoch": 0.25, "learning_rate": 8.736032258774158e-05, "loss": 0.7373, "step": 839 }, { "epoch": 0.25, "learning_rate": 8.732766108059813e-05, "loss": 0.7271, "step": 840 }, { "epoch": 0.26, "learning_rate": 8.729496355097793e-05, "loss": 0.7311, "step": 841 }, { "epoch": 0.26, "learning_rate": 8.726223003043519e-05, "loss": 0.8357, "step": 842 }, { "epoch": 0.26, "learning_rate": 8.722946055055891e-05, "loss": 0.8024, "step": 843 }, { "epoch": 0.26, "learning_rate": 8.719665514297275e-05, "loss": 0.7859, "step": 844 }, { "epoch": 0.26, "learning_rate": 8.716381383933507e-05, "loss": 0.7863, "step": 845 }, { "epoch": 0.26, "learning_rate": 8.713093667133883e-05, "loss": 0.7379, "step": 846 }, { "epoch": 0.26, "learning_rate": 8.709802367071166e-05, "loss": 0.7259, "step": 847 }, { "epoch": 0.26, "learning_rate": 8.706507486921572e-05, "loss": 0.6927, "step": 848 }, { "epoch": 0.26, "learning_rate": 8.703209029864774e-05, "loss": 0.7944, "step": 849 }, { "epoch": 0.26, "learning_rate": 8.699906999083898e-05, "loss": 0.7317, "step": 850 }, { "epoch": 0.26, "learning_rate": 8.696601397765514e-05, "loss": 0.812, "step": 851 }, { "epoch": 0.26, "learning_rate": 8.693292229099644e-05, "loss": 0.868, "step": 852 }, { "epoch": 0.26, "learning_rate": 8.689979496279746e-05, "loss": 0.7675, "step": 853 }, { "epoch": 0.26, "learning_rate": 8.686663202502726e-05, "loss": 0.7982, "step": 854 }, { "epoch": 0.26, "learning_rate": 8.683343350968918e-05, "loss": 0.8133, "step": 855 }, { "epoch": 0.26, "learning_rate": 8.680019944882094e-05, "loss": 0.7675, "step": 856 }, { "epoch": 0.26, "learning_rate": 8.676692987449455e-05, "loss": 0.7486, "step": 857 }, { "epoch": 0.26, "learning_rate": 8.67336248188163e-05, "loss": 0.7951, "step": 858 }, { "epoch": 0.26, "learning_rate": 8.670028431392671e-05, "loss": 0.8034, "step": 859 }, { "epoch": 0.26, "learning_rate": 8.666690839200051e-05, "loss": 0.775, "step": 860 }, { "epoch": 0.26, "learning_rate": 8.663349708524662e-05, "loss": 0.801, "step": 861 }, { "epoch": 0.26, "learning_rate": 8.660005042590808e-05, "loss": 0.7961, "step": 862 }, { "epoch": 0.26, "learning_rate": 8.656656844626209e-05, "loss": 0.7701, "step": 863 }, { "epoch": 0.26, "learning_rate": 8.653305117861992e-05, "loss": 0.8147, "step": 864 }, { "epoch": 0.26, "learning_rate": 8.649949865532686e-05, "loss": 0.7661, "step": 865 }, { "epoch": 0.26, "learning_rate": 8.646591090876224e-05, "loss": 0.6985, "step": 866 }, { "epoch": 0.26, "learning_rate": 8.643228797133944e-05, "loss": 0.7918, "step": 867 }, { "epoch": 0.26, "learning_rate": 8.639862987550571e-05, "loss": 0.7646, "step": 868 }, { "epoch": 0.26, "learning_rate": 8.636493665374228e-05, "loss": 0.7557, "step": 869 }, { "epoch": 0.26, "learning_rate": 8.633120833856427e-05, "loss": 0.8162, "step": 870 }, { "epoch": 0.26, "learning_rate": 8.629744496252065e-05, "loss": 0.812, "step": 871 }, { "epoch": 0.26, "learning_rate": 8.626364655819426e-05, "loss": 0.7971, "step": 872 }, { "epoch": 0.26, "learning_rate": 8.622981315820171e-05, "loss": 0.77, "step": 873 }, { "epoch": 0.27, "learning_rate": 8.619594479519341e-05, "loss": 0.7548, "step": 874 }, { "epoch": 0.27, "learning_rate": 8.616204150185349e-05, "loss": 0.8093, "step": 875 }, { "epoch": 0.27, "learning_rate": 8.612810331089976e-05, "loss": 0.8281, "step": 876 }, { "epoch": 0.27, "learning_rate": 8.60941302550838e-05, "loss": 0.7888, "step": 877 }, { "epoch": 0.27, "learning_rate": 8.606012236719073e-05, "loss": 0.7574, "step": 878 }, { "epoch": 0.27, "learning_rate": 8.602607968003935e-05, "loss": 0.7566, "step": 879 }, { "epoch": 0.27, "learning_rate": 8.599200222648203e-05, "loss": 0.7825, "step": 880 }, { "epoch": 0.27, "learning_rate": 8.595789003940468e-05, "loss": 0.7867, "step": 881 }, { "epoch": 0.27, "learning_rate": 8.592374315172672e-05, "loss": 0.7591, "step": 882 }, { "epoch": 0.27, "learning_rate": 8.588956159640109e-05, "loss": 0.7643, "step": 883 }, { "epoch": 0.27, "learning_rate": 8.585534540641416e-05, "loss": 0.7763, "step": 884 }, { "epoch": 0.27, "learning_rate": 8.582109461478572e-05, "loss": 0.7443, "step": 885 }, { "epoch": 0.27, "learning_rate": 8.578680925456896e-05, "loss": 0.7508, "step": 886 }, { "epoch": 0.27, "learning_rate": 8.575248935885047e-05, "loss": 0.8003, "step": 887 }, { "epoch": 0.27, "learning_rate": 8.571813496075009e-05, "loss": 0.7091, "step": 888 }, { "epoch": 0.27, "learning_rate": 8.568374609342101e-05, "loss": 0.7975, "step": 889 }, { "epoch": 0.27, "learning_rate": 8.564932279004967e-05, "loss": 0.7731, "step": 890 }, { "epoch": 0.27, "learning_rate": 8.561486508385573e-05, "loss": 0.7993, "step": 891 }, { "epoch": 0.27, "learning_rate": 8.558037300809208e-05, "loss": 0.7822, "step": 892 }, { "epoch": 0.27, "learning_rate": 8.554584659604474e-05, "loss": 0.7657, "step": 893 }, { "epoch": 0.27, "learning_rate": 8.551128588103292e-05, "loss": 0.7427, "step": 894 }, { "epoch": 0.27, "learning_rate": 8.547669089640886e-05, "loss": 0.75, "step": 895 }, { "epoch": 0.27, "learning_rate": 8.54420616755579e-05, "loss": 0.7756, "step": 896 }, { "epoch": 0.27, "learning_rate": 8.540739825189849e-05, "loss": 0.7591, "step": 897 }, { "epoch": 0.27, "learning_rate": 8.537270065888197e-05, "loss": 0.6704, "step": 898 }, { "epoch": 0.27, "learning_rate": 8.533796892999273e-05, "loss": 0.703, "step": 899 }, { "epoch": 0.27, "learning_rate": 8.530320309874809e-05, "loss": 0.8126, "step": 900 }, { "epoch": 0.27, "learning_rate": 8.526840319869826e-05, "loss": 0.8042, "step": 901 }, { "epoch": 0.27, "learning_rate": 8.523356926342634e-05, "loss": 0.7664, "step": 902 }, { "epoch": 0.27, "learning_rate": 8.51987013265483e-05, "loss": 0.8338, "step": 903 }, { "epoch": 0.27, "learning_rate": 8.516379942171287e-05, "loss": 0.7627, "step": 904 }, { "epoch": 0.27, "learning_rate": 8.512886358260162e-05, "loss": 0.8157, "step": 905 }, { "epoch": 0.27, "learning_rate": 8.509389384292878e-05, "loss": 0.7231, "step": 906 }, { "epoch": 0.28, "learning_rate": 8.50588902364414e-05, "loss": 0.7761, "step": 907 }, { "epoch": 0.28, "learning_rate": 8.502385279691914e-05, "loss": 0.7785, "step": 908 }, { "epoch": 0.28, "learning_rate": 8.498878155817437e-05, "loss": 0.7792, "step": 909 }, { "epoch": 0.28, "learning_rate": 8.4953676554052e-05, "loss": 0.8256, "step": 910 }, { "epoch": 0.28, "learning_rate": 8.491853781842958e-05, "loss": 0.784, "step": 911 }, { "epoch": 0.28, "learning_rate": 8.488336538521721e-05, "loss": 0.8104, "step": 912 }, { "epoch": 0.28, "learning_rate": 8.484815928835749e-05, "loss": 0.8059, "step": 913 }, { "epoch": 0.28, "learning_rate": 8.481291956182552e-05, "loss": 0.7795, "step": 914 }, { "epoch": 0.28, "learning_rate": 8.477764623962882e-05, "loss": 0.8054, "step": 915 }, { "epoch": 0.28, "learning_rate": 8.47423393558074e-05, "loss": 0.801, "step": 916 }, { "epoch": 0.28, "learning_rate": 8.470699894443357e-05, "loss": 0.7805, "step": 917 }, { "epoch": 0.28, "learning_rate": 8.467162503961208e-05, "loss": 0.7679, "step": 918 }, { "epoch": 0.28, "learning_rate": 8.463621767547998e-05, "loss": 0.7501, "step": 919 }, { "epoch": 0.28, "learning_rate": 8.460077688620654e-05, "loss": 0.7465, "step": 920 }, { "epoch": 0.28, "learning_rate": 8.456530270599338e-05, "loss": 0.7692, "step": 921 }, { "epoch": 0.28, "learning_rate": 8.452979516907429e-05, "loss": 0.7561, "step": 922 }, { "epoch": 0.28, "learning_rate": 8.449425430971529e-05, "loss": 0.7897, "step": 923 }, { "epoch": 0.28, "learning_rate": 8.44586801622145e-05, "loss": 0.8137, "step": 924 }, { "epoch": 0.28, "learning_rate": 8.44230727609022e-05, "loss": 0.7783, "step": 925 }, { "epoch": 0.28, "learning_rate": 8.438743214014076e-05, "loss": 0.7967, "step": 926 }, { "epoch": 0.28, "learning_rate": 8.435175833432461e-05, "loss": 0.7615, "step": 927 }, { "epoch": 0.28, "learning_rate": 8.431605137788019e-05, "loss": 0.7389, "step": 928 }, { "epoch": 0.28, "learning_rate": 8.428031130526595e-05, "loss": 0.7774, "step": 929 }, { "epoch": 0.28, "learning_rate": 8.424453815097229e-05, "loss": 0.7845, "step": 930 }, { "epoch": 0.28, "learning_rate": 8.420873194952152e-05, "loss": 0.8249, "step": 931 }, { "epoch": 0.28, "learning_rate": 8.417289273546789e-05, "loss": 0.7547, "step": 932 }, { "epoch": 0.28, "learning_rate": 8.413702054339743e-05, "loss": 0.7847, "step": 933 }, { "epoch": 0.28, "learning_rate": 8.410111540792807e-05, "loss": 0.726, "step": 934 }, { "epoch": 0.28, "learning_rate": 8.40651773637095e-05, "loss": 0.7641, "step": 935 }, { "epoch": 0.28, "learning_rate": 8.402920644542315e-05, "loss": 0.7753, "step": 936 }, { "epoch": 0.28, "learning_rate": 8.39932026877822e-05, "loss": 0.7716, "step": 937 }, { "epoch": 0.28, "learning_rate": 8.395716612553153e-05, "loss": 0.7889, "step": 938 }, { "epoch": 0.28, "learning_rate": 8.392109679344764e-05, "loss": 0.7903, "step": 939 }, { "epoch": 0.29, "learning_rate": 8.388499472633868e-05, "loss": 0.8035, "step": 940 }, { "epoch": 0.29, "learning_rate": 8.38488599590444e-05, "loss": 0.7656, "step": 941 }, { "epoch": 0.29, "learning_rate": 8.381269252643609e-05, "loss": 0.8041, "step": 942 }, { "epoch": 0.29, "learning_rate": 8.377649246341654e-05, "loss": 0.7705, "step": 943 }, { "epoch": 0.29, "learning_rate": 8.374025980492011e-05, "loss": 0.7452, "step": 944 }, { "epoch": 0.29, "learning_rate": 8.370399458591251e-05, "loss": 0.8241, "step": 945 }, { "epoch": 0.29, "learning_rate": 8.366769684139096e-05, "loss": 0.8012, "step": 946 }, { "epoch": 0.29, "learning_rate": 8.363136660638398e-05, "loss": 0.7404, "step": 947 }, { "epoch": 0.29, "learning_rate": 8.359500391595156e-05, "loss": 0.7737, "step": 948 }, { "epoch": 0.29, "learning_rate": 8.35586088051849e-05, "loss": 0.766, "step": 949 }, { "epoch": 0.29, "learning_rate": 8.352218130920657e-05, "loss": 0.7769, "step": 950 }, { "epoch": 0.29, "learning_rate": 8.348572146317032e-05, "loss": 0.7689, "step": 951 }, { "epoch": 0.29, "learning_rate": 8.344922930226117e-05, "loss": 0.7775, "step": 952 }, { "epoch": 0.29, "learning_rate": 8.341270486169534e-05, "loss": 0.7758, "step": 953 }, { "epoch": 0.29, "learning_rate": 8.337614817672013e-05, "loss": 0.803, "step": 954 }, { "epoch": 0.29, "learning_rate": 8.3339559282614e-05, "loss": 0.7808, "step": 955 }, { "epoch": 0.29, "learning_rate": 8.330293821468653e-05, "loss": 0.8331, "step": 956 }, { "epoch": 0.29, "learning_rate": 8.326628500827826e-05, "loss": 0.7563, "step": 957 }, { "epoch": 0.29, "learning_rate": 8.322959969876085e-05, "loss": 0.7649, "step": 958 }, { "epoch": 0.29, "learning_rate": 8.319288232153684e-05, "loss": 0.8049, "step": 959 }, { "epoch": 0.29, "learning_rate": 8.315613291203976e-05, "loss": 0.7865, "step": 960 }, { "epoch": 0.29, "learning_rate": 8.31193515057341e-05, "loss": 0.7853, "step": 961 }, { "epoch": 0.29, "learning_rate": 8.308253813811513e-05, "loss": 0.7744, "step": 962 }, { "epoch": 0.29, "learning_rate": 8.304569284470904e-05, "loss": 0.788, "step": 963 }, { "epoch": 0.29, "learning_rate": 8.300881566107281e-05, "loss": 0.7897, "step": 964 }, { "epoch": 0.29, "learning_rate": 8.297190662279419e-05, "loss": 0.6977, "step": 965 }, { "epoch": 0.29, "learning_rate": 8.293496576549167e-05, "loss": 0.802, "step": 966 }, { "epoch": 0.29, "learning_rate": 8.289799312481442e-05, "loss": 0.8012, "step": 967 }, { "epoch": 0.29, "learning_rate": 8.286098873644235e-05, "loss": 0.8121, "step": 968 }, { "epoch": 0.29, "learning_rate": 8.282395263608596e-05, "loss": 0.7542, "step": 969 }, { "epoch": 0.29, "learning_rate": 8.278688485948633e-05, "loss": 0.7465, "step": 970 }, { "epoch": 0.29, "learning_rate": 8.274978544241517e-05, "loss": 0.71, "step": 971 }, { "epoch": 0.29, "learning_rate": 8.271265442067469e-05, "loss": 0.8408, "step": 972 }, { "epoch": 0.3, "learning_rate": 8.26754918300976e-05, "loss": 0.7726, "step": 973 }, { "epoch": 0.3, "learning_rate": 8.263829770654707e-05, "loss": 0.8239, "step": 974 }, { "epoch": 0.3, "learning_rate": 8.260107208591671e-05, "loss": 0.805, "step": 975 }, { "epoch": 0.3, "learning_rate": 8.256381500413054e-05, "loss": 0.7924, "step": 976 }, { "epoch": 0.3, "learning_rate": 8.25265264971429e-05, "loss": 0.7691, "step": 977 }, { "epoch": 0.3, "learning_rate": 8.248920660093851e-05, "loss": 0.7533, "step": 978 }, { "epoch": 0.3, "learning_rate": 8.245185535153232e-05, "loss": 0.7669, "step": 979 }, { "epoch": 0.3, "learning_rate": 8.241447278496961e-05, "loss": 0.8011, "step": 980 }, { "epoch": 0.3, "learning_rate": 8.237705893732582e-05, "loss": 0.7753, "step": 981 }, { "epoch": 0.3, "learning_rate": 8.233961384470659e-05, "loss": 0.7872, "step": 982 }, { "epoch": 0.3, "learning_rate": 8.230213754324773e-05, "loss": 0.798, "step": 983 }, { "epoch": 0.3, "learning_rate": 8.226463006911516e-05, "loss": 0.7808, "step": 984 }, { "epoch": 0.3, "learning_rate": 8.22270914585049e-05, "loss": 0.7663, "step": 985 }, { "epoch": 0.3, "learning_rate": 8.218952174764298e-05, "loss": 0.7848, "step": 986 }, { "epoch": 0.3, "learning_rate": 8.215192097278548e-05, "loss": 0.7654, "step": 987 }, { "epoch": 0.3, "learning_rate": 8.211428917021842e-05, "loss": 0.8185, "step": 988 }, { "epoch": 0.3, "learning_rate": 8.207662637625779e-05, "loss": 0.7978, "step": 989 }, { "epoch": 0.3, "learning_rate": 8.203893262724949e-05, "loss": 0.8194, "step": 990 }, { "epoch": 0.3, "learning_rate": 8.200120795956929e-05, "loss": 0.7826, "step": 991 }, { "epoch": 0.3, "learning_rate": 8.196345240962277e-05, "loss": 0.7494, "step": 992 }, { "epoch": 0.3, "learning_rate": 8.192566601384535e-05, "loss": 0.8039, "step": 993 }, { "epoch": 0.3, "learning_rate": 8.18878488087022e-05, "loss": 0.8084, "step": 994 }, { "epoch": 0.3, "learning_rate": 8.185000083068822e-05, "loss": 0.7847, "step": 995 }, { "epoch": 0.3, "learning_rate": 8.181212211632799e-05, "loss": 0.7925, "step": 996 }, { "epoch": 0.3, "learning_rate": 8.177421270217583e-05, "loss": 0.7835, "step": 997 }, { "epoch": 0.3, "learning_rate": 8.173627262481556e-05, "loss": 0.8027, "step": 998 }, { "epoch": 0.3, "learning_rate": 8.169830192086071e-05, "loss": 0.8098, "step": 999 }, { "epoch": 0.3, "learning_rate": 8.16603006269543e-05, "loss": 0.7554, "step": 1000 }, { "epoch": 0.3, "learning_rate": 8.162226877976887e-05, "loss": 0.7676, "step": 1001 }, { "epoch": 0.3, "learning_rate": 8.158420641600648e-05, "loss": 0.752, "step": 1002 }, { "epoch": 0.3, "learning_rate": 8.15461135723986e-05, "loss": 0.7192, "step": 1003 }, { "epoch": 0.3, "learning_rate": 8.150799028570617e-05, "loss": 0.7915, "step": 1004 }, { "epoch": 0.3, "learning_rate": 8.146983659271943e-05, "loss": 0.7503, "step": 1005 }, { "epoch": 0.31, "learning_rate": 8.143165253025804e-05, "loss": 0.7169, "step": 1006 }, { "epoch": 0.31, "learning_rate": 8.139343813517092e-05, "loss": 0.7432, "step": 1007 }, { "epoch": 0.31, "learning_rate": 8.135519344433627e-05, "loss": 0.8069, "step": 1008 }, { "epoch": 0.31, "learning_rate": 8.131691849466153e-05, "loss": 0.7291, "step": 1009 }, { "epoch": 0.31, "learning_rate": 8.127861332308335e-05, "loss": 0.8496, "step": 1010 }, { "epoch": 0.31, "learning_rate": 8.124027796656757e-05, "loss": 0.7399, "step": 1011 }, { "epoch": 0.31, "learning_rate": 8.120191246210907e-05, "loss": 0.7606, "step": 1012 }, { "epoch": 0.31, "learning_rate": 8.11635168467319e-05, "loss": 0.7897, "step": 1013 }, { "epoch": 0.31, "learning_rate": 8.112509115748917e-05, "loss": 0.7513, "step": 1014 }, { "epoch": 0.31, "learning_rate": 8.108663543146297e-05, "loss": 0.8234, "step": 1015 }, { "epoch": 0.31, "learning_rate": 8.104814970576441e-05, "loss": 0.7639, "step": 1016 }, { "epoch": 0.31, "learning_rate": 8.100963401753354e-05, "loss": 0.7706, "step": 1017 }, { "epoch": 0.31, "learning_rate": 8.097108840393931e-05, "loss": 0.7491, "step": 1018 }, { "epoch": 0.31, "learning_rate": 8.093251290217958e-05, "loss": 0.764, "step": 1019 }, { "epoch": 0.31, "learning_rate": 8.089390754948101e-05, "loss": 0.7693, "step": 1020 }, { "epoch": 0.31, "learning_rate": 8.085527238309913e-05, "loss": 0.7513, "step": 1021 }, { "epoch": 0.31, "learning_rate": 8.081660744031819e-05, "loss": 0.7438, "step": 1022 }, { "epoch": 0.31, "learning_rate": 8.077791275845118e-05, "loss": 0.7826, "step": 1023 }, { "epoch": 0.31, "learning_rate": 8.07391883748398e-05, "loss": 0.7913, "step": 1024 }, { "epoch": 0.31, "learning_rate": 8.070043432685441e-05, "loss": 0.7135, "step": 1025 }, { "epoch": 0.31, "learning_rate": 8.066165065189402e-05, "loss": 0.7763, "step": 1026 }, { "epoch": 0.31, "learning_rate": 8.062283738738619e-05, "loss": 0.8236, "step": 1027 }, { "epoch": 0.31, "learning_rate": 8.058399457078705e-05, "loss": 0.7688, "step": 1028 }, { "epoch": 0.31, "learning_rate": 8.054512223958126e-05, "loss": 0.7745, "step": 1029 }, { "epoch": 0.31, "learning_rate": 8.050622043128197e-05, "loss": 0.7504, "step": 1030 }, { "epoch": 0.31, "learning_rate": 8.046728918343076e-05, "loss": 0.8176, "step": 1031 }, { "epoch": 0.31, "learning_rate": 8.042832853359762e-05, "loss": 0.7751, "step": 1032 }, { "epoch": 0.31, "learning_rate": 8.038933851938091e-05, "loss": 0.7458, "step": 1033 }, { "epoch": 0.31, "learning_rate": 8.035031917840734e-05, "loss": 0.7733, "step": 1034 }, { "epoch": 0.31, "learning_rate": 8.03112705483319e-05, "loss": 0.7716, "step": 1035 }, { "epoch": 0.31, "learning_rate": 8.02721926668379e-05, "loss": 0.8331, "step": 1036 }, { "epoch": 0.31, "learning_rate": 8.02330855716368e-05, "loss": 0.7509, "step": 1037 }, { "epoch": 0.31, "learning_rate": 8.019394930046831e-05, "loss": 0.7525, "step": 1038 }, { "epoch": 0.32, "learning_rate": 8.015478389110027e-05, "loss": 0.7887, "step": 1039 }, { "epoch": 0.32, "learning_rate": 8.011558938132866e-05, "loss": 0.7697, "step": 1040 }, { "epoch": 0.32, "learning_rate": 8.007636580897752e-05, "loss": 0.7902, "step": 1041 }, { "epoch": 0.32, "learning_rate": 8.003711321189895e-05, "loss": 0.7387, "step": 1042 }, { "epoch": 0.32, "learning_rate": 7.999783162797306e-05, "loss": 0.7245, "step": 1043 }, { "epoch": 0.32, "learning_rate": 7.995852109510791e-05, "loss": 0.7939, "step": 1044 }, { "epoch": 0.32, "learning_rate": 7.991918165123955e-05, "loss": 0.7293, "step": 1045 }, { "epoch": 0.32, "learning_rate": 7.987981333433185e-05, "loss": 0.7384, "step": 1046 }, { "epoch": 0.32, "learning_rate": 7.984041618237664e-05, "loss": 0.7679, "step": 1047 }, { "epoch": 0.32, "learning_rate": 7.98009902333935e-05, "loss": 0.8042, "step": 1048 }, { "epoch": 0.32, "learning_rate": 7.976153552542983e-05, "loss": 0.7734, "step": 1049 }, { "epoch": 0.32, "learning_rate": 7.972205209656076e-05, "loss": 0.7427, "step": 1050 }, { "epoch": 0.32, "learning_rate": 7.96825399848892e-05, "loss": 0.759, "step": 1051 }, { "epoch": 0.32, "learning_rate": 7.964299922854567e-05, "loss": 0.7656, "step": 1052 }, { "epoch": 0.32, "learning_rate": 7.960342986568836e-05, "loss": 0.7324, "step": 1053 }, { "epoch": 0.32, "learning_rate": 7.956383193450307e-05, "loss": 0.7785, "step": 1054 }, { "epoch": 0.32, "learning_rate": 7.952420547320316e-05, "loss": 0.8135, "step": 1055 }, { "epoch": 0.32, "learning_rate": 7.948455052002955e-05, "loss": 0.7771, "step": 1056 }, { "epoch": 0.32, "learning_rate": 7.944486711325061e-05, "loss": 0.7867, "step": 1057 }, { "epoch": 0.32, "learning_rate": 7.940515529116219e-05, "loss": 0.7292, "step": 1058 }, { "epoch": 0.32, "learning_rate": 7.936541509208757e-05, "loss": 0.7804, "step": 1059 }, { "epoch": 0.32, "learning_rate": 7.932564655437742e-05, "loss": 0.7712, "step": 1060 }, { "epoch": 0.32, "learning_rate": 7.928584971640974e-05, "loss": 0.8031, "step": 1061 }, { "epoch": 0.32, "learning_rate": 7.924602461658985e-05, "loss": 0.7634, "step": 1062 }, { "epoch": 0.32, "learning_rate": 7.920617129335033e-05, "loss": 0.8193, "step": 1063 }, { "epoch": 0.32, "learning_rate": 7.916628978515103e-05, "loss": 0.7818, "step": 1064 }, { "epoch": 0.32, "learning_rate": 7.912638013047895e-05, "loss": 0.799, "step": 1065 }, { "epoch": 0.32, "learning_rate": 7.908644236784829e-05, "loss": 0.7225, "step": 1066 }, { "epoch": 0.32, "learning_rate": 7.904647653580036e-05, "loss": 0.7441, "step": 1067 }, { "epoch": 0.32, "learning_rate": 7.900648267290358e-05, "loss": 0.7804, "step": 1068 }, { "epoch": 0.32, "learning_rate": 7.896646081775338e-05, "loss": 0.7611, "step": 1069 }, { "epoch": 0.32, "learning_rate": 7.892641100897227e-05, "loss": 0.7354, "step": 1070 }, { "epoch": 0.32, "learning_rate": 7.888633328520963e-05, "loss": 0.7508, "step": 1071 }, { "epoch": 0.33, "learning_rate": 7.884622768514188e-05, "loss": 0.7821, "step": 1072 }, { "epoch": 0.33, "learning_rate": 7.88060942474723e-05, "loss": 0.701, "step": 1073 }, { "epoch": 0.33, "learning_rate": 7.876593301093104e-05, "loss": 0.7687, "step": 1074 }, { "epoch": 0.33, "learning_rate": 7.872574401427506e-05, "loss": 0.7735, "step": 1075 }, { "epoch": 0.33, "learning_rate": 7.868552729628813e-05, "loss": 0.8186, "step": 1076 }, { "epoch": 0.33, "learning_rate": 7.864528289578077e-05, "loss": 0.72, "step": 1077 }, { "epoch": 0.33, "learning_rate": 7.860501085159023e-05, "loss": 0.8134, "step": 1078 }, { "epoch": 0.33, "learning_rate": 7.856471120258036e-05, "loss": 0.7277, "step": 1079 }, { "epoch": 0.33, "learning_rate": 7.852438398764177e-05, "loss": 0.7464, "step": 1080 }, { "epoch": 0.33, "learning_rate": 7.848402924569158e-05, "loss": 0.7617, "step": 1081 }, { "epoch": 0.33, "learning_rate": 7.844364701567351e-05, "loss": 0.7414, "step": 1082 }, { "epoch": 0.33, "learning_rate": 7.840323733655778e-05, "loss": 0.7637, "step": 1083 }, { "epoch": 0.33, "learning_rate": 7.836280024734118e-05, "loss": 0.7819, "step": 1084 }, { "epoch": 0.33, "learning_rate": 7.832233578704682e-05, "loss": 0.7649, "step": 1085 }, { "epoch": 0.33, "learning_rate": 7.828184399472433e-05, "loss": 0.7027, "step": 1086 }, { "epoch": 0.33, "learning_rate": 7.824132490944967e-05, "loss": 0.7916, "step": 1087 }, { "epoch": 0.33, "learning_rate": 7.820077857032517e-05, "loss": 0.803, "step": 1088 }, { "epoch": 0.33, "learning_rate": 7.816020501647944e-05, "loss": 0.7622, "step": 1089 }, { "epoch": 0.33, "learning_rate": 7.811960428706735e-05, "loss": 0.8146, "step": 1090 }, { "epoch": 0.33, "learning_rate": 7.807897642126998e-05, "loss": 0.7801, "step": 1091 }, { "epoch": 0.33, "learning_rate": 7.803832145829464e-05, "loss": 0.7858, "step": 1092 }, { "epoch": 0.33, "learning_rate": 7.799763943737475e-05, "loss": 0.7838, "step": 1093 }, { "epoch": 0.33, "learning_rate": 7.79569303977699e-05, "loss": 0.7889, "step": 1094 }, { "epoch": 0.33, "learning_rate": 7.791619437876566e-05, "loss": 0.7282, "step": 1095 }, { "epoch": 0.33, "learning_rate": 7.787543141967373e-05, "loss": 0.8056, "step": 1096 }, { "epoch": 0.33, "learning_rate": 7.783464155983174e-05, "loss": 0.7393, "step": 1097 }, { "epoch": 0.33, "learning_rate": 7.779382483860332e-05, "loss": 0.8255, "step": 1098 }, { "epoch": 0.33, "learning_rate": 7.775298129537801e-05, "loss": 0.7467, "step": 1099 }, { "epoch": 0.33, "learning_rate": 7.771211096957125e-05, "loss": 0.7709, "step": 1100 }, { "epoch": 0.33, "learning_rate": 7.767121390062426e-05, "loss": 0.7689, "step": 1101 }, { "epoch": 0.33, "learning_rate": 7.763029012800418e-05, "loss": 0.8276, "step": 1102 }, { "epoch": 0.33, "learning_rate": 7.758933969120381e-05, "loss": 0.7481, "step": 1103 }, { "epoch": 0.33, "learning_rate": 7.754836262974177e-05, "loss": 0.76, "step": 1104 }, { "epoch": 0.34, "learning_rate": 7.75073589831623e-05, "loss": 0.7615, "step": 1105 }, { "epoch": 0.34, "learning_rate": 7.746632879103535e-05, "loss": 0.8079, "step": 1106 }, { "epoch": 0.34, "learning_rate": 7.742527209295644e-05, "loss": 0.8305, "step": 1107 }, { "epoch": 0.34, "learning_rate": 7.738418892854675e-05, "loss": 0.7639, "step": 1108 }, { "epoch": 0.34, "learning_rate": 7.734307933745288e-05, "loss": 0.8359, "step": 1109 }, { "epoch": 0.34, "learning_rate": 7.730194335934704e-05, "loss": 0.7854, "step": 1110 }, { "epoch": 0.34, "learning_rate": 7.726078103392684e-05, "loss": 0.7286, "step": 1111 }, { "epoch": 0.34, "learning_rate": 7.721959240091537e-05, "loss": 0.7503, "step": 1112 }, { "epoch": 0.34, "learning_rate": 7.717837750006106e-05, "loss": 0.7443, "step": 1113 }, { "epoch": 0.34, "learning_rate": 7.71371363711377e-05, "loss": 0.7698, "step": 1114 }, { "epoch": 0.34, "learning_rate": 7.709586905394441e-05, "loss": 0.788, "step": 1115 }, { "epoch": 0.34, "learning_rate": 7.705457558830557e-05, "loss": 0.7743, "step": 1116 }, { "epoch": 0.34, "learning_rate": 7.70132560140708e-05, "loss": 0.8163, "step": 1117 }, { "epoch": 0.34, "learning_rate": 7.697191037111488e-05, "loss": 0.7607, "step": 1118 }, { "epoch": 0.34, "learning_rate": 7.693053869933782e-05, "loss": 0.7759, "step": 1119 }, { "epoch": 0.34, "learning_rate": 7.688914103866466e-05, "loss": 0.7697, "step": 1120 }, { "epoch": 0.34, "learning_rate": 7.684771742904562e-05, "loss": 0.7708, "step": 1121 }, { "epoch": 0.34, "learning_rate": 7.680626791045586e-05, "loss": 0.7608, "step": 1122 }, { "epoch": 0.34, "learning_rate": 7.676479252289562e-05, "loss": 0.7214, "step": 1123 }, { "epoch": 0.34, "learning_rate": 7.672329130639005e-05, "loss": 0.803, "step": 1124 }, { "epoch": 0.34, "learning_rate": 7.668176430098928e-05, "loss": 0.775, "step": 1125 }, { "epoch": 0.34, "learning_rate": 7.664021154676829e-05, "loss": 0.745, "step": 1126 }, { "epoch": 0.34, "learning_rate": 7.659863308382691e-05, "loss": 0.7576, "step": 1127 }, { "epoch": 0.34, "learning_rate": 7.655702895228978e-05, "loss": 0.7628, "step": 1128 }, { "epoch": 0.34, "learning_rate": 7.651539919230634e-05, "loss": 0.7536, "step": 1129 }, { "epoch": 0.34, "learning_rate": 7.647374384405071e-05, "loss": 0.8197, "step": 1130 }, { "epoch": 0.34, "learning_rate": 7.643206294772176e-05, "loss": 0.7176, "step": 1131 }, { "epoch": 0.34, "learning_rate": 7.639035654354296e-05, "loss": 0.744, "step": 1132 }, { "epoch": 0.34, "learning_rate": 7.634862467176249e-05, "loss": 0.7932, "step": 1133 }, { "epoch": 0.34, "learning_rate": 7.630686737265296e-05, "loss": 0.7996, "step": 1134 }, { "epoch": 0.34, "learning_rate": 7.626508468651163e-05, "loss": 0.7748, "step": 1135 }, { "epoch": 0.34, "learning_rate": 7.622327665366025e-05, "loss": 0.7538, "step": 1136 }, { "epoch": 0.34, "learning_rate": 7.6181443314445e-05, "loss": 0.8012, "step": 1137 }, { "epoch": 0.35, "learning_rate": 7.613958470923648e-05, "loss": 0.8168, "step": 1138 }, { "epoch": 0.35, "learning_rate": 7.609770087842969e-05, "loss": 0.7253, "step": 1139 }, { "epoch": 0.35, "learning_rate": 7.605579186244396e-05, "loss": 0.7637, "step": 1140 }, { "epoch": 0.35, "learning_rate": 7.601385770172294e-05, "loss": 0.7956, "step": 1141 }, { "epoch": 0.35, "learning_rate": 7.597189843673451e-05, "loss": 0.7502, "step": 1142 }, { "epoch": 0.35, "learning_rate": 7.592991410797087e-05, "loss": 0.7464, "step": 1143 }, { "epoch": 0.35, "learning_rate": 7.588790475594828e-05, "loss": 0.7921, "step": 1144 }, { "epoch": 0.35, "learning_rate": 7.584587042120723e-05, "loss": 0.7533, "step": 1145 }, { "epoch": 0.35, "learning_rate": 7.58038111443123e-05, "loss": 0.799, "step": 1146 }, { "epoch": 0.35, "learning_rate": 7.576172696585215e-05, "loss": 0.7425, "step": 1147 }, { "epoch": 0.35, "learning_rate": 7.571961792643944e-05, "loss": 0.6939, "step": 1148 }, { "epoch": 0.35, "learning_rate": 7.567748406671084e-05, "loss": 0.7577, "step": 1149 }, { "epoch": 0.35, "learning_rate": 7.563532542732699e-05, "loss": 0.7326, "step": 1150 }, { "epoch": 0.35, "learning_rate": 7.55931420489724e-05, "loss": 0.7408, "step": 1151 }, { "epoch": 0.35, "learning_rate": 7.555093397235552e-05, "loss": 0.7904, "step": 1152 }, { "epoch": 0.35, "learning_rate": 7.550870123820857e-05, "loss": 0.7719, "step": 1153 }, { "epoch": 0.35, "learning_rate": 7.54664438872876e-05, "loss": 0.7244, "step": 1154 }, { "epoch": 0.35, "learning_rate": 7.54241619603724e-05, "loss": 0.7852, "step": 1155 }, { "epoch": 0.35, "learning_rate": 7.538185549826648e-05, "loss": 0.7834, "step": 1156 }, { "epoch": 0.35, "learning_rate": 7.533952454179707e-05, "loss": 0.8088, "step": 1157 }, { "epoch": 0.35, "learning_rate": 7.529716913181492e-05, "loss": 0.7465, "step": 1158 }, { "epoch": 0.35, "learning_rate": 7.525478930919454e-05, "loss": 0.7517, "step": 1159 }, { "epoch": 0.35, "learning_rate": 7.521238511483387e-05, "loss": 0.7236, "step": 1160 }, { "epoch": 0.35, "learning_rate": 7.516995658965442e-05, "loss": 0.7932, "step": 1161 }, { "epoch": 0.35, "learning_rate": 7.512750377460118e-05, "loss": 0.8311, "step": 1162 }, { "epoch": 0.35, "learning_rate": 7.508502671064259e-05, "loss": 0.7566, "step": 1163 }, { "epoch": 0.35, "learning_rate": 7.504252543877047e-05, "loss": 0.7773, "step": 1164 }, { "epoch": 0.35, "learning_rate": 7.500000000000001e-05, "loss": 0.8108, "step": 1165 }, { "epoch": 0.35, "learning_rate": 7.49574504353697e-05, "loss": 0.7546, "step": 1166 }, { "epoch": 0.35, "learning_rate": 7.491487678594137e-05, "loss": 0.7764, "step": 1167 }, { "epoch": 0.35, "learning_rate": 7.487227909280003e-05, "loss": 0.7792, "step": 1168 }, { "epoch": 0.35, "learning_rate": 7.482965739705392e-05, "loss": 0.7675, "step": 1169 }, { "epoch": 0.35, "learning_rate": 7.478701173983445e-05, "loss": 0.7632, "step": 1170 }, { "epoch": 0.36, "learning_rate": 7.474434216229614e-05, "loss": 0.7882, "step": 1171 }, { "epoch": 0.36, "learning_rate": 7.47016487056166e-05, "loss": 0.7374, "step": 1172 }, { "epoch": 0.36, "learning_rate": 7.46589314109965e-05, "loss": 0.7733, "step": 1173 }, { "epoch": 0.36, "learning_rate": 7.461619031965948e-05, "loss": 0.7472, "step": 1174 }, { "epoch": 0.36, "learning_rate": 7.457342547285216e-05, "loss": 0.7693, "step": 1175 }, { "epoch": 0.36, "learning_rate": 7.453063691184411e-05, "loss": 0.7453, "step": 1176 }, { "epoch": 0.36, "learning_rate": 7.448782467792775e-05, "loss": 0.7598, "step": 1177 }, { "epoch": 0.36, "learning_rate": 7.444498881241835e-05, "loss": 0.7714, "step": 1178 }, { "epoch": 0.36, "learning_rate": 7.440212935665402e-05, "loss": 0.8058, "step": 1179 }, { "epoch": 0.36, "learning_rate": 7.435924635199558e-05, "loss": 0.7456, "step": 1180 }, { "epoch": 0.36, "learning_rate": 7.431633983982659e-05, "loss": 0.7745, "step": 1181 }, { "epoch": 0.36, "learning_rate": 7.427340986155337e-05, "loss": 0.8051, "step": 1182 }, { "epoch": 0.36, "learning_rate": 7.423045645860479e-05, "loss": 0.7751, "step": 1183 }, { "epoch": 0.36, "learning_rate": 7.418747967243233e-05, "loss": 0.7563, "step": 1184 }, { "epoch": 0.36, "learning_rate": 7.414447954451014e-05, "loss": 0.7725, "step": 1185 }, { "epoch": 0.36, "learning_rate": 7.410145611633475e-05, "loss": 0.8116, "step": 1186 }, { "epoch": 0.36, "learning_rate": 7.405840942942529e-05, "loss": 0.8099, "step": 1187 }, { "epoch": 0.36, "learning_rate": 7.401533952532329e-05, "loss": 0.7887, "step": 1188 }, { "epoch": 0.36, "learning_rate": 7.397224644559267e-05, "loss": 0.7224, "step": 1189 }, { "epoch": 0.36, "learning_rate": 7.392913023181976e-05, "loss": 0.7309, "step": 1190 }, { "epoch": 0.36, "learning_rate": 7.388599092561315e-05, "loss": 0.7644, "step": 1191 }, { "epoch": 0.36, "learning_rate": 7.384282856860379e-05, "loss": 0.7149, "step": 1192 }, { "epoch": 0.36, "learning_rate": 7.379964320244483e-05, "loss": 0.7421, "step": 1193 }, { "epoch": 0.36, "learning_rate": 7.375643486881163e-05, "loss": 0.8221, "step": 1194 }, { "epoch": 0.36, "learning_rate": 7.371320360940171e-05, "loss": 0.7489, "step": 1195 }, { "epoch": 0.36, "learning_rate": 7.366994946593475e-05, "loss": 0.7359, "step": 1196 }, { "epoch": 0.36, "learning_rate": 7.362667248015246e-05, "loss": 0.7564, "step": 1197 }, { "epoch": 0.36, "learning_rate": 7.358337269381863e-05, "loss": 0.6961, "step": 1198 }, { "epoch": 0.36, "learning_rate": 7.354005014871903e-05, "loss": 0.736, "step": 1199 }, { "epoch": 0.36, "learning_rate": 7.349670488666143e-05, "loss": 0.8149, "step": 1200 }, { "epoch": 0.36, "learning_rate": 7.345333694947547e-05, "loss": 0.7926, "step": 1201 }, { "epoch": 0.36, "learning_rate": 7.340994637901273e-05, "loss": 0.7513, "step": 1202 }, { "epoch": 0.36, "learning_rate": 7.336653321714657e-05, "loss": 0.7462, "step": 1203 }, { "epoch": 0.37, "learning_rate": 7.332309750577222e-05, "loss": 0.7632, "step": 1204 }, { "epoch": 0.37, "learning_rate": 7.327963928680661e-05, "loss": 0.7837, "step": 1205 }, { "epoch": 0.37, "learning_rate": 7.323615860218843e-05, "loss": 0.7519, "step": 1206 }, { "epoch": 0.37, "learning_rate": 7.319265549387802e-05, "loss": 0.8132, "step": 1207 }, { "epoch": 0.37, "learning_rate": 7.314913000385741e-05, "loss": 0.7833, "step": 1208 }, { "epoch": 0.37, "learning_rate": 7.310558217413015e-05, "loss": 0.7985, "step": 1209 }, { "epoch": 0.37, "learning_rate": 7.306201204672144e-05, "loss": 0.7996, "step": 1210 }, { "epoch": 0.37, "learning_rate": 7.301841966367788e-05, "loss": 0.7432, "step": 1211 }, { "epoch": 0.37, "learning_rate": 7.29748050670677e-05, "loss": 0.7662, "step": 1212 }, { "epoch": 0.37, "learning_rate": 7.293116829898043e-05, "loss": 0.8276, "step": 1213 }, { "epoch": 0.37, "learning_rate": 7.28875094015271e-05, "loss": 0.7788, "step": 1214 }, { "epoch": 0.37, "learning_rate": 7.284382841684e-05, "loss": 0.7632, "step": 1215 }, { "epoch": 0.37, "learning_rate": 7.280012538707282e-05, "loss": 0.7603, "step": 1216 }, { "epoch": 0.37, "learning_rate": 7.275640035440045e-05, "loss": 0.7358, "step": 1217 }, { "epoch": 0.37, "learning_rate": 7.271265336101907e-05, "loss": 0.7905, "step": 1218 }, { "epoch": 0.37, "learning_rate": 7.266888444914605e-05, "loss": 0.8486, "step": 1219 }, { "epoch": 0.37, "learning_rate": 7.262509366101987e-05, "loss": 0.763, "step": 1220 }, { "epoch": 0.37, "learning_rate": 7.258128103890015e-05, "loss": 0.7366, "step": 1221 }, { "epoch": 0.37, "learning_rate": 7.253744662506759e-05, "loss": 0.8081, "step": 1222 }, { "epoch": 0.37, "learning_rate": 7.24935904618239e-05, "loss": 0.7912, "step": 1223 }, { "epoch": 0.37, "learning_rate": 7.244971259149178e-05, "loss": 0.739, "step": 1224 }, { "epoch": 0.37, "learning_rate": 7.240581305641489e-05, "loss": 0.7724, "step": 1225 }, { "epoch": 0.37, "learning_rate": 7.236189189895779e-05, "loss": 0.8061, "step": 1226 }, { "epoch": 0.37, "learning_rate": 7.231794916150591e-05, "loss": 0.6636, "step": 1227 }, { "epoch": 0.37, "learning_rate": 7.22739848864655e-05, "loss": 0.7961, "step": 1228 }, { "epoch": 0.37, "learning_rate": 7.22299991162636e-05, "loss": 0.7703, "step": 1229 }, { "epoch": 0.37, "learning_rate": 7.218599189334799e-05, "loss": 0.7006, "step": 1230 }, { "epoch": 0.37, "learning_rate": 7.214196326018716e-05, "loss": 0.7646, "step": 1231 }, { "epoch": 0.37, "learning_rate": 7.209791325927023e-05, "loss": 0.682, "step": 1232 }, { "epoch": 0.37, "learning_rate": 7.2053841933107e-05, "loss": 0.7029, "step": 1233 }, { "epoch": 0.37, "learning_rate": 7.20097493242278e-05, "loss": 0.7755, "step": 1234 }, { "epoch": 0.37, "learning_rate": 7.196563547518351e-05, "loss": 0.7757, "step": 1235 }, { "epoch": 0.37, "learning_rate": 7.19215004285455e-05, "loss": 0.7389, "step": 1236 }, { "epoch": 0.38, "learning_rate": 7.187734422690564e-05, "loss": 0.7252, "step": 1237 }, { "epoch": 0.38, "learning_rate": 7.183316691287618e-05, "loss": 0.7659, "step": 1238 }, { "epoch": 0.38, "learning_rate": 7.178896852908972e-05, "loss": 0.8266, "step": 1239 }, { "epoch": 0.38, "learning_rate": 7.174474911819924e-05, "loss": 0.7643, "step": 1240 }, { "epoch": 0.38, "learning_rate": 7.170050872287797e-05, "loss": 0.7659, "step": 1241 }, { "epoch": 0.38, "learning_rate": 7.165624738581944e-05, "loss": 0.7807, "step": 1242 }, { "epoch": 0.38, "learning_rate": 7.161196514973734e-05, "loss": 0.7689, "step": 1243 }, { "epoch": 0.38, "learning_rate": 7.15676620573656e-05, "loss": 0.7013, "step": 1244 }, { "epoch": 0.38, "learning_rate": 7.152333815145817e-05, "loss": 0.7756, "step": 1245 }, { "epoch": 0.38, "learning_rate": 7.147899347478915e-05, "loss": 0.8093, "step": 1246 }, { "epoch": 0.38, "learning_rate": 7.143462807015271e-05, "loss": 0.7267, "step": 1247 }, { "epoch": 0.38, "learning_rate": 7.139024198036299e-05, "loss": 0.7494, "step": 1248 }, { "epoch": 0.38, "learning_rate": 7.134583524825404e-05, "loss": 0.759, "step": 1249 }, { "epoch": 0.38, "learning_rate": 7.130140791667995e-05, "loss": 0.7639, "step": 1250 }, { "epoch": 0.38, "learning_rate": 7.125696002851458e-05, "loss": 0.698, "step": 1251 }, { "epoch": 0.38, "learning_rate": 7.121249162665168e-05, "loss": 0.7927, "step": 1252 }, { "epoch": 0.38, "learning_rate": 7.116800275400481e-05, "loss": 0.7907, "step": 1253 }, { "epoch": 0.38, "learning_rate": 7.112349345350724e-05, "loss": 0.7883, "step": 1254 }, { "epoch": 0.38, "learning_rate": 7.107896376811199e-05, "loss": 0.7753, "step": 1255 }, { "epoch": 0.38, "learning_rate": 7.10344137407917e-05, "loss": 0.7748, "step": 1256 }, { "epoch": 0.38, "learning_rate": 7.098984341453874e-05, "loss": 0.7134, "step": 1257 }, { "epoch": 0.38, "learning_rate": 7.094525283236495e-05, "loss": 0.8006, "step": 1258 }, { "epoch": 0.38, "learning_rate": 7.090064203730182e-05, "loss": 0.789, "step": 1259 }, { "epoch": 0.38, "learning_rate": 7.085601107240027e-05, "loss": 0.7794, "step": 1260 }, { "epoch": 0.38, "learning_rate": 7.081135998073073e-05, "loss": 0.7592, "step": 1261 }, { "epoch": 0.38, "learning_rate": 7.076668880538303e-05, "loss": 0.726, "step": 1262 }, { "epoch": 0.38, "learning_rate": 7.07219975894664e-05, "loss": 0.7799, "step": 1263 }, { "epoch": 0.38, "learning_rate": 7.067728637610939e-05, "loss": 0.8126, "step": 1264 }, { "epoch": 0.38, "learning_rate": 7.063255520845989e-05, "loss": 0.7477, "step": 1265 }, { "epoch": 0.38, "learning_rate": 7.058780412968499e-05, "loss": 0.7497, "step": 1266 }, { "epoch": 0.38, "learning_rate": 7.054303318297102e-05, "loss": 0.7693, "step": 1267 }, { "epoch": 0.38, "learning_rate": 7.049824241152347e-05, "loss": 0.7963, "step": 1268 }, { "epoch": 0.38, "learning_rate": 7.045343185856701e-05, "loss": 0.7884, "step": 1269 }, { "epoch": 0.39, "learning_rate": 7.040860156734533e-05, "loss": 0.7478, "step": 1270 }, { "epoch": 0.39, "learning_rate": 7.036375158112122e-05, "loss": 0.7519, "step": 1271 }, { "epoch": 0.39, "learning_rate": 7.031888194317646e-05, "loss": 0.7575, "step": 1272 }, { "epoch": 0.39, "learning_rate": 7.027399269681178e-05, "loss": 0.7765, "step": 1273 }, { "epoch": 0.39, "learning_rate": 7.022908388534686e-05, "loss": 0.744, "step": 1274 }, { "epoch": 0.39, "learning_rate": 7.018415555212026e-05, "loss": 0.7544, "step": 1275 }, { "epoch": 0.39, "learning_rate": 7.013920774048935e-05, "loss": 0.7332, "step": 1276 }, { "epoch": 0.39, "learning_rate": 7.009424049383032e-05, "loss": 0.6938, "step": 1277 }, { "epoch": 0.39, "learning_rate": 7.00492538555381e-05, "loss": 0.6962, "step": 1278 }, { "epoch": 0.39, "learning_rate": 7.000424786902635e-05, "loss": 0.7982, "step": 1279 }, { "epoch": 0.39, "learning_rate": 6.995922257772739e-05, "loss": 0.7732, "step": 1280 }, { "epoch": 0.39, "learning_rate": 6.99141780250922e-05, "loss": 0.7282, "step": 1281 }, { "epoch": 0.39, "learning_rate": 6.986911425459028e-05, "loss": 0.7596, "step": 1282 }, { "epoch": 0.39, "learning_rate": 6.982403130970972e-05, "loss": 0.7607, "step": 1283 }, { "epoch": 0.39, "learning_rate": 6.977892923395716e-05, "loss": 0.7965, "step": 1284 }, { "epoch": 0.39, "learning_rate": 6.973380807085763e-05, "loss": 0.7608, "step": 1285 }, { "epoch": 0.39, "learning_rate": 6.968866786395458e-05, "loss": 0.7899, "step": 1286 }, { "epoch": 0.39, "learning_rate": 6.96435086568099e-05, "loss": 0.7628, "step": 1287 }, { "epoch": 0.39, "learning_rate": 6.959833049300377e-05, "loss": 0.7567, "step": 1288 }, { "epoch": 0.39, "learning_rate": 6.955313341613464e-05, "loss": 0.7361, "step": 1289 }, { "epoch": 0.39, "learning_rate": 6.950791746981927e-05, "loss": 0.7871, "step": 1290 }, { "epoch": 0.39, "learning_rate": 6.946268269769261e-05, "loss": 0.8193, "step": 1291 }, { "epoch": 0.39, "learning_rate": 6.941742914340776e-05, "loss": 0.7934, "step": 1292 }, { "epoch": 0.39, "learning_rate": 6.937215685063594e-05, "loss": 0.7462, "step": 1293 }, { "epoch": 0.39, "learning_rate": 6.932686586306649e-05, "loss": 0.7322, "step": 1294 }, { "epoch": 0.39, "learning_rate": 6.92815562244068e-05, "loss": 0.778, "step": 1295 }, { "epoch": 0.39, "learning_rate": 6.923622797838216e-05, "loss": 0.7087, "step": 1296 }, { "epoch": 0.39, "learning_rate": 6.919088116873594e-05, "loss": 0.7026, "step": 1297 }, { "epoch": 0.39, "learning_rate": 6.914551583922935e-05, "loss": 0.7792, "step": 1298 }, { "epoch": 0.39, "learning_rate": 6.91001320336415e-05, "loss": 0.7737, "step": 1299 }, { "epoch": 0.39, "learning_rate": 6.905472979576933e-05, "loss": 0.6906, "step": 1300 }, { "epoch": 0.39, "learning_rate": 6.900930916942754e-05, "loss": 0.7533, "step": 1301 }, { "epoch": 0.39, "learning_rate": 6.89638701984486e-05, "loss": 0.7303, "step": 1302 }, { "epoch": 0.4, "learning_rate": 6.89184129266827e-05, "loss": 0.746, "step": 1303 }, { "epoch": 0.4, "learning_rate": 6.887293739799764e-05, "loss": 0.7618, "step": 1304 }, { "epoch": 0.4, "learning_rate": 6.882744365627892e-05, "loss": 0.7386, "step": 1305 }, { "epoch": 0.4, "learning_rate": 6.878193174542951e-05, "loss": 0.7709, "step": 1306 }, { "epoch": 0.4, "learning_rate": 6.873640170937002e-05, "loss": 0.7083, "step": 1307 }, { "epoch": 0.4, "learning_rate": 6.869085359203844e-05, "loss": 0.8299, "step": 1308 }, { "epoch": 0.4, "learning_rate": 6.864528743739033e-05, "loss": 0.7897, "step": 1309 }, { "epoch": 0.4, "learning_rate": 6.859970328939856e-05, "loss": 0.7464, "step": 1310 }, { "epoch": 0.4, "learning_rate": 6.855410119205342e-05, "loss": 0.7494, "step": 1311 }, { "epoch": 0.4, "learning_rate": 6.850848118936246e-05, "loss": 0.7801, "step": 1312 }, { "epoch": 0.4, "learning_rate": 6.84628433253506e-05, "loss": 0.8032, "step": 1313 }, { "epoch": 0.4, "learning_rate": 6.841718764405994e-05, "loss": 0.7464, "step": 1314 }, { "epoch": 0.4, "learning_rate": 6.837151418954977e-05, "loss": 0.7889, "step": 1315 }, { "epoch": 0.4, "learning_rate": 6.832582300589657e-05, "loss": 0.7745, "step": 1316 }, { "epoch": 0.4, "learning_rate": 6.828011413719385e-05, "loss": 0.8047, "step": 1317 }, { "epoch": 0.4, "learning_rate": 6.82343876275523e-05, "loss": 0.7525, "step": 1318 }, { "epoch": 0.4, "learning_rate": 6.818864352109953e-05, "loss": 0.7233, "step": 1319 }, { "epoch": 0.4, "learning_rate": 6.814288186198022e-05, "loss": 0.7653, "step": 1320 }, { "epoch": 0.4, "learning_rate": 6.809710269435589e-05, "loss": 0.7317, "step": 1321 }, { "epoch": 0.4, "learning_rate": 6.805130606240508e-05, "loss": 0.7684, "step": 1322 }, { "epoch": 0.4, "learning_rate": 6.800549201032304e-05, "loss": 0.7358, "step": 1323 }, { "epoch": 0.4, "learning_rate": 6.795966058232195e-05, "loss": 0.778, "step": 1324 }, { "epoch": 0.4, "learning_rate": 6.79138118226307e-05, "loss": 0.7633, "step": 1325 }, { "epoch": 0.4, "learning_rate": 6.786794577549494e-05, "loss": 0.7522, "step": 1326 }, { "epoch": 0.4, "learning_rate": 6.782206248517695e-05, "loss": 0.7899, "step": 1327 }, { "epoch": 0.4, "learning_rate": 6.777616199595574e-05, "loss": 0.7242, "step": 1328 }, { "epoch": 0.4, "learning_rate": 6.773024435212678e-05, "loss": 0.7524, "step": 1329 }, { "epoch": 0.4, "learning_rate": 6.768430959800225e-05, "loss": 0.7703, "step": 1330 }, { "epoch": 0.4, "learning_rate": 6.763835777791071e-05, "loss": 0.7403, "step": 1331 }, { "epoch": 0.4, "learning_rate": 6.75923889361973e-05, "loss": 0.747, "step": 1332 }, { "epoch": 0.4, "learning_rate": 6.754640311722347e-05, "loss": 0.7682, "step": 1333 }, { "epoch": 0.4, "learning_rate": 6.750040036536718e-05, "loss": 0.7829, "step": 1334 }, { "epoch": 0.4, "learning_rate": 6.745438072502264e-05, "loss": 0.7846, "step": 1335 }, { "epoch": 0.41, "learning_rate": 6.740834424060038e-05, "loss": 0.7499, "step": 1336 }, { "epoch": 0.41, "learning_rate": 6.736229095652719e-05, "loss": 0.7259, "step": 1337 }, { "epoch": 0.41, "learning_rate": 6.73162209172461e-05, "loss": 0.8029, "step": 1338 }, { "epoch": 0.41, "learning_rate": 6.727013416721625e-05, "loss": 0.7531, "step": 1339 }, { "epoch": 0.41, "learning_rate": 6.722403075091296e-05, "loss": 0.794, "step": 1340 }, { "epoch": 0.41, "learning_rate": 6.717791071282759e-05, "loss": 0.8078, "step": 1341 }, { "epoch": 0.41, "learning_rate": 6.71317740974676e-05, "loss": 0.7497, "step": 1342 }, { "epoch": 0.41, "learning_rate": 6.708562094935636e-05, "loss": 0.717, "step": 1343 }, { "epoch": 0.41, "learning_rate": 6.703945131303328e-05, "loss": 0.8052, "step": 1344 }, { "epoch": 0.41, "learning_rate": 6.699326523305363e-05, "loss": 0.7477, "step": 1345 }, { "epoch": 0.41, "learning_rate": 6.69470627539886e-05, "loss": 0.7545, "step": 1346 }, { "epoch": 0.41, "learning_rate": 6.690084392042513e-05, "loss": 0.7501, "step": 1347 }, { "epoch": 0.41, "learning_rate": 6.685460877696602e-05, "loss": 0.7703, "step": 1348 }, { "epoch": 0.41, "learning_rate": 6.680835736822975e-05, "loss": 0.7023, "step": 1349 }, { "epoch": 0.41, "learning_rate": 6.676208973885051e-05, "loss": 0.7829, "step": 1350 }, { "epoch": 0.41, "learning_rate": 6.671580593347817e-05, "loss": 0.7456, "step": 1351 }, { "epoch": 0.41, "learning_rate": 6.666950599677822e-05, "loss": 0.8168, "step": 1352 }, { "epoch": 0.41, "learning_rate": 6.662318997343163e-05, "loss": 0.766, "step": 1353 }, { "epoch": 0.41, "learning_rate": 6.657685790813499e-05, "loss": 0.7756, "step": 1354 }, { "epoch": 0.41, "learning_rate": 6.653050984560035e-05, "loss": 0.8133, "step": 1355 }, { "epoch": 0.41, "learning_rate": 6.648414583055516e-05, "loss": 0.7727, "step": 1356 }, { "epoch": 0.41, "learning_rate": 6.643776590774231e-05, "loss": 0.8121, "step": 1357 }, { "epoch": 0.41, "learning_rate": 6.639137012192e-05, "loss": 0.7872, "step": 1358 }, { "epoch": 0.41, "learning_rate": 6.634495851786178e-05, "loss": 0.7634, "step": 1359 }, { "epoch": 0.41, "learning_rate": 6.629853114035642e-05, "loss": 0.783, "step": 1360 }, { "epoch": 0.41, "learning_rate": 6.625208803420796e-05, "loss": 0.8194, "step": 1361 }, { "epoch": 0.41, "learning_rate": 6.62056292442356e-05, "loss": 0.8002, "step": 1362 }, { "epoch": 0.41, "learning_rate": 6.615915481527361e-05, "loss": 0.7306, "step": 1363 }, { "epoch": 0.41, "learning_rate": 6.61126647921715e-05, "loss": 0.7756, "step": 1364 }, { "epoch": 0.41, "learning_rate": 6.606615921979367e-05, "loss": 0.7078, "step": 1365 }, { "epoch": 0.41, "learning_rate": 6.601963814301962e-05, "loss": 0.7558, "step": 1366 }, { "epoch": 0.41, "learning_rate": 6.597310160674381e-05, "loss": 0.7643, "step": 1367 }, { "epoch": 0.41, "learning_rate": 6.592654965587559e-05, "loss": 0.8122, "step": 1368 }, { "epoch": 0.42, "learning_rate": 6.587998233533917e-05, "loss": 0.7314, "step": 1369 }, { "epoch": 0.42, "learning_rate": 6.583339969007363e-05, "loss": 0.8005, "step": 1370 }, { "epoch": 0.42, "learning_rate": 6.578680176503283e-05, "loss": 0.8093, "step": 1371 }, { "epoch": 0.42, "learning_rate": 6.574018860518539e-05, "loss": 0.778, "step": 1372 }, { "epoch": 0.42, "learning_rate": 6.569356025551454e-05, "loss": 0.7489, "step": 1373 }, { "epoch": 0.42, "learning_rate": 6.564691676101832e-05, "loss": 0.7646, "step": 1374 }, { "epoch": 0.42, "learning_rate": 6.560025816670928e-05, "loss": 0.7678, "step": 1375 }, { "epoch": 0.42, "learning_rate": 6.555358451761455e-05, "loss": 0.7687, "step": 1376 }, { "epoch": 0.42, "learning_rate": 6.550689585877583e-05, "loss": 0.6989, "step": 1377 }, { "epoch": 0.42, "learning_rate": 6.546019223524927e-05, "loss": 0.7292, "step": 1378 }, { "epoch": 0.42, "learning_rate": 6.541347369210547e-05, "loss": 0.7864, "step": 1379 }, { "epoch": 0.42, "learning_rate": 6.536674027442944e-05, "loss": 0.7557, "step": 1380 }, { "epoch": 0.42, "learning_rate": 6.531999202732055e-05, "loss": 0.804, "step": 1381 }, { "epoch": 0.42, "learning_rate": 6.527322899589244e-05, "loss": 0.7508, "step": 1382 }, { "epoch": 0.42, "learning_rate": 6.522645122527305e-05, "loss": 0.7551, "step": 1383 }, { "epoch": 0.42, "learning_rate": 6.517965876060452e-05, "loss": 0.7254, "step": 1384 }, { "epoch": 0.42, "learning_rate": 6.513285164704324e-05, "loss": 0.7533, "step": 1385 }, { "epoch": 0.42, "learning_rate": 6.508602992975963e-05, "loss": 0.8179, "step": 1386 }, { "epoch": 0.42, "learning_rate": 6.503919365393827e-05, "loss": 0.7496, "step": 1387 }, { "epoch": 0.42, "learning_rate": 6.49923428647778e-05, "loss": 0.7486, "step": 1388 }, { "epoch": 0.42, "learning_rate": 6.494547760749084e-05, "loss": 0.7217, "step": 1389 }, { "epoch": 0.42, "learning_rate": 6.489859792730396e-05, "loss": 0.7816, "step": 1390 }, { "epoch": 0.42, "learning_rate": 6.485170386945765e-05, "loss": 0.7669, "step": 1391 }, { "epoch": 0.42, "learning_rate": 6.480479547920632e-05, "loss": 0.7854, "step": 1392 }, { "epoch": 0.42, "learning_rate": 6.475787280181818e-05, "loss": 0.7744, "step": 1393 }, { "epoch": 0.42, "learning_rate": 6.471093588257518e-05, "loss": 0.7469, "step": 1394 }, { "epoch": 0.42, "learning_rate": 6.466398476677313e-05, "loss": 0.8064, "step": 1395 }, { "epoch": 0.42, "learning_rate": 6.461701949972143e-05, "loss": 0.7752, "step": 1396 }, { "epoch": 0.42, "learning_rate": 6.457004012674316e-05, "loss": 0.7426, "step": 1397 }, { "epoch": 0.42, "learning_rate": 6.452304669317508e-05, "loss": 0.7349, "step": 1398 }, { "epoch": 0.42, "learning_rate": 6.447603924436744e-05, "loss": 0.7339, "step": 1399 }, { "epoch": 0.42, "learning_rate": 6.442901782568405e-05, "loss": 0.7218, "step": 1400 }, { "epoch": 0.42, "learning_rate": 6.43819824825022e-05, "loss": 0.7922, "step": 1401 }, { "epoch": 0.43, "learning_rate": 6.43349332602126e-05, "loss": 0.7987, "step": 1402 }, { "epoch": 0.43, "learning_rate": 6.428787020421938e-05, "loss": 0.7848, "step": 1403 }, { "epoch": 0.43, "learning_rate": 6.424079335994001e-05, "loss": 0.7758, "step": 1404 }, { "epoch": 0.43, "learning_rate": 6.419370277280527e-05, "loss": 0.7765, "step": 1405 }, { "epoch": 0.43, "learning_rate": 6.414659848825917e-05, "loss": 0.743, "step": 1406 }, { "epoch": 0.43, "learning_rate": 6.409948055175899e-05, "loss": 0.7078, "step": 1407 }, { "epoch": 0.43, "learning_rate": 6.405234900877513e-05, "loss": 0.7926, "step": 1408 }, { "epoch": 0.43, "learning_rate": 6.40052039047912e-05, "loss": 0.7663, "step": 1409 }, { "epoch": 0.43, "learning_rate": 6.395804528530379e-05, "loss": 0.7695, "step": 1410 }, { "epoch": 0.43, "learning_rate": 6.391087319582264e-05, "loss": 0.7314, "step": 1411 }, { "epoch": 0.43, "learning_rate": 6.38636876818704e-05, "loss": 0.7232, "step": 1412 }, { "epoch": 0.43, "learning_rate": 6.381648878898275e-05, "loss": 0.7626, "step": 1413 }, { "epoch": 0.43, "learning_rate": 6.376927656270819e-05, "loss": 0.7033, "step": 1414 }, { "epoch": 0.43, "learning_rate": 6.37220510486082e-05, "loss": 0.7453, "step": 1415 }, { "epoch": 0.43, "learning_rate": 6.367481229225699e-05, "loss": 0.7026, "step": 1416 }, { "epoch": 0.43, "learning_rate": 6.362756033924159e-05, "loss": 0.6962, "step": 1417 }, { "epoch": 0.43, "learning_rate": 6.358029523516178e-05, "loss": 0.8255, "step": 1418 }, { "epoch": 0.43, "learning_rate": 6.353301702562999e-05, "loss": 0.7196, "step": 1419 }, { "epoch": 0.43, "learning_rate": 6.348572575627133e-05, "loss": 0.7198, "step": 1420 }, { "epoch": 0.43, "learning_rate": 6.34384214727235e-05, "loss": 0.7449, "step": 1421 }, { "epoch": 0.43, "learning_rate": 6.339110422063675e-05, "loss": 0.7525, "step": 1422 }, { "epoch": 0.43, "learning_rate": 6.334377404567386e-05, "loss": 0.7828, "step": 1423 }, { "epoch": 0.43, "learning_rate": 6.329643099351009e-05, "loss": 0.7648, "step": 1424 }, { "epoch": 0.43, "learning_rate": 6.32490751098331e-05, "loss": 0.7526, "step": 1425 }, { "epoch": 0.43, "learning_rate": 6.320170644034293e-05, "loss": 0.7796, "step": 1426 }, { "epoch": 0.43, "learning_rate": 6.315432503075201e-05, "loss": 0.7365, "step": 1427 }, { "epoch": 0.43, "learning_rate": 6.310693092678502e-05, "loss": 0.7943, "step": 1428 }, { "epoch": 0.43, "learning_rate": 6.305952417417888e-05, "loss": 0.7505, "step": 1429 }, { "epoch": 0.43, "learning_rate": 6.301210481868277e-05, "loss": 0.7366, "step": 1430 }, { "epoch": 0.43, "learning_rate": 6.296467290605797e-05, "loss": 0.7244, "step": 1431 }, { "epoch": 0.43, "learning_rate": 6.291722848207792e-05, "loss": 0.7104, "step": 1432 }, { "epoch": 0.43, "learning_rate": 6.286977159252812e-05, "loss": 0.7512, "step": 1433 }, { "epoch": 0.43, "learning_rate": 6.28223022832061e-05, "loss": 0.7789, "step": 1434 }, { "epoch": 0.44, "learning_rate": 6.277482059992138e-05, "loss": 0.8227, "step": 1435 }, { "epoch": 0.44, "learning_rate": 6.272732658849541e-05, "loss": 0.7593, "step": 1436 }, { "epoch": 0.44, "learning_rate": 6.267982029476152e-05, "loss": 0.7439, "step": 1437 }, { "epoch": 0.44, "learning_rate": 6.263230176456498e-05, "loss": 0.7478, "step": 1438 }, { "epoch": 0.44, "learning_rate": 6.258477104376276e-05, "loss": 0.7639, "step": 1439 }, { "epoch": 0.44, "learning_rate": 6.253722817822364e-05, "loss": 0.7458, "step": 1440 }, { "epoch": 0.44, "learning_rate": 6.248967321382815e-05, "loss": 0.7347, "step": 1441 }, { "epoch": 0.44, "learning_rate": 6.244210619646843e-05, "loss": 0.7362, "step": 1442 }, { "epoch": 0.44, "learning_rate": 6.239452717204831e-05, "loss": 0.7549, "step": 1443 }, { "epoch": 0.44, "learning_rate": 6.23469361864832e-05, "loss": 0.7594, "step": 1444 }, { "epoch": 0.44, "learning_rate": 6.229933328569999e-05, "loss": 0.7434, "step": 1445 }, { "epoch": 0.44, "learning_rate": 6.225171851563716e-05, "loss": 0.7913, "step": 1446 }, { "epoch": 0.44, "learning_rate": 6.220409192224457e-05, "loss": 0.7935, "step": 1447 }, { "epoch": 0.44, "learning_rate": 6.215645355148355e-05, "loss": 0.7481, "step": 1448 }, { "epoch": 0.44, "learning_rate": 6.210880344932675e-05, "loss": 0.7635, "step": 1449 }, { "epoch": 0.44, "learning_rate": 6.206114166175816e-05, "loss": 0.745, "step": 1450 }, { "epoch": 0.44, "learning_rate": 6.201346823477303e-05, "loss": 0.7514, "step": 1451 }, { "epoch": 0.44, "learning_rate": 6.19657832143779e-05, "loss": 0.7859, "step": 1452 }, { "epoch": 0.44, "learning_rate": 6.19180866465904e-05, "loss": 0.7838, "step": 1453 }, { "epoch": 0.44, "learning_rate": 6.18703785774394e-05, "loss": 0.7374, "step": 1454 }, { "epoch": 0.44, "learning_rate": 6.18226590529648e-05, "loss": 0.7773, "step": 1455 }, { "epoch": 0.44, "learning_rate": 6.17749281192176e-05, "loss": 0.8125, "step": 1456 }, { "epoch": 0.44, "learning_rate": 6.172718582225977e-05, "loss": 0.7686, "step": 1457 }, { "epoch": 0.44, "learning_rate": 6.16794322081643e-05, "loss": 0.7538, "step": 1458 }, { "epoch": 0.44, "learning_rate": 6.163166732301505e-05, "loss": 0.7613, "step": 1459 }, { "epoch": 0.44, "learning_rate": 6.158389121290679e-05, "loss": 0.748, "step": 1460 }, { "epoch": 0.44, "learning_rate": 6.15361039239451e-05, "loss": 0.7537, "step": 1461 }, { "epoch": 0.44, "learning_rate": 6.148830550224635e-05, "loss": 0.7834, "step": 1462 }, { "epoch": 0.44, "learning_rate": 6.144049599393766e-05, "loss": 0.8176, "step": 1463 }, { "epoch": 0.44, "learning_rate": 6.139267544515689e-05, "loss": 0.7914, "step": 1464 }, { "epoch": 0.44, "learning_rate": 6.134484390205246e-05, "loss": 0.7818, "step": 1465 }, { "epoch": 0.44, "learning_rate": 6.12970014107835e-05, "loss": 0.7457, "step": 1466 }, { "epoch": 0.44, "learning_rate": 6.124914801751961e-05, "loss": 0.8201, "step": 1467 }, { "epoch": 0.45, "learning_rate": 6.120128376844098e-05, "loss": 0.77, "step": 1468 }, { "epoch": 0.45, "learning_rate": 6.11534087097383e-05, "loss": 0.7761, "step": 1469 }, { "epoch": 0.45, "learning_rate": 6.11055228876126e-05, "loss": 0.7171, "step": 1470 }, { "epoch": 0.45, "learning_rate": 6.105762634827534e-05, "loss": 0.6949, "step": 1471 }, { "epoch": 0.45, "learning_rate": 6.1009719137948374e-05, "loss": 0.7731, "step": 1472 }, { "epoch": 0.45, "learning_rate": 6.096180130286375e-05, "loss": 0.7158, "step": 1473 }, { "epoch": 0.45, "learning_rate": 6.091387288926388e-05, "loss": 0.7769, "step": 1474 }, { "epoch": 0.45, "learning_rate": 6.08659339434013e-05, "loss": 0.7704, "step": 1475 }, { "epoch": 0.45, "learning_rate": 6.081798451153875e-05, "loss": 0.7455, "step": 1476 }, { "epoch": 0.45, "learning_rate": 6.0770024639949074e-05, "loss": 0.681, "step": 1477 }, { "epoch": 0.45, "learning_rate": 6.072205437491521e-05, "loss": 0.748, "step": 1478 }, { "epoch": 0.45, "learning_rate": 6.067407376273009e-05, "loss": 0.7569, "step": 1479 }, { "epoch": 0.45, "learning_rate": 6.06260828496967e-05, "loss": 0.7204, "step": 1480 }, { "epoch": 0.45, "learning_rate": 6.0578081682127874e-05, "loss": 0.7225, "step": 1481 }, { "epoch": 0.45, "learning_rate": 6.053007030634642e-05, "loss": 0.7489, "step": 1482 }, { "epoch": 0.45, "learning_rate": 6.0482048768684954e-05, "loss": 0.8256, "step": 1483 }, { "epoch": 0.45, "learning_rate": 6.0434017115485906e-05, "loss": 0.6905, "step": 1484 }, { "epoch": 0.45, "learning_rate": 6.038597539310149e-05, "loss": 0.7455, "step": 1485 }, { "epoch": 0.45, "learning_rate": 6.033792364789361e-05, "loss": 0.7155, "step": 1486 }, { "epoch": 0.45, "learning_rate": 6.028986192623386e-05, "loss": 0.7622, "step": 1487 }, { "epoch": 0.45, "learning_rate": 6.024179027450343e-05, "loss": 0.8168, "step": 1488 }, { "epoch": 0.45, "learning_rate": 6.019370873909315e-05, "loss": 0.8196, "step": 1489 }, { "epoch": 0.45, "learning_rate": 6.014561736640334e-05, "loss": 0.7425, "step": 1490 }, { "epoch": 0.45, "learning_rate": 6.009751620284383e-05, "loss": 0.7685, "step": 1491 }, { "epoch": 0.45, "learning_rate": 6.00494052948339e-05, "loss": 0.7329, "step": 1492 }, { "epoch": 0.45, "learning_rate": 6.0001284688802226e-05, "loss": 0.7549, "step": 1493 }, { "epoch": 0.45, "learning_rate": 5.995315443118685e-05, "loss": 0.7488, "step": 1494 }, { "epoch": 0.45, "learning_rate": 5.9905014568435125e-05, "loss": 0.7465, "step": 1495 }, { "epoch": 0.45, "learning_rate": 5.985686514700368e-05, "loss": 0.7787, "step": 1496 }, { "epoch": 0.45, "learning_rate": 5.9808706213358355e-05, "loss": 0.7722, "step": 1497 }, { "epoch": 0.45, "learning_rate": 5.976053781397417e-05, "loss": 0.7955, "step": 1498 }, { "epoch": 0.45, "learning_rate": 5.9712359995335307e-05, "loss": 0.7171, "step": 1499 }, { "epoch": 0.45, "learning_rate": 5.966417280393502e-05, "loss": 0.7532, "step": 1500 }, { "epoch": 0.46, "learning_rate": 5.961597628627557e-05, "loss": 0.7193, "step": 1501 }, { "epoch": 0.46, "learning_rate": 5.9567770488868305e-05, "loss": 0.7223, "step": 1502 }, { "epoch": 0.46, "learning_rate": 5.951955545823342e-05, "loss": 0.7258, "step": 1503 }, { "epoch": 0.46, "learning_rate": 5.947133124090014e-05, "loss": 0.7477, "step": 1504 }, { "epoch": 0.46, "learning_rate": 5.942309788340644e-05, "loss": 0.7596, "step": 1505 }, { "epoch": 0.46, "learning_rate": 5.9374855432299206e-05, "loss": 0.7591, "step": 1506 }, { "epoch": 0.46, "learning_rate": 5.9326603934134027e-05, "loss": 0.7423, "step": 1507 }, { "epoch": 0.46, "learning_rate": 5.9278343435475294e-05, "loss": 0.7293, "step": 1508 }, { "epoch": 0.46, "learning_rate": 5.923007398289603e-05, "loss": 0.7967, "step": 1509 }, { "epoch": 0.46, "learning_rate": 5.918179562297791e-05, "loss": 0.7659, "step": 1510 }, { "epoch": 0.46, "learning_rate": 5.913350840231124e-05, "loss": 0.7377, "step": 1511 }, { "epoch": 0.46, "learning_rate": 5.908521236749481e-05, "loss": 0.7784, "step": 1512 }, { "epoch": 0.46, "learning_rate": 5.9036907565136e-05, "loss": 0.735, "step": 1513 }, { "epoch": 0.46, "learning_rate": 5.8988594041850566e-05, "loss": 0.7222, "step": 1514 }, { "epoch": 0.46, "learning_rate": 5.894027184426274e-05, "loss": 0.7857, "step": 1515 }, { "epoch": 0.46, "learning_rate": 5.889194101900509e-05, "loss": 0.7693, "step": 1516 }, { "epoch": 0.46, "learning_rate": 5.8843601612718545e-05, "loss": 0.7409, "step": 1517 }, { "epoch": 0.46, "learning_rate": 5.8795253672052265e-05, "loss": 0.7623, "step": 1518 }, { "epoch": 0.46, "learning_rate": 5.87468972436637e-05, "loss": 0.78, "step": 1519 }, { "epoch": 0.46, "learning_rate": 5.869853237421846e-05, "loss": 0.7765, "step": 1520 }, { "epoch": 0.46, "learning_rate": 5.8650159110390325e-05, "loss": 0.786, "step": 1521 }, { "epoch": 0.46, "learning_rate": 5.860177749886112e-05, "loss": 0.7324, "step": 1522 }, { "epoch": 0.46, "learning_rate": 5.85533875863208e-05, "loss": 0.7942, "step": 1523 }, { "epoch": 0.46, "learning_rate": 5.850498941946727e-05, "loss": 0.7348, "step": 1524 }, { "epoch": 0.46, "learning_rate": 5.8456583045006444e-05, "loss": 0.7656, "step": 1525 }, { "epoch": 0.46, "learning_rate": 5.8408168509652115e-05, "loss": 0.7566, "step": 1526 }, { "epoch": 0.46, "learning_rate": 5.8359745860126e-05, "loss": 0.7299, "step": 1527 }, { "epoch": 0.46, "learning_rate": 5.831131514315761e-05, "loss": 0.7496, "step": 1528 }, { "epoch": 0.46, "learning_rate": 5.826287640548425e-05, "loss": 0.7531, "step": 1529 }, { "epoch": 0.46, "learning_rate": 5.821442969385097e-05, "loss": 0.7765, "step": 1530 }, { "epoch": 0.46, "learning_rate": 5.8165975055010524e-05, "loss": 0.765, "step": 1531 }, { "epoch": 0.46, "learning_rate": 5.811751253572329e-05, "loss": 0.8302, "step": 1532 }, { "epoch": 0.46, "learning_rate": 5.806904218275727e-05, "loss": 0.7289, "step": 1533 }, { "epoch": 0.47, "learning_rate": 5.8020564042888015e-05, "loss": 0.7644, "step": 1534 }, { "epoch": 0.47, "learning_rate": 5.797207816289861e-05, "loss": 0.7277, "step": 1535 }, { "epoch": 0.47, "learning_rate": 5.792358458957959e-05, "loss": 0.7399, "step": 1536 }, { "epoch": 0.47, "learning_rate": 5.787508336972893e-05, "loss": 0.6677, "step": 1537 }, { "epoch": 0.47, "learning_rate": 5.782657455015197e-05, "loss": 0.7433, "step": 1538 }, { "epoch": 0.47, "learning_rate": 5.777805817766137e-05, "loss": 0.8112, "step": 1539 }, { "epoch": 0.47, "learning_rate": 5.7729534299077125e-05, "loss": 0.7519, "step": 1540 }, { "epoch": 0.47, "learning_rate": 5.768100296122645e-05, "loss": 0.7992, "step": 1541 }, { "epoch": 0.47, "learning_rate": 5.7632464210943726e-05, "loss": 0.7391, "step": 1542 }, { "epoch": 0.47, "learning_rate": 5.758391809507054e-05, "loss": 0.7075, "step": 1543 }, { "epoch": 0.47, "learning_rate": 5.753536466045555e-05, "loss": 0.774, "step": 1544 }, { "epoch": 0.47, "learning_rate": 5.7486803953954514e-05, "loss": 0.7299, "step": 1545 }, { "epoch": 0.47, "learning_rate": 5.7438236022430144e-05, "loss": 0.7869, "step": 1546 }, { "epoch": 0.47, "learning_rate": 5.738966091275221e-05, "loss": 0.7365, "step": 1547 }, { "epoch": 0.47, "learning_rate": 5.734107867179732e-05, "loss": 0.7199, "step": 1548 }, { "epoch": 0.47, "learning_rate": 5.729248934644903e-05, "loss": 0.7701, "step": 1549 }, { "epoch": 0.47, "learning_rate": 5.7243892983597705e-05, "loss": 0.7518, "step": 1550 }, { "epoch": 0.47, "learning_rate": 5.7195289630140526e-05, "loss": 0.7625, "step": 1551 }, { "epoch": 0.47, "learning_rate": 5.7146679332981366e-05, "loss": 0.7713, "step": 1552 }, { "epoch": 0.47, "learning_rate": 5.709806213903086e-05, "loss": 0.7717, "step": 1553 }, { "epoch": 0.47, "learning_rate": 5.7049438095206266e-05, "loss": 0.7111, "step": 1554 }, { "epoch": 0.47, "learning_rate": 5.700080724843147e-05, "loss": 0.7786, "step": 1555 }, { "epoch": 0.47, "learning_rate": 5.6952169645636866e-05, "loss": 0.8203, "step": 1556 }, { "epoch": 0.47, "learning_rate": 5.690352533375948e-05, "loss": 0.7795, "step": 1557 }, { "epoch": 0.47, "learning_rate": 5.6854874359742684e-05, "loss": 0.7511, "step": 1558 }, { "epoch": 0.47, "learning_rate": 5.680621677053638e-05, "loss": 0.7848, "step": 1559 }, { "epoch": 0.47, "learning_rate": 5.67575526130968e-05, "loss": 0.7404, "step": 1560 }, { "epoch": 0.47, "learning_rate": 5.670888193438654e-05, "loss": 0.8163, "step": 1561 }, { "epoch": 0.47, "learning_rate": 5.666020478137446e-05, "loss": 0.7399, "step": 1562 }, { "epoch": 0.47, "learning_rate": 5.6611521201035724e-05, "loss": 0.7311, "step": 1563 }, { "epoch": 0.47, "learning_rate": 5.656283124035162e-05, "loss": 0.7722, "step": 1564 }, { "epoch": 0.47, "learning_rate": 5.651413494630966e-05, "loss": 0.7284, "step": 1565 }, { "epoch": 0.47, "learning_rate": 5.6465432365903424e-05, "loss": 0.7334, "step": 1566 }, { "epoch": 0.48, "learning_rate": 5.64167235461326e-05, "loss": 0.7499, "step": 1567 }, { "epoch": 0.48, "learning_rate": 5.636800853400285e-05, "loss": 0.786, "step": 1568 }, { "epoch": 0.48, "learning_rate": 5.631928737652584e-05, "loss": 0.7313, "step": 1569 }, { "epoch": 0.48, "learning_rate": 5.627056012071918e-05, "loss": 0.7544, "step": 1570 }, { "epoch": 0.48, "learning_rate": 5.622182681360634e-05, "loss": 0.7492, "step": 1571 }, { "epoch": 0.48, "learning_rate": 5.617308750221664e-05, "loss": 0.7508, "step": 1572 }, { "epoch": 0.48, "learning_rate": 5.612434223358518e-05, "loss": 0.7847, "step": 1573 }, { "epoch": 0.48, "learning_rate": 5.607559105475283e-05, "loss": 0.7677, "step": 1574 }, { "epoch": 0.48, "learning_rate": 5.602683401276615e-05, "loss": 0.7515, "step": 1575 }, { "epoch": 0.48, "learning_rate": 5.5978071154677384e-05, "loss": 0.7577, "step": 1576 }, { "epoch": 0.48, "learning_rate": 5.592930252754432e-05, "loss": 0.7647, "step": 1577 }, { "epoch": 0.48, "learning_rate": 5.5880528178430416e-05, "loss": 0.7735, "step": 1578 }, { "epoch": 0.48, "learning_rate": 5.583174815440454e-05, "loss": 0.7632, "step": 1579 }, { "epoch": 0.48, "learning_rate": 5.578296250254114e-05, "loss": 0.7588, "step": 1580 }, { "epoch": 0.48, "learning_rate": 5.573417126992003e-05, "loss": 0.7434, "step": 1581 }, { "epoch": 0.48, "learning_rate": 5.568537450362642e-05, "loss": 0.7318, "step": 1582 }, { "epoch": 0.48, "learning_rate": 5.5636572250750875e-05, "loss": 0.7758, "step": 1583 }, { "epoch": 0.48, "learning_rate": 5.558776455838926e-05, "loss": 0.7673, "step": 1584 }, { "epoch": 0.48, "learning_rate": 5.553895147364264e-05, "loss": 0.7217, "step": 1585 }, { "epoch": 0.48, "learning_rate": 5.5490133043617375e-05, "loss": 0.7905, "step": 1586 }, { "epoch": 0.48, "learning_rate": 5.544130931542488e-05, "loss": 0.7447, "step": 1587 }, { "epoch": 0.48, "learning_rate": 5.539248033618176e-05, "loss": 0.8118, "step": 1588 }, { "epoch": 0.48, "learning_rate": 5.534364615300964e-05, "loss": 0.8049, "step": 1589 }, { "epoch": 0.48, "learning_rate": 5.5294806813035214e-05, "loss": 0.7634, "step": 1590 }, { "epoch": 0.48, "learning_rate": 5.524596236339009e-05, "loss": 0.7241, "step": 1591 }, { "epoch": 0.48, "learning_rate": 5.5197112851210866e-05, "loss": 0.7256, "step": 1592 }, { "epoch": 0.48, "learning_rate": 5.514825832363899e-05, "loss": 0.7609, "step": 1593 }, { "epoch": 0.48, "learning_rate": 5.509939882782077e-05, "loss": 0.7788, "step": 1594 }, { "epoch": 0.48, "learning_rate": 5.5050534410907294e-05, "loss": 0.756, "step": 1595 }, { "epoch": 0.48, "learning_rate": 5.5001665120054415e-05, "loss": 0.7237, "step": 1596 }, { "epoch": 0.48, "learning_rate": 5.4952791002422665e-05, "loss": 0.7785, "step": 1597 }, { "epoch": 0.48, "learning_rate": 5.490391210517726e-05, "loss": 0.7431, "step": 1598 }, { "epoch": 0.48, "learning_rate": 5.4855028475488e-05, "loss": 0.7944, "step": 1599 }, { "epoch": 0.49, "learning_rate": 5.48061401605293e-05, "loss": 0.7401, "step": 1600 }, { "epoch": 0.49, "learning_rate": 5.475724720748002e-05, "loss": 0.7081, "step": 1601 }, { "epoch": 0.49, "learning_rate": 5.4708349663523585e-05, "loss": 0.7446, "step": 1602 }, { "epoch": 0.49, "learning_rate": 5.4659447575847776e-05, "loss": 0.7333, "step": 1603 }, { "epoch": 0.49, "learning_rate": 5.461054099164482e-05, "loss": 0.766, "step": 1604 }, { "epoch": 0.49, "learning_rate": 5.4561629958111224e-05, "loss": 0.743, "step": 1605 }, { "epoch": 0.49, "learning_rate": 5.451271452244784e-05, "loss": 0.7462, "step": 1606 }, { "epoch": 0.49, "learning_rate": 5.446379473185972e-05, "loss": 0.7146, "step": 1607 }, { "epoch": 0.49, "learning_rate": 5.441487063355617e-05, "loss": 0.7563, "step": 1608 }, { "epoch": 0.49, "learning_rate": 5.4365942274750616e-05, "loss": 0.7741, "step": 1609 }, { "epoch": 0.49, "learning_rate": 5.4317009702660606e-05, "loss": 0.7602, "step": 1610 }, { "epoch": 0.49, "learning_rate": 5.426807296450776e-05, "loss": 0.7822, "step": 1611 }, { "epoch": 0.49, "learning_rate": 5.421913210751769e-05, "loss": 0.7496, "step": 1612 }, { "epoch": 0.49, "learning_rate": 5.417018717892004e-05, "loss": 0.7069, "step": 1613 }, { "epoch": 0.49, "learning_rate": 5.4121238225948346e-05, "loss": 0.7717, "step": 1614 }, { "epoch": 0.49, "learning_rate": 5.407228529584e-05, "loss": 0.7663, "step": 1615 }, { "epoch": 0.49, "learning_rate": 5.402332843583631e-05, "loss": 0.7626, "step": 1616 }, { "epoch": 0.49, "learning_rate": 5.397436769318228e-05, "loss": 0.7028, "step": 1617 }, { "epoch": 0.49, "learning_rate": 5.392540311512675e-05, "loss": 0.7412, "step": 1618 }, { "epoch": 0.49, "learning_rate": 5.38764347489222e-05, "loss": 0.8152, "step": 1619 }, { "epoch": 0.49, "learning_rate": 5.38274626418248e-05, "loss": 0.7479, "step": 1620 }, { "epoch": 0.49, "learning_rate": 5.37784868410943e-05, "loss": 0.7549, "step": 1621 }, { "epoch": 0.49, "learning_rate": 5.372950739399404e-05, "loss": 0.7798, "step": 1622 }, { "epoch": 0.49, "learning_rate": 5.368052434779088e-05, "loss": 0.7735, "step": 1623 }, { "epoch": 0.49, "learning_rate": 5.3631537749755155e-05, "loss": 0.6685, "step": 1624 }, { "epoch": 0.49, "learning_rate": 5.358254764716059e-05, "loss": 0.7505, "step": 1625 }, { "epoch": 0.49, "learning_rate": 5.353355408728433e-05, "loss": 0.7712, "step": 1626 }, { "epoch": 0.49, "learning_rate": 5.348455711740684e-05, "loss": 0.7902, "step": 1627 }, { "epoch": 0.49, "learning_rate": 5.343555678481189e-05, "loss": 0.7894, "step": 1628 }, { "epoch": 0.49, "learning_rate": 5.338655313678649e-05, "loss": 0.7438, "step": 1629 }, { "epoch": 0.49, "learning_rate": 5.3337546220620825e-05, "loss": 0.7848, "step": 1630 }, { "epoch": 0.49, "learning_rate": 5.328853608360827e-05, "loss": 0.7497, "step": 1631 }, { "epoch": 0.49, "learning_rate": 5.3239522773045295e-05, "loss": 0.7892, "step": 1632 }, { "epoch": 0.5, "learning_rate": 5.319050633623142e-05, "loss": 0.7698, "step": 1633 }, { "epoch": 0.5, "learning_rate": 5.314148682046921e-05, "loss": 0.7858, "step": 1634 }, { "epoch": 0.5, "learning_rate": 5.3092464273064166e-05, "loss": 0.7545, "step": 1635 }, { "epoch": 0.5, "learning_rate": 5.304343874132474e-05, "loss": 0.7692, "step": 1636 }, { "epoch": 0.5, "learning_rate": 5.2994410272562256e-05, "loss": 0.7119, "step": 1637 }, { "epoch": 0.5, "learning_rate": 5.294537891409086e-05, "loss": 0.7677, "step": 1638 }, { "epoch": 0.5, "learning_rate": 5.289634471322753e-05, "loss": 0.8099, "step": 1639 }, { "epoch": 0.5, "learning_rate": 5.284730771729192e-05, "loss": 0.7622, "step": 1640 }, { "epoch": 0.5, "learning_rate": 5.279826797360644e-05, "loss": 0.7798, "step": 1641 }, { "epoch": 0.5, "learning_rate": 5.2749225529496126e-05, "loss": 0.7696, "step": 1642 }, { "epoch": 0.5, "learning_rate": 5.270018043228862e-05, "loss": 0.7567, "step": 1643 }, { "epoch": 0.5, "learning_rate": 5.265113272931412e-05, "loss": 0.6954, "step": 1644 }, { "epoch": 0.5, "learning_rate": 5.2602082467905364e-05, "loss": 0.7598, "step": 1645 }, { "epoch": 0.5, "learning_rate": 5.2553029695397525e-05, "loss": 0.7078, "step": 1646 }, { "epoch": 0.5, "learning_rate": 5.2503974459128246e-05, "loss": 0.7932, "step": 1647 }, { "epoch": 0.5, "learning_rate": 5.245491680643747e-05, "loss": 0.7716, "step": 1648 }, { "epoch": 0.5, "learning_rate": 5.240585678466755e-05, "loss": 0.7168, "step": 1649 }, { "epoch": 0.5, "learning_rate": 5.2356794441163094e-05, "loss": 0.7442, "step": 1650 }, { "epoch": 0.5, "learning_rate": 5.2307729823270935e-05, "loss": 0.771, "step": 1651 }, { "epoch": 0.5, "learning_rate": 5.2258662978340135e-05, "loss": 0.7253, "step": 1652 }, { "epoch": 0.5, "learning_rate": 5.220959395372186e-05, "loss": 0.7701, "step": 1653 }, { "epoch": 0.5, "learning_rate": 5.2160522796769426e-05, "loss": 0.7795, "step": 1654 }, { "epoch": 0.5, "learning_rate": 5.211144955483821e-05, "loss": 0.7366, "step": 1655 }, { "epoch": 0.5, "learning_rate": 5.206237427528553e-05, "loss": 0.7465, "step": 1656 }, { "epoch": 0.5, "learning_rate": 5.201329700547076e-05, "loss": 0.7737, "step": 1657 }, { "epoch": 0.5, "learning_rate": 5.196421779275513e-05, "loss": 0.7301, "step": 1658 }, { "epoch": 0.5, "learning_rate": 5.191513668450178e-05, "loss": 0.7291, "step": 1659 }, { "epoch": 0.5, "learning_rate": 5.1866053728075646e-05, "loss": 0.7568, "step": 1660 }, { "epoch": 0.5, "learning_rate": 5.1816968970843504e-05, "loss": 0.7247, "step": 1661 }, { "epoch": 0.5, "learning_rate": 5.176788246017379e-05, "loss": 0.754, "step": 1662 }, { "epoch": 0.5, "learning_rate": 5.171879424343671e-05, "loss": 0.7246, "step": 1663 }, { "epoch": 0.5, "learning_rate": 5.166970436800407e-05, "loss": 0.7209, "step": 1664 }, { "epoch": 0.51, "learning_rate": 5.162061288124929e-05, "loss": 0.7284, "step": 1665 }, { "epoch": 0.51, "learning_rate": 5.157151983054733e-05, "loss": 0.7725, "step": 1666 }, { "epoch": 0.51, "learning_rate": 5.152242526327471e-05, "loss": 0.7759, "step": 1667 }, { "epoch": 0.51, "learning_rate": 5.147332922680933e-05, "loss": 0.7803, "step": 1668 }, { "epoch": 0.51, "learning_rate": 5.1424231768530584e-05, "loss": 0.7156, "step": 1669 }, { "epoch": 0.51, "learning_rate": 5.137513293581918e-05, "loss": 0.731, "step": 1670 }, { "epoch": 0.51, "learning_rate": 5.132603277605722e-05, "loss": 0.7282, "step": 1671 }, { "epoch": 0.51, "learning_rate": 5.127693133662801e-05, "loss": 0.8006, "step": 1672 }, { "epoch": 0.51, "learning_rate": 5.1227828664916135e-05, "loss": 0.7549, "step": 1673 }, { "epoch": 0.51, "learning_rate": 5.117872480830738e-05, "loss": 0.6845, "step": 1674 }, { "epoch": 0.51, "learning_rate": 5.1129619814188646e-05, "loss": 0.7959, "step": 1675 }, { "epoch": 0.51, "learning_rate": 5.108051372994793e-05, "loss": 0.7694, "step": 1676 }, { "epoch": 0.51, "learning_rate": 5.103140660297432e-05, "loss": 0.7787, "step": 1677 }, { "epoch": 0.51, "learning_rate": 5.0982298480657854e-05, "loss": 0.8145, "step": 1678 }, { "epoch": 0.51, "learning_rate": 5.093318941038957e-05, "loss": 0.7265, "step": 1679 }, { "epoch": 0.51, "learning_rate": 5.0884079439561404e-05, "loss": 0.7723, "step": 1680 }, { "epoch": 0.51, "learning_rate": 5.0834968615566195e-05, "loss": 0.7632, "step": 1681 }, { "epoch": 0.51, "learning_rate": 5.078585698579753e-05, "loss": 0.7182, "step": 1682 }, { "epoch": 0.51, "learning_rate": 5.0736744597649844e-05, "loss": 0.704, "step": 1683 }, { "epoch": 0.51, "learning_rate": 5.0687631498518284e-05, "loss": 0.7499, "step": 1684 }, { "epoch": 0.51, "learning_rate": 5.0638517735798696e-05, "loss": 0.7553, "step": 1685 }, { "epoch": 0.51, "learning_rate": 5.058940335688752e-05, "loss": 0.776, "step": 1686 }, { "epoch": 0.51, "learning_rate": 5.054028840918183e-05, "loss": 0.7354, "step": 1687 }, { "epoch": 0.51, "learning_rate": 5.049117294007926e-05, "loss": 0.7592, "step": 1688 }, { "epoch": 0.51, "learning_rate": 5.044205699697789e-05, "loss": 0.7591, "step": 1689 }, { "epoch": 0.51, "learning_rate": 5.039294062727632e-05, "loss": 0.7947, "step": 1690 }, { "epoch": 0.51, "learning_rate": 5.034382387837354e-05, "loss": 0.7198, "step": 1691 }, { "epoch": 0.51, "learning_rate": 5.029470679766888e-05, "loss": 0.7801, "step": 1692 }, { "epoch": 0.51, "learning_rate": 5.024558943256202e-05, "loss": 0.7105, "step": 1693 }, { "epoch": 0.51, "learning_rate": 5.019647183045292e-05, "loss": 0.7476, "step": 1694 }, { "epoch": 0.51, "learning_rate": 5.014735403874174e-05, "loss": 0.7762, "step": 1695 }, { "epoch": 0.51, "learning_rate": 5.009823610482886e-05, "loss": 0.7245, "step": 1696 }, { "epoch": 0.51, "learning_rate": 5.004911807611475e-05, "loss": 0.7546, "step": 1697 }, { "epoch": 0.52, "learning_rate": 5e-05, "loss": 0.7434, "step": 1698 }, { "epoch": 0.52, "learning_rate": 4.995088192388527e-05, "loss": 0.7535, "step": 1699 }, { "epoch": 0.52, "learning_rate": 4.990176389517115e-05, "loss": 0.7727, "step": 1700 }, { "epoch": 0.52, "learning_rate": 4.985264596125826e-05, "loss": 0.7063, "step": 1701 }, { "epoch": 0.52, "learning_rate": 4.980352816954709e-05, "loss": 0.743, "step": 1702 }, { "epoch": 0.52, "learning_rate": 4.975441056743799e-05, "loss": 0.7291, "step": 1703 }, { "epoch": 0.52, "learning_rate": 4.9705293202331134e-05, "loss": 0.7042, "step": 1704 }, { "epoch": 0.52, "learning_rate": 4.9656176121626466e-05, "loss": 0.7214, "step": 1705 }, { "epoch": 0.52, "learning_rate": 4.9607059372723685e-05, "loss": 0.7251, "step": 1706 }, { "epoch": 0.52, "learning_rate": 4.9557943003022125e-05, "loss": 0.6921, "step": 1707 }, { "epoch": 0.52, "learning_rate": 4.950882705992075e-05, "loss": 0.7562, "step": 1708 }, { "epoch": 0.52, "learning_rate": 4.945971159081817e-05, "loss": 0.7462, "step": 1709 }, { "epoch": 0.52, "learning_rate": 4.9410596643112496e-05, "loss": 0.7435, "step": 1710 }, { "epoch": 0.52, "learning_rate": 4.936148226420132e-05, "loss": 0.7945, "step": 1711 }, { "epoch": 0.52, "learning_rate": 4.931236850148171e-05, "loss": 0.7802, "step": 1712 }, { "epoch": 0.52, "learning_rate": 4.926325540235017e-05, "loss": 0.7402, "step": 1713 }, { "epoch": 0.52, "learning_rate": 4.9214143014202484e-05, "loss": 0.7783, "step": 1714 }, { "epoch": 0.52, "learning_rate": 4.916503138443384e-05, "loss": 0.7882, "step": 1715 }, { "epoch": 0.52, "learning_rate": 4.9115920560438594e-05, "loss": 0.7594, "step": 1716 }, { "epoch": 0.52, "learning_rate": 4.906681058961044e-05, "loss": 0.7496, "step": 1717 }, { "epoch": 0.52, "learning_rate": 4.9017701519342164e-05, "loss": 0.6938, "step": 1718 }, { "epoch": 0.52, "learning_rate": 4.89685933970257e-05, "loss": 0.7314, "step": 1719 }, { "epoch": 0.52, "learning_rate": 4.891948627005207e-05, "loss": 0.7601, "step": 1720 }, { "epoch": 0.52, "learning_rate": 4.8870380185811365e-05, "loss": 0.7762, "step": 1721 }, { "epoch": 0.52, "learning_rate": 4.8821275191692634e-05, "loss": 0.7524, "step": 1722 }, { "epoch": 0.52, "learning_rate": 4.877217133508388e-05, "loss": 0.7807, "step": 1723 }, { "epoch": 0.52, "learning_rate": 4.8723068663372006e-05, "loss": 0.7248, "step": 1724 }, { "epoch": 0.52, "learning_rate": 4.86739672239428e-05, "loss": 0.7592, "step": 1725 }, { "epoch": 0.52, "learning_rate": 4.862486706418083e-05, "loss": 0.777, "step": 1726 }, { "epoch": 0.52, "learning_rate": 4.857576823146944e-05, "loss": 0.7235, "step": 1727 }, { "epoch": 0.52, "learning_rate": 4.852667077319068e-05, "loss": 0.7172, "step": 1728 }, { "epoch": 0.52, "learning_rate": 4.8477574736725304e-05, "loss": 0.7917, "step": 1729 }, { "epoch": 0.52, "learning_rate": 4.842848016945267e-05, "loss": 0.7373, "step": 1730 }, { "epoch": 0.53, "learning_rate": 4.837938711875073e-05, "loss": 0.7836, "step": 1731 }, { "epoch": 0.53, "learning_rate": 4.8330295631995934e-05, "loss": 0.7342, "step": 1732 }, { "epoch": 0.53, "learning_rate": 4.8281205756563304e-05, "loss": 0.7842, "step": 1733 }, { "epoch": 0.53, "learning_rate": 4.8232117539826215e-05, "loss": 0.8001, "step": 1734 }, { "epoch": 0.53, "learning_rate": 4.818303102915652e-05, "loss": 0.7007, "step": 1735 }, { "epoch": 0.53, "learning_rate": 4.813394627192435e-05, "loss": 0.731, "step": 1736 }, { "epoch": 0.53, "learning_rate": 4.8084863315498234e-05, "loss": 0.7482, "step": 1737 }, { "epoch": 0.53, "learning_rate": 4.803578220724488e-05, "loss": 0.7339, "step": 1738 }, { "epoch": 0.53, "learning_rate": 4.798670299452926e-05, "loss": 0.7667, "step": 1739 }, { "epoch": 0.53, "learning_rate": 4.7937625724714466e-05, "loss": 0.7359, "step": 1740 }, { "epoch": 0.53, "learning_rate": 4.78885504451618e-05, "loss": 0.7414, "step": 1741 }, { "epoch": 0.53, "learning_rate": 4.783947720323058e-05, "loss": 0.7432, "step": 1742 }, { "epoch": 0.53, "learning_rate": 4.7790406046278145e-05, "loss": 0.7701, "step": 1743 }, { "epoch": 0.53, "learning_rate": 4.7741337021659884e-05, "loss": 0.7713, "step": 1744 }, { "epoch": 0.53, "learning_rate": 4.769227017672907e-05, "loss": 0.7254, "step": 1745 }, { "epoch": 0.53, "learning_rate": 4.7643205558836924e-05, "loss": 0.7787, "step": 1746 }, { "epoch": 0.53, "learning_rate": 4.759414321533247e-05, "loss": 0.7459, "step": 1747 }, { "epoch": 0.53, "learning_rate": 4.754508319356253e-05, "loss": 0.7698, "step": 1748 }, { "epoch": 0.53, "learning_rate": 4.7496025540871766e-05, "loss": 0.7545, "step": 1749 }, { "epoch": 0.53, "learning_rate": 4.744697030460248e-05, "loss": 0.7327, "step": 1750 }, { "epoch": 0.53, "learning_rate": 4.7397917532094655e-05, "loss": 0.7497, "step": 1751 }, { "epoch": 0.53, "learning_rate": 4.7348867270685885e-05, "loss": 0.739, "step": 1752 }, { "epoch": 0.53, "learning_rate": 4.7299819567711394e-05, "loss": 0.7906, "step": 1753 }, { "epoch": 0.53, "learning_rate": 4.7250774470503885e-05, "loss": 0.7517, "step": 1754 }, { "epoch": 0.53, "learning_rate": 4.7201732026393574e-05, "loss": 0.7521, "step": 1755 }, { "epoch": 0.53, "learning_rate": 4.7152692282708086e-05, "loss": 0.7748, "step": 1756 }, { "epoch": 0.53, "learning_rate": 4.710365528677249e-05, "loss": 0.7548, "step": 1757 }, { "epoch": 0.53, "learning_rate": 4.705462108590915e-05, "loss": 0.6638, "step": 1758 }, { "epoch": 0.53, "learning_rate": 4.700558972743777e-05, "loss": 0.7745, "step": 1759 }, { "epoch": 0.53, "learning_rate": 4.695656125867527e-05, "loss": 0.7602, "step": 1760 }, { "epoch": 0.53, "learning_rate": 4.6907535726935846e-05, "loss": 0.7458, "step": 1761 }, { "epoch": 0.53, "learning_rate": 4.685851317953081e-05, "loss": 0.7632, "step": 1762 }, { "epoch": 0.53, "learning_rate": 4.680949366376858e-05, "loss": 0.7849, "step": 1763 }, { "epoch": 0.54, "learning_rate": 4.676047722695471e-05, "loss": 0.7895, "step": 1764 }, { "epoch": 0.54, "learning_rate": 4.671146391639173e-05, "loss": 0.7349, "step": 1765 }, { "epoch": 0.54, "learning_rate": 4.6662453779379186e-05, "loss": 0.7382, "step": 1766 }, { "epoch": 0.54, "learning_rate": 4.6613446863213515e-05, "loss": 0.7742, "step": 1767 }, { "epoch": 0.54, "learning_rate": 4.656444321518811e-05, "loss": 0.7738, "step": 1768 }, { "epoch": 0.54, "learning_rate": 4.651544288259317e-05, "loss": 0.7473, "step": 1769 }, { "epoch": 0.54, "learning_rate": 4.646644591271569e-05, "loss": 0.7862, "step": 1770 }, { "epoch": 0.54, "learning_rate": 4.641745235283942e-05, "loss": 0.792, "step": 1771 }, { "epoch": 0.54, "learning_rate": 4.636846225024486e-05, "loss": 0.7339, "step": 1772 }, { "epoch": 0.54, "learning_rate": 4.631947565220913e-05, "loss": 0.7541, "step": 1773 }, { "epoch": 0.54, "learning_rate": 4.627049260600596e-05, "loss": 0.8041, "step": 1774 }, { "epoch": 0.54, "learning_rate": 4.622151315890572e-05, "loss": 0.7676, "step": 1775 }, { "epoch": 0.54, "learning_rate": 4.6172537358175214e-05, "loss": 0.8095, "step": 1776 }, { "epoch": 0.54, "learning_rate": 4.6123565251077815e-05, "loss": 0.7495, "step": 1777 }, { "epoch": 0.54, "learning_rate": 4.6074596884873273e-05, "loss": 0.7869, "step": 1778 }, { "epoch": 0.54, "learning_rate": 4.6025632306817725e-05, "loss": 0.7775, "step": 1779 }, { "epoch": 0.54, "learning_rate": 4.597667156416371e-05, "loss": 0.7773, "step": 1780 }, { "epoch": 0.54, "learning_rate": 4.5927714704160006e-05, "loss": 0.7854, "step": 1781 }, { "epoch": 0.54, "learning_rate": 4.587876177405168e-05, "loss": 0.7308, "step": 1782 }, { "epoch": 0.54, "learning_rate": 4.5829812821079965e-05, "loss": 0.7504, "step": 1783 }, { "epoch": 0.54, "learning_rate": 4.578086789248232e-05, "loss": 0.7976, "step": 1784 }, { "epoch": 0.54, "learning_rate": 4.573192703549226e-05, "loss": 0.8111, "step": 1785 }, { "epoch": 0.54, "learning_rate": 4.568299029733942e-05, "loss": 0.7731, "step": 1786 }, { "epoch": 0.54, "learning_rate": 4.563405772524939e-05, "loss": 0.7449, "step": 1787 }, { "epoch": 0.54, "learning_rate": 4.558512936644383e-05, "loss": 0.794, "step": 1788 }, { "epoch": 0.54, "learning_rate": 4.5536205268140294e-05, "loss": 0.7373, "step": 1789 }, { "epoch": 0.54, "learning_rate": 4.548728547755218e-05, "loss": 0.7499, "step": 1790 }, { "epoch": 0.54, "learning_rate": 4.543837004188878e-05, "loss": 0.7343, "step": 1791 }, { "epoch": 0.54, "learning_rate": 4.538945900835519e-05, "loss": 0.7899, "step": 1792 }, { "epoch": 0.54, "learning_rate": 4.5340552424152236e-05, "loss": 0.8121, "step": 1793 }, { "epoch": 0.54, "learning_rate": 4.529165033647643e-05, "loss": 0.7956, "step": 1794 }, { "epoch": 0.54, "learning_rate": 4.524275279251998e-05, "loss": 0.7792, "step": 1795 }, { "epoch": 0.54, "learning_rate": 4.5193859839470714e-05, "loss": 0.7174, "step": 1796 }, { "epoch": 0.55, "learning_rate": 4.514497152451201e-05, "loss": 0.7349, "step": 1797 }, { "epoch": 0.55, "learning_rate": 4.509608789482276e-05, "loss": 0.7468, "step": 1798 }, { "epoch": 0.55, "learning_rate": 4.504720899757734e-05, "loss": 0.7733, "step": 1799 }, { "epoch": 0.55, "learning_rate": 4.499833487994559e-05, "loss": 0.7999, "step": 1800 }, { "epoch": 0.55, "learning_rate": 4.494946558909272e-05, "loss": 0.714, "step": 1801 }, { "epoch": 0.55, "learning_rate": 4.4900601172179244e-05, "loss": 0.8068, "step": 1802 }, { "epoch": 0.55, "learning_rate": 4.4851741676361014e-05, "loss": 0.7425, "step": 1803 }, { "epoch": 0.55, "learning_rate": 4.4802887148789146e-05, "loss": 0.7784, "step": 1804 }, { "epoch": 0.55, "learning_rate": 4.475403763660992e-05, "loss": 0.7793, "step": 1805 }, { "epoch": 0.55, "learning_rate": 4.470519318696481e-05, "loss": 0.7501, "step": 1806 }, { "epoch": 0.55, "learning_rate": 4.4656353846990365e-05, "loss": 0.7212, "step": 1807 }, { "epoch": 0.55, "learning_rate": 4.4607519663818245e-05, "loss": 0.7842, "step": 1808 }, { "epoch": 0.55, "learning_rate": 4.4558690684575135e-05, "loss": 0.7606, "step": 1809 }, { "epoch": 0.55, "learning_rate": 4.450986695638265e-05, "loss": 0.6761, "step": 1810 }, { "epoch": 0.55, "learning_rate": 4.446104852635735e-05, "loss": 0.7626, "step": 1811 }, { "epoch": 0.55, "learning_rate": 4.4412235441610755e-05, "loss": 0.7776, "step": 1812 }, { "epoch": 0.55, "learning_rate": 4.4363427749249136e-05, "loss": 0.7326, "step": 1813 }, { "epoch": 0.55, "learning_rate": 4.431462549637359e-05, "loss": 0.7722, "step": 1814 }, { "epoch": 0.55, "learning_rate": 4.4265828730079987e-05, "loss": 0.7457, "step": 1815 }, { "epoch": 0.55, "learning_rate": 4.4217037497458874e-05, "loss": 0.7104, "step": 1816 }, { "epoch": 0.55, "learning_rate": 4.4168251845595466e-05, "loss": 0.8087, "step": 1817 }, { "epoch": 0.55, "learning_rate": 4.4119471821569616e-05, "loss": 0.746, "step": 1818 }, { "epoch": 0.55, "learning_rate": 4.407069747245568e-05, "loss": 0.7635, "step": 1819 }, { "epoch": 0.55, "learning_rate": 4.4021928845322635e-05, "loss": 0.7872, "step": 1820 }, { "epoch": 0.55, "learning_rate": 4.397316598723385e-05, "loss": 0.6957, "step": 1821 }, { "epoch": 0.55, "learning_rate": 4.392440894524719e-05, "loss": 0.7321, "step": 1822 }, { "epoch": 0.55, "learning_rate": 4.3875657766414824e-05, "loss": 0.7404, "step": 1823 }, { "epoch": 0.55, "learning_rate": 4.382691249778337e-05, "loss": 0.7659, "step": 1824 }, { "epoch": 0.55, "learning_rate": 4.377817318639368e-05, "loss": 0.785, "step": 1825 }, { "epoch": 0.55, "learning_rate": 4.372943987928082e-05, "loss": 0.7439, "step": 1826 }, { "epoch": 0.55, "learning_rate": 4.368071262347417e-05, "loss": 0.8116, "step": 1827 }, { "epoch": 0.55, "learning_rate": 4.363199146599717e-05, "loss": 0.7699, "step": 1828 }, { "epoch": 0.55, "learning_rate": 4.358327645386743e-05, "loss": 0.7882, "step": 1829 }, { "epoch": 0.56, "learning_rate": 4.353456763409658e-05, "loss": 0.7648, "step": 1830 }, { "epoch": 0.56, "learning_rate": 4.3485865053690356e-05, "loss": 0.6952, "step": 1831 }, { "epoch": 0.56, "learning_rate": 4.34371687596484e-05, "loss": 0.756, "step": 1832 }, { "epoch": 0.56, "learning_rate": 4.33884787989643e-05, "loss": 0.7727, "step": 1833 }, { "epoch": 0.56, "learning_rate": 4.3339795218625534e-05, "loss": 0.7871, "step": 1834 }, { "epoch": 0.56, "learning_rate": 4.3291118065613475e-05, "loss": 0.7767, "step": 1835 }, { "epoch": 0.56, "learning_rate": 4.324244738690322e-05, "loss": 0.7533, "step": 1836 }, { "epoch": 0.56, "learning_rate": 4.319378322946364e-05, "loss": 0.7928, "step": 1837 }, { "epoch": 0.56, "learning_rate": 4.314512564025733e-05, "loss": 0.6832, "step": 1838 }, { "epoch": 0.56, "learning_rate": 4.309647466624054e-05, "loss": 0.737, "step": 1839 }, { "epoch": 0.56, "learning_rate": 4.304783035436314e-05, "loss": 0.7507, "step": 1840 }, { "epoch": 0.56, "learning_rate": 4.2999192751568564e-05, "loss": 0.7401, "step": 1841 }, { "epoch": 0.56, "learning_rate": 4.295056190479373e-05, "loss": 0.7799, "step": 1842 }, { "epoch": 0.56, "learning_rate": 4.2901937860969144e-05, "loss": 0.7268, "step": 1843 }, { "epoch": 0.56, "learning_rate": 4.2853320667018646e-05, "loss": 0.7604, "step": 1844 }, { "epoch": 0.56, "learning_rate": 4.28047103698595e-05, "loss": 0.7804, "step": 1845 }, { "epoch": 0.56, "learning_rate": 4.27561070164023e-05, "loss": 0.7336, "step": 1846 }, { "epoch": 0.56, "learning_rate": 4.270751065355099e-05, "loss": 0.7553, "step": 1847 }, { "epoch": 0.56, "learning_rate": 4.265892132820269e-05, "loss": 0.7345, "step": 1848 }, { "epoch": 0.56, "learning_rate": 4.2610339087247817e-05, "loss": 0.7626, "step": 1849 }, { "epoch": 0.56, "learning_rate": 4.2561763977569854e-05, "loss": 0.7712, "step": 1850 }, { "epoch": 0.56, "learning_rate": 4.25131960460455e-05, "loss": 0.7509, "step": 1851 }, { "epoch": 0.56, "learning_rate": 4.2464635339544455e-05, "loss": 0.7307, "step": 1852 }, { "epoch": 0.56, "learning_rate": 4.2416081904929475e-05, "loss": 0.6754, "step": 1853 }, { "epoch": 0.56, "learning_rate": 4.236753578905627e-05, "loss": 0.7185, "step": 1854 }, { "epoch": 0.56, "learning_rate": 4.231899703877356e-05, "loss": 0.7549, "step": 1855 }, { "epoch": 0.56, "learning_rate": 4.2270465700922887e-05, "loss": 0.8061, "step": 1856 }, { "epoch": 0.56, "learning_rate": 4.222194182233863e-05, "loss": 0.7336, "step": 1857 }, { "epoch": 0.56, "learning_rate": 4.2173425449848045e-05, "loss": 0.7703, "step": 1858 }, { "epoch": 0.56, "learning_rate": 4.2124916630271074e-05, "loss": 0.7629, "step": 1859 }, { "epoch": 0.56, "learning_rate": 4.207641541042041e-05, "loss": 0.7275, "step": 1860 }, { "epoch": 0.56, "learning_rate": 4.20279218371014e-05, "loss": 0.7441, "step": 1861 }, { "epoch": 0.56, "learning_rate": 4.197943595711198e-05, "loss": 0.7719, "step": 1862 }, { "epoch": 0.57, "learning_rate": 4.193095781724274e-05, "loss": 0.7443, "step": 1863 }, { "epoch": 0.57, "learning_rate": 4.188248746427673e-05, "loss": 0.7683, "step": 1864 }, { "epoch": 0.57, "learning_rate": 4.18340249449895e-05, "loss": 0.7235, "step": 1865 }, { "epoch": 0.57, "learning_rate": 4.1785570306149034e-05, "loss": 0.813, "step": 1866 }, { "epoch": 0.57, "learning_rate": 4.1737123594515756e-05, "loss": 0.7266, "step": 1867 }, { "epoch": 0.57, "learning_rate": 4.16886848568424e-05, "loss": 0.8035, "step": 1868 }, { "epoch": 0.57, "learning_rate": 4.1640254139874016e-05, "loss": 0.8255, "step": 1869 }, { "epoch": 0.57, "learning_rate": 4.159183149034788e-05, "loss": 0.7684, "step": 1870 }, { "epoch": 0.57, "learning_rate": 4.154341695499357e-05, "loss": 0.7653, "step": 1871 }, { "epoch": 0.57, "learning_rate": 4.149501058053274e-05, "loss": 0.7225, "step": 1872 }, { "epoch": 0.57, "learning_rate": 4.144661241367922e-05, "loss": 0.73, "step": 1873 }, { "epoch": 0.57, "learning_rate": 4.1398222501138884e-05, "loss": 0.7819, "step": 1874 }, { "epoch": 0.57, "learning_rate": 4.134984088960969e-05, "loss": 0.7172, "step": 1875 }, { "epoch": 0.57, "learning_rate": 4.1301467625781545e-05, "loss": 0.7297, "step": 1876 }, { "epoch": 0.57, "learning_rate": 4.125310275633631e-05, "loss": 0.7793, "step": 1877 }, { "epoch": 0.57, "learning_rate": 4.120474632794774e-05, "loss": 0.7085, "step": 1878 }, { "epoch": 0.57, "learning_rate": 4.115639838728147e-05, "loss": 0.7228, "step": 1879 }, { "epoch": 0.57, "learning_rate": 4.110805898099493e-05, "loss": 0.7243, "step": 1880 }, { "epoch": 0.57, "learning_rate": 4.1059728155737286e-05, "loss": 0.7472, "step": 1881 }, { "epoch": 0.57, "learning_rate": 4.101140595814944e-05, "loss": 0.7627, "step": 1882 }, { "epoch": 0.57, "learning_rate": 4.096309243486402e-05, "loss": 0.8097, "step": 1883 }, { "epoch": 0.57, "learning_rate": 4.091478763250519e-05, "loss": 0.6939, "step": 1884 }, { "epoch": 0.57, "learning_rate": 4.086649159768878e-05, "loss": 0.7902, "step": 1885 }, { "epoch": 0.57, "learning_rate": 4.081820437702209e-05, "loss": 0.7907, "step": 1886 }, { "epoch": 0.57, "learning_rate": 4.076992601710399e-05, "loss": 0.7617, "step": 1887 }, { "epoch": 0.57, "learning_rate": 4.072165656452472e-05, "loss": 0.7784, "step": 1888 }, { "epoch": 0.57, "learning_rate": 4.0673396065865985e-05, "loss": 0.7497, "step": 1889 }, { "epoch": 0.57, "learning_rate": 4.0625144567700806e-05, "loss": 0.7583, "step": 1890 }, { "epoch": 0.57, "learning_rate": 4.057690211659358e-05, "loss": 0.7566, "step": 1891 }, { "epoch": 0.57, "learning_rate": 4.0528668759099885e-05, "loss": 0.7062, "step": 1892 }, { "epoch": 0.57, "learning_rate": 4.0480444541766576e-05, "loss": 0.7601, "step": 1893 }, { "epoch": 0.57, "learning_rate": 4.043222951113171e-05, "loss": 0.7583, "step": 1894 }, { "epoch": 0.57, "learning_rate": 4.038402371372444e-05, "loss": 0.8076, "step": 1895 }, { "epoch": 0.58, "learning_rate": 4.0335827196065015e-05, "loss": 0.7776, "step": 1896 }, { "epoch": 0.58, "learning_rate": 4.02876400046647e-05, "loss": 0.7226, "step": 1897 }, { "epoch": 0.58, "learning_rate": 4.023946218602584e-05, "loss": 0.7299, "step": 1898 }, { "epoch": 0.58, "learning_rate": 4.0191293786641656e-05, "loss": 0.8007, "step": 1899 }, { "epoch": 0.58, "learning_rate": 4.014313485299634e-05, "loss": 0.7896, "step": 1900 }, { "epoch": 0.58, "learning_rate": 4.009498543156487e-05, "loss": 0.7675, "step": 1901 }, { "epoch": 0.58, "learning_rate": 4.004684556881315e-05, "loss": 0.7504, "step": 1902 }, { "epoch": 0.58, "learning_rate": 3.9998715311197785e-05, "loss": 0.7463, "step": 1903 }, { "epoch": 0.58, "learning_rate": 3.9950594705166116e-05, "loss": 0.714, "step": 1904 }, { "epoch": 0.58, "learning_rate": 3.990248379715617e-05, "loss": 0.7832, "step": 1905 }, { "epoch": 0.58, "learning_rate": 3.9854382633596664e-05, "loss": 0.7161, "step": 1906 }, { "epoch": 0.58, "learning_rate": 3.9806291260906864e-05, "loss": 0.7801, "step": 1907 }, { "epoch": 0.58, "learning_rate": 3.9758209725496573e-05, "loss": 0.7676, "step": 1908 }, { "epoch": 0.58, "learning_rate": 3.9710138073766156e-05, "loss": 0.7958, "step": 1909 }, { "epoch": 0.58, "learning_rate": 3.9662076352106394e-05, "loss": 0.7253, "step": 1910 }, { "epoch": 0.58, "learning_rate": 3.961402460689852e-05, "loss": 0.7602, "step": 1911 }, { "epoch": 0.58, "learning_rate": 3.9565982884514105e-05, "loss": 0.7066, "step": 1912 }, { "epoch": 0.58, "learning_rate": 3.951795123131505e-05, "loss": 0.763, "step": 1913 }, { "epoch": 0.58, "learning_rate": 3.946992969365359e-05, "loss": 0.7354, "step": 1914 }, { "epoch": 0.58, "learning_rate": 3.9421918317872145e-05, "loss": 0.761, "step": 1915 }, { "epoch": 0.58, "learning_rate": 3.9373917150303326e-05, "loss": 0.7132, "step": 1916 }, { "epoch": 0.58, "learning_rate": 3.932592623726991e-05, "loss": 0.7033, "step": 1917 }, { "epoch": 0.58, "learning_rate": 3.927794562508481e-05, "loss": 0.8009, "step": 1918 }, { "epoch": 0.58, "learning_rate": 3.922997536005094e-05, "loss": 0.7688, "step": 1919 }, { "epoch": 0.58, "learning_rate": 3.9182015488461274e-05, "loss": 0.7556, "step": 1920 }, { "epoch": 0.58, "learning_rate": 3.91340660565987e-05, "loss": 0.7789, "step": 1921 }, { "epoch": 0.58, "learning_rate": 3.9086127110736126e-05, "loss": 0.7322, "step": 1922 }, { "epoch": 0.58, "learning_rate": 3.9038198697136254e-05, "loss": 0.7831, "step": 1923 }, { "epoch": 0.58, "learning_rate": 3.899028086205165e-05, "loss": 0.7518, "step": 1924 }, { "epoch": 0.58, "learning_rate": 3.8942373651724654e-05, "loss": 0.7902, "step": 1925 }, { "epoch": 0.58, "learning_rate": 3.889447711238742e-05, "loss": 0.7749, "step": 1926 }, { "epoch": 0.58, "learning_rate": 3.884659129026172e-05, "loss": 0.7827, "step": 1927 }, { "epoch": 0.58, "learning_rate": 3.8798716231559017e-05, "loss": 0.7971, "step": 1928 }, { "epoch": 0.59, "learning_rate": 3.87508519824804e-05, "loss": 0.7054, "step": 1929 }, { "epoch": 0.59, "learning_rate": 3.870299858921652e-05, "loss": 0.7748, "step": 1930 }, { "epoch": 0.59, "learning_rate": 3.865515609794755e-05, "loss": 0.7558, "step": 1931 }, { "epoch": 0.59, "learning_rate": 3.8607324554843136e-05, "loss": 0.7866, "step": 1932 }, { "epoch": 0.59, "learning_rate": 3.855950400606233e-05, "loss": 0.7621, "step": 1933 }, { "epoch": 0.59, "learning_rate": 3.8511694497753656e-05, "loss": 0.7953, "step": 1934 }, { "epoch": 0.59, "learning_rate": 3.8463896076054924e-05, "loss": 0.8109, "step": 1935 }, { "epoch": 0.59, "learning_rate": 3.841610878709323e-05, "loss": 0.7719, "step": 1936 }, { "epoch": 0.59, "learning_rate": 3.8368332676984955e-05, "loss": 0.7358, "step": 1937 }, { "epoch": 0.59, "learning_rate": 3.832056779183571e-05, "loss": 0.7308, "step": 1938 }, { "epoch": 0.59, "learning_rate": 3.827281417774024e-05, "loss": 0.7608, "step": 1939 }, { "epoch": 0.59, "learning_rate": 3.822507188078243e-05, "loss": 0.7233, "step": 1940 }, { "epoch": 0.59, "learning_rate": 3.817734094703521e-05, "loss": 0.7375, "step": 1941 }, { "epoch": 0.59, "learning_rate": 3.812962142256061e-05, "loss": 0.8261, "step": 1942 }, { "epoch": 0.59, "learning_rate": 3.808191335340962e-05, "loss": 0.7752, "step": 1943 }, { "epoch": 0.59, "learning_rate": 3.803421678562213e-05, "loss": 0.7187, "step": 1944 }, { "epoch": 0.59, "learning_rate": 3.7986531765226964e-05, "loss": 0.7182, "step": 1945 }, { "epoch": 0.59, "learning_rate": 3.7938858338241854e-05, "loss": 0.7333, "step": 1946 }, { "epoch": 0.59, "learning_rate": 3.7891196550673266e-05, "loss": 0.7333, "step": 1947 }, { "epoch": 0.59, "learning_rate": 3.7843546448516465e-05, "loss": 0.7899, "step": 1948 }, { "epoch": 0.59, "learning_rate": 3.779590807775544e-05, "loss": 0.7779, "step": 1949 }, { "epoch": 0.59, "learning_rate": 3.774828148436285e-05, "loss": 0.7619, "step": 1950 }, { "epoch": 0.59, "learning_rate": 3.770066671430003e-05, "loss": 0.7755, "step": 1951 }, { "epoch": 0.59, "learning_rate": 3.765306381351683e-05, "loss": 0.7444, "step": 1952 }, { "epoch": 0.59, "learning_rate": 3.760547282795169e-05, "loss": 0.7424, "step": 1953 }, { "epoch": 0.59, "learning_rate": 3.755789380353158e-05, "loss": 0.7187, "step": 1954 }, { "epoch": 0.59, "learning_rate": 3.751032678617187e-05, "loss": 0.7631, "step": 1955 }, { "epoch": 0.59, "learning_rate": 3.746277182177635e-05, "loss": 0.7602, "step": 1956 }, { "epoch": 0.59, "learning_rate": 3.741522895623725e-05, "loss": 0.7146, "step": 1957 }, { "epoch": 0.59, "learning_rate": 3.7367698235435036e-05, "loss": 0.727, "step": 1958 }, { "epoch": 0.59, "learning_rate": 3.732017970523848e-05, "loss": 0.7705, "step": 1959 }, { "epoch": 0.59, "learning_rate": 3.727267341150461e-05, "loss": 0.7865, "step": 1960 }, { "epoch": 0.59, "learning_rate": 3.722517940007863e-05, "loss": 0.716, "step": 1961 }, { "epoch": 0.6, "learning_rate": 3.7177697716793913e-05, "loss": 0.7169, "step": 1962 }, { "epoch": 0.6, "learning_rate": 3.713022840747189e-05, "loss": 0.736, "step": 1963 }, { "epoch": 0.6, "learning_rate": 3.7082771517922074e-05, "loss": 0.7498, "step": 1964 }, { "epoch": 0.6, "learning_rate": 3.703532709394203e-05, "loss": 0.7746, "step": 1965 }, { "epoch": 0.6, "learning_rate": 3.6987895181317247e-05, "loss": 0.6536, "step": 1966 }, { "epoch": 0.6, "learning_rate": 3.694047582582113e-05, "loss": 0.7654, "step": 1967 }, { "epoch": 0.6, "learning_rate": 3.689306907321498e-05, "loss": 0.7228, "step": 1968 }, { "epoch": 0.6, "learning_rate": 3.6845674969248e-05, "loss": 0.7456, "step": 1969 }, { "epoch": 0.6, "learning_rate": 3.679829355965707e-05, "loss": 0.7409, "step": 1970 }, { "epoch": 0.6, "learning_rate": 3.675092489016693e-05, "loss": 0.7763, "step": 1971 }, { "epoch": 0.6, "learning_rate": 3.670356900648991e-05, "loss": 0.8116, "step": 1972 }, { "epoch": 0.6, "learning_rate": 3.665622595432615e-05, "loss": 0.7142, "step": 1973 }, { "epoch": 0.6, "learning_rate": 3.660889577936326e-05, "loss": 0.7398, "step": 1974 }, { "epoch": 0.6, "learning_rate": 3.656157852727652e-05, "loss": 0.8077, "step": 1975 }, { "epoch": 0.6, "learning_rate": 3.6514274243728675e-05, "loss": 0.8214, "step": 1976 }, { "epoch": 0.6, "learning_rate": 3.646698297437001e-05, "loss": 0.7204, "step": 1977 }, { "epoch": 0.6, "learning_rate": 3.6419704764838236e-05, "loss": 0.7404, "step": 1978 }, { "epoch": 0.6, "learning_rate": 3.637243966075842e-05, "loss": 0.7803, "step": 1979 }, { "epoch": 0.6, "learning_rate": 3.632518770774303e-05, "loss": 0.7616, "step": 1980 }, { "epoch": 0.6, "learning_rate": 3.627794895139181e-05, "loss": 0.7341, "step": 1981 }, { "epoch": 0.6, "learning_rate": 3.623072343729182e-05, "loss": 0.712, "step": 1982 }, { "epoch": 0.6, "learning_rate": 3.6183511211017286e-05, "loss": 0.7391, "step": 1983 }, { "epoch": 0.6, "learning_rate": 3.61363123181296e-05, "loss": 0.742, "step": 1984 }, { "epoch": 0.6, "learning_rate": 3.608912680417737e-05, "loss": 0.7279, "step": 1985 }, { "epoch": 0.6, "learning_rate": 3.604195471469621e-05, "loss": 0.7036, "step": 1986 }, { "epoch": 0.6, "learning_rate": 3.5994796095208826e-05, "loss": 0.7599, "step": 1987 }, { "epoch": 0.6, "learning_rate": 3.594765099122487e-05, "loss": 0.7607, "step": 1988 }, { "epoch": 0.6, "learning_rate": 3.5900519448241025e-05, "loss": 0.7305, "step": 1989 }, { "epoch": 0.6, "learning_rate": 3.585340151174084e-05, "loss": 0.7659, "step": 1990 }, { "epoch": 0.6, "learning_rate": 3.580629722719475e-05, "loss": 0.6184, "step": 1991 }, { "epoch": 0.6, "learning_rate": 3.5759206640059984e-05, "loss": 0.755, "step": 1992 }, { "epoch": 0.6, "learning_rate": 3.571212979578062e-05, "loss": 0.7189, "step": 1993 }, { "epoch": 0.6, "learning_rate": 3.5665066739787414e-05, "loss": 0.7292, "step": 1994 }, { "epoch": 0.61, "learning_rate": 3.5618017517497825e-05, "loss": 0.7035, "step": 1995 }, { "epoch": 0.61, "learning_rate": 3.557098217431596e-05, "loss": 0.7389, "step": 1996 }, { "epoch": 0.61, "learning_rate": 3.5523960755632574e-05, "loss": 0.751, "step": 1997 }, { "epoch": 0.61, "learning_rate": 3.5476953306824936e-05, "loss": 0.7508, "step": 1998 }, { "epoch": 0.61, "learning_rate": 3.542995987325685e-05, "loss": 0.7289, "step": 1999 }, { "epoch": 0.61, "learning_rate": 3.538298050027859e-05, "loss": 0.7277, "step": 2000 }, { "epoch": 0.61, "learning_rate": 3.533601523322688e-05, "loss": 0.7365, "step": 2001 }, { "epoch": 0.61, "learning_rate": 3.528906411742482e-05, "loss": 0.7378, "step": 2002 }, { "epoch": 0.61, "learning_rate": 3.524212719818185e-05, "loss": 0.777, "step": 2003 }, { "epoch": 0.61, "learning_rate": 3.519520452079368e-05, "loss": 0.7269, "step": 2004 }, { "epoch": 0.61, "learning_rate": 3.514829613054236e-05, "loss": 0.6955, "step": 2005 }, { "epoch": 0.61, "learning_rate": 3.510140207269607e-05, "loss": 0.7176, "step": 2006 }, { "epoch": 0.61, "learning_rate": 3.505452239250918e-05, "loss": 0.7155, "step": 2007 }, { "epoch": 0.61, "learning_rate": 3.50076571352222e-05, "loss": 0.7608, "step": 2008 }, { "epoch": 0.61, "learning_rate": 3.496080634606174e-05, "loss": 0.7605, "step": 2009 }, { "epoch": 0.61, "learning_rate": 3.4913970070240386e-05, "loss": 0.7477, "step": 2010 }, { "epoch": 0.61, "learning_rate": 3.4867148352956794e-05, "loss": 0.7712, "step": 2011 }, { "epoch": 0.61, "learning_rate": 3.482034123939548e-05, "loss": 0.6913, "step": 2012 }, { "epoch": 0.61, "learning_rate": 3.477354877472697e-05, "loss": 0.7254, "step": 2013 }, { "epoch": 0.61, "learning_rate": 3.472677100410758e-05, "loss": 0.7925, "step": 2014 }, { "epoch": 0.61, "learning_rate": 3.4680007972679476e-05, "loss": 0.7728, "step": 2015 }, { "epoch": 0.61, "learning_rate": 3.463325972557056e-05, "loss": 0.7499, "step": 2016 }, { "epoch": 0.61, "learning_rate": 3.4586526307894534e-05, "loss": 0.7496, "step": 2017 }, { "epoch": 0.61, "learning_rate": 3.453980776475075e-05, "loss": 0.7861, "step": 2018 }, { "epoch": 0.61, "learning_rate": 3.449310414122418e-05, "loss": 0.802, "step": 2019 }, { "epoch": 0.61, "learning_rate": 3.4446415482385464e-05, "loss": 0.77, "step": 2020 }, { "epoch": 0.61, "learning_rate": 3.439974183329073e-05, "loss": 0.7314, "step": 2021 }, { "epoch": 0.61, "learning_rate": 3.4353083238981694e-05, "loss": 0.7987, "step": 2022 }, { "epoch": 0.61, "learning_rate": 3.4306439744485454e-05, "loss": 0.809, "step": 2023 }, { "epoch": 0.61, "learning_rate": 3.425981139481464e-05, "loss": 0.7432, "step": 2024 }, { "epoch": 0.61, "learning_rate": 3.421319823496718e-05, "loss": 0.6939, "step": 2025 }, { "epoch": 0.61, "learning_rate": 3.4166600309926387e-05, "loss": 0.7985, "step": 2026 }, { "epoch": 0.61, "learning_rate": 3.4120017664660836e-05, "loss": 0.738, "step": 2027 }, { "epoch": 0.62, "learning_rate": 3.407345034412442e-05, "loss": 0.7179, "step": 2028 }, { "epoch": 0.62, "learning_rate": 3.40268983932562e-05, "loss": 0.7247, "step": 2029 }, { "epoch": 0.62, "learning_rate": 3.398036185698038e-05, "loss": 0.7587, "step": 2030 }, { "epoch": 0.62, "learning_rate": 3.393384078020634e-05, "loss": 0.7171, "step": 2031 }, { "epoch": 0.62, "learning_rate": 3.388733520782852e-05, "loss": 0.7542, "step": 2032 }, { "epoch": 0.62, "learning_rate": 3.3840845184726386e-05, "loss": 0.7661, "step": 2033 }, { "epoch": 0.62, "learning_rate": 3.379437075576443e-05, "loss": 0.7511, "step": 2034 }, { "epoch": 0.62, "learning_rate": 3.374791196579204e-05, "loss": 0.7137, "step": 2035 }, { "epoch": 0.62, "learning_rate": 3.370146885964358e-05, "loss": 0.7785, "step": 2036 }, { "epoch": 0.62, "learning_rate": 3.3655041482138236e-05, "loss": 0.7539, "step": 2037 }, { "epoch": 0.62, "learning_rate": 3.360862987808001e-05, "loss": 0.7522, "step": 2038 }, { "epoch": 0.62, "learning_rate": 3.356223409225769e-05, "loss": 0.7029, "step": 2039 }, { "epoch": 0.62, "learning_rate": 3.351585416944485e-05, "loss": 0.7183, "step": 2040 }, { "epoch": 0.62, "learning_rate": 3.346949015439966e-05, "loss": 0.7419, "step": 2041 }, { "epoch": 0.62, "learning_rate": 3.342314209186502e-05, "loss": 0.7574, "step": 2042 }, { "epoch": 0.62, "learning_rate": 3.3376810026568384e-05, "loss": 0.7329, "step": 2043 }, { "epoch": 0.62, "learning_rate": 3.33304940032218e-05, "loss": 0.7552, "step": 2044 }, { "epoch": 0.62, "learning_rate": 3.328419406652183e-05, "loss": 0.8082, "step": 2045 }, { "epoch": 0.62, "learning_rate": 3.323791026114951e-05, "loss": 0.764, "step": 2046 }, { "epoch": 0.62, "learning_rate": 3.319164263177026e-05, "loss": 0.7728, "step": 2047 }, { "epoch": 0.62, "learning_rate": 3.314539122303399e-05, "loss": 0.7846, "step": 2048 }, { "epoch": 0.62, "learning_rate": 3.309915607957487e-05, "loss": 0.7434, "step": 2049 }, { "epoch": 0.62, "learning_rate": 3.305293724601141e-05, "loss": 0.774, "step": 2050 }, { "epoch": 0.62, "learning_rate": 3.300673476694636e-05, "loss": 0.7912, "step": 2051 }, { "epoch": 0.62, "learning_rate": 3.296054868696673e-05, "loss": 0.7837, "step": 2052 }, { "epoch": 0.62, "learning_rate": 3.291437905064365e-05, "loss": 0.7395, "step": 2053 }, { "epoch": 0.62, "learning_rate": 3.286822590253243e-05, "loss": 0.7586, "step": 2054 }, { "epoch": 0.62, "learning_rate": 3.282208928717241e-05, "loss": 0.7906, "step": 2055 }, { "epoch": 0.62, "learning_rate": 3.2775969249087054e-05, "loss": 0.7698, "step": 2056 }, { "epoch": 0.62, "learning_rate": 3.272986583278376e-05, "loss": 0.7511, "step": 2057 }, { "epoch": 0.62, "learning_rate": 3.2683779082753916e-05, "loss": 0.7401, "step": 2058 }, { "epoch": 0.62, "learning_rate": 3.2637709043472806e-05, "loss": 0.736, "step": 2059 }, { "epoch": 0.62, "learning_rate": 3.259165575939963e-05, "loss": 0.6984, "step": 2060 }, { "epoch": 0.63, "learning_rate": 3.254561927497738e-05, "loss": 0.752, "step": 2061 }, { "epoch": 0.63, "learning_rate": 3.249959963463283e-05, "loss": 0.6841, "step": 2062 }, { "epoch": 0.63, "learning_rate": 3.2453596882776524e-05, "loss": 0.7674, "step": 2063 }, { "epoch": 0.63, "learning_rate": 3.240761106380271e-05, "loss": 0.8027, "step": 2064 }, { "epoch": 0.63, "learning_rate": 3.2361642222089295e-05, "loss": 0.752, "step": 2065 }, { "epoch": 0.63, "learning_rate": 3.231569040199778e-05, "loss": 0.7586, "step": 2066 }, { "epoch": 0.63, "learning_rate": 3.226975564787322e-05, "loss": 0.7827, "step": 2067 }, { "epoch": 0.63, "learning_rate": 3.222383800404428e-05, "loss": 0.7716, "step": 2068 }, { "epoch": 0.63, "learning_rate": 3.217793751482305e-05, "loss": 0.7458, "step": 2069 }, { "epoch": 0.63, "learning_rate": 3.2132054224505084e-05, "loss": 0.8015, "step": 2070 }, { "epoch": 0.63, "learning_rate": 3.20861881773693e-05, "loss": 0.6875, "step": 2071 }, { "epoch": 0.63, "learning_rate": 3.2040339417678064e-05, "loss": 0.7066, "step": 2072 }, { "epoch": 0.63, "learning_rate": 3.199450798967697e-05, "loss": 0.8046, "step": 2073 }, { "epoch": 0.63, "learning_rate": 3.1948693937594954e-05, "loss": 0.7807, "step": 2074 }, { "epoch": 0.63, "learning_rate": 3.1902897305644095e-05, "loss": 0.7701, "step": 2075 }, { "epoch": 0.63, "learning_rate": 3.185711813801979e-05, "loss": 0.7196, "step": 2076 }, { "epoch": 0.63, "learning_rate": 3.181135647890047e-05, "loss": 0.7252, "step": 2077 }, { "epoch": 0.63, "learning_rate": 3.176561237244772e-05, "loss": 0.7372, "step": 2078 }, { "epoch": 0.63, "learning_rate": 3.1719885862806144e-05, "loss": 0.7469, "step": 2079 }, { "epoch": 0.63, "learning_rate": 3.167417699410345e-05, "loss": 0.7634, "step": 2080 }, { "epoch": 0.63, "learning_rate": 3.1628485810450234e-05, "loss": 0.7664, "step": 2081 }, { "epoch": 0.63, "learning_rate": 3.158281235594006e-05, "loss": 0.7439, "step": 2082 }, { "epoch": 0.63, "learning_rate": 3.15371566746494e-05, "loss": 0.773, "step": 2083 }, { "epoch": 0.63, "learning_rate": 3.1491518810637545e-05, "loss": 0.817, "step": 2084 }, { "epoch": 0.63, "learning_rate": 3.1445898807946616e-05, "loss": 0.7253, "step": 2085 }, { "epoch": 0.63, "learning_rate": 3.140029671060145e-05, "loss": 0.7483, "step": 2086 }, { "epoch": 0.63, "learning_rate": 3.135471256260968e-05, "loss": 0.7254, "step": 2087 }, { "epoch": 0.63, "learning_rate": 3.130914640796157e-05, "loss": 0.7625, "step": 2088 }, { "epoch": 0.63, "learning_rate": 3.1263598290630006e-05, "loss": 0.801, "step": 2089 }, { "epoch": 0.63, "learning_rate": 3.1218068254570485e-05, "loss": 0.7548, "step": 2090 }, { "epoch": 0.63, "learning_rate": 3.117255634372109e-05, "loss": 0.757, "step": 2091 }, { "epoch": 0.63, "learning_rate": 3.112706260200236e-05, "loss": 0.7442, "step": 2092 }, { "epoch": 0.63, "learning_rate": 3.108158707331732e-05, "loss": 0.7816, "step": 2093 }, { "epoch": 0.64, "learning_rate": 3.10361298015514e-05, "loss": 0.7463, "step": 2094 }, { "epoch": 0.64, "learning_rate": 3.0990690830572475e-05, "loss": 0.7737, "step": 2095 }, { "epoch": 0.64, "learning_rate": 3.094527020423069e-05, "loss": 0.7243, "step": 2096 }, { "epoch": 0.64, "learning_rate": 3.089986796635851e-05, "loss": 0.7155, "step": 2097 }, { "epoch": 0.64, "learning_rate": 3.0854484160770645e-05, "loss": 0.7746, "step": 2098 }, { "epoch": 0.64, "learning_rate": 3.0809118831264066e-05, "loss": 0.7177, "step": 2099 }, { "epoch": 0.64, "learning_rate": 3.0763772021617855e-05, "loss": 0.7367, "step": 2100 }, { "epoch": 0.64, "learning_rate": 3.071844377559323e-05, "loss": 0.7923, "step": 2101 }, { "epoch": 0.64, "learning_rate": 3.0673134136933504e-05, "loss": 0.7422, "step": 2102 }, { "epoch": 0.64, "learning_rate": 3.0627843149364065e-05, "loss": 0.7638, "step": 2103 }, { "epoch": 0.64, "learning_rate": 3.0582570856592255e-05, "loss": 0.7609, "step": 2104 }, { "epoch": 0.64, "learning_rate": 3.0537317302307404e-05, "loss": 0.7681, "step": 2105 }, { "epoch": 0.64, "learning_rate": 3.0492082530180727e-05, "loss": 0.7748, "step": 2106 }, { "epoch": 0.64, "learning_rate": 3.044686658386537e-05, "loss": 0.7622, "step": 2107 }, { "epoch": 0.64, "learning_rate": 3.0401669506996256e-05, "loss": 0.8164, "step": 2108 }, { "epoch": 0.64, "learning_rate": 3.035649134319012e-05, "loss": 0.816, "step": 2109 }, { "epoch": 0.64, "learning_rate": 3.031133213604541e-05, "loss": 0.7106, "step": 2110 }, { "epoch": 0.64, "learning_rate": 3.0266191929142384e-05, "loss": 0.7509, "step": 2111 }, { "epoch": 0.64, "learning_rate": 3.0221070766042847e-05, "loss": 0.696, "step": 2112 }, { "epoch": 0.64, "learning_rate": 3.017596869029028e-05, "loss": 0.8113, "step": 2113 }, { "epoch": 0.64, "learning_rate": 3.013088574540974e-05, "loss": 0.8027, "step": 2114 }, { "epoch": 0.64, "learning_rate": 3.0085821974907817e-05, "loss": 0.8042, "step": 2115 }, { "epoch": 0.64, "learning_rate": 3.0040777422272615e-05, "loss": 0.7259, "step": 2116 }, { "epoch": 0.64, "learning_rate": 2.9995752130973666e-05, "loss": 0.7289, "step": 2117 }, { "epoch": 0.64, "learning_rate": 2.99507461444619e-05, "loss": 0.7399, "step": 2118 }, { "epoch": 0.64, "learning_rate": 2.9905759506169683e-05, "loss": 0.7649, "step": 2119 }, { "epoch": 0.64, "learning_rate": 2.9860792259510656e-05, "loss": 0.727, "step": 2120 }, { "epoch": 0.64, "learning_rate": 2.9815844447879747e-05, "loss": 0.71, "step": 2121 }, { "epoch": 0.64, "learning_rate": 2.977091611465313e-05, "loss": 0.7449, "step": 2122 }, { "epoch": 0.64, "learning_rate": 2.9726007303188226e-05, "loss": 0.7245, "step": 2123 }, { "epoch": 0.64, "learning_rate": 2.9681118056823553e-05, "loss": 0.7582, "step": 2124 }, { "epoch": 0.64, "learning_rate": 2.96362484188788e-05, "loss": 0.7446, "step": 2125 }, { "epoch": 0.64, "learning_rate": 2.9591398432654676e-05, "loss": 0.7526, "step": 2126 }, { "epoch": 0.65, "learning_rate": 2.9546568141433006e-05, "loss": 0.7725, "step": 2127 }, { "epoch": 0.65, "learning_rate": 2.950175758847654e-05, "loss": 0.76, "step": 2128 }, { "epoch": 0.65, "learning_rate": 2.9456966817029007e-05, "loss": 0.7774, "step": 2129 }, { "epoch": 0.65, "learning_rate": 2.941219587031502e-05, "loss": 0.7003, "step": 2130 }, { "epoch": 0.65, "learning_rate": 2.9367444791540112e-05, "loss": 0.7372, "step": 2131 }, { "epoch": 0.65, "learning_rate": 2.9322713623890606e-05, "loss": 0.7143, "step": 2132 }, { "epoch": 0.65, "learning_rate": 2.9278002410533605e-05, "loss": 0.716, "step": 2133 }, { "epoch": 0.65, "learning_rate": 2.9233311194616974e-05, "loss": 0.7607, "step": 2134 }, { "epoch": 0.65, "learning_rate": 2.9188640019269288e-05, "loss": 0.7703, "step": 2135 }, { "epoch": 0.65, "learning_rate": 2.914398892759975e-05, "loss": 0.7732, "step": 2136 }, { "epoch": 0.65, "learning_rate": 2.9099357962698193e-05, "loss": 0.7478, "step": 2137 }, { "epoch": 0.65, "learning_rate": 2.9054747167635054e-05, "loss": 0.7716, "step": 2138 }, { "epoch": 0.65, "learning_rate": 2.9010156585461264e-05, "loss": 0.6877, "step": 2139 }, { "epoch": 0.65, "learning_rate": 2.8965586259208295e-05, "loss": 0.7184, "step": 2140 }, { "epoch": 0.65, "learning_rate": 2.8921036231888027e-05, "loss": 0.7235, "step": 2141 }, { "epoch": 0.65, "learning_rate": 2.8876506546492756e-05, "loss": 0.7248, "step": 2142 }, { "epoch": 0.65, "learning_rate": 2.8831997245995186e-05, "loss": 0.6778, "step": 2143 }, { "epoch": 0.65, "learning_rate": 2.8787508373348315e-05, "loss": 0.7238, "step": 2144 }, { "epoch": 0.65, "learning_rate": 2.874303997148543e-05, "loss": 0.7269, "step": 2145 }, { "epoch": 0.65, "learning_rate": 2.8698592083320054e-05, "loss": 0.7716, "step": 2146 }, { "epoch": 0.65, "learning_rate": 2.865416475174596e-05, "loss": 0.7946, "step": 2147 }, { "epoch": 0.65, "learning_rate": 2.8609758019637033e-05, "loss": 0.7976, "step": 2148 }, { "epoch": 0.65, "learning_rate": 2.8565371929847284e-05, "loss": 0.7128, "step": 2149 }, { "epoch": 0.65, "learning_rate": 2.8521006525210846e-05, "loss": 0.6593, "step": 2150 }, { "epoch": 0.65, "learning_rate": 2.8476661848541853e-05, "loss": 0.7005, "step": 2151 }, { "epoch": 0.65, "learning_rate": 2.8432337942634425e-05, "loss": 0.7392, "step": 2152 }, { "epoch": 0.65, "learning_rate": 2.8388034850262646e-05, "loss": 0.7146, "step": 2153 }, { "epoch": 0.65, "learning_rate": 2.8343752614180568e-05, "loss": 0.7289, "step": 2154 }, { "epoch": 0.65, "learning_rate": 2.829949127712205e-05, "loss": 0.7807, "step": 2155 }, { "epoch": 0.65, "learning_rate": 2.8255250881800797e-05, "loss": 0.6975, "step": 2156 }, { "epoch": 0.65, "learning_rate": 2.8211031470910298e-05, "loss": 0.8123, "step": 2157 }, { "epoch": 0.65, "learning_rate": 2.8166833087123844e-05, "loss": 0.7264, "step": 2158 }, { "epoch": 0.65, "learning_rate": 2.8122655773094375e-05, "loss": 0.7487, "step": 2159 }, { "epoch": 0.66, "learning_rate": 2.8078499571454498e-05, "loss": 0.7611, "step": 2160 }, { "epoch": 0.66, "learning_rate": 2.803436452481651e-05, "loss": 0.7757, "step": 2161 }, { "epoch": 0.66, "learning_rate": 2.79902506757722e-05, "loss": 0.7615, "step": 2162 }, { "epoch": 0.66, "learning_rate": 2.7946158066893003e-05, "loss": 0.798, "step": 2163 }, { "epoch": 0.66, "learning_rate": 2.7902086740729777e-05, "loss": 0.7873, "step": 2164 }, { "epoch": 0.66, "learning_rate": 2.7858036739812842e-05, "loss": 0.7558, "step": 2165 }, { "epoch": 0.66, "learning_rate": 2.7814008106652012e-05, "loss": 0.7663, "step": 2166 }, { "epoch": 0.66, "learning_rate": 2.7770000883736406e-05, "loss": 0.7383, "step": 2167 }, { "epoch": 0.66, "learning_rate": 2.7726015113534514e-05, "loss": 0.7761, "step": 2168 }, { "epoch": 0.66, "learning_rate": 2.768205083849409e-05, "loss": 0.7787, "step": 2169 }, { "epoch": 0.66, "learning_rate": 2.7638108101042215e-05, "loss": 0.75, "step": 2170 }, { "epoch": 0.66, "learning_rate": 2.7594186943585128e-05, "loss": 0.7113, "step": 2171 }, { "epoch": 0.66, "learning_rate": 2.7550287408508246e-05, "loss": 0.7659, "step": 2172 }, { "epoch": 0.66, "learning_rate": 2.7506409538176115e-05, "loss": 0.6887, "step": 2173 }, { "epoch": 0.66, "learning_rate": 2.7462553374932427e-05, "loss": 0.7166, "step": 2174 }, { "epoch": 0.66, "learning_rate": 2.7418718961099864e-05, "loss": 0.719, "step": 2175 }, { "epoch": 0.66, "learning_rate": 2.737490633898016e-05, "loss": 0.6847, "step": 2176 }, { "epoch": 0.66, "learning_rate": 2.733111555085397e-05, "loss": 0.7194, "step": 2177 }, { "epoch": 0.66, "learning_rate": 2.728734663898094e-05, "loss": 0.7358, "step": 2178 }, { "epoch": 0.66, "learning_rate": 2.7243599645599576e-05, "loss": 0.7255, "step": 2179 }, { "epoch": 0.66, "learning_rate": 2.7199874612927202e-05, "loss": 0.7741, "step": 2180 }, { "epoch": 0.66, "learning_rate": 2.7156171583160016e-05, "loss": 0.7104, "step": 2181 }, { "epoch": 0.66, "learning_rate": 2.7112490598472905e-05, "loss": 0.7105, "step": 2182 }, { "epoch": 0.66, "learning_rate": 2.706883170101957e-05, "loss": 0.7111, "step": 2183 }, { "epoch": 0.66, "learning_rate": 2.7025194932932314e-05, "loss": 0.7552, "step": 2184 }, { "epoch": 0.66, "learning_rate": 2.698158033632211e-05, "loss": 0.7563, "step": 2185 }, { "epoch": 0.66, "learning_rate": 2.6937987953278587e-05, "loss": 0.7305, "step": 2186 }, { "epoch": 0.66, "learning_rate": 2.6894417825869854e-05, "loss": 0.7367, "step": 2187 }, { "epoch": 0.66, "learning_rate": 2.6850869996142614e-05, "loss": 0.7511, "step": 2188 }, { "epoch": 0.66, "learning_rate": 2.6807344506121973e-05, "loss": 0.7547, "step": 2189 }, { "epoch": 0.66, "learning_rate": 2.6763841397811573e-05, "loss": 0.7912, "step": 2190 }, { "epoch": 0.66, "learning_rate": 2.67203607131934e-05, "loss": 0.7207, "step": 2191 }, { "epoch": 0.66, "learning_rate": 2.6676902494227795e-05, "loss": 0.7566, "step": 2192 }, { "epoch": 0.67, "learning_rate": 2.6633466782853432e-05, "loss": 0.6978, "step": 2193 }, { "epoch": 0.67, "learning_rate": 2.6590053620987287e-05, "loss": 0.7556, "step": 2194 }, { "epoch": 0.67, "learning_rate": 2.6546663050524546e-05, "loss": 0.7442, "step": 2195 }, { "epoch": 0.67, "learning_rate": 2.65032951133386e-05, "loss": 0.7866, "step": 2196 }, { "epoch": 0.67, "learning_rate": 2.6459949851280978e-05, "loss": 0.7655, "step": 2197 }, { "epoch": 0.67, "learning_rate": 2.641662730618139e-05, "loss": 0.6976, "step": 2198 }, { "epoch": 0.67, "learning_rate": 2.6373327519847563e-05, "loss": 0.7168, "step": 2199 }, { "epoch": 0.67, "learning_rate": 2.6330050534065255e-05, "loss": 0.7634, "step": 2200 }, { "epoch": 0.67, "learning_rate": 2.628679639059829e-05, "loss": 0.7649, "step": 2201 }, { "epoch": 0.67, "learning_rate": 2.624356513118837e-05, "loss": 0.8091, "step": 2202 }, { "epoch": 0.67, "learning_rate": 2.6200356797555175e-05, "loss": 0.8003, "step": 2203 }, { "epoch": 0.67, "learning_rate": 2.6157171431396223e-05, "loss": 0.6728, "step": 2204 }, { "epoch": 0.67, "learning_rate": 2.6114009074386846e-05, "loss": 0.7032, "step": 2205 }, { "epoch": 0.67, "learning_rate": 2.6070869768180255e-05, "loss": 0.7621, "step": 2206 }, { "epoch": 0.67, "learning_rate": 2.602775355440734e-05, "loss": 0.7166, "step": 2207 }, { "epoch": 0.67, "learning_rate": 2.598466047467673e-05, "loss": 0.7324, "step": 2208 }, { "epoch": 0.67, "learning_rate": 2.5941590570574714e-05, "loss": 0.7623, "step": 2209 }, { "epoch": 0.67, "learning_rate": 2.5898543883665256e-05, "loss": 0.7186, "step": 2210 }, { "epoch": 0.67, "learning_rate": 2.5855520455489885e-05, "loss": 0.7388, "step": 2211 }, { "epoch": 0.67, "learning_rate": 2.5812520327567656e-05, "loss": 0.7328, "step": 2212 }, { "epoch": 0.67, "learning_rate": 2.5769543541395225e-05, "loss": 0.7177, "step": 2213 }, { "epoch": 0.67, "learning_rate": 2.5726590138446642e-05, "loss": 0.7648, "step": 2214 }, { "epoch": 0.67, "learning_rate": 2.568366016017342e-05, "loss": 0.7485, "step": 2215 }, { "epoch": 0.67, "learning_rate": 2.5640753648004435e-05, "loss": 0.7589, "step": 2216 }, { "epoch": 0.67, "learning_rate": 2.5597870643346e-05, "loss": 0.7091, "step": 2217 }, { "epoch": 0.67, "learning_rate": 2.555501118758167e-05, "loss": 0.7603, "step": 2218 }, { "epoch": 0.67, "learning_rate": 2.5512175322072275e-05, "loss": 0.7487, "step": 2219 }, { "epoch": 0.67, "learning_rate": 2.5469363088155902e-05, "loss": 0.7336, "step": 2220 }, { "epoch": 0.67, "learning_rate": 2.542657452714785e-05, "loss": 0.7701, "step": 2221 }, { "epoch": 0.67, "learning_rate": 2.538380968034053e-05, "loss": 0.7427, "step": 2222 }, { "epoch": 0.67, "learning_rate": 2.5341068589003512e-05, "loss": 0.7422, "step": 2223 }, { "epoch": 0.67, "learning_rate": 2.5298351294383395e-05, "loss": 0.7531, "step": 2224 }, { "epoch": 0.67, "learning_rate": 2.525565783770387e-05, "loss": 0.6986, "step": 2225 }, { "epoch": 0.68, "learning_rate": 2.521298826016557e-05, "loss": 0.7733, "step": 2226 }, { "epoch": 0.68, "learning_rate": 2.5170342602946102e-05, "loss": 0.7109, "step": 2227 }, { "epoch": 0.68, "learning_rate": 2.5127720907199982e-05, "loss": 0.7683, "step": 2228 }, { "epoch": 0.68, "learning_rate": 2.5085123214058644e-05, "loss": 0.7471, "step": 2229 }, { "epoch": 0.68, "learning_rate": 2.5042549564630306e-05, "loss": 0.7813, "step": 2230 }, { "epoch": 0.68, "learning_rate": 2.500000000000001e-05, "loss": 0.7647, "step": 2231 }, { "epoch": 0.68, "learning_rate": 2.4957474561229528e-05, "loss": 0.745, "step": 2232 }, { "epoch": 0.68, "learning_rate": 2.4914973289357413e-05, "loss": 0.7089, "step": 2233 }, { "epoch": 0.68, "learning_rate": 2.4872496225398823e-05, "loss": 0.7579, "step": 2234 }, { "epoch": 0.68, "learning_rate": 2.4830043410345598e-05, "loss": 0.7978, "step": 2235 }, { "epoch": 0.68, "learning_rate": 2.4787614885166138e-05, "loss": 0.7259, "step": 2236 }, { "epoch": 0.68, "learning_rate": 2.4745210690805474e-05, "loss": 0.7623, "step": 2237 }, { "epoch": 0.68, "learning_rate": 2.470283086818509e-05, "loss": 0.7472, "step": 2238 }, { "epoch": 0.68, "learning_rate": 2.4660475458202968e-05, "loss": 0.7196, "step": 2239 }, { "epoch": 0.68, "learning_rate": 2.461814450173352e-05, "loss": 0.7367, "step": 2240 }, { "epoch": 0.68, "learning_rate": 2.4575838039627613e-05, "loss": 0.7474, "step": 2241 }, { "epoch": 0.68, "learning_rate": 2.4533556112712402e-05, "loss": 0.7182, "step": 2242 }, { "epoch": 0.68, "learning_rate": 2.4491298761791436e-05, "loss": 0.7604, "step": 2243 }, { "epoch": 0.68, "learning_rate": 2.4449066027644475e-05, "loss": 0.7304, "step": 2244 }, { "epoch": 0.68, "learning_rate": 2.4406857951027594e-05, "loss": 0.7598, "step": 2245 }, { "epoch": 0.68, "learning_rate": 2.4364674572673026e-05, "loss": 0.7352, "step": 2246 }, { "epoch": 0.68, "learning_rate": 2.4322515933289176e-05, "loss": 0.7791, "step": 2247 }, { "epoch": 0.68, "learning_rate": 2.428038207356057e-05, "loss": 0.7904, "step": 2248 }, { "epoch": 0.68, "learning_rate": 2.4238273034147863e-05, "loss": 0.7313, "step": 2249 }, { "epoch": 0.68, "learning_rate": 2.4196188855687707e-05, "loss": 0.7608, "step": 2250 }, { "epoch": 0.68, "learning_rate": 2.4154129578792784e-05, "loss": 0.7169, "step": 2251 }, { "epoch": 0.68, "learning_rate": 2.4112095244051726e-05, "loss": 0.6858, "step": 2252 }, { "epoch": 0.68, "learning_rate": 2.4070085892029144e-05, "loss": 0.8037, "step": 2253 }, { "epoch": 0.68, "learning_rate": 2.4028101563265493e-05, "loss": 0.6901, "step": 2254 }, { "epoch": 0.68, "learning_rate": 2.398614229827709e-05, "loss": 0.7171, "step": 2255 }, { "epoch": 0.68, "learning_rate": 2.3944208137556056e-05, "loss": 0.7604, "step": 2256 }, { "epoch": 0.68, "learning_rate": 2.3902299121570333e-05, "loss": 0.7216, "step": 2257 }, { "epoch": 0.68, "learning_rate": 2.3860415290763545e-05, "loss": 0.7416, "step": 2258 }, { "epoch": 0.69, "learning_rate": 2.3818556685555026e-05, "loss": 0.7212, "step": 2259 }, { "epoch": 0.69, "learning_rate": 2.3776723346339756e-05, "loss": 0.7828, "step": 2260 }, { "epoch": 0.69, "learning_rate": 2.3734915313488378e-05, "loss": 0.7358, "step": 2261 }, { "epoch": 0.69, "learning_rate": 2.3693132627347047e-05, "loss": 0.78, "step": 2262 }, { "epoch": 0.69, "learning_rate": 2.365137532823753e-05, "loss": 0.7918, "step": 2263 }, { "epoch": 0.69, "learning_rate": 2.360964345645702e-05, "loss": 0.7658, "step": 2264 }, { "epoch": 0.69, "learning_rate": 2.3567937052278243e-05, "loss": 0.7703, "step": 2265 }, { "epoch": 0.69, "learning_rate": 2.35262561559493e-05, "loss": 0.7696, "step": 2266 }, { "epoch": 0.69, "learning_rate": 2.3484600807693687e-05, "loss": 0.7751, "step": 2267 }, { "epoch": 0.69, "learning_rate": 2.344297104771022e-05, "loss": 0.7396, "step": 2268 }, { "epoch": 0.69, "learning_rate": 2.3401366916173102e-05, "loss": 0.7349, "step": 2269 }, { "epoch": 0.69, "learning_rate": 2.3359788453231724e-05, "loss": 0.7578, "step": 2270 }, { "epoch": 0.69, "learning_rate": 2.3318235699010733e-05, "loss": 0.7919, "step": 2271 }, { "epoch": 0.69, "learning_rate": 2.3276708693609943e-05, "loss": 0.7888, "step": 2272 }, { "epoch": 0.69, "learning_rate": 2.3235207477104392e-05, "loss": 0.7481, "step": 2273 }, { "epoch": 0.69, "learning_rate": 2.319373208954415e-05, "loss": 0.7912, "step": 2274 }, { "epoch": 0.69, "learning_rate": 2.3152282570954382e-05, "loss": 0.7729, "step": 2275 }, { "epoch": 0.69, "learning_rate": 2.3110858961335335e-05, "loss": 0.7735, "step": 2276 }, { "epoch": 0.69, "learning_rate": 2.306946130066219e-05, "loss": 0.734, "step": 2277 }, { "epoch": 0.69, "learning_rate": 2.302808962888513e-05, "loss": 0.7587, "step": 2278 }, { "epoch": 0.69, "learning_rate": 2.298674398592921e-05, "loss": 0.7329, "step": 2279 }, { "epoch": 0.69, "learning_rate": 2.2945424411694433e-05, "loss": 0.7917, "step": 2280 }, { "epoch": 0.69, "learning_rate": 2.2904130946055597e-05, "loss": 0.7279, "step": 2281 }, { "epoch": 0.69, "learning_rate": 2.2862863628862298e-05, "loss": 0.7801, "step": 2282 }, { "epoch": 0.69, "learning_rate": 2.282162249993895e-05, "loss": 0.7263, "step": 2283 }, { "epoch": 0.69, "learning_rate": 2.2780407599084623e-05, "loss": 0.7917, "step": 2284 }, { "epoch": 0.69, "learning_rate": 2.2739218966073155e-05, "loss": 0.7063, "step": 2285 }, { "epoch": 0.69, "learning_rate": 2.2698056640652975e-05, "loss": 0.7038, "step": 2286 }, { "epoch": 0.69, "learning_rate": 2.265692066254712e-05, "loss": 0.7693, "step": 2287 }, { "epoch": 0.69, "learning_rate": 2.2615811071453265e-05, "loss": 0.7579, "step": 2288 }, { "epoch": 0.69, "learning_rate": 2.2574727907043558e-05, "loss": 0.7577, "step": 2289 }, { "epoch": 0.69, "learning_rate": 2.253367120896467e-05, "loss": 0.7888, "step": 2290 }, { "epoch": 0.69, "learning_rate": 2.24926410168377e-05, "loss": 0.7134, "step": 2291 }, { "epoch": 0.7, "learning_rate": 2.245163737025824e-05, "loss": 0.7642, "step": 2292 }, { "epoch": 0.7, "learning_rate": 2.2410660308796194e-05, "loss": 0.7572, "step": 2293 }, { "epoch": 0.7, "learning_rate": 2.2369709871995837e-05, "loss": 0.7482, "step": 2294 }, { "epoch": 0.7, "learning_rate": 2.2328786099375735e-05, "loss": 0.7299, "step": 2295 }, { "epoch": 0.7, "learning_rate": 2.228788903042877e-05, "loss": 0.7448, "step": 2296 }, { "epoch": 0.7, "learning_rate": 2.2247018704622004e-05, "loss": 0.7224, "step": 2297 }, { "epoch": 0.7, "learning_rate": 2.2206175161396703e-05, "loss": 0.7318, "step": 2298 }, { "epoch": 0.7, "learning_rate": 2.2165358440168272e-05, "loss": 0.727, "step": 2299 }, { "epoch": 0.7, "learning_rate": 2.2124568580326295e-05, "loss": 0.7752, "step": 2300 }, { "epoch": 0.7, "learning_rate": 2.208380562123436e-05, "loss": 0.7014, "step": 2301 }, { "epoch": 0.7, "learning_rate": 2.2043069602230116e-05, "loss": 0.8019, "step": 2302 }, { "epoch": 0.7, "learning_rate": 2.2002360562625255e-05, "loss": 0.7241, "step": 2303 }, { "epoch": 0.7, "learning_rate": 2.1961678541705366e-05, "loss": 0.7514, "step": 2304 }, { "epoch": 0.7, "learning_rate": 2.1921023578730025e-05, "loss": 0.753, "step": 2305 }, { "epoch": 0.7, "learning_rate": 2.188039571293267e-05, "loss": 0.7268, "step": 2306 }, { "epoch": 0.7, "learning_rate": 2.1839794983520557e-05, "loss": 0.7543, "step": 2307 }, { "epoch": 0.7, "learning_rate": 2.1799221429674827e-05, "loss": 0.734, "step": 2308 }, { "epoch": 0.7, "learning_rate": 2.175867509055033e-05, "loss": 0.7035, "step": 2309 }, { "epoch": 0.7, "learning_rate": 2.1718156005275685e-05, "loss": 0.7633, "step": 2310 }, { "epoch": 0.7, "learning_rate": 2.1677664212953186e-05, "loss": 0.7337, "step": 2311 }, { "epoch": 0.7, "learning_rate": 2.1637199752658838e-05, "loss": 0.7383, "step": 2312 }, { "epoch": 0.7, "learning_rate": 2.1596762663442218e-05, "loss": 0.7597, "step": 2313 }, { "epoch": 0.7, "learning_rate": 2.155635298432651e-05, "loss": 0.7123, "step": 2314 }, { "epoch": 0.7, "learning_rate": 2.1515970754308423e-05, "loss": 0.7793, "step": 2315 }, { "epoch": 0.7, "learning_rate": 2.1475616012358236e-05, "loss": 0.801, "step": 2316 }, { "epoch": 0.7, "learning_rate": 2.1435288797419644e-05, "loss": 0.7056, "step": 2317 }, { "epoch": 0.7, "learning_rate": 2.1394989148409806e-05, "loss": 0.7173, "step": 2318 }, { "epoch": 0.7, "learning_rate": 2.1354717104219236e-05, "loss": 0.7308, "step": 2319 }, { "epoch": 0.7, "learning_rate": 2.1314472703711887e-05, "loss": 0.7596, "step": 2320 }, { "epoch": 0.7, "learning_rate": 2.127425598572496e-05, "loss": 0.761, "step": 2321 }, { "epoch": 0.7, "learning_rate": 2.1234066989068972e-05, "loss": 0.7479, "step": 2322 }, { "epoch": 0.7, "learning_rate": 2.119390575252771e-05, "loss": 0.7207, "step": 2323 }, { "epoch": 0.7, "learning_rate": 2.1153772314858115e-05, "loss": 0.6939, "step": 2324 }, { "epoch": 0.71, "learning_rate": 2.1113666714790377e-05, "loss": 0.7055, "step": 2325 }, { "epoch": 0.71, "learning_rate": 2.1073588991027747e-05, "loss": 0.7101, "step": 2326 }, { "epoch": 0.71, "learning_rate": 2.1033539182246604e-05, "loss": 0.7472, "step": 2327 }, { "epoch": 0.71, "learning_rate": 2.0993517327096417e-05, "loss": 0.7581, "step": 2328 }, { "epoch": 0.71, "learning_rate": 2.0953523464199644e-05, "loss": 0.6817, "step": 2329 }, { "epoch": 0.71, "learning_rate": 2.0913557632151725e-05, "loss": 0.7423, "step": 2330 }, { "epoch": 0.71, "learning_rate": 2.0873619869521053e-05, "loss": 0.7215, "step": 2331 }, { "epoch": 0.71, "learning_rate": 2.0833710214848983e-05, "loss": 0.7633, "step": 2332 }, { "epoch": 0.71, "learning_rate": 2.0793828706649675e-05, "loss": 0.7688, "step": 2333 }, { "epoch": 0.71, "learning_rate": 2.0753975383410168e-05, "loss": 0.7058, "step": 2334 }, { "epoch": 0.71, "learning_rate": 2.071415028359026e-05, "loss": 0.777, "step": 2335 }, { "epoch": 0.71, "learning_rate": 2.0674353445622585e-05, "loss": 0.7185, "step": 2336 }, { "epoch": 0.71, "learning_rate": 2.063458490791244e-05, "loss": 0.7526, "step": 2337 }, { "epoch": 0.71, "learning_rate": 2.059484470883782e-05, "loss": 0.7369, "step": 2338 }, { "epoch": 0.71, "learning_rate": 2.0555132886749407e-05, "loss": 0.7638, "step": 2339 }, { "epoch": 0.71, "learning_rate": 2.051544947997047e-05, "loss": 0.7279, "step": 2340 }, { "epoch": 0.71, "learning_rate": 2.0475794526796856e-05, "loss": 0.7414, "step": 2341 }, { "epoch": 0.71, "learning_rate": 2.0436168065496936e-05, "loss": 0.7417, "step": 2342 }, { "epoch": 0.71, "learning_rate": 2.0396570134311655e-05, "loss": 0.7583, "step": 2343 }, { "epoch": 0.71, "learning_rate": 2.0357000771454334e-05, "loss": 0.7549, "step": 2344 }, { "epoch": 0.71, "learning_rate": 2.0317460015110807e-05, "loss": 0.7529, "step": 2345 }, { "epoch": 0.71, "learning_rate": 2.0277947903439226e-05, "loss": 0.769, "step": 2346 }, { "epoch": 0.71, "learning_rate": 2.023846447457018e-05, "loss": 0.7406, "step": 2347 }, { "epoch": 0.71, "learning_rate": 2.0199009766606507e-05, "loss": 0.7224, "step": 2348 }, { "epoch": 0.71, "learning_rate": 2.0159583817623366e-05, "loss": 0.7559, "step": 2349 }, { "epoch": 0.71, "learning_rate": 2.012018666566814e-05, "loss": 0.757, "step": 2350 }, { "epoch": 0.71, "learning_rate": 2.008081834876046e-05, "loss": 0.8052, "step": 2351 }, { "epoch": 0.71, "learning_rate": 2.00414789048921e-05, "loss": 0.739, "step": 2352 }, { "epoch": 0.71, "learning_rate": 2.000216837202696e-05, "loss": 0.7822, "step": 2353 }, { "epoch": 0.71, "learning_rate": 1.996288678810105e-05, "loss": 0.7068, "step": 2354 }, { "epoch": 0.71, "learning_rate": 1.9923634191022484e-05, "loss": 0.7705, "step": 2355 }, { "epoch": 0.71, "learning_rate": 1.988441061867135e-05, "loss": 0.7707, "step": 2356 }, { "epoch": 0.71, "learning_rate": 1.9845216108899744e-05, "loss": 0.7163, "step": 2357 }, { "epoch": 0.72, "learning_rate": 1.9806050699531697e-05, "loss": 0.7296, "step": 2358 }, { "epoch": 0.72, "learning_rate": 1.9766914428363213e-05, "loss": 0.7725, "step": 2359 }, { "epoch": 0.72, "learning_rate": 1.9727807333162124e-05, "loss": 0.734, "step": 2360 }, { "epoch": 0.72, "learning_rate": 1.9688729451668114e-05, "loss": 0.7743, "step": 2361 }, { "epoch": 0.72, "learning_rate": 1.9649680821592676e-05, "loss": 0.777, "step": 2362 }, { "epoch": 0.72, "learning_rate": 1.9610661480619107e-05, "loss": 0.7198, "step": 2363 }, { "epoch": 0.72, "learning_rate": 1.957167146640238e-05, "loss": 0.7273, "step": 2364 }, { "epoch": 0.72, "learning_rate": 1.953271081656924e-05, "loss": 0.7483, "step": 2365 }, { "epoch": 0.72, "learning_rate": 1.949377956871801e-05, "loss": 0.7235, "step": 2366 }, { "epoch": 0.72, "learning_rate": 1.9454877760418732e-05, "loss": 0.7742, "step": 2367 }, { "epoch": 0.72, "learning_rate": 1.941600542921296e-05, "loss": 0.7838, "step": 2368 }, { "epoch": 0.72, "learning_rate": 1.9377162612613832e-05, "loss": 0.7578, "step": 2369 }, { "epoch": 0.72, "learning_rate": 1.9338349348105982e-05, "loss": 0.7549, "step": 2370 }, { "epoch": 0.72, "learning_rate": 1.9299565673145592e-05, "loss": 0.7128, "step": 2371 }, { "epoch": 0.72, "learning_rate": 1.9260811625160212e-05, "loss": 0.6873, "step": 2372 }, { "epoch": 0.72, "learning_rate": 1.9222087241548837e-05, "loss": 0.7831, "step": 2373 }, { "epoch": 0.72, "learning_rate": 1.9183392559681812e-05, "loss": 0.6914, "step": 2374 }, { "epoch": 0.72, "learning_rate": 1.9144727616900872e-05, "loss": 0.6847, "step": 2375 }, { "epoch": 0.72, "learning_rate": 1.910609245051899e-05, "loss": 0.762, "step": 2376 }, { "epoch": 0.72, "learning_rate": 1.906748709782044e-05, "loss": 0.7317, "step": 2377 }, { "epoch": 0.72, "learning_rate": 1.9028911596060693e-05, "loss": 0.737, "step": 2378 }, { "epoch": 0.72, "learning_rate": 1.8990365982466473e-05, "loss": 0.7321, "step": 2379 }, { "epoch": 0.72, "learning_rate": 1.8951850294235607e-05, "loss": 0.7425, "step": 2380 }, { "epoch": 0.72, "learning_rate": 1.891336456853705e-05, "loss": 0.6862, "step": 2381 }, { "epoch": 0.72, "learning_rate": 1.887490884251084e-05, "loss": 0.7399, "step": 2382 }, { "epoch": 0.72, "learning_rate": 1.8836483153268113e-05, "loss": 0.7825, "step": 2383 }, { "epoch": 0.72, "learning_rate": 1.879808753789094e-05, "loss": 0.7433, "step": 2384 }, { "epoch": 0.72, "learning_rate": 1.8759722033432448e-05, "loss": 0.8002, "step": 2385 }, { "epoch": 0.72, "learning_rate": 1.872138667691665e-05, "loss": 0.7864, "step": 2386 }, { "epoch": 0.72, "learning_rate": 1.868308150533847e-05, "loss": 0.7503, "step": 2387 }, { "epoch": 0.72, "learning_rate": 1.8644806555663742e-05, "loss": 0.7276, "step": 2388 }, { "epoch": 0.72, "learning_rate": 1.86065618648291e-05, "loss": 0.7074, "step": 2389 }, { "epoch": 0.72, "learning_rate": 1.856834746974196e-05, "loss": 0.7462, "step": 2390 }, { "epoch": 0.73, "learning_rate": 1.853016340728057e-05, "loss": 0.7099, "step": 2391 }, { "epoch": 0.73, "learning_rate": 1.8492009714293845e-05, "loss": 0.7249, "step": 2392 }, { "epoch": 0.73, "learning_rate": 1.8453886427601407e-05, "loss": 0.7409, "step": 2393 }, { "epoch": 0.73, "learning_rate": 1.8415793583993528e-05, "loss": 0.6899, "step": 2394 }, { "epoch": 0.73, "learning_rate": 1.837773122023114e-05, "loss": 0.7353, "step": 2395 }, { "epoch": 0.73, "learning_rate": 1.833969937304572e-05, "loss": 0.766, "step": 2396 }, { "epoch": 0.73, "learning_rate": 1.830169807913931e-05, "loss": 0.7897, "step": 2397 }, { "epoch": 0.73, "learning_rate": 1.826372737518444e-05, "loss": 0.7816, "step": 2398 }, { "epoch": 0.73, "learning_rate": 1.8225787297824193e-05, "loss": 0.6971, "step": 2399 }, { "epoch": 0.73, "learning_rate": 1.818787788367202e-05, "loss": 0.7128, "step": 2400 }, { "epoch": 0.73, "learning_rate": 1.8149999169311815e-05, "loss": 0.7808, "step": 2401 }, { "epoch": 0.73, "learning_rate": 1.8112151191297822e-05, "loss": 0.7415, "step": 2402 }, { "epoch": 0.73, "learning_rate": 1.8074333986154674e-05, "loss": 0.7145, "step": 2403 }, { "epoch": 0.73, "learning_rate": 1.8036547590377256e-05, "loss": 0.6938, "step": 2404 }, { "epoch": 0.73, "learning_rate": 1.799879204043072e-05, "loss": 0.7243, "step": 2405 }, { "epoch": 0.73, "learning_rate": 1.7961067372750523e-05, "loss": 0.7375, "step": 2406 }, { "epoch": 0.73, "learning_rate": 1.7923373623742213e-05, "loss": 0.7315, "step": 2407 }, { "epoch": 0.73, "learning_rate": 1.7885710829781594e-05, "loss": 0.6955, "step": 2408 }, { "epoch": 0.73, "learning_rate": 1.784807902721452e-05, "loss": 0.7438, "step": 2409 }, { "epoch": 0.73, "learning_rate": 1.7810478252357022e-05, "loss": 0.7289, "step": 2410 }, { "epoch": 0.73, "learning_rate": 1.7772908541495104e-05, "loss": 0.7234, "step": 2411 }, { "epoch": 0.73, "learning_rate": 1.773536993088485e-05, "loss": 0.7088, "step": 2412 }, { "epoch": 0.73, "learning_rate": 1.7697862456752273e-05, "loss": 0.7271, "step": 2413 }, { "epoch": 0.73, "learning_rate": 1.7660386155293424e-05, "loss": 0.7162, "step": 2414 }, { "epoch": 0.73, "learning_rate": 1.7622941062674203e-05, "loss": 0.7463, "step": 2415 }, { "epoch": 0.73, "learning_rate": 1.7585527215030413e-05, "loss": 0.7125, "step": 2416 }, { "epoch": 0.73, "learning_rate": 1.7548144648467678e-05, "loss": 0.7781, "step": 2417 }, { "epoch": 0.73, "learning_rate": 1.7510793399061503e-05, "loss": 0.7738, "step": 2418 }, { "epoch": 0.73, "learning_rate": 1.7473473502857112e-05, "loss": 0.7072, "step": 2419 }, { "epoch": 0.73, "learning_rate": 1.7436184995869488e-05, "loss": 0.7132, "step": 2420 }, { "epoch": 0.73, "learning_rate": 1.7398927914083297e-05, "loss": 0.7327, "step": 2421 }, { "epoch": 0.73, "learning_rate": 1.7361702293452947e-05, "loss": 0.7346, "step": 2422 }, { "epoch": 0.73, "learning_rate": 1.732450816990242e-05, "loss": 0.7429, "step": 2423 }, { "epoch": 0.74, "learning_rate": 1.728734557932533e-05, "loss": 0.7497, "step": 2424 }, { "epoch": 0.74, "learning_rate": 1.7250214557584836e-05, "loss": 0.7618, "step": 2425 }, { "epoch": 0.74, "learning_rate": 1.7213115140513686e-05, "loss": 0.7446, "step": 2426 }, { "epoch": 0.74, "learning_rate": 1.7176047363914056e-05, "loss": 0.7278, "step": 2427 }, { "epoch": 0.74, "learning_rate": 1.713901126355766e-05, "loss": 0.7531, "step": 2428 }, { "epoch": 0.74, "learning_rate": 1.710200687518557e-05, "loss": 0.7974, "step": 2429 }, { "epoch": 0.74, "learning_rate": 1.7065034234508342e-05, "loss": 0.7713, "step": 2430 }, { "epoch": 0.74, "learning_rate": 1.7028093377205823e-05, "loss": 0.7331, "step": 2431 }, { "epoch": 0.74, "learning_rate": 1.69911843389272e-05, "loss": 0.7753, "step": 2432 }, { "epoch": 0.74, "learning_rate": 1.6954307155290962e-05, "loss": 0.7595, "step": 2433 }, { "epoch": 0.74, "learning_rate": 1.6917461861884882e-05, "loss": 0.8098, "step": 2434 }, { "epoch": 0.74, "learning_rate": 1.688064849426592e-05, "loss": 0.7041, "step": 2435 }, { "epoch": 0.74, "learning_rate": 1.684386708796025e-05, "loss": 0.7717, "step": 2436 }, { "epoch": 0.74, "learning_rate": 1.6807117678463176e-05, "loss": 0.8235, "step": 2437 }, { "epoch": 0.74, "learning_rate": 1.6770400301239165e-05, "loss": 0.7246, "step": 2438 }, { "epoch": 0.74, "learning_rate": 1.673371499172174e-05, "loss": 0.7517, "step": 2439 }, { "epoch": 0.74, "learning_rate": 1.6697061785313488e-05, "loss": 0.715, "step": 2440 }, { "epoch": 0.74, "learning_rate": 1.6660440717385993e-05, "loss": 0.7728, "step": 2441 }, { "epoch": 0.74, "learning_rate": 1.6623851823279885e-05, "loss": 0.7474, "step": 2442 }, { "epoch": 0.74, "learning_rate": 1.6587295138304677e-05, "loss": 0.7481, "step": 2443 }, { "epoch": 0.74, "learning_rate": 1.655077069773884e-05, "loss": 0.7158, "step": 2444 }, { "epoch": 0.74, "learning_rate": 1.6514278536829687e-05, "loss": 0.7355, "step": 2445 }, { "epoch": 0.74, "learning_rate": 1.647781869079345e-05, "loss": 0.7245, "step": 2446 }, { "epoch": 0.74, "learning_rate": 1.6441391194815098e-05, "loss": 0.7115, "step": 2447 }, { "epoch": 0.74, "learning_rate": 1.6404996084048452e-05, "loss": 0.7217, "step": 2448 }, { "epoch": 0.74, "learning_rate": 1.6368633393616012e-05, "loss": 0.7469, "step": 2449 }, { "epoch": 0.74, "learning_rate": 1.633230315860906e-05, "loss": 0.7102, "step": 2450 }, { "epoch": 0.74, "learning_rate": 1.62960054140875e-05, "loss": 0.7382, "step": 2451 }, { "epoch": 0.74, "learning_rate": 1.6259740195079903e-05, "loss": 0.7519, "step": 2452 }, { "epoch": 0.74, "learning_rate": 1.622350753658345e-05, "loss": 0.7577, "step": 2453 }, { "epoch": 0.74, "learning_rate": 1.6187307473563916e-05, "loss": 0.6988, "step": 2454 }, { "epoch": 0.74, "learning_rate": 1.615114004095561e-05, "loss": 0.7619, "step": 2455 }, { "epoch": 0.74, "learning_rate": 1.6115005273661334e-05, "loss": 0.7806, "step": 2456 }, { "epoch": 0.75, "learning_rate": 1.607890320655237e-05, "loss": 0.7494, "step": 2457 }, { "epoch": 0.75, "learning_rate": 1.604283387446849e-05, "loss": 0.7091, "step": 2458 }, { "epoch": 0.75, "learning_rate": 1.6006797312217814e-05, "loss": 0.7068, "step": 2459 }, { "epoch": 0.75, "learning_rate": 1.5970793554576872e-05, "loss": 0.7657, "step": 2460 }, { "epoch": 0.75, "learning_rate": 1.5934822636290515e-05, "loss": 0.7789, "step": 2461 }, { "epoch": 0.75, "learning_rate": 1.5898884592071938e-05, "loss": 0.6524, "step": 2462 }, { "epoch": 0.75, "learning_rate": 1.5862979456602583e-05, "loss": 0.7218, "step": 2463 }, { "epoch": 0.75, "learning_rate": 1.582710726453214e-05, "loss": 0.7321, "step": 2464 }, { "epoch": 0.75, "learning_rate": 1.5791268050478486e-05, "loss": 0.7207, "step": 2465 }, { "epoch": 0.75, "learning_rate": 1.5755461849027732e-05, "loss": 0.7606, "step": 2466 }, { "epoch": 0.75, "learning_rate": 1.5719688694734057e-05, "loss": 0.7185, "step": 2467 }, { "epoch": 0.75, "learning_rate": 1.5683948622119827e-05, "loss": 0.752, "step": 2468 }, { "epoch": 0.75, "learning_rate": 1.56482416656754e-05, "loss": 0.7727, "step": 2469 }, { "epoch": 0.75, "learning_rate": 1.5612567859859255e-05, "loss": 0.7253, "step": 2470 }, { "epoch": 0.75, "learning_rate": 1.557692723909782e-05, "loss": 0.7064, "step": 2471 }, { "epoch": 0.75, "learning_rate": 1.5541319837785507e-05, "loss": 0.7878, "step": 2472 }, { "epoch": 0.75, "learning_rate": 1.5505745690284712e-05, "loss": 0.7296, "step": 2473 }, { "epoch": 0.75, "learning_rate": 1.5470204830925705e-05, "loss": 0.7461, "step": 2474 }, { "epoch": 0.75, "learning_rate": 1.543469729400662e-05, "loss": 0.7574, "step": 2475 }, { "epoch": 0.75, "learning_rate": 1.5399223113793455e-05, "loss": 0.711, "step": 2476 }, { "epoch": 0.75, "learning_rate": 1.536378232452003e-05, "loss": 0.7238, "step": 2477 }, { "epoch": 0.75, "learning_rate": 1.532837496038792e-05, "loss": 0.7357, "step": 2478 }, { "epoch": 0.75, "learning_rate": 1.5293001055566442e-05, "loss": 0.773, "step": 2479 }, { "epoch": 0.75, "learning_rate": 1.5257660644192617e-05, "loss": 0.7509, "step": 2480 }, { "epoch": 0.75, "learning_rate": 1.5222353760371194e-05, "loss": 0.6898, "step": 2481 }, { "epoch": 0.75, "learning_rate": 1.5187080438174512e-05, "loss": 0.721, "step": 2482 }, { "epoch": 0.75, "learning_rate": 1.5151840711642534e-05, "loss": 0.7453, "step": 2483 }, { "epoch": 0.75, "learning_rate": 1.5116634614782798e-05, "loss": 0.7779, "step": 2484 }, { "epoch": 0.75, "learning_rate": 1.5081462181570427e-05, "loss": 0.6818, "step": 2485 }, { "epoch": 0.75, "learning_rate": 1.5046323445948018e-05, "loss": 0.7125, "step": 2486 }, { "epoch": 0.75, "learning_rate": 1.5011218441825642e-05, "loss": 0.7321, "step": 2487 }, { "epoch": 0.75, "learning_rate": 1.497614720308086e-05, "loss": 0.7734, "step": 2488 }, { "epoch": 0.75, "learning_rate": 1.4941109763558602e-05, "loss": 0.7633, "step": 2489 }, { "epoch": 0.76, "learning_rate": 1.4906106157071226e-05, "loss": 0.7117, "step": 2490 }, { "epoch": 0.76, "learning_rate": 1.4871136417398406e-05, "loss": 0.7472, "step": 2491 }, { "epoch": 0.76, "learning_rate": 1.4836200578287124e-05, "loss": 0.8038, "step": 2492 }, { "epoch": 0.76, "learning_rate": 1.4801298673451703e-05, "loss": 0.7254, "step": 2493 }, { "epoch": 0.76, "learning_rate": 1.4766430736573656e-05, "loss": 0.7244, "step": 2494 }, { "epoch": 0.76, "learning_rate": 1.473159680130175e-05, "loss": 0.7731, "step": 2495 }, { "epoch": 0.76, "learning_rate": 1.4696796901251913e-05, "loss": 0.7229, "step": 2496 }, { "epoch": 0.76, "learning_rate": 1.466203107000727e-05, "loss": 0.7034, "step": 2497 }, { "epoch": 0.76, "learning_rate": 1.4627299341118039e-05, "loss": 0.7445, "step": 2498 }, { "epoch": 0.76, "learning_rate": 1.4592601748101531e-05, "loss": 0.7349, "step": 2499 }, { "epoch": 0.76, "learning_rate": 1.4557938324442095e-05, "loss": 0.7368, "step": 2500 }, { "epoch": 0.76, "learning_rate": 1.4523309103591159e-05, "loss": 0.7817, "step": 2501 }, { "epoch": 0.76, "learning_rate": 1.4488714118967101e-05, "loss": 0.703, "step": 2502 }, { "epoch": 0.76, "learning_rate": 1.4454153403955267e-05, "loss": 0.7307, "step": 2503 }, { "epoch": 0.76, "learning_rate": 1.4419626991907925e-05, "loss": 0.738, "step": 2504 }, { "epoch": 0.76, "learning_rate": 1.4385134916144277e-05, "loss": 0.7154, "step": 2505 }, { "epoch": 0.76, "learning_rate": 1.4350677209950347e-05, "loss": 0.7638, "step": 2506 }, { "epoch": 0.76, "learning_rate": 1.4316253906578996e-05, "loss": 0.7587, "step": 2507 }, { "epoch": 0.76, "learning_rate": 1.428186503924992e-05, "loss": 0.7593, "step": 2508 }, { "epoch": 0.76, "learning_rate": 1.4247510641149525e-05, "loss": 0.7525, "step": 2509 }, { "epoch": 0.76, "learning_rate": 1.4213190745431033e-05, "loss": 0.7876, "step": 2510 }, { "epoch": 0.76, "learning_rate": 1.4178905385214292e-05, "loss": 0.7493, "step": 2511 }, { "epoch": 0.76, "learning_rate": 1.4144654593585844e-05, "loss": 0.7691, "step": 2512 }, { "epoch": 0.76, "learning_rate": 1.4110438403598914e-05, "loss": 0.8167, "step": 2513 }, { "epoch": 0.76, "learning_rate": 1.4076256848273289e-05, "loss": 0.7517, "step": 2514 }, { "epoch": 0.76, "learning_rate": 1.4042109960595339e-05, "loss": 0.6999, "step": 2515 }, { "epoch": 0.76, "learning_rate": 1.4007997773517972e-05, "loss": 0.7531, "step": 2516 }, { "epoch": 0.76, "learning_rate": 1.3973920319960655e-05, "loss": 0.6892, "step": 2517 }, { "epoch": 0.76, "learning_rate": 1.3939877632809278e-05, "loss": 0.7847, "step": 2518 }, { "epoch": 0.76, "learning_rate": 1.3905869744916222e-05, "loss": 0.7372, "step": 2519 }, { "epoch": 0.76, "learning_rate": 1.3871896689100238e-05, "loss": 0.7487, "step": 2520 }, { "epoch": 0.76, "learning_rate": 1.3837958498146531e-05, "loss": 0.7664, "step": 2521 }, { "epoch": 0.76, "learning_rate": 1.3804055204806604e-05, "loss": 0.7474, "step": 2522 }, { "epoch": 0.77, "learning_rate": 1.3770186841798305e-05, "loss": 0.7022, "step": 2523 }, { "epoch": 0.77, "learning_rate": 1.3736353441805749e-05, "loss": 0.7303, "step": 2524 }, { "epoch": 0.77, "learning_rate": 1.3702555037479365e-05, "loss": 0.7432, "step": 2525 }, { "epoch": 0.77, "learning_rate": 1.3668791661435759e-05, "loss": 0.7346, "step": 2526 }, { "epoch": 0.77, "learning_rate": 1.3635063346257731e-05, "loss": 0.7738, "step": 2527 }, { "epoch": 0.77, "learning_rate": 1.3601370124494305e-05, "loss": 0.7649, "step": 2528 }, { "epoch": 0.77, "learning_rate": 1.3567712028660563e-05, "loss": 0.7275, "step": 2529 }, { "epoch": 0.77, "learning_rate": 1.3534089091237755e-05, "loss": 0.7285, "step": 2530 }, { "epoch": 0.77, "learning_rate": 1.350050134467314e-05, "loss": 0.7161, "step": 2531 }, { "epoch": 0.77, "learning_rate": 1.3466948821380083e-05, "loss": 0.7463, "step": 2532 }, { "epoch": 0.77, "learning_rate": 1.3433431553737902e-05, "loss": 0.721, "step": 2533 }, { "epoch": 0.77, "learning_rate": 1.3399949574091919e-05, "loss": 0.7101, "step": 2534 }, { "epoch": 0.77, "learning_rate": 1.336650291475338e-05, "loss": 0.7693, "step": 2535 }, { "epoch": 0.77, "learning_rate": 1.3333091607999492e-05, "loss": 0.7624, "step": 2536 }, { "epoch": 0.77, "learning_rate": 1.3299715686073294e-05, "loss": 0.7693, "step": 2537 }, { "epoch": 0.77, "learning_rate": 1.326637518118371e-05, "loss": 0.7181, "step": 2538 }, { "epoch": 0.77, "learning_rate": 1.3233070125505448e-05, "loss": 0.7684, "step": 2539 }, { "epoch": 0.77, "learning_rate": 1.3199800551179064e-05, "loss": 0.8055, "step": 2540 }, { "epoch": 0.77, "learning_rate": 1.3166566490310828e-05, "loss": 0.6937, "step": 2541 }, { "epoch": 0.77, "learning_rate": 1.3133367974972754e-05, "loss": 0.7257, "step": 2542 }, { "epoch": 0.77, "learning_rate": 1.310020503720254e-05, "loss": 0.7819, "step": 2543 }, { "epoch": 0.77, "learning_rate": 1.3067077709003583e-05, "loss": 0.7085, "step": 2544 }, { "epoch": 0.77, "learning_rate": 1.303398602234488e-05, "loss": 0.8088, "step": 2545 }, { "epoch": 0.77, "learning_rate": 1.3000930009161049e-05, "loss": 0.7239, "step": 2546 }, { "epoch": 0.77, "learning_rate": 1.2967909701352265e-05, "loss": 0.8169, "step": 2547 }, { "epoch": 0.77, "learning_rate": 1.2934925130784291e-05, "loss": 0.676, "step": 2548 }, { "epoch": 0.77, "learning_rate": 1.290197632928834e-05, "loss": 0.7319, "step": 2549 }, { "epoch": 0.77, "learning_rate": 1.2869063328661175e-05, "loss": 0.781, "step": 2550 }, { "epoch": 0.77, "learning_rate": 1.283618616066493e-05, "loss": 0.723, "step": 2551 }, { "epoch": 0.77, "learning_rate": 1.280334485702725e-05, "loss": 0.7323, "step": 2552 }, { "epoch": 0.77, "learning_rate": 1.2770539449441093e-05, "loss": 0.8009, "step": 2553 }, { "epoch": 0.77, "learning_rate": 1.2737769969564817e-05, "loss": 0.7648, "step": 2554 }, { "epoch": 0.77, "learning_rate": 1.2705036449022074e-05, "loss": 0.7153, "step": 2555 }, { "epoch": 0.78, "learning_rate": 1.2672338919401866e-05, "loss": 0.7002, "step": 2556 }, { "epoch": 0.78, "learning_rate": 1.2639677412258427e-05, "loss": 0.7208, "step": 2557 }, { "epoch": 0.78, "learning_rate": 1.2607051959111226e-05, "loss": 0.7383, "step": 2558 }, { "epoch": 0.78, "learning_rate": 1.257446259144494e-05, "loss": 0.7494, "step": 2559 }, { "epoch": 0.78, "learning_rate": 1.2541909340709458e-05, "loss": 0.7089, "step": 2560 }, { "epoch": 0.78, "learning_rate": 1.2509392238319768e-05, "loss": 0.7381, "step": 2561 }, { "epoch": 0.78, "learning_rate": 1.2476911315655999e-05, "loss": 0.7202, "step": 2562 }, { "epoch": 0.78, "learning_rate": 1.2444466604063331e-05, "loss": 0.7704, "step": 2563 }, { "epoch": 0.78, "learning_rate": 1.2412058134852068e-05, "loss": 0.7422, "step": 2564 }, { "epoch": 0.78, "learning_rate": 1.2379685939297475e-05, "loss": 0.7612, "step": 2565 }, { "epoch": 0.78, "learning_rate": 1.2347350048639838e-05, "loss": 0.7, "step": 2566 }, { "epoch": 0.78, "learning_rate": 1.2315050494084391e-05, "loss": 0.7356, "step": 2567 }, { "epoch": 0.78, "learning_rate": 1.228278730680134e-05, "loss": 0.7158, "step": 2568 }, { "epoch": 0.78, "learning_rate": 1.2250560517925746e-05, "loss": 0.717, "step": 2569 }, { "epoch": 0.78, "learning_rate": 1.2218370158557596e-05, "loss": 0.7167, "step": 2570 }, { "epoch": 0.78, "learning_rate": 1.2186216259761663e-05, "loss": 0.7309, "step": 2571 }, { "epoch": 0.78, "learning_rate": 1.2154098852567592e-05, "loss": 0.7196, "step": 2572 }, { "epoch": 0.78, "learning_rate": 1.2122017967969774e-05, "loss": 0.7462, "step": 2573 }, { "epoch": 0.78, "learning_rate": 1.2089973636927365e-05, "loss": 0.7708, "step": 2574 }, { "epoch": 0.78, "learning_rate": 1.2057965890364237e-05, "loss": 0.7614, "step": 2575 }, { "epoch": 0.78, "learning_rate": 1.202599475916899e-05, "loss": 0.7629, "step": 2576 }, { "epoch": 0.78, "learning_rate": 1.1994060274194851e-05, "loss": 0.7535, "step": 2577 }, { "epoch": 0.78, "learning_rate": 1.1962162466259696e-05, "loss": 0.749, "step": 2578 }, { "epoch": 0.78, "learning_rate": 1.1930301366145996e-05, "loss": 0.7343, "step": 2579 }, { "epoch": 0.78, "learning_rate": 1.1898477004600839e-05, "loss": 0.753, "step": 2580 }, { "epoch": 0.78, "learning_rate": 1.1866689412335802e-05, "loss": 0.707, "step": 2581 }, { "epoch": 0.78, "learning_rate": 1.183493862002702e-05, "loss": 0.7815, "step": 2582 }, { "epoch": 0.78, "learning_rate": 1.180322465831507e-05, "loss": 0.7912, "step": 2583 }, { "epoch": 0.78, "learning_rate": 1.1771547557805058e-05, "loss": 0.7835, "step": 2584 }, { "epoch": 0.78, "learning_rate": 1.1739907349066453e-05, "loss": 0.7652, "step": 2585 }, { "epoch": 0.78, "learning_rate": 1.1708304062633152e-05, "loss": 0.7672, "step": 2586 }, { "epoch": 0.78, "learning_rate": 1.167673772900339e-05, "loss": 0.7699, "step": 2587 }, { "epoch": 0.78, "learning_rate": 1.1645208378639799e-05, "loss": 0.6944, "step": 2588 }, { "epoch": 0.79, "learning_rate": 1.1613716041969258e-05, "loss": 0.7727, "step": 2589 }, { "epoch": 0.79, "learning_rate": 1.1582260749382979e-05, "loss": 0.7376, "step": 2590 }, { "epoch": 0.79, "learning_rate": 1.1550842531236377e-05, "loss": 0.6969, "step": 2591 }, { "epoch": 0.79, "learning_rate": 1.1519461417849143e-05, "loss": 0.7668, "step": 2592 }, { "epoch": 0.79, "learning_rate": 1.1488117439505109e-05, "loss": 0.7268, "step": 2593 }, { "epoch": 0.79, "learning_rate": 1.1456810626452285e-05, "loss": 0.7523, "step": 2594 }, { "epoch": 0.79, "learning_rate": 1.1425541008902851e-05, "loss": 0.755, "step": 2595 }, { "epoch": 0.79, "learning_rate": 1.1394308617033045e-05, "loss": 0.7746, "step": 2596 }, { "epoch": 0.79, "learning_rate": 1.13631134809832e-05, "loss": 0.6957, "step": 2597 }, { "epoch": 0.79, "learning_rate": 1.1331955630857682e-05, "loss": 0.801, "step": 2598 }, { "epoch": 0.79, "learning_rate": 1.1300835096724916e-05, "loss": 0.7897, "step": 2599 }, { "epoch": 0.79, "learning_rate": 1.1269751908617277e-05, "loss": 0.7731, "step": 2600 }, { "epoch": 0.79, "learning_rate": 1.1238706096531105e-05, "loss": 0.7504, "step": 2601 }, { "epoch": 0.79, "learning_rate": 1.120769769042666e-05, "loss": 0.7567, "step": 2602 }, { "epoch": 0.79, "learning_rate": 1.1176726720228158e-05, "loss": 0.7418, "step": 2603 }, { "epoch": 0.79, "learning_rate": 1.1145793215823636e-05, "loss": 0.7431, "step": 2604 }, { "epoch": 0.79, "learning_rate": 1.1114897207064984e-05, "loss": 0.677, "step": 2605 }, { "epoch": 0.79, "learning_rate": 1.1084038723767898e-05, "loss": 0.7924, "step": 2606 }, { "epoch": 0.79, "learning_rate": 1.1053217795711906e-05, "loss": 0.6774, "step": 2607 }, { "epoch": 0.79, "learning_rate": 1.102243445264025e-05, "loss": 0.7391, "step": 2608 }, { "epoch": 0.79, "learning_rate": 1.0991688724259901e-05, "loss": 0.6866, "step": 2609 }, { "epoch": 0.79, "learning_rate": 1.0960980640241569e-05, "loss": 0.6967, "step": 2610 }, { "epoch": 0.79, "learning_rate": 1.0930310230219603e-05, "loss": 0.6763, "step": 2611 }, { "epoch": 0.79, "learning_rate": 1.089967752379199e-05, "loss": 0.7024, "step": 2612 }, { "epoch": 0.79, "learning_rate": 1.0869082550520365e-05, "loss": 0.822, "step": 2613 }, { "epoch": 0.79, "learning_rate": 1.0838525339929905e-05, "loss": 0.705, "step": 2614 }, { "epoch": 0.79, "learning_rate": 1.0808005921509406e-05, "loss": 0.781, "step": 2615 }, { "epoch": 0.79, "learning_rate": 1.077752432471113e-05, "loss": 0.7766, "step": 2616 }, { "epoch": 0.79, "learning_rate": 1.0747080578950875e-05, "loss": 0.7457, "step": 2617 }, { "epoch": 0.79, "learning_rate": 1.0716674713607883e-05, "loss": 0.7609, "step": 2618 }, { "epoch": 0.79, "learning_rate": 1.0686306758024888e-05, "loss": 0.7313, "step": 2619 }, { "epoch": 0.79, "learning_rate": 1.0655976741508e-05, "loss": 0.7415, "step": 2620 }, { "epoch": 0.79, "learning_rate": 1.0625684693326727e-05, "loss": 0.74, "step": 2621 }, { "epoch": 0.8, "learning_rate": 1.0595430642713927e-05, "loss": 0.7273, "step": 2622 }, { "epoch": 0.8, "learning_rate": 1.056521461886582e-05, "loss": 0.7685, "step": 2623 }, { "epoch": 0.8, "learning_rate": 1.0535036650941905e-05, "loss": 0.7228, "step": 2624 }, { "epoch": 0.8, "learning_rate": 1.0504896768064942e-05, "loss": 0.7092, "step": 2625 }, { "epoch": 0.8, "learning_rate": 1.0474794999320964e-05, "loss": 0.7419, "step": 2626 }, { "epoch": 0.8, "learning_rate": 1.044473137375922e-05, "loss": 0.8079, "step": 2627 }, { "epoch": 0.8, "learning_rate": 1.0414705920392137e-05, "loss": 0.7183, "step": 2628 }, { "epoch": 0.8, "learning_rate": 1.0384718668195315e-05, "loss": 0.801, "step": 2629 }, { "epoch": 0.8, "learning_rate": 1.035476964610746e-05, "loss": 0.7598, "step": 2630 }, { "epoch": 0.8, "learning_rate": 1.0324858883030442e-05, "loss": 0.7912, "step": 2631 }, { "epoch": 0.8, "learning_rate": 1.029498640782915e-05, "loss": 0.7714, "step": 2632 }, { "epoch": 0.8, "learning_rate": 1.0265152249331571e-05, "loss": 0.72, "step": 2633 }, { "epoch": 0.8, "learning_rate": 1.0235356436328675e-05, "loss": 0.7393, "step": 2634 }, { "epoch": 0.8, "learning_rate": 1.0205598997574462e-05, "loss": 0.7767, "step": 2635 }, { "epoch": 0.8, "learning_rate": 1.0175879961785884e-05, "loss": 0.7171, "step": 2636 }, { "epoch": 0.8, "learning_rate": 1.0146199357642826e-05, "loss": 0.7482, "step": 2637 }, { "epoch": 0.8, "learning_rate": 1.0116557213788075e-05, "loss": 0.7085, "step": 2638 }, { "epoch": 0.8, "learning_rate": 1.008695355882735e-05, "loss": 0.7254, "step": 2639 }, { "epoch": 0.8, "learning_rate": 1.0057388421329184e-05, "loss": 0.6834, "step": 2640 }, { "epoch": 0.8, "learning_rate": 1.0027861829824952e-05, "loss": 0.7554, "step": 2641 }, { "epoch": 0.8, "learning_rate": 9.998373812808815e-06, "loss": 0.7168, "step": 2642 }, { "epoch": 0.8, "learning_rate": 9.968924398737744e-06, "loss": 0.7266, "step": 2643 }, { "epoch": 0.8, "learning_rate": 9.939513616031437e-06, "loss": 0.7442, "step": 2644 }, { "epoch": 0.8, "learning_rate": 9.910141493072301e-06, "loss": 0.7514, "step": 2645 }, { "epoch": 0.8, "learning_rate": 9.880808058205426e-06, "loss": 0.6791, "step": 2646 }, { "epoch": 0.8, "learning_rate": 9.851513339738628e-06, "loss": 0.7246, "step": 2647 }, { "epoch": 0.8, "learning_rate": 9.822257365942294e-06, "loss": 0.6934, "step": 2648 }, { "epoch": 0.8, "learning_rate": 9.793040165049449e-06, "loss": 0.699, "step": 2649 }, { "epoch": 0.8, "learning_rate": 9.763861765255684e-06, "loss": 0.7557, "step": 2650 }, { "epoch": 0.8, "learning_rate": 9.734722194719175e-06, "loss": 0.724, "step": 2651 }, { "epoch": 0.8, "learning_rate": 9.7056214815606e-06, "loss": 0.7626, "step": 2652 }, { "epoch": 0.8, "learning_rate": 9.67655965386316e-06, "loss": 0.7635, "step": 2653 }, { "epoch": 0.8, "learning_rate": 9.647536739672497e-06, "loss": 0.7588, "step": 2654 }, { "epoch": 0.81, "learning_rate": 9.618552766996752e-06, "loss": 0.7781, "step": 2655 }, { "epoch": 0.81, "learning_rate": 9.589607763806435e-06, "loss": 0.7443, "step": 2656 }, { "epoch": 0.81, "learning_rate": 9.560701758034473e-06, "loss": 0.7424, "step": 2657 }, { "epoch": 0.81, "learning_rate": 9.531834777576143e-06, "loss": 0.7271, "step": 2658 }, { "epoch": 0.81, "learning_rate": 9.503006850289098e-06, "loss": 0.7771, "step": 2659 }, { "epoch": 0.81, "learning_rate": 9.474218003993273e-06, "loss": 0.7359, "step": 2660 }, { "epoch": 0.81, "learning_rate": 9.445468266470875e-06, "loss": 0.7309, "step": 2661 }, { "epoch": 0.81, "learning_rate": 9.416757665466414e-06, "loss": 0.7706, "step": 2662 }, { "epoch": 0.81, "learning_rate": 9.388086228686604e-06, "loss": 0.729, "step": 2663 }, { "epoch": 0.81, "learning_rate": 9.359453983800354e-06, "loss": 0.7821, "step": 2664 }, { "epoch": 0.81, "learning_rate": 9.330860958438764e-06, "loss": 0.7371, "step": 2665 }, { "epoch": 0.81, "learning_rate": 9.302307180195107e-06, "loss": 0.7276, "step": 2666 }, { "epoch": 0.81, "learning_rate": 9.273792676624749e-06, "loss": 0.7655, "step": 2667 }, { "epoch": 0.81, "learning_rate": 9.245317475245168e-06, "loss": 0.7211, "step": 2668 }, { "epoch": 0.81, "learning_rate": 9.2168816035359e-06, "loss": 0.6918, "step": 2669 }, { "epoch": 0.81, "learning_rate": 9.188485088938564e-06, "loss": 0.7082, "step": 2670 }, { "epoch": 0.81, "learning_rate": 9.160127958856757e-06, "loss": 0.7564, "step": 2671 }, { "epoch": 0.81, "learning_rate": 9.13181024065608e-06, "loss": 0.7571, "step": 2672 }, { "epoch": 0.81, "learning_rate": 9.103531961664118e-06, "loss": 0.7796, "step": 2673 }, { "epoch": 0.81, "learning_rate": 9.075293149170371e-06, "loss": 0.7456, "step": 2674 }, { "epoch": 0.81, "learning_rate": 9.047093830426285e-06, "loss": 0.7498, "step": 2675 }, { "epoch": 0.81, "learning_rate": 9.018934032645154e-06, "loss": 0.6756, "step": 2676 }, { "epoch": 0.81, "learning_rate": 8.99081378300215e-06, "loss": 0.7312, "step": 2677 }, { "epoch": 0.81, "learning_rate": 8.962733108634291e-06, "loss": 0.7383, "step": 2678 }, { "epoch": 0.81, "learning_rate": 8.934692036640396e-06, "loss": 0.7728, "step": 2679 }, { "epoch": 0.81, "learning_rate": 8.906690594081057e-06, "loss": 0.781, "step": 2680 }, { "epoch": 0.81, "learning_rate": 8.878728807978615e-06, "loss": 0.7451, "step": 2681 }, { "epoch": 0.81, "learning_rate": 8.850806705317183e-06, "loss": 0.7866, "step": 2682 }, { "epoch": 0.81, "learning_rate": 8.822924313042536e-06, "loss": 0.7679, "step": 2683 }, { "epoch": 0.81, "learning_rate": 8.795081658062143e-06, "loss": 0.7523, "step": 2684 }, { "epoch": 0.81, "learning_rate": 8.767278767245113e-06, "loss": 0.7907, "step": 2685 }, { "epoch": 0.81, "learning_rate": 8.73951566742221e-06, "loss": 0.7637, "step": 2686 }, { "epoch": 0.81, "learning_rate": 8.711792385385781e-06, "loss": 0.7189, "step": 2687 }, { "epoch": 0.82, "learning_rate": 8.68410894788973e-06, "loss": 0.7277, "step": 2688 }, { "epoch": 0.82, "learning_rate": 8.656465381649526e-06, "loss": 0.6983, "step": 2689 }, { "epoch": 0.82, "learning_rate": 8.62886171334218e-06, "loss": 0.7098, "step": 2690 }, { "epoch": 0.82, "learning_rate": 8.60129796960617e-06, "loss": 0.7174, "step": 2691 }, { "epoch": 0.82, "learning_rate": 8.57377417704145e-06, "loss": 0.7136, "step": 2692 }, { "epoch": 0.82, "learning_rate": 8.546290362209441e-06, "loss": 0.7992, "step": 2693 }, { "epoch": 0.82, "learning_rate": 8.518846551632953e-06, "loss": 0.771, "step": 2694 }, { "epoch": 0.82, "learning_rate": 8.491442771796231e-06, "loss": 0.7955, "step": 2695 }, { "epoch": 0.82, "learning_rate": 8.464079049144852e-06, "loss": 0.7895, "step": 2696 }, { "epoch": 0.82, "learning_rate": 8.436755410085733e-06, "loss": 0.7498, "step": 2697 }, { "epoch": 0.82, "learning_rate": 8.409471880987153e-06, "loss": 0.738, "step": 2698 }, { "epoch": 0.82, "learning_rate": 8.38222848817864e-06, "loss": 0.7731, "step": 2699 }, { "epoch": 0.82, "learning_rate": 8.355025257951016e-06, "loss": 0.7376, "step": 2700 }, { "epoch": 0.82, "learning_rate": 8.327862216556309e-06, "loss": 0.7118, "step": 2701 }, { "epoch": 0.82, "learning_rate": 8.300739390207818e-06, "loss": 0.7911, "step": 2702 }, { "epoch": 0.82, "learning_rate": 8.273656805079993e-06, "loss": 0.7464, "step": 2703 }, { "epoch": 0.82, "learning_rate": 8.246614487308463e-06, "loss": 0.8009, "step": 2704 }, { "epoch": 0.82, "learning_rate": 8.219612462989967e-06, "loss": 0.7809, "step": 2705 }, { "epoch": 0.82, "learning_rate": 8.19265075818243e-06, "loss": 0.763, "step": 2706 }, { "epoch": 0.82, "learning_rate": 8.165729398904803e-06, "loss": 0.7869, "step": 2707 }, { "epoch": 0.82, "learning_rate": 8.138848411137123e-06, "loss": 0.7892, "step": 2708 }, { "epoch": 0.82, "learning_rate": 8.112007820820461e-06, "loss": 0.7672, "step": 2709 }, { "epoch": 0.82, "learning_rate": 8.085207653856924e-06, "loss": 0.7795, "step": 2710 }, { "epoch": 0.82, "learning_rate": 8.058447936109598e-06, "loss": 0.7741, "step": 2711 }, { "epoch": 0.82, "learning_rate": 8.031728693402502e-06, "loss": 0.7681, "step": 2712 }, { "epoch": 0.82, "learning_rate": 8.005049951520666e-06, "loss": 0.7579, "step": 2713 }, { "epoch": 0.82, "learning_rate": 7.978411736209951e-06, "loss": 0.8054, "step": 2714 }, { "epoch": 0.82, "learning_rate": 7.951814073177188e-06, "loss": 0.6839, "step": 2715 }, { "epoch": 0.82, "learning_rate": 7.925256988090018e-06, "loss": 0.7503, "step": 2716 }, { "epoch": 0.82, "learning_rate": 7.898740506576935e-06, "loss": 0.7285, "step": 2717 }, { "epoch": 0.82, "learning_rate": 7.87226465422728e-06, "loss": 0.7285, "step": 2718 }, { "epoch": 0.82, "learning_rate": 7.845829456591141e-06, "loss": 0.7416, "step": 2719 }, { "epoch": 0.82, "learning_rate": 7.819434939179398e-06, "loss": 0.7245, "step": 2720 }, { "epoch": 0.83, "learning_rate": 7.793081127463659e-06, "loss": 0.7411, "step": 2721 }, { "epoch": 0.83, "learning_rate": 7.766768046876278e-06, "loss": 0.7968, "step": 2722 }, { "epoch": 0.83, "learning_rate": 7.740495722810271e-06, "loss": 0.7705, "step": 2723 }, { "epoch": 0.83, "learning_rate": 7.714264180619323e-06, "loss": 0.7177, "step": 2724 }, { "epoch": 0.83, "learning_rate": 7.688073445617799e-06, "loss": 0.7278, "step": 2725 }, { "epoch": 0.83, "learning_rate": 7.661923543080646e-06, "loss": 0.726, "step": 2726 }, { "epoch": 0.83, "learning_rate": 7.635814498243427e-06, "loss": 0.7271, "step": 2727 }, { "epoch": 0.83, "learning_rate": 7.609746336302242e-06, "loss": 0.7118, "step": 2728 }, { "epoch": 0.83, "learning_rate": 7.583719082413798e-06, "loss": 0.7519, "step": 2729 }, { "epoch": 0.83, "learning_rate": 7.55773276169528e-06, "loss": 0.76, "step": 2730 }, { "epoch": 0.83, "learning_rate": 7.531787399224372e-06, "loss": 0.713, "step": 2731 }, { "epoch": 0.83, "learning_rate": 7.505883020039239e-06, "loss": 0.7864, "step": 2732 }, { "epoch": 0.83, "learning_rate": 7.4800196491385144e-06, "loss": 0.7166, "step": 2733 }, { "epoch": 0.83, "learning_rate": 7.4541973114812145e-06, "loss": 0.7447, "step": 2734 }, { "epoch": 0.83, "learning_rate": 7.428416031986818e-06, "loss": 0.7631, "step": 2735 }, { "epoch": 0.83, "learning_rate": 7.402675835535111e-06, "loss": 0.7336, "step": 2736 }, { "epoch": 0.83, "learning_rate": 7.376976746966302e-06, "loss": 0.7798, "step": 2737 }, { "epoch": 0.83, "learning_rate": 7.35131879108088e-06, "loss": 0.7828, "step": 2738 }, { "epoch": 0.83, "learning_rate": 7.32570199263965e-06, "loss": 0.7289, "step": 2739 }, { "epoch": 0.83, "learning_rate": 7.300126376363697e-06, "loss": 0.7624, "step": 2740 }, { "epoch": 0.83, "learning_rate": 7.274591966934391e-06, "loss": 0.7515, "step": 2741 }, { "epoch": 0.83, "learning_rate": 7.249098788993308e-06, "loss": 0.7421, "step": 2742 }, { "epoch": 0.83, "learning_rate": 7.223646867142237e-06, "loss": 0.8432, "step": 2743 }, { "epoch": 0.83, "learning_rate": 7.19823622594315e-06, "loss": 0.7342, "step": 2744 }, { "epoch": 0.83, "learning_rate": 7.172866889918206e-06, "loss": 0.755, "step": 2745 }, { "epoch": 0.83, "learning_rate": 7.147538883549687e-06, "loss": 0.7655, "step": 2746 }, { "epoch": 0.83, "learning_rate": 7.1222522312799745e-06, "loss": 0.6858, "step": 2747 }, { "epoch": 0.83, "learning_rate": 7.097006957511559e-06, "loss": 0.7019, "step": 2748 }, { "epoch": 0.83, "learning_rate": 7.07180308660702e-06, "loss": 0.7671, "step": 2749 }, { "epoch": 0.83, "learning_rate": 7.046640642888941e-06, "loss": 0.7555, "step": 2750 }, { "epoch": 0.83, "learning_rate": 7.0215196506399515e-06, "loss": 0.7352, "step": 2751 }, { "epoch": 0.83, "learning_rate": 6.996440134102661e-06, "loss": 0.7441, "step": 2752 }, { "epoch": 0.84, "learning_rate": 6.9714021174796864e-06, "loss": 0.7281, "step": 2753 }, { "epoch": 0.84, "learning_rate": 6.94640562493355e-06, "loss": 0.7624, "step": 2754 }, { "epoch": 0.84, "learning_rate": 6.921450680586755e-06, "loss": 0.7565, "step": 2755 }, { "epoch": 0.84, "learning_rate": 6.896537308521656e-06, "loss": 0.7467, "step": 2756 }, { "epoch": 0.84, "learning_rate": 6.8716655327805355e-06, "loss": 0.803, "step": 2757 }, { "epoch": 0.84, "learning_rate": 6.846835377365502e-06, "loss": 0.7498, "step": 2758 }, { "epoch": 0.84, "learning_rate": 6.822046866238513e-06, "loss": 0.7592, "step": 2759 }, { "epoch": 0.84, "learning_rate": 6.7973000233213125e-06, "loss": 0.771, "step": 2760 }, { "epoch": 0.84, "learning_rate": 6.772594872495492e-06, "loss": 0.7103, "step": 2761 }, { "epoch": 0.84, "learning_rate": 6.747931437602345e-06, "loss": 0.7579, "step": 2762 }, { "epoch": 0.84, "learning_rate": 6.723309742442951e-06, "loss": 0.7638, "step": 2763 }, { "epoch": 0.84, "learning_rate": 6.698729810778065e-06, "loss": 0.7429, "step": 2764 }, { "epoch": 0.84, "learning_rate": 6.6741916663282e-06, "loss": 0.7522, "step": 2765 }, { "epoch": 0.84, "learning_rate": 6.649695332773498e-06, "loss": 0.7539, "step": 2766 }, { "epoch": 0.84, "learning_rate": 6.625240833753754e-06, "loss": 0.7013, "step": 2767 }, { "epoch": 0.84, "learning_rate": 6.600828192868402e-06, "loss": 0.7216, "step": 2768 }, { "epoch": 0.84, "learning_rate": 6.576457433676492e-06, "loss": 0.7579, "step": 2769 }, { "epoch": 0.84, "learning_rate": 6.552128579696642e-06, "loss": 0.7385, "step": 2770 }, { "epoch": 0.84, "learning_rate": 6.527841654407024e-06, "loss": 0.715, "step": 2771 }, { "epoch": 0.84, "learning_rate": 6.50359668124535e-06, "loss": 0.7709, "step": 2772 }, { "epoch": 0.84, "learning_rate": 6.4793936836088755e-06, "loss": 0.7259, "step": 2773 }, { "epoch": 0.84, "learning_rate": 6.4552326848543064e-06, "loss": 0.7519, "step": 2774 }, { "epoch": 0.84, "learning_rate": 6.4311137082978535e-06, "loss": 0.7643, "step": 2775 }, { "epoch": 0.84, "learning_rate": 6.407036777215136e-06, "loss": 0.7636, "step": 2776 }, { "epoch": 0.84, "learning_rate": 6.3830019148412525e-06, "loss": 0.7314, "step": 2777 }, { "epoch": 0.84, "learning_rate": 6.359009144370648e-06, "loss": 0.7759, "step": 2778 }, { "epoch": 0.84, "learning_rate": 6.335058488957179e-06, "loss": 0.7847, "step": 2779 }, { "epoch": 0.84, "learning_rate": 6.311149971714042e-06, "loss": 0.7507, "step": 2780 }, { "epoch": 0.84, "learning_rate": 6.287283615713796e-06, "loss": 0.7468, "step": 2781 }, { "epoch": 0.84, "learning_rate": 6.263459443988284e-06, "loss": 0.6938, "step": 2782 }, { "epoch": 0.84, "learning_rate": 6.239677479528649e-06, "loss": 0.7414, "step": 2783 }, { "epoch": 0.84, "learning_rate": 6.215937745285305e-06, "loss": 0.754, "step": 2784 }, { "epoch": 0.84, "learning_rate": 6.192240264167925e-06, "loss": 0.7741, "step": 2785 }, { "epoch": 0.85, "learning_rate": 6.168585059045379e-06, "loss": 0.7031, "step": 2786 }, { "epoch": 0.85, "learning_rate": 6.1449721527457535e-06, "loss": 0.7223, "step": 2787 }, { "epoch": 0.85, "learning_rate": 6.121401568056329e-06, "loss": 0.7129, "step": 2788 }, { "epoch": 0.85, "learning_rate": 6.097873327723519e-06, "loss": 0.7337, "step": 2789 }, { "epoch": 0.85, "learning_rate": 6.07438745445289e-06, "loss": 0.7603, "step": 2790 }, { "epoch": 0.85, "learning_rate": 6.050943970909101e-06, "loss": 0.7421, "step": 2791 }, { "epoch": 0.85, "learning_rate": 6.027542899715949e-06, "loss": 0.7836, "step": 2792 }, { "epoch": 0.85, "learning_rate": 6.004184263456253e-06, "loss": 0.7682, "step": 2793 }, { "epoch": 0.85, "learning_rate": 5.980868084671892e-06, "loss": 0.73, "step": 2794 }, { "epoch": 0.85, "learning_rate": 5.957594385863807e-06, "loss": 0.7076, "step": 2795 }, { "epoch": 0.85, "learning_rate": 5.934363189491893e-06, "loss": 0.7694, "step": 2796 }, { "epoch": 0.85, "learning_rate": 5.911174517975076e-06, "loss": 0.6999, "step": 2797 }, { "epoch": 0.85, "learning_rate": 5.888028393691209e-06, "loss": 0.7382, "step": 2798 }, { "epoch": 0.85, "learning_rate": 5.864924838977087e-06, "loss": 0.7492, "step": 2799 }, { "epoch": 0.85, "learning_rate": 5.841863876128456e-06, "loss": 0.721, "step": 2800 }, { "epoch": 0.85, "learning_rate": 5.81884552739993e-06, "loss": 0.7424, "step": 2801 }, { "epoch": 0.85, "learning_rate": 5.795869815005006e-06, "loss": 0.7626, "step": 2802 }, { "epoch": 0.85, "learning_rate": 5.772936761116027e-06, "loss": 0.6982, "step": 2803 }, { "epoch": 0.85, "learning_rate": 5.7500463878642e-06, "loss": 0.7291, "step": 2804 }, { "epoch": 0.85, "learning_rate": 5.727198717339511e-06, "loss": 0.7114, "step": 2805 }, { "epoch": 0.85, "learning_rate": 5.704393771590744e-06, "loss": 0.7567, "step": 2806 }, { "epoch": 0.85, "learning_rate": 5.681631572625451e-06, "loss": 0.7939, "step": 2807 }, { "epoch": 0.85, "learning_rate": 5.658912142409956e-06, "loss": 0.7496, "step": 2808 }, { "epoch": 0.85, "learning_rate": 5.636235502869286e-06, "loss": 0.7341, "step": 2809 }, { "epoch": 0.85, "learning_rate": 5.613601675887165e-06, "loss": 0.7384, "step": 2810 }, { "epoch": 0.85, "learning_rate": 5.591010683306014e-06, "loss": 0.7116, "step": 2811 }, { "epoch": 0.85, "learning_rate": 5.568462546926939e-06, "loss": 0.7468, "step": 2812 }, { "epoch": 0.85, "learning_rate": 5.545957288509651e-06, "loss": 0.7744, "step": 2813 }, { "epoch": 0.85, "learning_rate": 5.523494929772488e-06, "loss": 0.733, "step": 2814 }, { "epoch": 0.85, "learning_rate": 5.501075492392427e-06, "loss": 0.7559, "step": 2815 }, { "epoch": 0.85, "learning_rate": 5.478698998004967e-06, "loss": 0.7558, "step": 2816 }, { "epoch": 0.85, "learning_rate": 5.456365468204222e-06, "loss": 0.7455, "step": 2817 }, { "epoch": 0.85, "learning_rate": 5.4340749245428036e-06, "loss": 0.721, "step": 2818 }, { "epoch": 0.86, "learning_rate": 5.411827388531837e-06, "loss": 0.7762, "step": 2819 }, { "epoch": 0.86, "learning_rate": 5.389622881640993e-06, "loss": 0.7174, "step": 2820 }, { "epoch": 0.86, "learning_rate": 5.367461425298359e-06, "loss": 0.8125, "step": 2821 }, { "epoch": 0.86, "learning_rate": 5.345343040890516e-06, "loss": 0.7312, "step": 2822 }, { "epoch": 0.86, "learning_rate": 5.323267749762451e-06, "loss": 0.7307, "step": 2823 }, { "epoch": 0.86, "learning_rate": 5.301235573217595e-06, "loss": 0.7442, "step": 2824 }, { "epoch": 0.86, "learning_rate": 5.2792465325177455e-06, "loss": 0.7607, "step": 2825 }, { "epoch": 0.86, "learning_rate": 5.257300648883096e-06, "loss": 0.7263, "step": 2826 }, { "epoch": 0.86, "learning_rate": 5.235397943492154e-06, "loss": 0.773, "step": 2827 }, { "epoch": 0.86, "learning_rate": 5.213538437481807e-06, "loss": 0.7741, "step": 2828 }, { "epoch": 0.86, "learning_rate": 5.191722151947226e-06, "loss": 0.7425, "step": 2829 }, { "epoch": 0.86, "learning_rate": 5.169949107941874e-06, "loss": 0.6804, "step": 2830 }, { "epoch": 0.86, "learning_rate": 5.148219326477472e-06, "loss": 0.7185, "step": 2831 }, { "epoch": 0.86, "learning_rate": 5.126532828524033e-06, "loss": 0.7448, "step": 2832 }, { "epoch": 0.86, "learning_rate": 5.104889635009763e-06, "loss": 0.7666, "step": 2833 }, { "epoch": 0.86, "learning_rate": 5.083289766821075e-06, "loss": 0.7842, "step": 2834 }, { "epoch": 0.86, "learning_rate": 5.061733244802608e-06, "loss": 0.7425, "step": 2835 }, { "epoch": 0.86, "learning_rate": 5.040220089757125e-06, "loss": 0.7258, "step": 2836 }, { "epoch": 0.86, "learning_rate": 5.018750322445581e-06, "loss": 0.7602, "step": 2837 }, { "epoch": 0.86, "learning_rate": 4.9973239635870305e-06, "loss": 0.7392, "step": 2838 }, { "epoch": 0.86, "learning_rate": 4.975941033858639e-06, "loss": 0.7136, "step": 2839 }, { "epoch": 0.86, "learning_rate": 4.954601553895682e-06, "loss": 0.7754, "step": 2840 }, { "epoch": 0.86, "learning_rate": 4.933305544291483e-06, "loss": 0.8121, "step": 2841 }, { "epoch": 0.86, "learning_rate": 4.912053025597429e-06, "loss": 0.7862, "step": 2842 }, { "epoch": 0.86, "learning_rate": 4.890844018322921e-06, "loss": 0.7376, "step": 2843 }, { "epoch": 0.86, "learning_rate": 4.869678542935397e-06, "loss": 0.7308, "step": 2844 }, { "epoch": 0.86, "learning_rate": 4.848556619860262e-06, "loss": 0.7865, "step": 2845 }, { "epoch": 0.86, "learning_rate": 4.827478269480895e-06, "loss": 0.7506, "step": 2846 }, { "epoch": 0.86, "learning_rate": 4.80644351213862e-06, "loss": 0.7129, "step": 2847 }, { "epoch": 0.86, "learning_rate": 4.78545236813272e-06, "loss": 0.7873, "step": 2848 }, { "epoch": 0.86, "learning_rate": 4.764504857720364e-06, "loss": 0.7774, "step": 2849 }, { "epoch": 0.86, "learning_rate": 4.74360100111661e-06, "loss": 0.732, "step": 2850 }, { "epoch": 0.86, "learning_rate": 4.722740818494414e-06, "loss": 0.7129, "step": 2851 }, { "epoch": 0.87, "learning_rate": 4.701924329984564e-06, "loss": 0.7277, "step": 2852 }, { "epoch": 0.87, "learning_rate": 4.681151555675689e-06, "loss": 0.7862, "step": 2853 }, { "epoch": 0.87, "learning_rate": 4.660422515614216e-06, "loss": 0.7795, "step": 2854 }, { "epoch": 0.87, "learning_rate": 4.639737229804403e-06, "loss": 0.7131, "step": 2855 }, { "epoch": 0.87, "learning_rate": 4.619095718208255e-06, "loss": 0.7192, "step": 2856 }, { "epoch": 0.87, "learning_rate": 4.598498000745527e-06, "loss": 0.6739, "step": 2857 }, { "epoch": 0.87, "learning_rate": 4.577944097293746e-06, "loss": 0.6864, "step": 2858 }, { "epoch": 0.87, "learning_rate": 4.557434027688118e-06, "loss": 0.7783, "step": 2859 }, { "epoch": 0.87, "learning_rate": 4.5369678117215765e-06, "loss": 0.7408, "step": 2860 }, { "epoch": 0.87, "learning_rate": 4.516545469144729e-06, "loss": 0.7019, "step": 2861 }, { "epoch": 0.87, "learning_rate": 4.496167019665815e-06, "loss": 0.7042, "step": 2862 }, { "epoch": 0.87, "learning_rate": 4.475832482950765e-06, "loss": 0.8067, "step": 2863 }, { "epoch": 0.87, "learning_rate": 4.455541878623098e-06, "loss": 0.7562, "step": 2864 }, { "epoch": 0.87, "learning_rate": 4.435295226263941e-06, "loss": 0.6954, "step": 2865 }, { "epoch": 0.87, "learning_rate": 4.415092545412003e-06, "loss": 0.7379, "step": 2866 }, { "epoch": 0.87, "learning_rate": 4.394933855563582e-06, "loss": 0.7312, "step": 2867 }, { "epoch": 0.87, "learning_rate": 4.374819176172501e-06, "loss": 0.7524, "step": 2868 }, { "epoch": 0.87, "learning_rate": 4.354748526650116e-06, "loss": 0.7217, "step": 2869 }, { "epoch": 0.87, "learning_rate": 4.3347219263652875e-06, "loss": 0.7501, "step": 2870 }, { "epoch": 0.87, "learning_rate": 4.3147393946443845e-06, "loss": 0.7356, "step": 2871 }, { "epoch": 0.87, "learning_rate": 4.294800950771233e-06, "loss": 0.7437, "step": 2872 }, { "epoch": 0.87, "learning_rate": 4.274906613987123e-06, "loss": 0.8085, "step": 2873 }, { "epoch": 0.87, "learning_rate": 4.255056403490754e-06, "loss": 0.7655, "step": 2874 }, { "epoch": 0.87, "learning_rate": 4.2352503384382845e-06, "loss": 0.7075, "step": 2875 }, { "epoch": 0.87, "learning_rate": 4.215488437943238e-06, "loss": 0.742, "step": 2876 }, { "epoch": 0.87, "learning_rate": 4.195770721076525e-06, "loss": 0.7934, "step": 2877 }, { "epoch": 0.87, "learning_rate": 4.176097206866431e-06, "loss": 0.8113, "step": 2878 }, { "epoch": 0.87, "learning_rate": 4.1564679142985615e-06, "loss": 0.7103, "step": 2879 }, { "epoch": 0.87, "learning_rate": 4.136882862315871e-06, "loss": 0.8049, "step": 2880 }, { "epoch": 0.87, "learning_rate": 4.117342069818603e-06, "loss": 0.7682, "step": 2881 }, { "epoch": 0.87, "learning_rate": 4.097845555664287e-06, "loss": 0.7235, "step": 2882 }, { "epoch": 0.87, "learning_rate": 4.0783933386677465e-06, "loss": 0.8004, "step": 2883 }, { "epoch": 0.87, "learning_rate": 4.058985437601026e-06, "loss": 0.679, "step": 2884 }, { "epoch": 0.88, "learning_rate": 4.039621871193427e-06, "loss": 0.7325, "step": 2885 }, { "epoch": 0.88, "learning_rate": 4.020302658131442e-06, "loss": 0.7433, "step": 2886 }, { "epoch": 0.88, "learning_rate": 4.001027817058789e-06, "loss": 0.7154, "step": 2887 }, { "epoch": 0.88, "learning_rate": 3.9817973665763565e-06, "loss": 0.7923, "step": 2888 }, { "epoch": 0.88, "learning_rate": 3.962611325242177e-06, "loss": 0.808, "step": 2889 }, { "epoch": 0.88, "learning_rate": 3.943469711571446e-06, "loss": 0.6454, "step": 2890 }, { "epoch": 0.88, "learning_rate": 3.924372544036481e-06, "loss": 0.807, "step": 2891 }, { "epoch": 0.88, "learning_rate": 3.905319841066707e-06, "loss": 0.6836, "step": 2892 }, { "epoch": 0.88, "learning_rate": 3.886311621048638e-06, "loss": 0.8221, "step": 2893 }, { "epoch": 0.88, "learning_rate": 3.8673479023258465e-06, "loss": 0.7367, "step": 2894 }, { "epoch": 0.88, "learning_rate": 3.848428703198987e-06, "loss": 0.687, "step": 2895 }, { "epoch": 0.88, "learning_rate": 3.829554041925743e-06, "loss": 0.7311, "step": 2896 }, { "epoch": 0.88, "learning_rate": 3.8107239367207893e-06, "loss": 0.7198, "step": 2897 }, { "epoch": 0.88, "learning_rate": 3.791938405755846e-06, "loss": 0.783, "step": 2898 }, { "epoch": 0.88, "learning_rate": 3.773197467159584e-06, "loss": 0.7452, "step": 2899 }, { "epoch": 0.88, "learning_rate": 3.7545011390176634e-06, "loss": 0.7369, "step": 2900 }, { "epoch": 0.88, "learning_rate": 3.735849439372685e-06, "loss": 0.7738, "step": 2901 }, { "epoch": 0.88, "learning_rate": 3.717242386224162e-06, "loss": 0.7846, "step": 2902 }, { "epoch": 0.88, "learning_rate": 3.698679997528559e-06, "loss": 0.6895, "step": 2903 }, { "epoch": 0.88, "learning_rate": 3.6801622911992184e-06, "loss": 0.7443, "step": 2904 }, { "epoch": 0.88, "learning_rate": 3.6616892851063624e-06, "loss": 0.7319, "step": 2905 }, { "epoch": 0.88, "learning_rate": 3.6432609970770637e-06, "loss": 0.7905, "step": 2906 }, { "epoch": 0.88, "learning_rate": 3.6248774448952695e-06, "loss": 0.7462, "step": 2907 }, { "epoch": 0.88, "learning_rate": 3.606538646301738e-06, "loss": 0.6949, "step": 2908 }, { "epoch": 0.88, "learning_rate": 3.5882446189940356e-06, "loss": 0.7349, "step": 2909 }, { "epoch": 0.88, "learning_rate": 3.5699953806265175e-06, "loss": 0.7807, "step": 2910 }, { "epoch": 0.88, "learning_rate": 3.551790948810352e-06, "loss": 0.7887, "step": 2911 }, { "epoch": 0.88, "learning_rate": 3.5336313411134304e-06, "loss": 0.7891, "step": 2912 }, { "epoch": 0.88, "learning_rate": 3.51551657506039e-06, "loss": 0.7355, "step": 2913 }, { "epoch": 0.88, "learning_rate": 3.497446668132609e-06, "loss": 0.7712, "step": 2914 }, { "epoch": 0.88, "learning_rate": 3.4794216377681764e-06, "loss": 0.7548, "step": 2915 }, { "epoch": 0.88, "learning_rate": 3.4614415013618564e-06, "loss": 0.7854, "step": 2916 }, { "epoch": 0.88, "learning_rate": 3.443506276265096e-06, "loss": 0.7198, "step": 2917 }, { "epoch": 0.89, "learning_rate": 3.4256159797860177e-06, "loss": 0.7678, "step": 2918 }, { "epoch": 0.89, "learning_rate": 3.407770629189361e-06, "loss": 0.7104, "step": 2919 }, { "epoch": 0.89, "learning_rate": 3.389970241696516e-06, "loss": 0.7638, "step": 2920 }, { "epoch": 0.89, "learning_rate": 3.372214834485449e-06, "loss": 0.7765, "step": 2921 }, { "epoch": 0.89, "learning_rate": 3.354504424690763e-06, "loss": 0.7217, "step": 2922 }, { "epoch": 0.89, "learning_rate": 3.336839029403599e-06, "loss": 0.7562, "step": 2923 }, { "epoch": 0.89, "learning_rate": 3.3192186656716694e-06, "loss": 0.7535, "step": 2924 }, { "epoch": 0.89, "learning_rate": 3.3016433504992283e-06, "loss": 0.7288, "step": 2925 }, { "epoch": 0.89, "learning_rate": 3.284113100847075e-06, "loss": 0.784, "step": 2926 }, { "epoch": 0.89, "learning_rate": 3.2666279336324877e-06, "loss": 0.7217, "step": 2927 }, { "epoch": 0.89, "learning_rate": 3.249187865729264e-06, "loss": 0.7626, "step": 2928 }, { "epoch": 0.89, "learning_rate": 3.2317929139676574e-06, "loss": 0.7255, "step": 2929 }, { "epoch": 0.89, "learning_rate": 3.214443095134406e-06, "loss": 0.7247, "step": 2930 }, { "epoch": 0.89, "learning_rate": 3.197138425972679e-06, "loss": 0.7005, "step": 2931 }, { "epoch": 0.89, "learning_rate": 3.179878923182078e-06, "loss": 0.7536, "step": 2932 }, { "epoch": 0.89, "learning_rate": 3.162664603418608e-06, "loss": 0.7344, "step": 2933 }, { "epoch": 0.89, "learning_rate": 3.1454954832946913e-06, "loss": 0.7839, "step": 2934 }, { "epoch": 0.89, "learning_rate": 3.128371579379119e-06, "loss": 0.7143, "step": 2935 }, { "epoch": 0.89, "learning_rate": 3.1112929081970388e-06, "loss": 0.7555, "step": 2936 }, { "epoch": 0.89, "learning_rate": 3.094259486229956e-06, "loss": 0.7352, "step": 2937 }, { "epoch": 0.89, "learning_rate": 3.0772713299157153e-06, "loss": 0.7086, "step": 2938 }, { "epoch": 0.89, "learning_rate": 3.0603284556484644e-06, "loss": 0.7776, "step": 2939 }, { "epoch": 0.89, "learning_rate": 3.0434308797786747e-06, "loss": 0.7759, "step": 2940 }, { "epoch": 0.89, "learning_rate": 3.026578618613063e-06, "loss": 0.7728, "step": 2941 }, { "epoch": 0.89, "learning_rate": 3.00977168841467e-06, "loss": 0.7853, "step": 2942 }, { "epoch": 0.89, "learning_rate": 2.993010105402744e-06, "loss": 0.7, "step": 2943 }, { "epoch": 0.89, "learning_rate": 2.9762938857527954e-06, "loss": 0.7382, "step": 2944 }, { "epoch": 0.89, "learning_rate": 2.959623045596538e-06, "loss": 0.7221, "step": 2945 }, { "epoch": 0.89, "learning_rate": 2.942997601021924e-06, "loss": 0.7245, "step": 2946 }, { "epoch": 0.89, "learning_rate": 2.92641756807307e-06, "loss": 0.7239, "step": 2947 }, { "epoch": 0.89, "learning_rate": 2.9098829627502765e-06, "loss": 0.7332, "step": 2948 }, { "epoch": 0.89, "learning_rate": 2.8933938010100026e-06, "loss": 0.7284, "step": 2949 }, { "epoch": 0.89, "learning_rate": 2.8769500987648743e-06, "loss": 0.7384, "step": 2950 }, { "epoch": 0.9, "learning_rate": 2.8605518718836157e-06, "loss": 0.7026, "step": 2951 }, { "epoch": 0.9, "learning_rate": 2.8441991361910835e-06, "loss": 0.7427, "step": 2952 }, { "epoch": 0.9, "learning_rate": 2.8278919074682166e-06, "loss": 0.7203, "step": 2953 }, { "epoch": 0.9, "learning_rate": 2.8116302014520755e-06, "loss": 0.7259, "step": 2954 }, { "epoch": 0.9, "learning_rate": 2.7954140338357525e-06, "loss": 0.76, "step": 2955 }, { "epoch": 0.9, "learning_rate": 2.7792434202684124e-06, "loss": 0.7468, "step": 2956 }, { "epoch": 0.9, "learning_rate": 2.7631183763552393e-06, "loss": 0.7091, "step": 2957 }, { "epoch": 0.9, "learning_rate": 2.747038917657474e-06, "loss": 0.7859, "step": 2958 }, { "epoch": 0.9, "learning_rate": 2.731005059692332e-06, "loss": 0.7538, "step": 2959 }, { "epoch": 0.9, "learning_rate": 2.715016817933047e-06, "loss": 0.7601, "step": 2960 }, { "epoch": 0.9, "learning_rate": 2.699074207808816e-06, "loss": 0.662, "step": 2961 }, { "epoch": 0.9, "learning_rate": 2.683177244704821e-06, "loss": 0.7342, "step": 2962 }, { "epoch": 0.9, "learning_rate": 2.667325943962162e-06, "loss": 0.7957, "step": 2963 }, { "epoch": 0.9, "learning_rate": 2.651520320877893e-06, "loss": 0.6909, "step": 2964 }, { "epoch": 0.9, "learning_rate": 2.6357603907049833e-06, "loss": 0.7098, "step": 2965 }, { "epoch": 0.9, "learning_rate": 2.6200461686523124e-06, "loss": 0.7179, "step": 2966 }, { "epoch": 0.9, "learning_rate": 2.6043776698846444e-06, "loss": 0.7452, "step": 2967 }, { "epoch": 0.9, "learning_rate": 2.588754909522617e-06, "loss": 0.6976, "step": 2968 }, { "epoch": 0.9, "learning_rate": 2.573177902642726e-06, "loss": 0.6459, "step": 2969 }, { "epoch": 0.9, "learning_rate": 2.5576466642773243e-06, "loss": 0.7191, "step": 2970 }, { "epoch": 0.9, "learning_rate": 2.542161209414601e-06, "loss": 0.7664, "step": 2971 }, { "epoch": 0.9, "learning_rate": 2.5267215529985342e-06, "loss": 0.7474, "step": 2972 }, { "epoch": 0.9, "learning_rate": 2.5113277099289288e-06, "loss": 0.753, "step": 2973 }, { "epoch": 0.9, "learning_rate": 2.4959796950613845e-06, "loss": 0.746, "step": 2974 }, { "epoch": 0.9, "learning_rate": 2.4806775232072534e-06, "loss": 0.7083, "step": 2975 }, { "epoch": 0.9, "learning_rate": 2.4654212091336558e-06, "loss": 0.7351, "step": 2976 }, { "epoch": 0.9, "learning_rate": 2.45021076756346e-06, "loss": 0.7475, "step": 2977 }, { "epoch": 0.9, "learning_rate": 2.4350462131752738e-06, "loss": 0.7315, "step": 2978 }, { "epoch": 0.9, "learning_rate": 2.4199275606033966e-06, "loss": 0.7439, "step": 2979 }, { "epoch": 0.9, "learning_rate": 2.404854824437874e-06, "loss": 0.7153, "step": 2980 }, { "epoch": 0.9, "learning_rate": 2.3898280192243884e-06, "loss": 0.7946, "step": 2981 }, { "epoch": 0.9, "learning_rate": 2.374847159464344e-06, "loss": 0.7352, "step": 2982 }, { "epoch": 0.9, "learning_rate": 2.359912259614777e-06, "loss": 0.7985, "step": 2983 }, { "epoch": 0.91, "learning_rate": 2.3450233340883753e-06, "loss": 0.7844, "step": 2984 }, { "epoch": 0.91, "learning_rate": 2.330180397253473e-06, "loss": 0.7514, "step": 2985 }, { "epoch": 0.91, "learning_rate": 2.3153834634340067e-06, "loss": 0.7592, "step": 2986 }, { "epoch": 0.91, "learning_rate": 2.300632546909537e-06, "loss": 0.7574, "step": 2987 }, { "epoch": 0.91, "learning_rate": 2.285927661915188e-06, "loss": 0.741, "step": 2988 }, { "epoch": 0.91, "learning_rate": 2.2712688226416977e-06, "loss": 0.7731, "step": 2989 }, { "epoch": 0.91, "learning_rate": 2.256656043235339e-06, "loss": 0.7273, "step": 2990 }, { "epoch": 0.91, "learning_rate": 2.2420893377979536e-06, "loss": 0.7251, "step": 2991 }, { "epoch": 0.91, "learning_rate": 2.2275687203869023e-06, "loss": 0.7476, "step": 2992 }, { "epoch": 0.91, "learning_rate": 2.213094205015098e-06, "loss": 0.752, "step": 2993 }, { "epoch": 0.91, "learning_rate": 2.198665805650929e-06, "loss": 0.7546, "step": 2994 }, { "epoch": 0.91, "learning_rate": 2.1842835362183066e-06, "loss": 0.75, "step": 2995 }, { "epoch": 0.91, "learning_rate": 2.169947410596607e-06, "loss": 0.7715, "step": 2996 }, { "epoch": 0.91, "learning_rate": 2.1556574426206955e-06, "loss": 0.7078, "step": 2997 }, { "epoch": 0.91, "learning_rate": 2.1414136460808807e-06, "loss": 0.8013, "step": 2998 }, { "epoch": 0.91, "learning_rate": 2.1272160347229008e-06, "loss": 0.7556, "step": 2999 }, { "epoch": 0.91, "learning_rate": 2.1130646222479565e-06, "loss": 0.7319, "step": 3000 }, { "epoch": 0.91, "learning_rate": 2.098959422312641e-06, "loss": 0.7703, "step": 3001 }, { "epoch": 0.91, "learning_rate": 2.084900448528959e-06, "loss": 0.7317, "step": 3002 }, { "epoch": 0.91, "learning_rate": 2.0708877144643092e-06, "loss": 0.7342, "step": 3003 }, { "epoch": 0.91, "learning_rate": 2.0569212336414458e-06, "loss": 0.7582, "step": 3004 }, { "epoch": 0.91, "learning_rate": 2.0430010195385153e-06, "loss": 0.7768, "step": 3005 }, { "epoch": 0.91, "learning_rate": 2.0291270855890056e-06, "loss": 0.7307, "step": 3006 }, { "epoch": 0.91, "learning_rate": 2.0152994451817343e-06, "loss": 0.7174, "step": 3007 }, { "epoch": 0.91, "learning_rate": 2.0015181116608496e-06, "loss": 0.7805, "step": 3008 }, { "epoch": 0.91, "learning_rate": 1.9877830983258126e-06, "loss": 0.6806, "step": 3009 }, { "epoch": 0.91, "learning_rate": 1.974094418431388e-06, "loss": 0.7005, "step": 3010 }, { "epoch": 0.91, "learning_rate": 1.9604520851876198e-06, "loss": 0.7729, "step": 3011 }, { "epoch": 0.91, "learning_rate": 1.9468561117598204e-06, "loss": 0.7359, "step": 3012 }, { "epoch": 0.91, "learning_rate": 1.9333065112685846e-06, "loss": 0.7842, "step": 3013 }, { "epoch": 0.91, "learning_rate": 1.9198032967897404e-06, "loss": 0.7096, "step": 3014 }, { "epoch": 0.91, "learning_rate": 1.9063464813543541e-06, "loss": 0.7561, "step": 3015 }, { "epoch": 0.91, "learning_rate": 1.8929360779487038e-06, "loss": 0.79, "step": 3016 }, { "epoch": 0.92, "learning_rate": 1.879572099514304e-06, "loss": 0.7918, "step": 3017 }, { "epoch": 0.92, "learning_rate": 1.8662545589478552e-06, "loss": 0.7324, "step": 3018 }, { "epoch": 0.92, "learning_rate": 1.8529834691012217e-06, "loss": 0.6975, "step": 3019 }, { "epoch": 0.92, "learning_rate": 1.839758842781486e-06, "loss": 0.703, "step": 3020 }, { "epoch": 0.92, "learning_rate": 1.8265806927508511e-06, "loss": 0.7823, "step": 3021 }, { "epoch": 0.92, "learning_rate": 1.8134490317266994e-06, "loss": 0.7987, "step": 3022 }, { "epoch": 0.92, "learning_rate": 1.800363872381522e-06, "loss": 0.7075, "step": 3023 }, { "epoch": 0.92, "learning_rate": 1.7873252273429509e-06, "loss": 0.737, "step": 3024 }, { "epoch": 0.92, "learning_rate": 1.7743331091937321e-06, "loss": 0.7865, "step": 3025 }, { "epoch": 0.92, "learning_rate": 1.7613875304717032e-06, "loss": 0.7342, "step": 3026 }, { "epoch": 0.92, "learning_rate": 1.748488503669793e-06, "loss": 0.7484, "step": 3027 }, { "epoch": 0.92, "learning_rate": 1.7356360412359996e-06, "loss": 0.7432, "step": 3028 }, { "epoch": 0.92, "learning_rate": 1.7228301555734016e-06, "loss": 0.7548, "step": 3029 }, { "epoch": 0.92, "learning_rate": 1.7100708590401193e-06, "loss": 0.703, "step": 3030 }, { "epoch": 0.92, "learning_rate": 1.6973581639493086e-06, "loss": 0.7697, "step": 3031 }, { "epoch": 0.92, "learning_rate": 1.6846920825691503e-06, "loss": 0.7861, "step": 3032 }, { "epoch": 0.92, "learning_rate": 1.6720726271228615e-06, "loss": 0.7059, "step": 3033 }, { "epoch": 0.92, "learning_rate": 1.6594998097886393e-06, "loss": 0.7972, "step": 3034 }, { "epoch": 0.92, "learning_rate": 1.6469736426997006e-06, "loss": 0.8121, "step": 3035 }, { "epoch": 0.92, "learning_rate": 1.6344941379442035e-06, "loss": 0.7169, "step": 3036 }, { "epoch": 0.92, "learning_rate": 1.6220613075653202e-06, "loss": 0.7438, "step": 3037 }, { "epoch": 0.92, "learning_rate": 1.6096751635611528e-06, "loss": 0.7103, "step": 3038 }, { "epoch": 0.92, "learning_rate": 1.5973357178847515e-06, "loss": 0.7325, "step": 3039 }, { "epoch": 0.92, "learning_rate": 1.5850429824441181e-06, "loss": 0.7685, "step": 3040 }, { "epoch": 0.92, "learning_rate": 1.5727969691021472e-06, "loss": 0.7115, "step": 3041 }, { "epoch": 0.92, "learning_rate": 1.5605976896766795e-06, "loss": 0.7341, "step": 3042 }, { "epoch": 0.92, "learning_rate": 1.5484451559404311e-06, "loss": 0.7527, "step": 3043 }, { "epoch": 0.92, "learning_rate": 1.5363393796210158e-06, "loss": 0.6805, "step": 3044 }, { "epoch": 0.92, "learning_rate": 1.5242803724009269e-06, "loss": 0.7682, "step": 3045 }, { "epoch": 0.92, "learning_rate": 1.5122681459175225e-06, "loss": 0.7392, "step": 3046 }, { "epoch": 0.92, "learning_rate": 1.500302711763002e-06, "loss": 0.7795, "step": 3047 }, { "epoch": 0.92, "learning_rate": 1.488384081484434e-06, "loss": 0.7362, "step": 3048 }, { "epoch": 0.92, "learning_rate": 1.4765122665837071e-06, "loss": 0.7571, "step": 3049 }, { "epoch": 0.93, "learning_rate": 1.464687278517518e-06, "loss": 0.7168, "step": 3050 }, { "epoch": 0.93, "learning_rate": 1.4529091286973995e-06, "loss": 0.787, "step": 3051 }, { "epoch": 0.93, "learning_rate": 1.4411778284896648e-06, "loss": 0.7679, "step": 3052 }, { "epoch": 0.93, "learning_rate": 1.429493389215425e-06, "loss": 0.7405, "step": 3053 }, { "epoch": 0.93, "learning_rate": 1.4178558221505712e-06, "loss": 0.7532, "step": 3054 }, { "epoch": 0.93, "learning_rate": 1.4062651385257364e-06, "loss": 0.7714, "step": 3055 }, { "epoch": 0.93, "learning_rate": 1.3947213495263512e-06, "loss": 0.7505, "step": 3056 }, { "epoch": 0.93, "learning_rate": 1.3832244662925599e-06, "loss": 0.7622, "step": 3057 }, { "epoch": 0.93, "learning_rate": 1.3717744999192538e-06, "loss": 0.7462, "step": 3058 }, { "epoch": 0.93, "learning_rate": 1.3603714614560337e-06, "loss": 0.7731, "step": 3059 }, { "epoch": 0.93, "learning_rate": 1.34901536190723e-06, "loss": 0.7379, "step": 3060 }, { "epoch": 0.93, "learning_rate": 1.3377062122318706e-06, "loss": 0.7548, "step": 3061 }, { "epoch": 0.93, "learning_rate": 1.3264440233436815e-06, "loss": 0.7697, "step": 3062 }, { "epoch": 0.93, "learning_rate": 1.3152288061110518e-06, "loss": 0.7503, "step": 3063 }, { "epoch": 0.93, "learning_rate": 1.3040605713570464e-06, "loss": 0.6872, "step": 3064 }, { "epoch": 0.93, "learning_rate": 1.2929393298594106e-06, "loss": 0.7417, "step": 3065 }, { "epoch": 0.93, "learning_rate": 1.2818650923505204e-06, "loss": 0.8047, "step": 3066 }, { "epoch": 0.93, "learning_rate": 1.2708378695173883e-06, "loss": 0.7507, "step": 3067 }, { "epoch": 0.93, "learning_rate": 1.2598576720016741e-06, "loss": 0.7526, "step": 3068 }, { "epoch": 0.93, "learning_rate": 1.2489245103996405e-06, "loss": 0.7115, "step": 3069 }, { "epoch": 0.93, "learning_rate": 1.2380383952621644e-06, "loss": 0.7395, "step": 3070 }, { "epoch": 0.93, "learning_rate": 1.22719933709472e-06, "loss": 0.7487, "step": 3071 }, { "epoch": 0.93, "learning_rate": 1.216407346357379e-06, "loss": 0.817, "step": 3072 }, { "epoch": 0.93, "learning_rate": 1.2056624334647716e-06, "loss": 0.7302, "step": 3073 }, { "epoch": 0.93, "learning_rate": 1.19496460878612e-06, "loss": 0.7366, "step": 3074 }, { "epoch": 0.93, "learning_rate": 1.1843138826451827e-06, "loss": 0.7569, "step": 3075 }, { "epoch": 0.93, "learning_rate": 1.1737102653202826e-06, "loss": 0.7921, "step": 3076 }, { "epoch": 0.93, "learning_rate": 1.1631537670442783e-06, "loss": 0.793, "step": 3077 }, { "epoch": 0.93, "learning_rate": 1.1526443980045487e-06, "loss": 0.7174, "step": 3078 }, { "epoch": 0.93, "learning_rate": 1.1421821683429924e-06, "loss": 0.7618, "step": 3079 }, { "epoch": 0.93, "learning_rate": 1.1317670881560272e-06, "loss": 0.7485, "step": 3080 }, { "epoch": 0.93, "learning_rate": 1.1213991674945635e-06, "loss": 0.792, "step": 3081 }, { "epoch": 0.93, "learning_rate": 1.1110784163639975e-06, "loss": 0.7571, "step": 3082 }, { "epoch": 0.94, "learning_rate": 1.1008048447242124e-06, "loss": 0.7881, "step": 3083 }, { "epoch": 0.94, "learning_rate": 1.0905784624895554e-06, "loss": 0.7587, "step": 3084 }, { "epoch": 0.94, "learning_rate": 1.0803992795288432e-06, "loss": 0.7713, "step": 3085 }, { "epoch": 0.94, "learning_rate": 1.0702673056653345e-06, "loss": 0.7673, "step": 3086 }, { "epoch": 0.94, "learning_rate": 1.060182550676725e-06, "loss": 0.7301, "step": 3087 }, { "epoch": 0.94, "learning_rate": 1.050145024295157e-06, "loss": 0.7962, "step": 3088 }, { "epoch": 0.94, "learning_rate": 1.040154736207194e-06, "loss": 0.6852, "step": 3089 }, { "epoch": 0.94, "learning_rate": 1.0302116960538012e-06, "loss": 0.6991, "step": 3090 }, { "epoch": 0.94, "learning_rate": 1.0203159134303474e-06, "loss": 0.7506, "step": 3091 }, { "epoch": 0.94, "learning_rate": 1.0104673978866164e-06, "loss": 0.7603, "step": 3092 }, { "epoch": 0.94, "learning_rate": 1.0006661589267551e-06, "loss": 0.7643, "step": 3093 }, { "epoch": 0.94, "learning_rate": 9.909122060093035e-07, "loss": 0.7689, "step": 3094 }, { "epoch": 0.94, "learning_rate": 9.812055485471539e-07, "loss": 0.7039, "step": 3095 }, { "epoch": 0.94, "learning_rate": 9.715461959075745e-07, "loss": 0.7431, "step": 3096 }, { "epoch": 0.94, "learning_rate": 9.619341574121643e-07, "loss": 0.731, "step": 3097 }, { "epoch": 0.94, "learning_rate": 9.523694423368701e-07, "loss": 0.7524, "step": 3098 }, { "epoch": 0.94, "learning_rate": 9.42852059911975e-07, "loss": 0.77, "step": 3099 }, { "epoch": 0.94, "learning_rate": 9.333820193220821e-07, "loss": 0.7519, "step": 3100 }, { "epoch": 0.94, "learning_rate": 9.239593297061033e-07, "loss": 0.7355, "step": 3101 }, { "epoch": 0.94, "learning_rate": 9.145840001572537e-07, "loss": 0.7792, "step": 3102 }, { "epoch": 0.94, "learning_rate": 9.052560397230625e-07, "loss": 0.7388, "step": 3103 }, { "epoch": 0.94, "learning_rate": 8.959754574053126e-07, "loss": 0.7525, "step": 3104 }, { "epoch": 0.94, "learning_rate": 8.867422621601063e-07, "loss": 0.7177, "step": 3105 }, { "epoch": 0.94, "learning_rate": 8.775564628977772e-07, "loss": 0.7266, "step": 3106 }, { "epoch": 0.94, "learning_rate": 8.684180684829513e-07, "loss": 0.7009, "step": 3107 }, { "epoch": 0.94, "learning_rate": 8.59327087734485e-07, "loss": 0.7436, "step": 3108 }, { "epoch": 0.94, "learning_rate": 8.502835294254885e-07, "loss": 0.6992, "step": 3109 }, { "epoch": 0.94, "learning_rate": 8.412874022833028e-07, "loss": 0.7593, "step": 3110 }, { "epoch": 0.94, "learning_rate": 8.323387149895112e-07, "loss": 0.7786, "step": 3111 }, { "epoch": 0.94, "learning_rate": 8.234374761799002e-07, "loss": 0.7197, "step": 3112 }, { "epoch": 0.94, "learning_rate": 8.145836944444651e-07, "loss": 0.7676, "step": 3113 }, { "epoch": 0.94, "learning_rate": 8.057773783274103e-07, "loss": 0.7282, "step": 3114 }, { "epoch": 0.94, "learning_rate": 7.970185363271431e-07, "loss": 0.7151, "step": 3115 }, { "epoch": 0.95, "learning_rate": 7.883071768962413e-07, "loss": 0.7086, "step": 3116 }, { "epoch": 0.95, "learning_rate": 7.796433084414689e-07, "loss": 0.6798, "step": 3117 }, { "epoch": 0.95, "learning_rate": 7.71026939323749e-07, "loss": 0.7217, "step": 3118 }, { "epoch": 0.95, "learning_rate": 7.624580778581802e-07, "loss": 0.6992, "step": 3119 }, { "epoch": 0.95, "learning_rate": 7.539367323140145e-07, "loss": 0.7356, "step": 3120 }, { "epoch": 0.95, "learning_rate": 7.454629109146294e-07, "loss": 0.7332, "step": 3121 }, { "epoch": 0.95, "learning_rate": 7.370366218375668e-07, "loss": 0.752, "step": 3122 }, { "epoch": 0.95, "learning_rate": 7.286578732144777e-07, "loss": 0.7478, "step": 3123 }, { "epoch": 0.95, "learning_rate": 7.203266731311442e-07, "loss": 0.7244, "step": 3124 }, { "epoch": 0.95, "learning_rate": 7.120430296274683e-07, "loss": 0.789, "step": 3125 }, { "epoch": 0.95, "learning_rate": 7.038069506974332e-07, "loss": 0.7465, "step": 3126 }, { "epoch": 0.95, "learning_rate": 6.956184442891589e-07, "loss": 0.7551, "step": 3127 }, { "epoch": 0.95, "learning_rate": 6.874775183048299e-07, "loss": 0.8022, "step": 3128 }, { "epoch": 0.95, "learning_rate": 6.793841806007118e-07, "loss": 0.7306, "step": 3129 }, { "epoch": 0.95, "learning_rate": 6.713384389871569e-07, "loss": 0.7655, "step": 3130 }, { "epoch": 0.95, "learning_rate": 6.633403012285877e-07, "loss": 0.7831, "step": 3131 }, { "epoch": 0.95, "learning_rate": 6.553897750434746e-07, "loss": 0.7783, "step": 3132 }, { "epoch": 0.95, "learning_rate": 6.474868681043578e-07, "loss": 0.7801, "step": 3133 }, { "epoch": 0.95, "learning_rate": 6.396315880378034e-07, "loss": 0.7747, "step": 3134 }, { "epoch": 0.95, "learning_rate": 6.318239424244421e-07, "loss": 0.7304, "step": 3135 }, { "epoch": 0.95, "learning_rate": 6.240639387989077e-07, "loss": 0.7373, "step": 3136 }, { "epoch": 0.95, "learning_rate": 6.163515846498713e-07, "loss": 0.7555, "step": 3137 }, { "epoch": 0.95, "learning_rate": 6.086868874200236e-07, "loss": 0.7622, "step": 3138 }, { "epoch": 0.95, "learning_rate": 6.01069854506059e-07, "loss": 0.6893, "step": 3139 }, { "epoch": 0.95, "learning_rate": 5.935004932586807e-07, "loss": 0.7179, "step": 3140 }, { "epoch": 0.95, "learning_rate": 5.859788109825793e-07, "loss": 0.7305, "step": 3141 }, { "epoch": 0.95, "learning_rate": 5.785048149364314e-07, "loss": 0.7409, "step": 3142 }, { "epoch": 0.95, "learning_rate": 5.710785123329121e-07, "loss": 0.7824, "step": 3143 }, { "epoch": 0.95, "learning_rate": 5.636999103386442e-07, "loss": 0.7528, "step": 3144 }, { "epoch": 0.95, "learning_rate": 5.563690160742485e-07, "loss": 0.7099, "step": 3145 }, { "epoch": 0.95, "learning_rate": 5.49085836614277e-07, "loss": 0.7009, "step": 3146 }, { "epoch": 0.95, "learning_rate": 5.418503789872575e-07, "loss": 0.7137, "step": 3147 }, { "epoch": 0.95, "learning_rate": 5.346626501756546e-07, "loss": 0.7015, "step": 3148 }, { "epoch": 0.96, "learning_rate": 5.275226571158698e-07, "loss": 0.8094, "step": 3149 }, { "epoch": 0.96, "learning_rate": 5.204304066982357e-07, "loss": 0.6906, "step": 3150 }, { "epoch": 0.96, "learning_rate": 5.133859057670332e-07, "loss": 0.7487, "step": 3151 }, { "epoch": 0.96, "learning_rate": 5.063891611204352e-07, "loss": 0.6705, "step": 3152 }, { "epoch": 0.96, "learning_rate": 4.994401795105519e-07, "loss": 0.6909, "step": 3153 }, { "epoch": 0.96, "learning_rate": 4.925389676433745e-07, "loss": 0.7916, "step": 3154 }, { "epoch": 0.96, "learning_rate": 4.856855321788256e-07, "loss": 0.8041, "step": 3155 }, { "epoch": 0.96, "learning_rate": 4.788798797306981e-07, "loss": 0.6801, "step": 3156 }, { "epoch": 0.96, "learning_rate": 4.7212201686668287e-07, "loss": 0.7186, "step": 3157 }, { "epoch": 0.96, "learning_rate": 4.6541195010834093e-07, "loss": 0.7666, "step": 3158 }, { "epoch": 0.96, "learning_rate": 4.587496859311313e-07, "loss": 0.7095, "step": 3159 }, { "epoch": 0.96, "learning_rate": 4.521352307643667e-07, "loss": 0.7874, "step": 3160 }, { "epoch": 0.96, "learning_rate": 4.4556859099121326e-07, "loss": 0.7086, "step": 3161 }, { "epoch": 0.96, "learning_rate": 4.390497729487131e-07, "loss": 0.8259, "step": 3162 }, { "epoch": 0.96, "learning_rate": 4.32578782927745e-07, "loss": 0.6754, "step": 3163 }, { "epoch": 0.96, "learning_rate": 4.2615562717303604e-07, "loss": 0.7278, "step": 3164 }, { "epoch": 0.96, "learning_rate": 4.1978031188316116e-07, "loss": 0.7722, "step": 3165 }, { "epoch": 0.96, "learning_rate": 4.1345284321051006e-07, "loss": 0.7606, "step": 3166 }, { "epoch": 0.96, "learning_rate": 4.071732272613149e-07, "loss": 0.7685, "step": 3167 }, { "epoch": 0.96, "learning_rate": 4.0094147009561157e-07, "loss": 0.7237, "step": 3168 }, { "epoch": 0.96, "learning_rate": 3.9475757772726717e-07, "loss": 0.7099, "step": 3169 }, { "epoch": 0.96, "learning_rate": 3.88621556123947e-07, "loss": 0.7899, "step": 3170 }, { "epoch": 0.96, "learning_rate": 3.8253341120713106e-07, "loss": 0.7778, "step": 3171 }, { "epoch": 0.96, "learning_rate": 3.764931488520751e-07, "loss": 0.6997, "step": 3172 }, { "epoch": 0.96, "learning_rate": 3.705007748878497e-07, "loss": 0.6888, "step": 3173 }, { "epoch": 0.96, "learning_rate": 3.6455629509730136e-07, "loss": 0.7834, "step": 3174 }, { "epoch": 0.96, "learning_rate": 3.5865971521705233e-07, "loss": 0.7468, "step": 3175 }, { "epoch": 0.96, "learning_rate": 3.5281104093751185e-07, "loss": 0.6764, "step": 3176 }, { "epoch": 0.96, "learning_rate": 3.470102779028428e-07, "loss": 0.6854, "step": 3177 }, { "epoch": 0.96, "learning_rate": 3.412574317109896e-07, "loss": 0.7452, "step": 3178 }, { "epoch": 0.96, "learning_rate": 3.35552507913639e-07, "loss": 0.7385, "step": 3179 }, { "epoch": 0.96, "learning_rate": 3.2989551201624835e-07, "loss": 0.7461, "step": 3180 }, { "epoch": 0.96, "learning_rate": 3.2428644947800625e-07, "loss": 0.6688, "step": 3181 }, { "epoch": 0.97, "learning_rate": 3.1872532571184943e-07, "loss": 0.7203, "step": 3182 }, { "epoch": 0.97, "learning_rate": 3.1321214608446283e-07, "loss": 0.7412, "step": 3183 }, { "epoch": 0.97, "learning_rate": 3.077469159162405e-07, "loss": 0.701, "step": 3184 }, { "epoch": 0.97, "learning_rate": 3.023296404813303e-07, "loss": 0.7185, "step": 3185 }, { "epoch": 0.97, "learning_rate": 2.969603250075781e-07, "loss": 0.7309, "step": 3186 }, { "epoch": 0.97, "learning_rate": 2.916389746765724e-07, "loss": 0.774, "step": 3187 }, { "epoch": 0.97, "learning_rate": 2.8636559462358883e-07, "loss": 0.6885, "step": 3188 }, { "epoch": 0.97, "learning_rate": 2.8114018993762316e-07, "loss": 0.7302, "step": 3189 }, { "epoch": 0.97, "learning_rate": 2.759627656613695e-07, "loss": 0.7416, "step": 3190 }, { "epoch": 0.97, "learning_rate": 2.7083332679122e-07, "loss": 0.7404, "step": 3191 }, { "epoch": 0.97, "learning_rate": 2.6575187827725944e-07, "loss": 0.7971, "step": 3192 }, { "epoch": 0.97, "learning_rate": 2.6071842502326527e-07, "loss": 0.7382, "step": 3193 }, { "epoch": 0.97, "learning_rate": 2.557329718866852e-07, "loss": 0.7498, "step": 3194 }, { "epoch": 0.97, "learning_rate": 2.5079552367865964e-07, "loss": 0.6989, "step": 3195 }, { "epoch": 0.97, "learning_rate": 2.459060851639994e-07, "loss": 0.7668, "step": 3196 }, { "epoch": 0.97, "learning_rate": 2.4106466106116333e-07, "loss": 0.7836, "step": 3197 }, { "epoch": 0.97, "learning_rate": 2.3627125604231415e-07, "loss": 0.7555, "step": 3198 }, { "epoch": 0.97, "learning_rate": 2.3152587473323494e-07, "loss": 0.6826, "step": 3199 }, { "epoch": 0.97, "learning_rate": 2.2682852171339585e-07, "loss": 0.7275, "step": 3200 }, { "epoch": 0.97, "learning_rate": 2.2217920151588744e-07, "loss": 0.7243, "step": 3201 }, { "epoch": 0.97, "learning_rate": 2.1757791862748178e-07, "loss": 0.7265, "step": 3202 }, { "epoch": 0.97, "learning_rate": 2.130246774885658e-07, "loss": 0.8124, "step": 3203 }, { "epoch": 0.97, "learning_rate": 2.085194824931691e-07, "loss": 0.7578, "step": 3204 }, { "epoch": 0.97, "learning_rate": 2.0406233798896946e-07, "loss": 0.7603, "step": 3205 }, { "epoch": 0.97, "learning_rate": 1.996532482772595e-07, "loss": 0.7827, "step": 3206 }, { "epoch": 0.97, "learning_rate": 1.9529221761296902e-07, "loss": 0.7477, "step": 3207 }, { "epoch": 0.97, "learning_rate": 1.9097925020463147e-07, "loss": 0.7044, "step": 3208 }, { "epoch": 0.97, "learning_rate": 1.867143502144175e-07, "loss": 0.6887, "step": 3209 }, { "epoch": 0.97, "learning_rate": 1.8249752175809598e-07, "loss": 0.7708, "step": 3210 }, { "epoch": 0.97, "learning_rate": 1.7832876890505612e-07, "loss": 0.7338, "step": 3211 }, { "epoch": 0.97, "learning_rate": 1.7420809567829098e-07, "loss": 0.7242, "step": 3212 }, { "epoch": 0.97, "learning_rate": 1.7013550605438078e-07, "loss": 0.7077, "step": 3213 }, { "epoch": 0.97, "learning_rate": 1.6611100396352609e-07, "loss": 0.7059, "step": 3214 }, { "epoch": 0.98, "learning_rate": 1.6213459328950352e-07, "loss": 0.7185, "step": 3215 }, { "epoch": 0.98, "learning_rate": 1.5820627786968246e-07, "loss": 0.7504, "step": 3216 }, { "epoch": 0.98, "learning_rate": 1.5432606149503036e-07, "loss": 0.6928, "step": 3217 }, { "epoch": 0.98, "learning_rate": 1.5049394791009086e-07, "loss": 0.7407, "step": 3218 }, { "epoch": 0.98, "learning_rate": 1.4670994081297795e-07, "loss": 0.7389, "step": 3219 }, { "epoch": 0.98, "learning_rate": 1.429740438553928e-07, "loss": 0.7752, "step": 3220 }, { "epoch": 0.98, "learning_rate": 1.3928626064260153e-07, "loss": 0.7614, "step": 3221 }, { "epoch": 0.98, "learning_rate": 1.356465947334462e-07, "loss": 0.7509, "step": 3222 }, { "epoch": 0.98, "learning_rate": 1.3205504964032278e-07, "loss": 0.7378, "step": 3223 }, { "epoch": 0.98, "learning_rate": 1.285116288292032e-07, "loss": 0.7648, "step": 3224 }, { "epoch": 0.98, "learning_rate": 1.250163357196077e-07, "loss": 0.7836, "step": 3225 }, { "epoch": 0.98, "learning_rate": 1.215691736846103e-07, "loss": 0.6738, "step": 3226 }, { "epoch": 0.98, "learning_rate": 1.1817014605084442e-07, "loss": 0.7293, "step": 3227 }, { "epoch": 0.98, "learning_rate": 1.1481925609849175e-07, "loss": 0.7444, "step": 3228 }, { "epoch": 0.98, "learning_rate": 1.1151650706127115e-07, "loss": 0.7541, "step": 3229 }, { "epoch": 0.98, "learning_rate": 1.0826190212645526e-07, "loss": 0.7186, "step": 3230 }, { "epoch": 0.98, "learning_rate": 1.0505544443484283e-07, "loss": 0.7299, "step": 3231 }, { "epoch": 0.98, "learning_rate": 1.0189713708078085e-07, "loss": 0.726, "step": 3232 }, { "epoch": 0.98, "learning_rate": 9.878698311214241e-08, "loss": 0.7448, "step": 3233 }, { "epoch": 0.98, "learning_rate": 9.572498553033216e-08, "loss": 0.7289, "step": 3234 }, { "epoch": 0.98, "learning_rate": 9.271114729028086e-08, "loss": 0.7459, "step": 3235 }, { "epoch": 0.98, "learning_rate": 8.974547130045086e-08, "loss": 0.6947, "step": 3236 }, { "epoch": 0.98, "learning_rate": 8.682796042282503e-08, "loss": 0.7614, "step": 3237 }, { "epoch": 0.98, "learning_rate": 8.395861747289013e-08, "loss": 0.7947, "step": 3238 }, { "epoch": 0.98, "learning_rate": 8.113744521967003e-08, "loss": 0.7007, "step": 3239 }, { "epoch": 0.98, "learning_rate": 7.836444638568697e-08, "loss": 0.7076, "step": 3240 }, { "epoch": 0.98, "learning_rate": 7.563962364698918e-08, "loss": 0.7417, "step": 3241 }, { "epoch": 0.98, "learning_rate": 7.296297963311216e-08, "loss": 0.7315, "step": 3242 }, { "epoch": 0.98, "learning_rate": 7.033451692711746e-08, "loss": 0.7377, "step": 3243 }, { "epoch": 0.98, "learning_rate": 6.775423806556492e-08, "loss": 0.7455, "step": 3244 }, { "epoch": 0.98, "learning_rate": 6.522214553850159e-08, "loss": 0.7942, "step": 3245 }, { "epoch": 0.98, "learning_rate": 6.273824178949506e-08, "loss": 0.6822, "step": 3246 }, { "epoch": 0.98, "learning_rate": 6.030252921558899e-08, "loss": 0.7314, "step": 3247 }, { "epoch": 0.99, "learning_rate": 5.791501016733647e-08, "loss": 0.7174, "step": 3248 }, { "epoch": 0.99, "learning_rate": 5.5575686948772245e-08, "loss": 0.7658, "step": 3249 }, { "epoch": 0.99, "learning_rate": 5.3284561817423805e-08, "loss": 0.7354, "step": 3250 }, { "epoch": 0.99, "learning_rate": 5.1041636984305864e-08, "loss": 0.7334, "step": 3251 }, { "epoch": 0.99, "learning_rate": 4.8846914613914776e-08, "loss": 0.7542, "step": 3252 }, { "epoch": 0.99, "learning_rate": 4.6700396824239656e-08, "loss": 0.7447, "step": 3253 }, { "epoch": 0.99, "learning_rate": 4.460208568674018e-08, "loss": 0.7671, "step": 3254 }, { "epoch": 0.99, "learning_rate": 4.255198322636322e-08, "loss": 0.7171, "step": 3255 }, { "epoch": 0.99, "learning_rate": 4.055009142152067e-08, "loss": 0.7892, "step": 3256 }, { "epoch": 0.99, "learning_rate": 3.859641220410604e-08, "loss": 0.7788, "step": 3257 }, { "epoch": 0.99, "learning_rate": 3.669094745950008e-08, "loss": 0.7817, "step": 3258 }, { "epoch": 0.99, "learning_rate": 3.4833699026531884e-08, "loss": 0.7306, "step": 3259 }, { "epoch": 0.99, "learning_rate": 3.3024668697506644e-08, "loss": 0.782, "step": 3260 }, { "epoch": 0.99, "learning_rate": 3.1263858218205655e-08, "loss": 0.717, "step": 3261 }, { "epoch": 0.99, "learning_rate": 2.9551269287875216e-08, "loss": 0.7138, "step": 3262 }, { "epoch": 0.99, "learning_rate": 2.7886903559221077e-08, "loss": 0.8222, "step": 3263 }, { "epoch": 0.99, "learning_rate": 2.6270762638408442e-08, "loss": 0.7206, "step": 3264 }, { "epoch": 0.99, "learning_rate": 2.4702848085073062e-08, "loss": 0.7143, "step": 3265 }, { "epoch": 0.99, "learning_rate": 2.3183161412299036e-08, "loss": 0.725, "step": 3266 }, { "epoch": 0.99, "learning_rate": 2.1711704086646578e-08, "loss": 0.7315, "step": 3267 }, { "epoch": 0.99, "learning_rate": 2.028847752811869e-08, "loss": 0.8005, "step": 3268 }, { "epoch": 0.99, "learning_rate": 1.891348311017782e-08, "loss": 0.7883, "step": 3269 }, { "epoch": 0.99, "learning_rate": 1.7586722159740333e-08, "loss": 0.7752, "step": 3270 }, { "epoch": 0.99, "learning_rate": 1.6308195957182027e-08, "loss": 0.7154, "step": 3271 }, { "epoch": 0.99, "learning_rate": 1.5077905736310406e-08, "loss": 0.8071, "step": 3272 }, { "epoch": 0.99, "learning_rate": 1.3895852684409072e-08, "loss": 0.7682, "step": 3273 }, { "epoch": 0.99, "learning_rate": 1.276203794219888e-08, "loss": 0.7527, "step": 3274 }, { "epoch": 0.99, "learning_rate": 1.1676462603849025e-08, "loss": 0.771, "step": 3275 }, { "epoch": 0.99, "learning_rate": 1.063912771697706e-08, "loss": 0.7247, "step": 3276 }, { "epoch": 0.99, "learning_rate": 9.650034282643327e-09, "loss": 0.7487, "step": 3277 }, { "epoch": 0.99, "learning_rate": 8.709183255362074e-09, "loss": 0.8258, "step": 3278 }, { "epoch": 0.99, "learning_rate": 7.816575543084793e-09, "loss": 0.698, "step": 3279 }, { "epoch": 0.99, "learning_rate": 6.97221200720577e-09, "loss": 0.7494, "step": 3280 }, { "epoch": 1.0, "learning_rate": 6.176093462573196e-09, "loss": 0.733, "step": 3281 }, { "epoch": 1.0, "learning_rate": 5.4282206774614044e-09, "loss": 0.7214, "step": 3282 }, { "epoch": 1.0, "learning_rate": 4.728594373593076e-09, "loss": 0.7024, "step": 3283 }, { "epoch": 1.0, "learning_rate": 4.07721522613369e-09, "loss": 0.7179, "step": 3284 }, { "epoch": 1.0, "learning_rate": 3.474083863691524e-09, "loss": 0.708, "step": 3285 }, { "epoch": 1.0, "learning_rate": 2.9192008683009974e-09, "loss": 0.7516, "step": 3286 }, { "epoch": 1.0, "learning_rate": 2.41256677544488e-09, "loss": 0.7416, "step": 3287 }, { "epoch": 1.0, "learning_rate": 1.9541820740487384e-09, "loss": 0.7628, "step": 3288 }, { "epoch": 1.0, "learning_rate": 1.5440472064587319e-09, "loss": 0.6821, "step": 3289 }, { "epoch": 1.0, "learning_rate": 1.1821625684804715e-09, "loss": 0.8069, "step": 3290 }, { "epoch": 1.0, "learning_rate": 8.685285093401607e-10, "loss": 0.7549, "step": 3291 }, { "epoch": 1.0, "learning_rate": 6.031453317068004e-10, "loss": 0.7707, "step": 3292 }, { "epoch": 1.0, "learning_rate": 3.8601329167553546e-10, "loss": 0.7503, "step": 3293 }, { "epoch": 1.0, "learning_rate": 2.1713259880096203e-10, "loss": 0.7457, "step": 3294 }, { "epoch": 1.0, "learning_rate": 9.650341604716673e-11, "loss": 0.7652, "step": 3295 }, { "epoch": 1.0, "learning_rate": 2.412585983213589e-11, "loss": 0.7451, "step": 3296 }, { "epoch": 1.0, "step": 3296, "total_flos": 3.989940585059068e+20, "train_loss": 0.7938173473134492, "train_runtime": 22674.6528, "train_samples_per_second": 74.442, "train_steps_per_second": 0.145 } ], "logging_steps": 1.0, "max_steps": 3297, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 3.989940585059068e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }