{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 2895, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.7471264367816094e-08, "loss": 2.7126, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.1494252873563219e-07, "loss": 2.425, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.7241379310344828e-07, "loss": 2.3544, "step": 3 }, { "epoch": 0.0, "learning_rate": 2.2988505747126437e-07, "loss": 2.4626, "step": 4 }, { "epoch": 0.0, "learning_rate": 2.8735632183908047e-07, "loss": 2.6959, "step": 5 }, { "epoch": 0.0, "learning_rate": 3.4482758620689656e-07, "loss": 2.5117, "step": 6 }, { "epoch": 0.0, "learning_rate": 4.0229885057471266e-07, "loss": 2.855, "step": 7 }, { "epoch": 0.0, "learning_rate": 4.5977011494252875e-07, "loss": 2.6648, "step": 8 }, { "epoch": 0.0, "learning_rate": 5.172413793103449e-07, "loss": 2.4414, "step": 9 }, { "epoch": 0.0, "learning_rate": 5.747126436781609e-07, "loss": 2.5166, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.321839080459771e-07, "loss": 2.4006, "step": 11 }, { "epoch": 0.0, "learning_rate": 6.896551724137931e-07, "loss": 2.364, "step": 12 }, { "epoch": 0.0, "learning_rate": 7.471264367816093e-07, "loss": 3.1431, "step": 13 }, { "epoch": 0.0, "learning_rate": 8.045977011494253e-07, "loss": 2.1589, "step": 14 }, { "epoch": 0.01, "learning_rate": 8.620689655172415e-07, "loss": 2.276, "step": 15 }, { "epoch": 0.01, "learning_rate": 9.195402298850575e-07, "loss": 2.116, "step": 16 }, { "epoch": 0.01, "learning_rate": 9.770114942528738e-07, "loss": 2.1445, "step": 17 }, { "epoch": 0.01, "learning_rate": 1.0344827586206898e-06, "loss": 2.0437, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.0919540229885058e-06, "loss": 2.2366, "step": 19 }, { "epoch": 0.01, "learning_rate": 1.1494252873563219e-06, "loss": 2.3108, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.2068965517241381e-06, "loss": 2.2736, "step": 21 }, { "epoch": 0.01, "learning_rate": 1.2643678160919542e-06, "loss": 2.2449, "step": 22 }, { "epoch": 0.01, "learning_rate": 1.3218390804597702e-06, "loss": 1.9043, "step": 23 }, { "epoch": 0.01, "learning_rate": 1.3793103448275862e-06, "loss": 2.2062, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.4367816091954023e-06, "loss": 1.9777, "step": 25 }, { "epoch": 0.01, "learning_rate": 1.4942528735632185e-06, "loss": 2.4331, "step": 26 }, { "epoch": 0.01, "learning_rate": 1.5517241379310346e-06, "loss": 2.2592, "step": 27 }, { "epoch": 0.01, "learning_rate": 1.6091954022988506e-06, "loss": 2.049, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.6666666666666667e-06, "loss": 2.1498, "step": 29 }, { "epoch": 0.01, "learning_rate": 1.724137931034483e-06, "loss": 2.0183, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.781609195402299e-06, "loss": 2.1259, "step": 31 }, { "epoch": 0.01, "learning_rate": 1.839080459770115e-06, "loss": 2.4711, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.896551724137931e-06, "loss": 1.9858, "step": 33 }, { "epoch": 0.01, "learning_rate": 1.9540229885057475e-06, "loss": 1.9794, "step": 34 }, { "epoch": 0.01, "learning_rate": 2.0114942528735633e-06, "loss": 2.0211, "step": 35 }, { "epoch": 0.01, "learning_rate": 2.0689655172413796e-06, "loss": 1.902, "step": 36 }, { "epoch": 0.01, "learning_rate": 2.1264367816091954e-06, "loss": 2.0026, "step": 37 }, { "epoch": 0.01, "learning_rate": 2.1839080459770117e-06, "loss": 2.4802, "step": 38 }, { "epoch": 0.01, "learning_rate": 2.241379310344828e-06, "loss": 2.0874, "step": 39 }, { "epoch": 0.01, "learning_rate": 2.2988505747126437e-06, "loss": 1.9207, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.35632183908046e-06, "loss": 1.9673, "step": 41 }, { "epoch": 0.01, "learning_rate": 2.4137931034482762e-06, "loss": 1.9995, "step": 42 }, { "epoch": 0.01, "learning_rate": 2.471264367816092e-06, "loss": 2.184, "step": 43 }, { "epoch": 0.02, "learning_rate": 2.5287356321839083e-06, "loss": 2.1146, "step": 44 }, { "epoch": 0.02, "learning_rate": 2.5862068965517246e-06, "loss": 2.0881, "step": 45 }, { "epoch": 0.02, "learning_rate": 2.6436781609195404e-06, "loss": 1.8828, "step": 46 }, { "epoch": 0.02, "learning_rate": 2.7011494252873567e-06, "loss": 1.7513, "step": 47 }, { "epoch": 0.02, "learning_rate": 2.7586206896551725e-06, "loss": 1.9071, "step": 48 }, { "epoch": 0.02, "learning_rate": 2.8160919540229887e-06, "loss": 1.9152, "step": 49 }, { "epoch": 0.02, "learning_rate": 2.8735632183908046e-06, "loss": 1.8535, "step": 50 }, { "epoch": 0.02, "learning_rate": 2.931034482758621e-06, "loss": 2.2013, "step": 51 }, { "epoch": 0.02, "learning_rate": 2.988505747126437e-06, "loss": 1.9717, "step": 52 }, { "epoch": 0.02, "learning_rate": 3.0459770114942533e-06, "loss": 2.0167, "step": 53 }, { "epoch": 0.02, "learning_rate": 3.103448275862069e-06, "loss": 1.7583, "step": 54 }, { "epoch": 0.02, "learning_rate": 3.1609195402298854e-06, "loss": 1.8179, "step": 55 }, { "epoch": 0.02, "learning_rate": 3.2183908045977012e-06, "loss": 1.8552, "step": 56 }, { "epoch": 0.02, "learning_rate": 3.2758620689655175e-06, "loss": 2.3542, "step": 57 }, { "epoch": 0.02, "learning_rate": 3.3333333333333333e-06, "loss": 2.0376, "step": 58 }, { "epoch": 0.02, "learning_rate": 3.3908045977011496e-06, "loss": 1.8488, "step": 59 }, { "epoch": 0.02, "learning_rate": 3.448275862068966e-06, "loss": 1.9641, "step": 60 }, { "epoch": 0.02, "learning_rate": 3.505747126436782e-06, "loss": 1.9702, "step": 61 }, { "epoch": 0.02, "learning_rate": 3.563218390804598e-06, "loss": 2.1044, "step": 62 }, { "epoch": 0.02, "learning_rate": 3.620689655172414e-06, "loss": 2.2709, "step": 63 }, { "epoch": 0.02, "learning_rate": 3.67816091954023e-06, "loss": 1.6617, "step": 64 }, { "epoch": 0.02, "learning_rate": 3.7356321839080462e-06, "loss": 1.9772, "step": 65 }, { "epoch": 0.02, "learning_rate": 3.793103448275862e-06, "loss": 1.9725, "step": 66 }, { "epoch": 0.02, "learning_rate": 3.850574712643678e-06, "loss": 1.9172, "step": 67 }, { "epoch": 0.02, "learning_rate": 3.908045977011495e-06, "loss": 1.9529, "step": 68 }, { "epoch": 0.02, "learning_rate": 3.96551724137931e-06, "loss": 2.2123, "step": 69 }, { "epoch": 0.02, "learning_rate": 4.022988505747127e-06, "loss": 2.1967, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.080459770114943e-06, "loss": 1.9795, "step": 71 }, { "epoch": 0.02, "learning_rate": 4.137931034482759e-06, "loss": 1.7227, "step": 72 }, { "epoch": 0.03, "learning_rate": 4.1954022988505746e-06, "loss": 1.8639, "step": 73 }, { "epoch": 0.03, "learning_rate": 4.252873563218391e-06, "loss": 2.0745, "step": 74 }, { "epoch": 0.03, "learning_rate": 4.310344827586207e-06, "loss": 1.8057, "step": 75 }, { "epoch": 0.03, "learning_rate": 4.367816091954023e-06, "loss": 1.9341, "step": 76 }, { "epoch": 0.03, "learning_rate": 4.42528735632184e-06, "loss": 1.979, "step": 77 }, { "epoch": 0.03, "learning_rate": 4.482758620689656e-06, "loss": 1.8057, "step": 78 }, { "epoch": 0.03, "learning_rate": 4.540229885057471e-06, "loss": 1.6328, "step": 79 }, { "epoch": 0.03, "learning_rate": 4.5977011494252875e-06, "loss": 1.803, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.655172413793104e-06, "loss": 2.0334, "step": 81 }, { "epoch": 0.03, "learning_rate": 4.71264367816092e-06, "loss": 2.3002, "step": 82 }, { "epoch": 0.03, "learning_rate": 4.770114942528735e-06, "loss": 2.1635, "step": 83 }, { "epoch": 0.03, "learning_rate": 4.8275862068965525e-06, "loss": 1.8455, "step": 84 }, { "epoch": 0.03, "learning_rate": 4.885057471264369e-06, "loss": 1.8735, "step": 85 }, { "epoch": 0.03, "learning_rate": 4.942528735632184e-06, "loss": 1.9312, "step": 86 }, { "epoch": 0.03, "learning_rate": 5e-06, "loss": 1.7037, "step": 87 }, { "epoch": 0.03, "learning_rate": 4.999998435356119e-06, "loss": 2.0765, "step": 88 }, { "epoch": 0.03, "learning_rate": 4.9999937414264324e-06, "loss": 2.1008, "step": 89 }, { "epoch": 0.03, "learning_rate": 4.999985918216818e-06, "loss": 1.7188, "step": 90 }, { "epoch": 0.03, "learning_rate": 4.999974965737065e-06, "loss": 1.7616, "step": 91 }, { "epoch": 0.03, "learning_rate": 4.999960884000886e-06, "loss": 1.8135, "step": 92 }, { "epoch": 0.03, "learning_rate": 4.999943673025905e-06, "loss": 1.8334, "step": 93 }, { "epoch": 0.03, "learning_rate": 4.999923332833667e-06, "loss": 2.1368, "step": 94 }, { "epoch": 0.03, "learning_rate": 4.999899863449631e-06, "loss": 2.1011, "step": 95 }, { "epoch": 0.03, "learning_rate": 4.999873264903175e-06, "loss": 1.8987, "step": 96 }, { "epoch": 0.03, "learning_rate": 4.999843537227591e-06, "loss": 1.6826, "step": 97 }, { "epoch": 0.03, "learning_rate": 4.999810680460092e-06, "loss": 1.7384, "step": 98 }, { "epoch": 0.03, "learning_rate": 4.999774694641803e-06, "loss": 1.7974, "step": 99 }, { "epoch": 0.03, "learning_rate": 4.999735579817769e-06, "loss": 1.8331, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.999693336036951e-06, "loss": 2.1909, "step": 101 }, { "epoch": 0.04, "learning_rate": 4.999647963352225e-06, "loss": 1.9014, "step": 102 }, { "epoch": 0.04, "learning_rate": 4.999599461820387e-06, "loss": 1.7346, "step": 103 }, { "epoch": 0.04, "learning_rate": 4.999547831502145e-06, "loss": 1.7107, "step": 104 }, { "epoch": 0.04, "learning_rate": 4.999493072462126e-06, "loss": 1.8956, "step": 105 }, { "epoch": 0.04, "learning_rate": 4.999435184768873e-06, "loss": 1.658, "step": 106 }, { "epoch": 0.04, "learning_rate": 4.999374168494844e-06, "loss": 2.2114, "step": 107 }, { "epoch": 0.04, "learning_rate": 4.999310023716415e-06, "loss": 2.0304, "step": 108 }, { "epoch": 0.04, "learning_rate": 4.999242750513876e-06, "loss": 1.6919, "step": 109 }, { "epoch": 0.04, "learning_rate": 4.999172348971435e-06, "loss": 2.0115, "step": 110 }, { "epoch": 0.04, "learning_rate": 4.999098819177214e-06, "loss": 1.9218, "step": 111 }, { "epoch": 0.04, "learning_rate": 4.999022161223252e-06, "loss": 1.8375, "step": 112 }, { "epoch": 0.04, "learning_rate": 4.998942375205502e-06, "loss": 1.968, "step": 113 }, { "epoch": 0.04, "learning_rate": 4.998859461223834e-06, "loss": 2.0923, "step": 114 }, { "epoch": 0.04, "learning_rate": 4.998773419382033e-06, "loss": 1.7773, "step": 115 }, { "epoch": 0.04, "learning_rate": 4.9986842497877974e-06, "loss": 1.7469, "step": 116 }, { "epoch": 0.04, "learning_rate": 4.9985919525527435e-06, "loss": 1.8499, "step": 117 }, { "epoch": 0.04, "learning_rate": 4.9984965277924005e-06, "loss": 1.8977, "step": 118 }, { "epoch": 0.04, "learning_rate": 4.998397975626213e-06, "loss": 2.1108, "step": 119 }, { "epoch": 0.04, "learning_rate": 4.998296296177542e-06, "loss": 2.2831, "step": 120 }, { "epoch": 0.04, "learning_rate": 4.998191489573658e-06, "loss": 1.9568, "step": 121 }, { "epoch": 0.04, "learning_rate": 4.998083555945752e-06, "loss": 1.8009, "step": 122 }, { "epoch": 0.04, "learning_rate": 4.997972495428924e-06, "loss": 1.6427, "step": 123 }, { "epoch": 0.04, "learning_rate": 4.9978583081621916e-06, "loss": 1.7247, "step": 124 }, { "epoch": 0.04, "learning_rate": 4.997740994288484e-06, "loss": 1.8075, "step": 125 }, { "epoch": 0.04, "learning_rate": 4.997620553954645e-06, "loss": 2.3145, "step": 126 }, { "epoch": 0.04, "learning_rate": 4.997496987311431e-06, "loss": 2.0317, "step": 127 }, { "epoch": 0.04, "learning_rate": 4.997370294513513e-06, "loss": 1.785, "step": 128 }, { "epoch": 0.04, "learning_rate": 4.997240475719474e-06, "loss": 1.9126, "step": 129 }, { "epoch": 0.04, "learning_rate": 4.997107531091809e-06, "loss": 1.6692, "step": 130 }, { "epoch": 0.05, "learning_rate": 4.996971460796929e-06, "loss": 1.8687, "step": 131 }, { "epoch": 0.05, "learning_rate": 4.996832265005154e-06, "loss": 1.9957, "step": 132 }, { "epoch": 0.05, "learning_rate": 4.996689943890718e-06, "loss": 1.7616, "step": 133 }, { "epoch": 0.05, "learning_rate": 4.996544497631766e-06, "loss": 1.8129, "step": 134 }, { "epoch": 0.05, "learning_rate": 4.996395926410354e-06, "loss": 1.7905, "step": 135 }, { "epoch": 0.05, "learning_rate": 4.9962442304124545e-06, "loss": 1.6907, "step": 136 }, { "epoch": 0.05, "learning_rate": 4.9960894098279446e-06, "loss": 1.9406, "step": 137 }, { "epoch": 0.05, "learning_rate": 4.9959314648506156e-06, "loss": 2.3276, "step": 138 }, { "epoch": 0.05, "learning_rate": 4.995770395678171e-06, "loss": 2.1047, "step": 139 }, { "epoch": 0.05, "learning_rate": 4.995606202512222e-06, "loss": 1.7505, "step": 140 }, { "epoch": 0.05, "learning_rate": 4.995438885558294e-06, "loss": 1.9136, "step": 141 }, { "epoch": 0.05, "learning_rate": 4.9952684450258185e-06, "loss": 1.8525, "step": 142 }, { "epoch": 0.05, "learning_rate": 4.995094881128138e-06, "loss": 2.1827, "step": 143 }, { "epoch": 0.05, "learning_rate": 4.994918194082506e-06, "loss": 1.9668, "step": 144 }, { "epoch": 0.05, "learning_rate": 4.994738384110085e-06, "loss": 1.9009, "step": 145 }, { "epoch": 0.05, "learning_rate": 4.994555451435943e-06, "loss": 1.6691, "step": 146 }, { "epoch": 0.05, "learning_rate": 4.994369396289063e-06, "loss": 1.8245, "step": 147 }, { "epoch": 0.05, "learning_rate": 4.994180218902331e-06, "loss": 1.7714, "step": 148 }, { "epoch": 0.05, "learning_rate": 4.9939879195125454e-06, "loss": 1.7565, "step": 149 }, { "epoch": 0.05, "learning_rate": 4.993792498360407e-06, "loss": 1.7969, "step": 150 }, { "epoch": 0.05, "learning_rate": 4.993593955690529e-06, "loss": 2.4619, "step": 151 }, { "epoch": 0.05, "learning_rate": 4.993392291751431e-06, "loss": 1.7338, "step": 152 }, { "epoch": 0.05, "learning_rate": 4.9931875067955385e-06, "loss": 1.8517, "step": 153 }, { "epoch": 0.05, "learning_rate": 4.992979601079183e-06, "loss": 1.9028, "step": 154 }, { "epoch": 0.05, "learning_rate": 4.9927685748626035e-06, "loss": 1.8336, "step": 155 }, { "epoch": 0.05, "learning_rate": 4.992554428409945e-06, "loss": 1.9169, "step": 156 }, { "epoch": 0.05, "learning_rate": 4.992337161989258e-06, "loss": 1.8737, "step": 157 }, { "epoch": 0.05, "learning_rate": 4.992116775872498e-06, "loss": 2.0925, "step": 158 }, { "epoch": 0.05, "learning_rate": 4.991893270335526e-06, "loss": 1.764, "step": 159 }, { "epoch": 0.06, "learning_rate": 4.991666645658106e-06, "loss": 1.8926, "step": 160 }, { "epoch": 0.06, "learning_rate": 4.991436902123909e-06, "loss": 1.7073, "step": 161 }, { "epoch": 0.06, "learning_rate": 4.9912040400205075e-06, "loss": 2.0762, "step": 162 }, { "epoch": 0.06, "learning_rate": 4.990968059639379e-06, "loss": 2.0638, "step": 163 }, { "epoch": 0.06, "learning_rate": 4.990728961275904e-06, "loss": 1.9783, "step": 164 }, { "epoch": 0.06, "learning_rate": 4.990486745229364e-06, "loss": 1.625, "step": 165 }, { "epoch": 0.06, "learning_rate": 4.990241411802946e-06, "loss": 1.6901, "step": 166 }, { "epoch": 0.06, "learning_rate": 4.989992961303738e-06, "loss": 1.7535, "step": 167 }, { "epoch": 0.06, "learning_rate": 4.989741394042728e-06, "loss": 1.6691, "step": 168 }, { "epoch": 0.06, "learning_rate": 4.989486710334806e-06, "loss": 2.0612, "step": 169 }, { "epoch": 0.06, "learning_rate": 4.989228910498766e-06, "loss": 1.8822, "step": 170 }, { "epoch": 0.06, "learning_rate": 4.988967994857297e-06, "loss": 1.8177, "step": 171 }, { "epoch": 0.06, "learning_rate": 4.988703963736993e-06, "loss": 1.8627, "step": 172 }, { "epoch": 0.06, "learning_rate": 4.988436817468345e-06, "loss": 1.938, "step": 173 }, { "epoch": 0.06, "learning_rate": 4.988166556385744e-06, "loss": 1.7721, "step": 174 }, { "epoch": 0.06, "learning_rate": 4.9878931808274796e-06, "loss": 2.0786, "step": 175 }, { "epoch": 0.06, "learning_rate": 4.987616691135741e-06, "loss": 2.1532, "step": 176 }, { "epoch": 0.06, "learning_rate": 4.987337087656614e-06, "loss": 1.8542, "step": 177 }, { "epoch": 0.06, "learning_rate": 4.9870543707400835e-06, "loss": 1.8748, "step": 178 }, { "epoch": 0.06, "learning_rate": 4.986768540740028e-06, "loss": 1.8253, "step": 179 }, { "epoch": 0.06, "learning_rate": 4.986479598014228e-06, "loss": 1.7429, "step": 180 }, { "epoch": 0.06, "learning_rate": 4.986187542924356e-06, "loss": 1.6228, "step": 181 }, { "epoch": 0.06, "learning_rate": 4.985892375835981e-06, "loss": 2.1307, "step": 182 }, { "epoch": 0.06, "learning_rate": 4.9855940971185705e-06, "loss": 1.8079, "step": 183 }, { "epoch": 0.06, "learning_rate": 4.985292707145482e-06, "loss": 1.7218, "step": 184 }, { "epoch": 0.06, "learning_rate": 4.984988206293972e-06, "loss": 1.6802, "step": 185 }, { "epoch": 0.06, "learning_rate": 4.984680594945187e-06, "loss": 1.9153, "step": 186 }, { "epoch": 0.06, "learning_rate": 4.98436987348417e-06, "loss": 1.6455, "step": 187 }, { "epoch": 0.06, "learning_rate": 4.9840560422998554e-06, "loss": 1.8881, "step": 188 }, { "epoch": 0.07, "learning_rate": 4.983739101785071e-06, "loss": 1.8119, "step": 189 }, { "epoch": 0.07, "learning_rate": 4.983419052336534e-06, "loss": 1.7557, "step": 190 }, { "epoch": 0.07, "learning_rate": 4.983095894354858e-06, "loss": 2.2417, "step": 191 }, { "epoch": 0.07, "learning_rate": 4.982769628244543e-06, "loss": 1.859, "step": 192 }, { "epoch": 0.07, "learning_rate": 4.98244025441398e-06, "loss": 1.8998, "step": 193 }, { "epoch": 0.07, "learning_rate": 4.9821077732754545e-06, "loss": 1.9042, "step": 194 }, { "epoch": 0.07, "learning_rate": 4.981772185245135e-06, "loss": 2.0677, "step": 195 }, { "epoch": 0.07, "learning_rate": 4.981433490743085e-06, "loss": 1.854, "step": 196 }, { "epoch": 0.07, "learning_rate": 4.98109169019325e-06, "loss": 1.7332, "step": 197 }, { "epoch": 0.07, "learning_rate": 4.98074678402347e-06, "loss": 1.7855, "step": 198 }, { "epoch": 0.07, "learning_rate": 4.980398772665468e-06, "loss": 1.8859, "step": 199 }, { "epoch": 0.07, "learning_rate": 4.980047656554856e-06, "loss": 1.9543, "step": 200 }, { "epoch": 0.07, "learning_rate": 4.97969343613113e-06, "loss": 1.9417, "step": 201 }, { "epoch": 0.07, "learning_rate": 4.979336111837673e-06, "loss": 1.6423, "step": 202 }, { "epoch": 0.07, "learning_rate": 4.978975684121755e-06, "loss": 1.6694, "step": 203 }, { "epoch": 0.07, "learning_rate": 4.978612153434527e-06, "loss": 1.8812, "step": 204 }, { "epoch": 0.07, "learning_rate": 4.978245520231026e-06, "loss": 1.8615, "step": 205 }, { "epoch": 0.07, "learning_rate": 4.977875784970173e-06, "loss": 1.6489, "step": 206 }, { "epoch": 0.07, "learning_rate": 4.977502948114772e-06, "loss": 2.1211, "step": 207 }, { "epoch": 0.07, "learning_rate": 4.977127010131506e-06, "loss": 1.7628, "step": 208 }, { "epoch": 0.07, "learning_rate": 4.976747971490945e-06, "loss": 1.7246, "step": 209 }, { "epoch": 0.07, "learning_rate": 4.976365832667536e-06, "loss": 1.7592, "step": 210 }, { "epoch": 0.07, "learning_rate": 4.975980594139608e-06, "loss": 1.9518, "step": 211 }, { "epoch": 0.07, "learning_rate": 4.97559225638937e-06, "loss": 1.8853, "step": 212 }, { "epoch": 0.07, "learning_rate": 4.975200819902911e-06, "loss": 1.9926, "step": 213 }, { "epoch": 0.07, "learning_rate": 4.974806285170196e-06, "loss": 1.868, "step": 214 }, { "epoch": 0.07, "learning_rate": 4.9744086526850724e-06, "loss": 1.8221, "step": 215 }, { "epoch": 0.07, "learning_rate": 4.974007922945261e-06, "loss": 1.7184, "step": 216 }, { "epoch": 0.07, "learning_rate": 4.973604096452361e-06, "loss": 1.8124, "step": 217 }, { "epoch": 0.08, "learning_rate": 4.9731971737118515e-06, "loss": 1.7635, "step": 218 }, { "epoch": 0.08, "learning_rate": 4.97278715523308e-06, "loss": 1.9463, "step": 219 }, { "epoch": 0.08, "learning_rate": 4.972374041529274e-06, "loss": 1.8154, "step": 220 }, { "epoch": 0.08, "learning_rate": 4.971957833117534e-06, "loss": 1.7689, "step": 221 }, { "epoch": 0.08, "learning_rate": 4.971538530518836e-06, "loss": 1.8409, "step": 222 }, { "epoch": 0.08, "learning_rate": 4.971116134258026e-06, "loss": 1.8191, "step": 223 }, { "epoch": 0.08, "learning_rate": 4.970690644863823e-06, "loss": 1.6981, "step": 224 }, { "epoch": 0.08, "learning_rate": 4.970262062868821e-06, "loss": 1.9333, "step": 225 }, { "epoch": 0.08, "learning_rate": 4.969830388809479e-06, "loss": 2.0526, "step": 226 }, { "epoch": 0.08, "learning_rate": 4.969395623226133e-06, "loss": 2.1851, "step": 227 }, { "epoch": 0.08, "learning_rate": 4.968957766662984e-06, "loss": 1.7596, "step": 228 }, { "epoch": 0.08, "learning_rate": 4.968516819668106e-06, "loss": 1.8781, "step": 229 }, { "epoch": 0.08, "learning_rate": 4.968072782793436e-06, "loss": 1.7031, "step": 230 }, { "epoch": 0.08, "learning_rate": 4.967625656594782e-06, "loss": 1.8096, "step": 231 }, { "epoch": 0.08, "learning_rate": 4.967175441631821e-06, "loss": 2.2448, "step": 232 }, { "epoch": 0.08, "learning_rate": 4.966722138468092e-06, "loss": 1.7375, "step": 233 }, { "epoch": 0.08, "learning_rate": 4.966265747671002e-06, "loss": 1.7572, "step": 234 }, { "epoch": 0.08, "learning_rate": 4.9658062698118215e-06, "loss": 1.8229, "step": 235 }, { "epoch": 0.08, "learning_rate": 4.965343705465687e-06, "loss": 1.8838, "step": 236 }, { "epoch": 0.08, "learning_rate": 4.9648780552115975e-06, "loss": 1.9242, "step": 237 }, { "epoch": 0.08, "learning_rate": 4.9644093196324125e-06, "loss": 1.8077, "step": 238 }, { "epoch": 0.08, "learning_rate": 4.963937499314857e-06, "loss": 1.9575, "step": 239 }, { "epoch": 0.08, "learning_rate": 4.963462594849515e-06, "loss": 1.8138, "step": 240 }, { "epoch": 0.08, "learning_rate": 4.962984606830832e-06, "loss": 1.6907, "step": 241 }, { "epoch": 0.08, "learning_rate": 4.962503535857113e-06, "loss": 1.6273, "step": 242 }, { "epoch": 0.08, "learning_rate": 4.962019382530521e-06, "loss": 1.9977, "step": 243 }, { "epoch": 0.08, "learning_rate": 4.961532147457078e-06, "loss": 1.9376, "step": 244 }, { "epoch": 0.08, "learning_rate": 4.961041831246665e-06, "loss": 2.0092, "step": 245 }, { "epoch": 0.08, "learning_rate": 4.960548434513016e-06, "loss": 1.7285, "step": 246 }, { "epoch": 0.09, "learning_rate": 4.960051957873726e-06, "loss": 1.8662, "step": 247 }, { "epoch": 0.09, "learning_rate": 4.959552401950239e-06, "loss": 1.7808, "step": 248 }, { "epoch": 0.09, "learning_rate": 4.95904976736786e-06, "loss": 1.7422, "step": 249 }, { "epoch": 0.09, "learning_rate": 4.958544054755741e-06, "loss": 1.7941, "step": 250 }, { "epoch": 0.09, "learning_rate": 4.958035264746892e-06, "loss": 1.9929, "step": 251 }, { "epoch": 0.09, "learning_rate": 4.957523397978174e-06, "loss": 1.7219, "step": 252 }, { "epoch": 0.09, "learning_rate": 4.957008455090296e-06, "loss": 1.6617, "step": 253 }, { "epoch": 0.09, "learning_rate": 4.956490436727822e-06, "loss": 1.8433, "step": 254 }, { "epoch": 0.09, "learning_rate": 4.955969343539162e-06, "loss": 1.9265, "step": 255 }, { "epoch": 0.09, "learning_rate": 4.955445176176577e-06, "loss": 1.9056, "step": 256 }, { "epoch": 0.09, "learning_rate": 4.954917935296175e-06, "loss": 2.129, "step": 257 }, { "epoch": 0.09, "learning_rate": 4.954387621557911e-06, "loss": 1.9752, "step": 258 }, { "epoch": 0.09, "learning_rate": 4.953854235625587e-06, "loss": 1.7487, "step": 259 }, { "epoch": 0.09, "learning_rate": 4.95331777816685e-06, "loss": 1.6887, "step": 260 }, { "epoch": 0.09, "learning_rate": 4.952778249853192e-06, "loss": 1.8336, "step": 261 }, { "epoch": 0.09, "learning_rate": 4.952235651359949e-06, "loss": 1.7571, "step": 262 }, { "epoch": 0.09, "learning_rate": 4.9516899833663e-06, "loss": 2.3219, "step": 263 }, { "epoch": 0.09, "learning_rate": 4.951141246555265e-06, "loss": 1.8097, "step": 264 }, { "epoch": 0.09, "learning_rate": 4.9505894416137065e-06, "loss": 1.738, "step": 265 }, { "epoch": 0.09, "learning_rate": 4.950034569232327e-06, "loss": 1.8682, "step": 266 }, { "epoch": 0.09, "learning_rate": 4.94947663010567e-06, "loss": 1.7324, "step": 267 }, { "epoch": 0.09, "learning_rate": 4.948915624932114e-06, "loss": 1.8149, "step": 268 }, { "epoch": 0.09, "learning_rate": 4.948351554413879e-06, "loss": 2.1174, "step": 269 }, { "epoch": 0.09, "learning_rate": 4.947784419257021e-06, "loss": 1.9054, "step": 270 }, { "epoch": 0.09, "learning_rate": 4.94721422017143e-06, "loss": 1.8542, "step": 271 }, { "epoch": 0.09, "learning_rate": 4.946640957870835e-06, "loss": 1.964, "step": 272 }, { "epoch": 0.09, "learning_rate": 4.946064633072795e-06, "loss": 1.9568, "step": 273 }, { "epoch": 0.09, "learning_rate": 4.9454852464987054e-06, "loss": 1.6981, "step": 274 }, { "epoch": 0.09, "learning_rate": 4.944902798873794e-06, "loss": 1.7056, "step": 275 }, { "epoch": 0.1, "learning_rate": 4.9443172909271174e-06, "loss": 2.0697, "step": 276 }, { "epoch": 0.1, "learning_rate": 4.943728723391566e-06, "loss": 1.9714, "step": 277 }, { "epoch": 0.1, "learning_rate": 4.943137097003859e-06, "loss": 1.6085, "step": 278 }, { "epoch": 0.1, "learning_rate": 4.942542412504543e-06, "loss": 2.0314, "step": 279 }, { "epoch": 0.1, "learning_rate": 4.941944670637994e-06, "loss": 1.71, "step": 280 }, { "epoch": 0.1, "learning_rate": 4.941343872152415e-06, "loss": 1.792, "step": 281 }, { "epoch": 0.1, "learning_rate": 4.9407400177998335e-06, "loss": 2.2795, "step": 282 }, { "epoch": 0.1, "learning_rate": 4.940133108336105e-06, "loss": 1.8363, "step": 283 }, { "epoch": 0.1, "learning_rate": 4.939523144520905e-06, "loss": 1.6188, "step": 284 }, { "epoch": 0.1, "learning_rate": 4.938910127117735e-06, "loss": 1.5261, "step": 285 }, { "epoch": 0.1, "learning_rate": 4.938294056893919e-06, "loss": 1.7389, "step": 286 }, { "epoch": 0.1, "learning_rate": 4.937674934620601e-06, "loss": 1.5614, "step": 287 }, { "epoch": 0.1, "learning_rate": 4.9370527610727455e-06, "loss": 2.1388, "step": 288 }, { "epoch": 0.1, "learning_rate": 4.936427537029137e-06, "loss": 2.0022, "step": 289 }, { "epoch": 0.1, "learning_rate": 4.935799263272378e-06, "loss": 1.7131, "step": 290 }, { "epoch": 0.1, "learning_rate": 4.935167940588887e-06, "loss": 1.9196, "step": 291 }, { "epoch": 0.1, "learning_rate": 4.934533569768901e-06, "loss": 1.7609, "step": 292 }, { "epoch": 0.1, "learning_rate": 4.933896151606472e-06, "loss": 2.0388, "step": 293 }, { "epoch": 0.1, "learning_rate": 4.933255686899465e-06, "loss": 2.1051, "step": 294 }, { "epoch": 0.1, "learning_rate": 4.93261217644956e-06, "loss": 2.0481, "step": 295 }, { "epoch": 0.1, "learning_rate": 4.9319656210622485e-06, "loss": 1.7111, "step": 296 }, { "epoch": 0.1, "learning_rate": 4.9313160215468336e-06, "loss": 1.8776, "step": 297 }, { "epoch": 0.1, "learning_rate": 4.930663378716429e-06, "loss": 1.9254, "step": 298 }, { "epoch": 0.1, "learning_rate": 4.930007693387957e-06, "loss": 1.5739, "step": 299 }, { "epoch": 0.1, "learning_rate": 4.92934896638215e-06, "loss": 1.8416, "step": 300 }, { "epoch": 0.1, "learning_rate": 4.928687198523546e-06, "loss": 1.816, "step": 301 }, { "epoch": 0.1, "learning_rate": 4.928022390640489e-06, "loss": 1.7281, "step": 302 }, { "epoch": 0.1, "learning_rate": 4.927354543565131e-06, "loss": 1.7129, "step": 303 }, { "epoch": 0.11, "learning_rate": 4.926683658133423e-06, "loss": 1.6382, "step": 304 }, { "epoch": 0.11, "learning_rate": 4.926009735185127e-06, "loss": 1.625, "step": 305 }, { "epoch": 0.11, "learning_rate": 4.925332775563798e-06, "loss": 1.7473, "step": 306 }, { "epoch": 0.11, "learning_rate": 4.924652780116799e-06, "loss": 1.9012, "step": 307 }, { "epoch": 0.11, "learning_rate": 4.9239697496952904e-06, "loss": 1.6865, "step": 308 }, { "epoch": 0.11, "learning_rate": 4.923283685154231e-06, "loss": 1.6992, "step": 309 }, { "epoch": 0.11, "learning_rate": 4.922594587352379e-06, "loss": 1.625, "step": 310 }, { "epoch": 0.11, "learning_rate": 4.921902457152289e-06, "loss": 1.9689, "step": 311 }, { "epoch": 0.11, "learning_rate": 4.921207295420309e-06, "loss": 1.7051, "step": 312 }, { "epoch": 0.11, "learning_rate": 4.9205091030265835e-06, "loss": 2.1389, "step": 313 }, { "epoch": 0.11, "learning_rate": 4.919807880845053e-06, "loss": 1.9307, "step": 314 }, { "epoch": 0.11, "learning_rate": 4.9191036297534455e-06, "loss": 1.572, "step": 315 }, { "epoch": 0.11, "learning_rate": 4.918396350633284e-06, "loss": 1.7227, "step": 316 }, { "epoch": 0.11, "learning_rate": 4.917686044369879e-06, "loss": 1.6267, "step": 317 }, { "epoch": 0.11, "learning_rate": 4.916972711852334e-06, "loss": 1.7241, "step": 318 }, { "epoch": 0.11, "learning_rate": 4.916256353973535e-06, "loss": 1.7939, "step": 319 }, { "epoch": 0.11, "learning_rate": 4.9155369716301605e-06, "loss": 2.0115, "step": 320 }, { "epoch": 0.11, "learning_rate": 4.914814565722671e-06, "loss": 1.6427, "step": 321 }, { "epoch": 0.11, "learning_rate": 4.914089137155314e-06, "loss": 1.7227, "step": 322 }, { "epoch": 0.11, "learning_rate": 4.913360686836117e-06, "loss": 1.681, "step": 323 }, { "epoch": 0.11, "learning_rate": 4.912629215676895e-06, "loss": 1.8427, "step": 324 }, { "epoch": 0.11, "learning_rate": 4.91189472459324e-06, "loss": 1.6587, "step": 325 }, { "epoch": 0.11, "learning_rate": 4.911157214504526e-06, "loss": 2.266, "step": 326 }, { "epoch": 0.11, "learning_rate": 4.9104166863339065e-06, "loss": 1.6953, "step": 327 }, { "epoch": 0.11, "learning_rate": 4.90967314100831e-06, "loss": 1.5491, "step": 328 }, { "epoch": 0.11, "learning_rate": 4.908926579458444e-06, "loss": 1.7158, "step": 329 }, { "epoch": 0.11, "learning_rate": 4.9081770026187915e-06, "loss": 1.6509, "step": 330 }, { "epoch": 0.11, "learning_rate": 4.907424411427608e-06, "loss": 1.8713, "step": 331 }, { "epoch": 0.11, "learning_rate": 4.906668806826925e-06, "loss": 1.8608, "step": 332 }, { "epoch": 0.12, "learning_rate": 4.905910189762543e-06, "loss": 1.6635, "step": 333 }, { "epoch": 0.12, "learning_rate": 4.905148561184033e-06, "loss": 1.7612, "step": 334 }, { "epoch": 0.12, "learning_rate": 4.90438392204474e-06, "loss": 1.8059, "step": 335 }, { "epoch": 0.12, "learning_rate": 4.903616273301773e-06, "loss": 1.828, "step": 336 }, { "epoch": 0.12, "learning_rate": 4.902845615916009e-06, "loss": 1.6395, "step": 337 }, { "epoch": 0.12, "learning_rate": 4.902071950852092e-06, "loss": 2.0361, "step": 338 }, { "epoch": 0.12, "learning_rate": 4.901295279078431e-06, "loss": 1.9907, "step": 339 }, { "epoch": 0.12, "learning_rate": 4.900515601567197e-06, "loss": 1.4902, "step": 340 }, { "epoch": 0.12, "learning_rate": 4.899732919294323e-06, "loss": 1.726, "step": 341 }, { "epoch": 0.12, "learning_rate": 4.8989472332395065e-06, "loss": 1.6626, "step": 342 }, { "epoch": 0.12, "learning_rate": 4.898158544386201e-06, "loss": 1.8291, "step": 343 }, { "epoch": 0.12, "learning_rate": 4.897366853721621e-06, "loss": 1.9325, "step": 344 }, { "epoch": 0.12, "learning_rate": 4.896572162236737e-06, "loss": 2.1219, "step": 345 }, { "epoch": 0.12, "learning_rate": 4.895774470926277e-06, "loss": 1.7058, "step": 346 }, { "epoch": 0.12, "learning_rate": 4.894973780788722e-06, "loss": 1.9067, "step": 347 }, { "epoch": 0.12, "learning_rate": 4.89417009282631e-06, "loss": 1.8574, "step": 348 }, { "epoch": 0.12, "learning_rate": 4.893363408045029e-06, "loss": 1.8591, "step": 349 }, { "epoch": 0.12, "learning_rate": 4.892553727454616e-06, "loss": 1.8195, "step": 350 }, { "epoch": 0.12, "learning_rate": 4.8917410520685635e-06, "loss": 2.1687, "step": 351 }, { "epoch": 0.12, "learning_rate": 4.890925382904108e-06, "loss": 1.6517, "step": 352 }, { "epoch": 0.12, "learning_rate": 4.890106720982235e-06, "loss": 1.5997, "step": 353 }, { "epoch": 0.12, "learning_rate": 4.889285067327676e-06, "loss": 1.8695, "step": 354 }, { "epoch": 0.12, "learning_rate": 4.888460422968908e-06, "loss": 1.7572, "step": 355 }, { "epoch": 0.12, "learning_rate": 4.88763278893815e-06, "loss": 1.6259, "step": 356 }, { "epoch": 0.12, "learning_rate": 4.886802166271365e-06, "loss": 2.2151, "step": 357 }, { "epoch": 0.12, "learning_rate": 4.8859685560082545e-06, "loss": 1.9657, "step": 358 }, { "epoch": 0.12, "learning_rate": 4.885131959192262e-06, "loss": 1.8574, "step": 359 }, { "epoch": 0.12, "learning_rate": 4.884292376870567e-06, "loss": 1.7059, "step": 360 }, { "epoch": 0.12, "learning_rate": 4.883449810094091e-06, "loss": 1.9122, "step": 361 }, { "epoch": 0.13, "learning_rate": 4.882604259917483e-06, "loss": 1.7825, "step": 362 }, { "epoch": 0.13, "learning_rate": 4.881755727399134e-06, "loss": 1.8052, "step": 363 }, { "epoch": 0.13, "learning_rate": 4.880904213601164e-06, "loss": 2.1097, "step": 364 }, { "epoch": 0.13, "learning_rate": 4.880049719589426e-06, "loss": 1.714, "step": 365 }, { "epoch": 0.13, "learning_rate": 4.879192246433502e-06, "loss": 1.6958, "step": 366 }, { "epoch": 0.13, "learning_rate": 4.878331795206705e-06, "loss": 1.7676, "step": 367 }, { "epoch": 0.13, "learning_rate": 4.877468366986074e-06, "loss": 1.6802, "step": 368 }, { "epoch": 0.13, "learning_rate": 4.876601962852378e-06, "loss": 2.1416, "step": 369 }, { "epoch": 0.13, "learning_rate": 4.875732583890104e-06, "loss": 1.7701, "step": 370 }, { "epoch": 0.13, "learning_rate": 4.8748602311874695e-06, "loss": 1.7543, "step": 371 }, { "epoch": 0.13, "learning_rate": 4.87398490583641e-06, "loss": 1.8131, "step": 372 }, { "epoch": 0.13, "learning_rate": 4.873106608932585e-06, "loss": 1.6602, "step": 373 }, { "epoch": 0.13, "learning_rate": 4.872225341575371e-06, "loss": 1.7737, "step": 374 }, { "epoch": 0.13, "learning_rate": 4.8713411048678635e-06, "loss": 1.7151, "step": 375 }, { "epoch": 0.13, "learning_rate": 4.870453899916877e-06, "loss": 2.3633, "step": 376 }, { "epoch": 0.13, "learning_rate": 4.869563727832936e-06, "loss": 1.6776, "step": 377 }, { "epoch": 0.13, "learning_rate": 4.868670589730284e-06, "loss": 1.7703, "step": 378 }, { "epoch": 0.13, "learning_rate": 4.8677744867268764e-06, "loss": 1.6688, "step": 379 }, { "epoch": 0.13, "learning_rate": 4.866875419944378e-06, "loss": 1.7532, "step": 380 }, { "epoch": 0.13, "learning_rate": 4.865973390508164e-06, "loss": 1.6788, "step": 381 }, { "epoch": 0.13, "learning_rate": 4.8650683995473184e-06, "loss": 2.2261, "step": 382 }, { "epoch": 0.13, "learning_rate": 4.864160448194632e-06, "loss": 1.9855, "step": 383 }, { "epoch": 0.13, "learning_rate": 4.863249537586601e-06, "loss": 1.701, "step": 384 }, { "epoch": 0.13, "learning_rate": 4.8623356688634275e-06, "loss": 1.5588, "step": 385 }, { "epoch": 0.13, "learning_rate": 4.861418843169012e-06, "loss": 1.517, "step": 386 }, { "epoch": 0.13, "learning_rate": 4.860499061650962e-06, "loss": 1.7457, "step": 387 }, { "epoch": 0.13, "learning_rate": 4.85957632546058e-06, "loss": 1.9128, "step": 388 }, { "epoch": 0.13, "learning_rate": 4.8586506357528695e-06, "loss": 1.689, "step": 389 }, { "epoch": 0.13, "learning_rate": 4.857721993686531e-06, "loss": 1.6876, "step": 390 }, { "epoch": 0.14, "learning_rate": 4.856790400423958e-06, "loss": 1.7119, "step": 391 }, { "epoch": 0.14, "learning_rate": 4.855855857131242e-06, "loss": 1.6633, "step": 392 }, { "epoch": 0.14, "learning_rate": 4.854918364978163e-06, "loss": 1.7073, "step": 393 }, { "epoch": 0.14, "learning_rate": 4.853977925138195e-06, "loss": 1.9648, "step": 394 }, { "epoch": 0.14, "learning_rate": 4.8530345387885e-06, "loss": 1.8922, "step": 395 }, { "epoch": 0.14, "learning_rate": 4.85208820710993e-06, "loss": 1.8396, "step": 396 }, { "epoch": 0.14, "learning_rate": 4.851138931287024e-06, "loss": 1.6429, "step": 397 }, { "epoch": 0.14, "learning_rate": 4.850186712508002e-06, "loss": 1.6483, "step": 398 }, { "epoch": 0.14, "learning_rate": 4.849231551964771e-06, "loss": 1.6809, "step": 399 }, { "epoch": 0.14, "learning_rate": 4.848273450852921e-06, "loss": 1.6942, "step": 400 }, { "epoch": 0.14, "learning_rate": 4.847312410371723e-06, "loss": 2.2966, "step": 401 }, { "epoch": 0.14, "learning_rate": 4.846348431724123e-06, "loss": 1.7292, "step": 402 }, { "epoch": 0.14, "learning_rate": 4.845381516116749e-06, "loss": 1.7676, "step": 403 }, { "epoch": 0.14, "learning_rate": 4.844411664759903e-06, "loss": 1.6472, "step": 404 }, { "epoch": 0.14, "learning_rate": 4.843438878867563e-06, "loss": 1.7872, "step": 405 }, { "epoch": 0.14, "learning_rate": 4.842463159657381e-06, "loss": 1.8199, "step": 406 }, { "epoch": 0.14, "learning_rate": 4.841484508350679e-06, "loss": 2.0803, "step": 407 }, { "epoch": 0.14, "learning_rate": 4.840502926172449e-06, "loss": 2.0967, "step": 408 }, { "epoch": 0.14, "learning_rate": 4.839518414351352e-06, "loss": 1.8354, "step": 409 }, { "epoch": 0.14, "learning_rate": 4.838530974119717e-06, "loss": 1.8302, "step": 410 }, { "epoch": 0.14, "learning_rate": 4.837540606713538e-06, "loss": 1.6964, "step": 411 }, { "epoch": 0.14, "learning_rate": 4.836547313372472e-06, "loss": 1.7388, "step": 412 }, { "epoch": 0.14, "learning_rate": 4.835551095339839e-06, "loss": 2.069, "step": 413 }, { "epoch": 0.14, "learning_rate": 4.834551953862622e-06, "loss": 1.609, "step": 414 }, { "epoch": 0.14, "learning_rate": 4.83354989019146e-06, "loss": 1.7528, "step": 415 }, { "epoch": 0.14, "learning_rate": 4.832544905580652e-06, "loss": 1.8171, "step": 416 }, { "epoch": 0.14, "learning_rate": 4.831537001288151e-06, "loss": 1.7941, "step": 417 }, { "epoch": 0.14, "learning_rate": 4.830526178575568e-06, "loss": 1.7943, "step": 418 }, { "epoch": 0.14, "learning_rate": 4.829512438708164e-06, "loss": 1.9308, "step": 419 }, { "epoch": 0.15, "learning_rate": 4.8284957829548515e-06, "loss": 1.9082, "step": 420 }, { "epoch": 0.15, "learning_rate": 4.827476212588196e-06, "loss": 1.6912, "step": 421 }, { "epoch": 0.15, "learning_rate": 4.8264537288844084e-06, "loss": 1.4539, "step": 422 }, { "epoch": 0.15, "learning_rate": 4.825428333123346e-06, "loss": 1.6744, "step": 423 }, { "epoch": 0.15, "learning_rate": 4.824400026588513e-06, "loss": 1.7429, "step": 424 }, { "epoch": 0.15, "learning_rate": 4.823368810567056e-06, "loss": 1.6724, "step": 425 }, { "epoch": 0.15, "learning_rate": 4.822334686349765e-06, "loss": 1.9696, "step": 426 }, { "epoch": 0.15, "learning_rate": 4.821297655231067e-06, "loss": 1.6907, "step": 427 }, { "epoch": 0.15, "learning_rate": 4.820257718509029e-06, "loss": 1.6188, "step": 428 }, { "epoch": 0.15, "learning_rate": 4.8192148774853575e-06, "loss": 1.5756, "step": 429 }, { "epoch": 0.15, "learning_rate": 4.818169133465392e-06, "loss": 1.6631, "step": 430 }, { "epoch": 0.15, "learning_rate": 4.817120487758105e-06, "loss": 2.0249, "step": 431 }, { "epoch": 0.15, "learning_rate": 4.816068941676103e-06, "loss": 2.0798, "step": 432 }, { "epoch": 0.15, "learning_rate": 4.815014496535622e-06, "loss": 1.6232, "step": 433 }, { "epoch": 0.15, "learning_rate": 4.813957153656527e-06, "loss": 1.5571, "step": 434 }, { "epoch": 0.15, "learning_rate": 4.81289691436231e-06, "loss": 1.7389, "step": 435 }, { "epoch": 0.15, "learning_rate": 4.8118337799800875e-06, "loss": 1.6548, "step": 436 }, { "epoch": 0.15, "learning_rate": 4.8107677518406025e-06, "loss": 1.6433, "step": 437 }, { "epoch": 0.15, "learning_rate": 4.809698831278217e-06, "loss": 1.8896, "step": 438 }, { "epoch": 0.15, "learning_rate": 4.8086270196309174e-06, "loss": 1.8723, "step": 439 }, { "epoch": 0.15, "learning_rate": 4.807552318240304e-06, "loss": 1.6306, "step": 440 }, { "epoch": 0.15, "learning_rate": 4.806474728451598e-06, "loss": 1.7194, "step": 441 }, { "epoch": 0.15, "learning_rate": 4.805394251613633e-06, "loss": 1.7053, "step": 442 }, { "epoch": 0.15, "learning_rate": 4.804310889078861e-06, "loss": 1.6339, "step": 443 }, { "epoch": 0.15, "learning_rate": 4.803224642203342e-06, "loss": 1.7432, "step": 444 }, { "epoch": 0.15, "learning_rate": 4.8021355123467476e-06, "loss": 1.7236, "step": 445 }, { "epoch": 0.15, "learning_rate": 4.8010435008723585e-06, "loss": 1.8425, "step": 446 }, { "epoch": 0.15, "learning_rate": 4.799948609147061e-06, "loss": 1.6771, "step": 447 }, { "epoch": 0.15, "learning_rate": 4.798850838541348e-06, "loss": 1.7214, "step": 448 }, { "epoch": 0.16, "learning_rate": 4.797750190429316e-06, "loss": 1.7185, "step": 449 }, { "epoch": 0.16, "learning_rate": 4.796646666188663e-06, "loss": 1.5056, "step": 450 }, { "epoch": 0.16, "learning_rate": 4.7955402672006855e-06, "loss": 1.929, "step": 451 }, { "epoch": 0.16, "learning_rate": 4.794430994850281e-06, "loss": 1.5205, "step": 452 }, { "epoch": 0.16, "learning_rate": 4.793318850525943e-06, "loss": 1.6604, "step": 453 }, { "epoch": 0.16, "learning_rate": 4.792203835619758e-06, "loss": 1.8004, "step": 454 }, { "epoch": 0.16, "learning_rate": 4.791085951527408e-06, "loss": 1.6337, "step": 455 }, { "epoch": 0.16, "learning_rate": 4.789965199648165e-06, "loss": 1.7939, "step": 456 }, { "epoch": 0.16, "learning_rate": 4.788841581384891e-06, "loss": 1.9037, "step": 457 }, { "epoch": 0.16, "learning_rate": 4.7877150981440365e-06, "loss": 1.772, "step": 458 }, { "epoch": 0.16, "learning_rate": 4.7865857513356365e-06, "loss": 1.6724, "step": 459 }, { "epoch": 0.16, "learning_rate": 4.785453542373313e-06, "loss": 1.6775, "step": 460 }, { "epoch": 0.16, "learning_rate": 4.784318472674267e-06, "loss": 1.6168, "step": 461 }, { "epoch": 0.16, "learning_rate": 4.783180543659285e-06, "loss": 1.9585, "step": 462 }, { "epoch": 0.16, "learning_rate": 4.782039756752728e-06, "loss": 1.7933, "step": 463 }, { "epoch": 0.16, "learning_rate": 4.780896113382536e-06, "loss": 2.0927, "step": 464 }, { "epoch": 0.16, "learning_rate": 4.779749614980225e-06, "loss": 1.6718, "step": 465 }, { "epoch": 0.16, "learning_rate": 4.778600262980886e-06, "loss": 1.4799, "step": 466 }, { "epoch": 0.16, "learning_rate": 4.777448058823179e-06, "loss": 1.8669, "step": 467 }, { "epoch": 0.16, "learning_rate": 4.776293003949335e-06, "loss": 1.8411, "step": 468 }, { "epoch": 0.16, "learning_rate": 4.775135099805154e-06, "loss": 1.7786, "step": 469 }, { "epoch": 0.16, "learning_rate": 4.7739743478400015e-06, "loss": 1.9187, "step": 470 }, { "epoch": 0.16, "learning_rate": 4.77281074950681e-06, "loss": 1.5656, "step": 471 }, { "epoch": 0.16, "learning_rate": 4.771644306262071e-06, "loss": 1.739, "step": 472 }, { "epoch": 0.16, "learning_rate": 4.77047501956584e-06, "loss": 1.6261, "step": 473 }, { "epoch": 0.16, "learning_rate": 4.769302890881732e-06, "loss": 1.7069, "step": 474 }, { "epoch": 0.16, "learning_rate": 4.768127921676916e-06, "loss": 1.998, "step": 475 }, { "epoch": 0.16, "learning_rate": 4.766950113422121e-06, "loss": 2.4265, "step": 476 }, { "epoch": 0.16, "learning_rate": 4.765769467591626e-06, "loss": 1.7155, "step": 477 }, { "epoch": 0.17, "learning_rate": 4.764585985663263e-06, "loss": 1.8049, "step": 478 }, { "epoch": 0.17, "learning_rate": 4.763399669118414e-06, "loss": 1.8051, "step": 479 }, { "epoch": 0.17, "learning_rate": 4.76221051944201e-06, "loss": 1.812, "step": 480 }, { "epoch": 0.17, "learning_rate": 4.761018538122528e-06, "loss": 1.6136, "step": 481 }, { "epoch": 0.17, "learning_rate": 4.759823726651989e-06, "loss": 2.1241, "step": 482 }, { "epoch": 0.17, "learning_rate": 4.758626086525956e-06, "loss": 1.818, "step": 483 }, { "epoch": 0.17, "learning_rate": 4.757425619243533e-06, "loss": 1.647, "step": 484 }, { "epoch": 0.17, "learning_rate": 4.756222326307364e-06, "loss": 1.5809, "step": 485 }, { "epoch": 0.17, "learning_rate": 4.755016209223629e-06, "loss": 1.8387, "step": 486 }, { "epoch": 0.17, "learning_rate": 4.753807269502041e-06, "loss": 1.7245, "step": 487 }, { "epoch": 0.17, "learning_rate": 4.752595508655849e-06, "loss": 2.1196, "step": 488 }, { "epoch": 0.17, "learning_rate": 4.751380928201834e-06, "loss": 1.9556, "step": 489 }, { "epoch": 0.17, "learning_rate": 4.750163529660303e-06, "loss": 1.5474, "step": 490 }, { "epoch": 0.17, "learning_rate": 4.7489433145550935e-06, "loss": 1.7289, "step": 491 }, { "epoch": 0.17, "learning_rate": 4.747720284413565e-06, "loss": 1.6249, "step": 492 }, { "epoch": 0.17, "learning_rate": 4.746494440766605e-06, "loss": 1.6211, "step": 493 }, { "epoch": 0.17, "learning_rate": 4.7452657851486195e-06, "loss": 2.1083, "step": 494 }, { "epoch": 0.17, "learning_rate": 4.744034319097536e-06, "loss": 1.8129, "step": 495 }, { "epoch": 0.17, "learning_rate": 4.742800044154797e-06, "loss": 1.7332, "step": 496 }, { "epoch": 0.17, "learning_rate": 4.7415629618653655e-06, "loss": 1.8232, "step": 497 }, { "epoch": 0.17, "learning_rate": 4.740323073777716e-06, "loss": 1.568, "step": 498 }, { "epoch": 0.17, "learning_rate": 4.739080381443834e-06, "loss": 1.6915, "step": 499 }, { "epoch": 0.17, "learning_rate": 4.737834886419217e-06, "loss": 1.6617, "step": 500 }, { "epoch": 0.17, "learning_rate": 4.736586590262869e-06, "loss": 1.9706, "step": 501 }, { "epoch": 0.17, "learning_rate": 4.735335494537302e-06, "loss": 1.7797, "step": 502 }, { "epoch": 0.17, "learning_rate": 4.734081600808531e-06, "loss": 1.6351, "step": 503 }, { "epoch": 0.17, "learning_rate": 4.732824910646074e-06, "loss": 1.7429, "step": 504 }, { "epoch": 0.17, "learning_rate": 4.7315654256229495e-06, "loss": 1.8927, "step": 505 }, { "epoch": 0.17, "learning_rate": 4.730303147315672e-06, "loss": 1.6012, "step": 506 }, { "epoch": 0.18, "learning_rate": 4.7290380773042575e-06, "loss": 2.0358, "step": 507 }, { "epoch": 0.18, "learning_rate": 4.727770217172211e-06, "loss": 1.5986, "step": 508 }, { "epoch": 0.18, "learning_rate": 4.726499568506532e-06, "loss": 1.6857, "step": 509 }, { "epoch": 0.18, "learning_rate": 4.725226132897711e-06, "loss": 1.6968, "step": 510 }, { "epoch": 0.18, "learning_rate": 4.723949911939728e-06, "loss": 1.7529, "step": 511 }, { "epoch": 0.18, "learning_rate": 4.722670907230047e-06, "loss": 1.475, "step": 512 }, { "epoch": 0.18, "learning_rate": 4.721389120369617e-06, "loss": 1.9364, "step": 513 }, { "epoch": 0.18, "learning_rate": 4.72010455296287e-06, "loss": 1.7943, "step": 514 }, { "epoch": 0.18, "learning_rate": 4.718817206617719e-06, "loss": 1.6116, "step": 515 }, { "epoch": 0.18, "learning_rate": 4.717527082945555e-06, "loss": 1.5585, "step": 516 }, { "epoch": 0.18, "learning_rate": 4.716234183561245e-06, "loss": 1.8351, "step": 517 }, { "epoch": 0.18, "learning_rate": 4.71493851008313e-06, "loss": 1.5905, "step": 518 }, { "epoch": 0.18, "learning_rate": 4.7136400641330245e-06, "loss": 2.0134, "step": 519 }, { "epoch": 0.18, "learning_rate": 4.712338847336214e-06, "loss": 1.7638, "step": 520 }, { "epoch": 0.18, "learning_rate": 4.71103486132145e-06, "loss": 1.5477, "step": 521 }, { "epoch": 0.18, "learning_rate": 4.7097281077209525e-06, "loss": 1.9656, "step": 522 }, { "epoch": 0.18, "learning_rate": 4.708418588170403e-06, "loss": 1.8575, "step": 523 }, { "epoch": 0.18, "learning_rate": 4.7071063043089496e-06, "loss": 1.8024, "step": 524 }, { "epoch": 0.18, "learning_rate": 4.705791257779196e-06, "loss": 1.8038, "step": 525 }, { "epoch": 0.18, "learning_rate": 4.704473450227205e-06, "loss": 1.8387, "step": 526 }, { "epoch": 0.18, "learning_rate": 4.703152883302498e-06, "loss": 1.7745, "step": 527 }, { "epoch": 0.18, "learning_rate": 4.701829558658047e-06, "loss": 1.8134, "step": 528 }, { "epoch": 0.18, "learning_rate": 4.700503477950278e-06, "loss": 1.8501, "step": 529 }, { "epoch": 0.18, "learning_rate": 4.699174642839067e-06, "loss": 1.5815, "step": 530 }, { "epoch": 0.18, "learning_rate": 4.697843054987738e-06, "loss": 1.8564, "step": 531 }, { "epoch": 0.18, "learning_rate": 4.696508716063055e-06, "loss": 1.9713, "step": 532 }, { "epoch": 0.18, "learning_rate": 4.695171627735236e-06, "loss": 1.5959, "step": 533 }, { "epoch": 0.18, "learning_rate": 4.693831791677931e-06, "loss": 1.6866, "step": 534 }, { "epoch": 0.18, "learning_rate": 4.692489209568234e-06, "loss": 1.524, "step": 535 }, { "epoch": 0.19, "learning_rate": 4.691143883086675e-06, "loss": 1.701, "step": 536 }, { "epoch": 0.19, "learning_rate": 4.68979581391722e-06, "loss": 1.3861, "step": 537 }, { "epoch": 0.19, "learning_rate": 4.6884450037472675e-06, "loss": 2.2195, "step": 538 }, { "epoch": 0.19, "learning_rate": 4.6870914542676464e-06, "loss": 1.9835, "step": 539 }, { "epoch": 0.19, "learning_rate": 4.685735167172616e-06, "loss": 1.646, "step": 540 }, { "epoch": 0.19, "learning_rate": 4.684376144159861e-06, "loss": 1.7354, "step": 541 }, { "epoch": 0.19, "learning_rate": 4.6830143869304904e-06, "loss": 1.6039, "step": 542 }, { "epoch": 0.19, "learning_rate": 4.681649897189036e-06, "loss": 1.6527, "step": 543 }, { "epoch": 0.19, "learning_rate": 4.680282676643452e-06, "loss": 1.7144, "step": 544 }, { "epoch": 0.19, "learning_rate": 4.678912727005107e-06, "loss": 1.863, "step": 545 }, { "epoch": 0.19, "learning_rate": 4.6775400499887894e-06, "loss": 1.5349, "step": 546 }, { "epoch": 0.19, "learning_rate": 4.676164647312699e-06, "loss": 1.6719, "step": 547 }, { "epoch": 0.19, "learning_rate": 4.674786520698448e-06, "loss": 1.6122, "step": 548 }, { "epoch": 0.19, "learning_rate": 4.673405671871057e-06, "loss": 1.7246, "step": 549 }, { "epoch": 0.19, "learning_rate": 4.672022102558958e-06, "loss": 1.8107, "step": 550 }, { "epoch": 0.19, "learning_rate": 4.670635814493985e-06, "loss": 2.1741, "step": 551 }, { "epoch": 0.19, "learning_rate": 4.669246809411373e-06, "loss": 1.8389, "step": 552 }, { "epoch": 0.19, "learning_rate": 4.667855089049764e-06, "loss": 1.6061, "step": 553 }, { "epoch": 0.19, "learning_rate": 4.666460655151194e-06, "loss": 1.611, "step": 554 }, { "epoch": 0.19, "learning_rate": 4.665063509461098e-06, "loss": 1.7712, "step": 555 }, { "epoch": 0.19, "learning_rate": 4.663663653728301e-06, "loss": 1.5791, "step": 556 }, { "epoch": 0.19, "learning_rate": 4.662261089705027e-06, "loss": 2.1592, "step": 557 }, { "epoch": 0.19, "learning_rate": 4.660855819146887e-06, "loss": 1.819, "step": 558 }, { "epoch": 0.19, "learning_rate": 4.659447843812876e-06, "loss": 1.6985, "step": 559 }, { "epoch": 0.19, "learning_rate": 4.65803716546538e-06, "loss": 1.636, "step": 560 }, { "epoch": 0.19, "learning_rate": 4.656623785870168e-06, "loss": 1.9989, "step": 561 }, { "epoch": 0.19, "learning_rate": 4.655207706796385e-06, "loss": 1.7091, "step": 562 }, { "epoch": 0.19, "learning_rate": 4.653788930016562e-06, "loss": 1.8801, "step": 563 }, { "epoch": 0.19, "learning_rate": 4.6523674573066e-06, "loss": 1.6505, "step": 564 }, { "epoch": 0.2, "learning_rate": 4.6509432904457815e-06, "loss": 1.6475, "step": 565 }, { "epoch": 0.2, "learning_rate": 4.649516431216755e-06, "loss": 1.6565, "step": 566 }, { "epoch": 0.2, "learning_rate": 4.648086881405542e-06, "loss": 1.772, "step": 567 }, { "epoch": 0.2, "learning_rate": 4.646654642801533e-06, "loss": 1.7118, "step": 568 }, { "epoch": 0.2, "learning_rate": 4.645219717197483e-06, "loss": 1.8055, "step": 569 }, { "epoch": 0.2, "learning_rate": 4.643782106389506e-06, "loss": 1.868, "step": 570 }, { "epoch": 0.2, "learning_rate": 4.642341812177086e-06, "loss": 1.6547, "step": 571 }, { "epoch": 0.2, "learning_rate": 4.640898836363059e-06, "loss": 1.8109, "step": 572 }, { "epoch": 0.2, "learning_rate": 4.639453180753619e-06, "loss": 1.7761, "step": 573 }, { "epoch": 0.2, "learning_rate": 4.638004847158316e-06, "loss": 1.6091, "step": 574 }, { "epoch": 0.2, "learning_rate": 4.636553837390051e-06, "loss": 1.5925, "step": 575 }, { "epoch": 0.2, "learning_rate": 4.635100153265075e-06, "loss": 1.9025, "step": 576 }, { "epoch": 0.2, "learning_rate": 4.633643796602985e-06, "loss": 1.7584, "step": 577 }, { "epoch": 0.2, "learning_rate": 4.632184769226727e-06, "loss": 1.658, "step": 578 }, { "epoch": 0.2, "learning_rate": 4.630723072962584e-06, "loss": 1.7423, "step": 579 }, { "epoch": 0.2, "learning_rate": 4.6292587096401866e-06, "loss": 1.7279, "step": 580 }, { "epoch": 0.2, "learning_rate": 4.627791681092499e-06, "loss": 1.6208, "step": 581 }, { "epoch": 0.2, "learning_rate": 4.6263219891558245e-06, "loss": 2.0728, "step": 582 }, { "epoch": 0.2, "learning_rate": 4.624849635669797e-06, "loss": 1.7264, "step": 583 }, { "epoch": 0.2, "learning_rate": 4.623374622477384e-06, "loss": 1.8367, "step": 584 }, { "epoch": 0.2, "learning_rate": 4.621896951424882e-06, "loss": 1.802, "step": 585 }, { "epoch": 0.2, "learning_rate": 4.620416624361914e-06, "loss": 1.66, "step": 586 }, { "epoch": 0.2, "learning_rate": 4.6189336431414275e-06, "loss": 1.6498, "step": 587 }, { "epoch": 0.2, "learning_rate": 4.6174480096196936e-06, "loss": 1.9319, "step": 588 }, { "epoch": 0.2, "learning_rate": 4.615959725656301e-06, "loss": 1.73, "step": 589 }, { "epoch": 0.2, "learning_rate": 4.614468793114158e-06, "loss": 1.7504, "step": 590 }, { "epoch": 0.2, "learning_rate": 4.612975213859487e-06, "loss": 1.7698, "step": 591 }, { "epoch": 0.2, "learning_rate": 4.611478989761824e-06, "loss": 1.4825, "step": 592 }, { "epoch": 0.2, "learning_rate": 4.609980122694015e-06, "loss": 1.6835, "step": 593 }, { "epoch": 0.21, "learning_rate": 4.608478614532215e-06, "loss": 2.0554, "step": 594 }, { "epoch": 0.21, "learning_rate": 4.606974467155884e-06, "loss": 1.9517, "step": 595 }, { "epoch": 0.21, "learning_rate": 4.605467682447785e-06, "loss": 1.6522, "step": 596 }, { "epoch": 0.21, "learning_rate": 4.6039582622939855e-06, "loss": 1.6697, "step": 597 }, { "epoch": 0.21, "learning_rate": 4.602446208583847e-06, "loss": 1.7078, "step": 598 }, { "epoch": 0.21, "learning_rate": 4.600931523210032e-06, "loss": 1.6674, "step": 599 }, { "epoch": 0.21, "learning_rate": 4.5994142080684956e-06, "loss": 1.7052, "step": 600 }, { "epoch": 0.21, "learning_rate": 4.597894265058481e-06, "loss": 1.7789, "step": 601 }, { "epoch": 0.21, "learning_rate": 4.596371696082527e-06, "loss": 1.8427, "step": 602 }, { "epoch": 0.21, "learning_rate": 4.594846503046453e-06, "loss": 1.6661, "step": 603 }, { "epoch": 0.21, "learning_rate": 4.593318687859369e-06, "loss": 1.8199, "step": 604 }, { "epoch": 0.21, "learning_rate": 4.591788252433664e-06, "loss": 1.6559, "step": 605 }, { "epoch": 0.21, "learning_rate": 4.590255198685006e-06, "loss": 1.7515, "step": 606 }, { "epoch": 0.21, "learning_rate": 4.588719528532342e-06, "loss": 2.0728, "step": 607 }, { "epoch": 0.21, "learning_rate": 4.587181243897893e-06, "loss": 1.9795, "step": 608 }, { "epoch": 0.21, "learning_rate": 4.585640346707153e-06, "loss": 1.7863, "step": 609 }, { "epoch": 0.21, "learning_rate": 4.584096838888889e-06, "loss": 1.5801, "step": 610 }, { "epoch": 0.21, "learning_rate": 4.5825507223751294e-06, "loss": 1.6801, "step": 611 }, { "epoch": 0.21, "learning_rate": 4.581001999101173e-06, "loss": 1.743, "step": 612 }, { "epoch": 0.21, "learning_rate": 4.579450671005581e-06, "loss": 2.0109, "step": 613 }, { "epoch": 0.21, "learning_rate": 4.577896740030174e-06, "loss": 1.8135, "step": 614 }, { "epoch": 0.21, "learning_rate": 4.5763402081200295e-06, "loss": 1.5588, "step": 615 }, { "epoch": 0.21, "learning_rate": 4.574781077223483e-06, "loss": 1.7463, "step": 616 }, { "epoch": 0.21, "learning_rate": 4.5732193492921225e-06, "loss": 1.5564, "step": 617 }, { "epoch": 0.21, "learning_rate": 4.571655026280785e-06, "loss": 1.6451, "step": 618 }, { "epoch": 0.21, "learning_rate": 4.570088110147559e-06, "loss": 1.8606, "step": 619 }, { "epoch": 0.21, "learning_rate": 4.568518602853776e-06, "loss": 1.7297, "step": 620 }, { "epoch": 0.21, "learning_rate": 4.5669465063640135e-06, "loss": 1.5826, "step": 621 }, { "epoch": 0.21, "learning_rate": 4.565371822646086e-06, "loss": 1.7098, "step": 622 }, { "epoch": 0.22, "learning_rate": 4.56379455367105e-06, "loss": 1.5327, "step": 623 }, { "epoch": 0.22, "learning_rate": 4.562214701413199e-06, "loss": 1.6626, "step": 624 }, { "epoch": 0.22, "learning_rate": 4.560632267850054e-06, "loss": 1.9491, "step": 625 }, { "epoch": 0.22, "learning_rate": 4.559047254962373e-06, "loss": 2.2792, "step": 626 }, { "epoch": 0.22, "learning_rate": 4.5574596647341414e-06, "loss": 1.8392, "step": 627 }, { "epoch": 0.22, "learning_rate": 4.555869499152569e-06, "loss": 1.531, "step": 628 }, { "epoch": 0.22, "learning_rate": 4.55427676020809e-06, "loss": 1.7469, "step": 629 }, { "epoch": 0.22, "learning_rate": 4.55268144989436e-06, "loss": 1.7002, "step": 630 }, { "epoch": 0.22, "learning_rate": 4.551083570208251e-06, "loss": 1.821, "step": 631 }, { "epoch": 0.22, "learning_rate": 4.549483123149857e-06, "loss": 2.1182, "step": 632 }, { "epoch": 0.22, "learning_rate": 4.54788011072248e-06, "loss": 1.8763, "step": 633 }, { "epoch": 0.22, "learning_rate": 4.546274534932634e-06, "loss": 1.6222, "step": 634 }, { "epoch": 0.22, "learning_rate": 4.544666397790043e-06, "loss": 1.796, "step": 635 }, { "epoch": 0.22, "learning_rate": 4.543055701307637e-06, "loss": 1.6084, "step": 636 }, { "epoch": 0.22, "learning_rate": 4.541442447501549e-06, "loss": 2.035, "step": 637 }, { "epoch": 0.22, "learning_rate": 4.539826638391113e-06, "loss": 1.9061, "step": 638 }, { "epoch": 0.22, "learning_rate": 4.538208275998861e-06, "loss": 2.0668, "step": 639 }, { "epoch": 0.22, "learning_rate": 4.536587362350523e-06, "loss": 1.6511, "step": 640 }, { "epoch": 0.22, "learning_rate": 4.53496389947502e-06, "loss": 1.6191, "step": 641 }, { "epoch": 0.22, "learning_rate": 4.533337889404466e-06, "loss": 1.9313, "step": 642 }, { "epoch": 0.22, "learning_rate": 4.531709334174161e-06, "loss": 1.6699, "step": 643 }, { "epoch": 0.22, "learning_rate": 4.5300782358225935e-06, "loss": 1.7368, "step": 644 }, { "epoch": 0.22, "learning_rate": 4.528444596391433e-06, "loss": 1.9272, "step": 645 }, { "epoch": 0.22, "learning_rate": 4.526808417925531e-06, "loss": 2.0154, "step": 646 }, { "epoch": 0.22, "learning_rate": 4.525169702472917e-06, "loss": 1.7787, "step": 647 }, { "epoch": 0.22, "learning_rate": 4.523528452084796e-06, "loss": 1.7227, "step": 648 }, { "epoch": 0.22, "learning_rate": 4.521884668815545e-06, "loss": 1.7294, "step": 649 }, { "epoch": 0.22, "learning_rate": 4.5202383547227134e-06, "loss": 1.5785, "step": 650 }, { "epoch": 0.22, "learning_rate": 4.518589511867018e-06, "loss": 2.1024, "step": 651 }, { "epoch": 0.23, "learning_rate": 4.516938142312338e-06, "loss": 1.5596, "step": 652 }, { "epoch": 0.23, "learning_rate": 4.5152842481257186e-06, "loss": 1.6089, "step": 653 }, { "epoch": 0.23, "learning_rate": 4.513627831377365e-06, "loss": 1.6814, "step": 654 }, { "epoch": 0.23, "learning_rate": 4.511968894140639e-06, "loss": 1.5472, "step": 655 }, { "epoch": 0.23, "learning_rate": 4.510307438492057e-06, "loss": 1.6272, "step": 656 }, { "epoch": 0.23, "learning_rate": 4.508643466511286e-06, "loss": 1.8386, "step": 657 }, { "epoch": 0.23, "learning_rate": 4.506976980281149e-06, "loss": 1.4637, "step": 658 }, { "epoch": 0.23, "learning_rate": 4.50530798188761e-06, "loss": 1.632, "step": 659 }, { "epoch": 0.23, "learning_rate": 4.503636473419779e-06, "loss": 1.6591, "step": 660 }, { "epoch": 0.23, "learning_rate": 4.501962456969908e-06, "loss": 1.8883, "step": 661 }, { "epoch": 0.23, "learning_rate": 4.500285934633391e-06, "loss": 1.827, "step": 662 }, { "epoch": 0.23, "learning_rate": 4.498606908508754e-06, "loss": 1.8048, "step": 663 }, { "epoch": 0.23, "learning_rate": 4.496925380697659e-06, "loss": 1.8291, "step": 664 }, { "epoch": 0.23, "learning_rate": 4.495241353304902e-06, "loss": 1.635, "step": 665 }, { "epoch": 0.23, "learning_rate": 4.4935548284384044e-06, "loss": 1.4899, "step": 666 }, { "epoch": 0.23, "learning_rate": 4.491865808209215e-06, "loss": 1.6028, "step": 667 }, { "epoch": 0.23, "learning_rate": 4.490174294731506e-06, "loss": 1.6913, "step": 668 }, { "epoch": 0.23, "learning_rate": 4.48848029012257e-06, "loss": 1.8993, "step": 669 }, { "epoch": 0.23, "learning_rate": 4.486783796502819e-06, "loss": 1.8142, "step": 670 }, { "epoch": 0.23, "learning_rate": 4.485084815995778e-06, "loss": 1.572, "step": 671 }, { "epoch": 0.23, "learning_rate": 4.4833833507280884e-06, "loss": 1.7372, "step": 672 }, { "epoch": 0.23, "learning_rate": 4.481679402829499e-06, "loss": 1.641, "step": 673 }, { "epoch": 0.23, "learning_rate": 4.479972974432867e-06, "loss": 1.6334, "step": 674 }, { "epoch": 0.23, "learning_rate": 4.478264067674155e-06, "loss": 1.8, "step": 675 }, { "epoch": 0.23, "learning_rate": 4.476552684692428e-06, "loss": 2.1053, "step": 676 }, { "epoch": 0.23, "learning_rate": 4.474838827629848e-06, "loss": 1.7045, "step": 677 }, { "epoch": 0.23, "learning_rate": 4.473122498631679e-06, "loss": 1.8032, "step": 678 }, { "epoch": 0.23, "learning_rate": 4.4714036998462715e-06, "loss": 1.6067, "step": 679 }, { "epoch": 0.23, "learning_rate": 4.469682433425076e-06, "loss": 1.9213, "step": 680 }, { "epoch": 0.24, "learning_rate": 4.4679587015226255e-06, "loss": 1.6627, "step": 681 }, { "epoch": 0.24, "learning_rate": 4.4662325062965425e-06, "loss": 1.9966, "step": 682 }, { "epoch": 0.24, "learning_rate": 4.46450384990753e-06, "loss": 1.9271, "step": 683 }, { "epoch": 0.24, "learning_rate": 4.462772734519375e-06, "loss": 1.5444, "step": 684 }, { "epoch": 0.24, "learning_rate": 4.46103916229894e-06, "loss": 1.4606, "step": 685 }, { "epoch": 0.24, "learning_rate": 4.459303135416163e-06, "loss": 1.7762, "step": 686 }, { "epoch": 0.24, "learning_rate": 4.457564656044056e-06, "loss": 1.8307, "step": 687 }, { "epoch": 0.24, "learning_rate": 4.4558237263586995e-06, "loss": 2.0927, "step": 688 }, { "epoch": 0.24, "learning_rate": 4.454080348539241e-06, "loss": 1.79, "step": 689 }, { "epoch": 0.24, "learning_rate": 4.452334524767895e-06, "loss": 1.7286, "step": 690 }, { "epoch": 0.24, "learning_rate": 4.450586257229932e-06, "loss": 1.7914, "step": 691 }, { "epoch": 0.24, "learning_rate": 4.448835548113688e-06, "loss": 1.7086, "step": 692 }, { "epoch": 0.24, "learning_rate": 4.447082399610549e-06, "loss": 1.7506, "step": 693 }, { "epoch": 0.24, "learning_rate": 4.445326813914962e-06, "loss": 1.7959, "step": 694 }, { "epoch": 0.24, "learning_rate": 4.443568793224415e-06, "loss": 1.8273, "step": 695 }, { "epoch": 0.24, "learning_rate": 4.441808339739453e-06, "loss": 1.6317, "step": 696 }, { "epoch": 0.24, "learning_rate": 4.44004545566366e-06, "loss": 1.5846, "step": 697 }, { "epoch": 0.24, "learning_rate": 4.438280143203665e-06, "loss": 1.5472, "step": 698 }, { "epoch": 0.24, "learning_rate": 4.436512404569136e-06, "loss": 1.7263, "step": 699 }, { "epoch": 0.24, "learning_rate": 4.43474224197278e-06, "loss": 1.7725, "step": 700 }, { "epoch": 0.24, "learning_rate": 4.432969657630335e-06, "loss": 2.0526, "step": 701 }, { "epoch": 0.24, "learning_rate": 4.431194653760571e-06, "loss": 1.7046, "step": 702 }, { "epoch": 0.24, "learning_rate": 4.429417232585288e-06, "loss": 1.7502, "step": 703 }, { "epoch": 0.24, "learning_rate": 4.427637396329311e-06, "loss": 1.6365, "step": 704 }, { "epoch": 0.24, "learning_rate": 4.425855147220487e-06, "loss": 1.5908, "step": 705 }, { "epoch": 0.24, "learning_rate": 4.424070487489685e-06, "loss": 1.5892, "step": 706 }, { "epoch": 0.24, "learning_rate": 4.42228341937079e-06, "loss": 1.9229, "step": 707 }, { "epoch": 0.24, "learning_rate": 4.420493945100702e-06, "loss": 1.7855, "step": 708 }, { "epoch": 0.24, "learning_rate": 4.418702066919333e-06, "loss": 1.8712, "step": 709 }, { "epoch": 0.25, "learning_rate": 4.416907787069605e-06, "loss": 1.6011, "step": 710 }, { "epoch": 0.25, "learning_rate": 4.415111107797445e-06, "loss": 1.624, "step": 711 }, { "epoch": 0.25, "learning_rate": 4.413312031351784e-06, "loss": 1.7113, "step": 712 }, { "epoch": 0.25, "learning_rate": 4.411510559984551e-06, "loss": 2.0076, "step": 713 }, { "epoch": 0.25, "learning_rate": 4.4097066959506765e-06, "loss": 1.8783, "step": 714 }, { "epoch": 0.25, "learning_rate": 4.407900441508084e-06, "loss": 1.5155, "step": 715 }, { "epoch": 0.25, "learning_rate": 4.406091798917689e-06, "loss": 1.8436, "step": 716 }, { "epoch": 0.25, "learning_rate": 4.404280770443398e-06, "loss": 1.6487, "step": 717 }, { "epoch": 0.25, "learning_rate": 4.4024673583521024e-06, "loss": 1.7977, "step": 718 }, { "epoch": 0.25, "learning_rate": 4.400651564913676e-06, "loss": 1.8883, "step": 719 }, { "epoch": 0.25, "learning_rate": 4.398833392400977e-06, "loss": 1.748, "step": 720 }, { "epoch": 0.25, "learning_rate": 4.3970128430898385e-06, "loss": 1.98, "step": 721 }, { "epoch": 0.25, "learning_rate": 4.395189919259069e-06, "loss": 1.937, "step": 722 }, { "epoch": 0.25, "learning_rate": 4.3933646231904505e-06, "loss": 1.7778, "step": 723 }, { "epoch": 0.25, "learning_rate": 4.391536957168733e-06, "loss": 1.7026, "step": 724 }, { "epoch": 0.25, "learning_rate": 4.389706923481633e-06, "loss": 1.656, "step": 725 }, { "epoch": 0.25, "learning_rate": 4.387874524419835e-06, "loss": 2.1406, "step": 726 }, { "epoch": 0.25, "learning_rate": 4.386039762276976e-06, "loss": 1.7344, "step": 727 }, { "epoch": 0.25, "learning_rate": 4.384202639349658e-06, "loss": 1.585, "step": 728 }, { "epoch": 0.25, "learning_rate": 4.382363157937436e-06, "loss": 1.531, "step": 729 }, { "epoch": 0.25, "learning_rate": 4.380521320342815e-06, "loss": 1.7162, "step": 730 }, { "epoch": 0.25, "learning_rate": 4.378677128871251e-06, "loss": 1.7798, "step": 731 }, { "epoch": 0.25, "learning_rate": 4.3768305858311465e-06, "loss": 1.9088, "step": 732 }, { "epoch": 0.25, "learning_rate": 4.3749816935338485e-06, "loss": 1.8926, "step": 733 }, { "epoch": 0.25, "learning_rate": 4.373130454293643e-06, "loss": 1.5885, "step": 734 }, { "epoch": 0.25, "learning_rate": 4.3712768704277535e-06, "loss": 1.7736, "step": 735 }, { "epoch": 0.25, "learning_rate": 4.369420944256338e-06, "loss": 1.858, "step": 736 }, { "epoch": 0.25, "learning_rate": 4.367562678102491e-06, "loss": 1.5923, "step": 737 }, { "epoch": 0.25, "learning_rate": 4.365702074292227e-06, "loss": 1.8184, "step": 738 }, { "epoch": 0.26, "learning_rate": 4.363839135154497e-06, "loss": 1.7145, "step": 739 }, { "epoch": 0.26, "learning_rate": 4.361973863021167e-06, "loss": 1.4647, "step": 740 }, { "epoch": 0.26, "learning_rate": 4.360106260227027e-06, "loss": 1.8484, "step": 741 }, { "epoch": 0.26, "learning_rate": 4.358236329109783e-06, "loss": 1.6688, "step": 742 }, { "epoch": 0.26, "learning_rate": 4.356364072010059e-06, "loss": 1.5632, "step": 743 }, { "epoch": 0.26, "learning_rate": 4.354489491271383e-06, "loss": 1.8214, "step": 744 }, { "epoch": 0.26, "learning_rate": 4.3526125892401985e-06, "loss": 2.0405, "step": 745 }, { "epoch": 0.26, "learning_rate": 4.350733368265853e-06, "loss": 1.6229, "step": 746 }, { "epoch": 0.26, "learning_rate": 4.348851830700594e-06, "loss": 1.7039, "step": 747 }, { "epoch": 0.26, "learning_rate": 4.346967978899571e-06, "loss": 1.5597, "step": 748 }, { "epoch": 0.26, "learning_rate": 4.34508181522083e-06, "loss": 1.8022, "step": 749 }, { "epoch": 0.26, "learning_rate": 4.34319334202531e-06, "loss": 1.5382, "step": 750 }, { "epoch": 0.26, "learning_rate": 4.3413025616768426e-06, "loss": 1.9211, "step": 751 }, { "epoch": 0.26, "learning_rate": 4.3394094765421455e-06, "loss": 1.7253, "step": 752 }, { "epoch": 0.26, "learning_rate": 4.337514088990822e-06, "loss": 1.7549, "step": 753 }, { "epoch": 0.26, "learning_rate": 4.335616401395357e-06, "loss": 1.6497, "step": 754 }, { "epoch": 0.26, "learning_rate": 4.333716416131115e-06, "loss": 1.8059, "step": 755 }, { "epoch": 0.26, "learning_rate": 4.3318141355763355e-06, "loss": 1.7767, "step": 756 }, { "epoch": 0.26, "learning_rate": 4.329909562112133e-06, "loss": 1.8728, "step": 757 }, { "epoch": 0.26, "learning_rate": 4.3280026981224904e-06, "loss": 1.675, "step": 758 }, { "epoch": 0.26, "learning_rate": 4.326093545994258e-06, "loss": 1.6379, "step": 759 }, { "epoch": 0.26, "learning_rate": 4.3241821081171505e-06, "loss": 1.6837, "step": 760 }, { "epoch": 0.26, "learning_rate": 4.322268386883744e-06, "loss": 1.5621, "step": 761 }, { "epoch": 0.26, "learning_rate": 4.3203523846894715e-06, "loss": 1.5516, "step": 762 }, { "epoch": 0.26, "learning_rate": 4.318434103932622e-06, "loss": 1.7996, "step": 763 }, { "epoch": 0.26, "learning_rate": 4.316513547014337e-06, "loss": 1.9899, "step": 764 }, { "epoch": 0.26, "learning_rate": 4.3145907163386064e-06, "loss": 1.5629, "step": 765 }, { "epoch": 0.26, "learning_rate": 4.312665614312266e-06, "loss": 1.72, "step": 766 }, { "epoch": 0.26, "learning_rate": 4.310738243344996e-06, "loss": 1.6548, "step": 767 }, { "epoch": 0.27, "learning_rate": 4.308808605849315e-06, "loss": 1.7496, "step": 768 }, { "epoch": 0.27, "learning_rate": 4.306876704240579e-06, "loss": 1.8818, "step": 769 }, { "epoch": 0.27, "learning_rate": 4.304942540936979e-06, "loss": 1.7621, "step": 770 }, { "epoch": 0.27, "learning_rate": 4.303006118359536e-06, "loss": 1.7621, "step": 771 }, { "epoch": 0.27, "learning_rate": 4.301067438932101e-06, "loss": 1.7537, "step": 772 }, { "epoch": 0.27, "learning_rate": 4.2991265050813465e-06, "loss": 1.8977, "step": 773 }, { "epoch": 0.27, "learning_rate": 4.29718331923677e-06, "loss": 1.9125, "step": 774 }, { "epoch": 0.27, "learning_rate": 4.2952378838306855e-06, "loss": 1.8668, "step": 775 }, { "epoch": 0.27, "learning_rate": 4.293290201298224e-06, "loss": 1.9398, "step": 776 }, { "epoch": 0.27, "learning_rate": 4.29134027407733e-06, "loss": 1.5236, "step": 777 }, { "epoch": 0.27, "learning_rate": 4.289388104608756e-06, "loss": 1.6025, "step": 778 }, { "epoch": 0.27, "learning_rate": 4.287433695336062e-06, "loss": 1.6011, "step": 779 }, { "epoch": 0.27, "learning_rate": 4.285477048705612e-06, "loss": 1.6393, "step": 780 }, { "epoch": 0.27, "learning_rate": 4.283518167166571e-06, "loss": 1.6724, "step": 781 }, { "epoch": 0.27, "learning_rate": 4.281557053170898e-06, "loss": 1.9319, "step": 782 }, { "epoch": 0.27, "learning_rate": 4.279593709173352e-06, "loss": 1.7316, "step": 783 }, { "epoch": 0.27, "learning_rate": 4.2776281376314785e-06, "loss": 1.724, "step": 784 }, { "epoch": 0.27, "learning_rate": 4.275660341005614e-06, "loss": 1.5801, "step": 785 }, { "epoch": 0.27, "learning_rate": 4.273690321758879e-06, "loss": 2.0066, "step": 786 }, { "epoch": 0.27, "learning_rate": 4.2717180823571754e-06, "loss": 1.8422, "step": 787 }, { "epoch": 0.27, "learning_rate": 4.269743625269186e-06, "loss": 1.7437, "step": 788 }, { "epoch": 0.27, "learning_rate": 4.267766952966369e-06, "loss": 1.7069, "step": 789 }, { "epoch": 0.27, "learning_rate": 4.265788067922955e-06, "loss": 1.4509, "step": 790 }, { "epoch": 0.27, "learning_rate": 4.263806972615943e-06, "loss": 1.5903, "step": 791 }, { "epoch": 0.27, "learning_rate": 4.2618236695251e-06, "loss": 1.7574, "step": 792 }, { "epoch": 0.27, "learning_rate": 4.259838161132957e-06, "loss": 1.7194, "step": 793 }, { "epoch": 0.27, "learning_rate": 4.257850449924806e-06, "loss": 1.9177, "step": 794 }, { "epoch": 0.27, "learning_rate": 4.255860538388694e-06, "loss": 1.8317, "step": 795 }, { "epoch": 0.27, "learning_rate": 4.253868429015423e-06, "loss": 1.5938, "step": 796 }, { "epoch": 0.28, "learning_rate": 4.251874124298547e-06, "loss": 1.7239, "step": 797 }, { "epoch": 0.28, "learning_rate": 4.249877626734366e-06, "loss": 1.7878, "step": 798 }, { "epoch": 0.28, "learning_rate": 4.247878938821929e-06, "loss": 1.5808, "step": 799 }, { "epoch": 0.28, "learning_rate": 4.245878063063022e-06, "loss": 1.7935, "step": 800 }, { "epoch": 0.28, "learning_rate": 4.2438750019621705e-06, "loss": 1.7988, "step": 801 }, { "epoch": 0.28, "learning_rate": 4.241869758026638e-06, "loss": 1.6848, "step": 802 }, { "epoch": 0.28, "learning_rate": 4.239862333766418e-06, "loss": 1.8274, "step": 803 }, { "epoch": 0.28, "learning_rate": 4.2378527316942336e-06, "loss": 1.7397, "step": 804 }, { "epoch": 0.28, "learning_rate": 4.235840954325534e-06, "loss": 1.5367, "step": 805 }, { "epoch": 0.28, "learning_rate": 4.233827004178492e-06, "loss": 1.7559, "step": 806 }, { "epoch": 0.28, "learning_rate": 4.231810883773999e-06, "loss": 1.6405, "step": 807 }, { "epoch": 0.28, "learning_rate": 4.2297925956356636e-06, "loss": 1.5851, "step": 808 }, { "epoch": 0.28, "learning_rate": 4.227772142289807e-06, "loss": 1.6726, "step": 809 }, { "epoch": 0.28, "learning_rate": 4.225749526265461e-06, "loss": 1.4935, "step": 810 }, { "epoch": 0.28, "learning_rate": 4.2237247500943655e-06, "loss": 1.6622, "step": 811 }, { "epoch": 0.28, "learning_rate": 4.221697816310963e-06, "loss": 1.6991, "step": 812 }, { "epoch": 0.28, "learning_rate": 4.219668727452397e-06, "loss": 2.0518, "step": 813 }, { "epoch": 0.28, "learning_rate": 4.217637486058509e-06, "loss": 1.8496, "step": 814 }, { "epoch": 0.28, "learning_rate": 4.215604094671835e-06, "loss": 1.6289, "step": 815 }, { "epoch": 0.28, "learning_rate": 4.2135685558376e-06, "loss": 1.5591, "step": 816 }, { "epoch": 0.28, "learning_rate": 4.21153087210372e-06, "loss": 1.7098, "step": 817 }, { "epoch": 0.28, "learning_rate": 4.209491046020795e-06, "loss": 1.5388, "step": 818 }, { "epoch": 0.28, "learning_rate": 4.207449080142104e-06, "loss": 1.8663, "step": 819 }, { "epoch": 0.28, "learning_rate": 4.2054049770236095e-06, "loss": 1.6649, "step": 820 }, { "epoch": 0.28, "learning_rate": 4.203358739223944e-06, "loss": 1.5564, "step": 821 }, { "epoch": 0.28, "learning_rate": 4.201310369304416e-06, "loss": 1.3456, "step": 822 }, { "epoch": 0.28, "learning_rate": 4.1992598698289985e-06, "loss": 1.7987, "step": 823 }, { "epoch": 0.28, "learning_rate": 4.197207243364335e-06, "loss": 1.6718, "step": 824 }, { "epoch": 0.28, "learning_rate": 4.195152492479727e-06, "loss": 1.7434, "step": 825 }, { "epoch": 0.29, "learning_rate": 4.19309561974714e-06, "loss": 1.9647, "step": 826 }, { "epoch": 0.29, "learning_rate": 4.1910366277411904e-06, "loss": 1.734, "step": 827 }, { "epoch": 0.29, "learning_rate": 4.188975519039151e-06, "loss": 1.7631, "step": 828 }, { "epoch": 0.29, "learning_rate": 4.186912296220942e-06, "loss": 1.6118, "step": 829 }, { "epoch": 0.29, "learning_rate": 4.18484696186913e-06, "loss": 1.7721, "step": 830 }, { "epoch": 0.29, "learning_rate": 4.182779518568925e-06, "loss": 1.6536, "step": 831 }, { "epoch": 0.29, "learning_rate": 4.18070996890818e-06, "loss": 1.9198, "step": 832 }, { "epoch": 0.29, "learning_rate": 4.178638315477378e-06, "loss": 1.7946, "step": 833 }, { "epoch": 0.29, "learning_rate": 4.17656456086964e-06, "loss": 1.6522, "step": 834 }, { "epoch": 0.29, "learning_rate": 4.174488707680717e-06, "loss": 1.7137, "step": 835 }, { "epoch": 0.29, "learning_rate": 4.172410758508985e-06, "loss": 1.8427, "step": 836 }, { "epoch": 0.29, "learning_rate": 4.170330715955444e-06, "loss": 1.8977, "step": 837 }, { "epoch": 0.29, "learning_rate": 4.168248582623716e-06, "loss": 1.824, "step": 838 }, { "epoch": 0.29, "learning_rate": 4.166164361120036e-06, "loss": 1.7101, "step": 839 }, { "epoch": 0.29, "learning_rate": 4.164078054053259e-06, "loss": 1.6909, "step": 840 }, { "epoch": 0.29, "learning_rate": 4.161989664034844e-06, "loss": 1.7089, "step": 841 }, { "epoch": 0.29, "learning_rate": 4.1598991936788636e-06, "loss": 1.7474, "step": 842 }, { "epoch": 0.29, "learning_rate": 4.1578066456019885e-06, "loss": 1.7157, "step": 843 }, { "epoch": 0.29, "learning_rate": 4.155712022423493e-06, "loss": 1.9891, "step": 844 }, { "epoch": 0.29, "learning_rate": 4.1536153267652505e-06, "loss": 2.0682, "step": 845 }, { "epoch": 0.29, "learning_rate": 4.151516561251724e-06, "loss": 1.6029, "step": 846 }, { "epoch": 0.29, "learning_rate": 4.149415728509971e-06, "loss": 1.7631, "step": 847 }, { "epoch": 0.29, "learning_rate": 4.147312831169637e-06, "loss": 1.7053, "step": 848 }, { "epoch": 0.29, "learning_rate": 4.145207871862947e-06, "loss": 1.6868, "step": 849 }, { "epoch": 0.29, "learning_rate": 4.143100853224714e-06, "loss": 1.6692, "step": 850 }, { "epoch": 0.29, "learning_rate": 4.140991777892325e-06, "loss": 2.1541, "step": 851 }, { "epoch": 0.29, "learning_rate": 4.1388806485057375e-06, "loss": 1.6421, "step": 852 }, { "epoch": 0.29, "learning_rate": 4.136767467707487e-06, "loss": 1.7104, "step": 853 }, { "epoch": 0.29, "learning_rate": 4.134652238142674e-06, "loss": 1.5607, "step": 854 }, { "epoch": 0.3, "learning_rate": 4.1325349624589625e-06, "loss": 1.7463, "step": 855 }, { "epoch": 0.3, "learning_rate": 4.13041564330658e-06, "loss": 1.577, "step": 856 }, { "epoch": 0.3, "learning_rate": 4.128294283338308e-06, "loss": 2.0624, "step": 857 }, { "epoch": 0.3, "learning_rate": 4.126170885209485e-06, "loss": 1.7479, "step": 858 }, { "epoch": 0.3, "learning_rate": 4.124045451578001e-06, "loss": 1.4856, "step": 859 }, { "epoch": 0.3, "learning_rate": 4.121917985104296e-06, "loss": 1.5798, "step": 860 }, { "epoch": 0.3, "learning_rate": 4.119788488451347e-06, "loss": 1.6511, "step": 861 }, { "epoch": 0.3, "learning_rate": 4.117656964284681e-06, "loss": 1.6794, "step": 862 }, { "epoch": 0.3, "learning_rate": 4.115523415272358e-06, "loss": 1.9987, "step": 863 }, { "epoch": 0.3, "learning_rate": 4.113387844084972e-06, "loss": 1.5935, "step": 864 }, { "epoch": 0.3, "learning_rate": 4.111250253395652e-06, "loss": 1.7307, "step": 865 }, { "epoch": 0.3, "learning_rate": 4.109110645880051e-06, "loss": 1.6713, "step": 866 }, { "epoch": 0.3, "learning_rate": 4.106969024216348e-06, "loss": 1.7588, "step": 867 }, { "epoch": 0.3, "learning_rate": 4.104825391085246e-06, "loss": 1.8358, "step": 868 }, { "epoch": 0.3, "learning_rate": 4.102679749169959e-06, "loss": 1.7935, "step": 869 }, { "epoch": 0.3, "learning_rate": 4.10053210115622e-06, "loss": 2.1616, "step": 870 }, { "epoch": 0.3, "learning_rate": 4.098382449732276e-06, "loss": 2.0032, "step": 871 }, { "epoch": 0.3, "learning_rate": 4.096230797588874e-06, "loss": 1.6086, "step": 872 }, { "epoch": 0.3, "learning_rate": 4.094077147419271e-06, "loss": 1.6863, "step": 873 }, { "epoch": 0.3, "learning_rate": 4.091921501919225e-06, "loss": 1.8842, "step": 874 }, { "epoch": 0.3, "learning_rate": 4.089763863786988e-06, "loss": 1.8572, "step": 875 }, { "epoch": 0.3, "learning_rate": 4.087604235723308e-06, "loss": 2.0255, "step": 876 }, { "epoch": 0.3, "learning_rate": 4.085442620431427e-06, "loss": 1.769, "step": 877 }, { "epoch": 0.3, "learning_rate": 4.083279020617068e-06, "loss": 1.8889, "step": 878 }, { "epoch": 0.3, "learning_rate": 4.081113438988443e-06, "loss": 1.708, "step": 879 }, { "epoch": 0.3, "learning_rate": 4.078945878256244e-06, "loss": 1.7092, "step": 880 }, { "epoch": 0.3, "learning_rate": 4.076776341133639e-06, "loss": 1.8127, "step": 881 }, { "epoch": 0.3, "learning_rate": 4.074604830336269e-06, "loss": 1.9823, "step": 882 }, { "epoch": 0.31, "learning_rate": 4.0724313485822496e-06, "loss": 1.7898, "step": 883 }, { "epoch": 0.31, "learning_rate": 4.070255898592159e-06, "loss": 1.5903, "step": 884 }, { "epoch": 0.31, "learning_rate": 4.068078483089041e-06, "loss": 1.4545, "step": 885 }, { "epoch": 0.31, "learning_rate": 4.0658991047983985e-06, "loss": 1.6963, "step": 886 }, { "epoch": 0.31, "learning_rate": 4.063717766448194e-06, "loss": 1.5681, "step": 887 }, { "epoch": 0.31, "learning_rate": 4.061534470768841e-06, "loss": 1.9557, "step": 888 }, { "epoch": 0.31, "learning_rate": 4.059349220493202e-06, "loss": 1.7633, "step": 889 }, { "epoch": 0.31, "learning_rate": 4.0571620183565915e-06, "loss": 1.8518, "step": 890 }, { "epoch": 0.31, "learning_rate": 4.05497286709676e-06, "loss": 1.738, "step": 891 }, { "epoch": 0.31, "learning_rate": 4.052781769453904e-06, "loss": 1.8607, "step": 892 }, { "epoch": 0.31, "learning_rate": 4.0505887281706505e-06, "loss": 1.7408, "step": 893 }, { "epoch": 0.31, "learning_rate": 4.048393745992064e-06, "loss": 1.9365, "step": 894 }, { "epoch": 0.31, "learning_rate": 4.046196825665638e-06, "loss": 1.8113, "step": 895 }, { "epoch": 0.31, "learning_rate": 4.043997969941289e-06, "loss": 1.4421, "step": 896 }, { "epoch": 0.31, "learning_rate": 4.041797181571358e-06, "loss": 1.8118, "step": 897 }, { "epoch": 0.31, "learning_rate": 4.039594463310606e-06, "loss": 1.4891, "step": 898 }, { "epoch": 0.31, "learning_rate": 4.037389817916209e-06, "loss": 1.5962, "step": 899 }, { "epoch": 0.31, "learning_rate": 4.035183248147752e-06, "loss": 1.6162, "step": 900 }, { "epoch": 0.31, "learning_rate": 4.032974756767237e-06, "loss": 1.6868, "step": 901 }, { "epoch": 0.31, "learning_rate": 4.030764346539061e-06, "loss": 1.4378, "step": 902 }, { "epoch": 0.31, "learning_rate": 4.028552020230031e-06, "loss": 1.7108, "step": 903 }, { "epoch": 0.31, "learning_rate": 4.026337780609347e-06, "loss": 1.6437, "step": 904 }, { "epoch": 0.31, "learning_rate": 4.024121630448608e-06, "loss": 1.5417, "step": 905 }, { "epoch": 0.31, "learning_rate": 4.021903572521802e-06, "loss": 1.6132, "step": 906 }, { "epoch": 0.31, "learning_rate": 4.019683609605305e-06, "loss": 1.9753, "step": 907 }, { "epoch": 0.31, "learning_rate": 4.017461744477879e-06, "loss": 1.9679, "step": 908 }, { "epoch": 0.31, "learning_rate": 4.015237979920666e-06, "loss": 1.6514, "step": 909 }, { "epoch": 0.31, "learning_rate": 4.013012318717186e-06, "loss": 1.6392, "step": 910 }, { "epoch": 0.31, "learning_rate": 4.0107847636533314e-06, "loss": 1.7103, "step": 911 }, { "epoch": 0.32, "learning_rate": 4.008555317517367e-06, "loss": 1.7113, "step": 912 }, { "epoch": 0.32, "learning_rate": 4.006323983099926e-06, "loss": 1.9093, "step": 913 }, { "epoch": 0.32, "learning_rate": 4.004090763194001e-06, "loss": 1.7091, "step": 914 }, { "epoch": 0.32, "learning_rate": 4.001855660594948e-06, "loss": 1.6576, "step": 915 }, { "epoch": 0.32, "learning_rate": 3.9996186781004784e-06, "loss": 1.5237, "step": 916 }, { "epoch": 0.32, "learning_rate": 3.997379818510658e-06, "loss": 1.6288, "step": 917 }, { "epoch": 0.32, "learning_rate": 3.9951390846279004e-06, "loss": 1.5573, "step": 918 }, { "epoch": 0.32, "learning_rate": 3.992896479256966e-06, "loss": 1.6628, "step": 919 }, { "epoch": 0.32, "learning_rate": 3.9906520052049574e-06, "loss": 1.679, "step": 920 }, { "epoch": 0.32, "learning_rate": 3.988405665281319e-06, "loss": 1.5472, "step": 921 }, { "epoch": 0.32, "learning_rate": 3.986157462297825e-06, "loss": 1.442, "step": 922 }, { "epoch": 0.32, "learning_rate": 3.983907399068586e-06, "loss": 1.5928, "step": 923 }, { "epoch": 0.32, "learning_rate": 3.981655478410043e-06, "loss": 1.8367, "step": 924 }, { "epoch": 0.32, "learning_rate": 3.979401703140955e-06, "loss": 1.4514, "step": 925 }, { "epoch": 0.32, "learning_rate": 3.977146076082409e-06, "loss": 2.0298, "step": 926 }, { "epoch": 0.32, "learning_rate": 3.974888600057808e-06, "loss": 1.6066, "step": 927 }, { "epoch": 0.32, "learning_rate": 3.972629277892867e-06, "loss": 1.6202, "step": 928 }, { "epoch": 0.32, "learning_rate": 3.970368112415613e-06, "loss": 1.7167, "step": 929 }, { "epoch": 0.32, "learning_rate": 3.968105106456385e-06, "loss": 1.7051, "step": 930 }, { "epoch": 0.32, "learning_rate": 3.965840262847817e-06, "loss": 1.6996, "step": 931 }, { "epoch": 0.32, "learning_rate": 3.963573584424852e-06, "loss": 2.1051, "step": 932 }, { "epoch": 0.32, "learning_rate": 3.961305074024722e-06, "loss": 2.0374, "step": 933 }, { "epoch": 0.32, "learning_rate": 3.959034734486958e-06, "loss": 1.5112, "step": 934 }, { "epoch": 0.32, "learning_rate": 3.956762568653378e-06, "loss": 1.5099, "step": 935 }, { "epoch": 0.32, "learning_rate": 3.954488579368087e-06, "loss": 1.6893, "step": 936 }, { "epoch": 0.32, "learning_rate": 3.95221276947747e-06, "loss": 1.6199, "step": 937 }, { "epoch": 0.32, "learning_rate": 3.9499351418301935e-06, "loss": 1.939, "step": 938 }, { "epoch": 0.32, "learning_rate": 3.947655699277197e-06, "loss": 1.9095, "step": 939 }, { "epoch": 0.32, "learning_rate": 3.945374444671697e-06, "loss": 1.7665, "step": 940 }, { "epoch": 0.33, "learning_rate": 3.943091380869169e-06, "loss": 1.6393, "step": 941 }, { "epoch": 0.33, "learning_rate": 3.940806510727364e-06, "loss": 1.6263, "step": 942 }, { "epoch": 0.33, "learning_rate": 3.938519837106284e-06, "loss": 1.6744, "step": 943 }, { "epoch": 0.33, "learning_rate": 3.936231362868196e-06, "loss": 1.8961, "step": 944 }, { "epoch": 0.33, "learning_rate": 3.933941090877615e-06, "loss": 2.1582, "step": 945 }, { "epoch": 0.33, "learning_rate": 3.931649024001312e-06, "loss": 1.5308, "step": 946 }, { "epoch": 0.33, "learning_rate": 3.9293551651083e-06, "loss": 1.5902, "step": 947 }, { "epoch": 0.33, "learning_rate": 3.927059517069836e-06, "loss": 1.7356, "step": 948 }, { "epoch": 0.33, "learning_rate": 3.924762082759419e-06, "loss": 1.5519, "step": 949 }, { "epoch": 0.33, "learning_rate": 3.922462865052782e-06, "loss": 1.5103, "step": 950 }, { "epoch": 0.33, "learning_rate": 3.92016186682789e-06, "loss": 1.9646, "step": 951 }, { "epoch": 0.33, "learning_rate": 3.917859090964937e-06, "loss": 1.6812, "step": 952 }, { "epoch": 0.33, "learning_rate": 3.915554540346343e-06, "loss": 1.6785, "step": 953 }, { "epoch": 0.33, "learning_rate": 3.913248217856748e-06, "loss": 1.6869, "step": 954 }, { "epoch": 0.33, "learning_rate": 3.910940126383012e-06, "loss": 1.7711, "step": 955 }, { "epoch": 0.33, "learning_rate": 3.908630268814207e-06, "loss": 1.6195, "step": 956 }, { "epoch": 0.33, "learning_rate": 3.906318648041617e-06, "loss": 1.8285, "step": 957 }, { "epoch": 0.33, "learning_rate": 3.9040052669587325e-06, "loss": 1.8789, "step": 958 }, { "epoch": 0.33, "learning_rate": 3.901690128461248e-06, "loss": 1.5684, "step": 959 }, { "epoch": 0.33, "learning_rate": 3.899373235447056e-06, "loss": 1.6762, "step": 960 }, { "epoch": 0.33, "learning_rate": 3.897054590816247e-06, "loss": 1.6647, "step": 961 }, { "epoch": 0.33, "learning_rate": 3.894734197471104e-06, "loss": 1.6564, "step": 962 }, { "epoch": 0.33, "learning_rate": 3.892412058316099e-06, "loss": 1.8878, "step": 963 }, { "epoch": 0.33, "learning_rate": 3.890088176257887e-06, "loss": 1.6516, "step": 964 }, { "epoch": 0.33, "learning_rate": 3.887762554205308e-06, "loss": 1.5947, "step": 965 }, { "epoch": 0.33, "learning_rate": 3.885435195069377e-06, "loss": 1.4537, "step": 966 }, { "epoch": 0.33, "learning_rate": 3.883106101763285e-06, "loss": 1.7277, "step": 967 }, { "epoch": 0.33, "learning_rate": 3.880775277202394e-06, "loss": 1.6116, "step": 968 }, { "epoch": 0.33, "learning_rate": 3.87844272430423e-06, "loss": 1.9343, "step": 969 }, { "epoch": 0.34, "learning_rate": 3.876108445988487e-06, "loss": 1.7587, "step": 970 }, { "epoch": 0.34, "learning_rate": 3.8737724451770155e-06, "loss": 1.609, "step": 971 }, { "epoch": 0.34, "learning_rate": 3.871434724793823e-06, "loss": 1.4827, "step": 972 }, { "epoch": 0.34, "learning_rate": 3.86909528776507e-06, "loss": 1.6387, "step": 973 }, { "epoch": 0.34, "learning_rate": 3.866754137019065e-06, "loss": 1.6508, "step": 974 }, { "epoch": 0.34, "learning_rate": 3.8644112754862614e-06, "loss": 1.7145, "step": 975 }, { "epoch": 0.34, "learning_rate": 3.8620667060992544e-06, "loss": 2.1013, "step": 976 }, { "epoch": 0.34, "learning_rate": 3.859720431792778e-06, "loss": 1.6334, "step": 977 }, { "epoch": 0.34, "learning_rate": 3.857372455503698e-06, "loss": 1.7008, "step": 978 }, { "epoch": 0.34, "learning_rate": 3.8550227801710104e-06, "loss": 1.8376, "step": 979 }, { "epoch": 0.34, "learning_rate": 3.852671408735842e-06, "loss": 1.6759, "step": 980 }, { "epoch": 0.34, "learning_rate": 3.850318344141439e-06, "loss": 1.7334, "step": 981 }, { "epoch": 0.34, "learning_rate": 3.847963589333167e-06, "loss": 2.1074, "step": 982 }, { "epoch": 0.34, "learning_rate": 3.84560714725851e-06, "loss": 1.828, "step": 983 }, { "epoch": 0.34, "learning_rate": 3.8432490208670605e-06, "loss": 1.67, "step": 984 }, { "epoch": 0.34, "learning_rate": 3.840889213110521e-06, "loss": 1.7013, "step": 985 }, { "epoch": 0.34, "learning_rate": 3.8385277269427e-06, "loss": 1.6133, "step": 986 }, { "epoch": 0.34, "learning_rate": 3.836164565319503e-06, "loss": 1.5621, "step": 987 }, { "epoch": 0.34, "learning_rate": 3.833799731198937e-06, "loss": 2.2605, "step": 988 }, { "epoch": 0.34, "learning_rate": 3.8314332275411005e-06, "loss": 1.718, "step": 989 }, { "epoch": 0.34, "learning_rate": 3.829065057308182e-06, "loss": 1.5947, "step": 990 }, { "epoch": 0.34, "learning_rate": 3.826695223464455e-06, "loss": 1.4987, "step": 991 }, { "epoch": 0.34, "learning_rate": 3.824323728976275e-06, "loss": 1.4634, "step": 992 }, { "epoch": 0.34, "learning_rate": 3.821950576812081e-06, "loss": 1.6909, "step": 993 }, { "epoch": 0.34, "learning_rate": 3.819575769942382e-06, "loss": 1.8054, "step": 994 }, { "epoch": 0.34, "learning_rate": 3.817199311339759e-06, "loss": 1.6444, "step": 995 }, { "epoch": 0.34, "learning_rate": 3.81482120397886e-06, "loss": 1.6694, "step": 996 }, { "epoch": 0.34, "learning_rate": 3.8124414508364005e-06, "loss": 1.504, "step": 997 }, { "epoch": 0.34, "learning_rate": 3.8100600548911527e-06, "loss": 1.5531, "step": 998 }, { "epoch": 0.35, "learning_rate": 3.8076770191239444e-06, "loss": 1.6003, "step": 999 }, { "epoch": 0.35, "learning_rate": 3.805292346517659e-06, "loss": 1.6395, "step": 1000 }, { "epoch": 0.35, "learning_rate": 3.8029060400572263e-06, "loss": 2.1799, "step": 1001 }, { "epoch": 0.35, "learning_rate": 3.8005181027296224e-06, "loss": 1.5195, "step": 1002 }, { "epoch": 0.35, "learning_rate": 3.798128537523865e-06, "loss": 1.7726, "step": 1003 }, { "epoch": 0.35, "learning_rate": 3.795737347431009e-06, "loss": 1.4639, "step": 1004 }, { "epoch": 0.35, "learning_rate": 3.793344535444142e-06, "loss": 1.5388, "step": 1005 }, { "epoch": 0.35, "learning_rate": 3.790950104558384e-06, "loss": 1.8657, "step": 1006 }, { "epoch": 0.35, "learning_rate": 3.7885540577708806e-06, "loss": 1.8783, "step": 1007 }, { "epoch": 0.35, "learning_rate": 3.786156398080799e-06, "loss": 1.6086, "step": 1008 }, { "epoch": 0.35, "learning_rate": 3.783757128489326e-06, "loss": 1.5317, "step": 1009 }, { "epoch": 0.35, "learning_rate": 3.7813562519996633e-06, "loss": 1.6959, "step": 1010 }, { "epoch": 0.35, "learning_rate": 3.7789537716170257e-06, "loss": 1.673, "step": 1011 }, { "epoch": 0.35, "learning_rate": 3.776549690348632e-06, "loss": 1.8192, "step": 1012 }, { "epoch": 0.35, "learning_rate": 3.77414401120371e-06, "loss": 1.8608, "step": 1013 }, { "epoch": 0.35, "learning_rate": 3.771736737193481e-06, "loss": 1.8787, "step": 1014 }, { "epoch": 0.35, "learning_rate": 3.76932787133117e-06, "loss": 1.5908, "step": 1015 }, { "epoch": 0.35, "learning_rate": 3.7669174166319873e-06, "loss": 1.6598, "step": 1016 }, { "epoch": 0.35, "learning_rate": 3.7645053761131383e-06, "loss": 1.786, "step": 1017 }, { "epoch": 0.35, "learning_rate": 3.7620917527938085e-06, "loss": 1.6001, "step": 1018 }, { "epoch": 0.35, "learning_rate": 3.759676549695168e-06, "loss": 1.8268, "step": 1019 }, { "epoch": 0.35, "learning_rate": 3.7572597698403622e-06, "loss": 1.9512, "step": 1020 }, { "epoch": 0.35, "learning_rate": 3.754841416254512e-06, "loss": 1.4779, "step": 1021 }, { "epoch": 0.35, "learning_rate": 3.752421491964706e-06, "loss": 1.6023, "step": 1022 }, { "epoch": 0.35, "learning_rate": 3.7500000000000005e-06, "loss": 1.58, "step": 1023 }, { "epoch": 0.35, "learning_rate": 3.747576943391413e-06, "loss": 1.5548, "step": 1024 }, { "epoch": 0.35, "learning_rate": 3.745152325171921e-06, "loss": 1.7513, "step": 1025 }, { "epoch": 0.35, "learning_rate": 3.7427261483764555e-06, "loss": 1.9652, "step": 1026 }, { "epoch": 0.35, "learning_rate": 3.7402984160418974e-06, "loss": 1.6281, "step": 1027 }, { "epoch": 0.36, "learning_rate": 3.7378691312070777e-06, "loss": 1.6998, "step": 1028 }, { "epoch": 0.36, "learning_rate": 3.7354382969127676e-06, "loss": 1.6018, "step": 1029 }, { "epoch": 0.36, "learning_rate": 3.73300591620168e-06, "loss": 1.8903, "step": 1030 }, { "epoch": 0.36, "learning_rate": 3.7305719921184626e-06, "loss": 1.5015, "step": 1031 }, { "epoch": 0.36, "learning_rate": 3.7281365277096937e-06, "loss": 1.8361, "step": 1032 }, { "epoch": 0.36, "learning_rate": 3.725699526023882e-06, "loss": 1.6112, "step": 1033 }, { "epoch": 0.36, "learning_rate": 3.7232609901114595e-06, "loss": 1.4282, "step": 1034 }, { "epoch": 0.36, "learning_rate": 3.7208209230247785e-06, "loss": 1.8552, "step": 1035 }, { "epoch": 0.36, "learning_rate": 3.7183793278181063e-06, "loss": 1.6577, "step": 1036 }, { "epoch": 0.36, "learning_rate": 3.715936207547626e-06, "loss": 1.8723, "step": 1037 }, { "epoch": 0.36, "learning_rate": 3.713491565271427e-06, "loss": 1.9797, "step": 1038 }, { "epoch": 0.36, "learning_rate": 3.711045404049507e-06, "loss": 1.7056, "step": 1039 }, { "epoch": 0.36, "learning_rate": 3.708597726943761e-06, "loss": 1.719, "step": 1040 }, { "epoch": 0.36, "learning_rate": 3.706148537017984e-06, "loss": 1.5676, "step": 1041 }, { "epoch": 0.36, "learning_rate": 3.703697837337864e-06, "loss": 1.5428, "step": 1042 }, { "epoch": 0.36, "learning_rate": 3.701245630970979e-06, "loss": 1.5231, "step": 1043 }, { "epoch": 0.36, "learning_rate": 3.698791920986792e-06, "loss": 1.6993, "step": 1044 }, { "epoch": 0.36, "learning_rate": 3.69633671045665e-06, "loss": 1.8549, "step": 1045 }, { "epoch": 0.36, "learning_rate": 3.6938800024537763e-06, "loss": 1.749, "step": 1046 }, { "epoch": 0.36, "learning_rate": 3.6914218000532697e-06, "loss": 1.5437, "step": 1047 }, { "epoch": 0.36, "learning_rate": 3.6889621063321e-06, "loss": 1.6741, "step": 1048 }, { "epoch": 0.36, "learning_rate": 3.6865009243691015e-06, "loss": 1.6779, "step": 1049 }, { "epoch": 0.36, "learning_rate": 3.6840382572449733e-06, "loss": 1.7084, "step": 1050 }, { "epoch": 0.36, "learning_rate": 3.681574108042274e-06, "loss": 1.7047, "step": 1051 }, { "epoch": 0.36, "learning_rate": 3.6791084798454155e-06, "loss": 1.7634, "step": 1052 }, { "epoch": 0.36, "learning_rate": 3.676641375740662e-06, "loss": 1.6593, "step": 1053 }, { "epoch": 0.36, "learning_rate": 3.6741727988161253e-06, "loss": 1.7072, "step": 1054 }, { "epoch": 0.36, "learning_rate": 3.6717027521617593e-06, "loss": 1.6049, "step": 1055 }, { "epoch": 0.36, "learning_rate": 3.6692312388693607e-06, "loss": 1.7709, "step": 1056 }, { "epoch": 0.37, "learning_rate": 3.666758262032558e-06, "loss": 1.9137, "step": 1057 }, { "epoch": 0.37, "learning_rate": 3.664283824746815e-06, "loss": 1.6357, "step": 1058 }, { "epoch": 0.37, "learning_rate": 3.661807930109422e-06, "loss": 1.5616, "step": 1059 }, { "epoch": 0.37, "learning_rate": 3.659330581219492e-06, "loss": 1.6068, "step": 1060 }, { "epoch": 0.37, "learning_rate": 3.6568517811779637e-06, "loss": 1.6616, "step": 1061 }, { "epoch": 0.37, "learning_rate": 3.654371533087586e-06, "loss": 1.4467, "step": 1062 }, { "epoch": 0.37, "learning_rate": 3.651889840052922e-06, "loss": 1.8032, "step": 1063 }, { "epoch": 0.37, "learning_rate": 3.6494067051803463e-06, "loss": 1.8658, "step": 1064 }, { "epoch": 0.37, "learning_rate": 3.6469221315780357e-06, "loss": 1.5392, "step": 1065 }, { "epoch": 0.37, "learning_rate": 3.6444361223559683e-06, "loss": 1.6637, "step": 1066 }, { "epoch": 0.37, "learning_rate": 3.64194868062592e-06, "loss": 1.723, "step": 1067 }, { "epoch": 0.37, "learning_rate": 3.6394598095014577e-06, "loss": 1.822, "step": 1068 }, { "epoch": 0.37, "learning_rate": 3.6369695120979403e-06, "loss": 1.7567, "step": 1069 }, { "epoch": 0.37, "learning_rate": 3.6344777915325115e-06, "loss": 1.9238, "step": 1070 }, { "epoch": 0.37, "learning_rate": 3.631984650924094e-06, "loss": 1.498, "step": 1071 }, { "epoch": 0.37, "learning_rate": 3.629490093393389e-06, "loss": 1.5813, "step": 1072 }, { "epoch": 0.37, "learning_rate": 3.6269941220628742e-06, "loss": 1.6052, "step": 1073 }, { "epoch": 0.37, "learning_rate": 3.6244967400567925e-06, "loss": 1.4849, "step": 1074 }, { "epoch": 0.37, "learning_rate": 3.621997950501156e-06, "loss": 1.6111, "step": 1075 }, { "epoch": 0.37, "learning_rate": 3.6194977565237367e-06, "loss": 1.7714, "step": 1076 }, { "epoch": 0.37, "learning_rate": 3.6169961612540648e-06, "loss": 1.8148, "step": 1077 }, { "epoch": 0.37, "learning_rate": 3.614493167823426e-06, "loss": 1.639, "step": 1078 }, { "epoch": 0.37, "learning_rate": 3.6119887793648535e-06, "loss": 1.6497, "step": 1079 }, { "epoch": 0.37, "learning_rate": 3.6094829990131296e-06, "loss": 1.4919, "step": 1080 }, { "epoch": 0.37, "learning_rate": 3.6069758299047767e-06, "loss": 1.7347, "step": 1081 }, { "epoch": 0.37, "learning_rate": 3.604467275178057e-06, "loss": 1.9839, "step": 1082 }, { "epoch": 0.37, "learning_rate": 3.6019573379729644e-06, "loss": 1.8619, "step": 1083 }, { "epoch": 0.37, "learning_rate": 3.5994460214312276e-06, "loss": 1.6171, "step": 1084 }, { "epoch": 0.37, "learning_rate": 3.5969333286962984e-06, "loss": 1.6565, "step": 1085 }, { "epoch": 0.38, "learning_rate": 3.594419262913351e-06, "loss": 1.6421, "step": 1086 }, { "epoch": 0.38, "learning_rate": 3.5919038272292824e-06, "loss": 1.5537, "step": 1087 }, { "epoch": 0.38, "learning_rate": 3.5893870247926986e-06, "loss": 1.9873, "step": 1088 }, { "epoch": 0.38, "learning_rate": 3.5868688587539214e-06, "loss": 1.9861, "step": 1089 }, { "epoch": 0.38, "learning_rate": 3.584349332264975e-06, "loss": 1.4818, "step": 1090 }, { "epoch": 0.38, "learning_rate": 3.5818284484795905e-06, "loss": 1.3639, "step": 1091 }, { "epoch": 0.38, "learning_rate": 3.579306210553196e-06, "loss": 1.6097, "step": 1092 }, { "epoch": 0.38, "learning_rate": 3.5767826216429147e-06, "loss": 1.5222, "step": 1093 }, { "epoch": 0.38, "learning_rate": 3.574257684907561e-06, "loss": 1.7039, "step": 1094 }, { "epoch": 0.38, "learning_rate": 3.5717314035076355e-06, "loss": 1.8561, "step": 1095 }, { "epoch": 0.38, "learning_rate": 3.5692037806053243e-06, "loss": 1.8502, "step": 1096 }, { "epoch": 0.38, "learning_rate": 3.5666748193644896e-06, "loss": 1.5924, "step": 1097 }, { "epoch": 0.38, "learning_rate": 3.5641445229506715e-06, "loss": 1.7657, "step": 1098 }, { "epoch": 0.38, "learning_rate": 3.5616128945310802e-06, "loss": 1.5802, "step": 1099 }, { "epoch": 0.38, "learning_rate": 3.5590799372745915e-06, "loss": 1.7898, "step": 1100 }, { "epoch": 0.38, "learning_rate": 3.556545654351749e-06, "loss": 1.9984, "step": 1101 }, { "epoch": 0.38, "learning_rate": 3.554010048934751e-06, "loss": 1.549, "step": 1102 }, { "epoch": 0.38, "learning_rate": 3.551473124197454e-06, "loss": 1.6697, "step": 1103 }, { "epoch": 0.38, "learning_rate": 3.548934883315365e-06, "loss": 1.6104, "step": 1104 }, { "epoch": 0.38, "learning_rate": 3.546395329465637e-06, "loss": 1.8062, "step": 1105 }, { "epoch": 0.38, "learning_rate": 3.5438544658270696e-06, "loss": 1.5747, "step": 1106 }, { "epoch": 0.38, "learning_rate": 3.5413122955801004e-06, "loss": 2.002, "step": 1107 }, { "epoch": 0.38, "learning_rate": 3.5387688219068e-06, "loss": 1.6187, "step": 1108 }, { "epoch": 0.38, "learning_rate": 3.5362240479908755e-06, "loss": 1.6851, "step": 1109 }, { "epoch": 0.38, "learning_rate": 3.533677977017658e-06, "loss": 1.7805, "step": 1110 }, { "epoch": 0.38, "learning_rate": 3.5311306121741017e-06, "loss": 1.6821, "step": 1111 }, { "epoch": 0.38, "learning_rate": 3.5285819566487834e-06, "loss": 1.7168, "step": 1112 }, { "epoch": 0.38, "learning_rate": 3.5260320136318927e-06, "loss": 2.1205, "step": 1113 }, { "epoch": 0.38, "learning_rate": 3.5234807863152316e-06, "loss": 1.766, "step": 1114 }, { "epoch": 0.39, "learning_rate": 3.5209282778922103e-06, "loss": 1.6099, "step": 1115 }, { "epoch": 0.39, "learning_rate": 3.518374491557844e-06, "loss": 1.814, "step": 1116 }, { "epoch": 0.39, "learning_rate": 3.515819430508742e-06, "loss": 1.5934, "step": 1117 }, { "epoch": 0.39, "learning_rate": 3.513263097943115e-06, "loss": 1.7142, "step": 1118 }, { "epoch": 0.39, "learning_rate": 3.5107054970607624e-06, "loss": 1.6416, "step": 1119 }, { "epoch": 0.39, "learning_rate": 3.5081466310630737e-06, "loss": 1.7161, "step": 1120 }, { "epoch": 0.39, "learning_rate": 3.505586503153017e-06, "loss": 1.8615, "step": 1121 }, { "epoch": 0.39, "learning_rate": 3.5030251165351446e-06, "loss": 1.6625, "step": 1122 }, { "epoch": 0.39, "learning_rate": 3.5004624744155842e-06, "loss": 1.7736, "step": 1123 }, { "epoch": 0.39, "learning_rate": 3.4978985800020315e-06, "loss": 1.5845, "step": 1124 }, { "epoch": 0.39, "learning_rate": 3.495333436503753e-06, "loss": 1.7164, "step": 1125 }, { "epoch": 0.39, "learning_rate": 3.492767047131577e-06, "loss": 1.9949, "step": 1126 }, { "epoch": 0.39, "learning_rate": 3.4901994150978926e-06, "loss": 1.621, "step": 1127 }, { "epoch": 0.39, "learning_rate": 3.4876305436166423e-06, "loss": 1.6877, "step": 1128 }, { "epoch": 0.39, "learning_rate": 3.4850604359033234e-06, "loss": 1.7013, "step": 1129 }, { "epoch": 0.39, "learning_rate": 3.4824890951749763e-06, "loss": 1.8052, "step": 1130 }, { "epoch": 0.39, "learning_rate": 3.479916524650188e-06, "loss": 1.6155, "step": 1131 }, { "epoch": 0.39, "learning_rate": 3.4773427275490847e-06, "loss": 1.7887, "step": 1132 }, { "epoch": 0.39, "learning_rate": 3.4747677070933257e-06, "loss": 1.7098, "step": 1133 }, { "epoch": 0.39, "learning_rate": 3.4721914665061036e-06, "loss": 1.8024, "step": 1134 }, { "epoch": 0.39, "learning_rate": 3.4696140090121377e-06, "loss": 1.6746, "step": 1135 }, { "epoch": 0.39, "learning_rate": 3.4670353378376705e-06, "loss": 1.5505, "step": 1136 }, { "epoch": 0.39, "learning_rate": 3.4644554562104638e-06, "loss": 1.8048, "step": 1137 }, { "epoch": 0.39, "learning_rate": 3.461874367359794e-06, "loss": 1.796, "step": 1138 }, { "epoch": 0.39, "learning_rate": 3.4592920745164494e-06, "loss": 1.6082, "step": 1139 }, { "epoch": 0.39, "learning_rate": 3.4567085809127247e-06, "loss": 1.7219, "step": 1140 }, { "epoch": 0.39, "learning_rate": 3.454123889782418e-06, "loss": 1.9111, "step": 1141 }, { "epoch": 0.39, "learning_rate": 3.4515380043608264e-06, "loss": 1.8003, "step": 1142 }, { "epoch": 0.39, "learning_rate": 3.4489509278847415e-06, "loss": 1.6147, "step": 1143 }, { "epoch": 0.4, "learning_rate": 3.4463626635924457e-06, "loss": 1.8689, "step": 1144 }, { "epoch": 0.4, "learning_rate": 3.443773214723709e-06, "loss": 1.7599, "step": 1145 }, { "epoch": 0.4, "learning_rate": 3.441182584519783e-06, "loss": 1.4946, "step": 1146 }, { "epoch": 0.4, "learning_rate": 3.4385907762234e-06, "loss": 1.7041, "step": 1147 }, { "epoch": 0.4, "learning_rate": 3.4359977930787645e-06, "loss": 1.7513, "step": 1148 }, { "epoch": 0.4, "learning_rate": 3.4334036383315527e-06, "loss": 1.5692, "step": 1149 }, { "epoch": 0.4, "learning_rate": 3.4308083152289073e-06, "loss": 1.6755, "step": 1150 }, { "epoch": 0.4, "learning_rate": 3.428211827019434e-06, "loss": 1.9631, "step": 1151 }, { "epoch": 0.4, "learning_rate": 3.425614176953197e-06, "loss": 1.5745, "step": 1152 }, { "epoch": 0.4, "learning_rate": 3.4230153682817112e-06, "loss": 1.5573, "step": 1153 }, { "epoch": 0.4, "learning_rate": 3.4204154042579472e-06, "loss": 1.6251, "step": 1154 }, { "epoch": 0.4, "learning_rate": 3.4178142881363192e-06, "loss": 1.6663, "step": 1155 }, { "epoch": 0.4, "learning_rate": 3.4152120231726825e-06, "loss": 1.6471, "step": 1156 }, { "epoch": 0.4, "learning_rate": 3.412608612624332e-06, "loss": 2.1289, "step": 1157 }, { "epoch": 0.4, "learning_rate": 3.410004059749996e-06, "loss": 1.7523, "step": 1158 }, { "epoch": 0.4, "learning_rate": 3.407398367809832e-06, "loss": 1.7734, "step": 1159 }, { "epoch": 0.4, "learning_rate": 3.404791540065425e-06, "loss": 1.5858, "step": 1160 }, { "epoch": 0.4, "learning_rate": 3.4021835797797807e-06, "loss": 1.7737, "step": 1161 }, { "epoch": 0.4, "learning_rate": 3.399574490217321e-06, "loss": 1.6875, "step": 1162 }, { "epoch": 0.4, "learning_rate": 3.3969642746438836e-06, "loss": 1.7317, "step": 1163 }, { "epoch": 0.4, "learning_rate": 3.3943529363267146e-06, "loss": 1.9542, "step": 1164 }, { "epoch": 0.4, "learning_rate": 3.391740478534467e-06, "loss": 1.661, "step": 1165 }, { "epoch": 0.4, "learning_rate": 3.389126904537192e-06, "loss": 1.6262, "step": 1166 }, { "epoch": 0.4, "learning_rate": 3.386512217606339e-06, "loss": 1.4976, "step": 1167 }, { "epoch": 0.4, "learning_rate": 3.3838964210147536e-06, "loss": 1.8207, "step": 1168 }, { "epoch": 0.4, "learning_rate": 3.3812795180366657e-06, "loss": 2.265, "step": 1169 }, { "epoch": 0.4, "learning_rate": 3.378661511947694e-06, "loss": 1.8398, "step": 1170 }, { "epoch": 0.4, "learning_rate": 3.376042406024835e-06, "loss": 1.7194, "step": 1171 }, { "epoch": 0.4, "learning_rate": 3.3734222035464632e-06, "loss": 1.7423, "step": 1172 }, { "epoch": 0.41, "learning_rate": 3.370800907792325e-06, "loss": 1.5692, "step": 1173 }, { "epoch": 0.41, "learning_rate": 3.368178522043539e-06, "loss": 1.5594, "step": 1174 }, { "epoch": 0.41, "learning_rate": 3.3655550495825824e-06, "loss": 1.5569, "step": 1175 }, { "epoch": 0.41, "learning_rate": 3.3629304936932948e-06, "loss": 2.1508, "step": 1176 }, { "epoch": 0.41, "learning_rate": 3.3603048576608737e-06, "loss": 1.7025, "step": 1177 }, { "epoch": 0.41, "learning_rate": 3.3576781447718676e-06, "loss": 1.5986, "step": 1178 }, { "epoch": 0.41, "learning_rate": 3.3550503583141726e-06, "loss": 1.5211, "step": 1179 }, { "epoch": 0.41, "learning_rate": 3.352421501577028e-06, "loss": 1.7258, "step": 1180 }, { "epoch": 0.41, "learning_rate": 3.3497915778510127e-06, "loss": 1.5883, "step": 1181 }, { "epoch": 0.41, "learning_rate": 3.3471605904280446e-06, "loss": 1.8839, "step": 1182 }, { "epoch": 0.41, "learning_rate": 3.3445285426013683e-06, "loss": 1.6685, "step": 1183 }, { "epoch": 0.41, "learning_rate": 3.3418954376655587e-06, "loss": 1.7593, "step": 1184 }, { "epoch": 0.41, "learning_rate": 3.3392612789165124e-06, "loss": 1.7195, "step": 1185 }, { "epoch": 0.41, "learning_rate": 3.3366260696514462e-06, "loss": 1.6315, "step": 1186 }, { "epoch": 0.41, "learning_rate": 3.3339898131688916e-06, "loss": 1.6024, "step": 1187 }, { "epoch": 0.41, "learning_rate": 3.3313525127686897e-06, "loss": 1.7604, "step": 1188 }, { "epoch": 0.41, "learning_rate": 3.3287141717519898e-06, "loss": 1.8076, "step": 1189 }, { "epoch": 0.41, "learning_rate": 3.3260747934212428e-06, "loss": 1.5455, "step": 1190 }, { "epoch": 0.41, "learning_rate": 3.3234343810801995e-06, "loss": 1.5108, "step": 1191 }, { "epoch": 0.41, "learning_rate": 3.3207929380339034e-06, "loss": 1.8112, "step": 1192 }, { "epoch": 0.41, "learning_rate": 3.3181504675886877e-06, "loss": 1.8383, "step": 1193 }, { "epoch": 0.41, "learning_rate": 3.315506973052174e-06, "loss": 1.8492, "step": 1194 }, { "epoch": 0.41, "learning_rate": 3.312862457733263e-06, "loss": 1.7056, "step": 1195 }, { "epoch": 0.41, "learning_rate": 3.3102169249421358e-06, "loss": 1.5165, "step": 1196 }, { "epoch": 0.41, "learning_rate": 3.3075703779902454e-06, "loss": 1.7104, "step": 1197 }, { "epoch": 0.41, "learning_rate": 3.304922820190313e-06, "loss": 1.506, "step": 1198 }, { "epoch": 0.41, "learning_rate": 3.3022742548563293e-06, "loss": 1.8054, "step": 1199 }, { "epoch": 0.41, "learning_rate": 3.2996246853035417e-06, "loss": 1.5925, "step": 1200 }, { "epoch": 0.41, "learning_rate": 3.2969741148484575e-06, "loss": 2.1381, "step": 1201 }, { "epoch": 0.42, "learning_rate": 3.2943225468088345e-06, "loss": 1.6012, "step": 1202 }, { "epoch": 0.42, "learning_rate": 3.291669984503682e-06, "loss": 1.442, "step": 1203 }, { "epoch": 0.42, "learning_rate": 3.2890164312532503e-06, "loss": 1.5325, "step": 1204 }, { "epoch": 0.42, "learning_rate": 3.2863618903790346e-06, "loss": 1.7043, "step": 1205 }, { "epoch": 0.42, "learning_rate": 3.283706365203762e-06, "loss": 1.6383, "step": 1206 }, { "epoch": 0.42, "learning_rate": 3.281049859051394e-06, "loss": 1.7086, "step": 1207 }, { "epoch": 0.42, "learning_rate": 3.2783923752471195e-06, "loss": 1.7889, "step": 1208 }, { "epoch": 0.42, "learning_rate": 3.275733917117351e-06, "loss": 1.6429, "step": 1209 }, { "epoch": 0.42, "learning_rate": 3.2730744879897215e-06, "loss": 1.5715, "step": 1210 }, { "epoch": 0.42, "learning_rate": 3.270414091193077e-06, "loss": 1.6127, "step": 1211 }, { "epoch": 0.42, "learning_rate": 3.2677527300574772e-06, "loss": 1.6804, "step": 1212 }, { "epoch": 0.42, "learning_rate": 3.2650904079141886e-06, "loss": 2.0482, "step": 1213 }, { "epoch": 0.42, "learning_rate": 3.26242712809568e-06, "loss": 1.8146, "step": 1214 }, { "epoch": 0.42, "learning_rate": 3.2597628939356174e-06, "loss": 1.7684, "step": 1215 }, { "epoch": 0.42, "learning_rate": 3.257097708768865e-06, "loss": 1.5035, "step": 1216 }, { "epoch": 0.42, "learning_rate": 3.2544315759314737e-06, "loss": 1.5741, "step": 1217 }, { "epoch": 0.42, "learning_rate": 3.2517644987606827e-06, "loss": 1.5702, "step": 1218 }, { "epoch": 0.42, "learning_rate": 3.2490964805949143e-06, "loss": 2.0778, "step": 1219 }, { "epoch": 0.42, "learning_rate": 3.2464275247737653e-06, "loss": 1.8, "step": 1220 }, { "epoch": 0.42, "learning_rate": 3.2437576346380077e-06, "loss": 1.52, "step": 1221 }, { "epoch": 0.42, "learning_rate": 3.2410868135295846e-06, "loss": 1.5696, "step": 1222 }, { "epoch": 0.42, "learning_rate": 3.2384150647916033e-06, "loss": 1.6827, "step": 1223 }, { "epoch": 0.42, "learning_rate": 3.2357423917683312e-06, "loss": 1.7526, "step": 1224 }, { "epoch": 0.42, "learning_rate": 3.233068797805194e-06, "loss": 1.6808, "step": 1225 }, { "epoch": 0.42, "learning_rate": 3.230394286248768e-06, "loss": 2.252, "step": 1226 }, { "epoch": 0.42, "learning_rate": 3.2277188604467826e-06, "loss": 1.7174, "step": 1227 }, { "epoch": 0.42, "learning_rate": 3.225042523748107e-06, "loss": 1.4952, "step": 1228 }, { "epoch": 0.42, "learning_rate": 3.222365279502752e-06, "loss": 1.4629, "step": 1229 }, { "epoch": 0.42, "learning_rate": 3.2196871310618655e-06, "loss": 1.8895, "step": 1230 }, { "epoch": 0.43, "learning_rate": 3.217008081777726e-06, "loss": 1.5431, "step": 1231 }, { "epoch": 0.43, "learning_rate": 3.214328135003739e-06, "loss": 2.0038, "step": 1232 }, { "epoch": 0.43, "learning_rate": 3.211647294094437e-06, "loss": 1.7257, "step": 1233 }, { "epoch": 0.43, "learning_rate": 3.2089655624054674e-06, "loss": 1.4354, "step": 1234 }, { "epoch": 0.43, "learning_rate": 3.206282943293593e-06, "loss": 1.6359, "step": 1235 }, { "epoch": 0.43, "learning_rate": 3.2035994401166896e-06, "loss": 1.5643, "step": 1236 }, { "epoch": 0.43, "learning_rate": 3.2009150562337397e-06, "loss": 1.6166, "step": 1237 }, { "epoch": 0.43, "learning_rate": 3.1982297950048267e-06, "loss": 1.9572, "step": 1238 }, { "epoch": 0.43, "learning_rate": 3.195543659791132e-06, "loss": 2.041, "step": 1239 }, { "epoch": 0.43, "learning_rate": 3.1928566539549316e-06, "loss": 1.5981, "step": 1240 }, { "epoch": 0.43, "learning_rate": 3.190168780859592e-06, "loss": 1.5344, "step": 1241 }, { "epoch": 0.43, "learning_rate": 3.1874800438695654e-06, "loss": 1.6752, "step": 1242 }, { "epoch": 0.43, "learning_rate": 3.1847904463503814e-06, "loss": 1.4113, "step": 1243 }, { "epoch": 0.43, "learning_rate": 3.182099991668653e-06, "loss": 1.6653, "step": 1244 }, { "epoch": 0.43, "learning_rate": 3.179408683192061e-06, "loss": 1.7976, "step": 1245 }, { "epoch": 0.43, "learning_rate": 3.176716524289358e-06, "loss": 1.8802, "step": 1246 }, { "epoch": 0.43, "learning_rate": 3.17402351833036e-06, "loss": 1.8191, "step": 1247 }, { "epoch": 0.43, "learning_rate": 3.171329668685942e-06, "loss": 1.5293, "step": 1248 }, { "epoch": 0.43, "learning_rate": 3.168634978728037e-06, "loss": 1.5381, "step": 1249 }, { "epoch": 0.43, "learning_rate": 3.1659394518296303e-06, "loss": 1.7428, "step": 1250 }, { "epoch": 0.43, "learning_rate": 3.163243091364752e-06, "loss": 1.9436, "step": 1251 }, { "epoch": 0.43, "learning_rate": 3.1605459007084784e-06, "loss": 1.5049, "step": 1252 }, { "epoch": 0.43, "learning_rate": 3.157847883236922e-06, "loss": 1.5243, "step": 1253 }, { "epoch": 0.43, "learning_rate": 3.1551490423272337e-06, "loss": 1.5956, "step": 1254 }, { "epoch": 0.43, "learning_rate": 3.1524493813575936e-06, "loss": 1.7551, "step": 1255 }, { "epoch": 0.43, "learning_rate": 3.1497489037072066e-06, "loss": 1.6066, "step": 1256 }, { "epoch": 0.43, "learning_rate": 3.147047612756302e-06, "loss": 1.7294, "step": 1257 }, { "epoch": 0.43, "learning_rate": 3.144345511886127e-06, "loss": 1.7426, "step": 1258 }, { "epoch": 0.43, "learning_rate": 3.141642604478942e-06, "loss": 1.5708, "step": 1259 }, { "epoch": 0.44, "learning_rate": 3.138938893918017e-06, "loss": 1.5487, "step": 1260 }, { "epoch": 0.44, "learning_rate": 3.1362343835876274e-06, "loss": 1.6582, "step": 1261 }, { "epoch": 0.44, "learning_rate": 3.1335290768730485e-06, "loss": 1.6226, "step": 1262 }, { "epoch": 0.44, "learning_rate": 3.1308229771605546e-06, "loss": 1.8431, "step": 1263 }, { "epoch": 0.44, "learning_rate": 3.128116087837412e-06, "loss": 1.6046, "step": 1264 }, { "epoch": 0.44, "learning_rate": 3.1254084122918738e-06, "loss": 1.73, "step": 1265 }, { "epoch": 0.44, "learning_rate": 3.1226999539131798e-06, "loss": 1.8805, "step": 1266 }, { "epoch": 0.44, "learning_rate": 3.1199907160915465e-06, "loss": 1.5688, "step": 1267 }, { "epoch": 0.44, "learning_rate": 3.1172807022181693e-06, "loss": 1.8367, "step": 1268 }, { "epoch": 0.44, "learning_rate": 3.1145699156852133e-06, "loss": 1.5712, "step": 1269 }, { "epoch": 0.44, "learning_rate": 3.1118583598858097e-06, "loss": 2.0917, "step": 1270 }, { "epoch": 0.44, "learning_rate": 3.109146038214055e-06, "loss": 1.6392, "step": 1271 }, { "epoch": 0.44, "learning_rate": 3.1064329540650033e-06, "loss": 1.707, "step": 1272 }, { "epoch": 0.44, "learning_rate": 3.1037191108346626e-06, "loss": 1.5704, "step": 1273 }, { "epoch": 0.44, "learning_rate": 3.1010045119199917e-06, "loss": 1.7042, "step": 1274 }, { "epoch": 0.44, "learning_rate": 3.0982891607188948e-06, "loss": 1.6958, "step": 1275 }, { "epoch": 0.44, "learning_rate": 3.095573060630218e-06, "loss": 1.8655, "step": 1276 }, { "epoch": 0.44, "learning_rate": 3.0928562150537444e-06, "loss": 1.8367, "step": 1277 }, { "epoch": 0.44, "learning_rate": 3.090138627390193e-06, "loss": 1.6962, "step": 1278 }, { "epoch": 0.44, "learning_rate": 3.0874203010412057e-06, "loss": 1.6669, "step": 1279 }, { "epoch": 0.44, "learning_rate": 3.084701239409355e-06, "loss": 1.7567, "step": 1280 }, { "epoch": 0.44, "learning_rate": 3.0819814458981304e-06, "loss": 1.7784, "step": 1281 }, { "epoch": 0.44, "learning_rate": 3.07926092391194e-06, "loss": 1.9146, "step": 1282 }, { "epoch": 0.44, "learning_rate": 3.0765396768561005e-06, "loss": 1.6562, "step": 1283 }, { "epoch": 0.44, "learning_rate": 3.0738177081368394e-06, "loss": 1.4889, "step": 1284 }, { "epoch": 0.44, "learning_rate": 3.0710950211612846e-06, "loss": 1.5638, "step": 1285 }, { "epoch": 0.44, "learning_rate": 3.0683716193374658e-06, "loss": 1.5352, "step": 1286 }, { "epoch": 0.44, "learning_rate": 3.0656475060743065e-06, "loss": 1.6444, "step": 1287 }, { "epoch": 0.44, "learning_rate": 3.0629226847816175e-06, "loss": 2.1238, "step": 1288 }, { "epoch": 0.45, "learning_rate": 3.0601971588701023e-06, "loss": 1.8784, "step": 1289 }, { "epoch": 0.45, "learning_rate": 3.057470931751341e-06, "loss": 1.6216, "step": 1290 }, { "epoch": 0.45, "learning_rate": 3.0547440068377943e-06, "loss": 1.6292, "step": 1291 }, { "epoch": 0.45, "learning_rate": 3.052016387542794e-06, "loss": 1.5352, "step": 1292 }, { "epoch": 0.45, "learning_rate": 3.0492880772805433e-06, "loss": 1.5801, "step": 1293 }, { "epoch": 0.45, "learning_rate": 3.0465590794661088e-06, "loss": 1.7721, "step": 1294 }, { "epoch": 0.45, "learning_rate": 3.043829397515419e-06, "loss": 1.5731, "step": 1295 }, { "epoch": 0.45, "learning_rate": 3.0410990348452572e-06, "loss": 1.5693, "step": 1296 }, { "epoch": 0.45, "learning_rate": 3.038367994873261e-06, "loss": 1.4973, "step": 1297 }, { "epoch": 0.45, "learning_rate": 3.035636281017913e-06, "loss": 1.5514, "step": 1298 }, { "epoch": 0.45, "learning_rate": 3.0329038966985404e-06, "loss": 1.6069, "step": 1299 }, { "epoch": 0.45, "learning_rate": 3.0301708453353118e-06, "loss": 1.5393, "step": 1300 }, { "epoch": 0.45, "learning_rate": 3.0274371303492273e-06, "loss": 1.9724, "step": 1301 }, { "epoch": 0.45, "learning_rate": 3.0247027551621187e-06, "loss": 1.7944, "step": 1302 }, { "epoch": 0.45, "learning_rate": 3.021967723196647e-06, "loss": 1.5555, "step": 1303 }, { "epoch": 0.45, "learning_rate": 3.0192320378762915e-06, "loss": 1.8423, "step": 1304 }, { "epoch": 0.45, "learning_rate": 3.016495702625351e-06, "loss": 1.6544, "step": 1305 }, { "epoch": 0.45, "learning_rate": 3.0137587208689377e-06, "loss": 1.7098, "step": 1306 }, { "epoch": 0.45, "learning_rate": 3.011021096032973e-06, "loss": 1.8547, "step": 1307 }, { "epoch": 0.45, "learning_rate": 3.0082828315441824e-06, "loss": 1.6975, "step": 1308 }, { "epoch": 0.45, "learning_rate": 3.0055439308300954e-06, "loss": 1.9026, "step": 1309 }, { "epoch": 0.45, "learning_rate": 3.002804397319033e-06, "loss": 1.6289, "step": 1310 }, { "epoch": 0.45, "learning_rate": 3.0000642344401115e-06, "loss": 1.6741, "step": 1311 }, { "epoch": 0.45, "learning_rate": 2.997323445623234e-06, "loss": 1.9025, "step": 1312 }, { "epoch": 0.45, "learning_rate": 2.9945820342990877e-06, "loss": 1.7939, "step": 1313 }, { "epoch": 0.45, "learning_rate": 2.9918400038991384e-06, "loss": 1.4979, "step": 1314 }, { "epoch": 0.45, "learning_rate": 2.989097357855627e-06, "loss": 1.7263, "step": 1315 }, { "epoch": 0.45, "learning_rate": 2.986354099601564e-06, "loss": 1.8082, "step": 1316 }, { "epoch": 0.45, "learning_rate": 2.9836102325707282e-06, "loss": 1.6865, "step": 1317 }, { "epoch": 0.46, "learning_rate": 2.98086576019766e-06, "loss": 1.5768, "step": 1318 }, { "epoch": 0.46, "learning_rate": 2.978120685917656e-06, "loss": 1.9091, "step": 1319 }, { "epoch": 0.46, "learning_rate": 2.975375013166767e-06, "loss": 1.9377, "step": 1320 }, { "epoch": 0.46, "learning_rate": 2.972628745381794e-06, "loss": 1.6537, "step": 1321 }, { "epoch": 0.46, "learning_rate": 2.96988188600028e-06, "loss": 1.5724, "step": 1322 }, { "epoch": 0.46, "learning_rate": 2.967134438460513e-06, "loss": 1.4879, "step": 1323 }, { "epoch": 0.46, "learning_rate": 2.9643864062015116e-06, "loss": 1.4816, "step": 1324 }, { "epoch": 0.46, "learning_rate": 2.961637792663032e-06, "loss": 1.7969, "step": 1325 }, { "epoch": 0.46, "learning_rate": 2.958888601285554e-06, "loss": 2.0592, "step": 1326 }, { "epoch": 0.46, "learning_rate": 2.956138835510282e-06, "loss": 1.9066, "step": 1327 }, { "epoch": 0.46, "learning_rate": 2.9533884987791394e-06, "loss": 1.665, "step": 1328 }, { "epoch": 0.46, "learning_rate": 2.950637594534765e-06, "loss": 1.6693, "step": 1329 }, { "epoch": 0.46, "learning_rate": 2.9478861262205056e-06, "loss": 1.7086, "step": 1330 }, { "epoch": 0.46, "learning_rate": 2.945134097280417e-06, "loss": 1.5278, "step": 1331 }, { "epoch": 0.46, "learning_rate": 2.9423815111592557e-06, "loss": 1.8035, "step": 1332 }, { "epoch": 0.46, "learning_rate": 2.9396283713024732e-06, "loss": 1.5507, "step": 1333 }, { "epoch": 0.46, "learning_rate": 2.936874681156219e-06, "loss": 1.4396, "step": 1334 }, { "epoch": 0.46, "learning_rate": 2.9341204441673267e-06, "loss": 1.7264, "step": 1335 }, { "epoch": 0.46, "learning_rate": 2.9313656637833167e-06, "loss": 1.6681, "step": 1336 }, { "epoch": 0.46, "learning_rate": 2.9286103434523894e-06, "loss": 1.8992, "step": 1337 }, { "epoch": 0.46, "learning_rate": 2.9258544866234206e-06, "loss": 1.7515, "step": 1338 }, { "epoch": 0.46, "learning_rate": 2.9230980967459594e-06, "loss": 1.6829, "step": 1339 }, { "epoch": 0.46, "learning_rate": 2.9203411772702187e-06, "loss": 1.5127, "step": 1340 }, { "epoch": 0.46, "learning_rate": 2.9175837316470775e-06, "loss": 1.5503, "step": 1341 }, { "epoch": 0.46, "learning_rate": 2.914825763328072e-06, "loss": 1.4485, "step": 1342 }, { "epoch": 0.46, "learning_rate": 2.9120672757653917e-06, "loss": 1.6827, "step": 1343 }, { "epoch": 0.46, "learning_rate": 2.9093082724118776e-06, "loss": 1.7842, "step": 1344 }, { "epoch": 0.46, "learning_rate": 2.9065487567210176e-06, "loss": 1.4478, "step": 1345 }, { "epoch": 0.46, "learning_rate": 2.903788732146937e-06, "loss": 1.5469, "step": 1346 }, { "epoch": 0.47, "learning_rate": 2.9010282021444008e-06, "loss": 1.67, "step": 1347 }, { "epoch": 0.47, "learning_rate": 2.898267170168807e-06, "loss": 1.7361, "step": 1348 }, { "epoch": 0.47, "learning_rate": 2.89550563967618e-06, "loss": 1.3917, "step": 1349 }, { "epoch": 0.47, "learning_rate": 2.8927436141231695e-06, "loss": 1.7849, "step": 1350 }, { "epoch": 0.47, "learning_rate": 2.889981096967045e-06, "loss": 1.8929, "step": 1351 }, { "epoch": 0.47, "learning_rate": 2.8872180916656906e-06, "loss": 1.7362, "step": 1352 }, { "epoch": 0.47, "learning_rate": 2.8844546016776014e-06, "loss": 1.6289, "step": 1353 }, { "epoch": 0.47, "learning_rate": 2.8816906304618807e-06, "loss": 1.5798, "step": 1354 }, { "epoch": 0.47, "learning_rate": 2.8789261814782315e-06, "loss": 1.6255, "step": 1355 }, { "epoch": 0.47, "learning_rate": 2.876161258186958e-06, "loss": 1.4587, "step": 1356 }, { "epoch": 0.47, "learning_rate": 2.8733958640489552e-06, "loss": 1.7612, "step": 1357 }, { "epoch": 0.47, "learning_rate": 2.8706300025257096e-06, "loss": 1.7184, "step": 1358 }, { "epoch": 0.47, "learning_rate": 2.8678636770792907e-06, "loss": 1.7983, "step": 1359 }, { "epoch": 0.47, "learning_rate": 2.865096891172351e-06, "loss": 1.5917, "step": 1360 }, { "epoch": 0.47, "learning_rate": 2.862329648268117e-06, "loss": 1.4084, "step": 1361 }, { "epoch": 0.47, "learning_rate": 2.8595619518303894e-06, "loss": 1.7023, "step": 1362 }, { "epoch": 0.47, "learning_rate": 2.8567938053235363e-06, "loss": 1.719, "step": 1363 }, { "epoch": 0.47, "learning_rate": 2.854025212212487e-06, "loss": 1.7808, "step": 1364 }, { "epoch": 0.47, "learning_rate": 2.8512561759627322e-06, "loss": 1.4897, "step": 1365 }, { "epoch": 0.47, "learning_rate": 2.848486700040316e-06, "loss": 1.6052, "step": 1366 }, { "epoch": 0.47, "learning_rate": 2.8457167879118332e-06, "loss": 1.3289, "step": 1367 }, { "epoch": 0.47, "learning_rate": 2.8429464430444255e-06, "loss": 1.5861, "step": 1368 }, { "epoch": 0.47, "learning_rate": 2.8401756689057736e-06, "loss": 1.9816, "step": 1369 }, { "epoch": 0.47, "learning_rate": 2.837404468964099e-06, "loss": 1.7037, "step": 1370 }, { "epoch": 0.47, "learning_rate": 2.8346328466881544e-06, "loss": 1.5413, "step": 1371 }, { "epoch": 0.47, "learning_rate": 2.831860805547221e-06, "loss": 1.8048, "step": 1372 }, { "epoch": 0.47, "learning_rate": 2.829088349011104e-06, "loss": 1.5509, "step": 1373 }, { "epoch": 0.47, "learning_rate": 2.82631548055013e-06, "loss": 1.5315, "step": 1374 }, { "epoch": 0.47, "learning_rate": 2.8235422036351384e-06, "loss": 1.7278, "step": 1375 }, { "epoch": 0.48, "learning_rate": 2.8207685217374847e-06, "loss": 2.0551, "step": 1376 }, { "epoch": 0.48, "learning_rate": 2.8179944383290277e-06, "loss": 1.7334, "step": 1377 }, { "epoch": 0.48, "learning_rate": 2.8152199568821278e-06, "loss": 1.7656, "step": 1378 }, { "epoch": 0.48, "learning_rate": 2.812445080869646e-06, "loss": 1.575, "step": 1379 }, { "epoch": 0.48, "learning_rate": 2.809669813764938e-06, "loss": 1.8176, "step": 1380 }, { "epoch": 0.48, "learning_rate": 2.8068941590418462e-06, "loss": 1.7029, "step": 1381 }, { "epoch": 0.48, "learning_rate": 2.8041181201746998e-06, "loss": 1.8981, "step": 1382 }, { "epoch": 0.48, "learning_rate": 2.8013417006383078e-06, "loss": 1.6245, "step": 1383 }, { "epoch": 0.48, "learning_rate": 2.7985649039079575e-06, "loss": 1.4507, "step": 1384 }, { "epoch": 0.48, "learning_rate": 2.795787733459408e-06, "loss": 1.9188, "step": 1385 }, { "epoch": 0.48, "learning_rate": 2.793010192768884e-06, "loss": 1.7711, "step": 1386 }, { "epoch": 0.48, "learning_rate": 2.7902322853130758e-06, "loss": 1.5562, "step": 1387 }, { "epoch": 0.48, "learning_rate": 2.787454014569132e-06, "loss": 1.9641, "step": 1388 }, { "epoch": 0.48, "learning_rate": 2.784675384014656e-06, "loss": 1.7126, "step": 1389 }, { "epoch": 0.48, "learning_rate": 2.7818963971277024e-06, "loss": 1.4998, "step": 1390 }, { "epoch": 0.48, "learning_rate": 2.7791170573867698e-06, "loss": 1.5914, "step": 1391 }, { "epoch": 0.48, "learning_rate": 2.7763373682708e-06, "loss": 1.7294, "step": 1392 }, { "epoch": 0.48, "learning_rate": 2.7735573332591727e-06, "loss": 1.8026, "step": 1393 }, { "epoch": 0.48, "learning_rate": 2.7707769558316993e-06, "loss": 1.6163, "step": 1394 }, { "epoch": 0.48, "learning_rate": 2.76799623946862e-06, "loss": 1.8942, "step": 1395 }, { "epoch": 0.48, "learning_rate": 2.7652151876505996e-06, "loss": 1.5818, "step": 1396 }, { "epoch": 0.48, "learning_rate": 2.7624338038587223e-06, "loss": 1.4757, "step": 1397 }, { "epoch": 0.48, "learning_rate": 2.759652091574489e-06, "loss": 1.5325, "step": 1398 }, { "epoch": 0.48, "learning_rate": 2.7568700542798112e-06, "loss": 1.8544, "step": 1399 }, { "epoch": 0.48, "learning_rate": 2.754087695457005e-06, "loss": 1.8112, "step": 1400 }, { "epoch": 0.48, "learning_rate": 2.751305018588793e-06, "loss": 2.1598, "step": 1401 }, { "epoch": 0.48, "learning_rate": 2.7485220271582935e-06, "loss": 1.6759, "step": 1402 }, { "epoch": 0.48, "learning_rate": 2.745738724649018e-06, "loss": 1.5492, "step": 1403 }, { "epoch": 0.48, "learning_rate": 2.7429551145448695e-06, "loss": 1.704, "step": 1404 }, { "epoch": 0.49, "learning_rate": 2.740171200330134e-06, "loss": 1.349, "step": 1405 }, { "epoch": 0.49, "learning_rate": 2.7373869854894787e-06, "loss": 1.5133, "step": 1406 }, { "epoch": 0.49, "learning_rate": 2.7346024735079483e-06, "loss": 2.0825, "step": 1407 }, { "epoch": 0.49, "learning_rate": 2.7318176678709583e-06, "loss": 1.6023, "step": 1408 }, { "epoch": 0.49, "learning_rate": 2.729032572064292e-06, "loss": 1.4114, "step": 1409 }, { "epoch": 0.49, "learning_rate": 2.726247189574095e-06, "loss": 1.6002, "step": 1410 }, { "epoch": 0.49, "learning_rate": 2.7234615238868733e-06, "loss": 1.662, "step": 1411 }, { "epoch": 0.49, "learning_rate": 2.7206755784894866e-06, "loss": 1.5885, "step": 1412 }, { "epoch": 0.49, "learning_rate": 2.717889356869146e-06, "loss": 1.6345, "step": 1413 }, { "epoch": 0.49, "learning_rate": 2.7151028625134054e-06, "loss": 1.946, "step": 1414 }, { "epoch": 0.49, "learning_rate": 2.7123160989101623e-06, "loss": 1.5234, "step": 1415 }, { "epoch": 0.49, "learning_rate": 2.7095290695476516e-06, "loss": 1.7032, "step": 1416 }, { "epoch": 0.49, "learning_rate": 2.7067417779144395e-06, "loss": 1.6755, "step": 1417 }, { "epoch": 0.49, "learning_rate": 2.703954227499421e-06, "loss": 1.58, "step": 1418 }, { "epoch": 0.49, "learning_rate": 2.7011664217918154e-06, "loss": 1.3066, "step": 1419 }, { "epoch": 0.49, "learning_rate": 2.69837836428116e-06, "loss": 1.8619, "step": 1420 }, { "epoch": 0.49, "learning_rate": 2.6955900584573103e-06, "loss": 1.6481, "step": 1421 }, { "epoch": 0.49, "learning_rate": 2.692801507810431e-06, "loss": 1.6434, "step": 1422 }, { "epoch": 0.49, "learning_rate": 2.6900127158309903e-06, "loss": 1.5099, "step": 1423 }, { "epoch": 0.49, "learning_rate": 2.6872236860097634e-06, "loss": 1.618, "step": 1424 }, { "epoch": 0.49, "learning_rate": 2.684434421837821e-06, "loss": 1.562, "step": 1425 }, { "epoch": 0.49, "learning_rate": 2.681644926806527e-06, "loss": 2.1527, "step": 1426 }, { "epoch": 0.49, "learning_rate": 2.6788552044075342e-06, "loss": 1.7001, "step": 1427 }, { "epoch": 0.49, "learning_rate": 2.67606525813278e-06, "loss": 1.7274, "step": 1428 }, { "epoch": 0.49, "learning_rate": 2.673275091474483e-06, "loss": 1.7321, "step": 1429 }, { "epoch": 0.49, "learning_rate": 2.670484707925137e-06, "loss": 1.5569, "step": 1430 }, { "epoch": 0.49, "learning_rate": 2.667694110977506e-06, "loss": 1.7062, "step": 1431 }, { "epoch": 0.49, "learning_rate": 2.664903304124624e-06, "loss": 2.0936, "step": 1432 }, { "epoch": 0.49, "learning_rate": 2.6621122908597852e-06, "loss": 1.7246, "step": 1433 }, { "epoch": 0.5, "learning_rate": 2.6593210746765423e-06, "loss": 1.7333, "step": 1434 }, { "epoch": 0.5, "learning_rate": 2.656529659068705e-06, "loss": 1.5604, "step": 1435 }, { "epoch": 0.5, "learning_rate": 2.6537380475303288e-06, "loss": 1.6525, "step": 1436 }, { "epoch": 0.5, "learning_rate": 2.6509462435557155e-06, "loss": 1.5148, "step": 1437 }, { "epoch": 0.5, "learning_rate": 2.6481542506394093e-06, "loss": 1.7698, "step": 1438 }, { "epoch": 0.5, "learning_rate": 2.6453620722761897e-06, "loss": 1.8311, "step": 1439 }, { "epoch": 0.5, "learning_rate": 2.642569711961069e-06, "loss": 1.6544, "step": 1440 }, { "epoch": 0.5, "learning_rate": 2.639777173189286e-06, "loss": 1.4865, "step": 1441 }, { "epoch": 0.5, "learning_rate": 2.6369844594563034e-06, "loss": 1.6189, "step": 1442 }, { "epoch": 0.5, "learning_rate": 2.634191574257804e-06, "loss": 1.7343, "step": 1443 }, { "epoch": 0.5, "learning_rate": 2.6313985210896842e-06, "loss": 1.8418, "step": 1444 }, { "epoch": 0.5, "learning_rate": 2.628605303448051e-06, "loss": 1.7386, "step": 1445 }, { "epoch": 0.5, "learning_rate": 2.625811924829217e-06, "loss": 1.6324, "step": 1446 }, { "epoch": 0.5, "learning_rate": 2.6230183887296955e-06, "loss": 1.5256, "step": 1447 }, { "epoch": 0.5, "learning_rate": 2.6202246986461993e-06, "loss": 1.5422, "step": 1448 }, { "epoch": 0.5, "learning_rate": 2.617430858075632e-06, "loss": 1.5696, "step": 1449 }, { "epoch": 0.5, "learning_rate": 2.6146368705150854e-06, "loss": 1.6046, "step": 1450 }, { "epoch": 0.5, "learning_rate": 2.6118427394618357e-06, "loss": 1.88, "step": 1451 }, { "epoch": 0.5, "learning_rate": 2.6090484684133406e-06, "loss": 1.553, "step": 1452 }, { "epoch": 0.5, "learning_rate": 2.6062540608672298e-06, "loss": 1.4404, "step": 1453 }, { "epoch": 0.5, "learning_rate": 2.6034595203213065e-06, "loss": 1.4801, "step": 1454 }, { "epoch": 0.5, "learning_rate": 2.6006648502735384e-06, "loss": 1.4968, "step": 1455 }, { "epoch": 0.5, "learning_rate": 2.5978700542220566e-06, "loss": 1.4639, "step": 1456 }, { "epoch": 0.5, "learning_rate": 2.595075135665149e-06, "loss": 1.8613, "step": 1457 }, { "epoch": 0.5, "learning_rate": 2.5922800981012596e-06, "loss": 1.9624, "step": 1458 }, { "epoch": 0.5, "learning_rate": 2.5894849450289764e-06, "loss": 1.8229, "step": 1459 }, { "epoch": 0.5, "learning_rate": 2.586689679947036e-06, "loss": 1.5318, "step": 1460 }, { "epoch": 0.5, "learning_rate": 2.5838943063543137e-06, "loss": 1.5972, "step": 1461 }, { "epoch": 0.51, "learning_rate": 2.581098827749821e-06, "loss": 1.8695, "step": 1462 }, { "epoch": 0.51, "learning_rate": 2.578303247632701e-06, "loss": 2.1134, "step": 1463 }, { "epoch": 0.51, "learning_rate": 2.5755075695022223e-06, "loss": 1.6827, "step": 1464 }, { "epoch": 0.51, "learning_rate": 2.572711796857779e-06, "loss": 1.5632, "step": 1465 }, { "epoch": 0.51, "learning_rate": 2.5699159331988806e-06, "loss": 1.5114, "step": 1466 }, { "epoch": 0.51, "learning_rate": 2.5671199820251537e-06, "loss": 1.6805, "step": 1467 }, { "epoch": 0.51, "learning_rate": 2.5643239468363303e-06, "loss": 1.4941, "step": 1468 }, { "epoch": 0.51, "learning_rate": 2.5615278311322513e-06, "loss": 1.9277, "step": 1469 }, { "epoch": 0.51, "learning_rate": 2.5587316384128557e-06, "loss": 1.6394, "step": 1470 }, { "epoch": 0.51, "learning_rate": 2.555935372178183e-06, "loss": 1.6499, "step": 1471 }, { "epoch": 0.51, "learning_rate": 2.553139035928359e-06, "loss": 1.5699, "step": 1472 }, { "epoch": 0.51, "learning_rate": 2.550342633163601e-06, "loss": 1.5121, "step": 1473 }, { "epoch": 0.51, "learning_rate": 2.547546167384209e-06, "loss": 1.7122, "step": 1474 }, { "epoch": 0.51, "learning_rate": 2.5447496420905608e-06, "loss": 1.5393, "step": 1475 }, { "epoch": 0.51, "learning_rate": 2.54195306078311e-06, "loss": 1.8925, "step": 1476 }, { "epoch": 0.51, "learning_rate": 2.5391564269623792e-06, "loss": 1.5781, "step": 1477 }, { "epoch": 0.51, "learning_rate": 2.5363597441289574e-06, "loss": 1.668, "step": 1478 }, { "epoch": 0.51, "learning_rate": 2.533563015783494e-06, "loss": 1.7606, "step": 1479 }, { "epoch": 0.51, "learning_rate": 2.5307662454266973e-06, "loss": 1.544, "step": 1480 }, { "epoch": 0.51, "learning_rate": 2.5279694365593266e-06, "loss": 1.5676, "step": 1481 }, { "epoch": 0.51, "learning_rate": 2.525172592682189e-06, "loss": 1.9381, "step": 1482 }, { "epoch": 0.51, "learning_rate": 2.522375717296137e-06, "loss": 1.5902, "step": 1483 }, { "epoch": 0.51, "learning_rate": 2.5195788139020627e-06, "loss": 1.375, "step": 1484 }, { "epoch": 0.51, "learning_rate": 2.516781886000891e-06, "loss": 1.6531, "step": 1485 }, { "epoch": 0.51, "learning_rate": 2.513984937093578e-06, "loss": 1.6412, "step": 1486 }, { "epoch": 0.51, "learning_rate": 2.511187970681109e-06, "loss": 1.447, "step": 1487 }, { "epoch": 0.51, "learning_rate": 2.5083909902644874e-06, "loss": 1.531, "step": 1488 }, { "epoch": 0.51, "learning_rate": 2.505593999344737e-06, "loss": 1.7216, "step": 1489 }, { "epoch": 0.51, "learning_rate": 2.502797001422893e-06, "loss": 1.5187, "step": 1490 }, { "epoch": 0.52, "learning_rate": 2.5e-06, "loss": 1.6316, "step": 1491 }, { "epoch": 0.52, "learning_rate": 2.497202998577107e-06, "loss": 1.4749, "step": 1492 }, { "epoch": 0.52, "learning_rate": 2.494406000655264e-06, "loss": 1.8296, "step": 1493 }, { "epoch": 0.52, "learning_rate": 2.4916090097355134e-06, "loss": 1.8361, "step": 1494 }, { "epoch": 0.52, "learning_rate": 2.4888120293188915e-06, "loss": 1.7156, "step": 1495 }, { "epoch": 0.52, "learning_rate": 2.486015062906423e-06, "loss": 1.533, "step": 1496 }, { "epoch": 0.52, "learning_rate": 2.4832181139991103e-06, "loss": 1.5723, "step": 1497 }, { "epoch": 0.52, "learning_rate": 2.480421186097938e-06, "loss": 1.5797, "step": 1498 }, { "epoch": 0.52, "learning_rate": 2.4776242827038636e-06, "loss": 1.5251, "step": 1499 }, { "epoch": 0.52, "learning_rate": 2.4748274073178114e-06, "loss": 1.8552, "step": 1500 }, { "epoch": 0.52, "learning_rate": 2.472030563440674e-06, "loss": 2.1112, "step": 1501 }, { "epoch": 0.52, "learning_rate": 2.4692337545733035e-06, "loss": 1.5638, "step": 1502 }, { "epoch": 0.52, "learning_rate": 2.466436984216507e-06, "loss": 1.5205, "step": 1503 }, { "epoch": 0.52, "learning_rate": 2.4636402558710434e-06, "loss": 1.7573, "step": 1504 }, { "epoch": 0.52, "learning_rate": 2.460843573037622e-06, "loss": 1.6746, "step": 1505 }, { "epoch": 0.52, "learning_rate": 2.4580469392168905e-06, "loss": 1.5563, "step": 1506 }, { "epoch": 0.52, "learning_rate": 2.4552503579094396e-06, "loss": 1.8474, "step": 1507 }, { "epoch": 0.52, "learning_rate": 2.4524538326157913e-06, "loss": 1.4503, "step": 1508 }, { "epoch": 0.52, "learning_rate": 2.4496573668364e-06, "loss": 1.6378, "step": 1509 }, { "epoch": 0.52, "learning_rate": 2.4468609640716413e-06, "loss": 1.5332, "step": 1510 }, { "epoch": 0.52, "learning_rate": 2.4440646278218178e-06, "loss": 1.5895, "step": 1511 }, { "epoch": 0.52, "learning_rate": 2.4412683615871447e-06, "loss": 1.5884, "step": 1512 }, { "epoch": 0.52, "learning_rate": 2.4384721688677495e-06, "loss": 1.7515, "step": 1513 }, { "epoch": 0.52, "learning_rate": 2.43567605316367e-06, "loss": 1.8978, "step": 1514 }, { "epoch": 0.52, "learning_rate": 2.4328800179748475e-06, "loss": 1.5123, "step": 1515 }, { "epoch": 0.52, "learning_rate": 2.4300840668011198e-06, "loss": 1.6948, "step": 1516 }, { "epoch": 0.52, "learning_rate": 2.4272882031422216e-06, "loss": 1.7023, "step": 1517 }, { "epoch": 0.52, "learning_rate": 2.4244924304977785e-06, "loss": 1.4452, "step": 1518 }, { "epoch": 0.52, "learning_rate": 2.4216967523673e-06, "loss": 1.8806, "step": 1519 }, { "epoch": 0.53, "learning_rate": 2.4189011722501795e-06, "loss": 1.9073, "step": 1520 }, { "epoch": 0.53, "learning_rate": 2.416105693645687e-06, "loss": 1.5703, "step": 1521 }, { "epoch": 0.53, "learning_rate": 2.4133103200529645e-06, "loss": 1.3706, "step": 1522 }, { "epoch": 0.53, "learning_rate": 2.410515054971024e-06, "loss": 1.668, "step": 1523 }, { "epoch": 0.53, "learning_rate": 2.4077199018987417e-06, "loss": 1.4595, "step": 1524 }, { "epoch": 0.53, "learning_rate": 2.4049248643348512e-06, "loss": 1.5999, "step": 1525 }, { "epoch": 0.53, "learning_rate": 2.402129945777944e-06, "loss": 2.0917, "step": 1526 }, { "epoch": 0.53, "learning_rate": 2.399335149726463e-06, "loss": 1.5011, "step": 1527 }, { "epoch": 0.53, "learning_rate": 2.3965404796786947e-06, "loss": 1.8844, "step": 1528 }, { "epoch": 0.53, "learning_rate": 2.393745939132771e-06, "loss": 1.511, "step": 1529 }, { "epoch": 0.53, "learning_rate": 2.3909515315866606e-06, "loss": 1.5874, "step": 1530 }, { "epoch": 0.53, "learning_rate": 2.388157260538165e-06, "loss": 1.573, "step": 1531 }, { "epoch": 0.53, "learning_rate": 2.3853631294849154e-06, "loss": 1.9552, "step": 1532 }, { "epoch": 0.53, "learning_rate": 2.3825691419243696e-06, "loss": 1.7302, "step": 1533 }, { "epoch": 0.53, "learning_rate": 2.379775301353801e-06, "loss": 1.7449, "step": 1534 }, { "epoch": 0.53, "learning_rate": 2.376981611270305e-06, "loss": 1.8121, "step": 1535 }, { "epoch": 0.53, "learning_rate": 2.374188075170783e-06, "loss": 1.5758, "step": 1536 }, { "epoch": 0.53, "learning_rate": 2.3713946965519496e-06, "loss": 1.5842, "step": 1537 }, { "epoch": 0.53, "learning_rate": 2.368601478910316e-06, "loss": 1.8931, "step": 1538 }, { "epoch": 0.53, "learning_rate": 2.365808425742196e-06, "loss": 1.5366, "step": 1539 }, { "epoch": 0.53, "learning_rate": 2.3630155405436974e-06, "loss": 1.4948, "step": 1540 }, { "epoch": 0.53, "learning_rate": 2.3602228268107146e-06, "loss": 1.4619, "step": 1541 }, { "epoch": 0.53, "learning_rate": 2.357430288038932e-06, "loss": 1.6107, "step": 1542 }, { "epoch": 0.53, "learning_rate": 2.3546379277238107e-06, "loss": 1.8141, "step": 1543 }, { "epoch": 0.53, "learning_rate": 2.3518457493605916e-06, "loss": 1.582, "step": 1544 }, { "epoch": 0.53, "learning_rate": 2.349053756444285e-06, "loss": 1.907, "step": 1545 }, { "epoch": 0.53, "learning_rate": 2.3462619524696725e-06, "loss": 1.6992, "step": 1546 }, { "epoch": 0.53, "learning_rate": 2.3434703409312954e-06, "loss": 1.7067, "step": 1547 }, { "epoch": 0.53, "learning_rate": 2.3406789253234577e-06, "loss": 1.5826, "step": 1548 }, { "epoch": 0.54, "learning_rate": 2.337887709140216e-06, "loss": 1.5051, "step": 1549 }, { "epoch": 0.54, "learning_rate": 2.3350966958753766e-06, "loss": 1.5397, "step": 1550 }, { "epoch": 0.54, "learning_rate": 2.332305889022494e-06, "loss": 1.7383, "step": 1551 }, { "epoch": 0.54, "learning_rate": 2.329515292074864e-06, "loss": 1.5107, "step": 1552 }, { "epoch": 0.54, "learning_rate": 2.3267249085255177e-06, "loss": 1.7102, "step": 1553 }, { "epoch": 0.54, "learning_rate": 2.3239347418672203e-06, "loss": 1.4646, "step": 1554 }, { "epoch": 0.54, "learning_rate": 2.321144795592467e-06, "loss": 1.6548, "step": 1555 }, { "epoch": 0.54, "learning_rate": 2.318355073193474e-06, "loss": 1.5096, "step": 1556 }, { "epoch": 0.54, "learning_rate": 2.3155655781621795e-06, "loss": 1.7437, "step": 1557 }, { "epoch": 0.54, "learning_rate": 2.3127763139902375e-06, "loss": 1.4691, "step": 1558 }, { "epoch": 0.54, "learning_rate": 2.3099872841690105e-06, "loss": 1.6309, "step": 1559 }, { "epoch": 0.54, "learning_rate": 2.30719849218957e-06, "loss": 1.5032, "step": 1560 }, { "epoch": 0.54, "learning_rate": 2.30440994154269e-06, "loss": 1.7113, "step": 1561 }, { "epoch": 0.54, "learning_rate": 2.301621635718841e-06, "loss": 1.5509, "step": 1562 }, { "epoch": 0.54, "learning_rate": 2.2988335782081854e-06, "loss": 1.8027, "step": 1563 }, { "epoch": 0.54, "learning_rate": 2.29604577250058e-06, "loss": 1.808, "step": 1564 }, { "epoch": 0.54, "learning_rate": 2.2932582220855613e-06, "loss": 1.5581, "step": 1565 }, { "epoch": 0.54, "learning_rate": 2.290470930452349e-06, "loss": 1.537, "step": 1566 }, { "epoch": 0.54, "learning_rate": 2.2876839010898377e-06, "loss": 1.4591, "step": 1567 }, { "epoch": 0.54, "learning_rate": 2.284897137486596e-06, "loss": 1.6868, "step": 1568 }, { "epoch": 0.54, "learning_rate": 2.2821106431308546e-06, "loss": 1.874, "step": 1569 }, { "epoch": 0.54, "learning_rate": 2.2793244215105134e-06, "loss": 1.4889, "step": 1570 }, { "epoch": 0.54, "learning_rate": 2.2765384761131276e-06, "loss": 1.8386, "step": 1571 }, { "epoch": 0.54, "learning_rate": 2.273752810425906e-06, "loss": 1.3929, "step": 1572 }, { "epoch": 0.54, "learning_rate": 2.2709674279357085e-06, "loss": 1.5985, "step": 1573 }, { "epoch": 0.54, "learning_rate": 2.2681823321290425e-06, "loss": 1.8302, "step": 1574 }, { "epoch": 0.54, "learning_rate": 2.265397526492052e-06, "loss": 1.6324, "step": 1575 }, { "epoch": 0.54, "learning_rate": 2.2626130145105213e-06, "loss": 2.0604, "step": 1576 }, { "epoch": 0.54, "learning_rate": 2.2598287996698674e-06, "loss": 1.5831, "step": 1577 }, { "epoch": 0.55, "learning_rate": 2.2570448854551314e-06, "loss": 1.8505, "step": 1578 }, { "epoch": 0.55, "learning_rate": 2.2542612753509825e-06, "loss": 1.4113, "step": 1579 }, { "epoch": 0.55, "learning_rate": 2.251477972841708e-06, "loss": 1.709, "step": 1580 }, { "epoch": 0.55, "learning_rate": 2.248694981411208e-06, "loss": 1.479, "step": 1581 }, { "epoch": 0.55, "learning_rate": 2.2459123045429953e-06, "loss": 2.0779, "step": 1582 }, { "epoch": 0.55, "learning_rate": 2.24312994572019e-06, "loss": 1.6514, "step": 1583 }, { "epoch": 0.55, "learning_rate": 2.2403479084255118e-06, "loss": 1.5194, "step": 1584 }, { "epoch": 0.55, "learning_rate": 2.237566196141278e-06, "loss": 1.3811, "step": 1585 }, { "epoch": 0.55, "learning_rate": 2.2347848123494017e-06, "loss": 1.5948, "step": 1586 }, { "epoch": 0.55, "learning_rate": 2.2320037605313807e-06, "loss": 1.7306, "step": 1587 }, { "epoch": 0.55, "learning_rate": 2.229223044168301e-06, "loss": 2.0273, "step": 1588 }, { "epoch": 0.55, "learning_rate": 2.2264426667408285e-06, "loss": 1.6594, "step": 1589 }, { "epoch": 0.55, "learning_rate": 2.2236626317292006e-06, "loss": 1.5144, "step": 1590 }, { "epoch": 0.55, "learning_rate": 2.220882942613231e-06, "loss": 1.4905, "step": 1591 }, { "epoch": 0.55, "learning_rate": 2.218103602872299e-06, "loss": 1.4609, "step": 1592 }, { "epoch": 0.55, "learning_rate": 2.2153246159853448e-06, "loss": 1.5933, "step": 1593 }, { "epoch": 0.55, "learning_rate": 2.2125459854308686e-06, "loss": 1.5811, "step": 1594 }, { "epoch": 0.55, "learning_rate": 2.2097677146869242e-06, "loss": 1.7272, "step": 1595 }, { "epoch": 0.55, "learning_rate": 2.2069898072311165e-06, "loss": 1.4519, "step": 1596 }, { "epoch": 0.55, "learning_rate": 2.2042122665405928e-06, "loss": 1.5175, "step": 1597 }, { "epoch": 0.55, "learning_rate": 2.201435096092042e-06, "loss": 1.5696, "step": 1598 }, { "epoch": 0.55, "learning_rate": 2.1986582993616926e-06, "loss": 1.5531, "step": 1599 }, { "epoch": 0.55, "learning_rate": 2.195881879825301e-06, "loss": 1.7148, "step": 1600 }, { "epoch": 0.55, "learning_rate": 2.1931058409581546e-06, "loss": 1.8872, "step": 1601 }, { "epoch": 0.55, "learning_rate": 2.190330186235063e-06, "loss": 1.6201, "step": 1602 }, { "epoch": 0.55, "learning_rate": 2.1875549191303542e-06, "loss": 1.4167, "step": 1603 }, { "epoch": 0.55, "learning_rate": 2.1847800431178726e-06, "loss": 1.9366, "step": 1604 }, { "epoch": 0.55, "learning_rate": 2.1820055616709735e-06, "loss": 1.5033, "step": 1605 }, { "epoch": 0.55, "learning_rate": 2.179231478262516e-06, "loss": 1.5444, "step": 1606 }, { "epoch": 0.56, "learning_rate": 2.1764577963648616e-06, "loss": 1.9603, "step": 1607 }, { "epoch": 0.56, "learning_rate": 2.173684519449872e-06, "loss": 1.4834, "step": 1608 }, { "epoch": 0.56, "learning_rate": 2.170911650988897e-06, "loss": 1.832, "step": 1609 }, { "epoch": 0.56, "learning_rate": 2.1681391944527803e-06, "loss": 1.6599, "step": 1610 }, { "epoch": 0.56, "learning_rate": 2.165367153311847e-06, "loss": 1.541, "step": 1611 }, { "epoch": 0.56, "learning_rate": 2.1625955310359013e-06, "loss": 1.7987, "step": 1612 }, { "epoch": 0.56, "learning_rate": 2.1598243310942264e-06, "loss": 1.6499, "step": 1613 }, { "epoch": 0.56, "learning_rate": 2.1570535569555757e-06, "loss": 1.5419, "step": 1614 }, { "epoch": 0.56, "learning_rate": 2.154283212088168e-06, "loss": 1.7151, "step": 1615 }, { "epoch": 0.56, "learning_rate": 2.1515132999596847e-06, "loss": 1.8717, "step": 1616 }, { "epoch": 0.56, "learning_rate": 2.148743824037269e-06, "loss": 1.6174, "step": 1617 }, { "epoch": 0.56, "learning_rate": 2.1459747877875136e-06, "loss": 1.5518, "step": 1618 }, { "epoch": 0.56, "learning_rate": 2.1432061946764645e-06, "loss": 1.8231, "step": 1619 }, { "epoch": 0.56, "learning_rate": 2.140438048169611e-06, "loss": 2.0564, "step": 1620 }, { "epoch": 0.56, "learning_rate": 2.1376703517318835e-06, "loss": 1.4094, "step": 1621 }, { "epoch": 0.56, "learning_rate": 2.13490310882765e-06, "loss": 1.5895, "step": 1622 }, { "epoch": 0.56, "learning_rate": 2.1321363229207097e-06, "loss": 1.4731, "step": 1623 }, { "epoch": 0.56, "learning_rate": 2.1293699974742917e-06, "loss": 1.4834, "step": 1624 }, { "epoch": 0.56, "learning_rate": 2.1266041359510456e-06, "loss": 1.4612, "step": 1625 }, { "epoch": 0.56, "learning_rate": 2.1238387418130425e-06, "loss": 1.7557, "step": 1626 }, { "epoch": 0.56, "learning_rate": 2.1210738185217693e-06, "loss": 1.4547, "step": 1627 }, { "epoch": 0.56, "learning_rate": 2.1183093695381197e-06, "loss": 1.5508, "step": 1628 }, { "epoch": 0.56, "learning_rate": 2.115545398322399e-06, "loss": 1.5332, "step": 1629 }, { "epoch": 0.56, "learning_rate": 2.1127819083343107e-06, "loss": 1.6985, "step": 1630 }, { "epoch": 0.56, "learning_rate": 2.1100189030329557e-06, "loss": 1.7244, "step": 1631 }, { "epoch": 0.56, "learning_rate": 2.107256385876831e-06, "loss": 1.913, "step": 1632 }, { "epoch": 0.56, "learning_rate": 2.104494360323821e-06, "loss": 1.8333, "step": 1633 }, { "epoch": 0.56, "learning_rate": 2.101732829831194e-06, "loss": 1.7115, "step": 1634 }, { "epoch": 0.56, "learning_rate": 2.0989717978555992e-06, "loss": 1.6175, "step": 1635 }, { "epoch": 0.57, "learning_rate": 2.096211267853064e-06, "loss": 1.5409, "step": 1636 }, { "epoch": 0.57, "learning_rate": 2.0934512432789832e-06, "loss": 1.7653, "step": 1637 }, { "epoch": 0.57, "learning_rate": 2.0906917275881224e-06, "loss": 1.6522, "step": 1638 }, { "epoch": 0.57, "learning_rate": 2.0879327242346096e-06, "loss": 1.5996, "step": 1639 }, { "epoch": 0.57, "learning_rate": 2.085174236671929e-06, "loss": 1.483, "step": 1640 }, { "epoch": 0.57, "learning_rate": 2.0824162683529225e-06, "loss": 1.4396, "step": 1641 }, { "epoch": 0.57, "learning_rate": 2.0796588227297817e-06, "loss": 1.6522, "step": 1642 }, { "epoch": 0.57, "learning_rate": 2.0769019032540415e-06, "loss": 1.838, "step": 1643 }, { "epoch": 0.57, "learning_rate": 2.0741455133765794e-06, "loss": 1.7157, "step": 1644 }, { "epoch": 0.57, "learning_rate": 2.0713896565476114e-06, "loss": 1.8799, "step": 1645 }, { "epoch": 0.57, "learning_rate": 2.068634336216684e-06, "loss": 1.736, "step": 1646 }, { "epoch": 0.57, "learning_rate": 2.0658795558326745e-06, "loss": 1.6107, "step": 1647 }, { "epoch": 0.57, "learning_rate": 2.063125318843783e-06, "loss": 1.6458, "step": 1648 }, { "epoch": 0.57, "learning_rate": 2.060371628697527e-06, "loss": 1.5614, "step": 1649 }, { "epoch": 0.57, "learning_rate": 2.057618488840745e-06, "loss": 1.6188, "step": 1650 }, { "epoch": 0.57, "learning_rate": 2.054865902719584e-06, "loss": 1.9863, "step": 1651 }, { "epoch": 0.57, "learning_rate": 2.0521138737794952e-06, "loss": 1.7948, "step": 1652 }, { "epoch": 0.57, "learning_rate": 2.049362405465236e-06, "loss": 1.486, "step": 1653 }, { "epoch": 0.57, "learning_rate": 2.0466115012208606e-06, "loss": 1.3921, "step": 1654 }, { "epoch": 0.57, "learning_rate": 2.0438611644897186e-06, "loss": 1.535, "step": 1655 }, { "epoch": 0.57, "learning_rate": 2.0411113987144468e-06, "loss": 1.5465, "step": 1656 }, { "epoch": 0.57, "learning_rate": 2.0383622073369684e-06, "loss": 1.9537, "step": 1657 }, { "epoch": 0.57, "learning_rate": 2.035613593798489e-06, "loss": 1.6429, "step": 1658 }, { "epoch": 0.57, "learning_rate": 2.0328655615394884e-06, "loss": 1.478, "step": 1659 }, { "epoch": 0.57, "learning_rate": 2.0301181139997206e-06, "loss": 1.4009, "step": 1660 }, { "epoch": 0.57, "learning_rate": 2.027371254618208e-06, "loss": 1.7972, "step": 1661 }, { "epoch": 0.57, "learning_rate": 2.024624986833234e-06, "loss": 1.672, "step": 1662 }, { "epoch": 0.57, "learning_rate": 2.021879314082344e-06, "loss": 1.825, "step": 1663 }, { "epoch": 0.57, "learning_rate": 2.0191342398023406e-06, "loss": 1.9142, "step": 1664 }, { "epoch": 0.58, "learning_rate": 2.016389767429272e-06, "loss": 1.3831, "step": 1665 }, { "epoch": 0.58, "learning_rate": 2.0136459003984364e-06, "loss": 1.6675, "step": 1666 }, { "epoch": 0.58, "learning_rate": 2.0109026421443744e-06, "loss": 1.3867, "step": 1667 }, { "epoch": 0.58, "learning_rate": 2.008159996100862e-06, "loss": 1.554, "step": 1668 }, { "epoch": 0.58, "learning_rate": 2.0054179657009127e-06, "loss": 1.5786, "step": 1669 }, { "epoch": 0.58, "learning_rate": 2.0026765543767672e-06, "loss": 1.9648, "step": 1670 }, { "epoch": 0.58, "learning_rate": 1.9999357655598894e-06, "loss": 1.6187, "step": 1671 }, { "epoch": 0.58, "learning_rate": 1.9971956026809675e-06, "loss": 1.4735, "step": 1672 }, { "epoch": 0.58, "learning_rate": 1.994456069169906e-06, "loss": 1.8132, "step": 1673 }, { "epoch": 0.58, "learning_rate": 1.991717168455818e-06, "loss": 1.6433, "step": 1674 }, { "epoch": 0.58, "learning_rate": 1.9889789039670276e-06, "loss": 1.7058, "step": 1675 }, { "epoch": 0.58, "learning_rate": 1.9862412791310636e-06, "loss": 2.2418, "step": 1676 }, { "epoch": 0.58, "learning_rate": 1.98350429737465e-06, "loss": 1.7611, "step": 1677 }, { "epoch": 0.58, "learning_rate": 1.980767962123709e-06, "loss": 1.7841, "step": 1678 }, { "epoch": 0.58, "learning_rate": 1.978032276803354e-06, "loss": 1.4768, "step": 1679 }, { "epoch": 0.58, "learning_rate": 1.9752972448378817e-06, "loss": 1.4494, "step": 1680 }, { "epoch": 0.58, "learning_rate": 1.9725628696507736e-06, "loss": 1.6603, "step": 1681 }, { "epoch": 0.58, "learning_rate": 1.969829154664689e-06, "loss": 1.8782, "step": 1682 }, { "epoch": 0.58, "learning_rate": 1.9670961033014604e-06, "loss": 1.6877, "step": 1683 }, { "epoch": 0.58, "learning_rate": 1.964363718982088e-06, "loss": 1.501, "step": 1684 }, { "epoch": 0.58, "learning_rate": 1.9616320051267394e-06, "loss": 1.5795, "step": 1685 }, { "epoch": 0.58, "learning_rate": 1.958900965154743e-06, "loss": 1.6285, "step": 1686 }, { "epoch": 0.58, "learning_rate": 1.956170602484582e-06, "loss": 1.6619, "step": 1687 }, { "epoch": 0.58, "learning_rate": 1.9534409205338916e-06, "loss": 1.6934, "step": 1688 }, { "epoch": 0.58, "learning_rate": 1.950711922719458e-06, "loss": 1.839, "step": 1689 }, { "epoch": 0.58, "learning_rate": 1.947983612457207e-06, "loss": 1.3976, "step": 1690 }, { "epoch": 0.58, "learning_rate": 1.9452559931622066e-06, "loss": 1.8503, "step": 1691 }, { "epoch": 0.58, "learning_rate": 1.94252906824866e-06, "loss": 1.4395, "step": 1692 }, { "epoch": 0.58, "learning_rate": 1.9398028411298985e-06, "loss": 1.5173, "step": 1693 }, { "epoch": 0.59, "learning_rate": 1.9370773152183825e-06, "loss": 1.7988, "step": 1694 }, { "epoch": 0.59, "learning_rate": 1.934352493925695e-06, "loss": 1.7167, "step": 1695 }, { "epoch": 0.59, "learning_rate": 1.931628380662535e-06, "loss": 1.5708, "step": 1696 }, { "epoch": 0.59, "learning_rate": 1.928904978838716e-06, "loss": 1.4635, "step": 1697 }, { "epoch": 0.59, "learning_rate": 1.926182291863162e-06, "loss": 1.5454, "step": 1698 }, { "epoch": 0.59, "learning_rate": 1.9234603231439e-06, "loss": 1.3342, "step": 1699 }, { "epoch": 0.59, "learning_rate": 1.9207390760880605e-06, "loss": 1.644, "step": 1700 }, { "epoch": 0.59, "learning_rate": 1.91801855410187e-06, "loss": 1.7932, "step": 1701 }, { "epoch": 0.59, "learning_rate": 1.915298760590646e-06, "loss": 1.5874, "step": 1702 }, { "epoch": 0.59, "learning_rate": 1.9125796989587947e-06, "loss": 1.5454, "step": 1703 }, { "epoch": 0.59, "learning_rate": 1.9098613726098084e-06, "loss": 1.4022, "step": 1704 }, { "epoch": 0.59, "learning_rate": 1.907143784946256e-06, "loss": 1.5114, "step": 1705 }, { "epoch": 0.59, "learning_rate": 1.9044269393697825e-06, "loss": 1.5394, "step": 1706 }, { "epoch": 0.59, "learning_rate": 1.9017108392811065e-06, "loss": 1.9905, "step": 1707 }, { "epoch": 0.59, "learning_rate": 1.8989954880800091e-06, "loss": 1.6301, "step": 1708 }, { "epoch": 0.59, "learning_rate": 1.8962808891653378e-06, "loss": 1.3964, "step": 1709 }, { "epoch": 0.59, "learning_rate": 1.8935670459349973e-06, "loss": 1.4912, "step": 1710 }, { "epoch": 0.59, "learning_rate": 1.8908539617859455e-06, "loss": 1.5795, "step": 1711 }, { "epoch": 0.59, "learning_rate": 1.8881416401141905e-06, "loss": 1.672, "step": 1712 }, { "epoch": 0.59, "learning_rate": 1.8854300843147878e-06, "loss": 1.8278, "step": 1713 }, { "epoch": 0.59, "learning_rate": 1.8827192977818311e-06, "loss": 1.5721, "step": 1714 }, { "epoch": 0.59, "learning_rate": 1.8800092839084542e-06, "loss": 1.4724, "step": 1715 }, { "epoch": 0.59, "learning_rate": 1.8773000460868206e-06, "loss": 1.5793, "step": 1716 }, { "epoch": 0.59, "learning_rate": 1.8745915877081266e-06, "loss": 1.5222, "step": 1717 }, { "epoch": 0.59, "learning_rate": 1.8718839121625885e-06, "loss": 1.5464, "step": 1718 }, { "epoch": 0.59, "learning_rate": 1.8691770228394458e-06, "loss": 1.678, "step": 1719 }, { "epoch": 0.59, "learning_rate": 1.8664709231269526e-06, "loss": 1.8334, "step": 1720 }, { "epoch": 0.59, "learning_rate": 1.8637656164123736e-06, "loss": 1.5776, "step": 1721 }, { "epoch": 0.59, "learning_rate": 1.861061106081983e-06, "loss": 1.4636, "step": 1722 }, { "epoch": 0.6, "learning_rate": 1.8583573955210582e-06, "loss": 1.4316, "step": 1723 }, { "epoch": 0.6, "learning_rate": 1.8556544881138734e-06, "loss": 1.7445, "step": 1724 }, { "epoch": 0.6, "learning_rate": 1.852952387243698e-06, "loss": 1.7755, "step": 1725 }, { "epoch": 0.6, "learning_rate": 1.8502510962927945e-06, "loss": 2.0458, "step": 1726 }, { "epoch": 0.6, "learning_rate": 1.8475506186424075e-06, "loss": 1.4626, "step": 1727 }, { "epoch": 0.6, "learning_rate": 1.8448509576727667e-06, "loss": 1.5712, "step": 1728 }, { "epoch": 0.6, "learning_rate": 1.8421521167630791e-06, "loss": 1.5297, "step": 1729 }, { "epoch": 0.6, "learning_rate": 1.8394540992915227e-06, "loss": 1.665, "step": 1730 }, { "epoch": 0.6, "learning_rate": 1.8367569086352483e-06, "loss": 1.5227, "step": 1731 }, { "epoch": 0.6, "learning_rate": 1.8340605481703705e-06, "loss": 1.6637, "step": 1732 }, { "epoch": 0.6, "learning_rate": 1.831365021271963e-06, "loss": 1.6497, "step": 1733 }, { "epoch": 0.6, "learning_rate": 1.828670331314058e-06, "loss": 1.5863, "step": 1734 }, { "epoch": 0.6, "learning_rate": 1.8259764816696413e-06, "loss": 1.7242, "step": 1735 }, { "epoch": 0.6, "learning_rate": 1.823283475710642e-06, "loss": 1.4706, "step": 1736 }, { "epoch": 0.6, "learning_rate": 1.8205913168079392e-06, "loss": 1.6484, "step": 1737 }, { "epoch": 0.6, "learning_rate": 1.8179000083313483e-06, "loss": 1.6116, "step": 1738 }, { "epoch": 0.6, "learning_rate": 1.8152095536496188e-06, "loss": 1.6443, "step": 1739 }, { "epoch": 0.6, "learning_rate": 1.8125199561304356e-06, "loss": 1.5687, "step": 1740 }, { "epoch": 0.6, "learning_rate": 1.809831219140408e-06, "loss": 1.818, "step": 1741 }, { "epoch": 0.6, "learning_rate": 1.807143346045069e-06, "loss": 1.6885, "step": 1742 }, { "epoch": 0.6, "learning_rate": 1.8044563402088686e-06, "loss": 1.6333, "step": 1743 }, { "epoch": 0.6, "learning_rate": 1.8017702049951735e-06, "loss": 1.8987, "step": 1744 }, { "epoch": 0.6, "learning_rate": 1.7990849437662607e-06, "loss": 1.5627, "step": 1745 }, { "epoch": 0.6, "learning_rate": 1.7964005598833108e-06, "loss": 1.7615, "step": 1746 }, { "epoch": 0.6, "learning_rate": 1.7937170567064078e-06, "loss": 1.719, "step": 1747 }, { "epoch": 0.6, "learning_rate": 1.7910344375945343e-06, "loss": 1.7517, "step": 1748 }, { "epoch": 0.6, "learning_rate": 1.7883527059055633e-06, "loss": 1.4003, "step": 1749 }, { "epoch": 0.6, "learning_rate": 1.7856718649962606e-06, "loss": 1.4749, "step": 1750 }, { "epoch": 0.6, "learning_rate": 1.7829919182222752e-06, "loss": 1.954, "step": 1751 }, { "epoch": 0.61, "learning_rate": 1.780312868938135e-06, "loss": 1.7142, "step": 1752 }, { "epoch": 0.61, "learning_rate": 1.777634720497248e-06, "loss": 1.5801, "step": 1753 }, { "epoch": 0.61, "learning_rate": 1.774957476251894e-06, "loss": 1.5853, "step": 1754 }, { "epoch": 0.61, "learning_rate": 1.772281139553218e-06, "loss": 1.6119, "step": 1755 }, { "epoch": 0.61, "learning_rate": 1.7696057137512319e-06, "loss": 1.4376, "step": 1756 }, { "epoch": 0.61, "learning_rate": 1.7669312021948077e-06, "loss": 1.8029, "step": 1757 }, { "epoch": 0.61, "learning_rate": 1.7642576082316696e-06, "loss": 2.0156, "step": 1758 }, { "epoch": 0.61, "learning_rate": 1.7615849352083975e-06, "loss": 1.5719, "step": 1759 }, { "epoch": 0.61, "learning_rate": 1.7589131864704162e-06, "loss": 1.692, "step": 1760 }, { "epoch": 0.61, "learning_rate": 1.7562423653619931e-06, "loss": 1.5631, "step": 1761 }, { "epoch": 0.61, "learning_rate": 1.7535724752262355e-06, "loss": 1.7822, "step": 1762 }, { "epoch": 0.61, "learning_rate": 1.750903519405087e-06, "loss": 1.9311, "step": 1763 }, { "epoch": 0.61, "learning_rate": 1.7482355012393177e-06, "loss": 1.7366, "step": 1764 }, { "epoch": 0.61, "learning_rate": 1.7455684240685267e-06, "loss": 1.5381, "step": 1765 }, { "epoch": 0.61, "learning_rate": 1.7429022912311363e-06, "loss": 1.645, "step": 1766 }, { "epoch": 0.61, "learning_rate": 1.740237106064383e-06, "loss": 1.5101, "step": 1767 }, { "epoch": 0.61, "learning_rate": 1.737572871904321e-06, "loss": 1.7759, "step": 1768 }, { "epoch": 0.61, "learning_rate": 1.7349095920858116e-06, "loss": 1.6687, "step": 1769 }, { "epoch": 0.61, "learning_rate": 1.7322472699425236e-06, "loss": 1.812, "step": 1770 }, { "epoch": 0.61, "learning_rate": 1.7295859088069234e-06, "loss": 1.5005, "step": 1771 }, { "epoch": 0.61, "learning_rate": 1.7269255120102795e-06, "loss": 1.5153, "step": 1772 }, { "epoch": 0.61, "learning_rate": 1.72426608288265e-06, "loss": 1.4963, "step": 1773 }, { "epoch": 0.61, "learning_rate": 1.7216076247528813e-06, "loss": 1.6633, "step": 1774 }, { "epoch": 0.61, "learning_rate": 1.7189501409486061e-06, "loss": 1.5518, "step": 1775 }, { "epoch": 0.61, "learning_rate": 1.716293634796239e-06, "loss": 1.759, "step": 1776 }, { "epoch": 0.61, "learning_rate": 1.7136381096209665e-06, "loss": 1.6047, "step": 1777 }, { "epoch": 0.61, "learning_rate": 1.7109835687467497e-06, "loss": 1.7313, "step": 1778 }, { "epoch": 0.61, "learning_rate": 1.7083300154963195e-06, "loss": 1.4836, "step": 1779 }, { "epoch": 0.61, "learning_rate": 1.7056774531911661e-06, "loss": 1.4446, "step": 1780 }, { "epoch": 0.62, "learning_rate": 1.7030258851515435e-06, "loss": 1.6571, "step": 1781 }, { "epoch": 0.62, "learning_rate": 1.7003753146964594e-06, "loss": 2.0797, "step": 1782 }, { "epoch": 0.62, "learning_rate": 1.6977257451436714e-06, "loss": 1.7539, "step": 1783 }, { "epoch": 0.62, "learning_rate": 1.6950771798096868e-06, "loss": 1.5515, "step": 1784 }, { "epoch": 0.62, "learning_rate": 1.6924296220097559e-06, "loss": 1.6227, "step": 1785 }, { "epoch": 0.62, "learning_rate": 1.6897830750578653e-06, "loss": 1.5756, "step": 1786 }, { "epoch": 0.62, "learning_rate": 1.6871375422667374e-06, "loss": 1.4432, "step": 1787 }, { "epoch": 0.62, "learning_rate": 1.6844930269478274e-06, "loss": 1.859, "step": 1788 }, { "epoch": 0.62, "learning_rate": 1.6818495324113129e-06, "loss": 1.7598, "step": 1789 }, { "epoch": 0.62, "learning_rate": 1.6792070619660977e-06, "loss": 1.5791, "step": 1790 }, { "epoch": 0.62, "learning_rate": 1.6765656189198013e-06, "loss": 1.7471, "step": 1791 }, { "epoch": 0.62, "learning_rate": 1.6739252065787578e-06, "loss": 1.7686, "step": 1792 }, { "epoch": 0.62, "learning_rate": 1.671285828248011e-06, "loss": 1.5259, "step": 1793 }, { "epoch": 0.62, "learning_rate": 1.6686474872313116e-06, "loss": 1.816, "step": 1794 }, { "epoch": 0.62, "learning_rate": 1.6660101868311093e-06, "loss": 1.8595, "step": 1795 }, { "epoch": 0.62, "learning_rate": 1.6633739303485544e-06, "loss": 1.434, "step": 1796 }, { "epoch": 0.62, "learning_rate": 1.6607387210834889e-06, "loss": 1.5554, "step": 1797 }, { "epoch": 0.62, "learning_rate": 1.6581045623344422e-06, "loss": 1.3839, "step": 1798 }, { "epoch": 0.62, "learning_rate": 1.6554714573986325e-06, "loss": 1.576, "step": 1799 }, { "epoch": 0.62, "learning_rate": 1.6528394095719558e-06, "loss": 1.619, "step": 1800 }, { "epoch": 0.62, "learning_rate": 1.6502084221489877e-06, "loss": 2.0856, "step": 1801 }, { "epoch": 0.62, "learning_rate": 1.6475784984229731e-06, "loss": 1.9014, "step": 1802 }, { "epoch": 0.62, "learning_rate": 1.6449496416858285e-06, "loss": 1.5056, "step": 1803 }, { "epoch": 0.62, "learning_rate": 1.6423218552281328e-06, "loss": 1.6118, "step": 1804 }, { "epoch": 0.62, "learning_rate": 1.6396951423391267e-06, "loss": 1.4836, "step": 1805 }, { "epoch": 0.62, "learning_rate": 1.6370695063067054e-06, "loss": 1.7457, "step": 1806 }, { "epoch": 0.62, "learning_rate": 1.6344449504174193e-06, "loss": 1.9423, "step": 1807 }, { "epoch": 0.62, "learning_rate": 1.6318214779564614e-06, "loss": 1.4467, "step": 1808 }, { "epoch": 0.62, "learning_rate": 1.6291990922076744e-06, "loss": 1.5476, "step": 1809 }, { "epoch": 0.63, "learning_rate": 1.6265777964535382e-06, "loss": 1.7957, "step": 1810 }, { "epoch": 0.63, "learning_rate": 1.6239575939751662e-06, "loss": 1.5784, "step": 1811 }, { "epoch": 0.63, "learning_rate": 1.6213384880523065e-06, "loss": 1.7235, "step": 1812 }, { "epoch": 0.63, "learning_rate": 1.6187204819633347e-06, "loss": 1.8124, "step": 1813 }, { "epoch": 0.63, "learning_rate": 1.6161035789852475e-06, "loss": 1.5508, "step": 1814 }, { "epoch": 0.63, "learning_rate": 1.613487782393661e-06, "loss": 1.6434, "step": 1815 }, { "epoch": 0.63, "learning_rate": 1.6108730954628093e-06, "loss": 1.4287, "step": 1816 }, { "epoch": 0.63, "learning_rate": 1.6082595214655337e-06, "loss": 1.5916, "step": 1817 }, { "epoch": 0.63, "learning_rate": 1.6056470636732852e-06, "loss": 1.5013, "step": 1818 }, { "epoch": 0.63, "learning_rate": 1.6030357253561175e-06, "loss": 1.8545, "step": 1819 }, { "epoch": 0.63, "learning_rate": 1.6004255097826799e-06, "loss": 1.8418, "step": 1820 }, { "epoch": 0.63, "learning_rate": 1.5978164202202201e-06, "loss": 1.6421, "step": 1821 }, { "epoch": 0.63, "learning_rate": 1.5952084599345758e-06, "loss": 1.6472, "step": 1822 }, { "epoch": 0.63, "learning_rate": 1.5926016321901688e-06, "loss": 1.624, "step": 1823 }, { "epoch": 0.63, "learning_rate": 1.5899959402500049e-06, "loss": 1.6912, "step": 1824 }, { "epoch": 0.63, "learning_rate": 1.587391387375669e-06, "loss": 1.6984, "step": 1825 }, { "epoch": 0.63, "learning_rate": 1.584787976827318e-06, "loss": 1.7952, "step": 1826 }, { "epoch": 0.63, "learning_rate": 1.5821857118636814e-06, "loss": 1.2917, "step": 1827 }, { "epoch": 0.63, "learning_rate": 1.5795845957420525e-06, "loss": 1.623, "step": 1828 }, { "epoch": 0.63, "learning_rate": 1.5769846317182894e-06, "loss": 1.7155, "step": 1829 }, { "epoch": 0.63, "learning_rate": 1.574385823046804e-06, "loss": 1.5424, "step": 1830 }, { "epoch": 0.63, "learning_rate": 1.571788172980566e-06, "loss": 1.8938, "step": 1831 }, { "epoch": 0.63, "learning_rate": 1.5691916847710931e-06, "loss": 2.0154, "step": 1832 }, { "epoch": 0.63, "learning_rate": 1.5665963616684477e-06, "loss": 1.6523, "step": 1833 }, { "epoch": 0.63, "learning_rate": 1.5640022069212357e-06, "loss": 1.5607, "step": 1834 }, { "epoch": 0.63, "learning_rate": 1.5614092237766008e-06, "loss": 1.562, "step": 1835 }, { "epoch": 0.63, "learning_rate": 1.5588174154802175e-06, "loss": 1.6591, "step": 1836 }, { "epoch": 0.63, "learning_rate": 1.5562267852762914e-06, "loss": 1.8246, "step": 1837 }, { "epoch": 0.63, "learning_rate": 1.5536373364075551e-06, "loss": 1.979, "step": 1838 }, { "epoch": 0.64, "learning_rate": 1.5510490721152594e-06, "loss": 1.7963, "step": 1839 }, { "epoch": 0.64, "learning_rate": 1.5484619956391744e-06, "loss": 1.6794, "step": 1840 }, { "epoch": 0.64, "learning_rate": 1.545876110217583e-06, "loss": 1.4916, "step": 1841 }, { "epoch": 0.64, "learning_rate": 1.5432914190872757e-06, "loss": 1.4825, "step": 1842 }, { "epoch": 0.64, "learning_rate": 1.5407079254835508e-06, "loss": 1.393, "step": 1843 }, { "epoch": 0.64, "learning_rate": 1.5381256326402067e-06, "loss": 1.8885, "step": 1844 }, { "epoch": 0.64, "learning_rate": 1.5355445437895373e-06, "loss": 1.9379, "step": 1845 }, { "epoch": 0.64, "learning_rate": 1.5329646621623302e-06, "loss": 1.766, "step": 1846 }, { "epoch": 0.64, "learning_rate": 1.5303859909878632e-06, "loss": 1.7012, "step": 1847 }, { "epoch": 0.64, "learning_rate": 1.5278085334938972e-06, "loss": 1.4857, "step": 1848 }, { "epoch": 0.64, "learning_rate": 1.5252322929066754e-06, "loss": 1.8398, "step": 1849 }, { "epoch": 0.64, "learning_rate": 1.522657272450917e-06, "loss": 1.6586, "step": 1850 }, { "epoch": 0.64, "learning_rate": 1.520083475349813e-06, "loss": 2.0615, "step": 1851 }, { "epoch": 0.64, "learning_rate": 1.517510904825024e-06, "loss": 1.6603, "step": 1852 }, { "epoch": 0.64, "learning_rate": 1.514939564096678e-06, "loss": 1.5115, "step": 1853 }, { "epoch": 0.64, "learning_rate": 1.5123694563833585e-06, "loss": 1.6227, "step": 1854 }, { "epoch": 0.64, "learning_rate": 1.509800584902108e-06, "loss": 1.5509, "step": 1855 }, { "epoch": 0.64, "learning_rate": 1.5072329528684238e-06, "loss": 1.7675, "step": 1856 }, { "epoch": 0.64, "learning_rate": 1.5046665634962477e-06, "loss": 1.7496, "step": 1857 }, { "epoch": 0.64, "learning_rate": 1.502101419997969e-06, "loss": 1.625, "step": 1858 }, { "epoch": 0.64, "learning_rate": 1.4995375255844162e-06, "loss": 1.4944, "step": 1859 }, { "epoch": 0.64, "learning_rate": 1.4969748834648556e-06, "loss": 1.6395, "step": 1860 }, { "epoch": 0.64, "learning_rate": 1.4944134968469835e-06, "loss": 1.6935, "step": 1861 }, { "epoch": 0.64, "learning_rate": 1.4918533689369274e-06, "loss": 1.6766, "step": 1862 }, { "epoch": 0.64, "learning_rate": 1.489294502939238e-06, "loss": 1.9343, "step": 1863 }, { "epoch": 0.64, "learning_rate": 1.4867369020568856e-06, "loss": 1.7136, "step": 1864 }, { "epoch": 0.64, "learning_rate": 1.4841805694912584e-06, "loss": 1.4814, "step": 1865 }, { "epoch": 0.64, "learning_rate": 1.4816255084421575e-06, "loss": 1.5822, "step": 1866 }, { "epoch": 0.64, "learning_rate": 1.4790717221077899e-06, "loss": 1.6703, "step": 1867 }, { "epoch": 0.65, "learning_rate": 1.4765192136847686e-06, "loss": 1.5952, "step": 1868 }, { "epoch": 0.65, "learning_rate": 1.4739679863681086e-06, "loss": 1.78, "step": 1869 }, { "epoch": 0.65, "learning_rate": 1.4714180433512177e-06, "loss": 1.6703, "step": 1870 }, { "epoch": 0.65, "learning_rate": 1.468869387825899e-06, "loss": 1.7712, "step": 1871 }, { "epoch": 0.65, "learning_rate": 1.4663220229823438e-06, "loss": 1.7351, "step": 1872 }, { "epoch": 0.65, "learning_rate": 1.4637759520091252e-06, "loss": 1.72, "step": 1873 }, { "epoch": 0.65, "learning_rate": 1.4612311780931998e-06, "loss": 1.4607, "step": 1874 }, { "epoch": 0.65, "learning_rate": 1.4586877044199015e-06, "loss": 1.6566, "step": 1875 }, { "epoch": 0.65, "learning_rate": 1.4561455341729314e-06, "loss": 2.0385, "step": 1876 }, { "epoch": 0.65, "learning_rate": 1.4536046705343634e-06, "loss": 1.5286, "step": 1877 }, { "epoch": 0.65, "learning_rate": 1.4510651166846369e-06, "loss": 1.6749, "step": 1878 }, { "epoch": 0.65, "learning_rate": 1.4485268758025467e-06, "loss": 1.4661, "step": 1879 }, { "epoch": 0.65, "learning_rate": 1.4459899510652491e-06, "loss": 1.572, "step": 1880 }, { "epoch": 0.65, "learning_rate": 1.443454345648252e-06, "loss": 1.7108, "step": 1881 }, { "epoch": 0.65, "learning_rate": 1.4409200627254083e-06, "loss": 1.9614, "step": 1882 }, { "epoch": 0.65, "learning_rate": 1.4383871054689214e-06, "loss": 1.467, "step": 1883 }, { "epoch": 0.65, "learning_rate": 1.43585547704933e-06, "loss": 1.4949, "step": 1884 }, { "epoch": 0.65, "learning_rate": 1.4333251806355114e-06, "loss": 1.5752, "step": 1885 }, { "epoch": 0.65, "learning_rate": 1.4307962193946763e-06, "loss": 1.5463, "step": 1886 }, { "epoch": 0.65, "learning_rate": 1.4282685964923643e-06, "loss": 1.6765, "step": 1887 }, { "epoch": 0.65, "learning_rate": 1.42574231509244e-06, "loss": 1.8243, "step": 1888 }, { "epoch": 0.65, "learning_rate": 1.423217378357085e-06, "loss": 1.6932, "step": 1889 }, { "epoch": 0.65, "learning_rate": 1.4206937894468033e-06, "loss": 1.5262, "step": 1890 }, { "epoch": 0.65, "learning_rate": 1.4181715515204095e-06, "loss": 1.6176, "step": 1891 }, { "epoch": 0.65, "learning_rate": 1.4156506677350257e-06, "loss": 1.5153, "step": 1892 }, { "epoch": 0.65, "learning_rate": 1.4131311412460797e-06, "loss": 1.6582, "step": 1893 }, { "epoch": 0.65, "learning_rate": 1.4106129752073023e-06, "loss": 1.9279, "step": 1894 }, { "epoch": 0.65, "learning_rate": 1.4080961727707185e-06, "loss": 1.5945, "step": 1895 }, { "epoch": 0.65, "learning_rate": 1.4055807370866488e-06, "loss": 1.6086, "step": 1896 }, { "epoch": 0.66, "learning_rate": 1.4030666713037027e-06, "loss": 1.6239, "step": 1897 }, { "epoch": 0.66, "learning_rate": 1.4005539785687728e-06, "loss": 1.6288, "step": 1898 }, { "epoch": 0.66, "learning_rate": 1.3980426620270351e-06, "loss": 1.6384, "step": 1899 }, { "epoch": 0.66, "learning_rate": 1.3955327248219438e-06, "loss": 1.5018, "step": 1900 }, { "epoch": 0.66, "learning_rate": 1.3930241700952241e-06, "loss": 1.7585, "step": 1901 }, { "epoch": 0.66, "learning_rate": 1.390517000986871e-06, "loss": 1.6084, "step": 1902 }, { "epoch": 0.66, "learning_rate": 1.3880112206351476e-06, "loss": 1.4116, "step": 1903 }, { "epoch": 0.66, "learning_rate": 1.3855068321765752e-06, "loss": 1.5515, "step": 1904 }, { "epoch": 0.66, "learning_rate": 1.3830038387459354e-06, "loss": 1.4287, "step": 1905 }, { "epoch": 0.66, "learning_rate": 1.3805022434762643e-06, "loss": 1.483, "step": 1906 }, { "epoch": 0.66, "learning_rate": 1.3780020494988447e-06, "loss": 1.8979, "step": 1907 }, { "epoch": 0.66, "learning_rate": 1.3755032599432075e-06, "loss": 1.4253, "step": 1908 }, { "epoch": 0.66, "learning_rate": 1.3730058779371266e-06, "loss": 1.4702, "step": 1909 }, { "epoch": 0.66, "learning_rate": 1.370509906606612e-06, "loss": 1.3997, "step": 1910 }, { "epoch": 0.66, "learning_rate": 1.3680153490759074e-06, "loss": 1.764, "step": 1911 }, { "epoch": 0.66, "learning_rate": 1.3655222084674902e-06, "loss": 1.5158, "step": 1912 }, { "epoch": 0.66, "learning_rate": 1.36303048790206e-06, "loss": 1.7848, "step": 1913 }, { "epoch": 0.66, "learning_rate": 1.3605401904985427e-06, "loss": 2.0399, "step": 1914 }, { "epoch": 0.66, "learning_rate": 1.3580513193740818e-06, "loss": 1.2382, "step": 1915 }, { "epoch": 0.66, "learning_rate": 1.3555638776440327e-06, "loss": 1.8503, "step": 1916 }, { "epoch": 0.66, "learning_rate": 1.3530778684219647e-06, "loss": 1.428, "step": 1917 }, { "epoch": 0.66, "learning_rate": 1.3505932948196548e-06, "loss": 1.5243, "step": 1918 }, { "epoch": 0.66, "learning_rate": 1.3481101599470794e-06, "loss": 1.8704, "step": 1919 }, { "epoch": 0.66, "learning_rate": 1.3456284669124159e-06, "loss": 1.7241, "step": 1920 }, { "epoch": 0.66, "learning_rate": 1.343148218822037e-06, "loss": 1.6106, "step": 1921 }, { "epoch": 0.66, "learning_rate": 1.340669418780508e-06, "loss": 1.9131, "step": 1922 }, { "epoch": 0.66, "learning_rate": 1.3381920698905788e-06, "loss": 1.5419, "step": 1923 }, { "epoch": 0.66, "learning_rate": 1.335716175253185e-06, "loss": 1.3101, "step": 1924 }, { "epoch": 0.66, "learning_rate": 1.3332417379674426e-06, "loss": 1.5309, "step": 1925 }, { "epoch": 0.67, "learning_rate": 1.3307687611306397e-06, "loss": 1.9965, "step": 1926 }, { "epoch": 0.67, "learning_rate": 1.328297247838241e-06, "loss": 1.6803, "step": 1927 }, { "epoch": 0.67, "learning_rate": 1.3258272011838751e-06, "loss": 1.495, "step": 1928 }, { "epoch": 0.67, "learning_rate": 1.3233586242593388e-06, "loss": 1.6434, "step": 1929 }, { "epoch": 0.67, "learning_rate": 1.3208915201545853e-06, "loss": 1.5968, "step": 1930 }, { "epoch": 0.67, "learning_rate": 1.318425891957727e-06, "loss": 1.4586, "step": 1931 }, { "epoch": 0.67, "learning_rate": 1.315961742755027e-06, "loss": 1.963, "step": 1932 }, { "epoch": 0.67, "learning_rate": 1.313499075630899e-06, "loss": 1.9928, "step": 1933 }, { "epoch": 0.67, "learning_rate": 1.311037893667901e-06, "loss": 1.4509, "step": 1934 }, { "epoch": 0.67, "learning_rate": 1.3085781999467303e-06, "loss": 1.5649, "step": 1935 }, { "epoch": 0.67, "learning_rate": 1.3061199975462247e-06, "loss": 1.6814, "step": 1936 }, { "epoch": 0.67, "learning_rate": 1.3036632895433505e-06, "loss": 1.6016, "step": 1937 }, { "epoch": 0.67, "learning_rate": 1.3012080790132092e-06, "loss": 1.8605, "step": 1938 }, { "epoch": 0.67, "learning_rate": 1.2987543690290222e-06, "loss": 1.6849, "step": 1939 }, { "epoch": 0.67, "learning_rate": 1.2963021626621375e-06, "loss": 1.5664, "step": 1940 }, { "epoch": 0.67, "learning_rate": 1.293851462982017e-06, "loss": 1.5341, "step": 1941 }, { "epoch": 0.67, "learning_rate": 1.2914022730562397e-06, "loss": 1.6035, "step": 1942 }, { "epoch": 0.67, "learning_rate": 1.288954595950494e-06, "loss": 1.7446, "step": 1943 }, { "epoch": 0.67, "learning_rate": 1.2865084347285728e-06, "loss": 1.9457, "step": 1944 }, { "epoch": 0.67, "learning_rate": 1.2840637924523741e-06, "loss": 1.8076, "step": 1945 }, { "epoch": 0.67, "learning_rate": 1.2816206721818944e-06, "loss": 1.595, "step": 1946 }, { "epoch": 0.67, "learning_rate": 1.2791790769752232e-06, "loss": 1.571, "step": 1947 }, { "epoch": 0.67, "learning_rate": 1.2767390098885413e-06, "loss": 1.5974, "step": 1948 }, { "epoch": 0.67, "learning_rate": 1.2743004739761177e-06, "loss": 1.641, "step": 1949 }, { "epoch": 0.67, "learning_rate": 1.2718634722903073e-06, "loss": 1.4402, "step": 1950 }, { "epoch": 0.67, "learning_rate": 1.2694280078815382e-06, "loss": 1.7001, "step": 1951 }, { "epoch": 0.67, "learning_rate": 1.26699408379832e-06, "loss": 1.6182, "step": 1952 }, { "epoch": 0.67, "learning_rate": 1.2645617030872328e-06, "loss": 1.6224, "step": 1953 }, { "epoch": 0.67, "learning_rate": 1.262130868792923e-06, "loss": 1.5422, "step": 1954 }, { "epoch": 0.68, "learning_rate": 1.2597015839581034e-06, "loss": 1.3892, "step": 1955 }, { "epoch": 0.68, "learning_rate": 1.2572738516235462e-06, "loss": 1.5129, "step": 1956 }, { "epoch": 0.68, "learning_rate": 1.25484767482808e-06, "loss": 1.8203, "step": 1957 }, { "epoch": 0.68, "learning_rate": 1.252423056608587e-06, "loss": 1.6572, "step": 1958 }, { "epoch": 0.68, "learning_rate": 1.2500000000000007e-06, "loss": 1.5959, "step": 1959 }, { "epoch": 0.68, "learning_rate": 1.2475785080352945e-06, "loss": 1.4061, "step": 1960 }, { "epoch": 0.68, "learning_rate": 1.245158583745488e-06, "loss": 1.6512, "step": 1961 }, { "epoch": 0.68, "learning_rate": 1.242740230159638e-06, "loss": 1.7584, "step": 1962 }, { "epoch": 0.68, "learning_rate": 1.240323450304832e-06, "loss": 1.8512, "step": 1963 }, { "epoch": 0.68, "learning_rate": 1.2379082472061922e-06, "loss": 1.7299, "step": 1964 }, { "epoch": 0.68, "learning_rate": 1.2354946238868631e-06, "loss": 1.5029, "step": 1965 }, { "epoch": 0.68, "learning_rate": 1.2330825833680135e-06, "loss": 1.4548, "step": 1966 }, { "epoch": 0.68, "learning_rate": 1.2306721286688312e-06, "loss": 1.7703, "step": 1967 }, { "epoch": 0.68, "learning_rate": 1.2282632628065197e-06, "loss": 1.6116, "step": 1968 }, { "epoch": 0.68, "learning_rate": 1.2258559887962912e-06, "loss": 1.7711, "step": 1969 }, { "epoch": 0.68, "learning_rate": 1.223450309651368e-06, "loss": 1.7858, "step": 1970 }, { "epoch": 0.68, "learning_rate": 1.2210462283829754e-06, "loss": 1.7089, "step": 1971 }, { "epoch": 0.68, "learning_rate": 1.218643748000337e-06, "loss": 1.4012, "step": 1972 }, { "epoch": 0.68, "learning_rate": 1.2162428715106752e-06, "loss": 1.6985, "step": 1973 }, { "epoch": 0.68, "learning_rate": 1.2138436019192019e-06, "loss": 1.8162, "step": 1974 }, { "epoch": 0.68, "learning_rate": 1.2114459422291205e-06, "loss": 1.4503, "step": 1975 }, { "epoch": 0.68, "learning_rate": 1.2090498954416161e-06, "loss": 1.8599, "step": 1976 }, { "epoch": 0.68, "learning_rate": 1.2066554645558578e-06, "loss": 1.6599, "step": 1977 }, { "epoch": 0.68, "learning_rate": 1.2042626525689917e-06, "loss": 1.4969, "step": 1978 }, { "epoch": 0.68, "learning_rate": 1.2018714624761351e-06, "loss": 1.58, "step": 1979 }, { "epoch": 0.68, "learning_rate": 1.1994818972703774e-06, "loss": 1.5592, "step": 1980 }, { "epoch": 0.68, "learning_rate": 1.197093959942774e-06, "loss": 1.4758, "step": 1981 }, { "epoch": 0.68, "learning_rate": 1.1947076534823424e-06, "loss": 2.1161, "step": 1982 }, { "epoch": 0.68, "learning_rate": 1.1923229808760565e-06, "loss": 1.808, "step": 1983 }, { "epoch": 0.69, "learning_rate": 1.189939945108849e-06, "loss": 1.4987, "step": 1984 }, { "epoch": 0.69, "learning_rate": 1.1875585491636e-06, "loss": 1.7258, "step": 1985 }, { "epoch": 0.69, "learning_rate": 1.1851787960211397e-06, "loss": 1.5554, "step": 1986 }, { "epoch": 0.69, "learning_rate": 1.1828006886602422e-06, "loss": 1.7095, "step": 1987 }, { "epoch": 0.69, "learning_rate": 1.1804242300576185e-06, "loss": 1.6643, "step": 1988 }, { "epoch": 0.69, "learning_rate": 1.1780494231879183e-06, "loss": 1.8994, "step": 1989 }, { "epoch": 0.69, "learning_rate": 1.1756762710237246e-06, "loss": 1.7428, "step": 1990 }, { "epoch": 0.69, "learning_rate": 1.1733047765355466e-06, "loss": 1.4318, "step": 1991 }, { "epoch": 0.69, "learning_rate": 1.170934942691819e-06, "loss": 1.791, "step": 1992 }, { "epoch": 0.69, "learning_rate": 1.1685667724589003e-06, "loss": 1.3523, "step": 1993 }, { "epoch": 0.69, "learning_rate": 1.1662002688010635e-06, "loss": 2.1409, "step": 1994 }, { "epoch": 0.69, "learning_rate": 1.1638354346804974e-06, "loss": 1.4891, "step": 1995 }, { "epoch": 0.69, "learning_rate": 1.1614722730573017e-06, "loss": 1.5226, "step": 1996 }, { "epoch": 0.69, "learning_rate": 1.1591107868894797e-06, "loss": 1.5912, "step": 1997 }, { "epoch": 0.69, "learning_rate": 1.1567509791329402e-06, "loss": 1.4608, "step": 1998 }, { "epoch": 0.69, "learning_rate": 1.154392852741491e-06, "loss": 1.438, "step": 1999 }, { "epoch": 0.69, "learning_rate": 1.1520364106668342e-06, "loss": 1.5742, "step": 2000 }, { "epoch": 0.69, "learning_rate": 1.1496816558585622e-06, "loss": 1.9122, "step": 2001 }, { "epoch": 0.69, "learning_rate": 1.1473285912641594e-06, "loss": 1.8635, "step": 2002 }, { "epoch": 0.69, "learning_rate": 1.1449772198289904e-06, "loss": 1.441, "step": 2003 }, { "epoch": 0.69, "learning_rate": 1.1426275444963033e-06, "loss": 1.8732, "step": 2004 }, { "epoch": 0.69, "learning_rate": 1.1402795682072223e-06, "loss": 1.6263, "step": 2005 }, { "epoch": 0.69, "learning_rate": 1.1379332939007455e-06, "loss": 1.5576, "step": 2006 }, { "epoch": 0.69, "learning_rate": 1.1355887245137383e-06, "loss": 1.6836, "step": 2007 }, { "epoch": 0.69, "learning_rate": 1.1332458629809354e-06, "loss": 1.6416, "step": 2008 }, { "epoch": 0.69, "learning_rate": 1.1309047122349307e-06, "loss": 1.6093, "step": 2009 }, { "epoch": 0.69, "learning_rate": 1.1285652752061774e-06, "loss": 1.5291, "step": 2010 }, { "epoch": 0.69, "learning_rate": 1.1262275548229852e-06, "loss": 1.5818, "step": 2011 }, { "epoch": 0.69, "learning_rate": 1.1238915540115142e-06, "loss": 1.5872, "step": 2012 }, { "epoch": 0.7, "learning_rate": 1.121557275695771e-06, "loss": 2.0916, "step": 2013 }, { "epoch": 0.7, "learning_rate": 1.1192247227976074e-06, "loss": 1.61, "step": 2014 }, { "epoch": 0.7, "learning_rate": 1.1168938982367162e-06, "loss": 1.4951, "step": 2015 }, { "epoch": 0.7, "learning_rate": 1.1145648049306238e-06, "loss": 1.3937, "step": 2016 }, { "epoch": 0.7, "learning_rate": 1.112237445794693e-06, "loss": 1.5526, "step": 2017 }, { "epoch": 0.7, "learning_rate": 1.1099118237421133e-06, "loss": 1.6725, "step": 2018 }, { "epoch": 0.7, "learning_rate": 1.1075879416839022e-06, "loss": 1.9414, "step": 2019 }, { "epoch": 0.7, "learning_rate": 1.1052658025288961e-06, "loss": 1.7894, "step": 2020 }, { "epoch": 0.7, "learning_rate": 1.102945409183754e-06, "loss": 1.6056, "step": 2021 }, { "epoch": 0.7, "learning_rate": 1.100626764552945e-06, "loss": 1.6771, "step": 2022 }, { "epoch": 0.7, "learning_rate": 1.0983098715387528e-06, "loss": 1.3717, "step": 2023 }, { "epoch": 0.7, "learning_rate": 1.0959947330412681e-06, "loss": 1.6129, "step": 2024 }, { "epoch": 0.7, "learning_rate": 1.093681351958383e-06, "loss": 1.6753, "step": 2025 }, { "epoch": 0.7, "learning_rate": 1.0913697311857923e-06, "loss": 1.8035, "step": 2026 }, { "epoch": 0.7, "learning_rate": 1.089059873616988e-06, "loss": 1.5709, "step": 2027 }, { "epoch": 0.7, "learning_rate": 1.0867517821432525e-06, "loss": 1.5297, "step": 2028 }, { "epoch": 0.7, "learning_rate": 1.0844454596536574e-06, "loss": 1.4899, "step": 2029 }, { "epoch": 0.7, "learning_rate": 1.082140909035064e-06, "loss": 1.5999, "step": 2030 }, { "epoch": 0.7, "learning_rate": 1.079838133172111e-06, "loss": 1.7917, "step": 2031 }, { "epoch": 0.7, "learning_rate": 1.0775371349472182e-06, "loss": 1.9175, "step": 2032 }, { "epoch": 0.7, "learning_rate": 1.0752379172405808e-06, "loss": 1.5822, "step": 2033 }, { "epoch": 0.7, "learning_rate": 1.0729404829301644e-06, "loss": 1.6302, "step": 2034 }, { "epoch": 0.7, "learning_rate": 1.0706448348917006e-06, "loss": 1.6638, "step": 2035 }, { "epoch": 0.7, "learning_rate": 1.0683509759986888e-06, "loss": 1.5094, "step": 2036 }, { "epoch": 0.7, "learning_rate": 1.0660589091223854e-06, "loss": 1.5393, "step": 2037 }, { "epoch": 0.7, "learning_rate": 1.0637686371318048e-06, "loss": 1.561, "step": 2038 }, { "epoch": 0.7, "learning_rate": 1.061480162893716e-06, "loss": 1.8113, "step": 2039 }, { "epoch": 0.7, "learning_rate": 1.0591934892726372e-06, "loss": 1.6581, "step": 2040 }, { "epoch": 0.71, "learning_rate": 1.0569086191308306e-06, "loss": 1.4513, "step": 2041 }, { "epoch": 0.71, "learning_rate": 1.054625555328304e-06, "loss": 1.4886, "step": 2042 }, { "epoch": 0.71, "learning_rate": 1.052344300722803e-06, "loss": 1.585, "step": 2043 }, { "epoch": 0.71, "learning_rate": 1.0500648581698071e-06, "loss": 1.8429, "step": 2044 }, { "epoch": 0.71, "learning_rate": 1.0477872305225308e-06, "loss": 1.7168, "step": 2045 }, { "epoch": 0.71, "learning_rate": 1.0455114206319144e-06, "loss": 1.4437, "step": 2046 }, { "epoch": 0.71, "learning_rate": 1.043237431346622e-06, "loss": 1.5583, "step": 2047 }, { "epoch": 0.71, "learning_rate": 1.0409652655130417e-06, "loss": 1.4423, "step": 2048 }, { "epoch": 0.71, "learning_rate": 1.0386949259752784e-06, "loss": 1.6493, "step": 2049 }, { "epoch": 0.71, "learning_rate": 1.0364264155751489e-06, "loss": 1.5702, "step": 2050 }, { "epoch": 0.71, "learning_rate": 1.0341597371521825e-06, "loss": 1.8756, "step": 2051 }, { "epoch": 0.71, "learning_rate": 1.031894893543616e-06, "loss": 1.4606, "step": 2052 }, { "epoch": 0.71, "learning_rate": 1.0296318875843865e-06, "loss": 1.38, "step": 2053 }, { "epoch": 0.71, "learning_rate": 1.0273707221071342e-06, "loss": 1.6348, "step": 2054 }, { "epoch": 0.71, "learning_rate": 1.0251113999421936e-06, "loss": 1.7105, "step": 2055 }, { "epoch": 0.71, "learning_rate": 1.0228539239175914e-06, "loss": 1.4365, "step": 2056 }, { "epoch": 0.71, "learning_rate": 1.0205982968590453e-06, "loss": 1.7537, "step": 2057 }, { "epoch": 0.71, "learning_rate": 1.0183445215899585e-06, "loss": 1.7556, "step": 2058 }, { "epoch": 0.71, "learning_rate": 1.0160926009314139e-06, "loss": 1.5906, "step": 2059 }, { "epoch": 0.71, "learning_rate": 1.0138425377021757e-06, "loss": 1.4738, "step": 2060 }, { "epoch": 0.71, "learning_rate": 1.0115943347186826e-06, "loss": 1.6812, "step": 2061 }, { "epoch": 0.71, "learning_rate": 1.0093479947950428e-06, "loss": 1.4932, "step": 2062 }, { "epoch": 0.71, "learning_rate": 1.0071035207430352e-06, "loss": 1.7665, "step": 2063 }, { "epoch": 0.71, "learning_rate": 1.0048609153721004e-06, "loss": 1.7889, "step": 2064 }, { "epoch": 0.71, "learning_rate": 1.002620181489343e-06, "loss": 1.5608, "step": 2065 }, { "epoch": 0.71, "learning_rate": 1.000381321899522e-06, "loss": 1.4442, "step": 2066 }, { "epoch": 0.71, "learning_rate": 9.981443394050525e-07, "loss": 1.5365, "step": 2067 }, { "epoch": 0.71, "learning_rate": 9.959092368060002e-07, "loss": 1.745, "step": 2068 }, { "epoch": 0.71, "learning_rate": 9.93676016900075e-07, "loss": 1.9203, "step": 2069 }, { "epoch": 0.72, "learning_rate": 9.914446824826326e-07, "loss": 1.6671, "step": 2070 }, { "epoch": 0.72, "learning_rate": 9.892152363466692e-07, "loss": 1.5873, "step": 2071 }, { "epoch": 0.72, "learning_rate": 9.869876812828153e-07, "loss": 1.6377, "step": 2072 }, { "epoch": 0.72, "learning_rate": 9.847620200793343e-07, "loss": 1.5745, "step": 2073 }, { "epoch": 0.72, "learning_rate": 9.825382555221216e-07, "loss": 1.4346, "step": 2074 }, { "epoch": 0.72, "learning_rate": 9.803163903946952e-07, "loss": 1.4779, "step": 2075 }, { "epoch": 0.72, "learning_rate": 9.780964274781984e-07, "loss": 1.9495, "step": 2076 }, { "epoch": 0.72, "learning_rate": 9.758783695513927e-07, "loss": 1.6017, "step": 2077 }, { "epoch": 0.72, "learning_rate": 9.73662219390653e-07, "loss": 1.5728, "step": 2078 }, { "epoch": 0.72, "learning_rate": 9.714479797699695e-07, "loss": 1.5847, "step": 2079 }, { "epoch": 0.72, "learning_rate": 9.692356534609393e-07, "loss": 1.5092, "step": 2080 }, { "epoch": 0.72, "learning_rate": 9.670252432327644e-07, "loss": 1.6115, "step": 2081 }, { "epoch": 0.72, "learning_rate": 9.648167518522479e-07, "loss": 1.7988, "step": 2082 }, { "epoch": 0.72, "learning_rate": 9.626101820837927e-07, "loss": 1.5914, "step": 2083 }, { "epoch": 0.72, "learning_rate": 9.604055366893947e-07, "loss": 1.4357, "step": 2084 }, { "epoch": 0.72, "learning_rate": 9.582028184286423e-07, "loss": 1.6915, "step": 2085 }, { "epoch": 0.72, "learning_rate": 9.560020300587123e-07, "loss": 1.6974, "step": 2086 }, { "epoch": 0.72, "learning_rate": 9.538031743343628e-07, "loss": 1.4539, "step": 2087 }, { "epoch": 0.72, "learning_rate": 9.516062540079357e-07, "loss": 1.838, "step": 2088 }, { "epoch": 0.72, "learning_rate": 9.494112718293503e-07, "loss": 1.864, "step": 2089 }, { "epoch": 0.72, "learning_rate": 9.472182305460976e-07, "loss": 1.5208, "step": 2090 }, { "epoch": 0.72, "learning_rate": 9.450271329032404e-07, "loss": 1.4611, "step": 2091 }, { "epoch": 0.72, "learning_rate": 9.428379816434088e-07, "loss": 1.452, "step": 2092 }, { "epoch": 0.72, "learning_rate": 9.406507795067981e-07, "loss": 1.5616, "step": 2093 }, { "epoch": 0.72, "learning_rate": 9.3846552923116e-07, "loss": 1.7511, "step": 2094 }, { "epoch": 0.72, "learning_rate": 9.362822335518062e-07, "loss": 1.8119, "step": 2095 }, { "epoch": 0.72, "learning_rate": 9.341008952016023e-07, "loss": 1.4897, "step": 2096 }, { "epoch": 0.72, "learning_rate": 9.319215169109599e-07, "loss": 1.4653, "step": 2097 }, { "epoch": 0.72, "learning_rate": 9.297441014078421e-07, "loss": 1.4905, "step": 2098 }, { "epoch": 0.73, "learning_rate": 9.275686514177506e-07, "loss": 1.6323, "step": 2099 }, { "epoch": 0.73, "learning_rate": 9.253951696637311e-07, "loss": 1.6251, "step": 2100 }, { "epoch": 0.73, "learning_rate": 9.232236588663615e-07, "loss": 2.0021, "step": 2101 }, { "epoch": 0.73, "learning_rate": 9.210541217437566e-07, "loss": 1.7344, "step": 2102 }, { "epoch": 0.73, "learning_rate": 9.188865610115572e-07, "loss": 1.3672, "step": 2103 }, { "epoch": 0.73, "learning_rate": 9.167209793829324e-07, "loss": 1.6768, "step": 2104 }, { "epoch": 0.73, "learning_rate": 9.145573795685741e-07, "loss": 1.4743, "step": 2105 }, { "epoch": 0.73, "learning_rate": 9.123957642766917e-07, "loss": 1.5444, "step": 2106 }, { "epoch": 0.73, "learning_rate": 9.102361362130133e-07, "loss": 1.9025, "step": 2107 }, { "epoch": 0.73, "learning_rate": 9.080784980807758e-07, "loss": 1.4126, "step": 2108 }, { "epoch": 0.73, "learning_rate": 9.059228525807296e-07, "loss": 1.6039, "step": 2109 }, { "epoch": 0.73, "learning_rate": 9.037692024111269e-07, "loss": 1.3994, "step": 2110 }, { "epoch": 0.73, "learning_rate": 9.01617550267726e-07, "loss": 1.4426, "step": 2111 }, { "epoch": 0.73, "learning_rate": 8.994678988437802e-07, "loss": 1.6608, "step": 2112 }, { "epoch": 0.73, "learning_rate": 8.973202508300422e-07, "loss": 1.4816, "step": 2113 }, { "epoch": 0.73, "learning_rate": 8.951746089147556e-07, "loss": 1.7329, "step": 2114 }, { "epoch": 0.73, "learning_rate": 8.930309757836517e-07, "loss": 1.5293, "step": 2115 }, { "epoch": 0.73, "learning_rate": 8.90889354119949e-07, "loss": 1.5702, "step": 2116 }, { "epoch": 0.73, "learning_rate": 8.887497466043488e-07, "loss": 1.571, "step": 2117 }, { "epoch": 0.73, "learning_rate": 8.866121559150287e-07, "loss": 1.6064, "step": 2118 }, { "epoch": 0.73, "learning_rate": 8.844765847276432e-07, "loss": 1.853, "step": 2119 }, { "epoch": 0.73, "learning_rate": 8.823430357153193e-07, "loss": 1.8717, "step": 2120 }, { "epoch": 0.73, "learning_rate": 8.802115115486534e-07, "loss": 1.4395, "step": 2121 }, { "epoch": 0.73, "learning_rate": 8.78082014895705e-07, "loss": 1.5258, "step": 2122 }, { "epoch": 0.73, "learning_rate": 8.759545484219983e-07, "loss": 1.5524, "step": 2123 }, { "epoch": 0.73, "learning_rate": 8.738291147905157e-07, "loss": 1.7059, "step": 2124 }, { "epoch": 0.73, "learning_rate": 8.717057166616926e-07, "loss": 1.6263, "step": 2125 }, { "epoch": 0.73, "learning_rate": 8.695843566934212e-07, "loss": 1.6823, "step": 2126 }, { "epoch": 0.73, "learning_rate": 8.674650375410379e-07, "loss": 1.6133, "step": 2127 }, { "epoch": 0.74, "learning_rate": 8.653477618573261e-07, "loss": 1.3455, "step": 2128 }, { "epoch": 0.74, "learning_rate": 8.632325322925128e-07, "loss": 1.6584, "step": 2129 }, { "epoch": 0.74, "learning_rate": 8.611193514942634e-07, "loss": 1.5491, "step": 2130 }, { "epoch": 0.74, "learning_rate": 8.590082221076764e-07, "loss": 1.7052, "step": 2131 }, { "epoch": 0.74, "learning_rate": 8.568991467752857e-07, "loss": 1.8984, "step": 2132 }, { "epoch": 0.74, "learning_rate": 8.54792128137053e-07, "loss": 1.3291, "step": 2133 }, { "epoch": 0.74, "learning_rate": 8.526871688303642e-07, "loss": 1.7284, "step": 2134 }, { "epoch": 0.74, "learning_rate": 8.505842714900298e-07, "loss": 1.7836, "step": 2135 }, { "epoch": 0.74, "learning_rate": 8.484834387482777e-07, "loss": 1.5059, "step": 2136 }, { "epoch": 0.74, "learning_rate": 8.463846732347511e-07, "loss": 1.6932, "step": 2137 }, { "epoch": 0.74, "learning_rate": 8.442879775765073e-07, "loss": 1.7823, "step": 2138 }, { "epoch": 0.74, "learning_rate": 8.421933543980126e-07, "loss": 1.5189, "step": 2139 }, { "epoch": 0.74, "learning_rate": 8.401008063211368e-07, "loss": 1.4525, "step": 2140 }, { "epoch": 0.74, "learning_rate": 8.380103359651554e-07, "loss": 1.505, "step": 2141 }, { "epoch": 0.74, "learning_rate": 8.359219459467415e-07, "loss": 1.5786, "step": 2142 }, { "epoch": 0.74, "learning_rate": 8.338356388799637e-07, "loss": 1.45, "step": 2143 }, { "epoch": 0.74, "learning_rate": 8.317514173762853e-07, "loss": 1.642, "step": 2144 }, { "epoch": 0.74, "learning_rate": 8.296692840445569e-07, "loss": 1.7694, "step": 2145 }, { "epoch": 0.74, "learning_rate": 8.275892414910159e-07, "loss": 1.5929, "step": 2146 }, { "epoch": 0.74, "learning_rate": 8.255112923192834e-07, "loss": 1.6609, "step": 2147 }, { "epoch": 0.74, "learning_rate": 8.234354391303606e-07, "loss": 1.5378, "step": 2148 }, { "epoch": 0.74, "learning_rate": 8.213616845226227e-07, "loss": 1.6169, "step": 2149 }, { "epoch": 0.74, "learning_rate": 8.192900310918206e-07, "loss": 1.73, "step": 2150 }, { "epoch": 0.74, "learning_rate": 8.172204814310741e-07, "loss": 1.9122, "step": 2151 }, { "epoch": 0.74, "learning_rate": 8.151530381308706e-07, "loss": 1.5941, "step": 2152 }, { "epoch": 0.74, "learning_rate": 8.130877037790594e-07, "loss": 1.6527, "step": 2153 }, { "epoch": 0.74, "learning_rate": 8.110244809608494e-07, "loss": 1.5682, "step": 2154 }, { "epoch": 0.74, "learning_rate": 8.089633722588103e-07, "loss": 1.3174, "step": 2155 }, { "epoch": 0.74, "learning_rate": 8.069043802528606e-07, "loss": 1.4044, "step": 2156 }, { "epoch": 0.75, "learning_rate": 8.048475075202727e-07, "loss": 1.8618, "step": 2157 }, { "epoch": 0.75, "learning_rate": 8.027927566356661e-07, "loss": 1.7036, "step": 2158 }, { "epoch": 0.75, "learning_rate": 8.007401301710022e-07, "loss": 1.767, "step": 2159 }, { "epoch": 0.75, "learning_rate": 7.986896306955849e-07, "loss": 1.5994, "step": 2160 }, { "epoch": 0.75, "learning_rate": 7.966412607760565e-07, "loss": 1.5309, "step": 2161 }, { "epoch": 0.75, "learning_rate": 7.945950229763915e-07, "loss": 1.7045, "step": 2162 }, { "epoch": 0.75, "learning_rate": 7.925509198578959e-07, "loss": 2.0975, "step": 2163 }, { "epoch": 0.75, "learning_rate": 7.905089539792066e-07, "loss": 1.5223, "step": 2164 }, { "epoch": 0.75, "learning_rate": 7.884691278962806e-07, "loss": 1.3291, "step": 2165 }, { "epoch": 0.75, "learning_rate": 7.864314441624005e-07, "loss": 1.6207, "step": 2166 }, { "epoch": 0.75, "learning_rate": 7.843959053281663e-07, "loss": 1.579, "step": 2167 }, { "epoch": 0.75, "learning_rate": 7.823625139414914e-07, "loss": 1.6628, "step": 2168 }, { "epoch": 0.75, "learning_rate": 7.803312725476031e-07, "loss": 1.7509, "step": 2169 }, { "epoch": 0.75, "learning_rate": 7.783021836890378e-07, "loss": 1.4731, "step": 2170 }, { "epoch": 0.75, "learning_rate": 7.762752499056359e-07, "loss": 1.5486, "step": 2171 }, { "epoch": 0.75, "learning_rate": 7.742504737345399e-07, "loss": 1.4486, "step": 2172 }, { "epoch": 0.75, "learning_rate": 7.722278577101946e-07, "loss": 1.755, "step": 2173 }, { "epoch": 0.75, "learning_rate": 7.702074043643376e-07, "loss": 1.426, "step": 2174 }, { "epoch": 0.75, "learning_rate": 7.681891162260016e-07, "loss": 1.6226, "step": 2175 }, { "epoch": 0.75, "learning_rate": 7.661729958215089e-07, "loss": 1.7922, "step": 2176 }, { "epoch": 0.75, "learning_rate": 7.641590456744663e-07, "loss": 1.652, "step": 2177 }, { "epoch": 0.75, "learning_rate": 7.621472683057668e-07, "loss": 1.7333, "step": 2178 }, { "epoch": 0.75, "learning_rate": 7.60137666233583e-07, "loss": 1.4584, "step": 2179 }, { "epoch": 0.75, "learning_rate": 7.581302419733633e-07, "loss": 1.2452, "step": 2180 }, { "epoch": 0.75, "learning_rate": 7.561249980378302e-07, "loss": 1.5802, "step": 2181 }, { "epoch": 0.75, "learning_rate": 7.541219369369787e-07, "loss": 1.7277, "step": 2182 }, { "epoch": 0.75, "learning_rate": 7.521210611780715e-07, "loss": 1.6169, "step": 2183 }, { "epoch": 0.75, "learning_rate": 7.501223732656337e-07, "loss": 1.5262, "step": 2184 }, { "epoch": 0.75, "learning_rate": 7.481258757014534e-07, "loss": 1.4961, "step": 2185 }, { "epoch": 0.76, "learning_rate": 7.461315709845777e-07, "loss": 1.676, "step": 2186 }, { "epoch": 0.76, "learning_rate": 7.441394616113062e-07, "loss": 1.4846, "step": 2187 }, { "epoch": 0.76, "learning_rate": 7.421495500751943e-07, "loss": 1.6221, "step": 2188 }, { "epoch": 0.76, "learning_rate": 7.401618388670428e-07, "loss": 1.5383, "step": 2189 }, { "epoch": 0.76, "learning_rate": 7.38176330474901e-07, "loss": 1.7217, "step": 2190 }, { "epoch": 0.76, "learning_rate": 7.361930273840581e-07, "loss": 1.4891, "step": 2191 }, { "epoch": 0.76, "learning_rate": 7.342119320770466e-07, "loss": 1.4116, "step": 2192 }, { "epoch": 0.76, "learning_rate": 7.322330470336314e-07, "loss": 1.6045, "step": 2193 }, { "epoch": 0.76, "learning_rate": 7.302563747308139e-07, "loss": 1.9141, "step": 2194 }, { "epoch": 0.76, "learning_rate": 7.282819176428252e-07, "loss": 1.8837, "step": 2195 }, { "epoch": 0.76, "learning_rate": 7.263096782411219e-07, "loss": 1.564, "step": 2196 }, { "epoch": 0.76, "learning_rate": 7.243396589943869e-07, "loss": 1.6665, "step": 2197 }, { "epoch": 0.76, "learning_rate": 7.22371862368522e-07, "loss": 1.4332, "step": 2198 }, { "epoch": 0.76, "learning_rate": 7.204062908266491e-07, "loss": 1.7101, "step": 2199 }, { "epoch": 0.76, "learning_rate": 7.184429468291023e-07, "loss": 1.4939, "step": 2200 }, { "epoch": 0.76, "learning_rate": 7.164818328334309e-07, "loss": 1.8073, "step": 2201 }, { "epoch": 0.76, "learning_rate": 7.145229512943886e-07, "loss": 1.506, "step": 2202 }, { "epoch": 0.76, "learning_rate": 7.125663046639386e-07, "loss": 1.475, "step": 2203 }, { "epoch": 0.76, "learning_rate": 7.106118953912452e-07, "loss": 1.4258, "step": 2204 }, { "epoch": 0.76, "learning_rate": 7.086597259226708e-07, "loss": 1.4663, "step": 2205 }, { "epoch": 0.76, "learning_rate": 7.067097987017762e-07, "loss": 1.656, "step": 2206 }, { "epoch": 0.76, "learning_rate": 7.047621161693152e-07, "loss": 2.011, "step": 2207 }, { "epoch": 0.76, "learning_rate": 7.028166807632311e-07, "loss": 1.553, "step": 2208 }, { "epoch": 0.76, "learning_rate": 7.008734949186538e-07, "loss": 1.8424, "step": 2209 }, { "epoch": 0.76, "learning_rate": 6.989325610678991e-07, "loss": 1.4431, "step": 2210 }, { "epoch": 0.76, "learning_rate": 6.969938816404639e-07, "loss": 1.3063, "step": 2211 }, { "epoch": 0.76, "learning_rate": 6.950574590630213e-07, "loss": 1.4911, "step": 2212 }, { "epoch": 0.76, "learning_rate": 6.931232957594214e-07, "loss": 1.952, "step": 2213 }, { "epoch": 0.76, "learning_rate": 6.911913941506862e-07, "loss": 1.7788, "step": 2214 }, { "epoch": 0.77, "learning_rate": 6.892617566550044e-07, "loss": 1.4917, "step": 2215 }, { "epoch": 0.77, "learning_rate": 6.873343856877346e-07, "loss": 1.3774, "step": 2216 }, { "epoch": 0.77, "learning_rate": 6.854092836613948e-07, "loss": 1.6733, "step": 2217 }, { "epoch": 0.77, "learning_rate": 6.834864529856635e-07, "loss": 1.6304, "step": 2218 }, { "epoch": 0.77, "learning_rate": 6.815658960673782e-07, "loss": 1.5774, "step": 2219 }, { "epoch": 0.77, "learning_rate": 6.796476153105294e-07, "loss": 1.7089, "step": 2220 }, { "epoch": 0.77, "learning_rate": 6.777316131162564e-07, "loss": 1.8242, "step": 2221 }, { "epoch": 0.77, "learning_rate": 6.758178918828495e-07, "loss": 1.5934, "step": 2222 }, { "epoch": 0.77, "learning_rate": 6.739064540057425e-07, "loss": 1.5873, "step": 2223 }, { "epoch": 0.77, "learning_rate": 6.719973018775098e-07, "loss": 1.5388, "step": 2224 }, { "epoch": 0.77, "learning_rate": 6.700904378878675e-07, "loss": 1.561, "step": 2225 }, { "epoch": 0.77, "learning_rate": 6.681858644236655e-07, "loss": 1.9172, "step": 2226 }, { "epoch": 0.77, "learning_rate": 6.662835838688863e-07, "loss": 1.5916, "step": 2227 }, { "epoch": 0.77, "learning_rate": 6.643835986046437e-07, "loss": 1.604, "step": 2228 }, { "epoch": 0.77, "learning_rate": 6.624859110091791e-07, "loss": 1.446, "step": 2229 }, { "epoch": 0.77, "learning_rate": 6.60590523457855e-07, "loss": 1.4954, "step": 2230 }, { "epoch": 0.77, "learning_rate": 6.586974383231573e-07, "loss": 1.4618, "step": 2231 }, { "epoch": 0.77, "learning_rate": 6.568066579746901e-07, "loss": 1.9556, "step": 2232 }, { "epoch": 0.77, "learning_rate": 6.549181847791705e-07, "loss": 1.6008, "step": 2233 }, { "epoch": 0.77, "learning_rate": 6.530320211004299e-07, "loss": 1.4735, "step": 2234 }, { "epoch": 0.77, "learning_rate": 6.511481692994077e-07, "loss": 1.5183, "step": 2235 }, { "epoch": 0.77, "learning_rate": 6.492666317341481e-07, "loss": 1.7098, "step": 2236 }, { "epoch": 0.77, "learning_rate": 6.473874107598019e-07, "loss": 1.6752, "step": 2237 }, { "epoch": 0.77, "learning_rate": 6.455105087286173e-07, "loss": 1.8108, "step": 2238 }, { "epoch": 0.77, "learning_rate": 6.436359279899426e-07, "loss": 1.658, "step": 2239 }, { "epoch": 0.77, "learning_rate": 6.417636708902167e-07, "loss": 1.542, "step": 2240 }, { "epoch": 0.77, "learning_rate": 6.398937397729732e-07, "loss": 1.4695, "step": 2241 }, { "epoch": 0.77, "learning_rate": 6.380261369788335e-07, "loss": 1.5533, "step": 2242 }, { "epoch": 0.77, "learning_rate": 6.361608648455039e-07, "loss": 1.7709, "step": 2243 }, { "epoch": 0.78, "learning_rate": 6.342979257077728e-07, "loss": 1.7029, "step": 2244 }, { "epoch": 0.78, "learning_rate": 6.324373218975105e-07, "loss": 1.5923, "step": 2245 }, { "epoch": 0.78, "learning_rate": 6.30579055743662e-07, "loss": 1.4519, "step": 2246 }, { "epoch": 0.78, "learning_rate": 6.28723129572247e-07, "loss": 1.5549, "step": 2247 }, { "epoch": 0.78, "learning_rate": 6.268695457063581e-07, "loss": 1.4233, "step": 2248 }, { "epoch": 0.78, "learning_rate": 6.250183064661519e-07, "loss": 1.3868, "step": 2249 }, { "epoch": 0.78, "learning_rate": 6.231694141688535e-07, "loss": 1.5118, "step": 2250 }, { "epoch": 0.78, "learning_rate": 6.2132287112875e-07, "loss": 1.9352, "step": 2251 }, { "epoch": 0.78, "learning_rate": 6.194786796571869e-07, "loss": 1.587, "step": 2252 }, { "epoch": 0.78, "learning_rate": 6.176368420625653e-07, "loss": 1.6799, "step": 2253 }, { "epoch": 0.78, "learning_rate": 6.157973606503428e-07, "loss": 1.4388, "step": 2254 }, { "epoch": 0.78, "learning_rate": 6.139602377230247e-07, "loss": 1.6627, "step": 2255 }, { "epoch": 0.78, "learning_rate": 6.121254755801659e-07, "loss": 1.5471, "step": 2256 }, { "epoch": 0.78, "learning_rate": 6.102930765183671e-07, "loss": 1.9771, "step": 2257 }, { "epoch": 0.78, "learning_rate": 6.084630428312679e-07, "loss": 2.0004, "step": 2258 }, { "epoch": 0.78, "learning_rate": 6.066353768095504e-07, "loss": 1.381, "step": 2259 }, { "epoch": 0.78, "learning_rate": 6.048100807409321e-07, "loss": 1.4989, "step": 2260 }, { "epoch": 0.78, "learning_rate": 6.029871569101628e-07, "loss": 1.5438, "step": 2261 }, { "epoch": 0.78, "learning_rate": 6.011666075990236e-07, "loss": 1.5522, "step": 2262 }, { "epoch": 0.78, "learning_rate": 5.993484350863246e-07, "loss": 1.8348, "step": 2263 }, { "epoch": 0.78, "learning_rate": 5.975326416478985e-07, "loss": 1.7606, "step": 2264 }, { "epoch": 0.78, "learning_rate": 5.957192295566022e-07, "loss": 1.3685, "step": 2265 }, { "epoch": 0.78, "learning_rate": 5.939082010823116e-07, "loss": 1.5419, "step": 2266 }, { "epoch": 0.78, "learning_rate": 5.92099558491917e-07, "loss": 1.7112, "step": 2267 }, { "epoch": 0.78, "learning_rate": 5.902933040493242e-07, "loss": 1.7025, "step": 2268 }, { "epoch": 0.78, "learning_rate": 5.884894400154501e-07, "loss": 1.6481, "step": 2269 }, { "epoch": 0.78, "learning_rate": 5.866879686482177e-07, "loss": 1.8546, "step": 2270 }, { "epoch": 0.78, "learning_rate": 5.848888922025553e-07, "loss": 1.5386, "step": 2271 }, { "epoch": 0.78, "learning_rate": 5.830922129303949e-07, "loss": 1.6553, "step": 2272 }, { "epoch": 0.79, "learning_rate": 5.812979330806673e-07, "loss": 1.6582, "step": 2273 }, { "epoch": 0.79, "learning_rate": 5.79506054899299e-07, "loss": 1.5612, "step": 2274 }, { "epoch": 0.79, "learning_rate": 5.777165806292109e-07, "loss": 1.547, "step": 2275 }, { "epoch": 0.79, "learning_rate": 5.759295125103165e-07, "loss": 1.7527, "step": 2276 }, { "epoch": 0.79, "learning_rate": 5.741448527795137e-07, "loss": 1.7013, "step": 2277 }, { "epoch": 0.79, "learning_rate": 5.723626036706903e-07, "loss": 1.686, "step": 2278 }, { "epoch": 0.79, "learning_rate": 5.705827674147125e-07, "loss": 1.3982, "step": 2279 }, { "epoch": 0.79, "learning_rate": 5.688053462394297e-07, "loss": 1.6907, "step": 2280 }, { "epoch": 0.79, "learning_rate": 5.670303423696655e-07, "loss": 1.6958, "step": 2281 }, { "epoch": 0.79, "learning_rate": 5.652577580272206e-07, "loss": 1.804, "step": 2282 }, { "epoch": 0.79, "learning_rate": 5.634875954308638e-07, "loss": 1.8352, "step": 2283 }, { "epoch": 0.79, "learning_rate": 5.617198567963353e-07, "loss": 1.6562, "step": 2284 }, { "epoch": 0.79, "learning_rate": 5.59954544336341e-07, "loss": 1.8068, "step": 2285 }, { "epoch": 0.79, "learning_rate": 5.581916602605478e-07, "loss": 1.4788, "step": 2286 }, { "epoch": 0.79, "learning_rate": 5.564312067755856e-07, "loss": 1.5497, "step": 2287 }, { "epoch": 0.79, "learning_rate": 5.54673186085039e-07, "loss": 1.5836, "step": 2288 }, { "epoch": 0.79, "learning_rate": 5.52917600389451e-07, "loss": 1.7191, "step": 2289 }, { "epoch": 0.79, "learning_rate": 5.51164451886313e-07, "loss": 1.4189, "step": 2290 }, { "epoch": 0.79, "learning_rate": 5.494137427700688e-07, "loss": 1.4644, "step": 2291 }, { "epoch": 0.79, "learning_rate": 5.476654752321065e-07, "loss": 1.5623, "step": 2292 }, { "epoch": 0.79, "learning_rate": 5.45919651460759e-07, "loss": 1.8083, "step": 2293 }, { "epoch": 0.79, "learning_rate": 5.441762736413015e-07, "loss": 1.6001, "step": 2294 }, { "epoch": 0.79, "learning_rate": 5.424353439559446e-07, "loss": 1.8433, "step": 2295 }, { "epoch": 0.79, "learning_rate": 5.406968645838373e-07, "loss": 1.7036, "step": 2296 }, { "epoch": 0.79, "learning_rate": 5.389608377010608e-07, "loss": 1.6904, "step": 2297 }, { "epoch": 0.79, "learning_rate": 5.37227265480626e-07, "loss": 1.506, "step": 2298 }, { "epoch": 0.79, "learning_rate": 5.354961500924705e-07, "loss": 1.7775, "step": 2299 }, { "epoch": 0.79, "learning_rate": 5.337674937034581e-07, "loss": 1.7062, "step": 2300 }, { "epoch": 0.79, "learning_rate": 5.320412984773749e-07, "loss": 1.6257, "step": 2301 }, { "epoch": 0.8, "learning_rate": 5.303175665749244e-07, "loss": 1.6719, "step": 2302 }, { "epoch": 0.8, "learning_rate": 5.28596300153728e-07, "loss": 1.4378, "step": 2303 }, { "epoch": 0.8, "learning_rate": 5.268775013683222e-07, "loss": 1.4978, "step": 2304 }, { "epoch": 0.8, "learning_rate": 5.251611723701516e-07, "loss": 1.9258, "step": 2305 }, { "epoch": 0.8, "learning_rate": 5.234473153075729e-07, "loss": 1.7032, "step": 2306 }, { "epoch": 0.8, "learning_rate": 5.217359323258459e-07, "loss": 1.9305, "step": 2307 }, { "epoch": 0.8, "learning_rate": 5.200270255671335e-07, "loss": 1.7896, "step": 2308 }, { "epoch": 0.8, "learning_rate": 5.183205971705016e-07, "loss": 1.4623, "step": 2309 }, { "epoch": 0.8, "learning_rate": 5.166166492719124e-07, "loss": 1.4498, "step": 2310 }, { "epoch": 0.8, "learning_rate": 5.149151840042224e-07, "loss": 1.54, "step": 2311 }, { "epoch": 0.8, "learning_rate": 5.132162034971816e-07, "loss": 1.6113, "step": 2312 }, { "epoch": 0.8, "learning_rate": 5.115197098774302e-07, "loss": 1.7538, "step": 2313 }, { "epoch": 0.8, "learning_rate": 5.09825705268494e-07, "loss": 1.5729, "step": 2314 }, { "epoch": 0.8, "learning_rate": 5.081341917907853e-07, "loss": 1.5573, "step": 2315 }, { "epoch": 0.8, "learning_rate": 5.064451715615962e-07, "loss": 1.5953, "step": 2316 }, { "epoch": 0.8, "learning_rate": 5.047586466950985e-07, "loss": 1.6207, "step": 2317 }, { "epoch": 0.8, "learning_rate": 5.030746193023411e-07, "loss": 1.5295, "step": 2318 }, { "epoch": 0.8, "learning_rate": 5.013930914912477e-07, "loss": 1.7083, "step": 2319 }, { "epoch": 0.8, "learning_rate": 4.997140653666102e-07, "loss": 1.6516, "step": 2320 }, { "epoch": 0.8, "learning_rate": 4.980375430300921e-07, "loss": 1.4624, "step": 2321 }, { "epoch": 0.8, "learning_rate": 4.963635265802219e-07, "loss": 1.5502, "step": 2322 }, { "epoch": 0.8, "learning_rate": 4.946920181123904e-07, "loss": 1.5521, "step": 2323 }, { "epoch": 0.8, "learning_rate": 4.930230197188513e-07, "loss": 1.7139, "step": 2324 }, { "epoch": 0.8, "learning_rate": 4.913565334887135e-07, "loss": 1.5536, "step": 2325 }, { "epoch": 0.8, "learning_rate": 4.896925615079445e-07, "loss": 1.8553, "step": 2326 }, { "epoch": 0.8, "learning_rate": 4.880311058593617e-07, "loss": 1.5938, "step": 2327 }, { "epoch": 0.8, "learning_rate": 4.86372168622635e-07, "loss": 1.5245, "step": 2328 }, { "epoch": 0.8, "learning_rate": 4.847157518742817e-07, "loss": 1.5248, "step": 2329 }, { "epoch": 0.8, "learning_rate": 4.830618576876627e-07, "loss": 1.34, "step": 2330 }, { "epoch": 0.81, "learning_rate": 4.814104881329829e-07, "loss": 1.6223, "step": 2331 }, { "epoch": 0.81, "learning_rate": 4.797616452772869e-07, "loss": 2.0458, "step": 2332 }, { "epoch": 0.81, "learning_rate": 4.781153311844556e-07, "loss": 1.6184, "step": 2333 }, { "epoch": 0.81, "learning_rate": 4.764715479152046e-07, "loss": 1.5773, "step": 2334 }, { "epoch": 0.81, "learning_rate": 4.748302975270838e-07, "loss": 1.4764, "step": 2335 }, { "epoch": 0.81, "learning_rate": 4.7319158207446953e-07, "loss": 1.7306, "step": 2336 }, { "epoch": 0.81, "learning_rate": 4.715554036085673e-07, "loss": 1.4944, "step": 2337 }, { "epoch": 0.81, "learning_rate": 4.699217641774073e-07, "loss": 1.8217, "step": 2338 }, { "epoch": 0.81, "learning_rate": 4.6829066582583933e-07, "loss": 1.8247, "step": 2339 }, { "epoch": 0.81, "learning_rate": 4.6666211059553436e-07, "loss": 1.5582, "step": 2340 }, { "epoch": 0.81, "learning_rate": 4.650361005249804e-07, "loss": 1.6292, "step": 2341 }, { "epoch": 0.81, "learning_rate": 4.634126376494777e-07, "loss": 1.3291, "step": 2342 }, { "epoch": 0.81, "learning_rate": 4.617917240011394e-07, "loss": 1.4395, "step": 2343 }, { "epoch": 0.81, "learning_rate": 4.60173361608888e-07, "loss": 1.7206, "step": 2344 }, { "epoch": 0.81, "learning_rate": 4.585575524984515e-07, "loss": 1.7843, "step": 2345 }, { "epoch": 0.81, "learning_rate": 4.569442986923631e-07, "loss": 1.673, "step": 2346 }, { "epoch": 0.81, "learning_rate": 4.553336022099575e-07, "loss": 1.4612, "step": 2347 }, { "epoch": 0.81, "learning_rate": 4.537254650673664e-07, "loss": 1.5892, "step": 2348 }, { "epoch": 0.81, "learning_rate": 4.5211988927752026e-07, "loss": 1.4785, "step": 2349 }, { "epoch": 0.81, "learning_rate": 4.505168768501431e-07, "loss": 1.6421, "step": 2350 }, { "epoch": 0.81, "learning_rate": 4.489164297917492e-07, "loss": 2.0184, "step": 2351 }, { "epoch": 0.81, "learning_rate": 4.473185501056415e-07, "loss": 1.6018, "step": 2352 }, { "epoch": 0.81, "learning_rate": 4.457232397919115e-07, "loss": 1.4064, "step": 2353 }, { "epoch": 0.81, "learning_rate": 4.4413050084743196e-07, "loss": 1.5752, "step": 2354 }, { "epoch": 0.81, "learning_rate": 4.4254033526585917e-07, "loss": 1.5575, "step": 2355 }, { "epoch": 0.81, "learning_rate": 4.4095274503762694e-07, "loss": 1.6189, "step": 2356 }, { "epoch": 0.81, "learning_rate": 4.393677321499468e-07, "loss": 2.2028, "step": 2357 }, { "epoch": 0.81, "learning_rate": 4.37785298586802e-07, "loss": 1.6514, "step": 2358 }, { "epoch": 0.81, "learning_rate": 4.3620544632895e-07, "loss": 1.5183, "step": 2359 }, { "epoch": 0.82, "learning_rate": 4.346281773539149e-07, "loss": 1.5509, "step": 2360 }, { "epoch": 0.82, "learning_rate": 4.330534936359873e-07, "loss": 1.5074, "step": 2361 }, { "epoch": 0.82, "learning_rate": 4.3148139714622365e-07, "loss": 1.6754, "step": 2362 }, { "epoch": 0.82, "learning_rate": 4.2991188985244124e-07, "loss": 1.9567, "step": 2363 }, { "epoch": 0.82, "learning_rate": 4.2834497371921496e-07, "loss": 1.8143, "step": 2364 }, { "epoch": 0.82, "learning_rate": 4.267806507078778e-07, "loss": 1.4965, "step": 2365 }, { "epoch": 0.82, "learning_rate": 4.2521892277651745e-07, "loss": 1.635, "step": 2366 }, { "epoch": 0.82, "learning_rate": 4.2365979187997094e-07, "loss": 1.616, "step": 2367 }, { "epoch": 0.82, "learning_rate": 4.2210325996982685e-07, "loss": 1.5437, "step": 2368 }, { "epoch": 0.82, "learning_rate": 4.2054932899441905e-07, "loss": 1.8419, "step": 2369 }, { "epoch": 0.82, "learning_rate": 4.189980008988273e-07, "loss": 1.9507, "step": 2370 }, { "epoch": 0.82, "learning_rate": 4.174492776248712e-07, "loss": 1.4067, "step": 2371 }, { "epoch": 0.82, "learning_rate": 4.1590316111111224e-07, "loss": 1.4564, "step": 2372 }, { "epoch": 0.82, "learning_rate": 4.143596532928468e-07, "loss": 1.6514, "step": 2373 }, { "epoch": 0.82, "learning_rate": 4.128187561021077e-07, "loss": 1.4668, "step": 2374 }, { "epoch": 0.82, "learning_rate": 4.1128047146765936e-07, "loss": 1.5287, "step": 2375 }, { "epoch": 0.82, "learning_rate": 4.097448013149949e-07, "loss": 1.9406, "step": 2376 }, { "epoch": 0.82, "learning_rate": 4.0821174756633635e-07, "loss": 1.4042, "step": 2377 }, { "epoch": 0.82, "learning_rate": 4.06681312140631e-07, "loss": 1.5109, "step": 2378 }, { "epoch": 0.82, "learning_rate": 4.0515349695354726e-07, "loss": 1.6144, "step": 2379 }, { "epoch": 0.82, "learning_rate": 4.03628303917474e-07, "loss": 1.6879, "step": 2380 }, { "epoch": 0.82, "learning_rate": 4.021057349415197e-07, "loss": 1.6982, "step": 2381 }, { "epoch": 0.82, "learning_rate": 4.0058579193150537e-07, "loss": 2.1384, "step": 2382 }, { "epoch": 0.82, "learning_rate": 3.990684767899677e-07, "loss": 1.7596, "step": 2383 }, { "epoch": 0.82, "learning_rate": 3.975537914161526e-07, "loss": 1.7068, "step": 2384 }, { "epoch": 0.82, "learning_rate": 3.9604173770601524e-07, "loss": 1.5492, "step": 2385 }, { "epoch": 0.82, "learning_rate": 3.9453231755221504e-07, "loss": 1.4999, "step": 2386 }, { "epoch": 0.82, "learning_rate": 3.930255328441171e-07, "loss": 1.5792, "step": 2387 }, { "epoch": 0.82, "learning_rate": 3.915213854677863e-07, "loss": 2.0093, "step": 2388 }, { "epoch": 0.83, "learning_rate": 3.9001987730598544e-07, "loss": 1.8104, "step": 2389 }, { "epoch": 0.83, "learning_rate": 3.885210102381762e-07, "loss": 1.6161, "step": 2390 }, { "epoch": 0.83, "learning_rate": 3.8702478614051353e-07, "loss": 1.3884, "step": 2391 }, { "epoch": 0.83, "learning_rate": 3.8553120688584216e-07, "loss": 1.5848, "step": 2392 }, { "epoch": 0.83, "learning_rate": 3.8404027434369884e-07, "loss": 1.7087, "step": 2393 }, { "epoch": 0.83, "learning_rate": 3.8255199038030685e-07, "loss": 1.6064, "step": 2394 }, { "epoch": 0.83, "learning_rate": 3.8106635685857237e-07, "loss": 1.7878, "step": 2395 }, { "epoch": 0.83, "learning_rate": 3.795833756380868e-07, "loss": 1.5444, "step": 2396 }, { "epoch": 0.83, "learning_rate": 3.7810304857511914e-07, "loss": 1.5869, "step": 2397 }, { "epoch": 0.83, "learning_rate": 3.7662537752261696e-07, "loss": 1.3966, "step": 2398 }, { "epoch": 0.83, "learning_rate": 3.751503643302035e-07, "loss": 1.5099, "step": 2399 }, { "epoch": 0.83, "learning_rate": 3.736780108441762e-07, "loss": 1.5881, "step": 2400 }, { "epoch": 0.83, "learning_rate": 3.722083189075007e-07, "loss": 1.933, "step": 2401 }, { "epoch": 0.83, "learning_rate": 3.707412903598134e-07, "loss": 1.5201, "step": 2402 }, { "epoch": 0.83, "learning_rate": 3.6927692703741636e-07, "loss": 1.6024, "step": 2403 }, { "epoch": 0.83, "learning_rate": 3.6781523077327435e-07, "loss": 1.4163, "step": 2404 }, { "epoch": 0.83, "learning_rate": 3.6635620339701556e-07, "loss": 1.5891, "step": 2405 }, { "epoch": 0.83, "learning_rate": 3.648998467349263e-07, "loss": 1.5095, "step": 2406 }, { "epoch": 0.83, "learning_rate": 3.634461626099495e-07, "loss": 1.9166, "step": 2407 }, { "epoch": 0.83, "learning_rate": 3.61995152841684e-07, "loss": 1.6528, "step": 2408 }, { "epoch": 0.83, "learning_rate": 3.6054681924638154e-07, "loss": 1.4281, "step": 2409 }, { "epoch": 0.83, "learning_rate": 3.591011636369418e-07, "loss": 1.662, "step": 2410 }, { "epoch": 0.83, "learning_rate": 3.5765818782291435e-07, "loss": 1.5417, "step": 2411 }, { "epoch": 0.83, "learning_rate": 3.562178936104943e-07, "loss": 1.5492, "step": 2412 }, { "epoch": 0.83, "learning_rate": 3.547802828025185e-07, "loss": 1.6875, "step": 2413 }, { "epoch": 0.83, "learning_rate": 3.5334535719846767e-07, "loss": 1.6951, "step": 2414 }, { "epoch": 0.83, "learning_rate": 3.51913118594458e-07, "loss": 1.5868, "step": 2415 }, { "epoch": 0.83, "learning_rate": 3.5048356878324615e-07, "loss": 1.6359, "step": 2416 }, { "epoch": 0.83, "learning_rate": 3.490567095542194e-07, "loss": 1.5231, "step": 2417 }, { "epoch": 0.84, "learning_rate": 3.4763254269339965e-07, "loss": 1.3003, "step": 2418 }, { "epoch": 0.84, "learning_rate": 3.4621106998343923e-07, "loss": 1.5792, "step": 2419 }, { "epoch": 0.84, "learning_rate": 3.447922932036152e-07, "loss": 1.5164, "step": 2420 }, { "epoch": 0.84, "learning_rate": 3.4337621412983276e-07, "loss": 1.5306, "step": 2421 }, { "epoch": 0.84, "learning_rate": 3.4196283453461986e-07, "loss": 1.345, "step": 2422 }, { "epoch": 0.84, "learning_rate": 3.405521561871247e-07, "loss": 1.7007, "step": 2423 }, { "epoch": 0.84, "learning_rate": 3.391441808531143e-07, "loss": 1.7152, "step": 2424 }, { "epoch": 0.84, "learning_rate": 3.3773891029497326e-07, "loss": 1.5012, "step": 2425 }, { "epoch": 0.84, "learning_rate": 3.3633634627169925e-07, "loss": 1.9722, "step": 2426 }, { "epoch": 0.84, "learning_rate": 3.3493649053890325e-07, "loss": 1.5418, "step": 2427 }, { "epoch": 0.84, "learning_rate": 3.335393448488064e-07, "loss": 1.504, "step": 2428 }, { "epoch": 0.84, "learning_rate": 3.3214491095023616e-07, "loss": 1.5387, "step": 2429 }, { "epoch": 0.84, "learning_rate": 3.307531905886266e-07, "loss": 1.5515, "step": 2430 }, { "epoch": 0.84, "learning_rate": 3.293641855060162e-07, "loss": 1.6902, "step": 2431 }, { "epoch": 0.84, "learning_rate": 3.279778974410425e-07, "loss": 2.0486, "step": 2432 }, { "epoch": 0.84, "learning_rate": 3.2659432812894297e-07, "loss": 1.8453, "step": 2433 }, { "epoch": 0.84, "learning_rate": 3.2521347930155353e-07, "loss": 1.5719, "step": 2434 }, { "epoch": 0.84, "learning_rate": 3.23835352687302e-07, "loss": 1.5574, "step": 2435 }, { "epoch": 0.84, "learning_rate": 3.2245995001121103e-07, "loss": 1.6403, "step": 2436 }, { "epoch": 0.84, "learning_rate": 3.2108727299489354e-07, "loss": 1.8047, "step": 2437 }, { "epoch": 0.84, "learning_rate": 3.197173233565487e-07, "loss": 1.6533, "step": 2438 }, { "epoch": 0.84, "learning_rate": 3.1835010281096426e-07, "loss": 1.9545, "step": 2439 }, { "epoch": 0.84, "learning_rate": 3.1698561306951065e-07, "loss": 1.428, "step": 2440 }, { "epoch": 0.84, "learning_rate": 3.1562385584014035e-07, "loss": 1.7081, "step": 2441 }, { "epoch": 0.84, "learning_rate": 3.142648328273848e-07, "loss": 1.5529, "step": 2442 }, { "epoch": 0.84, "learning_rate": 3.129085457323539e-07, "loss": 1.5933, "step": 2443 }, { "epoch": 0.84, "learning_rate": 3.115549962527334e-07, "loss": 1.6663, "step": 2444 }, { "epoch": 0.84, "learning_rate": 3.102041860827804e-07, "loss": 1.7963, "step": 2445 }, { "epoch": 0.84, "learning_rate": 3.088561169133253e-07, "loss": 1.571, "step": 2446 }, { "epoch": 0.85, "learning_rate": 3.075107904317667e-07, "loss": 1.5607, "step": 2447 }, { "epoch": 0.85, "learning_rate": 3.061682083220696e-07, "loss": 1.543, "step": 2448 }, { "epoch": 0.85, "learning_rate": 3.048283722647649e-07, "loss": 1.5359, "step": 2449 }, { "epoch": 0.85, "learning_rate": 3.034912839369447e-07, "loss": 1.6731, "step": 2450 }, { "epoch": 0.85, "learning_rate": 3.0215694501226386e-07, "loss": 2.0188, "step": 2451 }, { "epoch": 0.85, "learning_rate": 3.008253571609332e-07, "loss": 1.3986, "step": 2452 }, { "epoch": 0.85, "learning_rate": 2.9949652204972257e-07, "loss": 1.5521, "step": 2453 }, { "epoch": 0.85, "learning_rate": 2.9817044134195396e-07, "loss": 1.6309, "step": 2454 }, { "epoch": 0.85, "learning_rate": 2.9684711669750313e-07, "loss": 1.5947, "step": 2455 }, { "epoch": 0.85, "learning_rate": 2.955265497727958e-07, "loss": 1.66, "step": 2456 }, { "epoch": 0.85, "learning_rate": 2.942087422208051e-07, "loss": 1.8275, "step": 2457 }, { "epoch": 0.85, "learning_rate": 2.9289369569105107e-07, "loss": 1.7596, "step": 2458 }, { "epoch": 0.85, "learning_rate": 2.9158141182959646e-07, "loss": 1.5922, "step": 2459 }, { "epoch": 0.85, "learning_rate": 2.902718922790479e-07, "loss": 1.6189, "step": 2460 }, { "epoch": 0.85, "learning_rate": 2.889651386785497e-07, "loss": 1.5317, "step": 2461 }, { "epoch": 0.85, "learning_rate": 2.8766115266378623e-07, "loss": 1.5135, "step": 2462 }, { "epoch": 0.85, "learning_rate": 2.8635993586697555e-07, "loss": 1.7942, "step": 2463 }, { "epoch": 0.85, "learning_rate": 2.850614899168705e-07, "loss": 1.7119, "step": 2464 }, { "epoch": 0.85, "learning_rate": 2.837658164387563e-07, "loss": 1.5768, "step": 2465 }, { "epoch": 0.85, "learning_rate": 2.8247291705444575e-07, "loss": 1.6462, "step": 2466 }, { "epoch": 0.85, "learning_rate": 2.811827933822814e-07, "loss": 1.6107, "step": 2467 }, { "epoch": 0.85, "learning_rate": 2.7989544703713065e-07, "loss": 1.5541, "step": 2468 }, { "epoch": 0.85, "learning_rate": 2.7861087963038436e-07, "loss": 1.9399, "step": 2469 }, { "epoch": 0.85, "learning_rate": 2.77329092769954e-07, "loss": 1.6315, "step": 2470 }, { "epoch": 0.85, "learning_rate": 2.7605008806027205e-07, "loss": 1.5076, "step": 2471 }, { "epoch": 0.85, "learning_rate": 2.74773867102289e-07, "loss": 1.5122, "step": 2472 }, { "epoch": 0.85, "learning_rate": 2.7350043149346835e-07, "loss": 1.6587, "step": 2473 }, { "epoch": 0.85, "learning_rate": 2.722297828277895e-07, "loss": 1.5425, "step": 2474 }, { "epoch": 0.85, "learning_rate": 2.70961922695743e-07, "loss": 1.7128, "step": 2475 }, { "epoch": 0.86, "learning_rate": 2.6969685268432745e-07, "loss": 1.8413, "step": 2476 }, { "epoch": 0.86, "learning_rate": 2.6843457437705137e-07, "loss": 1.4835, "step": 2477 }, { "epoch": 0.86, "learning_rate": 2.6717508935392685e-07, "loss": 1.5698, "step": 2478 }, { "epoch": 0.86, "learning_rate": 2.6591839919146963e-07, "loss": 1.6888, "step": 2479 }, { "epoch": 0.86, "learning_rate": 2.6466450546269865e-07, "loss": 1.5464, "step": 2480 }, { "epoch": 0.86, "learning_rate": 2.634134097371319e-07, "loss": 1.712, "step": 2481 }, { "epoch": 0.86, "learning_rate": 2.6216511358078374e-07, "loss": 1.6406, "step": 2482 }, { "epoch": 0.86, "learning_rate": 2.609196185561663e-07, "loss": 1.6786, "step": 2483 }, { "epoch": 0.86, "learning_rate": 2.596769262222845e-07, "loss": 1.7068, "step": 2484 }, { "epoch": 0.86, "learning_rate": 2.5843703813463407e-07, "loss": 1.6152, "step": 2485 }, { "epoch": 0.86, "learning_rate": 2.571999558452032e-07, "loss": 1.5646, "step": 2486 }, { "epoch": 0.86, "learning_rate": 2.5596568090246546e-07, "loss": 1.5715, "step": 2487 }, { "epoch": 0.86, "learning_rate": 2.5473421485138124e-07, "loss": 1.8807, "step": 2488 }, { "epoch": 0.86, "learning_rate": 2.535055592333954e-07, "loss": 1.6062, "step": 2489 }, { "epoch": 0.86, "learning_rate": 2.522797155864354e-07, "loss": 1.8334, "step": 2490 }, { "epoch": 0.86, "learning_rate": 2.510566854449073e-07, "loss": 1.3867, "step": 2491 }, { "epoch": 0.86, "learning_rate": 2.4983647033969714e-07, "loss": 1.5438, "step": 2492 }, { "epoch": 0.86, "learning_rate": 2.4861907179816654e-07, "loss": 1.5923, "step": 2493 }, { "epoch": 0.86, "learning_rate": 2.474044913441509e-07, "loss": 1.6378, "step": 2494 }, { "epoch": 0.86, "learning_rate": 2.4619273049796e-07, "loss": 2.0924, "step": 2495 }, { "epoch": 0.86, "learning_rate": 2.4498379077637277e-07, "loss": 1.6428, "step": 2496 }, { "epoch": 0.86, "learning_rate": 2.4377767369263674e-07, "loss": 1.7196, "step": 2497 }, { "epoch": 0.86, "learning_rate": 2.4257438075646706e-07, "loss": 1.6904, "step": 2498 }, { "epoch": 0.86, "learning_rate": 2.4137391347404475e-07, "loss": 1.6013, "step": 2499 }, { "epoch": 0.86, "learning_rate": 2.401762733480115e-07, "loss": 1.6079, "step": 2500 }, { "epoch": 0.86, "learning_rate": 2.389814618774719e-07, "loss": 1.9382, "step": 2501 }, { "epoch": 0.86, "learning_rate": 2.3778948055798974e-07, "loss": 1.6655, "step": 2502 }, { "epoch": 0.86, "learning_rate": 2.3660033088158646e-07, "loss": 1.4401, "step": 2503 }, { "epoch": 0.86, "learning_rate": 2.354140143367384e-07, "loss": 1.4532, "step": 2504 }, { "epoch": 0.87, "learning_rate": 2.3423053240837518e-07, "loss": 1.5779, "step": 2505 }, { "epoch": 0.87, "learning_rate": 2.330498865778799e-07, "loss": 1.444, "step": 2506 }, { "epoch": 0.87, "learning_rate": 2.3187207832308406e-07, "loss": 1.693, "step": 2507 }, { "epoch": 0.87, "learning_rate": 2.3069710911826858e-07, "loss": 1.4564, "step": 2508 }, { "epoch": 0.87, "learning_rate": 2.295249804341601e-07, "loss": 1.5829, "step": 2509 }, { "epoch": 0.87, "learning_rate": 2.2835569373792942e-07, "loss": 1.406, "step": 2510 }, { "epoch": 0.87, "learning_rate": 2.271892504931905e-07, "loss": 1.6909, "step": 2511 }, { "epoch": 0.87, "learning_rate": 2.2602565215999878e-07, "loss": 1.6461, "step": 2512 }, { "epoch": 0.87, "learning_rate": 2.248649001948472e-07, "loss": 1.7048, "step": 2513 }, { "epoch": 0.87, "learning_rate": 2.2370699605066577e-07, "loss": 1.6876, "step": 2514 }, { "epoch": 0.87, "learning_rate": 2.2255194117682206e-07, "loss": 1.6787, "step": 2515 }, { "epoch": 0.87, "learning_rate": 2.2139973701911455e-07, "loss": 1.3859, "step": 2516 }, { "epoch": 0.87, "learning_rate": 2.2025038501977485e-07, "loss": 1.5568, "step": 2517 }, { "epoch": 0.87, "learning_rate": 2.1910388661746495e-07, "loss": 1.584, "step": 2518 }, { "epoch": 0.87, "learning_rate": 2.17960243247273e-07, "loss": 1.7122, "step": 2519 }, { "epoch": 0.87, "learning_rate": 2.1681945634071555e-07, "loss": 1.6323, "step": 2520 }, { "epoch": 0.87, "learning_rate": 2.1568152732573315e-07, "loss": 1.6494, "step": 2521 }, { "epoch": 0.87, "learning_rate": 2.1454645762668785e-07, "loss": 1.5939, "step": 2522 }, { "epoch": 0.87, "learning_rate": 2.1341424866436366e-07, "loss": 1.6736, "step": 2523 }, { "epoch": 0.87, "learning_rate": 2.1228490185596444e-07, "loss": 1.6248, "step": 2524 }, { "epoch": 0.87, "learning_rate": 2.1115841861510945e-07, "loss": 1.761, "step": 2525 }, { "epoch": 0.87, "learning_rate": 2.1003480035183515e-07, "loss": 1.9089, "step": 2526 }, { "epoch": 0.87, "learning_rate": 2.0891404847259267e-07, "loss": 1.6395, "step": 2527 }, { "epoch": 0.87, "learning_rate": 2.077961643802423e-07, "loss": 1.3802, "step": 2528 }, { "epoch": 0.87, "learning_rate": 2.0668114947405727e-07, "loss": 1.7568, "step": 2529 }, { "epoch": 0.87, "learning_rate": 2.0556900514971916e-07, "loss": 1.686, "step": 2530 }, { "epoch": 0.87, "learning_rate": 2.044597327993153e-07, "loss": 1.4901, "step": 2531 }, { "epoch": 0.87, "learning_rate": 2.0335333381133805e-07, "loss": 1.7194, "step": 2532 }, { "epoch": 0.87, "learning_rate": 2.022498095706843e-07, "loss": 1.7496, "step": 2533 }, { "epoch": 0.88, "learning_rate": 2.0114916145865248e-07, "loss": 1.651, "step": 2534 }, { "epoch": 0.88, "learning_rate": 2.0005139085293945e-07, "loss": 1.4885, "step": 2535 }, { "epoch": 0.88, "learning_rate": 1.9895649912764186e-07, "loss": 1.5026, "step": 2536 }, { "epoch": 0.88, "learning_rate": 1.978644876532526e-07, "loss": 1.601, "step": 2537 }, { "epoch": 0.88, "learning_rate": 1.9677535779665803e-07, "loss": 1.9132, "step": 2538 }, { "epoch": 0.88, "learning_rate": 1.9568911092113923e-07, "loss": 1.944, "step": 2539 }, { "epoch": 0.88, "learning_rate": 1.9460574838636699e-07, "loss": 1.448, "step": 2540 }, { "epoch": 0.88, "learning_rate": 1.9352527154840345e-07, "loss": 1.5568, "step": 2541 }, { "epoch": 0.88, "learning_rate": 1.9244768175969709e-07, "loss": 1.4785, "step": 2542 }, { "epoch": 0.88, "learning_rate": 1.9137298036908392e-07, "loss": 1.6857, "step": 2543 }, { "epoch": 0.88, "learning_rate": 1.9030116872178317e-07, "loss": 1.637, "step": 2544 }, { "epoch": 0.88, "learning_rate": 1.8923224815939832e-07, "loss": 1.6648, "step": 2545 }, { "epoch": 0.88, "learning_rate": 1.881662200199133e-07, "loss": 1.5137, "step": 2546 }, { "epoch": 0.88, "learning_rate": 1.8710308563769124e-07, "loss": 1.5206, "step": 2547 }, { "epoch": 0.88, "learning_rate": 1.8604284634347407e-07, "loss": 1.6508, "step": 2548 }, { "epoch": 0.88, "learning_rate": 1.8498550346437854e-07, "loss": 1.7041, "step": 2549 }, { "epoch": 0.88, "learning_rate": 1.8393105832389791e-07, "loss": 1.7133, "step": 2550 }, { "epoch": 0.88, "learning_rate": 1.8287951224189555e-07, "loss": 1.8717, "step": 2551 }, { "epoch": 0.88, "learning_rate": 1.8183086653460913e-07, "loss": 1.5651, "step": 2552 }, { "epoch": 0.88, "learning_rate": 1.8078512251464285e-07, "loss": 1.713, "step": 2553 }, { "epoch": 0.88, "learning_rate": 1.7974228149097133e-07, "loss": 1.6522, "step": 2554 }, { "epoch": 0.88, "learning_rate": 1.7870234476893427e-07, "loss": 1.4933, "step": 2555 }, { "epoch": 0.88, "learning_rate": 1.7766531365023598e-07, "loss": 1.6074, "step": 2556 }, { "epoch": 0.88, "learning_rate": 1.7663118943294367e-07, "loss": 1.9575, "step": 2557 }, { "epoch": 0.88, "learning_rate": 1.7559997341148717e-07, "loss": 1.4565, "step": 2558 }, { "epoch": 0.88, "learning_rate": 1.745716668766545e-07, "loss": 1.4113, "step": 2559 }, { "epoch": 0.88, "learning_rate": 1.735462711155922e-07, "loss": 1.5734, "step": 2560 }, { "epoch": 0.88, "learning_rate": 1.7252378741180408e-07, "loss": 1.3948, "step": 2561 }, { "epoch": 0.88, "learning_rate": 1.7150421704514865e-07, "loss": 1.5436, "step": 2562 }, { "epoch": 0.89, "learning_rate": 1.7048756129183692e-07, "loss": 1.8143, "step": 2563 }, { "epoch": 0.89, "learning_rate": 1.6947382142443263e-07, "loss": 1.8214, "step": 2564 }, { "epoch": 0.89, "learning_rate": 1.684629987118494e-07, "loss": 1.4473, "step": 2565 }, { "epoch": 0.89, "learning_rate": 1.6745509441934898e-07, "loss": 1.6843, "step": 2566 }, { "epoch": 0.89, "learning_rate": 1.6645010980854082e-07, "loss": 1.6538, "step": 2567 }, { "epoch": 0.89, "learning_rate": 1.6544804613737892e-07, "loss": 1.6781, "step": 2568 }, { "epoch": 0.89, "learning_rate": 1.6444890466016135e-07, "loss": 1.6611, "step": 2569 }, { "epoch": 0.89, "learning_rate": 1.6345268662752904e-07, "loss": 1.7708, "step": 2570 }, { "epoch": 0.89, "learning_rate": 1.6245939328646322e-07, "loss": 1.4401, "step": 2571 }, { "epoch": 0.89, "learning_rate": 1.614690258802834e-07, "loss": 1.4695, "step": 2572 }, { "epoch": 0.89, "learning_rate": 1.604815856486483e-07, "loss": 1.5404, "step": 2573 }, { "epoch": 0.89, "learning_rate": 1.594970738275517e-07, "loss": 1.567, "step": 2574 }, { "epoch": 0.89, "learning_rate": 1.5851549164932118e-07, "loss": 1.5015, "step": 2575 }, { "epoch": 0.89, "learning_rate": 1.575368403426189e-07, "loss": 1.7352, "step": 2576 }, { "epoch": 0.89, "learning_rate": 1.565611211324372e-07, "loss": 1.7206, "step": 2577 }, { "epoch": 0.89, "learning_rate": 1.5558833524009776e-07, "loss": 1.672, "step": 2578 }, { "epoch": 0.89, "learning_rate": 1.5461848388325207e-07, "loss": 1.4583, "step": 2579 }, { "epoch": 0.89, "learning_rate": 1.5365156827587823e-07, "loss": 1.6184, "step": 2580 }, { "epoch": 0.89, "learning_rate": 1.5268758962827784e-07, "loss": 1.5793, "step": 2581 }, { "epoch": 0.89, "learning_rate": 1.5172654914707845e-07, "loss": 2.0498, "step": 2582 }, { "epoch": 0.89, "learning_rate": 1.507684480352292e-07, "loss": 1.5372, "step": 2583 }, { "epoch": 0.89, "learning_rate": 1.4981328749199885e-07, "loss": 1.4788, "step": 2584 }, { "epoch": 0.89, "learning_rate": 1.4886106871297684e-07, "loss": 1.5952, "step": 2585 }, { "epoch": 0.89, "learning_rate": 1.4791179289006973e-07, "loss": 1.4768, "step": 2586 }, { "epoch": 0.89, "learning_rate": 1.4696546121150013e-07, "loss": 1.4835, "step": 2587 }, { "epoch": 0.89, "learning_rate": 1.4602207486180574e-07, "loss": 1.9852, "step": 2588 }, { "epoch": 0.89, "learning_rate": 1.4508163502183786e-07, "loss": 1.934, "step": 2589 }, { "epoch": 0.89, "learning_rate": 1.4414414286875927e-07, "loss": 1.6646, "step": 2590 }, { "epoch": 0.89, "learning_rate": 1.432095995760424e-07, "loss": 1.5377, "step": 2591 }, { "epoch": 0.9, "learning_rate": 1.422780063134696e-07, "loss": 1.5392, "step": 2592 }, { "epoch": 0.9, "learning_rate": 1.4134936424713057e-07, "loss": 1.5642, "step": 2593 }, { "epoch": 0.9, "learning_rate": 1.4042367453942051e-07, "loss": 1.7754, "step": 2594 }, { "epoch": 0.9, "learning_rate": 1.3950093834903865e-07, "loss": 1.8496, "step": 2595 }, { "epoch": 0.9, "learning_rate": 1.3858115683098832e-07, "loss": 1.6406, "step": 2596 }, { "epoch": 0.9, "learning_rate": 1.3766433113657357e-07, "loss": 1.6362, "step": 2597 }, { "epoch": 0.9, "learning_rate": 1.3675046241339918e-07, "loss": 1.6838, "step": 2598 }, { "epoch": 0.9, "learning_rate": 1.35839551805369e-07, "loss": 1.5052, "step": 2599 }, { "epoch": 0.9, "learning_rate": 1.349316004526824e-07, "loss": 1.4626, "step": 2600 }, { "epoch": 0.9, "learning_rate": 1.3402660949183661e-07, "loss": 1.8546, "step": 2601 }, { "epoch": 0.9, "learning_rate": 1.3312458005562273e-07, "loss": 1.668, "step": 2602 }, { "epoch": 0.9, "learning_rate": 1.32225513273124e-07, "loss": 1.368, "step": 2603 }, { "epoch": 0.9, "learning_rate": 1.3132941026971602e-07, "loss": 1.5713, "step": 2604 }, { "epoch": 0.9, "learning_rate": 1.3043627216706462e-07, "loss": 1.4761, "step": 2605 }, { "epoch": 0.9, "learning_rate": 1.2954610008312411e-07, "loss": 1.4915, "step": 2606 }, { "epoch": 0.9, "learning_rate": 1.286588951321363e-07, "loss": 1.8463, "step": 2607 }, { "epoch": 0.9, "learning_rate": 1.277746584246295e-07, "loss": 1.8223, "step": 2608 }, { "epoch": 0.9, "learning_rate": 1.2689339106741529e-07, "loss": 1.6838, "step": 2609 }, { "epoch": 0.9, "learning_rate": 1.2601509416358986e-07, "loss": 1.4026, "step": 2610 }, { "epoch": 0.9, "learning_rate": 1.251397688125311e-07, "loss": 1.8009, "step": 2611 }, { "epoch": 0.9, "learning_rate": 1.2426741610989669e-07, "loss": 1.4875, "step": 2612 }, { "epoch": 0.9, "learning_rate": 1.2339803714762316e-07, "loss": 1.8263, "step": 2613 }, { "epoch": 0.9, "learning_rate": 1.225316330139259e-07, "loss": 1.8676, "step": 2614 }, { "epoch": 0.9, "learning_rate": 1.2166820479329572e-07, "loss": 1.6351, "step": 2615 }, { "epoch": 0.9, "learning_rate": 1.2080775356649866e-07, "loss": 1.4998, "step": 2616 }, { "epoch": 0.9, "learning_rate": 1.199502804105751e-07, "loss": 1.4734, "step": 2617 }, { "epoch": 0.9, "learning_rate": 1.1909578639883652e-07, "loss": 1.7769, "step": 2618 }, { "epoch": 0.9, "learning_rate": 1.1824427260086618e-07, "loss": 1.9006, "step": 2619 }, { "epoch": 0.91, "learning_rate": 1.1739574008251737e-07, "loss": 1.7671, "step": 2620 }, { "epoch": 0.91, "learning_rate": 1.1655018990591044e-07, "loss": 1.5348, "step": 2621 }, { "epoch": 0.91, "learning_rate": 1.1570762312943295e-07, "loss": 1.5972, "step": 2622 }, { "epoch": 0.91, "learning_rate": 1.1486804080773878e-07, "loss": 1.6427, "step": 2623 }, { "epoch": 0.91, "learning_rate": 1.1403144399174642e-07, "loss": 1.6162, "step": 2624 }, { "epoch": 0.91, "learning_rate": 1.1319783372863601e-07, "loss": 1.692, "step": 2625 }, { "epoch": 0.91, "learning_rate": 1.1236721106185039e-07, "loss": 1.696, "step": 2626 }, { "epoch": 0.91, "learning_rate": 1.1153957703109259e-07, "loss": 1.6255, "step": 2627 }, { "epoch": 0.91, "learning_rate": 1.107149326723242e-07, "loss": 1.6006, "step": 2628 }, { "epoch": 0.91, "learning_rate": 1.0989327901776564e-07, "loss": 1.5449, "step": 2629 }, { "epoch": 0.91, "learning_rate": 1.0907461709589256e-07, "loss": 1.7904, "step": 2630 }, { "epoch": 0.91, "learning_rate": 1.0825894793143721e-07, "loss": 1.64, "step": 2631 }, { "epoch": 0.91, "learning_rate": 1.0744627254538425e-07, "loss": 1.729, "step": 2632 }, { "epoch": 0.91, "learning_rate": 1.0663659195497222e-07, "loss": 1.8414, "step": 2633 }, { "epoch": 0.91, "learning_rate": 1.0582990717369012e-07, "loss": 1.4546, "step": 2634 }, { "epoch": 0.91, "learning_rate": 1.0502621921127776e-07, "loss": 1.5232, "step": 2635 }, { "epoch": 0.91, "learning_rate": 1.0422552907372374e-07, "loss": 1.5709, "step": 2636 }, { "epoch": 0.91, "learning_rate": 1.034278377632636e-07, "loss": 1.5354, "step": 2637 }, { "epoch": 0.91, "learning_rate": 1.0263314627837972e-07, "loss": 1.9481, "step": 2638 }, { "epoch": 0.91, "learning_rate": 1.0184145561379949e-07, "loss": 1.6448, "step": 2639 }, { "epoch": 0.91, "learning_rate": 1.0105276676049408e-07, "loss": 1.524, "step": 2640 }, { "epoch": 0.91, "learning_rate": 1.0026708070567714e-07, "loss": 1.5229, "step": 2641 }, { "epoch": 0.91, "learning_rate": 9.948439843280395e-08, "loss": 1.5729, "step": 2642 }, { "epoch": 0.91, "learning_rate": 9.870472092156941e-08, "loss": 1.3105, "step": 2643 }, { "epoch": 0.91, "learning_rate": 9.792804914790788e-08, "loss": 2.2023, "step": 2644 }, { "epoch": 0.91, "learning_rate": 9.715438408399114e-08, "loss": 1.7637, "step": 2645 }, { "epoch": 0.91, "learning_rate": 9.638372669822732e-08, "loss": 1.6968, "step": 2646 }, { "epoch": 0.91, "learning_rate": 9.561607795526007e-08, "loss": 1.5674, "step": 2647 }, { "epoch": 0.91, "learning_rate": 9.485143881596715e-08, "loss": 1.4352, "step": 2648 }, { "epoch": 0.92, "learning_rate": 9.40898102374585e-08, "loss": 1.6951, "step": 2649 }, { "epoch": 0.92, "learning_rate": 9.333119317307598e-08, "loss": 1.5022, "step": 2650 }, { "epoch": 0.92, "learning_rate": 9.257558857239224e-08, "loss": 2.045, "step": 2651 }, { "epoch": 0.92, "learning_rate": 9.182299738120931e-08, "loss": 1.4657, "step": 2652 }, { "epoch": 0.92, "learning_rate": 9.107342054155643e-08, "loss": 1.4337, "step": 2653 }, { "epoch": 0.92, "learning_rate": 9.032685899169058e-08, "loss": 1.509, "step": 2654 }, { "epoch": 0.92, "learning_rate": 8.958331366609424e-08, "loss": 1.6292, "step": 2655 }, { "epoch": 0.92, "learning_rate": 8.884278549547376e-08, "loss": 1.6194, "step": 2656 }, { "epoch": 0.92, "learning_rate": 8.81052754067599e-08, "loss": 2.1788, "step": 2657 }, { "epoch": 0.92, "learning_rate": 8.73707843231053e-08, "loss": 1.6537, "step": 2658 }, { "epoch": 0.92, "learning_rate": 8.663931316388319e-08, "loss": 1.5941, "step": 2659 }, { "epoch": 0.92, "learning_rate": 8.591086284468697e-08, "loss": 1.4792, "step": 2660 }, { "epoch": 0.92, "learning_rate": 8.518543427732951e-08, "loss": 1.799, "step": 2661 }, { "epoch": 0.92, "learning_rate": 8.446302836984e-08, "loss": 1.5403, "step": 2662 }, { "epoch": 0.92, "learning_rate": 8.374364602646512e-08, "loss": 2.0206, "step": 2663 }, { "epoch": 0.92, "learning_rate": 8.302728814766708e-08, "loss": 1.7427, "step": 2664 }, { "epoch": 0.92, "learning_rate": 8.231395563012085e-08, "loss": 1.6262, "step": 2665 }, { "epoch": 0.92, "learning_rate": 8.16036493667166e-08, "loss": 1.6968, "step": 2666 }, { "epoch": 0.92, "learning_rate": 8.089637024655483e-08, "loss": 1.6653, "step": 2667 }, { "epoch": 0.92, "learning_rate": 8.019211915494734e-08, "loss": 1.5815, "step": 2668 }, { "epoch": 0.92, "learning_rate": 7.949089697341622e-08, "loss": 1.9333, "step": 2669 }, { "epoch": 0.92, "learning_rate": 7.879270457969185e-08, "loss": 1.6504, "step": 2670 }, { "epoch": 0.92, "learning_rate": 7.809754284771181e-08, "loss": 1.6368, "step": 2671 }, { "epoch": 0.92, "learning_rate": 7.740541264762091e-08, "loss": 1.6587, "step": 2672 }, { "epoch": 0.92, "learning_rate": 7.671631484576891e-08, "loss": 1.5764, "step": 2673 }, { "epoch": 0.92, "learning_rate": 7.603025030471001e-08, "loss": 1.6145, "step": 2674 }, { "epoch": 0.92, "learning_rate": 7.534721988320143e-08, "loss": 1.858, "step": 2675 }, { "epoch": 0.92, "learning_rate": 7.466722443620261e-08, "loss": 1.8699, "step": 2676 }, { "epoch": 0.92, "learning_rate": 7.399026481487437e-08, "loss": 1.7959, "step": 2677 }, { "epoch": 0.93, "learning_rate": 7.331634186657693e-08, "loss": 1.6914, "step": 2678 }, { "epoch": 0.93, "learning_rate": 7.264545643486997e-08, "loss": 1.4119, "step": 2679 }, { "epoch": 0.93, "learning_rate": 7.19776093595112e-08, "loss": 1.4412, "step": 2680 }, { "epoch": 0.93, "learning_rate": 7.131280147645442e-08, "loss": 1.3082, "step": 2681 }, { "epoch": 0.93, "learning_rate": 7.065103361785008e-08, "loss": 2.0193, "step": 2682 }, { "epoch": 0.93, "learning_rate": 6.999230661204281e-08, "loss": 1.5685, "step": 2683 }, { "epoch": 0.93, "learning_rate": 6.933662128357166e-08, "loss": 1.6383, "step": 2684 }, { "epoch": 0.93, "learning_rate": 6.868397845316676e-08, "loss": 1.595, "step": 2685 }, { "epoch": 0.93, "learning_rate": 6.803437893775217e-08, "loss": 1.5424, "step": 2686 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "loss": 1.4274, "step": 2687 }, { "epoch": 0.93, "learning_rate": 6.674431310053519e-08, "loss": 1.8247, "step": 2688 }, { "epoch": 0.93, "learning_rate": 6.610384839352862e-08, "loss": 1.9102, "step": 2689 }, { "epoch": 0.93, "learning_rate": 6.546643023109923e-08, "loss": 1.6519, "step": 2690 }, { "epoch": 0.93, "learning_rate": 6.483205941111347e-08, "loss": 1.4519, "step": 2691 }, { "epoch": 0.93, "learning_rate": 6.420073672762289e-08, "loss": 1.3024, "step": 2692 }, { "epoch": 0.93, "learning_rate": 6.35724629708634e-08, "loss": 1.7228, "step": 2693 }, { "epoch": 0.93, "learning_rate": 6.294723892725452e-08, "loss": 1.828, "step": 2694 }, { "epoch": 0.93, "learning_rate": 6.232506537939942e-08, "loss": 1.7853, "step": 2695 }, { "epoch": 0.93, "learning_rate": 6.170594310608152e-08, "loss": 1.5985, "step": 2696 }, { "epoch": 0.93, "learning_rate": 6.108987288226536e-08, "loss": 1.5872, "step": 2697 }, { "epoch": 0.93, "learning_rate": 6.047685547909605e-08, "loss": 1.6425, "step": 2698 }, { "epoch": 0.93, "learning_rate": 5.986689166389614e-08, "loss": 1.5815, "step": 2699 }, { "epoch": 0.93, "learning_rate": 5.92599822001666e-08, "loss": 1.4116, "step": 2700 }, { "epoch": 0.93, "learning_rate": 5.865612784758557e-08, "loss": 1.8015, "step": 2701 }, { "epoch": 0.93, "learning_rate": 5.805532936200647e-08, "loss": 1.6017, "step": 2702 }, { "epoch": 0.93, "learning_rate": 5.745758749545749e-08, "loss": 1.4194, "step": 2703 }, { "epoch": 0.93, "learning_rate": 5.6862902996141775e-08, "loss": 1.5257, "step": 2704 }, { "epoch": 0.93, "learning_rate": 5.627127660843418e-08, "loss": 1.5126, "step": 2705 }, { "epoch": 0.93, "learning_rate": 5.568270907288287e-08, "loss": 1.5537, "step": 2706 }, { "epoch": 0.94, "learning_rate": 5.509720112620659e-08, "loss": 1.7432, "step": 2707 }, { "epoch": 0.94, "learning_rate": 5.451475350129465e-08, "loss": 1.5514, "step": 2708 }, { "epoch": 0.94, "learning_rate": 5.393536692720525e-08, "loss": 1.5634, "step": 2709 }, { "epoch": 0.94, "learning_rate": 5.335904212916576e-08, "loss": 1.536, "step": 2710 }, { "epoch": 0.94, "learning_rate": 5.278577982857025e-08, "loss": 1.7533, "step": 2711 }, { "epoch": 0.94, "learning_rate": 5.221558074297972e-08, "loss": 1.5383, "step": 2712 }, { "epoch": 0.94, "learning_rate": 5.164844558612131e-08, "loss": 1.7543, "step": 2713 }, { "epoch": 0.94, "learning_rate": 5.108437506788661e-08, "loss": 1.8617, "step": 2714 }, { "epoch": 0.94, "learning_rate": 5.052336989433082e-08, "loss": 1.5544, "step": 2715 }, { "epoch": 0.94, "learning_rate": 4.99654307676728e-08, "loss": 1.4586, "step": 2716 }, { "epoch": 0.94, "learning_rate": 4.9410558386293885e-08, "loss": 1.6497, "step": 2717 }, { "epoch": 0.94, "learning_rate": 4.8858753444735455e-08, "loss": 1.5417, "step": 2718 }, { "epoch": 0.94, "learning_rate": 4.831001663370083e-08, "loss": 1.5791, "step": 2719 }, { "epoch": 0.94, "learning_rate": 4.776434864005114e-08, "loss": 1.8152, "step": 2720 }, { "epoch": 0.94, "learning_rate": 4.7221750146808357e-08, "loss": 1.512, "step": 2721 }, { "epoch": 0.94, "learning_rate": 4.6682221833150575e-08, "loss": 1.5485, "step": 2722 }, { "epoch": 0.94, "learning_rate": 4.61457643744137e-08, "loss": 1.5026, "step": 2723 }, { "epoch": 0.94, "learning_rate": 4.561237844208977e-08, "loss": 1.6616, "step": 2724 }, { "epoch": 0.94, "learning_rate": 4.508206470382554e-08, "loss": 1.6603, "step": 2725 }, { "epoch": 0.94, "learning_rate": 4.455482382342336e-08, "loss": 1.5626, "step": 2726 }, { "epoch": 0.94, "learning_rate": 4.40306564608381e-08, "loss": 1.6552, "step": 2727 }, { "epoch": 0.94, "learning_rate": 4.350956327217798e-08, "loss": 1.2916, "step": 2728 }, { "epoch": 0.94, "learning_rate": 4.2991544909703756e-08, "loss": 1.527, "step": 2729 }, { "epoch": 0.94, "learning_rate": 4.247660202182674e-08, "loss": 1.594, "step": 2730 }, { "epoch": 0.94, "learning_rate": 4.1964735253108013e-08, "loss": 1.6069, "step": 2731 }, { "epoch": 0.94, "learning_rate": 4.1455945244259496e-08, "loss": 1.7617, "step": 2732 }, { "epoch": 0.94, "learning_rate": 4.0950232632141205e-08, "loss": 1.5927, "step": 2733 }, { "epoch": 0.94, "learning_rate": 4.044759804976123e-08, "loss": 1.639, "step": 2734 }, { "epoch": 0.94, "learning_rate": 3.994804212627462e-08, "loss": 1.649, "step": 2735 }, { "epoch": 0.95, "learning_rate": 3.9451565486983976e-08, "loss": 1.6508, "step": 2736 }, { "epoch": 0.95, "learning_rate": 3.8958168753335524e-08, "loss": 1.6494, "step": 2737 }, { "epoch": 0.95, "learning_rate": 3.846785254292246e-08, "loss": 1.7863, "step": 2738 }, { "epoch": 0.95, "learning_rate": 3.798061746947995e-08, "loss": 1.7207, "step": 2739 }, { "epoch": 0.95, "learning_rate": 3.7496464142887936e-08, "loss": 1.5994, "step": 2740 }, { "epoch": 0.95, "learning_rate": 3.701539316916858e-08, "loss": 1.8635, "step": 2741 }, { "epoch": 0.95, "learning_rate": 3.653740515048576e-08, "loss": 1.5797, "step": 2742 }, { "epoch": 0.95, "learning_rate": 3.6062500685143943e-08, "loss": 1.6168, "step": 2743 }, { "epoch": 0.95, "learning_rate": 3.5590680367588173e-08, "loss": 2.0146, "step": 2744 }, { "epoch": 0.95, "learning_rate": 3.512194478840353e-08, "loss": 1.8618, "step": 2745 }, { "epoch": 0.95, "learning_rate": 3.4656294534313185e-08, "loss": 1.59, "step": 2746 }, { "epoch": 0.95, "learning_rate": 3.4193730188178674e-08, "loss": 1.4832, "step": 2747 }, { "epoch": 0.95, "learning_rate": 3.37342523289988e-08, "loss": 1.5182, "step": 2748 }, { "epoch": 0.95, "learning_rate": 3.327786153190848e-08, "loss": 1.5751, "step": 2749 }, { "epoch": 0.95, "learning_rate": 3.2824558368179384e-08, "loss": 1.6605, "step": 2750 }, { "epoch": 0.95, "learning_rate": 3.237434340521789e-08, "loss": 1.9529, "step": 2751 }, { "epoch": 0.95, "learning_rate": 3.1927217206564884e-08, "loss": 1.5468, "step": 2752 }, { "epoch": 0.95, "learning_rate": 3.14831803318949e-08, "loss": 1.3994, "step": 2753 }, { "epoch": 0.95, "learning_rate": 3.1042233337015834e-08, "loss": 1.6871, "step": 2754 }, { "epoch": 0.95, "learning_rate": 3.0604376773867295e-08, "loss": 1.5355, "step": 2755 }, { "epoch": 0.95, "learning_rate": 3.016961119052114e-08, "loss": 1.6871, "step": 2756 }, { "epoch": 0.95, "learning_rate": 2.9737937131180394e-08, "loss": 1.9548, "step": 2757 }, { "epoch": 0.95, "learning_rate": 2.9309355136177277e-08, "loss": 1.393, "step": 2758 }, { "epoch": 0.95, "learning_rate": 2.888386574197488e-08, "loss": 1.6332, "step": 2759 }, { "epoch": 0.95, "learning_rate": 2.8461469481164682e-08, "loss": 1.3722, "step": 2760 }, { "epoch": 0.95, "learning_rate": 2.804216688246597e-08, "loss": 1.7014, "step": 2761 }, { "epoch": 0.95, "learning_rate": 2.7625958470726687e-08, "loss": 1.3975, "step": 2762 }, { "epoch": 0.95, "learning_rate": 2.7212844766920932e-08, "loss": 1.5089, "step": 2763 }, { "epoch": 0.95, "learning_rate": 2.6802826288149507e-08, "loss": 1.8073, "step": 2764 }, { "epoch": 0.96, "learning_rate": 2.6395903547638825e-08, "loss": 1.3666, "step": 2765 }, { "epoch": 0.96, "learning_rate": 2.5992077054739772e-08, "loss": 1.3473, "step": 2766 }, { "epoch": 0.96, "learning_rate": 2.5591347314928572e-08, "loss": 1.542, "step": 2767 }, { "epoch": 0.96, "learning_rate": 2.5193714829804263e-08, "loss": 1.4615, "step": 2768 }, { "epoch": 0.96, "learning_rate": 2.4799180097089815e-08, "loss": 1.8224, "step": 2769 }, { "epoch": 0.96, "learning_rate": 2.4407743610630198e-08, "loss": 1.7444, "step": 2770 }, { "epoch": 0.96, "learning_rate": 2.4019405860392366e-08, "loss": 1.4194, "step": 2771 }, { "epoch": 0.96, "learning_rate": 2.3634167332464153e-08, "loss": 1.6793, "step": 2772 }, { "epoch": 0.96, "learning_rate": 2.325202850905539e-08, "loss": 1.6451, "step": 2773 }, { "epoch": 0.96, "learning_rate": 2.287298986849401e-08, "loss": 1.6185, "step": 2774 }, { "epoch": 0.96, "learning_rate": 2.2497051885228825e-08, "loss": 1.4991, "step": 2775 }, { "epoch": 0.96, "learning_rate": 2.212421502982731e-08, "loss": 1.9742, "step": 2776 }, { "epoch": 0.96, "learning_rate": 2.175447976897449e-08, "loss": 1.7157, "step": 2777 }, { "epoch": 0.96, "learning_rate": 2.1387846565474047e-08, "loss": 1.4539, "step": 2778 }, { "epoch": 0.96, "learning_rate": 2.1024315878246103e-08, "loss": 1.6504, "step": 2779 }, { "epoch": 0.96, "learning_rate": 2.0663888162327227e-08, "loss": 1.5648, "step": 2780 }, { "epoch": 0.96, "learning_rate": 2.0306563868870698e-08, "loss": 1.5128, "step": 2781 }, { "epoch": 0.96, "learning_rate": 1.9952343445144572e-08, "loss": 1.8926, "step": 2782 }, { "epoch": 0.96, "learning_rate": 1.9601227334531958e-08, "loss": 1.4902, "step": 2783 }, { "epoch": 0.96, "learning_rate": 1.9253215976530183e-08, "loss": 1.4919, "step": 2784 }, { "epoch": 0.96, "learning_rate": 1.890830980674996e-08, "loss": 1.5569, "step": 2785 }, { "epoch": 0.96, "learning_rate": 1.8566509256915945e-08, "loss": 1.4932, "step": 2786 }, { "epoch": 0.96, "learning_rate": 1.822781475486507e-08, "loss": 1.547, "step": 2787 }, { "epoch": 0.96, "learning_rate": 1.789222672454627e-08, "loss": 1.9072, "step": 2788 }, { "epoch": 0.96, "learning_rate": 1.7559745586019918e-08, "loss": 1.5845, "step": 2789 }, { "epoch": 0.96, "learning_rate": 1.723037175545783e-08, "loss": 1.4984, "step": 2790 }, { "epoch": 0.96, "learning_rate": 1.6904105645142443e-08, "loss": 1.6639, "step": 2791 }, { "epoch": 0.96, "learning_rate": 1.6580947663465963e-08, "loss": 1.5928, "step": 2792 }, { "epoch": 0.96, "learning_rate": 1.6260898214929544e-08, "loss": 1.5853, "step": 2793 }, { "epoch": 0.97, "learning_rate": 1.5943957700144676e-08, "loss": 1.7766, "step": 2794 }, { "epoch": 0.97, "learning_rate": 1.5630126515830123e-08, "loss": 1.6807, "step": 2795 }, { "epoch": 0.97, "learning_rate": 1.5319405054813054e-08, "loss": 1.4792, "step": 2796 }, { "epoch": 0.97, "learning_rate": 1.5011793706028454e-08, "loss": 1.4191, "step": 2797 }, { "epoch": 0.97, "learning_rate": 1.4707292854518051e-08, "loss": 1.4969, "step": 2798 }, { "epoch": 0.97, "learning_rate": 1.4405902881430289e-08, "loss": 1.359, "step": 2799 }, { "epoch": 0.97, "learning_rate": 1.4107624164019229e-08, "loss": 1.5552, "step": 2800 }, { "epoch": 0.97, "learning_rate": 1.3812457075644825e-08, "loss": 1.9365, "step": 2801 }, { "epoch": 0.97, "learning_rate": 1.3520401985772646e-08, "loss": 1.7675, "step": 2802 }, { "epoch": 0.97, "learning_rate": 1.3231459259972213e-08, "loss": 1.6952, "step": 2803 }, { "epoch": 0.97, "learning_rate": 1.2945629259917547e-08, "loss": 1.6212, "step": 2804 }, { "epoch": 0.97, "learning_rate": 1.2662912343386069e-08, "loss": 1.5599, "step": 2805 }, { "epoch": 0.97, "learning_rate": 1.2383308864258869e-08, "loss": 1.6421, "step": 2806 }, { "epoch": 0.97, "learning_rate": 1.2106819172520434e-08, "loss": 1.8564, "step": 2807 }, { "epoch": 0.97, "learning_rate": 1.1833443614256423e-08, "loss": 1.5264, "step": 2808 }, { "epoch": 0.97, "learning_rate": 1.1563182531655614e-08, "loss": 1.6401, "step": 2809 }, { "epoch": 0.97, "learning_rate": 1.1296036263007403e-08, "loss": 1.4414, "step": 2810 }, { "epoch": 0.97, "learning_rate": 1.1032005142703195e-08, "loss": 1.8239, "step": 2811 }, { "epoch": 0.97, "learning_rate": 1.0771089501234732e-08, "loss": 1.631, "step": 2812 }, { "epoch": 0.97, "learning_rate": 1.0513289665193827e-08, "loss": 1.802, "step": 2813 }, { "epoch": 0.97, "learning_rate": 1.0258605957272627e-08, "loss": 1.7412, "step": 2814 }, { "epoch": 0.97, "learning_rate": 1.0007038696262517e-08, "loss": 1.3794, "step": 2815 }, { "epoch": 0.97, "learning_rate": 9.758588197053832e-09, "loss": 1.2725, "step": 2816 }, { "epoch": 0.97, "learning_rate": 9.513254770636138e-09, "loss": 1.567, "step": 2817 }, { "epoch": 0.97, "learning_rate": 9.271038724096848e-09, "loss": 1.8143, "step": 2818 }, { "epoch": 0.97, "learning_rate": 9.031940360621493e-09, "loss": 1.6013, "step": 2819 }, { "epoch": 0.97, "learning_rate": 8.795959979493174e-09, "loss": 1.7264, "step": 2820 }, { "epoch": 0.97, "learning_rate": 8.56309787609172e-09, "loss": 1.6342, "step": 2821 }, { "epoch": 0.97, "learning_rate": 8.333354341894529e-09, "loss": 1.5963, "step": 2822 }, { "epoch": 0.98, "learning_rate": 8.106729664475178e-09, "loss": 1.6074, "step": 2823 }, { "epoch": 0.98, "learning_rate": 7.883224127502586e-09, "loss": 1.4963, "step": 2824 }, { "epoch": 0.98, "learning_rate": 7.662838010742413e-09, "loss": 1.6063, "step": 2825 }, { "epoch": 0.98, "learning_rate": 7.4455715900556554e-09, "loss": 1.9386, "step": 2826 }, { "epoch": 0.98, "learning_rate": 7.231425137397274e-09, "loss": 1.7327, "step": 2827 }, { "epoch": 0.98, "learning_rate": 7.0203989208178505e-09, "loss": 1.5227, "step": 2828 }, { "epoch": 0.98, "learning_rate": 6.812493204462478e-09, "loss": 1.4691, "step": 2829 }, { "epoch": 0.98, "learning_rate": 6.607708248569378e-09, "loss": 1.4445, "step": 2830 }, { "epoch": 0.98, "learning_rate": 6.406044309471004e-09, "loss": 1.4952, "step": 2831 }, { "epoch": 0.98, "learning_rate": 6.207501639593494e-09, "loss": 1.9622, "step": 2832 }, { "epoch": 0.98, "learning_rate": 6.012080487455552e-09, "loss": 1.531, "step": 2833 }, { "epoch": 0.98, "learning_rate": 5.819781097668731e-09, "loss": 1.7888, "step": 2834 }, { "epoch": 0.98, "learning_rate": 5.6306037109371544e-09, "loss": 1.6034, "step": 2835 }, { "epoch": 0.98, "learning_rate": 5.444548564056962e-09, "loss": 1.5496, "step": 2836 }, { "epoch": 0.98, "learning_rate": 5.2616158899160275e-09, "loss": 1.5006, "step": 2837 }, { "epoch": 0.98, "learning_rate": 5.081805917494243e-09, "loss": 1.9667, "step": 2838 }, { "epoch": 0.98, "learning_rate": 4.905118871862402e-09, "loss": 1.6316, "step": 2839 }, { "epoch": 0.98, "learning_rate": 4.731554974182207e-09, "loss": 1.6021, "step": 2840 }, { "epoch": 0.98, "learning_rate": 4.56111444170626e-09, "loss": 1.4916, "step": 2841 }, { "epoch": 0.98, "learning_rate": 4.3937974877777955e-09, "loss": 1.6472, "step": 2842 }, { "epoch": 0.98, "learning_rate": 4.229604321829561e-09, "loss": 1.5095, "step": 2843 }, { "epoch": 0.98, "learning_rate": 4.068535149384656e-09, "loss": 1.9553, "step": 2844 }, { "epoch": 0.98, "learning_rate": 3.91059017205625e-09, "loss": 1.8514, "step": 2845 }, { "epoch": 0.98, "learning_rate": 3.755769587546199e-09, "loss": 1.6368, "step": 2846 }, { "epoch": 0.98, "learning_rate": 3.6040735896455957e-09, "loss": 1.4846, "step": 2847 }, { "epoch": 0.98, "learning_rate": 3.4555023682347754e-09, "loss": 1.3198, "step": 2848 }, { "epoch": 0.98, "learning_rate": 3.3100561092824778e-09, "loss": 1.4333, "step": 2849 }, { "epoch": 0.98, "learning_rate": 3.1677349948461277e-09, "loss": 1.5963, "step": 2850 }, { "epoch": 0.98, "learning_rate": 3.0285392030710016e-09, "loss": 1.8674, "step": 2851 }, { "epoch": 0.99, "learning_rate": 2.892468908190782e-09, "loss": 1.4938, "step": 2852 }, { "epoch": 0.99, "learning_rate": 2.7595242805267263e-09, "loss": 1.4785, "step": 2853 }, { "epoch": 0.99, "learning_rate": 2.6297054864876658e-09, "loss": 1.3541, "step": 2854 }, { "epoch": 0.99, "learning_rate": 2.5030126885694505e-09, "loss": 1.6179, "step": 2855 }, { "epoch": 0.99, "learning_rate": 2.3794460453555046e-09, "loss": 1.5153, "step": 2856 }, { "epoch": 0.99, "learning_rate": 2.259005711516271e-09, "loss": 1.8696, "step": 2857 }, { "epoch": 0.99, "learning_rate": 2.141691837808657e-09, "loss": 1.4778, "step": 2858 }, { "epoch": 0.99, "learning_rate": 2.027504571076033e-09, "loss": 1.4357, "step": 2859 }, { "epoch": 0.99, "learning_rate": 1.9164440542482343e-09, "loss": 1.7209, "step": 2860 }, { "epoch": 0.99, "learning_rate": 1.8085104263418362e-09, "loss": 1.6591, "step": 2861 }, { "epoch": 0.99, "learning_rate": 1.7037038224584913e-09, "loss": 1.6698, "step": 2862 }, { "epoch": 0.99, "learning_rate": 1.6020243737865926e-09, "loss": 1.5636, "step": 2863 }, { "epoch": 0.99, "learning_rate": 1.50347220759961e-09, "loss": 1.6493, "step": 2864 }, { "epoch": 0.99, "learning_rate": 1.4080474472569216e-09, "loss": 1.4504, "step": 2865 }, { "epoch": 0.99, "learning_rate": 1.3157502122032595e-09, "loss": 1.619, "step": 2866 }, { "epoch": 0.99, "learning_rate": 1.2265806179681539e-09, "loss": 1.5797, "step": 2867 }, { "epoch": 0.99, "learning_rate": 1.1405387761664888e-09, "loss": 1.774, "step": 2868 }, { "epoch": 0.99, "learning_rate": 1.0576247944985018e-09, "loss": 1.7655, "step": 2869 }, { "epoch": 0.99, "learning_rate": 9.778387767486741e-10, "loss": 1.8595, "step": 2870 }, { "epoch": 0.99, "learning_rate": 9.011808227865626e-10, "loss": 1.6127, "step": 2871 }, { "epoch": 0.99, "learning_rate": 8.276510285656902e-10, "loss": 1.597, "step": 2872 }, { "epoch": 0.99, "learning_rate": 7.57249486124656e-10, "loss": 1.5323, "step": 2873 }, { "epoch": 0.99, "learning_rate": 6.899762835860246e-10, "loss": 1.619, "step": 2874 }, { "epoch": 0.99, "learning_rate": 6.258315051568819e-10, "loss": 1.6949, "step": 2875 }, { "epoch": 0.99, "learning_rate": 5.648152311282795e-10, "loss": 1.86, "step": 2876 }, { "epoch": 0.99, "learning_rate": 5.069275378746796e-10, "loss": 1.6693, "step": 2877 }, { "epoch": 0.99, "learning_rate": 4.521684978556207e-10, "loss": 1.692, "step": 2878 }, { "epoch": 0.99, "learning_rate": 4.0053817961321903e-10, "loss": 1.604, "step": 2879 }, { "epoch": 0.99, "learning_rate": 3.5203664777438973e-10, "loss": 1.5215, "step": 2880 }, { "epoch": 1.0, "learning_rate": 3.066639630491808e-10, "loss": 1.6036, "step": 2881 }, { "epoch": 1.0, "learning_rate": 2.6442018223132857e-10, "loss": 1.6215, "step": 2882 }, { "epoch": 1.0, "learning_rate": 2.2530535819742515e-10, "loss": 1.526, "step": 2883 }, { "epoch": 1.0, "learning_rate": 1.8931953990886097e-10, "loss": 1.5385, "step": 2884 }, { "epoch": 1.0, "learning_rate": 1.564627724090495e-10, "loss": 1.5454, "step": 2885 }, { "epoch": 1.0, "learning_rate": 1.2673509682564754e-10, "loss": 1.4312, "step": 2886 }, { "epoch": 1.0, "learning_rate": 1.0013655036916758e-10, "loss": 1.3765, "step": 2887 }, { "epoch": 1.0, "learning_rate": 7.666716633325522e-11, "loss": 1.8422, "step": 2888 }, { "epoch": 1.0, "learning_rate": 5.632697409496679e-11, "loss": 1.5835, "step": 2889 }, { "epoch": 1.0, "learning_rate": 3.911599911449182e-11, "loss": 1.5709, "step": 2890 }, { "epoch": 1.0, "learning_rate": 2.5034262935152987e-11, "loss": 1.4362, "step": 2891 }, { "epoch": 1.0, "learning_rate": 1.4081783183128584e-11, "loss": 1.6266, "step": 2892 }, { "epoch": 1.0, "learning_rate": 6.258573567730075e-12, "loss": 1.7788, "step": 2893 }, { "epoch": 1.0, "learning_rate": 1.5646438816796506e-12, "loss": 1.7117, "step": 2894 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.7859, "step": 2895 }, { "epoch": 1.0, "step": 2895, "total_flos": 104942126952448.0, "train_loss": 1.7047598777863233, "train_runtime": 2031.3793, "train_samples_per_second": 45.598, "train_steps_per_second": 1.425 } ], "max_steps": 2895, "num_train_epochs": 1, "total_flos": 104942126952448.0, "trial_name": null, "trial_params": null }